def run_csv_jsonl(source_file_path, timeline_name, index_name, source_type,
                  delimiter=None, username=None):
    """Create a Celery task for processing a CSV or JSONL file.

    Args:
        source_file_path: Path to CSV or JSONL file.
        timeline_name: Name of the Timesketch timeline.
        index_name: Name of the datastore index.
        source_type: Type of file, csv or jsonl.
        delimiter: Character used as a field separator.
        username: Username of the user who will own the timeline.

    Returns:
        Dictionary with count of processed events.
    """
    event_type = u'generic_event'  # Document type for Elasticsearch
    validators = {
        u'csv': read_and_validate_csv,
        u'jsonl': read_and_validate_jsonl
    }
    read_and_validate = validators.get(source_type)

    # Log information to Celery
    logging.info(u'Index name: %s', index_name)
    logging.info(u'Timeline name: %s', timeline_name)
    logging.info(u'Source type: %s', source_type)
    logging.info(u'Document type: %s', event_type)
    logging.info(u'Owner: %s', username)

    es = ElasticsearchDataStore(
        host=current_app.config[u'ELASTIC_HOST'],
        port=current_app.config[u'ELASTIC_PORT'])

    # Reason for the broad exception catch is that we want to capture
    # all possible errors and exit the task.
    try:
        es.create_index(index_name=index_name, doc_type=event_type)
        for event in read_and_validate(source_file_path, delimiter):
            es.import_event(index_name, event_type, event)
        # Import the remaining events
        total_events = es.import_event(index_name, event_type)
    except Exception:
        # Mark the searchindex and timelines as failed and exit the task.
        # Note: traceback.format_exc() takes no exception argument; it
        # formats the exception currently being handled.
        error_msg = traceback.format_exc()
        _set_timeline_status(index_name, status=u'fail', error_msg=error_msg)
        logging.error(error_msg)
        return

    # Set status to ready when done
    _set_timeline_status(index_name, status=u'ready')

    return {u'Events processed': total_events}
def run_csv_jsonl(source_file_path, timeline_name, index_name, source_type):
    """Create a Celery task for processing a CSV or JSONL file.

    Args:
        source_file_path: Path to CSV or JSONL file.
        timeline_name: Name of the Timesketch timeline.
        index_name: Name of the datastore index.
        source_type: Type of file, csv or jsonl.

    Returns:
        Name (str) of the index.
    """
    event_type = 'generic_event'  # Document type for Elasticsearch
    validators = {
        'csv': read_and_validate_csv,
        'jsonl': read_and_validate_jsonl
    }
    read_and_validate = validators.get(source_type)

    # Log information to Celery
    logging.info(
        'Index timeline [{0:s}] to index [{1:s}] (source: {2:s})'.format(
            timeline_name, index_name, source_type))

    es = ElasticsearchDataStore(
        host=current_app.config['ELASTIC_HOST'],
        port=current_app.config['ELASTIC_PORT'])

    # Reason for the broad exception catch is that we want to capture
    # all possible errors and exit the task.
    try:
        es.create_index(index_name=index_name, doc_type=event_type)
        for event in read_and_validate(source_file_path):
            es.import_event(index_name, event_type, event)
        # Import the remaining events
        es.flush_queued_events()
    except (ImportError, NameError, UnboundLocalError):
        raise
    except Exception:  # pylint: disable=broad-except
        # Mark the searchindex and timelines as failed and exit the task
        error_msg = traceback.format_exc()
        _set_timeline_status(index_name, status='fail', error_msg=error_msg)
        logging.error(error_msg)
        return None

    # Set status to ready when done
    _set_timeline_status(index_name, status='ready')

    return index_name
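
# The _set_timeline_status helper called above is not shown in this section.
# Below is a minimal sketch of what it might do, reusing the SearchIndex
# model and db_session seen in run_csv further down; the set_status() method
# and the error field are assumptions, not the actual implementation.
def _set_timeline_status(index_name, status, error_msg=None):
    """Mark a search index (and its timelines) with the given status."""
    searchindex = SearchIndex.query.filter_by(index_name=index_name).first()
    if not searchindex:
        logging.error('Unable to find index: %s', index_name)
        return
    searchindex.set_status(status)  # assumed status API on the model
    if error_msg:
        searchindex.description = error_msg  # hypothetical error field
    db_session.add(searchindex)
    db_session.commit()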
def run_csv(source_file_path, timeline_name, index_name, username=None):
    """Create a Celery task for processing a CSV file.

    Args:
        source_file_path: Path to CSV file.
        timeline_name: Name of the Timesketch timeline.
        index_name: Name of the datastore index.
        username: Username of the user who will own the timeline.

    Returns:
        Dictionary with count of processed events.
    """
    flush_interval = 1000  # events to queue before bulk index
    event_type = u'generic_event'  # Document type for Elasticsearch
    app = create_app()

    # Log information to Celery
    logging.info(u'Index name: %s', index_name)
    logging.info(u'Timeline name: %s', timeline_name)
    logging.info(u'Flush interval: %d', flush_interval)
    logging.info(u'Document type: %s', event_type)
    logging.info(u'Owner: %s', username)

    es = ElasticsearchDataStore(
        host=current_app.config[u'ELASTIC_HOST'],
        port=current_app.config[u'ELASTIC_PORT'])

    es.create_index(index_name=index_name, doc_type=event_type)
    for event in read_and_validate_csv(source_file_path):
        es.import_event(flush_interval, index_name, event_type, event)
    # Import the remaining events
    total_events = es.import_event(flush_interval, index_name, event_type)

    # We are done so let's remove the processing status flag
    with app.app_context():
        search_index = SearchIndex.query.filter_by(
            index_name=index_name).first()
        search_index.status.remove(search_index.status[0])
        db_session.add(search_index)
        db_session.commit()

    return {u'Events processed': total_events}
def run_csv_jsonl(file_path, events, timeline_name, index_name, source_type):
    """Create a Celery task for processing a CSV or JSONL file.

    Args:
        file_path: Path to the JSON or CSV file.
        events: A string with the events.
        timeline_name: Name of the Timesketch timeline.
        index_name: Name of the datastore index.
        source_type: Type of file, csv or jsonl.

    Returns:
        Name (str) of the index.
    """
    if events:
        file_handle = io.StringIO(events)
        source_type = 'jsonl'
    else:
        file_handle = codecs.open(
            file_path, 'r', encoding='utf-8', errors='replace')

    event_type = 'generic_event'  # Document type for Elasticsearch
    validators = {
        'csv': read_and_validate_csv,
        'jsonl': read_and_validate_jsonl,
    }
    read_and_validate = validators.get(source_type)

    # Log information to Celery
    logging.info(
        'Index timeline [{0:s}] to index [{1:s}] (source: {2:s})'.format(
            timeline_name, index_name, source_type))

    es = ElasticsearchDataStore(
        host=current_app.config['ELASTIC_HOST'],
        port=current_app.config['ELASTIC_PORT'])

    # Reason for the broad exception catch is that we want to capture
    # all possible errors and exit the task.
    try:
        es.create_index(index_name=index_name, doc_type=event_type)
        for event in read_and_validate(file_handle):
            es.import_event(index_name, event_type, event)
        # Import the remaining events
        es.flush_queued_events()

    except errors.DataIngestionError as e:
        _set_timeline_status(index_name, status='fail', error_msg=str(e))
        raise

    except (RuntimeError, ImportError, NameError, UnboundLocalError,
            RequestError) as e:
        _set_timeline_status(index_name, status='fail', error_msg=str(e))
        raise

    except Exception as e:  # pylint: disable=broad-except
        # Mark the searchindex and timelines as failed and exit the task
        error_msg = traceback.format_exc()
        _set_timeline_status(index_name, status='fail', error_msg=error_msg)
        logging.error('Error: {0!s}\n{1:s}'.format(e, error_msg))
        return None

    # Set status to ready when done
    _set_timeline_status(index_name, status='ready')

    return index_name
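
# A minimal usage sketch for the variant above, assuming it has been
# registered as a Celery task elsewhere (the decorator is not shown in this
# section); all argument values are illustrative.
run_csv_jsonl.delay(
    file_path='',  # ignored when raw events are passed in
    events='{"message": "test", "datetime": "2021-01-01T00:00:00", '
           '"timestamp_desc": "Test time"}',
    timeline_name='test timeline',
    index_name='4711',
    source_type='jsonl')  # forced to jsonl whenever events is non-empty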
def run_csv_jsonl(file_path, events, timeline_name, index_name, source_type,
                  timeline_id):
    """Create a Celery task for processing a CSV or JSONL file.

    Args:
        file_path: Path to the JSON or CSV file.
        events: A string with the events.
        timeline_name: Name of the Timesketch timeline.
        index_name: Name of the datastore index.
        source_type: Type of file, csv or jsonl.
        timeline_id: ID of the timeline object this data belongs to.

    Returns:
        Name (str) of the index.
    """
    if events:
        file_handle = io.StringIO(events)
        source_type = 'jsonl'
    else:
        file_handle = codecs.open(
            file_path, 'r', encoding='utf-8', errors='replace')

    event_type = 'generic_event'  # Document type for Elasticsearch
    validators = {
        'csv': read_and_validate_csv,
        'jsonl': read_and_validate_jsonl,
    }
    read_and_validate = validators.get(source_type)

    # Log information to Celery
    logger.info(
        'Index timeline [{0:s}] to index [{1:s}] (source: {2:s})'.format(
            timeline_name, index_name, source_type))

    es = ElasticsearchDataStore(
        host=current_app.config['ELASTIC_HOST'],
        port=current_app.config['ELASTIC_PORT'])

    # Reason for the broad exception catch is that we want to capture
    # all possible errors and exit the task.
    final_counter = 0
    error_msg = ''
    error_count = 0
    try:
        es.create_index(index_name=index_name, doc_type=event_type)
        for event in read_and_validate(file_handle):
            es.import_event(
                index_name, event_type, event, timeline_id=timeline_id)
            final_counter += 1

        # Import the remaining events
        results = es.flush_queued_events()

        error_container = results.get('error_container', {})
        # Pick up the failure count from the flush results; without this the
        # error branch below can never trigger. The 'error_count' key is an
        # assumption about the dict returned by flush_queued_events().
        error_count = results.get('error_count', 0)
        error_msg = get_import_errors(
            error_container=error_container,
            index_name=index_name,
            total_count=results.get('total_events', 0))

    except errors.DataIngestionError as e:
        _set_timeline_status(timeline_id, status='fail', error_msg=str(e))
        _close_index(
            index_name=index_name, data_store=es, timeline_id=timeline_id)
        raise

    except (RuntimeError, ImportError, NameError, UnboundLocalError,
            RequestError) as e:
        _set_timeline_status(timeline_id, status='fail', error_msg=str(e))
        _close_index(
            index_name=index_name, data_store=es, timeline_id=timeline_id)
        raise

    except Exception as e:  # pylint: disable=broad-except
        # Mark the searchindex and timelines as failed and exit the task
        error_msg = traceback.format_exc()
        _set_timeline_status(timeline_id, status='fail', error_msg=error_msg)
        _close_index(
            index_name=index_name, data_store=es, timeline_id=timeline_id)
        logger.error('Error: {0!s}\n{1:s}'.format(e, error_msg))
        return None

    if error_count:
        logger.info(
            'Index timeline: [{0:s}] to index [{1:s}] - {2:d} out of {3:d} '
            'events imported (in total {4:d} errors were discovered) '.format(
                timeline_name, index_name, (final_counter - error_count),
                final_counter, error_count))
    else:
        logger.info(
            'Index timeline: [{0:s}] to index [{1:s}] - {2:d} '
            'events imported.'.format(
                timeline_name, index_name, final_counter))

    # Set status to ready when done
    _set_timeline_status(timeline_id, status='ready', error_msg=error_msg)

    return index_name
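
# get_import_errors, called above, turns the error container returned by
# flush_queued_events() into a log-friendly summary string. A rough sketch
# under the assumption that the container maps index names to entries with an
# 'errors' list; the actual structure and wording may differ.
def get_import_errors(error_container, index_name, total_count):
    """Return a string summarizing import errors for an index, if any."""
    error_list = error_container.get(index_name, {}).get('errors', [])
    if not error_list:
        return ''
    return '{0:d} out of {1:d} events failed to import.'.format(
        len(error_list), total_count)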
def run_plaso(file_path, events, timeline_name, index_name, source_type,
              timeline_id):
    """Create a Celery task for processing a Plaso storage file.

    Args:
        file_path: Path to the plaso file on disk.
        events: String with event data, invalid for plaso files.
        timeline_name: Name of the Timesketch timeline.
        index_name: Name of the datastore index.
        source_type: Type of the datasource (plaso for this task).
        timeline_id: ID of the timeline object this data belongs to.

    Raises:
        RuntimeError: If the function is called using events, plaso is not
            installed or is of an unsupported version.

    Returns:
        Name (str) of the index.
    """
    if not plaso:
        raise RuntimeError(
            'Plaso isn\'t installed, unable to continue processing plaso '
            'files.')

    plaso_version = int(plaso.__version__)
    if plaso_version <= PLASO_MINIMUM_VERSION:
        raise RuntimeError(
            'Plaso version is out of date (version {0:d}), please upgrade '
            'to a version that is later than {1:d}'.format(
                plaso_version, PLASO_MINIMUM_VERSION))

    if events:
        raise RuntimeError('Plaso uploads need a file, not events.')

    event_type = 'generic_event'  # Document type for Elasticsearch

    mappings = None
    mappings_file_path = current_app.config.get('PLASO_MAPPING_FILE', '')
    if os.path.isfile(mappings_file_path):
        try:
            with open(mappings_file_path, 'r') as mfh:
                mappings = json.load(mfh)

                if not isinstance(mappings, dict):
                    raise RuntimeError(
                        'Unable to create mappings, the mappings are not a '
                        'dict, please look at the file: {0:s}'.format(
                            mappings_file_path))
        except (json.JSONDecodeError, IOError):
            logger.error('Unable to read in mapping', exc_info=True)

    elastic_server = current_app.config.get('ELASTIC_HOST')
    if not elastic_server:
        raise RuntimeError(
            'Unable to connect to Elastic, no server set, unable to '
            'process plaso file.')
    elastic_port = current_app.config.get('ELASTIC_PORT')
    if not elastic_port:
        raise RuntimeError(
            'Unable to connect to Elastic, no port set, unable to '
            'process plaso file.')

    es = ElasticsearchDataStore(host=elastic_server, port=elastic_port)

    try:
        es.create_index(
            index_name=index_name, doc_type=event_type, mappings=mappings)

    except errors.DataIngestionError as e:
        _set_timeline_status(timeline_id, status='fail', error_msg=str(e))
        _close_index(
            index_name=index_name, data_store=es, timeline_id=timeline_id)
        raise

    except (RuntimeError, ImportError, NameError, UnboundLocalError,
            RequestError) as e:
        _set_timeline_status(timeline_id, status='fail', error_msg=str(e))
        _close_index(
            index_name=index_name, data_store=es, timeline_id=timeline_id)
        raise

    except Exception as e:  # pylint: disable=broad-except
        # Mark the searchindex and timelines as failed and exit the task
        error_msg = traceback.format_exc()
        _set_timeline_status(timeline_id, status='fail', error_msg=error_msg)
        logger.error('Error: {0!s}\n{1:s}'.format(e, error_msg))
        _close_index(
            index_name=index_name, data_store=es, timeline_id=timeline_id)
        return None

    message = 'Index timeline [{0:s}] to index [{1:s}] (source: {2:s})'
    logger.info(message.format(timeline_name, index_name, source_type))

    try:
        psort_path = current_app.config['PSORT_PATH']
    except KeyError:
        psort_path = 'psort.py'

    cmd = [
        psort_path, '-o', 'elastic_ts', file_path,
        '--server', elastic_server,
        '--port', str(elastic_port),
        '--status_view', 'none',
        '--index_name', index_name,
    ]

    if mappings_file_path:
        cmd.extend(['--elastic_mappings', mappings_file_path])

    if timeline_id:
        cmd.extend(['--timeline_identifier', str(timeline_id)])

    # Run psort.py
    try:
        subprocess.check_output(
            cmd, stderr=subprocess.STDOUT, encoding='utf-8')
    except subprocess.CalledProcessError as e:
        # Mark the searchindex and timelines as failed and exit the task
        _set_timeline_status(timeline_id, status='fail', error_msg=e.output)
        _close_index(
            index_name=index_name, data_store=es, timeline_id=timeline_id)
        return e.output

    # Mark the searchindex and timelines as ready
    _set_timeline_status(timeline_id, status='ready')

    return index_name
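
# For reference, the psort invocation assembled above ends up looking roughly
# like the following (server, port, paths and identifiers are illustrative):
#
#   psort.py -o elastic_ts /path/to/storage.plaso \
#       --server 127.0.0.1 --port 9200 \
#       --status_view none --index_name <index_name> \
#       --elastic_mappings <PLASO_MAPPING_FILE> \
#       --timeline_identifier <timeline_id>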