Example #1
def run_csv_jsonl(source_file_path,
                  timeline_name,
                  index_name,
                  source_type,
                  delimiter=None,
                  username=None):
    """Create a Celery task for processing a CSV or JSONL file.

    Args:
        source_file_path: Path to CSV or JSONL file.
        timeline_name: Name of the Timesketch timeline.
        index_name: Name of the datastore index.
        source_type: Type of file, csv or jsonl.
        delimiter: Character used as a field separator.
        username: Username of the user who will own the timeline.

    Returns:
        Dictionary with count of processed events.
    """
    event_type = u'generic_event'  # Document type for Elasticsearch
    validators = {
        u'csv': read_and_validate_csv,
        u'jsonl': read_and_validate_jsonl
    }
    read_and_validate = validators.get(source_type)

    # Log information to Celery
    logging.info(u'Index name: %s', index_name)
    logging.info(u'Timeline name: %s', timeline_name)
    logging.info(u'Source type: %s', source_type)
    logging.info(u'Document type: %s', event_type)
    logging.info(u'Owner: %s', username)

    es = ElasticsearchDataStore(host=current_app.config[u'ELASTIC_HOST'],
                                port=current_app.config[u'ELASTIC_PORT'])

    # Reason for the broad exception catch is that we want to capture
    # all possible errors and exit the task.
    try:
        es.create_index(index_name=index_name, doc_type=event_type)
        for event in read_and_validate(source_file_path, delimiter):
            es.import_event(index_name, event_type, event)
        # Import the remaining events
        total_events = es.import_event(index_name, event_type)
    except Exception as e:
        # Mark the searchindex and timelines as failed and exit the task
        error_msg = traceback.format_exc()
        _set_timeline_status(index_name, status=u'fail', error_msg=error_msg)
        logging.error(error_msg)
        return

    # Set status to ready when done
    _set_timeline_status(index_name, status=u'ready')

    return {u'Events processed': total_events}
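
Across these revisions the reader function is chosen with a plain dict lookup instead of an if/elif chain. Below is a minimal, self-contained sketch of that dispatch pattern; the two reader functions are simplified stand-ins, not Timesketch's real read_and_validate_csv/read_and_validate_jsonl (which perform actual validation).

import csv
import json


def read_csv_rows(path, delimiter=None):
    """Stand-in CSV reader: yields one dict per row (no validation)."""
    with open(path, newline='') as handle:
        for row in csv.DictReader(handle, delimiter=delimiter or ','):
            yield row


def read_jsonl_rows(path, delimiter=None):
    """Stand-in JSONL reader: yields one dict per non-empty line."""
    with open(path) as handle:
        for line in handle:
            if line.strip():
                yield json.loads(line)


# Same dispatch idea as the task above: pick a reader by source type.
readers = {'csv': read_csv_rows, 'jsonl': read_jsonl_rows}


def parse_source(source_file_path, source_type, delimiter=None):
    reader = readers.get(source_type)
    if reader is None:
        raise ValueError('Unsupported source type: {0:s}'.format(source_type))
    return list(reader(source_file_path, delimiter))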
Example #2
def run_csv_jsonl(source_file_path, timeline_name, index_name, source_type):
    """Create a Celery task for processing a CSV or JSONL file.

    Args:
        source_file_path: Path to CSV or JSONL file.
        timeline_name: Name of the Timesketch timeline.
        index_name: Name of the datastore index.
        source_type: Type of file, csv or jsonl.

    Returns:
        Name (str) of the index.
    """
    event_type = 'generic_event'  # Document type for Elasticsearch
    validators = {
        'csv': read_and_validate_csv,
        'jsonl': read_and_validate_jsonl
    }
    read_and_validate = validators.get(source_type)

    # Log information to Celery
    logging.info(
        'Index timeline [{0:s}] to index [{1:s}] (source: {2:s})'.format(
            timeline_name, index_name, source_type))

    es = ElasticsearchDataStore(
        host=current_app.config['ELASTIC_HOST'],
        port=current_app.config['ELASTIC_PORT'])

    # Reason for the broad exception catch is that we want to capture
    # all possible errors and exit the task.
    try:
        es.create_index(index_name=index_name, doc_type=event_type)
        for event in read_and_validate(source_file_path):
            es.import_event(index_name, event_type, event)
        # Import the remaining events
        es.flush_queued_events()

    except (ImportError, NameError, UnboundLocalError):
        raise

    except Exception as e:  # pylint: disable=broad-except
        # Mark the searchindex and timelines as failed and exit the task
        error_msg = traceback.format_exc()
        _set_timeline_status(index_name, status='fail', error_msg=error_msg)
        logging.error(error_msg)
        return None

    # Set status to ready when done
    _set_timeline_status(index_name, status='ready')

    return index_name
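
The notable change from the first example is the narrow except clause ahead of the broad one: programming errors (ImportError, NameError, UnboundLocalError) are re-raised so they reach the Celery worker, while everything else only marks the timeline as failed. A small sketch of that pattern in isolation; run_import and mark_failed are hypothetical placeholders, not Timesketch functions.

import logging
import traceback


def import_with_status(run_import, mark_failed):
    """Illustrative wrapper: run_import and mark_failed are placeholders."""
    try:
        run_import()
    except (ImportError, NameError, UnboundLocalError):
        # Likely a bug or missing dependency: let it propagate to the worker.
        raise
    except Exception:  # pylint: disable=broad-except
        # Treat everything else as a data/ingestion problem.
        error_msg = traceback.format_exc()
        mark_failed(error_msg)
        logging.error(error_msg)
        return None
    return 'ready'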
Example #3
def run_csv(source_file_path, timeline_name, index_name, username=None):
    """Create a Celery task for processing a CSV file.

    Args:
        source_file_path: Path to CSV file.
        timeline_name: Name of the Timesketch timeline.
        index_name: Name of the datastore index.
        username: Username of the user who will own the timeline.

    Returns:
        Dictionary with count of processed events.
    """
    flush_interval = 1000  # events to queue before bulk index
    event_type = u'generic_event'  # Document type for Elasticsearch
    app = create_app()

    # Log information to Celery
    logging.info(u'Index name: %s', index_name)
    logging.info(u'Timeline name: %s', timeline_name)
    logging.info(u'Flush interval: %d', flush_interval)
    logging.info(u'Document type: %s', event_type)
    logging.info(u'Owner: %s', username)

    es = ElasticsearchDataStore(
        host=current_app.config[u'ELASTIC_HOST'],
        port=current_app.config[u'ELASTIC_PORT'])

    es.create_index(index_name=index_name, doc_type=event_type)
    for event in read_and_validate_csv(source_file_path):
        es.import_event(
            flush_interval, index_name, event_type, event)

    # Import the remaining events
    total_events = es.import_event(flush_interval, index_name, event_type)

    # We are done so let's remove the processing status flag
    with app.app_context():
        search_index = SearchIndex.query.filter_by(
            index_name=index_name).first()
        search_index.status.remove(search_index.status[0])
        db_session.add(search_index)
        db_session.commit()

    return {u'Events processed': total_events}
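
In this older revision the task passes a flush_interval so the datastore batches events before bulk-indexing them; the final import_event call without an event flushes whatever is still queued. A rough, self-contained sketch of that buffering idea follows (this class is illustrative, not the real ElasticsearchDataStore).

class BulkQueue(object):
    """Illustrative buffer: queue events, flush every `flush_interval` of them."""

    def __init__(self, flush_interval=1000):
        self.flush_interval = flush_interval
        self.queue = []
        self.total = 0

    def import_event(self, event=None):
        if event is not None:
            self.queue.append(event)
        # Flush when the batch is full, or when called without an event
        # (the final "import the remaining events" call in the task above).
        if event is None or len(self.queue) >= self.flush_interval:
            self.total += len(self.queue)
            # A real datastore would send self.queue as one bulk request here.
            del self.queue[:]
        return self.total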
Example #4
def run_csv_jsonl(file_path, events, timeline_name, index_name, source_type):
    """Create a Celery task for processing a CSV or JSONL file.

    Args:
        file_path: Path to the JSON or CSV file.
        events: A string with the events.
        timeline_name: Name of the Timesketch timeline.
        index_name: Name of the datastore index.
        source_type: Type of file, csv or jsonl.

    Returns:
        Name (str) of the index.
    """
    if events:
        file_handle = io.StringIO(events)
        source_type = 'jsonl'
    else:
        file_handle = codecs.open(file_path,
                                  'r',
                                  encoding='utf-8',
                                  errors='replace')

    event_type = 'generic_event'  # Document type for Elasticsearch
    validators = {
        'csv': read_and_validate_csv,
        'jsonl': read_and_validate_jsonl,
    }
    read_and_validate = validators.get(source_type)

    # Log information to Celery
    logging.info(
        'Index timeline [{0:s}] to index [{1:s}] (source: {2:s})'.format(
            timeline_name, index_name, source_type))

    es = ElasticsearchDataStore(host=current_app.config['ELASTIC_HOST'],
                                port=current_app.config['ELASTIC_PORT'])

    # Reason for the broad exception catch is that we want to capture
    # all possible errors and exit the task.
    try:
        es.create_index(index_name=index_name, doc_type=event_type)
        for event in read_and_validate(file_handle):
            es.import_event(index_name, event_type, event)
        # Import the remaining events
        es.flush_queued_events()

    except errors.DataIngestionError as e:
        _set_timeline_status(index_name, status='fail', error_msg=str(e))
        raise

    except (RuntimeError, ImportError, NameError, UnboundLocalError,
            RequestError) as e:
        _set_timeline_status(index_name, status='fail', error_msg=str(e))
        raise

    except Exception as e:  # pylint: disable=broad-except
        # Mark the searchindex and timelines as failed and exit the task
        error_msg = traceback.format_exc()
        _set_timeline_status(index_name, status='fail', error_msg=error_msg)
        logging.error('Error: {0!s}\n{1:s}'.format(e, error_msg))
        return None

    # Set status to ready when done
    _set_timeline_status(index_name, status='ready')

    return index_name
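
This revision accepts either a path on disk or a raw events string and normalizes both into one readable handle before picking a parser. The same step isolated as a small helper (the function name is ours, not Timesketch's):

import codecs
import io


def open_source(file_path, events, source_type):
    """Return (file_handle, source_type), mirroring the normalization above.

    Raw event strings are wrapped in an in-memory handle and treated as
    JSONL; otherwise the file on disk is opened with undecodable bytes
    replaced.
    """
    if events:
        return io.StringIO(events), 'jsonl'
    handle = codecs.open(file_path, 'r', encoding='utf-8', errors='replace')
    return handle, source_type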
Example #5
def run_csv_jsonl(file_path, events, timeline_name, index_name, source_type,
                  timeline_id):
    """Create a Celery task for processing a CSV or JSONL file.

    Args:
        file_path: Path to the JSON or CSV file.
        events: A string with the events.
        timeline_name: Name of the Timesketch timeline.
        index_name: Name of the datastore index.
        source_type: Type of file, csv or jsonl.
        timeline_id: ID of the timeline object this data belongs to.

    Returns:
        Name (str) of the index.
    """
    if events:
        file_handle = io.StringIO(events)
        source_type = 'jsonl'
    else:
        file_handle = codecs.open(file_path,
                                  'r',
                                  encoding='utf-8',
                                  errors='replace')

    event_type = 'generic_event'  # Document type for Elasticsearch
    validators = {
        'csv': read_and_validate_csv,
        'jsonl': read_and_validate_jsonl,
    }
    read_and_validate = validators.get(source_type)

    # Log information to Celery
    logger.info(
        'Index timeline [{0:s}] to index [{1:s}] (source: {2:s})'.format(
            timeline_name, index_name, source_type))

    es = ElasticsearchDataStore(host=current_app.config['ELASTIC_HOST'],
                                port=current_app.config['ELASTIC_PORT'])

    # Reason for the broad exception catch is that we want to capture
    # all possible errors and exit the task.
    final_counter = 0
    error_msg = ''
    error_count = 0
    try:
        es.create_index(index_name=index_name, doc_type=event_type)
        for event in read_and_validate(file_handle):
            es.import_event(index_name,
                            event_type,
                            event,
                            timeline_id=timeline_id)
            final_counter += 1

        # Import the remaining events
        results = es.flush_queued_events()

        error_container = results.get('error_container', {})
        error_msg = get_import_errors(error_container=error_container,
                                      index_name=index_name,
                                      total_count=results.get(
                                          'total_events', 0))

    except errors.DataIngestionError as e:
        _set_timeline_status(timeline_id, status='fail', error_msg=str(e))
        _close_index(index_name=index_name,
                     data_store=es,
                     timeline_id=timeline_id)
        raise

    except (RuntimeError, ImportError, NameError, UnboundLocalError,
            RequestError) as e:
        _set_timeline_status(timeline_id, status='fail', error_msg=str(e))
        _close_index(index_name=index_name,
                     data_store=es,
                     timeline_id=timeline_id)
        raise

    except Exception as e:  # pylint: disable=broad-except
        # Mark the searchindex and timelines as failed and exit the task
        error_msg = traceback.format_exc()
        _set_timeline_status(timeline_id, status='fail', error_msg=error_msg)
        _close_index(index_name=index_name,
                     data_store=es,
                     timeline_id=timeline_id)
        logger.error('Error: {0!s}\n{1:s}'.format(e, error_msg))
        return None

    if error_count:
        logger.info(
            'Index timeline: [{0:s}] to index [{1:s}] - {2:d} out of {3:d} '
            'events imported (in total {4:d} errors were discovered) '.format(
                timeline_name, index_name, (final_counter - error_count),
                final_counter, error_count))
    else:
        logger.info('Index timeline: [{0:s}] to index [{1:s}] - {2:d} '
                    'events imported.'.format(timeline_name, index_name,
                                              final_counter))

    # Set status to ready when done
    _set_timeline_status(timeline_id, status='ready', error_msg=error_msg)

    return index_name
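
This revision adds per-event bookkeeping: final_counter counts events handed to the datastore, and the post-flush results are turned into an error summary that is stored with the timeline. The closing log messages, restated as a standalone helper using %-style logging (a sketch, not Timesketch code):

import logging

logger = logging.getLogger(__name__)


def log_import_summary(timeline_name, index_name, final_counter, error_count):
    """Log the same success/partial-failure summary as the task above."""
    if error_count:
        logger.info(
            'Index timeline: [%s] to index [%s] - %d out of %d events '
            'imported (in total %d errors were discovered)',
            timeline_name, index_name, final_counter - error_count,
            final_counter, error_count)
    else:
        logger.info(
            'Index timeline: [%s] to index [%s] - %d events imported.',
            timeline_name, index_name, final_counter)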
Example #6
def run_plaso(file_path, events, timeline_name, index_name, source_type,
              timeline_id):
    """Create a Celery task for processing Plaso storage file.

    Args:
        file_path: Path to the plaso file on disk.
        events: String with event data, invalid for plaso files.
        timeline_name: Name of the Timesketch timeline.
        index_name: Name of the datastore index.
        source_type: Type of file, csv or jsonl.
        timeline_id: ID of the timeline object this data belongs to.

    Raises:
        RuntimeError: If the function is called with events, if Plaso is not
            installed, or if the installed Plaso version is unsupported.
    Returns:
        Name (str) of the index.
    """
    if not plaso:
        raise RuntimeError(
            'Plaso isn\'t installed, unable to continue processing plaso '
            'files.')

    plaso_version = int(plaso.__version__)
    if plaso_version <= PLASO_MINIMUM_VERSION:
        raise RuntimeError(
            'Plaso version is out of date (version {0:d}, please upgrade to a '
            'version that is later than {1:d})'.format(plaso_version,
                                                      PLASO_MINIMUM_VERSION))

    if events:
        raise RuntimeError('Plaso uploads need a file, not events.')

    event_type = 'generic_event'  # Document type for Elasticsearch

    mappings = None
    mappings_file_path = current_app.config.get('PLASO_MAPPING_FILE', '')
    if os.path.isfile(mappings_file_path):
        try:
            with open(mappings_file_path, 'r') as mfh:
                mappings = json.load(mfh)

                if not isinstance(mappings, dict):
                    raise RuntimeError(
                        'Unable to create mappings, the mappings are not a '
                        'dict, please look at the file: {0:s}'.format(
                            mappings_file_path))
        except (json.JSONDecodeError, IOError):
            logger.error('Unable to read in mapping', exc_info=True)

    elastic_server = current_app.config.get('ELASTIC_HOST')
    if not elastic_server:
        raise RuntimeError(
            'Unable to connect to Elastic, no server set, unable to '
            'process plaso file.')
    elastic_port = current_app.config.get('ELASTIC_PORT')
    if not elastic_port:
        raise RuntimeError(
            'Unable to connect to Elastic, no port set, unable to '
            'process plaso file.')

    es = ElasticsearchDataStore(host=elastic_server, port=elastic_port)

    try:
        es.create_index(index_name=index_name,
                        doc_type=event_type,
                        mappings=mappings)
    except errors.DataIngestionError as e:
        _set_timeline_status(timeline_id, status='fail', error_msg=str(e))
        _close_index(index_name=index_name,
                     data_store=es,
                     timeline_id=timeline_id)
        raise

    except (RuntimeError, ImportError, NameError, UnboundLocalError,
            RequestError) as e:
        _set_timeline_status(timeline_id, status='fail', error_msg=str(e))
        _close_index(index_name=index_name,
                     data_store=es,
                     timeline_id=timeline_id)
        raise

    except Exception as e:  # pylint: disable=broad-except
        # Mark the searchindex and timelines as failed and exit the task
        error_msg = traceback.format_exc()
        _set_timeline_status(timeline_id, status='fail', error_msg=error_msg)
        logger.error('Error: {0!s}\n{1:s}'.format(e, error_msg))
        _close_index(index_name=index_name,
                     data_store=es,
                     timeline_id=timeline_id)
        return None

    message = 'Index timeline [{0:s}] to index [{1:s}] (source: {2:s})'
    logger.info(message.format(timeline_name, index_name, source_type))

    try:
        psort_path = current_app.config['PSORT_PATH']
    except KeyError:
        psort_path = 'psort.py'

    cmd = [
        psort_path,
        '-o',
        'elastic_ts',
        file_path,
        '--server',
        elastic_server,
        '--port',
        str(elastic_port),
        '--status_view',
        'none',
        '--index_name',
        index_name,
    ]

    if mappings_file_path:
        cmd.extend(['--elastic_mappings', mappings_file_path])

    if timeline_id:
        cmd.extend(['--timeline_identifier', str(timeline_id)])

    # Run psort.py
    try:
        subprocess.check_output(cmd,
                                stderr=subprocess.STDOUT,
                                encoding='utf-8')
    except subprocess.CalledProcessError as e:
        # Mark the searchindex and timelines as failed and exit the task
        _set_timeline_status(timeline_id, status='fail', error_msg=e.output)
        _close_index(index_name=index_name,
                     data_store=es,
                     timeline_id=timeline_id)
        return e.output

    # Mark the searchindex and timelines as ready
    _set_timeline_status(timeline_id, status='ready')

    return index_name
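
The plaso path delegates the actual indexing to an external psort.py run, assembled as an argument list and executed with subprocess.check_output; a failing run is reported through CalledProcessError.output. A minimal, generic sketch of that invocation pattern follows (the stand-in command below is not the real psort call).

import subprocess
import sys


def run_external_tool(cmd):
    """Run a command list; return (ok, output) instead of raising."""
    try:
        output = subprocess.check_output(
            cmd, stderr=subprocess.STDOUT, encoding='utf-8')
        return True, output
    except subprocess.CalledProcessError as e:
        # e.output holds the combined stdout/stderr of the failed run, which
        # the task above records as the timeline's error message.
        return False, e.output


# Stand-in invocation; the real task builds a psort.py argument list instead.
ok, output = run_external_tool([sys.executable, '--version'])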