Exemple #1
0
def callback(message):
    """Google PubSub callback.

    This function is called on all incoming messages on the configured topic.
    The message is acknowledged first. It is then ignored unless its
    "objectId" attribute names a ".plaso.metadata.json" file. For such a
    file, the metadata file and the Plaso file next to it are downloaded, a
    sketch and timeline are set up, and the Plaso file is handed to the
    indexing pipeline.

    Args:
        message: PubSub message exposing ack() and an attributes mapping.
    """
    metadata_suffix = ".metadata.json"

    message.ack()
    gcs_full_path = message.attributes.get("objectId")

    # Exit early if the object name is missing; .get() returns None when the
    # message carries no "objectId" attribute.
    if not gcs_full_path:
        return

    # Exit early if the file type is wrong.
    if not gcs_full_path.endswith(".plaso" + metadata_suffix):
        return

    gcs_base_path = os.path.dirname(gcs_full_path)
    gcs_metadata_filename = os.path.basename(gcs_full_path)
    # Strip only the trailing suffix; str.replace would also remove any
    # earlier occurrence of ".metadata.json" inside the file name.
    gcs_base_filename = gcs_metadata_filename[:-len(metadata_suffix)]
    gcs_plaso_filename = gcs_base_filename

    # Download files from GCS
    local_metadata_file = download_from_gcs(gcs_base_path,
                                            gcs_metadata_filename)
    local_plaso_file = download_from_gcs(gcs_base_path, gcs_plaso_filename)

    with open(local_metadata_file, "r") as metadata_file:
        metadata = json.load(metadata_file)

    username = metadata.get("globals", {}).get("requester")
    if not username:
        # Backwards compatibility for old Turbinia versions.
        username = metadata.get("requester")
    sketch_id_from_metadata = metadata.get("sketch_id")

    if not username:
        logger.error("Missing username")
        return

    timeline_name = os.path.splitext(gcs_plaso_filename)[0]
    index_name = uuid.uuid4().hex
    # NOTE(review): the requester is only validated above; the sketch is set
    # up with the literal "admin" user name. Confirm this is intended.
    sketch_id, timeline_id = setup_sketch(timeline_name, index_name, "admin",
                                          sketch_id_from_metadata)

    # Start indexing
    with app.app_context():
        pipeline = tasks.build_index_pipeline(
            file_path=local_plaso_file,
            timeline_name=gcs_base_filename,
            index_name=index_name,
            file_extension="plaso",
            sketch_id=sketch_id,
            timeline_id=timeline_id,
        )
        pipeline.apply_async()
        logger.info("File sent for indexing: {}".format(gcs_base_filename))
Exemple #2
0
def callback(message):
    """Google PubSub callback.

    This function is called on all incoming messages on the configured topic.
    The message is acknowledged first. It is then ignored unless its
    'objectId' attribute names a '.plaso.metadata.json' file. For such a
    file, the metadata file and the Plaso file next to it are downloaded, a
    sketch and timeline are set up, and the Plaso file is handed to the
    indexing pipeline.

    Args:
        message: PubSub message exposing ack() and an attributes mapping.
    """
    metadata_suffix = '.metadata.json'

    message.ack()
    gcs_full_path = message.attributes.get('objectId')

    # Exit early if the object name is missing; .get() returns None when the
    # message carries no 'objectId' attribute.
    if not gcs_full_path:
        return

    # Exit early if the file type is wrong.
    if not gcs_full_path.endswith('.plaso' + metadata_suffix):
        return

    gcs_base_path = os.path.dirname(gcs_full_path)
    gcs_metadata_filename = os.path.basename(gcs_full_path)
    # Strip only the trailing suffix; str.replace would also remove any
    # earlier occurrence of '.metadata.json' inside the file name.
    gcs_base_filename = gcs_metadata_filename[:-len(metadata_suffix)]
    gcs_plaso_filename = gcs_base_filename

    # Download files from GCS
    local_metadata_file = download_from_gcs(gcs_base_path,
                                            gcs_metadata_filename)
    local_plaso_file = download_from_gcs(gcs_base_path, gcs_plaso_filename)

    with open(local_metadata_file, 'r') as metadata_file:
        metadata = json.load(metadata_file)

    username = metadata.get('requester')
    sketch_id_from_metadata = metadata.get('sketch_id')

    if not username:
        logger.error('Missing username')
        return

    timeline_name = os.path.splitext(gcs_plaso_filename)[0]
    index_name = uuid.uuid4().hex
    # NOTE(review): the requester is only validated above; the sketch is set
    # up with the literal 'admin' user name. Confirm this is intended.
    sketch_id, timeline_id = setup_sketch(timeline_name, index_name, 'admin',
                                          sketch_id_from_metadata)

    # Start indexing
    with app.app_context():
        pipeline = tasks.build_index_pipeline(file_path=local_plaso_file,
                                              timeline_name=gcs_base_filename,
                                              index_name=index_name,
                                              file_extension='plaso',
                                              sketch_id=sketch_id,
                                              timeline_id=timeline_id)
        pipeline.apply_async()
        logger.info('File sent for indexing: {}'.format(gcs_base_filename))
Exemple #3
0
    def run(self, file_path, sketch_id, username, timeline_name):
        """Import a plaso, csv or jsonl file into a sketch and index it.

        Exits the process (sys.exit) when the file does not exist, has an
        unsupported extension, or no Timesketch user can be determined.

        Args:
            file_path: path to the file to import.
            sketch_id: ID of the sketch to import into. When empty, a leading
                number in the file name (e.g. 42_file_name.plaso) is used
                instead; when no sketch can be loaded a new one is created.
            username: name of the user to import as. When empty, the owner
                of the file is used. The user "root" is never looked up.
            timeline_name: name of the timeline. When empty, a name is
                derived from the file name.
        """
        file_path = os.path.realpath(file_path)
        file_path_no_extension, extension = os.path.splitext(file_path)
        extension = extension.lstrip('.')
        filename = os.path.basename(file_path_no_extension)

        supported_extensions = ('plaso', 'csv', 'jsonl')

        if not os.path.isfile(file_path):
            sys.exit('No such file: {0:s}'.format(file_path))

        if extension not in supported_extensions:
            sys.exit('Extension {0:s} is not supported. '
                     '(supported extensions are: {1:s})'.format(
                         extension, ', '.join(supported_extensions)))

        user = None
        if not username:
            try:
                username = pwd.getpwuid(os.stat(file_path).st_uid).pw_name
            except KeyError:
                # The owning UID has no passwd entry. Leave the username
                # empty so the "cannot determine user" exit below is used
                # instead of an unhandled KeyError.
                username = None
        if username and username != 'root':
            if not isinstance(username, six.text_type):
                username = codecs.decode(username, 'utf-8')
            user = User.query.filter_by(username=username).first()
        if not user:
            sys.exit('Cannot determine user for file: {0:s}'.format(file_path))

        sketch = None
        # If filename starts with <number> then use that as sketch_id.
        # E.g: 42_file_name.plaso means sketch_id is 42.
        sketch_id_from_filename = filename.split('_')[0]
        if not sketch_id and sketch_id_from_filename.isdigit():
            sketch_id = sketch_id_from_filename

        if sketch_id:
            try:
                sketch = Sketch.query.get_with_acl(sketch_id, user=user)
            except Forbidden:
                # Best effort: without access to the requested sketch a new
                # sketch is created further down.
                pass

        if not timeline_name:
            # Covers both None and the empty string; previously an empty
            # string was kept and the split below produced an empty list.
            timeline_name = '{0:s}_timeline'.format(filename)

            if not isinstance(timeline_name, six.text_type):
                timeline_name = codecs.decode(timeline_name, 'utf-8')

            timeline_name = timeline_name.replace('_', ' ')
            # Remove sketch ID if present in the filename.
            timeline_parts = timeline_name.split()
            if timeline_parts and timeline_parts[0].isdigit():
                timeline_name = ' '.join(timeline_parts[1:])

        if not sketch:
            # Create a new sketch.
            sketch_name = 'Sketch for: {0:s}'.format(timeline_name)
            sketch = Sketch(name=sketch_name,
                            description=sketch_name,
                            user=user)
            # Need to commit here to be able to set permissions later.
            db_session.add(sketch)
            db_session.commit()
            sketch.grant_permission(permission='read', user=user)
            sketch.grant_permission(permission='write', user=user)
            sketch.grant_permission(permission='delete', user=user)
            sketch.status.append(sketch.Status(user=None, status='new'))
            db_session.add(sketch)
            db_session.commit()

        index_name = uuid.uuid4().hex
        if not isinstance(index_name, six.text_type):
            index_name = codecs.decode(index_name, 'utf-8')

        searchindex = SearchIndex.get_or_create(name=timeline_name,
                                                description=timeline_name,
                                                user=user,
                                                index_name=index_name)

        searchindex.grant_permission(permission='read', user=user)
        searchindex.grant_permission(permission='write', user=user)
        searchindex.grant_permission(permission='delete', user=user)

        searchindex.set_status('processing')
        db_session.add(searchindex)
        db_session.commit()

        # The timeline is only attached when the user may write to the
        # sketch; indexing is started either way.
        if sketch and sketch.has_permission(user, 'write'):
            timeline = Timeline(name=searchindex.name,
                                description=searchindex.description,
                                sketch=sketch,
                                user=user,
                                searchindex=searchindex)
            timeline.set_status('processing')
            sketch.timelines.append(timeline)
            db_session.add(timeline)
            db_session.commit()

        # Start Celery pipeline for indexing and analysis.
        # Import here to avoid circular imports.
        from timesketch.lib import tasks  # pylint: disable=import-outside-toplevel
        pipeline = tasks.build_index_pipeline(file_path=file_path,
                                              events='',
                                              timeline_name=timeline_name,
                                              index_name=index_name,
                                              file_extension=extension,
                                              sketch_id=sketch.id)
        # The index name doubles as the Celery task ID.
        pipeline.apply_async(task_id=index_name)

        print('Imported {0:s} to sketch: {1:d} ({2:s})'.format(
            file_path, sketch.id, sketch.name))
Exemple #4
0
    def _upload_and_index(
            self, file_extension, timeline_name, index_name, sketch, form,
            enable_stream, original_filename='', data_label='', file_path='',
            events='', meta=None):
        """Registers an upload in the database and starts its index pipeline.

        A search index is acquired through self._get_index, a timeline with
        the given name is reused or created in the sketch, a DataSource row
        describing the upload is stored, and the Celery index pipeline is
        started. If the sketch already holds a timeline with the same name
        that is stored in a different index, the method calls itself again
        with a randomized suffix appended to the timeline name.

        Args:
            file_extension: the extension of the uploaded file.
            timeline_name: name the timeline will be stored under in the
                           datastore.
            index_name: the Elastic index name for the timeline.
            sketch: Instance of timesketch.models.sketch.Sketch
            form: a dict with the configuration for the upload.
            enable_stream: boolean indicating whether this file is part of a
                           stream or not.
            original_filename: Original filename from the upload.
            data_label: Optional string with a data label for the search index.
            file_path: the path to the file to be uploaded (optional).
            events: a string with events to upload (optional).
            meta: optional dict with additional meta fields that will be
                  included in the return. When given, the key 'task_id' is
                  added to it in place.

        Returns:
            The timeline in JSON (instance of flask.wrappers.Response), with
            the ID of the started task in the meta fields.
        """
        searchindex = self._get_index(
            name=timeline_name,
            description=timeline_name,
            sketch=sketch,
            index_name=index_name,
            data_label=data_label,
            extension=file_extension)

        if not searchindex:
            abort(
                HTTP_STATUS_CODE_BAD_REQUEST,
                'We were unable to acquire a searchindex and therefore not '
                'able to upload data, please try again. If this error persist '
                'please create an issue on Github: https://github.com/'
                'google/timesketch/issues/new/choose')

        same_name_timelines = Timeline.query.filter_by(
            name=timeline_name, sketch=sketch).all()

        # Only the first timeline with this name is ever inspected: it is
        # either reused or triggers a retry under a different name.
        timeline = None
        candidate = next(iter(same_name_timelines), None)
        if candidate is not None:
            found_index = candidate.searchindex.index_name
            if found_index == searchindex.index_name:
                timeline = candidate
            else:
                error_message = (
                    'There is a timeline in the sketch that has the same name '
                    'but is stored in a different index: name {0:s} attempting '
                    'index: {1:s} but found index {2:s} - retrying with a '
                    'different timeline name.'.format(
                        timeline_name, searchindex.index_name,
                        candidate.searchindex.index_name))
                logger.error(error_message)

                random_suffix = uuid.uuid4().hex[-5:]
                timeline_name = '{0:s}_{1:s}'.format(
                    timeline_name, random_suffix)
                return self._upload_and_index(
                    file_extension=file_extension,
                    timeline_name=timeline_name,
                    index_name=searchindex.index_name,
                    sketch=sketch,
                    form=form,
                    enable_stream=enable_stream,
                    original_filename=original_filename,
                    data_label=data_label,
                    file_path=file_path,
                    events=events,
                    meta=meta)

        searchindex.set_status('processing')

        if not timeline:
            timeline = Timeline.get_or_create(
                name=timeline_name,
                description=timeline_name,
                sketch=sketch,
                user=current_user,
                searchindex=searchindex)

        if not timeline:
            abort(
                HTTP_STATUS_CODE_BAD_REQUEST,
                'Unable to get or create a new Timeline object.')

        # A timeline that already has data sources keeps its current status.
        if not timeline.datasources:
            timeline.set_status('processing')

        sketch.timelines.append(timeline)

        # Copy the deletion-preventing labels of the sketch to the timeline
        # and the search index.
        protected_labels = current_app.config.get(
            'LABELS_TO_PREVENT_DELETION', [])
        for sketch_label in sketch.get_labels:
            if sketch_label in protected_labels:
                timeline.add_label(sketch_label)
                searchindex.add_label(sketch_label)

        total_size = int(form.get('total_file_size', 0))
        datasource = DataSource(
            timeline=timeline,
            user=current_user,
            provider=form.get('provider', 'N/A'),
            context=form.get('context', 'N/A'),
            file_on_disk=file_path,
            file_size=total_size,
            original_filename=original_filename,
            data_label=data_label
        )

        timeline.datasources.append(datasource)
        db_session.add(datasource)
        db_session.add(timeline)
        db_session.commit()

        # Start Celery pipeline for indexing and analysis.
        # Import here to avoid circular imports.
        # pylint: disable=import-outside-toplevel
        from timesketch.lib import tasks
        pipeline = tasks.build_index_pipeline(
            file_path=file_path,
            events=events,
            timeline_name=timeline_name,
            index_name=searchindex.index_name,
            file_extension=file_extension,
            sketch_id=sketch.id,
            only_index=enable_stream,
            timeline_id=timeline.id)
        task_id = uuid.uuid4().hex
        pipeline.apply_async(task_id=task_id)

        meta = {} if meta is None else meta
        meta['task_id'] = task_id

        return self.to_json(
            timeline, status_code=HTTP_STATUS_CODE_CREATED, meta=meta)
0
    def _upload_and_index(self,
                          file_extension,
                          timeline_name,
                          index_name,
                          sketch,
                          enable_stream,
                          file_path='',
                          events='',
                          meta=None):
        """Creates a full pipeline for an uploaded file and returns the results.

        An existing search index for the current user with this name and
        index name is reused; otherwise the search index is created and,
        when the user may write to the sketch, a timeline is added to the
        sketch. The Celery index pipeline is started in both cases.

        Args:
            file_extension: the extension of the uploaded file.
            timeline_name: name the timeline will be stored under in the
                           datastore.
            index_name: the Elastic index name for the timeline.
            sketch: Instance of timesketch.models.sketch.Sketch
            enable_stream: boolean indicating whether this file is part of a
                           stream or not.
            file_path: the path to the file to be uploaded (optional).
            events: a string with events to upload (optional).
            meta: optional dict with additional meta fields that will be
                  included in the return.

        Returns:
            A timeline if found or created, otherwise a search index, in
            JSON (instance of flask.wrappers.Response)
        """
        # Check if search index already exists. The description is not part
        # of the lookup: the index is created below with an empty
        # description, so filtering on description=timeline_name would not
        # match an index created by an earlier call of this method.
        searchindex = SearchIndex.query.filter_by(
            name=timeline_name,
            user=current_user,
            index_name=index_name).first()

        timeline = None

        if searchindex:
            searchindex.set_status('processing')
            timeline = Timeline.query.filter_by(
                name=searchindex.name,
                description=searchindex.description,
                sketch=sketch,
                user=current_user,
                searchindex=searchindex).first()
        else:
            # Create the search index in the Timesketch database
            searchindex = SearchIndex.get_or_create(name=timeline_name,
                                                    description='',
                                                    user=current_user,
                                                    index_name=index_name)
            searchindex.grant_permission(permission='read', user=current_user)
            searchindex.grant_permission(permission='write', user=current_user)
            searchindex.grant_permission(permission='delete',
                                         user=current_user)
            searchindex.set_status('processing')
            db_session.add(searchindex)
            db_session.commit()

            if sketch and sketch.has_permission(current_user, 'write'):
                labels_to_prevent_deletion = current_app.config.get(
                    'LABELS_TO_PREVENT_DELETION', [])
                timeline = Timeline(name=searchindex.name,
                                    description=searchindex.description,
                                    sketch=sketch,
                                    user=current_user,
                                    searchindex=searchindex)
                timeline.set_status('processing')
                sketch.timelines.append(timeline)
                # Copy the deletion-preventing labels of the sketch to the
                # new timeline and search index.
                for label in sketch.get_labels:
                    if label not in labels_to_prevent_deletion:
                        continue
                    timeline.add_label(label)
                    searchindex.add_label(label)
                db_session.add(timeline)
                db_session.commit()

        # The code above treats a missing sketch as possible, so do not
        # dereference it unconditionally.
        # NOTE(review): assumes build_index_pipeline accepts sketch_id=None
        # for uploads without a sketch - confirm against its signature.
        sketch_id = sketch.id if sketch else None

        # Start Celery pipeline for indexing and analysis.
        # Import here to avoid circular imports.
        # pylint: disable=import-outside-toplevel
        from timesketch.lib import tasks
        pipeline = tasks.build_index_pipeline(file_path=file_path,
                                              events=events,
                                              timeline_name=timeline_name,
                                              index_name=index_name,
                                              file_extension=file_extension,
                                              sketch_id=sketch_id,
                                              only_index=enable_stream)
        pipeline.apply_async()

        # Return Timeline if it was created.
        # pylint: disable=no-else-return
        if timeline:
            return self.to_json(timeline,
                                status_code=HTTP_STATUS_CODE_CREATED,
                                meta=meta)

        return self.to_json(searchindex,
                            status_code=HTTP_STATUS_CODE_CREATED,
                            meta=meta)
Exemple #6
0
    def _upload_and_index(self,
                          file_extension,
                          timeline_name,
                          index_name,
                          sketch,
                          enable_stream,
                          data_label='',
                          file_path='',
                          events='',
                          meta=None):
        """Creates a full pipeline for an uploaded file and returns the results.

        A search index is acquired through self._get_index, a timeline with
        the given name is reused or created in the sketch, and the Celery
        index pipeline is started. The request is aborted with a bad request
        status when no search index or timeline can be obtained, or when the
        sketch already has a timeline with the same name that is stored in a
        different index.

        Args:
            file_extension: the extension of the uploaded file.
            timeline_name: name the timeline will be stored under in the
                           datastore.
            index_name: the Elastic index name for the timeline.
            sketch: Instance of timesketch.models.sketch.Sketch
            enable_stream: boolean indicating whether this file is part of a
                           stream or not.
            data_label: Optional string with a data label for the search index.
            file_path: the path to the file to be uploaded (optional).
            events: a string with events to upload (optional).
            meta: optional dict with additional meta fields that will be
                  included in the return.

        Returns:
            The timeline in JSON (instance of flask.wrappers.Response)
        """
        searchindex = self._get_index(name=timeline_name,
                                      description=timeline_name,
                                      sketch=sketch,
                                      index_name=index_name,
                                      data_label=data_label,
                                      extension=file_extension)

        # Fail with a clear error instead of an AttributeError on None when
        # no search index could be acquired.
        if not searchindex:
            abort(
                HTTP_STATUS_CODE_BAD_REQUEST,
                'Unable to acquire a search index for the upload, please '
                'try again.')

        searchindex.set_status('processing')

        timelines = Timeline.query.filter_by(name=timeline_name,
                                             sketch=sketch).all()

        timeline = None
        for timeline_ in timelines:
            if timeline_.searchindex.index_name == searchindex.index_name:
                timeline = timeline_
                break

            # Only reached when the first timeline with this name lives in
            # a different index.
            abort(
                HTTP_STATUS_CODE_BAD_REQUEST,
                'There is a timeline in the sketch that has the same name '
                'but is stored in a different index, check the data_label '
                'on the uploaded data')

        if not timeline:
            timeline = Timeline.get_or_create(name=timeline_name,
                                              description=timeline_name,
                                              sketch=sketch,
                                              user=current_user,
                                              searchindex=searchindex)

        if not timeline:
            abort(HTTP_STATUS_CODE_BAD_REQUEST,
                  'Unable to get or create a new Timeline object.')

        timeline.set_status('processing')
        sketch.timelines.append(timeline)

        # Copy the deletion-preventing labels of the sketch to the timeline
        # and the search index.
        labels_to_prevent_deletion = current_app.config.get(
            'LABELS_TO_PREVENT_DELETION', [])
        for sketch_label in sketch.get_labels:
            if sketch_label not in labels_to_prevent_deletion:
                continue
            timeline.add_label(sketch_label)
            searchindex.add_label(sketch_label)

        db_session.add(timeline)
        db_session.commit()

        sketch_id = sketch.id
        # Start Celery pipeline for indexing and analysis.
        # Import here to avoid circular imports.
        # pylint: disable=import-outside-toplevel
        from timesketch.lib import tasks
        pipeline = tasks.build_index_pipeline(
            file_path=file_path,
            events=events,
            timeline_name=timeline_name,
            index_name=searchindex.index_name,
            file_extension=file_extension,
            sketch_id=sketch_id,
            only_index=enable_stream,
            timeline_id=timeline.id)
        pipeline.apply_async()

        return self.to_json(timeline,
                            status_code=HTTP_STATUS_CODE_CREATED,
                            meta=meta)
Exemple #7
0
    def run(self, file_path, sketch_id, username, timeline_name):
        """Import a plaso, csv or jsonl file into a sketch and index it.

        Exits the process (sys.exit) when the file does not exist, has an
        unsupported extension, or no Timesketch user can be determined.

        Args:
            file_path: path to the file to import.
            sketch_id: ID of the sketch to import into. When empty, a leading
                number in the file name (e.g. 42_file_name.plaso) is used
                instead; when no sketch can be loaded a new one is created.
            username: name of the user to import as. When empty, the owner
                of the file is used. The user "root" is never looked up.
            timeline_name: name of the timeline. When empty, a name is
                derived from the file name.
        """
        file_path = os.path.realpath(file_path)
        file_path_no_extension, extension = os.path.splitext(file_path)
        extension = extension.lstrip('.')
        filename = os.path.basename(file_path_no_extension)

        supported_extensions = ('plaso', 'csv', 'jsonl')

        if not os.path.isfile(file_path):
            sys.exit('No such file: {0:s}'.format(file_path))

        if extension not in supported_extensions:
            sys.exit(
                'Extension {0:s} is not supported. '
                '(supported extensions are: {1:s})'.format(
                    extension, ', '.join(supported_extensions)))

        user = None
        if not username:
            try:
                username = pwd.getpwuid(os.stat(file_path).st_uid).pw_name
            except KeyError:
                # The owning UID has no passwd entry. Leave the username
                # empty so the "cannot determine user" exit below is used
                # instead of an unhandled KeyError.
                username = None
        if username and username != 'root':
            if not isinstance(username, six.text_type):
                username = codecs.decode(username, 'utf-8')
            user = User.query.filter_by(username=username).first()
        if not user:
            sys.exit('Cannot determine user for file: {0:s}'.format(file_path))

        sketch = None
        # If filename starts with <number> then use that as sketch_id.
        # E.g: 42_file_name.plaso means sketch_id is 42.
        sketch_id_from_filename = filename.split('_')[0]
        if not sketch_id and sketch_id_from_filename.isdigit():
            sketch_id = sketch_id_from_filename

        if sketch_id:
            try:
                sketch = Sketch.query.get_with_acl(sketch_id, user=user)
            except Forbidden:
                # Best effort: without access to the requested sketch a new
                # sketch is created further down.
                pass

        if not timeline_name:
            # No name was given, so derive one from the file name.
            # Previously None was passed to codecs.decode and an empty
            # string led to indexing into an empty list.
            timeline_name = '{0:s}_timeline'.format(filename)

            if not isinstance(timeline_name, six.text_type):
                timeline_name = codecs.decode(timeline_name, 'utf-8')

            timeline_name = timeline_name.replace('_', ' ')
            # Remove sketch ID if present in the filename.
            timeline_parts = timeline_name.split()
            if timeline_parts and timeline_parts[0].isdigit():
                timeline_name = ' '.join(timeline_parts[1:])

        if not sketch:
            # Create a new sketch.
            sketch_name = 'Sketch for: {0:s}'.format(timeline_name)
            sketch = Sketch(
                name=sketch_name, description=sketch_name, user=user)
            # Need to commit here to be able to set permissions later.
            db_session.add(sketch)
            db_session.commit()
            sketch.grant_permission(permission='read', user=user)
            sketch.grant_permission(permission='write', user=user)
            sketch.grant_permission(permission='delete', user=user)
            sketch.status.append(sketch.Status(user=None, status='new'))
            db_session.add(sketch)
            db_session.commit()

        index_name = uuid.uuid4().hex
        if not isinstance(index_name, six.text_type):
            index_name = codecs.decode(index_name, 'utf-8')

        searchindex = SearchIndex.get_or_create(
            name=timeline_name,
            description=timeline_name,
            user=user,
            index_name=index_name)

        searchindex.grant_permission(permission='read', user=user)
        searchindex.grant_permission(permission='write', user=user)
        searchindex.grant_permission(permission='delete', user=user)

        searchindex.set_status('processing')
        db_session.add(searchindex)
        db_session.commit()

        # The timeline is only attached when the user may write to the
        # sketch; indexing is started either way.
        if sketch and sketch.has_permission(user, 'write'):
            timeline = Timeline(
                name=searchindex.name,
                description=searchindex.description,
                sketch=sketch,
                user=user,
                searchindex=searchindex)
            timeline.set_status('processing')
            sketch.timelines.append(timeline)
            db_session.add(timeline)
            db_session.commit()

        # Start Celery pipeline for indexing and analysis.
        # Import here to avoid circular imports.
        from timesketch.lib import tasks
        pipeline = tasks.build_index_pipeline(
            file_path, timeline_name, index_name, extension, sketch.id)
        # The index name doubles as the Celery task ID.
        pipeline.apply_async(task_id=index_name)

        print('Imported {0:s} to sketch: {1:d} ({2:s})'.format(
            file_path, sketch.id, sketch.name))