def callback(message):
    """Google PubSub callback.

    This function is called on all incoming messages on the configured
    topic.

    Args:
        message: A PubSub message object. It exposes an ``ack()`` method
            and an ``attributes`` mapping; the ``objectId`` attribute is
            expected to hold the GCS path of the uploaded object.
    """
    message.ack()
    gcs_full_path = message.attributes.get("objectId")

    # Exit early if the attribute is missing or the file type is wrong.
    # ``objectId`` can be absent on malformed notifications, so guard
    # against None before calling endswith().
    if not gcs_full_path or not gcs_full_path.endswith(".plaso.metadata.json"):
        return

    gcs_base_path = os.path.dirname(gcs_full_path)
    gcs_metadata_filename = os.path.basename(gcs_full_path)
    gcs_base_filename = gcs_metadata_filename.replace(".metadata.json", "")
    gcs_plaso_filename = gcs_base_filename

    # Download files from GCS.
    local_metadata_file = download_from_gcs(gcs_base_path, gcs_metadata_filename)
    local_plaso_file = download_from_gcs(gcs_base_path, gcs_plaso_filename)

    with open(local_metadata_file, "r") as metadata_file:
        metadata = json.load(metadata_file)
        username = metadata.get("globals", {}).get("requester")
        if not username:
            # Backwards compatibility for old Turbinia versions.
            username = metadata.get("requester")
        sketch_id_from_metadata = metadata.get("sketch_id")

    if not username:
        logger.error("Missing username")
        return

    timeline_name = os.path.splitext(gcs_plaso_filename)[0]
    index_name = uuid.uuid4().hex
    sketch_id, timeline_id = setup_sketch(
        timeline_name, index_name, "admin", sketch_id_from_metadata)

    # Start indexing.
    with app.app_context():
        pipeline = tasks.build_index_pipeline(
            file_path=local_plaso_file,
            timeline_name=gcs_base_filename,
            index_name=index_name,
            file_extension="plaso",
            sketch_id=sketch_id,
            timeline_id=timeline_id,
        )
        pipeline.apply_async()
        logger.info("File sent for indexing: {}".format(gcs_base_filename))
def callback(message):
    """Google PubSub callback.

    This function is called on all incoming messages on the configured
    topic.

    Args:
        message: A PubSub message object. It exposes an ``ack()`` method
            and an ``attributes`` mapping; the ``objectId`` attribute is
            expected to hold the GCS path of the uploaded object.
    """
    message.ack()
    gcs_full_path = message.attributes.get('objectId')

    # Exit early if the attribute is missing or the file type is wrong.
    # 'objectId' can be absent on malformed notifications, so guard
    # against None before calling endswith().
    if not gcs_full_path or not gcs_full_path.endswith('.plaso.metadata.json'):
        return

    gcs_base_path = os.path.dirname(gcs_full_path)
    gcs_metadata_filename = os.path.basename(gcs_full_path)
    gcs_base_filename = gcs_metadata_filename.replace('.metadata.json', '')
    gcs_plaso_filename = gcs_base_filename

    # Download files from GCS.
    local_metadata_file = download_from_gcs(gcs_base_path, gcs_metadata_filename)
    local_plaso_file = download_from_gcs(gcs_base_path, gcs_plaso_filename)

    with open(local_metadata_file, 'r') as metadata_file:
        metadata = json.load(metadata_file)
        username = metadata.get('requester')
        sketch_id_from_metadata = metadata.get('sketch_id')

    if not username:
        logger.error('Missing username')
        return

    timeline_name = os.path.splitext(gcs_plaso_filename)[0]
    index_name = uuid.uuid4().hex
    sketch_id, timeline_id = setup_sketch(
        timeline_name, index_name, 'admin', sketch_id_from_metadata)

    # Start indexing.
    with app.app_context():
        pipeline = tasks.build_index_pipeline(
            file_path=local_plaso_file,
            timeline_name=gcs_base_filename,
            index_name=index_name,
            file_extension='plaso',
            sketch_id=sketch_id,
            timeline_id=timeline_id)
        pipeline.apply_async()
        logger.info('File sent for indexing: {}'.format(gcs_base_filename))
def run(self, file_path, sketch_id, username, timeline_name):
    """Import a file on disk and create (or reuse) a sketch for it.

    Args:
        file_path: Path to the file to import (plaso, csv or jsonl).
        sketch_id: Optional ID of an existing sketch. When not given, a
            leading "<number>_" in the filename is used as the sketch ID;
            otherwise a new sketch is created.
        username: Optional owner username. Defaults to the owner of the
            file on disk (unless that is root, in which case the import
            is rejected).
        timeline_name: Optional name for the timeline; derived from the
            filename when not provided.
    """
    file_path = os.path.realpath(file_path)
    file_path_no_extension, extension = os.path.splitext(file_path)
    extension = extension.lstrip('.')
    filename = os.path.basename(file_path_no_extension)
    supported_extensions = ('plaso', 'csv', 'jsonl')

    if not os.path.isfile(file_path):
        sys.exit('No such file: {0:s}'.format(file_path))

    if extension not in supported_extensions:
        sys.exit('Extension {0:s} is not supported. '
                 '(supported extensions are: {1:s})'.format(
                     extension, ', '.join(supported_extensions)))

    user = None
    if not username:
        # Fall back to the on-disk owner of the file.
        username = pwd.getpwuid(os.stat(file_path).st_uid).pw_name
    if not username == 'root':
        if not isinstance(username, six.text_type):
            username = codecs.decode(username, 'utf-8')
        user = User.query.filter_by(username=username).first()
    if not user:
        sys.exit('Cannot determine user for file: {0:s}'.format(file_path))

    sketch = None
    # If filename starts with <number> then use that as sketch_id.
    # E.g: 42_file_name.plaso means sketch_id is 42.
    sketch_id_from_filename = filename.split('_')[0]
    if not sketch_id and sketch_id_from_filename.isdigit():
        sketch_id = sketch_id_from_filename

    if sketch_id:
        try:
            sketch = Sketch.query.get_with_acl(sketch_id, user=user)
        except Forbidden:
            pass

    if not timeline_name:
        if timeline_name is None:
            timeline_name = '{0:s}_timeline'.format(filename)
        if not isinstance(timeline_name, six.text_type):
            timeline_name = codecs.decode(timeline_name, 'utf-8')
        timeline_name = timeline_name.replace('_', ' ')
        # Remove sketch ID if present in the filename.
        # BUGFIX: guard against an empty name (e.g. timeline_name passed
        # as '') — timeline_parts can be empty, and indexing [0] on an
        # empty list raised IndexError.
        timeline_parts = timeline_name.split()
        if timeline_parts and timeline_parts[0].isdigit():
            timeline_name = ' '.join(timeline_parts[1:])

    if not sketch:
        # Create a new sketch.
        sketch_name = 'Sketch for: {0:s}'.format(timeline_name)
        sketch = Sketch(name=sketch_name, description=sketch_name, user=user)
        # Need to commit here to be able to set permissions later.
        db_session.add(sketch)
        db_session.commit()
        sketch.grant_permission(permission='read', user=user)
        sketch.grant_permission(permission='write', user=user)
        sketch.grant_permission(permission='delete', user=user)
        sketch.status.append(sketch.Status(user=None, status='new'))
        db_session.add(sketch)
        db_session.commit()

    index_name = uuid.uuid4().hex
    if not isinstance(index_name, six.text_type):
        index_name = codecs.decode(index_name, 'utf-8')

    searchindex = SearchIndex.get_or_create(
        name=timeline_name,
        description=timeline_name,
        user=user,
        index_name=index_name)
    searchindex.grant_permission(permission='read', user=user)
    searchindex.grant_permission(permission='write', user=user)
    searchindex.grant_permission(permission='delete', user=user)
    searchindex.set_status('processing')
    db_session.add(searchindex)
    db_session.commit()

    # Only attach a timeline when the user can write to the sketch.
    if sketch and sketch.has_permission(user, 'write'):
        timeline = Timeline(
            name=searchindex.name,
            description=searchindex.description,
            sketch=sketch,
            user=user,
            searchindex=searchindex)
        timeline.set_status('processing')
        sketch.timelines.append(timeline)
        db_session.add(timeline)
        db_session.commit()

    # Start Celery pipeline for indexing and analysis.
    # Import here to avoid circular imports.
    from timesketch.lib import tasks  # pylint: disable=import-outside-toplevel
    pipeline = tasks.build_index_pipeline(
        file_path=file_path,
        events='',
        timeline_name=timeline_name,
        index_name=index_name,
        file_extension=extension,
        sketch_id=sketch.id)
    pipeline.apply_async(task_id=index_name)

    print('Imported {0:s} to sketch: {1:d} ({2:s})'.format(
        file_path, sketch.id, sketch.name))
def _upload_and_index(
        self, file_extension, timeline_name, index_name, sketch, form,
        enable_stream, original_filename='', data_label='', file_path='',
        events='', meta=None):
    """Creates a full pipeline for an uploaded file and returns the results.

    Args:
        file_extension: the extension of the uploaded file.
        timeline_name: name the timeline will be stored under in the
            datastore.
        index_name: the Elastic index name for the timeline.
        sketch: Instance of timesketch.models.sketch.Sketch
        form: a dict with the configuration for the upload.
        enable_stream: boolean indicating whether this is file is part of a
            stream or not.
        original_filename: Original filename from the upload.
        data_label: Optional string with a data label for the search index.
        file_path: the path to the file to be uploaded (optional).
        events: a string with events to upload (optional).
        meta: optional dict with additional meta fields that will be
            included in the return.

    Returns:
        A timeline if created otherwise a search index in JSON (instance
        of flask.wrappers.Response)
    """
    searchindex = self._get_index(
        name=timeline_name,
        description=timeline_name,
        sketch=sketch,
        index_name=index_name,
        data_label=data_label,
        extension=file_extension)
    if not searchindex:
        abort(
            HTTP_STATUS_CODE_BAD_REQUEST,
            'We were unable to acquire a searchindex and therefore not '
            'able to upload data, please try again. If this error persist '
            'please create an issue on Github: https://github.com/'
            'google/timesketch/issues/new/choose')

    timeline = None
    candidates = Timeline.query.filter_by(
        name=timeline_name, sketch=sketch).all()
    for candidate in candidates:
        if candidate.searchindex.index_name == searchindex.index_name:
            # Same name, same index: reuse this timeline.
            timeline = candidate
            break
        # Same name but a different index: report it and retry the whole
        # upload under a uniquified timeline name.
        logger.error(
            'There is a timeline in the sketch that has the same name '
            'but is stored in a different index: name {0:s} attempting '
            'index: {1:s} but found index {2:s} - retrying with a '
            'different timeline name.'.format(
                timeline_name, searchindex.index_name,
                candidate.searchindex.index_name))
        timeline_name = '{0:s}_{1:s}'.format(
            timeline_name, uuid.uuid4().hex[-5:])
        return self._upload_and_index(
            file_extension=file_extension,
            timeline_name=timeline_name,
            index_name=searchindex.index_name,
            sketch=sketch,
            form=form,
            enable_stream=enable_stream,
            original_filename=original_filename,
            data_label=data_label,
            file_path=file_path,
            events=events,
            meta=meta)

    searchindex.set_status('processing')

    if not timeline:
        timeline = Timeline.get_or_create(
            name=timeline_name,
            description=timeline_name,
            sketch=sketch,
            user=current_user,
            searchindex=searchindex)
    if not timeline:
        abort(
            HTTP_STATUS_CODE_BAD_REQUEST,
            'Unable to get or create a new Timeline object.')

    # If the timeline already existed and has associated data sources
    # then we don't want to set the status to processing.
    if not timeline.datasources:
        timeline.set_status('processing')
    sketch.timelines.append(timeline)

    # Propagate deletion-protection labels from the sketch.
    labels_to_prevent_deletion = current_app.config.get(
        'LABELS_TO_PREVENT_DELETION', [])
    for sketch_label in sketch.get_labels:
        if sketch_label not in labels_to_prevent_deletion:
            continue
        timeline.add_label(sketch_label)
        searchindex.add_label(sketch_label)

    file_size = form.get('total_file_size', 0)
    datasource = DataSource(
        timeline=timeline,
        user=current_user,
        provider=form.get('provider', 'N/A'),
        context=form.get('context', 'N/A'),
        file_on_disk=file_path,
        file_size=int(file_size),
        original_filename=original_filename,
        data_label=data_label)
    timeline.datasources.append(datasource)
    db_session.add(datasource)
    db_session.add(timeline)
    db_session.commit()

    sketch_id = sketch.id
    # Start Celery pipeline for indexing and analysis.
    # Import here to avoid circular imports.
    # pylint: disable=import-outside-toplevel
    from timesketch.lib import tasks
    pipeline = tasks.build_index_pipeline(
        file_path=file_path,
        events=events,
        timeline_name=timeline_name,
        index_name=searchindex.index_name,
        file_extension=file_extension,
        sketch_id=sketch_id,
        only_index=enable_stream,
        timeline_id=timeline.id)
    task_id = uuid.uuid4().hex
    pipeline.apply_async(task_id=task_id)

    if meta is None:
        meta = {}
    meta['task_id'] = task_id

    return self.to_json(
        timeline, status_code=HTTP_STATUS_CODE_CREATED, meta=meta)
def _upload_and_index(self, file_extension, timeline_name, index_name,
                      sketch, enable_stream, file_path='', events='',
                      meta=None):
    """Creates a full pipeline for an uploaded file and returns the results.

    Args:
        file_extension: the extension of the uploaded file.
        timeline_name: name the timeline will be stored under in the
            datastore.
        index_name: the Elastic index name for the timeline.
        sketch: Instance of timesketch.models.sketch.Sketch
        enable_stream: boolean indicating whether this is file is part of
            a stream or not.
        file_path: the path to the file to be uploaded (optional).
        events: a string with events to upload (optional).
        meta: optional dict with additional meta fields that will be
            included in the return.

    Returns:
        A timeline if created otherwise a search index in JSON (instance
        of flask.wrappers.Response)
    """
    # Check if search index already exists.
    searchindex = SearchIndex.query.filter_by(
        name=timeline_name,
        description=timeline_name,
        user=current_user,
        index_name=index_name).first()

    timeline = None

    if searchindex:
        searchindex.set_status('processing')
        timeline = Timeline.query.filter_by(
            name=searchindex.name,
            description=searchindex.description,
            sketch=sketch,
            user=current_user,
            searchindex=searchindex).first()
    else:
        # Create the search index in the Timesketch database.
        # BUGFIX: use the same description as the dedup lookup above.
        # With description='' the filter_by() query at the top of this
        # method could never match the index we create here, so every
        # re-upload produced a duplicate search index.
        searchindex = SearchIndex.get_or_create(
            name=timeline_name,
            description=timeline_name,
            user=current_user,
            index_name=index_name)
        searchindex.grant_permission(permission='read', user=current_user)
        searchindex.grant_permission(permission='write', user=current_user)
        searchindex.grant_permission(permission='delete', user=current_user)
        searchindex.set_status('processing')
        db_session.add(searchindex)
        db_session.commit()

        # Only attach a timeline when the user can write to the sketch.
        if sketch and sketch.has_permission(current_user, 'write'):
            labels_to_prevent_deletion = current_app.config.get(
                'LABELS_TO_PREVENT_DELETION', [])
            timeline = Timeline(
                name=searchindex.name,
                description=searchindex.description,
                sketch=sketch,
                user=current_user,
                searchindex=searchindex)
            timeline.set_status('processing')
            sketch.timelines.append(timeline)
            # Propagate deletion-protection labels from the sketch.
            for label in sketch.get_labels:
                if label not in labels_to_prevent_deletion:
                    continue
                timeline.add_label(label)
                searchindex.add_label(label)
            db_session.add(timeline)
            db_session.commit()

    # Start Celery pipeline for indexing and analysis.
    # Import here to avoid circular imports.
    # pylint: disable=import-outside-toplevel
    from timesketch.lib import tasks
    pipeline = tasks.build_index_pipeline(
        file_path=file_path,
        events=events,
        timeline_name=timeline_name,
        index_name=index_name,
        file_extension=file_extension,
        sketch_id=sketch.id,
        only_index=enable_stream)
    pipeline.apply_async()

    # Return Timeline if it was created.
    # pylint: disable=no-else-return
    if timeline:
        return self.to_json(
            timeline, status_code=HTTP_STATUS_CODE_CREATED, meta=meta)

    return self.to_json(
        searchindex, status_code=HTTP_STATUS_CODE_CREATED, meta=meta)
def _upload_and_index(self, file_extension, timeline_name, index_name,
                      sketch, enable_stream, data_label='', file_path='',
                      events='', meta=None):
    """Creates a full pipeline for an uploaded file and returns the results.

    Args:
        file_extension: the extension of the uploaded file.
        timeline_name: name the timeline will be stored under in the
            datastore.
        index_name: the Elastic index name for the timeline.
        sketch: Instance of timesketch.models.sketch.Sketch
        enable_stream: boolean indicating whether this is file is part of
            a stream or not.
        data_label: Optional string with a data label for the search index.
        file_path: the path to the file to be uploaded (optional).
        events: a string with events to upload (optional).
        meta: optional dict with additional meta fields that will be
            included in the return.

    Returns:
        A timeline if created otherwise a search index in JSON (instance
        of flask.wrappers.Response)
    """
    searchindex = self._get_index(
        name=timeline_name,
        description=timeline_name,
        sketch=sketch,
        index_name=index_name,
        data_label=data_label,
        extension=file_extension)
    # BUGFIX: _get_index() can come back empty; fail with a clear 400
    # instead of an AttributeError on the set_status() call below.
    if not searchindex:
        abort(
            HTTP_STATUS_CODE_BAD_REQUEST,
            'Unable to acquire a search index for the uploaded data.')
    searchindex.set_status('processing')

    timelines = Timeline.query.filter_by(
        name=timeline_name, sketch=sketch).all()
    timeline = None
    for timeline_ in timelines:
        if timeline_.searchindex.index_name == searchindex.index_name:
            # Same name, same index: reuse this timeline.
            timeline = timeline_
            break
        # Same name but a different index is a conflict the caller has
        # to resolve via the data_label.
        abort(
            HTTP_STATUS_CODE_BAD_REQUEST,
            'There is a timeline in the sketch that has the same name '
            'but is stored in a different index, check the data_label '
            'on the uploaded data')

    if not timeline:
        timeline = Timeline.get_or_create(
            name=timeline_name,
            description=timeline_name,
            sketch=sketch,
            user=current_user,
            searchindex=searchindex)
    if not timeline:
        abort(
            HTTP_STATUS_CODE_BAD_REQUEST,
            'Unable to get or create a new Timeline object.')

    timeline.set_status('processing')
    sketch.timelines.append(timeline)

    # Propagate deletion-protection labels from the sketch.
    labels_to_prevent_deletion = current_app.config.get(
        'LABELS_TO_PREVENT_DELETION', [])
    for sketch_label in sketch.get_labels:
        if sketch_label not in labels_to_prevent_deletion:
            continue
        timeline.add_label(sketch_label)
        searchindex.add_label(sketch_label)

    db_session.add(timeline)
    db_session.commit()

    sketch_id = sketch.id
    # Start Celery pipeline for indexing and analysis.
    # Import here to avoid circular imports.
    # pylint: disable=import-outside-toplevel
    from timesketch.lib import tasks
    pipeline = tasks.build_index_pipeline(
        file_path=file_path,
        events=events,
        timeline_name=timeline_name,
        index_name=searchindex.index_name,
        file_extension=file_extension,
        sketch_id=sketch_id,
        only_index=enable_stream,
        timeline_id=timeline.id)
    pipeline.apply_async()

    return self.to_json(
        timeline, status_code=HTTP_STATUS_CODE_CREATED, meta=meta)
def run(self, file_path, sketch_id, username, timeline_name):
    """Import a file on disk and create (or reuse) a sketch for it.

    Args:
        file_path: Path to the file to import (plaso, csv or jsonl).
        sketch_id: Optional ID of an existing sketch. When not given, a
            leading "<number>_" in the filename is used as the sketch ID;
            otherwise a new sketch is created.
        username: Optional owner username. Defaults to the owner of the
            file on disk (unless that is root, in which case the import
            is rejected).
        timeline_name: Optional name for the timeline; derived from the
            filename when not provided.
    """
    file_path = os.path.realpath(file_path)
    file_path_no_extension, extension = os.path.splitext(file_path)
    extension = extension.lstrip('.')
    filename = os.path.basename(file_path_no_extension)
    supported_extensions = ('plaso', 'csv', 'jsonl')

    if not os.path.isfile(file_path):
        sys.exit('No such file: {0:s}'.format(file_path))

    if extension not in supported_extensions:
        sys.exit(
            'Extension {0:s} is not supported. '
            '(supported extensions are: {1:s})'.format(
                extension, ', '.join(supported_extensions)))

    user = None
    if not username:
        # Fall back to the on-disk owner of the file.
        username = pwd.getpwuid(os.stat(file_path).st_uid).pw_name
    if not username == 'root':
        if not isinstance(username, six.text_type):
            username = codecs.decode(username, 'utf-8')
        user = User.query.filter_by(username=username).first()
    if not user:
        sys.exit('Cannot determine user for file: {0:s}'.format(file_path))

    sketch = None
    # If filename starts with <number> then use that as sketch_id.
    # E.g: 42_file_name.plaso means sketch_id is 42.
    sketch_id_from_filename = filename.split('_')[0]
    if not sketch_id and sketch_id_from_filename.isdigit():
        sketch_id = sketch_id_from_filename

    if sketch_id:
        try:
            sketch = Sketch.query.get_with_acl(sketch_id, user=user)
        except Forbidden:
            pass

    if not timeline_name:
        # BUGFIX: derive the default timeline name from *filename*. The
        # previous code decoded and processed timeline_name itself, but
        # inside this branch timeline_name is None or empty, so it
        # crashed (TypeError from codecs.decode(None) or IndexError on
        # an empty split).
        if not isinstance(filename, six.text_type):
            filename = codecs.decode(filename, 'utf-8')
        timeline_name = filename.replace('_', ' ')
        # Remove sketch ID if present in the filename.
        timeline_parts = timeline_name.split()
        if timeline_parts and timeline_parts[0].isdigit():
            timeline_name = ' '.join(timeline_parts[1:])

    if not sketch:
        # Create a new sketch.
        sketch_name = 'Sketch for: {0:s}'.format(timeline_name)
        sketch = Sketch(
            name=sketch_name, description=sketch_name, user=user)
        # Need to commit here to be able to set permissions later.
        db_session.add(sketch)
        db_session.commit()
        sketch.grant_permission(permission='read', user=user)
        sketch.grant_permission(permission='write', user=user)
        sketch.grant_permission(permission='delete', user=user)
        sketch.status.append(sketch.Status(user=None, status='new'))
        db_session.add(sketch)
        db_session.commit()

    index_name = uuid.uuid4().hex
    if not isinstance(index_name, six.text_type):
        index_name = codecs.decode(index_name, 'utf-8')

    searchindex = SearchIndex.get_or_create(
        name=timeline_name,
        description=timeline_name,
        user=user,
        index_name=index_name)
    searchindex.grant_permission(permission='read', user=user)
    searchindex.grant_permission(permission='write', user=user)
    searchindex.grant_permission(permission='delete', user=user)
    searchindex.set_status('processing')
    db_session.add(searchindex)
    db_session.commit()

    # Only attach a timeline when the user can write to the sketch.
    if sketch and sketch.has_permission(user, 'write'):
        timeline = Timeline(
            name=searchindex.name,
            description=searchindex.description,
            sketch=sketch,
            user=user,
            searchindex=searchindex)
        timeline.set_status('processing')
        sketch.timelines.append(timeline)
        db_session.add(timeline)
        db_session.commit()

    # Start Celery pipeline for indexing and analysis.
    # Import here to avoid circular imports.
    from timesketch.lib import tasks
    pipeline = tasks.build_index_pipeline(
        file_path, timeline_name, index_name, extension, sketch.id)
    pipeline.apply_async(task_id=index_name)

    print('Imported {0:s} to sketch: {1:d} ({2:s})'.format(
        file_path, sketch.id, sketch.name))