def _upload_and_index(
        self, file_extension, timeline_name, index_name, sketch, form,
        enable_stream, original_filename='', data_label='', file_path='',
        events='', meta=None):
    """Prepares the database objects for an upload and starts its indexing.

    Acquires a search index, reuses or creates the matching timeline in the
    sketch, records a data source describing the upload and schedules the
    indexing pipeline.

    If the first timeline in the sketch carrying the same name is stored in
    a different index, the problem is logged and the whole call is retried
    with a short random suffix appended to the timeline name.

    Args:
        file_extension: the extension of the uploaded file.
        timeline_name: name the timeline will be stored under in the
            datastore.
        index_name: the Elastic index name for the timeline.
        sketch: Instance of timesketch.models.sketch.Sketch
        form: a dict with the configuration for the upload.
        enable_stream: boolean indicating whether this file is part of a
            stream or not.
        original_filename: Original filename from the upload.
        data_label: Optional string with a data label for the search index.
        file_path: the path to the file to be uploaded (optional).
        events: a string with events to upload (optional).
        meta: optional dict with additional meta fields that will be
            included in the return. When a dict is passed in, the 'task_id'
            key is added to that same dict.

    Returns:
        The timeline in JSON (instance of flask.wrappers.Response), with
        the ID of the scheduled task included in the meta fields.
    """
    searchindex = self._get_index(
        name=timeline_name,
        description=timeline_name,
        sketch=sketch,
        index_name=index_name,
        data_label=data_label,
        extension=file_extension)

    if not searchindex:
        abort(
            HTTP_STATUS_CODE_BAD_REQUEST,
            'We were unable to acquire a searchindex and therefore not '
            'able to upload data, please try again. If this error persist '
            'please create an issue on Github: https://github.com/'
            'google/timesketch/issues/new/choose')

    same_name_timelines = Timeline.query.filter_by(
        name=timeline_name, sketch=sketch).all()

    timeline = None
    for candidate in same_name_timelines:
        found_index_name = candidate.searchindex.index_name
        if found_index_name != searchindex.index_name:
            # Name collision with a timeline living in another index: start
            # over under a new, randomised timeline name.
            logger.error(
                'There is a timeline in the sketch that has the same name '
                'but is stored in a different index: name {0:s} attempting '
                'index: {1:s} but found index {2:s} - retrying with a '
                'different timeline name.'.format(
                    timeline_name, searchindex.index_name,
                    found_index_name))

            retry_name = '{0:s}_{1:s}'.format(
                timeline_name, uuid.uuid4().hex[-5:])
            return self._upload_and_index(
                file_extension=file_extension,
                timeline_name=retry_name,
                index_name=searchindex.index_name,
                sketch=sketch,
                form=form,
                enable_stream=enable_stream,
                original_filename=original_filename,
                data_label=data_label,
                file_path=file_path,
                events=events,
                meta=meta)

        timeline = candidate
        break

    searchindex.set_status('processing')

    # Fall back to creating the timeline when no existing one was matched.
    timeline = timeline or Timeline.get_or_create(
        name=timeline_name,
        description=timeline_name,
        sketch=sketch,
        user=current_user,
        searchindex=searchindex)

    if not timeline:
        abort(
            HTTP_STATUS_CODE_BAD_REQUEST,
            'Unable to get or create a new Timeline object.')

    # A timeline that already has data sources keeps its current status;
    # only a timeline without any is flagged as processing.
    if not timeline.datasources:
        timeline.set_status('processing')

    sketch.timelines.append(timeline)

    # Copy over the sketch labels that are configured to prevent deletion.
    protected_labels = current_app.config.get(
        'LABELS_TO_PREVENT_DELETION', [])
    for sketch_label in sketch.get_labels:
        if sketch_label in protected_labels:
            timeline.add_label(sketch_label)
            searchindex.add_label(sketch_label)

    datasource = DataSource(
        timeline=timeline,
        user=current_user,
        provider=form.get('provider', 'N/A'),
        context=form.get('context', 'N/A'),
        file_on_disk=file_path,
        file_size=int(form.get('total_file_size', 0)),
        original_filename=original_filename,
        data_label=data_label)

    timeline.datasources.append(datasource)
    db_session.add(datasource)
    db_session.add(timeline)
    db_session.commit()

    # Start Celery pipeline for indexing and analysis.
    # Import here to avoid circular imports.
    # pylint: disable=import-outside-toplevel
    from timesketch.lib import tasks
    pipeline = tasks.build_index_pipeline(
        file_path=file_path,
        events=events,
        timeline_name=timeline_name,
        index_name=searchindex.index_name,
        file_extension=file_extension,
        sketch_id=sketch.id,
        only_index=enable_stream,
        timeline_id=timeline.id)
    task_id = uuid.uuid4().hex
    pipeline.apply_async(task_id=task_id)

    # Report the task ID back to the caller through the meta fields.
    meta = {} if meta is None else meta
    meta['task_id'] = task_id
    return self.to_json(
        timeline, status_code=HTTP_STATUS_CODE_CREATED, meta=meta)
def post(self, sketch_id):
    """Handles POST request to the resource.

    Handler for /api/v1/sketches/:sketch_id/event/create/

    Adds a single user-created event to the sketch. The event is stored in
    an index whose name is derived from the sketch ID and is exposed in the
    sketch through the 'sketch specific timeline' timeline.

    The request body is read as a dict with these optional keys:
        date_string: date of the event; defaults to the current UTC time.
        timestamp_desc: description of the timestamp.
        message: the event message.
        attributes: dict with extra fields merged into the event.
        tag: list of strings to tag the event with.

    Args:
        sketch_id: Integer primary key for a sketch database model

    Returns:
        The timeline the event was added to, or the search index if no
        timeline was created, in JSON (instance of
        flask.wrappers.Response)
    """
    # Only needed by this handler, hence imported locally.
    # pylint: disable=import-outside-toplevel
    import calendar

    sketch = Sketch.query.get_with_acl(sketch_id)
    if not sketch:
        abort(HTTP_STATUS_CODE_NOT_FOUND, 'No sketch found with this ID.')

    if not sketch.has_permission(current_user, 'write'):
        abort(HTTP_STATUS_CODE_FORBIDDEN,
              'User does not have write access controls on sketch.')

    form = request.json
    if not form:
        form = request.data

    # Everything below reads the form with dict methods; reject anything
    # else (e.g. the raw request body or a JSON list) with a clear error
    # instead of failing on the first .get() call.
    if not isinstance(form, dict):
        abort(
            HTTP_STATUS_CODE_BAD_REQUEST,
            'Unable to add an event without a JSON object describing it.')

    timeline_name = 'sketch specific timeline'
    index_name_seed = 'timesketch_{0:d}'.format(sketch_id)
    event_type = 'user_created_event'

    date_string = form.get('date_string')
    if not date_string:
        # No date supplied: use the current UTC time, both as the datetime
        # object used for the timestamp and as the stored date string.
        date = datetime.datetime.utcnow()
        date_string = date.isoformat()
    else:
        # derive datetime from the supplied date string:
        try:
            date = dateutil.parser.parse(date_string)
        except (dateutil.parser.ParserError, OverflowError,
                TypeError) as e:
            logger.error('Unable to convert date string', exc_info=True)
            abort(
                HTTP_STATUS_CODE_BAD_REQUEST,
                'Unable to add event, not able to convert the date '
                'string. Was it properly formatted? Error: '
                '{0!s}'.format(e))

    # utctimetuple() expresses the date in UTC, so it has to be converted
    # with calendar.timegm(); time.mktime() would read it as local time.
    # The timestamp is stored in microseconds.
    timestamp = calendar.timegm(date.utctimetuple()) * 1000000
    timestamp += date.microsecond

    event = {
        'datetime': date_string,
        'timestamp': timestamp,
        'timestamp_desc': form.get('timestamp_desc', 'Event Happened'),
        'message': form.get('message', 'No message string'),
    }

    attributes = form.get('attributes', {})
    if not isinstance(attributes, dict):
        abort(
            HTTP_STATUS_CODE_BAD_REQUEST,
            'Unable to add an event where the attributes are not a '
            'dict object.')

    event.update(attributes)

    tag = form.get('tag', [])
    if (not isinstance(tag, list) or
            not all(isinstance(x, str) for x in tag)):
        abort(
            HTTP_STATUS_CODE_BAD_REQUEST,
            'Unable to add an event where the tags are not a '
            'list of strings.')

    event['tag'] = tag

    # The index name does not need to be human readable; it is the md5 of
    # a seed built from the sketch ID, so every event created through this
    # handler for a given sketch ends up in the same index.
    index_name = hashlib.md5(index_name_seed.encode()).hexdigest()
    if six.PY2:
        index_name = codecs.decode(index_name, 'utf-8')

    # Try to create index
    try:
        # Create the index in Elasticsearch (unless it already exists)
        self.datastore.create_index(
            index_name=index_name, doc_type=event_type)

        # Create the search index in the Timesketch database
        searchindex = SearchIndex.get_or_create(
            name=timeline_name,
            description='internal timeline for user-created events',
            user=current_user,
            index_name=index_name)
        searchindex.grant_permission(
            permission='read', user=current_user)
        searchindex.grant_permission(
            permission='write', user=current_user)
        searchindex.grant_permission(
            permission='delete', user=current_user)
        searchindex.set_status('ready')
        db_session.add(searchindex)
        db_session.commit()

        timeline = None
        # The sketch and the write permission were already verified above;
        # the check is repeated right before data is written.
        if sketch and sketch.has_permission(current_user, 'write'):
            self.datastore.import_event(
                index_name, event_type, event, flush_interval=1)

            timeline = Timeline.get_or_create(
                name=searchindex.name,
                description=searchindex.description,
                sketch=sketch,
                user=current_user,
                searchindex=searchindex)

            if timeline not in sketch.timelines:
                sketch.timelines.append(timeline)

            timeline.set_status('ready')
            db_session.add(timeline)
            db_session.commit()

        # Return Timeline if it was created.
        if timeline:
            return self.to_json(
                timeline, status_code=HTTP_STATUS_CODE_CREATED)
        return self.to_json(
            searchindex, status_code=HTTP_STATUS_CODE_CREATED)

    # TODO: Can this be narrowed down, both in terms of the scope it
    # applies to, as well as not to catch a generic exception.
    except Exception as e:  # pylint: disable=broad-except
        # Keep the traceback in the logs; the client only gets a summary.
        logger.error('Failed to add event', exc_info=True)
        abort(HTTP_STATUS_CODE_BAD_REQUEST,
              'Failed to add event ({0!s})'.format(e))
def _upload_and_index(self, file_extension, timeline_name, index_name,
                      sketch, enable_stream, data_label='', file_path='',
                      events='', meta=None):
    """Creates a full pipeline for an uploaded file and returns the results.

    Acquires a search index, reuses or creates the matching timeline in the
    sketch and schedules the indexing pipeline. The request is rejected if
    no search index could be acquired, or if the first timeline in the
    sketch carrying the same name is stored in a different index.

    Args:
        file_extension: the extension of the uploaded file.
        timeline_name: name the timeline will be stored under in the
            datastore.
        index_name: the Elastic index name for the timeline.
        sketch: Instance of timesketch.models.sketch.Sketch
        enable_stream: boolean indicating whether this file is part of a
            stream or not.
        data_label: Optional string with a data label for the search index.
        file_path: the path to the file to be uploaded (optional).
        events: a string with events to upload (optional).
        meta: optional dict with additional meta fields that will be
            included in the return.

    Returns:
        The timeline in JSON (instance of flask.wrappers.Response)
    """
    searchindex = self._get_index(name=timeline_name,
                                  description=timeline_name,
                                  sketch=sketch,
                                  index_name=index_name,
                                  data_label=data_label,
                                  extension=file_extension)

    # Fail with a readable error rather than an AttributeError further
    # down when no search index could be acquired.
    if not searchindex:
        abort(
            HTTP_STATUS_CODE_BAD_REQUEST,
            'We were unable to acquire a searchindex and therefore not '
            'able to upload data, please try again. If this error persists '
            'please create an issue on Github: https://github.com/'
            'google/timesketch/issues/new/choose')

    timelines = Timeline.query.filter_by(name=timeline_name,
                                         sketch=sketch).all()

    timeline = None
    for timeline_ in timelines:
        if timeline_.searchindex.index_name == searchindex.index_name:
            timeline = timeline_
            break

        abort(
            HTTP_STATUS_CODE_BAD_REQUEST,
            'There is a timeline in the sketch that has the same name '
            'but is stored in a different index, check the data_label '
            'on the uploaded data')

    # Only flag the index as processing once the checks above have passed,
    # so that a rejected upload does not leave it marked as processing.
    searchindex.set_status('processing')

    if not timeline:
        timeline = Timeline.get_or_create(name=timeline_name,
                                          description=timeline_name,
                                          sketch=sketch,
                                          user=current_user,
                                          searchindex=searchindex)

    if not timeline:
        abort(HTTP_STATUS_CODE_BAD_REQUEST,
              'Unable to get or create a new Timeline object.')

    timeline.set_status('processing')
    sketch.timelines.append(timeline)

    # Copy over the sketch labels that are configured to prevent deletion.
    labels_to_prevent_deletion = current_app.config.get(
        'LABELS_TO_PREVENT_DELETION', [])

    for sketch_label in sketch.get_labels:
        if sketch_label not in labels_to_prevent_deletion:
            continue
        timeline.add_label(sketch_label)
        searchindex.add_label(sketch_label)

    db_session.add(timeline)
    db_session.commit()

    sketch_id = sketch.id
    # Start Celery pipeline for indexing and analysis.
    # Import here to avoid circular imports.
    # pylint: disable=import-outside-toplevel
    from timesketch.lib import tasks
    pipeline = tasks.build_index_pipeline(
        file_path=file_path,
        events=events,
        timeline_name=timeline_name,
        index_name=searchindex.index_name,
        file_extension=file_extension,
        sketch_id=sketch_id,
        only_index=enable_stream,
        timeline_id=timeline.id)
    pipeline.apply_async()

    return self.to_json(timeline,
                        status_code=HTTP_STATUS_CODE_CREATED,
                        meta=meta)