def generate_file_name_from_export(export, session):
    """
    Returns a string with the final filename for an export.

    :param export:
    :return:
    """
    # TODO (low priority) switch to building a list and "".join() it;
    # it's a bit faster and more importantly easier to read / check.
    filename = '_diffgram_annotations__source_' + str(export.source) + '_'

    if export.source == "task":
        filename += str(export.task.id)

    if export.source == "job":
        job = Job.get_by_id(session=session, job_id=export.job_id)
        if job:
            filename += str(job.name)

    if export.source == "directory":
        filename += str(export.working_dir.nickname)

    # Always add timestamps to avoid duplicate names.
    filename += "_datetime_" + datetime.datetime.utcnow().isoformat()

    return filename
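# Minimal usage sketch for generate_file_name_from_export(); "session" and
# "export" are hypothetical placeholders (an open SQLAlchemy session and a
# persisted Export row), and the example output values are illustrative only:
#
#   filename = generate_file_name_from_export(export=export, session=session)
#   # e.g. '_diffgram_annotations__source_directory_Default_datetime_2021-03-01T12:00:00.000000'
#   # e.g. '_diffgram_annotations__source_job_My Job Name_datetime_2021-03-01T12:00:00.000000'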
def test_task_template_launch_core(self):
    # Create mock tasks
    label = data_mocking.create_label({
        'name': 'mylabel',
    }, self.session)
    label_file = data_mocking.create_label_file({
        'label': label,
        'project_id': self.project.id
    }, self.session)
    file = data_mocking.create_file({'project_id': self.project.id}, self.session)
    job = data_mocking.create_job({
        'name': 'my-test-job-{}'.format(1),
        'project': self.project,
        'type': "Normal",
    }, self.session)

    result = task_template_launch_handler.task_template_launch_core(
        self.session,
        job)
    self.assertEqual(result, job)

    result = task_template_launch_handler.task_template_launch_core(
        self.session,
        None)
    self.session.commit()
    self.assertEqual(result, False)

    job = Job.get_by_id(self.session, job_id=job.id)
    self.assertEqual(job.status, 'active')
def task_trainer_request_api(job_id):
    """
    Requests the next available task on a job for a trainer (annotator).
    """
    log = regular_input.regular_log.default_api_log()

    with sessionMaker.session_scope() as session:

        # Job id will have already been checked in permissions
        job = Job.get_by_id(session, job_id)

        ### MAIN
        user = User.get(session)
        member = user.member

        result, task = task_trainer_request(
            session = session,
            user = user,
            job = job)
        ####

        if result is True:
            task_serialized = task.serialize_trainer_annotate(session)

            log['success'] = True
            return jsonify(
                log = log,
                task = task_serialized), 200

        # TODO front end handling on this
        log['error']['task_request'] = "No tasks available."
        return jsonify(log = log), 200
def labelbox_web_hook_manager():
    """
    Webhook for receiving data on Diffgram once finished on Labelbox.

    # NOTE: Labelbox does not support Text or dropdown classifications
    # in export for videos.

    :return:
    """
    # First check if secret is correct
    payload = request.data
    secret = settings.LABEL_BOX_SECRET
    log = regular_log.default()
    computed_signature = hmac.new(bytearray(secret.encode('utf-8')),
                                  msg=payload,
                                  digestmod=hashlib.sha1).hexdigest()
    if request.headers['X-Hub-Signature'] != 'sha1=' + computed_signature:
        error = 'Error: computed_signature does not match signature provided in the headers'
        logger.error('Error: computed_signature does not match signature provided in the headers')
        return error

    with sessionMaker.session_scope() as session:
        labelbox_event = request.headers['X-Labelbox-Event']
        payload = request.json
        logger.debug('Payload for labelbox webhooks: {}'.format(payload))
        labelbox_project_id = payload['project']['id']
        project_external_mapping = ExternalMap.get(
            session=session,
            external_id=labelbox_project_id,
            type='labelbox',
            diffgram_class_string='task_template')

        if project_external_mapping:
            task_template = Job.get_by_id(session, project_external_mapping.job_id)
            if task_template:
                connection = task_template.interface_connection
                logger.debug('Connection for labelbox: {}'.format(connection))
                connector_manager = ConnectorManager(connection=connection,
                                                     session=session)
                connector = connector_manager.get_connector_instance()
                connector.connect()

                sync_manager = LabelBoxSyncManager(
                    session=session,
                    task_template=task_template,
                    labelbox_project=None,
                    log=log,
                    labelbox_connector=connector)
                sync_manager.handle_task_creation_hook(payload)
                return jsonify({'message': 'OK.'})
            else:
                log['error']['task_template'] = 'Task template not found.'
                return jsonify(log)
        else:
            log['error']['labelbox_project'] = 'Labelbox external mapping not found.'
            return jsonify(log)
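# A minimal sketch of producing an 'X-Hub-Signature' header that the check in
# labelbox_web_hook_manager() above would accept, e.g. from a test client.
# The payload bytes and secret value are hypothetical; only the hmac / hashlib
# usage mirrors the handler, and it relies on the same module-level imports.
def _example_labelbox_signature(payload_bytes: bytes, secret: str) -> str:
    computed = hmac.new(bytearray(secret.encode('utf-8')),
                        msg=payload_bytes,
                        digestmod=hashlib.sha1).hexdigest()
    return 'sha1=' + computed

# Usage sketch (header names taken from the handler above, values illustrative):
#   headers = {
#       'X-Hub-Signature': _example_labelbox_signature(b'{"project": {"id": "abc"}}', 'my-secret'),
#       'X-Labelbox-Event': 'REVIEW_CREATED'
#   }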
def test_task_template_new_exam(self):
    job = data_mocking.create_job({
        'name': 'my-test-job-{}'.format(1),
        'project': self.project,
        'type': "Normal",
    }, self.session)
    # TODO: analyze if provision_root_tasks is needed now.
    result = task_template_launch_handler.task_template_new_exam(self.session, job)
    self.session.commit()
    job = Job.get_by_id(self.session, job_id=job.id)
    self.assertEqual(job.status, 'active')
def get_project_string_from_job_id(session, job_id):
    job = Job.get_by_id(session, job_id)
    if job is None:
        raise Forbidden
    if job.project is None:
        raise Forbidden
    project_string_id = job.project.project_string_id
    return project_string_id
def guide_attach_to_job_api():
    """
    API to attach a guide to a job.

    Basic value / quality checking, then calls guide_to_job_core().

    Concept of purposely only wanting one guide attached to each thing.
    """
    spec_list = [
        {"guide_id": int},
        {"job_id": int},
        {"kind": str},
        {"update_or_remove": str}
    ]

    log, input, untrusted_input = regular_input.master(request=request,
                                                       spec_list=spec_list)
    if len(log["error"].keys()) >= 1:
        return jsonify(log=log), 400

    with sessionMaker.session_scope() as session:

        job = Job.get_by_id(session, input['job_id'])
        guide = Guide.get_by_id(session, input['guide_id'])

        ### MAIN
        result, log = guide_to_job_core(
            session=session,
            log=log,
            guide=guide,
            job=job,
            job_id=input['job_id'],
            kind=input['kind'],
            update_or_remove=input['update_or_remove'])

        if result is False:
            return jsonify(log=log), 400
        ####

        log['success'] = True
        return jsonify(log=log), 200
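# Illustrative request body for guide_attach_to_job_api(), derived from the
# spec_list above. The ids are hypothetical; the "kind" and "update_or_remove"
# values are assumptions based on the default / review guide slots used
# elsewhere and on the parameter name, not confirmed constants:
#
#   {
#       "guide_id": 12,
#       "job_id": 45,
#       "kind": "default",            # assumed; "review" is the other guide slot
#       "update_or_remove": "update"  # assumed; "remove" per the parameter name
#   }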
def job_trainer_info_start_api(job_id):
    """
    Starting trainer job
    """
    log = regular_input.regular_log.default_api_log()

    with sessionMaker.session_scope() as session:

        job = Job.get_by_id(session, job_id)
        user = User.get(session)

        # This is only showing a user's own stuff right...
        job_serialized = job.serialize_trainer_info_default(session=session,
                                                            user_id=user.id)

        log['success'] = True
        return jsonify(log=log, job=job_serialized), 200
def job_trainer_info_api(job_id):
    """
    Basic information for job

    Labels and guides come from task
    But things like name, what type of job it is, etc...
    Other stuff can come from here
    """
    log = regular_input.regular_log.default_api_log()

    with sessionMaker.session_scope() as session:

        job = Job.get_by_id(session, job_id)
        user = User.get(session)

        job_serialized = job.serialize_trainer_info_default(session=session,
                                                            user_id=user.id)

        log['success'] = True
        return jsonify(log=log, job=job_serialized), 200
def job_info_builder_api(job_id):
    """
    """
    spec_list = [
        {'mode_data': None},  # Default of None is ok, if data, then type str
        {'refresh_stats': {
            'default': True,
            'kind': bool,
            'required': False
        }}
    ]

    log, input, untrusted_input = regular_input.master(request=request,
                                                       spec_list=spec_list)
    if len(log["error"].keys()) >= 1:
        return jsonify(log=log), 400

    with sessionMaker.session_scope() as session:

        job = Job.get_by_id(session, job_id)
        user = User.get(session=session)

        job_serialized = job_info_builder_core(session=session,
                                               job=job,
                                               user=user,
                                               input=input)

        log['success'] = True
        return jsonify(log=log, job=job_serialized), 200
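# Illustrative request body for job_info_builder_api(), matching the spec_list
# above; both fields are optional and the values shown are hypothetical:
#
#   {
#       "mode_data": null,       # default None; a string when provided
#       "refresh_stats": true    # defaults to True
#   }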
def __generate_payload_for_task_template(self, session, start_time=None):
    task_template = Job.get_by_id(session=session,
                                  job_id=self.notification_relation.job_id)
    payload = task_template.serialize_builder_info_default(session=session)
    return payload
def job_pin_core(session, job_id):
    job = Job.get_by_id(session, job_id)
    job.is_pinned = not job.is_pinned
    return job.serialize_minimal_info()
def update_dirs_to_job_api(project_string_id):
    """
    Updates the directories attached to a job.
    Used in the job creation section when attaching directories.

    Assumes the job already has a directory created.
    For each attached directory creates a pointer.

    Security for checking that the directories belong to the project is below.
    """
    # Maybe instead of passing a literal directory list, we pass the search criteria?
    # But limit of this is a user may select specific directories...

    # TODO a lot of this feels like generic stuff for
    # adding or removing to a directory

    spec_list = [
        {"directory_list": {
            'kind': list,
            'allow_empty': True
        }},
        {"job_id": {
            'kind': int
        }},
    ]

    log, input, untrusted_input = regular_input.master(request=request,
                                                       spec_list=spec_list)
    if len(log["error"].keys()) >= 1:
        return jsonify(log=log), 400

    with sessionMaker.session_scope() as session:

        project = Project.get(session, project_string_id)
        job = Job.get_by_id(session, input['job_id'])

        Job_permissions.check_job_after_project_already_valid(job=job,
                                                              project=project)

        directory_list = input['directory_list']

        # Do nothing for empty dir list.
        if len(directory_list) == 0:
            return jsonify(log=log,
                           job=job.serialize_builder_info_edit(session)), 200

        directory = job.directory  ## TODO review how we are getting user's directory
        user = User.get(session)

        session.add(job)

        dir_ids = [x['directory_id'] for x in directory_list]

        # Check that all directories exist and belong to the current project.
        selected_dirs = session.query(WorkingDir).filter(
            WorkingDir.id.in_(dir_ids))
        for dir in selected_dirs:
            if dir.project_id != project.id:
                log['error']['directory_list'] = 'Provide only directories belonging to the project.'

        if len(log["error"].keys()) >= 1:
            return jsonify(log=log), 400

        # Update directories
        job.update_attached_directories(session,
                                        directory_list,
                                        delete_existing=True)
        job.set_cache_key_dirty(cache_key="attached_directories_dict")

        user_email = None
        member_id = None
        if user:
            user_email = user.email
            member_id = user.member_id

        Event.new(kind="job_attached_directories_update",
                  session=session,
                  member_id=member_id,
                  success=True,
                  email=user_email)

        log['success'] = True
        return jsonify(log=log,
                       job=job.serialize_builder_info_edit(session)), 200
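# Illustrative request body for update_dirs_to_job_api(); the ids are
# hypothetical. Each entry must carry a 'directory_id' key (per the dir_ids
# extraction above); an empty directory_list is accepted and is a no-op:
#
#   {
#       "job_id": 45,
#       "directory_list": [
#           {"directory_id": 101},
#           {"directory_id": 102}
#       ]
#   }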
def add_files_to_job_api(project_string_id):
    """
    Method adds files to a job. Used in the job creation section when attaching files.

    Still need a method for individual file selection so start with this

    Adds selected files to a job
        Assumes the job already has a directory created
        For each file creates a pointer

    Assumption here is we are attaching files directly...
    ie we don't have to check latest version or how that's happening...

    Security for checking files are in directory...
    """
    # get / declare file list...
    # Maybe instead of passing literal file list, we pass the search criteria?
    # But limit of this is a user may select specific files...

    # TODO a lot of this feels like generic stuff for
    # Adding or removing to a directory

    spec_list = [
        {"file_list_selected": {'kind': list}},
        {"job_id": {'kind': int}},
        {"add_or_remove": {'kind': str}},
        {"directory_id": {'default': None}},
        {"select_from_metadata": {  # WIP NOT yet implemented
            'default': False,
            'kind': bool}},
        {"metadata_proposed": {  # WIP NOT yet implemented
            'default': None,
            'kind': dict}}
    ]

    log, input, untrusted_input = regular_input.master(request=request,
                                                       spec_list=spec_list)
    if len(log["error"].keys()) >= 1:
        return jsonify(log=log), 400

    with sessionMaker.session_scope() as session:

        project = Project.get(session, project_string_id)
        job = Job.get_by_id(session, input['job_id'])

        Job_permissions.check_job_after_project_already_valid(
            job=job,
            project=project)

        add_or_remove = input['add_or_remove']
        file_list_selected = input['file_list_selected']

        directory = job.directory  ## TODO review how we are getting user's directory
        user = User.get(session)

        directory_id = untrusted_input.get('directory_id', None)
        incoming_directory = WorkingDir.get_with_fallback(
            session=session,
            directory_id=directory_id,
            project=project)

        if incoming_directory is False:
            log['error']['directory'] = "No directory found"
            return jsonify(log=log), 400

        session.add(job)

        result, count_changed = file_list_to_directory(
            session=session,
            add_or_remove=add_or_remove,
            log=log,
            directory=directory,
            file_list=file_list_selected,
            incoming_directory=incoming_directory,
            job=job
        )

        # Context of not wanting to show
        # Success when there are "sub errors"
        # Not clear on optimal way to handle this
        # Maybe ideally show success / errors on a per file basis
        if len(log["error"].keys()) >= 1:
            return jsonify(log=log), 400

        # updates file_count_statistic
        job.update_file_count_statistic(session=session)

        user_email = None
        member_id = None
        if user:
            user_email = user.email
            member_id = user.member_id

        Event.new(
            kind="job_file_update",
            session=session,
            member_id=member_id,
            success=True,
            email=user_email
        )

        log['success'] = True
        return jsonify(
            log=log,
            job=job.serialize_builder_info_edit(session)), 200
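# Illustrative request body for add_files_to_job_api(), matching the spec_list
# above. The ids are hypothetical, and the shape of each file_list_selected
# entry (a dict with an 'id' key) is an assumption; the exact shape is whatever
# file_list_to_directory() consumes. The two WIP metadata fields are omitted:
#
#   {
#       "file_list_selected": [{"id": 901}, {"id": 902}],
#       "job_id": 45,
#       "add_or_remove": "add",   # or "remove"
#       "directory_id": 101       # optional; falls back via WorkingDir.get_with_fallback
#   }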
def web_export_to_file(project_string_id):
    """
    Generates annotations

    Assumes latest version if none provided...
    # TODO working on how we want to handle this, especially in relationship
    # to different branches and versions and working dirs...
    Shouldn't actually do latest, should do the first version

    Long running operation (starts new thread)

    Input example (JSON)
        {
            directory_id: 1059
            file_comparison_mode: "latest"
            kind: "Annotations"
            masks: false
            source: "directory"
            version_id: 0
        }

    wait_for_export_generation == True is in conjunction with return_type

    For job permissions:
        We assume that we already are operating in context of project
        permissions, so as long as the job is in the project then it's fine,
        including things like API enabled builder.
    """
    spec_list = [
        {"kind": str},    # ["Annotations", "TF Records"]
        {"source": str},  # ["job", "directory", "task", "version"]
        {"file_comparison_mode": str},
        {"masks": bool},

        # TODO could we merge all of these ids into
        # an "id" field, and then rely on the source thing? feels
        # strange to have it separate like that...
        # would make it a lot cleaner...
        # ie "id" and may be one of ["job", "directory", "version"]
        {"version_id": None},    # int, but not required?
        {"directory_id": None},  # int, but not required?
        {"job_id": None},
        {"task_id": None},

        {"return_type": None},   # ["url", "data"]
        {"wait_for_export_generation": {
            'default': False,
            'kind': bool
        }},
        {"ann_is_complete": {  # April 22, 2020, assumes "file" (not task)
            'default': None,   # None means all
            'kind': bool
        }}
    ]

    log, input, untrusted_input = regular_input.master(request=request,
                                                       spec_list=spec_list)
    if len(log["error"].keys()) >= 1:
        return jsonify(log=log), 400

    @copy_current_request_context
    def export_on_thread(project_string_id, export_id):
        with sessionMaker.session_scope_threaded() as session:
            export_web_core(session=session,
                            project_string_id=project_string_id,
                            export_id=export_id)
        t.cancel()

    """
    Permission model for task

    If a task exists, we set job from task then use the same permission
    system as for a job.

    Did some manual tests on accounts and it worked as expected
    (denied for ids not in project). Still feels a little bit brittle /
    a lot of little assumptions about stuff matching
    (ie source being checked / matching for the 'task' string),
    but otherwise seems to work ok.
    """
    # TODO: if reusing the code somewhere else, make sure to take it out
    # into a separate function to follow DRY.

    with sessionMaker.session_scope() as session:

        project = Project.get(session, project_string_id)

        if input["source"] == "task":
            task = Task.get_by_id(session=session, task_id=input["task_id"])
            if task is None:
                log["error"]["task_id"] = "Invalid task id"
                return jsonify(log=log), 400
            job = task.job

        elif input["source"] == "job":
            job = Job.get_by_id(session, input["job_id"])

        # need directory for label stuff right
        directory = None

        if input["source"] in ["task", "job"]:
            Job_permissions.check_job_after_project_already_valid(
                job=job,
                project=project)
            # TODO verify this is working as expected.
            directory = job.completion_directory
            # print("directory", directory)

        if not directory:
            directory = WorkingDir.get_with_fallback(
                session=session,
                project=project,
                directory_id=input["directory_id"])

        if directory is None:
            log["error"]["directory"] = "Invalid directory"
            return jsonify(log=log), 400

        if len(log["error"].keys()) >= 1:
            return jsonify(log=log), 400

        # TODO - the directory we pull from may need to
        # make sense in terms of job or not...

        # Class Export() item to track it
        export = Export(project=project,
                        file_comparison_mode=input['file_comparison_mode'],
                        kind=input["kind"],
                        source=input["source"],
                        masks=input["masks"],
                        job_id=input["job_id"],
                        task_id=input["task_id"],
                        ann_is_complete=input['ann_is_complete'],
                        working_dir_id=directory.id)

        if export.kind not in ["Annotations", "TF Records"]:
            log["error"]["kind"] = "Invalid kind"
            return jsonify(log=log), 400

        session.add(export)
        session.flush()

        # Long running operation
        if input['wait_for_export_generation'] is False:
            t = threading.Timer(0, export_on_thread,
                                args=(project_string_id, export.id,))
            t.daemon = True
            t.start()
            return jsonify(success=True,
                           export=export.serialize())

        # Immediate return, ie for mock test data
        else:
            export_web_core(session=session,
                            project_string_id=project_string_id,
                            export_id=export.id)
            result = export_view_core(
                export=export,
                format="JSON",
                return_type=input['return_type'])
            # If it's TF Records or other cases it will be ignored?
            return jsonify(result), 200
def guide_view_core(session,
                    metadata_proposed,
                    project,
                    mode="serialize",
                    user=None):
    """
    mode
        "serialize" is in context of web, ie serialize the resulting list
            currently defaults to this context
        "objects" returns the database objects, ie for auto commit
    """
    meta = default_metadata(metadata_proposed)
    start_time = time.time()
    output_file_list = []
    limit_counter = 0
    query = session.query(Guide)
    meta['guide_info'] = {}

    ### START FILTERS ###
    if meta["my_stuff_only"]:
        # assumes in context of user doing search, not API
        user = User.get(session)
        query = query.filter(Guide.member_created == user.member)

    # if meta["field"]:
    #     # Get field id? or ... WIP
    #     query = query.filter(Job.field == None)

    query = query.filter(Guide.project == project)

    if meta['job_id'] and meta['mode'] == 'attach':

        job = Job.get_by_id(session = session, job_id = meta['job_id'])

        ignore_id_list = []

        # TODO eventually use templates
        if job.guide_default_id:
            ignore_id_list.append(job.guide_default_id)
            serialized = job.guide_default.serialize_for_list_view()
            serialized["kind"] = "default"
            meta['guide_info']['guide_default_id'] = job.guide_default_id
            output_file_list.append(serialized)

        if job.guide_review_id:
            ignore_id_list.append(job.guide_review_id)
            serialized = job.guide_review.serialize_for_list_view()
            serialized["kind"] = "review"
            meta['guide_info']['guide_review_id'] = job.guide_review_id
            output_file_list.append(serialized)

        if len(ignore_id_list) != 0:
            query = query.filter(Guide.id.notin_(ignore_id_list))

    # Not archived
    query = query.filter(Guide.archived == False)
    #### END FILTERS ###

    query = query.limit(meta["limit"])
    query = query.offset(meta["start_index"])

    guide_list = query.all()

    if mode == "serialize":
        for guide in guide_list:
            serialized = guide.serialize_for_list_view()
            output_file_list.append(serialized)
            limit_counter += 1

    meta['end_index'] = meta['start_index'] + len(guide_list)
    meta['length_current_page'] = len(output_file_list)

    if limit_counter == 0:
        meta['no_results_match_meta'] = True

    end_time = time.time()
    print("guide meta time", end_time - start_time)

    return output_file_list, meta
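# Illustrative metadata_proposed for guide_view_core(), covering the keys the
# filters above read; the values are hypothetical and default_metadata() is
# assumed to fill anything omitted:
#
#   {
#       "my_stuff_only": False,
#       "job_id": 45,        # with mode "attach", surfaces that job's default / review guides first
#       "mode": "attach",
#       "limit": 25,
#       "start_index": 0
#   }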
def job_cancel_core(session, user, log, mode, job_id):
    """
    QUESTIONS
        Option to "hide" a job as well?
        What about a super admin option to actually delete
        (ie for database clean up...)

    Arguments
        session, db object
        user, class User object
        log, diffgram regular log dict
        mode, str, one of ["cancel", "archive", "delete"]
        job_id, int, id of class Job object

    Returns
    """
    job = Job.get_by_id(session=session, job_id=job_id)
    if user is None or job is None:
        log['error']['user_job'] = "No user or job"
        return False, log

    # JOB LIMITS
    result, log = job_cancel_limits(session, log, user, job, mode)
    if result is False:
        return result, log

    # TASK specific limits
    # Difference is that a job may have tasks that aren't cancelable
    status_list = None
    if mode in ["cancel"]:
        status_list = ["created", "available", "active"]

    if mode in ["delete"]:
        # Don't allow even a super admin to delete completed
        # from this method?
        # QUESTION
        # For that matter should a "completed" job even be allowed to be deleted?
        status_list = ["draft", "created", "available", "active"]
        # TODO disallow deleting jobs that have
        # any completed tasks / transactions

    if status_list:
        # Just a question, is there really any point of doing this
        # if the job was cancelled?
        # Like maybe for deleting, but for status I don't know
        task_list = job.task_list(session=session,
                                  status_list=status_list)
        for task in task_list:
            if mode == "cancel":
                session.add(task)
                task.status = "cancelled"
            if mode == "delete":
                session.delete(task)

    if mode == "archive":
        # We may want to rename "hidden" to archived?
        session.add(job)
        job.status = 'archived'
        job.hidden = True
        job.member_updated = user.member

        # Assume we want to remove sync dirs on archive;
        # we might remove this if that is not the case.
        job_dir_sync_manager = job_dir_sync_utils.JobDirectorySyncManager(
            job=job,
            session=session,
            log=log)
        job_dir_sync_manager.remove_job_from_all_dirs()

    if mode == "cancel":
        session.add(job)
        job.status = "cancelled"
        job.member_updated = user.member

    if mode == "delete":
        """
        Question, is there a better way to do this with CASCADE / sql rules?
        It feels a bit funny to do it this way BUT also want to be careful
        since so much reuse!!!
        ie wouldn't want to delete a guide that was attached to a job on cascade
        """
        # What about a job's directory?
        # TODO what about deleting associated credential links / other tables?
        user_to_job = User_To_Job.get_single_by_ids(session=session,
                                                    user_id=user.id,
                                                    job_id=job.id)

        task_list = job.task_list(session)
        for task in task_list:
            if task.file.type == "video":
                # Is this the right way to delete stuff here?
                video_frame_query = WorkingDirFileLink.image_file_list_from_video(
                    session=session,
                    video_parent_file_id=task.file.id,
                    return_mode="query")
                # Not working yet!
                video_frame_query.delete()
            session.delete(task)
            session.delete(task.file)

        # TODO still getting an integrity error
        # Must be some file that exists related to this job?
        # Or some other file that got updated incorrectly?
        job_dir_sync_manager = job_dir_sync_utils.JobDirectorySyncManager(
            job=job,
            session=session,
            log=log)
        job_dir_sync_manager.remove_job_from_all_dirs(soft_delete=False)
        session.delete(job)
        session.delete(user_to_job)

    return True, log
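# Summary of job_cancel_core() behaviour by mode, as implemented above:
#   "cancel"  -> tasks in ["created", "available", "active"] marked "cancelled";
#                job status set to "cancelled"
#   "archive" -> no per-task changes; job marked 'archived' and hidden,
#                and removed from all synced directories
#   "delete"  -> tasks and their files (plus video frame links) deleted,
#                job removed from all dirs (hard delete), then the Job and
#                its User_To_Job row are deleted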
def file_view_core(self, mode="serialize"):
    """
    mode
        "serialize" is in context of web, ie serialize the resulting list
            currently defaults to this context
        "objects" returns the database objects, ie for auto commit
    """
    output_file_list = []
    limit_counter = 0
    file_count = 0
    # File count includes ones we don't actually query
    # outside of limits...

    if self.metadata['file_view_mode'] is None or \
            self.metadata['file_view_mode'] not in ["changes", "annotation", "home", "task"]:
        return "Invalid file_view_mode", False

    ignore_id_list = None

    # For creating / viewing Jobs
    if self.metadata['file_view_mode'] == "task" and self.metadata['job_id']:
        # TODO permissions check on job id?
        # TODO handling for larger file sizes
        # TODO better null handling here... a lot of assumptions
        # Would prefer to declare "new" job condition
        # instead of inferring it from file_view_mode and job_id being present

        job = Job.get_by_id(session=self.session,
                            job_id=self.metadata['job_id'])

        file_list_attached_to_job = WorkingDirFileLink.file_list(
            session=self.session,
            working_dir_id=job.directory_id,
            limit=None)

        # TODO future maybe just get ids only from sql
        ignore_id_list = [i.id for i in file_list_attached_to_job]

        for index_file_attach, file in enumerate(file_list_attached_to_job):
            file_serialized = file.serialize_with_type(session=self.session)
            output_file_list.append(file_serialized)
            output_file_list[index_file_attach]['attached_to_job'] = True

    ann_is_complete = None
    if self.metadata['annotation_status'] == "Completed":
        ann_is_complete = True
    if self.metadata['annotation_status'] == "Not completed":
        ann_is_complete = False

    has_some_machine_made_instances = None
    if self.metadata['machine_made_setting'] == "Predictions only":
        has_some_machine_made_instances = True
    if self.metadata['machine_made_setting'] == "Human only":
        has_some_machine_made_instances = False

    media_type = self.metadata.get("media_type", None)
    if media_type in ["All", None]:
        media_type_query = ["image", "video", "text"]
    if media_type == "Image":
        media_type_query = "image"
    if media_type == "Video":
        media_type_query = "video"

    exclude_removed = True
    if self.metadata['file_view_mode'] in ["changes"]:
        exclude_removed = False

    job_id = None
    if self.metadata['file_view_mode'] in ["annotation", "home"]:
        # TODO clarify this is in context of viewing Files for a project
        if self.metadata['job_id']:
            job_id = self.metadata['job_id']

    order_by_direction = desc
    requested_direction = self.metadata['pagination'].get('descending')
    if requested_direction == False:    # defaults to true...
        order_by_direction = asc

    # Default
    order_by_class_and_attribute = File.time_last_updated
    requested_order_by = self.metadata['pagination'].get('sortBy')
    if requested_order_by:
        if requested_order_by == "filename":
            order_by_class_and_attribute = File.original_filename
        if requested_order_by == "created_time":
            order_by_class_and_attribute = File.created_time
        if requested_order_by == "time_last_updated":
            order_by_class_and_attribute = File.time_last_updated

    query, count = WorkingDirFileLink.file_list(
        session=self.session,
        working_dir_id=self.directory.id,
        ann_is_complete=ann_is_complete,
        type=media_type_query,
        return_mode="query",
        limit=self.metadata["limit"],
        date_from=self.metadata["date_from"],
        date_to=self.metadata["date_to"],
        issues_filter=self.metadata["issues_filter"],
        offset=self.metadata["start_index"],
        original_filename=self.metadata['search_term'],
        order_by_class_and_attribute=order_by_class_and_attribute,  # use the ordering resolved above
        order_by_direction=order_by_direction,
        exclude_removed=exclude_removed,
        file_view_mode=self.metadata['file_view_mode'],
        job_id=job_id,
        has_some_machine_made_instances=has_some_machine_made_instances,
        ignore_id_list=ignore_id_list,
        count_before_limit=True)

    file_count += count
    working_dir_file_list = query.all()

    if mode == "serialize":
        for index_file, file in enumerate(working_dir_file_list):
            file_serialized = file.serialize_with_type(self.session)
            output_file_list.append(file_serialized)
            limit_counter += 1

    if mode == "objects":
        output_file_list.extend(working_dir_file_list)
        limit_counter += len(working_dir_file_list)

    self.metadata['end_index'] = self.metadata['start_index'] + len(working_dir_file_list)
    # search_info['no_results_match_search'] = True
    self.metadata['length_current_page'] = len(output_file_list)
    self.metadata['file_count'] = file_count
    # TODO not clear why we need this / why this is separate
    # from file_count

    if limit_counter == 0:
        self.metadata['no_results_match_search'] = True

    return output_file_list
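# Illustrative self.metadata for file_view_core() in the "annotation" view,
# covering the keys read above; all values are hypothetical and the surrounding
# class is assumed to provide self.session and self.directory:
#
#   {
#       "file_view_mode": "annotation",
#       "job_id": None,
#       "annotation_status": "Completed",        # or "Not completed"; anything else means all
#       "machine_made_setting": "Human only",    # or "Predictions only"
#       "media_type": "All",
#       "search_term": None,
#       "date_from": None, "date_to": None,
#       "issues_filter": None,
#       "limit": 25, "start_index": 0,
#       "pagination": {"descending": True, "sortBy": "time_last_updated"}
#   }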