def __fetch_object(self, opts): """ Upload a file to Diffgram from an Azure Blob :param opts: Dictionary with parameters for object fetching. :return: file obj if file was uploaded, else False """ spec_list = [{'bucket_name': str, 'path': str}] log = regular_log.default() log, input = regular_input.input_check_many(untrusted_input = opts, spec_list = spec_list, log = log) if len(log["error"].keys()) >= 1: return {'log': log} shared_access_signature = BlobSharedAccessSignature( account_name = self.connection_client.account_name, account_key = self.connection_client.credential.account_key ) expiration_offset = 40368000 blob_name = opts['path'] container = opts['bucket_name'] added_seconds = datetime.timedelta(0, expiration_offset) expiry_time = datetime.datetime.utcnow() + added_seconds filename = blob_name.split("/")[-1] sas = shared_access_signature.generate_blob( container_name = container, blob_name = blob_name, start = datetime.datetime.utcnow(), expiry = expiry_time, permission = BlobSasPermissions(read = True), content_disposition = 'attachment; filename=' + filename, ) sas_url = 'https://{}.blob.core.windows.net/{}/{}?{}'.format( self.connection_client.account_name, container, blob_name, sas ) with sessionMaker.session_scope() as session: project = Project.get_by_string_id(session, self.config_data.get('project_string_id')) member = session.query(Member).filter(Member.user_id == opts['event_data']['request_user']).first() # Deduct Media Type: extension = Path(opts['path']).suffix extension = extension.lower() media_type = None if extension in images_allowed_file_names: media_type = 'image' elif extension in videos_allowed_file_names: media_type = 'video' else: # TODO: Decide, do we want to raise an exception? or just do nothing? log = regular_log.default() log['error']['invalid_type'] = 'File must type of: {} {}'.format(str(images_allowed_file_names), str(videos_allowed_file_names)) log['error']['file_name'] = opts['path'] log['opts'] = opts Event.new( session = session, member_id = opts['event_data']['request_user'], kind = 'microsoft_azure_new_import_warning', description = 'Skipped import for {}, invalid file type.'.format(opts['path']), error_log = log, project_id = project.id, member = member, success = False ) return None # metadata = self.connection_client.head_object(Bucket=self.config_data['bucket_name'], Key=path) created_input = packet.enqueue_packet(self.config_data['project_string_id'], session = session, media_url = sas_url, media_type = media_type, job_id = opts.get('job_id'), batch_id = opts.get('batch_id'), file_name = opts.get('path'), video_split_duration = opts.get('video_split_duration'), directory_id = opts.get('directory_id'), extract_labels_from_batch = True) log = regular_log.default() log['opts'] = opts Event.new( session = session, member_id = opts['event_data']['request_user'], kind = 'microsoft_azure_new_import_success', description = 'New cloud import for {}'.format(opts['path']), error_log = opts, project_id = project.id, member = member, success = True ) return created_input

def __send_export(self, opts):
    spec_list = [{'project_string_id': dict}]
    log = regular_log.default()
    log, input = regular_input.input_check_many(untrusted_input = self.config_data,
                                                spec_list = spec_list,
                                                log = log)
    if len(log["error"].keys()) >= 1:
        return {'log': log}

    spec_list = [
        {'path': str},
        {"format": {
            'default': 'JSON',
            'kind': str,
            'valid_values_list': ['JSON', 'YAML']
        }},
        {'export_id': str},
        {'bucket_name': str},
    ]
    log = regular_log.default()
    log, input = regular_input.input_check_many(untrusted_input = opts,
                                                spec_list = spec_list,
                                                log = log,
                                                string_len_not_zero = False)
    if len(log["error"].keys()) >= 1:
        return {'log': log}

    if not opts['path'].endswith('/') and opts['path'] != '':
        log['error']['path'] = 'Path on bucket must be a folder, not a filename.'
        return log

    with sessionMaker.session_scope() as session:
        project = Project.get_by_string_id(session, self.config_data['project_string_id'])
        member = session.query(Member).filter(Member.user_id == opts['event_data']['request_user']).first()
        export = session.query(Export).filter(Export.id == opts['export_id']).first()

        # Check permissions and export status.
        export_check_result = check_export_permissions_and_status(export,
                                                                  self.config_data['project_string_id'],
                                                                  session)
        if len(export_check_result['error'].keys()) >= 1:
            return export_check_result

        result = export_view_core(
            export = export,
            format = opts['format'],
            return_type = 'bytes')
        filename = generate_file_name_from_export(export, session)

        if opts['path'] != '':
            key = '{}{}.{}'.format(opts['path'], filename, opts['format'].lower())
        else:
            key = '{}.{}'.format(filename, opts['format'].lower())

        file = io.BytesIO(result)
        blob_client = self.connection_client.get_blob_client(container = opts['bucket_name'], blob = key)
        content_type = mimetypes.guess_type(filename)[0]
        my_content_settings = ContentSettings(content_type = content_type)
        blob_client.upload_blob(file, content_settings = my_content_settings)

        log = regular_log.default()
        log['opts'] = opts
        Event.new(
            session = session,
            member_id = opts['event_data']['request_user'],
            kind = 'microsoft_azure_new_export_success',
            description = 'New cloud export for {}{}'.format(opts['path'], filename),
            error_log = opts,
            member = member,
            project_id = project.id,
            success = True
        )
        return {'result': True}
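
# Illustrative only: a sketch of the `opts` payload __send_export validates above.
# 'path' must be '' or end with '/', and 'format' must be 'JSON' or 'YAML'; the
# ids and bucket name below are hypothetical.
_example_azure_export_opts = {
    'path': 'exports/',                   # folder on the container, '' for the root
    'format': 'JSON',                     # or 'YAML'
    'export_id': '42',                    # Export row to serialize and upload
    'bucket_name': 'my-container',
    'event_data': {'request_user': 1},
}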

def job_launch_list_core(session, metadata_proposed, output_mode="serialize"):
    """
    Get the job_launch objects based on filters in metadata_proposed.
    """
    meta = default_metadata(metadata_proposed)
    output_job_launch_list = []
    limit_counter = 0

    # CAUTION
    # Multiple "modes", for builder and trainer, maybe more in the future.
    builder_or_trainer_mode = meta['builder_or_trainer']['mode']

    # It doesn't really make sense to have this here;
    # it should be part of some other metadata checking.
    if builder_or_trainer_mode not in ["builder", "trainer"]:
        raise Forbidden("Invalid builder_or_trainer_mode mode.")

    query = session.query(JobLaunch).join(Job)

    user = User.get(session)
    if user.last_builder_or_trainer_mode != builder_or_trainer_mode:
        raise Forbidden("Invalid user relation to builder_or_trainer_mode mode.")

    ### START FILTERS ###
    if meta["status"]:
        if meta["status"] != "All":
            query = query.filter(JobLaunch.status == meta["status"])

    if meta["date_from"]:
        date_from = datetime.datetime.strptime(meta["date_from"], "%Y-%m-%d")
        date_from = date_from.replace(hour=0, minute=0, second=0, microsecond=0)
        query = query.filter(JobLaunch.time_created >= date_from)

    if meta["date_to"]:
        date_to = datetime.datetime.strptime(meta["date_to"], "%Y-%m-%d")
        date_to = date_to.replace(hour=0, minute=0, second=0, microsecond=0)
        query = query.filter(JobLaunch.time_created <= date_to)

    if meta["job_ids"]:
        query = query.filter(Job.id.in_(meta["job_ids"]))

    # Also assumes org is None.
    # Actually this should be complementary still.
    if meta["project_string_id"]:
        project = Project.get_by_string_id(
            session=session,
            project_string_id=meta["project_string_id"])
        query = query.filter(Job.project_id == project.id)
    ### END FILTERS ###

    query = query.order_by(Job.time_created.desc())
    query = query.limit(meta["limit"])
    query = query.offset(meta["start_index"])

    job_launch_list = query.all()

    if output_mode == "serialize":
        for job_launch in job_launch_list:
            serialized = job_launch.serialize_for_list_view(session=session)
            output_job_launch_list.append(serialized)
            limit_counter += 1

    meta['end_index'] = meta['start_index'] + len(output_job_launch_list)
    meta['length_current_page'] = len(output_job_launch_list)

    if limit_counter == 0:
        meta['no_results_match_meta'] = True

    return output_job_launch_list, meta
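
# Illustrative only: a sketch of a `metadata_proposed` dict for
# job_launch_list_core, based on the keys the function reads above after
# default_metadata() has filled in any missing values. All values are
# hypothetical, and some keys may normally come from those defaults.
_example_job_launch_meta = {
    'builder_or_trainer': {'mode': 'builder'},  # must be 'builder' or 'trainer'
    'status': 'All',                            # 'All' disables the status filter
    'date_from': '2021-01-01',                  # inclusive, parsed as %Y-%m-%d
    'date_to': None,
    'job_ids': None,
    'project_string_id': 'my-project',
    'limit': 25,
    'start_index': 0,
}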

def __fetch_folder(self, opts):
    result = []
    if self.config_data.get('project_string_id') is None:
        return {'result': 'error'}

    paths = opts['path']
    if type(paths) != list:
        paths = [paths]

    with sessionMaker.session_scope() as session:
        project = Project.get_by_string_id(
            session, self.config_data.get('project_string_id'))
        member = session.query(Member).filter(
            Member.user_id == opts['event_data']['request_user']).first()

        for path in paths:
            blobs = self.connection_client.list_blobs(opts['bucket_name'], prefix=path)
            for blob in blobs:
                # Skip "directory" placeholder blobs.
                if blob.name.endswith('/'):
                    continue

                blob_expiry = int(time.time() + (60 * 60 * 24 * 30))  # 30 days
                signed_url = blob.generate_signed_url(expiration=blob_expiry)

                # Deduce media type from the file extension:
                extension = Path(blob.path).suffix
                media_type = None
                if extension in images_allowed_file_names:
                    media_type = 'image'
                elif extension in videos_allowed_file_names:
                    media_type = 'video'
                else:
                    logging.warning('File: {} must be of type: {} {}'.format(
                        blob.name,
                        str(images_allowed_file_names),
                        str(videos_allowed_file_names)))
                    log = regular_log.default()
                    log['error']['invalid_type'] = 'File must be of type: {} {}'.format(
                        str(images_allowed_file_names),
                        str(videos_allowed_file_names))
                    log['error']['file_name'] = path
                    log['opts'] = opts
                    Event.new(
                        session=session,
                        member_id=opts['event_data']['request_user'],
                        kind='google_cloud_new_import_warning',
                        description='Skipped import for {}, invalid file type.'.format(blob.name),
                        error_log=log,
                        project_id=project.id,
                        member=member,
                        success=False)
                    continue

                # TODO: check Input() table for duplicate file?
                created_input = packet.enqueue_packet(
                    self.config_data['project_string_id'],
                    session=session,
                    media_url=signed_url,
                    media_type=media_type,
                    job_id=opts.get('job_id'),
                    batch_id=opts.get('batch_id'),
                    file_name=path,
                    video_split_duration=opts.get('video_split_duration'),
                    directory_id=opts.get('directory_id'),
                    extract_labels_from_batch=True)

                log = regular_log.default()
                log['opts'] = opts
                Event.new(
                    session=session,
                    member_id=opts['event_data']['request_user'],
                    kind='google_cloud_new_import_success',
                    description='New cloud import for {}'.format(blob.name),
                    error_log=opts,
                    project_id=project.id,
                    member=member,
                    success=True)
                result.append(created_input)

    return result
def __fetch_object(self, opts): """Upload a file to diffgram from an S3 bucket :param s3_file_key: path of file to fetch from :return: file obj if file was uploaded, else False """ spec_list = [{'bucket_name': str, 'path': str}] log = regular_log.default() log, input = regular_input.input_check_many(untrusted_input=opts, spec_list=spec_list, log=log) if len(log["error"].keys()) >= 1: return {'log': log} # This might be an issue. Currently not supporting urls with no expiration. Biggest time is 1 week. signed_url = self.connection_client.generate_presigned_url('get_object', Params={'Bucket': opts['bucket_name'], 'Key': opts['path']}, ExpiresIn=3600 * 24 * 6) # 5 Days. with sessionMaker.session_scope() as session: project = Project.get_by_string_id(session, self.config_data.get('project_string_id')) member = session.query(Member).filter(Member.user_id == opts['event_data']['request_user']).first() # Deduct Media Type: extension = Path(opts['path']).suffix extension = extension.lower() media_type = None if extension in images_allowed_file_names: media_type = 'image' elif extension in videos_allowed_file_names: media_type = 'video' else: # TODO: Decide, do we want to raise an exception? or just do nothing? log = regular_log.default() log['error']['invalid_type'] = 'File must type of: {} {}'.format(str(images_allowed_file_names), str(videos_allowed_file_names)) log['error']['file_name'] = opts['path'] log['opts'] = opts Event.new( session=session, member_id=opts['event_data']['request_user'], kind='aws_s3_new_import_warning', description='Skipped import for {}, invalid file type.'.format(opts['path']), error_log=log, project_id=project.id, member=member, success=False ) return None # print('AAAAA', opts, opts.get('job_id')) # metadata = self.connection_client.head_object(Bucket=self.config_data['bucket_name'], Key=path) created_input = packet.enqueue_packet(self.config_data['project_string_id'], session=session, media_url=signed_url, media_type=media_type, job_id=opts.get('job_id'), video_split_duration=opts.get('video_split_duration'), directory_id=opts.get('directory_id')) log = regular_log.default() log['opts'] = opts Event.new( session=session, member_id=opts['event_data']['request_user'], kind='aws_s3_new_import_success', description='New cloud import for {}'.format(opts['path']), error_log=opts, project_id=project.id, member=member, success=True ) return created_input