def assembly_data_to_rows(data): """Converts assembly data dictionary to text rows""" rows = [] data_key = "assembly_data" kbase_key = "kbase_assembly_input" lib_key = "file_sets" info_key = "file_infos" if data_key in data: data = data[data_key] else: if kbase_key in data: data = kb_to_asm(data[kbase_key]) for lib in data.get(lib_key, []): libtype = lib.get("type", "unknown") files = [] for info in lib.get(info_key, []): filename = info.get("filename", None) if not filename: filename = info.get("direct_url", "") filename = re.sub(r'.*/', '', filename) filesize = info.get("filesize", None) filesize = " (%s)" % sizeof_fmt(filesize) if filesize else "" files.append("%s%s" % (filename, filesize)) rows.append([libtype, " ".join(files)]) return rows
def _get_data(self, body):
    params = json.loads(body)
    filepath = os.path.join(self.datapath, params['ARASTUSER'],
                            str(params['data_id']))
    datapath = filepath
    filepath += "/raw/"
    all_files = []
    user = params['ARASTUSER']
    token = params['oauth_token']
    uid = params['_id']

    ##### Get data from ID #####
    data_doc = self.metadata.get_data_docs(params['ARASTUSER'], params['data_id'])
    if not data_doc:
        raise Exception('Invalid Data ID: {}'.format(params['data_id']))
    if 'kbase_assembly_input' in data_doc:
        params['assembly_data'] = kb_to_asm(data_doc['kbase_assembly_input'])
    elif 'assembly_data' in data_doc:
        params['assembly_data'] = data_doc['assembly_data']

    ##### Get data from assembly_data #####
    self.metadata.update_job(uid, 'status', 'Data transfer')
    try:
        os.makedirs(filepath)
    except OSError:  # directory may already exist
        pass

    ### TODO Garbage collect ###
    download_url = 'http://{}'.format(self.shockurl)
    file_sets = params['assembly_data']['file_sets']
    for file_set in file_sets:
        if file_set['type'] == 'paired_url':
            file_set['type'] = 'paired'
        elif file_set['type'] == 'single_url':
            file_set['type'] = 'single'
        elif file_set['type'] == 'reference_url':
            file_set['type'] = 'reference'
        file_set['files'] = []  # legacy
        for file_info in file_set['file_infos']:
            #### File is stored on Shock
            if file_info['filename']:
                local_file = os.path.join(filepath, file_info['filename'])
                if os.path.exists(local_file):
                    logging.info("Requested data exists on node: {}".format(local_file))
                else:
                    local_file = self.download_shock(download_url, user, token,
                                                     file_info['shock_id'], filepath)
            elif file_info['direct_url']:
                local_file = os.path.join(filepath,
                                          os.path.basename(file_info['direct_url']))
                if os.path.exists(local_file):
                    logging.info("Requested data exists on node: {}".format(local_file))
                else:
                    local_file = self.download_url(file_info['direct_url'], filepath)
            file_info['local_file'] = local_file
            file_set['files'].append(local_file)  # legacy
        all_files.append(file_set)
    return datapath, all_files

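# Hypothetical helper isolating the URL-suffix normalization done inline in
# _get_data above: "paired_url", "single_url", and "reference_url" map to
# "paired", "single", and "reference"; any other type passes through unchanged.
def _normalize_file_set_type(fs_type):
    return fs_type[:-len('_url')] if fs_type.endswith('_url') else fs_type
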
def _get_data(self, body):
    params = json.loads(body)
    filepath = os.path.join(self.datapath, params['ARASTUSER'],
                            str(params['data_id']))
    datapath = filepath
    filepath += "/raw/"
    all_files = []
    user = params['ARASTUSER']
    token = params['oauth_token']
    uid = params['_id']

    ##### Get data from ID #####
    data_doc = self.metadata.get_doc_by_data_id(params['data_id'],
                                                params['ARASTUSER'])
    if not data_doc:
        raise Exception('Invalid Data ID: {}'.format(params['data_id']))
    if 'kbase_assembly_input' in data_doc:
        params['assembly_data'] = kb_to_asm(data_doc['kbase_assembly_input'])
    elif 'assembly_data' in data_doc:
        params['assembly_data'] = data_doc['assembly_data']

    ##### Get data from assembly_data #####
    self.metadata.update_job(uid, 'status', 'Data transfer')
    try:
        os.makedirs(filepath)
    except OSError:  # directory may already exist
        pass

    ### TODO Garbage collect ###
    download_url = 'http://{}'.format(self.shockurl)
    file_sets = params['assembly_data']['file_sets']
    for file_set in file_sets:
        file_set['files'] = []  # legacy
        for file_info in file_set['file_infos']:
            local_file = os.path.join(filepath, file_info['filename'])
            if os.path.exists(local_file):
                logging.info("Requested data exists on node: {}".format(local_file))
            else:
                local_file = self.download(download_url, user, token,
                                           file_info['shock_id'], filepath)
            file_info['local_file'] = local_file
            file_set['files'].append(local_file)  # legacy
        all_files.append(file_set)
    return datapath, all_files

def assembly_data_to_rows(data):
    """Converts assembly data dictionary to text rows"""
    rows = []
    data_key = "assembly_data"
    kbase_key = "kbase_assembly_input"
    lib_key = "file_sets"
    info_key = "file_infos"
    if data_key in data:
        data = data[data_key]
    elif kbase_key in data:
        data = kb_to_asm(data[kbase_key])
    for lib in data.get(lib_key, []):
        libtype = lib.get("type", "unknown")
        files = []
        for info in lib.get(info_key, []):
            filename = info.get("filename", "")
            filesize = info.get("filesize", None)
            filesize = " (%s)" % sizeof_fmt(filesize) if filesize else ""
            files.append("%s%s" % (filename, filesize))
        rows.append([libtype, " ".join(files)])
    return rows

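# Both versions of assembly_data_to_rows above format sizes via sizeof_fmt,
# which is defined elsewhere in this module. A minimal sketch of a
# human-readable size formatter with comparable behavior (an illustration,
# not the module's actual implementation):
def _sizeof_fmt_sketch(num, suffix='B'):
    for unit in ['', 'K', 'M', 'G', 'T', 'P']:
        if abs(num) < 1024.0:
            return "%3.1f%s%s" % (num, unit, suffix)
        num /= 1024.0
    return "%.1f%s%s" % (num, 'E', suffix)
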
def _get_data(self, body):
    params = json.loads(body)
    filepath = os.path.join(self.datapath, params['ARASTUSER'],
                            str(params['data_id']))
    datapath = filepath
    filepath += "/raw/"
    all_files = []
    user = params['ARASTUSER']
    job_id = params['job_id']
    data_id = params['data_id']
    token = params['oauth_token']
    uid = params['_id']

    self.gc_lock.acquire()
    try:
        self.garbage_collect(self.datapath, self.min_free_space,
                             user, job_id, data_id)
    except Exception:
        logger.error('Unexpected error in GC.')
        raise
    finally:
        self.gc_lock.release()

    ##### Get data from ID #####
    data_doc = self.metadata.get_data_docs(params['ARASTUSER'], params['data_id'])
    if not data_doc:
        raise Exception('Invalid Data ID: {}'.format(params['data_id']))
    logger.debug('data_doc = {}'.format(data_doc))
    if 'kbase_assembly_input' in data_doc:
        params['assembly_data'] = kb_to_asm(data_doc['kbase_assembly_input'])
    elif 'assembly_data' in data_doc:
        params['assembly_data'] = data_doc['assembly_data']

    ##### Get data from assembly_data #####
    self.metadata.update_job(uid, 'status', 'Data transfer')
    with ignored(OSError):
        os.makedirs(filepath)
    touch(filepath)

    file_sets = params['assembly_data']['file_sets']
    for file_set in file_sets:
        if file_set['type'] == 'paired_url':
            file_set['type'] = 'paired'
        elif file_set['type'] == 'single_url':
            file_set['type'] = 'single'
        elif file_set['type'] == 'reference_url':
            file_set['type'] = 'reference'
        file_set['files'] = []  # legacy
        for file_info in file_set['file_infos']:
            #### File is stored on Shock
            if file_info['filename']:
                local_file = os.path.join(filepath, file_info['filename'])
                if os.path.exists(local_file):
                    local_file = self.extract_file(local_file)
                    logger.info("Requested data exists on node: {}".format(local_file))
                else:
                    local_file = self.download_shock(file_info['shock_url'], user, token,
                                                     file_info['shock_id'], filepath)
            elif file_info['direct_url']:
                local_file = os.path.join(filepath,
                                          os.path.basename(file_info['direct_url']))
                if os.path.exists(local_file):
                    local_file = self.extract_file(local_file)
                    logger.info("Requested data exists on node: {}".format(local_file))
                else:
                    local_file = self.download_url(file_info['direct_url'], filepath,
                                                   token=token)
            file_info['local_file'] = local_file
            if file_set['type'] == 'single' and asm.is_long_read_file(local_file):
                if 'tags' not in file_set:
                    file_set['tags'] = []
                if 'long_read' not in file_set['tags']:
                    file_set['tags'].append('long_read')  # pacbio or nanopore reads
            file_set['files'].append(local_file)  # legacy
        all_files.append(file_set)
    return datapath, all_files

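# Hypothetical refactor of the GC critical section above: threading locks
# support the context-manager protocol, so the acquire()/release() pair in a
# try/finally can be written as a with-block (assumes self.gc_lock is a
# threading.Lock or a compatible object).
def _garbage_collect_locked(self, user, job_id, data_id):
    with self.gc_lock:
        try:
            self.garbage_collect(self.datapath, self.min_free_space,
                                 user, job_id, data_id)
        except Exception:
            logger.error('Unexpected error in GC.')
            raise
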