def _get_data(self, body):
    """Stage a job's input data on the local filesystem.

    Parses the JSON job request in *body*, garbage-collects the data
    cache, resolves the data document for the requested data ID, and
    localizes every file in its file sets — reusing files already on
    the node, otherwise downloading them from Shock or a direct URL —
    into ``<datapath>/<user>/<data_id>/raw/``.

    Args:
        body: JSON-encoded job request. Expected keys include
            'ARASTUSER', 'job_id', 'data_id', 'oauth_token', '_id'.

    Returns:
        Tuple ``(datapath, all_files)``: the job's data directory and
        the list of file sets, each file annotated with 'local_file'.

    Raises:
        Exception: if the data ID does not resolve to a data document.
    """
    params = json.loads(body)
    user = params['ARASTUSER']
    job_id = params['job_id']
    data_id = params['data_id']
    token = params['oauth_token']
    uid = params['_id']

    datapath = os.path.join(self.datapath, user, str(data_id))
    filepath = datapath + "/raw/"
    all_files = []

    # Free disk space before downloading; GC is serialized across workers.
    self.gc_lock.acquire()
    try:
        self.garbage_collect(self.datapath, self.min_free_space,
                             user, job_id, data_id)
    except Exception:
        # Was a bare `except:`; narrowed so SystemExit/KeyboardInterrupt
        # are not logged as GC errors. Still re-raised either way.
        logger.error('Unexpected error in GC.')
        raise
    finally:
        self.gc_lock.release()

    ##### Get data from ID #####
    data_doc = self.metadata.get_data_docs(user, data_id)
    if not data_doc:
        raise Exception('Invalid Data ID: {}'.format(data_id))
    logger.debug('data_doc = {}'.format(data_doc))

    if 'kbase_assembly_input' in data_doc:
        params['assembly_data'] = kb_to_asm(data_doc['kbase_assembly_input'])
    elif 'assembly_data' in data_doc:
        params['assembly_data'] = data_doc['assembly_data']

    ##### Get data from assembly_data #####
    self.metadata.update_job(uid, 'status', 'Data transfer')
    with ignored(OSError):
        os.makedirs(filepath)
    touch(filepath)

    file_sets = params['assembly_data']['file_sets']
    for file_set in file_sets:
        # Normalize "*_url" set types to their plain equivalents.
        if file_set['type'] == 'paired_url':
            file_set['type'] = 'paired'
        elif file_set['type'] == 'single_url':
            file_set['type'] = 'single'
        elif file_set['type'] == 'reference_url':
            file_set['type'] = 'reference'
        file_set['files'] = []  # legacy
        for file_info in file_set['file_infos']:
            #### File is stored on Shock
            if file_info['filename']:
                local_file = os.path.join(filepath, file_info['filename'])
                if os.path.exists(local_file):
                    local_file = self.extract_file(local_file)
                    logger.info("Requested data exists on node: {}".format(local_file))
                else:
                    local_file = self.download_shock(
                        file_info['shock_url'], user, token,
                        file_info['shock_id'], filepath)
            elif file_info['direct_url']:
                local_file = os.path.join(
                    filepath, os.path.basename(file_info['direct_url']))
                if os.path.exists(local_file):
                    local_file = self.extract_file(local_file)
                    logger.info("Requested data exists on node: {}".format(local_file))
                else:
                    local_file = self.download_url(file_info['direct_url'],
                                                   filepath, token=token)
            file_info['local_file'] = local_file
            # Tag single-read sets containing long reads so downstream
            # assemblers can pick appropriate pipelines.
            if file_set['type'] == 'single' and asm.is_long_read_file(local_file):
                if 'tags' not in file_set:
                    file_set['tags'] = []
                if 'long_read' not in file_set['tags']:
                    file_set['tags'].append('long_read')  # pacbio or nanopore reads
            file_set['files'].append(local_file)  # legacy
        all_files.append(file_set)
    return datapath, all_files
def _makedirs(self, dir):
    """Create *dir* (including parents), silently ignoring any OSError.

    Equivalent to wrapping ``os.makedirs`` in the project's
    ``ignored(OSError)`` context manager: an already-existing directory
    (or any other OSError) is swallowed rather than raised.
    """
    try:
        os.makedirs(dir)
    except OSError:
        pass
def _get_data(self, body):
    """Download the job's input data into its local raw-data directory.

    *body* is a JSON-encoded job request. After cache garbage
    collection and a data-document lookup, every file in the request's
    file sets is made available locally (files already present on the
    node are reused; others are fetched from Shock or a direct URL).

    Returns ``(datapath, all_files)``: the job's data directory and the
    file sets annotated with each file's local path.
    """
    request = json.loads(body)
    user = request['ARASTUSER']
    job_id = request['job_id']
    data_id = request['data_id']
    token = request['oauth_token']
    uid = request['_id']

    datapath = os.path.join(self.datapath, user, str(data_id))
    rawpath = datapath + "/raw/"
    collected = []

    # Garbage-collect under the shared lock before pulling new data.
    self.gc_lock.acquire()
    try:
        self.garbage_collect(self.datapath, self.min_free_space,
                             user, job_id, data_id)
    except:
        logger.error('Unexpected error in GC.')
        raise
    finally:
        self.gc_lock.release()

    # Resolve the data document for this data ID.
    data_doc = self.metadata.get_data_docs(user, data_id)
    if not data_doc:
        raise Exception('Invalid Data ID: {}'.format(data_id))
    logger.debug('data_doc = {}'.format(data_doc))

    if 'kbase_assembly_input' in data_doc:
        request['assembly_data'] = kb_to_asm(data_doc['kbase_assembly_input'])
    elif 'assembly_data' in data_doc:
        request['assembly_data'] = data_doc['assembly_data']

    self.metadata.update_job(uid, 'status', 'Data transfer')
    with ignored(OSError):
        os.makedirs(rawpath)
    touch(rawpath)

    # Map "*_url" set types onto their plain equivalents.
    type_aliases = {'paired_url': 'paired',
                    'single_url': 'single',
                    'reference_url': 'reference'}
    for file_set in request['assembly_data']['file_sets']:
        file_set['type'] = type_aliases.get(file_set['type'],
                                            file_set['type'])
        file_set['files'] = []  # legacy
        for file_info in file_set['file_infos']:
            if file_info['filename']:
                # File is stored on Shock.
                local_file = os.path.join(rawpath, file_info['filename'])
                if os.path.exists(local_file):
                    local_file = self.extract_file(local_file)
                    logger.info("Requested data exists on node: {}".format(local_file))
                else:
                    local_file = self.download_shock(
                        file_info['shock_url'], user, token,
                        file_info['shock_id'], rawpath)
            elif file_info['direct_url']:
                basename = os.path.basename(file_info['direct_url'])
                local_file = os.path.join(rawpath, basename)
                if os.path.exists(local_file):
                    local_file = self.extract_file(local_file)
                    logger.info("Requested data exists on node: {}".format(local_file))
                else:
                    local_file = self.download_url(file_info['direct_url'],
                                                   rawpath, token=token)
            file_info['local_file'] = local_file
            if (file_set['type'] == 'single'
                    and asm.is_long_read_file(local_file)):
                tags = file_set.setdefault('tags', [])
                if 'long_read' not in tags:
                    tags.append('long_read')  # pacbio or nanopore reads
            file_set['files'].append(local_file)  # legacy
        collected.append(file_set)
    return datapath, collected