def __init__(self, config): """ :param config: :param logger: :param directory: Working directory :param urls: Service urls """ # BEGIN_CONSTRUCTOR self.ws_url = config["workspace-url"] self.ws_url = config["workspace-url"] self.callback_url = config['SDK_CALLBACK_URL'] self.srv_wiz_url = config['srv-wiz-url'] self.token = config['KB_AUTH_TOKEN'] self.shock_url = config['shock-url'] self.dfu = DataFileUtil(self.callback_url) self.gfu = GenomeFileUtil(self.callback_url) self.au = AssemblyUtil(self.callback_url) self.rau = ReadsAlignmentUtils(self.callback_url) self.set_api = SetAPI(self.srv_wiz_url, service_ver='dev') self.eu = ExpressionUtils(self.callback_url) self.ws = Workspace(self.ws_url, token=self.token) self.scratch = os.path.join(config['scratch'], str(uuid.uuid4())) self._mkdir_p(self.scratch) self.tool_used = "Cufflinks" self.tool_version = os.environ['VERSION'] # END_CONSTRUCTOR pass
def _stage_assembly_files(self, object_list):
    """
    _stage_assembly_files: download the fasta files to the scratch area
    return list of file names
    """
    log('Processing assembly object list: {}'.format(object_list))

    # Sourmash uses the sequence filename as the default label for the signatures,
    # and this includes the complete file path. So keeping the sequence file name as
    # close to the desired label as possible is the reason not to place each file
    # under a 'fasta' directory or include the '.fa' file extension.
    auc = AssemblyUtil(self.callbackURL)
    staged_file_list = []

    for assembly_upa in object_list:
        try:
            file_ = auc.get_assembly_as_fasta({'ref': assembly_upa})['path']
        except AssemblyUtilError as assembly_error:
            print(str(assembly_error))
            raise

        filename = os.path.basename(file_).replace('.fa', '')
        to_upper_command = "awk '{ if ($0 !~ />/) {print toupper($0)} else {print $0} }' " \
                           + file_ + ' > tmp.fa ' + '&& mv tmp.fa ' + filename
        self._run_command(to_upper_command)
        staged_file_list.append(filename)

    log('Created file list: {}'.format(staged_file_list))
    return staged_file_list
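# The awk pipeline in _stage_assembly_files only upper-cases sequence lines and leaves
# '>' header lines untouched. A minimal pure-Python sketch of the same transform
# (hypothetical helper, not part of the module above) would be:
def uppercase_fasta_copy(src_path, dest_path):
    """Copy a FASTA file, upper-casing sequence lines but keeping headers as-is."""
    with open(src_path) as src, open(dest_path, 'w') as dest:
        for line in src:
            dest.write(line if line.startswith('>') else line.upper())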
def test_annotate_contigs(self): assembly_file_name = "small.fna" #"AP009048.fna" assembly_test_file = os.path.join("/kb/module/test/data", assembly_file_name) assembly_temp_file = os.path.join("/kb/module/work/tmp", assembly_file_name) shutil.copy(assembly_test_file, assembly_temp_file) assembly_name = 'Assembly.1' au = AssemblyUtil(os.environ['SDK_CALLBACK_URL'], token=self.getContext()['token']) assembly_ref = au.save_assembly_from_fasta({'file': {'path': assembly_temp_file}, 'workspace_name': self.getWsName(), 'assembly_name': assembly_name}) genome_name = "Genome.1" result = self.getImpl().annotate_contigs(self.getContext(), {'assembly_ref': assembly_ref, 'output_workspace': self.getWsName(), 'output_genome_name': genome_name, 'evalue': None, 'fast': 0, 'gcode': None, 'genus': '', 'kingdom': 'Bacteria', 'metagenome': 0, 'mincontiglen': 1, 'norrna': 0, 'notrna': 0, 'rawproduct': 0, 'rfam': 1, 'scientific_name': 'Super : diper - name;' })[0] rep = self.getWsClient().get_objects([{'ref': result['report_ref']}])[0]['data'] self.assertTrue('text_message' in rep) print("Report:\n" + str(rep['text_message']))
def getBogusAssembly(self):
    # Create a fake assembly with lots of contigs
    assembly_file_name = "bogus.fna"  # "AP009048.fna"
    assembly_temp_file = os.path.join("/kb/module/work/tmp", assembly_file_name)
    with open(assembly_temp_file, "w") as f:
        for i in range(1, 30002):
            f.write("> contig_%d\n" % i)
            f.write("AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC\n")
    assembly_name = "Assembly.2"
    au = AssemblyUtil(os.environ["SDK_CALLBACK_URL"], token=self.getContext()["token"])
    assembly_ref = au.save_assembly_from_fasta({
        "file": {"path": assembly_temp_file},
        "workspace_name": self.getWsName(),
        "assembly_name": assembly_name
    })
    self.assembly_ref = assembly_ref
    print("Uploaded bogus assembly " + str(assembly_ref))
    return assembly_ref
def __init__(self, config):
    #BEGIN_CONSTRUCTOR
    self.scratch = config['scratch']
    self.callback_url = os.environ['SDK_CALLBACK_URL']
    self.dfu = AssemblyUtil(self.callback_url)
    #END_CONSTRUCTOR
    pass
def load_test_genome_direct(self, filename, assembly_filename, obj_name):
    au = AssemblyUtil(os.environ['SDK_CALLBACK_URL'])
    assembly_ref = au.save_assembly_from_fasta({
        'workspace_name': self.getWsName(),
        'assembly_name': obj_name + '.assembly',
        'file': {'path': assembly_filename}
    })
    pprint('created test assembly: ' + assembly_ref)

    with open(filename, 'r') as file:
        data_str = file.read()
    data = json.loads(data_str)
    data['assembly_ref'] = assembly_ref

    # save to ws
    save_info = {
        'workspace': self.getWsName(),
        'objects': [{
            'type': 'KBaseGenomes.Genome',
            'data': data,
            'name': obj_name + '.genome'
        }]
    }
    result = self.ws.save_objects(save_info)
    info = result[0]
    ref = str(info[6]) + '/' + str(info[0]) + '/' + str(info[4])
    print('created test genome: ' + ref + ' from file ' + filename)
    return ref
def _build_index(self, assembly_info, validated_params):
    # get the assembly as a fasta file using AssemblyUtil
    au = AssemblyUtil(self.callback_url)
    fasta_info = au.get_assembly_as_fasta({'ref': assembly_info['ref']})

    # make the target destination folder (check again it wasn't created yet)
    if os.path.exists(validated_params['output_dir']):
        raise ValueError('Output directory name specified (' + validated_params['output_dir'] +
                         ') already exists. Will not overwrite, so aborting.')
    os.makedirs(validated_params['output_dir'])

    # configure the command line args and run it
    cli_params = self._build_cli_params(fasta_info['path'], fasta_info['assembly_name'],
                                        validated_params)
    self.bowtie2.run('bowtie2-build', cli_params)
    index_info = {'output_dir': validated_params['output_dir'],
                  'index_files_basename': fasta_info['assembly_name']}

    # cache the result, mark if it worked or not
    cache_success = self._put_cached_index(assembly_info,
                                           fasta_info['assembly_name'],
                                           validated_params['output_dir'],
                                           validated_params['ws_for_cache'])
    if cache_success:
        index_info['pushed_to_cache'] = 1
    else:
        index_info['pushed_to_cache'] = 0

    return index_info
def get_fasta_file(self, genome_ref):
    ws = Workspace(self.ws_url)
    # test if genome references an assembly type
    # do get_objects2 without data. get list of refs
    genome_obj_info = ws.get_objects2({
        'objects': [{'ref': genome_ref}],
        'no_data': 1
    })
    # get the list of genome refs from the returned info.
    # if there are no refs (or something funky with the return), this will be an empty list.
    # this WILL fail if data is an empty list. But it shouldn't be, and we know because
    # we have a real genome reference, or get_objects2 would fail.
    genome_obj_refs = genome_obj_info.get('data', [{}])[0].get('refs', [])

    # see which of those are of an appropriate type (ContigSet or Assembly), if any.
    assembly_ref = list()
    ref_params = [{'ref': x} for x in genome_obj_refs]
    ref_info = ws.get_object_info3({'objects': ref_params})
    for idx, info in enumerate(ref_info.get('infos')):
        if "KBaseGenomeAnnotations.Assembly" in info[2] or "KBaseGenomes.ContigSet" in info[2]:
            assembly_ref.append(";".join(ref_info.get('paths')[idx]))

    # now just get the file.
    au = AssemblyUtil(self.callback_url)
    fasta_file = au.get_assembly_as_fasta({'ref': assembly_ref[0]})
    return fasta_file["path"]
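# Usage sketch for get_fasta_file above: given a genome reference, it resolves the
# genome's Assembly/ContigSet dependency and downloads it as FASTA. 'downloader' and
# the reference below are illustrative placeholders, not names from the module.
fasta_path = downloader.get_fasta_file('12345/6/7')
print('Genome FASTA staged at: ' + fasta_path)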
def test_annotate_contigs(self): assembly_file_name = "small.fna" #"AP009048.fna" assembly_test_file = os.path.join("/kb/module/test/data", assembly_file_name) assembly_temp_file = os.path.join("/kb/module/work/tmp", assembly_file_name) shutil.copy(assembly_test_file, assembly_temp_file) assembly_name = 'Assembly.1' au = AssemblyUtil(os.environ['SDK_CALLBACK_URL']) assembly_ref = au.save_assembly_from_fasta({'file': {'path': assembly_temp_file}, 'workspace_name': self.getWsName(), 'assembly_name': assembly_name}) # Add a genome to the WS to test ref_paths genome_name = "Genome.1" genome = {'id': 'Unknown', 'features': [], 'scientific_name': "", 'domain': "", 'genetic_code': 0, 'assembly_ref': assembly_ref, 'cdss': [], 'mrnas': [], 'source': 'Magic!', 'gc_content': 0, 'dna_size': 0, 'reference_annotation': 0} prov = self.getContext().provenance() ga = GenomeAnnotationAPI(os.environ['SDK_CALLBACK_URL']) info = ga.save_one_genome_v1( {'workspace': self.getWsName(), 'name': genome_name, 'data': genome, 'provenance': prov})['info'] genome_ref = str(info[6]) + '/' + str(info[0]) + '/' + str(info[4]) result = self.getImpl().annotate_contigs(self.getContext(), {'assembly_ref': "{};{}".format(genome_ref, assembly_ref), 'output_workspace': self.getWsName(), 'output_genome_name': genome_name, 'evalue': None, 'fast': 0, 'gcode': 0, 'genus': 'genus', 'kingdom': 'Bacteria', 'metagenome': 0, 'mincontiglen': 1, 'norrna': 0, 'notrna': 0, 'rawproduct': 0, 'rfam': 1, 'scientific_name': 'Super : diper - name;' })[0] rep = self.getWsClient().get_objects([{'ref': result['report_ref']}])[0]['data'] self.assertTrue('text_message' in rep) print("Report:\n" + str(rep['text_message'])) genome_ref = self.getWsName() + "/" + genome_name genome = self.getWsClient().get_objects([{'ref': genome_ref}])[0]['data'] features_to_work = {} for feature in genome['features']: features_to_work[feature['id']] = feature['location'] aseq = AssemblySequenceAPI(os.environ['SDK_CALLBACK_URL'], token=self.getContext()['token']) dna_sequences = aseq.get_dna_sequences({'requested_features': features_to_work, 'assembly_ref': genome['assembly_ref']})['dna_sequences'] bad_dnas = 0 for feature in genome['features']: if feature['dna_sequence'] != dna_sequences[feature['id']]: bad_dnas += 1 self.assertEqual(bad_dnas, 0)
def __init__(self, config):
    self.callback_url = config['SDK_CALLBACK_URL']
    self.scratch = config['scratch']
    self.shock_url = config['shock-url']
    self.dfu = DataFileUtil(self.callback_url)
    self.au = AssemblyUtil(self.callback_url)
    self.setapi = SetAPI(self.callback_url)
    self.wss = workspaceService(config['workspace-url'])
def __init__(self, config):
    self.callback_url = config['SDK_CALLBACK_URL']
    self.scratch = os.path.join(config['scratch'], 'import_assembly_' + str(uuid.uuid4()))
    handler_utils._mkdir_p(self.scratch)
    self.token = config['KB_AUTH_TOKEN']
    self.dfu = DataFileUtil(self.callback_url)
    self.au = AssemblyUtil(self.callback_url)
    self.uploader_utils = UploaderUtil(config)
def load_fasta_file(self, filename, obj_name, contents):
    f = open(filename, 'w')
    f.write(contents)
    f.close()
    assemblyUtil = AssemblyUtil(self.callback_url)
    assembly_ref = assemblyUtil.save_assembly_from_fasta({'file': {'path': filename},
                                                          'workspace_name': self.getWsName(),
                                                          'assembly_name': obj_name
                                                          })
    return assembly_ref
def run_mash_sketch(self, ctx, params):
    """
    Generate a sketch file from a fasta/fastq file
    :param params: instance of type "MashSketchParams" (* * Pass in **one of**
       input_path, assembly_ref, or reads_ref * input_path - string - local file path
       to an input fasta/fastq * assembly_ref - string - workspace reference to an
       Assembly type * reads_ref - string - workspace reference to a Reads type *
       Optionally, pass in a boolean indicating whether you are using paired-end
       reads. * paired_ends - boolean - whether you are passing in paired ends) ->
       structure: parameter "input_path" of String, parameter "assembly_ref" of
       String, parameter "reads_ref" of String, parameter "paired_ends" of type
       "boolean" (params: input_upa: workspace reference to an assembly object
       workspace_name: name of current workspace search_db: database to search
       n_max_results: number of results to return, integer between 1 and 100)
    :returns: instance of type "MashSketchResults" (* * Returns the local scratch
       file path of the generated sketch file. * Will have the extension '.msh') ->
       structure: parameter "sketch_path" of String
    """
    # ctx is the context object
    # return variables are: results
    #BEGIN run_mash_sketch
    if 'reads_ref' in params:
        reads_utils = ReadsUtils(self.callbackURL)
        result = reads_utils.download_reads({
            'read_libraries': [params['reads_ref']],
            'interleaved': 'true'
        })
        input_path = result['files'][params['reads_ref']]['files']['fwd']
    elif 'assembly_ref' in params:
        assembly_util = AssemblyUtil(self.callbackURL)
        result = assembly_util.get_assembly_as_fasta({'ref': params['assembly_ref']})
        input_path = result['path']
    elif 'input_path' in params:
        input_path = params['input_path']
    else:
        raise ValueError(
            'Invalid params; must provide one of `reads_ref`, `assembly_ref`, or `input_path`.'
        )
    mash_utils = MashUtils(self.config, self.auth_token)
    output_file_path = mash_utils.mash_sketch(input_path, paired_ends=params.get('paired_ends'))
    results = {'sketch_path': output_file_path}
    #END run_mash_sketch

    # At some point might do deeper type checking...
    if not isinstance(results, dict):
        raise ValueError('Method run_mash_sketch return value ' +
                         'results is not type dict as required.')
    # return the results
    return [results]
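# Hedged usage sketch for run_mash_sketch: exactly one of reads_ref, assembly_ref, or
# input_path should be supplied. 'impl', 'ctx', and the assembly reference below are
# placeholders, not values from this module.
results = impl.run_mash_sketch(ctx, {'assembly_ref': '12345/6/7'})[0]
print('Sketch written to: ' + results['sketch_path'])  # path ends in '.msh'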
def setUpClass(cls): token = environ.get('KB_AUTH_TOKEN', None) config_file = environ.get('KB_DEPLOYMENT_CONFIG', None) cls.cfg = {} config = ConfigParser() config.read(config_file) for nameval in config.items('SetAPI'): cls.cfg[nameval[0]] = nameval[1] authServiceUrl = cls.cfg.get('auth-service-url', "https://kbase.us/services/authorization/Sessions/Login") auth_client = _KBaseAuth(authServiceUrl) user_id = auth_client.get_user(token) # WARNING: don't call any logging methods on the context object, # it'll result in a NoneType error cls.ctx = MethodContext(None) cls.ctx.update({'token': token, 'user_id': user_id, 'provenance': [ {'service': 'SetAPI', 'method': 'please_never_use_it_in_production', 'method_params': [] }], 'authenticated': 1}) cls.wsURL = cls.cfg['workspace-url'] cls.wsClient = workspaceService(cls.wsURL, token=token) cls.serviceImpl = SetAPI(cls.cfg) # setup data at the class level for now (so that the code is run # once for all tests, not before each test case. Not sure how to # do that outside this function..) suffix = int(time.time() * 1000) wsName = "test_SetAPI_" + str(suffix) ret = cls.wsClient.create_workspace({'workspace': wsName}) # wsName = 'pranjan77:1477441032423' cls.wsName = wsName # copy test file to scratch area fna_filename = "seq.fna" fna_path = os.path.join(cls.cfg['scratch'], fna_filename) shutil.copy(os.path.join("data", fna_filename), fna_path) ru = AssemblyUtil(os.environ['SDK_CALLBACK_URL']) ws_obj_name = 'MyNewAssembly' cls.assembly1ref = ru.save_assembly_from_fasta( { 'file':{'path':fna_path}, 'workspace_name':wsName, 'assembly_name':'assembly_obj_1' }) cls.assembly2ref = ru.save_assembly_from_fasta( { 'file':{'path':fna_path}, 'workspace_name':wsName, 'assembly_name':'assembly_obj_2' })
def load_fasta_file(self, path, name):
    assembly_util = AssemblyUtil(self.callback_url)
    return assembly_util.save_assembly_from_fasta({
        'file': {'path': path},
        'workspace_name': self.getWsName(),
        'assembly_name': name
    })
def get_fasta_file(self, filename, obj_name):
    assemblyUtil = AssemblyUtil(self.callback_url)
    assembly_ref = assemblyUtil.save_assembly_from_fasta({
        'file': {'path': filename},
        'workspace_name': self.getWsName(),
        'assembly_name': obj_name
    })
    return assembly_ref
def loadAssembly(self):
    if hasattr(self.__class__, 'assembly_ref'):
        return self.__class__.assembly_ref
    fasta_path = os.path.join(self.scratch, 'test.fna')
    shutil.copy(os.path.join('data', 'test.fna'), fasta_path)
    au = AssemblyUtil(self.callback_url)
    assembly_ref = au.save_assembly_from_fasta({'file': {'path': fasta_path},
                                                'workspace_name': self.getWsName(),
                                                'assembly_name': 'test_assembly'
                                                })
    self.__class__.assembly_ref = assembly_ref
    return assembly_ref
def save_assembly(self, wsname, output_contigs, token, name, console):
    self.log(console, 'Uploading FASTA file to Assembly')
    assemblyUtil = AssemblyUtil(self.callbackURL, token=token, service_ver='dev')
    assemblyUtil.save_assembly_from_fasta({
        'file': {'path': output_contigs},
        'workspace_name': wsname,
        'assembly_name': name
    })
def get_assembly(self, target_dir, assembly_upa):
    auc = AssemblyUtil(self.callbackURL)
    filename = os.path.join(target_dir, assembly_upa.replace('/', '_'))
    try:
        auc.get_assembly_as_fasta({'ref': assembly_upa, 'filename': filename})
    except AssemblyUtilError as assembly_error:
        print(str(assembly_error))
        raise
    return filename
def test_filter_contigs_by_length_01(self): method = 'filter_contigs_by_length_01' print("\n\nRUNNING: test_filter_contigs_by_length_01()") print("===========================================\n\n") # upload test data try: auClient = AssemblyUtil(self.callback_url, token=self.getContext()['token']) except Exception as e: raise ValueError( 'Unable to instantiate auClient with callbackURL: ' + self.callback_url + ' ERROR: ' + str(e)) ass_file_1 = 'assembly_1.fa' ass_file_2 = 'assembly_2.fa' ass_path_1 = os.path.join(self.scratch, ass_file_1) ass_path_2 = os.path.join(self.scratch, ass_file_2) shutil.copy(os.path.join("data", ass_file_1), ass_path_1) shutil.copy(os.path.join("data", ass_file_2), ass_path_2) ass_ref_1 = auClient.save_assembly_from_fasta({ 'file': { 'path': ass_path_1 }, 'workspace_name': self.getWsName(), 'assembly_name': 'assembly_1' }) ass_ref_2 = auClient.save_assembly_from_fasta({ 'file': { 'path': ass_path_2 }, 'workspace_name': self.getWsName(), 'assembly_name': 'assembly_2' }) # run method input_refs = [ass_ref_1, ass_ref_2] base_output_name = method + '_output' params = { 'workspace_name': self.getWsName(), 'input_assembly_refs': input_refs, 'min_contig_length': 1000, 'output_name': 'test_filtered' } result = self.getImpl().run_filter_contigs_by_length( self.getContext(), params) print('RESULT:') pprint(result) pass
def loadFasta2Assembly(self, filename):
    fn, ext = os.path.splitext(filename)
    fasta_path = os.path.join(self.scratch, filename)
    shutil.copy(os.path.join('../testReads', filename), fasta_path)
    au = AssemblyUtil(self.callback_url)
    a_ref = au.save_assembly_from_fasta({
        'file': {'path': fasta_path},
        'workspace_name': self.getWsName(),
        'assembly_name': fn
    })
    return a_ref
def fetch_fasta_from_assembly(assembly_ref, ws_url, callback_url):
    """
    From an assembly or contigset, this uses a data file util to build a FASTA file
    and return the path to it.
    """
    allowed_types = ['KBaseFile.Assembly',
                     'KBaseGenomeAnnotations.Assembly',
                     'KBaseGenomes.ContigSet']
    if not check_ref_type(assembly_ref, allowed_types, ws_url):
        raise ValueError("The reference {} cannot be used to fetch a FASTA file".format(
            assembly_ref))
    au = AssemblyUtil(callback_url)
    return au.get_assembly_as_fasta({'ref': assembly_ref})
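# Usage sketch for fetch_fasta_from_assembly: the URLs and the reference below are
# placeholders for a real deployment; the returned dict comes from
# AssemblyUtil.get_assembly_as_fasta and includes the local 'path'.
fasta = fetch_fasta_from_assembly('12345/6/7',
                                  'https://kbase.us/services/ws',
                                  os.environ['SDK_CALLBACK_URL'])
print(fasta['path'])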
def __init__(self, config): self.scratch = config["scratch"] self.ctx = config['ctx']; self.callback_url = config["SDK_CALLBACK_URL"] self.ws_client = workspaceService(config["workspace-url"]) self.gfu = GenomeFileUtil(self.callback_url) self.au = AssemblyUtil(self.callback_url) self.kbr = KBaseReport(self.callback_url) self.dfu = DataFileUtil(self.callback_url) self.genome_api = GenomeAnnotationAPI(self.callback_url) self.sso_ref = None self.sso_event = None self.ec_to_sso = {} self.output_workspace = None
def get_fasta_from_genome(logger, ws_client, urls, genome_id):
    ref = ws_client.get_object_subset([{'ref': genome_id, 'included': ['contigset_ref']}])
    contig_id = ref[0]['data']['contigset_ref']
    logger.info("Generating FASTA from Genome")
    try:
        ## get the FASTA
        assembly = AssemblyUtil(urls['callback_url'])
        ret = assembly.get_assembly_as_fasta({'ref': contig_id})
        output_file = ret['path']
        fasta_file = os.path.basename(output_file)
        return fasta_file
    except Exception as e:
        raise Exception("Unable to Create FASTA file from Genome : {0} ({1})".format(genome_id, e))
def load_fasta_file(self, filename, obj_name, contents):
    f = open(filename, 'w')  # TODO make this use the data folder (not sure of relative path)
    f.write(contents)
    f.close()
    assemblyUtil = AssemblyUtil(self.callback_url)
    # TODO why does this next line take forever
    assembly_ref = assemblyUtil.save_assembly_from_fasta({
        'file': {'path': filename},
        'workspace_name': self.getWsName(),
        'assembly_name': obj_name
    })
    return assembly_ref
def get_genome_ref(self, ws_name, tf='ecoliMG1655.fa'):
    if hasattr(self.__class__, 'genomeInfo'):
        return self.__class__.genomeInfo
    au = AssemblyUtil(os.environ['SDK_CALLBACK_URL'])
    target = os.path.join(self.scratch, tf)
    self.genome_path = target
    shutil.copy('data/' + tf, target)
    self.__class__.genomeInfo = au.save_assembly_from_fasta({
        'file': {'path': target},
        'workspace_name': ws_name,
        'assembly_name': tf.split('.fa')[0]
    })
    return self.__class__.genomeInfo
def setUpClass(cls): token = environ.get('KB_AUTH_TOKEN', None) # WARNING: don't call any logging methods on the context object, # it'll result in a NoneType error cls.ctx = MethodContext(None) cls.ctx.update({'token': token, 'provenance': [ {'service': 'AssemblyAPI', 'method': 'please_never_use_it_in_production', 'method_params': [] }], 'authenticated': 1}) config_file = environ.get('KB_DEPLOYMENT_CONFIG', None) cls.cfg = {} config = ConfigParser() config.read(config_file) for nameval in config.items('AssemblyAPI'): cls.cfg[nameval[0]] = nameval[1] cls.wsURL = cls.cfg['workspace-url'] cls.wsClient = workspaceService(cls.wsURL, token=token) cls.serviceImpl = AssemblyAPI(cls.cfg) cls.scratch = cls.cfg['scratch'] cls.callback_url = os.environ['SDK_CALLBACK_URL'] suffix = int(time.time() * 1000) cls.wsName = "test_kb_maxbin_" + str(suffix) cls.ws_info = cls.wsClient.create_workspace({'workspace': cls.wsName}) cls.obj_name = "7989/489/2" cls.contigs = [u'NZ_ALQT01000016'] # create an example Assembly cls.au = AssemblyUtil(cls.callback_url) assembly_filename = 'test.fa' cls.assembly_fasta_file_path = os.path.join(cls.scratch, assembly_filename) shutil.copy(os.path.join("data", assembly_filename), cls.assembly_fasta_file_path) assembly_params = { 'file': {'path': cls.assembly_fasta_file_path}, 'workspace_name': cls.wsName, 'assembly_name': 'MyAssembly' } cls.assembly_ref_1 = cls.au.save_assembly_from_fasta(assembly_params) print('Assembly1:' + cls.assembly_ref_1) # create a test legacy contigset with open('data/contigset1.json') as file: contigset_data = json.load(file) pprint(contigset_data) saveData = { 'type': 'KBaseGenomes.ContigSet', 'data': contigset_data, 'name': 'contigset' } cls.contig_set_info = cls.wsClient.save_objects({'workspace': cls.wsName, 'objects': [saveData]})[0] pprint(cls.contig_set_info) cls.contig_set_ref = str(cls.contig_set_info[6]) + '/' + str(cls.contig_set_info[0]) + '/' + str(cls.contig_set_info[4]) print('ContigSet1:' + cls.contig_set_ref)
def test_genbank_to_genome(self, download_staging_file, update_staging_service): fasta_file = 'small_fasta.fna' ws_obj_name = 'MyAssembly' params = { 'staging_file_subdir_path': fasta_file, 'workspace_name': self.getWsName(), 'assembly_name': ws_obj_name } ref = self.getImpl().import_fasta_as_assembly_from_staging( self.getContext(), params) self.assertTrue('obj_ref' in ref[0]) self.assertTrue('report_ref' in ref[0]) self.assertTrue('report_name' in ref[0]) fasta_file_path = os.path.join('/kb/module/work/tmp', fasta_file) assemblyUtil = AssemblyUtil(os.environ['SDK_CALLBACK_URL']) fasta_assembly = assemblyUtil.get_assembly_as_fasta( {'ref': self.getWsName() + "/{}".format(ws_obj_name)}) expected_data = None with open(fasta_file_path, 'r') as f: expected_data = f.read() actual_data = None with open(fasta_assembly['path'], 'r') as f: actual_data = f.read() self.assertEqual(actual_data, expected_data) get_objects_params = { 'object_refs': [ref[0].get('obj_ref')], 'ignore_errors': False } object_data = self.dfu.get_objects(get_objects_params) base_count = object_data.get('data')[0].get('data').get('base_counts') dna_size = object_data.get('data')[0].get('data').get('dna_size') self.assertEqual(dna_size, 2520) expected_base_count = {'A': 700, 'C': 558, 'T': 671, 'G': 591} self.assertDictContainsSubset(base_count, expected_base_count) self.assertDictContainsSubset(expected_base_count, base_count)
def load_fasta_file(callback_url, ws_name, filename, obj_name, contents):
    """
    Loads the given FASTA file into a workspace as an Assembly object.
    """
    f = open(filename, 'w')
    f.write(contents)
    f.close()
    assembly_util = AssemblyUtil(callback_url)
    assembly_ref = assembly_util.save_assembly_from_fasta({
        'file': {'path': filename},
        'workspace_name': ws_name,
        'assembly_name': obj_name
    })
    return assembly_ref
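# Usage sketch for the load_fasta_file helper above; the workspace name, file path,
# and FASTA contents are made-up example values.
ref = load_fasta_file(os.environ['SDK_CALLBACK_URL'], 'my_workspace',
                      '/kb/module/work/tmp/tiny.fa', 'TinyAssembly',
                      '>contig_1\nACGTACGTACGT\n')
print('Saved assembly ref: ' + ref)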
def test_annotate_contigs(self): assembly_file_name = "small.fna" #"AP009048.fna" assembly_test_file = os.path.join("/kb/module/test/data", assembly_file_name) assembly_temp_file = os.path.join("/kb/module/work/tmp", assembly_file_name) shutil.copy(assembly_test_file, assembly_temp_file) assembly_name = 'Assembly.1' au = AssemblyUtil(os.environ['SDK_CALLBACK_URL'], token=self.getContext()['token']) assembly_ref = au.save_assembly_from_fasta({'file': {'path': assembly_temp_file}, 'workspace_name': self.getWsName(), 'assembly_name': assembly_name}) genome_name = "Genome.1" result = self.getImpl().annotate_contigs(self.getContext(), {'assembly_ref': assembly_ref, 'output_workspace': self.getWsName(), 'output_genome_name': genome_name, 'evalue': None, 'fast': 0, 'gcode': 0, 'genus': 'genus', 'kingdom': 'Bacteria', 'metagenome': 0, 'mincontiglen': 1, 'norrna': 0, 'notrna': 0, 'rawproduct': 0, 'rfam': 1, 'scientific_name': 'Super : diper - name;' })[0] rep = self.getWsClient().get_objects([{'ref': result['report_ref']}])[0]['data'] self.assertTrue('text_message' in rep) print("Report:\n" + str(rep['text_message'])) genome_ref = self.getWsName() + "/" + genome_name genome = self.getWsClient().get_objects([{'ref': genome_ref}])[0]['data'] features_to_work = {} for feature in genome['features']: features_to_work[feature['id']] = feature['location'] aseq = AssemblySequenceAPI(os.environ['SDK_CALLBACK_URL'], token=self.getContext()['token']) dna_sequences = aseq.get_dna_sequences({'requested_features': features_to_work, 'assembly_ref': genome['assembly_ref']})['dna_sequences'] bad_dnas = 0 for feature in genome['features']: if feature['dna_sequence'] != dna_sequences[feature['id']]: bad_dnas += 1 self.assertEqual(bad_dnas, 0)
def stage_assembly_files(self, object_list):
    """
    stage_assembly_files: download the fasta files to the scratch area
    return list of file names
    """
    log('Processing assembly object list: {}'.format(object_list))

    auc = AssemblyUtil(self.callbackURL)
    staged_file_list = []
    for assembly_upa in object_list:
        try:
            filename = auc.get_assembly_as_fasta({'ref': assembly_upa})['path']
        except AssemblyUtilError as assembly_error:
            print(str(assembly_error))
            raise
        staged_file_list.append(filename)

    log('Created file list: {}'.format(staged_file_list))
    return staged_file_list
def run_megahit(self, ctx, params):
    """
    :param params: instance of type "MegaHitParams" (Run MEGAHIT. Most parameters here
       are just passed forward to MEGAHIT.
       workspace_name - the name of the workspace for input/output
       read_library_ref - the name of the PE read library (SE library support in the future)
       output_contig_set_name - the name of the output contigset
       megahit_parameter_preset - override a group of parameters; possible values:
           meta            '--min-count 2 --k-list 21,41,61,81,99'
                           (generic metagenomes, default)
           meta-sensitive  '--min-count 2 --k-list 21,31,41,51,61,71,81,91,99'
                           (more sensitive but slower)
           meta-large      '--min-count 2 --k-list 27,37,47,57,67,77,87'
                           (large & complex metagenomes, like soil)
           bulk            '--min-count 3 --k-list 31,51,71,91,99 --no-mercy'
                           (experimental, standard bulk sequencing with >= 30x depth)
           single-cell     '--min-count 3 --k-list 21,33,55,77,99,121 --merge_level 20,0.96'
                           (experimental, single cell data)
       min_count - minimum multiplicity for filtering (k_min+1)-mers, default 2
       min_k - minimum kmer size (<= 127), must be odd number, default 21
       max_k - maximum kmer size (<= 127), must be odd number, default 99
       k_step - increment of kmer size of each iteration (<= 28), must be even number, default 10
       k_list - list of kmer size (all must be odd, in the range 15-127, increment <= 28);
                override `--k-min', `--k-max' and `--k-step'
       min_contig_length - minimum length of contigs to output, default is 2000
       @optional megahit_parameter_preset
       @optional min_count
       @optional k_min
       @optional k_max
       @optional k_step
       @optional k_list
       @optional min_contig_length) -> structure: parameter "workspace_name" of String,
       parameter "read_library_ref" of String, parameter "output_contigset_name" of
       String, parameter "megahit_parameter_preset" of String, parameter "min_count" of
       Long, parameter "k_min" of Long, parameter "k_max" of Long, parameter "k_step"
       of Long, parameter "k_list" of list of Long, parameter "min_contig_length" of Long
    :returns: instance of type "MegaHitOutput" -> structure: parameter "report_name"
       of String, parameter "report_ref" of String
    """
    # ctx is the context object
    # return variables are: output
    #BEGIN run_megahit
    print('Running run_megahit with params=')
    pprint(params)

    # STEP 1: basic parameter checks + parsing
    if 'workspace_name' not in params:
        raise ValueError('workspace_name parameter is required')
    if 'read_library_ref' not in params:
        raise ValueError('read_library_ref parameter is required')
    if 'output_contigset_name' not in params:
        raise ValueError('output_contigset_name parameter is required')

    # STEP 2: get the read library as deinterleaved fastq files
    input_ref = params['read_library_ref']
    reads_params = {'read_libraries': [input_ref],
                    'interleaved': 'false',
                    'gzipped': None
                    }
    ru = ReadsUtils(self.callbackURL)
    reads = ru.download_reads(reads_params)['files']

    print('Input reads files:')
    fwd = reads[input_ref]['files']['fwd']
    rev = reads[input_ref]['files']['rev']
    pprint('forward: ' + fwd)
    pprint('reverse: ' + rev)

    # STEP 3: run megahit
    # construct the command
    megahit_cmd = [self.MEGAHIT]

    # we only support PE reads, so add that
    megahit_cmd.append('-1')
    megahit_cmd.append(fwd)
    megahit_cmd.append('-2')
    megahit_cmd.append(rev)

    # if a preset is defined, use that:
    if 'megahit_parameter_preset' in params:
        if params['megahit_parameter_preset']:
            megahit_cmd.append('--presets')
            megahit_cmd.append(params['megahit_parameter_preset'])

    if 'min_count' in params:
        if params['min_count']:
            megahit_cmd.append('--min-count')
            megahit_cmd.append(str(params['min_count']))
    if 'k_min' in params:
        if params['k_min']:
            megahit_cmd.append('--k-min')
            megahit_cmd.append(str(params['k_min']))
    if 'k_max' in params:
        if params['k_max']:
            megahit_cmd.append('--k-max')
            megahit_cmd.append(str(params['k_max']))
    if 'k_step' in params:
        if params['k_step']:
            megahit_cmd.append('--k-step')
            megahit_cmd.append(str(params['k_step']))
    if 'k_list' in params:
        if params['k_list']:
            k_list = []
            for k_val in params['k_list']:
                k_list.append(str(k_val))
            megahit_cmd.append('--k-list')
            megahit_cmd.append(','.join(k_list))

    min_contig_length = self.DEFAULT_MIN_CONTIG_LENGTH
    if 'min_contig_length' in params:
        if params['min_contig_length']:
            if str(params['min_contig_length']).isdigit():
                min_contig_length = params['min_contig_length']
            else:
                raise ValueError('min_contig_length parameter must be a non-negative integer')

    megahit_cmd.append('--min-contig-len')
    megahit_cmd.append(str(min_contig_length))

    # set the output location
    timestamp = int((datetime.utcnow() - datetime.utcfromtimestamp(0)).total_seconds() * 1000)
    output_dir = os.path.join(self.scratch, 'output.' + str(timestamp))
    megahit_cmd.append('-o')
    megahit_cmd.append(output_dir)

    # run megahit
    print('running megahit:')
    print('    ' + ' '.join(megahit_cmd))
    p = subprocess.Popen(megahit_cmd, cwd=self.scratch, shell=False)
    retcode = p.wait()

    print('Return code: ' + str(retcode))
    if p.returncode != 0:
        raise ValueError('Error running MEGAHIT, return code: ' + str(retcode) + '\n')

    output_contigs = os.path.join(output_dir, 'final.contigs.fa')

    # on macs, we cannot run megahit in the shared host scratch space,
    # so we need to move the file there
    if self.mac_mode:
        shutil.move(output_contigs, os.path.join(self.host_scratch, 'final.contigs.fa'))
        output_contigs = os.path.join(self.host_scratch, 'final.contigs.fa')

    # STEP 4: save the resulting assembly
    assemblyUtil = AssemblyUtil(self.callbackURL)
    output_data_ref = assemblyUtil.save_assembly_from_fasta({
        'file': {'path': output_contigs},
        'workspace_name': params['workspace_name'],
        'assembly_name': params['output_contigset_name']
    })

    # STEP 5: generate and save the report
    # compute a simple contig length distribution for the report
    lengths = []
    for seq_record in SeqIO.parse(output_contigs, 'fasta'):
        lengths.append(len(seq_record.seq))

    report = ''
    report += 'ContigSet saved to: ' + params['workspace_name'] + '/' + params['output_contigset_name'] + '\n'
    report += 'Assembled into ' + str(len(lengths)) + ' contigs.\n'
    report += 'Avg Length: ' + str(sum(lengths) / float(len(lengths))) + ' bp.\n'

    bins = 10
    counts, edges = np.histogram(lengths, bins)
    report += 'Contig Length Distribution (# of contigs -- min to max basepairs):\n'
    for c in range(bins):
        report += '   ' + str(counts[c]) + '\t--\t' + str(edges[c]) + ' to ' + str(edges[c + 1]) + ' bp\n'

    print('Running QUAST')
    kbq = kb_quast(self.callbackURL)
    try:
        quastret = kbq.run_QUAST({'files': [{'path': output_contigs,
                                             'label': params['output_contigset_name']}]})
    except QUASTError as qe:
        # not really any way to test this, all inputs have been checked earlier and should be ok
        print('Logging exception from running QUAST')
        print(str(qe))
        # TODO delete shock node
        raise

    print('Saving report')
    kbr = KBaseReport(self.callbackURL)
    try:
        report_info = kbr.create_extended_report(
            {'message': report,
             'objects_created': [{'ref': output_data_ref, 'description': 'Assembled contigs'}],
             'direct_html_link_index': 0,
             'html_links': [{'shock_id': quastret['shock_id'],
                             'name': 'report.html',
                             'label': 'QUAST report'}
                            ],
             'report_object_name': 'kb_megahit_report_' + str(uuid.uuid4()),
             'workspace_name': params['workspace_name']
             })
    except _RepError as re:
        # not really any way to test this, all inputs have been checked earlier and should be ok
        print('Logging exception from creating report object')
        print(str(re))
        # TODO delete shock node
        raise

    # STEP 6: construct the output to send back
    output = {'report_name': report_info['name'],
              'report_ref': report_info['ref']}
    #END run_megahit

    # At some point might do deeper type checking...
    if not isinstance(output, dict):
        raise ValueError('Method run_megahit return value ' +
                         'output is not type dict as required.')
    # return the results
    return [output]
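# Based on the docstring of run_megahit above, a minimal call could look like the
# following; every value is a placeholder, and only the keys come from the method's
# documented parameters ('impl' and 'ctx' are assumed to exist in the caller).
params = {
    'workspace_name': 'my_workspace',
    'read_library_ref': '12345/8/1',            # paired-end reads library
    'output_contigset_name': 'megahit.contigs',
    'megahit_parameter_preset': 'meta',         # optional
    'min_contig_length': 2000,                  # optional
}
output = impl.run_megahit(ctx, params)[0]
print(output['report_name'], output['report_ref'])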
def do_assembly(self, assemblyRef, wsName):
    #try:
    #    assembly = wsClient.get_objects2({'objects': [{'ref': assembly_ref}]})
    #except:
    #    exc_type, exc_value, exc_traceback = sys.exc_info()
    #    lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
    #    orig_error = ''.join('    ' + line for line in lines)
    #    raise ValueError('Error from workspace:\n' + orig_error)
    #print assembly#[200:]
    #print assembly['data']
    #print assembly['data'][0]
    #assembly['data'][0]['data']
    #fasta_handle_ref = assembly['data'][0]['data']['fasta_handle_ref']
    #print "fasta_handle_ref "+fasta_handle_ref
    #print type(fasta_handle_ref)
    #TODO create file here /kb/module/work
    #TODO set output file name
    print("SDK_CALLBACK_URL " + os.environ['SDK_CALLBACK_URL'])
    au = AssemblyUtil(os.environ['SDK_CALLBACK_URL'])
    #assembly_input_ref = "16589/2/1"
    #filename = "test.fasta"
    #obj_name = "EcoliMG1655.f"
    #wsname = "example_assembly"

    param = dict()
    param['ref'] = assemblyRef  # assembly_input_ref
    input_fasta_file = au.get_assembly_as_fasta(param)  # {'ref': assembly_input_ref}
    #just_input_fasta_file = os.path.basename(input_fasta_file['path'])
    #print "input_fasta_file "+ str(input_fasta_file['path'])

    newtmp = "/kb/module/work/tmp/tmp_" + self.create_random_string()
    os.mkdir(newtmp)
    os.mkdir(newtmp + "/input")
    newfasta = newtmp + "/input/" + os.path.basename(input_fasta_file['path'])
    print("newfasta " + newfasta)
    os.rename(input_fasta_file['path'], newfasta)

    args = ["wrapper_phage_contigs_sorter_iPlant.pl ", "--db 2 ", "--fna ", newfasta,
            " --wdir ", newtmp]
    print(str(args))
    cmdstring = "".join(args)

    print("Executing")
    cmdProcess = subprocess.Popen(cmdstring, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                                  shell=True)
    print("Done " + str(cmdProcess))
    stdout, stderr = cmdProcess.communicate()
    print(" stdout: " + stdout)
    print(" stderr: " + stderr)
    #return [report]

    # Step 5 - Build a Report and return
    reportObj = {
        'objects_created': [],
        'text_message': stdout
    }
    # 'objects_created': [{'ref': new_assembly, 'description': 'Filtered contigs'}],
    #report_info = report.create({'report': reportObj, 'workspace_name': wsName})
    #reportObj = {
    #    'objects_created': [{'ref': new_assembly, 'description': 'Filtered contigs'}],
    #    'text_message': 'Filtered Assembly to ' + str(n_remaining) + ' contigs out of ' + str(n_total)
    #}
    #report = KBaseReport(self.callback_url)
    #report_info = report.create({'report': reportObj, 'workspace_name': params['workspace_name']})
    # construct the output to send back
    #output = {'report_name': report_info['name'],
    #          'report_ref': report_info['ref']
    #          }
    #print('returning:' + pformat(output))

    print('Saving report')
    kbr = KBaseReport(self.callback_url, service_ver='dev')
    report = ''
    report += "cmdstring: " + str(cmdstring) + " stdout: " + str(stdout) + " stderr: " + str(stderr)

    virout = newtmp + "/" + "VIRSorter_global-phage-signal.csv"
    with open(virout, 'r') as myfile:
        data = myfile.read().replace('\n', '')

    print("wsName " + str(wsName))
    data = data.replace(",", "\t")
    data = data.replace("##", "\n##")
    report = report + "\n\n***** VirSorter output *****\n" + data

    report_data = {'message': report,
                   'objects_created': None,
                   'direct_html_link_index': None,
                   'html_links': None,
                   'report_object_name': 'kb_virsorter_' + str(uuid.uuid4()),
                   'workspace_name': wsName
                   }
    print("report_data")
    print(str(report_data))
    report_info = kbr.create_extended_report(report_data)
    # 'objects_created': [{'ref': assembly_ref, 'description': 'Assembled contigs'}],
    # 'html_links': [{'shock_id': quastret['shock_id'],
    #                 'name': 'report.html',
    #                 'label': 'QUAST report'}
    #                ],

    reportName = report_info['name']
    reportRef = report_info['ref']
    return reportName, reportRef
def arast_run(self, ctx, params, assembler, server='http://localhost:8000'):
    output = None
    console = []
    self.log(console, 'Running run_{} with params='.format(assembler))
    self.log(console, pformat(params))

    #### do some basic checks
    if 'workspace_name' not in params:
        raise ValueError('workspace_name parameter is required')
    if 'read_library_refs' not in params and 'read_library_names' not in params:
        raise ValueError('read_library_refs or read_library_names parameter is required')
    if 'read_library_refs' in params:
        if type(params['read_library_refs']) != list:
            raise ValueError('read_library_refs must be a list')
    if 'read_library_names' in params:
        if type(params['read_library_names']) != list:
            raise ValueError('read_library_names must be a list')
    if 'output_contigset_name' not in params:
        raise ValueError('output_contigset_name parameter is required')

    min_contig_len = params.get('min_contig_len') or 300

    token = ctx['token']
    os.environ["KB_AUTH_TOKEN"] = token
    os.environ["ARAST_URL"] = server

    ws = workspaceService(self.workspaceURL)
    ws_libs = []
    if 'read_library_refs' in params:
        for lib_ref in params['read_library_refs']:
            ws_libs.append({'ref': lib_ref})
    if 'read_library_names' in params:
        for lib_name in params['read_library_names']:
            ws_libs.append({'ref': params['workspace_name'] + '/' + lib_name})
    if len(ws_libs) == 0:
        raise ValueError('At least one read library must be provided in read_library_refs or read_library_names')
    libs = ws.get_objects2({'objects': ws_libs})['data']
    wsid = libs[0]['info'][6]

    kbase_assembly_input = self.combine_read_libs(libs)
    tmp_data = self.create_temp_json(kbase_assembly_input)

    mode = ''
    cmd = ['ar-run', '--data-json', tmp_data]
    if assembler:
        cmd = cmd + ['-a', assembler]
        mode = 'assembler: ' + assembler
    elif 'pipeline' in params and params['pipeline']:
        cmd = cmd + ['-p', params['pipeline']]
        mode = 'assembly pipeline: ' + params['pipeline']
    else:
        cmd = cmd + ['-r', params.get('recipe', 'auto')]
        mode = 'assembly recipe: ' + params['recipe']

    logger.info('Start {}'.format(mode))
    logger.debug('CMD: {}'.format(' '.join(cmd)))
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=False)
    out, err = p.communicate()
    logger.debug(out)
    if p.returncode != 0:
        raise ValueError('Error running ar_run, return code: {}\n'.format(p.returncode))

    job_id = None
    match = re.search(r'(\d+)', out)
    if match:
        job_id = match.group(1)
    else:
        raise ValueError('No integer job ID found: {}\n'.format(out))

    timestamp = int((datetime.utcnow() - datetime.utcfromtimestamp(0)).total_seconds() * 1000)
    output_dir = os.path.join(self.scratch, 'output.' + str(timestamp))
    output_contigs = os.path.join(output_dir, 'contigs.fa')
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    cmd = ['ar-get', '-j', job_id, '-w', '-l']
    logger.debug('CMD: {}'.format(' '.join(cmd)))
    ar_log = subprocess.check_output(cmd)
    self.log(console, ar_log)

    cmdstr = 'ar-get -j {} -w -p | ar-filter -l {} > {}'.format(job_id, min_contig_len, output_contigs)
    logger.debug('CMD: {}'.format(cmdstr))
    subprocess.check_call(cmdstr, shell=True)

    cmd = ['ar-get', '-j', job_id, '-w', '-r']
    logger.debug('CMD: {}'.format(' '.join(cmd)))
    ar_report = subprocess.check_output(cmd)
    self.log(console, "\nDONE\n")

    client = AssemblyUtil(self.callback_url)
    assembly_ref = client.save_assembly_from_fasta({
        'file': {'path': output_contigs},
        'workspace_name': params['workspace_name'],
        'assembly_name': params['output_contigset_name']
    })

    lengths = []
    for seq_record in SeqIO.parse(output_contigs, 'fasta'):
        lengths.append(len(seq_record.seq))

    provenance = [{}]
    if 'provenance' in ctx:
        provenance = ctx['provenance']
    # add additional info to provenance here, in this case the input data object reference
    if 'read_library_names' in params:
        provenance[0]['input_ws_objects'] = [params['workspace_name'] + '/' + x
                                             for x in params['read_library_names']]
    elif 'read_library_refs' in params:
        provenance[0]['input_ws_objects'] = [x for x in params['read_library_refs']]

    os.remove(tmp_data)
    #shutil.rmtree(output_dir)

    # create a Report
    report = ''
    report += '============= Raw Contigs ============\n' + ar_report + '\n'
    report += '========== Filtered Contigs ==========\n'
    report += 'ContigSet saved to: ' + params['workspace_name'] + '/' + params['output_contigset_name'] + '\n'
    report += 'Assembled into ' + str(len(lengths)) + ' contigs.\n'
    report += 'Average Length: ' + str(sum(lengths) / float(len(lengths))) + ' bp.\n'

    # compute a simple contig length distribution
    bins = 10
    counts, edges = np.histogram(lengths, bins)
    report += 'Contig Length Distribution (# of contigs -- min to max basepairs):\n'
    for c in range(bins):
        report += '   ' + str(counts[c]) + '\t--\t' + str(edges[c]) + ' to ' + str(edges[c + 1]) + ' bp\n'
    print(report)

    reportObj = {
        'objects_created': [{'ref': params['workspace_name'] + '/' + params['output_contigset_name'],
                             'description': 'Assembled contigs'}],
        'text_message': report
    }

    reportName = '{}.report.{}'.format(assembler, job_id)
    report_obj_info = ws.save_objects({
        'id': wsid,
        'objects': [
            {
                'type': 'KBaseReport.Report',
                'data': reportObj,
                'name': reportName,
                'meta': {},
                'hidden': 1,
                'provenance': provenance
            }
        ]
    })[0]

    output = {
        'report_name': reportName,
        'report_ref': str(report_obj_info[6]) + '/' + str(report_obj_info[0]) + '/' + str(report_obj_info[4])
    }

    # At some point might do deeper type checking...
    if not isinstance(output, dict):
        raise ValueError('Method filter_contigs return value ' +
                         'returnVal is not type dict as required.')
    # return the results
    return output
class kb_virsorterTest(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        token = environ.get('KB_AUTH_TOKEN', None)
        user_id = requests.post(
            'https://kbase.us/services/authorization/Sessions/Login',
            data='token={}&fields=user_id'.format(token)).json()['user_id']
        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        cls.ctx = MethodContext(None)
        cls.ctx.update({'token': token,
                        'user_id': user_id,
                        'provenance': [
                            {'service': 'kb_virsorter',
                             'method': 'please_never_use_it_in_production',
                             'method_params': []
                             }],
                        'authenticated': 1})

        config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
        cls.cfg = {}
        config = ConfigParser()
        config.read(config_file)
        for nameval in config.items('kb_virsorter'):
            cls.cfg[nameval[0]] = nameval[1]
        cls.wsURL = cls.cfg['workspace-url']
        cls.wsClient = workspaceService(cls.wsURL, token=token)
        cls.serviceImpl = kb_virsorter(cls.cfg)

        cls.testobjref = []
        #cls.testobjdata = []
        cls.testwsname = []

    @classmethod
    def tearDownClass(cls):
        if hasattr(cls, 'wsName'):
            cls.wsClient.delete_workspace({'workspace': cls.wsName})
            print('Test workspace was deleted')
        if hasattr(cls, 'testwsname') and len(cls.testwsname) > 0:
            try:
                print('Deleting workspace 2 ' + cls.testwsname[0])
                cls.wsClient.delete_workspace({'workspace': cls.testwsname[0]})
                print('Test workspace 2 was deleted ' + cls.testwsname[0])
            except Exception as e:
                print(e)
        #if hasattr(cls, 'testobjdata'):
        #    try:
        #        print('Deleting shock data ' + str(len(cls.testobjdata)))
        #        print('Deleting shock data ' + str(len(cls.testobjdata[0]['data'][0])))
        #        print('Deleting shock data ' + str(cls.testobjdata[0]))
        #        node = cls.testobjdata[0]['data'][0]['lib']['file']['id']
        #        cls.delete_shock_node(node)
        #        print('Test shock data was deleted')
        #    except Exception as e:
        #        print(e)

    def getWsClient(self):
        return self.__class__.wsClient

    def getWsName(self):
        if hasattr(self.__class__, 'wsName'):
            return self.__class__.wsName
        suffix = int(time.time() * 1000)
        wsName = "test_kb_virsorter_" + str(suffix)
        ret = self.getWsClient().create_workspace({'workspace': wsName})
        self.__class__.wsName = wsName
        return wsName

    def getImpl(self):
        return self.__class__.serviceImpl

    def getContext(self):
        return self.__class__.ctx

    def write_file(self, filename, content):
        tmp_dir = self.cfg['scratch']
        file_path = os.path.join(tmp_dir, filename)
        with open(file_path, 'w') as fh1:
            fh1.write(content)
        return file_path

    def delete_shock_node(self, node_id):
        header = {'Authorization': 'Oauth {0}'.format(cls.token)}
        requests.delete(cls.shockURL + '/node/' + node_id, headers=header,
                        allow_redirects=True)

    def ztest_aaa_upload_to_shock(self):
        print("upload ref data to shock staging")
        self.dfUtil = DataFileUtil(os.environ['SDK_CALLBACK_URL'])
        #file_path = self.write_file('Phage_gene_catalog.tar.gz', 'Test')

        input_file_name = 'Phage_gene_catalog_plus_viromes.tar.gz'  #'Phage_gene_catalog.tar.gz'#'PFAM_27.tar.gz'
        source_file_path = "/kb/module/work/" + input_file_name  # os.path.join(tmp_dir, input_file_name)
        tmp_dir = self.cfg['scratch']
        target_file_path = os.path.join(tmp_dir, input_file_name)
        print("file_path " + source_file_path + "\t" + target_file_path)
        orig_size = os.path.getsize(source_file_path)
        shutil.copy(source_file_path, target_file_path)

        print("Testing " + target_file_path)
        print(os.path.isfile(target_file_path))

        ret1 = self.dfUtil.file_to_shock({'file_path': target_file_path})
        print(str(ret1))
        shock_id = ret1['shock_id']
        print("shock_id " + shock_id)

        file_path2 = os.path.join("/kb/module/work/", 'test.tar.gz')
        #ret2 = self.dfUtil.shock_to_file(
        #    {'shock_id': shock_id, 'file_path': file_path2})[0]
        ret2 = self.dfUtil.shock_to_file(
            {'shock_id': shock_id, 'file_path': file_path2})
        print(ret2)

        file_name = ret2['node_file_name']
        attribs = ret2['attributes']
        self.assertEqual(file_name, 'Phage_gene_catalog_plus_viromes.tar.gz')
        self.assertEqual(ret2['file_path'], file_path2)
        self.assertEqual(ret2['size'], orig_size)
        self.assertIsNone(attribs)
        #self.delete_shock_node(shock_id)

    def create_random_string(self):
        N = 20
        return ''.join(
            random.SystemRandom().choice(string.ascii_uppercase + string.digits) for _ in range(N))

    def test_virsorter_ok(self):
        self.upload_assembly()

        if not self.testwsname:
            self.testwsname.append(self.create_random_string())
        print("upload_reads self.testwsname[0] " + self.testwsname[0])
        #try:
        #    ret = self.wsClient.create_workspace({'workspace': self.testwsname[0]})  # test_ws_name
        #except Exception as e:
        #    # print "ERROR"
        #    # print(type(e))
        #    # print(e.args)
        #    print(e)
        #    pass

        print("self.testwsname " + str(self.testwsname))
        params = {}
        params['assembly_ref'] = str(self.testobjref[0])  #str(self.testwsname[0])+"/" #"16589/2/1"#'16589/2/1'#self.testobjref
        params['ws_name'] = self.testwsname[0]

        result = self.getImpl().run_virsorter(self.getContext(), params)
        print('RESULT run_virsorter:')
        pprint(result)

        #testresult = [
        #    {'blah': 'blah', 'bleh': 'bleh'}]
        testresult = [{'report_ref': result[0]['report_ref'],
                       'report_name': result[0]['report_name']}]
        self.assertEqual(sorted(result), sorted(testresult))

    def upload_assembly(self):
        if not self.testobjref:
            print("upload_assembly start")
            indata = 'U00096.2.fa'  #_first1000.
            ftarget = os.path.join(self.cfg['scratch'], indata)  #self.scratch, indata)
            print("ftarget " + ftarget)
            ret = shutil.copy('../test_data/' + indata, ftarget)

            #self.readsUtilClient = ReadsUtils(os.environ['SDK_CALLBACK_URL'])
            self.assemblyUtilClient = AssemblyUtil(os.environ['SDK_CALLBACK_URL'])

            if not self.testwsname:
                self.testwsname.append(self.create_random_string())
            print("upload_assembly self.testwsname[0] " + self.testwsname[0])

            try:
                ret = self.wsClient.create_workspace({'workspace': self.testwsname[0]})  #test_ws_name
            except Exception as e:
                #print "ERROR"
                #print(type(e))
                #print(e.args)
                print(e)
                pass

            try:
                print("attempt upload")
                print("ftarget " + ftarget)
                ref = self.assemblyUtilClient.save_assembly_from_fasta(
                    {'workspace_name': self.testwsname[0],
                     'assembly_name': 'Ecolik12MG1655',
                     'file': {'path': ftarget}})
                print("upload_assembly")
                print(ref)

                #self.testobjref = []
                self.testobjref.append(self.testwsname[0] + '/Ecolik12MG1655/1')
                #self.testobjdata = []
                #self.testobjdata.append(self.dfu.get_objects(
                #    {'object_refs': [self.testobjref[0]]}))
                ##print self.testobjdata[0]
            except Exception as e:
                print(e)
                pass

            print("self.testobjref[0]")
            print(self.testobjref)
            print(self.testobjref[0])