# Imports assumed by the code below. The biokbase client paths are the usual
# ones in KBase deployments; upload_file_to_shock, hashfile and
# KBaseGenomeUtilException are expected to be defined elsewhere in this package.
import glob
import logging
import os
import subprocess

import requests

from biokbase.AbstractHandle.Client import AbstractHandle as HandleService
from biokbase.auth import Token
from biokbase.workspace.client import Workspace


def create_shock_handle(logger=None,
                        file_name=None,
                        shock_url=None,
                        handle_url=None,
                        obj_type=None,
                        token=None):
    """Upload a file to shock and build a KBase handle dict describing it."""
    # Upload the file, then compute a local sha1 for the remote_sha1 field.
    f_shock = upload_file_to_shock(logger, shock_url, file_name, '{}', True, token)
    f_sha1 = hashfile(file_name)
    # getHandles (below) registers the new node and returns its handle id.
    hid = getHandles(logger, shock_url, handle_url, [f_shock['id']], None, token)[0]
    handle = {'hid': hid,
              'file_name': f_shock['file']['name'],
              'id': f_shock['id'],
              'type': obj_type,
              'url': shock_url,
              'remote_md5': f_shock['file']['checksum']['md5'],
              'remote_sha1': f_sha1}
    return handle
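# A minimal usage sketch for create_shock_handle, assuming the conventional
# KB_AUTH_TOKEN environment variable as the token source; the file path and
# service URLs below are placeholders, not values from this module.
def _example_create_shock_handle():
    token = os.environ["KB_AUTH_TOKEN"]  # assumed token source
    logger = logging.getLogger(__name__)
    handle = create_shock_handle(logger=logger,
                                 file_name="/tmp/reads.fastq",  # hypothetical file
                                 shock_url="https://kbase.us/services/shock-api",  # placeholder
                                 handle_url="https://kbase.us/services/handle_service",  # placeholder
                                 obj_type="shock",
                                 token=token)
    logger.info("Created handle %s for shock node %s", handle['hid'], handle['id'])
    return handle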
def getHandles(logger=None,
               shock_service_url=None,
               handle_service_url=None,
               shock_ids=None,
               handle_ids=None,
               token=None):
    """
    Retrieve KBase handles for a list of shock ids or a list of handle ids.
    """
    if token is None:
        raise Exception("Authentication token required!")

    hs = HandleService(url=handle_service_url, token=token)

    handles = list()
    if shock_ids is not None:
        header = {"Authorization": "Oauth {0}".format(token)}
        for sid in shock_ids:
            info = None
            try:
                logger.info("Found shock id {0}, retrieving information about the data.".format(sid))
                response = requests.get("{0}/node/{1}".format(shock_service_url, sid),
                                        headers=header, verify=True)
                info = response.json()["data"]
            except Exception:
                logger.error("There was an error retrieving information about the shock node id {0} from url {1}".format(sid, shock_service_url))

            try:
                logger.info("Retrieving a handle id for the data.")
                handle = hs.persist_handle({"id": sid,
                                            "type": "shock",
                                            "url": shock_service_url,
                                            "file_name": info["file"]["name"],
                                            "remote_md5": info["file"]["checksum"]["md5"]})
                handles.append(handle)
            except Exception:
                # The node may already be registered; fall back to looking up
                # its existing handle.
                try:
                    handle_id = hs.ids_to_handles([sid])[0]["hid"]
                    single_handle = hs.hids_to_handles([handle_id])
                    assert len(single_handle) != 0
                    if info is not None:
                        single_handle[0]["file_name"] = info["file"]["name"]
                        single_handle[0]["remote_md5"] = info["file"]["checksum"]["md5"]
                    logger.debug(single_handle)
                    handles.append(single_handle[0])
                except Exception:
                    logger.error("The input shock node id {0} is already registered or could not be registered".format(sid))
                    # Last resort: scan every handle for one that points at
                    # this shock node, retry the registration if none is
                    # found, then re-raise so the caller still sees the
                    # failure.
                    hs = HandleService(url=handle_service_url, token=token)
                    all_handles = hs.list_handles()
                    for x in all_handles:
                        if x[0] == sid:
                            logger.info("FOUND shock id as existing handle")
                            logger.info(x)
                            break
                    else:
                        logger.info("Unable to find a handle containing shock id")
                        logger.info("Trying again to get a handle id for the data.")
                        handle_id = hs.persist_handle({"id": sid,
                                                       "type": "shock",
                                                       "url": shock_service_url,
                                                       "file_name": info["file"]["name"],
                                                       "remote_md5": info["file"]["checksum"]["md5"]})
                        handles.append(handle_id)
                    raise
    elif handle_ids is not None:
        for hid in handle_ids:
            try:
                single_handle = hs.hids_to_handles([hid])
                assert len(single_handle) != 0
                handles.append(single_handle[0])
            except Exception:
                logger.error("Invalid handle id {0}".format(hid))
                raise
    return handles
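# A usage sketch for getHandles: pass either shock_ids or handle_ids, not
# both. The node ids, handle id, and URLs are hypothetical placeholders;
# failures propagate to the caller as shown above.
def _example_get_handles(logger, token):
    # Resolve (or register) handles for existing shock nodes.
    by_shock = getHandles(logger=logger,
                          shock_service_url="https://kbase.us/services/shock-api",  # placeholder
                          handle_service_url="https://kbase.us/services/handle_service",  # placeholder
                          shock_ids=["node-id-1", "node-id-2"],  # hypothetical node ids
                          token=token)
    # Look up handles that are already registered, by handle id.
    by_hid = getHandles(logger=logger,
                        handle_service_url="https://kbase.us/services/handle_service",  # placeholder
                        handle_ids=["KBH_1"],  # hypothetical handle id
                        token=token)
    return by_shock, by_hid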
def index_genomes(self, ctx, params):
    # ctx is the context object
    # return variables are: returnVal
    #BEGIN index_genomes
    user_token = ctx['token']
    # Service credentials and clients (svc_token and hs are used further down
    # in the full method).
    svc_token = Token(user_id=self.__SVC_USER, password=self.__SVC_PASS).token
    ws_client = Workspace(url=self.__WS_URL, token=user_token)
    hs = HandleService(url=self.__HS_URL, token=user_token)
    gs = {'elements': {}}
    try:
        self.__LOGGER.info("Preparing Target FA")

        # Start from an empty blast directory.
        blast_dir = self.__BLAST_DIR
        if os.path.exists(blast_dir):
            for f in glob.glob("%s/*" % blast_dir):
                os.remove(f)
        if not os.path.exists(blast_dir):
            os.makedirs(blast_dir)

        target_nt_fn = "%s/%s_nt.fa" % (blast_dir, params['blastindex_name'])
        target_aa_fn = "%s/%s_aa.fa" % (blast_dir, params['blastindex_name'])
        try:
            target_nt = open(target_nt_fn, 'w')
            target_aa = open(target_aa_fn, 'w')
        except Exception:
            self.__LOGGER.error("Couldn't open file")
            raise KBaseGenomeUtilException("Backend awe client error: Couldn't open files")

        have_nt_seq = False
        have_aa_seq = False
        # Iterate one genome at a time to cope with the main-memory limit for
        # eukaryotic genomes.
        for genome_id in params['genome_ids']:
            try:
                obj_infos = ws_client.get_object_info_new(
                    {"objects": [{'name': genome_id,
                                  'workspace': params['ws_id']}]})
            except Exception:
                self.__LOGGER.error("Couldn't retrieve %s:%s from the workspace" % (params['ws_id'], genome_id))
                raise KBaseGenomeUtilException("Couldn't retrieve %s:%s from the workspace" % (params['ws_id'], genome_id))
            if len(obj_infos) < 1:
                # Skip a missing genome rather than fail: several genomes are
                # being indexed together.
                self.__LOGGER.error("Couldn't find %s:%s from the workspace" % (params['ws_id'], genome_id))
                continue

            # Numeric object reference: wsid/objid/version.
            ref_id = "{0}/{1}/{2}".format(obj_infos[0][6], obj_infos[0][0], obj_infos[0][4])
            gs['elements'][genome_id] = [ref_id]
            self.__LOGGER.info("Downloading genome object from workspace {0}".format(ref_id))
            try:
                # Only the 'features' field is needed for sequence dumping.
                genome_list = ws_client.get_object_subset([{'name': genome_id,
                                                            'workspace': params['ws_id'],
                                                            'included': ['features']}])
                genome = genome_list[0]
            except Exception:
                raise KBaseGenomeUtilException("Failed to download genome object itself even though we got the object information")

            self.__LOGGER.info("Dumping seq for %s" % genome_id)

            # Dump genome sequences: extract protein (and dna) sequences from
            # the genome object. Example header:
            #   >kb|g.1234.CDS.1234#At1g3333 amylase...
            check_seq = 0
            try:
                for gene in genome['data']['features']:
                    function = "NA"
                    aliases = "NA"
                    if 'function' in gene:
                        function = gene['function']
                    if 'aliases' in gene:
                        aliases = ",".join(gene['aliases'])
                    if 'protein_translation' in gene:
                        target_aa.write(">%s#%s#%s#%s\n%s\n" % (gene['id'], ref_id, aliases, function, gene['protein_translation']))
                        have_aa_seq = True
                    if 'dna_sequence' in gene:
                        target_nt.write(">%s#%s#%s#%s\n%s\n" % (gene['id'], ref_id, aliases, function, gene['dna_sequence']))
                        have_nt_seq = True
            except Exception:
                raise KBaseGenomeUtilException("Failed to dump target sequence for genome : %s" % genome_id)

        try:
            target_nt.close()
            target_aa.close()
        except Exception:
            raise KBaseGenomeUtilException("Failed to close sequence files")

        if not have_nt_seq:
            self.__LOGGER.info("The genome objects do not contain any dna sequences!")
        if not have_aa_seq:
            self.__LOGGER.info("The genome objects do not contain any amino acid sequences!")

        index_type = 'none'
        if have_nt_seq:
            try:
                # formatdb-style flags: -i <input fasta>, -p F (nucleotide).
                cmdstring = "%s -i %s -p F" % (self.__INDEX_CMD, target_nt_fn)
                # Capture both streams in text mode so they can be logged and
                # concatenated with the error messages below.
                tool_process = subprocess.Popen(cmdstring,
                                                stdout=subprocess.PIPE,
                                                stderr=subprocess.PIPE,
                                                universal_newlines=True,
                                                shell=True)
                stdout, stderr = tool_process.communicate()
                if stdout is not None and len(stdout) > 0:
                    self.__LOGGER.info(stdout)
                if stderr is not None and len(stderr) > 0:
                    self.__LOGGER.error("Indexing error: " + stderr)
                    raise KBaseGenomeUtilException("Indexing error: " + stderr)
            except Exception as e:
                raise KBaseGenomeUtilException("Failed to run indexing program (%s) : %s " % (self.__INDEX_CMD, e))
            index_type = 'nucleotide'