예제 #1
0
def create_shock_handle(logger=None,
                        file_name=None,
                        shock_url=None,
                        handle_url=None,
                        obj_type=None,
                        token=None):
    """
    Upload a local file to Shock and build a KBase handle dict for it.

    Parameters:
        logger: logger object passed through to the upload/handle helpers.
        file_name: path of the local file to upload.
        shock_url: Shock service endpoint URL.
        handle_url: Handle service endpoint URL.
        obj_type: value stored in the handle's "type" field.
        token: authentication token (required by the downstream helpers).

    Returns:
        dict with keys 'hid', 'file_name', 'id', 'type', 'url',
        'remote_md5', 'remote_sha1' describing the uploaded node.
    """
    # NOTE(review): the original also built an unused HandleService client
    # here; removed because nothing in this function referenced it.

    # Upload the file; '{}' is an empty JSON attributes payload.
    f_shock = upload_file_to_shock(logger, shock_url, file_name, '{}', True, token)
    # Local SHA1 so the handle records both checksums (Shock supplies MD5).
    f_sha1 = hashfile(file_name)
    # Register (or look up) a handle id for the freshly created Shock node.
    hid = getHandles(logger, shock_url, handle_url, [f_shock['id']], None, token)[0]

    handle = {
        'hid': hid,
        'file_name': f_shock['file']['name'],
        'id': f_shock['id'],
        'type': obj_type,
        'url': shock_url,
        'remote_md5': f_shock['file']['checksum']['md5'],
        'remote_sha1': f_sha1,
    }
    return handle
예제 #2
0
def getHandles(logger=None,
               shock_service_url=None,
               handle_service_url=None,
               shock_ids=None,
               handle_ids=None,
               token=None):
    """
    Retrieve KBase handles for a list of shock ids or a list of handle ids.

    Exactly one of shock_ids / handle_ids is expected; shock_ids takes
    precedence when both are given. Requires a non-None token.

    Returns a list of handle dicts (or, on one fallback path, raw values
    returned by persist_handle). Raises on invalid handle ids and re-raises
    the last failure when a shock id cannot be resolved at all.
    """

    if token is None:
        raise Exception("Authentication token required!")

    hs = HandleService(url=handle_service_url, token=token)

    handles = list()
    if shock_ids is not None:
        # Shock authenticates with an OAuth header built from the same token.
        header = dict()
        header["Authorization"] = "Oauth {0}".format(token)

        for sid in shock_ids:
            info = None

            # Best-effort fetch of the node metadata; failure is logged and
            # info stays None (later steps that need it will then fail and
            # fall into their own except branches).
            try:
                logger.info(
                    "Found shock id {0}, retrieving information about the data."
                    .format(sid))

                response = requests.get("{0}/node/{1}".format(
                    shock_service_url, sid),
                                        headers=header,
                                        verify=True)
                info = response.json()["data"]
            except:
                logger.error(
                    "There was an error retrieving information about the shock node id {0} from url {1}"
                    .format(sid, shock_service_url))

            # Attempt 1: register a brand-new handle for this shock node.
            try:
                logger.info("Retrieving a handle id for the data.")
                handle = hs.persist_handle({
                    "id":
                    sid,
                    "type":
                    "shock",
                    "url":
                    shock_service_url,
                    "file_name":
                    info["file"]["name"],
                    "remote_md5":
                    info["file"]["checksum"]["md5"]
                })
                handles.append(handle)
            except:
                # Attempt 2: the node may already be registered — look up its
                # existing handle and refresh name/md5 from the node metadata.
                try:
                    handle_id = hs.ids_to_handles([sid])[0]["hid"]
                    single_handle = hs.hids_to_handles([handle_id])

                    assert len(single_handle) != 0

                    if info is not None:
                        single_handle[0]["file_name"] = info["file"]["name"]
                        single_handle[0]["remote_md5"] = info["file"][
                            "checksum"]["md5"]
                        logger.debug(single_handle)

                    handles.append(single_handle[0])
                except:
                    # Attempt 3 (diagnostic): scan every handle for this shock
                    # id; if absent, try persisting once more. Either way the
                    # lookup failure is re-raised at the end of this branch.
                    logger.error(
                        "The input shock node id {} is already registered or could not be registered"
                        .format(sid))

                    # Fresh client in case the previous one is in a bad state.
                    hs = HandleService(url=handle_service_url, token=token)
                    all_handles = hs.list_handles()

                    for x in all_handles:
                        if x[0] == sid:
                            logger.info("FOUND shock id as existing handle")
                            logger.info(x)
                            break
                    else:
                        # for/else: runs only when no handle matched above.
                        logger.info(
                            "Unable to find a handle containing shock id")

                        logger.info(
                            "Trying again to get a handle id for the data.")
                        handle_id = hs.persist_handle({
                            "id":
                            sid,
                            "type":
                            "shock",
                            "url":
                            shock_service_url,
                            "file_name":
                            info["file"]["name"],
                            "remote_md5":
                            info["file"]["checksum"]["md5"]
                        })
                        handles.append(handle_id)

                    raise
    elif handle_ids is not None:
        # Direct handle-id lookup; any failure here is fatal.
        for hid in handle_ids:
            try:
                single_handle = hs.hids_to_handles([hid])

                assert len(single_handle) != 0

                handles.append(single_handle[0])
            except:
                logger.error("Invalid handle id {0}".format(hid))
                raise

    return handles
예제 #3
0
def getHandles(
    logger=None, shock_service_url=None, handle_service_url=None, shock_ids=None, handle_ids=None, token=None
):
    """
    Retrieve KBase handles for a list of shock ids or a list of handle ids.

    One of shock_ids / handle_ids is expected (shock_ids wins when both are
    set). A token is mandatory. Returns a list of handle dicts; re-raises the
    last failure when an id cannot be resolved.
    """
    if token is None:
        raise Exception("Authentication token required!")

    hs = HandleService(url=handle_service_url, token=token)
    handles = []

    if shock_ids is not None:
        # Shock expects the token in an OAuth Authorization header.
        auth_header = {"Authorization": "Oauth {0}".format(token)}

        for shock_id in shock_ids:
            node_info = None

            # Best-effort metadata fetch; on failure node_info stays None and
            # the later steps that need it will fail into their own handlers.
            try:
                logger.info("Found shock id {0}, retrieving information about the data.".format(shock_id))
                resp = requests.get(
                    "{0}/node/{1}".format(shock_service_url, shock_id), headers=auth_header, verify=True
                )
                node_info = resp.json()["data"]
            except:
                logger.error(
                    "There was an error retrieving information about the shock node id {0} from url {1}".format(
                        shock_id, shock_service_url
                    )
                )

            # First attempt: register a new handle for this node.
            try:
                logger.info("Retrieving a handle id for the data.")
                new_handle = hs.persist_handle(
                    {
                        "id": shock_id,
                        "type": "shock",
                        "url": shock_service_url,
                        "file_name": node_info["file"]["name"],
                        "remote_md5": node_info["file"]["checksum"]["md5"],
                    }
                )
                handles.append(new_handle)
            except:
                # Second attempt: the node may already have a handle — look it
                # up and refresh its name/md5 from the fetched metadata.
                try:
                    existing_hid = hs.ids_to_handles([shock_id])[0]["hid"]
                    matches = hs.hids_to_handles([existing_hid])

                    assert len(matches) != 0

                    if node_info is not None:
                        matches[0]["file_name"] = node_info["file"]["name"]
                        matches[0]["remote_md5"] = node_info["file"]["checksum"]["md5"]
                        logger.debug(matches)

                    handles.append(matches[0])
                except:
                    # Last resort: scan all handles for this shock id and, if
                    # absent, retry persisting; the failure is re-raised below.
                    logger.error(
                        "The input shock node id {} is already registered or could not be registered".format(shock_id)
                    )

                    # Fresh client in case the previous one is in a bad state.
                    hs = HandleService(url=handle_service_url, token=token)

                    for candidate in hs.list_handles():
                        if candidate[0] == shock_id:
                            logger.info("FOUND shock id as existing handle")
                            logger.info(candidate)
                            break
                    else:
                        # for/else: only runs when no candidate matched.
                        logger.info("Unable to find a handle containing shock id")
                        logger.info("Trying again to get a handle id for the data.")
                        retry_handle = hs.persist_handle(
                            {
                                "id": shock_id,
                                "type": "shock",
                                "url": shock_service_url,
                                "file_name": node_info["file"]["name"],
                                "remote_md5": node_info["file"]["checksum"]["md5"],
                            }
                        )
                        handles.append(retry_handle)

                    raise
    elif handle_ids is not None:
        # Direct lookup path: any failure here is fatal.
        for hid in handle_ids:
            try:
                matches = hs.hids_to_handles([hid])

                assert len(matches) != 0

                handles.append(matches[0])
            except:
                logger.error("Invalid handle id {0}".format(hid))
                raise
    return handles
 def index_genomes(self, ctx, params):
     # ctx is the context object
     # return variables are: returnVal
     #BEGIN index_genomes
     user_token=ctx['token']
     svc_token = Token(user_id=self.__SVC_USER, password=self.__SVC_PASS).token
     ws_client=Workspace(url=self.__WS_URL, token=user_token)
     hs = HandleService(url=self.__HS_URL, token=user_token)
     gs = {'elements' : {}}
     try:
         self.__LOGGER.info( "Preparing Target FA")
      
         blast_dir =self.__BLAST_DIR
         if os.path.exists(blast_dir):
             files=glob.glob("%s/*" % blast_dir)
             for f in files: os.remove(f)
         if not os.path.exists(blast_dir): os.makedirs(blast_dir)
       
      
            
         target_nt_fn = "%s/%s_nt.fa" %( blast_dir, params['blastindex_name'])
         target_aa_fn = "%s/%s_aa.fa" %( blast_dir, params['blastindex_name'])
      
         try:
           target_nt=open(target_nt_fn,'w')
           target_aa=open(target_aa_fn,'w')
         except:
           self.__LOGGER.error("Couldn't open file")
           raise KBaseGenomeUtilException("Backend awe client error: Couldn't open files")
      
         have_nt_seq = False
         have_aa_seq = False
      
      
      
         # Iterate one at a time to cope with main memory limit for euk genomes
         for genome_id in params['genome_ids']: 
      
             try:
                 obj_infos = ws_client.get_object_info_new({"objects": [{'name':genome_id, # replace `0' with loop
                                                            'workspace': params['ws_id']}]})
             except:
                 self.__LOGGER.error("Couldn't retrieve %s:%s from the workspace" %(params['ws_id'],genome_id))
                 raise KBaseGenomeUtilException("Couldn't retrieve %s:%s from the workspace" %(params['ws_id'],genome_id))
                  
      
             if len(obj_infos) < 1:
                 self.__LOGGER.error("Couldn't find %s:%s from the workspace" %(params['ws_id'],genome_id))
                 continue
                 #err_msg += "Workspace error: Couldn't find %s:%s from the workspace\n" %(params['ws_id'],genome_id)                
                 # we can continue due to multiple genomes
                 #raise Exception("Couldn't find %s:%s from the workspace" %(params['ws_id'],genome_id)) 
      
             ref_id = "{0}/{1}/{2}".format(obj_infos[0][6],obj_infos[0][0],obj_infos[0][4])
             gs['elements'][genome_id] = [ref_id]
            
             self.__LOGGER.info( "Downloading genome object from workspace {0}".format(ref_id))
            
             # TODO: make the following procedures to be loop for each genome_ids 
             try:
                 genome_list=ws_client.get_object_subset([{'name':genome_id, # replace `0' with loop
                                                           'workspace': params['ws_id'], 
                                                           'included':['features']}])
                 #genome_list=ws_client.get_objects([{'name':genome_id, # replace `0' with loop
                 #                                          'workspace': params['ws_id']}])
                 genome = genome_list[0]
             except Exception, e:
                 raise KBaseGenomeUtilException("Failed to download genome object itself even though we got the object information")
  
            
            
             self.__LOGGER.info( "Dumping seq for %s" % genome_id)
             # Dump genome sequences
             check_seq=0
             #extract protein sequences from the genome object
             try:
                 for gene in genome['data']['features']:
                       #>kb.g.1234.CDS.1234#At1g3333 amalase...
                       function = "NA"
                       aliases = "NA"
                       if 'function' in gene: 
                           function = gene['function']
                       if 'aliases' in gene: aliases = ",".join(gene['aliases'])
                       if 'protein_translation' in gene:
                             target_aa.write(">%s#%s#%s#%s\n%s\n" % (gene['id'], ref_id, aliases, function, gene['protein_translation']))
                             have_aa_seq = True
                       if 'dna_sequence' in gene:
                             target_nt.write(">%s#%s#%s#%s\n%s\n" % (gene['id'], ref_id, aliases, function, gene['dna_sequence']))
                             have_nt_seq = True
             except Exception as e:
                 raise KBaseGenomeUtilException("Failed to dump target sequence for genome : %s" % genome_id)
         try:
             target_nt.close()
             target_aa.close()
         except Exception as e:
             raise KBaseGenomeUtilException("Failed to close sequence files")
             
             
            
         if not have_nt_seq :
             self.__LOGGER.info("The genome objects do not contain any dna sequences!")
         if not have_aa_seq :
             self.__LOGGER.info("The genome objects do not contain any amino acid sequences!")
      
         index_type = 'none'
            
         if have_nt_seq :
             try:
                 cmdstring="%s -i %s -p F" %(self.__INDEX_CMD, target_nt_fn)
                 # TODO: replace it to subprocess.Popen
                 tool_process = subprocess.Popen(cmdstring, stderr=subprocess.PIPE, shell=True)
                 stdout, stderr = tool_process.communicate()
                 
                 if stdout is not None and len(stdout) > 0:
                     self.__LOGGER.info(stdout)
                 
                 if stderr is not None and len(stderr) > 0:
                     self.__LOGGER.error("Indexing error: " + stderr)
                     raise KBaseGenomeUtilException("Indexing error: " + stderr)
             except Exception, e:
                 raise KBaseGenomeUtilException("Failed to run indexing program (%s) : %s " %(self.__INDEX_CMD, e))
                
             index_type = 'nucleotide'