예제 #1
0
def get_genome(genome_id=None,
               workspace_id=None,
               token=None,
               workspace_url=None):
    """Download a ``KBaseGenomes.Genome`` object from a workspace.

    Args:
        genome_id: Id of the genome object to fetch. When ``None`` the
            previously hard-coded id
            ``Bifidobacterium_animalis_subsp._lactis_AD011`` is used.
        workspace_id: Workspace that holds the object. When ``None`` the
            previously hard-coded workspace ``plane83:1436884411390`` is used.
        token: Auth token handed to the ``Workspace`` client. When ``None``
            it is read from
            ``/kb/dev_container/modules/genome_util/mytoken.txt``.
        workspace_url: Workspace service URL; defaults to
            ``https://kbase.us/services/ws/``.

    Returns:
        The value returned by ``Workspace.get_object`` for the request.
    """
    if workspace_url is None:
        workspace_url = 'https://kbase.us/services/ws/'
    if token is None:
        with open('/kb/dev_container/modules/genome_util/mytoken.txt'
                  ) as token_file:
            # Drop the trailing newline / whitespace of the token file.
            token = token_file.read().rstrip()

    # The ids used to be hard-coded and the arguments silently ignored.
    # Honour the arguments now; the old constants remain as defaults so a
    # call without ids behaves exactly as before.
    if genome_id is None:
        genome_id = 'Bifidobacterium_animalis_subsp._lactis_AD011'
    if workspace_id is None:
        workspace_id = 'plane83:1436884411390'

    workspace_client = Workspace(url=workspace_url, token=token)
    genome = workspace_client.get_object({
        'id': genome_id,
        'type': 'KBaseGenomes.Genome',
        'workspace': workspace_id
    })

    return genome
예제 #2
0
class Validator(TransformBase):
    """Runs the validation command registered for an external data type.

    The command table comes from a configuration object stored in a
    workspace: ``config['validator'][etype]`` supplies ``cmd_name``,
    ``cmd_args['input']`` and an ``opt_args`` map from option name to
    command-line flag.
    """

    def __init__(self, args):
        """Copy the settings from ``args`` and load the configuration map.

        Args:
            args: Object exposing ``ws_url``, ``cfg_name``, ``sws_id``,
                ``etype`` and ``opt_args`` attributes.

        Raises:
            Exception: If the ``config_map`` of the loaded object is ``None``.
        """
        TransformBase.__init__(self, args)
        self.ws_url = args.ws_url
        self.cfg_name = args.cfg_name
        self.sws_id = args.sws_id
        self.etype = args.etype
        self.opt_args = args.opt_args

        # Download the configuration object that tells us where the
        # validation script is located.
        # NOTE(review): self.token is not assigned in this class; presumably
        # TransformBase.__init__ sets it -- confirm.
        self.wsd = Workspace(url=self.ws_url, token=self.token)
        self.config = self.wsd.get_object({
            'id': self.cfg_name,
            'workspace': self.sws_id
        })['data']['config_map']

        if self.config is None:
            raise Exception("Object {} not found in workspace {}".format(
                self.cfg_name, self.sws_id))

    def validation_handler(self):
        """Run the registered validator once for every input path.

        The input is ``<sdir>/<itmp>`` when that path exists, otherwise every
        path matching ``<sdir>/<itmp>_*``.

        Raises:
            Exception: If no validator is registered for ``self.etype``, or
                if a validator exits with a non-zero status (the message is
                the validator's captured stderr).
        """
        ## TODO: Add logging
        validators = self.config['validator']
        if self.etype not in validators:
            raise Exception(
                "No validation script was registered for {}".format(
                    self.etype))
        spec = validators[self.etype]

        # NOTE(review): self.sdir and self.itmp are not assigned in this
        # class; presumably provided by TransformBase -- confirm.
        single_input = "{}/{}".format(self.sdir, self.itmp)
        if os.path.exists(single_input):
            fd_list = [single_input]
        else:
            fd_list = glob.glob("{}/{}_*".format(self.sdir, self.itmp))

        if 'validator' in self.opt_args:
            requested = self.opt_args['validator']
        else:
            requested = {}

        for fd in fd_list:
            vcmd_lst = [spec['cmd_name'], spec['cmd_args']['input'], fd]

            for k in requested:
                # Was `[etype]` (an undefined name -> NameError) before.
                if k in spec['opt_args'] and requested[k] is not None:
                    vcmd_lst.append(spec['opt_args'][k])
                    vcmd_lst.append(requested[k])

            # stderr is captured as well; without it communicate() always
            # returned None for stderr and the exception below carried no
            # message.
            p1 = Popen(vcmd_lst, stdout=PIPE, stderr=PIPE)
            out, err = p1.communicate()
            # Echo the output for error tracking.
            if out is not None:
                sys.stdout.write(out + "\n")
            if err:
                sys.stderr.write(err + "\n")

            if p1.returncode != 0:
                raise Exception(err)
예제 #3
0
def handler(args):
    """Run the ``mys_example`` command and save a doubled pair to the workspace.

    Steps, in order:
      1. Fetch object ``args.inobj_id`` from workspace ``args.ws_id``.
      2. Run ``mys_example -i <pid>-<exp_fn>`` with the optional ``-m``,
         ``-p``, ``-n`` and ``-o`` arguments taken from ``args`` and echo
         its captured output.
      3. Save a ``MyService.PairString`` object named ``args.outobj_id``
         whose ``value`` is the input ``value`` concatenated with itself.
      4. When ``args.del_tmps`` equals ``"true"``, delete the two
         pid-prefixed temporary files if they exist.

    Args:
        args: Object exposing ``ws_url``, ``ws_id``, ``inobj_id``,
            ``outobj_id``, ``exp_fn``, ``flt_out_fn``, ``method``,
            ``p_value``, ``num_genes`` and ``del_tmps``.

    Raises:
        Exception: If the fetched object's data is ``None``.
    """
    wsd = Workspace(url=args.ws_url, token=os.environ.get('KB_AUTH_TOKEN'))
    indata = wsd.get_object({'id': args.inobj_id,
                             'workspace': args.ws_id})['data']

    if indata is None:
        raise Exception("Object {} not found in workspace {}".format(
            args.inobj_id, args.ws_id))

    # Temporary file names are prefixed with the pid of this process.
    pid = os.getpid()
    exp_path = "{}-{}".format(pid, args.exp_fn)
    out_path = "{}-{}".format(pid, args.flt_out_fn)

    ###
    # execute filtering
    flt_cmd_lst = ['mys_example', "-i", exp_path]
    for flag, value in (('-m', args.method),
                        ('-p', args.p_value),
                        ('-n', args.num_genes)):
        if value is not None:
            flt_cmd_lst.extend([flag, value])
    if args.flt_out_fn is not None:
        flt_cmd_lst.extend(['-o', out_path])

    p1 = Popen(flt_cmd_lst, stdout=PIPE)
    out_str = p1.communicate()
    # Echo the output for error tracking. stderr is not piped, so
    # out_str[1] is None and the child writes to our stderr directly.
    if out_str[0] is not None:
        sys.stdout.write(out_str[0] + "\n")
    if out_str[1] is not None:
        sys.stderr.write(out_str[1] + "\n")

    ###
    # put it back to workspace
    outdata = {
        'key': indata['key'],
        'value': "{}{}".format(indata['value'], indata['value']),
    }
    data_list = [{'type': 'MyService.PairString',
                  'data': outdata,
                  'name': args.outobj_id,
                  'meta': {'org.series': args.inobj_id}}]
    wsd.save_objects({'workspace': args.ws_id, 'objects': data_list})

    # `is "true"` compared object identity and was effectively never true
    # for a value parsed at runtime; compare by value instead.
    if args.del_tmps == "true":
        for tmp_path in (exp_path, out_path):
            # Nothing in this function guarantees the files were created,
            # so only remove what is actually there.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
예제 #4
0
class Validator(TransformBase):
    """Runs the validation command registered for an external data type.

    The command table comes from a configuration object stored in a
    workspace: ``config['validator'][etype]`` supplies ``cmd_name``,
    ``cmd_args['input']`` and an ``opt_args`` map from option name to
    command-line flag.
    """

    def __init__(self, args):
        """Copy the settings from ``args`` and load the configuration map.

        Args:
            args: Object exposing ``ws_url``, ``cfg_name``, ``sws_id``,
                ``etype`` and ``opt_args`` attributes.

        Raises:
            Exception: If the ``config_map`` of the loaded object is ``None``.
        """
        TransformBase.__init__(self, args)
        self.ws_url = args.ws_url
        self.cfg_name = args.cfg_name
        self.sws_id = args.sws_id
        self.etype = args.etype
        self.opt_args = args.opt_args

        # Download the configuration object that tells us where the
        # validation script is located.
        # NOTE(review): self.token is not assigned in this class; presumably
        # TransformBase.__init__ sets it -- confirm.
        self.wsd = Workspace(url=self.ws_url, token=self.token)
        cfg_request = {'id': self.cfg_name, 'workspace': self.sws_id}
        self.config = self.wsd.get_object(cfg_request)['data']['config_map']

        if self.config is None:
            raise Exception("Object {} not found in workspace {}".format(self.cfg_name, self.sws_id))

    def validation_handler(self):
        """Run the registered validator once for every input path.

        The input is ``<sdir>/<itmp>`` when that path exists, otherwise every
        path matching ``<sdir>/<itmp>_*``.

        Raises:
            Exception: If no validator is registered for ``self.etype``, or
                if a validator exits with a non-zero status (the message is
                the validator's captured stderr).
        """
        ## TODO: Add logging
        registry = self.config['validator']
        if self.etype not in registry:
            raise Exception("No validation script was registered for {}".format(self.etype))
        entry = registry[self.etype]

        # NOTE(review): self.sdir and self.itmp are not assigned in this
        # class; presumably provided by TransformBase -- confirm.
        whole_file = "{}/{}".format(self.sdir, self.itmp)
        if os.path.exists(whole_file):
            fd_list = [whole_file]
        else:
            fd_list = glob.glob("{}/{}_*".format(self.sdir, self.itmp))

        user_opts = self.opt_args['validator'] if 'validator' in self.opt_args else {}

        for fd in fd_list:
            vcmd_lst = [entry['cmd_name'], entry['cmd_args']['input'], fd]

            for k in user_opts:
                # The lookup used the undefined name `etype` (NameError)
                # instead of `self.etype`.
                if k in entry['opt_args'] and user_opts[k] is not None:
                    vcmd_lst.append(entry['opt_args'][k])
                    vcmd_lst.append(user_opts[k])

            # Capture stderr too: without it communicate() returned None for
            # stderr, so the exception raised on failure had no message.
            p1 = Popen(vcmd_lst, stdout=PIPE, stderr=PIPE)
            out, err = p1.communicate()
            # Echo the output for error tracking.
            if out is not None:
                sys.stdout.write(out + "\n")
            if err:
                sys.stderr.write(err + "\n")

            if p1.returncode != 0:
                raise Exception(err)
예제 #5
0
def get_genome(genome_id=None,workspace_id=None,token=None,workspace_url=None):
    """Fetch a ``KBaseGenomes.Genome`` object from a workspace.

    Args:
        genome_id: Id of the genome object to fetch.
        workspace_id: Workspace that holds the object.
        token: Auth token passed through to the ``Workspace`` client.
        workspace_url: Workspace service URL; ``None`` selects
            ``https://ci.kbase.us/services/ws``.

    Returns:
        The value returned by ``Workspace.get_object`` for the request.
    """
    endpoint = workspace_url
    if endpoint is None:
        endpoint = 'https://ci.kbase.us/services/ws'

    request = {
        'id': genome_id,
        'workspace': workspace_id,
        'type': 'KBaseGenomes.Genome',
    }
    return Workspace(url=endpoint, token=token).get_object(request)
예제 #6
0
def handler(args):
    """Download a validation script and an input file from Shock and run it.

    Steps, in order:
      1. Fetch object ``args.etype`` from workspace ``args.sws_id``.
      2. Create the scratch directory ``args.sdir``.
      3. Download the script to ``<sdir>/validator`` and Shock node
         ``args.inobj_id`` to ``<sdir>/in_file``.
      4. Run ``<sdir>/validator -i <sdir>/in_file`` and echo its output.

    Args:
        args: Object exposing ``ws_url``, ``sws_id``, ``etype``,
            ``inobj_id``, ``sdir`` and ``shock_url``.

    Raises:
        Exception: If the workspace object has no data, the scratch
            directory cannot be created, or the script's Shock node id is
            missing.
    """
    wsd = Workspace(url=args.ws_url, token=os.environ.get('KB_AUTH_TOKEN'))
    indata = wsd.get_object({'id': args.etype,
                             'workspace': args.sws_id})['data']

    if indata is None:
        # The object that was looked up is args.etype (the message used to
        # name args.inobj_id, which is the Shock node of the input data).
        raise Exception("Object {} not found in workspace {}".format(args.etype, args.sws_id))

    try:
        os.mkdir(args.sdir)
    except OSError as err:
        # Keep the original failure mode but report the underlying cause.
        raise Exception("Could not create directory {}: {}".format(args.sdir, err))

    script_info = indata['validation_script']
    if script_info['id'] is None:
        raise Exception("Script Shock node id information is not provided")

    # A Shock URL stored with the script overrides the one from args.
    surl = args.shock_url
    if script_info['shock_url'] is not None:
        surl = script_info['shock_url']

    def _download(url, path):
        # Copy the body of `url` into `path`; both handles are closed even
        # when reading or writing fails.
        remote = urllib.urlopen(url)
        try:
            with open(path, 'w') as local:
                local.write(remote.read())
        finally:
            remote.close()

    # TODO: add compressed file handling using meta (tar.gz, tgz, etc).
    # The metadata response is not used yet; close it right away so the
    # connection is not leaked.
    meta = urllib.urlopen("{}/node/{}".format(surl, script_info['id']))
    meta.close()

    validator_path = "{}/validator".format(args.sdir)
    input_path = "{}/in_file".format(args.sdir)

    # NOTE(review): the script is downloaded from node indata['id'] while the
    # id check and the metadata request above use
    # indata['validation_script']['id'] -- looks like a mismatch; confirm
    # which node id is intended.
    _download("{}/node/{}?Download".format(surl, indata['id']), validator_path)
    _download("{}/node/{}?Download".format(surl, args.inobj_id), input_path)

    ###
    # execute validation
    # NOTE(review): this executes a file that was just downloaded; make sure
    # the Shock source is trusted.
    vcmd_lst = [validator_path, "-i", input_path]

    p1 = Popen(vcmd_lst, stdout=PIPE)
    out_str = p1.communicate()
    # Echo the output for error tracking. stderr is not piped, so
    # out_str[1] is None and the child writes to our stderr directly.
    if out_str[0] is not None:
        sys.stdout.write(out_str[0] + "\n")
    if out_str[1] is not None:
        sys.stderr.write(out_str[1] + "\n")
예제 #7
0
def get_genome(genome_id=None,
               workspace_id=None,
               token=None,
               workspace_url=None):
    """Retrieve a ``KBaseGenomes.Genome`` object via the workspace client.

    Args:
        genome_id: Id of the genome object to retrieve.
        workspace_id: Workspace in which the object lives.
        token: Auth token forwarded to the ``Workspace`` client.
        workspace_url: Workspace service URL. If ``None``,
            ``https://ci.kbase.us/services/ws`` is used.

    Returns:
        The value returned by ``Workspace.get_object`` for the request.
    """
    service_url = ('https://ci.kbase.us/services/ws'
                   if workspace_url is None else workspace_url)
    client = Workspace(url=service_url, token=token)

    lookup = dict(id=genome_id,
                  workspace=workspace_id,
                  type='KBaseGenomes.Genome')
    return client.get_object(lookup)
예제 #8
0
def get_genome(genome_id=None,workspace_id=None,token=None,workspace_url=None):
    """Download a ``KBaseGenomes.Genome`` object from a workspace.

    Args:
        genome_id: Id of the genome object to fetch. When ``None`` the
            previously hard-coded id
            ``Bifidobacterium_animalis_subsp._lactis_AD011`` is used.
        workspace_id: Workspace that holds the object. When ``None`` the
            previously hard-coded workspace ``plane83:1436884411390`` is used.
        token: Auth token handed to the ``Workspace`` client. When ``None``
            it is read from
            ``/kb/dev_container/modules/genome_util/mytoken.txt``.
        workspace_url: Workspace service URL; defaults to
            ``https://kbase.us/services/ws/``.

    Returns:
        The value returned by ``Workspace.get_object`` for the request.
    """
    if workspace_url is None:
        workspace_url = 'https://kbase.us/services/ws/'
    if token is None:
        with open('/kb/dev_container/modules/genome_util/mytoken.txt') as token_file:
            # Drop the trailing newline / whitespace of the token file.
            token = token_file.read().rstrip()

    # The request used to ignore genome_id / workspace_id and always fetch
    # one hard-coded genome. The arguments are honoured now; the old
    # constants are kept as defaults so argument-less calls are unchanged.
    if genome_id is None:
        genome_id = 'Bifidobacterium_animalis_subsp._lactis_AD011'
    if workspace_id is None:
        workspace_id = 'plane83:1436884411390'

    workspace_client = Workspace(url=workspace_url, token=token)
    genome = workspace_client.get_object({'id': genome_id,
                                          'type': 'KBaseGenomes.Genome',
                                          'workspace': workspace_id})

    return genome
예제 #9
0
class Downloader(TransformBase):
    """Exports a workspace object to files, raw or via a registered converter.

    Converters are looked up in the workspace-stored configuration under
    ``config['down_transformer']['<kbtype>-to-<etype>']``.
    """

    def __init__(self, args):
        """Copy the settings from ``args`` and load the configuration map.

        Args:
            args: Object exposing ``ws_url``, ``cfg_name``, ``sws_id``,
                ``etype``, ``opt_args``, ``kbtype``, ``ws_id`` and ``jid``.

        Raises:
            Exception: If the ``config_map`` of the loaded object is ``None``.
        """
        TransformBase.__init__(self, args)
        self.ws_url = args.ws_url
        self.cfg_name = args.cfg_name
        self.sws_id = args.sws_id
        self.etype = args.etype
        self.opt_args = args.opt_args
        self.kbtype = args.kbtype
        self.ws_id = args.ws_id
        self.jid = args.jid

        # Download the configuration object holding the command table.
        # NOTE(review): self.token (and sdir, itmp, otmp, inobj_id used
        # below) are not assigned in this class; presumably
        # TransformBase.__init__ sets them -- confirm.
        self.wsd = Workspace(url=self.ws_url, token=self.token)
        self.config = self.wsd.get_object({'id': self.cfg_name, 'workspace': self.sws_id})['data']['config_map']

        if self.config is None:
            raise Exception("Object {} not found in workspace {}".format(self.cfg_name, self.sws_id))

    def _make_sdir(self):
        """Create ``self.sdir`` on a best-effort basis."""
        try:
            os.mkdir(self.sdir)
        except OSError:
            # Typically the directory already exists. Any other problem
            # shows up as soon as the directory is used, so it is
            # deliberately not raised here (same best-effort as before,
            # but no longer swallowing unrelated exception types).
            pass

    def download_ws_data(self):
        """Dump the data of object ``self.inobj_id`` as JSON to ``<sdir>/<itmp>``."""
        self._make_sdir()

        # `with` guarantees the file is closed even if the workspace call
        # or the JSON dump raises.
        with open("{}/{}".format(self.sdir, "{}".format(self.itmp)), 'w') as dif:
            data = self.wsd.get_object({'id': self.inobj_id, 'workspace': self.ws_id})['data']
            json.dump(data, dif)

    def download_handler(self):
        """Run the converter registered for ``<kbtype>-to-<etype>``.

        The command receives the workspace id, the object id and the output
        path ``<sdir>/<otmp>``, each preceded by the flag configured in
        ``cmd_args``, followed by any configured optional arguments.

        Raises:
            Exception: If the conversion is not fully defined in the
                configuration, or if the command exits with a non-zero
                status (the message is the command's captured stderr).
        """
        self._make_sdir()

        conv_type = "{}-to-{}".format(self.kbtype, self.etype)
        transformers = self.config['down_transformer']
        required_args = ('inobj_id', 'ws_id', 'output')
        if conv_type not in transformers or any(
                name not in transformers[conv_type]['cmd_args']
                for name in required_args):
            raise Exception("{} to {} conversion was not properly defined!".format(self.kbtype, self.etype))

        spec = transformers[conv_type]
        cmd_args = spec['cmd_args']
        vcmd_lst = [spec['cmd_name'],
                    cmd_args['ws_id'], self.ws_id,
                    cmd_args['inobj_id'], self.inobj_id,
                    cmd_args['output'], "{}/{}".format(self.sdir, self.otmp)]

        if 'down_transformer' in self.opt_args:
            opt_args = self.opt_args['down_transformer']
            for k in opt_args:
                if k in spec['opt_args'] and opt_args[k] is not None:
                    vcmd_lst.append(spec['opt_args'][k])
                    vcmd_lst.append(opt_args[k])

        # Capture stderr too: without it communicate() returned None for
        # stderr, so the exception raised on failure had no message.
        p1 = Popen(vcmd_lst, stdout=PIPE, stderr=PIPE)
        out, err = p1.communicate()

        if out is not None:
            sys.stdout.write(out + "\n")
        if err:
            sys.stderr.write(err + "\n")

        if p1.returncode != 0:
            raise Exception(err)
예제 #10
0
def gl2networks(args):
    """Build a ``KBaseNetworks.Network`` for a gene list and save it.

    The gene ids are taken from element 2 of every entry of the ``genes``
    list of workspace object ``args.inobj_id``. Associations between pairs
    of those genes are read from the database, turned into edges, and the
    resulting network is saved as ``args.outobj_id`` in ``args.ws_id``.

    Args:
        args: Object exposing ``ws_url``, ``ws_id``, ``inobj_id``,
            ``outobj_id``, ``db_host``, ``db_user``, ``db_pass`` and
            ``db_name``.

    On a database error the error is printed and the process exits with
    status 1.
    """
    wsd = Workspace(url=args.ws_url, token=os.environ.get('KB_AUTH_TOKEN'))
    raw_data = wsd.get_object({'id': args.inobj_id,
                               'workspace': args.ws_id})['data']

    gl = [gr[2] for gr in raw_data['genes']]

    # The ids come from a workspace object, so they are bound as query
    # parameters instead of being pasted into the SQL text.
    # NOTE(review): assumes `mdb` is a DB-API driver using the "format"
    # paramstyle (%s placeholders), e.g. MySQLdb -- confirm.
    # An empty list is queried as a single empty string, which matches the
    # `IN ('')` the string-built query produced before.
    gene_params = tuple(gl) or ('',)
    gene_marks = ",".join(["%s"] * len(gene_params))
    sql = ("SELECT DISTINCT af1.to_link, af2.to_link, f1.source_id, f2.source_id, af1.strength, ig.from_link "
           "FROM IsGroupingOf ig, AssociationFeature af1, AssociationFeature af2, Feature f1, Feature f2 "
           "WHERE ig.to_link =  af1.from_link and af1.from_link = af2.from_link "
           "and (af1.to_link IN ({0}) AND af2.to_link IN ({0}) ) "
           "AND af1.to_link < af2.to_link AND f1.id = af1.to_link AND f2.id = af2.to_link").format(gene_marks)

    nc = Node()
    datasets = []
    con = None

    try:
        con = mdb.connect(args.db_host, args.db_user, args.db_pass, args.db_name)
        cur = con.cursor()
        # The gene list is referenced twice in the query.
        cur.execute(sql, gene_params + gene_params)

        # One edge per association row; remember every dataset id seen.
        dsid = set()
        for edge in iter(cur.fetchone, None):
            nc.add_edge(edge[4], edge[5], edge[0], 'GENE', edge[1], 'GENE', 0.0, edge[2], edge[3])
            dsid.add(edge[5])

        ds_params = tuple(dsid) or ('',)
        ds_marks = ",".join(["%s"] * len(ds_params))
        cur.execute("SELECT id, association_type, data_source, description , df.to_link, sr.from_link FROM AssociationDataset, IsDatasetFor df, IsSourceForAssociationDataset sr WHERE id = df.from_link and id = sr.to_link and id IN({})".format(ds_marks), ds_params)
        for ds in iter(cur.fetchone, None):
            datasets.append({
                'network_type': ds[1],
                'taxons': [ds[4]],
                'source_ref': ds[5],
                'name': ds[0],
                'id': ds[0],
                'description': ds[3],
                'properties': {}
            })

        # generate Networks object
        net_object = {
            'datasets': datasets,
            'nodes': nc.nodes,
            'edges': nc.edges,
            'user_annotations': {"genes": ",".join(gl)},
            'name': 'GeneList Internal Network',
            'id': args.outobj_id,
            'properties': {
                'graphType': 'edu.uci.ics.jung.graph.SparseMultigraph'
            }
        }

        # Store results object into workspace
        wsd.save_objects({'workspace': args.ws_id, 'objects': [{'type': 'KBaseNetworks.Network', 'data': net_object, 'name': args.outobj_id, 'meta': {'org_obj_id': args.inobj_id, 'org_ws_id': args.ws_id}}]})

    except mdb.Error as e:
        sys.stdout.write("Error %d: %s\n" % (e.args[0], e.args[1]))
        sys.exit(1)
    finally:
        # The connection used to be left open; release it on every path,
        # including the sys.exit above.
        if con is not None:
            con.close()
예제 #11
0
def net_clust(args):
    """Build a co-expression network with clusters and save it.

    Steps, in order:
      1. Fetch the ``KBaseExpression.ExpressionSeries`` ``args.inobj_id``
         and the samples of its first genome id (sorted order).
      2. Write the expression levels as CSV to ``args.exp_fn``.
      3. Run ``coex_net`` and ``coex_cluster2`` on that file.
      4. Turn ``args.net_fn`` and ``args.clust_fn`` into edges and save a
         ``KBaseNetworks.Network`` named ``args.outobj_id``.
      5. When ``args.del_tmps`` equals ``"true"``, delete the three files.

    Args:
        args: Object exposing ``ws_url``, ``ws_id``, ``inobj_id``,
            ``outobj_id``, ``exp_fn``, ``net_fn``, ``clust_fn``,
            ``nmethod``, ``cmethod``, ``cut_off``, ``k`` and ``del_tmps``.

    Raises:
        COEXException: If the fetched series data is ``None``.
    """

    def _add_options(cmd_lst, options):
        # Append "flag value" for every option whose value is not None.
        for flag, value in options:
            if value is not None:
                cmd_lst.append(flag)
                cmd_lst.append(value)
        return cmd_lst

    def _run(cmd_lst):
        # Run one command, echo its captured output and return the command
        # line as text. stderr is not piped, so the second element of
        # communicate() is None and the child writes to our stderr directly.
        proc = Popen(cmd_lst, stdout=PIPE)
        out_str = proc.communicate()
        if out_str[0] is not None:
            sys.stdout.write(out_str[0] + "\n")
        if out_str[1] is not None:
            sys.stderr.write(out_str[1] + "\n")
        return " ".join(cmd_lst)

    ###
    # download ws object and convert them to csv
    wsd = Workspace(url=args.ws_url, token=os.environ.get('KB_AUTH_TOKEN'))
    lseries = wsd.get_object({'id': args.inobj_id,
                              'type': 'KBaseExpression.ExpressionSeries',
                              'workspace': args.ws_id})['data']

    if lseries is None:
        raise COEXException("Object {} not found in workspace {}".format(args.inobj_id, args.ws_id))

    samples, sids, genome_id = {}, [], ""
    # assume only one genome id: only the first (sorted) entry is used
    sample_map = lseries['genome_expression_sample_ids_map']
    for gid in sorted(sample_map.keys()):
        genome_id = gid
        sids = [{'ref': samid} for samid in sample_map[gid]]
        samples = wsd.get_objects(sids)
        break

    with open(args.exp_fn, 'w') as cif:
        cif.write(",".join([s['data']['source_id'] for s in samples]) + "\n")
        gids = samples[0]['data']['expression_levels'].keys()  # each sample has same gids
        for gid in sorted(gids):
            levels = [str(s['data']['expression_levels'][gid]) for s in samples]
            cif.write(gid + "," + ",".join(levels) + "\n")

    ###
    # generate network and cluster
    net_cmd = _run(_add_options(['coex_net', '-i', args.exp_fn],
                                [("-m", args.nmethod),
                                 ("-c", args.cut_off),
                                 ("-o", args.net_fn)]))

    clust_cmd = _run(_add_options(['coex_cluster2', '-i', args.exp_fn],
                                  [("-c", args.cmethod),
                                   ("-n", args.nmethod),
                                   ("-s", args.k),
                                   ("-o", args.clust_fn)]))

    ###
    # Create network object
    # generate Networks datasets
    net_ds_id = args.inobj_id + ".net"
    clt_ds_id = args.inobj_id + ".clt"

    def _dataset(ds_id, description, cmd_key, cmd):
        # Dataset record shared by the network and the cluster dataset.
        return {
            'network_type': 'FUNCTIONAL_ASSOCIATION',
            'taxons': [genome_id],
            'source_ref': 'WORKSPACE',
            'name': ds_id,
            'id': ds_id,
            'description': description,
            'properties': {
                'original_data_type': 'workspace',
                'original_ws_id': args.ws_id,
                'original_obj_id': args.inobj_id,
                cmd_key: cmd
            }
        }

    # The network dataset used to carry the *cluster* dataset id (copy-paste
    # slip), leaving both datasets with the same id while the network edges
    # below reference net_ds_id.
    datasets = [
        _dataset(net_ds_id, "Coexpression network object of " + args.inobj_id,
                 'coex_net_cmd', net_cmd),
        _dataset(clt_ds_id, "Coexpression cluster object of " + args.inobj_id,
                 'coex_clust_cmd', clust_cmd),
    ]

    nc = Node()

    # process coex network file
    with open(args.net_fn, 'r') as cnf:
        cnf.readline()  # skip header
        for line in cnf:
            # strip() result was discarded before; keep it so no field
            # carries the trailing newline.
            values = line.strip().replace('"', '').split(',')
            # self-pairs carry no information, so only distinct pairs
            # become edges
            if values[0] != values[1]:
                nc.add_edge(float(values[2]), net_ds_id, values[0], 'GENE', values[1], 'GENE', 0.0)

    # process coex cluster file
    with open(args.clust_fn, 'r') as cnf:
        cnf.readline()  # skip header
        for line in cnf:
            values = line.strip().replace('"', '').split(',')
            nc.add_edge(1.0, clt_ds_id, values[0], 'GENE', "cluster." + values[1], 'CLUSTER', 0.0)

    # generate Networks object
    net_object = {
        'datasets': datasets,
        'nodes': nc.nodes,
        'edges': nc.edges,
        'user_annotations': {},
        'name': 'Coexpression Network',
        'id': args.outobj_id,
        'properties': {
            'graphType': 'edu.uci.ics.jung.graph.SparseMultigraph'
        }
    }

    # Store results object into workspace
    wsd.save_objects({'workspace': args.ws_id, 'objects': [{'type': 'KBaseNetworks.Network', 'data': net_object, 'name': args.outobj_id, 'meta': {'org_obj_id': args.inobj_id, 'org_ws_id': args.ws_id}}]})

    # `is "true"` compared object identity and was effectively never true
    # for a value parsed at runtime; compare by value instead.
    if args.del_tmps == "true":
        os.remove(args.exp_fn)
        os.remove(args.net_fn)
        os.remove(args.clust_fn)
예제 #12
0
def mys_example(args):
    """Run the ``mys_example`` command and save a doubled pair to the workspace.

    Steps, in order:
      1. Fetch object ``args.inobj_id`` from workspace ``args.ws_id``.
      2. Run ``mys_example -i <pid>-<exp_fn>`` with the optional ``-m``,
         ``-p``, ``-n`` and ``-o`` arguments taken from ``args`` and echo
         its captured output.
      3. Save a ``MyService.PairString`` object named ``args.outobj_id``
         whose ``value`` is the input ``value`` concatenated with itself.
      4. When ``args.del_tmps`` equals ``"true"``, delete the two
         pid-prefixed temporary files if they exist.

    Args:
        args: Object exposing ``ws_url``, ``ws_id``, ``inobj_id``,
            ``outobj_id``, ``exp_fn``, ``flt_out_fn``, ``method``,
            ``p_value``, ``num_genes`` and ``del_tmps``.

    Raises:
        Exception: If the fetched object's data is ``None``.
    """
    wsd = Workspace(url=args.ws_url, token=os.environ.get('KB_AUTH_TOKEN'))
    indata = wsd.get_object({
        'id': args.inobj_id,
        'workspace': args.ws_id
    })['data']

    if indata is None:
        raise Exception("Object {} not found in workspace {}".format(
            args.inobj_id, args.ws_id))

    # Temporary file names are prefixed with the pid of this process.
    pid = os.getpid()
    exp_path = "{}-{}".format(pid, args.exp_fn)
    out_path = "{}-{}".format(pid, args.flt_out_fn)

    ###
    # execute filtering
    flt_cmd_lst = ['mys_example', "-i", exp_path]
    optional = (
        ('-m', args.method),
        ('-p', args.p_value),
        ('-n', args.num_genes),
    )
    for flag, value in optional:
        if value is not None:
            flt_cmd_lst.extend([flag, value])
    if args.flt_out_fn is not None:
        flt_cmd_lst.extend(['-o', out_path])

    p1 = Popen(flt_cmd_lst, stdout=PIPE)
    out_str = p1.communicate()
    # Echo the output for error tracking. stderr is not piped, so
    # out_str[1] is None and the child writes to our stderr directly.
    if out_str[0] is not None:
        sys.stdout.write(out_str[0] + "\n")
    if out_str[1] is not None:
        sys.stderr.write(out_str[1] + "\n")

    ###
    # put it back to workspace
    outdata = {
        'key': indata['key'],
        'value': "{}{}".format(indata['value'], indata['value']),
    }
    data_list = [{
        'type': 'MyService.PairString',
        'data': outdata,
        'name': args.outobj_id,
        'meta': {
            'org.series': args.inobj_id
        }
    }]
    wsd.save_objects({'workspace': args.ws_id, 'objects': data_list})

    # `is "true"` compared object identity and was effectively never true
    # for a value parsed at runtime; compare by value instead.
    if args.del_tmps == "true":
        for tmp_path in (exp_path, out_path):
            # Nothing in this function guarantees the files were created,
            # so only remove what is actually there.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
예제 #13
0
def filter_expression(args):
    """Filter an expression series with ``coex_filter`` and save the result.

    Steps, in order:
      1. Fetch the ``KBaseExpression.ExpressionSeries`` ``args.inobj_id``
         and the samples of its first genome id (sorted order).
      2. Write the expression levels of the genes common to all samples as
         CSV to ``args.exp_fn`` and the sample indices to ``args.rp_smp_fn``.
      3. Run ``coex_filter`` and echo its captured output.
      4. Read ``args.flt_out_fn`` back, save one filtered
         ``ExpressionSample`` per input sample, then save the updated
         series as ``args.outobj_id``.
      5. When ``args.del_tmps`` equals ``"true"``, delete the three files.

    Args:
        args: Object exposing ``ws_url``, ``ws_id``, ``inobj_id``,
            ``outobj_id``, ``exp_fn``, ``rp_smp_fn``, ``flt_out_fn``,
            ``method``, ``p_value``, ``num_genes`` and ``del_tmps``.

    Raises:
        COEXException: If the fetched series data is ``None``.
    """
    ###
    # download ws object and convert them to csv
    wsd = Workspace(url=args.ws_url, token=os.environ.get('KB_AUTH_TOKEN'))
    lseries = wsd.get_object({'id': args.inobj_id,
                              'type': 'KBaseExpression.ExpressionSeries',
                              'workspace': args.ws_id})['data']

    if lseries is None:
        raise COEXException("Object {} not found in workspace {}".format(args.inobj_id, args.ws_id))

    samples, sids, genome_id = {}, [], ""
    # assume only one genome id: only the first (sorted) entry is used
    sample_map = lseries['genome_expression_sample_ids_map']
    for gid in sorted(sample_map.keys()):
        genome_id = gid
        sids = [{'ref': samid} for samid in sample_map[gid]]
        samples = wsd.get_objects(sids)
        break

    with open(args.exp_fn, 'w') as cif:
        cif.write(",".join([s['data']['source_id'] for s in samples]) + "\n")

        # find common gene list: keep only genes present in every sample
        gids = set(samples[0]['data']['expression_levels'].keys())
        for s in samples:
            gids = gids.intersection(set(s['data']['expression_levels'].keys()))
        for gid in sorted(gids):
            levels = [str(s['data']['expression_levels'][gid]) for s in samples]
            cif.write(gid + "," + ",".join(levels) + "\n")

    with open(args.rp_smp_fn, 'w') as sif:
        sif.write(",".join(map(str, range(len(samples)))) + "\n")

    ###
    # execute filtering
    flt_cmd_lst = ['coex_filter', "-i", args.exp_fn]
    for flag, value in (('-m', args.method),
                        ('-p', args.p_value),
                        ('-n', args.num_genes),
                        ('-o', args.flt_out_fn),
                        ('-s', args.rp_smp_fn)):
        if value is not None:
            flt_cmd_lst.append(flag)
            flt_cmd_lst.append(value)

    p1 = Popen(flt_cmd_lst, stdout=PIPE)
    out_str = p1.communicate()
    # Echo the output for error tracking. stderr is not piped, so
    # out_str[1] is None and the child writes to our stderr directly.
    if out_str[0] is not None:
        sys.stdout.write(out_str[0] + "\n")
    if out_str[1] is not None:
        sys.stderr.write(out_str[1] + "\n")
    flt_cmd = " ".join(flt_cmd_lst)

    ###
    # put it back to workspace
    nsamples = len(samples)
    # elm[i] maps gene id -> filtered expression level of sample i
    elm = [{} for _ in range(nsamples)]

    with open(args.flt_out_fn, 'r') as fif:
        fif.readline()  # skip header
        for line in fif:
            # strip() result was discarded before; keep it so no field
            # carries the trailing newline.
            values = line.strip().split(',')
            gene_id = values[0].replace("\"", "")
            for i in range(nsamples):
                elm[i][gene_id] = float(values[i + 1])

    data_list = []
    for i in range(nsamples):
        sdata = samples[i]['data']
        sdata['expression_levels'] = elm[i]
        if sdata['title'] is None:
            sdata['title'] = " Filtered by coex-filter-genes"
        else:
            sdata['title'] += " filtered by coex-filter-genes"
        if sdata['description'] is None:
            sdata['description'] = "Generated by " + flt_cmd
        else:
            sdata['description'] += " Generated by " + flt_cmd
        sdata['id'] += ".filtered"
        sdata['source_id'] += ".filtered"
        data_list.append({'type': 'KBaseExpression.ExpressionSample', 'data': sdata, 'name': sdata['id']})
    sv_rst = wsd.save_objects({'workspace': args.ws_id, 'objects': data_list})

    # Reference of every saved sample, joined from elements 6, 0 and 4 of
    # its save result.
    # NOTE(review): presumably workspace id / object id / version -- confirm
    # against the Workspace object_info layout.
    sid_list = ["/".join(str(sv_rst[i][pos]) for pos in (6, 0, 4))
                for i in range(nsamples)]

    # assume only one genome id
    lseries['genome_expression_sample_ids_map'][genome_id] = sid_list
    lseries['title'] += " filtered by coex_filter for " + genome_id
    lseries['source_id'] += ".filtered"
    lseries['id'] = args.outobj_id
    data_list = [{'type': 'KBaseExpression.ExpressionSeries', 'data': lseries, 'name': lseries['id'], 'meta': {'org.series': args.inobj_id}}]
    wsd.save_objects({'workspace': args.ws_id, 'objects': data_list})

    # `is "true"` compared object identity and was effectively never true
    # for a value parsed at runtime; compare by value instead.
    if args.del_tmps == "true":
        os.remove(args.exp_fn)
        os.remove(args.rp_smp_fn)
        os.remove(args.flt_out_fn)
예제 #14
0
class Downloader(TransformBase):
    """Exports a workspace object to files, raw or via a registered converter.

    Converters are looked up in the workspace-stored configuration under
    ``config['down_transformer']['<kbtype>-to-<etype>']``.
    """

    def __init__(self, args):
        """Copy the settings from ``args`` and load the configuration map.

        Args:
            args: Object exposing ``ws_url``, ``cfg_name``, ``sws_id``,
                ``etype``, ``opt_args``, ``kbtype``, ``ws_id`` and ``jid``.

        Raises:
            Exception: If the ``config_map`` of the loaded object is ``None``.
        """
        TransformBase.__init__(self, args)
        self.ws_url = args.ws_url
        self.cfg_name = args.cfg_name
        self.sws_id = args.sws_id
        self.etype = args.etype
        self.opt_args = args.opt_args
        self.kbtype = args.kbtype
        self.ws_id = args.ws_id
        self.jid = args.jid

        # Download the configuration object holding the command table.
        # NOTE(review): self.token (and sdir, itmp, otmp, inobj_id used
        # below) are not assigned in this class; presumably
        # TransformBase.__init__ sets them -- confirm.
        self.wsd = Workspace(url=self.ws_url, token=self.token)
        self.config = self.wsd.get_object({
            'id': self.cfg_name,
            'workspace': self.sws_id
        })['data']['config_map']

        if self.config is None:
            raise Exception("Object {} not found in workspace {}".format(
                self.cfg_name, self.sws_id))

    def _ensure_sdir(self):
        """Create ``self.sdir`` on a best-effort basis."""
        try:
            os.mkdir(self.sdir)
        except OSError:
            # Typically the directory already exists. Any other problem
            # shows up as soon as the directory is used, so it is
            # deliberately not raised here (same best-effort as before,
            # but no longer swallowing unrelated exception types).
            pass

    def download_ws_data(self):
        """Dump the data of object ``self.inobj_id`` as JSON to ``<sdir>/<itmp>``."""
        self._ensure_sdir()

        target = "{}/{}".format(self.sdir, "{}".format(self.itmp))
        # `with` guarantees the file is closed even if the workspace call
        # or the JSON dump raises.
        with open(target, 'w') as dif:
            data = self.wsd.get_object({
                'id': self.inobj_id,
                'workspace': self.ws_id
            })['data']
            json.dump(data, dif)

    def download_handler(self):
        """Run the converter registered for ``<kbtype>-to-<etype>``.

        The command receives the workspace id, the object id and the output
        path ``<sdir>/<otmp>``, each preceded by the flag configured in
        ``cmd_args``, followed by any configured optional arguments.

        Raises:
            Exception: If the conversion is not fully defined in the
                configuration, or if the command exits with a non-zero
                status (the message is the command's captured stderr).
        """
        self._ensure_sdir()

        conv_type = "{}-to-{}".format(self.kbtype, self.etype)
        registry = self.config['down_transformer']

        # The conversion must exist and name a flag for each of the three
        # mandatory arguments.
        well_defined = conv_type in registry and all(
            name in registry[conv_type]['cmd_args']
            for name in ('inobj_id', 'ws_id', 'output'))
        if not well_defined:
            raise Exception(
                "{} to {} conversion was not properly defined!".format(
                    self.kbtype, self.etype))

        entry = registry[conv_type]
        flags = entry['cmd_args']
        vcmd_lst = [
            entry['cmd_name'],
            flags['ws_id'],
            self.ws_id,
            flags['inobj_id'],
            self.inobj_id,
            flags['output'],
            "{}/{}".format(self.sdir, self.otmp)
        ]

        if 'down_transformer' in self.opt_args:
            opt_args = self.opt_args['down_transformer']
            for k in opt_args:
                if k in entry['opt_args'] and opt_args[k] is not None:
                    vcmd_lst.append(entry['opt_args'][k])
                    vcmd_lst.append(opt_args[k])

        # Capture stderr too: without it communicate() returned None for
        # stderr, so the exception raised on failure had no message.
        p1 = Popen(vcmd_lst, stdout=PIPE, stderr=PIPE)
        out, err = p1.communicate()

        if out is not None:
            sys.stdout.write(out + "\n")
        if err:
            sys.stderr.write(err + "\n")

        if p1.returncode != 0:
            raise Exception(err)