Esempio n. 1
0
 def runBase(self, k_neighbors):
     """Score every sample against each allele group, then classify.

     For each strain from ``self.getStrains()`` and each pathway from
     ``self.getMyPathways()``, every ordered (base allele, comparison
     allele) pair is visited.  For each ``(age, samp)`` entry of the base
     allele, ``self.kNearest`` picks neighbours among the comparison-allele
     samples, ``dirac.getRT`` builds a template from those columns of the
     pathway table, and ``self.getRMS`` scores the sample against it.  The
     score is stored via ``self.setRMS`` under the row label
     ``"<pathway>_<comparison allele>"``.

     Strains are initialised with ``shuffle=False``.

     :param k_neighbors: passed through unchanged to ``self.kNearest``.
     :returns: the result of ``self.classify()`` after all strains are done.
     """
     logging.info("running base")
     for cstrain in self.getStrains():
         logging.info("Strain[%s] starting" % cstrain)
         mypws = self.getMyPathways()
         alleles = self.getAlleles(cstrain)
         self.initStrain(cstrain, mypws, shuffle=False)
         for pw in mypws:
             # Templates depend only on the neighbour set within one pathway
             # table, so the cache is reset whenever the pathway changes.
             rt_cache = {}
             srts = self.genSRTs(cstrain, pw)
             for a_base, a_compare in itertools.product(alleles, alleles):
                 r_index = "%s_%s" % (pw, a_compare)
                 base_samp = self.getSamplesByAllele(cstrain, a_base)
                 comp_samp = self.getSamplesByAllele(cstrain, a_compare)
                 for age, samp in base_samp:
                     neighbors = self.kNearest(comp_samp, samp, age,
                                               k_neighbors)
                     # Key on the tuple of names rather than their
                     # concatenation: ("ab", "c") and ("a", "bc") would join
                     # to the same string and reuse the wrong template.
                     cache_key = tuple(neighbors)
                     if cache_key not in rt_cache:
                         rt_cache[cache_key] = dirac.getRT(
                             srts.loc[:, neighbors])
                     rt = rt_cache[cache_key]
                     rms = self.getRMS(rt, srts[samp])
                     self.setRMS(rms, r_index, samp)
     return self.classify()
Esempio n. 2
0
 def runBase(self, k_neighbors):
     """Run the unshuffled scoring pass over all strains and classify.

     Iterates strains (``self.getStrains()``), this worker's pathways
     (``self.getMyPathways()``) and every ordered pair of alleles for the
     strain.  Each ``(age, samp)`` entry of the base allele is scored with
     ``self.getRMS`` against a template that ``dirac.getRT`` builds from the
     columns ``self.kNearest`` selects among the comparison-allele samples.
     Scores are recorded with ``self.setRMS`` under
     ``"<pathway>_<comparison allele>"``.

     ``self.initStrain`` is called with ``shuffle=False`` for every strain.

     :param k_neighbors: forwarded as-is to ``self.kNearest``.
     :returns: whatever ``self.classify()`` returns once scoring is complete.
     """
     logging.info("running base")
     for cstrain in self.getStrains():
         logging.info("Strain[%s] starting" % cstrain)
         mypws = self.getMyPathways()
         alleles = self.getAlleles(cstrain)
         self.initStrain(cstrain, mypws, shuffle=False)
         for pw in mypws:
             # Per-pathway memo of templates, keyed by the neighbour set.
             rt_cache = {}
             srts = self.genSRTs(cstrain, pw)
             for a_base, a_compare in itertools.product(alleles, alleles):
                 r_index = "%s_%s" % (pw, a_compare)
                 base_samp = self.getSamplesByAllele(cstrain, a_base)
                 comp_samp = self.getSamplesByAllele(cstrain, a_compare)
                 for age, samp in base_samp:
                     neighbors = self.kNearest(comp_samp, samp, age,
                                               k_neighbors)
                     # A tuple key keeps the element boundaries; joining the
                     # names into one string can make two different
                     # neighbour lists collide on the same cache entry.
                     cache_key = tuple(neighbors)
                     if cache_key not in rt_cache:
                         srt_comp = srts.loc[:, neighbors]
                         rt_cache[cache_key] = dirac.getRT(srt_comp)
                     rt = rt_cache[cache_key]
                     samp_srt = srts[samp]
                     rms = self.getRMS(rt, samp_srt)
                     self.setRMS(rms, r_index, samp)
     return self.classify()
Esempio n. 3
0
def genRMS(comm, sd, mi, k_neighbors):
    """Compute per-sample scores for this rank's share of the pathways.

    Rank 0 reads the pathway list from ``sd`` and the strain list from
    ``mi``; both are broadcast to every rank.  Pathways are dealt out
    round-robin (``index % comm.size == comm.rank``).  For each owned
    pathway and each ordered (base allele, comparison allele) pair, every
    base-allele sample is scored with ``dirac.getRMS`` against a template
    that ``dirac.getRT`` builds from the neighbours ``kNearest`` selects
    among the comparison-allele samples.

    :param comm: communicator exposing ``rank``, ``size``, ``bcast`` and
        ``barrier`` (presumably an mpi4py communicator - confirm).
    :param sd: object providing ``getPathways()``.
    :param mi: object providing ``getStrains()``, ``getNominalAlleles()``
        and ``getSampleIDs()``.
    :param k_neighbors: passed through unchanged to ``kNearest``.
    :returns: a ``pandas.DataFrame`` indexed by ``"<pathway>_<allele>"`` with
        one column per sample ID, for the FIRST strain only (see the note at
        the ``return``); ``None`` if the strain list is empty.
    """
    # Every rank must bind both names before the collective bcast calls;
    # only rank 0 actually has the data to send.
    pws = None
    strain_list = None
    if comm.rank == 0:
        pws = sd.getPathways()
        strain_list = mi.getStrains()

    pws = comm.bcast(pws)
    strain_list = comm.bcast(strain_list)

    for cstrain in strain_list:

        logging.info('Starting strain [%s]' % cstrain)

        mypws = [pw for i, pw in enumerate(pws) if i % comm.size == comm.rank]
        alleles = mi.getNominalAlleles(cstrain)
        indexes = ["%s_%s" % (pw, allele)
                   for pw, allele in itertools.product(mypws, alleles)]
        sample_ids = mi.getSampleIDs(cstrain)

        # Preallocate the results frame.  np.empty leaves cells
        # uninitialised, so any cell not written below holds arbitrary data.
        results = pandas.DataFrame(
            np.empty((len(indexes), len(sample_ids)), dtype=float),
            index=indexes, columns=sample_ids)
        for pw in mypws:
            # Partition samples by strain/allele.  Kept under its own name so
            # it no longer shadows the flat list of sample IDs above.
            by_allele = partitionSamplesByAllele(alleles, mi, cstrain)

            # Generate pw srts for all samples partitioned by strain/allele.
            srts = getSRTSByAllele(alleles, pw, by_allele)

            for allele_base in alleles:
                for allele_compare in alleles:
                    r_index = "%s_%s" % (pw, allele_compare)
                    # List of samples with comparison allele.
                    compare_list = by_allele[allele_compare]
                    for age, samp in by_allele[allele_base]:
                        # The loop variables are (age, samp); the previous
                        # names samp_name / samp_age were never defined.
                        samp_compare = kNearest(compare_list, samp, age,
                                                k_neighbors)

                        comp_exp = srts[allele_compare].loc[:, samp_compare]
                        rt = dirac.getRT(comp_exp)
                        # Single .loc write: chained results[samp][r_index]
                        # can assign into a temporary copy and be lost.
                        results.loc[r_index, samp] = dirac.getRMS(
                            srts[allele_base][samp], rt)

        comm.barrier()
        # NOTE(review): this return sits inside the strain loop, so only the
        # first strain is ever processed.  Left as-is to keep the return
        # shape callers see - confirm whether all strains were intended.
        return results
Esempio n. 4
0
    def runPerm(self, num_runs, k_neighbors, truth):
        """Repeat the scoring pass on shuffled strains and count exceedances.

        Each of the ``num_runs`` runs initialises every strain with
        ``shuffle=True``, scores all samples the same way as the unshuffled
        pass (``kNearest`` -> ``dirac.getRT`` -> ``getRMS`` -> ``setRMS``) and
        calls ``self.classify()``.  For every key and index label of the
        classification, the matching counter is incremented when
        ``truth[key][i] <= c[key][i]``.

        Side effects per run: rank 0 prints the first five base samples while
        the first pathway of the first strain is processed, and for every key
        both ``truth[key]`` and the run's classification are pickled under
        ``/scratch/sgeadmin``.

        :param num_runs: number of shuffled runs to perform.
        :param k_neighbors: passed through unchanged to ``self.kNearest``.
        :param truth: dict-like of pandas objects holding the reference
            values; read with ``truth[key][i]`` and never modified here.
        :returns: dict with the keys of ``truth``; each value is a copy of
            the corresponding entry whose cells hold the exceedance counts.
        :raises AssertionError: if every base sample of some
            (pathway, strain, allele) was also present in the previous run
            (shuffle sanity check), or if a counter exceeds the run index.
        """
        # Counters start as zeroed copies so they share truth's labels.
        c_results = {}
        for k, v in truth.items():
            c_results[k] = v.copy()
            for i in v.index:
                c_results[k][i] = 0

        # Only the immediately preceding run is ever compared against, so a
        # single reference is kept instead of an ever-growing history list.
        prev_snapshot = None
        for ctr in range(num_runs):
            snapshot = {}
            check = True
            for cstrain in self.getStrains():
                logging.info("Strain[%s] starting" % cstrain)
                mypws = self.getMyPathways()
                alleles = self.getAlleles(cstrain)
                self.initStrain(cstrain, mypws, shuffle=True)
                for pw in mypws:
                    rt_cache = {}
                    srts = self.genSRTs(cstrain, pw)
                    for a_base, a_compare in itertools.product(alleles, alleles):
                        r_index = "%s_%s" % (pw, a_compare)
                        base_samp = self.getSamplesByAllele(cstrain, a_base)
                        # testing shuffle
                        snap_key = (pw, cstrain, a_base)
                        snapshot[snap_key] = base_samp
                        if prev_snapshot is not None:
                            previous = prev_snapshot[snap_key]
                            same = all(x in previous for x in base_samp)
                            # The message is built only if the check fails.
                            assert not same, (
                                'In runperm\n'
                                + ''.join(map(str, base_samp))
                                + '\n and \n '
                                + ''.join(map(str, previous)))

                        if check and self._comm.rank == 0:
                            # Same output as the Python 2 print statement,
                            # but also valid on Python 3.
                            print("%s %s %s" % (ctr, a_base, base_samp[:5]))
                        comp_samp = self.getSamplesByAllele(cstrain, a_compare)
                        for age, samp in base_samp:
                            neighbors = self.kNearest(comp_samp, samp, age, k_neighbors)
                            # Tuple key: joining the names into one string
                            # lets different neighbour lists collide.
                            cache_key = tuple(neighbors)
                            if cache_key not in rt_cache:
                                rt_cache[cache_key] = dirac.getRT(
                                    srts.loc[:, neighbors])
                            rt = rt_cache[cache_key]
                            rms = self.getRMS(rt, srts[samp])
                            self.setRMS(rms, r_index, samp)
                    # Stop printing after the first pathway of the run.
                    check = False
            prev_snapshot = snapshot
            c = self.classify()

            for key in c.keys():
                for i in c[key].index:
                    if truth[key][i] <= c[key][i]:
                        # A counter can be bumped at most once per run.
                        assert c_results[key][i] <= ctr, (
                            "key: [%s] index[%s] ctr[%i] value[%i]"
                            % (key, i, ctr, c_results[key][i]))
                        c_results[key][i] += 1

                # NOTE(review): looks like debugging output - one pair of
                # pickles per key per run per rank; confirm it is wanted.
                truth[key].to_pickle('/scratch/sgeadmin/unjoined.truth.perm.%s.df.%i.%i.pkl'%(key,self._comm.rank,ctr))
                c[key].to_pickle('/scratch/sgeadmin/unjoined.perm.%s.df.%i.%i.pkl'%(key,self._comm.rank,ctr))
        return c_results
Esempio n. 5
0
    def runPerm(self, num_runs, k_neighbors, truth):
        """Run ``num_runs`` shuffled scoring passes and tally exceedances.

        Every run calls ``self.initStrain(..., shuffle=True)`` for each
        strain, scores each ``(age, samp)`` entry of every base allele
        against a template built from its ``kNearest`` neighbours among the
        comparison-allele samples, and then calls ``self.classify()``.  A
        counter is incremented for each key/index label where
        ``truth[key][i] <= c[key][i]``.

        Side effects per run: on rank 0 the first five base samples are
        printed while the first pathway of the first strain is handled; for
        every key, ``truth[key]`` and the run's classification are written
        as pickles under ``/scratch/sgeadmin``.

        :param num_runs: how many shuffled runs to execute.
        :param k_neighbors: forwarded as-is to ``self.kNearest``.
        :param truth: dict-like of pandas objects with the reference values;
            only read here.
        :returns: dict keyed like ``truth``; each value is a copy of the
            matching entry with its cells replaced by exceedance counts.
        :raises AssertionError: when all base samples of a
            (pathway, strain, allele) also appeared in the previous run, or
            when a counter is larger than the current run index.
        """
        # Zeroed copies of the truth entries act as the counters.
        c_results = {}
        for k, v in truth.items():
            c_results[k] = v.copy()
            for i in v.index:
                c_results[k][i] = 0

        # The shuffle check only looks one run back, so just the last
        # snapshot is retained rather than the whole history.
        last_run = None
        for ctr in range(num_runs):
            this_run = {}
            check = True
            for cstrain in self.getStrains():
                logging.info("Strain[%s] starting" % cstrain)
                mypws = self.getMyPathways()
                alleles = self.getAlleles(cstrain)
                self.initStrain(cstrain, mypws, shuffle=True)
                for pw in mypws:
                    rt_cache = {}
                    srts = self.genSRTs(cstrain, pw)
                    for a_base, a_compare in itertools.product(
                            alleles, alleles):
                        r_index = "%s_%s" % (pw, a_compare)
                        base_samp = self.getSamplesByAllele(cstrain, a_base)
                        # testing shuffle
                        group = (pw, cstrain, a_base)
                        this_run[group] = base_samp
                        if last_run is not None:
                            earlier = last_run[group]
                            same = all(x in earlier for x in base_samp)
                            # Message expression runs only on failure.
                            assert not same, (
                                'In runperm\n'
                                + ''.join(map(str, base_samp))
                                + '\n and \n '
                                + ''.join(map(str, earlier)))

                        if check and self._comm.rank == 0:
                            # Formats exactly like ``print a, b, c`` did on
                            # Python 2 while remaining valid on Python 3.
                            print("%s %s %s" % (ctr, a_base, base_samp[:5]))
                        comp_samp = self.getSamplesByAllele(cstrain, a_compare)
                        for age, samp in base_samp:
                            neighbors = self.kNearest(comp_samp, samp, age,
                                                      k_neighbors)
                            # Concatenated names are ambiguous as a key
                            # ("ab"+"c" == "a"+"bc"); a tuple is not.
                            cache_key = tuple(neighbors)
                            if cache_key not in rt_cache:
                                srt_comp = srts.loc[:, neighbors]
                                rt_cache[cache_key] = dirac.getRT(srt_comp)
                            rt = rt_cache[cache_key]
                            samp_srt = srts[samp]
                            rms = self.getRMS(rt, samp_srt)
                            self.setRMS(rms, r_index, samp)
                    # Printing is limited to the first pathway of each run.
                    check = False
            last_run = this_run
            c = self.classify()

            for key in c.keys():
                for i in c[key].index:
                    if truth[key][i] <= c[key][i]:
                        # Each counter can grow by at most one per run.
                        assert c_results[key][i] <= ctr, (
                            "key: [%s] index[%s] ctr[%i] value[%i]" % (
                                key, i, ctr, c_results[key][i]))
                        c_results[key][i] += 1

                # NOTE(review): appears to be debugging output written once
                # per key, run and rank - confirm it should stay.
                truth[key].to_pickle(
                    '/scratch/sgeadmin/unjoined.truth.perm.%s.df.%i.%i.pkl' %
                    (key, self._comm.rank, ctr))
                c[key].to_pickle(
                    '/scratch/sgeadmin/unjoined.perm.%s.df.%i.%i.pkl' %
                    (key, self._comm.rank, ctr))
        return c_results