예제 #1
0
    def split_into_folds(self, inpL, classif, save=True, excludeL=None, info=''):
        """Randomly distribute scaffolds over ``Constants.numSegm_total`` segments.

        Every input record is assigned to a segment index drawn with
        ``np.random.randint``; one draw is made per record, in input order.

        Args:
            inpL: iterable of ``(scafName, text, featureVector, seq)`` tuples.
                It must also support ``len()`` when ``self.verb > 0``.
            classif: label interpolated into the output file name.
            save: when truthy, the result is written with ``write_yaml`` to
                ``<self.trainvalid_1hot_dataPath>/<self.name>.<classif>-scaff-split.yml``.
            excludeL: accepted for interface compatibility; currently ignored.
            info: arbitrary value stored under the ``'info'`` key of the result.

        Returns:
            dict mapping each segment index to ``{scafName: record}``, where a
            record has the keys ``'text'``, ``'seq'``, ``'len'`` and
            ``'features'``; the dict also carries an ``'info'`` entry.
        """
        # one of the segments will be validation and the rest used in training
        numSegm = Constants.numSegm_total

        out_allinfo = {seg: {} for seg in range(numSegm)}
        out_lengths = dict.fromkeys(range(numSegm), 0)

        for scafName, text, fetureV, seq in inpL:
            randomSegment = np.random.randint(numSegm)
            seqLen = len(seq)
            out_allinfo[randomSegment][scafName] = {
                'text': text,
                'seq': seq,
                'len': seqLen,
                'features': fetureV,
            }
            out_lengths[randomSegment] += seqLen

        if self.verb > 0:
            # Summarised before 'info' is added, so only segment keys are listed.
            print('  achieved split for  %d scaffolds to segments:' % len(inpL),
                  [(x, len(out_allinfo[x]), out_lengths[x]) for x in out_allinfo])

        for seg in range(numSegm):
            # Total sequence length of the segment in units of 1e6 characters.
            sizeMega = out_lengths[seg] / 1.e6
            print('seg:', seg,
                  ' size=%.2f (MB) numScaf=%d' % (sizeMega, len(out_allinfo[seg])))

        out_allinfo['info'] = info

        # save segments
        if save:
            out_file = (self.trainvalid_1hot_dataPath + '/' + self.name
                        + '.%s-scaff-split.yml' % classif)
            write_yaml(out_allinfo, out_file)
        return out_allinfo
예제 #2
0
    def split_species(self, inpL, role, save=True, excludeL=None, info=''):
        """Randomly distribute scaffolds over ``self.numSegm + 1`` segments.

        Every input record is assigned to a segment index drawn with
        ``np.random.randint``; one draw is made per record, in input order.

        Args:
            inpL: iterable of ``(scafN, text, featureVector, seq)`` tuples.
                It must also support ``len()`` when ``self.verb > 0``.
            role: label interpolated into the output file name.
            save: when truthy, the result is written with ``write_yaml`` to
                ``<self.dataPath>/<self.name>.<role>-scaff-split.yml``.
            excludeL: accepted for interface compatibility; currently ignored.
            info: arbitrary value stored under the ``'info'`` key of the result.

        Returns:
            dict mapping each segment index to ``{scafN: record}``, where a
            record has the keys ``'text'``, ``'seq'``, ``'len'`` and
            ``'features'``; the dict also carries an ``'info'`` entry.
        """
        # the last one is not used in any training
        numSegm = self.numSegm + 1

        out0 = {seg: {} for seg in range(numSegm)}  # per-segment scaffold records
        out1 = dict.fromkeys(range(numSegm), 0)     # per-segment total sequence length

        for scafN, text, fetureV, seq in inpL:
            ix = np.random.randint(numSegm)
            seqLen = len(seq)
            out0[ix][scafN] = {
                'text': text,
                'seq': seq,
                'len': seqLen,
                'features': fetureV,
            }
            out1[ix] += seqLen

        if self.verb > 0:
            # Summarised before 'info' is added, so only segment keys are listed.
            print(
                '  achieved split for  %d scaffolds to segments:' % len(inpL),
                [(x, len(out0[x]), out1[x]) for x in out0])

        for seg in range(numSegm):
            # Total sequence length of the segment in units of 1e6 characters.
            sizeMega = out1[seg] / 1.e6
            print('seg:', seg,
                  ' size=%.2f (MB) numScaf=%d' % (sizeMega, len(out0[seg])))

        out0['info'] = info

        # save segments
        if save:
            outF = self.dataPath + '/' + self.name + '.%s-scaff-split.yml' % role
            write_yaml(out0, outF)
        return out0
예제 #3
0
 def save_training_history(self):
     """Hand ``self.train_hirD`` to ``write_yaml`` for storage.

     The target path is ``<self.outPath>/<self.name>.history.yml``.
     """
     history = self.train_hirD
     target = ''.join((self.outPath, '/', self.name, '.history.yml'))
     write_yaml(history, target)
예제 #4
0
    print(
        cnt['inp'], scaffN, 'decision=%s, avr score:' % decision,
        classif_avrg_score_str,
        ' len=%.1fk samples=%d' % (len(sequenceString) / 1000., len(sampList)))
    if args.verb > 0:
        print(Yscores_samples, classif_avrg_score_str, classif_score,
              scores_samples_list_class1, scores_samples_list_class0,
              scores_scaffs_list, Yclass, sampListHotEncodedA,
              featureListXsamplesA)

    classif_details_yaml['score'] = classif_score
    classif_details_yaml['model_info'] = deep.info
    #print('out classif_details_yaml='); pprint(classif_details_yaml)

    #write under outPR the info for this scaffold prediction in a yaml file
    write_yaml(classif_details_yaml,
               args.outPath + '/%s.assayer.yml' % (scaffN), 0)

# Sum of the three per-class counters kept in `cnt`.
nClass = cnt['plasmid'] + cnt['main'] + cnt['ambig']
#print('M:%s endCnt:'%given_class,cnt,'  fraction: Plasm=%.3f Ambig=%.3f  Main=%.3f'%(cnt['plasmid']/nClass,cnt['ambig']/nClass,cnt['main']/nClass))
# Report the individual counters together with their total.
print('Counts: Plasm=%s  Ambig=%s  Main=%s  nCount=%s' %
      (cnt['plasmid'], cnt['ambig'], cnt['main'], nClass))

# NOTE(review): f_predix is opened outside this fragment, presumably the
# predictions output file; confirm nothing writes to it after this point.
f_predix.close()

# make plot of all scores
#ROC curve
#print("Yclass %s " % (Yclass))
#print("scores_scaffs_list %s " % (scores_scaffs_list))
####Print the AUC-ROC and FP-TP rate list to a file
if 1 in Yclass and 0 in Yclass:  ###Need both classes for this to be meaningful
    fpr, tpr, _ = roc_curve(