コード例 #1
0
ファイル: sush2.py プロジェクト: bh0085/compbio
def show_clustered_expr(tge,tfe,tgnames, tfnames, nrml = True,fig = 8):
    '''
    Plot target and TF expression series on two separate figures.

    Figure `fig` receives one line per entry of tge and figure `fig + 1`
    one line per entry of tfe.  Both figures are cleared first.  Line
    colors come from a color table sized by the matching name list, and a
    color legend keyed by those names is attached to each axes.

    tge, tfe:          indexable collections of series (targets / TFs)
    tgnames, tfnames:  legend labels for the entries of tge / tfe
    nrml:              when true, '(Normalized)' is appended to both titles
    fig:               number of the first figure; the second is fig + 1
    '''
    target_fig = plt.figure(fig)
    tf_fig = plt.figure(fig + 1)
    for figure in (target_fig, tf_fig):
        figure.clear()

    target_ax = target_fig.add_subplot(111)
    tf_ax = tf_fig.add_subplot(111)

    # Both color tables are requested before anything is drawn.
    panels = [
        (target_fig, target_ax, tge, colors.getct(len(tgnames)),
         tgnames, 'Target Expression Levels'),
        (tf_fig, tf_ax, tfe, colors.getct(len(tfnames)),
         tfnames, 'TF Expression Levels'),
    ]

    for figure, axes, series, palette, labels, heading in panels:
        for row in range(len(series)):
            axes.plot(series[row], color = palette[row])
        myplots.color_legend(figure, palette, labels, ax = axes, pos = 4)
        if nrml:
            heading += '(Normalized)'
        myplots.maketitle(axes, heading)
コード例 #2
0
ファイル: s3.py プロジェクト: bh0085/compbio
def show_m(btgs,btfs,name):
    '''
    Plot the sorted values of two dictionaries as stacked panels on figure 1.

    The upper panel (211) shows sorted(btfs.values()) and the lower panel
    (212) shows sorted(btgs.values()).  Each panel title ends with `name`.
    '''
    fig = plt.figure(1)
    fig.clear()

    tf_axes = fig.add_subplot(211)
    tf_scores = sorted(btfs.values())
    tf_axes.plot(tf_scores)

    tg_axes = fig.add_subplot(212)
    tg_scores = sorted(btgs.values())
    tg_axes.plot(tg_scores)

    titles = (
        (tf_axes, 'TFS in the Markov blanket for '+name),
        (tg_axes, 'TGS in the Markov blanket for '+name),
    )
    for axes, text in titles:
        myplots.maketitle(axes, text)
コード例 #3
0
ファイル: view.py プロジェクト: bh0085/compbio
def view_in():
    '''
    Draw the sorted in-degree of the network affinity matrix on figure 0.

    The affinity returned by nu.net_affinity() is reduced with sum(na, 0),
    the result is ordered ascending via argsort, and the ordered vector is
    handed to sm.seismic as a single-row image.
    '''
    affinity = nu.net_affinity()

    fig = plt.figure(0)
    fig.clear()
    axes = fig.add_subplot(111)

    # presumably `sum` is numpy's here (second argument = axis) -- confirm
    in_degree = sum(affinity, 0)
    ranked = in_degree[argsort(in_degree)]

    sm.seismic([ranked], ax=axes)
    myplots.maketitle(axes, 'In degree, sorted')
コード例 #4
0
ファイル: sush2.py プロジェクト: bh0085/compbio
def draw_svm(x_expr, predictions,actual, f = 0, names = None,
             thr = .05, scatter_errors = False):
    '''
    Draw an nx-by-nx grid of pairwise scatter plots on figure `f`.

    Cell (i, j) plots x_expr[i] against x_expr[j].  Every point is drawn
    twice: a small dot colored by its predicted value and a larger ring
    whose edge is colored by its actual value.  Values <= -thr are 'blue',
    values >= thr are 'red', everything in between is 'black'.

    x_expr:         indexable collection of nx series
    predictions:    one predicted value per point
    actual:         one actual value per point
    f:              figure number passed to plt.figure (figure is not cleared)
    names:          optional labels for the nx series; when missing the
                    series indices are used and titles are drawn at alpha .6
    thr:            threshold separating blue / black / red (default .05,
                    the value that used to be hard-coded)
    scatter_errors: when true, additionally draw a large black disc behind
                    every point whose predicted and actual colors differ
                    (default False, matching the previous hard-coded flag)
    '''
    def sign_color(value):
        # explicit branches instead of the fragile `a and b or c` idiom
        if value <= -thr:
            return 'blue'
        if value >= thr:
            return 'red'
        return 'black'

    fig = plt.figure(f)
    nx = len(x_expr)

    # Real lists (not map objects): each color sequence is consumed once per
    # grid cell, which a single-use Python 3 iterator could not support.
    pcols = [sign_color(p) for p in predictions]
    ycols = [sign_color(a) for a in actual]
    mcols = ['none' if pc == yc else 'black'
             for pc, yc in zip(pcols, ycols)]

    rsml = 20
    rbig = rsml*2.5
    title_alpha = 1.0 if names else .6

    for i in range(nx):
        for j in range(nx):
            # axes rectangle: [left, bottom, width, height] in figure fractions
            ax_r = [float(i)/nx, float(j)/nx, float(1)/nx, float(1)/nx]
            ax = fig.add_axes(ax_r, frameon = False)

            ax.scatter(x_expr[i],
                       x_expr[j],
                       rsml,
                       color = pcols)
            ax.scatter(x_expr[i],
                       x_expr[j],
                       rbig,
                       edgecolor = ycols,
                       color = 'none')
            if scatter_errors:
                ax.scatter(x_expr[i],
                           x_expr[j],
                           rbig*2,
                           color = mcols,
                           zorder = -100,
                           edgecolor = 'none')
            myplots.hideaxes(ax)

            if names:
                namearr = [names[i], names[j]]
            else:
                namearr = [str(i), str(j)]
            myplots.maketitle(ax, ' vs '.join(namearr), alpha = title_alpha)
コード例 #5
0
ファイル: learner.py プロジェクト: bh0085/compbio
    def makePlots(self, name="No Name"):
        """Plot actual versus predicted outputs for the test and training sets.

        Test-set series are drawn on figure 1 and training-set series on
        figure 2, each in the upper (211) subplot of a freshly cleared
        figure.  One call to lplots.plotPredictions is made per entry of
        the corresponding y collection, and each axes is titled once.

        name: title text handed to myplots.maketitle for both axes; the
              subtitle distinguishes test from training predictions.
        """
        # Only the y halves are plotted; the x halves are discarded.
        _, ytrain = self.xyTrain()
        _, ytest = self.xyTest()
        ytrain_predicted = self.predictTraining()
        ytest_predicted = self.predictTest()

        test_fig = plt.figure(1)
        test_fig.clear()
        # Integer position spec: the string form ("211") is deprecated in
        # newer Matplotlib releases.
        ax0 = test_fig.add_subplot(211)

        train_fig = plt.figure(2)
        train_fig.clear()
        ax1 = train_fig.add_subplot(211)

        # NOTE(review): the color table is sized by the training set but is
        # also indexed by test-series position -- confirm that the test set
        # never has more series than the training set.
        ct = mycolors.getct(len(ytrain))

        for actual, predicted, ax, subtitle in [
            (ytest, ytest_predicted, ax0, "test predictions"),
            (ytrain, ytrain_predicted, ax1, "training predictions"),
        ]:
            for i in range(len(actual)):
                lplots.plotPredictions(actual[i], predicted[i], ax, color=ct[i])
            # Title once per axes instead of once per plotted series.
            myplots.maketitle(ax, name, subtitle=subtitle)
コード例 #6
0
ファイル: predict.py プロジェクト: bh0085/compbio
def heatMapGene(gene_name = 'FBgn0014931', 
                model_class = None,
                res = 5,
                prediction ='training'):
  '''
  Show a parameter-sweep heat map for one gene plus its best and worst fits.

  A Learner is built from gVals(gene_name) and its testParams sweep is run
  with dim = 2.  The upper subplot shows the 'test_rms' grid as a heat map
  annotated with 'pdicts'.  The lower subplot overlays the 'test_preds'
  entry with the largest test_rms (blue, dotted), the entry with the
  smallest test_rms (red, dotted) and the first 'actual_preds' series.

  gene_name:    gene identifier passed to gVals
  model_class:  model class for the sweep; om.NuSVMModel when None
  res:          forwarded to testParams as `res`
  prediction:   forwarded to testParams as `prediction`
  '''
  plt.clf()
  if model_class is None:
    model_class = om.NuSVMModel

  xvals, yvals, coupling = gVals(gene_name)
  learner = l.Learner(xvals, yvals, coupling)
  vals = learner.testParams(model_class,
                            prediction = prediction,
                            res = res, dim = 2)
  err = vals['test_rms']
  annotations = vals['pdicts']

  f = plt.gcf()
  # Integer position specs: the string form ('211') is deprecated in newer
  # Matplotlib releases.
  ax = f.add_subplot(211)
  ax2 = f.add_subplot(212)
  ax = hm.heatMap(err, annotations, axes = ax)

  myplots.maketitle(ax, 'gene: {0}'.format(gene_name),
                    'heatmap for different learning parameters')

  preds = vals['test_preds']
  # The first two axes of preds are indexed by the flat argmin / argmax
  # position of the error grid.
  grid = shape(preds)[:2]
  best_p = preds[unravel_index(argmin(err), grid)]
  worst_p = preds[unravel_index(argmax(err), grid)]

  ax2.plot(worst_p,
           linestyle = ':',
           linewidth = 4,
           color = 'blue')
  ax2.plot(best_p,
           linestyle = ':',
           linewidth = 4,
           color = 'red')
  ax2.plot(vals['actual_preds'][0])
コード例 #7
0
ファイル: gagd.py プロジェクト: bh0085/compbio
def test():
    '''
    Smoke test: build a small GAGD network, draw it, then train it.

    A GAGD instance is created with 2 hidden units / targets / TFs, a
    genome is sampled and turned into connections, the resulting net is
    drawn in the left half (121) of figure 0, and the network is trained on
    data from sd.synth_data.  Nothing is returned.
    '''
    # Indentation is spaces only: the previous mix of tabs and spaces is a
    # TabError on Python 3.
    nhc = 2
    ntg = 2
    ntf_s = 2
    max_tfu = 2

    gagd = GAGD(nhc, ntg, ntf_s, [max_tfu] * ntg)
    xs, ys = sd.synth_data(ntg, max_tfu, ntf_s)
    g, ga = gagd.sample_genome()   # only the genome `g` is used below
    gagd.init_net()
    gagd.make_cxns_from_genome(g)

    net = gagd.mynn.net

    f = plt.figure(0)
    f.clear()
    ax = f.add_subplot(121)
    myplots.draw_pb(ax, net)
    myplots.hideaxes(ax)
    myplots.maketitle(ax, 'GANN')

    gagd.set_data(xs.T, ys.T)
    gagd.set_trainer()
    gagd.train()
コード例 #8
0
ファイル: sush2.py プロジェクト: bh0085/compbio
def draw_prediction(predictions, actual,fig = 0, match_mean = True,title = '',subt = ''):
    '''
    Plot rescaled predictions against actual values and score the fit.

    The predictions are centered, rescaled to the standard deviation of
    `actual` (skipped when their own standard deviation is 0) and shifted
    to the mean of `actual`.  The rescaled curve, the actual curve and a
    band of +/- std(actual) around the actual curve are drawn on a cleared
    figure `fig`.  Stretches where the rescaled curve leaves the band are
    filled red (above) or blue (below).

    predictions, actual: equally long numeric arrays
    fig:         figure number
    match_mean:  accepted but not read by this function
    title:       axes title
    subt:        text passed to myplots.label_lr

    Returns [score, correlation]:
      score        sum of squared differences between the rescaled
                   predictions and `actual` (shown as "Validation MSE",
                   although it is not divided by the number of points)
      correlation  Pearson correlation of the two curves, 0 when it is NaN
    '''
    f = plt.figure(fig)
    f.clear()
    ax = f.add_subplot(111)
    xax = arange(0,len(predictions))

    # Bring the predictions onto the scale and offset of the actual data.
    rescaled = predictions - mean(predictions)
    spread = std(rescaled)
    if spread != 0:
        rescaled = rescaled/spread *std(actual)
    rescaled = rescaled + float(np.mean(actual))
    ax.plot(xax,rescaled)

    residual = rescaled - actual
    mse_zscore = np.sum(power(residual,2))
    cov = np.sum(np.corrcoef(rescaled, actual)[0,1])
    if np.isnan(cov):
        cov = 0

    ax.plot(xax,actual)
    eps = std(actual)
    lower = actual - eps
    upper = actual + eps
    for band in (upper, lower):
        ax.plot(xax,band,alpha = .3)

    # Shade the excursions outside the band: red above, blue below.
    for bound, outside, shade in ((upper, greater, 'red'),
                                  (lower, less, 'blue')):
        ax.fill_between(xax,rescaled, bound,
                        where = outside(rescaled,bound),
                        color = shade,
                        interpolate = True)

    score_text = 'Validation MSE: '+str(round(mse_zscore,3))
    corr_text = '\nValidation Correlation: '+str(round(cov,3))
    myplots.maketitle(ax,title, subtitle = score_text+corr_text)
    myplots.label_lr(ax,subt)
    f.show()
    return [mse_zscore,cov]
コード例 #9
0
ファイル: reinitz.py プロジェクト: bh0085/compbio
def check_network(net_name = 'binding', 
                  dataset_name = 'reinitz',
                  data_ofs = 4,
                  max_edges = -1,
                  node_restriction = 'reinitz'):
    '''
    Histogram the expression correlation across the edges of a network.

    For every edge (tf, tg) of the chosen network whose endpoints both
    appear in the chosen expression data set, the correlation coefficient
    of the two expression vectors is computed.  NaN coefficients are
    dropped.  The remaining values are drawn as a histogram with a scaled
    KDE overlay on figure 1, and the figure is saved under
    myplots.figpath(...).

    net_name:          'binding', 'unsup', 'logistic' (graphs returned by
                       comp.get_graphs()) or 'clusters' (get_soheil_network)
    dataset_name:      'reinitz', 'bdtnp' or 'tc'
    data_ofs:          offset passed to get_reinitz_data, or the column
                       selected from each BDTNP value table
    max_edges:         forwarded to get_soheil_network for 'clusters'
    node_restriction:  with dataset 'tc', the value 'reinitz' keeps only
                       the genes that also occur in the Reinitz data

    Raises Exception for an unknown dataset_name or net_name.
    '''
    if dataset_name == 'reinitz':
        # the first element (coordinates) is not needed here
        _, values = get_reinitz_data(ofs = data_ofs)
    elif dataset_name == 'bdtnp':
        data = nio.getBDTNP()
        values = dict((k, v['vals'][:,data_ofs]) for k, v in data.items())
    elif dataset_name == 'tc':
        data = nio.getTC()
        if node_restriction == 'reinitz':
            # Load the Reinitz gene names only in the one branch that
            # actually filters by them.
            reinitz_keys = set(get_reinitz_data()[1].keys())
            data = dict((k, v) for k, v in data.items()
                        if k in reinitz_keys)
        values = data
    else:
        raise Exception('data set {0} not yet implemented'.format(dataset_name))

    nodes = list(values.keys())
    nodes_allowed = set(nodes)

    nets = comp.get_graphs()
    graph_keys = {'binding': 'bn', 'unsup': 'unsup', 'logistic': 'logistic'}
    if net_name in graph_keys:
        network = nets[graph_keys[net_name]]
    elif net_name == 'clusters':
        network = get_soheil_network(max_edges = max_edges,
                                     node_restriction = nodes)
    else:
        raise Exception('type not implemented: {0}'.format(net_name))

    f = myplots.fignum(1,(8,8))
    ax = f.add_subplot(111)

    # For every data node: the network neighbours that also have data.
    targets = {}
    for n in nodes:
        if n in network:
            targets[n] = nodes_allowed.intersection(network[n].keys())
        else:
            targets[n] = []

    xax = linspace(-1,1,20)

    edges = [(tf, tg) for tf, tgs in targets.items() for tg in tgs]
    all_ccofs = (corrcoef(values[tf], values[tg])[0,1] for tf, tg in edges)
    ccofs = [c for c in all_ccofs if not isnan(c)]

    # only the density estimate is used; the first element is ignored
    _, kde = make_kde(ccofs)

    ax.hist(ccofs,xax,label = net_name)
    h = histogram(ccofs,xax)
    # scale the density to the tallest histogram bar so both share one axis
    ax.fill_between(xax,kde(xax)*max(h[0]),label = net_name,zorder = 1,alpha = .5)

    detail = ('\n{2} data (data offset={0})\n(net_name={1})\n(max_edges={3})'
              .format(data_ofs, net_name, dataset_name, max_edges))
    myplots.maketitle(ax,'edge correlations kde for {0}'.format(detail),
                      subtitle = 'n_edges = {0}'.format(len(edges)))
    ax.legend()
    f.savefig(myplots.figpath('network_edge_corrs_data_ofs={0}_net={1}_expr={2}_max_edges={3}'
                              .format(data_ofs,net_name,dataset_name, max_edges)))
コード例 #10
0
ファイル: sush2.py プロジェクト: bh0085/compbio
def run(  method ='identity',index = 0, reset = 0, 
          nxmax = 100 , 
          binary_x = False, binary_y = False, 
          expression = 'time' ,
          cluster_idx = 0,
          lrn = 'tree',
          showall = False,
          tgonly = False,
          randomize_tfs = False,
          ctfs = 5,
          ctgs = 5,
          cofs = 1,
          do_normalize_cluster = True,
          cluster_tfs = True,
          verbose_expr_labels = False,
          ctype = False):
    '''
    sush2.run:

    Run a selected learning algorithm for a cluster and plot the result.

    KEYWORDS:

    method ['identity']:  membership method passed to get_membership
    index, reset, nxmax, expression:
                          accepted for compatibility, not read
    binary_x, binary_y:   passed to learn_svm (lrn = 'svm' only)
    cluster_idx [0]:      position of the modelled target inside the
                          cluster; selects the block of folded columns
                          used for testing and the name used in the title
    lrn ['tree']:         'svm', 'knn', 'tree', 'forest' or 'nn'
    showall:              accepted, but forced to True inside the function
    tgonly:               train only on the selected target's own columns
    randomize_tfs:        replace the cluster TFs by randomly drawn TFs
    ctfs, ctgs:           passed to get_candidates
    cofs [1]:             index into the candidate list
    do_normalize_cluster: use normalize_cluster_expr instead of
                          non_normal_cluster_expr
    cluster_tfs:          take the TFs from the cluster; otherwise use the
                          'tfs' entry of the selected target in
                          nu.parse_net()
    verbose_expr_labels:  label the scatter grid with TF names
    ctype:                passed to the expression loaders; also switches
                          the test set to the last 4 columns of the
                          selected target block

    RETURNS:

    [mse, correlation] as returned by draw_prediction.  The 'nn' branch is
    a self-contained demo on synthetic data and returns None.

    RAISES:

    ValueError for an unknown `lrn`.
    '''
    if lrn not in ['svm', 'knn', 'tree', 'forest', 'nn']:
        # Previously an unknown learner surfaced much later as a NameError
        # on `predictions`.
        raise ValueError('unknown learner: {0}'.format(lrn))

    #Data assembly:
    #
    #1: Grab a list of genes of interest and
    #   corresponding expression vectors
    #
    trg_kidxs = nu.net_trg_keyidxs()
    tf_kidxs = nu.net_tf_keyidxs()
    #
    #retrieve the list of trg/tf names present in a given cluster.
    #
    cands = get_candidates(10,ctfs,ctgs)
    cidx = cands[cofs]
    trg_ssnames = get_trg_ss(cluster = cidx )
    tf_ssnames = get_tf_ss(cluster = cidx , trgnames = trg_ssnames)

    if not cluster_tfs:
        # use the TFs listed for the selected target instead
        tgs, tfs = nu.parse_net()
        tf_ssnames = tgs[trg_ssnames[cluster_idx]]['tfs']

    if randomize_tfs:
        print('Randomizing TFs')
        tf_keys = list(tf_kidxs.keys())
        # randint excludes its upper bound.  random_integers included it,
        # so it could produce an index one past the end of the key list.
        picks = np.random.randint(0, len(tf_keys), len(tf_ssnames))
        tf_ssnames = [tf_keys[i] for i in picks]

    # None of the next four results is consumed below; the lookups are kept
    # so that a name missing from the network index still fails here.
    trg_ssidxs = array([trg_kidxs[name] for name in trg_ssnames])
    tf_ssidxs = array([tf_kidxs[name] for name in tf_ssnames])
    x_memberships = get_membership(tf_ssnames, method = method)
    y_memberships = get_membership(trg_ssnames, method = method)

    #
    #2: Load the expression data for the cluster
    #
    if do_normalize_cluster:
        all_expr = normalize_cluster_expr(trg_ssnames, tf_ssnames,ctype = ctype)
    else:
        all_expr = non_normal_cluster_expr(trg_ssnames, tf_ssnames,ctype = ctype)

    tg_expr, tf_expr = all_expr
    x_expr = array((tf_expr)).T
    y_expr = array((tg_expr)).T

    show_clustered_expr(y_expr,x_expr, trg_ssnames, tf_ssnames,fig = 8)

    #
    #3: Fold the data and split the folded columns into train / test
    #
    _, npertg = shape(x_expr)
    x_all, y_all = fold_expr(x_expr, y_expr)
    _, nt_folded = shape(x_all)

    # columns belonging to the selected target
    tg_start = cluster_idx * npertg
    tginds = range(tg_start, tg_start + npertg)
    tg_all = set(tginds)
    tg_head = set(tginds[:-4])      # the same block without its last 4
    # the last 4 columns of every per-target block
    cinds = set(i for i in range(nt_folded)
                if i % npertg >= npertg - 4)

    train_idxs, test_idxs = [], []
    for i in range(nt_folded):
        if ctype:
            is_test = i in cinds and i in tg_all
        else:
            is_test = i in tg_head
        if is_test:
            test_idxs.append(i)

        if tgonly:
            is_train = i in tg_head
        else:
            is_train = i not in tg_all and i not in cinds
        if is_train:
            train_idxs.append(i)

    print('N_TRAIN %s' % len(train_idxs))
    expr_fig = 0
    draw_expr(x_expr, y_expr, expr_fig = expr_fig)

    #
    #4: Learn and predict
    #
    if lrn == 'svm':
        model = learn_svm( x_all, y_all,
                           train_idxs = train_idxs,
                           test_idxs = test_idxs,
                           binary_x = binary_x,
                           binary_y = binary_y)
        predictions = run_svm((x_all.T)[test_idxs].T , y_all[test_idxs], model)

    elif lrn in ['knn','tree','forest']:
        all_ex = myrf.get_ex(x_all,y_all)
        train_ex = all_ex.getitems([int(x) for x in train_idxs])
        test_ex  = all_ex.getitems([int(x) for x in test_idxs])

        model = myrf.OLearn(lrn, train_ex, test_ex = test_ex)
        predictions = model.predictions(test_ex)

    else:
        # lrn == 'nn': demo on synthetic data.  Draws the network before
        # and after training and returns without scoring anything.
        nhc = 2
        ntg = 2
        ntf_s = 2
        max_tfu = 2
        gf = sf.genfann(nhc,ntg,ntf_s, [ max_tfu for i in range(ntg) ] )
        xs, ys = sf.synth_data(ntg,max_tfu,ntf_s)
        g, ga = gf.sample_genome()
        gf.init_net()
        gf.make_cxns_from_genome(g)

        net = gf.mynn.net

        f = plt.figure(0)
        f.clear()
        ax = f.add_subplot(121)
        myplots.draw_pb(ax,net)
        myplots.hideaxes(ax)
        myplots.maketitle(ax,'GANN')

        gf.set_data(xs.T,ys.T)
        gf.set_trainer()
        gf.train()

        ax2 = f.add_subplot(122)
        myplots.draw_pb(ax2,net)
        myplots.hideaxes(ax2)
        myplots.maketitle(ax2,'GANN')
        return

    actual = y_all[test_idxs]

    # NOTE(review): the `showall` argument has always been overwritten
    # here, so the scatter grid is drawn on every call.  Behaviour kept;
    # decide whether the parameter should be honoured instead.
    showall = True
    if showall:
        names = tf_ssnames if verbose_expr_labels else None
        # Keywords keep predictions / actual aligned with draw_svm's
        # signature; the old positional call passed them swapped.
        draw_svm(x_all[:,test_idxs],
                 predictions = predictions,
                 actual = actual,
                 f = expr_fig, names = names)

    print(predictions)
    print(actual)

    #
    #5: Plot and score the prediction
    #
    forstring = 'CL Data' if ctype else 'TS Data'
    namestr = trg_ssnames[cluster_idx]

    if randomize_tfs:
        title = 'Random TF Predictions ' + forstring + ', ' +namestr
        fnum = 5
    else:
        if cluster_tfs:
            title = 'Network Cluster TF Predictions'+ forstring + ', ' +namestr
        else:
            title = 'Network UnClustered TF Predictions'+ forstring + ', ' +namestr
        fnum = 6

    msecov = draw_prediction(predictions,actual,fig=fnum,
                    title = title,
                    subt = ','.join(tf_ssnames))

    print(msecov)
    return msecov