Ejemplo n.º 1
0
def get_reinitz_data(**kwargs):
    """Extract one block of rows from every Reinitz data file.

    Keyword arguments read here (the complete ``kwargs`` dict is also
    forwarded, via ``mem.rc``, to ``datafiles``):
        ofs: zero-based index of the block of ``nuc_count`` consecutive
            rows to extract from each file (default 0).
        plot_coords: when true, save a scatter plot of the extracted
            coordinates, one colour per dataset key (default False).
        plot_vals: when true, save a scatter plot of the first extracted
            coordinate against the extracted value (default False).

    Returns:
        ``(coords, values)``: two dicts sharing the keys of the mapping
        returned by ``datafiles``.  ``coords[k]`` holds columns 1 and 2 of
        the selected rows, ``values[k]`` holds their last column.
    """
    ofs = kwargs.get('ofs', 0)
    do_plot_coords = kwargs.get('plot_coords', False)
    do_plot_vals = kwargs.get('plot_vals', False)

    # NOTE(review): the result of id_map() was never used here.  The call is
    # kept only in case it has side effects -- confirm and remove.
    id_map()
    df = datafiles(**mem.rc(kwargs))

    # Original author's note: it is not clear exactly how this dataset is
    # laid out, but column 0 appears to hold a per-nucleus number that
    # increases monotonically and then repeats.  The number of distinct
    # values in that column is taken as the nucleus count, i.e. the size of
    # one block of rows.
    #
    # The dataset at position 2 (in dict iteration order) is used as the
    # reference, so at least three datasets must be present.
    # .items() / list(...) are used instead of the Python-2-only
    # .iteritems() / indexable .values() so this runs on Python 2 and 3.
    nums = dict((k, v[:, 0]) for k, v in df.items())
    nuc_count = len(set(list(nums.values())[2]))

    # Rows belonging to block number `ofs`.
    rows = slice(nuc_count * ofs, nuc_count * (ofs + 1))
    values = dict((k, v[rows, -1]) for k, v in df.items())
    coords = dict((k, v[rows, 1:3]) for k, v in df.items())

    def save_scatter(y_of, name_template):
        # Scatter y_of(key) against the first coordinate column for every
        # dataset and save the figure under name_template.format(ofs).
        # NOTE(review): both plots request figure number 1 -- whether
        # myplots.fignum clears an existing figure is not visible here.
        f = myplots.fignum(1, (8, 8))
        ax = f.add_subplot(111)
        ct = mycolors.getct(len(values))
        for i, k in enumerate(values):
            ax.scatter(coords[k][:, 0], y_of(k), 10,
                       edgecolor='none', alpha=.25, c=ct[i],
                       label=k)
        f.savefig(myplots.figpath(name_template.format(ofs)))

    # The plots are a basic consistency check of the data.  Only the block
    # selected by `ofs` is shown, not every occurrence of each nucleus.
    if do_plot_coords:
        save_scatter(lambda k: coords[k][:, 1],
                     'reinitz_exprdata_coords_nuc_offset={0}')
    if do_plot_vals:
        save_scatter(lambda k: values[k],
                     'reinitz_exprdata_ap_vals_nuc_offset={0}')

    return coords, values
Ejemplo n.º 2
0
def check_network(net_name='binding',
                  dataset_name='reinitz',
                  data_ofs=4,
                  max_edges=-1,
                  node_restriction='reinitz'):
    """Plot how well expression profiles correlate across network edges.

    For every edge of the chosen network whose two endpoints both have
    expression data, the correlation coefficient of the two profiles is
    computed with ``corrcoef``.  A histogram of the non-NaN coefficients,
    overlaid with a scaled kernel density estimate from ``make_kde``, is
    saved through ``myplots``.

    Args:
        net_name: which network to test: 'binding', 'unsup' or 'logistic'
            (entries 'bn', 'unsup', 'logistic' of ``comp.get_graphs()``),
            or 'clusters' (built by ``get_soheil_network``).
        dataset_name: expression data source: 'reinitz', 'bdtnp' or 'tc'.
        data_ofs: for 'reinitz', the ``ofs`` passed to
            ``get_reinitz_data``; for 'bdtnp', the column of each
            ``'vals'`` array to use.  Not used to select data for 'tc',
            but always included in the title and file name.
        max_edges: forwarded to ``get_soheil_network``; only used when
            ``net_name == 'clusters'``.
        node_restriction: only consulted for the 'tc' dataset; when equal
            to 'reinitz', the data is restricted to keys that also occur
            in the Reinitz data.

    Raises:
        Exception: if ``dataset_name`` or ``net_name`` is not one of the
            supported values.
    """
    # .items() and list(...) are used throughout instead of the
    # Python-2-only .iteritems() and list-returning .keys(), so the
    # function runs on Python 2 and 3.
    if dataset_name == 'reinitz':
        values = get_reinitz_data(ofs=data_ofs)[1]
    elif dataset_name == 'bdtnp':
        data = nio.getBDTNP()
        values = dict((k, v['vals'][:, data_ofs]) for k, v in data.items())
    elif dataset_name == 'tc':
        data = nio.getTC()
        if node_restriction == 'reinitz':
            # Load the Reinitz keys only here, where they are needed.
            # Previously they were loaded on every call, which loaded the
            # Reinitz data twice for dataset_name == 'reinitz'.
            reinitz_keys = set(get_reinitz_data()[1])
            data = dict((k, v) for k, v in data.items()
                        if k in reinitz_keys)
        values = data
    else:
        raise Exception('data set {0} not yet implemented'.format(dataset_name))

    nets = comp.get_graphs()
    if net_name == 'binding':
        network = nets['bn']
    elif net_name == 'unsup':
        network = nets['unsup']
    elif net_name == 'logistic':
        network = nets['logistic']
    elif net_name == 'clusters':
        network = get_soheil_network(max_edges=max_edges,
                                     node_restriction=list(values))
    else:
        raise Exception('type not implemented: {0}'.format(net_name))

    nodes_allowed = set(values)

    f = myplots.fignum(1, (8, 8))
    ax = f.add_subplot(111)

    # For each node with data, keep only the neighbours that also have data.
    # Nodes missing from the network get no targets.
    targets = {}
    for n in values:
        if n in network:
            targets[n] = nodes_allowed.intersection(network[n].keys())
        else:
            targets[n] = []

    # Bin edges for the histogram and evaluation points for the KDE.
    xax = linspace(-1, 1, 20)

    edges = [(tf, tg) for tf, tgs in targets.items() for tg in tgs]
    all_ccofs = (corrcoef(values[tf], values[tg])[0, 1] for tf, tg in edges)
    # NaN coefficients are dropped before plotting.
    ccofs = [c for c in all_ccofs if not isnan(c)]

    # make_kde returns a pair; only the density callable is used here.
    _, kde = make_kde(ccofs)

    ax.hist(ccofs, xax, label=net_name)
    h = histogram(ccofs, xax)
    # Scale the density to the tallest histogram bar so both share one axis.
    ax.fill_between(xax, kde(xax) * max(h[0]),
                    label=net_name, zorder=1, alpha=.5)

    details = ('\n{2} data (data offset={0})\n(net_name={1})\n(max_edges={3})'
               .format(data_ofs, net_name, dataset_name, max_edges))
    # n_edges counts every edge, including those whose coefficient was NaN.
    myplots.maketitle(ax, 'edge correlations kde for {0}'.format(details),
                      subtitle='n_edges = {0}'.format(len(edges)))
    ax.legend()
    f.savefig(myplots.figpath('network_edge_corrs_data_ofs={0}_net={1}_expr={2}_max_edges={3}'
                              .format(data_ofs, net_name, dataset_name, max_edges)))