def get_reinitz_data(**kwargs):
    """Load the Reinitz expression tables and cut out one block of rows.

    Keyword arguments read here (the full ``kwargs`` dict is also handed to
    ``datafiles`` after passing through ``mem.rc``):

    ofs         -- index of the row block to extract (default 0).
    plot_coords -- when true, save a scatter plot of the two extracted
                   coordinate columns (default False).
    plot_vals   -- when true, save a scatter plot of the first coordinate
                   column against the extracted values (default False).

    Returns a ``(coords, values)`` pair of dicts keyed like the mapping
    returned by ``datafiles``.  For each key, ``coords`` holds columns 1 and
    2 and ``values`` holds the last column of rows
    ``nuc_count * ofs`` up to (not including) ``nuc_count * (ofs + 1)``.
    """
    ofs = kwargs.get('ofs', 0)
    want_coord_plot = kwargs.get('plot_coords', False)
    want_value_plot = kwargs.get('plot_vals', False)

    # The result is not used below; the call is kept as-is because it is not
    # visible from here whether id_map() has side effects.
    idm = id_map()
    tables = datafiles(**mem.rc(kwargs))

    # Original author's note: it is not entirely clear how this data set is
    # laid out, but each nucleus appears several times with numbers that look
    # monotonically increasing.  Only one occurrence (selected by ``ofs``) is
    # taken.
    first_columns = {}
    for name, table in tables.iteritems():
        first_columns[name] = table[:, 0]

    # Block size = number of distinct entries in the first column of one
    # table (whichever happens to be third in the dict's value order).
    nuc_count = len(set(first_columns.values()[2]))
    row_start = nuc_count * ofs
    row_stop = nuc_count * (ofs + 1)

    values = {}
    coords = {}
    for name, table in tables.iteritems():
        values[name] = table[row_start:row_stop, -1]
        coords[name] = table[row_start:row_stop, 1:3]

    def _scatter_and_save(y_for, filename_template):
        # One scatter series per key, x = first coordinate column.
        fig = myplots.fignum(1, (8, 8))
        axes = fig.add_subplot(111)
        palette = mycolors.getct(len(values))
        for idx, name in enumerate(values.keys()):
            axes.scatter(coords[name][:, 0],
                         y_for(name),
                         10,
                         edgecolor='none',
                         alpha=.25,
                         c=palette[idx],
                         label=name)
        fig.savefig(myplots.figpath(filename_template.format(ofs)))

    # The plots exist as a basic consistency check of the data.  Only the
    # selected occurrence of each nucleus is drawn.
    if want_coord_plot:
        _scatter_and_save(lambda name: coords[name][:, 1],
                          'reinitz_exprdata_coords_nuc_offset={0}')

    if want_value_plot:
        _scatter_and_save(lambda name: values[name],
                          'reinitz_exprdata_ap_vals_nuc_offset={0}')

    return coords, values
def check_network(net_name='binding',
                  dataset_name='reinitz',
                  data_ofs=4,
                  max_edges=-1,
                  node_restriction='reinitz'):
    """Plot the distribution of correlation coefficients along network edges.

    For every edge of the chosen network whose two endpoints both have
    expression values in the chosen data set, the correlation coefficient of
    the two value vectors is computed.  A histogram of the non-NaN
    coefficients plus a scaled KDE (from ``make_kde``) is drawn and saved via
    ``myplots``.

    Parameters:
    net_name         -- 'binding', 'unsup' or 'logistic' select the entries
                        'bn', 'unsup' or 'logistic' of ``comp.get_graphs()``;
                        'clusters' builds the network with
                        ``get_soheil_network``.
    dataset_name     -- 'reinitz', 'bdtnp' or 'tc'.
    data_ofs         -- passed as ``ofs`` to ``get_reinitz_data`` for
                        'reinitz'; used as a column index into each entry's
                        'vals' array for 'bdtnp'; not used for 'tc'.
    max_edges        -- forwarded to ``get_soheil_network`` (only for
                        net_name 'clusters'); also written into the title and
                        the output file name.
    node_restriction -- when equal to 'reinitz' and dataset_name is 'tc', the
                        data is restricted to keys that also occur in the
                        Reinitz data.  Ignored otherwise.

    Raises:
    Exception -- for an unrecognised ``dataset_name`` or ``net_name``.

    Returns None; the result is the saved figure.
    """
    # Only the expression values are needed below.  Coordinates are not used
    # by this function, so they are not loaded.
    if dataset_name == 'reinitz':
        values = get_reinitz_data(ofs=data_ofs)[1]
    elif dataset_name == 'bdtnp':
        data = nio.getBDTNP()
        values = dict([(k, v['vals'][:, data_ofs])
                       for k, v in data.iteritems()])
    elif dataset_name == 'tc':
        data = nio.getTC()
        if node_restriction == 'reinitz':
            # Load the Reinitz keys only here, the one place they are used,
            # instead of reading that data set on every call.
            reinitz_keys = set(get_reinitz_data()[1].keys())
            data = dict([(k, v) for k, v in data.iteritems()
                         if k in reinitz_keys])
        values = data
    else:
        raise Exception('data set {0} not yet implemented'.format(dataset_name))

    nets = comp.get_graphs()
    if net_name == 'binding':
        network = nets['bn']
    elif net_name == 'unsup':
        network = nets['unsup']
    elif net_name == 'logistic':
        network = nets['logistic']
    elif net_name == 'clusters':
        network = get_soheil_network(max_edges=max_edges,
                                     node_restriction=values.keys())
    else:
        raise Exception('type not implemented: {0}'.format(net_name))

    nodes = values.keys()
    nodes_allowed = set(nodes)

    f = myplots.fignum(1, (8, 8))
    ax = f.add_subplot(111)

    # For each node with data, keep only the network neighbours that also
    # have data.  Nodes absent from the network contribute no edges.
    targets = {}
    for n in nodes:
        if n in network:
            targets[n] = nodes_allowed.intersection(network[n].keys())
        else:
            targets[n] = []

    edges = [(src, dst)
             for src, dsts in targets.iteritems()
             for dst in dsts]

    # Correlation per edge; NaN results are dropped from the distribution.
    ccofs = []
    for tf, tg in edges:
        cc = corrcoef(values[tf], values[tg])[0, 1]
        if not isnan(cc):
            ccofs.append(cc)

    xax = linspace(-1, 1, 20)
    _, kde = make_kde(ccofs)

    ax.hist(ccofs, xax, label=net_name)
    h = histogram(ccofs, xax)
    # Scale the KDE by the tallest histogram bin so both share one axis.
    ax.fill_between(xax, kde(xax) * max(h[0]),
                    label=net_name, zorder=1, alpha=.5)

    # n_edges counts every edge, including those whose correlation was NaN.
    myplots.maketitle(
        ax,
        'edge correlations kde for {0}'.format(
            '\n{2} data (data offset={0})\n(net_name={1})\n(max_edges={3})'
            .format(data_ofs, net_name, dataset_name, max_edges)),
        subtitle='n_edges = {0}'.format(len(edges)))
    ax.legend()

    f.savefig(myplots.figpath(
        'network_edge_corrs_data_ofs={0}_net={1}_expr={2}_max_edges={3}'
        .format(data_ofs, net_name, dataset_name, max_edges)))