def load_data_targets(config, features):
    """Load the feature rows and targets available for one glomerulus.

    Reads ``door2id.json`` and a response-matrix CSV from
    ``config['data_path']``, asks ``rdl`` for the targets of
    ``config['glomerulus']`` and keeps only the molecules that also have an
    entry in ``features``.

    Args:
        config: mapping with the keys ``'data_path'`` and ``'glomerulus'``.
            If it contains ``'normed_responses'`` with a falsy value,
            ``unnorm_response_matrix.csv`` is read instead of
            ``response_matrix.csv``.
        features: mapping from molecule id (as ``str``) to that molecule's
            feature entry; supports ``in`` and item lookup.

    Returns:
        A tuple ``(data, targets, molids)``: ``data`` is an array built from
        the selected ``features`` entries, ``targets`` an array of the
        matching target values, and ``molids`` the list of molecule id
        strings, all in the same order and of the same length.

    Raises:
        AssertionError: if the ids and targets returned by ``rdl`` differ in
            length, or the assembled outputs disagree in length.
    """
    data_path = config['data_path']

    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(os.path.join(data_path, 'door2id.json')) as door2id_file:
        door2id = json.load(door2id_file)

    # The normalised matrix is the default; the unnormalised one is used only
    # when the option is present and explicitly switched off.
    if 'normed_responses' in config and not config['normed_responses']:
        csv_name = 'unnorm_response_matrix.csv'
    else:
        csv_name = 'response_matrix.csv'
    csv_path = os.path.join(data_path, csv_name)

    cas_numbers, glomeruli, rm = rdl.load_response_matrix(csv_path, door2id)
    glom_idx = glomeruli.index(config['glomerulus'])

    # select molecules available for the glomerulus
    # NOTE(review): presumably returns only molecules with a measured
    # response for this glomerulus -- confirm against rdl.
    targets, tmp_cas_numbers = rdl.get_avail_targets_for_glom(
        rm, cas_numbers, glom_idx)
    # The first entry of each door2id record, as a string, is the molecule id.
    molids = [str(door2id[cas_number][0]) for cas_number in tmp_cas_numbers]
    assert len(molids) == len(targets)

    # for some of them the spectra are not available
    avail = [i for i, molid in enumerate(molids) if molid in features]
    targets = np.array([targets[i] for i in avail])
    data = np.array([features[molids[i]] for i in avail])
    # Index through `avail` directly: same result and order as filtering with
    # `i in avail`, without a linear list scan per element.
    molids = [molids[i] for i in avail]

    assert targets.shape[0] == data.shape[0]
    assert targets.shape[0] == len(molids)
    return data, targets, molids
# variables for results plt.close('all') search_res, max_overview, sc, _ = rdl.read_paramsearch_results(config['inpath']) glomeruli = search_res[desc][selection].keys() # sort glomeruli according to performance maxes = [np.max(search_res[desc][selection][glom][method]) for glom in glomeruli] picks = [search_res[desc][selection][glom][method][-1, 1] for glom in glomeruli] max_idx = np.argsort(maxes) glomeruli = [glomeruli[i] for i in max_idx] fig = plt.figure(figsize=(3, 20)) for i_glom, glom in enumerate(glomeruli): mat = search_res[desc][selection][glom][method] glom_idx = all_glomeruli.index(glom) tmp_rm, tmp_cas_numbers = rdl.get_avail_targets_for_glom(rm, cas_numbers, glom_idx) ax = plt.subplot2grid((len(glomeruli), 2), (i_glom, 1)) if len(tmp_rm) > 50 and scoreatpercentile(tmp_rm, 75) > 0.2: ax.hist(tmp_rm, color='g') elif scoreatpercentile(tmp_rm, 75) > 0.14: ax.hist(tmp_rm, color='#6be463') else: ax.hist(tmp_rm, color='r') ax.set_xlim([0, 1]) ax.set_yticks([]) ax.set_xticks([]) ax = plt.subplot2grid((len(glomeruli), 2), (i_glom, 0)) if np.max(mat) < 0: ax.imshow(mat, interpolation='nearest') else: