コード例 #1
0
 def test_get_cas_numbers(self):
     """Read the CAS numbers (row names) exported from the R package.

     Exports the data to a temporary CSV via ``rdl.get_data_from_r``, loads
     it with ``rdl.load_response_matrix`` and checks the first returned
     value: it must have 249 entries, contain '89-78-1' and must not
     contain 'solvent'.
     """
     csv_path = os.path.join(self.tmp_path, 'rm.csv')
     try:
         rdl.get_data_from_r(csv_path)
         cas_numbers, _, _ = rdl.load_response_matrix(csv_path)
         self.assertEqual(len(cas_numbers), 249)
         self.assertIn('89-78-1', cas_numbers)
         self.assertNotIn('solvent', cas_numbers)
     finally:
         # clean up even when the export or an assertion fails, so a
         # stale CSV cannot leak into later tests
         if os.path.exists(csv_path):
             os.remove(csv_path)
コード例 #2
0
 def test_get_response_matrix(self):
     """Read the response matrix exported from the DoOR R package.

     Exports the data to a temporary CSV via ``rdl.get_data_from_r``, loads
     it with ``rdl.load_response_matrix`` and checks that the matrix is
     249 x 67 and that the returned row and column name lists have the
     matching lengths.
     """
     csv_path = os.path.join(self.tmp_path, 'rm.csv')
     try:
         rdl.get_data_from_r(csv_path)
         row_names, col_names, rm = rdl.load_response_matrix(csv_path)
         self.assertEqual(249, rm.shape[0])
         self.assertEqual(67, rm.shape[1])
         self.assertEqual(249, len(row_names))
         self.assertEqual(67, len(col_names))
     finally:
         # clean up even when the export or an assertion fails, so a
         # stale CSV cannot leak into later tests
         if os.path.exists(csv_path):
             os.remove(csv_path)
コード例 #3
0
def load_data_targets(config, features):
    """Load feature vectors and response targets for one glomerulus.

    Args:
        config: mapping that provides 'data_path' (directory containing
            door2id.json and the response matrix CSV files) and
            'glomerulus' (name looked up in the glomeruli returned by
            ``rdl.load_response_matrix``). If 'normed_responses' is present
            and falsy, unnorm_response_matrix.csv is read instead of
            response_matrix.csv.
        features: container keyed by molecule id (as a string); molecules
            whose id is not in it are dropped from the result.

    Returns:
        Tuple ``(data, targets, molids)``: ``data`` and ``targets`` are
        numpy arrays with one entry per kept molecule and ``molids`` is the
        matching list of molecule id strings.
    """
    data_path = config['data_path']
    # use a context manager so the JSON file handle is closed right away
    with open(os.path.join(data_path, 'door2id.json')) as door2id_file:
        door2id = json.load(door2id_file)

    csv_name = 'response_matrix.csv'
    if 'normed_responses' in config and not config['normed_responses']:
        csv_name = 'unnorm_response_matrix.csv'
    csv_path = os.path.join(data_path, csv_name)
    cas_numbers, glomeruli, rm = rdl.load_response_matrix(csv_path, door2id)
    glom_idx = glomeruli.index(config['glomerulus'])

    # select molecules available for the glomerulus
    targets, tmp_cas_numbers = rdl.get_avail_targets_for_glom(rm, cas_numbers, glom_idx)
    # the first id stored for a CAS number is used as the molecule id
    molids = [str(door2id[cas_number][0]) for cas_number in tmp_cas_numbers]
    assert len(molids) == len(targets)

    # for some of them the spectra are not available
    avail = [i for i, molid in enumerate(molids) if molid in features]
    targets = np.array([targets[i] for i in avail])
    data = np.array([features[molids[i]] for i in avail])
    # index directly instead of testing `i in avail` for every molecule
    molids = [molids[i] for i in avail]
    assert targets.shape[0] == data.shape[0]
    assert targets.shape[0] == len(molids)
    return data, targets, molids
from scipy.stats import scoreatpercentile
reload(plib)
reload(rdl)

# which descriptor / feature selection / learning method to visualize
desc = 'all'
selection = 'linear'
method = 'svr'
config = {
    "inpath": "/Users/dedan/projects/master/results/param_search/all_gloms_svrlin_all",
    "data_path": os.path.join(os.path.dirname(__file__), '..', '..', 'data'),
    "format": "png",
}
outpath = os.path.join(config['inpath'], 'plots')
# use a context manager so the JSON file handle is closed right away
with open(os.path.join(config['data_path'], 'door2id.json')) as door2id_file:
    door2id = json.load(door2id_file)
path_to_csv = os.path.join(config['data_path'], 'response_matrix.csv')
cas_numbers, all_glomeruli, rm = rdl.load_response_matrix(path_to_csv, door2id)

# variables for results
plt.close('all')
search_res, max_overview, sc, _ = rdl.read_paramsearch_results(config['inpath'])
# list() keeps the result indexable below even where keys() returns a view
glomeruli = list(search_res[desc][selection].keys())

# sort glomeruli according to performance (ascending by their best value)
maxes = [np.max(search_res[desc][selection][glom][method]) for glom in glomeruli]
# NOTE(review): picks stays in the pre-sort order; index it with max_idx if
# it is used together with the sorted glomeruli further down
picks = [search_res[desc][selection][glom][method][-1, 1] for glom in glomeruli]
max_idx = np.argsort(maxes)
glomeruli = [glomeruli[i] for i in max_idx]

fig = plt.figure(figsize=(3, 20))
for i_glom, glom in enumerate(glomeruli):
    mat = search_res[desc][selection][glom][method]
from master.libs import utils
import numpy as np
import pylab as plt
from scipy.stats import scoreatpercentile
reload(rdl)

data_path = '/Users/dedan/projects/master/data'
results_path = '/Users/dedan/projects/master/results/summary/'
descriptor = 'ATOMCENTRED_FRAGMENTS'
format = 'png'
N = 50
percentile = 75
percentile_thres = 0.2

# use a context manager so the JSON file handle is closed right away
with open(os.path.join(data_path, 'door2id.json')) as door2id_file:
    door2id = json.load(door2id_file)
cas_numbers, glomeruli, rm = rdl.load_response_matrix(os.path.join(data_path, 'response_matrix.csv'))

# which molecules are missing in door2id?
# .get() also reports CAS numbers that have no entry at all instead of
# aborting the report with a KeyError; the single-argument print(...) form
# behaves the same with and without the print statement
print('molecues missing in door2id: \n%s' % [r for r in cas_numbers if not door2id.get(r)])

# number of measurements available
fig = plt.figure(figsize=(20, 5))
ax = fig.add_subplot(111)
# count the non-NaN entries of each row of the response matrix
ax.bar(range(len(cas_numbers)), np.sum(~np.isnan(rm), axis=1))
ax.set_xticks(np.arange(len(cas_numbers)) + 1)
# every other label gets 40 trailing spaces (presumably to stagger
# neighbouring labels so they stay readable)
bla = [g + ' ' * 40 if i % 2 == 0 else g for i, g in enumerate(cas_numbers)]
ax.set_xticklabels(bla, rotation='90', ha='right')
ax.set_title('number of glomeruli available for a stimulus')
fig.savefig(os.path.join(results_path, 'glomeruli_per_stimulus.' + format))
#!/usr/bin/env python
# encoding: utf-8
"""
compare normalized and unnormalized response matrices

Created by  on 2012-01-27.
Copyright (c) 2012. All rights reserved.
"""

import sys
import os
import numpy as np
import pylab as plt
from master.libs import read_data_lib as rdl

data_path = os.path.join(os.path.dirname(__file__), '..', 'data')

# only the matrices are needed, the row and column names are discarded
_, _, rm = rdl.load_response_matrix(os.path.join(data_path, 'response_matrix.csv'))
_, _, urm = rdl.load_response_matrix(os.path.join(data_path, 'unnorm_response_matrix.csv'))

# render the normalized matrix first, then the unnormalized one, each
# without axes and saved to its own file in the working directory
for matrix, out_name in ((rm, 'bla1.png'), (urm, 'bla2.png')):
    plt.imshow(matrix)
    plt.axis('off')
    plt.savefig(out_name)
format = 'png'
# selected via the basic statistics script
interesting_glomeruli = ['Or19a', 'Or22a', 'Or35a', 'Or43b', 'Or67a',
                         'Or67b', 'Or7a', 'Or85b', 'Or98a', 'Or9a']
n_glomeruli = 5
resolution = 0.5
recompute = True
n_estimators = 100

# read in the IR spectra TODO: move them to data when final version exists
# NOTE(review): unpickling runs code embedded in the file, so ir_file must
# come from a trusted source; the open mode is left at its default as before
with open(ir_file) as spectra_file:
    spectra = pickle.load(spectra_file)
# use a context manager so the JSON file handle is closed right away
with open(os.path.join(base_path, 'data', 'door2id.json')) as door2id_file:
    door2id = json.load(door2id_file)

# investigate only the glomeruli for which we have most molecules available
csv_path = os.path.join(base_path, 'data', 'response_matrix.csv')
cas_numbers, glomeruli, rm = rdl.load_response_matrix(csv_path, door2id)
# best_glom = rdl.select_n_best_glomeruli(rm, glomeruli, n_glomeruli)
# print best_glom

kernel_widths = [2, 3, 5, 10, 20, 30, 50]

res = {}
# data collection
if recompute:
    for glom in interesting_glomeruli:

        print glom
        glom_idx = glomeruli.index(glom)

        # select molecules available for the glomerulus
        targets , tmp_cas_numbers = rdl.get_avail_targets_for_glom(rm, cas_numbers, glom_idx)
Created by  on 2012-01-27.
Copyright (c) 2012. All rights reserved.
"""

import os
import numpy as np
import pylab as plt
from matplotlib.patches import Rectangle, Circle
from matplotlib.ticker import NullLocator
from master.libs import read_data_lib as rdl

subtract_sfr = False
outpath = '/Users/dedan/projects/master/results/summary/'
rm_path = 'data/response_matrix.csv'

cases, gloms, rm = rdl.load_response_matrix(rm_path, door2id=None)

# read standard firing rates
if subtract_sfr:
    # the values are taken from the second line of the CSV (first column
    # skipped); the file is closed as soon as that line has been read
    with open(rm_path) as rm_file:
        sfr_fields = rm_file.readlines()[1].split(',')[1:]
    # strip each field: the last one still carries the line break, so a
    # trailing 'NA' would otherwise reach float() and raise ValueError
    sfrs = np.array([float(s) if s.strip() != 'NA' else 0 for s in sfr_fields])
    rm = np.subtract(rm, sfrs)

# only look at a slice of the matrix
rm = rm[70:110]
# treat missing measurements as zero
rm[np.isnan(rm)] = 0

# NOTE(review): if this runs under Python 2 without
# `from __future__ import division`, `/` floors both figure dimensions
fig = plt.figure(figsize=(rm.shape[1]/7, rm.shape[0]/7))
ax = fig.add_subplot(111)
ax.set_aspect('equal')
# no tick marks on the x axis
ax.xaxis.set_major_locator(NullLocator())