def __init__(self, pth, fle):
    """Load a tab-separated cell-line x drug table into a matrix plus index maps.

    The first two lines of the file are skipped. The third line supplies the
    drug names (every column after the first). Each following row holds a
    cell-line name in its first column and one value per drug in the rest.

    :param pth: directory containing the file
    :param fle: file name inside ``pth``
    :raises StopIteration: if the file has fewer than three lines
    """
    cells = []
    data_matrix = []

    with open(path.join(pth, fle)) as src:
        rdr = reader(src, dialect='excel-tab')
        # The builtin next() works on Python 2.6+ and 3, unlike rdr.next().
        next(rdr)
        next(rdr)
        drugs = next(rdr)[1:]
        for row in rdr:
            cells.append(row[0])
            # genfromtxt is used so that fields it cannot parse become NaN
            # rather than raising.
            data_matrix.append(
                np.genfromtxt(np.array(row[1:])).astype(np.float64))

    # NOTE(review): supporting_functions.index is assumed to return a
    # dict-like mapping name -> position; confirm against its definition.
    cell_idx = supporting_functions.index(cells)
    drug_idx = supporting_functions.index(drugs)

    # .items() exists on both Python 2 and 3 dicts (iteritems() is 2-only).
    cell_idx_rv = {value: key for key, value in cell_idx.items()}
    drug_idx_rv = {value: key for key, value in drug_idx.items()}

    self.cell_line_2_idx = cell_idx
    self.drug_2_idx = drug_idx
    self.idx_2_cell_line = cell_idx_rv
    self.idx_2_drug = drug_idx_rv
    self.gi_50 = np.array(data_matrix)  # one row per cell line, one column per drug
def __init__(self, pth, fle):
    """Load a tab-separated per-cell-line marker table.

    The first two lines of the file are skipped. The third line (minus its
    first column) is stored as the header. Each following row holds a
    cell-line name in its first column; the remaining fields are stored
    unparsed, as the strings read from the file.

    :param pth: directory containing the file
    :param fle: file name inside ``pth``
    :raises StopIteration: if the file has fewer than three lines
    """
    cells = []
    markers = []

    with open(path.join(pth, fle)) as src:
        rdr = reader(src, dialect='excel-tab')
        # The builtin next() works on Python 2.6+ and 3, unlike rdr.next().
        next(rdr)
        next(rdr)
        header = next(rdr)[1:]
        for row in rdr:
            cells.append(row[0])
            markers.append(row[1:])

    # NOTE(review): supporting_functions.index is assumed to return a
    # dict-like mapping name -> position; confirm against its definition.
    cell_idx = supporting_functions.index(cells)
    # .items() exists on both Python 2 and 3 dicts (iteritems() is 2-only).
    cell_idx_rv = {value: key for key, value in cell_idx.items()}

    self.classificant_index = cell_idx
    # Misspelled historical name, kept as an alias so existing callers that
    # read ``cassificant_index`` keep working.
    self.cassificant_index = cell_idx
    self.header = header
    self.classificant_index_rv = cell_idx_rv
    self.markers = markers
def __init__(self, pth, fle, alpha_bound_percentile=5):
    """Load a tab-separated raw read-out table into dense NaN-padded arrays.

    The file is read twice. The first pass collects the distinct cell lines
    (column 0), drugs (column 1 paired with the float in column 47) and
    plates (column 2) so the arrays can be sized. The second pass fills the
    arrays row by row.

    At most ``depth_limiter`` (7) replicates are stored per (cell line, drug)
    pair; any further replicate overwrites the last slot.

    :param pth: directory containing the file
    :param fle: file name inside ``pth``
    :param alpha_bound_percentile: ``alpha_bound`` is set to the
        ``100 - alpha_bound_percentile`` percentile of the background noise
    """

    def nan_array(*shape):
        # Allocate a float array of the requested shape pre-filled with NaN.
        # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
        arr = np.empty(shape)
        arr.fill(np.nan)
        return arr

    source_file = path.join(pth, fle)

    # ---- first pass: discover the axes ---------------------------------
    cells = []
    drugs = []
    drug_versions = defaultdict(list)
    plates = []

    with open(source_file) as src:
        rdr = reader(src, dialect='excel-tab')
        # The builtin next() works on Python 2.6+ and 3, unlike rdr.next().
        header = next(rdr)
        for row in rdr:
            expanded_drug_name = (row[1], float(row[47]))
            cells.append(row[0])
            drug_versions[row[1]].append(expanded_drug_name)
            drugs.append(expanded_drug_name)
            plates.append(row[2])

    # NOTE(review): supporting_functions.index is assumed to return a
    # dict-like mapping item -> position; confirm against its definition.
    cell_idx = supporting_functions.index(set(cells))
    drug_idx = supporting_functions.index(set(drugs))
    plates_idx = supporting_functions.index(set(plates))

    # .items() exists on both Python 2 and 3 dicts (iteritems() is 2-only).
    drug_versions = {key: list(set(values))
                     for key, values in drug_versions.items()}
    cell_idx_rv = {value: key for key, value in cell_idx.items()}
    drug_idx_rv = {value: key for key, value in drug_idx.items()}

    cells_no = len(cell_idx)
    drugs_no = len(drug_idx)
    plates_no = len(plates_idx)
    depth_limiter = 7

    # ---- allocate NaN-padded storage -----------------------------------
    storage = nan_array(cells_no, drugs_no, depth_limiter, 10, 3)
    background = nan_array(cells_no, drugs_no, depth_limiter, 4)
    t0_median = nan_array(cells_no, drugs_no, depth_limiter)
    t0_background = nan_array(cells_no, drugs_no, depth_limiter)
    tf_background = nan_array(cells_no, drugs_no, depth_limiter)
    background_noise = nan_array(plates_no, 2)
    cl_drug_replicates = np.zeros((cells_no, drugs_no))

    # ---- second pass: fill the arrays ----------------------------------
    with open(source_file) as src:
        rdr = reader(src, dialect='excel-tab')
        test_array = next(rdr)
        # Kept from the original: broadcast is run once on the header slice
        # and its result discarded.
        # NOTE(review): presumably a layout sanity check - confirm.
        supporting_functions.broadcast(test_array[6:36])

        for row in rdr:
            cell_no = cell_idx[row[0]]
            drug_no = drug_idx[(row[1], float(row[47]))]
            plate_no = plates_idx[row[2]]

            # cl_drug_replicates is a float array, and NumPy rejects float
            # indices, so the slot number is cast to int before indexing.
            depth_index = int(min(cl_drug_replicates[cell_no, drug_no],
                                  depth_limiter - 1))

            storage[cell_no, drug_no, depth_index, :, :] = \
                supporting_functions.broadcast(row[6:36])
            background[cell_no, drug_no, depth_index, :] = \
                supporting_functions.lgi(row, [4, 5, 36, 37])
            t0_median[cell_no, drug_no, depth_index] = row[38]
            t0_background[cell_no, drug_no, depth_index] = np.mean(
                supporting_functions.lgi(row, [4, 5]).astype(np.float64)).tolist()
            tf_background[cell_no, drug_no, depth_index] = np.mean(
                supporting_functions.lgi(row, [36, 37]).astype(np.float64)).tolist()
            # Absolute difference between paired columns (4 vs 5, 36 vs 37);
            # later rows on the same plate overwrite earlier ones.
            background_noise[plate_no, :] = np.abs(
                supporting_functions.lgi(row, [4, 36]).astype(np.float64)
                - supporting_functions.lgi(row, [5, 37]).astype(np.float64))

            cl_drug_replicates[cell_no, drug_no] += 1

    # Pairs that never occurred are marked NaN rather than 0.
    cl_drug_replicates[cl_drug_replicates < 1] = np.nan

    # ---- derived statistics --------------------------------------------
    alpha_bound = np.percentile(rm_nans(background_noise),
                                100 - alpha_bound_percentile)
    std_of_tools = np.percentile(rm_nans(background_noise), 66)

    background = supporting_functions.p_stabilize(background, 0.5)
    t0_background = supporting_functions.p_stabilize(t0_background, 0.5)
    tf_background = supporting_functions.p_stabilize(tf_background, 0.5)

    # Subtract the final-time background from every read-out of a replicate.
    storage_dblanc = storage - tf_background[:, :, :, np.newaxis, np.newaxis]

    self.header_line = header
    self.cell_line_2_idx = cell_idx
    self.drug_2_idx = drug_idx
    self.idx_2_cell_line = cell_idx_rv
    self.idx_2_drug = drug_idx_rv
    self.raw_data = storage_dblanc  # cell_line, drug, concentration -> 3 replicates
    self.background = background  # cell_line, drug -> (T0_1, T0_2, T_final, T_final) backgrounds
    self.t0_background = t0_background
    self.t_f_background = tf_background
    self.t0_median = t0_median  # for each cell_line and drug contains T0
    self.alpha_bound = alpha_bound  # lower significance bound
    self.std_of_tools = std_of_tools
    self.background_noise = background_noise
    self.drug_versions = dict(drug_versions)  # drug names + concentrations versions
    self.cl_drug_replicates = cl_drug_replicates