def _default_parse_func(file, shape, sep, encoding):
    """Parse a delimited plate data file into a ``_PlateData`` object.

    Every line is split on ``sep`` and its first two fields are dropped.
    A line is kept only when the remaining field count equals
    ``rows * cols``; all other lines are silently ignored.

    The kept lines are treated as two equally sized sections: the first
    half holds the OD readings, the second half the GFP readings.  Row 0
    of each half is skipped (presumably a label/header row -- TODO
    confirm against a real data file).

    Args:
        file: path of the data file to read.
        shape: ``(rows, cols)`` of the plate.
        sep: field separator used in the file.
        encoding: text encoding passed to ``open``.

    Returns:
        ``_PlateData(OD, GFP, MASK)`` where OD is a float array and GFP an
        int array, both reshaped to ``(-1, rows, cols)``, and MASK is
        True wherever GFP is not the overflow value.

    Raises:
        AsRuntimeError: no line has the expected width, or the number of
            kept lines is odd (the two sections cannot be equal).
    """
    n_rows, n_cols = shape
    cells_per_line = n_rows * n_cols

    with open(file, "r", encoding=encoding) as handle:
        field_rows = (
            raw.replace("\n", "").split(sep)[2:] for raw in handle
        )
        # the generator has to be drained while the file is still open
        records = [
            fields for fields in field_rows
            if len(fields) == cells_per_line
        ]

    if not records:
        raise AsRuntimeError(
            """DataParser: parse failed, no any valid line found make sure data file is in correct format""")
    if len(records) % 2:
        raise AsRuntimeError(
            """DataParser: parse failed, uneven GFP and OD sections make sure data file is in correct format""")

    table = numpy.asarray(records, dtype=object)
    half = len(table) // 2

    # overflow readings are stored as the sentinel 100000; note that a
    # genuine reading of exactly 100000 is indistinguishable from it
    table[table == "OVRFLW"] = 100000

    od = table[1:half, :].reshape(-1, n_rows, n_cols).astype(float)
    gfp = table[half + 1:].reshape(-1, n_rows, n_cols).astype(int)
    return _PlateData(od, gfp, gfp != 100000)
def analyze(self):
    """Run the full analysis over all samples.

    First runs the P analysis on every sample in ``self.samples``.  Then
    takes ``P()`` of the untreated sample and runs the XELI analysis,
    with that value as its argument, on every sample returned by
    ``self.get_samples_except_untreated()``.

    Raises:
        AsRuntimeError: no untreated sample has been assigned.
    """
    # phase 1: P for everybody (the untreated sample included)
    for entry in self.samples:
        entry.run_P_analysis()

    # phase 2: the remaining samples are analysed against the untreated one
    if self.untreated_sample() is None:
        raise AsRuntimeError(
            "cannot canculate I with no assign of untreated sample")

    baseline_P = self.untreated_sample().P()
    for entry in self.get_samples_except_untreated():
        entry.run_XELI_analysis(baseline_P)
def _blank_correction(self):
    """Subtract the mean blank signal from the OD and GFP data in place.

    The blank wells are those selected by
    ``self.layout.mask_by_category(self._blank)``.  For each channel the
    mean over the blank wells is taken along axis 1 (one value per entry
    of axis 0) and subtracted from the whole array, which is updated in
    place through slice assignment.

    Returns:
        A log section ``">name:BLANK_CORRECTION\\n<tables>\\n"`` built
        from ``self.name()`` and ``self.cat_OD_GFP_tables()``.

    Raises:
        AsRuntimeError: the layout has no blank well.
    """
    blank_wells = self.layout.mask_by_category(self._blank)
    if not blank_wells.any():
        raise AsRuntimeError(
            "background correction requires at least one 'BLANK' category in layout"
        )

    # OD first, then GFP; each accessor is expected to hand back the
    # underlying array so that the slice assignment modifies it in place
    for channel in (self.OD, self.GFP):
        background = channel()[:, blank_wells].mean(axis=1, keepdims=True)
        # slice assignment (not ``-=``) so the result is cast to the
        # dtype of the stored array instead of raising on a mismatch
        channel()[:] = channel() - background

    return ">%s:BLANK_CORRECTION\n%s\n" % (
        self.name(), self.cat_OD_GFP_tables())
def _parse_maps(gene_map, cate_map):
    """Collect the annotated cells of a gene map and a category map.

    Both maps are indexed as ``map[row][col]``.  A cell is recorded when
    both its gene and its category are truthy, skipped when both are
    falsy, and rejected when only one of them is set.

    Args:
        gene_map: nested sequence of gene labels.
        cate_map: nested sequence of category labels; must be indexable
            at every position present in ``gene_map``.

    Returns:
        ``(coords, genes, cates)`` numpy arrays: ``coords`` holds the
        ``[row, col]`` pairs (int), ``genes`` and ``cates`` the matching
        labels (str), all in row-major scan order.

    Raises:
        AsRuntimeError: a cell has a gene but no category, or vice versa.
    """
    positions, gene_names, categories = [], [], []

    for r, gene_row in enumerate(gene_map):
        for c, gene in enumerate(gene_row):
            cate = cate_map[r][c]

            # completely empty cell: nothing to record
            if not (gene or cate):
                continue

            # exactly one of the two is present: the maps disagree
            if not (gene and cate):
                e = "'gene' or 'category' missing at (row %d, col %d)"
                raise AsRuntimeError(e % (r, c))

            positions.append([r, c])
            gene_names.append(gene)
            categories.append(cate)

    return (
        numpy.asarray(positions, dtype=int),
        numpy.asarray(gene_names, dtype=str),
        numpy.asarray(categories, dtype=str),
    )
def _eline_correction(self):
    """Fit the e-line background model and store its parameters.

    The wells selected by ``self.layout.mask_by_category(self._eline)``
    are cut out of the OD, GFP and MASK arrays (along axis 1) and handed
    to ``self.eline_corre.feed``.  The background GFP signal is modelled
    linearly as ``slope * OD + intercept``; the fitted slope, intercept
    and intercept SD are stored on ``self.model_slope``,
    ``self.model_inter`` and ``self.model_inter_sd``.

    Returns:
        A log section ``">name:ELINE_CORRECTION\\n<msg>\\n"`` where
        ``<msg>`` is the message returned by ``feed``.

    Raises:
        AsRuntimeError: the layout has no e-line well.
    """
    eline_wells = self.layout.mask_by_category(self._eline)
    if not eline_wells.any():
        raise AsRuntimeError(
            "e-line correction requires at least one 'ELINE' category in layout"
        )

    # same selection as ``array[:, eline_wells]`` for each of the three
    # data arrays, taken in the order OD, GFP, MASK
    well_index = (slice(None), eline_wells)
    od_el, gfp_el, mask_el = (
        source()[well_index] for source in (self.OD, self.GFP, self.MASK)
    )

    fit = self.eline_corre.feed(od_el, gfp_el, mask_el)
    self.model_slope, self.model_inter, self.model_inter_sd, report = fit

    return ">%s:ELINE_CORRECTION\n%s\n" % (self.name(), report)
def select_slope_and_intercept(self, nr, nc, all_regs):
    """Pick the regressions to use and reduce them to one slope/intercept.

    With more than one regression the choice is delegated to
    ``self.selector.launch`` (an interactive selector); with exactly one
    there is nothing to choose, so index 0 is used for both slope and
    intercept.  The selected slopes (column 0 of ``all_regs``) and
    intercepts (column 1) are then averaged.

    Args:
        nr, nc: forwarded unchanged to the selector.
        all_regs: 2-D array of regression results, one row per
            regression; presumably a numpy array, since it is indexed
            with an index list and a column number -- TODO confirm.

    Returns:
        ``(slope, inter, inter_sd, ret_msg)``: mean selected slope, mean
        selected intercept, standard deviation of the selected
        intercepts, and a text report.

    Raises:
        NoValueSelectedError: the selector returned an empty slope or
            intercept selection.
        AsRuntimeError: exactly one intercept is selected (not
            implemented yet); this is always the case when ``all_regs``
            has a single row.
    """
    count = len(all_regs)
    if count != 1:
        slope_i, inter_i = self.selector.launch(
            count, nr, nc, self.plot_path, self.sample_name)
    else:
        # only one candidate: no need for the interactive selection
        slope_i, inter_i = [0], [0]

    if not slope_i:
        raise NoValueSelectedError("slopes cannot be null selection")
    if not inter_i:
        raise NoValueSelectedError("intercepts cannot be null selection")

    chosen_slopes = all_regs[slope_i, 0]
    chosen_inters = all_regs[inter_i, 1]
    if len(chosen_inters) == 1:
        raise AsRuntimeError(
            "must be at least 2 intercepts (this exception is thrown due to not implemented yet)"
        )

    slope = chosen_slopes.mean()
    inter = chosen_inters.mean()
    inter_sd = chosen_inters.std()

    # report pieces are rendered in the same order they appear in the text
    raw_table = array2d2string_by_row(
        all_regs.tolist(), "%f\t%f\t%f\t%.8e\t%f")
    slopes_text = vector2string(chosen_slopes)
    inters_text = vector2string(chosen_inters)

    ret_msg = """Raw regressions: Slope\tIntercept\tr^2\tp\tStd.err %s Selected: Slopes: %s Intercepts: %s Final: Slope: %f Intercept: %f Inter.SD: %f""" % (
        raw_table, slopes_text, inters_text, slope, inter, inter_sd)
    return slope, inter, inter_sd, ret_msg
def add_sample(self, SampleClass, name, layout=None, offset=None,
               untreated=False, **kw):
    """Create a sample and register it in ``self.samples``.

    Args:
        SampleClass: class (or factory) called with keyword arguments
            only to build the sample.
        name: sample name, forwarded as ``name=``.
        layout: layout for this sample; ``self.shared_layout`` is used
            when this is falsy.
        offset: forwarded as ``offset=``.
        untreated: when true, the new sample also becomes the untreated
            control (``self._control``).
        **kw: extra keyword arguments forwarded to ``SampleClass``.

    Raises:
        AsRuntimeError: ``untreated`` is true but an untreated sample is
            already assigned.  In that case nothing is constructed and
            ``self.samples`` is left unchanged.
    """
    # Validate before building or registering anything: a rejected call
    # must not leave a half-registered sample behind in self.samples.
    if untreated and self._control is not None:
        raise AsRuntimeError(
            "assign more than one sample as untreated is not allowed")

    # internal _id is same as the position in the self.samples array
    _id = len(self.samples)
    sample = SampleClass(
        name=name,
        layout=layout or self.shared_layout,
        log=self.log(),
        assay_data=self.data(),
        outdir=self.output_dir(),
        offset=offset,
        _id=_id,
        **kw
    )
    self.samples.append(sample)

    if untreated:
        self._control = sample
def create_output_dir(self, outdir, overwrite=False, **kw):
    """Make sure the output directory exists.

    A missing ``outdir`` is created together with any missing parents.
    An existing path is accepted as it is when ``overwrite`` is true
    (nothing is removed or recreated) and rejected otherwise.

    Args:
        outdir: path of the output directory.
        overwrite: allow ``outdir`` to exist already.
        **kw: accepted and ignored.

    Raises:
        AsRuntimeError: ``outdir`` exists and ``overwrite`` is false.
    """
    if not os.path.exists(outdir):
        os.makedirs(outdir)
        return

    # the path is already there: only acceptable on explicit request
    if not overwrite:
        raise AsRuntimeError("output dir '%s' already exists" % outdir)
def _check_shape(f1, f2):
    """Verify that two nested sequences have the same shape.

    The outer lengths must match, and so must the lengths of every pair
    of corresponding inner sequences.

    Raises:
        AsRuntimeError: the outer lengths differ, or some pair of inner
            sequences differs in length.
    """
    # the outer length is compared first; rows are only inspected when it
    # matches, and the scan stops at the first mismatching row
    mismatch = len(f1) != len(f2) or any(
        len(row1) != len(row2) for row1, row2 in zip(f1, f2)
    )
    if mismatch:
        raise AsRuntimeError("map files must be in exact same shape")