def test_merge(self, a):
    """Merge ``a`` with a frame holding column "C" and check all three columns."""
    extra = NumpyDataFrame({"C": np.arange(30, 40)})
    merged = NumpyDataFrame.merge((a, extra))
    assert "C" in merged.columns
    # Checked in the same order as before: A, then C, then B.
    expected_columns = (
        ("A", np.arange(10)),
        ("C", np.arange(30, 40)),
        ("B", np.arange(10, 20)),
    )
    for column, expected in expected_columns:
        assert np.all(merged.data[column] == expected)
def test_apply_with_kwargs(a):
    """Call ``apply(np.sum, axis=...)`` on a reshaped frame and check both axes."""
    frame = NumpyDataFrame({"A": np.arange(10), "B": np.arange(10, 20)}).reshape(
        (-1, 1)
    )

    # axis=1: the result is expected to equal the original column values.
    row_sums = frame.apply(np.sum, axis=1)
    assert np.all(row_sums.data["A"] == np.arange(10))
    assert np.all(row_sums.data["B"] == np.arange(10, 20))

    # axis=0: the result is expected to equal each column's total.
    column_sums = frame.apply(np.sum, axis=0)
    assert np.all(column_sums.data["A"] == np.arange(10).sum(axis=0))
    assert np.all(column_sums.data["B"] == np.arange(10, 20).sum(axis=0))
def decoder(obj):
    """msgpack decoder for cost functions.

    A mapping that carries one of the byte-string marker keys is rebuilt into
    the matching object; any other mapping is handed back untouched.

    :param obj: mapping to decode (marker keys are looked up as byte strings)
    :return: the rebuilt ``NumpyDataFrame`` / cost model, or ``obj`` itself
        when no marker key is present
    """

    def _revive(model_cls):
        # Bypass __init__ and restore only the two serialized attributes.
        costs = {tuple(key): value for key, value in obj[b"cost_dict"].items()}
        stored_span = obj[b"span"]
        model = model_cls.__new__(model_cls)
        model.cost_dict = costs
        model.span = stored_span
        return model

    if b"__numpydataframe__" in obj:
        # Column names arrive as bytes; the frame is built with str keys.
        columns = {name.decode(): values for name, values in obj[b"data"].items()}
        return NumpyDataFrame(data=columns)
    if b"__primercostmodel__" in obj:
        return _revive(PrimerCostModel)
    if b"__synthesiscostmodel__" in obj:
        return _revive(SynthesisCostModel)
    if b"__spancost__" in obj:
        return _revive(SpanCost)
    return obj
def test_can_slice(shape):
    """Indexing with ``[0]`` works unless ``shape`` is empty, which raises IndexError."""
    frame = NumpyDataFrame({"A": np.ones(shape), "B": np.zeros(shape)})
    if len(shape) > 0:
        assert frame[0] is not None
        return
    # Empty shape: there is no axis to index into.
    with pytest.raises(IndexError):
        frame[0]
def compute(self): span = self.span # span, base cost, cost per bp, time (days) p = df_to_np_ranged( "min", "max", self.primer_df, cols=["base cost", "cost per bp", "time (days)"], dtype=np.float64, ) # flattened extension array ext = p[:, 0].reshape(-1, 1) - self.min_anneal ext = ext.astype(np.int32) # relative span (i.e. the overlap) rel_span = span[:, np.newaxis, np.newaxis] - (ext + ext.T)[np.newaxis, :, :] # efficiency, the same shape as rel_span eff_arr = df_to_np_ranged("min", "max", self.eff_df, dtype=np.float64)[:, 1] eff = eff_arr[np.clip(-rel_span, 0, len(eff_arr) - 1)] # material cost m = p[:, 0, np.newaxis] * p[:, 2, np.newaxis] + p[:, 1, np.newaxis] t = p[:, 3, np.newaxis] t = np.maximum(t, t.T) x = m * self.material_modifier + t * self.time_cost material_cost = x + x.T # cost cost = material_cost / eff cost[np.where(np.isnan(cost))] = np.inf slice_dict = { (0, 0): slicer[:, :1, :1], (0, 1): slicer[:, :1, 1:], (1, 0): slicer[:, 1:, :1], (1, 1): slicer[:, 1:, 1:], } for slice_index, slice_obj in slice_dict.items(): s_eff = eff[slice_obj] s_cost = cost[slice_obj] s_mat = material_cost[slice_obj[1], slice_obj[2]] idx = lexargmin((s_eff, s_cost), axis=0) self.cost_dict[slice_index] = NumpyDataFrame( dict( span=span[idx[0]], cost=s_cost[idx], efficiency=s_eff[idx], material=s_mat[idx[1], idx[2]], left_ext=ext[idx[1]], right_ext=ext[idx[2]], time=t[idx[1], idx[2]], ), apply=np.squeeze, )
def test_update(self, a):
    """Update ``a`` in place with a frame holding column "C" and check all columns."""
    extra = NumpyDataFrame({"C": np.arange(30, 40)})
    a.update(extra)
    assert "C" in a.columns
    print(a)
    # Checked in the same order as before: A, then C, then B.
    expected_columns = (
        ("A", np.arange(10)),
        ("C", np.arange(30, 40)),
        ("B", np.arange(10, 20)),
    )
    for column, expected in expected_columns:
        assert np.all(a.data[column] == expected)
def compute(self):
    """Fill ``self.cost_dict`` with the cheaper of primer and synthesis cost.

    For each extension tuple, the primer-cost and synthesis-cost frames over
    ``self.span`` are compared on their "cost" column, and the winning frame
    is selected entry by entry.
    """

    def choose(a, i):
        return np.choose(i, a)

    for ext in ((0, 0), (0, 1), (1, 0), (1, 1)):
        primer_frame = self.primer_cost(self.span, ext)
        synthesis_frame = self.syn_cost(self.span, ext)

        # Index of the minimum cost per entry (0=primer, 1=syn).
        cheapest = np.stack(
            (primer_frame.data["cost"], synthesis_frame.data["cost"]), axis=1
        ).argmin(axis=1)

        # Pick from the primer or synthesis frame according to ``cheapest``.
        self.cost_dict[ext] = NumpyDataFrame.group_apply(
            (primer_frame, synthesis_frame),
            choose,
            i=cheapest,
            _fill_value=np.nan,
        )
def test_concat_raises(a):
    """Concatenating ``a`` with a copy that has an extra column "C" must raise."""
    widened = a.copy()
    widened.col["C"] = np.arange(100, 110)
    with pytest.raises(NumpyDataFrameException):
        NumpyDataFrame.concat([a, widened])
def a():
    """Return a frame with column "A" = 0..9 and column "B" = 10..19."""
    columns = {"A": np.arange(10), "B": np.arange(10, 20)}
    return NumpyDataFrame(columns)
def test_concat(a):
    """Concatenating ``a`` with a copy of itself yields shape ``(20,)``."""
    joined = NumpyDataFrame.concat([a, a.copy()])
    assert joined.shape == (20,)
def test_to_df_raises(shape):
    """``to_df`` must raise NumpyDataFrameException for the given ``shape``."""
    columns = {"A": np.ones(shape), "B": np.zeros(shape)}
    frame = NumpyDataFrame(columns)
    with pytest.raises(NumpyDataFrameException):
        frame.to_df()
def test_str(shape):
    """Smoke test: ``str()`` of a frame with the given ``shape`` does not fail."""
    frame = NumpyDataFrame({"A": np.ones(shape), "B": np.zeros(shape)})
    rendered = str(frame)
    print(rendered)
def test_init_raises():
    """Columns of different lengths (10 vs. 9) must be rejected on construction."""
    with pytest.raises(NumpyDataFrameException):
        mismatched = {"A": np.arange(10), "B": np.arange(9)}
        NumpyDataFrame(mismatched)
def test_empty_init():
    """A frame built without arguments holds an empty ``data`` mapping."""
    empty_frame = NumpyDataFrame()
    assert empty_frame.data == {}
def test_init():
    """A frame built from two equal-length columns is truthy."""
    columns = {"A": np.arange(10), "B": np.arange(10)}
    frame = NumpyDataFrame(columns)
    assert frame
def test_update_raises(self, a):
    """Updating with an 11-element column (``arange(30, 41)``) must raise."""
    too_long = NumpyDataFrame({"C": np.arange(30, 41)})
    with pytest.raises(NumpyDataFrameException):
        a.update(too_long)
def _compute(
    self,
    gene_costs,
    gene_sizes,
    gene_times,
    i: Union[bool, int],
    j: Union[bool, int],
    left_span,
):
    """Build the synthesis ("gap") cost frame for one pair of extension flags.

    Primer costs for a left and a right junction are combined with the gene
    cost and time, the best combination is selected with ``lexargmin`` along
    axis 0, and the selected entries are collected into one frame.

    NOTE(review): array shapes are not visible in this block; the axis remarks
    below are taken from the original inline comments and the indexing
    pattern, and should be confirmed against the callers.

    :param gene_costs: gene cost array; scaled by ``self.material_modifier``
        and indexed with ``idx[1]``
    :param gene_sizes: gene size array; subtracted from ``self.span`` and
        indexed with ``idx[1]``
    :param gene_times: gene time array; multiplied by ``self.time_cost``
    :param i: flag placed first in the left extension tuple ``(i, 0)``
    :param j: flag placed second in the right extension tuple ``(0, j)``
    :param left_span: spans passed to ``self.primer_cost`` for the left
        junction; indexed with ``idx[2]``
    :return: ``NumpyDataFrame`` with columns span, cost, efficiency, time,
        material and lshift, updated with the left primer, right primer and
        gene frames under the prefixes ``lprimer_``, ``rprimer_`` and ``gene_``
    """
    # extension conditions, idk
    left_ext = (i, 0)
    right_ext = (0, j)
    # left primer
    left_jxn = self.primer_cost(left_span, ext=left_ext)
    left_eff = left_jxn.data["efficiency"]
    left_material = left_jxn.data["material"]
    # right primer
    # The right junction spans whatever remains after the gene and the left span.
    right_span = self.span - gene_sizes - left_span
    right_jxn = self.primer_cost(right_span, ext=right_ext)
    right_eff = right_jxn.data["efficiency"]
    right_material = right_jxn.data["material"]
    # Materials add up; efficiencies multiply.
    ext_material = left_material + right_material
    ext_eff = np.multiply(left_eff, right_eff)
    # swap axes
    # span, size, left_span
    # (axis order after the swap, per the original note -- TODO confirm)
    ext_material = ext_material.swapaxes(0, 2)
    ext_eff = ext_eff.swapaxes(0, 2)
    syn_eff = ext_eff * 1.0  # here place probability of success for gene synthesis
    # could even use sequence to compute this later???
    syn_material_cost = (
        ext_material + gene_costs[np.newaxis, ...] * self.material_modifier
    )
    syn_time_cost = gene_times * self.time_cost
    syn_total_cost = (syn_material_cost + syn_time_cost[np.newaxis, ...]) / syn_eff
    # presumably a tuple of three index arrays, one per axis -- TODO confirm
    # against lexargmin
    idx = lexargmin((syn_eff, syn_total_cost), axis=0)
    _gcosts = gene_costs[idx[1]]
    _span = np.squeeze(self.span)[idx[0]]
    _gtimes = syn_time_cost[idx[1]]
    # The gene frame reuses the raw gene cost for both "cost" and "material"
    # and uses an efficiency of 1 everywhere.
    gene_df = NumpyDataFrame(
        dict(
            cost=_gcosts,
            material=_gcosts,
            time=_gtimes,
            efficiency=np.ones(idx[0].shape[0]),
            size=gene_sizes[idx[1]],
        ),
        apply=np.squeeze,
    )
    # NOTE(review): the left frame is indexed with ``idx[2]`` only and then
    # squeezed, while the right frame is indexed with all three components in
    # reversed order (undoing the axis swap above) -- confirm the asymmetry.
    flat_left_jxn = left_jxn[idx[2]].apply(np.squeeze)
    flat_right_jxn = right_jxn[idx[2], idx[1], idx[0]]
    # Overall time is the element-wise maximum of the three time columns.
    time = np.vstack(
        (
            flat_left_jxn.data["time"],
            flat_right_jxn.data["time"],
            gene_df.data["time"],
        )
    ).max(axis=0)
    gap_df = NumpyDataFrame(
        dict(
            span=_span,
            cost=syn_total_cost[idx],
            efficiency=syn_eff[idx],
            time=time,
            material=syn_material_cost[idx],
            lshift=left_span[idx[2]],
        ),
        apply=np.squeeze,
    )
    # Attach the component frames under distinguishing column prefixes.
    gap_df.update(flat_left_jxn.prefix("lprimer_"))
    gap_df.update(flat_right_jxn.prefix("rprimer_"))
    gap_df.update(gene_df.prefix("gene_"))
    return gap_df
def test_repr(shape):
    """Smoke test: ``__repr__`` of a frame with the given ``shape`` does not fail."""
    frame = NumpyDataFrame({"A": np.ones(shape), "B": np.ones(shape)})
    representation = frame.__repr__()
    print(representation)
def test_to_df(shape):
    """The frame reports the ``shape`` it was built with, and ``to_df`` runs."""
    columns = {"A": np.ones(shape), "B": np.zeros(shape)}
    frame = NumpyDataFrame(columns)
    assert frame.shape == shape
    converted = frame.to_df()
    print(converted)
def test_concat_fills_missing(a):
    """With ``fill_value`` given, a column missing from one frame is padded."""
    widened = a.copy()
    widened.col["C"] = np.arange(100, 110)
    joined = NumpyDataFrame.concat([a, widened], fill_value=np.inf)
    # ``a`` has no "C": its ten rows are filled with inf, then come the real values.
    expected_c = np.array([np.inf] * 10 + list(range(100, 110)))
    assert np.all(joined.data["C"] == expected_c)