Esempio n. 1
0
 def test_merge(self, a):
     """Merge the fixture frame with a frame holding column "C" and check all columns."""
     extra = NumpyDataFrame({"C": np.arange(30, 40)})
     merged = NumpyDataFrame.merge((a, extra))
     assert "C" in merged.columns
     # (column name, expected values) pairs, checked in this order
     expected = (
         ("A", np.arange(10)),
         ("C", np.arange(30, 40)),
         ("B", np.arange(10, 20)),
     )
     for column, values in expected:
         assert np.all(merged.data[column] == values)
Esempio n. 2
0
def test_apply_with_kwargs(a):
    """Call ``apply`` with ``np.sum`` and an ``axis`` keyword on a reshaped frame."""
    columns = {"A": np.arange(10), "B": np.arange(10, 20)}
    df = NumpyDataFrame(columns).reshape((-1, 1))

    # summing along axis 1 is expected to give back the original values
    row_sums = df.apply(np.sum, axis=1)
    assert np.all(row_sums.data["A"] == np.arange(10))
    assert np.all(row_sums.data["B"] == np.arange(10, 20))

    # summing along axis 0 is expected to give each column's total
    col_sums = df.apply(np.sum, axis=0)
    total_a = np.arange(10).sum(axis=0)
    total_b = np.arange(10, 20).sum(axis=0)
    assert np.all(col_sums.data["A"] == total_a)
    assert np.all(col_sums.data["B"] == total_b)
Esempio n. 3
0
def _restore_cost_model(cls, obj):
    """Rebuild a ``cls`` instance from a decoded mapping without calling ``__init__``."""
    cost_dict = {tuple(key): value for key, value in obj[b"cost_dict"].items()}
    span = obj[b"span"]
    model = cls.__new__(cls)
    model.cost_dict = cost_dict
    model.span = span
    return model


def decoder(obj):
    """msgpack decoder for cost functions.

    Mappings carrying one of the byte-string tags below are turned back into
    the matching object; anything else is returned unchanged.

    :param obj: decoded mapping with byte-string keys
    :return: the reconstructed object, or ``obj`` itself when no tag matches
    """
    if b"__numpydataframe__" in obj:
        # column names arrive as bytes and are decoded to str
        columns = {key.decode(): value for key, value in obj[b"data"].items()}
        return NumpyDataFrame(data=columns)
    if b"__primercostmodel__" in obj:
        return _restore_cost_model(PrimerCostModel, obj)
    if b"__synthesiscostmodel__" in obj:
        return _restore_cost_model(SynthesisCostModel, obj)
    if b"__spancost__" in obj:
        return _restore_cost_model(SpanCost, obj)
    return obj
Esempio n. 4
0
def test_can_slice(shape):
    """Index a frame with ``[0]``; an empty ``shape`` must raise ``IndexError``."""
    frame = NumpyDataFrame({"A": np.ones(shape), "B": np.zeros(shape)})
    if len(shape) > 0:
        assert frame[0] is not None
        return
    # an empty shape gives arrays with no axis to index
    with pytest.raises(IndexError):
        frame[0]
Esempio n. 5
0
    def compute(self):
        """Fill ``self.cost_dict`` with one ``NumpyDataFrame`` per extension case.

        Builds a (span, left primer, right primer) cost array from
        ``self.primer_df`` and ``self.eff_df``, then for each of the four
        slices keyed ``(0, 0)``, ``(0, 1)``, ``(1, 0)``, ``(1, 1)`` stores
        the entries selected by ``lexargmin`` over (efficiency, cost).
        """
        span = self.span

        # table columns: span, base cost, cost per bp, time (days)
        primer_table = df_to_np_ranged(
            "min",
            "max",
            self.primer_df,
            cols=["base cost", "cost per bp", "time (days)"],
            dtype=np.float64,
        )
        primer_len = primer_table[:, 0, np.newaxis]
        base_cost = primer_table[:, 1, np.newaxis]
        cost_per_bp = primer_table[:, 2, np.newaxis]
        days = primer_table[:, 3, np.newaxis]

        # extension length of each primer, as an integer column vector
        ext = (primer_len - self.min_anneal).astype(np.int32)

        # span left over once both extensions are subtracted (i.e. the overlap)
        pair_ext = ext + ext.T
        rel_span = span[:, np.newaxis, np.newaxis] - pair_ext[np.newaxis, :, :]

        # efficiency lookup, clipped to the table bounds; same shape as rel_span
        eff_arr = df_to_np_ranged("min", "max", self.eff_df, dtype=np.float64)[:, 1]
        lookup = np.clip(-rel_span, 0, len(eff_arr) - 1)
        eff = eff_arr[lookup]

        # material plus time cost for every (left, right) primer pair
        m = primer_len * cost_per_bp + base_cost
        t = np.maximum(days, days.T)
        x = m * self.material_modifier + t * self.time_cost
        material_cost = x + x.T

        # efficiency-adjusted cost; NaN entries are treated as infinitely expensive
        cost = material_cost / eff
        cost[np.isnan(cost)] = np.inf

        slice_dict = {
            (0, 0): slicer[:, :1, :1],
            (0, 1): slicer[:, :1, 1:],
            (1, 0): slicer[:, 1:, :1],
            (1, 1): slicer[:, 1:, 1:],
        }

        for slice_index, slice_obj in slice_dict.items():
            s_eff = eff[slice_obj]
            s_cost = cost[slice_obj]
            s_mat = material_cost[slice_obj[1], slice_obj[2]]

            idx = lexargmin((s_eff, s_cost), axis=0)
            # NOTE(review): `ext` and `t` are indexed with positions taken from
            # the sliced arrays, while `s_mat` is sliced first; confirm the
            # `1:` slices do not need an offset here.
            columns = {
                "span": span[idx[0]],
                "cost": s_cost[idx],
                "efficiency": s_eff[idx],
                "material": s_mat[idx[1], idx[2]],
                "left_ext": ext[idx[1]],
                "right_ext": ext[idx[2]],
                "time": t[idx[1], idx[2]],
            }
            self.cost_dict[slice_index] = NumpyDataFrame(columns, apply=np.squeeze)
Esempio n. 6
0
 def test_update(self, a):
     """Update the fixture frame in place with column "C" and check all columns."""
     extra = NumpyDataFrame({"C": np.arange(30, 40)})
     a.update(extra)
     assert "C" in a.columns
     print(a)
     # (column name, expected values) pairs, checked in this order
     expected = (
         ("A", np.arange(10)),
         ("C", np.arange(30, 40)),
         ("B", np.arange(10, 20)),
     )
     for column, values in expected:
         assert np.all(a.data[column] == values)
Esempio n. 7
0
    def compute(self):
        """Fill ``self.cost_dict`` with the cheaper of primer and synthesis cost.

        For each extension pair, evaluates ``self.primer_cost`` and
        ``self.syn_cost`` over ``self.span`` and combines the two frames
        through ``NumpyDataFrame.group_apply``, selecting per row by the
        smaller ``"cost"`` value.
        """

        def choose(a, i):
            return np.choose(i, a)

        # yields (0, 0), (0, 1), (1, 0), (1, 1) in that order
        for ext in [(left, right) for left in (0, 1) for right in (0, 1)]:
            primer_df = self.primer_cost(self.span, ext)
            synthesis_df = self.syn_cost(self.span, ext)

            # index of the cheaper option per row (0=primer, 1=syn)
            stacked_costs = np.stack(
                (primer_df.data["cost"], synthesis_df.data["cost"]), axis=1
            )
            cheapest = stacked_costs.argmin(axis=1)

            self.cost_dict[ext] = NumpyDataFrame.group_apply(
                (primer_df, synthesis_df), choose, i=cheapest, _fill_value=np.nan
            )
Esempio n. 8
0
def test_concat_raises(a):
    """Concatenating with a copy that has an extra column "C" must raise."""
    widened = a.copy()
    widened.col["C"] = np.arange(100, 110)
    frames = [a, widened]
    with pytest.raises(NumpyDataFrameException):
        NumpyDataFrame.concat(frames)
Esempio n. 9
0
def a():
    """Return a frame with column "A" = 0..9 and column "B" = 10..19."""
    columns = {"A": np.arange(10), "B": np.arange(10, 20)}
    return NumpyDataFrame(columns)
Esempio n. 10
0
def test_concat(a):
    """Concatenating the fixture frame with its copy must give shape ``(20,)``."""
    duplicate = a.copy()
    combined = NumpyDataFrame.concat([a, duplicate])
    expected_shape = (20,)
    assert combined.shape == expected_shape
Esempio n. 11
0
def test_to_df_raises(shape):
    """``to_df`` must raise ``NumpyDataFrameException`` for the given ``shape``."""
    columns = {"A": np.ones(shape), "B": np.zeros(shape)}
    frame = NumpyDataFrame(columns)
    with pytest.raises(NumpyDataFrameException):
        frame.to_df()
Esempio n. 12
0
def test_str(shape):
    """Smoke test: ``str()`` of a frame can be produced and printed."""
    columns = {"A": np.ones(shape), "B": np.zeros(shape)}
    frame = NumpyDataFrame(columns)
    rendered = str(frame)
    print(rendered)
Esempio n. 13
0
def test_init_raises():
    """Building a frame from a 10-element and a 9-element column must raise."""
    mismatched = {"A": np.arange(10), "B": np.arange(9)}
    with pytest.raises(NumpyDataFrameException):
        NumpyDataFrame(mismatched)
Esempio n. 14
0
def test_empty_init():
    """A frame built with no arguments must expose an empty ``data`` dict."""
    empty = NumpyDataFrame()
    assert empty.data == {}
Esempio n. 15
0
def test_init():
    """A frame built from two equal-length columns must be truthy."""
    columns = {"A": np.arange(10), "B": np.arange(10)}
    frame = NumpyDataFrame(columns)
    assert frame
Esempio n. 16
0
 def test_update_raises(self, a):
     """Updating the fixture frame with an 11-element column "C" must raise."""
     longer = NumpyDataFrame({"C": np.arange(30, 41)})
     with pytest.raises(NumpyDataFrameException):
         a.update(longer)
Esempio n. 17
0
    def _compute(
        self,
        gene_costs,
        gene_sizes,
        gene_times,
        i: Union[bool, int],
        j: Union[bool, int],
        left_span,
    ):
        """Compute the synthesis cost frame for one extension case.

        Combines the primer cost of a left and a right junction with the cost
        and time of a synthesized gene, divides by the combined efficiency,
        and keeps the entries selected by ``lexargmin`` over
        (efficiency, total cost).

        :param gene_costs: gene costs; scaled by ``self.material_modifier``
            for the total and indexed by the selected size index
        :param gene_sizes: gene sizes; subtracted from ``self.span`` to get
            the right junction span
        :param gene_times: gene times; multiplied by ``self.time_cost``
        :param i: first element of the left extension tuple ``(i, 0)``
        :param j: second element of the right extension tuple ``(0, j)``
        :param left_span: spans evaluated for the left junction
        :return: ``NumpyDataFrame`` with columns ``span``, ``cost``,
            ``efficiency``, ``time``, ``material`` and ``lshift``, updated
            with the left primer, right primer and gene frames after
            ``prefix("lprimer_")``, ``prefix("rprimer_")`` and
            ``prefix("gene_")``
        """
        # extension tuples handed to primer_cost: only the left side uses `i`,
        # only the right side uses `j`
        left_ext = (i, 0)
        right_ext = (0, j)
        # left primer
        left_jxn = self.primer_cost(left_span, ext=left_ext)
        left_eff = left_jxn.data["efficiency"]
        left_material = left_jxn.data["material"]

        # right primer: covers whatever span is left after the gene and the
        # left junction
        right_span = self.span - gene_sizes - left_span
        right_jxn = self.primer_cost(right_span, ext=right_ext)
        right_eff = right_jxn.data["efficiency"]
        right_material = right_jxn.data["material"]
        # materials add up; efficiencies multiply
        ext_material = left_material + right_material
        ext_eff = np.multiply(left_eff, right_eff)

        # swap axes
        # resulting axis order per the original author: span, size, left_span
        # (assumes the inputs broadcast to three axes -- TODO confirm)
        ext_material = ext_material.swapaxes(0, 2)
        ext_eff = ext_eff.swapaxes(0, 2)
        syn_eff = ext_eff * 1.0  # here place probability of success for gene synthesis
        # could even use sequence to compute this later???
        syn_material_cost = (
            ext_material + gene_costs[np.newaxis, ...] * self.material_modifier
        )
        syn_time_cost = gene_times * self.time_cost
        # total cost is (material + time) divided by efficiency
        syn_total_cost = (syn_material_cost + syn_time_cost[np.newaxis, ...]) / syn_eff
        # presumably reduces over axis 0 and returns an index tuple with one
        # entry per axis -- confirm against lexargmin
        idx = lexargmin((syn_eff, syn_total_cost), axis=0)

        _gcosts = gene_costs[idx[1]]
        _span = np.squeeze(self.span)[idx[0]]
        _gtimes = syn_time_cost[idx[1]]
        # NOTE(review): `cost` and `material` both carry the gene cost without
        # `material_modifier`, and `time` is already multiplied by
        # `self.time_cost` -- confirm this is intended.
        gene_df = NumpyDataFrame(
            dict(
                cost=_gcosts,
                material=_gcosts,
                time=_gtimes,
                efficiency=np.ones(idx[0].shape[0]),
                size=gene_sizes[idx[1]],
            ),
            apply=np.squeeze,
        )

        # NOTE(review): the left junction is indexed by idx[2] only, the right
        # junction by (idx[2], idx[1], idx[0]) -- presumably because of their
        # different shapes; verify.
        flat_left_jxn = left_jxn[idx[2]].apply(np.squeeze)
        flat_right_jxn = right_jxn[idx[2], idx[1], idx[0]]

        # overall time is the element-wise maximum over the three components
        time = np.vstack(
            (
                flat_left_jxn.data["time"],
                flat_right_jxn.data["time"],
                gene_df.data["time"],
            )
        ).max(axis=0)

        gap_df = NumpyDataFrame(
            dict(
                span=_span,
                cost=syn_total_cost[idx],
                efficiency=syn_eff[idx],
                time=time,
                material=syn_material_cost[idx],
                lshift=left_span[idx[2]],
            ),
            apply=np.squeeze,
        )

        # attach the per-component frames under their own prefixes
        gap_df.update(flat_left_jxn.prefix("lprimer_"))
        gap_df.update(flat_right_jxn.prefix("rprimer_"))
        gap_df.update(gene_df.prefix("gene_"))

        return gap_df
Esempio n. 18
0
def test_repr(shape):
    """Smoke test: ``__repr__`` of a frame can be produced and printed."""
    columns = {"A": np.ones(shape), "B": np.ones(shape)}
    frame = NumpyDataFrame(columns)
    rendered = frame.__repr__()
    print(rendered)
Esempio n. 19
0
def test_to_df(shape):
    """The frame must report the shape it was built with, and ``to_df`` must run."""
    columns = {"A": np.ones(shape), "B": np.zeros(shape)}
    frame = NumpyDataFrame(columns)
    assert frame.shape == shape
    converted = frame.to_df()
    print(converted)
Esempio n. 20
0
def test_concat_fills_missing(a):
    """With ``fill_value=np.inf``, a column missing from one frame is filled in."""
    widened = a.copy()
    widened.col["C"] = np.arange(100, 110)
    combined = NumpyDataFrame.concat([a, widened], fill_value=np.inf)
    # the first frame has no "C": ten fill values, then the real data
    expected = np.array([np.inf] * 10 + list(range(100, 110)))
    assert np.all(combined.data["C"] == expected)