def test_design_with_overhang_primers(repeat, span_cost):
    """Design a 3 kb circular goal from two templates and three primers.

    The first template is ``goal[1000:2000]`` padded with 100 bp of random
    sequence on each side; the second is ``goal[2000:] + goal[:1000]``.
    Each primer is a 60 bp slice of the goal centred on position 1000 or
    2000, so it extends 30 bp past a template boundary.
    """
    goal = random_record(3000)
    make_circular_and_id([goal])

    # Templates.
    flanked_template = (
        random_record(100) + goal[1000:2000] + random_record(100)
    )
    origin_template = goal[2000:] + goal[:1000]

    # Primers spanning the 1000 and 2000 junctions.
    fwd_1000 = goal[970:1030]
    rev_2000 = goal[1970:2030].reverse_complement()
    fwd_2000 = goal[1970:2030]

    # Same ordering as the materials were originally registered in.
    make_linear_and_id(
        [flanked_template, fwd_1000, rev_2000, origin_template, fwd_2000]
    )

    design = Design(span_cost)
    design.add_materials(
        primers=[fwd_1000, rev_2000, fwd_2000],
        templates=[flanked_template, origin_template],
        queries=[goal],
        fragments=[],
    )

    # Node tuples; the third field is the node type ("A"/"B").
    # NOTE(review): remaining fields presumably mirror AssemblyNode's
    # (index, flag, type, overhang) arguments -- confirm.
    expected_path = [
        (970, False, "A", True),
        (2030, False, "B", True),
        (1970, False, "A", True),
        (4000, True, "B", True),
    ]

    check_design_result(design, expected_path)
def test_design_with_overlaps_with_templates(span_cost):
    """Design a circular goal from three templates that overlap each other.

    Neighbouring templates share sequence around positions 970-1000 and
    1950-2000, and the first template starts with the last 40 bp of the
    goal. A single primer equal to ``goal[-40:]`` is also supplied.
    """
    goal = random_record(3000)
    make_circular_and_id([goal])

    origin_template = goal[-40:] + goal[:1000]
    middle_template = goal[970:2000]
    tail_template = goal[1950:]
    tail_primer = goal[-40:]

    make_linear_and_id(
        [origin_template, middle_template, tail_template, tail_primer]
    )

    design = Design(span_cost)
    design.add_materials(
        primers=[tail_primer],
        templates=[origin_template, middle_template, tail_template],
        queries=[goal],
        fragments=[],
    )

    expected_path = [
        (970, True, "A", True),
        (1950, True, "B", False),
        (1950, True, "A", False),
        (3000, True, "B", True),
        (3000 - 40, False, "A", True),
        (4000, True, "B", True),
    ]

    # This test additionally requests the path check.
    check_design_result(design, expected_path, check_path=True)
def test_design_task_with_gaps(span_cost):
    """Design a circular goal from three templates separated by gaps.

    The templates cover ``goal[:950]``, ``goal[1000:2000]`` and
    ``goal[2050:]``, leaving 50 bp of the goal uncovered at 950-1000 and
    at 2000-2050.
    """
    goal = random_record(3000)
    make_circular_and_id([goal])

    templates = [goal[:950], goal[1000:2000], goal[2050:]]
    make_linear_and_id(templates)

    design = Design(span_cost)
    design.add_materials(
        primers=[],
        templates=templates,
        queries=[goal],
        fragments=[],
    )

    # One "A" node and one "B" node per template, at the template's ends.
    expected_path = [
        (0, True, "A", False),
        (950, True, "B", False),
        (1000, True, "A", False),
        (2000, True, "B", False),
        (2050, True, "A", False),
        (3000, True, "B", False),
    ]

    check_design_result(design, expected_path)
def test_single_fragment(span_cost):
    """Design a 3 kb circular goal when only ``goal[177:2255]`` is available."""
    goal = random_record(3000)
    make_circular_and_id([goal])

    template = goal[177:2255]
    make_linear_and_id([template])

    design = Design(span_cost)
    design.add_materials(
        primers=[],
        templates=[template],
        queries=[goal],
        fragments=[],
    )

    # The path is expected to enter and leave at the template's two ends.
    expected_path = [
        (177, True, "A", False),
        (2255, True, "B", False),
    ]

    check_design_result(design, expected_path)
def test_a_reverse_pcr_fragment(span_cost):
    """Design with a template and a primer given as reverse complements.

    The template is the reverse complement of ``goal[1000:2500]``; the
    primer is the reverse complement of ``goal[2480:2510]``, i.e. it runs
    10 bp past the template's end on the goal.
    """
    goal = random_record(3000)
    make_circular_and_id([goal])

    rc_template = goal[1000:2500].reverse_complement()
    rc_primer = goal[2500 - 20:2510].reverse_complement()

    make_linear_and_id([rc_primer, rc_template])

    design = Design(span_cost)
    design.add_materials(
        primers=[rc_primer],
        templates=[rc_template],
        queries=[goal],
        fragments=[],
    )

    expected_path = [
        (1000, True, "A", False),
        (2510, False, "B", False),
    ]

    check_design_result(design, expected_path)
def test_fully_overlapped(span_cost):
    """Design with three primers that all lie inside one short template.

    The template is ``goal[1100:1300]``. Two forward 30 bp primers start
    at 1177 and 1188, and one reverse-complemented 30 bp primer ends at
    1225.
    """
    goal = random_record(2000)
    make_circular_and_id([goal])

    template = goal[1100:1300]
    fwd_primer_a = goal[1177:1177 + 30]
    fwd_primer_b = goal[1188:1188 + 30]
    rev_primer = goal[1225 - 30:1225].reverse_complement()

    make_linear_and_id([template, fwd_primer_a, fwd_primer_b, rev_primer])

    design = Design(span_cost)
    design.add_materials(
        primers=[fwd_primer_a, fwd_primer_b, rev_primer],
        templates=[template],
        queries=[goal],
        fragments=[],
    )

    expected_path = [
        (1177, False, "A", False),
        (1300, True, "B", False),
    ]

    check_design_result(design, expected_path)
def test_blast_has_same_results(span_cost):
    """Compile the same design 20 times and compare group counts.

    For every compiled design the number of groups in each container is
    recorded. The test passes when exactly 20 counts were collected and
    they are all equal.
    """
    n_trials = 20

    goal = random_record(3000)
    make_circular_and_id([goal])

    flanked_template = (
        random_record(100) + goal[1000:2000] + random_record(100)
    )
    fwd_1000 = goal[970:1030]
    rev_2000 = goal[1970:2030].reverse_complement()
    origin_template = goal[2000:] + goal[:1000]
    fwd_2000 = goal[1970:2030]

    make_linear_and_id(
        [flanked_template, fwd_1000, rev_2000, origin_template, fwd_2000]
    )

    group_counts = []
    for _ in range(n_trials):
        design = Design(span_cost)
        design.logger.set_level("INFO")
        design.add_materials(
            primers=[fwd_1000, rev_2000, fwd_2000],
            templates=[flanked_template, origin_template],
            queries=[goal],
            fragments=[],
        )
        design.compile()

        group_counts.extend(
            len(container.groups()) for container in design.container_list
        )

    # One count per trial is expected, and every trial must agree.
    assert len(group_counts) == 20
    assert len(set(group_counts)) == 1
Ejemplo n.º 8
0
def test_num_groups_vs_endpoints(here, paths, query, span_cost):
    """Print the squared group count next to the endpoint-pair count.

    Loads primers, templates and a single query from disk, runs only the
    BLAST step, expands the one resulting container, and prints
    ``len(groups) ** 2`` followed by the number of distinct start
    positions multiplied by the number of distinct end positions.
    """
    query_path = join(here, "data/test_data/genbank/designs", query)

    primers = make_linear(load_fasta_glob(paths["primers"]))
    templates = load_genbank_glob(paths["templates"])
    queries = make_circular(load_genbank_glob(query_path))

    design = Design(span_cost)
    design.add_materials(primers=primers, templates=templates, queries=queries)
    design._blast()

    containers = design.container_list
    assert len(containers) == 1

    container = containers[0]
    container.expand()
    groups = container.groups()
    print(len(groups)**2)

    # Collect the distinct query-region endpoints across all groups.
    starts = set()
    ends = set()
    for group in groups:
        starts.add(group.query_region.a)
        ends.add(group.query_region.b)

    print(len(starts) * len(ends))
Ejemplo n.º 9
0
    def test_add_special_partition_node(self, span_cost):
        """Insert a uniquely-typed midpoint node into every B -> A edge.

        This simulates adding a partition to the assembly graph, a
        procedure that might be used for highly complex sequences. Each
        B -> A edge gets two extra edges routed through a new node placed
        half-way along the edge's region; both carry one tenth of the
        original edge's material cost.
        """
        goal = random_record(4000)
        make_circular_and_id([goal])

        long_template = goal[1000:2000]
        short_template = goal[200:500]
        make_linear_and_id([long_template, short_template])

        design = Design(span_cost)
        design.add_materials(
            primers=[],
            templates=[long_template, short_template],
            queries=[goal],
            fragments=[],
        )
        design.compile()

        import networkx as nx

        for query_key, graph in design.graphs.items():
            query = design.seqdb[query_key]
            partitioned = nx.DiGraph(graph)

            for src, dst, attrs in graph.edges(data=True):
                region = Region(
                    src.index, dst.index, len(query.seq), cyclic=True
                )
                if not (src.type == "B" and dst.type == "A"):
                    continue

                # Place the new node half-way along the region.
                half = int(len(region) / 2)
                midpoint = region.t(half + src.index)
                partition_node = AssemblyNode(
                    midpoint, False, str(uuid4()), overhang=True
                )

                reduced_material = attrs["material"] / 10.0
                # Only the first half-edge has its span reset to zero.
                first_half = {
                    **attrs, "material": reduced_material, "span": 0
                }
                second_half = {**attrs, "material": reduced_material}

                partitioned.add_edge(src, partition_node, **first_half)
                partitioned.add_edge(partition_node, dst, **second_half)

            # Replacing the value of an existing key is safe mid-iteration.
            design.graphs[query_key] = partitioned

        result = list(design.optimize().values())[0]
        best_assembly = result.assemblies[0]
        frame = best_assembly.to_df()
        assert list(frame["query_start"]) == [200, 500, 750, 1000, 2000, 3100]
        assert list(frame["query_end"]) == [500, 750, 1000, 2000, 3100, 200]
def test_requires_synthesis_with_template_over_origin(span_cost):
    """Design a 5 kb circular goal when one template spans the origin.

    Templates cover ``goal[1000:2000]`` and ``goal[3500:] + goal[:500]``;
    the regions 500-1000 and 2000-3500 are not covered by any template.
    """
    goal = random_record(5000)
    make_circular_and_id([goal])

    inner_template = goal[1000:2000]
    origin_template = goal[3500:] + goal[:500]
    make_linear_and_id([inner_template, origin_template])

    design = Design(span_cost)
    design.add_materials(
        primers=[],
        templates=[inner_template, origin_template],
        queries=[goal],
        fragments=[],
    )

    expected_path = [
        (500, True, "B", False),
        (1000, True, "A", False),
        (2000, True, "B", False),
        (3500, True, "A", False),
    ]

    check_design_result(design, expected_path)
def test_case(span_cost):
    """Regression test for a case that previously failed to find a solution.

    Only two small templates are available, separated by a gap of fewer
    than 10 bp (``goal[1188:1230]`` and ``goal[1238:1282]``). The intended
    solution is to PCR amplify both fragments and synthesize the rest of
    the plasmid.
    """
    goal = random_record(2000)
    make_circular_and_id([goal])

    templates = [goal[1188:1230], goal[1238:1282]]
    make_linear_and_id(templates)

    design = Design(span_cost)
    design.add_materials(
        primers=[],
        templates=templates,
        queries=[goal],
        fragments=[],
    )

    # Only the second template's endpoints are listed here.
    expected_path = [
        (1238, True, "A", False),
        (1282, True, "B", False),
    ]

    check_design_result(design, expected_path)
def test_very_long_synthesizable_region(span_cost):
    """Expect ``NoSolution`` for a 10 kb goal with one 78 bp template."""
    goal = random_record(10000)
    make_circular_and_id([goal])

    outer_template = goal[4177:4255]
    # NOTE(review): this nested slice is identified below but never added
    # to the design -- confirm whether that is intentional.
    inner_template = goal[4188:4225]

    make_linear_and_id([outer_template, inner_template])

    design = Design(span_cost)
    design.add_materials(
        primers=[],
        templates=[outer_template],
        queries=[goal],
        fragments=[],
    )

    # The call below must raise, so this path is never satisfied.
    expected_path = [
        (500, True, "B", False),
        (1000, True, "A", False),
        (2000, True, "B", False),
        (2500, True, "A", False),
    ]

    with pytest.raises(NoSolution):
        check_design_result(design, expected_path)
Ejemplo n.º 13
0
def test_large_pkl(span_cost):
    """Round-trip a large compiled design's state through ``pickle``.

    A fake design is compiled, then its graphs, its container factory and
    the span-cost fixture are each dumped and reloaded inside a timed
    logging block. The test passes if none of the round trips raise.
    """

    def round_trip(obj):
        # Only objects created by this test are unpickled here.
        return pickle.loads(pickle.dumps(obj))

    design = Design.fake(
        n_designs=3,
        n_cyclic_seqs=100,
        n_linear_seqs=100,
        n_primers=100,
    )
    design.compile()

    with logger.timeit("DEBUG", "pickling graphs"):
        round_trip(design.graphs)

    with logger.timeit("DEBUG", "pickling containers"):
        round_trip(design.container_factory)

    with logger.timeit("DEBUG", "pickling span_cost"):
        round_trip(span_cost)
def test_design_near_origin(span_cost):
    """Run a design from two overlapping fragments that cross the origin.

    The fragments are ``goal[-40:] + goal[:1000]`` and
    ``goal[950:] + goal[:1]``. Nothing is asserted: the test runs the
    design and prints the second item returned by ``to_df()``.
    """
    goal = random_record(3000)
    make_circular_and_id([goal])

    fragments = [
        goal[-40:] + goal[:1000],
        goal[950:] + goal[:1],
    ]
    make_linear_and_id(fragments)

    design = Design(span_cost)
    design.add_materials(
        primers=[],
        templates=[],
        queries=[goal],
        fragments=fragments,
    )
    design.run()

    frames = design.to_df()
    print(frames[1])
Ejemplo n.º 15
0
def test_cost_comparison_library():
    """Run ``Design`` and ``LibraryDesign`` on the same library and print both.

    A fake design is generated together with its sequence library; a
    ``LibraryDesign`` sharing the same ``seqdb`` is given the same
    materials. Both are run, then their statuses and assemblies are
    printed and the ``LibraryDesign`` coverage plot is shown. Nothing is
    asserted.
    """

    def banner(fill, title):
        # A title line sandwiched between two 10-character rules.
        rule = fill * 10
        return rule + "\n" + title + "\n" + rule

    design1, library = Design.fake(
        n_designs=3,
        n_linear_seqs=50,
        n_cyclic_seqs=50,
        n_primers_from_templates=500,
        shared_length=500,
        return_with_library=True,
    )
    design2 = LibraryDesign(seqdb=design1.seqdb)
    design2.add_materials(
        primers=library["short"],
        fragments=library["linear"],
        templates=library["cyclic"],
        queries=library["design"],
    )

    design1.run()
    design2.run()

    print(banner("#", "Design"))
    print(json.dumps(design1.status, indent=2))

    print(banner("#", "LibraryDesign"))
    print(json.dumps(design2.status, indent=2))

    print(banner("%", "Design Cost"))
    for status in design1.status.values():
        print(status["assemblies"])

    print(banner("%", "LibraryDesign Cost"))
    for status in design2.status.values():
        print(status["assemblies"])

    design2.report().plot_coverage(show=True)
    print(design2.to_df()[1])
Ejemplo n.º 16
0
def test_reindex_invariant(reindex):
    """Check that rotating a query's origin does not change the best cost.

    The queries of a fake design are rotated by ``reindex`` positions and
    handed to a second ``Design`` with the same materials. After running
    both, the cost of the first assembly of the first design entry must
    be equal.
    """

    def first_assembly(output):
        # First assembly of the first entry under the "designs" key.
        return list(output["designs"].values())[0]["assemblies"][0]

    design1, library = Design.fake(
        n_designs=1,
        n_linear_seqs=50,
        n_cyclic_seqs=50,
        n_primers_from_templates=500,
        shared_length=500,
        return_with_library=True,
    )

    rotated_queries = [
        query[reindex:] + query[:reindex] for query in library["design"]
    ]
    make_cyclic(rotated_queries)

    design2 = Design()
    design2.add_materials(
        templates=library["cyclic"],
        fragments=library["linear"],
        primers=library["short"],
        queries=rotated_queries,
    )

    design1.run()
    design2.run()

    results1 = design1.out()
    results2 = design2.out()

    best1 = first_assembly(results1)
    best2 = first_assembly(results2)

    print(best1["cost"])
    print(best2["cost"])

    assert best1["cost"] == best2["cost"]
    def example_design(self, span_cost):
        """Build, compile and optimize a small example design.

        The 3 kb circular goal is covered by two templates (one padded
        with random 100 bp flanks) and three 60 bp primers around
        positions 1000 and 2000.

        :param span_cost: span cost model passed to ``Design``
        :return: tuple of the design and the value of ``design.optimize()``
        """
        goal = random_record(3000)
        make_circular_and_id([goal])

        flanked_template = (
            random_record(100) + goal[1000:2000] + random_record(100)
        )
        origin_template = goal[2000:] + goal[:1000]
        fwd_1000 = goal[970:1030]
        rev_2000 = goal[1970:2030].reverse_complement()
        fwd_2000 = goal[1970:2030]

        make_linear_and_id(
            [flanked_template, fwd_1000, rev_2000, origin_template, fwd_2000]
        )

        design = Design(span_cost)
        design.add_materials(
            primers=[fwd_1000, rev_2000, fwd_2000],
            templates=[flanked_template, origin_template],
            queries=[goal],
            fragments=[],
        )
        design.compile()
        return design, design.optimize()
Ejemplo n.º 18
0
def test_design_optimize_cannot_run_before_compile(cached_span_cost):
    """``optimize()`` must raise before ``compile()`` and work afterwards."""
    fake_design = Design.fake(n_designs=1)

    # Optimizing an uncompiled design is an error.
    with pytest.raises(DasiDesignException):
        fake_design.optimize()

    # Once compiled, the same call is expected to go through.
    fake_design.compile()
    fake_design.optimize()
Ejemplo n.º 19
0
def test_example_0(args):
    """Run the bundled example design for a slice of the goal plasmids.

    :param args: tuple ``(i0, i1, DesignCls)``; ``goals[i0:i1]`` are used
        as queries and ``DesignCls`` is the design class to instantiate.
    """
    i0, i1, DesignCls = args

    # Seed both random generators so repeated runs are comparable.
    random.seed(0)
    np.random.seed(0)

    def open_gb(path):
        """Parse every GenBank record in a file under the fixtures dir."""
        with open(join(fixtures, path)) as f:
            return list(SeqIO.parse(f, format="genbank"))

    fragments = open_gb("fragments_0.gb")
    primers = open_gb("primers_0.gb")
    plasmids = open_gb("plasmids_0.gb")
    goals = open_gb("goals_0.gb")

    # Instantiate the class supplied by the parameters. The original
    # hard-coded ``Design()`` here, which left ``DesignCls`` unused and
    # meant any other design class in the parameters was never tested.
    design = DesignCls()
    design.add_fragments(fragments)
    design.add_primers(primers)
    design.add_templates(plasmids)
    design.add_queries(goals[i0:i1])
    design.run(n_jobs=4)

    # assert successful runs
    print(design.status)
    for status in design.status.values():
        assert status["success"] is True

    # output JSON
    out = design.out()
    print(out)
    print(design.to_df()[1])
Ejemplo n.º 20
0
    def run(self, n_jobs: int = 10):
        """Run a design job.

        Loads primers, templates, fragments and goals from the configured
        paths, compiles and optimizes a ``Design``, then writes
        ``summary.csv``, ``sequence_design.csv`` and ``sequences.gb``.

        :param n_jobs: number of parallel jobs to run. (default: 10)
        :return: None
        """
        import warnings

        # Silence these categories for the rest of the process.
        for ignored_category in (RuntimeWarning, BiopythonParserWarning):
            warnings.simplefilter(action="ignore", category=ignored_category)

        log = self._logger

        log.info("Loading sequence files")
        primers = make_linear(load_fasta_glob(self._primers))
        templates = make_circular(load_genbank_glob(self._templates))
        fragments = make_linear(load_genbank_glob(self._fragments))
        goals = make_circular(load_genbank_glob(self._goals))

        design = Design()
        design.n_jobs = n_jobs
        design.add_materials(
            primers=primers,
            templates=templates,
            fragments=fragments,
            queries=goals,
        )

        log.info("Getting span cost model")
        design.span_cost = self._get_span_cost()

        log.info("Compiling possible molecular assemblies")
        design.compile()

        log.info("Optimizing molecular assemblies")
        design.optimize()

        log.info("Designing assembly primers and fragments")
        sequence_df, summary_df, _design_json = design.to_df()
        # NOTE(review): both CSVs are written relative to the current
        # working directory, whereas sequences.gb below goes into
        # self._directory -- confirm this is intended.
        summary_df.to_csv("summary.csv")
        sequence_df.to_csv("sequence_design.csv")

        # Sequences of every molecule in the first assembly of each result
        # that has at least one assembly.
        records = [
            molecule.sequence
            for result in design.results.values()
            if result.assemblies
            for _index, _role, molecule in result.assemblies[0].molecules
        ]

        output_path = os.path.join(self._directory, "sequences.gb")
        SeqIO.write(records, output_path, "genbank")
Ejemplo n.º 21
0
 def design(self):
     """Return a fake three-design ``Design`` that has already been run."""
     fake_design = Design.fake(n_designs=3)
     fake_design.run()
     return fake_design