コード例 #1
0
def test_highly_complex_design(span_cost, design_class):
    """Run a design whose circular goal contains the same 30 bp record twice.

    The goal is ``backbone[1000:]`` followed by an insert and then
    ``backbone[:1000]``. The insert carries one random 30 bp record twice,
    separated by 200 random bases and followed by 1000 random bases. Two
    overlapping slices of the backbone are supplied as linear templates.

    No assertions are made; the test only prints the design outputs, so it
    passes whenever nothing raises.
    """
    backbone = random_record(3000)
    repeat = random_record(30)
    spacer = random_record(200)
    tail = random_record(1000)
    insert = repeat + spacer + repeat + tail

    goal = backbone[1000:] + insert + backbone[:1000]

    # The two templates overlap on backbone[1900:2000].
    left_template = backbone[:2000]
    right_template = backbone[1900:2500]

    make_linear([left_template, right_template])
    make_circular([goal])

    design = design_class(span_cost)
    design.n_jobs = 1
    design.add_materials(
        primers=[],
        templates=[left_template, right_template],
        queries=[goal],
        fragments=[],
    )

    design.compile()
    design.optimize()

    print(design.to_df()[1])

    first_result = list(design.results.values())[0]
    print(first_result.assemblies)
    print(first_result.assemblies[0]._nodes)
    print(first_result)

    print(design.out())
コード例 #2
0
def to_record(seq, linear=True):
    """Wrap a sequence in a ``SeqRecord`` and mark its topology.

    :param seq: sequence handed to ``Seq``
    :param linear: when truthy the record is passed to ``make_linear``,
        otherwise to ``make_circular``
    :return: the newly created record
    """
    rec = SeqRecord(Seq(seq))
    set_topology = make_linear if linear else make_circular
    set_topology([rec])
    return rec
コード例 #3
0
def iter_fake_random_record(n_seqs: int, size_int: Tuple[int, int],
                            cyclic: bool) -> List[SeqRecord]:
    """Yield ``n_seqs`` random, randomly annotated records.

    Note that this is a generator: records are produced lazily, one per
    iteration, even though the return annotation says ``List``.

    :param n_seqs: number of records to yield
    :param size_int: pair unpacked into ``random.randint`` to draw the
        length of each record
    :param cyclic: when truthy each record is passed to ``make_circular``,
        otherwise to ``make_linear``
    :return: generator of records
    """
    for _ in range(n_seqs):
        n_bases = random.randint(*size_int)

        # Each record gets a unique name, which is also used as its id.
        label = f"<random record {uuid4()}"
        record = biopython.random_record(n_bases,
                                         name=label,
                                         auto_annotate=True)
        record.id = record.name
        biopython.randomly_annotate(record, (100, 1000))

        (make_circular if cyclic else make_linear)([record])
        yield record
コード例 #4
0
def test_num_groups_vs_endpoints(here, paths, query, span_cost):
    """Print two counts for the single container of one query.

    The first printed number is the number of groups squared; the second is
    the number of distinct ``query_region.a`` values times the number of
    distinct ``query_region.b`` values. The only assertion is that exactly
    one container exists after blasting.
    """
    primers = make_linear(load_fasta_glob(paths["primers"]))
    templates = load_genbank_glob(paths["templates"])

    design_file = join(here, "data/test_data/genbank/designs", query)
    queries = make_circular(load_genbank_glob(design_file))

    design = Design(span_cost)
    design.add_materials(primers=primers, templates=templates, queries=queries)
    design._blast()

    all_containers = design.container_list
    assert len(all_containers) == 1

    only_container = all_containers[0]
    only_container.expand()
    groups = only_container.groups()
    print(len(groups)**2)

    # Distinct start and end points of the grouped query regions.
    starts = {grp.query_region.a for grp in groups}
    ends = {grp.query_region.b for grp in groups}

    print(len(starts) * len(ends))
コード例 #5
0
ファイル: test_seq_db.py プロジェクト: jvrana/pyblast2
def test_add_with_transform():
    """Adding a record with a transformation keeps both versions retrievable.

    After ``add_many_with_transformations`` the returned key must resolve
    to the transformed (doubled) record, while ``get_origin`` must return
    the very record object that was added.
    """
    from uuid import uuid4
    from pyblast.constants import Constants as C

    db = SeqRecordDB()

    def pseudocircularize(rec):
        # Concatenate the record with itself and give the copy a new id.
        doubled = rec + rec
        doubled.name = C.PSEUDOCIRCULAR + "__" + rec.name
        doubled.id = str(uuid4())
        return doubled

    record = SeqRecord(
        Seq("ACGTTCGTGATTGTGCTGTGTGTATGGTATGATTATAGTGATGTAGTGATGATGTAGTAGTATA")
    )
    records = make_circular([record])

    keys = db.add_many_with_transformations(records, pseudocircularize,
                                            C.PSEUDOCIRCULAR)

    transformed_key = keys[0]
    origin_key = db.get_origin_key(transformed_key)
    origin = db.get_origin(transformed_key)
    transformed = db.get(transformed_key)

    assert origin is record
    assert origin is not transformed
    assert origin_key is not transformed_key
    assert len(transformed) == 2 * len(record)
コード例 #6
0
ファイル: test_seq_db.py プロジェクト: jvrana/pyblast2
def test_add_multiple_transformations():
    """Adding two distinct records with a transformation stores four entries.

    Two copies of the same sequence with different ids are added; the call
    must return two keys and the database must end up with a length of 4.
    """
    from uuid import uuid4
    from pyblast.constants import Constants as C

    db = SeqRecordDB()

    def pseudocircularize(rec):
        # Concatenate the record with itself and give the copy a new id.
        doubled = rec + rec
        doubled.name = C.PSEUDOCIRCULAR + "__" + rec.name
        doubled.id = str(uuid4())
        return doubled

    record = SeqRecord(
        Seq("ACGTTCGTGATTGTGCTGTGTGTATGGTATGATTATAGTGATGTAGTGATGATGTAGTAGTATA")
    )
    first, second = deepcopy(record), deepcopy(record)
    first.id, second.id = "record1", "record2"
    records = make_circular([first, second])

    keys = db.add_many_with_transformations(records, pseudocircularize,
                                            C.PSEUDOCIRCULAR)
    assert len(keys) == 2
    assert len(db) == 4
コード例 #7
0
    def run(self, n_jobs: int = 10):
        """Load the sequence files, run the design and write the outputs.

        Primers and fragments are loaded as linear records; templates and
        goals as circular records. After compiling and optimizing, two of the
        three values returned by ``design.to_df()`` are written to
        ``sequence_design.csv`` and ``summary.csv``, and the molecule
        sequences of the first assembly of every result are written to
        ``sequences.gb`` inside ``self._directory``.

        :param n_jobs: value assigned to ``design.n_jobs`` (default: 10)
        :return: None
        """
        import warnings

        # These filters are installed process-wide and are not restored.
        for category in (RuntimeWarning, BiopythonParserWarning):
            warnings.simplefilter(action="ignore", category=category)

        log = self._logger

        log.info("Loading sequence files")
        primers = make_linear(load_fasta_glob(self._primers))
        templates = make_circular(load_genbank_glob(self._templates))
        fragments = make_linear(load_genbank_glob(self._fragments))
        goals = make_circular(load_genbank_glob(self._goals))
        design = Design()
        design.n_jobs = n_jobs
        design.add_materials(
            primers=primers,
            templates=templates,
            fragments=fragments,
            queries=goals,
        )

        log.info("Getting span cost model")
        design.span_cost = self._get_span_cost()

        log.info("Compiling possible molecular assemblies")
        design.compile()

        log.info("Optimizing molecular assemblies")
        design.optimize()

        log.info("Designing assembly primers and fragments")
        sequence_df, summary_df, _ = design.to_df()
        # NOTE(review): both CSV paths are relative (current working
        # directory) while sequences.gb goes to self._directory -- confirm
        # this is intended.
        summary_df.to_csv("summary.csv")
        sequence_df.to_csv("sequence_design.csv")

        # Only the first assembly of each result contributes sequences.
        records = [
            molecule.sequence
            for result in design.results.values()
            if result.assemblies
            for _, _, molecule in result.assemblies[0].molecules
        ]

        genbank_path = os.path.join(self._directory, "sequences.gb")
        SeqIO.write(records, genbank_path, "genbank")
コード例 #8
0
    def test_circular_over_subject(self):
        """Blast a linear query against a circular subject.

        The subject is ``record[200:300]``, an ``ns(500)`` spacer and
        ``record[100:200]``, so the region ``record[100:300]`` is contiguous
        only when the subject is treated as circular.
        """
        record = rand_record(1000)
        subject = record[200:300] + ns(500) + record[100:200]

        queries = make_linear([record])
        subjects = make_circular([subject])

        results = BioBlast(subjects, queries).blastn()

        compare_result(results[0], 101, 300, 601, 100)
コード例 #9
0
ファイル: conftest.py プロジェクト: jvrana/DASi-DNA-Design
def blast_factory(paths) -> BioBlastFactory:
    """Create a ``BioBlastFactory`` loaded with primers, templates and queries.

    Primers are loaded from FASTA and made linear, queries are loaded from
    GenBank and made circular, and the templates are added exactly as
    loaded from GenBank.

    :param paths: mapping indexed with the ``PRIMERS``, ``REGISTRY`` and
        ``QUERIES`` keys
    :return: the populated factory
    """
    primers = make_linear(load_fasta_glob(paths[PRIMERS]))
    templates = load_genbank_glob(paths[REGISTRY])
    queries = make_circular(load_genbank_glob(paths[QUERIES]))

    factory = BioBlastFactory()
    for records, group_name in (
        (primers, PRIMERS),
        (templates, TEMPLATES),
        (queries, QUERIES),
    ):
        factory.add_records(records, group_name)

    return factory
コード例 #10
0
def test_library_design_to_df_2(paths, here, span_cost):
    """Run a ``LibraryDesign`` on the ``test_data_sd2`` data set and dump it.

    The test makes no assertions. It prints each result's first assembly as
    a data frame, then writes the three values returned by
    ``design.to_df()`` to ``library_design.csv``, ``library_summary.csv`` and
    ``designs.json`` (relative paths) and prints them.
    """
    primers_glob = join(here, "data/test_data_sd2", "primers.fasta")
    fragments_glob = join(here, "data/test_data_sd2", "fragments", "*.gb")
    plasmids_glob = join(here, "data/test_data_sd2", "plasmids", "*.gb")
    designs_glob = join(here, "data/test_data_sd2", "designs", "*.gb")

    primers = make_linear(load_fasta_glob(primers_glob))
    plasmids = load_genbank_glob(plasmids_glob)
    fragments = load_genbank_glob(fragments_glob)
    print(fragments_glob)
    queries = make_circular(load_genbank_glob(designs_glob))

    design = LibraryDesign(span_cost=span_cost)
    design.n_jobs = 1
    # Plasmids are used as circular templates, fragments as linear ones.
    design.add_materials(
        primers=primers,
        templates=make_circular(plasmids),
        queries=queries,
        fragments=make_linear(fragments),
    )

    design.logger.set_level("DEBUG")
    design.compile()

    results = design.optimize()

    for result in results.values():
        print(result.assemblies[0].to_df())

    design_df, summary_df, design_json = design.to_df()
    design_df.to_csv("library_design.csv")
    summary_df.to_csv("library_summary.csv")
    with open("designs.json", "w") as handle:
        json.dump(design_json, handle)
    print(design_df)
    print(summary_df)
    print(design_json)
コード例 #11
0
def test_benchmark_blast(benchmark, here, paths, query):
    """Benchmark ``Design._blast`` for a single query file.

    ``_blast`` is called once directly before being handed to the
    ``benchmark`` callable.
    """
    primers = make_linear(load_fasta_glob(paths["primers"]))
    templates = load_genbank_glob(paths["templates"])

    design_file = join(here, "data/test_data/genbank/designs", query)
    queries = make_circular(load_genbank_glob(design_file))

    design = Design()
    design.add_materials(primers=primers, templates=templates, queries=queries)

    # One direct call first, then the measured calls.
    design._blast()
    benchmark(design._blast)
コード例 #12
0
ファイル: conftest.py プロジェクト: jvrana/pyblast2
    def make_blast():
        """Build a ``BioBlast`` from the template files and one design file.

        All GenBank templates are used as subjects as loaded; the single
        design file is loaded as the query and made circular.
        """
        templates_glob = join(here, "data/test_data/genbank/templates/*.gb")
        subjects = load_genbank_glob(templates_glob, force_unique_ids=True)

        design_file = join(
            here,
            "data/test_data/genbank/designs/pmodkan-ho-pact1-z4-er-vpr.gb")
        queries = make_circular(
            load_genbank_glob(design_file, force_unique_ids=True))

        assert is_circular(queries[0])
        return BioBlast(subjects, queries)
コード例 #13
0
    def test_circular_complete_subject(self):
        """Align a linear subject that spans the origin of a circular query.

        The subject is ``record[500:] + record[:400]``, so its 900 bases are
        expected to map to query positions 501 through 400 (1-based) across
        the origin.
        """
        record = rand_record(1000)
        subject = record[500:] + record[:400]

        queries = make_circular([record])
        subjects = make_linear([subject])

        hit = BioBlast(subjects, queries).blastn()[0]
        print(hit)

        subject_hit, query_hit = hit["subject"], hit["query"]
        assert subject_hit["start"] == 1
        assert subject_hit["end"] == 900
        assert query_hit["start"] == 501
        assert query_hit["end"] == 400
コード例 #14
0
    def test_circular_complete_query_1(self):
        """Align a subject that contains the whole circular query, rotated.

        The subject holds the query rotated to start at 0-based index 500,
        flanked by ``ns(100)`` on both sides. Result positions reported by
        pyblast are 1-based.
        """
        record = rand_record(1000)
        rotated = record[500:] + record[:500]
        subject = ns(100) + rotated + ns(100)

        queries = make_circular([record])
        subjects = make_linear([subject])

        hit = BioBlast(subjects, queries).blastn()[0]

        assert hit["query"]["start"] == 501
        assert hit["query"]["raw_end"] == 1500
        assert hit["subject"]["start"] == 101
        assert hit["subject"]["end"] == 1100
コード例 #15
0
    def _get_results_func(n_jobs):
        """Run a design over the first ``LIM_NUM_DESIGNS`` design files.

        :param n_jobs: when greater than 1 the design is run with
            ``_run_with_pool(n_jobs, 1)``, otherwise with ``run()``
        :return: tuple of the design and its ``results``
        """
        print("PROCESSING!")
        primers = make_linear(load_fasta_glob(paths["primers"]))
        templates = load_genbank_glob(paths["templates"])

        designs_glob = join(here, "data/test_data/genbank/designs/*.gb")
        all_queries = make_circular(load_genbank_glob(designs_glob))
        queries = all_queries[:LIM_NUM_DESIGNS]

        design = Design(span_cost=cached_span_cost)
        design.add_materials(primers=primers,
                             templates=templates,
                             queries=queries)

        if n_jobs > 1:
            design._run_with_pool(n_jobs, 1)
        else:
            design.run()
        return design, design.results
コード例 #16
0
    def test_circular_over_query(self):
        """Align a subject that spans the origin of a circular query.

        The subject is the last 100 bases of the query followed by its first
        100 bases. The sequence rebuilt from the reported query start and end
        must equal the subject sequence.
        """
        record = rand_record(1000)
        subject = record[-100:] + record[:100]

        queries = make_circular([record])
        subjects = make_linear([subject])

        results = BioBlast(subjects, queries).blastn()
        hit = results[0]

        # Rebuild the wrapped region; the reported start is 1-based.
        head = record[hit["query"]["start"] - 1:]
        tail = record[:hit["query"]["end"]]
        rebuilt = str((head + tail).seq)
        assert rebuilt == str(subjects[0].seq)

        compare_result(results[0], 1000 - 100 + 1, 100, 1, 200)
コード例 #17
0
ファイル: test_readme.py プロジェクト: jvrana/pyblast2
def test_example2():
    """Run the README example: a linear subject against a circular query.

    The subject is the last 20 bases of the query sequence followed by its
    first 30 bases. The blast results are printed as indented JSON; nothing
    is asserted.
    """
    from pyblast import BioBlast
    from pyblast.utils import make_linear, make_circular
    from Bio.SeqRecord import SeqRecord
    from Bio.Seq import Seq
    import json

    seq = "ACGTTGTAGTGTAGTTGATGATGATGTCTGTGTCGTGTGATGTGCTAGGGGTTGATGTGAGTAGTTAGTGGTAGTGTTTAGGGGCGGCGCGGAGTATGCTG"
    wrapped = seq[-20:] + seq[:30]

    query_records = [SeqRecord(Seq(seq))]
    subject_records = [SeqRecord(Seq(wrapped))]

    # pyblast requires a 'topology' annotation on the SeqRecords;
    # `make_linear` and `make_circular` set it.
    subject_records = make_linear(subject_records)
    query_records = make_circular(query_records)

    results = BioBlast(subject_records, query_records).blastn()
    print(json.dumps(results, indent=2))
コード例 #18
0
    def test_circular_complete_query_4(self):
        """Align a subject that wraps around the whole circular query.

        The subject covers the rotated query plus 10 extra bases on the left
        and on the right side, flanked by ``ns(100)``.

        Note that pyblast results start at index 1.
        """
        record = rand_record(1000)
        wrapped = record[500 - 10:] + record[:500 + 10]
        subject = ns(100) + wrapped + ns(100)

        queries = make_circular([record])
        subjects = make_linear([subject])

        hit = BioBlast(subjects, queries).blastn()[0]

        assert hit["query"]["start"] == 491
        assert hit["query"]["raw_end"] == 1510
        assert hit["subject"]["start"] == 101
        assert hit["subject"]["end"] == 1120
コード例 #19
0
    def test_circular_complete_query_parametrized_rc(self, extra_right,
                                                     extra_left):
        """Align a reverse-complemented subject that wraps a circular query.

        The subject covers the rotated query plus ``extra_left`` and
        ``extra_right`` additional bases, flanked by ``ns(100)``, and is
        reverse complemented before blasting. Both the raw result positions
        and the spans parsed from them are checked.
        """
        record = rand_record(1000)
        forward = (ns(100) + record[(500 - extra_left):] +
                   record[:(500 + extra_right)] + ns(100))
        subject = forward.reverse_complement()

        queries = make_circular([record])
        subjects = make_linear([subject])

        bioblast = BioBlast(subjects, queries)
        hit = bioblast.blastn()[0]
        print(json.dumps(hit, indent=2))

        extra_total = extra_right + extra_left

        assert hit["query"]["start"] == 501 - extra_left
        assert hit["query"]["raw_end"] == 1500 + extra_right
        assert hit["subject"]["start"] == 1100 + extra_total
        assert hit["subject"]["end"] == 101

        # Convert the raw results to spans.
        query_span = bioblast.parse_result_to_span(hit["query"],
                                                   output_index=0)
        subject_span = bioblast.parse_result_to_span(hit["subject"],
                                                     output_index=0)

        assert len(subject_span) == len(query_span) == 1000 + extra_total
        assert query_span.a == 500 - extra_left
        assert query_span.b == 500 + extra_right

        assert subject_span.a == 100
        assert subject_span.b == 1100 + extra_total
コード例 #20
0
ファイル: test_seq_db.py プロジェクト: jvrana/pyblast2
def test_add_same_transformation():
    """Adding the same record twice with a transformation stores it once.

    The same record object is passed twice; the returned keys must collapse
    to a single distinct key and the database must end up with a length of 2.
    """
    from uuid import uuid4
    from pyblast.constants import Constants as C

    db = SeqRecordDB()

    def pseudocircularize(rec):
        # Concatenate the record with itself and give the copy a new id.
        doubled = rec + rec
        doubled.name = C.PSEUDOCIRCULAR + "__" + rec.name
        doubled.id = str(uuid4())
        return doubled

    record = SeqRecord(
        Seq("ACGTTCGTGATTGTGCTGTGTGTATGGTATGATTATAGTGATGTAGTGATGATGTAGTAGTATA")
    )
    records = make_circular([record, record])

    keys = db.add_many_with_transformations(records, pseudocircularize,
                                            C.PSEUDOCIRCULAR)
    assert len(set(keys)) == 1
    assert len(db) == 2
コード例 #21
0
def test_library_design_to_df(paths, here, span_cost):
    """Run a ``LibraryDesign`` on the library design files and dump it.

    The test makes no assertions. It prints the optimization results, then
    writes the three values returned by ``design.to_df()`` to
    ``library_design.csv``, ``library_summary.csv`` and ``designs.json``
    (relative paths) and prints them.
    """
    primers = make_linear(load_fasta_glob(paths["primers"]))
    templates = load_genbank_glob(paths["templates"])

    designs_glob = join(here, "data/test_data/genbank/library_designs/*.gb")
    queries = make_circular(load_genbank_glob(designs_glob))

    design = LibraryDesign(span_cost=span_cost)
    design.n_jobs = 1
    design.add_materials(primers=primers, templates=templates, queries=queries)

    design.logger.set_level("DEBUG")
    design.compile()
    print(design.optimize())

    design_df, summary_df, design_json = design.to_df()
    design_df.to_csv("library_design.csv")
    summary_df.to_csv("library_summary.csv")
    with open("designs.json", "w") as handle:
        json.dump(design_json, handle)
    print(design_df)
    print(summary_df)
    print(design_json)
コード例 #22
0
def generate_fake_designs(
        n_designs: int,
        circular: bool,
        n_cyclic_seqs: int,
        n_linear_seqs: int,
        n_primers: int,
        n_primers_from_templates: int,
        design_sequence_similarity_length: int = 0,
        cyclic_size_int: Tuple[int, int] = (3000, 10000),
        linear_size_int: Tuple[int, int] = (100, 4000),
        primer_size_int: Tuple[int, int] = (15, 60),
        plasmid_size_interval: Tuple[int, int] = (5000, 10000),
        chunk_size_interval: Tuple[int, int] = (100, 3000),
        random_chunk_prob_int: Tuple[float, float] = (0, 0.5),
        random_chunk_size_int: Tuple[int, int] = (100, 1000),
):
    """Generate a fake sequence library and designs derived from it.

    A library is created with ``generate_fake_library``. Extra primers and
    the designs are then drawn from the library's cyclic and linear
    sequences with ``biopython.random_record_from_library``.

    :param n_designs: number of designs to generate
    :param circular: forwarded as ``circular`` when drawing designs; also
        selects ``make_circular`` or ``make_linear`` for the final designs
    :param n_cyclic_seqs: forwarded to ``generate_fake_library``
    :param n_linear_seqs: forwarded to ``generate_fake_library``
    :param n_primers: forwarded to ``generate_fake_library``
    :param n_primers_from_templates: number of additional primers drawn
        from the templates and appended to the library's "short" list
    :param design_sequence_similarity_length: when truthy the designs are
        passed through ``_add_shared_sequence`` with this value
    :param cyclic_size_int: forwarded to ``generate_fake_library``
    :param linear_size_int: forwarded to ``generate_fake_library``
    :param primer_size_int: forwarded to ``generate_fake_library``
    :param plasmid_size_interval: ``size_interval`` used for the designs
    :param chunk_size_interval: ``chunk_size_interval`` used for the designs
    :param random_chunk_prob_int: forwarded when drawing designs
    :param random_chunk_size_int: forwarded when drawing designs
    :return: dict with the keys "design", "cyclic", "linear" and "short"
    """
    library = generate_fake_library(
        n_cyclic_seqs=n_cyclic_seqs,
        n_linear_seqs=n_linear_seqs,
        n_primers=n_primers,
        cyclic_size_int=cyclic_size_int,
        linear_size_int=linear_size_int,
        primer_size_int=primer_size_int,
    )
    linear_seqs = library["linear"]
    cyclic_seqs = library["cyclic"]
    templates = cyclic_seqs + linear_seqs
    short_seqs = library["short"]

    # Extra primers: a single chunk taken from a template, no random chunks.
    short_seqs.extend(
        biopython.random_record_from_library(
            templates,
            circular=False,
            size_interval=(15, 100),
            max_chunks=1,
            chunk_size_interval=(15, 60),
            random_chunk_prob_int=(0, 0),
            random_chunk_size_int=(0, 0),
        ) for _ in range(n_primers_from_templates))

    # Designs are assembled from template chunks and random sequence.
    designs = [
        biopython.random_record_from_library(
            templates,
            circular=circular,
            size_interval=plasmid_size_interval,
            chunk_size_interval=chunk_size_interval,
            random_chunk_prob_int=random_chunk_prob_int,
            random_chunk_size_int=random_chunk_size_int,
        ) for _ in range(n_designs)
    ]

    if design_sequence_similarity_length:
        designs = _add_shared_sequence(designs,
                                       design_sequence_similarity_length)

    set_topology = make_circular if circular else make_linear
    set_topology(designs)

    return {
        "design": designs,
        "cyclic": cyclic_seqs,
        "linear": linear_seqs,
        "short": short_seqs,
    }
コード例 #23
0
def make_circular_and_id(rlist):
    """Pass the records to ``make_circular`` and give each one a fresh id.

    :param rlist: collection of records; every record has its ``id``
        attribute overwritten
    """
    make_circular(rlist)
    for r in rlist:
        # Replace the existing id with a new random UUID string.
        r.id = str(uuid4())