예제 #1
0
def test_num_groups_vs_endpoints(here, paths, query, span_cost):
    """Print the squared group count next to the endpoint-combination count.

    Loads primers, templates and the query design(s) matching ``query``,
    runs ``Design._blast`` and expands the single resulting container.
    Two numbers are printed: ``len(groups) ** 2`` and the product of the
    number of distinct ``query_region.a`` values and distinct
    ``query_region.b`` values across all groups.
    """
    primer_records = make_linear(load_fasta_glob(paths["primers"]))
    template_records = load_genbank_glob(paths["templates"])

    design_glob = join(here, "data/test_data/genbank/designs", query)
    query_records = make_circular(load_genbank_glob(design_glob))

    design = Design(span_cost)
    design.add_materials(
        primers=primer_records,
        templates=template_records,
        queries=query_records,
    )
    design._blast()

    all_containers = design.container_list
    assert len(all_containers) == 1
    only_container = all_containers[0]
    only_container.expand()
    groups = only_container.groups()
    print(len(groups) ** 2)

    # Distinct values of each endpoint of the groups' query regions.
    a_values = {grp.query_region.a for grp in groups}
    b_values = {grp.query_region.b for grp in groups}

    print(len(a_values) * len(b_values))
예제 #2
0
def test_not_raise_pyblast_when_unique(here):
    """Build a ``BioBlast`` after forcing unique ids on all loaded records.

    The test passes as long as ``force_unique_record_ids`` and the
    ``BioBlast`` constructor complete without raising.
    """
    template_glob = join(here, "data/test_data/genbank/templates/*.gb")
    design_glob = join(here, "data/test_data/genbank/designs/*.gb")
    subjects = load_genbank_glob(template_glob)
    queries = load_genbank_glob(design_glob)

    combined = subjects + queries
    force_unique_record_ids(combined)

    n_queries = len(queries)
    print("n_queres: {}".format(n_queries))
    BioBlast(subjects, queries)
예제 #3
0
def test_raises_pyblast_when_not_unique(here):
    """Expect ``PyBlastException`` when ids were not forced unique.

    Templates and designs are loaded as-is (no ``force_unique_ids`` and no
    ``force_unique_record_ids`` call) and handed straight to ``BioBlast``.
    """
    template_glob = join(here, "data/test_data/genbank/templates/*.gb")
    design_glob = join(here, "data/test_data/genbank/designs/*.gb")
    subjects = load_genbank_glob(template_glob)
    queries = load_genbank_glob(design_glob)

    n_queries = len(queries)
    n_subjects = len(subjects)
    print("n_queres: {}".format(n_queries))
    print("n_subjects: {}".format(n_subjects))

    with pytest.raises(PyBlastException):
        BioBlast(subjects, queries)
예제 #4
0
def blast_factory(paths) -> BioBlastFactory:
    """Create a ``BioBlastFactory`` loaded with primers, templates and queries.

    :param paths: mapping indexed with the ``PRIMERS``, ``REGISTRY`` and
        ``QUERIES`` keys, giving the globs the records are loaded from
    :return: factory with the records registered under the ``PRIMERS``,
        ``TEMPLATES`` and ``QUERIES`` names
    """
    factory = BioBlastFactory()

    primer_records = make_linear(load_fasta_glob(paths[PRIMERS]))
    template_records = load_genbank_glob(paths[REGISTRY])
    query_records = make_circular(load_genbank_glob(paths[QUERIES]))

    # Templates are read from the REGISTRY path but registered as TEMPLATES.
    registrations = (
        (primer_records, PRIMERS),
        (template_records, TEMPLATES),
        (query_records, QUERIES),
    )
    for records, record_group in registrations:
        factory.add_records(records, record_group)

    return factory
예제 #5
0
def test_benchmark_blast(benchmark, here, paths, query):
    """Benchmark ``Design._blast`` for a single query design.

    ``_blast`` is called once directly before the callable is handed to
    the ``benchmark`` fixture.
    """
    primer_records = make_linear(load_fasta_glob(paths["primers"]))
    template_records = load_genbank_glob(paths["templates"])

    design_glob = join(here, "data/test_data/genbank/designs", query)
    query_records = make_circular(load_genbank_glob(design_glob))

    design = Design()
    design.add_materials(
        primers=primer_records,
        templates=template_records,
        queries=query_records,
    )

    # One direct run first, then the benchmarked runs.
    design._blast()
    benchmark(design._blast)
예제 #6
0
파일: conftest.py 프로젝트: jvrana/pyblast2
    def make_blast():
        """Return a ``BioBlast`` of all templates against one circular design.

        Subjects are every GenBank template; the query is the
        ``pmodkan-ho-pact1-z4-er-vpr`` design passed through
        ``make_circular``. Both are loaded with ``force_unique_ids=True``.
        """
        template_glob = join(here, "data/test_data/genbank/templates/*.gb")
        subjects = load_genbank_glob(template_glob, force_unique_ids=True)

        design_file = join(
            here,
            "data/test_data/genbank/designs/pmodkan-ho-pact1-z4-er-vpr.gb",
        )
        queries = make_circular(
            load_genbank_glob(design_file, force_unique_ids=True))

        # Check that the first query is reported as circular.
        assert is_circular(queries[0])
        return BioBlast(subjects, queries)
예제 #7
0
    def run(self, n_jobs: int = 10):
        """Run a design job and write its outputs to disk.

        Loads primers, templates, fragments and goals from the globs stored
        on the instance, compiles and optimizes a ``Design``, then writes
        ``summary.csv`` and ``sequence_design.csv`` (relative paths) and
        ``sequences.gb`` inside ``self._directory``.

        :param n_jobs: number of parallel jobs to run. (default: 10)
        :return: None
        """
        import warnings

        # Silence these warning categories for the rest of the process.
        for ignored_category in (RuntimeWarning, BiopythonParserWarning):
            warnings.simplefilter(action="ignore", category=ignored_category)

        self._logger.info("Loading sequence files")
        primer_records = make_linear(load_fasta_glob(self._primers))
        template_records = make_circular(load_genbank_glob(self._templates))
        fragment_records = make_linear(load_genbank_glob(self._fragments))
        goal_records = make_circular(load_genbank_glob(self._goals))

        design = Design()
        design.n_jobs = n_jobs
        design.add_materials(
            primers=primer_records,
            templates=template_records,
            fragments=fragment_records,
            queries=goal_records,
        )

        self._logger.info("Getting span cost model")
        design.span_cost = self._get_span_cost()

        self._logger.info("Compiling possible molecular assemblies")
        design.compile()

        self._logger.info("Optimizing molecular assemblies")
        design.optimize()

        self._logger.info("Designing assembly primers and fragments")
        # The third element returned by to_df() is not used here.
        design_df, summary_df, _ = design.to_df()
        # NOTE(review): the CSVs use relative paths (current working
        # directory) while sequences.gb goes to self._directory -- confirm
        # this difference is intended.
        summary_df.to_csv("summary.csv")
        design_df.to_csv("sequence_design.csv")

        # Collect molecule sequences from the first assembly of every result
        # that has at least one assembly.
        records = [
            molecule.sequence
            for result in design.results.values()
            if result.assemblies
            for _, _, molecule in result.assemblies[0].molecules
        ]

        output_path = os.path.join(self._directory, "sequences.gb")
        SeqIO.write(records, output_path, "genbank")
예제 #8
0
def test_multiquery_blast(here):
    """Check that every query record appears in the ``blastn`` results.

    The distinct ``origin_record_id`` values found under each result's
    ``"query"`` entry must number exactly ``len(queries)``.
    """
    template_glob = join(here, "data/test_data/genbank/templates/*.gb")
    design_glob = join(here, "data/test_data/genbank/designs/*.gb")
    subjects = load_genbank_glob(template_glob, force_unique_ids=True)
    queries = load_genbank_glob(design_glob, force_unique_ids=True)

    print("n_queres: {}".format(len(queries)))
    print("n_subjects: {}".format(len(subjects)))

    results = BioBlast(subjects, queries).blastn()
    recids = {hit["query"]["origin_record_id"] for hit in results}

    print("n_records: {}".format(len(results)))
    assert len(recids) == len(queries)
예제 #9
0
    def _get_results_func(n_jobs):
        """Run a design over the first ``LIM_NUM_DESIGNS`` query designs.

        :param n_jobs: when greater than 1 the design is run through
            ``_run_with_pool(n_jobs, 1)``; otherwise through ``run()``
        :return: tuple ``(design, design.results)``
        """
        print("PROCESSING!")
        primer_records = make_linear(load_fasta_glob(paths["primers"]))
        template_records = load_genbank_glob(paths["templates"])

        design_glob = join(here, "data/test_data/genbank/designs/*.gb")
        all_queries = make_circular(load_genbank_glob(design_glob))
        limited_queries = all_queries[:LIM_NUM_DESIGNS]

        design = Design(span_cost=cached_span_cost)
        design.add_materials(
            primers=primer_records,
            templates=template_records,
            queries=limited_queries,
        )

        if n_jobs > 1:
            design._run_with_pool(n_jobs, 1)
        else:
            design.run()
        return design, design.results
예제 #10
0
def test_self_blast(here):
    """Assert that ``blastn`` of a record list against itself is empty.

    The single query is built from the first 1000 positions of the first
    template and is used as both the subject and the query set.
    """
    template_glob = join(here, "data/test_data/genbank/templates/*.gb")
    subjects = load_genbank_glob(template_glob, force_unique_ids=True)

    head_sequence = str(subjects[0][:1000].seq)
    queries = [SeqRecord(Seq(head_sequence))]
    force_unique_record_ids(make_linear(queries))

    results = BioBlast(queries, queries).blastn()
    assert not results
예제 #11
0
def test_library_design_to_df_2(paths, here, span_cost):
    """Run a ``LibraryDesign`` on the ``test_data_sd2`` set and dump outputs.

    Compiles and optimizes the design, prints the dataframe of the first
    assembly of every result, then writes ``library_design.csv``,
    ``library_summary.csv`` and ``designs.json`` (relative paths) and
    prints the three objects returned by ``to_df``.
    """
    data_root = join(here, "data/test_data_sd2")
    primers_path = join(data_root, "primers.fasta")
    fragments_path = join(data_root, "fragments", "*.gb")
    plasmids_path = join(data_root, "plasmids", "*.gb")
    designs_path = join(data_root, "designs", "*.gb")

    primer_records = make_linear(load_fasta_glob(primers_path))
    template_records = load_genbank_glob(plasmids_path)
    fragment_records = load_genbank_glob(fragments_path)
    print(fragments_path)
    query_records = make_circular(load_genbank_glob(designs_path))

    design = LibraryDesign(span_cost=span_cost)
    design.n_jobs = 1
    design.add_materials(
        primers=primer_records,
        templates=make_circular(template_records),
        queries=query_records,
        fragments=make_linear(fragment_records),
    )

    design.logger.set_level("DEBUG")
    design.compile()

    results = design.optimize()
    for result in results.values():
        first_assembly = result.assemblies[0]
        print(first_assembly.to_df())

    design_df, summary_df, design_json = design.to_df()
    design_df.to_csv("library_design.csv")
    summary_df.to_csv("library_summary.csv")
    with open("designs.json", "w") as handle:
        json.dump(design_json, handle)

    for output in (design_df, summary_df, design_json):
        print(output)
예제 #12
0
def test_bioblast_factory_init(here):
    """Create two blasters from one ``BioBlastFactory`` and run them.

    Primers, queries and subjects are registered on the factory; a
    primers-vs-queries blaster runs ``blastn_short`` and a
    subjects-vs-queries blaster runs ``blastn``. The result counts are
    printed.
    """
    template_glob = join(here, "data/test_data/genbank/templates/*.gb")
    design_glob = join(here, "data/test_data/genbank/designs/*.gb")
    primer_glob = join(here, "data/test_data/primers/*.fasta")

    subjects = load_genbank_glob(template_glob, force_unique_ids=True)
    queries = load_genbank_glob(design_glob, force_unique_ids=True)
    primers = load_fasta_glob(primer_glob)

    factory = BioBlastFactory()
    factory.add_records(make_linear(primers), "primers")
    factory.add_records(queries, "queries")
    factory.add_records(subjects, "subjects")

    primer_blaster = factory("primers", "queries")
    template_blaster = factory("subjects", "queries")

    primer_hits = primer_blaster.blastn_short()
    template_hits = template_blaster.blastn()

    for hits in (primer_hits, template_hits):
        print(len(hits))
예제 #13
0
파일: conftest.py 프로젝트: jvrana/pyblast2
    def make_blast():
        """Return a ``BioBlast`` of the primer FASTA against one design.

        Subjects are the records of ``primers.fasta`` passed through
        ``make_linear``; the query is the ``pmodkan-ho-pact1-z4-er-vpr``
        design. Both are loaded with ``force_unique_ids=True``.
        """
        primer_file = join(here, "data/test_data/primers/primers.fasta")
        subjects = make_linear(
            load_fasta_glob(primer_file, force_unique_ids=True))

        design_file = join(
            here,
            "data/test_data/genbank/designs/pmodkan-ho-pact1-z4-er-vpr.gb",
        )
        queries = load_genbank_glob(design_file, force_unique_ids=True)
        return BioBlast(subjects, queries)
예제 #14
0
def test_library_design_to_df(paths, here, span_cost):
    """Run a ``LibraryDesign`` on the library designs and dump its outputs.

    Compiles and optimizes the design, prints the optimization results,
    then writes ``library_design.csv``, ``library_summary.csv`` and
    ``designs.json`` (relative paths) and prints the three objects
    returned by ``to_df``.
    """
    primer_records = make_linear(load_fasta_glob(paths["primers"]))
    template_records = load_genbank_glob(paths["templates"])

    design_glob = join(here, "data/test_data/genbank/library_designs/*.gb")
    query_records = make_circular(load_genbank_glob(design_glob))

    design = LibraryDesign(span_cost=span_cost)
    design.n_jobs = 1
    design.add_materials(
        primers=primer_records,
        templates=template_records,
        queries=query_records,
    )

    design.logger.set_level("DEBUG")
    design.compile()
    print(design.optimize())

    design_df, summary_df, design_json = design.to_df()
    design_df.to_csv("library_design.csv")
    summary_df.to_csv("library_summary.csv")
    with open("designs.json", "w") as handle:
        json.dump(design_json, handle)

    for output in (design_df, summary_df, design_json):
        print(output)
예제 #15
0
def test_unnamed_queries_raises_duplicate_error(here):
    """Expect ``PyBlastException`` for queries built without explicit ids.

    Two ``SeqRecord`` objects are created from the first 1000 characters
    of the first two template sequences, with no id given and no call to
    ``force_unique_record_ids``, then passed to ``BioBlast``.
    """
    template_glob = join(here, "data/test_data/genbank/templates/*.gb")
    subjects = load_genbank_glob(template_glob, force_unique_ids=True)

    queries = [
        SeqRecord(Seq(str(subjects[index].seq)[:1000]))
        for index in (0, 1)
    ]
    make_linear(queries)

    with pytest.raises(PyBlastException):
        BioBlast(subjects, queries)
def test(paths):
    """BLAST a template against its own reverse complement.

    The first result is expected to report subject strand ``-1`` and a
    subject start equal to the template length.
    """
    factory = BioBlastFactory()

    subject = load_genbank_glob(paths[TEMPLATES])[0]
    query = subject.reverse_complement()

    templates = make_linear([subject])
    queries = make_linear([query])

    for records, record_group in ((templates, TEMPLATES), (queries, QUERIES)):
        factory.add_records(records, record_group)

    results = factory(TEMPLATES, QUERIES).blastn()

    top_hit = results[0]
    assert top_hit["subject"]["strand"] == -1
    assert top_hit["subject"]["start"] == len(templates[0])
예제 #17
0
def test_validate_rc(here):
    """BLAST the first design against its own reverse complement.

    The first result is expected to report subject strand ``-1``, query
    strand ``1`` and a subject start equal to the query length.
    """
    design_glob = join(here, "data/test_data/genbank/designs/*.gb")
    loaded = load_genbank_glob(design_glob, force_unique_ids=True)

    templates = make_linear(loaded[:1])
    queries = make_linear([templates[0].reverse_complement()])

    factory = BioBlastFactory()
    factory.add_records(queries, "queries")
    factory.add_records(templates, "templates")

    results = factory("templates", "queries").blastn()

    top_hit = results[0]
    assert top_hit["subject"]["strand"] == -1
    assert top_hit["query"]["strand"] == 1
    assert top_hit["subject"]["start"] == len(queries[0])
예제 #18
0
def test_unnamed_queries(here):
    """Check both id-less queries appear in results once ids are unique.

    Two ``SeqRecord`` objects are created from the first 1000 characters
    of the first two template sequences and passed through
    ``force_unique_record_ids``. The distinct ``origin_record_id`` values
    in the ``blastn`` results must number exactly ``len(queries)``.
    """
    template_glob = join(here, "data/test_data/genbank/templates/*.gb")
    subjects = load_genbank_glob(template_glob, force_unique_ids=True)

    queries = [
        SeqRecord(Seq(str(subjects[index].seq)[:1000]))
        for index in (0, 1)
    ]
    force_unique_record_ids(make_linear(queries))

    results = BioBlast(subjects, queries).blastn()
    recids = {hit["query"]["origin_record_id"] for hit in results}

    print("n_records: {}".format(len(results)))
    assert len(recids) == len(queries)