Example #1
0
 def _edge_to_region(self, n1, n2):
     """Build the ``Region`` spanned by the edge from ``n1`` to ``n2``.

     The region starts at ``n1.index`` and ends at ``n2.index`` in the
     context of the query length. When the query is circular and ``n1``
     sits exactly at the end of the query (``index == len(query)``), the
     start is wrapped around to ``0``.

     :param n1: source node of the edge; only ``n1.index`` is read
     :param n2: destination node of the edge; only ``n2.index`` is read
     :return: a ``Region`` whose ``cyclic`` flag mirrors the query topology
     """
     query_length = len(self.query)
     circular = is_circular(self.query)
     # An index equal to the query length is the same position as index 0
     # on a circular query.
     wraps_to_origin = circular and n1.index == query_length
     start = 0 if wraps_to_origin else n1.index
     return Region(start, n2.index, query_length, cyclic=circular)
Example #2
0
 def __init__(
     self,
     graph: nx.DiGraph,
     query: SeqRecord,
     span_cost: SpanCost,
     seqdb: Dict[str, SeqRecord],
     container: AlignmentContainer,
     stats_repeat_window: Optional[int] = None,
     stats_window: Optional[int] = None,
     stats_hairpin_window: Optional[int] = None,
     edge_threshold: Optional[float] = None,
     stages: Optional[Tuple[str]] = None,
 ):
     """Set up the graph, query, sequence statistics and scoring settings.

     Every optional argument left as ``None`` falls back to the attribute
     of the same purpose on ``SequenceScoringConfig``.

     :param graph: assembly graph; also installed as ``graph_builder.G``
     :param query: query record the graph was built for
     :param span_cost: cost model handed to the ``AssemblyGraphBuilder``
     :param seqdb: mapping of sequence keys to records
     :param container: alignment container handed to the graph builder
     :param stats_repeat_window: ``repeat_window`` passed to ``DNAStats``
     :param stats_window: ``stats_window`` passed to ``DNAStats``
     :param stats_hairpin_window: ``hairpin_window`` passed to ``DNAStats``
     :param edge_threshold: stored as ``self.edge_threshold``
     :param stages: stored as ``self.stages``
     """
     defaults = SequenceScoringConfig
     stats_repeat_window = (
         defaults.stats_repeat_window
         if stats_repeat_window is None
         else stats_repeat_window
     )
     stats_window = defaults.stats_window if stats_window is None else stats_window
     stats_hairpin_window = (
         defaults.stats_hairpin_window
         if stats_hairpin_window is None
         else stats_hairpin_window
     )
     stages = defaults.post_process_stages if stages is None else stages
     edge_threshold = (
         defaults.edge_threshold if edge_threshold is None else edge_threshold
     )

     self.graph = graph
     builder = AssemblyGraphBuilder(container, span_cost=span_cost)
     builder.G = graph
     self.graph_builder = builder
     self.query = query
     self.seqdb = seqdb

     # Both statistics objects share the same window configuration.
     window_kwargs = dict(
         repeat_window=stats_repeat_window,
         stats_window=stats_window,
         hairpin_window=stats_hairpin_window,
     )
     single_seq = str(query.seq)
     # For a circular query the sequence is doubled before computing
     # ``self.stats``; ``self.stats_single`` always covers one copy.
     stats_seq = single_seq + single_seq if is_circular(query) else single_seq
     self.stats = DNAStats(stats_seq, **window_kwargs)
     self.stats_single = DNAStats(single_seq, **window_kwargs)

     self.logged_msgs = []
     # TODO: make a more sophisticated complexity function?
     # TODO: expose this to input parameters
     self.COMPLEXITY_THRESHOLD = SequenceScoringConfig.complexity_threshold
     self.logger = logger(self)
     self.span_cost = span_cost
     self.stages = stages
     self.edge_threshold = edge_threshold
Example #3
0
 def _collect_optimize_args(
     self, graphs: Dict[str, nx.DiGraph]
 ) -> Tuple[str, nx.DiGraph, bool, dict]:
     """Yield the per-query arguments needed to optimize each graph.

     This is a generator. For every ``(query_key, graph)`` pair in
     ``graphs`` it yields a 5-tuple of
     ``(query_key, graph, query_length, cyclic, result)`` where ``result``
     is a fresh ``DesignResult`` bound to the query's container and graph.

     NOTE(review): the return annotation describes a single 4-tuple, but
     the function yields 5-tuples lazily; the annotation should be
     confirmed and corrected where the typing imports are visible.

     :param graphs: mapping of query key to its assembly graph
     """
     progress = self.logger.tqdm(
         graphs.items(), "INFO", desc="optimizing graphs"
     )
     for key, query_graph in progress:
         query_container = self.containers[key]
         record = query_container.seqdb[key]
         circular = is_circular(record)
         design_result = DesignResult(
             container=query_container, query_key=key, graph=query_graph
         )
         yield key, query_graph, len(record), circular, design_result
Example #4
0
    def make_blast():
        """Build a ``BioBlast`` from the template records and one circular design.

        Subjects are loaded from every GenBank file in the templates
        directory; the single design file is loaded as the query and passed
        through ``make_circular`` before use.
        """
        templates_glob = join(here, "data/test_data/genbank/templates/*.gb")
        design_path = join(
            here, "data/test_data/genbank/designs/pmodkan-ho-pact1-z4-er-vpr.gb"
        )

        subjects = load_genbank_glob(templates_glob, force_unique_ids=True)
        queries = make_circular(
            load_genbank_glob(design_path, force_unique_ids=True)
        )
        # Sanity check that the conversion marked the query as circular.
        assert is_circular(queries[0])
        return BioBlast(subjects, queries)
Example #5
0
    def _get_design_status(self, qk):
        """Summarize the state of the design stored under key ``qk``.

        The returned dict contains:

        * ``"compiled"`` - ``True`` when ``self.graphs`` holds a graph for ``qk``
        * ``"run"`` - ``True`` when ``self.results`` holds a result for ``qk``
        * ``"success"`` - ``True`` when that result has at least one assembly
        * ``"assemblies"`` - one ``{"cost": {...}}`` entry per assembly with
          the summed material cost, the product of the efficiencies and the
          largest complexity value, each rounded to two decimals
        * ``"record"`` - name, sequence length, id and circularity of the record

        :param qk: query key into ``self.seqdb``, ``self.graphs``, ``self.results``
        :return: the status dictionary described above
        """
        record = self.seqdb[qk]
        design_result = self.results.get(qk)
        assembly_summaries = []
        status = {
            "compiled": self.graphs.get(qk) is not None,
            "run": design_result is not None,
            "success": False,
            "assemblies": assembly_summaries,
            "record": {
                "name": record.name,
                "length": len(record.seq),
                "id": record.id,
                "is_circular": is_circular(record),
            },
        }

        if design_result is None:
            return status

        for assembly in design_result.assemblies:
            status["success"] = True
            summary = assembly.to_df()
            material_cost = sum(summary["material"])
            efficiency = functools.reduce(operator.mul, summary["efficiency"])

            # Largest truthy complexity value; falsy entries (None, 0) are
            # skipped and the floor is 0.
            max_complexity = 0
            for value in summary["complexity"]:
                if value and value > max_complexity:
                    max_complexity = value

            assembly_summaries.append({
                "cost": {
                    "material cost": round(material_cost, 2),
                    "assembly efficiency": round(efficiency, 2),
                    "max synthesis complexity": round(max_complexity, 2),
                }
            })
        return status
Example #6
0
    def _optimize(self, n_paths) -> Dict[str, DesignResult]:
        """Find the optimal paths for each query in the design.

        Iterates over the tuples produced by ``_collect_optimize_args`` and,
        for each query, runs ``optimize_graph`` and adds the resulting paths
        to that query's ``DesignResult``. An error is logged for any query
        with no paths; its (empty) result is still returned.

        :param n_paths: forwarded to ``optimize_graph`` as its last argument
        :return: mapping of query key to its ``DesignResult``
        """
        results_dict = {}
        progress = self.logger.tqdm(
            self._collect_optimize_args(self.graphs),
            "INFO",
            desc="optimizing graphs (n_graphs={})".format(len(self.graphs)),
        )
        # ``query_length`` and ``cyclic`` are already computed from the query
        # record by ``_collect_optimize_args``, so they are used directly
        # instead of being looked up a second time here.
        for query_key, graph, query_length, cyclic, result in progress:
            results_dict[query_key] = result

            paths, _ = optimize_graph(graph, query_length, cyclic, n_paths)
            if not paths:
                query_rec = self.blast_factory.db.records[query_key]
                self.logger.error(
                    "\n\tThere were no solutions found for design '{}' ({}).\n\t"
                    "This sequence may be better synthesized. Use a tool such as JBEI's"
                    " BOOST.".format(query_rec.name, query_key))
            # Called even when ``paths`` is empty so the result is always
            # populated consistently.
            result.add_assemblies(paths, ignore_invalid=True)
        return results_dict
Example #7
0
 def filter_linear_records(cls,
                           records: List[SeqRecord]) -> List[SeqRecord]:
     """Keep the records that ``is_circular`` does not report as circular.

     :param records: records to filter; the input list is not modified
     :return: a new list with the non-circular records in their original order
     """
     linear_records = [rec for rec in records if not is_circular(rec)]
     return linear_records
Example #8
0
 def cyclic(self):
     """Report whether ``self.query`` is circular according to ``is_circular``."""
     query_is_circular = is_circular(self.query)
     return query_is_circular
Example #9
0
    def partition(self, edges: List[Edge]) -> None:
        """Split selected edges at partition points and rescore the graph.

        Steps, as reported through the progress tracker:

        0. Filter ``edges`` with ``_filter_partition_edges``.
        1. Compute partition locations for the query with
           ``find_by_partitions_for_sequence`` on ``self.stats_single``.
        2. For every filtered edge going from a ``"B"`` node to an ``"A"``
           node, and every partition location inside the edge's region,
           build a gap / overlap / gap chain of edges through two new
           overhang nodes, add the edges to the graph, then update costs
           for edges whose cost is ``None`` and rescore complexity on all
           edges.

        :param edges: candidate edges as ``(n1, n2, edata)`` triples; the
            log message calls them "highly complex sequences"
        :return: None; ``self.graph_builder.G`` is modified in place
        """
        tracker = self.logger.track(
            "INFO", desc="Partitioning sequences", total=3
        ).enter()
        tracker.update(0, "{} highly complex sequences".format(len(edges)))

        edges_to_partition = self._filter_partition_edges(edges)

        cyclic = is_circular(self.query)
        # Partition locations are computed on the single-copy statistics,
        # not on the doubled sequence that may back ``self.stats``.
        partitions = find_by_partitions_for_sequence(
            self.stats_single,
            cyclic=cyclic,
            threshold=Config.SequenceScoringConfig.complexity_threshold,
            step_size=Config.SequenceScoringConfig.partition_step_size,
            delta=Config.SequenceScoringConfig.partition_overlap,
        )
        tracker.update(1, "Partition: locations: {}".format(partitions))
        # Gap edges are built with add_to_graph=False and only added to the
        # graph in the loop over ``new_edges`` further down.
        add_gap_edge = partial(self.graph_builder.add_gap_edge, add_to_graph=False)
        # Overlap edges are built with add_to_graph=True.
        # NOTE(review): presumably this inserts them into the graph
        # immediately, even if the partition is later skipped by one of the
        # ``continue`` statements below - confirm against the builder.
        add_overlap_edge = partial(
            self.graph_builder.add_overlap_edge,
            add_to_graph=True,
            validate_groups_present=False,
            groups=None,
            group_keys=None,
        )

        new_edges = []
        for n1, n2, edata in edges_to_partition:
            # Region covered by this edge on the query.
            r = Region(n1.index, n2.index, len(self.query.seq), cyclic=cyclic)

            # Only edges from a "B" node to an "A" node are partitioned.
            if n1.type == "B" and n2.type == "A":

                for p in partitions:
                    if p in r:
                        # TODO: overlap? find optimal partition for overlap?
                        # n4 sits at the partition point and n3 one
                        # ``partition_overlap`` past it; the overlap edge
                        # below runs from n3 to n4.
                        i4 = p
                        i3 = p + Config.SequenceScoringConfig.partition_overlap

                        n3 = AssemblyNode(i3, False, "BC", overhang=True)
                        n4 = AssemblyNode(i4, False, "CA", overhang=True)
                        # Each segment is built twice, with origin=False and
                        # origin=True; the partition is abandoned as soon as
                        # both variants of a segment come back as None.
                        e1 = add_gap_edge(n1, n3, r, origin=False)
                        e2 = add_gap_edge(n1, n3, r, origin=True)
                        if e1 is None and e2 is None:
                            continue
                        e3 = add_overlap_edge(n3, n4, r, origin=False)
                        e4 = add_overlap_edge(n3, n4, r, origin=True)
                        if e3 is None and e4 is None:
                            continue
                        e5 = add_gap_edge(n4, n2, r, origin=False)
                        e6 = add_gap_edge(n4, n2, r, origin=True)
                        if e5 is None and e6 is None:
                            continue
                        new_edges += [e1, e2, e3, e4, e5, e6]
        # Add every edge that was actually produced; each is indexed as
        # (source, target, attribute dict).
        for e in new_edges:
            if e is not None:
                self.graph_builder.G.add_edge(e[0], e[1], **e[2])
        # Collect all graph edges that still have no cost assigned.
        edges = []
        for n1, n2, edata in self.graph_builder.G.edges(data=True):
            if edata["cost"] is None:
                edges.append((n1, n2, edata))
        # The count reported here is the number of edges with cost None,
        # not ``len(new_edges)``.
        tracker.update(2, "Partition: Added {} new edges".format(len(edges)))
        self.graph_builder.update_costs(edges)
        # Complexity is rescored over every edge in the graph, not only the
        # ones touched above.
        self.score_complexity_edges(list(self.graph_builder.G.edges(data=True)))
        tracker.exit()
Example #10
0
 def is_circular(records):
     """Delegate to ``is_circular`` as resolved in the enclosing module scope.

     NOTE(review): this assumes the definition is a method-level wrapper,
     so the name inside the body resolves to a module-level ``is_circular``
     rather than to this function itself; if it were defined at module
     level the call would recurse without end - confirm.

     :param records: value forwarded unchanged to ``is_circular``
     :return: whatever the delegated call returns
     """
     outcome = is_circular(records)
     return outcome