def make_origin_iterator(
    a: Iterable[AssemblyNode], b: Iterable[AssemblyNode], length: int
) -> Generator[Tuple[AssemblyNode, AssemblyNode], None, None]:
    """Yield ``(b_node, a_node)`` pairs that sit on either side of ``length``.

    Both collections are ordered by their ``index`` attribute. Every node of
    ``a`` whose index is ``<= length`` is combined with every node of ``b``
    whose index is ``>= length``.

    :param a: nodes that become the second element of each yielded pair
    :param b: nodes that become the first element of each yielded pair
    :param length: index threshold separating the two selections
        (presumably the query length, so pairs span the origin -- confirm
        against callers)
    :return: generator of ``(b_node, a_node)`` tuples
    """
    # NOTE(review): assumes sort_with_keys returns (sorted items, their keys).
    nodes_a, keys_a = sort_with_keys(a, lambda x: x.index)
    nodes_b, keys_b = sort_with_keys(b, lambda x: x.index)

    # bisect_right keeps a-nodes sitting exactly at ``length``;
    # bisect_left keeps b-nodes sitting exactly at ``length``.
    stop_a = bisect.bisect_right(keys_a, length)
    start_b = bisect.bisect_left(keys_b, length)

    tail_b = nodes_b[start_b:]
    for node_a in nodes_a[:stop_a]:
        for node_b in tail_b:
            yield node_b, node_a
def make_gap_iterator(
    a: Iterable[AssemblyNode], b: Iterable[AssemblyNode]
) -> Generator[Tuple[AssemblyNode, AssemblyNode], None, None]:
    """Yield ``(b_node, a_node)`` pairs where ``b_node.index <= a_node.index``.

    Intended layout::

        b   |--------|
                         |-------|   a

    Nodes of ``a`` are visited in ascending ``index`` order; for each one,
    every node of ``b`` whose index does not exceed it is yielded (ties
    included, because ``bisect_right`` is used).

    :param a: nodes that become the second element of each yielded pair
    :param b: nodes that become the first element of each yielded pair
    :return: generator of ``(b_node, a_node)`` tuples
    """
    # NOTE(review): assumes sort_with_keys returns (sorted items, their keys).
    nodes_a, _ = sort_with_keys(a, lambda x: x.index)
    nodes_b, keys_b = sort_with_keys(b, lambda x: x.index)

    for node_a in nodes_a:
        cut = bisect.bisect_right(keys_b, node_a.index)
        yield from ((node_b, node_a) for node_b in nodes_b[:cut])
def expand_primer_extension_products(self, only_one_required=False, lim_size=True):
    """Create primer-extension alignments from the stored primer alignments.

    Primers are split on ``subject_region.direction == 1`` into a forward
    and a reverse set. For every forward primer, reverse primers are looked
    up via :meth:`filter_alignments_by_span` (keyed on ``query_region.a``)
    within the forward primer's query region sliced by
    ``[:-Config.PRIMER_MIN_BIND]``. A reverse primer is skipped when its end
    (``query_region.b``) lies inside the forward primer's region without
    coinciding with the forward primer's end.

    :param only_one_required: if True, additionally create products that
        use just a single existing primer (left-only and right-only)
    :param lim_size: forwarded to ``_create_primer_extension_alignment``
    :return: list accumulated from ``_create_primer_extension_alignment``
    """

    def _start(p):
        return p.query_region.a

    def _end(p):
        return p.query_region.b

    primers = self.get_alignments_by_types(Constants.PRIMER)
    rev, fwd = partition(lambda p: p.subject_region.direction == 1, primers)
    fwd, _ = sort_with_keys(fwd, key=_end)
    rev, _ = sort_with_keys(rev, key=_start)

    products = []
    for f in fwd:
        bind_region = f.query_region[:-Config.PRIMER_MIN_BIND]
        candidates = self.filter_alignments_by_span(rev, bind_region, key=_start)
        candidates, _ = sort_with_keys(candidates, key=_start)
        for r in candidates:
            r_end = r.query_region.b
            # Reverse primer ending inside the forward primer is only
            # acceptable when both primers end at the same position.
            if r_end in f.query_region and r_end != f.query_region.b:
                continue
            products.extend(
                self._create_primer_extension_alignment(
                    f,
                    r,
                    Constants.PRIMER_EXTENSION_PRODUCT_WITH_PRIMERS,
                    lim_size=lim_size,
                )
            )

    if only_one_required:
        # products that reuse only an existing forward primer
        for f in fwd:
            products.extend(
                self._create_primer_extension_alignment(
                    f,
                    None,
                    Constants.PRIMER_EXTENSION_PRODUCT_WITH_LEFT_PRIMER,
                    lim_size=lim_size,
                )
            )
        # products that reuse only an existing reverse primer
        for r in rev:
            products.extend(
                self._create_primer_extension_alignment(
                    None,
                    r,
                    Constants.PRIMER_EXTENSION_PRODUCT_WITH_RIGHT_PRIMER,
                    lim_size=lim_size,
                )
            )
    return products
def build_assembly_graph(self) -> nx.DiGraph:
    """Build, store and return the assembly graph.

    A fresh ``nx.DiGraph`` is assigned to ``self.G``. Alignment groups of
    the PCR-product, primer-extension-product, fragment and shared
    synthesized fragment types are fetched from ``self.container``, ordered
    by the start of their query region, and handed to
    ``add_internal_edges`` / ``add_external_edges``. ``update_costs`` is
    called last.

    :return: the graph stored on ``self.G``
    """
    self.G = nx.DiGraph(name="Assembly Graph")

    node_types = [
        Constants.PCR_PRODUCT,
        Constants.PCR_PRODUCT_WITH_RIGHT_PRIMER,
        Constants.PCR_PRODUCT_WITH_LEFT_PRIMER,
        Constants.PCR_PRODUCT_WITH_PRIMERS,
        Constants.PRIMER_EXTENSION_PRODUCT_WITH_PRIMERS,
        Constants.PRIMER_EXTENSION_PRODUCT_WITH_RIGHT_PRIMER,
        Constants.PRIMER_EXTENSION_PRODUCT_WITH_LEFT_PRIMER,
        Constants.FRAGMENT,
        Constants.SHARED_SYNTHESIZED_FRAGMENT,
    ]
    selected = self.container.get_groups_by_types(node_types)
    groups, group_keys = sort_with_keys(
        selected, key=lambda g: g.query_region.a
    )

    self.add_internal_edges(groups)
    self.add_external_edges(groups, group_keys, self.G.nodes())
    self.update_costs()

    # TODO: freeze?
    # nx.freeze(self.G)
    return self.G
def filter_alignments_by_span(alignments, region, key=None, end_inclusive=True):
    """Collect the alignments selected by each range of ``region``.

    The alignments are ordered with ``sort_with_keys`` and, for every
    ``(start, stop)`` pair from ``region.ranges(ignore_wraps=True)``, the
    result of ``bisect_slice_between`` on that ordering is appended to the
    output (presumably the items whose key lies between the two bounds --
    confirm against ``bisect_slice_between``).

    :param alignments: alignments to search
    :param region: region whose ranges delimit the search
    :param key: key function passed to ``sort_with_keys``
    :param end_inclusive: if False, the upper bound of every range is
        reduced by one before slicing
    :return: list of the selected alignments, concatenated range by range
    """
    ordered, ordered_keys = sort_with_keys(alignments, key=key)
    selected = []
    for start, stop in region.ranges(ignore_wraps=True):
        upper = stop if end_inclusive else stop - 1
        selected.extend(bisect_slice_between(ordered, ordered_keys, start, upper))
    return selected
def make_overlap_iterator(
    a: Iterable[AssemblyNode], b: Iterable[AssemblyNode]
) -> Generator[Tuple[AssemblyNode, AssemblyNode], None, None]:
    """Yield ``(b_node, a_node)`` pairs where ``b_node.index > a_node.index``.

    Intended layout::

        b   |--------|
                  |-------|   a

    Equal indices are *not* treated as an overlap: ``bisect.bisect_right``
    places the cut after ties, so those pairs are left to
    ``make_gap_iterator``. Overlap is more computationally intensive.

    :param a: nodes that become the second element of each yielded pair
    :param b: nodes that become the first element of each yielded pair
    :return: generator of ``(b_node, a_node)`` tuples
    """
    # NOTE(review): assumes sort_with_keys returns (sorted items, their keys).
    nodes_a, _ = sort_with_keys(a, lambda x: x.index)
    nodes_b, keys_b = sort_with_keys(b, lambda x: x.index)

    for node_a in nodes_a:
        cut = bisect.bisect_right(keys_b, node_a.index)
        yield from ((node_b, node_a) for node_b in nodes_b[cut:])
def optimize_graph(
    graph: nx.DiGraph, query_length: int, cyclic: bool, n_paths: int
) -> Tuple[List[List[tuple]], List[float]]:
    """Find the lowest-cost assembly paths through ``graph``.

    An all-pairs cost matrix is computed with ``sympy_floyd_warshall``
    (configured by the module-level ``path_length_config``), converted to a
    cyclic weight matrix with ``cyclic_matrix``, and the first ``n_paths``
    entries returned by ``argmin`` are turned into full paths.

    :param graph: assembly graph; nodes are ordered by their first element
    :param query_length: length of the query, forwarded to ``cyclic_matrix``
    :param cyclic: must be True; linear assemblies are not supported
    :param n_paths: maximum number of paths to return
    :return: ``(paths, costs)``; ``costs`` holds the non-infinite cost of
        each selected node pair (empty list when nothing is reachable)
    :raises NotImplementedError: if ``cyclic`` is False
    """
    # Fail fast: reject the unsupported mode before paying for the all-pairs
    # computation, whose result would be thrown away anyway.
    if not cyclic:
        raise NotImplementedError("Linear assemblies not yet implemented.")

    # ordered node list defines the row/column order of the matrices
    nodelist, _ = sort_with_keys(list(graph.nodes()), key=lambda x: x[0])

    # 2D matrices of 'efficiency' and 'material' costs
    _, matrix_dict, _ = sympy_floyd_warshall(
        graph,
        f=path_length_config["f"],
        accumulators=path_length_config["accumulators"],
        nodelist=nodelist,
        dtype=np.float64,
        return_all=True,
    )
    matrix_dict = {k: np.array(v) for k, v in matrix_dict.items()}
    weight_matrix = cyclic_matrix(matrix_dict, nodelist, query_length)

    min_index = tuple([i[:n_paths] for i in argmin(weight_matrix)])
    costs = weight_matrix[min_index]

    # Pair every cost with its node pair *before* dropping unreachable
    # (infinite) entries, so nodes and costs cannot drift out of alignment
    # when an infinite cost is not at the very end of the ranking.
    ranked = [
        ((nodelist[i], nodelist[j]), cost)
        for i, j, cost in zip(min_index[0], min_index[1], costs)
        if cost != np.inf
    ]
    if ranked:
        trimmed_nodes, trimmed_costs = zip(*ranked)
    else:
        trimmed_nodes, trimmed_costs = [], []

    paths = _nodes_to_fullpaths(graph, trimmed_nodes, cyclic, n_paths=n_paths)
    if len(paths) < n_paths:
        warn(
            "Number of paths returned is less than requested paths {} < {}".
            format(len(paths), n_paths), DASiWarning)
    _check_paths(paths)
    return paths, trimmed_costs
def overlapping_groups(group_list_a, group_list_b):
    """Pair each group of ``group_list_a`` with its right-hand overlaps.

    ``group_list_b`` is ordered by query-region start. For every group in
    ``group_list_a`` the groups of ``group_list_b`` selected by
    ``AlignmentContainer.filter_alignments_by_span`` for that group's query
    region (end exclusive) are collected; the group itself is removed from
    its own result if present.

    :param group_list_a: groups to find overlaps for
    :param group_list_b: candidate overlapping groups
    :return: list of ``(group_a, overlapping_groups)`` tuples, in the order
        of ``group_list_a``
    """
    candidates, _ = sort_with_keys(
        group_list_b, key=lambda x: x.query_region.a
    )

    def _partners(group_a):
        hits = AlignmentContainer.filter_alignments_by_span(
            candidates,
            group_a.query_region,
            key=lambda p: p.query_region.a,
            end_inclusive=False,
        )
        # a group never counts as overlapping itself
        if group_a in hits:
            hits.remove(group_a)
        return hits

    return [(group_a, _partners(group_a)) for group_a in group_list_a]
def expand_overlaps(
    self,
    alignment_groups: List[AlignmentGroup],
    atype=Constants.PCR_PRODUCT,
    lim_size: bool = True,
    pass_condition: Callable = None,
    include_left: bool = True,
) -> List[Alignment]:
    """Expand the list of alignments from overlapping existing regions.

    For every pair of groups where the second starts inside the first and
    is not completely contained in it, new sub-alignments of the first group
    are produced:

    ::

        only if `include_left`

        |--------|          alignment 1
              |--------|    alignment 2
        |-----|             new alignment (left of the overlap)

        always

        |--------|          alignment 1
              |--------|    alignment 2
              |--|          new alignment (the overlap)

    A new sub-group is kept only when its query region is longer than
    ``Config.MIN_OVERLAP``.

    :param alignment_groups: list of alignment groups to expand
    :param atype: the alignment type label for expanded alignments
    :param lim_size: if True, only return alignments for which
        ``size_ok()`` is true
    :param pass_condition: an optional callable that takes group_a
        (AlignmentGroup) and group_b (AlignmentGroup). If the returned value
        is False, the pair is skipped.
    :param include_left: if True, will add the left region up to the overlap
        in addition to the overlapping region.
    :return: list of new alignments
    """
    min_overlap = Config.MIN_OVERLAP
    ordered_groups, _ = sort_with_keys(
        alignment_groups, key=lambda x: x.query_region.a
    )

    expanded = []
    for group_a in logger.tqdm(ordered_groups, "INFO", desc="expanding pcr products"):
        region_a = group_a.query_region
        candidates = self.filter_alignments_by_span(
            ordered_groups,
            region_a,
            key=lambda p: p.query_region.a,
            end_inclusive=False,
        )
        for group_b in candidates:
            if group_b is group_a:
                continue
            # ignore groups that are completely contained in region
            if group_b.query_region.b in group_a.query_region:
                continue
            if pass_condition and not pass_condition(group_a, group_b):
                continue

            if include_left:
                left = self._make_subgroup(
                    group_a,
                    group_a.query_region.a,
                    group_b.query_region.a,
                    atype,
                )
                if left and len(left.query_region) > min_overlap:
                    expanded.extend(left.alignments)

            overlap = self._make_subgroup(
                group_a, group_b.query_region.a, group_a.query_region.b, atype
            )
            if overlap and len(overlap.query_region) > min_overlap:
                expanded.extend(overlap.alignments)

    if not lim_size:
        return expanded
    return [aln for aln in expanded if aln.size_ok()]
def expand_primer_pairs(
    self, alignment_groups: List[AlignmentGroup], lim_size: bool = True
) -> List[Alignment]:
    """Create new alignments for all possible primer pairs.

    Primers are split on ``subject_region.direction == 1`` into a forward
    and a reverse set. For each alignment group, the primers found within
    the group's query region (offset by ``Config.PRIMER_MIN_BIND`` at the
    respective end) are combined into:

    * products using both a forward and a reverse primer,
    * products using only a forward (left) primer,
    * products using only a reverse (right) primer.

    :param alignment_groups: groups inside which primer pairs are searched
    :param lim_size: forwarded to ``_create_pcr_product_alignment``
    :return: list accumulated from ``_create_pcr_product_alignment``
    """

    def _start(p):
        return p.query_region.a

    def _end(p):
        return p.query_region.b

    primers = self.get_alignments_by_types(Constants.PRIMER)
    rev, fwd = partition(lambda p: p.subject_region.direction == 1, primers)
    fwd, _ = sort_with_keys(fwd, key=_end)
    rev, _ = sort_with_keys(rev, key=_start)

    products = []
    for g in self.logger.tqdm(alignment_groups, "INFO", desc="Expanding primer pair"):
        query_ranges = g.query_region.ranges()
        fwd_bind_region = g.query_region[Config.PRIMER_MIN_BIND:]
        rev_bind_region = g.query_region[:-Config.PRIMER_MIN_BIND]
        fwd_bind = self.filter_alignments_by_span(fwd, fwd_bind_region, key=_end)
        rev_bind = self.filter_alignments_by_span(rev, rev_bind_region, key=_start)
        rev_bind, rkeys = sort_with_keys(rev_bind, key=_start)

        # both primers
        for f in fwd_bind:
            if len(query_ranges) == 1:
                # single contiguous range: every reverse primer starting at
                # or after the forward primer's start is a candidate
                first = bisect_left(rkeys, f.query_region.a)
                partners = rev_bind[first:]
            else:
                # region with several ranges: search each range of the span
                # from the forward primer's start to the end of the group
                try:
                    span = g.query_region.sub(f.query_region.a, g.query_region.b)
                except IndexError:
                    span = g.query_region
                partners = []
                for lo, hi in span.ranges():
                    partners += bisect_slice_between(rev_bind, rkeys, lo, hi)

            for r in partners:
                f_start = f.query_region.a
                r_end = r.query_region.b
                # forward start inside the reverse primer is only allowed
                # when both primers start at the same position
                if f_start in r.query_region and f_start != r.query_region.a:
                    continue
                # reverse end inside the forward primer is only allowed
                # when both primers end at the same position
                if r_end in f.query_region and r_end != f.query_region.b:
                    continue
                products.extend(
                    self._create_pcr_product_alignment(
                        g,
                        f,
                        r,
                        Constants.PCR_PRODUCT_WITH_PRIMERS,
                        lim_size=lim_size,
                    )
                )

        # left primer
        for f in fwd_bind:
            products.extend(
                self._create_pcr_product_alignment(
                    g,
                    f,
                    None,
                    Constants.PCR_PRODUCT_WITH_LEFT_PRIMER,
                    lim_size=lim_size,
                )
            )

        # right primer
        for r in rev_bind:
            products.extend(
                self._create_pcr_product_alignment(
                    g,
                    None,
                    r,
                    Constants.PCR_PRODUCT_WITH_RIGHT_PRIMER,
                    lim_size=lim_size,
                )
            )
    return products