Example #1
0
    def make_origin_iterator(
        a: Iterable[AssemblyNode], b: Iterable[AssemblyNode], length: int
    ) -> Generator[Tuple[AssemblyNode, AssemblyNode], None, None]:
        """Yield ``(b_node, a_node)`` pairs that sit on either side of ``length``.

        Every node of ``a`` whose ``index`` is ``<= length`` is paired with
        every node of ``b`` whose ``index`` is ``>= length``. Both inputs are
        ordered by ``index`` first (this relies on ``sort_with_keys``
        returning the sorted items together with the matching sorted keys).

        :param a: nodes to take from the low side (``index <= length``)
        :param b: nodes to take from the high side (``index >= length``)
        :param length: the index value used as the split point
        :return: generator of ``(b_node, a_node)`` tuples
        """
        nodes_a, keys_a = sort_with_keys(a, lambda node: node.index)
        nodes_b, keys_b = sort_with_keys(b, lambda node: node.index)

        # bisect_right keeps keys equal to `length` on the low side;
        # bisect_left keeps keys equal to `length` on the high side.
        low_side = nodes_a[:bisect.bisect_right(keys_a, length)]
        high_side = nodes_b[bisect.bisect_left(keys_b, length):]

        for a_node in low_side:
            for b_node in high_side:
                yield b_node, a_node
Example #2
0
    def make_gap_iterator(
        a: Iterable[AssemblyNode], b: Iterable[AssemblyNode]
    ) -> Generator[Tuple[AssemblyNode, AssemblyNode], None, None]:
        """Yield ``(b_node, a_node)`` pairs where ``b_node.index <= a_node.index``.

        Illustration (``b`` is placed at or before ``a``)::

            |--------|                  b
                          |-------|     a

        Nodes of ``a`` are visited in ascending ``index`` order; for each one,
        all nodes of ``b`` with an index less than or equal to it are emitted,
        also in ascending order. Equal indices are included because the
        cut-off is computed with ``bisect.bisect_right``.

        :param a: nodes that appear second in the yielded tuple
        :param b: nodes that appear first in the yielded tuple
        :return: generator of ``(b_node, a_node)`` tuples
        """
        nodes_a, _ = sort_with_keys(a, lambda node: node.index)
        nodes_b, keys_b = sort_with_keys(b, lambda node: node.index)

        for a_node in nodes_a:
            # Everything left of `cutoff` has index <= a_node.index.
            cutoff = bisect.bisect_right(keys_b, a_node.index)
            at_or_before = nodes_b[:cutoff]
            for b_node in at_or_before:
                yield b_node, a_node
    def expand_primer_extension_products(self,
                                         only_one_required=False,
                                         lim_size=True):
        """Create primer-extension alignments from the available primers.

        Primers of type ``Constants.PRIMER`` are split on
        ``subject_region.direction == 1`` into ``fwd`` and ``rev`` groups
        (NOTE(review): assumes ``partition`` returns ``(false_items,
        true_items)``, as ``more_itertools.partition`` does - confirm).

        For every forward primer, reverse primers whose start falls in the
        forward primer's query region with the last
        ``Config.PRIMER_MIN_BIND`` positions sliced off are considered. A
        reverse primer is skipped when its end lies inside the forward
        primer's region without being equal to the forward primer's end.

        :param only_one_required: if True, additionally create products that
            use only a forward primer or only a reverse primer
        :param lim_size: forwarded to ``_create_primer_extension_alignment``
        :return: list accumulated from the results of
            ``_create_primer_extension_alignment``
        """

        def by_start(p):
            return p.query_region.a

        primers = self.get_alignments_by_types(Constants.PRIMER)
        rev, fwd = partition(lambda p: p.subject_region.direction == 1,
                             primers)
        # The key lists returned alongside the sorted primers are not needed.
        fwd, _ = sort_with_keys(fwd, key=lambda p: p.query_region.b)
        rev, _ = sort_with_keys(rev, key=by_start)

        products = []
        for f in fwd:
            bind_region = f.query_region[:-Config.PRIMER_MIN_BIND]
            candidates = self.filter_alignments_by_span(
                rev, bind_region, key=by_start)
            candidates, _ = sort_with_keys(candidates, key=by_start)

            for r in candidates:
                r_end = r.query_region.b
                # Reject a reverse primer ending inside the forward primer,
                # except when both primers end at the same position.
                if r_end in f.query_region and r_end != f.query_region.b:
                    continue
                products += self._create_primer_extension_alignment(
                    f,
                    r,
                    Constants.PRIMER_EXTENSION_PRODUCT_WITH_PRIMERS,
                    lim_size=lim_size,
                )

        if not only_one_required:
            return products

        # Products using an existing forward primer only.
        for f in fwd:
            products += self._create_primer_extension_alignment(
                f,
                None,
                Constants.PRIMER_EXTENSION_PRODUCT_WITH_LEFT_PRIMER,
                lim_size=lim_size,
            )

        # Products using an existing reverse primer only.
        for r in rev:
            products += self._create_primer_extension_alignment(
                None,
                r,
                Constants.PRIMER_EXTENSION_PRODUCT_WITH_RIGHT_PRIMER,
                lim_size=lim_size,
            )
        return products
Example #4
0
    def build_assembly_graph(self) -> nx.DiGraph:
        """Build the assembly graph, store it on ``self.G`` and return it.

        A fresh ``nx.DiGraph`` replaces ``self.G``. Alignment groups of the
        PCR-product, primer-extension-product, fragment and shared-synthesized
        -fragment types are fetched from ``self.container``, ordered by
        ``query_region.a``, and handed to ``add_internal_edges`` and
        ``add_external_edges``; ``update_costs`` is called last.

        :return: the graph that was assigned to ``self.G``
        """
        self.G = nx.DiGraph(name="Assembly Graph")

        graph_group_types = [
            Constants.PCR_PRODUCT,
            Constants.PCR_PRODUCT_WITH_RIGHT_PRIMER,
            Constants.PCR_PRODUCT_WITH_LEFT_PRIMER,
            Constants.PCR_PRODUCT_WITH_PRIMERS,
            Constants.PRIMER_EXTENSION_PRODUCT_WITH_PRIMERS,
            Constants.PRIMER_EXTENSION_PRODUCT_WITH_RIGHT_PRIMER,
            Constants.PRIMER_EXTENSION_PRODUCT_WITH_LEFT_PRIMER,
            Constants.FRAGMENT,
            Constants.SHARED_SYNTHESIZED_FRAGMENT,
        ]
        unsorted_groups = self.container.get_groups_by_types(graph_group_types)
        groups, group_keys = sort_with_keys(
            unsorted_groups, key=lambda group: group.query_region.a)

        self.add_internal_edges(groups)
        self.add_external_edges(groups, group_keys, self.G.nodes())
        self.update_costs()

        # TODO: freeze?
        # nx.freeze(self.G)
        return self.G
 def filter_alignments_by_span(alignments,
                               region,
                               key=None,
                               end_inclusive=True):
     """Collect the alignments selected by each range of ``region``.

     The alignments are sorted with ``sort_with_keys`` using ``key``. For
     each ``(a, b)`` pair from ``region.ranges(ignore_wraps=True)`` the
     result of ``bisect_slice_between(sorted_items, sorted_keys, a, b)`` is
     appended to the output (presumably the items whose key lies between
     ``a`` and ``b`` - confirm against that helper).

     :param alignments: items to search
     :param region: object providing ``ranges(ignore_wraps=True)``
     :param key: key function passed through to ``sort_with_keys``
     :param end_inclusive: if False, the upper bound of every range is
         lowered by one before searching
     :return: list of selected alignments, in range order
     """
     ordered, ordered_keys = sort_with_keys(alignments, key=key)
     matches = []
     for lower, upper in region.ranges(ignore_wraps=True):
         bound = upper if end_inclusive else upper - 1
         matches.extend(
             bisect_slice_between(ordered, ordered_keys, lower, bound))
     return matches
Example #6
0
    def make_overlap_iterator(
        a: Iterable[AssemblyNode], b: Iterable[AssemblyNode]
    ) -> Generator[Tuple[AssemblyNode, AssemblyNode], None, None]:
        """Yield ``(b_node, a_node)`` pairs where ``b_node.index > a_node.index``.

        Illustration of the overlap case::

            |--------|
                   |-------|
                   a

        Pairs with ``a.index == b.index`` are *not* produced here: the
        cut-off uses ``bisect.bisect_right``, which places equal indices on
        the side handled by ``make_gap_iterator``. Overlap is more
        computationally intensive.

        :param a: nodes that appear second in the yielded tuple
        :param b: nodes that appear first in the yielded tuple
        :return: generator of ``(b_node, a_node)`` tuples
        """
        nodes_a, _ = sort_with_keys(a, lambda node: node.index)
        nodes_b, keys_b = sort_with_keys(b, lambda node: node.index)

        for a_node in nodes_a:
            # Everything from `cutoff` onward has index > a_node.index.
            cutoff = bisect.bisect_right(keys_b, a_node.index)
            strictly_after = nodes_b[cutoff:]
            for b_node in strictly_after:
                yield b_node, a_node
Example #7
0
def optimize_graph(graph: nx.DiGraph, query_length: int, cyclic: bool,
                   n_paths: int) -> Tuple[List[List[tuple]], List[float]]:
    """Find up to ``n_paths`` lowest-cost paths through ``graph``.

    Nodes are ordered by their first element, all-pairs matrices are
    computed with ``sympy_floyd_warshall`` using ``path_length_config``, and
    for cyclic designs the weight matrix is rebuilt with ``cyclic_matrix``.
    The first ``n_paths`` index pairs returned by ``argmin`` are taken as
    candidate endpoints; entries with infinite cost are dropped.
    (NOTE(review): dropping infinities keeps nodes and costs aligned only
    if ``argmin`` orders entries by ascending cost - confirm.)

    :param graph: the assembly graph
    :param query_length: passed to ``cyclic_matrix``
    :param cyclic: must be True; linear assemblies are not implemented
    :param n_paths: maximum number of paths requested
    :return: ``(paths, costs)`` where ``paths`` comes from
        ``_nodes_to_fullpaths`` and ``costs`` are the finite costs kept
    :raises NotImplementedError: if ``cyclic`` is False
    """
    # Ordered node list and the matching sort keys.
    nodelist, nodekeys = sort_with_keys(list(graph.nodes()),
                                        key=lambda x: x[0])

    # 2D matrix of 'efficiency' and 'material' costs.
    weight_matrix, matrix_dict, ori_matrix_dict = sympy_floyd_warshall(
        graph,
        f=path_length_config["f"],
        accumulators=path_length_config["accumulators"],
        nodelist=nodelist,
        dtype=np.float64,
        return_all=True,
    )
    matrix_dict = {
        name: np.array(values) for name, values in matrix_dict.items()
    }

    if not cyclic:
        raise NotImplementedError("Linear assemblies not yet implemented.")
    weight_matrix = cyclic_matrix(matrix_dict, nodelist, query_length)

    # Keep only the first `n_paths` entries of each index array.
    best_index = tuple(axis[:n_paths] for axis in argmin(weight_matrix))
    finite_costs = [
        cost for cost in weight_matrix[best_index] if cost != np.inf
    ]
    start_nodes = [nodelist[i] for i in best_index[0]]
    end_nodes = [nodelist[i] for i in best_index[1]]

    # zip truncates to the number of finite costs.
    ranked = list(zip(zip(start_nodes, end_nodes), finite_costs))
    trimmed_nodes, trimmed_costs = zip(*ranked) if ranked else ([], [])

    paths = _nodes_to_fullpaths(graph, trimmed_nodes, cyclic, n_paths=n_paths)

    if len(paths) < n_paths:
        warn(
            "Number of paths returned is less than requested paths {} < {}".
            format(len(paths), n_paths), DASiWarning)

    _check_paths(paths)
    return paths, trimmed_costs
Example #8
0
def overlapping_groups(group_list_a, group_list_b):
    """Pair each group in ``group_list_a`` with its right-hand overlaps.

    ``group_list_b`` is sorted by ``query_region.a``; for every group in
    ``group_list_a`` the groups of ``group_list_b`` selected by
    ``AlignmentContainer.filter_alignments_by_span`` over that group's
    query region (``end_inclusive=False``) are collected. The group itself
    is removed from its own result if present.

    :param group_list_a: groups to find overlaps for
    :param group_list_b: candidate overlapping groups
    :return: list of ``(group_a, overlapping_groups)`` tuples, one per
        element of ``group_list_a``
    """

    def region_start(group):
        return group.query_region.a

    candidates, _ = sort_with_keys(group_list_b, key=region_start)

    pairs = []
    for anchor in group_list_a:
        hits = AlignmentContainer.filter_alignments_by_span(
            candidates,
            anchor.query_region,
            key=region_start,
            end_inclusive=False,
        )
        # A group never counts as overlapping itself.
        if anchor in hits:
            hits.remove(anchor)
        pairs.append((anchor, hits))
    return pairs
    def expand_overlaps(
        self,
        alignment_groups: List[AlignmentGroup],
        atype=Constants.PCR_PRODUCT,
        lim_size: bool = True,
        pass_condition: Callable = None,
        include_left: bool = True,
    ) -> List[Alignment]:
        """
        Derive new alignments from pairs of overlapping alignment groups.

        For a group (alignment 1) and another group that starts inside it and
        ends outside it (alignment 2), up to two sub-alignments of
        alignment 1 are produced:

        ::

            only if `include_left`
            |--------|          alignment 1
                |--------|      alignment 2
            |---|               new alignment (left of the overlap)


            |--------|          alignment 1
                 |--------|     alignment 2
                 |---|          new alignment (the overlap)

        A sub-alignment is kept only when its query region is longer than
        ``Config.MIN_OVERLAP``.

        :param alignment_groups: list of alignment groups to expand
        :param atype: the alignment type label for expanded alignments
        :param lim_size: if True, only return alignments for which
            ``size_ok()`` is true
        :param pass_condition: an optional callable that takes group_a
            (AlignmentGroup) and group_b (AlignmentGroup). If the returned
            value is falsy, that pair is skipped.
        :param include_left: if True, also add the region of alignment 1 to
            the left of the overlap
        :return: list of new alignments
        """

        def region_start(group):
            return group.query_region.a

        min_overlap = Config.MIN_OVERLAP
        ordered_groups, _ = sort_with_keys(alignment_groups, key=region_start)

        def long_enough(subgroup):
            # `_make_subgroup` may return a falsy value; pass that through.
            return subgroup and len(subgroup.query_region) > min_overlap

        alignments = []
        for group_a in logger.tqdm(ordered_groups,
                                   "INFO",
                                   desc="expanding pcr products"):
            starts_inside = self.filter_alignments_by_span(
                ordered_groups,
                group_a.query_region,
                key=region_start,
                end_inclusive=False,
            )

            for group_b in starts_inside:
                if group_b is group_a:
                    continue
                # ignore groups that are completely contained in region
                if group_b.query_region.b in group_a.query_region:
                    continue
                if pass_condition and not pass_condition(group_a, group_b):
                    continue

                if include_left:
                    left = self._make_subgroup(
                        group_a,
                        group_a.query_region.a,
                        group_b.query_region.a,
                        atype,
                    )
                    if long_enough(left):
                        alignments.extend(left.alignments)

                overlap = self._make_subgroup(
                    group_a,
                    group_b.query_region.a,
                    group_a.query_region.b,
                    atype,
                )
                if long_enough(overlap):
                    alignments.extend(overlap.alignments)

        if not lim_size:
            return alignments
        return [a for a in alignments if a.size_ok()]
    def expand_primer_pairs(self,
                            alignment_groups: List[AlignmentGroup],
                            lim_size: bool = True) -> List[Alignment]:
        """Create PCR-product alignments for primers found within groups.

        For each alignment group, forward primers whose end falls in the
        group's query region sliced from ``Config.PRIMER_MIN_BIND`` onward,
        and reverse primers whose start falls in the group's query region
        with the last ``Config.PRIMER_MIN_BIND`` positions sliced off, are
        looked up. Alignments are then created, in this order, for

        1. every accepted (forward, reverse) pair,
        2. every forward primer alone,
        3. every reverse primer alone.

        A pair is rejected when the forward primer's start lies inside the
        reverse primer's region (unless both start at the same position) or
        when the reverse primer's end lies inside the forward primer's
        region (unless both end at the same position).

        :param alignment_groups: groups to search for primer binding sites
        :param lim_size: forwarded to ``_create_pcr_product_alignment``
        :return: list accumulated from the results of
            ``_create_pcr_product_alignment``
        """

        def by_start(p):
            return p.query_region.a

        def by_end(p):
            return p.query_region.b

        primers = self.get_alignments_by_types(Constants.PRIMER)
        rev, fwd = partition(lambda p: p.subject_region.direction == 1,
                             primers)
        # The key lists returned alongside the sorted primers are not needed.
        fwd, _ = sort_with_keys(fwd, key=by_end)
        rev, _ = sort_with_keys(rev, key=by_start)

        products = []
        for g in self.logger.tqdm(alignment_groups,
                                  "INFO",
                                  desc="Expanding primer pair"):
            query_ranges = g.query_region.ranges()
            fwd_bind_region = g.query_region[Config.PRIMER_MIN_BIND:]
            rev_bind_region = g.query_region[:-Config.PRIMER_MIN_BIND]
            fwd_bind = self.filter_alignments_by_span(
                fwd, fwd_bind_region, key=by_end)
            rev_bind = self.filter_alignments_by_span(
                rev, rev_bind_region, key=by_start)
            rev_bind, rkeys = sort_with_keys(rev_bind, key=by_start)

            # both primers
            for f in fwd_bind:
                f_start = f.query_region.a
                if len(query_ranges) == 1:
                    # Single range: reverse primers starting at or after
                    # the forward primer's start.
                    partners = rev_bind[bisect_left(rkeys, f_start):]
                else:
                    # Several ranges: search each range of the span from the
                    # forward primer's start to the group's end, falling back
                    # to the whole group region if `sub` raises IndexError.
                    try:
                        span = g.query_region.sub(f_start, g.query_region.b)
                    except IndexError:
                        span = g.query_region
                    partners = []
                    for lower, upper in span.ranges():
                        partners.extend(
                            bisect_slice_between(rev_bind, rkeys, lower,
                                                 upper))

                for r in partners:
                    r_end = r.query_region.b
                    if f_start in r.query_region \
                            and f_start != r.query_region.a:
                        continue
                    if r_end in f.query_region \
                            and r_end != f.query_region.b:
                        continue
                    products += self._create_pcr_product_alignment(
                        g,
                        f,
                        r,
                        Constants.PCR_PRODUCT_WITH_PRIMERS,
                        lim_size=lim_size)

            # left primer
            for f in fwd_bind:
                products += self._create_pcr_product_alignment(
                    g,
                    f,
                    None,
                    Constants.PCR_PRODUCT_WITH_LEFT_PRIMER,
                    lim_size=lim_size,
                )

            # right primer
            for r in rev_bind:
                products += self._create_pcr_product_alignment(
                    g,
                    None,
                    r,
                    Constants.PCR_PRODUCT_WITH_RIGHT_PRIMER,
                    lim_size=lim_size,
                )
        return products