Esempio n. 1
0
    def test_regular_ra_parser(self, num_kmers):
        """Check the read count of an uncached RandomAccess parser.

        Builds a one-color graph of ``num_kmers`` distinct random lexlo kmers,
        then asserts that looking every kmer up once through a parser created
        with ``kmer_cache_size=None`` calls ``fh.read`` exactly twice per kmer.
        """
        # given
        kmer_size = 11
        graph_builder = builder.Graph().with_kmer_size(kmer_size).with_num_colors(1)
        unique_kmers = set()
        for _ in range(num_kmers):
            # Keep drawing until we hit a kmer that has not been used yet.
            while True:
                candidate = lexlo(''.join(
                    random.choice('ACGT') for _ in range(kmer_size)))
                if candidate not in unique_kmers:
                    break
            unique_kmers.add(candidate)
            graph_builder.with_kmer(candidate)
        handle = graph_builder.build()

        ra = parser.RandomAccess(handle, kmer_cache_size=None)
        # Touch every record once before the handle is patched
        # (presumably so only steady-state reads are counted below -- confirm).
        for k_string in list(ra):
            ra[k_string]

        with mock.patch.object(handle, 'read', wraps=handle.read) as mocked_read:
            # when
            for seen_kmer in sorted(unique_kmers):
                ra[seen_kmer]

            # then
            assert 2 * num_kmers == mocked_read.call_count
Esempio n. 2
0
 def get_incoming_kmers(self, kmer_string):
     """Return the lexlo form of every incoming kmer string of a lexlo kmer.

     ``kmer_string`` must already be in lexlo form; this is enforced with an
     assertion. The result is a list, one entry per string yielded by
     ``get_incoming_kmer_strings``.
     """
     assert lexlo(kmer_string) == kmer_string
     incoming = self.get_incoming_kmer_strings(kmer_string, is_lexlo=True)
     return [lexlo(neighbor) for neighbor in incoming]
Esempio n. 3
0
def get_canonical_edge(first, second):
    """Return the canonical form of the edge between two kmers.

    Both endpoints are converted to lexlo form and then ordered
    lexicographically.

    Returns a 3-tuple ``(smaller, larger, flipped)`` where ``flipped`` is True
    when the lexlo form of ``second`` sorts strictly before the lexlo form of
    ``first`` (i.e. the endpoints were swapped).
    """
    canon_first, canon_second = lexlo(first), lexlo(second)
    if canon_second < canon_first:
        return canon_second, canon_first, True
    return canon_first, canon_second, False
Esempio n. 4
0
    def make_graph_nodes_consistent(self, seed_kmer_strings=None):
        """
        Take a Cortex graph and make all nodes have kmer_strings that are consistent with each
        other. If a seed kmer string is provided, then start with that seed kmer.

        If ``self.graph.is_consistent()`` is already true, nothing is changed.
        Otherwise ``self.graph`` is replaced by a new ``ConsistentCortexDiGraph``.

        :param seed_kmer_strings: optional iterable of kmer strings to start
            from; defaults to an empty list.
        :return: ``self``, to allow call chaining.
        :raises Exception: if ``nx.edge_dfs`` reports a direction other than
            ``'forward'`` or ``'reverse'``.
        """
        if self.graph.is_consistent():
            return self
        if seed_kmer_strings is None:
            seed_kmer_strings = []
        graph = CortexDiGraph(self.graph)
        new_graph = ConsistentCortexDiGraph(graph=self.graph.graph)

        seeds = SeedKmerStringIterator.from_all_kmer_strings_and_seeds(self.graph.nodes(),
                                                                       seed_kmer_strings)

        # NOTE: `seeds` is mutated (via seeds.remove) while it is being iterated;
        # presumably the iterator skips removed kmers -- confirm against
        # SeedKmerStringIterator.
        for seed, lexlo_seed in seeds:
            # The node is stored under the seed's own orientation, but its kmer
            # data is looked up under the lexlo key.
            new_graph.add_node(seed, kmer=self.graph.node[lexlo_seed])
            seeds.remove(lexlo_seed)
            # Walk every edge reachable from the seed, ignoring edge orientation.
            for source, sink, key, direction in nx.edge_dfs(graph, lexlo_seed, 'ignore'):
                # `ref` is the endpoint on the side the traversal came from;
                # `target` is the endpoint being oriented to match it.
                if direction == 'forward':
                    rc_after_ref_kmer = True
                    ref, target = source, sink
                elif direction == 'reverse':
                    ref, target = sink, source
                    rc_after_ref_kmer = False
                else:
                    raise Exception("unknown direction: {}".format(direction))
                # If the reference is not in the new graph under this spelling,
                # use its reverse complement and flip the relative position flag.
                if ref not in new_graph.node:
                    ref = revcomp(ref)
                    rc_after_ref_kmer = not rc_after_ref_kmer
                matched_target, _ = revcomp_target_to_match_ref(target, ref, rc_after_ref_kmer)
                # NOTE(review): the lookup key here is matched_target, which may
                # not be lexlo; presumably CortexDiGraph resolves either
                # orientation -- confirm.
                new_graph.add_node(matched_target, kmer=graph.node[matched_target])
                seeds.remove(lexlo(matched_target))
        # Only nodes are added above; no edges are added to new_graph here.
        self.graph = new_graph
        return self
Esempio n. 5
0
 def __setitem__(self, key, value):
     """Store ``value`` under the lexlo form of ``key``.

     The key is removed from the exclusion set if present. The duplicate
     counter is incremented when the key is found in ``ra_parser`` and is
     not yet in ``_new_kmers``.
     """
     canonical = lexlo(key)
     # discard() is a no-op for absent members, so no membership test is needed.
     self._exclusion_set.discard(canonical)
     # ra_parser is consulted first, exactly as before.
     shadows_parsed_kmer = (
         canonical in self.ra_parser and canonical not in self._new_kmers
     )
     if shadows_parsed_kmer:
         self._n_duplicates += 1
     self._new_kmers[canonical] = value
Esempio n. 6
0
 def __getitem__(self, key):
     """Return the value stored for the lexlo form of ``key``.

     Entries in ``_new_kmers`` take precedence over ``ra_parser``.

     Raises KeyError when the key is in the exclusion set; a miss in
     ``ra_parser`` propagates whatever that lookup raises.
     """
     canonical = lexlo(key)
     if canonical in self._exclusion_set:
         raise KeyError
     source = self._new_kmers if canonical in self._new_kmers else self.ra_parser
     return source[canonical]
Esempio n. 7
0
 def build_or_get(self, kmer_string):
     """Return the cached empty kmer for a kmer string, building it on first use.

     The string is validated with ``check_kmer_string`` and the cache is
     keyed by its lexlo form.
     """
     check_kmer_string(kmer_string)
     canonical = lexlo(kmer_string)
     if canonical not in self._seen_kmers:
         self._seen_kmers[canonical] = self._build_from_lexlo(canonical)
     return self._seen_kmers[canonical]
Esempio n. 8
0
    def test_slurping_ra_parser(self, num_kmers):
        """Check the seek and read counts of SlurpedRandomAccess.

        Builds a one-color graph of ``num_kmers`` distinct random lexlo kmers
        and asserts that:

        * neither ``from_handle`` nor later lookups call ``fh.seek``;
        * ``from_handle`` calls ``fh.read`` once per kmer plus 10 header reads
          plus 1 end-of-file read, and later lookups add no further reads.
        """
        # given
        kmer_size = 11
        graph_builder = builder.Graph().with_kmer_size(kmer_size).with_num_colors(1)
        unique_kmers = set()
        for _ in range(num_kmers):
            # Keep drawing until we hit a kmer that has not been used yet.
            while True:
                candidate = lexlo(''.join(
                    random.choice('ACGT') for _ in range(kmer_size)))
                if candidate not in unique_kmers:
                    break
            unique_kmers.add(candidate)
            graph_builder.with_kmer(candidate)
        handle = graph_builder.build()
        lookup_order = sorted(unique_kmers)

        # Part 1: no seeking, neither while loading nor while looking up.
        handle.seek(0)
        with mock.patch.object(handle, 'seek', wraps=handle.seek) as mocked_seek:
            # when
            ra = parser.SlurpedRandomAccess.from_handle(handle)

            # then
            assert 0 == mocked_seek.call_count

            for seen_kmer in lookup_order:
                ra[seen_kmer]
            assert 0 == mocked_seek.call_count

        # Part 2: all reads happen while loading.
        handle.seek(0)
        num_header_reads = 10
        num_eof_reads = 1
        expected_reads = num_kmers + num_eof_reads + num_header_reads
        with mock.patch.object(handle, 'read', wraps=handle.read) as mocked_read:
            # when
            ra = parser.SlurpedRandomAccess.from_handle(handle)

            # then
            assert expected_reads == mocked_read.call_count

            for seen_kmer in lookup_order:
                ra[seen_kmer]
            assert expected_reads == mocked_read.call_count
Esempio n. 9
0
 def set_unitig_cycle(self, unitig):
     """Set ``unitig.is_cycle`` from the edges of the unitig's right node.

     ``is_cycle`` starts as False and stays False when
     ``revcomp_target_to_match_ref`` raises ValueError for the left/right
     node pair or when the unitig has no edge colors. It becomes True only
     if, for every color in ``unitig.unitig_edge_colors``, the right node's
     kmer reports the computed edge letter as an edge.
     """
     unitig.is_cycle = False
     try:
         flipped_string, _ = revcomp_target_to_match_ref(
             unitig.left_node,
             unitig.right_node,
             rc_is_after_reference_kmer=True)
     except ValueError:
         return

     # Edge letters are upper case when the right node is lexlo, lower case otherwise.
     right_is_lexlo = lexlo(unitig.right_node) == unitig.right_node
     last_letter = flipped_string[-1]
     if right_is_lexlo:
         edge_letter = last_letter.upper()
     else:
         edge_letter = lexlo(last_letter).lower()

     colors = unitig.unitig_edge_colors
     if len(colors) == 0:
         return
     right_kmer_edges = self.graph.node[unitig.right_node]['kmer'].edges
     unitig.is_cycle = all(
         right_kmer_edges[color].is_edge(edge_letter) for color in colors
     )
Esempio n. 10
0
 def __delitem__(self, item):
     """Delete the entry for the lexlo form of ``item``.

     An entry in ``_new_kmers`` is removed, and a key found in ``ra_parser``
     is added to the exclusion set. When the key was in both, the duplicate
     counter is decremented. A key found in neither store is ignored.

     Raises KeyError if the key is already in the exclusion set.
     """
     canonical = lexlo(item)
     if canonical in self._exclusion_set:
         raise KeyError
     # Both membership tests run up front, in this order, before any mutation.
     was_added = canonical in self._new_kmers
     was_parsed = canonical in self.ra_parser
     if was_added:
         del self._new_kmers[canonical]
         if was_parsed:
             self._n_duplicates -= 1
     if was_parsed:
         self._exclusion_set.add(canonical)
Esempio n. 11
0
 def load_kmer(self, kmer):
     """Load the link group for a kmer, oriented like the given kmer string.

     The link group is looked up in ``self.links.body`` under the kmer's
     lexlo form. If there is none, nothing is loaded. Otherwise each junction
     is appended to ``self.junctions`` under its first element.

     Returns ``self``.
     """
     canonical = lexlo(kmer)
     kmer_is_lexlo = canonical == kmer
     try:
         link_group = self.links.body[canonical]
         logger.debug('Loaded link group for kmer %s: %s', kmer, link_group)
     except KeyError:
         # No links recorded for this kmer.
         return self
     junctions = link_group.get_link_junctions_in_kmer_orientation(kmer_is_lexlo)
     for junction in junctions:
         self.junctions[junction[0]].append(junction)
     return self
Esempio n. 12
0
 def __next__(self):
     """Return the next ``(kmer_string, lexlo_kmer_string)`` pair.

     Explicit seeds are consumed first, popped from the end of
     ``seed_kmer_strings``; a seed is skipped when its lexlo form is not
     among the unseen kmer strings. After that, unseen lexlo kmer strings are
     popped in insertion order, skipping any already marked as seen. Every
     returned kmer is marked as seen.

     Raises StopIteration when both sources are exhausted.
     """
     seen = self._seen_lexlo_kmer_strings
     unseen = self._unseen_lexlo_kmer_strings

     while self.seed_kmer_strings:
         seed = self.seed_kmer_strings.pop()
         canonical = lexlo(seed)
         if canonical in unseen:
             seen.add(canonical)
             return seed, canonical

     while unseen:
         candidate = unseen.popitem(last=False)[0]
         if candidate not in seen:
             seen.add(candidate)
             return candidate, candidate

     raise StopIteration
Esempio n. 13
0
 def __iter__(self):
     """Iterate over the distinct neighbor kmer strings of the query kmer.

     For each color, outgoing kmer strings are collected when the orientation
     is ``EdgeTraversalOrientation.original``, incoming ones otherwise.
     Results are converted to lexlo form when ``return_lexlo_kmers`` is set.
     Duplicates across colors are collapsed, so iteration order is that of a
     set.
     """
     query_is_lexlo = self.kmer.kmer == self.query
     neighbors = set()
     for color in self.colors:
         edge_set = self.kmer.edges[color]
         if self.orientation == EdgeTraversalOrientation.original:
             fetch = edge_set.get_outgoing_kmer_strings
         else:
             fetch = edge_set.get_incoming_kmer_strings
         found = fetch(self.query, is_lexlo=query_is_lexlo)
         if self.return_lexlo_kmers:
             found = map(lexlo, found)
         neighbors.update(found)
     return iter(neighbors)
Esempio n. 14
0
def test_revcomps_many_kmers(data, num_kmers, kmer_size):
    """Check that seeded kmers keep their drawn orientation.

    Draws ``num_kmers`` kmer strings, builds a graph from their lexlo forms,
    makes the graph consistent using the drawn strings as seeds, and expects
    each drawn string to be a node and the node count to match.
    """
    # given
    # Maps lexlo form -> drawn orientation; a later draw with the same lexlo
    # form overwrites an earlier one.
    kmers = {}
    for _ in range(num_kmers):
        drawn = data.draw(kmer_strings(min_size=kmer_size, max_size=kmer_size))
        kmers[lexlo(drawn)] = drawn

    cortex_builder = get_cortex_builder()
    for lexlo_kmer in kmers:
        cortex_builder.with_kmer('{} 1 ........'.format(lexlo_kmer))
    cdb = cortex_builder.build()

    # when
    seeds = set(kmers.values())
    consistent_graph = Interactor(cdb).make_graph_nodes_consistent(seeds).graph
    expect = KmerGraphExpectation(consistent_graph)

    # then
    for oriented_kmer in kmers.values():
        expect.has_node(oriented_kmer)
    expect.has_n_nodes(len(kmers))
Esempio n. 15
0
def annotate_kmer_graph_edges(graph):
    """Add nodes for kmer strings that only appear as edges of existing nodes.

    For every node present when the function is called, and for every color in
    ``graph.graph['colors']``, each outgoing and incoming neighbor kmer string
    that is not yet a node is added with an empty kmer. It is then connected
    to the originating node with an edge keyed by the color. Neighbors that
    are already nodes get no new edge here.

    Returns the same ``graph`` object, modified in place.
    """
    colors = graph.graph['colors']
    kmer_builder = EmptyKmerBuilder(num_colors=len(colors), default_coverage=1)
    # Snapshot the nodes first: the graph grows while we walk it.
    for kmer_string, kmer in list(graph.nodes(data='kmer')):
        is_lexlo = bool(kmer_string == lexlo(kmer_string))
        for color in colors:
            edge_set = kmer.edges[color]
            directions = (
                (edge_set.get_outgoing_kmer_strings, True),
                (edge_set.get_incoming_kmer_strings, False),
            )
            for neighbors_of, is_outgoing in directions:
                for neighbor in neighbors_of(kmer_string, is_lexlo=is_lexlo):
                    if neighbor in graph.nodes:
                        continue
                    graph.add_node(
                        neighbor, kmer=kmer_builder.build_or_get(neighbor))
                    if is_outgoing:
                        graph.add_edge(kmer_string, neighbor, key=color)
                    else:
                        graph.add_edge(neighbor, kmer_string, key=color)
    return graph
Esempio n. 16
0
    def get_kmers(self, contig):
        """Return the kmers of a contig and connect consecutive ones.

        The contig is cut into overlapping windows of
        ``self.graph_parser.kmer_size`` characters. Each window's kmer comes
        from the per-instance cache (keyed by lexlo string), else from the
        graph parser, else from the empty-kmer builder. Coverage of color
        ``self.num_colors - 1`` is incremented once per window, and each pair
        of consecutive kmers is connected in ``self.contig_color``.

        Returns a list of ``(kmer, kmer_string)`` tuples in contig order,
        where ``kmer_string`` is the window as it appears in the contig. The
        contig must be at least one kmer long (enforced by assertion).
        """
        kmer_size = self.graph_parser.kmer_size
        assert len(contig) >= kmer_size

        kmers = []
        for start in range(len(contig) - kmer_size + 1):
            kmer_string = contig[start:start + kmer_size]
            cache_key = lexlo(kmer_string)
            if cache_key not in self.seen_kmer_strings:
                try:
                    kmer = self.graph_parser.get_kmer_for_string(kmer_string)
                except KeyError:
                    # Not in the graph: start from an empty kmer.
                    kmer = self.empty_kmer_builder.build(kmer_string)
                self.seen_kmer_strings[cache_key] = kmer
            else:
                kmer = self.seen_kmer_strings[cache_key]
            kmer.increment_color_coverage(self.num_colors - 1)
            kmers.append((kmer, kmer_string))

        # Link each kmer to its successor along the contig.
        for (this_kmer, _), (next_kmer, _) in zip(kmers, kmers[1:]):
            connect_kmers(this_kmer, next_kmer, self.contig_color,
                          identical_kmer_check=False)
        return kmers
Esempio n. 17
0
 def remove(self, lexlo_kmer_string):
     """Mark a lexlo kmer string as seen.

     The argument must already be in lexlo form (enforced by assertion).
     The string is only added to the seen set; presumably iteration then
     skips it -- confirm against ``__next__``.
     """
     assert lexlo(lexlo_kmer_string) == lexlo_kmer_string
     seen = self._seen_lexlo_kmer_strings
     seen.add(lexlo_kmer_string)
Esempio n. 18
0
 def from_all_kmer_strings_and_seeds(cls, all_kmers, seeds):
     """Construct an instance from all kmer strings and a collection of seeds.

     ``seeds`` is copied into a list. ``all_kmers`` is converted to an
     ``OrderedDict`` keyed by lexlo string (values are None), which
     de-duplicates while preserving first-seen order.
     """
     seed_list = list(seeds)
     unseen_lexlo = OrderedDict.fromkeys(map(lexlo, all_kmers))
     return cls(seed_list, unseen_lexlo)
Esempio n. 19
0
 def num_neighbor(self, kmer_string):
     """Return the neighbor count for ``kmer_string``.

     A lexlo ``kmer_string`` is answered directly by ``_num_neighbor``.
     Otherwise the question is delegated to ``other_orientation()`` using the
     lexlo form of the string.
     """
     canonical = lexlo(kmer_string)
     if canonical == kmer_string:
         return self._num_neighbor()
     return self.other_orientation().num_neighbor(canonical)
Esempio n. 20
0
 def get_kmer_for_string(self, string):
     """Return the kmer stored under the lexlo form of ``string``.

     The string is converted with ``lexlo`` before the lookup, so callers may
     pass either orientation.
     """
     canonical = lexlo(string)
     return self[canonical]
Esempio n. 21
0
 def get_incoming_kmer_strings(self, kmer_string, is_lexlo=None):
     """Return the incoming kmer strings for ``kmer_string``.

     ``is_lexlo`` says whether ``kmer_string`` is in lexlo form; when left as
     None it is computed here. The kmer string minus its last character is
     passed on to ``_get_kmer_strings``.
     """
     if is_lexlo is None:
         is_lexlo = bool(lexlo(kmer_string) == kmer_string)
     shared_prefix = kmer_string[:-1]
     return self._get_kmer_strings(shared_prefix, True, is_lexlo)
Esempio n. 22
0
 def build(self, kmer_string):
     """Build a new empty kmer for the lexlo form of a kmer string.

     The string is validated with ``check_kmer_string`` first.
     """
     check_kmer_string(kmer_string)
     return self._build_from_lexlo(lexlo(kmer_string))
Esempio n. 23
0
 def get_outgoing_kmer_strings(self, kmer_string, is_lexlo=None):
     """Return the outgoing kmer strings for ``kmer_string``.

     ``is_lexlo`` says whether ``kmer_string`` is in lexlo form; when left as
     None it is computed here. The kmer string minus its first character is
     passed on to ``_get_kmer_strings``.
     """
     if is_lexlo is None:
         is_lexlo = bool(lexlo(kmer_string) == kmer_string)
     shared_suffix = kmer_string[1:]
     return self._get_kmer_strings(shared_suffix, False, is_lexlo)
Esempio n. 24
0
 def with_link_for_kmer(self, link, kmer):
     """Append a link record to the links registered for a lexlo kmer.

     link: <F|R> <num_juncs> <counts0,counts1,...> <junctions>

     ``kmer`` must already be in lexlo form (enforced by assertion).
     Returns ``self``.
     """
     assert kmer == lexlo(kmer)
     links_for_kmer = self.links[kmer]
     links_for_kmer.append(link)
     return self
     return self