def levenshtein_graph(reads, tau=1, **kwargs):
    """
    Construct Levenshtein(tau) graph using naive O(N**2 d) algorithm.

    Every unordered pair of reads is compared with the Levenshtein edit
    distance. A pair whose distance is at most ``tau`` gets that distance
    written into a symmetric N x N integer matrix; every other entry stays 0.
    The matrix is then handed to ``igraph.Graph.Weighted_Adjacency`` to build
    an undirected graph whose edge attribute ``weight`` holds the distance.

    Args:
        reads: Indexable, sized collection of strings. One vertex is created
            per element, and the collection is stored in the ``read`` vertex
            attribute.
        tau: Largest edit distance that is still recorded in the matrix.
        **kwargs: Extra vertex attributes as ``name=values`` pairs; each
            ``values`` is assigned to ``g.vs[name]``.

    Returns:
        The constructed ``igraph.Graph``.

    Note:
        A 0 in the adjacency matrix means "no edge", so reads at distance 0
        (identical reads) are NOT connected by a zero-length edge.
    """
    import igraph as ig
    import numpy as np
    from Levenshtein import distance as levenshtein_dist

    N = len(reads)
    m = np.zeros((N, N), dtype=int)
    # Only the strict lower triangle is computed; the matrix is mirrored as
    # it is filled in because the distance is symmetric.
    for i in range(N):
        for j in range(i):
            dist = levenshtein_dist(reads[i], reads[j])
            m[i, j] = m[j, i] = dist if dist <= tau else 0

    # Be careful! Zero elements are not interpreted as zero-length edges
    g = ig.Graph.Weighted_Adjacency(m.tolist(), mode="UNDIRECTED", attr="weight", loops=False)
    g.vs["read"] = reads

    # dict.items() works on both Python 2 and Python 3, whereas the previous
    # dict.iteritems() raises AttributeError on Python 3.
    for attr_name, attr_data in kwargs.items():
        g.vs[attr_name] = attr_data

    return g
def _find_by_vendor_levenshtein(cls, vendor):
    """Return the entries of ``cls._macs`` whose vendor is close to *vendor*.

    Each entry's ``vendor`` attribute is lower-cased and compared against
    *vendor* (used exactly as passed in) with ``levenshtein_dist``. An entry
    is kept when that distance does not exceed
    ``cls._levenshtein_max_dist_allowed``.

    NOTE(review): *vendor* is not lower-cased here -- presumably the caller
    normalizes it; confirm.

    Returns:
        A new list of the matching entries, in the order they appear in
        ``cls._macs``.
    """
    return [
        entry
        for entry in cls._macs
        if levenshtein_dist(vendor, entry.vendor.lower())
        <= cls._levenshtein_max_dist_allowed
    ]
def _compute_block(self, seqs1, seqs2, origin):
    """Compute the thresholded distance entries for one block of a matrix.

    Args:
        seqs1: Sequences indexing the rows of the block.
        seqs2: Sequences indexing the columns of the block, or ``None`` to
            compare ``seqs1`` against itself. In that case only pairs with
            ``row <= col`` (diagonal included) are evaluated.
        origin: ``(row, col)`` pair added to the block-local indices of
            every reported entry.

    Returns:
        A list of ``(distance + 1, row, col)`` tuples, one for each evaluated
        pair whose ``levenshtein_dist`` is at most ``self.cutoff``.
    """
    row_offset, col_offset = origin

    if seqs2 is None:
        # Self-comparison: upper triangle only.
        pairs = itertools.combinations_with_replacement(enumerate(seqs1), r=2)
    else:
        # Two distinct sequence sets: the full rectangular block.
        pairs = itertools.product(enumerate(seqs1), enumerate(seqs2))

    # Lazily attach the distance to each index pair, then keep the close ones.
    scored = (
        (levenshtein_dist(left, right), i, j)
        for (i, left), (j, right) in pairs
    )
    # The stored value is distance + 1 -- presumably so that a true distance
    # of 0 is not confused with "no entry" downstream; confirm with callers.
    return [
        (dist + 1, row_offset + i, col_offset + j)
        for dist, i, j in scored
        if dist <= self.cutoff
    ]
def coord_generator():
    """Yield ``(distance + 1, index)`` for sequences close to ``target``.

    Closes over ``seqs``, ``i_row``, ``target`` and ``self`` from the
    enclosing scope. Only ``seqs[i_row:]`` is scanned, and the yielded index
    is the position in the full ``seqs`` (counting starts at ``i_row``).
    An item is yielded when ``levenshtein_dist(target, s)`` is at most
    ``self.cutoff``.
    """
    remaining = seqs[i_row:]
    for col, candidate in enumerate(remaining, start=i_row):
        dist = levenshtein_dist(target, candidate)
        if dist <= self.cutoff:
            yield dist + 1, col