def _get_adj_list_directional(self, umis, counts):
    ''' build a directed adjacency list linking each umi to the umis
    it is allowed to absorb.

    If self.fuzzy_match is set, neighbours are found with fuzzy regular
    expressions and an edge umi1 -> umi2 is kept when
    counts[umi1] >= counts[umi2] * self.dir_threshold and the two umis
    differ in length by at most one.

    Otherwise every unordered pair with edit_distance <= 1 is checked
    in both directions and an edge a -> b is kept when
    counts[a] >= (2 * counts[b]) - 1.

    Returns a dict mapping every umi to the list of umis it points to
    (an empty list when it has no neighbours).'''

    adj_list = {}
    for umi in umis:
        adj_list[umi] = []

    if not self.fuzzy_match:
        for first, second in itertools.combinations(umis, 2):
            if edit_distance(first, second) > 1:
                continue
            # the count test is directional, so try both orientations
            if counts[first] >= (counts[second]*2)-1:
                adj_list[first].append(second)
            if counts[second] >= (counts[first]*2)-1:
                adj_list[second].append(first)
        return adj_list

    for source in umis:
        source_text = str(source)
        # a second regex is needed for some insertions,
        # e.g UMI1 = "ATCG", UMI2 = "ATTC"
        one_error = regex.compile("(%s){e<=1}" % source_text)
        # NOTE(review): this pattern is built from the *reversed* umi
        # but matched against the un-reversed candidate - confirm intent
        one_insertion = regex.compile("(%s){i<=1}" % source_text[::-1])

        for target in umis:
            if source == target:
                continue
            if not counts[source] >= (counts[target]*self.dir_threshold):
                continue
            # a single edit cannot bridge a length gap larger than one
            if abs(len(source) - len(target)) > 1:
                continue
            target_text = str(target)
            if (one_error.match(target_text) or
                    one_insertion.match(target_text)):
                adj_list[source].append(target)

    return adj_list
def _get_adj_list_directional_adjacency(self, umis, counts, threshold):
    ''' identify all umis within the edit distance threshold
    and where the counts of the first umi is >= (2 * second umi counts)-1

    umis: iterable of umis; it is iterated once per umi, so it must be
        re-iterable (e.g. a list)
    counts: mapping of umi -> count
    threshold: largest edit_distance at which two umis are linked

    Returns a dict mapping each umi to the list of umis it points to.
    A umi at distance 0 is never linked, so with threshold=1 the result
    is the same as the previous hard-coded "distance == 1" test.'''

    adj_list = {}
    for umi in umis:
        adj_list[umi] = [
            umi2 for umi2 in umis
            # honour the caller's threshold instead of a fixed distance
            # of 1; the lower bound keeps distance-0 pairs out
            if 1 <= edit_distance(umi, umi2) <= threshold and
            counts[umi] >= (counts[umi2]*2)-1]

    return adj_list
def _get_adj_list_adjacency(self, umis, counts, threshold):
    ''' map every umi to the umis whose edit_distance from it is at
    most threshold.

    Every umi is compared against every entry of umis, including
    itself. counts is accepted but not used here.

    Returns a dict of umi -> list of neighbouring umis.'''

    adj_list = {}
    for umi in umis:
        neighbours = []
        for candidate in umis:
            if edit_distance(umi, candidate) <= threshold:
                neighbours.append(candidate)
        adj_list[umi] = neighbours

    return adj_list
def _get_adj_list_adjacency(self, umis, counts, threshold):
    ''' build an undirected adjacency list of umis whose edit_distance
    is at most threshold.

    For up to 25 umis every pairwise combination is compared. For
    larger inputs the candidate pairs come from iter_nearest_neighbours
    over an index built by build_substr_idx (presumably to avoid the
    all-pairs comparison - confirm against those helpers).

    counts is accepted but not used here.

    Returns a dict of umi -> list of neighbouring umis; each accepted
    pair is recorded on both members.'''

    adj_list = dict((umi, []) for umi in umis)

    if len(umis) <= 25:
        candidate_pairs = itertools.combinations(umis, 2)
    else:
        # the index is built using the length of the first umi only
        # NOTE(review): assumes all umis share that length - confirm
        substr_idx = build_substr_idx(umis, len(umis[0]), threshold)
        candidate_pairs = iter_nearest_neighbours(umis, substr_idx)

    for left, right in candidate_pairs:
        if edit_distance(left, right) > threshold:
            continue
        adj_list[left].append(right)
        adj_list[right].append(left)

    return adj_list
def _get_adj_list_directional(self, umis, counts, threshold=1):
    ''' build a directed adjacency list of umis whose edit_distance is
    at most threshold, keeping an edge a -> b only when
    counts[a] >= (2 * counts[b]) - 1.

    For up to 25 umis every pairwise combination is compared. For
    larger inputs the candidate pairs come from iter_nearest_neighbours
    over an index built by build_substr_idx (presumably to avoid the
    all-pairs comparison - confirm against those helpers).

    Returns a dict of umi -> list of umis it points to.'''

    adj_list = dict((umi, []) for umi in umis)

    if len(umis) <= 25:
        candidate_pairs = itertools.combinations(umis, 2)
    else:
        # the index is built using the length of the first umi only
        # NOTE(review): assumes all umis share that length - confirm
        substr_idx = build_substr_idx(umis, len(umis[0]), threshold)
        candidate_pairs = iter_nearest_neighbours(umis, substr_idx)

    for left, right in candidate_pairs:
        if edit_distance(left, right) > threshold:
            continue
        # the count test is directional, so try both orientations
        if counts[left] >= (counts[right]*2)-1:
            adj_list[left].append(right)
        if counts[right] >= (counts[left]*2)-1:
            adj_list[right].append(left)

    return adj_list
def _get_adj_list_adjacency(self, umis, counts, threshold):
    ''' link each umi to all umis within the edit distance threshold.

    Each umi is compared with every entry of umis (itself included)
    and linked when edit_distance is <= threshold. counts is accepted
    but not used here.

    Returns a dict of umi -> list of neighbouring umis.'''

    def within_threshold(umi):
        # entries of umis that are close enough to the given umi
        return [other for other in umis
                if edit_distance(umi, other) <= threshold]

    return {umi: within_threshold(umi) for umi in umis}
def get_average_umi_distance(umis):
    ''' return the mean edit_distance over all pairs of umis.

    umis: sized, re-iterable collection of umis (len() is called on it)

    Returns -1 when there are fewer than two umis, since no pair exists
    to measure; otherwise a float.'''

    # an empty collection used to slip past the single-umi check and
    # fail with ZeroDivisionError; treat it like the single-umi case
    if len(umis) < 2:
        return -1

    dists = [edit_distance(*pair) for pair in
             itertools.combinations(umis, 2)]

    return float(sum(dists))/(len(dists))
def get_average_umi_distance(umis):
    ''' return the mean edit_distance over all pairs of umis.

    umis: sized, re-iterable collection of umis (len() is called on it)

    Returns -1 when there are fewer than two umis, since no pair exists
    to measure; otherwise a float.'''

    # fewer than two umis means no pairs: covers the single-umi case
    # and the empty case, which used to raise ZeroDivisionError
    if len(umis) < 2:
        return -1

    total = 0
    n_pairs = 0
    for umi1, umi2 in itertools.combinations(umis, 2):
        total += edit_distance(umi1, umi2)
        n_pairs += 1

    return float(total) / n_pairs