예제 #1
0
def equals(alpha, beta):
    """Report whether two alignment entries are considered equal.

    Two strings are equal when they compare equal. When exactly one side
    can be treated as a list of symbols, the other side is passed through
    ``translate`` (defined elsewhere in this module) and the result is
    looked up in that list. ``beta`` being a list takes precedence over
    ``alpha`` being a list. Any other combination is not equal.

    Returns:
        True on a match, False otherwise.
    """
    if isinstance(alpha, str) and isinstance(beta, str):
        return alpha == beta
    if isinstance(beta, list):
        # beta is a symbol list: look for alpha's translation inside it.
        return translate(alpha) in beta
    if isinstance(alpha, list):
        # alpha is a symbol list: look for beta's translation inside it.
        return translate(beta) in alpha
    return False
예제 #2
0
def _translate(s):
    r=[]
    for c in s:
        if isinstance(c,str):
            r.append([translate(c)])
        elif isinstance(c, list):
            r.append(c)
    return r
예제 #3
0
def match_score2(alpha, beta):
    """Score one pair of alignment entries.

    Returns the module-level ``gap_penalty`` when exactly one side is the
    gap marker ``'-'``, ``match_award`` when the entries match, and
    ``mismatch_penalty`` otherwise. Matching follows these rules:

    * two strings match when they compare equal;
    * otherwise, if ``beta`` is a list, ``translate(alpha)`` must be in it;
    * otherwise, if ``alpha`` is a list, ``translate(beta)`` must be in it.

    ``translate`` and the three score constants are defined elsewhere in
    this module.
    """
    # Exactly one of the two entries is a gap.
    if (alpha == '-') != (beta == '-'):
        return gap_penalty

    if isinstance(alpha, str) and isinstance(beta, str):
        matched = alpha == beta
    elif isinstance(beta, list):
        matched = translate(alpha) in beta
    elif isinstance(alpha, list):
        matched = translate(beta) in alpha
    else:
        matched = False

    return match_award if matched else mismatch_penalty
예제 #4
0
def translate_all(inputs,
                  filter_empty=True,
                  sort=True,
                  trans_table=all_table_ascii_unicode):
    """Translate every input value and optionally sort the results.

    Args:
        inputs: iterable of values handed to ``translate`` one by one.
        filter_empty: when true, inputs whose ``strip()`` result is empty
            are skipped and never translated.
        sort: when true, the translated values are ordered with
            ``pattern_comparator`` (defined elsewhere in this module).
        trans_table: forwarded to ``translate`` as ``trans_table``.

    Returns:
        A list with the translated values.
    """
    translated = []
    for raw in inputs:
        if filter_empty and len(raw.strip()) == 0:
            continue
        translated.append(translate(raw, trans_table=trans_table))

    if sort:
        translated.sort(key=functools.cmp_to_key(pattern_comparator))
    return translated
예제 #5
0
def symbol_to_patter(orig, symbol):
    """Build a merged pattern from the entries of an alignment symbol.

    Each entry of *symbol* is handled according to its shape:

    * a string is copied unchanged;
    * a one-element list is copied unchanged;
    * a two-element list containing ``"-"`` yields the tuple
      ``(other, 0, 1)``, where ``other`` is the non-gap member, passed
      through ``utils.translate`` when it is a string;
    * a two-element list of str/str, list/str or str/list yields a
      de-duplicated list combining the list member(s) with the
      translation of the string member(s);
    * a list of any other length only prints a warning;
    * everything else (including a list/list pair) contributes nothing.

    Args:
        orig: only used in the trace line printed at the end.
        symbol: iterable of entries as described above.

    Returns:
        The list of merged entries.
    """
    merged = []

    for entry in symbol:
        if isinstance(entry, str):
            merged.append(entry)
            continue
        if not isinstance(entry, list):
            continue

        size = len(entry)
        if size == 1:
            # normally only happens if we compare two patterns with the same symbols
            merged.append(entry)
            continue
        if size != 2:
            print("WARNIGN {}".format(entry))
            continue

        first, second = entry
        if "-" in entry:
            # One side is a gap: keep the other side as an optional element.
            kept = second if first == "-" else first
            if isinstance(kept, str):
                kept = utils.translate(kept)
            merged.append((kept, 0, 1))
        elif isinstance(first, str) and isinstance(second, str):
            classes = [utils.translate(first), utils.translate(second)]
            merged.append(list(set(classes)))
        elif isinstance(first, list) and isinstance(second, str):
            # Extend the already merged symbol with the new translation.
            merged.append(list(set(first + [utils.translate(second)])))
        elif isinstance(first, str) and isinstance(second, list):
            # Same as above with the roles of the two members swapped.
            merged.append(list(set(second + [utils.translate(first)])))

    print("O:{} S:{}".format(orig, symbol))

    return merged
예제 #6
0
    def update_symbol(self, symbol):
        """Rebuild ``self.symbol`` from an alignment symbol and count values.

        If ``self._vcounts`` is empty it is initialised with one empty dict
        per entry of *symbol*. Each entry is then handled by shape:

        * a string is copied unchanged;
        * a one-element list is copied unchanged (a trace line is printed);
        * a two-element str/str list increments the per-position counters
          of both values and yields the de-duplicated list of their
          ``translate`` results;
        * a two-element list/str list yields the list member extended with
          the translation of the string member, de-duplicated;
        * a list of any other length only prints a warning;
        * everything else contributes nothing.

        Finally ``self.symbol`` is replaced and ``self._count`` incremented.
        ``translate`` is defined elsewhere in this module.
        """
        #TODO length check
        if not self._vcounts:
            self._vcounts = [{} for _ in range(len(symbol))]

        merged = []
        for pos, entry in enumerate(symbol):
            if isinstance(entry, str):
                merged.append(entry)
                continue
            if not isinstance(entry, list):
                continue

            size = len(entry)
            if size == 1:
                # normally only happens if we compare two patterns with the same symbols
                merged.append(entry)
                print("SINGLE VALUE {}".format(entry))
                continue
            if size != 2:
                print("WARNIGN {}".format(entry))
                continue

            first, second = entry
            if isinstance(first, str) and isinstance(second, str):
                # Record both raw values in the frequency dict of this position.
                counts = self._vcounts[pos]
                counts[first] = counts.get(first, 0) + 1
                counts[second] = counts.get(second, 0) + 1
                classes = [translate(first), translate(second)]
                merged.append(list(set(classes)))
            elif isinstance(first, list) and isinstance(second, str):
                # The first member is already a merged symbol: only extend it.
                merged.append(list(set(first + [translate(second)])))

        self.symbol = merged
        self._count += 1
예제 #7
0
def match_score2(alpha, beta, score_matrix=score_matrix):
    """Score one pair of alignment entries using a score lookup.

    The result is always ``score_matrix[key]`` where ``key`` is one of the
    module-level names ``gap_penalty``, ``match_award``,
    ``cset_match_award`` or ``mismatch_penalty``:

    * exactly one side is the gap marker ``'-'``  -> ``gap_penalty``;
    * two equal strings                           -> ``match_award``;
    * two lists, ``beta`` a proper subset of ``alpha`` (as sets)
                                                  -> ``match_award``;
    * one list containing ``translate(other)``    -> ``cset_match_award``;
    * anything else                               -> ``mismatch_penalty``.

    Args:
        alpha: first entry (string or list of symbols).
        beta: second entry (string or list of symbols).
        score_matrix: mapping indexed by the score keys listed above.
    """
    # Exactly one of the two entries is a gap.
    if (alpha == '-') != (beta == '-'):
        return score_matrix[gap_penalty]

    alpha_is_list = isinstance(alpha, list)
    beta_is_list = isinstance(beta, list)

    if isinstance(alpha, str) and isinstance(beta, str):
        key = match_award if alpha == beta else mismatch_penalty
    elif alpha_is_list and beta_is_list:
        # NOTE(review): strict subset, so two lists with identical members
        # score as a mismatch -- confirm this is intended.
        key = match_award if set(beta) < set(alpha) else mismatch_penalty
    elif beta_is_list:
        key = cset_match_award if translate(alpha) in beta else mismatch_penalty
    elif alpha_is_list:
        key = cset_match_award if translate(beta) in alpha else mismatch_penalty
    else:
        key = mismatch_penalty

    return score_matrix[key]
예제 #8
0
    def merge_alignment(self, symbol):
        """Collapse an alignment symbol into a list of merged entries.

        String entries are copied unchanged. For list entries the members
        are joined into one string, passed through ``translate`` (defined
        elsewhere) and the distinct items of the result are stored as a
        list. Entries of any other type are skipped.

        Returns:
            The list of merged entries.
        """
        merged = []
        for entry in symbol:
            if isinstance(entry, str):
                merged.append(entry)
                continue
            if isinstance(entry, list):
                # The two values are not used, but the lookups are kept:
                # they raise IndexError for lists with fewer than two members.
                # NOTE(review): confirm whether short lists should be accepted.
                _first, _second = entry[0], entry[1]
                distinct = set(translate("".join(entry)))
                merged.append(list(distinct))

        return merged
예제 #9
0
    def from_string(cls,pstring):
        """Scan *pstring* for single-quoted values and collect them in a Pattern.

        A fresh ``Pattern`` is created and the string is read character by
        character. A single quote toggles the ``parseValue`` flag; every
        other character is appended to ``valuestring`` while the flag is
        set and ignored otherwise. When a quote is seen while the flag is
        set, ``valuestring`` is appended to ``_p._values`` and its
        ``translate`` result to ``_p._l1patterns``.

        NOTE(review): the visible code never returns ``_p``, never resets
        ``valuestring`` between values, never uses ``ins``, and starts with
        ``parseValue=True`` so the very first quote is treated as the end of
        a value. The method looks unfinished or truncated -- confirm against
        the full source before relying on it.
        """

        _p= Pattern()
        ins=''  # not used in the visible part of the method
        parseValue=True  # True while characters are being collected
        valuestring=''  # characters collected so far (accumulates, never cleared)
        for c in pstring:
            if c == "'":
                if not parseValue:
                    # quote while not collecting: start collecting
                    parseValue=True
                else:
                    # quote while collecting: store the collected text
                    _p._values.append(valuestring)
                    _p._l1patterns.append(translate(valuestring))
                    parseValue=False
            else:
                if parseValue:
                    valuestring+=c
                else:
                    # characters outside of a quoted value are ignored
                    pass
예제 #10
0
    def parse_value(self, value):
        """Split *value* into runs of equal translated symbols and store them.

        Nothing happens for a falsy *value*. Otherwise the value is passed
        through ``translate`` (defined elsewhere) and the result is grouped
        into runs of consecutive equal items. For every run, in order:

        * ``self._l2patterns`` gets ``(symbol, start_offset, run_length)``;
        * ``self._l1patterns`` gets the matching slice of the translation;
        * ``self._values`` gets the matching slice of *value*.

        Afterwards ``_l2string``, ``_l1string`` and ``_valuestring`` are
        rebuilt by joining the three lists (including anything they held
        before this call).
        """
        logger.debug("CREATE PATTERN")
        if not value:
            return

        translated = translate(value)
        logger.debug("{:>2} translated:{}".format('', translated))

        # (symbol, run length) for each run of consecutive equal symbols.
        runs = [(sym, len(list(members)))
                for sym, members in itertools.groupby(translated)]
        logger.debug("{:>2} L1-groups:{}".format('', runs))

        start = 0
        for sym, length in runs:
            stop = start + length
            self._l2patterns.append((sym, start, length))
            self._l1patterns.append(translated[start:stop])
            self._values.append(value[start:stop])
            start = stop

        self._l2string = "".join(entry[0] for entry in self._l2patterns)
        self._l1string = "".join(self._l1patterns)
        self._valuestring = "".join(self._values)
예제 #11
0
    def find_best_alignment(self, alpha_list, beta_list):
        """Align two lists, retry with translated input, and keep the best.

        ``needle`` (defined elsewhere) is called on the raw inputs and its
        five results are stored under ``self.data['raw']``. Depending on the
        reported identity a second alignment may be attempted:

        * ``0 < identity < 100``: a new first sequence is built from the raw
          alignment -- single-member columns keep the aligned entry, columns
          whose first member is ``'-'`` are skipped, other columns
          contribute their first member (passed through ``translate`` and
          wrapped in a list when it is a string). If at least one string was
          translated, the re-alignment is stored under ``'partl1'``.
        * ``identity == 0``: ``self._translate(alpha_list)`` is aligned
          against ``beta_list`` and stored under ``'l1'``.

        ``self.data['best']`` is then set to the entry with the highest
        identity (ties broken by score, then by position); every entry
        currently in ``self.data`` takes part, including any that were
        present before this call. With a single entry, ``'raw'`` is used.
        """

        def align(first_seq):
            # Run needle against beta_list and package its five results.
            identity, score, align1, symbol, align2 = needle(
                first_seq, beta_list)
            return {
                'score': score,
                'identity': identity,
                'align1': align1,
                'align2': align2,
                'symbol': symbol
            }

        raw = align(alpha_list)
        self.data['raw'] = raw
        identity = raw['identity']

        if 0 < identity < 100:
            columns = raw['symbol']
            translated_any = False
            # translate the non matching symbols in alpha
            retry_seq = []
            for pos, aligned in enumerate(raw['align1']):
                column = columns[pos]
                if len(column) == 1:
                    retry_seq.append(aligned)
                    continue
                head = column[0]
                if head == '-':
                    # gap on the alpha side: nothing to carry over
                    continue
                if isinstance(head, str):
                    translated_any = True
                    retry_seq.append([translate(head)])
                else:
                    retry_seq.append(head)
            if translated_any:
                self.data['partl1'] = align(retry_seq)
        elif identity == 0:
            # no matching characters:
            self.data['l1'] = align(self._translate(alpha_list))

        if len(self.data) > 1:

            def by_identity_then_score(left, right):
                diff = left['identity'] - right['identity']
                if diff == 0:
                    diff = left['score'] - right['score']
                return diff

            ranked = sorted(list(self.data.values()),
                            key=functools.cmp_to_key(by_identity_then_score))
            self.data['best'] = ranked[-1]
        else:
            self.data['best'] = self.data['raw']