Exemple #1
0
def score_phonetic_alignment(srcw,
                             tgtw,
                             slang,
                             tlang,
                             sim_matrix_path,
                             gap_start_p=-1.0,
                             gap_extend_p=-1.0):
    """Globally align two words and return the score of that alignment.

    Each word is first turned into a plain string: ``make_ascii`` is used
    when the word's language is listed in ``langinfo.SCRIPT_RANGES``;
    otherwise every character is passed through ``str``.

    Args:
        srcw: source word.
        tgtw: target word.
        slang: language of ``srcw``.
        tlang: language of ``tgtw``.
        sim_matrix_path: similarity matrix handed to nwalign as ``matrix``.
        gap_start_p: value passed to nwalign as ``gap_open``.
        gap_extend_p: value passed to nwalign as ``gap_extend``.

    Returns:
        Whatever ``nw.score_alignment`` returns for the aligned pair.
    """

    def _to_align_string(word, lang):
        # The align library needs ascii input.
        if lang in langinfo.SCRIPT_RANGES:
            pieces = make_ascii(word, lang)
        else:
            pieces = [str(c) for c in word]
        return ''.join(pieces)

    nsrcw = _to_align_string(srcw, slang)
    ntgtw = _to_align_string(tgtw, tlang)

    # The same scoring settings drive both the alignment and its score.
    scoring = dict(matrix=sim_matrix_path,
                   gap_open=gap_start_p,
                   gap_extend=gap_extend_p)

    # Global alignment, then score the aligned pair.
    src_aln, tgt_aln = nw.global_align(nsrcw, ntgtw, **scoring)
    return nw.score_alignment(src_aln, tgt_aln, **scoring)
Exemple #2
0
 def sound_seq_distance_str(self,seq1_str, seq2_str):
     """Return a length-normalised alignment distance between two sequences.

     Both inputs are converted with ``np.asanyarray`` and serialised to
     their raw bytes; those byte strings are globally aligned with nwalign
     (``gap_open=0``, ``gap_extend=-5``, matrix ``/tmp/som.costs``).  The
     result is the negated alignment score divided by the combined byte
     length of the two inputs.

     NOTE(review): when both inputs serialise to zero bytes the divisor is
     0.0, so the division would fail if the nwalign calls get that far.
     """
     # tobytes() is the supported name; tostring() is a deprecated alias of it.
     # Serialise each input once instead of once per use.
     seq1_bytes = np.asanyarray(seq1_str).tobytes()
     seq2_bytes = np.asanyarray(seq2_str).tobytes()

     align = nw.global_align(seq1_bytes, seq2_bytes, gap_open=0, gap_extend=-5, matrix='/tmp/som.costs')
     score = nw.score_alignment(*align, gap_open=0, gap_extend=-5, matrix='/tmp/som.costs')
     # "+ 0.0" forces float division on Python 2 as well.
     return (-score)/(len(seq1_bytes)+len(seq2_bytes)+0.0)
def score_phonetic_alignment(srcw,tgtw,slang,tlang,sim_matrix_path,gap_start_p=-1.0,gap_extend_p=-1.0):
    """Score the global alignment of a source word against a target word.

    A word whose language appears in ``langinfo.SCRIPT_RANGES`` is converted
    with ``make_ascii``; any other word is rebuilt from ``str(c)`` of each of
    its characters.  The two resulting strings are aligned with
    ``nw.global_align`` and the aligned pair is scored with
    ``nw.score_alignment``, both using ``sim_matrix_path`` as the matrix and
    ``gap_start_p`` / ``gap_extend_p`` as the gap-open / gap-extend values.
    """
    # Convert to ascii, as required by the align library.
    converted = []
    for word, lang in ((srcw, slang), (tgtw, tlang)):
        if lang in langinfo.SCRIPT_RANGES:
            chars = make_ascii(word, lang)
        else:
            chars = [str(c) for c in word]
        converted.append(''.join(chars))
    nsrcw, ntgtw = converted

    # Use global alignment.
    aligned_pair = nw.global_align(nsrcw, ntgtw, matrix=sim_matrix_path,
                                   gap_open=gap_start_p, gap_extend=gap_extend_p)
    src_aln, tgt_aln = aligned_pair
    alignment_score = nw.score_alignment(src_aln, tgt_aln, matrix=sim_matrix_path,
                                         gap_open=gap_start_p, gap_extend=gap_extend_p)
    return alignment_score
Exemple #4
0
def testNeedleman(N):

    alpha = [
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
        'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'
    ]
    Validated = 0
    SamePath = 0
    for epoch in tqdm(range(N)):
        sizeA = random.randint(1, SIZEMAX)
        sizeB = random.randint(sizeA // 2, 2 * sizeA)
        A = ""
        B = ""
        for i in range(sizeA):
            A += alpha[random.randint(0, len(alpha) - 1)]

        for i in range(sizeB):
            B += alpha[random.randint(0, len(alpha) - 1)]

        aligned = nw.global_align(A,
                                  B,
                                  matrix='atiam-fpa_alpha.dist',
                                  gap_open=-3,
                                  gap_extend=-3)
        score = nw.score_alignment(aligned[0],
                                   aligned[1],
                                   gap_open=-3,
                                   gap_extend=-3,
                                   matrix='atiam-fpa_alpha.dist')

        res = (aligned[0], aligned[1], score)

        try:
            (a, b, s) = myNeedleman(A,
                                    B,
                                    matrix='atiam-fpa_alpha.dist',
                                    gap_open=-3,
                                    gap_extend=-3)

            if s == score:
                Validated += 1

            if res == (a, b, s):
                SamePath += 1
        except RuntimeError:
            print(A, B)
            pass

    print(str(100 * Validated / N) + "% are validated.")
    print(str(100 * SamePath / N) + "% have the exact same path.")
Exemple #5
0
 def _score_alignment(self):
     """Score ``self.aligned_query`` against ``self.aligned_target``.

     The matrix file is chosen in this order:
       1. from ``_score_match`` / ``_score_mismatch`` when both are set;
       2. from ``_matrix`` when it is set;
       3. from ``_match`` / ``_mismatch`` otherwise.
     Gap penalties come from ``_score_gap_open`` / ``_score_gap_extend``
     when set, falling back to ``_gap_open`` / ``_gap_extend``.

     Returns the value produced by ``nw.score_alignment``.
     """
     has_score_match = self._score_match is not None
     has_score_mismatch = self._score_mismatch is not None

     if has_score_match and has_score_mismatch:
         matrix = self._get_matrix_file(match=self._score_match,
                                        mismatch=self._score_mismatch)
     elif self._matrix is not None:
         matrix = self._get_matrix_file(matrix=self._matrix)
     else:
         matrix = self._get_matrix_file(match=self._match,
                                        mismatch=self._mismatch)

     # Scoring-specific gap penalties override the alignment ones.
     gap_open = self._gap_open if self._score_gap_open is None else self._score_gap_open
     gap_extend = self._gap_extend if self._score_gap_extend is None else self._score_gap_extend

     return nw.score_alignment(self.aligned_query,
                               self.aligned_target,
                               gap_open=gap_open,
                               gap_extend=gap_extend,
                               matrix=matrix)
Exemple #6
0
 def _score_alignment(self):
     """Return the nwalign score of the stored aligned query/target pair.

     Matrix selection: the scoring match/mismatch pair wins when both
     values are set, then an explicit ``_matrix``, and finally the plain
     ``_match`` / ``_mismatch`` pair.  Each gap penalty uses its
     ``_score_*`` override when that is not None, otherwise the
     corresponding alignment-time value.
     """
     score_pair_set = [self._score_match is not None,
                       self._score_mismatch is not None]
     if False not in score_pair_set:
         matrix_kwargs = dict(match=self._score_match,
                              mismatch=self._score_mismatch)
     elif self._matrix is not None:
         matrix_kwargs = dict(matrix=self._matrix)
     else:
         matrix_kwargs = dict(match=self._match,
                              mismatch=self._mismatch)
     matrix = self._get_matrix_file(**matrix_kwargs)

     if self._score_gap_open is not None:
         gap_open = self._score_gap_open
     else:
         gap_open = self._gap_open

     if self._score_gap_extend is not None:
         gap_extend = self._score_gap_extend
     else:
         gap_extend = self._gap_extend

     aln_score = nw.score_alignment(self.aligned_query,
                                    self.aligned_target,
                                    gap_open=gap_open,
                                    gap_extend=gap_extend,
                                    matrix=matrix)
     return aln_score
# Reference code for testing: run myNeedleman and nwalign on the same word
# pair with the same substitution matrix so the outputs can be compared by eye.
import nwalign as nw

# --- First comparison: gap_open = gap_extend = -1 -------------------------
print("myNeedleman")
print(
    myNeedleman("CEELECANTH",
                "PELICAN",
                matrix='atiam-fpa_alpha.dist',
                gap_open=-1,
                gap_extend=-1))
print("Nwalign")
# NOTE(review): no gap_open/gap_extend is passed here, so nwalign's own
# defaults are used for the alignment, while the score below passes -1/-1
# explicitly -- confirm the defaults match.
aligned = nw.global_align("CEELECANTH",
                          "PELICAN",
                          matrix='atiam-fpa_alpha.dist')
# Score the aligned pair that nwalign just produced.
score = nw.score_alignment(aligned[0],
                           aligned[1],
                           gap_open=-1,
                           gap_extend=-1,
                           matrix='atiam-fpa_alpha.dist')
print('Results for basic gap costs (linear)')
print(aligned[0])
print(aligned[1])
print('Score : ' + str(score))

# --- Second comparison: gap_open = -5, gap_extend = -2 ---------------------
print("myNeedleman")
print(
    myNeedleman("CEELECANTH",
                "PELICAN",
                matrix='atiam-fpa_alpha.dist',
                gap_open=-5,
                gap_extend=-2))
print("Nwalign")
 def score_alignment(self, seq1, seq2, gp_e=-2, gp_o=-5):
     """Score two already-aligned sequences with nwalign.

     Uses the 'BLOSUM62.txt' matrix; ``gp_o`` is passed as the gap-open
     penalty and ``gp_e`` as the gap-extend penalty.  Returns the value
     produced by ``nw.score_alignment``.
     """
     return nw.score_alignment(seq1,
                               seq2,
                               gap_open=gp_o,
                               gap_extend=gp_e,
                               matrix='BLOSUM62.txt')
    def compare_datapoints(p1, p2):
        """Return a distance between two gesture data points (lower = closer).

        The "gist" of a data point is everything except its first two and
        last two entries.  Corresponding gist entries of ``p1`` and ``p2``
        are globally aligned with nwalign (matrix 'alignment.matrix' located
        next to this file, gap_open=0, gap_extend=-5) and the alignment
        scores are summed; the last three gist entries count double.

        The same sum is computed for a mirrored copy of ``p1``'s gist, to
        allow comparing left-handed with right-handed gestures (the original
        author notes this is not very accurate).  Whichever sum is larger is
        used.  The absolute differences of the two trailing integer fields
        are then subtracted from it (paired crosswise for the mirrored
        variant) and the negated result is returned.

        Args:
            p1, p2: indexable data points; entries ``[2:-2]`` are the
                strings to align and the final two entries must be
                convertible with ``int``.

        Returns:
            The negated, penalised similarity score.
        """
        gist_1 = p1[2:len(p1) - 2]
        gist_2 = p2[2:len(p2) - 2]
        gist_1_flipped = list(gist_1)

        # Build the mirrored gist: swap each entry of the first group with
        # its partner in the second group, flipping quadrants on both.  The
        # last 3 parts are left out of the swap (the original comment says
        # this is so the spine is not flipped).
        # Floor division keeps the bounds and indices integral on Python 3;
        # on Python 2 "/" on ints already floored, so results are unchanged.
        n_parts = len(gist_1_flipped)
        partner_offset = n_parts // 2 - 3 + 1
        for i in range(0, (n_parts - 3) // 2):
            partner = partner_offset + i
            tmp = GestureComparison._flip_quadrants(gist_1_flipped[i])
            gist_1_flipped[i] = GestureComparison._flip_quadrants(
                gist_1_flipped[partner])
            gist_1_flipped[partner] = tmp

        # The matrix path does not change between iterations; build it once.
        matrix_path = (os.path.dirname(os.path.realpath(__file__)) +
                       '/alignment.matrix')

        def _aligned_score(seq_a, seq_b):
            # Align two gist strings and score the resulting alignment.
            res = nwalign.global_align(seq_a, seq_b, matrix=matrix_path)
            return nwalign.score_alignment(res[0],
                                           res[1],
                                           gap_open=0,
                                           gap_extend=-5,
                                           matrix=matrix_path)

        score = 0.0
        score_flipped = 0.0
        double_weight_from = len(gist_1) - 3

        for i in range(0, len(gist_1)):
            # NOTE(review): gist entries are passed to nwalign as strings, so
            # "!= 0" is always true for them; this may have been meant to be
            # "!= ''".  Left unchanged because it affects the scores.
            if gist_1[i] != 0 and gist_2[i] != '':
                # The last three gist entries count double.
                weight = 2 if i >= double_weight_from else 1
                score += weight * _aligned_score(gist_1[i], gist_2[i])
                score_flipped += weight * _aligned_score(gist_1_flipped[i],
                                                         gist_2[i])

        if score_flipped > score:
            # Mirrored variant: the trailing fields are compared crosswise.
            score_flipped -= abs(int(p1[-2]) -
                                 int(p2[-1])) + abs(int(p1[-1]) - int(p2[-2]))
            return -score_flipped
        else:
            score -= abs(int(p1[-2]) -
                         int(p2[-2])) + abs(int(p1[-1]) - int(p2[-1]))
            return -score
Exemple #10
0
    def RemapReadsSingle(self, count=None):
        """Realign the reads of ``self.sam_in`` and write them to ``self.sam_out``.

        The output file is opened in BAM mode ('wb'), or in SAM-with-header
        mode ('wh') when ``self.binary_mode`` is False.  For every fetched
        read:

        * unmapped reads are written unchanged;
        * when ``self.only_gapped`` is True, reads whose CIGAR has no
          insertion/deletion operation (codes 1 / 2) are reported on stdout
          and written unchanged;
        * every other read is globally aligned with nwalign against the
          reference window it covers (widened by any leading/trailing soft
          clip), optionally on the reversed sequences when
          ``self.reverse_sense`` is True and the read is on the forward
          strand.  Its CIGAR is replaced by the new one, 'OC' and 'OP' tags
          are appended, and, when ``self.compute_scores`` is True, its 'AS'
          tag is set to the nwalign score.

        Args:
            count: optional maximum number of reads to realign.  Reads that
                are written unchanged do not count toward the limit.

        Raises:
            ValueError: if the first inspected read carried an 'AS' tag but
                a later realigned read does not.
        """
        counter = 0
        has_score = False
        write_mode = 'wb'
        if self.binary_mode is False:
            write_mode = 'wh'
        self._out = pysam.Samfile(self.sam_out,
                                  mode=write_mode,
                                  referencenames=self.sam_in.references,
                                  referencelengths=self.sam_in.lengths,
                                  header=self._MakeHeader(self.sam_in.header)
                                  )

        try:
            for read in self.sam_in.fetch():
                # Optional limit: stop once `count` reads have been realigned.
                # (">" here used to let one read too many through.)
                if count is not None and counter >= count:
                    break
                if read.is_unmapped is True:
                    self._out.write(read)
                    continue
                if counter == 0:
                    # Check if an alignment score is already present.
                    # If it is then record this in the has_score flag.
                    if any(tag[0] == 'AS' for tag in read.tags):
                        has_score = True
                if self.only_gapped is True:
                    has_indel = any(op[0] in (1, 2) for op in read.cigar)
                    if not has_indel:
                        # Reads without an indel are not realigned.
                        print(read.qname + ', ' + str(read.cigar) + " does not have an indel")
                        self._out.write(read)
                        continue

                # Soft clips (CIGAR op 4) at either end widen the reference window.
                fivep_soft_clip = 0
                threep_soft_clip = 0
                cigar = read.cigar
                if cigar[0][0] == 4:
                    fivep_soft_clip = cigar[0][1]
                if cigar[-1][0] == 4:
                    threep_soft_clip = cigar[-1][1]

                ref = self.ref.fetch(reference=self.refnames[read.tid],
                                     start=read.aend - read.alen - fivep_soft_clip,
                                     end=read.aend + threep_soft_clip)

                # Realign sense strand reads on the reversed sequences.
                reversed_alignment = (self.reverse_sense is True
                                      and read.is_reverse is False)
                if reversed_alignment:
                    query = self.ReverseSeq(read.seq)
                    subject = self.ReverseSeq(ref.upper())
                else:
                    query = read.seq
                    subject = ref.upper()

                aln = nw.global_align(query, subject,
                                      gap_open=self.gap_open,
                                      gap_extend=self.gap_extend,
                                      matrix=self.matrix)

                if self.compute_scores is True:
                    score = nw.score_alignment(aln[0], aln[1],
                                               gap_open=self.gap_open,
                                               gap_extend=self.gap_extend,
                                               matrix=self.matrix)

                    if has_score is True:
                        # Overwrite the existing AS tag (the last one, if repeated).
                        tags = read.tags
                        as_index = None
                        for i, tag in enumerate(tags):
                            if tag[0] == 'AS':
                                as_index = i
                        if as_index is None:
                            raise ValueError("Read " + read.qname +
                                             " is missing an alignment score.")
                        tags[as_index] = ('AS', score)
                        read.tags = tags
                    else:
                        read.tags = [('AS', score)] + read.tags

                bam_cigar = self._MakeBamCigar(aln, read)
                if reversed_alignment:
                    bam_cigar.reverse()

                if self.verbose is True:
                    self.PrettyPrint(read, aln, bam_cigar)

                # New read
                read.cigar = bam_cigar
                # NOTE(review): 'OC' is built from read.cigar *after* it has been
                # replaced above, i.e. from the new CIGAR -- confirm this is intended.
                read.tags = read.tags + [('OC', self._MakeSamCigar(read.cigar)),
                                         ('OP', read.pos)]
                self._out.write(read)
                counter += 1
        finally:
            # Actually call close(); the bare attribute access never closed the file.
            self._out.close()
Exemple #11
0
    def _RealignRead(self, read):
        """Realign a single read against the reference and return it.

        Unmapped reads are returned unchanged.  When ``self.only_gapped`` is
        True, reads whose CIGAR has no insertion/deletion operation (codes
        1 / 2) are reported on stdout and returned unchanged.

        Otherwise the read sequence is globally aligned with nwalign against
        the reference window it covers (widened by any leading/trailing soft
        clip); both sequences are reversed first when ``self.reverse_sense``
        is True and the read is on the forward strand.  The read's CIGAR is
        replaced, 'OC' and 'OP' tags are appended, and, when
        ``self.compute_scores`` is True, the 'AS' tag is set to the nwalign
        score (replacing an existing one or being prepended).

        Args:
            read: the read to realign; it is modified in place.

        Returns:
            The same ``read`` object.

        Raises:
            ValueError: if an 'AS' tag was detected but can no longer be
                found when it is about to be replaced.
        """
        if read.is_unmapped is True:
            return read

        # Remember whether the read already carries an alignment score (AS).
        has_score = any(tag[0] == 'AS' for tag in read.tags)

        if self.only_gapped is True:
            has_indel = any(op[0] in (1, 2) for op in read.cigar)
            if not has_indel:
                # Reads without an indel are not realigned.
                print(read.qname + ', ' + str(read.cigar) + " does not have an indel")
                return read

        # Soft clips (CIGAR op 4) at either end widen the reference window.
        fivep_soft_clip = 0
        threep_soft_clip = 0
        cigar = read.cigar
        if cigar[0][0] == 4:
            fivep_soft_clip = cigar[0][1]
        if cigar[-1][0] == 4:
            threep_soft_clip = cigar[-1][1]

        ref = self.ref.fetch(reference=self.refnames[read.tid],
                             start=read.aend - read.alen - fivep_soft_clip,
                             end=read.aend + threep_soft_clip)

        # Realign sense strand reads on the reversed sequences.
        reversed_alignment = (self.reverse_sense is True
                              and read.is_reverse is False)
        if reversed_alignment:
            query = self.ReverseSeq(read.seq)
            subject = self.ReverseSeq(ref.upper())
        else:
            query = read.seq
            subject = ref.upper()

        # %-formatting prints "query subject" on both Python 2 and 3; the bare
        # print statement used before is a syntax error on Python 3.
        print('%s %s' % (query, subject))
        aln = nw.global_align(query, subject,
                              gap_open=self.gap_open,
                              gap_extend=self.gap_extend,
                              matrix=self.matrix)

        if self.compute_scores is True:
            score = nw.score_alignment(aln[0], aln[1],
                                       gap_open=self.gap_open,
                                       gap_extend=self.gap_extend,
                                       matrix=self.matrix)

            if has_score is True:
                # Overwrite the existing AS tag (the last one, if repeated).
                tags = read.tags
                as_index = None
                for i, tag in enumerate(tags):
                    if tag[0] == 'AS':
                        as_index = i
                if as_index is None:
                    raise ValueError("Read " + read.qname +
                                     " is missing an alignment score.")
                tags[as_index] = ('AS', score)
                read.tags = tags
            else:
                read.tags = [('AS', score)] + read.tags

        bam_cigar = self._MakeBamCigar(aln, read)
        if reversed_alignment:
            bam_cigar.reverse()

        if self.verbose is True:
            self.PrettyPrint(read, aln, bam_cigar)

        # New read
        read.cigar = bam_cigar
        # NOTE(review): 'OC' is built from read.cigar *after* it has been
        # replaced above, i.e. from the new CIGAR -- confirm this is intended.
        read.tags = read.tags + [('OC', self._MakeSamCigar(read.cigar)),
                                 ('OP', read.pos)]
        return read
Exemple #12
0

# Demo: align the two words with myNeedleman and show both aligned strings.
# print is used in call form with a single argument, which behaves the same
# on Python 2 and Python 3 and matches the calls further down.
print('Performing alignement on CEELECANTH and PELICAN...')
ali = myNeedleman('CEELECANTH', 'PELICAN')
print(ali[0])
print(ali[1])
print('')

# Reference code for testing
import nwalign as nw
aligned = nw.global_align("CEELECANTH",
                          "PELICAN",
                          matrix='atiam-fpa_alpha.dist')
# NOTE(review): the score below is computed for a hard-coded alignment and
# with gap_open=-5 / gap_extend=-2, whereas the alignment printed next to it
# was produced by the call above without explicit gap penalties -- confirm
# that this pairing is intended.
score = nw.score_alignment('CEELECANTH',
                           '-PELICAN--',
                           gap_open=-5,
                           gap_extend=-2,
                           matrix='atiam-fpa_alpha.dist')
print('Results for basic gap costs (linear)')
print(aligned[0])
print(aligned[1])
print('Score : ' + str(score))
# Same pair again, this time aligned with explicit gap penalties.
aligned = nw.global_align("CEELECANTH",
                          "PELICAN",
                          matrix='atiam-fpa_alpha.dist',
                          gap_open=-5,
                          gap_extend=-2)
score = nw.score_alignment('CEELECANTH',
                           '-PELICAN--',
                           gap_open=-5,
                           gap_extend=-2,
Exemple #13
0
        singreps=line.split("#")
        rep_count.append(len(singreps)-1)
        poslist=[]
        for singrep in singreps:
            if singrep != "\n":
                repinfo=singrep.split(":")
                poslist.append(int(repinfo[1]))
        rep_pos.append(poslist)                
        locus_num.append(i)
        locus.append(curloc)
        
fullscore=0
array=()
score=0

# Substitution matrix used by every nwalign call below.
blosum_path='/home/CT/server/pybin/BLOSUM62'

# For every repeat, write a ">" header line and then one line for each
# *other* repeat whose alignment score is positive and at least `threshold`
# times the repeat's self-alignment score.
# Output goes through %-formatting / write() so that the text is the same on
# Python 2 and Python 3 (fields separated by single spaces).
for k in range(len(locus)):
    print("Processing repeat %s of %s" % (k, locus[k]))
    # Score of the repeat aligned with itself; used to normalise the scores.
    fullscore=nw.score_alignment(rep[k], rep[k], gap_open=-5,
        gap_extend=-2, matrix=blosum_path)
    fileout.write("> %s %s %s %s %s\n" % (
        k, locus[k], rep[k], rep_count[k], rep_pos[k]))
    for j in range(len(rep)):
        if j==k:
            # A repeat is never reported against itself, so skip the
            # alignment instead of computing and discarding it.
            continue
        array=nw.global_align(rep[k], rep[j], gap_open=-5,
            gap_extend=-2, matrix=blosum_path)
        score=nw.score_alignment(array[0], array[1], gap_open=-5,
            gap_extend=-2, matrix=blosum_path)
        if score>0 and score/float(fullscore)>=threshold:
            fileout.write("%s %s %s %s %s\n" % (
                j, locus[j], rep[j], rep_count[j], rep_pos[j]))

filein.close()
fileout.close()
Exemple #14
0
scoring = sw.ScoringMatrix('scoring_matrix.txt')
# NOTE(review): this rebinds `sw` from the module to a LocalAlignment
# instance, so the module is no longer reachable under that name.
sw = sw.LocalAlignment(scoring)

match = 2
n = 0

# Fill the pairwise similarity and distance matrices, one row per sequence.
for x, seq1 in enumerate(unique_sequences):
    for y, seq2 in enumerate(unique_sequences):

        # NOTE(review): the alignment is computed without a matrix or gap
        # penalties, while the score below passes 'scoring_matrix.txt' and
        # -5 / -2 -- confirm this mismatch is intended.
        alignment = nw.global_align(allsequences[seq1], allsequences[seq2])

        score = float(
            nw.score_alignment(alignment[0],
                               alignment[1],
                               gap_open=-5,
                               gap_extend=-2,
                               matrix='scoring_matrix.txt'))

        # Normaliser: alignment length times the per-position match value.
        # Scores whose magnitude exceeds it are reset to 0.
        # NOTE(review): n is 0 for an empty alignment, which makes the
        # division below fail.
        n = float(len(alignment[0]) * match)
        if abs(score) > n:
            score = 0

        similarity_matrix[x, y] = int(score)
        dist_matrix[x, y] = float(score / n)

    # Call form with a single pre-formatted string: same output on Python 2
    # and Python 3.
    # NOTE(review): time.clock() is not available on Python 3.8+; t0 is set
    # outside this snippet, so both places must switch clocks together.
    print('%d/%d Calculated %d alignments in %f seconds' % (
        x, len(unique_sequences), len(unique_sequences), time.clock() - t0))

    t0 = time.clock()