Example #1
0
 def assess_alignment(alignment: pysam.AlignedSegment, alignment_info: Dict):
     """Score an alignment against the expected (reference) alignment.

     Returns a 2-tuple ``(chrom_match, matching_prop)``:

     * ``chrom_match`` -- whether ``alignment.reference_name`` equals
       ``alignment_info['chrom']``.
     * ``matching_prop`` -- the number of reference positions reported by
       ``alignment.get_reference_positions(full_length=False)`` that fall in
       the inclusive window ``[alignment_info['start'], alignment_info['end']]``,
       divided by ``len(alignment_info['cigar'])``.
     """
     same_chrom = alignment.reference_name == alignment_info['chrom']

     # Reference coordinates covered by the aligned bases of this read.
     ref_positions = np.array(alignment.get_reference_positions(full_length=False))

     # Boolean mask of the positions that lie inside the expected window.
     window_start = alignment_info['start']
     window_end = alignment_info['end']
     inside_window = np.logical_and(ref_positions >= window_start,
                                    ref_positions <= window_end)

     n_inside = sum(inside_window)
     return same_chrom, n_inside / len(alignment_info['cigar'])
Example #2
0
 # Fragment: builds the per-codon marker list for one reference codon.
 # `reference`, `refcodonpos`, `codon`, `samfile` and `counter` are defined
 # outside the visible excerpt.

 # Label for the printed row: the reference followed by a single space.
 referenceid = str(reference) + ' '
 # The three nucleotide positions belonging to codon number `refcodonpos`.
 refnucpos1 = 0 + (3 * refcodonpos)
 refnucpos2 = 1 + (3 * refcodonpos)
 refnucpos3 = 2 + (3 * refcodonpos)
 # 1-based codon number as text; single-digit numbers get a trailing space
 # so that one- and two-digit labels have the same width.
 if 1 <= (refcodonpos + 1) <= 9:
     refcodonposid = str(refcodonpos + 1) + " "
 else:
     refcodonposid = str(refcodonpos + 1)
 # First residue of the translated reference codon.
 refAAid = str(Seq(codon).translate()[0])
 # Loop over all reads and record one marker per read that covers the codon.
 marker_list = []
 for read in samfile.fetch():
     read_codon = []
     # Walk the read's bases and its reference positions in lockstep.
     # NOTE(review): zip() pairs element i of read.seq with element i of
     # get_reference_positions(read); if the two differ in length (e.g. reads
     # with soft clips or insertions) the pairing may be shifted -- confirm.
     for seq, pos in zip(read.seq,
                         AlignedSegment.get_reference_positions(read)):
         # Keep the base when it is paired with one of the codon's positions.
         if pos == refnucpos1:
             read_codon.append(seq)
         if pos == refnucpos2:
             read_codon.append(seq)
         if pos == refnucpos3:
             read_codon.append(seq)
     if any(read_codon) is True:
         # Only reads that contributed exactly three bases are counted.
         if len(read_codon) == 3:
             counter += 1
             # '.' marks a read codon identical to the reference codon;
             # otherwise the first residue of the read codon's translation
             # is recorded.
             if ''.join(read_codon) == codon:
                 marker_list.append('.')
             else:
                 marker_list.append(
                     str(Seq("".join(read_codon)).translate()[0]))
 print(referenceid, refcodonposid, refAAid, counter,
Example #3
0
 def __call__(self):
     """Tabulate per-codon amino-acid calls from a BAM file into MySQL.

     Reads the FASTA file ``self.args.fastainput`` and the BAM file
     ``self.args.BAMinput``, connects to MySQL with ``self.args.server``,
     ``self.args.user``, ``self.args.password``, ``self.args.database`` and
     the CA certificate ``self.args.sslpath``, then for every reference that
     has at least one read:

     * inserts one row into ``templates`` (reference name and length);
     * inserts one row into ``sites`` per complete reference codon
       (codon number and the translated reference residue);
     * inserts one row into ``substitutions`` per (codon, residue) pair
       observed in at least one read, with the number of reads.

     A read contributes to a codon only when it has an aligned base at all
     three positions of that codon. A read codon identical to the reference
     codon is recorded as ``'.'`` and is therefore not counted as a
     substitution. Progress lines are printed to stdout.

     All inserts are committed once at the end. The connection and both
     input files are closed even when an error occurs.
     """
     codon_len = 3
     AAs = ('A','R','N','D','C','E','Q','G','H','I','L','K','M','F','P','S','T','W','Y','V','*')

     def batch_gen(data, batch_size):
         """Yield consecutive slices of ``data`` of length ``batch_size``."""
         for i in range(0, len(data), batch_size):
             yield data[i:i + batch_size]

     def read_codons(read):
         """Map codon index -> 3-base string for codons fully covered by ``read``."""
         query = read.query_sequence
         if not query:
             return {}
         bases_by_codon = {}
         # get_aligned_pairs(matches_only=True) yields (query_pos, ref_pos)
         # only for aligned bases, so each base is paired with its own
         # reference position even when the read has soft clips, insertions
         # or deletions.
         for query_pos, ref_pos in read.get_aligned_pairs(matches_only=True):
             bases_by_codon.setdefault(ref_pos // codon_len, []).append(query[query_pos])
         return dict(
             (idx, ''.join(bases))
             for idx, bases in bases_by_codon.items()
             if len(bases) == codon_len
         )

     fastaFile = bamFile = con = None
     try:
         fastaFile = pysam.FastaFile(self.args.fastainput)
         bamFile = pysam.AlignmentFile(self.args.BAMinput, "rb")
         ssl_settings = {'ca':self.args.sslpath}
         con = MySQLdb.connect(self.args.server, self.args.user, self.args.password, self.args.database, ssl=ssl_settings)
         # The database is already selected by connect(); a plain cursor
         # avoids relying on the connection's context-manager behaviour.
         cur = con.cursor()

         # Names of the references that have at least one read.
         references = sorted(set(bamFile.getrname(read.tid) for read in bamFile.fetch()))

         # Look up each length next to its own name so the two stay paired.
         for ref in references:
             leng = len(fastaFile.fetch(reference=str(ref)))
             print("%s %s" % (ref, leng))
             cur.execute('INSERT INTO templates(protein, length) VALUES(%s, %s)' ,(ref, leng))

         for reference in references:
             # Complete reference codons only; a trailing partial codon
             # cannot be translated and is skipped.
             ref_codons = [
                 codon
                 for codon in batch_gen(fastaFile.fetch(reference=str(reference)), codon_len)
                 if len(codon) == codon_len
             ]

             # One pass over the reads of this reference only; markers for
             # each codon are collected in read order.
             marker_lists = [[] for _ in ref_codons]
             for read in bamFile.fetch(str(reference)):
                 for codon_idx, read_codon in read_codons(read).items():
                     if codon_idx >= len(ref_codons):
                         continue
                     if read_codon == ref_codons[codon_idx]:
                         marker_lists[codon_idx].append('.')
                     else:
                         marker_lists[codon_idx].append(str(Seq(read_codon).translate()[0]))

             returned_position_lines = []
             for refcodonpos, codon in enumerate(ref_codons):
                 referenceid = str(reference) + ' '
                 # 1-based codon number, padded to two characters.
                 refcodonposid = str(refcodonpos + 1).ljust(2)
                 refAAid = str(Seq(codon).translate()[0])
                 markers = ''.join(marker_lists[refcodonpos])
                 counter = len(marker_lists[refcodonpos])
                 print("%s %s %s %s %s" % (referenceid, refcodonposid, refAAid, counter, markers))
                 returned_position_lines.append(markers)
                 # NOTE(review): the padded text label is what gets stored as
                 # the site position; confirm the column type accepts it.
                 cur.execute("INSERT INTO sites(template_id, position, wild_type_AA) VALUES((SELECT id from templates WHERE protein=%s), %s, %s)" ,(reference, refcodonposid, refAAid))

             print(returned_position_lines)
             for AA in AAs:
                 for position, line in enumerate(returned_position_lines, 1):
                     count = line.count(AA)
                     if count >= 1:
                         print("%s %s %s" % (count, AA, position))
                         cur.execute("INSERT INTO substitutions(site_id, substitution, count) VALUES((SELECT id from sites WHERE position=%s AND template_id=(SELECT id from templates WHERE protein=%s)), %s, %s)" ,(position, reference, AA, count))

         con.commit()
     finally:
         for handle in (con, fastaFile, bamFile):
             if handle is not None:
                 handle.close()