def main():
  """Print a tab-separated summary line for every line of a PSL file.

  The single positional command-line argument is a PSL file name, or '-'
  to read from STDIN.  For each input line the output holds, in order: the
  1-based line number, the entry's 'qName' and 'tName' values, the result
  of get_coverage(), the 'qSize' value and the result of get_quality().
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('input',help="PSLFILE or - for STDIN")
  args = parser.parse_args()
  from_stdin = args.input == '-'
  inf = sys.stdin if from_stdin else open(args.input)
  try:
    for z, line in enumerate(inf, 1):
      p = PSL(line.rstrip())
      # qName/tName are joined as-is (they must already be strings, exactly
      # as the original concatenation required); the rest are stringified.
      columns = [str(z), p.value('qName'), p.value('tName'),
                 str(p.get_coverage()), str(p.value('qSize')),
                 str(p.get_quality())]
      # A single parenthesised argument prints identically on Python 2 and 3.
      print("\t".join(columns))
  finally:
    # Only close what we opened; leave the process-wide STDIN alone.
    if not from_stdin:
      inf.close()
Exemple #2
0
def main():
    """Summarise each line of a PSL file as one tab-separated output line.

    Reads the file named by the positional ``input`` argument, or STDIN when
    that argument is '-'.  Every output line contains the 1-based input line
    number, the 'qName' value, the 'tName' value, get_coverage(), the 'qSize'
    value and get_quality() of the parsed entry, in that order.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('input', help="PSLFILE or - for STDIN")
    args = parser.parse_args()

    opened_here = args.input != '-'
    inf = open(args.input) if opened_here else sys.stdin
    try:
        for z, line in enumerate(inf, 1):
            p = PSL(line.rstrip())
            # Names are joined unchanged (as in the original concatenation);
            # the remaining values are converted with str().
            row = "\t".join([
                str(z),
                p.value('qName'),
                p.value('tName'),
                str(p.get_coverage()),
                str(p.value('qSize')),
                str(p.get_quality()),
            ])
            # One parenthesised argument: same output on Python 2 and 3.
            print(row)
    finally:
        # Release the file even if parsing fails; never close STDIN for the
        # rest of the process.
        if opened_here:
            inf.close()
 def read_next(self):
     """Return the next run of consecutive PSL lines that share a qName.

     Lines are read from ``self.fh``.  When a line with a different qName
     ends the run, that raw line is stored in ``self.previous`` so the next
     call starts from it; at end of input ``self.previous`` is reset to
     None.

     Invalid lines before the first entry of a stream are skipped silently;
     invalid lines inside the main loop are reported on STDERR and skipped.

     Returns:
         A MultiplePSLAlignments holding every entry of the run, or None
         when nothing is buffered and no valid line is left to read.
     """
     mpa = MultiplePSLAlignments()
     if self.previous:  # first line of this run was buffered by the last call
         first_line = self.previous
     else:  # nothing buffered: advance to the first valid PSL line
         while True:
             first_line = self.fh.readline()
             if not first_line:
                 return None
             if is_valid(first_line.rstrip()):
                 break
     first = PSL(first_line.rstrip())
     current_name = first.value('qName')
     mpa.add_entry(first)
     # From here on mpa always holds at least one entry, so every exit below
     # can return it unconditionally.
     while True:
         line = self.fh.readline()
         if not line:  # end of input: nothing left to carry over
             self.previous = None
             return mpa
         # NOTE(review): this check receives the unstripped line while the
         # priming loop strips first -- kept as found; confirm is_valid does
         # not care about the trailing newline.
         if not is_valid(line):
             sys.stderr.write("Warning line is not a valid psl line\n" +
                              line.rstrip() + "\n")
             continue  # skip bad lines as if they were never there
         entry = PSL(line.rstrip())
         if entry.value('qName') != current_name:
             self.previous = line  # first line of the next run
             return mpa
         mpa.add_entry(entry)
 def read_next(self):
   """Collect and return the next group of same-qName PSL entries.

   Reads lines from ``self.fh`` until the qName changes or the input ends.
   The raw line that starts the following group is kept in
   ``self.previous`` for the next call; at end of input ``self.previous``
   is set to None.

   Before the first entry, invalid lines are dropped without a message;
   afterwards each invalid line produces a warning on STDERR and is skipped.

   Returns:
     A MultiplePSLAlignments with all entries of the group, or None when
     there is no buffered line and no valid line remains in ``self.fh``.
   """
   mpa = MultiplePSLAlignments()
   if self.previous:      # a line from the previous call opens this group
     opening_line = self.previous
   else:                  # no buffered line: find the first valid PSL line
     while True:
       opening_line = self.fh.readline()
       if not opening_line:
         return None
       if is_valid(opening_line.rstrip()):
         break
   opening = PSL(opening_line.rstrip())
   current_name = opening.value('qName')
   mpa.add_entry(opening)
   # The group is non-empty from this point, so it is always returned as-is.
   while True:
     raw = self.fh.readline()
     if not raw:          # input exhausted, nothing to buffer
       self.previous = None
       return mpa
     # NOTE(review): validated without rstrip(), unlike the priming loop
     # above -- left unchanged; confirm is_valid tolerates the newline.
     if not is_valid(raw):
       sys.stderr.write("Warning line is not a valid psl line\n"+raw.rstrip()+"\n")
       continue           # ignore the bad line entirely
     candidate = PSL(raw.rstrip())
     if candidate.value('qName') != current_name:
       self.previous = raw  # belongs to the next group; hand it over
       return mpa
     mpa.add_entry(candidate)
def main():
    """Convert SAM input into PSL output, optionally saving reads as FASTA/FASTQ.

    Options are parsed from the command line.  Header lines are passed to the
    converter via read_header_line(); every other line is converted with
    convert_line() and, when that yields a result, written as a PSL line.
    With the secondary/alternative/all flags, the lines returned by
    SamBasics.get_secondary_alignments() and/or
    SamBasics.get_alternative_alignments() are converted and written too.
    With --give_unique_names each written entry's qName becomes 'Q' followed
    by a running counter.

    Exits with status 1 when FASTA/FASTQ output is requested together with
    non-primary alignments, or when a read's sequence and quality strings
    differ in length while writing FASTQ.
    """
    parser = argparse.ArgumentParser(
        description="Convert a sam file into a psl file")
    parser.add_argument('--genome',
                        help="FASTA input file of reference genome")
    parser.add_argument('--get_secondary_alignments',
                        action='store_true',
                        help="Report SA:Z secondary alignments as well")
    parser.add_argument('--get_alternative_alignments',
                        action='store_true',
                        help="Report XA:Z alternative alignments as well")
    parser.add_argument(
        '--get_all_alignments',
        action='store_true',
        help="Report SA:Z and XA:Z alternative alignments as well")
    parser.add_argument('--give_unique_names',
                        action='store_true',
                        help="Output query names will be unique.")
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        '--output_fasta',
        help=
        "FILENAME to save an outgoing fasta.  Only works for primary alignments."
    )
    group.add_argument(
        '--output_fastq',
        help=
        "FILENAME to save an outgoing fastq.  Only works for primary alignments."
    )
    parser.add_argument('infile', help="FILENAME input file or '-' for STDIN")
    parser.add_argument('-o',
                        '--output',
                        help="FILENAME for the output, STDOUT if not set.")
    args = parser.parse_args()
    if (args.output_fasta
            or args.output_fastq) and (args.get_secondary_alignments
                                       or args.get_alternative_alignments
                                       or args.get_all_alignments):
        sys.stderr.write(
            "ERROR, can only output the fastq/fasta if we are doing primary alignments only.\n"
        )
        sys.exit(1)  # an error must not report success to the caller

    # The flags never change, so decide once which expansion functions to
    # apply to every SAM line (secondary first, then alternative).
    extra_getters = []
    if args.get_secondary_alignments or args.get_all_alignments:
        extra_getters.append(SamBasics.get_secondary_alignments)
    if args.get_alternative_alignments or args.get_all_alignments:
        extra_getters.append(SamBasics.get_alternative_alignments)

    opened = []  # handles created here; all are closed in the finally below
    try:
        inf = sys.stdin
        if args.infile != '-':
            inf = open(args.infile)
            opened.append(inf)
        of = sys.stdout
        if args.output:
            of = open(args.output, 'w')
            opened.append(of)
        spcf = SamBasics.SAMtoPSLconversionFactory()
        if args.genome:
            spcf.set_genome(args.genome)
        off = None
        reads_path = args.output_fasta or args.output_fastq
        if reads_path:
            off = open(reads_path, 'w')
            opened.append(off)

        z = 0  # number of PSL entries written so far
        for line in inf:
            line = line.rstrip()
            if SamBasics.is_header(line):
                spcf.read_header_line(line)
                continue
            # We have a line to convert
            psl = spcf.convert_line(line)
            if psl:
                pobj = PSL(psl)
                z += 1
                if args.give_unique_names:
                    pobj.entry['qName'] = 'Q' + str(z)
                of.write(pobj.get_line() + "\n")
                if off is not None:
                    sam = SamBasics.SAM(line)
                    sequence = sam.value('seq').upper()
                    quality = sam.value('qual')
                    # Flag bit 16 set: emit the reverse complement and the
                    # reversed quality string (presumably restoring the
                    # original read orientation -- see the SAM FLAG field).
                    if sam.check_flag(16):
                        sequence = rc(sequence)
                        quality = quality[::-1]
                    if args.output_fasta:
                        off.write(">" + pobj.value('qName') + "\n" + sequence +
                                  "\n")
                    elif len(sequence) == len(quality):
                        off.write("@" + pobj.value('qName') + "\n" + sequence +
                                  "\n" + "+\n" + quality + "\n")
                    else:
                        sys.stderr.write("ERROR: sequence " + sequence +
                                         " length (" + str(len(sequence)) +
                                         ") doesnt match quality " + quality +
                                         " length (" + str(len(quality)) +
                                         ")\n")
                        sys.exit(1)
            # Convert any additional alignments derived from this line
            for getter in extra_getters:
                for samline in getter(line):
                    psl = spcf.convert_line(samline)
                    if psl:
                        z += 1
                        pobj = PSL(psl)
                        if args.give_unique_names:
                            pobj.entry['qName'] = 'Q' + str(z)
                        of.write(pobj.get_line() + "\n")
    finally:
        # Flush and release every file we opened (including the FASTA/FASTQ
        # output), even on error; STDIN/STDOUT are left to the interpreter.
        for handle in opened:
            handle.close()
def do_psl(args):
  """Print the summed 'blockSizes' of each PSL line in ``args.input``.

  ``args.input`` is iterated line by line; each line is parsed with PSL()
  and the sum of its 'blockSizes' value is printed on its own line.
  """
  for line in args.input:
    psl = PSL(line)
    cov = sum(psl.value('blockSizes'))
    # A single parenthesised argument prints identically on Python 2 and 3.
    print(cov)
def main():
  """Convert a SAM file to PSL, optionally writing the reads as FASTA/FASTQ.

  Arguments come from the command line.  Header lines go to the converter's
  read_header_line(); all other lines are run through convert_line() and
  each non-empty result is written as one PSL line.  When the secondary,
  alternative or all-alignments flags are set, the lines produced by
  SamBasics.get_secondary_alignments() / get_alternative_alignments() are
  converted and written as well.  --give_unique_names replaces each written
  qName with 'Q' plus a running counter.

  Exits with status 1 if FASTA/FASTQ output is combined with non-primary
  alignments, or if a sequence and its quality string differ in length
  while writing FASTQ.
  """
  parser = argparse.ArgumentParser(description="Convert a sam file into a psl file")
  parser.add_argument('--genome',help="FASTA input file of reference genome")
  parser.add_argument('--get_secondary_alignments',action='store_true',help="Report SA:Z secondary alignments as well")
  parser.add_argument('--get_alternative_alignments',action='store_true',help="Report XA:Z alternative alignments as well")
  parser.add_argument('--get_all_alignments',action='store_true',help="Report SA:Z and XA:Z alternative alignments as well")
  parser.add_argument('--give_unique_names',action='store_true',help="Output query names will be unique.")
  group = parser.add_mutually_exclusive_group()
  group.add_argument('--output_fasta',help="FILENAME to save an outgoing fasta.  Only works for primary alignments.")
  group.add_argument('--output_fastq',help="FILENAME to save an outgoing fastq.  Only works for primary alignments.")
  parser.add_argument('infile',help="FILENAME input file or '-' for STDIN")
  parser.add_argument('-o','--output',help="FILENAME for the output, STDOUT if not set.")
  args = parser.parse_args()
  if (args.output_fasta or args.output_fastq) and (args.get_secondary_alignments or args.get_alternative_alignments or args.get_all_alignments):
    sys.stderr.write("ERROR, can only output the fastq/fasta if we are doing primary alignments only.\n")
    sys.exit(1) # report the failure through the exit status as well

  # Pick the per-line expansion functions once; order matches the original
  # processing order (secondary alignments before alternative ones).
  expanders = []
  if args.get_secondary_alignments or args.get_all_alignments:
    expanders.append(SamBasics.get_secondary_alignments)
  if args.get_alternative_alignments or args.get_all_alignments:
    expanders.append(SamBasics.get_alternative_alignments)

  owned = [] # file handles opened by this function
  try:
    inf = sys.stdin
    if args.infile != '-':
      inf = open(args.infile)
      owned.append(inf)
    of = sys.stdout
    if args.output:
      of = open(args.output,'w')
      owned.append(of)
    spcf = SamBasics.SAMtoPSLconversionFactory()
    if args.genome: spcf.set_genome(args.genome)
    off = None
    reads_file = args.output_fasta or args.output_fastq
    if reads_file:
      off = open(reads_file,'w')
      owned.append(off)

    z = 0 # count of PSL entries written
    for line in inf:
      line = line.rstrip()
      if SamBasics.is_header(line):
        spcf.read_header_line(line)
        continue
      # We have a line to convert
      psl = spcf.convert_line(line)
      if psl:
        pobj = PSL(psl)
        z += 1
        if args.give_unique_names:
          pobj.entry['qName'] = 'Q'+str(z)
        of.write(pobj.get_line()+"\n")
        if off is not None:
          sam = SamBasics.SAM(line)
          sequence = sam.value('seq').upper()
          quality = sam.value('qual')
          # Flag bit 16 set: write the reverse complement and the reversed
          # quality string (presumably the read's original orientation --
          # see the SAM FLAG field).
          if sam.check_flag(16):
            sequence = rc(sequence)
            quality = quality[::-1]
          if args.output_fasta:
            off.write(">"+pobj.value('qName')+"\n"+sequence+"\n")
          elif len(sequence) == len(quality):
            off.write("@"+pobj.value('qName')+"\n"+sequence+"\n"+"+\n"+quality+"\n")
          else:
            sys.stderr.write("ERROR: sequence "+sequence+" length ("+str(len(sequence))+") doesnt match quality "+quality+" length ("+str(len(quality))+")\n")
            sys.exit(1)
      # Convert the additional alignments derived from this line, if requested
      for expand in expanders:
        for samline in expand(line):
          psl = spcf.convert_line(samline)
          if psl:
            z += 1
            pobj = PSL(psl)
            if args.give_unique_names:
              pobj.entry['qName'] = 'Q'+str(z)
            of.write(pobj.get_line()+"\n")
  finally:
    # Close everything we opened, the FASTA/FASTQ file included, even when
    # an error occurs; STDIN/STDOUT are not ours to close.
    for handle in owned:
      handle.close()
def do_psl(args):
    """Print, for every PSL line in ``args.input``, the sum of its block sizes.

    Each line yielded by ``args.input`` is parsed with PSL(); the entry's
    'blockSizes' value is summed and the total printed on a line of its own.
    """
    for line in args.input:
        block_sizes = PSL(line).value('blockSizes')
        # Parenthesised single-argument print: same output on Python 2 and 3.
        print(sum(block_sizes))