Example #1
0
 def start_trim(f, lock, conn, args):
    '''Trim every read of fastq file f and send the results through conn.

    f     path of the input file; opened with gzip when args.gz is true
    lock  accepted but not used by this function (the acquire/release
          calls that once wrapped each read are disabled)
    conn  object with send() and close(); receives one
          '@title\\nsequence\\n+\\nquality\\n' string per read, then 'Stop'
    args  namespace providing gz, adaptors, min_adaptor_match, min_length,
          min_baseq and min_avgq

    Returns the tuple (written, total): the number of reads whose title was
    not None after filtering/trimming, and the number of reads iterated.

    Raises ValueError if the file is not detected as fastq, or if a read
    that survived trimming has sequence and quality of different lengths.
    '''

    fh = gzip.open(f, 'rb') if args.gz else open(f, 'r')

    # the handle is closed in the finally clause so it is released on the
    # ValueError paths below as well as on normal completion
    try:
        # set adaptors #
        adaptors = set_adaptors(args.adaptors, args.min_adaptor_match)

        # set fqtype #
        filetype = genobox_modules.set_filetype(f, args.gz)
        if filetype != 'fastq':
            raise ValueError('Input not fastq\n')
        fqtype = genobox_modules.set_fqtype(f, args.gz)

        # start trimming file #
        total = 0
        written = 0
        for (title, sequence, quality) in FastqGeneralIterator(fh):
            total += 1
            (title, sequence, quality) = filter_adaptor(
                adaptors, args.min_adaptor_match, args.min_length,
                title, sequence, quality)
            (title, sequence, quality) = trim_qual(
                args.min_baseq, args.min_avgq, args.min_length, fqtype,
                title, sequence, quality)
            if title is not None:
                if len(sequence) != len(quality):
                    raise ValueError('sequence and quality not of the same length\n%s\n%s\n' % (sequence, quality))
                written += 1
            # NOTE(review): the record is sent for every read, including
            # those whose title is None (not counted in written) -- confirm
            # the receiving end expects and discards such records
            conn.send('@%s\n%s\n+\n%s\n' % (title, sequence, quality))
    finally:
        fh.close()

    # 'Stop' is the last message sent before the connection is closed
    conn.send('Stop')
    conn.close()
    return written, total
Example #2
0
def check_formats_fq(i, gz, bwa6):
   '''Return the fastq type of input i, rejecting anything that is not fastq.

   i and gz are passed unchanged to genobox_modules.set_filetype and
   genobox_modules.set_fqtype.  bwa6 is accepted but not used here.

   Raises ValueError when set_filetype does not report 'fastq'.
   '''

   import genobox_modules

   # only a file detected as fastq gets its fastq type looked up
   detected = genobox_modules.set_filetype(i, gz)
   if detected == 'fastq':
      return genobox_modules.set_fqtype(i, gz)
   raise ValueError('Input must be fastq')
Example #3
0
def check_formats_fq(i, gz, bwa6):
    '''Validate that input i is fastq and return its fastq type.

    The file type comes from genobox_modules.set_filetype(i, gz); the
    returned value is whatever genobox_modules.set_fqtype(i, gz) yields.
    The bwa6 argument is accepted but never read in this function.

    Raises ValueError if the detected file type is not 'fastq'.
    '''

    import genobox_modules

    # guard clause: bail out early on anything that is not fastq
    filetype = genobox_modules.set_filetype(i, gz)
    if filetype != 'fastq':
        raise ValueError('Input must be fastq')

    return genobox_modules.set_fqtype(i, gz)
Example #4
0
    def start_trim(f, lock, conn, args):
        '''Trim every read of fastq file f and send the results through conn.

        f     path of the input file; opened with gzip when args.gz is true
        lock  accepted but not used by this function (the acquire/release
              calls that once wrapped each read are disabled)
        conn  object with send() and close(); receives one
              '@title\\nsequence\\n+\\nquality\\n' string per read, then
              'Stop'
        args  namespace providing gz, adaptors, min_adaptor_match,
              min_length, min_baseq and min_avgq

        Returns the tuple (written, total): the number of reads whose title
        was not None after filtering/trimming, and the number of reads
        iterated.

        Raises ValueError if the file is not detected as fastq, or if a read
        that survived trimming has sequence and quality of different
        lengths.
        '''

        fh = gzip.open(f, 'rb') if args.gz else open(f, 'r')

        # the handle is closed in the finally clause so it is released on
        # the ValueError paths below as well as on normal completion
        try:
            # set adaptors #
            adaptors = set_adaptors(args.adaptors, args.min_adaptor_match)

            # set fqtype #
            filetype = genobox_modules.set_filetype(f, args.gz)
            if filetype != 'fastq':
                raise ValueError('Input not fastq\n')
            fqtype = genobox_modules.set_fqtype(f, args.gz)

            # start trimming file #
            total = 0
            written = 0
            for (title, sequence, quality) in FastqGeneralIterator(fh):
                total += 1
                (title, sequence, quality) = filter_adaptor(
                    adaptors, args.min_adaptor_match, args.min_length,
                    title, sequence, quality)
                (title, sequence, quality) = trim_qual(
                    args.min_baseq, args.min_avgq, args.min_length, fqtype,
                    title, sequence, quality)
                if title is not None:
                    if len(sequence) != len(quality):
                        raise ValueError(
                            'sequence and quality not of the same length\n%s\n%s\n'
                            % (sequence, quality))
                    written += 1
                # NOTE(review): the record is sent for every read, including
                # those whose title is None (not counted in written) --
                # confirm the receiving end expects and discards such records
                conn.send('@%s\n%s\n+\n%s\n' % (title, sequence, quality))
        finally:
            fh.close()

        # 'Stop' is the last message sent before the connection is closed
        conn.send('Stop')
        conn.close()
        return written, total
Example #5
0
 def __init__(self, f, o, l=25, q=20, m=20, keep_n=False, M=20, a=None, gz=False):
    '''Store the trimming settings and detect the format of the input.

    f, o, l, q, m, keep_n, M, a and gz are stored on the instance under the
    same names.  f must be indexable: f[0] is the file whose type is
    detected.  a is the list of adaptor sequences; when omitted (None) a
    fresh copy of the built-in adaptor list is used, so instances never
    share one mutable default list.

    Besides the stored arguments, sets self.paired and self.interleaved to
    False, self.adaptors from self.set_adaptors(), calls
    self.set_readtype(), and sets self.format; self.fqtype is set only when
    the format is 'fastq'.
    '''
    if a is None:
       # built per call instead of in the signature: a list default would be
       # one object shared (and mutable) across every instance via self.a
       a = ['GATCGGAAGAGCGGTTCAGCAGGAATGCCGAG', 'ACACTCTTTCCCTACACGACGCTCTTCCGATCT',
            'AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT',
            'CAAGCAGAAGACGGCATACGAGATCGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATCT',
            'CGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATCT',
            'ACACTCTTTCCCTACACGACGCTCTTCCGATCT']
    self.f = f
    self.o = o
    self.l = l
    self.q = q
    self.m = m
    self.keep_n = keep_n
    self.M = M
    self.a = a
    self.paired = False
    self.interleaved = False
    self.gz = gz

    # set adaptors, readtype, fastq format
    # (these calls run after the assignments above, which they may read)
    self.adaptors = self.set_adaptors()
    self.set_readtype()
    self.format = genobox_modules.set_filetype(self.f[0], self.gz)
    # NOTE(review): self.fqtype stays undefined for non-fastq input --
    # confirm later code never reads it in that case
    if self.format == 'fastq':
       self.fqtype = genobox_modules.set_fqtype(self.f[0], self.gz)
Example #6
0
    def __init__(
            self,
            f,
            o,
            l=25,
            q=20,
            m=20,
            keep_n=False,
            M=20,
            a=None,
            gz=False):
        '''Store the trimming settings and detect the format of the input.

        f, o, l, q, m, keep_n, M, a and gz are stored on the instance under
        the same names.  f must be indexable: f[0] is the file whose type is
        detected.  a is the list of adaptor sequences; when omitted (None) a
        fresh copy of the built-in adaptor list is used, so instances never
        share one mutable default list.

        Besides the stored arguments, sets self.paired and self.interleaved
        to False, self.adaptors from self.set_adaptors(), calls
        self.set_readtype(), and sets self.format; self.fqtype is set only
        when the format is 'fastq'.
        '''
        if a is None:
            # built per call instead of in the signature: a list default
            # would be one object shared (and mutable) across every
            # instance via self.a
            a = [
                'GATCGGAAGAGCGGTTCAGCAGGAATGCCGAG',
                'ACACTCTTTCCCTACACGACGCTCTTCCGATCT',
                'AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT',
                'CAAGCAGAAGACGGCATACGAGATCGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATCT',
                'CGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATCT',
                'ACACTCTTTCCCTACACGACGCTCTTCCGATCT'
            ]
        self.f = f
        self.o = o
        self.l = l
        self.q = q
        self.m = m
        self.keep_n = keep_n
        self.M = M
        self.a = a
        self.paired = False
        self.interleaved = False
        self.gz = gz

        # set adaptors, readtype, fastq format
        # (these calls run after the assignments above, which they may read)
        self.adaptors = self.set_adaptors()
        self.set_readtype()
        self.format = genobox_modules.set_filetype(self.f[0], self.gz)
        # NOTE(review): self.fqtype stays undefined for non-fastq input --
        # confirm later code never reads it in that case
        if self.format == 'fastq':
            self.fqtype = genobox_modules.set_fqtype(self.f[0], self.gz)