Beispiel #1
0
def _bowtie_command(bowtie_bin, index, reads, out_dir, fname, sam, file_log,
                    stderr_redirect):
    '''Build the shell command string for one Bowtie stage of one sample.

    bowtie_bin      -- path of the bowtie executable
    index           -- bowtie index to align against
    reads           -- FASTQ file used as input for this stage
    out_dir         -- directory prefix for the <fname>_nomatch/_multi/_match
                       FASTQ files written by this stage
    fname           -- sample name used to name the output files
    sam             -- path of the SAM file written by this stage
    file_log        -- log file; stdout is always appended to it
    stderr_redirect -- '2>' to overwrite the log with stderr, '2>>' to append
    '''
    template = ('%s -v 2 -y -m 1 -a --best --strata -S -p 2 --un '
                '%s%s_nomatch.fastq --max %s%s_multi.fastq '
                '--al %s%s_match.fastq %s '
                '%s %s 1>>%s %s%s')
    return template % (bowtie_bin,
                       out_dir, fname,
                       out_dir, fname,
                       out_dir, fname,
                       index, reads, sam,
                       file_log, stderr_redirect, file_log)


def run_align(inputs, paths_in, paths_out):  # all arguments = dict
    '''Bowtie align: subtract ladder, tRNA and rRNA reads, then align to chr.

    For every sample in inputs['files'] four Bowtie commands are queued.
    Each stage reads the "_nomatch.fastq" output of the previous stage
    (the first stage reads "<sample>-trimmed.fastq" from path_filter), and
    the stages are run one after another through ribo_util.subprocess_wf.

    inputs    -- reads 'run_bowtie', 'files' and 'cores'
    paths_in  -- reads 'path_bowtie' and the four 'btindex_*' entries
    paths_out -- reads 'path_filter', 'path_ladder', 'path_trna',
                 'path_rrna', 'path_chr', 'path_temp' and 'path_log'

    Side effect: samples without a filtered FASTQ are removed from
    inputs['files'].  If inputs['run_bowtie'] is not 'yes', nothing is
    aligned; the function only reports whether each sample already has a
    "<sample>_match.SAM" in path_chr.

    Returns None.  When no command was queued (alignment disabled or every
    sample skipped) nothing is launched.
    '''

    run = inputs['run_bowtie']
    files = inputs['files']
    # Second argument of subprocess_wf -- presumably the number of commands
    # run concurrently; confirm against ribo_util.
    # NOTE(review): the original comment said bowtie uses 1 core per
    # instance, but every command is built with "-p 2" -- confirm intent.
    threads = inputs['cores']

    if not files:
        print("There are no files")
        return

    ladder = []
    tRNA = []
    rRNA = []
    chromosome = []

    # Iterate over a snapshot: samples are removed from inputs['files']
    # inside the loop, and removing from the list being iterated would
    # silently skip the sample that follows each removed one.
    for fname in list(files):
        if run != 'yes':
            if os.path.exists(paths_out['path_chr'] + fname + '_match.SAM'):
                print(fname + " has been aligned")
            else:
                print("ERROR: " + fname +
                      " has not been aligned, change run settings")
            continue

        trimmed = paths_out['path_filter'] + fname + '-trimmed.fastq'
        if not os.path.exists(trimmed):
            print("ERROR: " + fname +
                  " has no filtered file, has been removed from analysis")
            inputs['files'].remove(fname)
            continue

        bowtie_bin = paths_in['path_bowtie']
        file_log = paths_out['path_log'] + fname + '_bowtie'

        # first, align to the ladder index to subtract; stderr uses '2>' so
        # this stage rewrites the log, later stages only append to it
        ladder.append(_bowtie_command(
            bowtie_bin, paths_in['btindex_ladder'], trimmed,
            paths_out['path_ladder'], fname,
            paths_out['path_temp'] + fname + '_ladder_match.SAM',
            file_log, '2>'))

        # second, align to the tRNA index to subtract
        tRNA.append(_bowtie_command(
            bowtie_bin, paths_in['btindex_trna'],
            paths_out['path_ladder'] + fname + '_nomatch.fastq',
            paths_out['path_trna'], fname,
            paths_out['path_temp'] + fname + '_tRNA_match.SAM',
            file_log, '2>>'))

        # third, align to the rRNA index
        rRNA.append(_bowtie_command(
            bowtie_bin, paths_in['btindex_rrna'],
            paths_out['path_trna'] + fname + '_nomatch.fastq',
            paths_out['path_rrna'], fname,
            paths_out['path_temp'] + fname + '_rRNA_match.SAM',
            file_log, '2>>'))

        # then align to the chr index; this SAM is kept in path_chr
        chromosome.append(_bowtie_command(
            bowtie_bin, paths_in['btindex_chr'],
            paths_out['path_rrna'] + fname + '_nomatch.fastq',
            paths_out['path_chr'], fname,
            paths_out['path_chr'] + fname + '_match.SAM',
            file_log, '2>>'))

    # Nothing was queued (run disabled or every sample skipped): the
    # per-sample messages above already explain why, so launch nothing.
    if not ladder:
        return

    print("\n------ALIGN------")
    print('\nFiles to align: ' + ', '.join(files))
    print("\n\tStarted Bowtie alignment at " + str(datetime.now()))

    # Stages must run in this order: each consumes the previous one's
    # "_nomatch.fastq" output.
    stages = (
        ('ladder removal', ladder),
        ('tRNA removal', tRNA),
        ('rRNA removal', rRNA),
        ('chromosome alignment', chromosome),
    )
    for label, commands in stages:
        ribo_util.subprocess_wf(commands, threads)
        print("\tFinished " + label + " at " + str(datetime.now()))

    print("\tCOMPLETED ALIGNING")

    return
Beispiel #2
0
def run_filter(inputs, paths_in, paths_out):  # all arguments = dict
    '''
    Filter reads using skewer

    For every sample in inputs['files'] a skewer command is built that reads
    path_fastq + sample and writes to the output prefix path_filter + sample;
    the commands are then run through ribo_util.subprocess_wf.

    inputs    -- reads 'files', 'run_filtering', 'minlength', 'maxlength',
                 'phred_cutoff', 'linker' and 'threads'
    paths_in  -- reads 'path_fastq'
    paths_out -- reads 'path_filter' and 'path_log'

    Side effects: samples whose FASTQ is missing are removed from
    inputs['files']; the filter settings are recorded once per queued sample
    through ribo_util.analysis_log.  If inputs['run_filtering'] is not 'yes',
    nothing is filtered; the function only reports whether each sample
    already has a "<sample>-trimmed.fastq" in path_filter.

    Returns inputs, or None when inputs['files'] is empty.  When no command
    was queued nothing is launched.
    '''

    files = inputs['files']
    run = inputs['run_filtering']
    minlength = inputs['minlength']
    maxlength = inputs['maxlength']
    phred_cutoff = inputs['phred_cutoff']
    linker = inputs['linker']
    threads = inputs['threads']  # passed to skewer -t; skewer threads itself
    filtering = []
    log_data = {}

    if not files:
        print("There are no files")
        return

    # Iterate over a snapshot: samples are removed from inputs['files']
    # inside the loop, and removing from the list being iterated would
    # silently skip the sample that follows each removed one.
    for fname in list(files):

        file_in = paths_in['path_fastq'] + fname
        file_out = paths_out['path_filter'] + fname
        file_log = paths_out['path_log'] + fname + '_filter'

        if run != 'yes':
            if os.path.exists(file_out + '-trimmed.fastq'):
                print(fname + " has been filtered")
            else:
                print("ERROR: " + fname +
                      " has not been filtered, change run setting")
            continue

        if not os.path.exists(file_in):
            print("ERROR: " + fname +
                  " has no FASTQ file, has been removed from analysis")
            inputs['files'].remove(fname)
            continue

        # stdout is appended to the log, stderr ('2>') overwrites it
        command_to_run = 'skewer -x %s -Q %d  -l %d -L %d -o %s --quiet -t %d %s 1>>%s 2>%s' % (
            linker, phred_cutoff, minlength, maxlength, file_out, threads,
            file_in, file_log, file_log)

        # Add filter parameters to log:
        log_data['settings'] = {
            'linker': linker,
            'phred_cutoff': phred_cutoff,
            'minlength': minlength,
            'maxlength': maxlength
        }

        # NOTE(review): the label is 'ribo_density' although this is the
        # filter step -- confirm this is the intended log section.
        log_function = 'ribo_density'
        ribo_util.analysis_log(fname, log_function, log_data, paths_in,
                               paths_out)

        filtering.append(command_to_run)

    # Nothing was queued (run disabled or every sample skipped): the
    # per-sample messages above already explain why, so launch nothing.
    if not filtering:
        return inputs

    print("-----FILTER-----")
    print('\nFiles to filter: ' + ', '.join(files))
    print("Filter parameters are: \nmin length = %s \nmax length = %s \nphred cutoff = %s " % (
        minlength, maxlength, phred_cutoff))
    print("\n\tStarted filtering at " + str(datetime.now()))

    # second argument is 1 because each skewer command already receives
    # its own thread count through -t
    ribo_util.subprocess_wf(filtering, 1)

    print("\tFinished filtering at " + str(datetime.now()))
    print("\tCOMPLETED FILTERING")

    return inputs
def run_filter(inputs, paths_in, paths_out): # all arguments = dict
    '''
    Filter reads using skewer

    For every sample in inputs['files'] a skewer command is built that reads
    path_fastq + sample and writes to the output prefix
    path_filter + sample (+ '_UMI' when UMI handling is on); the commands are
    then run through ribo_util.subprocess_wf.

    inputs    -- reads 'files', 'run_filtering', 'run_filter_UMI',
                 'minlength', 'maxlength', 'phred_cutoff', 'linker' and
                 'threads'
    paths_in  -- reads 'path_fastq'
    paths_out -- reads 'path_filter' and 'path_log'

    Side effects: samples whose FASTQ is missing are removed from
    inputs['files']; the filter settings (with the UMI-adjusted lengths)
    are recorded once per queued sample through ribo_util.analysis_log.
    If inputs['run_filtering'] is not 'yes', nothing is filtered; the
    function only reports whether each sample already has a
    "-trimmed.fastq" output in path_filter.

    Returns inputs, or None when inputs['files'] is empty.  When no command
    was queued nothing is launched.
    '''

    files        = inputs['files']
    run          = inputs['run_filtering']
    minlength    = inputs['minlength']
    maxlength    = inputs['maxlength']
    phred_cutoff = inputs['phred_cutoff']
    linker       = inputs['linker']
    threads      = inputs['threads']    # passed to skewer -t; skewer threads itself
    filtering    = []
    log_data     = {}

    # If using Unique Molecular Index (UMI) in library prep. Skewer will not remove UMI
    # so we will do it manually after. skewer output file will have UMI naming to identify it:

    if inputs['run_filter_UMI'] == 'yes':
        # UMI adds 10 nt to read, so both length limits are shifted locally
        # (the values stored in inputs are left untouched)
        minlength = minlength + 10
        maxlength = maxlength + 10
        # for naming: UMI
        UMI = '_UMI'
    else:
        UMI = ''

    # report and stop if file names not specified
    if not files:
        print("There are no files")
        return

    # Loop through files to filter.  Iterate over a snapshot: samples are
    # removed from inputs['files'] inside the loop, and removing from the
    # list being iterated would silently skip the sample that follows each
    # removed one.
    for fname in list(files):

        file_in  = paths_in['path_fastq'] + fname
        file_out = paths_out['path_filter'] + fname + UMI
        file_log = paths_out['path_log'] + fname + '_filter'

        # if skewer filtering isnt needed, only report the current state
        if run != 'yes':
            if os.path.exists(file_out + '-trimmed.fastq'):
                print(fname + " has been filtered")
            else:
                print("ERROR: " + fname +
                      " has not been filtered, change run setting")
            continue

        # report error if input file missing, and continue to next file
        if not os.path.exists(file_in):
            print("ERROR: " + fname +
                  " has no FASTQ file, has been removed from analysis")
            inputs['files'].remove(fname)
            continue

        # make command string; stdout is appended to the log, stderr ('2>')
        # overwrites it
        command_to_run = 'skewer -x %s -Q %d  -l %d -L %d -o %s --quiet -t %d %s 1>>%s 2>%s' % (
            linker,
            phred_cutoff,
            minlength,
            maxlength,
            file_out,
            threads,
            file_in,
            file_log,
            file_log
            )

        # Add filter parameters to log:

        log_data['settings'] = {'linker': linker, 'phred_cutoff': phred_cutoff,
                                'minlength': minlength, 'maxlength': maxlength}

        # NOTE(review): the label is 'ribo_density' although this is the
        # filter step -- confirm this is the intended log section.
        log_function = 'ribo_density'
        ribo_util.analysis_log(fname, log_function, log_data, paths_in, paths_out)

        filtering.append(command_to_run)

    # Nothing was queued (run disabled or every sample skipped): the
    # per-sample messages above already explain why, so launch nothing.
    if not filtering:
        return inputs

    # print start time and run skewer
    print("-----FILTER-----")
    print('\nFiles to filter: ' + ', '.join(files))
    print("Filter parameters are: \nmin length = %s \nmax length = %s \nphred cutoff = %s " % (
                    minlength, maxlength, phred_cutoff))
    print("\n\tStarted filtering at " + str(datetime.now()))

    # second argument is 1 because each skewer command already receives
    # its own thread count through -t
    ribo_util.subprocess_wf(filtering, 1)

    print("\tFinished filtering at " + str(datetime.now()))
    print("\tCOMPLETED FILTERING")

    return inputs