예제 #1
0
def runBayesHammer(fastq_sin, fastq_frw, fastq_rev, sample_name, parent_dir):
    """Run the BayesHammer error-correction module on FASTQ input.

    Args:
        fastq_sin: Path to a single-end FASTQ file, or a falsy value when
            paired-end input is supplied instead.
        fastq_frw: Path to the forward FASTQ file of a pair.
        fastq_rev: Path to the reverse FASTQ file of a pair.
        sample_name: Sample identifier passed to the FASTQ wrapper object.
        parent_dir: Directory in which the "BayesHammer" workspace is set up
            and the completion marker is written.
            NOTE(review): it is concatenated directly with file names, so it
            presumably ends with a path separator - confirm.

    Raises:
        RuntimeError: If neither an existing single-end file nor an existing
            forward/reverse pair was provided.
    """
    single_end = bool(fastq_sin) and os.path.isfile(fastq_sin)
    paired_end = (bool(fastq_frw) and bool(fastq_rev)
                  and os.path.isfile(fastq_frw) and os.path.isfile(fastq_rev))

    # Explicit check rather than `assert`: asserts vanish under `python -O`,
    # which would silently disable this input validation.
    if not (single_end or paired_end):
        sys.stderr.write("ERROR: FASTQ inputs were not provided. Please provide either a pair (frw and rev) or a single FASTQ file.\n")
        raise RuntimeError("FASTQ inputs were not provided")

    # set up directory structure
    workspace = uF.setupDirectory(parent_dir, "BayesHammer")

    # create logging object
    logObject = uF.createLoggerObject(workspace + 'BayesHammer.log')

    # Single-end input takes precedence when both kinds are supplied.
    if single_end:
        reads = Fastq(fastq_sin, sample_name, logObject)
    else:
        reads = FastqPaired(fastq_frw, fastq_rev, sample_name, logObject)

    # perform error correction inside the workspace
    reads.error_correction(workspace)

    # create successful completion file if steps completed!
    with open(parent_dir + "BAYESHAMMER.txt", 'w') as conf_file:
        conf_file.write("BayesHammer: Module Completed Succesfully!")
예제 #2
0
def runStrainGST(fastq_sin, fastq_frw, fastq_rev, db, sample_name, parent_dir,
                 options_kmerize, options_straingst):
    """Kmerize FASTQ input and run StrainGST against a pangenome database.

    Args:
        fastq_sin: Path to a single-end FASTQ file, or a falsy value when
            paired-end input is supplied instead.
        fastq_frw: Path to the forward FASTQ file of a pair.
        fastq_rev: Path to the reverse FASTQ file of a pair.
        db: Path to the StrainGST pangenome database file.
        sample_name: Sample identifier passed to the wrapper objects.
        parent_dir: Directory in which the "StrainGST" workspace is set up
            and the completion marker is written.
            NOTE(review): it is concatenated directly with file names, so it
            presumably ends with a path separator - confirm.
        options_kmerize: Forwarded as ``options`` to the kmerize step.
        options_straingst: Forwarded as ``options`` to the straingst step.

    Raises:
        RuntimeError: If no usable FASTQ input was provided, or if ``db`` is
            not an existing file.
    """
    single_end = bool(fastq_sin) and os.path.isfile(fastq_sin)
    paired_end = (bool(fastq_frw) and bool(fastq_rev)
                  and os.path.isfile(fastq_frw) and os.path.isfile(fastq_rev))

    # Explicit checks rather than `assert`: asserts vanish under `python -O`.
    if not (single_end or paired_end):
        sys.stderr.write(
            "ERROR: FASTQ inputs were not provided. Please provide either a pair (frw and rev) or a single FASTQ file.\n"
        )
        raise RuntimeError("FASTQ inputs were not provided")

    # Guard against a missing (None/empty) database argument as well as a
    # path that does not point at a file.
    if not (db and os.path.isfile(db)):
        sys.stderr.write(
            "ERROR: StrainGST pangenome database file is not available.")
        raise RuntimeError("StrainGST pangenome database file is not available")

    # set up directory structure
    workspace = uF.setupDirectory(parent_dir, "StrainGST")

    # create logging object
    logObject = uF.createLoggerObject(workspace + 'StrainGST.log')

    # Single-end input takes precedence when both kinds are supplied.
    if single_end:
        FastqObj = Fastq(fastq_sin, sample_name, logObject)
    else:
        FastqObj = FastqPaired(fastq_frw, fastq_rev, sample_name, logObject)

    # run strainge kmerize
    kmer_file = FastqObj.kmerize(workspace, options=options_kmerize)

    # create Kmer object
    KmerObj = Kmer(kmer_file, sample_name, logObject)

    # run straingst
    KmerObj.run_straingst(workspace, db, options=options_straingst)

    # produce kmer histogram - in progress - issues running.
    # KmerObj.create_histogram(workspace)

    # create successful completion file if steps completed!
    with open(parent_dir + "STRAINGST.txt", 'w') as conf_file:
        conf_file.write("StrainGST: Module Completed Succesfully!")
예제 #3
0
def runFastQC(fastq_sin, fastq_frw, fastq_rev, sample_name, parent_dir, cores):
    """Validate FASTQ input and run the FastQC module on it.

    Args:
        fastq_sin: Path to a single-end FASTQ file, or a falsy value when
            paired-end input is supplied instead.
        fastq_frw: Path to the forward FASTQ file of a pair.
        fastq_rev: Path to the reverse FASTQ file of a pair.
        sample_name: Sample identifier passed to the FASTQ wrapper object.
        parent_dir: Directory in which the "FastQC" workspace is set up and
            the completion marker is written.
            NOTE(review): it is concatenated directly with file names, so it
            presumably ends with a path separator - confirm.
        cores: Forwarded as ``cores`` to the QC step.

    Raises:
        RuntimeError: If neither an existing single-end file nor an existing
            forward/reverse pair was provided.
        SystemExit: With status 1 if the wrapper's ``validate()`` reports the
            input as invalid.
    """
    single_end = bool(fastq_sin) and os.path.isfile(fastq_sin)
    paired_end = (bool(fastq_frw) and bool(fastq_rev)
                  and os.path.isfile(fastq_frw) and os.path.isfile(fastq_rev))

    # Explicit check rather than `assert`: asserts vanish under `python -O`.
    if not (single_end or paired_end):
        sys.stderr.write(
            "ERROR: FASTQ inputs were not provided. Please provide either a pair (frw and rev) or a single FASTQ file.\n"
        )
        raise RuntimeError("FASTQ inputs were not provided")

    # set up directory structure
    workspace = uF.setupDirectory(parent_dir, "FastQC")

    # create logging object
    logObject = uF.createLoggerObject(workspace + 'FastQC.log')

    ### Perform single end QC analysis (takes precedence over paired input)
    if single_end:
        FastqObj = Fastq(fastq_sin, sample_name, logObject)

        # validate FASTQ file is indeed a FASTQ file
        if not FastqObj.validate():
            sys.stderr.write(
                "ERROR: FASTQ file %s seems to be in invalid format. Exiting now...\n"
                % FastqObj.fastq)
            sys.exit(1)

        # run FastQC; the returned result location is not needed here
        FastqObj.run_qc(workspace, cores=cores)

    ### Perform paired-end QC analysis
    else:
        FastqPairedObj = FastqPaired(fastq_frw, fastq_rev, sample_name,
                                     logObject)

        # validate both FASTQ files are indeed FASTQ files
        if not FastqPairedObj.validate():
            sys.stderr.write(
                "ERROR: At least one of the FASTQ files seems to be in an invalid format. Exiting now ...\n"
            )
            sys.exit(1)

        # run FastQC; the returned result locations are not needed here
        FastqPairedObj.run_qc(workspace, cores=cores)

    # create successful completion file if steps completed!
    with open(parent_dir + "FASTQC.txt", 'w') as conf_file:
        conf_file.write("FastQC: Module Completed Succesfully!")
예제 #4
0
def runRefAlignment(fastq_sin, fastq_frw, fastq_rev, reference_fasta, sample_name, parent_dir, bwa_options, cores):
    """Align FASTQ reads to a reference and post-process the alignment.

    After alignment the SAM output is compressed to BAM, sorted, indexed,
    duplicate-marked and indexed again.

    Args:
        fastq_sin: Path to a single-end FASTQ file, or a falsy value when
            paired-end input is supplied instead.
        fastq_frw: Path to the forward FASTQ file of a pair.
        fastq_rev: Path to the reverse FASTQ file of a pair.
        reference_fasta: Path to the reference FASTA file.
        sample_name: Sample identifier passed to the wrapper objects.
        parent_dir: Directory in which the "ReferenceAlignment" workspace is
            set up and the completion marker is written.
            NOTE(review): it is concatenated directly with file names, so it
            presumably ends with a path separator - confirm.
        bwa_options: Forwarded as ``options`` to the alignment step.
        cores: Forwarded as ``cores`` to the alignment step.

    Raises:
        RuntimeError: If no usable FASTQ input was provided, or if
            ``reference_fasta`` is not an existing file.
    """
    # Each path is truthiness-guarded before os.path.isfile(): calling
    # isfile(None) raises TypeError, which previously made valid paired-end
    # input (fastq_sin=None) fail with a "FASTQ inputs were not provided" error.
    single_end = bool(fastq_sin) and os.path.isfile(fastq_sin)
    paired_end = (bool(fastq_frw) and bool(fastq_rev)
                  and os.path.isfile(fastq_frw) and os.path.isfile(fastq_rev))

    # Explicit checks rather than `assert`: asserts vanish under `python -O`.
    if not (single_end or paired_end):
        sys.stderr.write("ERROR: FASTQ inputs were not provided. Please provide either a pair (frw and rev) or a single FASTQ file.\n")
        raise RuntimeError("FASTQ inputs were not provided")

    # Only the existence of the reference file is verified here.
    if not (reference_fasta and os.path.isfile(reference_fasta)):
        sys.stderr.write("ERROR: Reference FASTA file does not have the correct format.\n")
        raise RuntimeError("Reference FASTA file is not available")

    # set up directory structure
    workspace = uF.setupDirectory(parent_dir, "ReferenceAlignment")

    # create logging object
    logObject = uF.createLoggerObject(workspace + 'ReferenceAlignment.log')

    # Single-end input takes precedence when both kinds are supplied.
    if single_end:
        reads = Fastq(fastq_sin, sample_name, logObject)
    else:
        reads = FastqPaired(fastq_frw, fastq_rev, sample_name, logObject)

    # Align reads to reference genome
    sam_file = reads.align_to_reference(workspace, reference_fasta, options=bwa_options, cores=cores)

    # create Alignment object
    AlignmentObj = Alignment(sam_file, sample_name, logObject)

    # compress SAM to BAM
    AlignmentObj.compress_sam(workspace, clean=True)

    # sort BAM file
    AlignmentObj.sort_bam(workspace, clean=True)

    # index the sorted BAM file
    AlignmentObj.index_bam(workspace)

    # mark duplicates
    AlignmentObj.mark_dups(workspace, clean=True)

    # index again after duplicate marking
    AlignmentObj.index_bam(workspace)

    # create successful completion file if steps completed!
    with open(parent_dir + "REFALIGNMENT.txt", 'w') as conf_file:
        conf_file.write("Reference Alignment: Module Completed Succesfully!")
예제 #5
0
def runNanoCanu(nanopore_fastq, sample_dir, sample_name, canu_options, memory,
                cores):
    """Run a Canu assembly on a Nanopore FASTQ file.

    Args:
        nanopore_fastq: Path to the Nanopore FASTQ file. A path ending in
            ".gz" is first turned into a new uncompressed FASTQ instance
            inside the workspace.
        sample_dir: Directory in which the "Canu_Assembly" workspace is set
            up and the completion marker is written.
            NOTE(review): it is concatenated directly with file names, so it
            presumably ends with a path separator - confirm.
        sample_name: Sample identifier passed to the wrapper objects.
        canu_options: Option string for Canu; surrounding double quotes are
            stripped before it is forwarded.
        memory: Forwarded as ``memory`` to the Canu step.
        cores: Forwarded as ``cores`` to the Canu step.

    Raises:
        RuntimeError: If ``nanopore_fastq`` is missing or not an existing file.
    """
    # Explicit check rather than `assert`: asserts vanish under `python -O`.
    if not (nanopore_fastq and os.path.isfile(nanopore_fastq)):
        raise RuntimeError(
            "ERROR: FASTQ input(s) were not provided properly. Please fix. Raising exception\n"
        )

    # The stripped value used to be stored in an unused variable, so the
    # quoted option string was passed to Canu unchanged.
    canu_options = canu_options.strip('"')

    # set up directory structure
    workspace = uF.setupDirectory(sample_dir, "Canu_Assembly")

    # create logging object
    logObject = uF.createLoggerObject(workspace + 'Canu_Assembly.log')

    if nanopore_fastq.endswith('.gz'):
        FastqObj = Fastq(nanopore_fastq, sample_name, logObject)
        # NOTE(review): presumably writes an uncompressed copy into the
        # workspace and repoints FastqObj.fastq at it - confirm.
        FastqObj.create_new_instance(workspace,
                                     compress=False,
                                     change_reference=True)

        # Initialize Nanopore Object
        NanoporeObj = Nanopore(FastqObj.fastq, sample_name, logObject)

        # Run Canu for assembly
        NanoporeObj.run_canu(workspace,
                             options=canu_options,
                             memory=memory,
                             cores=cores)

        # Clean up temporary FASTQ instance. os.remove avoids building a
        # shell command from a path (breaks on spaces/metacharacters); the
        # isfile guard keeps the "ignore if missing" behaviour of `rm -f`.
        if os.path.isfile(FastqObj.fastq):
            os.remove(FastqObj.fastq)

    else:
        # Initialize Nanopore Object
        NanoporeObj = Nanopore(nanopore_fastq, sample_name, logObject)

        # Run Canu for assembly
        NanoporeObj.run_canu(workspace,
                             options=canu_options,
                             memory=memory,
                             cores=cores)

    # create successful completion file if steps completed!
    with open(sample_dir + "CANU_ASSEMBLY.txt", 'w') as conf_file:
        conf_file.write("Canu Assembly: Module Completed Succesfully!")
예제 #6
0
def runSortMeRNA(fastq_sin, fastq_frw, fastq_rev, database_dir, sample_name,
                 parent_dir, cores, no_gzip):
    """Run the SortMeRNA module to separate ribosomal RNA reads.

    Args:
        fastq_sin: Path to a single-end FASTQ file, or a falsy value when
            paired-end input is supplied instead.
        fastq_frw: Path to the forward FASTQ file of a pair.
        fastq_rev: Path to the reverse FASTQ file of a pair.
        database_dir: Database location forwarded to the filtering step.
        sample_name: Sample identifier passed to the FASTQ wrapper object.
        parent_dir: Directory in which the "SortMeRNA" workspace is set up
            and the completion marker is written.
            NOTE(review): it is concatenated directly with file names, so it
            presumably ends with a path separator - confirm.
        cores: Forwarded as ``cores`` to the filtering step.
        no_gzip: When truthy, ``compress=False`` is passed to the filtering
            step; otherwise ``compress=True``.

    Raises:
        RuntimeError: If neither an existing single-end file nor an existing
            forward/reverse pair was provided.
    """
    single_end = bool(fastq_sin) and os.path.isfile(fastq_sin)
    paired_end = (bool(fastq_frw) and bool(fastq_rev)
                  and os.path.isfile(fastq_frw) and os.path.isfile(fastq_rev))

    # Explicit check rather than `assert`: asserts vanish under `python -O`.
    if not (single_end or paired_end):
        sys.stderr.write(
            "ERROR: FASTQ inputs were not provided. Please provide either a pair (frw and rev) or a single FASTQ file.\n"
        )
        raise RuntimeError("FASTQ inputs were not provided")

    # set up directory structure
    workspace = uF.setupDirectory(parent_dir, "SortMeRNA")

    # create logging object
    logObject = uF.createLoggerObject(workspace + 'SortMeRNA.log')

    # Single-end input takes precedence when both kinds are supplied.
    if single_end:
        reads = Fastq(fastq_sin, sample_name, logObject)
    else:
        reads = FastqPaired(fastq_frw, fastq_rev, sample_name, logObject)

    # split up ribosomal and non-ribosomal RNA data
    reads.filter_ribo_rna(workspace,
                          database_dir,
                          cores=cores,
                          compress=(not no_gzip))

    # create successful completion file if steps completed!
    with open(parent_dir + "SORTMERNA.txt", 'w') as conf_file:
        conf_file.write("SortMeRNA: Module Completed Succesfully!")
예제 #7
0
def runKneadData(fastq_sin, fastq_frw, fastq_rev, kneaddata_options,
                 sample_name, parent_dir, cores, no_gzip):
    """Run the KneadData module on single- or paired-end FASTQ input.

    Args:
        fastq_sin: Path to a single-end FASTQ file, or a falsy value when
            paired-end input is supplied instead.
        fastq_frw: Path to the forward FASTQ file of a pair.
        fastq_rev: Path to the reverse FASTQ file of a pair.
        kneaddata_options: Forwarded as ``options`` to the KneadData step.
        sample_name: Sample identifier passed to the FASTQ wrapper object.
        parent_dir: Directory in which the "KneadData" workspace is set up
            and the completion marker is written.
            NOTE(review): it is concatenated directly with file names, so it
            presumably ends with a path separator - confirm.
        cores: Forwarded as ``cores`` to the KneadData step.
        no_gzip: When truthy, ``compress=False`` is passed to the KneadData
            step; otherwise ``compress=True``.

    Raises:
        RuntimeError: If neither an existing single-end file nor an existing
            forward/reverse pair was provided.
    """
    single_end = bool(fastq_sin) and os.path.isfile(fastq_sin)
    paired_end = (bool(fastq_frw) and bool(fastq_rev)
                  and os.path.isfile(fastq_frw) and os.path.isfile(fastq_rev))

    # Explicit check rather than `assert`: asserts vanish under `python -O`.
    if not (single_end or paired_end):
        sys.stderr.write(
            "ERROR: FASTQ inputs were not provided. Please provide either a pair (frw and rev) or a single FASTQ file.\n"
        )
        raise RuntimeError("FASTQ inputs were not provided")

    # set up directory structure
    workspace = uF.setupDirectory(parent_dir, "KneadData")

    # create logging object
    logObject = uF.createLoggerObject(workspace + 'KneadData.log')

    # Single-end input takes precedence when both kinds are supplied.
    if single_end:
        reads = Fastq(fastq_sin, sample_name, logObject)
    else:
        reads = FastqPaired(fastq_frw, fastq_rev, sample_name, logObject)

    # run KneadData on the reads
    reads.run_kneaddata(workspace,
                        options=kneaddata_options,
                        cores=cores,
                        compress=(not no_gzip))

    # create successful completion file if steps completed!
    with open(parent_dir + "KNEADDATA.txt", 'w') as conf_file:
        conf_file.write("KneadData: Module Completed Succesfully!")
예제 #8
0
def runCentrifuge(fastq_sin, fastq_frw, fastq_rev, centrifuge_index,
                  sample_name, parent_dir, cores):
    """Run the Centrifuge module to bin reads taxonomically.

    Args:
        fastq_sin: Path to a single-end FASTQ file, or a falsy value when
            paired-end input is supplied instead.
        fastq_frw: Path to the forward FASTQ file of a pair.
        fastq_rev: Path to the reverse FASTQ file of a pair.
        centrifuge_index: Centrifuge index forwarded to the binning step.
        sample_name: Sample identifier passed to the FASTQ wrapper object.
        parent_dir: Directory in which the "Centrifuge" workspace is set up
            and the completion marker is written.
            NOTE(review): it is concatenated directly with file names, so it
            presumably ends with a path separator - confirm.
        cores: Forwarded as ``cores`` to the binning step.

    Raises:
        RuntimeError: If neither an existing single-end file nor an existing
            forward/reverse pair was provided.
    """
    single_end = bool(fastq_sin) and os.path.isfile(fastq_sin)
    paired_end = (bool(fastq_frw) and bool(fastq_rev)
                  and os.path.isfile(fastq_frw) and os.path.isfile(fastq_rev))

    # Explicit check rather than `assert`: asserts vanish under `python -O`.
    if not (single_end or paired_end):
        sys.stderr.write(
            "ERROR: FASTQ inputs were not provided. Please provide either a pair (frw and rev) or a single FASTQ file.\n"
        )
        raise RuntimeError("FASTQ inputs were not provided")

    # set up directory structure
    workspace = uF.setupDirectory(parent_dir, "Centrifuge")

    # create logging object
    logObject = uF.createLoggerObject(workspace + 'Centrifuge.log')

    # Single-end input takes precedence when both kinds are supplied.
    if single_end:
        reads = Fastq(fastq_sin, sample_name, logObject)
    else:
        reads = FastqPaired(fastq_frw, fastq_rev, sample_name, logObject)

    # bin reads taxonomically using Centrifuge
    reads.bin_taxonomically(workspace, centrifuge_index, cores=cores)

    # create successful completion file if steps completed!
    with open(parent_dir + "CENTRIFUGE.txt", 'w') as conf_file:
        conf_file.write("Centrifuge: Module Completed Succesfully!")
예제 #9
0
def runSymlinkInput(fastq_sin, fastq_frw, fastq_rev, parent_dir, sample_name):
    """Create symlinks to the input FASTQ file(s) inside a workspace.

    Args:
        fastq_sin: Path to a single-end FASTQ file, or a falsy value when
            paired-end input is supplied instead.
        fastq_frw: Path to the forward FASTQ file of a pair.
        fastq_rev: Path to the reverse FASTQ file of a pair.
        parent_dir: Directory in which the "Symlink_Input" workspace is set
            up and the completion marker is written.
            NOTE(review): it is concatenated directly with file names, so it
            presumably ends with a path separator - confirm.
        sample_name: Sample identifier passed to the FASTQ wrapper object.

    Raises:
        RuntimeError: If neither an existing single-end file nor an existing
            forward/reverse pair was provided.
    """
    single_end = bool(fastq_sin) and os.path.isfile(fastq_sin)
    paired_end = (bool(fastq_frw) and bool(fastq_rev)
                  and os.path.isfile(fastq_frw) and os.path.isfile(fastq_rev))

    # Explicit check rather than `assert`: asserts vanish under `python -O`.
    if not (single_end or paired_end):
        sys.stderr.write(
            "ERROR: FASTQ inputs were not provided. Please provide either a pair (frw and rev) or a single FASTQ file.\n"
        )
        raise RuntimeError("FASTQ inputs were not provided")

    # set up directory structure
    workspace = uF.setupDirectory(parent_dir, "Symlink_Input")

    # create logging object
    logObject = uF.createLoggerObject(workspace + 'Symlink.log')

    # Single-end input takes precedence when both kinds are supplied.
    if single_end:
        reads = Fastq(fastq_sin, sample_name, logObject)
    else:
        reads = FastqPaired(fastq_frw, fastq_rev, sample_name, logObject)

    # create symlink(s) in the workspace
    reads.create_symlink(workspace)

    # create successful completion file if steps completed!
    with open(parent_dir + "SYMLINK_INPUT.txt", 'w') as conf_file:
        conf_file.write("SymlinkInput: Module Completed Succesfully!")
예제 #10
0
def runTrimmomatic(fastq_sin, fastq_frw, fastq_rev, sample_name, parent_dir, trimmomatic_options, cores, no_gzip):
    """Run the quality-trimming module on single- or paired-end FASTQ input.

    Args:
        fastq_sin: Path to a single-end FASTQ file, or a falsy value when
            paired-end input is supplied instead.
        fastq_frw: Path to the forward FASTQ file of a pair.
        fastq_rev: Path to the reverse FASTQ file of a pair.
        sample_name: Sample identifier passed to the FASTQ wrapper object.
        parent_dir: Directory in which the "QualityTrim" workspace is set up
            and the completion marker is written.
            NOTE(review): it is concatenated directly with file names, so it
            presumably ends with a path separator - confirm.
        trimmomatic_options: Option string; surrounding double quotes are
            stripped before it is forwarded as ``options``.
        cores: Forwarded as ``cores`` to the trimming step.
        no_gzip: When truthy, ``compress=False`` is passed to the trimming
            step; otherwise ``compress=True``.

    Raises:
        RuntimeError: If neither an existing single-end file nor an existing
            forward/reverse pair was provided.
    """
    single_end = bool(fastq_sin) and os.path.isfile(fastq_sin)
    paired_end = (bool(fastq_frw) and bool(fastq_rev)
                  and os.path.isfile(fastq_frw) and os.path.isfile(fastq_rev))

    # Explicit check rather than `assert`: asserts vanish under `python -O`.
    if not (single_end or paired_end):
        sys.stderr.write("ERROR: FASTQ inputs were not provided. Please provide either a pair (frw and rev) or a single FASTQ file.\n")
        raise RuntimeError("FASTQ inputs were not provided")

    trimmomatic_options = trimmomatic_options.strip('"')

    # set up directory structure
    workspace = uF.setupDirectory(parent_dir, "QualityTrim")

    # create logging object
    logObject = uF.createLoggerObject(workspace + 'QualityTrim.log')

    # Single-end input takes precedence when both kinds are supplied.
    if single_end:
        reads = Fastq(fastq_sin, sample_name, logObject)
    else:
        reads = FastqPaired(fastq_frw, fastq_rev, sample_name, logObject)

    # quality trim the reads; output compression is controlled by no_gzip
    reads.quality_trim(workspace, options=trimmomatic_options, cores=cores, compress=(not no_gzip))

    # create successful completion file if steps completed!
    with open(parent_dir + "QUALITYTRIM.txt", 'w') as conf_file:
        conf_file.write("QualityTrim: Module Completed Succesfully!")
예제 #11
0
def runAdapterTrim(fastq_sin, fastq_frw, fastq_rev, sample_name, parent_dir,
                   trimgalore_options, cutadapt_options):
    """Trim adapters from FASTQ input with either cutadapt or Trim Galore.

    cutadapt is used when ``cutadapt_options`` is non-empty after quote
    stripping; otherwise Trim Galore is used with ``trimgalore_options``.

    Args:
        fastq_sin: Path to a single-end FASTQ file, or a falsy value when
            paired-end input is supplied instead.
        fastq_frw: Path to the forward FASTQ file of a pair.
        fastq_rev: Path to the reverse FASTQ file of a pair.
        sample_name: Sample identifier passed to the FASTQ wrapper object.
        parent_dir: Directory in which the "AdapterTrim" workspace is set up
            and the completion marker is written.
            NOTE(review): it is concatenated directly with file names, so it
            presumably ends with a path separator - confirm.
        trimgalore_options: Option string for Trim Galore; surrounding double
            quotes are stripped.
        cutadapt_options: Option string for cutadapt; surrounding double
            quotes are stripped.

    Raises:
        RuntimeError: If no usable FASTQ input was provided, or if both
            option strings are non-empty.
    """
    single_end = bool(fastq_sin) and os.path.isfile(fastq_sin)
    paired_end = (bool(fastq_frw) and bool(fastq_rev)
                  and os.path.isfile(fastq_frw) and os.path.isfile(fastq_rev))

    # Explicit checks rather than `assert`: asserts vanish under `python -O`.
    if not (single_end or paired_end):
        sys.stderr.write(
            "ERROR: FASTQ inputs were not provided. Please provide either a pair (frw and rev) or a single FASTQ file.\n"
        )
        raise RuntimeError("FASTQ inputs were not provided")

    trimgalore_options = trimgalore_options.strip('"')
    cutadapt_options = cutadapt_options.strip('"')

    # Only one adapter trimmer may be configured at a time.
    if trimgalore_options and cutadapt_options:
        sys.stderr.write(
            "ERROR: Both filtering options with cutadapt and trim galore provided. Can only use one adapter trimmer. Exiting now ...\n"
        )
        raise RuntimeError("Both cutadapt and trim galore options provided")

    # set up directory structure
    workspace = uF.setupDirectory(parent_dir, "AdapterTrim")

    # create logging object
    logObject = uF.createLoggerObject(workspace + 'AdapterTrim.log')

    # Single-end input takes precedence when both kinds are supplied.
    if single_end:
        reads = Fastq(fastq_sin, sample_name, logObject)
    else:
        reads = FastqPaired(fastq_frw, fastq_rev, sample_name, logObject)

    # cutadapt when its options were given, Trim Galore otherwise
    if cutadapt_options:
        reads.cutadapt_adapter_trim(workspace, options=cutadapt_options)
    else:
        reads.trim_galore_adapter_trim(workspace, options=trimgalore_options)

    # create successful completion file if steps completed!
    with open(parent_dir + "ADAPTERTRIM.txt", 'w') as conf_file:
        conf_file.write("AdapterTrim: Module Completed Succesfully!")
예제 #12
0
def runSubsample(fastq_sin, fastq_frw, fastq_rev, sample_name, parent_dir,
                 reads, bases, no_gzip):
    """Subsample FASTQ input by read count or downsample it by base count.

    ``reads`` takes precedence over ``bases``. When neither is given, a
    default of 100K reads is subsampled and this is logged.

    Args:
        fastq_sin: Path to a single-end FASTQ file, or a falsy value when
            paired-end input is supplied instead.
        fastq_frw: Path to the forward FASTQ file of a pair.
        fastq_rev: Path to the reverse FASTQ file of a pair.
        sample_name: Sample identifier passed to the FASTQ wrapper object.
        parent_dir: Directory in which the "Subsample" workspace is set up
            and the completion marker is written.
            NOTE(review): it is concatenated directly with file names, so it
            presumably ends with a path separator - confirm.
        reads: Number of reads to subsample, or a falsy value.
        bases: Number of bases to downsample to, or a falsy value.
        no_gzip: When truthy, ``compress=False`` is passed to the sampling
            step; otherwise ``compress=True``.

    Raises:
        RuntimeError: If neither an existing single-end file nor an existing
            forward/reverse pair was provided.
    """
    # Fallback announced by the log message below ("100K reads").
    # NOTE(review): assumes subsample() accepts an integer read count - confirm.
    default_reads = 100000

    single_end = bool(fastq_sin) and os.path.isfile(fastq_sin)
    paired_end = (bool(fastq_frw) and bool(fastq_rev)
                  and os.path.isfile(fastq_frw) and os.path.isfile(fastq_rev))

    # Explicit check rather than `assert`: asserts vanish under `python -O`.
    if not (single_end or paired_end):
        sys.stderr.write(
            "ERROR: FASTQ inputs were not provided. Please provide either a pair (frw and rev) or a single FASTQ file.\n"
        )
        raise RuntimeError("FASTQ inputs were not provided")

    # set up directory structure
    workspace = uF.setupDirectory(parent_dir, "Subsample")

    # create logging object
    logObject = uF.createLoggerObject(workspace + 'Subsample.log')

    # Single-end input takes precedence when both kinds are supplied.
    if single_end:
        fastq_obj = Fastq(fastq_sin, sample_name, logObject)
    else:
        fastq_obj = FastqPaired(fastq_frw, fastq_rev, sample_name, logObject)

    if reads:
        fastq_obj.subsample(workspace, reads=reads, compress=(not no_gzip))
    elif bases:
        fastq_obj.downsample(workspace, bases=bases, compress=(not no_gzip))
    else:
        logObject.error(
            "No subsampling quantity specified defaulting to 100K reads being subsampled!"
        )
        # Pass the announced default explicitly; the falsy `reads` value
        # used to be forwarded here, so the default was never applied.
        fastq_obj.subsample(workspace,
                            reads=default_reads,
                            compress=(not no_gzip))

    # create successful completion file if steps completed!
    with open(parent_dir + "SUBSAMPLE.txt", 'w') as conf_file:
        conf_file.write("Subsample: Module Completed Succesfully!")