Example #1
0
def TrimGalore(TRIM_GALORE_EXE,
               CUTADAPT_BIN,
               OutDir,
               FastQFile1,
               SysOutFile,
               FastQFile2='',
               ZipOutput=False,
               TrimGaloreOptions=''):
    """Assemble a Trim Galore command line, run it and log it.

    Args:
        TRIM_GALORE_EXE: Executable placed at the start of the command.
        CUTADAPT_BIN: Prefix glued directly in front of ``cutadapt`` for
            ``--path_to_cutadapt`` (no separator is inserted).
        OutDir: Value passed to ``--output_dir``.
        FastQFile1: First FASTQ file; only passed on if it exists.
        SysOutFile: File that receives the tool's stdout/stderr (appended).
        FastQFile2: Optional second FASTQ file.  When it exists on disk,
            `` --paired`` is added unless the options already mention it.
        ZipOutput: Selects `` --gzip `` (True) or `` --dont_gzip `` (False).
        TrimGaloreOptions: Extra options inserted verbatim.

    Returns:
        The command string that was passed to ``system``.
    """
    uF.makedir(OutDir)
    uF.makedir(dirname(SysOutFile))

    # A second mate that is present on disk switches on paired mode.
    if exists(FastQFile2) and '--paired' not in TrimGaloreOptions:
        TrimGaloreOptions += ' --paired'

    # The flag is matched including its surrounding spaces.
    if ZipOutput:
        zip_flag = ' --gzip '
    else:
        zip_flag = ' --dont_gzip '
    if zip_flag not in TrimGaloreOptions:
        TrimGaloreOptions += zip_flag

    present_inputs = ' '.join(
        [fq for fq in (FastQFile1, FastQFile2) if exists(fq)])
    Command = "%s --path_to_cutadapt %scutadapt --output_dir %s %s %s >> %s 2>&1" % (
        TRIM_GALORE_EXE, CUTADAPT_BIN, OutDir, TrimGaloreOptions,
        present_inputs, SysOutFile)
    print(Command)
    system(Command)

    # Record the command itself underneath the tool output.
    if not exists(SysOutFile):
        return Command
    with open(SysOutFile, 'a') as log:
        log.write('\n\n%s\n\n' % (Command))
    return Command
Example #2
0
def rsemCalculateExpression(RSEM_BIN,
                            FastQFiles1,
                            RSEMTranscriptIndex,
                            OutPrefix,
                            SysOutFile,
                            FastQFiles2=None,
                            NumThreads=4,
                            RSEMCalcExprParams=''):
    """Build and run an ``rsem-calculate-expression`` shell command.

    Only FASTQ paths that exist on disk are forwarded; each mate list is
    comma-joined.  The tool's stdout/stderr overwrite ``SysOutFile`` (``>``),
    after which the command line itself is appended to that file.

    Args:
        RSEM_BIN: Prefix glued directly in front of the executable name.
        FastQFiles1: Iterable of first-mate FASTQ paths.
        RSEMTranscriptIndex: Reference argument passed to the tool.
        OutPrefix: Output prefix; its directory also hosts ``rsem_tmp/``.
        SysOutFile: Log file for the run.
        FastQFiles2: Optional iterable of second-mate FASTQ paths.  ``None``
            (the default) means no second mates.
        NumThreads: Value for ``--num-threads``.
        RSEMCalcExprParams: Extra options inserted verbatim.

    Returns:
        The command string that was passed to ``system``.
    """
    # None replaces the former shared mutable ``[]`` default.
    if FastQFiles2 is None:
        FastQFiles2 = []

    OutDir = dirname(OutPrefix)
    tTMPDir = join(OutDir, 'rsem_tmp/')
    TMPDir = join(tTMPDir, '%s/' % (basename(OutPrefix)))
    # Only the parent of the per-prefix temporary folder is created here.
    uF.makedir(tTMPDir)
    uF.makedir(dirname(SysOutFile))

    Mates1 = ','.join([x for x in FastQFiles1 if exists(x)])
    Mates2 = ','.join([x for x in FastQFiles2 if exists(x)])
    Command = """%srsem-calculate-expression --num-threads %s --temporary-folder %s %s %s %s %s %s > %s 2>&1""" % (
        RSEM_BIN, NumThreads, TMPDir, RSEMCalcExprParams, Mates1, Mates2,
        RSEMTranscriptIndex, OutPrefix, SysOutFile)
    print(Command)
    system(Command)
    if exists(SysOutFile):
        with open(SysOutFile, 'a') as f:
            f.write('\n\n%s\n\n' % (Command))

    return Command
Example #3
0
def STARAlignReads(STAR_BIN,
                   FastQFiles1,
                   GenomeIndexDir,
                   OutPrefix,
                   SysOutFile,
                   FastQFiles2=None,
                   NumThreads=4,
                   STARAlignReadsParams=''):
    """Build and run a ``STAR --runMode alignReads`` shell command.

    Only FASTQ paths that exist on disk are forwarded; each mate list is
    comma-joined.  The tool's stdout/stderr are appended to ``SysOutFile``,
    followed by the command line itself.

    Args:
        STAR_BIN: Prefix glued directly in front of ``STAR``.
        FastQFiles1: Iterable of first-mate FASTQ paths.
        GenomeIndexDir: Value for ``--genomeDir``.
        OutPrefix: Value for ``--outFileNamePrefix``; its directory also
            hosts ``star_tmp/``.
        SysOutFile: Log file for the run.
        FastQFiles2: Optional iterable of second-mate FASTQ paths.  ``None``
            (the default) means no second mates.
        NumThreads: Value for ``--runThreadN``.
        STARAlignReadsParams: Extra options inserted verbatim.

    Returns:
        The command string that was passed to ``system``.
    """
    # None replaces the former shared mutable ``[]`` default.
    if FastQFiles2 is None:
        FastQFiles2 = []

    OutDir = dirname(OutPrefix)
    tTMPDir = join(OutDir, 'star_tmp/')
    # Leading/trailing dots of the prefix basename are dropped for the
    # temporary directory name.
    TMPDir = join(tTMPDir, '%s/' % (basename(OutPrefix).strip('.')))
    # Only the parent is created; TMPDir itself is just passed to --outTmpDir.
    uF.makedir(tTMPDir)
    uF.makedir(dirname(SysOutFile))

    Mates1 = ','.join([x for x in FastQFiles1 if exists(x)])
    Mates2 = ','.join([x for x in FastQFiles2 if exists(x)])
    Command = """%sSTAR --runMode alignReads --readFilesIn %s %s --genomeDir %s --outFileNamePrefix %s --outTmpDir %s --runThreadN %s %s >> %s 2>&1""" % (
        STAR_BIN, Mates1, Mates2, GenomeIndexDir, OutPrefix, TMPDir,
        NumThreads, STARAlignReadsParams, SysOutFile)
    print(Command)
    system(Command)
    if exists(SysOutFile):
        with open(SysOutFile, 'a') as f:
            f.write('\n\n%s\n\n' % (Command))

    return Command
Example #4
0
def AddOrReplaceReadGroups(JAVA_EXE,
                           JAVA_PARAMS,
                           PICARD_EXE,
                           BAMFileIn,
                           BAMFileOut,
                           RGSampleName,
                           SysOutFile,
                           RGID=1,
                           Library=1,
                           Platform='illumina',
                           PlatformUnit=1,
                           SeqCentre='null',
                           Description='null',
                           RunDate='null',
                           SORT_ORDER='null',
                           PicardParams=''):
    """Run Picard ``AddOrReplaceReadGroups`` through ``java -jar``.

    Each read-group argument is substituted into the matching ``KEY=value``
    slot of the command (RGID, RGLB, RGPL, RGPU, RGSM, RGCN, RGDS, RGDT).
    Tool output is appended to ``SysOutFile``, followed by the command line.

    Returns:
        The command string that was passed to ``system``.
    """
    for target in (BAMFileOut, SysOutFile):
        uF.makedir(dirname(target))

    fields = (
        JAVA_EXE, JAVA_PARAMS, PICARD_EXE,
        BAMFileIn, BAMFileOut, SORT_ORDER,
        RGID, Library, Platform, PlatformUnit,
        RGSampleName, SeqCentre, Description, RunDate,
        PicardParams, SysOutFile,
    )
    Command = "%s %s -jar %s AddOrReplaceReadGroups INPUT=%s OUTPUT=%s SORT_ORDER=%s RGID=%s RGLB=%s RGPL=%s RGPU=%s RGSM=%s RGCN=%s RGDS=%s RGDT=%s %s >> %s 2>&1" % fields
    print(Command)
    system(Command)

    # Record the command itself underneath the tool output.
    if not exists(SysOutFile):
        return Command
    with open(SysOutFile, 'a') as log:
        log.write('\n\n%s\n\n' % (Command))
    return Command
Example #5
0
def CutAdapt(CUTADAPT_BIN,
             OutPrefix,
             FastQFile1,
             SysOutFile,
             FastQFile2='',
             ZipOutput=False,
             CutAdaptOptions=''):
    """Build and run a ``cutadapt`` shell command and log it.

    Output naming (``OutPrefix`` is whitespace-stripped first):
      * second mate exists: ``<prefix>_1.fastq`` via ``--output`` and
        ``<prefix>_2.fastq`` via ``--paired-output`` (the latter only added
        when the options do not already contain ``--paired-output``);
      * otherwise: ``<prefix>.fastq``.
    ``.gz`` is appended to the output names when ``ZipOutput`` is true.

    Args:
        CUTADAPT_BIN: Prefix glued directly in front of ``cutadapt``.
        OutPrefix: Prefix of the trimmed FASTQ output file(s).
        FastQFile1: First FASTQ file; only passed on if it exists.
        SysOutFile: File that receives the tool's stdout/stderr (appended).
        FastQFile2: Optional second FASTQ file.
        ZipOutput: Whether output file names get a ``.gz`` suffix.
        CutAdaptOptions: Extra options inserted verbatim.

    Returns:
        The command string that was passed to ``system``.
    """
    uF.makedir(dirname(OutPrefix))
    uF.makedir(dirname(SysOutFile))

    Prefix = OutPrefix.strip()
    Suffix = '.fastq'
    if ZipOutput:
        Suffix += '.gz'

    if exists(FastQFile2):
        FastQOutFile1 = Prefix + '_1' + Suffix
        if CutAdaptOptions.find('--paired-output') == -1:
            CutAdaptOptions += ' --paired-output=%s' % (Prefix + '_2' + Suffix)
    else:
        FastQOutFile1 = Prefix + Suffix

    # Inputs that are missing on disk are silently left off the command.
    InputFiles = ' '.join(
        [x for x in [FastQFile1, FastQFile2] if exists(x)])
    Command = '%scutadapt --output=%s %s %s >> %s 2>&1' % (
        CUTADAPT_BIN, FastQOutFile1, CutAdaptOptions, InputFiles, SysOutFile)
    print(Command)
    system(Command)
    if exists(SysOutFile):
        with open(SysOutFile, 'a') as f:
            f.write('\n\n%s\n\n' % (Command))

    return Command
Example #6
0
def flagstat(SAMTOOLS_EXE, BAMFile, FlagStatFile):
    """Run ``<SAMTOOLS_EXE> flagstat`` on ``BAMFile``.

    Standard output of the tool is redirected (overwriting) into
    ``FlagStatFile``; its parent directory is handed to ``uF.makedir`` first.

    Returns:
        The command string that was passed to ``system``.
    """
    uF.makedir(dirname(FlagStatFile))
    fields = (SAMTOOLS_EXE, BAMFile, FlagStatFile)
    Command = "%s flagstat %s > %s" % fields
    print(Command)
    system(Command)
    return Command
Example #7
0
def rsemPlotModel(RSEM_BIN, RSEMFileOutPrefix, PDFPlotFile):
    """Run ``rsem-plot-model`` for an output prefix, writing a plot file.

    ``RSEM_BIN`` is glued directly in front of the executable name.  The
    parent directory of ``PDFPlotFile`` is handed to ``uF.makedir`` first.

    Returns:
        The command string that was passed to ``system``.
    """
    uF.makedir(dirname(PDFPlotFile))
    fields = (RSEM_BIN, RSEMFileOutPrefix, PDFPlotFile)
    Command = """%srsem-plot-model %s %s""" % fields
    print(Command)
    system(Command)
    return Command
Example #8
0
def rsemExtractTranscriptsFromGTF(RSEM_BIN, GTFFile, GenomeFasta, OutPrefix):
    """Run ``rsem-extract-reference-transcripts`` for a GTF and a genome FASTA.

    Positional arguments handed to the tool, in order: ``OutPrefix``, ``0``,
    ``GTFFile``, ``None``, ``0``, ``GenomeFasta``.  ``RSEM_BIN`` is glued
    directly in front of the executable name.

    Returns:
        The command string that was passed to ``system``.
    """
    uF.makedir(dirname(OutPrefix))
    fields = (RSEM_BIN, OutPrefix, GTFFile, GenomeFasta)
    Command = """%srsem-extract-reference-transcripts %s 0 %s None 0 %s""" % fields
    print(Command)
    system(Command)
    return Command
Example #9
0
def SortBAMByCoordinate(SAMTOOLS_EXE, BAMFileIn, BAMFileOut, NumThreads=4):
    """Run ``<SAMTOOLS_EXE> sort`` on a BAM file.

    The last positional argument given to the tool is ``BAMFileOut`` without
    its ``.bam`` extension.
    NOTE(review): this looks like the legacy ``samtools sort <in.bam>
    <out.prefix>`` form, where the tool appends ``.bam`` itself - confirm
    against the installed samtools version.

    Args:
        SAMTOOLS_EXE: Executable placed at the start of the command.
        BAMFileIn: Input BAM file.
        BAMFileOut: Intended output BAM path.
        NumThreads: Value for ``-@``; values above 6 are reduced to 6.

    Returns:
        The command string that was passed to ``system``.
    """
    if NumThreads > 6:
        NumThreads = 6
    uF.makedir(dirname(BAMFileOut))

    # Drop the extension only when it is really there; blindly cutting four
    # characters would mangle a path that does not end in ".bam".
    OutPrefix = BAMFileOut
    if BAMFileOut.lower().endswith('.bam'):
        OutPrefix = BAMFileOut[:-4]

    Command = """%s sort -@ %s %s %s""" % (SAMTOOLS_EXE, NumThreads, BAMFileIn,
                                           OutPrefix)
    print(Command)
    system(Command)

    return Command
Example #10
0
def StarGenomeIndex(STAR_BIN,
                    GenomeFasta,
                    OutDir,
                    NumThreads=4,
                    StarGenomeIndexParams=''):
    """Run ``STAR --runMode genomeGenerate`` into ``OutDir``.

    ``STAR_BIN`` is glued directly in front of ``STAR``;
    ``StarGenomeIndexParams`` is appended verbatim at the end of the command.

    Returns:
        The command string that was passed to ``system``.
    """
    uF.makedir(OutDir)
    fields = (STAR_BIN, NumThreads, GenomeFasta, OutDir,
              StarGenomeIndexParams)
    Command = """%sSTAR --runMode genomeGenerate --runThreadN %s --genomeFastaFiles %s --genomeDir %s %s""" % fields
    print(Command)
    system(Command)
    return Command
Example #11
0
def CreateSequenceDictionary(JAVA_EXE,
                             JAVA_PARAMS,
                             PICARD_EXE,
                             GenomeFasta,
                             DictFile,
                             PicardParams=''):
    """Run Picard ``CreateSequenceDictionary`` through ``java -jar``.

    ``GenomeFasta`` is passed as ``REFERENCE`` and ``DictFile`` as ``OUTPUT``;
    ``PicardParams`` is appended verbatim.  The parent directory of
    ``DictFile`` is handed to ``uF.makedir`` first.

    Returns:
        The command string that was passed to ``system``.
    """
    uF.makedir(dirname(DictFile))
    fields = (JAVA_EXE, JAVA_PARAMS, PICARD_EXE, GenomeFasta, DictFile,
              PicardParams)
    Command = "%s %s -jar %s CreateSequenceDictionary REFERENCE=%s OUTPUT=%s %s" % fields
    print(Command)
    system(Command)
    return Command
Example #12
0
def FastQC(FASTQC_EXE,
           JAVA_EXE,
           InputFile,
           OutDir,
           FileFormat='fastq',
           NumThreads=6):
    """Run FastQC quietly (``-q --extract``) on one input file.

    Args:
        FASTQC_EXE: Executable placed at the start of the command.
        JAVA_EXE: Value for ``--java``.
        InputFile: File to analyse (last argument of the command).
        OutDir: Value for ``--outdir``; handed to ``uF.makedir`` first.
        FileFormat: Value for ``-f``.
        NumThreads: Value for ``-t``.

    Returns:
        The command string that was passed to ``system``.
    """
    uF.makedir(OutDir)
    fields = (FASTQC_EXE, OutDir, FileFormat, NumThreads, JAVA_EXE,
              InputFile)
    Command = "%s -q --extract --outdir %s -f %s -t %s --java %s %s" % fields
    print(Command)
    system(Command)
    return Command
Example #13
0
def rsemPrepareReference(RSEM_BIN,
                         GenomeFasta,
                         GTFFile,
                         RSEMTranscriptIndex,
                         NumThreads=4,
                         RSEMPrepRefParams=''):
    """Run ``rsem-prepare-reference`` with a GTF annotation.

    ``RSEM_BIN`` is glued directly in front of the executable name and
    ``RSEMPrepRefParams`` is inserted verbatim right after it.  The parent
    directory of ``RSEMTranscriptIndex`` is handed to ``uF.makedir`` first.

    Returns:
        The command string that was passed to ``system`` (it carries a
        trailing space).
    """
    uF.makedir(dirname(RSEMTranscriptIndex))
    fields = (RSEM_BIN, RSEMPrepRefParams, GTFFile, NumThreads, GenomeFasta,
              RSEMTranscriptIndex)
    Command = """%srsem-prepare-reference %s --gtf %s --num-threads %s %s %s """ % fields
    print(Command)
    system(Command)
    return Command
Example #14
0
def FastQScreen(FASTQ_SCREEN_EXE,
                InputFile,
                OutDir,
                ConfigFile,
                SysOutFile,
                Subset=200000,
                NumThreads=6):
    """Run FastQ Screen with the ``bowtie2`` aligner on one input file.

    The tool's stdout/stderr overwrite ``SysOutFile``.  Unlike the other
    wrappers that take a ``SysOutFile``, the command line is not appended
    to the log afterwards.

    Args:
        FASTQ_SCREEN_EXE: Executable placed at the start of the command.
        InputFile: File to screen.
        OutDir: Value for ``--outdir``.
        ConfigFile: Value for ``--conf``.
        SysOutFile: Log file for the run.
        Subset: Value for ``--subset``.
        NumThreads: Value for ``--threads``.

    Returns:
        The command string that was passed to ``system``.
    """
    uF.makedir(OutDir)
    uF.makedir(dirname(SysOutFile))
    fields = (FASTQ_SCREEN_EXE, OutDir, Subset, ConfigFile, NumThreads,
              InputFile, SysOutFile)
    Command = "%s --outdir %s --subset %s --conf %s --threads %s --aligner bowtie2 %s > %s 2>&1" % fields
    print(Command)
    system(Command)
    return Command
Example #15
0
def CollectMultipleMetrics(JAVA_EXE,
                           JAVA_PARAMS,
                           PICARD_EXE,
                           BAMFile,
                           GenomeFasta,
                           MetricsFile,
                           SysOutFile,
                           PicardParams=''):
    """Run Picard ``CollectMultipleMetrics`` through ``java -jar``.

    ``BAMFile`` is passed as ``INPUT``, ``MetricsFile`` as ``OUTPUT`` and
    ``GenomeFasta`` as ``REFERENCE_SEQUENCE``.  Tool output is appended to
    ``SysOutFile``, followed by the command line itself.

    Returns:
        The command string that was passed to ``system``.
    """
    for target in (MetricsFile, SysOutFile):
        uF.makedir(dirname(target))

    fields = (JAVA_EXE, JAVA_PARAMS, PICARD_EXE, BAMFile, MetricsFile,
              GenomeFasta, PicardParams, SysOutFile)
    Command = "%s %s -jar %s CollectMultipleMetrics INPUT=%s OUTPUT=%s REFERENCE_SEQUENCE=%s %s >> %s 2>&1" % fields
    print(Command)
    system(Command)

    # Record the command itself underneath the tool output.
    if not exists(SysOutFile):
        return Command
    with open(SysOutFile, 'a') as log:
        log.write('\n\n%s\n\n' % (Command))
    return Command
Example #16
0
def MarkDuplicates(JAVA_EXE,
                   JAVA_PARAMS,
                   PICARD_EXE,
                   BAMFile,
                   DeDupBAMFile,
                   MetricsFile,
                   SysOutFile,
                   rmDups=False,
                   PicardParams=''):
    """Run Picard ``MarkDuplicates`` through ``java -jar``.

    ``rmDups`` selects ``REMOVE_DUPLICATES=true`` or ``=false``.  Tool output
    is appended to ``SysOutFile``, followed by the command line itself.

    Returns:
        The command string that was passed to ``system``.
    """
    for target in (DeDupBAMFile, MetricsFile, SysOutFile):
        uF.makedir(dirname(target))

    base = "%s %s -jar %s MarkDuplicates INPUT=%s OUTPUT=%s METRICS_FILE=%s %s" % (
        JAVA_EXE, JAVA_PARAMS, PICARD_EXE, BAMFile, DeDupBAMFile, MetricsFile,
        PicardParams)
    # The duplicate-removal switch and the log redirection form the tail.
    if rmDups:
        tail = " REMOVE_DUPLICATES=true >> %s 2>&1" % (SysOutFile)
    else:
        tail = " REMOVE_DUPLICATES=false >> %s 2>&1" % (SysOutFile)
    Command = base + tail
    print(Command)
    system(Command)

    # Record the command itself underneath the tool output.
    if not exists(SysOutFile):
        return Command
    with open(SysOutFile, 'a') as log:
        log.write('\n\n%s\n\n' % (Command))
    return Command
Example #17
0
def RNASeqC(JAVA17_EXE,
            JAVA_PARAMS,
            RNASEQC_EXE,
            SamplesFile,
            OutDir,
            GenomeFasta,
            GTFFile,
            SysOutFile,
            RNASeqCParams=''):
    """Run the RNA-SeQC jar through ``java -jar``.

    ``SamplesFile`` goes to ``-s``, ``OutDir`` to ``-o``, ``GenomeFasta`` to
    ``-r`` and ``GTFFile`` to ``-t``; a fixed ``-gatkFlags`` string is always
    included.  Tool output overwrites ``SysOutFile`` (``>``), after which the
    command line itself is appended to that file.

    Returns:
        The command string that was passed to ``system`` (it carries a
        trailing space).
    """
    uF.makedir(OutDir)
    uF.makedir(dirname(SysOutFile))

    fields = (JAVA17_EXE, JAVA_PARAMS, RNASEQC_EXE, SamplesFile, OutDir,
              GenomeFasta, GTFFile, RNASeqCParams, SysOutFile)
    Command = """%s %s -jar %s -s %s -o %s -r %s -t %s -gatkFlags "-S SILENT -U ALLOW_SEQ_DICT_INCOMPATIBILITY" %s > %s 2>&1 """ % fields
    print(Command)
    system(Command)

    # Record the command itself underneath the tool output.
    if not exists(SysOutFile):
        return Command
    with open(SysOutFile, 'a') as log:
        log.write('\n\n%s\n\n' % (Command))
    return Command
Example #18
0
def CollectInsertSizeMetrics(JAVA_EXE,
                             JAVA_PARAMS,
                             PICARD_EXE,
                             BAMFileIn,
                             MetricsFileOut,
                             HistogramFile,
                             SysOutFile,
                             PicardParams=''):
    """Run Picard ``CollectInsertSizeMetrics`` through ``java -jar``.

    ``BAMFileIn`` is passed as ``INPUT``, ``MetricsFileOut`` as ``OUTPUT``
    and ``HistogramFile`` as ``HISTOGRAM_FILE``.  Tool output is appended to
    ``SysOutFile``, followed by the command line itself.

    Returns:
        The command string that was passed to ``system``.
    """
    for target in (MetricsFileOut, HistogramFile, SysOutFile):
        uF.makedir(dirname(target))

    fields = (JAVA_EXE, JAVA_PARAMS, PICARD_EXE, BAMFileIn, MetricsFileOut,
              HistogramFile, PicardParams, SysOutFile)
    Command = "%s %s -jar %s CollectInsertSizeMetrics INPUT=%s OUTPUT=%s HISTOGRAM_FILE=%s %s >> %s 2>&1" % fields
    print(Command)
    system(Command)

    # Record the command itself underneath the tool output.
    if not exists(SysOutFile):
        return Command
    with open(SysOutFile, 'a') as log:
        log.write('\n\n%s\n\n' % (Command))
    return Command
Example #19
0
############################################
############################################

# For single-end runs, add the '-singleEnd' flag unless it is already present.
if args.SINGLE_END and '-singleEnd' not in RNASEQC_PARAMS:
    RNASEQC_PARAMS += ' -singleEnd '

# Files kept directly inside the output directory.
SAMPLES_FILE = join(args.OUTDIR, 'samples.txt')
REPORT_HTML_FILE = join(args.OUTDIR, 'report.html')
RNASeqC_SysOutFile = join(args.OUTDIR, 'rnaseqc.sysout')

CommandList = []
if not exists(SAMPLES_FILE) and not exists(REPORT_HTML_FILE):

    ## CREATE SAMPLES FILE
    uF.makedir(args.OUTDIR)
    # Tab-separated table: a header row, then one row per comma-separated
    # BAM path that exists on disk.  The sample ID is the BAM basename minus
    # its last four characters.  'with' closes the file even if a write fails.
    with open(SAMPLES_FILE, 'w') as fout:
        fout.write('\t'.join(['Sample ID', 'Bam File', 'Notes']) + '\n')
        for BAMFile in [x.strip() for x in args.BAM_FILES.split(',')]:
            if exists(BAMFile):
                fout.write(
                    '\t'.join([basename(BAMFile)[:-4], BAMFile, 'NONE']) +
                    '\n')

    if exists(args.RIBOSOMAL_LIST_FILE) and RNASEQC_PARAMS.find('-rRNA') == -1:
        RNASEQC_PARAMS += ' -rRNA %s' % (args.RIBOSOMAL_LIST_FILE)
    print '%s: %s' % (strftime("%d-%m-%Y %H:%M:%S",
                               gmtime()), 'Running RNASeqC.')
    Command = tW.RNASeqC(JAVA17_EXE=JAVA_17_EXE,
                         JAVA_PARAMS=JAVA_PARAMS,
                         RNASEQC_EXE=RNASEQC_EXE,
Example #20
0
##    if NUM_READS_IN_FASTQ != -1:
##        print '%s: %s' % (strftime("%d-%m-%Y %H:%M:%S", gmtime()),'Counting number of reads in FASTQ file.')
##        NUM_READS_IN_FASTQ = uF.numLinesInFile(args.FASTQ_FILE1)
##
##    print '%s: %s' % (strftime("%d-%m-%Y %H:%M:%S", gmtime()),'Verifying reads in MarkDuplicates BAM with flagstat output.')
##    isValidBAM = uF.flagStatSTARGenomeBAMValidate(FlagStatFile=GENOME_MARKDUP_SORTED_BAM_FLAGSTAT_FILE,NumReadsInFastQ=NUM_READS_IN_FASTQ,isPairedEnd=isPairedEnd)
##    Command = 'touch %s' % (join('%s.fail' % (GENOME_MARKDUP_SORTED_BAM_FLAGSTAT_FILE)))
##    if isValidBAM:
##        Command = 'touch %s' % (join('%s.pass' % (GENOME_MARKDUP_SORTED_BAM_FLAGSTAT_FILE)))
##    system(Command)
##    CommandList.append('%s\n%s' % (strftime("%d-%m-%Y %H:%M:%S", gmtime()),Command))

############################################
############################################
## WRITE COMMAND FILE                     ##
############################################
############################################

# Write the collected commands to a '.complete' file, but only when at least
# one command was recorded.
if CommandList:
    CompleteFile = join(OUTDIR, 'complete/',
                        '%s.runStar.complete' % (basename(args.OUTPREFIX)))
    uF.makedir(dirname(CompleteFile))
    # 'with' closes the file even if the write fails.
    with open(CompleteFile, 'w') as fout:
        fout.write('\n' + '\n\n'.join(CommandList) + '\n')

##############################################
##############################################
##############################################
##############################################
Example #21
0
############################################
## FILTER GTF FILE                        ##
############################################

if exists(args.FAI_IN):

    # The first tab-separated column of every FAI line is a chromosome ID.
    ChromIDs = []
    with open(args.FAI_IN, 'r') as fin:
        for line in fin:
            ChromIDs.append(line.strip().split('\t')[0].strip())
    # Set copy for constant-time membership tests while scanning the GTF;
    # ChromIDs itself stays a list in case later code relies on its order.
    ChromIDSet = set(ChromIDs)

    if exists(args.GTF_IN):

        uF.makedir(dirname(args.GTF_OUT))
        with open(args.GTF_IN, 'r') as fin:
            with open(args.GTF_OUT, 'w') as fout:
                for line in fin:
                    # Lines starting with '#' are copied through unchanged;
                    # other lines are kept only when their first column is
                    # one of the chromosome IDs read from the FAI.
                    if (line[0] == '#'
                            or line.strip().split('\t')[0] in ChromIDSet):
                        fout.write(line)
Example #22
0
############################################
## RSEM STAR                              ##
############################################
############################################

# Index directory is named after the read length, computed here as
# STAR_SJDBOVERHANG + 1.
RSEMStarIndexDir = join(args.OUTDIR,
                        'rsem_star/readLen%s/' % (args.STAR_SJDBOVERHANG + 1))
# Exon-only GTF and the transcript index prefix both live in that directory.
RSEMStarExonGTF = join(RSEMStarIndexDir, '%s.exon.gtf' % (args.PREFIX))
RSEMStarTranscriptIndex = join(RSEMStarIndexDir, '%s' % (args.PREFIX))
# Files derived from the index prefix (join() is given a single argument here).
RSEMStarTranscriptsFasta = join('%s.transcripts.fa' %
                                (RSEMStarTranscriptIndex))
RSEMStarDictFile = join('%s.transcripts.dict' % (RSEMStarTranscriptIndex))

if not exists(join(RSEMStarIndexDir, 'sjdbInfo.txt')):

    uF.makedir(RSEMStarIndexDir)
    chdir(RSEMStarIndexDir)

    print '%s: %s' % (strftime("%d-%m-%Y %H:%M:%S", gmtime()),
                      'Filtering GTF file for exon attributes.')
    Command = """awk '$3 == "exon"' %s > %s""" % (args.GTF_FILE,
                                                  RSEMStarExonGTF)
    system(Command)
    CommandList.append('%s\n%s' %
                       (strftime("%d-%m-%Y %H:%M:%S", gmtime()), Command))

    print '%s: %s' % (strftime("%d-%m-%Y %H:%M:%S",
                               gmtime()), 'Creating RSEM Star Index.')
    ##    Command = tW.rsemPrepareReference(RSEM_BIN=RSEM_BIN,GenomeFasta=args.GENOME_FASTA_FILE,GTFFile=RSEMStarExonGTF,RSEMTranscriptIndex=RSEMStarTranscriptIndex,NumThreads=args.NUM_THREADS,RSEMPrepRefParams='--star --star-path %s --star-sjdboverhang %s --polyA --polyA-length 125' % (STAR_BIN,args.STAR_SJDBOVERHANG))
    Command = tW.rsemPrepareReference(
        RSEM_BIN=RSEM_BIN,
Example #23
0
## DOWNLOAD & PREPARE FASTA               ##
############################################
############################################

# Presumably collects the commands run by this script for later logging -
# TODO confirm against the rest of the file.
CommandList = []

# Normalise the species name to lower case with an upper-case first letter.
# NOTE: indexing [0] raises IndexError when the name is empty.
SPECIES_NAME = args.SPECIES_NAME.lower()
SPECIES_NAME = SPECIES_NAME[0].upper() + SPECIES_NAME[1:]

# FASTA location: <OUTDIR>/<NCBI_BUILD>/release-<ENSEMBL_RELEASE>/fa/<PREFIX>.fa
FASTA_DIR = join(args.OUTDIR, args.NCBI_BUILD,
                 'release-%s/fa/' % (args.ENSEMBL_RELEASE))
FASTA_FILE = join(FASTA_DIR, '%s.fa' % (args.PREFIX))

if not exists(FASTA_FILE):

    uF.makedir(FASTA_DIR)
    chdir(FASTA_DIR)

    ENSEMBL_FASTA_FTP = 'ftp://ftp.ensembl.org/pub/release-%s/fasta/%s/dna' % (
        args.ENSEMBL_RELEASE, SPECIES_NAME.lower())
    ENSEMBL_FASTA = '%s.%s.dna.toplevel.fa' % (SPECIES_NAME, args.NCBI_BUILD)
    if args.NCBI_BUILD in ['GRCh38', 'GRCm38']:
        ENSEMBL_FASTA = '%s.%s.dna.primary_assembly.fa' % (SPECIES_NAME,
                                                           args.NCBI_BUILD)
    elif args.NCBI_BUILD in ['GRCh37']:
        ENSEMBL_FASTA = '%s.%s.%s.dna.primary_assembly.fa' % (
            SPECIES_NAME, args.NCBI_BUILD, args.ENSEMBL_RELEASE)
    elif args.NCBI_BUILD in ['NCBIM37']:
        ENSEMBL_FASTA = '%s.%s.%s.dna.toplevel.fa' % (
            SPECIES_NAME, args.NCBI_BUILD, args.ENSEMBL_RELEASE)
    if args.FASTA_FTP_DOWNLOAD_LINK != 'NA':
Example #24
0
                              'sysout/',
                              '%s.fastqscreen.sysout' % (args.SAMPLE_PREFIX))

# Output prefix is the FASTQ basename without its extension; for '.gz'
# inputs one further extension is dropped as well.
FastQScreenOutPrefix = splitext(basename(args.FASTQ_FILE))[0]
if args.FASTQ_FILE.endswith('.gz'):
    FastQScreenOutPrefix = splitext(FastQScreenOutPrefix)[0]

# Screen outputs are placed under <OUTDIR>/fastq_screen/<SAMPLE_PREFIX>/.
FastQScreenPngFile = join(
    args.OUTDIR, 'fastq_screen/', args.SAMPLE_PREFIX,
    '%s_screen.png' % (FastQScreenOutPrefix))
FastQScreenTxtFile = join(
    args.OUTDIR, 'fastq_screen/', args.SAMPLE_PREFIX,
    '%s_screen.txt' % (FastQScreenOutPrefix))

if exists(args.FASTQ_FILE):
    CommandList = []
    if not args.SKIP_FASTQC:
        if not exists(FASTQC_HTML_FILE):
            uF.makedir(FASTQC_DIR)
            print '%s: %s' % (strftime("%d-%m-%Y %H:%M:%S",
                                       gmtime()), 'Running FastQC.')
            Command = tW.FastQC(FASTQC_EXE=FASTQC_EXE,
                                JAVA_EXE=JAVA_18_EXE,
                                InputFile=args.FASTQ_FILE,
                                OutDir=FASTQC_DIR,
                                FileFormat='fastq',
                                NumThreads=args.NUM_THREADS)
            CommandList.append(
                '%s\n%s' % (strftime("%d-%m-%Y %H:%M:%S", gmtime()), Command))

            if exists(FASTQC_ZIP_FILE):
                print '%s: %s' % (strftime("%d-%m-%Y %H:%M:%S", gmtime()),
                                  'Deleting FastQC ZIP file.')
                Command = 'rm %s' % (FASTQC_ZIP_FILE)
Example #25
0
 ##  GENERATE COMMANDS FOR PIPELINE                                                ##
 ####################################################################################
 ####################################################################################
     
 if not exists(CompleteFile):
     
     sampleCommandDict[sampleID] = dict([(x,[]) for x in commandGroupList])
     
     ############################################
     ############################################
     ## CREATE SOFT-LINK TO RAW FASTQ FILE(S)  ##
     ############################################
     ############################################
     
     if not exists(sampleFileDict[sampleID]['RAW_FASTQ_FILE1']):
         uF.makedir(RawFastQDir)        
         Command = 'ln -s %s %s' % (sampleDesignDict[sampleID]['fastq_file1'],sampleFileDict[sampleID]['RAW_FASTQ_FILE1'])
         sampleCommandDict[sampleID]['PREP'] += [Command]
     if sampleDesignDict[sampleID]['isPaired'] and not exists(sampleFileDict[sampleID]['RAW_FASTQ_FILE2']):
         Command = 'ln -s %s %s' % (sampleDesignDict[sampleID]['fastq_file2'],sampleFileDict[sampleID]['RAW_FASTQ_FILE2'])
         sampleCommandDict[sampleID]['PREP'] += [Command]
         
     ############################################
     ############################################
     ## GENERATE SAMPLED FASTQ FILE(S)         ##
     ############################################
     ############################################
     
     SampledFastQCommand = '%sbin/python %srandomSampleFastQ.py %s %s --sample_size %s' % (PYTHON_DIR,SCRIPT_DIR,sampleFileDict[sampleID]['RAW_FASTQ_FILE1'],SampledPrefix,args.SAMPLE_SIZE)
     if sampleDesignDict[sampleID]['isPaired']:
         SampledFastQCommand += ' --fastq_file2 %s' % (sampleFileDict[sampleID]['RAW_FASTQ_FILE2'])