Beispiel #1
0
 def append(self, text, filename):
     """Append ``text`` followed by a newline to ``filename``.

     When ``self.verbose`` is truthy the text is also echoed to stdout.
     The parent directory of ``filename`` is created with ``hUtil.mkdir_p``
     if it does not exist yet.

     Args:
         text: string to append (a trailing newline is added).
         filename: path of the file to append to.
     """
     if self.verbose:
         print(text)  # echo text to stdout (single-argument form works on Python 2 and 3)

     parentDir = path.dirname(filename)
     # A bare filename has an empty dirname; there is no directory to create then.
     if parentDir and not path.isdir(parentDir):
         hUtil.mkdir_p(parentDir)

     # The context manager closes the handle even if write() raises.
     with open(filename, 'a') as fh:
         fh.write(text + '\n')
Beispiel #2
0
def count(gzFastqFile, outDir):
    """Tally index sequences found in a gzipped FASTQ file and write count tables.

    Every line that starts with ``@ILLUMINA`` or ``@NS500422`` and ends with
    ``:<index>`` (index made of the characters A, C, G, T, N and +) contributes
    one count for that index.  Two tab-separated tables with an
    ``Index<TAB>Count`` header are written into ``outDir``, ordered by
    descending count:

    * ``Undetermined_index_counts.txt``        - every index seen
    * ``Undetermined_index_counts_Top100.txt`` - only the first 100 rows

    Args:
        gzFastqFile: path to the gzip-compressed FASTQ file to scan.
        outDir: directory for the two output tables; created with
            ``hUtil.mkdir_p`` when it does not exist.
    """
    # Function-scope import: the file-level import block is not visible from here.
    from collections import Counter

    # Compiled once instead of on every line; match() anchors at line start.
    headerPattern = re.compile(r'@(ILLUMINA|NS500422).+:([ACGTN+]+)$')

    counts = Counter()
    with gzip.open(gzFastqFile, 'rb') as fastq:
        for line in fastq:
            # Python 3 yields bytes in 'rb' mode; Python 2 already yields str.
            if not isinstance(line, str):
                line = line.decode('ascii', 'replace')
            matchObj = headerPattern.match(line)
            if matchObj:
                counts[matchObj.group(2)] += 1

    # (index, count) pairs, highest count first
    ranked = counts.most_common()

    # write counts to file
    if not path.isdir(outDir):
        hUtil.mkdir_p(outDir)
    allPath = path.join(outDir, "Undetermined_index_counts.txt")
    topPath = path.join(outDir, "Undetermined_index_counts_Top100.txt")

    # Both handles are closed even if a write fails part-way through.
    with open(allPath, 'w') as f, open(topPath, 'w') as fTop:
        f.write("Index\tCount\n")
        fTop.write("Index\tCount\n")

        for rank, (index, n) in enumerate(ranked):
            row = "%s\t%d\n" % (index, n)
            f.write(row)
            if rank < 100:
                fTop.write(row)
Beispiel #3
0
    def safeCopy(self, src, dst):
        """Copy a file or directory from ``src`` to ``dst``.

        Steps, in order: ``self.checkDest(dst)``, ``hUtil.deleteItem(dst)``,
        creation of the destination's parent directory when it is missing,
        and finally ``hUtil.copy(src, dst)``.
        """
        self.checkDest(dst)
        hUtil.deleteItem(dst)

        # Make sure the directory that will hold dst exists before copying.
        parentDir = path.dirname(dst)
        parentMissing = not path.isdir(parentDir)
        if parentMissing:
            hUtil.mkdir_p(parentDir)

        hUtil.copy(src, dst)
Beispiel #4
0
    def writeSamplesheet(self, outDir = None):
        """Write this analysis' samplesheet and remember its path in ``self.ssFile``.

        The file is ``SampleSheet.<self.name>.csv`` inside ``outDir`` and holds
        the header portion of the run samplesheet (every line up to and
        including ``colNamesLineIndex``) followed by the lines selected by
        ``self.ssLineIndices``, one samplesheet line per output line.

        Args:
            outDir: target directory; defaults to ``self.processingDir``.
                Created with ``mkdir_p`` when it does not exist.
        """
        if not outDir:
            outDir = self.processingDir

        if not path.isdir(outDir):
            mkdir_p(outDir)

        self.ssFile = path.join(outDir, 'SampleSheet.' + self.name + '.csv')

        sheet = self.Run.SampleSheet
        headerLines = list(sheet.ss[:sheet.colNamesLineIndex + 1])  # header portion of samplesheet
        analysisLines = [sheet.ss[x] for x in self.ssLineIndices]   # lines corresponding to this analysis

        with open(self.ssFile, 'w') as fh:
            # One join over both parts: writing them as two separate joins glued
            # the column-names line onto the first analysis line.
            fh.write('\n'.join(headerLines + analysisLines))
Beispiel #5
0
    def writeSamplesheet(self, outDir = None):
        """Write this analysis' samplesheet and store its path in ``self.ssFile``.

        Output is the first line of the run samplesheet followed by the lines
        picked out by ``self.ssLineIndices``, saved as
        ``SampleSheet.<self.name>.csv`` in the target directory.

        Args:
            outDir: target directory; when falsy, ``self.Run.processingDir``
                is used.  Created with ``mkdir_p`` if missing.
        """
        # bcl2fastq for HiSeq 2000 requires that the analysis processing dir
        # (self.processingDir) not yet exist, hence the run-level default.
        targetDir = outDir if outDir else self.Run.processingDir

        targetExists = path.isdir(targetDir)
        if not targetExists:
            mkdir_p(targetDir)

        sheetName = 'SampleSheet.' + self.name + '.csv'
        self.ssFile = path.join(targetDir, sheetName)

        with open(self.ssFile, 'w') as sheetOut:
            allLines = self.Run.SampleSheet.ss
            # header line first ...
            sheetOut.write(allLines[0] + '\n')
            # ... then only the rows that belong to this analysis
            sheetOut.write('\n'.join(allLines[idx] for idx in self.ssLineIndices))
Beispiel #6
0
    def processRun(self):
        """Run the full processing sequence for this run, reporting any failure.

        Calls, in order: ``clearDir(self.processingDir)``, ``parseSamplesheet``
        (writing the validated and per-analysis samplesheets), ``bcl2fastq`` and
        ``postProcess``.  If any step raises an ``Exception`` the traceback is
        sent through ``self.notify`` and the method returns normally.

        Returns:
            None, whether or not a step failed.
        """
        try:
            self.clearDir(self.processingDir)
            self.parseSamplesheet(write_validated=True, write_analysis_samplesheets=True)
            self.bcl2fastq()
            self.postProcess()

        except Exception:
            # Exception rather than a bare except, so KeyboardInterrupt and
            # SystemExit still propagate instead of being reported and swallowed.
            logDir = path.dirname(self.logFile)
            # Ensure the log directory exists before notifying
            # (presumably notify writes to self.logFile - verify).
            if logDir and not path.isdir(logDir):
                hUtil.mkdir_p(logDir)

            self.notify('Seqprep Exception', 'Error in ' + self.runOutName + ':\n' + traceback.format_exc())
Beispiel #7
0
    def fastQC(self):
        """Run FastQC on each non-empty ``*.fastq.gz`` file of this analysis.

        Input files are read from ``<finishingDir>/Fastq``; FastQC output goes
        to ``<finishingDir>/QC`` (created with ``hUtil.mkdir_p``).  One shell
        command per file is issued through ``self.Run.shell``, logging to
        ``self.Run.logFile``.
        """
        self.Run.log('Running FastQC...')

        outDir = path.join(self.finishingDir, 'QC')
        hUtil.mkdir_p(outDir)

        fastqDir = path.join(self.finishingDir, 'Fastq')

        # Raw string with escaped dots and an end anchor: the previous pattern
        # treated '.' as "any character" and also accepted names that merely
        # contain '.fastq.gz' (e.g. 'x.fastq.gz.md5').
        fastqPattern = re.compile(r'\S+\.fastq\.gz$')

        for filename in os.listdir(fastqDir):
            fastqPath = path.join(fastqDir, filename)
            if fastqPattern.match(filename) and hUtil.gzNotEmpty(fastqPath):
                # NOTE(review): paths are concatenated into a shell string unquoted;
                # a directory name containing whitespace would break the command.
                command = 'module load centos6/fastqc-0.10.1; fastqc -t 4 --noextract --nogroup -o ' + outDir + ' ' + fastqPath

                self.Run.shell(command, self.Run.logFile)
Beispiel #8
0
 def write_validatedSamplesheet(self):
     """Back up the samplesheet file, then rewrite it from ``self.ss``.

     The backup is placed in an ``ss`` directory next to ``self.file``.  The
     first backup is named ``SampleSheet.csv.orig``; when that name is taken,
     ``2``, ``3``, ... is appended until an unused name is found.  The
     original file is then deleted and recreated with the lines of
     ``self.ss`` joined by newlines.
     """
     backupDir = path.join(path.dirname(self.file), 'ss')
     hUtil.mkdir_p(backupDir)

     # Probe candidate names until one does not exist yet.
     firstChoice = path.join(backupDir, 'SampleSheet.csv.orig')
     backupFile = firstChoice
     suffix = 2
     while path.isfile(backupFile):
         backupFile = '%s%d' % (firstChoice, suffix)
         suffix += 1

     hUtil.copy(self.file, backupFile)
     hUtil.setPermissions(backupFile)

     # Permissions cannot be set if someone else owns the file, so it is
     # deleted first and recreated by the open() below.
     hUtil.deleteItem(self.file)
     with open(self.file, 'w') as sheetOut:
         sheetOut.write('\n'.join(self.ss))

     hUtil.setPermissions(self.file)
Beispiel #9
0
    def initLogFile(self):
        """Create the log directory and move any existing log file aside.

        If ``self.logFile`` already exists it is copied to ``self.logFile``
        plus the lowest positive integer suffix that is not yet in use,
        permissions are set on that copy, and the original log file is removed.
        """
        hUtil.mkdir_p(self.logDir)

        if not path.isfile(self.logFile):
            return  # no previous log file to preserve

        # Find the first numbered backup name that is still free.
        n = 1
        while path.isfile(self.logFile + str(n)):
            n += 1
        archivedLog = self.logFile + str(n)

        self.safeCopy(self.logFile, archivedLog)
        hUtil.setPermissions(archivedLog)
        self.safeDeleteItem(self.logFile)
Beispiel #10
0
    def gather_analysis_fastq(self):
        """Concatenate the numbered fastq.gz chunks of each sample into one file.

        Sample directories (``Sample_*``) are looked up under
        ``<processingDir>/Project_Fastq_Files`` and
        ``<processingDir>/Undetermined_indices``.  For every ``*_001.fastq.gz``
        file of read 1 or read 2, all files sharing its label are appended, in
        sorted name order, to ``<finishingDir>/Fastq/<sampLabel>.R<n>.fastq.gz``.

        Example file set to concatenate:
          SampleA_ACAGTG_L001_R1_001.fastq.gz
          SampleA_ACAGTG_L001_R1_002.fastq.gz
          SampleA_ACAGTG_L001_R1_003.fastq.gz
        """
        # Function-scope import: the file-level import block is not visible from here.
        import shutil

        self.Run.log('Concatenating fastq files...')

        projectDir = path.join(self.processingDir, 'Project_Fastq_Files')
        sampDirs = glob.glob( path.join(projectDir, 'Sample_*') )
        undetDir = path.join(self.processingDir, 'Undetermined_indices')
        undetSampDirs = glob.glob( path.join(undetDir, 'Sample_*') )

        outDir = path.join(self.finishingDir, 'Fastq')
        hUtil.mkdir_p(outDir)

        for sampDir in sampDirs + undetSampDirs:

            # List the directory once; output goes to outDir, so it does not change here.
            sampFiles = os.listdir(sampDir)

            for filename in sampFiles:

                for readNumStr in ['1', '2']:

                    # Raw strings, escaped dots and an end anchor so that only real
                    # '<label>_001.fastq.gz' names start a merge.
                    labelMatch = re.match(r'(?P<fileLabel>(?P<sampLabel>\S+)_L[0-9]+_R' + readNumStr + r')_001\.fastq\.gz$', filename)
                    if not labelMatch:
                        continue

                    sampLabel = labelMatch.group('sampLabel')
                    fileLabel = labelMatch.group('fileLabel')

                    # NOTE(review): the output name omits the lane, so a sample with several
                    # lanes (L001, L002, ...) would rewrite the same mergeFile - confirm
                    # whether that can occur.
                    mergeFile = path.join(outDir, sampLabel + '.R' + readNumStr + '.fastq.gz')

                    # fileLabel is literal text, so escape it before embedding it in a pattern.
                    componentPattern = re.compile(re.escape(fileLabel) + r'_[0-9]+\.fastq\.gz$')
                    # sampDir already carries its parent path from glob; joining projectDir in
                    # front of it again produced a wrong path for relative directories.
                    componentFiles = sorted(path.join(sampDir, f) for f in sampFiles if componentPattern.match(f))

                    self.Run.log('Concatenating ' + sampLabel + ' R'+readNumStr+' fastq component files:')
                    self.Run.log('\n'.join(['  %s' % path.basename(x) for x in componentFiles]))

                    # open() instead of the Python-2-only file(); the context managers
                    # close both handles even if a copy fails.
                    with open(mergeFile, 'wb') as fout:
                        for componentFile in componentFiles:
                            with open(componentFile, 'rb') as fin:
                                shutil.copyfileobj(fin, fout, 65536)
Beispiel #11
0
 def clearDir(self, item):
     """Replace ``item`` with a freshly created directory.

     An existing directory or regular file at ``item`` is first removed with
     ``self.safeDeleteItem``; ``hUtil.mkdir_p(item)`` is then called in
     every case.
     """
     alreadyPresent = path.isdir(item) or path.isfile(item)
     if alreadyPresent:
         self.safeDeleteItem(item)

     hUtil.mkdir_p(item)