def run(self):
    """
    Scan self.inputFnameLs, find the empty VCF files and (optionally) delete them.

    Attributes read from self:
        debug: if true, drop into pdb before doing anything.
        inputFnameLs: iterable of paths to inspect; entries that are not
            regular files are counted but otherwise skipped.
        checkEmptyVCFByReading: passed to NextGenSeq.isVCFFileEmpty() as checkContent.
        commit: if true, empty VCF files are actually removed from disk.
        report: if true, print a line per deleted file and a progress line
            every 500 inputs.

    A summary (total inputs, number of VCF files, number of empty ones) is
    always written to stderr at the end. Returns None.
    """
    import traceback

    if self.debug:
        import pdb
        pdb.set_trace()

    counter = 0        # every entry of inputFnameLs
    no_of_vcf = 0      # entries recognised by NextGenSeq.isFileNameVCF()
    real_counter = 0   # VCF files found empty (and removed when self.commit is set)
    for inputFname in self.inputFnameLs:
        counter += 1
        if os.path.isfile(inputFname):
            try:
                if NextGenSeq.isFileNameVCF(inputFname, includeIndelVCF=False):
                    no_of_vcf += 1
                    if NextGenSeq.isVCFFileEmpty(inputFname, checkContent=self.checkEmptyVCFByReading):
                        if self.commit:
                            # os.remove() instead of a shell "rm <name>" string: no quoting
                            # problems with spaces/metacharacters in the file name, and a
                            # failed removal raises so it is not counted as deleted.
                            os.remove(inputFname)
                            if self.report:
                                sys.stderr.write("file %s deleted.\n" % (inputFname))
                        # NOTE(review): counted in dry-run mode (commit off) as well, which
                        # makes the final "were deleted" wording optimistic - TODO confirm
                        # this matches the intended placement of the counter.
                        real_counter += 1
            except Exception:
                # Best effort: one unreadable/undeletable file must not stop the scan.
                # "except Exception" (not a bare except) so Ctrl-C / SystemExit still
                # terminate the loop.
                sys.stderr.write('Except type: %s\n' % repr(sys.exc_info()))
                traceback.print_exc()
        if self.report and counter % 500 == 0:
            # backspaces rewind the cursor so the progress line overwrites itself
            sys.stderr.write("%s%s\t%s\t%s" % ('\x08'*80, counter, no_of_vcf, real_counter))
    sys.stderr.write("%s%s\t%s\t%s\n" % ('\x08'*80, counter, no_of_vcf, real_counter))
    sys.stderr.write("%s files in total.\n" % (counter))
    sys.stderr.write("Out of %s VCF files, %s are empty and were deleted.\n" % (no_of_vcf, real_counter))
def getChrListInTrioInconsistencyFile(self, tabixPath, trioInconsistencyByPosistionFname=None):
    """
    Return the chromosome IDs listed by "<tabixPath> -l <trioInconsistencyByPosistionFname>".

    The command is run through runLocalCommand(); every item of the returned
    output_stdout is stripped of surrounding whitespace and collected, in
    order, into the returned list. Progress is logged to stderr.

    2011.12.21
    """
    sys.stderr.write("Getting list of chromosomes out of %s ..."%(trioInconsistencyByPosistionFname))
    listCommand = "%s -l %s"%(tabixPath, trioInconsistencyByPosistionFname)
    commandResult = runLocalCommand(listCommand, report_stderr=True, report_stdout=False)
    # one entry per item yielded by the command's stdout
    chromosomeIDs = [outputLine.strip() for outputLine in commandResult.output_stdout]
    sys.stderr.write(" %s chromosomes.\n"%(len(chromosomeIDs)))
    return chromosomeIDs
def moveFileIntoDBAffiliatedStorage(self, db_entry=None, filename=None, inputDir=None, outputDir=None,
        dstFilename=None,
        relativeOutputDir=None, shellCommand='cp -rL', srcFilenameLs=None, dstFilenameLs=None,
        constructRelativePathFunction=None, data_dir=None):
    """
    Copy/move a file (or folder) into the db-affiliated storage and record its path on db_entry.

    Arguments:
        db_entry: database object; must offer .id, getFilePath() and setFilePath().
            If it has a 'md5sum' / 'file_size' attribute, those are refreshed
            from the destination via self.updateDBEntryMD5SUM() /
            self.updateDBEntryPathFileSize().
        filename (required): relative path of the input file. Could be a folder.
        inputDir (required): where 'filename' is from.
        outputDir: where the output file will be. Required unless dstFilename is given.
        dstFilename: absolute path of the output file. If given, it overrides
            outputDir. If None (usually), it is constructed on the fly from
            outputDir and the relative path described below.
        relativeOutputDir: used to construct the relative path when
            constructRelativePathFunction is not given. It is the directory
            part (as a relative path) of db_entry.path.
        shellCommand: command used to transfer the file. With the default
            'cp -rL', -L means "always follow symbolic links in SOURCE".
        srcFilenameLs, dstFilenameLs: optional. Two lists to which the absolute
            input/output paths are appended, so the caller can undo the
            transfer if a rollback is needed.
        constructRelativePathFunction: called as
            f(db_entry=..., sourceFilename=filename, data_dir=data_dir) to
            obtain the relative path of the output file.
        data_dir: the top-level folder where all the db-affiliated file
            storage is. Only passed on to constructRelativePathFunction.

    Relative path (in order of precedence):
        1. constructRelativePathFunction(...)
        2. join(relativeOutputDir, '%s_%s'%(db_entry.id, filename))
        3. '%s_%s'%(db_entry.id, filename)
    If db_entry.getFilePath() differs from it, the path is set on db_entry and
    db_entry is added to self.session and flushed.

    Returns an exit code:
        0: success.
        2: destination file already exists, or updating md5sum/file_size raised.
        3: the shell command wrote something to stderr (db_entry is deleted
           from the session).
        4: adding/flushing db_entry in the session raised.

    Example:
        inputFileBasename = os.path.basename(self.inputFname)
        exitCode = self.db_vervet.moveFileIntoDBAffiliatedStorage(db_entry=genotypeFile,
            filename=inputFileBasename, inputDir=os.path.split(self.inputFname)[0],
            outputDir=self.data_dir, relativeOutputDir=None, shellCommand='cp -rL',
            srcFilenameLs=self.srcFilenameLs, dstFilenameLs=self.dstFilenameLs,
            constructRelativePathFunction=genotypeFile.constructRelativePath,
            data_dir=self.data_dir)
        if exitCode!=0:
            session.rollback()
            self.cleanUpAndExitOnFailure(exitCode=exitCode)
        Passing dstFilename=os.path.join(self.data_dir, relativePath), with
        relativePath = genotypeFile.constructRelativePath(sourceFilename=inputFileBasename),
        instead of outputDir is equivalent.

    History:
        2013.1.31 bugfix: if relativeOutputDir is included in both outputDir
            and the new relative path, use only the new file name to avoid
            double usage.
        2012.12.15 moved from VervetDB; create the destination folder if missing.
        2012.8.30 add argument dstFilename, which if given, overrides outputDir.
        2012.7.13 copied from RegisterAndMoveSplitSequenceFiles.moveNewISQFileIntoDBStorage().
        2012.7.4 add srcFilename and dstFilename into srcFilenameLs, dstFilenameLs for later undo.
        2012.6.8 return non-zero if failure in move or destination file already exists.
        2012.2.10 first version.
    """
    import traceback

    # 1. relative path under which the file is registered in the database
    if constructRelativePathFunction is not None:
        newPath = constructRelativePathFunction(db_entry=db_entry, sourceFilename=filename, data_dir=data_dir)
        newfilename = os.path.basename(newPath)
    else:
        newfilename = '%s_%s' % (db_entry.id, filename)
        if relativeOutputDir:
            newPath = os.path.join(relativeOutputDir, newfilename)
        else:
            newPath = newfilename

    if db_entry.getFilePath() != newPath:
        db_entry.setFilePath(newPath)
        try:
            self.session.add(db_entry)
            self.session.flush()
        except Exception:
            # not a bare except: KeyboardInterrupt/SystemExit must propagate
            sys.stderr.write('Except type: %s\n' % repr(sys.exc_info()))
            traceback.print_exc()
            return 4

    # 2. absolute source and destination
    srcFilename = os.path.join(inputDir, filename)
    if dstFilename is None:    # 2012.8.30
        if relativeOutputDir and relativeOutputDir in outputDir and relativeOutputDir in newPath:
            # 2013.1.31 relativeOutputDir is already part of outputDir:
            # appending newPath would repeat it, so append the bare file name.
            dstFilename = os.path.join(outputDir, newfilename)
        else:
            dstFilename = os.path.join(outputDir, newPath)

    if os.path.isfile(dstFilename):
        # NOTE(review): only an existing regular file is refused; an existing
        # destination folder is not detected here - TODO confirm that is intended.
        sys.stderr.write("Error: destination %s already exists.\n" % (dstFilename))
        return 2

    # 2012.12.15 create folder if not existent. A dstFilename without a
    # directory part yields '', which os.makedirs() would reject.
    dstFolder = os.path.split(dstFilename)[0]
    if dstFolder and not os.path.isdir(dstFolder):
        os.makedirs(dstFolder)

    # 3. transfer the file
    # NOTE(review): paths are interpolated unquoted into a shell string; names
    # containing spaces or shell metacharacters will not survive - verify callers.
    commandline = '%s %s %s' % (shellCommand, srcFilename, dstFilename)
    return_data = utils.runLocalCommand(commandline, report_stderr=True, report_stdout=True)
    # record both paths right away so the caller can clean up even after a failure
    if srcFilenameLs is not None:
        srcFilenameLs.append(srcFilename)
    if dstFilenameLs is not None:
        dstFilenameLs.append(dstFilename)

    # Check the transfer BEFORE touching the destination: otherwise a failed
    # copy surfaces as a secondary md5sum error (code 2) and its real stderr
    # message is never reported.
    if return_data.stderr_content:
        sys.stderr.write("commandline %s failed: %s\n" % (commandline, return_data.stderr_content))
        # remove the db entry
        self.session.delete(db_entry)
        self.session.flush()
        return 3

    # 4. refresh file-derived attributes of the db entry
    if hasattr(db_entry, 'md5sum'):    # 2012.7.14 updated even if already set
        try:
            self.updateDBEntryMD5SUM(db_entry=db_entry, absPath=dstFilename)
        except Exception:
            self.session.delete(db_entry)
            sys.stderr.write('Except type: %s\n' % repr(sys.exc_info()))
            traceback.print_exc()
            return 2
    if hasattr(db_entry, 'file_size'):
        try:
            self.updateDBEntryPathFileSize(db_entry=db_entry, absPath=dstFilename)
        except Exception:
            self.session.delete(db_entry)
            sys.stderr.write('Except type: %s\n' % repr(sys.exc_info()))
            traceback.print_exc()
            return 2
    return 0