def progressiveFunction(self, experimentFile, toilDir, batchSystem, buildAvgs, buildReference, buildHal, buildFasta, toilStats, subtreeRoot=None): tempDir = getTempDirectory(os.getcwd()) tempExperimentDir = os.path.join(tempDir, "exp") runCreateMultiCactusProject(experimentFile, tempExperimentDir, fixNames=False, root=subtreeRoot) logger.info("Put the temporary files in %s" % tempExperimentDir) runCactusProgressive(os.path.join(tempExperimentDir, "exp_project.xml"), toilDir, batchSystem=batchSystem, buildAvgs=buildAvgs, toilStats=toilStats) # Check that the headers and sequences in the output are the # same as the sequences in the input (minus differences in # repeat-masking) exp = ExperimentWrapper(ET.parse(experimentFile).getroot()) seqMap = exp.buildSequenceMap() # Maps genome name -> headers in fasta headers = {} for genomeName, inputSequencePath in seqMap.items(): if os.path.isdir(inputSequencePath): # Some "input sequence paths" are actually provided as # directories containing multiple FASTAs concatenatedPath = getTempFile() system("cat %s/* > %s" % (inputSequencePath, concatenatedPath)) inputSequencePath = concatenatedPath headers[genomeName] = list(map(itemgetter(0), fastaRead(inputSequencePath))) # check headers inside .c2h output for expPath in glob.glob('%s/*/*_experiment.xml' % (tempExperimentDir)): subExp = ExperimentWrapper(ET.parse(expPath).getroot()) outgroups = subExp.getOutgroupEvents() c2hPath = subExp.getHALPath() with open(c2hPath) as f: for line in f: fields = line.split('\t') if fields[0] == 's': # Sequence line genome = fields[1][1:-1] header = fields[2][1:-1] if genome in headers and genome not in outgroups: # This genome is an input genome self.assertTrue(header in headers[genome], 'Header %s from output c2h %s not found in input fa %s' ' for genome %s' % (header, c2hPath, seqMap[genome], genome)) runToilStatusAndFailIfNotComplete(toilDir) system("rm -rf %s" % tempDir)
def writeXml(self): assert os.path.isdir(self.workingDir) configPath = absSymPath(os.path.join(self.workingDir, "config.xml")) expPath = absSymPath(os.path.join(self.workingDir, "expTemplate.xml")) self.expWrapper.setConfigPath(configPath) self.configWrapper.writeXML(configPath) self.expWrapper.writeXML(expPath) projPath = os.path.join(self.workingDir, ProjectWrapper.alignmentDirName) if len(self.seqFile.outgroups) == 0: # No outgroups specified, assume the default outgroup set outgroups = None else: outgroups = self.seqFile.outgroups runCreateMultiCactusProject(expPath, projPath, outgroupNames=outgroups, root=self.options.root)
def writeXml(self): assert os.path.isdir(self.workingDir) configPath = absSymPath( os.path.join(self.workingDir, "config.xml")) expPath = absSymPath( os.path.join(self.workingDir, "expTemplate.xml")) self.expWrapper.setConfigPath(configPath) self.configWrapper.writeXML(configPath) self.expWrapper.writeXML(expPath) projPath = os.path.join(self.workingDir, ProjectWrapper.alignmentDirName) if len(self.seqFile.outgroups) == 0: # No outgroups specified, assume the default outgroup set outgroups = None else: outgroups = self.seqFile.outgroups runCreateMultiCactusProject(expPath, projPath, fixNames=0, outgroupNames=outgroups, root=self.options.root)
def progressiveFunction(self, experimentFile, toilDir, batchSystem, buildAvgs, buildReference, buildHal, buildFasta, toilStats, subtreeRoot=None): tempDir = getTempDirectory(os.getcwd()) tempExperimentDir = os.path.join(tempDir, "exp") runCreateMultiCactusProject(experimentFile, tempExperimentDir, fixNames=False, root=subtreeRoot) logger.info("Put the temporary files in %s" % tempExperimentDir) runCactusProgressive(os.path.join(tempExperimentDir, "exp_project.xml"), toilDir, batchSystem=batchSystem, buildAvgs=buildAvgs, toilStats=toilStats) # Check that the headers and sequences in the output are the # same as the sequences in the input (minus differences in # repeat-masking) exp = ExperimentWrapper(ET.parse(experimentFile).getroot()) seqMap = exp.buildSequenceMap() # Maps genome name -> headers in fasta headers = {} for genomeName, inputSequencePath in seqMap.items(): if os.path.isdir(inputSequencePath): # Some "input sequence paths" are actually provided as # directories containing multiple FASTAs concatenatedPath = getTempFile() system("cat %s/* > %s" % (inputSequencePath, concatenatedPath)) inputSequencePath = concatenatedPath headers[genomeName] = list( map(itemgetter(0), fastaRead(inputSequencePath))) # check headers inside .c2h output for expPath in glob.glob('%s/*/*_experiment.xml' % (tempExperimentDir)): subExp = ExperimentWrapper(ET.parse(expPath).getroot()) outgroups = subExp.getOutgroupEvents() c2hPath = subExp.getHALPath() with open(c2hPath) as f: for line in f: fields = line.split('\t') if fields[0] == 's': # Sequence line genome = fields[1][1:-1] header = fields[2][1:-1] if genome in headers and genome not in outgroups: # This genome is an input genome self.assertTrue( header in headers[genome], 'Header %s from output c2h %s not found in input fa %s' ' for genome %s' % (header, c2hPath, seqMap[genome], genome)) runToilStatusAndFailIfNotComplete(toilDir) system("rm -rf %s" % tempDir)