Example #1
    def add_vol_surf_snapshot_step(self, dax, jobs_before, vol, surfs):
        snapshot_file_1 = File("snapshot_sagittal_%d.png" %
                               self.SNAPSHOT_NUMBER)
        snapshot_file_2 = File("snapshot_coronal_%d.png" %
                               self.SNAPSHOT_NUMBER)
        snapshot_file_3 = File("snapshot_axial_%d.png" % self.SNAPSHOT_NUMBER)

        job = Job("qc_snapshot")
        job.addArguments(str(self.SNAPSHOT_NUMBER), "vol_surf", vol)
        for surf in surfs:
            job.addArguments(surf)
        job.uses(vol, link=Link.INPUT)
        for surf in surfs:
            job.uses(surf, link=Link.INPUT)
        job.uses(snapshot_file_1,
                 link=Link.OUTPUT,
                 transfer=True,
                 register=False)
        job.uses(snapshot_file_2,
                 link=Link.OUTPUT,
                 transfer=True,
                 register=False)
        job.uses(snapshot_file_3,
                 link=Link.OUTPUT,
                 transfer=True,
                 register=False)
        dax.addJob(job)

        for job_before in jobs_before:
            dax.depends(job, job_before)

        self.SNAPSHOT_NUMBER += 1
Example #2
def path_to_pegasus_file(path: Path,
                         *,
                         site: str = "local",
                         name: Optional[str] = None,
                         is_raw_input: bool = False) -> File:
    """
    Given a *path* object, return a Pegasus `File` for use in a workflow.
    If the resource is not on the local machine, provide the *site* string.
    Files can be used as either an input or an output of a Job.

    Args:
        path: path to the file
        site: site to be used; defaults to local. Should be set to saga when
            running on the cluster.
        name: name given to the file
        is_raw_input: indicates that the file does not come from the output of
            another job in the workflow, so it can be safely added to the Pegasus DAX
    Returns:
        Pegasus File at the given path

    """
    rtnr = File(name if name else str(path.absolute()).replace("/", "-"))
    if is_raw_input:
        rtnr.addPFN(path_to_pfn(path, site=site))
    return rtnr
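
A minimal usage sketch for path_to_pegasus_file, assuming the same module also provides path_to_pfn and that the DAX3-era Pegasus API (Job, Link) is available; the paths and the "tokenize" transformation name below are illustrative, not taken from the source.

from pathlib import Path
from Pegasus.DAX3 import Job, Link

# A pre-existing (raw) input gets a PFN so Pegasus knows where to stage it from.
raw_input = path_to_pegasus_file(Path("/data/corpus.txt"), is_raw_input=True)

# A file produced later in the workflow gets no PFN; only a logical name is needed.
tokens = path_to_pegasus_file(Path("/data/tokens.txt"), name="tokens.txt")

job = Job("tokenize")  # hypothetical transformation name
job.addArguments(raw_input, tokens)
job.uses(raw_input, link=Link.INPUT)
job.uses(tokens, link=Link.OUTPUT, transfer=True, register=False)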
Example #3
 def __init__(self, folder_path, file_format="", not_empty=True, prefix=T1):
     self.prefix = prefix
     self.folder_path = folder_path
     self.file_format = file_format
     self.not_empty = not_empty
     self.folder = File(folder_path)
     self.raw_nii_file = File(prefix + "-raw.nii.gz")
     self.main_data = self.folder
Example #4
def registerFile(workflow, filename):
    """
	2011.12.13
		Register any file with the workflow, using workflow.input_site_handler as the site.
	"""
    file = File(os.path.basename(filename))
    file.addPFN(PFN("file://" + os.path.abspath(filename), \
           workflow.input_site_handler))
    workflow.addFile(file)
    return file
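
A minimal usage sketch for registerFile, assuming workflow is an ADAG-like object that also carries an input_site_handler attribute, as the function expects; the path and executable name below are illustrative.

ref_fasta = registerFile(workflow, "/data/reference/hg19.fasta")  # hypothetical path

job = Job(name="countFasta")  # hypothetical executable name
job.addArguments("-i", ref_fasta)
job.uses(ref_fasta, link=Link.INPUT)
workflow.addJob(job)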
Example #5
    def mapEachInterval(self, workflow=None, inputJobData=None, selectIntervalJobData=None, \
        chromosome=None,intervalData=None,\
        mapEachChromosomeData=None, \
        passingData=None, transferOutput=False, **keywords):
        """
		2013.04.08 use inputJobData
		2012.10.3
			#. extract flanking sequences from the input Input (ref sequence file => contig ref sequence)
			#. blast them
			#. run FindSNPPositionOnNewRefFromFlankingBlastOutput.py
				#. where hit length match query length, and no of mismatches <=2 => good => infer new coordinates
			#. output a mapping file between old SNP and new SNP coordinates.
				#. reduce this thing by combining everything
			#. make a new Input file based on the input split Input file
				(replace contig ID , position with the new one's, remove the header part regarding chromosomes or replace it)

		"""
        returnData = PassingData(no_of_jobs=0)
        returnData.jobDataLs = []
        passingData.intervalFileBasenamePrefix
        passingData.splitInputFile
        """
		## 2013.06.19 structures available from passingData, specific to the interval
		passingData.splitInputFile = splitInputFile
		passingData.unitNumber = unitNumber
		passingData.intervalFileBasenamePrefix = '%s_%s_splitInput_u%s'%(chromosome, commonPrefix, unitNumber)
		passingData.noOfIndividuals = jobData.file.noOfIndividuals
		passingData.span = self.intervalSize + self.intervalOverlapSize*2 	#2013.06.19 for memory/walltime gauging
		"""
        #add one computing job
        outputFile = File(os.path.join(self.mapDirJob.output, "%s.%s.probability.tsv.gz"%(passingData.fileBasenamePrefix,\
                            intervalData.interval)))
        locusIntervalDeltaOutputFile = File(os.path.join(self.mapDirJob.output, "%s.%s.locusIntervalDelta.tsv.gz"%(passingData.fileBasenamePrefix,\
                           intervalData.interval)))
        job = self.addAbstractMatrixFileWalkerJob(executable=self.ComputeLiftOverLocusProbability, \
           inputFileList=None, inputFile=selectIntervalJobData.file, outputFile=outputFile, \
           outputFnamePrefix=None, whichColumn=None, whichColumnHeader=None, \
           logY=None, valueForNonPositiveYValue=-1, \
           minNoOfTotal=1, samplingRate=1, \
           inputFileFormat=None, outputFileFormat=None,\
           parentJobLs=[selectIntervalJobData.job], extraOutputLs=[locusIntervalDeltaOutputFile],\
           extraDependentInputLs=None, \
           extraArgumentList=["--locusIntervalDeltaOutputFname", locusIntervalDeltaOutputFile, \
               "--startPosition %s"%(intervalData.start), "--stopPosition %s"%(intervalData.stop)], \
           extraArguments=None, transferOutput=transferOutput, job_max_memory=2000, sshDBTunnel=False, \
           objectWithDBArguments=None)
        #For each interval, probabilities are not calculated for loci in extra segment (from overlapStart to start).
        returnData.jobDataLs.append(self.constructJobDataFromJob(job))
        return returnData
Example #6
    def reduceEachChromosome(self, workflow=None, chromosome=None, passingData=None, mapEachInputDataLs=None,
         chromosome2mapEachIntervalDataLs=None,\
         reduceEachInputDataLs=None,\
         transferOutput=True, \
         **keywords):
        """
		"""
        returnData = PassingData(no_of_jobs=0)
        returnData.jobDataLs = []
        returnData.mapEachInputDataLs = mapEachInputDataLs
        returnData.reduceEachInputDataLs = reduceEachInputDataLs
        #reduce matrix by chosen column and average p-value

        outputFile = File(
            os.path.join(
                self.reduceEachChromosomeDirJob.output,
                'chr_%s_LocusLiftOverProbability.tsv.gz' % (chromosome)))
        reduceChromosomeJob = self.addStatMergeJob(statMergeProgram=self.mergeSameHeaderTablesIntoOne, \
               outputF=outputFile, \
               parentJobLs=[self.reduceEachChromosomeDirJob],extraOutputLs=None, \
               extraDependentInputLs=None, transferOutput=False)
        #extraArgumentList=['--keyColumnLs 0-6 --valueColumnLs 7'],\
        mapEachIntervalDataLs = chromosome2mapEachIntervalDataLs.get(
            chromosome)
        for mapEachIntervalData in mapEachIntervalDataLs:
            for jobData in mapEachIntervalData.jobDataLs:
                self.addInputToStatMergeJob(statMergeJob=reduceChromosomeJob,
                                            parentJobLs=[jobData.job])

        #add the reduction job to final stat merge job
        self.addInputToStatMergeJob(statMergeJob=self.reduceJob,
                                    parentJobLs=[reduceChromosomeJob])

        return returnData
Example #7
 def __init__(self):
     self.aseg_mgz_file = File("aparc+aseg.mgz")
     self.t1_mgz_file = File("t1.mgz")
     self.aseg_nii_file = File("aparc+aseg.nii.gz")
     self.t1_nii_file = File("t1-mri.nii.gz")
     self.d2t_file = File("d2t.mat")
     self.t2d_file = File("t2d.mat")
     self.b0_in_t1_file = File("b0-in-t1.nii.gz")
     self.t1_in_d_file = File("t1-in-d.nii.gz")
Example #8
    def add_seeg_gain_dp_computation_steps(self, dax, job_seeg_xyz,
                                           job_mapping_details):
        seeg_xyz = File(SEEGCompFiles.SEEG_XYZ.value)
        centers_txt = File(AsegFiles.CENTERS_TXT.value % self.atlas_suffix)

        gain_mat = File(SeegGainFiles.SEEG_GAIN_DP_MAT.value %
                        self.atlas_suffix)

        job = Job(ProjectionCompJobNames.COMPUTE_PROJ_MAT.value)
        job.addArguments(seeg_xyz, centers_txt, gain_mat, self.subject)
        job.uses(seeg_xyz, link=Link.INPUT)
        job.uses(centers_txt, link=Link.INPUT)
        job.uses(gain_mat, link=Link.OUTPUT, transfer=True, register=True)
        dax.addJob(job)

        dax.depends(job, job_seeg_xyz)
        dax.depends(job, job_mapping_details)
Example #9
 def createFile (self, fileName, fileURL=None, site=None):
     #traceback.print_stack ()
     file = self.getFile (fileName)
     if not file:
         file = File (fileName)
         if not fileURL:
             fileURL = "file://%s/%s" % (self.wms.getOutputDir (), fileName)
             logger.debug ("fileurl: %s", fileURL)
         if not site:
             site = "local"
         if not isinstance(fileURL, basestring) and len (fileURL) > 0:
             fileURL = fileURL [0]
         logger.debug ("--add-pfn: (%s)(%s)(%s)", fileName, fileURL, site)
         pfn = PFN (fileURL, site)
         file.addPFN (pfn)
         self.files [fileName] = file
     return file
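
A sketch of how this memoizing createFile might be used, assuming self.files is a plain dict, self.getFile looks names up in it, and self.wms.getOutputDir() returns the run's output directory; the wrapper object and file names are illustrative.

# The first call builds the File, attaches a default file:// PFN and caches it;
# repeated calls with the same name return the cached object.
out_a = wrapper.createFile("alignments.bam")
out_b = wrapper.createFile("alignments.bam")
assert out_a is out_b

# An explicit URL and site override the defaults.
ref = wrapper.createFile("reference.fa",
                         fileURL="file:///data/ref/reference.fa",
                         site="local")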
Example #10
    def add_projection_computation_steps(self, dax, job_mapping_details):
        projection_mat = File(ProjectionCompFiles.PROJECTION_MAT.value %
                              (self.sensors_type, self.atlas_suffix))
        sensor_positions = File(ProjectionCompFiles.SENS_POSITIONS.value %
                                self.sensors_type)
        centers_txt = File(AsegFiles.CENTERS_TXT.value % self.atlas_suffix)
        job = Job(ProjectionCompJobNames.COMPUTE_PROJ_MAT.value)
        job.addArguments(sensor_positions, centers_txt, projection_mat,
                         self.subject)
        job.uses(sensor_positions, link=Link.INPUT)
        job.uses(centers_txt, link=Link.INPUT)
        job.uses(projection_mat,
                 link=Link.OUTPUT,
                 transfer=True,
                 register=True)
        dax.addJob(job)

        dax.depends(job, job_mapping_details)
Example #11
    def mapEachInterval(self, workflow=None, \
        VCFJobData=None, passingData=None, transferOutput=False, **keywords):
        """
		2013.04.08 use VCFJobData
		2012.10.3
			#. extract flanking sequences from the input VCF (ref sequence file => contig ref sequence)
			#. blast them
			#. run FindSNPPositionOnNewRefFromFlankingBlastOutput.py
				#. where hit length match query length, and no of mismatches <=2 => good => infer new coordinates
			#. output a mapping file between old SNP and new SNP coordinates.
				#. reduce this thing by combining everything
			#. make a new VCF file based on the input split VCF file
				(replace contig ID , position with the new one's, remove the header part regarding chromosomes or replace it)

		"""
        if workflow is None:
            workflow = self

        returnData = PassingData(no_of_jobs=0)
        returnData.jobDataLs = []

        topOutputDirJob = passingData.topOutputDirJob
        mapDirJob = passingData.mapDirJob
        reduceOutputDirJob = passingData.reduceOutputDirJob

        intervalFileBasenamePrefix = passingData.intervalFileBasenamePrefix
        jobData = passingData.jobData
        VCFFile = VCFJobData.file  #2013.04.08

        splitVCFJob = passingData.mapEachVCFData.splitVCFJob
        chromosome = passingData.chromosome

        # a flanking sequence extraction job
        #noOfIndividuals
        realInputVolume = passingData.noOfIndividuals * passingData.span
        baseInputVolume = 600 * 2000  #600 individuals at 2000 sites
        #base is 200 individual X 2Mb region => 120 minutes
        walltime = self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
             baseInputVolume=baseInputVolume, baseJobPropertyValue=60, \
             minJobPropertyValue=60, maxJobPropertyValue=1200).value
        #base is 4X, => 5000M
        job_max_memory = self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
             baseInputVolume=baseInputVolume, baseJobPropertyValue=4000, \
             minJobPropertyValue=4000, maxJobPropertyValue=8000).value

        outputFnamePrefix = os.path.join(
            mapDirJob.output,
            '%s.sameSite.concordance' % (intervalFileBasenamePrefix))
        outputFile = File('%s.tsv' % (outputFnamePrefix))

        returnData.mapJob = self.addAbstractMapperLikeJob(executable=self.CalculateSameSiteConcordanceInVCF, \
           inputF=VCFFile, outputF=outputFile, \
           parentJobLs=[mapDirJob]+VCFJobData.jobLs, transferOutput=transferOutput, \
           job_max_memory=job_max_memory,\
           extraArguments=None, extraArgumentList=None, extraDependentInputLs=[], walltime=walltime)

        return returnData
Example #12
    def _ensure_input_format(self, file_format, input_name, output_name, dax):
        input_file = File(input_name)
        output_file = None
        job = None

        if file_format == "dicom":
            output_file = File(output_name)
            job = Job(T1JobNames.MRI_CONVERT.value)
            job.addArguments("-it", "dicom", input_file, output_file)
            job.uses(input_file, link=Link.INPUT)
            job.uses(output_file,
                     link=Link.OUTPUT,
                     transfer=False,
                     register=False)
            dax.addJob(job)

        if output_file is None:
            output_file = input_file

        return output_file, job
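
A sketch of the calling pattern this helper implies: job is None when the input already has the right format, so the dependency edge must be added conditionally. The enum members and the downstream job name here are illustrative.

t1_file, job_convert = self._ensure_input_format("dicom",
                                                 T1Files.T1_INPUT.value,      # hypothetical enum member
                                                 T1Files.T1_CONVERTED.value,  # hypothetical enum member
                                                 dax)

job_next = Job(T1JobNames.RECON_ALL.value)  # hypothetical downstream job
job_next.addArguments(t1_file)
job_next.uses(t1_file, link=Link.INPUT)
dax.addJob(job_next)

if job_convert is not None:  # only wire the edge when a conversion job was created
    dax.depends(job_next, job_convert)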
Example #13
    def add_lead_field_model_steps(self, dax, job_sensor_model_lh,
                                   job_sensor_model_rh):
        head_inv_matrix = File(HeadModelFiles.HEAD_INV_MAT.value)
        head2ipm_file = File(SensorModelFiles.SEEG_H2IPM.value)

        lh_white_dsm = File(SourceModelFiles.LH_WHITE_RESAMP_DSM.value %
                            (self.trg_subject, self.atlas_suffix))
        lh_ds2ipm_file = File(SensorModelFiles.LH_DS2IPM.value %
                              (self.trg_subject, self.atlas_suffix))

        lh_cortical_gain = File(LeadFieldModelFiles.LH_CORT_GAIN_H5.value %
                                self.atlas_suffix)

        job1 = Job(LeadFieldModelJobNames.OM_GAIN.value)
        job1.addArguments("-InternalPotential", head_inv_matrix, lh_white_dsm,
                          head2ipm_file, lh_ds2ipm_file, lh_cortical_gain)
        job1.uses(head_inv_matrix, link=Link.INPUT)
        job1.uses(lh_white_dsm, link=Link.INPUT)
        job1.uses(head2ipm_file, link=Link.INPUT)
        job1.uses(lh_ds2ipm_file, link=Link.INPUT)
        job1.uses(lh_cortical_gain,
                  link=Link.OUTPUT,
                  transfer=True,
                  register=True)
        dax.addJob(job1)

        dax.depends(job1, job_sensor_model_lh)

        rh_white_dsm = File(SourceModelFiles.RH_WHITE_RESAMP_DSM.value %
                            (self.trg_subject, self.atlas_suffix))
        rh_ds2ipm_file = File(SensorModelFiles.RH_DS2IPM.value %
                              (self.trg_subject, self.atlas_suffix))

        rh_cortical_gain = File(LeadFieldModelFiles.RH_CORT_GAIN_H5.value %
                                self.atlas_suffix)

        job2 = Job(LeadFieldModelJobNames.OM_GAIN.value)
        job2.addArguments("-InternalPotential", head_inv_matrix, rh_white_dsm,
                          head2ipm_file, rh_ds2ipm_file, rh_cortical_gain)
        job2.uses(head_inv_matrix, link=Link.INPUT)
        job2.uses(rh_white_dsm, link=Link.INPUT)
        job2.uses(head2ipm_file, link=Link.INPUT)
        job2.uses(rh_ds2ipm_file, link=Link.INPUT)
        job2.uses(rh_cortical_gain,
                  link=Link.OUTPUT,
                  transfer=True,
                  register=True)
        dax.addJob(job2)

        dax.depends(job2, job_sensor_model_rh)
Example #14
    def selectIntervalFromInputFile(self, jobData=None, chromosome=None,\
           intervalData=None, mapEachChromosomeData=None,\
           passingData=None, transferOutput=False,\
           **keywords):
        """
		2013.11.24
		"""
        inputSuffix = utils.getRealPrefixSuffixOfFilenameWithVariableSuffix(
            jobData.file.name)[1]
        outputFile = File(os.path.join(self.mapDirJob.output, '%s_%s%s'%(passingData.fileBasenamePrefix, \
                      intervalData.overlapInterval, inputSuffix)))
        tabixRetrieveJob = self.addTabixRetrieveJob(executable=self.tabixRetrieve, \
             tabixPath=self.tabixPath, \
             inputF=jobData.file, outputF=outputFile, \
             regionOfInterest=intervalData.overlapInterval, includeHeader=True,\
             parentJobLs=jobData.jobLs + [self.mapDirJob], job_max_memory=100, \
             extraDependentInputLs=jobData.fileLs[1:], \
             transferOutput=False)
        return self.constructJobDataFromJob(job=tabixRetrieveJob)
Example #15
    def add_surf_annot_snapshot_step(self, dax, jobs_before, surf, annot):
        snapshot_file_1 = File("snapshot_surface_annotation_%d0.png" %
                               self.SNAPSHOT_NUMBER)
        snapshot_file_2 = File("snapshot_surface_annotation_%d1.png" %
                               self.SNAPSHOT_NUMBER)
        snapshot_file_3 = File("snapshot_surface_annotation_%d2.png" %
                               self.SNAPSHOT_NUMBER)
        snapshot_file_4 = File("snapshot_surface_annotation_%d3.png" %
                               self.SNAPSHOT_NUMBER)
        snapshot_file_5 = File("snapshot_surface_annotation_%d4.png" %
                               self.SNAPSHOT_NUMBER)
        snapshot_file_6 = File("snapshot_surface_annotation_%d5.png" %
                               self.SNAPSHOT_NUMBER)

        job = Job("qc_snapshot")
        job.addArguments(str(self.SNAPSHOT_NUMBER), "surf_annot", surf, annot)
        job.uses(surf, link=Link.INPUT)
        job.uses(annot, link=Link.INPUT)

        job.uses(snapshot_file_1,
                 link=Link.OUTPUT,
                 transfer=True,
                 register=True)
        job.uses(snapshot_file_2,
                 link=Link.OUTPUT,
                 transfer=True,
                 register=True)
        job.uses(snapshot_file_3,
                 link=Link.OUTPUT,
                 transfer=True,
                 register=True)
        job.uses(snapshot_file_4,
                 link=Link.OUTPUT,
                 transfer=True,
                 register=True)
        job.uses(snapshot_file_5,
                 link=Link.OUTPUT,
                 transfer=True,
                 register=True)
        job.uses(snapshot_file_6,
                 link=Link.OUTPUT,
                 transfer=True,
                 register=True)
        dax.addJob(job)

        for job_before in jobs_before:
            dax.depends(job, job_before)

        self.SNAPSHOT_NUMBER += 1
Example #16
    def reduce(self,
               workflow=None,
               passingData=None,
               reduceEachChromosomeDataLs=None,
               transferOutput=True,
               **keywords):
        """
		2012.10.3
			#. merge all output of input jobs (passingData.mapEachIntervalDataLsLs) into one big one
		
		"""
        returnData = PassingData(no_of_jobs=0)
        returnData.jobDataLs = []
        reduceOutputDirJob = passingData.reduceOutputDirJob

        realInputVolume = passingData.jobData.file.noOfIndividuals * passingData.jobData.file.noOfLoci
        baseInputVolume = 200 * 20000
        walltime = self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
             baseInputVolume=baseInputVolume, baseJobPropertyValue=60, \
             minJobPropertyValue=60, maxJobPropertyValue=500).value
        job_max_memory = self.scaleJobWalltimeOrMemoryBasedOnInput(realInputVolume=realInputVolume, \
             baseInputVolume=baseInputVolume, baseJobPropertyValue=5000, \
             minJobPropertyValue=5000, maxJobPropertyValue=10000).value

        outputFile = File(
            os.path.join(reduceOutputDirJob.output, 'sameSiteConcordance.tsv'))
        reduceJob = self.addStatMergeJob(statMergeProgram=self.mergeSameHeaderTablesIntoOne, \
               outputF=outputFile, \
               parentJobLs=[reduceOutputDirJob],extraOutputLs=[], \
               extraDependentInputLs=[], transferOutput=transferOutput,)
        returnData.jobDataLs.append(PassingData(jobLs=[reduceJob], file=reduceJob.output, \
                 fileLs=[reduceJob.output]))

        for mapEachIntervalDataLs in passingData.mapEachIntervalDataLsLs:
            for mapEachIntervalData in mapEachIntervalDataLs:
                self.addInputToStatMergeJob(statMergeJob=reduceJob, \
                  parentJobLs=[mapEachIntervalData.mapJob])

        return returnData
Example #17
 def __init__(self,
              folder_path,
              file_format="",
              is_reversed=False,
              scan_direction="ap",
              threads="2"):
     super(DiffusionConfiguration,
           self).__init__(folder_path,
                          file_format=file_format,
                          prefix=SubtypeConfiguration.DWI)
     self.is_dwi_reversed = is_reversed
     self.scan_direction = scan_direction
     self.number_of_threads_mrtrix = threads
     self.dwi_raw_re_file = File("dwi_raw_re.mif")
     self.dwi_raw_mif_file = File("dwi_raw.mif")
     self.dwi_nii_re_file = File("dwi_raw_re.nii.gz")
     self.dwi_mif_file = File("dwi.mif")
     self.brain_mask = File("mask.mif")
     self.b0 = File("b0.nii.gz")
Example #18
    def add_seeg_mrs_gain_computation_steps(self, dax, job_seeg_xyz,
                                            job_mapping_details):
        seeg_xyz = File(SEEGCompFiles.SEEG_XYZ.value)
        cort_surf = File(AsegFiles.SURF_CORT_ZIP.value)
        subcort_surf = File(AsegFiles.SURF_SUBCORT_ZIP.value)
        cort_rm = File(AsegFiles.RM_CORT_TXT.value % self.atlas_suffix)
        subcort_rm = File(AsegFiles.RM_SUBCORT_TXT.value % self.atlas_suffix)

        gain_mat = File(SeegGainFiles.SEEG_GAIN_MRS_MAT.value %
                        self.atlas_suffix)

        job = Job(SeegGainJobNames.COMPUTE_SEEG_GAIN.value)
        job.addArguments(seeg_xyz, cort_surf, subcort_surf, cort_rm,
                         subcort_rm, gain_mat, self.subject)
        job.uses(seeg_xyz, link=Link.INPUT)
        job.uses(cort_surf, link=Link.INPUT)
        job.uses(subcort_surf, link=Link.INPUT)
        job.uses(cort_rm, link=Link.INPUT)
        job.uses(subcort_rm, link=Link.INPUT)
        job.uses(gain_mat, link=Link.OUTPUT, transfer=True, register=True)
        dax.addJob(job)

        dax.depends(job, job_seeg_xyz)
        dax.depends(job, job_mapping_details)
Example #19
    def generate_dax(self, daxfile):
        from Pegasus.DAX3 import ADAG, Job, File, Link

        # The DAX generator
        dax = ADAG("pipeline")

        # Some bits of metadata.  Should put plenty more here.
        dax.metadata("owner", self.pipeline.owner)
        dax.metadata("basename", self.pipeline.basename)
        dax.metadata("version", self.pipeline.version)

        # string tag -> pegasus File object mapping of all the
        # inputs and outputs used by any pipeline stage.
        files = {}

        # First generate the overall inputs to the pipeline,
        # i.e. ones that are not generated by any other stage
        # but must be specified at the start
        for tag in self.pipeline.input_tags():
            path = self.info['inputs'].get(tag)
            files[tag] = File(path)

        # Now go through the pipeline in sequence.
        for stage_name, stage_class in self.pipeline.sequence():
            # The stage in the pipeline.  We describe the meaning of it
            # (which image it corresponds to)
            # in the transformation catalog generation
            job = Job(stage_name, id=stage_name)

            # Configuration files for this job.
            # These will not be built during the pipeline and must be
            # provided by the user
            for config_tag, config_filename in stage_class.config.items():
                filename = self.pipeline.cfg[stage_name]['config'][config_tag]
                config_path = os.path.join(self.config_dir(), filename)
                config = File(config_path)
                job.uses(config, link=Link.INPUT)

            # Input files for the job, either created by the user or by previous
            # stages.  In either case they should be in the "files" dictionary, because
            # precursor jobs will have been added before this one.
            for input_tag in stage_class.inputs.keys():
                job.uses(files[input_tag], link=Link.INPUT)

            # Output files from the job. These will be created by the job
            # and used by future jobs
            for output_tag, output_type in stage_class.outputs.items():
                output_filename = "{}.{}".format(output_tag, output_type)
                output = File(output_filename)
                job.uses(output,
                         link=Link.OUTPUT,
                         transfer=True,
                         register=True)
                files[output_tag] = output

            # Add this job to the pipeline
            dax.addJob(job)

            # Tell pegasus which jobs this one depends on.
            # The pipeline already knows this information.
            # The pipeline.sequence command runs through
            # the jobs in an order that guarantees that a job's predecessors are
            # always done before it is, so they will always exist in the dax by this point.
            for predecessor_name in self.pipeline.dependencies(stage_name):
                dax.depends(stage_name, predecessor_name)

        # Generate the final DAX XML file.
        with open(daxfile, "w") as f:
            dax.writeXML(f)
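
generate_dax relies only on a small interface from the pipeline and its stages; below is a minimal sketch of that assumed shape (class names and tags are illustrative, and the real pipeline object also exposes owner, basename, version, cfg, and the info/config_dir used above).

class SummarizeStage:
    config = {"options": "options.yaml"}   # config tag -> default filename
    inputs = {"catalog": None}             # tags consumed (from user inputs or earlier stages)
    outputs = {"summary": "txt"}           # tag -> file extension of each product


class ToyPipeline:
    def input_tags(self):
        # tags that must be supplied up front in info['inputs']
        return ["catalog"]

    def sequence(self):
        # (stage_name, stage_class) pairs in an order where predecessors come first
        return [("summarize", SummarizeStage)]

    def dependencies(self, stage_name):
        # names of the stages this one depends on
        return []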
Example #20
    def _add_flirt_steps(self, dax, job_b0, job_t1, job_aparc_aseg):
        b0_nii_gz = File(DWIFiles.B0_NII_GZ.value)
        t1_nii_gz = File(T1Files.T1_NII_GZ.value)
        d2t_mat = File(CoregFiles.D2T_MAT.value)
        b0_in_t1 = File(CoregFiles.B0_IN_T1.value)
        job1 = Job(CoregJobNames.FLIRT.value, node_label="Register DWI to T1")
        job1.addArguments(b0_nii_gz, t1_nii_gz, d2t_mat, b0_in_t1)
        job1.uses(b0_nii_gz, link=Link.INPUT)
        job1.uses(t1_nii_gz, link=Link.INPUT)
        job1.uses(d2t_mat, link=Link.OUTPUT, transfer=True, register=True)
        job1.uses(b0_in_t1, link=Link.OUTPUT, transfer=True, register=True)
        dax.addJob(job1)

        dax.depends(job1, job_t1)
        dax.depends(job1, job_b0)

        self.qc_snapshots.add_2vols_snapshot_step(dax, [job1], t1_nii_gz,
                                                  b0_in_t1)

        t2d_mat = File(CoregFiles.T2D_MAT.value)
        job2 = Job(CoregJobNames.CONVERT_XFM.value,
                   node_label="Convert d2t matrix to t2d matrix")
        job2.addArguments("-omat", t2d_mat, "-inverse", d2t_mat)
        job2.uses(d2t_mat, link=Link.INPUT)
        job2.uses(t2d_mat, link=Link.OUTPUT, transfer=False, register=False)
        dax.addJob(job2)

        dax.depends(job2, job1)

        t1_in_d_nii_gz = File(CoregFiles.T1_IN_D.value)
        job3 = Job(CoregJobNames.FLIRT_REVERSED.value,
                   node_label="Register T1 to DWI")
        job3.addArguments(t1_nii_gz, b0_nii_gz, t1_in_d_nii_gz, t2d_mat)
        job3.uses(t1_nii_gz, link=Link.INPUT)
        job3.uses(b0_nii_gz, link=Link.INPUT)
        job3.uses(t2d_mat, link=Link.INPUT)
        job3.uses(t1_in_d_nii_gz,
                  link=Link.OUTPUT,
                  transfer=False,
                  register=False)
        dax.addJob(job3)

        dax.depends(job3, job2)

        self.qc_snapshots.add_2vols_snapshot_step(dax, [job3], t1_in_d_nii_gz,
                                                  b0_nii_gz)

        aparc_aseg_nii_gz = File(T1Files.APARC_ASEG_NII_GZ.value %
                                 self.atlas_suffix)
        aparc_aseg_in_d_nii_gz = File(CoregFiles.APARC_ASEG_IN_D.value %
                                      self.atlas_suffix)
        job4 = Job(CoregJobNames.FLIRT_REVERSED.value,
                   node_label="Register APARC+ASEG to DWI")
        job4.addArguments(aparc_aseg_nii_gz, b0_nii_gz, aparc_aseg_in_d_nii_gz,
                          t2d_mat)
        job4.uses(aparc_aseg_nii_gz, link=Link.INPUT)
        job4.uses(b0_nii_gz, link=Link.INPUT)
        job4.uses(t2d_mat, link=Link.INPUT)
        job4.uses(aparc_aseg_in_d_nii_gz,
                  link=Link.OUTPUT,
                  transfer=False,
                  register=False)
        dax.addJob(job4)

        dax.depends(job4, job2)
        dax.depends(job4, job_aparc_aseg)

        self.qc_snapshots.add_2vols_snapshot_step(dax, [job4],
                                                  aparc_aseg_in_d_nii_gz,
                                                  b0_nii_gz)
        self.qc_snapshots.add_3vols_snapshot_step(dax, [job3, job4],
                                                  t1_in_d_nii_gz, b0_nii_gz,
                                                  aparc_aseg_in_d_nii_gz)

        return job3, job4
Example #21
    def _add_fs_steps(self, dax, job_b0, job_t1, job_aparc_aseg):

        b0_nii_gz = File(DWIFiles.B0_NII_GZ.value)
        b0_in_t1_mgz = File(CoregFiles.B0_IN_T1_MGZ.value)
        d2t_reg = File("d2t.reg")
        d2t_lta = File("d2t.lta")
        d2t_mat = File("d2t.mat")
        job1 = Job(CoregJobNames.BBREGISTER.value)
        job1.addArguments(self.subject, b0_nii_gz, b0_in_t1_mgz, d2t_reg,
                          d2t_lta, d2t_mat)
        job1.uses(b0_nii_gz, link=Link.INPUT)
        job1.uses(b0_in_t1_mgz,
                  link=Link.OUTPUT,
                  transfer=False,
                  register=False)
        job1.uses(d2t_reg, link=Link.OUTPUT, transfer=False, register=False)
        job1.uses(d2t_lta, link=Link.OUTPUT, transfer=False, register=False)
        job1.uses(d2t_mat, link=Link.OUTPUT, transfer=False, register=False)
        dax.addJob(job1)

        dax.depends(job1, job_b0)

        b0_in_t1_nii_gz = File(CoregFiles.B0_IN_T1.value)
        job2 = Job(T1JobNames.MRI_CONVERT.value)
        job2.addArguments(b0_in_t1_mgz, b0_in_t1_nii_gz, "--out_orientation",
                          "RAS")
        job2.uses(b0_in_t1_mgz, link=Link.INPUT)
        job2.uses(b0_in_t1_nii_gz,
                  link=Link.OUTPUT,
                  transfer=False,
                  register=False)
        dax.addJob(job2)

        dax.depends(job2, job1)

        # self.qc_snapshots.add_2vols_snapshot_step(dax, [job1], t1_nii_gz, b0_in_t1_nii_gz)

        t1_mgz = File(T1Files.T1_MGZ.value)
        t1_in_d_nii_gz = File(CoregFiles.T1_IN_D.value)
        t1_in_d_lta = File(CoregFiles.T1_IN_D.value + ".lta")
        job3 = Job(CoregJobNames.MRI_VOL2VOL.value)
        job3.addArguments("--mov", t1_mgz, "--targ", b0_nii_gz, "--o",
                          t1_in_d_nii_gz, "--lta-inv", d2t_lta, "--save-reg")
        job3.uses(t1_mgz, link=Link.INPUT)
        job3.uses(b0_nii_gz, link=Link.INPUT)
        job3.uses(d2t_lta, link=Link.INPUT)
        job3.uses(t1_in_d_lta,
                  link=Link.OUTPUT,
                  transfer=False,
                  register=False)
        job3.uses(t1_in_d_nii_gz,
                  link=Link.OUTPUT,
                  transfer=False,
                  register=False)
        dax.addJob(job3)

        dax.depends(job3, job_t1)
        dax.depends(job3, job2)

        # self.qc_snapshots.add_2vols_snapshot_step(dax, [job3], b0_nii_gz, t1_in_d_nii_gz)

        aparc_aseg_mgz = File(T1Files.APARC_ASEG_MGZ.value % self.atlas_suffix)
        aparc_aseg_in_d_nii_gz = File(CoregFiles.APARC_ASEG_IN_D.value %
                                      self.atlas_suffix)
        job4 = Job(CoregJobNames.MRI_VOL2VOL.value)
        job4.addArguments("--mov", aparc_aseg_mgz, "--targ", b0_nii_gz, "--o",
                          aparc_aseg_in_d_nii_gz, "--reg", t1_in_d_lta,
                          "--nearest")
        job4.uses(aparc_aseg_mgz, link=Link.INPUT)
        job4.uses(b0_nii_gz, link=Link.INPUT)
        job4.uses(t1_in_d_lta, link=Link.INPUT)
        job4.uses(aparc_aseg_in_d_nii_gz,
                  link=Link.OUTPUT,
                  transfer=False,
                  register=False)
        dax.addJob(job4)

        dax.depends(job4, job_aparc_aseg)
        dax.depends(job4, job3)

        # self.qc_snapshots.add_2vols_snapshot_step(dax, [job4], b0_nii_gz, aparc_aseg_in_d_nii_gz)
        # self.qc_snapshots.add_3vols_snapshot_step(dax, [job3, job4], t1_in_d_nii_gz, b0_nii_gz, aparc_aseg_in_d_nii_gz)

        return job3, job4
Example #22
def registerRefFastaFile(workflow=None, refFastaFname=None, registerAffiliateFiles=True, input_site_handler='local',\
      checkAffiliateFileExistence=True, addPicardDictFile=True,\
      affiliateFilenameSuffixLs=['fai', 'amb', 'ann', 'bwt', 'pac', 'sa', 'rbwt', 'rpac', 'rsa', \
      'stidx', 'sthash'], folderName="reference"):
    """
	suffix here doesn't include ".".
	
	2013.08.23 bugfix, check if workflow has a file registered before adding it
	2013.3.26 added refSAMtoolsFastaIndexF, refPicardFastaDictF into returnData
	2013.3.20 deduce needBWARefIndexJob, needSAMtoolsFastaIndexJob, needPicardFastaDictJob, needStampyRefIndexJob from missing suffixes
	2010.10.10 added argument folderName
	2012.5.23
		add an argument "addPicardDictFile" to offer user option to exclude this file (i.e. in registerBlastNucleotideDatabaseFile)
	2012.2.24
		dict is via picard, also required for GATK
		fai is via "samtools faidx" (index reference). also required for GATK
		amb', 'ann', 'bwt', 'pac', 'sa', 'rbwt', 'rpac', 'rsa' are all bwa index.
		stidx is stampy index.
		sthash is stampy hash.
	2012.2.23
		add two suffixes, stidx (stampy index) and sthash (stampy hash)
	2011-11-11
		if needAffiliatedFiles,
			all other files, with suffix in affiliateFilenameSuffixLs, will be registered (symlinked or copied) as well.
	"""
    returnData = PassingData(refFastaFList = [], needBWARefIndexJob=False, needSAMtoolsFastaIndexJob=False, \
          needPicardFastaDictJob=False, needStampyRefIndexJob=False, needBlastMakeDBJob=False,\
          refPicardFastaDictF=None, refSAMtoolsFastaIndexF=None)
    missingSuffixSet = set()  #2013.3.20

    if registerAffiliateFiles:
        refFastaF = File(
            os.path.join(folderName, os.path.basename(refFastaFname))
        )  #use relative path, otherwise, it'll go to absolute path
        # Add it into replica only when needed.
        refFastaF.addPFN(PFN("file://" + refFastaFname, input_site_handler))
        if not workflow.hasFile(refFastaF):  #2013.08.12
            workflow.addFile(refFastaF)
        returnData.refFastaFList.append(refFastaF)
        # If it's not needed, assume the index is done, all relevant files are at
        # absolute paths, and no replica transfer is required.

        #add extra affiliated files
        suffix2PathToFileLs = {}
        if addPicardDictFile:  #2012.5.23
            picardDictSuffix = 'dict'
            pathToFile = '%s.%s' % (
                os.path.splitext(refFastaFname)[0], picardDictSuffix
            )  #remove ".fasta" from refFastaFname
            if checkAffiliateFileExistence and not os.path.isfile(pathToFile):
                sys.stderr.write(
                    "Warning: %s don't exist or not a file on file system. skip registration.\n"
                    % (pathToFile))
                missingSuffixSet.add(picardDictSuffix)
                #suffix2PathToFileLs.append(pathToFile)
            else:
                suffix2PathToFileLs[picardDictSuffix] = pathToFile
        for suffix in affiliateFilenameSuffixLs:
            pathToFile = '%s.%s' % (refFastaFname, suffix)
            if checkAffiliateFileExistence and not os.path.isfile(pathToFile):
                sys.stderr.write(
                    "Warning: %s don't exist or not a file on file system. skip registration.\n"
                    % (pathToFile))
                missingSuffixSet.add(suffix)
                continue
            suffix2PathToFileLs[suffix] = pathToFile
        for suffix, pathToFile in suffix2PathToFileLs.iteritems():
            if checkAffiliateFileExistence and not os.path.isfile(pathToFile):
                sys.stderr.write(
                    "Warning: %s don't exist or not a file on file system. skip registration.\n"
                    % (pathToFile))
                continue
            affiliateF = File(
                os.path.join(folderName, os.path.basename(pathToFile)))
            #use relative path, otherwise, it'll go to absolute path
            affiliateF.addPFN(PFN("file://" + pathToFile, input_site_handler))
            if not workflow.hasFile(affiliateF):  #2013.08.12
                workflow.addFile(affiliateF)
            returnData.refFastaFList.append(affiliateF)

            if suffix == 'dict':  #2013.3.26
                returnData.refPicardFastaDictF = affiliateF
            elif suffix == 'fai':
                returnData.refSAMtoolsFastaIndexF = affiliateF
    else:
        refFastaF = File(
            os.path.join(folderName, os.path.basename(refFastaFname)))
        returnData.refFastaFList.append(refFastaF)
    if 'bwt' in missingSuffixSet or 'pac' in missingSuffixSet:
        returnData.needBWARefIndexJob = True
    if 'fai' in missingSuffixSet:
        returnData.needSAMtoolsFastaIndexJob = True
        returnData.needPicardFastaDictJob = True
    if 'stidx' in missingSuffixSet or 'sthash' in missingSuffixSet:
        returnData.needStampyRefIndexJob = True
    if 'dict' in missingSuffixSet:
        returnData.needPicardFastaDictJob = True
    if 'nin' in missingSuffixSet or 'nhr' in missingSuffixSet or 'nsq' in missingSuffixSet:
        returnData.needBlastMakeDBJob = True
    return returnData
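
A usage sketch for registerRefFastaFile, assuming a pymodule-style workflow object with hasFile/addFile; the FASTA path is illustrative. The returned PassingData reports which companion index files were missing so the caller can schedule the corresponding indexing jobs.

refData = registerRefFastaFile(workflow=workflow,
                               refFastaFname="/data/ref/hg19.fasta",  # hypothetical path
                               registerAffiliateFiles=True,
                               input_site_handler="local")
refFastaFList = refData.refFastaFList  # the FASTA plus whichever .fai/.dict/bwa index files were found
if refData.needSAMtoolsFastaIndexJob:
    pass  # e.g. add a "samtools faidx" job producing the missing .fai here
if refData.needBWARefIndexJob:
    pass  # e.g. add a "bwa index" job here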
Example #23
    def add_tracts_generation_steps(self, dax, job_t1_in_d, job_mask,
                                    job_aparc_aseg_in_d, job_fs_custom):
        t1_in_d = File(CoregFiles.T1_IN_D.value)
        file_5tt = File(TractsGenFiles.FILE_5TT_MIF.value)
        job1 = Job(TractsGenJobNames.JOB_5TTGEN.value,
                   node_label="Generate 5tt MIF")
        job1.addArguments(t1_in_d, file_5tt)
        job1.uses(t1_in_d, link=Link.INPUT)
        job1.uses(file_5tt, link=Link.OUTPUT, transfer=True, register=True)
        dax.addJob(job1)

        dax.depends(job1, job_t1_in_d)

        file_gmwmi = File(TractsGenFiles.GMWMI_MIF.value)
        job2 = Job(TractsGenJobNames.JOB_5TT2GMWMI.value,
                   node_label="Extract GMWMI")
        job2.addArguments(file_5tt, file_gmwmi, "-nthreads",
                          self.mrtrix_threads)
        job2.uses(file_5tt, link=Link.INPUT)
        job2.uses(file_gmwmi, link=Link.OUTPUT, transfer=False, register=False)
        dax.addJob(job2)

        dax.depends(job2, job1)

        file_gmwmi_nii_gz = File(TractsGenFiles.GMWMI_NII_GZ.value)
        job_gmwmi_convert = Job(DWIJobNames.MRCONVERT.value)
        job_gmwmi_convert.addArguments(file_gmwmi, file_gmwmi_nii_gz)
        job_gmwmi_convert.uses(file_gmwmi, link=Link.INPUT)
        job_gmwmi_convert.uses(file_gmwmi_nii_gz,
                               link=Link.OUTPUT,
                               transfer=False,
                               register=False)
        dax.addJob(job_gmwmi_convert)

        dax.depends(job_gmwmi_convert, job2)

        self.qc_snapshots.add_2vols_snapshot_step(dax, [job_gmwmi_convert],
                                                  t1_in_d, file_gmwmi_nii_gz)

        file_5ttvis = File(TractsGenFiles.FILE_5TTVIS_MIF.value)
        job3 = Job(TractsGenJobNames.JOB_5TT2VIS.value,
                   node_label="Generate TT2VIS MIF")
        job3.addArguments(file_5tt, file_5ttvis)
        job3.uses(file_5tt, link=Link.INPUT)
        job3.uses(file_5ttvis,
                  link=Link.OUTPUT,
                  transfer=False,
                  register=False)
        dax.addJob(job3)

        dax.depends(job3, job2)

        file_wm_fod = None
        last_job = None

        dwi_mif = File(DWIFiles.DWI_MIF.value)
        mask_mif = File(DWIFiles.MASK_MIF.value)

        if self.dwi_multi_shell == "True":
            file_RF_WM = File(TractsGenFiles.RF_WM.value)
            file_RF_GM = File(TractsGenFiles.RF_GM.value)
            file_RF_CSF = File(TractsGenFiles.RF_CSF.value)
            file_RF_voxels = File(TractsGenFiles.RF_VOXELS.value)
            job4 = Job(TractsGenJobNames.DWI2RESPONSE_MSMT.value)
            job4.addArguments(dwi_mif, file_5tt, file_RF_WM, file_RF_GM,
                              file_RF_CSF, file_RF_voxels, self.mrtrix_threads)
            job4.uses(dwi_mif, link=Link.INPUT)
            job4.uses(file_5tt, link=Link.INPUT)
            job4.uses(file_RF_WM,
                      link=Link.OUTPUT,
                      transfer=True,
                      register=False)
            job4.uses(file_RF_GM,
                      link=Link.OUTPUT,
                      transfer=True,
                      register=False)
            job4.uses(file_RF_CSF,
                      link=Link.OUTPUT,
                      transfer=True,
                      register=False)
            job4.uses(file_RF_voxels,
                      link=Link.OUTPUT,
                      transfer=True,
                      register=False)
            dax.addJob(job4)

            dax.depends(job4, job3)

            gm_mif = File(DWIFiles.GM_MIF.value)
            csf_mif = File(DWIFiles.CSF_MIF.value)
            file_wm_fod = File(TractsGenFiles.WM_FOD_MIF.value)
            # TODO: does msdwi2fod exist? should we use dwi2fod with the same args?
            job5 = Job(TractsGenJobNames.MSDWI2FOD.value)
            job5.addArguments("msmt_csd", dwi_mif, file_RF_WM, file_wm_fod,
                              file_RF_GM, gm_mif, file_RF_CSF, csf_mif,
                              "-mask", mask_mif, "-nthreads",
                              self.mrtrix_threads)
            job5.uses(dwi_mif, link=Link.INPUT)
            job5.uses(file_RF_WM, link=Link.INPUT)
            job5.uses(file_RF_GM, link=Link.INPUT)
            job5.uses(file_RF_CSF, link=Link.INPUT)
            job5.uses(mask_mif, link=Link.INPUT)
            job5.uses(file_wm_fod,
                      link=Link.OUTPUT,
                      transfer=True,
                      register=False)
            job5.uses(gm_mif, link=Link.OUTPUT, transfer=True, register=False)
            job5.uses(csf_mif, link=Link.OUTPUT, transfer=True, register=False)
            dax.addJob(job5)

            dax.depends(job5, job4)

            last_job = job5

        else:
            file_response = File(TractsGenFiles.RESPONSE_TXT.value)
            job4 = Job(TractsGenJobNames.DWI2RESPONSE.value,
                       node_label="Compute the DWI Response")
            job4.addArguments(dwi_mif, file_response, mask_mif)
            job4.uses(dwi_mif, link=Link.INPUT)
            job4.uses(mask_mif, link=Link.INPUT)
            job4.uses(file_response,
                      link=Link.OUTPUT,
                      transfer=True,
                      register=True)
            dax.addJob(job4)

            dax.depends(job4, job_mask)

            file_wm_fod = File(TractsGenFiles.WM_FOD_MIF.value)
            job5 = Job(TractsGenJobNames.DWI2FOD.value,
                       node_label="Obtain WM FOD")
            job5.addArguments("csd", dwi_mif, file_response, file_wm_fod,
                              "-mask", mask_mif, "-nthreads",
                              self.mrtrix_threads)
            job5.uses(dwi_mif, link=Link.INPUT)
            job5.uses(file_response, link=Link.INPUT)
            job5.uses(mask_mif, link=Link.INPUT)
            job5.uses(file_wm_fod,
                      link=Link.OUTPUT,
                      transfer=True,
                      register=True)
            dax.addJob(job5)

            dax.depends(job5, job4)

            last_job = job5

        file_strmlns = File(TractsGenFiles.FILE_TCK.value % self.strmlns_no)
        job6 = Job(TractsGenJobNames.TCKGEN.value,
                   node_label="Generate tracts")

        if self.os == "LINUX":
            job6.addArguments(file_wm_fod, file_strmlns, "-select",
                              self.strmlns_no, "-seed_gmwmi", file_gmwmi,
                              "-act", file_5tt, "-seed_unidirectional",
                              "-maxlength", self.strmlns_size, "-step",
                              self.strmlns_step, "-nthreads",
                              self.mrtrix_threads)
        else:
            job6.addArguments(file_wm_fod, file_strmlns, "-number",
                              self.strmlns_no, "-seed_gmwmi", file_gmwmi,
                              "-act", file_5tt, "-unidirectional",
                              "-maxlength", self.strmlns_size, "-step",
                              self.strmlns_step, "-nthreads",
                              self.mrtrix_threads)
        job6.uses(file_wm_fod, link=Link.INPUT)
        job6.uses(file_gmwmi, link=Link.INPUT)
        job6.uses(file_5tt, link=Link.INPUT)
        job6.uses(file_strmlns, link=Link.OUTPUT, transfer=True, register=True)
        dax.addJob(job6)

        dax.depends(job6, last_job)
        dax.depends(job6, job1)

        file_strmlns_sift = File(TractsGenFiles.FILE_SIFT_TCK.value %
                                 self.strmlns_sift_no)
        job7 = Job(TractsGenJobNames.TCKSIFT.value, node_label="Tracts SIFT")
        job7.addArguments(file_strmlns, file_wm_fod, file_strmlns_sift,
                          "-term_number", self.strmlns_sift_no, "-act",
                          file_5tt, "-nthreads", self.mrtrix_threads)
        job7.uses(file_strmlns, link=Link.INPUT)
        job7.uses(file_wm_fod, link=Link.INPUT)
        job7.uses(file_5tt, link=Link.INPUT)
        job7.uses(file_strmlns_sift,
                  link=Link.OUTPUT,
                  transfer=True,
                  register=True)
        dax.addJob(job7)

        dax.depends(job7, job6)
        dax.depends(job7, job1)

        b0_nii_gz = File(DWIFiles.B0_NII_GZ.value)
        file_tdi_ends = File(TractsGenFiles.TDI_ENDS_MIF.value)
        job8 = Job(TractsGenJobNames.TCKMAP.value, node_label="TCKMAP")
        job8.addArguments(file_strmlns_sift, file_tdi_ends, "-vox", "1",
                          "-template", b0_nii_gz)
        job8.uses(file_strmlns_sift, link=Link.INPUT)
        job8.uses(b0_nii_gz, link=Link.INPUT)
        job8.uses(file_tdi_ends,
                  link=Link.OUTPUT,
                  transfer=True,
                  register=True)
        dax.addJob(job8)

        dax.depends(job8, job7)

        file_tdi_ends_nii_gz = File(TractsGenFiles.TDI_ENDS_NII_GZ.value)
        job_convert_tdi_ends = Job(DWIJobNames.MRCONVERT.value)
        job_convert_tdi_ends.addArguments(file_tdi_ends, file_tdi_ends_nii_gz)
        job_convert_tdi_ends.uses(file_tdi_ends, link=Link.INPUT)
        job_convert_tdi_ends.uses(file_tdi_ends_nii_gz,
                                  link=Link.OUTPUT,
                                  transfer=True,
                                  register=True)
        dax.addJob(job_convert_tdi_ends)

        dax.depends(job_convert_tdi_ends, job8)

        self.qc_snapshots.add_2vols_snapshot_step(dax, [job_convert_tdi_ends],
                                                  t1_in_d,
                                                  file_tdi_ends_nii_gz)

        fs_custom = File(AsegFiles.FS_CUSTOM_TXT.value % self.atlas_suffix)
        aparc_aseg_in_d = File(CoregFiles.APARC_ASEG_IN_D.value %
                               self.atlas_suffix)
        file_vol_lbl = File(TractsGenFiles.VOLUME_LBL_NII_GZ.value %
                            self.atlas_suffix)
        fs_color_lut = File(Inputs.FS_LUT.value)
        job9 = Job(TractsGenJobNames.LABEL_CONVERT.value,
                   node_label="Compute APARC+ASEG labeled for tracts")
        job9.addArguments(aparc_aseg_in_d, fs_color_lut, fs_custom,
                          file_vol_lbl)
        job9.uses(aparc_aseg_in_d, link=Link.INPUT)
        job9.uses(fs_color_lut, link=Link.INPUT)
        job9.uses(fs_custom, link=Link.INPUT)
        job9.uses(file_vol_lbl, link=Link.OUTPUT, transfer=True, register=True)
        dax.addJob(job9)

        dax.depends(job9, job_fs_custom)
        dax.depends(job9, job_aparc_aseg_in_d)

        self.qc_snapshots.add_2vols_snapshot_step(dax, [job9], t1_in_d,
                                                  file_vol_lbl)

        file_aparc_aseg_counts5M_csv = File(TractsGenFiles.TRACT_COUNTS.value %
                                            self.atlas_suffix)
        job10 = Job(TractsGenJobNames.TCK2CONNECTOME.value,
                    node_label="Generate weigths")
        job10.addArguments(file_strmlns_sift, file_vol_lbl,
                           "-assignment_radial_search", "2",
                           file_aparc_aseg_counts5M_csv)
        job10.uses(file_strmlns_sift, link=Link.INPUT)
        job10.uses(file_vol_lbl, link=Link.INPUT)
        job10.uses(file_aparc_aseg_counts5M_csv,
                   link=Link.OUTPUT,
                   transfer=True,
                   register=True)
        dax.addJob(job10)

        dax.depends(job10, job7)
        dax.depends(job10, job9)

        file_aparc_aseg_mean_tract_lengths5M_csv = File(
            TractsGenFiles.TRACT_LENGHTS.value % self.atlas_suffix)
        job11 = Job(TractsGenJobNames.TCK2CONNECTOME.value,
                    node_label="Generate tract lengths")
        job11.addArguments(file_strmlns_sift, file_vol_lbl,
                           "-assignment_radial_search", "2", "-scale_length",
                           "-stat_edge", "mean",
                           file_aparc_aseg_mean_tract_lengths5M_csv)
        job11.uses(file_strmlns_sift, link=Link.INPUT)
        job11.uses(file_vol_lbl, link=Link.INPUT)
        job11.uses(file_aparc_aseg_mean_tract_lengths5M_csv,
                   link=Link.OUTPUT,
                   transfer=True,
                   register=True)
        dax.addJob(job11)

        dax.depends(job11, job7)
        dax.depends(job11, job9)

        return job10, job11
Example #24
    sys.exit(1)

config = ConfigParser.ConfigParser({'input_file': '',
                                    'workflow_name': 'horizontal-clustering-test',
                                    'executable_installed': "False",
                                    'clusters_size': "3",
                                    'clusters_maxruntime': "7"})
config.read(sys.argv[2] + '/test.config')

# Create an abstract dag
cluster = ADAG (config.get('all', 'workflow_name'))

input_file = config.get('all', 'input_file')
if (input_file == ''):
        input_file = os.getcwd ()
else:
        input_file += '/' + os.getenv ('USER') + '/inputs'

# Add input file to the DAX-level replica catalog
a = File("f.a")
a.addPFN(PFN(config.get('all', 'file_url') + input_file + "/f.a", config.get('all', 'file_site')))
cluster.addFile(a)

for i in range (1, 3):
    sleep = Executable (namespace = "cluster", name = "level" + str (i), version = "1.0", os = "linux", arch = "x86", installed=config.getboolean('all', 'executable_installed'))
    sleep.addPFN (PFN (config.get('all', 'executable_url') + sys.argv[1] + "/bin/pegasus-keg", config.get('all', 'executable_site')))
    sleep.addProfile (Profile (namespace = "pegasus", key = "clusters.size", value = config.get('all', 'clusters_size')))
    sleep.addProfile (Profile (namespace = "pegasus", key = "clusters.maxruntime", value = config.get('all', 'clusters_maxruntime')))
    cluster.addExecutable(sleep)

for i in range (4):
    job = Job (namespace = "cluster", name = "level1", version = "1.0")
    job.addArguments('-a level1 -T ' + str (i + 1))
    job.addArguments('-i', a)
    job.addProfile (Profile (namespace = "pegasus", key = "job.runtime", value = str (i + 1)))
Example #25
    def preReduce(self,
                  workflow=None,
                  outputDirPrefix="",
                  passingData=None,
                  transferOutput=True,
                  **keywords):
        """
		2012.9.17
		"""
        if workflow is None:
            workflow = self
        returnData = parentClass.preReduce(self, workflow=workflow, outputDirPrefix=outputDirPrefix,\
              passingData=passingData, transferOutput=transferOutput, **keywords)
        #add a stat merge job and a genome wide plot job
        outputFile = File(
            os.path.join(self.reduceOutputDirJob.output,
                         'locusLiftOverProbability.tsv'))
        self.reduceJob = self.addStatMergeJob(statMergeProgram=self.mergeSameHeaderTablesIntoOne, \
               outputF=outputFile, \
               parentJobLs=[self.reduceOutputDirJob],extraOutputLs=None, \
               extraDependentInputLs=None, transferOutput=False)

        sortProbabilityFile = File(
            os.path.join(self.reduceOutputDirJob.output,
                         'locusLiftOverProbability.sorted.tsv'))
        sortProbabilityJob = self.addSortJob(inputFile=self.reduceJob.output, \
            outputFile=sortProbabilityFile, \
            parentJobLs=[self.reduceJob], \
            extraOutputLs=None, transferOutput=False, \
            extraArgumentList=["""-k1,1 -k2,3n """], \
            sshDBTunnel=None,\
            job_max_memory=4000, walltime=120)
        #2013.12.3 Tab delimiter syntax (-t$'\t') is removed because it can't be passed correctly.
        #2013.12.3 Tried -t "`/bin/echo -e '\t'`" as well, didn't work either.
        # However since each column field doesn't contain blank, it is fine to just use the default separator (non-blank to blank).

        returnData.jobDataLs.append(
            self.constructJobDataFromJob(sortProbabilityJob))

        outputFile = File(
            os.path.join(self.plotDirJob.output,
                         'locusLiftOverProbability.png'))
        self.addPlotGenomeWideDataJob(inputFileList=None, \
             inputFile=self.reduceJob.output,\
             outputFile=outputFile,\
             whichColumn=None, whichColumnHeader="mapPvalue", whichColumnPlotLabel="mapPvalue", \
             logX=None, logY=2, valueForNonPositiveYValue=-1, \
             xScaleLog=None, yScaleLog=None,\
             missingDataNotation='NA',\
             xColumnPlotLabel="genomePosition", xColumnHeader="oldStart", \
             xtickInterval=0,\
             drawCentromere=True, chrColumnHeader="oldChromosome", \
             minChrLength=None, minNoOfTotal=None, maxNoOfTotal=None, \
             figureDPI=100, formatString=".", ylim_type=2, samplingRate=1, logCount=False, need_svg=True,\
             tax_id=self.ref_genome_tax_id, sequence_type_id=self.ref_genome_sequence_type_id, chrOrder=1,\
             inputFileFormat=1, outputFileFormat=None,\
             parentJobLs=[self.reduceJob], \
             extraDependentInputLs=None, \
             extraArguments=None, extraArgumentList=None, \
             transferOutput=True, job_max_memory=1000, sshDBTunnel=self.needSSHDBTunnel)
        #xtickInterval=0 means no ticks on x-axis.

        outputFile = File(
            os.path.join(self.plotDirJob.output,
                         'locusLiftOverProbabilityHist.png'))
        #no spaces or parenthesis or any other shell-vulnerable letters in the x or y axis labels (whichColumnPlotLabel, xColumnPlotLabel)
        self.addDrawHistogramJob(executable=workflow.DrawHistogram, inputFileList=[self.reduceJob.output], \
             outputFile=outputFile, \
           whichColumn=None, whichColumnHeader="mapPvalue", whichColumnPlotLabel="minusLogLiftOverPvalue", \
           xScaleLog=0, yScaleLog=1, \
           logCount=False, logY=2, valueForNonPositiveYValue=50,\
           minNoOfTotal=10,\
           figureDPI=100, samplingRate=1,legendType=1, \
           parentJobLs=[self.plotDirJob, self.reduceJob], \
           extraDependentInputLs=None, \
           extraArguments=None, transferOutput=True,  job_max_memory=8000) #lots of input data,

        return returnData
Example #26
    def add_dwi_processing_steps(self, dax):
        last_job = None
        dwi_input = File(Inputs.DWI_INPUT.value)

        if self.use_gradient == "True":
            dwi_input_no_gradient = File(Inputs.DWI_INPUT_NO_GRAD.value)
            bvec_input = File(Inputs.DWI_BVEC.value)
            bval_input = File(Inputs.DWI_BVAL.value)

            job_gradient = Job(DWIJobNames.MRCONVERT.value)
            job_gradient.addArguments(dwi_input_no_gradient, "-fsl",
                                      bvec_input, bval_input, dwi_input)
            job_gradient.uses(dwi_input_no_gradient, link=Link.INPUT)
            job_gradient.uses(bvec_input, link=Link.INPUT)
            job_gradient.uses(bval_input, link=Link.INPUT)
            job_gradient.uses(dwi_input,
                              link=Link.OUTPUT,
                              transfer=True,
                              register=True)

            last_job = job_gradient
            dax.addJob(job_gradient)

        dwi_conv_output = None

        if self.dwi_reversed == "True":
            job1 = None
            job2 = None

            if self.dwi_format != "mif":
                if self.dwi_format == "dicom":
                    # TODO: is mrconvert interactive for reversed acquisition data? Should we use the next lines?
                    # mrchoose 0 mrconvert $DATA/DWI ./dwi_raw.mif -force
                    # mrchoose 1 mrconvert $DATA/DWI ./dwi_raw_re.mif -force
                    print("Not implemented!")
                else:
                    dwi_conv_output = File(DWIFiles.DWI_RAW_MIF.value)
                    job1 = Job(DWIJobNames.MRCONVERT.value,
                               node_label="Convert DWI to MIF")
                    job1.addArguments(dwi_input, dwi_conv_output)
                    job1.uses(dwi_input, link=Link.INPUT)
                    job1.uses(dwi_conv_output,
                              link=Link.OUTPUT,
                              transfer=False,
                              register=False)
                    dax.addJob(job1)
                    if last_job is not None:
                        dax.depends(job1, last_job)

                    dwi_re_input = File(DWIFiles.DWI_RE_NII_GZ.value)
                    dwi_re = File(DWIFiles.DWI_RE_MIF.value)
                    job2 = Job(DWIJobNames.MRCONVERT.value,
                               node_label="Convert DWI_RE to MIF")
                    job2.addArguments(dwi_re_input, dwi_re)
                    job2.uses(dwi_re_input, link=Link.INPUT)
                    job2.uses(dwi_re,
                              link=Link.OUTPUT,
                              transfer=True,
                              register=False)
                    dax.addJob(job2)

            dwi_pre_output = File(DWIFiles.DWI_MIF.value)
            job3 = Job(DWIJobNames.DWIPREPROC.value,
                       node_label="DWI preprocessing")

            if self.os == "LINUX":
                job3.addArguments(dwi_conv_output, dwi_pre_output, "-pe_dir",
                                  self.dwi_pe_dir, "-rpe_pair",
                                  dwi_conv_output, dwi_re, "-nthreads",
                                  self.mrtrix_threads)
            else:
                job3.addArguments(self.dwi_pe_dir, dwi_conv_output,
                                  dwi_pre_output, "-rpe_pair", dwi_conv_output,
                                  dwi_re, "-nthreads", self.mrtrix_threads)
            job3.uses(dwi_conv_output, link=Link.INPUT)
            job3.uses(dwi_re, link=Link.INPUT)
            job3.uses(dwi_pre_output,
                      link=Link.OUTPUT,
                      transfer=False,
                      register=False)
            dax.addJob(job3)

            if job1 is not None:
                dax.depends(job3, job1)

            if job2 is not None:
                dax.depends(job3, job2)

            last_job = job3

        else:
            job1 = None

            if self.dwi_format != "mif" and self.use_gradient != "True":
                dwi_conv_output = File(DWIFiles.DWI_RAW_MIF.value)
                job1 = Job(DWIJobNames.MRCONVERT.value,
                           node_label="Convert DWI to MIF")
                job1.addArguments(dwi_input, dwi_conv_output)
                job1.uses(dwi_input, link=Link.INPUT)
                job1.uses(dwi_conv_output,
                          link=Link.OUTPUT,
                          transfer=False,
                          register=False)

                dax.addJob(job1)
                if last_job is not None:
                    dax.depends(job1, last_job)

            if dwi_conv_output is None:
                dwi_conv_output = dwi_input

            dwi_pre_output = File(DWIFiles.DWI_MIF.value)
            job2 = Job(DWIJobNames.DWIPREPROC.value,
                       node_label="DWI preprocessing")

            if self.os == "LINUX":
                job2.addArguments(dwi_conv_output, dwi_pre_output, "-pe_dir",
                                  self.dwi_pe_dir, "-rpe_none", "-nthreads",
                                  self.mrtrix_threads)

            else:
                job2.addArguments(self.dwi_pe_dir, dwi_conv_output,
                                  dwi_pre_output, "-rpe_none", "-nthreads",
                                  self.mrtrix_threads)

            job2.uses(dwi_conv_output, link=Link.INPUT)
            job2.uses(dwi_pre_output,
                      link=Link.OUTPUT,
                      transfer=True,
                      register=True)
            dax.addJob(job2)

            if job1 is not None:
                dax.depends(job2, job1)

            last_job = job2

        mask_output = File(DWIFiles.MASK_MIF.value)
        job3 = Job(DWIJobNames.DWI2MASK.value, node_label="Create DWI mask")
        job3.addArguments(dwi_pre_output, mask_output, "-nthreads",
                          self.mrtrix_threads)
        job3.uses(dwi_pre_output, link=Link.INPUT)
        job3.uses(mask_output, link=Link.OUTPUT, transfer=True, register=True)
        dax.addJob(job3)

        dax.depends(job3, last_job)

        b0_output = File(DWIFiles.B0_NII_GZ.value)
        job4 = Job(DWIJobNames.DWIEXTRACT.value, node_label="Extract DWI B0")
        job4.addArguments(dwi_pre_output, b0_output)
        job4.uses(dwi_pre_output, link=Link.INPUT)
        job4.uses(b0_output, link=Link.OUTPUT, transfer=True, register=True)
        dax.addJob(job4)

        dax.depends(job4, last_job)

        file_mask_nii_gz = File(DWIFiles.MASK_NII_GZ.value)
        job_convert_mask = Job(DWIJobNames.MRCONVERT.value)
        job_convert_mask.addArguments(mask_output, file_mask_nii_gz)
        job_convert_mask.uses(mask_output, link=Link.INPUT)
        job_convert_mask.uses(file_mask_nii_gz,
                              link=Link.OUTPUT,
                              transfer=True,
                              register=True)
        dax.addJob(job_convert_mask)

        dax.depends(job_convert_mask, job3)

        self.qc_snapshots.add_2vols_snapshot_step(dax,
                                                  [job_convert_mask, job4],
                                                  file_mask_nii_gz, b0_output)

        return job4, job3
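Every stage above repeats the same pattern: create a Job, declare its file usage, add it to the DAX, then chain it after the previous job. A hedged refactoring sketch; the helper name and signature are hypothetical, not part of the original pipeline:

from Pegasus.DAX3 import Job, Link

def add_chained_job(dax, name, args, inputs, outputs, parent=None,
                    transfer=False, register=False, node_label=None):
    # Create the job, declare its inputs and outputs, add it to the DAX
    # and, when a parent is given, make it depend on that parent.
    job = Job(name, node_label=node_label)
    job.addArguments(*args)
    for f in inputs:
        job.uses(f, link=Link.INPUT)
    for f in outputs:
        job.uses(f, link=Link.OUTPUT, transfer=transfer, register=register)
    dax.addJob(job)
    if parent is not None:
        dax.depends(job, parent)
    return job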
Example #27
0
    'clusters_size': "3",
    'clusters_maxruntime': "7"
})
config.read(sys.argv[2] + '/test.config')

# Create an abstract dag
cluster = ADAG(config.get('all', 'workflow_name'))

input_file = config.get('all', 'input_file')
if input_file == '':
    input_file = os.getcwd()
else:
    input_file += '/' + os.getenv('USER') + '/inputs'

# Add input file to the DAX-level replica catalog
a = File("f.a")
a.addPFN(
    PFN(
        config.get('all', 'file_url') + input_file + "/f.a",
        config.get('all', 'file_site')))
cluster.addFile(a)

for i in range(1, 3):
    sleep = Executable(namespace="cluster",
                       name="level" + str(i),
                       version="1.0",
                       os="linux",
                       arch="x86_64",
                       installed=config.getboolean('all',
                                                   'executable_installed'))
    sleep.addPFN(
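The snippet above is cut off at the source after sleep.addPFN(. A hedged sketch of how such a loop body is typically completed; the 'executable_url' and 'executable_site' config keys are assumptions, and Profile/PFN come from Pegasus.DAX3 like the other classes in these examples:

    # Hedged continuation sketch -- the original example is truncated above.
    sleep.addPFN(
        PFN(
            config.get('all', 'executable_url') + "/level" + str(i),  # assumed key
            config.get('all', 'executable_site')))  # assumed key
    # Horizontal clustering hints matching the defaults shown at the top of this example.
    sleep.addProfile(Profile("pegasus", "clusters.size",
                             config.get('all', 'clusters_size')))
    sleep.addProfile(Profile("pegasus", "clusters.maxruntime",
                             config.get('all', 'clusters_maxruntime')))
    cluster.addExecutable(sleep)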
Example #28
0
    def write(self, filename, name='dax'):
        """Generate Pegasus abstract workflow (DAX).

        Parameters
        ----------
        filename : `str`
            File to write the DAX to.
        name : `str`, optional
            Name of the DAX.

        Returns
        -------
        `Pegasus.ADAG`
            Abstract workflow used by Pegasus' planner.
        """
        dax = ADAG(name)

        # Add files to DAX-level replica catalog.
        catalog = {}
        for file_id in self.files:
            attrs = self.graph.node[file_id]
            f = File(attrs['lfn'])

            # Add physical file names, if any.
            urls = attrs.get('urls')
            if urls is not None:
                sites = attrs.get('sites')
                if sites is None:
                    sites = ','.join(len(urls) * ['local'])
                for url, site in zip(urls.split(','), sites.split(',')):
                    f.addPFN(PFN(url, site))

            catalog[attrs['lfn']] = f
            dax.addFile(f)

        # Add jobs to the DAX.
        for task_id in self.tasks:
            attrs = self.graph.node[task_id]
            job = Job(name=attrs['name'], id=task_id)

            # Add job command line arguments, replacing every argument that
            # names a cataloged file with the corresponding Pegasus file object.
            args = attrs.get('args')
            if args:
                job.addArguments(*[catalog.get(arg, arg)
                                   for arg in args.split()])

            # Specify job's inputs.
            inputs = [file_id for file_id in self.graph.predecessors(task_id)]
            for file_id in inputs:
                attrs = self.graph.node[file_id]
                f = catalog[attrs['lfn']]
                job.uses(f, link=Link.INPUT)

            # Specify job's outputs
            outputs = [file_id for file_id in self.graph.successors(task_id)]
            for file_id in outputs:
                attrs = self.graph.node[file_id]
                f = catalog[attrs['lfn']]
                job.uses(f, link=Link.OUTPUT)

                streams = attrs.get('streams')
                if streams is not None:
                    if streams & 1 != 0:
                        job.setStdout(f)
                    if streams & 2 != 0:
                        job.setStderr(f)

            dax.addJob(job)

        # Add job dependencies to the DAX.
        for task_id in self.tasks:
            parents = set()
            for file_id in self.graph.predecessors(task_id):
                parents.update(self.graph.predecessors(file_id))
            for parent_id in parents:
                dax.depends(parent=dax.getJob(parent_id),
                            child=dax.getJob(task_id))

        # Finally, write down the workflow in DAX format.
        with open(filename, 'w') as f:
            dax.writeXML(f)
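A hedged usage sketch for write(); wf stands for an instance of the surrounding workflow class, which is not shown in this excerpt:

# Hypothetical usage: serialize the abstract workflow, then plan it.
wf.write("pipeline.dax", name="pipeline")
# The resulting file can then be handed to the planner, e.g.:
#   pegasus-plan --dax pipeline.dax --sites local --output-site local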
Example #29
0
    def add_surface_resampling_steps(self, dax, job_recon):
        t1_mgz = File(T1Files.T1_MGZ.value)

        lh_pial = File(T1Files.LH_PIAL.value)
        rh_pial = File(T1Files.RH_PIAL.value)
        pials = [lh_pial, rh_pial]

        lh_aparc_annot = File(T1Files.LH_APARC_ANNOT.value % self.atlas_suffix)
        rh_aparc_annot = File(T1Files.RH_APARC_ANNOT.value % self.atlas_suffix)

        aparcs = [lh_aparc_annot, rh_aparc_annot]

        lh_pial_resamp = File(ResamplingFiles.LH_PIAL_RESAMP.value %
                              self.trg_subject)
        rh_pial_resamp = File(ResamplingFiles.RH_PIAL_RESAMP.value %
                              self.trg_subject)
        pials_resamp = [lh_pial_resamp, rh_pial_resamp]

        lh_aparc_annot_resamp = File(
            ResamplingFiles.LH_APARC_ANNOT_RESAMP.value %
            (self.trg_subject, self.atlas_suffix))
        rh_aparc_annot_resamp = File(
            ResamplingFiles.RH_APARC_ANNOT_RESAMP.value %
            (self.trg_subject, self.atlas_suffix))

        aparcs_resamp = [lh_aparc_annot_resamp, rh_aparc_annot_resamp]

        last_job = None

        for idx, hemi in enumerate(["lh", "rh"]):
            job1 = Job(ResamplingJobNames.MRI_SURF2SURF.value)
            job1.addArguments(self.atlas_suffix, "--srcsubject", self.subject,
                              "--trgsubject", self.trg_subject, "--hemi", hemi,
                              "--sval-xyz", "pial", "--tval",
                              "pial-%s" % self.trg_subject, "--tval-xyz",
                              t1_mgz)
            job1.uses(t1_mgz, link=Link.INPUT)
            job1.uses(pials[idx], link=Link.INPUT)
            job1.uses(pials_resamp[idx],
                      link=Link.OUTPUT,
                      transfer=True,
                      register=True)
            dax.addJob(job1)

            dax.depends(job1, job_recon)

            job2 = Job(ResamplingJobNames.MRI_SURF2SURF.value)
            job2.addArguments(self.atlas_suffix, "--srcsubject", self.subject,
                              "--trgsubject", self.trg_subject, "--hemi", hemi,
                              "--sval-annot", aparcs[idx], "--tval",
                              aparcs_resamp[idx])
            job2.uses(aparcs[idx], link=Link.INPUT)
            job2.uses(aparcs_resamp[idx],
                      link=Link.OUTPUT,
                      transfer=True,
                      register=True)
            dax.addJob(job2)

            dax.depends(job2, job_recon)
            last_job = job2

        lh_centered_pial = File(ResamplingFiles.LH_CENTERED_PIAL_RESAMP.value %
                                self.trg_subject)
        job5 = Job(T1JobNames.MRIS_CONVERT.value)
        job5.addArguments("--to-scanner", lh_pial_resamp, lh_centered_pial)
        job5.uses(lh_pial_resamp, link=Link.INPUT)
        job5.uses(lh_centered_pial,
                  link=Link.OUTPUT,
                  transfer=True,
                  register=True)
        dax.addJob(job5)

        dax.depends(job5, last_job)

        rh_centered_pial = File(ResamplingFiles.RH_CENTERED_PIAL_RESAMP.value %
                                self.trg_subject)
        job6 = Job(T1JobNames.MRIS_CONVERT.value)
        job6.addArguments("--to-scanner", rh_pial_resamp, rh_centered_pial)
        job6.uses(rh_pial_resamp, link=Link.INPUT)
        job6.uses(rh_centered_pial,
                  link=Link.OUTPUT,
                  transfer=True,
                  register=True)
        dax.addJob(job6)

        dax.depends(job6, last_job)

        t1_nii_gz = File(T1Files.T1_NII_GZ.value)
        self.qc_snapshots.add_vol_surf_snapshot_step(
            dax, [job5, job6], t1_nii_gz, [lh_centered_pial, rh_centered_pial])
        self.qc_snapshots.add_surf_annot_snapshot_step(dax, [job5, job6],
                                                       lh_centered_pial,
                                                       lh_aparc_annot_resamp)
        self.qc_snapshots.add_surf_annot_snapshot_step(dax, [job5, job6],
                                                       rh_centered_pial,
                                                       rh_aparc_annot_resamp)

        return job6
Example #30
0
    def add_seeg_positions_computation_steps(self, dax):
        ct_input = File(Inputs.CT_INPUT.value)
        ct_ras = File(SEEGCompFiles.CT_RAS_NII_GZ.value)
        job1 = Job(T1JobNames.MRI_CONVERT.value)
        job1.addArguments(ct_input, ct_ras, "--out_orientation", "RAS")
        job1.uses(ct_input, Link.INPUT)
        job1.uses(ct_ras, Link.OUTPUT, register=True, transfer=True)
        dax.addJob(job1)

        t1_ras = File(T1Files.T1_NII_GZ.value)
        ct_in_t1 = File(SEEGCompFiles.CT_IN_T1_NII_GZ.value)
        ct_to_t1_mat = File(SEEGCompFiles.CT_TO_T1_MAT.value)
        job2 = Job(CoregJobNames.FLIRT.value)
        job2.addArguments(ct_ras, t1_ras, ct_to_t1_mat, ct_in_t1)
        job2.uses(t1_ras, Link.INPUT)
        job2.uses(ct_ras, Link.INPUT)
        job2.uses(ct_in_t1, Link.OUTPUT, transfer=True, register=True)
        job2.uses(ct_to_t1_mat, Link.OUTPUT, transfer=True, register=True)
        dax.addJob(job2)

        dax.depends(job2, job1)

        brain_mgz = File(T1Files.BRAIN_MGZ.value)
        brain_ras = File(SEEGCompFiles.BRAIN_RAS_NII_GZ.value)
        job3 = Job(T1JobNames.MRI_CONVERT.value)
        job3.addArguments(brain_mgz, brain_ras, "--out_orientation", "RAS")
        job3.uses(brain_mgz, Link.INPUT)
        job3.uses(brain_ras, Link.OUTPUT, transfer=True, register=True)
        dax.addJob(job3)

        brain_mask = File(SEEGCompFiles.BRAIN_MASK_NII_GZ.value)
        job4 = Job(SEEGCompJobNames.MRI_BINARIZE.value)
        job4.addArguments("--i", brain_ras, "--o", brain_mask, "--min", "10", "--erode", "4")
        job4.uses(brain_ras, Link.INPUT)
        job4.uses(brain_mask, Link.OUTPUT, transfer=True, register=True)
        dax.addJob(job4)

        dax.depends(job4, job3)

        masked_ct = File(SEEGCompFiles.MASKED_CT_NII_GZ.value)
        job5 = Job(SEEGCompJobNames.MRI_BINARIZE.value)
        job5.addArguments("--i", ct_in_t1, "--o", masked_ct, "--min", "1000", "--mask", brain_mask)
        job5.uses(ct_in_t1, Link.INPUT)
        job5.uses(brain_mask, Link.INPUT)
        job5.uses(masked_ct, Link.OUTPUT, transfer=True, register=True)
        dax.addJob(job5)

        dax.depends(job5, job2)
        dax.depends(job5, job4)

        dilated_ct = File(SEEGCompFiles.DILATED_CT_NII_GZ.value)
        job6 = Job(SEEGCompJobNames.MRI_BINARIZE.value)
        job6.addArguments("--i", masked_ct, "--o", dilated_ct, "--min", "0.5", "--dilate", "2", "--erode", "1")
        job6.uses(masked_ct, Link.INPUT)
        job6.uses(dilated_ct, Link.OUTPUT, transfer=True, register=True)
        dax.addJob(job6)

        dax.depends(job6, job5)

        labeled_ct = File(SEEGCompFiles.LABELED_CT_NII_GZ.value)
        job7 = Job(SEEGCompJobNames.LABEL_CT_WITH_DILATION.value)
        job7.addArguments(masked_ct, dilated_ct, labeled_ct, self.subj)
        job7.uses(masked_ct, Link.INPUT)
        job7.uses(dilated_ct, Link.INPUT)
        job7.uses(labeled_ct, Link.OUTPUT, transfer=True, register=True)
        dax.addJob(job7)

        dax.depends(job7, job6)

        schema_txt = File(Inputs.SCHEMA_TXT.value)
        job8 = Job(SEEGCompJobNames.GEN_SCHEMA_TXT.value)
        job8.addArguments(labeled_ct, schema_txt)
        job8.uses(labeled_ct, link=Link.INPUT)
        job8.uses(schema_txt, link=Link.OUTPUT, transfer=True, register=True)
        dax.addJob(job8)

        dax.depends(job8, job7)

        seeg_xyz = File(SEEGCompFiles.SEEG_XYZ.value)
        job9 = Job(SEEGCompJobNames.GEN_SEEG_XYZ.value)
        job9.addArguments(labeled_ct, schema_txt, seeg_xyz, self.subj)
        job9.uses(labeled_ct, Link.INPUT)
        job9.uses(schema_txt, Link.INPUT)
        job9.uses(seeg_xyz, Link.OUTPUT, transfer=True, register=True)
        dax.addJob(job9)

        dax.depends(job9, job7)
        dax.depends(job9, job8)

        return job9
Example #31
0
    def add_conversion_steps(self, dax, job_aparc_aseg, job_mapping_details,
                             job_weights, job_lengths):
        weights_csv = File(TractsGenFiles.TRACT_COUNTS.value %
                           self.atlas_suffix)
        lenghts_csv = File(TractsGenFiles.TRACT_LENGHTS.value %
                           self.atlas_suffix)

        centers = File(AsegFiles.CENTERS_TXT.value % self.atlas_suffix)
        areas = File(AsegFiles.AREAS_TXT.value % self.atlas_suffix)
        orientations = File(AsegFiles.ORIENTATIONS_TXT.value %
                            self.atlas_suffix)
        cortical = File(AsegFiles.CORTICAL_TXT.value % self.atlas_suffix)
        rm_to_aparc_aseg = File(AsegFiles.RM_TO_APARC_ASEG_TXT.value %
                                self.atlas_suffix)
        # aparc_aseg = File(T1Files.APARC_ASEG_NII_GZ.value)

        job = Job("convert_output")
        job.addArguments(weights_csv, lenghts_csv, self.atlas_suffix)

        job.uses(weights_csv, link=Link.INPUT)
        job.uses(lenghts_csv, link=Link.INPUT)
        job.uses(centers, link=Link.INPUT)
        job.uses(areas, link=Link.INPUT)
        job.uses(orientations, link=Link.INPUT)
        job.uses(cortical, link=Link.INPUT)
        job.uses(rm_to_aparc_aseg, link=Link.INPUT)
        # job.uses(aparc_aseg, link=Link.INPUT)

        job.uses(File(OutputConvFiles.APARC_ASEG_COR_NII_GZ.value %
                      self.atlas_suffix),
                 link=Link.OUTPUT,
                 transfer=True,
                 register=False)
        job.uses(File(OutputConvFiles.CONNECTIVITY_ZIP.value %
                      self.atlas_suffix),
                 link=Link.OUTPUT,
                 transfer=True,
                 register=False)

        job.uses(File(T1Files.T1_NII_GZ.value), link=Link.INPUT)
        job.uses(File(T1Files.APARC_ASEG_NII_GZ.value % self.atlas_suffix),
                 link=Link.INPUT)

        dax.addJob(job)

        dax.depends(job, job_aparc_aseg)
        dax.depends(job, job_mapping_details)
        dax.depends(job, job_weights)
        dax.depends(job, job_lengths)

        return job
Example #32
0
    def add_t1_processing_steps(self, dax, resamp_flag):
        t1_input = Inputs.T1_INPUT.value
        t1_converted = T1Files.T1_INPUT_CONVERTED.value
        t1_output, job1 = self._ensure_input_format(self.t1_format, t1_input,
                                                    t1_converted, dax)

        aparc_aseg_mgz_vol = File(T1Files.APARC_ASEG_MGZ.value %
                                  self.atlas_suffix)
        lh_pial = File(T1Files.LH_PIAL.value)
        rh_pial = File(T1Files.RH_PIAL.value)
        lh_white = File(T1Files.LH_WHITE.value)
        rh_white = File(T1Files.RH_WHITE.value)
        lh_aparc_annot = File(T1Files.LH_APARC_ANNOT.value % self.atlas_suffix)
        rh_aparc_annot = File(T1Files.RH_APARC_ANNOT.value % self.atlas_suffix)

        out_files_list = [
            aparc_aseg_mgz_vol, lh_pial, rh_pial, lh_white, rh_white,
            lh_aparc_annot, rh_aparc_annot
        ]

        t1_mgz_output = File(T1Files.T1_MGZ.value)
        norm_mgz_vol = File(T1Files.NORM_MGZ.value)
        brain_mgz_vol = File(T1Files.BRAIN_MGZ.value)
        job2 = Job(T1JobNames.RECON_ALL.value, node_label="Recon-all for T1")
        job2.addArguments(self.subject, t1_output, self.openmp_threads,
                          self.atlas_suffix)
        job2.uses(t1_output, link=Link.INPUT)
        job2.uses(t1_mgz_output,
                  link=Link.OUTPUT,
                  transfer=True,
                  register=True)
        job2.uses(norm_mgz_vol, link=Link.OUTPUT, transfer=True, register=True)
        job2.uses(brain_mgz_vol,
                  link=Link.OUTPUT,
                  transfer=True,
                  register=True)

        if self.t2_flag != "True":
            self._add_output_files(job2, out_files_list)

        dax.addJob(job2)

        if job1 is not None:
            dax.depends(job2, job1)

        last_job = job2

        if self.t2_flag == "True":
            t2_in = Inputs.T2_INPUT.value
            t2_converted = T1Files.T2_CONVERTED.value
            t2_input, job_convert = self._ensure_input_format(
                self.t2_format, t2_in, t2_converted, dax)

            job = Job(T1JobNames.AUTORECON3_T2.value)
            job.addArguments(self.subject, t2_input, self.openmp_threads)
            job.uses(t2_input, link=Link.INPUT)

            self._add_output_files(job, out_files_list)

            dax.addJob(job)

            if job_convert is not None:
                dax.depends(job, job_convert)
            dax.depends(job, last_job)

            last_job = job

        if self.flair_flag == "True":
            flair_in = Inputs.FLAIR_INPUT.value
            flair_converted = T1Files.FLAIR_CONVERTED.value
            flair_input, job_convert = self._ensure_input_format(
                self.flair_format, flair_in, flair_converted, dax)

            job = Job(T1JobNames.AUTORECON3_FLAIR.value)
            job.addArguments(self.subject, flair_input, self.openmp_threads)
            job.uses(flair_input, link=Link.INPUT)

            self._add_output_files(job, out_files_list)

            dax.addJob(job)

            if job_convert is not None:
                dax.depends(job, job_convert)
            dax.depends(job, last_job)

            last_job = job

        t1_nii_gz_vol = File(T1Files.T1_NII_GZ.value)
        job3 = Job(T1JobNames.MRI_CONVERT.value,
                   node_label="Convert T1 to NIFTI with good orientation")
        job3.addArguments(t1_mgz_output, t1_nii_gz_vol, "--out_orientation",
                          "RAS")
        job3.uses(t1_mgz_output, link=Link.INPUT)
        job3.uses(t1_nii_gz_vol,
                  link=Link.OUTPUT,
                  transfer=True,
                  register=True)
        dax.addJob(job3)

        dax.depends(job3, last_job)

        aparc_aseg_nii_gz_vol = File(T1Files.APARC_ASEG_NII_GZ.value %
                                     self.atlas_suffix)
        job4 = Job(
            T1JobNames.MRI_CONVERT.value,
            node_label="Convert APARC+ASEG to NIFTI with good orientation")
        job4.addArguments(aparc_aseg_mgz_vol, aparc_aseg_nii_gz_vol,
                          "--out_orientation", "RAS", "-rt", "nearest")
        job4.uses(aparc_aseg_mgz_vol, link=Link.INPUT)
        job4.uses(aparc_aseg_nii_gz_vol,
                  link=Link.OUTPUT,
                  transfer=False,
                  register=False)
        dax.addJob(job4)

        dax.depends(job4, last_job)

        if resamp_flag != "True":
            lh_centered_pial = File(T1Files.LH_CENTERED_PIAL.value)
            job5 = Job(T1JobNames.MRIS_CONVERT.value)
            job5.addArguments("--to-scanner", lh_pial, lh_centered_pial)
            job5.uses(lh_pial, link=Link.INPUT)
            job5.uses(lh_centered_pial,
                      link=Link.OUTPUT,
                      transfer=False,
                      register=False)
            dax.addJob(job5)

            dax.depends(job5, last_job)

            rh_centered_pial = File(T1Files.RH_CENTERED_PIAL.value)
            job6 = Job(T1JobNames.MRIS_CONVERT.value)
            job6.addArguments("--to-scanner", rh_pial, rh_centered_pial)
            job6.uses(rh_pial, link=Link.INPUT)
            job6.uses(rh_centered_pial,
                      link=Link.OUTPUT,
                      transfer=False,
                      register=False)
            dax.addJob(job6)

            dax.depends(job6, last_job)

            self.qc_snapshots.add_vol_surf_snapshot_step(
                dax, [job3, job5, job6], t1_nii_gz_vol,
                [lh_centered_pial, rh_centered_pial])
            self.qc_snapshots.add_surf_annot_snapshot_step(
                dax, [last_job, job5, job6], lh_centered_pial, lh_aparc_annot)
            self.qc_snapshots.add_surf_annot_snapshot_step(
                dax, [last_job, job5, job6], rh_centered_pial, rh_aparc_annot)

        return job3, job4
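The method relies on a _add_output_files helper that is not shown in this excerpt. A minimal sketch of what it presumably does, based on how outputs are declared elsewhere in these examples (an assumption, not the original implementation):

from Pegasus.DAX3 import Link

def add_output_files(job, file_list):
    # Presumed behaviour: declare every file as a transferable, registered
    # output of the given job.
    for output_file in file_list:
        job.uses(output_file, link=Link.OUTPUT, transfer=True, register=True)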