コード例 #1
0
ファイル: main.py プロジェクト: chriswmackey/lbt-recipes
class _Main_e39a9104Orchestrator(luigi.WrapperTask):
    """Wrapper task that schedules every terminal task of this module."""

    # Raw user-supplied inputs; overlaid on the module defaults on access.
    _input_params = luigi.DictParameter()

    @property
    def input_values(self):
        """Return the module defaults overridden by the user inputs."""
        return {**dict(_default_inputs), **dict(self._input_params)}

    def requires(self):
        """Yield one list holding an instance of each required task."""
        task_classes = (
            CopyGridInfo,
            CopyRedistInfo,
            ParseSunUpHours,
            RestructureCumulativeResults,
            RestructureTimestepResults,
            WriteTimestep,
        )
        # Every task receives the same merged input dictionary.
        yield [
            task_class(_input_params=self.input_values)
            for task_class in task_classes
        ]
コード例 #2
0
class trim(luigi.Task):
    """Run trim_galore (with FastQC) on one paired-end lane.

    All output locations are derived from ``cfg['output_dir']`` and the
    ``case`` / ``sample`` / ``lane`` parameters.
    """
    priority = 100
    resources = {'threads': 1}
    cfg = luigi.DictParameter()

    case = luigi.Parameter()
    sample = luigi.Parameter()
    lane = luigi.Parameter()

    def output(self):
        """Return the trimmed FASTQ, FastQC archive and error-log targets."""
        case_dir = os.path.join(self.cfg['output_dir'], self.case)
        lane_id = (self.case, self.sample, self.lane)
        mates = [1, 2]

        trimmed = [
            luigi.LocalTarget(os.path.join(
                case_dir, 'preprocess',
                '%s_%s_%s_R%s_val_%s.fq.gz' % (lane_id + (n, n))))
            for n in mates
        ]
        fastqc = [
            luigi.LocalTarget(os.path.join(
                case_dir, 'qc',
                '%s_%s_%s_R%s_val_%s_fastqc.zip' % (lane_id + (n, n))))
            for n in mates
        ]
        err_log = luigi.LocalTarget(os.path.join(
            case_dir, 'log', '%s_%s_%s_trim_err.txt' % lane_id))

        return {'trimgalore': trimmed, 'fastqc': fastqc, 'err_log': err_log}

    def run(self):
        """Build the trim_galore command and run it locally or on the cluster."""
        targets = self.output()
        first_trimmed = targets['trimgalore'][0].path
        first_fastqc = targets['fastqc'][0].path
        err_log = targets['err_log'].path
        lane_cfg = self.cfg['cases'][self.case][self.sample][self.lane]

        cmd = [
            'trim_galore',
            '--fastqc',
            '--fastqc_args "--outdir %s"' % os.path.dirname(first_fastqc),
            '--paired',
            '-o', os.path.dirname(first_trimmed),
            '--basename', '%s_%s_%s' % (self.case, self.sample, self.lane),
            '--gzip',
            lane_cfg['fastq1'],
            lane_cfg['fastq2'],
        ]

        pipeline_utils.confirm_path(first_trimmed)
        pipeline_utils.confirm_path(first_fastqc)

        if self.cfg['cluster_exec']:
            pipeline_utils.cluster_command_call(
                self, cmd, threads=1, ram=4, cfg=self.cfg, err_log=err_log)
        else:
            pipeline_utils.command_call(cmd, err_log=err_log)
コード例 #3
0
class AllDataGSheetTask(luigi.WrapperTask):
    """Wrapper task that requires all Google-Sheet extraction tasks for a date."""

    # NOTE(review): the default is evaluated once, when the class is defined,
    # not each time the task is instantiated.
    date = luigi.DateParameter(default=date.today())
    daily_case_growth_page = luigi.IntParameter(default=23)
    positive_breakdown_index = luigi.IntParameter(default=20)
    states_and_districts = luigi.DictParameter()
    glance_page_index = luigi.IntParameter(default=1)

    def requires(self):
        """Yield the four extraction tasks, all bound to ``self.date``."""
        day = self.date

        ward_breakdown = ExtractWardPositiveBreakdownGSheetTask(
            date=day, page_index=self.positive_breakdown_index)
        yield ward_breakdown

        case_growth = ExtractCaseGrowthTableGSheetTask(
            date=day, page=self.daily_case_growth_page)
        yield case_growth

        hospitalization = HospitalizationSheetGSheetTask(
            date=day, states_and_districts=self.states_and_districts)
        yield hospitalization

        glance = ExtractGlanceWardWisePositiveCases(
            date=day, page_index=self.glance_page_index)
        yield glance
コード例 #4
0
ファイル: diffind-wf.py プロジェクト: BiobankLab/DIFFIND
class clean_ref(luigi.Task):
    """Write a copy of the reference FASTA, optionally renumbering contigs.

    ``param`` must provide the keys ``ref`` (input path), ``ref_cleared``
    (output path) and ``clear`` (whether contig names get a numeric prefix).
    """
    param = luigi.DictParameter()

    def requires(self):
        """This task has no upstream dependencies."""
        return []

    def output(self):
        """Return the target for the rewritten reference file."""
        return luigi.LocalTarget(str(self.param['ref_cleared']))

    def run(self):
        """Load the reference, rename contigs if requested, and write it out."""
        reference = Fa.load_from_file(str(self.param['ref']))
        if self.param['clear']:
            for index, contig in enumerate(reference.contigs):
                # Replace the first character of the name with '>' followed
                # by a zero-padded running index.
                contig.name = '>' + "%04d" % index + contig.name[1:]
        reference.write(str(self.param['ref_cleared']))
コード例 #5
0
class apply_bqsr(luigi.Task):
    """Apply a base-quality recalibration table to a BAM via GATK3 PrintReads."""
    priority = 92
    # This actually only uses one thread, but the RAM requirements are large.
    resources = {'threads': 4}
    cfg = luigi.DictParameter()

    case = luigi.Parameter()
    sample = luigi.Parameter()

    def requires(self):
        """Depend on the recalibration table and the duplicate-marked BAM."""
        shared = dict(case=self.case, sample=self.sample, cfg=self.cfg)
        return {
            'base_recalibrator': base_recalibrator(**shared),
            'mark_duplicates': mark_duplicates(**shared),
        }

    def output(self):
        """Return the recalibrated BAM target and the error-log target."""
        case_dir = os.path.join(self.cfg['output_dir'], self.case)
        sample_id = (self.case, self.sample)
        return {
            'apply_bqsr': luigi.LocalTarget(os.path.join(
                case_dir, 'preprocess', '%s_%s_recalibrated.bam' % sample_id)),
            'err_log': luigi.LocalTarget(os.path.join(
                case_dir, 'log', '%s_%s_apply_bqsr_err.txt' % sample_id)),
        }

    def run(self):
        """Assemble the PrintReads command and run it locally or on the cluster."""
        inputs = self.input()
        outputs = self.output()
        err_log = outputs['err_log'].path

        cmd = [
            'java',
            '-Djava.io.tmpdir=%s' % self.cfg['tmp_dir'],
            '-jar', '$GATK3',
            '-T', 'PrintReads',
            '-I', inputs['mark_duplicates']['mark_duplicates']['bam'].path,
            '-R', self.cfg['fasta_file'],
            '-BQSR', inputs['base_recalibrator']['base_recalibrator'].path,
            '-o', outputs['apply_bqsr'].path,
        ]

        if self.cfg['cluster_exec']:
            pipeline_utils.cluster_command_call(
                self, cmd, threads=1, ram=5, cfg=self.cfg, err_log=err_log)
        else:
            pipeline_utils.command_call(cmd, err_log=err_log)
コード例 #6
0
class CreateSkyDome(QueenbeeTask):
    """Create a skydome for daylight coefficient studies."""

    # DAG Input parameters
    _input_params = luigi.DictParameter()

    # Task inputs
    sky_density = luigi.Parameter(default='1')

    @staticmethod
    def _forward_slashes(path):
        """Return ``path`` with every backslash replaced by a forward slash."""
        return path.replace('\\', '/')

    def _sky_dome_path(self):
        """Return the destination path of the generated sky dome file."""
        return os.path.join(self.execution_folder, 'resources/sky.dome')

    @property
    def execution_folder(self):
        """Simulation folder from the DAG inputs, using forward slashes."""
        return self._forward_slashes(self._input_params['simulation_folder'])

    @property
    def initiation_folder(self):
        """Simulation folder from the DAG inputs, using forward slashes."""
        return self._forward_slashes(self._input_params['simulation_folder'])

    @property
    def params_folder(self):
        """Params folder joined onto the execution folder, forward slashes."""
        joined = os.path.join(
            self.execution_folder, self._input_params['params_folder'])
        return self._forward_slashes(joined)

    def command(self):
        """Return the honeybee-radiance command line for this task."""
        template = (
            'honeybee-radiance sky skydome --name rflux_sky.sky '
            '--sky-density {sky_density}'
        )
        return template.format(sky_density=self.sky_density)

    def output(self):
        """Return the sky dome file target keyed by ``'sky_dome'``."""
        return {'sky_dome': luigi.LocalTarget(self._sky_dome_path())}

    @property
    def output_artifacts(self):
        """Describe the mapping of the produced file to its destination."""
        artifact = {
            'name': 'sky-dome',
            'from': 'rflux_sky.sky',
            'to': self._sky_dome_path(),
        }
        return [artifact]
コード例 #7
0
class ValidateSamFile(FtarcTask):
    """Run Picard ``ValidateSamFile`` on a SAM file and store its report."""

    sam_path = luigi.Parameter()
    fa_path = luigi.Parameter()
    dest_dir_path = luigi.Parameter(default='.')
    picard = luigi.Parameter(default='picard')
    add_validatesamfile_args = luigi.ListParameter(
        default=['--MODE', 'VERBOSE', '--IGNORE', 'MISSING_TAG_NM'])
    n_cpu = luigi.IntParameter(default=1)
    memory_mb = luigi.FloatParameter(default=4096)
    sh_config = luigi.DictParameter(default=dict())
    priority = luigi.IntParameter(default=100)

    def output(self):
        """Return ``<dest_dir>/<sam file name>.ValidateSamFile.txt``."""
        report_name = Path(self.sam_path).name + '.ValidateSamFile.txt'
        dest_dir = Path(self.dest_dir_path).resolve()
        return luigi.LocalTarget(dest_dir.joinpath(report_name))

    def run(self):
        """Configure the shell and execute the validation command."""
        run_id = Path(self.sam_path).name
        self.print_log(f'Validate a SAM file:\t{run_id}')

        sam = Path(self.sam_path).resolve()
        fa = Path(self.fa_path).resolve()
        # Sequence dictionary is expected next to the FASTA as <stem>.dict.
        fa_dict = fa.parent.joinpath(f'{fa.stem}.dict')
        output_txt = Path(self.output().path)

        java_options = self.generate_gatk_java_options(
            n_cpu=self.n_cpu, memory_mb=self.memory_mb)
        self.setup_shell(
            run_id=run_id,
            commands=self.picard,
            cwd=Path(self.dest_dir_path).resolve(),
            **self.sh_config,
            env={'JAVA_TOOL_OPTIONS': java_options},
        )

        extra_args = ''.join(f' {a}' for a in self.add_validatesamfile_args)
        command = (
            f'set -e && {self.picard} ValidateSamFile'
            f' --INPUT {sam}'
            f' --REFERENCE_SEQUENCE {fa}'
            f'{extra_args}'
            f' --OUTPUT {output_txt}'
        )
        self.run_shell(
            args=command,
            input_files_or_dirs=[sam, fa, fa_dict],
            output_files_or_dirs=output_txt,
        )
コード例 #8
0
class Boto3Task(tasks.PuppetTask):
    """Call a client method in the spoke account/region and store the result.

    The raw response (or the merge of all pages when paginating) is passed
    through the JMESPath expression in ``filter`` before being written.
    """

    account_id = luigi.Parameter()
    region = luigi.Parameter()

    client = luigi.Parameter()
    use_paginator = luigi.BoolParameter()
    call = luigi.Parameter()
    arguments = luigi.DictParameter()
    filter = luigi.Parameter()

    requester_task_id = luigi.Parameter()
    requester_task_family = luigi.Parameter()

    def params_for_results_display(self):
        """Return the parameters that identify this task in result listings."""
        shown = (
            "account_id",
            "region",
            "client",
            "use_paginator",
            "call",
            "requester_task_id",
            "requester_task_family",
            "cache_invalidator",
        )
        return {name: getattr(self, name) for name in shown}

    def run(self):
        """Invoke the configured call, filter the response, write the output."""
        with self.spoke_regional_client(self.client) as client:
            if not self.use_paginator:
                result = getattr(client, self.call)(**self.arguments)
            else:
                pages = client.get_paginator(self.call).paginate(
                    **self.arguments)
                result = {}
                # Fold every page into a single accumulated dictionary.
                for page in pages:
                    always_merger.merge(result, page)

        filtered = jmespath.search(self.filter, result)
        # String results are written without surrounding whitespace.
        self.write_output(
            filtered.strip() if isinstance(filtered, str) else filtered)
コード例 #9
0
ファイル: tasks.py プロジェクト: denmoroz/pipemodels
class ExtractFeatures(luigi.Task):
    """Extract features for the object ids listed in ``input_file``.

    The extractor class is imported from the dotted path in
    ``extractor_class``, instantiated with ``extractor_params`` and asked to
    extract features for every id; the result is pickled into a gzip target.
    """
    base_path = luigi.Parameter(default='')

    input_file = luigi.Parameter()

    extractor_class = luigi.Parameter()
    extractor_params = luigi.DictParameter()

    def _read_lines(self):
        """Return the lines of ``input_file`` without trailing newlines."""
        # Use a context manager so the file handle is closed deterministically
        # instead of being left for the garbage collector.
        with open(self.input_file) as input_handle:
            return [line.rstrip('\n') for line in input_handle]

    def run(self):
        """Run the extractor on all ids and pickle the extracted features.

        Raises:
            ValueError: if the imported class is not a subclass of
                ``BaseFeaturesExtractor``.
        """
        objects_ids = self._read_lines()

        extractor_class = import_object(self.extractor_class)

        if not issubclass(extractor_class, BaseFeaturesExtractor):
            raise ValueError('%s is not a subclass of BaseFeaturesExtractor' %
                             extractor_class)

        extractor = extractor_class(**self.extractor_params)
        extracted_features = extractor.extract(objects_ids)

        with self.output().open('wb') as output_file:
            pickle.dump(extracted_features, file=output_file)

    def output(self):
        """Return the gzip target whose name encodes extractor and inputs.

        The file name combines the extractor class name with a checksum built
        from the extractor parameters and the input file, so a change to
        either yields a different target.
        """
        _, extractor_class_name = split_object_path(self.extractor_class)

        input_checksum = file_checksum(self.input_file)
        extractor_checksum = object_checksum(self.extractor_params)

        features_id = object_checksum(''.join(
            [extractor_checksum, input_checksum]))

        file_name = 'features-%s-%s.pickle' % (extractor_class_name,
                                               features_id)

        return luigi.LocalTarget(os.path.join(self.base_path, 'features',
                                              file_name),
                                 format=luigi.format.Gzip)
コード例 #10
0
ファイル: resource.py プロジェクト: dceoy/vcline
class CreateIntervalListWithBed(VclineTask):
    """Convert a BED file into an interval_list with GATK BedToIntervalList."""

    bed_path = luigi.Parameter()
    seq_dict_path = luigi.Parameter()
    dest_dir_path = luigi.Parameter(default='.')
    gatk = luigi.Parameter(default='gatk')
    n_cpu = luigi.IntParameter(default=1)
    memory_mb = luigi.FloatParameter(default=4096)
    sh_config = luigi.DictParameter(default=dict())
    priority = 60

    def output(self):
        """Return ``<dest_dir>/<bed stem>.interval_list``."""
        file_name = Path(self.bed_path).stem + '.interval_list'
        return luigi.LocalTarget(
            Path(self.dest_dir_path).resolve().joinpath(file_name))

    def run(self):
        """Configure the shell and execute the conversion command."""
        interval_list = Path(self.output().path)
        run_id = interval_list.stem
        self.print_log(f'Create an interval_list file:\t{run_id}')

        bed = Path(self.bed_path).resolve()
        seq_dict = Path(self.seq_dict_path).resolve()

        java_options = self.generate_gatk_java_options(
            n_cpu=self.n_cpu, memory_mb=self.memory_mb)
        self.setup_shell(
            run_id=run_id,
            commands=self.gatk,
            cwd=interval_list.parent,
            **self.sh_config,
            env={'JAVA_TOOL_OPTIONS': java_options},
        )

        command = (
            f'set -e && {self.gatk} BedToIntervalList'
            f' --INPUT {bed}'
            f' --SEQUENCE_DICTIONARY {seq_dict}'
            f' --OUTPUT {interval_list}'
        )
        self.run_shell(
            args=command,
            input_files_or_dirs=[bed, seq_dict],
            output_files_or_dirs=interval_list,
        )
コード例 #11
0
ファイル: segmentation.py プロジェクト: joshy/rima
class Lung(luigi.Task):
    """Segment an exam and save the mask and masked volume as NIfTI files."""

    data = luigi.DictParameter()
    key = luigi.Parameter()

    def requires(self):
        """Depend on the ``D2N`` task for the same exam."""
        return D2N(self.data, self.key)

    def workdir(self):
        """Return the per-exam results directory (patient / accession)."""
        return "work/results/%s/%s" % (self.data["patient_id"],
                                       self.data["accession_number"])

    def output(self):
        """Return the marker file written once segmentation has finished."""
        marker = "work/results/%s/%s/segmentation.txt" % (
            self.data["patient_id"], self.data["accession_number"])
        return luigi.LocalTarget(marker)

    def create_nifti(self, mask):
        """Stack ``mask`` into a volume and wrap it as a ``Nifti1Image``.

        Header and affine are copied from the first ``*.nii.gz`` file found
        in the work directory.
        """
        # NOTE(review): glob order is not guaranteed, and files written by
        # run() into the same directory also match this pattern on a re-run;
        # confirm the first match is always the intended reference image.
        candidates = glob.glob(self.workdir() + "/*.nii.gz")
        reference = nib.load(candidates[0])

        volume = np.stack(mask, -1).astype(np.uint16)
        # Reverse the last axis, swap the first two axes, then reverse the
        # middle axis.
        volume = volume[:, :, ::-1]
        volume = np.swapaxes(volume, 0, 1)
        volume = volume[:, ::-1, :]
        return nib.Nifti1Image(volume,
                               header=reference.header,
                               affine=reference.affine)

    def run(self):
        """Segment the exam images and persist both result volumes."""
        __, imgs = load_exam(self.data["images_dir"])
        masked_lung, mask = segment(imgs)

        # Both images are built before anything is saved to the work dir.
        mask_image = self.create_nifti(mask)
        masked_lung_image = self.create_nifti(masked_lung)

        target_dir = self.workdir()
        nib.save(mask_image, target_dir + '/lung_mask.nii.gz')
        nib.save(masked_lung_image, target_dir + "/masked_lung.nii.gz")

        with self.output().open("w") as marker_file:
            marker_file.write("segmentation done")
コード例 #12
0
class RenameColumn(gokart.TaskOnKart):
    """Rename columns of the pd.DataFrame produced by ``data_task``."""

    task_namespace = 'redshells.data_frame_utils'
    data_task = gokart.TaskInstanceParameter(
        description='A task outputs pd.DataFrame.')
    rename_rule = luigi.DictParameter()  # type: Dict[str, str]
    output_file_path = luigi.Parameter(
        default='data/rename_column.pkl')  # type: str

    def requires(self):
        """Depend on the task that produces the input data frame."""
        return self.data_task

    def output(self):
        """Return the target located at ``output_file_path``."""
        return self.make_target(self.output_file_path)

    def run(self):
        """Load the frame, apply ``rename_rule`` and dump the result."""
        # Every column named as a rename source is required on load.
        source_columns = set(self.rename_rule)
        frame = self.load_data_frame(required_columns=source_columns)
        renamed = frame.rename(columns=dict(self.rename_rule))
        self.dump(renamed)
コード例 #13
0
ファイル: hive.py プロジェクト: diegotriana11/Luigi2BigData
class ExternalHiveTask(luigi.ExternalTask):
    """
    External task that depends on a Hive table/partition.
    """

    database = luigi.Parameter(default='default')
    table = luigi.Parameter()
    partition = luigi.DictParameter(
        default={},
        description=
        'Python dictionary specifying the target partition e.g. {"date": "2013-01-25"}'
    )

    def output(self):
        """Return a partition target if a partition is given, else a table target."""
        if not self.partition:
            # No partition spec: depend on the table as a whole.
            return HiveTableTarget(self.table, self.database)
        return HivePartitionTarget(table=self.table,
                                   partition=self.partition,
                                   database=self.database)
コード例 #14
0
class genome_index(luigi.Task):
    """Build a genome index next to the FASTA file, under ``index/``."""

    max_threads = luigi.IntParameter()
    cfg = luigi.DictParameter()

    def _fasta_dir(self):
        """Return everything before the last '/' of ``cfg['fasta_file']``.

        Yields an empty string when the path contains no '/'.
        """
        return self.cfg['fasta_file'].rpartition('/')[0]

    def output(self):
        """Return the target for the ``<base_name>.1.bt2`` index file."""
        index_file = self.cfg['base_name'] + '.1.bt2'
        return luigi.LocalTarget(
            os.path.join(self._fasta_dir(), 'index', index_file))

    def run(self):
        """Ensure the output path is usable, then run the index builder."""
        pipeline_utils.confirm_path(self.output().path)

        index_prefix = os.path.join(
            self._fasta_dir(), 'index', self.cfg['base_name'])
        cmd = [
            self.cfg['bowtie_build_location'],
            '--threads=%s' % self.max_threads,
            self.cfg['fasta_file'],
            index_prefix,
        ]
        pipeline_utils.command_call(cmd, [self.output()],
                                    threads_needed=self.max_threads,
                                    sleep_time=0.1)
        # Switch the working directory to the one recorded in global_vars.
        os.chdir(global_vars.cwd)
コード例 #15
0
ファイル: calculation_finders.py プロジェクト: mkhorton/GASpy
class FindBulk(FindCalculation):
    '''
    This task will try to find a bulk calculation in either our auxiliary Mongo
    database or our FireWorks database. If the calculation is complete, then it
    will return the results. If the calculation is pending, it will wait. If
    the calculation has not yet been submitted, then it will start the
    calculation.

    Args:
        mpid            A string indicating the Materials Project ID of the bulk
                        you are looking for (e.g., 'mp-30')
        vasp_settings   A dictionary containing your VASP settings
    saved output:
        doc     When the calculation is found in our auxiliary Mongo database
                successfully, then this task's output will be the matching
                Mongo document (i.e., dictionary) with various information
                about the system. Some important keys include 'fwid',
                'fwname', or 'results'. This document should also be able to
                be turned into an `ase.Atoms` object using
                `gaspy.mongo.make_atoms_from_doc`.
    '''
    mpid = luigi.Parameter()
    vasp_settings = luigi.DictParameter(BULK_SETTINGS['vasp'])

    def _load_attributes(self):
        '''
        Parses and saves Luigi parameters into various class attributes
        required to run this task, as per the parent class `FindCalculation`
        '''
        calculation_type = 'unit cell optimization'

        # Query against the auxiliary Mongo database (keys under `fwname`).
        gasdb_query = {
            "fwname.calculation_type": calculation_type,
            "fwname.mpid": self.mpid,
        }
        gasdb_query.update(
            ('fwname.vasp_settings.%s' % key, value)
            for key, value in self.vasp_settings.items())

        # Query against the FireWorks database (keys under `name`).
        fw_query = {
            'name.calculation_type': calculation_type,
            'name.mpid': self.mpid,
        }
        fw_query.update(
            ('name.vasp_settings.%s' % key, value)
            for key, value in self.vasp_settings.items())

        self.gasdb_query = gasdb_query
        self.fw_query = fw_query
        self.dependency = MakeBulkFW(self.mpid, self.vasp_settings)
コード例 #16
0
class SoftMasking(RomiTask):
    """Compute a soft mask for every input image and store it as PNG.

    ``type`` selects the masking function ("linear", "excess_green" or
    "vesselness"); ``params`` supplies its settings ("scale", and "coefs"
    for the linear type).
    """
    type = luigi.Parameter()
    params = luigi.DictParameter(default=None)

    def requires(self):
        """Depend on the undistorted images."""
        return Undistort()

    def run(self):
        """Build the masking function, then apply it to each input file.

        Raises:
            Exception: if ``type`` is not one of the supported names.
        """
        kind = self.type

        if kind == "linear":
            coefs = self.params["coefs"]
            scale = self.params["scale"]

            def make_mask(image):
                # Weighted sum of the three smoothed channels.
                smoothed = gaussian_filter(image, scale)
                return (coefs[0] * smoothed[:, :, 0] +
                        coefs[1] * smoothed[:, :, 1] +
                        coefs[2] * smoothed[:, :, 2])
        elif kind == "excess_green":
            scale = self.params["scale"]

            def make_mask(image):
                result = excess_green(gaussian_filter(image, scale))
                # NOTE(review): `dilation` is not defined in this class;
                # confirm it is a module-level name, otherwise this branch
                # raises NameError.
                for _ in range(dilation):
                    result = binary_dilation(result)
                return result
        elif kind == "vesselness":
            scale = self.params["scale"]

            def make_mask(image):
                # Only the second channel (index 1) is used.
                return vesselness_2D(image[:, :, 1], scale)
        else:
            raise Exception("Unknown masking type")

        output_fileset = self.output().get()
        for source_file in self.input().get().get_files():
            # Scale pixel values to floats by dividing by 255.
            pixels = np.asarray(source_file.read_image(), float) / 255
            mask = np.asarray(255 * make_mask(pixels), dtype=np.uint8)
            mask_file = output_fileset.get_file(source_file.id, create=True)
            mask_file.write_image('png', mask)
コード例 #17
0
class DoTerminatePortfolioInSpokeTask(
        spoke_local_portfolio_base_task.SpokeLocalPortfolioBaseTask,
        manifest_mixin.ManifestMixen,
        dependency.DependenciesMixin,
):
    """Require deletion of a spoke-local portfolio and record the outcome."""

    manifest_file_path = luigi.Parameter()
    spoke_local_portfolio_name = luigi.Parameter()
    puppet_account_id = luigi.Parameter()
    sharing_mode = luigi.Parameter()

    product_generation_method = luigi.Parameter()
    organization = luigi.Parameter()
    associations = luigi.ListParameter()
    launch_constraints = luigi.DictParameter()
    portfolio = luigi.Parameter()
    region = luigi.Parameter()
    account_id = luigi.Parameter()

    def params_for_results_display(self):
        """Return the parameters that identify this task in result listings."""
        shown = (
            "spoke_local_portfolio_name",
            "account_id",
            "region",
            "portfolio",
            "cache_invalidator",
        )
        return {name: getattr(self, name) for name in shown}

    def requires(self):
        """Depend on the ``DeletePortfolio`` task for the same portfolio."""
        forwarded = (
            "manifest_file_path",
            "spoke_local_portfolio_name",
            "account_id",
            "region",
            "portfolio",
            "product_generation_method",
            "puppet_account_id",
        )
        delete_kwargs = {name: getattr(self, name) for name in forwarded}
        return delete_portfolio_task.DeletePortfolio(**delete_kwargs)

    def run(self):
        """Write the display parameters as this task's output."""
        summary = self.params_for_results_display()
        self.write_output(summary)
コード例 #18
0
ファイル: srna.py プロジェクト: mshakya/piret
class ExtractPPW(luigi.WrapperTask):
    """Wrapper task that schedules one ``ExtractPP`` task per sample.

    For every sample key in ``fastq_dic`` the mapping directory
    ``<workdir>/processes/mapping/<sample>`` is created if missing. An
    ``ExtractPP`` task is yielded when ``kingdom`` is 'prokarya', 'eukarya'
    or 'both'; any other value yields nothing.
    """

    fastq_dic = luigi.DictParameter()
    indexfile = luigi.Parameter()
    workdir = luigi.Parameter()
    num_cpus = luigi.IntParameter()
    kingdom = luigi.Parameter()

    def requires(self):
        """Create each sample's mapping directory and yield its task."""
        # The previous version also listed ``*.splice`` files in workdir and
        # built a trim directory path, but never used either; that dead work
        # has been removed.
        supported_kingdoms = ('prokarya', 'eukarya', 'both')
        for samp in self.fastq_dic:
            map_dir = os.path.join(self.workdir, "processes", "mapping", samp)
            # exist_ok avoids the check-then-create race of isdir()/makedirs().
            os.makedirs(map_dir, exist_ok=True)
            if self.kingdom in supported_kingdoms:
                yield ExtractPP(num_cpus=self.num_cpus,
                                map_dir=map_dir,
                                sample=samp,
                                kingdom=self.kingdom,
                                workdir=self.workdir)
コード例 #19
0
class DownloadAndIndexResourceVcfs(luigi.Task):
    """Fetch every ``.vcf.gz`` input through ``FetchResourceVcf``.

    The outputs are the inputs themselves plus a ``.tbi`` target for each
    input whose path ends with ``.vcf.gz``.
    """

    bgzip = luigi.Parameter(default='bgzip')
    tabix = luigi.Parameter(default='tabix')
    n_cpu = luigi.IntParameter(default=1)
    sh_config = luigi.DictParameter(default=dict())
    priority = 10

    def output(self):
        """Return the input targets followed by their ``.tbi`` targets."""
        inputs = self.input()
        index_targets = [
            luigi.LocalTarget(f'{target.path}.tbi')
            for target in inputs
            if target.path.endswith('.vcf.gz')
        ]
        return inputs + index_targets

    def run(self):
        """Yield one ``FetchResourceVcf`` dependency per ``.vcf.gz`` input."""
        vcf_paths = [
            target.path for target in self.input()
            if target.path.endswith('.vcf.gz')
        ]
        yield [
            FetchResourceVcf(src_path=path,
                             bgzip=self.bgzip,
                             tabix=self.tabix,
                             n_cpu=self.n_cpu,
                             sh_config=self.sh_config)
            for path in vcf_paths
        ]
コード例 #20
0
ファイル: quality_control.py プロジェクト: Sue9104/wes
class PrepareFastq(luigi.Task):
    """Symlink a sample's raw FASTQ files into ``<outdir>/raw-data``.

    ``fastq`` must contain the key ``R1`` and may contain ``R2``. The
    ``requires`` method has to be overridden before the task can be
    scheduled.
    """
    resources = {"cpu": 1, "memory": 1}
    sample = luigi.Parameter()
    fastq = luigi.DictParameter(significant=False)
    outdir = luigi.Parameter()

    def requires(self):
        """Must be overridden.

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        # Fixed misspelled exception name, which raised NameError instead.
        raise NotImplementedError("Need to be implemented!")

    def output(self):
        """Return the target for the linked R1 file."""
        return luigi.LocalTarget("{outdir}/raw-data/{sample}_R1.fq.gz".format(
            outdir=self.outdir, sample=self.sample))

    def run(self):
        """Create the raw-data directory and link R1 (and R2 if present)."""
        os.makedirs(os.path.join(self.outdir, 'raw-data'), exist_ok=True)
        cmd = "ln -s {R1} {outdir}/raw-data/{sample}_R1.fq.gz\n".format(
            R1=self.fastq["R1"], sample=self.sample, outdir=self.outdir)
        if "R2" in self.fastq:
            cmd += "ln -s {R2} {outdir}/raw-data/{sample}_R2.fq.gz\n".format(
                R2=self.fastq["R2"], sample=self.sample, outdir=self.outdir)
        logging.info(cmd)
        # NOTE(review): paths are interpolated unquoted into a shell script
        # and the exit status is not checked; paths with spaces will break.
        subprocess.run(cmd, shell=True)
コード例 #21
0
class GenerateNeutralModel(NMETask):
    """Wrapper task that ties everything together."""

    num_bases = luigi.IntParameter(default=1000000000)
    no_single_copy = luigi.BoolParameter()
    neutral_data = luigi.ChoiceParameter(choices=['4d', 'ancestral_repeats'])
    rescale_chroms = luigi.DictParameter()

    def requires(self):
        """Yield the chain of tasks, each fed by the previous one."""
        source_task = (Extract4dSites if self.neutral_data == '4d'
                       else GenerateAncestralRepeatsBed)
        neutral_regions = self.clone(source_task)
        yield neutral_regions

        subsampled = self.clone(SubsampleBed, prev_task=neutral_regions)
        yield subsampled

        # The single-copy filter is inserted unless explicitly disabled.
        training_input = subsampled
        if not self.no_single_copy:
            training_input = self.clone(ExtractSingleCopyRegions,
                                        prev_task=subsampled)
            yield training_input

        trained = self.clone(HalPhyloPTrain, prev_task=training_input,
                             num_bases=self.num_bases)
        yield trained

        # One rescaling task per named chromosome set.
        for set_name, chrom_set in self.rescale_chroms.items():
            yield self.clone(RescaleNeutralModel, prev_task=trained,
                             chroms=chrom_set, set_name=set_name)
コード例 #22
0
class ConvertToCSV(luigi.Task):
    """Convert the monthly sheet of a downloaded spreadsheet to CSV.

    ``target`` must provide the keys ``filename``, ``url``, ``name``,
    ``year``, ``type`` and ``sheets`` (with a ``monthly`` entry).
    """

    target = luigi.DictParameter()

    def requires(self):
        """Depend on the download of the source file."""
        spec = self.target
        return downloader(filepath=spec["filename"], url=spec["url"])

    def output(self):
        """Return the UTF-8 CSV target named after the target name and year."""
        csv_path = "var/raw_{}_{}_monthly.csv".format(
            self.target["name"], self.target["year"])
        return luigi.LocalTarget(csv_path, format=luigi.format.UTF8)

    def run(self):
        """Write the monthly sheet of the source file into the CSV target."""
        spec = self.target
        conversion = dict(
            infile=spec["filename"],
            filetype=spec["type"],
            sheetname=spec["sheets"]["monthly"],
        )
        with self.output().open("w") as csv_file:
            excel2csv(outfile=csv_file, **conversion)
コード例 #23
0
class WristFracture(luigi.Task):
    """Run inference on every file of an exam and store the result as JSON."""

    data = luigi.DictParameter()
    key = luigi.Parameter()

    def run(self):
        """Convert and infer each image file, then write the result.

        Raises:
            ValueError: if ``data['images_dir']`` contains no files. The
                previous version failed here with an unbound ``result``.
        """
        files = [x for x in Path(self.data["images_dir"]).glob("**/*") if x.is_file()]
        if not files:
            raise ValueError(
                f"No image files found under {self.data['images_dir']}")

        # NOTE(review): only the result of the last processed file is
        # written; confirm this is intended for multi-file exams.
        for f in files:
            print(f"\nProcessing file: {f}\n")
            image, _cropped, resized = convert_dicom(f, self.workdir())
            result = infer(image, resized, self.data)
        with self.output().open("w") as outfile:
            json.dump(result, outfile)

    def output(self):
        """Return the JSON result target named after ``key``."""
        return luigi.LocalTarget("work/results/%s.json" % self.key)

    def workdir(self):
        """Create (if needed) and return the per-exam work directory."""
        work_dir = (
            f"work/results/{self.data['patient_id']}/{self.data['accession_number']}"
        )
        Path(work_dir).mkdir(parents=True, exist_ok=True)
        return work_dir
コード例 #24
0
class FetchHospitalizationTask(luigi.Task):
    """Copy the required task's hospitalization file to this date's target."""

    date = luigi.DateParameter()
    metrics_date = luigi.DateParameter()
    city_name = luigi.Parameter()
    states_and_districts = luigi.DictParameter()

    def requires(self):
        """Depend on the default hospitalization task with the same parameters."""
        forwarded = dict(
            date=self.date,
            metrics_date=self.metrics_date,
            states_and_districts=self.states_and_districts,
            city_name=self.city_name,
        )
        return CreateDefaultHosptializationTask(**forwarded)

    def output(self):
        """Return the target for this city and date's hospitalization CSV."""
        csv_path = hospitalization_csv_path(self.city_name, self.date)
        return dropbox_target(csv_path)

    def run(self):
        """Copy the input file's content verbatim into the output."""
        with self.input().open("r") as source_file:
            with self.output().open("w") as destination_file:
                destination_file.write(source_file.read())
コード例 #25
0
class MakeGasFW(FireworkMaker):
    '''
    This task will create and submit a gas relaxation for you.

    Args:
        gas_name        A string indicating which gas you want to relax
        vasp_settings   A dictionary containing your VASP settings
    '''
    gas_name = luigi.Parameter()
    vasp_settings = luigi.DictParameter(GAS_SETTINGS['vasp'])

    def requires(self):
        ''' Depend on the task that generates the gas structure '''
        return GenerateGas(gas_name=self.gas_name)

    def run(self, _testing=False):
        ''' Do not use `_testing=True` unless you are unit testing '''
        # Read the pickled document written by the required task and turn it
        # into an atoms object.
        with open(self.input().path, 'rb') as file_handle:
            atoms = make_atoms_from_doc(pickle.load(file_handle))

        # Create, package, and submit the FireWork
        settings = unfreeze_dict(self.vasp_settings)
        fwork = make_firework(
            atoms=atoms,
            fw_name={
                'calculation_type': 'gas phase optimization',
                'gasname': self.gas_name,
                'vasp_settings': settings,
            },
            vasp_settings=settings)
        submit_fwork(fwork=fwork, _testing=_testing)

        # Let Luigi know that we've made the FireWork
        self._complete = True

        # Pass out the firework for testing, if necessary
        if _testing is True:
            return fwork
コード例 #26
0
class CreateHospitalizationTask(luigi.Task):
    """Write the hospitalization CSV for ``city_name`` on ``date``.

    The task combines two inputs through
    ``calculate_city_states_hospitalizations``: the previous day's
    hospitalization file (requested from ``FetchHospitalizationTask`` while
    running) and the metrics produced by
    ``CalculateMetricsWithoutHospitalizationTask`` for ``metrics_date``.
    """
    date = luigi.DateParameter()
    metrics_date = luigi.DateParameter()
    states_and_districts = luigi.DictParameter()
    city_name = luigi.Parameter()

    def requires(self):
        """Depend on the metrics (without hospitalization) for ``metrics_date``."""
        return CalculateMetricsWithoutHospitalizationTask(
            date=self.metrics_date,
            states_and_districts=self.states_and_districts,
            city_name=self.city_name,
        )

    def output(self):
        """Return the target of the hospitalization CSV for this city and date."""
        csv_path = hospitalization_csv_path(self.city_name, self.date)
        return dropbox_target(csv_path)

    def run(self):
        """Compute the hospitalization CSV from yesterday's file and the metrics."""
        previous_day = self.date - timedelta(days=1)
        # Yielding a task from run() requests it as a dynamic dependency; the
        # value received back is opened below as that task's output target.
        yesterday_target = yield FetchHospitalizationTask(
            date=previous_day,
            metrics_date=self.metrics_date,
            city_name=self.city_name,
            states_and_districts=self.states_and_districts,
        )
        with yesterday_target.open("r") as previous_file:
            with self.input().open("r") as metrics_file:
                with self.output().open("w") as output_file:
                    calculate_city_states_hospitalizations(
                        textio2stringio(previous_file),
                        textio2stringio(metrics_file),
                        output_file,
                        self.city_name,
                    )
        # NOTE(review): delete() targets the same path output() was just
        # written to -- confirm this is intended.
        self.delete()

    def delete(self):
        """Call ``dropbox_delete`` on this task's hospitalization CSV path."""
        csv_path = hospitalization_csv_path(self.city_name, self.date)
        return dropbox_delete(csv_path)
コード例 #27
0
ファイル: train_word2vec.py プロジェクト: yamasakih/redshells
class TrainWord2Vec(gokart.TaskOnKart):
    """Train a gensim ``Word2Vec`` model on the output of another task.

    The texts loaded from ``tokenized_text_data_task`` are shuffled in place
    and passed as ``sentences``; ``word2vec_kwargs`` supplies every other
    constructor argument. The trained model is dumped to ``output_file_path``.
    """
    task_namespace = 'redshells'
    tokenized_text_data_task = gokart.TaskInstanceParameter(
        description='The task outputs tokenized texts with type "List[List[str]]".')
    output_file_path = luigi.Parameter(default='model/word2vec.zip')  # type: str
    word2vec_kwargs = luigi.DictParameter(
        default={},
        description='Arguments for Word2Vec except "sentences". Please see gensim.models.Word2Vec for more details.'
    )  # type: Dict[str, Any]

    def requires(self):
        """The only requirement is the task that provides the tokenized texts."""
        return self.tokenized_text_data_task

    def output(self):
        """Return a model target saved and loaded with ``Word2Vec.save``/``load``."""
        word2vec_cls = gensim.models.Word2Vec
        return self.make_model_target(
            self.output_file_path,
            save_function=word2vec_cls.save,
            load_function=word2vec_cls.load,
        )

    def run(self):
        """Load the texts, shuffle them, train the model and dump it."""
        sentences = self.load()  # type: List[List[str]]
        shuffle(sentences)
        self.dump(gensim.models.Word2Vec(sentences=sentences, **self.word2vec_kwargs))
コード例 #28
0
class BundleWrapperTask(PipeTask):
    """ Stand-in task for a bundle that was created outside of the pipeline,
    so that a Disdat pipeline can refer to it through a
    self.add_external_dependency.

    Scenario:
    1.) User makes a bundle outside of Luigi
    2.) Luigi pipeline wants to use bundle.
       a.) Refer to the bundle as an argument and read it using the API (outside of Luigi dependencies)
       b.) Refer to the bundle using a BundleWrapperTask.  Luigi Disdat pipeline uses the latest
       bundle with the processing_name.

    Two implementation options:
    1.) We add an "add_bundle_dependency()" call to Disdat.  This directly changes how we schedule.
    2.) We add a special Luigi task (as Luigi has for outside files) and use that to produce the
    processing_name, when there isn't an actual task creating the data.

    Thus this task mainly provides a way that
    a.) A user can create a bundle and set the processing_name
    b.) The pipeline can refer to this bundle using the same processing_name

    The parameters in this task should allow one to sufficiently identify versions
    of this bundle.

    Note:  No processing_name and No UUID
    """
    # Bundle name; also returned by pipeline_id().
    name = luigi.Parameter(default=None)
    # owner and tags are declared with significant=False.
    owner = luigi.Parameter(default=None, significant=False)
    tags = luigi.DictParameter(default={}, significant=False)

    def bundle_inputs(self):
        """ Determine input bundles.

        Not implemented for this task; always raises NotImplementedError.
        """
        raise NotImplementedError

    def bundle_outputs(self):
        """ Determine output bundles.

        Not implemented for this task; always raises NotImplementedError.
        """
        raise NotImplementedError

    def pipeline_id(self):
        """ Return the bundle name as the pipeline id.

        The default is a shortened version of pipe_id, but here we want it
        to be the name that was set on the task. """
        return self.name
コード例 #29
0
class GeneratePoliciesTemplate(manifest_tasks.SectionTask):
    """Render ``policies.template.yaml.j2`` and write the result to a local file.

    The template receives ``sharing_policies``, the puppet version reported by
    ``config.get_puppet_version()`` and ``region`` (as ``HOME_REGION``).
    """
    region = luigi.Parameter()
    sharing_policies = luigi.DictParameter()

    def output(self):
        """Return the local target, named after this task's ``uid``."""
        template_path = f"output/{self.uid}.template.yaml"
        return luigi.LocalTarget(template_path)

    def params_for_results_display(self):
        """Return the parameters reported for this task in result displays."""
        return dict(
            region=self.region,
            puppet_account_id=self.puppet_account_id,
            manifest_file_path=self.manifest_file_path,
        )

    def run(self):
        """Render the policies template and write it to ``output()``."""
        template = config.env.get_template("policies.template.yaml.j2")
        rendered_template = template.render(
            sharing_policies=self.sharing_policies,
            VERSION=config.get_puppet_version(),
            HOME_REGION=self.region,
        )
        with self.output().open("w") as template_file:
            template_file.write(rendered_template)
コード例 #30
0
class CalculateMetricsTask(luigi.Task):
    """Write the "metrics with hospitalization" CSV for ``city_name`` on ``date``.

    The metrics from ``CalculateMetricsWithoutHospitalizationTask`` are
    combined with the output of ``CreateHospitalizationTask`` (requested while
    running) through ``calculate_city_stats_with_hospitalizations``.
    """
    # NOTE: date.today() is evaluated once, when this class body executes,
    # not each time the task is instantiated.
    date = luigi.DateParameter(default=date.today())
    states_and_districts = luigi.DictParameter()
    city_name = luigi.Parameter()

    def _metrics_csv_path(self):
        """Return the path shared by ``output()`` and ``delete()``."""
        return f"/data/metrics/{self.city_name}/{self.date}-metrics-with-hospitalization.csv"

    def requires(self):
        """Depend on the metrics (without hospitalization) for ``date``."""
        return CalculateMetricsWithoutHospitalizationTask(
            date=self.date,
            states_and_districts=self.states_and_districts,
            city_name=self.city_name,
        )

    def output(self):
        """Return the target of the metrics-with-hospitalization CSV."""
        return dropbox_target(self._metrics_csv_path())

    def delete(self):
        """Call ``dropbox_delete`` on the metrics-with-hospitalization CSV path."""
        return dropbox_delete(self._metrics_csv_path())

    def run(self):
        """Merge the metrics with the hospitalization data into ``output()``."""
        # Yielding a task from run() requests it as a dynamic dependency; the
        # value received back is opened below as that task's output target.
        hospitalization_target = yield CreateHospitalizationTask(
            date=self.date,
            metrics_date=self.date,
            city_name=self.city_name,
            states_and_districts=self.states_and_districts,
        )
        with self.input().open("r") as metrics_file:
            with hospitalization_target.open("r") as hospitalization_file:
                with self.output().open("w") as output_file:
                    calculate_city_stats_with_hospitalizations(
                        textio2stringio(metrics_file),
                        textio2stringio(hospitalization_file),
                        output_file,
                        self.city_name,
                    )
        # NOTE(review): delete() targets the same path output() was just
        # written to -- confirm this is intended.
        self.delete()