class _Main_e39a9104Orchestrator(luigi.WrapperTask):
    """Wrapper task that requires every task of this module.

    The user-supplied ``_input_params`` are layered on top of the
    module-level ``_default_inputs`` and the merged mapping is passed to
    each required task as its own ``_input_params``.
    """

    # user input for this module
    _input_params = luigi.DictParameter()

    @property
    def input_values(self):
        """Return ``_default_inputs`` overridden by the user's input."""
        merged = dict(_default_inputs)
        merged.update(dict(self._input_params))
        return merged

    def requires(self):
        """Yield a single list with one instance of each module task."""
        task_classes = (
            CopyGridInfo,
            CopyRedistInfo,
            ParseSunUpHours,
            RestructureCumulativeResults,
            RestructureTimestepResults,
            WriteTimestep,
        )
        yield [
            task_cls(_input_params=self.input_values)
            for task_cls in task_classes
        ]
class trim(luigi.Task):
    """Run ``trim_galore`` (with FastQC) on the paired FASTQ files of a lane.

    Parameters:
        cfg: pipeline configuration; this task reads ``output_dir``,
            ``cluster_exec`` and the ``fastq1``/``fastq2`` entries under
            ``cases[case][sample][lane]``.
        case, sample, lane: identifiers that select the input FASTQ pair
            and are used to name every output file.
    """

    priority = 100
    resources = {'threads': 1}

    cfg = luigi.DictParameter()
    case = luigi.Parameter()
    sample = luigi.Parameter()
    lane = luigi.Parameter()

    def output(self):
        """Return trimmed-read, FastQC-report and error-log targets."""
        case_dir = os.path.join(self.cfg['output_dir'], self.case)
        run_ids = (self.case, self.sample, self.lane)

        trimmed = []
        reports = []
        for n in [1, 2]:
            trimmed.append(luigi.LocalTarget(os.path.join(
                case_dir, 'preprocess',
                '%s_%s_%s_R%s_val_%s.fq.gz' % (run_ids + (n, n)))))
            reports.append(luigi.LocalTarget(os.path.join(
                case_dir, 'qc',
                '%s_%s_%s_R%s_val_%s_fastqc.zip' % (run_ids + (n, n)))))

        err_log = luigi.LocalTarget(os.path.join(
            case_dir, 'log', '%s_%s_%s_trim_err.txt' % run_ids))
        return {'trimgalore': trimmed, 'fastqc': reports, 'err_log': err_log}

    def run(self):
        """Build the trim_galore command line and execute it."""
        targets = self.output()
        first_trimmed = targets['trimgalore'][0].path
        first_report = targets['fastqc'][0].path
        err_log = targets['err_log'].path
        lane_cfg = self.cfg['cases'][self.case][self.sample][self.lane]

        cmd = [
            'trim_galore',
            '--fastqc',
            # FastQC reports go to the directory of the 'fastqc' targets.
            '--fastqc_args "--outdir %s"' % os.path.dirname(first_report),
            '--paired',
            '-o', os.path.dirname(first_trimmed),
            '--basename', '%s_%s_%s' % (self.case, self.sample, self.lane),
            '--gzip',
            lane_cfg['fastq1'],
            lane_cfg['fastq2'],
        ]

        # NOTE(review): confirm_path presumably prepares the parent
        # directories of these paths -- verify in pipeline_utils.
        pipeline_utils.confirm_path(first_trimmed)
        pipeline_utils.confirm_path(first_report)

        if self.cfg['cluster_exec']:
            pipeline_utils.cluster_command_call(
                self, cmd, threads=1, ram=4, cfg=self.cfg, err_log=err_log)
        else:
            pipeline_utils.command_call(cmd, err_log=err_log)
class AllDataGSheetTask(luigi.WrapperTask):
    """Wrapper task that requires all four GSheet tasks for one date."""

    # The default is evaluated once, when this class body is executed.
    date = luigi.DateParameter(default=date.today())
    daily_case_growth_page = luigi.IntParameter(default=23)
    positive_breakdown_index = luigi.IntParameter(default=20)
    states_and_districts = luigi.DictParameter()
    glance_page_index = luigi.IntParameter(default=1)

    def requires(self):
        """Yield the four upstream tasks, all bound to ``self.date``."""
        run_date = self.date
        yield from (
            ExtractWardPositiveBreakdownGSheetTask(
                date=run_date, page_index=self.positive_breakdown_index),
            ExtractCaseGrowthTableGSheetTask(
                date=run_date, page=self.daily_case_growth_page),
            HospitalizationSheetGSheetTask(
                date=run_date, states_and_districts=self.states_and_districts),
            ExtractGlanceWardWisePositiveCases(
                date=run_date, page_index=self.glance_page_index),
        )
class clean_ref(luigi.Task):
    """Load the reference given by ``param['ref']`` and write it back out.

    When ``param['clear']`` is truthy, every contig name gets a zero-padded
    four-digit running index inserted right after a leading ``>``, replacing
    the first character of the old name.  The result is always written to
    ``param['ref_cleared']``.
    """

    param = luigi.DictParameter()

    def requires(self):
        """This task has no upstream dependencies."""
        return []

    def run(self):
        """Optionally renumber the contigs, then write the reference."""
        reference = Fa.load_from_file(str(self.param['ref']))
        if self.param['clear']:
            for index, contig in enumerate(reference.contigs):
                contig.name = '>' + ("%04d" % index) + contig.name[1:]
        reference.write(str(self.param['ref_cleared']))

    def output(self):
        """Return the target for the rewritten reference file."""
        return luigi.LocalTarget(str(self.param['ref_cleared']))
class apply_bqsr(luigi.Task):
    """Run GATK3 ``PrintReads`` with a ``-BQSR`` recalibration table.

    The input BAM comes from ``mark_duplicates`` and the recalibration
    table from ``base_recalibrator``; the output is
    ``<case>_<sample>_recalibrated.bam`` in the case's ``preprocess`` folder.
    """

    priority = 92
    # this actually only uses one thread, but the RAM requirements are large
    resources = {'threads': 4}

    cfg = luigi.DictParameter()
    case = luigi.Parameter()
    sample = luigi.Parameter()

    def requires(self):
        """Require the recalibration table and the duplicate-marked BAM."""
        shared = dict(case=self.case, sample=self.sample, cfg=self.cfg)
        return {
            'base_recalibrator': base_recalibrator(**shared),
            'mark_duplicates': mark_duplicates(**shared),
        }

    def output(self):
        """Return the recalibrated-BAM and error-log targets."""
        case_dir = os.path.join(self.cfg['output_dir'], self.case)
        name_parts = (self.case, self.sample)
        bam_target = luigi.LocalTarget(os.path.join(
            case_dir, 'preprocess', '%s_%s_recalibrated.bam' % name_parts))
        log_target = luigi.LocalTarget(os.path.join(
            case_dir, 'log', '%s_%s_apply_bqsr_err.txt' % name_parts))
        return {'apply_bqsr': bam_target, 'err_log': log_target}

    def run(self):
        """Assemble the java command line and execute it."""
        inputs = self.input()
        outputs = self.output()
        err_log = outputs['err_log'].path

        cmd = [
            'java',
            '-Djava.io.tmpdir=%s' % self.cfg['tmp_dir'],
            '-jar', '$GATK3',
            '-T', 'PrintReads',
            '-I', inputs['mark_duplicates']['mark_duplicates']['bam'].path,
            '-R', self.cfg['fasta_file'],
            '-BQSR', inputs['base_recalibrator']['base_recalibrator'].path,
            '-o', outputs['apply_bqsr'].path,
        ]

        if self.cfg['cluster_exec']:
            pipeline_utils.cluster_command_call(
                self, cmd, threads=1, ram=5, cfg=self.cfg, err_log=err_log)
        else:
            pipeline_utils.command_call(cmd, err_log=err_log)
class CreateSkyDome(QueenbeeTask):
    """Create a skydome for daylight coefficient studies."""

    # DAG Input parameters
    _input_params = luigi.DictParameter()

    # Task inputs
    sky_density = luigi.Parameter(default='1')

    @property
    def _simulation_folder(self):
        """``simulation_folder`` input with backslashes turned into ``/``."""
        return self._input_params['simulation_folder'].replace('\\', '/')

    @property
    def _sky_dome_path(self):
        """Location of ``resources/sky.dome`` inside the execution folder."""
        return os.path.join(self.execution_folder, 'resources/sky.dome')

    @property
    def execution_folder(self):
        """Folder the task executes in (the simulation folder)."""
        return self._simulation_folder

    @property
    def initiation_folder(self):
        """Folder the task is initiated from (also the simulation folder)."""
        return self._simulation_folder

    @property
    def params_folder(self):
        """``params_folder`` input joined onto the execution folder."""
        joined = os.path.join(
            self.execution_folder, self._input_params['params_folder'])
        return joined.replace('\\', '/')

    def command(self):
        """Return the ``honeybee-radiance`` command line for this task."""
        template = (
            'honeybee-radiance sky skydome --name rflux_sky.sky '
            '--sky-density {sky_density}'
        )
        return template.format(sky_density=self.sky_density)

    def output(self):
        """Return the ``sky_dome`` target."""
        return {'sky_dome': luigi.LocalTarget(self._sky_dome_path)}

    @property
    def output_artifacts(self):
        """Describe where the generated ``rflux_sky.sky`` is moved to."""
        artifact = {
            'name': 'sky-dome',
            'from': 'rflux_sky.sky',
            'to': self._sky_dome_path,
        }
        return [artifact]
class ValidateSamFile(FtarcTask):
    """Run Picard ``ValidateSamFile`` and store its report as a text file.

    The report is written to ``<dest_dir_path>/<sam name>.ValidateSamFile.txt``.
    """

    sam_path = luigi.Parameter()
    fa_path = luigi.Parameter()
    dest_dir_path = luigi.Parameter(default='.')
    picard = luigi.Parameter(default='picard')
    add_validatesamfile_args = luigi.ListParameter(
        default=['--MODE', 'VERBOSE', '--IGNORE', 'MISSING_TAG_NM'])
    n_cpu = luigi.IntParameter(default=1)
    memory_mb = luigi.FloatParameter(default=4096)
    sh_config = luigi.DictParameter(default=dict())
    priority = luigi.IntParameter(default=100)

    def output(self):
        """Return the target for the validation report."""
        report_name = Path(self.sam_path).name + '.ValidateSamFile.txt'
        dest_dir = Path(self.dest_dir_path).resolve()
        return luigi.LocalTarget(dest_dir.joinpath(report_name))

    def run(self):
        """Configure the shell and invoke Picard on the SAM file."""
        run_id = Path(self.sam_path).name
        self.print_log(f'Validate a SAM file:\t{run_id}')

        sam = Path(self.sam_path).resolve()
        fa = Path(self.fa_path).resolve()
        # The sequence dictionary is expected next to the FASTA file.
        fa_dict = fa.parent.joinpath(f'{fa.stem}.dict')
        dest_dir = Path(self.dest_dir_path).resolve()
        output_txt = Path(self.output().path)

        java_options = self.generate_gatk_java_options(
            n_cpu=self.n_cpu, memory_mb=self.memory_mb)
        self.setup_shell(
            run_id=run_id, commands=self.picard, cwd=dest_dir,
            **self.sh_config, env={'JAVA_TOOL_OPTIONS': java_options})

        extra_args = ''.join(f' {a}' for a in self.add_validatesamfile_args)
        command = (
            f'set -e && {self.picard} ValidateSamFile'
            f' --INPUT {sam}'
            f' --REFERENCE_SEQUENCE {fa}'
            f'{extra_args}'
            f' --OUTPUT {output_txt}'
        )
        self.run_shell(
            args=command,
            input_files_or_dirs=[sam, fa, fa_dict],
            output_files_or_dirs=output_txt)
class Boto3Task(tasks.PuppetTask):
    """Call a boto3 client method and write the JMESPath-filtered result.

    ``call`` names the client method, ``arguments`` are its keyword
    arguments and ``filter`` is the JMESPath expression applied to the
    response.  With ``use_paginator`` set, all pages are merged into one
    dictionary before filtering.
    """

    account_id = luigi.Parameter()
    region = luigi.Parameter()
    client = luigi.Parameter()
    use_paginator = luigi.BoolParameter()
    call = luigi.Parameter()
    arguments = luigi.DictParameter()
    filter = luigi.Parameter()
    requester_task_id = luigi.Parameter()
    requester_task_family = luigi.Parameter()

    def params_for_results_display(self):
        """Return the parameters shown alongside this task's results."""
        shown = (
            "account_id",
            "region",
            "client",
            "use_paginator",
            "call",
            "requester_task_id",
            "requester_task_family",
            "cache_invalidator",
        )
        return {name: getattr(self, name) for name in shown}

    def run(self):
        """Execute the call, filter the response and write the output."""
        with self.spoke_regional_client(self.client) as client:
            if not self.use_paginator:
                method = getattr(client, self.call)
                result = method(**self.arguments)
            else:
                result = dict()
                pages = client.get_paginator(self.call).paginate(
                    **self.arguments)
                for page in pages:
                    always_merger.merge(result, page)

        actual_result = jmespath.search(self.filter, result)
        # String results are stripped of surrounding whitespace.
        if isinstance(actual_result, str):
            actual_result = actual_result.strip()
        self.write_output(actual_result)
class ExtractFeatures(luigi.Task):
    """Run a feature extractor over the object ids listed in a text file.

    Parameters:
        base_path: directory under which ``features/`` output is stored.
        input_file: text file with one object id per line.
        extractor_class: import path of a ``BaseFeaturesExtractor`` subclass.
        extractor_params: keyword arguments used to build the extractor.
    """

    base_path = luigi.Parameter(default='')
    input_file = luigi.Parameter()
    extractor_class = luigi.Parameter()
    extractor_params = luigi.DictParameter()

    def _read_lines(self):
        """Return the lines of ``input_file`` without trailing newlines."""
        # Use a context manager so the file handle is closed deterministically
        # instead of being left to the garbage collector.
        with open(self.input_file) as input_file:
            return [line.rstrip('\n') for line in input_file]

    def run(self):
        """Extract the features and pickle them into the output target.

        Raises:
            ValueError: if ``extractor_class`` does not resolve to a subclass
                of ``BaseFeaturesExtractor``.
        """
        objects_ids = self._read_lines()

        extractor_class = import_object(self.extractor_class)
        if not issubclass(extractor_class, BaseFeaturesExtractor):
            raise ValueError('%s is not a subclass of BaseFeaturesExtractor'
                             % extractor_class)

        extractor = extractor_class(**self.extractor_params)
        extracted_features = extractor.extract(objects_ids)

        with self.output().open('wb') as output_file:
            pickle.dump(extracted_features, file=output_file)

    def output(self):
        """Return a gzip target named after the extractor and a checksum.

        The checksum combines the extractor parameters and the content of the
        input file, so changing either yields a different output file.
        """
        _, extractor_class_name = split_object_path(self.extractor_class)

        input_checksum = file_checksum(self.input_file)
        extractor_checksum = object_checksum(self.extractor_params)
        features_id = object_checksum(''.join(
            [extractor_checksum, input_checksum]))

        file_name = 'features-%s-%s.pickle' % (extractor_class_name,
                                               features_id)
        return luigi.LocalTarget(
            os.path.join(self.base_path, 'features', file_name),
            format=luigi.format.Gzip)
class CreateIntervalListWithBed(VclineTask):
    """Convert a BED file into an interval_list with GATK BedToIntervalList.

    The result is written to ``<dest_dir_path>/<bed stem>.interval_list``.
    """

    bed_path = luigi.Parameter()
    seq_dict_path = luigi.Parameter()
    dest_dir_path = luigi.Parameter(default='.')
    gatk = luigi.Parameter(default='gatk')
    n_cpu = luigi.IntParameter(default=1)
    memory_mb = luigi.FloatParameter(default=4096)
    sh_config = luigi.DictParameter(default=dict())
    priority = 60

    def output(self):
        """Return the target for the interval_list file."""
        file_name = Path(self.bed_path).stem + '.interval_list'
        return luigi.LocalTarget(
            Path(self.dest_dir_path).resolve().joinpath(file_name))

    def run(self):
        """Configure the shell and invoke GATK on the BED file."""
        interval_list = Path(self.output().path)
        run_id = interval_list.stem
        self.print_log(f'Create an interval_list file:\t{run_id}')

        bed = Path(self.bed_path).resolve()
        seq_dict = Path(self.seq_dict_path).resolve()

        java_options = self.generate_gatk_java_options(
            n_cpu=self.n_cpu, memory_mb=self.memory_mb)
        self.setup_shell(
            run_id=run_id, commands=self.gatk, cwd=interval_list.parent,
            **self.sh_config, env={'JAVA_TOOL_OPTIONS': java_options})

        command = (
            f'set -e && {self.gatk} BedToIntervalList'
            f' --INPUT {bed}'
            f' --SEQUENCE_DICTIONARY {seq_dict}'
            f' --OUTPUT {interval_list}'
        )
        self.run_shell(
            args=command,
            input_files_or_dirs=[bed, seq_dict],
            output_files_or_dirs=interval_list)
class Lung(luigi.Task):
    """Segment an exam and save the mask and the masked volume as NIfTI.

    ``data`` must provide ``images_dir``, ``patient_id`` and
    ``accession_number``; results go to
    ``work/results/<patient_id>/<accession_number>``.
    """

    data = luigi.DictParameter()
    key = luigi.Parameter()

    def requires(self):
        """Require the ``D2N`` task for the same exam."""
        return D2N(self.data, self.key)

    def run(self):
        """Run the segmentation and write both images plus a marker file."""
        _, images = load_exam(self.data["images_dir"])
        masked_lung, mask = segment(images)

        # Build both images before saving either one: create_nifti() globs
        # the work dir for *.nii.gz, and the files saved below match it.
        mask_image = self.create_nifti(mask)
        masked_lung_image = self.create_nifti(masked_lung)

        target_dir = self.workdir()
        nib.save(mask_image, target_dir + '/lung_mask.nii.gz')
        nib.save(masked_lung_image, target_dir + "/masked_lung.nii.gz")

        with self.output().open("w") as marker:
            marker.write("segmentation done")

    def output(self):
        """Return the marker file that signals a finished segmentation."""
        exam_ids = (self.data["patient_id"], self.data["accession_number"])
        return luigi.LocalTarget(
            "work/results/%s/%s/segmentation.txt" % exam_ids)

    def workdir(self):
        """Return the per-exam results directory (as a string)."""
        return "work/results/%s/%s" % (self.data["patient_id"],
                                       self.data["accession_number"])

    def create_nifti(self, mask):
        """Wrap ``mask`` in a NIfTI image reusing an existing file's geometry.

        Header and affine are taken from the first ``*.nii.gz`` found in the
        work dir.
        NOTE(review): glob order is not guaranteed, so on a re-run this may
        pick up lung_mask/masked_lung instead of the converted DICOM --
        confirm that is acceptable.
        """
        reference_path = glob.glob(self.workdir() + "/*.nii.gz")[0]
        reference = nib.load(reference_path)

        volume = np.stack(mask, -1).astype(np.uint16)
        volume = volume[:, :, ::-1]          # reverse the last axis
        volume = np.swapaxes(volume, 0, 1)   # exchange the first two axes
        volume = volume[:, ::-1, :]          # reverse the second axis
        return nib.Nifti1Image(volume,
                               header=reference.header,
                               affine=reference.affine)
class RenameColumn(gokart.TaskOnKart):
    """Rename columns of the pd.DataFrame produced by ``data_task``.

    ``rename_rule`` maps old column names to new ones; every old name must
    be present in the loaded data frame.
    """

    task_namespace = 'redshells.data_frame_utils'
    data_task = gokart.TaskInstanceParameter(
        description='A task outputs pd.DataFrame.')
    rename_rule = luigi.DictParameter()  # type: Dict[str, str]
    output_file_path = luigi.Parameter(
        default='data/rename_column.pkl')  # type: str

    def requires(self):
        """Require the task that produces the data frame."""
        return self.data_task

    def output(self):
        """Return the target at ``output_file_path``."""
        return self.make_target(self.output_file_path)

    def run(self):
        """Load the frame, apply the renaming and dump the result."""
        rule = dict(self.rename_rule)
        frame = self.load_data_frame(required_columns=set(rule))
        renamed = frame.rename(columns=rule)
        self.dump(renamed)
class ExternalHiveTask(luigi.ExternalTask):
    """
    External task that depends on a Hive table/partition.

    With a non-empty ``partition`` the output is a ``HivePartitionTarget``;
    otherwise it is a ``HiveTableTarget`` for the whole table.
    """

    database = luigi.Parameter(default='default')
    table = luigi.Parameter()
    partition = luigi.DictParameter(
        default={},
        description=
        'Python dictionary specifying the target partition e.g. {"date": "2013-01-25"}'
    )

    def output(self):
        """Return the partition target if a partition is given, else the table target."""
        # The former `assert self.partition` inside this branch could never
        # fire (the branch is only entered for a non-empty mapping), so the
        # check is expressed once, via truthiness.
        if self.partition:
            return HivePartitionTarget(table=self.table,
                                       partition=self.partition,
                                       database=self.database)
        return HiveTableTarget(self.table, self.database)
class genome_index(luigi.Task):
    """Build an index for ``cfg['fasta_file']`` with the configured builder.

    The index is written to an ``index`` folder next to the FASTA file,
    using ``cfg['base_name']`` as prefix; ``<base_name>.1.bt2`` serves as
    the completion marker.
    """

    max_threads = luigi.IntParameter()
    cfg = luigi.DictParameter()

    def _fasta_dir(self):
        """Return everything before the last ``/`` of the FASTA path."""
        return '/'.join(self.cfg['fasta_file'].split('/')[:-1])

    def output(self):
        """Return the target for the first index file."""
        marker_name = self.cfg['base_name'] + '.1.bt2'
        return luigi.LocalTarget(
            os.path.join(self._fasta_dir(), 'index', marker_name))

    def run(self):
        """Run the index builder, then change back to the original cwd."""
        pipeline_utils.confirm_path(self.output().path)

        index_prefix = os.path.join(
            self._fasta_dir(), 'index', self.cfg['base_name'])
        cmd = [
            self.cfg['bowtie_build_location'],
            '--threads=%s' % self.max_threads,
            self.cfg['fasta_file'],
            index_prefix,
        ]
        pipeline_utils.command_call(
            cmd, [self.output()],
            threads_needed=self.max_threads, sleep_time=0.1)

        # Restore the working directory recorded in global_vars.
        os.chdir(global_vars.cwd)
class FindBulk(FindCalculation):
    '''
    This task will try to find a bulk calculation in either our auxiliary
    Mongo database or our FireWorks database. If the calculation is complete,
    then it will return the results. If the calculation is pending, it will
    wait. If the calculation has not yet been submitted, then it will start
    the calculation.

    Args:
        mpid            A string indicating the Materials Project ID of the
                        bulk you are looking for (e.g., 'mp-30')
        vasp_settings   A dictionary containing your VASP settings
    saved output:
        doc     When the calculation is found in our auxiliary Mongo database
                successfully, then this task's output will be the matching
                Mongo document (i.e., dictionary) with various information
                about the system. Some important keys include 'fwid',
                'fwname', or 'results'. This document should also be able to
                be turned into an `ase.Atoms` object using
                `gaspy.mongo.make_atoms_from_doc`.
    '''
    mpid = luigi.Parameter()
    vasp_settings = luigi.DictParameter(BULK_SETTINGS['vasp'])

    def _load_attributes(self):
        '''
        Parses and saves Luigi parameters into various class attributes
        required to run this task, as per the parent class `FindCalculation`
        '''
        settings = self.vasp_settings

        # Query against the auxiliary Mongo database
        gasdb_query = {"fwname.calculation_type": "unit cell optimization",
                       "fwname.mpid": self.mpid}
        gasdb_query.update({'fwname.vasp_settings.%s' % key: value
                            for key, value in settings.items()})
        self.gasdb_query = gasdb_query

        # Equivalent query against the FireWorks database
        fw_query = {'name.calculation_type': 'unit cell optimization',
                    'name.mpid': self.mpid}
        fw_query.update({'name.vasp_settings.%s' % key: value
                         for key, value in settings.items()})
        self.fw_query = fw_query

        # Task to fall back on when the calculation is not found
        self.dependency = MakeBulkFW(self.mpid, settings)
class SoftMasking(RomiTask):
    """Compute a mask image for every image produced by ``Undistort``.

    Parameters:
        type: masking method, one of ``"linear"``, ``"excess_green"`` or
            ``"vesselness"``.
        params: settings for the chosen method. Every method reads
            ``scale``; ``linear`` also reads ``coefs`` (three channel
            weights) and ``excess_green`` also reads ``dilation`` (number
            of binary dilation passes).
    """

    type = luigi.Parameter()
    params = luigi.DictParameter(default=None)

    def requires(self):
        """Require the undistorted images."""
        return Undistort()

    def run(self):
        """Apply the selected masking function to each input image.

        Raises:
            Exception: if ``type`` is not one of the supported methods.
        """
        if self.type == "linear":
            coefs = self.params["coefs"]
            scale = self.params["scale"]

            def f(x):
                x = gaussian_filter(x, scale)
                # Weighted sum of the three colour channels.
                return (coefs[0] * x[:, :, 0] + coefs[1] * x[:, :, 1] +
                        coefs[2] * x[:, :, 2])
        elif self.type == "excess_green":
            scale = self.params["scale"]
            # `dilation` was previously referenced without being defined in
            # this scope (NameError on the first image); read it from the
            # task parameters like the other settings.
            dilation = self.params["dilation"]

            def f(x):
                img = gaussian_filter(x, scale)
                img = excess_green(img)
                for _ in range(dilation):
                    img = binary_dilation(img)
                return img
        elif self.type == "vesselness":
            scale = self.params["scale"]

            def f(x):
                # Only the second channel (index 1) is used.
                return vesselness_2D(x[:, :, 1], scale)
        else:
            raise Exception("Unknown masking type")

        output_fileset = self.output().get()
        for fi in self.input().get().get_files():
            # Scale pixel values by 1/255, mask, then scale back to uint8.
            data = np.asarray(fi.read_image(), float) / 255
            mask = np.asarray(255 * f(data), dtype=np.uint8)
            newf = output_fileset.get_file(fi.id, create=True)
            newf.write_image('png', mask)
class DoTerminatePortfolioInSpokeTask(
    spoke_local_portfolio_base_task.SpokeLocalPortfolioBaseTask,
    manifest_mixin.ManifestMixen,
    dependency.DependenciesMixin,
):
    """Terminate a spoke-local portfolio by requiring its deletion task.

    The task itself only records its display parameters once the required
    ``DeletePortfolio`` task has completed.
    """

    manifest_file_path = luigi.Parameter()
    spoke_local_portfolio_name = luigi.Parameter()
    puppet_account_id = luigi.Parameter()
    sharing_mode = luigi.Parameter()

    product_generation_method = luigi.Parameter()
    organization = luigi.Parameter()
    associations = luigi.ListParameter()
    launch_constraints = luigi.DictParameter()
    portfolio = luigi.Parameter()
    region = luigi.Parameter()
    account_id = luigi.Parameter()

    def params_for_results_display(self):
        """Return the parameters shown alongside this task's results."""
        shown = (
            "spoke_local_portfolio_name",
            "account_id",
            "region",
            "portfolio",
            "cache_invalidator",
        )
        return {name: getattr(self, name) for name in shown}

    def requires(self):
        """Require deletion of the portfolio in the target account/region."""
        forwarded = (
            "manifest_file_path",
            "spoke_local_portfolio_name",
            "account_id",
            "region",
            "portfolio",
            "product_generation_method",
            "puppet_account_id",
        )
        kwargs = {name: getattr(self, name) for name in forwarded}
        return delete_portfolio_task.DeletePortfolio(**kwargs)

    def run(self):
        """Write the display parameters as this task's output."""
        self.write_output(self.params_for_results_display())
class ExtractPPW(luigi.WrapperTask):
    """Wrapper task that requires one ``ExtractPP`` task per sample.

    Parameters:
        fastq_dic: mapping whose keys are the sample names.
        indexfile: not used by this wrapper itself.
        workdir: working directory; mapping folders are created below
            ``<workdir>/processes/mapping/<sample>``.
        num_cpus: passed through to ``ExtractPP``.
        kingdom: ``'prokarya'``, ``'eukarya'`` or ``'both'``; any other
            value yields no required task.
    """

    fastq_dic = luigi.DictParameter()
    indexfile = luigi.Parameter()
    workdir = luigi.Parameter()
    num_cpus = luigi.IntParameter()
    kingdom = luigi.Parameter()

    def requires(self):
        """Create each sample's mapping folder and yield its ``ExtractPP``.

        The previous version also listed ``*.splice`` files in ``workdir``
        and built a per-sample qc path, but neither value was ever used,
        and the ``'both'`` branch duplicated the single-kingdom branch;
        that dead code has been removed.
        """
        supported_kingdoms = ('prokarya', 'eukarya', 'both')
        for samp in self.fastq_dic:
            map_dir = os.path.join(self.workdir, "processes", "mapping", samp)
            # exist_ok avoids the check-then-create race of isdir()+makedirs().
            os.makedirs(map_dir, exist_ok=True)
            if self.kingdom in supported_kingdoms:
                yield ExtractPP(num_cpus=self.num_cpus,
                                map_dir=map_dir,
                                sample=samp,
                                kingdom=self.kingdom,
                                workdir=self.workdir)
class DownloadAndIndexResourceVcfs(luigi.Task):
    """Fetch every ``.vcf.gz`` input through ``FetchResourceVcf``.

    The outputs are the inputs themselves plus a ``.tbi`` file for each
    input whose path ends in ``.vcf.gz``.
    """

    bgzip = luigi.Parameter(default='bgzip')
    tabix = luigi.Parameter(default='tabix')
    n_cpu = luigi.IntParameter(default=1)
    sh_config = luigi.DictParameter(default=dict())
    priority = 10

    def output(self):
        """Return the inputs followed by the ``.tbi`` index targets."""
        index_targets = []
        for target in self.input():
            if target.path.endswith('.vcf.gz'):
                index_targets.append(luigi.LocalTarget(f'{target.path}.tbi'))
        return self.input() + index_targets

    def run(self):
        """Yield one ``FetchResourceVcf`` per ``.vcf.gz`` input."""
        vcf_paths = [
            target.path for target in self.input()
            if target.path.endswith('.vcf.gz')
        ]
        yield [
            FetchResourceVcf(
                src_path=path, bgzip=self.bgzip, tabix=self.tabix,
                n_cpu=self.n_cpu, sh_config=self.sh_config)
            for path in vcf_paths
        ]
class PrepareFastq(luigi.Task):
    """Symlink a sample's raw FASTQ files into ``<outdir>/raw-data``.

    Parameters:
        sample: sample name used in the link names.
        fastq: mapping with an ``R1`` path and optionally an ``R2`` path.
        outdir: output directory.
    """

    resources = {"cpu": 1, "memory": 1}

    sample = luigi.Parameter()
    fastq = luigi.DictParameter(significant=False)
    outdir = luigi.Parameter()

    def requires(self):
        """Must be overridden by subclasses.

        Raises:
            NotImplementedError: always. (This used to be spelled
                ``NotImplemetedError`` and therefore raised ``NameError``.)
        """
        raise NotImplementedError("Need to be implemented!")

    def output(self):
        """Return the target for the R1 link."""
        return luigi.LocalTarget("{outdir}/raw-data/{sample}_R1.fq.gz".format(
            outdir=self.outdir, sample=self.sample))

    def run(self):
        """Create the ``raw-data`` folder and link R1 (and R2 if present)."""
        os.makedirs(os.path.join(self.outdir, 'raw-data'), exist_ok=True)

        cmd = "ln -s {R1} {outdir}/raw-data/{sample}_R1.fq.gz\n".format(
            R1=self.fastq["R1"], sample=self.sample, outdir=self.outdir)
        if "R2" in self.fastq:
            cmd += "ln -s {R2} {outdir}/raw-data/{sample}_R2.fq.gz\n".format(
                R2=self.fastq["R2"], sample=self.sample, outdir=self.outdir)

        logging.info(cmd)
        # NOTE(review): paths are interpolated into a shell string unquoted
        # and the exit status is not checked -- paths containing spaces or a
        # failing `ln` go unnoticed here.
        subprocess.run(cmd, shell=True)
class GenerateNeutralModel(NMETask):
    """Wrapper task that ties everything together."""

    num_bases = luigi.IntParameter(default=1000000000)
    no_single_copy = luigi.BoolParameter()
    neutral_data = luigi.ChoiceParameter(choices=['4d', 'ancestral_repeats'])
    rescale_chroms = luigi.DictParameter()

    def requires(self):
        """Yield the task chain; each step gets the previous as ``prev_task``.

        Order: neutral-region extraction, BED subsampling, optional
        single-copy filtering, model training, then one rescaling task per
        entry of ``rescale_chroms``.
        """
        if self.neutral_data == '4d':
            source_cls = Extract4dSites
        else:
            source_cls = GenerateAncestralRepeatsBed
        current = self.clone(source_cls)
        yield current

        current = self.clone(SubsampleBed, prev_task=current)
        yield current

        # Single-copy filtering is on unless explicitly disabled.
        if not self.no_single_copy:
            current = self.clone(ExtractSingleCopyRegions, prev_task=current)
            yield current

        trained = self.clone(HalPhyloPTrain, prev_task=current,
                             num_bases=self.num_bases)
        yield trained

        # Every rescaled model hangs off the same trained model.
        for set_name, chrom_set in self.rescale_chroms.items():
            yield self.clone(RescaleNeutralModel, prev_task=trained,
                             chroms=chrom_set, set_name=set_name)
class ConvertToCSV(luigi.Task):
    """Convert the ``monthly`` sheet of a downloaded spreadsheet to CSV.

    ``target`` must provide ``filename``, ``url``, ``name``, ``year``,
    ``type`` and ``sheets['monthly']``.
    """

    target = luigi.DictParameter()

    def requires(self):
        """Require the download of ``target['url']`` to ``target['filename']``."""
        spec = self.target
        return downloader(filepath=spec["filename"], url=spec["url"])

    def output(self):
        """Return the UTF-8 CSV target under ``var/``."""
        csv_path = "var/raw_{}_{}_monthly.csv".format(
            self.target["name"], self.target["year"])
        return luigi.LocalTarget(csv_path, format=luigi.format.UTF8)

    def run(self):
        """Write the converted sheet into the output target."""
        spec = self.target
        with self.output().open("w") as csv_file:
            excel2csv(
                infile=spec["filename"],
                outfile=csv_file,
                filetype=spec["type"],
                sheetname=spec["sheets"]["monthly"],
            )
class WristFracture(luigi.Task):
    """Run inference on the files of an exam and store the result as JSON.

    ``data`` must provide ``images_dir``, ``patient_id`` and
    ``accession_number``; the result goes to ``work/results/<key>.json``.
    """

    data = luigi.DictParameter()
    key = luigi.Parameter()

    def run(self):
        """Convert and infer on every file, then dump the last result.

        Raises:
            FileNotFoundError: if ``images_dir`` contains no files. The
                previous code failed later with an unbound ``result``.
        """
        files = [x for x in Path(self.data["images_dir"]).glob("**/*")
                 if x.is_file()]
        if not files:
            raise FileNotFoundError(
                f"No files found under {self.data['images_dir']}")

        work_dir = self.workdir()
        for f in files:
            print(f"\nProcessing file: {f}\n")
            image, _, resized = convert_dicom(f, work_dir)
            # NOTE(review): result is overwritten on every iteration, so
            # only the last file's inference is written -- confirm intended.
            result = infer(image, resized, self.data)

        with self.output().open("w") as outfile:
            json.dump(result, outfile)

    def output(self):
        """Return the JSON result target for this key."""
        return luigi.LocalTarget("work/results/%s.json" % self.key)

    def workdir(self):
        """Create (if needed) and return the per-exam work directory."""
        work_dir = (
            f"work/results/{self.data['patient_id']}/{self.data['accession_number']}"
        )
        Path(work_dir).mkdir(parents=True, exist_ok=True)
        return work_dir
class FetchHospitalizationTask(luigi.Task):
    """Copy the default hospitalization file to the dated hospitalization CSV.

    The output lives at ``hospitalization_csv_path(city_name, date)``.
    """

    date = luigi.DateParameter()
    metrics_date = luigi.DateParameter()
    city_name = luigi.Parameter()
    states_and_districts = luigi.DictParameter()

    def requires(self):
        """Require the default hospitalization data with the same parameters."""
        return CreateDefaultHosptializationTask(
            date=self.date,
            metrics_date=self.metrics_date,
            states_and_districts=self.states_and_districts,
            city_name=self.city_name,
        )

    def output(self):
        """Return the target for this city's and date's CSV."""
        csv_path = hospitalization_csv_path(self.city_name, self.date)
        return dropbox_target(csv_path)

    def run(self):
        """Write the input's content unchanged to the output."""
        with self.input().open("r") as source, \
                self.output().open("w") as destination:
            destination.write(source.read())
class MakeGasFW(FireworkMaker):
    '''
    This task will create and submit a gas relaxation for you.

    Args:
        gas_name        A string indicating which gas you want to relax
        vasp_settings   A dictionary containing your VASP settings
    '''
    gas_name = luigi.Parameter()
    vasp_settings = luigi.DictParameter(GAS_SETTINGS['vasp'])

    def requires(self):
        ''' The gas atoms object is produced by `GenerateGas` '''
        return GenerateGas(gas_name=self.gas_name)

    def run(self, _testing=False):
        '''
        Do not use `_testing=True` unless you are unit testing

        Returns:
            The FireWork when `_testing` is `True`, otherwise `None`
        '''
        # Read the pickled document and turn it into an atoms object
        with open(self.input().path, 'rb') as pickled_doc:
            atoms = make_atoms_from_doc(pickle.load(pickled_doc))

        # Build the FireWork; the same unfrozen settings go into both the
        # name and the FireWork itself
        settings = unfreeze_dict(self.vasp_settings)
        fwork = make_firework(
            atoms=atoms,
            fw_name={'calculation_type': 'gas phase optimization',
                     'gasname': self.gas_name,
                     'vasp_settings': settings},
            vasp_settings=settings)

        # Submit it
        submit_fwork(fwork=fwork, _testing=_testing)

        # Let Luigi know that we've made the FireWork
        self._complete = True

        # Pass out the firework for testing, if necessary
        return fwork if _testing is True else None
class CreateHospitalizationTask(luigi.Task):
    """Build the hospitalization CSV for ``date`` from the previous day's file.

    Combines the previous day's hospitalization CSV with the metrics
    computed for ``metrics_date`` and writes the result to
    ``hospitalization_csv_path(city_name, date)``.
    """

    date = luigi.DateParameter()
    metrics_date = luigi.DateParameter()
    states_and_districts = luigi.DictParameter()
    city_name = luigi.Parameter()

    def requires(self):
        """Require the metrics (without hospitalization) for ``metrics_date``."""
        return CalculateMetricsWithoutHospitalizationTask(
            date=self.metrics_date,
            states_and_districts=self.states_and_districts,
            city_name=self.city_name,
        )

    def output(self):
        """Return the target for this city's and date's hospitalization CSV."""
        return dropbox_target(
            hospitalization_csv_path(self.city_name, self.date))

    def run(self):
        """Compute and write the hospitalization CSV.

        The previous day's file is requested as a dynamic dependency: the
        ``yield`` hands the task to Luigi and resumes with its output target.
        """
        yesterday_hospitalization = yield FetchHospitalizationTask(
            date=self.date - timedelta(days=1),
            metrics_date=self.metrics_date,
            city_name=self.city_name,
            states_and_districts=self.states_and_districts,
        )
        with (yesterday_hospitalization.open("r")
              ) as previous_hospitalization_file, (
                  self.input().open("r")) as metrics_file, (
                      self.output().open("w")) as output_file:
            calculate_city_states_hospitalizations(
                textio2stringio(previous_hospitalization_file),
                textio2stringio(metrics_file),
                output_file,
                self.city_name,
            )
            # NOTE(review): presumably this removes any stale remote file
            # before the new one is uploaded when output_file is closed --
            # confirm against dropbox_target/dropbox_delete and against the
            # original indentation of this call.
            self.delete()

    def delete(self):
        """Delete this task's output path via ``dropbox_delete``."""
        return dropbox_delete(
            hospitalization_csv_path(self.city_name, self.date))
class TrainWord2Vec(gokart.TaskOnKart):
    """Train a gensim Word2Vec model on shuffled tokenized texts."""

    task_namespace = 'redshells'
    tokenized_text_data_task = gokart.TaskInstanceParameter(
        description='The task outputs tokenized texts with type "List[List[str]]".')
    output_file_path = luigi.Parameter(default='model/word2vec.zip')  # type: str
    word2vec_kwargs = luigi.DictParameter(
        default=dict(),
        description='Arguments for Word2Vec except "sentences". Please see gensim.models.Word2Vec for more details.'
    )  # type: Dict[str, Any]

    def requires(self):
        """Require the task that provides the tokenized texts."""
        return self.tokenized_text_data_task

    def output(self):
        """Return a model target saved/loaded with Word2Vec's own methods."""
        word2vec = gensim.models.Word2Vec
        return self.make_model_target(
            self.output_file_path,
            save_function=word2vec.save,
            load_function=word2vec.load)

    def run(self):
        """Shuffle the loaded texts in place, train and dump the model."""
        sentences = self.load()  # type: List[List[str]]
        shuffle(sentences)
        self.dump(
            gensim.models.Word2Vec(sentences=sentences,
                                   **self.word2vec_kwargs))
class BundleWrapperTask(PipeTask):
    """
    This task allows one to create bundles that can be referred to in a
    Disdat pipeline through a self.add_external_dependency.

    1.) User makes a bundle outside of Luigi
    2.) Luigi pipeline wants to use bundle.
       a.) Refer to the bundle as an argument and reads it using API
           (outside of Luigi dependencies)
       b.) Refer to the bundle using a BundleWrapperTask. Luigi Disdat
           pipeline uses the latest bundle with the processing_name.

    Two implementation options:
    1.) We add a "add_bundle_dependency()" call to Disdat. This directly
        changes how we schedule.
    2.) We add a special Luigi task (as luigi has for outside files) and use
        that to produce the processing_name, when there isn't an actual task
        creating the data.

    Thus this task is mainly to provide a way that
    a.) A user can create a bundle and set the processing_name
    b.) The pipeline can refer to this bundle using the same processing_name

    The parameters in this task should allow one to sufficiently identify
    versions of this bundle.

    Note: No processing_name and No UUID
    """
    # Only `name` is significant; `owner` and `tags` are declared with
    # significant=False.
    name = luigi.Parameter(default=None)
    owner = luigi.Parameter(default=None, significant=False)
    tags = luigi.DictParameter(default={}, significant=False)

    def bundle_inputs(self):
        """ Determine input bundles

        Not implemented in this class; always raises NotImplementedError.
        """
        raise NotImplementedError

    def bundle_outputs(self):
        """ Determine output bundles

        Not implemented in this class; always raises NotImplementedError.
        """
        raise NotImplementedError

    def pipeline_id(self):
        """
        default is shortened version of pipe_id
        But here we want it to be the set name

        Returns the `name` parameter.
        """
        return self.name
class GeneratePoliciesTemplate(manifest_tasks.SectionTask):
    """Render ``policies.template.yaml.j2`` and save it under ``output/``."""

    region = luigi.Parameter()
    sharing_policies = luigi.DictParameter()

    def output(self):
        """Return the target ``output/<uid>.template.yaml``."""
        return luigi.LocalTarget(f"output/{self.uid}.template.yaml")

    def params_for_results_display(self):
        """Return the parameters shown alongside this task's results."""
        shown = ("region", "puppet_account_id", "manifest_file_path")
        return {name: getattr(self, name) for name in shown}

    def run(self):
        """Render the template with the sharing policies and write it out."""
        template = config.env.get_template("policies.template.yaml.j2")
        context = dict(
            sharing_policies=self.sharing_policies,
            VERSION=config.get_puppet_version(),
            HOME_REGION=self.region,
        )
        rendered = template.render(**context)
        with self.output().open("w") as template_file:
            template_file.write(rendered)
class CalculateMetricsTask(luigi.Task):
    """Combine a city's metrics with its hospitalization data for one date.

    The result is written to
    ``/data/metrics/<city_name>/<date>-metrics-with-hospitalization.csv``.
    """

    # The default is evaluated once, when this class body is executed.
    date = luigi.DateParameter(default=date.today())
    states_and_districts = luigi.DictParameter()
    city_name = luigi.Parameter()

    def requires(self):
        """Require the metrics computed without hospitalization data."""
        return CalculateMetricsWithoutHospitalizationTask(
            date=self.date,
            states_and_districts=self.states_and_districts,
            city_name=self.city_name,
        )

    def output(self):
        """Return the target for the combined metrics CSV."""
        return dropbox_target(
            f"/data/metrics/{self.city_name}/{self.date}-metrics-with-hospitalization.csv"
        )

    def delete(self):
        """Delete this task's output path via ``dropbox_delete``."""
        return dropbox_delete(
            f"/data/metrics/{self.city_name}/{self.date}-metrics-with-hospitalization.csv"
        )

    def run(self):
        """Compute and write the combined metrics.

        The hospitalization CSV is requested as a dynamic dependency: the
        ``yield`` hands the task to Luigi and resumes with its output target.
        """
        hospitalization = yield CreateHospitalizationTask(
            date=self.date,
            metrics_date=self.date,
            city_name=self.city_name,
            states_and_districts=self.states_and_districts,
        )
        with (self.input().open("r")) as metrics_without_hosptialization, (
                hospitalization.open("r")) as hospitalization_data, (
                    self.output().open("w")) as output_file:
            calculate_city_stats_with_hospitalizations(
                textio2stringio(metrics_without_hosptialization),
                textio2stringio(hospitalization_data),
                output_file,
                self.city_name,
            )
            # NOTE(review): presumably this removes any stale remote file
            # before the new one is uploaded when output_file is closed --
            # confirm against dropbox_target/dropbox_delete and against the
            # original indentation of this call.
            self.delete()