def compute(self):
    in_dataset = self.getInputFromPort('in_dataset')
    method = self.getInputFromPort('method')

    self.positional_args = [(method, 0, 'raw'), ]
    self.keyword_args = {}

    if len(method.split(',')) > 1:
        agg_constraint = "".join(method.split(','))
    else:
        agg_constraint = method

    new_constraints_for_output = set([Constraint('timeagg_info', [agg_constraint]),
                                      Constraint('suffix', ['nc']),
                                      ])

    this_process = ProcessUnit([in_dataset],
                               self.out_pattern,
                               self.command,
                               new_constraints_for_output,
                               execution_options=self._execution_options,
                               positional_args=self.positional_args,
                               cons_keywords=self.keyword_args)

    try:
        this_process.execute(simulate=configuration.simulate_execution)
    except Exception as e:
        raise vistrails_module.ModuleError(self, repr(e))

    process_output = this_process.file_creator
    self.setResult('out_dataset', process_output)

def compute(self): # Required input in_dataset = self.getInputFromPort("in_dataset") new_cons = set([Constraint('extra_info', ['nino34']), Constraint('latsouth_info', ['5S']), Constraint('latnorth_info', ['5N']), Constraint('latagg_info', ['fldavg']), Constraint('lonwest_info', ['190E']), Constraint('loneast_info', ['240E']), Constraint('lonagg_info', ['fldavg']), Constraint('leveltop_info', ['surface']), Constraint('levelbottom_info', ['surface']), Constraint('anomaly_info', ['anom']), ]) # Execute the process. this_process = ProcessUnit([in_dataset], self.out_pattern, self.command, new_cons, positional_args=self.positional_args, execution_options=self._execution_options) try: this_process.execute(simulate=configuration.simulate_execution) except subprocess.CalledProcessError as e: raise vistrails_module.ModuleError(self, e.output) except Exception as e: raise vistrails_module.ModuleError(self, repr(e)) process_output = this_process.file_creator self.setResult('out_dataset', process_output)
def compute(self):
    in_dataset1 = self.getInputFromPort('in_dataset1')
    in_dataset2 = self.getInputFromPort('in_dataset2')
    operation = self.getInputFromPort('operation')

    self.positional_args = [(operation, 0, 'raw'), ]
    self.keyword_args = {}

    new_constraints_for_output = set([Constraint('extra_info', [operation]),
                                      Constraint('suffix', ['nc']),
                                      ])

    this_process = ProcessUnit([in_dataset1, in_dataset2],
                               self.out_pattern,
                               self.command,
                               new_constraints_for_output,
                               execution_options=self._execution_options,
                               positional_args=self.positional_args,
                               cons_keywords=self.keyword_args,
                               merge_output=['model', 'institute'])

    try:
        this_process.execute(simulate=configuration.simulate_execution)
    except Exception as e:
        raise vistrails_module.ModuleError(self, repr(e))

    process_output = this_process.file_creator
    self.setResult('out_dataset', process_output)

def testEquality(self):
    """ Test that Constraints with the same key and values compare equal,
    regardless of the order of the values. """

    constraint_1 = Constraint('model', ['ACCESS1-0'])
    constraint_2 = Constraint('model', ['ACCESS1-0'])
    constraint_3 = Constraint('variable', ['tas', 'pr'])
    constraint_4 = Constraint('variable', ['pr', 'tas'])

    self.assertEqual(constraint_1, constraint_2)
    self.assertEqual(constraint_3, constraint_4)

def test_positionalargs_4(self):
    """ Test that positional arguments work if multiple extra constraints
    found only in the output are added. """

    extra_cons = set([Constraint('animal', ['moose', 'kangaroo']),
                      Constraint('colour', ['blue'])])

    the_process_unit = ProcessUnit([self.a_pattern_ds],
                                   '/another/%file%/%pattern%_%animal%_%colour%.txt',
                                   'echo',
                                   extra_constraints=extra_cons,
                                   positional_args=[('animal', 0), ('colour', 1)])

    ds_result = the_process_unit.execute(simulate=True)

    expected_string = (self.script_header +
                       'mkdir -p /another/file_1\n'
                       'echo moose blue test_file1 /another/file_1/pattern_1_moose_blue.txt\n'
                       'echo kangaroo blue test_file1 /another/file_1/pattern_1_kangaroo_blue.txt\n')

    self.assertEqual(expected_string, the_process_unit.scheduler.job.to_str())

def compute(self): # Required input in_dataset = self.getInputFromPort("in_dataset") # Set up the output command for this module, adding extra options. positional_args = [] anom_label = 'anom-wrt-all' arg_number = 0 try: clim_bounds = self.getInputFromPort('clim_bounds') positional_args += [('-b', arg_number, 'raw'), (clim_bounds, arg_number + 1, 'raw')] start_date, end_date = clim_bounds.split(',') anom_label = 'anom-wrt-' + start_date + '-' + end_date arg_number += 2 except vistrails_module.ModuleError as e: pass try: timescale = self.getInputFromPort('timescale') positional_args += [('-t', arg_number, 'raw'), (timescale, arg_number + 1, 'raw')] anom_label = timescale + anom_label except vistrails_module.ModuleError as e: pass cons_for_output = set([ Constraint('suffix', ['nc']), Constraint('anomaly_info', [anom_label]) ]) # Execute the process. this_process = ProcessUnit([in_dataset], self.out_pattern, self.command, cons_for_output, positional_args=positional_args, execution_options=self._execution_options) try: this_process.execute(simulate=configuration.simulate_execution) except Exception as e: raise vistrails_module.ModuleError(self, repr(e)) process_output = this_process.file_creator self.setResult('out_dataset', process_output)
def setUp(self):
    self.mock_file_pattern = '/fake/%colour%_%animal%.txt'
    self.mock_regex = '^/fake/(?P<colour>.+?)_(?P<animal>.+?)\\.txt$'

    self.fake_constraints = set([Constraint('colour', ['green', 'blue', 'red', 'purple']),
                                 Constraint('animal', ['kangaroo', 'echidna'])])

    # Create a mock set of files to avoid hitting the file system.
    self.mock_file_list = ['/fake/green_echidna.txt',
                           '/fake/blue_kangaroo.txt',
                           '/fake/red_kangaroo.txt',
                           '/fake/purple_kangaroo.txt']

def apply_mappings(self, constraints):

    module_logger.debug("Before applying mappings, output_constraints are: {}"
                        .format(constraints))

    for map_name, map_spec in self.map_dict.items():

        # First update the outputs with values from the input.
        found_con = self.inputlist[map_spec[1]].get_constraint(map_spec[0])
        constraints.add(Constraint(map_name, found_con.values))

        # Remove the empty constraint.
        constraints.remove(Constraint(map_name, []))

        # Update the subsets dictionary for the input.
        # This will fail for a FileCreator.
        try:
            for value in found_con.values:
                module_logger.debug("Updating subsets for {}: {}"
                                    .format(map_name, value))
                found_files = self.inputlist[map_spec[1]].get_files({found_con.key: value})
                module_logger.debug("Found files are: {}".format(found_files))
                self.inputlist[map_spec[1]].subsets[map_name][value] = \
                    [file_ob.full_path for file_ob in found_files]
        except AttributeError:
            pass

        # Add the mapped constraint to the input self.cons_names.
        self.inputlist[map_spec[1]].cons_names.append(map_name)
        # Remove the now obsolete constraint.
        self.inputlist[map_spec[1]].cons_names.remove(map_spec[0])

        # Now alter the valid combinations of the input.
        fixed_combinations = set([])
        for combination in self.inputlist[map_spec[1]].valid_combinations:
            module_logger.debug("Original combination is: {}".format(combination))
            new_list = []
            for constraint in combination:
                if constraint.key == map_spec[0]:
                    new_list.append(Constraint(map_name, constraint.values))
                new_list.append(constraint)
            module_logger.debug("New combination is: {}".format(new_list))
            fixed_combinations.add(frozenset(new_list))

        self.inputlist[map_spec[1]].valid_combinations = fixed_combinations

    module_logger.debug("After applying mappings, output_constraints are: {}"
                        .format(constraints))

    return constraints

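# Illustrative sketch (an assumption, not part of the original source): the
# map_dict consumed by apply_mappings above appears to map an output constraint
# name to an (input_constraint_name, input_index) tuple, matching the
# map_dict={'fut_start': ('year_start', 0), ...} usage in the change-calculation
# module elsewhere in these snippets.
example_map_dict = {'fut_start': ('year_start', 0),
                    'fut_end': ('year_end', 0),
                    'hist_start': ('year_start', 1),
                    'hist_end': ('year_end', 1)}
for out_name, (in_name, in_index) in example_map_dict.items():
    # e.g. output constraint 'fut_start' takes its values from the
    # 'year_start' constraint of input 0 (the future dataset).
    print("output constraint %r <- input %d constraint %r" % (out_name, in_index, in_name))
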
def compute(self):
    logger.debug('compute() ...')

    #in_string = self.getInputFromPort('constraint_string')
    # ds = dsMain(verbose=True)  # launch the GUI; wait for user selections
    ds = dsMain(verbose=False)  # launch the GUI; wait for user selections
    # We want to wait here until the user clicks the "Done" button; how do we do that?
    logger.debug('ds = %s' % ds)

    in_string = ds.getConstraintString()
    logger.debug('in_string = %s' % in_string)

    output_cons = []
    if len(in_string) > 0:
        split_strings = in_string.split(';')
        logger.debug('split_strings = %s' % split_strings)
        for cons_string in split_strings:
            logger.debug('cons_string = %s' % cons_string)
            constraint_list = cons_string.split('=')
            logger.debug('constraint_list = %s' % constraint_list)
            key = constraint_list[0].strip()
            logger.debug('key = %s' % key)
            raw_values = [val for val in constraint_list[1].split(',')]
            final_values = [val_string.strip() for val_string in raw_values]
            output_cons.append(Constraint(key, final_values))

    logger.debug('output_cons = %s' % output_cons)
    self.setResult('constraint_set', output_cons)

def compute(self):
    # Determine the file path patterns.
    patterns = self.get_filepath_patterns()
    logger.debug('Using pattern %s' % patterns)

    # Create constraints.
    constraints = [Constraint(attribute, [values])
                   for attribute, values in self.constraints.iteritems()]

    # Add user constraints.
    user_constraints = self.getInputFromPort("added_constraints")
    if user_constraints:
        constraints.extend(user_constraints)
    else:
        raise ModuleError(self, "No constraints set on DataSet - you cannot run a workflow on the entire DataSet")

    # Create the dataset based on the file search path and constraints.
    dataset = PatternDataSet(patterns, constraints)
    if not dataset.files:
        error_string = "No files found for this dataset with constraints: {}".format(constraints)
        error_string = error_string.replace('],', '],\n')
        logger.error(error_string)
        raise ModuleError(self, error_string)

    self.setResult('out_dataset', dataset)

def test_empty_constraint_overwrite(self):
    """ Test that ProcessUnit throws an exception if a constraint is
    overwritten with nothing. """

    extra_con = set([Constraint('fake', [])])

    self.assertRaises(EmptyOverwriteError, ProcessUnit,
                      [self.a_pattern_ds], '/%fake%/%file%/%pattern%.txt',
                      'echo', extra_constraints=extra_con)

def merge_constraints(self, new_constraints):
    """ Merge the values from new_constraints into the constraints taken
    from the pattern, combining values for repeated keys. """

    existing_cons_names = [cons.key for cons in self.constraints]

    # Now add the constraints - only if they are in the pattern!
    for cons in new_constraints:
        if cons.key in existing_cons_names:
            self.constraints.add(cons)

    attribute_names = [cons.key for cons in self.constraints]

    repeated_atts = []
    for name in attribute_names:
        if attribute_names.count(name) > 1:
            repeated_atts.append(name)

    to_remove = [cons for cons in self.constraints
                 if cons.key in repeated_atts]

    new_cons_dict = {}
    for cons in to_remove:
        new_cons_dict[cons.key] = set([])

    for cons in to_remove:
        new_cons_dict[cons.key] = new_cons_dict[cons.key].union(cons.values)
        self.constraints.remove(cons)

    for key in new_cons_dict:
        self.constraints.add(Constraint(key, new_cons_dict[key]))

def compute(self):
    # Required input
    future_dataset = self.getInputFromPort("future_dataset")
    baseline_dataset = self.getInputFromPort("baseline_dataset")

    # Execute the process.
    new_constraints = [Constraint('change_type', ['abs-change'])]

    this_process = ProcessUnit([future_dataset, baseline_dataset],
                               self.out_pattern,
                               self.command,
                               extra_constraints=new_constraints,
                               execution_options=self._execution_options,
                               map_dict={'fut_start': ('year_start', 0),
                                         'fut_end': ('year_end', 0),
                                         'hist_start': ('year_start', 1),
                                         'hist_end': ('year_end', 1)})

    try:
        this_process.execute(simulate=configuration.simulate_execution)
    except Exception as e:
        # Use repr(e) here: a generic Exception has no .output attribute.
        raise vistrails_module.ModuleError(self, repr(e))

    process_output = this_process.file_creator
    self.setResult('out_dataset', process_output)

def test_files_method(self):
    ''' A FileCreator's .files method should only return files for which
    valid combinations exist. '''

    cons_set = set([Constraint('model', ['ACCESS1-0', 'ACCESS1-3']),
                    Constraint('experiment', ['rcp45', 'rcp85'])])

    this_file_creator = FileCreator("/a/fake/pattern/%model%_%experiment%.nc",
                                    extra_constraints=cons_set)

    # Now tell the file creator which files are real!
    # ACCESS1-3 has no rcp85 experiment in this case.
    file_1 = this_file_creator.get_files({'model': 'ACCESS1-0', 'experiment': 'rcp45'},
                                         check=False, update=True)
    file_2 = this_file_creator.get_files({'model': 'ACCESS1-0', 'experiment': 'rcp85'},
                                         check=False, update=True)
    file_3 = this_file_creator.get_files({'model': 'ACCESS1-3', 'experiment': 'rcp45'},
                                         check=False, update=True)

    # Ensure that the FileCreator has returned a file
    # for each combination.
    for file_thing in [file_1, file_2, file_3]:
        self.assertTrue(file_thing)

    all_files = [file_thing for file_thing in this_file_creator.files]

    # There should only be 3 valid file combinations returned.
    self.assertEqual(len(all_files), 3)

def compute(self):
    in_dataset = self.getInputFromPort('in_dataset')
    method = self.getInputFromPort('method')

    seas_list = {'mon': 'ymon', 'seas': 'yseas', 'ann': 'tim'}

    ### Loop over seas_list to generate all 3 season files. ###
    for seas in seas_list.keys():
        self.positional_args = [('%s%s' % (seas_list[seas], method), 0, 'raw'), ]
        self.keyword_args = {}

        if len(method.split(',')) > 1:
            agg_constraint = "".join(method.split(','))
        else:
            agg_constraint = method

        new_constraints_for_output = set([Constraint('timeagg_info', ['%s%s' % (seas_list[seas], method)]),
                                          Constraint('suffix', ['nc']),
                                          ])

        this_process = ProcessUnit([in_dataset],
                                   self.out_pattern,
                                   self.command,
                                   new_constraints_for_output,
                                   execution_options=self._execution_options,
                                   positional_args=self.positional_args,
                                   cons_keywords=self.keyword_args)

        try:
            this_process.execute(simulate=configuration.simulate_execution)
        except Exception as e:
            raise vistrails_module.ModuleError(self, repr(e))

        process_output = this_process.file_creator
        self.setResult('out_dataset_%s' % seas, process_output)

def test_iteration(self):
    """ Test that you can iterate through the values in a Constraint. """

    new_constraint = Constraint('things', ['this', 'that', 'something_else'])

    out_vals = [out for out in new_constraint]
    expected_outs = [('things', 'this'),
                     ('things', 'that'),
                     ('things', 'something_else')]

    self.assertItemsEqual(expected_outs, out_vals)

def test_badconstraints(self):
    """ Constructing a PatternDataSet with constraints that don't exist
    in the pattern should fail. """

    # Misspelled constraint.
    test_cons = set([Constraint('modell', ['ACCESS1-0'])])

    self.assertRaises(ConstraintNotFoundError, PatternDataSet,
                      "/not/real/pattern/%model%.nc",
                      constraint_set=test_cons)

def test_build_glob_patterns(self):
    """ When constraints are given in the constructor, restrict the
    glob patterns used to search the file system. """

    given_cons = set([Constraint('colour', ['pink', 'green'])])
    pattern_ds = PatternDataSet(self.mock_file_pattern, given_cons)

    expected_patterns = ['/fake/pink_*.txt', '/fake/green_*.txt']
    self.assertItemsEqual(pattern_ds.glob_patterns, expected_patterns)

def test_overwrite_constraints(self):
    """ Test to ensure that Constraints are correctly overwritten
    when data is processed. """

    extra_cons = set([Constraint('extras', ['other_things']),
                      Constraint('fake', ['OVERWRITE'])])

    a_process_unit = ProcessUnit([self.a_pattern_ds],
                                 "/foo/%fake%/%file%/%pattern%_%extras%.txt",
                                 "echo", extra_constraints=extra_cons)

    output_ds = a_process_unit.execute(simulate=True)

    expected_cons = set([Constraint('extras', ['other_things']),
                         Constraint('fake', ['OVERWRITE']),
                         Constraint('file', ['file_1']),
                         Constraint('pattern', ['pattern_1'])])

    self.assertEqual(expected_cons, output_ds.constraints)

    expected_string = (self.script_header +
                       "mkdir -p /foo/OVERWRITE/file_1\n"
                       "echo /a/fake_1/file_1/pattern_1 /foo/OVERWRITE/file_1/pattern_1_other_things.txt\n")
    self.assertEqual(expected_string, a_process_unit.scheduler.job.to_str())

def compute(self):
    in_dataset = self.getInputFromPort('in_dataset')
    method = self.getInputFromPort('method')
    grid = self.getInputFromPort('grid')

    self.positional_args = [(method, 0, 'raw'), (grid, 1, 'raw'), ]
    self.keyword_args = {}

    grid = grid.split('/')[-1]
    if len(grid.split('.')) > 1:  # i.e. a weights file as opposed to pre-defined grid
        grid_constraint = method + '-' + grid.split('.')[0]
    else:
        grid_constraint = method + '-' + grid

    new_constraints_for_output = set([Constraint('grid_info', [grid_constraint]),
                                      Constraint('suffix', ['nc']),
                                      ])

    this_process = ProcessUnit([in_dataset],
                               self.out_pattern,
                               self.command,
                               new_constraints_for_output,
                               execution_options=self._execution_options,
                               positional_args=self.positional_args,
                               cons_keywords=self.keyword_args)

    try:
        this_process.execute(simulate=configuration.simulate_execution)
    except Exception as e:
        raise vistrails_module.ModuleError(self, repr(e))

    process_output = this_process.file_creator
    self.setResult('out_dataset', process_output)

def test_add_then_overwrite(self):
    """ Test to make sure that adding and then overwriting constraints
    in later process units works. """

    extra_con = set([Constraint('an_extra', ['new_value'])])

    a_process_unit = ProcessUnit([self.a_pattern_ds],
                                 "/%fake%/%file%/%pattern%/%an_extra%.txt",
                                 "echo", extra_constraints=extra_con)

    first_output = a_process_unit.execute(simulate=True)

    # Now make a new output with a new value of %pattern%.
    new_process_unit = ProcessUnit([first_output],
                                   "/%fake%/%file%/%pattern%/%an_extra%.txt",
                                   "echo",
                                   extra_constraints=set([Constraint('pattern', ['OVERWRITE_PATTERN'])]))
    new_process_unit.execute(simulate=True)

    expected_string = (self.script_header +
                       "mkdir -p /fake_1/file_1/OVERWRITE_PATTERN\n"
                       "echo /fake_1/file_1/pattern_1/new_value.txt /fake_1/file_1/OVERWRITE_PATTERN/new_value.txt\n")
    self.assertEqual(expected_string, new_process_unit.scheduler.job.to_str())

def constraints_from_pattern(pattern_string):
    """ This function builds a set of constraint objects from an output pattern. """

    regex_pattern = r"%(\S+?)%"
    attribute_names = re.findall(regex_pattern, pattern_string)

    constraint_list = [Constraint(att_name, [])
                       for att_name in attribute_names]

    return set(constraint_list)

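# A minimal usage sketch (an assumption, not from the original source): for a
# pattern such as "/data/%model%/%variable%.nc", constraints_from_pattern is
# expected to return one empty-valued Constraint per %attribute% placeholder.
example_constraints = constraints_from_pattern("/data/%model%/%variable%.nc")
# example_constraints should compare equal to:
#     set([Constraint('model', []), Constraint('variable', [])])
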
def compute(self):
    in_dataset1 = self.getInputFromPort('in_dataset1')
    in_dataset2 = self.getInputFromPort('in_dataset2')

    self.positional_args = []
    self.keyword_args = {}

    new_constraints_for_output = set([Constraint('extra_info', ['timcor']),
                                      Constraint('suffix', ['nc']),
                                      ])

    merge_val = self.getInputFromPort('merge_constraints')
    if merge_val:
        extra_merge = [cons_name.strip() for cons_name in merge_val.split(',')]
    else:
        extra_merge = []

    this_process = ProcessUnit([in_dataset1, in_dataset2],
                               self.out_pattern,
                               self.command,
                               new_constraints_for_output,
                               execution_options=self._execution_options,
                               positional_args=self.positional_args,
                               cons_keywords=self.keyword_args,
                               merge_output=extra_merge)

    try:
        this_process.execute(simulate=configuration.simulate_execution)
    except Exception as e:
        raise vistrails_module.ModuleError(self, repr(e))

    process_output = this_process.file_creator
    self.setResult('out_dataset', process_output)

def compute(self): cons_list = [ 'model', 'experiment', 'variable', 'season_number', 'region' ] in_cons = set([ Constraint(cons_name, [self.getInputFromPort(cons_name)]) for cons_name in cons_list if self.getInputFromPort(cons_name) ]) file_pattern = "/home/548/teb548/cod/CMIP5_v2/%model%_%experiment%/%region%/%variable%/season_%season_number%/rawfield_analog_%season_number%" output_ds = PatternDataSet(file_pattern, in_cons) self.setResult('out_dataset', output_ds)
def setUp(self):
    # This creates a mock pattern dataset that returns a single file.
    test_cons = set([Constraint('fake', ['fake_1']),
                     Constraint('file', ['file_1']),
                     Constraint('pattern', ['pattern_1'])])

    self.a_pattern_ds = PatternDataSet('/a/%fake%/%file%/%pattern%',
                                       constraint_set=test_cons)

    # Mock the get_files method - we will only return a single, mock file object.
    mock_file = mock.MagicMock()
    mock_file.full_path = 'test_file1'
    mock_file.__str__.return_value = 'test_file1'
    mock_file.all_atts = {"fake": "fake_1",
                          "file": "file_1",
                          "pattern": "pattern_1"}
    self.a_pattern_ds.get_files = mock.Mock(return_value=[mock_file])

    # Create a valid set of constraints for the mock.
    self.a_pattern_ds.valid_combinations = set([frozenset(test_cons)])

    # Constant header for the job scripts.
    self.script_header = "#!/bin/sh\nset -e\n\nmodule purge\nexport CWSL_CTOOLS={}\nexport PYTHONPATH=$PYTHONPATH:{}/pythonlib\n"\
        .format(configuration.cwsl_ctools_path, configuration.cwsl_ctools_path)

def compute(self): cons_list = [ 'model', 'experiment', 'variable', 'season_number', 'region' ] in_cons = set([ Constraint(cons_name, [self.getInputFromPort(cons_name)]) for cons_name in cons_list if self.getInputFromPort(cons_name) ]) file_pattern = "/g/data/ua6/CAWCR_CVC_processed/staging/users/CWSL/SDM/COD/CMIP5_v2/%model%_%experiment%/%region%/%variable%/season_%season_number%/rawfield_analog_%season_number%" output_ds = PatternDataSet(file_pattern, in_cons) self.setResult('out_dataset', output_ds)
def test_positionalargs_3(self):
    """ Test that positional arguments work if the constraint is part
    of the output only. """

    extra_cons = set([Constraint('animal', ['moose', 'kangaroo'])])

    the_process_unit = ProcessUnit([self.a_pattern_ds],
                                   '/another/%file%/%pattern%_%animal%.txt',
                                   'echo',
                                   extra_constraints=extra_cons,
                                   positional_args=[('animal', 0)])

    ds_result = the_process_unit.execute(simulate=True)

    outfiles = [file_thing for file_thing in ds_result.files]
    self.assertEqual(len(outfiles), 2)

    expected_string = (self.script_header +
                       'mkdir -p /another/file_1\n'
                       'echo moose test_file1 /another/file_1/pattern_1_moose.txt\n'
                       'echo kangaroo test_file1 /another/file_1/pattern_1_kangaroo.txt\n')
    self.assertEqual(expected_string, the_process_unit.scheduler.job.to_str())

def compute(self):
    output_cons = []

    con_names = ['model', 'realm', 'variable', 'experiment',
                 'mip_table', 'institute', 'ensemble', 'frequency']

    for con_name in con_names:
        con_values = self.getInputFromPort(con_name)
        if con_values:
            val_list = con_values.split(',')
            final_vals = [val.strip() for val in val_list]
            output_cons.append(Constraint(con_name, final_vals))

    self.setResult('constraint_set', output_cons)

def compute(self):
    in_string = self.getInputFromPort('constraint_string')

    output_cons = []
    split_strings = in_string.split(';')
    for cons_string in split_strings:
        constraint_list = cons_string.split('=')
        key = constraint_list[0].strip()
        raw_values = constraint_list[1].split(',')
        final_values = [val_string.strip() for val_string in raw_values]
        output_cons.append(Constraint(key, final_values))

    self.setResult('constraint_set', output_cons)

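# A small, standalone sketch (hypothetical, not part of the original source) of
# the constraint_string format implied by the split(';') / split('=') / split(',')
# logic in compute() above: 'key=val1,val2' groups separated by ';'.
def _parse_constraint_string_example(in_string):
    """ Demonstration only: mirrors the parsing logic in compute() above. """
    parsed = []
    for cons_string in in_string.split(';'):
        key, _, values = cons_string.partition('=')
        parsed.append((key.strip(), [v.strip() for v in values.split(',')]))
    return parsed

# _parse_constraint_string_example("model=ACCESS1-0, ACCESS1-3; experiment=rcp45")
# -> [('model', ['ACCESS1-0', 'ACCESS1-3']), ('experiment', ['rcp45'])]
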
def climate_file_from_combination(self, keys, next_combination, check, update):
    """ Make a possible output MetaFile object from a combination of attributes. """

    # Turn the combination tuple into a dictionary with
    # attribute names.
    sub_dict = {}
    cons_list = []
    for key, value in zip(keys, next_combination):
        sub_dict[key] = value
        cons_list.append(Constraint(key, [value]))

    new_file = self.output_pattern
    for key in sub_dict:
        att_sub = "%" + key + "%"
        new_file = re.sub(att_sub, sub_dict[key], new_file)

    new_path = os.path.dirname(new_file)
    file_name = os.path.basename(new_file)

    new_climate_file = MetaFile(path_dir=new_path,
                                filename=file_name,
                                all_atts=sub_dict)

    if check:
        # Check that this combination is valid for the FileCreator.
        # If it is not, return None.
        module_logger.debug("Checking cons_list: {}".format(cons_list))
        if frozenset(cons_list) not in self.valid_combinations:
            module_logger.debug("This combination: {0} is not found in {1}"
                                .format(cons_list, self.valid_combinations))
            return None

    if update:
        # Add the hash to the 'valid_hashes' set.
        file_hash = hash(new_climate_file)
        self.valid_hashes.add(file_hash)
        self.valid_combinations.add(frozenset(cons_list))

    module_logger.debug("Returning climate file: {}".format(new_climate_file))

    return new_climate_file

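# A minimal sketch (an assumption, not from the original source) of the %key%
# substitution performed in climate_file_from_combination above: each %key%
# placeholder in the output pattern is replaced by its attribute value.
import re

def _fill_pattern_example(output_pattern, sub_dict):
    """ Demonstration only: mirrors the substitution loop above. """
    new_file = output_pattern
    for key, value in sub_dict.items():
        new_file = re.sub("%" + key + "%", value, new_file)
    return new_file

# _fill_pattern_example("/a/fake/pattern/%model%_%experiment%.nc",
#                       {'model': 'ACCESS1-0', 'experiment': 'rcp45'})
# -> "/a/fake/pattern/ACCESS1-0_rcp45.nc"
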