def create_s2(window_length, window_step, fft_min_freq, fft_max_freq, sampling_frequency, file_path):
    warnings.filterwarnings("ignore")
    type_data = pickle.load(open(file_path, 'rb'))
    pipeline = Pipeline([Center_surround_diff()])
    time_series_data = type_data.data

    start, step = 0, int(np.floor(window_step * sampling_frequency))
    stop = start + int(np.floor(window_length * sampling_frequency))
    s2_data = []
    while stop < time_series_data.shape[1]:
        signal_window = time_series_data[:, start:stop]
        window = pipeline.apply(signal_window)
        s2_data.append(window)
        start, stop = start + step, stop + step

    s2_data = np.array(s2_data)
    named_data = seizure_type_data(patient_id=type_data.patient_id,
                                   seizure_type=type_data.seizure_type,
                                   data=type_data.data,
                                   s1=type_data.s1,
                                   s2=s2_data)

    return named_data, os.path.basename(file_path)
def convert_to_fft(window_length, window_step, fft_min_freq, fft_max_freq, sampling_frequency, file_path):
    warnings.filterwarnings("ignore")
    type_data = pickle.load(open(file_path, 'rb'))
    pipeline = Pipeline([FFT(), Slice(fft_min_freq, fft_max_freq), Magnitude(), Log10()])
    # time_series_data = type_data.data
    time_series_data = type_data

    start, step = 0, int(np.floor(window_step * sampling_frequency))
    stop = start + int(np.floor(window_length * sampling_frequency))
    fft_data = []
    while stop < time_series_data.shape[1]:
        signal_window = time_series_data[:, start:stop]
        fft_window = pipeline.apply(signal_window)
        fft_data.append(fft_window)
        start, stop = start + step, stop + step

    fft_data = np.array(fft_data)
    # named_data = seizure_type_data(patient_id=type_data.patient_id, seizure_type=type_data.seizure_type, data=fft_data)
    # return named_data, os.path.basename(file_path)
    return fft_data, os.path.basename(file_path)
def create_s1(window_length, window_step, fft_min_freq, fft_max_freq, sampling_frequency, file_path):
    warnings.filterwarnings("ignore")
    type_data = pickle.load(open(file_path, 'rb'))
    pipeline = Pipeline([Substract_average_plus_P_2(), IFFT(), Smooth_Gaussian()])
    time_series_data = type_data.data

    start, step = 0, int(np.floor(window_step * sampling_frequency))
    stop = start + int(np.floor(window_length * sampling_frequency))
    s1_data = []
    while stop < time_series_data.shape[1]:
        signal_window = time_series_data[:, start:stop]
        window = pipeline.apply(signal_window)
        s1_data.append(window)
        start, stop = start + step, stop + step

    s1_data = np.array(s1_data)
    named_data = seizure_type_data(patient_id=type_data.patient_id,
                                   seizure_type=type_data.seizure_type,
                                   data=type_data.data,
                                   s1=s1_data)

    return named_data, os.path.basename(file_path)
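# Illustrative sketch (not part of the source): create_s2, convert_to_fft and create_s1
# above all share the same sliding-window indexing. Window length and step are given in
# seconds and converted to sample counts; the concrete values below are assumptions
# chosen only for demonstration.
import numpy as np

sampling_frequency = 250                 # Hz, assumed
window_length, window_step = 1.0, 0.5    # seconds, assumed
size = int(np.floor(window_length * sampling_frequency))   # 250 samples per window
step = int(np.floor(window_step * sampling_frequency))     # 125 samples between windows

data = np.random.randn(20, 2500)         # placeholder (channels, samples) signal
windows = []
start, stop = 0, size
while stop < data.shape[1]:
    windows.append(data[:, start:stop])
    start, stop = start + step, stop + step

print(len(windows), windows[0].shape)    # 18 windows of shape (20, 250)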
def finish(self):
    cur_pipeline_id = self.pipeline_id
    self.pipelines[cur_pipeline_id] = self.cur_pipeline
    self.cur_pipeline = Pipeline()
    self.pipeline_id += 1
    self.cur_last_map_index = -1
    self.last_partition_function = None
    self.last_combine_function = None
    return cur_pipeline_id
def __init__(self):
    self.pipelines = {}
    self.cur_pipeline = Pipeline()
    self.pipeline_id = 1
    self.total_num_functions = 0
    self.cur_last_map_index = -1
    self.last_partition_function = None
    self.last_combine_function = None
    self.rel_function_paths = []
def calculateWeightage():
    global totalSample
    global jsonData
    global Featureset

    csv_filename = "WebcredNormalized.csv"
    f = open(csv_filename, 'r')
    data = f.readlines()
    pipe = Pipeline()

    # get json data
    jsonData = pipe.converttojson(data)

    totalSets = 10  # sets of possible weightages
    weightage = []
    totalSample = int(
        subprocess.check_output(['wc', '-l', csv_filename]).split(' ')[0]) - 1

    filterKeys = ['url', 'wot', 'cookie', 'redirected']
    FeaturesName = list(set(jsonData[0].keys()) - set(filterKeys))

    Featureset = []
    alexaScoreSet = []

    for i in range(totalSets):
        count = 0
        # select sample sets
        while True:
            sample = getjsonData()
            featurevalue, alexaScore, wotScore = getFeatureValue([sample], FeaturesName)
            if checksimiliarData(featurevalue[0]):
                Featureset.append(featurevalue[0])
                alexaScoreSet.append(alexaScore)
                count += 1
            if count == len(FeaturesName) - 1:
                # sum of all weightage == 1, misc. genre == 0.1
                temp = []
                for j in range(len(FeaturesName)):
                    temp.append(1)
                Featureset.append(temp)
                alexaScoreSet.append([0.9])
                break

        # get weightage of individual feature
        weightage.append(getWeightage(Featureset, alexaScoreSet))
        print('getting', i, 'set of weightages')

    finalWeightage = np.mean(weightage, axis=0).tolist()

    total = 0
    for i in finalWeightage:
        total += i
    print(total)
    print(finalWeightage)
def create_d(window_length, window_step, fft_min_freq, fft_max_freq, sampling_frequency, file_path):
    warnings.filterwarnings("ignore")
    type_data = pickle.load(open(file_path, 'rb'))

    # Three of these pipelines are needed, as concatenation takes a different kind of parameter (three maps)
    pipeline1 = Pipeline([Normalise()])
    pipeline2 = Pipeline([Concatenation()])
    pipeline3 = Pipeline([RGB_0_255()])

    # The three feature maps
    data_ft = type_data.data
    data_s1 = type_data.s1
    data_s2 = type_data.s2

    start, step = 0, int(np.floor(window_step * sampling_frequency))
    stop = start + int(np.floor(window_length * sampling_frequency))
    d_data = []
    while stop < data_ft.shape[1]:
        # Window definitions: the maps have the same size and shape, so one loop covers all three
        window_ft = data_ft[:, start:stop]
        window_s1 = data_s1[:, start:stop]
        window_s2 = data_s2[:, start:stop]

        # Normalise each window
        window_ft_norm = pipeline1.apply(window_ft)
        window_s1_norm = pipeline1.apply(window_s1)
        window_s2_norm = pipeline1.apply(window_s2)

        # Concatenate the normalised values
        d_norm = pipeline2.apply(window_ft_norm, window_s1_norm, window_s2_norm)

        # RGB 0-255 conversion
        d_rgb = pipeline3.apply(d_norm)
        d_data.append(d_rgb)
        start, stop = start + step, stop + step

    d_data = np.array(d_data)
    named_data = seizure_type_data(patient_id=type_data.patient_id,
                                   seizure_type=type_data.seizure_type,
                                   data=d_data)

    return named_data, os.path.basename(file_path)
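# Conceptual sketch (an assumption, not the project's code): create_d fuses three
# per-window feature maps into an RGB-like array. The project's Normalise,
# Concatenation and RGB_0_255 pipeline steps are approximated here with plain NumPy.
import numpy as np

def normalise(x):
    return (x - x.min()) / (x.max() - x.min() + 1e-12)

window_ft = np.random.randn(20, 250)   # placeholder FFT feature window
window_s1 = np.random.randn(20, 250)   # placeholder s1 feature window
window_s2 = np.random.randn(20, 250)   # placeholder s2 feature window

stacked = np.stack([normalise(window_ft), normalise(window_s1), normalise(window_s2)], axis=-1)
rgb_window = (stacked * 255).astype(np.uint8)
print(rgb_window.shape)                # (20, 250, 3): one image-like window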
def card_classifier(text, algorithm, parser):
    classifier = dict()
    text = remove_punctuation(text)
    print(text)
    types, prefix = card_type(text)
    lines = text.splitlines()

    if parser == 'regex':
        preds = regex_extractor(lines, types, prefix)
    else:
        clean = [word_extractor(line, prefix) for line in lines]
        clf = Pipeline(clean)
        preds = clf.predicts(model=algorithm)

    classifier['type'] = types
    classifier['data'] = preds
    pprint.pprint(classifier)
    return classifier
def model_pipeline(train_config):
    pipeline = Pipeline()
    pipeline.enqueue(
        "train-model", "Train Model",
        TrainModelPipeline.mutate({
            "train-config": train_config,
            "test-config": DEFAULT_TEST_CONFIG
        }))
    pipeline.enqueue(
        "translate-naive", "Translate Naive Plans",
        PlannerTranslatePipeline.mutate({"planner-name": "naive"}))
    pipeline.enqueue(
        "translate-neural", "Translate Neural Plans",
        PlannerTranslatePipeline.mutate({"planner-name": "neural"}))
    return pipeline
def build_poem(self, *args):
    self.form_model = self._pick_form()()
    self.pipeline = Pipeline(
        self.vocab_model.weight,
        self.form_model.weight
    )
    # start the state as the empty string
    state = ['']
    # start with no known transitions
    transitions = []
    for i in range(50):
        state += self._pick(self.pipeline.pipe(state, transitions))
    self.poem_view.get_buffer().set_text(' '.join(state))
def main():
    """Set up the experiment, initialize folders, and write config"""
    # Load the experiment configuration and paths
    argv = sys.argv[1:]
    params = tc.TestConfiguration('default.ini', argv)
    paths = pm.PathManager(params)
    paths.initialize_experiment_folders()
    pipeline = Pipeline(params, paths)

    # Write config file
    pipeline.write_config_file()

    # Load the datasets
    # pipeline.initialize_train_dataset()
    # pipeline.initialize_val_dataset()

    # """Train or load a model"""
    # if pipeline.params['load model fn']:
    #     pipeline.load_model()

    """Score test and train sets"""
    pipeline.score_saved_train_predictions()
    pipeline.score_saved_val_predictions()
from utils.pipeline import Pipeline

# CoverageEvaluationPipeline = Pipeline()
# CoverageEvaluationPipeline.enqueue("plan-all", "Plan all & score on test set",
#                                    lambda f, x: x["test-corpus"].copy().exhaustive_plan(x["train-planner"]))
# CoverageEvaluationPipeline.enqueue("print", "Print stuff",
#                                    lambda f, x: "\n".join([str(len(d.graph.edges)) + " - " + str(len(d.plans)) for d in f["plan-all"].data]),
#                                    ext="txt")

EvaluationPipeline = Pipeline()
EvaluationPipeline.enqueue("bleu", "Evaluate test reader",
                           lambda f, x: x["translate"].evaluate())
# EvaluationPipeline.enqueue("coverage", "Coverage evaluation", CoverageEvaluationPipeline)
        # TODO wotSimilarity[k] = wotSimilarity_avg
        print(weightage, alexaSimilarityScore)

    elif action == 'bn':
        global totalSample
        global jsonData
        global Featureset

        Featureset = []
        alexaScoreSet = []
        wotScoreSet = []

        csv_filename = "WebcredNormalized.csv"
        f = open(csv_filename, 'r')
        data = f.readlines()
        pipe = Pipeline()

        # get json data
        jsonData = pipe.converttojson(data)

        totalSample = int(
            subprocess.check_output(['wc', '-l', csv_filename]).split(' ')[0]) - 1

        filterKeys = ['url', 'wot', 'cookie', 'redirected']
        FeaturesName = list(set(jsonData[0].keys()) - set(filterKeys))

        count = 0
        tried = 0
        # building matrix with 1000 samples
        while True:
            tried += 1
            try:
                sample = getjsonData()
                featurevalue, alexaScore, wotScore = getFeatureValue(
from scorer.relation_transitions import RelationTransitionsExpert
from scorer.splitting_tendencies import SplittingTendenciesExpert
from utils.pipeline import Pipeline


class Config:
    def __init__(self, reader: DataReader, planner: Planner, test_reader: DataReader = None):
        self.reader = {
            DataSetType.TRAIN: reader,
            DataSetType.DEV: reader,
            DataSetType.TEST: test_reader if test_reader else reader,
        }
        self.planner = planner


MainPipeline = Pipeline()
MainPipeline.enqueue("pre-process", "Pre-process training data", TrainingPreProcessPipeline)
MainPipeline.enqueue("train-planner", "Train Planner", TrainPlannerPipeline)
MainPipeline.enqueue("train-model", "Train Model", TrainModelPipeline)
MainPipeline.enqueue("test-corpus", "Pre-process test data", TestingPreProcessPipeline)
MainPipeline.enqueue("translate", "Translate Test", TranslatePipeline)
MainPipeline.enqueue("evaluate", "Evaluate Translations", EvaluationPipeline)

if __name__ == "__main__":
    naive_planner = NaivePlanner(WeightedProductOfExperts([
        RelationDirectionExpert,
        GlobalDirectionExpert,
        SplittingTendenciesExpert,
        RelationTransitionsExpert
    ]))
    # neural_planner = NeuralPlanner()
from utils.pipeline import Pipeline

REGPipeline = Pipeline()
REGPipeline.enqueue(
    "reg", "Learn planner",
    lambda _, x: x["config"].reg(x["pre-process"]["train"], x["pre-process"]["dev"]))
REGPipeline.enqueue("out", "Expose the reg", lambda f, _: f["reg"])
from utils.pipeline import Pipeline

TrainPlannerPipeline = Pipeline()
TrainPlannerPipeline.enqueue(
    "planner", "Learn planner",
    lambda _, x: x["config"].planner.learn(x["pre-process"]["train"], x["pre-process"]["dev"]))
TrainPlannerPipeline.enqueue("out", "Expose the planner", lambda f, _: f["planner"])
from reg.naive import NaiveREG
from scorer.global_direction import GlobalDirectionExpert
from scorer.product_of_experts import WeightedProductOfExperts
from scorer.relation_direction import RelationDirectionExpert
from scorer.relation_transitions import RelationTransitionsExpert
from scorer.splitting_tendencies import SplittingTendenciesExpert
from utils.pipeline import Pipeline

naive_planner = NaivePlanner(
    WeightedProductOfExperts([
        RelationDirectionExpert,
        GlobalDirectionExpert,
        SplittingTendenciesExpert,
        RelationTransitionsExpert
    ]))
neural_planner = NeuralPlanner()

PlanPipeline = Pipeline()
PlanPipeline.enqueue("train-planner", "Train Planner", TrainPlannerPipeline)
PlanPipeline.enqueue("test-corpus", "Pre-process test data", TestingPreProcessPipeline)

ExperimentsPipeline = Pipeline()
ExperimentsPipeline.enqueue("pre-process", "Pre-process training data", TrainingPreProcessPipeline)

# Train all planners
# # Naive Planner
ExperimentsPipeline.enqueue(
    "naive-planner", "Train Naive Planner",
    PlanPipeline.mutate(
        {"config": Config(reader=WebNLGDataReader, planner=naive_planner)}))
# # Neural Planner
"train_steps": 30000, "save_checkpoint_steps": 1000, "batch_size": 16, "word_vec_size": 300, "feat_vec_size": 10, "feat_merge": "concat", "layers": 3, "copy_attn": None, "position_encoding": None } } DEFAULT_TEST_CONFIG = {"beam_size": 5, "find_best": True} TrainModelPipeline = Pipeline({ "train-config": DEFAULT_TRAIN_CONFIG, "test-config": DEFAULT_TEST_CONFIG }) TrainModelPipeline.enqueue( "model", "Initialize OpenNMT", lambda f, x: OpenNMTModelRunner(x["pre-process"]["train"], x["pre-process"] ["dev"], x["train-config"]["features"])) TrainModelPipeline.enqueue("expose", "Expose Train Data", lambda f, x: f["model"].expose_train(), ext="txt") TrainModelPipeline.enqueue("pre-process", "Pre-process Train and Dev", lambda f, x: f["model"].pre_process()) TrainModelPipeline.enqueue( "train", "Train model", lambda f, x: f["model"].train( f["pre-process"], f["train-config"]["train"])) TrainModelPipeline.enqueue(
class ServerlessMR:
    def __init__(self):
        self.pipelines = {}
        self.cur_pipeline = Pipeline()
        self.pipeline_id = 1
        self.total_num_functions = 0
        self.cur_last_map_index = -1
        self.last_partition_function = None
        self.last_combine_function = None
        self.rel_function_paths = []

    def config(self, pipeline_specific_config):
        self.cur_pipeline.set_config(pipeline_specific_config)
        return self

    def map(self, map_function):
        rel_function_path = copy_job_function(map_function)
        self.rel_function_paths.append(rel_function_path)
        self.cur_pipeline.add_function(MapFunction(map_function, rel_function_path))
        self.total_num_functions += 1
        self.cur_last_map_index = self.cur_pipeline.get_num_functions() - 1
        return self

    def shuffle(self, partition_function):
        self.last_partition_function = partition_function
        return self

    def combine(self, combine_function):
        self.last_combine_function = combine_function
        return self

    def _construct_map_shuffle(self, combiner_function):
        if self.last_partition_function is None:
            partition_function = default_partition
            rel_partition_function_path = StaticVariables.DEFAULT_PARTITION_FUNCTION_PATH
        else:
            partition_function = self.last_partition_function
            rel_partition_function_path = copy_job_function(partition_function)
            self.rel_function_paths.append(rel_partition_function_path)
            self.last_partition_function = None

        map_function_obj = self.cur_pipeline.get_function_at_index(self.cur_last_map_index)
        map_function = map_function_obj.get_function()
        rel_map_function_path = map_function_obj.get_rel_function_path()
        rel_combiner_function_path = copy_job_function(combiner_function)
        self.rel_function_paths.append(rel_combiner_function_path)
        map_shuffle = MapShuffleFunction(map_function, rel_map_function_path,
                                         partition_function, rel_partition_function_path,
                                         combiner_function, rel_combiner_function_path)
        self.cur_pipeline.set_function_at_index(self.cur_last_map_index, map_shuffle)

    def reduce(self, reduce_function, num_reducers):
        if self.last_combine_function is None:
            self._construct_map_shuffle(reduce_function)
        else:
            self._construct_map_shuffle(self.last_combine_function)
            self.last_combine_function = None

        rel_function_path = copy_job_function(reduce_function)
        self.rel_function_paths.append(rel_function_path)
        self.cur_pipeline.add_function(
            ReduceFunction(reduce_function, rel_function_path, num_reducers))
        self.total_num_functions += 1
        return self

    def finish(self):
        cur_pipeline_id = self.pipeline_id
        self.pipelines[cur_pipeline_id] = self.cur_pipeline
        self.cur_pipeline = Pipeline()
        self.pipeline_id += 1
        self.cur_last_map_index = -1
        self.last_partition_function = None
        self.last_combine_function = None
        return cur_pipeline_id

    def merge(self, dependent_pipeline_ids):
        self.cur_pipeline.set_dependent_pipelines_ids(dependent_pipeline_ids)
        return self

    def run(self):
        StaticVariables.SETUP_START_TIME = time.time()
        self.finish()
        set_up()
        StaticVariables.PROJECT_WORKING_DIRECTORY = project_working_dir
        StaticVariables.LIBRARY_WORKING_DIRECTORY = library_working_dir

        static_job_info_file = open(StaticVariables.STATIC_JOB_INFO_PATH, "r")
        static_job_info = json.loads(static_job_info_file.read())
        static_job_info_file.close()
        is_serverless_driver = static_job_info[StaticVariables.SERVERLESS_DRIVER_FLAG_FN]

        submission_time = ""
        if is_serverless_driver:
            frame = inspect.stack()[1]
            module = inspect.getmodule(frame[0])
            os.chdir(StaticVariables.PROJECT_WORKING_DIRECTORY)
            main_file_path = os.path.relpath(module.__file__)
            os.chdir(StaticVariables.LIBRARY_WORKING_DIRECTORY)
            serverless_driver_setup = ServerlessDriverSetup(self.pipelines,
                                                            self.total_num_functions)
            serverless_driver_setup.register_driver(main_file_path, self.rel_function_paths)
            logger.info("Driver Lambda function successfully registered")
            print("")
            command = input("Enter invoke to start the job and other keys to exit: ")
            if command == "invoke":
                logger.info("Driver invoked and starting job execution")
                serverless_driver_setup.invoke()
        else:
            logger.info("The total number of functions is %s" % self.total_num_functions)
            driver = Driver(self.pipelines, self.total_num_functions)
            submission_time = driver.run()

        tear_down(self.rel_function_paths)
        return submission_time
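# Hypothetical usage sketch of the ServerlessMR fluent API defined above (assumes the
# surrounding runtime is set up). The user functions and their signatures are
# placeholders, not taken from the source; only the chaining of map(), reduce() and
# run() shown here follows the class definition.

def tokenize_map(key, value):          # placeholder user map function (signature assumed)
    return [(word, 1) for word in value.split()]

def sum_reduce(key, values):           # placeholder user reduce function (signature assumed)
    return key, sum(values)

job = ServerlessMR()
job.map(tokenize_map).reduce(sum_reduce, num_reducers=2)
submission_time = job.run()            # finish() is invoked internally before set-up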
def unique_plans_outputs(reader):
    plan_hyp_refs = defaultdict(lambda: ["", []])
    for d in reader.data:
        plan_hyp_refs[d.plan][0] = d.hyp
        plan_hyp_refs[d.plan][1].append(d.text)
    return dict(plan_hyp_refs)


def plans_output_single_file(plan_hyp_refs):
    return ["\n".join([plan, hyp, "---"] + refs)
            for plan, (hyp, refs) in plan_hyp_refs.items()]


TranslatePipeline = Pipeline()
TranslatePipeline.enqueue("translate", "Translate all plans",
                          lambda f, x: x["test-corpus"].copy().translate_plans(x["train-model"], x["train-planner"]))
TranslatePipeline.enqueue("post-process", "Post-process translated sentences",
                          lambda f, x: f["translate"].copy().post_process(x["train-reg"]))
TranslatePipeline.enqueue("plans-out", "Create a dictionary of outputs",
                          lambda f, x: unique_plans_outputs(f["post-process"]))
TranslatePipeline.enqueue("review", "Create hypothesis-references review file",
                          lambda f, x: "\n\n".join(["\n".join([plan, hyp, "---"] + refs)
                                                    for plan, (hyp, refs) in f["plans-out"].items()]),
                          ext="txt")
TranslatePipeline.enqueue("hypothesis", "Create hypothesis file",
                          lambda f, x: "\n".join([hyp for plan, (hyp, refs) in f["plans-out"].items()]),
                          ext="txt")
TranslatePipeline.enqueue("references", "Create references file",
                          lambda f, x: "\n\n".join(["\n".join(refs)
                                                    for plan, (hyp, refs) in f["plans-out"].items()]),
                          ext="txt")
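# Minimal standalone sketch of the grouping performed by unique_plans_outputs above,
# using made-up (plan, hyp, text) records; the actual reader data is not reproduced here.
from collections import defaultdict

records = [("planA", "hyp1", "ref 1"), ("planA", "hyp1", "ref 2"), ("planB", "hyp2", "ref 3")]
plan_hyp_refs = defaultdict(lambda: ["", []])
for plan, hyp, text in records:
    plan_hyp_refs[plan][0] = hyp         # last hypothesis seen for the plan wins
    plan_hyp_refs[plan][1].append(text)  # reference texts accumulate per plan
print(dict(plan_hyp_refs))
# {'planA': ['hyp1', ['ref 1', 'ref 2']], 'planB': ['hyp2', ['ref 3']]}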
from model.open_nmt import OpenNMTModelRunner
from utils.pipeline import Pipeline

train_opts = {
    "train_steps": 30000,
    "save_checkpoint_steps": 2000,
    "batch_size": 16,
    "word_vec_size": 1000,
    # "feat_vec_size": 300,
    # "feat_merge": "sum",
    "layers": 3,
    "copy_attn": None,
    "position_encoding": None
}

TrainModelPipeline = Pipeline()
TrainModelPipeline.enqueue("model", "Initialize OpenNMT",
                           lambda f, x: OpenNMTModelRunner(x["pre-process"]["train"], x["pre-process"]["dev"]))
TrainModelPipeline.enqueue("expose", "Expose Train Data",
                           lambda f, x: f["model"].expose_train(), ext="txt")
TrainModelPipeline.enqueue("pre-process", "Pre-process Train and Dev",
                           lambda f, x: f["model"].pre_process())
TrainModelPipeline.enqueue("train", "Train model",
                           lambda f, x: f["model"].train(f["pre-process"], train_opts))
TrainModelPipeline.enqueue("find-best", "Find best model",
                           lambda f, x: f["model"].find_best(f["train"]))
TrainModelPipeline.enqueue("out", "Output a model instance",
                           lambda f, x: f["find-best"])
    }
    objectTo = {
        'dblCenterU': endU,
        'dblCenterV': endV,
        'intCropWidth': endW,
        'intCropHeight': endH
    }
    zoom_settings = {'objectFrom': objectFrom, 'objectTo': objectTo}

    if inpaint_depth:
        ken_burn_pipe = Pipeline(
            model_paths=[estim_path, refine_path, inpaint_path, inpaint_depth_path],
            dolly=dolly,
            output_frames=output_frames,
            pretrain=pretrained_refine,
            d2=d2)
    else:
        ken_burn_pipe = Pipeline(
            model_paths=[estim_path, refine_path, inpaint_path],
            dolly=dolly,
            output_frames=output_frames,
            pretrain=pretrained_refine,
            d2=d2)
    # ken_burn_pipe = Pipeline()

    with torch.no_grad():
        ken_burn_pipe((tensorImage + 1) / 2, zoom_settings,
from utils.pipeline import Pipeline


def unique_plans_outputs(reader):
    mapper = {d.plan: d.hyp for d in reader.data}
    print(len(mapper))
    return list(mapper.values())


TranslatePipeline = Pipeline()
TranslatePipeline.enqueue(
    "translate", "Translate all plans",
    lambda f, x: x["test-corpus"].copy().translate_plans(x["train-model"]))
TranslatePipeline.enqueue("post-process", "Post-process translated sentences",
                          lambda f, _: f["translate"].copy().post_process())
TranslatePipeline.enqueue(
    "hypothesis", "Create hypothesis file",
    lambda f, x: "\n".join(unique_plans_outputs(f["post-process"])))
TranslatePipeline.enqueue("out", "Expose output for parent",
                          lambda f, _: f["post-process"].copy())
class PoemBot:
    def __init__(self, config, builder):
        self.config_path = config
        self.config = configparser.ConfigParser()
        self.config.read(self.config_path)
        self.builder = builder
        self.forms = loader.get_forms()
        self.forms_store = builder.get_object('forms_list_store')
        self.styles = loader.get_dirs('data')
        self.styles_store = self.builder.get_object('styles_list_store')
        self.config_store = builder.get_object('config_tree_store')
        self.poem_view = builder.get_object('poem_view')
        self.window = self.builder.get_object('main_window')
        self.window.connect('delete-event', Gtk.main_quit)
        self.handlers = {
            'select_style': self.select_style,
            'select_form': self.select_form,
            'edit_config': self.edit_config,
            'train_models': self.train_models,
            'build_poem': self.build_poem
        }

    def select_style(self, widget, path):
        self.styles_store[path][1] = not self.styles_store[path][1]

    def select_form(self, widget, path):
        self.forms_store[path][2] = not self.forms_store[path][2]

    def edit_config(self, widget, path, text):
        row = self.config_store[path]
        if row.parent is not None:  # can't edit top-level rows
            row[1] = text
            # save new config file
            self.config[row.parent[0]][row[0]] = text
            with open(self.config_path, 'w') as fh:
                self.config.write(fh)

    def train_models(self, *args):
        # initialize models
        p_len = self.config['VocabularyModel'].getint('PrefixSize')
        s_len = self.config['VocabularyModel'].getint('SuffixSize')
        regex = self.config['Tokenizer'].get('Regex')
        self.vocab_model = VocabularyModel(p_len, s_len, regex)
        # load corpus
        style_corpus = []
        for path in self._get_styles():
            style_corpus += loader.load_corpus(path)
        # train style models
        self.trainer = Trainer(style_corpus, self.vocab_model)
        self.trainer.on_update(self._update_progress)
        self.trainer.train_all()

    def build_poem(self, *args):
        self.form_model = self._pick_form()()
        self.pipeline = Pipeline(
            self.vocab_model.weight,
            self.form_model.weight
        )
        # start the state as the empty string
        state = ['']
        # start with no known transitions
        transitions = []
        for i in range(50):
            state += self._pick(self.pipeline.pipe(state, transitions))
        self.poem_view.get_buffer().set_text(' '.join(state))

    def start(self):
        self._load_styles()
        self._load_forms()
        self._load_config()
        self.builder.connect_signals(self.handlers)
        self.window.show_all()
        Gtk.main()

    def _update_progress(self):
        pass

    def _pick(self, options):
        """Pick a choice from a list of weighted options.

        Arguments:
            options: A list of (choice, probability) tuples, where each probability
                is within [0, 1] and the probabilities sum to at most 1.
        """
        roll = random.random()
        result = None
        cumsum = 0
        while cumsum < roll and options:
            result = options.pop()
            cumsum += result[1]
        return result[0]

    def _get_styles(self):
        """Get the selected style paths."""
        paths = []
        for row in self.styles_store:
            if row[1]:
                paths.append(row[0])
        return paths

    def _get_forms(self):
        """Get the selected forms."""
        paths = []
        for row in self.forms_store:
            if row[2]:
                paths.append(globals()[row[1]])
        return paths

    def _load_styles(self):
        for s in self.styles:
            self.styles_store.append((s, False))

    def _load_forms(self):
        for f in self.forms:
            self.forms_store.append((f.name, f.__name__, False))

    def _load_config(self):
        for section in self.config.sections():
            piter = self.config_store.append(None, (section, ''))
            for key in self.config[section]:
                val = self.config[section][key]
                self.config_store.append(piter, (key, val))

    def _pick_form(self):
        """Pick a random selected form."""
        return random.choice(self._get_forms())

    def _on_update(self):
        pass
import json

from data.WebNLG.reader import WebNLGDataReader
from data.reader import DataSetType
from utils.error_bar import error_bar
from utils.pipeline import Pipeline, ParallelPipeline

CorpusPreProcessPipeline = Pipeline()
CorpusPreProcessPipeline.enqueue(
    "corpus", "Read Corpus",
    lambda f, x: x["config"].reader[f["set"]](f["set"]))
CorpusPreProcessPipeline.enqueue(
    "graphify", "RDF to Graph",
    lambda f, _: f["corpus"].copy().generate_graphs())
CorpusPreProcessPipeline.enqueue(
    "spelling", "Fix Spelling",
    lambda f, _: f["graphify"].copy().fix_spelling())
CorpusPreProcessPipeline.enqueue(
    "entities", "Describe entities",
    lambda f, _: f["spelling"].copy().describe_entities())

# Test does not need matching entities or plans
TestCorpusPreProcessPipeline = CorpusPreProcessPipeline.mutate({})

CorpusPreProcessPipeline.enqueue(
    "match-ents", "Match Entities",
    lambda f, _: f["entities"].copy().match_entities())
CorpusPreProcessPipeline.enqueue(
    "match-plans", "Match Plans",
    lambda f, _: f["match-ents"].copy().match_plans())
CorpusPreProcessPipeline.enqueue(
from utils.pipeline import Pipeline

EvaluationPipeline = Pipeline()
EvaluationPipeline.enqueue("evaluate", "Evaluate test reader",
                           lambda f, x: x["translate"].evaluate())
EvaluationPipeline.enqueue("out", "Expose output for parent",
                           lambda f, _: f["evaluate"].copy())