def create_s2(window_length, window_step, fft_min_freq, fft_max_freq, sampling_frequency, file_path):
    warnings.filterwarnings("ignore")
    type_data = pickle.load(open(file_path, 'rb'))
    pipeline = Pipeline([Center_surround_diff()])
    time_series_data = type_data.data

    start, step = 0, int(np.floor(window_step * sampling_frequency))
    stop = start + int(np.floor(window_length * sampling_frequency))
    s2_data = []
    while stop < time_series_data.shape[1]:
        signal_window = time_series_data[:, start:stop]
        window = pipeline.apply(signal_window)
        s2_data.append(window)
        start, stop = start + step, stop + step

    s2_data = np.array(s2_data)
    named_data = seizure_type_data(patient_id=type_data.patient_id,
                                   seizure_type=type_data.seizure_type,
                                   data=type_data.data,
                                   s1=type_data.s1,
                                   s2=s2_data)

    return named_data, os.path.basename(file_path)
def convert_to_fft(window_length, window_step, fft_min_freq, fft_max_freq, sampling_frequency, file_path):
    warnings.filterwarnings("ignore")
    type_data = pickle.load(open(file_path, 'rb'))
    pipeline = Pipeline([FFT(), Slice(fft_min_freq, fft_max_freq), Magnitude(), Log10()])
    # time_series_data = type_data.data
    time_series_data = type_data

    start, step = 0, int(np.floor(window_step * sampling_frequency))
    stop = start + int(np.floor(window_length * sampling_frequency))
    fft_data = []
    while stop < time_series_data.shape[1]:
        signal_window = time_series_data[:, start:stop]
        fft_window = pipeline.apply(signal_window)
        fft_data.append(fft_window)
        start, stop = start + step, stop + step

    fft_data = np.array(fft_data)
    # named_data = seizure_type_data(patient_id=type_data.patient_id, seizure_type=type_data.seizure_type, data=fft_data)
    # return named_data, os.path.basename(file_path)
    return fft_data, os.path.basename(file_path)
def create_s1(window_length, window_step, fft_min_freq, fft_max_freq, sampling_frequency, file_path):
    warnings.filterwarnings("ignore")
    type_data = pickle.load(open(file_path, 'rb'))
    pipeline = Pipeline([Substract_average_plus_P_2(), IFFT(), Smooth_Gaussian()])
    time_series_data = type_data.data

    start, step = 0, int(np.floor(window_step * sampling_frequency))
    stop = start + int(np.floor(window_length * sampling_frequency))
    s1_data = []
    while stop < time_series_data.shape[1]:
        signal_window = time_series_data[:, start:stop]
        window = pipeline.apply(signal_window)
        s1_data.append(window)
        start, stop = start + step, stop + step

    s1_data = np.array(s1_data)
    named_data = seizure_type_data(patient_id=type_data.patient_id,
                                   seizure_type=type_data.seizure_type,
                                   data=type_data.data,
                                   s1=s1_data)

    return named_data, os.path.basename(file_path)
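# Illustrative sketch (not part of the source): create_s2, convert_to_fft and create_s1
# above all share the same sliding-window indexing. Window length and step are given in
# seconds and converted to sample counts; the concrete values below are assumptions
# chosen only for demonstration.
import numpy as np

sampling_frequency = 250                 # Hz, assumed
window_length, window_step = 1.0, 0.5    # seconds, assumed
size = int(np.floor(window_length * sampling_frequency))   # 250 samples per window
step = int(np.floor(window_step * sampling_frequency))     # 125 samples between windows

data = np.random.randn(20, 2500)         # placeholder (channels, samples) signal
windows = []
start, stop = 0, size
while stop < data.shape[1]:
    windows.append(data[:, start:stop])
    start, stop = start + step, stop + step

print(len(windows), windows[0].shape)    # 18 windows of shape (20, 250)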
def finish(self):
    cur_pipeline_id = self.pipeline_id
    self.pipelines[cur_pipeline_id] = self.cur_pipeline
    self.cur_pipeline = Pipeline()
    self.pipeline_id += 1
    self.cur_last_map_index = -1
    self.last_partition_function = None
    self.last_combine_function = None
    return cur_pipeline_id
def __init__(self):
    self.pipelines = {}
    self.cur_pipeline = Pipeline()
    self.pipeline_id = 1
    self.total_num_functions = 0
    self.cur_last_map_index = -1
    self.last_partition_function = None
    self.last_combine_function = None
    self.rel_function_paths = []
def calculateWeightage():
    global totalSample
    global jsonData
    global Featureset

    csv_filename = "WebcredNormalized.csv"
    f = open(csv_filename, 'r')
    data = f.readlines()
    pipe = Pipeline()

    # get json data
    jsonData = pipe.converttojson(data)

    totalSets = 10  # sets of possible weightages
    weightage = []
    totalSample = int(
        subprocess.check_output(['wc', '-l', csv_filename]).split(' ')[0]) - 1

    filterKeys = ['url', 'wot', 'cookie', 'redirected']
    FeaturesName = list(set(jsonData[0].keys()) - set(filterKeys))

    Featureset = []
    alexaScoreSet = []

    for i in range(totalSets):
        count = 0
        # select sample sets
        while True:
            sample = getjsonData()
            featurevalue, alexaScore, wotScore = getFeatureValue([sample], FeaturesName)
            if checksimiliarData(featurevalue[0]):
                Featureset.append(featurevalue[0])
                alexaScoreSet.append(alexaScore)
                count += 1
            if count == len(FeaturesName) - 1:
                # sum of all weightage == 1, misc. genre == 0.1
                temp = []
                for j in range(len(FeaturesName)):
                    temp.append(1)
                Featureset.append(temp)
                alexaScoreSet.append([0.9])
                break

        # get weightage of individual feature
        weightage.append(getWeightage(Featureset, alexaScoreSet))
        print('getting', i, 'set of weightages')

    finalWeightage = np.mean(weightage, axis=0).tolist()

    total = 0
    for i in finalWeightage:
        total += i
    print(total)
    print(finalWeightage)
def create_d(window_length, window_step, fft_min_freq, fft_max_freq, sampling_frequency, file_path):
    warnings.filterwarnings("ignore")
    type_data = pickle.load(open(file_path, 'rb'))

    # Three of these pipelines are needed, as concatenation takes a different kind of parameter (three maps)
    pipeline1 = Pipeline([Normalise()])
    pipeline2 = Pipeline([Concatenation()])
    pipeline3 = Pipeline([RGB_0_255()])

    # The three feature maps
    data_ft = type_data.data
    data_s1 = type_data.s1
    data_s2 = type_data.s2

    start, step = 0, int(np.floor(window_step * sampling_frequency))
    stop = start + int(np.floor(window_length * sampling_frequency))
    d_data = []
    while stop < data_ft.shape[1]:
        # Window definitions: the maps have the same size and shape, so one loop covers all three
        window_ft = data_ft[:, start:stop]
        window_s1 = data_s1[:, start:stop]
        window_s2 = data_s2[:, start:stop]

        # Normalise each window
        window_ft_norm = pipeline1.apply(window_ft)
        window_s1_norm = pipeline1.apply(window_s1)
        window_s2_norm = pipeline1.apply(window_s2)

        # Concatenate the normalised values
        d_norm = pipeline2.apply(window_ft_norm, window_s1_norm, window_s2_norm)

        # RGB 0-255 conversion
        d_rgb = pipeline3.apply(d_norm)
        d_data.append(d_rgb)
        start, stop = start + step, stop + step

    d_data = np.array(d_data)
    named_data = seizure_type_data(patient_id=type_data.patient_id,
                                   seizure_type=type_data.seizure_type,
                                   data=d_data)

    return named_data, os.path.basename(file_path)
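# Conceptual sketch (an assumption, not the project's code): create_d fuses three
# per-window feature maps into an RGB-like array. The project's Normalise,
# Concatenation and RGB_0_255 pipeline steps are approximated here with plain NumPy.
import numpy as np

def normalise(x):
    return (x - x.min()) / (x.max() - x.min() + 1e-12)

window_ft = np.random.randn(20, 250)   # placeholder FFT feature window
window_s1 = np.random.randn(20, 250)   # placeholder s1 feature window
window_s2 = np.random.randn(20, 250)   # placeholder s2 feature window

stacked = np.stack([normalise(window_ft), normalise(window_s1), normalise(window_s2)], axis=-1)
rgb_window = (stacked * 255).astype(np.uint8)
print(rgb_window.shape)                # (20, 250, 3): one image-like window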
def card_classifier(text, algorithm, parser):
    classifier = dict()
    text = remove_punctuation(text)
    print(text)
    types, prefix = card_type(text)
    lines = text.splitlines()

    if parser == 'regex':
        preds = regex_extractor(lines, types, prefix)
    else:
        clean = [word_extractor(line, prefix) for line in lines]
        clf = Pipeline(clean)
        preds = clf.predicts(model=algorithm)

    classifier['type'] = types
    classifier['data'] = preds
    pprint.pprint(classifier)
    return classifier
def model_pipeline(train_config):
    pipeline = Pipeline()
    pipeline.enqueue(
        "train-model", "Train Model",
        TrainModelPipeline.mutate({
            "train-config": train_config,
            "test-config": DEFAULT_TEST_CONFIG
        }))
    pipeline.enqueue(
        "translate-naive", "Translate Naive Plans",
        PlannerTranslatePipeline.mutate({"planner-name": "naive"}))
    pipeline.enqueue(
        "translate-neural", "Translate Neural Plans",
        PlannerTranslatePipeline.mutate({"planner-name": "neural"}))
    return pipeline
def build_poem(self, *args):
    self.form_model = self._pick_form()()
    self.pipeline = Pipeline(
        self.vocab_model.weight,
        self.form_model.weight
    )
    # start the state as the empty string
    state = ['']
    # start with no known transitions
    transitions = []
    for i in range(50):
        state += self._pick(self.pipeline.pipe(state, transitions))
    self.poem_view.get_buffer().set_text(' '.join(state))
def main():
    """Set up the experiment, initialize folders, and write config"""
    # Load the experiment configuration and paths
    argv = sys.argv[1:]
    params = tc.TestConfiguration('default.ini', argv)
    paths = pm.PathManager(params)
    paths.initialize_experiment_folders()
    pipeline = Pipeline(params, paths)

    # Write config file
    pipeline.write_config_file()

    # Load the datasets
    # pipeline.initialize_train_dataset()
    # pipeline.initialize_val_dataset()

    # """Train or load a model"""
    # if pipeline.params['load model fn']:
    #     pipeline.load_model()

    """Score test and train sets"""
    pipeline.score_saved_train_predictions()
    pipeline.score_saved_val_predictions()
from utils.pipeline import Pipeline

# CoverageEvaluationPipeline = Pipeline()
# CoverageEvaluationPipeline.enqueue("plan-all", "Plan all & score on test set",
#                                    lambda f, x: x["test-corpus"].copy().exhaustive_plan(x["train-planner"]))
# CoverageEvaluationPipeline.enqueue("print", "Print stuff",
#                                    lambda f, x: "\n".join([str(len(d.graph.edges)) + " - " + str(len(d.plans)) for d in f["plan-all"].data]),
#                                    ext="txt")

EvaluationPipeline = Pipeline()
EvaluationPipeline.enqueue("bleu", "Evaluate test reader",
                           lambda f, x: x["translate"].evaluate())
# EvaluationPipeline.enqueue("coverage", "Coverage evaluation", CoverageEvaluationPipeline)
        # TODO wotSimilarity[k] = wotSimilarity_avg
        print(weightage, alexaSimilarityScore)

    elif action == 'bn':
        global totalSample
        global jsonData
        global Featureset

        Featureset = []
        alexaScoreSet = []
        wotScoreSet = []

        csv_filename = "WebcredNormalized.csv"
        f = open(csv_filename, 'r')
        data = f.readlines()
        pipe = Pipeline()

        # get json data
        jsonData = pipe.converttojson(data)

        totalSample = int(
            subprocess.check_output(['wc', '-l', csv_filename]).split(' ')[0]) - 1

        filterKeys = ['url', 'wot', 'cookie', 'redirected']
        FeaturesName = list(set(jsonData[0].keys()) - set(filterKeys))

        count = 0
        tried = 0
        # building matrix with 1000 samples
        while True:
            tried += 1
            try:
                sample = getjsonData()
                featurevalue, alexaScore, wotScore = getFeatureValue(
from scorer.relation_transitions import RelationTransitionsExpert
from scorer.splitting_tendencies import SplittingTendenciesExpert
from utils.pipeline import Pipeline


class Config:
    def __init__(self, reader: DataReader, planner: Planner, test_reader: DataReader = None):
        self.reader = {
            DataSetType.TRAIN: reader,
            DataSetType.DEV: reader,
            DataSetType.TEST: test_reader if test_reader else reader,
        }
        self.planner = planner


MainPipeline = Pipeline()
MainPipeline.enqueue("pre-process", "Pre-process training data", TrainingPreProcessPipeline)
MainPipeline.enqueue("train-planner", "Train Planner", TrainPlannerPipeline)
MainPipeline.enqueue("train-model", "Train Model", TrainModelPipeline)
MainPipeline.enqueue("test-corpus", "Pre-process test data", TestingPreProcessPipeline)
MainPipeline.enqueue("translate", "Translate Test", TranslatePipeline)
MainPipeline.enqueue("evaluate", "Evaluate Translations", EvaluationPipeline)

if __name__ == "__main__":
    naive_planner = NaivePlanner(WeightedProductOfExperts([
        RelationDirectionExpert,
        GlobalDirectionExpert,
        SplittingTendenciesExpert,
        RelationTransitionsExpert
    ]))
    # neural_planner = NeuralPlanner()
from utils.pipeline import Pipeline

REGPipeline = Pipeline()
REGPipeline.enqueue(
    "reg", "Learn planner",
    lambda _, x: x["config"].reg(x["pre-process"]["train"], x["pre-process"]["dev"]))
REGPipeline.enqueue("out", "Expose the reg", lambda f, _: f["reg"])
from utils.pipeline import Pipeline

TrainPlannerPipeline = Pipeline()
TrainPlannerPipeline.enqueue(
    "planner", "Learn planner",
    lambda _, x: x["config"].planner.learn(x["pre-process"]["train"], x["pre-process"]["dev"]))
TrainPlannerPipeline.enqueue("out", "Expose the planner", lambda f, _: f["planner"])
from reg.naive import NaiveREG
from scorer.global_direction import GlobalDirectionExpert
from scorer.product_of_experts import WeightedProductOfExperts
from scorer.relation_direction import RelationDirectionExpert
from scorer.relation_transitions import RelationTransitionsExpert
from scorer.splitting_tendencies import SplittingTendenciesExpert
from utils.pipeline import Pipeline

naive_planner = NaivePlanner(
    WeightedProductOfExperts([
        RelationDirectionExpert,
        GlobalDirectionExpert,
        SplittingTendenciesExpert,
        RelationTransitionsExpert
    ]))
neural_planner = NeuralPlanner()

PlanPipeline = Pipeline()
PlanPipeline.enqueue("train-planner", "Train Planner", TrainPlannerPipeline)
PlanPipeline.enqueue("test-corpus", "Pre-process test data", TestingPreProcessPipeline)

ExperimentsPipeline = Pipeline()
ExperimentsPipeline.enqueue("pre-process", "Pre-process training data", TrainingPreProcessPipeline)

# Train all planners
# # Naive Planner
ExperimentsPipeline.enqueue(
    "naive-planner", "Train Naive Planner",
    PlanPipeline.mutate(
        {"config": Config(reader=WebNLGDataReader, planner=naive_planner)}))
# # Neural Planner
"train_steps": 30000, "save_checkpoint_steps": 1000, "batch_size": 16, "word_vec_size": 300, "feat_vec_size": 10, "feat_merge": "concat", "layers": 3, "copy_attn": None, "position_encoding": None } } DEFAULT_TEST_CONFIG = {"beam_size": 5, "find_best": True} TrainModelPipeline = Pipeline({ "train-config": DEFAULT_TRAIN_CONFIG, "test-config": DEFAULT_TEST_CONFIG }) TrainModelPipeline.enqueue( "model", "Initialize OpenNMT", lambda f, x: OpenNMTModelRunner(x["pre-process"]["train"], x["pre-process"] ["dev"], x["train-config"]["features"])) TrainModelPipeline.enqueue("expose", "Expose Train Data", lambda f, x: f["model"].expose_train(), ext="txt") TrainModelPipeline.enqueue("pre-process", "Pre-process Train and Dev", lambda f, x: f["model"].pre_process()) TrainModelPipeline.enqueue( "train", "Train model", lambda f, x: f["model"].train( f["pre-process"], f["train-config"]["train"])) TrainModelPipeline.enqueue(
class ServerlessMR:
    def __init__(self):
        self.pipelines = {}
        self.cur_pipeline = Pipeline()
        self.pipeline_id = 1
        self.total_num_functions = 0
        self.cur_last_map_index = -1
        self.last_partition_function = None
        self.last_combine_function = None
        self.rel_function_paths = []

    def config(self, pipeline_specific_config):
        self.cur_pipeline.set_config(pipeline_specific_config)
        return self

    def map(self, map_function):
        rel_function_path = copy_job_function(map_function)
        self.rel_function_paths.append(rel_function_path)
        self.cur_pipeline.add_function(MapFunction(map_function, rel_function_path))
        self.total_num_functions += 1
        self.cur_last_map_index = self.cur_pipeline.get_num_functions() - 1
        return self

    def shuffle(self, partition_function):
        self.last_partition_function = partition_function
        return self

    def combine(self, combine_function):
        self.last_combine_function = combine_function
        return self

    def _construct_map_shuffle(self, combiner_function):
        if self.last_partition_function is None:
            partition_function = default_partition
            rel_partition_function_path = StaticVariables.DEFAULT_PARTITION_FUNCTION_PATH
        else:
            partition_function = self.last_partition_function
            rel_partition_function_path = copy_job_function(partition_function)
            self.rel_function_paths.append(rel_partition_function_path)
            self.last_partition_function = None

        map_function_obj = self.cur_pipeline.get_function_at_index(self.cur_last_map_index)
        map_function = map_function_obj.get_function()
        rel_map_function_path = map_function_obj.get_rel_function_path()
        rel_combiner_function_path = copy_job_function(combiner_function)
        self.rel_function_paths.append(rel_combiner_function_path)
        map_shuffle = MapShuffleFunction(map_function, rel_map_function_path,
                                         partition_function, rel_partition_function_path,
                                         combiner_function, rel_combiner_function_path)
        self.cur_pipeline.set_function_at_index(self.cur_last_map_index, map_shuffle)

    def reduce(self, reduce_function, num_reducers):
        if self.last_combine_function is None:
            self._construct_map_shuffle(reduce_function)
        else:
            self._construct_map_shuffle(self.last_combine_function)
            self.last_combine_function = None

        rel_function_path = copy_job_function(reduce_function)
        self.rel_function_paths.append(rel_function_path)
        self.cur_pipeline.add_function(
            ReduceFunction(reduce_function, rel_function_path, num_reducers))
        self.total_num_functions += 1
        return self

    def finish(self):
        cur_pipeline_id = self.pipeline_id
        self.pipelines[cur_pipeline_id] = self.cur_pipeline
        self.cur_pipeline = Pipeline()
        self.pipeline_id += 1
        self.cur_last_map_index = -1
        self.last_partition_function = None
        self.last_combine_function = None
        return cur_pipeline_id

    def merge(self, dependent_pipeline_ids):
        self.cur_pipeline.set_dependent_pipelines_ids(dependent_pipeline_ids)
        return self

    def run(self):
        StaticVariables.SETUP_START_TIME = time.time()
        self.finish()
        set_up()
        StaticVariables.PROJECT_WORKING_DIRECTORY = project_working_dir
        StaticVariables.LIBRARY_WORKING_DIRECTORY = library_working_dir

        static_job_info_file = open(StaticVariables.STATIC_JOB_INFO_PATH, "r")
        static_job_info = json.loads(static_job_info_file.read())
        static_job_info_file.close()
        is_serverless_driver = static_job_info[StaticVariables.SERVERLESS_DRIVER_FLAG_FN]

        submission_time = ""
        if is_serverless_driver:
            frame = inspect.stack()[1]
            module = inspect.getmodule(frame[0])
            os.chdir(StaticVariables.PROJECT_WORKING_DIRECTORY)
            main_file_path = os.path.relpath(module.__file__)
            os.chdir(StaticVariables.LIBRARY_WORKING_DIRECTORY)
            serverless_driver_setup = ServerlessDriverSetup(self.pipelines,
                                                            self.total_num_functions)
            serverless_driver_setup.register_driver(main_file_path, self.rel_function_paths)
            logger.info("Driver Lambda function successfully registered")
            print("")
            command = input("Enter invoke to start the job and other keys to exit: ")
            if command == "invoke":
                logger.info("Driver invoked and starting job execution")
                serverless_driver_setup.invoke()
        else:
            logger.info("The total number of functions is %s" % self.total_num_functions)
            driver = Driver(self.pipelines, self.total_num_functions)
            submission_time = driver.run()

        tear_down(self.rel_function_paths)
        return submission_time
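# Hypothetical usage sketch of the ServerlessMR fluent API defined above (assumes the
# surrounding runtime is set up). The user functions and their signatures are
# placeholders, not taken from the source; only the chaining of map(), reduce() and
# run() shown here follows the class definition.

def tokenize_map(key, value):          # placeholder user map function (signature assumed)
    return [(word, 1) for word in value.split()]

def sum_reduce(key, values):           # placeholder user reduce function (signature assumed)
    return key, sum(values)

job = ServerlessMR()
job.map(tokenize_map).reduce(sum_reduce, num_reducers=2)
submission_time = job.run()            # finish() is invoked internally before set-up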
def unique_plans_outputs(reader):
    plan_hyp_refs = defaultdict(lambda: ["", []])
    for d in reader.data:
        plan_hyp_refs[d.plan][0] = d.hyp
        plan_hyp_refs[d.plan][1].append(d.text)
    return dict(plan_hyp_refs)


def plans_output_single_file(plan_hyp_refs):
    return ["\n".join([plan, hyp, "---"] + refs)
            for plan, (hyp, refs) in plan_hyp_refs.items()]


TranslatePipeline = Pipeline()
TranslatePipeline.enqueue("translate", "Translate all plans",
                          lambda f, x: x["test-corpus"].copy().translate_plans(x["train-model"], x["train-planner"]))
TranslatePipeline.enqueue("post-process", "Post-process translated sentences",
                          lambda f, x: f["translate"].copy().post_process(x["train-reg"]))
TranslatePipeline.enqueue("plans-out", "Create a dictionary of outputs",
                          lambda f, x: unique_plans_outputs(f["post-process"]))
TranslatePipeline.enqueue("review", "Create hypothesis-references review file",
                          lambda f, x: "\n\n".join(["\n".join([plan, hyp, "---"] + refs)
                                                    for plan, (hyp, refs) in f["plans-out"].items()]),
                          ext="txt")
TranslatePipeline.enqueue("hypothesis", "Create hypothesis file",
                          lambda f, x: "\n".join([hyp for plan, (hyp, refs) in f["plans-out"].items()]),
                          ext="txt")
TranslatePipeline.enqueue("references", "Create references file",
                          lambda f, x: "\n\n".join(["\n".join(refs)
                                                    for plan, (hyp, refs) in f["plans-out"].items()]),
                          ext="txt")
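# Minimal standalone sketch of the grouping performed by unique_plans_outputs above,
# using made-up (plan, hyp, text) records; the actual reader data is not reproduced here.
from collections import defaultdict

records = [("planA", "hyp1", "ref 1"), ("planA", "hyp1", "ref 2"), ("planB", "hyp2", "ref 3")]
plan_hyp_refs = defaultdict(lambda: ["", []])
for plan, hyp, text in records:
    plan_hyp_refs[plan][0] = hyp         # last hypothesis seen for the plan wins
    plan_hyp_refs[plan][1].append(text)  # reference texts accumulate per plan
print(dict(plan_hyp_refs))
# {'planA': ['hyp1', ['ref 1', 'ref 2']], 'planB': ['hyp2', ['ref 3']]}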
from model.open_nmt import OpenNMTModelRunner
from utils.pipeline import Pipeline

train_opts = {
    "train_steps": 30000,
    "save_checkpoint_steps": 2000,
    "batch_size": 16,
    "word_vec_size": 1000,
    # "feat_vec_size": 300,
    # "feat_merge": "sum",
    "layers": 3,
    "copy_attn": None,
    "position_encoding": None
}

TrainModelPipeline = Pipeline()
TrainModelPipeline.enqueue("model", "Initialize OpenNMT",
                           lambda f, x: OpenNMTModelRunner(x["pre-process"]["train"], x["pre-process"]["dev"]))
TrainModelPipeline.enqueue("expose", "Expose Train Data",
                           lambda f, x: f["model"].expose_train(), ext="txt")
TrainModelPipeline.enqueue("pre-process", "Pre-process Train and Dev",
                           lambda f, x: f["model"].pre_process())
TrainModelPipeline.enqueue("train", "Train model",
                           lambda f, x: f["model"].train(f["pre-process"], train_opts))
TrainModelPipeline.enqueue("find-best", "Find best model",
                           lambda f, x: f["model"].find_best(f["train"]))
TrainModelPipeline.enqueue("out", "Output a model instance",
                           lambda f, x: f["find-best"])
    }
    objectTo = {
        'dblCenterU': endU,
        'dblCenterV': endV,
        'intCropWidth': endW,
        'intCropHeight': endH
    }
    zoom_settings = {'objectFrom': objectFrom, 'objectTo': objectTo}

    if inpaint_depth:
        ken_burn_pipe = Pipeline(
            model_paths=[estim_path, refine_path, inpaint_path, inpaint_depth_path],
            dolly=dolly,
            output_frames=output_frames,
            pretrain=pretrained_refine,
            d2=d2)
    else:
        ken_burn_pipe = Pipeline(
            model_paths=[estim_path, refine_path, inpaint_path],
            dolly=dolly,
            output_frames=output_frames,
            pretrain=pretrained_refine,
            d2=d2)
    # ken_burn_pipe = Pipeline()

    with torch.no_grad():
        ken_burn_pipe((tensorImage + 1) / 2, zoom_settings,
from utils.pipeline import Pipeline


def unique_plans_outputs(reader):
    mapper = {d.plan: d.hyp for d in reader.data}
    print(len(mapper))
    return list(mapper.values())


TranslatePipeline = Pipeline()
TranslatePipeline.enqueue(
    "translate", "Translate all plans",
    lambda f, x: x["test-corpus"].copy().translate_plans(x["train-model"]))
TranslatePipeline.enqueue("post-process", "Post-process translated sentences",
                          lambda f, _: f["translate"].copy().post_process())
TranslatePipeline.enqueue(
    "hypothesis", "Create hypothesis file",
    lambda f, x: "\n".join(unique_plans_outputs(f["post-process"])))
TranslatePipeline.enqueue("out", "Expose output for parent",
                          lambda f, _: f["post-process"].copy())
class PoemBot:
    def __init__(self, config, builder):
        self.config_path = config
        self.config = configparser.ConfigParser()
        self.config.read(self.config_path)
        self.builder = builder
        self.forms = loader.get_forms()
        self.forms_store = builder.get_object('forms_list_store')
        self.styles = loader.get_dirs('data')
        self.styles_store = self.builder.get_object('styles_list_store')
        self.config_store = builder.get_object('config_tree_store')
        self.poem_view = builder.get_object('poem_view')
        self.window = self.builder.get_object('main_window')
        self.window.connect('delete-event', Gtk.main_quit)
        self.handlers = {
            'select_style': self.select_style,
            'select_form': self.select_form,
            'edit_config': self.edit_config,
            'train_models': self.train_models,
            'build_poem': self.build_poem
        }

    def select_style(self, widget, path):
        self.styles_store[path][1] = not self.styles_store[path][1]

    def select_form(self, widget, path):
        self.forms_store[path][2] = not self.forms_store[path][2]

    def edit_config(self, widget, path, text):
        row = self.config_store[path]
        if row.parent is not None:  # can't edit top-level rows
            row[1] = text
            # save new config file
            self.config[row.parent[0]][row[0]] = text
            with open(self.config_path, 'w') as fh:
                self.config.write(fh)

    def train_models(self, *args):
        # initialize models
        p_len = self.config['VocabularyModel'].getint('PrefixSize')
        s_len = self.config['VocabularyModel'].getint('SuffixSize')
        regex = self.config['Tokenizer'].get('Regex')
        self.vocab_model = VocabularyModel(p_len, s_len, regex)
        # load corpus
        style_corpus = []
        for path in self._get_styles():
            style_corpus += loader.load_corpus(path)
        # train style models
        self.trainer = Trainer(style_corpus, self.vocab_model)
        self.trainer.on_update(self._update_progress)
        self.trainer.train_all()

    def build_poem(self, *args):
        self.form_model = self._pick_form()()
        self.pipeline = Pipeline(
            self.vocab_model.weight,
            self.form_model.weight
        )
        # start the state as the empty string
        state = ['']
        # start with no known transitions
        transitions = []
        for i in range(50):
            state += self._pick(self.pipeline.pipe(state, transitions))
        self.poem_view.get_buffer().set_text(' '.join(state))

    def start(self):
        self._load_styles()
        self._load_forms()
        self._load_config()
        self.builder.connect_signals(self.handlers)
        self.window.show_all()
        Gtk.main()

    def _update_progress(self):
        pass

    def _pick(self, options):
        """Pick a choice from a list of weighted options.

        Arguments:
            options: A list of (choice, probability) tuples, where each probability
                is within [0, 1] and the probabilities sum to at most 1.
        """
        roll = random.random()
        result = None
        cumsum = 0
        while cumsum < roll and options:
            result = options.pop()
            cumsum += result[1]
        return result[0]

    def _get_styles(self):
        """Get the selected style paths."""
        paths = []
        for row in self.styles_store:
            if row[1]:
                paths.append(row[0])
        return paths

    def _get_forms(self):
        """Get the selected forms."""
        paths = []
        for row in self.forms_store:
            if row[2]:
                paths.append(globals()[row[1]])
        return paths

    def _load_styles(self):
        for s in self.styles:
            self.styles_store.append((s, False))

    def _load_forms(self):
        for f in self.forms:
            self.forms_store.append((f.name, f.__name__, False))

    def _load_config(self):
        for section in self.config.sections():
            piter = self.config_store.append(None, (section, ''))
            for key in self.config[section]:
                val = self.config[section][key]
                self.config_store.append(piter, (key, val))

    def _pick_form(self):
        """Pick a random selected form."""
        return random.choice(self._get_forms())

    def _on_update(self):
        pass
import json

from data.WebNLG.reader import WebNLGDataReader
from data.reader import DataSetType
from utils.error_bar import error_bar
from utils.pipeline import Pipeline, ParallelPipeline

CorpusPreProcessPipeline = Pipeline()
CorpusPreProcessPipeline.enqueue(
    "corpus", "Read Corpus",
    lambda f, x: x["config"].reader[f["set"]](f["set"]))
CorpusPreProcessPipeline.enqueue(
    "graphify", "RDF to Graph",
    lambda f, _: f["corpus"].copy().generate_graphs())
CorpusPreProcessPipeline.enqueue(
    "spelling", "Fix Spelling",
    lambda f, _: f["graphify"].copy().fix_spelling())
CorpusPreProcessPipeline.enqueue(
    "entities", "Describe entities",
    lambda f, _: f["spelling"].copy().describe_entities())

# Test does not need matching entities or plans
TestCorpusPreProcessPipeline = CorpusPreProcessPipeline.mutate({})

CorpusPreProcessPipeline.enqueue(
    "match-ents", "Match Entities",
    lambda f, _: f["entities"].copy().match_entities())
CorpusPreProcessPipeline.enqueue(
    "match-plans", "Match Plans",
    lambda f, _: f["match-ents"].copy().match_plans())
CorpusPreProcessPipeline.enqueue(
from utils.pipeline import Pipeline

EvaluationPipeline = Pipeline()
EvaluationPipeline.enqueue("evaluate", "Evaluate test reader",
                           lambda f, x: x["translate"].evaluate())
EvaluationPipeline.enqueue("out", "Expose output for parent",
                           lambda f, _: f["evaluate"].copy())