def get_energy(self, hmm, rule_set_list, case_name):
    grammar = Grammar(hmm, RuleSet(rule_set_list))
    self.write_to_dot_to_file(grammar.get_nfa(),
                              "grammar_nfa_" + case_name)
    hypothesis = Hypothesis(grammar, self.data)
    energy = hypothesis.get_energy()
    print("{}: {}".format(case_name,
                          hypothesis.get_recent_energy_signature()))
    return energy
Example #2
def get_hypothesis_from_log_string(hypothesis_string):
    from grammar import Grammar
    from hypothesis import Hypothesis

    hmm = get_hmm_from_hypothesis_string(hypothesis_string)
    rule_set = get_rule_set_from_hypothesis_string(hypothesis_string)

    grammar = Grammar(hmm, rule_set)
    return Hypothesis(grammar)
    def test_crossover(self):
        from simulations import dag_zook_opacity as simulation
        self.initialise_simulation(simulation)
        from copy import deepcopy

        rule_1 = Rule.load([[{'cont': '+'}], [{'coronal': '-'}], [{'coronal': '-'}], [], True])
        rule_2 = Rule.load([[{'cons': '+', 'low': '-'}], [{'voice': '-'}], [{'voice': '-'}], [], True])

        crossover_rule_1 = deepcopy(rule_1)
        crossover_rule_2 = deepcopy(rule_2)
        crossover_rule_1.left_context_feature_bundle_list = rule_2.left_context_feature_bundle_list
        crossover_rule_1.right_context_feature_bundle_list = rule_2.right_context_feature_bundle_list
        crossover_rule_1.change_feature_bundle_list = rule_2.change_feature_bundle_list

        crossover_rule_2.left_context_feature_bundle_list = rule_1.left_context_feature_bundle_list
        crossover_rule_2.right_context_feature_bundle_list = rule_1.right_context_feature_bundle_list
        crossover_rule_2.change_feature_bundle_list = rule_1.change_feature_bundle_list

        rule_set_1 = RuleSet([crossover_rule_1])
        rule_set_2 = RuleSet([crossover_rule_2])
        print(rule_set_1)
        print(rule_set_2)

        hmm = HMM({'q0': ['q1'],
              'q1': (['q2', 'qf'], ['dag', 'kat', 'dot', 'kod']),
              'q2': (['qf'], ['zo', 'go', 'do'])
               })
        grammar_1 = Grammar(hmm, rule_set_1)
        grammar_2 = Grammar(hmm, rule_set_2)

        data = ['kat', 'dot',     'dag', 'kod'] + \
               ['katso', 'dotso', 'dagzo', 'kodzo'] + \
               ['katko', 'dotko', 'daggo', 'kodgo'] + \
               ['katto', 'dotto', 'dagdo', 'koddo']

        self.configurations.simulation_data = data
        hypothesis_1 = Hypothesis(grammar_1)
        hypothesis_2 = Hypothesis(grammar_2)

        print(hypothesis_1.get_energy())
        print(hypothesis_2.get_energy())
    def test_morphology_only2(self):
        self.initialise_segment_table("plural_english_segment_table.txt")
        self.configurations["DATA_ENCODING_LENGTH_MULTIPLIER"] = 25
        data = [u'tozata', u'tozaso', u'tozakt', u'tozzookata', u'tozzookaso', u'tozzookakt', u'tozzook', u'tozdodata', u'tozdodaso', u'tozdodakt', u'tozdod', u'tozgosata', u'tozgosaso', u'tozgosakt', u'tozgos', u'toz', u'dagata', u'dagaso', u'dagakt', u'dagzookata', u'dagzookaso', u'dagzookakt', u'dagzook', u'dagdodata', u'dagdodaso', u'dagdodakt', u'dagdod', u'daggosata', u'daggosaso', u'daggosakt', u'daggos', u'dag', u'gasata', u'gasaso', u'gasakt', u'gaszookata', u'gaszookaso', u'gaszookakt', u'gaszook', u'gasdodata', u'gasdodaso', u'gasdodakt', u'gasdod', u'gasgosata', u'gasgosaso', u'gasgosakt', u'gasgos', u'gas', u'kodata', u'kodaso', u'kodakt', u'kodzookata', u'kodzookaso', u'kodzookakt', u'kodzook', u'koddodata', u'koddodaso', u'koddodakt', u'koddod', u'kodgosata', u'kodgosaso', u'kodgosakt', u'kodgos', u'kod', u'katata', u'kataso', u'katakt', u'katzookata', u'katzookaso', u'katzookakt', u'katzook', u'katdodata', u'katdodaso', u'katdodakt', u'katdod', u'katgosata', u'katgosaso', u'katgosakt', u'katgos', u'kat', u'dotata', u'dotaso', u'dotakt', u'dotzookata', u'dotzookaso', u'dotzookakt', u'dotzook', u'dotdodata', u'dotdodaso', u'dotdodakt', u'dotdod', u'dotgosata', u'dotgosaso', u'dotgosakt', u'dotgos', u'dot']
        hmm = HMM({'q0': [u'q1'],
                   'q1': ([u'q2', u'q3', u'qf'], ['toz', 'dag', 'kat', 'dot', 'kod', 'gas']),
                   'q2': ([u'q3', u'qf'], ['zook', 'gos', 'dod']),
                   'q3': ([u'qf'], ['aso', 'akt', 'ata'])})

        self.configurations.simulation_data = data
        hypothesis = Hypothesis(Grammar(hmm, []))
    def test_abadnese_no_rule(self):
        self.initialise_segment_table("abd_segment_table.txt")
        data = ['bbabbba', 'baabbba', 'babbbba', 'bbabadba', 'baabadba', 'babbadba', 'bbabbad', 'baabbad',
        'babbbad', 'bbabadad', 'baabadad', 'babbadad', 'bbabbab', 'baabbab', 'babbbab', 'bbabadab',
        'baabadab', 'babbadab']

        hmm = HMM({'q0': ['q1'],
              'q1': (['q2'], ['bbab', 'baab', 'babb', 'bbaba', 'baaba', 'babba']),
              'q2': (['qf'], ['dba', 'dad', 'dab', 'bba', 'bad', 'bab'])})

        grammar = Grammar(hmm, [])
        self.configurations.simulation_data = data
        hypothesis = Hypothesis(grammar)
        self.assertEqual(int(hypothesis.get_energy()), 243)
    def test_plural_english_hypothesis(self):
        self.initialise_segment_table("plural_english_segment_table.txt")
        self.rule_set = self.get_rule_set("plural_english_rule_set.json")
        plural_english_data = 1 * ['kats', 'dogz', 'kat', 'dog']
        hmm = HMM({INITIAL_STATE: ['q1'],
                 'q1': (['q2', FINAL_STATE], ['dog', 'kat']),
                 'q2': ([FINAL_STATE], ['z'])})

        grammar = Grammar(hmm, self.rule_set)
        self.write_to_dot_file(self.rule_set.rules[0].get_transducer(), "plural_english_rule")
        self.write_to_dot_file(grammar.get_nfa(), "grammar_nfa")
        self.configurations.simulation_data = plural_english_data
        hypothesis = Hypothesis(grammar)
        self.assertEqual(int(hypothesis.get_energy()), 117)
    def test_hypothesis_unique_representation(self):
        from simulations import french_two_rules as simulation
        self.initialise_simulation(simulation)

        hmm_1 = HMM({'q0': ['q1'],
                     'q1': (['q2'],
                            ['klop', 'kylt', 'provok']),
                     'q2': (['qf'], ['kif', 'timid', 'fad', 'mal', 'byvabl', EPSILON])
                     })

        rule_1 = Rule.load([[], [{"center": "+"}], [{"cons": "+", "son": "+"}, {"son": "+", "cons": "+"}], [{"MB": True}, {"cons": "+"}], False])
        rule_set_1 = RuleSet([rule_1])

        grammar_1 = Grammar(hmm_1, rule_set_1)
        hypothesis_1 = Hypothesis(grammar_1)

        repr_1 = repr(hypothesis_1)

        hmm_2 = HMM({'q0': ['q1'],
                     'q2': (['qf'], ['kif', 'timid', 'mal', 'fad', 'byvabl', EPSILON]),
                     'q1': (['q2'],
                            ['provok', 'kylt', 'klop'])
                     })

        rule_2 = Rule.load([[], [{"center": "+"}], [{"son": "+", "cons": "+"}, {"cons": "+", "son": "+"}], [{"MB": True}, {"cons": "+"}], False])
        rule_set_2 = RuleSet([rule_2])

        grammar_2 = Grammar(hmm_2, rule_set_2)
        hypothesis_2 = Hypothesis(grammar_2)

        repr_2 = repr(hypothesis_2)

        print(repr_1)
        print(repr_2)

        assert repr_1 == repr_2
    def test_opacity_two_hypotheses(self):
        from simulations import dag_zook_opacity as simulation
        self.initialise_simulation(simulation)
        hmm = HMM({'q0': ['q1'],
              'q1': (['q2', 'q3'], ['daot', 'dkoz', 'dog', 'dok', 'gdaas', 'gkas', 'kaos', 'kat', 'kood', 'ksoag', 'ogtd', 'oktdo', 'skaz', 'tak', 'tso']),
              'q2': (['qf'], ['go', 'kazka', 'soka', 'ta', EPSILON]),
              'q3': (['qf'], ['da', 'saat', 'tsk', 'zoka'])
               })

        epenthesis_rule = Rule([], [{'low': '+'}], [{'coronal': '+'}], [{'coronal': '+'}], True)
        assimilation_rule = Rule([{'cons': '+'}], [{'voice': '-'}], [{'voice': '-'}], [], True)

        rule_set = RuleSet([assimilation_rule, epenthesis_rule])
        grammar = Grammar(hmm, rule_set)
        hypothesis = Hypothesis(grammar)
        print(hypothesis.get_energy())
    def test_incest(self):
        self.initialise_segment_table("plural_english_segment_table.txt")
        self.rule_set = self.get_rule_set("plural_english_rule_set.json")
        plural_english_data = 1 * ['kats', 'dogz', 'kat', 'dog']
        hmm = HMM({
            INITIAL_STATE: ['q1'],
            'q1': (['q2', FINAL_STATE], ['dog', 'kat']),
            'q2': ([FINAL_STATE], ['z'])
        })

        grammar = Grammar(hmm, self.rule_set)
        h1 = Hypothesis(grammar, plural_english_data)
        h2 = deepcopy(h1)
        h2.data = ['kakakakkakakaka']

        print(GeneticAlgorithm._is_incest(h1, h2))
    def test_abadnese(self):
        self.initialise_segment_table("abd_segment_table.txt")
        data = ['bbabbba', 'baabbba', 'babbbba', 'bbabadba', 'baabadba', 'babbadba', 'bbabbad', 'baabbad',
        'babbbad', 'bbabadad', 'baabadad', 'babbadad', 'bbabbab', 'baabbab', 'babbbab', 'bbabadab',
        'baabadab', 'babbadab']

        hmm = HMM({'q0': ['q1'],
              'q1': (['q2'], ['bbab', 'baab', 'babb', 'bbaba', 'baaba', 'babba']),
              'q2': (['qf'], ['dba', 'dad', 'dab'])})
        rule = Rule.load([[{"cons": "+"}], [{"labial": "+"}], [{"labial": "+"}], [], True])
        rule_set = RuleSet([rule])

        grammar = Grammar(hmm, rule_set)
        self.configurations.simulation_data = data
        hypothesis = Hypothesis(grammar)
        hypothesis.get_energy()
        self.assertEqual(ceil(hypothesis.get_energy()), 231)
    def test_assimilation_no_rule(self):
        self.initialise_segment_table("plural_english_segment_table.txt")
        data = ['kat', 'dot',     'dag', 'kod'] + \
               ['katso', 'dotso', 'dagzo', 'kodzo'] + \
               ['katko', 'dotko', 'daggo', 'kodgo'] + \
               ['katto', 'dotto', 'dagdo', 'koddo']

        hmm = HMM({'q0': ['q1'],
              'q1': (['q2', 'qf'], ['dag', 'kat', 'dot', 'kod']),
              'q2': (['qf'], ['zo', 'go', 'do', 'to', 'so', 'ko'])
               })

        grammar = Grammar(hmm, [])

        hypothesis = Hypothesis(grammar)
        self.configurations.simulation_data = data
        self.assertEqual(int(hypothesis.get_energy()), 230)
Example #12
    def test_parser(self):
        hmm_multiple_paths = HMM({
            INITIAL_STATE: ['q1', 'q3'],
            'q1': (['q2', FINAL_STATE], ['dog', 'kat', 'kats', 'dogz']),
            'q2': ([FINAL_STATE], ['z']),
            'q3': (['q3', FINAL_STATE], self.plural_english_segments)
        })

        grammar = Grammar(hmm_multiple_paths, self.plural_english_rule_set)
        hypothesis = Hypothesis(grammar, ['dogz'])
        nfa = grammar.get_nfa()
        parses, outputs = nfa_parser(nfa, 'dogz')
        print(parses)
        print(outputs)

        nfa = grammar.get_nfa()

        self.write_to_dot_to_file(nfa, "test_parser_nfa")
    def test_assimilation2(self):
        self.initialise_segment_table("plural_english_segment_table.txt")
        self.rule_set = self.get_rule_set("plural_english_rule_set.json")
        data = ['kat', 'dot',     'dag', 'kod'] + \
               ['katso', 'dotso', 'dagzo', 'kodzo'] + \
               ['katko', 'dotko', 'daggo', 'kodgo'] + \
               ['katto', 'dotto', 'dagdo', 'koddo']

        hmm = HMM({'q0': ['q1'],
              'q1': (['q2', 'qf'], ['dag', 'kat', 'dot', 'kod']),
              'q2': (['qf'], ['zo', 'go', 'do'])
               })

        grammar = Grammar(hmm, self.rule_set)
        self.configurations.simulation_data = data
        hypothesis = Hypothesis(grammar)
        for _ in range(10):  #1.4
            energy = hypothesis.get_energy()
Example #14
    def __init__(self, threadID, dataSet, minStartWeight, maxStartWeight):
        Process.__init__(self)
        self.threadID = threadID

        self.weights = Weights(dataSet.getDataPoint(0).dimension())
        self.weights.generateRandom(minStartWeight, maxStartWeight)
        weight = """*****\nThread {0} initial weights:\n{1}\n*****"""
        print(weight.format(self.threadID, self.weights.vector))

        self.trainingErrors = Errors(dataSet.trainingDataPoints, dataSet.trainingActualValues)
        self.testingErrors = Errors(dataSet.testingDataPoints, dataSet.testingActualValues)

        self.hypothesis = Hypothesis()

        self.trainingErrors.updateToLower(self.weights, self.hypothesis)

        self.iterations = 0
        self.alpha = STARTING_ALPHA
Example #15
    def mergeEntries(self, entriesLst, cube_indx):

        # First process the goal: this will be a (regular/glue) rule
        sf_f_obj = sff.initNew(entriesLst[0].lm_heu)
        score = entriesLst[0].getScoreSansLmHeu()

        # Now process the antecedents
        anteHyps = []
        anteSfFeats = []
        anteItemsStates = []
        for ante_ent in entriesLst[1:]:
            score += ante_ent.getScoreSansLmHeu()
            anteHyps.append(ante_ent.tgt)
            anteSfFeats.append(ante_ent.sf_feat)
            anteItemsStates.append(ante_ent.consItems)

        (tgt_hyp, newConsItems) = lmm.helperConsItem(Lazy.is_last_cell, Lazy.cell_type, \
                                    Lazy.cell_span, entriesLst[0].tgt.split(), anteHyps, anteItemsStates)

        if settings.opts.force_decode and not Lazy.candMatchesRef(tgt_hyp):
            return (score, None)  # Hypothesis wouldn't lead to reference; ignore this
        """
            Get hypothesis status from the classmethod (in Lazy); hypothesis status can take one of these three values:
            -2 : Hyp was not see earlier; create a new entry
            -1 : Hyp was seen earlier but current one has a better score; create a new entry to replace the existing one
             0 : Hyp was seen earlier and has a poor score than the existing one; ignore this
        """
        score_wo_LM = score - sf_f_obj.aggregSFScore(anteSfFeats)
        hyp_status = Lazy.getHypothesisStatus(tgt_hyp, score_wo_LM)
        """ Should we recombine hypothesis?
            A new hypothesis is always added; query LM for lm-score and create new entry_obj.
            If an identical hypothesis exists then the current hyp is added under below conditions:
            i) the use_unique_nbest flag is False (add new hyp; but use the LM score of the existing one)
            ii) use_unique_nbest is True and the new hyp is better than the existing one.
        """
        if (hyp_status == 0 and settings.opts.use_unique_nbest):
            entry_obj = None
        else:
            score += sf_f_obj.helperScore(newConsItems, Lazy.is_last_cell)
            entry_obj = Hypothesis(score, self.src_side, tgt_hyp, sf_f_obj, self.depth_hier, Lazy.cell_span, \
                                     entriesLst[0], entriesLst[1:], newConsItems)

        return (score, entry_obj)
    def test_abadnese_no_rule(self):
        self.initialise_segment_table("abd_segment_table.txt")
        data = [
            'bbabbba', 'baabbba', 'babbbba', 'bbabadba', 'baabadba',
            'babbadba', 'bbabbad', 'baabbad', 'babbbad', 'bbabadad',
            'baabadad', 'babbadad', 'bbabbab', 'baabbab', 'babbbab',
            'bbabadab', 'baabadab', 'babbadab'
        ]

        hmm = HMM({
            'q0': ['q1'],
            'q1': (['q2', 'qf'],
                   ['bbab', 'baab', 'babb', 'bbaba', 'baaba', 'babba']),
            'q2': (['qf'], ['dba', 'dad', 'dab', 'bba', 'bad', 'bab'])
        })

        grammar = Grammar(hmm, [])
        hypothesis = Hypothesis(grammar, data)
        self.assertEqual(hypothesis.get_energy(), 252)
Example #17
def get_energy(self, simulation_case):
    case_name = simulation_case.case_name
    if isinstance(simulation_case.hmm_dict, HMM):
        hmm = simulation_case.hmm_dict
    else:
        hmm = HMM(simulation_case.hmm_dict)
    if isinstance(simulation_case.flat_rule_set_list, RuleSet):
        rule_set = simulation_case.flat_rule_set_list
    else:
        rule_set_list = []
        for flat_rule in simulation_case.flat_rule_set_list:
            rule_set_list.append(Rule(*flat_rule))
        rule_set = RuleSet(rule_set_list)
    grammar = Grammar(hmm, rule_set)
    self.write_to_dot_to_file(hmm, "hmm")
    self.write_to_dot_to_file(grammar.get_nfa(), "grammar_nfa_"+case_name)
    hypothesis = Hypothesis(grammar, self.data)
    energy = hypothesis.get_energy()
    print("{}: {}".format(case_name, hypothesis.get_recent_energy_signature()))
    return energy
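A minimal sketch of the duck-typed simulation_case this helper accepts; the namedtuple and the concrete values below are illustrative assumptions, not taken from the test suite:

from collections import namedtuple

# flat rules appear to follow (target, change, left_context, right_context, obligatory)
SimulationCase = namedtuple('SimulationCase',
                            ['case_name', 'hmm_dict', 'flat_rule_set_list'])

case = SimulationCase(
    case_name='voicing_assimilation',
    hmm_dict={'q0': ['q1'],
              'q1': (['q2', 'qf'], ['dag', 'kat']),
              'q2': (['qf'], ['zo', 'so'])},
    flat_rule_set_list=[[[{'cons': '+'}], [{'voice': '-'}], [{'voice': '-'}], [], True]])

# energy = self.get_energy(case)  # invoked from within the test/simulation class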
    def test_abnese(self):
        self.initialise_segment_table("ab_segment_table.txt")
        self.configurations["BRACKET_TRANSDUCER"] = True
        data = ['bab', 'aabab']

        hmm = HMM( {'q0': ['q1'],
              'q1': (['qf'], ['bb', 'aabb'])
              })
        rule = Rule([], [{"cons": "-"}], [{"cons": "+"}], [{"cons": "+"}], False)  # e->a / b_b
        rule_set = RuleSet([rule])

        print(rule_set.get_outputs_of_word("bb"))

        grammar = Grammar(hmm, rule_set)
        self.write_to_dot_file(grammar.get_nfa(), "grammar_nfa")
        self.configurations.simulation_data = data
        hypothesis = Hypothesis(grammar)

        print(hypothesis.get_energy())
        print(hypothesis.get_recent_energy_signature())
    def test_katso_two_rule(self):
        #configurations["DATA_ENCODING_LENGTH_MULTIPLIER"] = 25
        self.initialise_segment_table("plural_english_segment_table.txt")
        data = ['kat', 'dot',     'dag', 'kod',     'gas', 'toz'] + \
               ['katso', 'dotso', 'dagzo', 'kodzo', 'gasazo', 'tozazo'] + \
               ['katko', 'dotko', 'daggo', 'kodgo', 'gasko', 'tozgo'] + \
               ['katto', 'dotto', 'dagdo', 'koddo', 'gasto', 'tozdo']

        hmm = {'q0': ['q1'],
              'q1': (['q2', 'qf'], ['dag', 'kat', 'dot', 'kod', 'gas', 'toz']),
              'q2': (['qf'], ['zo', 'go', 'do'])}

        epenthesis_rule = Rule.load([[], [{"cons": "-", "low": "+"}], [{"cons": "+", "cont": "+"}], [{"cons": "+", "cont": "+"}], True])
        assimilation_rule = Rule.load([[{"cons": "+"}], [{"voice": "-"}], [{"voice": "-"}], [], True])
        rule_set = RuleSet([epenthesis_rule, assimilation_rule])
        hmm = HMM(hmm)
        grammar = Grammar(hmm, rule_set)
        self.write_to_dot_file(grammar.get_nfa(), "grammar_nfa")
        self.configurations.simulation_data = data
        hypothesis = Hypothesis(grammar)
        self.assertEqual(int(hypothesis.get_energy()), 364)
    def test_abadnese_for_ezer(self):
        self.initialise_segment_table("abd_segment_table.txt")
        data = ['aabad', 'abad', 'badaabad', 'aba', 'aaba', 'badaa']

        hmm = HMM({
            'q0': ['q1'],
            'q1': (['qf'], ['aabd', 'abd', 'bdaabd', 'aba', 'aaba', 'bdaa'])
        })
        rule = Rule([], [{}], [{"cons": "+", "labial": "+"}],
                    [{"cons": "+", "labial": "+"}], obligatory=True)
        rule_set = RuleSet([rule])

        print(rule_set.get_outputs_of_word("abb"))

        grammar = Grammar(hmm, rule_set)
        hypothesis = Hypothesis(grammar, data)
    def test_get_parsing_results_infinite(self):
        self.initialise_segment_table("abnese_lengthening_segment_table.txt")
        configurations["MORPHEME_BOUNDARY_FLAG"] = True
        configurations["LENGTHENING_FLAG"] = True
        configurations["HMM_ENCODING_LENGTH_MULTIPLIER"] = 100
        configurations["DATA_ENCODING_LENGTH_MULTIPLIER"] = 20
        hmm = HMM({
            'q0': ['q1'],
            'q1': (['qf'], ['ab:a', 'baaa', 'baab', 'baab:a'])
        })

        rule = Rule([], [{'long': '-'}], [], [{}], obligatory=False)
        rule_set = RuleSet([rule])

        grammar = Grammar(hmm, rule_set)
        data = [
            u'baba:a', u'babaab:ab', u'ab:a', u'aab:a', u'aab:ab', u'ab:ab'
        ]

        hypothesis = Hypothesis(grammar, data)
        simulated_annealing = SimulatedAnnealing(hypothesis, 0)
        print(simulated_annealing._get_parsing_results())
Example #22
def get_action_infos(query, actions, force_copy=False):
    action_info_list = []
    prediction = Hypothesis()
    for t, action in enumerate(actions):
        action_info = ActionInfo(action)
        action_info.t = t
        if prediction.frontier_node:
            action_info.parent_t = prediction.frontier_node.created_time
            action_info.frontier_prod = prediction.frontier_node.production
            action_info.frontier_field = prediction.frontier_field.field

        if isinstance(action, GenTokenAction):
            try:
                token_source_index = query.index(str(action.token))
                action_info.copy_from_src = True
                action_info.src_token_position = token_source_index
            except ValueError:
                if force_copy:
                    raise ValueError('Can\'t copy input token %s' % action.token)
        prediction.apply_action(action)
        action_info_list.append(action_info)

    return action_info_list
Example #23
    file_log_handler.setFormatter(file_log_formatter)
    logger.addHandler(file_log_handler)

    feature_tables_dir_path = join(dir_name, "tests/fixtures/feature_tables")
    constraint_sets_dir_path = join(dir_name, "tests/fixtures/constraint_sets")

    feature_table_file_path = join(feature_tables_dir_path,
                                   current_simulation.feature_table_file_name)
    feature_table = FeatureTable.load(feature_table_file_path)

    constraint_set_file_path = join(
        constraint_sets_dir_path, current_simulation.constraint_set_file_name)
    constraint_set = ConstraintSet.load(constraint_set_file_path)

    corpus = Corpus(current_simulation.corpus)

    data = corpus.get_words()
    max_word_length_in_data = max([len(word) for word in data])
    lexicon = Lexicon(data, max_word_length_in_data)

    grammar = Grammar(constraint_set, lexicon)
    hypothesis = Hypothesis(grammar, data)

    if hasattr(current_simulation, "target_energy"):
        target_energy = current_simulation.target_energy
    else:
        target_energy = None

    simulated_annealing = SimulatedAnnealing(hypothesis, target_energy)
    simulated_annealing.run()
Example #24
File: credco.py Project: judell/CredcoTest
import csv, traceback, json
from hypothesis import Hypothesis
from collections import defaultdict

h = Hypothesis(username='******', token='{TOKEN}')


def orderByFields(names):
    fields = 'project_id,report_id,report_title,report_url,report_date,media_content,media_url,report_status,report_author,time_delta_to_first_status,time_delta_to_last_status,time_original_media_publishing,type,contributing_users,tags,notes_count,notes_ugc_count,tasks_count,tasks_resolved_count,task_question_1,task_user_1,task_date_1,task_answer_1,task_note_1,task_question_2,task_user_2,task_date_2,task_answer_2,task_note_2,task_question_3,task_user_3,task_date_3,task_answer_3,task_note_3,task_question_4,task_user_4,task_date_4,task_answer_4,task_note_4,task_question_5,task_user_5,task_date_5,task_answer_5,task_note_5,task_question_6,task_user_6,task_date_6,task_answer_6,task_note_6,task_question_7,task_user_7,task_date_7,task_answer_7,task_note_7,task_question_8,task_user_8,task_date_8,task_answer_8,task_note_8,task_question_9,task_user_9,task_date_9,task_answer_9,task_note_9,task_question_10,task_user_10,task_date_10,task_answer_10,task_note_10,task_question_11,task_user_11,task_date_11,task_answer_11,task_note_11,task_question_12,task_user_12,task_date_12,task_answer_12,task_note_12,task_question_13,task_user_13,task_date_13,task_answer_13,task_note_13,task_question_14,task_user_14,task_date_14,task_answer_14,task_note_14,task_question_15,task_user_15,task_date_15,task_answer_15,task_note_15,task_question_16,task_user_16,task_date_16,task_answer_16,task_note_16,task_question_17,task_user_17,task_date_17,task_answer_17,task_note_17,task_question_18,task_user_18,task_date_18,task_answer_18,task_note_18,task_question_19,task_user_19,task_date_19,task_answer_19,task_note_19,task_question_20,task_user_20,task_date_20,task_answer_20,task_note_20,task_question_21,task_user_21,task_date_21,task_answer_21,task_note_21,task_question_22,task_user_22,task_date_22,task_answer_22,task_note_22,task_question_23,task_user_23,task_date_23,task_answer_23,task_note_23,task_question_24,task_user_24,task_date_24,task_answer_24,task_note_24,note_date_1,note_user_1,note_content_1,task_question_25,task_user_25,task_date_25,task_answer_25,task_note_25'.split(
        ',')
    orderedList = []
    for fieldName in fields:
        if fieldName in names:
            orderedList.append(fieldName)
    return orderedList
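A quick usage sketch with a hypothetical header list, showing that only known field names are kept and returned in the canonical order defined above:

print(orderByFields(['report_title', 'tags', 'project_id', 'not_a_field']))
# ['project_id', 'report_title', 'tags']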


headerTemplate = """<p>
<i>Checked at {report_url}</i>
</p>
<hr>
"""

questionTemplate = """<p>{question}</p>"""

multiAnswerTemplate = """<p><ul>{answers}</ul></p>"""

singleAnswerTemplate = """<li>{answer}</li>"""

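A small sketch of how these templates might be composed into a single annotation body; the row values are hypothetical stand-ins for one record of the CSV this script reads:

row = {'report_url': 'https://example.com/report/123',
       'task_question_1': 'Is the claim in the headline accurate?',
       'task_answer_1': 'No'}

body = headerTemplate.format(report_url=row['report_url'])
body += questionTemplate.format(question=row['task_question_1'])
answers = singleAnswerTemplate.format(answer=row['task_answer_1'])
body += multiAnswerTemplate.format(answers=answers)
print(body)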
targets = [
    'https://www.usatoday.com/story/news/nation-now/2017/06/16/coconut-oil-isnt-healthy-its-never-been-healthy/402719001/',
Example #25
    def parse(self, sentence):
        primitive_vocab = self.vocab.primitive
        processed_sentence = self.process([sentence], self.vocab.source)
        source_encodings, (last_encoder_state,
                           last_encoder_cell) = TranxParser.encode(
                               processed_sentence, [len(sentence)])
        source_encodings_attention_linear_layer = nn.Linear(256,
                                                            256,
                                                            bias=False)
        decoder_initial_vector = F.tanh(
            self.decoder_cell_initializer_linear_layer(last_encoder_cell))
        h_t1 = decoder_initial_vector
        hypothesis_scores = Variable(torch.cuda.FloatTensor([0.]),
                                     volatile=True)
        source_token_positions_by_token = OrderedDict()
        for token_position, token in enumerate(sentence):
            source_token_positions_by_token.setdefault(
                token, []).append(token_position)
        t = 0
        hypotheses = [Hypothesis()]
        hypotheses_states = [[]]
        finished_hypotheses = []

        while len(finished_hypotheses) < 15 and t < 100:
            num_of_hypotheses = len(hypotheses)
            expanded_source_encodings = source_encodings.expand(
                num_of_hypotheses, source_encodings.size(1),
                source_encodings.size(2))
            expanded_source_encodings_attention_linear_layer = \
                source_encodings_attention_linear_layer.expand(num_of_hypotheses,
                                                               source_encodings_attention_linear_layer.size(1),
                                                               source_encodings_attention_linear_layer.size(2))
            if t == 0:
                x = Variable(torch.cuda.FloatTensor(1, 128).zero_(),
                             volatile=True)
            else:
                actions = [h.actions[-1] for h in hypotheses]
                action_embeddings = []
                for action in actions:
                    if action:
                        if isinstance(action, ApplyRuleAction):
                            action_embedding = self.apply_const_and_reduce_emb.weight[
                                self.grammar.production_to_id[
                                    action.production]]
                        elif isinstance(action, ReduceAction):
                            action_embedding = self.apply_const_and_reduce_emb.weight[
                                len(self.grammar)]
                        else:
                            action_embedding = self.primitives_emb.weight[
                                self.vocab.primitive[action.token]]
                        action_embeddings.append(action_embedding)
                    else:
                        action_embeddings.append(
                            Variable(torch.cuda.FloatTensor(128).zero_()))
                action_embeddings = torch.stack(action_embeddings)
                encoder_inputs = [action_embeddings]
                encoder_inputs.append(att_t1)

                frontier_fields = [h.frontier_field.field for h in hypotheses]
                frontier_field_embeddings = self.fields_emb(
                    Variable(
                        torch.cuda.FloatTensor([
                            self.grammar.field_to_id[f]
                            for f in frontier_fields
                        ])))
                encoder_inputs.append(frontier_field_embeddings)

                parent_created_times = [
                    h.frontier_node.created_time for h in hypotheses
                ]
                parent_states = torch.stack([
                    hypotheses_states[h_id][parent_created_time][0]
                    for h_id, parent_created_time in enumerate(parent_created_times)
                ])
                parent_cells = torch.stack([
                    hypotheses_states[h_id][parent_created_time][1] for h_id,
                    parent_created_time in enumerate(parent_created_times)
                ])
                encoder_inputs.append(parent_states)

                x = torch.cat(encoder_inputs, dim=-1)

            (h_t, cell), attention = self.step(
                x, h_t1, expanded_source_encodings,
                expanded_source_encodings_attention_linear_layer)
            log_p_of_each_apply_rule_action = F.log_softmax(
                self.production_prediction(attention), dim=-1)
            p_of_generating_each_primitive_in_vocab = F.softmax(
                self.primitive_prediction(attention), dim=-1)
            p_of_copying_from_source_sentence = self.ptr_net_lin(
                source_encodings, None,
                attention.unsqueeze(0).squeeze(0))
            p_of_making_primitive_prediction = F.softmax(
                self.gen_vs_copy_lin(attention), dim=-1)
            p_of_each_primitive = p_of_making_primitive_prediction[:, 0].unsqueeze(
                1) * p_of_generating_each_primitive_in_vocab

            hypothesis_ids_for_which_we_gentoken = []
            hypothesis_unknowns_resulting_from_gentoken = []
            hypothesis_ids_for_which_we_applyrule = []
            hypothesis_production_ids_resulting_from_applyrule_actions = []
            hypothesis_scores_resulting_from_applyrule_actions = []

            for hypothesis_id, hypothesis in enumerate(hypotheses):
                action_types = self.transition_sys.get_valid_continuation_types(
                    hypothesis)
                for action_type in action_types:
                    if action_type == ApplyRuleAction:
                        productions = self.transition_sys.get_valid_continuating_productions(
                            hypothesis)
                        for production in productions:
                            production_id = self.grammar.production_to_id[
                                production]
                            hypothesis_production_ids_resulting_from_applyrule_actions.append(
                                production_id)
                            production_score = log_p_of_each_apply_rule_action[
                                hypothesis_id, production_id].data[0]
                            new_hypothesis_score = hypothesis.score + production_score
                            hypothesis_scores_resulting_from_applyrule_actions.append(
                                new_hypothesis_score)
                            hypothesis_ids_for_which_we_applyrule.append(
                                hypothesis_id)
                    elif action_type == ReduceAction:
                        reduce_score = log_p_of_each_apply_rule_action[
                            hypothesis_id, len(self.grammar)].data[0]
                        new_hypothesis_score = hypothesis.score + reduce_score
                        hypothesis_scores_resulting_from_applyrule_actions.append(
                            new_hypothesis_score)
                        hypothesis_production_ids_resulting_from_applyrule_actions.append(
                            len(self.grammar))
                        hypothesis_ids_for_which_we_applyrule.append(
                            hypothesis_id)
                    else:
                        hypothesis_ids_for_which_we_gentoken.append(
                            hypothesis_id)
                        hypothesis_copy_probabilities_by_token = dict()
                        copied_unks_info = []
                        for token, token_positions in source_token_positions_by_token.items(
                        ):
                            total_copy_prob = torch.gather(
                                p_of_copying_from_source_sentence[
                                    hypothesis_id], 0,
                                Variable(torch.cuda.LongTensor(
                                    token_positions))).sum()
                            p_of_making_copy = p_of_making_primitive_prediction[
                                hypothesis_id, 1] * total_copy_prob
                            if token in primitive_vocab:
                                token_id = primitive_vocab[token]
                                p_of_each_primitive[
                                    hypothesis_id,
                                    token_id] = p_of_each_primitive[
                                        hypothesis_id,
                                        token_id] + p_of_making_copy
                                hypothesis_copy_probabilities_by_token[
                                    token] = (token_positions,
                                              p_of_making_copy.data[0])
                            else:
                                copied_unks_info.append({
                                    'token':
                                    token,
                                    'token_positions':
                                    token_positions,
                                    'copy_prob':
                                    p_of_making_copy.data[0]
                                })
                        if len(copied_unks_info) > 0:
                            copied_unk = np.array([
                                unk['copy_prob'] for unk in copied_unks_info
                            ]).argmax()
                            copied_token = copied_unks_info[copied_unk][
                                'token']
                            p_of_each_primitive[
                                hypothesis_id,
                                primitive_vocab.unk_id] = copied_unks_info[
                                    copied_unk]['copy_prob']
                            hypothesis_unknowns_resulting_from_gentoken.append(
                                copied_token)
                            hypothesis_copy_probabilities_by_token[
                                copied_token] = (
                                    copied_unks_info[copied_unk]
                                    ['token_positions'],
                                    copied_unks_info[copied_unk]['copy_prob'])

            new_hypothesis_scores = None
            if hypothesis_scores_resulting_from_applyrule_actions:
                new_hypothesis_scores = Variable(
                    torch.cuda.FloatTensor(
                        hypothesis_scores_resulting_from_applyrule_actions))
            if hypothesis_ids_for_which_we_gentoken:
                log_p_of_each_primitive = torch.log(p_of_each_primitive)
                gen_token_new_hypothesis_scores = (
                    hypothesis_scores[hypothesis_ids_for_which_we_gentoken].
                    unsqueeze(1) + log_p_of_each_primitive[
                        hypothesis_ids_for_which_we_gentoken, :]).view(-1)

                if new_hypothesis_scores is None:
                    new_hypothesis_scores = gen_token_new_hypothesis_scores
                else:
                    new_hypothesis_scores = torch.cat([
                        new_hypothesis_scores, gen_token_new_hypothesis_scores
                    ])
            top_new_hypothesis_scores, top_new_hypothesis_positions = torch.topk(
                new_hypothesis_scores,
                k=min(new_hypothesis_scores.size(0),
                      15 - len(finished_hypotheses)))

            working_hypothesis_ids = []
            new_hypotheses = []
            for new_hypothesis_score, new_hypothesis_position in zip(
                    top_new_hypothesis_scores.data.cpu(),
                    top_new_hypothesis_positions.data.cpu()):
                action_info = ActionInfo()
                if new_hypothesis_position < len(
                        hypothesis_scores_resulting_from_applyrule_actions):
                    previous_hypothesis_id = hypothesis_ids_for_which_we_applyrule[
                        new_hypothesis_position]
                    previous_hypothesis = hypotheses[previous_hypothesis_id]
                    production_id = hypothesis_production_ids_resulting_from_applyrule_actions[
                        new_hypothesis_position]
                    if production_id < len(self.grammar):
                        apply_production = self.grammar.id_to_production[
                            production_id]
                        action = ApplyRuleAction(apply_production)
                    else:
                        action = ReduceAction()
                else:
                    token_id = (
                        new_hypothesis_position -
                        len(hypothesis_scores_resulting_from_applyrule_actions)
                    ) % p_of_each_primitive.size(1)
                    previous_hypothesis_id = hypothesis_ids_for_which_we_gentoken[
                        (new_hypothesis_position -
                         len(hypothesis_scores_resulting_from_applyrule_actions
                             )) // p_of_each_primitive.size(1)]
                    previous_hypothesis = hypotheses[previous_hypothesis_id]
                    if token_id == primitive_vocab.unk_id:
                        if hypothesis_unknowns_resulting_from_gentoken:
                            token = hypothesis_unknowns_resulting_from_gentoken[
                                (new_hypothesis_position - len(
                                    hypothesis_scores_resulting_from_applyrule_actions
                                )) // p_of_each_primitive.size(1)]
                        else:
                            token = primitive_vocab.id_to_word[
                                primitive_vocab.unk_id]
                    else:
                        token = primitive_vocab.id_to_word[token_id]
                    action = GenTokenAction(token)

                    if token in source_token_positions_by_token:
                        action_info.copy_from_src = True
                        action_info.src_token_position = source_token_positions_by_token[
                            token]

                action_info.action = action
                action_info.t = t
                if t > 0:
                    action_info.parent_t = previous_hypothesis.frontier_node.created_time
                    action_info.frontier_prod = previous_hypothesis.frontier_node.production
                    action_info.frontier_field = previous_hypothesis.frontier_field.field
                new_hypothesis = previous_hypothesis.clone_and_apply_action_info(
                    action_info)
                new_hypothesis.score = new_hypothesis_score

                if new_hypothesis.completed:
                    finished_hypotheses.append(new_hypothesis)
                else:
                    new_hypotheses.append(new_hypothesis)
                    working_hypothesis_ids.append(previous_hypothesis_id)
            if working_hypothesis_ids:
                hypotheses_states = [
                    hypotheses_states[i] + [(h_t[i], cell[i])]
                    for i in working_hypothesis_ids
                ]
                h_t1 = (h_t[working_hypothesis_ids],
                        cell[working_hypothesis_ids])
                att_t1 = attention[working_hypothesis_ids]
                hypotheses = new_hypotheses
                hypothesis_scores = Variable(
                    torch.cuda.FloatTensor([hyp.score for hyp in hypotheses]))
                t += 1
            else:
                break

        finished_hypotheses.sort(key=lambda hyp: -hyp.score)

        return finished_hypotheses
Example #26
target_stems_and_suffixes = get_target_stems_and_suffixes(target_stems, surface_suffixes)


corpus = target_stems + target_stems_and_suffixes
corpus = Corpus(corpus)


data = corpus.get_words()
max_word_length_in_data = max([len(word) for word in data])

#initial hypothesis

initial_lexicon = Lexicon(data, max_word_length_in_data)
initial_grammar = Grammar(initial_constraint_set, initial_lexicon)
initial_hypothesis = Hypothesis(initial_grammar, data)
initial_energy = initial_hypothesis.get_energy()
print(f"initial hypothesis: {initial_hypothesis.get_recent_energy_signature()}")
print(f"initial energy: {initial_energy}")
print_empty_line()


target_hmm_transitions = {INITIAL_STATE: ["q1"],
                          "q1": ["q2", FINAL_STATE],
                          "q2": [FINAL_STATE]}

target_hmm_emissions = {"q1": target_stems,
                        "q2": ["kun"]}

target_hmm_inner_states = ["q1", "q2"]
target_hmm = HMM(target_hmm_transitions, target_hmm_emissions, target_hmm_inner_states)
def empty_hypothesis() -> Hypothesis:
    return Hypothesis([], 0.0, 0.0, None)
    def init_target_GD(self, time):
        if len(self.unmatched.keys()) < 2:
            return

        for cid, value in self.unmatched.items():
            _detections = list()
            for detection in value['detections']:
                if get_believe(detection) > self.conf_threshold:
                    _detections.append(detection)
            value['detections'] = np.array(_detections)

        H = []
        for idx, (key, value) in enumerate(self.unmatched.items()):
            if idx == 0:
                H = [
                    Hypothesis(value['camera'], detection, self.epi_threshold)
                    for detection in value['detections']
                ]
            else:
                n_hyp = len(H)
                n_det = len(value['detections'])
                C = np.zeros((n_hyp, n_det))
                Mask = np.zeros_like(C).astype('int32')

                for hid, hyp in enumerate(H):
                    for pid, detection in enumerate(value['detections']):
                        pose_cost, veto = hyp.calculate_cost(
                            value['camera'], detection)
                        C[hid, pid] = pose_cost
                        if veto:
                            Mask[hid, pid] = 1
                rows, cols = linear_sum_assignment(C)
                handled_pids = set()
                for hid, pid in zip(rows, cols):
                    is_masked = Mask[hid, pid] == 1
                    handled_pids.add(pid)
                    if is_masked:
                        H.append(
                            Hypothesis(value['camera'],
                                       value['detections'][pid],
                                       self.epi_threshold))
                    else:
                        H[hid].merge(value['camera'], value['detections'][pid])

                for pid, detection in enumerate(value['detections']):
                    if pid not in handled_pids:
                        H.append(
                            Hypothesis(value['camera'],
                                       value['detections'][pid],
                                       self.epi_threshold))
        for hid, hyp in enumerate(H):
            if hyp.size() > 1:
                # cameras, poses2d, pose3d, joints_views, succeed = hyp.get_3dpose(self.args.lambda_t)
                cameras, poses2d, pose3d, joints_views, succeed = hyp.get_3dpose_jf(
                    self.args.init_threshold, self.args.lambda_t)
                if not succeed:
                    continue
                if len(self.tracks_ids) == 0:
                    track_id = 0
                else:
                    track_id = max(self.tracks_ids) + 1
                self.tracks.append(
                    IterTrack(track_id, time, cameras, poses2d, pose3d,
                              joints_views, self.args, self.build3D))
                self.tracks_ids.add(track_id)
def get_target_hypo(self):
    target_hmm = deepcopy(self.simulation.target_hmm)
    target_rule_set = RuleSet.load_from_flat_list(self.simulation.target_tuple[1])
    return Hypothesis(Grammar(target_hmm, target_rule_set))

def hypo_from_strings(self, hmm_str, rules_str):
    final_hmm = self.parse_hmm(hmm_str)
    final_rule_set = self.parse_rules(rules_str)
    return Hypothesis(Grammar(final_hmm, final_rule_set))