Example #1
    def get_observations(self):
        self.print_conditions()

        contents = self.contents[self.index + 1:]

        correct_answers = []
        correct_answer = ()
        observations = []
        observation = ()

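        # Each line holds "<correct letter> <observed letter>"; a literal "_ _"
        # line terminates the current observation sequence.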
        for (i, line) in enumerate(contents):
            read_line = line.rstrip()
            letters = read_line.split(" ")

            if read_line == "_ _":
                observations.append(observation)
                observation = ()
                continue
            if i + 1 == len(contents):
                observation = observation + (letters[1], )
                correct_answers.append(letters[0])
                observations.append(observation)
                observation = ()
                break

            observation = observation + (letters[1], )
            correct_answers.append(letters[0])

        corrected_letters = []

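        # Decode each observation sequence and collect the reconstructed states.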
        hit = 0
        total = 0
        for observation in observations:
            viterbi = Viterbi(observation, self.states, self.start_probability,
                              self.transition_probability,
                              self.emission_probability)
            corrected_letters = corrected_letters + viterbi.run_viterbi()[1]

        print "Some of the reconstructed state sequence: "
        for (i, letter) in enumerate(corrected_letters):
            if letter == correct_answers[i]:
                hit += 1
            if self.iteration < 100:
                print letter,
                self.iteration += 1
            total += 1

        print "\nPercent correctness:", hit / float(total) * 100
Example #2
def predict(train_path, threshold, reg_lambda, test_path, conf, beam_width, file_name):
    v = MaximumEntropyMarkovModel.load_v_from_pickle(dump_weights_path='weights', threshold=threshold,
                                                     reg_lambda=reg_lambda)
    ft_statistics = FeatureStatistics(input_file_path=train_path, threshold=threshold, config=conf)
    ft_statistics.pre_process(fill_possible_tag_dict=False)
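    # Competition ('comp') files carry no gold tags, so their histories are built differently.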
    is_comp = 'comp' in file_name
    if is_comp:
        test_sentence_hist_list = FeatureStatistics.fill_comp_ordered_history_list(file_path=test_path)
    else:
        test_sentence_hist_list = FeatureStatistics.fill_tagged_ordered_history_list(file_path=test_path, is_test=True)
    tag_set = ft_statistics.tags_set
    all_possible_tags_dict = ft_statistics.hist_to_feature_vec_dict
    get_ft_from_hist_func = ft_statistics.get_non_zero_feature_vec_indices_from_history
    word_possible_tag_set = ft_statistics.word_possible_tag_set
    word_possible_tag_with_threshold_dict = ft_statistics.word_possible_tag_with_threshold_dict
    rare_words_tags = ft_statistics.rare_words_tags

    viterbi = Viterbi(
        v=v, sentence_hist_list=test_sentence_hist_list, tags_list=tag_set,
        all_possible_tags_dict=all_possible_tags_dict, get_feature_from_hist=get_ft_from_hist_func,
        word_possible_tag_set=word_possible_tag_set,
        word_possible_tag_with_threshold_dict=word_possible_tag_with_threshold_dict,
        rare_words_tags=rare_words_tags,
        threshold=threshold,
        reg_lambda=reg_lambda,
        file_name=file_name,
        beam_width=beam_width
    )
    viterbi.predict_all_test(num_workers=4, is_comp=is_comp)
Example #3
    def test(self):
        v = Viterbi("model.txt")
        predicted_slot_count = 0
        actual_slot_count = 0
        hit_count = 0
        test_set_size = len(self.test_set[0])
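        # test_set[0] holds word-index sequences; test_set[2] holds the matching label-index sequences.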

        print("poccessing...")
        for i in range(test_set_size):
            if i != 0 and i % 100 == 0:
                print(str(i) + " done")
            sentence = list()
            for wordidx in self.test_set[0][i]:
                sentence.append(self.__idx2words[wordidx])
            predicted_seq = v.poccess(sentence)
            predicted_slot = extract_slot(predicted_seq)

            label_seq = list()
            for labelidx in self.test_set[2][i]:
                label_seq.append(self.__idx2labels[labelidx])
            actual_slot = extract_slot(label_seq)

            for item in predicted_slot:
                if item in actual_slot:
                    hit_count += 1
            predicted_slot_count += len(predicted_slot)
            actual_slot_count += len(actual_slot)

        print("test set size:" + str(test_set_size))
        print("predicted slot:" + str(predicted_slot_count) + " actual slot:" +
              str(actual_slot_count) + " hit:" + str(hit_count))
        print("Precision:" + str(hit_count / predicted_slot_count))
        print("Recall:" + str(hit_count / actual_slot_count))
        print("F1score:" + str(2 * hit_count /
                               (actual_slot_count + predicted_slot_count)))
Example #4
    def test_wikipedia(self):
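        # Transition model: each state maps to its (next_state, probability) pairs.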
        hmm = {
            'Rainy': [('Rainy', 0.7), ('Sunny', 0.3)],
            'Sunny': [('Rainy', 0.4), ('Sunny', 0.6)]
        }

        start_probabilities = {'Rainy': 0.6, 'Sunny': 0.4}

        emission_probabilities = {
            'Rainy': {
                'walk': 0.1,
                'shop': 0.4,
                'clean': 0.5
            },
            'Sunny': {
                'walk': 0.6,
                'shop': 0.3,
                'clean': 0.1
            }
        }

        vit = Viterbi(hmm,
                      lambda state, obs: emission_probabilities[state][obs])

        (v, p) = vit.step('walk', start_probabilities)
        (v, p) = vit.step('shop', v, p)
        (v, p) = vit.step('clean', v, p)

        max_state = max(v, key=lambda x: v[x])
        assert (p[max_state] == ['Sunny', 'Rainy', 'Rainy'])
Example #5
def main():
    bijen = Bijenkhan(BIJEN_CORPUS)
    sents_tags = []
    for sents, tags in bijen.sent_tag_gen(100):
        s = zip(sents, tags)
        sents_tags.extend(s)
    random.shuffle(sents_tags)
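    # Hold out the first NUM_TEST_SAMPES shuffled pairs for testing; train on the rest.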
    test_sents_tags = sents_tags[:NUM_TEST_SAMPES]
    train_sents_tags = sents_tags[NUM_TEST_SAMPES:]
    viterbi = Viterbi(len(bijen.get_tags()),
                      len(bijen.get_vocab()),
                      bijen.get_tags(),
                      bijen.get_bigram_tags(),
                      train_sents_tags)

    for test_sent, true_labels in test_sents_tags:
        print(GREEN + 'True labels: ', true_labels)
        # Drop the first and last tokens (start/end markers, presumably) before decoding.
        pred_labels = viterbi.viterbi(test_sent[1:-1])
        print(RED + 'Pred labels: ', pred_labels)
        print(CYAN + f'Accuracy: {accuracy_score(true_labels, pred_labels)}')
        print(CYAN + f'Precision: {precision_score(true_labels, pred_labels, average="macro")}')
        print(CYAN + f'Recall: {recall_score(true_labels, pred_labels, average="macro")}')
        print('\n'*2)
Example #6
    def run_viterbi(self):

        viterbi = Viterbi(self.observations, self.states,
                          self.start_probability, self.transition_probability,
                          self.emission_probability)
        (_, deduced_path) = viterbi.run_viterbi()
        self.checkSolutions(deduced_path)
Example #7
    def get_example(self, idx):
        valid_indices = np.arange(self.dataset_length)[self._label_mask(idx)]
        # Computing distances over all states takes too long, so sample 1000 candidates.
        valid_indices_choice = np.random.choice(valid_indices, size=1000)
        valid_states = self.states[valid_indices_choice][:, 0, ...]
        observation = self.states[idx]
        hidden_states = Viterbi(valid_states)(observation, self.blanket_size)
        example = self.base_dataset[idx]
        example["neighbours"] = self.base_dataset[hidden_states[:, 0].astype(int)]
        return example
Example #8
def example2():
    likelihood = np.loadtxt('likelihood.txt')
    print('probs shape: %s ' % str(likelihood.shape))
    transcript = [2, 1, 3, 1, 3]
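    # Align every frame of the likelihood matrix with a state of the transcript.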

    viterbi = Viterbi(transcript, likelihood)
    alignment = viterbi.inference()
    assert len(alignment) == likelihood.shape[0]
    counter = count(alignment, transcript)
    print(alignment)
    print(counter)
Example #9
def example3():
    likelihood = np.loadtxt('likelihood.txt')
    print('probs shape: %s ' % str(likelihood.shape))
    transcript = ['a', 'b', 'c', 'b', 'c']
    state2idx = {'a': 2, 'b': 1, 'c': 3}
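    # state2idx maps the symbolic transcript labels onto likelihood-matrix columns.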

    viterbi = Viterbi(transcript, likelihood, state2idx=state2idx)
    alignment = viterbi.inference()
    assert len(alignment) == likelihood.shape[0]
    counter = count(alignment, transcript)
    print(alignment)
    print(counter)
Example #10
    def test(self, word_seq_path, output_path):
        original_seq, processed_seq = self.__prepare_word_seq(word_seq_path)
        decoder = Viterbi(self.vocab_list, self.tags, self.trans_prob,
                          self.emit_prob)
        tags_pred, prob = decoder.decode(processed_seq)

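        # Empty entries in original_seq mark sentence boundaries; emit a blank line for them.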
        with open(output_path, "w") as out:
            for word, tag in zip(original_seq, tags_pred):
                if not word:
                    out.write("\n")
                else:
                    out.write("{0}\t{1}\n".format(word, tag))
Example #11
    def train(self, sequences, iterations=3):
        vit = Viterbi()
        for _ in range(iterations):
            self.log_space()
            for name, seq in sequences.items():
                seq['Z'] = vit.decode(self, seq['X'])
                print(seq['Z'])
            # return from log space before re-estimating by counting
            self.delog()
            self.train_by_counting(sequences)
            print(Model(self.keys, self.model, self.labels))

        return Model(self.keys, self.model, self.labels)
Example #12
    def __init__(self,
                 hmm,
                 emission_probability,
                 constraint_length=10,
                 MAX_DIST=500,
                 priors=None,
                 smallV=0.00000000001):
        # initialize spatial index
        self.previous_obs = None

        if priors is None:
            priors = {state: 1.0 / len(hmm) for state in hmm}

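        # Index each located state's bounding box so nearby candidates can be looked up quickly.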
        state_spatial_index = Rtree()
        unlocated_states = []
        id_to_state = {}
        state_id = 0
        for state in hmm:
            geom = self.geometry_of_state(state)
            if not geom:
                unlocated_states.append(state)
            else:
                ((lat1, lon1), (lat2, lon2)) = geom
                state_spatial_index.insert(state_id, (min(lon1, lon2), min(
                    lat1, lat2), max(lon1, lon2), max(lat1, lat2)))
                id_to_state[state_id] = state
                state_id += 1

        def candidate_states(obs):  # previously took (lat, lon) in place of obs
            geom = self.geometry_of_observation(obs)
            if geom is None:
                return hmm.keys()
            else:
                (lat, lon) = geom
                nearby_states = state_spatial_index.intersection(
                    (lon - MAX_DIST / METERS_PER_DEGREE_LONGITUDE,
                     lat - MAX_DIST / METERS_PER_DEGREE_LATITUDE,
                     lon + MAX_DIST / METERS_PER_DEGREE_LONGITUDE,
                     lat + MAX_DIST / METERS_PER_DEGREE_LATITUDE))

                candidates = [id_to_state[sid]
                              for sid in nearby_states] + unlocated_states
                return candidates

        self.viterbi = Viterbi(hmm,
                               emission_probability,
                               constraint_length=constraint_length,
                               priors=priors,
                               candidate_states=candidate_states,
                               smallV=smallV)
Example #13
    def _writer_viterbi(self):
        sentence = []
        original_sentence = []
        tag_set = []
        lines_to_write = []

        with open(self.input_file, "r") as f:
            data = f.readlines()

            for line in data:

                words = line.split()
                if words and words[1] != '.':
                    current_word = words[1]
                    local_tag_set = []
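                    # Emission keys look like 'word|tag'; collect every tag seen with this word.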
                    for k, v in emission_probability.items():
                        keys = k.split('|')
                        if keys[0] == words[1]:
                            local_tag_set.append(keys[1])
                    if not local_tag_set:
                        words[1] = UNKNOWN_WORD
                        local_tag_set = get_unknown_word_tags()
                    sentence.append(words[1])
                    original_sentence.append(current_word)
                    tag_set.extend(local_tag_set)
                elif words and words[1] == '.':
                    # send sentence to viterbi to compute tags.
                    viterbi = Viterbi(
                        tag_set=list(set(tag_set)),
                        word_set=sentence,
                        transition_probability=transition_probability,
                        emission_probability=emission_probability)
                    viterbi_states = viterbi.get_viterbi_states()

                    for idx in range(len(sentence)):
                        lines_to_write.append(
                            str(idx + 1) + '\t' + original_sentence[idx] +
                            '\t' + viterbi_states[idx] + '\n')

                    lines_to_write.append(
                        str(len(sentence) + 1) + '\t' + '.' + '\t' + '.' +
                        '\n')
                    lines_to_write.append('\n')
                    sentence = []
                    original_sentence = []
                    tag_set = []

        with open(self.output_path, 'w') as of:
            of.writelines(lines_to_write)
Example #14
    def cross_validation(self):
        cv_data = self.group_data(self.data)
        # The transition probabilities are done on the entire train and not on each fold.
        trans_probs = self.comp_transition_prob(self.data)
        # Do 10-fold cross validation below.
        k = 10
        for i in range(0, k):
            train_set = []
            valid_set = cv_data[i]
            print("Validation Fold ", i + 1)
            for j in range(0, k):
                if j != i:
                    train_set += cv_data[j]
            # Do the Naive Bayes Classification here
            self.estimate_nb(train_set)
            nb_pred_labels = self.predict(valid_set)
            nb_act_labels = [item[0] for item in valid_set]
            nb_acc = len(
                np.where(
                    np.array(nb_pred_labels) == np.array(nb_act_labels))[0])
            print("Validation Accuracy of Naive Bayes ",
                  nb_acc / len(nb_act_labels))

            # The emission probabilities are done for each cv dataset.
            emission_probs = self.comp_emission_prob(nb_pred_labels,
                                                     nb_act_labels)
            valid_words = self.dt.build_test_words(valid_set)
            # Do the Viterbi step here
            vt_pred_labels = []
            nb_pred_labels = []
            itr = 0
            for w in valid_words:
                nb_pred_word = self.predict(valid_set[itr:(itr + len(w))])
                nb_pred_labels += nb_pred_word
                vit = Viterbi(emission_probs, trans_probs, nb_pred_word)
                vt_pred_labels += vit.hmmWord()
                itr += len(w)
            nb_acc = len(
                np.where(
                    np.array(nb_pred_labels) == np.array(nb_act_labels))[0])
            vt_acc = len(
                np.where(
                    np.array(vt_pred_labels) == np.array(nb_act_labels))[0])
            print("Validation Accuracy of Viterbi ",
                  vt_acc / len(nb_act_labels))
Example #15
    def __init__(self, ref_frames_data_filename, ref_pickle_filename, test_pickle_filename):
        print "init..."
        self.previous_obs = None
        self.image_processor = ImageProcessor()

        self.descriptors_ref = self.image_processor.load_sift(ref_pickle_filename)
        self.descriptors_test = self.image_processor.load_sift(test_pickle_filename)

        hmm = self.ref_frames_data_to_hmm(ref_frames_data_filename)

        # emission_probabilities = map(lambda x: complementary_normal_distribution_cdf(x, 0, EMISSION_SIGMA),
        #                              range(0, int(3.0 * EMISSION_SIGMA)))

        priors = {state: 1.0 / len(hmm) for state in hmm}

        self.viterbi = Viterbi(hmm, self.emission_probability,
                               # BE CAREFUL with constraint_length: walking may take a long
                               # time, so an even higher value may be needed here.
                               constraint_length=2500,
                               priors=priors)
Example #16
    def test(self):
        print('Test started...')
        start_test = time.time()
        self.pred_tags = []
        test_orig, test_prep = dataloader(self.corpus + TEST_WORDS, 'test')
        tagger = Viterbi(self.vocab, self.tags, test_prep, self.A, self.B)
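        # self.A / self.B: the trained transition and emission probability matrices.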
        preds = tagger.decode()
        for word, tag in zip(test_orig, preds):
            self.pred_tags.append((word, tag))

        with open(PRED_T_POS, 'w') as out:
            for word, tag in self.pred_tags:
                if not word:
                    out.write("\n")
                else:
                    out.write("{0}\t{1}\n".format(word, tag))
        print('Test finished, file has been written in ' +
              str(time.time() - start_test) + ' seconds')
Example #17
    def viterbi(self, train_path, test_path, output_path):

        self._nerdic = NERDic(train_path)
        io = self._io
        train_sentences = []
        test_sentences = []
        for words, poss, labels in io.read_sentences(train_path):
            train_sentences.append(Sentence(labels, words, poss, self._nerdic))

        for words, poss, labels in io.read_sentences(test_path):
            test_sentences.append(Sentence(labels, words, poss, self._nerdic))

        viterbi = Viterbi(9)  # 9 = the number of NER label classes (assumed)
        viterbi.train(train_sentences)
        for sent in test_sentences:
            predict_ids = viterbi.search(sent)
            sent.add_predict(predict_ids)

        io.write_sentences(output_path, test_sentences)
Example #18
    def validate(self):
        print('Validation started...')
        start_val = time.time()
        self.pred_tags = []
        valid_orig, valid_prep = dataloader(self.corpus + VALIDATE_WORDS,
                                            'validate')
        tagger = Viterbi(self.vocab, self.tags, valid_prep, self.A, self.B)
        preds = tagger.decode()
        for word, tag in zip(valid_orig, preds):
            self.pred_tags.append((word, tag))

        with open(PRED_V_POS, 'w') as out:
            for word, tag in self.pred_tags:
                if not word:
                    out.write("\n")
                else:
                    out.write("{0}\t{1}\n".format(word, tag))
        print('Validation ended, file has been written in ' +
              str(time.time() - start_val) + ' seconds')
Example #19
    def predict(self):

        nvi = 12  # number of Viterbi training passes over the training set

        for i in range(nvi):

            predicted_labels_training_set = []

            print("starting viterbi run {}...".format(i))

            for j, sent in enumerate(self.training_set):

                predicted_labels_training_set.append(
                    Viterbi(sent, self.event_names, self.fweights).run())

                tmp_sent = copy.deepcopy(sent)
                tmp_sent["events"] = predicted_labels_training_set[j]

                for w_idx, w in enumerate(sent["words"]):
                    # extract features from each word of the correctly labelled sentence...
                    ff = self.create_features(sent, w_idx, "train")
                    # ...and from the labelling produced by Viterbi
                    ff_pr = self.create_features(tmp_sent, w_idx, "train")

                    if sent["events"][i] != tmp_sent["events"][i]:

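                        # Perceptron-style update: penalise features of the wrong
                        # prediction and reward features of the gold labelling.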
                        for k in ff_pr:
                            if k in self.fweights:
                                self.fweights[k] -= 1
                        for g in ff:
                            if g in self.fweights:
                                self.fweights[g] += 1

            # now get scores for this Viterbi iteration
            training_labels = [st["events"] for st in self.training_set]
            # print("have {} training sentences and {} predicted ones".format(len(training_labels), len(predicted_labels_training_set)))
            Scores(training_labels, predicted_labels_training_set).show()

            with open(self.feature_trained_file_path, "w+") as f:
                json.dump(self.fweights, f)
Example #20
def test_viterbi_decode():
    '''
    Test case based on HW3 question.
    '''
    log = logging.getLogger('test viterbi')

    ZERO = 0.000000000000000001
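    # ZERO stands in for impossible events so log-probabilities stay finite.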
    obs_space = ["moo", "hello", "quack", START_END_OBS]
    states = ["Cow", "Duck", START_END_TAG]
    trans_prob = [[0.5, 0.3, 0.2], [0.3, 0.5, 0.2], [1.0, ZERO, ZERO]]
    emit_prob = [[0.9, 0.1, ZERO, ZERO], [ZERO, 0.4, 0.6, ZERO],
                 [ZERO, ZERO, ZERO, 1.0]]

    decoder = Viterbi(obs_space, states, trans_prob, emit_prob)

    obs = ["moo", "hello", "quack", START_END_OBS]
    seq, prob = decoder.decode(obs)

    log.debug("seq: " + str(seq))
    log.debug("log_prob: " + str(prob))
    assert abs(prob - (-5.03903)) < 1e-4 and \
           seq == ["Cow", "Duck", "Duck", START_END_TAG]
Example #21
    def run_viterbi(self):
        contents = self.contents[self.index + 1:]

        observations = ()
        correct_path = []

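        # Each line holds "<coordinate> <color>"; a lone "." ends the current sequence.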
        for line in contents:
            read_line = line.rstrip()
            if read_line == ".":
                viterbi = Viterbi(observations, self.states,
                                  self.start_probability,
                                  self.transition_probability,
                                  self.emission_probability)
                _, guessed_path = viterbi.run_viterbi()
                self.iteration += 1
                self.check_correctness(guessed_path, correct_path)
                observations = ()
                correct_path = []
                continue

            coordinate, color = read_line.split(" ")
            observations = observations + (color,)
            correct_path.append(coordinate)
Example #22
import numpy as np
from viterbi import Viterbi, add_one_smoothing

TAGS = ['N', 'C', 'V', 'J']
LEXICON = ['that', 'is', 'not', 'it', 'good', 'bad']

Pi = [1 / 8, 3 / 8, 3 / 8, 1 / 8]

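# count_A: tag-to-tag transition counts; count_B: tag-to-word emission counts.
# add_one_smoothing later turns these raw counts into smoothed probabilities.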
count_A = np.array([[2., 0., 3., 1.], [2., 0., 0., 0.], [4., 0., 1., 0.],
                    [0., 0., 0., 0.]])

count_B = np.array([[4., 0., 2., 2., 0., 0.], [2., 0., 0., 0., 0., 0.],
                    [0., 6., 0., 0., 0., 0.], [0., 0., 0., 0., 1., 0.]])

if __name__ == '__main__':
    A = add_one_smoothing(count_A)
    B = add_one_smoothing(count_B)

    viterbi = Viterbi(Pi, A, B, TAGS, LEXICON)

    sentence1 = 'bad is not good'
    sentence2 = 'is it bad'
    pred1 = viterbi.predict_tags(sentence1)
    pred2 = viterbi.predict_tags(sentence2)
    print(pred1)
    print(pred2)
Example #23
import argparse

import train
from opt_results1 import simple_vec
from viterbi import Viterbi

if __name__ == '__main__':

    parser = argparse.ArgumentParser()
    parser.add_argument('--lamb', type=float, default=0)
    parser.add_argument('-f',
                        '--families',
                        nargs='+',
                        type=int,
                        default=[0, 1, 2, 3, 4, 5, 7, 8, 15, 16, 17, 19, 20])
    args = parser.parse_args()

    print(args.lamb, " ", args.families)

    vec = train.calc_weight_vector("train.wtag",
                                   families=args.families,
                                   lamb=args.lamb)
    path = train.create_and_get_path(args.families, args.lamb)
    file = open(path, "w")
    file.write("simple_vec = %s\n" % vec.x.tolist())
    file.close()

    #vec = simple_vec

    vit = Viterbi(vec.x.tolist(), args.families)
    vit.evaluate("test.wtag", 3, 0, args.lamb)

    print(args.lamb)
Example #24
File: toy.py Project: w-dq/PY2HZ
from utils import HmmParam
from viterbi import Viterbi

hmm = HmmParam()
# print(hmm.emission_table['data']['北京'])
# print(hmm.transition_table['data']['爱']['北京'])
# print(hmm.transition_table['data']['爱'])
# print(hmm.py2hz_dict['beijing'])

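# Decode the pinyin syllables; the final argument (5) appears to select the top-5 candidate paths.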
print(Viterbi(hmm, ['baobao', 'ye', 'tai', 'bang', 'le', 'ba'], 5))
Example #25
0
    print('Beam search:', beam_diameter)
    tagger = BeamSearch(counts, beam_diameter)

    test_sentences = conll.read_sentences(test_file)
    formatted_test_corpus = [
        conll.split_rows(sentence, column_names) for sentence in test_sentences
    ]

    for sentence in tqdm(formatted_test_corpus):
        tagger.tag(sentence)

    cm = ConfusionMatrix(formatted_test_corpus, POS_key)
    cm.compute_matrix()
    print("Accuracy: ", cm.compute_accuracy())

    print('Viterbi')
    tagger = Viterbi(counts)

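    # Re-read the test corpus so the Viterbi tagger starts from fresh, untagged sentences.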
    test_sentences = conll.read_sentences(test_file)
    formatted_test_corpus = [
        conll.split_rows(sentence, column_names) for sentence in test_sentences
    ]

    for sentence in tqdm(formatted_test_corpus):
        tagger.tag(sentence)

    cm = ConfusionMatrix(formatted_test_corpus, POS_key)
    cm.compute_matrix()
    print("Accuracy: ", cm.compute_accuracy())
    cm.print()
Example #26
#outputs.to_project_1_sequences_file_from_posterior_decoding(sequences.get(), probs, 'posterior-decoding-sequences.txt')

outputs.to_project_1_sequences_file(sequences.get(), probs, 'viterbi-sequences.txt')
outputs.to_project_1_probs_file(sequences.get(), probs, 'viterbi-probs.txt')


"""
if __name__ == '__main__':
    model = hmm.Model(KEYS)
    model.load(HMMFILE)
    sequences = sequences.Sequences(SEQUENCEFILE)
    # load methods
    vit = Viterbi()
    post = Posterior()

    # viterbi
    probs = {}
    for key, sequence in sequences.get().items():
        probs[key] = vit.decode(model, sequence)

    outputs.to_project_2_viterbi(sequences.get(), probs,
                                 'pred-test-sequences-project2-viterbi.txt')

    probs = {}
    for key, value in sequences.get().items():
        sequence = {'Z': post.decode(model, value), 'X': value}
        log_joint = compute_hmm(model, sequence)
Example #27
    except FileNotFoundError:
        print("Weights were not found")
        exit(0)

    predictions = list()
    incorrect_count = 0
    correct_count = 0
    start_time = time()
    incorrect_tags = dict()
    confusion_matrix = pd.DataFrame(index=tags, columns=tags).fillna(0)

    for i in range(test_data_size):
        # print(i+1,'/',test_data_size)
        sentence = [x[0] for x in test_data.data[i]]
        test_tags = [x[1] for x in test_data.data[i]]
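        # w_0: the loaded weight vector; the final argument (5) is presumably the beam width.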
        viterbi = Viterbi(tags, gen.transform, sentence, w_0, 5)
        predicted_tags = viterbi.run()
        predictions.append((sentence, predicted_tags))

        for t, p in zip(test_tags, predicted_tags):
            if t == p:
                correct_count += 1
            else:
                incorrect_count += 1
                if t in incorrect_tags:
                    incorrect_tags[t] += 1
                else:
                    incorrect_tags[t] = 1
            confusion_matrix.loc[t, p] += 1

    end_time = time() - start_time
Example #28
File: test.py Project: w-dq/PY2HZ
import json

from tqdm import tqdm

from utils import HmmParam
from viterbi import Viterbi

with open('./test/test_set.json') as f:
    test_set = json.load(f)

hmm = HmmParam()
# hmm.py2hz_dict['不']
# hmm.emmission['不']
count_single = 0
correct_single = 0
count_sentence = 0
correct_sentence = 0
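# Score accuracy at both the character ('single') and whole-sentence level.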
for i in tqdm(range(len(test_set))):
    count_sentence += 1
    test = test_set[i]
    flag = True
    answer = Viterbi(hmm, test['py'], 5)[0].path
    # print(answer)
    for idx, an in enumerate(answer):
        count_single += 1
        if an == test['hz'][idx]:
            correct_single += 1
        else:
            flag = False
    if flag:
        correct_sentence += 1

print('single:', correct_single / count_single)
print('sentence:', correct_sentence / count_sentence)
Example #29
    # Set up the Viterbi decoder
    states = ["Buy", "Sell"]
    obs = []
    obs_prices = []
    obs_delta = []
    prev_price = hist_prices["Adj Close"][0]
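    # Discretise each price move into an "Up"/"Down" observation for the HMM.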
    for price in hist_prices["Adj Close"]:
        if price >= prev_price:
            obs.append("Up")
        else:
            obs.append("Down")
        obs_prices.append(price)
        obs_delta.append(price - prev_price)
        prev_price = price
    possible_obs = ["Up", "Down"]
    v = Viterbi(initial, states, obs, possible_obs, trans, emiss)
    v.run()
    #v.print_table()
    #v.print_backtrack_table()
    #v.print_backtrack()

    #make a graph
    backtrack = v.get_backtrack()
    backtrack.pop(0)
    to_print = pd.DataFrame(hist_prices['Adj Close'])
    to_print["Delta"] = obs_delta
    to_print["Output"] = backtrack
    print(to_print)
    fig = hist_prices['Adj Close'].plot(grid=True)
    i = start
    tmp_backtrack = backtrack
Example #30
    # Checking arguments. The original defined the positional 'sequence' twice
    # (once as an option whose const referenced an undefined name); a single
    # positional argument with nargs='*' keeps the help path below reachable.
    parser = argparse.ArgumentParser(description='Output Viterbi Sequence.')
    parser.add_argument('sequence',
                        metavar='N',
                        type=str,
                        nargs='*',
                        help='the observation sequence :: Required for any output.')
    args = parser.parse_args()

    sequence = args.sequence[0] if args.sequence else None

    # If sequence not provided, return help and exit
    if not sequence:
        parser.print_help()
        exit()

    print('Observation (input sequence):', sequence)

    element = Viterbi(sequence)
    element.viterbi()