Esempio n. 1
0
def realDataTest():
    """Exercise the MLE pipeline end to end on ``../train.wtag``.

    Steps, each reported on stdout:

    1. Build the model and print how long the construction took.
    2. Evaluate ``calculate`` at the all-ones vector and print the value
       and the elapsed time divided by 60.
    3. Evaluate ``calculateGradient`` at the same vector, save it to
       ``train_gradient2.txt`` and print the elapsed time divided by 60
       (the save is inside the timed region).
    4. Print the Euclidean distance between the arrays stored in
       ``train_gradient.txt`` and ``train_gradient2.txt``.
    5. Print the result of ``findBestV()``.
    """
    corpus = MyParser("../train.wtag")
    sentences = corpus.splitted
    builder = BasicFeatureVectorBuilder(corpus)
    unique_tags = corpus.getUniqueTags()

    # Only the model construction counts as "preprocessing".
    tick = time.time()
    mle = MLE(unique_tags, sentences, builder)
    tock = time.time()
    print("End of preprocessing, took: ", tock - tick)

    weights = np.ones(builder.size)

    tick = time.time()
    print(mle.calculate(weights))
    tock = time.time()
    print("calcV took: " + str((tock - tick) / 60))

    tick = time.time()
    gradient = mle.calculateGradient(weights)
    np.savetxt('train_gradient2.txt', gradient)
    tock = time.time()
    print("calcGrad took: " + str((tock - tick) / 60))

    # Compare the freshly written gradient with the previously stored one.
    stored = np.loadtxt("train_gradient.txt")
    recomputed = np.loadtxt("train_gradient2.txt")
    print(np.linalg.norm(stored - recomputed))

    print(mle.findBestV())
Esempio n. 2
0
def basicTest():
    """Print the gradient of a small model at the all-ones weight vector.

    The model is built from ``MLE_db.wtag`` with the fixed tag list
    ``["t1", "t2", "t3", "t5"]``.
    """
    corpus = MyParser("MLE_db.wtag")
    sentences = corpus.splitted
    builder = BasicFeatureVectorBuilder(corpus)
    model = MLE(["t1", "t2", "t3", "t5"], sentences, builder)
    weights = np.ones(builder.size)
    print(model.calculateGradient(weights))
Esempio n. 3
0
def TRAIN():
    """Run ``findBestV`` on the training corpus and print what it returns.

    The model is built from ``../train.wtag``; the array loaded from
    ``opt_v.txt`` is handed to ``findBestV`` as its only argument.
    """
    print("Training: ")
    corpus = MyParser("../train.wtag")
    sentences = corpus.splitted
    builder = BasicFeatureVectorBuilder(corpus)
    unique_tags = corpus.getUniqueTags()
    model = MLE(unique_tags, sentences, builder)
    stored_weights = np.loadtxt("opt_v.txt")
    print(model.findBestV(stored_weights))
Esempio n. 4
0
def basicTest():
    """Print ``mle.p`` for one hand-built history on the toy corpus.

    The model comes from ``MLE_db.wtag`` with tags ``t1, t2, t3, t5``; the
    query is tag ``"t3"`` for ``History("t1", "t2", [w1, w2, w3, w2], 2)``
    under the all-ones weight vector.
    """
    corpus = MyParser("MLE_db.wtag")
    sentences = corpus.splitted
    builder = BasicFeatureVectorBuilder(corpus)
    model = MLE(["t1", "t2", "t3", "t5"], sentences, builder)
    weights = np.ones(builder.size)
    context = History("t1", "t2", ["w1", "w2", "w3", "w2"], 2)
    print(model.p(context, "t3", weights))
Esempio n. 5
0
def calcTupleTestBasic():
    """Print ``calcTuple`` before and after optimisation on the toy corpus.

    Prints, in order: ``calcTuple`` at the all-zeros vector, the value
    returned by ``findBestV()``, and ``calcTuple`` evaluated at that value.
    """
    corpus = MyParser("MLE_db.wtag")
    sentences = corpus.splitted
    builder = BasicFeatureVectorBuilder(corpus)
    model = MLE(["t1", "t2", "t3", "t5"], sentences, builder)

    zero_weights = np.zeros(builder.size)
    print(model.calcTuple(zero_weights))

    optimum = model.findBestV()
    print(optimum)

    print(model.calcTuple(optimum))
def test():
    """Tag a short fixed sentence with ``GreedyTagger`` and print the result.

    The model is built from the counters read from ``q.mle`` and ``e.mle``.
    """
    q_counts = read_counter_from_file("q.mle")
    e_counts = read_counter_from_file("e.mle")
    model = MLE(q_counts, e_counts)
    words = "he walked home quickly".split()
    greedy = GreedyTagger(model)
    predicted = greedy.tag(words)
    print(predicted)
Esempio n. 7
0
def infer_prepare_params(basic_or_complex, fileToInfer):
    """Run Viterbi inference on ``fileToInfer`` once per saved weight file.

    Every file in the current directory whose name starts with the prefix
    for the chosen model kind is loaded as a weight vector, in sorted name
    order.  For each one, two output files are written
    (``<fn>_results_<weights>`` and ``<fn>_expected_<weights>``) by
    ``infer_aux``, and the value it returns is collected.  The collected
    values are finally passed to ``infer_aux_results``.

    Args:
        basic_or_complex: ``'basic'`` or ``'complex'``; selects the feature
            builder and the weight-file prefix.
        fileToInfer: path of the file to run inference on.  Its string
            form with ``.`` and ``/`` removed prefixes the output names.

    Raises:
        AssertionError: if ``basic_or_complex`` is neither ``'basic'`` nor
            ``'complex'``.
    """
    train_parser = MyParser("../train.wtag")
    seenWordsToTagsDict = train_parser.getSeenWordsToTagsDict()
    if basic_or_complex == 'basic':
        fb = BasicFeatureVectorBuilder(train_parser, 0)
        filePrefix = 'finish_basic_opt_v_'
    elif basic_or_complex == 'complex':
        fb = ComplexFeatureVectorBuilder(train_parser, False)
        filePrefix = 'finish_complex_opt_v_'
    else:
        # Raised explicitly: a bare `assert False` disappears under
        # `python -O`, which would let execution continue with no builder.
        raise AssertionError(
            "basic_or_complex must be 'basic' or 'complex', got %r"
            % (basic_or_complex,))
    fn = str(fileToInfer).replace('.', '').replace('/', '')
    parser = MyParser(fileToInfer)
    splitted = parser.splitted
    mle = MLE(train_parser.getUniqueTags(), splitted, fb)

    prefixed = sorted(
        filename for filename in os.listdir('.')
        if filename.startswith(filePrefix)
    )
    print(prefixed)
    results = []

    for v_file in prefixed:
        v = np.loadtxt(v_file)
        vit = Viterbi(mle, mle.allTags, v, seenWordsToTagsDict)
        # Context managers close both files even if inference fails.
        with open(fn + "_results_" + v_file, 'w') as res_file, \
                open(fn + "_expected_" + v_file, 'w') as exp_file:
            accuracy = infer_aux(exp_file, res_file, v_file, splitted, vit)
        results.append(accuracy)
    infer_aux_results(prefixed, results, fileToInfer, fn)
Esempio n. 8
0
def calcTupleTestRealData():
    """Time ``calcTuple`` on the full training corpus with complex features.

    Builds the model from ``../train.wtag``, evaluates ``calcTuple`` at the
    all-ones vector, prints both returned values and saves the second one
    to ``train_gradientTuple.txt``.  The reported "calcTuple" time covers
    the evaluation, the two prints and the save.
    """
    parser = MyParser("../train.wtag")
    splitted = parser.splitted
    # Swap in BasicFeatureVectorBuilder(parser, 0) to test the basic features.
    fb = ComplexFeatureVectorBuilder(parser)
    tags = parser.getUniqueTags()
    start = time.time()
    mle = MLE(tags, splitted, fb, 0, "tmp1234.txt")
    end = time.time()
    print("End of preprocessing, took: ", end - start)
    v = np.ones(fb.size)
    start = time.time()
    lv, grad = mle.calcTuple(v)
    print("L(V) = ", lv)
    print(grad)
    # np.savetxt opens and closes the file itself when given a path, so no
    # separate handle is opened here (the previous one was never closed).
    np.savetxt('train_gradientTuple.txt', grad)
    end = time.time()
    print("calcTuple took: ", end - start, " seconds")
Esempio n. 9
0
def fit_complex_model(continueTraining):
    """Fit the complex-feature model via ``fit_model_aux``.

    Args:
        continueTraining: when truthy, the array stored in
            ``finish_complex_opt_v_lambda_0_007.txt`` is passed on as the
            initial vector; otherwise ``None`` is passed.
    """
    if continueTraining:
        initial = np.loadtxt("finish_complex_opt_v_lambda_0_007.txt")
    else:
        initial = None
    lambda_values = [0.007]
    corpus = MyParser("../train.wtag")
    sentences = corpus.splitted
    builder = ComplexFeatureVectorBuilder(corpus, False)
    unique_tags = corpus.getUniqueTags()
    model = MLE(unique_tags, sentences, builder)
    fit_model_aux(model, "complex", lambda_values, 300, initial)
Esempio n. 10
0
def fit_basic_model(continueTraining):
    """Fit the basic-feature model via ``fit_model_aux``.

    Args:
        continueTraining: when truthy, the array stored in
            ``finish_basic_opt_v_lambda_0_007.txt`` is passed on as the
            initial vector; otherwise ``None`` is passed.
    """
    if continueTraining:
        initial = np.loadtxt("finish_basic_opt_v_lambda_0_007.txt")
    else:
        initial = None
    lambda_values = [0.007]
    corpus = MyParser("../train.wtag")
    sentences = corpus.splitted
    builder = BasicFeatureVectorBuilder(corpus, 0)
    unique_tags = corpus.getUniqueTags()
    model = MLE(unique_tags, sentences, builder)
    fit_model_aux(model, "basic", lambda_values, 550, initial)
def test():
    """Tag one long fixed sentence with ``ViterbiTagger`` and print the result.

    Before the tags, prints the time elapsed between starting the timer and
    finishing construction of the tagger, multiplied by 1e9.
    """
    q_counts = read_counter_from_file("q.mle")
    e_counts = read_counter_from_file("e.mle")
    model = MLE(q_counts, e_counts)
    from timeit import default_timer as timer

    began = timer()
    words = "It would like to peg the ceiling on Federal Housing Administration mortgage guarantees to 95 % of the median price in a particular market , instead of limiting it to $ 101,250 ; reduce ( or even eliminate ) FHA down-payment requirements and increase the availability of variable-rate mortgages ; expand the Veterans Affairs Department loan guarantee program ; provide `` adequate '' funding for the Farmers Home Administration ( FmHA ) ; increase federal funding and tax incentives for the construction of low-income and rental housing , including $ 4 billion in block grants to states and localities ; and `` fully fund '' the McKinney Act , a $ 656 million potpourri for the homeless .".split()
    viterbi = ViterbiTagger(model)
    # NOTE(review): the elapsed time is taken before tag() runs, so it covers
    # only the split and the tagger construction - confirm this is intended.
    elapsed = timer() - began
    print(elapsed * 1000000000)
    print(viterbi.tag(words))
Esempio n. 12
0
def train():
    """Tag every sentence of ``../comp748.wtag`` and report running accuracy.

    Despite the name, nothing is fitted here: the weight vector is loaded
    from ``opt_v_3.txt`` and used for Viterbi inference.  For each sentence
    the predicted and expected tags are appended (one line each) to
    ``test_wtag748_results.txt`` and ``test_wtag748_expected.txt``, and
    per-sentence and cumulative statistics are printed.
    """
    parser = MyParser("../comp748.wtag")
    splitted = parser.splitted
    fb = BasicFeatureVectorBuilder(parser, 0)
    mle = MLE(parser.getUniqueTags(), splitted, fb)
    v = np.loadtxt("opt_v_3.txt")
    # NOTE(review): this dictionary was previously also built from
    # "../train.wtag" and then immediately overwritten by the one below, so
    # the training-file parse had no effect and was dropped.  Confirm that
    # the evaluated file, not the training file, is the intended source.
    seenSentencesToTagsDict = parser.getSeenWordsToTagsDict()
    vit = Viterbi(mle, mle.allTags, v, seenSentencesToTagsDict)

    total_res = 0
    words_count = 0
    total_time = 0
    for idx, tuples in enumerate(splitted):
        s = [t[0] for t in tuples]
        expected = [t[1] for t in tuples]
        curr_word_len = len(s)
        words_count += curr_word_len

        # Time the inference call only; the file writes below are excluded.
        start = time.time()
        tags = vit.inference(s)
        stop = time.time()

        # Files are reopened in append mode per sentence so that partial
        # results are on disk if a long run is interrupted.
        with open("test_wtag748_results.txt", 'a') as res_file:
            res_file.write("".join("%s " % item for item in tags) + "\n")
        with open("test_wtag748_expected.txt", 'a') as exp_file:
            exp_file.write("".join("%s " % item for item in expected) + "\n")

        # Compare the tags directly rather than their hashes.
        current_correct = sum(
            1 for want, got in zip(expected, tags) if want == got)
        total_res += current_correct
        total_time += stop - start
        # Avoid dividing by zero while no words have been seen yet.
        percent = (100 * total_res) / words_count if words_count else 0.0

        print("---------------------")
        print("Inference for sentence# ", idx, " took: ", stop - start, " seconds")
        print("Current sentence accuracy: ", current_correct, " of: ", curr_word_len)
        print("Total sentence accuracy: ", total_res, " of: ", words_count, "=", percent, "%")
        print("Total time for ", idx, " sentences: ", (total_time / 60), " minutes")
Esempio n. 13
0
def fit_model_aux(mle: MLE, prefix_name, lambdas, iterationsNum, initv=None):
    """Run ``mle.findBestV`` once per lambda and save each result.

    Every run starts from the same vector: ``initv`` when given, otherwise
    zeros of length ``mle.featureBuilder.size``.  The ``x`` attribute of
    each result is saved to ``finish_<prefix>_opt_v_lambda_<lambda>.txt``,
    where dots in the lambda are replaced by underscores.

    Args:
        mle: model whose ``findBestV`` is called.
        prefix_name: string prepended to the output file names.
        lambdas: iterable of lambda values to run with.
        iterationsNum: forwarded to ``findBestV`` as its fourth argument.
        initv: optional starting vector.
    """
    print("Starting training with lambdas:", lambdas, "on:", prefix_name)
    if initv is not None:
        print("Will continue training given init vector")
        v = initv
        print(v)
    else:
        print("Training with initial vector of zeros")
        v = np.zeros(mle.featureBuilder.size)

    for lam in lambdas:
        print("Current lambda:", str(lam))
        began = time.time()
        lam_tag = str(lam).replace('.', '_')
        # Name handed to findBestV; the final file adds a 'finish_' prefix.
        work_file = prefix_name + '_opt_v_lambda_' + lam_tag + '.txt'
        outcome = mle.findBestV(v, lam, work_file, iterationsNum)
        np.savetxt('finish_' + work_file, outcome.x)
        minutes = (time.time() - began) / 60
        print("Training lambda: ", str(lam), " took: ", minutes, "minutes")
        print("######################################################")
Esempio n. 14
0
	def estimate(self):
		"""Run the MLE iteration loop until stopped, then save and display.

		Does nothing if an estimation is already running.  Otherwise starts
		``self.loading`` on a separate thread, builds an ``MLE`` from the
		image and folder inputs and iterates it until ``self.stopFlag`` is
		set or, when auto-stop is enabled, until the summed absolute change
		in ``theta`` drops below 0.0001 or ``theta`` contains NaN.  The
		current parameters are displayed at most once every
		``1/updateFreq`` seconds.  Afterwards the thetas are saved and
		``estimateIntensity`` is called.

		``self.running`` is always reset on exit, including when an
		exception propagates, so a failed run does not block later calls.
		"""
		if self.running:
			return
		self.estimateLoad()
		self.running = True
		try:
			Thread(target=self.loading, args=(self.estimateProgress, )).start()
			image = self.estimateImageInput.get()
			folder = self.estimateFolderInput.get()
			start = time.time()
			alg = MLE(image, folder=folder, connectivity=self.convertConnectivity)
			prev = numpy.copy(alg.theta)
			while not self.stopFlag:
				alg.iteration()
				if self.estimateAutoStop.get():
					# Stop on convergence, or when theta has become NaN.
					if numpy.sum(numpy.abs(prev-alg.theta)) < 0.0001 or numpy.isnan(numpy.sum(alg.theta)):
						break
				# Throttle display refreshes to the configured frequency.
				if time.time() - start > 1/self.updateFreq.get():
					self.displayParams(alg.theta, self.estimateCharWin)
					start = time.time()
				prev = numpy.copy(alg.theta)
			alg.saveThetas()
			self.estimateIntensity()
		finally:
			self.running = False
Esempio n. 15
0
[theta, C_theta, ln_det_C, sigma_eta, param] = mle.estimate_parameters()
end_time = time.time()

results = x_printformat(theta, C_theta, ln_det_C, sigma_eta, offsets, param, m)

#   Estimates
Ohat = H @ theta
obs.ts_append_results(Ohat, 'MLE_Fullcov')
"""

# ---------------------------------------------- #
# MLE TEST 2
# ---------------------------------------------- #

#--- MLE
# Build a second estimator, selecting the 'AmmarGrag' option.
# NOTE(review): x, F, H and cov are defined outside this excerpt.
mle2 = MLE(x, F, 'AmmarGrag', H, cov)
print("Timing MLE ammar...\n")

#--- run MLE
# The two timestamps bracket only the estimate_parameters() call.
start_time2 = time.time()
[theta, C_theta, ln_det_C, sigma_eta, param] = mle2.estimate_parameters()
end_time2 = time.time()

# Format this run's outputs; offsets and m come from outside this excerpt.
results2 = x_printformat(theta, C_theta, ln_det_C, sigma_eta, offsets, param, m)


#   Estimates: H applied to the theta returned by this run,
#   stored under the 'MLE_Ammar' label.
Ohat2 = H @ theta
obs.ts_append_results(Ohat2, 'MLE_Ammar')

Esempio n. 16
0
 def train(self):
     """Append a prior, a mean and a covariance for every class.

     For each class index below ``self.num_of_classes`` the values are
     appended to ``self.priors``, ``self.means`` and ``self.covariances``.
     Returns ``self``.
     """
     for label in range(self.num_of_classes):
         # NOTE(review): the denominator is the row count of self.test_data;
         # confirm that this is the intended population for the prior.
         class_share = self.class_data(label).shape[0] / self.test_data.shape[0]
         self.priors.append(class_share)

         class_mean = MLE(self.class_data(label)).mean()
         self.means.append(class_mean)

         class_covariance = MLE(self.class_data(label)).covariance()
         self.covariances.append(class_covariance)
     return self