def realDataTest():
    """Time the MLE steps on the training corpus and compare gradients.

    Builds an MLE over "../train.wtag" with basic features, prints the value
    of ``calculate`` at the all-ones vector, writes the gradient to
    'train_gradient2.txt', prints its Euclidean distance from the gradient
    stored in "train_gradient.txt", and finally prints ``findBestV()``.
    Elapsed times are printed after each stage.
    """
    corpus = MyParser("../train.wtag")
    sentences = corpus.splitted
    builder = BasicFeatureVectorBuilder(corpus)
    unique_tags = corpus.getUniqueTags()

    # Stage 1: construction of the MLE object (reported in seconds).
    t_begin = time.time()
    mle = MLE(unique_tags, sentences, builder)
    t_finish = time.time()
    print("End of preprocessing, took: ", t_finish - t_begin)

    v = np.ones(builder.size)

    # Stage 2: objective value (reported in minutes).
    t_begin = time.time()
    print(mle.calculate(v))
    t_finish = time.time()
    print("calcV took: " + str((t_finish - t_begin) / 60))

    # Stage 3: gradient, including the time needed to write it to disk.
    t_begin = time.time()
    gradient = mle.calculateGradient(v)
    np.savetxt('train_gradient2.txt', gradient)
    t_finish = time.time()
    print("calcGrad took: " + str((t_finish - t_begin) / 60))

    # Compare the freshly written gradient with the stored reference file.
    reference = np.loadtxt("train_gradient.txt")
    recomputed = np.loadtxt("train_gradient2.txt")
    print(np.linalg.norm(reference - recomputed))

    print(mle.findBestV())
def basicTest():
    """Print the gradient at the all-ones vector for the "MLE_db.wtag" corpus.

    The MLE is built with the fixed tag list ["t1", "t2", "t3", "t5"] and
    basic features derived from the parsed file.
    """
    corpus = MyParser("MLE_db.wtag")
    sentences = corpus.splitted
    builder = BasicFeatureVectorBuilder(corpus)
    model = MLE(["t1", "t2", "t3", "t5"], sentences, builder)
    ones = np.ones(builder.size)
    print(model.calculateGradient(ones))
def TRAIN():
    """Run ``findBestV`` on the training corpus, seeded from "opt_v.txt".

    Prints a banner, builds an MLE over "../train.wtag" with basic features,
    and prints whatever ``findBestV`` returns for the loaded start vector.
    """
    print("Training: ")
    corpus = MyParser("../train.wtag")
    sentences = corpus.splitted
    builder = BasicFeatureVectorBuilder(corpus)
    unique_tags = corpus.getUniqueTags()
    model = MLE(unique_tags, sentences, builder)
    # The starting point is read from disk only after the model is built.
    initial_v = np.loadtxt("opt_v.txt")
    print(model.findBestV(initial_v))
def basicTest():
    """Print ``mle.p`` for one hand-built history on the "MLE_db.wtag" corpus.

    The history is ``History("t1", "t2", ["w1", "w2", "w3", "w2"], 2)``, the
    queried tag is "t3", and the weight vector is all ones.
    """
    corpus = MyParser("MLE_db.wtag")
    sentences = corpus.splitted
    builder = BasicFeatureVectorBuilder(corpus)
    model = MLE(["t1", "t2", "t3", "t5"], sentences, builder)
    ones = np.ones(builder.size)
    sample_history = History("t1", "t2", ["w1", "w2", "w3", "w2"], 2)
    print(model.p(sample_history, "t3", ones))
def calcTupleTestBasic():
    """Print ``calcTuple`` before and after optimisation on "MLE_db.wtag".

    Evaluates ``calcTuple`` at the zero vector, runs ``findBestV()``, prints
    its result, then evaluates ``calcTuple`` again at that result.
    """
    corpus = MyParser("MLE_db.wtag")
    sentences = corpus.splitted
    builder = BasicFeatureVectorBuilder(corpus)
    model = MLE(["t1", "t2", "t3", "t5"], sentences, builder)

    zeros = np.zeros(builder.size)
    print(model.calcTuple(zeros))

    optimum = model.findBestV()
    print(optimum)

    print(model.calcTuple(optimum))
def test():
    """Tag a short fixed sentence with ``GreedyTagger`` and print the result.

    The MLE is built from the counters read from "q.mle" and "e.mle".
    """
    q_counts = read_counter_from_file("q.mle")
    e_counts = read_counter_from_file("e.mle")
    model = MLE(q_counts, e_counts)
    words = "he walked home quickly".split()
    greedy = GreedyTagger(model)
    print(greedy.tag(words))
def infer_prepare_params(basic_or_complex, fileToInfer):
    """Run Viterbi inference on ``fileToInfer`` for every saved weight vector.

    Weight-vector files are discovered in the current directory by filename
    prefix ('finish_basic_opt_v_' or 'finish_complex_opt_v_') and processed in
    sorted order. For each one, a results file and an expected file are
    written, named ``<fn>_results_<v_file>`` and ``<fn>_expected_<v_file>``,
    where ``fn`` is ``fileToInfer`` with '.' and '/' removed. The per-file
    accuracies returned by ``infer_aux`` are handed to ``infer_aux_results``.

    Args:
        basic_or_complex: Either 'basic' or 'complex'; selects the feature
            builder and the weight-file prefix.
        fileToInfer: Path of the tagged file to run inference on.

    Raises:
        AssertionError: If ``basic_or_complex`` is neither 'basic' nor
            'complex'.
    """
    train_parser = MyParser("../train.wtag")
    seenWordsToTagsDict = train_parser.getSeenWordsToTagsDict()

    if basic_or_complex == 'basic':
        fb = BasicFeatureVectorBuilder(train_parser, 0)
        filePrefix = 'finish_basic_opt_v_'
    elif basic_or_complex == 'complex':
        fb = ComplexFeatureVectorBuilder(train_parser, False)
        filePrefix = 'finish_complex_opt_v_'
    else:
        # Raised explicitly (rather than via an `assert` statement) so the
        # check still fires when Python runs with -O; the exception type is
        # kept as AssertionError for existing callers.
        raise AssertionError(
            "basic_or_complex must be 'basic' or 'complex', got %r"
            % (basic_or_complex,))

    fn = str(fileToInfer).replace('.', '').replace('/', '')
    parser = MyParser(fileToInfer)
    splitted = parser.splitted
    # Tags come from the training corpus; sentences come from the file to infer.
    mle = MLE(train_parser.getUniqueTags(), splitted, fb)

    prefixed = sorted(
        filename for filename in os.listdir('.')
        if filename.startswith(filePrefix)
    )
    print(prefixed)

    results = []
    for v_file in prefixed:
        v = np.loadtxt(v_file)
        vit = Viterbi(mle, mle.allTags, v, seenWordsToTagsDict)
        # Context managers guarantee both files are closed even if
        # infer_aux raises part-way through.
        with open(fn + "_results_" + v_file, 'w') as res_file, \
                open(fn + "_expected_" + v_file, 'w') as exp_file:
            accuracy = infer_aux(exp_file, res_file, v_file, splitted, vit)
        results.append(accuracy)

    infer_aux_results(prefixed, results, fileToInfer, fn)
def calcTupleTestRealData():
    """Time ``calcTuple`` on the training corpus with complex features.

    Builds an MLE over "../train.wtag", evaluates ``calcTuple`` at the
    all-ones vector, prints the two returned values, and saves the second
    one (the gradient) to 'train_gradientTuple.txt'. Elapsed times for
    construction and for evaluation (including the save) are printed in
    seconds.
    """
    parser = MyParser("../train.wtag")
    splitted = parser.splitted
    # Swap in BasicFeatureVectorBuilder(parser, 0) here to exercise the basic
    # feature set instead.
    fb = ComplexFeatureVectorBuilder(parser)
    tags = parser.getUniqueTags()

    start = time.time()
    mle = MLE(tags, splitted, fb, 0, "tmp1234.txt")
    end = time.time()
    print("End of preprocessing, took: ", end - start)

    v = np.ones(fb.size)
    start = time.time()
    lv, grad = mle.calcTuple(v)
    print("L(V) = ", lv)
    print(grad)
    # np.savetxt opens and closes the target path itself, so no separate
    # file handle is opened here (the previous one was never used or closed).
    np.savetxt('train_gradientTuple.txt', grad)
    end = time.time()
    print("calcTuple took: ", end - start, " seconds")
def fit_complex_model(continueTraining):
    """Fit the complex-feature model via ``fit_model_aux``.

    Args:
        continueTraining: When truthy, training starts from the vector stored
            in "finish_complex_opt_v_lambda_0_007.txt"; otherwise ``None`` is
            passed as the initial vector.
    """
    initial_v = (
        np.loadtxt("finish_complex_opt_v_lambda_0_007.txt")
        if continueTraining else None
    )
    lambda_values = [0.007]
    corpus = MyParser("../train.wtag")
    sentences = corpus.splitted
    builder = ComplexFeatureVectorBuilder(corpus, False)
    unique_tags = corpus.getUniqueTags()
    model = MLE(unique_tags, sentences, builder)
    # 300 is passed as fit_model_aux's iterationsNum argument.
    fit_model_aux(model, "complex", lambda_values, 300, initial_v)
def fit_basic_model(continueTraining):
    """Fit the basic-feature model via ``fit_model_aux``.

    Args:
        continueTraining: When truthy, training starts from the vector stored
            in "finish_basic_opt_v_lambda_0_007.txt"; otherwise ``None`` is
            passed as the initial vector.
    """
    initial_v = (
        np.loadtxt("finish_basic_opt_v_lambda_0_007.txt")
        if continueTraining else None
    )
    lambda_values = [0.007]
    corpus = MyParser("../train.wtag")
    sentences = corpus.splitted
    builder = BasicFeatureVectorBuilder(corpus, 0)
    unique_tags = corpus.getUniqueTags()
    model = MLE(unique_tags, sentences, builder)
    # 550 is passed as fit_model_aux's iterationsNum argument.
    fit_model_aux(model, "basic", lambda_values, 550, initial_v)
def test():
    """Tag one long fixed sentence with ``ViterbiTagger`` and print the result.

    The MLE is built from the counters read from "q.mle" and "e.mle". A timer
    reading scaled by 1e9 is printed first; note that it is taken before
    ``tag`` is called, so it covers only splitting the sentence and
    constructing the tagger.
    """
    from timeit import default_timer as timer

    q_counts = read_counter_from_file("q.mle")
    e_counts = read_counter_from_file("e.mle")
    model = MLE(q_counts, e_counts)

    began = timer()
    words = "It would like to peg the ceiling on Federal Housing Administration mortgage guarantees to 95 % of the median price in a particular market , instead of limiting it to $ 101,250 ; reduce ( or even eliminate ) FHA down-payment requirements and increase the availability of variable-rate mortgages ; expand the Veterans Affairs Department loan guarantee program ; provide `` adequate '' funding for the Farmers Home Administration ( FmHA ) ; increase federal funding and tax incentives for the construction of low-income and rental housing , including $ 4 billion in block grants to states and localities ; and `` fully fund '' the McKinney Act , a $ 656 million potpourri for the homeless .".split()
    viterbi = ViterbiTagger(model)
    elapsed = timer() - began
    print(elapsed * 1000000000)
    print(viterbi.tag(words))
def train():
    """Run Viterbi inference over "../comp748.wtag" and report accuracy.

    Weights are loaded from "opt_v_3.txt". For every sentence the predicted
    tags are appended as one line to "test_wtag748_results.txt" and the
    expected tags to "test_wtag748_expected.txt"; per-sentence and running
    accuracy and timing are printed as the loop progresses.
    """
    train_parser = MyParser("../train.wtag")
    # NOTE(review): this training-corpus dictionary is overwritten below by
    # the one built from the inferred file before it is ever used. Confirm
    # which of the two Viterbi is meant to receive.
    seenSentencesToTagsDict = train_parser.getSeenWordsToTagsDict()

    parser = MyParser("../comp748.wtag")
    splitted = parser.splitted
    fb = BasicFeatureVectorBuilder(parser, 0)
    mle = MLE(parser.getUniqueTags(), splitted, fb)
    v = np.loadtxt("opt_v_3.txt")

    # Each element of `splitted` is indexed as (word, tag) pairs.
    sentences = [[pair[0] for pair in tagged] for tagged in splitted]
    expected_tags = [[pair[1] for pair in tagged] for tagged in splitted]

    seenSentencesToTagsDict = parser.getSeenWordsToTagsDict()
    vit = Viterbi(mle, mle.allTags, v, seenSentencesToTagsDict)

    total_res = 0
    words_count = 0
    total_time = 0
    for idx, (s, expected) in enumerate(zip(sentences, expected_tags)):
        curr_word_len = len(s)
        words_count = words_count + curr_word_len

        start = time.time()
        tags = vit.inference(s)
        # Files are reopened in append mode for every sentence so output
        # reaches disk as the run progresses; `with` closes them even if a
        # write fails.
        with open("test_wtag748_results.txt", 'a') as res_file:
            res_file.write("".join("%s " % item for item in tags) + "\n")
        with open("test_wtag748_expected.txt", 'a') as exp_file:
            exp_file.write("".join("%s " % item for item in expected) + "\n")
        stop = time.time()

        # Compare tags directly instead of comparing hash values: this cannot
        # be fooled by hash collisions and tolerates a length mismatch.
        current_correct = sum(
            1 for want, got in zip(expected, tags) if want == got
        )

        print("---------------------")
        print("Inference for sentence# ", idx, " took: ", stop - start, " seconds")
        total_time = total_time + (stop - start)
        print("Current sentence accuracy: ", current_correct, " of: ", curr_word_len)
        total_res = total_res + current_correct
        # Guard the percentage against an empty running word count.
        accuracy_pct = (100 * total_res) / words_count if words_count else 0.0
        print("Total sentence accuracy: ", total_res, " of: ", words_count, "=", accuracy_pct, "%")
        # idx is zero-based, so idx + 1 sentences have been processed so far.
        print("Total time for ", idx + 1, " sentences: ", (total_time / 60), " minutes")
def fit_model_aux(mle: MLE, prefix_name, lambdas, iterationsNum, initv=None):
    """Run ``mle.findBestV`` once per lambda and save each result to disk.

    Every lambda starts from the same initial vector. The intermediate file
    name passed to ``findBestV`` is
    ``<prefix_name>_opt_v_lambda_<lambda with '.' replaced by '_'>.txt``; the
    final vector (the ``x`` attribute of the returned object) is saved under
    the same name prefixed with 'finish_'.

    Args:
        mle: Model whose ``findBestV`` is called; ``mle.featureBuilder.size``
            gives the length of the default start vector.
        prefix_name: String prefix used for the output file names.
        lambdas: Iterable of lambda values to train with.
        iterationsNum: Forwarded unchanged to ``findBestV``.
        initv: Optional start vector; a zero vector is used when ``None``.
    """
    print("Starting training with lambdas:", lambdas, "on:", prefix_name)

    if initv is not None:
        print("Will continue training given init vector")
        v = initv
        print(v)
    else:
        print("Training with initial vector of zeros")
        v = np.zeros(mle.featureBuilder.size)

    for lmbda in lambdas:
        print("Current lambda:", str(lmbda))
        began = time.time()
        lambda_label = str(lmbda).replace('.', '_')
        tmpFile = prefix_name + '_opt_v_lambda_' + lambda_label + '.txt'
        outcome = mle.findBestV(v, lmbda, tmpFile, iterationsNum)
        np.savetxt('finish_' + tmpFile, outcome.x)
        elapsed_minutes = (time.time() - began) / 60
        print("Training lambda: ", str(lmbda), " took: ", elapsed_minutes,
              "minutes")
        print("######################################################")
# Run an MLE estimation loop driven by this object's UI inputs.
#
# What the statements below do, in order:
#   * check `self.running`, call `self.estimateLoad()`, set `self.running`
#     to True, and start a Thread that runs `self.loading` with
#     `self.estimateProgress` as its argument;
#   * read the image and folder values from `self.estimateImageInput` /
#     `self.estimateFolderInput` and construct `MLE(image, folder=...,
#     connectivity=self.convertConnectivity)`;
#   * call `alg.iteration()` repeatedly while `self.stopFlag` is false;
#   * when `self.estimateAutoStop.get()` is truthy, break out once the summed
#     absolute difference between `prev` and `alg.theta` drops below 0.0001
#     or the sum of `alg.theta` is NaN;
#   * call `self.displayParams(alg.theta, self.estimateCharWin)` whenever
#     more than `1 / self.updateFreq.get()` seconds have passed since the
#     last refresh, then restart that timer;
#   * finally call `alg.saveThetas()` and `self.estimateIntensity()`, and
#     reset `self.running` to False.
#
# NOTE(review): the block nesting cannot be recovered from this collapsed
# line. Presumably everything after `if not self.running:` is guarded by it,
# and `prev` is refreshed once per loop iteration rather than only on display
# refresh — confirm against the original layout before relying on either.
def estimate(self): if not self.running: self.estimateLoad() self.running = True Thread(target=self.loading, args=(self.estimateProgress, )).start() image = self.estimateImageInput.get() folder = self.estimateFolderInput.get() start = time.time() alg = MLE(image, folder=folder, connectivity=self.convertConnectivity) prev = numpy.copy(alg.theta) while not self.stopFlag: alg.iteration() if self.estimateAutoStop.get(): if numpy.sum(numpy.abs(prev-alg.theta)) < 0.0001 or numpy.isnan(numpy.sum(alg.theta)): break if time.time() - start > 1/self.updateFreq.get(): self.displayParams(alg.theta, self.estimateCharWin) start = time.time() prev = numpy.copy(alg.theta) alg.saveThetas() self.estimateIntensity() self.running = False
[theta, C_theta, ln_det_C, sigma_eta, param] = mle.estimate_parameters() end_time = time.time() results = x_printformat(theta, C_theta, ln_det_C, sigma_eta, offsets, param, m) # Estimates Ohat = H @ theta obs.ts_append_results(Ohat, 'MLE_Fullcov') """ # ---------------------------------------------- # # MLE TEST 2 # ---------------------------------------------- # #--- MLE mle2 = MLE(x, F, 'AmmarGrag', H, cov) print("Timing MLE ammar...\n") #--- run MLE start_time2 = time.time() [theta, C_theta, ln_det_C, sigma_eta, param] = mle2.estimate_parameters() end_time2 = time.time() results2 = x_printformat(theta, C_theta, ln_det_C, sigma_eta, offsets, param, m) # Estimations Ohat2 = H @ theta obs.ts_append_results(Ohat2, 'MLE_Ammar')
def train(self):
    """Append a prior, a mean and a covariance for every class; return self.

    For each class index below ``self.num_of_classes``:
      * the prior is the row count of ``self.class_data(index)`` divided by
        the row count of ``self.test_data``;
      * the mean and covariance come from ``MLE(...)`` built on that class's
        data.

    NOTE(review): the prior's denominator is ``self.test_data`` — confirm
    that this, rather than the training set, is the intended total.
    """
    for class_index in range(self.num_of_classes):
        class_rows = self.class_data(class_index).shape[0]
        total_rows = self.test_data.shape[0]
        self.priors.append(class_rows / total_rows)

        # A separate MLE is built for each statistic, each on a fresh
        # class_data call.
        mean_estimator = MLE(self.class_data(class_index))
        self.means.append(mean_estimator.mean())

        covariance_estimator = MLE(self.class_data(class_index))
        self.covariances.append(covariance_estimator.covariance())
    return self