import mytrain
import myaccuracy


def main():
    # Train the classifier and report accuracy on the training set.
    set1, spam, ham = mytrain.train()
    accuracy = myaccuracy.accuracy(set1, spam, ham)
    print("\nAccuracy of TRAINING data is \n", accuracy)

    # Evaluate on the held-out test set.
    set1, spam, ham = mytrain.test()
    accuracy1 = myaccuracy.accuracy1(set1, spam, ham)
    print("\nAccuracy of TEST data is \n", accuracy1)
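A standard entry-point guard (not shown in the snippet) makes the script runnable directly:

if __name__ == '__main__':
    main()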
import os
from collections import Counter

import mytrain
import remove_stopword


def learnweights(learning_const, n):
    # Start with a bias weight; every vocabulary word gets weight 0.0.
    weights = {'weight_zero': 1.0}
    d1 = mytrain.train()  # assumed to return the training vocabulary
    for word in d1.keys():
        weights[word] = 0.0

    # n passes of the perceptron training rule over the training set.
    for _ in range(n):
        for filename in os.listdir('./Training'):
            with open(os.path.join('./Training', filename), errors='ignore') as f:
                wordcount = Counter(f.read().split())
            if stopword:  # module-level flag controlling stop-word removal
                wordcount = remove_stopword.rem_stop(wordcount)

            # Weighted sum of word counts, starting from the bias weight.
            weight_sum = weights['weight_zero']
            for w in wordcount:
                if w not in weights:
                    weights[w] = 0.0
                weight_sum += weights[w] * wordcount[w]
            perceptron_output = 1.0 if weight_sum > 0 else 0.0

            # Target is 1.0 for spam, 0.0 for ham.
            target_value = 1.0 if remove_stopword.isClass(filename) == 'spam' else 0.0

            # Perceptron update: w <- w + eta * (target - output) * count.
            for w in wordcount:
                weights[w] += float(learning_const) * (target_value - perceptron_output) * wordcount[w]

    # Measure accuracy on the held-out test set.
    corr_guess = 0
    count = 0.0
    for filename in os.listdir('./Testing'):
        guess = find(filename, weights)
        if guess == 1 and remove_stopword.isClass(filename) == 'spam':
            corr_guess += 1
        if guess == 0 and remove_stopword.isClass(filename) == 'ham':
            corr_guess += 1
        count += 1
    accuracy = float(corr_guess) / float(count)
    print(accuracy)
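The evaluation loop calls a find helper that is not shown above. A minimal sketch, assuming find scores a test file with the learned weights the same way the training pass does (the ./Testing path and the threshold-at-zero rule come from the surrounding code; the body itself is a reconstruction):

def find(filename, weights):
    # Hypothetical reconstruction of the missing helper: score one test
    # file with the learned weights and threshold the sum at zero.
    with open(os.path.join('./Testing', filename), errors='ignore') as f:
        wordcount = Counter(f.read().split())
    if stopword:
        wordcount = remove_stopword.rem_stop(wordcount)
    weight_sum = weights['weight_zero']
    for w in wordcount:
        weight_sum += weights.get(w, 0.0) * wordcount[w]
    return 1 if weight_sum > 0 else 0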
import argparse

parser = argparse.ArgumentParser(description='Word2vec')
parser.add_argument('-lr', type=float, default=0.025)
parser.add_argument('-epochs', type=int, default=5)
parser.add_argument('-window-size', type=int, default=5)
parser.add_argument('-min-count', type=int, default=5)
parser.add_argument('-neg-count', type=int, default=5)
parser.add_argument('-batch-size', type=int, default=100)
parser.add_argument('-emb-dim', type=int, default=100)
parser.add_argument('-using-hs', action='store_true', default=False)
parser.add_argument('-dir', type=str, default='./data')
parser.add_argument('-no-cuda', action='store_true')
parser.add_argument('-test', action='store_true', default=False)
args = parser.parse_args()

# Load the corpus and build the vocabulary.
data = InputData('zhihu.txt', args)
args.output_file_name = 'result2.txt'

# Update args: the embedding table size is the vocabulary size.
args.emb_size = len(data.word2id)

# Build the skip-gram model and train it.
skip_gram_model = SkipGramModel(args)
mytrain.train(data, skip_gram_model, args)
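With those flags, a typical invocation looks like the following (the script name word2vec.py is an assumption; argparse maps -emb-dim to args.emb_dim, and so on):

python word2vec.py -lr 0.05 -epochs 10 -emb-dim 200 -no-cuda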
import torch.multiprocessing as mp  # assumed: Hogwild uses torch's shared-memory multiprocessing

log.info(f'json_conf: {{{conf}}}')

# Initialize the Riemannian SGD optimizer.
optimizer = RiemannianSGD(
    model.parameters(),
    rgrad=opt.rgrad,
    retraction=opt.retraction,
    lr=opt.lr,
)

# This is something not present in "embed.py".
m = 2.0

# If nproc == 0, run single-threaded; otherwise run Hogwild.
if opt.nproc == 0:
    mytrain.train(model, data, optimizer, opt, log, 1, m)
else:
    queue = mp.Manager().Queue()
    model.share_memory()
    processes = []
    for rank in range(opt.nproc):
        p = mp.Process(
            target=mytrain.train_mp,
            args=(model, data, optimizer, opt, log, rank + 1, m, queue),
        )
        p.start()
        processes.append(p)

    # A separate controller process consumes progress from the queue.
    ctrl = mp.Process(
        target=control,
        args=(queue, log, adjacency, data, opt.fout, distfn, opt.epochs, processes),
    )
    ctrl.start()
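For context, the rgrad and retraction callbacks are what distinguish RiemannianSGD from plain SGD: the Euclidean gradient is rescaled by the inverse metric of the Poincaré ball before the step, and the step itself is a first-order retraction. A minimal sketch of the two callbacks, following the update rule from the Poincaré-embeddings paper (the function names here are illustrative, not the repository's):

import torch

def poincare_grad(p, d_p):
    # Rescale the Euclidean gradient by the inverse Poincare-ball metric:
    # grad_R = ((1 - ||p||^2)^2 / 4) * grad_E
    sqnorm = torch.sum(p * p, dim=-1, keepdim=True)
    return d_p * ((1 - sqnorm) ** 2 / 4)

def euclidean_retraction(p, d_p, lr):
    # First-order retraction: take a plain Euclidean step; the full
    # implementation also projects the point back inside the unit ball.
    p.data.add_(d_p, alpha=-lr)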