def main():
    # Train the classifier, then report accuracy on the training set.
    set1, spam, ham = mytrain.train()
    accuracy = myaccuracy.accuracy(set1, spam, ham)
    print("\nAccuracy of TRAINING data is\n", accuracy)
    # Evaluate on the held-out test set.
    set1, spam, ham = mytrain.test()
    accuracy1 = myaccuracy.accuracy1(set1, spam, ham)
    print("\nAccuracy of TEST data is\n", accuracy1)
Example #2
import os
from collections import Counter

def learnweights(learning_const, n):
    # Perceptron: a bias term plus one weight per word seen in training.
    # (Note: weight_zero is initialized to 1 and never updated below.)
    weights = {'weight_zero': 1}
    d1 = mytrain.train()
    for i in d1.keys():
        weights[i] = 0.0
    # n passes over the training set.
    for i in range(n):
        for filename in os.listdir('./Training'):
            with open(os.path.join('./Training', filename), errors='ignore') as file:
                wordcount = Counter(file.read().split())
            if stopword:
                wordcount = remove_stopword.rem_stop(wordcount)
            # Score the file: bias plus weighted word counts.
            weight_sum = weights['weight_zero']
            for f in wordcount.keys():
                if f not in weights:
                    weights[f] = 0.0
                weight_sum += weights[f] * wordcount[f]
            perceptron_output = 1.0 if weight_sum > 0 else 0.0
            target_value = 1.0 if remove_stopword.isClass(filename) == 'spam' else 0.0
            # Perceptron update: w += eta * (target - output) * count.
            for w in wordcount.keys():
                weights[w] += float(learning_const) * (target_value - perceptron_output) * wordcount[w]
    # Evaluate on the held-out test set.
    corr_guess = 0
    count = 0
    for filename in os.listdir('./Testing'):
        guess = find(filename, weights)
        if guess == 1 and remove_stopword.isClass(filename) == 'spam':
            corr_guess += 1
        elif guess == 0 and remove_stopword.isClass(filename) == 'ham':
            corr_guess += 1
        count += 1
    accuracy = corr_guess / count
    print(accuracy)
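find(filename, weights) is called above but not defined in this snippet. A minimal sketch of what it plausibly does, assuming it mirrors the training pass (bias plus weighted word counts, thresholded at zero):

def find(filename, weights):
    # Hypothetical reconstruction: score one test file with the learned weights.
    with open(os.path.join('./Testing', filename), errors='ignore') as file:
        wordcount = Counter(file.read().split())
    if stopword:
        wordcount = remove_stopword.rem_stop(wordcount)
    total = weights['weight_zero']
    for word, count in wordcount.items():
        total += weights.get(word, 0.0) * count
    return 1 if total > 0 else 0  # 1 = spam, 0 = ham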
Example #3
import argparse

parser = argparse.ArgumentParser(description='Word2vec')
parser.add_argument('-lr', type=float, default=0.025)
parser.add_argument('-epochs', type=int, default=5)
parser.add_argument('-window-size', type=int, default=5)
parser.add_argument('-min-count', type=int, default=5)
parser.add_argument('-neg-count', type=int, default=5)
parser.add_argument('-batch-size', type=int, default=100)
parser.add_argument('-emb-dim', type=int, default=100)
parser.add_argument('-using-hs', action='store_true', default=False)

parser.add_argument('-dir', type=str, default='./data')
parser.add_argument('-no-cuda', action='store_true')
parser.add_argument('-test', action='store_true', default=False)
args = parser.parse_args()

# Read the corpus and build the vocabulary (word2id).
data = InputData('zhihu.txt', args)
args.output_file_name = 'result2.txt'

# The embedding table size is only known after the vocabulary is built.
args.emb_size = len(data.word2id)

# Build the skip-gram model and train it.
skip_gram_model = SkipGramModel(args)
mytrain.train(data, skip_gram_model, args)
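InputData, SkipGramModel, and mytrain.train come from the surrounding project and are not shown. As a sketch of what a skip-gram model with negative sampling typically looks like in PyTorch (the layer names and forward signature here are illustrative, not the project's actual API):

import torch
import torch.nn as nn
import torch.nn.functional as F

class SkipGramModel(nn.Module):
    # Sketch: one embedding table for center words, one for context words.
    def __init__(self, args):
        super().__init__()
        self.in_emb = nn.Embedding(args.emb_size, args.emb_dim)
        self.out_emb = nn.Embedding(args.emb_size, args.emb_dim)
        nn.init.uniform_(self.in_emb.weight, -0.5 / args.emb_dim, 0.5 / args.emb_dim)
        nn.init.zeros_(self.out_emb.weight)

    def forward(self, center, context, negatives):
        # center, context: (batch,); negatives: (batch, neg_count)
        v = self.in_emb(center)                         # (batch, dim)
        u = self.out_emb(context)                       # (batch, dim)
        u_neg = self.out_emb(negatives)                 # (batch, neg, dim)
        pos = F.logsigmoid((v * u).sum(dim=1))
        neg = F.logsigmoid(-torch.bmm(u_neg, v.unsqueeze(2)).squeeze(2)).sum(dim=1)
        return -(pos + neg).mean()                      # negative-sampling loss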

Example #4
import torch.multiprocessing as mp  # assumed import; the snippet uses mp.Process/mp.Manager

log.info(f'json_conf: {{{conf}}}')  # doubled braces render literal { } around conf

# initialize optimizer
optimizer = RiemannianSGD(
    model.parameters(),
    rgrad=opt.rgrad,
    retraction=opt.retraction,
    lr=opt.lr,
)

# This constant is not present in "embed.py"; it is passed through to the
# training calls below as an extra argument.
m = 2.0

# If nproc == 0, run single-threaded; otherwise run Hogwild.
if opt.nproc == 0:
    mytrain.train(model, data, optimizer, opt, log, 1, m)
else:
    queue = mp.Manager().Queue()
    model.share_memory()
    processes = []
    for rank in range(opt.nproc):
        p = mp.Process(target=mytrain.train_mp,
                       args=(model, data, optimizer, opt, log, rank + 1, m,
                             queue))
        p.start()
        processes.append(p)

    ctrl = mp.Process(target=control,
                      args=(queue, log, adjacency, data, opt.fout, distfn,
                            opt.epochs, processes))
    ctrl.start()
    ctrl.join()  # wait for the control process (which monitors the workers) to finish
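RiemannianSGD is defined elsewhere in this project. For Poincaré-ball embeddings, the rgrad and retraction arguments have a standard closed form: the Euclidean gradient is rescaled by the inverse metric, and each step is projected back inside the open unit ball. A minimal NumPy sketch, with illustrative function names:

import numpy as np

def poincare_rgrad(theta, egrad):
    # Riemannian gradient on the Poincare ball:
    # scale the Euclidean gradient by (1 - ||theta||^2)^2 / 4.
    sqnorm = np.dot(theta, theta)
    return ((1.0 - sqnorm) ** 2 / 4.0) * egrad

def poincare_retraction(theta, eps=1e-5):
    # Project a point that left the open unit ball back inside it.
    norm = np.linalg.norm(theta)
    if norm >= 1.0:
        theta = theta / norm * (1.0 - eps)
    return theta

def rsgd_step(theta, egrad, lr):
    # One Riemannian SGD step: rescaled gradient step, then retraction.
    return poincare_retraction(theta - lr * poincare_rgrad(theta, egrad))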