Ejemplo n.º 1
0
                        # Save the particular model
                        model_dbow.save("/share/pi/rubin/jiaming/models/{}.model".format(model_name))
                        model_dbow.delete_temporary_training_data(keep_doctags_vectors=True, keep_inference=True)

                    elif args.model == 'fasttext':
                        model_name = "fasttext_v{}_a{}_e{}_t{}_w{}_s{}_ns{}".format(args.size, args.alpha,
                                                                                    args.epochs, args.alg, 
                                                                                    args.window, args.sample, 
                                                                                    args.ns_exponent)
                        if model_name + ".model" in trained_models:
                            print (model_name + "already trained. Passing.")
                            continue
                        
                        print("Training model: " + model_name)
                        model = FastText(min_count=10, negative=5, 
                                         size=args.size, sg = args.alg,
                                         alpha=args.alpha, min_alpha=args.alpha,
                                         window=args.window, sample=args.sample,
                                         ns_exponent=args.ns_exponent, workers=10)
                        model.build_vocab(sentences=[word_tokenize(line.strip()) for line in note_sentences])
                        for epoch in range(args.epochs):
                            model.train(sentences=utils.shuffle([x for x in tqdm(note_sentences)]), 
                                        total_examples=len(note_sentences), epochs=1)
                            model.alpha -= 0.002
                            model.min_alpha = model.alpha

                        # Save the particular model
                        model.save("/share/pi/rubin/jiaming/models/{}.model".format(model_name))