def trainer_helper(configFile,dataSetFile,tempModel): print "Training model on ",configFile,dataSetFile config = get_training_config_from_json(configFile) sentences, vocab, labels = build_data(dataSetFile,True) word_vecs = wordvecs.load_wordvecs(config.word2vec,vocab) trainer = TextCNNModelTrainer(config,word_vecs,sentences,labels) trainer.train(tempModel) print "Succesfully trained model on ",configFile,dataSetFile," and model is at ",tempModel print "Will proceed at testing the model on same data. If everything is correct, you should see the same accuracy" model = cPickle.load(open(tempModel,"rb")) op = model.classify(sentences) os.remove(tempModel)
def add_global_word_vecs(self, word_vec_dict):
    """Attach word vectors to this model, reusing a shared in-memory cache.

    The instantiator passes a process-wide dictionary keyed by word2vec
    file path. On a cache miss the vectors are loaded from file and the
    cache is populated so later models can reuse them.

    :param word_vec_dict: global cache mapping word2vec path -> vectors
    """
    import types
    vec_path = self.trainingConfig.word2vec
    if vec_path not in word_vec_dict:
        # Miss: load from disk once and publish into the shared cache.
        word_vec_dict[vec_path] = wordvecs.load_wordvecs(vec_path)
    self.word_vecs = word_vec_dict[vec_path]
def add_global_word_vecs(self, word_vec_dict):
    """Pick up word vectors for this model from a shared cache.

    Called by the instantiator so that several models pointing at the same
    word2vec file share one in-memory copy. If the vectors are not cached
    yet they are loaded from file and added to the cache.

    :param word_vec_dict: global cache mapping word2vec path -> vectors
    """
    key = self.trainingConfig.word2vec
    try:
        # Fast path: vectors already loaded by an earlier model.
        self.word_vecs = word_vec_dict[key]
    except KeyError:
        # Slow path: read from disk and cache for subsequent callers.
        self.word_vecs = wordvecs.load_wordvecs(key)
        word_vec_dict[key] = self.word_vecs
__author__ = 'devashish.shankar' if __name__ == "__main__": if len(sys.argv) < 5: print "Usage: training.py" print "\t<model config file path>" print "\t<training data file path>" print "\t<file path to store classifier model>" print "\t<true/false(preprocessing flag)>" exit(0) #processing.. config_file = sys.argv[1] train_data_file = sys.argv[2] model_output_file = sys.argv[3] preprocess = sys.argv[4].lower() training_config = config.get_training_config_from_json(config_file) sentences, vocab, labels = datasets.build_data(train_data_file, preprocess) word_vecs = wordvecs.load_wordvecs(training_config.word2vec, vocab) if training_config.mode == "multichannel": nntrainer = MultiChannelTrainer(training_config, word_vecs, sentences, labels) else: nntrainer = TextCNNModelTrainer(training_config, word_vecs, sentences, labels) nntrainer.train(model_output_file)
if __name__=="__main__": if len(sys.argv)<5: print "Usage: training.py" print "\t<model config file path>" print "\t<training data file path>" print "\t<file path to store classifier model>" print "\t<true/false(preprocessing flag)>" exit(0) #processing.. config_file=sys.argv[1] train_data_file=sys.argv[2] model_output_file=sys.argv[3] preprocess=sys.argv[4].lower() training_config = config.get_training_config_from_json(config_file) sentences, vocab, labels = datasets.build_data(train_data_file,preprocess) print "Dataset loaded" word_vecs = wordvecs.load_wordvecs(training_config.word2vec,vocab) print "Loaded word vecs from file" if training_config.mode=="multichannel": nntrainer = MultiChannelTrainer(training_config,word_vecs,sentences,labels) else: nntrainer = TextCNNModelTrainer(training_config,word_vecs,sentences,labels) nntrainer.train(model_output_file)