def the_critic_post():
    """Handle the submitted search form and render the rated tweets.

    Reads the ``text`` form field, asks the Twitter API wrapper for 5
    tweets matching it, runs the sentiment classifier over those tweets
    and renders ``index.html`` with both the tweets and the rating.
    """
    api = TwitterApi()
    rater = SentimentClassifier()

    # Get the query from the page input
    # TODO: if the query doesn't start with a hashtag, then add one
    search_term = request.form['text']
    found = api.search_tweets(search_term, 5)

    # Does it stink?
    return render_template("index.html",
                           tweets=found,
                           sentiment=rater.run(found))
def __init__(self):
    '''
    Build the two sub-classifiers and store the emoticon weighting factor.
    '''
    # Text-based classifier first, then the emoticon-based one
    # (construction order kept as in the original).
    self.sentiment_classifier = SentimentClassifier()
    self.emo_classifier = EmoticonClassifier()

    # Stored as-is; this method does not apply it.
    self.emo_factor = 0.8
def __init__(self):
    """Pick a device, load the tokenizer and the pre-trained classifier.

    Reads ``BERT_MODEL``, ``CLASS_NAMES`` and ``PRE_TRAINED_MODEL`` from
    the module-level ``config`` mapping.
    """
    # First CUDA device when one is available, CPU otherwise.
    device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
    self.device = torch.device(device_name)

    self.tokenizer = BertTokenizer.from_pretrained(config["BERT_MODEL"])

    # One output per configured class name.
    model = SentimentClassifier(len(config["CLASS_NAMES"]))
    # map_location makes the saved weights load onto the chosen device.
    weights = torch.load(config["PRE_TRAINED_MODEL"],
                         map_location=self.device)
    model.load_state_dict(weights)

    # Switch to evaluation mode, then move the model to the device.
    self.classifier = model.eval().to(self.device)
def test_validate(self):
    """cross validates with an error of 35% or less"""
    # Split each polarity file; the result is indexed by 'training' and
    # 'validation' below.
    splits = {
        'negative': self.split_file('data/rt-polaritydata/rt-polarity.neg'),
        'positive': self.split_file('data/rt-polaritydata/rt-polarity.pos'),
    }

    model = SentimentClassifier.build(
        [splits['negative']['training'], splits['positive']['training']])
    model.c = 2 ** 7
    model.reset_model()

    errors = [
        self.validate(model, splits[label]['validation'], label)
        for label in ('negative', 'positive')
    ]

    # Pool the two error rates by adding numerators and denominators.
    # NOTE(review): if validate() returns a Fraction, it is already reduced,
    # so this is only a true pooled rate when no reduction happened - confirm.
    pooled = Fraction(sum(err.numerator for err in errors),
                      sum(err.denominator for err in errors))
    print(pooled)
    self.assertLess(pooled, 0.35)
def test_validate(self):
    """cross validates with an error of 35% or less"""
    negative_split = self.split_file('data/rt-polaritydata/rt-polarity.neg')
    positive_split = self.split_file('data/rt-polaritydata/rt-polarity.pos')

    # Train on the 'training' parts only.
    training_sets = [negative_split['training'], positive_split['training']]
    model = SentimentClassifier.build(training_sets)
    model.c = 2 ** 7
    model.reset_model()

    # Error rate per polarity on the held-out 'validation' parts.
    negative_error = self.validate(
        model, negative_split['validation'], 'negative')
    positive_error = self.validate(
        model, positive_split['validation'], 'positive')

    # Combine both rates into a single fraction (numerators and
    # denominators are added separately).
    numerator = negative_error.numerator + positive_error.numerator
    denominator = negative_error.denominator + positive_error.denominator
    overall = Fraction(numerator, denominator)
    print(overall)
    self.assertLess(overall, 0.35)
def test_validate_itself(self):
    """yields a zero error when it uses itself"""
    # The same two files are used for building and for validating.
    labelled_files = (
        ('data/rt-polaritydata/rt-polarity.neg', 'negative'),
        ('data/rt-polaritydata/rt-polarity.pos', 'positive'),
    )

    model = SentimentClassifier.build([path for path, _ in labelled_files])
    model.c = 2 ** 7
    model.reset_model()

    errors = [self.validate(model, path, label)
              for path, label in labelled_files]

    overall = Fraction(sum(err.numerator for err in errors),
                       sum(err.denominator for err in errors))
    print(overall)
    self.assertEqual(overall, 0)
def test_validate_itself(self):
    """yields a zero error when it uses itself"""
    negative_path = 'data/rt-polaritydata/rt-polarity.neg'
    positive_path = 'data/rt-polaritydata/rt-polarity.pos'

    # Build from the full files ...
    model = SentimentClassifier.build([negative_path, positive_path])
    model.c = 2 ** 7
    model.reset_model()

    # ... and validate against exactly the same files.
    negative_error = self.validate(model, negative_path, 'negative')
    positive_error = self.validate(model, positive_path, 'positive')

    numerator = negative_error.numerator + positive_error.numerator
    denominator = negative_error.denominator + positive_error.denominator
    overall = Fraction(numerator, denominator)
    print(overall)
    self.assertEqual(overall, 0)
# coding: utf-8
# Demo script: classify one English phrase and print the message returned
# by SentimentClassifier.get_prediction_message.
__author__ = 'xead'

from sentiment_classifier import SentimentClassifier

clf = SentimentClassifier()
pred = clf.get_prediction_message(
    "the film is a shit"
)
print(pred)
def _load_cached(label, save_file):
    """Load one saved object via ``load_obj`` and print how long it took."""
    # NOTE(review): the *.pkl names suggest load_obj unpickles; only use it
    # on files this program wrote itself - confirm against load_obj.
    start_time = time.time()
    print("Loading %s :" % label, save_file)
    loaded = load_obj(save_file)
    time_taken = time.time() - start_time
    print("%s Loaded: %.3f secs!" % (save_file, time_taken))
    return loaded


def main(data_file_path, word_vec_filename, saved=True, batch_size=50,
         lstmUnits=24, epochs=10):
    """
    Main function: prepare the datasets, train the classifier, report metrics.

    Args:
        data_file_path: CSV file passed to ``LoadCSV`` (only read when
            ``saved`` is false).
        word_vec_filename: word-vector file passed to ``build_vocab`` (only
            read when ``saved`` is false).
        saved: when true, the train/validation/test datasets and the
            vocabulary vectors are loaded from the ``*.pkl`` files in the
            working directory; otherwise they are rebuilt from the raw data
            and written to those files.
        batch_size: stored in ``config.batchSize``.
        lstmUnits: stored in ``config.lstmUnits``.
        epochs: maximum number of training epochs.

    Side effects:
        Overwrites ``val_accs.txt``, ``train_accs.txt``, ``val_loss.txt`` and
        ``train_loss.txt`` with comma-separated per-epoch values (accuracy
        and loss, each multiplied by 100) and prints progress to stdout.
    """
    train_set_save_file = 'train_dataset.pkl'
    val_set_save_file = 'val_dataset.pkl'
    test_set_save_file = 'test_dataset.pkl'
    vocab_vector_save_file = 'vocab_vector.pkl'

    if saved:
        # Same load order as before; the labels now name what is loaded.
        train_dataset = _load_cached("Train", train_set_save_file)
        val_dataset = _load_cached("Validation", val_set_save_file)
        vocab_vector = _load_cached("Vocab", vocab_vector_save_file)
        test_dataset = _load_cached("Test", test_set_save_file)
    else:
        # Raw dataset
        data_set = LoadCSV(data_file_path)

        # Train and test, then carve the validation set out of the train part
        train_dataset, test_dataset = split_dataset(data_set, 85)
        train_dataset, val_dataset = split_dataset(train_dataset, 80)

        vocab_vector = build_vocab(train_dataset.text, word_vec_filename)

        train_dataset.vectorize_text(vocab_vector.vocab, 100)
        # Validation and test are normalised to the training text length.
        val_dataset.vectorize_text(
            vocab_vector.vocab,
            normalized_length=train_dataset.max_text_length)
        test_dataset.vectorize_text(
            vocab_vector.vocab,
            normalized_length=train_dataset.max_text_length)

        save_obj(train_dataset, train_set_save_file)
        save_obj(val_dataset, val_set_save_file)
        save_obj(test_dataset, test_set_save_file)
        save_obj(vocab_vector, vocab_vector_save_file)

    print("Train dataset numrecords: %d:" % (train_dataset.num_records))
    print("Validation dataset numrecords: %d:" % (val_dataset.num_records))
    print("Test dataset numrecords: %d:" % (test_dataset.num_records))

    # Set some config params for this dataset
    config = Configuration()
    config.epochs = epochs
    config.batchSize = batch_size
    config.lstmUnits = lstmUnits
    config.numClasses = train_dataset.num_classes
    config.maxSeqLength = train_dataset.max_text_length
    config.numDimensions = vocab_vector.dimension
    config.print()

    # Init classifier
    classifier = SentimentClassifier(config, vocab_vector.embeddings)

    # Train
    # Ten slots holding the validation accuracies the stopping rule compares
    # against.
    val_accs = [0] * 10

    # The metric files are closed even if training raises.
    with open('val_accs.txt', 'w') as val_acc_file, \
            open('train_accs.txt', 'w') as train_acc_file, \
            open('val_loss.txt', 'w') as val_lss_file, \
            open('train_loss.txt', 'w') as train_lss_file:
        for epoch_num in range(epochs):
            classifier.fit_epoch(train_dataset)

            t_m = classifier.metrics(train_dataset)
            v_m = classifier.metrics(val_dataset)
            train_accuracy = t_m['accuracy'] * 100
            val_accuracy = v_m['accuracy'] * 100
            train_loss = t_m['loss'] * 100
            val_loss = v_m['loss'] * 100

            print("%d %.2f||%.2f %.2f||%.2f" %
                  (epoch_num, train_accuracy, val_accuracy, train_loss,
                   val_loss), end='\n', flush=True)

            train_acc_file.write("%f, " % (train_accuracy))
            val_acc_file.write("%f, " % (val_accuracy))
            train_lss_file.write("%f, " % (train_loss))
            val_lss_file.write("%f, " % (val_loss))

            max_indx = np.argmax(val_accs)
            if val_accuracy > val_accs[max_indx]:
                # New best: overwrite the previous best.
                val_accs[max_indx] = val_accuracy
            else:
                min_indx = np.argmin(val_accs)
                if val_accuracy > val_accs[min_indx]:
                    # Better than the worst remembered value: replace it.
                    val_accs[min_indx] = val_accuracy
                elif val_accuracy < 77.0:
                    # Try to get upto desired accuracy
                    print(".", end=' ')
                else:
                    # At least 77% and no better than any remembered value.
                    print("\nTerminating training:", val_accs)
                    break

    # NOTE(review): the flattened source does not show whether this newline
    # belongs inside the epoch loop; it is placed after it - confirm.
    print("")

    val_metrics = classifier.metrics(val_dataset)
    print("validation Status: \n", val_metrics)

    train_metrics = classifier.metrics(train_dataset)
    print("Train Status: \n", train_metrics)

    test_metrics = classifier.metrics(test_dataset)
    print("Test Status: \n", test_metrics)
# -*- coding:utf-8 -*-
# Interactive script: ask for an image path, require exactly one detected
# face in it, then print every label/probability pair the model predicts.
import cv2
import sys
from sentiment_classifier import SentimentClassifier
from face_detector import detect_faces, show_detection_result

if __name__ == '__main__':
    model = SentimentClassifier()
    # To retrain instead of loading a stored model, call model.train() and
    # model.save() here (left disabled in the original script).
    #model.train()
    #model.save()
    model.load()

    img_path = input('\n Enter img path:')
    face_imgs = detect_faces(img_path)

    # Exactly one face is required. The two failure cases get their own
    # message (previously "no faces" was printed for several faces too),
    # and the script now exits with a non-zero status.
    face_count = len(face_imgs)
    if face_count == 0:
        print('Detect no faces.')
        sys.exit(1)
    if face_count != 1:
        print('Detected {0} faces; exactly one is required.'.format(face_count))
        sys.exit(1)

    # Prediction runs on the image path, not on the cropped face.
    result = model.predict(img_path)
    for item in result:
        print('{0} probability:{1}'.format(item['label'], item['prob']))
# coding: utf-8
# Demo script: classify the single word "bad" and print the message
# returned by SentimentClassifier.get_prediction_message.
__author__ = 'xead'

from sentiment_classifier import SentimentClassifier

clf = SentimentClassifier()
pred = clf.get_prediction_message(
    "bad"
)
print(pred)
def __init__(self, context_path):
    """Create the two private estimators, both given ``context_path``."""
    make_estimator = SentimentClassifier.newinstance

    # No type argument here, so newinstance's default kind is used.
    self.__nn_estimator = make_estimator(context_path=context_path)
    # Explicitly the 'nb' kind.
    self.__nb_estimator = make_estimator('nb', context_path=context_path)
# Demo script: classify one English sentence and print the message returned
# by SentimentClassifier.get_prediction_message.
from sentiment_classifier import SentimentClassifier

clf = SentimentClassifier()
pred = clf.get_prediction_message(
    "This is wonderful bank!"
)
print(pred)
# Demo script: classify two English sentences in turn and print the message
# returned for each.
from sentiment_classifier import SentimentClassifier

clf = SentimentClassifier()

for message in ("i really liked the movie and had fun",
                "this movie was terrible and bad"):
    pred = clf.get_prediction_message(message)
    print(pred)
#!/bin/python3
# Training script: build bag-of-words lists from the positive and negative
# sample directories, train the classifier on them and save it.
from sentiment_classifier import SentimentClassifier
from data import create_bows_from_path
from util.object_util import save_object

MODEL_PATH = "../data/model/"
TRAINING_DATA_PATH = "../data/training/"

# collect bag_of_words for positive samples
positive_training_path = TRAINING_DATA_PATH + "pos/"
positive_bow_list = create_bows_from_path(positive_training_path)

# collect bag_of_words for negative samples
negative_training_path = TRAINING_DATA_PATH + "neg/"
negative_bow_list = create_bows_from_path(negative_training_path)

# train the bayes classifier
classifier = SentimentClassifier()
classifier.train(positive_bow_list, negative_bow_list)

# store the trained classifier next to the other model files
save_object(classifier, MODEL_PATH + "classifier.model")
class Classifier(object):
    '''
    Combine the sentiment classifier's results with emoticon counts.

    The sentiment classifier yields several results per sentence; each one
    votes POSITIVE, NEGATIVE or HARD_TO_CLASSIFY. The emoticon classifier
    contributes a fractional vote split between positive and negative in
    proportion to the happy/sad emoticon counts, weighted by emo_factor.
    '''

    def __init__(self):
        '''
        Constructor
        '''
        self.sentiment_classifier = SentimentClassifier()
        self.emo_classifier = EmoticonClassifier()
        # Total weight of the emoticon vote.
        self.emo_factor = 0.8

    def classify(self, sentence):
        '''
        Classify a sentence, print the decision and return its numeric value
        (see get_result_value).
        '''
        senti_results = self.sentiment_classifier.classify(sentence)
        # The first element of the emoticon result is not used here.
        (_emo_result, happy_count, sad_count) = self.emo_classifier.classify(sentence)
        (hard_count, pos_count, neg_count) = self.combine_results(
            senti_results, happy_count, sad_count)
        decision = self.decision(hard_count, pos_count, neg_count)
        self.print_results(sentence, decision)
        return self.get_result_value(decision)

    def combine_results(self, senti_results, happy_count, sad_count):
        '''
        Return (hard_count, pos_count, neg_count): the sentiment vote counts
        with the normalized emoticon values added to positive and negative.
        '''
        (norm_happy_value, norm_sad_value) = self.normalized_emo_counts(
            happy_count, sad_count)
        (hard_count, pos_count, neg_count) = self.combine_senti_classifications(
            senti_results)
        pos_count = pos_count + norm_happy_value
        neg_count = neg_count + norm_sad_value
        return (hard_count, pos_count, neg_count)

    def normalized_emo_counts(self, happy_count, sad_count):
        '''
        Split emo_factor between happy and sad in proportion to their counts.

        Returns (0, 0) when there are no emoticons at all.
        '''
        norm_happy_value = 0
        norm_sad_value = 0
        # Named `total` so the builtin sum() is not shadowed.
        total = happy_count + sad_count
        if total != 0:
            norm_happy_value = (happy_count * self.emo_factor) / total
            norm_sad_value = (sad_count * self.emo_factor) / total
        return (norm_happy_value, norm_sad_value)

    def combine_senti_classifications(self, results):
        '''
        Count the results by their "result" entry.

        Returns (hard_count, pos_count, neg_count); entries with any other
        value are ignored.
        '''
        pos_count = 0
        neg_count = 0
        hard_count = 0
        for each_cls_res in results:
            if each_cls_res["result"] == POSITIVE:
                pos_count = pos_count + 1
            elif each_cls_res["result"] == NEGATIVE:
                neg_count = neg_count + 1
            elif each_cls_res["result"] == HARD_TO_CLASSIFY:
                hard_count = hard_count + 1
        return (hard_count, pos_count, neg_count)

    def decision(self, hard_count, pos_count, neg_count):
        '''
        Pick the final label from the three counts.

        HARD_TO_CLASSIFY wins when it is at least as large as both others,
        and also when positive and negative tie.
        '''
        decision = HARD_TO_CLASSIFY
        if hard_count >= pos_count and hard_count >= neg_count:
            decision = HARD_TO_CLASSIFY
        elif pos_count > neg_count:
            decision = POSITIVE
        elif pos_count < neg_count:
            decision = NEGATIVE
        return decision

    def print_results(self, sentence, decision):
        '''
        Print the sentence and the textual form of the decision.
        '''
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("\n************ DECISION ************")
        print("Sentence :" + sentence)
        print("\nFinal decision is: " + DECISION_LIST[decision])
        print("--------------------\n")

    def get_result_value(self, result):
        '''
        Map a decision to its numeric value: NEGATIVE -> -0.8,
        POSITIVE -> 1, HARD_TO_CLASSIFY -> 0 (None for anything else).
        '''
        if result == NEGATIVE:
            return -0.8
        elif result == POSITIVE:
            return 1
        elif result == HARD_TO_CLASSIFY:
            return 0
# Demo script: classify one long Russian review and print the first element
# of what get_prediction_message returns.
from sentiment_classifier import SentimentClassifier

clf = SentimentClassifier()

# NOTE(review): the literal below contains a backslash followed by a space
# ("памяти.\ Несколько"), which is not a valid escape sequence; it is kept
# unchanged here - confirm whether a line continuation was intended.
prediction = clf.get_prediction_message(
    'Ужасно слабый аккумулятор, это основной минус этого аппарата, разряжается '
    'буквально за пару часов при включенном wifi и на макс подсветке, '
    'например если играть или смотреть видео, следовательно использовать можно '
    'только если есть постоянная возможность подзарядиться. Качества звука через '
    'динамик далеко не на высоте.Наблюдаются незначительные тормоза в некоторых '
    'приложениях и вообще в меню. Очень мало встроенной памяти, а приложения '
    'устанавливаются именно туда, с этим связанны неудобства - нужно постоянно '
    'переносить их на карту памяти.\ Несколько неудобно что нету отдельной кнопки '
    'для фото. Подумываю купить батарею большей емкость мб что нибудь измениться.'
)

# NOTE(review): if the result is a plain string this prints only its first
# character - confirm the return type.
print(prediction[0])
# coding: utf-8
# Demo script: classify one English phrase and print the message returned
# by SentimentClassifier.get_prediction_message.
__author__ = 'xead'

from sentiment_classifier import SentimentClassifier

clf = SentimentClassifier()
pred = clf.get_prediction_message(
    "I hate it"
)
print(pred)
# coding: utf-8
# Demo script: classify one English phrase and print the message returned
# by SentimentClassifier.get_prediction_message.
__author__ = 'xead'

from sentiment_classifier import SentimentClassifier

clf = SentimentClassifier()
pred = clf.get_prediction_message(
    "Best of the best"
)
print(pred)
def __init__(self, exp, input_dimensionality, num_of_classes, vae_hidd_size,
             num_of_OpinionTopics, num_of_PlotTopics, encoder_layers=1,
             generator_layers=4, beta_s=1.0, beta_a=1.0, encoder_dropout=False,
             dropout_prob=0.0, generator_shortcut=False, generator_transform=None,
             interaction="dot_prod", plug_Plots=False, device="cpu"):
    """Build the priors, the three VAEs, both classifiers and the decoder.

    ``encoder_dropout``, ``generator_shortcut`` and ``generator_transform``
    are accepted but not read: every VAE is created with
    ``encoder_dropout=True``, ``generator_shortcut=False`` and
    ``generator_transform='softmax'``.

    ``interaction`` selects the decoder input width ("dot_prod", "concat",
    "onlySent" or "onlyNeutral"); for any other value no decoder is created.
    """
    super(AdversarialVaeModel, self).__init__()

    # Args includes all the meta information about the experiment
    self.exp = exp
    self.args = exp.args

    self.beta_a = beta_a
    self.beta_s = beta_s
    self.input_dimensionality = input_dimensionality
    self.num_of_OpinionTopics = num_of_OpinionTopics
    self.num_of_PlotTopics = num_of_PlotTopics

    # Prior mean and variance, one set of four entries per topic kind
    # (Plot first, then Opinion).
    self.priors = dict()
    for topic_kind, topic_count in (("Plot", self.num_of_PlotTopics),
                                    ("Opinion", self.num_of_OpinionTopics)):
        variance = 0.995
        self.priors["prior_mean_" + topic_kind] = \
            torch.Tensor(1, topic_count).fill_(0).to(device)
        self.priors["prior_variance_" + topic_kind] = variance
        prior_var = torch.Tensor(1, topic_count).fill_(variance).to(device)
        self.priors["prior_var_" + topic_kind] = prior_var
        self.priors["prior_logvar_" + topic_kind] = prior_var.log()

    # Flags
    self.interaction = interaction
    self.plug_Plots = plug_Plots
    self.topicType = "both"
    self.wordEmb = None
    self.alsoAspectLoss = True
    self.alsoSentLoss = True

    # Training Device
    self.device = device

    # - Init VAE components -
    def build_vae(topic_count, without_decoder):
        # The three VAEs differ only in topic count and decoder presence.
        return VaeAvitmModel(input_dimensionality,
                             d_e=vae_hidd_size,
                             d_t=topic_count,
                             encoder_layers=encoder_layers,
                             generator_layers=generator_layers,
                             without_decoder=without_decoder,
                             encoder_dropout=True,
                             dropout_rate=dropout_prob,
                             sparsity=self.args.de_sparsity,
                             generator_shortcut=False,
                             generator_transform='softmax',
                             device=device).to(device)

    self.aspect_vae_model = build_vae(num_of_PlotTopics, True)
    self.sent_vae_model = build_vae(num_of_OpinionTopics, True)
    # Only the plot VAE is built with its decoder.
    self.plot_vae_model = build_vae(num_of_PlotTopics, False)

    # - Sentiment classifier -
    self.num_of_classes = num_of_classes
    self.sent_class_model = SentimentClassifier(
        input_dimensionality,
        num_of_OpinionTopics,
        num_of_classes,
        hid_size=self.args.sent_classi_hid_size,
        device=device).to(device)

    # - Plot discriminator/classifier -
    # It is not an actual sentiment classifier, just reusing the same class.
    self.plot_discri_model = SentimentClassifier(
        input_dimensionality,
        num_of_PlotTopics,
        num_of_classes=2,
        hid_size=self.args.plot_classi_hid_size,
        device=device).to(device)

    # - Linear projection for possible asymmetric number of topics -
    topic_counts_differ = self.num_of_PlotTopics != self.num_of_OpinionTopics
    if topic_counts_differ:
        self.plotScaling = nn.Linear(self.num_of_PlotTopics,
                                     self.num_of_OpinionTopics)

    # Dropout
    self.r_drop = nn.Dropout(dropout_prob)

    # - Decoder matrix -
    # Input width of the decoder for each supported interaction mode.
    decoder_in_features = {
        "dot_prod": self.num_of_PlotTopics * self.num_of_OpinionTopics,
        "concat": self.num_of_PlotTopics + num_of_OpinionTopics,
        "onlySent": self.num_of_OpinionTopics,
        "onlyNeutral": self.num_of_PlotTopics,
    }
    if self.interaction in decoder_in_features:
        self.de = nn.Linear(decoder_in_features[self.interaction],
                            self.input_dimensionality)

    # Batch Norm.
    self.de_bn = nn.BatchNorm1d(self.input_dimensionality)

    # Orthogonal Reg.
    self.ortho_regul_flag = True

    # --- INIT ---
    # Decoder initialization
    weights_init_sparse(self.de, sparsity=self.args.de_sparsity)
    if topic_counts_differ:
        weights_init_xavier(self.plotScaling)
# coding: utf-8
# Demo script: classify one Russian phrase and print the message returned
# by SentimentClassifier.get_prediction_message.
__author__ = 'xead'

from sentiment_classifier import SentimentClassifier

clf = SentimentClassifier()
pred = clf.get_prediction_message(
    "Этот смартфон самый лучший"
)
print(pred)
# Demo script: classify one English review and print the message returned
# by SentimentClassifier.get_prediction_message.
from sentiment_classifier import SentimentClassifier

clf = SentimentClassifier()
pred = clf.get_prediction_message("Super film. I love it so much")
# print(...) with a single argument works on both Python 2 and Python 3;
# the former `print pred` statement is a SyntaxError on Python 3.
print(pred)
# coding: utf-8
# Demo script: call get_prediction_message with an empty list and print
# whatever it returns.
__author__ = 'astar'

from sentiment_classifier import SentimentClassifier

clf = SentimentClassifier()
pred = clf.get_prediction_message([])
# print(...) with a single argument works on both Python 2 and Python 3;
# the former `print pred` statement is a SyntaxError on Python 3.
print(pred)
# -*- coding: utf-8 -*-
# Demo script: classify one English sentence and print the predicted
# message together with its score.
# The __future__ import keeps the two-argument print below producing
# "pred score" (space separated) on Python 2 as well as on Python 3.
from __future__ import print_function

from sentiment_classifier import SentimentClassifier

clf = SentimentClassifier()
pred, score = clf.get_prediction_message_and_score("It was bad bank")
print(pred, score)
class AdversarialVaeModel(nn.Module): def __init__(self, exp, input_dimensionality, num_of_classes, vae_hidd_size, num_of_OpinionTopics, num_of_PlotTopics, encoder_layers=1, generator_layers=4, beta_s=1.0, beta_a=1.0, encoder_dropout=False, dropout_prob=0.0, generator_shortcut=False, generator_transform=None, interaction="dot_prod", plug_Plots=False, device="cpu"): super(AdversarialVaeModel, self).__init__() # Args includes all the meta information about the experiment self.exp = exp self.args = exp.args self.beta_a = beta_a self.beta_s = beta_s self.input_dimensionality = input_dimensionality self.num_of_OpinionTopics = num_of_OpinionTopics self.num_of_PlotTopics = num_of_PlotTopics # Prior mean and variance self.priors = dict() self.priors["prior_mean_Plot"] = torch.Tensor(1, self.num_of_PlotTopics).fill_(0).to(device) self.priors["prior_variance_Plot"] = 0.995 self.priors["prior_var_Plot"] = torch.Tensor(1, self.num_of_PlotTopics).fill_(self.priors["prior_variance_Plot"]).to(device) self.priors["prior_logvar_Plot"] = self.priors["prior_var_Plot"].log() self.priors["prior_mean_Opinion"] = torch.Tensor(1, self.num_of_OpinionTopics).fill_(0).to(device) self.priors["prior_variance_Opinion"] = 0.995 self.priors["prior_var_Opinion"] = torch.Tensor(1, self.num_of_OpinionTopics).fill_(self.priors["prior_variance_Opinion"]).to(device) self.priors["prior_logvar_Opinion"] = self.priors["prior_var_Opinion"].to(device).log() # Flags self.interaction = interaction self.plug_Plots = plug_Plots self.topicType = "both" self.wordEmb = None self.alsoAspectLoss = True self.alsoSentLoss = True # Training Device self.device = device # - Inint VAE components - self.aspect_vae_model = VaeAvitmModel(input_dimensionality, d_e=vae_hidd_size, d_t=num_of_PlotTopics, encoder_layers=encoder_layers, generator_layers=generator_layers, without_decoder=True, encoder_dropout=True, dropout_rate=dropout_prob, sparsity=self.args.de_sparsity, generator_shortcut=False, generator_transform='softmax', 
device=device).to(device) self.sent_vae_model = VaeAvitmModel(input_dimensionality, d_e=vae_hidd_size, d_t=num_of_OpinionTopics, encoder_layers=encoder_layers, generator_layers=generator_layers, without_decoder=True, encoder_dropout=True, dropout_rate=dropout_prob, sparsity=self.args.de_sparsity, generator_shortcut=False, generator_transform='softmax', device=device).to(device) self.plot_vae_model = VaeAvitmModel(input_dimensionality, d_e=vae_hidd_size, d_t=num_of_PlotTopics, encoder_layers=encoder_layers, generator_layers=generator_layers, without_decoder=False, encoder_dropout=True, dropout_rate=dropout_prob, sparsity=self.args.de_sparsity, generator_shortcut=False, generator_transform='softmax', device=device).to(device) # - Sentiment classifier - self.num_of_classes = num_of_classes self.sent_class_model = SentimentClassifier(input_dimensionality, num_of_OpinionTopics, num_of_classes, hid_size=self.args.sent_classi_hid_size, device=device).to(device) # - Plot discriminator/classifier - # It is not an actual sentiment classifier, just reusing the same class. 
self.plot_discri_model = SentimentClassifier(input_dimensionality, num_of_PlotTopics, num_of_classes=2, hid_size=self.args.plot_classi_hid_size, device=device).to(device) # - Linear projection for possible asymmetric number of topics - if self.num_of_PlotTopics != self.num_of_OpinionTopics: self.plotScaling = nn.Linear(self.num_of_PlotTopics, self.num_of_OpinionTopics) # Dropout self.r_drop = nn.Dropout(dropout_prob) # - Decoder matrix - if self.interaction == "dot_prod": self.de = nn.Linear(self.num_of_PlotTopics*self.num_of_OpinionTopics, self.input_dimensionality) elif self.interaction == "concat": self.de = nn.Linear(self.num_of_PlotTopics + num_of_OpinionTopics, self.input_dimensionality) elif self.interaction == "onlySent": self.de = nn.Linear(self.num_of_OpinionTopics, self.input_dimensionality) elif self.interaction == "onlyNeutral": self.de = nn.Linear(self.num_of_PlotTopics, self.input_dimensionality) # Batch Norm. self.de_bn = nn.BatchNorm1d(self.input_dimensionality) # Orthogonal Reg. 
self.ortho_regul_flag = True # --- INIT --- # Decoder initialization weights_init_sparse(self.de, sparsity=self.args.de_sparsity) if self.num_of_PlotTopics != self.num_of_OpinionTopics: weights_init_xavier(self.plotScaling) def decoder(self, r): p_x_given_h = F.softmax(self.de_bn(self.de(r)), dim=1) return p_x_given_h def forward(self, x, x_plots=None, perplexity=False): # --- Split reviews and plots --- if self.plug_Plots and not perplexity: x_plots = x[:, self.input_dimensionality:] x = x[:, :self.input_dimensionality] # --- Encoders --- mean_a, logvar_a, var_a, z_a = self.aspect_vae_model(x) mean_s, logvar_s, var_s, z_s = self.sent_vae_model(x) # Plot Encoder if self.plug_Plots and not perplexity: mean_p, logvar_p, var_p, z_p, p_x_given_h_plots = self.plot_vae_model(x_plots) conc_z_a_z_p = torch.cat((z_a, z_p), 0) y_p_pred = self.plot_discri_model(conc_z_a_z_p[torch.randperm(conc_z_a_z_p.size()[0])]) else: y_p_pred = mean_p = logvar_p = var_p = z_p = p_x_given_h_plots = None # --- Interaction --- interaction_vec = self.z_interaction(z_a, z_s) # --- Decoder --- p_x_given_h = self.decoder(interaction_vec) # --- Adversarial prediction --- y_s_pred = self.sent_class_model(z_s) if self.num_of_PlotTopics != self.num_of_OpinionTopics: y_a_pred = self.sent_class_model(self.plotScaling(z_a)) else: y_a_pred = self.sent_class_model(z_a) # # -- Orthogonal regularization -- if self.ortho_regul_flag: decoder_weights = self.de.weight.data.transpose(0,1).to(self.device, non_blocking=True) orth_loss = orthogonal_reg_loss(self.device, decoder_weights) else: orth_loss = 0.0 return [z_a, z_s, p_x_given_h, interaction_vec, mean_a, logvar_a, var_a, mean_s, logvar_s, var_s, \ y_a_pred, y_s_pred, y_p_pred, mean_p, logvar_p, var_p, z_p, p_x_given_h_plots, orth_loss] def save_params(self, filename): torch.save(self.state_dict(), filename) def load_params(self, filename): self.load_state_dict(torch.load(filename)) def z_interaction(self, z_a, z_s): interaction_vec = None if 
self.interaction == "dot_prod": interaction_vec = torch.bmm(z_a.unsqueeze(2), z_s.unsqueeze(2).transpose(1,2)) batch_size = interaction_vec.size()[0] interaction_vec = interaction_vec.view(batch_size, -1) # --- Interaction through concatination --- # interaction_vec: (batch_size, 2*#topics) elif self.interaction == "concat": interaction_vec = torch.cat((z_a, z_s), 1) # -- Interaction without interaction :) --- elif self.interaction == "onlySent": interaction_vec = z_s # -- Interaction without interaction :) --- elif self.interaction == "onlyNeutral": interaction_vec = z_a return interaction_vec ################### # FREEZE PARAMETERS ################### def freeze_sent_discriminators(self, freeze): # Freeze or defrost discriminators parameters if freeze: print("Sentiment discriminator parameters have been frozen.") self.sent_class_model.freeze_parameters(freeze) else: print("Sentiment discriminator parameters have been DE-frozen") self.sent_class_model.freeze_parameters(freeze) def freeze_plot_vae_and_discriminators(self, freeze): for m in [self.plot_vae_model, self.plot_discri_model]: for param in m.parameters(): if freeze: param.requires_grad = False else: param.requires_grad = True if freeze: m.frozen = True else: m.frozen = False def freeze_aspect_sent_VAE_encoders(self, freeze): for m in [self.aspect_vae_model, self.sent_vae_model]: for param in m.parameters(): if freeze: param.requires_grad = False else: param.requires_grad = True if freeze: m.frozen = True else: m.frozen = False def freeze_VAEdecoder(self, freeze): for param in self.de.parameters(): if freeze: param.requires_grad = False else: param.requires_grad = True if freeze: self.de.frozen = True else: self.de.frozen = False def remove_ortoghonalization_regularizer(self, remove): if remove: self.ortho_regul_flag = False else: self.ortho_regul_flag = True ############### # LOSS ############### def compute_KLD(self, posterior_mean, posterior_logvar, posterior_var, prior_mean, prior_var, prior_logvar, 
num_of_topics): # see Appendix B from paper: # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014 # https://arxiv.org/abs/1312.6114 prior_mean = prior_mean.expand_as(posterior_mean) prior_var = prior_var.expand_as(posterior_mean) prior_logvar = prior_logvar.expand_as(posterior_mean) var_division = posterior_var / prior_var diff = posterior_mean - prior_mean diff_term = diff * diff / prior_var logvar_division = prior_logvar - posterior_logvar KLD = 0.5 * ( (var_division + diff_term + logvar_division).sum(1) - num_of_topics ) return KLD def loss_function_bagOfWords(self, posterior_mean_a, posterior_logvar_a, posterior_var_a, posterior_mean_s, posterior_logvar_s, posterior_var_s, p_x_given_h, DocTerm_batch, avg_loss=True): KLD_a = self.compute_KLD(posterior_mean_a, posterior_logvar_a, posterior_var_a, self.priors["prior_mean_Plot"], self.priors["prior_var_Plot"], self.priors["prior_logvar_Plot"], self.num_of_PlotTopics) KLD_s = self.compute_KLD(posterior_mean_s, posterior_logvar_s, posterior_var_s, self.priors["prior_mean_Opinion"], self.priors["prior_var_Opinion"], self.priors["prior_logvar_Opinion"], self.num_of_OpinionTopics) nll_term = -(DocTerm_batch * (p_x_given_h+1e-10).log()).sum(1) loss = self.beta_a*KLD_a + self.beta_s*KLD_s + nll_term if avg_loss: loss = loss.mean() return (loss, nll_term) def overall_loss_func(self, reconstr_loss, y_adv_pred, y_sent_pred, y_sent_labels, y_p_pred, orth_loss, recontr_loss_plot=0.0, perplexity=False, test=False): alpha = 1.0 / (self.args.vocab_size * 0.5) beta = 0.0 gamma = 0.0 delta = 1.0 / (self.args.vocab_size * 1.3) epsilon = 0.0 zeta = 0.0 adv_sent_loss = 0.0 sent_loss = 0.0 adv_plot_loss = 0.0 if not perplexity: if self.alsoSentLoss: sent_loss = F.cross_entropy(y_sent_pred.to(self.device), y_sent_labels.to(self.device)) gamma = 1.0 if self.alsoAspectLoss: uniform_dist = torch.Tensor(len(y_adv_pred), self.num_of_classes).fill_((1./self.num_of_classes)).to(self.device) # 
https://github.com/peterliht/knowledge-distillation-pytorch/issues/2 # https://github.com/alinlab/Confident_classifier/blob/master/src/run_joint_confidence.py adv_sent_loss = F.kl_div(F.log_softmax(y_adv_pred), uniform_dist, reduction='sum')*self.num_of_classes beta = 2.0 if self.plug_Plots and self.plot_vae_model.frozen != True: adv_plot_loss = adv_cross_entropy(y_p_pred.to(self.device)) epsilon = 1.0 zeta = 1.0 overall_loss = alpha*reconstr_loss.mean() + beta*adv_sent_loss + gamma*sent_loss + delta*orth_loss + epsilon*adv_plot_loss + zeta*recontr_loss_plot else:
# coding: utf-8
# Demo script: classify one short Russian phrase and print the message
# returned by SentimentClassifier.get_prediction_message.
__author__ = 'nittella'

from sentiment_classifier import SentimentClassifier

clf = SentimentClassifier()
pred = clf.get_prediction_message(
    'плохой экран'
)
print(pred)
# coding: utf-8
# Demo script: classify one Russian sentence and print the message returned
# by SentimentClassifier.get_prediction_message.
__author__ = 'xead'

from sentiment_classifier import SentimentClassifier

clf = SentimentClassifier()
# NOTE(review): on Python 2 this literal is a byte string, on Python 3 it is
# text - confirm the classifier accepts both.
pred = clf.get_prediction_message("Это отличный банк, просто чудесный")
# print(...) with a single argument works on both Python 2 and Python 3;
# the former `print pred` statement is a SyntaxError on Python 3.
print(pred)
def fit_models(X, y):
    """Fit one 'nn' and one 'nb' classifier on the same data.

    Both are created through ``SentimentClassifier.newinstance`` and fitted
    with ``fit(X, y)``, the 'nn' one first.

    Returns:
        The tuple ``(nn_classifier, nb_classifier)``.
    """
    fitted = []
    for kind in ('nn', 'nb'):
        model = SentimentClassifier.newinstance(kind)
        model.fit(X, y)
        fitted.append(model)
    return tuple(fitted)
from sentiment_classifier import SentimentClassifier
from codecs import open
import time
import sys
from flask import Flask, render_template, request, redirect

app = Flask(__name__)

# The classifier is created once at import time; the elapsed time is printed.
print("Preparing classifier")
start_time = time.time()
clf = SentimentClassifier()
print("Classifier is ready")
print(time.time() - start_time, "seconds")


@app.route("/", methods=["POST", "GET"])
def index_page():
    """Main route: render the landing page."""
    return render_template('hello.html')


@app.route('/predict')
def predict(text=''):
    """Route for ajax requests: classify the ``text`` query parameter.

    When the request has no ``text`` parameter, the function's own default
    (an empty string) is classified instead of ``None``.

    Returns:
        The prediction message, which is also printed to stdout.
    """
    text = request.args.get('text', text)
    prediction_message = clf.get_prediction_message(text)
    print(prediction_message)
    return prediction_message


@app.route('/<path:path>')
def hello(path=''):
    """Redirect every other route to home with a 302."""
    return redirect("/", code=302)
# Demo script: classify two Russian reviews and print each one next to the
# message returned for it.
from sentiment_classifier import SentimentClassifier

clf = SentimentClassifier()

reviews = [
    'Не рекомендую к покупке! Самый худший телефон на свете!',
    'Телефон интуитивно понятен. Памяти много. Хороший звук.',
]

for review in reviews:
    verdict = clf.get_prediction_message(review)
    print(review, ' - ', verdict)
R.sm_sentiment[pred], R.sm_sentiment[label]))
# (The line above is the tail of a statement that begins before this excerpt;
# it is left untouched.)


# Script entry point: build the social-media dataset, split it, create the
# model and run training followed by the interactive loop.
if __name__ == '__main__':
    from socialmedia import create_dataset
    dataset = create_dataset()
    samples = dataset['samples']
    shuffle(samples)
    # 70% of the samples for training, 20% for testing.
    trainlen = int(len(samples) * 0.70)
    testlen = int(len(samples) * 0.20)
    # validlen is assigned but not read in the visible code; the validation
    # set below is simply everything after the train and test slices.
    validlen = testlen
    # split
    # Each split is ordered by the length of every sample's first element.
    len_sorted = lambda l: sorted(l, key=lambda x: len(x[0]))
    trainset = len_sorted(samples[:trainlen])
    testset = len_sorted(samples[trainlen:trainlen + testlen])
    validset = len_sorted(samples[trainlen + testlen:])
    vocab = dataset['vocab']
    model = SentimentClassifier(wdim=1, hdim=1, vocab_size=len(vocab), num_labels=len(R.sm_sentiment))
    # NOTE(review): `sess` is not passed on explicitly; train_model and
    # interact presumably rely on the default session - confirm. The flattened
    # source also does not show whether they sit inside this `with`.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        train_model(model, trainset, testset, batch_size=8, max_acc=0.90)
        interact(model, validset, vocab)
# coding: utf-8
# Demo script: classify one English sentence and print the message returned
# by SentimentClassifier.get_prediction_message.
__author__ = 'xead'

from sentiment_classifier import SentimentClassifier

clf = SentimentClassifier()
pred = clf.get_prediction_message("It's an amazing movie")
# print(...) with a single argument works on both Python 2 and Python 3;
# the former `print pred` statement is a SyntaxError on Python 3.
print(pred)
# -*- coding: utf-8 -*- """ Created on Mon Jun 12 16:38:54 2017 @author: ANTON """ from sentiment_classifier import SentimentClassifier from codecs import open import time from flask import Flask, render_template, request app = Flask(__name__) print "Preparing classifier" start_time = time.time() classifier = SentimentClassifier() print "Classifier is ready" print time.time() - start_time, "seconds" @app.route("/sentiment-demo", methods=["POST", "GET"]) def index_page(text="", prediction_message=""): if request.method == "POST": text = request.form["text"] logfile = open("ydf_demo_logs.txt", "a", "utf-8") print text print >> logfile, "<response>" print >> logfile, text prediction_message = classifier.get_prediction_message(text) print prediction_message print >> logfile, prediction_message print >> logfile, "</response>"
# coding: utf-8
# Demo script: classify a series of Russian reviews, each one extending the
# previous by a few words, and print the message returned for each.
__author__ = 'xead'

from sentiment_classifier import SentimentClassifier

clf = SentimentClassifier()

# NOTE(review): the second entry spells the first word differently
# ('Аккулятор'); kept as-is since it is runtime input - confirm intent.
test_reviews = [
    'Аккумулятор ужасен',
    'Аккулятор ужасен батарея слабая',
    'Аккумулятор ужасен батарея слабая звук плохой',
    'Аккумулятор ужасен батарея слабая звук плохой сервис нет',
    'Аккумулятор ужасен батарея слабая звук плохой сервис нет сенсор не работает',
    'Аккумулятор ужасен батарея слабая звук плохой сервис нет сенсор не работает телефон отличный',
    'Аккумулятор ужасен батарея слабая звук плохой сервис нет сенсор не работает телефон отличный очень надежный',
]

for review in test_reviews:
    pred = clf.get_prediction_message(review)
    print(pred)