Ejemplo n.º 1
0
def the_critic_post():
  """Rate the tweets matching the submitted query and render the result page.

  Reads the ``text`` field of the submitted form, passes it to
  ``TwitterApi.search_tweets``, runs the sentiment classifier on the returned
  tweets and renders ``index.html`` with both the tweets and the rating.
  """
  api = TwitterApi()
  sentiment_model = SentimentClassifier()

  # The search query comes straight from the page's form input.
  search_query = request.form['text']

  # TODO: if the query doesn't start with a hashtag, then add one

  found_tweets = api.search_tweets(search_query, 5)

  # The classifier runs on the tweets before the template is rendered.
  return render_template(
      "index.html",
      tweets=found_tweets,
      sentiment=sentiment_model.run(found_tweets),
  )
Ejemplo n.º 2
0
 def __init__(self):
     '''
     Create the sentiment and emoticon classifiers and set ``emo_factor`` to 0.8.
     '''
     self.sentiment_classifier = SentimentClassifier()
     self.emo_classifier = EmoticonClassifier()
     self.emo_factor = 0.8
Ejemplo n.º 3
0
    def __init__(self):
        """Load the tokenizer and the pre-trained classifier onto one device."""
        # Use the first CUDA device when one is available, otherwise the CPU.
        device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
        self.device = torch.device(device_name)

        self.tokenizer = BertTokenizer.from_pretrained(config["BERT_MODEL"])

        # One output class per configured class name.
        model = SentimentClassifier(len(config["CLASS_NAMES"]))
        # map_location remaps the stored weights onto self.device while loading.
        saved_state = torch.load(config["PRE_TRAINED_MODEL"],
                                 map_location=self.device)
        model.load_state_dict(saved_state)
        # Call eval() first, then move the model to the selected device.
        self.classifier = model.eval().to(self.device)
Ejemplo n.º 4
0
    def test_validate(self):
        """Validation on the held-out split gives an error below 35%."""
        negative_split = self.split_file('data/rt-polaritydata/rt-polarity.neg')
        positive_split = self.split_file('data/rt-polaritydata/rt-polarity.pos')

        training_sets = [negative_split['training'], positive_split['training']]
        model = SentimentClassifier.build(training_sets)

        # Set the ``c`` parameter, then reset the model (presumably so the new
        # value takes effect).
        model.c = 2**7
        model.reset_model()

        negative_error = self.validate(model, negative_split['validation'],
                                       'negative')
        positive_error = self.validate(model, positive_split['validation'],
                                       'positive')

        # Combine both classes by adding numerators and denominators.
        # NOTE(review): if validate() returns an already-reduced Fraction this
        # is a mediant, not the exact pooled error rate -- confirm.
        combined_numerator = negative_error.numerator + positive_error.numerator
        combined_denominator = (negative_error.denominator +
                                positive_error.denominator)
        total = Fraction(combined_numerator, combined_denominator)
        print(total)
        self.assertLess(total, 0.35)
  def test_validate(self):
    """Validation on the held-out split gives an error below 35%."""
    negative_split = self.split_file('data/rt-polaritydata/rt-polarity.neg')
    positive_split = self.split_file('data/rt-polaritydata/rt-polarity.pos')

    training_sets = [negative_split['training'], positive_split['training']]
    model = SentimentClassifier.build(training_sets)

    # Set the ``c`` parameter, then reset the model (presumably so the new
    # value takes effect).
    model.c = 2 ** 7
    model.reset_model()

    negative_error = self.validate(model, negative_split['validation'],
                                   'negative')
    positive_error = self.validate(model, positive_split['validation'],
                                   'positive')

    # Combine both classes by adding numerators and denominators.
    # NOTE(review): if validate() returns an already-reduced Fraction this is
    # a mediant, not the exact pooled error rate -- confirm.
    combined_numerator = negative_error.numerator + positive_error.numerator
    combined_denominator = (negative_error.denominator +
                            positive_error.denominator)
    total = Fraction(combined_numerator, combined_denominator)
    print(total)
    self.assertLess(total, 0.35)
  def test_validate_itself(self):
    """Validating against the training files themselves gives zero error."""
    negative_path = 'data/rt-polaritydata/rt-polarity.neg'
    positive_path = 'data/rt-polaritydata/rt-polarity.pos'

    model = SentimentClassifier.build([negative_path, positive_path])

    # Set the ``c`` parameter, then reset the model (presumably so the new
    # value takes effect).
    model.c = 2 ** 7
    model.reset_model()

    # The same files that trained the model are used for validation.
    negative_error = self.validate(model, negative_path, 'negative')
    positive_error = self.validate(model, positive_path, 'positive')

    combined_numerator = negative_error.numerator + positive_error.numerator
    combined_denominator = (negative_error.denominator +
                            positive_error.denominator)
    total = Fraction(combined_numerator, combined_denominator)
    print(total)
    self.assertEqual(total, 0)
    def test_validate_itself(self):
        """Validating against the training files themselves gives zero error."""
        negative_path = 'data/rt-polaritydata/rt-polarity.neg'
        positive_path = 'data/rt-polaritydata/rt-polarity.pos'

        model = SentimentClassifier.build([negative_path, positive_path])

        # Set the ``c`` parameter, then reset the model (presumably so the new
        # value takes effect).
        model.c = 2**7
        model.reset_model()

        # The same files that trained the model are used for validation.
        negative_error = self.validate(model, negative_path, 'negative')
        positive_error = self.validate(model, positive_path, 'positive')

        combined_numerator = negative_error.numerator + positive_error.numerator
        combined_denominator = (negative_error.denominator +
                                positive_error.denominator)
        total = Fraction(combined_numerator, combined_denominator)
        print(total)
        self.assertEqual(total, 0)
Ejemplo n.º 8
0
__author__ = 'xead'
# coding: utf-8

from sentiment_classifier import SentimentClassifier

# Build a classifier with its default constructor and print the prediction
# message it returns for one sample sentence.
clf = SentimentClassifier()

pred = clf.get_prediction_message("the film is a shit")

print(pred)
Ejemplo n.º 9
0
def _load_timed(label, filename):
    """Load one saved object with ``load_obj`` and print how long it took."""
    start_time = time.time()
    print("Loading %s :" % label, filename)
    loaded = load_obj(filename)
    time_taken = time.time() - start_time
    print("%s Loaded: %.3f secs!" % (filename, time_taken))
    return loaded


def main(data_file_path,
         word_vec_filename,
         saved=True,
         batch_size=50,
         lstmUnits=24,
         epochs=10):
    """Train a SentimentClassifier and print train/validation/test metrics.

    Args:
        data_file_path: File handed to ``LoadCSV``; only read when ``saved``
            is false.
        word_vec_filename: File handed to ``build_vocab``; only read when
            ``saved`` is false.
        saved: When true, load the previously saved datasets and vocabulary
            from the ``*.pkl`` files instead of rebuilding (and re-saving)
            them from the raw data.
        batch_size: Stored as ``config.batchSize``.
        lstmUnits: Stored as ``config.lstmUnits``.
        epochs: Maximum number of training epochs.

    Side effects:
        Writes the per-epoch accuracies and losses to ``val_accs.txt``,
        ``train_accs.txt``, ``val_loss.txt`` and ``train_loss.txt``, and,
        when ``saved`` is false, saves the prepared datasets and vocabulary.
    """

    train_set_save_file = 'train_dataset.pkl'
    val_set_save_file = 'val_dataset.pkl'
    test_set_save_file = 'test_dataset.pkl'
    vocab_vector_save_file = 'vocab_vector.pkl'

    if saved:
        # Each label now matches the file actually being loaded; previously
        # the validation and vocabulary messages named the wrong file/label.
        train_dataset = _load_timed("Train", train_set_save_file)
        val_dataset = _load_timed("Validation", val_set_save_file)
        vocab_vector = _load_timed("Vocab", vocab_vector_save_file)
        test_dataset = _load_timed("Test", test_set_save_file)
    else:
        # Raw dataset
        data_set = LoadCSV(data_file_path)

        # Split off the test set first, then split validation from training.
        train_dataset, test_dataset = split_dataset(data_set, 85)
        train_dataset, val_dataset = split_dataset(train_dataset, 80)
        vocab_vector = build_vocab(train_dataset.text, word_vec_filename)

        # Validation and test texts are normalized to the training set's
        # maximum text length.
        train_dataset.vectorize_text(vocab_vector.vocab, 100)
        val_dataset.vectorize_text(
            vocab_vector.vocab,
            normalized_length=train_dataset.max_text_length)
        test_dataset.vectorize_text(
            vocab_vector.vocab,
            normalized_length=train_dataset.max_text_length)

        save_obj(train_dataset, train_set_save_file)
        save_obj(val_dataset, val_set_save_file)
        save_obj(test_dataset, test_set_save_file)
        save_obj(vocab_vector, vocab_vector_save_file)

    print("Train dataset numrecords: %d:" % (train_dataset.num_records))
    print("Validation dataset numrecords: %d:" % (val_dataset.num_records))
    print("Test dataset numrecords: %d:" % (test_dataset.num_records))

    # Set some config params for this dataset
    config = Configuration()
    config.epochs = epochs
    config.batchSize = batch_size
    config.lstmUnits = lstmUnits
    config.numClasses = train_dataset.num_classes
    config.maxSeqLength = train_dataset.max_text_length
    config.numDimensions = vocab_vector.dimension
    config.print()

    # Init classifier
    classifier = SentimentClassifier(config, vocab_vector.embeddings)

    # Ten slots holding validation accuracies seen so far, used by the
    # stopping rule below.
    val_accs = [0] * 10

    # The context manager closes all four metric files even if training raises.
    with open('val_accs.txt', 'w') as val_acc_file, \
            open('train_accs.txt', 'w') as train_acc_file, \
            open('val_loss.txt', 'w') as val_lss_file, \
            open('train_loss.txt', 'w') as train_lss_file:
        for epoch_num in range(epochs):
            classifier.fit_epoch(train_dataset)

            t_m = classifier.metrics(train_dataset)
            v_m = classifier.metrics(val_dataset)

            train_accuracy = t_m['accuracy'] * 100
            val_accuracy = v_m['accuracy'] * 100

            train_loss = t_m['loss'] * 100
            val_loss = v_m['loss'] * 100

            print("%d  %.2f||%.2f  %.2f||%.2f" %
                  (epoch_num, train_accuracy, val_accuracy, train_loss,
                   val_loss),
                  end='\n',
                  flush=True)

            train_acc_file.write("%f, " % (train_accuracy))
            val_acc_file.write("%f, " % (val_accuracy))

            train_lss_file.write("%f, " % (train_loss))
            val_lss_file.write("%f, " % (val_loss))

            # Stopping rule: a new best accuracy replaces the current best
            # slot; otherwise an accuracy above the worst slot replaces that
            # slot. Training stops once an epoch beats no slot while being at
            # least 77%.
            max_indx = np.argmax(val_accs)
            if val_accuracy > val_accs[max_indx]:
                val_accs[max_indx] = val_accuracy
            else:
                min_indx = np.argmin(val_accs)
                if val_accuracy > val_accs[min_indx]:
                    val_accs[min_indx] = val_accuracy
                elif val_accuracy < 77.0:  # Try to get upto desired accuracy
                    print(".", end=' ')
                else:
                    print("\nTerminating training:", val_accs)
                    break

    print("")

    val_metrics = classifier.metrics(val_dataset)
    print("validation Status: \n", val_metrics)

    train_metrics = classifier.metrics(train_dataset)
    print("Train Status: \n", train_metrics)

    test_metrics = classifier.metrics(test_dataset)
    print("Test Status: \n", test_metrics)
Ejemplo n.º 10
0
# -*- coding:utf-8 -*-
import cv2
import sys

from sentiment_classifier import SentimentClassifier
from face_detector import detect_faces, show_detection_result

if __name__ == '__main__':
    # Load a previously saved model, check that the given image contains
    # exactly one detected face, then print each predicted label with its
    # probability.

    model = SentimentClassifier()

    # Uncomment to retrain and save the model instead of loading it:
    #model.train()
    #model.save()

    model.load()

    img_path = input('\n Enter img path:')

    face_imgs = detect_faces(img_path)

    # Exactly one face is required. Report the real reason for stopping:
    # previously "no faces" was also printed when several were detected.
    num_faces = len(face_imgs)
    if num_faces != 1:
        if num_faces == 0:
            print('Detect no faces.')
        else:
            print('Detect {0} faces, expected exactly one.'.format(num_faces))
        sys.exit()

    result = model.predict(img_path)
    for item in result:
        print('{0} probability:{1}'.format(item['label'], item['prob']))
Ejemplo n.º 11
0
__author__ = 'xead'
# coding: utf-8

from sentiment_classifier import SentimentClassifier

# Build a classifier with its default constructor and print the prediction
# message it returns for the single word "bad".
clf = SentimentClassifier()
pred = clf.get_prediction_message("bad")
print(pred)
 def __init__(self, context_path):
     """Create two estimators via ``SentimentClassifier.newinstance``.

     Both receive ``context_path``; the first relies on ``newinstance``'s
     default kind, the second explicitly requests ``'nb'``.
     """
     make_estimator = SentimentClassifier.newinstance
     self.__nn_estimator = make_estimator(context_path=context_path)
     self.__nb_estimator = make_estimator('nb', context_path=context_path)
Ejemplo n.º 13
0
from sentiment_classifier import SentimentClassifier

# Build a classifier with its default constructor and print the prediction
# message it returns for one sample sentence.
clf = SentimentClassifier()

pred = clf.get_prediction_message("This is wonderful bank!")

print(pred)
Ejemplo n.º 14
0
from sentiment_classifier import SentimentClassifier

# Build a classifier with its default constructor, then print the prediction
# message for two sample sentences in turn.
clf = SentimentClassifier()

pred = clf.get_prediction_message("i really liked the movie and had fun")

print(pred)

# Second sample; ``pred`` is reused for the new result.
pred = clf.get_prediction_message("this movie was terrible and bad")

print(pred)
#!/bin/python3
from sentiment_classifier import SentimentClassifier

from data import create_bows_from_path
from util.object_util import save_object

MODEL_PATH = "../data/model/"
TRAINING_DATA_PATH = "../data/training/"

# Collect bag_of_words for positive samples. The former ``= []``
# initialisation was overwritten immediately, so it has been dropped.
positive_training_path = TRAINING_DATA_PATH + "pos/"
positive_bow_list = create_bows_from_path(positive_training_path)

# Collect bag_of_words for negative samples.
negative_training_path = TRAINING_DATA_PATH + "neg/"
negative_bow_list = create_bows_from_path(negative_training_path)

# Train the bayes classifier on both sample sets.
classifier = SentimentClassifier()
classifier.train(positive_bow_list, negative_bow_list)

# Save the trained classifier under the model directory.
save_object(classifier, MODEL_PATH + "classifier.model")
Ejemplo n.º 16
0
class Classifier(object):
    """Combine a sentiment classifier and an emoticon classifier.

    The sentiment classifier contributes one vote per result it returns; the
    emoticon counts are normalised so that together they add at most
    ``emo_factor`` to the positive/negative vote totals.
    """

    def __init__(self):
        """Create the two underlying classifiers and set the emoticon weight."""
        self.sentiment_classifier = SentimentClassifier()
        self.emo_classifier = EmoticonClassifier()
        # Total weight shared between the happy and sad emoticon counts.
        self.emo_factor = 0.8

    def classify(self, sentence):
        """Classify ``sentence``, print the decision and return its score.

        Returns:
            The value from ``get_result_value``: 1 for POSITIVE, -0.8 for
            NEGATIVE and 0 for HARD_TO_CLASSIFY.
        """
        senti_results = self.sentiment_classifier.classify(sentence)
        # The emoticon classifier's own verdict is not used, only its counts.
        (_emo_result, happy_count, sad_count) = self.emo_classifier.classify(sentence)
        (hard_count, pos_count, neg_count) = self.combine_results(
            senti_results, happy_count, sad_count)
        decision = self.decision(hard_count, pos_count, neg_count)
        self.print_results(sentence, decision)
        return self.get_result_value(decision)

    def combine_results(self, senti_results, happy_count, sad_count):
        """Add the normalised emoticon weights to the sentiment vote counts.

        Returns:
            A ``(hard_count, pos_count, neg_count)`` tuple.
        """
        (norm_happy_value, norm_sad_value) = self.normalized_emo_counts(
            happy_count, sad_count)
        (hard_count, pos_count, neg_count) = self.combine_senti_classifications(
            senti_results)
        pos_count = pos_count + norm_happy_value
        neg_count = neg_count + norm_sad_value
        return (hard_count, pos_count, neg_count)

    def normalized_emo_counts(self, happy_count, sad_count):
        """Scale the emoticon counts so they sum to ``emo_factor``.

        Returns:
            ``(norm_happy_value, norm_sad_value)``; both are 0 when there are
            no emoticons at all.
        """
        norm_happy_value = 0
        norm_sad_value = 0
        # Named ``total`` rather than ``sum`` to avoid shadowing the builtin.
        total = happy_count + sad_count
        if total != 0:
            norm_happy_value = (happy_count * self.emo_factor) / total
            norm_sad_value = (sad_count * self.emo_factor) / total
        return (norm_happy_value, norm_sad_value)

    def combine_senti_classifications(self, results):
        """Count how many results fall into each class.

        Args:
            results: Iterable of mappings with a ``"result"`` key.

        Returns:
            A ``(hard_count, pos_count, neg_count)`` tuple; results with any
            other value are ignored.
        """
        pos_count = 0
        neg_count = 0
        hard_count = 0
        for each_cls_res in results:
            outcome = each_cls_res["result"]
            if outcome == POSITIVE:
                pos_count += 1
            elif outcome == NEGATIVE:
                neg_count += 1
            elif outcome == HARD_TO_CLASSIFY:
                hard_count += 1
        return (hard_count, pos_count, neg_count)

    def decision(self, hard_count, pos_count, neg_count):
        """Pick the final class from the three vote totals.

        HARD_TO_CLASSIFY wins when it is at least as large as both other
        totals, and also when the positive and negative totals are tied.
        """
        if hard_count >= pos_count and hard_count >= neg_count:
            return HARD_TO_CLASSIFY
        if pos_count > neg_count:
            return POSITIVE
        if pos_count < neg_count:
            return NEGATIVE
        # Positive and negative are tied.
        return HARD_TO_CLASSIFY

    def print_results(self, sentence, decision):
        """Print the sentence and the human-readable final decision."""
        # Single-argument parenthesised prints behave the same on Python 2
        # and are valid on Python 3, unlike the former print statements.
        print("\n************ DECISION ************")
        print("Sentence :" + sentence)
        print("\nFinal decision is: " + DECISION_LIST[decision])
        print("--------------------\n")

    def get_result_value(self, result):
        """Map a decision constant to its numeric score.

        Returns:
            -0.8 for NEGATIVE, 1 for POSITIVE, 0 for HARD_TO_CLASSIFY and
            ``None`` for any other value.
        """
        if result == NEGATIVE:
            return -0.8
        elif result == POSITIVE:
            return 1
        elif result == HARD_TO_CLASSIFY:
            return 0
        return None
Ejemplo n.º 17
0
from sentiment_classifier import SentimentClassifier

# Build a classifier and print the first element of the prediction it returns
# for one long Russian-language product review.
clf = SentimentClassifier()

# NOTE: the review text contains a literal backslash after "памяти.". It is
# now written as an explicit ``\\`` because a bare ``\ `` is an invalid escape
# sequence; the resulting string value is unchanged.
prediction = clf.get_prediction_message(
    'Ужасно слабый аккумулятор, это основной минус этого аппарата, разряжается '
    'буквально за пару часов при включенном wifi и на макс подсветке, '
    'например если играть или смотреть видео, следовательно использовать можно '
    'только если есть постоянная возможность подзарядиться. Качества звука через '
    'динамик далеко не на высоте.Наблюдаются незначительные тормоза в некоторых '
    'приложениях и вообще в меню. Очень мало встроенной памяти, а приложения '
    'устанавливаются именно туда, с этим связанны неудобства - нужно постоянно '
    'переносить их на карту памяти.\\ Несколько неудобно что нету отдельной кнопки '
    'для фото. Подумываю купить батарею большей емкость мб что нибудь измениться.'
)

print(prediction[0])
Ejemplo n.º 18
0
__author__ = 'xead'
# coding: utf-8

from sentiment_classifier import SentimentClassifier

# Build a classifier with its default constructor and print the prediction
# message it returns for one sample sentence.
clf = SentimentClassifier()

pred = clf.get_prediction_message("I hate it")

print (pred)
Ejemplo n.º 19
0
__author__ = 'xead'
# coding: utf-8

from sentiment_classifier import SentimentClassifier

# Build a classifier with its default constructor and print the prediction
# message it returns for one sample sentence.
clf = SentimentClassifier()

pred = clf.get_prediction_message("Best of the best")

print(pred)
Ejemplo n.º 20
0
    def __init__(self, exp, input_dimensionality, num_of_classes, vae_hidd_size, num_of_OpinionTopics, num_of_PlotTopics, 
                 encoder_layers=1, generator_layers=4, beta_s=1.0, beta_a=1.0, encoder_dropout=False,  dropout_prob=0.0, 
                 generator_shortcut=False, generator_transform=None, interaction="dot_prod", plug_Plots=False, device="cpu"):
        """Build the VAE encoders, the two classifiers and the decoder.

        Args:
            exp: Experiment object; ``exp.args`` supplies ``de_sparsity``,
                ``sent_classi_hid_size`` and ``plot_classi_hid_size``.
            input_dimensionality: Input size passed to every sub-model; also
                the decoder's output size.
            num_of_classes: Number of classes of the sentiment classifier.
            vae_hidd_size: Passed to each ``VaeAvitmModel`` as ``d_e``.
            num_of_OpinionTopics: Topic count (``d_t``) of the sentiment VAE.
            num_of_PlotTopics: Topic count (``d_t``) of the aspect and plot VAEs.
            encoder_layers: Forwarded to each ``VaeAvitmModel``.
            generator_layers: Forwarded to each ``VaeAvitmModel``.
            beta_s: Stored as ``self.beta_s``.
            beta_a: Stored as ``self.beta_a``.
            encoder_dropout: Not used by this constructor (see NOTE below).
            dropout_prob: Dropout rate of the VAEs and of ``self.r_drop``.
            generator_shortcut: Not used by this constructor (see NOTE below).
            generator_transform: Not used by this constructor (see NOTE below).
            interaction: One of "dot_prod", "concat", "onlySent" or
                "onlyNeutral"; selects the decoder's input size.
            plug_Plots: Stored as ``self.plug_Plots``.
            device: Device on which the priors and sub-models are placed.
        """

        super(AdversarialVaeModel, self).__init__()

        # Args includes all the meta information about the experiment
        self.exp  = exp
        self.args = exp.args

        self.beta_a = beta_a
        self.beta_s = beta_s
        
        self.input_dimensionality = input_dimensionality
        self.num_of_OpinionTopics = num_of_OpinionTopics
        self.num_of_PlotTopics    = num_of_PlotTopics

        # Prior mean and variance: zero mean and 0.995 variance for every
        # topic, stored as (1, num_topics) tensors on ``device``.
        self.priors = dict()
        self.priors["prior_mean_Plot"]        = torch.Tensor(1, self.num_of_PlotTopics).fill_(0).to(device)
        self.priors["prior_variance_Plot"]    = 0.995
        self.priors["prior_var_Plot"]         = torch.Tensor(1, self.num_of_PlotTopics).fill_(self.priors["prior_variance_Plot"]).to(device)
        self.priors["prior_logvar_Plot"]      = self.priors["prior_var_Plot"].log()

        self.priors["prior_mean_Opinion"]     = torch.Tensor(1, self.num_of_OpinionTopics).fill_(0).to(device)
        self.priors["prior_variance_Opinion"] = 0.995
        self.priors["prior_var_Opinion"]      = torch.Tensor(1, self.num_of_OpinionTopics).fill_(self.priors["prior_variance_Opinion"]).to(device)
        # prior_var_Opinion was already moved to ``device`` on the line above.
        self.priors["prior_logvar_Opinion"]   = self.priors["prior_var_Opinion"].to(device).log()
                      
        # Flags
        self.interaction    = interaction
        self.plug_Plots     = plug_Plots
        self.topicType      = "both"
        self.wordEmb        = None
        self.alsoAspectLoss = True
        self.alsoSentLoss   = True

        # Training Device
        self.device         = device

        # - Init VAE components -
        # NOTE(review): the encoder_dropout, generator_shortcut and
        # generator_transform arguments are not forwarded; all three VAEs are
        # built with True, False and 'softmax' respectively. Only the plot
        # VAE is built with its decoder (without_decoder=False).
        self.aspect_vae_model = VaeAvitmModel(input_dimensionality, d_e=vae_hidd_size, d_t=num_of_PlotTopics, 
                                                encoder_layers=encoder_layers, generator_layers=generator_layers, without_decoder=True, 
                                                encoder_dropout=True, dropout_rate=dropout_prob, sparsity=self.args.de_sparsity, generator_shortcut=False, 
                                                generator_transform='softmax', device=device).to(device)

        self.sent_vae_model   = VaeAvitmModel(input_dimensionality, d_e=vae_hidd_size, d_t=num_of_OpinionTopics, 
                                                encoder_layers=encoder_layers, generator_layers=generator_layers, without_decoder=True, 
                                                encoder_dropout=True, dropout_rate=dropout_prob, sparsity=self.args.de_sparsity, generator_shortcut=False, 
                                                generator_transform='softmax', device=device).to(device)

        self.plot_vae_model   = VaeAvitmModel(input_dimensionality, d_e=vae_hidd_size, d_t=num_of_PlotTopics, 
                                                encoder_layers=encoder_layers, generator_layers=generator_layers, without_decoder=False, 
                                                encoder_dropout=True, dropout_rate=dropout_prob, sparsity=self.args.de_sparsity, generator_shortcut=False, 
                                                generator_transform='softmax', device=device).to(device)

        # - Sentiment classifier -
        self.num_of_classes    = num_of_classes
        self.sent_class_model  = SentimentClassifier(input_dimensionality, 
                                                     num_of_OpinionTopics, 
                                                     num_of_classes, 
                                                     hid_size=self.args.sent_classi_hid_size,
                                                     device=device).to(device)

        # - Plot discriminator/classifier -
        # It is not an actual sentiment classifier, just reusing the same class.
        self.plot_discri_model = SentimentClassifier(input_dimensionality, 
                                                     num_of_PlotTopics, 
                                                     num_of_classes=2, 
                                                     hid_size=self.args.plot_classi_hid_size,
                                                     device=device).to(device)

        # - Linear projection for possible asymmetric number of topics -
        # Only created when the two topic counts differ.
        if self.num_of_PlotTopics != self.num_of_OpinionTopics:
            self.plotScaling = nn.Linear(self.num_of_PlotTopics, self.num_of_OpinionTopics)

        # Dropout
        self.r_drop = nn.Dropout(dropout_prob)

        # - Decoder matrix -
        # The decoder's input size depends on how the two latent vectors are
        # combined. NOTE(review): any other ``interaction`` value leaves
        # ``self.de`` undefined, so the weights_init_sparse call below fails.
        if self.interaction == "dot_prod":
            self.de = nn.Linear(self.num_of_PlotTopics*self.num_of_OpinionTopics, self.input_dimensionality)
        elif self.interaction == "concat":
            self.de = nn.Linear(self.num_of_PlotTopics + num_of_OpinionTopics, self.input_dimensionality)
        elif self.interaction == "onlySent":
            self.de = nn.Linear(self.num_of_OpinionTopics, self.input_dimensionality)
        elif self.interaction == "onlyNeutral":
            self.de = nn.Linear(self.num_of_PlotTopics, self.input_dimensionality)

        # Batch Norm.
        self.de_bn = nn.BatchNorm1d(self.input_dimensionality)

        # Orthogonal Reg.
        self.ortho_regul_flag = True
        

        # --- INIT ---
        # Decoder initialization
        weights_init_sparse(self.de, sparsity=self.args.de_sparsity)
        if self.num_of_PlotTopics != self.num_of_OpinionTopics:
            weights_init_xavier(self.plotScaling)
Ejemplo n.º 21
0
__author__ = 'xead'
# coding: utf-8

from sentiment_classifier import SentimentClassifier

# Build a classifier with its default constructor and print the prediction
# message for one sample sentence (Russian for "This smartphone is the best").
clf = SentimentClassifier()

pred = clf.get_prediction_message("Этот смартфон самый лучший")

print(pred)
Ejemplo n.º 22
0

from sentiment_classifier import SentimentClassifier

# Build a classifier with its default constructor and print the prediction
# message it returns for one sample sentence.
clf = SentimentClassifier()

pred = clf.get_prediction_message("Super film. I love it so much")

# Parenthesised single-argument print: same output on Python 2, and valid on
# Python 3 where the former ``print pred`` statement is a SyntaxError.
print(pred)
__author__ = 'astar'
# coding: utf-8

from sentiment_classifier import SentimentClassifier

# Build a classifier with its default constructor and print the prediction
# message it returns for an empty list.
clf = SentimentClassifier()

pred = clf.get_prediction_message([])

# Parenthesised single-argument print: same output on Python 2, and valid on
# Python 3 where the former ``print pred`` statement is a SyntaxError.
print(pred)
# -*- coding: utf-8 -*-

from sentiment_classifier import SentimentClassifier

# Build a classifier with its default constructor and print both the
# prediction message and the score it returns for one sample sentence.
clf = SentimentClassifier()

pred, score = clf.get_prediction_message_and_score("It was bad bank")

# Formats both values into one space-separated string so the output is the
# same on Python 2 and Python 3; ``print pred, score`` is a SyntaxError on 3.
print("%s %s" % (pred, score))
Ejemplo n.º 25
0
class AdversarialVaeModel(nn.Module):
    def __init__(self, exp, input_dimensionality, num_of_classes, vae_hidd_size, num_of_OpinionTopics, num_of_PlotTopics, 
                 encoder_layers=1, generator_layers=4, beta_s=1.0, beta_a=1.0, encoder_dropout=False,  dropout_prob=0.0, 
                 generator_shortcut=False, generator_transform=None, interaction="dot_prod", plug_Plots=False, device="cpu"):
        """Build the VAE encoders, the two classifiers and the decoder.

        Args:
            exp: Experiment object; ``exp.args`` supplies ``de_sparsity``,
                ``sent_classi_hid_size`` and ``plot_classi_hid_size``.
            input_dimensionality: Input size passed to every sub-model; also
                the decoder's output size.
            num_of_classes: Number of classes of the sentiment classifier.
            vae_hidd_size: Passed to each ``VaeAvitmModel`` as ``d_e``.
            num_of_OpinionTopics: Topic count (``d_t``) of the sentiment VAE.
            num_of_PlotTopics: Topic count (``d_t``) of the aspect and plot VAEs.
            encoder_layers: Forwarded to each ``VaeAvitmModel``.
            generator_layers: Forwarded to each ``VaeAvitmModel``.
            beta_s: Stored as ``self.beta_s``.
            beta_a: Stored as ``self.beta_a``.
            encoder_dropout: Not used by this constructor (see NOTE below).
            dropout_prob: Dropout rate of the VAEs and of ``self.r_drop``.
            generator_shortcut: Not used by this constructor (see NOTE below).
            generator_transform: Not used by this constructor (see NOTE below).
            interaction: One of "dot_prod", "concat", "onlySent" or
                "onlyNeutral"; selects the decoder's input size.
            plug_Plots: Stored as ``self.plug_Plots``.
            device: Device on which the priors and sub-models are placed.
        """

        super(AdversarialVaeModel, self).__init__()

        # Args includes all the meta information about the experiment
        self.exp  = exp
        self.args = exp.args

        self.beta_a = beta_a
        self.beta_s = beta_s
        
        self.input_dimensionality = input_dimensionality
        self.num_of_OpinionTopics = num_of_OpinionTopics
        self.num_of_PlotTopics    = num_of_PlotTopics

        # Prior mean and variance: zero mean and 0.995 variance for every
        # topic, stored as (1, num_topics) tensors on ``device``.
        self.priors = dict()
        self.priors["prior_mean_Plot"]        = torch.Tensor(1, self.num_of_PlotTopics).fill_(0).to(device)
        self.priors["prior_variance_Plot"]    = 0.995
        self.priors["prior_var_Plot"]         = torch.Tensor(1, self.num_of_PlotTopics).fill_(self.priors["prior_variance_Plot"]).to(device)
        self.priors["prior_logvar_Plot"]      = self.priors["prior_var_Plot"].log()

        self.priors["prior_mean_Opinion"]     = torch.Tensor(1, self.num_of_OpinionTopics).fill_(0).to(device)
        self.priors["prior_variance_Opinion"] = 0.995
        self.priors["prior_var_Opinion"]      = torch.Tensor(1, self.num_of_OpinionTopics).fill_(self.priors["prior_variance_Opinion"]).to(device)
        # prior_var_Opinion was already moved to ``device`` on the line above.
        self.priors["prior_logvar_Opinion"]   = self.priors["prior_var_Opinion"].to(device).log()
                      
        # Flags
        self.interaction    = interaction
        self.plug_Plots     = plug_Plots
        self.topicType      = "both"
        self.wordEmb        = None
        self.alsoAspectLoss = True
        self.alsoSentLoss   = True

        # Training Device
        self.device         = device

        # - Init VAE components -
        # NOTE(review): the encoder_dropout, generator_shortcut and
        # generator_transform arguments are not forwarded; all three VAEs are
        # built with True, False and 'softmax' respectively. Only the plot
        # VAE is built with its decoder (without_decoder=False).
        self.aspect_vae_model = VaeAvitmModel(input_dimensionality, d_e=vae_hidd_size, d_t=num_of_PlotTopics, 
                                                encoder_layers=encoder_layers, generator_layers=generator_layers, without_decoder=True, 
                                                encoder_dropout=True, dropout_rate=dropout_prob, sparsity=self.args.de_sparsity, generator_shortcut=False, 
                                                generator_transform='softmax', device=device).to(device)

        self.sent_vae_model   = VaeAvitmModel(input_dimensionality, d_e=vae_hidd_size, d_t=num_of_OpinionTopics, 
                                                encoder_layers=encoder_layers, generator_layers=generator_layers, without_decoder=True, 
                                                encoder_dropout=True, dropout_rate=dropout_prob, sparsity=self.args.de_sparsity, generator_shortcut=False, 
                                                generator_transform='softmax', device=device).to(device)

        self.plot_vae_model   = VaeAvitmModel(input_dimensionality, d_e=vae_hidd_size, d_t=num_of_PlotTopics, 
                                                encoder_layers=encoder_layers, generator_layers=generator_layers, without_decoder=False, 
                                                encoder_dropout=True, dropout_rate=dropout_prob, sparsity=self.args.de_sparsity, generator_shortcut=False, 
                                                generator_transform='softmax', device=device).to(device)

        # - Sentiment classifier -
        self.num_of_classes    = num_of_classes
        self.sent_class_model  = SentimentClassifier(input_dimensionality, 
                                                     num_of_OpinionTopics, 
                                                     num_of_classes, 
                                                     hid_size=self.args.sent_classi_hid_size,
                                                     device=device).to(device)

        # - Plot discriminator/classifier -
        # It is not an actual sentiment classifier, just reusing the same class.
        self.plot_discri_model = SentimentClassifier(input_dimensionality, 
                                                     num_of_PlotTopics, 
                                                     num_of_classes=2, 
                                                     hid_size=self.args.plot_classi_hid_size,
                                                     device=device).to(device)

        # - Linear projection for possible asymmetric number of topics -
        # Only created when the two topic counts differ.
        if self.num_of_PlotTopics != self.num_of_OpinionTopics:
            self.plotScaling = nn.Linear(self.num_of_PlotTopics, self.num_of_OpinionTopics)

        # Dropout
        self.r_drop = nn.Dropout(dropout_prob)

        # - Decoder matrix -
        # The decoder's input size depends on how the two latent vectors are
        # combined. NOTE(review): any other ``interaction`` value leaves
        # ``self.de`` undefined, so the weights_init_sparse call below fails.
        if self.interaction == "dot_prod":
            self.de = nn.Linear(self.num_of_PlotTopics*self.num_of_OpinionTopics, self.input_dimensionality)
        elif self.interaction == "concat":
            self.de = nn.Linear(self.num_of_PlotTopics + num_of_OpinionTopics, self.input_dimensionality)
        elif self.interaction == "onlySent":
            self.de = nn.Linear(self.num_of_OpinionTopics, self.input_dimensionality)
        elif self.interaction == "onlyNeutral":
            self.de = nn.Linear(self.num_of_PlotTopics, self.input_dimensionality)

        # Batch Norm.
        self.de_bn = nn.BatchNorm1d(self.input_dimensionality)

        # Orthogonal Reg.
        self.ortho_regul_flag = True
        

        # --- INIT ---
        # Decoder initialization
        weights_init_sparse(self.de, sparsity=self.args.de_sparsity)
        if self.num_of_PlotTopics != self.num_of_OpinionTopics:
            weights_init_xavier(self.plotScaling)
    
        
    def decoder(self, r):
        """Decode ``r`` into a softmax distribution over dimension 1.

        Applies the linear decoder ``self.de``, then the batch norm
        ``self.de_bn``, and normalises the result with a softmax.
        """
        normalised_logits = self.de_bn(self.de(r))
        return F.softmax(normalised_logits, dim=1)
                
                
    def forward(self, x, x_plots=None, perplexity=False):
        """Encode ``x``, decode the combined latents and run the classifiers.

        Args:
            x: Input batch. When ``self.plug_Plots`` is set and ``perplexity``
                is false, the first ``input_dimensionality`` columns are taken
                as the reviews and the remaining columns as the plots.
            x_plots: Overwritten by the split described above; not read
                otherwise.
            perplexity: When true, the plot branch is skipped even if
                ``self.plug_Plots`` is set.

        Returns:
            A list, in this order: ``z_a, z_s, p_x_given_h, interaction_vec,
            mean_a, logvar_a, var_a, mean_s, logvar_s, var_s, y_a_pred,
            y_s_pred, y_p_pred, mean_p, logvar_p, var_p, z_p,
            p_x_given_h_plots, orth_loss``. The plot-related entries are
            ``None`` when the plot branch is skipped.
        """
        # --- Split reviews and plots ---
        if self.plug_Plots and not perplexity:
            x_plots = x[:, self.input_dimensionality:]
            x       = x[:, :self.input_dimensionality]

        # --- Encoders ---
        mean_a, logvar_a, var_a, z_a  = self.aspect_vae_model(x)
        mean_s, logvar_s, var_s, z_s  = self.sent_vae_model(x)

        # Plot Encoder
        if self.plug_Plots and not perplexity:
            mean_p, logvar_p, var_p, z_p, p_x_given_h_plots = self.plot_vae_model(x_plots)
            # The discriminator sees aspect and plot latents stacked along
            # dim 0, with the rows shuffled by a random permutation.
            conc_z_a_z_p    = torch.cat((z_a, z_p), 0)
            y_p_pred        = self.plot_discri_model(conc_z_a_z_p[torch.randperm(conc_z_a_z_p.size()[0])])
        else:
            y_p_pred = mean_p = logvar_p = var_p = z_p = p_x_given_h_plots = None

        # --- Interaction ---
        interaction_vec = self.z_interaction(z_a, z_s)

        # --- Decoder ---
        p_x_given_h     = self.decoder(interaction_vec)

        # --- Adversarial prediction ---
        # The same sentiment classifier scores both latents; the aspect latent
        # is first projected when the two topic counts differ.
        y_s_pred = self.sent_class_model(z_s)
        if self.num_of_PlotTopics != self.num_of_OpinionTopics:
            y_a_pred = self.sent_class_model(self.plotScaling(z_a))
        else:
            y_a_pred = self.sent_class_model(z_a)

        # -- Orthogonal regularization --
        # NOTE(review): ``.data`` bypasses autograd, so orth_loss presumably
        # sends no gradient to self.de.weight -- confirm this is intended.
        if self.ortho_regul_flag:
            decoder_weights = self.de.weight.data.transpose(0,1).to(self.device, non_blocking=True)
            orth_loss       = orthogonal_reg_loss(self.device, decoder_weights)
        else:
            orth_loss = 0.0

        return [z_a, z_s, p_x_given_h, interaction_vec, mean_a, logvar_a, var_a, mean_s, logvar_s, var_s, \
                y_a_pred, y_s_pred, y_p_pred, mean_p, logvar_p, var_p, z_p, p_x_given_h_plots, orth_loss]
    
        
    def save_params(self, filename):
        """Write the model's ``state_dict`` to ``filename`` with ``torch.save``."""
        current_state = self.state_dict()
        torch.save(current_state, filename)
    

    def load_params(self, filename):
        """Load a ``state_dict`` from ``filename`` into this model.

        The checkpoint is mapped onto ``self.device`` while loading, so
        weights saved on a device that is not present on this machine (for
        example a GPU checkpoint restored by a CPU model) can still be read.

        Args:
            filename: Path of a file written by ``torch.save``.
        """
        saved_state = torch.load(filename, map_location=self.device)
        self.load_state_dict(saved_state)


    def z_interaction(self, z_a, z_s):
        """Combine the two latent vectors according to ``self.interaction``.

        Returns:
            * "dot_prod": the per-sample outer product of ``z_a`` and ``z_s``,
              flattened to one row per sample;
            * "concat": ``z_a`` and ``z_s`` concatenated along dim 1;
            * "onlySent": ``z_s`` unchanged;
            * "onlyNeutral": ``z_a`` unchanged;
            * any other mode: ``None``.
        """
        mode = self.interaction

        if mode == "dot_prod":
            # Batched (n, 1) x (1, m) products, then one flat row per sample.
            outer = torch.bmm(z_a.unsqueeze(2), z_s.unsqueeze(2).transpose(1, 2))
            return outer.view(outer.size()[0], -1)

        if mode == "concat":
            return torch.cat((z_a, z_s), 1)

        if mode == "onlySent":
            return z_s

        if mode == "onlyNeutral":
            return z_a

        return None



    ###################
    # FREEZE PARAMETERS
    ###################
    def freeze_sent_discriminators(self, freeze):
        """Freeze or unfreeze the sentiment discriminator and announce it.

        ``freeze`` is passed unchanged to
        ``self.sent_class_model.freeze_parameters``.
        """
        if freeze:
            announcement = "Sentiment discriminator parameters have been frozen."
        else:
            announcement = "Sentiment discriminator parameters have been DE-frozen"
        print(announcement)
        self.sent_class_model.freeze_parameters(freeze)

    def freeze_plot_vae_and_discriminators(self, freeze):
        """Freeze or unfreeze the plot VAE and the plot discriminator.

        Every parameter of both models gets ``requires_grad`` set to the
        opposite of ``freeze``, and each model's ``frozen`` attribute records
        the new state.
        """
        is_frozen = bool(freeze)
        for model in (self.plot_vae_model, self.plot_discri_model):
            for weight in model.parameters():
                weight.requires_grad = not is_frozen
            model.frozen = is_frozen

    def freeze_aspect_sent_VAE_encoders(self, freeze):
        """Freeze or unfreeze the aspect VAE and the sentiment VAE.

        Every parameter of both models gets ``requires_grad`` set to the
        opposite of ``freeze``, and each model's ``frozen`` attribute records
        the new state.
        """
        is_frozen = bool(freeze)
        for model in (self.aspect_vae_model, self.sent_vae_model):
            for weight in model.parameters():
                weight.requires_grad = not is_frozen
            model.frozen = is_frozen

    def freeze_VAEdecoder(self, freeze):
        """Freeze or defrost the decoder held in ``self.de``.

        Sets ``requires_grad`` on every decoder parameter and records the
        state in ``self.de.frozen``.

        Args:
            freeze: Truthy to freeze (no gradients), falsy to defrost.
        """
        is_frozen = bool(freeze)
        decoder = self.de
        for weight in decoder.parameters():
            weight.requires_grad = not is_frozen
        decoder.frozen = is_frozen

    def remove_ortoghonalization_regularizer(self, remove):
        """Switch the orthogonalization regularizer flag.

        Args:
            remove: Truthy sets ``self.ortho_regul_flag`` to ``False``;
                falsy sets it to ``True``.
        """
        self.ortho_regul_flag = not remove



    ###############
    # LOSS
    ###############
    def compute_KLD(self, posterior_mean, posterior_logvar, posterior_var,
                    prior_mean, prior_var, prior_logvar, num_of_topics):
        """Per-row KL divergence between two diagonal Gaussians.

        Follows Appendix B of Kingma and Welling, "Auto-Encoding Variational
        Bayes", ICLR 2014 (https://arxiv.org/abs/1312.6114).

        Args:
            posterior_mean: Posterior means; the prior tensors are expanded
                to this tensor's shape.
            posterior_logvar: Posterior log-variances.
            posterior_var: Posterior variances (presumably
                ``exp(posterior_logvar)`` -- not checked here).
            prior_mean: Prior means, expandable to ``posterior_mean``.
            prior_var: Prior variances, expandable to ``posterior_mean``.
            prior_logvar: Prior log-variances, expandable to
                ``posterior_mean``.
            num_of_topics: Constant subtracted from the summed terms.

        Returns:
            Tensor holding one divergence value per row (sum over dim 1).
        """
        mu_0 = prior_mean.expand_as(posterior_mean)
        var_0 = prior_var.expand_as(posterior_mean)
        logvar_0 = prior_logvar.expand_as(posterior_mean)

        variance_ratio = posterior_var / var_0
        mean_gap = posterior_mean - mu_0
        scaled_sq_gap = mean_gap * mean_gap / var_0
        logvar_gap = logvar_0 - posterior_logvar

        row_total = (variance_ratio + scaled_sq_gap + logvar_gap).sum(1)
        return 0.5 * (row_total - num_of_topics)
    

    def loss_function_bagOfWords(self, posterior_mean_a, posterior_logvar_a, posterior_var_a,
                                       posterior_mean_s, posterior_logvar_s, posterior_var_s,
                                       p_x_given_h, DocTerm_batch, avg_loss=True):
        """Bag-of-words VAE loss: weighted KL terms plus reconstruction NLL.

        Args:
            posterior_mean_a, posterior_logvar_a, posterior_var_a: Posterior
                statistics compared against the "Plot" priors in
                ``self.priors``.
            posterior_mean_s, posterior_logvar_s, posterior_var_s: Posterior
                statistics compared against the "Opinion" priors.
            p_x_given_h: Reconstructed word probabilities; a 1e-10 offset is
                added before the log to avoid log(0).
            DocTerm_batch: Document-term weights multiplied element-wise with
                the log-probabilities and summed over dim 1.
            avg_loss: When truthy the returned loss is averaged with
                ``.mean()``; otherwise it is returned un-reduced.

        Returns:
            Tuple ``(loss, nll_term)``; ``nll_term`` is never averaged.
        """
        priors = self.priors

        kl_plot = self.compute_KLD(
            posterior_mean_a, posterior_logvar_a, posterior_var_a,
            priors["prior_mean_Plot"], priors["prior_var_Plot"],
            priors["prior_logvar_Plot"], self.num_of_PlotTopics)

        kl_opinion = self.compute_KLD(
            posterior_mean_s, posterior_logvar_s, posterior_var_s,
            priors["prior_mean_Opinion"], priors["prior_var_Opinion"],
            priors["prior_logvar_Opinion"], self.num_of_OpinionTopics)

        log_probs = (p_x_given_h + 1e-10).log()
        nll_term = -(DocTerm_batch * log_probs).sum(1)

        loss = self.beta_a * kl_plot + self.beta_s * kl_opinion + nll_term
        if avg_loss:
            return (loss.mean(), nll_term)
        return (loss, nll_term)



    def overall_loss_func(self, reconstr_loss, y_adv_pred, y_sent_pred, y_sent_labels, y_p_pred, orth_loss, 
                          recontr_loss_plot=0.0, perplexity=False, test=False):
        alpha   = 1.0 / (self.args.vocab_size * 0.5)
        beta    = 0.0
        gamma   = 0.0
        delta   = 1.0 / (self.args.vocab_size * 1.3)
        epsilon = 0.0
        zeta    = 0.0       

        adv_sent_loss     = 0.0 
        sent_loss         = 0.0    
        adv_plot_loss     = 0.0

        if not perplexity:

            if self.alsoSentLoss:
                sent_loss     = F.cross_entropy(y_sent_pred.to(self.device), y_sent_labels.to(self.device)) 
                gamma         = 1.0 

            if self.alsoAspectLoss:
                uniform_dist  = torch.Tensor(len(y_adv_pred), self.num_of_classes).fill_((1./self.num_of_classes)).to(self.device)
                # https://github.com/peterliht/knowledge-distillation-pytorch/issues/2
                # https://github.com/alinlab/Confident_classifier/blob/master/src/run_joint_confidence.py
                adv_sent_loss = F.kl_div(F.log_softmax(y_adv_pred), uniform_dist, reduction='sum')*self.num_of_classes
                beta          =  2.0 

            
            if self.plug_Plots and self.plot_vae_model.frozen != True:
                adv_plot_loss = adv_cross_entropy(y_p_pred.to(self.device))
                epsilon       = 1.0  
                zeta          = 1.0

            overall_loss = alpha*reconstr_loss.mean() + beta*adv_sent_loss + gamma*sent_loss + delta*orth_loss 
                           + epsilon*adv_plot_loss + zeta*recontr_loss_plot

        else:
Ejemplo n.º 26
0
__author__ = 'nittella'
# coding: utf-8

from sentiment_classifier import SentimentClassifier

# Classify one sample phrase and print the classifier's message.
sample_text = 'плохой экран'
clf = SentimentClassifier()

pred = clf.get_prediction_message(sample_text)
print(pred)
Ejemplo n.º 27
0
__author__ = 'xead'
# coding: utf-8

from sentiment_classifier import SentimentClassifier

# Classify one sample phrase and print the classifier's message.
clf = SentimentClassifier()

pred = clf.get_prediction_message("Это отличный банк, просто чудесный")

# print() with a single argument behaves the same on Python 2 and Python 3;
# the bare `print pred` statement is a SyntaxError on Python 3.
print(pred)
Ejemplo n.º 28
0
def fit_models(X, y):
    """Fit one 'nn' and one 'nb' classifier on the same data.

    Args:
        X: Training inputs, passed unchanged to each classifier's ``fit``.
        y: Training targets, passed unchanged to each classifier's ``fit``.

    Returns:
        Tuple ``(nn_classifier, nb_classifier)`` of the fitted instances.
    """
    fitted = []
    # Order matters to callers: 'nn' first, then 'nb'.
    for kind in ('nn', 'nb'):
        model = SentimentClassifier.newinstance(kind)
        model.fit(X, y)
        fitted.append(model)
    nn_classifier, nb_classifier = fitted
    return nn_classifier, nb_classifier
Ejemplo n.º 29
0
from sentiment_classifier import SentimentClassifier
from codecs import open
import time
import sys
from flask import Flask, render_template, request, redirect

app = Flask(__name__)

# Build the classifier once at import time and report how long it took.
print("Preparing classifier")
start_time = time.time()
clf = SentimentClassifier()
print("Classifier is ready")
elapsed = time.time() - start_time
print(elapsed, "seconds")


@app.route("/", methods=["POST", "GET"])  # main route
def index_page():
    """Serve the home page by rendering the 'hello.html' template."""
    page = render_template('hello.html')
    return page


@app.route('/predict')
def predict(text=''):  # route for ajax requests
    """Return the classifier's message for the ``text`` query parameter.

    Args:
        text: Fallback used when the request carries no ``text`` query
            parameter (empty string by default).

    Returns:
        The prediction message produced by ``clf.get_prediction_message``,
        which is also printed to stdout.
    """
    # Fall back to the declared default instead of handing None to the
    # classifier when the query string has no 'text' entry.
    text = request.args.get('text', text)
    prediction_message = clf.get_prediction_message(text)
    print(prediction_message)
    return prediction_message


@app.route('/<path:path>')
def hello(path=''):  # other routes will be redirected to home
    """Redirect any unmatched path to the home page with HTTP 302.

    Args:
        path: The captured URL path; it is not used.
    """
    home_redirect = redirect("/", code=302)
    return home_redirect
Ejemplo n.º 30
0
from sentiment_classifier import SentimentClassifier

clf = SentimentClassifier()

# Sample reviews to classify, one negative-sounding and one positive-sounding.
reviews = [
    'Не рекомендую к покупке! Самый худший телефон на свете!',
    'Телефон интуитивно понятен. Памяти много. Хороший звук.',
]

# Print every review next to the classifier's message for it.
for review in reviews:
    verdict = clf.get_prediction_message(review)
    print(review, ' - ', verdict)
                R.sm_sentiment[pred], R.sm_sentiment[label]))


if __name__ == '__main__':

    from socialmedia import create_dataset

    def len_sorted(l):
        """Return the samples ordered by the length of their first element."""
        return sorted(l, key=lambda x: len(x[0]))

    dataset = create_dataset()

    samples = dataset['samples']
    shuffle(samples)

    # 70% train, 20% test; whatever remains becomes the validation set.
    sample_count = len(samples)
    trainlen = int(sample_count * 0.70)
    testlen = int(sample_count * 0.20)
    validlen = testlen  # not used below
    test_end = trainlen + testlen

    # split
    trainset = len_sorted(samples[:trainlen])
    testset = len_sorted(samples[trainlen:test_end])
    validset = len_sorted(samples[test_end:])

    vocab = dataset['vocab']

    model = SentimentClassifier(
        wdim=1,
        hdim=1,
        vocab_size=len(vocab),
        num_labels=len(R.sm_sentiment),
    )

    # Variables are initialized inside the session before training starts.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        train_model(model, trainset, testset, batch_size=8, max_acc=0.90)
        interact(model, validset, vocab)
__author__ = 'xead'
# coding: utf-8

from sentiment_classifier import SentimentClassifier

# Classify one sample phrase and print the classifier's message.
clf = SentimentClassifier()

pred = clf.get_prediction_message("It's an amazing movie")

# print() with a single argument behaves the same on Python 2 and Python 3;
# the bare `print pred` statement is a SyntaxError on Python 3.
print(pred)
# -*- coding: utf-8 -*-
"""
Created on Mon Jun 12 16:38:54 2017

@author: ANTON
"""

from sentiment_classifier import SentimentClassifier
from codecs import open
import time
from flask import Flask, render_template, request
app = Flask(__name__)

# Build the classifier once at import time and report how long it took.
# Each print() call takes a single argument so the output is identical on
# Python 2 and Python 3 (the original print statements only parse on Python 2).
print("Preparing classifier")
start_time = time.time()
classifier = SentimentClassifier()
print("Classifier is ready")
print("%s seconds" % (time.time() - start_time))

@app.route("/sentiment-demo", methods=["POST", "GET"])
def index_page(text="", prediction_message=""):
    if request.method == "POST":
        text = request.form["text"]
        logfile = open("ydf_demo_logs.txt", "a", "utf-8")
        print text
        print >> logfile, "<response>"
        print >> logfile, text
        prediction_message = classifier.get_prediction_message(text)
        print prediction_message
        print >> logfile, prediction_message
        print >> logfile, "</response>"
Ejemplo n.º 34
0
__author__ = 'xead'
# coding: utf-8

from sentiment_classifier import SentimentClassifier

clf = SentimentClassifier()

# Progressively longer reviews, used to see how the message changes as more
# phrases are appended.
test_reviews = [
    'Аккумулятор ужасен',
    'Аккулятор ужасен батарея слабая',
    'Аккумулятор ужасен батарея слабая звук плохой',
    'Аккумулятор ужасен батарея слабая звук плохой сервис нет',
    'Аккумулятор ужасен батарея слабая звук плохой сервис нет сенсор не работает',
    'Аккумулятор ужасен батарея слабая звук плохой сервис нет сенсор не работает телефон отличный',
    'Аккумулятор ужасен батарея слабая звук плохой сервис нет сенсор не работает телефон отличный очень надежный',
]

# Print the classifier's message for each review in order.
for review in test_reviews:
    print(clf.get_prediction_message(review))