def retrieveYoutube(category):
    """Fetch YouTube videos for *category*, classify the sentiment of each
    video's top comments, and return the annotated comment dicts.

    Issues three kinds of requests against the local API gateway:
    keyword search, statistics for all found video IDs at once, and the
    comments of each individual video.

    NOTE(review): relies on module-level `classifier`, `processVec`,
    `word_feature_vec` and `getProbabilityDist` being defined/loaded.
    """
    json_vec = []

    # First request: search videos by keyword. Using params= lets
    # requests URL-encode `category` (the original string concatenation
    # broke on spaces/special characters).
    get_youtube = requests.get(
        'http://127.0.0.1:5100/api/youtube-videos',
        params={'category': category, 'count': 10})
    response_json = get_youtube.json()

    # Comma-separated ID list; join avoids the quadratic `+=` build and
    # the trailing-comma trim, and is correct for an empty result too.
    multipleID = ','.join(line['videoId'] for line in response_json)

    # Second request: statistics for all videos via multiple IDs.
    get_youtube = requests.get(
        'http://127.0.0.1:5100/api/youtube-multipleID',
        params={'multipleID': multipleID})
    response_json = get_youtube.json()
    for json_obj in response_json:
        # Third request: top comments for this video.
        comments_response = requests.get(
            'http://127.0.0.1:5100/api/youtube-comments',
            params={'videoID': json_obj['videoId'], 'count': 10})
        for c in comments_response.json():
            comment = processVec(c['comment'].lower())
            # 'positive' or 'negative'
            c['sentiment'] = classifier.classify(word_feature_vec(comment))
            c['videoTitle'] = json_obj['title']
            c['probability'] = getProbabilityDist(comment)
            json_vec.append(c)

    return json_vec
def main():
    """Train a Naive Bayes sentiment classifier on the Twitter training
    sets, evaluate it on the held-out set, classify a few demo sentences,
    and persist the model to ``naivebayes.pickle``.
    """
    start = time.time()
    print('--- Reading and processing training data ---')
    twitter_vec1 = read_twitter1('dataset/train.csv')
    twitter_vec2 = read_twitter2('dataset/training_twitter.csv')
    total_vec = twitter_vec1 + twitter_vec2
    pos_vec = divide_by_polarity(total_vec, 'positive')
    neg_vec = divide_by_polarity(total_vec, 'negative')
    end = time.time()
    print('Preprocessing training data took: %f seconds' % (end - start))

    print('\n--- Training data ---')
    start = time.time()
    # Label each feature dict with its polarity for NLTK's trainer.
    pos_feats = [(word_feature_vec(f), 'positive') for f in pos_vec]
    neg_feats = [(word_feature_vec(f), 'negative') for f in neg_vec]
    trainfeats = pos_feats + neg_feats
    # NOTE(review): this binds `classifier` locally, while the retrieve*
    # functions read a module-level `classifier` — presumably loaded from
    # the pickle elsewhere; confirm against the rest of the file.
    classifier = nltk.NaiveBayesClassifier.train(trainfeats)
    end = time.time()
    print('Training classifier took: %f seconds' % (end - start))

    print('\n--- Reading test data ---')
    start = time.time()
    text_vec = read_twitter2('dataset/testing_twitter.csv')
    pos_vec = divide_by_polarity(text_vec, 'positive')
    neg_vec = divide_by_polarity(text_vec, 'negative')
    end = time.time()
    print('Reading test data took: %f seconds' % (end - start))

    print('\n--- Preprocessing test data ---')
    pos_feats = [(word_feature_vec(f), 'positive') for f in pos_vec]
    neg_feats = [(word_feature_vec(f), 'negative') for f in neg_vec]
    testFeats = pos_feats + neg_feats
    print("Classifier accuracy: ", nltk.classify.util.accuracy(classifier, testFeats))

    # Quick sanity check on hand-written examples.
    text_example = ["I hate terror", "Tesla is the f*****g best", "Spotify is a sinking ship", "Stefan Lofven has very good communication skills", "F*****g hell I don't want another exam", "I haven't got my results from the exam yet...."]
    for line in text_example:
        line = line.lower()
        wordvec = processVec(line)
        print(line + " : " + classifier.classify(word_feature_vec(wordvec)))

    classifier.show_most_informative_features(50)
    print('\n--- Saving model to pickle ---')
    # `with` guarantees the file is closed even if dump() raises.
    with open("naivebayes.pickle", "wb") as save_classifier:
        pickle.dump(classifier, save_classifier)
def retrieveReddit(category):
    """Fetch Reddit comments for *category* from the local API gateway,
    annotate each with its classified sentiment and probability
    distribution, and return the list of annotated dicts.
    """
    json_vec = []

    # Request reddit comments; params= URL-encodes `category` safely
    # (the original string concatenation broke on spaces/special chars).
    get_reddit = requests.get(
        'http://127.0.0.1:5100/api/reddit-comments',
        params={'category': category, 'count': 100})
    for json_obj in get_reddit.json():
        comment = processVec(json_obj['comment'].lower())
        # 'positive' or 'negative'
        json_obj['sentiment'] = classifier.classify(word_feature_vec(comment))
        json_obj['probability'] = getProbabilityDist(comment)
        json_vec.append(json_obj)

    return json_vec
def retrieveTwitter(category):
    """Fetch tweets for *category* from the local API gateway, annotate
    each with its classified sentiment and probability distribution, and
    return the list of annotated dicts.
    """
    json_vec = []

    # Request tweets; params= URL-encodes `category` safely (the
    # original string concatenation broke on spaces/special chars).
    get_twitter = requests.get(
        'http://127.0.0.1:5100/api/tweets',
        params={'category': category, 'count': 200})
    for json_obj in get_twitter.json():
        # Rename 'tweetText' -> 'comment' to match the other sources;
        # pop() does the read-and-delete in one step.
        json_obj['comment'] = json_obj.pop('tweetText')
        tweet = clean_tweet(json_obj['comment'])
        json_obj['sentiment'] = classifier.classify(word_feature_vec(tweet))
        json_obj['probability'] = getProbabilityDist(tweet)
        json_vec.append(json_obj)

    return json_vec
def getProbabilityDist(comment):
    """Return ``[P(positive), P(negative)]`` for *comment* according to
    the module-level classifier."""
    dist = classifier.prob_classify(word_feature_vec(comment))
    return [dist.prob(label) for label in ('positive', 'negative')]