Example #1
def algorithm(df, params):
    """
    wrapper function to put each individual algorithm inside
    :param df: dataframe that contains all the input dataset
    :param params: algorithm specific parameters
    :return: a dictionary of { outputname: output content in memory }
    """

    output = {}

    # algorithm specific code
    # construct the sentiment analyzer (Sentiment and plot are project-local modules)
    SA = Sentiment(df, params['column'])

    sentiment_sentence, sentiment_doc = SA.sentiment(params['algorithm'])
    output['sentiment'] = sentiment_sentence
    output['doc'] = sentiment_doc

    if params['algorithm'] == 'vader':
        output['negation'] = SA.negated()
        output['allcap'] = SA.allcap()

    # plot
    labels = ['negative', 'neutral', 'positive']
    values = [sentiment_doc['neg'], sentiment_doc['neu'],
              sentiment_doc['pos']]
    output['div'] = plot.plot_pie_chart(labels, values,
                                        title='Sentiment of the dataset')

    return output
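
The wrapper contract above is easiest to see with a concrete call: a DataFrame goes in, a dictionary of named outputs comes out. Below is a minimal usage sketch; it assumes pandas plus the project-local Sentiment and plot modules already shown, and the column name 'tweet' and the params values are illustrative, not taken from the original.

import pandas as pd

# hypothetical input: one text column named 'tweet'
df = pd.DataFrame({'tweet': ['I love this!', 'This is awful.', 'It is okay.']})

# keys mirror what algorithm() reads above; the values are made up
params = {'column': 'tweet', 'algorithm': 'vader'}

output = algorithm(df, params)
print(sorted(output))   # ['allcap', 'div', 'doc', 'negation', 'sentiment']
print(output['doc'])    # document-level scores, e.g. {'neg': ..., 'neu': ..., 'pos': ...}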
Example #2

import requests
from textblob import TextBlob
from textblob.sentiments import NaiveBayesAnalyzer

# BASE_URL, APP_ACCESS_TOKEN, get_post_id and plot_pie_chart are
# project-local helpers from the surrounding script.


def delete_negative_comment(insta_username):
    media_id = get_post_id(insta_username)
    request_url = (BASE_URL + 'media/%s/comments/?access_token=%s') % (
        media_id, APP_ACCESS_TOKEN)
    print('GET request url : %s' % request_url)
    comment_info = requests.get(request_url).json()
    no_neg_comm = 0
    no_pos_comm = 0
    if comment_info['meta']['code'] == 200:
        if comment_info['data']:
            # Here's a naive implementation of how to classify the negative comments :)
            # (despite the function name, the DELETE call itself is not issued here)
            for comment in comment_info['data']:
                comment_id = comment['id']  # unused here; the eventual DELETE request would need it
                comment_text = comment['text']
                blob = TextBlob(comment_text, analyzer=NaiveBayesAnalyzer())
                if blob.sentiment.p_neg > blob.sentiment.p_pos:
                    no_neg_comm += 1
                else:
                    no_pos_comm += 1
            plot_pie_chart(no_pos_comm, no_neg_comm)
        else:
            print('There are no existing comments on the post!')
    else:
        print('Status code other than 200 received!')
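
The per-comment check above is plain TextBlob. Its NaiveBayesAnalyzer is trained on NLTK's movie-reviews corpus and is loaded lazily on first use, so it is worth reusing one analyzer instance instead of constructing a new one per comment. A standalone sketch of just the classification step (the corpora must be fetched once with python -m textblob.download_corpora):

from textblob import TextBlob
from textblob.sentiments import NaiveBayesAnalyzer

# one shared analyzer: the underlying model is trained on first use
analyzer = NaiveBayesAnalyzer()

for text in ['great shot, love it', 'worst post I have ever seen']:
    blob = TextBlob(text, analyzer=analyzer)
    # blob.sentiment is a namedtuple: (classification, p_pos, p_neg)
    verdict = 'negative' if blob.sentiment.p_neg > blob.sentiment.p_pos else 'positive'
    print(text, '->', verdict)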
Example #3

import pickle
from collections import Counter


def algorithm(df, params):
    """
    wrapper function to put each individual algorithm inside
    :param df: dataframe that contains all the input dataset
    :param params: algorithm specific parameters
    :return: a dictionary of { outputname: output content in memory }
    """

    output = {}

    # load classification model
    with open(params['pipeline'] + ".pickle", 'rb') as f:
        text_clf = pickle.load(f)

    # load data
    data = df[df[params['column']] != ''][params['column']].dropna().astype(
        'str').tolist()

    # predict using trained model
    predicted = text_clf.predict(data)
    result = [['text', 'class']]
    for text, label in zip(data, predicted):
        result.append([text, label])
    output['predicted'] = result

    # plot percentage of class
    predicted_counts = Counter(predicted)
    labels = []
    values = []
    for key, count in predicted_counts.items():
        labels.append("class: " + str(key))
        values.append(count)
    output['div'] = plot.plot_pie_chart(
        labels, values, title="breakdown of the predicted classes")

    return output
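
This snippet assumes params['pipeline'] names a pickled, already-fitted model exposing a scikit-learn-style predict() that accepts raw strings. Here is a sketch of how such a pickle could be produced with an sklearn Pipeline; the training data and the 'my_pipeline' name are made up for illustration.

import pickle

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline

# toy training data, purely illustrative
texts = ['free money now', 'meeting at noon', 'win a big prize', 'lunch tomorrow']
labels = ['spam', 'ham', 'spam', 'ham']

# vectorizer + classifier in one object, so predict() accepts raw strings
text_clf = Pipeline([('tfidf', TfidfVectorizer()),
                     ('clf', MultinomialNB())])
text_clf.fit(texts, labels)

# saved as '<name>.pickle', matching how algorithm() loads it above
with open('my_pipeline' + '.pickle', 'wb') as f:
    pickle.dump(text_clf, f)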
Example #4
def algorithm(df, params):
    """
    wrapper function to put each individual algorithm inside
    :param df: dataframe that contains all the input dataset
    :param params: algorithm specific parameters
    :return: a dictionary of { outputname: output content in memory }
    """
    output = {}

    CF = Classification(df, params['column'])

    output['uid'] = params['uid']

    training_set, testing_set = CF.split(int(params['ratio']))
    output['training'] = training_set
    output['testing'] = testing_set

    # plot
    labels = ['training set data points', 'unlabeled data points']
    values = [len(training_set), len(testing_set)]
    output['div'] = plot.plot_pie_chart(
        labels, values, title='breakdown of training vs testing size')

    return output
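
Classification.split is project-local and not shown here; all this snippet relies on is that it takes int(params['ratio']) and returns a (training_set, testing_set) pair. A comparable split can be sketched with pandas alone; treating the ratio as a percentage is an assumption:

import pandas as pd

def split(df, ratio):
    # ratio is assumed to be a percentage, e.g. 80 -> 80% training
    shuffled = df.sample(frac=1, random_state=42).reset_index(drop=True)
    cutoff = len(shuffled) * ratio // 100
    return shuffled[:cutoff], shuffled[cutoff:]

df = pd.DataFrame({'text': list('abcde'), 'label': [0, 1, 0, 1, 0]})
training_set, testing_set = split(df, 80)
print(len(training_set), len(testing_set))  # 4 1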
Example #5

def algorithm(df=None, params=None):
    """
    wrapper function to put each individual algorithm inside
    :param df: dataframe that contains all the input dataset
    :param params: algorithm specific parameters
    :return: a dictionary of { outputname: output content in memory }
    """

    output = {}

    # the user specifies which column to use; each row is a sentence,
    # so collect the column as a list of sentences
    column = params['column']
    sentences = df[df[column] != ''][column].dropna().astype('str').tolist()

    entity_list = []
    entity_freq = {}
    entity_category = {}

    # extract entities in each sentence; TwitterNER and tokenizeRawTweetText
    # come from the TwitterNER project (imports not shown in this snippet)
    ner = TwitterNER()
    for sentence in sentences:
        tokens = tokenizeRawTweetText(sentence)
        raw_entities = ner.get_entities(tokens)

        entities = []
        for entry in raw_entities:
            # record entities
            entity = " ".join(tokens[entry[0]:entry[1]])
            category = entry[2]
            entities.append((entity, category))

            # record entity frequency
            entity_freq[entity] = entity_freq.get(entity, 0) + 1

            # record category frequency
            entity_category[category] = entity_category.get(category, 0) + 1

        entity_list.append(entities)

    # entities extracted per sentence
    output['entity'] = entity_list

    # plot bar chart of the most frequent entities (sort by count first;
    # slicing the raw dict would just take the first 30 encountered)
    output['freq'] = entity_freq

    top_entities = sorted(entity_freq.items(), key=lambda kv: kv[1],
                          reverse=True)[:30]
    output['div_freq'] = plot.plot_bar_chart(
        [entity for entity, _ in top_entities],
        [count for _, count in top_entities],
        "Top 30 Most Frequent Named Entities")

    # plot pie chart of entity categories
    output['div_category'] = plot.plot_pie_chart(
        list(entity_category.keys()), list(entity_category.values()),
        "Named Entity Category Breakdown")

    return output
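
The frequency bookkeeping above (and the top-30 selection it feeds) is exactly what collections.Counter provides out of the box, which the classification example earlier already uses. A small sketch with made-up entities and categories:

from collections import Counter

# hypothetical flat list of (entity, category) pairs, as collected per sentence above
pairs = [('London', 'LOCATION'), ('NASA', 'ORGANIZATION'),
         ('London', 'LOCATION'), ('Mars', 'LOCATION')]

entity_freq = Counter(entity for entity, _ in pairs)
entity_category = Counter(category for _, category in pairs)

# most_common() sorts by count, so a top-30 slice is always the most frequent
print(entity_freq.most_common(30))  # [('London', 2), ('NASA', 1), ('Mars', 1)]
print(entity_category.most_common())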