Python clean_str Exemples, preprocessing.clean_str Python Exemples

Exemple #1

0

Afficher le fichier

def _read_cleaned_lines(path, encoding='latin-1'):
    """Return ``clean_str`` applied to every line of the file at *path*.

    The file is read as bytes and each line is decoded explicitly.  Calling
    ``str()`` on a bytes line under Python 3 returns its repr
    (``"b'...\\n'"``), which would leak the ``b'`` prefix and an escaped
    newline into the text handed to ``clean_str``.
    """
    # latin-1 maps every byte to a code point, so decoding cannot fail.
    # NOTE(review): the real encoding of the data files is assumed, not
    # known from this code -- confirm against the dataset.
    with open(path, 'rb') as handle:
        return [clean_str(raw.decode(encoding)) for raw in handle]


def load_mr(neg_path='rt-polaritydata/rt-polarity.neg',
            pos_path='rt-polaritydata/rt-polarity.pos'):
    """Load the negative and positive review files with their labels.

    Args:
        neg_path: Path of the file holding one negative example per line.
        pos_path: Path of the file holding one positive example per line.

    Returns:
        A ``(train_data, labels)`` tuple.  ``train_data`` is the NumPy
        concatenation of the cleaned positive lines followed by the cleaned
        negative lines; ``labels`` is the matching array of ``1.0`` for each
        positive line followed by ``0.0`` for each negative line.
    """
    train_neg = _read_cleaned_lines(neg_path)
    train_pos = _read_cleaned_lines(pos_path)

    # Positives come first in both arrays so data and labels stay aligned.
    train_data = np.concatenate((train_pos, train_neg))
    labels = np.concatenate((np.ones(len(train_pos)),
                             np.zeros(len(train_neg))))

    return train_data, labels

Exemple #2

0

Afficher le fichier

Fichier : run_cnn.py Projet : yiketanng/large_text_classification

def load_data(positive_data_file, negative_data_file):
    """Load positive and negative examples and build one-hot labels.

    Args:
        positive_data_file: Path to a text file with one positive example
            per line.
        negative_data_file: Path to a text file with one negative example
            per line.

    Returns:
        A two-element list ``[x_text, y]``.  ``x_text`` holds every line
        (positives first, then negatives) after ``str.strip`` and
        ``preprocessing.clean_str``.  ``y`` is the NumPy concatenation of
        ``[0, 1]`` for each positive example followed by ``[1, 0]`` for each
        negative example.
    """
    # Context managers guarantee both files are closed; the original left
    # the handles returned by open() dangling.
    with open(positive_data_file, "r") as pos_file:
        positive_examples = [line.strip() for line in pos_file]
    with open(negative_data_file, "r") as neg_file:
        negative_examples = [line.strip() for line in neg_file]

    # Clean every sentence, keeping positives ahead of negatives so the
    # order matches the labels built below.
    x_text = [preprocessing.clean_str(sent)
              for sent in positive_examples + negative_examples]

    # Generate labels
    positive_labels = [[0, 1] for _ in positive_examples]
    negative_labels = [[1, 0] for _ in negative_examples]
    y = np.concatenate([positive_labels, negative_labels], 0)

    return [x_text, y]

Exemple #3

0

Afficher le fichier

Fichier : track_tweets.py Projet : KhanradCoder/TwitterMoodTracker

# Periodically fetch tweets, score them with the emotion model, and plot the
# first two emotion scores against the iteration index.
step_size = 60  # seconds slept at the end of every iteration (time between graph points)
num_iterations = 30  # number of fetch/score passes, i.e. number of points on the graph

x = []    # iteration index of each sample (x axis)
lib = []  # emos[0] ("sadness") per iteration; name presumably left over from a political variant
rep = []  # emos[1] ("fear") per iteration; same naming caveat as `lib`
k = 0     # incremented once per iteration; never read in the code visible here
for i in range(num_iterations):
    # The timestamp is passed to get_all_tweets and reused to build the CSV
    # file name read below.
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    # Presumably writes "covid-19_<timestamp>_tweets.csv" -- verify against
    # get_all_tweets, which is defined outside this snippet.
    get_all_tweets("place:5635c19c2b5078d1 (coronavirus)", timestamp)
    print("Saved tweets for", timestamp)
    tweets = pd.read_csv("covid-19_" + timestamp + "_tweets.csv")
    tweets = tweets["text"]

    # Merge all tweets into one string, clean it, then convert it with
    # preprocessing.sequence_text before scoring.
    all_tweets = " ".join(tweets)
    all_tweets = preprocessing.clean_str(all_tweets)
    sentences = preprocessing.sequence_text(all_tweets)
    print("Preprocessed tweets")

    # Get emotion scores - change this call for the political or toxicity model.
    emos = get_sentimeant.emotion(sentences)
    sadness = emos[0]
    fear = emos[1]

    x.append(i)
    k += 1
    lib.append(sadness)
    rep.append(fear)
    # Wait before the next fetch; this also runs after the final iteration.
    time.sleep(step_size)

# Draw both series on one plot: `lib` (sadness) in blue, `rep` (fear) in red.
plt.plot(x, lib, 'blue', x, rep, 'red')

Exemple #4

0

Afficher le fichier

import torch
import varPack as D
import preprocessing
from torch.autograd import Variable

# Load the saved model object.
# NOTE: torch.load unpickles the file, so only load checkpoints you trust.
CNN = torch.load('./saveEntireCNN')

# Read and clean the test sentences inside a context manager so the file
# handle is closed once the lines are consumed (the original never closed it).
with open('../data/testData.txt', "r") as test_file:
    test_data = [preprocessing.clean_str(sent) for sent in test_file]

f = open('result.txt', 'w')
got_right = 0
for i, sent in enumerate(test_data):
    # Input
    words_in_sent = []
    words_in_sent.extend(sent.split())
    s = len(words_in_sent)
    if (s > D.max_sent_len):
        continue

    input = []
    for word in words_in_sent:
        try:
            input.extend([D.word_to_ix[word]])
        except KeyError:
            print("No Such Word in dictionary, zero padding")
            input.extend([D.word_to_ix['_UNSEEN_']])
    for _ in range(D.max_sent_len - s):
        input.extend([D.word_to_ix['_ZEROS_']])

    # Wrap Input inside the Variable
    input = Variable(torch.cuda.LongTensor(input))