def classify(processed_csv, test_file=True, **params):
    """Predict tweet polarity by counting lexicon hits per tweet.

    Args:
        processed_csv: Path to a CSV file with a header row. It is opened
            with ISO-8859-1 encoding and read through ``csv.DictReader``.
            The ``ItemID`` and ``SentimentText`` columns are always read;
            ``Sentiment`` is read as well when ``test_file`` is falsy.
        test_file: When truthy, rows are treated as unlabelled.
        **params: Must contain ``positive_words`` and ``negative_words``;
            each value is handed to ``utils.file_to_wordset``.

    Returns:
        A list with one tuple per row: ``(tweet_id, prediction)`` when
        ``test_file`` is truthy, otherwise
        ``(tweet_id, sentiment, prediction)`` where ``sentiment`` is the
        raw column value. ``prediction`` is 1 when the positive hit count
        is at least the negative hit count, else 0.

    Raises:
        KeyError: If a required entry is missing from ``params`` or a
            required column is missing from a row.
    """
    pos_lexicon = utils.file_to_wordset(params.pop('positive_words'))
    neg_lexicon = utils.file_to_wordset(params.pop('negative_words'))

    results = []
    with open(processed_csv, 'r', encoding='ISO-8859-1') as handle:
        for record in csv.DictReader(handle):
            item_id = record['ItemID']
            gold = None if test_file else record['Sentiment']
            tokens = record['SentimentText'].split()

            # A token found in both lexicons counts as positive only.
            n_pos = sum(1 for tok in tokens if tok in pos_lexicon)
            n_neg = sum(
                1 for tok in tokens
                if tok not in pos_lexicon and tok in neg_lexicon
            )

            # Ties (including tweets with no hits) are labelled positive.
            guess = int(n_pos >= n_neg)

            if test_file:
                results.append((item_id, guess))
            else:
                results.append((item_id, gold, guess))
    return results
def classify(processed_csv, test_file=True, **params):
    """Predict tweet polarity by counting lexicon hits per tweet.

    Each line of ``processed_csv`` is split on commas into
    ``tweet_id,tweet`` (when ``test_file`` is truthy) or
    ``tweet_id,label,tweet`` (otherwise). The split is capped at the
    number of leading columns, so commas inside the trailing tweet text
    stay part of the tweet instead of raising ``ValueError``.

    Args:
        processed_csv: Path to the comma-separated input file, read line
            by line with no header handling.
        test_file: When truthy, lines are treated as unlabelled.
        **params: Must contain ``positive_words`` and ``negative_words``;
            each value is handed to ``utils.file_to_wordset``.

    Returns:
        A list with one tuple per line: ``(tweet_id, prediction)`` when
        ``test_file`` is truthy, otherwise
        ``(tweet_id, int(label), prediction)``. ``prediction`` is 1 when
        the positive hit count is at least the negative hit count, else 0.

    Raises:
        KeyError: If ``positive_words`` or ``negative_words`` is missing
            from ``params``.
        ValueError: If a line has fewer columns than expected, or the
            label column is not an integer.
    """
    positive_words = utils.file_to_wordset(params.pop('positive_words'))
    negative_words = utils.file_to_wordset(params.pop('negative_words'))

    # The tweet is always the last column; only split off the columns that
    # precede it so the tweet text may itself contain commas.
    leading_splits = 1 if test_file else 2

    predictions = []
    # The handle is not named ``csv`` so it cannot shadow the stdlib module.
    with open(processed_csv, 'r') as csv_file:
        for line in csv_file:
            fields = line.strip().split(',', leading_splits)
            if test_file:
                tweet_id, tweet = fields
            else:
                tweet_id, label, tweet = fields

            pos_count, neg_count = 0, 0
            for word in tweet.split():
                # A word found in both lexicons counts as positive only.
                if word in positive_words:
                    pos_count += 1
                elif word in negative_words:
                    neg_count += 1

            # Ties (including tweets with no hits) are labelled positive.
            prediction = 1 if pos_count >= neg_count else 0

            if test_file:
                predictions.append((tweet_id, prediction))
            else:
                predictions.append((tweet_id, int(label), prediction))
    return predictions
def classify(processed_csv, service_port, test_file=True, **params):
    """Predict tweet polarity by counting lexicon hits per tweet.

    Args:
        processed_csv: Input passed to ``parse_csv`` together with
            ``service_port``.
        service_port: Forwarded unchanged to ``file_to_wordset`` and
            ``parse_csv``.
        test_file: When truthy, rows are treated as unlabelled and
            ``row.field[0]`` is not read.
        **params: Must contain ``positive_words`` and ``negative_words``;
            each value is handed to ``file_to_wordset``.

    Returns:
        A list with one tuple per row: ``(tweet_id, prediction)`` when
        ``test_file`` is truthy, otherwise
        ``(tweet_id, int(label), prediction)``. ``tweet_id`` comes from
        ``row.field[1]``, the tweet text from ``row.field[2]`` and the
        label from ``row.field[0]``. ``prediction`` is 1 when the positive
        hit count is at least the negative hit count, else 0.

    Raises:
        KeyError: If ``positive_words`` or ``negative_words`` is missing
            from ``params``.
    """
    pos_lexicon = file_to_wordset(params.pop('positive_words'), service_port)
    neg_lexicon = file_to_wordset(params.pop('negative_words'), service_port)

    results = []
    for record in parse_csv(processed_csv, service_port):
        item_id = record.field[1]
        tokens = record.field[2].split()
        gold = None if test_file else record.field[0]

        # A token found in both lexicons counts as positive only.
        n_pos = sum(1 for tok in tokens if tok in pos_lexicon)
        n_neg = sum(
            1 for tok in tokens
            if tok not in pos_lexicon and tok in neg_lexicon
        )

        # Ties (including tweets with no hits) are labelled positive.
        guess = int(n_pos >= n_neg)

        if test_file:
            results.append((item_id, guess))
        else:
            results.append((item_id, int(gold), guess))
    return results