# Put the parent directory on the import path so the project-local ``utils``
# package can be imported when this script is run from its own directory.
sys.path.insert(0, '..')
from utils.TextPreprocess import review_to_words, tag_reviews

# --- Training data ---------------------------------------------------------
# quoting=3 is csv.QUOTE_NONE: quote characters inside reviews are treated as
# ordinary text. Malformed rows are skipped rather than aborting the load;
# ``on_bad_lines="skip"`` replaces ``error_bad_lines=False``, which was
# deprecated in pandas 1.3 and removed in pandas 2.0.
train = pd.read_csv(
    "../../data/labeledTrainData.tsv",
    header=0,
    delimiter="\t",
    quoting=3,
    on_bad_lines="skip",
)
num_reviews = train["review"].size

print("Cleaning and parsing the training set movie reviews...")
# One cleaned entry per training review, in file order.
clean_train_reviews = [review_to_words(review) for review in train["review"]]

# --- Test data -------------------------------------------------------------
test = pd.read_csv(
    "../../data/testData.tsv",
    header=0,
    delimiter="\t",
    quoting=3,
)
num_reviews = len(test["review"])

print("Cleaning and parsing the test set movie reviews...")
# One cleaned entry per test review, in file order.
clean_test_reviews = [review_to_words(review) for review in test["review"]]
@author: GongYu ''' import pandas as pd from utils.TextPreprocess import review_to_words ''' Training Data ''' train = pd.read_csv("data\\labeledTrainData.tsv", header = 0, delimiter = "\t", quoting = 3, error_bad_lines=False) num_reviews = train["review"].size print "Cleaning and parsing the training set movie reviews..." clean_train_reviews = [] for i in xrange(0, num_reviews): clean_train_reviews.append(review_to_words(train["review"][i])) ''' Test Data ''' test = pd.read_csv("data\\testData.tsv", header = 0, delimiter = "\t", quoting = 3) num_reviews = len(test["review"]) clean_test_reviews = [] print "Cleaning and parsing the test set movie reviews..." for i in xrange(0, num_reviews): clean_review = review_to_words(test["review"][i]) clean_test_reviews.append(clean_review) ''' Train and Test