def averagePrepare(product):
    """Load one product's reviews and build the train/test arrays.

    Three CSV files are read, all through paths relative to the current
    working directory: ``train_<product>.csv``, ``test_<product>.csv`` and
    ``../../datasets/reviews_<product>.csv``. The word list is derived from
    the complete review file and then used to format the feature vectors of
    both splits.

    Args:
        product: Product name spliced into the three CSV file names; it is
            concatenated with ``+``, so it has to be a ``str``.

    Returns:
        The list ``[trainX, trainY, testX, testY]``, each element being the
        ``np.array`` of the corresponding ``process`` helper output.
    """
    train_file = "train_" + product + ".csv"
    test_file = "test_" + product + ".csv"
    full_file = "../../datasets/reviews_" + product + ".csv"

    # Files are loaded in the order train, test, full review set.
    train_frame = pandas.read_csv(train_file)
    test_frame = pandas.read_csv(test_file)
    full_frame = pandas.read_csv(full_file)

    # The word list comes from every review of the product, not only from
    # the training split.
    word_list = process.preprocessWordlist(process.processReviews(full_frame))

    # Feature vectors (bag-of-words, according to the original comments).
    train_x = np.array(process.formatAverageX(train_frame, word_list))
    test_x = np.array(process.formatAverageX(test_frame, word_list))

    # Evaluation vectors for the same two splits.
    train_y = np.array(process.formatY(train_frame))
    test_y = np.array(process.formatY(test_frame))

    return [train_x, train_y, test_x, test_y]
def basePrepare(i):
    """Build the train/test arrays for fold ``i`` of the combined reviews.

    Reads ``../../datasets/reviews_all.csv`` and ``kFold/test_all_<i>.csv``
    (both relative to the current working directory). The training frame is
    the full frame minus the rows whose index labels appear in the test
    frame's index. The word list is derived from the full frame.

    Args:
        i: Fold identifier; converted with ``str()`` to form the test file
            name.

    Returns:
        The list ``[trainX, trainY, testX, testY]``, each element being the
        ``np.array`` of the corresponding ``process`` helper output.
    """
    full_file = "../../datasets/reviews_all.csv"
    fold_test_file = "kFold/test_all_" + str(i) + ".csv"

    full_frame = pandas.read_csv(full_file)
    test_frame = pandas.read_csv(fold_test_file)

    # NOTE(review): test_frame is read without index_col, so its index is a
    # fresh 0..len-1 range; this drop therefore removes the rows of the full
    # frame carrying those labels. Confirm that this matches the rows that
    # were actually written to the fold file.
    train_frame = full_frame.drop(test_frame.index)

    # The word list comes from every review, not only the training split.
    word_list = process.preprocessWordlist(process.processReviews(full_frame))

    # Feature vectors (bag-of-words, according to the original comments).
    train_x = np.array(process.formatBaseX(train_frame, word_list))
    test_x = np.array(process.formatBaseX(test_frame, word_list))

    # Evaluation vectors for the same two splits.
    train_y = np.array(process.formatY(train_frame))
    test_y = np.array(process.formatY(test_frame))

    return [train_x, train_y, test_x, test_y]