class MyApp:
    """Application facade over the review index and the score-prediction model.

    On construction it loads the Amazon reviews CSV into ``self.df_amazon``,
    feeds it to an ``Index`` instance (products, then reviews) and unpickles
    the XGBoost model together with the word- and char-level vectorizers.
    """

    # Data and model artifacts, resolved relative to the working directory.
    _REVIEWS_CSV = 'Datafiniti_Amazon_Consumer_Reviews_of_Amazon_Products.csv'
    _XGBOOST_PKL = "xgboost.pkl"
    _WORD_VECTORIZER_PKL = "word_vectorize.pkl"
    _CHAR_VECTORIZER_PKL = "char_vectorize.pkl"

    def __init__(self, name=''):
        """Build the indexes and load the prediction artifacts.

        Args:
            name: Display name of the app, returned by ``get_name``.
        """
        self._name = name
        print('the app is initialized')

        self.index = Index()
        self.df_amazon = self._read_reviews(self._REVIEWS_CSV)
        num_files = self.index.index_product(self.df_amazon)
        print("indexed %d files" % num_files)
        self.index.index_review(self.df_amazon)

        # NOTE(security): pickle.load can execute arbitrary code while
        # deserializing -- only ever point these paths at trusted files.
        self.xgboostModel = self._load_pickle(self._XGBOOST_PKL)
        self.word_vectorizer = self._load_pickle(self._WORD_VECTORIZER_PKL)
        self.char_vectorizer = self._load_pickle(self._CHAR_VECTORIZER_PKL)

    @staticmethod
    def _read_reviews(path):
        """Read the reviews CSV, reporting and skipping malformed rows."""
        try:
            # ``error_bad_lines`` was removed in pandas 2.0; 'warn' is the
            # equivalent of the old error_bad_lines=False default behavior.
            return pd.read_csv(path, on_bad_lines='warn',
                               encoding='utf-8-sig')
        except TypeError:
            # pandas < 1.3 does not know ``on_bad_lines``.
            return pd.read_csv(path, error_bad_lines=False,
                               encoding='utf-8-sig')

    @staticmethod
    def _load_pickle(path):
        """Unpickle and return the object stored at ``path``."""
        with open(path, "rb") as handle:
            return pickle.load(handle)

    def say_name(self):
        """Print the app name to stdout."""
        print('my name is {0}'.format(self._name))

    def get_name(self) -> str:
        """Return the name given at construction time."""
        return self._name

    def search_product(self, query):
        """Return whatever ``self.index.search_product`` yields for ``query``."""
        return self.index.search_product(query)

    def get_product_info(self, id) -> dict:
        """Collect the label words and default reviews of one product.

        Args:
            id: Product identifier, forwarded to the index and to
                ``default_reviews``.

        Returns:
            A dict with ``'labels'`` (result of
            ``self.index.top_frequency_words``) and ``'topReviews'`` (a list
            of ``{'text': ...}`` dicts, one per default review).
        """
        info = {'labels': self.index.top_frequency_words(id)}
        review_txt = default_reviews(id, self.df_amazon)
        info['topReviews'] = [{'text': txt} for txt in review_txt]
        return info

    def search_review(self, id, term) -> list:
        """Search the reviews of product ``id`` for ``term``.

        Returns:
            A list of ``{'text': ...}`` dicts, one per matching review; the
            number of matches is also printed to stdout.
        """
        review_text = self.index.search_review(id, term)
        print('# of reviews={0}'.format(len(review_text)))
        return [{'text': txt} for txt in review_text]

    def predict_score(self, id, text) -> dict:
        """Predict a score for a single piece of review text.

        Args:
            id: Product identifier; accepted for interface compatibility but
                not used by the prediction itself.
            text: Raw review text to score.

        Returns:
            ``{'score': <float>}`` holding the model's prediction.
        """
        documents = [text]
        word_features = self.word_vectorizer.transform(documents)
        char_features = self.char_vectorizer.transform(documents)
        # Column order (char, then word) must stay as the model was fed.
        features = hstack([char_features, word_features]).tocsr()
        result = self.xgboostModel.predict(features)
        # Exactly one document was submitted; take its prediction explicitly
        # instead of relying on deprecated size-1-array -> float conversion.
        return {'score': float(result[0])}