lambda x: re_spaces.sub(" ", re_attags.sub(" ", " " + x + " "))[1:-1]) df = df.drop_duplicates(subset=['text']) df.index = df['id'] = range(df.shape[0]) non_alphanums = re.compile('[^A-Za-z]+') def normalize_text(text): return non_alphanums.sub(' ', text).lower().strip() df['text_normalized'] = df['text'].map(lambda x: normalize_text(x)) df['textblob_score'] = df['text_normalized'].map( lambda x: textblob.TextBlob(x).polarity) import wordbag_regressor print("Train wordbag regressor") wb_regressor = wordbag_regressor.WordbagRegressor( "../models/wordbag_model.pkl.gz", tripadvisor_dir) #wb_regressor= wordbag_regressor.WordbagRegressor("../models/wordbag_model.pkl.gz") df['wordbag_score'] = wb_regressor.predict(df['text'].values) import wordhash_regressor print("Train wordhash regressor") wh_regressor = wordhash_regressor.WordhashRegressor( "../models/wordhash_model.pkl.gz", tripadvisor_dir) #wh_regressor= wordhash_regressor.WordhashRegressor("../models/wordhash_model.pkl.gz") df['wordhash_score'] = wh_regressor.predict(df['text'].values) import wordseq_regressor print("Train wordseq regressor") ws_regressor = wordseq_regressor.WordseqRegressor( "../models/wordseq_model.pkl.gz", tripadvisor_dir) #ws_regressor = wordseq_regressor.WordseqRegressor("../models/wordseq_model.pkl.gz")
# Keep one row per distinct 'text' value, then renumber the surviving rows so
# that both the index and the 'id' column run from 0 to n-1.
df = df.drop_duplicates(subset=['text'])
fresh_ids = range(df.shape[0])
df.index = fresh_ids
df['id'] = fresh_ids

# Matches every run of characters that are not ASCII letters. Despite the
# name, digits are matched (and therefore removed) as well.
non_alphanums = re.compile('[^A-Za-z]+')


def normalize_text(text):
    """Return *text* reduced to lowercase ASCII letters and single spaces.

    Each run of characters outside A-Z/a-z is replaced by one space, the
    result is lowercased, and leading/trailing whitespace is stripped.
    """
    letters_only = non_alphanums.sub(' ', text)
    return letters_only.lower().strip()


# Baseline score: TextBlob polarity computed on the normalized text.
df['text_normalized'] = df['text'].map(normalize_text)
df['textblob_score'] = df['text_normalized'].map(
    lambda normalized: textblob.TextBlob(normalized).polarity
)

import wordbag_regressor

print("Train wordbag regressor")
# NOTE(review): the constructor is called with the model path only. The other
# regressors below also pass tripadvisor_dir as a second argument; presumably
# that argument triggers training from that directory, whereas omitting it
# loads the stored model -- confirm against wordbag_regressor.
wb_regressor = wordbag_regressor.WordbagRegressor(
    "../models/wordbag_model.pkl.gz"
)
df['wordbag_score'] = wb_regressor.predict(df['text'].values)

import wordhash_regressor

print("Train wordhash regressor")
# NOTE(review): drop the tripadvisor_dir argument to use the path-only form of
# the constructor (presumably loads an existing model -- confirm).
wh_regressor = wordhash_regressor.WordhashRegressor(
    "../models/wordhash_model.pkl.gz",
    tripadvisor_dir,
)
df['wordhash_score'] = wh_regressor.predict(df['text'].values)

import wordseq_regressor

print("Train wordseq regressor")
# NOTE(review): drop the tripadvisor_dir argument to use the path-only form of
# the constructor (presumably loads an existing model -- confirm).
ws_regressor = wordseq_regressor.WordseqRegressor(
    "../models/wordseq_model.pkl.gz",
    tripadvisor_dir,
)
# Unlike the two regressors above, this one is scored via predict_batch().
df['wordseq_score'] = ws_regressor.predict_batch(df['text'].values)