# Score every text in df with four regressors (wordbag, wordhash, wordseq,
# wordvec), add two log-scaled length features, and allocate a zero-filled
# prediction vector with one entry per row of df.
#
# Each regressor is built from a model path plus `tripadvisor_dir`. The
# commented-out one-argument constructor calls are kept from the original
# script; presumably they reuse an already saved model instead of training
# (TODO confirm against the regressor classes).


def _log_char_count(text):
    """Return log(1 + len(text))."""
    return log(1 + len(text))


def _log_word_count(text):
    """Return log(1 + number of items produced by text.split())."""
    return log(1 + len(text.split()))


text_column = df['text']
tweet_texts = text_column.values

print("Train wordbag regressor")
wb_regressor = wordbag_regressor.WordbagRegressor(
    "../models/wordbag_model.pkl.gz",
    tripadvisor_dir,
)
# wb_regressor = wordbag_regressor.WordbagRegressor("../models/wordbag_model.pkl.gz")
df['wordbag_score'] = wb_regressor.predict(tweet_texts)

import wordhash_regressor

print("Train wordhash regressor")
wh_regressor = wordhash_regressor.WordhashRegressor(
    "../models/wordhash_model.pkl.gz",
    tripadvisor_dir,
)
# wh_regressor = wordhash_regressor.WordhashRegressor("../models/wordhash_model.pkl.gz")
df['wordhash_score'] = wh_regressor.predict(tweet_texts)

import wordseq_regressor

print("Train wordseq regressor")
ws_regressor = wordseq_regressor.WordseqRegressor(
    "../models/wordseq_model.pkl.gz",
    tripadvisor_dir,
)
# ws_regressor = wordseq_regressor.WordseqRegressor("../models/wordseq_model.pkl.gz")
# Unlike the other three regressors, this one is scored via predict_batch.
df['wordseq_score'] = ws_regressor.predict_batch(tweet_texts)

import wordvec_regressor

print("Train wordvec regressor")
wv_regressor = wordvec_regressor.WordvecRegressor(
    "../models/wordvec_model.pkl.gz",
    tripadvisor_dir,
)
# wv_regressor = wordvec_regressor.WordvecRegressor("../models/wordvec_model.pkl.gz")
df['wordvec_score'] = wv_regressor.predict(tweet_texts)

# Length features, log-scaled with a +1 offset so empty text maps to log(1).
df['tweet_len'] = text_column.map(_log_char_count)
df['tweet_wordcount'] = text_column.map(_log_word_count)
print(df)

full_preds = np.zeros(len(df))
# Score every text in df with four regressors (wordbag, wordhash, wordseq,
# wordvec), add two log-scaled length features, allocate a zero-filled
# prediction vector with one entry per row of df, and list the feature
# columns to use.
#
# Each regressor is built from a model path plus `tripadvisor_dir`. The
# commented-out one-argument constructor calls are kept as the alternative
# from the original script; presumably they reuse an already saved model
# instead of training (TODO confirm against the regressor classes).
#
# The classes are imported by name so that the instance variables below
# (wordbag_regressor, wordhash_regressor, ...) do not rebind an imported
# module of the same name.
# print(...) with a single argument behaves the same on Python 2 and 3.

from wordbag_regressor import WordbagRegressor

print("Train wordbag regressor")
wordbag_regressor = WordbagRegressor(
    "../models/wordbag_model.pkl.gz", tripadvisor_dir)
# wordbag_regressor = WordbagRegressor("../models/wordbag_model.pkl.gz")
df['wordbag_score'] = wordbag_regressor.predict(df['text'].values)

from wordhash_regressor import WordhashRegressor

print("Train wordhash regressor")
wordhash_regressor = WordhashRegressor(
    "../models/wordhash_model.pkl.gz", tripadvisor_dir)
# wordhash_regressor = WordhashRegressor("../models/wordhash_model.pkl.gz")
df['wordhash_score'] = wordhash_regressor.predict(df['text'].values)

from wordseq_regressor import WordseqRegressor

print("Train wordseq regressor")
wordseq_regressor = WordseqRegressor(
    "../models/wordseq_model.neo", tripadvisor_dir)
# wordseq_regressor = WordseqRegressor("../models/wordseq_model.neo")
# Unlike the other three regressors, this one is scored via predict_batch.
df['wordseq_score'] = wordseq_regressor.predict_batch(df['text'].values)

from wordvec_regressor import WordvecRegressor

print("Train wordvec regressor")
# The path carries the wordvec name so this model file cannot be mistaken
# for the wordseq one.
wordvec_regressor = WordvecRegressor(
    "../models/wordvec_model.pkl.gz", tripadvisor_dir)
# wordvec_regressor = WordvecRegressor("../models/wordvec_model.pkl.gz")
df['wordvec_score'] = wordvec_regressor.predict(df['text'].values)

# Length features, log-scaled with a +1 offset so empty text maps to log(1).
df['tweet_len'] = df['text'].map(lambda x: log(1 + len(x)))
df['tweet_wordcount'] = df['text'].map(lambda x: log(1 + len(x.split())))

full_preds = np.zeros(df.shape[0])

# 'textblob_score' is not computed in this block; it is expected to be
# present in df already (NOTE(review): confirm it is added earlier).
columns_pick = [
    'tweet_len',
    'tweet_wordcount',
    'wordbag_score',
    'wordhash_score',
    'wordseq_score',
    'wordvec_score',
    'textblob_score',
]
# Mean Squared Error: 0.297226914949
#
# Same list without 'wordbag_score':
# columns_pick = ['tweet_len', 'tweet_wordcount', 'wordhash_score',
#                 'wordseq_score', 'wordvec_score', 'textblob_score']
# Mean Squared Error: 0.306232998673