# Resolve the per-seed result file path, train (or reload) the bag-of-words
# random forest, score it on the train and test splits, and save the
# per-sample test predictions.
#
# NOTE(review): this fragment had lost its line breaks, which left every
# statement after the first inline '#' commented out. The layout below is
# reconstructed from the original token order -- confirm that the else
# branch really ends after the pickle load.

# The configured path pieces are stringified and concatenated into one
# %-format template, which is then filled in with the current seed.
result_file_path = ''.join(str(item) for item in config['result_file_path'])
result_file_path = result_file_path % seed
print(model_file_path)
print(result_file_path)

if config['refresh_all_model']:
    # Fit on the training features and persist the fitted model.
    bow_rf.fit(
        train_data=(train_X_feature, train_data['LABEL_INDEX']),
        validation_data=(test_X_feature, test_data['LABEL_INDEX']),
    )
    bow_rf.save_model(model_file_path)
else:
    # Load the model from the pickle file at model_file_path.
    bow_rf.model_from_pickle(model_file_path)

bow_rf.print_model_descibe()
# Spot-check predictions on two raw sentences.
print(index_to_label[bow_rf.predict('啥', transform_input=True)])
print(index_to_label[bow_rf.predict('哪台好', transform_input=True)])

# `.values` replaces `as_matrix()`, which pandas deprecated in 0.23 and
# removed in 1.0; both hand back the underlying array.
bow_rf.accuracy((train_X_feature, train_data['LABEL_INDEX'].values), False)
y_pred, is_correct, accu, f1 = bow_rf.accuracy(
    (test_X_feature, test_data['LABEL_INDEX'].values), False)

# Attach the predicted label and the correctness flag to every test row
# before writing the table out.
test_data['PREDICT'] = [index_to_label[item] for item in y_pred]
test_data['IS_CORRECT'] = is_correct
data_util.save_data(test_data, result_file_path)

end_time = timeit.default_timer()
# Parenthesised print works as a statement (Python 2) and as a function call.
elapsed_message = 'end! Running time:%ds!' % (end_time - start_time)
print(elapsed_message)
logging.debug('=' * 20)
logging.debug(elapsed_message)
# Encode one train/validation split, fit a bag-of-words random forest on it,
# and record the resulting train and validation accuracies.
#
# NOTE(review): this fragment had lost its line breaks, leaving the
# BowRandomForest(...) call unterminated behind an inline '#'. Statements are
# kept in their original order; any enclosing loop headers are outside this
# fragment, so the original nesting could not be restored -- confirm against
# the upstream script.

dev_X_feature = feature_encoder.fit_transform(x)
# `.values` replaces `as_matrix()`, which pandas deprecated in 0.23 and
# removed in 1.0.
test_X_feature = feature_encoder.transform(test_data[u'SENTENCE'].values)

# rand_seed is not passed: that argument was commented out in the original
# call (presumably only that one argument -- TODO confirm).
bow_rf = BowRandomForest(
    verbose=0,
    n_estimators=estimators,
    min_samples_leaf=1,
    feature_encoder=None,
)
bow_rf.fit(
    train_data=(dev_X_feature, y),
    validation_data=(test_X_feature, test_y),
)

# accuracy() returns a 4-tuple; only its third element is kept here.
_, _, dev_accuracy, _ = bow_rf.accuracy((dev_X_feature, y), False)
_, _, val_accuracy, _ = bow_rf.accuracy((test_X_feature, test_y), False)
train_accu.append(dev_accuracy)
test_accu.append(val_accuracy)

print('-' * 80)
print('#***#训练准确率:%s' % (train_accu))
print('#***#测试准确率:%s' % (test_accu))

# NOTE(review): ave_acc stores a reference to the test_accu list itself, not
# a copy -- verify this is intended if test_accu keeps growing afterwards.
ave_acc.append(test_accu)
counter += 1
print(np.asarray(ave_acc))
print('验证中平均准确率:%f' % np.average(np.asarray(ave_acc), axis=0))
else: bow_rf.model_from_pickle(model_file_path) # bow_rf.print_model_descibe() # print(index_to_label[bow_rf.predict('啥', transform_input=True)]) # print(index_to_label[bow_rf.predict('哪台好', transform_input=True)]) # print(index_to_label[bow_rf.predict('恩恩拜', transform_input=True)]) # print(index_to_label[bow_rf.predict('恩恩拜拜', transform_input=True)]) # y_pred, is_correct, accu, f1 = bow_rf.accuracy((train_X_feature, train_y), False) # train_data['PREDICT'] = [index_to_label[item] for item in y_pred] # train_data['IS_CORRECT'] = is_correct # data_util.save_data(train_data,'train_resul.csv') # quit() y_pred, is_correct, accu, f1 = bow_rf.accuracy((test_X_feature,test_y),False) # test_data['PREDICT'] = [index_to_label[item] for item in y_pred] # test_data['IS_CORRECT'] = is_correct # print(result_file_path) # data_util.save_data(test_data,result_file_path) # print('='*20) # quit() end_time = timeit.default_timer() print 'end! Running time:%ds!' % (end_time - start_time) logging.debug('=' * 20) logging.debug('end! Running time:%ds!' % (end_time - start_time))