예제 #1
0
    # Build this run's result-file path: the configured pieces are stringified
    # and concatenated into one template, then the seed is substituted in.
    result_file_path = ''.join(str(item) for item in config['result_file_path'])
    result_file_path = result_file_path % seed
    # Parenthesised single-argument prints behave the same on Python 2 and 3.
    print(model_file_path)
    print(result_file_path)

    if config['refresh_all_model']:
        # Fit on the training features and save the fitted model.
        bow_rf.fit(train_data=(train_X_feature, train_data['LABEL_INDEX']),
                   validation_data=(test_X_feature, test_data['LABEL_INDEX']))
        bow_rf.save_model(model_file_path)
    else:
        # Load the model from model_file_path instead of fitting it.
        bow_rf.model_from_pickle(model_file_path)

    bow_rf.print_model_descibe()

    # Spot-check predictions on two raw sentences.
    print(index_to_label[bow_rf.predict('啥', transform_input=True)])
    print(index_to_label[bow_rf.predict('哪台好', transform_input=True)])

    # `.values` replaces `Series.as_matrix()`, which was deprecated in
    # pandas 0.23 and removed in pandas 1.0; both yield the underlying ndarray.
    train_labels = train_data['LABEL_INDEX'].values
    test_labels = test_data['LABEL_INDEX'].values

    # The training-set result is discarded -- presumably the call is made for
    # whatever report `accuracy` emits itself; TODO confirm.
    bow_rf.accuracy((train_X_feature, train_labels), False)
    y_pred, is_correct, accu, f1 = bow_rf.accuracy((test_X_feature, test_labels), False)

    # Attach the predicted label names and correctness flags to the test
    # frame, then write it to the result file.
    test_data['PREDICT'] = [index_to_label[item] for item in y_pred]
    test_data['IS_CORRECT'] = is_correct
    data_util.save_data(test_data, result_file_path)


# Report the total running time on stdout and in the debug log.
end_time = timeit.default_timer()
# Parenthesised single-argument print works identically on Python 2 and 3.
print('end! Running time:%ds!' % (end_time - start_time))
logging.debug('=' * 20)
# Pass the value as a logging argument so formatting is deferred to the logger.
logging.debug('end! Running time:%ds!', end_time - start_time)
            dev_X_feature = feature_encoder.fit_transform(x)
            test_X_feature = feature_encoder.transform(test_data[u'SENTENCE'].as_matrix())
            bow_rf = BowRandomForest(
                # rand_seed=rand_seed,
                verbose=0,
                n_estimators=estimators,
                min_samples_leaf=1,
                feature_encoder=None,

            )


            bow_rf.fit(train_data=(dev_X_feature, y),
                       validation_data=(test_X_feature, test_y))
            _, _, dev_accuracy, _ = bow_rf.accuracy((dev_X_feature, y), False)
            _, _, val_accuracy, _ = bow_rf.accuracy((test_X_feature, test_y), False)
            train_accu.append(dev_accuracy)
            test_accu.append(val_accuracy)

        print('-' * 80)

        print('#***#训练准确率:%s'%(train_accu))
        print('#***#测试准确率:%s'%(test_accu))


        ave_acc.append(test_accu)
        counter += 1

    print(np.asarray(ave_acc))
    print('验证中平均准确率:%f' % np.average(np.asarray(ave_acc),axis=0))
예제 #3
0
    else:
        # Load the model from model_file_path (the matching `if` branch is
        # outside this fragment).
        bow_rf.model_from_pickle(model_file_path)

    # --- Disabled debugging aids: model description and spot-check predictions.
    # bow_rf.print_model_descibe()

    # print(index_to_label[bow_rf.predict('啥', transform_input=True)])
    # print(index_to_label[bow_rf.predict('哪台好', transform_input=True)])
    # print(index_to_label[bow_rf.predict('恩恩拜', transform_input=True)])
    # print(index_to_label[bow_rf.predict('恩恩拜拜', transform_input=True)])

    # --- Disabled: evaluate on the training set and dump the predictions.
    # y_pred, is_correct, accu, f1 = bow_rf.accuracy((train_X_feature, train_y), False)

    # train_data['PREDICT'] = [index_to_label[item] for item in y_pred]
    # train_data['IS_CORRECT'] = is_correct
    # data_util.save_data(train_data,'train_resul.csv')
    # quit()
    # Evaluate on the test features/labels. The result is unpacked as
    # (y_pred, is_correct, accu, f1); none of these values is used again in
    # this fragment because the result-saving code below is disabled.
    # NOTE(review): the meaning of the second argument (False) is not visible
    # here -- confirm against BowRandomForest.accuracy.
    y_pred, is_correct, accu, f1 = bow_rf.accuracy((test_X_feature,test_y),False)

    # --- Disabled: attach predictions to test_data and save to result_file_path.
    # test_data['PREDICT'] = [index_to_label[item] for item in y_pred]
    # test_data['IS_CORRECT'] = is_correct
    # print(result_file_path)
    # data_util.save_data(test_data,result_file_path)
    # print('='*20)
    # quit()

# Report the total running time on stdout and in the debug log.
end_time = timeit.default_timer()
# Parenthesised single-argument print works identically on Python 2 and 3.
print('end! Running time:%ds!' % (end_time - start_time))
logging.debug('=' * 20)
# Pass the value as a logging argument so formatting is deferred to the logger.
logging.debug('end! Running time:%ds!', end_time - start_time)