def fit_predict(config, X_train, y_train, X_test=None, y_test=None, ref_thd=None):
    """
    Trains a model as described by the configuration and, when a labelled
    test set is supplied, evaluates it and dumps the predictions to disk.

    Steps, in order:
      1. set_selection_method(config) supplies an optional feature selection
         transformer. When it is not None it is fitted on the training data
         and applied to X_train (and to X_test when one is given).
      2. set_learning_method(config, X_train, y_train) supplies the estimator
         and the (name, function) scorer pairs; the estimator is then fitted
         on the training data.
      3. Only when both X_test and y_test are given: the test set is
         predicted, every configured scorer and a fixed list of extra metrics
         are logged, a classification report is logged, and one tab-separated
         "reference, prediction" row per test instance is written to
         "predicted.csv" in the current working directory.

    @param config: the configuration dictionary obtained parsing the
    configuration file.
    @param X_train: the np.array object for the matrix containing the feature
    values for each instance in the training set.
    @param y_train: the np.array object for the response values of each
    instance in the training set.
    @param X_test: the np.array object for the matrix containing the feature
    values for each instance in the test set. Default is None.
    @param y_test: the np.array object for the response values of each
    instance in the test set. Default is None.
    @param ref_thd: value convertible with float() that is handed to
    classify_report_regression() when both classify_report_bin() and
    classify_report_bin_regression() return "N/A". Default is None, in which
    case an error is logged instead of producing that report.
    @return: None.
    """
    # sets the selection method
    transformer = set_selection_method(config)

    # if the system is configured to run feature selection
    # runs it and modifies the datasets to the new dimensions
    if transformer is not None:
        log.info("Running feature selection %s" % str(transformer))

        log.debug("X_train dimensions before fit_transform(): %s,%s" % X_train.shape)
        log.debug("y_train dimensions before fit_transform(): %s" % y_train.shape)

        X_train = transformer.fit_transform(X_train, y_train)

        log.debug("Dimensions after fit_transform(): %s,%s" % X_train.shape)

        if X_test is not None:
            X_test = transformer.transform(X_test)

    # sets learning algorithm and runs it over the training data
    estimator, scorers = set_learning_method(config, X_train, y_train)
    log.info("Running learning algorithm %s" % str(estimator))
    estimator.fit(X_train, y_train)

    # Evaluation needs both the test features and the reference values.
    if X_test is None or y_test is None:
        return

    log.info("Predicting unseen data using the trained model...")
    y_hat = estimator.predict(X_test)

    log.info("Evaluating prediction on the test set...")
    for scorer_name, scorer_func in scorers:
        v = scorer_func(y_test, y_hat)
        log.info("%s = %s" % (scorer_name, v))

    log.info("Customized scores: ")
    # Each metric is wrapped in a lambda so that the helper name is resolved
    # and called inside the try block below: a metric that does not apply to
    # this kind of target (or is unavailable) is skipped without aborting the
    # remaining ones.
    custom_metrics = (
        ("pearson_corrcoef = %s", lambda: pearson_corrcoef(y_test, y_hat)),
        ("Precision score: = %s", lambda: precision_score(y_test, y_hat)),
        ("Recall score: = %s", lambda: recall_score(y_test, y_hat)),
        ("F1 score: = %s", lambda: f1_score(y_test, y_hat)),
        ("MAE: = %s", lambda: mean_absolute_error(y_test, y_hat)),
        ("RMSE: = %s", lambda: root_mean_squared_error(y_test, y_hat)),
    )
    for template, compute in custom_metrics:
        try:
            log.info(template % compute())
        except Exception as e:
            # Best effort: keep going, but leave a trace of what was skipped
            # instead of discarding the failure silently.
            log.debug("Customized score skipped (%s): %r" % (template, e))

    # Classification report: try the binary report first, then the binarised
    # regression report, and finally the threshold based one.
    try:
        res = classify_report_bin(y_test, y_hat)
        if "N/A" != res:
            log.info("Classify report bin: = %s" % res)
        else:
            res = classify_report_bin_regression(y_test, y_hat)
            if "N/A" != res:
                log.info("Classify report bin regression: = %s" % res)
            elif ref_thd is None:
                log.error("No ref thd defined")
            else:
                refthd = float(ref_thd)
                res = classify_report_regression(y_test, y_hat, refthd)
                log.info("Classify report regression: = %s" % res)
    except Exception as e:
        # Report through the logger like the rest of the function so the
        # failure is not lost when stdout is not captured.
        log.error("Classify report failed: %s" % e)

    # One "<reference>TAB<prediction>" row per test instance.
    with open("predicted.csv", "w") as _fout:
        for _x, _y in zip(y_test, y_hat):
            _fout.write("%f\t%f\n" % (_x, _y))
def fit_predict(config, X_train, y_train, X_test=None, y_test=None, ref_thd=None,
                file_dir='/home/liutuan/czm/workspace/QE_project/learning'):
    """
    Trains a model as described by the configuration and, when a test set is
    supplied, writes its predictions (and the references, if known) to disk.

    Steps, in order:
      1. set_selection_method(config) supplies an optional feature selection
         transformer. When it is not None it is fitted on the training data
         and applied to X_train (and to X_test when one is given).
      2. set_learning_method(config, X_train, y_train) supplies the estimator
         and the (name, function) scorer pairs; the estimator is then fitted
         on the training data.
      3. When X_test is given, the test set is predicted and
         "predicted.csv" is written into file_dir with one tab-separated row
         per prediction: system label, 1-based position, predicted value and
         get_index(position, y_hat).
      4. When y_test is given as well, every configured scorer and
         pearson_corrcoef are logged, and "ref.csv" is written into file_dir
         in the same row layout using the reference values.

    @param config: the configuration dictionary obtained parsing the
    configuration file.
    @param X_train: the np.array object for the matrix containing the feature
    values for each instance in the training set.
    @param y_train: the np.array object for the response values of each
    instance in the training set.
    @param X_test: the np.array object for the matrix containing the feature
    values for each instance in the test set. Default is None.
    @param y_test: the np.array object for the response values of each
    instance in the test set. Default is None.
    @param ref_thd: not used by this implementation; accepted so that the
    call signature stays compatible with existing callers.
    @param file_dir: directory that receives "predicted.csv" and "ref.csv".
    Defaults to the location that used to be hard-coded in this function.
    @return: None.
    """
    import os

    # sets the selection method
    transformer = set_selection_method(config)

    # if the system is configured to run feature selection
    # runs it and modifies the datasets to the new dimensions
    if transformer is not None:
        log.info("Running feature selection %s" % str(transformer))

        log.debug("X_train dimensions before fit_transform(): %s,%s" % X_train.shape)
        log.debug("y_train dimensions before fit_transform(): %s" % y_train.shape)

        X_train = transformer.fit_transform(X_train, y_train)

        log.debug("Dimensions after fit_transform(): %s,%s" % X_train.shape)

        if X_test is not None:
            X_test = transformer.transform(X_test)

    # sets learning algorithm and runs it over the training data
    estimator, scorers = set_learning_method(config, X_train, y_train)
    log.info("Running learning algorithm %s" % str(estimator))
    estimator.fit(X_train, y_train)

    # Without test features there is nothing to predict or write.
    if X_test is None:
        return

    log.info("Predicting unseen data using the trained model...")
    y_hat = estimator.predict(X_test)

    has_reference = y_test is not None

    if has_reference:
        log.info("Evaluating prediction on the test set...")
        for scorer_name, scorer_func in scorers:
            v = scorer_func(y_test, y_hat)
            log.info("%s = %s" % (scorer_name, v))

        log.info("Customized scores: ")
        try:
            log.info("pearson_corrcoef = %s" % pearson_corrcoef(y_test, y_hat))
        except Exception as e:
            # Best effort: the extra score is optional, but leave a trace of
            # why it is missing instead of discarding the failure silently.
            log.debug("Customized score skipped (pearson_corrcoef): %r" % (e,))

    # Predictions are written whether or not reference values are available.
    with open(os.path.join(file_dir, "predicted.csv"), 'w') as _fout:
        for i, _y in enumerate(y_hat):
            _fout.write("%s\t%d\t%f\t%d\n" % (
                'JXNU/Emb+RNNLM+QuEst+SVM', i + 1, _y, get_index(i, y_hat)))

    if has_reference:
        # Same row layout as predicted.csv, filled with the reference values.
        with open(os.path.join(file_dir, 'ref.csv'), 'w') as _fout:
            for i, _y in enumerate(y_test):
                _fout.write("%s\t%d\t%f\t%d\n" % (
                    'SHEFF/QuEst', i + 1, _y, get_index(i, y_test)))
def fit_predict(config, X_train, y_train, X_test=None, y_test=None, ref_thd=None):
    """
    Trains a model as described by the configuration and, when a labelled
    test set is supplied, evaluates it and dumps the predictions to disk.

    Steps, in order:
      1. set_selection_method(config) supplies an optional feature selection
         transformer. When it is not None it is fitted on the training data
         and applied to X_train (and to X_test when one is given).
      2. set_learning_method(config, X_train, y_train) supplies the estimator
         and the (name, function) scorer pairs; the estimator is then fitted
         on the training data.
      3. Only when both X_test and y_test are given: the test set is
         predicted, every configured scorer and a fixed list of extra metrics
         are logged, a classification report is logged, and one tab-separated
         "reference, prediction" row per test instance is written to
         "predicted.csv" in the current working directory.

    @param config: the configuration dictionary obtained parsing the
    configuration file.
    @param X_train: the np.array object for the matrix containing the feature
    values for each instance in the training set.
    @param y_train: the np.array object for the response values of each
    instance in the training set.
    @param X_test: the np.array object for the matrix containing the feature
    values for each instance in the test set. Default is None.
    @param y_test: the np.array object for the response values of each
    instance in the test set. Default is None.
    @param ref_thd: value convertible with float() that is handed to
    classify_report_regression() when both classify_report_bin() and
    classify_report_bin_regression() return "N/A". Default is None, in which
    case an error is logged instead of producing that report.
    @return: None.
    """
    # sets the selection method
    transformer = set_selection_method(config)

    # if the system is configured to run feature selection
    # runs it and modifies the datasets to the new dimensions
    if transformer is not None:
        log.info("Running feature selection %s" % str(transformer))

        log.debug("X_train dimensions before fit_transform(): %s,%s" % X_train.shape)
        log.debug("y_train dimensions before fit_transform(): %s" % y_train.shape)

        X_train = transformer.fit_transform(X_train, y_train)

        log.debug("Dimensions after fit_transform(): %s,%s" % X_train.shape)

        if X_test is not None:
            X_test = transformer.transform(X_test)

    # sets learning algorithm and runs it over the training data
    estimator, scorers = set_learning_method(config, X_train, y_train)
    log.info("Running learning algorithm %s" % str(estimator))
    estimator.fit(X_train, y_train)

    # Evaluation needs both the test features and the reference values.
    if X_test is None or y_test is None:
        return

    log.info("Predicting unseen data using the trained model...")
    y_hat = estimator.predict(X_test)

    log.info("Evaluating prediction on the test set...")
    for scorer_name, scorer_func in scorers:
        v = scorer_func(y_test, y_hat)
        log.info("%s = %s" % (scorer_name, v))

    log.info("Customized scores: ")
    # Each metric is wrapped in a lambda so that the helper name is resolved
    # and called inside the try block below: a metric that does not apply to
    # this kind of target (or is unavailable) is skipped without aborting the
    # remaining ones.
    custom_metrics = (
        ("pearson_corrcoef = %s", lambda: pearson_corrcoef(y_test, y_hat)),
        ("Precision score: = %s", lambda: precision_score(y_test, y_hat)),
        ("Recall score: = %s", lambda: recall_score(y_test, y_hat)),
        ("F1 score: = %s", lambda: f1_score(y_test, y_hat)),
        ("MAE: = %s", lambda: mean_absolute_error(y_test, y_hat)),
        ("RMSE: = %s", lambda: root_mean_squared_error(y_test, y_hat)),
    )
    for template, compute in custom_metrics:
        try:
            log.info(template % compute())
        except Exception as e:
            # Best effort: keep going, but leave a trace of what was skipped
            # instead of discarding the failure silently.
            log.debug("Customized score skipped (%s): %r" % (template, e))

    # Classification report: try the binary report first, then the binarised
    # regression report, and finally the threshold based one.
    try:
        res = classify_report_bin(y_test, y_hat)
        if "N/A" != res:
            log.info("Classify report bin: = %s" % res)
        else:
            res = classify_report_bin_regression(y_test, y_hat)
            if "N/A" != res:
                log.info("Classify report bin regression: = %s" % res)
            elif ref_thd is None:
                log.error("No ref thd defined")
            else:
                refthd = float(ref_thd)
                res = classify_report_regression(y_test, y_hat, refthd)
                log.info("Classify report regression: = %s" % res)
    except Exception as e:
        # Report through the logger like the rest of the function so the
        # failure is not lost when stdout is not captured.
        log.error("Classify report failed: %s" % e)

    # One "<reference>TAB<prediction>" row per test instance.
    with open("predicted.csv", 'w') as _fout:
        for _x, _y in zip(y_test, y_hat):
            _fout.write("%f\t%f\n" % (_x, _y))