def fit_predict(config, X_train, y_train, X_test=None, y_test=None, ref_thd=None):
    """
    Trains a model as described by the configuration dictionary and, when a
    labelled test set is supplied, evaluates it and dumps the predictions.

    If set_selection_method() returns a transformer, it is fitted on the
    training data and then applied to the test features. The estimator
    returned by set_learning_method() is then fitted on the (possibly
    reduced) training data.

    When both X_test and y_test are given, the test set is predicted, every
    configured scorer is logged, a fixed list of extra metrics is logged on a
    best-effort basis (a metric that raises is skipped), and the
    reference/prediction pairs are written, tab-separated, to "predicted.csv"
    in the current working directory. If either of them is missing, no
    prediction is made.

    @param config: the configuration dictionary obtained parsing the
    configuration file.
    @param X_train: the np.array object for the matrix containing the feature
    values for each instance in the training set.
    @param y_train: the np.array object for the response values of each instance
    in the training set.
    @param X_test: the np.array object for the matrix containing the feature
    values for each instance in the test set. Default is None.
    @param y_test: the np.array object for the response values of each instance
    in the test set. Default is None.
    @param ref_thd: value convertible to float that is forwarded to
    classify_report_regression() when both binary reports return "N/A"
    (presumably a reference threshold -- confirm against that helper).
    Default is None, in which case an error is logged instead.
    @return: None.
    """
    # sets the selection method
    transformer = set_selection_method(config)

    # if the system is configured to run feature selection
    # runs it and modifies the datasets to the new dimensions
    if transformer is not None:
        log.info("Running feature selection %s" % str(transformer))

        log.debug("X_train dimensions before fit_transform(): %s,%s" % X_train.shape)
        log.debug("y_train dimensions before fit_transform(): %s" % y_train.shape)

        X_train = transformer.fit_transform(X_train, y_train)

        log.debug("Dimensions after fit_transform(): %s,%s" % X_train.shape)

        if X_test is not None:
            X_test = transformer.transform(X_test)

    # sets learning algorithm and runs it over the training data
    estimator, scorers = set_learning_method(config, X_train, y_train)
    log.info("Running learning algorithm %s" % str(estimator))
    estimator.fit(X_train, y_train)

    # evaluation needs both the test features and the reference values
    if X_test is None or y_test is None:
        return

    log.info("Predicting unseen data using the trained model...")
    y_hat = estimator.predict(X_test)
    log.info("Evaluating prediction on the test set...")
    for scorer_name, scorer_func in scorers:
        v = scorer_func(y_test, y_hat)
        log.info("%s = %s" % (scorer_name, v))

    log.info("Customized scores: ")
    # Each metric is wrapped in a lambda so that the metric name is resolved
    # and the metric computed inside the try below: a metric that is not
    # applicable to this kind of target (or not available) is skipped instead
    # of aborting the evaluation.
    optional_scores = (
        ("pearson_corrcoef = %s", lambda: pearson_corrcoef(y_test, y_hat)),
        ("Precision score: = %s", lambda: precision_score(y_test, y_hat)),
        ("Recall score: = %s", lambda: recall_score(y_test, y_hat)),
        ("F1 score: = %s", lambda: f1_score(y_test, y_hat)),
        ("MAE: = %s", lambda: mean_absolute_error(y_test, y_hat)),
        ("RMSE: = %s", lambda: root_mean_squared_error(y_test, y_hat)),
    )
    for template, compute in optional_scores:
        try:
            log.info(template % compute())
        except Exception as exc:
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit still propagate.
            log.debug("Score unavailable (%s): %s" % (template, exc))

    # Falls back from the binary report, to the binary-regression report, to
    # the thresholded regression report; the first one that is not "N/A" wins.
    try:
        res = classify_report_bin(y_test, y_hat)
        if "N/A" != res:
            log.info("Classify report bin: = %s" % res)
        else:
            res = classify_report_bin_regression(y_test, y_hat)
            if "N/A" != res:
                log.info("Classify report bin regression: = %s" % res)
            elif ref_thd is None:
                log.error("No ref thd defined")
            else:
                refthd = float(ref_thd)
                res = classify_report_regression(y_test, y_hat, refthd)
                log.info("Classify report regression: = %s" % res)
    except Exception as exc:
        log.error("Classification report failed: %s" % exc)

    # one "<reference><TAB><prediction>" line per test instance
    with open("predicted.csv", "w") as _fout:
        for _x, _y in zip(y_test, y_hat):
            _fout.write("%f\t%f\n" % (_x, _y))
# Example #2
# 0
def _write_ranked_scores(path, system_name, values):
    """
    Writes one tab-separated row per value to the given path: the system
    name, the 1-based position, the value itself and the result of
    get_index(position, values) (presumably the rank of the value -- confirm
    against get_index).
    """
    with open(path, 'w') as _fout:
        for i, value in enumerate(values):
            _fout.write("%s\t%d\t%f\t%d\n" % (system_name, i + 1, value,
                                              get_index(i, values)))


def fit_predict(config,
                X_train,
                y_train,
                X_test=None,
                y_test=None,
                ref_thd=None,
                output_dir=None):
    """
    Trains a model as described by the configuration dictionary, predicts the
    test set when one is supplied and writes the predictions to disk.

    If set_selection_method() returns a transformer, it is fitted on the
    training data and then applied to the test features. The estimator
    returned by set_learning_method() is then fitted on the (possibly
    reduced) training data.

    When X_test is given, the predictions are written to
    "<output_dir>/predicted.csv". When y_test is given as well, every
    configured scorer and the Pearson correlation are logged, and the
    reference values are written to "<output_dir>/ref.csv".

    @param config: the configuration dictionary obtained parsing the
    configuration file.
    @param X_train: the np.array object for the matrix containing the feature
    values for each instance in the training set.
    @param y_train: the np.array object for the response values of each instance
    in the training set.
    @param X_test: the np.array object for the matrix containing the feature
    values for each instance in the test set. Default is None.
    @param y_test: the np.array object for the response values of each instance
    in the test set. Default is None.
    @param ref_thd: accepted for signature compatibility; not used by this
    function.
    @param output_dir: directory that receives predicted.csv and ref.csv.
    Default is None, which keeps the directory that used to be hard-coded.
    @return: None.
    """
    import os

    if output_dir is None:
        # historical hard-coded location, kept as the default so existing
        # callers keep writing to the same place
        output_dir = '/home/liutuan/czm/workspace/QE_project/learning'

    # sets the selection method
    transformer = set_selection_method(config)

    # if the system is configured to run feature selection
    # runs it and modifies the datasets to the new dimensions
    if transformer is not None:
        log.info("Running feature selection %s" % str(transformer))

        log.debug("X_train dimensions before fit_transform(): %s,%s" %
                  X_train.shape)
        log.debug("y_train dimensions before fit_transform(): %s" %
                  y_train.shape)

        X_train = transformer.fit_transform(X_train, y_train)

        log.debug("Dimensions after fit_transform(): %s,%s" % X_train.shape)

        if X_test is not None:
            X_test = transformer.transform(X_test)

    # sets learning algorithm and runs it over the training data
    estimator, scorers = set_learning_method(config, X_train, y_train)
    log.info("Running learning algorithm %s" % str(estimator))
    estimator.fit(X_train, y_train)

    # without test features there is nothing to predict
    if X_test is None:
        return

    log.info("Predicting unseen data using the trained model...")
    y_hat = estimator.predict(X_test)

    if y_test is not None:
        log.info("Evaluating prediction on the test set...")
        for scorer_name, scorer_func in scorers:
            v = scorer_func(y_test, y_hat)
            log.info("%s = %s" % (scorer_name, v))
        log.info("Customized scores: ")
        try:
            log.info("pearson_corrcoef = %s" % pearson_corrcoef(y_test, y_hat))
        except Exception as exc:
            # best effort: the correlation is informative only, so a failure
            # is recorded and evaluation goes on; Exception (not a bare
            # except) lets KeyboardInterrupt and SystemExit propagate
            log.debug("pearson_corrcoef unavailable: %s" % exc)

    _write_ranked_scores(os.path.join(output_dir, "predicted.csv"),
                         'JXNU/Emb+RNNLM+QuEst+SVM', y_hat)

    if y_test is not None:
        _write_ranked_scores(os.path.join(output_dir, "ref.csv"),
                             'SHEFF/QuEst', y_test)
def fit_predict(config,
                X_train,
                y_train,
                X_test=None,
                y_test=None,
                ref_thd=None):
    """
    Trains a model as described by the configuration dictionary and, when a
    labelled test set is supplied, evaluates it and dumps the predictions.

    If set_selection_method() returns a transformer, it is fitted on the
    training data and then applied to the test features. The estimator
    returned by set_learning_method() is then fitted on the (possibly
    reduced) training data.

    When both X_test and y_test are given, the test set is predicted, every
    configured scorer is logged, a fixed list of extra metrics is logged on a
    best-effort basis (a metric that raises is skipped), and the
    reference/prediction pairs are written, tab-separated, to "predicted.csv"
    in the current working directory. If either of them is missing, no
    prediction is made.

    @param config: the configuration dictionary obtained parsing the
    configuration file.
    @param X_train: the np.array object for the matrix containing the feature
    values for each instance in the training set.
    @param y_train: the np.array object for the response values of each instance
    in the training set.
    @param X_test: the np.array object for the matrix containing the feature
    values for each instance in the test set. Default is None.
    @param y_test: the np.array object for the response values of each instance
    in the test set. Default is None.
    @param ref_thd: value convertible to float that is forwarded to
    classify_report_regression() when both binary reports return "N/A"
    (presumably a reference threshold -- confirm against that helper).
    Default is None, in which case an error is logged instead.
    @return: None.
    """
    # sets the selection method
    transformer = set_selection_method(config)

    # if the system is configured to run feature selection
    # runs it and modifies the datasets to the new dimensions
    if transformer is not None:
        log.info("Running feature selection %s" % str(transformer))

        log.debug("X_train dimensions before fit_transform(): %s,%s" %
                  X_train.shape)
        log.debug("y_train dimensions before fit_transform(): %s" %
                  y_train.shape)

        X_train = transformer.fit_transform(X_train, y_train)

        log.debug("Dimensions after fit_transform(): %s,%s" % X_train.shape)

        if X_test is not None:
            X_test = transformer.transform(X_test)

    # sets learning algorithm and runs it over the training data
    estimator, scorers = set_learning_method(config, X_train, y_train)
    log.info("Running learning algorithm %s" % str(estimator))
    estimator.fit(X_train, y_train)

    # evaluation needs both the test features and the reference values
    if X_test is None or y_test is None:
        return

    log.info("Predicting unseen data using the trained model...")
    y_hat = estimator.predict(X_test)
    log.info("Evaluating prediction on the test set...")
    for scorer_name, scorer_func in scorers:
        v = scorer_func(y_test, y_hat)
        log.info("%s = %s" % (scorer_name, v))

    log.info("Customized scores: ")
    # Each metric is wrapped in a lambda so that the metric name is resolved
    # and the metric computed inside the try below: a metric that is not
    # applicable to this kind of target (or not available) is skipped instead
    # of aborting the evaluation.
    optional_scores = (
        ("pearson_corrcoef = %s", lambda: pearson_corrcoef(y_test, y_hat)),
        ("Precision score: = %s", lambda: precision_score(y_test, y_hat)),
        ("Recall score: = %s", lambda: recall_score(y_test, y_hat)),
        ("F1 score: = %s", lambda: f1_score(y_test, y_hat)),
        ("MAE: = %s", lambda: mean_absolute_error(y_test, y_hat)),
        ("RMSE: = %s", lambda: root_mean_squared_error(y_test, y_hat)),
    )
    for template, compute in optional_scores:
        try:
            log.info(template % compute())
        except Exception as exc:
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit still propagate.
            log.debug("Score unavailable (%s): %s" % (template, exc))

    # Falls back from the binary report, to the binary-regression report, to
    # the thresholded regression report; the first one that is not "N/A" wins.
    try:
        res = classify_report_bin(y_test, y_hat)
        if "N/A" != res:
            log.info("Classify report bin: = %s" % res)
        else:
            res = classify_report_bin_regression(y_test, y_hat)
            if "N/A" != res:
                log.info("Classify report bin regression: = %s" % res)
            elif ref_thd is None:
                log.error("No ref thd defined")
            else:
                refthd = float(ref_thd)
                res = classify_report_regression(y_test, y_hat, refthd)
                log.info("Classify report regression: = %s" % res)
    except Exception as exc:
        log.error("Classification report failed: %s" % exc)

    # one "<reference><TAB><prediction>" line per test instance
    with open("predicted.csv", 'w') as _fout:
        for _x, _y in zip(y_test, y_hat):
            _fout.write("%f\t%f\n" % (_x, _y))