Example 1
0
def process_xval():
    """Cross validation on training data"""
    mysql_db.connect()
    try:
        logger.debug("Getting training data")
        features, labels = get_training()
        logger.debug("Retrieved training data")
    except:
        mysql_db.close()
        raise
    mysql_db.close()
    logger.debug("Calculating k-fold indices")
    skf = StratifiedKFold(labels, n_folds=N_FOLD)

    conf_mtx = None
    logger.debug("Starting cross validation")
    for train_idx, test_idx in skf:
        X_train, X_test = features[train_idx], features[test_idx]
        y_train, y_test = labels[train_idx], labels[test_idx]
        logger.debug("scaling")
        scaler = StandardScaler(copy=False)
        scaler.fit_transform(X_train)
        scaler.transform(X_test)
        logger.debug("Running classifier")
        predictions = Classify.runClassifiers(X_train, y_train, X_test)
        if conf_mtx == None:
            conf_mtx = confusion_matrix(y_test, predictions)
        else:
            conf_mtx += confusion_matrix(y_test, predictions)
        print classification_report(y_test, predictions, digits=2)

    print conf_mtx
Example 2
0
def main():
    """Parse CLI arguments, configure logging, and extract features to the db.

    Exits via parser.error() when argument validation fails.
    """
    description = "Extract features and save to Nialls's db"
    parser = argparse.ArgumentParser(description=description)
    setup_arguments(parser)
    setup_options(parser)
    args = parser.parse_args()
    error = validate_arguments(args)
    # Identity test: validate_arguments returns None on success.
    if error is not None:
        parser.error(error)
    ch = logging.StreamHandler()

    # One level drives both the logger and its console handler.
    level = logging.DEBUG if args.debug else logging.WARN
    logger.setLevel(level)
    ch.setLevel(level)
    logger.addHandler(ch)
    mysql_db.connect()
    try:
        features_to_db(mysql_db, args.training_dir, args.test_dir,
                       args.label_file)
    finally:
        # Close the connection on both success and failure
        # (replaces the bare except + duplicated close()).
        mysql_db.close()
Example 3
0
def process_normal(output_file):
    """Train on the labelled data, classify the test set, and write results.

    Args:
        output_file: path the predictions are written to by
            write_predictions().
    """
    mysql_db.connect()
    try:
        logger.debug("Getting training data")
        train_features, train_labels = get_training()
        logger.debug("Getting test data")
        test_features, test_names = get_test()
    finally:
        # Release the connection whether or not the queries raised
        # (replaces the bare except + duplicated close()).
        mysql_db.close()
    # Use the module logger; the original called logging.debug, which goes to
    # the root logger and bypasses the handler/level configured in main().
    logger.debug("scaling")
    # copy=False scales in place to avoid duplicating the feature arrays.
    scaler = StandardScaler(copy=False)
    scaler.fit_transform(train_features)
    scaler.transform(test_features)
    logger.debug("running classifier")
    predictions = Classify.runClassifiers(train_features, train_labels,
                                          test_features)
    logger.debug("writing predictions")
    write_predictions(output_file, test_names, predictions)
from feature import Feature, mysql_db
import numpy as np
import sys

#connect to Nialls's through ssh tunnel

#At start, connect (MAKE SURE YOU HAVE UPDATED feature.py WITH DB
# SETTINGS)
mysql_db.connect()

#Get training
# Rows with a non-NULL label are the labelled training examples.
query = Feature.select().where(Feature.label.is_null(False))
gray_hists = []  # See feature.py for fields in table
red_hists = []
labels = []

# naive().iterator() is nicer for large numbers of rows (which we have)
# -- it streams rows without caching model instances in memory.
for feature in query.naive().iterator():
    gray_hists.append(feature.gray_hist)
    red_hists.append(feature.red_hist)
    labels.append(feature.label)

# Stack the per-row histograms into 2-D arrays, one example per row.
gray_train = np.vstack(gray_hists)
red_train = np.vstack(red_hists)
# Concatenate gray and red histograms along the feature axis, giving a
# single combined feature matrix per example.
gray_and_red_train = np.hstack([gray_train, red_train])

#Fit using features
# model.fit(gray_train, np.array(labels))

#Get test
query = Feature.select().where(Feature.label.is_null(True)).order_by(