def process_xval():
    """Run stratified k-fold cross validation on the training data.

    Loads features/labels from the DB, then for each fold scales the
    features, runs the classifiers, prints a per-fold classification
    report, and accumulates a confusion matrix that is printed at the end.
    """
    mysql_db.connect()
    try:
        logger.debug("Getting training data")
        features, labels = get_training()
        logger.debug("Retrieved training data")
    finally:
        # Always release the DB connection, whether or not get_training()
        # raised (replaces the bare `except: close(); raise` pattern).
        mysql_db.close()

    logger.debug("Calculating k-fold indices")
    skf = StratifiedKFold(labels, n_folds=N_FOLD)
    conf_mtx = None
    logger.debug("Starting cross validation")
    for train_idx, test_idx in skf:
        X_train, X_test = features[train_idx], features[test_idx]
        y_train, y_test = labels[train_idx], labels[test_idx]
        logger.debug("scaling")
        # copy=False scales in place to avoid duplicating the feature matrix.
        scaler = StandardScaler(copy=False)
        scaler.fit_transform(X_train)
        # Apply the training-fold scaling to the test fold (no refit).
        scaler.transform(X_test)
        logger.debug("Running classifier")
        predictions = Classify.runClassifiers(X_train, y_train, X_test)
        # BUG FIX: `conf_mtx == None` broadcasts element-wise once conf_mtx
        # is a numpy array, making the `if` raise "truth value is ambiguous"
        # on the second fold; identity check against None is the correct test.
        if conf_mtx is None:
            conf_mtx = confusion_matrix(y_test, predictions)
        else:
            conf_mtx += confusion_matrix(y_test, predictions)
        print(classification_report(y_test, predictions, digits=2))
    print(conf_mtx)
def main():
    """Entry point: parse CLI args, configure logging, and extract
    features from the training/test dirs into the database."""
    description = "Extract features and save to Nialls's db"
    parser = argparse.ArgumentParser(description=description)
    setup_arguments(parser)
    setup_options(parser)
    args = parser.parse_args()
    error = validate_arguments(args)
    # Identity check against the None sentinel, not equality.
    if error is not None:
        parser.error(error)
    ch = logging.StreamHandler()
    # Single level value drives both the logger and the handler,
    # so the two can never disagree.
    level = logging.DEBUG if args.debug else logging.WARN
    logger.setLevel(level)
    ch.setLevel(level)
    logger.addHandler(ch)
    mysql_db.connect()
    try:
        features_to_db(mysql_db, args.training_dir, args.test_dir,
                       args.label_file)
    finally:
        # Close the connection on both the success and failure paths
        # (replaces the bare `except: close(); raise` pattern).
        mysql_db.close()
def process_normal(output_file):
    """Classify the held-out test data and write predictions to output_file.

    Fits the scaler and classifiers on the labelled training rows, applies
    them to the unlabelled test rows, and writes (name, prediction) output.
    """
    mysql_db.connect()
    try:
        logger.debug("Getting training data")
        train_features, train_labels = get_training()
        logger.debug("Getting test data")
        test_features, test_names = get_test()
    finally:
        # Always release the DB connection, whether or not the fetches
        # raised (replaces the bare `except: close(); raise` pattern).
        mysql_db.close()
    # BUG FIX: was `logging.debug(...)` — that logs to the root logger and
    # bypasses the module-level `logger` whose level/handler main() sets up;
    # use `logger.debug` like every other call in this file.
    logger.debug("scaling")
    # copy=False scales in place to avoid duplicating the feature matrices.
    scaler = StandardScaler(copy=False)
    scaler.fit_transform(train_features)
    # Apply the training-set scaling to the test set (no refit).
    scaler.transform(test_features)
    logger.debug("running classifier")
    predictions = Classify.runClassifiers(train_features, train_labels,
                                          test_features)
    logger.debug("writing predictions")
    write_predictions(output_file, test_names, predictions)
from feature import Feature, mysql_db import numpy as np import sys #connect to Nialls's through ssh tunnel #At start, connect (MAKE SURE YOU HAVE UPDATED feature.py WITH DB # SETTINGS) mysql_db.connect() #Get training query = Feature.select().where(Feature.label.is_null(False)) gray_hists = [] # See feature.py for fields in table red_hists = [] labels = [] # naive().iterator() is nicer for large numbers of rows (which we have) for feature in query.naive().iterator(): gray_hists.append(feature.gray_hist) red_hists.append(feature.red_hist) labels.append(feature.label) gray_train = np.vstack(gray_hists) red_train = np.vstack(red_hists) gray_and_red_train = np.hstack([gray_train, red_train]) #Fit using features # model.fit(gray_train, np.array(labels)) #Get test query = Feature.select().where(Feature.label.is_null(True)).order_by(