def main(opt):
    """Apply a pickled classifier to a data file and write its predictions.

    Two objects are unpickled from ``opt.model`` (first the classifier, then
    a mapping of information stored with it).  The numeric table in
    ``opt.infile`` is loaded, class probabilities are predicted, and one
    line per sample is written to ``opt.outfile``::

        <true label> <predicted label> <S columns ...> <p[i, 0]> <p[i, 1]>

    where the S columns are the ``N_NS`` columns directly preceding the
    label column.  Timing and summary statistics are logged and stored as
    attributes on ``opt``; when ``opt.info`` is true they are also appended
    to the output as ``#key=value`` lines.

    Parameters
    ----------
    opt : namespace-like object
        Reads ``model``, ``infile``, ``outfile``, ``ns`` and ``info``.
        Sets ``start_time``, ``end_time``, ``elapsed_time``,
        ``elapsed_utime``, ``nos_samples``, ``nos_correct_samples``,
        ``accuracy``, ``negative_mean_prob`` and ``positive_mean_prob``.

    Raises
    ------
    SystemExit
        Always, with status code 0, after the output has been written.
    """

    ### pre process

    # load model file: the classifier and its info mapping are stored as two
    # consecutive pickles in the same stream, so the read order matters
    # NOTE(review): unpickling can execute arbitrary code -- only use model
    # files that come from a trusted source
    clr = pickle.load(opt.model)
    clr_info = pickle.load(opt.model)

    # read data; ndmin=2 keeps a single-row file two-dimensional so that the
    # column slicing below also works when only one sample is given
    D = np.loadtxt(opt.infile, ndmin=2)

    # split data and process missing values (the last column is the label)
    y = np.array(D[:, -1])
    if opt.ns:
        # additionally drop the N_NS columns preceding the label
        X = fill_missing_with_mean(D[:, :-(1 + N_NS)])
    else:
        X = fill_missing_with_mean(D[:, :-1])
    # the N_NS columns preceding the label are echoed in the output
    S = np.atleast_2d(D[:, -(1 + N_NS):-1])

    ### main process

    # set starting time
    start_time = datetime.datetime.now()
    start_utime = os.times()[0]
    opt.start_time = start_time.isoformat()
    logger.info("start time = " + start_time.isoformat())

    # prediction and write results
    p = clr.predict_proba(X)

    # output prediction; the predicted class is the most probable column
    n = p.shape[0]
    pred = np.argmax(p, axis=1)
    for i in range(n):
        opt.outfile.write("%d %d " % (y[i], pred[i]))
        opt.outfile.write(" ".join(S[i, :].astype(str)) + " ")
        opt.outfile.write(str(p[i, 0]) + " " + str(p[i, 1]) + "\n")
    # number of samples whose predicted class equals the true label
    m = int(np.sum(pred == y[:n]))

    # set end and elapsed time
    end_time = datetime.datetime.now()
    end_utime = os.times()[0]
    logger.info("end time = " + end_time.isoformat())
    opt.end_time = end_time.isoformat()
    logger.info("elapsed_time = " + str((end_time - start_time)))
    opt.elapsed_time = str((end_time - start_time))
    logger.info("elapsed_utime = " + str((end_utime - start_utime)))
    opt.elapsed_utime = str((end_utime - start_utime))

    ### output

    # add meta info
    opt.nos_samples = n
    logger.info('nos_samples = ' + str(opt.nos_samples))
    opt.nos_correct_samples = m
    logger.info('nos_correct_samples = ' + str(opt.nos_correct_samples))
    # accuracy is undefined without samples; report NaN instead of failing
    opt.accuracy = m / float(n) if n else float('nan')
    logger.info('accuracy = ' + str(opt.accuracy))
    opt.negative_mean_prob = np.mean(p[:, 0])
    logger.info('negative_mean_prob = ' + str(opt.negative_mean_prob))
    opt.positive_mean_prob = np.mean(p[:, 1])
    logger.info('positive_mean_prob = ' + str(opt.positive_mean_prob))

    # output meta information
    if opt.info:
        # information stored together with the classifier
        for key, key_val in clr_info.items():
            opt.outfile.write("#classifier_%s=%s\n" % (key, str(key_val)))

        # every attribute currently set on opt, including those added above
        for key, key_val in vars(opt).items():
            opt.outfile.write("#%s=%s\n" % (key, str(key_val)))

    ### post process

    # close file, leaving the standard streams open
    if opt.infile is not sys.stdin:
        opt.infile.close()

    if opt.outfile is not sys.stdout:
        opt.outfile.close()

    # the model is an input stream, so it is checked against stdin
    if opt.model is not sys.stdin:
        opt.model.close()

    sys.exit(0)
Beispiel #2
0
def main(opt):
    """Train a classifier on a data file and pickle it with its metadata.

    The numeric table in ``opt.infile`` is loaded; its last column is used
    as the label and the remaining columns, after missing-value imputation,
    as features.  ``train`` is called once when ``opt.ntry <= 0``, otherwise
    ``opt.ntry`` times, keeping the model with the smallest ``f_loss_``.
    The chosen model and a dictionary holding the string form of every
    attribute of ``opt`` are written to ``opt.outfile`` as two consecutive
    pickles.

    Parameters
    ----------
    opt : namespace-like object
        Reads ``infile``, ``outfile`` and ``ntry``; it is also passed on to
        ``train``.  Sets ``start_time``, ``final_loss``, ``best_trial``
        (only when ``opt.ntry > 0``), ``end_time``, ``elapsed_time``,
        ``elapsed_utime``, ``nos_samples``, ``nos_features``,
        ``classifier``, ``fadm_version`` and ``sklearn_version``.

    Raises
    ------
    RuntimeError
        If ``opt.ntry > 0`` and no trial produced a loss below infinity,
        so that there is no model to write.
    SystemExit
        Otherwise always, with status code 0, after the model was written.
    """

    ### pre process

    # read data
    D = np.loadtxt(opt.infile)

    # split data and process missing values (the last column is the label)
    y = np.array(D[:, -1])
    X = fill_missing_with_mean(D[:, :-1])
    del D  # the raw table is no longer needed; release it before training

    ### main process

    # set starting time
    start_time = datetime.datetime.now()
    start_utime = os.times()[0]
    opt.start_time = start_time.isoformat()
    logger.info("start time = " + start_time.isoformat())

    # init constants (third argument of train(); its meaning is defined there)
    ns = 1

    # train
    if opt.ntry <= 0:
        # train only once
        clr = train(X, y, ns, opt)
        opt.final_loss = clr.f_loss_
        logger.info('final_loss = ' + str(opt.final_loss))
    else:
        # train multiple times and keep the model with the smallest loss
        clr = None
        best_loss = np.inf
        best_trial = 0
        for trial in range(opt.ntry):
            logger.info("Trial No. " + str(trial + 1))
            tmp_clr = train(X, y, ns, opt)
            logger.info("loss = " + str(tmp_clr.f_loss_))
            if tmp_clr.f_loss_ < best_loss:
                clr = tmp_clr
                best_loss = clr.f_loss_
                best_trial = trial + 1
        if clr is None:
            # every loss was NaN or infinite, so no trial was ever selected;
            # fail loudly instead of pickling None as the model
            raise RuntimeError(
                "none of the %d trials produced a loss below infinity"
                % opt.ntry)
        opt.final_loss = best_loss
        logger.info('final_loss = ' + str(opt.final_loss))
        opt.best_trial = best_trial
        logger.info('best_trial = ' + str(opt.best_trial))

    # set end and elapsed time
    end_time = datetime.datetime.now()
    end_utime = os.times()[0]
    logger.info("end time = " + end_time.isoformat())
    opt.end_time = end_time.isoformat()
    logger.info("elapsed_time = " + str((end_time - start_time)))
    opt.elapsed_time = str((end_time - start_time))
    logger.info("elapsed_utime = " + str((end_utime - start_utime)))
    opt.elapsed_utime = str((end_utime - start_utime))

    ### output

    # add info
    opt.nos_samples = X.shape[0]
    logger.info('nos_samples = ' + str(opt.nos_samples))
    opt.nos_features = X.shape[1]
    logger.info('nos_features = ' + str(X.shape[1]))
    opt.classifier = clr.__class__.__name__
    logger.info('classifier = ' + opt.classifier)
    opt.fadm_version = fadm_version
    logger.info('fadm_version = ' + opt.fadm_version)
    opt.sklearn_version = sklearn_version
    logger.info('sklearn_version = ' + opt.sklearn_version)

    # write file: the model first, then the stringified attributes of opt
    pickle.dump(clr, opt.outfile)
    info = {key: str(key_val) for key, key_val in vars(opt).items()}
    pickle.dump(info, opt.outfile)

    ### post process

    # close file, leaving the standard streams open
    if opt.infile is not sys.stdin:
        opt.infile.close()

    if opt.outfile is not sys.stdout:
        opt.outfile.close()

    sys.exit(0)
Beispiel #3
0
def main(opt):
    """Fit an L2-penalized logistic regression and pickle it with metadata.

    The numeric table in ``opt.infile`` is loaded; its last column is used
    as the label.  The features are all remaining columns or, when
    ``opt.ns`` is true, all columns except the ``N_NS`` ones directly
    preceding the label; missing values are imputed before fitting.  The
    fitted model and a dictionary holding the string form of every
    attribute of ``opt`` are written to ``opt.outfile`` as two consecutive
    pickles.

    Parameters
    ----------
    opt : namespace-like object
        Reads ``infile``, ``outfile``, ``ns`` and ``C`` (converted with
        ``float`` and passed as the ``C`` argument of the classifier).
        Sets ``start_time``, ``end_time``, ``elapsed_time``,
        ``elapsed_utime``, ``nos_samples``, ``nos_features``,
        ``classifier`` and ``fadm_version``.

    Raises
    ------
    SystemExit
        Always, with status code 0, after the model has been written.
    """

    ### pre process

    # read data
    D = np.loadtxt(opt.infile)

    # split data and process missing values (the last column is the label)
    y = np.array(D[:, -1])
    if opt.ns:
        # additionally drop the N_NS columns preceding the label
        X = fill_missing_with_mean(D[:, :-(1 + N_NS)])
    else:
        X = fill_missing_with_mean(D[:, :-1])

    ### main process

    # set starting time
    start_time = datetime.datetime.now()
    start_utime = os.times()[0]
    opt.start_time = start_time.isoformat()
    logger.info("start time = " + start_time.isoformat())

    # main process
    clr = LogisticRegression(C=float(opt.C), penalty='l2', fit_intercept=True)
    clr.fit(X, y)

    # set end and elapsed time
    end_time = datetime.datetime.now()
    end_utime = os.times()[0]
    logger.info("end time = " + end_time.isoformat())
    opt.end_time = end_time.isoformat()
    logger.info("elapsed_time = " + str((end_time - start_time)))
    opt.elapsed_time = str((end_time - start_time))
    logger.info("elapsed_utime = " + str((end_utime - start_utime)))
    opt.elapsed_utime = str((end_utime - start_utime))

    ### output

    # add info
    opt.nos_samples = X.shape[0]
    logger.info('nos_samples = ' + str(opt.nos_samples))
    opt.nos_features = X.shape[1]
    logger.info('nos_features = ' + str(X.shape[1]))
    opt.classifier = clr.__class__.__name__
    logger.info('classifier = ' + opt.classifier)
    opt.fadm_version = fadm_version
    logger.info('fadm_version = ' + opt.fadm_version)
    # NOTE: the training score (clr.score(X, y)) is currently not recorded

    # write file: the model first, then the stringified attributes of opt
    # (items() instead of the Python 2-only iteritems() works on 2 and 3)
    pickle.dump(clr, opt.outfile)
    info = {}
    for key, key_val in vars(opt).items():
        info[key] = str(key_val)
    pickle.dump(info, opt.outfile)

    ### post process

    # close file, leaving the standard streams open
    if opt.infile is not sys.stdin:
        opt.infile.close()

    if opt.outfile is not sys.stdout:
        opt.outfile.close()

    sys.exit(0)