def main(opt):
    """ Main routine that exits with status code 0

    Loads a pickled classifier and its pickled meta information from
    ``opt.model``, reads a data matrix from ``opt.infile``, predicts class
    probabilities for every row and writes the results to ``opt.outfile``.

    Each output line has the form
    ``<true class> <predicted class> <S values ...> <p[i, 0]> <p[i, 1]>``
    where S holds the ``N_NS`` columns directly in front of the last column
    and the last column is used as the true class.  Only the first two
    probability columns are written (presumably the classifier is binary).

    Parameters
    ----------
    opt : object
        Option namespace.  Attributes read: ``model``, ``infile``,
        ``outfile`` (open file objects), ``ns`` (if true, the ``N_NS``
        columns in front of the class column are excluded from the features)
        and ``info`` (if true, meta information is appended to the output as
        ``#key=value`` lines).  Attributes written: ``start_time``,
        ``end_time``, ``elapsed_time``, ``elapsed_utime``, ``nos_samples``,
        ``nos_correct_samples``, ``accuracy``, ``negative_mean_prob`` and
        ``positive_mean_prob``.

    Raises
    ------
    SystemExit
        Raised with status 0 after the run completed successfully.
    """

    try:
        ### pre process

        # load model file
        # NOTE(security): unpickling executes code embedded in the file, so
        # only model files from a trusted source must be passed here.
        clr = pickle.load(opt.model)
        clr_info = pickle.load(opt.model)

        # read data
        # ndmin=2 keeps a one-row input two-dimensional; without it loadtxt
        # squeezes the row to 1-D and the column slicing below fails.
        D = np.loadtxt(opt.infile, ndmin=2)

        # split data and process missing values
        y = np.array(D[:, -1])
        if opt.ns:
            X = fill_missing_with_mean(D[:, :-(1 + N_NS)])
        else:
            X = fill_missing_with_mean(D[:, :-1])
        S = np.atleast_2d(D[:, -(1 + N_NS):-1])

        ### main process

        # set starting time
        start_time = datetime.datetime.now()
        start_utime = os.times()[0]
        opt.start_time = start_time.isoformat()
        logger.info("start time = " + start_time.isoformat())

        # prediction and write results
        p = clr.predict_proba(X)

        # output prediction
        n = 0  # number of processed samples
        m = 0  # number of correctly classified samples
        for i, p_row in enumerate(p):
            c = np.argmax(p_row)
            opt.outfile.write("%d %d " % (y[i], c))
            opt.outfile.write(" ".join(S[i, :].astype(str)) + " ")
            opt.outfile.write(str(p_row[0]) + " " + str(p_row[1]) + "\n")
            n += 1
            if c == y[i]:
                m += 1

        # set end and elapsed time
        end_time = datetime.datetime.now()
        end_utime = os.times()[0]
        logger.info("end time = " + end_time.isoformat())
        opt.end_time = end_time.isoformat()
        logger.info("elapsed_time = " + str((end_time - start_time)))
        opt.elapsed_time = str((end_time - start_time))
        logger.info("elapsed_utime = " + str((end_utime - start_utime)))
        opt.elapsed_utime = str((end_utime - start_utime))

        ### output

        # add meta info
        opt.nos_samples = n
        logger.info('nos_samples = ' + str(opt.nos_samples))
        opt.nos_correct_samples = m
        logger.info('nos_correct_samples = ' + str(opt.nos_correct_samples))
        opt.accuracy = m / float(n)
        logger.info('accuracy = ' + str(opt.accuracy))
        opt.negative_mean_prob = np.mean(p[:, 0])
        logger.info('negative_mean_prob = ' + str(opt.negative_mean_prob))
        opt.positive_mean_prob = np.mean(p[:, 1])
        logger.info('positive_mean_prob = ' + str(opt.positive_mean_prob))

        # output meta information
        if opt.info:
            for key in clr_info:
                opt.outfile.write(
                    "#classifier_%s=%s\n" % (key, str(clr_info[key])))

            # items() instead of iteritems() works on Python 2 and 3
            for key, key_val in vars(opt).items():
                opt.outfile.write("#%s=%s\n" % (key, str(key_val)))
    finally:
        ### post process

        # close file (also when an error interrupted the run); the standard
        # streams are left open for the interpreter
        if opt.infile is not sys.stdin:
            opt.infile.close()

        if opt.outfile is not sys.stdout:
            opt.outfile.close()

        # the model is an input stream, so it is compared against stdin
        if opt.model is not sys.stdin:
            opt.model.close()

    sys.exit(0)
def main(opt):
    """ Main routine that exits with status code 0

    Reads a data matrix from ``opt.infile`` (last column = target, all other
    columns = features), trains a classifier with ``train()`` and pickles
    first the classifier and then a dictionary of the stringified options
    into ``opt.outfile``.

    If ``opt.ntry`` is positive, ``train()`` is called ``opt.ntry`` times and
    the classifier with the smallest ``f_loss_`` is kept; otherwise it is
    called exactly once.

    Parameters
    ----------
    opt : object
        Option namespace.  Attributes read: ``infile``, ``outfile`` (open
        file objects) and ``ntry``; the namespace is also handed on to
        ``train()``.  Attributes written: ``start_time``, ``final_loss``,
        ``best_trial`` (only when ``opt.ntry`` is positive), ``end_time``,
        ``elapsed_time``, ``elapsed_utime``, ``nos_samples``,
        ``nos_features``, ``classifier``, ``fadm_version`` and
        ``sklearn_version``.

    Raises
    ------
    RuntimeError
        If none of the ``opt.ntry`` trials reached a loss below infinity, so
        that there is no classifier that could be written.
    SystemExit
        Raised with status 0 after the run completed successfully.
    """

    try:
        ### pre process

        # read data
        D = np.loadtxt(opt.infile)

        # split data and process missing values
        y = np.array(D[:, -1])
        X = fill_missing_with_mean(D[:, :-1])
        del D

        ### main process

        # set starting time
        start_time = datetime.datetime.now()
        start_utime = os.times()[0]
        opt.start_time = start_time.isoformat()
        logger.info("start time = " + start_time.isoformat())

        # init constants
        # NOTE(review): passed to train() as third argument; presumably the
        # number of sensitive features -- confirm against train()
        ns = 1

        # train
        if opt.ntry <= 0:
            # train only once with zero coefficients
            clr = train(X, y, ns, opt)
            opt.final_loss = clr.f_loss_
            logger.info('final_loss = ' + str(opt.final_loss))
        else:
            # train multiple times with random restarts
            clr = None
            best_loss = np.inf
            best_trial = 0
            for trial in range(1, opt.ntry + 1):
                logger.info("Trial No. " + str(trial))
                tmp_clr = train(X, y, ns, opt)
                logger.info("loss = " + str(tmp_clr.f_loss_))
                if tmp_clr.f_loss_ < best_loss:
                    clr = tmp_clr
                    best_loss = clr.f_loss_
                    best_trial = trial

            # An inf or NaN loss never compares below np.inf.  Without this
            # check a run in which every trial diverged would silently
            # pickle None as the model.
            if clr is None:
                raise RuntimeError(
                    "none of the %d trials reached a loss below infinity; "
                    "no classifier to write" % opt.ntry)

            opt.final_loss = best_loss
            logger.info('final_loss = ' + str(opt.final_loss))
            opt.best_trial = best_trial
            logger.info('best_trial = ' + str(opt.best_trial))

        # set end and elapsed time
        end_time = datetime.datetime.now()
        end_utime = os.times()[0]
        logger.info("end time = " + end_time.isoformat())
        opt.end_time = end_time.isoformat()
        logger.info("elapsed_time = " + str((end_time - start_time)))
        opt.elapsed_time = str((end_time - start_time))
        logger.info("elapsed_utime = " + str((end_utime - start_utime)))
        opt.elapsed_utime = str((end_utime - start_utime))

        ### output

        # add info
        opt.nos_samples = X.shape[0]
        logger.info('nos_samples = ' + str(opt.nos_samples))
        opt.nos_features = X.shape[1]
        logger.info('nos_features = ' + str(X.shape[1]))
        opt.classifier = clr.__class__.__name__
        logger.info('classifier = ' + opt.classifier)
        opt.fadm_version = fadm_version
        logger.info('fadm_version = ' + opt.fadm_version)
        opt.sklearn_version = sklearn_version
        logger.info('sklearn_version = ' + opt.sklearn_version)
        # disabled in the original script:
        # opt.training_score = clr.score(X, y)
        # logger.info('training_score = ' + str(opt.training_score))

        # write file: first the classifier, then the stringified options
        pickle.dump(clr, opt.outfile)
        info = {}
        # items() instead of iteritems() works on Python 2 and 3
        for key, key_val in vars(opt).items():
            info[key] = str(key_val)
        pickle.dump(info, opt.outfile)
    finally:
        ### post process

        # close file (also when an error interrupted the run); the standard
        # streams are left open for the interpreter
        if opt.infile is not sys.stdin:
            opt.infile.close()

        if opt.outfile is not sys.stdout:
            opt.outfile.close()

    sys.exit(0)
def main(opt):
    """ Main routine that exits with status code 0

    Reads a data matrix from ``opt.infile``, fits an L2-penalized
    ``LogisticRegression`` with intercept on it and pickles first the fitted
    classifier and then a dictionary of the stringified options into
    ``opt.outfile``.

    The last column of the data is used as target.  The features are all
    remaining columns, or, if ``opt.ns`` is true, all remaining columns
    except the ``N_NS`` columns directly in front of the target column.

    Parameters
    ----------
    opt : object
        Option namespace.  Attributes read: ``infile``, ``outfile`` (open
        file objects), ``ns`` and ``C`` (converted with ``float()`` and
        passed as ``C`` to ``LogisticRegression``).  Attributes written:
        ``start_time``, ``end_time``, ``elapsed_time``, ``elapsed_utime``,
        ``nos_samples``, ``nos_features``, ``classifier`` and
        ``fadm_version``.

    Raises
    ------
    SystemExit
        Raised with status 0 after the run completed successfully.
    """

    try:
        ### pre process

        # read data
        D = np.loadtxt(opt.infile)

        # split data and process missing values
        y = np.array(D[:, -1])
        if opt.ns:
            X = fill_missing_with_mean(D[:, :-(1 + N_NS)])
        else:
            X = fill_missing_with_mean(D[:, :-1])

        ### main process

        # set starting time
        start_time = datetime.datetime.now()
        start_utime = os.times()[0]
        opt.start_time = start_time.isoformat()
        logger.info("start time = " + start_time.isoformat())

        # main process
        clr = LogisticRegression(C=float(opt.C), penalty='l2',
                                 fit_intercept=True)
        clr.fit(X, y)

        # set end and elapsed time
        end_time = datetime.datetime.now()
        end_utime = os.times()[0]
        logger.info("end time = " + end_time.isoformat())
        opt.end_time = end_time.isoformat()
        logger.info("elapsed_time = " + str((end_time - start_time)))
        opt.elapsed_time = str((end_time - start_time))
        logger.info("elapsed_utime = " + str((end_utime - start_utime)))
        opt.elapsed_utime = str((end_utime - start_utime))

        ### output

        # add info
        opt.nos_samples = X.shape[0]
        logger.info('nos_samples = ' + str(opt.nos_samples))
        opt.nos_features = X.shape[1]
        logger.info('nos_features = ' + str(X.shape[1]))
        opt.classifier = clr.__class__.__name__
        logger.info('classifier = ' + opt.classifier)
        opt.fadm_version = fadm_version
        logger.info('fadm_version = ' + opt.fadm_version)
        # disabled in the original script:
        # opt.training_score = clr.score(X, y)
        # logger.info('training_score = ' + str(opt.training_score))

        # write file: first the classifier, then the stringified options
        pickle.dump(clr, opt.outfile)
        info = {}
        # items() instead of iteritems() works on Python 2 and 3
        for key, key_val in vars(opt).items():
            info[key] = str(key_val)
        pickle.dump(info, opt.outfile)
    finally:
        ### post process

        # close file (also when an error interrupted the run); the standard
        # streams are left open for the interpreter
        if opt.infile is not sys.stdin:
            opt.infile.close()

        if opt.outfile is not sys.stdout:
            opt.outfile.close()

    sys.exit(0)