def summarize_sdss(db):
    datareleases = db.query(
        'SELECT db, count(*) c, count(distinct query) dc, '
        'count(distinct query)*1.0/count(*) FROM logs '
        'GROUP BY db ORDER BY c DESC')
    print("Number of queries per data release:")
    print(tabulate(ft(datareleases),
                   headers=['release', 'count', 'distinct', '% distinct']))
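# A minimal sketch of the `db` object assumed above: a thin wrapper exposing
# query() over an sqlite3 connection holding the SDSS logs table. The actual
# backend and class name are assumptions, not the project's confirmed design.
import sqlite3


class LogDB:
    def __init__(self, path):
        self.conn = sqlite3.connect(path)

    def query(self, sql):
        # Return all rows for the given SQL statement.
        return self.conn.execute(sql).fetchall()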
def main():
    results = load_results(os.path.abspath(__file__))
    plt.figure(figsize=(15, 8))
    plt.suptitle(
        "X_Train: {}, X_Test: {}, Tests Run: {}, Runtime: {}, Max: {}, Min: {}"
        .format(
            results[0]["params"]["n_train"],
            results[0]["params"]["n_test"],
            len(results),
            ft(sum(x.get("runtime", 0.0) for x in results)),
            ft(max(x.get("runtime", 0.0) for x in results)),
            ft(min(x.get("runtime", 0.0) for x in results))))
    plt.subplots_adjust(bottom=0.08, top=0.9, left=0.08, right=0.95,
                        wspace=0.2, hspace=0.4)

    # Rows and cols of the subplot grid.
    # Note: all graphs are written to file when save_plots() is called,
    # regardless of whether they are called here in main().
    global rows
    global cols
    rows = 3
    cols = 3

    # Comment out any graphs you do not wish to display and adjust
    # rows & cols accordingly.
    FCN_acc_v_hl(results)
    FCN_ttrain_v_hl(results)
    Conv_nc_v_acc(results)
    Pool_Comparison(results)
    Activation_Comparison_acc(results)
    Activation_Comparison_f1(results)
    channel_acc(results)
    epoch_acc(results)
    lr_acc(results)

    # Display graphs on screen.
    plt.show()
def log_results(self):
    """Write the results dict to file as JSON.

    Each results dict is serialized onto a single line and appended to
    the results file.
    """
    # f_path: path to the results file, defined elsewhere in the module.
    with open(f_path, 'a+') as f:
        f.write(json.dumps(self.res) + "\n")
    print("T - f1: {0:.3f}, accuracy: {1:.3f}, runtime: {2}".format(
        self.res["test"]["f1"], self.res["test"]["accuracy"],
        ft(self.res["run"]["runtime"])))
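# The one-JSON-object-per-line format above implies a simple loader. A minimal
# sketch of the `load_results` helper used by the plotting scripts below; the
# path convention (results file next to the calling script, ".results" suffix)
# is an assumption, not the project's confirmed layout.
def load_results(script_path):
    import json
    import os
    f_path = os.path.splitext(script_path)[0] + ".results"
    with open(f_path) as f:
        # One results dict per line, skipping any blank lines.
        return [json.loads(line) for line in f if line.strip()]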
def train(self):
    """Perform k-fold cross validation, saving the results of each
    iteration to self.res.
    """
    tr_start = time.time()
    for i in range(self.folds):
        # Hold out fold i for validation and train on the remaining folds.
        X_folds = np.array_split(self.X_train, self.folds)
        y_folds = np.array_split(self.y_train, self.folds)
        X_val = X_folds.pop(i)
        y_val = y_folds.pop(i)
        X_train = np.concatenate(X_folds)
        y_train = np.concatenate(y_folds)

        it_res = {}
        it_res["iter"] = i
        it_res["datetime"] = datetime.datetime.now().strftime("%x %X")
        it_res["n_train"] = len(X_train)

        tstart = time.time()
        self.svc.fit(X_train, y_train)
        tend = time.time()
        it_res["t_train"] = tend - tstart

        it_res.update(self.test(X_val, y_val))
        self.res["train"].append(it_res)
        log.info(
            "{0} - f1: {1:.3f}, accuracy: {2:.3f}, train_time: {3}, test_time: {4}"
            .format(i, it_res['f1'], it_res['accuracy'],
                    ft(it_res["t_train"]), ft(it_res["runtime"])))

    self.calculate_results()
    self.res["run"]["traintime"] = time.time() - tr_start
    log.info("CV - f1: {:.3f}, accuracy: {:.3f}, runtime: {}".format(
        self.res["run"]["cv_f1"], self.res["run"]["cv_accuracy"],
        ft(self.res["run"]["traintime"])))
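# For reference, a minimal sketch of what `calculate_results` is assumed to
# do: average the per-fold metrics gathered above into self.res["run"]. The
# cv_f1/cv_accuracy key names come from the log line above; any aggregation
# beyond a simple mean is an assumption.
def calculate_results(self):
    folds = self.res["train"]
    self.res["run"]["cv_f1"] = float(np.mean([f["f1"] for f in folds]))
    self.res["run"]["cv_accuracy"] = float(
        np.mean([f["accuracy"] for f in folds]))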
def main():
    results = load_results(os.path.abspath(__file__))
    plt.figure(figsize=(15, 8))
    plt.suptitle(
        "X_Train: {}, X_Test: {}, Tests Run: {}, Runtime: {}, Max: {}, Min: {}"
        .format(
            results[0]["param"]["n_train"],
            results[0]["param"]["n_test"],
            len(results),
            ft(sum(x["run"]["runtime"] for x in results)),
            ft(max(x["run"]["runtime"] for x in results)),
            ft(min(x["run"]["runtime"] for x in results))))
    plt.subplots_adjust(bottom=0.08, top=0.9, left=0.08, right=0.95,
                        wspace=0.2, hspace=0.4)

    # Rows and cols of the subplot grid.
    # Note: all graphs are written to file when save_plots() is called,
    # regardless of whether they are called here in main().
    global rows
    global cols
    rows = 4
    cols = 3

    # Comment out any graphs you do not wish to display and adjust
    # rows & cols accordingly.
    acc_v_pca(results)
    f1_pc_v_dimensionality(results)
    f1_v_pca(results)
    poly_c_v_acc(results)
    poly_c_v_f1(results)
    poly_degree_v_acc(results)
    poly_degree_v_f1(results)
    rbf_c_v_acc(results)
    rbf_c_v_f1(results)
    rbf_gamma_v_acc(results)
    rbf_gamma_v_f1(results)

    # Display graphs on screen.
    plt.show()
def main(self):
    start_time = time.time()
    try:
        # self.load(self.id)
        self.train()
        self.save(self.id)
        self.res["test"] = self.test()
        self.res["runtime"] = time.time() - start_time
        print("Runtime: {}".format(ft(self.res["runtime"])))
    except Exception as e:
        log.info("ERROR while running test")
        log.info(str(e))
        log.info(sys.exc_info())
        log.info(traceback.format_exc())
    # Save results regardless of whether the run succeeded.
    self.save_results()
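# `ft` is used throughout to pretty-print durations. A minimal sketch,
# assuming it formats a float number of seconds as H:MM:SS; the real helper's
# formatting (and its behaviour when passed non-numeric input, as in
# summarize_sdss above) may differ.
def ft(seconds):
    seconds = int(round(seconds))
    h, rem = divmod(seconds, 3600)
    m, s = divmod(rem, 60)
    return "{}:{:02d}:{:02d}".format(h, m, s)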
def Conv_nc_v_acc(results, sp=True):
    if sp:
        plt.subplot(rows, cols, get_gn())
    plt.title('ConvX Accuracy Against n Convolution & Max Pooling Layers')
    plt.ylabel('Accuracy')
    plt.xlabel('n Convolution & Max Pooling Layers')
    convs = [x for x in results if x["params"]["model"] == "ConvX"]
    cl = [x["params"]["id"] for x in convs]
    accs = [x["test"]["accuracy"] for x in convs]
    f1s = [np.mean(x["test"]["f1_pc"]) for x in convs]
    rts = [ft(x["runtime"]) for x in convs]
    plot(cl, accs, 0)
    if sp:
        print("\nConvX accuracy against n convolution layers")
        print(cl)
        print(accs)
        print(f1s)
        print(" ".join(rts))
def FCN_acc_v_hl(results, sp=True):
    if sp:
        plt.subplot(rows, cols, get_gn())
    plt.title('FCN Accuracy Against n Hidden Layers')
    plt.ylabel('Accuracy')
    plt.xlabel('n Hidden Layers')
    fcns = [x for x in results if x["params"]["model"] == "FCN"]
    hl = [x["params"]["hidden_layers"] for x in fcns]
    accs = [x["test"]["accuracy"] for x in fcns]
    f1s = [np.mean(x["test"]["f1_pc"]) for x in fcns]
    rts = [ft(x["runtime"]) for x in fcns]
    plot(hl, accs, 0)
    if sp:
        print("\nFCN accuracy against n hidden layers")
        print(hl)
        print(accs)
        print(f1s)
        print(" ".join(rts))
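# The subplot functions above and below rely on two small helpers. Minimal
# sketches, assuming `get_gn` hands out the next index in the rows x cols
# grid and `plot` draws a simple line series; both signatures (including the
# unused third argument to `plot`) are assumptions.
gn = 0


def get_gn():
    global gn
    gn += 1
    return gn


def plot(xs, ys, style=0):
    plt.plot(xs, ys, marker='o')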
def channel_acc(results, sp=True):
    if sp:
        plt.subplot(rows, cols, get_gn())
    plt.title('Accuracy Against Number of Channels')
    plt.ylabel('Accuracy')
    plt.xlabel('Number of Channels')
    res = [
        x for x in results
        if x["params"]["tid"] == 5 or (
            x["params"]["tid"] == 4 and x["params"]["model"] == "ReLU")
    ]
    cl = [x["params"]["id"] for x in res]
    accs = [round(x["test"]["accuracy"], 3) for x in res]
    f1s = [round(np.mean(x["test"]["f1_pc"]), 3) for x in res]
    rts = [ft(x["runtime"]) for x in res]
    plt.bar(cl, accs)
    if sp:
        print("\nAccuracy against number of channels")
        print(cl)
        print(accs)
        print(f1s)
        print(" ".join(rts))
def Activation_Comparison_acc(results, sp=True):
    if sp:
        plt.subplot(rows, cols, get_gn())
    plt.title('Activation Function Accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Activation Function')
    acts = [
        x for x in results
        if x["params"]["tid"] == 4 or x["params"]["id"] == "Conv2"
    ]
    cl = [x["params"]["id"] for x in acts]
    accs = [round(x["test"]["accuracy"], 3) for x in acts]
    f1s = [round(np.mean(x["test"]["f1_pc"]), 3) for x in acts]
    rts = [ft(x["runtime"]) for x in acts]
    plt.bar(cl, accs)
    if sp:
        print("\nActivation function accuracy")
        print(cl)
        print(accs)
        print(f1s)
        print(" ".join(rts))
def Pool_Comparison(results, sp=True):
    if sp:
        plt.subplot(rows, cols, get_gn())
    plt.title('Accuracy of Different Pooling Layers')
    plt.ylabel('Accuracy')
    plt.xlabel('Pooling Layer')
    pools = [
        x for x in results
        if x["params"]["tid"] == 3 or (
            x["params"]["tid"] == 2 and x["params"]["id"] == "Conv2")
    ]
    cl = [x["params"]["id"] for x in pools]
    accs = [x["test"]["accuracy"] for x in pools]
    f1s = [np.mean(x["test"]["f1_pc"]) for x in pools]
    rts = [ft(x["runtime"]) for x in pools]
    plt.bar(cl, accs)
    if sp:
        print("\nAccuracy of different pooling layers")
        print(cl)
        print(accs)
        print(f1s)
        print(" ".join(rts))