def mm_vs_distance():
    dir = '../../test'
    c = SiestaCalc(dir, mode='out', steps=[-2, -1])
    n = c.evol[1].filter('label', 'C')
    r = c.evol[1].distance_to_group(n[0])
    spin = c.evol[0]['up'] - c.evol[0]['dn']
    Plot.scatter(r, N.abs(spin), 'Distance to C', 'Absolute magnetic moment')
def setup():
    template = '../template'
    c = SiestaCalc(template, mode='out', steps=[-2, -1])
    n = c.evol[1].filter('label', 'C')
    r = c.evol[1].distance_to_group(n[0])
    spin = c.evol[0]['up'] - c.evol[0]['dn']
    Plot.scatter(r, N.abs(spin), 'Distance to C', 'Absolute magnetic moment')
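# The two functions above rely on a Plot.scatter(x, y, xlabel, ylabel) helper
# that is not shown here. A minimal matplotlib-based sketch of what it is
# assumed to look like (an illustration, not the original implementation):
import matplotlib.pyplot as plt

class Plot:
    @staticmethod
    def scatter(x, y, xlabel='', ylabel=''):
        plt.scatter(x, y)
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        plt.show()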
def draw_x_vs_y(data, xname, yname):
    plots = [('time', 'Execution Time (μs)'), ('mips', 'mips')]
    for p in plots:
        values = []
        x = data[xname]
        y = data[yname]
        for i in range(len(x)):
            name = x[i]['name']
            # use a comprehension instead of a lambda whose parameter shadowed
            # the loop index i
            items = [item for item in y if item['name'] == name]
            if len(items) > 0:
                row = {xname: x[i][p[0]], yname: items[0][p[0]]}
                values.append(row)
        processed = {p[1]: values}
        base = "%s/scatter/vs/%s-vs-%s-" % (output_base, xname, yname)
        plot.scatter(processed, xname, yname, '%s%s.png' % (base, p[0]),
                     xscale='log', yscale='log', line=True)
def plotMccEnerRelation(complx_id):
    # loading
    weight_fn = "/work/jaydy/dat/08ff_opt"
    weight = np.loadtxt(weight_fn)

    # lnr_ff = Lnr_ff('08ff_all_decoy.h5', 'all_set')
    lnr_ff = Lnr_ff("all_decoy.h5", "all_set")
    all_set = lnr_ff.loadH5()
    mcc = all_set[:, 0]
    ener = all_set[:, 1:]
    total_ener = np.dot(ener, weight)
    mcc_total = np.column_stack((mcc, total_ener))

    # sampling (np.vstack needs a sequence, not a bare generator)
    sample_sz = 2000
    if sample_sz <= mcc_total.shape[0]:
        mcc_total = sortMccTotalByMcc(mcc_total)
        sampled_mcc_total = np.vstack(list(sampleMccTotalByMcc(mcc_total, sample_sz)))
        mcc_total = sampled_mcc_total
    mcc, ener = mcc_total[:, 0], mcc_total[:, 1]

    # scatter_ofn = 'weighted_lnr' + '_scatter.pdf'
    scatter_ofn = complx_id + "_scatter.pdf"
    plot.scatter(mcc, ener, ofn=scatter_ofn, x_label="mcc", y_label="diff")

    # line_ofn = 'weighted_lnr' + '_line.pdf'
    line_ofn = complx_id + "_line.pdf"
    plot.two_scales(ener, mcc, ofn=line_ofn, right_label="ener", left_label="diff")
def compare(list_of_records, steps, algorithms):
    all_intermediate_values = []
    all_average_runs = []
    for record in list_of_records:
        minimum_value = 0
        maximum_value = max(record.list_of_results[0][2])  # max value from function_value
        intermediate_values = np.linspace(minimum_value, maximum_value, steps)
        average_run = np.zeros(steps)
        for i in range(steps):
            # for each value, find the first time it was reached by each run
            for l in record.list_of_results:
                j = 0
                nevals = l[3]
                function_value = l[2]
                # check the bound first so indexing cannot run past the end
                while j < l[1] - 1 and function_value[j] < intermediate_values[i]:
                    j += 1
                average_run[i] += nevals[j]
        average_run = np.divide(average_run, len(record.list_of_results))
        all_intermediate_values.append(list(intermediate_values))
        all_average_runs.append(list(average_run))
    # plot.line_plot(all_intermediate_values, all_average_runs,
    #                xaxis='f(x)', yaxis='Evaluations', labels=algorithms)
    plot.scatter(all_intermediate_values, all_average_runs,
                 xaxis='f(x)', yaxis='Evaluations', labels=algorithms)
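# The inner while-loop above finds, per threshold, the evaluation count at
# which a run first reached that value. Assuming the recorded values are
# nondecreasing (as convergence curves typically are), the same lookup can be
# done with a binary search; a small self-contained sketch with toy data:
import numpy as np

function_value = np.array([0.1, 0.4, 0.7, 0.9, 1.0])
nevals = np.array([10, 20, 30, 40, 50])
threshold = 0.8
j = int(np.searchsorted(function_value, threshold))  # first index with value >= threshold
j = min(j, len(nevals) - 1)                          # clamp, like the while-loop bound
print(nevals[j])  # -> 40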
def run(args, flat_tree):
    name_suffix = f"{args.method}-{args.seed}"
    run_func = {"nca": run_nca, "umap": run_umap, "catsne": run_catsne}[args.method]
    Z, Z_test = run_func(args)
    joblib.dump(Z, f"{Z_dir}/Z-{args.method}.z")
    joblib.dump(Z_test, f"{Z_dir}/Z_test-{args.method}.z")

    scatter(
        Z,
        None,  # Z_test
        y_train,
        y_test,
        tree=flat_tree,
        out_name=f"{plot_dir}/{name_suffix}.png",
        show_group="text",
    )

    if not args.no_score:
        score_name = f"{score_dir}/score-{name_suffix}.json"
        score_logger = ScoreLogger(score_name)
        evaluate_scores(X_train, y_train, X_test, y_test, Z, Z_test,
                        args.method, score_logger)
        # important: save the logger file
        score_logger.dump()
        score_logger.print()
def run_tsne(config, score_logger, seed=2020, rerun=True):
    Z0_name = f"{Z_dir}/Z0_{seed}.z"
    Z0_test_name = f"{Z_dir}/Z0_test_{seed}.z"

    if rerun or not os.path.exists(Z0_name):
        print("\n[DEBUG] Run original TSNE with ", config)
        Z0 = tsne(X_train, random_state=seed, **config)
        Z0_test = Z0.transform(X_test)
        joblib.dump(np.array(Z0), Z0_name)
        joblib.dump(np.array(Z0_test), Z0_test_name)
    else:
        Z0 = joblib.load(Z0_name)
        Z0_test = joblib.load(Z0_test_name)

    scatter(Z0, None, y_train, None,
            out_name=f"{plot_dir}/Z0_{seed}.png", show_group=None)

    if score_logger is not None:
        evaluate_scores(X_train, y_train, X_test, y_test,
                        Z0, Z0_test, "tsne", score_logger)

    return Z0, Z0_test  # Z0 is used as initialization in hc_tsne
def draw_scatters(group_name, data, col_map=None):
    base = "%s/scatter/%s" % (output_base, group_name)
    plot.scatter(data, 'hotness', 'mips', '%s/hotness.png' % base,
                 xscale='log', yscale='log', col_map=col_map)
    plot.scatter(data, 'compilation inefficiency', 'mips',
                 '%s/c-efficiency-vs-mips.png' % base,
                 xscale='linear', yscale='log', col_map=col_map)
    plot.scatter(data, 'execution inefficiency', 'mips',
                 '%s/e-efficiency-vs-mips.png' % base,
                 xscale='linear', yscale='log', col_map=col_map)
    plot.scatter(data, 'source block size', 'compilation inefficiency',
                 '%s/c-efficiency-vs-hotness.png' % base,
                 xscale='log', yscale='linear', col_map=col_map)
    plot.scatter(data, 'source block size', 'execution inefficiency',
                 '%s/e-efficiency-vs-hotness.png' % base,
                 xscale='log', yscale='linear', col_map=col_map)
def test_plot3():
    importlib.reload(plot)
    data: Iterable[Iterable[Tuple[int, int]]] = (
        ((0, 0), (-10, -10)),
        ((10, 10), (-9, -9)),
        ((5, 5), (-6, 8)),
    )
    result = plot.scatter(data, (-20, -20), (20, 20))
    print(result)
def run_hc_tsne(Z_init, tree, alpha, margin, config, score_logger,
                seed=2020, rerun=False):
    Z1_name = f"{Z_dir}/Z1_{seed}.z"
    Z1_test_name = f"{Z_dir}/Z1_test_{seed}.z"
    loss_name = f"{score_dir}/loss-{name_suffix}.json"
    loss_logger = LossLogger(loss_name)

    if rerun or not os.path.exists(Z1_name):
        print("\n[DEBUG] Run Hierarchical TSNE with ", config["Z_new"])
        Z1 = hc_tsne(
            X_train,
            initialization=Z_init,
            tree=tree,
            alpha=alpha,
            margin=margin,
            loss_logger=loss_logger,
            random_state=seed,
            **config["hc"],
            **config["Z_new"],
        )
        Z1_test = Z1.transform(X_test)
        loss_logger.dump()
        joblib.dump(np.array(Z1), Z1_name)
        joblib.dump(np.array(Z1_test), Z1_test_name)
    else:
        Z1 = joblib.load(Z1_name)
        Z1_test = joblib.load(Z1_test_name)

    fig_name = f"{plot_dir}/HC-{name_suffix}.png"
    scatter(Z1, None, y_train, None, tree=tree, out_name=fig_name)

    loss_logger.load(loss_name)
    plot_loss(loss_logger.loss, out_name=f"{plot_dir}/loss-{name_suffix}.png")

    if score_logger is not None:
        evaluate_scores(X_train, y_train, X_test, y_test,
                        Z1, Z1_test, "hc-tsne", score_logger)
paths = [
    'franco_scores.csv',
    'annotations_AF.csv',
    'annotations_AF_new.csv',
    'annotations_200+.csv',
    'annotations_200+_new.csv',
    'annotations.csv',
]
score_path = 'score_data/'


def average_score_per_exp(raw_data):
    max_exp = max([row[1] for row in raw_data])
    initial = [[] for _ in range(max_exp + 1)]

    def add_to_groups(groups, datum):
        exp = datum[1]
        score = datum[2]
        groups[exp].append(score)
        return groups

    grouped_by_exp = reduce(add_to_groups, raw_data, initial)
    averages = [mean(group) for group in grouped_by_exp]
    return averages


raw_data = [parse.parse_score_file(score_path + path) for path in paths]
averages = [average_score_per_exp(datum) for datum in raw_data]
for average in averages:
    y = average
    x = range(len(y))
    plot.scatter(x, y)
    print(average)
    print()
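# A tiny worked example of the grouping above, with hypothetical rows shaped
# (label, exp, score) to match the row[1]/row[2] accesses:
#   average_score_per_exp([('a', 0, 1.0), ('b', 0, 3.0), ('c', 1, 5.0)])
#   -> [2.0, 5.0]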
        print '- Lab', lab
        labels += [lab]
        tab, r, c = readTable(f)
        sizes = map(lambda x: abs(int(x[0]) - int(x[1])), tab)
        # if not len(sizes): metasizes += [[0]]
        metasizes += sizes
        # also make a scatterplot of score vs length
        LS = map(lambda x: (abs(int(x[0]) - int(x[1])), cfunc(x[3])), tab)
        metaLS += LS
    fo = sizedir + 'supermeta peak size distribution freq.pdf'
    plot.hist(metasizes, bins=[100, 125, 150, 175, 200, 225, 250, 275, 300],
              file=fo, custom='set size ratio 1; set yrange [0:*]',
              xlabel='Peak size (nt)', ylabel='Frequency', yfreq=1)
    plot.scatter(metaLS, xlabel='ROI length (nt)', ylabel='ROI score',
                 file=sizedir + 'supermeta_score_vs_size.pdf', logscale='x',
                 custom='set grid; set xrange [%s:*]' % (MINLENGTH))
except IOError:
    sys.exit('Cannot access bedfile %s' % (bedfile))

sys.exit()

if PLOTDISTANCEONLY:
    print 'PLOT DISTANCE ONLY', PLOTDISTANCEONLY
    if isdir(PLOTDISTANCEONLY) or os.access(PLOTDISTANCEONLY, os.F_OK):
        dirfiles = getFiles(PLOTDISTANCEONLY)
    else:
        dirfiles = glob.glob(PLOTDISTANCEONLY)
import pandas as pd
from sklearn.datasets import load_iris  # needed for load_iris() below
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

import dataInfo
import plot

# notes: C = regularization strength; a kernel transformation gives a better decision boundary
iris = load_iris()
X = pd.DataFrame(iris.data, columns=iris.feature_names)
X['target'] = iris.target

plot.target(X)  # shows an equal number of each unique target value
plot.count(X)
plot.scatter(X)
plot.swarm(X)

dataInfo.general(X)
dataInfo.missing_value_per_column(X)
dataInfo.colType(X)

y = X['target']  # the target we want to predict
X.drop('target', axis=1, inplace=True)
print(X.head())
print("target-values:", iris.target_names)

X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, train_size=0.6, test_size=0.4, random_state=9)

##### model
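# A minimal sketch of the model step the "##### model" marker leads into,
# using the SVC already imported above; the hyperparameter values here are
# assumptions, not taken from the original script:
model = SVC(C=1.0, kernel='rbf', random_state=9)
model.fit(X_train, y_train)
print("validation accuracy:", model.score(X_valid, y_valid))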
    ),
    # kernel_regularizer=tensorflow.keras.regularizers.l2(0.001)),
    tensorflow.keras.layers.Dense(1),
])
nn_cv = regression.cross_validate(nn, cleaner.x_train_np, y_np)

subs = [lasso, elastic_net, kernel_ridge, nn, xg_boost]
model = regression.build('Lasso', alpha=0.005)
stacked = regression.build('Stacked', model=model, sub_models=subs)
stacked_cv = regression.cross_validate(stacked, cleaner.x_train_np, y_np)

# print('KERNEL RIDGE', kernel_ridge_cv[1])
# print('ELASTIC NET', elastic_net_cv[1])
# print('LASSO', lasso_cv[1])
# print('XG BOOST', xg_cv)
# print('NEURAL NET', nn_cv[1])
print('STACKED', stacked_cv[1])

stacked.fit(cleaner.x_train_np, y_np)
pred = stacked.predict(cleaner.x_train_np)
plot.scatter(prices, numpy.exp(pred))

res = pandas.DataFrame()
res['Id'] = test_id
res['SalePrice'] = numpy.exp(stacked.predict(cleaner.x_test_np))
res.to_csv('predictions4.csv', index=False)

p_res = pandas.read_csv('predictions3.csv')
plot.scatter(res['SalePrice'], p_res['SalePrice'])
        self.coefs = V * D @ U.T @ Y

    def predict(self, X):
        if self.intercept:
            X = np.insert(X, -1, 1., axis=1)
        return np.dot(X, self.coefs)


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


if __name__ == '__main__':
    from sklearn import datasets

    from plot import scatter
    from normalize import get_standardized

    # Load the diabetes dataset
    X, Y = datasets.load_diabetes(return_X_y=True)

    # Split the data into training/testing sets
    X_train, X_test = X[:-20], X[-20:]
    Y_train, Y_test = Y[:-20], Y[-20:]
    X_train, X_test = get_standardized(X_train, X_test)

    regr = LinearRegressor(lambda_l2=0.001)
    regr.fit(X_train, Y_train)
    Y_pred = regr.predict(X_test)
    scatter(Y_test, Y_pred)
def main():
    l_value = "120.8"
    r_value = "61.7"

    parser = argparse.ArgumentParser(description='Delta errors simulation')
    parser.add_argument('-l', '--l-value', type=str, default=l_value, help='Correct l-value')
    parser.add_argument('-r', '--r-value', type=str, default=r_value, help='Correct r-value')
    parser.add_argument('-s', '--s-value', type=float, default=0.01, help='Correct step size, in mm')
    parser.add_argument('-a', '--a-value', type=str, default="210,330,90", help='Correct tower angles, in deg')
    parser.add_argument('-wl', '--wl-value', type=str, default=l_value, help='Wrong l-value')
    parser.add_argument('-wr', '--wr-value', type=str, default=r_value, help='Wrong r-value')
    parser.add_argument('-ws', '--ws-value', type=float, default=None, help='Wrong step size, in mm')
    parser.add_argument('-wa', '--wa-value', type=str, default=None, help='Wrong tower angles, in deg')
    parser.add_argument('-we', '--we-value', type=str, default="0", help='Wrong endstops')
    parser.add_argument('-v', '--v-value', type=str, default="wheel", help='Visualization type')
    # parser.add_argument('-save', '--save-value', type=str, default="sim_warp.png", help='Save plot to file with name')
    args = parser.parse_args()

    if args.ws_value is None:
        args.ws_value = args.s_value
    if args.wa_value is None:
        args.wa_value = args.a_value

    correct_l = ([float(l) for l in args.l_value.split(',')] if "," in args.l_value
                 else [float(args.l_value) for x in range(3)])
    correct_r = ([float(r) for r in args.r_value.split(',')] if "," in args.r_value
                 else [float(args.r_value) for x in range(3)])
    correct_a = [float(a) for a in args.a_value.split(',')]
    correct_e = [0., 0., 0.]
    wrong_l = ([float(l) for l in args.wl_value.split(',')] if "," in args.wl_value
               else [float(args.wl_value) for x in range(3)])
    wrong_r = ([float(r) for r in args.wr_value.split(',')] if "," in args.wr_value
               else [float(args.wr_value) for x in range(3)])
    wrong_a = [float(a) for a in args.wa_value.split(',')]
    wrong_e = ([float(e) for e in args.we_value.replace("#", "-").split(',')] if "," in args.we_value
               else [float(args.we_value.replace("#", "-")) for x in range(3)])

    # print(correct_l)
    # print(wrong_l)
    # print(args.s_value)

    correct = DeltaPrinter(correct_l, correct_r, args.s_value, correct_a, correct_e)
    wrong = DeltaPrinter(wrong_l, wrong_r, args.ws_value, wrong_a, wrong_e)
    # lean on the wrong printer is always 90, i.e. it thinks it's correct.
    # NB! lean does not work now anyway
    # correct.home()
    wrong.home()

    center_error = error(correct, wrong, 0, 0)[2]  # glue good and bad centers together

    viz = args.v_value
    csv = ["", "", ""] if viz == "heatmaps" else [""]
    points = []
    if viz == "heatmaps":
        points.extend(get_points_wheel(45, 5, 15.))
    else:
        points.extend(get_points_wheel(45, 100, 60.))

    for point in points:
        x = point[0]
        y = point[1]
        wrong.move(x, y)
        correct.tower_steps = [s for s in wrong.tower_steps]
        nozzle_position = correct.nozzle_position()
        if viz == "heatmaps":
            err = [0, 0, 0]
            err[0] = nozzle_position[0] - point[0]  # sign matches direction of shift
            err[1] = nozzle_position[1] - point[1]  # sign matches direction of shift
            err[2] = nozzle_position[2] - center_error  # sign matches direction of shift
            for i in range(3):
                csv[i] += "{0:.3f},{1:.3f},{2:.3f}\n".format(x, y, err[i])
        else:
            csv[0] += "{0:.3f},{1:.3f},{2:.3f}\n".format(
                nozzle_position[0], nozzle_position[1], nozzle_position[2])

    # max_y = error(correct, wrong, 0, 50)[1]
    # min_y = error(correct, wrong, 0, -50)[1]
    # xy_error = (max_y - min_y)
    # print("Dimensional accuracy:")
    # print("{0:.3f}mm for 100.000mm".format(xy_error))
    # print("")

    # FIXME: different args for different filenames
    # csv = StringIO(z_csv)
    # png = "{0}-{1}-with-{2}_{3}_z.png".format(correct_l, correct_r, wrong_l, wrong_r) if args.save_value == "generate" else args.save_value
    # print("Plot saved to file:\n" + png)
    # plot(csv, png, True)

    data = [StringIO(csv[i]) for i in range(len(csv))]
    # png = "{0}-{1}-with-{2}_{3}_xy.png".format(correct_l, correct_r, wrong_l, wrong_r) if args.save_value == "generate" else args.save_value
    # print("Plot saved to file:\n" + png)
    if viz == "heatmaps":
        plot(["X", "Y", "Z"], data, None, True,
             str(sys.argv).replace("', '", " ").replace("['", "").replace("']", ""))
    else:
        scatter(["COORDS"], data, None, True,
                str(sys.argv).replace("', '", " ").replace("['", "").replace("']", ""))
def _test_synapse(info):
    """
    Checks the functionality of all four supported plastic synapses
    (namely 'np', 'pn', 'pp', and 'nn'). For each type, plots are generated
    of the progress of input variables, synaptic weights, and changes of
    weights w.r.t. pre- and post-synaptic spikes.
    """
    assert isinstance(info, TestInfo)
    for the_synapse in [
            synapse.Synapse.plastic_peek_np(),
            synapse.Synapse.plastic_peek_pn(),
            synapse.Synapse.plastic_peek_pp(),
            synapse.Synapse.plastic_peek_nn(),
            ]:
        print(" Starting simulation of '" + the_synapse.get_name() + "'.")
        start_time = 0.0
        dt = 0.001
        nsteps = 20000
        pre_spikes_train = spike_train.create(distribution.default_excitatory_isi_distribution(), 0.0)
        post_spikes_train = spike_train.create(distribution.default_excitatory_isi_distribution(), 0.0)
        # pre_spikes_train = spike_train.spike_train(
        #     distribution.distribution({}),
        #     [0.001],
        #     start_time
        #     )
        # post_spikes_train = spike_train.spike_train(
        #     distribution.distribution({}),
        #     [0.002],
        #     start_time
        #     )
        synapse_recording = {"last_pre_times": [], "last_post_times": []}
        for var, value in the_synapse.get_variables().items():
            synapse_recording[var] = []
        last_pre_spike_time = start_time
        last_post_spike_time = start_time
        t = start_time
        for step in range(nsteps):
            utility.print_progress_string(step, nsteps)
            the_synapse.integrate(dt)
            was_pre_spike_generated = pre_spikes_train.on_time_step(t, dt)
            if was_pre_spike_generated:
                the_synapse.on_pre_synaptic_spike()
                last_pre_spike_time = t + dt
            was_post_spike_generated = post_spikes_train.on_time_step(t, dt)
            if was_post_spike_generated:
                the_synapse.on_post_synaptic_spike()
                last_post_spike_time = t + dt
            for var, value in the_synapse.get_variables().items():
                synapse_recording[var].append((t + dt, value))
            synapse_recording["last_pre_times"].append(last_pre_spike_time)
            synapse_recording["last_post_times"].append(last_post_spike_time)
            t += dt
        print(" Saving results.")
        output_dir = os.path.join(info.output_dir, the_synapse.get_name())
        for var in the_synapse.get_variables().keys():
            pathname = os.path.join(output_dir, "synapse_var_" + var + ".png")
            if var == the_synapse.get_weight_variable_name():
                title = the_synapse.get_short_description()
            else:
                title = None
            print(" Saving plot " + pathname)
            plot.curve(synapse_recording[var], pathname, title=title, colours="C1")
        weights_delta = []
        for i in range(1, len(synapse_recording[the_synapse.get_weight_variable_name()])):
            t, w = synapse_recording[the_synapse.get_weight_variable_name()][i]
            pre_t = synapse_recording["last_pre_times"][i]
            post_t = synapse_recording["last_post_times"][i]
            w0 = synapse_recording[the_synapse.get_weight_variable_name()][i - 1][1]
            weights_delta.append((post_t - pre_t, w - w0))
        weights_delta.sort(key=lambda pair: pair[0])
        pathname = os.path.join(output_dir, "plasticity.png")
        print(" Saving plot " + pathname)
        plot.scatter(
            datalgo.merge_close_points_by_add(weights_delta, dt),
            pathname,
            xaxis_name="post_t - pre_t",
            faxis_name="weight delta"
            )
    return 0
# print('GRADIENT BOOST', gdcv)

xg_boost = regression.build(
    'XGBoost',
    gamma=0.025,
    max_depth=4,
    min_child_weight=1.5,
    subsample=0.5,
    colsample_bytree=0.5,
    reg_lambda=0.75,
    reg_alpha=0.40,
    n_estimators=2000,
    learning_rate=0.01,
)
xg_cv = regression.cross_validate(xg_boost, x_train_np, y_np)
print('XG BOOST', xg_cv[1])

subs = [gradient_boost, xg_boost]
model = regression.build('ElasticNet', alpha=0.005)
stacked = regression.build('Stacked', model=model, sub_models=subs)
# stacked_cv = regression.cross_validate(stacked, x_train_np, y_np)
# print('STACKED', stacked_cv)

xg_boost.fit(x_train_np, y_np)
plot.scatter(x_train['revenue'], xg_boost.predict(x_train_np)**5)

res = pandas.DataFrame()
res['id'] = test_id
res['revenue'] = xg_boost.predict(x_test_np)**5
res.to_csv('predictions3.csv', index=False)
import numpy as np
import matplotlib.pyplot as plt

import plot

plot.scatter(np.arange(0, 10, 1), np.arange(0, 10, 1),
             x_lab='testx', y_lab='testy', color='r')
plt.show()
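# The plot module used above is not shown; a minimal sketch matching the call
# signature scatter(x, y, x_lab=..., y_lab=..., color=...) could look like
# this (an assumption; note it draws but does not call plt.show(), since the
# script above shows the figure itself):
import matplotlib.pyplot as plt

def scatter(x, y, x_lab='', y_lab='', color='b'):
    plt.scatter(x, y, c=color)
    plt.xlabel(x_lab)
    plt.ylabel(y_lab)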
        linear_regressor = LinearRegressor(intercept=self.intercept)
        indices_best = []
        R = Y
        for i in range(self.n_nonzero_coefs):
            # greedily pick the column most correlated with the current residual
            i_best = abs(np.dot(X.T, R)).argmax()
            indices_best.append(i_best)
            linear_regressor.fit(X[:, indices_best], Y)
            R = Y - linear_regressor.predict(X[:, indices_best])
        self.indices_best = indices_best
        self.linear_regressor = linear_regressor

    def predict(self, X):
        return self.linear_regressor.predict(X[:, self.indices_best])


if __name__ == '__main__':
    from sklearn.datasets import make_regression

    from normalize import get_standardized
    from plot import scatter

    X, Y = make_regression(noise=4, random_state=0)
    X = get_standardized(X)
    omp = OrthogonalMatchingPursuit2(n_nonzero_coefs=10)
    omp.fit(X, Y)
    Y_pred = omp.predict(X)
    print("indices of best columns out of 100:", omp.indices_best)
    scatter(Y, Y_pred)
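# For reference, a minimal cross-check of the greedy loop above against
# scikit-learn's own OrthogonalMatchingPursuit (assuming the library is
# available; this is not part of the original script):
import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import OrthogonalMatchingPursuit

X, Y = make_regression(noise=4, random_state=0)
ref = OrthogonalMatchingPursuit(n_nonzero_coefs=10).fit(X, Y)
print("sklearn's selected columns:", np.flatnonzero(ref.coef_))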
    def train_Plot(self):
        scatter(self.train[0])