def req_0(dict_G, dict_D, cmd, check_entry_list):
    """ Get DUT and golden data and draw the data picture.

    Arguments:
    dict_G - The dictionary containing golden commands and data.
    dict_D - The dictionary containing DUT commands and data.
    cmd - The command to search for.
    """
    data10 = uti.read_data(dict_D, cmd)
    data10_G = uti.read_data(dict_G, cmd)
    entry = CheckEntry(cmd, CheckEntry.Y)
    if data10_G is None:
        entry.add_err_msg(CMD_NOT_FOUND_S % (GOLDEN_S, cmd))
    else:
        entry.load_data_G(data10_G)
    if data10 is None:
        entry.add_err_msg(CMD_NOT_FOUND_S % (DUT_S, cmd))
    else:
        entry.load_data(data10)
    if data10 and data10_G and len(data10_G) != len(data10):
        entry.add_err_msg(GD_NOT_MATCH_S)
    check_entry_list.append(entry)
def main(gamma_input, proton_input, output, t_obs, flux, ref):
    t_obs *= u.h

    gammas = read_data(gamma_input, weight=True, spectrum='crab', t_obs=t_obs)
    protons = read_data(proton_input, weight=True, spectrum='proton', t_obs=t_obs)

    gammas = add_theta(gammas)
    protons = add_theta(protons)

    bins, bin_centers, bin_widths = make_energy_bins(
        e_min=0.08 * u.TeV, e_max=300 * u.TeV, bins=15, centering='log')

    rel_sens = calc_relative_sensitivity(gammas, protons, bins, method='exact', alpha=0.2)

    ax = plot_sensitivity(rel_sens, bins, bin_centers, label=f'This Analysis {t_obs:2.0f}')

    if flux:
        ax = plot_crab_flux(bins, ax)

    if ref:
        ax = plot_ref_sens(ax)

    ax.text(0.95, 0.95, 'Differential Sensitivity',
            transform=ax.transAxes,
            horizontalalignment='right',
            verticalalignment='center')

    ax.set_xscale('log')
    ax.set_yscale('log')
    ax.set_xlim([1E-2, 10**(2.5)])
    ax.set_ylim([0.8E-13, 2E-10])
    ax.set_ylabel(
        r'$ E^2 \times \mathrm{Flux}\ \mathrm{Sensitivity} \ / \ (\mathrm{erg} \ \mathrm{s}^{-1} \ \mathrm{cm}^{-2}$)'
    )
    ax.set_xlabel(
        r'$\mathrm{Reconstructed}\ \mathrm{Energy}\ E\ /\ \mathrm{TeV}$')
    ax.legend(loc='lower left')

    if output:
        plt.savefig(output)
    else:
        plt.show()
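# A minimal, self-contained sketch of what make_energy_bins with centering='log'
# is assumed to compute (log-spaced edges plus geometric-mean centers). The real
# helper's name and signature come from the calling code above; the function below
# is only illustrative.
import numpy as np

def log_energy_bins(e_min, e_max, n_bins):
    # Edges spaced uniformly in log10(E); centers are the geometric means of the edges.
    edges = np.logspace(np.log10(e_min), np.log10(e_max), n_bins + 1)
    centers = np.sqrt(edges[:-1] * edges[1:])
    widths = np.diff(edges)
    return edges, centers, widths

# Example: edges, centers, widths = log_energy_bins(0.08, 300, 15)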
def stack_with_features():
    train = read_data('raw_data/train.csv')
    test = read_data('raw_data/test.csv')

    df_tr = pd.read_csv(agg_loc + agg_name)
    train = pd.merge(train, df_tr, on='card_id', how='left').fillna(0)
    test = pd.merge(test, df_tr, on='card_id', how='left').fillna(0)
    del df_tr

    train = fe.combine_categs(train)
    test = fe.combine_categs(test)

    train = train[
        ['card_id', 'target'] +
        [col for col in train.columns if 'purchase' in col or 'month' in col]]
    test = test[
        ['card_id'] +
        [col for col in train.columns if 'purchase' in col or 'month' in col]]
    print(train.columns)

    stacked = pd.read_csv('results/stack_n_blend/oof_predictions.csv')
    del stacked['Unnamed: 0']
    del stacked['target']
    st_test = pd.read_csv('results/stack_n_blend/all_predictions.csv')
    # stacked = stacked[[col for col in stacked.columns if 'lightGBM_' in col]]
    # st_test = st_test[[col for col in stacked.columns if 'lightGBM_' in col] + ['card_id']]
    train = pd.concat([train, stacked], axis=1)
    test = pd.merge(test, st_test, on='card_id', how='left')
    del train['lightGBM_full']
    del test['lightGBM_full']

    target = train['target']
    id_to_sub = test.card_id
    del train['target']
    del train['card_id']
    del test['card_id']

    kfolds = KFold(10, shuffle=True, random_state=42)
    predictions, cv_score, feat_imp, oof = ms.lightgbm_train(
        train, test, target, kfolds)

    sub_df = pd.DataFrame({"card_id": id_to_sub.values})
    sub_df["target"] = predictions
    sub_df.to_csv(save_loc + 'stacked_with_feats.csv', index=False)
    feat_imp.to_csv(save_loc + "stacked_with_feats_featimp.csv", index=False)
    print(cv_score)
def req_1(dict_G, dict_D, cmd, check_entry_list):
    """ Get DUT and golden data and draw the data picture.

    Arguments:
    dict_G - The dictionary containing golden commands and data.
    dict_D - The dictionary containing DUT commands and data.
    cmd - The command to search for.
    """
    x = []
    x_G = []
    y1 = []
    y1_G = []
    data10 = uti.read_data(dict_D, cmd)
    data10_G = uti.read_data(dict_G, cmd)
    entry = CheckEntry(cmd, CheckEntry.XY)

    """ Plot Golden """
    if data10_G is None:
        entry.add_err_msg(CMD_NOT_FOUND_S % (GOLDEN_S, cmd))
    else:
        # The data alternates x and y values: even indices are x, odd indices are y.
        for j in range(len(data10_G)):
            if j % 2 == 0:
                x_G.append(data10_G[j])
            else:
                y1_G.append(data10_G[j])
        if not uti.is_same_len(x_G, y1_G):
            entry.add_err_msg(XY_NOT_MATCH_S % (GOLDEN_S, cmd))
        else:
            entry.load_data_G([x_G, y1_G])

    """ Plot DUT """
    if data10 is None:
        entry.add_err_msg(CMD_NOT_FOUND_S % (DUT_S, cmd))
    else:
        for j in range(len(data10)):
            if j % 2 == 0:
                x.append(data10[j])
            else:
                y1.append(data10[j])
        if not uti.is_same_len(x, y1):
            entry.add_err_msg(XY_NOT_MATCH_S % (DUT_S, cmd))
        else:
            entry.load_data([x, y1])

    if data10_G and data10 and len(data10_G) != len(data10):
        entry.add_err_msg(GD_NOT_MATCH_S)
    check_entry_list.append(entry)
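# Minimal sketch of the interleaving assumed in req_1: the raw command data is a
# flat list [x0, y0, x1, y1, ...], so even indices form the x series and odd indices
# the y series. The helper name below is illustrative, not part of the checker's API.
def split_interleaved(flat):
    xs = flat[0::2]
    ys = flat[1::2]
    return xs, ys

# Example: split_interleaved([1.0, 10.0, 2.0, 20.0]) -> ([1.0, 2.0], [10.0, 20.0])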
def req_2(dict_G, dict_D, cmd_x, check_entry_list):
    """ Get DUT and golden data and draw the data picture.

    Arguments:
    dict_G - The dictionary containing golden commands and data.
    dict_D - The dictionary containing DUT commands and data.
    cmd_x - The x-data command to search for; the matching y-data command
            is derived by replacing "_x" with "_y".
    """
    """ Read Data """
    data10_x = uti.read_data(dict_D, cmd_x)
    data10_x_G = uti.read_data(dict_G, cmd_x)
    cmd_y = cmd_x.replace("_x", "_y")
    data10_y = uti.read_data(dict_D, cmd_y)
    data10_y_G = uti.read_data(dict_G, cmd_y)
    entry = CheckEntry(cmd_x, CheckEntry.XY)

    """ Plot Golden """
    if data10_x_G is None:
        entry.add_err_msg(CMD_NOT_FOUND_S % (GOLDEN_S, cmd_x))
    if data10_y_G is None:
        entry.add_err_msg(CMD_NOT_FOUND_S % (GOLDEN_S, cmd_y))
    if data10_x_G is not None and data10_y_G is not None:
        if not uti.is_same_len(data10_x_G, data10_y_G):
            entry.add_err_msg(XY_NOT_MATCH_S % (GOLDEN_S, cmd_x))
        else:
            entry.load_data_G([data10_x_G, data10_y_G])

    """ Plot DUT """
    if data10_x is None:
        entry.add_err_msg(CMD_NOT_FOUND_S % (DUT_S, cmd_x))
    if data10_y is None:
        entry.add_err_msg(CMD_NOT_FOUND_S % (DUT_S, cmd_y))
    if data10_x is not None and data10_y is not None:
        if not uti.is_same_len(data10_x, data10_y):
            entry.add_err_msg(XY_NOT_MATCH_S % (DUT_S, cmd_x))
        else:
            entry.load_data([data10_x, data10_y])

    if uti.is_same_len(entry.get_data(), entry.get_data_G()) and len(entry.get_data()):
        if not uti.is_same_len(entry.get_data()[0], entry.get_data_G()[0]):
            entry.add_err_msg(GD_NOT_MATCH_S)
    check_entry_list.append(entry)
def prepare_dataset(config, columns=COMBINED_COL):
    print('Reading data...')
    dataset = utilities.read_data([
        '{}/{}'.format(config['dir']['data'], data['name'])
        for data in config['data']
    ], columns)
    return dataset
def _load_data(self):
    global G_DATA
    file_path = utilities.get_file()
    # try:
    G_DATA = utilities.read_data(file_path)
    self.data_name = "[b]" + file_path.split("/")[-1].split(".")[-2] + "[/b]"
    self.data_path = utilities.newline_insert(
        "/".join(file_path.split("/")[:-1]), "/", 28)
    for i in range(len(G_DATA.columns)):
        if i % 2 == 0:
            background = (0.99, 0.99, 0.99, 1)
        else:
            background = (0.95, 0.95, 0.98, 1)
        drag_button = DraggableButton(
            text=" [b]o[/b] " + G_DATA.columns[i] + " <" + str(G_DATA.dtypes[i]) + ">",
            markup=True,
            pos=(325, Window.height - (280 + 20 * i)),
            background_color=background,
            drop_func=self.refurbish,
            droppable_zone_objects=[
                self.ids.which_color, self.ids.which_filter,
                self.ids.which_X, self.ids.which_Y,
                self.ids.which_start_node, self.ids.which_end_node,
                self.ids.which_edges, self.ids.which_size,
                self.ids.whole_screen
            ],
            column=G_DATA.columns[i],
            origin=(325, Window.height - (280 + 20 * i)))
        self.add_widget(drag_button)
def main():
    full_batch, num_cls, label_enum = read_data(filename=args.train_set, dim=args.dim)

    if args.val_set is None:
        print('No validation set is provided. Will output model at the last iteration.')
        val_batch = None
    else:
        val_batch, _, _ = read_data(filename=args.val_set, dim=args.dim, label_enum=label_enum)

    num_data = full_batch[0].shape[0]
    config = ConfigClass(args, num_data, num_cls)

    if isinstance(config.seed, int):
        tf.random.set_seed(config.seed)
        np.random.seed(config.seed)

    if config.net in ('CNN_4layers', 'CNN_7layers', 'VGG11', 'VGG13', 'VGG16', 'VGG19'):
        model = CNN_model(config.net, config.dim, config.num_cls)
    else:
        raise ValueError('Unrecognized training model')

    if config.loss == 'MSELoss':
        loss = lambda y_true, y_pred: tf.square(y_true - y_pred)
    else:
        loss = lambda y_true, y_pred: tf.nn.softmax_cross_entropy_with_logits(
            logits=y_pred, labels=y_true)

    full_batch[0], mean_tr = normalize_and_reshape(full_batch[0], dim=config.dim, mean_tr=None)
    if val_batch is not None:
        val_batch[0], _ = normalize_and_reshape(val_batch[0], dim=config.dim, mean_tr=mean_tr)

    param = model.trainable_weights
    # TODO: check what use_resource in tf1 means
    # TODO: check if the following variables are used, saver saves mean_param
    mean_param = tf.Variable(name='mean_tr', initial_value=mean_tr,
                             trainable=False, validate_shape=True)
    label_enum_var = tf.Variable(name='label_enum', initial_value=label_enum,
                                 trainable=False, validate_shape=True)

    if config.optim in ('SGD', 'Adam'):
        gradient_trainer(config, loss, model, full_batch, val_batch, test_network=None)
    elif config.optim == 'NewtonCG':
        newton_trainer(config, loss, model, full_batch, val_batch, test_network=None)
def read_data(self, file_name, **kwargs):
    """ Read data and store them. """
    step_len = 100 if "step_len" not in kwargs else kwargs["step_len"]
    split_point = None if "split_point" not in kwargs else kwargs["split_point"]
    snr = None if "snr" not in kwargs else kwargs["snr"]
    norm = False if "norm" not in kwargs else kwargs["norm"]
    normal, fault, n_res, f_res = read_data(file_name, step_len, split_point, snr, norm)
    list_fault, list_parameters = parse_filename(file_name)
    self.step = step_len
    self.fe = len(normal[0])
    mode = [0, 0, 0, 0, 0, 0]
    # para = [0, 0, 0, 0, 0, [0, 0]]
    para = [0, 0, 0, 0, 0, 0, 0]
    # normal data
    if norm:
        for i, r in zip(normal, n_res):
            self.map[tuple(mode)].append(len(self.input))
            self.input.append(i)
            self.res.append(r)
            self.mode.append(tuple(mode))
            self.para.append(tuple(para))
    else:
        for i in normal:
            self.map[tuple(mode)].append(len(self.input))
            self.input.append(i)
            self.mode.append(tuple(mode))
            self.para.append(tuple(para))
    # fault data
    # find faults and parameters
    for i, j in zip(list_fault, list_parameters):
        assert i in self.fault_type
        index = self.fault_type.index(i)
        mode[index] = 1
        if isinstance(j, list):
            para[5] = j[0]
            para[6] = j[1]
        else:
            para[index] = j
    if norm:
        for i, r in zip(fault, f_res):
            self.map[tuple(mode)].append(len(self.input))
            self.input.append(i)
            self.res.append(r)
            self.mode.append(tuple(mode))
            self.para.append(tuple(para))
    else:
        for i in fault:
            self.map[tuple(mode)].append(len(self.input))
            self.input.append(i)
            self.mode.append(tuple(mode))
            self.para.append(tuple(para))
def req_5(dict_G, dict_D, cmd, check_entry_list):
    data = uti.read_data(dict_D, cmd)
    data_G = uti.read_data(dict_G, cmd)
    entry = CheckEntry(cmd, CheckEntry.TABLE)
    if data is None:
        entry.add_err_msg(CMD_NOT_FOUND_S % (DUT_S, cmd))
        str_D = list()
    else:
        str_D = re.split(r"\s+", data[0])
    if data_G is None:
        entry.add_err_msg(CMD_NOT_FOUND_S % (GOLDEN_S, cmd))
        str_G = list()
    else:
        str_G = re.split(r"\s+", data_G[0])

    # Bit flags: 1 = present in golden only, 2 = present in DUT only, 3 = present in both.
    comp_res = {}
    for s in str_G:
        comp_res[s.strip()] = 1
    for s in str_D:
        if s.strip() in comp_res:
            comp_res[s.strip()] = 3
        else:
            comp_res[s.strip()] = 2

    t_data = list()
    # t_data.append([DUT_S, GOLDEN_S, "Result"])
    idx = 0
    for item in comp_res:
        t_data.append(["", "", ""])
        if comp_res.get(item) & 2:
            t_data[idx][0] = item
        if comp_res.get(item) & 1:
            t_data[idx][1] = item
        if comp_res.get(item) == 3:
            t_data[idx][2] = "OK"
        else:
            t_data[idx][2] = "NOK"
        idx += 1
    entry.load_t_data(t_data)
    check_entry_list.append(entry)
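# A tiny, self-contained illustration (with hypothetical tokens) of the bit-flag
# comparison used in req_5: 1 = golden only, 2 = DUT only, 3 = present in both.
# It mirrors the logic above; the function name is illustrative only.
def compare_tokens(golden_tokens, dut_tokens):
    flags = {t: 1 for t in golden_tokens}
    for t in dut_tokens:
        flags[t] = 3 if t in flags else 2
    # One row per token: [DUT column, golden column, result]
    return [[t if f & 2 else "", t if f & 1 else "", "OK" if f == 3 else "NOK"]
            for t, f in flags.items()]

# Example: compare_tokens(["A", "B"], ["B", "C"])
# -> [['', 'A', 'NOK'], ['B', 'B', 'OK'], ['C', '', 'NOK']]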
def _on_file_drop(self, window, file_path):
    global G_DATA
    file_path = file_path.decode("utf-8")
    ext = os.path.splitext(file_path)[-1].lower()
    if ext == ".png":
        self.background_img = file_path
    else:
        G_DATA = utilities.read_data(file_path)
        print(G_DATA)
def prepare_dataset(config, train_type, columns):
    print('Reading data')
    raw_dataset = utilities.read_data([
        '{}/{}'.format(config['dir']['data'], data)
        for data in config['data'][train_type]
    ], columns)
    dataset = pd.concat([dataset for dataset in raw_dataset])

    if train_type == 'main':
        if 'specjvm' in config['name']:
            dataset = dataset.iloc[1000:2000]
        elif 'renaissance' in config['name']:
            pass
            # dataset = dataset[dataset['gc_time_clean'] < 100]
            # dataset = dataset.iloc[1500:]
            # dataset = dataset.iloc[2000:]  # [dataset['gc_time_clean'] < 1500]
        elif 'dacapo' in config['name']:
            dataset = dataset.iloc[1000:2000]

    print()
    print('Data summaries')
    print(dataset.describe())

    print()
    print('Prepare dataset to predict')
    pred_dataset = (dataset.iloc[:, :-1], dataset.iloc[:, -1])

    print()
    print('Create cleaned dataset')
    clean_dataset = utilities.clean_data(dataset)

    print()
    print('Splitting dataset')
    splitted_dataset = train_test_split(
        dataset.iloc[:, :-1], dataset.iloc[:, -1], test_size=0.25, random_state=42)

    print()
    print('Splitting cleaned dataset')
    splitted_cleaned_dataset = train_test_split(
        clean_dataset.iloc[:, :-1], clean_dataset.iloc[:, -1],
        test_size=0.25, random_state=42)

    return {
        'raw': raw_dataset,
        'dataset': dataset,
        'predict': pred_dataset,
        'cleaned': clean_dataset,
        'splitted_dataset': splitted_dataset,
        'splitted_cleaned_dataset': splitted_cleaned_dataset,
    }
def PreparingData(self):
    if self._status == "l":
        my_shelve = shelve.open(self.__filename)
        return self.__filename
    elif self._status == "s":
        X_train, labels_train, list_ch_train = ut.read_data(
            data_path=self.__pathDS, split="train")  # train
        X_test, labels_test, list_ch_test = ut.read_data(
            data_path=self.__pathDS, split="test")  # test
        features_train = ut.read_Features(data_path=self.__pathDS, split="train")  # features train
        features_test = ut.read_Features(data_path=self.__pathDS, split="test")  # features test

        assert list_ch_train == list_ch_test, "Mismatch in channels!"

        # Normalize?
        X_train, X_test = ut.standardize(X_train, X_test)

        # X_tr, X_vld, lab_tr, lab_vld = train_test_split(X_train, labels_train,
        #                                                 stratify=labels_train, random_state=123)

        # One-hot encoding:
        y_tr = ut.one_hot(labels_train)
        # y_vld = ut.one_hot(lab_vld)
        y_test = ut.one_hot(labels_test)

        my_shelve = shelve.open(self.__filename, 'n')
        my_shelve['data_train'] = X_train
        # my_shelve['data_vld'] = X_vld
        my_shelve['data_test'] = X_test
        my_shelve['labels_train'] = y_tr
        # my_shelve['labels_vld'] = y_vld
        my_shelve['labels_test'] = y_test
        my_shelve['features_train'] = features_train
        my_shelve['features_test'] = features_test
        return self.__filename
def main(input_file, output, thresholds):
    df = read_data(input_file)
    df = add_theta(df)

    if not thresholds:
        thresholds = [0.0]

    bins, bin_centers, bin_widths = make_energy_bins(
        e_min=0.08 * u.TeV, e_max=300 * u.TeV, bins=15, centering='log')

    ax = None
    for t in thresholds:
        e_true = df[df.gamma_score_mean > t].mc_energy.values
        e_reco = df[df.gamma_score_mean > t].energy_mean.values
        resolution = np.abs(e_reco - e_true) / e_true
        ax = plot_percentile(e_reco, resolution, t, bins, bin_centers, bin_widths, ax=ax)

    ax.plot([10**0, 10**2.47], [0.1, 0.1], '--', color='silver', label='SST sub-system')
    ax.set_xscale('log')
    ax.set_xlabel(r'$\mathrm{Reconstructed}\ \mathrm{Energy}\ /\ \mathrm{TeV}$')
    ax.set_ylabel(r'$\Delta E\ /\ E\ (68\% \ \mathrm{containment})$')
    ax.set_ylim([0, 0.5])
    ax.legend()
    plt.tight_layout()
    if output:
        plt.savefig(output)
    else:
        plt.show()
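# A minimal sketch of what plot_percentile is assumed to do with the relative error
# computed above: take the 68th percentile per energy bin as the resolution. The
# helper name and binning come from the calling code; this standalone version is
# only illustrative.
import numpy as np

def resolution_per_bin(e_reco, rel_error, bin_edges):
    # Assign each event to a bin and take the 68% containment value per bin.
    idx = np.digitize(e_reco, bin_edges) - 1
    return np.array([
        np.percentile(rel_error[idx == b], 68) if np.any(idx == b) else np.nan
        for b in range(len(bin_edges) - 1)
    ])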
def main(input_file, output, thresholds):
    df = read_data(input_file)
    df = add_theta(df)

    if not thresholds:
        thresholds = [0.0]

    bins, bin_centers, bin_widths = make_energy_bins(
        e_min=0.08 * u.TeV, e_max=300 * u.TeV, bins=15, centering='log')

    ax = None
    for t in thresholds:
        x = df[df.gamma_score_mean > t].energy_mean.values
        y = df[df.gamma_score_mean > t].theta
        ax = plot_percentile(x, y, t, bins, bin_centers, bin_widths, ax=ax)

    ref = np.loadtxt('references/South-SST-AngRes.txt')
    plt.plot(10**ref[:, 0], ref[:, 1], '--', label='SST sub-system', color='silver')

    ax.set_xscale('log')
    ax.set_xlim([0.5, 300])
    ax.set_ylim([0, 0.5])
    ax.set_ylabel('Angular Resolution / deg')
    ax.set_xlabel(r'$\mathrm{Reconstructed}\ \mathrm{Energy}\ /\ \mathrm{TeV}$')
    ax.legend()
    plt.tight_layout()
    if output:
        plt.savefig(output)
    else:
        plt.show()
def main(argv=None):
    ''' '''
    import numpy as np
    from utilities import read_data, SubPlot, axis
    from os.path import join

    if sys.version_info >= (3, 0):
        print('%s needs matplotlib. However, no matplotlib for python %s' % (
            sys.argv[0], sys.version_info,))
        return -1

    import matplotlib as mpl
    global DEBUG
    if DEBUG:
        mpl.rcParams['text.usetex'] = False
    else:
        mpl.use('PDF')
    import matplotlib.pyplot as plt

    if argv is None:  # Usual case
        argv = sys.argv[1:]
    sim_file, fig_dir = argv

    params = {
        'axes.labelsize': 18,  # Plotting parameters for latex
        'text.fontsize': 15,
        'legend.fontsize': 15,
        'text.usetex': True,
        'font.family': 'serif',
        'font.serif': 'Computer Modern Roman',
        'xtick.labelsize': 15,
        'ytick.labelsize': 15
    }
    mpl.rcParams.update(params)

    data = read_data(sim_file)
    X = axis(data=data[0], magnitude=False, label=r'$t$',
             ticks=np.arange(0, 100.1, 25))

    def _plot(Y):
        fig = plt.figure(figsize=(3.5, 2.5))
        ax = SubPlot(fig, (1, 1, 1), X, Y, color='b')
        ax.set_ylim(-0.02, 1.02)
        fig.subplots_adjust(bottom=0.15)  # Make more space for label
        fig.subplots_adjust(left=.15, bottom=.18)
        return (ax, fig)

    ax, fig_b = _plot(
        axis(data=data[1], magnitude=False, label=r'$S(t)$',
             ticks=np.arange(0, 1.1, 1)))
    ax, fig_d = _plot(
        axis(data=data[3], magnitude=False, label=r'$S(t)$',
             ticks=np.arange(0, 1.1, 1)))
    ax, fig_c = _plot(
        axis(data=data[2], magnitude=False, label=r'$y(t)$',
             ticks=np.arange(-4, 4.1, 4)))
    ax.set_ylim(-5, 5)
    fig_c.subplots_adjust(left=.2)

    if DEBUG:
        plt.show()
    else:
        fig_b.savefig(join(fig_dir, 'SGO_b.pdf'))
        fig_c.savefig(join(fig_dir, 'SGO_c.pdf'))
        fig_d.savefig(join(fig_dir, 'SGO_d.pdf'))
    return 0
import sys
import math
import time

import numpy as np
np.set_printoptions(precision=2, linewidth=1000)

import msvm
from msvm_kernels import linear, polynomial, rbf, sigmoid
from utilities import read_data, classification_accuracy, tune

if __name__ == '__main__':
    with open(sys.argv[1]) as f:
        X, y = read_data(f)

    N, d = X.shape
    train_N = int(0.8 * N)

    # split the data into training and testing sets
    sel_idx = np.random.choice(np.arange(N), train_N, replace=False)
    selection = np.full((N, ), False, dtype=bool)
    selection[sel_idx] = True
    train_X = X[selection, :]
    train_y = y[selection]
    test_X = X[np.invert(selection), :]
    test_y = y[np.invert(selection)]
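# Note: an equivalent 80/20 split (optionally with a fixed seed) could be done with
# scikit-learn instead of the boolean mask above, if that dependency is acceptable
# here; shown only as an alternative, not as the script's actual approach.
# from sklearn.model_selection import train_test_split
# train_X, test_X, train_y, test_y = train_test_split(X, y, train_size=0.8, random_state=0)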
Sconv = np.array([1, 1, 1, 1])  # strides of each convolution layer
num_trials = 10
he_init = True

if he_init:
    from build_234layer_1dconv_graph_he import build_234layer_1dconv_graph
else:
    from build_234layer_1dconv_graph_rnd import build_234layer_1dconv_graph

par = {'batch_size': batch_size, 'seq_len': seq_len, 'lrn_rate': lrn_rate, 'epochs': epochs,
       'krnl_sz': krnl_sz, 'krnl_sz_Bsg': krnl_sz_Bsg, 'L': L, 'K': K, 'n_classes': n_classes,
       'n_channels': n_channels, 'n_outchannel': N, 'Spool': Spool, 'Sconv': Sconv,
       'num_trials': num_trials, 'act_func': act_func}

#%% Prepare data
X_train, labels_train, list_ch_train = read_data(data_path="data/", split="train")  # train
X_test, labels_test, list_ch_test = read_data(data_path="data/", split="test")  # test

assert list_ch_train == list_ch_test, "Mismatch in channels!"

# Normalize?
X_train, X_test = standardize(X_train, X_test)

# Train/Validation Split
X_tr, X_vld, lab_tr, lab_vld = train_test_split(X_train, labels_train,
                                                stratify=labels_train,
                                                random_state=123)

# One-hot encoding:
y_tr = one_hot(lab_tr)
y_vld = one_hot(lab_vld)
y_test = one_hot(labels_test)
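# A minimal sketch of the one_hot helper assumed above, for integer class labels
# starting at 1 (adjust the offset if the real labels start at 0). The helper name
# below is illustrative and not the project's actual implementation.
import numpy as np

def one_hot_sketch(labels, n_classes=None):
    labels = np.asarray(labels).astype(int)
    n_classes = n_classes or int(labels.max())
    return np.eye(n_classes)[labels - 1]

# Example: one_hot_sketch([1, 3, 2], n_classes=3) -> rows [1,0,0], [0,0,1], [0,1,0]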
def main():
    full_batch, num_cls, label_enum = read_data(filename=args.train_set, dim=args.dim)

    if args.val_set is None:
        print('No validation set is provided. Will output model at the last iteration.')
        val_batch = None
    else:
        val_batch, _, _ = read_data(filename=args.val_set, dim=args.dim, label_enum=label_enum)

    num_data = full_batch[0].shape[0]
    config = ConfigClass(args, num_data, num_cls)

    if isinstance(config.seed, int):
        tf.compat.v1.random.set_random_seed(config.seed)
        np.random.seed(config.seed)

    if config.net in ('CNN_4layers', 'CNN_7layers', 'VGG11', 'VGG13', 'VGG16', 'VGG19'):
        x, y, outputs = CNN(config.net, num_cls, config.dim)
        test_network = None
    else:
        raise ValueError('Unrecognized training model')

    if config.loss == 'MSELoss':
        loss = tf.reduce_sum(input_tensor=tf.pow(outputs - y, 2))
    else:
        loss = tf.reduce_sum(
            input_tensor=tf.nn.softmax_cross_entropy_with_logits(logits=outputs, labels=y))

    network = (x, y, loss, outputs)

    sess_config = tf.compat.v1.ConfigProto()
    sess_config.gpu_options.allow_growth = True

    with tf.compat.v1.Session(config=sess_config) as sess:
        full_batch[0], mean_tr = normalize_and_reshape(full_batch[0], dim=config.dim, mean_tr=None)
        if val_batch is not None:
            val_batch[0], _ = normalize_and_reshape(val_batch[0], dim=config.dim, mean_tr=mean_tr)

        param = tf.compat.v1.trainable_variables()
        mean_param = tf.compat.v1.get_variable(name='mean_tr', initializer=mean_tr,
                                               trainable=False, validate_shape=True,
                                               use_resource=False)
        label_enum_var = tf.compat.v1.get_variable(name='label_enum', initializer=label_enum,
                                                   trainable=False, validate_shape=True,
                                                   use_resource=False)
        saver = tf.compat.v1.train.Saver(var_list=param + [mean_param])

        if config.optim in ('SGD', 'Adam'):
            gradient_trainer(config, sess, network, full_batch, val_batch, saver, test_network)
        elif config.optim == 'NewtonCG':
            newton_trainer(config, sess, network, full_batch, val_batch, saver,
                           test_network=test_network)
            Y_train.append(ground_truth_train[date][x, y])
        X_train += X_train_0_label[date]
        Y_train += Y_train_0_label[date]
    X_train = np.array(X_train)
    Y_train = np.array(Y_train)
    return X_train, Y_train


dates = ['2019_07_25', '2019_09_20', '2019_10_11']
N = 200
repeats = 5
version = "fraction"

print("1. Read data")
bands_train, bands_test, ground_truth_train, ground_truth_test, mask_train, mask_test = read_data(dates=dates)

print("2. Create test samples")
X_test, Y_test = create_test_samples(
    dates=dates,
    bands_test=bands_test,
    ground_truth_test=ground_truth_test,
    mask_test=mask_test)

print("3. Create train samples with 0 label")
X_train_0_label, Y_train_0_label = create_train_samples_0_label(
    dates=dates,
    bands_train=bands_train,
    ground_truth_train=ground_truth_train,
    mask_train=mask_train)

print("4. Create output files")
if version == "linear":
    results_file = open(f"Results_f1_score_vs_no_samples_{version}.txt", "a")
    results_file.write("date\tn\trepeat\tF1_score_test\tcoefs[0]\tcoefs[1]\tcoefs[2]\tintercept\tthreshold\n")
elif version == "fraction":
    results_file = open(f"Results_f1_score_vs_no_samples_{version}.txt", "a")
def single_model():
    train = read_data('raw_data/train.csv')
    test = read_data('raw_data/test.csv')

    df_tr = pd.read_csv(agg_loc + agg_name)
    train = pd.merge(train, df_tr, on='card_id', how='left').fillna(0)
    test = pd.merge(test, df_tr, on='card_id', how='left').fillna(0)
    del df_tr

    train = fe.combine_categs(train)
    test = fe.combine_categs(test)

    kfolds = KFold(5, shuffle=True, random_state=42)
    results = {}
    for_second_level = pd.DataFrame({'target': train['target']})

    for model in model_list.keys():
        to_train = model_list.get(model)
        for selection in sel_list:
            to_select = sel_list.get(selection)
            print(f'{model}_{selection}')

            df_train = train.copy()
            df_test = test.copy()
            target = df_train['target']
            id_to_sub = df_test['card_id']
            del df_train['target']
            del df_train['card_id']
            del df_test['card_id']

            df_train, df_test = to_select(df_train, df_test)
            predictions, cv_score, feat_imp, oof = to_train(
                df_train, df_test, target, kfolds)

            results[model + '_' + selection] = cv_score
            for_second_level[model + '_' + selection] = oof

            sub_df = pd.DataFrame({"card_id": id_to_sub.values})
            sub_df["target"] = predictions
            sub_df.to_csv(save_loc + model + '_' + selection + '.csv', index=False)
            feat_imp.to_csv(save_loc + model + '_' + selection + "_featimp.csv", index=False)
            for_second_level.to_csv(save_loc + 'oof_predictions.csv')

            print(f'{model}_{selection}:\t {cv_score}')
            print('_' * 40)
            print('_' * 40)
            print('\n')

    final = pd.DataFrame.from_dict(results, orient='index', columns=['CV_score'])
    final.to_csv(save_loc + 'single_cvscores.csv')
    for_second_level.to_csv(save_loc + 'oof_predictions.csv')
import numpy as np
import utilities as ut

X_train, labels_train, list_ch_train = ut.read_data(
    data_path="./datasets/data", split="train")  # train
def mainGJ(filename, **kwargs):
    # TODO: FIX THE KWARGS !!!
    """ Main execution using Gauss-Jordan elimination """
    DEBUG = get_or_default(kwargs, 'DEBUG', False)
    data = read_data(filename, ' ')
    c = Counter(data['data'])
    child_name = "C1"
    child_idx = data['header'].index(child_name)
    num_columns = len(data['header'])
    new_counter = match_by_column(c, child_idx)
    binary_data = binarize(new_counter)
    items = sorted(binary_data.items(), key=lambda x: x[1][2], reverse=True)

    def leak_exponent(k):
        # return (-sum(k) + 1,)
        return (1,)
        # return ()

    log_base = 2
    A_vect = [k + leak_exponent(k) for k, v in items if v[0] not in (1.0, 0.0)]
    A = np.array(A_vect) * Fraction(1, 1)
    b_vect = [v[0] for k, v in items if v[0] not in (1.0, 0.0)]
    b_vect = [log(1.0 - b, log_base) for b in b_vect]
    b_cnt = [(v[1], v[2]) for k, v in items if v[0] not in (1.0, 0.0)]
    if DEBUG:
        for i in xrange(A.shape[0]):
            print "b%d" % i, A_vect[i], b_vect[i], b_cnt[i]
    b = np.array(sp.symbols('b0:%d' % A.shape[0]))
    subs = dict(zip(b, b_vect))
    subs_cnt = dict(zip(b, b_cnt))

    A2, b2 = GaussJordanElimination(A, b)
    b3 = [1.0 - float(log_base**b.evalf(subs=subs)) for b in b2]
    subs_str = tuple([(str(k), v) for k, v in subs.iteritems()]) + \
        tuple([("r%d" % i, b2[i]) for i in range(len(b2))])
    subs_str = dict(subs_str)
    if DEBUG:
        print augment([A2, b2, b3])

    nonzero_i = (i for i in range(A2.shape[0]) if any(j != 0 for j in A2[i]))
    zero_i = (i for i in range(A2.shape[0]) if all(j == 0 for j in A2[i]))
    nonzero_v = list((A2[i], b2[i]) for i in nonzero_i)
    zero_v = list((A2[i], b2[i]) for i in zero_i)

    def product(l):
        return reduce(lambda x, y: x * y, l)

    def _min_fitness(b_val, b_subs_cnt_orig):
        b_subs_cnt = dict((k, v[1]) for k, v in b_subs_cnt_orig.iteritems())
        total = sum(b_subs_cnt.values())
        coeff = [(b.args if b.args else (1, b))
                 for b in (b_val.args if not type(b_val) == sp.Symbol else [b_val])]
        min_c = min(b_subs_cnt[c[1]] for c in coeff)
        return min_c / float(total)

    def _avg_fitness(b_val, b_subs_cnt_orig):
        b_subs_cnt = dict((k, v[1]) for k, v in b_subs_cnt_orig.iteritems())
        total = sum(b_subs_cnt.values())
        coeff = [(b.args if b.args else (1, b))
                 for b in (b_val.args if not type(b_val) == sp.Symbol else [b_val])]
        # print coeff
        return sum(b_subs_cnt[s[1]] / float(total) for s in coeff) / \
            float(sum(abs(s) for s, _ in coeff))
        # return sum(abs(s[0]) * (b_subs_cnt[s[1]] / float(total)) for s in coeff) / \
        #     sum(b_subs_cnt[s[1]] / float(total) for s in coeff)
        # return 1

    def _max_count_fitness(b_val, b_subs_cnt_orig):
        b_subs_cnt = dict((k, v[1]) for k, v in b_subs_cnt_orig.iteritems())
        total = sum(b_subs_cnt.values())
        coeff = [(b.args if b.args else (1, b))
                 for b in (b_val.args if not type(b_val) == sp.Symbol else [b_val])]
        return sum(b_subs_cnt[s[1]] / abs(s[0]) for s in coeff) / float(total)

    def _pu(x, n, c):
        n = float(n)
        x = float(x)
        c = float(c)
        sqr = sqrt(((x / n) * (1.0 - x / n)) / n)
        return c * sqr
        # return x/n - Ualph*sqr, x/n + Ualph*sqr

    def _pu_fitness(b_val, b_subs_cnt):
        # total = sum(b_subs_cnt.values())
        coeff = [(b.args if b.args else (1, b))
                 for b in (b_val.args if not type(b_val) == sp.Symbol else [b_val])]
        # return 1.0 - max(b_subs_cnt[b][0]/float(b_subs_cnt[b][1]) -
        #                  _pu(b_subs_cnt[b][0], b_subs_cnt[b][1], 1.65)[0] for c, b in coeff)
        # return 1.0 - max(b_subs_cnt[b][0]/float(b_subs_cnt[b][1]) -
        #                  abs(c)*_pu(b_subs_cnt[b][0], b_subs_cnt[b][1], 1.65) for c, b in coeff)
        return 1.0 - max(abs(c) * _pu(b_subs_cnt[b][0], b_subs_cnt[b][1], 1.65) for c, b in coeff)

    # fitness = _min_fitness
    # fitness = _avg_fitness
    fitness = _pu_fitness
    # BELOW: poor fitness!
    # fitness = _max_count_fitness

    solutions = []
    for i in nonzero_v:
        for zv in ([(0, 0)] + zero_v):
            for coeff in [2, 1, -1, -2]:
                expr = (i[1] + coeff * zv[1])
                fit = fitness(expr, subs_cnt)
                # print i[0], " [", coeff, "]", zv[0], "expr:", expr, \
                #     "value:", float(1.0 - log_base**expr.evalf(subs=subs)), "fitness:", fit
                solutions.append((i[0], 'V' if type(zv[0]) != int else '0', coeff, zv[1],
                                  "EXPR:", expr,
                                  float(1.0 - log_base**expr.evalf(subs=subs)), fit))
                if type(zv[0]) == int:
                    break

    GJElim_fit_distribution = []
    num_best_solutions = 5
    for i in range(num_columns):
        solutions_filtered = [s for s in sorted(solutions, key=lambda x: x[-1], reverse=True)
                              if s[0][i] == 1][:num_best_solutions]
        GJElim_fit_distribution.append(solutions_filtered[0][-2])
        suma = sum(s[-1] * s[-2] for s in solutions_filtered)
        if DEBUG:
            for s in solutions_filtered:
                print s
            print suma / sum(s[-1] for s in solutions_filtered)
            print ""

    if DEBUG:
        print augment([A2, b2, b3])

    GJElim_distribution = []
    for i in range(num_columns):
        for j in range(A2.shape[0]):
            if A2[j][i] == 1:
                GJElim_distribution.append(b3[j])
                break

    GJElim_distribution = [(d if d > 0 else 10e-5) for d in GJElim_distribution]
    GJElim_fit_distribution = [(d if d > 0 else 10e-5) for d in GJElim_fit_distribution]

    outs = []
    labels = []
    for h in data['header']:
        labels.append(["True", "False"])
        # FIXME: data['domain'] does not keep states sorted so states are messed up
        # labels.append(data['domain'][h])

    for solution in [GJElim_distribution, GJElim_fit_distribution]:
        leak = solution[-1]
        params = reduce(lambda x, y: x + y, [[a, 0] for a in solution[:-1]]) + [leak, ]
        parent_dims = [2] * (num_columns - 1)
        GJ_CPT = CPT([params, [1.0 - p for p in params]],
                     parent_dims, CPT.TYPE_NOISY_MAX, data['header'], labels)
        outs.append(GJ_CPT)

    return outs
def starting_counts(sequences):
    tag_starts = {}
    for seq in sequences:
        if seq[0] in tag_starts:
            tag_starts[seq[0]] += 1
        else:
            tag_starts[seq[0]] = 1
    return tag_starts


def ending_counts(sequences):
    tag_ends = {}
    for seq in sequences:
        if seq[-1] in tag_ends:
            tag_ends[seq[-1]] += 1
        else:
            tag_ends[seq[-1]] = 1
    return tag_ends


tagfile = "tags-universal.txt"
datafile = "brown-universal.txt"

tagset = read_tags(tagfile)
sentences = read_data(datafile)

keys = tuple(sentences.keys())
wordset = frozenset(chain(*[s.words for s in sentences.values()]))
word_sequences = tuple([sentences[k].words for k in keys])
tag_sequences = tuple([sentences[k].tags for k in keys])
N = sum(1 for _ in chain(*(s.words for s in sentences.values())))
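# A tiny, self-contained check of the two counters above on made-up tag sequences
# (the real inputs come from the Brown corpus files read in this script).
example_tags = [("NOUN", "VERB"), ("NOUN", "NOUN"), ("DET", "NOUN")]
assert starting_counts(example_tags) == {"NOUN": 2, "DET": 1}
assert ending_counts(example_tags) == {"VERB": 1, "NOUN": 2}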
def plot_mag_phase(dict_G, dict_D, cmd_fr, cmd_im, cmd_re, check_entry_list, style=None):
    """ Read Data """
    data10_fr = uti.read_data(dict_D, cmd_fr)
    data10_fr_G = uti.read_data(dict_G, cmd_fr)
    data10_re = uti.read_data(dict_D, cmd_re)
    data10_re_G = uti.read_data(dict_G, cmd_re)
    data10_im = uti.read_data(dict_D, cmd_im)
    data10_im_G = uti.read_data(dict_G, cmd_im)

    title_mag = cmd_im.replace('im', 'mag')
    entry_mag = CheckEntry(title_mag, CheckEntry.XY)
    title_phase = cmd_im.replace('im', 'phase')
    entry_phase = CheckEntry(title_phase, CheckEntry.XY)

    """ Process Golden Data """
    if data10_fr_G is None:
        entry_mag.add_err_msg(CMD_NOT_FOUND_S % (GOLDEN_S, cmd_fr))
        entry_phase.add_err_msg(CMD_NOT_FOUND_S % (GOLDEN_S, cmd_fr))
    if data10_im_G is None:
        entry_mag.add_err_msg(CMD_NOT_FOUND_S % (GOLDEN_S, cmd_im))
        entry_phase.add_err_msg(CMD_NOT_FOUND_S % (GOLDEN_S, cmd_im))
    if data10_re_G is None:
        entry_mag.add_err_msg(CMD_NOT_FOUND_S % (GOLDEN_S, cmd_re))
        entry_phase.add_err_msg(CMD_NOT_FOUND_S % (GOLDEN_S, cmd_re))
    if data10_fr_G and data10_im_G and data10_re_G:
        if not (uti.is_same_len(data10_fr_G, data10_re_G)
                and uti.is_same_len(data10_fr_G, data10_im_G)):
            entry_mag.add_err_msg(XY_NOT_MATCH_S % (GOLDEN_S, title_mag))
            entry_phase.add_err_msg(XY_NOT_MATCH_S % (GOLDEN_S, title_phase))
        else:
            mag_G, phase_G = uti.get_mag_angle(data10_re_G, data10_im_G)
            entry_mag.load_data_G([data10_fr_G, mag_G])
            entry_phase.load_data_G([data10_fr_G, phase_G])

    """ Process DUT Data """
    if data10_fr is None:
        entry_mag.add_err_msg(CMD_NOT_FOUND_S % (DUT_S, cmd_fr))
        entry_phase.add_err_msg(CMD_NOT_FOUND_S % (DUT_S, cmd_fr))
    if data10_im is None:
        entry_mag.add_err_msg(CMD_NOT_FOUND_S % (DUT_S, cmd_im))
        entry_phase.add_err_msg(CMD_NOT_FOUND_S % (DUT_S, cmd_im))
    if data10_re is None:
        entry_mag.add_err_msg(CMD_NOT_FOUND_S % (DUT_S, cmd_re))
        entry_phase.add_err_msg(CMD_NOT_FOUND_S % (DUT_S, cmd_re))
    if data10_fr and data10_im and data10_re:
        if not (uti.is_same_len(data10_fr, data10_re)
                and uti.is_same_len(data10_fr, data10_im)):
            entry_mag.add_err_msg(XY_NOT_MATCH_S % (DUT_S, title_mag))
            entry_phase.add_err_msg(XY_NOT_MATCH_S % (DUT_S, title_phase))
        else:
            mag, phase = uti.get_mag_angle(data10_re, data10_im)
            entry_mag.load_data([data10_fr, mag])
            entry_phase.load_data([data10_fr, phase])

    if uti.is_not_same_len_not_empty(entry_mag.get_data(), entry_mag.get_data_G()):
        entry_mag.add_err_msg(GD_NOT_MATCH_S)
        entry_phase.add_err_msg(GD_NOT_MATCH_S)

    entry_mag.xlabel = 'freq'
    entry_mag.ylabel = 'mag'
    entry_phase.xlabel = 'freq'
    entry_phase.ylabel = 'phase'
    check_entry_list.append(entry_mag)
    check_entry_list.append(entry_phase)
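# A minimal sketch of what uti.get_mag_angle is assumed to compute from the real and
# imaginary parts: magnitude sqrt(re^2 + im^2) and phase atan2(im, re) in degrees.
# The actual helper may differ (e.g. radians or dB); this is illustrative only.
import math

def mag_angle_sketch(re_values, im_values):
    mag = [math.hypot(re, im) for re, im in zip(re_values, im_values)]
    phase = [math.degrees(math.atan2(im, re)) for re, im in zip(re_values, im_values)]
    return mag, phase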
sess_config = tf.compat.v1.ConfigProto()
sess_config.gpu_options.allow_growth = True

with tf.compat.v1.Session(config=sess_config) as sess:
    graph_address = args.model_file + '.meta'
    imported_graph = tf.compat.v1.train.import_meta_graph(graph_address)
    imported_graph.restore(sess, args.model_file)

    mean_param = [v for v in tf.compat.v1.global_variables() if 'mean_tr:0' in v.name][0]
    label_enum_var = [v for v in tf.compat.v1.global_variables() if 'label_enum:0' in v.name][0]
    sess.run(tf.compat.v1.variables_initializer([mean_param, label_enum_var]))
    mean_tr = sess.run(mean_param)
    label_enum = sess.run(label_enum_var)

    test_batch, num_cls, _ = read_data(args.test_set, dim=args.dim, label_enum=label_enum)
    test_batch[0], _ = normalize_and_reshape(test_batch[0], dim=args.dim, mean_tr=mean_tr)

    x = tf.compat.v1.get_default_graph().get_tensor_by_name('main_params/input_of_net:0')
    y = tf.compat.v1.get_default_graph().get_tensor_by_name('main_params/labels:0')
    outputs = tf.compat.v1.get_default_graph().get_tensor_by_name('output_of_net:0')

    if args.loss == 'MSELoss':
        loss = tf.reduce_sum(input_tensor=tf.pow(outputs - y, 2))
    else:
        loss = tf.reduce_sum(
            input_tensor=tf.nn.softmax_cross_entropy_with_logits(
                logits=outputs, labels=tf.stop_gradient(y)))

    network = (x, y, loss, outputs)
    avg_loss, avg_acc, results = predict(sess, network, test_batch, args.bsize)