def main():
    # ~ Fetch arguments and logger ~
    args = get_args()
    logger = get_logger(logpath=os.path.join('./hh.log'),
                        filepath=os.path.abspath(__file__))

    # ~ Set GPU/CPU if desired ~
    if args.use_gpu and torch.cuda.is_available():
        device = torch.device('cuda:' + str(args.gpu))
        torch.set_default_tensor_type(torch.cuda.FloatTensor)
    else:
        device = torch.device('cpu')
        torch.set_default_tensor_type(torch.FloatTensor)

    # ~ Fetch and Split Data ~
    data.generate_data()
    dataset = pickle.load(open("./hh.pkl", "rb"))
    if args.shuffle_data:
        random.shuffle(dataset)
    train = dataset[:int(args.training_split * len(dataset))]
    test = dataset[int(args.training_split * len(dataset)):]

    # ~ Instantiate Model ~
    Tp = bivariate_poly(degree=args.degree).to(device)  # Kinetic
    Vq = bivariate_poly(degree=args.degree).to(device)  # Potential
    _Tp = gradient_wrapper(Tp)
    _Vq = gradient_wrapper(Vq)
    model = SSINN(_Tp, _Vq, fourth_order, tol=args.tol).to(device)

    # ~ Train Model ~
    train_model(model, train, test, args, device, logger)

    # ~ Save Model ~
    torch.save({'state_dict': model.state_dict()}, './hh_model.pth')
def gen_bound(n_train, n_test, base_param, k):
    n_gate = 1
    values = np.random.uniform(-k, +k, n_train)
    ticks = np.random.uniform(0, 1, (n_train, n_gate)) < 0.01
    train_data = generate_data(values, ticks)
    values = smoothen(np.random.uniform(-1, +1, n_test))
    ticks = np.random.uniform(0, 1, (n_test, n_gate)) < 0.01
    test_data = generate_data(values, ticks, last=train_data["output"][-1])
    return base_param, train_data, test_data
def main():
    print('--start--')

    # Folder Paths
    log_dir = './files/training_logs/'

    # Hyper parameters
    num_features = 5
    classes = ['Dead', 'Alive: Wrong Direction', 'Alive: Right Direction']
    num_classes = len(classes)
    epochs = 10
    batch_size = 128
    learning_rate = 0.01

    # Load Data
    x_train, y_train = Data.generate_data(80000, num_features, num_classes)
    x_valid, y_valid = Data.generate_data(16000, num_features, num_classes)

    # Build model
    model = Model.build_model(num_features, num_classes, learning_rate)

    # View model summary
    model.summary()

    # Check memory needed during the training process (not accurate)
    Model.get_model_memory_usage(batch_size, model)

    # Get optimizer name
    opt_name = model.optimizer.__class__.__name__

    # Get folder name
    hparam_str = make_hparam_string(opt_name, learning_rate, batch_size, epochs)
    log_dir += hparam_str
    output_dir = log_dir + 'model/'

    # Create folder
    prepare_dir(output_dir)

    # Train the model
    train(model, x_train, y_train, x_valid, y_valid, batch_size, epochs, log_dir)

    # Evaluate the model
    evaluate(model, classes, x_valid, y_valid, output_dir)

    # Save the model
    Model.save_model(model, classes, output_dir)

    # Test on game
    # test_in_game(model, 1000, False, True, 200)

    # Visualize
    # plt.show()

    print('--end--')
def gen_discrete(n_train, n_test, base_param, k):
    n_gate = 1
    values = np.random.uniform(-1, +1, n_train)
    ticks = np.random.uniform(0, 1, (n_train, n_gate)) < 0.01
    # Snap the value at each ticked position to one of k fixed discrete values
    discrete_values = np.random.uniform(-1, 1, k)
    idx = np.where(ticks == 1)[0]
    values[idx] = np.random.choice(discrete_values, len(idx))
    train_data = generate_data(values, ticks)
    values = smoothen(np.random.uniform(-1, +1, n_test))
    ticks = np.random.uniform(0, 1, (n_test, n_gate)) < 0.01
    test_data = generate_data(values, ticks, last=train_data["output"][-1])
    return base_param, train_data, test_data
def gen_gate(n_train, n_test, base_param, k):
    n_gate = k
    values = np.random.uniform(-1, +1, n_train)
    ticks = np.random.uniform(0, 1, (n_train, n_gate)) < 0.01
    train_data = generate_data(values, ticks)
    values = smoothen(np.random.uniform(-1, +1, n_test))
    ticks = np.random.uniform(0, 1, (n_test, n_gate)) < 0.01
    test_data = generate_data(values, ticks, last=train_data["output"][-1])
    param = base_param.copy()
    param["shape"] = (1 + n_gate, param["shape"][1], n_gate)
    return param, train_data, test_data
def gen_trigger(n_train, n_test, base_param, k):
    n_gate = 1
    values = np.random.uniform(-1, +1, n_train)
    # Tick times are spaced by random intervals drawn from {1, ..., k}
    ticks_interval = np.random.randint(1, k + 1, size=(n_train))
    ticks_time = np.cumsum(ticks_interval)
    i_max = np.max(np.where(ticks_time < n_train)[0])
    ticks = np.zeros((n_train,))
    ticks[ticks_time[:i_max]] = 1
    train_data = generate_data(values, ticks)
    values = smoothen(np.random.uniform(-1, +1, n_test))
    ticks = np.random.uniform(0, 1, (n_test, n_gate)) < 0.01
    test_data = generate_data(values, ticks, last=train_data["output"][-1])
    return base_param, train_data, test_data
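# A minimal usage sketch for the gen_* task builders above. It assumes the
# generate_model / train_model / test_model helpers used elsewhere in this
# file; the base_param values below are illustrative only, not from the
# original code.
base_param = {"shape": (2, 1000, 1), "sparsity": 0.5, "radius": 0.01,
              "scaling": 0.25, "leak": 1.0, "noise": 0.0001}
param, train_data, test_data = gen_gate(25000, 2500, base_param, k=3)
model = generate_model(**param)
print("Training error : {0}".format(train_model(model, train_data)))
print("Testing error  : {0}".format(test_model(model, test_data)))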
def _update_baseline(self, model, epoch):
    # Load or copy baseline model based on self.from_checkpoint condition
    if self.from_checkpoint and self.alpha == 0:
        print('Baseline model loaded')
        self.model = load_model(self.path_to_checkpoint,
                                embed_dim=self.embed_dim,
                                n_customer=self.n_customer)
    else:
        print('Baseline model copied')
        self.model = copy_model(model,
                                embed_dim=self.embed_dim,
                                n_customer=self.n_customer)

    # For checkpoint
    self.model.save_weights('%s%s_baseline_epoch%s.h5' %
                            (self.weight_dir, self.task, epoch),
                            save_format='h5')

    # We generate a new dataset for the baseline model on each baseline update
    # to prevent possible overfitting
    self.dataset = generate_data(n_samples=self.n_rollout_samples,
                                 n_customer=self.n_customer)
    print(f'Evaluating baseline model on baseline dataset (epoch = {epoch})')
    self.bl_vals = rollout(self.model, self.dataset)
    self.mean = tf.reduce_mean(self.bl_vals)
    self.cur_epoch = epoch
def train_selected_model(activation, learning_rate, momentum, n_points,
                         n_epochs, batch_size, plot_points=False):
    train_data, test_data = data.generate_data(n_points)
    model = train.build_model(activation)
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)
    criterion = framework.MSELoss()

    t0 = time.perf_counter()
    history = train.train_model(model, optimizer, criterion, train_data,
                                test_data, n_epochs, batch_size)
    t1 = time.perf_counter()

    # Each entry matches its label: losses from compute_loss, error rates
    # (in percent) from compute_error
    result = {
        'train_loss': train.compute_loss(model, criterion, train_data, batch_size),
        'test_loss': train.compute_loss(model, criterion, test_data, batch_size),
        'train_err': train.compute_error(model, train_data, batch_size) * 100,
        'test_err': train.compute_error(model, test_data, batch_size) * 100,
        'time': t1 - t0
    }

    if plot_points:
        plot.plot_points(test_data, train_data, model, plot_points)

    return history, result
def add_contract(source, destination, provider, payload, amount, signedContract):
    encode_data = (
        source.encode()
        + destination.encode()
        + provider.encode()
        + payload.encode()
        + amount.encode()
    )
    # TEST
    # if not verify_sign(provider, encode_data, signedContract):
    #     return None
    # else:
    # new_contract = Contract(str(time.time()), source, destination, provider, payload, amount)
    new_contract = Contract(str(123), source, destination, provider, payload, amount)
    token_ledger[source] = str(int(token_ledger[source]) - int(new_contract.stake))
    token_ledger[destination] = str(
        int(token_ledger[destination]) - int(new_contract.stake)
    )
    active_contract_list.append(new_contract.serialize())
    block_data = generate_data(
        new_contract.serialize(), None, token_ledger, active_contract_list
    )
    node_chain_instance.add_block(block_data)
    """
    for i in range(len(node_chain_instance.block_data)):
        print(str(node_chain_instance.block_data[i]))
    # print((json.loads(node_chain_instance.block_data[-1].data)).get('ledger'))
    print((json.loads(node_chain_instance.block_data[-1].data)))
    """
    return block_data
def roc_curves(n_samples, p_value=False):
    """Compute ROC curve points by sweeping a threshold tau over the statistic."""
    data = generate_data(n_samples=n_samples)
    X, Y = data[:, :-1], data[:, -1]
    if p_value:
        I = np.abs(0.5 - compute_values(X))
    else:
        I = mutual_information(X, Y)
    tau_min, tau_max = I.min(), I.max()
    if not p_value and tau_min < 0:
        tau_min = 0
        print("warning - tau_min < 0")
    step = 500
    erange = [tau_min + i * (tau_max - tau_min) * 1. / step for i in range(step)]
    roc_x = []
    roc_y = []
    for tau in erange:
        pos = I > tau
        tp = pos[1000:].sum()
        tn = (1 - pos[:1000]).sum()
        roc_x.append(float(tp) / 200)
        roc_y.append(1 - float(tn) / 1000)
    return np.array(roc_x), np.array(roc_y), np.array(erange)
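# A hypothetical plotting snippet for roc_curves() (matplotlib assumed; the
# n_samples value is illustrative). roc_x holds true-positive rates and
# roc_y false-positive rates, so roc_y goes on the x-axis.
import matplotlib.pyplot as plt

roc_x, roc_y, erange = roc_curves(n_samples=2000)
plt.plot(roc_y, roc_x)
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.show()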
def gen_value(n_train, n_test, base_param, k):
    n_gate = 1
    values = np.random.uniform(-1, +1, (n_train, k))
    ticks = np.random.uniform(0, 1, (n_train, n_gate)) < 0.01
    train_data = generate_data(values, ticks)
    values = np.empty((n_test, k))
    for i in range(k):
        values[:, i] = smoothen(np.random.uniform(-1, +1, n_test))
    ticks = np.random.uniform(0, 1, (n_test, n_gate)) < 0.01
    test_data = generate_data(values, ticks, last=train_data["output"][-1])
    param = base_param.copy()
    param["shape"] = (k + n_gate, param["shape"][1], n_gate)
    return param, train_data, test_data
def add_transaction(source, destination, provider, payload, amount):
    new_trans = Transaction(
        str(time.time()), source, destination, provider, payload, amount
    )
    token_ledger[source] = str(int(token_ledger[source]) + int(new_trans.amount))
    token_ledger[destination] = str(
        int(token_ledger[destination]) + int(new_trans.amount)
    )
    block_data = generate_data(
        None, new_trans.serialize(), token_ledger, active_contract_list
    )
    node_chain_instance.add_block(block_data)
    transaction = (json.loads(node_chain_instance.block_data[-1].data)).get("transactions")
    if transaction is not None:
        transaction_list.append(transaction)
        print(transaction_list)
    """
    for i in range(len(node_chain_instance.block_data)):
        print(str(node_chain_instance.block_data[i]))
    print((json.loads(node_chain_instance.block_data[-1].data)))
    """
    return block_data
def n_mutual_information(n, n_samples):
    nI = np.zeros((n, 1200))
    for i in range(n):
        data = generate_data(n_samples=n_samples)
        X, Y = data[:, :-1], data[:, -1]
        nI[i, :] = mutual_information(X, Y)
    return nI
def test_compute_coefficients(size, mean1, mean2, num_threads):
    X, Y = generate_data(size, mean1, mean2)
    lrpy = LogisticRegressionPy()
    lrpy_coef = lrpy.compute_coefficients(X, Y)
    lrcpp = LogisticRegressionCpp(num_threads=num_threads)
    lrcpp_coef = lrcpp.compute_coefficients(X, Y)
    assert abs(lrpy_coef[0] - lrcpp_coef[0]) < 0.01
    assert abs(lrpy_coef[1] - lrcpp_coef[1]) < 0.01
    assert abs(lrpy_coef[2] - lrcpp_coef[2]) < 0.01
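# Illustrative direct invocation of the test above (all argument values are
# hypothetical); it checks that the Python and C++ logistic-regression
# backends agree on the same synthetic data.
test_compute_coefficients(size=1000, mean1=0.0, mean2=2.0, num_threads=4)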
def main(args):
    x_train, y_train, x_test, y_test = generate_data(args.samples,
                                                     args.seq_len,
                                                     args.seq_dim)
    model = train(x_train, y_train, args)
    y_pred = evaluate_model(model, x_train, y_train, args)
    y_pred = format_predictions(y_pred, args.seq_len, args.seq_dim)
    plot_predictions(x_train, y_train, y_pred)
def load_model(path, embed_dim=128, n_customer=20, n_encode_layers=3):
    """ Load model weights from an h5 file
        https://stackoverflow.com/questions/51806852/cant-save-custom-subclassed-model
    """
    small_dataset = generate_data(n_samples=5, n_customer=n_customer)
    model_loaded = AttentionModel(embed_dim, n_encode_layers=n_encode_layers)
    for data in small_dataset.batch(5):
        _, _ = model_loaded(data, decode_type='greedy')
    model_loaded.load_weights(path)
    return model_loaded
def main(args):
    x_train, y_train, x_test, y_test = generate_data(args.samples, args.seq_len)
    model = train(x_train, y_train, args)
    y_pred = evaluate_model(model, x_train, args)
    print(y_pred)
    print(y_train)
    plot_predictions(x_train, y_train, y_pred)
def estimate(T, N, sigmas, coeff, generate_data, estimators):
    y, X = generate_data(T, N, sigmas, coeff)
    betas_error = []
    for estimator_fn in estimators:
        beta, std_error = estimator_fn(y, X)
        betas_error.append((beta, std_error))
    return betas_error
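# A minimal sketch of an estimator conforming to the interface estimate()
# expects: a callable taking (y, X) and returning (beta, std_error).
# ols_estimator is a hypothetical example, not part of the original code.
import numpy as np

def ols_estimator(y, X):
    # Closed-form OLS: beta = (X'X)^{-1} X'y
    beta, _, _, _ = np.linalg.lstsq(X, y, rcond=None)
    resid = y - X @ beta
    dof = max(X.shape[0] - X.shape[1], 1)
    sigma2 = resid @ resid / dof
    # Standard errors from the diagonal of sigma^2 (X'X)^{-1}
    std_error = np.sqrt(sigma2 * np.diag(np.linalg.inv(X.T @ X)))
    return beta, std_error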
def main(targets):
    data_config = json.load(open('config/data-params.json'))
    main_model_config = json.load(open('config/main-model-params.json'))

    if 'test' in targets:
        dataset = generate_data(**data_config)
        save_data(dataset, **data_config)
        first_baseline_rmse = first_base()
        knn_baseline_rmse = knn_base()
        main_rmse = build_model(dataset, **main_model_config)
        print('Main RMSE: ', main_rmse,
              'First baseline RMSE: ', first_baseline_rmse,
              'KNN baseline RMSE: ', knn_baseline_rmse)
def create_data(
    train_size: int,
    dev_size: int,
    n_parts: int,
    train_part_range: Tuple[int, int],
    dev_part_range: Tuple[int, int],
) -> Tuple[Dict, CircuitGenerator]:
    circuit_gen = CircuitGenerator(n_parts)
    data = generate_data(circuit_gen, train_size, train_part_range,
                         dev_size, dev_part_range)
    return data, circuit_gen
def copy_model(model, embed_dim=128, n_customer=20):
    """ Copy model weights to a new model
        https://stackoverflow.com/questions/56841736/how-to-copy-a-network-in-tensorflow-2-0
    """
    small_dataset = generate_data(n_samples=5, n_customer=n_customer)
    new_model = AttentionModel(embed_dim)
    for data in small_dataset.batch(5):
        # _, _ = model(data, decode_type='sampling')
        cost, _ = new_model(data, decode_type='sampling')
    for a, b in zip(new_model.variables, model.variables):
        a.assign(b)  # copies the weight variables of model into new_model
    return new_model
def compute_ratios(tau, p_value=False):
    data = generate_data(n_samples=1000)
    X, Y = data[:, :-1], data[:, -1]
    if p_value:
        I = np.abs(0.5 - compute_values(X))
    else:
        I = mutual_information(X, Y)
    pos = I > tau
    fp = pos[:1000].sum()
    fn = (1 - pos[1000:]).sum()
    tp = pos[1000:].sum()
    tn = (1 - pos[:1000]).sum()
    return fp, fn, tp, tn
def main():
    args = config.parse_command_line_arguments()
    inputs_train, targets_train, inputs_test, targets_test = data.generate_data(args)
    results = {
        'inputs_train': inputs_train,
        'targets_train': targets_train,
        'inputs_test': inputs_test,
        'targets_test': targets_test
    }
    mdl = model.create_model(args, inputs_train, targets_train)
    train_model(args, mdl, results)
def train(cfg, log_path=None):
    model = AttentionModel(cfg.embed_dim, cfg.n_encode_layers, cfg.n_heads,
                           cfg.tanh_clipping, 'sampling')
    baseline = RolloutBaseline(model, cfg.task, cfg.weight_dir,
                               cfg.n_rollout_samples, cfg.embed_dim,
                               cfg.n_customer, cfg.warmup_beta, cfg.wp_epochs)
    optimizer = tf.keras.optimizers.Adam(learning_rate=cfg.lr)
    ave_loss = tf.keras.metrics.Mean()
    ave_L = tf.keras.metrics.Mean()

    for epoch in tqdm(range(cfg.epochs), desc='epoch'):
        t1 = time()
        dataset = generate_data(cfg.n_samples, cfg.n_customer)
        bs = baseline.eval_all(dataset)
        # bs: (cfg.batch_steps, cfg.batch) or None
        bs = tf.reshape(bs, (-1, cfg.batch)) if bs is not None else None

        for t, inputs in enumerate(dataset.batch(cfg.batch)):
            with tf.GradientTape() as tape:
                L, logp = model(inputs)
                b = bs[t] if bs is not None else baseline.eval(inputs, L)
                b = tf.stop_gradient(b)
                loss = tf.reduce_mean((L - b) * logp)
                L_mean = tf.reduce_mean(L)
            grads = tape.gradient(loss, model.trainable_weights)  # model.trainable_weights == theta
            grads, _ = tf.clip_by_global_norm(grads, 1.0)
            optimizer.apply_gradients(zip(grads, model.trainable_weights))  # optimizer.step
            ave_loss.update_state(loss)
            ave_L.update_state(L_mean)

            if t % (cfg.batch_steps * 0.1) == 0:
                print('epoch%d, %d/%dsamples: loss %1.2f, average L %1.2f, average b %1.2f\n' % (
                    epoch, t * cfg.batch, cfg.n_samples,
                    ave_loss.result().numpy(), ave_L.result().numpy(),
                    tf.reduce_mean(b)))

        baseline.epoch_callback(model, epoch)
        model.decode_type = 'sampling'
        model.save_weights('%s%s_epoch%s.h5' % (cfg.weight_dir, cfg.task, epoch),
                           save_format='h5')

        if cfg.islogger:
            if log_path is None:
                log_path = '%s%s_%s.csv' % (cfg.log_dir, cfg.task, cfg.dump_date)  # cfg.log_dir = ./Csv/
                with open(log_path, 'w') as f:
                    f.write('time,epoch,loss,average length\n')
            with open(log_path, 'a') as f:
                t2 = time()
                f.write('%dmin%dsec,%d,%1.2f,%1.2f\n' % (
                    (t2 - t1) // 60, (t2 - t1) % 60, epoch,
                    ave_loss.result().numpy(), ave_L.result().numpy()))

        ave_loss.reset_states()
        ave_L.reset_states()
def main():
    inputs_train, targets_train, inputs_test, targets_test = data.generate_data(args)
    results = {
        'inputs_train': inputs_train,
        'targets_train': targets_train,
        'inputs_test': inputs_test,
        'targets_test': targets_test
    }
    # Actual model that is being observed
    mdl = model.create_model(args, inputs_train, targets_train)
    # Dummy model for calculating the gradient
    mdl_test = model.create_model(args, inputs_train, targets_train)
    train_model(args, mdl, mdl_test, results)
def train_selected_model(activation: ty.Union[framework.Tanh, framework.ReLU],
                         learning_rate: float,
                         momentum: float,
                         n_points: int,
                         n_epochs: int,
                         batch_size: int,
                         track_history: bool = False,
                         plot_points: bool = False):
    """
    Train a miniproject model with a given activation using SGD and MSE loss.

    :param activation: activation function
    :param learning_rate: SGD learning rate
    :param momentum: SGD momentum
    :param n_points: number of points in training and test data
    :param n_epochs: number of epochs
    :param batch_size: batch size
    :param track_history: track training and test error and loss by epoch
    :param plot_points: generate plots visualizing model predictions of the
        training and test data
    :returns: (history dictionary, final results)
    """
    train_data, test_data = data.generate_data(n_points)
    model = train.build_model(activation)
    optimizer = framework.SGD(model, lr=learning_rate, momentum=momentum)
    criterion = framework.MSELoss(model)

    t0 = time.perf_counter()
    history = train.train_model(model, optimizer, criterion, train_data,
                                test_data, n_epochs, batch_size, track_history)
    t1 = time.perf_counter()

    # Each entry matches its label: losses from compute_loss, error rates
    # (in percent) from compute_error
    result = {
        'train_loss': train.compute_loss(model, criterion, train_data, batch_size),
        'test_loss': train.compute_loss(model, criterion, test_data, batch_size),
        'train_err': train.compute_error(model, train_data, batch_size) * 100,
        'test_err': train.compute_error(model, test_data, batch_size) * 100,
        'time': t1 - t0
    }

    if plot_points:
        plot.plot_points(test_data, train_data, model, plot_points)

    return history, result
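# A minimal usage sketch for train_selected_model (hyperparameter values are
# illustrative only, not from the original code):
if __name__ == '__main__':
    history, result = train_selected_model(framework.ReLU(),
                                           learning_rate=0.01, momentum=0.9,
                                           n_points=1000, n_epochs=25,
                                           batch_size=50, track_history=True)
    print(result)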
def profile():
    data = generate_data()[-1][1]
    funcs_generated_data = [
        create_data_increasing_depth,
        create_data_decreasing_depth
    ]
    funcs = [
        outer_flatten_1,
        outer_flatten_2,
        niccolum_flatten,
        tishka_flatten,
        zart_flatten,
        recursive_flatten_generator,
        recursive_flatten_iterator,
        tishka_flatten_with_stack
    ]

    for func_generated_data in funcs_generated_data:
        creating_data = func_generated_data(**data)
        for func in funcs:
            list(func(creating_data))
            time.sleep(0.3)
def main(): """ Demonstrate the NormalDistribution class with a dataset created using the data module. """ print("-----------------------") print("| codedrome.com |") print("| Normal Distribution |") print("-----------------------\n") d = data.generate_data() nd = normaldistribution.NormalDistribution() nd.data = d nd.calculate_prob_dist() nd.print_prob_dist()
np.random.seed(1)

# Build memory
n_gate = 1
model = generate_model(shape=(1 + n_gate, 1000, n_gate), sparsity=0.5,
                       radius=0.01, scaling=0.25, leak=1.0, noise=0.0001)

# Training data
n = 25000
values = np.random.uniform(-1, +1, n)
ticks = np.random.uniform(0, 1, (n, n_gate)) < 0.01
train_data = generate_data(values, ticks)
error = train_model(model, train_data)
print("Training error : {0}".format(error))

# Testing data
n = 2500
values = np.cos(np.linspace(0, 20 * np.pi, n))
ticks = np.zeros(n)
ticks[::25] = 1
test_data = generate_data(values, ticks, last=train_data["output"][-1])
error = test_model(model, test_data)
print("Testing error : {0}".format(error))

# Display
    fltrs.ewma_adaptive_variance_linear,
    # fltrs.ewma_variance,
    # fltrs.maww,
    # fltrs.des
]

configs = [
    data_trend,
    # data_simple,
    data_complex_trend,
    data_variation,
    # data_reversed_trend,
    data_trend_jump,
]

for config in configs:
    data.append(generate_data(config))

for f in filters:
    index = filters.index(f)
    filtered.append([])
    for x, y in data:
        print("Filtering data using {}".format(f.__name__))
        x_ = f(x)
        y_ = np.arange(len(x_)) if type(x_) is not tuple else ([range(len(x_[0]))] * len(x_))
        filtered[index].append((x_, y_))

dim = Subplots.get_dimensions(len(data))
figure, subplots = plt.subplots(*dim, sharex=True, sharey=True)
plots = Subplots(subplots)
plots.grid(True)
def check_training(self, model_type, label_type):
    print('----- ' + model_type + ', ' + label_type + ' -----')
    tf.reset_default_graph()
    with tf.Graph().as_default():
        # Load batch data
        batch_size = 4
        inputs, labels, inputs_seq_len, labels_seq_len = generate_data(
            label_type=label_type,
            model='attention',
            batch_size=batch_size)

        # Define placeholders
        inputs_pl = tf.placeholder(
            tf.float32,
            shape=[batch_size, None, inputs.shape[-1]],
            name='input')
        # `[batch_size, max_time]`
        labels_pl = tf.placeholder(tf.int32, shape=[None, None], name='label')

        # These are prepared for computing LER
        indices_true_pl = tf.placeholder(tf.int64, name='indices')
        values_true_pl = tf.placeholder(tf.int32, name='values')
        shape_true_pl = tf.placeholder(tf.int64, name='shape')
        labels_st_true_pl = tf.SparseTensor(indices_true_pl,
                                            values_true_pl,
                                            shape_true_pl)
        indices_pred_pl = tf.placeholder(tf.int64, name='indices')
        values_pred_pl = tf.placeholder(tf.int32, name='values')
        shape_pred_pl = tf.placeholder(tf.int64, name='shape')
        labels_st_pred_pl = tf.SparseTensor(indices_pred_pl,
                                            values_pred_pl,
                                            shape_pred_pl)
        inputs_seq_len_pl = tf.placeholder(tf.int32, shape=[None],
                                           name='inputs_seq_len')
        labels_seq_len_pl = tf.placeholder(tf.int32, shape=[None],
                                           name='labels_seq_len')
        keep_prob_input_pl = tf.placeholder(tf.float32, name='keep_prob_input')
        keep_prob_hidden_pl = tf.placeholder(tf.float32, name='keep_prob_hidden')

        # Define model graph
        output_size = 26 + 2 if label_type == 'character' else 61 + 2
        # model = load(model_type=model_type)
        network = BLSTMAttetion(batch_size=batch_size,
                                input_size=inputs[0].shape[1],
                                encoder_num_unit=256,
                                encoder_num_layer=2,
                                attention_dim=128,
                                decoder_num_unit=256,
                                decoder_num_layer=1,
                                embedding_dim=20,
                                output_size=output_size,
                                sos_index=output_size - 2,
                                eos_index=output_size - 1,
                                max_decode_length=50,
                                attention_weights_tempareture=1,
                                logits_tempareture=1,
                                parameter_init=0.1,
                                clip_grad=5.0,
                                clip_activation_encoder=50,
                                clip_activation_decoder=50,
                                dropout_ratio_input=1.0,
                                dropout_ratio_hidden=1.0,
                                weight_decay=0,
                                beam_width=0,
                                time_major=False)

        # Add to the graph each operation
        loss_op, logits, decoder_outputs_train, decoder_outputs_infer = network.compute_loss(
            inputs_pl,
            labels_pl,
            inputs_seq_len_pl,
            labels_seq_len_pl,
            keep_prob_input_pl,
            keep_prob_hidden_pl)
        learning_rate = 1e-3
        train_op = network.train(loss_op,
                                 optimizer='rmsprop',
                                 learning_rate_init=learning_rate,
                                 is_scheduled=False)
        decode_op_train, decode_op_infer = network.decoder(
            decoder_outputs_train,
            decoder_outputs_infer,
            decode_type='greedy',
            beam_width=1)
        ler_op = network.compute_ler(labels_st_true_pl, labels_st_pred_pl)
        attention_weights = decoder_outputs_infer.attention_scores

        # Add the variable initializer operation
        init_op = tf.global_variables_initializer()

        # Count total parameters
        parameters_dict, total_parameters = count_total_parameters(
            tf.trainable_variables())
        for parameter_name in sorted(parameters_dict.keys()):
            print("%s %d" % (parameter_name, parameters_dict[parameter_name]))
        print("Total %d variables, %s M parameters" %
              (len(parameters_dict.keys()),
               "{:,}".format(total_parameters / 1000000)))

        # Make feed dict
        feed_dict = {
            inputs_pl: inputs,
            labels_pl: labels,
            inputs_seq_len_pl: inputs_seq_len,
            labels_seq_len_pl: labels_seq_len,
            keep_prob_input_pl: network.dropout_ratio_input,
            keep_prob_hidden_pl: network.dropout_ratio_hidden,
            network.lr: learning_rate
        }

        with tf.Session() as sess:
            # Initialize parameters
            sess.run(init_op)

            # Wrapper for tfdbg
            # sess = tf_debug.LocalCLIDebugWrapperSession(sess)

            # Train model
            max_steps = 400
            start_time_global = time.time()
            start_time_step = time.time()
            ler_train_pre = 1
            not_improved_count = 0
            for step in range(max_steps):
                # Compute loss
                _, loss_train = sess.run([train_op, loss_op],
                                         feed_dict=feed_dict)

                # Gradient check
                # grads = sess.run(network.clipped_grads,
                #                  feed_dict=feed_dict)
                # for grad in grads:
                #     print(np.max(grad))

                if (step + 1) % 10 == 0:
                    # Change to evaluation mode
                    feed_dict[keep_prob_input_pl] = 1.0
                    feed_dict[keep_prob_hidden_pl] = 1.0

                    # Predict class ids
                    predicted_ids_train, predicted_ids_infer = sess.run(
                        [decode_op_train, decode_op_infer],
                        feed_dict=feed_dict)

                    # Compute accuracy
                    feed_dict_ler = {
                        labels_st_true_pl: list2sparsetensor(labels),
                        labels_st_pred_pl: list2sparsetensor(predicted_ids_infer)
                    }
                    ler_train = sess.run(ler_op, feed_dict=feed_dict_ler)

                    duration_step = time.time() - start_time_step
                    print('Step %d: loss = %.3f / ler = %.4f (%.3f sec)' %
                          (step + 1, loss_train, ler_train, duration_step))
                    start_time_step = time.time()

                    # Visualize
                    if label_type == 'character':
                        print('True            : %s' % num2alpha(labels[0]))
                        print('Pred (Training) : <%s' % num2alpha(predicted_ids_train[0]))
                        print('Pred (Inference): <%s' % num2alpha(predicted_ids_infer[0]))
                    else:
                        print('True            : %s' % num2phone(labels[0]))
                        print('Pred (Training) : < %s' % num2phone(predicted_ids_train[0]))
                        print('Pred (Inference): < %s' % num2phone(predicted_ids_infer[0]))

                    if ler_train >= ler_train_pre:
                        not_improved_count += 1
                    else:
                        not_improved_count = 0
                    if not_improved_count >= 5:
                        print('Model is Converged.')
                        break
                    ler_train_pre = ler_train

            duration_global = time.time() - start_time_global
            print('Total time: %.3f sec' % (duration_global))
def check_training(self):
    print('----- multitask -----')
    tf.reset_default_graph()
    with tf.Graph().as_default():
        # Load batch data
        batch_size = 4
        inputs, labels_true_char_st, labels_true_phone_st, inputs_seq_len = generate_data(
            label_type='multitask',
            model='ctc',
            batch_size=batch_size)

        # Define placeholders
        inputs_pl = tf.placeholder(tf.float32,
                                   shape=[None, None, inputs.shape[-1]],
                                   name='input')
        indices_pl = tf.placeholder(tf.int64, name='indices')
        values_pl = tf.placeholder(tf.int32, name='values')
        shape_pl = tf.placeholder(tf.int64, name='shape')
        labels_pl = tf.SparseTensor(indices_pl, values_pl, shape_pl)
        indices_second_pl = tf.placeholder(tf.int64, name='indices_second')
        values_second_pl = tf.placeholder(tf.int32, name='values_second')
        shape_second_pl = tf.placeholder(tf.int64, name='shape_second')
        labels_second_pl = tf.SparseTensor(indices_second_pl,
                                           values_second_pl,
                                           shape_second_pl)
        inputs_seq_len_pl = tf.placeholder(tf.int64, shape=[None],
                                           name='inputs_seq_len')
        keep_prob_input_pl = tf.placeholder(tf.float32, name='keep_prob_input')
        keep_prob_hidden_pl = tf.placeholder(tf.float32, name='keep_prob_hidden')

        # Define model graph
        output_size_main = 26
        output_size_second = 61
        network = Multitask_BLSTM_CTC(
            batch_size=batch_size,
            input_size=inputs[0].shape[1],
            num_unit=256,
            num_layer_main=2,
            num_layer_second=1,
            output_size_main=output_size_main,
            output_size_second=output_size_second,
            main_task_weight=0.8,
            parameter_init=0.1,
            clip_grad=5.0,
            clip_activation=50,
            dropout_ratio_input=1.0,
            dropout_ratio_hidden=1.0,
            num_proj=None,
            weight_decay=1e-6)

        # Add to the graph each operation
        loss_op, logits_main, logits_second = network.compute_loss(
            inputs_pl,
            labels_pl,
            labels_second_pl,
            inputs_seq_len_pl,
            keep_prob_input_pl,
            keep_prob_hidden_pl)
        learning_rate = 1e-3
        train_op = network.train(loss_op,
                                 optimizer='rmsprop',
                                 learning_rate_init=learning_rate,
                                 is_scheduled=False)
        decode_op_main, decode_op_second = network.decoder(
            logits_main,
            logits_second,
            inputs_seq_len_pl,
            decode_type='beam_search',
            beam_width=20)
        ler_op_main, ler_op_second = network.compute_ler(
            decode_op_main, decode_op_second, labels_pl, labels_second_pl)

        # Add the variable initializer operation
        init_op = tf.global_variables_initializer()

        # Count total parameters
        parameters_dict, total_parameters = count_total_parameters(
            tf.trainable_variables())
        for parameter_name in sorted(parameters_dict.keys()):
            print("%s %d" % (parameter_name, parameters_dict[parameter_name]))
        print("Total %d variables, %s M parameters" %
              (len(parameters_dict.keys()),
               "{:,}".format(total_parameters / 1000000)))

        # Make feed dict
        feed_dict = {
            inputs_pl: inputs,
            labels_pl: labels_true_char_st,
            labels_second_pl: labels_true_phone_st,
            inputs_seq_len_pl: inputs_seq_len,
            keep_prob_input_pl: network.dropout_ratio_input,
            keep_prob_hidden_pl: network.dropout_ratio_hidden,
            network.lr: learning_rate
        }

        with tf.Session() as sess:
            # Initialize parameters
            sess.run(init_op)

            # Wrapper for tfdbg
            # sess = tf_debug.LocalCLIDebugWrapperSession(sess)

            # Train model
            max_steps = 400
            start_time_global = time.time()
            start_time_step = time.time()
            ler_train_char_pre = 1
            not_improved_count = 0
            for step in range(max_steps):
                # Compute loss
                _, loss_train = sess.run([train_op, loss_op],
                                         feed_dict=feed_dict)

                # Gradient check
                # grads = sess.run(network.clipped_grads, feed_dict=feed_dict)
                # for grad in grads:
                #     print(np.max(grad))

                if (step + 1) % 10 == 0:
                    # Change to evaluation mode
                    feed_dict[keep_prob_input_pl] = 1.0
                    feed_dict[keep_prob_hidden_pl] = 1.0

                    # Compute accuracy
                    ler_train_char, ler_train_phone = sess.run(
                        [ler_op_main, ler_op_second], feed_dict=feed_dict)

                    duration_step = time.time() - start_time_step
                    print('Step %d: loss = %.3f / cer = %.4f / per = %.4f (%.3f sec)\n' %
                          (step + 1, loss_train, ler_train_char,
                           ler_train_phone, duration_step))
                    start_time_step = time.time()

                    # Visualize
                    labels_pred_char_st, labels_pred_phone_st = sess.run(
                        [decode_op_main, decode_op_second],
                        feed_dict=feed_dict)
                    labels_true_char = sparsetensor2list(
                        labels_true_char_st, batch_size=batch_size)
                    labels_true_phone = sparsetensor2list(
                        labels_true_phone_st, batch_size=batch_size)
                    labels_pred_char = sparsetensor2list(
                        labels_pred_char_st, batch_size=batch_size)
                    labels_pred_phone = sparsetensor2list(
                        labels_pred_phone_st, batch_size=batch_size)

                    # character
                    print('Character')
                    print('  True: %s' % num2alpha(labels_true_char[0]))
                    print('  Pred: %s' % num2alpha(labels_pred_char[0]))
                    print('Phone')
                    print('  True: %s' % num2phone(labels_true_phone[0]))
                    print('  Pred: %s' % num2phone(labels_pred_phone[0]))
                    print('----------------------------------------')

                    if ler_train_char >= ler_train_char_pre:
                        not_improved_count += 1
                    else:
                        not_improved_count = 0
                    if not_improved_count >= 5:
                        print('Model is Converged.')
                        break
                    ler_train_char_pre = ler_train_char

                    # Change to training mode
                    network.is_training = True

            duration_global = time.time() - start_time_global
            print('Total time: %.3f sec' % (duration_global))
    embed_dim=128, n_customer=args.n_customer, n_encode_layers=3)
print(f'model loading time:{time()-t1}s')
if args.txt is not None:
    datatxt = data_from_txt(args.txt)
    data = []
    for i in range(3):
        elem = [datatxt[i].squeeze(0) for j in range(args.batch)]
        data.append(torch.stack(elem, 0))
else:
    # data = generate_data(n_samples = 2, n_customer = args.n_customer, seed = args.seed)
    data = []
    for i in range(3):
        elem = [
            generate_data(1, args.n_customer, args.seed)[i]
            for j in range(args.batch)
        ]
        data.append(torch.stack(elem, 0))
print(f'data generate time:{time()-t1}s')
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
pretrained = pretrained.to(device)
data = list(map(lambda x: x.to(device), data))
pretrained.eval()
with torch.no_grad():
    costs, _, pi = pretrained(data, return_pi=True, decode_type=args.decode_type)
print('costs:', costs)
idx_in_batch = torch.argmin(costs, dim=0)
print(
np.random.seed(0)

LAMBDAS = np.logspace(-2, 2, 13)
xxx = np.linspace(0, 1, 1000)
y_true = true_func(xxx)

results = {
    'error': [],
    'bias^2': [],
    'variance': [],
}

start = time()
for l in LAMBDAS:
    errors = []
    betas = []
    for _ in range(100):
        X, Y = generate_data(length=25, gaussian_noise=0.1)
        beta = fit_polynomial_regression(X, Y, degree=12, l=l)
        if time() - start > 60:
            raise SystemExit()
        betas.append(beta)
        errors.append(mean_square_loss(y_hat(xxx, beta), y_true))
    results['error'].append(np.mean(errors))
    y_hats = np.array([y_hat(xxx, b) for b in betas]).T
    bias_2, var = bias2_variance(y_true, y_hats)
    results['bias^2'].append(bias_2)
    results['variance'].append(var)


def test_best_lambda():