def load_model(path, embed_dim=128, n_customer=20, n_encode_layers=3):
    """ Load model weights from h5 file
        https://stackoverflow.com/questions/51806852/cant-save-custom-subclassed-model
    """
    small_dataset = generate_data(n_samples=5, n_customer=n_customer)
    model_loaded = AttentionModel(embed_dim, n_encode_layers=n_encode_layers)
    # build the subclassed model with one forward pass before loading weights
    for data in small_dataset.batch(5):
        _, _ = model_loaded(data, decode_type='greedy')
    model_loaded.load_weights(path)
    return model_loaded
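# Usage sketch (added): restore trained weights and decode greedily on fresh
# data. The weight path is a hypothetical example following the
# '%s%s_epoch%s.h5' naming used by the train() function below.
model = load_model('./Weights/VRP20_epoch0.h5', embed_dim=128, n_customer=20)
for data in generate_data(n_samples=5, n_customer=20).batch(5):
    cost, log_likelihood = model(data, decode_type='greedy')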
def train(cfg, log_path=None):
    model = AttentionModel(cfg.embed_dim, cfg.n_encode_layers, cfg.n_heads,
                           cfg.tanh_clipping, 'sampling')
    baseline = RolloutBaseline(model, cfg.task, cfg.weight_dir,
                               cfg.n_rollout_samples, cfg.embed_dim,
                               cfg.n_customer, cfg.warmup_beta, cfg.wp_epochs)
    optimizer = tf.keras.optimizers.Adam(learning_rate=cfg.lr)
    ave_loss = tf.keras.metrics.Mean()
    ave_L = tf.keras.metrics.Mean()
    for epoch in tqdm(range(cfg.epochs), desc='epoch'):
        t1 = time()
        dataset = generate_data(cfg.n_samples, cfg.n_customer)
        bs = baseline.eval_all(dataset)
        # bs: (cfg.batch_steps, cfg.batch) or None
        bs = tf.reshape(bs, (-1, cfg.batch)) if bs is not None else None
        for t, inputs in enumerate(dataset.batch(cfg.batch)):
            with tf.GradientTape() as tape:
                L, logp = model(inputs)
                b = bs[t] if bs is not None else baseline.eval(inputs, L)
                b = tf.stop_gradient(b)
                loss = tf.reduce_mean((L - b) * logp)
                L_mean = tf.reduce_mean(L)
            # model.trainable_weights == theta
            grads = tape.gradient(loss, model.trainable_weights)
            grads, _ = tf.clip_by_global_norm(grads, 1.0)
            optimizer.apply_gradients(zip(grads, model.trainable_weights))  # optimizer.step
            ave_loss.update_state(loss)
            ave_L.update_state(L_mean)
            if t % int(cfg.batch_steps * 0.1) == 0:  # report every 10% of the batches
                print('epoch%d, %d/%dsamples: loss %1.2f, average L %1.2f, average b %1.2f\n' % (
                    epoch, t * cfg.batch, cfg.n_samples, ave_loss.result().numpy(),
                    ave_L.result().numpy(), tf.reduce_mean(b)))
        baseline.epoch_callback(model, epoch)
        model.decode_type = 'sampling'
        model.save_weights('%s%s_epoch%s.h5' % (cfg.weight_dir, cfg.task, epoch),
                           save_format='h5')
        if cfg.islogger:
            if log_path is None:
                log_path = '%s%s_%s.csv' % (cfg.log_dir, cfg.task, cfg.dump_date)  # cfg.log_dir = ./Csv/
                with open(log_path, 'w') as f:
                    f.write('time,epoch,loss,average length\n')
            with open(log_path, 'a') as f:
                t2 = time()
                f.write('%dmin%dsec,%d,%1.2f,%1.2f\n' % (
                    (t2 - t1) // 60, (t2 - t1) % 60, epoch,
                    ave_loss.result().numpy(), ave_L.result().numpy()))
        ave_loss.reset_states()
        ave_L.reset_states()
def load_model(path, embed_dim=128, n_customer=20, n_encode_layers=3):
    """ Load model weights from h5 file
        https://stackoverflow.com/questions/51806852/cant-save-custom-subclassed-model
    """
    CAPACITIES = {10: 20., 20: 30., 50: 40., 100: 50.}
    # dummy batch of 2 instances: depot xy, customer xy, normalized demands
    data_random = (tf.random.uniform((2, 2), minval=0, maxval=1),
                   tf.random.uniform((2, n_customer, 2), minval=0, maxval=1),
                   tf.cast(tf.random.uniform((2, n_customer), minval=1, maxval=10,
                                             dtype=tf.int32), tf.float32) /
                   tf.cast(CAPACITIES[n_customer], tf.float32))
    model_loaded = AttentionModel(embed_dim, n_encode_layers=n_encode_layers,
                                  decode_type='greedy')
    # build the subclassed model with one forward pass before loading weights
    _, _ = model_loaded(data_random)
    model_loaded.load_weights(path)
    return model_loaded
def BuildEvalModel(model_type, hparams, iterator, graph):
    if model_type == 'simple_model':
        model = SimpleModel(hparams=hparams, iterator=iterator, regime='DEV')
    elif model_type == 'attention_model':
        model = AttentionModel(hparams=hparams, iterator=iterator, regime='DEV')
    else:
        raise ValueError('unknown model_type: %s' % model_type)
    return EvalModel(model, hparams.logdir, graph)
def copy_model(model, embed_dim=128, n_customer=20):
    """ Copy model weights to new model
        https://stackoverflow.com/questions/56841736/how-to-copy-a-network-in-tensorflow-2-0
    """
    small_dataset = generate_data(n_samples=5, n_customer=n_customer)
    new_model = AttentionModel(embed_dim)
    # build the new model with one forward pass so its variables exist
    for data in small_dataset.batch(5):
        # _, _ = model(data, decode_type = 'sampling')
        cost, _ = new_model(data, decode_type='sampling')
    for a, b in zip(new_model.variables, model.variables):
        a.assign(b)  # copy the weight variables of model into new_model
    return new_model
def BuildTrainModel(model_type, hparams, iterator, graph):
    ### TO DO: add attention model
    if model_type == 'simple_model':
        model = SimpleModel(hparams=hparams, iterator=iterator, regime='TRAIN')
    elif model_type == 'attention_model':
        model = AttentionModel(hparams=hparams, iterator=iterator, regime='TRAIN')
    else:
        raise ValueError('unknown model_type: %s' % model_type)
    return TrainModel(model, hparams.logdir, graph)
def copy_model(model, embed_dim=128, n_customer=20):
    """ Copy model weights to new model
        https://stackoverflow.com/questions/56841736/how-to-copy-a-network-in-tensorflow-2-0
    """
    CAPACITIES = {10: 20., 20: 30., 50: 40., 100: 50.}
    # dummy batch of 2 instances: depot xy, customer xy, normalized demands
    data_random = (tf.random.uniform((2, 2), minval=0, maxval=1),
                   tf.random.uniform((2, n_customer, 2), minval=0, maxval=1),
                   tf.cast(tf.random.uniform((2, n_customer), minval=1, maxval=10,
                                             dtype=tf.int32), tf.float32) /
                   tf.cast(CAPACITIES[n_customer], tf.float32))
    new_model = AttentionModel(embed_dim, decode_type='sampling')
    # build the new model with one forward pass so its variables exist
    _, _ = new_model(data_random)
    for a, b in zip(new_model.variables, model.variables):
        a.assign(b)  # copy the weight variables of model into new_model
    return new_model
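# Usage sketch (added): copy_model is the kind of helper a rollout baseline uses
# to freeze the current policy. model_ stands for an already-built
# AttentionModel and is hypothetical; the assert only illustrates that the two
# models now share identical weights.
baseline_model = copy_model(model_, embed_dim=128, n_customer=20)
for a, b in zip(baseline_model.variables, model_.variables):
    assert (a.numpy() == b.numpy()).all()  # weights match after the copy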
def BuildInferModel(model_type, hparams, iterator, graph, infer_file_path):
    # lookup table mapping target-vocabulary ids back to strings
    id2string_table = tf.contrib.lookup.index_to_string_table_from_file(
        hparams.filesobj.trg_vcb_file, default_value='<unk>')
    if model_type == 'simple_model':
        model = SimpleModel(hparams=hparams, iterator=iterator, regime='TEST',
                            id2string_lookup_table=id2string_table)
    elif model_type == 'attention_model':
        model = AttentionModel(hparams=hparams, iterator=iterator, regime='TEST',
                               id2string_lookup_table=id2string_table)
    else:
        raise ValueError('unknown model_type: %s' % model_type)
    return InferModel(model, hparams.logdir, graph, infer_file_path)
def train(model_type, mode, pretrain, layer):
    """ image1data.pkl: images_G1 for image (4, 172, 196)
        image2data.pkl: images_G2 for image (6, 140, 278)
    """
    data = data_preprocess('single', mode)
    print("1. Get data ready!")
    if model_type == 'dcec':
        model = DCEC(opt.input_shape, opt.filters, opt.kernel_size,
                     opt.n_clusters, opt.weights, data, opt.alpha,
                     pretrain=pretrain, layer=layer)
        model.compile(loss=['kld', 'binary_crossentropy'], optimizer='adam')
        print("3. Compile model!")
        model.fit(data, opt)
    elif model_type == 'attention':
        model = AttentionModel(opt.input_shape, opt.filters, opt.kernel_size,
                               opt.n_clusters, opt.weights, data, opt.alpha,
                               pretrain=pretrain)
        model.compile(optimizer='adam')
        print("3. Compile model!")
        model.fit(data, opt)
    model.predict(data)
def setUp(self):
    super(ModelTest, self).setUp()
    self.graph = tf.Graph()
    self.session = tf.Session(graph=self.graph)
    with self.graph.as_default():
        self.iterator, _ = iterator_utils.get_iterator(
            'TRAIN', filesobj=TRAIN_FILES,
            buffer_size=TRAIN_HPARAMS.buffer_size,
            num_epochs=TRAIN_HPARAMS.num_epochs,
            batch_size=TRAIN_HPARAMS.batch_size,
            debug_mode=True)
        self.model = AttentionModel(TRAIN_HPARAMS, self.iterator, 'TRAIN')
        self.table_init_op = tf.tables_initializer()
        self.vars_init_op = tf.global_variables_initializer()
def load_model(path, embed_dim=128, n_customer=20, n_encode_layers=3):
    # https://pytorch.org/tutorials/beginner/saving_loading_models.html
    # small_data = generate_data(n_samples = 5, n_customer = n_customer)
    # small_data = list(map(lambda x: x.to(self.device), small_data))
    model_loaded = AttentionModel(embed_dim=embed_dim,
                                  n_encode_layers=n_encode_layers,
                                  n_heads=8, tanh_clipping=10., FF_hidden=512)
    # model_loaded = model_loaded.to(self.device)
    # with torch.no_grad():
    #     _, _ = model_loaded(small_data, decode_type = 'greedy')
    if torch.cuda.is_available():
        model_loaded.load_state_dict(torch.load(path))
    else:
        # https://pytorch.org/docs/master/generated/torch.load.html
        model_loaded.load_state_dict(
            torch.load(path, map_location=torch.device('cpu')))
    return model_loaded
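# Usage sketch (added): load saved weights and switch the network to inference
# mode. The .pt path is a hypothetical example following the '%s%s_epoch%s.pt'
# naming used by the PyTorch train() function below.
model = load_model('./Weights/VRP20_epoch0.pt')
model.eval()  # disable dropout/batch-norm updates for deterministic decoding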
def train():
    model = AttentionModel(params_config, human_vocab_size, machine_vocab_size).model
    op = Adam(lr=params_config['learning_rate'],
              decay=params_config['decay'],
              clipnorm=params_config['clipnorm'])
    if os.path.exists('./Model/model.h5'):
        print('loading model...')
        model.load_weights('./Model/model.h5')
    model.compile(optimizer=op, loss='categorical_crossentropy',
                  metrics=['accuracy'])
    outputs_train = list(Yoh_train.swapaxes(0, 1))
    model.fit(Xoh_train, outputs_train,
              epochs=params_config['epochs'],
              batch_size=params_config['batch_size'],
              validation_split=0.1)
    if not os.path.exists('Model'):
        os.mkdir('Model')
    model.save_weights('./Model/model.h5')
    return model
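# Usage sketch (added): train and run a quick prediction. Xoh_train is the
# one-hot encoded input already referenced inside train(); the output format is
# assumed to mirror outputs_train above (one softmax array per decoder step).
model = train()
preds = model.predict(Xoh_train[:1])  # list of per-timestep class probabilities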
import torch
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm

from model import AttentionModel
from tsp import generate_instances, evaluate, plot


def try_gpu(e):
    if torch.cuda.is_available():
        return e.cuda()
    return e


# model = AttentionModel(2, 5, 5, 3, 3)
base_model = try_gpu(AttentionModel(2, 16, 32, 3, 3))
model = try_gpu(AttentionModel(2, 16, 32, 3, 3, 100))
optimizer = optim.Adam(model.parameters(), lr=0.001)

problem = try_gpu(generate_instances(10, 2))
selected, log_p = model(problem)
cost = evaluate(problem, selected)
print(cost.sum())

for e in range(100):
    print('Epoch -', e)
    cost_total = 0
    base_cost_total = 0
    for i in tqdm(range(10)):
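        # NOTE (added): the original listing is truncated here; this loop body
        # is a hypothetical REINFORCE sketch, not the original code. It assumes
        # generate_instances/evaluate behave as in the warm-up calls above and
        # that log_p is the per-instance sum of log-probabilities.
        problem = try_gpu(generate_instances(10, 2))
        selected, log_p = model(problem)
        cost = evaluate(problem, selected)
        with torch.no_grad():  # frozen baseline rollout, no gradients
            base_selected, _ = base_model(problem)
            base_cost = evaluate(problem, base_selected)
        # advantage-weighted log-likelihood (REINFORCE with rollout baseline)
        loss = ((cost - base_cost) * log_p).mean()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        cost_total += cost.sum().item()
        base_cost_total += base_cost.sum().item()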
def train(cfg, log_path=None):

    def allocate_memory():
        # https://qiita.com/studio_haneya/items/4dfaf2fb2ac44818e7e0
        physical_devices = tf.config.experimental.list_physical_devices('GPU')
        if len(physical_devices) > 0:
            for k in range(len(physical_devices)):
                tf.config.experimental.set_memory_growth(physical_devices[k], True)
                print('memory growth:',
                      tf.config.experimental.get_memory_growth(physical_devices[k]))
        else:
            print("Not enough GPU hardware devices available")

    def rein_loss(model, inputs, bs, t):
        L, ll = model(inputs, decode_type='sampling', training=True)
        b = bs[t] if bs is not None else baseline.eval(inputs, L)
        b = tf.stop_gradient(b)
        return tf.reduce_mean((L - b) * ll), tf.reduce_mean(L)

    def grad_func(model, inputs, bs, t):
        with tf.GradientTape() as tape:
            loss, L_mean = rein_loss(model, inputs, bs, t)
        # model.trainable_variables == theta
        return loss, L_mean, tape.gradient(loss, model.trainable_variables)

    allocate_memory()
    model = AttentionModel(cfg.embed_dim, cfg.n_encode_layers, cfg.n_heads,
                           cfg.tanh_clipping)
    baseline = RolloutBaseline(model, cfg.task, cfg.weight_dir,
                               cfg.n_rollout_samples, cfg.embed_dim,
                               cfg.n_customer, cfg.warmup_beta, cfg.wp_epochs)
    optimizer = tf.keras.optimizers.Adam(learning_rate=cfg.lr)
    ave_loss = tf.keras.metrics.Mean()
    ave_L = tf.keras.metrics.Mean()
    t1 = time()
    for epoch in range(cfg.epochs):
        dataset = generate_data(cfg.n_samples, cfg.n_customer)
        bs = baseline.eval_all(dataset)
        # bs: (cfg.batch_steps, cfg.batch) or None
        bs = tf.reshape(bs, (-1, cfg.batch)) if bs is not None else None
        for t, inputs in enumerate(dataset.batch(cfg.batch)):
            loss, L_mean, grads = grad_func(model, inputs, bs, t)
            grads, _ = tf.clip_by_global_norm(grads, 1.0)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))  # optimizer.step
            ave_loss.update_state(loss)
            ave_L.update_state(L_mean)
            if t % cfg.batch_verbose == 0:
                t2 = time()
                print('Epoch %d (batch = %d): Loss: %1.3f L: %1.3f, %dmin%dsec' % (
                    epoch, t, ave_loss.result().numpy(), ave_L.result().numpy(),
                    (t2 - t1) // 60, (t2 - t1) % 60))
                if cfg.islogger:
                    if log_path is None:
                        log_path = '%s%s_%s.csv' % (cfg.log_dir, cfg.task, cfg.dump_date)  # cfg.log_dir = ./Csv/
                        with open(log_path, 'w') as f:
                            f.write('time,epoch,batch,loss,cost\n')
                    with open(log_path, 'a') as f:
                        f.write('%dmin%dsec,%d,%d,%1.3f,%1.3f\n' % (
                            (t2 - t1) // 60, (t2 - t1) % 60, epoch, t,
                            ave_loss.result().numpy(), ave_L.result().numpy()))
                t1 = time()
        baseline.epoch_callback(model, epoch)
        model.save_weights('%s%s_epoch%s.h5' % (cfg.weight_dir, cfg.task, epoch),
                           save_format='h5')  # cfg.weight_dir = ./Weights/
        ave_loss.reset_states()
        ave_L.reset_states()
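# Usage sketch (added): train(cfg) expects an attribute-style config. Every
# field name below appears in the function body above, but the values are
# hypothetical examples, not the project's defaults.
from argparse import Namespace

cfg = Namespace(
    embed_dim=128, n_encode_layers=3, n_heads=8, tanh_clipping=10.,
    task='VRP20', weight_dir='./Weights/', log_dir='./Csv/', islogger=True,
    dump_date='0101_00_00', n_rollout_samples=10000, n_customer=20,
    warmup_beta=0.8, wp_epochs=1, lr=1e-4, epochs=20,
    n_samples=128000, batch=512, batch_steps=250, batch_verbose=10)
train(cfg)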
# add padding
train_tokens = data.add_padding(train_tokens, max)
test_tokens = data.add_padding(test_tokens, max)

# convert2vec
train_tokens, train_tags = data.convert2vec(train_tokens, train_tags,
                                            word2idx, tag2idx)
test_tokens, test_tags = data.convert2vec(test_tokens, test_tags,
                                          word2idx=word2idx, tag2idx=tag2idx)

# dataset
train_dataset = myDataSet(train_tokens, train_tags, train_seqlen)
test_dataset = myDataSet(test_tokens, test_tags, test_seqlen)

# dataloader
train_data = DataLoader(train_dataset, batch_size=args.batch_size)
test_data = DataLoader(test_dataset, batch_size=args.batch_size)

# model
baseModel = baseModel(vocab_size=vocab_size, embedding_dim=args.embedding_dim,
                      hidden_dim=args.hidden_dim, tag2idx=tag2idx,
                      batch_size=args.batch_size, use_gpu=use_gpu,
                      idx2word=idx2word, emb_path=emb_path)
attentionModel = AttentionModel(vocab_size=vocab_size,
                                embedding_dim=args.embedding_dim,
                                hidden_dim=args.hidden_dim, tag2idx=tag2idx,
                                batch_size=args.batch_size, use_gpu=use_gpu,
                                idx2word=idx2word, emb_path=emb_path)

# trainer: build the optimizer over the parameters of the selected model
if args.mode == 'base':
    optimizer = optim.Adam(baseModel.parameters(), lr=args.lr)
    myTrainer = trainer(model=baseModel, train_dataloader=train_data,
                        test_dataloader=test_data, optimizer=optimizer,
                        epochs=args.epochs, word2idx=word2idx, tag2idx=tag2idx,
                        idx2word=idx2word, idx2tag=idx2tag, use_gpu=use_gpu)
elif args.mode == 'attention':
    optimizer = optim.Adam(attentionModel.parameters(), lr=args.lr)
    myTrainer = trainer(model=attentionModel, train_dataloader=train_data,
                        test_dataloader=test_data, optimizer=optimizer,
                        epochs=args.epochs, word2idx=word2idx, tag2idx=tag2idx,
                        idx2word=idx2word, idx2tag=idx2tag, use_gpu=use_gpu)
else:
    print('not a valid mode')

myTrainer.train()
def train(cfg, log_path=None):
    torch.backends.cudnn.benchmark = True

    def rein_loss(model, inputs, bs, t, device):
        inputs = list(map(lambda x: x.to(device), inputs))
        L, ll = model(inputs, decode_type='sampling')
        b = bs[t] if bs is not None else baseline.eval(inputs, L)
        return ((L - b.to(device)) * ll).mean(), L.mean()

    model = AttentionModel(cfg.embed_dim, cfg.n_encode_layers, cfg.n_heads,
                           cfg.tanh_clipping)
    model.train()
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    baseline = RolloutBaseline(model, cfg.task, cfg.weight_dir,
                               cfg.n_rollout_samples, cfg.embed_dim,
                               cfg.n_customer, cfg.warmup_beta, cfg.wp_epochs,
                               device)
    optimizer = optim.Adam(model.parameters(), lr=cfg.lr)
    t1 = time()
    for epoch in range(cfg.epochs):
        ave_loss, ave_L = 0., 0.
        dataset = Generator(cfg.batch * cfg.batch_steps, cfg.n_customer)
        bs = baseline.eval_all(dataset)
        # bs: (cfg.batch_steps, cfg.batch) or None
        bs = bs.view(-1, cfg.batch) if bs is not None else None
        dataloader = DataLoader(dataset, batch_size=cfg.batch, shuffle=True)
        for t, inputs in enumerate(dataloader):
            loss, L_mean = rein_loss(model, inputs, bs, t, device)
            optimizer.zero_grad()
            loss.backward()
            # print('grad: ', model.Decoder.Wk1.weight.grad[0][0])
            # https://github.com/wouterkool/attention-learn-to-route/blob/master/train.py
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0, norm_type=2)
            optimizer.step()
            ave_loss += loss.item()
            ave_L += L_mean.item()
            if t % cfg.batch_verbose == 0:
                t2 = time()
                print('Epoch %d (batch = %d): Loss: %1.3f L: %1.3f, %dmin%dsec' % (
                    epoch, t, ave_loss / (t + 1), ave_L / (t + 1),
                    (t2 - t1) // 60, (t2 - t1) % 60))
                if cfg.islogger:
                    if log_path is None:
                        log_path = '%s%s_%s.csv' % (cfg.log_dir, cfg.task, cfg.dump_date)  # cfg.log_dir = ./Csv/
                        with open(log_path, 'w') as f:
                            f.write('time,epoch,batch,loss,cost\n')
                    with open(log_path, 'a') as f:
                        f.write('%dmin%dsec,%d,%d,%1.3f,%1.3f\n' % (
                            (t2 - t1) // 60, (t2 - t1) % 60, epoch, t,
                            ave_loss / (t + 1), ave_L / (t + 1)))
                t1 = time()
        baseline.epoch_callback(model, epoch)
        torch.save(model.state_dict(),
                   '%s%s_epoch%s.pt' % (cfg.weight_dir, cfg.task, epoch))
                            marker=dict(size=15),
                            name='depot')
    layout = go.Layout(title='<b>Example: {}</b>'.format(title),
                       xaxis=dict(title='X coordinate'),
                       yaxis=dict(title='Y coordinate'),
                       showlegend=True,
                       width=1000,
                       height=1000,
                       template='plotly_white')
    data = [trace_points, trace_depo] + list_of_path_traces
    print('Current path: ', pi_)
    fig = go.Figure(data=data, layout=layout)
    fig.show()


if __name__ == '__main__':
    model = AttentionModel(decode_type='sampling')
    pretrained = load_model(file_parser().path)
    dataset = generate_data(n_customer=20)
    for i, data in enumerate(dataset.batch(5)):
        cost, _, pi = model(data, return_pi=True)
        idx_min = tf.argmin(cost, axis=0)
        get_journey(data, pi, 'untrained model', idx_min)
        cost, _, pi = pretrained(data, return_pi=True)
        idx_min = tf.argmin(cost, axis=0)
        get_journey(data, pi, 'pretrained model', idx_min)
        if i == 0:
            break