Example #1
def load_model(path, embed_dim=128, n_customer=20, n_encode_layers=3):
    """ Load model weights from hd5 file
		https://stackoverflow.com/questions/51806852/cant-save-custom-subclassed-model
	"""
    small_dataset = generate_data(n_samples=5, n_customer=n_customer)
    model_loaded = AttentionModel(embed_dim, n_encode_layers=n_encode_layers)
    # one forward pass so the subclassed model builds its weights before loading
    for data in small_dataset.batch(5):
        _, _ = model_loaded(data, decode_type='greedy')

    model_loaded.load_weights(path)
    return model_loaded
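A minimal usage sketch for this helper, under the same assumptions as the snippet above; the weights file name is hypothetical, and generate_data is the project's own data generator:

weights_path = './Weights/VRP20_epoch10.h5'  # hypothetical path to weights saved with save_weights()
model = load_model(weights_path, embed_dim=128, n_customer=20, n_encode_layers=3)
dataset = generate_data(n_samples=5, n_customer=20)
for data in dataset.batch(5):
    cost, log_likelihood = model(data, decode_type='greedy')
    print(cost)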
Example #2
def train(cfg, log_path = None):
	
	model = AttentionModel(cfg.embed_dim, cfg.n_encode_layers, cfg.n_heads, 
						cfg.tanh_clipping, 'sampling')
	baseline = RolloutBaseline(model, cfg.task, cfg.weight_dir, cfg.n_rollout_samples, 
								cfg.embed_dim, cfg.n_customer, cfg.warmup_beta, cfg.wp_epochs)
	optimizer = tf.keras.optimizers.Adam(learning_rate = cfg.lr)
	ave_loss = tf.keras.metrics.Mean()
	ave_L = tf.keras.metrics.Mean()
	
	for epoch in tqdm(range(cfg.epochs), desc = 'epoch'):
		t1 = time()
		dataset = generate_data(cfg.n_samples, cfg.n_customer)
		bs = baseline.eval_all(dataset)
		bs = tf.reshape(bs, (-1, cfg.batch)) if bs is not None else None # bs: (cfg.batch_steps, cfg.batch) or None
		
		for t, inputs in enumerate(dataset.batch(cfg.batch)):
			with tf.GradientTape() as tape:
				L, logp = model(inputs)
				b = bs[t] if bs is not None else baseline.eval(inputs, L)
				b = tf.stop_gradient(b)
				loss = tf.reduce_mean((L - b) * logp)
				L_mean = tf.reduce_mean(L)
			grads = tape.gradient(loss, model.trainable_weights)  # model.trainable_weights == theta
			grads, _ = tf.clip_by_global_norm(grads, 1.0)
			optimizer.apply_gradients(zip(grads, model.trainable_weights))  # equivalent to optimizer.step

			ave_loss.update_state(loss)
			ave_L.update_state(L_mean)
			if t % max(int(cfg.batch_steps * 0.1), 1) == 0:  # log roughly every 10% of the batches
				print('epoch %d, %d/%d samples: loss %1.2f, average L %1.2f, average b %1.2f\n'%(
						epoch, t*cfg.batch, cfg.n_samples, ave_loss.result().numpy(), ave_L.result().numpy(), tf.reduce_mean(b)))

		baseline.epoch_callback(model, epoch)
		model.decode_type = 'sampling'
		model.save_weights('%s%s_epoch%s.h5'%(cfg.weight_dir, cfg.task, epoch), save_format = 'h5')
		
		if cfg.islogger:
			if log_path is None:
				log_path = '%s%s_%s.csv'%(cfg.log_dir, cfg.task, cfg.dump_date)  # cfg.log_dir = ./Csv/
				with open(log_path, 'w') as f:
					f.write('time,epoch,loss,average length\n')
			with open(log_path, 'a') as f:
				t2 = time()
				f.write('%dmin%dsec,%d,%1.2f,%1.2f\n'%((t2-t1)//60, (t2-t1)%60, epoch, ave_loss.result().numpy(), ave_L.result().numpy()))

		ave_loss.reset_states()
		ave_L.reset_states()
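For context, train() above only assumes that cfg exposes a handful of attributes; a rough sketch of such a config object built with SimpleNamespace, with purely illustrative values (the attribute names are exactly those read in the function body above):

from types import SimpleNamespace

cfg = SimpleNamespace(
    embed_dim=128, n_encode_layers=3, n_heads=8, tanh_clipping=10.,  # model
    task='VRP20', weight_dir='./Weights/', log_dir='./Csv/',         # output paths and naming
    dump_date='0101_0000', islogger=True,
    n_customer=20, n_samples=1280, batch=128, batch_steps=10,        # data (n_samples = batch * batch_steps)
    n_rollout_samples=1280, warmup_beta=0.8, wp_epochs=1,            # rollout baseline
    lr=1e-4, epochs=20)                                              # optimization

train(cfg)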
Example #3
def load_model(path, embed_dim=128, n_customer=20, n_encode_layers=3):
    """ Load model weights from hd5 file
		https://stackoverflow.com/questions/51806852/cant-save-custom-subclassed-model
	"""
    CAPACITIES = {10: 20., 20: 30., 50: 40., 100: 50.}
    # dummy batch of 2 instances: (depot xy, customer xy, demands normalized by vehicle capacity)
    data_random = (
        tf.random.uniform((2, 2), minval=0, maxval=1),
        tf.random.uniform((2, n_customer, 2), minval=0, maxval=1),
        tf.cast(
            tf.random.uniform(
                (2, n_customer), minval=1, maxval=10, dtype=tf.int32),
            tf.float32) / tf.cast(CAPACITIES[n_customer], tf.float32))

    model_loaded = AttentionModel(embed_dim,
                                  n_encode_layers=n_encode_layers,
                                  decode_type='greedy')
    _, _ = model_loaded(data_random)  # one forward pass to build the weights before loading
    model_loaded.load_weights(path)
    return model_loaded
Example #4
def BuildEvalModel(model_type, hparams, iterator, graph):

    if model_type == 'simple_model':

        model = SimpleModel(hparams=hparams, iterator=iterator, regime='DEV')

    elif model_type == 'attention_model':

        model = AttentionModel(hparams=hparams,
                               iterator=iterator,
                               regime='DEV')

    else:
        raise ValueError('unsupported model_type: %s' % model_type)

    return EvalModel(model, hparams.logdir, graph)
Example #5
def copy_model(model, embed_dim=128, n_customer=20):
    """ Copy model weights to new model
		https://stackoverflow.com/questions/56841736/how-to-copy-a-network-in-tensorflow-2-0
	"""
    small_dataset = generate_data(n_samples=5, n_customer=n_customer)
    new_model = AttentionModel(embed_dim)
    # one forward pass so new_model builds its weights before copying
    for data in small_dataset.batch(5):
        # _, _ = model(data, decode_type = 'sampling')
        cost, _ = new_model(data, decode_type='sampling')

    for a, b in zip(new_model.variables, model.variables):
        a.assign(b)  # copy the weight variables of model into new_model
    return new_model
Example #6
def BuildTrainModel(model_type, hparams, iterator, graph):

    ### TO DO: add attention model

    if model_type == 'simple_model':

        model = SimpleModel(hparams=hparams, iterator=iterator, regime='TRAIN')

    elif model_type == 'attention_model':

        model = AttentionModel(hparams=hparams,
                               iterator=iterator,
                               regime='TRAIN')

    else:
        raise ValueError('unsupported model_type: %s' % model_type)

    return TrainModel(model, hparams.logdir, graph)
Example #7
def copy_model(model, embed_dim=128, n_customer=20):
    """ Copy model weights to new model
		https://stackoverflow.com/questions/56841736/how-to-copy-a-network-in-tensorflow-2-0
	"""
    CAPACITIES = {10: 20., 20: 30., 50: 40., 100: 50.}
    data_random = (
        tf.random.uniform((2, 2), minval=0, maxval=1),
        tf.random.uniform((2, n_customer, 2), minval=0, maxval=1),
        tf.cast(
            tf.random.uniform(
                (2, n_customer), minval=1, maxval=10, dtype=tf.int32),
            tf.float32) / tf.cast(CAPACITIES[n_customer], tf.float32))

    new_model = AttentionModel(embed_dim, decode_type='sampling')
    _, _ = new_model(data_random)  # one forward pass to build the weights before copying
    for a, b in zip(new_model.variables, model.variables):
        a.assign(b)  # copy the weight variables of model into new_model
    return new_model
Example #8
def BuildInferModel(model_type, hparams, iterator, graph, infer_file_path):

    # index_to_string table: maps target-vocabulary ids back to strings
    id2string_table = (tf.contrib.lookup.index_to_string_table_from_file(
        hparams.filesobj.trg_vcb_file, default_value='<unk>'))

    if model_type == 'simple_model':

        model = SimpleModel(hparams=hparams,
                            iterator=iterator,
                            regime='TEST',
                            id2string_lookup_table=id2string_table)

    elif model_type == 'attention_model':

        model = AttentionModel(hparams=hparams,
                               iterator=iterator,
                               regime='TEST',
                               id2string_lookup_table=id2string_table)

    else:
        raise ValueError('unsupported model_type: %s' % model_type)

    return InferModel(model, hparams.logdir, graph, infer_file_path)
Example #9
def train(model_type, mode, pretrain, layer):
    """
    image1data.pkl: images_G1 for image (4, 172, 196)
    image2data.pkl: images_G2 for image (6, 140, 278)
    """

    data = data_preprocess('single', mode)
    print("1. Get data ready!")

    if model_type == 'dcec':
        model = DCEC(opt.input_shape,
                     opt.filters,
                     opt.kernel_size,
                     opt.n_clusters,
                     opt.weights,
                     data,
                     opt.alpha,
                     pretrain=pretrain,
                     layer=layer)
        model.compile(loss=['kld', 'binary_crossentropy'], optimizer='adam')
        print("3. Compile model!")

        model.fit(data, opt)

    elif model_type == 'attention':
        model = AttentionModel(opt.input_shape,
                               opt.filters,
                               opt.kernel_size,
                               opt.n_clusters,
                               opt.weights,
                               data,
                               opt.alpha,
                               pretrain=pretrain)
        model.compile(optimizer='adam')
        print("3. Compile model!")

        model.fit(data, opt)

        model.predict(data)
Example #10
    def setUp(self):

        super(ModelTest, self).setUp()

        self.graph = tf.Graph()

        self.session = tf.Session(graph=self.graph)

        with self.graph.as_default():

            self.iterator, _ = iterator_utils.get_iterator(
                'TRAIN',
                filesobj=TRAIN_FILES,
                buffer_size=TRAIN_HPARAMS.buffer_size,
                num_epochs=TRAIN_HPARAMS.num_epochs,
                batch_size=TRAIN_HPARAMS.batch_size,
                debug_mode=True)

            self.model = AttentionModel(TRAIN_HPARAMS, self.iterator, 'TRAIN')

            self.table_init_op = tf.tables_initializer()

            self.vars_init_op = tf.global_variables_initializer()
Example #11
def load_model(path, embed_dim=128, n_customer=20, n_encode_layers=3):
    # https://pytorch.org/tutorials/beginner/saving_loading_models.html

    # small_data = generate_data(n_samples = 5, n_customer = n_customer)
    # small_data = list(map(lambda x: x.to(self.device), small_data))
    model_loaded = AttentionModel(embed_dim=embed_dim,
                                  n_encode_layers=n_encode_layers,
                                  n_heads=8,
                                  tanh_clipping=10.,
                                  FF_hidden=512)
    # model_loaded = model_loaded.to(self.device)
    # with torch.no_grad():
    # 	_, _ = model_loaded(small_data, decode_type = 'greedy')
    if torch.cuda.is_available():
        model_loaded.load_state_dict(torch.load(path))
    else:
        model_loaded.load_state_dict(
            torch.load(path, map_location=torch.device('cpu')))
        # https://pytorch.org/docs/master/generated/torch.load.html
    return model_loaded
Example #12
def train():
    model = AttentionModel(params_config, human_vocab_size,
                           machine_vocab_size).model

    op = Adam(lr=params_config['learning_rate'],
              decay=params_config['decay'],
              clipnorm=params_config['clipnorm'])

    if os.path.exists('./Model/model.h5'):
        print('loading model...')

        model.load_weights('./Model/model.h5')

        model.compile(optimizer=op,
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

    else:
        model.compile(optimizer=op,
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

        outputs_train = list(Yoh_train.swapaxes(0, 1))

        model.fit(Xoh_train,
                  outputs_train,
                  epochs=params_config['epochs'],
                  batch_size=params_config['batch_size'],
                  validation_split=0.1)

        if not os.path.exists('Model'):
            os.mkdir('Model')

        model.save_weights('./Model/model.h5')
    return model
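This snippet depends on module-level globals (params_config, the vocabulary sizes, and the one-hot training arrays); a rough sketch of what it expects, with purely illustrative shapes and values:

import numpy as np

params_config = {'learning_rate': 0.005, 'decay': 0.01, 'clipnorm': 1.0,
                 'epochs': 10, 'batch_size': 64}
human_vocab_size, machine_vocab_size = 37, 11      # illustrative vocabulary sizes
Tx, Ty, m = 30, 10, 1000                           # input length, output length, number of samples
Xoh_train = np.zeros((m, Tx, human_vocab_size), dtype=np.float32)    # one-hot encoder inputs
Yoh_train = np.zeros((m, Ty, machine_vocab_size), dtype=np.float32)  # one-hot decoder targets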
Example #13
import torch
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
from model import AttentionModel
from tsp import generate_instances, evaluate, plot


def try_gpu(e):
    if torch.cuda.is_available():
        return e.cuda()
    return e

# model = AttentionModel(2, 5, 5, 3, 3)
base_model = try_gpu(AttentionModel(2, 16, 32, 3, 3))
model = try_gpu(AttentionModel(2, 16, 32, 3, 3, 100))
optimizer = optim.Adam(model.parameters(), lr=0.001)

problem = try_gpu(generate_instances(10, 2))
selected, log_p = model(problem)
cost = evaluate(problem, selected)
print(cost.sum())

for e in range(100):
    print('Epoch -', e)

    cost_total = 0
    base_cost_total = 0
    
    for i in tqdm(range(10)):
Example #14
def train(cfg, log_path = None):

	def allocate_memory():
	# https://qiita.com/studio_haneya/items/4dfaf2fb2ac44818e7e0
		physical_devices = tf.config.experimental.list_physical_devices('GPU')
		if len(physical_devices) > 0:
			for k in range(len(physical_devices)):
				tf.config.experimental.set_memory_growth(physical_devices[k], True)
				print('memory growth:', tf.config.experimental.get_memory_growth(physical_devices[k]))
		else:
			print('No GPU devices available')

	def rein_loss(model, inputs, bs, t):
		L, ll = model(inputs, decode_type = 'sampling', training = True)
		b = bs[t] if bs is not None else baseline.eval(inputs, L)
		b = tf.stop_gradient(b)
		return tf.reduce_mean((L - b) * ll), tf.reduce_mean(L)

	def grad_func(model, inputs, bs, t):
		with tf.GradientTape() as tape:
			loss, L_mean = rein_loss(model, inputs, bs, t)
		return loss, L_mean, tape.gradient(loss, model.trainable_variables)  # model.trainable_variables == theta

	allocate_memory()
	model = AttentionModel(cfg.embed_dim, cfg.n_encode_layers, cfg.n_heads, cfg.tanh_clipping)
	baseline = RolloutBaseline(model, cfg.task, cfg.weight_dir, cfg.n_rollout_samples, 
							cfg.embed_dim, cfg.n_customer, cfg.warmup_beta, cfg.wp_epochs)
	optimizer = tf.keras.optimizers.Adam(learning_rate = cfg.lr)
	ave_loss = tf.keras.metrics.Mean()
	ave_L = tf.keras.metrics.Mean()
	
	t1 = time()
	for epoch in range(cfg.epochs):
		dataset = generate_data(cfg.n_samples, cfg.n_customer)
		
		bs = baseline.eval_all(dataset)
		bs = tf.reshape(bs, (-1, cfg.batch)) if bs is not None else None# bs: (cfg.batch_steps, cfg.batch) or None
		
		for t, inputs in enumerate(dataset.batch(cfg.batch)):
			
			loss, L_mean, grads = grad_func(model, inputs, bs, t)

			grads, _ = tf.clip_by_global_norm(grads, 1.0)
			optimizer.apply_gradients(zip(grads, model.trainable_variables))# optimizer.step
			
			ave_loss.update_state(loss)
			ave_L.update_state(L_mean)
			
			if t%(cfg.batch_verbose) == 0:
				t2 = time()
				print('Epoch %d (batch = %d): Loss: %1.3f L: %1.3f, %dmin%dsec'%(
					epoch, t, ave_loss.result().numpy(), ave_L.result().numpy(), (t2-t1)//60, (t2-t1)%60))
				if cfg.islogger:
					if log_path is None:
						log_path = '%s%s_%s.csv'%(cfg.log_dir, cfg.task, cfg.dump_date)#cfg.log_dir = ./Csv/
						with open(log_path, 'w') as f:
							f.write('time,epoch,batch,loss,cost\n')
					with open(log_path, 'a') as f:
						f.write('%dmin%dsec,%d,%d,%1.3f,%1.3f\n'%(
							(t2-t1)//60, (t2-t1)%60, epoch, t, ave_loss.result().numpy(), ave_L.result().numpy()))
				t1 = time()

		baseline.epoch_callback(model, epoch)
		model.save_weights('%s%s_epoch%s.h5'%(cfg.weight_dir, cfg.task, epoch), save_format = 'h5')#cfg.weight_dir = ./Weights/

		ave_loss.reset_states()
		ave_L.reset_states()
Example #15
    # add padding
    train_tokens = data.add_padding(train_tokens, max)
    test_tokens = data.add_padding(test_tokens, max)
    # convert2vec
    train_tokens, train_tags = data.convert2vec(train_tokens, train_tags, word2idx, tag2idx)
    test_tokens, test_tags = data.convert2vec(test_tokens, test_tags, word2idx=word2idx, tag2idx = tag2idx)
    # dataset
    train_dataset = myDataSet(train_tokens, train_tags, train_seqlen)
    test_dataset = myDataSet(test_tokens, test_tags, test_seqlen)
    # dataloader
    train_data = DataLoader(train_dataset, batch_size=args.batch_size)
    test_data = DataLoader(test_dataset, batch_size=args.batch_size)

    # model
    base_model = baseModel(vocab_size=vocab_size, embedding_dim=args.embedding_dim, hidden_dim=args.hidden_dim, tag2idx=tag2idx,
                           batch_size=args.batch_size, use_gpu=use_gpu, idx2word=idx2word, emb_path=emb_path)
    attention_model = AttentionModel(vocab_size=vocab_size, embedding_dim=args.embedding_dim, hidden_dim=args.hidden_dim, tag2idx=tag2idx,
                                     batch_size=args.batch_size, use_gpu=use_gpu, idx2word=idx2word, emb_path=emb_path)

    # trainer: pick the model to train and build the optimizer from its parameters
    if args.mode == 'base':
        optimizer = optim.Adam(base_model.parameters(), lr=args.lr)
        myTrainer = trainer(model=base_model, train_dataloader=train_data, test_dataloader=test_data, optimizer=optimizer,
                            epochs=args.epochs, word2idx=word2idx, tag2idx=tag2idx, idx2word=idx2word, idx2tag=idx2tag, use_gpu=use_gpu)
    elif args.mode == 'attention':
        optimizer = optim.Adam(attention_model.parameters(), lr=args.lr)
        myTrainer = trainer(model=attention_model, train_dataloader=train_data, test_dataloader=test_data, optimizer=optimizer,
                            epochs=args.epochs, word2idx=word2idx, tag2idx=tag2idx, idx2word=idx2word, idx2tag=idx2tag, use_gpu=use_gpu)
    else:
        raise ValueError("mode must be 'base' or 'attention'")
    myTrainer.train()
Example #16
def train(cfg, log_path=None):
    torch.backends.cudnn.benchmark = True

    def rein_loss(model, inputs, bs, t, device):
        inputs = list(map(lambda x: x.to(device), inputs))
        L, ll = model(inputs, decode_type='sampling')
        b = bs[t] if bs is not None else baseline.eval(inputs, L)
        return ((L - b.to(device)) * ll).mean(), L.mean()

    model = AttentionModel(cfg.embed_dim, cfg.n_encode_layers, cfg.n_heads,
                           cfg.tanh_clipping)
    model.train()
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    baseline = RolloutBaseline(model, cfg.task, cfg.weight_dir,
                               cfg.n_rollout_samples, cfg.embed_dim,
                               cfg.n_customer, cfg.warmup_beta, cfg.wp_epochs,
                               device)
    optimizer = optim.Adam(model.parameters(), lr=cfg.lr)

    t1 = time()
    for epoch in range(cfg.epochs):
        ave_loss, ave_L = 0., 0.
        dataset = Generator(cfg.batch * cfg.batch_steps, cfg.n_customer)

        bs = baseline.eval_all(dataset)
        bs = bs.view(
            -1, cfg.batch
        ) if bs is not None else None  # bs: (cfg.batch_steps, cfg.batch) or None

        dataloader = DataLoader(dataset, batch_size=cfg.batch, shuffle=True)
        for t, inputs in enumerate(dataloader):

            loss, L_mean = rein_loss(model, inputs, bs, t, device)
            optimizer.zero_grad()
            loss.backward()
            # print('grad: ', model.Decoder.Wk1.weight.grad[0][0])
            # https://github.com/wouterkool/attention-learn-to-route/blob/master/train.py
            nn.utils.clip_grad_norm_(model.parameters(),
                                     max_norm=1.0,
                                     norm_type=2)
            optimizer.step()

            ave_loss += loss.item()
            ave_L += L_mean.item()

            if t % (cfg.batch_verbose) == 0:
                t2 = time()
                print(
                    'Epoch %d (batch = %d): Loss: %1.3f L: %1.3f, %dmin%dsec' %
                    (epoch, t, ave_loss / (t + 1), ave_L / (t + 1),
                     (t2 - t1) // 60, (t2 - t1) % 60))
                if cfg.islogger:
                    if log_path is None:
                        log_path = '%s%s_%s.csv' % (
                            cfg.log_dir, cfg.task, cfg.dump_date
                        )  #cfg.log_dir = ./Csv/
                        with open(log_path, 'w') as f:
                            f.write('time,epoch,batch,loss,cost\n')
                    with open(log_path, 'a') as f:
                        f.write('%dmin%dsec,%d,%d,%1.3f,%1.3f\n' %
                                ((t2 - t1) // 60,
                                 (t2 - t1) % 60, epoch, t, ave_loss /
                                 (t + 1), ave_L / (t + 1)))
                t1 = time()

        baseline.epoch_callback(model, epoch)
        torch.save(model.state_dict(),
                   '%s%s_epoch%s.pt' % (cfg.weight_dir, cfg.task, epoch))
Example #17
							marker = dict(size = 15),
							name = 'depot'
							)

	layout = go.Layout(title = '<b>Example: {}</b>'.format(title),
					   xaxis = dict(title = 'X coordinate'),
					   yaxis = dict(title = 'Y coordinate'),
					   showlegend = True,
					   width = 1000,
					   height = 1000,
					   template = "plotly_white"
					   )

	data = [trace_points, trace_depo] + list_of_path_traces
	print('Current path: ', pi_)
	fig = go.Figure(data = data, layout = layout)
	fig.show()

if __name__ == '__main__':
	model = AttentionModel(decode_type = 'sampling')
	pretrained = load_model(file_parser().path)
	dataset = generate_data(n_customer = 20)
	for i, data in enumerate(dataset.batch(5)):
		cost, _, pi = model(data, return_pi = True)
		idx_min = tf.argmin(cost, axis = 0)
		get_journey(data, pi, 'untrained model', idx_min)
		cost, _, pi = pretrained(data, return_pi = True)
		idx_min = tf.argmin(cost, axis = 0)
		get_journey(data, pi, 'pretrained model', idx_min)
		if i == 0:
			break