def train_cls_step(inputs, labels):
    """Training step for simple classification.

    Args:
        inputs: list or tuple
            a list of tensors; multiple input tensors can be
            passed to the given model
        labels: tensor

    Returns:
        loss: batch loss
        acc: train accuracy
    """
    nonlocal model
    nonlocal optimizer
    nonlocal loss_func

    with tf.GradientTape() as tape:
        out = model(*inputs)
        if isinstance(out, tuple):
            out = out[0]
        cost = loss_func(labels, out, from_logits=True)
        loss = tf.reduce_mean(cost)

    trainables = model.trainable_variables
    gradients = tape.gradient(loss, trainables)
    optimizer.apply_gradients(zip(gradients, trainables))

    acc = calculate_accuracy(out, labels)
    return loss, acc
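# train_cls_step above uses `nonlocal`, so it only makes sense as a closure
# returned by a factory. make_training_cls_model is not shown in this
# section; the sketch below (with a hypothetical name) is an assumption
# inferred from its call sites (model, optimizer, and a loss name string),
# not the actual implementation.
import tensorflow as tf

def _make_training_cls_model_sketch(model, optimizer, loss):
    # resolve a name like 'sparse_categorical_crossentropy' to the
    # corresponding tf.keras.losses function
    loss_func = tf.keras.losses.get(loss)

    def train_cls_step(inputs, labels):
        ...  # body as defined above

    return train_cls_step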
def test_calculate_accuracy_works(self):
    batch_size = 10
    num_classes = 5

    # one-hot labels: argmax should be applied to them first,
    # so prediction width must match the number of label classes
    input1 = tf.Variable(np.random.rand(batch_size, num_classes + 1))
    _labels = np.zeros((batch_size, num_classes + 1), dtype=np.int32)
    for i in range(batch_size):
        _labels[i, random.randint(0, num_classes)] = 1
    labels1 = tf.Variable(_labels)
    score = calculate_accuracy(input1, labels1)
    self.assertGreaterEqual(score, 0)
    self.assertLessEqual(score, 1)

    # index labels: no argmax needed, but values must be valid class ids
    labels2 = tf.Variable(
        np.random.randint(0, num_classes + 1, size=batch_size,
                          dtype=np.int32))
    score = calculate_accuracy(input1, labels2)
    self.assertGreaterEqual(score, 0)
    self.assertLessEqual(score, 1)
def test_calculate_accuracy_with_numpy(self):
    # check numpy compatibility
    batch_size = 10
    num_classes = 5

    # one-hot labels: argmax should be applied to them first,
    # so prediction width must match the number of label classes
    input1 = np.random.rand(batch_size, num_classes + 1)
    labels1 = np.zeros((batch_size, num_classes + 1))
    for i in range(batch_size):
        labels1[i, random.randint(0, num_classes)] = 1
    score = calculate_accuracy(input1, labels1)
    self.assertGreaterEqual(score, 0)
    self.assertLessEqual(score, 1)

    # index labels: no argmax needed, but values must be valid class ids
    labels2 = np.random.randint(0, num_classes + 1, size=batch_size)
    score = calculate_accuracy(input1, labels2)
    self.assertGreaterEqual(score, 0)
    self.assertLessEqual(score, 1)
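# calculate_accuracy itself is not shown in this section. Below is a
# minimal sketch (under a hypothetical name) consistent with the two tests
# above: one-hot labels get an argmax first, integer index labels are
# compared directly. The real implementation may differ.
import numpy as np

def _calculate_accuracy_sketch(predictions, labels):
    # np.asarray accepts numpy arrays as well as eager tf tensors/variables
    predictions = np.asarray(predictions)
    labels = np.asarray(labels)
    pred_ids = np.argmax(predictions, axis=-1)
    if labels.ndim > 1:
        # one-hot labels: reduce to class indices first
        labels = np.argmax(labels, axis=-1)
    return float(np.mean(pred_ids == labels.reshape(-1)))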
def main(train, val, *, save=False):
    max_acc = 0

    # set up training model
    model = ClassificationModel(units, pad_max_len, processor.vocab_size,
                                embedding_dim, 3)
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    train_cls_step = make_training_cls_model(
        model, optimizer, 'sparse_categorical_crossentropy')

    for epoch in range(1, epochs + 1):
        epoch_start = time.time()
        print('=====' * 10)
        print(' Epoch:', epoch)
        print('=====' * 10)

        batch_st = time.time()
        random.shuffle(train)

        for batch, (inputs, labels) \
                in enumerate(data_generator(train, batch_size=batch_size)):
            st = time.time()
            loss, acc = train_cls_step(inputs, labels)

            if batch % display_step == 0:
                if DEBUG:
                    pred, weights = model(*inputs)
                    print('[DEBUG] Batch: {}'.format(batch))
                    print('[DEBUG] Prediction:')
                    print('  Pred:\n   ', np.argmax(pred, axis=-1))
                    print('  Label:\n   ', labels.reshape(-1))
                    print('[DEBUG] Weights/Question:')
                    print(weights[0].numpy().reshape(-1))
                    # inputs[0] holds the batch of encoded questions
                    print(*[
                        processor.index_word[q] for q in inputs[0][0] if q > 0
                    ])
                    print()

                end = time.time()
                batch_end = time.time()
                print(' Batch -', batch)
                print(' Train: Loss - {:.4f} Acc - {:.4f} '
                      'Time(calc) - {:.4f}s/batch '
                      'Time(total) - {:.4f}s/batch'.format(
                          loss, acc, end - st, batch_end - batch_st))
                batch_st = time.time()

        # evaluate on validation data
        loss_val = 0
        acc_val = 0
        count = 0
        val_st = time.time()

        for in_val, l_val in data_generator(val, batch_size=batch_size):
            out_val = model(*in_val)
            if isinstance(out_val, tuple):
                out_val = out_val[0]
            cost = tf.keras.losses.sparse_categorical_crossentropy(
                l_val, out_val, from_logits=True)
            loss_val += tf.reduce_mean(cost)
            acc_val += calculate_accuracy(out_val, l_val)
            count += 1

        # average over validation batches
        loss_val /= count
        acc_val /= count
        val_end = time.time()

        print()
        print(' Validation(approx.): Loss - {:.4f} Acc - {:.4f} '
              'Time - {:.4f}s'.format(loss_val, acc_val, val_end - val_st))
        print(' Total time per epoch: {:.4f}s'.format(
            time.time() - epoch_start))
        print()

        # save when validation accuracy reaches a new best
        if save and acc_val > max_acc:
            max_acc = acc_val
            print('Saving model weights')
            model.save_weights(
                os.path.join(Config.MODELS.get('Y/N'), 'weights'))
            print('Saved!')
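# data_generator is referenced but not defined in this section, and it is
# called with two different signatures in this module:
# data_generator(data, batch_size=...) here and in the graph-mode main,
# and data_generator(inputs, labels, batch_size) in the classifier main.
# Below is a rough sketch (hypothetical name) of the first form, assuming
# each record is a (question_ids, label, image_features) tuple; the field
# layout is an assumption, not taken from the actual implementation.
import numpy as np

def _data_generator_sketch(data, batch_size=32):
    for start in range(0, len(data), batch_size):
        chunk = data[start:start + batch_size]
        questions = np.array([rec[0] for rec in chunk])
        labels = np.array([rec[1] for rec in chunk], dtype=np.int32)
        imgs = np.array([rec[2] for rec in chunk], dtype=np.float32)
        # main(train, val) above consumes (inputs, labels) pairs,
        # where inputs is the list of model inputs
        yield [questions, imgs], labels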
def main(*, training=True, save_to=None, load_from=None, val=0.2):
    global data_size
    global num_classes
    global processor

    vqa = VQA()
    vqa.load_data(num_data=data_size)
    questions, question_types, _, _ = next(vqa.data_generator())
    labels = [
        q2id[q] if q in q2id else q2id['none of the above']
        for q in question_types
    ]

    # build processor based on the training dataset,
    # unless an existing processor is reused
    if training:
        # preprocess dataset: split into train and validation sets
        train_size = int(data_size * (1 - val))

        # inputs
        inputs_train = questions[:train_size]
        inputs_val = questions[train_size:]

        # process inputs
        # if a tokenizer is not loaded, create a new one
        if processor is None:
            processor = text_processor(inputs_train)

    # initialize model
    model = QuestionTypeClassification(
        embedding_dim=embedding_dim,
        units=hidden_units,
        vocab_size=vocab_size,  # need to add 1 due to Embedding implementation
        num_classes=num_classes)

    # set initial weights on the model
    if load_from is not None:
        print('Loading weights...')
        model.load_weights(load_from)

    # TRAINING STEP
    if training:
        min_loss_val = 1.0
        print('Start training')

        inputs_train = processor(inputs_train)
        inputs_val = [processor(inputs_val)]

        # labels
        labels = np.array(labels, dtype=np.int32)
        labels_train = labels[:train_size]
        labels_val = labels[train_size:]

        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
        train_cls_step = make_training_cls_model(
            model, optimizer, loss='sparse_categorical_crossentropy')

        # execute training
        for epoch in range(epochs):
            print('=====' * 10)
            print(' Epoch {}'.format(epoch + 1))
            print('=====' * 10)

            dataset = data_generator(inputs_train, labels_train, batch_size)
            for batch, (ins, outs) in enumerate(dataset):
                st = time.time()
                ins = [ins]
                batch_loss, accuracy = train_cls_step(ins, outs)
                end = time.time()

                if batch % 100 == 0:
                    out_val = model(*inputs_val)
                    cost_val = tf.keras.losses.sparse_categorical_crossentropy(
                        labels_val, out_val, from_logits=True)
                    loss_val = tf.reduce_mean(cost_val)
                    acc_val = calculate_accuracy(out_val, labels_val)

                    if DEBUG:
                        print('[DEBUG] Batch:', batch)
                        for layer in model.layers:
                            print('  Layer:', model.name + ':' + layer.name)
                            print('  Weights:')
                            print('    mean:', np.mean(layer.get_weights()[0]))
                            print('    std:', np.std(layer.get_weights()[0]))
                            print()

                    batch_loss = batch_loss.numpy()
                    print(' Batch:', batch)
                    print(' Loss: {:.4f} Accuracy(Train): {:.4f} '
                          'Loss(Val): {:.4f} Accuracy(Val): {:.4f} '
                          'Time(batch): {:.4f}s'.format(
                              batch_loss, accuracy, loss_val, acc_val,
                              end - st))

                    if loss_val < min_loss_val:
                        min_loss_val = loss_val
                        print('Saving models...')
                        # save tokenizer info for reuse
                        processor.to_json('./.env/tokenizer_config.json')
                        model.save_weights(save_to)
                        print('Saved!!')
        print()
        print('Training completed')
    else:
        # if not in training mode, evaluate with all given data
        st = time.time()
        inputs = processor(questions)
        out = model(inputs)
        labels = tf.Variable(labels, dtype=tf.int32)
        accuracy = calculate_accuracy(out, labels)
        end = time.time()
        print('Evaluated score: Accuracy: {:.4f} Time: {:.4f}s'.format(
            accuracy, end - st))
    return model
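# text_processor is not defined in this section. The code above relies on a
# callable that encodes/pads raw questions and exposes vocab_size,
# index_word, and to_json. Below is a plausible sketch (hypothetical names)
# assuming it wraps tf.keras.preprocessing.text.Tokenizer; the real
# implementation may differ.
import tensorflow as tf

class _TextProcessorSketch:
    def __init__(self, texts, pad_max_len=None):
        self._tok = tf.keras.preprocessing.text.Tokenizer(oov_token='<unk>')
        self._tok.fit_on_texts(texts)
        self.pad_max_len = pad_max_len
        self.vocab_size = len(self._tok.word_index)
        self.index_word = self._tok.index_word

    def __call__(self, texts):
        # encode to integer sequences and pad to a fixed length
        seqs = self._tok.texts_to_sequences(texts)
        return tf.keras.preprocessing.sequence.pad_sequences(
            seqs, maxlen=self.pad_max_len, padding='post')

    def to_json(self, path):
        # persist the tokenizer config so the processor can be reused
        with open(path, 'w') as f:
            f.write(self._tok.to_json())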
def main(train, val):
    global graph

    # not necessary when running this as a script, but needed when running
    # this code repeatedly, e.g. in a Jupyter notebook
    tf.compat.v1.reset_default_graph()
    graph = tf.Graph()

    with graph.as_default():
        with tf.name_scope('cls'):
            LABELS = tf.compat.v1.placeholder(dtype=tf.float32,
                                              shape=(None, ),
                                              name='labels')

        with tf.name_scope('questions'):
            embedding = tf.keras.layers.Embedding(processor.vocab_size + 1,
                                                  embedding_dim)
            q_gru = tf.keras.layers.GRU(units,
                                        return_state=True,
                                        return_sequences=True,
                                        recurrent_initializer='glorot_uniform')
            attention_q = Attention(units)
            QS = tf.compat.v1.placeholder(dtype=tf.float32,
                                          shape=(None, pad_max_len),
                                          name='encoded_questions')

        with tf.name_scope('images'):
            attention_img = Attention(units)
            IMGS = tf.compat.v1.placeholder(dtype=tf.float32,
                                            shape=(None, image_seq, 1024),
                                            name='imgs')

        # images
        img_encoded = tf.keras.layers.Dense(embedding_dim)(IMGS)

        # use the last state from question encoding as the attention query
        # (batch_size, seq_length, embedding_dim)
        q_encoded = embedding(QS)
        q_outputs, q_state = q_gru(q_encoded)

        # image attention
        context2, _ = attention_img(img_encoded, q_state)
        # question attention
        context1, WEIGHTS = attention_q(q_outputs, context2)

        # classification
        x = tf.concat([context1, context2], axis=-1)
        x = tf.keras.layers.Dense(1024)(x)
        x = tf.keras.layers.Dense(1024)(x)
        PRED = tf.keras.layers.Dense(3)(x)

        COST = tf.keras.losses.sparse_categorical_crossentropy(
            LABELS, PRED, from_logits=True, axis=-1)
        LOSS = tf.reduce_mean(COST)
        OPT = tf.compat.v1.train.GradientDescentOptimizer(
            learning_rate=learning_rate).minimize(LOSS)

    with tf.compat.v1.Session(graph=graph) as sess:
        sess.run(tf.compat.v1.global_variables_initializer())
        set_session(sess)

        if DEBUG:
            trainables = sess.run(tf.compat.v1.trainable_variables())
            print('Total trainables:', len(trainables))

        for epoch in range(1, epochs + 1):
            epoch_start = time.time()
            print('=====' * 10)
            print(' Epoch:', epoch)
            print('=====' * 10)

            batch_start = time.time()
            random.shuffle(train)

            for batch, (questions, labels, img_features) \
                    in enumerate(data_generator(train, batch_size=batch_size)):
                st = time.time()
                _, loss, cost, pred, weights = sess.run(
                    [OPT, LOSS, COST, PRED, WEIGHTS],
                    feed_dict={
                        QS: questions,
                        LABELS: labels,
                        IMGS: img_features
                    })
                end_calc = time.time()

                if DEBUG and batch % display_step == 0:
                    print('[DEBUG] Batch: {}'.format(batch))
                    print('[DEBUG] Prediction:')
                    print('  Pred:\n   ', np.argmax(pred, axis=-1))
                    print('  Label:\n   ', labels.reshape(-1))
                    print('[DEBUG] Weights/Question:')
                    print(weights[0].reshape(-1))
                    print(*[
                        processor.index_word[q] for q in questions[0] if q > 0
                    ])
                    print()

                acc = calculate_accuracy(pred, labels)
                if batch % display_step == 0:
                    print(' Batch -', batch)
                    print(' Train: Loss - {:.4f} Acc - {:.4f} '
                          'Time(calc) - {:.4f}s/batch '
                          'Time(total) - {:.4f}s/batch'.format(
                              loss, acc, end_calc - st,
                              time.time() - batch_start))
                    batch_start = time.time()

            # after training in each epoch,
            # evaluate the model on the validation dataset
            loss_val = 0
            acc_val = 0
            st_val = time.time()

            for q_val, l_val, i_val in data_generator(val,
                                                      batch_size=batch_size):
                _loss_val, pred_val = sess.run([LOSS, PRED],
                                               feed_dict={
                                                   QS: q_val,
                                                   LABELS: l_val,
                                                   IMGS: i_val
                                               })
                l_val = l_val.ravel()
                loss_val += _loss_val
                acc_val += calculate_accuracy(pred_val, l_val)

            loss_val /= step_per_val
            acc_val /= step_per_val
            end_val = time.time()

            print()
            print(' Validation(approx.): Loss - {:.4f} Acc - {:.4f} '
                  'Time - {:.4f}s'.format(loss_val, acc_val,
                                          end_val - st_val))
            print(' Total time per epoch: {:.4f}s'.format(
                time.time() - epoch_start))
            print()
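# The Attention layer instantiated in the graph above is also not defined
# in this section. Its call signature (features, query) -> (context,
# weights) matches a standard Bahdanau-style additive attention; a sketch
# under that assumption (hypothetical name):
import tensorflow as tf

class _AttentionSketch(tf.keras.layers.Layer):
    def __init__(self, units):
        super().__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, features, query):
        # features: (batch, seq_len, dim), query: (batch, query_dim)
        query = tf.expand_dims(query, 1)
        # additive attention score: (batch, seq_len, 1)
        score = self.V(tf.nn.tanh(self.W1(features) + self.W2(query)))
        weights = tf.nn.softmax(score, axis=1)
        # weighted sum over the sequence axis
        context = tf.reduce_sum(weights * features, axis=1)
        return context, weights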