import tensorflow as tf  # TensorFlow 1.x API

# X_1, X_2, y, args, learning_rate, and margin are defined earlier in the script.

# Build the input embedding: add a channel axis to X_2 and concatenate the two
# embedding channels along the last axis.
X_2 = X_2[..., tf.newaxis]
X_embedding = tf.concat([X_1, X_2], axis=-1)
tf.logging.info("input dimension:{}".format(X_embedding.get_shape()))

# Select the network architecture.
if args.model_type == 'capsule-A':
    poses, activations = capsule_model_A(X_embedding, args.num_classes)
if args.model_type == 'capsule-B':
    poses, activations = capsule_model_B(X_embedding, args.num_classes)
if args.model_type == 'CNN':
    poses, activations = baseline_model_cnn(X_embedding, args.num_classes)
if args.model_type == 'KIMCNN':
    poses, activations = baseline_model_kimcnn(X_embedding, args.max_sent, args.num_classes)

# Select the training objective.
if args.loss_type == 'spread_loss':
    loss = spread_loss(y, activations, margin)
if args.loss_type == 'margin_loss':
    loss = margin_loss(y, activations)
if args.loss_type == 'cross_entropy':
    loss = cross_entropy(y, activations)

# Predictions and accuracy.
y_pred = tf.argmax(activations, axis=1, name="y_proba")
correct = tf.equal(tf.argmax(y, axis=1), y_pred, name="correct")
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

# Optional TensorBoard summaries (left disabled).
# tf.summary.scalar('accuracy', accuracy)
# merged = tf.summary.merge_all()
# writer = tf.summary.FileWriter('/tmp/writer_log')

# Optimizer, training op, and explicit access to the gradients so they can be
# checked for NaNs/Infs before an update is applied.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss, name="training_op")
gradients, variables = zip(*optimizer.compute_gradients(loss))
grad_check = ([tf.check_numerics(g, message='Gradient NaN Found!')
               for g in gradients if g is not None]
              + [tf.check_numerics(loss, message='Loss NaN Found')])
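The listing only dispatches to spread_loss, margin_loss, and cross_entropy without showing their bodies. For orientation, here is a minimal sketch of what margin_loss could look like, following the standard capsule margin loss of Sabour et al. (2017) with the commonly used constants m+ = 0.9, m- = 0.1, and lambda = 0.25; the actual implementation in the source may differ.

def margin_loss(y, activations):
    # y: one-hot labels [batch, num_classes]; activations: capsule lengths in [0, 1].
    # Sketch only -- constants are the usual defaults, not taken from the listing.
    y = tf.cast(y, tf.float32)
    positive = y * tf.square(tf.maximum(0.0, 0.9 - activations))
    negative = 0.25 * (1.0 - y) * tf.square(tf.maximum(0.0, activations - 0.1))
    return tf.reduce_mean(tf.reduce_sum(positive + negative, axis=1))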
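The tf.check_numerics ops collected in grad_check are assertions; they only take effect if the parameter update is made to depend on them. A minimal sketch of how that wiring typically looks in TF 1.x follows; update_ops and global_step are assumed names not present in the listing, and this variant replaces the training_op built with minimize() above.

# Run the NaN checks before applying gradients, so training fails loudly
# instead of silently diverging. global_step is assumed to exist.
with tf.control_dependencies(grad_check):
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)  # e.g. batch-norm updates
    with tf.control_dependencies(update_ops):
        training_op = optimizer.apply_gradients(
            zip(gradients, variables), global_step=global_step)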