Beispiel #1
0
    def load_model(self):
        """Restore the newest checkpoint into a fresh session.

        The graph is imported from ``model.ckpt.meta`` and the variable values
        are read from the latest checkpoint found in the current directory.
        The session and the saver end up in ``self.sess`` and ``self.saver``.
        """
        session = tf.Session()
        restorer = tf.train.import_meta_graph('model.ckpt.meta')
        newest_checkpoint = tf.train.latest_checkpoint('./')
        restorer.restore(session, newest_checkpoint)

        # Publish both objects only once the restore has succeeded.
        self.sess = session
        self.saver = restorer
def linear_regression():
    """Fit ``y = w * x + b`` to ten hard-coded points and plot the fit.

    The model is trained one sample at a time with gradient descent for 100
    epochs; the mean squared error of each epoch is printed. Afterwards every
    prediction is printed beside its target and a matplotlib figure is shown.
    The graph definition is also written to ``./graphs``.
    """
    x_train = np.asarray([1, 2, 3, 4, 5, 6, 7, 8, 9, 11])
    y_train = np.asarray([0.1, 0.2, 0.32, 0.43, 0.54, 0.65, 0.77, 0.88, 0.94, 1])
    n_sample = x_train.shape[0]

    # Graph: scalar inputs, a scalar weight and a scalar bias, squared error.
    x_in = tf.placeholder(tf.float32, name="x")
    y_in = tf.placeholder(tf.float32, name="y")
    weight = tf.get_variable("weights", initializer=tf.constant(0.0))
    bias = tf.get_variable("bias", initializer=tf.constant(0.0))
    loss = tf.square(y_in - (weight * x_in + bias), name='loss')
    train_step = tf.train.GradientDescentOptimizer(learning_rate=0.001).minimize(loss)

    # The writer is only needed to dump the graph, so close it straight away.
    tf.summary.FileWriter("./graphs", tf.get_default_graph()).close()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for i in range(100):
            # One optimisation step per sample; keep only the loss value.
            sample_losses = [
                sess.run([train_step, loss], feed_dict={x_in: x, y_in: y})[1]
                for x, y in zip(x_train, y_train)
            ]
            total_loss = sum(sample_losses)
            print(f"Epoch {i}: {total_loss / n_sample}")

        w_out, b_out = sess.run([weight, bias])
        y_predict = x_train * w_out + b_out
        for i, j in zip(y_predict, y_train):
            print(f"{i} : {j}")

        plt.plot(x_train, y_predict, "r-", label="predict")
        plt.plot(x_train, y_train, "go", label="data")
        plt.title("ABC")
        plt.xlabel("x")
        plt.ylabel("y")
        plt.show()
Beispiel #3
0
    def __init__(self,
                 exp_rate=0.3,
                 lr=0.1,
                 n_steps=5,
                 episodes=1000,
                 sess: tf.Session = None):
        """Set up the maze, the learning state and the summary writers.

        Args:
            exp_rate: Stored as ``self.exp_rate``.
            lr: Stored as ``self.lr``.
            n_steps: Stored as ``self.steps``.
            episodes: Number of episodes that are going to be played.
            sess: Session to use; a new one is created when ``None``.
        """
        # Environment and the values derived from it.
        self.maze = DynaQMaze()
        self.actions = self.maze.action_space
        self.n_actions = len(self.actions)

        # Hyper-parameters.
        self.exp_rate, self.lr = exp_rate, lr
        self.steps = n_steps
        self.episodes = episodes

        # Bookkeeping containers.
        self.state_actions = []  # state & action transitions
        self.steps_per_episode = []
        self.state = self.maze.get_current_state()
        self.Q_values = {}
        self.model = {}  # model function

        self.maze.render()

        self.sess = tf.Session() if sess is None else sess

        # Two writers over the same graph, logging to separate directories.
        graph = self.sess.graph
        self.writer1 = tf.summary.FileWriter('./log/r-1', graph)
        self.writer2 = tf.summary.FileWriter('./log/r-2', graph)

        # Both scalar summaries read their value from the same placeholder.
        self.tmp_tensor = tf.placeholder(tf.float32)
        self.all_reward_summary = tf.summary.scalar('all_reward',
                                                    self.tmp_tensor)
        self.all_cnt_summary = tf.summary.scalar('all_cnt', self.tmp_tensor)
        self.write_op = tf.summary.merge_all()
Beispiel #4
0
def tensorflow_session():
    """Return a new session that sees only GPU 0 and grows memory on demand."""
    session_config = tf.ConfigProto()
    gpu_options = session_config.gpu_options
    # Let the GPU allocation grow as needed.
    gpu_options.allow_growth = True
    # Pin the process to a single GPU (device 0).
    gpu_options.visible_device_list = str(0)
    return tf.Session(config=session_config)
Beispiel #5
0
 def __init__(self):
     """Create the session, the Pendulum environment and the learning model."""
     self.sess = tf.Session()

     # Fixed sizes used by the rest of the class.
     self.action_space = 11
     self.memory_size = 3000
     self.n_features = 3

     # Use the unwrapped environment and seed it with a fixed value.
     self.env = gym.make('Pendulum-v0').unwrapped
     self.env.seed(1)

     self.build_learning_model()
Beispiel #6
0
    def __init__(self,
                 n_state,
                 n_action,
                 learning_rate,
                 gamma,
                 replay_buffer_size=3000,
                 sess: tf.Session = None):
        """Create the replay buffer, placeholders and the eval/target networks.

        Args:
            n_state: Width of a state vector.
            n_action: Stored as ``self.n_action``.
            learning_rate: Stored as ``self.learning_rate``.
            gamma: Stored as ``self.gamma``.
            replay_buffer_size: Number of rows in the replay buffer.
            sess: Session to use; a new one is created when ``None``.
        """
        self.n_state = n_state
        self.n_action = n_action
        self.fai_s_size = 512
        self.w = np.zeros([n_state])
        self.learning_rate = learning_rate
        self.gamma = gamma

        # Each row is n_state * 2 + 2 wide -- presumably state, next state and
        # two scalars; verify against the code that fills the buffer.
        self.replay_buffer = np.zeros(
            [replay_buffer_size, self.n_state * 2 + 2])
        self.memory_size = replay_buffer_size
        self.memory_count = 0

        self.state = tf.placeholder(tf.float32, [None, self.n_state])
        self.state_hat = tf.placeholder(tf.float32, [None, self.n_state])
        self.state_ = tf.placeholder(tf.float32, [None, self.n_state])
        self.rs_p = tf.placeholder(tf.float32, [None, 1])

        self.sess = tf.Session() if sess is None else sess

        self.eval_collection_name = [
            'eval_net_collection', tf.GraphKeys.GLOBAL_VARIABLES
        ]
        self.target_collection_name = [
            'target_net_collection', tf.GraphKeys.GLOBAL_VARIABLES
        ]

        # Start from an empty log directory. A missing directory (first run)
        # is not an error.
        if os.path.isdir("./log"):
            shutil.rmtree("./log")
        os.makedirs("./log", exist_ok=True)

        with tf.variable_scope('eval_net'):
            (self.eval_fai, self.eval_s_hat, self.eval_r_s,
             self.eval_M) = self._build_net(self.eval_collection_name)
        with tf.variable_scope('target_net'):
            # The target network's first output gets its own attribute instead
            # of overwriting self.eval_fai.
            (self.target_fai, self.target_s_hat, self.target_r_s,
             self.target_M) = self._build_net(self.target_collection_name)

        # The copy ops must be created after both networks exist, and must
        # read the collections under the names the networks were given;
        # otherwise the zip below runs over empty lists.
        with tf.variable_scope('assign_op'):
            e_params = tf.get_collection(self.eval_collection_name[0])
            t_params = tf.get_collection(self.target_collection_name[0])
            self.assign_op = [
                tf.assign(t, e) for t, e in zip(t_params, e_params)
            ]

        tf.summary.FileWriter("./log", self.sess.graph)
        self.sess.run(tf.global_variables_initializer())
Beispiel #7
0
    def __init__(self,
                 n_actions,
                 n_features,
                 learning_rate=0.005,
                 reward_decay=0.9,
                 replace_decay=0.9,
                 e_greedy=0.9,
                 replace_target_iter=200,
                 memory_size=3000,
                 batch_size=32,
                 e_greedy_increment=None,
                 output_graph=False,
                 double_q=True,
                 sess: tf.Session = None):
        """Store the hyper-parameters, build the networks and pick a session.

        When ``sess`` is ``None`` a new session is created and all global
        variables are initialised in it; a supplied session is used as is.
        With ``output_graph`` set, the graph is written to ``./logs/``.
        """
        # Problem dimensions.
        self.n_actions, self.n_features = n_actions, n_features

        # Learning hyper-parameters.
        self.learning_rate = learning_rate
        self.gamma = reward_decay
        self.replace_decay = replace_decay
        self.replace_target_iter = replace_target_iter
        self.batch_size = batch_size
        self.output_graph = output_graph
        self.double_q = double_q

        # Epsilon starts at 0 when an increment is configured, otherwise it
        # sits at its maximum from the beginning.
        self.epsilon_max = e_greedy
        self.e_greedy_increment = e_greedy_increment
        if e_greedy_increment is None:
            self.epsilon = self.epsilon_max
        else:
            self.epsilon = 0

        # Replay memory and counters.
        self.memory_size = memory_size
        self.memory = np.zeros((memory_size, n_features * 2 + 2))
        self.memory_counter = 0
        self.learn_step_counter = 0

        self._build_net()

        # Ops that copy every eval-net parameter onto its target-net partner.
        source_params = tf.get_collection('eval_net_params')
        target_params = tf.get_collection('target_net_params')
        with tf.variable_scope("assign_op"):
            self.replace_target_op = [
                tf.assign(target, source)
                for target, source in zip(target_params, source_params)
            ]

        if sess is not None:
            self.sess = sess
        else:
            self.sess = tf.Session()
            self.sess.run(tf.global_variables_initializer())

        if output_graph:
            tf.summary.FileWriter("./logs/", self.sess.graph)

        self.cost_his = []  # history of loss values
Beispiel #8
0
    def __init__(self, checkpoint_filename, input_name="images",
                 output_name="features"):
        """Load a serialised graph and locate its input and output tensors.

        Args:
            checkpoint_filename: Path to a binary ``GraphDef`` file.
            input_name: Name of the input node, with or without the ``net/``
                import prefix.
            output_name: Name of the output node, with or without the
                ``net/`` import prefix.

        Raises:
            KeyError: If a tensor is found under neither spelling of its name.
            AssertionError: If the output is not rank 2 or the input is not
                rank 4.
        """
        self.session = tf.Session()
        with tf.gfile.GFile(checkpoint_filename, "rb") as file_handle:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(file_handle.read())
        tf.import_graph_def(graph_def, name="net")

        graph = tf.get_default_graph()

        def find_tensor(node_name):
            # The graph was imported under the "net" prefix, so a bare node
            # name such as the default "images" only exists as "net/images".
            # Try the name exactly as given first so callers that already
            # pass a prefixed name keep working.
            try:
                return graph.get_tensor_by_name("%s:0" % node_name)
            except KeyError:
                return graph.get_tensor_by_name("net/%s:0" % node_name)

        self.input_var = find_tensor(input_name)
        self.output_var = find_tensor(output_name)

        assert len(self.output_var.get_shape()) == 2
        assert len(self.input_var.get_shape()) == 4
        # Last output dimension, and the input shape without its first axis.
        self.feature_dim = self.output_var.get_shape().as_list()[-1]
        self.image_shape = self.input_var.get_shape().as_list()[1:]
Beispiel #9
0
    def __post_init__(self):
        """Load hyper-parameters, build the sampling graph and restore weights.

        Raises:
            AssertionError: If ``nsamples`` is not a multiple of the batch size.
            ValueError: If ``length`` exceeds the model's context window, or
                if no checkpoint is found in the model directory.
        """
        if self.batch_size is None:
            self.batch_size = 1
        assert self.nsamples % self.batch_size == 0

        model_path = os.path.join(self.models_dir, self.model_name)

        self.enc = encoder.get_encoder(self.models_dir, self.model_name)
        self.hparams = model.default_hparams()
        with open(os.path.join(model_path, 'hparams.json')) as f:
            self.hparams.override_from_dict(json.load(f))

        if self.length is None:
            self.length = self.hparams.n_ctx // 2
        elif self.length > self.hparams.n_ctx:
            raise ValueError("Can't get samples longer than window size: %s" % self.hparams.n_ctx)

        # No initializer is run: the saver below is created over all variables
        # and restores every one of them from the checkpoint.
        self.sess = tf.Session()

        self.context = tf.placeholder(tf.int32, [self.batch_size, None])
        np.random.seed(self.seed)
        tf.set_random_seed(self.seed)
        self.output = sample.sample_sequence(
            hparams=self.hparams, length=self.length,
            context=self.context,
            batch_size=self.batch_size,
            temperature=self.temperature, top_k=self.top_k, top_p=self.top_p
        )

        saver = tf.train.Saver()
        print(f"MODEL DIR {self.models_dir}")
        print(f"MODEL NAME {self.model_name}")
        print(f"PWD {os.getcwd()}")
        print(f"MODEL DIR ABS {Path(self.models_dir).absolute()}")
        ckpt = tf.train.latest_checkpoint(model_path)
        if ckpt is None:
            # Fail with the directory in the message instead of the generic
            # error the saver raises for a None path.
            raise ValueError("No checkpoint found in %s" % model_path)
        saver.restore(self.sess, ckpt)
Beispiel #10
0
    def trainingProcess(self):
        """Train until ``self.maxEpoch`` is reached or patience hits zero.

        Every ``self.displayStep`` steps the current batch is evaluated and
        the running loss is displayed; every ``self.validStep`` steps (after
        the first epoch) ``self.valid`` is called and the loop stops once
        ``self.patience`` equals 0.
        """
        init_op = tf.global_variables_initializer()
        session_config = tf.ConfigProto()
        session_config.gpu_options.allow_growth = True
        running_loss = 0.0
        self.saver = tf.train.Saver()

        with tf.Session(config=session_config) as session:
            print("init...")
            session.run(init_op)
            print("init finished!")

            step = 0
            while self.texti.epoch < self.maxEpoch:
                inputs, images, words, labels = self.texti.nextBatch()

                # Periodic progress report on the current batch.
                if step % self.displayStep == 0:
                    eval_feed = self.make_feed_dict(inputs, images, words,
                                                    labels, False)
                    acc, p = session.run([self.PredAcc, self.p],
                                         feed_dict=eval_feed)
                    self.display(running_loss / self.displayStep, acc, p,
                                 labels, self.texti.epoch)
                    running_loss = 0.0

                # Periodic validation with early stopping.
                if step % self.validStep == 0 and self.texti.epoch > 0:
                    self.valid(session, step)
                    if self.patience == 0:
                        break

                train_feed = self.make_feed_dict(inputs, images, words,
                                                 labels, True)
                _, batch_loss = session.run([self.trainOP, self.loss],
                                            feed_dict=train_feed)
                running_loss += batch_loss

                step += 1
                if self.texti.epoch > -1:
                    session.run(self.addGlobal)
Beispiel #11
0
def train(x_train, y_train):
    """Train a linear model with logistic loss and L1 proximal updates.

    Reads the module-level settings ``learn_rate``, ``n_epochs``,
    ``batch_size`` and ``threshold``.

    Args:
        x_train: 2-D array of shape ``(n_samples, n_features)``.
        y_train: 2-D array with one row per sample; assumed to hold labels in
            {-1, +1} given the loss used -- TODO confirm.

    Returns:
        ``(curr_w, curr_b)``: the thresholded weights of shape
        ``(n_features, 1)`` and the bias after the last epoch.
    """
    n_samples, n_features = x_train.shape

    # Size the weights from the data so they always match the matmul below.
    w = tf.Variable(np.random.rand(n_features, 1).astype(dtype='float32'),
                    name="weight")
    b = tf.Variable(0.0, dtype=tf.float32, name="bias")

    x = tf.placeholder(dtype=tf.float32, name='x')
    y = tf.placeholder(dtype=tf.float32, name='y')

    predictions = tf.matmul(x, w) + b
    loss = tf.reduce_mean(
        tf.log(1 + tf.exp(tf.multiply(-1.0 * y, predictions))))

    optimizer = tf.train.ProximalGradientDescentOptimizer(
        learning_rate=learn_rate,
        l1_regularization_strength=0.1).minimize(loss)

    # Build the write-back op once. Creating tf.assign inside the epoch loop
    # would add a new node to the graph on every epoch.
    w_update = tf.placeholder(dtype=tf.float32, name='w_update')
    assign_w = tf.assign(w, w_update)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # Defined up front so the return value exists even with zero epochs.
        curr_w, curr_b = sess.run([w, b])

        for epoch in range(n_epochs):
            for idx in range(0, n_samples, batch_size):
                iE = min(n_samples, idx + batch_size)
                x_batch = x_train[idx:iE, :]
                y_batch = y_train[idx:iE, :]
                sess.run([optimizer], feed_dict={x: x_batch, y: y_batch})
            curr_w, curr_b = sess.run([w, b])

            # Shift every weight by `threshold`: up when it is below
            # -threshold, down otherwise.
            # NOTE(review): weights inside [-threshold, threshold] are also
            # shifted down rather than set to zero, unlike textbook
            # soft-thresholding -- confirm this is intended.
            below = curr_w < threshold * -1
            curr_w[below] += threshold
            curr_w[~below] -= threshold
            sess.run([assign_w], feed_dict={w_update: curr_w})
    return curr_w, curr_b
# Round the raw action output to five decimal places:
# scale up by 1e5, round to the nearest integer, scale back down.
action_output_ = tf.multiply(action_output_raw, 100000)
action_output_ = tf.round(action_output_)
action_output = tf.div(action_output_, 100000)

# Single output tensor: Q value(s) followed by the rounded action output,
# joined along axis 1 and exposed under a fixed node name.
prediction = tf.concat([Q_output, action_output], 1, name = "concat_node")
prediction_identity = tf.identity(prediction, name = "prediction_node")

# Both losses are computed on the raw (unrounded) outputs.
Q_loss = tf.keras.losses.mean_squared_error(y_true = Q_target, y_pred = Q_output_raw)
policy_loss = tf.keras.losses.categorical_crossentropy(y_true = action_target, y_pred = action_output_raw)

total_loss = Q_loss + policy_loss
train_op = tf.train.AdamOptimizer(learning_rate = learning_rate, name = "Optimizer").minimize(total_loss, name = 'optimize_node')

init = tf.global_variables_initializer()

# NOTE(review): this session is not closed anywhere in the visible code.
sess = tf.Session()
sess.run(init)
# The writer is opened only to dump the graph for TensorBoard, then closed.
train_writer = tf.summary.FileWriter(path_to_store + "/summary", sess.graph)
train_writer.close()


# Serialise the default graph's GraphDef to <path_to_store>/<model_name>.pb.
with open(os.path.join(path_to_store, model_name + '.pb'), 'wb') as f:
    f.write(tf.get_default_graph().as_graph_def().SerializeToString())



# builder = tf.saved_model.builder.SavedModelBuilder("C:/Users/Snurka/init_model")
# builder.add_meta_graph_and_variables(
#   sess,
#   [tf.saved_model.tag_constants.SERVING]
# )
import tensorflow._api.v2.compat.v1 as tf
import numpy as np
# Restore a saved linear model (4 inputs -> 1 output) and run one prediction.
tf.reset_default_graph()
tf.compat.v1.disable_eager_execution()
tf.compat.v1.disable_v2_behavior()
# NOTE(review): the returned op is discarded and no variables exist yet, so
# this call appears to have no effect -- confirm before removing.
tf.global_variables_initializer()
x = tf.placeholder(tf.float32, shape=[None, 4])
y = tf.placeholder(tf.float32, shape=[None, 1])
# The variable names must match those stored in the checkpoint.
w = tf.Variable(tf.random_normal([4, 1]), name="weight")
b = tf.Variable(tf.random_normal([1]), name="bias")
hypo = tf.matmul(x, w) + b
saver = tf.train.Saver()
# One sample with four features.
test_arr = [[12, 6.5, 15.7, 10.8]]
# NOTE(review): this session is not closed in the visible code.
sess2 = tf.Session()
# The variables get their values from the checkpoint, not from an initializer.
saver.restore(sess2, "./saved.ckpt")
predict = sess2.run(hypo, feed_dict={x: test_arr})
print(predict[0])
Beispiel #14
0
# Fetch the first entry of the 'loss' and 'optimizer' graph collections.
loss_node = tf.get_collection('loss')[0]
optimizer_node = tf.get_collection('optimizer')[0]

# Keep only the most recent checkpoint on disk.
saver = tf.train.Saver(max_to_keep=1)
# path = random.choice(replay_files)
# # Load all training data
# game = utils.Halite()
# #path = '1068739.json'
# game.load_replay(path)
# game.load_data()
# print("winner of this game is player: ", game.winner)
# X_frame, Y_ship, Y_shipyard = game.get_training_data()
# X_ship = game.get_my_ships()
# turns_left = game.turns_left
#assert (turns_left is not None)
with tf.Session() as sess:
    # .run() without an explicit session uses the session opened by the
    # enclosing `with` block.
    tf.initializers.global_variables().run()

    for step in range(5):
        # Load all training data
        game = utils.Halite()
        # path = random.choice(replay_files)
        # NOTE(review): `path` is only assigned in commented-out code within
        # this view -- confirm it is defined elsewhere in the file.
        game.load_replay(path)
        game.load_data()
        X_frame, Y_ship, Y_shipyard = game.get_training_data()
        X_ship = game.get_my_ships()
        turns_left = game.turns_left
        spawn = game.spawn
        assert (turns_left is not None)

        # first batch of parameters X: (halite,ship_pos) Y: ship_moves
Beispiel #15
0
def main(trainModel=True,
         buildConfusionMatrix=True,
         restore=False,
         buildClassifiedMatrix=True):
    """Build a small CNN classifier for 28x28 images, then train/evaluate it.

    Args:
        trainModel: Call ``train`` on the dataset when true.
        buildConfusionMatrix: Compute and draw the test confusion matrix.
        restore: Restore the variables from ``SAVE_PATH`` before anything else.
        buildClassifiedMatrix: For every (true class, predicted class) pair,
            find the test image with the highest predicted probability and
            draw the resulting grid.
    """
    tf.disable_v2_behavior()

    input_images = tf.placeholder(tf.float32, [None, 28, 28], name="Input")
    real = tf.placeholder(tf.float32, [None, CLASSES], name="real_classes")

    # Two convolutional blocks; the reshape below expects 7 * 7 * 56 features
    # per image at the output of the second one.
    layer1 = create_conv_layer(tf.reshape(input_images, [-1, 28, 28, 1]),
                               1,
                               28, [5, 5], [2, 2],
                               name="conv_no_pool")
    layer2 = create_conv_layer(layer1,
                               28,
                               56, [5, 5], [2, 2],
                               name='conv_with_pool')
    conv_result = tf.reshape(layer2, [-1, 7 * 7 * 56])

    # Fully connected ReLU layer with dropout.
    relu_layer_weight = tf.Variable(tf.truncated_normal([7 * 7 * 56, 1000],
                                                        stddev=STDDEV * 2),
                                    name='relu_layer_weight')
    rely_layer_bias = tf.Variable(tf.truncated_normal([1000],
                                                      stddev=STDDEV / 2),
                                  name='rely_layer_bias')
    relu_layer = tf.matmul(conv_result, relu_layer_weight) + rely_layer_bias
    relu_layer = tf.nn.relu(relu_layer)
    # NOTE(review): DROPOUT is baked into the graph, so dropout is also active
    # when the confusion matrix and the probabilities are computed -- confirm
    # this is intended.
    relu_layer = tf.nn.dropout(relu_layer, DROPOUT)

    # Output layer.
    final_layer_weight = tf.Variable(tf.truncated_normal([1000, CLASSES],
                                                         stddev=STDDEV * 2),
                                     name='final_layer_weight')
    final_layer_bias = tf.Variable(tf.truncated_normal([CLASSES],
                                                       stddev=STDDEV / 2),
                                   name='final_layer_bias')
    final_layer = tf.matmul(relu_layer, final_layer_weight) + final_layer_bias

    predicts = tf.nn.softmax(final_layer)
    # Clip away exact 0 and 1 so both logarithms below stay finite.
    predicts_for_log = tf.clip_by_value(predicts, 1e-9, 0.999999999)

    # Element-wise binary cross-entropy, summed over classes and averaged
    # over the batch.
    loss = -tf.reduce_mean(
        tf.reduce_sum(real * tf.log(predicts_for_log) +
                      (1 - real) * tf.log(1 - predicts_for_log),
                      axis=1),
        axis=0)
    optimiser = tf.train.GradientDescentOptimizer(
        learning_rate=LEARNING_RATE).minimize(loss)

    correct_prediction = tf.equal(tf.argmax(real, axis=1),
                                  tf.argmax(predicts, axis=1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    confusion_matrix = tf.confusion_matrix(labels=tf.argmax(real, axis=1),
                                           predictions=tf.argmax(predicts,
                                                                 axis=1),
                                           num_classes=CLASSES)

    saver = tf.train.Saver()

    dataset = get_fashion_dataset()

    with tf.Session() as session:

        session.run(tf.global_variables_initializer())

        if restore:
            saver.restore(session, SAVE_PATH)

        if trainModel:
            train(input_images, real, session, optimiser, loss, accuracy,
                  saver, dataset)

        if buildConfusionMatrix:
            test_cm = session.run(confusion_matrix,
                                  feed_dict={
                                      input_images: dataset.test_x,
                                      real: dataset.test_y
                                  })
            draw_confusion_matrix(test_cm)

        if buildClassifiedMatrix:
            all_probs = session.run(predicts,
                                    feed_dict={
                                        input_images: dataset.test_x,
                                        real: dataset.test_y
                                    })
            # Entry [true][predicted] holds (image index, probability) of the
            # most confident image seen so far; (-1, -1.0) means "none yet".
            max_failure_picture_index = [[(-1, -1.0)] * CLASSES
                                         for _ in range(CLASSES)]
            for i, probs in enumerate(all_probs):
                # A separate name keeps the `real` placeholder intact.
                true_class = np.argmax(dataset.test_y[i])
                for j in range(CLASSES):
                    if max_failure_picture_index[true_class][j][1] < probs[j]:
                        max_failure_picture_index[true_class][j] = (i,
                                                                    probs[j])
            draw_max_failure_pictures(dataset.test_x,
                                      max_failure_picture_index)
Beispiel #16
0
# Softmax regression on MNIST, built and run inside graph `g`.
with g.as_default():
    # Create the model
    # x = tf.placeholder("float", [None, 784])
    x = tf.compat.v1.placeholder("float", [None, 784])
    W = tf.Variable(tf.zeros([784, 10]), name="vaiable_W")
    b = tf.Variable(tf.zeros([10]), name="variable_b")
    y = tf.nn.softmax(tf.matmul(x, W) + b)

    # Define loss and optimizer
    y_ = tf.compat.v1.placeholder("float", [None, 10])
    # Cross-entropy summed over the whole batch (not averaged).
    # NOTE(review): tf.log(y) is taken directly on the softmax output and is
    # not clipped -- may yield NaN if a probability reaches 0; confirm.
    cross_entropy = -tf.reduce_sum(y_ * tf.log(y))
    train_step = tf.train.GradientDescentOptimizer(0.01).minimize(
        cross_entropy)

    # NOTE(review): this session is not closed in the visible code.
    sess = tf.Session()

    # Train
    init = tf.initialize_all_variables()
    sess.run(init)

    # 1000 gradient steps on mini-batches of 100 samples.
    for i in range(1000):
        batch_xs, batch_ys = mnist.train.next_batch(100)
        train_step.run({x: batch_xs, y_: batch_ys}, sess)

    # Test trained model
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

    print(accuracy.eval({x: mnist.test.images, y_: mnist.test.labels}, sess))
def gain(data_x, gain_parameters):
    """Impute the missing values in ``data_x`` with a GAIN network.

    Args:
        data_x: 2-D array of original data in which missing values are NaN.
        gain_parameters: Mapping with the GAIN network parameters:
            - ``batch_size``: batch size
            - ``hint_rate``: hint rate
            - ``alpha``: weight of the reconstruction loss in the generator loss
            - ``iterations``: number of training iterations

    Returns:
        The imputed data, renormalized and rounded.
    """
    # Mask matrix: 1 where a value is observed, 0 where it is missing.
    data_m = 1 - np.isnan(data_x)

    # System parameters
    batch_size = gain_parameters['batch_size']
    hint_rate = gain_parameters['hint_rate']
    alpha = gain_parameters['alpha']
    iterations = gain_parameters['iterations']

    # Other parameters
    no, dim = data_x.shape

    # Hidden state dimensions
    h_dim = int(dim)

    # Normalization; NaNs are replaced by 0 (the default fill value). The
    # extra positional argument used previously was numpy's `copy` flag, not
    # the fill value.
    norm_data, norm_parameters = normalization(data_x)
    norm_data_x = np.nan_to_num(norm_data)

    ## GAIN architecture
    tf.disable_v2_behavior()
    # Input placeholders: data vector, mask vector, hint vector.
    X = tf.placeholder(tf.float32, shape=[None, dim])
    M = tf.placeholder(tf.float32, shape=[None, dim])
    H = tf.placeholder(tf.float32, shape=[None, dim])

    # Discriminator variables
    D_W1 = tf.Variable(xavier_init([dim * 2, h_dim]))  # Data + Hint as inputs
    D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

    D_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
    D_b2 = tf.Variable(tf.zeros(shape=[h_dim]))

    D_W3 = tf.Variable(xavier_init([h_dim, dim]))
    D_b3 = tf.Variable(tf.zeros(shape=[dim]))  # Multi-variate outputs

    theta_D = [D_W1, D_W2, D_W3, D_b1, D_b2, D_b3]

    # Generator variables
    # Data + Mask as inputs (random noise is in the missing components)
    G_W1 = tf.Variable(xavier_init([dim * 2, h_dim]))
    G_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

    G_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
    G_b2 = tf.Variable(tf.zeros(shape=[h_dim]))

    G_W3 = tf.Variable(xavier_init([h_dim, dim]))
    G_b3 = tf.Variable(tf.zeros(shape=[dim]))

    theta_G = [G_W1, G_W2, G_W3, G_b1, G_b2, G_b3]

    ## GAIN functions
    def generator(x, m):
        """Map concatenated data and mask to values in (0, 1)."""
        inputs = tf.concat(values=[x, m], axis=1)
        G_h1 = tf.nn.relu(tf.matmul(inputs, G_W1) + G_b1)
        G_h2 = tf.nn.relu(tf.matmul(G_h1, G_W2) + G_b2)
        # Sigmoid keeps the output in the range of the normalized data.
        G_prob = tf.nn.sigmoid(tf.matmul(G_h2, G_W3) + G_b3)
        return G_prob

    def discriminator(x, h):
        """Map concatenated data and hint to per-component probabilities."""
        inputs = tf.concat(values=[x, h], axis=1)
        D_h1 = tf.nn.relu(tf.matmul(inputs, D_W1) + D_b1)
        D_h2 = tf.nn.relu(tf.matmul(D_h1, D_W2) + D_b2)
        D_logit = tf.matmul(D_h2, D_W3) + D_b3
        D_prob = tf.nn.sigmoid(D_logit)
        return D_prob

    ## GAIN structure
    G_sample = generator(X, M)

    # Keep observed values, take generated values where data is missing.
    Hat_X = X * M + G_sample * (1 - M)

    D_prob = discriminator(Hat_X, H)

    ## GAIN loss (1e-8 keeps the logarithms finite)
    D_loss_temp = -tf.reduce_mean(M * tf.log(D_prob + 1e-8)
                                  + (1 - M) * tf.log(1. - D_prob + 1e-8))

    G_loss_temp = -tf.reduce_mean((1 - M) * tf.log(D_prob + 1e-8))

    # Reconstruction error on the observed components only.
    MSE_loss = \
        tf.reduce_mean((M * X - M * G_sample)**2) / tf.reduce_mean(M)

    D_loss = D_loss_temp
    G_loss = G_loss_temp + alpha * MSE_loss

    ## GAIN solver
    D_solver = tf.train.AdamOptimizer().minimize(D_loss, var_list=theta_D)
    G_solver = tf.train.AdamOptimizer().minimize(G_loss, var_list=theta_G)

    # The context manager closes the session once training and imputation
    # are done, so its resources are not held after the function returns.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        ## Iterations
        for _ in tqdm(range(iterations)):

            # Sample batch
            batch_idx = sample_batch_index(no, batch_size)
            X_mb = norm_data_x[batch_idx, :]
            M_mb = data_m[batch_idx, :]
            # Sample random vectors
            Z_mb = uniform_sampler(0, 0.01, batch_size, dim)
            # Sample hint vectors
            H_mb_temp = binary_sampler(hint_rate, batch_size, dim)
            H_mb = M_mb * H_mb_temp

            # Combine random vectors with observed vectors
            X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb

            # One discriminator step followed by one generator step; the
            # fetched loss values are not used further.
            sess.run([D_solver, D_loss_temp],
                     feed_dict={M: M_mb, X: X_mb, H: H_mb})
            sess.run([G_solver, G_loss_temp, MSE_loss],
                     feed_dict={X: X_mb, M: M_mb, H: H_mb})

        ## Impute the full data set
        Z_mb = uniform_sampler(0, 0.01, no, dim)
        M_mb = data_m
        X_mb = norm_data_x
        X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb

        imputed_data = sess.run([G_sample], feed_dict={X: X_mb, M: M_mb})[0]

    # Observed entries keep their original (normalized) values.
    imputed_data = data_m * norm_data_x + (1 - data_m) * imputed_data

    # Renormalization
    imputed_data = renormalization(imputed_data, norm_parameters)

    # Rounding
    imputed_data = rounding(imputed_data, data_x)

    return imputed_data
Beispiel #18
0
def main():
    """Fine-tune a checkpointed language model and periodically save/sample.

    Parses command-line arguments, builds the TF1 training graph (training
    loss, optional validation loss, sampling op, optimizer), restores weights
    from a checkpoint, and runs the training loop until ``counter`` reaches
    1000 or the user interrupts with Ctrl-C (in which case a checkpoint is
    saved before returning). Checkpoints, text samples and validation
    summaries are produced according to ``args.save_every``,
    ``args.sample_every`` and ``args.val_every``.

    Raises:
        ValueError: if ``args.sample_length`` exceeds ``hparams.n_ctx``.
        SystemExit: on an unknown optimizer name, or when memory-saving
            gradients are requested together with gradient accumulation.
    """
    args = parser.parse_args()
    enc = encoder.get_encoder(CHECKPOINT_DIR, args.model_name)
    hparams = model.default_hparams()
    with open(os.path.join(CHECKPOINT_DIR, args.model_name,
                           'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if args.sample_length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" %
                         hparams.n_ctx)

    if args.model_name == '345M':
        # args.memory_saving_gradients = True
        if args.optimizer == 'adam':
            args.only_train_transformer_layers = True

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.graph_options.rewrite_options.layout_optimizer = rewriter_config_pb2.RewriterConfig.OFF
    with tf.Session(config=config) as sess:
        # Training graph: the context is perturbed by `randomize` before being
        # fed to the model, while the labels are the unperturbed tokens
        # shifted by one position.
        context = tf.placeholder(tf.int32, [args.batch_size, None])
        context_in = randomize(context, hparams, args.noise)
        output = model.model(hparams=hparams, X=context_in)
        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=context[:, 1:], logits=output['logits'][:, :-1]))

        if args.val_every > 0:
            # Validation graph: same loss, but on the un-randomized input.
            val_context = tf.placeholder(tf.int32, [args.val_batch_size, None])
            val_output = model.model(hparams=hparams, X=val_context)
            val_loss = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=val_context[:, 1:],
                    logits=val_output['logits'][:, :-1]))
            val_loss_summary = tf.summary.scalar('val_loss', val_loss)

        tf_sample = sample.sample_sequence(hparams=hparams,
                                           length=args.sample_length,
                                           context=context,
                                           batch_size=args.batch_size,
                                           temperature=1.0,
                                           top_k=args.top_k,
                                           top_p=args.top_p)

        # `all_vars` is what gets saved/restored; `train_vars` is the subset
        # that the optimizer updates.
        all_vars = [v for v in tf.trainable_variables() if 'model' in v.name]
        train_vars = [v for v in all_vars if '/h' in v.name
                      ] if args.only_train_transformer_layers else all_vars

        if args.optimizer == 'adam':
            opt = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
        elif args.optimizer == 'sgd':
            opt = tf.train.GradientDescentOptimizer(
                learning_rate=args.learning_rate)
        else:
            # exit() accepts a single argument, so the message is formatted
            # up front instead of being passed as two positional arguments.
            raise SystemExit('Bad optimizer: {}'.format(args.optimizer))

        if args.accumulate_gradients > 1:
            if args.memory_saving_gradients:
                raise SystemExit(
                    "Memory saving gradients are not implemented for gradient accumulation yet."
                )
            opt = AccumulatingOptimizer(opt=opt, var_list=train_vars)
            opt_reset = opt.reset()
            opt_compute = opt.compute_gradients(loss)
            opt_apply = opt.apply_gradients()
            summary_loss = tf.summary.scalar('loss', opt_apply)
        else:
            if args.memory_saving_gradients:
                opt_grads = memory_saving_gradients.gradients(loss, train_vars)
            else:
                opt_grads = tf.gradients(loss, train_vars)
            opt_grads = list(zip(opt_grads, train_vars))
            opt_apply = opt.apply_gradients(opt_grads)
            summary_loss = tf.summary.scalar('loss', loss)

        summary_lr = tf.summary.scalar('learning_rate', args.learning_rate)
        summaries = tf.summary.merge([summary_lr, summary_loss])

        summary_log = tf.summary.FileWriter(
            os.path.join(CHECKPOINT_DIR, args.run_name))

        saver = tf.train.Saver(var_list=all_vars,
                               max_to_keep=5,
                               keep_checkpoint_every_n_hours=2)
        sess.run(tf.global_variables_initializer())

        if args.restore_from == 'latest':
            ckpt = tf.train.latest_checkpoint(
                os.path.join(CHECKPOINT_DIR, args.run_name))
            if ckpt is None:
                # Get fresh GPT weights if new run.
                ckpt = tf.train.latest_checkpoint(
                    os.path.join(CHECKPOINT_DIR, args.model_name))
        elif args.restore_from == 'fresh':
            ckpt = tf.train.latest_checkpoint(
                os.path.join(CHECKPOINT_DIR, args.model_name))
        else:
            ckpt = tf.train.latest_checkpoint(args.restore_from)
        print('Loading checkpoint', ckpt)
        saver.restore(sess, ckpt)

        print('Loading dataset...')
        chunks = load_dataset(enc,
                              args.dataset,
                              args.combine,
                              encoding=args.encoding)
        data_sampler = Sampler(chunks)
        if args.val_every > 0:
            if args.val_dataset:
                val_chunks = load_dataset(enc,
                                          args.val_dataset,
                                          args.combine,
                                          encoding=args.encoding)
            else:
                # No separate validation set given: validate on training data.
                val_chunks = chunks
        print('dataset has', data_sampler.total_size, 'tokens')
        print('Training...')

        if args.val_every > 0:
            # Sample from validation set once with fixed seed to make
            # it deterministic during training as well as across runs.
            val_data_sampler = Sampler(val_chunks, seed=1)
            val_batches = [[
                val_data_sampler.sample(1024)
                for _ in range(args.val_batch_size)
            ] for _ in range(args.val_batch_count)]

        counter = 1
        counter_path = os.path.join(CHECKPOINT_DIR, args.run_name, 'counter')
        if os.path.exists(counter_path):
            # Load the step number if we're resuming a run
            # Add 1 so we don't immediately try to save again
            with open(counter_path, 'r') as fp:
                counter = int(fp.read()) + 1

        def save():
            """Write a checkpoint for the current step and record the step."""
            maketree(os.path.join(CHECKPOINT_DIR, args.run_name))
            print(
                'Saving',
                os.path.join(CHECKPOINT_DIR, args.run_name,
                             'model-{}').format(counter))
            saver.save(sess,
                       os.path.join(CHECKPOINT_DIR, args.run_name, 'model'),
                       global_step=counter)
            with open(counter_path, 'w') as fp:
                fp.write(str(counter) + '\n')

        def generate_samples():
            """Generate ``args.sample_num`` samples and write them to a file."""
            print('Generating samples...')
            context_tokens = data_sampler.sample(1)
            all_text = []
            index = 0
            while index < args.sample_num:
                out = sess.run(
                    tf_sample,
                    feed_dict={context: args.batch_size * [context_tokens]})
                for i in range(min(args.sample_num - index, args.batch_size)):
                    text = enc.decode(out[i])
                    text = '======== SAMPLE {} ========\n{}\n'.format(
                        index + 1, text)
                    all_text.append(text)
                    index += 1
            # Echo only the last sample; guard against sample_num <= 0, where
            # nothing was generated and there is nothing to print.
            if all_text:
                print(all_text[-1])
            maketree(os.path.join(SAMPLE_DIR, args.run_name))
            with open(os.path.join(SAMPLE_DIR, args.run_name,
                                   'samples-{}').format(counter),
                      'w',
                      encoding=args.encoding) as fp:
                fp.write('\n'.join(all_text))

        def validation():
            """Average the loss over the fixed validation batches and log it."""
            print('Calculating validation loss...')
            losses = []
            for batch in tqdm.tqdm(val_batches):
                losses.append(
                    sess.run(val_loss, feed_dict={val_context: batch}))
            v_val_loss = np.mean(losses)
            # Feed the averaged value in place of the `val_loss` tensor so the
            # summary records the mean over all batches, not a single batch.
            v_summary = sess.run(val_loss_summary,
                                 feed_dict={val_loss: v_val_loss})
            summary_log.add_summary(v_summary, counter)
            summary_log.flush()
            print('[{counter} | {time:2.2f}] validation loss = {loss:2.2f}'.
                  format(counter=counter,
                         time=time.time() - start_time,
                         loss=v_val_loss))

        def sample_batch():
            """Draw one training batch of ``args.batch_size`` samples."""
            return [data_sampler.sample(1024) for _ in range(args.batch_size)]

        # (weighted loss sum, weight sum) of an exponential moving average
        # with decay 0.99; the displayed average is their ratio.
        avg_loss = (0.0, 0.0)
        start_time = time.time()

        try:
            # NOTE(review): the step limit of 1000 is hard-coded here.
            while counter < 1000:
                if counter % args.save_every == 0:
                    save()
                if counter % args.sample_every == 0:
                    generate_samples()
                if args.val_every > 0 and (counter % args.val_every == 0
                                           or counter == 1):
                    validation()

                if args.accumulate_gradients > 1:
                    sess.run(opt_reset)
                    for _ in range(args.accumulate_gradients):
                        sess.run(opt_compute,
                                 feed_dict={context: sample_batch()})
                    (v_loss, v_summary) = sess.run((opt_apply, summaries))
                else:
                    (_, v_loss, v_summary) = sess.run(
                        (opt_apply, loss, summaries),
                        feed_dict={context: sample_batch()})

                summary_log.add_summary(v_summary, counter)

                avg_loss = (avg_loss[0] * 0.99 + v_loss,
                            avg_loss[1] * 0.99 + 1.0)

                print(
                    '[{counter} | {time:2.2f}] loss={loss:2.2f} avg={avg:2.2f}'
                    .format(counter=counter,
                            time=time.time() - start_time,
                            loss=v_loss,
                            avg=avg_loss[0] / avg_loss[1]))

                counter += 1
        except KeyboardInterrupt:
            print('interrupted')
            save()
        finally:
            # Flush and close the event file so buffered training summaries
            # are not lost when the loop ends or is interrupted.
            summary_log.close()