def main(_):
    ''' Trains model from data '''
    # tf.device('/gpu:0')
    if not os.path.exists(FLAGS.train_dir):
        os.mkdir(FLAGS.train_dir)
        print('Created training directory', FLAGS.train_dir)

    char_vocab, char_tensors, char_lens, max_word_length = load_data(FLAGS.data_dir, 70)

    train_reader = DataReader(char_tensors['train'], char_lens['train'], FLAGS.batch_size)

    print('initialized all dataset readers')

    with tf.Graph().as_default(), tf.Session() as session:

        # tensorflow seed must be inside graph
        tf.set_random_seed(FLAGS.seed)
        np.random.seed(seed=FLAGS.seed)

        ''' build training graph '''
        # initializer = tf.random_uniform_initializer(-FLAGS.param_init, FLAGS.param_init)
        # initializer = tf.random_uniform_initializer(0.0, 2 * FLAGS.param_init)
        initializer = tf.contrib.layers.xavier_initializer()
        with tf.variable_scope("Model", initializer=initializer):
            # encoder: embeds an input domain into a fixed-size vector
            train_model = dga_model.inference_graph(
                char_vocab_size=char_vocab.size,
                char_embed_size=FLAGS.char_embed_size,
                batch_size=FLAGS.batch_size,
                num_highway_layers=FLAGS.highway_layers,
                num_rnn_layers=FLAGS.rnn_layers,
                rnn_size=FLAGS.rnn_size,
                max_word_length=max_word_length,
                kernels=eval(FLAGS.kernels),
                kernel_features=eval(FLAGS.kernel_features),
                dropout=FLAGS.dropout,
                embed_dimension=FLAGS.embed_dimension)
            # decoder: reconstructs the domain characters from the embedding
            train_model.update(dga_model.decoder_graph(train_model.embed_output,
                                                       char_vocab_size=char_vocab.size,
                                                       batch_size=FLAGS.batch_size,
                                                       num_highway_layers=FLAGS.highway_layers,
                                                       num_rnn_layers=FLAGS.rnn_layers,
                                                       rnn_size=FLAGS.rnn_size,
                                                       max_word_length=max_word_length,
                                                       kernels=eval(FLAGS.kernels),
                                                       kernel_features=eval(FLAGS.kernel_features),
                                                       dropout=FLAGS.dropout))
            # reconstruction loss of the encoder-decoder pair
            train_model.update(dga_model.en_decoder_loss_graph(train_model.input,
                                                               train_model.input_len_g,
                                                               train_model.decoder_output,
                                                               batch_size=FLAGS.batch_size,
                                                               max_word_length=max_word_length))
            # generator: maps random noise into the embedding space
            train_model.update(dga_model.genearator_layer(batch_size=FLAGS.batch_size,
                                                          input_dimension=FLAGS.random_dimension,
                                                          max_word_length=max_word_length,
                                                          embed_dimension=FLAGS.embed_dimension))
            train_model.update(dga_model.generator_layer_loss(train_model.gl_output,
                                                              batch_size=FLAGS.batch_size,
                                                              max_word_length=max_word_length,
                                                              embed_dimension=FLAGS.embed_dimension))
            # logistic-regression discriminator over embeddings, and its loss
            train_model.update(
                dga_model.lr(train_model.gl_output, FLAGS.batch_size, max_word_length, FLAGS.embed_dimension))
            train_model.update(dga_model.lr_loss(train_model.lr_output, batch_size=FLAGS.batch_size))

            # Scaling the loss by FLAGS.num_unroll_steps effectively scales the gradients by the same factor.
            # We need this to reproduce how the original Torch code optimizes: without it, our gradients would
            # be much smaller (roughly 35 times smaller), and to get the system to learn we would have to
            # rescale the learning rate and max_grad_norm accordingly. Scaling the gradients keeps this
            # trainer compatible with the original.
            train_model.update(dga_model.autoencoder_train_graph(train_model.en_decoder_loss,
                                                                 FLAGS.learning_rate,
                                                                 FLAGS.max_grad_norm))
            train_model.update(dga_model.lr_train_graph(train_model.lr_loss,
                                                        FLAGS.learning_rate,
                                                        FLAGS.max_grad_norm))
            train_model.update(dga_model.generator_train_graph(train_model.gl_loss,
                                                               FLAGS.learning_rate,
                                                               FLAGS.max_grad_norm))

        # create saver before creating more graph nodes, so that we do not save any vars defined below
        saver = tf.train.Saver(max_to_keep=50)

        if FLAGS.load_model:
            saver.restore(session, FLAGS.load_model)
            print('Loaded model from', FLAGS.load_model,
                  'saved at global step', train_model.global_step_autoencoder.eval())
        else:
            tf.global_variables_initializer().run()
            session.run(train_model.clear_char_embedding_padding)
            print('Created and initialized fresh model. Size:', dga_model.model_size())

        summary_writer = tf.summary.FileWriter(FLAGS.train_dir, graph=session.graph)
        tf.summary.merge_all()

        ''' take learning rate from CLI, not from saved graph '''
        session.run([
            tf.assign(train_model.learning_rate, FLAGS.learning_rate),
            tf.assign(train_model.lr_learning_rate, FLAGS.learning_rate),
            tf.assign(train_model.gl_learning_rate, FLAGS.learning_rate),
            tf.assign(train_model.lr_learning_rate_g, FLAGS.learning_rate),
        ])

        ''' training autoencoder here '''
        print("***************train autoencoder********************")
        rnn_state_g, rnn_state_d = session.run([train_model.initial_rnn_state_g,
                                                train_model.initial_rnn_state_d])
        print("Start to train auto-encoder.....\n")
        for epoch in range(FLAGS.max_epochs):
            epoch_start_time = time.time()
            avg_train_loss = 0.0
            count = 0
            for x, y in train_reader.iter():
                count += 1
                start_time = time.time()

                loss, _, rnn_state_g, rnn_state_d, step, _, generated_dga, mask1, mask2, loss1, loss2 = session.run([
                    train_model.en_decoder_loss,
                    train_model.train_op,
                    train_model.final_rnn_state_g,
                    train_model.final_rnn_state_d,
                    # train_model.global_norm,
                    train_model.global_step_autoencoder,
                    train_model.clear_char_embedding_padding,
                    train_model.generated_dga,
                    train_model.mask1,
                    train_model.mask2,
                    train_model.loss1,
                    train_model.loss2,
                ], {
                    train_model.input: x,
                    train_model.input_len_g: y,
                    train_model.initial_rnn_state_g: rnn_state_g,
                    train_model.initial_rnn_state_d: rnn_state_d
                })

                # exponential moving average of the reconstruction loss
                avg_train_loss += 0.05 * (loss - avg_train_loss)

                # time_elapsed = time.time() - start_time
                time_elapsed = time.time() - epoch_start_time

                if count % FLAGS.print_every == 0:
                    print('AutoEncoder: %6d: %d [%5d/%5d], loss1/2 = %6.8f/%6.8f, '
                          'train_loss/perplexity = %6.8f/%6.7f secs/batch = %.4fs' % (
                              step, epoch, count, train_reader.length,
                              loss1, loss2, loss, np.exp(loss), time_elapsed))
                    print(char_vocab.change(x[0]) + " ---> " + char_vocab.change(generated_dga[0]))
                    summary = tf.Summary(value=[
                        tf.Summary.Value(tag="train_loss", simple_value=avg_train_loss),
                    ])
                    summary_writer.add_summary(summary, step)

            train_reader.shuf()
            print('Epoch training time:', time.time() - epoch_start_time)
            save_as = '%s/autoencoder_epoch%03d_%.4f.model' % (FLAGS.train_dir, epoch, avg_train_loss)
            saver.save(session, save_as)
            # saver.save(session, "autoencoder.model")

        np_random = np.random.RandomState(FLAGS.seed)

        ''' training generator here '''
        print("***************train generator********************")
        for epoch in range(FLAGS.max_epochs_gl):
            epoch_start_time = time.time()
            avg_gl_loss = 0.0
            count = 0
            for x, y in train_reader.iter():
                count += 1
                start_time = time.time()

                # run the trained encoder to obtain the target embedding for this batch
                rnn_state_g, _, embed_output = session.run([
                    train_model.final_rnn_state_g,
                    train_model.clear_char_embedding_padding,
                    train_model.embed_output,
                ], {
                    train_model.input: x,
                    train_model.input_len_g: y,
                    train_model.initial_rnn_state_g: rnn_state_g,
                })

                # train the generator to map random noise to the encoder's embedding
                generator_input = np_random.rand(FLAGS.batch_size, FLAGS.random_dimension)
                gl_loss, _, step_gl = session.run([
                    train_model.gl_loss,
                    train_model.train_op_gl,
                    train_model.global_step_gl,
                ], {
                    train_model.gl_input: generator_input,
                    train_model.gl_target: embed_output
                })

                avg_gl_loss += 0.05 * (gl_loss - avg_gl_loss)
                time_elapsed = time.time() - start_time

                if count % FLAGS.print_every == 0:
                    print('Generator Layer: %6d: %d [%5d/%5d], train_loss = %6.8f secs/batch = %.4fs' % (
                        step_gl, epoch, count, train_reader.length, gl_loss, time_elapsed))
                    gl_summary = tf.Summary(value=[
                        tf.Summary.Value(tag="gl_loss", simple_value=avg_gl_loss),
                    ])
                    summary_writer.add_summary(gl_summary, step_gl)

            train_reader.shuf()
            print('Epoch training time:', time.time() - epoch_start_time)
            save_as = '%s/gl_epoch%03d_%.4f.model' % (FLAGS.train_dir, epoch, avg_gl_loss)
            saver.save(session, save_as)
            # saver.save(session, "gl.model")

        ''' training lr here '''
        print("***************train logistic regression********************")
        for epoch in range(FLAGS.max_epochs_lr):
            epoch_start_time = time.time()
            avg_lr_loss = 0.0
            avg_gr_loss = 0.0
            count = 0
            for x, y in train_reader.iter():
                count += 1
                start_time = time.time()

                if (count % FLAGS.iteration) != 0:
                    # discriminator step: half real embeddings, half generated embeddings
                    generator_input = np_random.rand(FLAGS.batch_size, FLAGS.random_dimension)
                    gl_output = session.run([
                        train_model.gl_output,
                    ], {
                        train_model.gl_input: generator_input,
                    })
                    rnn_state_g, _, embed_output = session.run([
                        train_model.final_rnn_state_g,
                        train_model.clear_char_embedding_padding,
                        train_model.embed_output,
                    ], {
                        train_model.input: x,
                        train_model.input_len_g: y,
                        train_model.initial_rnn_state_g: rnn_state_g,
                    })

                    # origin_dga = [char_vocab.change(dga).replace(" ", "") for dga in generated_dga]
                    target = np.zeros([FLAGS.batch_size])
                    # generated_dga[0: int(len(generated_dga) / 2)] = x[0: int(len(generated_dga) / 2)]
                    # first half of the batch is real (label 1), second half stays generated (label 0)
                    target[0: int(len(target) / 2)] = np.ones([int(len(target) / 2)])
                    gl_output = gl_output[0]
                    gl_output[0: int(len(embed_output) / 2)] = embed_output[0: int(len(embed_output) / 2)]

                    # for i in range(int(len(generated_dga) / 2), len(generated_dga)):
                    #     dga_len = 0
                    #     dga = generated_dga[i]
                    #     for dga_char in dga:
                    #         if dga_char == ' ':
                    #             break
                    #         dga_len += 1
                    #     y[i] = dga_len

                    lr_loss_d, _, step_lr = session.run([
                        train_model.lr_loss,
                        train_model.train_op_lr,
                        train_model.global_step_lr,
                    ], {
                        train_model.lr_input: gl_output,
                        train_model.lr_target: target
                    })
                    avg_lr_loss += 0.05 * (lr_loss_d - avg_lr_loss)
                else:
                    # generator step: every sample is generated; the generator is updated to
                    # make the discriminator accept its embeddings
                    generator_input = np_random.rand(FLAGS.batch_size, FLAGS.random_dimension)
                    target = np.zeros([FLAGS.batch_size])
                    lr_loss_g, _, step_lr = session.run([
                        train_model.lr_loss,
                        train_model.train_op_g,
                        train_model.global_step_lr
                    ], {
                        train_model.gl_input: generator_input,
                        train_model.lr_target: target
                    })
                    avg_gr_loss += 0.05 * (lr_loss_g - avg_gr_loss)

                if count % FLAGS.print_every == 0:
                    time_elapsed = time.time() - start_time
                    print('Regression Logistic: %6d: %d [%5d/%5d], loss_lr/loss_g = %6.8f/%6.7f secs/batch = %.4fs' % (
                        step_lr, epoch, count, train_reader.length, avg_lr_loss, avg_gr_loss, time_elapsed))
                    lr_summary = tf.Summary(value=[
                        tf.Summary.Value(tag="lr_loss", simple_value=avg_lr_loss),
                    ])
                    summary_writer.add_summary(lr_summary, step_lr)
                    gr_summary = tf.Summary(value=[
                        tf.Summary.Value(tag="gr_loss", simple_value=avg_gr_loss),
                    ])
                    summary_writer.add_summary(gr_summary, step_lr)

            train_reader.shuf()
            print('Epoch training time:', time.time() - epoch_start_time)
            save_as = '%s/lr_epoch%03d_%.4f.model' % (FLAGS.train_dir, epoch, avg_lr_loss)
            saver.save(session, save_as)
            # saver.save(session, "final_model")
            print('Saved model')
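
# The trainer above reads all hyper-parameters from tf.flags (FLAGS). The flag definitions are
# not part of this section; the block below is a minimal sketch of how they could be declared,
# assuming TF 1.x tf.flags. The default values shown are illustrative assumptions only, and
# further flags used above (rnn_layers, highway_layers, kernels, kernel_features, dropout,
# max_grad_norm, max_epochs_gl, max_epochs_lr, iteration, print_every, load_model, ...) would
# follow the same pattern.
flags = tf.flags
flags.DEFINE_string('data_dir', 'data', 'directory containing the training data')
flags.DEFINE_string('train_dir', 'cv', 'directory for checkpoints and summaries')
flags.DEFINE_integer('batch_size', 20, 'number of domains per batch')
flags.DEFINE_integer('char_embed_size', 15, 'dimensionality of character embeddings')
flags.DEFINE_integer('rnn_size', 650, 'size of the LSTM internal state')
flags.DEFINE_integer('embed_dimension', 128, 'dimensionality of the domain embedding')
flags.DEFINE_integer('random_dimension', 64, 'dimensionality of the generator noise input')
flags.DEFINE_float('learning_rate', 1.0, 'initial learning rate')
flags.DEFINE_integer('max_epochs', 25, 'number of autoencoder training epochs')
flags.DEFINE_integer('seed', 3435, 'random seed')
FLAGS = flags.FLAGS

# Standard TF 1.x entry point: tf.app.run() parses the flags and calls main(_).
if __name__ == '__main__':
    tf.app.run()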
m.update(dga_model.decoder_graph(m.embed_output,
                                 char_vocab_size=char_vocab.size,
                                 batch_size=FLAGS.batch_size,
                                 num_highway_layers=FLAGS.highway_layers,
                                 num_rnn_layers=FLAGS.rnn_layers,
                                 rnn_size=FLAGS.rnn_size,
                                 max_word_length=actual_max_word_length,
                                 kernels=eval(FLAGS.kernels),
                                 kernel_features=eval(FLAGS.kernel_features),
                                 dropout=FLAGS.dropout))
m.update(dga_model.genearator_layer(batch_size=FLAGS.batch_size,
                                    input_dimension=FLAGS.random_dimension,
                                    max_word_length=actual_max_word_length,
                                    embed_dimension=FLAGS.embed_dimension))

saver = tf.train.Saver()
saver.restore(session, FLAGS.load_model)
print('Loaded model from', FLAGS.load_model, 'saved at global step', global_step.eval())

rnn_state_g = session.run(m.initial_rnn_state_g)
rnn_state_d = session.run(m.initial_rnn_state_d)
np_random = np.random.RandomState(FLAGS.seed)

# write each input domain and its reconstruction side by side
with open("result.txt", "w") as f_out:
    for x, y in generate_reader.iter():
        # rnn_result = session.run(m.embed_output, {m.input: x, m.input_len_g: y})
        generated_dga = session.run(m.generated_dga, {m.input: x, m.input_len_g: y})
        for index, domain in enumerate(generated_dga):
            f_out.write(char_vocab.change(x[index]) + "-->" + char_vocab.change(domain) + "\n")
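
# The fragment above iterates over generate_reader, which is constructed elsewhere. A plausible
# construction, assuming it mirrors the DataReader used for training; the split name 'generate'
# is hypothetical and depends on how load_data() labels its splits:
char_vocab, char_tensors, char_lens, actual_max_word_length = load_data(FLAGS.data_dir, 70)
generate_reader = DataReader(char_tensors['generate'], char_lens['generate'], FLAGS.batch_size)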
def main(_):
    ''' Loads a trained model and uses it to generate domains '''
    if FLAGS.load_model is None:
        print('Please specify checkpoint file to load model from')
        return -1

    if not os.path.exists(FLAGS.load_model + '.meta'):
        print('Checkpoint file not found', FLAGS.load_model)
        return -1

    print('Reading the character vocabulary from the train data')
    char_vocab, _, _, max_word_length = load_data(FLAGS.data_dir, 70)

    with tf.Graph().as_default(), tf.Session() as session:

        # tensorflow seed must be inside graph
        tf.set_random_seed(FLAGS.seed)
        np.random.seed(seed=FLAGS.seed)

        print('Initializing the network graph')
        initializer = tf.contrib.layers.xavier_initializer()

        ''' build inference graph '''
        with tf.variable_scope("Model", initializer=initializer):
            m = dga_model.inference_graph(
                char_vocab_size=char_vocab.size,
                char_embed_size=FLAGS.char_embed_size,
                batch_size=FLAGS.batch_size,
                num_highway_layers=FLAGS.highway_layers,
                num_rnn_layers=FLAGS.rnn_layers,
                rnn_size=FLAGS.rnn_size,
                max_word_length=max_word_length,
                kernels=eval(FLAGS.kernels),
                kernel_features=eval(FLAGS.kernel_features),
                dropout=0,
                embed_dimension=FLAGS.embed_dimension)
            m.update(dga_model.decoder_graph(
                m.embed_output,
                char_vocab_size=char_vocab.size,
                batch_size=FLAGS.batch_size,
                num_highway_layers=FLAGS.highway_layers,
                num_rnn_layers=FLAGS.rnn_layers,
                rnn_size=FLAGS.rnn_size,
                max_word_length=max_word_length,
                kernels=eval(FLAGS.kernels),
                kernel_features=eval(FLAGS.kernel_features)))
            m.update(dga_model.genearator_layer(
                batch_size=FLAGS.batch_size,
                input_dimension=FLAGS.random_dimension,
                max_word_length=max_word_length,
                embed_dimension=FLAGS.embed_dimension))

            # we need global step only because we want to read it from the model
            global_step = tf.Variable(0, dtype=tf.int32, name='global_step')

        saver = tf.train.Saver()
        saver.restore(session, FLAGS.load_model)
        print('Loaded model from', FLAGS.load_model, 'saved at global step', global_step.eval())

        output_fname = FLAGS.data_dir + "/output_agd.txt"
        print('Generating output domains and saving them to', output_fname)
        with open(output_fname, "w") as outfile:
            for i in tqdm(range(FLAGS.num_samples)):
                # Select a pseudo-random seed
                np_random = np.random.RandomState(i)
                pseudo_random_seed = np_random.rand(FLAGS.batch_size, FLAGS.random_dimension)

                # Generator(seed) -> embedding
                domain_embedding = session.run([m.gl_output], {m.gl_input: pseudo_random_seed})

                # Decoder(embedding) -> algorithmically generated domain
                agd_ixs = session.run([m.generated_dga], {m.decoder_input: domain_embedding[0]})
                agd = agd_output_to_domain(agd_ixs, char_vocab)

                # Save result to file
                outfile.write("{}, {}\n".format(i, agd))
        print("Done")
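
# agd_output_to_domain() is called above but not defined in this section. A minimal sketch of
# what it might look like, assuming (as in the training loop) that char_vocab.change() maps an
# array of character indices back to a string and that padding positions decode to spaces:
def agd_output_to_domain(agd_ixs, char_vocab):
    # session.run() returned a one-element list holding a batch of index arrays;
    # decode the first sample of the batch into characters
    decoded = char_vocab.change(agd_ixs[0][0])
    # strip padding to obtain the bare domain string
    return decoded.replace(" ", "").strip()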