def interpolate():
    """Interpolates between two random latent points (slerp) and prints the
    generated sequences with their discriminator scores as FASTA."""
    properties = get_properties(FLAGS)
    logdir = setup_logdir(FLAGS, properties)
    noise = tf.placeholder(dtype=tf.float32,
                           shape=[FLAGS.batch_size, FLAGS.z_dim])
    model = get_model(FLAGS, properties, logdir, noise)
    generated_seqs = get_generated_seqs(model)
    session_creator = ChiefSessionCreator(
        master='',
        checkpoint_filename_with_path=tf.train.latest_checkpoint(logdir))
    seqs = []
    with MonitoredSession(session_creator=session_creator,
                          hooks=None) as session:
        # Walk the latent space between two random points; each batch row is
        # one interpolation step.
        noise1 = np.random.uniform(-1, 1, FLAGS.z_dim)
        noise2 = np.random.uniform(-1, 1, FLAGS.z_dim)
        n = np.stack([
            slerp(ratio, noise1, noise2)
            for ratio in np.linspace(0, 1, FLAGS.batch_size)
        ])
        results, d_scores = session.run(
            [generated_seqs, model.discriminator_fake],
            feed_dict={noise: n})
        for i in range(FLAGS.batch_size):
            seqs.append(Sequence(id=i, seq=results[i], d_score=d_scores[i]))
    print(sequences_to_fasta(seqs, properties['class_mapping'],
                             escape=False, strip_zeros=True))

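# The `slerp` helper used above and in the interpolation entry points below is
# not defined in this file. The following is a minimal sketch of spherical
# linear interpolation (White, 2016), assuming 1-D numpy inputs; the repo's
# own `slerp` may handle edge cases differently.
def slerp_sketch(ratio, low, high):
    """Spherically interpolates between vectors `low` and `high` at `ratio`."""
    omega = np.arccos(np.clip(
        np.dot(low / np.linalg.norm(low), high / np.linalg.norm(high)),
        -1.0, 1.0))
    so = np.sin(omega)
    if so == 0:
        # Vectors are (anti-)parallel; fall back to linear interpolation.
        return (1.0 - ratio) * low + ratio * high
    return (np.sin((1.0 - ratio) * omega) / so * low +
            np.sin(ratio * omega) / so * high)
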
def main(_, is_test=False, debug_cli=False, debug_ui=False):
    """Builds the selected GAN model and trains it with TF-GAN's gan_train."""
    graph = tf.Graph()
    with graph.as_default():
        properties = get_properties(FLAGS)
        # Select model to train
        model = get_model(FLAGS, properties)
        logdir = setup_logdir(FLAGS, properties)
        print_run_meta_data(FLAGS)
        # Add all metadata about the model before starting
        add_model_metadata(
            logdir,
            os.path.join(os.path.dirname(__file__), FLAGS.model_type),
            FLAGS, properties)
        # allow_soft_placement must be True because the Saver for the DCGAN
        # model otherwise gets misplaced on the GPU.
        session_config = tf.ConfigProto(allow_soft_placement=True,
                                        log_device_placement=False)
        hooks = get_hooks(debug_cli, debug_ui)
        model_hooks = get_specific_hooks(FLAGS, logdir, properties)
        if hasattr(FLAGS, "static_embedding") and not FLAGS.static_embedding:
            model_hooks.append(get_embedding_hook(model, FLAGS))
        train_ops = GANTrainOps(
            generator_train_op=model.g_optim,
            discriminator_train_op=model.d_optim,
            global_step_inc_op=model.increment_global_step)
        # Alternate g_step generator updates with d_step discriminator updates.
        train_steps = GANTrainSteps(FLAGS.g_step, FLAGS.d_step)
        if is_test:
            return graph
        gan_train(
            train_ops,
            get_hooks_fn=get_sequential_train_hooks(train_steps=train_steps),
            hooks=([tf.train.StopAtStepHook(num_steps=FLAGS.steps)]
                   + hooks + model_hooks),
            logdir=logdir,
            save_summaries_steps=FLAGS.save_summary_steps,
            save_checkpoint_secs=FLAGS.save_checkpoint_sec,
            config=session_config)

def main(_):
    """Evaluates the latest checkpoint: generates a batch from uniform noise
    and runs BLAST on the generated sequences."""
    FLAGS.properties_file = "properties_test.json"
    properties = get_properties(FLAGS)
    logdir = setup_logdir(FLAGS, properties)
    FLAGS.running_mode = "test"
    model = get_model(FLAGS, properties)
    with tf.variable_scope('model', reuse=True):
        batch = model.data_handler.get_batch(FLAGS.batch_size, FLAGS)
        noise = tf.random_uniform([FLAGS.batch_size, FLAGS.z_dim],
                                  minval=-1.0, maxval=1.0,
                                  dtype=tf.float32, name='z0')
        generated_data = model.get_generated_data(noise, batch[1:])
        model.data_handler.display_fake_data(generated_data, batch[1],
                                             FLAGS.batch_size)
    session_config = tf.ConfigProto(allow_soft_placement=True,
                                    log_device_placement=False)
    id_to_enzyme_class_dict = properties["class_mapping"]
    evaluate_repeatedly(
        checkpoint_dir=logdir,
        hooks=[
            BlastHook(id_to_enzyme_class_dict,
                      every_n_steps=1,
                      output_dir=logdir,
                      n_examples=FLAGS.batch_size,
                      running_mode=FLAGS.running_mode),
            tf.contrib.training.SummaryAtEndHook(logdir),
            tf.contrib.training.StopAfterNEvalsHook(1)
        ],
        eval_ops=generated_data,
        config=session_config)

def main(_):
    """Evaluates the latest checkpoint by printing sequences generated from
    Gaussian noise together with their class labels."""
    FLAGS.properties_file = "properties_test.json"
    properties = get_properties(FLAGS)
    logdir = setup_logdir(FLAGS, properties)
    FLAGS.running_mode = "test"
    path = os.path.join(FLAGS.data_dir, FLAGS.dataset.replace("\\", os.sep),
                        "d_test.csv")
    # data_to_test = pd.read_csv(path, header=None)[:FLAGS.batch_size]
    # data_to_test = from_amino_acid_to_id(data_to_test, 0)
    model = get_model(FLAGS, properties)
    path = os.path.join(FLAGS.data_dir, FLAGS.dataset.replace("\\", os.sep),
                        "properties_test_original.json")
    with open(path) as json_data_file:
        properties = json.load(json_data_file)
    with tf.variable_scope('model', reuse=True):
        real_x, labels = model.data_handler.get_batch(FLAGS.batch_size, FLAGS)
        real_x, labels = model.data_handler.prepare_real_data(real_x, labels)
        # data = model.data_handler.get_embedded_seqs(data_to_test)
        # d_scores, _ = model.get_discriminator_result(data, batch[1:], reuse=True)
        # printing_d_scores = tf.py_func(
        #     lambda vals, scores: print_protein_seq(
        #         vals, properties["class_mapping"], d_scores=scores),
        #     [tf.squeeze(data_to_test), d_scores], tf.string)
        noise = np.random.normal(size=[FLAGS.batch_size, FLAGS.z_dim],
                                 loc=0.0, scale=1.0)
        z = noise
        generated_data = model.get_generated_data(
            tf.convert_to_tensor(z, dtype=tf.float32), labels)
        d_fake_scores, _ = model.get_discriminator_result(generated_data,
                                                          labels, reuse=True)
        generated_data_ids = model.data_handler.convert_to_acid_ids(
            generated_data)
        printing_sequences = tf.py_func(
            lambda vals, labels: print_protein_seq(
                vals, properties["class_mapping"], labels=labels),
            [tf.squeeze(generated_data_ids), labels[0]], tf.string)
    session_config = tf.ConfigProto(allow_soft_placement=True,
                                    log_device_placement=False)
    evaluate_repeatedly(
        checkpoint_dir=logdir,
        hooks=[
            # BlastHook(id_to_enzyme_class_dict, every_n_steps=1,
            #           output_dir=logdir, n_examples=FLAGS.batch_size,
            #           running_mode=FLAGS.running_mode),
            tf.contrib.training.SummaryAtEndHook(logdir),
            tf.contrib.training.StopAfterNEvalsHook(1)
        ],
        eval_ops=[printing_sequences],
        max_number_of_evaluations=1,
        config=session_config)

def main(_):
    """Prints discriminator scores for held-out test sequences and for
    sequences generated along a latent-space interpolation path."""
    FLAGS.properties_file = "properties_test.json"
    properties = get_properties(FLAGS)
    logdir = setup_logdir(FLAGS, properties)
    FLAGS.running_mode = "test"
    path = os.path.join(FLAGS.data_dir, FLAGS.dataset.replace("\\", os.sep),
                        "d_test.csv")
    data_to_test = pd.read_csv(path, header=None)[:FLAGS.batch_size]
    data_to_test = from_amino_acid_to_id(data_to_test, 0)
    model = get_model(FLAGS, properties)
    with tf.variable_scope('model', reuse=True):
        batch = model.data_handler.get_batch(FLAGS.batch_size, FLAGS)
        # Score the held-out test sequences with the discriminator.
        data = model.data_handler.get_embedded_seqs(data_to_test)
        d_scores, _ = model.get_discriminator_result(data, batch[1:],
                                                     reuse=True)
        printing_d_scores = tf.py_func(
            lambda vals, scores: print_protein_seq(
                vals, properties["class_mapping"], d_scores=scores),
            [tf.squeeze(data_to_test), d_scores], tf.string)
        # Interpolate between two random latent points and score the results.
        noise1 = np.random.uniform(size=[FLAGS.z_dim], low=-1.0, high=1.0)
        noise2 = np.random.uniform(size=[FLAGS.z_dim], low=-1.0, high=1.0)
        z = np.stack([
            slerp(ratio, noise1, noise2)
            for ratio in np.linspace(0, 1, FLAGS.batch_size)
        ])
        generated_data = model.get_generated_data(
            tf.convert_to_tensor(z, dtype=tf.float32), batch[1:])
        d_fake_scores, _ = model.get_discriminator_result(generated_data,
                                                          batch[1:],
                                                          reuse=True)
        generated_data_ids = model.data_handler.convert_to_acid_ids(
            generated_data)
        printing_sequences = tf.py_func(
            lambda vals, scores: print_protein_seq(
                vals, properties["class_mapping"], d_scores=scores),
            [tf.squeeze(generated_data_ids), d_fake_scores], tf.string)
    session_config = tf.ConfigProto(allow_soft_placement=True,
                                    log_device_placement=False)
    evaluate_repeatedly(
        checkpoint_dir=logdir,
        hooks=[
            # BlastHook(id_to_enzyme_class_dict, every_n_steps=1,
            #           output_dir=logdir, n_examples=FLAGS.batch_size,
            #           running_mode=FLAGS.running_mode),
            tf.contrib.training.SummaryAtEndHook(logdir),
            tf.contrib.training.StopAfterNEvalsHook(1)
        ],
        eval_ops=[printing_d_scores, printing_sequences],
        max_number_of_evaluations=1,
        config=session_config)

def get_discriminator_results():
    """Scores every sequence in a FASTA file with the discriminator and
    writes the scored sequences to a FASTA file in the log directory."""
    properties = get_properties(FLAGS)
    logdir = setup_logdir(FLAGS, properties)
    noise = tf.placeholder(dtype=tf.float32,
                           shape=[FLAGS.batch_size, FLAGS.z_dim])
    model = get_model(FLAGS, properties, logdir, noise)
    input_shape = [FLAGS.batch_size, properties[SEQ_LENGTH]]
    inputs = tf.placeholder(dtype=tf.int32, shape=input_shape)
    # One-hot encode the amino-acid ids and add a channel dimension:
    # [batch_size, 1, seq_length, n_seqs].
    data = tf.expand_dims(
        tf.transpose(tf.one_hot(inputs, FLAGS.n_seqs, axis=1), [0, 2, 1]),
        axis=1)
    labels_shape = [FLAGS.batch_size]
    labels = tf.placeholder(dtype=tf.float32, shape=labels_shape)
    with tf.variable_scope('model', reuse=tf.AUTO_REUSE):
        d, d_h = model.get_discriminator_result(data, labels, reuse=True)
    fasta_seqs = fasta_to_numpy(FLAGS.fasta_path, properties[SEQ_LENGTH])
    session_creator = ChiefSessionCreator(
        master='',
        checkpoint_filename_with_path=tf.train.latest_checkpoint(logdir))
    seqs = []
    with MonitoredSession(session_creator=session_creator,
                          hooks=None) as session:
        for i in range(0, len(fasta_seqs), FLAGS.batch_size):
            print("Processing batch ", i)
            batch = fasta_seqs[i:i + FLAGS.batch_size]
            batch_len = len(batch)
            # Zero-pad the final batch if it is smaller than batch_size.
            if batch_len < FLAGS.batch_size:
                batch = np.vstack([
                    batch,
                    np.zeros([FLAGS.batch_size - batch_len,
                              properties[SEQ_LENGTH]])
                ])
            d_scores, step = session.run(
                [d, tf.train.get_global_step()],
                feed_dict={inputs: batch, labels: np.zeros(labels_shape)})
            for j in range(batch_len):
                seqs.append(Sequence(id=j + i,
                                     seq=fasta_seqs[j + i],
                                     d_score=d_scores[j]))
    fasta = sequences_to_fasta(seqs, properties['class_mapping'],
                               escape=False, strip_zeros=True)
    time_stamp = time.strftime('%H_%M_%S', time.gmtime())
    original_name = os.path.splitext(os.path.basename(FLAGS.fasta_path))[0]
    path = os.path.join(
        logdir,
        '{}_scores_{}_{}.fasta'.format(original_name, step, time_stamp))
    with open(path, 'w') as f:
        print(fasta, file=f)
    tf.logging.info('{} sequences stored in {}'.format(len(seqs), path))

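# The `fasta_to_numpy` helper used above is not defined in this file. Below is
# a hedged sketch of what such a helper might look like: parse a FASTA file of
# amino-acid sequences into a zero-padded [n_seqs, seq_length] id array. The
# 20-letter alphabet and id ordering are assumptions for illustration; the
# repo defines its own vocabulary.
def fasta_to_numpy_sketch(path, seq_length):
    alphabet = 'ACDEFGHIKLMNPQRSTVWY'  # hypothetical vocabulary; 0 = padding
    aa_to_id = {aa: i + 1 for i, aa in enumerate(alphabet)}
    seqs, current = [], []
    with open(path) as f:
        for line in f:
            line = line.strip()
            if line.startswith('>'):
                if current:
                    seqs.append(''.join(current))
                    current = []
            elif line:
                current.append(line)
    if current:
        seqs.append(''.join(current))
    out = np.zeros([len(seqs), seq_length], dtype=np.int32)
    for i, seq in enumerate(seqs):
        ids = [aa_to_id.get(aa, 0) for aa in seq[:seq_length]]
        out[i, :len(ids)] = ids
    return out
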
def generate_sequences():
    """Generates at least n_seqs sequences from truncated-normal noise and
    stores them as FASTA, optionally annotated with local BLAST results."""
    properties = get_properties(FLAGS)
    logdir = setup_logdir(FLAGS, properties)
    tf.logging.info(
        'Noise will have standard deviation of {}'.format(FLAGS.stddev))
    noise = tf.random.truncated_normal([FLAGS.batch_size, FLAGS.z_dim],
                                       stddev=FLAGS.stddev, dtype=tf.float32)
    model = get_model(FLAGS, properties, logdir, noise)
    if FLAGS.one_hot:
        generated_seqs = tf.squeeze(tf.argmax(model.fake_x, axis=-1))
    else:
        generated_seqs = convert_to_acid_ids(model.fake_x)
    seqs = []
    session_creator = ChiefSessionCreator(
        master='',
        checkpoint_filename_with_path=tf.train.latest_checkpoint(logdir))
    with MonitoredSession(session_creator=session_creator,
                          hooks=None) as session:
        # Sample batches until at least n_seqs sequences are collected.
        while True:
            results, step = session.run(
                [generated_seqs, tf.train.get_global_step()], None)
            seq_id = len(seqs)
            for i in range(FLAGS.batch_size):
                seqs.append(Sequence(id=seq_id + i, seq=results[i]))
            if len(seqs) >= FLAGS.n_seqs:
                break
    time_stamp = time.strftime('%H_%M_%S', time.gmtime())
    path = os.path.join(logdir,
                        'generated_{}_{}.fasta'.format(step, time_stamp))
    fasta = sequences_to_fasta(seqs, properties['class_mapping'],
                               escape=False, strip_zeros=True)
    if FLAGS.blast:
        # BLAST the generated sequences against a local database and annotate
        # them with the search results.
        db_path = os.path.join(
            FLAGS.data_dir, FLAGS.dataset,
            FLAGS.blast_db.replace("\\", os.sep) + "_" + FLAGS.running_mode)
        blast_results, err = get_local_blast_results(logdir, db_path, fasta)
        seqs, evalues, similarities, identity = \
            update_sequences_with_blast_results(blast_results, seqs)
        print_stats([("Evalue", evalues), ("BLOSUM45", similarities),
                     ("Identity", identity)], len(seqs))
        fasta = sequences_to_fasta(seqs, properties['class_mapping'],
                                   escape=False, strip_zeros=True)
    with open(path, 'w') as f:
        print(fasta, file=f)
    tf.logging.info('{} sequences stored in {}'.format(len(seqs), path))
    tf.logging.info('Finished evaluation at ' +
                    time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))

def raw_results():
    properties = get_properties(FLAGS)
    logdir = setup_logdir(FLAGS, properties)
    noise = tf.random.truncated_normal([FLAGS.batch_size, FLAGS.z_dim],
                                       stddev=0.5, dtype=tf.float32)
    model = get_model(FLAGS, properties, logdir, noise)
    raw_generations = tf.squeeze(model.fake_x)
    session_creator = ChiefSessionCreator(
        master='',
        checkpoint_filename_with_path=tf.train.latest_checkpoint(logdir))
    with MonitoredSession(session_creator=session_creator,
                          hooks=None) as session:
        results, step = session.run(
            [raw_generations, tf.train.get_global_step()], None)
    time_stamp = time.strftime('%H_%M_%S', time.gmtime())
    path = os.path.join(logdir, 'raw_{}_{}.npz'.format(step, time_stamp))
    with open(path, 'wb') as f:
        np.savez(f, results)

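# Hedged usage sketch for reading back the raw generator output saved by
# raw_results(). The 'arr_0' key is numpy's default for arrays passed
# positionally to np.savez; the path argument is whatever raw_*.npz file
# raw_results() wrote to the log directory.
def load_raw_results_sketch(path):
    with np.load(path) as data:
        return data['arr_0']  # squeezed fake_x values for one batch
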