def setup(opts):
    global sess
    global output
    global enc
    global g
    length = None
    temperature = 1
    top_k = 0
    enc = encoder.get_encoder(opts['checkpoint_dir'])
    hparams = model.default_hparams()
    with open(os.path.join(opts['checkpoint_dir'], 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))
    if length is None:
        length = hparams.n_ctx // 2
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" % hparams.n_ctx)
    sess = tf.Session()
    context = tf.placeholder(tf.int32, [1, None])
    # this project's sample.sample_sequence returns the output op plus a
    # length placeholder, so the desired length can be fed at run time
    output, length_ph = sample.sample_sequence(hparams=hparams, context=context,
                                               batch_size=1,
                                               temperature=temperature,
                                               top_k=top_k)
    saver = tf.train.Saver()
    ckpt = tf.train.latest_checkpoint(opts['checkpoint_dir'])
    saver.restore(sess, ckpt)
    g = tf.get_default_graph()
    g.finalize()
    return sess, enc, context, length_ph

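# A hedged usage sketch for setup() above, not from the source. It assumes
# the modified sample.sample_sequence really does return the pair
# (output op, length placeholder) that the unpacking above implies, and it
# reads the module-level `output` global that setup() fills in. The function
# name and slicing are illustrative assumptions.
def generate(opts, prompt, n_tokens=50):
    sess, enc_, context, length_ph = setup(opts)
    tokens = enc_.encode(prompt)
    out = sess.run(output, feed_dict={context: [tokens], length_ph: n_tokens})
    # drop the prompt tokens and decode only the continuation
    return enc_.decode(out[0, len(tokens):])
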
def sample_model(
    model_name='345-recipes',
    seed=None,
    nsamples=1,
    batch_size=8,
    length=None,
    temperature=1,
    top_k=0,
):
    """
    Run the sample_model
    :model_name=345-recipes : String, which model to use
    :seed=None : Integer seed for random number generators, fix seed to
     reproduce results
    :nsamples=1 : Number of samples to return; if 0, continues to generate
     samples indefinitely.
    :batch_size=8 : Number of batches (only affects speed/memory).
    :length=None : Number of tokens in generated text, if None (default), is
     determined by model hyperparameters
    :temperature=1 : Float value controlling randomness in Boltzmann
     distribution. Lower temperature results in less random completions. As
     the temperature approaches zero, the model will become deterministic and
     repetitive. Higher temperature results in more random completions.
    :top_k=0 : Integer value controlling diversity. 1 means only 1 word is
     considered for each step (token), resulting in deterministic completions,
     while 40 means 40 words are considered at each step. 0 (default) is a
     special setting meaning no restrictions. 40 generally is a good value.
    """
    enc = encoder.get_encoder(model_name)
    hparams = model.default_hparams()
    with open(os.path.join('models', model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" % hparams.n_ctx)

    with tf.Session(graph=tf.Graph()) as sess:
        np.random.seed(seed)
        tf.set_random_seed(seed)
        output = sample.sample_sequence(
            hparams=hparams, length=length,
            start_token=enc.encoder['<|endoftext|>'],
            batch_size=batch_size,
            temperature=temperature, top_k=top_k  # was temperature=temp; `temp` was undefined
        )[:, 1:]

        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join('models', model_name))
        saver.restore(sess, ckpt)

        generated = 0
        while nsamples == 0 or generated < nsamples:
            out = sess.run(output)
            for i in range(batch_size):
                generated += batch_size
                text = enc.decode(out[i])
                # returns the first decoded sample immediately
                return text

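# Example call for sample_model() above (a sketch; assumes a fine-tuned
# '345-recipes' checkpoint exists under models/). The function returns the
# first decoded sample; the fixed seed is only a reproducibility aid.
recipe = sample_model(model_name='345-recipes', seed=42, nsamples=1,
                      batch_size=1, length=200, temperature=0.8, top_k=40)
print(recipe)
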
def __init__(self, encoder, model_name="117M", batch_size=1):
    self.encoder = encoder
    self.model_name = model_name
    self.hparams = model.default_hparams()
    with open(os.path.join('models', model_name, 'hparams.json')) as f:
        self.hparams.override_from_dict(json.load(f))

def gpt2_session_and_out_op(seed=None, model_name='345M', length=75,
                            temperature=1, top_k=40):
    batch_size = 1
    hparams = model.default_hparams()
    with open(os.path.join('models', model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))
    if length is None:
        length = hparams.n_ctx // 2

    with tf.Graph().as_default() as graph:
        sess = tf.Session(graph=graph)
        context = tf.placeholder(tf.int32, [batch_size, None], name='context')
        np.random.seed(seed)
        tf.set_random_seed(seed)
        out_op = sample.sample_sequence(hparams=hparams, length=length,
                                        context=context, batch_size=batch_size,
                                        temperature=temperature, top_k=top_k)
        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join('models', model_name))
        saver.restore(sess, ckpt)
        return sess, out_op

def __init__(self):
    self.config = tf.compat.v1.ConfigProto()
    self.config.gpu_options.allow_growth = True
    self.config.graph_options.rewrite_options.layout_optimizer = (
        rewriter_config_pb2.RewriterConfig.OFF)
    self.sess = tf.compat.v1.Session(config=self.config)
    self.hparams = model.default_hparams()
    with open("checkpoint/run1/hparams.json") as f:
        self.hparams.override_from_dict(json.load(f))
    self.context = tf.compat.v1.placeholder(tf.int32, [1, None])
    self.length = tf.compat.v1.placeholder(tf.int32, ())
    # float32, not int32: temperature is a fractional sampling parameter and
    # an int32 placeholder would reject values like 0.7
    self.temperature = tf.compat.v1.placeholder(tf.float32, ())
    self.model = model.model(hparams=self.hparams, X=self.context)
    self.load_checkpoint("checkpoint/run1")
    self.enc = encoder.get_encoder("run1")
    self.output = sample.sample_sequence(
        hparams=self.hparams,
        length=self.length,
        start_token=None,
        context=self.context,
        batch_size=1,
        temperature=self.temperature,
        top_k=0,
        top_p=0,
    )
    # spit out all these warnings once, up front
    self.dummy_run()

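# Sketch of the dummy_run() referenced above -- hypothetical, under the
# assumption that it exists only to force one full sampling pass so TF emits
# its one-time warnings at startup rather than on the first real request.
# The feed values are arbitrary throwaways.
def dummy_run(self):
    self.sess.run(self.output, feed_dict={
        self.context: [[self.enc.encoder['<|endoftext|>']]],
        self.length: 1,
        self.temperature: 1.0,
    })
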
def get_model(model_name='124M', seed=None, nsamples=1, batch_size=1,
              length=None, temperature=1, top_k=0, top_p=1,
              models_dir='models'):
    models_dir = os.path.expanduser(os.path.expandvars(models_dir))
    if batch_size is None:
        batch_size = 1
    assert nsamples % batch_size == 0

    enc = encoder.get_encoder(model_name, models_dir)
    hparams = model.default_hparams()
    with open(os.path.join(models_dir, model_name, 'hparams.json')) as filehandler:
        hparams.override_from_dict(json.load(filehandler))

    if length is None:
        length = hparams.n_ctx // 2
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" % hparams.n_ctx)

    graph = tf.Graph()
    sess = tf.Session(graph=graph)
    # build the ops inside the session's graph; the original built them in
    # the default graph, which this session could not see
    with graph.as_default():
        context = tf.placeholder(tf.int32, [batch_size, None])
        np.random.seed(seed)
        tf.set_random_seed(seed)
        output = sample.sample_sequence(
            hparams=hparams, length=length,
            context=context,
            batch_size=batch_size,
            temperature=temperature, top_k=top_k, top_p=top_p
        )
        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join(models_dir, model_name))
        saver.restore(sess, ckpt)
    # note: callers also need `enc` and the `context` placeholder to feed a
    # prompt; only the session and the output op are returned here
    return sess, output

def get_hparams():
    import json
    hparams = model.default_hparams()
    with open(os.path.join('models', MODEL_NAME, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))
    return hparams

def restore_model(
        model_name='345M',
        seed=None,
        models_dir='models'):
    models_dir = os.path.expanduser(os.path.expandvars(models_dir))

    enc: Encoder = encoder.get_encoder(model_name, models_dir)
    hparams: HParams = model.default_hparams()
    with open(os.path.join(models_dir, model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    np.random.seed(seed)
    tf.set_random_seed(seed)

    ph = Placeholders()
    sequence_output = sample.sample_sequence(
        hparams=hparams,
        length=ph.length,
        temperature=ph.temperature,
        top_k=ph.top_k,
        context=ph.context,
        batch_size=1
    )

    sess = tf.Session()
    saver = tf.train.Saver()
    ckpt = tf.train.latest_checkpoint(os.path.join(models_dir, model_name))
    saver.restore(sess, ckpt)
    return sess, hparams, sequence_output, enc, ph

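# restore_model() above depends on a Placeholders container that is not
# shown. A minimal sketch of what it plausibly looks like, inferred from the
# fields used (context, length, temperature, top_k) -- an assumption, not
# the original class:
class Placeholders:
    def __init__(self):
        self.context = tf.placeholder(tf.int32, [1, None], name='context')
        self.length = tf.placeholder(tf.int32, (), name='length')
        self.temperature = tf.placeholder(tf.float32, (), name='temperature')
        self.top_k = tf.placeholder(tf.int32, (), name='top_k')
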
def __init__(self, model_name: str):
    """
    Initializes encoder from pre-trained model's vocab & bpe merges, its
    hyper-parameters (i.e., Transformer decoder params) & loads up the Toxic
    comments training & test csvs into dataframes

    :param model_name: '117M' or '345M'
    """
    self.enc = encoder.get_encoder(model_name)
    self.hparams = model.default_hparams()
    self.hparams.add_hparam('dropout_rate', 0.)  # add dropout for training
    with open(os.path.join('models', model_name, 'hparams.json')) as f:
        self.hparams.override_from_dict(json.load(f))
    self.train_df = pd.read_csv(TOXIC_TRAIN_PATH)
    self.test_df = pd.read_csv(TOXIC_TEST_PATH)
    self.train_sequences = None
    self.train_labels = None
    self.test_sequences = None
    seeded_kf = KFold(n_splits=FLAGS.num_folds, random_state=FLAGS.seed,
                      shuffle=True)
    self.train_folds = [(train_index, val_index)
                        for train_index, val_index in seeded_kf.split(
                            range(len(self.train_df)))]

def __init__(self, model_name='jokes', seed=None, nsamples=4, batch_size=1,
             length=100, temperature=0.85, top_k=40, top_p=0.0):
    self.batch_size = batch_size
    self.enc = encoder.get_encoder(model_name)
    hparams = model.default_hparams()
    with open(os.path.join('models', model_name, 'hparams.json')) as fp:
        hparams.override_from_dict(json.load(fp))
    self.sess = tf.Session()
    self.context = tf.placeholder(tf.int32, [batch_size, None])
    np.random.seed(seed)
    tf.set_random_seed(seed)
    self.output = sample.sample_sequence(hparams=hparams, length=length,
                                         context=self.context,
                                         batch_size=batch_size,
                                         temperature=temperature,
                                         top_k=top_k, top_p=top_p)
    self.sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    ckpt = tf.train.latest_checkpoint(os.path.join('models', model_name))
    saver.restore(self.sess, ckpt)

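# Hedged companion sketch for the jokes generator above: encode a prompt,
# feed it through the stored context placeholder, strip the prompt tokens
# from the output, and decode. The method name and slicing are assumptions,
# not from the source.
def tell_joke(self, prompt):
    tokens = self.enc.encode(prompt)
    out = self.sess.run(self.output, feed_dict={
        self.context: [tokens for _ in range(self.batch_size)]
    })[:, len(tokens):]
    return self.enc.decode(out[0])
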
def interact_model(
    message="A quick look at the",
    model_name='345M',
    seed=5,
    nsamples=1,
    batch_size=1,
    length=50,
    temperature=0.9,
    top_k=20,
    top_p=0.9,
):
    if batch_size is None:
        batch_size = 1
    assert nsamples % batch_size == 0

    enc = encoder.get_encoder(model_name)
    hparams = model.default_hparams()
    path = os.path.dirname(__file__)
    with open(os.path.join(path, 'models', model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx // 2
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer"
                         " than window size: %s" % hparams.n_ctx)

    with tf.Session(graph=tf.Graph()) as sess:
        context = tf.placeholder(tf.int32, [batch_size, None])
        np.random.seed(seed)
        tf.set_random_seed(seed)
        output = sample.sample_sequence(
            hparams=hparams, length=length,
            context=context,
            batch_size=batch_size,
            temperature=temperature, top_k=top_k, top_p=top_p
        )

        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join(path, 'models', model_name))
        saver.restore(sess, ckpt)

        if message == "":
            return -1
        raw_text = message
        context_tokens = enc.encode(raw_text)
        out = sess.run(output, feed_dict={
            context: [context_tokens for _ in range(batch_size)]
        })[:, len(context_tokens):]
        text = []
        for i in range(batch_size):
            text.append(enc.decode(out[i]))
        return text

def __init__(self, sess, length=10, temperature=0.7, top_k=1):
    seed = None
    batch_size = 1
    model_path = '1558M'
    self.sess = sess
    self.enc = encoder.get_encoder(model_path)
    hparams = model.default_hparams()
    with open(os.path.join('models/1558M', 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))
    self.context = tf.placeholder(tf.int32, [batch_size, None])
    np.random.seed(seed)
    tf.set_random_seed(seed)
    self.output = sample.sample_sequence(
        hparams=hparams,
        length=length,
        context=self.context,
        batch_size=batch_size,
    )
    saver = tf.train.Saver()
    ckpt = tf.train.latest_checkpoint('models/1558M')
    saver.restore(self.sess, ckpt)

def __init__(self, **kwargs):
    model_name = kwargs['model']
    seed = kwargs['seed']
    self.length = kwargs['len']
    top_k = kwargs['top_k']
    self.lang_target = kwargs['language']
    self.enc = encoder.get_encoder(model_name)
    self.translator = googletrans.Translator()
    hparams = model.default_hparams()
    with open(os.path.join('./data/models', model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))
    # get the language of the model from the hyperparameters
    self.lang_model = hparams.n_lang

    # start session
    self.sess = tf.Session()
    self.context = tf.placeholder(tf.int32, [1, None])
    np.random.seed(seed)
    tf.set_random_seed(seed)
    self.output = sample.sample_sequence(
        hparams=hparams,
        length=self.length,
        context=self.context,
        batch_size=1,
        temperature=1,
        top_k=top_k,
        top_p=0
    )

    # restore transformer model from last checkpoint
    saver = tf.train.Saver()
    ckpt = tf.train.latest_checkpoint(os.path.join('./data/models', model_name))
    saver.restore(self.sess, ckpt)

def load_model():
    # relies on module-level globals: model_name, seed, length, temperature,
    # top_k
    enc = encoder.get_encoder(model_name)
    hparams = model.default_hparams()
    with open(os.path.join('models', model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))
    if length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" % hparams.n_ctx)

    # entered manually instead of `with ... as sess:` so the session outlives
    # this function
    sess = tf.Session(graph=tf.Graph()).__enter__()
    np.random.seed(seed)
    tf.set_random_seed(seed)
    context = tf.placeholder(tf.int32, [1, None])
    output = sample.sample_sequence(hparams=hparams, length=length,
                                    context=context, batch_size=1,
                                    temperature=temperature, top_k=top_k)
    saver = tf.train.Saver()
    ckpt = tf.train.latest_checkpoint(os.path.join('models', model_name))
    saver.restore(sess, ckpt)
    return sess, enc, context, output

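# Minimal sketch of driving load_model() above -- assumes the module-level
# globals it reads (model_name, seed, length, temperature, top_k) are set
# before the call. The helper name `complete` is an assumption.
def complete(prompt):
    sess, enc, context, output = load_model()
    tokens = enc.encode(prompt)
    out = sess.run(output, feed_dict={context: [tokens]})[:, len(tokens):]
    return enc.decode(out[0])
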
def main(input_file, output_file, seed=42, model_name='117M', batch_size=1):
    np.random.seed(seed)
    tf.set_random_seed(seed)

    data = pd.read_csv(input_file, sep='\t')
    output_file = open(output_file, 'wb')

    enc = encoder.get_encoder(model_name)
    hparams = model.default_hparams()
    with open(os.path.join('../pretrained', model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    with tf.Session(graph=tf.Graph()) as sess:
        context = tf.placeholder(tf.int32, [batch_size, None])
        output = model.model(hparams=hparams, X=context, reuse=tf.AUTO_REUSE)
        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join('../pretrained', model_name))
        saver.restore(sess, ckpt)

        for _id in range(0, len(data), batch_size):
            list_token_ids, list_tokens = encode(enc, data[_id:_id + batch_size]['Text'])
            out = sess.run(output, feed_dict={context: list_token_ids})
            # out['att_probs'].shape (1, 12, 12, 29, 29)
            # 'AB', 'AP', 'BA', 'BP', 'PA', 'PB', 'token', 'label', 'ID'
            # (layer, head)
            for res in get_features(data[_id:_id + batch_size], list_tokens,
                                    out['att_probs'].transpose((0, 3, 4, 1, 2))):
                pickle.dump(res, output_file)

    output_file.close()

def interact_model(
    model_name='117M',
    seed=None,
    nsamples=1,
    batch_size=None,
    length=None,
    temperature=1,
    top_k=0,
):
    if batch_size is None:
        batch_size = 1
    assert nsamples % batch_size == 0

    np.random.seed(seed)
    tf.set_random_seed(seed)

    enc = encoder.get_encoder(model_name)
    hparams = model.default_hparams()
    with open(os.path.join('models', model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx // 2
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" % hparams.n_ctx)

    with tf.Session(graph=tf.Graph()) as sess:
        context = tf.placeholder(tf.int32, [batch_size, None])
        output = sample.sample_sequence(hparams=hparams, length=length,
                                        context=context, batch_size=batch_size,
                                        temperature=temperature, top_k=top_k)
        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join('models', model_name))
        saver.restore(sess, ckpt)

        while True:
            raw_text = input("Model prompt >>> ")
            while not raw_text:
                print('Prompt should not be empty!')
                raw_text = input("Model prompt >>> ")
            # drop non-ASCII characters before encoding
            clean_input = ''.join(x for x in raw_text if ord(x) <= 128)
            context_tokens = enc.encode(clean_input)
            generated = 0
            for _ in range(nsamples // batch_size):
                out = sess.run(output, feed_dict={
                    context: [context_tokens for _ in range(batch_size)]
                })[:, len(context_tokens):]
                for i in range(batch_size):
                    generated += 1
                    text = enc.decode(out[i])
                    print("=" * 40 + " SAMPLE " + str(generated) + " " + "=" * 40)
                    print(text)
            print("=" * 80)

def setup(self, model_name='117M', seed=None, temperature=1, top_k=40,
          length=None):
    if length not in self.cache:
        enc = encoder.get_encoder(model_name)
        hparams = model.default_hparams()
        with open(os.path.join('models', model_name, 'hparams.json')) as f:
            hparams.override_from_dict(json.load(f))
        if length is None:
            length = hparams.n_ctx // 2
        sess = tf.Session(graph=tf.Graph()).__enter__()
        context = tf.placeholder(tf.int32, [1, None])
        np.random.seed(seed)
        tf.set_random_seed(seed)
        output = sample.sample_sequence(hparams=hparams, length=length,
                                        context=context, batch_size=1,
                                        temperature=temperature, top_k=top_k)
        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(
            os.path.join('models', model_name))
        saver.restore(sess, ckpt)
        self.cache[length] = context, enc, output, sess
    return self.cache[length]

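# Hedged companion sketch showing how the cached setup() above would be used:
# repeated calls with the same length reuse one session/graph instead of
# rebuilding. The method name `complete` is an assumption, not from the source.
def complete(self, prompt, model_name='117M', length=None):
    context, enc, output, sess = self.setup(model_name=model_name, length=length)
    tokens = enc.encode(prompt)
    out = sess.run(output, feed_dict={context: [tokens]})[:, len(tokens):]
    return enc.decode(out[0])
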
def interact_model(
    model_name='117M',
    prompt="",
    seed=None,
    nsamples=1,
    batch_size=None,
    length=270,
    temperature=1,
    top_k=40,
):
    print(prompt)
    if batch_size is None:
        batch_size = 1
    assert nsamples % batch_size == 0

    np.random.seed(seed)
    tf.set_random_seed(seed)

    enc = encoder.get_encoder(model_name)
    hparams = model.default_hparams()
    with open(os.path.join('models', model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx // 2
    elif length > hparams.n_ctx:
        raise ValueError(
            f"can't get samples longer than window size: {hparams.n_ctx}")

    with tf.Session(graph=tf.Graph()) as sess:
        context = tf.placeholder(tf.int32, [batch_size, None])
        output = sample.sample_sequence(hparams=hparams, length=length,
                                        context=context, batch_size=batch_size,
                                        temperature=temperature,
                                        top_k=top_k)[:, 1:]
        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join('models', model_name))
        saver.restore(sess, ckpt)

        raw_text = prompt
        context_tokens = enc.encode(raw_text)
        generated = 0
        for _ in range(nsamples // batch_size):
            out = sess.run(output, feed_dict={
                context: [context_tokens for _ in range(batch_size)]
            })
            for i in range(batch_size):
                generated += 1
                text = enc.decode(out[i])
                print("=" * 40 + " MESSAGE " + str(generated) + " " + "=" * 40)
                print(f"{text}")
                gpt_text = text
                print("=" * 80)
        return gpt_text

def simple_gpt2_example():
    models_dir = lib_dir_path + '/models'
    model_name = '345M'
    seed = None
    nsamples = 1
    batch_size = 1
    length = 300
    temperature = 1
    top_k = 0
    raw_text = 'I went to a lounge to celebrate my birthday and'

    models_dir = os.path.expanduser(os.path.expandvars(models_dir))
    if batch_size is None:
        batch_size = 1
    assert nsamples % batch_size == 0

    enc = encoder.get_encoder(model_name, models_dir)
    hparams = model.default_hparams()
    with open(os.path.join(models_dir, model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx // 2
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" % hparams.n_ctx)

    with tf.Session(graph=tf.Graph()) as sess:
        context = tf.placeholder(tf.int32, [batch_size, None])
        np.random.seed(seed)
        tf.set_random_seed(seed)
        output = sample.sample_sequence(hparams=hparams, length=length,
                                        context=context, batch_size=batch_size,
                                        temperature=temperature, top_k=top_k)
        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join(models_dir, model_name))
        saver.restore(sess, ckpt)

        # --------------------
        context_tokens = enc.encode(raw_text)
        generated = 0
        for _ in range(nsamples // batch_size):
            out = sess.run(output, feed_dict={
                context: [context_tokens for _ in range(batch_size)]
            })[:, len(context_tokens):]
            for i in range(batch_size):
                generated += 1
                text = enc.decode(out[i])
                print('=' * 40 + ' SAMPLE ' + str(generated) + ' ' + '=' * 40)
                print(text)
        print('=' * 80)

def __init__(self, config_path, save_path):
    with open(config_path, 'r') as f:
        config = json.load(f)
    self.model_name = config['model_name']
    self.seed = config['seed']
    self.nsamples = config['nsamples']
    self.batch_size = config['batch_size']
    self.length = config['length']
    self.temperature = config['temperature']
    self.top_k = config['top_k']
    self.top_p = config['top_p']
    self.models_dir = config['models_dir']
    self.save_path = save_path

    self.models_dir = os.path.expanduser(os.path.expandvars(self.models_dir))
    if self.batch_size is None:
        self.batch_size = 1
    assert self.nsamples % self.batch_size == 0

    self.enc = encoder.get_encoder(self.model_name, self.models_dir)
    self.hparams = model.default_hparams()
    with open(os.path.join(self.models_dir, self.model_name,
                           'hparams.json')) as f:
        self.hparams.override_from_dict(json.load(f))

    if self.length is None:
        self.length = self.hparams.n_ctx // 2
    elif self.length > self.hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: {}"
                         .format(self.hparams.n_ctx))

    self.sess = tf.Session()
    self.context = tf.placeholder(tf.int32, [self.batch_size, None])
    np.random.seed(self.seed)
    tf.set_random_seed(self.seed)
    self.output = sample.sample_sequence(
        hparams=self.hparams,
        length=self.length,
        context=self.context,
        batch_size=self.batch_size,
        temperature=self.temperature,
        top_k=self.top_k,
        top_p=self.top_p,
    )

    saver = tf.train.Saver()
    ckpt = tf.train.latest_checkpoint(
        os.path.join(self.models_dir, self.model_name))
    saver.restore(self.sess, ckpt)

def interact_model(message="", model_name='1558M', models_dir='models',
                   seed=None, length=20, temperature=1, top_k=0):
    # models_dir = os.path.expanduser(os.path.expandvars(models_dir))
    global conversation

    enc = encoder.get_encoder(model_name, models_dir)
    hparams = model.default_hparams()
    with open(os.path.join('models', model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" % hparams.n_ctx)

    with tf.Session(graph=tf.Graph()) as sess:
        np.random.seed(seed)
        tf.set_random_seed(seed)
        context = tf.placeholder(tf.int32, [1, None])  # doesn't exist in the other model
        output = sample.sample_sequence(hparams=hparams, length=length,
                                        context=context, batch_size=1,
                                        temperature=temperature, top_k=top_k)
        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join(models_dir, model_name))
        saver.restore(sess, ckpt)

        conversation = conversation + "\nyou: " + message
        conversation = conversation + "\nher: "
        sys.stdout.write("her: ")
        sys.stdout.flush()

        encoded_conversation = enc.encode(conversation)
        result = sess.run(output, feed_dict={
            context: [encoded_conversation]
        })[:, len(encoded_conversation):]
        text = enc.decode(result[0])
        # sys.stderr.write("==============" + text + "=================")
        # sys.stderr.flush()

        splits = text.split('\n')
        # line = splits[1] if len(splits) > 1 else splits[0]
        # parts = line.split(': ')
        # reply = parts[1] if len(parts) > 1 else parts[0]
        reply = splits[0]  # take only the first generated line as the reply
        sys.stdout.write(reply + '\n')
        sys.stdout.flush()
        conversation = conversation + reply
        print(conversation)
        return reply

def chat(model_name='117M', seed=None, length=50):
    # Prepare conversation and context
    conversation = Conversation("Hudson", "Jayme")
    conversation.add_human("Hi!")
    conversation.add_computer("Hey!")
    conversation.add_human("I'm a human named {}, who are you?".format(conversation.name_human))
    conversation.add_computer("I'm a computer program but refer to me as {} please".format(conversation.name_computer))
    conversation.add_human("How exciting! Are you ready to chat?")
    conversation.add_computer("Sure thing! You go first.")

    # Prepare the model
    enc = encoder.get_encoder(model_name)
    hparams = model.default_hparams()
    with open(os.path.join('models', model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))
    if length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" % hparams.n_ctx)

    with tf.Session(graph=tf.Graph()) as sess:
        np.random.seed(seed)
        tf.set_random_seed(seed)
        context = tf.placeholder(tf.int32, [1, None])
        output = sample.sample_sequence(
            hparams=hparams, length=length,
            context=context, batch_size=1
            # temperature=temperature, top_k=top_k
        )
        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join('models', model_name))
        saver.restore(sess, ckpt)

        # Print the initial context/prompt
        print(conversation.text_generic(1))

        while True:
            # Let the human speak
            message = None
            while not message:
                message = input("{}: ".format(conversation.name_human))
            conversation.add_human(message)

            # Let the computer speak
            prompt = conversation.text_gpt() + "\n\n{}: ".format(conversation.name_computer)
            encoded_prompt = enc.encode(prompt)
            result = sess.run(output, feed_dict={
                context: [encoded_prompt]
            })[:, len(encoded_prompt):]
            text = enc.decode(result[0])
            reply = (text.split('\n'))[0]
            conversation.add_computer(reply)
            print("{}: {}".format(conversation.name_computer, reply))

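# chat() above relies on a Conversation helper that is not shown. A plausible
# minimal sketch, inferred from the calls made (add_human, add_computer,
# text_generic, text_gpt) -- an assumption, not the original class:
class Conversation:
    def __init__(self, name_human, name_computer):
        self.name_human = name_human
        self.name_computer = name_computer
        self.turns = []  # list of (speaker_name, message)

    def add_human(self, message):
        self.turns.append((self.name_human, message))

    def add_computer(self, message):
        self.turns.append((self.name_computer, message))

    def text_gpt(self):
        # full transcript formatted as "Name: message" blocks for the model
        return "\n\n".join("{}: {}".format(n, m) for n, m in self.turns)

    def text_generic(self, skip=0):
        # same transcript, optionally skipping the first `skip` turns
        return "\n\n".join("{}: {}".format(n, m) for n, m in self.turns[skip:])
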
def __init__(self, lr, max_seq_len, batch_size, num_train_epochs,
             num_warmup_steps, model_dir):
    self.lr = lr
    self.max_seq_len = max_seq_len
    self.batch_size = batch_size
    self.num_train_epochs = num_train_epochs
    self.num_warmup_steps = num_warmup_steps
    self.model_dir = model_dir
    self.hyperparams = model.default_hparams()

def interact_model(model_name, seed, nsamples, batch_size, length,
                   temperature, top_k, models_dir, article):
    result_list = []
    models_dir = os.path.expanduser(os.path.expandvars(models_dir))
    if batch_size is None:
        batch_size = 1
    assert nsamples % batch_size == 0

    enc = encoder.get_encoder(model_name, models_dir)
    hparams = model.default_hparams()
    with open(os.path.join(models_dir, model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx // 2
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" % hparams.n_ctx)

    with tf.Session(graph=tf.Graph()) as sess:
        context = tf.placeholder(tf.int32, [batch_size, None])
        np.random.seed(seed)
        tf.set_random_seed(seed)
        output = sample.sample_sequence(
            hparams=hparams, length=length,
            context=context,
            batch_size=batch_size,
            temperature=temperature, top_k=top_k
        )
        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join(models_dir, model_name))
        saver.restore(sess, ckpt)

        # "TL;DR:" is GPT-2's summarization prompt ("TF;DR" in the original
        # appears to be a typo)
        raw_text = article + "\nTL;DR:"
        if not raw_text:
            return 'Text should not be empty!'

        context_tokens = enc.encode(raw_text)
        generated = 0
        for _ in range(3):
            for _ in range(nsamples // batch_size):
                out = sess.run(output, feed_dict={
                    context: [context_tokens for _ in range(batch_size)]
                })[:, len(context_tokens):]
                for i in range(batch_size):
                    generated += 1
                    text = enc.decode(out[i])
                    result_list.append(str(text))

        # pick the generated summary most similar to the article itself
        result_list.append(str(raw_text))
        result_list = pd.Series(result_list)
        tfidf = TfidfVectorizer(stop_words='english')
        tfidf_matrix = tfidf.fit_transform(result_list)
        cosine_sim = linear_kernel(tfidf_matrix)
        sim_scores = list(enumerate(cosine_sim[3]))
        sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
        sim_scores = sim_scores[1]  # [0] is the article matched with itself
        return result_list[sim_scores[0]].split("<|endoftext|>")[0]

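# The selection step above in isolation: the three candidate summaries plus
# the article are TF-IDF vectorized, cosine similarities against the article
# (row 3) are sorted descending, and entry [1] is taken because entry [0] is
# the article matched with itself at similarity 1.0. A toy standalone sketch
# with made-up strings:
texts = pd.Series(["candidate one", "candidate two", "candidate three",
                   "the article text to summarize"])
matrix = TfidfVectorizer(stop_words='english').fit_transform(texts)
# rows are L2-normalized, so the linear kernel equals cosine similarity
sim_scores = sorted(enumerate(linear_kernel(matrix)[3]),
                    key=lambda x: x[1], reverse=True)
best_candidate = texts[sim_scores[1][0]]
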
def interact_model(model_name, seed, nsamples, batch_size, length,
                   temperature, top_k, models_dir):
    models_dir = os.path.expanduser(os.path.expandvars(models_dir))
    if batch_size is None:
        batch_size = 1
    assert nsamples % batch_size == 0

    enc = encoder.get_encoder(model_name, models_dir)
    hparams = model.default_hparams()
    with open(os.path.join(models_dir, model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx // 2
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" % hparams.n_ctx)

    with tf.Session(graph=tf.Graph()) as sess:
        context = tf.placeholder(tf.int32, [batch_size, None])
        np.random.seed(seed)
        tf.set_random_seed(seed)
        output = sample.sample_sequence(hparams=hparams, length=length,
                                        context=context, batch_size=batch_size,
                                        temperature=temperature, top_k=top_k)
        saver = tf.train.Saver(save_relative_paths=True)
        ckpt = tf.train.latest_checkpoint(os.path.join(models_dir, model_name))
        saver.restore(sess, ckpt)

        while True:
            raw_text = input("\nModel prompt >>> ")
            if raw_text == 'ADMIN_NIXTRATOR':  # exit keyword
                raw_text = False
                break
            while not raw_text:
                print('\nPrompt should not be empty!')
                raw_text = input("\nModel prompt >>> ")
            context_tokens = enc.encode(raw_text)
            generated = 0
            for _ in range(nsamples // batch_size):
                out = sess.run(output, feed_dict={
                    context: [context_tokens for _ in range(batch_size)]
                })[:, len(context_tokens):]
                for i in range(batch_size):
                    generated += 1
                    text = enc.decode(out[i])
                    print("=" * 40 + " SAMPLE " + str(generated) + " " + "=" * 40)
                    print(text)
            print("=" * 80)

def interact_model(self,
                   model_name,   # which model to use; here the GPT-2 model with 345 million parameters (weights)
                   seed,         # integer seed for the random number generators; fix the seed to reproduce results
                   nsamples,     # number of sample texts generated in the output
                   batch_size,   # only affects speed/memory; must also divide nsamples
                   length,       # number of tokens in the generated text; if None, decided by the model hyperparameters
                   temperature,  # controls randomness in the Boltzmann distribution: lower means less random completions, approaching deterministic and repetitive at zero; higher means more random completions
                   top_k,        # controls diversity: 1 means only 1 word is considered per step (token), 40 means 40 words; 0 (default) means no restriction; top_k = 40 is generally a good value
                   models_dir    # path to the parent folder containing the model subfolders (contains the <model_name> folder)
                   ):
    # NOTE: this hardcoded Windows path overrides the models_dir argument
    models_dir = "E:\\workdirectory\\Code Name Val Halen\\DS Sup\\DL\\Chapter A9\\gpt\\gpt-2\\models\\"  # os.path.expanduser(os.path.expandvars(models_dir))
    if batch_size is None:
        batch_size = 1
    assert nsamples % batch_size == 0

    enc = encoder.get_encoder(model_name, models_dir)
    hparams = model.default_hparams()
    with open(os.path.join(models_dir, model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx // 2
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" % hparams.n_ctx)

    with tf.Session(graph=tf.Graph()) as sess:
        context = tf.placeholder(tf.int32, [batch_size, None])
        np.random.seed(seed)
        tf.set_random_seed(seed)
        output = sample.sample_sequence(
            hparams=hparams, length=length,
            context=context,
            batch_size=batch_size,
            temperature=temperature, top_k=top_k
        )
        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join(models_dir, model_name))
        saver.restore(sess, ckpt)

        while True:
            raw_text = input("Model prompt >>> ")
            while not raw_text:
                print('Prompt should not be empty!')
                raw_text = input("Model prompt >>> ")
            context_tokens = enc.encode(raw_text)
            generated = 0
            for _ in range(nsamples // batch_size):
                out = sess.run(output, feed_dict={
                    context: [context_tokens for _ in range(batch_size)]
                })[:, len(context_tokens):]
                for i in range(batch_size):
                    generated += 1
                    text = enc.decode(out[i])
                    print("=" * 40 + " SAMPLE " + str(generated) + " " + "=" * 40)
                    print(text)
            print("=" * 80)

def noninteract_model(
    model_name,
    seed,
    nsamples,
    batch_size,
    length,
    temperature,
    top_k,
    models_dir,
    prompt_list
):
    models_dir = os.path.expanduser(os.path.expandvars(models_dir))
    if batch_size is None:
        batch_size = 1
    assert nsamples % batch_size == 0

    enc = encoder.get_encoder(model_name, models_dir)
    hparams = model.default_hparams()
    with open(os.path.join(models_dir, model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx // 2
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" % hparams.n_ctx)

    with tf.Session(graph=tf.Graph()) as sess:
        context = tf.placeholder(tf.int32, [batch_size, None])
        np.random.seed(seed)
        tf.set_random_seed(seed)
        output = sample.sample_sequence(
            hparams=hparams, length=length,
            context=context,
            batch_size=batch_size,
            temperature=temperature, top_k=top_k
        )
        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join(models_dir, model_name))
        saver.restore(sess, ckpt)

        all_text = {}
        for prpt in prompt_list:
            print(prpt)
            text_list = []
            raw_text = prpt
            context_tokens = enc.encode(raw_text)
            generated = 0
            for _ in range(nsamples // batch_size):
                out = sess.run(output, feed_dict={
                    context: [context_tokens for _ in range(batch_size)]
                })[:, len(context_tokens):]
                for i in range(batch_size):
                    generated += 1
                    text_list.append(enc.decode(out[i]))
            all_text[prpt] = text_list
    return all_text

def interact_model(model_name='345M', seed=None, nsamples=1, batch_size=1,
                   length=108, temperature=0.9, top_k=38, raw_text=''):
    if batch_size is None:
        batch_size = 1
    assert nsamples % batch_size == 0

    enc = encoder.get_encoder(model_name)
    hparams = model.default_hparams()
    with open(os.path.join('models', model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx // 2
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" % hparams.n_ctx)

    with tf.Session(graph=tf.Graph()) as sess:
        context = tf.placeholder(tf.int32, [batch_size, None])
        np.random.seed(seed)
        tf.set_random_seed(seed)
        output = sample.sample_sequence(hparams=hparams, length=length,
                                        context=context, batch_size=batch_size,
                                        temperature=temperature, top_k=top_k)
        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join('models', model_name))
        saver.restore(sess, ckpt)

        text = ''
        context_tokens = enc.encode(raw_text)
        # generated = 0
        for _ in range(nsamples // batch_size):
            out = sess.run(output, feed_dict={
                context: [context_tokens for _ in range(batch_size)]
            })[:, len(context_tokens):]
            for i in range(batch_size):
                # generated += 1
                text += enc.decode(out[i])
        # Ensure that generated text ends with punctuation or twitter handle
        # while (text[-1] not in '.?!') and ('@' not in text.split()[-1]):
        return text

def main():
    # keep track of the commit id
    git_commit_id = get_current_git_version()
    write_log(log_file, "GIT COMMIT ID: " + git_commit_id)

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config, graph=tf.Graph()) as sess:
        hparams = model_gpt.default_hparams()
        with open(os.path.join(FLAGS.gpt_model_name, 'hparams.json')) as f:
            hparams.override_from_dict(json.load(f))

        preprocessed_data = Preprocessor(processed_data_dir, FLAGS.limits, eos, empty)
        field_id2word = preprocessed_data.fieldid2word

        model = SeqUnit(batch_size=FLAGS.batch_size, hidden_size=FLAGS.hidden_size,
                        emb_size=FLAGS.emb_size, field_size=FLAGS.field_size,
                        pos_size=FLAGS.pos_size, field_vocab=FLAGS.field_vocab,
                        source_vocab=FLAGS.source_vocab,
                        position_vocab=FLAGS.position_vocab,
                        target_vocab=FLAGS.target_vocab,
                        scope_name="seq2seq", name="seq2seq",
                        field_concat=FLAGS.field,
                        position_concat=FLAGS.position,
                        fgate_enc=FLAGS.fgate_encoder,
                        dual_att=FLAGS.dual_attention,
                        decoder_add_pos=FLAGS.decoder_pos,
                        encoder_add_pos=FLAGS.encoder_pos,
                        learning_rate=FLAGS.learning_rate,
                        use_coverage=FLAGS.use_coverage,
                        coverage_penalty=FLAGS.coverage_penalty,
                        fieldid2word=field_id2word,
                        copy_gate_penalty=FLAGS.copy_gate_penalty,
                        use_copy_gate=FLAGS.use_copy_gate,
                        gpt_hparams=hparams, vocab_ind=None,
                        empty_token=empty, stop_token=eos)

        if FLAGS.mode == 'train':
            # collect all trainable variables, exclude embeddings
            gpt_var = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='model')
            gpt_var_load = []
            for each_var in gpt_var:
                if "Adam" not in each_var.name:
                    gpt_var_load.append(each_var)
            gpt_var_load.remove(model.embedding)

            # load GPT checkpoint
            saver = tf.train.Saver(var_list=gpt_var_load)
            ckpt = tf.train.latest_checkpoint(FLAGS.gpt_model_name)
            saver.restore(sess, ckpt)

            # init other vars
            seq2seq_var = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='seq2seq')
            seq2seq_var.append(model.embedding)
            sess.run(tf.variables_initializer(var_list=seq2seq_var))

            train(sess, preprocessed_data, model)
        else:
            model.load(saved_model_path, sess)
            test_result = evaluate(sess, preprocessed_data, model, results_path, 'test')
            write_log(log_file, test_result)

def single_interact_model_4(
    # output_dir,
    # output_file,
    models_dir,
    model_name,
    seedinput,
    outputlength,
    temperature,
    top_k,
    top_p,
):
    seed = None
    nsamples = 1  # x possible
    batch_size = 1

    enc = encoder.get_encoder(model_name)
    hparams = model.default_hparams()
    with open(os.path.join(models_dir, model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if outputlength is None:
        outputlength = hparams.n_ctx // 2
    elif outputlength > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" % hparams.n_ctx)

    with tf.Session(graph=tf.Graph()) as sess:
        context = tf.placeholder(tf.int32, [batch_size, None])
        np.random.seed(seed)
        tf.set_random_seed(seed)
        output = sample.sample_sequence(
            hparams=hparams, length=outputlength,
            context=context,
            batch_size=batch_size,
            temperature=temperature, top_k=top_k, top_p=top_p
        )
        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join(models_dir, model_name))
        saver.restore(sess, ckpt)

        context_tokens = enc.encode(seedinput)
        generated = 0
        text = ''
        for _ in range(nsamples // batch_size):
            out = sess.run(output, feed_dict={
                context: [context_tokens for _ in range(batch_size)]
            })[:, len(context_tokens):]
            for i in range(batch_size):
                generated += 1
                # decode into a separate variable; the original reused
                # `output`, clobbering the sampling op after one iteration
                text = enc.decode(out[i])
        print("-" * 80)
        return text

def interact_model(
    model_name='117M',
    seed=None,
    nsamples=1,
    batch_size=1,
    length=None,
    temperature=1,
    top_k=0,
):
    """
    Interactively run the model
    :model_name=117M : String, which model to use
    :seed=None : Integer seed for random number generators, fix seed to
     reproduce results
    :nsamples=1 : Number of samples to return total
    :batch_size=1 : Number of batches (only affects speed/memory). Must
     divide nsamples.
    :length=None : Number of tokens in generated text, if None (default), is
     determined by model hyperparameters
    :temperature=1 : Float value controlling randomness in Boltzmann
     distribution. Lower temperature results in less random completions. As
     the temperature approaches zero, the model will become deterministic and
     repetitive. Higher temperature results in more random completions.
    :top_k=0 : Integer value controlling diversity. 1 means only 1 word is
     considered for each step (token), resulting in deterministic completions,
     while 40 means 40 words are considered at each step. 0 (default) is a
     special setting meaning no restrictions. 40 generally is a good value.
    """
    if batch_size is None:
        batch_size = 1
    assert nsamples % batch_size == 0

    enc = encoder.get_encoder(model_name)
    hparams = model.default_hparams()
    with open(os.path.join('models', model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx // 2
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" % hparams.n_ctx)

    with tf.Session(graph=tf.Graph()) as sess:
        context = tf.placeholder(tf.int32, [batch_size, None])
        np.random.seed(seed)
        tf.set_random_seed(seed)
        output = sample.sample_sequence(
            hparams=hparams, length=length,
            context=context,
            batch_size=batch_size,
            temperature=temperature, top_k=top_k
        )

        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join('models', model_name))
        saver.restore(sess, ckpt)

        while True:
            raw_text = input("Model prompt >>> ")
            while not raw_text:
                print('Prompt should not be empty!')
                raw_text = input("Model prompt >>> ")
            context_tokens = enc.encode(raw_text)
            generated = 0
            for _ in range(nsamples // batch_size):
                out = sess.run(output, feed_dict={
                    context: [context_tokens for _ in range(batch_size)]
                })[:, len(context_tokens):]
                for i in range(batch_size):
                    generated += 1
                    text = enc.decode(out[i])
                    print("=" * 40 + " SAMPLE " + str(generated) + " " + "=" * 40)
                    print(text)
            print("=" * 80)

def sample_model(
    model_name='117M',
    seed=None,
    nsamples=0,
    batch_size=1,
    length=None,
    temperature=1,
    top_k=0,
):
    """
    Run the sample_model
    :model_name=117M : String, which model to use
    :seed=None : Integer seed for random number generators, fix seed to
     reproduce results
    :nsamples=0 : Number of samples to return, if 0, continues to generate
     samples indefinitely.
    :batch_size=1 : Number of batches (only affects speed/memory).
    :length=None : Number of tokens in generated text, if None (default), is
     determined by model hyperparameters
    :temperature=1 : Float value controlling randomness in Boltzmann
     distribution. Lower temperature results in less random completions. As
     the temperature approaches zero, the model will become deterministic and
     repetitive. Higher temperature results in more random completions.
    :top_k=0 : Integer value controlling diversity. 1 means only 1 word is
     considered for each step (token), resulting in deterministic completions,
     while 40 means 40 words are considered at each step. 0 (default) is a
     special setting meaning no restrictions. 40 generally is a good value.
    """
    enc = encoder.get_encoder(model_name)
    hparams = model.default_hparams()
    with open(os.path.join('models', model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" % hparams.n_ctx)

    with tf.Session(graph=tf.Graph()) as sess:
        np.random.seed(seed)
        tf.set_random_seed(seed)
        output = sample.sample_sequence(
            hparams=hparams, length=length,
            start_token=enc.encoder['<|endoftext|>'],
            batch_size=batch_size,
            temperature=temperature, top_k=top_k
        )[:, 1:]

        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join('models', model_name))
        saver.restore(sess, ckpt)

        generated = 0
        while nsamples == 0 or generated < nsamples:
            out = sess.run(output)
            for i in range(batch_size):
                generated += batch_size
                text = enc.decode(out[i])
                print("=" * 40 + " SAMPLE " + str(generated) + " " + "=" * 40)
                print(text)

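# Example invocation of sample_model() above: two unconditional samples from
# the 117M model with a fixed seed for reproducibility (a sketch; assumes the
# weights have been downloaded to models/117M).
if __name__ == '__main__':
    sample_model(model_name='117M', seed=0, nsamples=2, batch_size=1,
                 length=100, temperature=1, top_k=40)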