Example No. 1
def sample_model(
    model_name='774M',
    seed=None,
    nsamples=10,
    batch_size=1,
    length=150,
    temperature=1,
    top_k=0,
    top_p=1,
    models_dir='../models',
):
    """
    Run the sample_model
    :model_name=124M : String, which model to use
    :seed=None : Integer seed for random number generators, fix seed to
     reproduce results
    :nsamples=0 : Number of samples to return, if 0, continues to
     generate samples indefinately.
    :batch_size=1 : Number of batches (only affects speed/memory).
    :length=None : Number of tokens in generated text, if None (default), is
     determined by model hyperparameters
    :temperature=1 : Float value controlling randomness in boltzmann
     distribution. Lower temperature results in less random completions. As the
     temperature approaches zero, the model will become deterministic and
     repetitive. Higher temperature results in more random completions.
    :top_k=0 : Integer value controlling diversity. 1 means only 1 word is
     considered for each step (token), resulting in deterministic completions,
     while 40 means 40 words are considered at each step. 0 (default) is a
     special setting meaning no restrictions. 40 generally is a good value.
     :models_dir : path to parent folder containing model subfolders
     (i.e. contains the <model_name> folder)
    """
    models_dir = os.path.expanduser(os.path.expandvars(models_dir))
    enc = encoder.get_encoder(model_name, models_dir)
    hparams = model.default_hparams()
    with open(os.path.join(models_dir, model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" %
                         hparams.n_ctx)

    with tf.Session(graph=tf.Graph()) as sess:
        context = tf.placeholder(tf.int32, [batch_size, None])
        np.random.seed(seed)
        tf.set_random_seed(seed)

        output = sample.sample_sequence(
            hparams=hparams,
            length=length,
            context=context,
            # start_token=enc.encoder['<|endoftext|>'],
            batch_size=batch_size,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p)[:, 1:]

        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join(models_dir, model_name))
        saver.restore(sess, ckpt)

        # generated = 0
        # while nsamples == 0 or generated < nsamples:
        #     out = sess.run(output)
        #     for i in range(batch_size):
        #         generated += batch_size
        #         text = enc.decode(out[i])
        #         print("=" * 40 + " SAMPLE " + str(generated) + " " + "=" * 40)
        #         print(text)

        # `rawtext` (the prompt string) is not a parameter of this function;
        # it is assumed to be defined at module scope in the surrounding script.
        context_tokens = enc.encode(rawtext)
        generated = 0
        for _ in range(nsamples // batch_size):
            out = sess.run(output,
                           feed_dict={
                               context:
                               [context_tokens for _ in range(batch_size)]
                           })[:, len(context_tokens):]
            for i in range(batch_size):
                generated += 1
                text = enc.decode(out[i])
                print("=" * 40 + " SAMPLE " + str(generated) + " " + "=" * 40)
                print(text)
        print("=" * 80)
Example No. 2
def interact_model(
    model_name='124M',
    seed=None,
    nsamples=1,
    batch_size=1,
    length=None,
    temperature=1,
    top_k=0,
    top_p=1,
    models_dir='models',
):
    """
    Interactively run the model
    :model_name=124M : String, which model to use
    :seed=None : Integer seed for random number generators, fix seed to reproduce
     results
    :nsamples=1 : Number of samples to return total
    :batch_size=1 : Number of batches (only affects speed/memory).  Must divide nsamples.
    :length=None : Number of tokens in generated text, if None (default), is
     determined by model hyperparameters
    :temperature=1 : Float value controlling randomness in boltzmann
     distribution. Lower temperature results in less random completions. As the
     temperature approaches zero, the model will become deterministic and
     repetitive. Higher temperature results in more random completions.
    :top_k=0 : Integer value controlling diversity. 1 means only 1 word is
     considered for each step (token), resulting in deterministic completions,
     while 40 means 40 words are considered at each step. 0 (default) is a
     special setting meaning no restrictions. 40 generally is a good value.
     :models_dir : path to parent folder containing model subfolders
     (i.e. contains the <model_name> folder)
    """
    models_dir = os.path.expanduser(os.path.expandvars(models_dir))
    if batch_size is None:
        batch_size = 1
    assert nsamples % batch_size == 0

    enc = encoder.get_encoder(model_name, models_dir)
    hparams = model.default_hparams()
    with open(os.path.join(models_dir, model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx // 2
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" % hparams.n_ctx)

    with tf.Session(graph=tf.Graph()) as sess:
        context = tf.placeholder(tf.int32, [batch_size, None])
        np.random.seed(seed)
        tf.set_random_seed(seed)
        output = sample.sample_sequence(
            hparams=hparams, length=length,
            context=context,
            batch_size=batch_size,
            temperature=temperature, top_k=top_k, top_p=top_p
        )

        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join(models_dir, model_name))
        saver.restore(sess, ckpt)

        while True:
            raw_text = input("Model prompt >>> ")
            while not raw_text:
                print('Prompt should not be empty!')
                raw_text = input("Model prompt >>> ")
            context_tokens = enc.encode(raw_text)
            # Restored sampling loop (the example was truncated here); it
            # mirrors the identical loop in the other interact_model examples.
            generated = 0
            for _ in range(nsamples // batch_size):
                out = sess.run(output,
                               feed_dict={
                                   context:
                                   [context_tokens for _ in range(batch_size)]
                               })[:, len(context_tokens):]
                for i in range(batch_size):
                    generated += 1
                    text = enc.decode(out[i])
                    print("=" * 40 + " SAMPLE " + str(generated) + " " +
                          "=" * 40)
                    print(text)
            print("=" * 80)
Example No. 3
def train_main(dataset,
               model_name='117M',
               seed=None,
               batch_size=1,
               sample_length=1023,
               sample_num=1,
               sample_every=100,
               run_name='run1',
               restore_from='latest',
               save_every=1000):

    enc = encoder.get_encoder(model_name)
    hparams = model.default_hparams()
    with open(os.path.join('models', model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if sample_length is None:
        sample_length = hparams.n_ctx // 2
    elif sample_length > hparams.n_ctx:
        raise ValueError(
            "Can't get samples longer than window size: %s" % hparams.n_ctx)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        context = tf.placeholder(tf.int32, [batch_size, None])
        np.random.seed(seed)
        tf.set_random_seed(seed)
        output = model.model(hparams=hparams, X=context)
        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=context[:, 1:], logits=output['logits'][:, :-1]))

        tf_sample = sample.sample_sequence(
            hparams=hparams,
            length=sample_length,
            context=context,
            batch_size=batch_size,
            temperature=1.0,
            top_k=40)

        train_vars = [v for v in tf.trainable_variables() if 'model' in v.name]
        opt = tf.train.AdamOptimizer().minimize(loss, var_list=train_vars)

        saver = tf.train.Saver(
            var_list=train_vars,
            max_to_keep=5,
            keep_checkpoint_every_n_hours=2)
        sess.run(tf.global_variables_initializer())

        if restore_from == 'latest':
            ckpt = tf.train.latest_checkpoint(
                os.path.join(CHECKPOINT_DIR, run_name))
            if ckpt is None:
                # Get fresh GPT weights if new run.
                ckpt = tf.train.latest_checkpoint(
                    os.path.join('models', model_name))
        elif restore_from == 'fresh':
            ckpt = tf.train.latest_checkpoint(
                os.path.join('models', model_name))
        else:
            ckpt = tf.train.latest_checkpoint(restore_from)
        print('Loading checkpoint', ckpt)
        saver.restore(sess, ckpt)

        print('Loading dataset...')
        chunks = load_dataset(enc, dataset)
        data_sampler = Sampler(chunks)
        print('dataset has', data_sampler.total_size, 'tokens')
        print('Training...')

        counter = 1
        if os.path.exists(os.path.join(CHECKPOINT_DIR, run_name, 'counter')):
            # Load the step number if we're resuming a run
            # Add 1 so we don't immediately try to save again
            with open(os.path.join(CHECKPOINT_DIR, run_name, 'counter'),
                      'r') as fp:
                counter = int(fp.read()) + 1

        def save():
            maketree(os.path.join(CHECKPOINT_DIR, run_name))
            print(
                'Saving',
                os.path.join(CHECKPOINT_DIR, run_name,
                             'model-{}').format(counter))
            saver.save(
                sess,
                os.path.join(CHECKPOINT_DIR, run_name, 'model'),
                global_step=counter)
            with open(os.path.join(CHECKPOINT_DIR, run_name, 'counter'),
                      'w') as fp:
                fp.write(str(counter) + '\n')

        def generate_samples():
            context_tokens = data_sampler.sample(1)
            all_text = []
            index = 0
            while index < sample_num:
                out = sess.run(
                    tf_sample, feed_dict={context: batch_size*[context_tokens]})
                for i in range(min(sample_num - index, batch_size)):
                    text = enc.decode(out[i])
                    text = '======== SAMPLE {} ========\n{}\n'.format(index + 1, text)
                    all_text.append(text)
                    index += 1
            print(text)
            maketree(os.path.join(SAMPLE_DIR, run_name))
            with open(
                    os.path.join(SAMPLE_DIR, run_name,
                                 'samples-{}').format(counter), 'w') as fp:
                fp.write('\n'.join(all_text))

        avg_loss = (0.0, 0.0)
        start_time = time.time()

        try:
            while True:
                if counter % save_every == 0:
                    save()
                if counter % sample_every == 0:
                    generate_samples()

                batch = [data_sampler.sample(1024) for _ in range(batch_size)]

                _, lv = sess.run((opt, loss), feed_dict={context: batch})

                avg_loss = (avg_loss[0] * 0.99 + lv, avg_loss[1] * 0.99 + 1.0)

                print(
                    '[{counter} | {time:2.2f}] loss={loss:2.2f} avg={avg:2.2f}'
                    .format(
                        counter=counter,
                        time=time.time() - start_time,
                        loss=lv,
                        avg=avg_loss[0] / avg_loss[1]))

                counter += 1
        except KeyboardInterrupt:
            print('interrupted')
            save()
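
A sketch of invoking train_main (assumptions: the helpers it references, such as load_dataset, Sampler, maketree and the CHECKPOINT_DIR/SAMPLE_DIR constants, are defined in the surrounding script, the 117M weights are under models/, and data.npz is a corpus previously encoded with the BPE encoder):

train_main('data.npz',
           model_name='117M',
           batch_size=1,
           sample_every=200,
           save_every=500,
           run_name='run1')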
Example No. 4
    def get_encoder(self):
        logging.info("getting model's encoder")
        self.enc = encoder.get_encoder(self.model_name)
def sample_model(model_name='117M',
                 seed=None,
                 nsamples=0,
                 batch_size=1,
                 length=None,
                 temperature=1,
                 top_k=0,
                 top_p=0.0):
    """
    Run the sample_model
    :model_name=117M : String, which model to use
    :seed=None : Integer seed for random number generators, fix seed to
     reproduce results
    :nsamples=0 : Number of samples to return, if 0, continues to
     generate samples indefinitely.
    :batch_size=1 : Number of batches (only affects speed/memory).
    :length=None : Number of tokens in generated text, if None (default), is
     determined by model hyperparameters
    :temperature=1 : Float value controlling randomness in Boltzmann
     distribution. Lower temperature results in less random completions. As the
     temperature approaches zero, the model will become deterministic and
     repetitive. Higher temperature results in more random completions.
    :top_k=0 : Integer value controlling diversity. 1 means only 1 word is
     considered for each step (token), resulting in deterministic completions,
     while 40 means 40 words are considered at each step. 0 (default) is a
     special setting meaning no restrictions. 40 generally is a good value.
    :top_p=0.0 : Float value controlling diversity. Implements nucleus sampling,
     overriding top_k if set to a value > 0. A good setting is 0.9.
    """
    enc = encoder.get_encoder(model_name)
    hparams = model.default_hparams()
    with open(os.path.join('models', model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" %
                         hparams.n_ctx)

    with tf.Session(graph=tf.Graph()) as sess:
        np.random.seed(seed)
        tf.set_random_seed(seed)

        output = sample.sample_sequence(
            hparams=hparams,
            length=length,
            start_token=enc.encoder['<|endoftext|>'],
            batch_size=batch_size,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p)[:, 1:]

        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join('models', model_name))
        saver.restore(sess, ckpt)

        generated = 0
        while nsamples == 0 or generated < nsamples:
            out = sess.run(output)
            for i in range(batch_size):
                generated += batch_size
                text = enc.decode(out[i])
                print("=" * 40 + " SAMPLE " + str(generated) + " " + "=" * 40)
                print(text)
                return (text)
def interact_model(
    model_name='117M',
    seed=None,
    nsamples=1,
    batch_size=1,
    length=None,
    temperature=1,
    top_k=0,
    top_p=0.0,
    output_file='output.txt',
    test_cutoff=999,
    bleu_cutoff=0.4,
):
    """
	Interactively run the model
	:model_name=117M : String, which model to use
	:seed=None : Integer seed for random number generators, fix seed to reproduce
	 results
	:nsamples=1 : Number of samples to return total
	:batch_size=1 : Number of batches (only affects speed/memory).	Must divide nsamples.
	:length=None : Number of tokens in generated text, if None (default), is
	 determined by model hyperparameters
	:temperature=1 : Float value controlling randomness in boltzmann
	 distribution. Lower temperature results in less random completions. As the
	 temperature approaches zero, the model will become deterministic and
	 repetitive. Higher temperature results in more random completions.
	:top_k=0 : Integer value controlling diversity. 1 means only 1 word is
	 considered for each step (token), resulting in deterministic completions,
	 while 40 means 40 words are considered at each step. 0 (default) is a
	 special setting meaning no restrictions. 40 generally is a good value.
	:top_p=0.0 : Float value controlling diversity. Implements nucleus sampling,
	 overriding top_k if set to a value > 0. A good setting is 0.9.
	"""
    if batch_size is None:
        batch_size = 1
    assert nsamples % batch_size == 0

    enc = encoder.get_encoder(model_name)
    hparams = model.default_hparams()
    with open(os.path.join('models', model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx // 2
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" %
                         hparams.n_ctx)

    with tf.Session(graph=tf.Graph()) as sess:
        context = tf.placeholder(tf.int32, [batch_size, None])
        np.random.seed(seed)
        tf.set_random_seed(seed)
        output = sample.sample_sequence(hparams=hparams,
                                        length=length,
                                        context=context,
                                        batch_size=batch_size,
                                        temperature=temperature,
                                        top_k=top_k,
                                        top_p=top_p)

        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join('models', model_name))
        saver.restore(sess, ckpt)
        data = load_codesearch_net_lite(_TEST_FILE)
        avg_bleu = 0
        with open(output_file, "w") as out:
            for i, row in data.iterrows():
                code = row['code']
                input_snippet = extract_definition_and_documentation(code)
                output_code = generate_output(enc, nsamples, sess, context,
                                              output, input_snippet)
                output_code = output_code[:output_code.find("<|endoftext|>")]
                output_code = output_code[:output_code.find("<END>")]
                BLEUscore = nltk.translate.bleu_score.sentence_bleu(
                    [code], output_code)
                avg_bleu += BLEUscore
                out.write(f"\ni = {i}, bleu = {BLEUscore}")
                print(
                    f"i = {i}, bleu = {BLEUscore}, avg_bleu = {avg_bleu/(i + 1)}"
                )
                if BLEUscore > bleu_cutoff:
                    out.write(
                        f"\ninput_snippet = {input_snippet}, output_code = {output_code}"
                    )
                    out.flush()
                if i > test_cutoff: break
            avg_bleu /= i
            print(f"Bleu score = {avg_bleu}")
            out.write(f"\nBleu score = {avg_bleu}")
def interact_model(
    model_name='124M',
    seed=None,
    nsamples=1,
    batch_size=1,
    length=None,
    temperature=1,
    top_k=0,
    top_p=1,
    models_dir='models',
    constant=0.0,
    counter=0,
):
    """
    Interactively run the model
    :model_name=124M : String, which model to use
    :seed=None : Integer seed for random number generators, fix seed to reproduce
     results
    :nsamples=1 : Number of samples to return total
    :batch_size=1 : Number of batches (only affects speed/memory).  Must divide nsamples.
    :length=None : Number of tokens in generated text, if None (default), is
     determined by model hyperparameters
    :temperature=1 : Float value controlling randomness in boltzmann
     distribution. Lower temperature results in less random completions. As the
     temperature approaches zero, the model will become deterministic and
     repetitive. Higher temperature results in more random completions.
    :top_k=0 : Integer value controlling diversity. 1 means only 1 word is
     considered for each step (token), resulting in deterministic completions,
     while 40 means 40 words are considered at each step. 0 (default) is a
     special setting meaning no restrictions. 40 generally is a good value.
     :models_dir : path to parent folder containing model subfolders
     (i.e. contains the <model_name> folder)
    """
    models_dir = str(os.path.dirname(
        os.path.abspath(__file__))) + '/models_gpt'
    models_dir = os.path.expanduser(os.path.expandvars(models_dir))
    if batch_size is None:
        batch_size = 1
    assert nsamples % batch_size == 0

    enc = encoder.get_encoder(model_name, models_dir)
    hparams = model.default_hparams()
    with open(os.path.join(models_dir, model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx // 2
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" %
                         hparams.n_ctx)

    length = 30  #100
    word_set = np.load(
        str(os.path.dirname(os.path.abspath(__file__))) +
        '/look_ups_gpt-2/word_sets_num.npy')
    numbers = word_set[counter]
    evaluations = np.zeros((8))
    if constant > 0:
        text_all = open(
            str(os.path.dirname(os.path.abspath(__file__))) +
            "/results/snippets_with_anchoring.txt", "a+")
    else:
        text_all = open(
            str(os.path.dirname(os.path.abspath(__file__))) +
            "/results/snippets_no_anchoring.txt", "a+")

    text_all.write(
        '=================================================================================================='
    )
    text_all.write('\n')
    file1 = open(
        str(os.path.dirname(os.path.abspath(__file__))) +
        "/look_ups_gpt-2/word_sets.txt", "r+")
    line = file1.readlines()
    text_all.write(line[counter])
    text_all.write('\n')
    text_all.write('\n')

    with tf.Session(graph=tf.Graph()) as sess:
        context = tf.placeholder(tf.int32, [batch_size, None])
        glovers = load_words_and_glove()
        converter_table = np.load(
            str(os.path.dirname(os.path.abspath(__file__))) +
            '/look_ups_gpt-2/converter_table.npy')
        container = related_words()
        np.random.seed(seed)
        tf.set_random_seed(seed)
        weight = constant
        output, probabilites = sample.sample_sequence_glove_all_top_five_gpu(
            hparams=hparams,
            length=length,
            context=context,
            batch_size=batch_size,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            glove=[
                glovers[numbers[0], :], glovers[numbers[1], :],
                glovers[numbers[2], :], glovers[numbers[3], :],
                glovers[numbers[4], :]
            ],  #[glovers[0,:], glovers[98,:], glovers[2,:],  glovers[19,:], glovers[85,:]] #converter_table[14836,:] #words[98,:] #converter_table[5536,:]  #glover
            weight=weight)

        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join(models_dir, model_name))
        saver.restore(sess, ckpt)
        holder = 0
        counterer = 0
        perplexities = np.zeros((10))
        Nr_words = np.zeros((10))
        Nr_related = np.zeros((10))
        Nr_related_with = np.zeros((10))
        text_length = np.zeros((10))
        Nr_main = np.zeros((10))
        Nr_main_related_without = np.zeros((10))
        Nr_main_related_with = np.zeros((10))
        while holder < 10:
            Harry, counterer = Harry_sentences_no_capital(counterer, 2)
            context_tokens = enc.encode(Harry)
            generated = 0
            for _ in range(nsamples // batch_size):
                out, proba = sess.run(
                    [output, probabilites],
                    feed_dict={
                        context: [context_tokens for _ in range(batch_size)]
                    })
                out = out[:, len(context_tokens):]
                for i in range(batch_size):

                    main = [[numbers[0]]]
                    counter_main_sim_without = isSubset(
                        container[numbers[0]][1:], out[i])
                    counter_main_sim_with = isSubset(container[numbers[0]][0:],
                                                     out[i])
                    counter_main = counter_main_sim_with - counter_main_sim_without
                    checker = tokens_from_words(numbers)
                    counter_tot = 0
                    counter_sim = 0
                    counter_sim_with = 0
                    cond = False
                    counter = isSubset(checker, out[i])
                    counter_tot += counter
                    if counter > 0:
                        cond = True
                    for num in numbers:
                        counter_sim += isSubset(container[num][1:], out[i])
                        counter_sim_with += isSubset(container[num][0:],
                                                     out[i])

                    perplexitiy = np.power(proba, (-1 / length))
                    generated += 1
                    text = enc.decode(out[i])
                    text_all.write(text)
                    text_all.write('\n')
                    perplexities[holder] = perplexitiy
                    Nr_words[holder] = counter_tot
                    Nr_related[holder] = counter_sim
                    text_length[holder] = text.count(' ')
                    Nr_main[holder] = counter_main
                    Nr_main_related_with[holder] = counter_main_sim_with
                    Nr_main_related_without[holder] = counter_main_sim_without
                    Nr_related_with[holder] = counter_sim_with

            holder += 1

    evaluations[0] = np.mean(perplexities)
    evaluations[1] = np.mean(Nr_main)
    evaluations[2] = np.mean(Nr_main_related_with)
    evaluations[3] = np.mean(Nr_main_related_without)
    evaluations[4] = np.mean(Nr_words)
    evaluations[5] = np.mean(Nr_related_with)
    evaluations[6] = np.mean(Nr_related)
    evaluations[7] = np.mean(text_length)
    text_all.write(
        '=================================================================================================='
    )
    text_all.close()
    return evaluations
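
A sketch of driving this evaluation (assumptions: the look-up tables under look_ups_gpt-2/ and the helper functions used above, such as load_words_and_glove, related_words, Harry_sentences_no_capital, isSubset and tokens_from_words, are available in the surrounding module; 5.0 is just an arbitrary positive anchoring weight):

with_anchoring = interact_model(constant=5.0, counter=0)
without_anchoring = interact_model(constant=0.0, counter=0)
print('with anchoring:   ', with_anchoring)
print('without anchoring:', without_anchoring)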
Example No. 8
def interact_model(
    model_name='mixed',
    seed=None,
    nsamples=1,
    batch_size=1,
    #length=None,
    length=20,
    temperature=1,
    top_k=0,
):
    """
    Interactively run the model
    :model_name=117M : String, which model to use
    :seed=None : Integer seed for random number generators, fix seed to reproduce
     results
    :nsamples=1 : Number of samples to return total
    :batch_size=1 : Number of batches (only affects speed/memory).  Must divide nsamples.
    :length=None : Number of tokens in generated text, if None (default), is
     determined by model hyperparameters
    :temperature=1 : Float value controlling randomness in boltzmann
     distribution. Lower temperature results in less random completions. As the
     temperature approaches zero, the model will become deterministic and
     repetitive. Higher temperature results in more random completions.
    :top_k=0 : Integer value controlling diversity. 1 means only 1 word is
     considered for each step (token), resulting in deterministic completions,
     while 40 means 40 words are considered at each step. 0 (default) is a
     special setting meaning no restrictions. 40 generally is a good value.
    """
    if batch_size is None:
        batch_size = 1
    assert nsamples % batch_size == 0

    enc = encoder.get_encoder(model_name)
    hparams = model.default_hparams()
    with open(os.path.join('models', model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx // 2
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" %
                         hparams.n_ctx)

    with tf.Session(graph=tf.Graph()) as sess:
        context = tf.placeholder(tf.int32, [batch_size, None])
        np.random.seed(seed)
        tf.set_random_seed(seed)
        output = sample.sample_sequence(hparams=hparams,
                                        length=length,
                                        context=context,
                                        batch_size=batch_size,
                                        temperature=temperature,
                                        top_k=top_k)

        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join('models', model_name))
        saver.restore(sess, ckpt)

        class Sample(Resource):
            def get(self, name):
                #raw_text = input("Model prompt >>> ")
                raw_text = "here's some text"
                context_tokens = enc.encode(raw_text)
                generated = 0
                for _ in range(nsamples // batch_size):
                    out = sess.run(
                        output,
                        feed_dict={
                            context:
                            [context_tokens for _ in range(batch_size)]
                        })[:, len(context_tokens):]
                    for i in range(batch_size):
                        generated += 1
                        text = enc.decode(out[i])
                        print("=" * 40 + " SAMPLE " + str(generated) + " " +
                              "=" * 40)
                        print(text)
                print("=" * 80)
                return (text)

            def post(self, name):
                parser = reqparse.RequestParser()
                parser.add_argument("prompt")
                args = parser.parse_args()
                samples = []
                context_tokens = enc.encode(args["prompt"])
                generated = 0
                for _ in range(nsamples // batch_size):
                    out = sess.run(
                        output,
                        feed_dict={
                            context:
                            [context_tokens for _ in range(batch_size)]
                        })[:, len(context_tokens):]
                    for i in range(batch_size):
                        generated += 1
                        text = enc.decode(out[i])
                        #                        print("=" * 40 + " SAMPLE " + str(generated) + " " + "=" * 40)
                        sample = {
                            "name": "handey",
                            "prompt": args["prompt"],
                            "text": text
                        }
                        samples.append(sample)
                return samples, 201

        api.add_resource(Sample, "/sample/<string:name>")
        app.run(host='0.0.0.0', port=8000)
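
Once the Flask-RESTful app above is running, the POST endpoint can be exercised like this (a sketch; the requests library is an extra dependency, and the "handey" path segment is arbitrary since the resource only uses it as a URL parameter):

import requests

resp = requests.post("http://localhost:8000/sample/handey",
                     json={"prompt": "It was a dark and stormy night"})
print(resp.status_code)  # expected 201
for s in resp.json():
    print(s["text"])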
Example No. 9
def replytalk(update, context):
    # carry global vars
    global length
    global sess
    global output
    global model_name
    global batch_size
    global nsamples
    global tfcontext
    global sample_context
    global persistent_context
    global my_name

    user = update.message.from_user
    logger.info("REPLYTALK received of %s: %s", user.first_name,
                update.message.text)

    if not sample_context:
        # initialize. try to inject some context to hint the model
        sample_context = 'Conversation of ' + my_name + ', and a person from internet called ' + user.first_name + '.\n'
        sample_context = sample_context + persistent_context + '\n\n'
        sample_context = sample_context + my_name + ' - Hi ' + user.first_name + '\n'

    raw_text = update.message.text
    sample_context = sample_context + user.first_name + ' - ' + raw_text + '\n'

    enc = encoder.get_encoder(model_name)
    context_tokens = enc.encode(sample_context)
    logger.info("sample_context: " + sample_context)
    logger.info("sample_context_len: " + str(len(context_tokens)))
    out = sess.run(output,
                   feed_dict={tfcontext:
                              [context_tokens
                               for _ in range(1)]})[:, len(context_tokens):]
    text = enc.decode(out[0])
    logger.info("Model run complete")

    # parse the response somehow
    logger.info("model response" + text)
    logger.info("first line response" + text.split('\n')[0])
    model_response_text = ''

    if len(text.split('\n')[0]) < 5 or len(compress(text.split('\n')[0])) < 5:
        model_response_text = text.split('\n')[1].lstrip()  #+ '\n'
    else:
        model_response_text = text.split('\n')[0].lstrip()  #+ '\n'

    logger.info("guessed response" + model_response_text)

    # if model response starts with correspondent name...
    if (model_response_text.startswith(user.first_name)):
        # v002+ just look for the first line beginning with my name
        for line in text.split('\n'):
            if line.startswith(my_name + ' - '):
                model_response_text = line.split('-')[1]
                logger.info("guessed response (2)" + model_response_text)

    if '<|endoftext|>' in model_response_text:
        model_response_text = model_response_text.split('<|endoftext|>')[0]

    # sometimes my name is mentioned on line 1 need to clean that
    if model_response_text.startswith(my_name + ' - '):
        model_response_text = model_response_text.split(my_name + ' - ')[1]

    logger.info("final response " + model_response_text)

    update.message.reply_text(model_response_text,
                              reply_markup=ReplyKeyboardRemove())

    sample_context = sample_context + my_name + ' - ' + model_response_text + '\n'

    # truncate the context
    linecount = 0
    count = 0
    for line in sample_context.splitlines():
        linecount += 1
    logger.info("ctx length " + str(linecount) + " " +
                str(len(context_tokens)) + " tokens")
    if linecount > 30 or len(context_tokens) > 800:
        #sample_context_new = '';
        sample_context_new = persistent_context + '\n\n'
        for line in sample_context.splitlines():
            count += 1
            if count > (linecount - 30):
                sample_context_new = sample_context_new + line + '\n'
        sample_context = sample_context_new

    return REPLYTALK
def interact_model(
    model_name='345M',
    seed=None,
    nsamples=1,
    batch_size=1,
    length=None,
    temperature=1,
    top_k=0,
):
    """
    Interactively run the model
    :model_name=117M : String, which model to use
    :seed=None : Integer seed for random number generators, fix seed to reproduce
     results
    :nsamples=1 : Number of samples to return total
    :batch_size=1 : Number of batches (only affects speed/memory).  Must divide nsamples.
    :length=None : Number of tokens in generated text, if None (default), is
     determined by model hyperparameters
    :temperature=1 : Float value controlling randomness in boltzmann
     distribution. Lower temperature results in less random completions. As the
     temperature approaches zero, the model will become deterministic and
     repetitive. Higher temperature results in more random completions.
    :top_k=0 : Integer value controlling diversity. 1 means only 1 word is
     considered for each step (token), resulting in deterministic completions,
     while 40 means 40 words are considered at each step. 0 (default) is a
     special setting meaning no restrictions. 40 generally is a good value.
    """
    if batch_size is None:
        batch_size = 1
    assert nsamples % batch_size == 0

    enc = encoder.get_encoder(model_name)
    hparams = model.default_hparams()
    with open(os.path.join('models', model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx // 2
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" %
                         hparams.n_ctx)

    with tf.Session(graph=tf.Graph()) as sess:
        context = tf.placeholder(tf.int32, [batch_size, None])
        np.random.seed(seed)
        tf.set_random_seed(seed)
        output = sample.sample_sequence(hparams=hparams,
                                        length=length,
                                        context=context,
                                        batch_size=batch_size,
                                        temperature=temperature,
                                        top_k=top_k)

        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join('models', model_name))
        saver.restore(sess, ckpt)

        while True:
            raw_text = sys.stdin.read()
            while not raw_text:
                raw_text = sys.stdin.read()
            context_tokens = enc.encode(raw_text)
            generated = 0
            for _ in range(nsamples // batch_size):
                out = sess.run(output,
                               feed_dict={
                                   context:
                                   [context_tokens for _ in range(batch_size)]
                               })[:, len(context_tokens):]
                for i in range(batch_size):
                    generated += 1
                    text = enc.decode(out[i])
                    print("=" * 40 + " SAMPLE " + str(generated) + " " +
                          "=" * 40)
                    print(text)
            print("=" * 80)
Example No. 11
def text_generator(state_dict):
    parser = argparse.ArgumentParser()
    parser.add_argument("--text", type=str, required=True)
    parser.add_argument("--quiet", type=bool, default=False)
    parser.add_argument("--nsamples", type=int, default=1)
    parser.add_argument('--unconditional',
                        action='store_true',
                        help='If true, unconditional generation.')
    parser.add_argument("--batch_size", type=int, default=-1)
    parser.add_argument("--length", type=int, default=-1)
    parser.add_argument("--temperature", type=float, default=0.7)
    parser.add_argument("--top_k", type=int, default=40)
    args = parser.parse_args()

    if args.quiet is False:
        print(args)

    if args.batch_size == -1:
        args.batch_size = 1
    assert args.nsamples % args.batch_size == 0

    seed = random.randint(0, 2147483647)
    np.random.seed(seed)
    torch.random.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load Model
    enc = get_encoder()
    config = GPT2Config()
    model = GPT2LMHeadModel(config)
    model = load_weight(model, state_dict)
    model.to(device)
    model.eval()

    if args.length == -1:
        args.length = config.n_ctx // 2
    elif args.length > config.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" %
                         config.n_ctx)

    print(args.text)
    context_tokens = enc.encode(args.text)

    generated = 0
    for _ in range(args.nsamples // args.batch_size):
        out = sample_sequence(
            model=model,
            length=args.length,
            context=context_tokens if not args.unconditional else None,
            start_token=enc.encoder['<|endoftext|>']
            if args.unconditional else None,
            batch_size=args.batch_size,
            temperature=args.temperature,
            top_k=args.top_k,
            device=device)
        out = out[:, len(context_tokens):].tolist()
        for i in range(args.batch_size):
            generated += 1
            text = enc.decode(out[i])
            if args.quiet is False:
                print("=" * 40 + " SAMPLE " + str(generated) + " " + "=" * 40)
            print(text)
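
A small driver for text_generator (a sketch; the checkpoint filename is an assumption and should point at whatever GPT-2 PyTorch state dict is on disk, and the script would be run with e.g. --text "Once upon a time"):

import torch

if __name__ == '__main__':
    state_dict = torch.load('gpt2-pytorch_model.bin', map_location='cpu')
    text_generator(state_dict)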
Example No. 12
def generate(sess,
             run_name='run1',
             checkpoint_dir='checkpoint',
             model_name=None,
             model_dir='models',
             sample_dir='samples',
             return_as_list=False,
             truncate=None,
             destination_path=None,
             sample_delim='=' * 20 + '\n',
             prefix=None,
             seed=None,
             nsamples=1,
             batch_size=1,
             length=1023,
             temperature=0.7,
             top_k=0,
             top_p=0.0,
             include_prefix=True):
    """Generates text from a model loaded into memory.

    Adapted from https://github.com/openai/gpt-2/blob/master/src/interactive_conditional_samples.py
    """

    if batch_size is None:
        batch_size = 1
    assert nsamples % batch_size == 0

    if nsamples == 1:
        sample_delim = ''

    if prefix == '':
        prefix = None

    if model_name:
        checkpoint_path = os.path.join(model_dir, model_name)
    else:
        checkpoint_path = os.path.join(checkpoint_dir, run_name)

    enc = encoder.get_encoder(checkpoint_path)
    hparams = model.default_hparams()
    with open(os.path.join(checkpoint_path, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if prefix:
        context = tf.compat.v1.placeholder(tf.int32, [batch_size, None])
        context_tokens = enc.encode(prefix)

    np.random.seed(seed)
    tf.compat.v1.set_random_seed(seed)

    output = sample.sample_sequence(
        hparams=hparams,
        length=min(length, 1023 - (len(context_tokens) if prefix else 0)),
        start_token=enc.encoder['<|endoftext|>'] if not prefix else None,
        context=context if prefix else None,
        batch_size=batch_size,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p)[:, 1:]

    if destination_path:
        f = open(destination_path, 'w')
    generated = 0
    gen_texts = []
    while generated < nsamples:
        if not prefix:
            out = sess.run(output)
        else:
            out = sess.run(output,
                           feed_dict={context: batch_size * [context_tokens]})
        for i in range(batch_size):
            generated += 1
            gen_text = enc.decode(out[i])
            if prefix:
                gen_text = enc.decode(context_tokens[:1]) + gen_text
            if truncate:
                truncate_esc = re.escape(truncate)
                if prefix and not include_prefix:
                    prefix_esc = re.escape(prefix)
                    pattern = '(?:{})(.*?)(?:{})'.format(
                        prefix_esc, truncate_esc)
                else:
                    pattern = '(.*?)(?:{})'.format(truncate_esc)

                trunc_text = re.search(pattern, gen_text, re.S)
                if trunc_text:
                    gen_text = trunc_text.group(1)
            gen_text = gen_text.lstrip('\n')
            if destination_path:
                f.write("{}\n{}".format(gen_text, sample_delim))
            if not return_as_list and not destination_path:
                print("{}\n{}".format(gen_text, sample_delim), end='')
            gen_texts.append(gen_text)

    if destination_path:
        f.close()

    if return_as_list:
        return gen_texts
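
A sketch of calling generate() directly (assumption: sess is a tf.compat.v1.Session into which the run1 checkpoint weights have already been restored, for example by the accompanying load/finetune helpers in the same module):

texts = generate(sess,
                 run_name='run1',
                 prefix='Once upon a time',
                 length=120,
                 temperature=0.7,
                 top_p=0.9,
                 nsamples=3,
                 batch_size=3,
                 return_as_list=True)
for t in texts:
    print(t)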
Example No. 13
def finetune(sess,
             dataset,
             steps=-1,
             model_name='124M',
             model_dir='models',
             combine=50000,
             batch_size=1,
             learning_rate=0.0001,
             accumulate_gradients=5,
             restore_from='latest',
             run_name='run1',
             checkpoint_dir='checkpoint',
             sample_every=100,
             sample_length=1023,
             sample_num=1,
             multi_gpu=False,
             save_every=1000,
             print_every=1,
             max_checkpoints=1,
             use_memory_saving_gradients=False,
             only_train_transformer_layers=False,
             optimizer='adam',
             overwrite=False,
             val_dataset=None,
             val_batch_size=2,
             val_batch_count=40,
             val_every=0):
    """Finetunes the model on the given dataset.

    Adapted from https://github.com/nshepperd/gpt-2/blob/finetuning/train.py.
    See that file for parameter definitions.
    """

    # assert model_name not in ['774M', '1558M'] or multi_gpu, "Currently, a modern single GPU cannot finetune the 774M GPT-2 model or larger."

    SAMPLE_DIR = 'samples'

    checkpoint_path = os.path.join(checkpoint_dir, run_name)

    def maketree(path):
        try:
            os.makedirs(path)
        except:
            pass

    maketree(checkpoint_path)
    files = [f for f in os.listdir(checkpoint_path)]
    for file in ['hparams.json', 'encoder.json', 'vocab.bpe']:
        try:
            shutil.copyfile(os.path.join(model_dir, model_name, file),
                            os.path.join(checkpoint_path, file))
        except FileNotFoundError as fnf_error:
            print(
                "You need to download the GPT-2 model first via download_gpt2()"
            )
            raise (fnf_error)

    enc = encoder.get_encoder(checkpoint_path)
    hparams = model.default_hparams()
    with open(os.path.join(checkpoint_path, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if sample_length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" %
                         hparams.n_ctx)

    if model_name not in ['117M', '124M']:
        use_memory_saving_gradients = True
        only_train_transformer_layers = True
        accumulate_gradients = 1

    context = tf.compat.v1.placeholder(tf.int32, [batch_size, None])
    gpus = []

    if multi_gpu:
        gpus = get_available_gpus()

    output = model.model(hparams=hparams, X=context, gpus=gpus)
    loss = tf.reduce_mean(
        input_tensor=tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=context[:, 1:], logits=output['logits'][:, :-1]))

    # validation code
    if val_every > 0:
        val_context = tf.placeholder(tf.int32, [val_batch_size, None])
        val_output = model.model(hparams=hparams, X=val_context,
                                 reuse=True)  # added reuse=True
        val_loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=val_context[:,
                                   1:], logits=val_output['logits'][:, :-1]))
        val_loss_summary = tf.summary.scalar('val_loss', val_loss)

    tf_sample = sample.sample_sequence(hparams=hparams,
                                       length=sample_length,
                                       context=context,
                                       batch_size=batch_size,
                                       temperature=1.0,
                                       top_k=40)

    all_vars = [
        v for v in tf.compat.v1.trainable_variables() if 'model' in v.name
    ]
    train_vars = [v for v in all_vars if '/h' in v.name
                  ] if only_train_transformer_layers else all_vars

    if optimizer == 'adam':
        opt = tf.compat.v1.train.AdamOptimizer(learning_rate=learning_rate)
    elif optimizer == 'sgd':
        opt = tf.compat.v1.train.GradientDescentOptimizer(
            learning_rate=learning_rate)

    if accumulate_gradients > 1:
        if use_memory_saving_gradients:
            exit(
                "Memory saving gradients are not implemented for gradient accumulation yet."
            )
        opt = AccumulatingOptimizer(opt=opt, var_list=train_vars)
        opt_reset = opt.reset()
        opt_compute = opt.compute_gradients(loss)
        opt_apply = opt.apply_gradients()
        summary_loss = tf.compat.v1.summary.scalar('loss', opt_apply)
    else:
        if use_memory_saving_gradients:
            opt_grads = memory_saving_gradients.gradients(loss, train_vars)
        else:
            opt_grads = tf.gradients(ys=loss, xs=train_vars)
        opt_grads = list(zip(opt_grads, train_vars))
        opt_apply = opt.apply_gradients(opt_grads)
        summary_loss = tf.compat.v1.summary.scalar('loss', loss)

    summary_log = tf.compat.v1.summary.FileWriter(checkpoint_path)

    saver = tf.compat.v1.train.Saver(var_list=all_vars,
                                     max_to_keep=max_checkpoints)
    sess.run(tf.compat.v1.global_variables_initializer())

    if restore_from == 'latest':
        ckpt = tf.train.latest_checkpoint(checkpoint_path)
        if ckpt is None:
            # Get fresh GPT weights if new run.
            ckpt = tf.train.latest_checkpoint(
                os.path.join(model_dir, model_name))
    elif restore_from == 'fresh':
        ckpt = tf.train.latest_checkpoint(os.path.join(model_dir, model_name))
    else:
        ckpt = tf.train.latest_checkpoint(restore_from)
    print('Loading checkpoint', ckpt)
    saver.restore(sess, ckpt)

    print('Loading dataset...')
    chunks = load_dataset(enc, dataset, combine)
    data_sampler = Sampler(chunks)

    # validation code
    if val_every > 0:
        if val_dataset:
            val_chunks = load_dataset(enc, val_dataset, combine)
        else:
            val_chunks = chunks

    print('dataset has', data_sampler.total_size, 'tokens')
    print('Training...')

    # validation code
    if val_every > 0:
        # Sample from validation set once with fixed seed to make
        # it deterministic during training as well as across runs.
        val_data_sampler = Sampler(val_chunks, seed=1)
        val_batches = [[
            val_data_sampler.sample(1024) for _ in range(val_batch_size)
        ] for _ in range(val_batch_count)]

    counter = 1
    counter_path = os.path.join(checkpoint_path, 'counter')
    if os.path.exists(counter_path) and restore_from == 'latest':
        # Load the step number if we're resuming a run
        # Add 1 so we don't immediately try to save again
        with open(counter_path, 'r') as fp:
            counter = int(fp.read()) + 1
    counter_base = counter

    def save():
        maketree(checkpoint_path)
        print('Saving',
              os.path.join(checkpoint_path, 'model-{}').format(counter - 1))
        saver.save(sess,
                   os.path.join(checkpoint_path, 'model'),
                   global_step=counter - 1)
        with open(counter_path, 'w') as fp:
            fp.write(str(counter - 1) + '\n')

    def generate_samples():
        context_tokens = data_sampler.sample(1)
        all_text = []
        index = 0
        while index < sample_num:
            out = sess.run(tf_sample,
                           feed_dict={context: batch_size * [context_tokens]})
            for i in range(min(sample_num - index, batch_size)):
                text = enc.decode(out[i])
                text = '======== SAMPLE {} ========\n{}\n'.format(
                    index + 1, text)
                all_text.append(text)
                index += 1
        print(text)
        maketree(os.path.join(SAMPLE_DIR, run_name))
        with open(
                os.path.join(SAMPLE_DIR, run_name,
                             'samples-{}').format(counter), 'w') as fp:
            fp.write('\n'.join(all_text))

    # validation code
    def validation():
        print('Calculating validation loss...')
        losses = []
        for batch in tqdm(val_batches):
            losses.append(sess.run(val_loss, feed_dict={val_context: batch}))
        v_val_loss = np.mean(losses)
        v_summary = sess.run(val_loss_summary,
                             feed_dict={val_loss: v_val_loss})
        summary_log.add_summary(v_summary, counter)
        summary_log.flush()
        print('[{counter} | {time:2.2f}] validation loss = {loss:2.2f}'.format(
            counter=counter, time=time.time() - start_time, loss=v_val_loss))
        return v_val_loss

    def sample_batch():
        return [data_sampler.sample(1024) for _ in range(batch_size)]

    if overwrite and restore_from == 'latest':
        for file in files:
            if file.startswith('model') or file.startswith('events'):
                os.remove(os.path.join(checkpoint_path, file))
        save()

    avg_loss = (0.0, 0.0)
    start_time = time.time()

    #Trying out a change to finetune that saves only when validation loss decreases
    if steps:
        steps = int(steps)

    try:
        while True:
            if steps > 0 and counter == (counter_base + steps):
                #save()
                return
            # if (counter - 1) % save_every == 0 and counter > 1:
            #     save()
            if (counter - 1) % sample_every == 0 and counter > 1:
                generate_samples()

            # validation code
            if val_every > 0 and counter == 1:
                v_val_loss = validation()
                save()
            elif val_every > 0 and counter == counter_base:
                v_val_loss = validation()
            elif val_every > 0 and (counter % val_every == 0):
                new_v_val_loss = validation()
                if new_v_val_loss < v_val_loss:
                    v_val_loss = new_v_val_loss
                    save()

            if accumulate_gradients > 1:
                sess.run(opt_reset)
                for _ in range(accumulate_gradients):
                    sess.run(opt_compute, feed_dict={context: sample_batch()})
                (v_loss, v_summary) = sess.run((opt_apply, summary_loss))
            else:
                (_, v_loss, v_summary) = sess.run(
                    (opt_apply, loss, summary_loss),
                    feed_dict={context: sample_batch()})

            summary_log.add_summary(v_summary, counter)

            if (counter % print_every == 0) or counter == 1:
                avg_loss = (avg_loss[0] * 0.99 + v_loss,
                            avg_loss[1] * 0.99 + 1.0)

                print(
                    '[{counter} | {time:2.2f}] loss={loss:2.2f} avg={avg:2.2f}'
                    .format(counter=counter,
                            time=time.time() - start_time,
                            loss=v_loss,
                            avg=avg_loss[0] / avg_loss[1]))

            counter += 1
    except KeyboardInterrupt:
        print('interrupted')
        save()
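
A sketch of a fine-tuning run with this function (assumptions: a TF1-style session, the 124M weights already downloaded under models/, and corpus.txt / val.txt as stand-ins for the training and validation text):

sess = tf.compat.v1.Session()
finetune(sess,
         dataset='corpus.txt',
         model_name='124M',
         steps=1000,
         sample_every=200,
         run_name='run1',
         val_dataset='val.txt',  # with val_every > 0 this variant saves a
         val_every=100)          # checkpoint whenever validation loss improves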
Example No. 14
def makeModel(text, leng, k):
    try:
        model_name = '774M'
        seed = None
        nsamples = 1
        batch_size = 1
        length = int(leng)
        temperature = 1
        top_k = int(k)
        top_p = 1
        models_dir = 'models'
        raw_text = text
        print('makeModel')

        models_dir = os.path.expanduser(os.path.expandvars(models_dir))
        if batch_size is None:
            batch_size = 1
        assert nsamples % batch_size == 0

        enc = encoder.get_encoder(model_name, models_dir)
        hparams = model.default_hparams()
        with open(os.path.join(models_dir, model_name, 'hparams.json')) as f:
            hparams.override_from_dict(json.load(f))

        if length is None:
            length = hparams.n_ctx // 2
        elif length > hparams.n_ctx:
            raise ValueError("Can't get samples longer than window size: %s" %
                             hparams.n_ctx)

        with tf.Session(graph=tf.Graph()) as sess:
            context = tf.placeholder(tf.int32, [batch_size, None])
            np.random.seed(seed)
            tf.set_random_seed(seed)
            output = sample.sample_sequence(hparams=hparams,
                                            length=length,
                                            context=context,
                                            batch_size=batch_size,
                                            temperature=temperature,
                                            top_k=top_k,
                                            top_p=top_p)

            saver = tf.train.Saver()
            ckpt = tf.train.latest_checkpoint(
                os.path.join(models_dir, model_name))
            saver.restore(sess, ckpt)

            print('raw_text in make model      :  ' + raw_text)
            context_tokens = enc.encode(raw_text)
            generated = 0
            for _ in range(nsamples // batch_size):
                out = sess.run(output,
                               feed_dict={
                                   context:
                                   [context_tokens for _ in range(batch_size)]
                               })[:, len(context_tokens):]
                for i in range(batch_size):
                    generated += 1
                    text = enc.decode(out[i])
                    print("=" * 40 + " SAMPLE " + str(generated) + " " +
                          "=" * 40)
                    print(text)
        return text.encode('ascii', 'ignore').decode('ascii')
    except Exception as e:
        print(e)
        return 500
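
A sketch of calling makeModel (assumes the 774M weights under ./models; leng and k pass through int(), so strings are accepted):

print(makeModel("The quick brown fox", leng="60", k="40"))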
Example No. 15
def encode_main(in_text, out_npz, model_name='117M'):
    enc = encoder.get_encoder(model_name)
    print('Reading files')
    chunks = load_dataset(enc, in_text)
    print('Writing', out_npz)
    np.savez_compressed(out_npz, *chunks)
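
A sketch of pre-encoding a corpus with it (assumes load_dataset is available in the surrounding module; the file names are placeholders):

encode_main('input.txt', 'input_tokens.npz', model_name='117M')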
Example No. 16
def _bytes_feature(value):
  """Returns a bytes_list from a string / byte."""
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

# Divides a list into chunks
def chunks(l, n):
    out = []
    for i in range(0, len(l), n):
        out.append(l[i:i + n])
    return out

if not os.path.exists(log_dir):
    os.mkdir(log_dir)

enc = encoder.get_encoder(encoder_path)

file_chunks = chunks(files, files_per)

print("Got {} files, divided into {} chunks.".format(str(len(files)), str(len(file_chunks))))

def create_file(args):
    i, chunk = args
    s = name + "_" + str(i) + ".tfrecords"
    if os.path.exists(os.path.join(log_dir, s)): # Hack-y, if file of same name is in log dir, sign that the file is complete, so skip
        return
    if os.path.exists(os.path.join(output_dir, s)): # Unfinished file, remove
        os.remove(os.path.join(output_dir, s))

    with tf.python_io.TFRecordWriter(os.path.join(output_dir, s)) as writer:
        good_files = 0
Ejemplo n.º 17
0
from encoder import get_encoder

encoder = get_encoder()


def pre_inference(payload, signature, metadata):
    context = encoder.encode(payload["text"])
    return {"context": [context]}


def post_inference(prediction, signature, metadata):
    response = prediction["sample"]
    return encoder.decode(response)
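
# Added, hedged sketch of how the two hooks above fit together; the payload/prediction
# shapes are assumptions inferred from the code, not from any particular serving framework.
payload = {"text": "The quick brown fox"}
request_body = pre_inference(payload, signature=None, metadata=None)
print(request_body["context"])  # [[token ids for the prompt]]

fake_prediction = {"sample": request_body["context"][0]}  # pretend the model echoed the prompt
print(post_inference(fake_prediction, signature=None, metadata=None))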
Ejemplo n.º 18
0
def interact_model(model_name='model',
                   seed=99,
                   nsamples=5,
                   batch_size=5,
                   length=8,
                   temperature=0,
                   top_k=10,
                   top_p=.85,
                   models_dir=''):

    models_dir = os.path.expanduser(os.path.expandvars(models_dir))

    if batch_size is None:
        batch_size = 1
    assert nsamples % batch_size == 0

    enc = encoder.get_encoder(model_name, models_dir)
    hparams = model.default_hparams()
    with open(os.path.join(models_dir, model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx // 2
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" %
                         hparams.n_ctx)

    gpu_options = tf.compat.v1.GPUOptions(allow_growth=True)
    config = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=0,
                                      inter_op_parallelism_threads=0,
                                      allow_soft_placement=True,
                                      gpu_options=gpu_options)

    with tf.compat.v1.Session(graph=tf.Graph(), config=config) as sess:

        context = tf.compat.v1.placeholder(tf.int32, [batch_size, None])
        np.random.seed(seed)
        tf.compat.v1.set_random_seed(seed)

        # p = tf.random.uniform((1,), minval=.68, maxval=.98, dtype=tf.dtypes.float32, name='random_p_logits')
        output = sample.sample_sequence(hparams=hparams,
                                        length=length,
                                        context=context,
                                        batch_size=batch_size,
                                        temperature=temperature,
                                        top_k=top_k,
                                        top_p=top_p)

        saver = tf.compat.v1.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join(models_dir, model_name))
        saver.restore(sess, ckpt)

        class Autocomplete(Resource):
            def get(self):
                return ''

            def post(self):
                body = request.get_json(force=True)
                if body['text'] == "":
                    return Response(json.dumps({'result': []}),
                                    status=400,
                                    mimetype='application/json')

                context_tokens = enc.encode(body['text'])
                generated = 0
                predictions = []

                for _ in range(nsamples // batch_size):

                    feed_dict = {
                        context: [context_tokens for _ in range(batch_size)]
                    }
                    out = sess.run(output,
                                   feed_dict=feed_dict)[:,
                                                        len(context_tokens):]

                    for i in range(batch_size):
                        generated += 1
                        text = enc.decode(out[i])
                        predictions.append(str(text))

                return Response(json.dumps({'result': predictions}),
                                status=200,
                                mimetype='application/json')

        if __name__ == '__main__':
            app = Flask(__name__)
            api = Api(app)
            api.add_resource(Autocomplete, '/autocomplete')
            app.run('0.0.0.0', port=3030, debug=True, use_reloader=False)
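
# Added client sketch (not part of the original): exercising the /autocomplete endpoint
# defined above, assuming the Flask app is running locally on port 3030 and that the
# `requests` package is available.
import json
import requests

resp = requests.post('http://localhost:3030/autocomplete',
                     data=json.dumps({'text': 'Once upon a time'}))
print(resp.json()['result'])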
Ejemplo n.º 19
0
     distribution. Lower temperature results in less random completions. As the
     temperature approaches zero, the model will become deterministic and
     repetitive. Higher temperature results in more random completions.
    :top_k=0 : Integer value controlling diversity. 1 means only 1 word is
     considered for each step (token), resulting in deterministic completions,
     while 40 means 40 words are considered at each step. 0 (default) is a
     special setting meaning no restrictions. 40 generally is a good value.
     :models_dir : path to parent folder containing model subfolders
     (i.e. contains the <model_name> folder)
    """
    models_dir = os.path.expanduser(os.path.expandvars(models_dir))
    if batch_size is None:
        batch_size = 1
    assert nsamples % batch_size == 0

    enc = encoder.get_encoder(model_name, models_dir)
    hparams = model.default_hparams()
    with open(os.path.join(models_dir, model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx // 2
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" % hparams.n_ctx)

    with tf.Session(graph=tf.Graph()) as sess:
        context = tf.placeholder(tf.int32, [batch_size, None])
        np.random.seed(seed)
        tf.set_random_seed(seed)
        output = sample.sample_sequence(
            hparams=hparams, length=length,
Ejemplo n.º 20
0
flags.DEFINE_string(
    'files', None,
    'Name of a file that specifies the images in the dataset, one per line. E.g. --files my_list_of_images.txt'
)
flags.DEFINE_integer('shards', 2048, 'Number of tfrecord files to generate')
flags.DEFINE_integer('nprocs', 8, 'Number of processes to work in parallel')
flags.DEFINE_boolean('directory_labels', False,
                     'Use the directory name of each file as a label')
flags.DEFINE_string('crop_method', 'none', '<random, distorted, middle, none>')
flags.DEFINE_integer('resize', -1, 'Resize to a specific resolution')
flags.DEFINE_string('doc2vec_embeddings', None,
                    'Use a doc2vec model for embeddings')
flags.DEFINE_string('tags_csv', None, 'Include tags from a csv')

FLAGS = flags.FLAGS

tokenizer = encoder.get_encoder()


def _check_or_create_dir(directory):
    """Check if directory exists otherwise create it."""
    if not tf.gfile.Exists(directory):
        tf.gfile.MakeDirs(directory)


def _int64_feature(value):
    """Wrapper for inserting int64 features into Example proto."""
    if not isinstance(value, list):
        value = [value]
    return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
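
# Added illustration: packing values into a tf.train.Example with the helper above, the
# way it is typically done when writing TFRecords (feature names here are placeholders).
example = tf.train.Example(features=tf.train.Features(feature={
    'label': _int64_feature(3),
    'token_ids': _int64_feature([15496, 995]),
}))
serialized = example.SerializeToString()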

Ejemplo n.º 21
0
def alite_graph(
    model_name='124M',
    seed=None,
    nsamples=1,
    batch_size=1,
    length=10,
    temperature=1,  # .5 usually has numbered steps, .7 usually does not
    beam_width=6,
    max_contexts=800,
    max_expansions=1000,
    top_k=None,
    top_p=1,
    models_dir='models',
    input_samples=[],
):
    """
    Interactively run the model
    :model_name=124M : String, which model to use
    :seed=None : Integer seed for random number generators, fix seed to reproduce
     results
    :nsamples=1 : Number of samples to return total
    :batch_size=1 : Number of batches (only affects speed/memory).  Must divide nsamples.
    :length=10 : Number of tokens in generated text, if None, is
     determined by model hyperparameters
    :temperature=1 : Float value controlling randomness in boltzmann
     distribution. Lower temperature results in less random completions. As the
     temperature approaches zero, the model will become deterministic and
     repetitive. Higher temperature results in more random completions.
    :top_k=None : Integer value controlling diversity (overridden below so that
     top_k equals beam_width). 1 means only 1 word is
     considered for each step (token), resulting in deterministic completions,
     while 40 means 40 words are considered at each step. 0 is a
     special setting meaning no restrictions. 40 generally is a good value.
     :models_dir : path to parent folder containing model subfolders
     (i.e. contains the <model_name> folder)
    """

    models_dir = os.path.expanduser(os.path.expandvars(models_dir))
    if batch_size is None:
        batch_size = 1
    assert nsamples % batch_size == 0

    top_k = beam_width  #Set the top_k to the beam_width to only find the beam_width number of logits

    enc = encoder.get_encoder(model_name, models_dir)
    hparams = model.default_hparams()
    with open(os.path.join(models_dir, model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx // 2
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" %
                         hparams.n_ctx)

    with tf.Session(graph=tf.Graph()) as sess:
        context = tf.placeholder(tf.int32, [batch_size, None])
        np.random.seed(seed)
        tf.set_random_seed(seed)
        logits = sample.get_logits(hparams=hparams,
                                   length=length,
                                   context=context,
                                   batch_size=batch_size,
                                   temperature=temperature,
                                   top_k=top_k,
                                   top_p=top_p)

        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join(models_dir, model_name))
        saver.restore(sess, ckpt)

        input_iter = 0
        start_time = time.perf_counter()

        while True:
            raw_text = ""
            if (input_iter < len(input_samples)):
                raw_text = input_samples[input_iter]
                input_iter += 1
                print(raw_text)
            elif (len(input_samples) == 0):
                raw_text = input("Model prompt >>> ")

            while not raw_text:
                print('Prompt should not be empty!')
                raw_text = input("Model prompt >>> ")
            context_tokens = enc.encode(raw_text)
            generated = 0

            #Create a graph to map the contexts
            g = Graph(directed=True)
            g.add_vertex(str(context_tokens))
            generated = 0
            times_run = 0

            for _ in range(nsamples // batch_size):

                for i in range(batch_size):
                    generated += 1
                    max_length = len(context_tokens) + length
                    contexts = [context_tokens]
                    # o_file.write(str(contexts) + '\n')
                    print(contexts)
                    probability_map = {}
                    full_probability_map = {}
                    all_contexts = []
                    all_contexts.append(context_tokens)

                    while True:  # Expand the beam until max_contexts candidates or max_expansions model calls are reached (max_length is never actually checked)
                        new_contexts = []

                        #Get highest probability context
                        max_key = str(contexts[0])
                        if (bool(probability_map)):
                            max_key = max(probability_map.keys(),
                                          key=(lambda k: probability_map[k]))
                        else:
                            probability_map[str(context_tokens)] = 0
                            full_probability_map[str(context_tokens)] = 0

                        if (len(probability_map) >= max_contexts):
                            break
                        if (times_run >= max_expansions):
                            break

                        #Find the highest probability context
                        con = max_key
                        str_values = con.strip('][').split(', ')
                        new_values = []
                        for val in str_values:
                            new_values.append(int(val))
                        con = new_values

                        out_logits = sess.run(
                            logits,
                            feed_dict={
                                context: [con for _ in range(batch_size)]
                            })

                        times_run += 1

                        # Normalize the outputs
                        out_logits = out_logits - np.max(out_logits)
                        eo_logits = np.exp(out_logits) + 1e-20
                        out_logits = np.log(eo_logits / (np.sum(eo_logits)))

                        logit_indeces = []
                        logit_probs = []
                        for logit_index in range(len(out_logits[0])):
                            if (out_logits.item(logit_index) >
                                    np.min(out_logits)):
                                #We should get (beam width) # of logit indeces and probabilities
                                logit_indeces.append(logit_index)
                                logit_probs.append(
                                    out_logits[0].item(logit_index))

                        new_contexts = []
                        for i in range(len(logit_indeces)):
                            temp_context = con.copy()
                            temp_context.append(logit_indeces[i])
                            all_contexts.append(temp_context)
                            if str(con) in probability_map.keys():
                                g.add_vertex(str(temp_context))
                                parent_context = temp_context[
                                    0:len(temp_context) - 1]
                                g.add_edge(str(temp_context),
                                           str(parent_context))
                                probability_map[str(
                                    temp_context
                                )] = probability_map[str(con)] + logit_probs[i]
                                full_probability_map[str(
                                    temp_context
                                )] = probability_map[str(con)] + logit_probs[i]
                                # o_file.write("Probability for " + str(temp_context) + " is " + str(logit_probs[i]) + " + " + str(probability_map[str(con)]) + " = " + str(probability_map[str(temp_context)]) + "\n")
                                # print("Probability for " + str(temp_context) + " is " + str(logit_probs[i]) + " + " + str(probability_map[str(con)]) + " = " + str(probability_map[str(temp_context)]))

                            else:
                                g.add_vertex(str(temp_context))
                                parent_context = temp_context[
                                    0:len(temp_context) - 1]
                                g.add_edge(str(temp_context),
                                           str(parent_context))
                                probability_map[str(
                                    temp_context)] = logit_probs[i]
                                full_probability_map[str(
                                    temp_context)] = logit_probs[i]
                                # o_file.write("Probability for " + str(temp_context) + " is " + str(logit_probs[i]) + " = " + str(probability_map[str(temp_context)]) + "\n")
                                # print("Probability for " + str(temp_context) + " is " + str(logit_probs[i]) + " = " + str(probability_map[str(temp_context)]))

                            new_contexts.append(temp_context)

                        if str(con) in probability_map.keys():
                            del probability_map[str(con)]

                        contexts = new_contexts
                        new_probs = {}
                        for con in contexts:
                            if str(con) in probability_map:
                                new_probs[str(con)] = probability_map[str(con)]

                        # for con in contexts:
                        #     print(enc.decode(con) + " --- Probability: ---" + str(probability_map[str(con)]))

                        string_contexts = list(probability_map.keys())
                        #print(string_contexts)
                        new_contexts = []
                        for con in string_contexts:
                            str_values = con.strip('][').split(', ')
                            new_values = []
                            for val in str_values:
                                new_values.append(int(val))
                            new_contexts.append(new_values)

                        contexts = new_contexts

                    all_strings = []
                    for context in all_contexts:
                        con_string = enc.decode(context)
                        all_strings.append(con_string)

                    for context in contexts:
                        con_string = enc.decode(context)
                        print(con_string)

                    # print("=" * 40 + " SAMPLE " + str(generated) + " " + "=" * 40 + '\n')

                    # print(text)

                    nr_vertices = g.vcount()
                    print(nr_vertices)

                    es = EdgeSeq(g)
                    E = [e.tuple for e in es]
                    lay = g.layout_auto()

                    v_label = list(map(str, range(nr_vertices)))
                    position = {k: lay[k] for k in range(nr_vertices)}
                    Y = [lay[k][1] for k in range(nr_vertices)]
                    M = max(Y)
                    L = len(position)
                    Xn = [position[k][0] for k in range(L)]
                    Yn = [2 * M - position[k][1] for k in range(L)]
                    Xe = []
                    Ye = []
                    for edge in E:
                        Xe += [
                            position[edge[0]][0], position[edge[1]][0], None
                        ]
                        Ye += [
                            2 * M - position[edge[0]][1],
                            2 * M - position[edge[1]][1], None
                        ]
                    labels = v_label

                    hover_annotations = [
                        all_strings[x] + ' - Probability: ' +
                        str(full_probability_map[str(all_contexts[x])])
                        for x in range(len(all_strings))
                    ]

                    def make_annotations(pos,
                                         text,
                                         font_size=10,
                                         font_color='rgb(250,250,250)'):
                        L = len(pos)
                        if len(text) != L:
                            raise ValueError(
                                'The lists pos and text must have the same len'
                            )
                        annotations = []
                        for k in range(L):
                            annotations.append(
                                dict(
                                    text=str(
                                        text[k]
                                    ),  # or replace labels with a different list for the text within the circle
                                    x=pos[k][0],
                                    y=2 * M - position[k][1],
                                    xref='x1',
                                    yref='y1',
                                    font=dict(color=font_color,
                                              size=font_size),
                                    showarrow=False))
                        return annotations

                    fig = go.Figure()
                    fig.add_trace(
                        go.Scatter(x=Xe,
                                   y=Ye,
                                   mode='lines',
                                   line=dict(color='rgb(210,210,210)',
                                             width=1),
                                   hoverinfo='none'))
                    fig.add_trace(
                        go.Scatter(
                            x=Xn,
                            y=Yn,
                            mode='markers',
                            name='bla',
                            marker=dict(
                                symbol='circle-dot',
                                size=18,
                                color='#6175c1',  #'#DB4551',
                                line=dict(color='rgb(50,50,50)', width=1)),
                            text=hover_annotations,
                            hoverinfo='text',
                            opacity=0.8))
                    axis = dict(
                        showline=
                        False,  # hide axis line, grid, ticklabels and  title
                        zeroline=False,
                        showgrid=False,
                        showticklabels=False,
                    )

                    fig.update_layout(
                        title='Tree - Nodes: ' + str(len(all_contexts)) +
                        ' Beam width: ' + str(beam_width),
                        annotations=make_annotations(position, v_label),
                        font_size=12,
                        showlegend=False,
                        xaxis=axis,
                        yaxis=axis,
                        margin=dict(l=40, r=40, b=85, t=100),
                        hovermode='closest',
                        plot_bgcolor='rgb(248,248,248)')
                    fig.show()

                    time_elapsed = time.perf_counter() - start_time
                    print('\n' + str(time_elapsed) + " seconds elapsed" +
                          '\n' + '-' * 60 + '\n')
                    return

            print("=" * 80)
Ejemplo n.º 22
0
def interact_model(
    raw_text,
    model_name='345MChinese',
    seed=None,
    nsamples=1,
    batch_size=1,
    length=None,
    temperature=0.9,
    top_k=40,
    top_p=0.0,
    generated=0,
):
    """
    Interactively run the model
    :model_name=345MChinese : String, which model to use
    :seed=None : Integer seed for random number generators, fix seed to reproduce
     results
    :nsamples=1 : Number of samples to return total
    :batch_size=1 : Number of batches (only affects speed/memory).  Must divide nsamples.
    :length=None : Number of tokens in generated text, if None (default), is
     determined by model hyperparameters
    :temperature=0.9 : Float value controlling randomness in boltzmann
     distribution. Lower temperature results in less random completions. As the
     temperature approaches zero, the model will become deterministic and
     repetitive. Higher temperature results in more random completions.
    :top_k=40 : Integer value controlling diversity. 1 means only 1 word is
     considered for each step (token), resulting in deterministic completions,
     while 40 means 40 words are considered at each step. 0 is a
     special setting meaning no restrictions. 40 generally is a good value.
    :top_p=0.0 : Float value controlling diversity. Implements nucleus sampling,
     overriding top_k if set to a value > 0. A good setting is 0.9.
    """
    if batch_size is None:
        batch_size = 1
    assert nsamples % batch_size == 0

    enc = encoder.get_encoder(model_name)
    hparams = model.default_hparams()
    with open(os.path.join('../models', model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx // 2
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" %
                         hparams.n_ctx)

    with tf.Session(graph=tf.Graph()) as sess:
        context = tf.placeholder(tf.int32, [batch_size, None])
        np.random.seed(seed)
        tf.set_random_seed(seed)
        output = sample.sample_sequence(hparams=hparams,
                                        length=length,
                                        context=context,
                                        batch_size=batch_size,
                                        temperature=temperature,
                                        top_k=top_k,
                                        top_p=top_p)

        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join('../models',
                                                       model_name))
        saver.restore(sess, ckpt)

        while True:
            # raw_text = input("Model prompt >>> ")
            while not raw_text:
                print('You should input text.')
                raw_text = input("Model prompt >>> ")
            print("raw_text:", raw_text)
            context_tokens = enc.encode(raw_text)
            result = []
            text_origin = raw_text[13:]
            generate = 0

            for _ in range(nsamples // batch_size):
                generated += 1
                generate += 1
                out = sess.run(output,
                               feed_dict={
                                   context:
                                   [context_tokens for _ in range(batch_size)]
                               })[:, len(context_tokens):]
                for i in range(batch_size):
                    text = enc.decode(out[i])
                    if '<|endoftext|>' in text:
                        pos = text.index('<|endoftext|>')
                    else:
                        pos = text.index('.', 1000)
                        pos = pos + 1
                    text = text_origin + text[:pos]
                    result.append(text)
                    # print("=" * 40 + " SAMPLE " + str(generated) + " " + "=" * 40)
                    path1 = '/home/stu/pkq/gpt-2/samples/story_final_return'
                    path2 = path1[:-7]

                    path3 = str(generate) + ".txt"
                    path4 = raw_text + " " + str(generated) + ".txt"

                    path_return = os.path.join(path1, path3)
                    path = os.path.join(path2, path4)
                    # save all the generated texts under `path`
                    with open(path, "w") as f1:
                        f1.write(text)

                    # save the generated texts that start with raw_text under `path_return`, to be returned to the client
                    with open(path_return, "w") as f2:
                        f2.write(text)
                    #    print(text)
            # print("=" * 80)
            return result
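
# Added, self-contained restatement of the truncation rule used above: cut the sample at
# '<|endoftext|>' if present, otherwise just after the first '.' found beyond position
# 1000 (a ValueError is raised if neither is found).
def truncate_sample(text, text_origin):
    if '<|endoftext|>' in text:
        pos = text.index('<|endoftext|>')
    else:
        pos = text.index('.', 1000) + 1
    return text_origin + text[:pos]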
def sample_model(
    model_name='117M',
    seed=None,
    nsamples=0,
    batch_size=1,
    length=None,
    temperature=1,
    top_k=0,
):
    """
    Run the sample_model
    :model_name=117M : String, which model to use
    :seed=None : Integer seed for random number generators, fix seed to
     reproduce results
    :nsamples=0 : Number of samples to return, if 0, continues to
     generate samples indefinitely.
    :batch_size=1 : Number of batches (only affects speed/memory).
    :length=None : Number of tokens in generated text, if None (default), is
     determined by model hyperparameters
    :temperature=1 : Float value controlling randomness in boltzmann
     distribution. Lower temperature results in less random completions. As the
     temperature approaches zero, the model will become deterministic and
     repetitive. Higher temperature results in more random completions.
    :top_k=0 : Integer value controlling diversity. 1 means only 1 word is
     considered for each step (token), resulting in deterministic completions,
     while 40 means 40 words are considered at each step. 0 (default) is a
     special setting meaning no restrictions. 40 generally is a good value.
    """
    enc = encoder.get_encoder(model_name)
    hparams = model.default_hparams()
    with open(os.path.join('models', model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" % hparams.n_ctx)

    with tf.Session(graph=tf.Graph()) as sess:
        np.random.seed(seed)
        tf.set_random_seed(seed)

        output = sample.sample_sequence(
            hparams=hparams, length=length,
            start_token=enc.encoder['<|endoftext|>'],
            batch_size=batch_size,
            temperature=temperature, top_k=top_k
        )[:, 1:]

        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join('models', model_name))
        saver.restore(sess, ckpt)

        generated = 0
        while nsamples == 0 or generated < nsamples:
            out = sess.run(output)
            for i in range(batch_size):
                generated += batch_size
                text = enc.decode(out[i])
                print("=" * 40 + " SAMPLE " + str(generated) + " " + "=" * 40)
                print(text)
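
# Added usage sketch (hedged): unconditional generation with the sample_model() above,
# two samples of 64 tokens each, assuming the 117M model lives under ./models.
sample_model(model_name='117M', seed=0, nsamples=2, length=64, top_k=40)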
Ejemplo n.º 24
0
def interact_model(
    model_name='117M',
    seed=None,
    nsamples=1,
    batch_size=None,
    length=None,
    temperature=1,
    top_k=0,
):
    if batch_size is None:
        batch_size = 1
    assert nsamples % batch_size == 0
    np.random.seed(seed)
    tf.set_random_seed(seed)

    enc = encoder.get_encoder(model_name)
    hparams = model.default_hparams()
    with open(os.path.join('models', model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx // 2
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" % hparams.n_ctx)

    print("GPT-2 Parameters")
    print("================")
    print("Model Name : "+ str(model_name))
    print("Seed = " + str(seed))
    print("N Samples = " + str(nsamples))
    print("Batch Size = " + str(batch_size))
    print("Length = " + str(length))
    print("Temperature = " + str(temperature))
    print("Top K = " + str(top_k))

    with tf.Session(graph=tf.Graph()) as sess:
        context = tf.placeholder(tf.int32, [batch_size, None])
        output = sample.sample_sequence(
            hparams=hparams, length=length,
            context=context,
            batch_size=batch_size,
            temperature=temperature, top_k=top_k
        )
        
        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join('models', model_name))
        saver.restore(sess, ckpt)

        raw_text = input("Model prompt >>> ")
        context_tokens = enc.encode(raw_text)
        print("Context Tokens = " + str(context_tokens))
        generated = 0
        for _ in range(nsamples // batch_size):
            out = sess.run(output, feed_dict={
                context: [context_tokens for _ in range(batch_size)]
            })[:, len(context_tokens):]
            for i in range(batch_size):
                generated += 1
                text = enc.decode(out[i])
                print("=" * 40 + " SAMPLE " + str(generated) + " " + "=" * 40)
                print(text)
        print("=" * 80)
Ejemplo n.º 25
0
def main():
    args = parser.parse_args()
    enc = encoder.get_encoder(args.model_name)
    hparams = model.default_hparams()
    with open(os.path.join('models', args.model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if args.sample_length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" %
                         hparams.n_ctx)

    if args.model_name == '345M':
        args.memory_saving_gradients = True
        if args.optimizer == 'adam':
            args.only_train_transformer_layers = True

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.graph_options.rewrite_options.layout_optimizer = rewriter_config_pb2.RewriterConfig.OFF
    with tf.Session(config=config) as sess:
        context = tf.placeholder(tf.int32, [args.batch_size, None])
        context_in = randomize(context, hparams, args.noise)
        output = model.model(hparams=hparams, X=context_in)
        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=context[:, 1:], logits=output['logits'][:, :-1]))

        if args.val_every > 0:
            val_context = tf.placeholder(tf.int32, [args.val_batch_size, None])
            val_output = model.model(hparams=hparams, X=val_context)
            val_loss = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=val_context[:, 1:],
                    logits=val_output['logits'][:, :-1]))
            val_loss_summary = tf.summary.scalar('val_loss', val_loss)

        tf_sample = sample.sample_sequence(hparams=hparams,
                                           length=args.sample_length,
                                           context=context,
                                           batch_size=args.batch_size,
                                           temperature=1.0,
                                           top_k=args.top_k,
                                           top_p=args.top_p)

        all_vars = [v for v in tf.trainable_variables() if 'model' in v.name]
        train_vars = [v for v in all_vars if '/h' in v.name
                      ] if args.only_train_transformer_layers else all_vars

        if args.optimizer == 'adam':
            opt = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
        elif args.optimizer == 'sgd':
            opt = tf.train.GradientDescentOptimizer(
                learning_rate=args.learning_rate)
        else:
            exit('Bad optimizer: ' + args.optimizer)

        if args.accumulate_gradients > 1:
            if args.memory_saving_gradients:
                exit(
                    "Memory saving gradients are not implemented for gradient accumulation yet."
                )
            opt = AccumulatingOptimizer(opt=opt, var_list=train_vars)
            opt_reset = opt.reset()
            opt_compute = opt.compute_gradients(loss)
            opt_apply = opt.apply_gradients()
            summary_loss = tf.summary.scalar('loss', opt_apply)
        else:
            if args.memory_saving_gradients:
                opt_grads = memory_saving_gradients.gradients(loss, train_vars)
            else:
                opt_grads = tf.gradients(loss, train_vars)
            opt_grads = list(zip(opt_grads, train_vars))
            opt_apply = opt.apply_gradients(opt_grads)
            summary_loss = tf.summary.scalar('loss', loss)

        summary_lr = tf.summary.scalar('learning_rate', args.learning_rate)
        summaries = tf.summary.merge([summary_lr, summary_loss])

        summary_log = tf.summary.FileWriter(
            os.path.join(CHECKPOINT_DIR, args.run_name))

        saver = tf.train.Saver(var_list=all_vars,
                               max_to_keep=5,
                               keep_checkpoint_every_n_hours=2)
        sess.run(tf.global_variables_initializer())

        if args.restore_from == 'latest':
            ckpt = tf.train.latest_checkpoint(
                os.path.join(CHECKPOINT_DIR, args.run_name))
            if ckpt is None:
                # Get fresh GPT weights if new run.
                ckpt = tf.train.latest_checkpoint(
                    os.path.join('models', args.model_name))
        elif args.restore_from == 'fresh':
            ckpt = tf.train.latest_checkpoint(
                os.path.join('models', args.model_name))
        else:
            ckpt = tf.train.latest_checkpoint(args.restore_from)
        print('Loading checkpoint', ckpt)
        saver.restore(sess, ckpt)

        print('Loading dataset...')
        chunks = load_dataset(enc,
                              args.dataset,
                              args.combine,
                              encoding=args.encoding)
        data_sampler = Sampler(chunks)
        if args.val_every > 0:
            if args.val_dataset:
                val_chunks = load_dataset(enc,
                                          args.val_dataset,
                                          args.combine,
                                          encoding=args.encoding)
            else:
                val_chunks = chunks
        print('dataset has', data_sampler.total_size, 'tokens')
        print('Training...')

        if args.val_every > 0:
            # Sample from validation set once with fixed seed to make
            # it deterministic during training as well as across runs.
            val_data_sampler = Sampler(val_chunks, seed=1)
            val_batches = [[
                val_data_sampler.sample(1024)
                for _ in range(args.val_batch_size)
            ] for _ in range(args.val_batch_count)]

        counter = 1
        counter_path = os.path.join(CHECKPOINT_DIR, args.run_name, 'counter')
        if os.path.exists(counter_path):
            # Load the step number if we're resuming a run
            # Add 1 so we don't immediately try to save again
            with open(counter_path, 'r') as fp:
                counter = int(fp.read()) + 1

        def save():
            maketree(os.path.join(CHECKPOINT_DIR, args.run_name))
            print(
                'Saving',
                os.path.join(CHECKPOINT_DIR, args.run_name,
                             'model-{}').format(counter))
            saver.save(sess,
                       os.path.join(CHECKPOINT_DIR, args.run_name, 'model'),
                       global_step=counter)
            with open(counter_path, 'w') as fp:
                fp.write(str(counter) + '\n')

        def generate_samples():
            print('Generating samples...')
            context_tokens = data_sampler.sample(1)
            all_text = []
            index = 0
            while index < args.sample_num:
                out = sess.run(
                    tf_sample,
                    feed_dict={context: args.batch_size * [context_tokens]})
                for i in range(min(args.sample_num - index, args.batch_size)):
                    text = enc.decode(out[i])
                    text = '======== SAMPLE {} ========\n{}\n'.format(
                        index + 1, text)
                    all_text.append(text)
                    index += 1
            print(text)
            maketree(os.path.join(SAMPLE_DIR, args.run_name))
            with open(os.path.join(SAMPLE_DIR, args.run_name,
                                   'samples-{}').format(counter),
                      'w',
                      encoding=args.encoding) as fp:
                fp.write('\n'.join(all_text))

        def validation():
            print('Calculating validation loss...')
            losses = []
            for batch in tqdm.tqdm(val_batches):
                losses.append(
                    sess.run(val_loss, feed_dict={val_context: batch}))
            v_val_loss = np.mean(losses)
            v_summary = sess.run(val_loss_summary,
                                 feed_dict={val_loss: v_val_loss})
            summary_log.add_summary(v_summary, counter)
            summary_log.flush()
            print('[{counter} | {time:2.2f}] validation loss = {loss:2.2f}'.
                  format(counter=counter,
                         time=time.time() - start_time,
                         loss=v_val_loss))

        def sample_batch():
            return [data_sampler.sample(1024) for _ in range(args.batch_size)]

        avg_loss = (0.0, 0.0)
        start_time = time.time()

        try:
            while True:
                if counter % args.save_every == 0:
                    save()
                if counter % args.sample_every == 0:
                    generate_samples()
                if args.val_every > 0 and (counter % args.val_every == 0
                                           or counter == 1):
                    validation()

                if args.accumulate_gradients > 1:
                    sess.run(opt_reset)
                    for _ in range(args.accumulate_gradients):
                        sess.run(opt_compute,
                                 feed_dict={context: sample_batch()})
                    (v_loss, v_summary) = sess.run((opt_apply, summaries))
                else:
                    (_, v_loss, v_summary) = sess.run(
                        (opt_apply, loss, summaries),
                        feed_dict={context: sample_batch()})

                summary_log.add_summary(v_summary, counter)

                avg_loss = (avg_loss[0] * 0.99 + v_loss,
                            avg_loss[1] * 0.99 + 1.0)

                print(
                    '[{counter} | {time:2.2f}] loss={loss:2.2f} avg={avg:2.2f}'
                    .format(counter=counter,
                            time=time.time() - start_time,
                            loss=v_loss,
                            avg=avg_loss[0] / avg_loss[1]))

                counter += 1
        except KeyboardInterrupt:
            print('interrupted')
            save()
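
# Added, self-contained sketch of the running-loss bookkeeping in the training loop above:
# avg_loss keeps a decayed sum and a decayed count, so the reported average is an
# exponentially weighted mean that favors recent batches.
avg_loss = (0.0, 0.0)
for step, v_loss in enumerate([3.2, 3.0, 2.9, 2.8], start=1):
    avg_loss = (avg_loss[0] * 0.99 + v_loss, avg_loss[1] * 0.99 + 1.0)
    print('[{}] loss={:.2f} avg={:.2f}'.format(step, v_loss, avg_loss[0] / avg_loss[1]))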
Ejemplo n.º 26
0
tf.app.flags.DEFINE_string("gpu", "0", "Specify which gpu to use.")
tf.app.flags.DEFINE_integer("batch_size", 10, "Number of batches (only affects speed/memory).")
tf.app.flags.DEFINE_string("data_name", "roc", "Set `roc` to train the model on ROCStories corpus or \
                                                `kg` to train the model on the knowledge bases or\
                                                `multi_roc` to train the model on ROCStories with multi-task learning.")
tf.app.flags.DEFINE_integer("n_class", 4, "Number of classes for the auxiliary classification task.")
tf.app.flags.DEFINE_float("learning_rate", 1e-4, "Learning rate.")
tf.app.flags.DEFINE_string("data_dir", "./data", "Data directory.")
tf.app.flags.DEFINE_integer("length", 200, "Number of tokens in generated text.")
tf.app.flags.DEFINE_float("temperature", 0.7, "Float value controlling randomness in boltzmann distribution. Lower temperature results in less random completions. As the temperature approaches zero, the model will become deterministic and repetitive. Higher temperature results in more random completions.")
tf.app.flags.DEFINE_integer("top_k", 40, "Integer value controlling diversity.")
FLAGS = tf.app.flags.FLAGS
FLAGS.is_train = bool(FLAGS.is_train)
FLAGS.cond = bool(FLAGS.cond)
model_dir = os.path.expanduser(os.path.expandvars(FLAGS.model_dir))
enc = encoder.get_encoder(model_dir)
PAD_ID = enc.encoder['<|endoftext|>']
hparams = model.default_hparams()
with open(os.path.join(model_dir, 'hparams.json')) as f:
    hparams.override_from_dict(json.load(f))

def load_data(path, fname, enc, label):
    data = []
    print('loading %s/%s ......' % (path, fname))
    with open('%s/%s.txt' % (path, fname)) as f:
        tmp = []
        for k, line in enumerate(f):
            i = k + 1
            if i % 6 == 0:
                data.append({"st": tmp, "label": label})
                tmp = []
Ejemplo n.º 27
0
import os
import json
import numpy as np
import tensorflow as tf

# The snippet below also needs the GPT-2 modules; the original import was commented out.
# Depending on the repo layout these are either top-level modules or a `gpt` package:
# import gpt.model as model, gpt.sample as sample, gpt.encoder as encoder
import model, sample, encoder

model_name='1558M'
batch_size = 1
seed = None
nsamples=1
length=10
temperature=1
top_k=0
np.random.seed(seed)
tf.set_random_seed(seed)

enc = encoder.get_encoder(model_name)
hparams = model.default_hparams()
with open(os.path.join('models', model_name, 'hparams.json')) as f:
    hparams.override_from_dict(json.load(f))

if length is None:
    length = hparams.n_ctx // 2
elif length > hparams.n_ctx:
    raise ValueError("Can't get samples longer than window size: %s" % hparams.n_ctx)

with tf.Session(graph=tf.Graph()) as sess:
    context = tf.placeholder(tf.int32, [1, None])
    output = sample.sample_sequence(
        hparams=hparams, length=length,
        context=context,
        batch_size=1,
Ejemplo n.º 28
0
os.makedirs(results_path, exist_ok=False)
os.makedirs(saved_model_path, exist_ok=False)
log_file = os.path.join(results_path, 'log.txt')


# create data paths
root_path = FLAGS.root_path
gold_path_valid = os.path.join(root_path, FLAGS.domain, 'original_data', 'valid.summary')
gold_path_test = os.path.join(root_path, FLAGS.domain, 'original_data', 'test.summary')
field_vocab_file = os.path.join(root_path, "human_books_songs_films_field_vocab.txt")
processed_data_dir = os.path.join(root_path, FLAGS.domain, "processed_data")

# bpe vocab
last_best = 0.0
enc = encoder.get_encoder("117M")
eos = 50256 #TODO move to settings
empty = 28920 #TODO move to settings


def train(sess, preprocessed_data, model):
    # keep track of all input parameters
    write_log(log_file, "####################INPUT PARAMETERS###################")
    for attr in FLAGS.flag_values_dict():
        value = FLAGS.flag_values_dict()[attr]
        write_log(log_file, attr + " = " + str(value))
    write_log(log_file, "#######################################################")

    train_iterator = DataLoader(preprocessed_data.train_set, FLAGS.domain,
                                batch_size=FLAGS.batch_size, shuffle=True, eos=eos, empty=empty)
Ejemplo n.º 29
0
def main():
    args = parser.parse_args()
    enc = encoder.get_encoder(args.model_name)
    hparams = model.default_hparams()
    hparams.res_dropout = args.dropout
    hparams.attn_dropout = args.dropout
    epsilon = -1e10
    if args.dtype == 'float32':
        hparams.dtype = tf.float32
    elif args.dtype == 'float16':
        hparams.dtype = tf.float16
        epsilon = -65500
    elif args.dtype == 'bfloat16':
        hparams.dtype = tf.bfloat16
        epsilon = -65500
    else:
        print('Unknown dtype', args.dtype)
    if args.float16:
        hparams.dtype = tf.bfloat16
        epsilon = -65500

    with open(os.path.join('models', args.model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))
    if args.n_ctx >= 0:
        hparams.n_ctx = args.n_ctx
    if args.n_embd >= 0:
        hparams.n_embd = args.n_embd
    if args.n_head >= 0:
        hparams.n_head = args.n_head
    if args.n_layer >= 0:
        hparams.n_layer = args.n_layer

    if args.sample_length < 0:
        args.sample_length = hparams.n_ctx - 1
    if args.sample_length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" %
                         hparams.n_ctx)
    if args.sample_ctx < 0:
        args.sample_ctx = hparams.n_ctx

    if args.model_name == '345M':
        args.memory_saving_gradients = True
        if args.optimizer == 'adam':
            args.only_train_transformer_layers = True

    config = tf.ConfigProto()
    if args.allow_growth:
        config.gpu_options.allow_growth = True
    if args.disable_layout_optimizer:
        config.graph_options.rewrite_options.layout_optimizer = rewriter_config_pb2.RewriterConfig.OFF
    with tflex.Session(config=config, init_tpu=args.init_tpu) as sess:
        context = tf.placeholder(tf.int32, [args.batch_size, None])
        context_in = randomize(context, hparams, args.noise)
        output = model.model(hparams=hparams, X=context_in)
        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=context[:, 1:], logits=output['logits'][:, :-1]))

        if args.val_every > 0:
            val_context = tf.placeholder(tf.int32, [args.val_batch_size, None])
            val_output = model.model(hparams=hparams, X=val_context)
            val_loss = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=val_context[:, 1:],
                    logits=val_output['logits'][:, :-1]))
            val_loss_summary = tf.summary.scalar('val_loss', val_loss)

        tf_sample = sample.sample_sequence(hparams=hparams,
                                           length=args.sample_length,
                                           context=context,
                                           batch_size=args.batch_size,
                                           temperature=1.0,
                                           top_k=args.top_k,
                                           top_p=args.top_p,
                                           epsilon=epsilon)

        all_vars = [v for v in tf.trainable_variables() if 'model' in v.name]
        train_vars = [v for v in all_vars if '/h' in v.name
                      ] if args.only_train_transformer_layers else all_vars

        parameter_count = sum([np.prod(v.shape.as_list()) for v in train_vars])
        print("This model is using %d parameters (%.2fM)" %
              (parameter_count, parameter_count / (1024.0 * 1024.0)))

        with tf.variable_scope(tf.get_variable_scope().name,
                               reuse=tf.AUTO_REUSE):
            global_step = tflex.get_variable('global_step') or tf.get_variable(
                'global_step', shape=(), dtype=tf.int32, trainable=False)
            current_step = args.learning_rate_initial_step
            global_step.load(current_step, session=sess)
            if args.learning_rate_cos:
                lr = tflex_sgdr.sgdr_decay_with_warmup(
                    args.learning_rate,
                    global_step,
                    warmup_steps=args.learning_rate_warmup,
                    initial_period_steps=args.learning_rate_period,
                    learning_rate_min=args.learning_rate_min)
            else:
                lr = tflex.get_variable('learn_rate') or tf.get_variable(
                    'learn_rate', shape=(), dtype=tf.float32, trainable=False)
                lr.load(args.learning_rate, session=sess)

        def update_lr(rate=None, step=None):
            if not args.learning_rate_cos:
                if step is None:
                    step = global_step.eval(session=sess)
                if rate is None:
                    rate = args.learning_rate
                if callable(rate):
                    rate = rate(step)
                lr.load(rate, session=sess)
            return lr.eval(session=sess)

        @tflex.register_command
        def set_learning_rate():
            print("Current learn rate: %0.8f" % update_lr())
            print("New learn rate?")
            rate = input('')
            if not rate:
                print("Empty input; not changing anything.")
                return
            try:
                rate = float(rate)
            except ValueError:
                print("Invalid input; must be a float")
                return
            print("Setting learn rate to %0.8f" % rate)
            args.learning_rate = rate

        if args.optimizer == 'adam':
            opt = tf.train.AdamOptimizer(learning_rate=lr)
        elif args.optimizer == 'sgd':
            opt = tf.train.GradientDescentOptimizer(learning_rate=lr)
        elif args.optimizer == 'ada':
            import tensor2tensor.utils.optimize
            from tensor2tensor.utils import hparam
            import tensor2tensor.models.research
            from tensor2tensor.utils import registry
            ada_hparams = registry.hparams('afx_mimic_adam')
            ada_hparams.optimizer_adafactor_beta1 = 0.0
            ada_hparams.optimizer_adafactor_factored = True
            opt = tensor2tensor.utils.optimize.adafactor(learning_rate=lr,
                                                         hparams=ada_hparams)
        else:
            exit('Bad optimizer: ' + args.optimizer)

        #if tpu_addr:
        #    # https://pulsejet.github.io/blog/posts/tpu-without-estimator/
        #    from tensorflow.contrib.tpu.python.tpu import tpu_function
        #    tpu_function.get_tpu_context().set_number_of_shards(8)
        #    opt = tf.contrib.tpu.CrossShardOptimizer(opt)

        if args.accumulate_gradients > 1:
            if args.memory_saving_gradients:
                exit(
                    "Memory saving gradients are not implemented for gradient accumulation yet."
                )
            opt = AccumulatingOptimizer(opt=opt, var_list=train_vars)
            opt_reset = opt.reset()
            opt_compute = opt.compute_gradients(loss)
            opt_apply = opt.apply_gradients()
            summary_loss = tf.summary.scalar('loss', opt_apply)
        else:
            if args.memory_saving_gradients:
                opt_grads = memory_saving_gradients.gradients(loss, train_vars)
            else:
                opt_grads = tf.gradients(loss, train_vars)
            opt_grads = list(zip(opt_grads, train_vars))
            opt_apply = opt.apply_gradients(opt_grads)
            summary_loss = tf.summary.scalar('loss', loss)

        summary_lr = tf.summary.scalar('learning_rate', lr)
        summaries = tf.summary.merge([summary_lr, summary_loss])

        summary_log = tf.summary.FileWriter(
            os.path.join(CHECKPOINT_DIR, args.run_name))

        if args.save_graph:
            summary_log.add_graph(tf.get_default_graph())

        saver = tflex.Saver(var_list=all_vars,
                            max_to_keep=args.max_to_keep,
                            keep_checkpoint_every_n_hours=2,
                            reshape=args.truncate_weights)
        sess.run(tf.global_variables_initializer())

        if args.restore_from == 'latest':
            ckpt = tflex.latest_checkpoint(
                os.path.join(CHECKPOINT_DIR, args.run_name))
            if ckpt is None:
                # Get fresh GPT weights if new run.
                ckpt = tflex.latest_checkpoint(
                    os.path.join('models', args.model_name))
        elif args.restore_from == 'fresh':
            ckpt = tflex.latest_checkpoint(
                os.path.join('models', args.model_name))
        else:
            ckpt = tflex.latest_checkpoint(args.restore_from)
        print('Loading snapshot %s...' % ckpt)
        t0 = time.time()
        if not args.fresh_model:
            saver.restore(sess, ckpt)
        t1 = time.time()
        print('Loaded in %f seconds' % (t1 - t0))

        def make_sampler(dataset, enc, seed, combine):
            if os.path.isdir(dataset) or dataset.endswith('.npz'):
                chunks = load_dataset(enc, dataset, combine)
                data_sampler = Sampler(chunks, seed=seed)
                print('dataset has', data_sampler.total_size, 'tokens',
                      len(chunks), 'chunks')
            else:
                data_sampler = TextSampler(dataset, enc, seed=seed)
            return data_sampler

        print('Loading dataset...')
        seed = None if args.seed < 0 else args.seed
        data_sampler = make_sampler(dataset=args.dataset,
                                    enc=enc,
                                    seed=seed,
                                    combine=args.combine)
        if args.val_every > 0:
            # Sample from validation set once with fixed seed to make
            # it deterministic during training as well as across runs.
            val_dataset = args.val_dataset if args.val_dataset else args.dataset
            val_data_sampler = make_sampler(dataset=val_dataset,
                                            enc=enc,
                                            seed=1,
                                            combine=args.combine)
            val_batches = [[
                val_data_sampler.sample(hparams.n_ctx)
                for _ in range(args.val_batch_size)
            ] for _ in range(args.val_batch_count)]

        print('Training...')
        counter = 1
        counter_path = os.path.join(CHECKPOINT_DIR, args.run_name, 'counter')
        if os.path.exists(counter_path):
            # Load the step number if we're resuming a run
            # Add 1 so we don't immediately try to save again
            with open(counter_path, 'r') as fp:
                counter = int(fp.read()) + 1

        @tflex.register_command
        def save():
            maketree(os.path.join(CHECKPOINT_DIR, args.run_name))
            print(
                'Saving',
                os.path.join(CHECKPOINT_DIR, args.run_name,
                             'model-{}').format(counter))
            t0 = time.time()
            saver.save(sess,
                       os.path.join(CHECKPOINT_DIR, args.run_name, 'model'),
                       global_step=counter)
            t1 = time.time()
            print('Saved in %f seconds' % (t1 - t0))
            with open(counter_path, 'w') as fp:
                fp.write(str(counter) + '\n')

        @tflex.register_command
        def generate_samples():
            print('Generating samples...')
            context_tokens = data_sampler.sample(1)
            all_text = []
            index = 0
            while index < args.sample_num:
                out = sess.run(
                    tf_sample,
                    feed_dict={context: args.batch_size * [context_tokens]})
                for i in range(min(args.sample_num - index, args.batch_size)):
                    text = enc.decode(out[i])
                    text = '======== SAMPLE {} ========\n{}\n'.format(
                        index + 1, text)
                    print(text)
                    all_text.append(text)
                    index += 1
            maketree(os.path.join(SAMPLE_DIR, args.run_name))
            with open(
                    os.path.join(SAMPLE_DIR, args.run_name,
                                 'samples-{}').format(counter), 'w') as fp:
                fp.write('\n'.join(all_text))

        @tflex.register_command
        def validation():
            if args.val_every <= 0:
                return
            print('Calculating validation loss...')
            losses = []
            for batch in tqdm.tqdm(val_batches):
                loss = sess.run(val_loss, feed_dict={val_context: batch})
                losses.append(loss)
                v_val_loss = np.mean(losses)
                print('{n} loss={loss:2.4f} avg={avg:2.4f}'.format(
                    n=len(losses), loss=loss, avg=v_val_loss))
            print('losses', losses)
            v_summary = sess.run(val_loss_summary,
                                 feed_dict={val_loss: v_val_loss})
            summary_log.add_summary(v_summary, counter)
            summary_log.flush()
            print(
                '{stamp} [{counter} | {time:2.4f}] validation loss = {loss:2.4f}'
                .format(stamp=timestamp(),
                        counter=counter,
                        time=time.time() - start_time,
                        loss=v_val_loss))

        start_time = time.time()

        def elapsed():
            return time.time() - start_time

        def say(msg):
            print('{stamp} [{counter} | {time:2.4f}] {msg}'.format(
                counter=counter, time=elapsed(), msg=msg, stamp=timestamp()))

        def sample_batch():
            #return [data_sampler.sample(args.sample_ctx) for _ in range(args.batch_size)]
            #say('Sampling batch...')
            r = []
            times = []
            for _ in range(args.batch_size):
                start = time.time()
                sample = data_sampler.sample(args.sample_ctx)
                end = time.time()
                duration = end - start  # avoid shadowing the elapsed() helper above
                r += [sample]
                times += [duration]
            total = sum(times)
            avg = total / len(times)
            #say('Sampled %d batches in %.4f seconds (avg per batch: %.4f)' % (args.batch_size, total, avg))
            return r

        prev_time = time.time()
        avg_loss = (0.0, 0.0)

        if args.debug_before_training:
            import pdb
            pdb.set_trace()

        last_saved_time = elapsed()
        while True:
            try:
                now = elapsed()
                if args.save_time > 0 and (
                        (now - last_saved_time) / 60.0 >= args.save_time):
                    save()
                    last_saved_time = now
                elif args.save_every > 0 and (counter % args.save_every == 0):
                    save()
                if args.sample_every > 0 and counter % args.sample_every == 0:
                    generate_samples()
                if args.val_every > 0 and (counter % args.val_every == 0
                                           or counter == 1):
                    validation()

                v_rate = update_lr()

                if args.accumulate_gradients > 1:
                    #say('Running opt_reset...')
                    sess.run(opt_reset)
                    for _ in range(args.accumulate_gradients):
                        batch = sample_batch()
                        say('Running opt_compute...')
                        sess.run(opt_compute, feed_dict={context: batch})
                    say('Running opt_apply...')
                    (v_loss, v_summary) = sess.run((opt_apply, summaries))
                else:
                    batch = sample_batch()
                    say('Running opt_apply...')
                    (_, v_loss, v_summary) = sess.run(
                        (opt_apply, loss, summaries),
                        feed_dict={context: batch})

                if args.float16:
                    v_loss = tf.to_float(v_loss).eval()

                summary_log.add_summary(v_summary, counter)
                summary_log.flush()

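                # Exponential moving average of the loss with decay 0.99; the
                # ratio avg_loss[0] / avg_loss[1] printed below is the
                # bias-corrected running average.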
                avg_loss = (avg_loss[0] * 0.99 + v_loss,
                            avg_loss[1] * 0.99 + 1.0)

                now = time.time()
                print(
                    '{stamp} [{counter} | {time:2.4f} | {delta:2.2f}s | {ops:2.6f}tokens/s] loss={loss:2.4f} avg={avg:2.4f} rate={rate:0.7f} step={step}'
                    .format(
                        stamp=timestamp(),
                        counter=counter,
                        time=now - start_time,
                        delta=now - prev_time,
                        ops=args.sample_ctx * args.batch_size /
                        (now - prev_time),
                        rate=v_rate,
                        loss=v_loss,
                        avg=avg_loss[0] / avg_loss[1],
                        step=current_step,
                    ))

                counter += 1
                current_step += 1
                global_step.load(current_step, session=sess)

                tflex.check_commands_with_args(
                    session=sess,
                    stamp=timestamp(),
                    counter=counter,
                    time=now - start_time,
                    delta=now - prev_time,
                    ops=args.batch_size / (now - prev_time),
                    rate=v_rate,
                    loss=v_loss,
                    avg=avg_loss[0] / avg_loss[1],
                    avg_loss=avg_loss,
                    step=current_step,
                    train_vars=train_vars,
                    all_vars=all_vars,
                    args=args,
                    data_sampler=data_sampler,
                    ckpt=ckpt,
                    saver=saver,
                )
                if tflex.should_quit():
                    break

                prev_time = now
                if args.debug_print_all_vars:
                    print('all variables:')
                    print('name/shape/parameter_count')
                    param_count = 0
                    for x in tf.all_variables():
                        shape = x.shape.as_list()
                        count = np.prod(shape)
                        print(x.name, shape, count)
                        param_count += count
                    print('Total parameters:', param_count)
                    args.debug_print_all_vars = False

                if args.debug_print_trainable_vars:
                    print('trainable variables:')
                    print('name/shape/parameter_count')
                    param_count = 0
                    for x in tf.trainable_variables():
                        shape = x.shape.as_list()
                        count = np.prod(shape)
                        print(x.name, shape, count)
                        param_count += count
                    print('Total parameters:', param_count)
                    args.debug_print_trainable_vars = False
            except KeyboardInterrupt:
                print('interrupted')
                if args.save_on_ctrlc:
                    save()
                if args.debug_on_ctrlc:
                    import pdb
                    pdb.set_trace()
                else:
                    break
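
# The training loop above accumulates gradients over several micro-batches when
# args.accumulate_gradients > 1, driving three graph ops (opt_reset, opt_compute,
# opt_apply) that are built earlier in the script. Below is a minimal sketch of
# how such ops could be wired up in TF1; the construction is an assumption for
# illustration, not the script's exact graph.
import tensorflow as tf


def build_accumulated_optimizer(loss, train_vars, learning_rate=1e-4):
    opt = tf.train.AdamOptimizer(learning_rate=learning_rate)
    # Keep only variables that actually receive a gradient.
    grads_and_vars = [(g, v)
                      for g, v in opt.compute_gradients(loss, var_list=train_vars)
                      if g is not None]
    # One non-trainable accumulator per variable, plus running loss and count.
    accum_grads = [tf.Variable(tf.zeros_like(v), trainable=False)
                   for _, v in grads_and_vars]
    accum_loss = tf.Variable(0.0, trainable=False)
    count = tf.Variable(0.0, trainable=False)
    # opt_reset: zero the accumulators before a new accumulation cycle.
    opt_reset = tf.group(
        *[a.assign(tf.zeros_like(a)) for a in accum_grads],
        accum_loss.assign(0.0), count.assign(0.0))
    # opt_compute: add this micro-batch's gradients (and loss) to the accumulators.
    opt_compute = tf.group(
        *[a.assign_add(g) for a, (g, _) in zip(accum_grads, grads_and_vars)],
        accum_loss.assign_add(loss), count.assign_add(1.0))
    # opt_apply: apply the averaged gradients, then return the averaged loss so
    # that sess.run(opt_apply) yields a loss value, as the loop above expects.
    apply_op = opt.apply_gradients(
        [(a / count, v) for a, (_, v) in zip(accum_grads, grads_and_vars)])
    with tf.control_dependencies([apply_op]):
        opt_apply = accum_loss / count
    return opt_reset, opt_compute, opt_apply
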
def interact_model(model_name='117M',
                   seed=None,
                   nsamples=1,
                   batch_size=1,
                   length=None,
                   temperature=1,
                   seed_word="I am",
                   top_k=0,
                   top_p=0.0):
    """
    Interactively run the model
    :model_name=117M : String, which model to use
    :seed=None : Integer seed for random number generators, fix seed to reproduce
     results
    :nsamples=1 : Number of samples to return total
    :batch_size=1 : Number of batches (only affects speed/memory).  Must divide nsamples.
    :length=None : Number of tokens in generated text; if None (default), it is
     determined by the model hyperparameters
    :temperature=1 : Float value controlling randomness in the Boltzmann
     distribution. Lower temperature results in less random completions. As the
     temperature approaches zero, the model becomes deterministic and
     repetitive. Higher temperature results in more random completions.
    :top_k=0 : Integer value controlling diversity. 1 means only 1 word is
     considered for each step (token), resulting in deterministic completions,
     while 40 means 40 words are considered at each step. 0 (default) is a
     special setting meaning no restrictions. 40 generally is a good value.
    :top_p=0.0 : Float value controlling diversity. Implements nucleus sampling,
     overriding top_k if set to a value > 0. A good setting is 0.9.
    """
    if batch_size is None:
        batch_size = 1
    assert nsamples % batch_size == 0

    enc = encoder.get_encoder(model_name)
    hparams = model.default_hparams()
    with open(os.path.join('models', model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx // 2
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" %
                         hparams.n_ctx)

    with tf.Session(graph=tf.Graph()) as sess:
        context = tf.placeholder(tf.int32, [batch_size, None])
        np.random.seed(seed)
        tf.set_random_seed(seed)

        output = sample.sample_sequence(hparams=hparams,
                                        length=length,
                                        context=context,
                                        batch_size=batch_size,
                                        temperature=temperature,
                                        top_k=top_k,
                                        top_p=top_p)

        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join('models', model_name))
        saver.restore(sess, ckpt)

        raw_text = seed_word
        if not raw_text:
            print("You should provide a seed sentence or word.")
            quit()

        context_tokens = enc.encode(raw_text)

        out = sess.run(output,
                       feed_dict={
                           context:
                           [context_tokens for _ in range(batch_size)]
                       })[:, len(context_tokens):]

        text = enc.decode(out[0])

        # treat output text
        output_text = text.split('.')[0] + (".")
        #output_text = text

        print(output_text)
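
# The top_p parameter documented above implements nucleus sampling: keep only
# the smallest set of tokens whose cumulative probability reaches p, then sample
# from that set. A minimal NumPy sketch of the filtering step (illustrative
# only; the real implementation lives inside sample.sample_sequence):
import numpy as np


def nucleus_filter(logits, top_p=0.9):
    """Set every logit outside the top-p nucleus to -inf."""
    probs = np.exp(logits - np.max(logits))
    probs /= probs.sum()
    order = np.argsort(probs)[::-1]            # tokens by probability, descending
    cutoff = np.searchsorted(np.cumsum(probs[order]), top_p) + 1
    keep = order[:cutoff]                      # smallest set covering mass >= top_p
    filtered = np.full(logits.shape, -np.inf)
    filtered[keep] = logits[keep]
    return filtered
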
def interact_model(
    model_name='117M',
    seed=None,
    nsamples=1,
    batch_size=1,
    length=None,
    temperature=1,
    top_k=0,
):
    """
    Interactively run the model
    :model_name=117M : String, which model to use
    :seed=None : Integer seed for random number generators, fix seed to reproduce
     results
    :nsamples=1 : Number of samples to return total
    :batch_size=1 : Number of batches (only affects speed/memory).  Must divide nsamples.
    :length=None : Number of tokens in generated text; if None (default), it is
     determined by the model hyperparameters
    :temperature=1 : Float value controlling randomness in the Boltzmann
     distribution. Lower temperature results in less random completions. As the
     temperature approaches zero, the model becomes deterministic and
     repetitive. Higher temperature results in more random completions.
    :top_k=0 : Integer value controlling diversity. 1 means only 1 word is
     considered for each step (token), resulting in deterministic completions,
     while 40 means 40 words are considered at each step. 0 (default) is a
     special setting meaning no restrictions. 40 generally is a good value.
    """
    if batch_size is None:
        batch_size = 1
    assert nsamples % batch_size == 0

    enc = encoder.get_encoder(model_name)
    hparams = model.default_hparams()
    with open(os.path.join('models', model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx // 2
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" % hparams.n_ctx)

    with tf.Session(graph=tf.Graph()) as sess:
        context = tf.placeholder(tf.int32, [batch_size, None])
        np.random.seed(seed)
        tf.set_random_seed(seed)
        output = sample.sample_sequence(
            hparams=hparams, length=length,
            context=context,
            batch_size=batch_size,
            temperature=temperature, top_k=top_k
        )

        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join('models', model_name))
        saver.restore(sess, ckpt)

        while True:
            raw_text = input("Model prompt >>> ")
            while not raw_text:
                print('Prompt should not be empty!')
                raw_text = input("Model prompt >>> ")
            context_tokens = enc.encode(raw_text)
            generated = 0
            for _ in range(nsamples // batch_size):
                out = sess.run(output, feed_dict={
                    context: [context_tokens for _ in range(batch_size)]
                })[:, len(context_tokens):]
                for i in range(batch_size):
                    generated += 1
                    text = enc.decode(out[i])
                    print("=" * 40 + " SAMPLE " + str(generated) + " " + "=" * 40)
                    print(text)
            print("=" * 80)
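
# The temperature and top_k parameters documented above shape the next-token
# distribution before sampling: logits are divided by the temperature, and with
# top_k > 0 only the k most likely tokens survive. A small NumPy sketch of both
# steps (illustrative only; sample.sample_sequence does the equivalent filtering
# inside the TensorFlow graph):
import numpy as np


def sample_next_token(logits, temperature=1.0, top_k=40, rng=np.random):
    scaled = logits / max(temperature, 1e-8)   # lower temperature -> sharper distribution
    if top_k > 0:
        kth = np.sort(scaled)[-min(top_k, len(scaled))]  # k-th largest logit
        scaled = np.where(scaled < kth, -np.inf, scaled)
    probs = np.exp(scaled - np.max(scaled))
    probs /= probs.sum()
    return rng.choice(len(probs), p=probs)
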
def interact_model(
    model_name='117M',
    seed=None,
    nsamples=1,
    batch_size=None,
    length=1000,
    temperature=0.8,
    top_k=40,
):
    if batch_size is None:
        batch_size = 1
    assert nsamples % batch_size == 0

    enc = encoder.get_encoder(model_name)
    hparams = model.default_hparams()
    with open(os.path.join('models', model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx // 2
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" %
                         hparams.n_ctx)

    with tf.Session(graph=tf.Graph()) as sess:
        context = tf.placeholder(tf.int32, [batch_size, None])
        np.random.seed(seed)
        tf.set_random_seed(seed)
        output = sample.sample_sequence(hparams=hparams,
                                        length=length,
                                        context=context,
                                        batch_size=batch_size,
                                        temperature=temperature,
                                        top_k=top_k)

        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join('models', model_name))
        saver.restore(sess, ckpt)

        while True:
            food = random_food()
            raw_text = food + " Recipe:"
            image = ""
            try:
                wiki = wikipedia.page(food)
                # Pick the first non-SVG image and build an <img> tag from it.
                for candidate in wiki.images:
                    if "svg" not in candidate:
                        image = ("<img style='max-height:300px;' src='" +
                                 candidate + "'>")
                        break
            except Exception:
                image = ""
            context_tokens = enc.encode(raw_text)
            generated = 0
            for _ in range(nsamples // batch_size):
                out = sess.run(output,
                               feed_dict={
                                   context:
                                   [context_tokens for _ in range(batch_size)]
                               })[:, len(context_tokens):]
                for i in range(batch_size):
                    generated += 1
                    text = enc.decode(out[i])
                    text = text.split("<|endoftext|>")[0]
                    text = text.replace("\n", "<br>")
                    text = ("<div style='width:66%;position:absolute;left:16%'>"
                            "<h1>" + raw_text + "</h1>" + image + "<br>" +
                            text + "</div>")
                    with open("/var/www/recipe/index.html", "w") as text_file:
                        text_file.write(text)
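
# The recipe loop above depends on a random_food() helper that is not part of
# this example. A hypothetical stand-in (the food list and selection logic are
# assumptions for illustration, not the original helper):
import random


def random_food():
    foods = ["Pizza", "Ramen", "Paella", "Goulash", "Falafel"]  # hypothetical list
    return random.choice(foods)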