Example #1
File: server.py Project: yining1023/gpt-2
def setup(opts):
    global sess
    global output
    global enc
    global g
    length = None
    temperature = 1
    top_k = 0

    enc = encoder.get_encoder(opts['checkpoint_dir'])
    hparams = model.default_hparams()
    with open(os.path.join(opts['checkpoint_dir'], 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx // 2
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" %
                         hparams.n_ctx)

    sess = tf.Session()
    context = tf.placeholder(tf.int32, [1, None])
    output, length_ph = sample.sample_sequence(hparams=hparams,
                                               context=context,
                                               batch_size=1,
                                               temperature=temperature,
                                               top_k=top_k)
    saver = tf.train.Saver()
    ckpt = tf.train.latest_checkpoint(opts['checkpoint_dir'])
    saver.restore(sess, ckpt)

    g = tf.get_default_graph()
    g.finalize()
    return sess, enc, context, length_ph
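A minimal usage sketch for the handles setup() returns (the checkpoint path is hypothetical; note that output is the module-level global assigned inside setup(), and that this fork's sample_sequence returns a length placeholder to be fed at run time):

sess, enc, context, length_ph = setup({'checkpoint_dir': 'checkpoint/run1'})
tokens = enc.encode("Once upon a time")
out = sess.run(output, feed_dict={context: [tokens], length_ph: 40})
print(enc.decode(out[0][len(tokens):]))  # strip the prompt tokens, as the other examples do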
def sample_model(
    model_name='345-recipes',
    seed=None,
    nsamples=1,
    batch_size=8,
    length=None,
    temperature=1,
    top_k=0,
):
    """
    Run the sample_model
    :model_name=117M : String, which model to use
    :seed=None : Integer seed for random number generators, fix seed to
     reproduce results
    :nsamples=0 : Number of samples to return, if 0, continues to
     generate samples indefinately.
    :batch_size=1 : Number of batches (only affects speed/memory).
    :length=None : Number of tokens in generated text, if None (default), is
     determined by model hyperparameters
    :temperature=1 : Float value controlling randomness in boltzmann
     distribution. Lower temperature results in less random completions. As the
     temperature approaches zero, the model will become deterministic and
     repetitive. Higher temperature results in more random completions.
    :top_k=0 : Integer value controlling diversity. 1 means only 1 word is
     considered for each step (token), resulting in deterministic completions,
     while 40 means 40 words are considered at each step. 0 (default) is a
     special setting meaning no restrictions. 40 generally is a good value.
    """
    enc = encoder.get_encoder(model_name)
    hparams = model.default_hparams()
    with open(os.path.join('models', model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" % hparams.n_ctx)

    with tf.Session(graph=tf.Graph()) as sess:
        np.random.seed(seed)
        tf.set_random_seed(seed)

        output = sample.sample_sequence(
            hparams=hparams, length=length,
            start_token=enc.encoder['<|endoftext|>'],
            batch_size=batch_size,
            temperature=temperature, top_k=top_k
        )[:, 1:]

        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join('models', model_name))
        saver.restore(sess, ckpt)

        generated = 0
        while nsamples == 0 or generated < nsamples:
            out = sess.run(output)
            for i in range(batch_size):
                generated += 1
                text = enc.decode(out[i])
                return text  # returns the first decoded sample; the loops never iterate further
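Since this version of sample_model returns from inside its loop, a single call yields one unconditional completion; for instance (settings follow the docstring's guidance that top_k=40 is a good value):

text = sample_model(length=200, temperature=0.8, top_k=40)
print(text)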
Example #3
    def __init__(self, encoder, model_name="117M", batch_size=1):

        self.encoder = encoder
        self.model_name = model_name
        self.hparams = model.default_hparams()
        with open(os.path.join('models', model_name, 'hparams.json')) as f:
            self.hparams.override_from_dict(json.load(f))
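Every example here builds hparams with model.default_hparams() before calling override_from_dict, because HParams overrides only keys that already exist; unknown keys must be added first, as Example #9 below does with add_hparam. A small illustration:

hparams = model.default_hparams()              # n_ctx defaults to 1024
hparams.override_from_dict({'n_ctx': 2048})    # fine: n_ctx is a known key
hparams.add_hparam('dropout_rate', 0.0)        # new keys must be added, not overridden
print(hparams.n_ctx, hparams.dropout_rate)     # 2048 0.0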
Example #4
def gpt2_session_and_out_op(seed=None,
                            model_name='345M',
                            length=75,
                            temperature=1,
                            top_k=40):
    batch_size = 1
    hparams = model.default_hparams()
    with open(os.path.join('models', model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx // 2
    with tf.Graph().as_default() as graph:
        sess = tf.Session(graph=graph)
        context = tf.placeholder(tf.int32, [batch_size, None], name='context')
        np.random.seed(seed)
        tf.set_random_seed(seed)
        out_op = sample.sample_sequence(hparams=hparams,
                                        length=length,
                                        context=context,
                                        batch_size=batch_size,
                                        temperature=temperature,
                                        top_k=top_k)

        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join('models', model_name))
        saver.restore(sess, ckpt)

    return sess, out_op
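gpt2_session_and_out_op returns the session and the sampling op but not the context placeholder. Since the placeholder was created with name='context', a caller can recover it from the graph; a sketch (the encoder is built separately, mirroring the other examples):

sess, out_op = gpt2_session_and_out_op(model_name='345M')
context = sess.graph.get_tensor_by_name('context:0')
enc = encoder.get_encoder('345M')
tokens = enc.encode('The meaning of life is')
out = sess.run(out_op, feed_dict={context: [tokens]})
print(enc.decode(out[0][len(tokens):]))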
Example #5
    def __init__(self):
        self.config = tf.compat.v1.ConfigProto()
        self.config.gpu_options.allow_growth = True
        self.config.graph_options.rewrite_options.layout_optimizer = (
            rewriter_config_pb2.RewriterConfig.OFF)
        self.sess = tf.compat.v1.Session(config=self.config)

        self.hparams = model.default_hparams()
        with open("checkpoint/run1/hparams.json") as f:
            self.hparams.override_from_dict(json.load(f))

        self.context = tf.compat.v1.placeholder(tf.int32, [1, None])
        self.length = tf.compat.v1.placeholder(tf.int32, ())
        self.temperature = tf.compat.v1.placeholder(tf.float32, ())  # temperature is a float, not an int

        self.model = model.model(hparams=self.hparams, X=self.context)

        self.load_checkpoint("checkpoint/run1")
        self.enc = encoder.get_encoder("run1")

        self.output = sample.sample_sequence(
            hparams=self.hparams,
            length=self.length,
            start_token=None,
            context=self.context,
            batch_size=1,
            temperature=self.temperature,
            top_k=0,
            top_p=0,
        )

        # run once up front to flush out all the TF warnings
        self.dummy_run()
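dummy_run() and load_checkpoint() are not shown; a hypothetical generate() method for this class would feed all three placeholders defined above:

    def generate(self, prompt, length=50, temperature=1.0):
        tokens = self.enc.encode(prompt)
        out = self.sess.run(self.output, feed_dict={
            self.context: [tokens],
            self.length: length,
            self.temperature: temperature,
        })
        return self.enc.decode(out[0][len(tokens):])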
Example #6
def get_model(model_name='124M', seed=None, nsamples=1, batch_size=1, length=None, temperature=1, top_k=0, top_p=1, models_dir='models'):
    models_dir = os.path.expanduser(os.path.expandvars(models_dir))
    if batch_size is None:
        batch_size = 1
    assert nsamples % batch_size == 0

    enc = encoder.get_encoder(model_name, models_dir)
    hparams = model.default_hparams()
    with open(os.path.join(models_dir, model_name, 'hparams.json')) as filehandler:
        hparams.override_from_dict(json.load(filehandler))

    if length is None:
        length = hparams.n_ctx // 2
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" % hparams.n_ctx)

    sess = tf.Session(graph=tf.Graph())
    context = tf.placeholder(tf.int32, [batch_size, None])
    np.random.seed(seed)
    tf.set_random_seed(seed)
    output = sample.sample_sequence(
        hparams=hparams, length=length,
        context=context,
        batch_size=batch_size,
        temperature=temperature, top_k=top_k, top_p=top_p
    )

    saver = tf.train.Saver()
    ckpt = tf.train.latest_checkpoint(os.path.join(models_dir, model_name))
    saver.restore(sess, ckpt)

    return sess, output
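Note that get_model discards the context placeholder and the encoder it builds, both of which conditional sampling needs. The placeholder above has no explicit name, so TensorFlow assigns the default one (assuming it is the first placeholder in that graph), and a caller could recover it with:

context = sess.graph.get_tensor_by_name('Placeholder:0')

and rebuild the encoder with encoder.get_encoder(model_name, models_dir).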
Example #7
def get_hparams():
    import json
    hparams = model.default_hparams()
    with open(os.path.join('models', MODEL_NAME, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    return hparams
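For reference, the hparams.json shipped with the small GPT-2 model (117M/124M) defines exactly the keys that default_hparams() declares:

{
  "n_vocab": 50257,
  "n_ctx": 1024,
  "n_embd": 768,
  "n_head": 12,
  "n_layer": 12
}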
Example #8
def restore_model(
        model_name='345M',
        seed=None,
        models_dir='models'):

    models_dir = os.path.expanduser(os.path.expandvars(models_dir))
    enc: Encoder = encoder.get_encoder(model_name, models_dir)

    hparams: HParams = model.default_hparams()
    with open(os.path.join(models_dir, model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    np.random.seed(seed)
    tf.set_random_seed(seed)

    ph = Placeholders()
    sequence_output = sample.sample_sequence(
        hparams=hparams,
        length=ph.length,
        temperature=ph.temperature,
        top_k=ph.top_k,
        context=ph.context,
        batch_size=1
    )

    sess = tf.Session()
    saver = tf.train.Saver()
    ckpt = tf.train.latest_checkpoint(os.path.join(models_dir, model_name))
    saver.restore(sess, ckpt)
    return sess, hparams, sequence_output, enc, ph
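A usage sketch, assuming the project's Placeholders helper exposes context, length, temperature and top_k placeholders as attributes (as the sample_sequence call above suggests):

sess, hparams, sequence_output, enc, ph = restore_model('345M')
tokens = enc.encode('The robot said')
out = sess.run(sequence_output, feed_dict={
    ph.context: [tokens],
    ph.length: 40,
    ph.temperature: 0.8,
    ph.top_k: 40,
})
print(enc.decode(out[0][len(tokens):]))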
Example #9
    def __init__(self, model_name: str):
        """
        Initializes encoder from pre-trained model's vocab & bpe merges,
        its hyper-parameters (i.e., Transformer decoder params) &
        loads up the Toxic comments training & test csvs into dataframes
        :param model_name: '117M' or '345M'
        """
        self.enc = encoder.get_encoder(model_name)
        self.hparams = model.default_hparams()
        self.hparams.add_hparam('dropout_rate', 0.)  # add dropout for training
        with open(os.path.join('models', model_name, 'hparams.json')) as f:
            self.hparams.override_from_dict(json.load(f))

        self.train_df = pd.read_csv(TOXIC_TRAIN_PATH)
        self.test_df = pd.read_csv(TOXIC_TEST_PATH)
        self.train_sequences = None
        self.train_labels = None
        self.test_sequences = None

        seeded_kf = KFold(n_splits=FLAGS.num_folds,
                          random_state=FLAGS.seed,
                          shuffle=True)
        self.train_folds = [(train_index, val_index)
                            for train_index, val_index in seeded_kf.split(
                                range(len(self.train_df)))]
Example #10
    def __init__(self,
                 model_name='jokes',
                 seed=None,
                 nsamples=4,
                 batch_size=1,
                 length=100,
                 temperature=0.85,
                 top_k=40,
                 top_p=0.0):
        self.batch_size = batch_size
        self.enc = encoder.get_encoder(model_name)
        hparams = model.default_hparams()
        with open(os.path.join('models', model_name, 'hparams.json')) as fp:
            hparams.override_from_dict(json.load(fp))

        self.sess = tf.Session()
        self.context = tf.placeholder(tf.int32, [batch_size, None])
        np.random.seed(seed)
        tf.set_random_seed(seed)
        self.output = sample.sample_sequence(hparams=hparams,
                                             length=length,
                                             context=self.context,
                                             batch_size=batch_size,
                                             temperature=temperature,
                                             top_k=top_k,
                                             top_p=top_p)

        self.sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join('models', model_name))
        saver.restore(self.sess, ckpt)
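The class stores everything needed for sampling; a hypothetical generate() method (not part of the example) could look like:

    def generate(self, prompt):
        tokens = self.enc.encode(prompt)
        out = self.sess.run(self.output, feed_dict={
            self.context: [tokens for _ in range(self.batch_size)]
        })[:, len(tokens):]
        return [self.enc.decode(out[i]) for i in range(self.batch_size)]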
Example #11
def interact_model(
    message="A quick look at the",
    model_name='345M',
    seed=5,
    nsamples=1,
    batch_size=1,
    length=50,
    temperature=0.9,
    top_k=20,
    top_p=0.9,
):
    if batch_size is None:
        batch_size = 1
    assert nsamples % batch_size == 0

    enc = encoder.get_encoder(model_name)
    hparams = model.default_hparams()
    path = os.path.dirname(__file__)
    with open(os.path.join(path, 'models',
                           model_name,
                           'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx // 2
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer"
                         " than window size: %s" % hparams.n_ctx)

    with tf.Session(graph=tf.Graph()) as sess:
        context = tf.placeholder(tf.int32, [batch_size, None])
        np.random.seed(seed)
        tf.set_random_seed(seed)
        output = sample.sample_sequence(
            hparams=hparams, length=length,
            context=context,
            batch_size=batch_size,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p
        )

        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join(path, 'models',
                                                       model_name))
        saver.restore(sess, ckpt)

        if message == "":
            return -1
        raw_text = message

        context_tokens = enc.encode(raw_text)
        out = sess.run(output, feed_dict={
            context: [context_tokens for _ in range(batch_size)]
        })[:, len(context_tokens):]
        text = []
        for i in range(batch_size):
            text.append(enc.decode(out[i]))

        return text
    def __init__(self, sess, length=10, temperature=0.7, top_k=1):

        seed = None
        batch_size = 1
        model_path = '1558M'
        self.sess = sess

        self.enc = encoder.get_encoder(model_path)
        hparams = model.default_hparams()
        with open(os.path.join('models/1558M', 'hparams.json')) as f:
            hparams.override_from_dict(json.load(f))

        self.context = tf.placeholder(tf.int32, [batch_size, None])
        np.random.seed(seed)
        tf.set_random_seed(seed)
        self.output = sample.sample_sequence(
            hparams=hparams,
            length=length,
            context=self.context,
            batch_size=batch_size,
        )

        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint('models/1558M')
        saver.restore(self.sess, ckpt)
Example #13
    def __init__(self, **kwargs):
        model_name = kwargs['model']
        seed = kwargs['seed']
        self.length = kwargs['len']
        top_k = kwargs['top_k']
        self.lang_target = kwargs['language']

        self.enc = encoder.get_encoder(model_name)
        self.translator = googletrans.Translator()
        hparams = model.default_hparams()
        with open(os.path.join('./data/models', model_name, 'hparams.json')) as f:
            hparams.override_from_dict(json.load(f))
        self.lang_model = hparams.n_lang    # read the model's language from the hyperparameters

        # start session
        self.sess = tf.Session()
        self.context = tf.placeholder(tf.int32, [1, None])
        np.random.seed(seed)
        tf.set_random_seed(seed)
        self.output = sample.sample_sequence(
            hparams=hparams, length=self.length,
            context=self.context,
            batch_size=1,
            temperature=1,
            top_k=top_k, top_p=0
        )

        # restore transformer model from last checkpoint
        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join('./data/models', model_name))
        saver.restore(self.sess, ckpt)
Example #14
def load_model():
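    # NOTE: model_name, seed, length, temperature and top_k are not defined in
    # this snippet; they are assumed to be module-level globals.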
    enc = encoder.get_encoder(model_name)
    hparams = model.default_hparams()
    with open(os.path.join('models', model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" %
                         hparams.n_ctx)

    sess = tf.Session(graph=tf.Graph()).__enter__()  # entered manually so the session stays open after this function returns
    np.random.seed(seed)
    tf.set_random_seed(seed)
    context = tf.placeholder(tf.int32, [1, None])
    output = sample.sample_sequence(hparams=hparams,
                                    length=length,
                                    context=context,
                                    batch_size=1,
                                    temperature=temperature,
                                    top_k=top_k)

    saver = tf.train.Saver()
    ckpt = tf.train.latest_checkpoint(os.path.join('models', model_name))
    saver.restore(sess, ckpt)

    return sess, enc, context, output
Example #15
def main(input_file, output_file, seed=42, model_name='117M', batch_size=1):
    np.random.seed(seed)
    tf.set_random_seed(seed)

    data = pd.read_csv(input_file, sep='\t')

    output_file = open(output_file, 'wb')
    enc = encoder.get_encoder(model_name)
    hparams = model.default_hparams()
    with open(os.path.join('../pretrained', model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    with tf.Session(graph=tf.Graph()) as sess:
        context = tf.placeholder(tf.int32, [batch_size, None])
        output = model.model(hparams=hparams, X=context, reuse=tf.AUTO_REUSE)
        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join('../pretrained', model_name))
        saver.restore(sess, ckpt)

        for _id in range(0, len(data), batch_size):
            list_token_ids, list_tokens = encode(enc, data[_id:_id+batch_size]['Text'])
            out = sess.run(output, feed_dict={
                context: list_token_ids
            })
            # out['att_probs'].shape (1, 12, 12, 29, 29)
            # 'AB', 'AP', 'BA', 'BP', 'PA', 'PB', 'token', 'label', 'ID'
            # (layer, head)

            for res in get_features(data[_id:_id+batch_size], list_tokens, out['att_probs'].transpose((0, 3, 4, 1, 2))):
                pickle.dump(res, output_file)

    output_file.close()
Example #16
def interact_model(
    model_name='117M',
    seed=None,
    nsamples=1,
    batch_size=None,
    length=None,
    temperature=1,
    top_k=0,
):
    if batch_size is None:
        batch_size = 1
    assert nsamples % batch_size == 0
    np.random.seed(seed)
    tf.set_random_seed(seed)

    enc = encoder.get_encoder(model_name)
    hparams = model.default_hparams()
    with open(os.path.join('models', model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx // 2
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" %
                         hparams.n_ctx)

    with tf.Session(graph=tf.Graph()) as sess:
        context = tf.placeholder(tf.int32, [batch_size, None])
        output = sample.sample_sequence(hparams=hparams,
                                        length=length,
                                        context=context,
                                        batch_size=batch_size,
                                        temperature=temperature,
                                        top_k=top_k)

        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join('models', model_name))
        saver.restore(sess, ckpt)

        while True:
            raw_text = input("Model prompt >>> ")
            while not raw_text:
                print('Prompt should not be empty!')
                raw_text = input("Model prompt >>> ")
            clean_input = ''.join(x for x in raw_text if ord(x) < 128)  # keep ASCII only
            context_tokens = enc.encode(clean_input)
            generated = 0
            for _ in range(nsamples // batch_size):
                out = sess.run(output,
                               feed_dict={
                                   context:
                                   [context_tokens for _ in range(batch_size)]
                               })[:, len(context_tokens):]
                for i in range(batch_size):
                    generated += 1
                    text = enc.decode(out[i])
                    print("=" * 40 + " SAMPLE " + str(generated) + " " +
                          "=" * 40)
                    print(text)
            print("=" * 80)
Example #17
 def setup(self,
           model_name='117M',
           seed=None,
           temperature=1,
           top_k=40,
           length=None):
     if length not in self.cache:
         enc = encoder.get_encoder(model_name)
         hparams = model.default_hparams()
         with open(os.path.join('models', model_name, 'hparams.json')) as f:
             hparams.override_from_dict(json.load(f))
         if length is None:
             length = hparams.n_ctx // 2
         sess = tf.Session(graph=tf.Graph()).__enter__()
         context = tf.placeholder(tf.int32, [1, None])
         np.random.seed(seed)
         tf.set_random_seed(seed)
         output = sample.sample_sequence(hparams=hparams,
                                         length=length,
                                         context=context,
                                         batch_size=1,
                                         temperature=temperature,
                                         top_k=top_k)
         saver = tf.train.Saver()
         ckpt = tf.train.latest_checkpoint(
             os.path.join('models', model_name))
         saver.restore(sess, ckpt)
         self.cache[length] = context, enc, output, sess
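         # NOTE: the cache is keyed by length alone, so later calls with a
         # different model_name, temperature or top_k reuse the first graph.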
     return self.cache[length]
Example #18
def interact_model(
    model_name='117M',
    prompt="",
    seed=None,
    nsamples=1,
    batch_size=None,
    length=270,
    temperature=1,
    top_k=40,
):
    print(prompt)
    if batch_size is None:
        batch_size = 1
    assert nsamples % batch_size == 0
    np.random.seed(seed)
    tf.set_random_seed(seed)

    enc = encoder.get_encoder(model_name)
    hparams = model.default_hparams()
    with open(os.path.join('models', model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx // 2
    elif length > hparams.n_ctx:
        raise ValueError(
            f"can't get samples longer than window size: {hparams.n_ctx}")

    with tf.Session(graph=tf.Graph()) as sess:
        context = tf.placeholder(tf.int32, [batch_size, None])
        output = sample.sample_sequence(hparams=hparams,
                                        length=length,
                                        context=context,
                                        batch_size=batch_size,
                                        temperature=temperature,
                                        top_k=top_k)[:, 1:]

        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join('models', model_name))
        saver.restore(sess, ckpt)

        raw_text = prompt
        context_tokens = enc.encode(raw_text)
        generated = 0
        for _ in range(nsamples // batch_size):
            out = sess.run(output,
                           feed_dict={
                               context:
                               [context_tokens for _ in range(batch_size)]
                           })
            for i in range(batch_size):
                generated += 1
                text = enc.decode(out[i])
                print("=" * 40 + " MESSAGE " + str(generated) + " " + "=" * 40)
                print(f"{text}")
                gpt_text = text
        print("=" * 80)
        return gpt_text
Example #19
File: gpt2_test.py Project: pengge/SWDT
def simple_gpt2_example():
    models_dir = lib_dir_path + '/models'
    model_name = '345M'
    seed = None
    nsamples = 1
    batch_size = 1
    length = 300
    temperature = 1
    top_k = 0

    raw_text = 'I went to a lounge to celebrate my birthday and'

    models_dir = os.path.expanduser(os.path.expandvars(models_dir))
    if batch_size is None:
        batch_size = 1
    assert nsamples % batch_size == 0

    enc = encoder.get_encoder(model_name, models_dir)
    hparams = model.default_hparams()
    with open(os.path.join(models_dir, model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx // 2
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" %
                         hparams.n_ctx)

    with tf.Session(graph=tf.Graph()) as sess:
        context = tf.placeholder(tf.int32, [batch_size, None])
        np.random.seed(seed)
        tf.set_random_seed(seed)
        output = sample.sample_sequence(hparams=hparams,
                                        length=length,
                                        context=context,
                                        batch_size=batch_size,
                                        temperature=temperature,
                                        top_k=top_k)

        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join(models_dir, model_name))
        saver.restore(sess, ckpt)

        #--------------------
        context_tokens = enc.encode(raw_text)
        generated = 0
        for _ in range(nsamples // batch_size):
            out = sess.run(output,
                           feed_dict={
                               context:
                               [context_tokens for _ in range(batch_size)]
                           })[:, len(context_tokens):]
            for i in range(batch_size):
                generated += 1
                text = enc.decode(out[i])
                print('=' * 40 + ' SAMPLE ' + str(generated) + ' ' + '=' * 40)
                print(text)
        print('=' * 80)
    def __init__(self, config_path, save_path):
        with open(config_path, 'r') as f:
            config = json.load(f)
        self.model_name = config['model_name']
        self.seed = config['seed']
        self.nsamples = config['nsamples']
        self.batch_size = config['batch_size']
        self.length = config['length']
        self.temperature = config['temperature']
        self.top_k = config['top_k']
        self.top_p = config['top_p']
        self.models_dir = config['models_dir']
        self.save_path = save_path

        self.models_dir = os.path.expanduser(
            os.path.expandvars(self.models_dir, ))
        if self.batch_size is None:
            self.batch_size = 1
        assert self.nsamples % self.batch_size == 0

        self.enc = encoder.get_encoder(self.model_name, self.models_dir)
        self.hparams = model.default_hparams()
        with open(
                os.path.join(
                    self.models_dir,
                    self.model_name,
                    'hparams.json',
                )) as f:
            self.hparams.override_from_dict(json.load(f))

        if self.length is None:
            self.length = self.hparams.n_ctx // 2
        elif self.length > self.hparams.n_ctx:
            raise ValueError(
                ('Can\'t get samples longer '
                 'than window size: {}').format(self.hparams.n_ctx, ))

        self.sess = tf.Session()
        self.context = tf.placeholder(tf.int32, [self.batch_size, None])
        np.random.seed(self.seed)
        tf.set_random_seed(self.seed)
        self.output = sample.sample_sequence(
            hparams=self.hparams,
            length=self.length,
            context=self.context,
            batch_size=self.batch_size,
            temperature=self.temperature,
            top_k=self.top_k,
            top_p=self.top_p,
        )

        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(
            os.path.join(
                self.models_dir,
                self.model_name,
            ))
        saver.restore(self.sess, ckpt)
Example #21
def interact_model(message="",
                   model_name='1558M',
                   models_dir='models',
                   seed=None,
                   length=20,
                   temperature=1,
                   top_k=0):
    #models_dir = os.path.expanduser(os.path.expandvars(models_dir))
    global conversation
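    # `conversation` is assumed to be a module-level string holding the chat
    # history; it must be initialized (e.g. to "") before the first call.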
    enc = encoder.get_encoder(model_name, models_dir)
    hparams = model.default_hparams()
    with open(os.path.join('models', model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" %
                         hparams.n_ctx)
    with tf.Session(graph=tf.Graph()) as sess:
        np.random.seed(seed)
        tf.set_random_seed(seed)
        context = tf.placeholder(tf.int32,
                                 [1, None])  # doesn't exist in the other model
        output = sample.sample_sequence(hparams=hparams,
                                        length=length,
                                        context=context,
                                        batch_size=1,
                                        temperature=temperature,
                                        top_k=top_k)

        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join(models_dir, model_name))
        saver.restore(sess, ckpt)

        conversation = conversation + "\nyou: " + message
        conversation = conversation + "\nher: "
        sys.stdout.write("her: ")
        sys.stdout.flush()

        encoded_conversation = enc.encode(conversation)
        result = sess.run(output, feed_dict={context: [encoded_conversation]
                                             })[:,
                                                len(encoded_conversation):]
        text = enc.decode(result[0])

        #sys.stderr.write("=============="+text+"=================")
        #sys.stderr.flush()

        splits = text.split('\n')
        #line = splits[1] if len(splits)>1 else splits[0]
        #parts = line.split(': ')
        #reply = parts[1] if len(parts)>1 else parts[0]
        reply = splits[0]
        sys.stdout.write(reply + '\n')
        sys.stdout.flush()
        conversation = conversation + reply
        print(conversation)
        return reply
Example #22
def chat(model_name='117M', seed=None, length=50):

    # Prepare conversation and context
    conversation = Conversation("Hudson", "Jayme")
    conversation.add_human("Hi!")
    conversation.add_computer("Hey!")
    conversation.add_human("I'm a human named {}, who are you?".format(conversation.name_human))
    conversation.add_computer("I'm a computer program but refer to me as {} please".format(conversation.name_computer))
    conversation.add_human("How exciting! Are you ready to chat?")
    conversation.add_computer("Sure thing! You go first.")

    # Prepare the model
    enc = encoder.get_encoder(model_name)
    hparams = model.default_hparams()
    with open(os.path.join('models', model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" % hparams.n_ctx)

    with tf.Session(graph=tf.Graph()) as sess:
        np.random.seed(seed)
        tf.set_random_seed(seed)
        context = tf.placeholder(tf.int32, [1, None])
        output = sample.sample_sequence(
                hparams=hparams, length=length, context=context, batch_size=1
                # temperature=temperature, top_k=top_k
                )

        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join('models', model_name))
        saver.restore(sess, ckpt)

        # Print the initial context/prompt
        print(conversation.text_generic(1))

        while True:

            # Let the human speak
            message = None
            while not message:
                message = input("{}: ".format(conversation.name_human))
            conversation.add_human(message)

            # Let the computer speak
            prompt = conversation.text_gpt() + "\n\n{}: ".format(conversation.name_computer)

            encoded_prompt = enc.encode(prompt)

            result= sess.run(output, feed_dict={
                context: [encoded_prompt]
                })[:, len(encoded_prompt):]
            text = enc.decode(result[0])

            reply = (text.split('\n'))[0]
            conversation.add_computer(reply)
            print("{}: {}".format(conversation.name_computer, reply))
Example #23
 def __init__(self, lr, max_seq_len,
              batch_size, num_train_epochs, num_warmup_steps, model_dir):
     self.lr = lr
     self.max_seq_len = max_seq_len
     self.batch_size = batch_size
     self.num_train_epochs = num_train_epochs
     self.num_warmup_steps = num_warmup_steps
     self.model_dir = model_dir
     self.hyperparams = model.default_hparams()
Example #24
def interact_model(model_name,seed,nsamples,batch_size,length,temperature,top_k,models_dir,article):
    result_list = []
    models_dir = os.path.expanduser(os.path.expandvars(models_dir))
    if batch_size is None:
        batch_size = 1
    assert nsamples % batch_size == 0

    enc = encoder.get_encoder(model_name, models_dir)
    hparams = model.default_hparams()
    with open(os.path.join(models_dir, model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx // 2
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" % hparams.n_ctx)

    with tf.Session(graph=tf.Graph()) as sess:
        context = tf.placeholder(tf.int32, [batch_size, None])
        np.random.seed(seed)
        tf.set_random_seed(seed)
        output = sample.sample_sequence(
            hparams=hparams, length=length,
            context=context,
            batch_size=batch_size,
            temperature=temperature, top_k=top_k
        )

        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join(models_dir, model_name))
        saver.restore(sess, ckpt)

        # while True:
        if not article:
            return 'Text should not be empty!'
            # raw_text = input("Model prompt >>> ")
        raw_text = article + "\n TL;DR:"  # "TL;DR:" cues GPT-2 to produce a summary
        context_tokens = enc.encode(raw_text)
        generated = 0
        for _ in range(3):  # three candidate summaries; avoids shadowing the inner loop's i
            for _ in range(nsamples // batch_size):
                out = sess.run(output, feed_dict={
                    context: [context_tokens for _ in range(batch_size)]
                })[:, len(context_tokens):]
                for i in range(batch_size):
                    generated += 1
                    text = enc.decode(out[i])
                    result_list.append(str(text))
        result_list.append(str(raw_text))
        result_list = pd.Series(result_list)
        tfidf = TfidfVectorizer(stop_words='english')
        tfidf_matrix = tfidf.fit_transform(result_list)
        cosine_sim = linear_kernel(tfidf_matrix)
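        # result_list holds three generated summaries (indices 0-2) plus the
        # original article (index 3); cosine_sim[3] ranks all four texts by
        # similarity to the article. After sorting, position 0 is the article
        # itself (similarity 1.0), so position 1 is the closest generated summary.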
        sim_scores = list(enumerate(cosine_sim[3]))
        sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
        sim_scores = sim_scores[1]
        return result_list[sim_scores[0]].split("<|endoftext|>")[0]
Example #25
def interact_model(model_name, seed, nsamples, batch_size, length, temperature,
                   top_k, models_dir):

    models_dir = os.path.expanduser(os.path.expandvars(models_dir))
    if batch_size is None:
        batch_size = 1
    assert nsamples % batch_size == 0

    enc = encoder.get_encoder(model_name, models_dir)
    hparams = model.default_hparams()
    with open(os.path.join(models_dir, model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx // 2
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" %
                         hparams.n_ctx)

    with tf.Session(graph=tf.Graph()) as sess:
        context = tf.placeholder(tf.int32, [batch_size, None])
        np.random.seed(seed)
        tf.set_random_seed(seed)
        output = sample.sample_sequence(hparams=hparams,
                                        length=length,
                                        context=context,
                                        batch_size=batch_size,
                                        temperature=temperature,
                                        top_k=top_k)

        saver = tf.train.Saver(save_relative_paths=True)
        ckpt = tf.train.latest_checkpoint(os.path.join(models_dir, model_name))
        saver.restore(sess, ckpt)

        while True:
            raw_text = input("\nModel prompt >>> ")
            if raw_text == 'ADMIN_NIXTRATOR':
                raw_text = False
                break
            while not raw_text:
                print('\nPrompt should not be empty!')
                raw_text = input("\nModel prompt >>> ")
            context_tokens = enc.encode(raw_text)
            generated = 0
            for _ in range(nsamples // batch_size):
                out = sess.run(output,
                               feed_dict={
                                   context:
                                   [context_tokens for _ in range(batch_size)]
                               })[:, len(context_tokens):]
                for i in range(batch_size):
                    generated += 1
                    text = enc.decode(out[i])
                    print("=" * 40 + " SAMPLE " + str(generated) + " " +
                          "=" * 40)
                    print(text)
            print("=" * 80)
Example #26
 def interact_model(self,
     model_name,  # which model to use; here the GPT-2 model with 345 million parameters (weights)
     seed,  # integer seed for the random number generators; fix the seed to reproduce results
     nsamples,  # the number of sample texts generated in the output
     batch_size,  # only affects speed/memory; must also divide nsamples
     length,  # the number of tokens in the generated text; if None, it is decided by the model hyperparameters
     temperature,  # controls randomness in the Boltzmann distribution: lower temperature gives less random completions, and as it approaches zero the model becomes deterministic and repetitive; higher temperature gives more random completions
     top_k,  # controls diversity: 1 means only 1 word is considered at each step (token), 40 means 40 words are considered, and 0 (the default) means no restriction; top_k = 40 is generally a good value
     models_dir  # path to the parent folder containing the model subfolders (contains the <model_name> folder)
 ):
     models_dir = "E:\\workdirectory\\Code Name Val Halen\\DS Sup\\DL\\Chapter A9\\gpt\\gpt-2\\models\\" #os.path.expanduser(os.path.expandvars(models_dir))
     if batch_size is None:
         batch_size = 1
     assert nsamples % batch_size == 0
 
     enc = encoder.get_encoder(model_name, models_dir)
     hparams = model.default_hparams()
     with open(os.path.join(models_dir, model_name, 'hparams.json')) as f:
         hparams.override_from_dict(json.load(f))
 
     if length is None:
         length = hparams.n_ctx // 2
     elif length > hparams.n_ctx:
         raise ValueError("Can't get samples longer than window size: %s" % hparams.n_ctx)
 
     with tf.Session(graph=tf.Graph()) as sess:
         context = tf.placeholder(tf.int32, [batch_size, None])
         np.random.seed(seed)
         tf.set_random_seed(seed)
         output = sample.sample_sequence(
             hparams=hparams, length=length,
             context=context,
             batch_size=batch_size,
             temperature=temperature, top_k=top_k
         )
 
         saver = tf.train.Saver()
         ckpt = tf.train.latest_checkpoint(os.path.join(models_dir, model_name))
         saver.restore(sess, ckpt)
 
         while True:
             raw_text = input("Model prompt >>> ")
             while not raw_text:
                 print('Prompt should not be empty!')
                 raw_text = input("Model prompt >>> ")
             context_tokens = enc.encode(raw_text)
             generated = 0
             for _ in range(nsamples // batch_size):
                 out = sess.run(output, feed_dict={
                     context: [context_tokens for _ in range(batch_size)]
                 })[:, len(context_tokens):]
                 for i in range(batch_size):
                     generated += 1
                     text = enc.decode(out[i])
                     print("=" * 40 + " SAMPLE " + str(generated) + " " + "=" * 40)
                     print(text)
             print("=" * 80)
def noninteract_model(
    model_name,
    seed,
    nsamples,
    batch_size,
    length,
    temperature,
    top_k,
    models_dir,
    prompt_list
):
    models_dir = os.path.expanduser(os.path.expandvars(models_dir))
    if batch_size is None:
        batch_size = 1
    assert nsamples % batch_size == 0

    enc = encoder.get_encoder(model_name, models_dir)
    hparams = model.default_hparams()
    with open(os.path.join(models_dir, model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx // 2
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" % hparams.n_ctx)

    with tf.Session(graph=tf.Graph()) as sess:
        context = tf.placeholder(tf.int32, [batch_size, None])
        np.random.seed(seed)
        tf.set_random_seed(seed)
        output = sample.sample_sequence(
            hparams=hparams, length=length,
            context=context,
            batch_size=batch_size,
            temperature=temperature, top_k=top_k
        )

        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join(models_dir, model_name))
        saver.restore(sess, ckpt)
        all_text={}
        for prpt in prompt_list:
            print(prpt)
            text_list=[]
            raw_text = prpt
            context_tokens = enc.encode(raw_text)
            generated = 0
            for _ in range(nsamples // batch_size):
                out = sess.run(output, feed_dict={
                    context: [context_tokens for _ in range(batch_size)]
                })[:, len(context_tokens):]
                for i in range(batch_size):
                    generated += 1
                    text_list.append(enc.decode(out[i]))
            all_text[prpt]=text_list
    return all_text
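A sketch of a call (the prompts are hypothetical; arguments follow the signature above):

samples = noninteract_model(model_name='124M', seed=None, nsamples=2,
                            batch_size=1, length=100, temperature=1,
                            top_k=40, models_dir='models',
                            prompt_list=['The ocean is', 'In the year 3000,'])
for prompt, texts in samples.items():
    print(prompt, '->', texts[0][:80])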
Example #28
def interact_model(model_name='345M',
                   seed=None,
                   nsamples=1,
                   batch_size=1,
                   length=108,
                   temperature=0.9,
                   top_k=38,
                   raw_text=''):
    if batch_size is None:
        batch_size = 1
    assert nsamples % batch_size == 0

    enc = encoder.get_encoder(model_name)
    hparams = model.default_hparams()
    with open(os.path.join('models', model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx // 2
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" %
                         hparams.n_ctx)

    with tf.Session(graph=tf.Graph()) as sess:
        context = tf.placeholder(tf.int32, [batch_size, None])
        np.random.seed(seed)
        tf.set_random_seed(seed)
        output = sample.sample_sequence(hparams=hparams,
                                        length=length,
                                        context=context,
                                        batch_size=batch_size,
                                        temperature=temperature,
                                        top_k=top_k)

        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join('models', model_name))
        saver.restore(sess, ckpt)

        text = ''
        context_tokens = enc.encode(raw_text)
        #generated = 0
        for _ in range(nsamples // batch_size):
            out = sess.run(output,
                           feed_dict={
                               context:
                               [context_tokens for _ in range(batch_size)]
                           })[:, len(context_tokens):]
            for i in range(batch_size):
                #generated += 1
                text += enc.decode(out[i])

        # Ensure that generated text ends with punctuation or twitter handle
        #while (text[-1] not in '.?!') and ('@' not in text.split()[-1]):

    return text
Example #29
def main():

    # keep track of the commit id
    git_commit_id = get_current_git_version()
    write_log(log_file, "GIT COMMIT ID: " + git_commit_id)

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config, graph=tf.Graph()) as sess:
        hparams = model_gpt.default_hparams()
        with open(os.path.join(FLAGS.gpt_model_name, 'hparams.json')) as f:
            hparams.override_from_dict(json.load(f))

        preprocessed_data = Preprocessor(processed_data_dir, FLAGS.limits, eos, empty)
        field_id2word = preprocessed_data.fieldid2word

        model = SeqUnit(batch_size=FLAGS.batch_size, hidden_size=FLAGS.hidden_size,
                        emb_size=FLAGS.emb_size, field_size=FLAGS.field_size,
                        pos_size=FLAGS.pos_size, field_vocab=FLAGS.field_vocab,
                        source_vocab=FLAGS.source_vocab, position_vocab=FLAGS.position_vocab,
                        target_vocab=FLAGS.target_vocab, scope_name="seq2seq", name="seq2seq",
                        field_concat=FLAGS.field, position_concat=FLAGS.position,
                        fgate_enc=FLAGS.fgate_encoder, dual_att=FLAGS.dual_attention,
                        decoder_add_pos=FLAGS.decoder_pos, encoder_add_pos=FLAGS.encoder_pos,
                        learning_rate=FLAGS.learning_rate, use_coverage = FLAGS.use_coverage,
                        coverage_penalty=FLAGS.coverage_penalty, fieldid2word = field_id2word,
                        copy_gate_penalty=FLAGS.copy_gate_penalty, use_copy_gate=FLAGS.use_copy_gate,
                        gpt_hparams=hparams, vocab_ind=None,
                        empty_token=empty, stop_token=eos)

        if FLAGS.mode == 'train':
            # collect all trainable variables, exclude embeddings
            gpt_var = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='model')
            gpt_var_load = []
            for each_var in gpt_var:
                if "Adam" not in each_var.name:
                    gpt_var_load.append(each_var)
            gpt_var_load.remove(model.embedding)

            # load GPT checkpoint
            saver = tf.train.Saver(var_list=gpt_var_load)
            ckpt = tf.train.latest_checkpoint(FLAGS.gpt_model_name)
            saver.restore(sess, ckpt)

            # init other vars
            seq2seq_var = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='seq2seq')
            seq2seq_var.append(model.embedding)
            sess.run(tf.variables_initializer(var_list=seq2seq_var))

            train(sess, preprocessed_data, model)

        else:
            model.load(saved_model_path, sess)
            test_result = evaluate(sess, preprocessed_data, model, results_path, 'test')
            write_log(log_file, test_result)
Example #30
def single_interact_model_4(
    #output_dir,
    #output_file,
    models_dir,
    model_name,
    seedinput,
    outputlength,
    temperature,
    top_k,
    top_p,
):
        
        seed = None
        nsamples = 1    # number of samples to generate
        batch_size = 1
        
        enc = encoder.get_encoder(model_name)
        hparams = model.default_hparams()
        with open(os.path.join(models_dir, model_name, 'hparams.json')) as f:
            hparams.override_from_dict(json.load(f))
        if outputlength is None:
            outputlength = hparams.n_ctx // 2
        elif outputlength > hparams.n_ctx:
            raise ValueError("Can't get samples longer than window size: %s" % hparams.n_ctx)

        with tf.Session(graph=tf.Graph()) as sess:
            context = tf.placeholder(tf.int32, [batch_size, None])
            np.random.seed(seed)
            tf.set_random_seed(seed)
            output = sample.sample_sequence(
                hparams=hparams, length=outputlength,
                context=context,
                batch_size=batch_size,
                temperature=temperature, top_k=top_k, top_p=top_p
            )

            saver = tf.train.Saver()
            ckpt = tf.train.latest_checkpoint(os.path.join(models_dir, model_name))
            saver.restore(sess, ckpt)

            context_tokens = enc.encode(seedinput)
            generated = 0
        
            for _ in range(nsamples // batch_size):
                out = sess.run(output, feed_dict={
                    context: [context_tokens for _ in range(batch_size)]
                })[:, len(context_tokens):]
                for i in range(batch_size):
                    generated += 1
                    text = enc.decode(out[i])  # don't rebind the graph tensor `output`

            print("-" * 80)
        return text
def interact_model(
    model_name='117M',
    seed=None,
    nsamples=1,
    batch_size=1,
    length=None,
    temperature=1,
    top_k=0,
):
    """
    Interactively run the model
    :model_name=117M : String, which model to use
    :seed=None : Integer seed for random number generators, fix seed to reproduce
     results
    :nsamples=1 : Number of samples to return total
    :batch_size=1 : Number of batches (only affects speed/memory).  Must divide nsamples.
    :length=None : Number of tokens in generated text, if None (default), is
     determined by model hyperparameters
    :temperature=1 : Float value controlling randomness in the Boltzmann
     distribution. Lower temperature results in less random completions. As the
     temperature approaches zero, the model will become deterministic and
     repetitive. Higher temperature results in more random completions.
    :top_k=0 : Integer value controlling diversity. 1 means only 1 word is
     considered for each step (token), resulting in deterministic completions,
     while 40 means 40 words are considered at each step. 0 (default) is a
     special setting meaning no restrictions. 40 generally is a good value.
    """
    if batch_size is None:
        batch_size = 1
    assert nsamples % batch_size == 0

    enc = encoder.get_encoder(model_name)
    hparams = model.default_hparams()
    with open(os.path.join('models', model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx // 2
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" % hparams.n_ctx)

    with tf.Session(graph=tf.Graph()) as sess:
        context = tf.placeholder(tf.int32, [batch_size, None])
        np.random.seed(seed)
        tf.set_random_seed(seed)
        output = sample.sample_sequence(
            hparams=hparams, length=length,
            context=context,
            batch_size=batch_size,
            temperature=temperature, top_k=top_k
        )

        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join('models', model_name))
        saver.restore(sess, ckpt)

        while True:
            raw_text = input("Model prompt >>> ")
            while not raw_text:
                print('Prompt should not be empty!')
                raw_text = input("Model prompt >>> ")
            context_tokens = enc.encode(raw_text)
            generated = 0
            for _ in range(nsamples // batch_size):
                out = sess.run(output, feed_dict={
                    context: [context_tokens for _ in range(batch_size)]
                })[:, len(context_tokens):]
                for i in range(batch_size):
                    generated += 1
                    text = enc.decode(out[i])
                    print("=" * 40 + " SAMPLE " + str(generated) + " " + "=" * 40)
                    print(text)
            print("=" * 80)
def sample_model(
    model_name='117M',
    seed=None,
    nsamples=0,
    batch_size=1,
    length=None,
    temperature=1,
    top_k=0,
):
    """
    Run the sample_model
    :model_name=117M : String, which model to use
    :seed=None : Integer seed for random number generators, fix seed to
     reproduce results
    :nsamples=0 : Number of samples to return, if 0, continues to
     generate samples indefinitely.
    :batch_size=1 : Number of batches (only affects speed/memory).
    :length=None : Number of tokens in generated text, if None (default), is
     determined by model hyperparameters
    :temperature=1 : Float value controlling randomness in the Boltzmann
     distribution. Lower temperature results in less random completions. As the
     temperature approaches zero, the model will become deterministic and
     repetitive. Higher temperature results in more random completions.
    :top_k=0 : Integer value controlling diversity. 1 means only 1 word is
     considered for each step (token), resulting in deterministic completions,
     while 40 means 40 words are considered at each step. 0 (default) is a
     special setting meaning no restrictions. 40 generally is a good value.
    """
    enc = encoder.get_encoder(model_name)
    hparams = model.default_hparams()
    with open(os.path.join('models', model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if length is None:
        length = hparams.n_ctx
    elif length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" % hparams.n_ctx)

    with tf.Session(graph=tf.Graph()) as sess:
        np.random.seed(seed)
        tf.set_random_seed(seed)

        output = sample.sample_sequence(
            hparams=hparams, length=length,
            start_token=enc.encoder['<|endoftext|>'],
            batch_size=batch_size,
            temperature=temperature, top_k=top_k
        )[:, 1:]

        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(os.path.join('models', model_name))
        saver.restore(sess, ckpt)

        generated = 0
        while nsamples == 0 or generated < nsamples:
            out = sess.run(output)
            for i in range(batch_size):
                generated += 1
                text = enc.decode(out[i])
                print("=" * 40 + " SAMPLE " + str(generated) + " " + "=" * 40)
                print(text)