import gpt_2_simple as gpt2

sess = gpt2.start_tf_sess()
gpt2.load_gpt2(sess, checkpoint_dir="Plato/checkpoint")
answer = gpt2.generate(sess,
                       length=100,
                       include_prefix=False,
                       temperature=0.1,
                       top_k=1,
                       top_p=0.9,
                       model_dir="Plato/models",
                       sample_dir="Plato/samples",
                       checkpoint_dir="Plato/checkpoint",
                       run_name='run1',
                       prefix="What is philosophy",
                       return_as_list=True)

print(answer[0])
Example #2
import gpt_2_simple as gpt2

# gpt2.download_gpt2(model_name="124M")  # This could also be "355M", "774M", or "1558M"

sess = gpt2.start_tf_sess()

gpt2.finetune(sess,
              "words.txt",
              model_name='124M',
              steps=1,
              restore_from='fresh',
              run_name='run1')
sess = gpt2.reset_session(sess)  # the finetuned model already occupies the graph; reset before reloading
gpt2.load_gpt2(sess)
gpt2.generate(sess, length=100, temperature=0.7, prefix="hi mom ", nsamples=1)
Example #3
sess = gpt2.start_tf_sess()

gpt2.finetune(sess,
              dataset=file_name,  # path to the training text, defined earlier
              model_name='124M',
              steps=1000,
              restore_from='fresh',
              run_name='run1',
              print_every=10,
              sample_every=200,
              save_every=500
              )
           
gpt2.copy_checkpoint_to_gdrive(run_name='run1')  # back up the trained checkpoint to Google Drive

gpt2.copy_checkpoint_from_gdrive(run_name='run1')  # restore it when resuming in a fresh session

sess = gpt2.start_tf_sess()
gpt2.load_gpt2(sess, run_name='run1')

gpt2.generate(sess, run_name='run1')

gpt2.generate(sess,
              length=250,
              temperature=0.7,
              prefix="This restaurant is",
              nsamples=5,
              batch_size=5
              )
Example #4
#!/usr/bin/env python3
import gpt_2_simple as gpt2
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

binary_input = "110100100011000110101010000111001111100110011000010110010011010101010000000100001100111000100100011010000000101011101000111011101001110100001100010001100100010001001111101101000010011010100110101010010101111000110001100011101010100100010010001111110000110110011110011010101000110011101010010010001001100000010100110110001010110001100000001011001101001111011001001110101110110001010011011011111110"
sess = gpt2.start_tf_sess()
gpt2.load_gpt2(sess)

gpt2.generate(sess,
              length=20,
              temperature=.7,
              prefix=binary_input,
              nsamples=3
              )
Example #5
# Fragment of a Starlette app; VALID_CATEGORIES, MIN_LENGTH, MAX_LENGTH,
# STEP_LENGTH, generate_count, sess, and response_header are defined
# elsewhere in the app.
import re

import gpt_2_simple as gpt2
import tensorflow as tf
from starlette.responses import UJSONResponse


async def homepage(request):
    global generate_count
    global sess

    if request.method == 'GET':
        params = request.query_params
    elif request.method == 'POST':
        params = await request.json()
    elif request.method == 'HEAD':
        return UJSONResponse({'text': ''}, headers=response_header)

    category = params.get('category', '').lower().strip()

    if category == '':
        category = 'love'

    if category not in VALID_CATEGORIES:
        return UJSONResponse(
            {'text': '<span style="font-size: 2em">ಠ_ಠ</span>'},
            headers=response_header)

    keywords = " ".join([
        v.replace(' ', '-').strip() for k, v in params.items()
        if 'key' in k and v != ''
    ])

    prepend = "<|startoftext|>~`{}~^{}~".format(category, keywords) + "}"
    text = prepend + params.get('prefix', '')[:100]

    length = MIN_LENGTH

    while '<|endoftext|>' not in text and length <= MAX_LENGTH:
        text = gpt2.generate(sess,
                             length=STEP_LENGTH,
                             temperature=0.7,
                             top_k=40,
                             prefix=text,
                             include_prefix=True,
                             return_as_list=True)[0]
        length += STEP_LENGTH

        generate_count += 1
        if generate_count == 8:
            # Reload model to prevent Graph/Session from going OOM
            tf.reset_default_graph()
            sess.close()
            sess = gpt2.start_tf_sess(threads=1)
            gpt2.load_gpt2(sess)
            generate_count = 0

    prepend_esc = re.escape(prepend)
    eot_esc = re.escape('<|endoftext|>')

    if '<|endoftext|>' not in text:
        pattern = '(?:{})(.*)'.format(prepend_esc)
    else:
        pattern = '(?:{})(.*)(?:{})'.format(prepend_esc, eot_esc)

    trunc_text = re.search(pattern, text)

    return UJSONResponse({'text': trunc_text.group(1)},
                         headers=response_header)
Example #6
"""After the model is trained, you can copy the checkpoint folder to your own Google Drive.

If you want to download it to your personal computer, it's strongly recommended you copy it there first, then download from Google Drive. The checkpoint folder is copied as a `.rar` compressed file; you can download it and uncompress it locally.
"""

gpt2.copy_checkpoint_to_gdrive(run_name='run1')


gpt2.load_gpt2(sess, run_name='run1')

"""## Generate Text From The Trained Model

After you've trained the model or loaded a retrained model from checkpoint, you can now generate text. `generate` generates a single text from the loaded model.
"""

gpt2.generate(sess, run_name='run1')


"""For bulk generation, you can generate a large amount of text to a file and sort out the samples locally on your computer. The next cell will generate a generated text file with a unique timestamp.

You can rerun the cells as many times as you want for even more generated texts!
"""

from datetime import datetime

gen_file = 'gpt2_gentext_{:%Y%m%d_%H%M%S}.txt'.format(datetime.utcnow())

gpt2.generate_to_file(sess,
                      destination_path=gen_file,
                      length=500,
                      temperature=0.7,
                      nsamples=100,
                      batch_size=20
                      )
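# In Colab, the generated file can then be pulled down to the local machine;
# a minimal sketch, assuming the standard google.colab helper:
from google.colab import files
files.download(gen_file)  # triggers a browser download; only works inside Colab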
Example #7
# Define a new pandas DataFrame to store songID, songName, and lyrics for later use
# (assumes `import pandas as pd` and the songID/songName/lyrics lists from earlier cells)
lyrics_data = pd.DataFrame({'songID': songID, 'songName': songName, 'lyrics': lyrics})
lyrics_data.head(3)

with open('/content/drive/My Drive/lyrics.txt', 'w') as f:
    for item in lyrics_data.lyrics.to_list():
        f.write("%s\n" % item)

gpt2.copy_file_from_gdrive('lyrics.txt')

sess = gpt2.start_tf_sess()

gpt2.finetune(sess,
              dataset='lyrics.txt',
              model_name='124M',
              steps=1000,
              restore_from='fresh',
              run_name='run1',
              print_every=10,
              sample_every=200,
              save_every=500
              )

gpt2.generate(sess,
              length=250,
              temperature=0.7,
              prefix="My Love",
              nsamples=5,
              batch_size=5
              )
Example #8
    # The snippet begins mid-call; given `train_path = gen_path` at the end of
    # each pass, the truncated opening is presumably gpt2.finetune(sess,
    # dataset=train_path, ...):
    gpt2.finetune(sess,
                  dataset=train_path,
                  overwrite=False,
                  steps=steps,
                  batch_size=2,
                  run_name=run_name)  # steps is the max number of training steps

    gen_path = 'gen/' + run_name + '/result_' + str(i) + '.txt'
    f = open(gen_path, "a+", encoding='utf8')

    for j in range(iters):
        prefix = ' '.join(prefix.split()[-35:])
        print(prefix)
        prefix = gpt2.generate(sess,
                               temperature=1.15,
                               top_k=30,
                               sample_delim=' ',
                               run_name=run_name,
                               prefix=prefix,
                               include_prefix=True,
                               truncate='<|endoftext|>',
                               return_as_list=True)[0]
        prefix = re.sub(r' {2,}', ' ', prefix)
        prefix = re.sub(r'\t{2,}', '\t', prefix)
        prefix = re.sub(r'\n{2,}', '\n', prefix)
        f.write(' '.join(prefix.split()[35:]))

    f.close()

    train_path = gen_path
    steps = 10
    sess.close()
Example #9
import argparse
import os
import time

import gpt_2_simple as gpt2

# GPT2Finetuner, is_looping, and the TOKEN_* constants come from the
# surrounding project and are not shown in this snippet.


def main():
    """
    The main function.
    """
    parser = argparse.ArgumentParser(
        description="Finetune a GPT-2 model using ff2zim")
    parser.add_argument("-d",
                        "--debug",
                        action="store_true",
                        help="show debug information")
    subparsers = parser.add_subparsers(dest="action",
                                       help="action to perform",
                                       required=True)

    # parser for generating trainingfile
    tfparser = subparsers.add_parser(
        "generate-trainingfile",
        help="generate the trainingfile from a ff2zim project")
    tfparser.add_argument("project", help="path to ff2zim project")
    tfparser.add_argument("trainingfile", help="path to write trainingfile to")
    tfparser.add_argument(
        "--add-epub",
        action="store",
        nargs="*",
        help="add an epub or a directory of epubs to the trainingfile",
        metavar="PATH",
        dest="epubpaths")

    # parser for encoding the trainingfile
    eparser = subparsers.add_parser(
        "encode-trainingfile",
        help="encode a trainingfile for better performance")
    eparser.add_argument("trainingfile", help="path to trainingfile to encode")
    eparser.add_argument("outfile", help="path to write to")
    eparser.add_argument("model", help="model to encode for")

    # parser for finetuning
    finetuneparser = subparsers.add_parser(
        "finetune", help="finetune a gpt-2 model using a trainingfile")
    finetuneparser.add_argument("trainingfile", help="path to trainingfile")
    finetuneparser.add_argument("--model",
                                action="store",
                                default="124M",
                                help="model to use")
    finetuneparser.add_argument("--run-name",
                                action="store",
                                dest="runname",
                                default="run1",
                                help="run name for finetuned model.")

    # parser for generating
    genparser = subparsers.add_parser(
        "generate", help="generate a sample with an interactive prompt")
    genparser.add_argument("--model",
                           action="store",
                           default="124M",
                           help="model to use")
    genparser.add_argument("--run-name",
                           action="store",
                           dest="runname",
                           default="run1",
                           help="run name for finetuned model.")
    genparser.add_argument("-n",
                           "--numsamples",
                           action="store",
                           type=int,
                           help="number of samples to generate",
                           default=1)
    genparser.add_argument("-m",
                           "--mode",
                           action="store",
                           choices=("story", "chapter", "complete"),
                           default="story")

    ns = parser.parse_args()

    if ns.action == "generate-trainingfile":
        print("Generating trainingfile...")
        trainingfile = ns.trainingfile
        finetuner = GPT2Finetuner(ns.project, ns.epubpaths)
        num_stories, num_epubs = finetuner.create_training_file(trainingfile)
        print("Trainingfile successfully created.")
        print("Included: {} fanfics and {} epubs.".format(
            num_stories, num_epubs))
        return

    elif ns.action == "encode-trainingfile":
        print("Encoding trainingfile...")
        gpt2.encode_dataset(ns.trainingfile,
                            out_path=ns.outfile,
                            model_name=ns.model)
        print("Done.")
        return

    elif ns.action == "finetune":
        model = ns.model
        if not os.path.isdir(os.path.join("models", model)):
            print("Downloading the '{}' model...".format(model))
            gpt2.download_gpt2(model_name=model)
            print("Download finished.")
        print("Starting TF session...")
        sess = gpt2.start_tf_sess()
        print("TF session started.")
        print("Finetuning...")
        gpt2.finetune(
            sess,
            ns.trainingfile,
            model_name=model,
            run_name=ns.runname,
            print_every=100,
            sample_every=500,
            save_every=500,
            use_memory_saving_gradients=True,
            accumulate_gradients=1,
        )
    elif ns.action == "generate":
        prepend_story_start = False
        print("========== Generate a story ==========")
        if ns.mode in ("story", "chapter"):
            story_start = "\n" + TOKEN_STORY_START + "\n"
            description_s = "\n" + TOKEN_DESCRIPTION_START + "\n"
            description = input("Description of story: ")
            description_s += description + "\n" + TOKEN_DESCRIPTION_END + "\n"
            story_start += description_s + "\n" + TOKEN_CHAPTER_START + "\n"
            prepend_story_start = True
        elif ns.mode == "complete":
            story_start = input("Prompt: ")
        print("========== Generating... =========")
        print("Starting TF session...")
        sess = gpt2.start_tf_sess()
        print("TF session started.")
        print("Loading gpt-2...")
        gpt2.load_gpt2(sess)
        print("Loaded.")
        print("Generating: ", end="", flush=True)
        results = []
        for i in range(ns.numsamples):
            finished = False
            storyparts = []
            while not finished:
                if not storyparts:
                    # first generation
                    prefix = story_start
                elif prepend_story_start:
                    # also include story start
                    prefix = description_s
                    prefix += " ".join(storyparts[-1].split(" ")[-21:-1])
                else:
                    prefix = " ".join(storyparts[-1].split(" ")[-21:-1])
                multisamples = True
                gpt2results = gpt2.generate(
                    sess,
                    model_name=ns.model,
                    run_name=ns.runname,
                    prefix=prefix,
                    return_as_list=True,
                    # nsamples=ns.numsamples,
                    seed=int(time.time()),
                    temperature=0.8,
                    top_k=50,
                    top_p=0.9,
                    nsamples=(5 if multisamples else 1),
                )
                result = None
                for gpt2result in gpt2results:
                    gpt2result = gpt2result[len(prefix):]
                    if not is_looping(gpt2result):
                        result = gpt2result
                        break
                    if result is None:
                        # set default just to be sure
                        result = gpt2result

                if ns.debug:
                    print("=====")
                    print("#storyparts: ", len(storyparts))
                    if len(storyparts) > 0:
                        print("-----\nLast storypart: \n-----\n",
                              storyparts[-1])
                    print("-----\nResult: \n-----\n", result)
                    print("=====")

                if ns.mode == "story" or ns.mode == "chapter":
                    if is_looping(result):
                        print("L", end="", flush=True)
                        # remove last part to reduce chance of looping
                        storyparts = storyparts[:-1]
                        continue

                    # append result
                    storyparts.append(result)
                    if TOKEN_CHAPTER_END in result:
                        print("C", end="", flush=True)
                        if ns.mode == "chapter":
                            finished = True
                    elif TOKEN_STORY_END in result:
                        print("S", end="", flush=True)
                        finished = True
                    else:
                        print(".", end="", flush=True)
                elif ns.mode == "complete":
                    # set result
                    storyparts = [prefix + result]
                    finished = True
            # results.append(story[len(prefix):])
            results.append("".join(storyparts))
        print("\n", flush=True)
        for text in results:
            print("========= Result =========")
            print(text)
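# The snippet ends without the usual entry-point guard; presumably the file
# closes with:
if __name__ == "__main__":
    main()

# Hypothetical command-line usage of the parser above (the script name and
# paths are assumptions, not from the original):
#
#   python finetuner.py generate-trainingfile ./my_ff2zim_project train.txt
#   python finetuner.py encode-trainingfile train.txt train.txt.npz 124M
#   python finetuner.py finetune train.txt --model 124M --run-name run1
#   python finetuner.py generate --run-name run1 -n 3 -m story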
Example #10
temp = t / 10
for i in range(10):
    monl = []
    with open('names.txt', 'r') as f:
        line = f.readline()
        while line:
            monl.append(
                f'<|startoftext|>\n{{\n    "monster_name": "{line.strip()}"'
            )
            line = f.readline()
    print(monl)
    # batch_prefix is not a stock gpt-2-simple argument; this presumably
    # targets a fork that accepts one prefix per batch element
    monsters = gpt2.generate(sess,
                             return_as_list=True,
                             run_name="dnd11",
                             batch_prefix=monl,
                             temperature=temp,
                             batch_size=len(monl),
                             nsamples=len(monl),
                             truncate="<|endoftext|>",
                             length=10240)
    for mon in monsters:
        mon = mon.replace("<|startoftext|>\n", "")
        print(mon)
        quit()  # debug exit: stops after the first monster, so the JSON repair below never runs
        # load() and fix_json() are project helpers for parsing and repairing the generated JSON
        try:
            h = str(load(mon))
        except Exception:
            pre_fix_exceptions += 1
        mon = fix_json(mon, 100)
        try:
            h = str(load(mon))
Example #11
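# Fragment of a loop that reloads and samples several finetuned models;
# run_name, model_num, test_input_list, num_samples, output_file, and
# dump_pickle come from earlier in the script.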
    print(run_name)
    print("Model " + str(i + 1) + " from " + str(model_num))
    print("=====================================")
    sess = gpt2.reset_session(sess)  # reset_session already closes the old session and returns a fresh one
    gpt2.load_gpt2(sess, run_name=run_name)

    generated_paraphrase[run_name] = {}
    for test_input in test_input_list:
        test_input = test_input.replace('\n', '')
        gen_para = gpt2.generate(
            sess,
            run_name=run_name,
            length=50,
            temperature=1,
            prefix=test_input,
            nsamples=num_samples,
            batch_size=1,
            include_prefix=True,
            truncate='<|end of text|>',
            return_as_list=True)
        # Results keep the prefix: with include_prefix=False the output is
        # sometimes inconsistent, which makes preprocessing harder.
        generated_paraphrase[run_name][test_input] = {}
        for i, x in enumerate(gen_para):
            generated_paraphrase[run_name][test_input][i] = x.replace(
                test_input, '').replace('\n', '')

    dump_pickle("generated_paraphrase/" + output_file + '.pkl',
                generated_paraphrase)

    pd.concat(
Example #12
    url = 'https://drive.google.com/u/0/uc?id=1T5BntQPxoRjGDMAUbqUwUnsDs2DARM6C&export=download'  # you can set this to your own model on gdrive
    # Note: this downloads run2.tar (a finetuned checkpoint), not shakespeare.txt.
    # For training your own model, see https://github.com/minimaxir/gpt-2-simple
    # or https://colab.research.google.com/drive/1VLG8e7YSEwypxU-noRNhsv5dW4NfTGce
    output = 'run2.tar'
    print("Downloading pretrained model...")
    gdown.download(url, output, quiet=False)
    print("Untar run2.tar")  # the archive still has to be extracted before loading
    exit(0)
runName = "run2"
sess = gpt2.start_tf_sess()
gpt2.load_gpt2(sess, run_name=runName)
# return_as_list=True is needed here: without it generate() prints the text
# itself and returns None
print(
    gpt2.generate(sess,
                  length=250,
                  temperature=0.7,
                  prefix="Fuhrer",
                  nsamples=5,
                  batch_size=5,
                  run_name=runName,
                  return_as_list=True))


def generate(prefixs="nexity"):
    return gpt2.generate(sess,
                         length=250,
                         temperature=0.7,
                         prefix=prefixs,
                         nsamples=5,
                         batch_size=5,
                         run_name=runName,
                         return_as_list=True)
Example #13
#!/usr/bin/env python3
import gpt_2_simple as gpt2
import json

pregenerated = open('pregenerated.json', 'r')
entries = json.load(pregenerated)
pregenerated.close()

sess = gpt2.start_tf_sess()
gpt2.load_gpt2(sess)

petitions = gpt2.generate(sess,
                          prefix="<|startofpetition|>",
                          truncate="<|endofpetition|>",
                          include_prefix=False,
                          return_as_list=True,
                          batch_size=10,
                          nsamples=10)

for petition in petitions:
    petition = petition.replace("<|startofpetition|>",
                                "")  # Sometimes this gets duplicated
    petition = petition.replace("Reason for rejection:",
                                "<h2>Why was this petition rejected?</h2>")
    lines = petition.split("\n")
    if lines[0].strip() == "":
        start = 1
    else:
        start = 0
    entry = {'title': lines[start], 'body': "<br />".join(lines[start + 2:])}
    entries.append(entry)
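# The snippet is cut off here; presumably it ends by writing the augmented
# list back out (the target filename is an assumption):
with open('pregenerated.json', 'w') as outfile:
    json.dump(entries, outfile, indent=2)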
Example #14
gpt2GeneratedText = []
gpt2_length = []
file = open("BLM_CNN_GPT2_ouputs.txt",
            "w")  # Stores the output of each GPT2 generated article
for i in range(0, len(headlines), 1):
    print("----------- " + str(i) + " -----------")
    headline_prefix = headlines[i]

    # Set the length of the generation to the max generation length if the article length is greater than the max
    articleLength = len(article_text[i].split())
    if articleLength > 1023:
        articleLength = 1023
    output = gpt2.generate(sess,
                           prefix=headline_prefix,
                           length=articleLength,
                           temperature=0.7,
                           top_p=0.9,
                           return_as_list=True)
    print(output)
    gpt2GeneratedText.append(output[0])
    gpt2_length.append(articleLength)

    file.write("---------------------------- " + str(i) +
               " ----------------------------\n")
    file.write("Prefix Text (Headlines): " + headline_prefix + "\n")
    file.write("length: " + str(articleLength) + "\n")
    file.write("GPT2 Output:\n" + output[0] + "\n\n")

file.close()

# Dictionary to store the collected data in a .csv file
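# A minimal sketch of the .csv step announced above (column names and the
# output filename are assumptions, not from the original):
import pandas as pd

collected = pd.DataFrame({'headline': headlines,
                          'gpt2_text': gpt2GeneratedText,
                          'length': gpt2_length})
collected.to_csv('BLM_CNN_GPT2_outputs.csv', index=False)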
Example #15
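# Fragment of a tweet generator: the surrounding loop builds tweet_prompt
# from the first word_count words of an existing tweet, then samples a
# continuation from the finetuned model named by `model`.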
            for idx, word in enumerate(split_tweet):
                if idx < word_count:
                    tweet_prompt = tweet_prompt + word + " "

            tweet_prompt = tweet_prompt.strip(" ")

            # Generate the tweet using the gpt-2 model
            sess = gpt2.start_tf_sess()
            gpt2.load_gpt2(sess, run_name=model)

            tweet = gpt2.generate(sess,
                                  run_name=model,
                                  checkpoint_dir=file_directory +
                                  "/checkpoint",
                                  length=100,
                                  temperature=.7,
                                  nsamples=1,
                                  batch_size=1,
                                  prefix=tweet_prompt,
                                  truncate='<|endoftext|>',
                                  include_prefix=True,
                                  return_as_list=True)[0]

            gpt2.reset_session(sess)

    # Clean the tweet string
    tweet = tweet.replace("\"]", "")
    tweet = tweet.replace("[\"", "")
    tweet = tweet.replace("\']", "")
    tweet = tweet.replace("[\'", "")
    tweet = tweet.replace("\\n", "\n")
    tweet = tweet.replace("\\'", "\'")
Example #16
########################
######  AthenaAI  ######
######    2021    ######
########################

import gpt_2_simple as gpt2

sess = gpt2.start_tf_sess()
gpt2.load_gpt2(sess)

question_to_ask = ""  # Put question here (as short as possible) and test the result.

answer = gpt2.generate(sess,
                       length=100,
                       include_prefix=True,
                       temperature=0.1,
                       top_k=1,
                       top_p=0.9,
                       run_name='run1',
                       prefix=question_to_ask,
                       return_as_list=True)[0]

print(answer)
Example #17
import gpt_2_simple as gpt2

sess = gpt2.start_tf_sess()
gpt2.load_gpt2(sess)

# pre = '<|newsubmission|>\n\nCMV: The internet is a "monopoly" where the price of knowledge is controlled by only a few, and the popularity of existing platforms and services is driven by a desire to sell more.\nI think that a lot of the criticism of the internet as a marketplace and a place where everyone is competing for resources is a bit unfair.\n<|newcomment|>'

pre = """

CMV: Seeing a person dressed as a woman that is clearly a man will never not be weird.
I know gender dysphoria is real and transgender people exist. I know that the world would be better off accepting that fact.

But in the push to accept transgender people in society, there seems to be this awkward game of pretend that is expected to be played.

It is one thing to simply adjust the pronouns you use to make them comfortable, but it is another thing entirely to be expected to convince yourself that they are the gender in which they identify. If you meet a trans woman that is clearly trans (most notably ones that used to be men with clear male facial features, endomorph or even receding hairlines) you are expected to actually believe she is a woman on a biological level.

There are some very lucky biological men that can transition and be incredibly convincing, but most aren't, and if you aren't one of those lucky people, you will never be seen as an actual woman no matter how much pretend we are expected to play. We can all act like they are women, but the brain already sorts them out as "men dressed as women" way before any conscious decision is made, and it will always be weird and awkward seeing these individuals no matter how nice and accepting we are about it.

<|newcomment|>
"""

single_text = gpt2.generate(sess,
                            prefix=pre,
                            truncate="<|newsubmission|>",
                            return_as_list=True)[0]
print("<|newsubmission|>\n")
print(single_text)
Example #18
# The snippet begins mid-call; only the tail of a gpt2.finetune(...) call
# survives, so the call head below is a hypothetical reconstruction:
gpt2.finetune(sess,
              print_every=2,
              sample_every=5,
              save_every=10
              )
# Stop to see if it's trained enough

gpt2.copy_checkpoint_to_gdrive(run_name='fantasy3')
input_text="I was riding a cycle when"
#this is where we enter the text to get suggestions
#Other hyperparameters
length=20
top_p=0.5
temperature=0.9
top_k=0.8
#To be used while running

suggestions = gpt2.generate(sess,
                            prefix=input_text,
                            length=length,
                            temperature=temperature,
                            top_p=top_p,
                            top_k=top_k,
                            nsamples=5,
                            batch_size=5,
                            include_prefix=False,
                            return_as_list=True
                            )
# Loads suggestions into a list (return_as_list=True)

gpt2.copy_checkpoint_to_gdrive()
Example #19
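# Fragment of a generation loop; run_name, temp, startFile, readFile, prefix,
# and the debug list are defined earlier in the script.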
    first = True

    cycleCount = 0

    while True:
        debug.append('\n\ncycle %i' % cycleCount)
        debug.append('prefix:')
        debug.append(prefix)
        cycleCount += 1

        if startFile and first:
            text = readFile(startFile)
        else:
            text = gpt2.generate(sess,
                                 run_name=run_name,
                                 prefix=prefix,
                                 temperature=(temp / 100),
                                 return_as_list=True)[0]

        first = False

        debug.append('output:')
        debug.append(text)
        print('\n\noutput:')
        print(text)

        newLines = text.split('\n')

        direction = None
        lastIndex = None
        for line in newLines:
Example #20
def create_recipe():
    text = gpt2.generate(sess, return_as_list=True, length=200)[0]
    text = "\n".join(text.split("."))
    print(text)
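# Hypothetical usage: create_recipe() reads a module-level `sess`, so a
# session must be started and a model loaded before calling it:
#
#   sess = gpt2.start_tf_sess()
#   gpt2.load_gpt2(sess)
#   create_recipe()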
Example #21
File: gen.py Project: ef1j/ASR33
#!/usr/bin/env python
import gpt_2_simple as gpt2
import sys

if len(sys.argv) > 1:
    prompt = sys.argv[1]
else:
    prompt = "prompt: So, what's new around here?"

print(prompt)
# sys.exit(1)  # this debug exit would stop the script before any generation happens

sess = gpt2.start_tf_sess()
gpt2.load_gpt2(sess)

single_text = gpt2.generate(
        sess,
        return_as_list=True,
        temperature=0.75,
        include_prefix=False,
        truncate="<|endoftext|>",
        prefix="""ASCII Today - Fun with the Teletype Terminal"""
        )[0]

print(single_text)

Example #22
import gpt_2_simple as gpt2

sess = gpt2.start_tf_sess()
gpt2.load_gpt2(sess)

single_text = gpt2.generate(sess,
                            length=60,
                            temperature=0.7,
                            prefix="Meu amor",
                            nsamples=5,
                            batch_size=5,
                            return_as_list=True)[0]

# print(single_text)
Example #23
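# The snippet begins deep inside an interactive (y/n) retraining loop,
# mid-way through a gpt2.finetune(...) call whose opening is cut off: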
                                                  model_name=model_name,
                                                  steps=int(rounds),
                                                  run_name="run" + model_name)
                                    y = False
                                elif response == 'n':
                                    gpt2.reset_session(sess)
                                    gpt2.finetune(sess,
                                                  fil,
                                                  model_name=model_name,
                                                  steps=int(rounds),
                                                  run_name="run" + model_name)
                                    y = False
                                else:
                                    print('(y/n) please')
                        print("Generating text... this might take a while:")
                        gpt2.generate(sess, truncate='<|endoftext|>')
                    elif response == 'n':
                        print(
                            'Alright I can only generate then, using the older model saved in checkpoint.'
                        )
                        x = False
                        print("Generating text... this might take a while:")
                        gpt2.generate(sess, truncate='<|endoftext|>')
                    else:
                        print('(y/n) please')

    elif response == 'n':
        print('Got it, Generating only then')
        if not sess_flag:
            # gpt2.load_gpt2(sess)
            gpt2.load_gpt2(sess, run_name="run" + model_name)
Example #24
import gpt_2_simple as gpt2s
from better_profanity import profanity  # censor() lives on the profanity object

FILE_PATH = ""

meme_model = "124M"
gpt2s.download_gpt2(model_name=meme_model)

# censor() expects a string, so read the file first; finetune() expects a
# dataset *path*, so write the censored text back out to a file.
with open(FILE_PATH + "extracted_text.txt") as f:
    censored_text = profanity.censor(f.read())
censored_path = FILE_PATH + "extracted_text_censored.txt"
with open(censored_path, "w") as f:
    f.write(censored_text)

session = gpt2s.start_tf_sess()
gpt2s.finetune(session, censored_path, model_name=meme_model, steps=100)
txt_list = gpt2s.generate(session, temperature=0.7, return_as_list=True)
print(txt_list)
Example #25
# Fragment of a Flask endpoint (the route decorator is cut off in the
# snippet); model_map, mapping a writer name to a model, is defined elsewhere.
import random

import gpt_2_simple as gpt2
import tensorflow as tf
from flask import jsonify, request


def gen_fresh():
    content = request.json

    min_length = content['min_sample_len']
    max_length = content['max_sample_len']
    num_samples = 1
    past_context_len = content['past_context_len']
    iterations = content['iterations']

    model_str = content['writer']
    model_name = model_map[model_str]

    generated_str = ""

    random_len = random.randint(min_length, max_length)
    # setup gpt2
    sess = gpt2.start_tf_sess()
    gpt2.load_gpt2(sess, model_name=model_name)
    output = gpt2.generate(
        sess,
        checkpoint_dir="models",
        run_name=model_name,
        length=random_len,
        prefix="<|startoftext|>",
        temperature=0.9,
        top_k=50,
        top_p=0.95,
        # nsamples=num_samples,
        # batch_size=num_samples,
        return_as_list=True)
    output_str = output[0]
    output_str_arr = output_str.split(' ')
    input_str = ' '.join(output_str_arr[-past_context_len:])

    if iterations != 0:
        generated_str += ' '.join(
            output_str_arr[:-past_context_len]) + ' '  # or '\n'
    else:
        generated_str += output_str + ' '

    for i in range(iterations - 1):
        random_len = random.randint(min_length, max_length)
        output = gpt2.generate(
            sess,
            checkpoint_dir="models",
            run_name=model_name,
            length=random_len,
            prefix=input_str,
            temperature=0.9,
            top_k=50,
            top_p=0.95,
            # nsamples=num_samples,
            # batch_size=num_samples,
            return_as_list=True)
        output_str = output[0]
        output_str_arr = output_str.split(' ')
        input_str = ' '.join(output_str_arr[-past_context_len:])
        # keep only the new words (the trailing context seeds the next
        # iteration) unless this is the last pass, which keeps the full output
        if i != iterations - 2:
            generated_str += ' '.join(
                output_str_arr[:-past_context_len]) + ' '  # or '\n'
        else:
            generated_str += output_str + ' '

    tf.reset_default_graph()
    sess.close()

    res = {'output': generated_str}

    return jsonify(res)
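# Hypothetical request against this endpoint (route, port, and the "writer"
# value are assumptions; model_map is defined elsewhere in the app):
#
#   curl -X POST http://localhost:5000/gen_fresh \
#        -H "Content-Type: application/json" \
#        -d '{"min_sample_len": 50, "max_sample_len": 150,
#             "past_context_len": 20, "iterations": 2, "writer": "poe"}'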
Example #26
def train(filename):
    sess = gpt2.start_tf_sess()
    gpt2.finetune(sess, filename, model_name=model_name,
                  steps=1000)  # steps is max number of training steps

    gpt2.generate(sess)
Example #27
import gpt_2_simple as gpt2
import os
import requests

model_name = "124M"
if not os.path.isdir(os.path.join("models", model_name)):
    print("Downloading {model_name} model...")
    gpt2.download_gpt2(
        model_name=model_name
    )  # model is saved into current directory under /models/124M/

file_name = "sourcetext/grimms_fairy_tales.txt"
if not os.path.isfile(file_name):
    url = "http://www.gutenberg.org/files/2591/2591-0.txt"
    data = requests.get(url)

    with open(file_name, 'w') as f:
        f.write(data.text)

sess = gpt2.start_tf_sess()
gpt2.finetune(sess, file_name, model_name=model_name,
              steps=1000)  # steps is max number of training steps

gpt2.generate(sess)
Example #28
gpt2.finetune(sess,
              dataset=file_name,  # training text path, defined earlier
              model_name='124M',
              steps=1000,
              restore_from='fresh',
              run_name='run1',
              print_every=10,
              sample_every=200,
              save_every=500
              )

#generation
gpt2.generate(sess,
              length=150,
              temperature=0.7,
              prefix="Two households, both alike in dignity",
              nsamples=5,
              batch_size=5
              )

gpt2.generate(sess,
              length=150,
              temperature=0.7,
              prefix="Who's there?",
              nsamples=5,
              batch_size=5
              )

gpt2.generate(sess,
              length=150,
              temperature=0.7,
Example #29
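# The snippet begins mid-call, inside a wrapper function whose
# gpt2.generate(...) opening is cut off; it restores the working directory
# and returns the generated text: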
        length=length,
        temperature=0.4,
        nsamples=1,
        return_as_list=True,
        include_prefix=include_prefix,
    )[0]
    os.chdir(old_wd)
    return generated


if __name__ == "__main__":
    prefixes = [
        "He's gonna go for the flip-reset",
        "And we're in the grand finals",
        "David passes to Robbie",
        "And we're at match point",
    ]

    for prefix in prefixes:
        # Generates the messages using the model.
        generated = gpt2.generate(
            sess,
            prefix=prefix,
            length=25,
            temperature=0.2,
            nsamples=5,
            return_as_list=True,
        )
        print("\n".join(generated))
        print()
Example #30
# saving the text generated when run at 200 steps

import gpt_2_simple as gpt2
import json

sess = gpt2.start_tf_sess()
gpt2.load_gpt2(sess)

# gpt2.generate(sess)

text = gpt2.generate(sess, return_as_list=True, batch_size=5, nsamples=5)

with open("GPT2_Surrealist_Text_200new.json", "w") as output:
    json.dump(text, output, indent=2)