def predict():
    import logging
    from simpletransformers.language_generation import LanguageGenerationModel

    logging.basicConfig(level=logging.INFO)
    transformers_logger = logging.getLogger("transformers")
    transformers_logger.setLevel(logging.WARNING)

    model = LanguageGenerationModel("gpt2",
                                    "gpt2",
                                    args={"max_length": 256},
                                    use_cuda=False)

    # Longer, abstract-style prompts kept for reference; this run uses the
    # short prompts defined below.
    prompts_ = [
        "Despite the recent successes of deep learning, such models are still far from some human abilities like learning from few examples, reasoning and explaining decisions. In this paper, we focus on organ annotation in medical images and we introduce a reasoning framework that is based on learning fuzzy relations on a small dataset for generating explanations.",
        "There is a growing interest and literature on intrinsic motivations and open-ended learning in both cognitive robotics and machine learning on one side, and in psychology and neuroscience on the other. This paper aims to review some relevant contributions from the two literature threads and to draw links between them.",
        "Recent success of pre-trained language models (LMs) has spurred widespread interest in the language capabilities that they possess. However, efforts to understand whether LM representations are useful for symbolic reasoning tasks have been limited and scattered.",
        "Many theories, based on neuroscientific and psychological empirical evidence and on computational concepts, have been elaborated to explain the emergence of consciousness in the central nervous system. These theories propose key fundamental mechanisms to explain consciousness, but they only partially connect such mechanisms to the possible functional and adaptive role of consciousness.",
    ]

    prompts = [
        "Despite the recent successes of deep learning",
        "Learning in both cognitive and ",
        "I do not understand",
    ]

    for prompt in prompts:
        # Generate text using the model. Verbose set to False to prevent logging generated sequences.
        generated = model.generate(prompt, verbose=False)

        generated = '.'.join(generated[0].split('.')[:-1]) + '.'
        print(generated)
        print(
            "----------------------------------------------------------------------"
        )
Example #2
from simpletransformers.language_generation import LanguageGenerationModel


def generate_text(prompt, use_cuda=False):
    model = LanguageGenerationModel("gpt2",
                                    "gpt2",
                                    args={"length": 256},  # newer releases name this "max_length"
                                    use_cuda=use_cuda)
    generated_text = model.generate(prompt, verbose=False)
    # Drop the trailing partial sentence so the output ends at a full stop.
    generated_text = '.'.join(generated_text[0].split('.')[:-1]) + '.'
    return generated_text
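A quick usage sketch of the helper above (the prompt is illustrative):

print(generate_text("There is a growing interest in intrinsic motivations"))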
Example #3
def generate_tweet(handle, prompt='I think that'):
    # Older simpletransformers arg names: 'length' (max tokens) and 'k' (top-k).
    gen_args = {'length': 200,
                'k': 10}
    try:
        model = LanguageGenerationModel("gpt2", f"gpt2_outputs/{handle}", use_cuda=False, args=gen_args)
    except OSError:
        # No fine-tuned model for this handle yet; train one, then retry.
        build_language_model(handle)
        model = LanguageGenerationModel("gpt2", f"gpt2_outputs/{handle}", use_cuda=False, args=gen_args)
    generated_text = model.generate(prompt, verbose=False)
    return generated_text
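build_language_model is not shown in this snippet; a minimal sketch of what such a helper could look like with simpletransformers' LanguageModelingModel, assuming the training text lives at tweet_data/{handle}.txt:

from simpletransformers.language_modeling import LanguageModelingModel

def build_language_model(handle):
    # Fine-tune GPT-2 on the handle's tweets and save the result where
    # generate_tweet() expects to load it from.
    model = LanguageModelingModel(
        "gpt2",
        "gpt2",
        args={
            "output_dir": f"gpt2_outputs/{handle}",
            "overwrite_output_dir": True,
            "num_train_epochs": 1,
        },
        use_cuda=False,
    )
    model.train_model(f"tweet_data/{handle}.txt")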
Example #4
def __init__(self):
    if not os.path.isfile('from_scratch/best_model/pytorch_model.bin'):
        download_model()
    try:
        self.model = LanguageGenerationModel("gpt2", "from_scratch/best_model", args={"max_length": 200, "temperature": 1}, use_cuda=False)
    except Exception:
        # The downloaded weights may be corrupt; remove them and retry once.
        os.remove("from_scratch/best_model/pytorch_model.bin")
        download_model()
        print("retry")
        self.model = LanguageGenerationModel("gpt2", "from_scratch/best_model", args={"max_length": 200, "temperature": 1}, use_cuda=False)
Example #5
    def __init__(self):
        threading.Thread.__init__(self)

        self._config = ConfigParser()
        self._config.read('ssi-bot.ini')

        self._model_path = os.path.join(ROOT_DIR,
                                        self._config['DEFAULT']['model_path'])

        # if you are generating on CPU, keep use_cuda and fp16 both false.
        # If you have a nvidia GPU you may enable these features
        # TODO shift these parameters into the ssi-bot.ini file
        self._model = LanguageGenerationModel("gpt2",
                                              self._model_path,
                                              use_cuda=False,
                                              args={'fp16': False})
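The TODO above can be addressed with ConfigParser's boolean accessor; a sketch assuming use_cuda and fp16 keys are added to the DEFAULT section of ssi-bot.ini:

use_cuda = self._config['DEFAULT'].getboolean('use_cuda', fallback=False)
fp16 = self._config['DEFAULT'].getboolean('fp16', fallback=False)
self._model = LanguageGenerationModel("gpt2",
                                      self._model_path,
                                      use_cuda=use_cuda,
                                      args={'fp16': fp16})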
Example #6
class McNLP:
    def __init__(self):
        if not os.path.isfile('from_scratch/best_model/pytorch_model.bin'):
            download_model()
        try:
            self.model = LanguageGenerationModel("gpt2", "from_scratch/best_model", args={"max_length": 200, "temperature": 1}, use_cuda=False)
        except Exception:
            # The downloaded weights may be corrupt; remove them and retry once.
            os.remove("from_scratch/best_model/pytorch_model.bin")
            download_model()
            print("retry")
            self.model = LanguageGenerationModel("gpt2", "from_scratch/best_model", args={"max_length": 200, "temperature": 1}, use_cuda=False)

    def generate(self, string_to_start, temperature=1, max_length=200):
        # Per-call decoding overrides are passed through generate()'s args dict.
        generated = self.model.generate(string_to_start, args={'temperature': temperature, 'max_length': max_length}, verbose=False)
        return generated[0].replace(',', '\n')



# mc = McNLP()
# f = open("demofile2.txt", "a", encoding='utf8')
# f.write(mc.generate("שורף את הביט", temperature=1.8, max_length=300))  # Hebrew prompt: "burning the beat"
# f.close()
# print()
Example #7
import logging

from simpletransformers.language_generation import LanguageGenerationModel

logging.basicConfig(level=logging.INFO)
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)

model = LanguageGenerationModel("gpt2",
                                "outputs/fine-tuned/",
                                args={"max_length": 200},
                                use_cuda=False)
# model = LanguageGenerationModel("gpt2", "outputs/fine-tuned", args={"max_length": 200})
# model = LanguageGenerationModel("gpt2", "gpt2", args={"max_length": 200})

prompts = [
    "Click on \"Mrs.\"",
    "Click on New Lead",
    "Click on secondary checkbox.",
]

for prompt in prompts:
    # Generate text using the model. Verbose set to False to prevent logging generated sequences.
    generated = model.generate(prompt, verbose=False)

    generated = ".".join(generated[0].split(".")[:-1]) + "."
    print(
        "============================================================================="
    )
    print(generated)
    print(
        "============================================================================="
    )
Example #8
def main():
    parser = argparse.ArgumentParser()

    # Required parameters
    parser.add_argument(
        "--data_dir",
        default=None,
        type=str,
        required=True,
        help=
        "The input data dir. Should contain the source and target files for the task.",
    )
    parser.add_argument(
        "--model_name_or_path",
        default=None,
        type=str,
        required=True,
        help=
        "Path to pretrained model or model identifier from huggingface.co/models",
    )
    parser.add_argument(
        "--output_dir",
        default='output_dir/',
        type=str,
        help=
        "The output directory where the model predictions and checkpoints will be written.",
    )

    # Flags consulted below when deciding whether the output directory
    # can be reused; standard argparse boolean switches.
    parser.add_argument("--do_train", action="store_true",
                        help="Whether to run training.")
    parser.add_argument("--overwrite_output_dir", action="store_true",
                        help="Overwrite the content of the output directory.")

    args = parser.parse_args()

    if (os.path.exists(args.output_dir) and os.listdir(args.output_dir)
            and args.do_train and not args.overwrite_output_dir):
        raise ValueError(
            "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome."
            .format(args.output_dir))

    model = LanguageGenerationModel(model_type="gpt2",
                                    model_name=args.model_name_or_path,
                                    args={"max_length": 64})

    fsource = open(args.data_dir + 'test.source', 'r', encoding='utf8')
    prompts = [sent.strip() + '\t' for sent in fsource.readlines()]

    ftarget = open(args.data_dir + 'test.target', 'r', encoding='utf8')
    targets = [sent.strip() + '\t' for sent in ftarget.readlines()]

    foutput = open(args.output_dir + 'test.hypo',
                   'w',
                   encoding='utf8',
                   newline='\n')

    assert len(prompts) == len(targets)
    case_number = len(prompts)
    correct_number = 0
    for i, prompt in enumerate(prompts):
        # Generate text using the model. Verbose set to False to prevent logging generated sequences.
        generated = model.generate(prompt, verbose=False)
        foutput.write(targets[i])
        if generated[0] == targets[i]:
            correct_number += 1
        print(
            "============================================================================="
        )
        print(generated[0])
        print(targets[i])
        print(
            "============================================================================="
        )
    fsource.close()
    ftarget.close()
    foutput.close()
    print('correct number = {}, case number = {}'.format(
        correct_number, case_number))
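Invoking the script above might look like this (the script name is an assumption):

python run_generation_eval.py --data_dir data/ --model_name_or_path outputs/fine-tuned/ --output_dir output_dir/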
Example #9
class ModelTextGenerator(threading.Thread):

    daemon = True
    name = "MTGThread"

    _config = None

    def __init__(self):
        threading.Thread.__init__(self)

        self._config = ConfigParser()
        self._config.read('ssi-bot.ini')

        self._model_path = os.path.join(ROOT_DIR,
                                        self._config['DEFAULT']['model_path'])

        # if you are generating on CPU, keep use_cuda and fp16 both false.
        # If you have a nvidia GPU you may enable these features
        # TODO shift these parameters into the ssi-bot.ini file
        self._model = LanguageGenerationModel("gpt2",
                                              self._model_path,
                                              use_cuda=False,
                                              args={'fp16': False})

    def run(self):

        while True:

            try:
                # get the top job in the list
                jobs = self.top_pending_jobs()
                if not jobs:
                    # there are no jobs at all in the queue
                    # Rest a little before attempting again
                    time.sleep(30)
                    continue

                for job in jobs:
                    logging.info(
                        f"Starting to generate text for job_id {job.id}.")

                    # Increment the counter because we're about to generate text
                    job.text_generation_attempts += 1
                    job.save()

                    # use the model to generate the text
                    # pass a copy of the parameters to keep the job values intact
                    generated_text = self.generate_text(
                        job.text_generation_parameters.copy())
                    if generated_text:
                        # if the model generated text, set it into the 'job'
                        job.generated_text = generated_text
                        job.save()

            except Exception:
                logging.exception("Generating text for a job failed")

    def top_pending_jobs(self):
        """
		Get a list of text that need text to be generated, by treating
		each database Thing record as a 'job'.
		Three attempts at text generation are allowed.

		"""

        query = (db_Thing.select(db_Thing)
                 .where(db_Thing.text_generation_parameters.is_null(False))
                 .where(db_Thing.generated_text.is_null())
                 .where(db_Thing.text_generation_attempts < 3)
                 .order_by(db_Thing.created_utc))
        return list(query)

    def generate_text(self, text_generation_parameters):

        start_time = time.time()

        # pop the prompt out from the args
        prompt = text_generation_parameters.pop('prompt')

        output_list = self._model.generate(prompt=prompt,
                                           args=text_generation_parameters)

        end_time = time.time()
        duration = round(end_time - start_time, 1)

        logging.info(
            f'{len(output_list)} sample(s) of text generated in {duration} seconds.'
        )

        return output_list[0]
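generate_text expects a dict holding the prompt plus any decoding overrides, mirroring the pop('prompt') above; a hypothetical job parameter set (arg names assume a recent simpletransformers release):

text_generation_parameters = {
    'prompt': 'What do you all think about',
    'max_length': 200,
    'temperature': 0.8,
    'top_k': 40,
}
# mtg = ModelTextGenerator()
# print(mtg.generate_text(text_generation_parameters.copy()))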
Example #10
import logging
from simpletransformers.language_generation import LanguageGenerationModel

logging.basicConfig(level=logging.INFO)
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)

model = LanguageGenerationModel("gpt2",
                                "outputs/from_scratch/",
                                args={"length": 200})
# model = LanguageGenerationModel("gpt2", "outputs/fine-tuned", args={"length": 200})
# model = LanguageGenerationModel("gpt2", "gpt2", args={"length": 200})

prompts = [
    "Despite the recent successes of deep learning, such models are still far from some human abilities like learning from few examples, reasoning and explaining decisions. In this paper, we focus on organ annotation in medical images and we introduce a reasoning framework that is based on learning fuzzy relations on a small dataset for generating explanations.",
    "There is a growing interest and literature on intrinsic motivations and open-ended learning in both cognitive robotics and machine learning on one side, and in psychology and neuroscience on the other. This paper aims to review some relevant contributions from the two literature threads and to draw links between them.",
    "Recent success of pre-trained language models (LMs) has spurred widespread interest in the language capabilities that they possess. However, efforts to understand whether LM representations are useful for symbolic reasoning tasks have been limited and scattered.",
    "Many theories, based on neuroscientific and psychological empirical evidence and on computational concepts, have been elaborated to explain the emergence of consciousness in the central nervous system. These theories propose key fundamental mechanisms to explain consciousness, but they only partially connect such mechanisms to the possible functional and adaptive role of consciousness.",
    "I failed the first quarter of a class in middle school, so I made a fake report card. I did this every quarter that year. I forgot that they mail home the end-of-year cards, and my mom got it before I could intercept with my fake. She was PISSED—at the school for their error.",
]

for prompt in prompts:
    # Generate text using the model. Verbose set to False to prevent logging generated sequences.
    generated = model.generate(prompt, verbose=False)

    generated = ".".join(generated[0].split(".")[:-1]) + "."
    print(
        "============================================================================="
    )
    print(generated)
    print(
        "============================================================================="
    )
Example #11
args["use_cuda"] = True
myclient = pymongo.MongoClient("your mongodb")
mydb = myclient["test"]
mycol = mydb["news"]
for article in (response.json())["articles"]:
    content = article["description"]
    title = article["title"]
    time = article["publishedAt"]
    url = article["urlToImage"]
    by = article["source"]["name"]
    if content is None or content == "":
        continue
    for i in range(10):
        try:
            model = LanguageGenerationModel("gpt2",
                                            "distilgpt2",
                                            use_cuda=True)
            c = model.generate(prompt=content, args=args)[0]
            print(len(c))
            if len(c) > 1000 and good(c) and len(c) < 2000:
                print(c)
                mydict = {
                    'title': title,
                    'content': c[:-13],
                    'time': time,
                    'by': by,
                    'url': url
                }
                mycol.create_index([('title', pymongo.ASCENDING)], unique=True)
                x = mycol.insert_one(mydict)
                break
        except Exception:
            # Model loading, generation, or the unique-index insert can fail;
            # retry on the next loop iteration.
            continue
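good() is a quality filter defined elsewhere in the original script; a hypothetical stand-in for local testing:

def good(text):
    # Reject outputs with obvious generation artifacts; the real filter
    # in the source project is not shown here.
    return '<|endoftext|>' not in text and 'http' not in text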
Example #12
"""
File to generate response based on GPT-2 language model
"""

from simpletransformers.language_generation import LanguageGenerationModel
from simpletransformers.classification import ClassificationModel
import config as conf
from util import *
import random

model = LanguageGenerationModel("gpt2",
                                "comedy_bot/models/text_gen",
                                args={"max_length": 300},
                                use_cuda=False)

score = ClassificationModel("roberta",
                            "comedy_bot/models/text_classify",
                            use_cuda=False)


def generate_response(prompt, trunc=True):
    """
    Generate a response based on incoming prompt
    :return: generated response
    """
    if prompt[-1] not in ['.', '?', '!', '"', '”'] and trunc:
        prompt = prompt + '.'

    if trunc is False:
        words = conf.ADD_WORDS
Example #13
import logging
import simpletransformers
from simpletransformers.language_generation import LanguageGenerationModel

logging.basicConfig(level=logging.INFO)
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)

model = LanguageGenerationModel("gpt2",
                                "outputs/from_scratch/best_model/",
                                args={"max_length": 20},
                                use_cuda=False)
# model = LanguageGenerationModel("gpt2", "outputs/fine-tuned", args={"max_length": 200})
# model = LanguageGenerationModel("gpt2", "gpt2", args={"max_length": 200})

prompts = [
    "Bonjour",
    "Ça va?"
]

for prompt in prompts:
    generated = model.generate(prompt, verbose=False)

    generated = ".".join(generated[0].split(".")[:-1]) + "."
    print("=============================================================================")
    print(generated)
    print("=============================================================================")