def generate_samples():
    """Generate unconditional samples."""
    # args, model, enc, device, SAMPLE_DIR, counter, and maketree are assumed
    # module-level names (counter is presumably the current training step).
    print('Generating samples...')
    generated = 0
    all_text = []
    for _ in range(args.sample_num):
        out = sample_sequence(model=model, length=args.sample_length,
                              context=None,
                              start_token=enc.encoder['<|endoftext|>'],
                              batch_size=1, temperature=1.0,
                              top_k=args.top_k, device=device)
        out = out.tolist()[0]  # the original's [:, :] slice was a no-op
        generated += 1
        text = enc.decode(out)
        print("=" * 40 + " SAMPLE " + str(generated) + " " + "=" * 40)
        print(text)
        all_text.append(text)
    maketree(os.path.join(SAMPLE_DIR, args.run_name))
    with open(os.path.join(SAMPLE_DIR, args.run_name,
                           'samples-{}.txt'.format(counter)), 'w') as fp:
        fp.write('\n'.join(all_text))
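# The function above calls a maketree() helper that is not defined in this
# section. A minimal sketch of what it is assumed to do (name and behavior
# inferred from the call site, not confirmed by the source):
import os

def maketree(path):
    # Create the directory tree for sample output, tolerating existing dirs.
    os.makedirs(path, exist_ok=True)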
def text_generator(self):
    if self.args.length == -1:
        self.args.length = self.config.n_ctx // 2
    elif self.args.length > self.config.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s"
                         % self.config.n_ctx)

    if self.args.quiet is False:
        print(self.args.text)

    context_tokens = self.enc.encode(self.args.text)
    generated = 0
    for _ in range(self.args.nsamples // self.args.batch_size):
        out = sample_sequence(
            model=self.model,
            length=self.args.length,
            context=context_tokens if not self.args.unconditional else None,
            start_token=self.enc.encoder['<|endoftext|>'] if self.args.unconditional else None,
            batch_size=self.args.batch_size,
            temperature=self.args.temperature,
            top_k=self.args.top_k,
            device=self.device)
        out = out[:, len(context_tokens):].tolist()
        for i in range(self.args.batch_size):
            generated += 1
            text = self.enc.decode(out[i])
            if self.args.quiet is False:
                print("=" * 40 + " SAMPLE " + str(generated) + " " + "=" * 40)
                print(text)
    # Note: only the text of the final sample is returned.
    return text
def text_generator(state_dict, given_starting_letter):
    seed = random.randint(0, 2147483647)
    np.random.seed(seed)
    torch.random.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    enc = get_encoder()
    config = GPT2Config()
    model = GPT2LMHeadModel(config)
    model = load_weight(model, state_dict)
    model.to(device)
    model.eval()

    # EXAMPLE_LETTER is an assumed module-level prompt template; as written,
    # the given_starting_letter argument is never used.
    context_tokens = enc.encode(EXAMPLE_LETTER)
    generated = 0
    out = sample_sequence(
        model=model,
        length=config.n_ctx // 2,
        context=context_tokens,
        start_token=None,
        batch_size=1,
        temperature=0.7,
        top_k=40,
        device=device,
    )
    out = out[:, len(context_tokens):].tolist()
    text = enc.decode(out[0])
    print(text)
    return text
def text_generator(state_dict):
    # Sampling settings, inlined from the original (commented-out) argparse
    # defaults; the args_* names were previously undefined.
    args_quiet = False
    args_nsamples = 1
    args_unconditional = False
    args_batch_size = 1
    args_temperature = 0.7
    args_top_k = 40

    global GPT2_output  # output is published through a module-level name

    if args_quiet is False:
        print('nsamples=%d batch_size=%d temperature=%.2f top_k=%d'
              % (args_nsamples, args_batch_size, args_temperature, args_top_k))
    assert args_nsamples % args_batch_size == 0

    seed = random.randint(0, 2147483647)
    np.random.seed(seed)
    torch.random.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load Model
    enc = get_encoder()
    config = GPT2Config()
    model = GPT2LMHeadModel(config)
    model = load_weight(model, state_dict)
    model.to(device)
    model.eval()

    # Half the context window, as in the other variants.
    args_length = config.n_ctx // 2

    # GPT2_seed_text is an assumed module-level prompt string.
    context_tokens = enc.encode(GPT2_seed_text)
    generated = 0
    for _ in range(args_nsamples // args_batch_size):
        out = sample_sequence(
            model=model,
            length=args_length,
            context=context_tokens if not args_unconditional else None,
            start_token=enc.encoder['<|endoftext|>'] if args_unconditional else None,
            batch_size=args_batch_size,
            temperature=args_temperature,
            top_k=args_top_k,
            device=device
        )
        out = out[:, len(context_tokens):].tolist()
        for i in range(args_batch_size):
            generated += 1
            text = enc.decode(out[i])
            if args_quiet is False:
                print("=" * 40 + " SAMPLE " + str(generated) + " " + "=" * 40)
            GPT2_output = text
            print(text)
def text_generator(state_dict, param_prompt, param_nsamples, param_batch_size,
                   param_length, param_temperature, param_top_k):
    param_unconditional = None

    if param_batch_size == -1:
        param_batch_size = 1
    assert param_nsamples % param_batch_size == 0

    seed = random.randint(0, 2147483647)
    np.random.seed(seed)
    torch.random.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load Model
    enc = get_encoder()
    config = GPT2Config()
    model = GPT2LMHeadModel(config)
    model = load_weight(model, state_dict)
    model.to(device)
    model.eval()

    if param_length == -1:
        param_length = config.n_ctx // 2
    elif param_length > config.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s"
                         % config.n_ctx)

    response = param_prompt
    context_tokens = enc.encode(param_prompt)
    generated = 0
    for _ in range(param_nsamples // param_batch_size):
        out = sample_sequence(
            model=model,
            length=param_length,
            context=context_tokens if not param_unconditional else None,
            start_token=enc.encoder['<|endoftext|>'] if param_unconditional else None,
            batch_size=param_batch_size,
            temperature=param_temperature,
            top_k=param_top_k,
            device=device
        )
        out = out[:, len(context_tokens):].tolist()
        for i in range(param_batch_size):
            generated += 1
            text = enc.decode(out[i])
            # The prompt plus the final decoded sample is returned; the
            # original also built a "SAMPLE n" banner here, but immediately
            # overwrote it, so that dead assignment has been dropped.
            response = param_prompt + text
    return response
def text_generator(input_text):
    if gpt2_parameters.get("quiet") is False:
        print('GPT-2 parameters used: ' + str(gpt2_parameters))
    if gpt2_parameters.get("batch_size") == -1:
        gpt2_parameters["batch_size"] = 1
    assert gpt2_parameters.get("nsamples") % gpt2_parameters.get("batch_size") == 0

    seed = random.randint(0, 2147483647)
    np.random.seed(seed)
    torch.random.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    enc = get_encoder()
    config = GPT2Config(model_file)  # built once (the original built it twice)
    print(config.output_config())
    model = GPT2LMHeadModel(config)
    model = load_weight(model, state_dict)
    model.to(device)
    model.eval()

    if gpt2_parameters.get("length") == -1:
        gpt2_parameters["length"] = config.n_ctx // 2
    elif gpt2_parameters.get("length") > config.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" % config.n_ctx)

    print('TEXT INPUT: ' + input_text)
    context_tokens = enc.encode(input_text)
    generated = 0
    for _ in range(gpt2_parameters.get("nsamples") // gpt2_parameters.get("batch_size")):
        out = sample_sequence(
            model=model,
            length=gpt2_parameters.get("length"),
            context=context_tokens if not gpt2_parameters.get("unconditional") else None,
            start_token=enc.encoder['<|endoftext|>'] if gpt2_parameters.get("unconditional") else None,
            batch_size=gpt2_parameters.get("batch_size"),
            temperature=gpt2_parameters.get("temperature"),
            top_k=gpt2_parameters.get("top_k"),
            device=device
        )
        out = out[:, len(context_tokens):].tolist()
        for i in range(gpt2_parameters.get("batch_size")):
            generated += 1
            text = enc.decode(out[i])
            # Re-seed the context with the sample just produced.
            context_tokens = enc.encode(text)
            if gpt2_parameters.get("quiet") is False:
                print("=" * 40 + " SAMPLE " + str(generated) + " " + "=" * 40)
            if '<|endoftext|>' in text:
                print(input_text + text.replace("<|endoftext|>", ' (END-OF-TEXT)'))
                return input_text + text.replace("<|endoftext|>", ' (END-OF-TEXT)')
            else:
                print(input_text + text + '...')
                return input_text + text + '...'
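# This variant reads every sampling setting from a module-level
# gpt2_parameters dict instead of taking arguments. A plausible configuration
# sketch -- the key names are inferred from the .get() calls above, and the
# default values are borrowed from the argparse-based variants:
gpt2_parameters = {
    "quiet": False,
    "nsamples": 1,
    "unconditional": False,
    "batch_size": -1,   # normalized to 1 inside text_generator()
    "length": -1,       # replaced by config.n_ctx // 2 inside text_generator()
    "temperature": 0.7,
    "top_k": 40,
}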
def text_generator(state_dict, args):
    if args.quiet is False:
        print(args)

    if args.batch_size == -1:
        args.batch_size = 1
    assert args.nsamples % args.batch_size == 0

    seed = random.randint(0, 2147483647)
    np.random.seed(seed)
    torch.random.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load Model
    enc = get_encoder()
    config = GPT2Config()
    model = GPT2LMHeadModel(config)
    model = load_weight(model, state_dict)
    model.to(device)
    model.eval()

    if args.length == -1:
        args.length = config.n_ctx // 2
    elif args.length > config.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" % config.n_ctx)

    print(args.text)
    context_tokens = enc.encode(args.text)
    generated = 0
    for _ in range(args.nsamples // args.batch_size):
        out = sample_sequence(
            model=model,
            length=args.length,
            context=context_tokens if not args.unconditional else None,
            start_token=enc.encoder['<|endoftext|>'] if args.unconditional else None,
            batch_size=args.batch_size,
            temperature=args.temperature,
            top_k=args.top_k,
            device=device)
        out = out[:, len(context_tokens):].tolist()
        for i in range(args.batch_size):
            generated += 1
            text = enc.decode(out[i])
            if args.quiet is False:
                print("=" * 40 + " SAMPLE " + str(generated) + " " + "=" * 40)
            print(text)
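# Every variant in this section delegates the decoding loop to a
# sample_sequence() helper whose body does not appear here. Below is a
# minimal sketch of that helper, following the common GPT-2 PyTorch top-k
# sampling loop; it is an assumption about the implementation, not the
# verbatim source.
import torch
import torch.nn.functional as F

def top_k_logits(logits, k):
    # Keep only the k highest logits; push the rest to -inf so softmax
    # assigns them zero probability. k == 0 disables filtering.
    if k == 0:
        return logits
    values, _ = torch.topk(logits, k)
    min_values = values[:, -1].unsqueeze(-1)
    return torch.where(logits < min_values,
                       torch.full_like(logits, -1e10), logits)

def sample_sequence(model, length, start_token=None, batch_size=1,
                    context=None, temperature=1.0, top_k=0, device='cpu'):
    # Exactly one of context (conditional) or start_token (unconditional)
    # must be given, matching how the callers above switch on unconditional.
    if start_token is None:
        assert context is not None
        context = torch.tensor(context, device=device,
                               dtype=torch.long).unsqueeze(0).repeat(batch_size, 1)
    else:
        assert context is None
        context = torch.full((batch_size, 1), start_token,
                             device=device, dtype=torch.long)
    prev, output, past = context, context, None
    with torch.no_grad():
        for _ in range(length):
            logits, past = model(prev, past=past)          # reuse cached keys/values
            logits = logits[:, -1, :] / temperature        # last position only
            logits = top_k_logits(logits, k=top_k)
            probs = F.softmax(logits, dim=-1)
            prev = torch.multinomial(probs, num_samples=1) # sample next token
            output = torch.cat((output, prev), dim=1)
    return output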
def predict(self, text, cursor):
    print(text)
    # Previous Markov-chain based implementation, kept for reference:
    # text = text.split('\n')
    # line = cursor['line']
    # ch = cursor['ch']
    # last_lines = text[line - 3 if line - 3 >= 0 else 0 : line]
    # try:
    #     starting_text = " ".join(last_lines) + " " + text[line][:ch]
    #     starting_text = " ".join(starting_text.split()[-self.state_size:])
    #     starting_text = starting_text.lower()
    #     print(starting_text)
    #     result = []
    #     for _ in range(3):
    #         result.append(self.model.make_sentence_with_start(starting_text))
    #     print(result)
    #     return result
    # except KeyError:
    #     return None

    context_tokens = self.enc.encode(text)
    print(context_tokens)
    generated = 0
    response = []
    for _ in range(2 // self.batch_size):
        out = sample_sequence(
            model=self.model,
            length=self.length,
            context=context_tokens,
            start_token=None,
            batch_size=self.batch_size,
            temperature=0.7,
            top_k=40,
            device=self.device
        )
        out = out[:, len(context_tokens):].tolist()
        for i in range(self.batch_size):
            generated += 1
            text = self.enc.decode(out[i])
            print("=" * 40 + " SAMPLE " + str(generated) + " " + "=" * 40)
            print(text)
            response.append(text)
    return response
def text_generator(model, text):
    nsamples = 1
    batch_size = -1
    length = 200
    temperature = .7
    top_k = 40
    unconditional = False

    if batch_size == -1:
        batch_size = 1
    assert nsamples % batch_size == 0

    seed = random.randint(0, 2147483647)
    np.random.seed(seed)
    torch.random.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model.to(device)
    model.eval()

    # config and enc are assumed module-level names (GPT2Config and encoder).
    if length == -1:
        length = config.n_ctx // 2
    elif length > config.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" % config.n_ctx)

    context_tokens = enc.encode(text)
    generated = 0
    for _ in range(nsamples // batch_size):
        out = sample_sequence(
            model=model,
            length=length,
            context=context_tokens if not unconditional else None,
            start_token=enc.encoder["<|endoftext|>"] if unconditional else None,
            batch_size=batch_size,
            temperature=temperature,
            top_k=top_k,
            device=device,
        )
        out = out[:, len(context_tokens):].tolist()
        for i in range(batch_size):
            generated += 1
            text = enc.decode(out[i])
    return text
def text_generator_for_out(text, model, device, length=200, temperature=0.7,
                           top_k=40, path_to_model=path):
    print("text_generator_for_out", path)
    if os.path.exists(path + '/' + 'gpt2-pytorch_model.bin'):
        print(path + '/' + 'gpt2-pytorch_model.bin')

    enc = get_encoder()
    quiet = False
    # The original reassigned length = 200 here, silently overriding the
    # caller's argument; that override has been removed.
    if length == -1:
        length = 1024 // 2
    elif length > 1024:
        raise ValueError("Can't get samples longer than window size: %s" % 1024)

    context_tokens = enc.encode(text)
    generated = 0
    for _ in range(1):
        out = sample_sequence(model=model, length=length,
                              context=context_tokens, start_token=None,
                              batch_size=1, temperature=temperature,
                              top_k=top_k, device=device)
        out = out[:, len(context_tokens):].tolist()
        for i in range(1):
            generated += 1
            text = enc.decode(out[i])
            if quiet is False:
                print("=" * 40 + " SAMPLE " + str(generated) + " " + "=" * 40)
                print("in big gen2", text)
    return text
def text_generator(params):
    if params['batch_size'] == -1:
        params['batch_size'] = 1
    assert params['nsamples'] % params['batch_size'] == 0

    if params['length'] == -1:
        params['length'] = config.n_ctx // 2
    elif params['length'] > config.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" % config.n_ctx)

    seed = random.randint(0, 2147483647)
    np.random.seed(seed)
    torch.random.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    context_tokens = my_encoder.encode(params['text'])
    generated = 0
    output = []
    output_text = []
    for _ in range(params['nsamples'] // params['batch_size']):
        out = sample_sequence(
            model=model,
            length=params['length'],
            context=context_tokens if not params['unconditional'] else None,
            start_token=my_encoder.encoder['<|endoftext|>'] if params['unconditional'] else None,
            batch_size=params['batch_size'],
            temperature=params['temperature'],
            top_k=params['top_k'],
            device=device)
        out = out[:, len(context_tokens):].tolist()
        output.append(out)
        for i in range(params['batch_size']):
            generated += 1
            text = my_encoder.decode(out[i])
            output_text.append(text.split('<|endoftext|>')[0])
    return output_text, output
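# A sketch of how this dict-driven variant might be called, assuming the
# module-level model, my_encoder, config, and device that its body relies on
# are already initialized. The prompt string is purely illustrative.
texts, token_ids = text_generator({
    'text': 'The meaning of life is',
    'nsamples': 2,
    'batch_size': 1,
    'length': 50,
    'temperature': 0.7,
    'top_k': 40,
    'unconditional': False,
})
for t in texts:
    print(t)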
def text_generator(seed, unconditional=False, nsamples=1, batch_size=-1,
                   length=-1, temperature=0.7, top_k=40):
    enc = get_encoder()
    context_tokens = enc.encode(seed)

    if batch_size == -1:
        batch_size = 1
    assert nsamples % batch_size == 0

    if length == -1:
        length = config.n_ctx // 2
    elif length > config.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" % config.n_ctx)

    out = sample_sequence(
        model=model,
        length=length,
        context=context_tokens if not unconditional else None,
        start_token=enc.encoder['<|endoftext|>'] if unconditional else None,
        batch_size=batch_size,
        temperature=temperature,
        top_k=top_k,
        device=device)

    text = ''
    out = out[:, len(context_tokens):].tolist()
    for i in range(batch_size):
        text += enc.decode(out[i])

    html = ''
    html = add_content(html, header('Input Seed ', color='black',
                                    gen_text='Network Output'))
    html = add_content(html, box(seed, text))
    return f'<div>{html}</div>'
def text_generator(state_dict):
    parser = argparse.ArgumentParser()
    parser.add_argument("--text", type=str, required=True)
    parser.add_argument("--quiet", type=bool, default=False)
    parser.add_argument("--nsamples", type=int, default=1)
    parser.add_argument('--unconditional', action='store_true',
                        help='If true, unconditional generation.')
    parser.add_argument("--batch_size", type=int, default=-1)
    parser.add_argument("--length", type=int, default=-1)
    parser.add_argument("--temperature", type=float, default=0.7)
    parser.add_argument("--top_k", type=int, default=40)
    args = parser.parse_args()

    # ================================================================================
    if args.quiet is False:
        print(args)

    # ================================================================================
    if args.batch_size == -1:
        args.batch_size = 1

    # ================================================================================
    assert args.nsamples % args.batch_size == 0

    # ================================================================================
    seed = random.randint(0, 2147483647)
    np.random.seed(seed)
    torch.random.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # ================================================================================
    # Load Model
    enc = get_encoder()
    config = GPT2Config()
    model = GPT2LMHeadModel(config)

    # ================================================================================
    model = load_weight(model, state_dict)
    model.to(device)
    model.eval()

    # ================================================================================
    if args.length == -1:
        args.length = config.n_ctx // 2
    elif args.length > config.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" % config.n_ctx)

    # ================================================================================
    print(args.text)  # e.g. "I use computer"

    # ================================================================================
    context_tokens = enc.encode(args.text)
    # print("context_tokens", context_tokens)  # [40, 779, 3644]

    # ================================================================================
    # print("args.length", args.length)  # 512
    generated = 0
    for _ in range(args.nsamples // args.batch_size):
        out = sample_sequence(
            model=model,
            length=args.length,
            context=context_tokens if not args.unconditional else None,
            start_token=enc.encoder['<|endoftext|>'] if args.unconditional else None,
            batch_size=args.batch_size,
            temperature=args.temperature,
            top_k=args.top_k,
            device=device)
        # print("out", out)        # tensor([[ 40, 779, 3644, 1143, 3788, 284, ...]])
        # print("out", out.shape)  # torch.Size([1, 515])

        len_ctx_tokens = len(context_tokens)
        # print("len_ctx_tokens", len_ctx_tokens)  # 3
        out = out[:, len_ctx_tokens:].tolist()

        # ================================================================================
        # print("args.batch_size", args.batch_size)  # 1
        for i in range(args.batch_size):
            generated += 1

            # ================================================================================
            # print("out", out)       # [[3783, 11, 543, 318, 257, 1688, 636, 286, ...]]
            # print("out", len(out))  # 1

            # ================================================================================
            indexed_out = out[i]
            # print("indexed_out", indexed_out)       # [5479, 588, 9678, 290, 24134, ...]
            # print("indexed_out", len(indexed_out))  # 512

            # ================================================================================
            text = enc.decode(indexed_out)
            print("text", text)
            # e.g. "terminals with Ethernet cable to connect the computer to a
            # computer system that has a computer terminal. An additional feature"

            # ================================================================================
            if args.quiet is False:
                print("=" * 40 + " SAMPLE " + str(generated) + " " + "=" * 40)
            print(text)
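# A minimal driver for the argparse variant above, assuming the conventional
# gpt2-pytorch_model.bin checkpoint name used elsewhere in this section:
if __name__ == '__main__':
    # Load pretrained weights on CPU when no GPU is available, then hand
    # them to text_generator(), which builds the model itself.
    state_dict = torch.load(
        'gpt2-pytorch_model.bin',
        map_location='cpu' if not torch.cuda.is_available() else None)
    text_generator(state_dict)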
def generator(text):
    state_dict = torch.load(
        'gpt2-pytorch_model.bin',
        map_location='cpu' if not torch.cuda.is_available() else None)

    quiet = False
    nsamples = 1
    unconditional = False
    batch_size = -1
    length = -1
    temperature = 0.7
    top_k = 40

    if batch_size == -1:
        batch_size = 1
    assert nsamples % batch_size == 0

    seed = random.randint(0, 2147483647)
    np.random.seed(seed)
    torch.random.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load Model
    enc = get_encoder()
    config = GPT2Config()
    model = GPT2LMHeadModel(config)
    model = load_weight(model, state_dict)
    model.to(device)
    model.eval()

    if length == -1:
        length = config.n_ctx // 2
    elif length > config.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" % config.n_ctx)

    print(text)
    context_tokens = enc.encode(text)
    generated = 0
    for _ in range(nsamples // batch_size):
        out = sample_sequence(
            model=model,
            length=length,
            context=context_tokens if not unconditional else None,
            start_token=enc.encoder['<|endoftext|>'] if unconditional else None,
            batch_size=batch_size,
            temperature=temperature,
            top_k=top_k,
            device=device)
        out = out[:, len(context_tokens):].tolist()
        for i in range(batch_size):
            generated += 1
            text = enc.decode(out[i])
            if quiet is False:
                print("=" * 40 + " SAMPLE " + str(generated) + " " + "=" * 40)
            return text
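# Since generator() loads the checkpoint itself, calling it needs no setup;
# the prompt below is purely illustrative.
print(generator('Once upon a time'))  # returns the generated continuation only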
def text_generator(state_dict):
    parser = argparse.ArgumentParser()
    # parser.add_argument("--text", type=file, required=True)
    parser.add_argument('filename')
    parser.add_argument("--quiet", type=bool, default=False)
    parser.add_argument("--nsamples", type=int, default=1)
    parser.add_argument('--unconditional', action='store_true',
                        help='If true, unconditional generation.')
    parser.add_argument("--batch_size", type=int, default=-1)
    parser.add_argument("--length", type=int, default=40)
    parser.add_argument("--temperature", type=float, default=0.7)
    parser.add_argument("--top_k", type=int, default=40)
    args = parser.parse_args()

    # Pull headlines from the news API; main_url is an assumed module-level URL.
    open_bbc_page = requests.get(main_url).json()
    article = open_bbc_page["articles"]
    results = []
    for ar in article:
        results.append(ar["title"])
    print(results[1])
    text1 = results[1]

    with open(args.filename) as file:
        # text1 = file.read()
        print(text1)

    if args.quiet is False:
        print(args)
    if args.batch_size == -1:
        args.batch_size = 1
    assert args.nsamples % args.batch_size == 0

    seed = random.randint(0, 2147483647)
    np.random.seed(seed)
    torch.random.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load Model
    enc = get_encoder()
    config = GPT2Config()
    model = GPT2LMHeadModel(config)
    model = load_weight(model, state_dict)
    model.to(device)
    model.eval()

    if args.length == -1:
        args.length = config.n_ctx // 2
    elif args.length > config.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" % config.n_ctx)

    print(text1)
    context_tokens = enc.encode(text1)
    generated = 0
    for _ in range(args.nsamples // args.batch_size):
        out = sample_sequence(
            model=model,
            length=args.length,
            context=context_tokens if not args.unconditional else None,
            start_token=enc.encoder['<|endoftext|>'] if args.unconditional else None,
            batch_size=args.batch_size,
            temperature=args.temperature,
            top_k=args.top_k,
            device=device)
        out = out[:, len(context_tokens):].tolist()
        for i in range(args.batch_size):
            generated += 1
            text = enc.decode(out[i])
            if args.quiet is False:
                print("=" * 40 + " SAMPLE " + str(generated) + " " + "=" * 40)
            print(text)
            # Post headline + generated continuation; api is an assumed
            # module-level tweepy client.
            text = text1 + text
            api.update_status(status=text)