def process_dataset(self, subsets, perturbation):
    """Return the tokenized (context, response) samples, sorted on length.

    Every dialogue in ``self.dialogues`` is split into sub-dialogues with
    ``self.subdialogues``, the resulting samples are passed through
    ``self.perturb_dataset`` and each (context, response) pair is encoded to
    token ids. The context utterances are joined with the separator token,
    and both the context and the response are terminated with it.

    Args:
        subsets: Forwarded unchanged to ``self.subdialogues``.
        perturbation: Forwarded unchanged to ``self.perturb_dataset``.

    Returns:
        A list of ``(context_ids, response_ids)`` tuples, sorted ascending on
        ``10 * len(context_ids) + len(response_ids)`` (plus a random jitter
        of ``self.shuffle * randint(0, 10)``). When ``self.max_length > 0``
        only samples whose combined length is strictly below it are kept;
        when ``self.remove_top_n > 0`` that many samples are dropped from
        the end of the sorted list.

    Note:
        For tokenizers that are not a ``Tokenizer`` instance, the special
        tokens are (re-)registered on ``self.tokenizer`` on every call.
    """
    # Split all the dialogues in subdialogues --> ([utterances in context], response)
    dialogue_samples = list(
        itertools.chain.from_iterable(
            self.subdialogues(dialogue, subsets) for dialogue in self.dialogues
        )
    )

    # Perturb the context, based on the specified perturbation option
    perturbed_samples = self.perturb_dataset(dialogue_samples, perturbation)

    # Pick the separator token and a callable that maps text -> list of ids.
    if isinstance(self.tokenizer, Tokenizer):
        # For the Tokenizer class, the ids have to be extracted from the
        # object returned by encode().
        sep_token = get_token(self.tokenizer, "sep_token")

        def encode(text):
            return self.tokenizer.encode(text).ids
    else:
        # For the other models, the encode function already gives the ids
        self.tokenizer.add_special_tokens({
            'sep_token': "[SEP]",
            'pad_token': "[PAD]",
            'mask_token': "[MASK]",
            'eos_token': "[EOS]",
        })
        sep_token = self.tokenizer.sep_token
        encode = self.tokenizer.encode

    tokenized_samples = [
        (encode(sep_token.join(context) + sep_token),
         encode(response + sep_token))
        for (context, response) in perturbed_samples
    ]

    # Drop the samples that are too long (only when a limit is configured)
    if self.max_length > 0:
        tokenized_samples = [
            (x, y) for (x, y) in tokenized_samples
            if len(x) + len(y) < self.max_length
        ]

    # Shuffle first, then sort on length to limit the padding inside a batch.
    # The jitter in the key keeps some randomness among samples of similar
    # length when shuffling is enabled.
    if self.shuffle:
        random.shuffle(tokenized_samples)
    sorted_samples = sorted(
        tokenized_samples,
        key=lambda sample: (len(sample[0]) * 10 + len(sample[1])
                            + self.shuffle * random.randint(0, 10)),
    )

    # Cut off the samples at the end of the sorted list (the largest keys)
    if self.remove_top_n > 0:
        sorted_samples = sorted_samples[:int(-1 * self.remove_top_n)]
    return sorted_samples
def sendrecvTest(screen):
    """Prepare the curses screen, then log in and run the client.

    Args:
        screen: The curses window to configure and draw the loading
            message on. It is also stored in the module-level ``scr``.
    """
    global scr

    # Terminal / window input modes
    curses.cbreak()
    curses.noecho()
    screen.keypad(True)
    screen.nodelay(True)

    # Show a placeholder while the client is starting up
    screen.addstr(0, 0, "Loading. Please wait.")
    screen.refresh()

    # Publish the window module-wide (presumably read by input_handler;
    # verify against its definition).
    scr = screen

    login_token = get_token()

    # Schedule the input handler on the event loop before the blocking run
    loop = asyncio.get_event_loop()
    loop.create_task(input_handler())
    client.run(login_token, bot=False)
def main():
    """Handle the command-line option, then log in and run the client.

    Recognised first arguments:
        ``--help`` / ``-h``: print the help text and exit.
        ``--token`` / ``--store-token``: store a token and exit.
        ``--skeleton`` / ``--copy-skeleton`` / ``--config``: accepted here
            without any action (handled in utils.settings.py).
    Any other first argument prints an error and exits. Without arguments
    the client is started directly.
    """
    # Look the argument up once. Only a missing argument means "no command";
    # an IndexError raised inside one of the handlers must not be mistaken
    # for that and silently ignored.
    command = sys.argv[1] if len(sys.argv) > 1 else None

    if command is None:
        pass
    elif command in ("--help", "-h"):
        from utils.print_utils.help import print_help
        print_help()
        quit()
    elif command in ("--token", "--store-token"):
        store_token()
        quit()
    elif command in ("--skeleton", "--copy-skeleton", "--config"):
        # ---- now handled in utils.settings.py ---- #
        pass
    else:
        print(gc.term.red("E̸̗͚̰̜͎̫̿̐̊̔͆̾̉̊̅̚͝RRƠ̸̳̙̙̯̌͒̓̽͒̒̄̓͒̽͒͊̅̍̐̇̋̽̅̎̿͒̈́̈͒̈́͝͝͠͝͝R: Unknown command."))
        print(gc.term.yellow("オプションについては 「 --help 」 を参照してください。\nSee --help for options."))
        quit()

    check_for_updates()
    token = get_token()
    init_input()

    print(gc.term.yellow("S̷̨̻͗̋͒ociet̸̼̯̣͑̈́͝t̶̲̺̰́̌e 🗲 「 ソシエット 」を参ります。お待ちください。\n\nSociette (Erune): Scarlet Vane\n"))
    print(gc.term.yellow("作成... Alabaster Dance Tenka..."))
    print(gc.term.yellow("作成... First Dance Kagura..."))
    print(gc.term.yellow("作成... Sixth Dance Bizen..."))
    print(gc.term.yellow("作成... Closing Dance Tokiyomi..."))
    print(gc.term.yellow("作成とYuel... Sapphire Dance: Gentiana...\n"))
    print(gc.term.yellow("Setting up Societte (socie)...\n"))

    # start the client
    try:
        gc.client.run(token, bot=False)
    except (KeyboardInterrupt, SystemExit):
        pass

    # if we are here, the client's loop was cancelled or errored, or user exited
    try:
        kill()
    except BaseException:
        # Deliberately catch everything: if our cleanly-exit kill function
        # failed for whatever reason, make sure we at least exit uncleanly
        quit()
def main():
    """Handle the command-line option, then log in and run the client.

    Recognised first arguments:
        ``--help`` / ``-h``: print the help text and exit.
        ``--token`` / ``--store-token``: store a token and exit.
        ``--skeleton`` / ``--copy-skeleton`` / ``--config``: accepted here
            without any action (handled in utils.settings.py).
    Any other first argument prints an error and exits. Without arguments
    the client is started directly.
    """
    # Look the argument up once. Only a missing argument means "no command";
    # an IndexError raised inside one of the handlers must not be mistaken
    # for that and silently ignored.
    command = sys.argv[1] if len(sys.argv) > 1 else None

    if command is None:
        pass
    elif command in ("--help", "-h"):
        from utils.print_utils.help import print_help
        print_help()
        quit()
    elif command in ("--token", "--store-token"):
        store_token()
        quit()
    elif command in ("--skeleton", "--copy-skeleton", "--config"):
        # ---- now handled in utils.settings.py ---- #
        pass
    else:
        print(gc.term.red("Error: Unknown command."))
        print(gc.term.yellow("See --help for options."))
        quit()

    check_for_updates()
    token = get_token()
    init_input()

    print(gc.term.yellow("Starting..."))

    # start the client
    try:
        gc.client.run(token, bot=False)
    except (KeyboardInterrupt, SystemExit):
        pass

    # if we are here, the client's loop was cancelled or errored, or user exited
    try:
        kill()
    except BaseException:
        # Deliberately catch everything: if our cleanly-exit kill function
        # failed for whatever reason, make sure we at least exit uncleanly
        quit()
def main():
    """Handle the command-line option, run the client, restore the terminal.

    Recognised first arguments:
        ``--help`` / ``-h``: draw the help and exit.
        ``--token`` / ``--store-token``: store a token and exit.
        ``--token-path PATH``: read the token from the file at PATH.
        ``--skeleton`` / ``--copy-skeleton`` / ``--config``: accepted here
            without any action (handled in utils.settings.py).
        ``--test NAME``: run one of the tests "input", "formatting",
            "scrolling" or "sendrecv" and exit.
    Any other first argument prints an error and exits. Without arguments
    the client is started directly.
    """
    if settings and settings["debug"]:
        startLogging()

    token = None

    # Look the argument up once. Only a missing argument means "no command";
    # an IndexError raised inside one of the handlers must not be mistaken
    # for that and silently ignored.
    command = sys.argv[1] if len(sys.argv) > 1 else None

    if command is None:
        pass
    elif command in ("--help", "-h"):
        draw_help()
        quit()
    elif command in ("--token", "--store-token"):
        store_token()
        quit()
    elif len(sys.argv) == 3 and command == "--token-path":
        try:
            with open(sys.argv[2]) as f:
                token = f.read()
        except (OSError, ValueError):
            # Missing/unreadable file, undecodable content or invalid path
            print("Error: Cannot read token from path")
            quit()
    elif command in ("--skeleton", "--copy-skeleton", "--config"):
        # -- now handled in utils.settings.py --- #
        pass
    elif command == "--test":
        if len(sys.argv) < 3:
            print(gc.term.red("Error: Incorrect syntax for --test"))
            print(gc.term.yellow("Syntax: Discline.py --test testName"))
            quit()
        elif sys.argv[2] in ("input", "formatting", "scrolling", "sendrecv"):
            runTest(sys.argv[2])
            quit()
        # NOTE(review): an unrecognised test name falls through and starts
        # the client normally - confirm that this is intended.
    else:
        print(gc.term.red("Error: Unknown command."))
        print(gc.term.yellow("See --help for options."))
        quit()

    check_for_updates()
    if token is None:
        token = get_token()

    print(gc.term.yellow("Starting..."))

    # start the client
    try:
        gc.client.run(token, bot=False)
    except (KeyboardInterrupt, SystemExit):
        pass
    finally:
        # if we are here, the client's loop was cancelled or errored, or
        # user exited. Clean up in every case, so that an unexpected error
        # does not leave the terminal in curses mode.
        try:
            gc.client.close()
        except BaseException:
            # Best effort: closing must never prevent the terminal restore
            pass
        try:
            asyncio.get_event_loop().close()
        except BaseException:
            pass
        curses.nocbreak()
        gc.ui.screen.keypad(False)
        curses.echo()
        curses.endwin()
def main():
    """Handle the command-line option, run the client, restore the terminal.

    Recognised first arguments:
        ``--help`` / ``-h``: run the simple help on the event loop and exit.
        ``--token`` / ``--store-token``: store a token and exit.
        ``--token-path PATH``: read the token from the file at PATH.
        ``--skeleton`` / ``--copy-skeleton`` / ``--config``: accepted here
            without any action (handled in utils.settings.py).
        ``--test NAME``: run one of the tests "input", "formatting",
            "scrolling" or "sendrecv" and exit.
    Any other first argument prints an error and exits. Without arguments
    the client is started directly.
    """
    if settings and settings["debug"]:
        startLogging()

    token = None

    # Look the argument up once. Only a missing argument means "no command";
    # an IndexError raised inside one of the handlers must not be mistaken
    # for that and silently ignored.
    command = sys.argv[1] if len(sys.argv) > 1 else None

    if command is None:
        pass
    elif command in ("--help", "-h"):
        try:
            asyncio.get_event_loop().run_until_complete(runSimpleHelp())
        except SystemExit:
            pass
        quit()
    elif command in ("--token", "--store-token"):
        store_token()
        quit()
    elif len(sys.argv) == 3 and command == "--token-path":
        try:
            with open(sys.argv[2]) as f:
                token = f.read()
        except (OSError, ValueError):
            # Missing/unreadable file, undecodable content or invalid path
            print("Error: Cannot read token from path")
            quit()
    elif command in ("--skeleton", "--copy-skeleton", "--config"):
        # -- now handled in utils.settings.py --- #
        pass
    elif command == "--test":
        if len(sys.argv) < 3:
            print(gc.term.red("Error: Incorrect syntax for --test"))
            print(gc.term.yellow("Syntax: Discline.py --test testName"))
            quit()
        elif sys.argv[2] in ("input", "formatting", "scrolling", "sendrecv"):
            runTest(sys.argv[2])
            quit()
        # NOTE(review): an unrecognised test name falls through and starts
        # the client normally - confirm that this is intended.
    else:
        print(gc.term.red("Error: Unknown command."))
        print(gc.term.yellow("See --help for options."))
        quit()

    check_for_updates()
    if token is None:
        token = get_token()

    print(gc.term.yellow("Starting..."))

    # start the client
    loop = asyncio.get_event_loop()
    try:
        loop.run_until_complete(gc.client.start(token, bot=False))
    except BaseException:
        # Deliberately catch everything (including Ctrl-C and SystemExit) so
        # that the cleanup below always runs.
        pass

    # stop the client and close the event loop
    try:
        gc.client.close()
        loop.close()
    except BaseException:
        # Best effort: closing must never prevent the terminal restore
        pass

    # Only undo the curses modes when the UI was actually set up
    if gc.ui.isInitialized:
        curses.nocbreak()
        gc.ui.screen.keypad(False)
        curses.echo()
        curses.endwin()