Example #1
0
    def process_dataset(self, subsets, perturbation):
        '''
        Returns tokenized subsets of the dataset.

        Every dialogue in self.dialogues is split into (context, response)
        samples by self.subdialogues, the samples are passed through
        self.perturb_dataset, and both parts are encoded to token ids with
        the separator token appended.

        Output is a list with (context_ids, response_ids) pairs. The list is
        ordered by 10 * len(context_ids) + len(response_ids); when
        self.shuffle is set, a random offset in [0, 10] is added to each key
        so that samples of similar length end up in a random order.

        If self.max_length > 0, pairs whose combined length is not strictly
        below self.max_length are dropped. If self.remove_top_n > 0, that
        many samples are removed from the end of the sorted list.
        '''

        # Split all the dialogues in subdialogues --> ([utterances in context], response)
        dialogue_samples = list(
            itertools.chain.from_iterable(
                self.subdialogues(dialogue, subsets)
                for dialogue in self.dialogues))

        # Perturb the context, based on specified perturbation option
        perturbed_samples = self.perturb_dataset(dialogue_samples,
                                                 perturbation)

        # Pick the separator token and an encode function returning plain ids
        if isinstance(self.tokenizer, Tokenizer):

            # For Tokenizer class, you need to extract the id's
            sep_token = get_token(self.tokenizer, "sep_token")

            def encode(text):
                return self.tokenizer.encode(text).ids
        else:

            # For the other models, the encode function already gives the id's
            self.tokenizer.add_special_tokens({
                'sep_token': "[SEP]",
                'pad_token': "[PAD]",
                'mask_token': "[MASK]",
                'eos_token': "[EOS]"
            })
            sep_token = self.tokenizer.sep_token
            encode = self.tokenizer.encode

        # Tokenize the samples
        tokenized_samples = [
            (encode(sep_token.join(context) + sep_token),
             encode(response + sep_token))
            for (context, response) in perturbed_samples
        ]

        # Drop samples that are too long
        if self.max_length > 0:
            tokenized_samples = [(x, y) for (x, y) in tokenized_samples
                                 if len(x) + len(y) < self.max_length]

        # Now shuffle samples and sort on length (to limit the amount of
        # padding in batches)
        if self.shuffle:
            random.shuffle(tokenized_samples)
        # random.randint is evaluated for every sample (even when shuffle is
        # off, where it is multiplied by zero) so the random state advances
        # the same way in both modes.
        sorted_samples = sorted(
            tokenized_samples,
            key=lambda pair: (len(pair[0]) * 10 + len(pair[1]) +
                              self.shuffle * random.randint(0, 10)))

        # Truncate towards zero first: a fractional value below 1 would
        # otherwise produce the slice [:0] and silently drop every sample.
        n_removed = int(self.remove_top_n)
        if n_removed > 0:
            sorted_samples = sorted_samples[:-n_removed]

        return sorted_samples
Example #2
0
def sendrecvTest(screen):
    """Run the send/receive test on the given curses screen.

    Switches the terminal to cbreak mode without echo, enables keypad and
    non-blocking input on ``screen``, shows a loading message, publishes
    ``screen`` through the module-level ``scr`` variable, schedules
    ``input_handler()`` on the event loop and finally runs the client with
    the token returned by ``get_token()``.
    """
    global scr

    # Terminal / window input configuration
    curses.cbreak()
    curses.noecho()
    screen.keypad(True)
    screen.nodelay(True)

    # Let the user know something is happening while the client starts
    screen.addstr(0, 0, "Loading. Please wait.")
    screen.refresh()

    # Make the screen available to the rest of the module
    scr = screen

    token = get_token()
    loop = asyncio.get_event_loop()
    loop.create_task(input_handler())
    client.run(token, bot=False)
Example #3
0
def main():
    """Handle the command-line flag (if any), then run the client.

    Recognised first arguments:
        --help / -h                  print the help text and exit
        --token / --store-token      store a token and exit
        --skeleton / --copy-skeleton / --config
                                     accepted here; handled in utils.settings.py
    Any other first argument prints an error and exits. Without arguments
    the client is started directly.
    """
    # Only look at argv[1] when it exists. Guarding on the length (instead
    # of catching IndexError around the whole dispatch) keeps an IndexError
    # raised inside one of the helpers from being mistaken for "no argument".
    if len(sys.argv) > 1:
        command = sys.argv[1]
        if command in ("--help", "-h"):
            from utils.print_utils.help import print_help
            print_help()
            sys.exit()
        elif command in ("--token", "--store-token"):
            store_token()
            sys.exit()
        elif command in ("--skeleton", "--copy-skeleton", "--config"):
            # ---- now handled in utils.settings.py ---- #
            pass
        else:
            print(gc.term.red("E̸̗͚̰̜͎̫̿̐̊̔͆̾̉̊̅̚͝RRƠ̸̳̙̙̯̌͒̓̽͒̒̄̓͒̽͒͊̅̍̐̇̋̽̅̎̿͒̈́̈͒̈́͝͝͠͝͝R: Unknown command."))
            print(gc.term.yellow("オプションについては 「 --help 」 を参照してください。\nSee --help for options."))
            sys.exit()

    check_for_updates()
    token = get_token()
    init_input()

    print(gc.term.yellow("S̷̨̻͗̋͒ociet̸̼̯̣͑̈́͝t̶̲̺̰́̌e   🗲  「 ソシエット 」を参ります。お待ちください。\n\nSociette (Erune): Scarlet Vane\n"))
    print(gc.term.yellow("作成... Alabaster Dance Tenka..."))
    print(gc.term.yellow("作成... First Dance Kagura..."))
    print(gc.term.yellow("作成... Sixth Dance Bizen..."))
    print(gc.term.yellow("作成... Closing Dance Tokiyomi..."))
    print(gc.term.yellow("作成とYuel... Sapphire Dance: Gentiana...\n"))
    print(gc.term.yellow("Setting up Societte (socie)...\n"))

    # start the client; a user interrupt or an exit request simply ends it
    try:
        gc.client.run(token, bot=False)
    except (KeyboardInterrupt, SystemExit):
        pass

    # if we are here, the client's loop was cancelled or errored, or user exited
    try:
        kill()
    except BaseException:
        # if our cleanly-exit kill function failed for whatever reason
        # (deliberately including SystemExit / KeyboardInterrupt),
        # make sure we at least exit uncleanly
        sys.exit()
Example #4
0
def main():
    """Handle the command-line flag (if any), then run the client.

    Recognised first arguments:
        --help / -h                  print the help text and exit
        --token / --store-token      store a token and exit
        --skeleton / --copy-skeleton / --config
                                     accepted here; handled in utils.settings.py
    Any other first argument prints an error and exits. Without arguments
    the client is started directly.
    """
    # Only look at argv[1] when it exists. Guarding on the length (instead
    # of catching IndexError around the whole dispatch) keeps an IndexError
    # raised inside one of the helpers from being mistaken for "no argument".
    if len(sys.argv) > 1:
        command = sys.argv[1]
        if command in ("--help", "-h"):
            from utils.print_utils.help import print_help
            print_help()
            sys.exit()
        elif command in ("--token", "--store-token"):
            store_token()
            sys.exit()
        elif command in ("--skeleton", "--copy-skeleton", "--config"):
            # ---- now handled in utils.settings.py ---- #
            pass
        else:
            print(gc.term.red("Error: Unknown command."))
            print(gc.term.yellow("See --help for options."))
            sys.exit()

    check_for_updates()
    token = get_token()
    init_input()

    print(gc.term.yellow("Starting..."))

    # start the client; a user interrupt or an exit request simply ends it
    try:
        gc.client.run(token, bot=False)
    except (KeyboardInterrupt, SystemExit):
        pass

    # if we are here, the client's loop was cancelled or errored, or user exited
    try:
        kill()
    except BaseException:
        # if our cleanly-exit kill function failed for whatever reason
        # (deliberately including SystemExit / KeyboardInterrupt),
        # make sure we at least exit uncleanly
        sys.exit()
Example #5
0
def main():
    """Parse the command line, run the client, then restore the terminal.

    Recognised first arguments:
        --help / -h                  draw the help and exit
        --token / --store-token      store a token and exit
        --token-path PATH            read the token from the file PATH
        --skeleton / --copy-skeleton / --config
                                     accepted here; handled in utils.settings.py
        --test NAME                  run the named test and exit
    Any other first argument prints an error and exits. Unless a token was
    read from a file, it is obtained from get_token().
    """
    if settings and settings["debug"]:
        startLogging()
    token = None

    # Only look at argv[1] when it exists. Guarding on the length (instead
    # of catching IndexError around the whole dispatch) keeps an IndexError
    # raised inside one of the helpers from being mistaken for "no argument".
    if len(sys.argv) > 1:
        command = sys.argv[1]
        if command in ("--help", "-h"):
            draw_help()
            sys.exit()
        elif command in ("--token", "--store-token"):
            store_token()
            sys.exit()
        elif len(sys.argv) == 3 and command == "--token-path":
            try:
                with open(sys.argv[2]) as f:
                    # Drop the trailing newline most editors and `echo`
                    # leave behind; it is never part of the token.
                    token = f.read().strip()
            except (OSError, ValueError):
                # OSError: missing/unreadable file; ValueError: bad path or
                # undecodable contents.
                print("Error: Cannot read token from path")
                sys.exit()
        elif command in ("--skeleton", "--copy-skeleton", "--config"):
            # -- now handled in utils.settings.py --- #
            pass
        elif command == "--test":
            if len(sys.argv) < 3:
                print(gc.term.red("Error: Incorrect syntax for --test"))
                print(gc.term.yellow("Syntax: Discline.py --test testName"))
                sys.exit()
            elif sys.argv[2] in ("input", "formatting", "scrolling",
                                 "sendrecv"):
                runTest(sys.argv[2])
                sys.exit()
            # NOTE(review): an unrecognised test name falls through and the
            # client is started normally — confirm this is intended.
        else:
            print(gc.term.red("Error: Unknown command."))
            print(gc.term.yellow("See --help for options."))
            sys.exit()

    check_for_updates()
    if token is None:
        token = get_token()

    print(gc.term.yellow("Starting..."))

    # start the client; a user interrupt or an exit request simply ends it
    try:
        gc.client.run(token, bot=False)
    except (KeyboardInterrupt, SystemExit):
        pass

    # Best-effort shutdown: a failure here must not prevent the terminal
    # from being restored below.
    try:
        gc.client.close()
    except Exception:
        pass
    try:
        asyncio.get_event_loop().close()
    except Exception:
        pass

    # if we are here, the client's loop was cancelled or errored, or user exited
    curses.nocbreak()
    gc.ui.screen.keypad(False)
    curses.echo()
    curses.endwin()
Example #6
0
def main():
    """Parse the command line, run the client, then restore the terminal.

    Recognised first arguments:
        --help / -h                  run runSimpleHelp() on the event loop
                                     and exit
        --token / --store-token      store a token and exit
        --token-path PATH            read the token from the file PATH
        --skeleton / --copy-skeleton / --config
                                     accepted here; handled in utils.settings.py
        --test NAME                  run the named test and exit
    Any other first argument prints an error and exits. Unless a token was
    read from a file, it is obtained from get_token().
    """
    if settings and settings["debug"]:
        startLogging()
    token = None

    # Only look at argv[1] when it exists. Guarding on the length (instead
    # of catching IndexError around the whole dispatch) keeps an IndexError
    # raised inside one of the helpers from being mistaken for "no argument".
    if len(sys.argv) > 1:
        command = sys.argv[1]
        if command in ("--help", "-h"):
            try:
                asyncio.get_event_loop().run_until_complete(runSimpleHelp())
            except SystemExit:
                pass
            sys.exit()
        elif command in ("--token", "--store-token"):
            store_token()
            sys.exit()
        elif len(sys.argv) == 3 and command == "--token-path":
            try:
                with open(sys.argv[2]) as f:
                    # Drop the trailing newline most editors and `echo`
                    # leave behind; it is never part of the token.
                    token = f.read().strip()
            except (OSError, ValueError):
                # OSError: missing/unreadable file; ValueError: bad path or
                # undecodable contents.
                print("Error: Cannot read token from path")
                sys.exit()
        elif command in ("--skeleton", "--copy-skeleton", "--config"):
            # -- now handled in utils.settings.py --- #
            pass
        elif command == "--test":
            if len(sys.argv) < 3:
                print(gc.term.red("Error: Incorrect syntax for --test"))
                print(gc.term.yellow("Syntax: Discline.py --test testName"))
                sys.exit()
            elif sys.argv[2] in ("input", "formatting", "scrolling",
                                 "sendrecv"):
                runTest(sys.argv[2])
                sys.exit()
            # NOTE(review): an unrecognised test name falls through and the
            # client is started normally — confirm this is intended.
        else:
            print(gc.term.red("Error: Unknown command."))
            print(gc.term.yellow("See --help for options."))
            sys.exit()

    check_for_updates()
    if token is None:
        token = get_token()

    print(gc.term.yellow("Starting..."))

    # start the client
    loop = asyncio.get_event_loop()
    try:
        loop.run_until_complete(gc.client.start(token, bot=False))
    except BaseException:
        # Deliberately swallow everything (including KeyboardInterrupt and
        # SystemExit): whatever ended the client, the shutdown and the
        # terminal restoration below still have to run.
        pass

    # stop the client ...
    try:
        closing = gc.client.close()
        # If close() hands back a coroutine it has not done anything yet;
        # drive it to completion so the client is really shut down.
        if asyncio.iscoroutine(closing):
            loop.run_until_complete(closing)
    except Exception:
        pass
    # ... and close the event loop, even when stopping the client failed
    try:
        loop.close()
    except Exception:
        pass

    if gc.ui.isInitialized:
        curses.nocbreak()
        gc.ui.screen.keypad(False)
        curses.echo()
        curses.endwin()