Python DataPreProcessorの例、data_pre_processor.DataPreProcessor Pythonの例

コード例 #1

0

ファイルを表示

ファイル: chat_bot.py プロジェクト: conormccauley1999/SWENG-2020-The-Turing-Game

def serviceMode():
    """Run the bot as a file-driven service.

    Repeatedly looks for ``input.txt`` in the current working directory.
    Whenever the cleaned content of that file differs from the last query
    that was handled, the reply is written to ``output.txt``. The loop ends
    once the cleaned input is exactly ``exit``.
    """
    # Initialise the AI
    ai = AI()
    ai.initialise("")
    # Initialise the pre-processor
    preProcessor = DataPreProcessor("")
    # Remember the last handled query so an unchanged file is not answered twice
    pInput = ""
    # The program will only exit when it receives a deliberate 'exit' message
    loop = True
    # NOTE(review): this loop polls without any delay, so it spins while waiting
    # for the file to change — consider a short sleep between iterations.
    while loop:
        # Check if the file exists
        path = str(os.getcwd()) + "/input.txt"
        if os.path.isfile(path):
            # Read our input from the file; the context manager closes the
            # handle on every iteration instead of leaking one per poll
            with open(path, "r") as input_file:
                raw = input_file.read()
            # Clean our input
            preProcessor.processInput(raw)
            query = str(preProcessor.input.strip())
            # Exit on 'exit'
            if query == "exit":
                loop = False
            elif query != pInput:
                pInput = query
                # Build the reply first so output.txt is not truncated if
                # generating the response fails
                output = arrangeResp(query, preProcessor, ai)
                # Write the output back to a different file
                with open("output.txt", "w") as output_file:
                    output_file.write(output)

コード例 #2

0

ファイルを表示

 def __init__(self):
     """Build the Seq2seq model and load its weights from data/model.npz."""
     # The pre-processor is held only so string2array need not be duplicated
     self.preProcessor = DataPreProcessor("")
     embedding = tl.layers.Embedding(
         vocabulary_size=self.vocabulary_size,
         embedding_size=self.emb_dim,
     )
     self.model_ = Seq2seq(
         decoder_seq_length=self.decoder_seq_length,
         cell_enc=tf.keras.layers.GRUCell,
         cell_dec=tf.keras.layers.GRUCell,
         n_layer=3,
         n_units=256,
         embedding_layer=embedding,
     )
     # Restore the saved parameters into the freshly built network
     saved_weights = tl.files.load_npz(name="data/model.npz")
     tl.files.assign_weights(saved_weights, self.model_)

コード例 #3

0

ファイルを表示

ファイル: chat_bot.py プロジェクト: seanlarkin99/SWENG-2020-The-Turing-Game

def arrangeResp(userInput, personality):
    """Produce the bot's reply to *userInput*.

    Creates an AI named after ``personality.name``, pre-processes the input,
    asks the AI for a response and converts it back to a string.

    Returns ``False`` when ``processInput()`` reports ``False`` (per the
    original comment, the query was too short for the AI); otherwise returns
    the response string.
    """
    bot = AI(personality.name)
    cleaner = DataPreProcessor(userInput)
    # Bail out early when the pre-processor rejects the query
    if cleaner.processInput() == False:
        return False
    # The cleaned input is what gets passed to the model
    reply = cleaner.array2String(bot.respond(cleaner.input, 1))
    # NOTE(review): the result of postProcess() is not used and the
    # un-post-processed reply is returned — confirm this is intended.
    DataPostProcessor(reply, personality).postProcess()
    return reply

コード例 #4

0

ファイルを表示

ファイル: teenager.py プロジェクト: conormccauley1999/SWENG-2020-The-Turing-Game

    def shortenWords(self, input):
        """Replace words in *input* with their text-speak abbreviations.

        The text is split with ``DataPreProcessor.string2Array``; every token
        whose string form exactly equals a key of the abbreviation table is
        swapped for the mapped value, and the tokens are joined back together
        with ``array2String``.

        Returns the re-joined text.
        """
        shortWords = {
            'your': 'ur',
            'tonight': '2nite',
            'for your information': 'fyi',
            'okay': 'k',
            'ok': 'k',
            'to': '2',
            'tomorrow': '2moro',
            'easy': 'ez',
            'see you': 'cya',
            'because': 'cuz',
            'you': 'u',
        }
        preProcessor = DataPreProcessor(input)
        words = preProcessor.string2Array(input)
        # NOTE(review): multi-word keys such as 'see you' can only match if
        # string2Array yields multi-word tokens — confirm against its output.
        for index, word in enumerate(words):
            # Direct dictionary lookup instead of scanning every key per token
            replacement = shortWords.get(str(word))
            if replacement is not None:
                words[index] = replacement
        return preProcessor.array2String(words)

コード例 #5

0

ファイルを表示

 def shortenWords(self, input):
     """Replace words in *input* with their casual abbreviations.

     The text is split with ``DataPreProcessor.string2Array``; every token
     whose string form exactly equals a key of the abbreviation table is
     swapped for the mapped value, and the tokens are joined back together
     with ``array2String``.

     Returns the re-joined text.
     """
     shortWords = {
         'for your information': 'fyi',
         'okay': 'kk',
         'ok': 'kk',
         'easy': 'ez',
         'see you': 'cya',
         'because': 'cuz',
         'very': 'v',
         'to be honest': 'tbh',
         'oh my god': 'omg',
         'boyfriend': 'bf',
         'girlfriend': 'gf',
         'awesome': 'cool',
         'in trouble': 'screwed',
     }
     preProcessor = DataPreProcessor(input)
     words = preProcessor.string2Array(input)
     # NOTE(review): multi-word keys such as 'oh my god' can only match if
     # string2Array yields multi-word tokens — confirm against its output.
     for index, word in enumerate(words):
         # Direct dictionary lookup instead of scanning every key per token
         replacement = shortWords.get(str(word))
         if replacement is not None:
             words[index] = replacement
     return preProcessor.array2String(words)

コード例 #6

0

ファイルを表示

class AI:
    """Seq2seq chat model combined with canned replies for simple questions.

    The vocabulary is loaded once, at class-definition time, from the
    ``data/twitter/`` dataset and is shared by all instances.
    """

    metadata, idx_q, idx_a = data.load_data(PATH='data/{}/'.format("twitter"))
    src_vocab_size = len(metadata['idx2w'])
    emb_dim = 1024
    word2idx = metadata['w2idx']
    idx2word = metadata['idx2w']
    unk_id = word2idx['unk']
    pad_id = word2idx['_']
    # The two special tokens take the next two free ids after the vocabulary
    start_id = src_vocab_size
    end_id = src_vocab_size + 1
    word2idx.update({'start_id': start_id, 'end_id': end_id})
    idx2word = idx2word + ['start_id', 'end_id']
    src_vocab_size = src_vocab_size + 2
    vocabulary_size = src_vocab_size
    decoder_seq_length = 20

    def __init__(self):
        """Build the Seq2seq model and load its weights from data/model.npz.

        The name and mood are not set here; call ``initialise`` for that.
        """
        # preProcessor is only utilised here to avoid duplicating string2array
        self.preProcessor = DataPreProcessor("")
        self.model_ = Seq2seq(
            decoder_seq_length=self.decoder_seq_length,
            cell_enc=tf.keras.layers.GRUCell,
            cell_dec=tf.keras.layers.GRUCell,
            n_layer=3,
            n_units=256,
            embedding_layer=tl.layers.Embedding(
                vocabulary_size=self.vocabulary_size,
                embedding_size=self.emb_dim),
        )
        load_weights = tl.files.load_npz(name="data/model.npz")
        tl.files.assign_weights(load_weights, self.model_)

    def initialise(self, name):
        """Store *name* and pick a random mood for this AI."""
        self.name = name
        feelings = [
            "good", "well", "great", "grand", "excellent", "ecstatic", "happy",
            "sad", "annoyed", "frustrated", "angry", "tired", "okay", "alright"
        ]
        self.feel = feelings[randint(0, len(feelings) - 1)]

    def respond(self, seed, number):
        """Create a response to *seed* as a list of words.

        *seed* is a space-separated string; *number* is passed to the model as
        ``top_n``. The result is any canned reply from ``simpleResponse``
        followed by the words generated by the model.
        """
        simpleStart = self.simpleResponse(seed)
        self.model_.eval()
        # Words missing from the vocabulary map to the 'unk' id
        seed_id = [self.word2idx.get(w, self.unk_id) for w in seed.split(" ")]
        sentence_id = self.model_(inputs=[[seed_id]],
                                  seq_length=20,
                                  start_token=self.start_id,
                                  top_n=number)
        sentence = []
        for w_id in sentence_id[0]:
            w = self.idx2word[w_id]
            # Stop at the end-of-sentence marker
            if w == 'end_id':
                break
            sentence.append(w)
        # A catch all just in case there are no responses, but we have yet to
        # find an input to trigger this
        if not sentence:
            sentence = [
                "I'm", "sorry,", "I", "just", "don't", "quite", "understand",
                "what", "you're", "asking..."
            ]
        return simpleStart + sentence

    def simpleResponse(self, input):
        """Build a canned reply for greetings, name and wellbeing questions.

        *input* is split with the pre-processor's ``string2Array``. Returns a
        list of words, which is empty when none of the simple cases applies.
        Requires ``initialise`` to have been called, because ``self.name`` and
        ``self.feel`` may be read.
        """
        sentence = []
        input = self.preProcessor.string2Array(input)
        # tally[0]: greeting seen; tally[1]: name-question keywords;
        # tally[2]: wellbeing-question keywords
        tally = [0, 0, 0]
        greetings = [
            "hello", "hi", "greetings", "salutations", "hey", "yo", "howdy"
        ]
        names = [["what", "who"], ["is", "are"], ["you", "your"], ["name"]]
        wellbeing = [["how"], ["do", "are"], ["you"],
                     ["doing", "feeling", "feel"]]
        # Tallying key words in the user query to determine if certain
        # questions were being asked
        for x in input:
            if x in greetings:
                tally[0] = 1
            # Compare by value: identity comparison of strings only succeeds
            # when both happen to be the same interned object
            for group in names:
                if x in group:
                    tally[1] = tally[1] + 1
            for group in wellbeing:
                if x in group:
                    tally[2] = tally[2] + 1
        # Handle a return greeting, and maybe ask how the user is
        if tally[0] > 0:
            sentence.append(greetings[randint(0, len(greetings) - 1)])
            if randint(0, 1) == 1:
                sentence.append("how")
                sentence.append("are")
                sentence.append("you")
                # A value of 2 leaves the question as plain "how are you"
                value = randint(0, 2)
                if value == 0:
                    sentence.append("doing")
                elif value == 1:
                    sentence.append("feeling")
        # Handle questions about its name with a simple answer
        if tally[1] > 2 and len(input) < 5:
            if randint(0, 1) == 1:
                sentence.append("I")
                sentence.append("am")
            else:
                sentence.append("my")
                sentence.append("name")
                sentence.append("is")
            sentence.append(self.name)
        # Handle a 'how are you' type question with a pre-determined
        # emotional state
        if tally[2] > 2 and len(input) < 5:
            sentence.append("I")
            sentence.append("am")
            if randint(0, 1) == 1:
                sentence.append("feeling")
            sentence.append(self.feel)
        return sentence

コード例 #7

0

ファイルを表示

ファイル: chat_bot.py プロジェクト: conormccauley1999/SWENG-2020-The-Turing-Game

def terminalMode():
    """Run the chat bot interactively on the command line.

    Prints a short introduction, optionally schedules ``endProgram`` to run
    at one-minute intervals, assigns a random personality and name, and then
    answers queries until the user enters ``exit`` or the global ``end`` flag
    is no longer ``False``.
    """
    global end

    # ANSI escape sequences used to colour the command-line output
    bg = "\033[44m"
    blue = "\033[94m"
    green = "\033[92m"
    red = "\033[91m"
    reset = "\033[0m"

    # Set up our timer
    timer = BackgroundScheduler({"apscheduler.timezone": "Europe/Dublin"})

    # Small bit of starting text to introduce the user
    intro = "\n".join([
        "Ask a question, and see how our AI responds.",
        "Please ensure queries have more than three words.",
        "You will have 60 seconds upon entering your first query to make further queries.",
        "The program will wait for your last query before exiting."
    ])
    print(green + intro + reset)

    # The timer is optional on the command line because it will be
    # implemented by the UI team
    print(bg + "Do you wish to enable the on-minute timer? (y/n)" + reset)
    accepted_timer = input() in ("y", "Y")
    if accepted_timer:
        timer.add_job(endProgram, "interval", minutes=1)

    print(bg + "Enter the input, or enter 'exit' to end:" + reset)

    # Retrieve a name
    name = naming()

    # Personality is randomly assigned between teenager, young adult and
    # adult, with a random name each time
    perNum = random.randint(1, 3)
    if perNum == 1:
        personality = Teenager(name)
    elif perNum == 2:
        personality = YoungAdult(name)
    else:
        personality = Adult(name)

    # Initialise the AI, the pre-processor and the post-processor
    ai = AI()
    ai.initialise(name)
    preProcessor = DataPreProcessor("")
    postProcessor = DataPostProcessor(personality)

    # Input/feedback loop; the timer is started on the first pass
    first_pass = True
    while end is False:
        if first_pass and accepted_timer:
            timer.start()
        elif not first_pass:
            print(bg + "Enter another input:" + reset)
        first_pass = False

        userInput = input()
        # Exit on 'exit'
        if userInput == "exit":
            break

        # Reject queries the pre-processor reports as too short
        if preProcessor.processInput(userInput) is False:
            print(red +
                  "The input string must have a minimum of three words!" +
                  reset)
            continue

        # Build the reply, post-process it and show it to the user
        output = arrangeResp(preProcessor.input, preProcessor, ai)
        output = postProcessor.postProcess(output)
        print(blue + output + reset)

    # Terminate the program
    print(green + "Program exited." + reset)

コード例 #8

0

ファイルを表示

    def test_ConvertChar(self):
        """Check convertAccentedCharsToAscii against known inputs and outputs."""
        # (raw input, expected value of preProcessor.input after conversion)
        cases = [
            ("æ", "a"),
            ("je ètais", "je etais"),
            ("í", "i"),
            ("Ø", "o"),
            ("Ùaemd", "uaemd"),
            ("ůűų or ŷÝŸ", "uuu or yyy"),
        ]
        for raw, expected in cases:
            preProcessor = DataPreProcessor(raw)
            preProcessor.convertAccentedCharsToAscii()
            self.assertEqual(preProcessor.input, expected)

コード例 #9

0

ファイルを表示

    def test_NumWordToDigit(self):
        """Check removeNumWords against known inputs and outputs."""
        # (raw input, expected value of preProcessor.input after removal)
        cases = [
            ("fsd", "fsd"),
            ("one.two", "one.two"),
            ("one", ""),
            ("one two", " "),
            ("twelve", ""),
            ("one or three", " or "),
        ]
        for raw, expected in cases:
            preProcessor = DataPreProcessor(raw)
            preProcessor.removeNumWords()
            self.assertEqual(preProcessor.input, expected)

コード例 #10

0

ファイルを表示

    def test_ConvertChar(self):
        """Check convertAccentedCharsToAscii against known inputs and outputs."""

        def to_ascii(text):
            # Run the conversion on a fresh pre-processor and return its input
            processor = DataPreProcessor(text)
            processor.convertAccentedCharsToAscii()
            return processor.input

        self.assertEqual(to_ascii('æ'), 'a')
        self.assertEqual(to_ascii('je ètais'), 'je etais')
        self.assertEqual(to_ascii('í'), 'i')
        self.assertEqual(to_ascii('Ø'), 'o')
        self.assertEqual(to_ascii('Ùaemd'), 'uaemd')
        self.assertEqual(to_ascii('ůűų or ŷÝŸ'), 'uuu or yyy')

コード例 #11

0

ファイルを表示

    def test_NumWordToDigit(self):
        """Check convertNumberWordToDigit against known inputs and outputs."""

        def to_digits(text):
            # Run the conversion on a fresh pre-processor and return its input
            processor = DataPreProcessor(text)
            processor.convertNumberWordToDigit()
            return processor.input

        self.assertEqual(to_digits('fsd'), 'fsd')

        # FAILS THIS TEST
        dotted = to_digits('one.two')
        print (f"{dotted} : testing input")
        self.assertEqual(dotted, '1.2')

        self.assertEqual(to_digits('one'), '1')

        # FAILS THIS TEST
        self.assertEqual(to_digits('one two'), '1 2')

        self.assertEqual(to_digits('twelve'), '12')
        self.assertEqual(to_digits('one or three'), '1 or 3')