Python delexicalise Examples

Programming Language: Python

Namespace/Package Name: utils.delexicalize

Method/Function: delexicalise

Examples at hotexamples.com: 3

Python delexicalise - 3 examples found. These are the top-rated real-world Python examples of utils.delexicalize.delexicalise extracted from open-source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

def createDelexData():
    """Main function of the script - loads delexical dictionary,
    goes through each dialogue and does:
    1) data normalization
    2) delexicalization
    3) addition of database pointer
    4) saves the delexicalized data

    Reads ``data/woz2/data.json`` and skips every dialogue whose name does
    not contain ``'WOZ'``. Each remaining turn's ``'text'`` is overwritten
    in place with its delexicalized form.

    Returns:
        dict: dialogue name -> delexicalized dialogue. The same mapping is
        also written to ``data/delex.json``.
    """

    # create dictionary of delexicalized values that we then search against; order matters here!
    dic = delexicalize.prepareSlotValuesIndependent()
    delex_data = {}

    # Loop-invariant, so compiled once. The raw string avoids the invalid
    # "\d" escape sequence that a plain string literal would contain.
    digitpat = re.compile(r'\d+')

    # Context manager guarantees the input file handle is closed.
    with open('data/woz2/data.json') as fin1:
        data = json.load(fin1)

    for dialogue_name in tqdm(data):
        if 'WOZ' not in dialogue_name:
            continue
        dialogue = data[dialogue_name]

        for idx, turn in enumerate(dialogue['log']):
            # normalization, split and delexicalization of the sentence
            # (split + join collapses runs of whitespace to single spaces)
            sent = normalize(turn['text'])
            sent = delexicalize.delexicalise(' '.join(sent.split()), dic)

            # any remaining run of digits becomes a count placeholder
            sent = digitpat.sub('[value_count]', sent)

            # delexicalized sentence added to the dialogue
            dialogue['log'][idx]['text'] = sent

            if idx % 2 == 1:  # if it's a system turn
                # add database pointer; it is stored on the preceding turn
                pointer_vector = addDBPointer(turn)
                dialogue['log'][idx - 1]['db_pointer'] = pointer_vector.tolist()

        delex_data[dialogue_name] = dialogue

    with open('data/delex.json', 'w') as outfile:
        json.dump(delex_data, outfile)

    return delex_data

Example #2

Show file

def createDelexData(sent, sent_act, bs, dic, turn, option):
    """Normalize and delexicalize a single sentence.

    Steps, in order: normalization, dictionary-based delexicalization,
    reference-number delexicalization, replacement of digit runs with
    ``[value_count]``, and an optional ``fixDelex`` pass chosen by
    ``option``.

    Args:
        sent: Raw sentence text.
        sent_act: Passed to ``fixDelex`` when ``option == 'sys'``.
        bs: Passed to ``fixDelex`` when ``option == 'user'``.
        dic: Delexicalization dictionary handed to
            ``delexicalize.delexicalise``.
        turn: Turn object handed to ``delexicaliseReferenceNumber``.
        option: ``'user'`` or ``'sys'``; any other value skips the
            ``fixDelex`` pass.

    Returns:
        The delexicalized sentence with leading/trailing whitespace removed.
    """
    # normalization, split and delexicalization of the sentence
    # (split + join collapses runs of whitespace to single spaces)
    sent = normalize(sent)
    sent = delexicalize.delexicalise(' '.join(sent.split()), dic)
    # parsing reference number GIVEN belief state
    sent = delexicaliseReferenceNumber(sent, turn)
    # changes to numbers only here; raw string avoids the invalid "\d"
    # escape sequence that a plain string literal would contain
    sent = re.sub(r'\d+', '[value_count]', sent)
    if option == 'user':
        sent = fixDelex(sent, None, bs)
    elif option == 'sys':
        sent = fixDelex(sent, sent_act, None)

    return sent.strip()

Example #3

Show file

def createDelexData():
    """Main function of the script - loads delexical dictionary,
    goes through each dialogue and does:
    1) data normalization
    2) delexicalization
    3) addition of database pointer
    4) saves the delexicalized data

    Calls ``loadData()`` first, then reads ``data/multi-woz/data.json`` and
    ``data/multi-woz/dialogue_acts.json``. Each turn's ``'text'`` is
    overwritten in place with its delexicalized form.

    Returns:
        dict: dialogue name -> delexicalized dialogue. The same mapping is
        also written to ``data/multi-woz/delex.json``.
    """
    # download the data
    loadData()

    # create dictionary of delexicalized values that we then search against; order matters here!
    dic = delexicalize.prepareSlotValuesIndependent()
    delex_data = {}

    # Loop-invariant, so compiled once. The raw string avoids the invalid
    # "\d" escape sequence that a plain string literal would contain.
    digitpat = re.compile(r'\d+')

    with open('data/multi-woz/data.json') as fin1:
        data = json.load(fin1)

    with open('data/multi-woz/dialogue_acts.json') as fin2:
        data2 = json.load(fin2)

    for dialogue_name in tqdm(data):
        dialogue = data[dialogue_name]

        # counter passed to fixDelex; starts at 1 and advances once per turn
        idx_acts = 1

        for idx, turn in enumerate(dialogue['log']):
            # normalization, split and delexicalization of the sentence
            # (split + join collapses runs of whitespace to single spaces)
            sent = normalize(turn['text'])
            sent = delexicalize.delexicalise(' '.join(sent.split()), dic)

            # parsing reference number GIVEN belief state
            sent = delexicaliseReferenceNumber(sent, turn)

            # any remaining run of digits becomes a count placeholder
            sent = digitpat.sub('[value_count]', sent)

            # delexicalized sentence added to the dialogue
            dialogue['log'][idx]['text'] = sent

            if idx % 2 == 1:  # if it's a system turn
                # add database pointer
                pointer_vector = addDBPointer(turn)
                # add booking pointer
                pointer_vector = addBookingPointer(dialogue, turn,
                                                   pointer_vector)

                # the pointer is stored on the preceding turn
                dialogue['log'][idx - 1]['db_pointer'] = pointer_vector.tolist()

            # FIXING delexicalization:
            dialogue = fixDelex(dialogue_name, dialogue, data2, idx, idx_acts)
            idx_acts += 1

        delex_data[dialogue_name] = dialogue

    with open('data/multi-woz/delex.json', 'w') as outfile:
        json.dump(delex_data, outfile)

    return delex_data