# Example #1 (label: 0)
def save_pickle_data(filename, pickle_filename):
    """Tokenize every dialogue in *filename* and append it to a pickle file.

    Each dialogue record is ``(context, utterances, target_id)``; the context
    and utterances are tokenized with ``DataProcess.tokenize`` and dumped as
    one pickle record ``[tokenized_context, tokenized_utterances]`` per
    dialogue.  The ``target_id`` is intentionally not saved.

    Args:
        filename: path handed to ``DataProcess``; the dialogue source file.
        pickle_filename: destination file for the pickled records
            (one ``pickle.dump`` call per dialogue, so read it back with
            repeated ``pickle.load`` calls).
    """
    data_process = DataProcess(filename)
    dialog_iter = data_process.create_dialogue_iter(
        data_process.input_file_path)

    index = 0
    with open(pickle_filename, 'wb') as f_handle:
        # Iterate the generator directly instead of the manual
        # `while True` / `next(it, None)` sentinel loop; keep the
        # explicit None check in case the iterator yields None to
        # signal exhaustion (matches the original stopping condition).
        for data in dialog_iter:
            if data is None:
                break

            # data -> (context, utterances, target_id); target_id unused.
            context, utterances, _target_id = data

            tokenized_context, _ = data_process.tokenize(context)
            tokenized_utterances, _ = data_process.tokenize(utterances)

            pickle.dump([tokenized_context, tokenized_utterances], f_handle)

            index += 1
            if index % 100 == 0:
                print(index)  # lightweight progress indicator

    print("%s data save complete!" % index)
# Example #2 (label: 0)
def make_valid_data(filename, write_file_name):
    """Write a human-readable validation dump of every dialogue.

    For each dialogue ``(speakers, context, utterances, target_id)`` the
    context is split on ``" __eot__ "`` into turns; each non-empty turn is
    written as ``<token_count>|<speaker> : <sentence>``, followed by the
    target answer, the average sentence length, and the turn count.
    Corpus-wide averages are appended at the end of the file.

    Args:
        filename: path handed to ``DataProcess``; the dialogue source file.
        write_file_name: destination text file (UTF-8).
    """
    data_process = DataProcess(filename)
    dialog_iter = data_process.create_dialogue_iter(
        data_process.input_file_path)

    input_sum_turn = 0
    input_sum_sentence_len = 0
    with open(write_file_name, "w", encoding='utf-8') as f_handle:
        index = 0

        while True:
            # data -> (speakers, context, utterances, target_id)
            data = next(dialog_iter, None)
            if data is None:
                break
            # BUG FIX: the counter used to be incremented *before* the
            # exhaustion check, so the trailing corpus averages divided
            # by N+1 instead of the true dialogue count N.
            index += 1

            speakers, context, utterances, target_id = data
            context_sentence = context[0].split(" __eot__ ")

            f_handle.write("[%d]" % index + "\n")
            sum_sentence_len = 0
            tot_turn = 0

            for i, sentence in enumerate(context_sentence):
                # Skip empty turns *before* paying the tokenizer cost
                # (the original tokenized first, then skipped).
                if len(sentence) == 0:
                    continue
                sentence_len = len(nltk.word_tokenize(sentence))
                sentence_string = speakers[i] + " : " + sentence
                sentence_string = str(sentence_len) + "|" + sentence_string
                f_handle.write(sentence_string + "\n")

                sum_sentence_len += sentence_len
                tot_turn += 1

            # Guard: a context made entirely of empty turns would
            # otherwise raise ZeroDivisionError.
            avg_sentence_len = (sum_sentence_len / tot_turn) if tot_turn else 0.0
            sentence_answer = "Answer : " + utterances[target_id[0]] + "\n"
            f_handle.write(sentence_answer)
            f_handle.write("average sentence length : %.3f" %
                           avg_sentence_len + "\n")
            f_handle.write("total turn number : %d" % tot_turn + '\n')

            f_handle.write("-" * 200 + "\n")
            if index % 500 == 0:
                print(index, ":", "avg_sentence_len - %.3f" % avg_sentence_len,
                      "tot_turn - %d" % tot_turn)
            input_sum_turn += tot_turn
            input_sum_sentence_len += avg_sentence_len

        # Guard against an empty input file (index == 0) now that the
        # counter reflects the true number of dialogues.
        denom = index if index else 1
        f_handle.write("average sentence length %.3f" %
                       (input_sum_sentence_len / denom))
        f_handle.write("average turn length %.3f" % (input_sum_turn / denom))