Ejemplo n.º 1
0
def getWords(data):
    """Flatten definition records into a single list of words.

    For every record in ``data`` the ``'sent'`` text is passed through
    ``data_helpers.preprocess_data`` and then ``data_helpers.basic_tokenizer``;
    the resulting tokens are added to the output. The record's ``'def_word'``
    is then preprocessed and added as one single entry (it is not tokenized).

    Args:
        data: Iterable of mappings providing ``'sent'`` and ``'def_word'``.

    Returns:
        A list containing, per record and in input order, the sentence
        tokens followed by the preprocessed defined word.
    """
    collected = []
    for entry in data:
        cleaned_sentence = data_helpers.preprocess_data(entry['sent'])
        collected.extend(data_helpers.basic_tokenizer(cleaned_sentence))

        # The defined word is kept whole rather than split into tokens.
        collected.append(data_helpers.preprocess_data(entry['def_word']))

    return collected
Ejemplo n.º 2
0
def load_data_plain(data_path):
    """Load labelled sentences from a tab-separated file.

    The first line of the file is treated as a header and skipped. Each
    remaining non-blank line is stripped and split on tabs: column 2 is the
    sentence text, which is passed through ``data_helpers.preprocess_data``,
    and column 3 is parsed as an integer label.

    Args:
        data_path: Path of the tab-separated file to read.

    Returns:
        A ``(x_arr, y_arr)`` tuple of parallel lists holding the
        preprocessed sentences and their integer labels, in file order.

    Raises:
        IndexError: If a non-blank row has fewer than four columns.
        ValueError: If the label column cannot be parsed as an integer.
    """
    x_arr = []
    y_arr = []

    with open(data_path, 'r') as f:
        # Skip the header row; the default keeps an empty file from raising.
        next(f, None)

        # Iterate the file lazily instead of materialising every line.
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline at end of file) carry
                # no record; previously they raised IndexError below.
                continue

            cols = line.split('\t')
            sent = cols[2]
            sentiment = int(cols[3])

            x_arr.append(data_helpers.preprocess_data(sent))
            y_arr.append(sentiment)

    return x_arr, y_arr
Ejemplo n.º 3
0
def load_test_data_plain(data_path):
    """Load unlabelled phrases and their ids from a tab-separated file.

    The first line of the file is treated as a header and skipped. Each
    remaining non-blank line is stripped and split on tabs: column 0 is the
    phrase id and column 2 is the phrase text, which is passed through
    ``data_helpers.preprocess_data``.

    Args:
        data_path: Path of the tab-separated file to read.

    Returns:
        A ``(x_arr, phrase_arr)`` tuple of parallel lists holding the
        preprocessed phrases and their phrase ids (as strings), in file
        order.
    """
    x_arr = []
    phrase_arr = []

    with open(data_path, 'r') as f:
        # Skip the header row; the default keeps an empty file from raising.
        next(f, None)

        # Iterate the file lazily instead of materialising every line.
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline at end of file) carry
                # no record; previously they raised IndexError.
                continue

            cols = line.split('\t')

            # strip() above also removes a whitespace-only trailing phrase
            # together with its tab, leaving fewer than three columns; any
            # row that does not have exactly three columns gets an empty
            # phrase.
            sent = cols[2] if len(cols) == 3 else ""
            phrase_id = cols[0]

            x_arr.append(data_helpers.preprocess_data(sent))
            phrase_arr.append(phrase_id)

    return x_arr, phrase_arr