コード例 #1
0
def load_dataset(data_pickle,
                 labels_pickle,
                 to_one_hot=True,
                 batch_size=6,
                 normalization=True):
    """Load pickled sentences and labels and split them into two DataSets.

    Args:
        data_pickle: Path of the pickle file holding the per-sentence data
            (MFCCs, judging by the normalization helper that is applied).
        labels_pickle: Path of the pickle file holding the per-sentence
            labels.
        to_one_hot: If true, every sentence's labels are converted with
            ``pp.to_one_hot(..., NUM_CLASSES)``; otherwise the labels are
            used exactly as loaded.
        batch_size: Forwarded unchanged to both ``DataSet`` constructors.
        normalization: If true, every sentence is passed through
            ``pp.normalize_mfcc``; otherwise the data is used as loaded.

    Returns:
        A ``(main, held_out)`` tuple of ``DataSet`` objects. ``held_out``
        wraps the first ``len(data) // 50`` sentences and ``main`` wraps
        all the remaining ones.
    """
    # NOTE: pickle.load can execute arbitrary code while unpickling, so
    # these paths must only ever point at trusted files.
    with open(data_pickle, 'rb') as data_dump:
        data_sentences = pickle.load(data_dump)

    with open(labels_pickle, 'rb') as labels_dump:
        labels_sentences = pickle.load(labels_dump)

    # Normalize the mfccs
    if normalization:
        print("Normalizing")
        data = [pp.normalize_mfcc(s) for s in data_sentences]
    else:
        data = data_sentences

    # One-hot encode the labels. The else branch is required: without it
    # `labels` is undefined when to_one_hot is False and the return below
    # raises NameError.
    if to_one_hot:
        labels = [
            pp.to_one_hot(labels_scalar, NUM_CLASSES)
            for labels_scalar in labels_sentences
        ]
    else:
        labels = labels_sentences

    print("Preprocessing done")

    # The first 1/50th (2%) of the sentences is split off into its own
    # DataSet. Named eval_size so the builtin eval() is not shadowed.
    eval_size = len(data) // 50
    main_set = DataSet(np.array(data[eval_size:]),
                       np.array(labels[eval_size:]), batch_size)
    held_out_set = DataSet(np.array(data[:eval_size]),
                           np.array(labels[:eval_size]), batch_size)
    return main_set, held_out_set
コード例 #2
0
def file_to_oha(filepath=NOT_SPAM_DATA_PATH, label=1):
    """Encode the leading lines of a text file and pair each with ``label``.

    At most the first ``MAX_LINES`` lines of ``filepath`` are used. Each one
    is passed through ``clean_line`` and then ``to_one_hot``.

    Returns:
        A ``(encodings, labels)`` tuple of two equally long lists: the
        ``to_one_hot`` result for every processed line, and ``label``
        repeated once per line.
    """
    with open(filepath, 'r') as f:
        selected = f.readlines()[:MAX_LINES]
        # Clean and encode line by line, in file order.
        encodings = [to_one_hot(clean_line(raw)) for raw in selected]
    return encodings, [label] * len(encodings)
コード例 #3
0
def file_to_oha(filepath='data/simple/pos.txt', label=1):
    """Encode every line of a text file and pair each with a label.

    Newline characters are removed from each line before it is handed to
    ``to_one_hot``. A path ending in ``"neg.txt"`` forces the label to 0,
    regardless of the ``label`` argument.

    Returns:
        A ``(encodings, labels)`` tuple of two equally long lists: the
        ``to_one_hot`` result for every line, and the effective label
        repeated once per line.
    """
    # The file name wins over the caller-supplied label for negative files.
    effective_label = 0 if filepath.endswith("neg.txt") else label
    with open(filepath, 'r') as f:
        encodings = [
            to_one_hot(raw.replace('\n', '')) for raw in f.readlines()
        ]
    return encodings, [effective_label] * len(encodings)
コード例 #4
0
def predict(txt):
    """Classify a single text with the module-level ``clf``.

    The text is encoded with ``to_one_hot`` (with ``add_to_bag=False``),
    converted to a NumPy array and passed to ``clf.predict`` as a
    one-element batch.

    Returns:
        Whatever ``clf.predict`` returns for that batch; per the original
        author's note the predicted value is 1 or 0.
    """
    features = np.array(to_one_hot(txt, add_to_bag=False))
    single_sample_batch = [features]
    return clf.predict(single_sample_batch)
コード例 #5
0
def predict(txt):
    """Clean and classify a single text with the module-level ``clf``.

    The text goes through ``clean_line``, is encoded with ``to_one_hot``
    (with ``add_to_bag=False`` and the module-level ``bow``), converted to a
    NumPy array and passed to ``clf.predict`` as a one-element batch.

    Returns:
        Whatever ``clf.predict`` returns for that batch; per the original
        author's note the predicted value is 1 or 0.
    """
    cleaned = clean_line(txt)
    features = np.array(to_one_hot(cleaned, add_to_bag=False, bow=bow))
    single_sample_batch = [features]
    return clf.predict(single_sample_batch)