Ejemplo n.º 1
0
 def update_mapping(mapping, label_dir):
     """Add any unseen segment labels from ``label_dir`` to ``mapping``.

     Every label file in ``label_dir`` is read via ``read_label_file``; each
     label not already present is assigned the next sequential integer id
     (the current mapping size). ``mapping`` is mutated in place and also
     returned for convenience.
     """
     label_files = [os.path.join(label_dir, name) for name in os.listdir(label_dir)]
     for file in label_files:
         _, segment_labels = read_label_file(file)
         for label in segment_labels:
             # Membership and size checks work directly on the dict;
             # the original `.keys()` calls were redundant.
             if label not in mapping:
                 mapping[label] = len(mapping)
     return mapping
def truncNgramMLP():
    """Train and evaluate an MLP on TF-IDF n-gram features of padded sequences."""
    data_file = "./Data/train/real_train_data.csv"
    label_file = "./Data/train/real_train_label.csv"

    # Alternative (smaller) training set:
    # data_file = "./Data/train/train.csv"
    # label_file = "./Data/train/train_label.csv"

    X = read_data_file(data_file)
    # Pad/truncate every sequence to exactly 328 tokens (post-padding).
    X = pad_sequences(X, maxlen=328, dtype='int32', padding='post', truncating='post')
    y = read_label_file(label_file)

    # Re-encode each integer sequence as one comma-separated string so the
    # word-level TF-IDF vectorizer can form n-grams over token runs.
    str_X = [','.join(str(tok) for tok in row) for row in X]

    df = pd.DataFrame(str_X, index=range(len(str_X)), columns=['data'])
    tfidf = TfidfVectorizer(analyzer="word", max_features=5000, ngram_range=(2, 4))
    X_transformed = tfidf.fit_transform(df.data)

    X_train, X_test, y_train, y_test = train_test_split(
        X_transformed, y, test_size=0.2, random_state=42)

    print("Training and testing split was successful.")
    print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

    model = MLP(X_train.shape[1])
    print(model.summary())

    tb_callback = TensorBoard(log_dir='./logs/trunc_ngram_mlp', write_graph=X.shape[1])

    opt = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=1e-4, amsgrad=False)
    # opt = SGD(lr=0.01, momentum=0.9, decay=1e-6, nesterov=False)

    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
    model.fit(X_train, y_train, callbacks=[tb_callback], epochs=20, batch_size=128)

    score, acc = model.evaluate(X_test, y_test, verbose=2, batch_size=128)
    print("score: %.2f" % (score))
    print("acc: %.2f" % (acc))
Ejemplo n.º 3
0
    def generate_segments(video_dir, label_dir, segment_dir):
        """Cut every labelled segment out of each video and save it as ``.npy``.

        For each video in ``video_dir``, each (window, action) pair from its
        label file is read as frames, written to ``segment_dir`` as
        ``<video-stem>.<i>.npy``, and described by a metadata dict. Frame
        windows use one-based indexing.

        Returns a dict mapping segment name -> metadata dict.
        """
        if not os.path.exists(segment_dir):
            os.makedirs(segment_dir)

        video_files = sorted(os.listdir(video_dir))
        action_to_logit_dict, _ = read_mapping_file(MAPPING_FILE)

        all_dict = {}
        progress = tqdm(video_files)
        for name in progress:
            progress.set_postfix({'video': name})

            stem = str(name)[:-len(VIDEO_EXT)]
            vid_file = os.path.join(video_dir, name)
            label_file = os.path.join(label_dir, stem + LABEL_EXT)

            windows, actions = read_label_file(label_file)
            for idx, action in enumerate(actions):
                window = windows[idx]
                segment_name = '.'.join([stem, str(idx)])
                segment_file = os.path.join(segment_dir, segment_name + '.npy')
                frames = read_segment_from_video(vid_file, window)
                np.save(segment_file, frames)

                all_dict[segment_name] = {
                    'action': action,
                    'label': action_to_logit_dict[action],
                    'window': window,
                    'vid-file': vid_file,
                    'label-file': label_file,
                    'segment-file': segment_file,
                    'n-frames': len(frames)
                }
        return all_dict
Ejemplo n.º 4
0
def extract_label(in_file_path, out_file_path, short_index):
    """Read labels, drop the rows listed in ``short_index``, write to CSV.

    Parameters
    ----------
    in_file_path : str
        Path of the label file consumed by ``read_label_file``.
    out_file_path : str
        Destination CSV path (written without index or header).
    short_index : label or list of labels
        Row index labels to remove before writing.
    """
    y = read_label_file(in_file_path)
    output = pd.DataFrame(data=y)
    # Bug fix: DataFrame.drop is NOT in-place by default; the original code
    # discarded the returned frame, so the rows were never actually removed.
    output = output.drop(short_index, axis=0)
    output.to_csv(out_file_path, index=False, header=False)
Ejemplo n.º 5
0
# %% [markdown]
# ## Plot PE Header length distribution - Test data
# Credit: [@MengdanCode](https://github.com/MengdanCode)
# %%
from data import read_data_file, read_label_file
import numpy as np
from matplotlib import pyplot as plt

data_file = "./Data/train/train.csv"
label_file = "./Data/train/train_label.csv"

X = read_data_file(data_file)
y = read_label_file(label_file)

# %%
# Length (PE header size) of every sample, as a NumPy array.
X_len = np.array([len(sample) for sample in X])

print('X_len generated')

print(X_len.min())
print(X_len.max())

fig_per_hour = plt.figure()
per_hour = fig_per_hour.add_subplot(111)
counts, bins, patches = per_hour.hist(X_len,
                                      bins=100,
                                      normed=False,