def training_data_generator(training_apps, feats, malwares, path, batch_size=64):
    """Yield ``(X, y)`` training batches forever, cycling over ``training_apps``.

    The apps are walked in order in slices of ``batch_size``. The last slice
    of a pass may be shorter; after it the generator wraps around to the
    start. No sampling happens here: the caller decides which apps belong to
    the training set before passing them in.

    Args:
        training_apps: Sliceable, sized sequence of app identifiers.
        feats: Forwarded unchanged to ``preprocess_app``.
        malwares: Container of app identifiers considered malware; only
            membership (``app in malwares``) is used.
        path: Forwarded unchanged to ``preprocess_app``.
        batch_size: Maximum number of apps per yielded batch.

    Yields:
        A tuple ``(X, y)`` of numpy arrays. ``X`` stacks the results of
        ``preprocess_app`` for each app in the batch; ``y`` holds one one-hot
        row per app: ``[1, 0]`` for malware, ``[0, 1]`` for benign.

    Raises:
        ValueError: If ``training_apps`` is empty or ``batch_size`` is less
            than 1 (raised on the first ``next()`` call). Either case would
            otherwise produce an endless stream of empty batches.
    """
    total = len(training_apps)
    if total == 0:
        raise ValueError("training_apps must not be empty")
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    gen_state = 0
    while True:
        # Clamp the slice end so the final batch of a pass is simply shorter.
        end = min(gen_state + batch_size, total)
        apps = training_apps[gen_state:end]
        y = [
            np.array([1, 0]) if app in malwares else np.array([0, 1])  # malware / benign
            for app in apps
        ]
        X = [preprocess_app(app, feats, path) for app in apps]
        # Wrap as soon as the end is reached (>=, not >): otherwise a dataset
        # whose size is an exact multiple of batch_size yields one empty
        # batch at the end of every pass.
        gen_state = 0 if end >= total else end
        yield np.array(X), np.array(y)
def testing_data(train_test_apps, feats, malwares, path): testing_apps = np.random.choice(train_test_apps, int(len(train_test_apps) * 0.34)) # 34% for testing xs = [] ys = [] for testing_app in testing_apps: if testing_app in malwares: ys.append(np.array([1, 0])) # malware else: ys.append(np.array([0, 1])) # benign xs.append(preprocess_app(testing_app, feats, path)) xs = np.array(xs) ys = np.array(ys) np.save('testing_xs', xs) np.save('testing_ys', ys) return xs, ys