def create_base(self, **sk_params):
    n_features = self.n_features

    class ClassifierModule(nn.Module):
        def __init__(self):
            super().__init__()
            self.nn = nn.Sequential(
                nn.Linear(n_features, 1),
                nn.Sigmoid(),
                # nn.Linear(hidden_dim, hidden_dim),
                # nn.Sigmoid(),
                # nn.Dropout(0.05),
                # nn.Linear(hidden_dim, 1),
                # nn.Sigmoid()
            )

        def forward(self, X=None, **kwargs):
            if X is None:
                return None
            X = X.reshape(-1, 1, n_features)
            X = self.nn(X)
            X = torch.reshape(X, (-1, 1)).float()
            return X

    clf = NeuralNetBinaryClassifier(ClassifierModule, **sk_params)
    return clf
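# --- Usage sketch (not from the original source). `create_base` is a method,
# so it needs a host object carrying `n_features`; the tiny `_Wrapper` class
# below is an assumption for illustration. skorch expects float32 arrays for
# NeuralNetBinaryClassifier.
import numpy as np

class _Wrapper:
    def __init__(self, n_features):
        self.n_features = n_features
    create_base = create_base  # reuse the function above as a method

clf = _Wrapper(n_features=20).create_base(max_epochs=5, lr=0.05)
X = np.random.rand(64, 20).astype(np.float32)
y = np.random.randint(0, 2, size=64).astype(np.float32)
clf.fit(X, y)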
def parameterized_lenet():
    return NeuralNetBinaryClassifier(
        LeNet,
        optimizer=torch.optim.Adam,
        max_epochs=100,
        lr=0.01,
        batch_size=128,
        iterator_train__shuffle=True,  # Shuffle training data on each epoch
        train_split=predefined_split(dataset_test),
        callbacks=callback_list,
        device='cuda')
def parameterized_resnet152_96():
    return NeuralNetBinaryClassifier(
        ResNet152_96,
        optimizer=torch.optim.Adam,
        max_epochs=30,
        lr=0.01,
        batch_size=128,
        iterator_train__shuffle=True,
        train_split=predefined_split(dataset_test),
        callbacks=callback_list,
        device='cuda')
def parameterized_vgg11():
    return NeuralNetBinaryClassifier(
        VGG11,
        optimizer=torch.optim.Adamax,
        max_epochs=30,
        lr=0.001,
        batch_size=128,
        iterator_train__shuffle=True,
        # Supply the skorch framework with our own predefined test dataset
        train_split=predefined_split(dataset_test),
        callbacks=callback_list,
        device='cuda')
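# --- Sketch of the shared names the three factories above assume (an
# assumption, not the original code): `dataset_test` as a skorch Dataset fed
# to predefined_split, and `callback_list` as typical training callbacks.
# X_test / y_test stand for held-out float32 arrays.
from skorch.dataset import Dataset
from skorch.helper import predefined_split
from skorch.callbacks import Checkpoint, EarlyStopping, EpochScoring

dataset_test = Dataset(X_test, y_test)
callback_list = [
    EpochScoring(scoring='roc_auc', lower_is_better=False),
    EarlyStopping(monitor='valid_loss', patience=10),
    Checkpoint(dirname='checkpoints'),
]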
def run_100(task, task_df, args, threshold):
    reduce_lr = LRScheduler(
        policy='ReduceLROnPlateau',
        mode='min',
        factor=0.5,
        patience=1,
    )
    seeds = list(range(args.start_seed, args.start_seed + 100))
    for seed in tqdm(seeds, desc=f'{task} Runs'):
        logger.info(f"Splitting with seed {seed}")
        checkpoint = Checkpoint(dirname=args.modeldir / f'{task}_seed_{seed}')
        df = set_group_splits(task_df.copy(), group_col='hadm_id', seed=seed)
        vectorizer = TfidfVectorizer(sublinear_tf=True, ngram_range=(1, 2),
                                     binary=True, max_features=60_000)
        x_train = vectorizer.fit_transform(
            df.loc[df['split'] == 'train']['processed_note']).astype(np.float32)
        x_test = vectorizer.transform(
            df.loc[df['split'] == 'test']['processed_note']).astype(np.float32)
        # skorch expects dense float32 arrays
        x_train = np.asarray(x_train.todense())
        x_test = np.asarray(x_test.todense())
        vocab_sz = len(vectorizer.vocabulary_)

        y_train = df.loc[df['split'] == 'train'][f'{task}_label'].to_numpy()
        y_test = df.loc[df['split'] == 'test'][f'{task}_label'].to_numpy()

        clf = MLPModule(input_units=vocab_sz, output_units=1,
                        hidden_units=args.hidden_dim, num_hidden=1,
                        dropout=args.dropout_p, squeeze_output=True)
        net = NeuralNetBinaryClassifier(
            clf,
            max_epochs=args.max_epochs,
            lr=args.lr,
            device=args.device,
            optimizer=optim.Adam,
            optimizer__weight_decay=args.wd,
            batch_size=args.batch_size,
            verbose=1,
            # skorch instantiates callback classes with their defaults
            callbacks=[EarlyStopping, ProgressBar, checkpoint, reduce_lr],
            train_split=CVSplit(cv=0.15, stratified=True),
            iterator_train__shuffle=True,
            threshold=threshold,
        )
        net.set_params(callbacks__valid_acc=None)
        net.fit(x_train, y_train.astype(np.float32))
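        # --- Plausible continuation (a sketch, not the original): restore the
        # best checkpointed weights and score the held-out split; f1_score
        # would come from sklearn.metrics.
        net.load_params(checkpoint=checkpoint)  # weights from best valid loss
        y_pred = net.predict(x_test)            # applies the custom threshold
        logger.info(f"Seed {seed} F1: {f1_score(y_test, y_pred):.3f}")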
#####################################
arch_switcher = {
    'vgg11': VGG11,
    'vgg19': VGG19,
    'densenet121': DenseNet121,
    'densenet201': DenseNet201,
    'resnet18_96': ResNet18_96,
    'resnet152_96': ResNet152_96,
    'lenet': LeNet
}
get_arch = arch_switcher.get(args.architecture,
                             lambda: "Architecture does not exist")
net = NeuralNetBinaryClassifier(module=get_arch())
net.initialize()
net.load_params(f_params=args.parameter,
                f_optimizer=args.optimizer,
                f_history=args.history)

#####################################
#         CALCULATE METRICS         #
#####################################
print("Predicting labels...")
y_hat = net.predict(dataset_test)
print("Calculating accuracy...")
accuracy = metrics.accuracy_score(target, y_hat)
print("Calculating precision...")
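# --- The excerpt cuts off above; a plausible continuation with the same
# sklearn.metrics module (a sketch; `target` holds the ground-truth labels).
precision = metrics.precision_score(target, y_hat)
print("Calculating recall...")
recall = metrics.recall_score(target, y_hat)
print(f"accuracy={accuracy:.4f}  precision={precision:.4f}  recall={recall:.4f}")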
cp = Checkpoint(dirname='checkpoints/' + run)
train_end_cp = TrainEndCheckpoint(dirname='checkpoints/' + run,
                                  fn_prefix='from_train_end_')
load_state = LoadInitState(train_end_cp)

net = NeuralNetBinaryClassifier(
    module=AudioNet,
    criterion=nn.BCEWithLogitsLoss,
    max_epochs=5000,
    lr=0.01,
    optimizer=optim.SGD,
    optimizer__momentum=0.9,
    batch_size=160,
    device=device,
    callbacks=[
        ('tensorboard', TensorBoard(writer)),
        ('cp', cp),
        ('train_end_cp', train_end_cp),
        # ("load_state", load_state),
        ('early_stopping', EarlyStopping(patience=5)),
        ('lr_scheduler', LRScheduler(
            policy=ReduceLROnPlateau, monitor='valid_loss')),
    ],
)

print("Begin training")
try:
    y_train = np.concatenate((
        np.zeros((100,)),
        np.ones((100,)))).astype('float32')
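    # --- Plausible completion of the truncated try block (a sketch; the
    # training dataset name and the KeyboardInterrupt handler are
    # assumptions, not the original code).
    net.fit(dataset_train, y=y_train)
except KeyboardInterrupt:
    # allow manual interruption; the checkpoints above already persist progress
    pass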
dataset = CustomDataset(root=TRAIN_PATH, transform=TRANSFORM_AUG)
y = dataset.getY()
subject_id = dataset.getGroups()

# Skorch wrapper class for sklearn compatibility
X = SliceDataset(dataset)

# GroupKFold validation scheme, meaning samples from a given group
# (here a subject) won't be shared between splits; pass the splitter itself
# (not get_n_splits, which returns an int) so the groups are respected
group_kfold = GroupKFold(n_splits=N_SPLITS)

# Estimator and randomized search
net = NeuralNetBinaryClassifier(
    PaperCNN,
    device=DEVICE,
    criterion=torch.nn.BCELoss,
    iterator_train__shuffle=True,
    iterator_train__num_workers=8,
    train_split=None,  # RandomizedSearchCV handles validation
)
clf = RandomizedSearchCV(estimator=net,
                         param_distributions=GRID,
                         n_iter=N_ITER,
                         cv=group_kfold,
                         scoring='accuracy',
                         verbose=1,
                         refit=False)
search = clf.fit(X, y, groups=subject_id)

# Retrieve best results and save
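# --- A sketch of what `GRID` (used in the search above) might contain; the
# real search space isn't shown, so these keys are assumptions. skorch exposes
# net hyperparameters and, via the module__ prefix, the PaperCNN constructor
# arguments to sklearn searches.
GRID = {
    'lr': [1e-2, 1e-3, 1e-4],
    'max_epochs': [20, 40],
    'batch_size': [32, 64, 128],
    'module__dropout': [0.1, 0.3, 0.5],  # hypothetical PaperCNN kwarg
}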
if len(sys.argv) != 3:  # expects: train_dir test_dir
    sys.exit(1)
train_dir = sys.argv[1]
test_dir = sys.argv[2]

# Use CUDA
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(torch.cuda.get_device_name(device))

# Prepare data
train_feat = get_audio_feature(train_dir, negative_num=100, positive_num=100)
test_feat = get_audio_feature(test_dir, test_num=100)
test_dataset = AudioDataset(train_feat=train_feat,
                            test_feat=test_feat,
                            test_num=100)

net = NeuralNetBinaryClassifier(
    module=AudioNet,
    device=device,
)
net.initialize()
net.load_params(f_params='debate_weights.pkl')

result = net.predict(test_dataset)
result = result.reshape(-1)
np.save("C.npy", result)
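# --- Usage note (not in the original script): if class probabilities are
# needed instead of the thresholded 0/1 labels, skorch also provides
# predict_proba, whose second column is P(y=1).
probs = net.predict_proba(test_dataset)[:, 1]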