def main():
    """Evaluate saved k-fold predictions for a roster of classical ML models.

    Loads the train/validation/test splits, stacks them into one dataset,
    then for each model name loads 'preds/<name>_kfold_preds.npy' and prints
    a classification report plus manually computed binary metrics.
    """
    warnings.filterwarnings('ignore')

    arg_obj = args.get_input()
    args.print_args(arg_obj)
    # NOTE(review): computed but unused in this script; kept for parity with
    # the companion cross-validation script.
    crc_task_number = int(arg_obj.number) - 1

    ## Set seed to replicate experiments
    seed = 172
    np.random.seed(seed)

    train_features, train_labels = dataset_loader('train')
    print(train_features.shape, train_labels.shape)
    validation_features, validation_labels = dataset_loader('validation')
    print(validation_features.shape, validation_labels.shape)
    test_features, test_labels = dataset_loader('test')
    print(test_features.shape, test_labels.shape)

    # Evaluate over the union of all three splits, matching how the
    # k-fold predictions were generated.
    all_features = np.vstack(
        (train_features, validation_features, test_features))
    all_labels = np.hstack((train_labels, validation_labels, test_labels))
    print(all_labels.shape)

    # Model names whose out-of-fold predictions were saved to disk.
    models = ['LR', 'LDA', 'KNN', 'CART', 'RF', 'NB', 'SVM']

    for model in models:
        print(model)
        preds = np.load('preds/%s_kfold_preds.npy' % model)
        print(classification_report(all_labels, preds, digits=4))
        _print_binary_metrics(all_labels, preds)
        print()


def _print_binary_metrics(all_labels, preds):
    """Print accuracy, precision, recall, F1, specificity, NPV, average
    precision, and prediction/label sums for binary {0, 1} predictions.

    Divisions are left unguarded, matching the original behavior: with
    warnings suppressed, degenerate inputs yield nan/inf silently.
    """
    ave_prec = average_precision_score(all_labels, preds)
    acc = (preds == all_labels).sum() / len(preds)
    # Confusion-matrix terms, computed via elementwise boolean products.
    tp = ((preds == 1) * (preds == all_labels)).sum()
    fp = ((preds == 1) * (preds != all_labels)).sum()
    tn = ((preds == 0) * (preds == all_labels)).sum()
    fn = ((preds == 0) * (preds != all_labels)).sum()
    prec = tp / (tp + fp)
    rec = tp / (tp + fn)
    f1 = 2 * ((prec * rec) / (prec + rec))
    spec = tn / (tn + fp)  # true-negative rate
    npv = tn / (tn + fn)   # negative predictive value
    print('Acc: ', acc)
    print('Prec: ', prec)
    print('Rec: ', rec)
    print('F1: ', f1)
    print('Spec: ', spec)
    print('NPV: ', npv)
    print('Ave Prec: ', ave_prec)
    print('Sum preds: ', np.sum(preds))
    print('Sum labels: ', np.sum(all_labels))
def main():
    """Run 5-fold cross-validation for one model chosen by CRC task index.

    Loads all dataset splits, stacks them into one dataset, selects a single
    classifier from a fixed roster using the 1-indexed ``number`` argument,
    and saves its out-of-fold predictions to
    'preds/<name>_kfold_subset_preds.npy'.
    """
    warnings.filterwarnings('ignore')

    arg_obj = args.get_input()
    args.print_args(arg_obj)
    # Convert the 1-indexed CRC task-array id to a 0-indexed roster index.
    crc_task_number = int(arg_obj.number) - 1

    ## Set seed to replicate experiments
    seed = 172
    np.random.seed(seed)

    train_features, train_labels = dataset_loader('train')
    print(train_features.shape, train_labels.shape)
    validation_features, validation_labels = dataset_loader('validation')
    print(validation_features.shape, validation_labels.shape)
    test_features, test_labels = dataset_loader('test')
    print(test_features.shape, test_labels.shape)

    # Parameters that can be changed
    # ------------------------------
    # number of estimators for the Random Forests model
    trees = 2
    # seed of pseudo random number generator for the stochastic estimators
    # (note: intentionally shadows the dataset seed above)
    seed = 1

    # Create the machine learning models
    models = [
        ('LR', LogisticRegression(random_state=seed)),
        ('LDA', LinearDiscriminantAnalysis()),
        ('KNN', KNeighborsClassifier()),
        ('CART', DecisionTreeClassifier(random_state=seed)),
        ('RF', RandomForestClassifier(n_estimators=trees, random_state=seed)),
        ('NB', GaussianNB()),
        ('SVM', SVC(random_state=seed)),
    ]

    ## Select model based on task array index
    models = [models[crc_task_number]]

    all_features = np.vstack(
        (train_features, validation_features, test_features))
    all_labels = np.hstack((train_labels, validation_labels, test_labels))
    print(all_features.shape, all_labels.shape)

    for name, model in models:
        print('Cross validating: ', name)
        model_preds = cross_val_predict(model, all_features, all_labels, cv=5)
        np.save('preds/%s_kfold_subset_preds.npy' % name, model_preds)
def main():
    """Compute and save a power spectrum for every pulse file in one split."""
    t_start = time.time()

    arg_obj = args.get_input()
    args.print_args(arg_obj)
    number = int(arg_obj.number) - 1

    #split='train'
    split = 'validation'
    #split='test'
    vid_paths, lmrk_paths, pulse_paths = get_paths(split=split,
                                                   part_number=number)

    # Which path component gets swapped for 'spectrums' — the train layout
    # differs from validation/test by one directory level (presumably; verify
    # against the dataset layout).
    spect_idx = -4 if split == 'train' else -3

    print(pulse_paths.shape)
    print()

    for pulse_path in pulse_paths:
        # Mirror the pulse path under a 'spectrums' directory.
        parts = pulse_path.split('/')
        parts[spect_idx] = 'spectrums'
        spectrum_dir = '/'.join(parts[:-1])
        spectrum_file = parts[-1]
        if not os.path.isdir(spectrum_dir):
            os.makedirs(spectrum_dir)
        spectrum_path = os.path.join(spectrum_dir, spectrum_file)
        print(pulse_path)
        print(spectrum_path)

        pulse = np.load(pulse_path)
        freq, density = spectral_features(pulse)
        print(density.shape)
        np.save(spectrum_path, density)
        print()

    t_end = time.time()
    print('Took %.3f seconds.' % (t_end - t_start))
    return
# NOTE(review): this appears to be the tail of an evaluation routine — the
# enclosing `def` is not visible in this chunk, so the statements are kept
# byte-identical and only commentary is added.
model.eval()
total_tokens = 0
total_loss = 0.0
start_time = time.time()
step = 0
for inputs in data_loader:
    step += 1
    token_ids, type_ids, pos_ids, generation_mask, tgt_label, tgt_pos = inputs
    logits = model(token_ids, type_ids, pos_ids, generation_mask, tgt_pos)
    # Sum (not mean) so the loss can be averaged over all tokens at the end.
    loss = F.cross_entropy(logits, tgt_label, reduction='sum')
    total_loss += loss.numpy()[0]
    total_tokens += tgt_label.shape[0]
# Per-token average loss and its perplexity.
avg_loss = total_loss / total_tokens
ppl = math.exp(avg_loss)
avg_speed = (time.time() - start_time) / step
print('loss: %.4f - ppl: %.4f - %.3fs/step\n' % (avg_loss, ppl, avg_speed))
# Restore training mode after evaluation.
model.train()


if __name__ == '__main__':
    args = parse_args()
    print_args(args)
    # Spawn one process per GPU for multi-GPU runs; otherwise run in-process.
    if args.n_gpus > 1:
        dist.spawn(main, args=(args, ), nprocs=args.n_gpus)
    else:
        main(args)
def main():
    """Run the pretrained 3D-CNN over every video of a split, saving one
    predicted pulse waveform per video."""
    t0 = time.time()

    arg_obj = args.get_input()
    args.print_args(arg_obj)
    number = int(arg_obj.number) - 1
    print('Using number: ', number)

    tk = int(arg_obj.tk)
    sk = int(arg_obj.sk)
    model_load_path = arg_obj.model_load_path
    arg_obj.fps = IN_FPS

    ## Create processing objects and the frame grabber
    shape_predictor = arg_obj.shape_predictor_path

    # Default weights are chosen by stride (sk) when sk > 1, otherwise by
    # temporal kernel size (tk).
    if model_load_path is None:
        model_load_path = ('model_weights/3dcnn_sk%d' % sk if sk > 1
                           else 'model_weights/3dcnn_tk%d' % tk)

    ## Make sure a valid model path was given
    if not os.path.exists(model_load_path):
        print('Incorrect path to model weights for 3dcnn. Make sure sk is in \
[1,20] and tk is in [3,5,7,...,25]. Exiting.')
        return -1

    ## Use GPU if CUDA is configured and load model to correct device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    hrcnn = HRCNN(drop_p=0, t_kern=tk).float().to(device)

    ## Load model specified by the path
    checkpoint = torch.load(model_load_path, map_location=device)
    hrcnn.load_state_dict(checkpoint['model_state_dict'])
    hrcnn.eval()

    split = 'train'
    #split='val'
    #split='test'
    vid_paths, lmrk_paths, out_paths = get_paths(split=split,
                                                 part_number=number)

    # Ensure the output directory (shared by all outputs of this part) exists.
    out_dir = '/'.join(out_paths[0].split('/')[:-1])
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    print(vid_paths.shape)
    print(lmrk_paths.shape)
    print(out_paths.shape)
    print()

    for vid_path, lmrk_path, out_path in zip(vid_paths, lmrk_paths, out_paths):
        lmrks = read_lmrks(lmrk_path)
        all_bad, lmrks = clean_lmrks(lmrks)
        video = prep_video(vid_path, lmrks, all_bad=all_bad)
        waveform = CNN3D_waveform(hrcnn, video, IN_FPS, sk, device=device)
        np.save(out_path, waveform)

    t1 = time.time()
    print('Took %.3f seconds.' % (t1 - t0))
    return