def test_utils(self):
    app.config['ENV'] = 'TESTING'

    # allowed file types
    self.assertTrue(utils.allowed_file('filename.jpg'))
    self.assertFalse(utils.allowed_file('filename.gif'))
    self.assertTrue(utils.allowed_file('filename.png'))
    self.assertFalse(utils.allowed_file('filename.pdf'))

    # file search function
    self.assertTrue(utils.find_file('1.jpg'))
    self.assertTrue(utils.find_file('2.jpg'))
    self.assertFalse(utils.find_file('3.jpg'))

    # download function
    file = utils._download(
        'http://www.reportingday.com/wp-content/uploads/2018/06/Cat-Sleeping-Pics.jpg'
    )
    self.assertTrue(file['success'])
    self.assertEqual(file['name'], 'Cat-Sleeping-Pics.jpg')

    # remove function
    self.assertTrue(utils.remove_file(file['name']))
    self.assertFalse(utils.remove_file('randomname.jpg'))

    # predict function
    self.assertEqual(utils.make_prediction('1.jpg'), 'dog')
    self.assertEqual(utils.make_prediction('2.jpg'), 'cat')
    self.assertRaises(FileNotFoundError, utils.make_prediction, 'asd.jpg')
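
# utils.allowed_file() and utils.find_file() are not shown in this test
# module. A minimal sketch of what the assertions above imply -- an
# extension whitelist (jpg/png pass, gif/pdf fail) and an existence check in
# the upload folder. The UPLOAD_FOLDER config key is an assumption, not the
# project's confirmed setting.
import os

ALLOWED_EXTENSIONS = {'jpg', 'png'}  # assumed from the assertions above

def allowed_file(filename):
    # accept only names whose extension is on the whitelist
    return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS

def find_file(filename):
    # True if the file already exists in the configured upload folder
    return os.path.isfile(os.path.join(app.config['UPLOAD_FOLDER'], filename))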
def main():
    train_ds = MyImageFolder(root_dir="train/", transform=config.train_transforms)
    val_ds = MyImageFolder(root_dir="val/", transform=config.val_transforms)
    train_loader = DataLoader(
        train_ds,
        batch_size=config.BATCH_SIZE,
        num_workers=config.NUM_WORKERS,
        pin_memory=config.PIN_MEMORY,
        shuffle=True,
    )
    val_loader = DataLoader(
        val_ds,
        batch_size=config.BATCH_SIZE,
        num_workers=config.NUM_WORKERS,
        pin_memory=config.PIN_MEMORY,
        shuffle=True,
    )
    loss_fn = nn.CrossEntropyLoss()
    model = Net(net_version="b0", num_classes=10).to(config.DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=config.LEARNING_RATE)
    scaler = torch.cuda.amp.GradScaler()

    if config.LOAD_MODEL:
        load_checkpoint(torch.load("my_checkpoint.pth.tar"), model, optimizer)

    make_prediction(model, config.val_transforms, 'test/', config.DEVICE)
    check_accuracy(val_loader, model, config.DEVICE)

    for epoch in range(config.NUM_EPOCHS):
        train_fn(train_loader, model, optimizer, loss_fn, scaler, config.DEVICE)
        check_accuracy(val_loader, model, config.DEVICE)
        checkpoint = {
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        save_checkpoint(checkpoint)
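
# save_checkpoint() and load_checkpoint() are imported from elsewhere in the
# project. A minimal sketch of the standard PyTorch pattern they follow,
# assuming the checkpoint dict keeps the 'state_dict' and 'optimizer' keys
# built in the training loop above:
import torch

def save_checkpoint(state, filename="my_checkpoint.pth.tar"):
    # serialize model and optimizer state so training can resume later
    torch.save(state, filename)

def load_checkpoint(checkpoint, model, optimizer):
    # restore the weights and the optimizer state (momentum buffers, etc.)
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])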
def main():
    models_path = ['./models/one/', './models/two/', './models/three/']
    users_path = './data/users.tsv'
    history_path = './data/history.tsv'

    test = pd.read_csv(sys.argv[1], sep="\t")
    users = pd.read_csv(users_path, delimiter='\t')
    history = pd.read_csv(history_path, delimiter='\t')

    estimators = load_estimators(models_path)
    lmbda = 0.23

    (users_features, publishers_features) = get_history_users_features(history.merge(users))
    df = get_test_features(test, users.merge(users_features, how='left'), publishers_features)

    features = [
        'duration', 'publishers_size', 'time_start', 'time_end',
        'day_of_week_start', 'day_of_week_end', 'day_start_is_weekend',
        'day_end_is_weekend', 'time_start_harmonic_0', 'time_start_harmonic_1',
        'time_end_harmonic_0', 'time_end_harmonic_1', 'cpm', 'hour_start',
        'hour_end', 'audience_size', 'sex', 'age', 'city_id', 'user_mean_cpm',
        'user_mean_is_weekend', 'user_mean_time', 'user_mean_time_harmonic_0',
        'user_mean_time_harmonic_1', 'user_mean_publisher',
        'user_unique_publishers_cnt', 'publisher_mean_time',
        'publisher_mean_time_harmonic_0', 'publisher_mean_time_harmonic_1',
        'publisher_mean_is_weekend', 'publisher_mean_cpm',
        'publisher_mean_user_id', 'publisher_mean_sex', 'publisher_mean_age',
        'publisher_mean_city_id', 'publisher_unique_users_cnt'
    ]

    prediction = make_prediction(df[features].values, estimators, box_cox_lmbda=lmbda)
    prediction.to_csv(sys.stdout, sep="\t", index=False, header=True)
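
# load_estimators() and make_prediction() are defined elsewhere. The
# box_cox_lmbda argument suggests the target was Box-Cox transformed during
# training, so a plausible sketch averages the ensemble in the transformed
# space and then inverts the transform. The single 'prediction' output
# column is an assumption, not the real submission header.
import numpy as np
import pandas as pd
from scipy.special import inv_boxcox

def make_prediction(X, estimators, box_cox_lmbda):
    # average the estimators' outputs, then map back to the original scale
    preds = np.mean([est.predict(X) for est in estimators], axis=0)
    return pd.DataFrame({'prediction': inv_boxcox(preds, box_cox_lmbda)})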
def guessPersonType(clima, ambiente, agua, zona, distancia):
    model = bayes.BayesModel('Atractivos', constants.labels.personas.tipoPersona)
    model.add_feature(constants.labels.personas.clima)
    model.add_feature(constants.labels.personas.distancia)
    model.add_feature(constants.labels.personas.zona)
    model.add_feature(constants.labels.personas.ambiente)
    model.add_feature(constants.labels.personas.agua)
    model.loadModel()
    return make_prediction(
        pd.concat([
            model.getProbabilities(constants.labels.personas.clima, clima),
            model.getProbabilities(constants.labels.personas.distancia, distancia),
            model.getProbabilities(constants.labels.personas.zona, zona),
            model.getProbabilities(constants.labels.personas.ambiente, ambiente),
            model.getProbabilities(constants.labels.personas.agua, agua),
        ], axis=1))
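
# make_prediction() receives the per-feature conditional probabilities
# concatenated column-wise. Assuming BayesModel.getProbabilities() returns
# one column per feature indexed by class label (a naive Bayes layout), a
# minimal sketch multiplies across features and picks the most likely class:
def make_prediction(probabilities):
    # product of per-feature likelihoods for each class row, then argmax
    return probabilities.prod(axis=1).idxmax()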
def main(args):
    # np.random.seed(432)
    # torch.random.manual_seed(432)
    try:
        os.makedirs(args.outpath)
    except OSError:
        pass
    experiment_path = utils.get_new_model_path(args.outpath)
    train_writer = SummaryWriter(os.path.join(experiment_path, 'train_logs'))
    val_writer = SummaryWriter(os.path.join(experiment_path, 'val_logs'))
    trainer = train.Trainer(train_writer, val_writer)

    # build dataframes with file names and true labels
    # train
    ann_file = '/data/iNat/train2019.json'
    with open(ann_file) as data_file:
        train_anns = json.load(data_file)
    train_anns_df = pd.DataFrame(train_anns['annotations'])[['image_id', 'category_id']]
    train_img_df = pd.DataFrame(train_anns['images'])[['id', 'file_name']].rename(columns={'id': 'image_id'})
    df_train_file_cat = pd.merge(train_img_df, train_anns_df, on='image_id')

    # valid
    valid_ann_file = '/data/iNat/val2019.json'
    with open(valid_ann_file) as data_file:
        valid_anns = json.load(data_file)
    valid_anns_df = pd.DataFrame(valid_anns['annotations'])[['image_id', 'category_id']]
    valid_img_df = pd.DataFrame(valid_anns['images'])[['id', 'file_name']].rename(columns={'id': 'image_id'})
    df_valid_file_cat = pd.merge(valid_img_df, valid_anns_df, on='image_id')

    # test
    ann_file = '/data/iNat/test2019.json'
    with open(ann_file) as data_file:
        test_anns = json.load(data_file)
    test_img_df = pd.DataFrame(test_anns['images'])[['id', 'file_name']].rename(columns={'id': 'image_id'})

    # make dataloaders
    ID_train = df_train_file_cat.file_name.values
    labels_train = df_train_file_cat.category_id.values
    training_set = Dataset.Dataset(ID_train, labels_train, root='/data/iNat/train_val/')  # train

    ID_test = df_valid_file_cat.file_name.values
    labels_test = df_valid_file_cat.category_id.values
    test_set = Dataset.Dataset(ID_test, labels_test, root='/data/iNat/train_val/')  # valid

    ID_to_sent = test_img_df.file_name.values
    to_sent_dataset = Dataset.Dataset_to_sent(ID_to_sent, root='/data/iNat/test/')

    trainloader = DataLoader(training_set, batch_size=args.batch_size, shuffle=True, num_workers=15)
    evalloader = DataLoader(test_set, batch_size=args.batch_size, shuffle=False, num_workers=15)
    to_sent_loader = DataLoader(to_sent_dataset, batch_size=args.batch_size, shuffle=False, num_workers=15)

    # pretrained backbone, frozen, with a new classification head
    resnet = torchvision.models.resnet18(pretrained=True)
    classifier = nn.Linear(512, 1010)
    for param in resnet.parameters():
        param.requires_grad = False
    resnet.fc = classifier
    model = resnet

    # opt = torch.optim.Adam(model.parameters())
    opt = torch.optim.SGD(model.parameters(), lr=0.001)
    loss_func = nn.CrossEntropyLoss()
    schedule = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=args.epochs - 1)

    for epoch in range(args.epochs):
        print('Epoch {}/{}'.format(epoch, args.epochs - 1))

        # unfreeze the backbone for full fine-tuning late in training
        # if epoch == args.epochs - 1:
        if epoch == 15:
            for param in model.parameters():
                param.requires_grad = True

        trainer.train_epoch(model, opt, trainloader, schedule, loss_func)
        metrics = trainer.eval_epoch(model, evalloader, loss_func)

        state = dict(
            epoch=epoch,
            model_state_dict=model.state_dict(),
            optimizer_state_dict=opt.state_dict(),
            loss=metrics['loss'],
            accuracy=metrics['acc'],
            global_step=trainer.global_step,
        )
        export_path = os.path.join(experiment_path, 'last.pth')
        torch.save(state, export_path)
        print(metrics['loss'])

    # save predictions to csv
    utils.make_prediction(model, to_sent_loader, test_img_df.image_id.values, submit_name=args.csv_name)
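
# utils.make_prediction() is the project's submission writer. A sketch of
# what the call suggests: run the model over the unlabeled loader and pair
# each image id with its argmax class. It assumes to_sent_loader yields bare
# image tensors and that the 'id'/'predicted' column names match the
# expected submission format -- both are guesses.
import pandas as pd
import torch

def make_prediction(model, loader, image_ids, submit_name='submission.csv'):
    model.eval()
    preds = []
    with torch.no_grad():
        for images in loader:
            preds.extend(model(images).argmax(dim=1).cpu().numpy())
    pd.DataFrame({'id': image_ids, 'predicted': preds}).to_csv(submit_name, index=False)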
t_start = t()

# data = pd.read_csv('train.csv', parse_dates=['epoch'])
# _ = process_for_train()

data = pd.read_csv('test.csv', parse_dates=['epoch'])
test_sat_id = np.unique(data['sat_id'].values)
sat_datas_test = process_for_predict(data)

submission = {}
submission['id'] = torch.tensor([]).type(torch.LongTensor)
for name in targets:
    submission[name] = torch.tensor([])

model = torch.load('models/model.pt')
for sat_id in test_sat_id:
    data_test = sat_datas_test[sat_id]
    model.load_state_dict(torch.load(f'models/{sat_id}/state_dict.pt'))
    model.eval()

    obs_ids = torch.from_numpy(data_test['id'].values)
    data_test = data_test.drop(['id'], axis=1)

    print(f'Predicting for satellite {sat_id}')
    prediction = make_prediction(model=model, data=data_test, method=method)

    submission['id'] = torch.cat((submission['id'], obs_ids), axis=0)
    for i, name in enumerate(targets):
        submission[name] = torch.cat((submission[name], prediction[:, i]), axis=0)

submission = pd.DataFrame(submission)
submission.to_csv(f'submission-{method}.csv', index=False)
print(f'Total time: {t() - t_start}')
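
# make_prediction() is defined elsewhere; the loop above only shows that it
# returns an (observations x targets) tensor per satellite. A minimal
# sketch, assuming `method` selects nothing more exotic than a plain
# forward pass over the feature columns:
import torch

def make_prediction(model, data, method):
    # the real helper presumably branches on `method`; this sketch ignores it
    with torch.no_grad():
        features = torch.from_numpy(data.values).float()
        return model(features)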
def main():
    train_ds = DRDataset(
        images_folder="train/images_preprocessed_1000/",
        path_to_csv="train/trainLabels.csv",
        transform=config.val_transforms,
    )
    val_ds = DRDataset(
        images_folder="train/images_preprocessed_1000/",
        path_to_csv="train/valLabels.csv",
        transform=config.val_transforms,
    )
    test_ds = DRDataset(
        images_folder="test/images_preprocessed_1000",
        path_to_csv="train/trainLabels.csv",
        transform=config.val_transforms,
        train=False,
    )
    test_loader = DataLoader(test_ds, batch_size=config.BATCH_SIZE, num_workers=6, shuffle=False)
    train_loader = DataLoader(
        train_ds,
        batch_size=config.BATCH_SIZE,
        num_workers=config.NUM_WORKERS,
        pin_memory=config.PIN_MEMORY,
        shuffle=False,
    )
    val_loader = DataLoader(
        val_ds,
        batch_size=config.BATCH_SIZE,
        num_workers=2,
        pin_memory=config.PIN_MEMORY,
        shuffle=False,
    )
    loss_fn = nn.MSELoss()

    model = EfficientNet.from_pretrained("efficientnet-b3")
    model._fc = nn.Linear(1536, 1)
    model = model.to(config.DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=config.LEARNING_RATE, weight_decay=config.WEIGHT_DECAY)
    scaler = torch.cuda.amp.GradScaler()

    if config.LOAD_MODEL and config.CHECKPOINT_FILE in os.listdir():
        load_checkpoint(torch.load(config.CHECKPOINT_FILE), model, optimizer, config.LEARNING_RATE)

    # Run after training is done and you've achieved a good result on the
    # validation set; then run train_blend.py to use the information about
    # both eyes concatenated
    get_csv_for_blend(val_loader, model, "../train/val_blend.csv")
    get_csv_for_blend(train_loader, model, "../train/train_blend.csv")
    get_csv_for_blend(test_loader, model, "../train/test_blend.csv")
    make_prediction(model, test_loader, "submission_.csv")
    import sys
    sys.exit()
    # make_prediction(model, test_loader)

    for epoch in range(config.NUM_EPOCHS):
        train_one_epoch(train_loader, model, optimizer, loss_fn, scaler, config.DEVICE)

        # evaluate on validation
        preds, labels = check_accuracy(val_loader, model, config.DEVICE)
        print(
            f"QuadraticWeightedKappa (Validation): {cohen_kappa_score(labels, preds, weights='quadratic')}"
        )

        # evaluate on train
        # preds, labels = check_accuracy(train_loader, model, config.DEVICE)
        # print(f"QuadraticWeightedKappa (Training): {cohen_kappa_score(labels, preds, weights='quadratic')}")

        if config.SAVE_MODEL:
            checkpoint = {
                "state_dict": model.state_dict(),
                "optimizer": optimizer.state_dict(),
            }
            save_checkpoint(checkpoint, filename=f"b3_{epoch}.pth.tar")
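
# make_prediction() writes the submission here. Since the head is a single
# regression unit trained with MSELoss, a plausible sketch rounds and clips
# the continuous output to the five DR grades. It assumes the test loader
# yields (image, label, filename) tuples and that the 'image'/'level'
# columns match the competition format -- both are guesses.
import pandas as pd
import torch

def make_prediction(model, loader, output_csv="submission.csv"):
    model.eval()
    all_preds, all_files = [], []
    with torch.no_grad():
        for images, _, files in loader:
            preds = model(images.to(config.DEVICE)).squeeze(1)
            # map continuous scores to integer grades 0..4
            preds = preds.round().clamp(0, 4).long().cpu().numpy()
            all_preds.extend(preds)
            all_files.extend(files)
    pd.DataFrame({"image": all_files, "level": all_preds}).to_csv(output_csv, index=False)
    model.train()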