Example #1
    def test_utils(self):
        app.config['ENV'] = 'TESTING'
        # allowed file types
        self.assertTrue(utils.allowed_file('filename.jpg'))
        self.assertFalse(utils.allowed_file('filename.gif'))
        self.assertTrue(utils.allowed_file('filename.png'))
        self.assertFalse(utils.allowed_file('filename.pdf'))

        # file search function
        self.assertTrue(utils.find_file('1.jpg'))
        self.assertTrue(utils.find_file('2.jpg'))
        self.assertFalse(utils.find_file('3.jpg'))

        # download function
        file = utils._download(
            'http://www.reportingday.com/wp-content/uploads/2018/06/Cat-Sleeping-Pics.jpg'
        )
        self.assertTrue(file['success'])
        # assumption: _download keeps the basename of the downloaded URL
        self.assertEqual(file['name'], 'Cat-Sleeping-Pics.jpg')

        # remove function
        self.assertTrue(utils.remove_file(file['name']))
        self.assertFalse(utils.remove_file('randomname.jpg'))

        # predict function
        self.assertEqual(utils.make_prediction('1.jpg'), 'dog')
        self.assertEqual(utils.make_prediction('2.jpg'), 'cat')
        self.assertRaises(FileNotFoundError, utils.make_prediction, 'asd.jpg')
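The assertions above imply that utils.allowed_file whitelists only .jpg and .png extensions. A minimal sketch of such a helper, assuming a simple extension whitelist (the real utils module may differ):

ALLOWED_EXTENSIONS = {'jpg', 'png'}  # implied by the assertions above

def allowed_file(filename):
    # Accept only filenames whose extension is in the whitelist.
    return ('.' in filename
            and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS)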
Example #2
def main():
    train_ds = MyImageFolder(root_dir="train/",
                             transform=config.train_transforms)
    val_ds = MyImageFolder(root_dir="val/", transform=config.val_transforms)
    train_loader = DataLoader(train_ds,
                              batch_size=config.BATCH_SIZE,
                              num_workers=config.NUM_WORKERS,
                              pin_memory=config.PIN_MEMORY,
                              shuffle=True)
    val_loader = DataLoader(val_ds,
                            batch_size=config.BATCH_SIZE,
                            num_workers=config.NUM_WORKERS,
                            pin_memory=config.PIN_MEMORY,
                            shuffle=False)  # no need to shuffle validation data

    loss_fn = nn.CrossEntropyLoss()
    model = Net(net_version="b0", num_classes=10).to(config.DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=config.LEARNING_RATE)
    scaler = torch.cuda.amp.GradScaler()

    if config.LOAD_MODEL:
        load_checkpoint(torch.load("my_checkpoint.pth.tar"), model, optimizer)

    make_prediction(model, config.val_transforms, 'test/', config.DEVICE)
    check_accuracy(val_loader, model, config.DEVICE)

    for epoch in range(config.NUM_EPOCHS):
        train_fn(train_loader, model, optimizer, loss_fn, scaler,
                 config.DEVICE)
        check_accuracy(val_loader, model, config.DEVICE)
        checkpoint = {
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }
        save_checkpoint(checkpoint)
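save_checkpoint and load_checkpoint are not shown here. A plausible pair matching the checkpoint dict built in the loop above; the default filename mirrors the load call, but the internals are assumptions:

import torch

def save_checkpoint(state, filename='my_checkpoint.pth.tar'):
    # Persist model and optimizer state so training can be resumed.
    torch.save(state, filename)

def load_checkpoint(checkpoint, model, optimizer):
    # Restore the state dicts saved by save_checkpoint.
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])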
Example #3
def main():
    models_path = ['./models/one/', './models/two/', './models/three/']
    users_path = './data/users.tsv'
    history_path = './data/history.tsv'
    test = pd.read_csv(sys.argv[1], sep="\t")
    users = pd.read_csv(users_path, sep="\t")
    history = pd.read_csv(history_path, sep="\t")

    estimators = load_estimators(models_path)
    lmbda = 0.23

    (users_features,
     publishers_features) = get_history_users_features(history.merge(users))
    df = get_test_features(test, users.merge(users_features, how='left'),
                           publishers_features)
    features = [
        'duration', 'publishers_size', 'time_start', 'time_end',
        'day_of_week_start', 'day_of_week_end', 'day_start_is_weekend',
        'day_end_is_weekend', 'time_start_harmonic_0', 'time_start_harmonic_1',
        'time_end_harmonic_0', 'time_end_harmonic_1', 'cpm', 'hour_start',
        'hour_end', 'audience_size', 'sex', 'age', 'city_id', 'user_mean_cpm',
        'user_mean_is_weekend', 'user_mean_time', 'user_mean_time_harmonic_0',
        'user_mean_time_harmonic_1', 'user_mean_publisher',
        'user_unique_publishers_cnt', 'publisher_mean_time',
        'publisher_mean_time_harmonic_0', 'publisher_mean_time_harmonic_1',
        'publisher_mean_is_weekend', 'publisher_mean_cpm',
        'publisher_mean_user_id', 'publisher_mean_sex', 'publisher_mean_age',
        'publisher_mean_city_id', 'publisher_unique_users_cnt'
    ]

    prediction = make_prediction(df[features].values,
                                 estimators,
                                 box_cox_lmbda=lmbda)
    prediction.to_csv(sys.stdout, sep="\t", index=False, header=True)
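make_prediction is called with box_cox_lmbda=0.23, which suggests the training target was Box-Cox transformed and predictions are mapped back to the original scale. A hedged sketch of that step, assuming the estimators' outputs are simply averaged (the real function may differ):

import numpy as np
from scipy.special import inv_boxcox

def make_prediction_sketch(X, estimators, box_cox_lmbda):
    # Average predictions across the ensemble, then invert the Box-Cox transform.
    y_transformed = np.mean([est.predict(X) for est in estimators], axis=0)
    return inv_boxcox(y_transformed, box_cox_lmbda)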
Example #4
def guessPersonType(clima, ambiente, agua, zona, distancia):
    model = bayes.BayesModel('Atractivos',
                             constants.labels.personas.tipoPersona)
    model.add_feature(constants.labels.personas.clima)
    model.add_feature(constants.labels.personas.distancia)
    model.add_feature(constants.labels.personas.zona)
    model.add_feature(constants.labels.personas.ambiente)
    model.add_feature(constants.labels.personas.agua)

    model.loadModel()

    return make_prediction(
        pd.concat([
            model.getProbabilities(constants.labels.personas.clima, clima),
            model.getProbabilities(constants.labels.personas.distancia,
                                   distancia),
            model.getProbabilities(constants.labels.personas.zona, zona),
            model.getProbabilities(constants.labels.personas.ambiente,
                                   ambiente),
            model.getProbabilities(constants.labels.personas.agua, agua)
        ],
                  axis=1))
Example #5
File: main.py Project: serg171/lsml
def main(args):
    #np.random.seed(432)
    #torch.random.manual_seed(432)
    # create the output directory if it doesn't exist yet
    os.makedirs(args.outpath, exist_ok=True)
    experiment_path = utils.get_new_model_path(args.outpath)

    train_writer = SummaryWriter(os.path.join(experiment_path, 'train_logs'))
    val_writer = SummaryWriter(os.path.join(experiment_path, 'val_logs'))
    trainer = train.Trainer(train_writer, val_writer)

    # making dataframes with file names and true answers
    # train
    ann_file = '/data/iNat/train2019.json'
    with open(ann_file) as data_file:
        train_anns = json.load(data_file)

    train_anns_df = pd.DataFrame(
        train_anns['annotations'])[['image_id', 'category_id']]
    train_img_df = pd.DataFrame(
        train_anns['images'])[['id',
                               'file_name']].rename(columns={'id': 'image_id'})
    df_train_file_cat = pd.merge(train_img_df, train_anns_df, on='image_id')

    # valid
    valid_ann_file = '/data/iNat/val2019.json'
    with open(valid_ann_file) as data_file:
        valid_anns = json.load(data_file)

    valid_anns_df = pd.DataFrame(
        valid_anns['annotations'])[['image_id', 'category_id']]
    valid_img_df = pd.DataFrame(
        valid_anns['images'])[['id',
                               'file_name']].rename(columns={'id': 'image_id'})
    df_valid_file_cat = pd.merge(valid_img_df, valid_anns_df, on='image_id')

    # test
    ann_file = '/data/iNat/test2019.json'
    with open(ann_file) as data_file:
        test_anns = json.load(data_file)

    test_img_df = pd.DataFrame(
        test_anns['images'])[['id',
                              'file_name']].rename(columns={'id': 'image_id'})

    # make dataloaders
    ID_train = df_train_file_cat.file_name.values
    labels_train = df_train_file_cat.category_id.values
    training_set = Dataset.Dataset(ID_train,
                                   labels_train,
                                   root='/data/iNat/train_val/')  # train

    ID_test = df_valid_file_cat.file_name.values
    labels_test = df_valid_file_cat.category_id.values
    test_set = Dataset.Dataset(ID_test,
                               labels_test,
                               root='/data/iNat/train_val/')  # valid

    ID_to_sent = test_img_df.file_name.values
    to_sent_dataset = Dataset.Dataset_to_sent(ID_to_sent,
                                              root='/data/iNat/test/')

    trainloader = DataLoader(training_set,
                             batch_size=args.batch_size,
                             shuffle=True,
                             num_workers=15)
    evalloader = DataLoader(test_set,
                            batch_size=args.batch_size,
                            shuffle=False,
                            num_workers=15)
    to_sent_loader = DataLoader(to_sent_dataset,
                                batch_size=args.batch_size,
                                shuffle=False,
                                num_workers=15)

    # pretrained model
    resnet = torchvision.models.resnet18(pretrained=True)
    classifier = nn.Linear(512, 1010)
    # freeze the backbone so only the new fc head is trained at first
    for param in resnet.parameters():
        param.requires_grad = False
    resnet.fc = classifier

    model = resnet
    #opt = torch.optim.Adam(model.parameters())
    opt = torch.optim.SGD(model.parameters(), lr=0.001)
    Loss_func = nn.CrossEntropyLoss()
    schedule = torch.optim.lr_scheduler.CosineAnnealingLR(
        opt, T_max=args.epochs - 1)

    for epoch in range(args.epochs):
        print('Epoch {}/{}'.format(epoch, args.epochs - 1))
        #if epoch == args.epochs-1:
        if epoch == 15:
            # unfreeze the backbone for full fine-tuning after 15 epochs
            for param in model.parameters():
                param.requires_grad = True

        trainer.train_epoch(model, opt, trainloader, schedule, Loss_func)
        metrics = trainer.eval_epoch(model, evalloader, Loss_func)

        state = dict(
            epoch=epoch,
            model_state_dict=model.state_dict(),
            optimizer_state_dict=opt.state_dict(),
            loss=metrics['loss'],
            accuracy=metrics['acc'],
            global_step=trainer.global_step,
        )
        export_path = os.path.join(experiment_path, 'last.pth')
        torch.save(state, export_path)
        print(metrics['loss'])

    # save predictions to csv
    utils.make_prediction(model,
                          to_sent_loader,
                          test_img_df.image_id.values,
                          submit_name=args.csv_name)
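utils.make_prediction is only invoked here. A hedged sketch of what such a submission writer typically does; the signature mirrors the call above, but the column names and internals are assumptions:

import pandas as pd
import torch

def make_prediction(model, loader, image_ids, submit_name='submit.csv'):
    # Run the model over the unlabeled data and write one row per image.
    # (device transfer omitted for brevity)
    model.eval()
    preds = []
    with torch.no_grad():
        for batch in loader:
            logits = model(batch)
            preds.extend(logits.argmax(dim=1).cpu().tolist())
    pd.DataFrame({'id': image_ids, 'predicted': preds}).to_csv(
        submit_name, index=False)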
Example #6
t_start = t()
# data = pd.read_csv('train.csv', parse_dates=['epoch'])
# _ = process_for_train()
data = pd.read_csv('test.csv', parse_dates=['epoch'])
test_sat_id = np.unique(data['sat_id'].values)
sat_datas_test = process_for_predict(data)

submission = {}
submission['id'] = torch.tensor([]).type(torch.LongTensor)
for name in targets:
    submission[name] = torch.tensor([])

model = torch.load('models/model.pt')
for sat_id in test_sat_id:
    data_test = sat_datas_test[sat_id]
    model.load_state_dict(torch.load(f'models/{sat_id}/state_dict.pt'))
    model.eval()
    obs_ids = torch.from_numpy(data_test['id'].values)
    data_test = data_test.drop(['id'], axis=1)
    print(f'Predicting for satellite {sat_id}')
    prediction = make_prediction(model=model, data=data_test, method=method)

    submission['id'] = torch.cat((submission['id'], obs_ids), dim=0)
    for i, name in enumerate(targets):
        submission[name] = torch.cat((submission[name], prediction[:, i]),
                                     dim=0)

submission = pd.DataFrame(submission)
submission.to_csv(f'submission-{method}.csv', index=False)
print(f'Total time: {t() - t_start}')
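A single model object is created once and its weights are swapped per satellite via load_state_dict, which avoids re-instantiating the network inside the loop. A minimal sketch of a make_prediction compatible with the call above; the tensor conversion and method handling are assumptions:

import torch

def make_prediction(model, data, method):
    # Convert the feature frame to a tensor and predict all targets at once.
    features = torch.from_numpy(data.values).float()
    with torch.no_grad():
        return model(features)  # shape: [n_observations, len(targets)]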
Example #7
def main():
    train_ds = DRDataset(
        images_folder="train/images_preprocessed_1000/",
        path_to_csv="train/trainLabels.csv",
        transform=config.val_transforms,
    )
    val_ds = DRDataset(
        images_folder="train/images_preprocessed_1000/",
        path_to_csv="train/valLabels.csv",
        transform=config.val_transforms,
    )
    test_ds = DRDataset(
        images_folder="test/images_preprocessed_1000",
        path_to_csv="train/trainLabels.csv",
        transform=config.val_transforms,
        train=False,
    )
    test_loader = DataLoader(test_ds,
                             batch_size=config.BATCH_SIZE,
                             num_workers=6,
                             shuffle=False)
    train_loader = DataLoader(
        train_ds,
        batch_size=config.BATCH_SIZE,
        num_workers=config.NUM_WORKERS,
        pin_memory=config.PIN_MEMORY,
        shuffle=False,
    )
    val_loader = DataLoader(
        val_ds,
        batch_size=config.BATCH_SIZE,
        num_workers=2,
        pin_memory=config.PIN_MEMORY,
        shuffle=False,
    )
    loss_fn = nn.MSELoss()

    model = EfficientNet.from_pretrained("efficientnet-b3")
    model._fc = nn.Linear(1536, 1)
    model = model.to(config.DEVICE)
    optimizer = optim.Adam(model.parameters(),
                           lr=config.LEARNING_RATE,
                           weight_decay=config.WEIGHT_DECAY)
    scaler = torch.cuda.amp.GradScaler()

    if config.LOAD_MODEL and config.CHECKPOINT_FILE in os.listdir():
        load_checkpoint(torch.load(config.CHECKPOINT_FILE), model, optimizer,
                        config.LEARNING_RATE)

    # Run this after training is done and you've achieved a good result
    # on the validation set; then run train_blend.py to use information
    # about both eyes concatenated
    get_csv_for_blend(val_loader, model, "../train/val_blend.csv")
    get_csv_for_blend(train_loader, model, "../train/train_blend.csv")
    get_csv_for_blend(test_loader, model, "../train/test_blend.csv")
    make_prediction(model, test_loader, "submission_.csv")
    import sys
    sys.exit()  # early exit: the training loop below is skipped on this run
    #make_prediction(model, test_loader)

    for epoch in range(config.NUM_EPOCHS):
        train_one_epoch(train_loader, model, optimizer, loss_fn, scaler,
                        config.DEVICE)

        # get on validation
        preds, labels = check_accuracy(val_loader, model, config.DEVICE)
        print(
            f"QuadraticWeightedKappa (Validation): {cohen_kappa_score(labels, preds, weights='quadratic')}"
        )

        # get on train
        #preds, labels = check_accuracy(train_loader, model, config.DEVICE)
        #print(f"QuadraticWeightedKappa (Training): {cohen_kappa_score(labels, preds, weights='quadratic')}")

        if config.SAVE_MODEL:
            checkpoint = {
                "state_dict": model.state_dict(),
                "optimizer": optimizer.state_dict(),
            }
            save_checkpoint(checkpoint, filename=f"b3_{epoch}.pth.tar")
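Quadratic weighted kappa, computed above with cohen_kappa_score(..., weights='quadratic'), penalizes disagreements by the squared distance between ordinal grades, which suits an ordinal scale like diabetic retinopathy's 0-4 levels. A self-contained usage example:

from sklearn.metrics import cohen_kappa_score

labels = [0, 2, 4, 1, 3]
preds = [0, 2, 3, 1, 3]
# weights='quadratic' weighs an off-by-two error four times as heavily
# as an off-by-one error; a score of 1.0 means perfect agreement.
print(cohen_kappa_score(labels, preds, weights='quadratic'))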