Example #1
    def predict(self, test_csv: str,
                prediction_csv: str) -> Tuple[pd.DataFrame, Optional[np.float64]]:
        self.config["task"] = "predict"
        self.config.tmp_dir = os.path.dirname(prediction_csv) + "/tmp"
        os.makedirs(self.config.tmp_dir, exist_ok=True)

        result = {
            "line_id": [],
            "prediction": [],
        }

        for X in pd.read_csv(test_csv,
                             encoding="utf-8",
                             low_memory=False,
                             dtype=self.config["dtype"],
                             parse_dates=self.config["parse_dates"],
                             chunksize=self.config["nrows"]):
            result["line_id"] += list(X["line_id"])
            preprocess(X, self.config)
            result["prediction"] += list(predict(X, self.config))

        result = pd.DataFrame(result)
        result.to_csv(prediction_csv, index=False)

        target_csv = test_csv.replace("test", "test-target")
        if os.path.exists(target_csv):
            score = validate(result, target_csv, self.config["mode"])
        else:
            score = None

        return result, score
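validate() is defined elsewhere in the repo these examples come from; a minimal sketch of what it plausibly does, merging predictions with the held-out target file and scoring by mode. The rmse/auc pairing mirrors Example #9's metric selection; every name and column here is an assumption:

import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score

def validate(result: pd.DataFrame, target_csv: str, mode: str) -> np.float64:
    # align predictions with ground truth on line_id
    target = pd.read_csv(target_csv)
    merged = result.merge(target, on="line_id", suffixes=("", "_true"))
    if mode == "regression":
        return np.sqrt(np.mean((merged["prediction"] - merged["target"]) ** 2))
    return roc_auc_score(merged["target"], merged["prediction"])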
Example #2
    def predict(self, test_csv: str, prediction_csv: str) -> Tuple[pd.DataFrame, Optional[np.float64]]:
        self.config["task"] = "predict"
        self.config.tmp_dir = os.path.dirname(prediction_csv) + "/tmp"
        os.makedirs(self.config.tmp_dir, exist_ok=True)

        df = read_df(test_csv, self.config)
        result = {
            "line_id": list(df["line_id"]),
            "prediction": [],
        }

        def chunker(seq, size):
            return (seq[pos:pos+size] for pos in range(0, len(seq), size))

        for chunk in chunker(df, 100000):
            X = chunk.copy()
            preprocess(X, self.config)
            result["prediction"] += list(predict(X, self.config))

        result = pd.DataFrame(result)
        result.sort_values("line_id", inplace=True)
        result.to_csv(prediction_csv, index=False)

        target_csv = test_csv.replace("test", "test-target")
        if os.path.exists(target_csv):
            score = validate(result, target_csv, self.config["mode"])
        else:
            score = None

        return result, score
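The chunker() helper slices by position, so it works on any sliceable sequence, including a DataFrame. A quick self-contained illustration (the sizes are arbitrary):

import pandas as pd

def chunker(seq, size):
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))

df = pd.DataFrame({"x": range(10)})
print([len(chunk) for chunk in chunker(df, 4)])  # [4, 4, 2]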
Example #3
    def predict(self, test_csv: str, prediction_csv: str) -> pd.DataFrame:
        self.config["task"] = "predict"
        self.config.tmp_dir = os.path.dirname(prediction_csv) + "/tmp"
        os.makedirs(self.config.tmp_dir, exist_ok=True)

        result = {
            "id": [],
            "prediction": [],
        }

        for X in pd.read_csv(
                test_csv,
                encoding="utf-8",
                low_memory=False,
                dtype=self.config["dtype"],
                parse_dates=self.config["parse_dates"],
                chunksize=self.config["nrows"]
        ):
            result["id"] += list(X["id"])
            preprocess(X, self.config)
            result["prediction"] += list(predict(X, self.config))

        result = pd.DataFrame(result)
        result.to_csv(prediction_csv, index=False)

        return result
Example #4
    def train(self, train_csv: str, mode: str):
        self.config["task"] = "train"
        self.config["mode"] = mode
        self.config["model"] = {}
        self.config["ensemble"] = {"lgb": 1}

        self.config.tmp_dir = self.config.model_dir + "/tmp"
        os.makedirs(self.config.tmp_dir, exist_ok=True)

        # load holiday
        path_holiday = './holiday.csv'
        holiday = pd.read_csv(path_holiday, encoding='utf-8',
                              low_memory=False,
                              dtype={'holiday': str})['holiday'].values
        self.config['holiday'] = set(holiday)

        df = read_df(train_csv, self.config)
        print(df.shape)

        holiday_detect(df, self.config)

        preprocess(df, self.config)

        y = df["target"]
        X = df.drop("target", axis=1)

        train(X, y, self.config)
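Example #6 below tests membership in this holiday set against the 'YYYY-MM-DD' prefix of a timestamp, which suggests holiday.csv holds one date string per row. A hypothetical check (the dates are made up):

import pandas as pd

holiday = {'2019-01-01', '2019-02-05'}  # hypothetical contents of holiday.csv
dt = pd.Timestamp('2019-01-01 09:30:00')
print(str(dt).split(' ')[0] in holiday)  # True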
Example #5
    def train(self, train_csv: str, mode: str):
        self.config["task"] = "train"
        self.config["mode"] = mode
        self.config.tmp_dir = self.config.model_dir + "/tmp"
        os.makedirs(self.config.tmp_dir, exist_ok=True)

        df = read_df(train_csv, self.config)
        preprocess(df, self.config)

        y = df["target"]
        X = df.drop("target", axis=1)
        train(X, y, self.config)
Example #6
    def predict(self, test_csv: str,
                prediction_csv: str) -> Tuple[pd.DataFrame, Optional[np.float64]]:
        self.config["task"] = "predict"
        self.config.tmp_dir = os.path.dirname(prediction_csv) + "/tmp"
        os.makedirs(self.config.tmp_dir, exist_ok=True)

        result = {"line_id": [], "prediction": []}
        if 'holiday_detect' in self.config:
            result["datetime"] = []

        for X in pd.read_csv(test_csv,
                             encoding="utf-8",
                             low_memory=False,
                             dtype=self.config["dtype"],
                             parse_dates=self.config["parse_dates"],
                             chunksize=self.config["nrows"]):
            result["line_id"] += list(X["line_id"])
            if 'holiday_detect' in self.config:
                dt_fea = self.config['holiday_detect']
                result["datetime"] += list(X[dt_fea])

            preprocess(X, self.config)
            result["prediction"] += list(predict(X, self.config))

        result = pd.DataFrame(result)

        # post-process for holidays: zero out predictions on holidays and weekends
        if 'holiday_detect' in self.config:
            holiday = self.config['holiday']
            for idx, row in result.iterrows():
                dt = row['datetime']
                dt_str = str(dt).split(' ')[0].strip()
                if dt_str in holiday or dt.weekday() == 5 or dt.weekday() == 6:
                    result.loc[idx, 'prediction'] = 0

            result.drop(["datetime"], axis=1, inplace=True)

        result.to_csv(prediction_csv, index=False)

        target_csv = test_csv.replace("test", "test-target")
        if os.path.exists(target_csv):
            score = validate(result, target_csv, self.config["mode"])
        else:
            score = None

        return result, score
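The row-by-row loop above can be vectorized. A hedged equivalent that drops into the same scope in place of the iterrows() pass, assuming result['datetime'] holds pandas Timestamps as the loop itself does:

dates = pd.to_datetime(result["datetime"])
mask = dates.dt.strftime("%Y-%m-%d").isin(holiday) | (dates.dt.weekday >= 5)
result.loc[mask, "prediction"] = 0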
Example #7
    def train(self, train_csv: str, mode: str):
        self.config["task"] = "train"
        self.config["mode"] = mode
        self.config.tmp_dir = self.config.model_dir + "/tmp"
        os.makedirs(self.config.tmp_dir, exist_ok=True)

        ## prepare data
        df = read_df(train_csv, self.config)
        
        ## preprocessing
        preprocess(df, self.config)

        y = df["target"]
        X = df.drop("target", axis=1)
        log('drop target')
        log('####### cur time = ' + str(datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S")))
        log('################## after FE #########################')
        log(X.shape)
        log('#####################################################')
        train(X, y, self.config)
Example #8
    def predict(self, test_csv: str,
                prediction_csv: str) -> Tuple[pd.DataFrame, Optional[np.float64]]:
        self.config["task"] = "predict"
        self.config.tmp_dir = os.path.dirname(prediction_csv) + "/tmp"
        os.makedirs(self.config.tmp_dir, exist_ok=True)

        self.config["prediction_csv"] = prediction_csv
        self.config["line_id"] = []

        self.config["start_time"] = time.time()

        result = {
            "line_id": [],
            "prediction": [],
        }

        X = pd.read_csv(
            test_csv,
            encoding="utf-8",
            low_memory=False,
            dtype=self.config["dtype"],
            parse_dates=self.config["parse_dates"],
        )
        self.config["line_id"] = X["line_id"].values

        result["line_id"] = (X["line_id"].values)
        X = preprocess(X, self.config)

        X = X[self.config["columns"]]  # for right columns order

        result["prediction"] = predict(X, self.config)

        result = pd.DataFrame(result)
        result.to_csv(prediction_csv, index=False)

        target_csv = test_csv.replace("test", "test-target")
        if os.path.exists(target_csv):
            score = validate(result, target_csv, self.config["mode"],
                             self.config)
        else:
            score = None

        return result, score
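Example #8 reindexes with self.config["columns"] before predicting; that list is captured at train time (Example #9 below stores it via self.config["columns"] = list(X)), which guards against feature-order drift between training and inference. A minimal illustration of the pattern with hypothetical column names:

import pandas as pd

train_columns = ["b", "a"]              # column order captured at train time
X = pd.DataFrame({"a": [1], "b": [2]})  # test data arrives in a different order
X = X[train_columns]                    # restore the training order
print(list(X))  # ['b', 'a']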
Example #9
    def train(self, train_csv: str, mode: str):
        self.config["task"] = "train"
        self.config["mode"] = mode

        self.config["objective"] = "regression" if mode == "regression" else "binary"
        self.config["metric"] = "rmse" if mode == "regression" else "auc"

        self.config.tmp_dir = self.config.model_dir + "/tmp"
        os.makedirs(self.config.tmp_dir, exist_ok=True)

        df = read_df(train_csv, self.config)
        df = preprocess(df, self.config)

        y = df["target"].copy()
        X = df.drop("target", axis=1).copy()
        del df
        gc.collect()

        self.config["columns"] = list(X)

        train(X, y, self.config)
Example #10
import sys
import os
import logging

from lib.utils import read_yaml
from lib.preprocess import preprocess, transform_to_long, save_pred_long_df
from model.dcrnn_top import train_dcrnn, run_dcrnn

sys.path.append(os.getcwd())
args = read_yaml('dcrnn_config.yaml')
args, dataloaders, adj_mx, node_ids = preprocess(args)
args = train_dcrnn(args, dataloaders, adj_mx)
args, pred_df = run_dcrnn(args, dataloaders, adj_mx, node_ids)
long_df = transform_to_long(pred_df)
save_pred_long_df(args, long_df)
logging.shutdown()
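read_yaml is imported from lib.utils, which is not shown here; a minimal sketch of what it might amount to, assuming PyYAML and a plain-dict config (the real helper may wrap the result in an attribute-style object):

import yaml

def read_yaml(path: str) -> dict:
    # parse the YAML config file into a dict
    with open(path, encoding="utf-8") as f:
        return yaml.safe_load(f)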
Example #11
def main():
    test_args = parse_args()

    args = joblib.load('models/%s/args.pkl' % test_args.name)

    print('Config -----')
    for arg in vars(args):
        print('%s: %s' % (arg, getattr(args, arg)))
    print('------------')

    if args.pred_type == 'classification':
        num_outputs = 5
    elif args.pred_type == 'regression':
        num_outputs = 1
    elif args.pred_type == 'multitask':
        num_outputs = 6
    else:
        raise NotImplementedError

    cudnn.benchmark = True

    test_transform = transforms.Compose([
        transforms.Resize(args.input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

    # data loading code
    test_dir = preprocess('test',
                          args.img_size,
                          scale=args.scale_radius,
                          norm=args.normalize,
                          pad=args.padding,
                          remove=args.remove)
    test_df = pd.read_csv('inputs/test.csv')
    test_img_paths = test_dir + '/' + test_df['id_code'].values + '.png'
    test_labels = np.zeros(len(test_img_paths))

    test_set = Dataset(test_img_paths, test_labels, transform=test_transform)
    test_loader = torch.utils.data.DataLoader(test_set,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=4)

    preds = []
    for fold in range(args.n_splits):
        print('Fold [%d/%d]' % (fold + 1, args.n_splits))

        # create model
        model_path = 'models/%s/model_%d.pth' % (args.name, fold + 1)
        if not os.path.exists(model_path):
            print('%s does not exist.' % model_path)
            continue
        model = get_model(model_name=args.arch,
                          num_outputs=num_outputs,
                          freeze_bn=args.freeze_bn,
                          dropout_p=args.dropout_p)
        model = model.cuda()
        model.load_state_dict(torch.load(model_path))

        model.eval()

        preds_fold = []
        with torch.no_grad():
            for i, (input, _) in tqdm(enumerate(test_loader),
                                      total=len(test_loader)):
                if test_args.tta:
                    outputs = []
                    for input in apply_tta(input):
                        input = input.cuda()
                        output = model(input)
                        outputs.append(output.data.cpu().numpy()[:, 0])
                    preds_fold.extend(np.mean(outputs, axis=0))
                else:
                    input = input.cuda()
                    output = model(input)

                    preds_fold.extend(output.data.cpu().numpy()[:, 0])
        preds_fold = np.array(preds_fold)
        preds.append(preds_fold)

        if not args.cv:
            break

    preds = np.mean(preds, axis=0)

    if test_args.tta:
        args.name += '_tta'

    test_df['diagnosis'] = preds
    test_df.to_csv('probs/%s.csv' % args.name, index=False)

    thrs = [0.5, 1.5, 2.5, 3.5]
    preds[preds < thrs[0]] = 0
    preds[(preds >= thrs[0]) & (preds < thrs[1])] = 1
    preds[(preds >= thrs[1]) & (preds < thrs[2])] = 2
    preds[(preds >= thrs[2]) & (preds < thrs[3])] = 3
    preds[preds >= thrs[3]] = 4
    preds = preds.astype('int')

    test_df['diagnosis'] = preds
    test_df.to_csv('submissions/%s.csv' % args.name, index=False)
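The four threshold comparisons at the end bin a continuous score into the five diagnosis classes; np.digitize with the same thrs list yields identical labels. A self-contained check with made-up scores:

import numpy as np

thrs = [0.5, 1.5, 2.5, 3.5]
preds = np.array([0.2, 1.0, 2.7, 3.9])
print(np.digitize(preds, bins=thrs))  # [0 1 3 4]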
Example #12
def main():
    args = parse_args()

    if args.name is None:
        args.name = '%s_%s' % (args.arch, datetime.now().strftime('%m%d%H'))

    if not os.path.exists('models/%s' % args.name):
        os.makedirs('models/%s' % args.name)

    print('Config -----')
    for arg in vars(args):
        print('- %s: %s' % (arg, getattr(args, arg)))
    print('------------')

    with open('models/%s/args.txt' % args.name, 'w') as f:
        for arg in vars(args):
            print('- %s: %s' % (arg, getattr(args, arg)), file=f)

    joblib.dump(args, 'models/%s/args.pkl' % args.name)

    if args.loss == 'CrossEntropyLoss':
        criterion = nn.CrossEntropyLoss().cuda()
    elif args.loss == 'FocalLoss':
        criterion = FocalLoss().cuda()
    elif args.loss == 'MSELoss':
        criterion = nn.MSELoss().cuda()
    elif args.loss == 'multitask':
        criterion = {
            'classification': nn.CrossEntropyLoss().cuda(),
            'regression': nn.MSELoss().cuda(),
        }
    else:
        raise NotImplementedError

    if args.pred_type == 'classification':
        num_outputs = 5
    elif args.pred_type == 'regression':
        num_outputs = 1
    elif args.pred_type == 'multitask':
        num_outputs = 6
    else:
        raise NotImplementedError

    cudnn.benchmark = True

    model = get_model(model_name=args.arch,
                      num_outputs=num_outputs,
                      freeze_bn=args.freeze_bn,
                      dropout_p=args.dropout_p)

    train_transform = transforms.Compose([
        transforms.Resize((args.img_size, args.img_size)),
        transforms.RandomAffine(
            degrees=(args.rotate_min, args.rotate_max) if args.rotate else 0,
            translate=(args.translate_min, args.translate_max) if args.translate else None,
            scale=(args.rescale_min, args.rescale_max) if args.rescale else None,
            shear=(args.shear_min, args.shear_max) if args.shear else None,
        ),
        transforms.CenterCrop(args.input_size),
        transforms.RandomHorizontalFlip(p=0.5 if args.flip else 0),
        transforms.RandomVerticalFlip(p=0.5 if args.flip else 0),
        transforms.ColorJitter(
            brightness=0,
            contrast=args.contrast,
            saturation=0,
            hue=0),
        RandomErase(
            prob=args.random_erase_prob if args.random_erase else 0,
            sl=args.random_erase_sl,
            sh=args.random_erase_sh,
            r=args.random_erase_r),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

    val_transform = transforms.Compose([
        transforms.Resize((args.img_size, args.input_size)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

    # data loading code
    if 'diabetic_retinopathy' in args.train_dataset:
        diabetic_retinopathy_dir = preprocess(
            'diabetic_retinopathy',
            args.img_size,
            scale=args.scale_radius,
            norm=args.normalize,
            pad=args.padding,
            remove=args.remove)
        diabetic_retinopathy_df = pd.read_csv('inputs/diabetic-retinopathy-resized/trainLabels.csv')
        diabetic_retinopathy_img_paths = \
            diabetic_retinopathy_dir + '/' + diabetic_retinopathy_df['image'].values + '.jpeg'
        diabetic_retinopathy_labels = diabetic_retinopathy_df['level'].values

    if 'aptos2019' in args.train_dataset:
        aptos2019_dir = preprocess(
            'aptos2019',
            args.img_size,
            scale=args.scale_radius,
            norm=args.normalize,
            pad=args.padding,
            remove=args.remove)
        aptos2019_df = pd.read_csv('inputs/train.csv')
        aptos2019_img_paths = aptos2019_dir + '/' + aptos2019_df['id_code'].values + '.png'
        aptos2019_labels = aptos2019_df['diagnosis'].values

    if args.train_dataset == 'aptos2019':
        skf = StratifiedKFold(n_splits=args.n_splits, shuffle=True, random_state=41)
        img_paths = []
        labels = []
        for fold, (train_idx, val_idx) in enumerate(skf.split(aptos2019_img_paths, aptos2019_labels)):
            img_paths.append((aptos2019_img_paths[train_idx], aptos2019_img_paths[val_idx]))
            labels.append((aptos2019_labels[train_idx], aptos2019_labels[val_idx]))
    elif args.train_dataset == 'diabetic_retinopathy':
        img_paths = [(diabetic_retinopathy_img_paths, aptos2019_img_paths)]
        labels = [(diabetic_retinopathy_labels, aptos2019_labels)]
    elif 'diabetic_retinopathy' in args.train_dataset and 'aptos2019' in args.train_dataset:
        skf = StratifiedKFold(n_splits=args.n_splits, shuffle=True, random_state=41)
        img_paths = []
        labels = []
        for fold, (train_idx, val_idx) in enumerate(skf.split(aptos2019_img_paths, aptos2019_labels)):
            img_paths.append((np.hstack((aptos2019_img_paths[train_idx], diabetic_retinopathy_img_paths)), aptos2019_img_paths[val_idx]))
            labels.append((np.hstack((aptos2019_labels[train_idx], diabetic_retinopathy_labels)), aptos2019_labels[val_idx]))
    # else:
    #     raise NotImplementedError

    if args.pseudo_labels:
        test_df = pd.read_csv('probs/%s.csv' % args.pseudo_labels)
        test_dir = preprocess(
            'test',
            args.img_size,
            scale=args.scale_radius,
            norm=args.normalize,
            pad=args.padding,
            remove=args.remove)
        test_img_paths = test_dir + '/' + test_df['id_code'].values + '.png'
        test_labels = test_df['diagnosis'].values
        for fold in range(len(img_paths)):
            img_paths[fold] = (np.hstack((img_paths[fold][0], test_img_paths)), img_paths[fold][1])
            labels[fold] = (np.hstack((labels[fold][0], test_labels)), labels[fold][1])

    if 'messidor' in args.train_dataset:
        test_dir = preprocess(
            'messidor',
            args.img_size,
            scale=args.scale_radius,
            norm=args.normalize,
            pad=args.padding,
            remove=args.remove)

    folds = []
    best_losses = []
    best_scores = []

    for fold, ((train_img_paths, val_img_paths), (train_labels, val_labels)) in enumerate(zip(img_paths, labels)):
        print('Fold [%d/%d]' % (fold + 1, len(img_paths)))

        if os.path.exists('models/%s/model_%d.pth' % (args.name, fold + 1)):
            log = pd.read_csv('models/%s/log_%d.csv' % (args.name, fold + 1))
            best_loss, best_score = log.loc[log['val_loss'].values.argmin(), ['val_loss', 'val_score']].values
            folds.append(str(fold + 1))
            best_losses.append(best_loss)
            best_scores.append(best_score)
            continue

        if args.remove_duplicate:
            md5_df = pd.read_csv('inputs/strMd5.csv')
            duplicate_img_paths = aptos2019_dir + '/' + md5_df[(md5_df.strMd5_count > 1) & (~md5_df.diagnosis.isnull())]['id_code'].values + '.png'
            print(duplicate_img_paths)
            for duplicate_img_path in duplicate_img_paths:
                train_labels = train_labels[train_img_paths != duplicate_img_path]
                train_img_paths = train_img_paths[train_img_paths != duplicate_img_path]
                val_labels = val_labels[val_img_paths != duplicate_img_path]
                val_img_paths = val_img_paths[val_img_paths != duplicate_img_path]

        # train
        train_set = Dataset(
            train_img_paths,
            train_labels,
            transform=train_transform)

        _, class_sample_counts = np.unique(train_labels, return_counts=True)
        sampler = None
        if args.class_aware:
            # class-aware sampling: weight samples inversely to class frequency
            weights = 1. / torch.tensor(class_sample_counts, dtype=torch.float)
            samples_weights = weights[train_labels]
            sampler = WeightedRandomSampler(
                weights=samples_weights,
                num_samples=11000,
                replacement=False)
        train_loader = torch.utils.data.DataLoader(
            train_set,
            batch_size=args.batch_size,
            shuffle=not args.class_aware,
            num_workers=4,
            sampler=sampler)

        val_set = Dataset(
            val_img_paths,
            val_labels,
            transform=val_transform)
        val_loader = torch.utils.data.DataLoader(
            val_set,
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=4)

        # create model
        model = get_model(model_name=args.arch,
                          num_outputs=num_outputs,
                          freeze_bn=args.freeze_bn,
                          dropout_p=args.dropout_p)
        model = model.cuda()
        if args.pretrained_model is not None:
            model.load_state_dict(torch.load('models/%s/model_%d.pth' % (args.pretrained_model, fold+1)))

        # print(model)

        if args.optimizer == 'Adam':
            optimizer = optim.Adam(
                filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr)
        elif args.optimizer == 'AdamW':
            optimizer = optim.AdamW(
                filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr)
        elif args.optimizer == 'RAdam':
            optimizer = RAdam(
                filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr)
        elif args.optimizer == 'SGD':
            optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr,
                                  momentum=args.momentum, weight_decay=args.weight_decay, nesterov=args.nesterov)

        if args.scheduler == 'CosineAnnealingLR':
            scheduler = lr_scheduler.CosineAnnealingLR(
                optimizer, T_max=args.epochs, eta_min=args.min_lr)
        elif args.scheduler == 'ReduceLROnPlateau':
            scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, factor=args.factor, patience=args.patience,
                                                       verbose=1, min_lr=args.min_lr)

        log = {
            'epoch': [],
            'loss': [],
            'score': [],
            'val_loss': [],
            'val_score': [],
        }

        best_loss = float('inf')
        best_score = 0
        for epoch in range(args.epochs):
            print('Epoch [%d/%d]' % (epoch + 1, args.epochs))

            # train for one epoch
            train_loss, train_score = train(
                args, train_loader, model, criterion, optimizer, epoch)
            # evaluate on validation set
            val_loss, val_score = validate(args, val_loader, model, criterion)

            if args.scheduler == 'CosineAnnealingLR':
                scheduler.step()
            elif args.scheduler == 'ReduceLROnPlateau':
                scheduler.step(val_loss)

            print('loss %.4f - score %.4f - val_loss %.4f - val_score %.4f'
                  % (train_loss, train_score, val_loss, val_score))

            log['epoch'].append(epoch)
            log['loss'].append(train_loss)
            log['score'].append(train_score)
            log['val_loss'].append(val_loss)
            log['val_score'].append(val_score)

            pd.DataFrame(log).to_csv('models/%s/log_%d.csv' % (args.name, fold+1), index=False)

            if val_loss < best_loss:
                torch.save(model.state_dict(), 'models/%s/model_%d.pth' % (args.name, fold+1))
                best_loss = val_loss
                best_score = val_score
                print("=> saved best model")

        print('val_loss:  %f' % best_loss)
        print('val_score: %f' % best_score)

        folds.append(str(fold + 1))
        best_losses.append(best_loss)
        best_scores.append(best_score)

        results = pd.DataFrame({
            'fold': folds + ['mean'],
            'best_loss': best_losses + [np.mean(best_losses)],
            'best_score': best_scores + [np.mean(best_scores)],
        })

        print(results)
        results.to_csv('models/%s/results.csv' % args.name, index=False)

        torch.cuda.empty_cache()

        if not args.cv:
            break
Example #13
def main():
    args = parse_args()
    np.random.seed(args.seed)
    cudnn.benchmark = False
    cudnn.deterministic = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)

    if args.name is None:
        args.name = '%s_%s' % (args.arch, datetime.now().strftime('%m%d%H'))

    if not os.path.exists('models/%s' % args.name):
        os.makedirs('models/%s' % args.name)

    print('Config -----')
    for arg in vars(args):
        print('- %s: %s' % (arg, getattr(args, arg)))
    print('------------')

    with open('models/%s/args.txt' % args.name, 'w') as f:
        for arg in vars(args):
            print('- %s: %s' % (arg, getattr(args, arg)), file=f)

    joblib.dump(args, 'models/%s/args.pkl' % args.name)

    if args.loss == 'CrossEntropyLoss':
        criterion = nn.CrossEntropyLoss().cuda()
    elif args.loss == 'FocalLoss':
        criterion = FocalLoss().cuda()
    elif args.loss == 'MSELoss':
        criterion = nn.MSELoss().cuda()
    elif args.loss == 'multitask':
        criterion = {
            'classification': nn.CrossEntropyLoss().cuda(),
            'regression': nn.MSELoss().cuda(),
        }
    else:
        raise NotImplementedError

    if args.pred_type == 'classification':
        num_outputs = 5
    elif args.pred_type == 'regression':
        num_outputs = 1
    elif args.pred_type == 'multitask':
        num_outputs = 6
    else:
        raise NotImplementedError

    train_transform = transforms.Compose([
        transforms.Resize((args.img_size, args.img_size)),
        transforms.RandomAffine(
            degrees=(args.rotate_min, args.rotate_max) if args.rotate else 0,
            translate=(args.translate_min,
                       args.translate_max) if args.translate else None,
            scale=(args.rescale_min,
                   args.rescale_max) if args.rescale else None,
            shear=(args.shear_min, args.shear_max) if args.shear else None,
        ),
        transforms.CenterCrop(args.input_size),
        transforms.RandomHorizontalFlip(p=0.5 if args.flip else 0),
        transforms.RandomVerticalFlip(p=0.5 if args.flip else 0),
        transforms.ColorJitter(brightness=0,
                               contrast=args.contrast,
                               saturation=0,
                               hue=0),
        RandomErase(prob=args.random_erase_prob if args.random_erase else 0,
                    sl=args.random_erase_sl,
                    sh=args.random_erase_sh,
                    r=args.random_erase_r),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

    val_transform = transforms.Compose([
        transforms.Resize((args.img_size, args.input_size)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

    # data loading code
    if 'diabetic_retinopathy' in args.train_dataset:
        diabetic_retinopathy_dir = preprocess('diabetic_retinopathy',
                                              args.img_size,
                                              scale=args.scale_radius,
                                              norm=args.normalize,
                                              pad=args.padding,
                                              remove=args.remove)
        diabetic_retinopathy_df = pd.read_csv(
            'inputs/diabetic-retinopathy-resized/trainLabels.csv')
        diabetic_retinopathy_img_paths = \
            diabetic_retinopathy_dir + '/' + diabetic_retinopathy_df['image'].values + '.jpeg'
        diabetic_retinopathy_labels = diabetic_retinopathy_df['level'].values

    if 'aptos2019' in args.train_dataset:
        aptos2019_dir = preprocess('aptos2019',
                                   args.img_size,
                                   scale=args.scale_radius,
                                   norm=args.normalize,
                                   pad=args.padding,
                                   remove=args.remove)
        aptos2019_df = pd.read_csv('inputs/train.csv')
        aptos2019_img_paths = aptos2019_dir + '/' + aptos2019_df[
            'id_code'].values + '.png'
        aptos2019_labels = aptos2019_df['diagnosis'].values

    if 'chestxray' in args.train_dataset:
        chestxray_dir = preprocess('chestxray',
                                   args.img_size,
                                   scale=args.scale_radius,
                                   norm=args.normalize,
                                   pad=args.padding,
                                   remove=args.remove)

        chestxray_img_paths = []
        chestxray_labels = []
        normal_cases = glob('chest_xray/chest_xray/train/NORMAL/*.jpeg')
        pneumonia_cases = glob('chest_xray/chest_xray/train/PNEUMONIA/*.jpeg')
        for nor in normal_cases:
            p = nor.split('/')[-1]
            chestxray_img_paths.append(chestxray_dir + '/' + p)
            chestxray_labels.append(0)
        for abn in pneumonia_cases:
            p = abn.split('/')[-1]
            chestxray_img_paths.append(chestxray_dir + '/' + p)
            chestxray_labels.append(1)

        normal_cases = glob('chest_xray/chest_xray/test/NORMAL/*.jpeg')
        pneumonia_cases = glob('chest_xray/chest_xray/test/PNEUMONIA/*.jpeg')
        for nor in normal_cases:
            p = nor.split('/')[-1]
            chestxray_img_paths.append(chestxray_dir + '/' + p)
            chestxray_labels.append(0)
        for abn in pneumonia_cases:
            p = abn.split('/')[-1]
            chestxray_img_paths.append(chestxray_dir + '/' + p)
            chestxray_labels.append(1)

        normal_cases = glob('chest_xray/chest_xray/val/NORMAL/*.jpeg')
        pneumonia_cases = glob('chest_xray/chest_xray/val/PNEUMONIA/*.jpeg')
        for nor in normal_cases:
            p = nor.split('/')[-1]
            chestxray_img_paths.append(chestxray_dir + '/' + p)
            chestxray_labels.append(0)
        for abn in pneumonia_cases:
            p = abn.split('/')[-1]
            chestxray_img_paths.append(chestxray_dir + '/' + p)
            chestxray_labels.append(1)

        chestxray_img_paths = np.array(chestxray_img_paths)
        chestxray_labels = np.array(chestxray_labels)

    if args.train_dataset == 'aptos2019':
        skf = StratifiedKFold(n_splits=args.n_splits,
                              shuffle=True,
                              random_state=41)
        img_paths = []
        labels = []
        for fold, (train_idx, val_idx) in enumerate(
                skf.split(aptos2019_img_paths, aptos2019_labels)):
            img_paths.append(
                (aptos2019_img_paths[train_idx], aptos2019_img_paths[val_idx]))
            labels.append(
                (aptos2019_labels[train_idx], aptos2019_labels[val_idx]))
    elif args.train_dataset == 'diabetic_retinopathy':
        img_paths = [(diabetic_retinopathy_img_paths, aptos2019_img_paths)]
        labels = [(diabetic_retinopathy_labels, aptos2019_labels)]
    elif 'diabetic_retinopathy' in args.train_dataset and 'aptos2019' in args.train_dataset:
        skf = StratifiedKFold(n_splits=args.n_splits,
                              shuffle=True,
                              random_state=41)
        img_paths = []
        labels = []
        for fold, (train_idx, val_idx) in enumerate(
                skf.split(aptos2019_img_paths, aptos2019_labels)):
            img_paths.append((np.hstack((aptos2019_img_paths[train_idx],
                                         diabetic_retinopathy_img_paths)),
                              aptos2019_img_paths[val_idx]))
            labels.append((np.hstack(
                (aptos2019_labels[train_idx], diabetic_retinopathy_labels)),
                           aptos2019_labels[val_idx]))

    # FL setting: separate data into users
    if 'diabetic_retinopathy' in args.train_dataset and 'aptos2019' in args.train_dataset:
        combined_paths = np.hstack(
            (aptos2019_img_paths, diabetic_retinopathy_img_paths))
        combined_labels = np.hstack(
            (aptos2019_labels, diabetic_retinopathy_labels))
    elif 'chestxray' in args.train_dataset:
        combined_paths = chestxray_img_paths
        combined_labels = chestxray_labels
    else:
        raise NotImplementedError
    user_ind_dict, ind_test = split_dataset(combined_labels, args.num_users,
                                            args.iid)

    model = get_model(model_name=args.arch,
                      num_outputs=num_outputs,
                      freeze_bn=args.freeze_bn,
                      dropout_p=args.dropout_p)
    model = model.cuda()
    test_set = Dataset(combined_paths[ind_test],
                       combined_labels[ind_test],
                       transform=val_transform)
    test_loader = torch.utils.data.DataLoader(test_set,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=4)

    test_acc = []
    test_scores = []
    test_scores_f1 = []
    lr = args.lr
    for epoch in range(args.epochs):

        print('Epoch [%d/%d]' % (epoch + 1, args.epochs))
        weight_list = []
        selected_ind = np.random.choice(args.num_users,
                                        int(args.num_users / 10),
                                        replace=False)
        for i in selected_ind:
            print('user: %d' % (i + 1))
            train_set = Dataset(combined_paths[user_ind_dict[i]],
                                combined_labels[user_ind_dict[i]],
                                transform=train_transform)
            train_loader = torch.utils.data.DataLoader(
                train_set,
                batch_size=args.batch_size,
                shuffle=not args.class_aware,
                num_workers=4,
                # no sampler is defined in this example; class-aware sampling
                # would require constructing one (e.g. WeightedRandomSampler)
                sampler=None)

            # train for one epoch
            train_loss, train_score, ret_w = train(args, train_loader,
                                                   copy.deepcopy(model),
                                                   criterion, lr)
            weight_list.append(ret_w)
            print('loss %.4f - score %.4f' % (train_loss, train_score))
        weights = fedavg(weight_list)
        model.load_state_dict(weights)
        test_loss, test_score, test_scoref1, accuracy, confusion_matrix = test(
            args, test_loader, copy.deepcopy(model), criterion)
        print('loss %.4f - score %.4f - accuracy %.4f' %
              (test_loss, test_score, accuracy))
        test_acc.append(accuracy)
        test_scores.append(test_score)
        test_scores_f1.append(test_scoref1)
        lr *= 0.992

    np.savez('./accuracy-xray-iid' + str(args.iid) + '-' + str(args.epochs) +
             '-beta' + str(args.beta) + '-seed' + str(args.seed),
             acc=np.array(test_acc),
             score=np.array(test_scores),
             scoref1=np.array(test_scores_f1),
             confusion=confusion_matrix)
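fedavg() is imported from elsewhere in this repo; the standard FedAvg aggregation it presumably implements is a parameter-wise mean of the collected client state_dicts. A hedged sketch of that aggregation:

import copy
import torch

def fedavg(weight_list):
    # parameter-wise mean of the client models' state_dicts
    avg = copy.deepcopy(weight_list[0])
    for key in avg.keys():
        for w in weight_list[1:]:
            avg[key] = avg[key] + w[key]
        avg[key] = torch.div(avg[key], len(weight_list))
    return avg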