Code Example #1
def valid(args):
    model.eval()
    valid_loader = create_loader(args, valid_ds)
    dev_loss = 0.
    y_true = []
    y_pred = []
    for i, (inputs, labels) in enumerate(valid_loader):
        if args.cuda:
            inputs = inputs.cuda()
            labels = labels.cuda()

        with torch.no_grad():
            if args.model == "CNN":
                logits = model(True, inputs)
            else:
                logits = model(inputs)
        loss = criterion(logits, labels)
        dev_loss += loss.item()
        
        y_true.extend(labels.cpu().detach().numpy().tolist())
        y_pred.extend(torch.sigmoid(logits).cpu().detach().numpy().tolist())
    dev_loss /= len(valid_loader)

    # Binarize the sigmoid outputs at a 0.5 threshold for multi-label metrics.
    targets, outputs = y_true, np.array(y_pred) >= 0.5
    accuracy = metrics.accuracy_score(targets, outputs)
    f1_score_micro = metrics.f1_score(targets, outputs, average='micro', zero_division=1)
    f1_score_macro = metrics.f1_score(targets, outputs, average='macro', zero_division=1)
    print(f"Accuracy Score = {accuracy}")
    print(f"F1 Score (Micro) = {f1_score_micro}")
    print(f"F1 Score (Macro) = {f1_score_macro}")
    return dev_loss, accuracy, f1_score_micro, f1_score_macro
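
These snippets call a project-specific create_loader(args, dataset) helper whose definition is not shown. Purely as a reading aid, here is a minimal sketch under the assumption that it wraps a standard torch DataLoader; the attribute names args.batch_size and args.workers are guesses. (Code Examples #7 and #11 instead use a timm-style create_loader with an explicit input_size/mean/std signature.)

from torch.utils.data import DataLoader

def create_loader(args, dataset, shuffle=True):
    # Hypothetical sketch; the real helper is defined elsewhere in each project.
    return DataLoader(dataset,
                      batch_size=args.batch_size,
                      shuffle=shuffle,
                      num_workers=args.workers)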
Code Example #2
def test(args):
    # model.load_state_dict(torch.load(args.ckpt))
    model.eval()
    test_loader = create_loader(args, test_ds, shuffle=False)
    y_true, y_pred = [], []
    for i, (inputs, labels) in enumerate(test_loader):
        if args.cuda:
            inputs = inputs.cuda()
            labels = labels.cuda()

        with torch.no_grad():
            if args.model == "CNN":
                pred = model(False, inputs)
            else:
                pred = model(inputs)

        y_true.extend(labels.cpu().detach().numpy().tolist())
        y_pred.extend(torch.sigmoid(pred).cpu().detach().numpy().tolist())

    targets, outputs = y_true, np.array(y_pred) >= 0.5
    accuracy = metrics.accuracy_score(targets, outputs)
    f1_score_micro = metrics.f1_score(targets, outputs, average='micro', zero_division=1)
    f1_score_macro = metrics.f1_score(targets, outputs, average='macro', zero_division=1)
    prf1 = precision_recall_fscore_support(targets, outputs, beta=0.5, average=None)
    print(prf1)
    print(f"Accuracy Score = {accuracy}")
    print(f"F1 Score (Micro) = {f1_score_micro}")
    print(f"F1 Score (Macro) = {f1_score_macro}")

    return accuracy, f1_score_micro, f1_score_macro, classification_report(targets, outputs, target_names=list(test_ds.name2id)[:-1])
Code Example #3
def train(epoch):
    device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
    model.to(device)
    train_loader = create_loader(args, train_ds)
    train_loss = 0.
    global_steps = 0
    optimizer.zero_grad()

    for i, (inputs, labels) in enumerate(train_loader):
        model.train()
        # Move the batch to the same device as the model.
        inputs, labels = inputs.to(device), labels.to(device)

        if args.model == "CNN":
            out = model(True, inputs)
        else:
            out = model(inputs)

        # CE loss against the argmax of the (one-hot) label vectors.
        # Note: gradients are only zeroed after an optimizer step (below),
        # so they accumulate across batches until _grad_step fires.
        loss = criterion(out, torch.max(labels, 1)[1])
        train_loss += loss.item()
        loss.backward()

        if _grad_step(args, i):
            optimizer.step()
            optimizer.zero_grad()
            global_steps += 1

        logger.log("Loss: ", loss.item())
    print("Training Loss: ", train_loss / global_steps)
    return train_loss / global_steps
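
_grad_step(args, i) is not defined in these snippets; since the optimizer only steps when it returns True, it presumably gates gradient accumulation. A minimal sketch, assuming args.grad holds the accumulation interval (Code Example #7 defines a --grad flag defaulting to 1):

def _grad_step(args, i):
    # Take an optimizer step once every `args.grad` mini-batches (hypothetical sketch).
    return (i + 1) % args.grad == 0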
Code Example #4
def valid_(args):
    model.eval()
    valid_loader = create_loader(args, valid_ds)
    dev_loss = 0.
    for i, (inputs, labels) in enumerate(valid_loader):
        if args.cuda:
            inputs = inputs.cuda()
            labels = labels.cuda()

        with torch.no_grad():
            if args.model == "CNN":
                logits = model(True, inputs)
            else:
                logits = model(inputs)
        loss = criterion(logits, labels)
        dev_loss += loss.item()

    dev_loss /= len(valid_loader)

    return dev_loss
Code Example #5
def train(epoch, loss_type='ASYM'):
    device = torch.device("cuda:%s" % args.device if torch.cuda.is_available() else "cpu")
    model.to(device)
    train_loader = create_loader(args, train_ds, shuffle=True)
    print('start train')
    # Renamed the parameter from `loss` to `loss_type` so it is not shadowed
    # by the per-batch loss tensor below.
    if loss_type == "BCE":
        criterion = torch.nn.BCEWithLogitsLoss()
    elif loss_type == "ASYM":
        criterion = AsymmetricLoss()
    else:
        raise ValueError("unknown loss type: %s" % loss_type)

    train_loss = 0.
    global_steps = 0
    optimizer.zero_grad()

    for i, (inputs, labels) in enumerate(train_loader):
        model.train()
        # Move the batch to the same device as the model.
        inputs, labels = inputs.to(device), labels.to(device)

        if args.model == "CNN":
            out = model(True, inputs)
        else:
            out = model(inputs)

        # Multi-label loss (BCE or asymmetric) against the full label vector.
        loss = criterion(out, labels)
        train_loss += loss.item()
        loss.backward()

        if _grad_step(args, i):
            optimizer.step()
            optimizer.zero_grad()
            global_steps += 1

        logger.log("Loss: ", loss.item())
    print("Training Loss: ", train_loss / global_steps)
    return  train_loss / global_steps
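
AsymmetricLoss is also defined elsewhere. Assuming it follows the asymmetric multi-label loss of Ben-Baruch et al. (2020), a minimal sketch would be the following; the hyperparameter names and defaults are assumptions, not the project's actual values:

import torch
import torch.nn as nn

class AsymmetricLoss(nn.Module):
    # Sketch of the asymmetric multi-label loss (Ben-Baruch et al., 2020).
    def __init__(self, gamma_neg=4.0, gamma_pos=1.0, clip=0.05, eps=1e-8):
        super().__init__()
        self.gamma_neg, self.gamma_pos = gamma_neg, gamma_pos
        self.clip, self.eps = clip, eps

    def forward(self, logits, targets):
        p = torch.sigmoid(logits)
        p_shift = (p - self.clip).clamp(min=0)  # probability shifting for negatives
        loss_pos = targets * (1 - p).pow(self.gamma_pos) * torch.log(p.clamp(min=self.eps))
        loss_neg = (1 - targets) * p_shift.pow(self.gamma_neg) * torch.log((1 - p_shift).clamp(min=self.eps))
        return -(loss_pos + loss_neg).mean()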
Code Example #6
def write_out(args, write_file):
    # model.load_state_dict(torch.load(args.ckpt))
    model.eval()
    test_loader = create_loader(args, test_ds, shuffle=False)
    y_true, y_pred = [], []
    for i, (inputs, labels) in enumerate(test_loader):
        if args.cuda:
            inputs = inputs.cuda()
            labels = labels.cuda()

        with torch.no_grad():
            if args.model == "CNN":
                pred = model(False, inputs)
            else:
                pred = model(inputs)

        cur_label = labels.cpu().detach().numpy().tolist()
        y_true.extend(cur_label)
        prediction = torch.sigmoid(pred).cpu().detach().numpy().tolist()
        prediction_value = (np.array(prediction) >= 0.5).astype(int)
        y_pred.extend(prediction)
        # Write "gold ||| prediction" pairs, one example per line.
        for j in range(inputs.size()[0]):
            write_file.write(
                str(cur_label[j]) + " |||" + str(prediction_value[j]) + "\n")

    # Compute the metrics returned below.
    roc_auc = cal_roc_auc(np.array(y_true), np.array(y_pred))
    targets, outputs = y_true, np.array(y_pred) >= 0.5
    accuracy = metrics.accuracy_score(targets, outputs)
    f1_score_micro = metrics.f1_score(targets, outputs, average='micro')
    f1_score_macro = metrics.f1_score(targets, outputs, average='macro')
    prf1 = precision_recall_fscore_support(targets, outputs, beta=0.5, average=None)
    print(f"Accuracy Score = {accuracy}")
    print(f"F1 Score (Micro) = {f1_score_micro}")
    print(f"F1 Score (Macro) = {f1_score_macro}")

    return accuracy, f1_score_micro, f1_score_macro, prf1, classification_report(targets, outputs), roc_auc
Code Example #7
def main():
    parser = argparse.ArgumentParser(description='Training')
    # Dataset / Model parameters
    parser.add_argument('--data', metavar='DIR', help='path to dataset')
    parser.add_argument('--model',
                        default='hypernet',
                        type=str,
                        metavar='MODEL',
                        help='Name of model to train (default: "hypernet")')
    parser.add_argument(
        '--pretrained',
        action='store_true',
        default=False,
        help='Start with pretrained version of specified network (if avail)')
    parser.add_argument(
        '--initial-checkpoint',
        default='',
        type=str,
        metavar='PATH',
        help='Initialize model from this checkpoint (default: none)')
    parser.add_argument(
        '--resume',
        default='',
        type=str,
        metavar='PATH',
        help=
        'Resume full model and optimizer state from checkpoint (default: none)'
    )
    parser.add_argument('--num-classes',
                        type=int,
                        default=1000,
                        metavar='N',
                        help='number of label classes (default: 1000)')
    parser.add_argument(
        '--gp',
        default='avg',
        type=str,
        metavar='POOL',
        help=
        'Type of global pool, "avg", "max", "avgmax", "avgmaxc" (default: "avg")'
    )
    parser.add_argument(
        '--img-size',
        type=int,
        default=None,
        metavar='N',
        help='Image patch size (default: None => model default)')
    parser.add_argument('--mean',
                        type=float,
                        nargs='+',
                        default=None,
                        metavar='MEAN',
                        help='Override mean pixel value of dataset')
    parser.add_argument('--std',
                        type=float,
                        nargs='+',
                        default=None,
                        metavar='STD',
                        help='Override std deviation of dataset')
    parser.add_argument(
        '--interpolation',
        default='',
        type=str,
        metavar='NAME',
        help='Image resize interpolation type (overrides model)')
    parser.add_argument('-b',
                        '--batch-size',
                        type=int,
                        default=32,
                        metavar='N',
                        help='input batch size for training (default: 32)')
    parser.add_argument('--drop',
                        type=float,
                        default=0.0,
                        metavar='DROP',
                        help='Dropout rate (default: 0.)')
    # Optimizer parameters
    parser.add_argument('--opt',
                        default='sgd',
                        type=str,
                        metavar='OPTIMIZER',
                        help='Optimizer (default: "sgd")')
    parser.add_argument('--opt-eps',
                        default=1e-8,
                        type=float,
                        metavar='EPSILON',
                        help='Optimizer Epsilon (default: 1e-8)')
    parser.add_argument('--momentum',
                        type=float,
                        default=0.9,
                        metavar='M',
                        help='SGD momentum (default: 0.9)')
    parser.add_argument('--weight-decay',
                        type=float,
                        default=0.0001,
                        help='weight decay (default: 0.0001)')
    # Learning rate schedule parameters
    parser.add_argument('--sched',
                        default='spos_linear',
                        type=str,
                        metavar='SCHEDULER',
                        help='LR scheduler (default: "spos_linear")')
    parser.add_argument('--lr',
                        type=float,
                        default=0.01,
                        metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--warmup-lr',
                        type=float,
                        default=0.0001,
                        metavar='LR',
                        help='warmup learning rate (default: 0.0001)')
    parser.add_argument(
        '--min-lr',
        type=float,
        default=1e-5,
        metavar='LR',
        help='lower lr bound for cyclic schedulers that hit 0 (1e-5)')
    parser.add_argument('--epochs',
                        type=int,
                        default=120,
                        metavar='N',
                        help='number of epochs to train (default: 120)')
    parser.add_argument('--start-epoch',
                        default=None,
                        type=int,
                        metavar='N',
                        help='manual epoch number (useful on restarts)')
    parser.add_argument('--decay-epochs',
                        type=int,
                        default=15,
                        metavar='N',
                        help='epoch interval to decay LR')
    parser.add_argument('--warmup-epochs',
                        type=int,
                        default=3,
                        metavar='N',
                        help='epochs to warmup LR, if scheduler supports')
    parser.add_argument(
        '--cooldown-epochs',
        type=int,
        default=10,
        metavar='N',
        help='epochs to cooldown LR at min_lr, after cyclic schedule ends')
    parser.add_argument('--decay-rate',
                        '--dr',
                        type=float,
                        default=0.1,
                        metavar='RATE',
                        help='LR decay rate (default: 0.1)')
    parser.add_argument('--grad',
                        type=int,
                        default=1,
                        metavar='N',
                        help='number of batches between optimizer steps (default: 1)')
    # Augmentation parameters
    parser.add_argument('--color-jitter',
                        type=float,
                        default=0.4,
                        metavar='PCT',
                        help='Color jitter factor (default: 0.4)')
    parser.add_argument('--reprob',
                        type=float,
                        default=0.,
                        metavar='PCT',
                        help='Random erase prob (default: 0.)')
    parser.add_argument('--remode',
                        type=str,
                        default='const',
                        help='Random erase mode (default: "const")')
    parser.add_argument(
        '--mixup',
        type=float,
        default=0.0,
        help='mixup alpha, mixup enabled if > 0. (default: 0.)')
    parser.add_argument(
        '--mixup-off-epoch',
        default=0,
        type=int,
        metavar='N',
        help='turn off mixup after this epoch, disabled if 0 (default: 0)')
    parser.add_argument('--smoothing',
                        type=float,
                        default=0.1,
                        help='label smoothing (default: 0.1)')
    # Batch norm parameters (only works with gen_efficientnet based models currently)
    parser.add_argument(
        '--bn-tf',
        action='store_true',
        default=False,
        help=
        'Use Tensorflow BatchNorm defaults for models that support it (default: False)'
    )
    parser.add_argument('--bn-momentum',
                        type=float,
                        default=None,
                        help='BatchNorm momentum override (if not None)')
    parser.add_argument('--bn-eps',
                        type=float,
                        default=None,
                        help='BatchNorm epsilon override (if not None)')
    # Model Exponential Moving Average
    parser.add_argument('--model-ema',
                        action='store_true',
                        default=False,
                        help='Enable tracking moving average of model weights')
    parser.add_argument(
        '--model-ema-force-cpu',
        action='store_true',
        default=False,
        help=
        'Force ema to be tracked on CPU, rank=0 node only. Disables EMA validation.'
    )
    parser.add_argument(
        '--model-ema-decay',
        type=float,
        default=0.9998,
        help='decay factor for model weights moving average (default: 0.9998)')
    parser.add_argument('--lr-noise',
                        type=float,
                        nargs='+',
                        default=None,
                        metavar='pct, pct',
                        help='learning rate noise on/off epoch percentages')
    parser.add_argument(
        '--lr-noise-pct',
        type=float,
        default=0.67,
        metavar='PERCENT',
        help='learning rate noise limit percent (default: 0.67)')
    parser.add_argument('--lr-noise-std',
                        type=float,
                        default=1.0,
                        metavar='STDDEV',
                        help='learning rate noise std-dev (default: 1.0)')
    # Misc
    parser.add_argument('--seed',
                        type=int,
                        default=42,
                        metavar='S',
                        help='random seed (default: 42)')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=50,
        metavar='N',
        help='how many batches to wait before logging training status')
    parser.add_argument('-j',
                        '--workers',
                        type=int,
                        default=4,
                        metavar='N',
                        help='how many training processes to use (default: 4)')
    parser.add_argument('--num-gpu',
                        type=int,
                        default=1,
                        help='Number of GPUS to use')
    parser.add_argument("--local_rank", default=0, type=int)
    parser.add_argument("--update_iter", default=1, type=int)
    parser.add_argument("--slice", default=4, type=int)
    parser.add_argument("--pool_size", default=10, type=int)
    parser.add_argument(
        '--resunit',
        action='store_true',
        default=False,
        help='Use residual-unit blocks in the search space (default: False)')
    parser.add_argument(
        '--dil_conv',
        action='store_true',
        default=False,
        help='Use dilated-convolution blocks in the search space (default: False)')
    parser.add_argument('--tiny', action='store_true', default=False)
    parser.add_argument('--flops_maximum', default=600, type=int)
    parser.add_argument('--flops_minimum', default=0, type=int)
    parser.add_argument('--pick_method', default='meta', type=str)
    parser.add_argument('--meta_lr', default=1e-2, type=float)
    parser.add_argument('--meta_sta_epoch', default=-1, type=int)
    parser.add_argument('--model_selection', default=14, type=int)
    parser.add_argument('--how_to_prob', default='pre_prob', type=str)
    parser.add_argument('--pre_prob',
                        default=(0.05, 0.2, 0.05, 0.5, 0.05, 0.15),
                        type=tuple)
    args = parser.parse_args()

    seed = args.seed
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    torch.backends.cudnn.deterministic = True

    args.distributed = False
    if 'WORLD_SIZE' in os.environ:
        args.distributed = int(os.environ['WORLD_SIZE']) > 1
        if args.distributed and args.num_gpu > 1:
            logger.warning(
                'Using more than one GPU per process in distributed mode is not allowed. Setting num_gpu to 1.'
            )
            args.num_gpu = 1

    args.device = 'cuda:0'
    args.world_size = 1
    args.rank = 0  # global rank
    if args.distributed:
        args.num_gpu = 1
        args.device = 'cuda:%d' % args.local_rank
        torch.cuda.set_device(args.local_rank)
        import random
        port = random.randint(0, 50000)
        torch.distributed.init_process_group(
            backend='nccl', init_method='env://'
        )  # tcp://127.0.0.1:{}'.format(port), rank=args.local_rank, world_size=8)
        args.world_size = torch.distributed.get_world_size()
        args.rank = torch.distributed.get_rank()
    assert args.rank >= 0

    if args.distributed:
        logging.info(
            'Training in distributed mode with multiple processes, 1 GPU per process. Process %d, total %d.'
            % (args.rank, args.world_size))
    else:
        logging.info('Training with a single process on %d GPUs.' %
                     args.num_gpu)

    if args.model_selection == 470:
        arch_list = [[0], [3, 4, 3, 1], [3, 2, 3, 0], [3, 3, 3, 1],
                     [3, 3, 3, 3], [3, 3, 3, 3], [0]]
        arch_def = [
            # stage 0, 112x112 in
            ['ds_r1_k3_s1_e1_c16_se0.25'],
            # stage 1, 112x112 in
            [
                'ir_r1_k3_s2_e4_c24_se0.25', 'ir_r1_k3_s1_e4_c24_se0.25',
                'ir_r1_k3_s1_e4_c24_se0.25', 'ir_r1_k3_s1_e4_c24_se0.25'
            ],
            # stage 2, 56x56 in
            [
                'ir_r1_k5_s2_e4_c40_se0.25', 'ir_r1_k5_s1_e4_c40_se0.25',
                'ir_r1_k5_s2_e4_c40_se0.25', 'ir_r1_k5_s2_e4_c40_se0.25'
            ],
            # stage 3, 28x28 in
            [
                'ir_r1_k3_s2_e6_c80_se0.25', 'ir_r1_k3_s1_e4_c80_se0.25',
                'ir_r1_k3_s1_e4_c80_se0.25', 'ir_r2_k3_s1_e4_c80_se0.25'
            ],
            # stage 4, 14x14in
            [
                'ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25',
                'ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25'
            ],
            # stage 5, 14x14in
            [
                'ir_r1_k5_s2_e6_c192_se0.25', 'ir_r1_k5_s1_e6_c192_se0.25',
                'ir_r1_k5_s2_e6_c192_se0.25', 'ir_r1_k5_s2_e6_c192_se0.25'
            ],
            # stage 6, 7x7 in
            ['cn_r1_k1_s1_c320_se0.25'],
        ]
        args.img_size = 224
    elif args.model_selection == 42:
        arch_list = [[0], [3], [3, 1], [3, 1], [3, 3, 3], [3, 3], [0]]
        arch_def = [
            # stage 0, 112x112 in
            ['ds_r1_k3_s1_e1_c16_se0.25'],
            # stage 1, 112x112 in
            ['ir_r1_k3_s2_e4_c24_se0.25'],
            # stage 2, 56x56 in
            ['ir_r1_k5_s2_e4_c40_se0.25', 'ir_r1_k5_s2_e4_c40_se0.25'],
            # stage 3, 28x28 in
            ['ir_r1_k3_s2_e6_c80_se0.25', 'ir_r1_k3_s2_e6_c80_se0.25'],
            # stage 4, 14x14in
            [
                'ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25',
                'ir_r1_k3_s1_e6_c96_se0.25'
            ],
            # stage 5, 14x14in
            ['ir_r1_k5_s2_e6_c192_se0.25', 'ir_r1_k5_s2_e6_c192_se0.25'],
            # stage 6, 7x7 in
            ['cn_r1_k1_s1_c320_se0.25'],
        ]
        args.img_size = 96
    elif args.model_selection == 14:
        arch_list = [[0], [3], [3, 3], [3, 3], [3], [3], [0]]
        arch_def = [
            # stage 0, 112x112 in
            ['ds_r1_k3_s1_e1_c16_se0.25'],
            # stage 1, 112x112 in
            ['ir_r1_k3_s2_e4_c24_se0.25'],
            # stage 2, 56x56 in
            ['ir_r1_k5_s2_e4_c40_se0.25', 'ir_r1_k3_s2_e4_c40_se0.25'],
            # stage 3, 28x28 in
            ['ir_r1_k3_s2_e6_c80_se0.25', 'ir_r1_k3_s2_e4_c80_se0.25'],
            # stage 4, 14x14in
            ['ir_r1_k3_s1_e6_c96_se0.25'],
            # stage 5, 14x14in
            ['ir_r1_k5_s2_e6_c192_se0.25'],
            # stage 6, 7x7 in
            ['cn_r1_k1_s1_c320_se0.25'],
        ]
        args.img_size = 64
    elif args.model_selection == 112:
        arch_list = [[0], [3], [3, 3], [3, 3], [3, 3, 3], [3, 3], [0]]
        arch_def = [
            # stage 0, 112x112 in
            ['ds_r1_k3_s1_e1_c16_se0.25'],
            # stage 1, 112x112 in
            ['ir_r1_k3_s2_e4_c24_se0.25'],
            # stage 2, 56x56 in
            ['ir_r1_k5_s2_e4_c40_se0.25', 'ir_r1_k3_s2_e4_c40_se0.25'],
            # stage 3, 28x28 in
            ['ir_r1_k3_s2_e6_c80_se0.25', 'ir_r1_k3_s2_e6_c80_se0.25'],
            # stage 4, 14x14in
            [
                'ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25',
                'ir_r1_k3_s1_e6_c96_se0.25'
            ],
            # stage 5, 14x14in
            ['ir_r1_k5_s2_e6_c192_se0.25', 'ir_r1_k5_s2_e6_c192_se0.25'],
            # stage 6, 7x7 in
            ['cn_r1_k1_s1_c320_se0.25'],
        ]
        args.img_size = 160
    elif args.model_selection == 285:
        arch_list = [[0], [3], [3, 3], [3, 1, 3], [3, 3, 3, 3], [3, 3, 3], [0]]
        arch_def = [
            # stage 0, 112x112 in
            ['ds_r1_k3_s1_e1_c16_se0.25'],
            # stage 1, 112x112 in
            ['ir_r1_k3_s2_e4_c24_se0.25'],
            # stage 2, 56x56 in
            ['ir_r1_k5_s2_e4_c40_se0.25', 'ir_r1_k5_s2_e4_c40_se0.25'],
            # stage 3, 28x28 in
            [
                'ir_r1_k3_s2_e6_c80_se0.25', 'ir_r1_k3_s2_e6_c80_se0.25',
                'ir_r1_k3_s2_e6_c80_se0.25'
            ],
            # stage 4, 14x14in
            [
                'ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25',
                'ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25'
            ],
            # stage 5, 14x14in
            [
                'ir_r1_k5_s2_e6_c192_se0.25', 'ir_r1_k5_s2_e6_c192_se0.25',
                'ir_r1_k5_s2_e6_c192_se0.25'
            ],
            # stage 6, 7x7 in
            ['cn_r1_k1_s1_c320_se0.25'],
        ]
        args.img_size = 224
    elif args.model_selection == 600:
        arch_list = [[0], [3, 3, 2, 3, 3], [3, 2, 3, 2, 3], [3, 2, 3, 2, 3],
                     [3, 3, 2, 2, 3, 3], [3, 3, 2, 3, 3, 3], [0]]
        arch_def = [
            # stage 0, 112x112 in
            ['ds_r1_k3_s1_e1_c16_se0.25'],
            # stage 1, 112x112 in
            [
                'ir_r1_k3_s2_e4_c24_se0.25', 'ir_r1_k3_s2_e4_c24_se0.25',
                'ir_r1_k3_s2_e4_c24_se0.25', 'ir_r1_k3_s2_e4_c24_se0.25',
                'ir_r1_k3_s2_e4_c24_se0.25'
            ],
            # stage 2, 56x56 in
            [
                'ir_r1_k5_s2_e4_c40_se0.25', 'ir_r1_k5_s2_e4_c40_se0.25',
                'ir_r1_k5_s2_e4_c40_se0.25', 'ir_r1_k5_s2_e4_c40_se0.25',
                'ir_r1_k5_s2_e4_c40_se0.25'
            ],
            # stage 3, 28x28 in
            [
                'ir_r1_k3_s2_e6_c80_se0.25', 'ir_r1_k3_s1_e4_c80_se0.25',
                'ir_r1_k3_s1_e4_c80_se0.25', 'ir_r1_k3_s1_e4_c80_se0.25',
                'ir_r1_k3_s1_e4_c80_se0.25'
            ],
            # stage 4, 14x14in
            [
                'ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25',
                'ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25',
                'ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25'
            ],
            # stage 5, 14x14in
            [
                'ir_r1_k5_s2_e6_c192_se0.25', 'ir_r1_k5_s1_e6_c192_se0.25',
                'ir_r1_k5_s1_e6_c192_se0.25', 'ir_r1_k5_s1_e6_c192_se0.25',
                'ir_r1_k5_s1_e6_c192_se0.25', 'ir_r1_k5_s1_e6_c192_se0.25'
            ],
            # stage 6, 7x7 in
            ['cn_r1_k1_s1_c320_se0.25'],
        ]
        args.img_size = 224
    else:
        raise ValueError('unsupported --model_selection: %d' % args.model_selection)

    model = _gen_childnet(arch_list,
                          arch_def,
                          num_classes=args.num_classes,
                          drop_rate=args.drop,
                          global_pool=args.gp)

    data_config = resolve_data_config(vars(args),
                                      model=model,
                                      verbose=args.local_rank == 0)
    if args.local_rank == 0:
        logger.info(args)

    if args.local_rank == 0:
        logger.info('Model %s created, param count: %d' %
                    (args.model, sum([m.numel() for m in model.parameters()])))

    if args.num_gpu > 1:
        # No --amp flag is defined in this snippet, so guard the lookup.
        if getattr(args, 'amp', False):
            logging.warning(
                'AMP does not work well with nn.DataParallel, disabling. Use distributed mode for multi-GPU AMP.'
            )
            args.amp = False
        model = nn.DataParallel(model,
                                device_ids=list(range(args.num_gpu))).cuda()
    else:
        model.cuda()

    if args.distributed:
        if has_apex:
            model = DDP(model, delay_allreduce=True)
        else:
            if args.local_rank == 0:
                logger.info(
                    "Using torch DistributedDataParallel. Install NVIDIA Apex for Apex DDP."
                )
            model = DDP(model,
                        device_ids=[args.local_rank
                                    ])  # can use device str in Torch >= 1.1
        # NOTE: EMA model does not need to be wrapped by DDP

    model_ema = ModelEma(model,
                         decay=args.model_ema_decay,
                         device='cpu' if args.model_ema_force_cpu else '',
                         resume=args.resume)

    if args.tiny:
        from dataset.tiny_imagenet import get_newimagenet
        [loader_train,
         loader_eval], [train_sampler, test_sampler
                        ] = get_newimagenet(args.data, args.batch_size)
    else:
        train_dir = os.path.join(args.data, 'train')
        if not os.path.exists(train_dir):
            logger.error(
                'Training folder does not exist at: {}'.format(train_dir))
            exit(1)

        eval_dir = os.path.join(args.data, 'val')
        if not os.path.isdir(eval_dir):
            logger.error(
                'Validation folder does not exist at: {}'.format(eval_dir))
            exit(1)
        dataset_eval = Dataset(eval_dir)

        loader_eval = create_loader(
            dataset_eval,
            input_size=data_config['input_size'],
            batch_size=4 * args.batch_size,
            is_training=False,
            interpolation=data_config['interpolation'],
            mean=data_config['mean'],
            std=data_config['std'],
            num_workers=args.workers,
            distributed=args.distributed,
        )

    def accuracy(output, target, topk=(1, )):
        """Computes the accuracy over the k top predictions for the specified values of k"""
        maxk = max(topk)
        batch_size = target.size(0)
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))
        # correct[:k] is non-contiguous after t()/eq(), so use reshape, not view.
        return [
            correct[:k].reshape(-1).float().sum(0) * 100. / batch_size
            for k in topk
        ]

    prec1_m = AverageMeter()
    prec5_m = AverageMeter()

    def reduce_tensor(tensor, n):
        rt = tensor.clone()
        dist.all_reduce(rt, op=dist.ReduceOp.SUM)
        rt /= n
        return rt

    model_ema.ema.eval()

    with torch.no_grad():
        for step, (x, y) in enumerate(loader_eval):
            logits = model_ema.ema(x)
            prec1, prec5 = accuracy(logits, y, topk=(1, 5))

            # all_reduce is only valid after init_process_group.
            if args.distributed:
                prec1 = reduce_tensor(prec1, args.world_size)
                prec5 = reduce_tensor(prec5, args.world_size)

            prec1_m.update(prec1.item(), logits.size(0))
            prec5_m.update(prec5.item(), logits.size(0))

    if args.local_rank == 0:
        logger.info("Prec1: %s Prec5: %s", prec1_m.avg, prec5_m.avg)
Code Example #8
File: main.py  Project: liuguoyou/FRSKD
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', required=True, type=str)
    parser.add_argument('--data', default='CIFAR100', type=str)
    parser.add_argument('--random_seed', default=10, type=int)

    parser.add_argument('--epoch', default=200, type=int)
    parser.add_argument('--scheduler',
                        default='step',
                        type=str,
                        help='step|cos')
    parser.add_argument('--schedule', default=[100, 150], type=int, nargs='+')
    parser.add_argument('--batch_size', default=128, type=int)
    parser.add_argument('--lr', default=0.1, type=float)
    parser.add_argument('--lr_decay', default=0.1, type=float)
    parser.add_argument('--momentum', default=0.9, type=float)
    parser.add_argument('--weight_decay', default=1e-4, type=float)

    parser.add_argument('--model', default='cifarresnet18', type=str)

    parser.add_argument('--num_channels', default=256, type=int)
    parser.add_argument('--num_features', default=-1, type=int)

    parser.add_argument('--repeat', default=1, type=int)
    parser.add_argument('--depth', default=2, type=int)
    parser.add_argument('--temperature', default=4, type=float)

    # distill
    parser.add_argument('--bifpn',
                        default='BiFPNc',
                        type=str,
                        help='BiFPN|BiFPNc')
    parser.add_argument('--width', default=2, type=int)
    parser.add_argument('--distill', default='att', type=str)
    parser.add_argument('--alpha', default=1, type=float)
    parser.add_argument('--beta', default=0.0, type=float)
    parser.add_argument('--aux', default='none', type=str)
    parser.add_argument('--aux_lamb', default=0.0, type=float)

    # augmentation
    parser.add_argument('--aug', default='none', type=str)
    parser.add_argument('--aug_a', default=0.0, type=float)

    args = parser.parse_args()
    np.random.seed(args.random_seed)
    torch.manual_seed(args.random_seed)
    torch.cuda.manual_seed(args.random_seed)
    random.seed(args.random_seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    args_path = '{}_{}_{}_a{}_b{}_{}{}_{}{}'.format(args.data, args.model,
                                                    args.distill, args.alpha,
                                                    args.beta, args.aux,
                                                    args.aux_lamb, args.aug,
                                                    args.aug_a)
    path_log = os.path.join('logs', args_path)
    if not os.path.exists(path_log):
        os.makedirs(path_log)
    logger = create_logging(os.path.join(path_log,
                                         '%s.txt' % args.random_seed))
    train_loader, test_loader, args.num_classes = create_loader(
        args.batch_size, args.data_dir, args.data)

    for param in sorted(vars(args).keys()):
        logger.info('--{0} {1}'.format(param, vars(args)[param]))

    args.depth = [args.depth] * 3
    model = models.__dict__[args.model](num_classes=args.num_classes)
    if args.num_features == -1:
        args.num_features = len(model.network_channels)
    args.network_channels = model.network_channels[-args.num_features:]
    bifpn = models.__dict__[args.bifpn](args.network_channels,
                                        args.num_classes, args)

    if args.aux == 'sla':
        criterion_ce = distill_loss.__dict__[args.aux](args)
        criterion_ce.train()
    else:
        criterion_ce = nn.CrossEntropyLoss()

    criterion_kd = distill_loss.__dict__[args.distill](args, bifpn)
    criterion_kd.train()
    train_list = nn.ModuleList()
    train_list.append(model)
    train_list.append(criterion_ce)
    train_list.append(criterion_kd)
    train_list.append(bifpn)
    bifpn.cuda()
    train_list.cuda()

    criterion = [criterion_ce, criterion_kd]
    optimizer = optim.SGD(train_list.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    scheduler = lr_scheduler(optimizer, args.scheduler, args.schedule,
                             args.lr_decay, args.epoch)

    for epoch in range(1, args.epoch + 1):
        s = time()
        loss, train_acc1 = train(model, bifpn, optimizer, criterion,
                                 train_loader, args)
        scheduler.step()
        test_acc1 = test(model, test_loader)
        logger.info(
            'Epoch: {0:>2d}|Train Loss: {1:2.4f}| Train Acc: {2:.4f}| Test Acc: {3:.4f}| Time: {4:4.2f}(s)'
            .format(epoch, loss, train_acc1, test_acc1,
                    time() - s))
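
The lr_scheduler helper above is not shown. Since --scheduler accepts 'step|cos', a plausible sketch dispatches between milestone decay and cosine annealing; the argument order matches the call above, but the implementation is an assumption:

import torch.optim as optim

def lr_scheduler(optimizer, name, milestones, lr_decay, epochs):
    # Hypothetical sketch of the scheduler factory used above.
    if name == 'step':
        return optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=lr_decay)
    if name == 'cos':
        return optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)
    raise ValueError('unknown scheduler: %s' % name)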
Code Example #9
File: main.py  Project: CreamNuts/Dacon_nh
    elif args.model == 'albert':
        model = AlbertForSequenceClassification.from_pretrained(
            "albert-base-v2").to(device)

    if args.load is not None:
        model.load_state_dict(torch.load(args.load))

    if args.mode != 'test':
        if args.tensorboard:
            writer = SummaryWriter(
                f'runs/{args.model}_{args.mode}_{args.lr}_{args.batchsize}')
        else:
            writer = None
        train_loader, val_loader = create_loader(args.data_dir,
                                                 args.model,
                                                 args.mode,
                                                 batch_size=args.batchsize,
                                                 ratio=args.ratio)
        optimizer = AdamW(model.parameters(), lr=args.lr)
        for epoch in range(args.epoch):
            train(epoch, train_loader, val_loader, optimizer, model, device,
                  args.save, writer)

    else:
        args.data_dir = './data/news_test.csv'
        test_loader, _ = create_loader(args.data_dir,
                                       args.model,
                                       args.mode,
                                       batch_size=args.batchsize)
        info_list = test(test_loader, model, device)
        submission = pd.read_csv('./data/sample_submission.csv')
Code Example #10
def test(args, name):
    # model.load_state_dict(torch.load(args.ckpt))
    model.eval()
    test_loader = create_loader(args, test_ds, shuffle=False)
    y_true, y_pred = [], []
    for i, (inputs, labels) in enumerate(test_loader):
        if args.cuda:
            inputs = inputs.cuda()
            labels = labels.cuda()

        with torch.no_grad():
            if args.model == "CNN":
                pred = model(False, inputs)
            else:
                pred = model(inputs)

        cur_label = labels.cpu().detach().numpy()
        y_true.extend(cur_label)
        prediction = torch.sigmoid(pred).cpu().detach().numpy().tolist()
        y_pred.extend(prediction)

    # Map predicted label indices back to label names and dump them as JSON.
    import json
    test_file = pd.read_json("data/data_task6/test.json")
    ids = list(test_file.id)
    y_pred_value = np.array(y_pred) >= 0.5

    store_file = []

    def get_key(val, my_dict):
        for key, value in my_dict.items():
            if val == value:
                if key.startswith('Thought-terminating'):
                    return 'Thought-terminating'
                return key

    for i in range(len(ids)):
        labels_name_ = [
            get_key(index, test_ds.name2id)
            for index in np.where(y_pred_value[i] != 0)[0].tolist()
        ]
        cur = {"id": ids[i], "labels": labels_name_}
        store_file.append(cur)
    with open("output/%s.json" % name, "w") as out_file:
        json.dump(store_file, out_file)

    roc_auc = cal_roc_auc(np.array(y_true), np.array(y_pred))
    targets, outputs = y_true, np.array(y_pred) >= 0.5
    accuracy = metrics.accuracy_score(targets, outputs)
    f1_score_micro = metrics.f1_score(targets, outputs, average='micro')
    f1_score_macro = metrics.f1_score(targets, outputs, average='macro')
    prf1 = precision_recall_fscore_support(targets,
                                           outputs,
                                           beta=0.5,
                                           average=None)
    print(f"Accuracy Score = {accuracy}")
    print(f"F1 Score (Micro) = {f1_score_micro}")
    print(f"F1 Score (Macro) = {f1_score_macro}")

    return accuracy, f1_score_micro, f1_score_macro, prf1, classification_report(targets, outputs), roc_auc
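
cal_roc_auc is another helper that is not defined in these snippets. A reasonable sketch is a macro-averaged per-label ROC-AUC that skips degenerate label columns; the skipping rule and signature are assumptions:

import numpy as np
from sklearn.metrics import roc_auc_score

def cal_roc_auc(y_true, y_score):
    # Macro-average ROC-AUC over label columns (hypothetical sketch).
    aucs = []
    for c in range(y_true.shape[1]):
        col = y_true[:, c]
        if 0 < col.sum() < len(col):  # both classes must be present
            aucs.append(roc_auc_score(col, y_score[:, c]))
    return float(np.mean(aucs))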
Code Example #11
lr = config.lr
optimizer = optim.Adam(model.parameters(), weight_decay=0.0, lr=lr)

scheduler = optim.lr_scheduler.StepLR(optimizer, 8, gamma=0.1, last_epoch=-1)
n_epochs = config.num_epochs
log_interval = 100

# DataLoader
# Train Dataset & Loader
print("Data Loading ...")
trainset = Dataset(config.traindata_dir)
trainloader = create_loader(dataset=trainset,
                            input_size=(3, 224, 224),
                            batch_size=config.batch_size,
                            interpolation="bicubic",
                            mean=(0.485, 0.456, 0.406),
                            std=(0.229, 0.224, 0.225),
                            num_workers=2,
                            crop_pct=1.0)

# Test Dataset & Loader
validset = Dataset(config.validdata_dir)
validloader = create_loader(dataset=validset,
                            input_size=(3, 224, 224),
                            batch_size=config.batch_size,
                            interpolation="bicubic",
                            mean=(0.485, 0.456, 0.406),
                            std=(0.229, 0.224, 0.225),
                            num_workers=2,
                            crop_pct=1.0)