Example no. 1
    criterion = torch.nn.CrossEntropyLoss()
    if gpus is not None:
        model = torch.nn.DataParallel(model, gpus)
    model.to(device=device, dtype=dtype)
    criterion.to(device=device, dtype=dtype)

    optimizer = torch.optim.SGD(model.parameters(), learning_rate, momentum=momentum, weight_decay=decay,
                                nesterov=True)
    if args.find_clr:
        find_bounds_clr(model, train_loader, optimizer, criterion, device, dtype, min_lr=min_lr,
                        max_lr=max_lr, step_size=epochs_per_step * len(train_loader), mode=mode,
                        save_path=save_path)
        return

    if args.clr:
        scheduler = CyclicLR(optimizer, base_lr=args.min_lr, max_lr=args.max_lr,
                             step_size=args.epochs_per_step * len(train_loader), mode=args.mode)
    else:
        scheduler = MultiStepLR(optimizer, milestones=args.schedule, gamma=args.gamma)
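    # CyclicLR above varies the LR between min_lr and max_lr on a per-batch
    # schedule, whereas MultiStepLR decays the LR by gamma at each milestone epoch.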

    best_test = 0

    if evaluate == 'true':
        loss, top1, top5 = test(model, val_loader, criterion, device, dtype)  # TODO
        return

    csv_logger = CsvLogger(filepath=save_path, data=data)
    csv_logger.save_params(sys.argv, args)

    claimed_acc1 = None
    claimed_acc5 = None
Example no. 2
def main():
    args = parser.parse_args()

    if args.seed is None:
        args.seed = random.randint(1, 10000)
    print("Random Seed: ", args.seed)
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.gpus:
        torch.cuda.manual_seed_all(args.seed)

    time_stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    if args.evaluate:
        args.results_dir = '/tmp'
    if args.save == '':
        args.save = time_stamp
    save_path = os.path.join(args.results_dir, args.save)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    if args.gpus is not None:
        args.gpus = [int(i) for i in args.gpus.split(',')]
        device = 'cuda:' + str(args.gpus[0])
        cudnn.benchmark = True
    else:
        device = 'cpu'

    if args.type == 'float64':
        dtype = torch.float64
    elif args.type == 'float32':
        dtype = torch.float32
    elif args.type == 'float16':
        dtype = torch.float16
    else:
        raise ValueError('Wrong type!')  # TODO int8

    if args.model == "recnn":
        print("Training RECNN")
        model = RECNN()
        ex_model = RECNN_Extractor()
    else:
        raise ValueError("Error: no model matched!")

    num_parameters = sum([l.nelement() for l in model.parameters()])
    print(model)
    print('number of parameters: {}'.format(num_parameters))

    # define loss function (criterion) and optimizer
    criterion = torch.nn.MSELoss()

    if args.gpus is not None:
        model = torch.nn.DataParallel(model, args.gpus)
        ex_model = torch.nn.DataParallel(ex_model, args.gpus)

    model.to(device=device, dtype=dtype)
    ex_model.to(device=device, dtype=dtype)
    criterion.to(device=device, dtype=dtype)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.decay,
                                nesterov=True)
    if args.find_clr:
        find_bounds_clr(model,
                        train_loader,
                        optimizer,
                        criterion,
                        device,
                        dtype,
                        min_lr=args.min_lr,
                        max_lr=args.max_lr,
                        step_size=args.epochs_per_step * len(train_loader),
                        mode=args.mode,
                        save_path=save_path)
        return

    if args.clr:
        scheduler = CyclicLR(optimizer,
                             base_lr=args.min_lr,
                             max_lr=args.max_lr,
                             step_size=args.epochs_per_step *
                             len(train_loader),
                             mode=args.mode)
    else:
        scheduler = MultiStepLR(optimizer,
                                milestones=args.schedule,
                                gamma=args.gamma)

    best_test = 0

    # optionally resume from a checkpoint
    data = None
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=device)
            args.start_epoch = checkpoint['epoch']
            best_test = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        elif os.path.isdir(args.resume):
            checkpoint_path = os.path.join(args.resume, 'checkpoint.pth.tar')
            csv_path = os.path.join(args.resume, 'results.csv')
            print("=> loading checkpoint '{}'".format(checkpoint_path))
            checkpoint = torch.load(checkpoint_path, map_location=device)
            args.start_epoch = checkpoint['epoch']
            best_test = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            ex_model.load_state_dict(checkpoint['state_dict'])

            print("=> loaded checkpoint '{}' (epoch {})".format(
                checkpoint_path, checkpoint['epoch']))

        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    if args.extract_features:
        test_hdf5_list = [
            x for x in glob.glob(os.path.join(args.h5dir, 'test', '*.h5'))
        ]
        test_hdf5_list.sort()
        print(test_hdf5_list)
        tcnt = 0
        for f in test_hdf5_list:
            h5_file = h5py.File(f, 'r')
            tcnt = tcnt + 1
            if tcnt == 1:
                testx = torch.from_numpy(np.array(h5_file['data']))
                testy = torch.from_numpy(np.array(h5_file['label']))
            else:
                testcx = torch.from_numpy(np.array(h5_file['data']))
                testcy = torch.from_numpy(np.array(h5_file['label']))
                testx = torch.cat((testx, testcx), 0)
                testy = torch.cat((testy, testcy), 0)

        tex_shape = testx.shape
        testx = testx.view(tex_shape[0], 1, tex_shape[1], tex_shape[2],
                           tex_shape[3])
        testxy = torch.utils.data.TensorDataset(testx, testy)
        val_loader = torch.utils.data.DataLoader(testxy,
                                                 batch_size=args.batch_size,
                                                 shuffle=False)
        (test_features, test_preds,
         test_target) = extract_features(model, ex_model, val_loader,
                                         criterion, device, dtype)

        test_features_numpy = test_features.cpu().numpy()
        test_preds_numpy = test_preds.cpu().numpy()
        test_target_numpy = test_target.cpu().numpy()

        test_data = {
            'test_features': test_features_numpy,
            'test_preds': test_preds_numpy,
            'test_target': test_target_numpy
        }
        test_mat_filename = 'test' + args.setting
        scipy.io.savemat(test_mat_filename, test_data)
        train_hdf5_list = [
            x for x in glob.glob(os.path.join(args.h5dir, 'train', '*.h5'))
        ]
        train_hdf5_list.sort()
        tcnt = 0
        for f in train_hdf5_list:
            h5_file = h5py.File(f, 'r')
            tcnt = tcnt + 1
            if tcnt == 1:
                trainx = torch.from_numpy(np.array(h5_file['data']))
                trainy = torch.from_numpy(np.array(h5_file['label']))
            else:
                traincx = torch.from_numpy(np.array(h5_file['data']))
                traincy = torch.from_numpy(np.array(h5_file['label']))
                trainx = torch.cat((trainx, traincx), 0)
                trainy = torch.cat((trainy, traincy), 0)

        trx_shape = trainx.shape
        trainx = trainx.view(trx_shape[0], 1, trx_shape[1], trx_shape[2],
                             trx_shape[3])
        trainxy = torch.utils.data.TensorDataset(trainx, trainy)
        train_loader = torch.utils.data.DataLoader(trainxy,
                                                   batch_size=args.batch_size,
                                                   shuffle=False)

        (train_features, train_preds,
         train_target) = extract_features(model, ex_model, train_loader,
                                          criterion, device, dtype)
        train_features_numpy = train_features.cpu().numpy()
        train_preds_numpy = train_preds.cpu().numpy()
        train_target_numpy = train_target.cpu().numpy()
        train_data = {
            'train_features': train_features_numpy,
            'train_preds': train_preds_numpy,
            'train_target': train_target_numpy
        }
        train_mat_filename = 'train' + args.setting
        scipy.io.savemat(train_mat_filename, train_data)
        return

    if args.evaluate:
        loss, top1, top5 = test(model, val_loader, criterion, device, dtype)
        return

    csv_logger = CsvLogger(filepath=save_path, data=data)
    csv_logger.save_params(sys.argv, args)

    claimed_acc1 = None
    claimed_acc5 = None
    best_test = 10000000
    train_network(args.start_epoch, args.epochs, scheduler, model,
                  train_loader, val_loader, optimizer, criterion, device,
                  dtype, args.batch_size, args.log_interval, csv_logger,
                  save_path, claimed_acc1, claimed_acc5, best_test)
Example no. 3
def main():
    args = parser.parse_args()

    if args.seed is None:
        args.seed = random.randint(1, 10000)
    print("Random Seed: ", args.seed)
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.gpus:
        torch.cuda.manual_seed_all(args.seed)

    time_stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    if args.evaluate:
        args.results_dir = '/tmp'
    if args.save == '':
        args.save = 'mar10_224_' + time_stamp
    save_path = os.path.join(args.results_dir, args.save)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    if args.gpus is not None:
        args.gpus = [int(i) for i in args.gpus.split(',')]
        device = 'cuda:' + str(args.gpus[0])
        cudnn.benchmark = True
    else:
        device = 'cpu'

    if args.type == 'float64':
        dtype = torch.float64
    elif args.type == 'float32':
        dtype = torch.float32
    elif args.type == 'float16':
        dtype = torch.float16
    else:
        raise ValueError('Wrong type!')  # TODO int8

    model = STN_MobileNet2(input_size=args.input_size,
                           scale=args.scaling,
                           shearing=args.shearing)
    # print(model.stnmod.fc_loc[0].bias.data)
    num_parameters = sum([l.nelement() for l in model.parameters()])
    print(model)
    print('number of parameters: {}'.format(num_parameters))
    print('FLOPs: {}'.format(
        flops_benchmark.count_flops(
            STN_MobileNet2, args.batch_size //
            len(args.gpus) if args.gpus is not None else args.batch_size,
            device, dtype, args.input_size, 3, args.scaling)))

    train_loader, val_loader, test_loader = get_loaders(
        args.dataroot, args.batch_size, args.batch_size, args.input_size,
        args.workers, args.b_weights)
    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    L1_criterion = torch.nn.L1Loss()
    PW_criterion = torch.nn.CosineSimilarity(dim=2, eps=1e-6)

    if args.gpus is not None:
        model = torch.nn.DataParallel(model, args.gpus)
    model.to(device=device, dtype=dtype)

    criterion.to(device=device, dtype=dtype)
    L1_criterion.to(device=device, dtype=dtype)
    PW_criterion.to(device=device, dtype=dtype)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.decay,
                                nesterov=True)
    if args.find_clr:
        find_bounds_clr(model,
                        train_loader,
                        optimizer,
                        PW_criterion,
                        device,
                        dtype,
                        min_lr=args.min_lr,
                        max_lr=args.max_lr,
                        step_size=args.epochs_per_step * len(train_loader),
                        mode=args.mode,
                        save_path=save_path)
        return

    if args.clr:
        print('Use CLR')
        scheduler = CyclicLR(optimizer,
                             base_lr=args.min_lr,
                             max_lr=args.max_lr,
                             step_size=args.epochs_per_step *
                             len(train_loader),
                             mode=args.mode)
    else:
        print('Use scheduler')
        scheduler = MultiStepLR(optimizer,
                                milestones=args.schedule,
                                gamma=args.gamma)

    best_val = 500

    # optionally resume from a checkpoint
    data = None
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=device)
            # args.start_epoch = checkpoint['epoch'] - 1
            # best_val = checkpoint['best_prec1']
            # best_test = checkpoint['best_prec1']
            args.start_epoch = 0
            best_val = 500
            state_dict = checkpoint['state_dict']

            # if weights from imagenet

            new_state_dict = OrderedDict()
            for k, v in state_dict.items():
                #     print(k, v.size())
                name = k
                if k == 'module.fc.bias':
                    new_state_dict[name] = torch.zeros(101)
                    continue
                elif k == 'module.fc.weight':
                    new_state_dict[name] = torch.ones(101, 1280)
                    continue
                else:
                    print('else:', name)
                    new_state_dict[name] = v

            model.load_state_dict(new_state_dict, strict=False)
            # optimizer.load_state_dict(checkpoint['optimizer'], strict=False)
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        elif os.path.isdir(args.resume):
            checkpoint_path = os.path.join(args.resume, 'checkpoint.pth.tar')
            csv_path = os.path.join(args.resume, 'results.csv')
            print("=> loading checkpoint '{}'".format(checkpoint_path))
            checkpoint = torch.load(checkpoint_path, map_location=device)
            args.start_epoch = checkpoint['epoch'] - 1
            best_val = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                checkpoint_path, checkpoint['epoch']))
            data = []
            with open(csv_path) as csvfile:
                reader = csv.DictReader(csvfile)
                for row in reader:
                    data.append(row)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    if args.evaluate:
        loss, test_mae = test(model, predefined_points, 0, test_loader,
                              PW_criterion, device, dtype)  # TODO
        return

    csv_logger = CsvLogger(filepath=save_path, data=data)
    csv_logger.save_params(sys.argv, args)

    # claimed_acc1 = None
    # claimed_acc5 = None
    # if args.input_size in claimed_acc_top1:
    #     if args.scaling in claimed_acc_top1[args.input_size]:
    #         claimed_acc1 = claimed_acc_top1[args.input_size][args.scaling]
    #         claimed_acc5 = claimed_acc_top5[args.input_size][args.scaling]
    #         csv_logger.write_text(
    #             'Claimed accuracies are: {:.2f}% top-1, {:.2f}% top-5'.format(claimed_acc1 * 100., claimed_acc5 * 100.))

    train_network(args.start_epoch, args.epochs, scheduler, model,
                  predefined_points, train_loader, val_loader, test_loader,
                  optimizer, PW_criterion, device, dtype, args.batch_size,
                  args.log_interval, csv_logger, save_path, best_val)
Example no. 4
def train_gan(train_set,
              indices: List,
              samples_per_N: int,
              repetition_n: int,
              identifier: str,
              experiment_name: str,
              batch_size: int = 256,
              desired_epochs: int = 1000):
    """
    The GAN is trained for 1000 epochs. If a set of 60k samples is trained with a batch size of 256,
    an epoch is roughly 234 iterations, so a budget of 100,000 iterations corresponds to about 426 epochs.

    """
    assert train_set.shape[0] > len(indices)

    print(train_set.shape)
    print(len(indices))

    my_ds = DataSetManager(train_set[indices])

    # print("Set number of iterations to train\n")
    v5 = (desired_epochs * (train_set[indices].shape[0])) // batch_size + 1
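    # v5 is the total number of training iterations needed for `desired_epochs`
    # passes over the selected subset, e.g. 1000 epochs of 60,000 samples at
    # batch size 256 is roughly 234,000 iterations.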

    print("ITERS " + str(v5))
    print("SIZE " + str(train_set[indices].shape))

    # print("Use pretrained model? (0 means No, some number different to 0 means yes)\n")
    decision_number = 0  #int( input() )

    # print("Type a name to save the model with?\n")
    model_tag = str(round(samples_per_N)) + '_' + str(repetition_n)

    storing_path = 'data/' + experiment_name + "/" + model_tag + '_data/'
    model_path = storing_path + model_tag + '.ckpt'

    # Recall that os.mkdir isn't recursive, so it only makes one directory at a time
    try:
        # Create target Directory
        os.mkdir(storing_path)
        print("Directory ", storing_path, " Created ")
    except FileExistsError:
        print("Directory ", storing_path, " already exists")

    # ===> Auxiliar functions <===
    """
    ----------------8<-------------[ cut here ]------------------

    ------------------------------------------------
    """
    def save_history(files_prefix, gen_loss_record, disc_loss_record,
                     jsd_error, current_epoch, epoch_record, my_ds, iter_,
                     epochs, global_iters):
        # Save losses per epoch

        df = pd.DataFrame(np.array(gen_loss_record))
        with open(files_prefix + '_gen_loss.csv', 'w+') as f:
            df.to_csv(f, header=False, index=False)

        df = pd.DataFrame(np.array(disc_loss_record))
        with open(files_prefix + '_disc_loss.csv', 'w+') as f:
            df.to_csv(f, header=False, index=False)

        df = pd.DataFrame(np.array(epoch_record))
        with open(files_prefix + '_epoch_record.csv', 'w+') as f:
            df.to_csv(f, header=False, index=False)
        # Save current iter and epochs

        df = pd.DataFrame(
            np.array([epochs + my_ds.epochs_completed, global_iters + iter_]))

        with open(files_prefix + '_training.csv', 'w+') as f:
            df.to_csv(f, header=False, index=False)

        with open(files_prefix + '_jsd_error.csv', 'a') as csvFile:
            writer = csv.writer(csvFile)
            writer.writerow([current_epoch, jsd_error])

    def send_bot_message(bot, my_ds, iter_, ITERS, identifier):
        """ 
        Not quite straighforward since the critic draws many more samples.

        """

        message = "\nEpochs [" + str(
            my_ds.epochs_completed) + "] Iter: " + str(iter_) + ";\t" + str(
                np.round(100 * iter_ / ITERS, 2)) + "% "
        message = message + identifier
        print(message)
        bot.set_status(message)
        # Send update message
        if bot.verbose:
            bot.send_message(message)

        print("\n")

    def save_gen_samples(gen_op, disc_op, sess, path, k, n=4):
        """
        k: the number of epochs used to train the generator
        n: the number of batches to draw samples from
        """

        suffix = '_gen_samples_' + str(k) + '_epochs_' + '.csv'

        for _ in range(n):

            samples = sess.run(gen_op)
            df = pd.DataFrame(np.array(samples))
            with open(path + suffix, 'a') as f:
                df.to_csv(f, header=False, index=False)

            # Score the samples using the critic
            scores = sess.run(disc_op)
            df = pd.DataFrame(np.array(scores))
            with open(path + 'scores_' + suffix, 'a') as f:
                df.to_csv(f, header=False, index=False)

    # ===> Model Parameters <===
    """
    ----------------8<-------------[ cut here ]------------------

    ------------------------------------------------
    """

    DIM = 512  # model dimensionality
    GEN_DIM = 100  # output dimension of the generator
    DIS_DIM = 1  # output dimension of the discriminator
    FIXED_GENERATOR = False  # whether to hold the generator fixed at real data plus Gaussian noise, as in the plots in the paper
    LAMBDA = .1  # smaller lambda makes things faster for toy tasks, but isn't necessary if you increase CRITIC_ITERS enough
    BATCH_SIZE = batch_size  # batch size
    ITERS = v5  #100000 # how many generator iterations to train for
    FREQ = 250  # sample frequency

    CRITIC_ITERS = 5  # how many critic iterations per generator iteration

    def Generator_Softmax(n_samples, name='gen'):

        with tf.variable_scope(name):
            noise = tf.random_normal([n_samples, GEN_DIM])
            output01 = tf_utils.linear(noise, DIM, name='fc-1')
            output01 = tf_utils.relu(output01, name='relu-1')

            output02 = tf_utils.linear(output01, DIM, name='fc-2')
            output02 = tf_utils.relu(output02, name='relu-2')

            output03 = tf_utils.linear(output02, DIM, name='fc-3')
            output03 = tf_utils.relu(output03, name='relu-3')

            output04 = tf_utils.linear(output03, DIM, name='fc-4')
            output04 = tf_utils.relu(output04, name='relu-4')

            output05 = tf_utils.linear(output04, GEN_DIM, name='fc-5')

            # Reminder: a logit can be modeled as a linear function of the predictors
            output05 = tf.nn.softmax(output05, name='softmax-1')

            return output05

    def Discriminator(inputs, is_reuse=True, name='disc'):
        with tf.variable_scope(name, reuse=is_reuse):
            print('is_reuse: {}'.format(is_reuse))
            output01 = tf_utils.linear(inputs, DIM, name='fc-1')
            output01 = tf_utils.relu(output01, name='relu-1')

            output02 = tf_utils.linear(output01, DIM, name='fc-2')
            output02 = tf_utils.relu(output02, name='relu-2')

            output03 = tf_utils.linear(output02, DIM, name='fc-3')
            output03 = tf_utils.relu(output03, name='relu-3')

            output04 = tf_utils.linear(output03, DIM, name='fc-4')
            output04 = tf_utils.relu(output04, name='relu-4')

            output05 = tf_utils.linear(output04, DIS_DIM, name='fc-5')

            return output05

    real_data = tf.placeholder(tf.float32, shape=[None, GEN_DIM])
    fake_data = Generator_Softmax(BATCH_SIZE)

    disc_real = Discriminator(real_data, is_reuse=False)
    disc_fake = Discriminator(fake_data)

    disc_cost = tf.reduce_mean(disc_fake) - tf.reduce_mean(disc_real)
    gen_cost = -tf.reduce_mean(disc_fake)

    # WGAN gradient penalty parameters
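    # (the penalty pushes the critic's gradient norm on random real/fake
    # interpolations towards 1 and is added to the critic loss with weight LAMBDA)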

    alpha = tf.random_uniform(shape=[BATCH_SIZE, 1], minval=0., maxval=1.)
    interpolates = alpha * real_data + (1. - alpha) * fake_data
    disc_interpolates = Discriminator(interpolates)
    gradients = tf.gradients(disc_interpolates, [interpolates])[0]
    slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients),
                                   reduction_indices=[1]))
    gradient_penalty = tf.reduce_mean((slopes - 1)**2)

    disc_cost += LAMBDA * gradient_penalty

    disc_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                  scope='disc')
    gen_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='gen')

    disc_lr = tf.placeholder(tf.float32, shape=())  # 1e-4
    gen_lr = tf.placeholder(tf.float32, shape=())  # 1e-4

    disc_train_op = tf.train.AdamOptimizer(learning_rate=disc_lr,
                                           beta1=0.5,
                                           beta2=0.9).minimize(
                                               disc_cost, var_list=disc_vars)

    if len(gen_vars) > 0:
        gen_train_op = tf.train.AdamOptimizer(learning_rate=gen_lr,
                                              beta1=0.5,
                                              beta2=0.9).minimize(
                                                  gen_cost, var_list=gen_vars)
    else:
        gen_train_op = tf.no_op()
    """
    ----------------8<-------------[ cut here ]------------------

    ------------------------------------------------
    """
    # ===> Model Parameters <===

    df = pd.DataFrame(np.array(indices))
    with open(storing_path + 'training_indices.csv', 'a') as f:
        df.to_csv(f, header=False, index=False)

    session_saver = tf.train.Saver()

    # files_prefix = 'model/'+ model_tag

    if decision_number == 0:
        pre_trained = False

        gen_loss_record = []  # type: List[float]
        disc_loss_record = []  # type: List[float]
        epoch_record = []  # type: List[float]

        epochs = 0
        global_iters = 0

    else:
        pre_trained = True
        temp = pd.read_csv(storing_path + '_training.csv', header=None).values

        epochs, global_iters = temp.flatten()

        my_ds.epochs_completed = epochs

        gen_loss_record = (pd.read_csv(storing_path + '_gen_loss.csv',
                                       header=None).values).tolist()
        disc_loss_record = (pd.read_csv(storing_path + '_disc_loss.csv',
                                        header=None).values).tolist()
        epoch_record = (pd.read_csv(storing_path + '_epoch_record.csv',
                                    header=None).values).tolist()

    # Create a DLBot instance
    bot = DLBot(token=telegram_token, user_id=telegram_user_id)
    # Activate the bot
    bot.activate_bot()

    print("\nTelegram bot has been activated ")

    iters_per_epoch = my_ds.num_examples / BATCH_SIZE

    total_iters = int(
        np.ceil((desired_epochs * iters_per_epoch) / CRITIC_ITERS))

    critic_iters = np.round((5 / 6) * total_iters)
    gen_iters = np.round((1 / 6) * total_iters)
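    # With CRITIC_ITERS = 5, roughly 5/6 of all parameter updates go to the
    # critic and 1/6 to the generator; these counts size the step_size of the
    # two cyclic learning-rate schedules below.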

    ITERS = total_iters

    # Train loop
    with tf.Session() as sess:

        if not pre_trained:  # False by default
            sess.run(tf.global_variables_initializer())
        else:
            # tf.reset_default_graph()
            session_saver.restore(sess, model_path)

        # Duct-tape solution
        iter_ = 0
        """
        while my_ds.epochs_completed < desired_epochs:
            iter_ +=1
        """
        gen_lr_ = CyclicLR(base_lr=10**-4.72,
                           max_lr=10**-3.72,
                           step_size=gen_iters)
        disc_lr_ = CyclicLR(base_lr=10**-4.72,
                            max_lr=10**-3.72,
                            step_size=critic_iters)

        for iter_ in range(ITERS):
            batch_data, disc_cost_ = None, None

            previous_epoch = my_ds.epochs_completed

            # train critic
            for i_ in range(CRITIC_ITERS):
                batch_data = my_ds.next_batch(
                    BATCH_SIZE)  # data_gen.__next__()
                disc_cost_, _ = sess.run([disc_cost, disc_train_op],
                                         feed_dict={
                                             real_data: batch_data,
                                             disc_lr: disc_lr_.clr()
                                         })
                disc_lr_.on_batch_end()

            # train generator
            sess.run(gen_train_op, feed_dict={gen_lr: gen_lr_.clr()})
            gen_lr_.on_batch_end()

            gen_cost2 = sess.run(gen_cost)

            current_epoch = my_ds.epochs_completed

            condition2 = current_epoch % 5 == 0
            if current_epoch > previous_epoch and condition2:
                disc_loss_record.append(disc_cost_)
                gen_loss_record.append(gen_cost2)
                epoch_record.append(my_ds.epochs_completed)
                # print("Diff "+str(current_epoch - previous_epoch))

            if (np.mod(iter_, FREQ) == 0) or (iter_ + 1 == ITERS):
                """
                print("===> Debugging")
                print(disc_loss_record)
                print(gen_loss_record)
                """

                bot.loss_hist.append(disc_cost_)

                fake_samples = sess.run(
                    fake_data)  # , feed_dict={real_data: batch_data}
                # print("\n==> Sum-Simplex condition: " +str(np.sum(fake_samples, axis=1)))
                send_bot_message(bot, my_ds, iter_, ITERS, identifier)

                jsd_error = gan_error_all_species(fake_samples, train_set)
                current_epoch = my_ds.epochs_completed

                session_saver.save(sess, model_path)
                save_history(storing_path, gen_loss_record, disc_loss_record,
                             jsd_error, current_epoch, epoch_record, my_ds,
                             iter_, epochs, global_iters)

                # save_gen_samples(fake_data, disc_fake ,sess, storing_path, k) # fake_data = Generator_Softmax(BATCH_SIZE)

            utils.tick()  #  _iter[0] += 1

        if iter_ == ITERS - 1:  # save the final state (range() stops at ITERS - 1)
            session_saver.save(sess, model_path)

        # Create gan samples
        n_samples = len(indices)

        k_iter = n_samples // BATCH_SIZE + 1

        gan_samples_path = storing_path + "gan_samples_" + model_tag + '.csv'

        for k in range(k_iter):
            fake_samples = sess.run(fake_data)

            df = pd.DataFrame(fake_samples)
            with open(gan_samples_path, 'a') as f:
                df.to_csv(f, header=False, index=False)

    # Clear variable values

    tf.reset_default_graph()

    current_epoch = my_ds.epochs_completed
    save_history(storing_path, gen_loss_record, disc_loss_record, jsd_error,
                 current_epoch, epoch_record, my_ds, iter_, epochs,
                 global_iters)

    bot.stop_bot()

    print("Training is done")

    # Duct-taping the size of the GAN sample set to avoid changing the TF graph

    temp1 = pd.read_csv(gan_samples_path, header=None).values
    temp1 = temp1[0:n_samples]
    df = pd.DataFrame(temp1)

    with open(gan_samples_path, 'w+') as f:
        df.to_csv(f, header=False, index=False)

    print("Training is done")
Example no. 5
model.add(Flatten())

model.add(Dense(units=256, activation='relu', name='fc1'))
model.add(Dropout(rate=0.5))

model.add(Dense(units=256, activation='relu', name='fc2'))
model.add(Dropout(rate=0.5))

model.add(Dense(units=2, activation='softmax', name='predictions'))


Y_one_hot = to_categorical(np.ravel(Y_Train), 2)

## Cyclic Learning Rate

clr_triangular = CyclicLR(mode='triangular2', base_lr=base_lr, max_lr=max_lr, step_size=step_size)

gradientDescent = SGD()
model.compile(gradientDescent, loss='categorical_crossentropy', metrics=['accuracy'])

#### model.summary()
model.fit_generator(datagen.flow(x=X_Train, y=Y_one_hot, batch_size=batch_size),
                    steps_per_epoch=len(X_Train) / batch_size,
                    callbacks=[clr_triangular], verbose=2,
                    epochs=epochs, use_multiprocessing=True)

#####Plots of CLR###########################################################
plt.figure(1)
plt.ylabel('Training accuracy')
plt.xlabel('Learning Rate')
plt.title("CLR - 'triangular2' Policy")
plt.plot(clr_triangular.history['lr'], clr_triangular.history['acc'])
Example no. 6
checkpoint = torch.load(SAVED_MODEL_PATH)
model.load_state_dict(checkpoint['state_dict'])
#start_epoch = checkpoint['epoch']

print("Model size: {:.5f}M".format(
    sum(p.numel() for p in model.parameters()) / 1000000.0))

model.fc0.conv2.register_forward_hook(get_activation('fc0.conv2'))
model.fc1.conv2.register_forward_hook(get_activation('fc1.conv2'))

optim = torch.optim.SGD(model.parameters(),
                        lr=lr,
                        momentum=0.9,
                        weight_decay=weight_decay)
scheduler = CyclicLR(optim, gamma=gamma, step_size=stepsize)

num_epochs = 1202

start_time = time.time()
train_time = 0
best_rank1 = -np.inf
best_epoch = 0

if evaluation:
    print("Evaluation in Progress")
    test(model, queryloader, galleryloader)
    sys.exit(0)

print("Training of model in progress")
Example no. 7
model = Sequential()
model.add(
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])

clr = CyclicLR(base_lr=0.001, max_lr=0.006, step_size=20.)
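# The LR oscillates between 1e-3 and 6e-3; one full cycle spans
# 2 * step_size = 40 batches.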

tfb = keras.callbacks.TensorBoard(log_dir='logs',
                                  histogram_freq=0,
                                  batch_size=batch_size,
                                  write_graph=True,
                                  write_grads=True,
                                  write_images=False)

model.fit(x_train,
          y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          callbacks=[clr, tfb])
score = model.evaluate(x_test, y_test, verbose=0)
Example no. 8
model.compile(loss={
    "main_output": geodistance_tensorflow,
    "auxiliary_output": 'categorical_crossentropy',
    "terrain_output": 'categorical_crossentropy'
},
              optimizer=opt,
              loss_weights=[1 / 3, 1 / 3, 1 / 3],
              metrics={
                  "main_output": geodistance_tensorflow,
                  "auxiliary_output": 'categorical_accuracy',
                  "terrain_output": 'categorical_crossentropy'
              })

step = 8 * len(X_train) // batch_size
clr = CyclicLR(base_lr=0.00001,
               max_lr=0.01,
               step_size=step,
               mode='triangular2')
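# step_size covers 8 epochs' worth of batches, and 'triangular2' halves the LR
# amplitude after each full cycle.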
earlyStopping = keras.callbacks.EarlyStopping(monitor='loss',
                                              patience=5,
                                              verbose=1,
                                              restore_best_weights=True)

history = model.fit_generator(
    generate_arrays_from_file(X_train, Y1_train, Y2_train, Y3_train,
                              codes_matrix, terrain_matrix, batch_size),
    epochs=epochs,
    steps_per_epoch=train_instances_sz / batch_size,
    callbacks=[clr, earlyStopping],
    validation_steps=test_instances_sz / batch_size,
    validation_data=generate_arrays_from_file(X_test, Y1_test, Y2_test,
                                              Y3_test, codes_matrix,
Example no. 9
def main():
    torch.manual_seed(1)
    torch.cuda.manual_seed_all(1)
    global args, best_prec1
    best_prec1 = 0
    args = parser.parse_args()
    time_stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    if args.evaluate:
        args.results_dir = '/tmp'
    if args.save == '':
        args.save = time_stamp
    save_path = os.path.join(args.results_dir, args.save)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    args.noise = not args.no_noise
    args.quant = not args.no_quantization
    args.act_quant = not args.no_act_quantization
    args.quant_edges = not args.no_quant_edges

    logging.info("saving to %s", save_path)
    logging.debug("run arguments: %s", args)

    if args.gpus is not None:
        args.gpus = [int(i) for i in args.gpus.split(',')]
        device = 'cuda:' + str(args.gpus[0])
        cudnn.benchmark = True
    else:
        device = 'cpu'
    dtype = torch.float32

    args.step_setup = None

    model = models.__dict__[args.model]
    model_config = {
        'scale': args.scale,
        'input_size': args.input_size,
        'dataset': args.dataset,
        'bitwidth': args.bitwidth,
        'quantize': args.quant,
        'noise': args.noise,
        'step': args.step,
        'depth': args.depth,
        'act_bitwidth': args.act_bitwidth,
        'act_quant': args.act_quant,
        'quant_edges': args.quant_edges,
        'step_setup': args.step_setup,
        'quant_epoch_step': args.quant_epoch_step,
        'quant_start_stage': args.quant_start_stage,
        'normalize': args.no_pre_process_normalize,
        'noise_mask': args.noise_mask
    }

    if args.model_config != '':
        model_config = dict(model_config, **literal_eval(args.model_config))

    # create model
    model = model(**model_config)
    logging.info("creating model %s", args.model)
    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    params = sum([np.prod(p.size()) for p in model_parameters])
    print("number of parameters: ", params)
    logging.info("created model with configuration: %s", model_config)
    print(model)

    data = None
    checkpoint_epoch = 0
    # optionally resume from a checkpoint
    if args.evaluate:
        if not os.path.isfile(args.evaluate):
            parser.error('invalid checkpoint: {}'.format(args.evaluate))
        checkpoint = torch.load(args.evaluate, map_location=device)
        load_model(model, checkpoint)
        logging.info("loaded checkpoint '%s' (epoch %s)", args.evaluate,
                     checkpoint['epoch'])

        print("loaded checkpoint {0} (epoch {1})".format(
            args.evaluate, checkpoint['epoch']))

    elif args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=device)
            if not args.start_from_zero:
                args.start_epoch = checkpoint['epoch'] - 1
            best_test = checkpoint['best_prec1']
            checkpoint_epoch = checkpoint['epoch']

            load_model(model, checkpoint)

            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        elif os.path.isdir(args.resume):
            checkpoint_path = os.path.join(args.resume, 'checkpoint.pth.tar')
            csv_path = os.path.join(args.resume, 'results.csv')
            print("=> loading checkpoint '{}'".format(checkpoint_path))
            checkpoint = torch.load(checkpoint_path, map_location=device)
            best_test = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                checkpoint_path, checkpoint['epoch']))
            data = []
            with open(csv_path) as csvfile:
                reader = csv.DictReader(csvfile)
                for row in reader:
                    data.append(row)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    if args.gpus is not None:
        model = torch.nn.DataParallel(
            model, [args.gpus[0]]
        )  # Statistics need to be calculated on a single GPU to be consistent with data among multiple GPUs

    # Data loading code
    default_transform = {
        'train':
        get_transform(args.dataset,
                      input_size=args.input_size,
                      augment=True,
                      integer_values=args.quant_dataloader,
                      norm=not args.no_pre_process_normalize),
        'eval':
        get_transform(args.dataset,
                      input_size=args.input_size,
                      augment=False,
                      integer_values=args.quant_dataloader,
                      norm=not args.no_pre_process_normalize)
    }
    transform = getattr(model.module, 'input_transform', default_transform)

    val_data = get_dataset(args.dataset,
                           'val',
                           transform['eval'],
                           datasets_path=args.datapath)
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=args.val_batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    train_data = get_dataset(args.dataset,
                             'train',
                             transform['train'],
                             datasets_path=args.datapath)
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    statistics_train_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.act_stats_batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.decay,
                                nesterov=True)
    model, criterion = model.to(device, dtype), criterion.to(device, dtype)
    if args.clr:
        scheduler = CyclicLR(optimizer,
                             base_lr=args.min_lr,
                             max_lr=args.max_lr,
                             step_size=args.epochs_per_step *
                             len(train_loader),
                             mode=args.mode)
    else:
        scheduler = MultiStepLR(optimizer,
                                milestones=args.schedule,
                                gamma=args.gamma)

    csv_logger = CsvLogger(filepath=save_path, data=data)
    csv_logger.save_params(sys.argv, args)
    csv_logger_training_stats = os.path.join(save_path, 'training_stats.csv')

    # pre-training activation and parameters statistics calculation ####
    if check_if_need_to_collect_statistics(model):
        for layer in model.modules():
            if isinstance(layer, actquant.ActQuantBuffers):
                layer.pre_training_statistics = True  # Turn on pre-training activation statistics calculation
        model.module.statistics_phase = True

        validate(
            statistics_train_loader,
            model,
            criterion,
            device,
            epoch=0,
            num_of_batches=80,
            stats_phase=True)  # Run validation on training set for statistics
        model.module.quantize.get_act_max_value_from_pre_calc_stats(
            list(model.modules()))
        _ = model.module.quantize.set_weight_basis(list(model.modules()), None)

        for layer in model.modules():
            if isinstance(layer, actquant.ActQuantBuffers):
                layer.pre_training_statistics = False  # Turn off pre-training activation statistics calculation
        model.module.statistics_phase = False

    else:  # Maximal activation values still need to be derived from loaded stats
        model.module.quantize.assign_act_clamp_during_val(list(
            model.modules()),
                                                          print_clamp_val=True)
        model.module.quantize.assign_weight_clamp_during_val(
            list(model.modules()), print_clamp_val=True)
        # model.module.quantize.get_act_max_value_from_pre_calc_stats(list(model.modules()))

    if args.gpus is not None:  # Return to Multi-GPU after statistics calculations
        model = torch.nn.DataParallel(model.module, args.gpus)
        model, criterion = model.to(device, dtype), criterion.to(device, dtype)

    # pre-training activation statistics calculation ####

    if args.evaluate:
        val_loss, val_prec1, val_prec5 = validate(val_loader,
                                                  model,
                                                  criterion,
                                                  device,
                                                  epoch=0)
        print("val_prec1: ", val_prec1)
        return

    # fast-forward to the current stage
    for i in range(args.quant_start_stage):
        model.module.switch_stage(0)

    for epoch in trange(args.start_epoch, args.epochs + 1):

        if not isinstance(scheduler, CyclicLR):
            scheduler.step()

        #     scheduler.optimizer = optimizer
        train_loss, train_prec1, train_prec5 = train(
            train_loader,
            model,
            criterion,
            device,
            epoch,
            optimizer,
            scheduler,
            training_stats_logger=csv_logger_training_stats)

        for layer in model.modules():
            if isinstance(layer, actquant.ActQuantBuffers):
                layer.print_clamp()

        # evaluate on validation set

        val_loss, val_prec1, val_prec5 = validate(val_loader, model, criterion,
                                                  device, epoch)

        # remember best prec@1 and save checkpoint
        is_best = val_prec1 > best_prec1
        best_prec1 = max(val_prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': args.model,
                'config': args.model_config,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'layers_b_dict': model.module.
                layers_b_dict  #TODO this doesn't work for multi gpu - need to del
            },
            is_best,
            path=save_path)
        # New type of logging
        csv_logger.write({
            'epoch': epoch + 1,
            'val_error1': 1 - val_prec1,
            'val_error5': 1 - val_prec5,
            'val_loss': val_loss,
            'train_error1': 1 - train_prec1,
            'train_error5': 1 - train_prec5,
            'train_loss': train_loss
        })
        csv_logger.plot_progress(title=args.model + str(args.depth))
        csv_logger.write_text(
            'Epoch {}: Best accuracy is {:.2f}% top-1'.format(
                epoch + 1, best_prec1 * 100.))
Example no. 10
def main():
    args = parser.parse_args()

    if args.seed is None:
        args.seed = random.randint(1, 10000)
    print("Random Seed: ", args.seed)
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.gpus:
        torch.cuda.manual_seed_all(args.seed)

    time_stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    if args.evaluate:
        args.results_dir = '/tmp'
    if args.save == '':
        args.save = time_stamp
    save_path = os.path.join(args.results_dir, args.save)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    if args.gpus is not None:
        args.gpus = [int(i) for i in args.gpus.split(',')]
        device = 'cuda:' + str(args.gpus[0])
        cudnn.benchmark = True
    else:
        device = 'cpu'

    if args.type == 'float64':
        dtype = torch.float64
    elif args.type == 'float32':
        dtype = torch.float32
    elif args.type == 'float16':
        dtype = torch.float16
    else:
        raise ValueError('Wrong type!')  # TODO int8

    model = ShuffleNetV2(scale=args.scaling,
                         c_tag=args.c_tag,
                         SE=args.SE,
                         residual=args.residual,
                         groups=args.groups)
    num_parameters = sum([l.nelement() for l in model.parameters()])
    print(model)
    print('number of parameters: {}'.format(num_parameters))
    print('FLOPs: {}'.format(
        flops_benchmark.count_flops(
            ShuffleNetV2, args.batch_size //
            len(args.gpus) if args.gpus is not None else args.batch_size,
            device, dtype, args.input_size, 3, args.scaling, 3, args.c_tag,
            1000, torch.nn.ReLU, args.SE, args.residual, args.groups)))

    train_loader, val_loader = get_loaders(args.dataroot, args.batch_size,
                                           args.batch_size, args.input_size,
                                           args.workers)
    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    if args.gpus is not None:
        model = torch.nn.DataParallel(model, args.gpus)
    model.to(device=device, dtype=dtype)
    criterion.to(device=device, dtype=dtype)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.decay,
                                nesterov=True)
    if args.find_clr:
        find_bounds_clr(model,
                        train_loader,
                        optimizer,
                        criterion,
                        device,
                        dtype,
                        min_lr=args.min_lr,
                        max_lr=args.max_lr,
                        step_size=args.epochs_per_step * len(train_loader),
                        mode=args.mode,
                        save_path=save_path)
        return

    if args.clr:
        scheduler = CyclicLR(optimizer,
                             base_lr=args.min_lr,
                             max_lr=args.max_lr,
                             step_size=args.epochs_per_step *
                             len(train_loader),
                             mode=args.mode)
    else:
        scheduler = MultiStepLR(optimizer,
                                milestones=args.schedule,
                                gamma=args.gamma)

    best_test = 0

    # optionally resume from a checkpoint
    data = None
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=device)
            args.start_epoch = checkpoint['epoch'] - 1
            best_test = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        elif os.path.isdir(args.resume):
            checkpoint_path = os.path.join(args.resume, 'checkpoint.pth.tar')
            csv_path = os.path.join(args.resume, 'results.csv')
            print("=> loading checkpoint '{}'".format(checkpoint_path))
            checkpoint = torch.load(checkpoint_path, map_location=device)
            args.start_epoch = checkpoint['epoch'] - 1
            best_test = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                checkpoint_path, checkpoint['epoch']))
            data = []
            with open(csv_path) as csvfile:
                reader = csv.DictReader(csvfile)
                for row in reader:
                    data.append(row)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    if args.evaluate:
        loss, top1, top5 = test(model, val_loader, criterion, device,
                                dtype)  # TODO
        return

    csv_logger = CsvLogger(filepath=save_path, data=data)
    csv_logger.save_params(sys.argv, args)

    claimed_acc1 = None
    claimed_acc5 = None
    if args.SE in claimed_acc_top1:
        if args.scaling in claimed_acc_top1[args.SE]:
            claimed_acc1 = 1 - claimed_acc_top1[args.SE][args.scaling]
            csv_logger.write_text('Claimed accuracy is {:.2f}% top-1'.format(
                claimed_acc1 * 100.))
    train_network(args.start_epoch, args.epochs, scheduler, model,
                  train_loader, val_loader, optimizer, criterion, device,
                  dtype, args.batch_size, args.log_interval, csv_logger,
                  save_path, claimed_acc1, claimed_acc5, best_test)
Example no. 11
class Solver(object):
    def __init__(self, config):
        self.n_classes = config['n_classes']
        self.model = UNET(1, self.n_classes)
        if self.n_classes > 1:
            self.criterion = nn.CrossEntropyLoss()
        else:
            self.criterion = nn.BCEWithLogitsLoss()

        self.lr = config['lr']
        self.optimizer = optim.Adam(self.model.parameters(),
                                    lr=self.lr,
                                    weight_decay=1e-3,
                                    betas=(0.8, 0.9))
        self.device = config['device']
        self.num_epochs = config['num_epochs']
        if config['N_subimgs'] % config['batch_size'] != 0:
            self.train_step = config['N_subimgs'] // config['batch_size'] + 1
        else:
            self.train_step = config['N_subimgs'] // config['batch_size']
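        # train_step is the number of optimizer steps per epoch,
        # i.e. ceil(N_subimgs / batch_size)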

        self.model_save_dir = config['save_pth']
        self.best_loss = 10
        self.scheduler = CyclicLR(self.optimizer,
                                  step_size=2 *
                                  (config['N_subimgs'] // config['batch_size']),
                                  mode='triangular2')

        if self.device is not None:
            self.device = torch.device(self.device)
            self.model.to(self.device)

    def restore_best(self):
        model_pth = os.path.join(self.model_save_dir,
                                 'BEST_checkpoint.pth.tar')
        checkpoint = torch.load(model_pth)
        state_dict = checkpoint['model']
        best_loss = checkpoint['loss']
        epoch = checkpoint['epoch']
        return epoch + 1, best_loss

    def restore_model(self):
        model_pth = os.path.join(self.model_save_dir, 'checkpoint.pth.tar')
        checkpoint = torch.load(model_pth)
        state_dict = checkpoint['model']
        epoch = checkpoint['epoch']
        return epoch + 1

    def save_checkpoint(self, state, path):
        torch.save(state, os.path.join(path, 'BEST_checkpoint.pth.tar'))

    def update_lr(self, lr):
        for param in self.optimizer.param_groups:
            param['lr'] = lr

    def train(self, prefetcher, resume=True, best=True):
        if best and resume:
            start_epoch, best_loss = self.restore_best()
            self.best_loss = best_loss.to(self.device)
            print('Start from %d, so far the best loss is %.6f' \
                    % (start_epoch, best_loss))
        elif resume:
            start_epoch = self.restore_model()
            print('Start from %d' % (start_epoch))
        else:
            start_epoch = 0
        #not really epoch, consider using step for naming
        for i in range(start_epoch, self.num_epochs):
            epoch_loss = 0
            self.model.train()
            self.scheduler.batch_step()
            for j in range(self.train_step):
                self.optimizer.zero_grad()
                img, label = prefetcher.next()
                img = Variable(img.to(self.device, dtype=torch.float32))
                label = Variable(label.to(self.device, dtype=torch.float32))
                output = self.model(img)
                loss = self.criterion(output, label)
                epoch_loss += loss
                loss.backward()
                self.optimizer.step()

                if loss < self.best_loss:
                    state = {}
                    state['loss'] = loss
                    state['model'] = self.model.state_dict()
                    state['epoch'] = i
                    print('loss decrease, saving model...........')
                    self.save_checkpoint(state, self.model_save_dir)
                    self.best_loss = loss

            aver_loss = epoch_loss / self.train_step
            print('training %d epoch, average loss is %.6f' % (i, aver_loss))
Example no. 12
model.compile(
    optimizer=sgd,
    loss=weighted_pixelwise_crossentropy([0.00418313, 0.509627837, 1.]),
    #loss = keras_lovasz_softmax,
    sample_weight_mode="temporal",
    metrics=[bla_iou, r_iou, blu_iou])
print("Model compiled!")

# Callbacks
tensorboard = TensorBoard(log_dir="logs/{}".format(time.time()),
                          write_graph=True,
                          update_freq="batch")
print("Tensorboard loaded!")
# 5e-5
cyclical = CyclicLR(base_lr=float(FLAGS.lr),
                    max_lr=float(FLAGS.max_lr),
                    step_size=len(train_data) * 2.5,
                    mode="triangular2")
checkpoint = ModelCheckpoint(FLAGS.chkpt.rstrip("/") +
                             "/weights-{epoch:02d}.hdf5",
                             verbose=1,
                             period=1)


def train(data_generator, val_generator, callbacks):
    return model.fit_generator(generator=data_generator,
                               validation_data=val_generator,
                               callbacks=callbacks,
                               epochs=50,
                               verbose=1,
                               shuffle=False)
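A plausible way to launch training with the pieces defined above; val_data is assumed to be a validation generator created elsewhere in the script (only train_data appears in the snippet):

history = train(train_data, val_data, callbacks=[tensorboard, cyclical, checkpoint])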
Esempio n. 13
0
def main():
    seed = random.randint(1, 10000)
    random.seed(seed)
    torch.manual_seed(seed)

    torch.cuda.manual_seed_all(seed)

    time_stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

    results_dir = '/tmp'
    save = time_stamp
    save_path = os.path.join(results_dir, save)

    if not os.path.exists(save_path):
        os.makedirs(save_path)

    gpus = 2  # [int(i) for i in gpus.split(',')]
    device = 'cuda:0'  # + str(args.gpus[0])
    cudnn.benchmark = True
    dtype = torch.float64

    input_size = 224
    scaling = 1.0
    batch_size = 20
    workers = 4
    learning_rate = 0.02
    momentum = 0.9
    decay = 0.00004
    max_lr = 1
    min_lr = 0.00001
    start_epoch = 0
    epochs = 400
    epochs_per_step = 20
    log_interval = 100
    mode = 'triangular2'
    evaluate = 'false'
    dataroot = "data"


    model = MobileNet2(input_size=input_size, scale=scaling)
    num_parameters = sum([l.nelement() for l in model.parameters()])
    #print(model)


    """print('number of parameters: {}'.format(num_parameters))
    print('FLOPs: {}'.format(
        flops_benchmark.count_flops(MobileNet2,
                                    batch_size // len(gpus) if gpus is not None else batch_size,
                                    device, dtype, input_size, 3, scaling)))"""

    train_loader, val_loader = get_loaders(dataroot, batch_size, batch_size, input_size, workers)

    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    model = torch.nn.DataParallel(model)
        
    model.to(device=device, dtype=dtype)
    criterion.to(device=device, dtype=dtype)

    optimizer = torch.optim.SGD(model.parameters(), learning_rate, momentum=momentum, weight_decay=decay,
                                nesterov=True)
    find_bounds_clr(model, train_loader, optimizer, criterion, device, dtype, min_lr=min_lr,
                    max_lr=max_lr, step_size=epochs_per_step * len(train_loader), mode=mode,
                    save_path=save_path)
    scheduler = CyclicLR(optimizer, base_lr=min_lr, max_lr=max_lr,
                         step_size=epochs_per_step * len(train_loader), mode=mode)
    
    best_test = 0

 
    if evaluate == 'true':
        loss, top1, top5 = test(model, val_loader, criterion, device, dtype)  # TODO
        return

    data = []

    csv_logger = CsvLogger(filepath=save_path, data=data)
    #csv_logger.save_params(sys.argv, args)

    claimed_acc1 = None
    claimed_acc5 = None
    if input_size in claimed_acc_top1:
        if scaling in claimed_acc_top1[input_size]:
            claimed_acc1 = claimed_acc_top1[input_size][scaling]
            claimed_acc5 = claimed_acc_top5[input_size][scaling]
            csv_logger.write_text(
                'Claimed accuracies are: {:.2f}% top-1, {:.2f}% top-5'.format(claimed_acc1 * 100., claimed_acc5 * 100.))
            
            
    train_network(start_epoch, epochs, scheduler, model, train_loader, val_loader, optimizer, criterion,
                  device, dtype, batch_size, log_interval, csv_logger, './data', claimed_acc1, claimed_acc5,
                  best_test)
    
    return 1
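A conventional entry-point guard (not part of the snippet above) would typically invoke this function:

if __name__ == '__main__':
    main()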
Esempio n. 14
0
    callbacks = []

    train_source, val_source, classes, batches_per_epoch =\
        set_data(input_dims, dataset_path, val_split, train_batch_size, val_batch_size, use_aug, save_aug)

    model, model_name = set_model(input_dims, model_type, classes)

    model.compile(optimizer=optimizer,
                  loss=tf.keras.losses.CategoricalCrossentropy(),
                  metrics=metrics)

    if lr_mode == 'test':
        callbacks.append(
            CyclicLR(bottom_lr=bottom_lr,
                     top_lr=top_lr,
                     iter_per_cycle=epochs * batches_per_epoch,
                     mode='triangular',
                     start_mode='bottom-start',
                     cycle_mode='one-way'))

    elif lr_mode == 'cyclic':
        callbacks.append(
            CyclicLR(bottom_lr=bottom_lr,
                     top_lr=top_lr,
                     iter_per_cycle=epochs_per_clr * batches_per_epoch,
                     mode=clr_mode,
                     profile_fn=clr_profile_fn,
                     scale_fn=clr_scale_fn,
                     gamma=clr_gamma,
                     omega=clr_omega,
                     start_mode=clr_start_mode,
                     cycle_mode=clr_cycle_mode,
Esempio n. 15
0
    def train(self):
        kf = KFold(n_splits=self.n_folds, shuffle=True, random_state=self.seed)
        fold = 0
        for train_keys_, val_keys_ in kf.split(self.train_keys):
            if fold in self.exclude_folds:
                print('!!! Skipped fold ' + str(fold))
                fold += 1
            else:
                # print(train_keys_, self.additional_keys)
                train_data, val_data = self.ids_to_keys(
                    train_keys_,
                    self.additional_keys), self.ids_to_keys(val_keys_)

                train_gen = self.get_generator(is_train=True,
                                               dataset=train_data)
                val_gen = self.get_generator(is_train=False, dataset=val_data)

                # model_ = zoo_unet_K.get_unet_512_spartial
                # model = model_(input_shape=(self.input_size, self.input_size, self.n_channels), classes=self.classes,
                #                batch_norm=self.batch_norm, n_filters=16, dropout_val=self.dropout)
                model = self.model(UNET_INPUT=self.input_size,
                                   classes=self.classes,
                                   dropout_val=self.dropout,
                                   batch_norm=self.batch_norm)
                callback = KFold_cbk(model, fold)
                fold_checkpoint_name = 'model_at_fold_BN_%d.h5' % fold
                check_id = None  # ensure check_id exists before the first comparison below
                for epochs, lr, iter_n in self.lr_schedule:
                    if iter_n == '1' or (iter_n != check_id):
                        with tf.device("/cpu:0"):
                            print(
                                '--------\nNew init, fold %d, starting iteration %s'
                                % (fold, iter_n))
                            if iter_n != '1':
                                print('loading weights...')
                                model.load_weights(fold_checkpoint_name)
                                print('Done!')
                            if iter_n not in ['1', '2']:
                                print('VGG layers are trainable now')
                                for l in model.layers:
                                    l.trainable = True
                            # if iter_n not in['1', '2', '3', '4']:
                            #     print('HueSat, Contrast, Brightness are enabled ')
                            #     train_gen = self.get_generator(is_train=True, dataset=train_data, hue_br_contr=True)
                        parallel_model = multi_gpu_model(model, gpus=2)

                    clr = CyclicLR(base_lr=lr * 0.8,
                                   max_lr=lr * 2,
                                   step_size=120.)

                    parallel_model.compile(optimizer=self.optimizer(
                        lr=lr, clipnorm=1.),
                                           loss=self.loss,
                                           metrics=[zoo_losses_K.dice_coef])

                    parallel_model.fit_generator(
                        train_gen,
                        steps_per_epoch=np.ceil(
                            len(train_keys_) / self.batch_size),
                        epochs=epochs,
                        validation_data=val_gen,
                        validation_steps=np.ceil(
                            len(val_keys_) / self.batch_size),
                        verbose=2,
                        callbacks=[callback],  # FIXME: turn clr back
                        max_queue_size=30,
                        use_multiprocessing=self.use_multiprocessing,
                        workers=4)
                    check_id = iter_n
                fold += 1
            print('Finished!')
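For reference, the 'triangular' and 'triangular2' modes that recur throughout these examples follow Leslie Smith's cyclical-learning-rate formulation: the learning rate ramps linearly between base_lr and max_lr over 2 * step_size iterations, and 'triangular2' halves the amplitude on every new cycle. A minimal, library-agnostic sketch (function name and sample values are illustrative, not taken from any snippet above):

import math

def clr(iteration, base_lr, max_lr, step_size, mode='triangular'):
    # 1-based cycle index and position within the current cycle
    cycle = math.floor(1 + iteration / (2 * step_size))
    x = abs(iteration / step_size - 2 * cycle + 1)
    scale = 1.0 if mode == 'triangular' else 0.5 ** (cycle - 1)  # 'triangular2'
    return base_lr + (max_lr - base_lr) * max(0.0, 1 - x) * scale

# e.g. clr(0, 1e-4, 6e-3, 2000) == 1e-4 and clr(2000, 1e-4, 6e-3, 2000) == 6e-3;
# with mode='triangular2' the peak of the second cycle drops to about 3.05e-3.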