def main():
    """
    Script entrypoint.

    Builds the ``MobileNet`` wrapper, prints its class-index mapping and the
    model summary, trains it, saves the model / weights / architecture, and
    appends the run's start time, end time and duration (in seconds) to
    ``complete_run_timing_file``.
    """
    t_start = datetime.now()
    header = ["Start Time", "End Time", "Duration (s)"]
    row = [t_start.strftime(DEFAULT_DATE_TIME_FORMAT)]

    dnn = MobileNet()

    # show class indices
    print('****************')
    for cls, idx in dnn.train_batches.class_indices.items():
        print('Class #{} = {}'.format(idx, cls))
    print('****************')

    # summary() prints the table itself and returns None, so wrapping it in
    # print() only appended a stray "None" line to the output.
    dnn.model.summary()

    dnn.train(t_start,
              epochs=dnn.num_epochs,
              batch_size=dnn.batch_size,
              training=dnn.train_batches,
              validation=dnn.valid_batches)

    # save the full trained model next to the weights file (suffix 'old')
    dnn.model.save(dnn.file_weights + 'old')

    # save the trained weights and the architecture (as JSON) separately
    dnn.model.save_weights(dnn.file_weights)
    with open(dnn.file_architecture, 'w') as f:
        f.write(dnn.model.to_json())

    t_end = datetime.now()
    difference_in_seconds = get_difference_in_seconds(t_start, t_end)

    row.append(t_end.strftime(DEFAULT_DATE_TIME_FORMAT))
    row.append(str(difference_in_seconds))

    # NOTE(review): the header row is appended on every run; if
    # append_row_to_csv does not de-duplicate, the file accumulates repeated
    # header lines -- confirm against the helper.
    append_row_to_csv(complete_run_timing_file, header)
    append_row_to_csv(complete_run_timing_file, row)
# Example #2
# 0
class HyperTrain(Trainable):
    def _get_dataset(self, name):
        """
        Build the torchvision dataset selected by ``name``.

        Supported names are 'FashionMNIST', 'KMNIST', 'CIFAR10', 'SVHN' and
        'STL10'. Every dataset is converted to a tensor and normalized with
        the same fixed per-channel mean/std; FashionMNIST and KMNIST are first
        passed through ``Grayscale(num_output_channels=3)``.

        Args:
            name: dataset identifier, one of the names listed above.

        Returns:
            Tuple ``(dataset, num_classes, input_size)``.
            NOTE(review): ``input_size`` presumably is the flattened feature
            size expected by the network's classifier -- confirm against the
            model definitions.

        Raises:
            ValueError: if ``name`` is not one of the supported datasets
                (previously the method fell through and returned ``None``).
        """
        # name -> (root directory, expand to 3 channels?, extra constructor
        # kwargs, input_size). The torchvision class is looked up under the
        # same name, so only the requested dataset class is ever touched.
        specs = {
            'FashionMNIST':
            ("/home/kn15263s/data/FashionMNIST", True, {}, 512 * 1 * 1),
            'KMNIST':
            ("/home/kn15263s/data/KMNIST", True, {'download': True},
             512 * 1 * 1),
            'CIFAR10':
            ("/home/kn15263s/data/CIFAR10/", False, {}, 512 * 1 * 1),
            'SVHN':
            ("/home/kn15263s/data/SVHN/", False, {}, 512 * 1 * 1),
            'STL10':
            ("/home/kn15263s/data/STL10/", False, {}, 512 * 3 * 3),
        }

        if name not in specs:
            raise ValueError(
                "Unsupported dataset name: {!r} (expected one of {})".format(
                    name, ", ".join(sorted(specs))))

        root, to_three_channels, extra_kwargs, input_size = specs[name]

        normalize = transforms.Normalize(
            mean=[0.4914, 0.4822, 0.4465],
            std=[0.2023, 0.1994, 0.2010],
        )

        steps = [transforms.ToTensor(), normalize]
        if to_three_channels:
            # Normalize above is configured for three channels.
            steps.insert(0, transforms.Grayscale(num_output_channels=3))
        data_transforms = transforms.Compose(steps)

        dataset_cls = getattr(torchvision.datasets, name)
        dataset = dataset_cls(root=root,
                              transform=data_transforms,
                              **extra_kwargs)

        # All five supported datasets are treated as 10-class problems.
        num_classes = 10

        return dataset, num_classes, input_size

        # elif name == 'Food':
        #
        #     class Food(Dataset):
        #
        #         def __init__(self, files, class_names, transform=transforms.ToTensor()):
        #
        #             self.data = files
        #             self.transform = transform
        #             self.class_names = class_names
        #
        #         def __getitem__(self, idx):
        #             img = Image.open(self.data[idx]).convert('RGB')
        #             name = self.data[idx].split('/')[-2]
        #             y = self.class_names.index(name)
        #             img = self.transform(img)
        #             return img, y
        #
        #         def __len__(self):
        #             return len(self.data)
        #
        #     data_transforms = transforms.Compose([
        #         transforms.RandomHorizontalFlip(),
        #         transforms.RandomVerticalFlip(),
        #         transforms.Resize((224, 224)),
        #         transforms.ToTensor(),
        #         normalize])
        #
        #     path = '/home/willy-huang/workspace/data/food'
        #     files_training = glob(os.path.join(path, '*/*.jpg'))
        #     class_names = []
        #
        #     for folder in os.listdir(os.path.join(path)):
        #         class_names.append(folder)
        #
        #     num_classes = len(class_names)
        #     dataset = Food(files_training, class_names, data_transforms)
        #     input_size = 512 * 7 * 7
        #
        #     return dataset, num_classes, input_size
        #
        # elif name == 'Stanford_dogs':
        #
        #     class Stanford_dogs(Dataset):
        #
        #         def __init__(self, files, class_names, transform=transforms.ToTensor()):
        #
        #             self.data = files
        #             self.transform = transform
        #             self.class_names = class_names
        #
        #         def __getitem__(self, idx):
        #             img = Image.open(self.data[idx]).convert('RGB')
        #             name = self.data[idx].split('/')[-2]
        #             y = self.class_names.index(name)
        #             img = self.transform(img)
        #             return img, y
        #
        #         def __len__(self):
        #             return len(self.data)
        #
        #
        #     data_transforms = transforms.Compose([
        #         transforms.RandomHorizontalFlip(),
        #         transforms.RandomVerticalFlip(),
        #         transforms.Resize((224, 224)),
        #         transforms.ToTensor(),
        #         normalize])
        #
        #     path = '/home/willy-huang/workspace/data/stanford_dogs'
        #     files_training = glob(os.path.join(path, '*/*.jpg'))
        #     class_names = []
        #
        #     for folder in os.listdir(os.path.join(path)):
        #         class_names.append(folder)
        #
        #     num_classes = len(class_names)
        #     dataset = Stanford_dogs(files_training, class_names, data_transforms)
        #     input_size = 512 * 7 * 7
        #
        #     return dataset, num_classes, input_size

    def _setup(self, config):
        """
        Prepare data loaders, model, optimizer and LR scheduler for a trial.

        The dataset and network are selected by the module-level ``args``
        (``args.Dataset_name`` / ``args.Network_name``); the optimizer
        hyperparameters come from ``config``.

        Args:
            config: mapping providing "lr", "momentum", "weight_decay" and
                "factor".

        Raises:
            ValueError: if ``args.Network_name`` is not one of 'Vgg11',
                'Resnet18', 'MobileNet' or 'MobileNet_V2'.
        """
        # Fixed seeds: the index permutation below is therefore reproducible.
        random.seed(50)
        np.random.seed(50)
        torch.cuda.manual_seed_all(50)
        torch.manual_seed(50)
        self.total_time = time.time()
        self.name = args.Dataset_name
        nnArchitecture = args.Network_name

        dataset, num_class, input_size = self._get_dataset(self.name)

        # 80/20 train/validation split over a random permutation of indices.
        num_total = len(dataset)
        shuffle = np.random.permutation(num_total)
        split_val = int(num_total * 0.2)
        train_idx, valid_idx = shuffle[split_val:], shuffle[:split_val]

        # Both loaders read the same dataset object; the samplers restrict
        # each loader to its own index subset.
        loaders = []
        for indices in (train_idx, valid_idx):
            loaders.append(
                DataLoader(dataset,
                           batch_size=256,
                           sampler=SubsetRandomSampler(indices),
                           num_workers=4))
        self.trainset_ld, self.validset_ld = loaders

        self.modelname = '{}--{}.pth.tar'.format(self.name, nnArchitecture)
        loggername = self.modelname.replace("pth.tar", "log")
        self.logger = utils.buildLogger(loggername)

        # Header row of the CSV written whenever a new best accuracy is found.
        self.seed_table = np.array([
            "", "epoch", "lr", "momentum", "weight_decay", "factor", "outLoss",
            "accuracy"
        ])

        # ---- hyperparameters ----
        self.lr = config["lr"]
        self.momentum = config["momentum"]
        self.weight_decay = config["weight_decay"]
        self.factor = config["factor"]

        self.epochID = 0
        self.loss = nn.CrossEntropyLoss()
        # Sentinel below any reachable accuracy so the first epoch always
        # counts as the best so far.
        self.accuracy = -999999999999.0

        # -------------------- SETTINGS: NETWORK ARCHITECTURE
        # Explicit exception instead of `assert 0`: asserts are stripped under
        # `python -O` and give no hint about what was wrong.
        if nnArchitecture == 'Vgg11':
            model = Vgg11(num_class, input_size)
        elif nnArchitecture == 'Resnet18':
            model = Resnet18(num_class, input_size)
        elif nnArchitecture == 'MobileNet':
            model = MobileNet(num_class, input_size)
        elif nnArchitecture == 'MobileNet_V2':
            model = MobileNet_V2(num_class, input_size)
        else:
            raise ValueError(
                "Unsupported network architecture: {!r}".format(
                    nnArchitecture))

        self.model = torch.nn.DataParallel(model.cuda()).cuda()
        self.logger.info("Build Model Done")

        # -------------------- SETTINGS: OPTIMIZER & SCHEDULER --------------------
        # Only parameters that require gradients are handed to the optimizer.
        trainable_params = (p for p in self.model.parameters()
                            if p.requires_grad)
        self.optimizer = optim.SGD(trainable_params,
                                   lr=self.lr,
                                   momentum=self.momentum,
                                   weight_decay=self.weight_decay,
                                   nesterov=False)

        # mode='min': the scheduler is stepped with the validation loss.
        self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            self.optimizer, factor=self.factor, patience=10, mode='min')

        self.logger.info("Build Optimizer Done")

    def _train_iteration(self):
        """
        Run one pass over the training loader.

        Records the pass start time in ``self.start_time`` and stores the mean
        per-batch loss in ``self.trainLoss``.
        """
        self.start_time = time.time()
        self.model.train()

        running_loss = 0
        batch_count = 0

        for images, labels in self.trainset_ld:
            images = Variable(images).cuda()
            labels = Variable(labels).cuda()

            batch_loss = self.loss(self.model(images), labels)

            running_loss += batch_loss.item()
            batch_count += 1

            self.optimizer.zero_grad()
            batch_loss.backward()
            # Clamp every gradient element to [-10, 10] before the update.
            torch.nn.utils.clip_grad_value_(self.model.parameters(), 10)
            self.optimizer.step()

        self.trainLoss = running_loss / batch_count

    def _test(self):
        """
        Evaluate on the validation loader and report this epoch's metrics.

        Besides computing the mean validation loss and accuracy, this steps
        the LR scheduler with the validation loss, and -- whenever accuracy
        improves -- saves a "best" checkpoint plus a one-row CSV with the
        hyperparameters that produced it. Reads ``self.trainLoss`` and
        ``self.start_time``, so ``_train_iteration`` must have run first.

        Returns:
            dict with keys "episode_reward_mean", "neg_mean_loss",
            "mean_accuracy", "epoch" and "mean_train_loss".
        """
        self.model.eval()

        loss_sum = 0
        num_batches = 0
        correct = 0
        num_samples = 0

        with torch.no_grad():
            for images, labels in self.validset_ld:
                # `async` became a reserved word in Python 3.7, so the old
                # `.cuda(async=True)` spelling no longer parses;
                # `non_blocking` is the replacement keyword.
                images = images.cuda(non_blocking=True)
                labels = labels.cuda(non_blocking=True)
                output = self.model(images)

                loss_sum += self.loss(output, labels).item()
                num_batches += 1

                # Keep the running count as a plain int.
                correct += (output.argmax(1) == labels).sum().item()
                num_samples += len(images)

        self.outLoss = loss_sum / num_batches
        accuracy = correct / num_samples

        self.scheduler.step(self.outLoss, epoch=self.epochID)

        if accuracy > self.accuracy:
            self.accuracy = accuracy

            torch.save(
                {
                    'epoch': self.epochID + 1,
                    'state_dict': self.model.state_dict(),
                    'loss': self.outLoss,
                    'best_accuracy': self.accuracy,
                    'optimizer': self.optimizer.state_dict(),
                }, "./best_" + self.modelname)

            # Header row followed by the values of the current best epoch.
            best_row = [
                str(self.name),
                str(self.epochID + 1),
                str(self.lr),
                str(self.momentum),
                str(self.weight_decay),
                str(self.factor),
                str(self.outLoss),
                str(self.accuracy)
            ]
            save = np.array([self.seed_table, best_row])

            np.savetxt("./seed(50).csv", save, delimiter=',', fmt="%s")

        now = time.time()
        self.logger.info(
            'Epoch [{}] loss= {:.5f} ---- accuracy= {:.5f}'
            ' ---- best_accuracy= {:.5f} ---- model: {}'
            ' ---- time: {:.1f} s ---- total_time: {:.1f} s'.format(
                self.epochID + 1, self.outLoss, accuracy, self.accuracy,
                self.modelname, now - self.start_time,
                now - self.total_time))

        self.epochID += 1
        return {
            "episode_reward_mean": accuracy,
            # NOTE(review): the key says "neg" but the value is the
            # un-negated validation loss; confirm whether any scheduler
            # maximizes this key before changing the sign.
            "neg_mean_loss": self.outLoss,
            "mean_accuracy": accuracy,
            "epoch": self.epochID,
            'mean_train_loss': self.trainLoss
        }

    def _train(self):
        """Run one training pass, then validate and return the metrics dict."""
        self._train_iteration()
        metrics = self._test()
        return metrics

    def _save(self, checkpoint_dir):
        """
        Write the current trainer state to ``final_model.pth``.

        Args:
            checkpoint_dir: directory in which the checkpoint file is created.

        Returns:
            Path of the written checkpoint file.
        """
        state = {
            "epoch": self.epochID,
            "best_accuracy": self.accuracy,
            'loss': self.outLoss,
            "state_dict": self.model.state_dict(),
            'optimizer': self.optimizer.state_dict(),
        }
        target = os.path.join(checkpoint_dir, "final_model.pth")
        torch.save(state, target)
        return target

    def _restore(self, checkpoint_path):
        """
        Restore trainer state from a checkpoint file written by ``_save``.

        ``_save`` stores a dict with the keys "epoch", "best_accuracy",
        "loss", "state_dict" and "optimizer"; the file has to be loaded first
        and the model weights taken from "state_dict" (passing the path
        itself to ``load_state_dict`` fails). The LR scheduler state is not
        part of the checkpoint, so it is not restored here.

        Args:
            checkpoint_path: path returned by ``_save``.
        """
        checkpoint = torch.load(checkpoint_path)

        self.model.load_state_dict(checkpoint["state_dict"])
        self.optimizer.load_state_dict(checkpoint["optimizer"])

        # Resume bookkeeping so logging and best-model tracking continue
        # from where the checkpoint left off.
        self.epochID = checkpoint["epoch"]
        self.accuracy = checkpoint["best_accuracy"]
        self.outLoss = checkpoint["loss"]