Example 1
    def fit(self, X: np.ndarray, y: np.ndarray, X_valid=None, y_valid=None):

        # Preprocessing: move the training arrays onto the target device as float tensors.
        self.train_x = torch.from_numpy(X).type(torch.float).to(DEVICE)
        self.train_y = torch.from_numpy(y).type(torch.float).to(DEVICE)

        train_dataset_loader = torchDataLoader(
            dataset=DiabetesDataset(self.train_x, self.train_y),
            batch_size=self.batch_size,
            shuffle=True,
            drop_last=False,
        )

        # Record whether validation data was supplied.
        self.valid_data = (X_valid is not None) and (y_valid is not None)

        for epoch in tqdm(range(self.n_epochs)):
            self.train()  # ensure training mode once per epoch
            for x, y_batch in train_dataset_loader:  # y_batch avoids shadowing the y argument
                self.optimizer.zero_grad()
                y_pred = self(x)
                loss = self.criterion(y_pred, y_batch)
                loss.backward()
                self.optimizer.step()  # update the weights on every batch
            if self.writer is not None:
                # Log the loss of the epoch's last batch.
                self.writer.add_scalar('train/train_loss', loss.item(), epoch)

        return self
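
This `fit` snippet depends on names defined elsewhere in its module (`DEVICE`, `torchDataLoader`, `DiabetesDataset`) and on attributes of the surrounding class (`batch_size`, `n_epochs`, `criterion`, `optimizer`, `writer`). A minimal sketch of those missing pieces; the `DiabetesClassifier` host class and its hyperparameter defaults are hypothetical, not from the original code:

import torch
import torch.nn as nn
from torch.utils.data import DataLoader as torchDataLoader, Dataset
from tqdm import tqdm  # used inside fit()'s epoch loop

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")


class DiabetesDataset(Dataset):
    """Pairs the preprocessed feature and label tensors built by fit()."""

    def __init__(self, x: torch.Tensor, y: torch.Tensor):
        self.x, self.y = x, y

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]


class DiabetesClassifier(nn.Module):
    """Hypothetical host class providing the attributes fit() relies on."""

    def __init__(self, n_features: int, batch_size: int = 32, n_epochs: int = 50):
        super().__init__()
        self.linear = nn.Linear(n_features, 1)
        self.batch_size = batch_size
        self.n_epochs = n_epochs
        self.criterion = nn.BCEWithLogitsLoss()
        self.optimizer = torch.optim.Adam(self.parameters())
        self.writer = None  # optionally a torch.utils.tensorboard.SummaryWriter

    def forward(self, x):
        return self.linear(x)

    # fit(X, y, X_valid=None, y_valid=None) would be defined here as above.

With the method attached, training reduces to `DiabetesClassifier(n_features=8).to(DEVICE).fit(X, y)`, where `X` and `y` are float NumPy arrays and `y` is shaped `(n_samples, 1)` to match the single-logit output.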
Example 2
import numpy as np
import torch
from torch.utils.data import DataLoader as torchDataLoader

# rikai helpers (exact module paths can vary between rikai releases)
from rikai.numpy import wrap
from rikai.torch.data import Dataset
from rikai.types import Image


def test_torch_dataset(spark, tmp_path, num_workers):
    total = 1000
    dataset_dir = tmp_path / "data"
    asset_dir = tmp_path / "asset"
    asset_dir.mkdir(parents=True)
    data = []
    expected = []
    for i in range(total):
        image_data = np.random.randint(0, 128, size=(128, 128), dtype=np.uint8)
        image_uri = asset_dir / f"{i}.png"
        Image.from_array(image_data, image_uri)  # write the test image to disk

        array = wrap(np.random.random_sample((3, 4)))
        data.append(
            {
                "id": i,
                "array": array,
                "image": Image(image_uri),
            }
        )
        expected.append(
            {
                "id": i,
                "array": torch.as_tensor(np.array([array])),
                "image": torch.as_tensor(np.array([image_data])),
            }
        )

    df = spark.createDataFrame(data)
    df.write.mode("overwrite").format("rikai").save(str(dataset_dir))
    dataset = Dataset(dataset_dir)
    loader = torchDataLoader(
        dataset,
        num_workers=num_workers,
        drop_last=True,
    )
    actual = sorted(list(loader), key=lambda x: x["id"])
    assert len(actual) == total
    for expect, act in zip(expected, actual):
        assert torch.equal(
            expect["array"], act["array"]
        ), f"Expected {expect['array']} got {act['array']}"
        assert torch.equal(expect["image"], act["image"])
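
The test's `spark` and `num_workers` arguments would come from the surrounding pytest suite; `tmp_path` is pytest's built-in temporary-directory fixture. A sketch of plausible supporting fixtures, where the session configuration and the worker counts are assumptions rather than the original suite's values:

import pytest
from pyspark.sql import SparkSession


@pytest.fixture(scope="session")
def spark():
    # A local SparkSession; the "rikai" format additionally requires the
    # rikai Spark package on the session's classpath.
    session = (
        SparkSession.builder.master("local[2]")
        .appName("rikai-test")
        .getOrCreate()
    )
    yield session
    session.stop()


# The test itself would then be parametrized over worker counts, e.g.:
# @pytest.mark.parametrize("num_workers", [0, 2])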
Example 3
    def loadData(self):
        logging.info(
            f'Loading data from {self.dataPath} with resolution {self.resolution}x{self.resolution}'
        )
        self.dataset = ImageFolder(
            root=self.dataPath,
            transform=transforms.Compose([
                transforms.Resize(size=(self.resolution, self.resolution),
                                  interpolation=Image.LANCZOS),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
            ]))

        self.dataloader = torchDataLoader(dataset=self.dataset,
                                          batch_size=self.batchSize,
                                          shuffle=True,
                                          num_workers=self.numWorkers,
                                          drop_last=True,
                                          pin_memory=torch.cuda.is_available())
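
For completeness, a sketch of the module-level imports this snippet assumes (the `torchDataLoader` alias matches the other examples):

import logging

import torch
from PIL import Image
from torch.utils.data import DataLoader as torchDataLoader
from torchvision import transforms
from torchvision.datasets import ImageFolder

Note that recent torchvision releases deprecate passing PIL resampling constants to `Resize`; `transforms.InterpolationMode.LANCZOS` is the current spelling.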
Example 4
def get_loader(opt: argparse.Namespace, cfg: yacs.config.CfgNode, ipu_opts: poptorch.Options):
    """Gets a new data loader for the model.
    Parameters:
        opt: namespace holding the options parsed from the command line
        cfg: yacs object containing the config
        ipu_opts: options for the IPU configuration
    Returns:
        loader: a poptorch DataLoader when cfg.model.ipu is set, otherwise a plain torch DataLoader
    """
    dataset = Dataset(path=opt.data, cfg=cfg)

    # Creates a loader using the dataset
    if cfg.model.ipu:
        loader = DataLoader(ipu_opts,
                            dataset,
                            batch_size=cfg.model.micro_batch_size,
                            num_workers=cfg.system.num_workers,
                            mode=DataLoaderMode.Async)
    else:
        loader = torchDataLoader(dataset, batch_size=cfg.model.micro_batch_size)

    return loader
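
A sketch of how the pieces might be wired together; the config fields mirror those read by `get_loader`, while the concrete values and the `build_inputs` helper are hypothetical:

import argparse

import poptorch
import yacs.config


def build_inputs():
    parser = argparse.ArgumentParser()
    parser.add_argument("--data", type=str, default="./dataset")
    opt = parser.parse_args([])  # parse an empty argv for illustration

    cfg = yacs.config.CfgNode(
        {
            "model": {"ipu": False, "micro_batch_size": 4},
            "system": {"num_workers": 2},
        }
    )
    ipu_opts = poptorch.Options()  # only consulted when cfg.model.ipu is True
    return opt, cfg, ipu_opts


# opt, cfg, ipu_opts = build_inputs()
# loader = get_loader(opt, cfg, ipu_opts)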