Example #1
0
class DelimFiles(Dataset):
    """
    Sets up a raw delimited text input file based dataset.

    Attributes:
        backend (neon.backends.Backend): backend used for this data
        inputs (dict): structure housing the loaded train/test/validation
                       input data
        targets (dict): structure housing the loaded train/test/validation
                        target data

    Kwargs:
        repo_path (str): where to locally host this dataset on disk
        item_size (int): length of each input when flattened.
        noutputs (int): number of output target classes (should be 1 for
                        regression problems)
    """

    def __init__(self, **kwargs):
        # Live (streaming) mode is off by default; a Server is only created
        # in load() when self.live is set True via kwargs.
        self.live = False
        self.server = None
        opt_param(self, ['batch_size'], default_value=1)
        opt_param(self, ['input_dtype', 'target_dtype'],
                  default_value=np.float32)
        # kwargs may override any of the defaults above (live, batch_size, ...)
        self.__dict__.update(kwargs)

    def load(self, backend=None, experiment=None):
        """
        Prepare this dataset for use.

        Resolves repo_path, infers item_size / noutputs from the experiment's
        model layers when they were not given in the config, allocates the
        host-side batch buffer, and (in live mode) starts the IPC server.

        Raises:
            AttributeError: if repo_path is missing, or item_size / noutputs
                            are missing and cannot be inferred from the model.
        """
        # Already loaded -- nothing to do.
        if self.inputs['train'] is not None or self.inputs['test'] is not None:
            return
        if 'repo_path' not in self.__dict__:
            raise AttributeError('repo_path not specified in config')
        if 'item_size' not in self.__dict__:
            try:
                # infer item_size from DataLayer (first layer's output size)
                self.item_size = experiment.model.layers[0].nout
            except AttributeError:
                raise AttributeError('item_size not specified in config'
                                     ' and cannot be inferred')
        if 'noutputs' not in self.__dict__:
            try:
                # infer noutputs from CostLayer (last layer's input size)
                self.noutputs = experiment.model.layers[-1].nin
            except AttributeError:
                raise AttributeError('noutputs not specified in config'
                                     ' and cannot be inferred')

        # Expand ~ and $VARS so configs can use portable paths.
        self.repo_path = os.path.expandvars(os.path.expanduser(self.repo_path))
        self.rootdir = os.path.join(self.repo_path, self.__class__.__name__)
        # Host staging buffer: one column per item in the batch.
        self.batchdata = np.zeros((self.item_size, self.batch_size),
                                  dtype=self.input_dtype)

        if self.live:
            self.predict = True
            # Shared-memory message sizes in bytes: request holds one input
            # batch, response holds one batch of outputs.
            req_size = (np.dtype(self.input_dtype).itemsize * self.batch_size *
                        self.item_size)
            res_size = (np.dtype(self.target_dtype).itemsize * self.noutputs *
                        self.batch_size)
            # NOTE(review): Server is not imported in this block -- presumably
            # imported at module level; verify.
            self.server = Server(req_size=req_size, res_size=res_size)
            return

    def unload(self):
        """Tear down live-mode resources (stops the IPC server if running)."""
        logger.info('Unloading...')
        if self.server is not None:
            self.server.stop()

    def serialize(self, obj, save_path):
        """
        Pickle obj to save_path.

        Opens the file in binary mode: pickle output is bytes, so text mode
        ('w') fails on python3 and can corrupt data on Windows.  The context
        manager guarantees the handle is closed even if dump raises.
        """
        with open(save_path, 'wb') as fd:
            pickle.dump(obj, fd, -1)

    def deserialize(self, load_path):
        """
        Unpickle and return the object stored at load_path.

        Binary mode is required to read pickle data (see serialize).
        WARNING: pickle.load executes arbitrary code -- only call this on
        trusted files.
        """
        with open(load_path, 'rb') as fd:
            obj = pickle.load(fd)
        return obj

    def receive_batch(self):
        """
        Receive one input batch from the live IPC server and return it as a
        backend tensor.  Returns (inputs, None) -- live mode has no targets.
        """
        assert self.server is not None
        data, header = self.server.receive()
        # Reinterpret the raw bytes as input_dtype and stage into column 0.
        # NOTE(review): only column 0 is filled -- appears to assume
        # batch_size == 1 in live mode; confirm.
        self.batchdata[:, 0] = data.view(self.input_dtype)
        dbatchdata = self.backend.distribute(self.batchdata, self.input_dtype)
        return dbatchdata, None

    def read_batch(self, batch_idx):
        """Read batch batch_idx from disk (not yet implemented)."""
        raise NotImplementedError("This dataset is still a work in progress")

    def process_result(self, result):
        """In live mode, send an inference result back over the IPC server."""
        if self.live:
            assert self.server is not None
            self.server.send(result)

    def get_mini_batch(self, batch_idx):
        """
        Return (inputs, targets) for one mini-batch, either streamed from the
        live server or read from disk by index.
        """
        if self.live:
            inputs, targets = self.receive_batch()
        else:
            inputs, targets = self.read_batch(batch_idx)
        return inputs, targets
Example #2
0
class DelimFiles(Dataset):
    """
    Sets up a raw delimited text input file based dataset.

    Attributes:
        backend (neon.backends.Backend): backend used for this data
        inputs (dict): structure housing the loaded train/test/validation
                       input data
        targets (dict): structure housing the loaded train/test/validation
                        target data

    Kwargs:
        repo_path (str): where to locally host this dataset on disk
        item_size (int): length of each input when flattened.
        noutputs (int): number of output target classes (should be 1 for
                        regression problems)
    """
    def __init__(self, **kwargs):
        # Live (streaming) mode is off by default; a Server is only created
        # in load() when self.live is set True via kwargs.
        self.live = False
        self.server = None
        opt_param(self, ['batch_size'], default_value=1)
        opt_param(self, ['input_dtype', 'target_dtype'],
                  default_value=np.float32)
        # kwargs may override any of the defaults above (live, batch_size, ...)
        self.__dict__.update(kwargs)

    def load(self, backend=None, experiment=None):
        """
        Prepare this dataset for use.

        Resolves repo_path, infers item_size / noutputs from the experiment's
        model layers when they were not given in the config, allocates the
        host-side batch buffer, and (in live mode) starts the IPC server.

        Raises:
            AttributeError: if repo_path is missing, or item_size / noutputs
                            are missing and cannot be inferred from the model.
        """
        # Already loaded -- nothing to do.
        if self.inputs['train'] is not None or self.inputs['test'] is not None:
            return
        if 'repo_path' not in self.__dict__:
            raise AttributeError('repo_path not specified in config')
        if 'item_size' not in self.__dict__:
            try:
                # infer item_size from DataLayer (first layer's output size)
                self.item_size = experiment.model.layers[0].nout
            except AttributeError:
                raise AttributeError('item_size not specified in config'
                                     ' and cannot be inferred')
        if 'noutputs' not in self.__dict__:
            try:
                # infer noutputs from CostLayer (last layer's input size)
                self.noutputs = experiment.model.layers[-1].nin
            except AttributeError:
                raise AttributeError('noutputs not specified in config'
                                     ' and cannot be inferred')

        # Expand ~ and $VARS so configs can use portable paths.
        self.repo_path = os.path.expandvars(os.path.expanduser(self.repo_path))
        self.rootdir = os.path.join(self.repo_path, self.__class__.__name__)
        # Host staging buffer: one column per item in the batch.
        self.batchdata = np.zeros((self.item_size, self.batch_size),
                                  dtype=self.input_dtype)

        if self.live:
            # Imported lazily so non-live use never touches the shmem module.
            from neon.ipc.shmem import Server
            self.predict = True
            # Shared-memory message sizes in bytes: request holds one input
            # batch, response holds one batch of outputs.
            req_size = (np.dtype(self.input_dtype).itemsize * self.batch_size *
                        self.item_size)
            res_size = (np.dtype(self.target_dtype).itemsize * self.noutputs *
                        self.batch_size)
            self.server = Server(req_size=req_size, res_size=res_size)
            return

    def unload(self):
        """Tear down live-mode resources (stops the IPC server if running)."""
        logger.info('Unloading...')
        if self.server is not None:
            self.server.stop()

    def serialize(self, obj, save_path):
        """
        Pickle obj to save_path.

        Opens the file in binary mode: pickle output is bytes, so text mode
        ('w') fails on python3 and can corrupt data on Windows.  The context
        manager guarantees the handle is closed even if dump raises.
        """
        with open(save_path, 'wb') as fd:
            pickle.dump(obj, fd, -1)

    def deserialize(self, load_path):
        """
        Unpickle and return the object stored at load_path.

        Binary mode is required to read pickle data (see serialize).
        WARNING: pickle.load executes arbitrary code -- only call this on
        trusted files.
        """
        with open(load_path, 'rb') as fd:
            obj = pickle.load(fd)
        return obj

    def receive_batch(self):
        """
        Receive one input batch from the live IPC server and return it as a
        backend tensor.  Returns (inputs, None) -- live mode has no targets.
        """
        assert self.server is not None
        data, header = self.server.receive()
        # Reinterpret the raw bytes as input_dtype and stage into column 0.
        # NOTE(review): only column 0 is filled -- appears to assume
        # batch_size == 1 in live mode; confirm.
        self.batchdata[:, 0] = data.view(self.input_dtype)
        # Fix: was self.input_data, an attribute that is never set anywhere in
        # this class -- load() defines self.input_dtype, which is what the
        # allocation needs.
        dbatchdata = self.backend.allocate_fragment(self.batchdata.shape,
                                                    self.input_dtype)
        self.backend.set(dbatchdata, self.batchdata)
        return dbatchdata, None

    def read_batch(self, batch_idx):
        """Read batch batch_idx from disk (not yet implemented)."""
        raise NotImplementedError("This dataset is still a work in progress")

    def process_result(self, result):
        """In live mode, send an inference result back over the IPC server."""
        if self.live:
            assert self.server is not None
            self.server.send(result)

    def get_mini_batch(self, batch_idx):
        """
        Return (inputs, targets) for one mini-batch, either streamed from the
        live server or read from disk by index.
        """
        if self.live:
            inputs, targets = self.receive_batch()
        else:
            inputs, targets = self.read_batch(batch_idx)
        return inputs, targets