# Assumed imports (mimic3-benchmarks project layout); adjust to your own tree.
import threading

import numpy as np

from mimic3models import common_utils


def read_and_extract_features(reader, period, features):
    print("number of get_number_of_examples", reader.get_number_of_examples())
    ret = common_utils.read_chunk(reader, reader.get_number_of_examples())
    # ret = common_utils.read_chunk(reader, 100)  # smaller chunk, useful for quick debugging
    X = common_utils.extract_features_from_rawdata(ret['X'], ret['header'],
                                                   period, features)
    return (X, ret['y'], ret['name'])
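
A minimal usage sketch for read_and_extract_features. The InHospitalMortalityReader import, the dataset paths, and the period/features arguments are illustrative assumptions, not part of the excerpt:

# Hypothetical usage: turn raw episodes into fixed-size feature vectors.
from mimic3benchmark.readers import InHospitalMortalityReader

train_reader = InHospitalMortalityReader(
    dataset_dir='data/in-hospital-mortality/train',
    listfile='data/in-hospital-mortality/train_listfile.csv')
(X, y, names) = read_and_extract_features(train_reader, period='all', features='all')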


class BatchGen(object):
    # NOTE: the enclosing class statement was missing from this excerpt; the
    # name "BatchGen" follows the mimic3-benchmarks convention and is an
    # assumption here.

    def _generator(self):
        B = self.batch_size
        while True:
            if self.shuffle:
                self.reader.random_shuffle()
            # Read the data in chunks of at most self.chunk_size examples so
            # the whole dataset never has to sit in memory at once.
            remaining = self.n_examples
            while remaining > 0:
                current_size = min(self.chunk_size, remaining)
                remaining -= current_size

                ret = common_utils.read_chunk(self.reader, current_size)
                Xs = ret["X"]
                ts = ret["t"]
                ys = ret["y"]
                names = ret["name"]

                Xs = preprocess_chunk(Xs, ts, self.discretizer, self.normalizer)
                # Sort by sequence length and shuffle whole batches, so that
                # similar-length examples share a batch and the zero-padding
                # added below stays small.
                (Xs, ys, ts, names) = common_utils.sort_and_shuffle([Xs, ys, ts, names], B)

                for i in range(0, current_size, B):
                    # Zero-pad every sequence in the batch to the longest one.
                    X = common_utils.pad_zeros(Xs[i:i + B])
                    y = np.array(ys[i:i + B])
                    batch_names = names[i:i + B]
                    batch_ts = ts[i:i + B]
                    batch_data = (X, y)
                    if not self.return_names:
                        yield batch_data
                    else:
                        yield {"data": batch_data, "names": batch_names, "ts": batch_ts}
    def __init__(self,
                 reader,
                 discretizer,
                 normalizer,
                 ihm_pos,
                 partition,
                 target_repl,
                 batch_size,
                 small_part,
                 shuffle,
                 return_names=False):
        self.discretizer = discretizer
        self.normalizer = normalizer
        self.ihm_pos = ihm_pos
        self.partition = partition
        self.target_repl = target_repl
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.return_names = return_names

        N = reader.get_number_of_examples()
        if small_part:
            N = 1000  # cap the dataset size for quick experiments
        self.steps = (N + batch_size - 1) // batch_size  # ceil(N / batch_size)
        self.lock = threading.Lock()  # presumably used by a next() wrapper (not shown) for thread-safe iteration

        ret = common_utils.read_chunk(reader, N)
        Xs = ret['X']
        ts = ret['t']
        ihms = ret['ihm']
        loss = ret['los']  # length-of-stay targets; "los" pluralized, not a training loss
        phenos = ret['pheno']
        decomps = ret['decomp']

        self.data = dict()
        self.data['pheno_ts'] = ts
        self.data['names'] = ret['name']
        self.data['decomp_ts'] = []
        self.data['los_ts'] = []

        for i in range(N):
            # Record the positions where the per-step mask is 1, i.e. the time
            # steps at which decompensation / length-of-stay targets exist.
            self.data['decomp_ts'].append(
                [pos for pos, m in enumerate(decomps[i][0]) if m == 1])
            self.data['los_ts'].append(
                [pos for pos, m in enumerate(loss[i][0]) if m == 1])
            # _preprocess_single is defined elsewhere in the class (not shown).
            (Xs[i], ihms[i], decomps[i], loss[i], phenos[i]) = \
                self._preprocess_single(Xs[i], ts[i], ihms[i], decomps[i], loss[i], phenos[i])

        self.data['X'] = Xs
        self.data['ihm_M'] = [x[0] for x in ihms]
        self.data['ihm_y'] = [x[1] for x in ihms]
        self.data['decomp_M'] = [x[0] for x in decomps]
        self.data['decomp_y'] = [x[1] for x in decomps]
        self.data['los_M'] = [x[0] for x in loss]
        self.data['los_y'] = [x[1] for x in loss]
        self.data['pheno_y'] = phenos

        self.generator = self._generator()
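
A sketch of how such a batch generator is typically consumed during training. The reader construction, ihm_pos=48, and the training loop below are illustrative assumptions:

# Hypothetical usage: draw self.steps batches per epoch from the generator.
train_data_gen = BatchGen(train_reader, discretizer, normalizer,
                          ihm_pos=48, partition='custom', target_repl=False,
                          batch_size=8, small_part=False, shuffle=True)
for step in range(train_data_gen.steps):
    (X, y) = next(train_data_gen.generator)
    # ... run one training step on (X, y) ...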
def load_data(reader,
              discretizer,
              normalizer,
              small_part=False,
              return_names=False):
    N = reader.get_number_of_examples()
    if small_part:
        N = 1000
    ret = common_utils.read_chunk(reader, N)
    data = ret["X"]
    ts = ret["t"]
    labels = ret["y"]
    names = ret["name"]
    # Discretize each episode up to its end time t, then normalize.
    data = [discretizer.transform(X, end=t)[0] for (X, t) in zip(data, ts)]
    if normalizer is not None:
        data = [normalizer.transform(X) for X in data]
    whole_data = (np.array(data), labels)
    if not return_names:
        return whole_data
    return {"data": whole_data, "names": names}