예제 #1
0
    def _generator(self):
        """Yield ([X, mask], y) batches forever, re-ordering each epoch.

        Per batch: X is (B, T, D) zero-padded inputs, mask is (B, T),
        y is (B, T, 1) zero-padded targets.
        """
        B = self.batch_size
        while True:
            if self.shuffle:
                N = len(self.data[1])
                # list() is required: Python 3 range objects are immutable
                # and cannot be shuffled in place.
                order = list(range(N))
                random.shuffle(order)
                tmp = [[[None] * N, [None] * N], [None] * N]
                for i in range(N):
                    tmp[0][0][i] = self.data[0][0][order[i]]
                    tmp[0][1][i] = self.data[0][1][order[i]]
                    tmp[1][i] = self.data[1][order[i]]
                self.data = tmp
            else:
                # sort entirely
                Xs = self.data[0][0]
                masks = self.data[0][1]
                ys = self.data[1]
                (Xs, masks,
                 ys) = common_utils.sort_and_shuffle([Xs, masks, ys], B)
                self.data = [[Xs, masks], ys]

            for i in range(0, len(self.data[1]), B):
                X = self.data[0][0][i:i + B]
                mask = self.data[0][1][i:i + B]
                y = self.data[1][i:i + B]
                X = nn_utils.pad_zeros(X)  # (B, T, D)
                mask = nn_utils.pad_zeros(mask)  # (B, T)
                y = nn_utils.pad_zeros(y)
                y = np.expand_dims(y, axis=-1)  # (B, T, 1)
                yield ([X, mask], y)
예제 #2
0
    def _generator(self):
        """Yield ([X, mask], y) batches with y binned by self.partition;
        optionally also yields the un-binned y_true.
        """
        B = self.batch_size
        while True:
            if self.shuffle:
                N = len(self.data[1])
                # list() is required: Python 3 range objects are immutable
                # and cannot be shuffled in place.
                order = list(range(N))
                random.shuffle(order)
                tmp = [[[None] * N, [None] * N], [None] * N]
                for i in range(N):
                    tmp[0][0][i] = self.data[0][0][order[i]]
                    tmp[0][1][i] = self.data[0][1][order[i]]
                    tmp[1][i] = self.data[1][order[i]]
                self.data = tmp
            else:
                # sort entirely
                Xs = self.data[0][0]
                masks = self.data[0][1]
                ys = self.data[1]
                (Xs, masks,
                 ys) = common_utils.sort_and_shuffle([Xs, masks, ys], B)
                self.data = [[Xs, masks], ys]

            for i in range(0, len(self.data[1]), B):
                X = self.data[0][0][i:i + B]
                mask = self.data[0][1][i:i + B]
                y = self.data[1][i:i + B]

                # Keep an un-binned copy of the targets before partitioning.
                y_true = [np.array(x) for x in y]
                y_true = nn_utils.pad_zeros(y_true)
                y_true = np.expand_dims(y_true, axis=-1)

                if self.partition == 'log':
                    y = [
                        np.array([metrics.get_bin_log(x, 10) for x in z])
                        for z in y
                    ]
                if self.partition == 'custom':
                    y = [
                        np.array([metrics.get_bin_custom(x, 10) for x in z])
                        for z in y
                    ]

                X = nn_utils.pad_zeros(X)  # (B, T, D)
                mask = nn_utils.pad_zeros(mask)  # (B, T)
                y = nn_utils.pad_zeros(y)
                y = np.expand_dims(y, axis=-1)

                if self.return_y_true:
                    yield ([X, mask], y, y_true)
                else:
                    yield ([X, mask], y)
예제 #3
0
    def _generator(self):
        """Endlessly stream padded (X, y) batches, reading chunk-wise from
        the reader; optionally attach per-batch names and ts."""
        batch_size = self.batch_size
        while True:
            if self.shuffle:
                self.reader.random_shuffle()
            left = self.n_examples
            while left > 0:
                chunk = min(self.chunk_size, left)
                left -= chunk

                ret = common_utils.read_chunk(self.reader, chunk)
                Xs = ret["X"]
                ts = ret["t"]
                ys = ret["y"]
                names = ret["name"]

                Xs = preprocess_chunk(Xs, ts, self.discretizer, self.normalizer)
                (Xs, ys, ts, names) = common_utils.sort_and_shuffle(
                    [Xs, ys, ts, names], batch_size)

                for start in range(0, chunk, batch_size):
                    stop = start + batch_size
                    X = nn_utils.pad_zeros(Xs[start:stop])
                    y = np.array(ys[start:stop])
                    payload = (X, y)
                    if self.return_names:
                        yield {"data": payload,
                               "names": names[start:stop],
                               "ts": ts[start:stop]}
                    else:
                        yield payload
예제 #4
0
    def _generator(self):
        """Yield (X, y) batches with y binned by self.partition; when
        return_y_true is set, the un-binned y_true is yielded too."""
        batch_size = self.batch_size
        while True:
            if self.shuffle:
                self.reader.random_shuffle()
            left = self.n_examples
            while left > 0:
                chunk = min(self.chunk_size, left)
                left -= chunk
                (data, ts, labels, header) = read_chunk(self.reader, chunk)
                data = preprocess_chunk(data, ts, self.discretizer,
                                        self.normalizer)
                data = common_utils.sort_and_shuffle((data, labels),
                                                     batch_size)

                for start in range(0, chunk, batch_size):
                    stop = start + batch_size
                    X = nn_utils.pad_zeros(data[0][start:stop])
                    y = data[1][start:stop]
                    # Preserve the raw values before binning.
                    y_true = np.array(y)

                    if self.partition == 'log':
                        y = [metrics.get_bin_log(v, 10) for v in y]
                    if self.partition == 'custom':
                        y = [metrics.get_bin_custom(v, 10) for v in y]

                    y = np.array(y)

                    if self.return_y_true:
                        yield (X, y, y_true)
                    else:
                        yield (X, y)
예제 #5
0
    def _generator(self):
        """Yield padded (X, y) batches read chunk-wise; optionally attach
        the names and ts belonging to the batch."""
        B = self.batch_size
        while True:
            if self.shuffle:
                self.reader.random_shuffle()
            remaining = self.n_examples
            while remaining > 0:
                current_size = min(self.chunk_size, remaining)
                remaining -= current_size

                ret = common_utils.read_chunk(self.reader, current_size)
                data = ret["X"]
                ts = ret["t"]
                labels = ret["y"]
                names = ret["name"]

                data = preprocess_chunk(data, ts, self.discretizer,
                                        self.normalizer)
                # Pass ts/names through sort_and_shuffle so they stay
                # aligned with data/labels; previously only (data, labels)
                # were reordered and the whole chunk's names/ts were
                # yielded with every batch.
                (data, labels, ts, names) = common_utils.sort_and_shuffle(
                    [data, labels, ts, names], B)
                data = (data, labels)

                for i in range(0, current_size, B):
                    X = nn_utils.pad_zeros(data[0][i:i + B])
                    y = np.array(data[1][i:i + B])
                    batch_data = (X, y)
                    if not self.return_names:
                        yield batch_data
                    else:
                        yield {"data": batch_data,
                               "names": names[i:i + B],
                               "ts": ts[i:i + B]}
예제 #6
0
    def _generator(self):
        """Yield (x, y) batches; with target replication also yield y_rep,
        y repeated along the time axis as (B, T, 25)."""
        B = self.batch_size
        while True:
            if self.shuffle:
                N = len(self.data[1])
                # list() is required: Python 3 range objects are immutable
                # and cannot be shuffled in place.
                order = list(range(N))
                random.shuffle(order)
                tmp = [[None] * N, [None] * N]
                for i in range(N):
                    tmp[0][i] = self.data[0][order[i]]
                    tmp[1][i] = self.data[1][order[i]]
                self.data = tmp
            else:
                # sort entirely
                self.data = common_utils.sort_and_shuffle(self.data, B)

            self.data[1] = np.array(
                self.data[1])  # this is important for Keras
            for i in range(0, len(self.data[0]), B):
                x = self.data[0][i:i + B]
                y = self.data[1][i:i + B]

                x = nn_utils.pad_zeros(x)
                y = np.array(y)  # (B, 25)

                if self.target_repl:
                    T = x.shape[1]
                    y_rep = np.expand_dims(y,
                                           axis=1).repeat(T,
                                                          axis=1)  # (B, T, 25)
                    yield (x, [y, y_rep])
                else:
                    yield (x, y)
예제 #7
0
    def _generator(self):
        """Stream padded (X, y) batches chunk by chunk; optionally include
        the batch's names and ts in a dict payload."""
        batch_size = self.batch_size
        while True:
            if self.shuffle:
                self.reader.random_shuffle()
            left = self.n_examples
            while left > 0:
                chunk = min(self.chunk_size, left)
                left -= chunk

                ret = common_utils.read_chunk(self.reader, chunk)
                Xs = ret["X"]
                ts = ret["t"]
                ys = ret["y"]
                names = ret["name"]

                Xs = preprocess_chunk(Xs, ts, self.discretizer,
                                      self.normalizer)
                (Xs, ys, ts, names) = common_utils.sort_and_shuffle(
                    [Xs, ys, ts, names], batch_size)

                for start in range(0, chunk, batch_size):
                    stop = start + batch_size
                    payload = (nn_utils.pad_zeros(Xs[start:stop]),
                               np.array(ys[start:stop]))
                    if self.return_names:
                        yield {
                            "data": payload,
                            "names": names[start:stop],
                            "ts": ts[start:stop]
                        }
                    else:
                        yield payload
예제 #8
0
def load_data(reader,
              discretizer,
              normalizer,
              diseases_embedding,
              return_names=False):
    """Read the whole dataset, discretize each stay up to its LOS,
    normalize, append the disease embedding to every timestep, and
    return the zero-padded data with labels (and optionally names)."""
    count = reader.get_number_of_examples()

    chunk = common_utils.read_chunk(reader, count)
    data = chunk["X"]
    ts = chunk["t"]
    labels = chunk["y"]
    names = chunk["name"]

    data = [discretizer.transform_end_t_hours(X, los=t)[0]
            for (X, t) in zip(data, ts)]

    if normalizer is not None:
        data = [normalizer.transform(X) for X in data]

    # Broadcast each stay's disease embedding across its timesteps.
    data = [np.hstack([X, [emb] * len(X)])
            for (X, emb) in zip(data, diseases_embedding)]

    data = nn_utils.pad_zeros(data)

    whole_data = (data, labels)
    if return_names:
        return {"data": whole_data, "names": names}
    return whole_data
예제 #9
0
    def _generator(self):
        """Yield ([X, mask], y) batches forever, optionally with names/ts.

        Re-shuffles (or re-sorts) the whole dataset at the start of each
        epoch, keeping data, names and ts aligned throughout.
        """
        B = self.batch_size
        while True:
            if self.shuffle:
                N = len(self.data[1])
                # list() is required: Python 3 range objects are immutable
                # and cannot be shuffled in place.
                order = list(range(N))
                random.shuffle(order)
                tmp_data = [[[None] * N, [None] * N], [None] * N]
                tmp_names = [None] * N
                tmp_ts = [None] * N
                for i in range(N):
                    tmp_data[0][0][i] = self.data[0][0][order[i]]
                    tmp_data[0][1][i] = self.data[0][1][order[i]]
                    tmp_data[1][i] = self.data[1][order[i]]
                    tmp_names[i] = self.names[order[i]]
                    tmp_ts[i] = self.ts[order[i]]
                self.data = tmp_data
                self.names = tmp_names
                self.ts = tmp_ts
            else:
                # sort entirely
                Xs = self.data[0][0]
                masks = self.data[0][1]
                ys = self.data[1]
                (Xs, masks, ys, self.names,
                 self.ts) = common_utils.sort_and_shuffle(
                     [Xs, masks, ys, self.names, self.ts], B)
                self.data = [[Xs, masks], ys]

            for i in range(0, len(self.data[1]), B):
                X = self.data[0][0][i:i + B]
                mask = self.data[0][1][i:i + B]
                y = self.data[1][i:i + B]
                names = self.names[i:i + B]
                ts = self.ts[i:i + B]

                X = nn_utils.pad_zeros(X)  # (B, T, D)
                mask = nn_utils.pad_zeros(mask)  # (B, T)
                y = nn_utils.pad_zeros(y)
                y = np.expand_dims(y, axis=-1)  # (B, T, 1)
                batch_data = ([X, mask], y)
                if not self.return_names:
                    yield batch_data
                else:
                    yield {"data": batch_data, "names": names, "ts": ts}
def load_train_data(reader, discretizer, normalizer, diseases_embedding, return_names=False):
    """Load the full dataset and return a class-balanced, shuffled copy.

    Undersamples the negative class (label 0) down to the positive count,
    shuffles the combined set, and zero-pads the inputs. The `return_names`
    flag only controls whether the result is wrapped in a dict.
    """
    N = reader.get_number_of_examples()

    ret = common_utils.read_chunk(reader, N)
    data = ret["X"]
    ts = ret["t"]
    labels = ret["y"]
    names = ret["name"]
    data = [discretizer.transform_first_t_hours(X, end=t)[0]
            for (X, t) in zip(data, ts)]

    if normalizer is not None:
        data = [normalizer.transform(X) for X in data]
    # Broadcast each stay's disease embedding across its timesteps.
    data = [np.hstack([X, [d] * len(X)])
            for (X, d) in zip(data, diseases_embedding)]

    # Partition samples by class label.
    labels_1 = []
    labels_0 = []
    data_1 = []
    data_0 = []
    for lbl, sample in zip(labels, data):
        if lbl == 1:
            labels_1.append(lbl)
            data_1.append(sample)
        elif lbl == 0:
            labels_0.append(lbl)
            data_0.append(sample)

    print('labels_1:', len(labels_1))
    print('labels_0:', len(labels_0))
    # Undersample class 0 to match the size of class 1.
    indices = np.random.choice(len(labels_0), len(labels_1), replace=False)
    labels_0_sample = [labels_0[idx] for idx in indices]
    print('len(labels_0_sample): ', len(labels_0_sample))

    data_0_sample = [data_0[idx] for idx in indices]
    print('len(data_0_sample): ', len(data_0_sample))

    data_new = data_0_sample + data_1
    label_new = labels_0_sample + labels_1

    # Shuffle data and labels in lockstep.
    paired = list(zip(data_new, label_new))
    random.shuffle(paired)
    data_new, label_new = zip(*paired)
    data_new = list(data_new)
    label_new = list(label_new)
    print('data_new: ', len(data_new))
    print('label_new: ', len(label_new))

    data = nn_utils.pad_zeros(data_new)

    whole_data = (data, label_new)
    if not return_names:
        return whole_data
    return {"data": whole_data}
예제 #11
0
    def _generator(self):
        """Yield ([X, mask], y) batches, optionally with names/ts, keeping
        data, names and ts aligned across the per-epoch re-ordering."""
        B = self.batch_size
        while True:
            if self.shuffle:
                N = len(self.data[1])
                # list() is required: Python 3 range objects are immutable
                # and cannot be shuffled in place.
                order = list(range(N))
                random.shuffle(order)
                tmp_data = [[[None] * N, [None] * N], [None] * N]
                tmp_names = [None] * N
                tmp_ts = [None] * N
                for i in range(N):
                    tmp_data[0][0][i] = self.data[0][0][order[i]]
                    tmp_data[0][1][i] = self.data[0][1][order[i]]
                    tmp_data[1][i] = self.data[1][order[i]]
                    tmp_names[i] = self.names[order[i]]
                    tmp_ts[i] = self.ts[order[i]]
                self.data = tmp_data
                self.names = tmp_names
                self.ts = tmp_ts
            else:
                # sort entirely
                Xs = self.data[0][0]
                masks = self.data[0][1]
                ys = self.data[1]
                (Xs, masks, ys, self.names,
                 self.ts) = common_utils.sort_and_shuffle(
                     [Xs, masks, ys, self.names, self.ts], B)
                self.data = [[Xs, masks], ys]

            for i in range(0, len(self.data[1]), B):
                X = self.data[0][0][i:i + B]
                mask = self.data[0][1][i:i + B]
                y = self.data[1][i:i + B]
                names = self.names[i:i + B]
                ts = self.ts[i:i + B]

                X = nn_utils.pad_zeros(X)  # (B, T, D)
                mask = nn_utils.pad_zeros(mask)  # (B, T)
                y = nn_utils.pad_zeros(y)
                y = np.expand_dims(y, axis=-1)  # (B, T, 1)
                batch_data = ([X, mask], y)
                if not self.return_names:
                    yield batch_data
                else:
                    yield {"data": batch_data, "names": names, "ts": ts}
예제 #12
0
    def _generator(self):
        """Cycle over the dataset forever, yielding ([X, mask], y) batches
        with X (B, T, D), mask (B, T) and y (B, T, 1)."""
        batch_size = self.batch_size
        while True:
            # convert to right format for sort_and_shuffle
            Xs = self.data[0][0]
            masks = self.data[0][1]
            ys = self.data[1]
            (Xs, masks, ys) = common_utils.sort_and_shuffle(
                [Xs, masks, ys], batch_size)
            self.data = [[Xs, masks], ys]

            for start in range(0, len(self.data[1]), batch_size):
                stop = start + batch_size
                X = nn_utils.pad_zeros(self.data[0][0][start:stop])    # (B, T, D)
                mask = nn_utils.pad_zeros(self.data[0][1][start:stop])  # (B, T)
                y = nn_utils.pad_zeros(self.data[1][start:stop])
                y = np.expand_dims(y, axis=-1)  # (B, T, 1)
                yield ([X, mask], y)
예제 #13
0
    def process_input(self, data_raw):
        """Pad inputs and compute sequence lengths plus custom LOS bins.

        Returns (Xs, lens, bin_ids, ys) where lens holds the original
        (pre-padding) sequence lengths and bin_ids the int32 custom bins.
        """
        Xs = nn_utils.pad_zeros(data_raw[0]).astype(np.float32)
        # Materialize a list: on Python 3, map() returns a one-shot
        # iterator that would be exhausted after a single use.
        lens = [len(x) for x in data_raw[0]]
        ys = np.array(data_raw[1]).astype(np.float32)

        bin_ids = [metrics.get_bin_custom(x, self.nbins) for x in ys]

        for x in bin_ids:
            assert x >= 0 and x < self.nbins

        return (Xs, lens, np.array(bin_ids, dtype=np.int32), ys)
예제 #14
0
def load_data(reader, discretizer, normalizer, small_part=False, pad=False):
    """Read the dataset (first 1000 examples if small_part), discretize
    and normalize it, and return (data, ys), padded when requested."""
    N = 1000 if small_part else reader.get_number_of_examples()
    (data, ts, ys, header) = read_chunk(reader, N)
    data = [discretizer.transform(X, end=t)[0] for (X, t) in zip(data, ts)]
    if normalizer is not None:
        data = [normalizer.transform(X) for X in data]
    ys = np.array(ys, dtype=np.int32)
    if pad:
        return (nn_utils.pad_zeros(data), ys)
    return (data, ys)
예제 #15
0
 def process_input(self, data_raw):
     """Pad and type-cast the raw multitask batch.

     Returns padded inputs X and lengths, plus the per-task masks/labels
     for in-hospital mortality (ihm), length-of-stay (los, log1p-scaled),
     phenotyping (ph) and decompensation (decomp).
     """
     X = nn_utils.pad_zeros(data_raw[0]).astype(np.float32)
     # Build an explicit list: on Python 3, np.array(map(...)) would wrap
     # the map object itself and fail to coerce it to int32.
     lens = np.array([len(x) for x in data_raw[0]], dtype=np.int32)

     fms = data_raw[1]
     loss = data_raw[2]
     phs = data_raw[3]
     sws = data_raw[4]

     # ihm tuples are (position, mask, label).
     ihm_pos = np.array([x[0] for x in fms], dtype=np.int32)
     ihm_mask = np.array([x[1] for x in fms], dtype=np.int32)
     ihm_label = np.array([x[2] for x in fms], dtype=np.int32)

     los_mask = [np.array(x[0], dtype=np.int32) for x in loss]
     los_mask = nn_utils.pad_zeros(los_mask).astype(np.int32)

     # log1p-scale the LOS regression targets.
     los_label = [np.array(x[1], dtype=np.float32) for x in loss]
     los_label = np.log(1.0 + nn_utils.pad_zeros(los_label)).astype(np.float32)

     ph_label = [np.array(x, dtype=np.int32) for x in phs]
     ph_label = nn_utils.pad_zeros(ph_label).astype(np.int32)

     decomp_mask = [np.array(x[0], dtype=np.int32) for x in sws]
     decomp_mask = nn_utils.pad_zeros(decomp_mask).astype(np.int32)

     decomp_label = [np.array(x[1], dtype=np.int32) for x in sws]
     decomp_label = nn_utils.pad_zeros(decomp_label).astype(np.int32)

     return (X, lens,
             ihm_pos, ihm_mask, ihm_label,
             los_mask, los_label,
             ph_label,
             decomp_mask, decomp_label)
예제 #16
0
    def _generator(self):
        """Cycle over the dataset yielding ([X, mask], y) batches with y
        binned by self.partition; optionally also yield the raw y_true."""
        batch_size = self.batch_size
        while True:
            # convert to right format for sort_and_shuffle
            Xs = self.data[0][0]
            masks = self.data[0][1]
            ys = self.data[1]
            (Xs, masks, ys) = common_utils.sort_and_shuffle(
                [Xs, masks, ys], batch_size)
            self.data = [[Xs, masks], ys]

            for start in range(0, len(self.data[1]), batch_size):
                stop = start + batch_size
                X = self.data[0][0][start:stop]
                mask = self.data[0][1][start:stop]
                y = self.data[1][start:stop]

                # Keep an un-binned copy of the targets before partitioning.
                y_true = nn_utils.pad_zeros([np.array(seq) for seq in y])
                y_true = np.expand_dims(y_true, axis=-1)

                if self.partition == 'log':
                    y = [np.array([metrics.get_bin_log(v, 10) for v in seq])
                         for seq in y]
                if self.partition == 'custom':
                    y = [np.array([metrics.get_bin_custom(v, 10) for v in seq])
                         for seq in y]

                X = nn_utils.pad_zeros(X)  # (B, T, D)
                mask = nn_utils.pad_zeros(mask)  # (B, T)
                y = np.expand_dims(nn_utils.pad_zeros(y), axis=-1)

                if self.return_y_true:
                    yield ([X, mask], y, y_true)
                else:
                    yield ([X, mask], y)
예제 #17
0
파일: utils.py 프로젝트: aureus5/mimiciII
    def _generator(self):
        """Shuffle the reader, read one chunk per pass, and yield padded
        (X, y) batches from it."""
        batch_size = self.batch_size
        while True:
            self.reader.random_shuffle()
            (data, ts, labels, header) = read_chunk(self.reader,
                                                    self.chunk_size)
            data = preprocess_chunk(data, ts, self.discretizer,
                                    self.normalizer)
            data = common_utils.sort_and_shuffle((data, labels), batch_size)

            for start in range(0, self.chunk_size, batch_size):
                stop = start + batch_size
                yield (nn_utils.pad_zeros(data[0][start:stop]),
                       np.array(data[1][start:stop]))
예제 #18
0
    def _generator(self):
        """Yield (x, y) batches (optionally in a dict with names/ts),
        re-ordering each epoch while keeping data, names and ts aligned.
        With target replication, y is also repeated along time as y_rep."""
        B = self.batch_size
        while True:
            if self.shuffle:
                N = len(self.data[1])
                # list() is required: Python 3 range objects are immutable
                # and cannot be shuffled in place.
                order = list(range(N))
                random.shuffle(order)
                tmp_data = [[None] * N, [None] * N]
                tmp_names = [None] * N
                tmp_ts = [None] * N
                for i in range(N):
                    tmp_data[0][i] = self.data[0][order[i]]
                    tmp_data[1][i] = self.data[1][order[i]]
                    tmp_names[i] = self.names[order[i]]
                    tmp_ts[i] = self.ts[order[i]]
                self.data = tmp_data
                self.names = tmp_names
                self.ts = tmp_ts
            else:
                # sort entirely
                X = self.data[0]
                y = self.data[1]
                (X, y, self.names, self.ts) = common_utils.sort_and_shuffle(
                    [X, y, self.names, self.ts], B)
                self.data = [X, y]

            self.data[1] = np.array(
                self.data[1])  # this is important for Keras
            for i in range(0, len(self.data[0]), B):
                x = self.data[0][i:i + B]
                y = self.data[1][i:i + B]
                names = self.names[i:i + B]
                ts = self.ts[i:i + B]

                x = nn_utils.pad_zeros(x)
                y = np.array(y)  # (B, 25)

                if self.target_repl:
                    y_rep = np.expand_dims(y,
                                           axis=1).repeat(x.shape[1],
                                                          axis=1)  # (B, T, 25)
                    batch_data = (x, [y, y_rep])
                else:
                    batch_data = (x, y)

                if not self.return_names:
                    yield batch_data
                else:
                    yield {"data": batch_data, "names": names, "ts": ts}
예제 #19
0
    def _generator(self):
        """Yield (x, y) batches (optionally in a dict with names/ts),
        re-ordering each epoch while keeping data, names and ts aligned.
        With target replication, y is also repeated along time as y_rep."""
        B = self.batch_size
        while True:
            if self.shuffle:
                N = len(self.data[1])
                # list() is required: Python 3 range objects are immutable
                # and cannot be shuffled in place.
                order = list(range(N))
                random.shuffle(order)
                tmp_data = [[None] * N, [None] * N]
                tmp_names = [None] * N
                tmp_ts = [None] * N
                for i in range(N):
                    tmp_data[0][i] = self.data[0][order[i]]
                    tmp_data[1][i] = self.data[1][order[i]]
                    tmp_names[i] = self.names[order[i]]
                    tmp_ts[i] = self.ts[order[i]]
                self.data = tmp_data
                self.names = tmp_names
                self.ts = tmp_ts
            else:
                # sort entirely
                X = self.data[0]
                y = self.data[1]
                (X, y, self.names, self.ts) = common_utils.sort_and_shuffle([X, y, self.names, self.ts], B)
                self.data = [X, y]

            self.data[1] = np.array(self.data[1])  # this is important for Keras
            for i in range(0, len(self.data[0]), B):
                x = self.data[0][i:i+B]
                y = self.data[1][i:i+B]
                names = self.names[i:i + B]
                ts = self.ts[i:i + B]

                x = nn_utils.pad_zeros(x)
                y = np.array(y)  # (B, 25)

                if self.target_repl:
                    y_rep = np.expand_dims(y, axis=1).repeat(x.shape[1], axis=1)  # (B, T, 25)
                    batch_data = (x, [y, y_rep])
                else:
                    batch_data = (x, y)

                if not self.return_names:
                    yield batch_data
                else:
                    yield {"data": batch_data, "names": names, "ts": ts}
예제 #20
0
    def _generator(self):
        """Stream padded (X, y) batches, reading and preprocessing one
        chunk of the reader at a time."""
        batch_size = self.batch_size
        while True:
            if self.shuffle:
                self.reader.random_shuffle()
            left = self.n_examples
            while left > 0:
                chunk = min(self.chunk_size, left)
                left -= chunk
                (data, ts, labels, header) = read_chunk(self.reader, chunk)
                data = preprocess_chunk(data, ts, self.discretizer,
                                        self.normalizer)
                data = common_utils.sort_and_shuffle((data, labels),
                                                     batch_size)

                for start in range(0, chunk, batch_size):
                    stop = start + batch_size
                    yield (nn_utils.pad_zeros(data[0][start:stop]),
                           np.array(data[1][start:stop]))
예제 #21
0
    def _generator(self):
        """Cycle over the dataset yielding (x, y) batches; with target
        replication, y is also repeated along time as (B, T, 25)."""
        batch_size = self.batch_size
        while True:
            self.data = common_utils.sort_and_shuffle(self.data, batch_size)
            self.data[1] = np.array(self.data[1])  # this is important for Keras
            for start in range(0, len(self.data[0]), batch_size):
                stop = start + batch_size
                x = nn_utils.pad_zeros(self.data[0][start:stop])
                y = np.array(self.data[1][start:stop])  # (B, 25)

                if not self.target_repl:
                    yield (x, y)
                else:
                    n_steps = x.shape[1]
                    y_rep = np.expand_dims(y, axis=1).repeat(
                        n_steps, axis=1)  # (B, T, 25)
                    yield (x, [y, y_rep])
예제 #22
0
 def process_input(self, data_raw):
     """Pad inputs and derive lengths plus per-sample class weights.

     Returns (X, lens, ys, weights); weight is 25 for positive labels
     (y == 1) and 1 otherwise.
     """
     # Materialize lists: on Python 3, map() returns a one-shot iterator
     # that would be silently exhausted after a single traversal.
     X = nn_utils.pad_zeros(data_raw[0]).astype(np.float32)
     lens = [len(x) for x in data_raw[0]]
     ys = data_raw[1]
     weights = [25 if y == 1 else 1 for y in ys]
     return (X, lens, ys, weights)
예제 #23
0
    def _generator(self):
        """Yield multitask batches: inputs [X, ihm_M, decomp_M, los_M] and
        outputs for ihm, decomp, los and pheno (with replicated targets
        when self.target_repl); optionally also the raw los_y_true."""
        B = self.batch_size
        while True:
            # convert to right format for sort_and_shuffle;
            # list() is required: Python 3 dict views are not indexable.
            kvpairs = list(self.data.items())
            mas = [kv[1] for kv in kvpairs]
            mas = common_utils.sort_and_shuffle(mas, B)
            for i in range(len(kvpairs)):
                self.data[kvpairs[i][0]] = mas[i]

            for i in range(0, len(self.data['X']), B):
                outputs = []

                # X
                X = self.data['X'][i:i + B]
                X = nn_utils.pad_zeros(X, min_length=self.ihm_pos + 1)
                T = X.shape[1]

                ## ihm
                ihm_M = np.array(self.data['ihm_M'][i:i + B])
                ihm_M = np.expand_dims(ihm_M, axis=-1)  # (B, 1)
                ihm_y = np.array(self.data['ihm_y'][i:i + B])
                ihm_y = np.expand_dims(ihm_y, axis=-1)  # (B, 1)
                outputs.append(ihm_y)
                if self.target_repl:
                    ihm_seq = np.expand_dims(ihm_y, axis=-1).repeat(
                        T, axis=1)  # (B, T, 1)
                    outputs.append(ihm_seq)

                ## decomp
                decomp_M = self.data['decomp_M'][i:i + B]
                decomp_M = nn_utils.pad_zeros(decomp_M,
                                              min_length=self.ihm_pos + 1)
                decomp_y = self.data['decomp_y'][i:i + B]
                decomp_y = nn_utils.pad_zeros(decomp_y,
                                              min_length=self.ihm_pos + 1)
                decomp_y = np.expand_dims(decomp_y, axis=-1)  # (B, T, 1)
                outputs.append(decomp_y)

                ## los
                los_M = self.data['los_M'][i:i + B]
                los_M = nn_utils.pad_zeros(los_M, min_length=self.ihm_pos + 1)
                los_y = self.data['los_y'][i:i + B]
                # Keep an un-binned copy before partitioning.
                los_y_true = nn_utils.pad_zeros(los_y,
                                                min_length=self.ihm_pos + 1)

                if self.partition == 'log':
                    los_y = [
                        np.array([metrics.get_bin_log(x, 10) for x in z])
                        for z in los_y
                    ]
                if self.partition == 'custom':
                    los_y = [
                        np.array([metrics.get_bin_custom(x, 10) for x in z])
                        for z in los_y
                    ]
                los_y = nn_utils.pad_zeros(los_y, min_length=self.ihm_pos + 1)
                los_y = np.expand_dims(los_y, axis=-1)  # (B, T, 1)
                outputs.append(los_y)

                ## pheno
                pheno_y = np.array(self.data['pheno_y'][i:i + B])
                outputs.append(pheno_y)
                if self.target_repl:
                    pheno_seq = np.expand_dims(pheno_y, axis=1).repeat(
                        T, axis=1)  # (B, T, 25)
                    outputs.append(pheno_seq)

                inputs = [X, ihm_M, decomp_M, los_M]

                if self.return_y_true:
                    yield (inputs, outputs, los_y_true)
                else:
                    yield (inputs, outputs)
예제 #24
0
    def _generator(self):
        """Yield multitask batches (ihm, decomp, los, pheno) with optional
        shuffling, target replication, raw los_y_true and name/ts
        passthrough."""
        B = self.batch_size
        while True:
            # convert to right format for sort_and_shuffle;
            # list() is required: Python 3 dict views are not indexable.
            kvpairs = list(self.data.items())
            mas = [kv[1] for kv in kvpairs]

            if self.shuffle:
                N = len(self.data['X'])
                # list() is required: Python 3 range objects are immutable
                # and cannot be shuffled in place.
                order = list(range(N))
                random.shuffle(order)
                tmp = [None] * len(mas)
                for mas_idx in range(len(mas)):
                    tmp[mas_idx] = [None] * len(mas[mas_idx])
                    for i in range(N):
                        tmp[mas_idx][i] = mas[mas_idx][order[i]]
                for i in range(len(kvpairs)):
                    self.data[kvpairs[i][0]] = tmp[i]
            else:
                # sort entirely
                mas = common_utils.sort_and_shuffle(mas, B)
                for i in range(len(kvpairs)):
                    self.data[kvpairs[i][0]] = mas[i]

            for i in range(0, len(self.data['X']), B):
                outputs = []

                # X
                X = self.data['X'][i:i + B]
                X = nn_utils.pad_zeros(X, min_length=self.ihm_pos + 1)
                T = X.shape[1]

                # ihm
                ihm_M = np.array(self.data['ihm_M'][i:i + B])
                ihm_M = np.expand_dims(ihm_M, axis=-1)  # (B, 1)
                ihm_y = np.array(self.data['ihm_y'][i:i + B])
                ihm_y = np.expand_dims(ihm_y, axis=-1)  # (B, 1)
                outputs.append(ihm_y)
                if self.target_repl:
                    ihm_seq = np.expand_dims(ihm_y, axis=-1).repeat(
                        T, axis=1)  # (B, T, 1)
                    outputs.append(ihm_seq)

                # decomp
                decomp_M = self.data['decomp_M'][i:i + B]
                decomp_M = nn_utils.pad_zeros(decomp_M,
                                              min_length=self.ihm_pos + 1)
                decomp_y = self.data['decomp_y'][i:i + B]
                decomp_y = nn_utils.pad_zeros(decomp_y,
                                              min_length=self.ihm_pos + 1)
                decomp_y = np.expand_dims(decomp_y, axis=-1)  # (B, T, 1)
                outputs.append(decomp_y)

                # los
                los_M = self.data['los_M'][i:i + B]
                los_M = nn_utils.pad_zeros(los_M, min_length=self.ihm_pos + 1)
                los_y = self.data['los_y'][i:i + B]
                # Keep an un-binned copy before partitioning.
                los_y_true = nn_utils.pad_zeros(los_y,
                                                min_length=self.ihm_pos + 1)

                if self.partition == 'log':
                    los_y = [
                        np.array([metrics.get_bin_log(x, 10) for x in z])
                        for z in los_y
                    ]
                if self.partition == 'custom':
                    los_y = [
                        np.array([metrics.get_bin_custom(x, 10) for x in z])
                        for z in los_y
                    ]
                los_y = nn_utils.pad_zeros(los_y, min_length=self.ihm_pos + 1)
                los_y = np.expand_dims(los_y, axis=-1)  # (B, T, 1)
                outputs.append(los_y)

                # pheno
                pheno_y = np.array(self.data['pheno_y'][i:i + B])
                outputs.append(pheno_y)
                if self.target_repl:
                    pheno_seq = np.expand_dims(pheno_y, axis=1).repeat(
                        T, axis=1)  # (B, T, 25)
                    outputs.append(pheno_seq)

                inputs = [X, ihm_M, decomp_M, los_M]

                if self.return_y_true:
                    batch_data = (inputs, outputs, los_y_true)
                else:
                    batch_data = (inputs, outputs)

                if not self.return_names:
                    yield batch_data
                else:
                    yield {
                        "data": batch_data,
                        "names": self.data["names"][i:i + B],
                        "ts": self.data["ts"][i:i + B],
                        "decomp_ts": self.data["decomp_ts"][i:i + B],
                        "los_ts": self.data["los_ts"][i:i + B]
                    }
예제 #25
0
    def _generator(self):
        """Endlessly yield multitask training batches from ``self.data``.

        Each batch is ``(inputs, outputs)`` where
        ``inputs = [X, ihm_M, decomp_M, los_M]`` and ``outputs`` stacks the
        in-hospital-mortality, decompensation, length-of-stay and phenotype
        targets (plus per-timestep replicas when ``self.target_repl`` is set).
        With ``self.return_y_true`` the un-binned LOS targets are appended;
        with ``self.return_names`` a dict with names/timestamps is yielded
        instead.
        """
        B = self.batch_size
        while True:
            # Snapshot the dict as (key, value) pairs in a fixed order so the
            # permuted/sorted columns can be written back under the same keys.
            # list(...) is required on Python 3, where items() is a live view
            # that is not indexable and must not outlive mutation of the dict.
            kv_pairs = list(self.data.items())
            mas = [kv[1] for kv in kv_pairs]

            if self.shuffle:
                # Apply one random permutation consistently to every column.
                # Assumes every column has the same length as data['X'].
                N = len(self.data['X'])
                order = list(range(N))  # list(): range is immutable on Py3
                random.shuffle(order)
                for (key, _), column in zip(kv_pairs, mas):
                    self.data[key] = [column[idx] for idx in order]
            else:
                # Reorder all columns together via the project helper
                # (presumably length-sorting with batch-level shuffling —
                # see common_utils.sort_and_shuffle).
                mas = common_utils.sort_and_shuffle(mas, B)
                for (key, _), column in zip(kv_pairs, mas):
                    self.data[key] = column

            for i in range(0, len(self.data['X']), B):
                outputs = []

                # X: pad the batch to a common length T; ihm_pos must remain
                # a valid timestep index, hence min_length = ihm_pos + 1.
                X = self.data['X'][i:i + B]
                X = nn_utils.pad_zeros(X, min_length=self.ihm_pos + 1)
                T = X.shape[1]

                # In-hospital mortality: per-stay mask and label, (B, 1).
                ihm_M = np.array(self.data['ihm_M'][i:i + B])
                ihm_M = np.expand_dims(ihm_M, axis=-1)  # (B, 1)
                ihm_y = np.array(self.data['ihm_y'][i:i + B])
                ihm_y = np.expand_dims(ihm_y, axis=-1)  # (B, 1)
                outputs.append(ihm_y)
                if self.target_repl:
                    # Replicate the stay-level label at every timestep.
                    ihm_seq = np.expand_dims(ihm_y, axis=-1).repeat(T, axis=1)  # (B, T, 1)
                    outputs.append(ihm_seq)

                # Decompensation: per-timestep mask and label.
                decomp_M = self.data['decomp_M'][i:i + B]
                decomp_M = nn_utils.pad_zeros(decomp_M, min_length=self.ihm_pos + 1)
                decomp_y = self.data['decomp_y'][i:i + B]
                decomp_y = nn_utils.pad_zeros(decomp_y, min_length=self.ihm_pos + 1)
                decomp_y = np.expand_dims(decomp_y, axis=-1)  # (B, T, 1)
                outputs.append(decomp_y)

                # Length of stay: keep the raw values for return_y_true,
                # optionally bin them (10 bins) for training.
                los_M = self.data['los_M'][i:i + B]
                los_M = nn_utils.pad_zeros(los_M, min_length=self.ihm_pos + 1)
                los_y = self.data['los_y'][i:i + B]
                los_y_true = nn_utils.pad_zeros(los_y, min_length=self.ihm_pos + 1)

                if self.partition == 'log':
                    los_y = [np.array([metrics.get_bin_log(x, 10) for x in z]) for z in los_y]
                if self.partition == 'custom':
                    los_y = [np.array([metrics.get_bin_custom(x, 10) for x in z]) for z in los_y]
                los_y = nn_utils.pad_zeros(los_y, min_length=self.ihm_pos + 1)
                los_y = np.expand_dims(los_y, axis=-1)  # (B, T, 1)
                outputs.append(los_y)

                # Phenotyping: one multi-label vector per stay.
                pheno_y = np.array(self.data['pheno_y'][i:i + B])
                outputs.append(pheno_y)
                if self.target_repl:
                    pheno_seq = np.expand_dims(pheno_y, axis=1).repeat(T, axis=1)  # (B, T, 25)
                    outputs.append(pheno_seq)

                inputs = [X, ihm_M, decomp_M, los_M]

                if self.return_y_true:
                    batch_data = (inputs, outputs, los_y_true)
                else:
                    batch_data = (inputs, outputs)

                if not self.return_names:
                    yield batch_data
                else:
                    yield {'data': batch_data,
                           'names': self.data['names'][i:i + B],
                           'decomp_ts': self.data['decomp_ts'][i:i + B],
                           'los_ts': self.data['los_ts'][i:i + B],
                           'pheno_ts': self.data['pheno_ts'][i:i + B]}
예제 #26
0
 def process_input(self, data_raw):
     """Prepare a raw batch for the model.

     Returns a tuple of (zero-padded float32 inputs, list of original
     sequence lengths, data_raw[1] passed through unchanged — presumably
     the labels; confirm against the caller).
     """
     padded = nn_utils.pad_zeros(data_raw[0]).astype(np.float32)
     # Build an explicit list: on Python 3, map() returns a one-shot
     # iterator, which would silently break callers that index or
     # re-iterate the lengths.
     lengths = [len(seq) for seq in data_raw[0]]
     return (padded, lengths, data_raw[1])
예제 #27
0
                    listfile='../../data/in-hospital-mortality/test_listfile.csv',
                    period_length=48.0)

    ihm_y_true = []
    ihm_pred = []

    n_examples = test_reader.get_number_of_examples()
    for i in range(0, n_examples, args.batch_size):
        j = min(i + args.batch_size, n_examples)
        (X, ts, labels, header) = read_chunk(test_reader, j - i)
        cur_size = j - i  # the final chunk can be smaller than batch_size

        # BUG FIX: the original iterated range(args.batch_size) and reused
        # the outer loop's name `i`; on the final, smaller chunk this raised
        # IndexError. Iterate only over the rows actually read.
        for k in range(cur_size):
            X[k] = discretizer.transform(X[k], end=48.0)[0]
            X[k] = normalizer.transform(X[k])

        X = nn_utils.pad_zeros(X, min_length=args_dict['ihm_pos'] + 1)
        T = X.shape[1]
        # Masks must match the actual chunk size, not args.batch_size,
        # or the final partial chunk is mis-shaped.
        ihm_M = np.ones(shape=(cur_size, 1))
        decomp_M = np.ones(shape=(cur_size, T))
        los_M = np.ones(shape=(cur_size, T))

        pred = model.predict([X, ihm_M, decomp_M, los_M])[0]
        ihm_y_true += labels
        ihm_pred += list(pred.flatten())

    print("\n ================= 48h mortality ================")
    ihm_pred = np.array(ihm_pred)
    # Stack into (N, 2) class probabilities: [P(alive), P(death)].
    ihm_pred = np.stack([1 - ihm_pred, ihm_pred], axis=1)
    ihm_ret = metrics.print_metrics_binary(ihm_y_true, ihm_pred)

else: