Exemple #1
0
    def _generator(self):
        """Yield padded ``([X, mask], y)`` batches in an endless loop.

        At the start of every pass the stored examples are reordered: with
        ``self.shuffle`` set they are permuted uniformly at random,
        otherwise they are handed to ``common_utils.sort_and_shuffle``.
        The reordered sequences are written back to ``self.data``,
        ``self.names`` and ``self.ts``.

        Yields:
            ``([X, mask], y)`` when ``self.return_names`` is false,
            otherwise a dict with keys ``"data"``, ``"names"`` and ``"ts"``.
        """
        batch_size = self.batch_size
        while True:
            if self.shuffle:
                perm = list(range(len(self.data[1])))
                random.shuffle(perm)
                # Build every permuted column first, then publish them.
                new_inputs = [self.data[0][0][src] for src in perm]
                new_masks = [self.data[0][1][src] for src in perm]
                new_targets = [self.data[1][src] for src in perm]
                new_names = [self.names[src] for src in perm]
                new_ts = [self.ts[src] for src in perm]
                self.data = [[new_inputs, new_masks], new_targets]
                self.names = new_names
                self.ts = new_ts
            else:
                # No random permutation: delegate ordering to the helper.
                columns = [self.data[0][0], self.data[0][1], self.data[1],
                           self.names, self.ts]
                (seqs, seq_masks, targets, self.names,
                 self.ts) = common_utils.sort_and_shuffle(columns, batch_size)
                self.data = [[seqs, seq_masks], targets]

            for start in range(0, len(self.data[1]), batch_size):
                stop = start + batch_size
                raw_X = self.data[0][0][start:stop]
                raw_mask = self.data[0][1][start:stop]
                raw_y = self.data[1][start:stop]
                names = self.names[start:stop]
                ts = self.ts[start:stop]

                # Shape notes below are carried over from the original
                # annotations: X (B, T, D), mask (B, T), y (B, T, 1).
                X = common_utils.pad_zeros(raw_X)
                mask = common_utils.pad_zeros(raw_mask)
                y = np.expand_dims(common_utils.pad_zeros(raw_y), axis=-1)
                batch_data = ([X, mask], y)

                if self.return_names:
                    yield {"data": batch_data, "names": names, "ts": ts}
                else:
                    yield batch_data
    def _generator(self):
        """Yield batches read chunk-by-chunk from ``self.reader``, forever.

        Every pass optionally calls ``self.reader.random_shuffle()`` and
        then requests ``int(self.n_examples * 1.15)`` examples in chunks of
        at most ``self.chunk_size``.  Each chunk is preprocessed, passed
        through ``common_utils.sort_and_shuffle`` and cut into batches of
        ``self.batch_size``.  Targets are binned with ``metrics`` when
        ``self.partition`` is ``'log'`` or ``'custom'``.

        NOTE(review): the reason for requesting 15% more than
        ``n_examples`` is not visible in this method - confirm with the
        reader's wrap-around behaviour.

        Yields:
            ``(X, y)`` or ``(X, y, y_true)`` (when ``self.return_y_true``),
            wrapped in a dict with ``"names"`` and ``"ts"`` when
            ``self.return_names`` is set.
        """
        print(f"examples: {self.n_examples}  steps: {self.steps}")

        batch_size = self.batch_size
        while True:
            if self.shuffle:
                self.reader.random_shuffle()

            remaining = int(self.n_examples * 1.15)
            while remaining > 0:
                current_size = min(self.chunk_size, remaining)
                remaining -= current_size
                print(f"Reading chunk size: {current_size} with {remaining} remaining")

                chunk = common_utils.read_chunk(self.reader, current_size)
                Xs, ts, ys, names = (chunk["X"], chunk["t"], chunk["y"],
                                     chunk["name"])

                print(f"len(Xs): {len(Xs)}")

                Xs = preprocess_chunk(Xs, ts, self.discretizer, self.normalizer)
                Xs, ys, ts, names = common_utils.sort_and_shuffle(
                    [Xs, ys, ts, names], batch_size)

                for start in range(0, current_size, batch_size):
                    stop = start + batch_size

                    X = common_utils.pad_zeros(Xs[start:stop])
                    targets = ys[start:stop]
                    # Keep the unbinned targets before any partitioning.
                    y_true = np.array(targets)
                    batch_names = names[start:stop]
                    batch_ts = ts[start:stop]

                    if self.partition == 'log':
                        targets = [metrics.get_bin_log(v, 10) for v in targets]
                    elif self.partition == 'custom':
                        targets = [metrics.get_bin_custom(v, 10)
                                   for v in targets]
                    y = np.array(targets)

                    batch_data = (X, y, y_true) if self.return_y_true else (X, y)

                    if self.return_names:
                        yield {"data": batch_data, "names": batch_names, "ts": batch_ts}
                    else:
                        yield batch_data
Exemple #3
0
    def _generator(self):
        """Yield batches read chunk-by-chunk from ``self.reader``, forever.

        Each pass optionally calls ``self.reader.random_shuffle()`` and then
        consumes ``self.n_examples`` examples in chunks of at most
        ``self.chunk_size``.  A chunk is run through
        ``preprocess_chunk_time`` (which returns two sequences, here named
        ``Xs`` and ``Ts``), reordered by ``common_utils.sort_and_shuffle``
        and sliced into batches of ``self.batch_size``.

        Yields:
            ``([X, T], y)`` when ``self.use_time`` is set, else ``(X, y)``;
            wrapped in a dict with ``"names"`` and ``"ts"`` when
            ``self.return_names`` is set.
        """
        batch_size = self.batch_size
        while True:
            if self.shuffle:
                self.reader.random_shuffle()

            left = self.n_examples
            while left > 0:
                chunk_len = min(self.chunk_size, left)
                left -= chunk_len

                chunk = common_utils.read_chunk(self.reader, chunk_len)
                Xs, ts, ys, names = (chunk["X"], chunk["t"], chunk["y"],
                                     chunk["name"])

                Xs, Ts = preprocess_chunk_time(
                    Xs, ts, self.discretizer, self.normalizer,
                    self.max_seq_len, self.mask_value)
                Xs, Ts, ys, ts, names = common_utils.sort_and_shuffle(
                    [Xs, Ts, ys, ts, names], batch_size)

                for start in range(0, chunk_len, batch_size):
                    stop = start + batch_size
                    X = common_utils.pad_zeros(Xs[start:stop])
                    # T is padded even when use_time is off, as before.
                    T = common_utils.pad_zeros(Ts[start:stop])
                    y = np.array(ys[start:stop])
                    batch_names = names[start:stop]
                    batch_ts = ts[start:stop]

                    batch_data = ([X, T], y) if self.use_time else (X, y)

                    if self.return_names:
                        yield {
                            "data": batch_data,
                            "names": batch_names,
                            "ts": batch_ts,
                        }
                    else:
                        yield batch_data
Exemple #4
0
    def _generator(self):
        """Yield batches read chunk-by-chunk from ``self.reader``, forever.

        Each pass optionally calls ``self.reader.random_shuffle()`` and then
        consumes ``self.n_examples`` examples in chunks of at most
        ``self.chunk_size``.  Chunks are preprocessed with
        ``max_seq_len=1200`` and ``mask_value=0.``, reordered by
        ``common_utils.sort_and_shuffle`` and sliced into batches of
        ``self.batch_size``.  Targets are binned with ``metrics`` when
        ``self.partition`` is ``'log'`` or ``'custom'``.

        Yields:
            ``(X, y)`` or ``(X, y, y_true)`` (when ``self.return_y_true``),
            wrapped in a dict with ``"names"`` and ``"ts"`` when
            ``self.return_names`` is set.
        """
        batch_size = self.batch_size
        while True:
            if self.shuffle:
                self.reader.random_shuffle()

            left = self.n_examples
            while left > 0:
                chunk_len = min(self.chunk_size, left)
                left -= chunk_len

                chunk = common_utils.read_chunk(self.reader, chunk_len)
                Xs, ts, ys, names = (chunk["X"], chunk["t"], chunk["y"],
                                     chunk["name"])

                Xs = preprocess_chunk(Xs, ts, self.discretizer,
                                      self.normalizer,
                                      max_seq_len=1200, mask_value=0.)
                Xs, ys, ts, names = common_utils.sort_and_shuffle(
                    [Xs, ys, ts, names], batch_size)

                for start in range(0, chunk_len, batch_size):
                    stop = start + batch_size
                    X = common_utils.pad_zeros(Xs[start:stop])
                    targets = ys[start:stop]
                    # Keep the unbinned targets before any partitioning.
                    y_true = np.array(targets)
                    batch_names = names[start:stop]
                    batch_ts = ts[start:stop]

                    if self.partition == 'log':
                        targets = [metrics.get_bin_log(v, 10) for v in targets]
                    elif self.partition == 'custom':
                        targets = [metrics.get_bin_custom(v, 10)
                                   for v in targets]
                    y = np.array(targets)

                    batch_data = (X, y, y_true) if self.return_y_true else (X, y)

                    if self.return_names:
                        yield {
                            "data": batch_data,
                            "names": batch_names,
                            "ts": batch_ts,
                        }
                    else:
                        yield batch_data
Exemple #5
0
    def _generator(self):
        """Yield ``(x, y)`` batches from the in-memory data, forever.

        At the start of every pass the examples are reordered: permuted at
        random when ``self.shuffle`` is set, otherwise passed through
        ``common_utils.sort_and_shuffle``.  The reordered sequences are
        stored back on ``self.data``, ``self.names`` and ``self.ts``, and
        ``self.data[1]`` is converted to a NumPy array.

        Yields:
            ``(x, y)``, or ``(x, [y, y_rep])`` when ``self.target_repl`` is
            set, where ``y_rep`` repeats ``y`` along a new axis 1
            ``x.shape[1]`` times.  Wrapped in a dict with ``"names"`` and
            ``"ts"`` when ``self.return_names`` is set.
        """
        batch_size = self.batch_size
        while True:
            if self.shuffle:
                perm = list(range(len(self.data[1])))
                random.shuffle(perm)
                # Build every permuted column first, then publish them.
                new_x = [self.data[0][src] for src in perm]
                new_y = [self.data[1][src] for src in perm]
                new_names = [self.names[src] for src in perm]
                new_ts = [self.ts[src] for src in perm]
                self.data = [new_x, new_y]
                self.names = new_names
                self.ts = new_ts
            else:
                # No random permutation: delegate ordering to the helper.
                columns = [self.data[0], self.data[1], self.names, self.ts]
                (inputs, targets, self.names,
                 self.ts) = common_utils.sort_and_shuffle(columns, batch_size)
                self.data = [inputs, targets]

            # The original author notes this conversion is important for Keras.
            self.data[1] = np.array(self.data[1])

            for start in range(0, len(self.data[0]), batch_size):
                stop = start + batch_size
                raw_x = self.data[0][start:stop]
                raw_y = self.data[1][start:stop]
                names = self.names[start:stop]
                ts = self.ts[start:stop]

                x = common_utils.pad_zeros(raw_x)
                y = np.array(raw_y)

                if self.target_repl:
                    # Replicate the target once per step of x's axis 1.
                    y_rep = np.repeat(np.expand_dims(y, axis=1),
                                      x.shape[1], axis=1)
                    batch_data = (x, [y, y_rep])
                else:
                    batch_data = (x, y)

                if self.return_names:
                    yield {"data": batch_data, "names": names, "ts": ts}
                else:
                    yield batch_data
Exemple #6
0
    def getitem(self, index, return_y_true=False):
        """Build the batch at position ``index`` directly from the reader.

        Reads ``self.batch_size`` examples starting at offset
        ``index * self.batch_size`` via ``common_utils.read_chunk_index``,
        preprocesses and pads them, and bins the targets with ``metrics``
        when ``self.partition`` is ``'log'`` or ``'custom'``.  Progress is
        printed before and after the work.

        Args:
            index: Batch index; multiplied by the batch size to get the
                read offset.
            return_y_true: When true, the unbinned targets are appended to
                the returned tuple.

        Returns:
            ``(X, y)`` or ``(X, y, y_true)``; wrapped in a dict with
            ``"names"`` and ``"ts"`` when ``self.return_names`` is set.
        """
        print(f"Start: {index} from reader:{self.reader.listfile}")

        batch_size = self.batch_size
        chunk = common_utils.read_chunk_index(
            self.reader, index * batch_size, batch_size)
        Xs, ts, ys, names = chunk["X"], chunk["t"], chunk["y"], chunk["name"]

        Xs = preprocess_chunk(Xs, ts, self.discretizer, self.normalizer)

        # Only the first batch_size items of the chunk are used.
        X = common_utils.pad_zeros(Xs[:batch_size])
        targets = ys[:batch_size]
        y_true = np.array(targets)
        batch_names = names[:batch_size]
        batch_ts = ts[:batch_size]

        if self.partition == 'log':
            targets = [metrics.get_bin_log(v, 10) for v in targets]
        elif self.partition == 'custom':
            targets = [metrics.get_bin_custom(v, 10) for v in targets]
        y = np.array(targets)

        batch_data = (X, y, y_true) if return_y_true else (X, y)

        print(f"End: {index} from reader:{self.reader.listfile}")

        if self.return_names:
            return {"data": batch_data, "names": batch_names, "ts": batch_ts}
        return batch_data
    def _generator(self):
        """Yield ``(X, y, sample_weight)`` batches from the reader, forever.

        Each pass optionally calls ``self.reader.random_shuffle()`` and then
        consumes ``self.n_examples`` examples in chunks of at most
        ``self.chunk_size``.  Chunks are preprocessed, reordered by
        ``common_utils.sort_and_shuffle`` and sliced into batches of
        ``self.batch_size``.

        When ``self.num_classes`` is not 1 the labels are one-hot encoded
        into a ``(batch, num_classes)`` array; otherwise they are passed
        through unchanged.  ``sample_weight`` has the shape of the raw
        label array and holds ``self.class_0_weight`` where the label is 0
        and ``self.class_1_weight`` everywhere else.

        Yields:
            ``(X, y, sample_weight)``; wrapped in a dict with ``"names"``
            and ``"ts"`` when ``self.return_names`` is set.
        """
        B = self.batch_size
        while True:
            if self.shuffle:
                self.reader.random_shuffle()
            remaining = self.n_examples
            while remaining > 0:
                current_size = min(self.chunk_size, remaining)
                remaining -= current_size

                ret = common_utils.read_chunk(self.reader, current_size)
                Xs = ret["X"]
                ts = ret["t"]
                ys = ret["y"]
                names = ret["name"]

                Xs = preprocess_chunk(Xs, ts, self.discretizer, self.normalizer)
                (Xs, ys, ts, names) = common_utils.sort_and_shuffle(
                    [Xs, ys, ts, names], B)

                for i in range(0, current_size, B):
                    X = common_utils.pad_zeros(Xs[i:i + B])
                    y_1d = np.array(ys[i:i + B])

                    if self.num_classes != 1:
                        # One-hot encode: row k gets a 1 in column y_1d[k].
                        y = np.zeros((y_1d.size, self.num_classes))
                        y[np.arange(y_1d.size), y_1d] = 1
                    else:
                        y = y_1d

                    batch_names = names[i:i + B]
                    batch_ts = ts[i:i + B]

                    # Vectorised per-sample weight lookup: label 0 gets the
                    # class-0 weight, every other label the class-1 weight.
                    sample_weight = np.where(
                        y_1d == 0, self.class_0_weight,
                        self.class_1_weight).astype(float)

                    batch_data = (X, y, sample_weight)
                    if not self.return_names:
                        yield batch_data
                    else:
                        yield {"data": batch_data, "names": batch_names,
                               "ts": batch_ts}
Exemple #8
0
    def _generator(self):
        """Yield padded ``([X, mask], y)`` batches in an endless loop.

        At the start of every pass the stored examples are reordered: with
        ``self.shuffle`` set they are permuted uniformly at random,
        otherwise they are handed to ``common_utils.sort_and_shuffle``.
        The reordered sequences are written back to ``self.data``,
        ``self.names`` and ``self.ts``.

        Per-step targets are binned with ``metrics`` when
        ``self.partition`` is ``'log'`` or ``'custom'``; the unbinned
        targets are padded separately as ``y_true``.

        Yields:
            ``([X, mask], y)`` or ``([X, mask], y, y_true)`` (when
            ``self.return_y_true``); wrapped in a dict with ``"names"`` and
            ``"ts"`` when ``self.return_names`` is set.
        """
        batch_size = self.batch_size
        while True:
            if self.shuffle:
                perm = list(range(len(self.data[1])))
                random.shuffle(perm)
                # Build every permuted column first, then publish them.
                new_inputs = [self.data[0][0][src] for src in perm]
                new_masks = [self.data[0][1][src] for src in perm]
                new_targets = [self.data[1][src] for src in perm]
                new_names = [self.names[src] for src in perm]
                new_ts = [self.ts[src] for src in perm]
                self.data = [[new_inputs, new_masks], new_targets]
                self.names = new_names
                self.ts = new_ts
            else:
                # No random permutation: delegate ordering to the helper.
                columns = [self.data[0][0], self.data[0][1], self.data[1],
                           self.names, self.ts]
                (seqs, seq_masks, targets, self.names,
                 self.ts) = common_utils.sort_and_shuffle(columns, batch_size)
                self.data = [[seqs, seq_masks], targets]

            for start in range(0, len(self.data[1]), batch_size):
                stop = start + batch_size
                raw_X = self.data[0][0][start:stop]
                raw_mask = self.data[0][1][start:stop]
                raw_y = self.data[1][start:stop]
                names = self.names[start:stop]
                ts = self.ts[start:stop]

                # Unbinned targets, padded and given a trailing unit axis.
                y_true = common_utils.pad_zeros(
                    [np.array(seq) for seq in raw_y])
                y_true = np.expand_dims(y_true, axis=-1)

                if self.partition == 'log':
                    raw_y = [
                        np.array([metrics.get_bin_log(v, 10) for v in seq])
                        for seq in raw_y
                    ]
                elif self.partition == 'custom':
                    raw_y = [
                        np.array([metrics.get_bin_custom(v, 10) for v in seq])
                        for seq in raw_y
                    ]

                X = common_utils.pad_zeros(raw_X)
                mask = common_utils.pad_zeros(raw_mask)
                y = np.expand_dims(common_utils.pad_zeros(raw_y), axis=-1)

                if self.return_y_true:
                    batch_data = ([X, mask], y, y_true)
                else:
                    batch_data = ([X, mask], y)

                if self.return_names:
                    yield {"data": batch_data, "names": names, "ts": ts}
                else:
                    yield batch_data
Exemple #9
0
    def _generator(self):
        """Yield ``(inputs, outputs)`` batches from the dict-based data.

        ``self.data`` is a mapping from field name to a per-example
        sequence.  At the start of every pass all fields are reordered
        together: permuted at random when ``self.shuffle`` is set,
        otherwise passed through ``common_utils.sort_and_shuffle`` with the
        ``'X'`` field placed first.

        Per batch, ``inputs`` is ``[X, ihm_M, los_M]`` and ``outputs``
        holds ``ihm_y``, optionally its replication along axis 1 (when
        ``self.target_repl``), and the padded ``los_y`` (binned with
        ``metrics`` when ``self.partition`` is ``'log'`` or ``'custom'``).
        Sequence fields are padded to at least ``self.ihm_pos + 1`` steps.

        Yields:
            ``(inputs, outputs)`` or ``(inputs, outputs, los_y_true)``
            (when ``self.return_y_true``); wrapped in a dict with
            ``'names'`` and ``'los_ts'`` when ``self.return_names`` is set.
        """
        batch_size = self.batch_size
        while True:
            # Snapshot the fields with 'X' moved to the front, which is the
            # layout the original comment says sort_and_shuffle expects.
            fields = list(self.data.items())
            x_pos = [name for name, _ in fields].index('X')
            if x_pos > 0:
                fields[0], fields[x_pos] = fields[x_pos], fields[0]
            columns = [values for _, values in fields]

            if self.shuffle:
                perm = list(range(len(self.data['X'])))
                random.shuffle(perm)
                permuted = []
                for column in columns:
                    # Pre-sized to the column's own length, as originally.
                    shuffled = [None] * len(column)
                    for dst, src in enumerate(perm):
                        shuffled[dst] = column[src]
                    permuted.append(shuffled)
                for pos, (name, _) in enumerate(fields):
                    self.data[name] = permuted[pos]
            else:
                # No random permutation: delegate ordering to the helper.
                columns = common_utils.sort_and_shuffle(columns, batch_size)
                for pos, (name, _) in enumerate(fields):
                    self.data[name] = columns[pos]

            for start in range(0, len(self.data['X']), batch_size):
                stop = start + batch_size
                min_steps = self.ihm_pos + 1
                outputs = []

                # X
                X = common_utils.pad_zeros(self.data['X'][start:stop],
                                           min_length=min_steps)
                T = X.shape[1]

                # ihm: both arrays get a trailing unit axis.
                ihm_M = np.expand_dims(
                    np.array(self.data['ihm_M'][start:stop]), axis=-1)
                ihm_y = np.expand_dims(
                    np.array(self.data['ihm_y'][start:stop]), axis=-1)
                outputs.append(ihm_y)
                if self.target_repl:
                    ihm_seq = np.repeat(np.expand_dims(ihm_y, axis=-1),
                                        T, axis=1)
                    outputs.append(ihm_seq)

                # los
                los_M = common_utils.pad_zeros(self.data['los_M'][start:stop],
                                               min_length=min_steps)
                los_y = self.data['los_y'][start:stop]
                # Padded copy of the unbinned targets.
                los_y_true = common_utils.pad_zeros(los_y,
                                                    min_length=min_steps)

                if self.partition == 'log':
                    los_y = [
                        np.array([metrics.get_bin_log(v, 10) for v in seq])
                        for seq in los_y
                    ]
                elif self.partition == 'custom':
                    los_y = [
                        np.array([metrics.get_bin_custom(v, 10) for v in seq])
                        for seq in los_y
                    ]
                los_y = np.expand_dims(
                    common_utils.pad_zeros(los_y, min_length=min_steps),
                    axis=-1)
                outputs.append(los_y)

                inputs = [X, ihm_M, los_M]

                if self.return_y_true:
                    batch_data = (inputs, outputs, los_y_true)
                else:
                    batch_data = (inputs, outputs)

                if self.return_names:
                    yield {
                        'data': batch_data,
                        'names': self.data['names'][start:stop],
                        'los_ts': self.data['los_ts'][start:stop],
                    }
                else:
                    yield batch_data
Exemple #10
0
    def _generator(self):
        """Yield multitask ``(inputs, outputs)`` batches from dict data.

        ``self.data`` is a mapping from field name to a per-example
        sequence.  At the start of every pass all fields are reordered
        together: permuted at random when ``self.shuffle`` is set,
        otherwise passed through ``common_utils.sort_and_shuffle`` with the
        ``'X'`` field placed first.

        Per batch, ``inputs`` is ``[X, T, ihm_M]`` and ``outputs`` holds, in
        order: ``ihm_y``, its replication along axis 1 (only when
        ``self.target_repl``), ``decomp_y``, ``los_y``, ``pheno_y`` and the
        replication of ``pheno_y`` (only when ``self.target_repl``).
        ``X`` and ``T`` are padded to at least ``self.ihm_pos + 1`` steps.

        Yields:
            ``(inputs, outputs)``; wrapped in a dict with ``'names'``,
            ``'decomp_ts'``, ``'los_ts'`` and ``'pheno_ts'`` when
            ``self.return_names`` is set.
        """
        batch_size = self.batch_size
        while True:
            # Snapshot the fields with 'X' moved to the front, which is the
            # layout the original comment says sort_and_shuffle expects.
            fields = list(self.data.items())
            x_pos = [name for name, _ in fields].index('X')
            if x_pos > 0:
                fields[0], fields[x_pos] = fields[x_pos], fields[0]
            columns = [values for _, values in fields]

            if self.shuffle:
                perm = list(range(len(self.data['X'])))
                random.shuffle(perm)
                permuted = []
                for column in columns:
                    # Pre-sized to the column's own length, as originally.
                    shuffled = [None] * len(column)
                    for dst, src in enumerate(perm):
                        shuffled[dst] = column[src]
                    permuted.append(shuffled)
                for pos, (name, _) in enumerate(fields):
                    self.data[name] = permuted[pos]
            else:
                # No random permutation: delegate ordering to the helper.
                columns = common_utils.sort_and_shuffle(columns, batch_size)
                for pos, (name, _) in enumerate(fields):
                    self.data[name] = columns[pos]

            for start in range(0, len(self.data['X']), batch_size):
                stop = start + batch_size
                min_steps = self.ihm_pos + 1
                outputs = []

                # X and its companion T input
                raw_X = self.data['X'][start:stop]
                raw_T = self.data['T'][start:stop]
                X = common_utils.pad_zeros(raw_X, min_length=min_steps)
                T = common_utils.pad_zeros(raw_T, min_length=min_steps)
                steps = X.shape[1]

                # ihm: both arrays get a trailing unit axis.
                ihm_M = np.expand_dims(
                    np.array(self.data['ihm_M'][start:stop]), axis=-1)
                ihm_y = np.expand_dims(
                    np.array(self.data['ihm_y'][start:stop]), axis=-1)
                outputs.append(ihm_y)
                if self.target_repl:
                    ihm_seq = np.repeat(np.expand_dims(ihm_y, axis=-1),
                                        steps, axis=1)
                    outputs.append(ihm_seq)

                # decomp
                decomp_y = np.expand_dims(self.data['decomp_y'][start:stop],
                                          axis=-1)
                outputs.append(decomp_y)

                # los
                los_y = np.expand_dims(self.data['los_y'][start:stop],
                                       axis=-1)
                outputs.append(los_y)

                # pheno
                pheno_y = np.array(self.data['pheno_y'][start:stop])
                outputs.append(pheno_y)
                if self.target_repl:
                    pheno_seq = np.repeat(np.expand_dims(pheno_y, axis=1),
                                          steps, axis=1)
                    outputs.append(pheno_seq)

                inputs = [X, T, ihm_M]
                batch_data = (inputs, outputs)

                if self.return_names:
                    yield {
                        'data': batch_data,
                        'names': self.data['names'][start:stop],
                        'decomp_ts': self.data['decomp_ts'][start:stop],
                        'los_ts': self.data['los_ts'][start:stop],
                        'pheno_ts': self.data['pheno_ts'][start:stop],
                    }
                else:
                    yield batch_data