예제 #1
0
    def _prepare_data(self, seqs, slots, with_labels=True):
        """Turn dialog sequences into a padded input array plus label lists.

        Args:
            seqs: Iterable of dicts. Each one has ``'data'`` (a list of
                ``{feature: value}`` mappings) and ``'labels'`` (a list of
                dicts with a ``'time'`` entry and a ``'slots'`` mapping).
            slots: Slot names; fixes the order of the per-slot label lists.
            with_labels: When true, the per-slot label lists are appended
                to the result.

        Returns:
            A tuple ``(x, y_seq_id, y_time)``, followed by one list of label
            values per slot when ``with_labels`` is true. ``x`` is the
            output of ``padded`` with its first two axes swapped
            (presumably time-major: (time, sequence, vocab) -- TODO confirm
            against ``padded``).
        """
        vocab_size = len(self.vocab)

        x = []
        y_seq_id = []
        y_time = []
        y_labels = [[] for _ in slots]
        for seq_id, item in enumerate(seqs):
            x_vecs = []
            for features in item['data']:
                # One dense vector per feature mapping, indexed via the vocab.
                x_vec = np.zeros((vocab_size, ))
                # ``items()`` works on both Python 2 and Python 3 dicts,
                # whereas ``iteritems()`` only exists on Python 2.
                for ftr, val in features.items():
                    x_vec[self.vocab[ftr]] = val
                x_vecs.append(x_vec)

            x.append(x_vecs)

            for label in item['labels']:
                # Each label records which sequence it belongs to.
                y_seq_id.append(seq_id)
                y_time.append(label['time'])

                for i, slot in enumerate(slots):
                    lbl_val = label['slots'][slot]
                    if lbl_val < 0:
                        # Negative values are offset by the number of
                        # classes of the slot (i.e. count from the end).
                        lbl_val = len(self.slot_classes[slot]) + lbl_val
                    y_labels[i].append(lbl_val)

        # Shorter sequences are filled up with all-zero feature vectors.
        x_zero_pad = np.zeros((vocab_size, ))
        x = padded(x, pad_by=[x_zero_pad]).transpose(1, 0, 2)

        data = [x, y_seq_id, y_time]
        if with_labels:
            data.extend(y_labels)
        return tuple(data)
예제 #2
0
    def _prepare_data(self, seqs, slots, with_labels=True):
        """Turn dialog sequences into a padded input array plus label lists.

        Args:
            seqs: Iterable of dicts. Each one has ``'data'`` (a list of
                ``{feature: value}`` mappings) and ``'labels'`` (a list of
                dicts with a ``'time'`` entry and a ``'slots'`` mapping).
            slots: Slot names; fixes the order of the per-slot label lists.
            with_labels: When true, the per-slot label lists are appended
                to the result.

        Returns:
            A tuple ``(x, y_seq_id, y_time)``, followed by one list of label
            values per slot when ``with_labels`` is true. ``x`` is the
            output of ``padded`` with its first two axes swapped
            (presumably time-major: (time, sequence, vocab) -- TODO confirm
            against ``padded``).
        """
        vocab_size = len(self.vocab)

        x = []
        y_seq_id = []
        y_time = []
        y_labels = [[] for _ in slots]
        for seq_id, item in enumerate(seqs):
            x_vecs = []
            for features in item['data']:
                # One dense vector per feature mapping, indexed via the vocab.
                x_vec = np.zeros((vocab_size, ))
                # ``items()`` works on both Python 2 and Python 3 dicts,
                # whereas ``iteritems()`` only exists on Python 2.
                for ftr, val in features.items():
                    x_vec[self.vocab[ftr]] = val
                x_vecs.append(x_vec)

            x.append(x_vecs)

            for label in item['labels']:
                # Each label records which sequence it belongs to.
                y_seq_id.append(seq_id)
                y_time.append(label['time'])

                for i, slot in enumerate(slots):
                    lbl_val = label['slots'][slot]
                    if lbl_val < 0:
                        # Negative values are offset by the number of
                        # classes of the slot (i.e. count from the end).
                        lbl_val = len(self.slot_classes[slot]) + lbl_val
                    y_labels[i].append(lbl_val)

        # Shorter sequences are filled up with all-zero feature vectors.
        x_zero_pad = np.zeros((vocab_size, ))
        x = padded(x, pad_by=[x_zero_pad]).transpose(1, 0, 2)

        data = [x, y_seq_id, y_time]
        if with_labels:
            data.extend(y_labels)
        return tuple(data)
예제 #3
0
파일: model.py 프로젝트: hydercps/xtrack2
    def _prepare_data(self, seqs, slots, with_labels=True):
        """Assemble padded input arrays and label lists from sequences.

        Args:
            seqs: Iterable of dicts providing ``'data'``, ``'data_score'``,
                ``'data_actor'`` and ``'labels'``. Every label is a dict
                with ``'time'``, ``'slots'`` and ``'score'`` entries.
            slots: Slot names; fixes the order of the per-slot label lists.
            with_labels: When true, the label weights and the per-slot
                label lists are appended to the result.

        Returns:
            A tuple starting with the padded, transposed ``x``; then
            ``x_score`` if ``self.x_include_score`` is set; then the
            sequence-id and time lists; and, when ``with_labels`` is true,
            the float32 weight array followed by one label list per slot.
        """
        tokens, scores, actors = [], [], []
        seq_ids, times, weights = [], [], []
        labels_by_slot = [[] for _ in slots]

        for seq_idx, item in enumerate(seqs):
            tokens.append(item['data'])
            scores.append(item['data_score'])
            actors.append(item['data_actor'])

            for label in item['labels']:
                # Remember which sequence this label belongs to.
                seq_ids.append(seq_idx)
                times.append(label['time'])

                for slot_labels, slot in zip(labels_by_slot, slots):
                    raw = label['slots'][slot]
                    # Negative values are offset by the slot's class count.
                    value = (len(self.slot_classes[slot]) + raw
                             if raw < 0 else raw)
                    slot_labels.append(value)

                weights.append(label['score'])

        x = padded(tokens, is_int=True).transpose(1, 0)
        x_score = padded(scores).transpose(1, 0)
        # The padded actor array is computed but never placed in the result.
        padded(actors, is_int=True).transpose(1, 0)

        # NOTE(review): scores are cast to int32 here -- confirm that this
        # is intended if the scores are fractional.
        x_score = np.array(x_score, dtype=np.int32)[:, :]
        y_weights = np.array(weights, dtype=np.float32)

        # The return value is not used in this method; the call is kept in
        # case it has side effects.
        self._prepare_y_token_labels_padding()

        data = [x]
        if self.x_include_score:
            data.append(x_score)
        data += [seq_ids, times]
        if with_labels:
            data.append(y_weights)
            data += labels_by_slot

        return tuple(data)
예제 #4
0
    def _prepare_data(self, seqs, slots, with_labels=True):
        """Assemble padded input arrays and label lists from sequences.

        Args:
            seqs: Iterable of dicts providing ``'data'``, ``'data_score'``,
                ``'data_actor'`` and ``'labels'``. Every label is a dict
                with ``'time'``, ``'slots'`` and ``'score'`` entries.
            slots: Slot names; fixes the order of the per-slot label lists.
            with_labels: When true, the label weights and the per-slot
                label lists are appended to the result.

        Returns:
            A tuple starting with the padded, transposed ``x``; then
            ``x_score`` if ``self.x_include_score`` is set; then the
            sequence-id and time lists; and, when ``with_labels`` is true,
            the float32 weight array followed by one label list per slot.
        """
        tokens, scores, actors = [], [], []
        seq_ids, times, weights = [], [], []
        labels_by_slot = [[] for _ in slots]

        for seq_idx, item in enumerate(seqs):
            tokens.append(item['data'])
            scores.append(item['data_score'])
            actors.append(item['data_actor'])

            for label in item['labels']:
                # Remember which sequence this label belongs to.
                seq_ids.append(seq_idx)
                times.append(label['time'])

                for slot_labels, slot in zip(labels_by_slot, slots):
                    raw = label['slots'][slot]
                    # Negative values are offset by the slot's class count.
                    value = (len(self.slot_classes[slot]) + raw
                             if raw < 0 else raw)
                    slot_labels.append(value)

                weights.append(label['score'])

        x = padded(tokens, is_int=True).transpose(1, 0)
        x_score = padded(scores).transpose(1, 0)
        # The padded actor array is computed but never placed in the result.
        padded(actors, is_int=True).transpose(1, 0)

        # NOTE(review): scores are cast to int32 here -- confirm that this
        # is intended if the scores are fractional.
        x_score = np.array(x_score, dtype=np.int32)[:, :]
        y_weights = np.array(weights, dtype=np.float32)

        # The return value is not used in this method; the call is kept in
        # case it has side effects.
        self._prepare_y_token_labels_padding()

        data = [x]
        if self.x_include_score:
            data.append(x_score)
        data += [seq_ids, times]
        if with_labels:
            data.append(y_weights)
            data += labels_by_slot

        return tuple(data)