def _prepare_data(self, seqs, slots, with_labels=True):
    """Turn raw sequences into a padded input array plus label bookkeeping.

    Every feature mapping found in ``item['data']`` is expanded into a dense
    vector of length ``len(self.vocab)``; the value of each feature is written
    at the position ``self.vocab[feature_name]``.

    Args:
        seqs: Iterable of dicts. Each dict is read through the keys
            ``'data'`` (iterable of feature-name -> value mappings) and
            ``'labels'`` (iterable of dicts with keys ``'time'`` and
            ``'slots'``).
        slots: Slot names; each is used as a key into ``label['slots']`` and
            ``self.slot_classes``.
        with_labels: When true, the per-slot label lists are appended to the
            returned tuple.

    Returns:
        A tuple ``(x, y_seq_id, y_time)`` followed, when ``with_labels`` is
        true, by one list of label values per slot (in ``slots`` order).
        ``y_seq_id[k]`` is the index within ``seqs`` of the sequence that the
        k-th label belongs to and ``y_time[k]`` is that label's ``'time'``.
    """
    vocab_size = len(self.vocab)

    x = []
    y_seq_id = []
    y_time = []
    y_labels = [[] for _ in slots]

    for seq_id, item in enumerate(seqs):
        x_vecs = []
        for features in item['data']:
            x_vec = np.zeros((vocab_size, ))
            # .items() instead of the Python-2-only .iteritems(), so the
            # method runs unchanged on both Python 2 and Python 3.
            for ftr, val in features.items():
                x_vec[self.vocab[ftr]] = val
            x_vecs.append(x_vec)
        x.append(x_vecs)

        for label in item['labels']:
            y_seq_id.append(seq_id)
            y_time.append(label['time'])

            for i, slot in enumerate(slots):
                lbl_val = label['slots'][slot]
                if lbl_val < 0:
                    # Negative values count back from the end of the slot's
                    # class list, like a negative Python index.
                    lbl_val = len(self.slot_classes[slot]) + lbl_val
                y_labels[i].append(lbl_val)

    # `padded` is defined outside this block. NOTE(review): presumably it
    # pads all sequences to a common length using the given all-zero vector;
    # its result must have exactly three axes for the transpose below.
    x_zero_pad = np.zeros((vocab_size, ))
    # Swap the first two axes of the padded array.
    x = padded(x, pad_by=[x_zero_pad]).transpose(1, 0, 2)

    data = [x]
    data.extend([y_seq_id, y_time])
    if with_labels:
        data.extend(y_labels)

    return tuple(data)
def _prepare_data(self, seqs, slots, with_labels=True):
    """Assemble padded input arrays and label bookkeeping from ``seqs``.

    Args:
        seqs: Iterable of dicts. Each dict is read through the keys
            ``'data'``, ``'data_score'``, ``'data_actor'`` and ``'labels'``
            (an iterable of dicts with keys ``'time'``, ``'slots'`` and
            ``'score'``).
        slots: Slot names; each is used as a key into ``label['slots']`` and
            ``self.slot_classes``.
        with_labels: When true, label weights and the per-slot label lists
            are appended to the returned tuple.

    Returns:
        A tuple laid out as ``(x, [x_score], y_seq_id, y_time, [y_weights,
        labels_slot_0, labels_slot_1, ...])``. ``x_score`` is present only
        when ``self.x_include_score`` is truthy; the weights and per-slot
        label lists are present only when ``with_labels`` is true.
    """
    token_seqs = []
    score_seqs = []
    actor_seqs = []
    seq_ids = []
    times = []
    weights = []
    labels_per_slot = [[] for _ in slots]

    for seq_ndx, item in enumerate(seqs):
        token_seqs.append(item['data'])
        score_seqs.append(item['data_score'])
        actor_seqs.append(item['data_actor'])

        for label in item['labels']:
            # Remember which sequence, and which time step, this label is for.
            seq_ids.append(seq_ndx)
            times.append(label['time'])

            for slot_labels, slot in zip(labels_per_slot, slots):
                value = label['slots'][slot]
                if value < 0:
                    # Negative values count back from the end of the slot's
                    # class list, like a negative Python index.
                    value = len(self.slot_classes[slot]) + value
                slot_labels.append(value)

            weights.append(label['score'])

    # `padded` is defined outside this block. NOTE(review): presumably it
    # pads the sequences to a common length; each result is transposed so
    # that its first two axes are swapped.
    x = padded(token_seqs, is_int=True).transpose(1, 0)
    x_score = padded(score_seqs).transpose(1, 0)
    # NOTE(review): the actor array is built but never returned -- confirm
    # whether it was meant to be part of the output.
    x_actor = padded(actor_seqs, is_int=True).transpose(1, 0)

    # NOTE(review): scores are cast to int32 here -- confirm this is intended.
    x_score = np.array(x_score, dtype=np.int32)[:, :]
    y_weights = np.array(weights, dtype=np.float32)

    # The result is not used in this method; the call is kept in case it has
    # side effects.
    self._prepare_y_token_labels_padding()

    result = [x]
    if self.x_include_score:
        result.append(x_score)
    result += [seq_ids, times]
    if with_labels:
        result.append(y_weights)
        result += labels_per_slot

    return tuple(result)