def fuse_rows(self, grouper, sort=None):
    """ Merge groups of rows into single rows

    Rows of ``self.t`` sharing the same ``grouper`` value are folded
    into one row, column by column, using the mergers produced by
    ``valmerger.gen_vm`` for the header field names. The fused rows
    are written to the temporary file ``TMP``, reloaded as an Orange
    table and stored in ``self.t``. Nothing is returned.

    ! TODO : in-place is better

    grouper : row grouping attribute
    sort : positioning attribute used to order the rows of a group
        before merging (optional)
    """
    # NOTE: any falsy selector (None, '', but also index 0) disables
    # sorting; the constant key keeps rows in table order because
    # sorted() is stable.
    if sort:
        def sorter(row):
            return row[sort].value
    else:
        def sorter(row):
            return 0

    head, fnames = self.tab_header()
    mergers = valmerger.gen_vm(fnames)

    # Bucket the rows by the value of the grouping attribute
    groups = defaultdict(list)
    for row in self.t:
        groups[row[grouper].value].append(row)

    # Open before entering the try block: if the file cannot be
    # created there is nothing to clean up.
    out = open(TMP, 'w')
    try:
        with out:
            out.write(head)
            for rows in groups.values():
                # Sort the rows of the group by position
                ordered = sorted(rows, key=sorter)
                # Start from the first row, then fold in the others
                vals = [ordered[0][n].value for n in fnames]
                for row in ordered[1:]:
                    vals = [m(v, row[n].value)
                            for m, v, n in zip(mergers, vals, fnames)]
                out.write('\t'.join(map(str, vals)) + '\n')
        # The file is closed (flushed) here, so it can be reloaded
        self.t = Orange.data.Table(TMP)
    finally:
        # Always remove the temporary file, even when merging,
        # writing or reloading failed.
        discard(TMP)
def fuse_rows(self, grouper, sort=None):
    """ Merge groups of rows into single rows

    Rows of ``self.t`` sharing the same ``grouper`` value are folded
    into one row, column by column, using the mergers produced by
    ``valmerger.gen_vm`` for the header field names. The fused rows
    are written to the temporary file ``TMP``, reloaded as an Orange
    table and stored in ``self.t``. Nothing is returned.

    ! TODO : in-place is better

    grouper : row grouping attribute
    sort : positioning attribute used to order the rows of a group
        before merging (optional)
    """
    # NOTE: any falsy selector (None, '', but also index 0) disables
    # sorting; the constant key keeps rows in table order because
    # sorted() is stable.
    if sort:
        def sorter(row):
            return row[sort].value
    else:
        def sorter(row):
            return 0

    head, fnames = self.tab_header()
    mergers = valmerger.gen_vm(fnames)

    # Bucket the rows by the value of the grouping attribute
    groups = defaultdict(list)
    for row in self.t:
        groups[row[grouper].value].append(row)

    # Open before entering the try block: if the file cannot be
    # created there is nothing to clean up.
    out = open(TMP, 'w')
    try:
        with out:
            out.write(head)
            for rows in groups.values():
                # Sort the rows of the group by position
                ordered = sorted(rows, key=sorter)
                # Start from the first row, then fold in the others
                vals = [ordered[0][n].value for n in fnames]
                for row in ordered[1:]:
                    vals = [m(v, row[n].value)
                            for m, v, n in zip(mergers, vals, fnames)]
                out.write('\t'.join(map(str, vals)) + '\n')
        # The file is closed (flushed) here, so it can be reloaded
        self.t = Orange.data.Table(TMP)
    finally:
        # Always remove the temporary file, even when merging,
        # writing or reloading failed.
        discard(TMP)
mrg_table = Orange.data.Table([cf_table, df_table]) mgd = mrg_table.domain md = Orange.data.Domain(mgd, 'is_commitment') md = Orange.data.Domain(feat_sel, md) # Add necessary meta information for nm in ('id', 'turn_id', 'dialogue', 'start'): md.add_meta(mgd.meta_id(nm), mgd.get_meta(nm)) m_table = Orange.data.Table(md, mrg_table) m_table.save(fmerge) print('Data built with {0} EDUs'.format(len(m_table))) ##### Part 2 : Turn merging ##### head, fnames = gen_tab_header(m_table) mergers = gen_vm(fnames) # Indexes for features AND metas # (can't iterate easily on both at the same time) inds = list(md.index(n) for n in fnames) # Build turn-rows dict tid_i = md.index('turn_id') t_rows = defaultdict(list) for r in m_table: t_rows[r[tid_i].value].append(r) # Merge and store data s_id = md.index('start') with open(fturns, 'w') as f: f.write(head)