def test_select_rows(single_item_profile):
    """Check `itsdb.select_rows` on a single table and on a joined table."""
    profile = itsdb.ItsdbProfile(single_item_profile)

    # NOTE(review): the check below (selecting with cols=None) is disabled;
    # the reason is not visible from this test -- confirm before re-enabling.
    # assert list(itsdb.select_rows(None, profile.read_table('item'))) == [['0', 'The dog barks.']]

    # Plain column names against a single table.
    item_selection = itsdb.select_rows(
        ['i-id', 'i-input'],
        profile.read_table('item'),
    )
    assert list(item_selection) == [['0', 'The dog barks.']]

    # Table-qualified column names against a join of two tables.
    joined_selection = itsdb.select_rows(
        ['item:i-id', 'parse:parse-id'],
        profile.join('item', 'parse'),
    )
    assert list(joined_selection) == [['0', '0']]
def rows(p):
    """Return the id/id/mrs rows of profile *p*.

    If the profile has a 'p-result' table, the columns
    ('i-id', 'p-id', 'mrs') are selected from it directly. Otherwise, if
    both 'result' and 'parse' exist, they are joined and the columns
    ('parse:i-id', 'result:result-id', 'result:mrs') are selected.

    Raises:
        Exception: if neither layout is present; the message includes
            ``p.root``.
    """
    if p.exists('p-result'):
        return p.select('p-result', ('i-id', 'p-id', 'mrs'))

    if not (p.exists('result') and p.exists('parse')):
        raise Exception('Invalid profile: ' + str(p.root))

    wanted = ('parse:i-id', 'result:result-id', 'result:mrs')
    return itsdb.select_rows(wanted, p.join('parse', 'result'))
def _select(projection, tables, condition, ts, mode, cast):
    """Build a table from *tables* and select the projected columns.

    The table is produced by passing the inputs through `_select_from`,
    `_select_projection` and `_select_where`, in that order. A projection
    of '*' is then expanded to explicit column names taken from
    ``ts.relations``: bare field names when there is exactly one table,
    ``table:field`` names otherwise.

    Returns whatever `itsdb.select_rows` returns for the resulting
    columns and table, with *mode* and *cast* passed through.
    """
    joined = _select_from(tables, None, ts)
    joined = _select_projection(projection, joined, ts)
    joined = _select_where(condition, joined, ts)

    # Finally work out which columns to pull from the joined table.
    columns = projection
    if columns == '*':
        if len(tables) == 1:
            columns = [field.name for field in ts.relations[tables[0]]]
        else:
            # Qualify every field with its table name.
            columns = [
                name + ':' + field.name
                for name in tables
                for field in ts.relations[name]
            ]

    return itsdb.select_rows(columns, joined, mode=mode, cast=cast)
def select(args):
    """
    Select data from [incr tsdb()] profiles.

    Reads 'PROFILE', '--filter', '--apply', '--join' and 'DATASPEC' from
    *args* and prints each selected row. When '--join' is given and
    'DATASPEC' does not start with ':', ``args['DATASPEC']`` is updated
    in place with a leading ':'.
    """
    in_profile = _prepare_input_profile(
        args['PROFILE'],
        filters=args['--filter'],
        applicators=args['--apply'],
    )

    join_spec = args['--join']
    if not join_spec:
        table, cols = itsdb.get_data_specifier(args['DATASPEC'])
        rows = in_profile.read_table(table, key_filter=True)
    else:
        # '--join' holds two comma-separated table names.
        tbl1, tbl2 = (name.strip() for name in join_spec.split(','))
        rows = in_profile.join(tbl1, tbl2, key_filter=True)
        # Adding : is just for robustness. We need something like
        # :table:col@table@col, but may have gotten table:col@table@col
        if not args['DATASPEC'].startswith(':'):
            args['DATASPEC'] = ':' + args['DATASPEC']
        table, cols = itsdb.get_data_specifier(args['DATASPEC'])

    for selected in itsdb.select_rows(cols, rows, mode='row'):
        print(selected)
def pred_strings(prof):
    """Yield ``(i_id, preds)`` pairs for the profile *prof*.

    Only rows whose result-id is '0' are used. For each such row the MRS
    string is loaded with `simplemrs.loads_one` and its predicates are
    rewritten as follows:

    * 'nominalization' predicates and those whose short form ends in
      'unknown' are left out;
    * 'named' predicates become ``nmd_"<carg>"``;
    * predicates with pos 'v' get an ``nmz_`` prefix when their lnk
      matches a nominalization seen in the same MRS, and always get
      ``@<valency>`` appended (valency comes from `extract_valency`).

    ``i_id`` is converted with ``int``; ``preds`` is the space-joined
    string of the rewritten predicates.
    """
    columns = ('parse:i-id', 'result:result-id', 'result:mrs')
    # parse and result must be joined to map each i-id to its mrs
    joined = prof.join('parse', 'result')

    for i_id, r_id, mrs_text in itsdb.select_rows(columns, joined):
        if r_id != '0':
            continue
        m = simplemrs.loads_one(mrs_text)

        # First pass: record where nominalizations sit and the valency
        # of every EP, so the second pass can look both up.
        nominalized_at = set()
        valency_of = {}
        for ep in m.eps():
            is_nmz = ep.pred.short_form() == 'nominalization'
            if is_nmz and ep.cfrom != -1:
                nominalized_at.add(ep.lnk)
            valency_of[ep.nodeid] = extract_valency(ep)

        # Second pass: build the simplified predicate strings.
        simplified = []
        for ep in m.eps():
            short = ep.pred.short_form()
            text = ep.pred.string
            # omitted predicates contribute nothing
            if short == 'nominalization' or short.endswith('unknown'):
                continue
            if short == 'named':
                # fold the CARG value into the predicate
                text = 'nmd_"{}"'.format(str(ep.carg or ''))
            if ep.pred.pos == 'v':
                # verbs: flag nominalized ones, then attach argument info
                prefix = 'nmz_' if ep.lnk in nominalized_at else ''
                text = prefix + text + '@' + valency_of[ep.nodeid]
            simplified.append(text)

        yield (int(i_id), ' '.join(simplified))