def ancestors(obj, refattrs=(ALIGNMENT, SEGMENTATION)):
    """
    Walk up the reference chain from *obj*, yielding one tuple per hop.

    At each step the first attribute in *refattrs* that is present on the
    current tier is followed (via ``ref.dereference``) to the tier it
    refers to. The walk stops when the current tier has none of the
    *refattrs*, or right after a hop that lands on a tier that has
    already been seen.

    Args:
        obj: an object with a ``tier`` attribute (treated as a single
            item of that tier); anything else is treated as a tier and
            all of its ``items`` are used.
        refattrs: reference attribute names to try, in priority order.

    Yields:
        ``(tier, refattr, reftier, refitems)`` where *refitems* are the
        items of *reftier* whose ids are referenced through *refattr* by
        the items selected on *tier*.

    >>> for anc in query.ancestors(igt.get_item('g1'), refattrs=(ALIGNMENT, SEGMENTATION)):
    ...     print(anc)
    (<Tier object (id: g type: glosses) at ...>, 'alignment', <Tier object (id: m type: morphemes) at ...>, [<Item object (id: m1) at ...>])
    (<Tier object (id: m type: morphemes) at ...>, 'segmentation', <Tier object (id: w type: words) at ...>, [<Item object (id: w1) at ...>])
    (<Tier object (id: w type: words) at ...>, 'segmentation', <Tier object (id: p type: phrases) at ...>, [<Item object (id: p1) at ...>])
    """
    if hasattr(obj, 'tier'):
        tier = obj.tier
        items = [obj]
    else:
        tier = obj
        items = tier.items
    # a tier may be visited twice (e.g. A > B > A), but then it stops;
    # this is to avoid cycles
    visited = {tier.id}
    while True:
        # get the first specified attribute
        refattr = next((ra for ra in refattrs if ra in tier.attributes), None)
        if not refattr:
            break
        reftier = ref.dereference(tier, refattr)
        ids = set(chain.from_iterable(
            ref.ids(item.attributes.get(refattr, '')) for item in items
        ))
        refitems = [item for item in reftier.items if item.id in ids]
        yield (tier, refattr, reftier, refitems)
        # cycle detection; break if we've now encountered something twice
        if reftier.id in visited:
            break
        # add(), not update(): update() would iterate over the id string
        # and record its individual characters instead of the id itself
        visited.add(reftier.id)
        tier = reftier
        items = refitems
def get_agenda(tier):
    """
    Build the work queue for *tier*.

    Each item is paired with the ids parsed (by ``ref.ids``) from its
    ``alignment`` or, when that is falsy, its ``segmentation``.
    Consecutive items whose id lists are equal are then merged.

    Returns:
        A ``deque`` of ``(ids, [item, ...])`` tuples in tier order.
    """
    keyed = [
        (ref.ids(item.alignment or item.segmentation), item)
        for item in tier.items
    ]
    agenda = deque()
    # groupby only merges adjacent entries, so tier order is preserved
    for target_ids, members in groupby(keyed, key=lambda pair: pair[0]):
        agenda.append((target_ids, [item for _, item in members]))
    return agenda
def ancestors(obj, refattrs=(ALIGNMENT, SEGMENTATION)):
    """
    Walk up the reference chain from *obj*, yielding one tuple per hop.

    At each step the first attribute in *refattrs* that is present on the
    current tier is followed (via ``ref.dereference``) to the tier it
    refers to. The walk stops when the current tier has none of the
    *refattrs*, or right after a hop that lands on a tier that has
    already been seen, so cyclic references (e.g. A > B > A) cannot make
    the generator run forever.

    Args:
        obj: an object with a ``tier`` attribute (treated as a single
            item of that tier); anything else is treated as a tier and
            all of its ``items`` are used.
        refattrs: reference attribute names to try, in priority order.

    Yields:
        ``(tier, refattr, reftier, refitems)`` where *refitems* are the
        items of *reftier* whose ids are referenced through *refattr* by
        the items selected on *tier*.

    >>> for anc in query.ancestors(igt.get_item('g1'), refattrs=(ALIGNMENT, SEGMENTATION)):
    ...     print(anc)
    (<Tier object (id: g type: glosses) at ...>, 'alignment', <Tier object (id: m type: morphemes) at ...>, [<Item object (id: m1) at ...>])
    (<Tier object (id: m type: morphemes) at ...>, 'segmentation', <Tier object (id: w type: words) at ...>, [<Item object (id: w1) at ...>])
    (<Tier object (id: w type: words) at ...>, 'segmentation', <Tier object (id: p type: phrases) at ...>, [<Item object (id: p1) at ...>])
    """
    if hasattr(obj, 'tier'):
        tier = obj.tier
        items = [obj]
    else:
        tier = obj
        items = tier.items
    # ids of tiers already walked through; a tier may be yielded as a
    # referent a second time, but the walk stops right after that
    visited = {tier.id}
    while True:
        # get the first specified attribute
        refattr = next((ra for ra in refattrs if ra in tier.attributes), None)
        if not refattr:
            break
        reftier = ref.dereference(tier, refattr)
        ids = set(
            chain.from_iterable(
                ref.ids(item.attributes.get(refattr, '')) for item in items))
        refitems = [item for item in reftier.items if item.id in ids]
        yield (tier, refattr, reftier, refitems)
        # cycle detection; without it a reference loop never terminates
        if reftier.id in visited:
            break
        visited.add(reftier.id)
        tier = reftier
        items = refitems
def _update_referent_index(self, obj):
    """
    Record, per reference attribute, the ids that *obj* refers to.

    The entry for ``obj.id`` in ``self._referent_cache`` is created if
    missing and then filled with one list of ids (as parsed by
    ``ref.ids``) for every attribute name returned by
    ``obj.allowed_reference_attributes()``; absent attributes are parsed
    as the empty string. Objects without an id only trigger an
    ``XigtWarning`` and are not cached.
    """
    obj_id = obj.id
    if obj_id is None:
        warnings.warn(
            'Cannot cache referents for an object with no id.',
            XigtWarning,
        )
        return
    referents = self._referent_cache.setdefault(obj_id, {})
    referents.update(
        (refattr, ref.ids(obj.attributes.get(refattr, '')))
        for refattr in obj.allowed_reference_attributes()
    )
def get_agenda(tier):
    """
    Return a deque of ``(ids, items)`` groups for the items of *tier*.

    ``ids`` is the list produced by ``ref.ids`` from an item's
    ``alignment`` (falling back to ``segmentation`` when the alignment is
    falsy); ``items`` are the adjacent tier items sharing that same list.
    """
    # first pair every item with the ids it points at
    pairs = []
    for item in tier.items:
        pairs.append((ref.ids(item.alignment or item.segmentation), item))
    # then collapse runs of neighbouring items with identical ids
    runs = groupby(pairs, key=lambda entry: entry[0])
    return deque((key, [entry[1] for entry in run]) for key, run in runs)
def odin_ancestor(obj):
    """
    Follow references from *obj* until an item on an ODIN tier is reached.

    The reference attribute followed at each step is the first one
    present on the object, tried in the order SEGMENTATION, CONTENT,
    ALIGNMENT, DS_DEP_ATTRIBUTE. The referenced object is looked up with
    ``xigt_find`` on ``obj.igt`` and the search recurses from there.

    Args:
        obj: an ``Igt``, ``Tier`` or ``Item``.

    Returns:
        The ``Item`` whose tier type is ``ODIN_TIER_TYPE``, or ``None``
        when *obj* is an ``Igt`` or an ODIN tier, when it carries none of
        the reference attributes, when its reference names no id, or
        when the referenced object cannot be found.

    Raises:
        TypeError: if *obj* carries a reference attribute but is neither
            a ``Tier`` nor an ``Item``.
    """
    # ODIN_LOG.debug("Looking up the odin ancestor for {}".format(str(obj)))

    # If we are at an ODIN item, return.
    if isinstance(obj, Item) and obj.tier.type == ODIN_TIER_TYPE:
        return obj
    # An Igt instance can't have a tier ancestor.
    if isinstance(obj, Igt):
        return None
    # Also, an ODIN tier can't get a specific item...
    if isinstance(obj, Tier) and obj.type == ODIN_TIER_TYPE:
        return None

    attributes = obj.attributes
    ref_attr = next(
        (candidate
         for candidate in (SEGMENTATION, CONTENT, ALIGNMENT, DS_DEP_ATTRIBUTE)
         if candidate in attributes),
        None,
    )
    if ref_attr is None:
        return None

    if isinstance(obj, Tier):
        # For a tier, follow an arbitrary member: the first item that
        # actually names an id through ref_attr. Only that item is
        # resolved; the remaining items are not inspected.
        target_id = next(
            (found[0]
             for found in (ids(member.attributes[ref_attr])
                           for member in obj
                           if ref_attr in member.attributes)
             if found),
            None,
        )
        if target_id is None:
            # Empty tier, or no item carries a usable reference: fall
            # back to the tier-level reference instead of failing.
            target_id = attributes[ref_attr]
    elif isinstance(obj, Item):
        found = ids(attributes[ref_attr])
        if not found:
            # The reference expression names no id; nothing to follow.
            return None
        target_id = found[0]
    else:
        raise TypeError(
            'odin_ancestor() expects an Igt, Tier or Item, not {}'
            .format(type(obj).__name__)
        )

    item = xigt_find(obj.igt, id=target_id)
    if item is None:
        return None
    return odin_ancestor(item)
def _update_referent_index(self, obj):
    """
    Cache the ids referenced by *obj*, keyed by reference attribute.

    For each name from ``obj.allowed_reference_attributes()`` the value
    of that attribute (``''`` when absent) is parsed with ``ref.ids`` and
    stored under ``self._referent_cache[obj.id][name]``. An object whose
    id is ``None`` is skipped with an ``XigtWarning``.
    """
    key = obj.id
    if key is None:
        message = 'Cannot cache referents for an object with no id.'
        warnings.warn(message, XigtWarning)
        return
    per_attribute = self._referent_cache.setdefault(key, {})
    for name in obj.allowed_reference_attributes():
        expression = obj.attributes.get(name, '')
        per_attribute[name] = ref.ids(expression)
def _update_referrer_index(self, obj):
    """
    Register *obj* as a referrer of every id it points at.

    For each attribute name from ``obj.allowed_reference_attributes()``,
    the ids parsed by ``ref.ids`` from that attribute (``''`` when
    absent) each get ``obj.id`` appended to
    ``self._referrer_cache[target_id][attribute]``. An object whose id is
    ``None`` is skipped with an ``XigtWarning``.
    """
    referrer_id = obj.id
    if referrer_id is None:
        warnings.warn(
            'Cannot cache referrers for an object with no id.',
            XigtWarning,
        )
        return
    cache = self._referrer_cache
    lookup = obj.attributes.get  # bound once; reused for every attribute
    for refattr in obj.allowed_reference_attributes():
        for target_id in ref.ids(lookup(refattr, '')):
            by_attribute = cache.setdefault(target_id, {})
            by_attribute.setdefault(refattr, []).append(referrer_id)
def test_ids(self):
    """Check ``ref.ids`` against a table of expressions and expected ids."""
    cases = [
        ('', []),
        ('a1', ['a1']),
        ('a1[3:5]', ['a1']),
        ('a1[3:5+6:7]+a2[1:4]', ['a1', 'a2']),
        # repeated ids are kept, not de-duplicated
        ('a1[3:5+6:7]+a1[1:4]+a1', ['a1', 'a1', 'a1']),
        ('a1 a2 a3', ['a1', 'a2', 'a3']),
    ]
    for expression, expected in cases:
        assert ref.ids(expression) == expected
def _update_referrer_index(self, obj):
    """
    Add ``obj.id`` to the referrer lists of all ids that *obj* references.

    ``self._referrer_cache`` maps a referenced id to a dict of
    ``{reference attribute: [referrer ids]}``. Every attribute name from
    ``obj.allowed_reference_attributes()`` is parsed with ``ref.ids``
    (missing attributes count as ``''``). If ``obj.id`` is ``None`` an
    ``XigtWarning`` is issued and nothing is cached.
    """
    source_id = obj.id
    if source_id is None:
        message = 'Cannot cache referrers for an object with no id.'
        warnings.warn(message, XigtWarning)
        return
    index = self._referrer_cache
    get_attribute = obj.attributes.get  # looked up once for the loop below
    for name in obj.allowed_reference_attributes():
        referenced = ref.ids(get_attribute(name, ''))
        for referenced_id in referenced:
            referrers = index.setdefault(referenced_id, {}).setdefault(name, [])
            referrers.append(source_id)
def descendants(obj, refattrs=(SEGMENTATION, ALIGNMENT), follow='first'):
    """
    Walk breadth-first over the tiers that refer to *obj*.

    Starting from *obj* (an object with a ``tier`` attribute is treated
    as a single item of that tier, anything else as a whole tier), each
    tier's ``referrers(refattrs)`` are consulted. With
    ``follow='first'`` only the first attribute in *refattrs* that has
    referrers is followed; any other value follows all of them. Each
    ``(tier id, refattr)`` pair is expanded at most once.

    Yields:
        ``(tier, refattr, reftier, refitems)`` where *refitems* are the
        items of the referring tier whose *refattr* ids overlap the ids
        of the items selected on *tier*.

    >>> for des in query.descendants(igt.get_item('p1'), refattrs=(SEGMENTATION, ALIGNMENT)):
    ...     print(des)
    (<Tier object (id: p type: phrases) at ...>, 'segmentation', <Tier object (id: w type: words) at ...>, [<Item object (id: w1) at ...>])
    (<Tier object (id: p type: phrases) at ...>, 'alignment', <Tier object (id: t type: translations) at ...>, [<Item object (id: t1) at ...>])
    (<Tier object (id: w type: words) at ...>, 'segmentation', <Tier object (id: m type: morphemes) at ...>, [<Item object (id: m1) at ...>])
    (<Tier object (id: m type: morphemes) at ...>, 'alignment', <Tier object (id: g type: glosses) at ...>, [<Item object (id: g1) at ...>])
    """
    if hasattr(obj, 'tier'):
        start_tier, start_items = obj.tier, [obj]
    else:
        start_tier, start_items = obj, obj.items
    igt = start_tier.igt
    expanded = set()
    queue = deque([(start_tier, start_items)])
    while queue:
        tier, items = queue.popleft()
        referrers = tier.referrers(refattrs)
        wanted_ids = {item.id for item in items}
        # followable refattrs: those with something on the referrers list
        followable = [ra for ra in refattrs if referrers[ra]]
        if follow == 'first':
            followable = followable[:1]
        # unlike ancestors, descendants for a refattr may have 1+ tiers
        for refattr in followable:
            step = (tier.id, refattr)
            # try to avoid cycles
            if step in expanded:
                continue
            expanded.add(step)
            for reftier_id in referrers[refattr]:
                reftier = igt[reftier_id]
                refitems = [
                    item for item in reftier.items
                    if wanted_ids & set(
                        ref.ids(item.attributes.get(refattr, '')))
                ]
                yield (tier, refattr, reftier, refitems)
                queue.append((reftier, refitems))

#def ingroup(obj, refattrs)
#def filter([objs], lambda x:
def descendants(obj, refattrs=(SEGMENTATION, ALIGNMENT), follow='first'):
    """
    Yield, breadth-first, the tiers and items that refer back to *obj*.

    *obj* is treated as a single item when it has a ``tier`` attribute
    and as a whole tier otherwise. For every tier taken from the agenda,
    ``tier.referrers(refattrs)`` gives the referring tier ids per
    attribute. When *follow* is ``'first'`` only the first attribute of
    *refattrs* with any referrers is used; otherwise all such attributes
    are. A ``(tier id, refattr)`` pair is never expanded twice.

    Yields:
        ``(tier, refattr, reftier, refitems)``; *refitems* holds the
        items of *reftier* that reference, through *refattr*, at least
        one of the items currently selected on *tier*.

    >>> for des in query.descendants(igt.get_item('p1'), refattrs=(SEGMENTATION, ALIGNMENT)):
    ...     print(des)
    (<Tier object (id: p type: phrases) at ...>, 'segmentation', <Tier object (id: w type: words) at ...>, [<Item object (id: w1) at ...>])
    (<Tier object (id: p type: phrases) at ...>, 'alignment', <Tier object (id: t type: translations) at ...>, [<Item object (id: t1) at ...>])
    (<Tier object (id: w type: words) at ...>, 'segmentation', <Tier object (id: m type: morphemes) at ...>, [<Item object (id: m1) at ...>])
    (<Tier object (id: m type: morphemes) at ...>, 'alignment', <Tier object (id: g type: glosses) at ...>, [<Item object (id: g1) at ...>])
    """
    is_item = hasattr(obj, 'tier')
    tier = obj.tier if is_item else obj
    items = [obj] if is_item else tier.items
    igt = tier.igt
    done = set()
    pending = deque([(tier, items)])
    while pending:
        tier, items = pending.popleft()
        tier_refs = tier.referrers(refattrs)
        selected = {item.id for item in items}
        # keep only the refattrs that some other tier actually uses
        usable = [ra for ra in refattrs if tier_refs[ra]]
        if usable and follow == 'first':
            del usable[1:]
        # unlike ancestors, descendants for a refattr may have 1+ tiers
        for refattr in usable:
            # try to avoid cycles
            if (tier.id, refattr) not in done:
                done.add((tier.id, refattr))
                for reftier_id in tier_refs[refattr]:
                    reftier = igt[reftier_id]
                    refitems = []
                    for candidate in reftier.items:
                        pointed_at = ref.ids(
                            candidate.attributes.get(refattr, ''))
                        if set(pointed_at).intersection(selected):
                            refitems.append(candidate)
                    yield (tier, refattr, reftier, refitems)
                    pending.append((reftier, refitems))

#def ingroup(obj, refattrs)
#def filter([objs], lambda x:
def algnexpr_ids_in_referred_tier(item, refattr):
    """
    Check that every id in *item*'s *refattr* expression exists in the
    tier that the reference points to.

    Returns:
        ``None`` when the item has no such reference, when
        ``get_referred_tier`` yields a falsy value, or when all ids are
        found. Otherwise a message listing the ids for which
        ``reftier.get`` returned ``None``. The message still contains a
        literal ``{modal}`` placeholder after formatting.
    """
    itemref = item.attributes.get(refattr)
    reftier = get_referred_tier(item, refattr)
    if not (itemref and reftier):
        return None
    unavailable = [
        ae_id for ae_id in ids(itemref) if reftier.get(ae_id) is None
    ]
    if not unavailable:
        return None
    template = (
        'The "{}" alignment expression {{modal}} select available '
        '<item> ids from the aligned <tier> ("{}"). The following are '
        'unavailable: {}'
    )
    return template.format(refattr, str(reftier.id), ', '.join(unavailable))
def test_ids(self):
    """Verify the id lists ``ref.ids`` extracts from reference expressions."""
    expectations = (
        ('', []),
        ('a1', ['a1']),
        ('a1[3:5]', ['a1']),
        ('a1[3:5+6:7]+a2[1:4]', ['a1', 'a2']),
        # the same id may appear several times in the result
        ('a1[3:5+6:7]+a1[1:4]+a1', ['a1', 'a1', 'a1']),
        ('a1 a2 a3', ['a1', 'a2', 'a3']),
    )
    for expression, expected in expectations:
        self.assertEqual(ref.ids(expression), expected)
def ref_match(o, target_ref, ref_type):
    """
    Tell whether *o* references *target_ref* through attribute *ref_type*.

    Returns:
        ``True`` when ``o.attributes`` has a non-empty *ref_type* value
        and *target_ref* is among the ids ``ref.ids`` parses from it;
        ``False`` otherwise.
    """
    if ref_type not in o.attributes:
        return False
    expression = o.attributes.get(ref_type)
    if not expression:
        return False
    return target_ref in ref.ids(expression)