Ejemplo n.º 1
0
Archivo: query.py Proyecto: xigt/xigt
def ancestors(obj, refattrs=(ALIGNMENT, SEGMENTATION)):
    """
    Yield, hop by hop, the tiers and items that *obj* refers to.

    >>> for anc in query.ancestors(igt.get_item('g1'), refattrs=(ALIGNMENT, SEGMENTATION)):
    ...     print(anc)
    (<Tier object (id: g type: glosses) at ...>, 'alignment', <Tier object (id: m type: morphemes) at ...>, [<Item object (id: m1) at ...>])
    (<Tier object (id: m type: morphemes) at ...>, 'segmentation', <Tier object (id: w type: words) at ...>, [<Item object (id: w1) at ...>])
    (<Tier object (id: w type: words) at ...>, 'segmentation', <Tier object (id: p type: phrases) at ...>, [<Item object (id: p1) at ...>])

    Args:
        obj: either an item (anything with a ``tier`` attribute) or a
            tier. For a tier, all of its items are followed.
        refattrs: reference attribute names in priority order; at each
            hop the first one present on the current tier is followed.

    Yields:
        ``(tier, refattr, reftier, refitems)`` tuples, where *reftier* is
        the tier reached through *refattr* and *refitems* are the items
        of *reftier* referenced by the current items.
    """
    if hasattr(obj, 'tier'):
        tier = obj.tier
        items = [obj]
    else:
        tier = obj
        items = tier.items
    # a tier may be visited twice (e.g. A > B > A), but then it stops;
    # this is to avoid cycles
    visited = {tier.id}
    while True:
        # get the first specified attribute
        refattr = next((ra for ra in refattrs if ra in tier.attributes), None)
        if not refattr:
            break
        reftier = ref.dereference(tier, refattr)
        ref_ids = set(chain.from_iterable(
            ref.ids(item.attributes.get(refattr, '')) for item in items
        ))
        refitems = [item for item in reftier.items if item.id in ref_ids]
        yield (tier, refattr, reftier, refitems)
        # cycle detection; break if we've now encountered something twice
        if reftier.id in visited:
            break
        # add() stores the id itself; update() would iterate the string
        # and store its individual characters instead
        visited.add(reftier.id)
        tier = reftier
        items = refitems
Ejemplo n.º 2
0
def get_agenda(tier):
    """
    Build a queue of ``(reference ids, items)`` groups for *tier*.

    Each item is keyed by the ids parsed from its ``alignment`` (or, when
    that is falsy, its ``segmentation``). Consecutive items that share
    the same key are collected into one group; the input is not sorted
    first, so only adjacent items are merged.

    Returns:
        A ``deque`` of ``(ids, [items])`` tuples in tier order.
    """
    keyed = []
    for item in tier.items:
        target_ids = ref.ids(item.alignment or item.segmentation)
        keyed.append((target_ids, item))
    # then merge neighbours with the same reference ids
    grouped = deque()
    for target_ids, members in groupby(keyed, key=lambda pair: pair[0]):
        grouped.append((target_ids, [member for _, member in members]))
    return grouped
Ejemplo n.º 3
0
def ancestors(obj, refattrs=(ALIGNMENT, SEGMENTATION)):
    """
    Yield, hop by hop, the tiers and items that *obj* refers to.

    >>> for anc in query.ancestors(igt.get_item('g1'), refattrs=(ALIGNMENT, SEGMENTATION)):
    ...     print(anc)
    (<Tier object (id: g type: glosses) at ...>, 'alignment', <Tier object (id: m type: morphemes) at ...>, [<Item object (id: m1) at ...>])
    (<Tier object (id: m type: morphemes) at ...>, 'segmentation', <Tier object (id: w type: words) at ...>, [<Item object (id: w1) at ...>])
    (<Tier object (id: w type: words) at ...>, 'segmentation', <Tier object (id: p type: phrases) at ...>, [<Item object (id: p1) at ...>])

    Args:
        obj: either an item (anything with a ``tier`` attribute) or a
            tier. For a tier, all of its items are followed.
        refattrs: reference attribute names in priority order; at each
            hop the first one present on the current tier is followed.

    Yields:
        ``(tier, refattr, reftier, refitems)`` tuples, where *reftier* is
        the tier reached through *refattr* and *refitems* are the items
        of *reftier* referenced by the current items.
    """
    if hasattr(obj, 'tier'):
        tier = obj.tier
        items = [obj]
    else:
        tier = obj
        items = tier.items
    # Remember which tiers were already reached so that a reference cycle
    # (e.g. A > B > A) ends the walk instead of looping forever. The hop
    # that closes the cycle is still yielded once.
    visited = {tier.id}
    while True:
        # get the first specified attribute
        refattr = next((ra for ra in refattrs if ra in tier.attributes), None)
        if not refattr:
            break
        reftier = ref.dereference(tier, refattr)
        ref_ids = set(
            chain.from_iterable(
                ref.ids(item.attributes.get(refattr, '')) for item in items))
        refitems = [item for item in reftier.items if item.id in ref_ids]
        yield (tier, refattr, reftier, refitems)
        if reftier.id in visited:
            break
        visited.add(reftier.id)
        tier = reftier
        items = refitems
Ejemplo n.º 4
0
 def _update_referent_index(self, obj):
     """
     Cache, per reference attribute, the ids that *obj* refers to.

     The entry for ``obj.id`` in ``self._referent_cache`` is created if
     missing and then filled with one list of ids for every attribute
     named by ``obj.allowed_reference_attributes()``. Objects without an
     id cannot be indexed; a ``XigtWarning`` is issued and nothing is
     cached.
     """
     obj_id = obj.id
     if obj_id is None:
         warnings.warn(
             'Cannot cache referents for an object with no id.', XigtWarning
         )
         return
     referents = self._referent_cache.setdefault(obj_id, {})
     # an attribute that is absent is parsed as the empty reference ''
     referents.update(
         (refattr, ref.ids(obj.attributes.get(refattr, '')))
         for refattr in obj.allowed_reference_attributes()
     )
Ejemplo n.º 5
0
def get_agenda(tier):
    """
    Return the items of *tier* as a queue of runs sharing one reference.

    Every item is paired with the ids parsed from its ``alignment``, or
    from its ``segmentation`` when the alignment is falsy. Adjacent items
    with equal ids are then merged (no sorting happens beforehand).

    Returns:
        A ``deque`` whose entries are ``(ids, [items])`` tuples.
    """
    def reference_key(entry):
        # entries are (ids, item) pairs; group on the ids
        return entry[0]

    entries = [
        (ref.ids(it.alignment or it.segmentation), it) for it in tier.items
    ]
    return deque(
        (key, [entry[1] for entry in run])
        for key, run in groupby(entries, key=reference_key)
    )
Ejemplo n.º 6
0
def odin_ancestor(obj):
    """
    Follow reference attributes from *obj* up to an item on an ODIN tier.

    The walk picks the first reference attribute present on *obj*, in the
    order SEGMENTATION, CONTENT, ALIGNMENT, DS_DEP_ATTRIBUTE, resolves
    the referenced id with ``xigt_find`` and recurses on the result.

    Args:
        obj: an ``Item``, ``Tier`` or ``Igt``.

    Returns:
        The ``Item`` whose tier type is ``ODIN_TIER_TYPE``, or ``None``
        when *obj* is an ``Igt``, is itself an ODIN tier, carries none of
        the reference attributes, has an empty reference, or refers to an
        id that cannot be found.

    Raises:
        TypeError: if *obj* has a reference attribute but is neither a
            ``Tier`` nor an ``Item``.
    """
    # If we are at an ODIN item, return.
    if isinstance(obj, Item) and obj.tier.type == ODIN_TIER_TYPE:
        return obj

    # An Igt instance can't have a tier ancestor.
    if isinstance(obj, Igt):
        return None

    # Also, an ODIN tier can't get a specific item...
    if isinstance(obj, Tier) and obj.type == ODIN_TIER_TYPE:
        return None

    # Pick the first reference attribute that is present, in priority order.
    ref_attr = next(
        (attr
         for attr in (SEGMENTATION, CONTENT, ALIGNMENT, DS_DEP_ATTRIBUTE)
         if attr in obj.attributes),
        None,
    )
    if ref_attr is None:
        return None

    if isinstance(obj, Tier):
        # For a tier, follow the first id referenced by any of its items.
        target_id = next(
            (ref_id
             for item in obj if ref_attr in item.attributes
             for ref_id in ids(item.attributes[ref_attr])),
            None,
        )
        if target_id is None:
            # Empty tier, or no item carries a usable reference: fall
            # back to the tier's own reference instead of failing.
            target_id = obj.attributes[ref_attr]
    elif isinstance(obj, Item):
        item_ref_ids = ids(obj.attributes[ref_attr])
        if not item_ref_ids:
            # The attribute is present but names no id; nothing to follow.
            return None
        target_id = item_ref_ids[0]
    else:
        raise TypeError(
            'odin_ancestor() expects an Igt, Tier or Item, got {}'
            .format(type(obj).__name__)
        )

    referent = xigt_find(obj.igt, id=target_id)
    if referent is None:
        return None
    return odin_ancestor(referent)
Ejemplo n.º 7
0
 def _update_referent_index(self, obj):
     """
     Store the ids referenced by *obj* in ``self._referent_cache``.

     For an object with an id, the cache entry keyed by that id maps each
     name from ``obj.allowed_reference_attributes()`` to the ids parsed
     from the corresponding attribute value ('' when the attribute is
     not set). For an object without an id only a ``XigtWarning`` is
     emitted.
     """
     if obj.id is not None:
         entry = self._referent_cache.setdefault(obj.id, {})
         for refattr in obj.allowed_reference_attributes():
             raw_reference = obj.attributes.get(refattr, '')
             entry[refattr] = ref.ids(raw_reference)
         return
     warnings.warn(
         'Cannot cache referents for an object with no id.',
         XigtWarning
     )
Ejemplo n.º 8
0
 def _update_referrer_index(self, obj):
     """
     Register *obj* as a referrer of every id it points at.

     ``self._referrer_cache`` maps a referenced id to a dict of
     ``{refattr: [referrer ids]}``; the id of *obj* is appended to the
     list for each id found in each attribute named by
     ``obj.allowed_reference_attributes()``. Objects without an id are
     skipped with a ``XigtWarning``.
     """
     referrer_id = obj.id
     if referrer_id is None:
         warnings.warn(
             'Cannot cache referrers for an object with no id.', XigtWarning
         )
         return
     cache = self._referrer_cache
     lookup = obj.attributes.get  # bound once, reused for every attribute
     for refattr in obj.allowed_reference_attributes():
         for target_id in ref.ids(lookup(refattr, '')):
             by_attr = cache.setdefault(target_id, {})
             by_attr.setdefault(refattr, []).append(referrer_id)
Ejemplo n.º 9
0
 def test_ids(self):
     """
     Check that ``ref.ids`` returns the referenced ids in order.

     Span selectors such as ``[3:5]`` are dropped, repeated ids are kept,
     and both ``+`` and whitespace separate references.
     """
     expectations = [
         ('', []),
         ('a1', ['a1']),
         ('a1[3:5]', ['a1']),
         ('a1[3:5+6:7]+a2[1:4]', ['a1', 'a2']),
         ('a1[3:5+6:7]+a1[1:4]+a1', ['a1', 'a1', 'a1']),
         ('a1 a2  a3', ['a1', 'a2', 'a3']),
     ]
     for expression, expected in expectations:
         assert ref.ids(expression) == expected
Ejemplo n.º 10
0
 def _update_referrer_index(self, obj):
     """
     Add *obj* to the reverse-reference index ``self._referrer_cache``.

     For each attribute named by ``obj.allowed_reference_attributes()``
     the ids in its value are parsed, and the id of *obj* is appended
     under ``cache[referenced id][attribute]``. When *obj* has no id a
     ``XigtWarning`` is raised through ``warnings.warn`` and the index is
     left untouched.
     """
     source_id = obj.id
     if source_id is not None:
         index = self._referrer_cache
         read_attr = obj.attributes.get  # looked up once for the loop
         for refattr in obj.allowed_reference_attributes():
             referenced = ref.ids(read_attr(refattr, ''))
             for referenced_id in referenced:
                 slots = index.setdefault(referenced_id, {})
                 slots.setdefault(refattr, []).append(source_id)
         return
     warnings.warn(
         'Cannot cache referrers for an object with no id.',
         XigtWarning
     )
Ejemplo n.º 11
0
def descendants(obj, refattrs=(SEGMENTATION, ALIGNMENT), follow='first'):
    """
    Yield, breadth-first, the tiers and items that refer to *obj*.

    >>> for des in query.descendants(igt.get_item('p1'), refattrs=(SEGMENTATION, ALIGNMENT)):
    ...     print(des)
    (<Tier object (id: p type: phrases) at ...>, 'segmentation', <Tier object (id: w type: words) at ...>, [<Item object (id: w1) at ...>])
    (<Tier object (id: p type: phrases) at ...>, 'alignment', <Tier object (id: t type: translations) at ...>, [<Item object (id: t1) at ...>])
    (<Tier object (id: w type: words) at ...>, 'segmentation', <Tier object (id: m type: morphemes) at ...>, [<Item object (id: m1) at ...>])
    (<Tier object (id: m type: morphemes) at ...>, 'alignment', <Tier object (id: g type: glosses) at ...>, [<Item object (id: g1) at ...>])

    Args:
        obj: an item (anything with a ``tier`` attribute) or a tier; for
            a tier all of its items are used as the starting set.
        refattrs: reference attribute names, in priority order.
        follow: when ``'first'``, only the first attribute in *refattrs*
            that has referrers is followed from each tier; any other
            value follows all of them.

    Yields:
        ``(tier, refattr, reftier, refitems)`` tuples, where *reftier*
        refers to *tier* through *refattr* and *refitems* are its items
        that reference one of the current items.
    """
    if hasattr(obj, 'tier'):
        start_tier, start_items = obj.tier, [obj]
    else:
        start_tier, start_items = obj, obj.items
    igt = start_tier.igt
    seen = set()
    queue = deque([(start_tier, start_items)])
    while queue:
        tier, items = queue.popleft()
        referrers = tier.referrers(refattrs)
        source_ids = {item.id for item in items}
        # keep only the refattrs that actually have referring tiers
        followable = [ra for ra in refattrs if referrers[ra]]
        if follow == 'first':
            followable = followable[:1]
        # unlike ancestors, descendants for a refattr may have 1+ tiers
        for refattr in followable:
            # each (tier, refattr) pair is expanded once, to avoid cycles
            key = (tier.id, refattr)
            if key in seen:
                continue
            seen.add(key)
            for reftier_id in referrers[refattr]:
                reftier = igt[reftier_id]
                refitems = []
                for candidate in reftier.items:
                    targets = ref.ids(candidate.attributes.get(refattr, ''))
                    if source_ids.intersection(targets):
                        refitems.append(candidate)
                yield (tier, refattr, reftier, refitems)
                queue.append((reftier, refitems))


#def ingroup(obj, refattrs)
#def filter([objs], lambda x:
Ejemplo n.º 12
0
Archivo: query.py Proyecto: xigt/xigt
def descendants(obj, refattrs=(SEGMENTATION, ALIGNMENT), follow='first'):
    """
    Walk outward from *obj* to the tiers and items that reference it.

    >>> for des in query.descendants(igt.get_item('p1'), refattrs=(SEGMENTATION, ALIGNMENT)):
    ...     print(des)
    (<Tier object (id: p type: phrases) at ...>, 'segmentation', <Tier object (id: w type: words) at ...>, [<Item object (id: w1) at ...>])
    (<Tier object (id: p type: phrases) at ...>, 'alignment', <Tier object (id: t type: translations) at ...>, [<Item object (id: t1) at ...>])
    (<Tier object (id: w type: words) at ...>, 'segmentation', <Tier object (id: m type: morphemes) at ...>, [<Item object (id: m1) at ...>])
    (<Tier object (id: m type: morphemes) at ...>, 'alignment', <Tier object (id: g type: glosses) at ...>, [<Item object (id: g1) at ...>])

    Args:
        obj: an item (it has a ``tier`` attribute) or a tier, in which
            case every item of the tier is a starting point.
        refattrs: reference attribute names, in priority order.
        follow: ``'first'`` restricts each tier to the first attribute of
            *refattrs* that has referrers; other values follow them all.

    Yields:
        ``(tier, refattr, reftier, refitems)`` for every referring tier,
        in first-in, first-out order of discovery.
    """

    def references_any(item, refattr, wanted_ids):
        # True when the item's reference names at least one wanted id
        referenced = set(ref.ids(item.attributes.get(refattr, '')))
        return bool(referenced.intersection(wanted_ids))

    if hasattr(obj, 'tier'):
        origin = (obj.tier, [obj])
    else:
        origin = (obj, obj.items)
    igt = origin[0].igt
    expanded = set()
    pending = deque([origin])
    while pending:
        tier, items = pending.popleft()
        tier_refs = tier.referrers(refattrs)
        wanted_ids = set(item.id for item in items)
        # refattrs with at least one tier on the referrers list
        candidates = [ra for ra in refattrs if tier_refs[ra]]
        if not candidates:
            continue
        if follow == 'first':
            candidates = [candidates[0]]
        # unlike ancestors, descendants for a refattr may have 1+ tiers
        for refattr in candidates:
            # try to avoid cycles: expand each (tier, refattr) only once
            if (tier.id, refattr) not in expanded:
                expanded.add((tier.id, refattr))
                for reftier_id in tier_refs[refattr]:
                    reftier = igt[reftier_id]
                    refitems = [
                        item for item in reftier.items
                        if references_any(item, refattr, wanted_ids)
                    ]
                    yield (tier, refattr, reftier, refitems)
                    pending.append((reftier, refitems))

#def ingroup(obj, refattrs)
#def filter([objs], lambda x:
Ejemplo n.º 13
0
def algnexpr_ids_in_referred_tier(item, refattr):
    """
    Check that every id in an item's alignment expression exists.

    The ids parsed from ``item.attributes[refattr]`` are looked up in the
    tier returned by ``get_referred_tier(item, refattr)``.

    Returns:
        ``None`` when the item has no such reference, when the referred
        tier is falsy, or when all ids are found; otherwise a message
        listing the unavailable ids. The message still contains a literal
        ``{modal}`` placeholder, left for a later formatting step.
    """
    itemref = item.attributes.get(refattr)
    reftier = get_referred_tier(item, refattr)
    if itemref and reftier:
        unavailable = [
            ae_id for ae_id in ids(itemref) if reftier.get(ae_id) is None
        ]
        if unavailable:
            template = (
                'The "{}" alignment expression {{modal}} select available '
                '<item> ids from the aligned <tier> ("{}"). The following are '
                'unavailable: {}'
            )
            return template.format(
                refattr, str(reftier.id), ', '.join(unavailable)
            )
    return None
Ejemplo n.º 14
0
 def test_ids(self):
     """
     Verify the id lists that ``ref.ids`` extracts from references.

     Covers the empty string, a bare id, ids with span selectors,
     ``+``-joined references, repeated ids and whitespace separation.
     """
     cases = (
         ('', []),
         ('a1', ['a1']),
         ('a1[3:5]', ['a1']),
         ('a1[3:5+6:7]+a2[1:4]', ['a1', 'a2']),
         ('a1[3:5+6:7]+a1[1:4]+a1', ['a1', 'a1', 'a1']),
         ('a1 a2  a3', ['a1', 'a2', 'a3']),
     )
     for expression, expected in cases:
         self.assertEqual(ref.ids(expression), expected)
Ejemplo n.º 15
0
def ref_match(o, target_ref, ref_type):
    """
    Tell whether *o* references *target_ref* through *ref_type*.

    Args:
        o: an object with an ``attributes`` mapping.
        target_ref: the id to look for.
        ref_type: the name of the reference attribute to inspect.

    Returns:
        ``True`` if the attribute is present, non-empty, and the ids
        parsed from it by ``ref.ids`` include *target_ref*; ``False``
        otherwise.
    """
    if ref_type not in o.attributes:
        return False
    my_ref = o.attributes.get(ref_type)
    if not my_ref:
        return False
    return target_ref in ref.ids(my_ref)