Ejemplo n.º 1
0
 def xdistribution(self, xrange, yrange):
     """
     Run self.distribution() on two freshly created integer vectors and
     return the results zipped as (value, beginning) pairs.

     arguments:
     xrange -- passed unchanged to both manatee.IntVector constructors
               (NOTE(review): presumably the number of buckets - confirm;
               the name also shadows the Python 2 builtin)
     yrange -- forwarded as the third argument of self.distribution()

     returns:
     the result of zip(values, beginnings)

     TODO: no direct call found for this
     """
     beg_vec = manatee.IntVector(xrange)
     val_vec = manatee.IntVector(xrange)
     # distribution() is expected to fill both vectors in place
     self.distribution(val_vec, beg_vec, yrange)
     pairs = zip(val_vec, beg_vec)
     return pairs
Ejemplo n.º 2
0
 def linegroup_info_select(self, selected_count=5):
     """
     Pick the most frequent line groups and remember them on the instance.

     Group ids and their frequencies are read via self.get_linegroup_stat().
     Entries with a falsy id (i.e. 0) are skipped, the remaining ones are
     ranked by (frequency, id) and the `selected_count` highest ranked
     groups are kept.

     arguments:
     selected_count -- maximum number of groups to select; values <= 0
                       select no group at all

     returns:
     a list starting with 0 followed by the selected group ids in ascending
     order; the same list is stored in self.selected_grps

     TODO: no direct call found for this
     """
     ids = manatee.IntVector()
     freqs = manatee.IntVector()
     self.get_linegroup_stat(ids, freqs)
     ranked = sorted((f, i) for f, i in zip(freqs, ids) if i)
     # A plain ranked[-selected_count:] would return *everything* for
     # selected_count == 0 (because -0 == 0), hence the explicit guard.
     top = ranked[-selected_count:] if selected_count > 0 else []
     self.selected_grps = [0] + sorted(i for _, i in top)
     return self.selected_grps
Ejemplo n.º 3
0
def sort_line_groups(conc, group_ids):
    """
    Sort the line groups of a concordance using the provided group ids.

    Each id is paired with a sort key made of the id zero-padded to five
    digits; both sequences are then handed to conc.linegroup_sort().

    arguments:
    conc -- an object providing linegroup_sort(ids, strs)
    group_ids -- an iterable of integer group ids
    """
    id_vec = manatee.IntVector()
    key_vec = manatee.StrVector()
    for group_id in group_ids:
        id_vec.append(group_id)
        key_vec.append('%05d' % group_id)
    conc.linegroup_sort(id_vec, key_vec)
Ejemplo n.º 4
0
 def linegroup_info_subset(self, conc):
     """
     Compute line group statistics of `conc` with respect to the groups
     stored in self.selected_grps.

     `conc` is synced first and its line groups are set from this object
     (conc.set_linegroup_from_conc(self)).

     arguments:
     conc -- a concordance-like object (manatee.Concordance (fstream)
             according to the original note)

     returns:
     a 3-tuple (id of the most frequent group,
                its frequency divided by conc.size(),
                histogram)
     where histogram holds one count per item of self.selected_grps plus
     a final item with the number of remaining lines. In case `conc` is
     empty or has no group statistics, (0, 0, [0, 0, ...]) is returned.

     TODO: no direct call found for this
     """
     def empty_result():
         # evaluated lazily so self.selected_grps is touched only when needed
         return 0, 0, [0] * (len(self.selected_grps) + 1)

     conc.sync()
     conc.set_linegroup_from_conc(self)
     if not conc.size():
         return empty_result()

     group_ids = manatee.IntVector()
     group_freqs = manatee.IntVector()
     conc.get_linegroup_stat(group_ids, group_freqs)
     freq_by_group = dict(zip(group_ids, group_freqs))
     if not freq_by_group:
         # no annotation
         return empty_result()

     histogram = []
     for grp in self.selected_grps:
         histogram.append(freq_by_group.get(grp, 0))
     # lines not belonging to any of the selected groups
     histogram.append(conc.size() - sum(histogram))
     top_freq, top_id = max(zip(group_freqs, group_ids))
     return top_id, (top_freq / float(conc.size())), histogram
Ejemplo n.º 5
0
 def command_g(self, options):
     """
     Sort according to line groups.

     A stored concordance is obtained via get_stored_conc(), its line
     groups are applied to this object and the lines are then sorted
     using keys derived from the stored label map.

     arguments:
     options -- forwarded to get_stored_conc() as its second argument
     """
     stored = get_stored_conc(self.pycorp, options, self.pycorp._conc_dir)
     self.set_linegroup_from_conc(stored)
     labels = stored.labelmap
     # note: this writes into the label map of the stored concordance itself
     labels[0] = None
     group_ids = manatee.IntVector(map(int, labels.keys()))
     sort_keys = manatee.StrVector(map(lngrp_sortstr, labels.values()))
     self.linegroup_sort(group_ids, sort_keys)
Ejemplo n.º 6
0
    def get_sort_idx(self, q=(), pagesize=20):
        """
        In case sorting is active this method generates shortcuts to pages where new
        first letter of sorted keys (it can be 'left', 'kwic', 'right') starts.

        The sorting criterion is taken from the last item of `q` which starts
        with 's' but not with 's*' (the leading 's' is stripped). If there is
        no such item (or the criterion is empty), an empty list is returned
        without querying the concordance.

        arguments:
        q -- a query (as a list)
        pagesize -- number of items per page

        returns:
        a list of dicts with following structure (example):
            [{'page': 1, 'label': u'a'}, {'page': 1, 'label': u'A'}, {'page': 2, 'label': u'b'},...]
        """
        crit = ''
        for qq in q:
            if qq.startswith('s') and not qq.startswith('s*'):
                crit = qq[1:]
        if not crit:
            return []
        vals = manatee.StrVector()
        idx = manatee.IntVector()
        # whole values are used only if the first part of the criterion
        # (the text before the first '/') contains a dot
        just_letters = '.' not in crit.split('/')[0]
        self.conc.sort_idx(crit, vals, idx, just_letters)
        # floor division keeps page numbers integral on both Python 2 and 3
        out = [(v, pos // pagesize + 1) for v, pos in zip(vals, idx)]
        if just_letters:
            # keep only the first occurrence (= first page) of each initial letter
            seen = set()
            result = []
            for v, p in out:
                letter = v[0]
                if letter not in seen:
                    seen.add(letter)
                    result.append((letter, p))
            out = result

        ans = []
        for v, p in out:
            try:
                ans.append({'page': p, 'label': v})
            except UnicodeDecodeError:
                # Without manatee.set_encoding, manatee appears to produce
                # few extra undecodable items. Ignoring them produces
                # the same result as in case of official Bonito app.
                # NOTE(review): nothing is decoded inside this try block so
                # the handler looks unreachable - confirm before removing.
                pass
        return ans
Ejemplo n.º 7
0
def iter_label2pos(corpus, query):
    """
    Lazily evaluate a labeled CQL query on a corpus and report,
    for each match, where the individual labels are located.

    The collocation vector of a match is read as a flat sequence of
    (label, offset) pairs; each offset is shifted by the position
    where the match begins.

    # Arguments
        corpus: `manatee.Corpus` object
        query: labeled CQL query

    # Yields
        {label: position} dictionaries
    """
    stream = corpus.eval_query(query)
    while not stream.end():
        start = stream.peek_beg()  # position of the first token of the match
        flat = manatee.IntVector()
        stream.collocs(flat)
        label2pos = {}
        for k in range(0, len(flat), 2):
            label2pos[flat[k]] = flat[k + 1] + start
        yield label2pos
        # advance only after the consumer asks for the next item
        stream.next()
Ejemplo n.º 8
0
 def xdistribution(self, xrange: List[int],
                   amplitude: int) -> Tuple[List[int], List[int]]:
     """
     Run self.distribution() on two freshly created integer vectors.

     arguments:
     xrange -- passed unchanged to both manatee.IntVector constructors
     amplitude -- forwarded as the third argument of self.distribution()

     returns:
     a 2-tuple (beginnings, values); both items are the manatee.IntVector
     instances handed to self.distribution()
     """
     beg_vec = manatee.IntVector(xrange)
     val_vec = manatee.IntVector(xrange)
     # distribution() is expected to fill both vectors in place
     self.distribution(val_vec, beg_vec, amplitude)
     return beg_vec, val_vec