Example #1
0
def main(args):
    """
    Entry point for feature extraction over the corpus (single edus only).

    Reads the corpus inputs described by `args`, builds the lexicon lookup
    and the player information, gathers the rows produced for each corpus
    key by `_on_doc`, and hands every row to the writer returned by
    `_conll_writer(args)`.
    """
    corpus_inputs = _read_corpus_inputs(args)
    lex_lookup = _mk_lexlookup(corpus_inputs.lexicons)
    known_players = get_players(corpus_inputs)

    # one group of rows per corpus key, joined into a single stream
    rows_by_doc = (_on_doc(corpus_inputs, lex_lookup, known_players, doc_key)
                   for doc_key in corpus_inputs.corpus)
    all_rows = concat(rows_by_doc)

    out = _conll_writer(args)
    for feature_row in all_rows:
        out.writerow(feature_row)
Example #2
0
def main(args):
    """
    Entry point for feature extraction over the corpus (single edus only).

    Reads the corpus inputs described by `args`, builds the lexicon lookup
    and the player information, gathers the rows produced for each corpus
    key by `_on_doc`, and hands every row to the writer returned by
    `_conll_writer(args)`.
    """
    corpus_inputs = _read_corpus_inputs(args)
    lex_lookup = _mk_lexlookup(corpus_inputs.lexicons)
    known_players = get_players(corpus_inputs)

    # one group of rows per corpus key, joined into a single stream
    rows_by_doc = (_on_doc(corpus_inputs, lex_lookup, known_players, doc_key)
                   for doc_key in corpus_inputs.corpus)
    all_rows = concat(rows_by_doc)

    out = _conll_writer(args)
    for feature_row in all_rows:
        out.writerow(feature_row)
Example #3
0
    def topdown(self, pred, prunable=None):
        """
        Top-down search for the largest subtrees satisfying `pred`.

        If `pred(self)` holds, the search stops here and `[self]` is
        returned without looking any deeper.  Otherwise, if `prunable` is
        supplied and `prunable(self)` holds, this whole subtree is discarded
        and `[]` is returned.  Note the order: `pred` is consulted first, so
        it always wins over `prunable`.

        Failing both, the search recurses into the children of `self` and
        the per-child results are joined with `concat`.  Children that are
        not `SearchableTree` instances (e.g. leaves) are skipped.
        """
        if pred(self):
            return [self]
        if prunable and prunable(self):
            return []
        # only children that are themselves searchable can be descended into
        subtrees = (kid for kid in self if isinstance(kid, SearchableTree))
        return concat(kid.topdown(pred, prunable) for kid in subtrees)
Example #4
0
def wide_summary(s_counts, keys=None):
    """
    Return a table of relation instance and CDU counts for each
    section

    `s_counts` maps each section to a mapping from count key to count.
    One row is produced per section, followed by an "all together" row
    holding the per-key sums over all sections.

    `keys` selects and orders the columns.  Any iterable is accepted (it
    is copied into a list).  If it is omitted or empty, the columns are
    the union of the keys found in the per-section mappings, in no
    particular order.

    The table is rendered with `tabulate`.
    """
    if keys:
        # copy to a list: the header concatenation below needs one, and
        # the keys are iterated once per section
        keys = list(keys)
    else:
        keys = list(frozenset(concat(d.keys() for d in s_counts.values())))

    rows = []
    total = defaultdict(int)
    for section in s_counts:
        section_counts = s_counts[section]
        row = [section]
        for skey in keys:
            count = section_counts[skey]
            row.append(count)
            total[skey] += count
        rows.append(row)
    rows.append(["all together"] + [total[x] for x in keys])
    headers = ["annotator"] + keys
    return tabulate(rows, headers=headers)
Example #5
0
def summary(counts,
            doc_counts=None,
            title=None,
            keys=None,
            total=None):
    """
    (Multi-line) string summary of a categories dict.

    `counts` maps each key to its overall count.

    `doc_counts` gives per-document stats (document -> key -> count)
    from which we extract min, max, mean and median columns.  These
    columns only appear if at least one of the selected keys occurs in
    `doc_counts`; keys without per-document stats get blank cells.

    If you supply the `keys` iterable, we use it both to select
    a subset of the keys and to assign an order to them.

    `total` can be set to True/False depending on whether you
    want a final TOTAL line. If you set it to None, we use the
    default (true).  The TOTAL line sums every value in `counts`,
    whether or not its key was selected by `keys`.

    `title`, if given, is used as the header of the first column.
    """
    doc_counts = doc_counts or {}
    # materialise: keys is scanned twice below (any(...) and the row loop)
    keys = list(counts.keys() if keys is None else keys)

    # blank cells for rows that have no per-document statistics
    no_stats = [None, None, None, None]

    dcount_keys = frozenset(concat(d.keys() for d in doc_counts.values()))
    has_doc_counts = any(k in dcount_keys for k in keys)
    rows = []
    for key in keys:
        row = [key, counts[key]]
        if key in dcount_keys:
            dcounts = [doc_counts[d][key] for d in doc_counts]
            mean, median = rounded_mean_median(dcounts)
            row += [min(dcounts),
                    max(dcounts),
                    mean,
                    median]
        elif has_doc_counts:
            row += no_stats
        rows.append(row)
    if total is not False:
        # pad the TOTAL row itself, not the last per-key row
        total_row = ["TOTAL", sum(counts.values())]
        if has_doc_counts:
            total_row += no_stats
        rows.append(total_row)

    headers = [title or "", "total"]
    if has_doc_counts:
        headers += ["min", "max", "mean", "median"]
    return tabulate(rows, headers=headers)
Example #6
0
def wide_summary(s_counts, keys=None):
    """
    Return a table of relation instance and CDU counts for each
    section

    `s_counts` maps each section to a mapping from count key to count.
    One row is produced per section, followed by an "all together" row
    holding the per-key sums over all sections.

    `keys` selects and orders the columns.  Any iterable is accepted (it
    is copied into a list).  If it is omitted or empty, the columns are
    the union of the keys found in the per-section mappings, in no
    particular order.

    The table is rendered with `tabulate`.
    """
    if keys:
        # copy to a list: the header concatenation below needs one, and
        # the keys are iterated once per section
        keys = list(keys)
    else:
        keys = list(frozenset(concat(d.keys() for d in s_counts.values())))

    rows = []
    total = defaultdict(int)
    for section in s_counts:
        section_counts = s_counts[section]
        row = [section]
        for skey in keys:
            count = section_counts[skey]
            row.append(count)
            total[skey] += count
        rows.append(row)
    rows.append(["all together"] + [total[x] for x in keys])
    headers = ["annotator"] + keys
    return tabulate(rows, headers=headers)
Example #7
0
def summary(counts,
            doc_counts=None,
            title=None,
            keys=None,
            total=None):
    """
    (Multi-line) string summary of a categories dict.

    `counts` maps each key to its overall count.

    `doc_counts` gives per-document stats (document -> key -> count)
    from which we extract min, max, mean and median columns.  These
    columns only appear if at least one of the selected keys occurs in
    `doc_counts`; keys without per-document stats get blank cells.

    If you supply the `keys` iterable, we use it both to select
    a subset of the keys and to assign an order to them.

    `total` can be set to True/False depending on whether you
    want a final TOTAL line. If you set it to None, we use the
    default (true).  The TOTAL line sums every value in `counts`,
    whether or not its key was selected by `keys`.

    `title`, if given, is used as the header of the first column.
    """
    doc_counts = doc_counts or {}
    # materialise: keys is scanned twice below (any(...) and the row loop)
    keys = list(counts.keys() if keys is None else keys)

    # blank cells for rows that have no per-document statistics
    no_stats = [None, None, None, None]

    dcount_keys = frozenset(concat(d.keys() for d in doc_counts.values()))
    has_doc_counts = any(k in dcount_keys for k in keys)
    rows = []
    for key in keys:
        row = [key, counts[key]]
        if key in dcount_keys:
            dcounts = [doc_counts[d][key] for d in doc_counts]
            mean, median = rounded_mean_median(dcounts)
            row += [min(dcounts),
                    max(dcounts),
                    mean,
                    median]
        elif has_doc_counts:
            row += no_stats
        rows.append(row)
    if total is not False:
        # pad the TOTAL row itself, not the last per-key row
        total_row = ["TOTAL", sum(counts.values())]
        if has_doc_counts:
            total_row += no_stats
        rows.append(total_row)

    headers = [title or "", "total"]
    if has_doc_counts:
        headers += ["min", "max", "mean", "median"]
    return tabulate(rows, headers=headers)
Example #8
0
    def topdown(self, pred, prunable=None):
        """
        Top-down search for the largest subtrees satisfying `pred`.

        If `pred(self)` holds, the search stops here and `[self]` is
        returned without looking any deeper.  Otherwise, if `prunable` is
        supplied and `prunable(self)` holds, this whole subtree is discarded
        and `[]` is returned.  Note the order: `pred` is consulted first, so
        it always wins over `prunable`.

        Failing both, the search recurses into the children of `self` and
        the per-child results are joined with `concat`.  Children that are
        not `SearchableTree` instances (e.g. leaves) are skipped.
        """
        if pred(self):
            return [self]
        if prunable and prunable(self):
            return []
        # only children that are themselves searchable can be descended into
        subtrees = (kid for kid in self if isinstance(kid, SearchableTree))
        return concat(kid.topdown(pred, prunable) for kid in subtrees)
Example #9
0
 def matching_kids():
     """
     Run `topdown_smallest` (with the enclosing `pred` and `prunable`) on
     each child of `self` that is a `SearchableTree`, and join the
     per-child results with `concat`.
     """
     subtrees = (kid for kid in self if isinstance(kid, SearchableTree))
     return concat(kid.topdown_smallest(pred, prunable) for kid in subtrees)
Example #10
0
 def matching_kids():
     """
     Run `topdown_smallest` (with the enclosing `pred` and `prunable`) on
     each child of `self` that is a `SearchableTree`, and join the
     per-child results with `concat`.
     """
     subtrees = (kid for kid in self if isinstance(kid, SearchableTree))
     return concat(kid.topdown_smallest(pred, prunable) for kid in subtrees)