def main(args):
    """
    The usual main: extract feature vectors from the corpus
    (single edus only) and write them out one row at a time.

    The corpus inputs are read from ``args``; a lexicon lookup and the
    players are derived from those inputs. ``_on_doc`` is then applied
    to every key of ``inputs.corpus`` and the per-document results are
    flattened with ``concat`` before being handed, row by row, to the
    writer built by ``_conll_writer(args)``.
    """
    inputs = _read_corpus_inputs(args)
    lexinfo = _mk_lexlookup(inputs.lexicons)
    players = get_players(inputs)

    # one batch of rows per document key, flattened into a single stream
    per_doc_rows = (_on_doc(inputs, lexinfo, players, doc_key)
                    for doc_key in inputs.corpus)
    all_rows = concat(per_doc_rows)

    out = _conll_writer(args)
    for feature_row in all_rows:
        out.writerow(feature_row)
def main(args):
    """
    The usual main: extract feature vectors from the corpus
    (single edus only) and write them out one row at a time.

    The corpus inputs are read from ``args``; a lexicon lookup and the
    players are derived from those inputs. ``_on_doc`` is then applied
    to every key of ``inputs.corpus`` and the per-document results are
    flattened with ``concat`` before being handed, row by row, to the
    writer built by ``_conll_writer(args)``.
    """
    inputs = _read_corpus_inputs(args)
    lexinfo = _mk_lexlookup(inputs.lexicons)
    players = get_players(inputs)

    # one batch of rows per document key, flattened into a single stream
    per_doc_rows = (_on_doc(inputs, lexinfo, players, doc_key)
                    for doc_key in inputs.corpus)
    all_rows = concat(per_doc_rows)

    out = _conll_writer(args)
    for feature_row in all_rows:
        out.writerow(feature_row)
def topdown(self, pred, prunable=None):
    """
    Collect the biggest subtrees satisfying ``pred``, searching from
    this node downwards.

    * If ``pred(self)`` holds, the answer is ``[self]``; the search
      does not descend any further.
    * Otherwise, if a ``prunable`` function was given and
      ``prunable(self)`` holds, the whole subtree is discarded and
      the answer is the empty list. Because ``pred`` is tested
      first, it always overrides ``prunable``.
    * Otherwise the search recurses into every child that is a
      ``SearchableTree`` and the children's results are joined with
      ``concat``. Children of any other type (leaf nodes) are
      ignored.
    """
    if pred(self):
        return [self]
    if prunable and prunable(self):
        return []
    # only proper subtrees are searched; leaves are skipped
    subtrees = (kid for kid in self if isinstance(kid, SearchableTree))
    return concat(kid.topdown(pred, prunable) for kid in subtrees)
def wide_summary(s_counts, keys=None):
    """
    Return a table of relation instance and CDU counts for each section.

    ``s_counts`` maps each section to a mapping from count key to
    count. The table has one row per section (first column headed
    "annotator"), one column per key, and a final "all together" row
    holding the per-key sums over all sections.

    ``keys`` selects and orders the columns; any iterable is accepted.
    If it is empty or None, the union of the keys found across all
    sections is used (in no particular order).

    A section that has no entry for a given key contributes 0 for it.
    """
    if keys:
        # materialise so that tuples and other iterables can be
        # concatenated with the header list below
        keys = list(keys)
    else:
        keys = list(frozenset(concat(d.keys() for d in s_counts.values())))

    total = defaultdict(int)
    rows = []
    for section, counts in s_counts.items():
        row = [section]
        for skey in keys:
            # keys is a union over sections, so a given section may
            # legitimately lack some of them
            count = counts.get(skey, 0)
            row.append(count)
            total[skey] += count
        rows.append(row)
    rows.append(["all together"] + [total[x] for x in keys])
    headers = ["annotator"] + keys
    return tabulate(rows, headers=headers)
def summary(counts, doc_counts=None, title=None, keys=None, total=None):
    """
    (Multi-line) string summary of a categories dict.

    ``counts`` maps each key to its overall count.

    ``doc_counts`` gives per-document stats (document -> key -> count)
    from which we extract helpful details: min, max, mean and median.
    A key gets these extra columns if it appears in at least one
    document's counts; in that case every document is looked up for
    that key.

    If you supply the ``keys`` sequence, we use it both to select a
    subset of the keys and to assign an order to them. By default all
    keys of ``counts`` are shown.

    ``total`` can be set to True/False depending on whether you want a
    final line for a total. If you set it to None, we use the default
    (true). Note that the total is the sum of every value in
    ``counts``, whether or not its key was selected via ``keys``.

    ``title`` is used as the header of the first column.
    """
    doc_counts = doc_counts or {}
    # materialise: keys is traversed twice below
    keys = list(counts.keys() if keys is None else keys)
    dcount_keys = frozenset(concat(d.keys() for d in doc_counts.values()))
    has_doc_counts = any(k in dcount_keys for k in keys)

    rows = []
    for key in keys:
        row = [key, counts[key]]
        if key in dcount_keys:
            dcounts = [doc_counts[d][key] for d in doc_counts]
            mean, median = rounded_mean_median(dcounts)
            row += [min(dcounts), max(dcounts), mean, median]
        elif has_doc_counts:
            # keep the row aligned with the per-document columns
            row += [None, None, None, None]
        rows.append(row)

    if total is not False:
        total_row = ["TOTAL", sum(counts.values())]
        if has_doc_counts:
            # pad the TOTAL row itself, not the last data row
            total_row += [None, None, None, None]
        rows.append(total_row)

    headers = [title or "", "total"]
    if has_doc_counts:
        headers += ["min", "max", "mean", "median"]
    return tabulate(rows, headers=headers)
def topdown(self, pred, prunable=None):
    """
    Collect the biggest subtrees satisfying ``pred``, searching from
    this node downwards.

    * If ``pred(self)`` holds, the answer is ``[self]``; the search
      does not descend any further.
    * Otherwise, if a ``prunable`` function was given and
      ``prunable(self)`` holds, the whole subtree is discarded and
      the answer is the empty list. Because ``pred`` is tested
      first, it always overrides ``prunable``.
    * Otherwise the search recurses into every child that is a
      ``SearchableTree`` and the children's results are joined with
      ``concat``. Children of any other type (leaf nodes) are
      ignored.
    """
    if pred(self):
        return [self]
    if prunable and prunable(self):
        return []
    # only proper subtrees are searched; leaves are skipped
    subtrees = (kid for kid in self if isinstance(kid, SearchableTree))
    return concat(kid.topdown(pred, prunable) for kid in subtrees)
def matching_kids():
    """
    Recursively apply ``topdown_smallest`` on the children of self.

    Takes no arguments: ``self``, ``pred`` and ``prunable`` are read
    from the enclosing scope. Only children that are
    ``SearchableTree`` instances are searched; their results are
    joined with ``concat``.
    """
    subtrees = (kid for kid in self if isinstance(kid, SearchableTree))
    return concat(kid.topdown_smallest(pred, prunable) for kid in subtrees)
def matching_kids():
    """
    Recursively apply ``topdown_smallest`` on the children of self.

    Takes no arguments: ``self``, ``pred`` and ``prunable`` are read
    from the enclosing scope. Only children that are
    ``SearchableTree`` instances are searched; their results are
    joined with ``concat``.
    """
    subtrees = (kid for kid in self if isinstance(kid, SearchableTree))
    return concat(kid.topdown_smallest(pred, prunable) for kid in subtrees)