def edge_summary(filepath: str, input_type: str, max_rows: int, output: str):
    """
    Loads and summarizes a knowledge graph edge set
    \f

    Every edge is reduced to the tuple (subject prefix, subject category,
    edge label, relation, object prefix, object category); the most frequent
    tuples are rendered as an ASCII table.

    Parameters
    ----------
    filepath: str
        Input file; used both to build the transformer and as the argument to its ``parse``
    input_type: str
        Input file type, forwarded to ``build_transformer``
    max_rows: int
        Max number of distinct tuples to display (passed to ``Counter.most_common``)
    output: str
        File to write the table to; when ``None`` the table is echoed via click

    """
    transformer = build_transformer(filepath, input_type)
    transformer.parse(filepath)
    graph = transformer.graph

    signature_counts = Counter()
    with click.progressbar(graph.edges(data=True), label='Reading knowledge graph') as bar:
        for s, o, edge_attr in bar:
            # Use graph.nodes[...]: the graph.node alias was removed in NetworkX 2.4.
            subject_attr = graph.nodes[s]
            object_attr = graph.nodes[o]
            signature = (
                stringify(get_prefix(s)),
                stringify(subject_attr.get('category')),
                stringify(edge_attr.get('edge_label')),
                stringify(edge_attr.get('relation')),
                stringify(get_prefix(o)),
                stringify(object_attr.get('category')),
            )
            signature_counts[signature] += 1

    headers = [[
        'Subject Prefix', 'Subject Category', 'Edge Label', 'Relation',
        'Object Prefix', 'Object Category', 'Frequency'
    ]]
    rows = [[*signature, count] for signature, count in signature_counts.most_common(max_rows)]
    table = AsciiTable(headers + rows).table

    if output is not None:
        file_write(output, table)
    else:
        click.echo(table)
def edge_summary(config: dict, filepath: str, input_type: str, max_rows: int, output: str):
    """
    Loads and summarizes a knowledge graph edge set, where the input is a file.
    \f

    Every edge is reduced to the tuple (subject prefix, subject category,
    edge label, relation, object prefix, object category); the most frequent
    tuples are rendered as an ASCII table.

    Parameters
    ----------
    config: dict
        A dictionary containing the configuration for kgx.cli (not read by this function)
    filepath: str
        Input file
    input_type: str
        Input file type
    max_rows: int
        Max number of rows to display in the output
    output: str
        Where to write the output (stdout, by default, i.e. when ``None``)

    """
    transformer = build_transformer(filepath, input_type)
    transformer.parse(filepath)
    graph = transformer.graph

    signature_counts = Counter()
    with click.progressbar(graph.edges(data=True), label='Reading knowledge graph') as bar:
        for s, o, edge_attr in bar:
            # Use graph.nodes[...]: the graph.node alias was removed in NetworkX 2.4.
            subject_attr = graph.nodes[s]
            object_attr = graph.nodes[o]
            signature = (
                stringify(get_prefix(s)),
                stringify(subject_attr.get('category')),
                stringify(edge_attr.get('edge_label')),
                stringify(edge_attr.get('relation')),
                stringify(get_prefix(o)),
                stringify(object_attr.get('category')),
            )
            signature_counts[signature] += 1

    headers = [[
        'Subject Prefix', 'Subject Category', 'Edge Label', 'Relation',
        'Object Prefix', 'Object Category', 'Frequency'
    ]]
    rows = [[*signature, count] for signature, count in signature_counts.most_common(max_rows)]
    table = AsciiTable(headers + rows).table

    if output is not None:
        file_write(output, table)
    else:
        click.echo(table)
def node_summary(config: dict, filepath: str, input_type: str, max_rows: int, output: str):
    """
    Loads and summarizes a knowledge graph node set, where the input is a file.
    \f

    Prints the node and edge counts, the prefixes seen in ``same_as`` values,
    and three frequency tables: (prefix, category) pairs, categories, and
    prefixes. The last two tables are aggregated over the top ``max_rows``
    (prefix, category) pairs only.

    Parameters
    ----------
    config: dict
        A dictionary containing the configuration for kgx.cli (not read by this function)
    filepath: str
        Input file
    input_type: str
        Input file type
    max_rows: int
        Max number of rows to display in the output
    output: str
        Where to write the output (stdout, by default, i.e. when ``None``)

    """
    transformer = build_transformer(filepath, input_type)
    transformer.parse(filepath)
    graph = transformer.graph

    def emit_table(header, rows):
        # Render one table and send it to the output file (appending) or stdout.
        table = AsciiTable([header] + rows).table
        if output is not None:
            file_write(output, table, mode='a')
        else:
            click.echo(table)

    tuples = []
    xrefs = set()
    with click.progressbar(graph.nodes(data=True), label='Reading knowledge graph') as bar:
        for n, data in bar:
            if 'same_as' in data:
                xrefs.update(get_prefix(xref) for xref in data['same_as'])

            category = data.get('category')
            prefix = get_prefix(n)

            if isinstance(category, (list, set)):
                # Drop the generic 'named_thing' marker when something more
                # specific is present. Work on a copy so the graph's own
                # attribute is not mutated; a non-collection category (e.g. a
                # plain string) is left untouched instead of raising on .remove().
                if len(category) > 1 and 'named_thing' in category:
                    category = list(category)
                    category.remove('named_thing')
                category = ", ".join("'{}'".format(c) for c in category)

            if prefix is not None:
                prefix = "'{}'".format(prefix)

            tuples.append((prefix, category))

    node_line = '|nodes|: {}'.format(len(graph.nodes()))
    edge_line = '|edges|: {}'.format(len(graph.edges()))
    # The counts always go to stdout, exactly once.
    click.echo(node_line)
    click.echo(edge_line)

    xrefs = [x for x in xrefs if x is not None]
    if xrefs:
        line = 'xref prefixes: {}'.format(', '.join(xrefs))
        if output is not None:
            # NOTE(review): these writes rely on file_write's default mode while
            # the tables below pass mode='a'; if the default truncates, only the
            # last of these three lines survives -- confirm against file_write.
            file_write(output, node_line)
            file_write(output, edge_line)
            file_write(output, line)
        else:
            click.echo(line)

    top_pairs = Counter(tuples).most_common(max_rows)
    emit_table(['Prefix', 'Category', 'Frequency'],
               [[*pair, frequency] for pair, frequency in top_pairs])

    category_count = defaultdict(int)
    prefix_count = defaultdict(int)
    for (prefix, category), frequency in top_pairs:
        category_count[category] += frequency
        prefix_count[prefix] += frequency

    emit_table(['Category', 'Frequency'],
               [[name, frequency] for name, frequency in category_count.items()])
    emit_table(['Prefixes', 'Frequency'],
               [[name, frequency] for name, frequency in prefix_count.items()])
def node_summary(filepath: str, input_type: str, max_rows: int, output: str):
    """
    Loads and summarizes a knowledge graph node set
    \f

    Prints the prefixes seen in ``same_as`` values and three frequency
    tables: (prefix, category) pairs, categories, and prefixes. The last two
    tables are aggregated over the top ``max_rows`` (prefix, category) pairs
    only.

    Parameters
    ----------
    filepath: str
        Input file; used both to build the transformer and as the argument to its ``parse``
    input_type: str
        Input file type, forwarded to ``build_transformer``
    max_rows: int
        Max number of distinct (prefix, category) pairs to display
    output: str
        File to write the summary to; when ``None`` everything is echoed via click

    """
    transformer = build_transformer(filepath, input_type)
    transformer.parse(filepath)
    graph = transformer.graph

    def emit_table(header, rows):
        # Render one table and send it to the output file (appending) or stdout.
        table = AsciiTable([header] + rows).table
        if output is not None:
            file_write(output, table, mode='a')
        else:
            click.echo(table)

    tuples = []
    xrefs = set()
    with click.progressbar(graph.nodes(data=True), label='Reading knowledge graph') as bar:
        for n, data in bar:
            if 'same_as' in data:
                xrefs.update(get_prefix(xref) for xref in data['same_as'])

            category = data.get('category')
            prefix = get_prefix(n)

            if isinstance(category, (list, set)):
                # Drop the generic 'named thing' marker when something more
                # specific is present. Work on a copy so the graph's own
                # attribute is not mutated; a non-collection category (e.g. a
                # plain string) is left untouched instead of raising on .remove().
                if len(category) > 1 and 'named thing' in category:
                    category = list(category)
                    category.remove('named thing')
                category = ", ".join("'{}'".format(c) for c in category)

            if prefix is not None:
                prefix = "'{}'".format(prefix)

            tuples.append((prefix, category))

    xrefs = [x for x in xrefs if x is not None]
    if xrefs:
        line = 'xref prefixes: {}'.format(', '.join(xrefs))
        if output is not None:
            # NOTE(review): uses file_write's default mode, unlike the
            # mode='a' table writes below -- confirm the intended mode.
            file_write(output, line)
        else:
            click.echo(line)

    top_pairs = Counter(tuples).most_common(max_rows)
    emit_table(['Prefix', 'Category', 'Frequency'],
               [[*pair, frequency] for pair, frequency in top_pairs])

    category_count = defaultdict(int)
    prefix_count = defaultdict(int)
    for (prefix, category), frequency in top_pairs:
        category_count[category] += frequency
        prefix_count[prefix] += frequency

    emit_table(['Category', 'Frequency'],
               [[name, frequency] for name, frequency in category_count.items()])
    emit_table(['Prefixes', 'Frequency'],
               [[name, frequency] for name, frequency in prefix_count.items()])