description= "Check number of columns in the knowledge base reading from standard input against HEAD-KB. Mismatch print to standard error output. Exit 1 when find one or more mismatch." ) parser.add_argument( '-H', '--head-kb', help= 'Header for the knowledge base, which specify its types and their atributes (default: %(default)s).', default=metrics_knowledge_base.PATH_HEAD_KB) parser.add_argument('--cat', action="store_true", help='Print standard input to standard output.') arguments = parser.parse_args() kb_struct = metrics_knowledge_base.KnowledgeBase( path_to_headkb=arguments.head_kb) kb_is_ok = True line_num = 0 for line in sys.stdin: line_num += 1 columns = line.rstrip("\n").split("\t") ent_head = kb_struct.get_ent_head(columns) if len(columns) != len(ent_head): sys.stderr.write( 'Bad line %s in KB: has %s columns, but its entity in HEAD-KB has %s columns.\n' % (line_num, len(columns), len(ent_head))) kb_is_ok = False if arguments.cat: sys.stdout.write(line)
""" import sys import metrics_knowledge_base with open("wiki_stats") as wiki_stats: stats = dict() for line in wiki_stats: items = line.rstrip("\n").split("\t") url = "https://cs.wikipedia.org/wiki/" + items[0] stats[url] = items[1:] found = 0 not_found = 0 kb_struct = metrics_knowledge_base.KnowledgeBase() for line in sys.stdin: columns = line.rstrip("\n").split("\t") link = kb_struct.get_data_for(columns, "WIKIPEDIA LINK") if link and link in stats: columns[kb_struct.get_col_for(columns, "WIKI BACKLINKS")] = stats[link][0] columns[kb_struct.get_col_for(columns, "WIKI HITS")] = stats[link][1] columns[kb_struct.get_col_for(columns, "WIKI PRIMARY SENSE")] = stats[link][2] sys.stdout.write("\t".join(columns) + "\n") found += 1 else: sys.stdout.write(line)
# NOTE(review): fragment — the function, loop, and branch that enclose the
# first statements start before this view; indentation below is reconstructed
# and must be checked against the full file.
subtype += 'G'  # append the 'G' flag character — meaning not visible here, TODO confirm
# Sort the flag characters so equivalent flag sets serialize identically.
subtype = ''.join(sorted(subtype))
if 'person' in ent_type_set:
    # Person entries get a "P:<flags>::<gender>" type tag.
    gender = kb_struct.get_data_for(line, 'GENDER')
    append_names_to_list(names, 'P:' + subtype + '::' + gender, url_origin)
elif 'geographical' in ent_type_set:
    # Geographical entries get "L", plus ":<flags>" only when flags exist.
    append_names_to_list(
        names, 'L' + (':{}'.format(subtype) if subtype else ''), url_origin)
else:
    # Other entity types produce no name alternatives.
    continue
# NOTE(review): loop prints `name_typeflag` although names were appended to
# `names` above — presumably they alias or feed each other; verify upstream.
for n in name_typeflag:
    print(n)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('-k', '--kb_path', help='Path of Knowledge base.')
    parser.add_argument('-l', '--lang', help='Language to process.')
    args = parser.parse_args()

    # NOTE(review): `nationalities` is loaded but not visibly used in this view.
    nationalities = NatLoader.load(args.lang).get_nationalities()

    kb_struct = metrics_knowledge_base.KnowledgeBase(lang=args.lang,
                                                     path_to_kb=args.kb_path)
    kb_struct.check_or_load_kb()

    generate_name_alternatives(args.kb_path)
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ import metrics_knowledge_base import argparse parser = argparse.ArgumentParser() parser.add_argument( "-H", "--head-kb", help= "Header for the knowledge base, which specify its types and their atributes (default: %(default)s).", default=metrics_knowledge_base.PATH_HEAD_KB, ) parser.add_argument("-k", "--knowledge-base", help="File containing the knowledge base", required=True) arguments = parser.parse_args() kb = metrics_knowledge_base.KnowledgeBase(path_to_headkb=arguments.head_kb, path_to_kb=arguments.knowledge_base) kb.insert_metrics() print kb
# Filenames of on-disk pickle caches for precomputed name data.
CACHED_SUBNAMES = 'cached_subnames.pkl'
CACHED_INFLECTEDNAMES = 'cached_inflectednames.pkl'

# NOTE(review): `args` is defined before this view (an argparse result,
# presumably). Default the input dir to "inputs/<lang>" next to this script.
if not args.indir or not os.path.isdir(args.indir):
    args.indir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                              'inputs/{}'.format(args.lang))

# automata variants config
atm_config = AutomataVariants.DEFAULT
if args.lowercase:
    atm_config |= AutomataVariants.LOWERCASE
if args.autocomplete:
    # different format - can not be combined with other types
    # (note: assignment, not |= — NONACCENT replaces any earlier flags)
    atm_config = AutomataVariants.NONACCENT

# load KB struct
kb_struct = metrics_knowledge_base.KnowledgeBase(args.lang, args.kb)

namelist = ModuleLoader.load('namelist', args.lang, 'Namelist', '..dictionaries')
namelist.setKBStruct(kb_struct)
namelist.setAutomataVariants(atm_config)

# load language specific class of Persons entity
persons = EntityLoader.load('persons', args.lang, 'Persons')

# Matches a trailing surname: an optional "da"/"von" particle, an optional
# hyphenated capitalized part, then a capitalized word; each token may carry
# a "#..." annotation. Reading of the pattern — TODO confirm against inputs.
SURNAME_MATCH = regex.compile(r"(((?<=^)|(?<=[ ]))(?:(?:da|von)(?:#[^ ]+)? )?((?:\p{Lu}\p{Ll}*(?:#[^- ]+)?-)?(?:\p{Lu}\p{Ll}+(?:#[^- ]+)?))$)")
UNWANTED_MATCH = namelist.reUnwantedMatch()


def pickle_load(fpath):
    # Deserialize a previously cached object from fpath.
    with open(fpath, 'rb') as f:
        return pickle.load(f)


def pickle_dump(data, fpath):
    # NOTE(review): body continues past this view — not documented here.