description=
    "Check number of columns in the knowledge base reading from standard input against HEAD-KB. Mismatch print to standard error output. Exit 1 when find one or more mismatch."
)
# Option: alternative HEAD-KB file; defaults to the path exported by
# metrics_knowledge_base.
parser.add_argument(
    '-H',
    '--head-kb',
    default=metrics_knowledge_base.PATH_HEAD_KB,
    help=
    'Header for the knowledge base, which specify its types and their atributes (default: %(default)s).',
)
# Option: echo every input line to standard output while checking.
parser.add_argument(
    '--cat',
    action="store_true",
    help='Print standard input to standard output.',
)

arguments = parser.parse_args()

kb_struct = metrics_knowledge_base.KnowledgeBase(
    path_to_headkb=arguments.head_kb)

# Flipped to False as soon as any line's column count disagrees with the
# head returned for it.
kb_is_ok = True
# Pre-initialised so the name exists even when standard input is empty.
line_num = 0
for line_num, line in enumerate(sys.stdin, 1):
    columns = line.rstrip("\n").split("\t")
    ent_head = kb_struct.get_ent_head(columns)
    actual_count = len(columns)
    expected_count = len(ent_head)
    if actual_count != expected_count:
        # Report the mismatch on stderr (1-based line number).
        message = (
            'Bad line %s in KB: has %s columns, but its entity in HEAD-KB has %s columns.\n'
            % (line_num, actual_count, expected_count))
        sys.stderr.write(message)
        kb_is_ok = False
    if arguments.cat:
        # Pass the untouched input line through.
        sys.stdout.write(line)
"""

import sys
import metrics_knowledge_base

# Load "wiki_stats": each tab-separated line is keyed by the Czech Wikipedia
# URL built from its first field; the remaining fields are stored as a list.
with open("wiki_stats") as wiki_stats:
    stats = dict()
    for line in wiki_stats:
        fields = line.rstrip("\n").split("\t")
        stats["https://cs.wikipedia.org/wiki/" + fields[0]] = fields[1:]

found = 0
not_found = 0

kb_struct = metrics_knowledge_base.KnowledgeBase()

# Copy standard input to standard output, overwriting the three Wikipedia
# statistic columns of every line whose link has an entry in `stats`.
for line in sys.stdin:
    columns = line.rstrip("\n").split("\t")

    link = kb_struct.get_data_for(columns, "WIKIPEDIA LINK")
    if link and link in stats:
        link_stats = stats[link]
        # The position in this tuple is the index into the stats record.
        for stat_index, attribute in enumerate(
                ("WIKI BACKLINKS", "WIKI HITS", "WIKI PRIMARY SENSE")):
            columns[kb_struct.get_col_for(columns,
                                          attribute)] = link_stats[stat_index]
        sys.stdout.write("\t".join(columns) + "\n")
        found += 1
    else:
        # No statistics for this line: emit it unchanged.
        sys.stdout.write(line)
                subtype += 'G'

            subtype = ''.join(sorted(subtype))
            if 'person' in ent_type_set:
                gender = kb_struct.get_data_for(line, 'GENDER')
                append_names_to_list(names, 'P:' + subtype + '::' + gender,
                                     url_origin)
            elif 'geographical' in ent_type_set:
                append_names_to_list(
                    names, 'L' + (':{}'.format(subtype) if subtype else ''),
                    url_origin)
            else:
                continue

        for n in name_typeflag:
            print(n)


if __name__ == "__main__":
    # Command-line options: knowledge-base path and language.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-k',
        '--kb_path',
        help='Path of Knowledge base.',
    )
    parser.add_argument(
        '-l',
        '--lang',
        help='Language to process.',
    )
    args = parser.parse_args()

    # These names are bound at module scope; code earlier in the file reads
    # `kb_struct` as a global (presumably `nationalities` too - verify).
    nat_loader = NatLoader.load(args.lang)
    nationalities = nat_loader.get_nationalities()
    kb_struct = metrics_knowledge_base.KnowledgeBase(
        lang=args.lang,
        path_to_kb=args.kb_path,
    )
    kb_struct.check_or_load_kb()

    generate_name_alternatives(args.kb_path)
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import metrics_knowledge_base
import argparse

# Command-line interface: an optional HEAD-KB override and the mandatory
# knowledge-base file.
parser = argparse.ArgumentParser()
parser.add_argument(
    "-H",
    "--head-kb",
    help=
    "Header for the knowledge base, which specify its types and their atributes (default: %(default)s).",
    default=metrics_knowledge_base.PATH_HEAD_KB,
)
parser.add_argument("-k",
                    "--knowledge-base",
                    help="File containing the knowledge base",
                    required=True)

arguments = parser.parse_args()

# Build the knowledge base from the given files, run insert_metrics() on it
# and print its string form to standard output.
kb = metrics_knowledge_base.KnowledgeBase(path_to_headkb=arguments.head_kb,
                                          path_to_kb=arguments.knowledge_base)
kb.insert_metrics()
# Call form instead of the Python 2 print statement: `print kb` is a
# SyntaxError on Python 3, while `print(kb)` prints the same text on both.
print(kb)
Esempio n. 5
0
# File names for cached data (presumably pickle files - confirm at use site).
CACHED_SUBNAMES = 'cached_subnames.pkl'
CACHED_INFLECTEDNAMES = 'cached_inflectednames.pkl'

# When args.indir is empty or not an existing directory, fall back to the
# "inputs/<lang>" directory located next to this script.
if not (args.indir and os.path.isdir(args.indir)):
	script_dir = os.path.dirname(os.path.realpath(__file__))
	args.indir = os.path.join(script_dir, 'inputs/{}'.format(args.lang))

# automata variants config
atm_config = AutomataVariants.DEFAULT
if args.lowercase:
	atm_config |= AutomataVariants.LOWERCASE
if args.autocomplete:
	# different format - can not be combined with other types,
	# so it replaces whatever was configured above
	atm_config = AutomataVariants.NONACCENT

# load KB struct
kb_struct = metrics_knowledge_base.KnowledgeBase(args.lang, args.kb)

# load the language specific namelist and hand it the KB and automata config
namelist = ModuleLoader.load('namelist', args.lang, 'Namelist', '..dictionaries')
namelist.setKBStruct(kb_struct)
namelist.setAutomataVariants(atm_config)

# load language specific class of Persons entity
persons = EntityLoader.load('persons', args.lang, 'Persons')

# Pattern anchored at the end of the string, preceded by start-of-string or a
# space; allows an optional "da"/"von" particle and "#..." suffixes on parts.
SURNAME_PATTERN = r"(((?<=^)|(?<=[ ]))(?:(?:da|von)(?:#[^ ]+)? )?((?:\p{Lu}\p{Ll}*(?:#[^- ]+)?-)?(?:\p{Lu}\p{Ll}+(?:#[^- ]+)?))$)"
SURNAME_MATCH = regex.compile(SURNAME_PATTERN)
UNWANTED_MATCH = namelist.reUnwantedMatch()


def pickle_load(fpath):
	"""Return the object unpickled from the file at *fpath*.

	The file is opened in binary mode and closed before returning.

	NOTE(review): unpickling can execute arbitrary code, so this must only
	be pointed at files this program produced itself.
	"""
	with open(fpath, 'rb') as handle:
		loaded = pickle.load(handle)
	return loaded

def pickle_dump(data, fpath):