"w") as f:
        for e in projection.entityToPreferredLabels:
            for v in projection.entityToPreferredLabels[e]:
                f.write("%s preferred_label %s\n" % (e, v))
        for a in annotations:
            f.write("%s\n" % " ".join(a))

# Build the URI document: a list of token sentences made of two parts,
#   1. the walks returned by get_rdf2vec_walks (one sentence per walk), and
#   2. the axiom sentences read from "axioms.txt" in the cache directory
#      (only when that file exists).
# All three lists stay empty unless URI_Doc is set to "yes" in the
# DOCUMENT section of the configuration.
walk_sentences, axiom_sentences, URI_Doc = [], [], []
if "URI_Doc" in config["DOCUMENT"] and config["DOCUMENT"]["URI_Doc"] == "yes":
    print("\nGenerate URI document ...")
    walks_ = get_rdf2vec_walks(
        onto_file=ontology_file,
        walker_type=config["DOCUMENT"]["walker"],
        walk_depth=int(config["DOCUMENT"]["walk_depth"]),
        classes=entities,
    )
    print("Extracted %d walks for %d seed entities" %
          (len(walks_), len(entities)))
    # Stringify every walk element so each sentence is a list of str tokens.
    walk_sentences += [list(map(str, x)) for x in walks_]

    axiom_file = os.path.join(config["DOCUMENT"]["cache_dir"], "axioms.txt")
    if os.path.exists(axiom_file):
        # The context manager closes the handle deterministically; the
        # previous open(...).readlines() left it to the garbage collector.
        with open(axiom_file) as axiom_fh:
            for line in axiom_fh:
                # split() with no argument already ignores leading/trailing
                # whitespace, so it yields the same tokens as strip().split().
                axiom_sentences.append(line.split())
    print("Extracted %d axiom sentences" % len(axiom_sentences))
    URI_Doc = walk_sentences + axiom_sentences

Example #2
0
print("\n		1.Extract corpus and learning embedding ... \n")
# Candidate classes: one entry per line of the class file, whitespace-trimmed.
with open(FLAGS.class_file) as class_fh:
    classes = [line.strip() for line in class_fh]
candidate_num = len(classes)
# Set copy of ``classes`` so the per-line membership test below is O(1)
# instead of a linear scan of the list.
class_lookup = set(classes)
# uri_label: first token of an rdfs:label line -> preprocessed label words.
uri_label = dict()
# annotations: token lists of the non-label lines whose first token is a class.
annotations = list()
with open(FLAGS.annotation_file) as annotation_fh:
    for line in annotation_fh:
        tmp = line.strip().split()
        if len(tmp) < 2:
            # Blank or truncated line: there is no predicate token to inspect,
            # and indexing tmp[1] would raise IndexError.
            continue
        if tmp[1] == 'http://www.w3.org/2000/01/rdf-schema#label':
            uri_label[tmp[0]] = pre_process_words(tmp[2:])
        elif tmp[0] in class_lookup:
            annotations.append(tmp)

# Build the URI document: walk sentences followed by axiom sentences.
# Both parts are only produced when the URI_Doc flag is "yes" (any case);
# otherwise URI_Doc ends up as an empty list.
walk_sentences, axiom_sentences = list(), list()
if FLAGS.URI_Doc.lower() == 'yes':
    walks_ = get_rdf2vec_walks(onto_file=FLAGS.onto_file, walker_type=FLAGS.walker,
                               walk_depth=FLAGS.walk_depth, classes=classes)
    print('Extracted {} walks for {} classes!'.format(len(walks_), len(classes)))
    # Stringify every walk element so each sentence is a list of str tokens.
    walk_sentences += [list(map(str, x)) for x in walks_]
    # The context manager closes the axiom file deterministically; the
    # previous open(...).readlines() left the handle to the garbage collector.
    with open(FLAGS.axiom_file) as axiom_fh:
        for line in axiom_fh:
            # split() with no argument already ignores leading/trailing
            # whitespace, so it yields the same tokens as strip().split().
            axiom_sentences.append(line.split())
    print('Extracted %d axiom sentences' % len(axiom_sentences))
URI_Doc = walk_sentences + axiom_sentences

# Build the literal document. When the Lit_Doc flag is "yes" (any case), each
# annotation whose tokens from index 2 onward survive pre_process_words adds
# one sentence: the uri_label entry of the annotation's first token followed
# by the preprocessed words.
# NOTE(review): uri_label is indexed directly, so an annotation whose first
# token has no entry raises KeyError - confirm every class has a label.
Lit_Doc = []
if FLAGS.Lit_Doc.lower() == 'yes':
    for record in annotations:
        words = pre_process_words(record[2:])
        if len(words) == 0:
            # Nothing left after preprocessing: skip this annotation.
            continue
        subject_label = uri_label[record[0]]
        Lit_Doc.append(subject_label + words)
    print('Extracted %d literal annotations' % len(Lit_Doc))
              encoding='utf-8') as f:
        for e in projection.entityToPreferredLabels:
            for v in projection.entityToPreferredLabels[e]:
                f.write('%s preferred_label %s\n' % (e, v))
        for a in annotations:
            f.write('%s\n' % ' '.join(a))

# Build the URI document: a list of token sentences made of two parts,
#   1. the walks returned by get_rdf2vec_walks (one sentence per walk), and
#   2. the axiom sentences read from 'axioms.txt' in the cache directory
#      (only when that file exists).
# All three lists stay empty unless URI_Doc is set to 'yes' in the
# DOCUMENT section of the configuration.
walk_sentences, axiom_sentences, URI_Doc = [], [], []
if 'URI_Doc' in config['DOCUMENT'] and config['DOCUMENT']['URI_Doc'] == 'yes':
    print('\nGenerate URI document ...')
    walks_ = get_rdf2vec_walks(onto_file=ontology_file,
                               walker_type=config['DOCUMENT']['walker'],
                               walk_depth=int(
                                   config['DOCUMENT']['walk_depth']),
                               classes=entities)
    print('Extracted %d walks for %d seed entities' %
          (len(walks_), len(entities)))
    # Stringify every walk element so each sentence is a list of str tokens.
    walk_sentences += [list(map(str, x)) for x in walks_]

    axiom_file = os.path.join(config['DOCUMENT']['cache_dir'], 'axioms.txt')
    if os.path.exists(axiom_file):
        # The context manager closes the handle deterministically; the
        # previous open(...).readlines() left it to the garbage collector.
        with open(axiom_file) as axiom_fh:
            for line in axiom_fh:
                # split() with no argument already ignores leading/trailing
                # whitespace, so it yields the same tokens as strip().split().
                axiom_sentences.append(line.split())
    print('Extracted %d axiom sentences' % len(axiom_sentences))
    URI_Doc = walk_sentences + axiom_sentences