Beispiel #1
0
def main():
    """Assemble and execute the "OAEI_visualization" pipeline configuration.

    All inputs are files below ``<package_directory>/../data/oaei_data``
    (the darkscape graph as source, the oldschoolrunescape graph as target).
    For each graph a ``load_kg_with_rdflib_ttl_interface`` step and a
    ``GraphToolbox.interface`` step are registered; the two branches are then
    fed into ``WalkEmbedder_1``, ``concat_combiner`` and ``TSNEInterface``,
    followed by the embedding saver and the four visualizer steps. The
    finished pipeline is wrapped in a ``Configuration`` and handed to
    ``ConfigurationHandler.execute``.
    """
    # Every file used by this run sits in the same dataset folder.
    # (An alternative dataset used with this script was
    # data/sap_hilti_data/sap_hilti_full_strings.)
    oaei_dir = os.path.join(package_directory, '..', 'data', 'oaei_data')

    src_triples = os.path.join(oaei_dir, 'graph_triples_darkscape.nt')
    tgt_triples = os.path.join(oaei_dir, 'graph_triples_oldschoolrunescape.nt')
    src_corpus = os.path.join(oaei_dir, 'corpus_darkscape.txt')
    tgt_corpus = os.path.join(oaei_dir, 'corpus_oldschoolrunescape.txt')

    train_csv = os.path.join(oaei_dir, 'oaei_gold_standard2.csv')
    test_csv = os.path.join(oaei_dir, 'possible_matches.csv')
    gold_mapping = InternalGoldStandard({'trainsets': [train_csv],
                                         'testsets': [test_csv]})

    dim = 2
    # Not referenced by any pipeline step below; LogisticRegression() was the
    # alternative noted next to it.
    model = XGBClassifier()

    # Source and target side share one label file; it is read once per side.
    labelfile = os.path.join(oaei_dir, 'labels.txt')
    src_properties = StringMatcher_Interface.get_labels_from_file(labelfile)
    tgt_properties = StringMatcher_Interface.get_labels_from_file(labelfile)

    name = "OAEI_visualization"
    pipeline = Pipeline()

    # One branch per graph: loader step first, then the GraphToolbox step
    # (source graph first, target graph second).
    branches = []
    for triples_path in (src_triples, tgt_triples):
        loaded = pipeline.append_step(load_kg_with_rdflib_ttl_interface, None,
                                      PipelineDataTuple(triples_path))
        branches.append(pipeline.append_step(GraphToolbox.interface,
                                             PipelineDataTuple(loaded),
                                             PipelineDataTuple(triples_path)))

    # Joint part of the pipeline, consuming both branches.
    joint = pipeline.append_step(WalkEmbedder_1.interface, PipelineDataTuple(*branches),
                                 PipelineDataTuple(dim, 'steps', True, 1))
    joint = pipeline.append_step(concat_combiner.interface, PipelineDataTuple(joint), None)
    joint = pipeline.append_step(TSNEInterface.interface, PipelineDataTuple(joint),
                                 PipelineDataTuple(2))

    # The remaining steps take no extra arguments and are chained in order.
    for step in (EmbeddingSaver, CategoriesVisualizer, StratifiedVisualizer,
                 TypeVisualizer, FullVisualizer):
        joint = pipeline.append_step(step.interface, PipelineDataTuple(joint), None)

    configuration = Configuration(name, src_corpus, tgt_corpus, src_triples, tgt_triples,
                                  gold_mapping, dim, pipeline, src_properties, tgt_properties,
                                  calc_PLUS_SCORE=False, use_cache=False, use_streams=False)
    handler = ConfigurationHandler()
    handler.execute(configuration)
Beispiel #2
0
def main(src_triples, tgt_triples, labels, filename):
    """Run a precise batch string match between two sets of triples.

    Args:
        src_triples: Source-side triples; forwarded unchanged to
            ``StringMatcher`` and ``preciseBatchMatch``.
        tgt_triples: Target-side triples; forwarded the same way.
        labels: Either a ``str`` that is handed to
            ``smi.get_labels_from_file`` to obtain the label properties, or
            an already prepared list of label properties.
        filename: Passed as first argument to ``preciseBatchMatch``
            (presumably the output file -- confirm against StringMatcher).

    Raises:
        TypeError: If the resolved label properties are not a list.
    """
    # match products
    if isinstance(labels, str):
        index_properties = smi.get_labels_from_file(labels)
    else:
        index_properties = labels

    # Explicit raise instead of ``assert`` so the validation is still
    # performed when Python runs with -O (which strips assert statements).
    if not isinstance(index_properties, list):
        raise TypeError(
            "Labels-parameter must be provided as a list of label-properties")

    stringmatcher = StringMatcher(src_triples, tgt_triples, index_properties)
    # NOTE(review): 0.9 is presumably the match threshold -- confirm against
    # StringMatcher.preciseBatchMatch. The call's result is not used here.
    stringmatcher.preciseBatchMatch(filename, 0.9, index_properties,
                                    src_triples, tgt_triples)
Beispiel #3
0
def main():
    """Assemble and execute the "test" matching pipeline on the SAP/Hilti data.

    All inputs are files below
    ``<package_directory>/../data/sap_hilti_data/balanced_walks`` (the
    ``hilti_erp`` graph as source, the ``hilti_web`` graph as target). For
    each graph a ``load_kg_with_rdflib_ttl_interface`` step and a
    ``GraphToolbox.interface`` step are registered; the two branches are then
    passed through ``WalkEmbedder_1``, ``concat_combiner``, ``FlatMatcher``
    (given a ``LogisticRegression`` instance) and ``StableRankMatcher``. The
    finished pipeline is wrapped in a ``Configuration`` and handed to
    ``ConfigurationHandler.execute``.
    """
    # Every file used by this run sits in the same dataset folder.
    # (An alternative dataset used with this script was
    # data/sap_hilti_data/sap_hilti_full_strings.)
    walks_dir = os.path.join(package_directory, '..', 'data', 'sap_hilti_data',
                             'balanced_walks')

    src_triples = os.path.join(walks_dir, 'graph_triples_hilti_erp.nt')
    tgt_triples = os.path.join(walks_dir, 'graph_triples_hilti_web.nt')
    src_corpus = os.path.join(walks_dir, 'corpus_hilti_erp.txt')
    tgt_corpus = os.path.join(walks_dir, 'corpus_hilti_web.txt')

    train_csv = os.path.join(walks_dir, 'final_trainset.csv')
    test_csv = os.path.join(walks_dir, 'possible_matches.csv')
    gold_mapping = InternalGoldStandard({'trainsets': [train_csv],
                                         'testsets': [test_csv]})

    dim = 1
    # Classifier handed to the FlatMatcher step; XGBClassifier() was the
    # alternative noted next to it.
    model = LogisticRegression()

    # Source and target side share one label file; it is read once per side.
    # Single-property alternatives noted for these lists were
    # .../hilti_erp/property/mara_fert.maktx (source) and
    # .../hilti_web/property/products.name (target).
    labelfile = os.path.join(walks_dir, 'labels.txt')
    src_properties = StringMatcher_Interface.get_labels_from_file(labelfile)
    tgt_properties = StringMatcher_Interface.get_labels_from_file(labelfile)
    use_streams = False

    name = "test"
    pipeline = Pipeline()

    # One branch per graph: loader step first, then the GraphToolbox step
    # (source graph first, target graph second). A per-branch
    # ReadSentencesInterfaceWrapper step fed with the corpus path is
    # currently disabled.
    branches = []
    for triples_path in (src_triples, tgt_triples):
        loaded = pipeline.append_step(load_kg_with_rdflib_ttl_interface, None,
                                      PipelineDataTuple(triples_path))
        branches.append(pipeline.append_step(GraphToolbox.interface,
                                             PipelineDataTuple(loaded),
                                             PipelineDataTuple(triples_path)))

    # Joint part of the pipeline, consuming both branches.
    joint = pipeline.append_step(WalkEmbedder_1.interface, PipelineDataTuple(*branches),
                                 PipelineDataTuple(dim, 'steps', False, 1))
    joint = pipeline.append_step(concat_combiner.interface, PipelineDataTuple(joint), None)

    # Currently disabled optional steps: muse (with gold_mapping) before the
    # matcher; TSNEInterface, the four visualizers and EmbeddingSaver after it.
    joint = pipeline.append_step(FlatMatcher.interface, PipelineDataTuple(joint),
                                 PipelineDataTuple(model))
    joint = pipeline.append_step(StableRankMatcher.interface, PipelineDataTuple(joint), None)

    # NOTE(review): the three trailing flags are passed positionally
    # (use_streams, False, True); confirm their order against the
    # Configuration signature.
    configuration = Configuration(name, src_corpus, tgt_corpus, src_triples, tgt_triples,
                                  gold_mapping, dim, pipeline, src_properties, tgt_properties,
                                  use_streams, False, True)
    handler = ConfigurationHandler()
    handler.execute(configuration)