def main():
    """Configure and execute the "OAEI_visualization" pipeline.

    All input files are resolved relative to ``package_directory`` inside
    ``../data/oaei_data`` (darkscape as source, oldschoolrunescape as
    target). The pipeline loads both triple files, passes each one through
    ``GraphToolbox``, hands the pair to ``WalkEmbedder_1`` and then chains
    ``concat_combiner``, ``TSNEInterface``, ``EmbeddingSaver`` and the four
    visualizer steps. The resulting ``Configuration`` is executed by a
    ``ConfigurationHandler``.

    Returns:
        None.
    """
    data_dir = os.path.join(package_directory, '..', 'data', 'oaei_data')

    def data_file(filename):
        """Return the path of *filename* inside the OAEI data directory."""
        return os.path.join(data_dir, filename)

    src_triples = data_file('graph_triples_darkscape.nt')
    tgt_triples = data_file('graph_triples_oldschoolrunescape.nt')
    src_corpus = data_file('corpus_darkscape.txt')
    tgt_corpus = data_file('corpus_oldschoolrunescape.txt')
    gold_mapping = InternalGoldStandard({
        'trainsets': [data_file('oaei_gold_standard2.csv')],
        'testsets': [data_file('possible_matches.csv')],
    })
    dim = 2

    # Source and target share one label file; it is read once per side so
    # that each side receives its own, independent result object.
    labelfile = data_file('labels.txt')
    src_properties = StringMatcher_Interface.get_labels_from_file(labelfile)
    tgt_properties = StringMatcher_Interface.get_labels_from_file(labelfile)
    name = "OAEI_visualization"

    pipeline = Pipeline()

    def add_graph_line(triples):
        """Append the load step and the GraphToolbox step for one triple file."""
        line = pipeline.append_step(load_kg_with_rdflib_ttl_interface, None,
                                    PipelineDataTuple(triples))
        return pipeline.append_step(GraphToolbox.interface, PipelineDataTuple(line),
                                    PipelineDataTuple(triples))

    line_a = add_graph_line(src_triples)
    line_b = add_graph_line(tgt_triples)

    line_ab = pipeline.append_step(WalkEmbedder_1.interface, PipelineDataTuple(line_a, line_b),
                                   PipelineDataTuple(dim, 'steps', True, 1))
    line_ab = pipeline.append_step(concat_combiner.interface, PipelineDataTuple(line_ab), None)
    line_ab = pipeline.append_step(TSNEInterface.interface, PipelineDataTuple(line_ab),
                                   PipelineDataTuple(2))

    # The remaining steps take no extra arguments and are chained in this
    # exact order, each one consuming the output line of its predecessor.
    for step in (EmbeddingSaver.interface,
                 CategoriesVisualizer.interface,
                 StratifiedVisualizer.interface,
                 TypeVisualizer.interface,
                 FullVisualizer.interface):
        line_ab = pipeline.append_step(step, PipelineDataTuple(line_ab), None)

    configuration = Configuration(name, src_corpus, tgt_corpus, src_triples, tgt_triples,
                                  gold_mapping, dim, pipeline, src_properties, tgt_properties,
                                  calc_PLUS_SCORE=False, use_cache=False, use_streams=False)
    configuration_handler = ConfigurationHandler()
    configuration_handler.execute(configuration)
def main(src_triples, tgt_triples, labels, filename):
    """Match products between two triple sets using ``StringMatcher``.

    Args:
        src_triples: Source triples; forwarded unchanged to ``StringMatcher``
            and to ``preciseBatchMatch``.
        tgt_triples: Target triples; forwarded the same way.
        labels: Either a ``str``, which is handed to
            ``smi.get_labels_from_file`` to obtain the label properties, or
            the list of label properties itself.
        filename: Forwarded as the first argument of
            ``StringMatcher.preciseBatchMatch``.

    Returns:
        None. The result of ``preciseBatchMatch`` is not propagated.

    Raises:
        AssertionError: If the resolved label properties are not a list.
    """
    if isinstance(labels, str):
        index_properties = smi.get_labels_from_file(labels)
    else:
        index_properties = labels

    # Raised explicitly instead of via an ``assert`` statement so the check
    # is still enforced when Python runs with -O; the exception type and
    # message are kept for callers that rely on them.
    if not isinstance(index_properties, list):
        raise AssertionError("Labels-parameter must be provided as a list of label-properties")

    stringmatcher = StringMatcher(src_triples, tgt_triples, index_properties)
    # 0.9 is passed as the second argument (presumably a match threshold —
    # TODO confirm against StringMatcher.preciseBatchMatch).
    stringmatcher.preciseBatchMatch(filename, 0.9, index_properties, src_triples, tgt_triples)
def main():
    """Assemble and run the "test" configuration on the balanced-walks data.

    Every input file is looked up below
    ``<package_directory>/../data/sap_hilti_data/balanced_walks``. Both
    triple files are loaded and passed through ``GraphToolbox``; the two
    resulting lines are then processed by ``WalkEmbedder_1``,
    ``concat_combiner``, ``FlatMatcher`` (given a ``LogisticRegression``
    model) and finally ``StableRankMatcher``. The finished ``Configuration``
    is executed by a ``ConfigurationHandler``.

    Returns:
        None.
    """
    base = os.path.join(package_directory, '..', 'data', 'sap_hilti_data', 'balanced_walks')

    src_triples = os.path.join(base, 'graph_triples_hilti_erp.nt')
    tgt_triples = os.path.join(base, 'graph_triples_hilti_web.nt')
    src_corpus = os.path.join(base, 'corpus_hilti_erp.txt')
    tgt_corpus = os.path.join(base, 'corpus_hilti_web.txt')

    train_csv = os.path.join(base, 'final_trainset.csv')
    test_csv = os.path.join(base, 'possible_matches.csv')
    gold_mapping = InternalGoldStandard({'trainsets': [train_csv], 'testsets': [test_csv]})

    dim = 1
    model = LogisticRegression()

    # Both sides take their label properties from the same file; it is read
    # separately for each side.
    label_path = os.path.join(base, 'labels.txt')
    src_properties = StringMatcher_Interface.get_labels_from_file(label_path)
    tgt_properties = StringMatcher_Interface.get_labels_from_file(label_path)

    use_streams = False
    name = "test"

    pipeline = Pipeline()

    def graph_line(triples_path):
        """Append the load step and the GraphToolbox step for one triple file."""
        loaded = pipeline.append_step(
            load_kg_with_rdflib_ttl_interface,
            None,
            PipelineDataTuple(triples_path),
        )
        return pipeline.append_step(
            GraphToolbox.interface,
            PipelineDataTuple(loaded),
            PipelineDataTuple(triples_path),
        )

    source_line = graph_line(src_triples)
    target_line = graph_line(tgt_triples)

    # Only embedding and matching steps are part of this pipeline; no
    # visualization or embedding-saving step is appended here.
    joined_line = pipeline.append_step(
        WalkEmbedder_1.interface,
        PipelineDataTuple(source_line, target_line),
        PipelineDataTuple(dim, 'steps', False, 1),
    )
    joined_line = pipeline.append_step(
        concat_combiner.interface, PipelineDataTuple(joined_line), None
    )
    joined_line = pipeline.append_step(
        FlatMatcher.interface, PipelineDataTuple(joined_line), PipelineDataTuple(model)
    )
    joined_line = pipeline.append_step(
        StableRankMatcher.interface, PipelineDataTuple(joined_line), None
    )

    # NOTE(review): the three trailing flags are passed positionally
    # (use_streams, False, True); confirm against Configuration's signature
    # that this matches the intended parameter order.
    configuration = Configuration(
        name,
        src_corpus,
        tgt_corpus,
        src_triples,
        tgt_triples,
        gold_mapping,
        dim,
        pipeline,
        src_properties,
        tgt_properties,
        use_streams,
        False,
        True,
    )
    handler = ConfigurationHandler()
    handler.execute(configuration)