def main():
    """Build and execute the "OAEI_visualization" pipeline.

    The darkscape / oldschoolrunescape triples are loaded, each graph is
    passed through ``GraphToolbox``, both lines are handed to
    ``WalkEmbedder_1``, and the result runs through ``concat_combiner``,
    ``TSNEInterface``, ``EmbeddingSaver`` and the four visualizers.
    """
    # An earlier variant of this script pointed at
    # data/sap_hilti_data/sap_hilti_full_strings (hilti_erp / hilti_web files,
    # gold standard train_simple_sap_hilti.csv); the OAEI data is used here.
    oaei_dir = os.path.join(package_directory, '..', 'data', 'oaei_data')

    src_triples = os.path.join(oaei_dir, 'graph_triples_darkscape.nt')
    tgt_triples = os.path.join(oaei_dir, 'graph_triples_oldschoolrunescape.nt')
    src_corpus = os.path.join(oaei_dir, 'corpus_darkscape.txt')
    tgt_corpus = os.path.join(oaei_dir, 'corpus_oldschoolrunescape.txt')
    gold_mapping = InternalGoldStandard({
        'trainsets': [os.path.join(oaei_dir, 'oaei_gold_standard2.csv')],
        'testsets': [os.path.join(oaei_dir, 'possible_matches.csv')],
    })

    dim = 2
    # Not referenced by any step below (LogisticRegression() was the noted
    # alternative); the instantiation is kept as in the original script.
    model = XGBClassifier()

    # Source and target properties are both read from the same label file.
    labelfile = os.path.join(oaei_dir, 'labels.txt')
    src_properties = StringMatcher_Interface.get_labels_from_file(labelfile)
    tgt_properties = StringMatcher_Interface.get_labels_from_file(labelfile)

    name = "OAEI_visualization"
    pipeline = Pipeline()

    # Source graph line.
    src_line = pipeline.append_step(load_kg_with_rdflib_ttl_interface, None,
                                    PipelineDataTuple(src_triples))
    src_line = pipeline.append_step(GraphToolbox.interface,
                                    PipelineDataTuple(src_line),
                                    PipelineDataTuple(src_triples))

    # Target graph line.
    tgt_line = pipeline.append_step(load_kg_with_rdflib_ttl_interface, None,
                                    PipelineDataTuple(tgt_triples))
    tgt_line = pipeline.append_step(GraphToolbox.interface,
                                    PipelineDataTuple(tgt_line),
                                    PipelineDataTuple(tgt_triples))

    # Joint line: embedding, combination and 2-d projection.
    joint_line = pipeline.append_step(WalkEmbedder_1.interface,
                                      PipelineDataTuple(src_line, tgt_line),
                                      PipelineDataTuple(dim, 'steps', True, 1))
    joint_line = pipeline.append_step(concat_combiner.interface,
                                      PipelineDataTuple(joint_line), None)
    joint_line = pipeline.append_step(TSNEInterface.interface,
                                      PipelineDataTuple(joint_line),
                                      PipelineDataTuple(2))

    # The remaining steps take no extra arguments and are chained in order.
    for step in (EmbeddingSaver.interface,
                 CategoriesVisualizer.interface,
                 StratifiedVisualizer.interface,
                 TypeVisualizer.interface,
                 FullVisualizer.interface):
        joint_line = pipeline.append_step(step, PipelineDataTuple(joint_line), None)

    configuration = Configuration(name, src_corpus, tgt_corpus, src_triples, tgt_triples,
                                  gold_mapping, dim, pipeline, src_properties, tgt_properties,
                                  calc_PLUS_SCORE=False, use_cache=False, use_streams=False)
    handler = ConfigurationHandler()
    handler.execute(configuration)
def main():
    """Build and execute the "jaccard_no_props_given" pipeline.

    Uses the ``sap_hilti_full_strings`` data. The step wiring below is
    reproduced exactly from the original script, including the places where
    an intermediate result is overwritten without being consumed.
    """
    data_dir = os.path.join(package_directory, '..', 'data', 'sap_hilti_data',
                            'sap_hilti_full_strings')
    src_triples = os.path.join(data_dir, 'graph_triples_hilti_erp.nt')
    tgt_triples = os.path.join(data_dir, 'graph_triples_hilti_web.nt')
    src_corpus = os.path.join(data_dir, 'corpus_hilti_erp.txt')
    tgt_corpus = os.path.join(data_dir, 'corpus_hilti_web.txt')
    gold_mapping = os.path.join(data_dir, 'train_simple_sap_hilti.csv')
    dim = 3

    # Models tried earlier and since disabled: polynomial features + Ridge,
    # DecisionTreeClassifier, LinearRegression, RandomForestRegressor and
    # LinearSVC. The import below is a leftover from that experimentation.
    from sklearn.linear_model import LogisticRegression
    model = XGBClassifier()

    src_properties = ["http://rdata2graph.sap.com/hilti_erp/property/mara_fert.maktx"]
    tgt_properties = ["http://rdata2graph.sap.com/hilti_web/property/products.name"]
    # NOTE(review): the name says "no_props_given", yet properties are passed
    # to Configuration below - confirm which one is intended.
    name = "jaccard_no_props_given"

    pipeline = Pipeline()

    # Source graph line.
    src_line = pipeline.append_step(load_kg_with_rdflib_ttl_interface, None,
                                    PipelineDataTuple(src_triples))
    src_line = pipeline.append_step(GraphToolbox.interface,
                                    PipelineDataTuple(src_line),
                                    PipelineDataTuple(src_triples))

    # Target graph line.
    tgt_line = pipeline.append_step(load_kg_with_rdflib_ttl_interface, None,
                                    PipelineDataTuple(tgt_triples))
    tgt_line = pipeline.append_step(GraphToolbox.interface,
                                    PipelineDataTuple(tgt_line),
                                    PipelineDataTuple(tgt_triples))

    joint_line = pipeline.append_step(W2V_1InterfaceWrapper.interface,
                                      PipelineDataTuple(src_line, tgt_line),
                                      PipelineDataTuple(dim))
    # NOTE(review): muse is fed the two graph lines, not the W2V_1 result
    # registered just above - verify this is the intended wiring.
    joint_line = pipeline.append_step(muse.interface,
                                      PipelineDataTuple(src_line, tgt_line),
                                      PipelineDataTuple(gold_mapping))
    joint_line = pipeline.append_step(EmbeddingMatcher.interface,
                                      PipelineDataTuple(joint_line),
                                      PipelineDataTuple(model))
    # NOTE(review): the visualizer chain restarts from the two graph lines,
    # so the matcher's line is not consumed by any later step - confirm.
    joint_line = pipeline.append_step(StratifiedVisualizer.interface,
                                      PipelineDataTuple(src_line, tgt_line), None)
    for visualizer in (TypeVisualizer.interface,
                       CategoriesVisualizer.interface,
                       FullVisualizer.interface):
        joint_line = pipeline.append_step(visualizer, PipelineDataTuple(joint_line), None)

    configuration = Configuration(name, src_corpus, tgt_corpus, src_triples, tgt_triples,
                                  gold_mapping, dim, pipeline, src_properties, tgt_properties)
    handler = ConfigurationHandler()
    handler.execute(configuration)
def main():
    """Run the HILTI ``balanced_walks`` experiments one after another.

    Every experiment registers the same first steps: load the source and
    target triples, pass each graph through ``GraphToolbox``, hand both lines
    to one walk-embedder step and run ``concat_combiner`` on its result. An
    experiment-specific tail of steps follows. Each pipeline is wrapped in a
    ``Configuration`` (with ``calc_PLUS_SCORE``, ``use_cache`` and
    ``use_streams`` all disabled) and executed immediately by a fresh
    ``ConfigurationHandler``, in the order listed at the bottom.
    """
    data_dir = os.path.join(package_directory, '..', 'data', 'sap_hilti_data',
                            'balanced_walks')
    src_triples = os.path.join(data_dir, 'graph_triples_hilti_erp.nt')
    tgt_triples = os.path.join(data_dir, 'graph_triples_hilti_web.nt')
    src_corpus = os.path.join(data_dir, 'corpus_hilti_erp.txt')
    tgt_corpus = os.path.join(data_dir, 'corpus_hilti_web.txt')
    gold_mapping = InternalGoldStandard({
        'trainsets': [os.path.join(data_dir, 'final_trainset.csv')],
        'testsets': [os.path.join(data_dir, 'possible_matches.csv')],
    })
    dim = 20
    # One classifier instance is handed to every matcher step below.
    model = XGBClassifier()
    src_properties = ["http://rdata2graph.sap.com/hilti_erp/property/mara_fert.maktx"]
    tgt_properties = ["http://rdata2graph.sap.com/hilti_web/property/products.name"]

    def run_experiment(name, embedder, embedder_args, tail_steps):
        """Build one pipeline and execute it.

        ``embedder_args`` are unpacked into the embedder's PipelineDataTuple.
        ``tail_steps`` is a sequence of ``(interface, args)`` pairs appended
        after ``concat_combiner``; ``args`` is a tuple unpacked into a
        PipelineDataTuple, or ``None`` for a step without extra arguments.
        """
        pipeline = Pipeline()
        line_a = pipeline.append_step(load_kg_with_rdflib_ttl_interface, None,
                                      PipelineDataTuple(src_triples))
        line_a = pipeline.append_step(GraphToolbox.interface, PipelineDataTuple(line_a),
                                      PipelineDataTuple(src_triples))
        line_b = pipeline.append_step(load_kg_with_rdflib_ttl_interface, None,
                                      PipelineDataTuple(tgt_triples))
        line_b = pipeline.append_step(GraphToolbox.interface, PipelineDataTuple(line_b),
                                      PipelineDataTuple(tgt_triples))
        line_ab = pipeline.append_step(embedder, PipelineDataTuple(line_a, line_b),
                                       PipelineDataTuple(*embedder_args))
        line_ab = pipeline.append_step(concat_combiner.interface,
                                       PipelineDataTuple(line_ab), None)
        for interface, args in tail_steps:
            # A fresh PipelineDataTuple per step, so pipelines share no
            # argument containers with each other.
            step_args = None if args is None else PipelineDataTuple(*args)
            line_ab = pipeline.append_step(interface, PipelineDataTuple(line_ab), step_args)

        configuration = Configuration(name, src_corpus, tgt_corpus, src_triples, tgt_triples,
                                      gold_mapping, dim, pipeline, src_properties,
                                      tgt_properties, calc_PLUS_SCORE=False, use_cache=False,
                                      use_streams=False)
        ConfigurationHandler().execute(configuration)

    def run_flat_matcher_experiment(name, embedder, embedder_args):
        """Run an experiment whose tail is FlatMatcher -> EmbeddingSaver -> StableRankMatcher."""
        run_experiment(name, embedder, embedder_args, [
            (FlatMatcher.interface, (model,)),
            (EmbeddingSaver.interface, None),
            (StableRankMatcher.interface, None),
        ])

    # Syntax-only variant: embedder dimension 1 and the syntax matchers.
    run_experiment("HILTI_pure_syntax", WalkEmbedder_1.interface, (1, 'steps', False, 1), [
        (PureSyntaxMatcher.interface, (model,)),
        (EmbeddingSaver.interface, None),
        (StableRankSyntaxMatcher.interface, None),
    ])

    run_flat_matcher_experiment("HILTI_w2v_steps_walklength1",
                                WalkEmbedder_1.interface, (dim, 'steps', False, 1))
    # NOTE(review): the two "_muse" experiments contain no muse step (it was
    # disabled in the script), so this one registers exactly the same steps
    # as "HILTI_w2v_steps_walklength1" - confirm whether muse should return.
    run_flat_matcher_experiment("HILTI_w2v_steps_walklength1_muse",
                                WalkEmbedder_1.interface, (dim, 'steps', False, 1))
    run_flat_matcher_experiment("HILTI_w2v_steps_walklength3",
                                WalkEmbedder_1.interface, (dim, 'steps', False, 3))
    run_flat_matcher_experiment("HILTI_w2v_steps_walklength1_3grams",
                                WalkEmbedder_1.interface, (dim, 'steps', True, 1))
    run_flat_matcher_experiment("HILTI_w2v_batch_walklength1",
                                WalkEmbedder_1.interface, (dim, 'batch', False, 1))
    # NOTE(review): the embedder receives 100 here while Configuration is
    # still given dim (20) - verify that this mismatch is harmless.
    run_flat_matcher_experiment("HILTI_w2v_steps_walklength1_dim100",
                                WalkEmbedder_1.interface, (100, 'steps', False, 1))
    run_flat_matcher_experiment("HILTI_d2v_steps_walklength1_muse",
                                WalkD2V_1Embedder.interface, (dim, 'steps', False, 1))

    # Visualization variant: embedder dimension 2, TSNE, then the visualizers.
    run_experiment("HILTI_visualization", WalkEmbedder_1.interface, (2, 'steps', False, 1), [
        (TSNEInterface.interface, (2,)),
        (EmbeddingSaver.interface, None),
        (CategoriesVisualizer.interface, None),
        (StratifiedVisualizer.interface, None),
        (TypeVisualizer.interface, None),
        (FullVisualizer.interface, None),
    ])
def main_ngram_string():
    """Run the six ``sap_hilti_3grams`` experiments one after another.

    Two pipeline shapes are used:

    * "separate": each graph gets its own embedding steps, the two lines are
      combined with ``concat_combiner`` and matched with ``FlatMatcher``;
      each line is then projected with ``TSNEInterface`` and the visualizers
      start from the two projected lines.
    * "joint": both graph lines are handed to a single embedder step,
      followed by ``FlatMatcher``, ``TSNEInterface`` and the visualizers.

    Each configuration is executed exactly once, in the order listed at the
    bottom, by a fresh ``ConfigurationHandler``.
    """
    data_dir = os.path.join(package_directory, '..', 'data', 'sap_hilti_data',
                            'sap_hilti_3grams')
    src_triples = os.path.join(data_dir, 'graph_triples_hilti_erp.nt')
    tgt_triples = os.path.join(data_dir, 'graph_triples_hilti_web.nt')
    src_corpus = os.path.join(data_dir, 'corpus_hilti_erp.txt')
    tgt_corpus = os.path.join(data_dir, 'corpus_hilti_web.txt')
    gold_mapping = InternalGoldStandard({
        'trainsets': [
            os.path.join(data_dir, 'train_simple_sap_hilti.csv'),
            os.path.join(data_dir, 'train_hard_sap_hilti.csv'),
        ],
        'testsets': [
            os.path.join(data_dir, 'test_simple_sap_hilti.csv'),
            os.path.join(data_dir, 'test_hard_sap_hilti.csv'),
        ],
    })
    dim = 20
    # One classifier instance is handed to every FlatMatcher step below.
    model = XGBClassifier()
    src_properties = ["http://rdata2graph.sap.com/hilti_erp/property/mara_fert.maktx"]
    tgt_properties = ["http://rdata2graph.sap.com/hilti_web/property/products.name"]

    def load_graph(pipeline, triples):
        """Append the load + GraphToolbox steps for one triples file."""
        line = pipeline.append_step(load_kg_with_rdflib_ttl_interface, None,
                                    PipelineDataTuple(triples))
        return pipeline.append_step(GraphToolbox.interface, PipelineDataTuple(line),
                                    PipelineDataTuple(triples))

    def extend(pipeline, line, steps):
        """Append ``(interface, args)`` steps to ``line``; ``args`` is a tuple or None."""
        for interface, args in steps:
            step_args = None if args is None else PipelineDataTuple(*args)
            line = pipeline.append_step(interface, PipelineDataTuple(line), step_args)
        return line

    def execute(name, pipeline):
        """Wrap ``pipeline`` in a Configuration and execute it once."""
        configuration = Configuration(name, src_corpus, tgt_corpus, src_triples, tgt_triples,
                                      gold_mapping, dim, pipeline, src_properties,
                                      tgt_properties)
        ConfigurationHandler().execute(configuration)

    def run_separate(name, src_steps, tgt_steps):
        """Run an experiment that embeds the source and target graph separately."""
        pipeline = Pipeline()
        line_a = extend(pipeline, load_graph(pipeline, src_triples), src_steps)
        line_b = extend(pipeline, load_graph(pipeline, tgt_triples), tgt_steps)
        line_ab = pipeline.append_step(concat_combiner.interface,
                                       PipelineDataTuple(line_a, line_b), None)
        # The matcher's line is not consumed by a later step; the
        # visualizers start from the two TSNE-projected lines instead.
        pipeline.append_step(FlatMatcher.interface, PipelineDataTuple(line_ab),
                             PipelineDataTuple(model))
        line_a = pipeline.append_step(TSNEInterface.interface, PipelineDataTuple(line_a),
                                      PipelineDataTuple(2))
        line_b = pipeline.append_step(TSNEInterface.interface, PipelineDataTuple(line_b),
                                      PipelineDataTuple(2))
        line_ab = pipeline.append_step(CategoriesVisualizer.interface,
                                       PipelineDataTuple(line_a, line_b), None)
        extend(pipeline, line_ab, [
            (StratifiedVisualizer.interface, None),
            (TypeVisualizer.interface, None),
            (FullVisualizer.interface, None),
            (EmbeddingSaver.interface, None),
        ])
        execute(name, pipeline)

    def run_joint(name, embedder, src_steps, tgt_steps, visualizers):
        """Run an experiment in which one embedder step receives both graph lines."""
        pipeline = Pipeline()
        line_a = extend(pipeline, load_graph(pipeline, src_triples), src_steps)
        line_b = extend(pipeline, load_graph(pipeline, tgt_triples), tgt_steps)
        line_ab = pipeline.append_step(embedder, PipelineDataTuple(line_a, line_b),
                                       PipelineDataTuple(dim))
        line_ab = extend(pipeline, line_ab, [
            (FlatMatcher.interface, (model,)),
            (TSNEInterface.interface, (2,)),
        ])
        extend(pipeline, line_ab, [(visualizer, None) for visualizer in visualizers])
        execute(name, pipeline)

    run_separate("3gram: simpletriplesembedding xgb",
                 [(SimpleTriplesEmbedder.interface, (dim,))],
                 [(SimpleTriplesEmbedder.interface, (dim,))])

    # This configuration used to be executed twice back-to-back by a
    # duplicated Configuration/execute block; it now runs once.
    run_joint("3gram: simpletriplesembedding_1 xgb", SimpleTriplesEmbedder_1.interface, [], [],
              [CategoriesVisualizer.interface, StratifiedVisualizer.interface,
               TypeVisualizer.interface, FullVisualizer.interface,
               EmbeddingSaver.interface])

    run_separate("3gram: w2v xgb",
                 [(ReadSentencesInterfaceWrapper.interface, (src_corpus,)),
                  (W2VInterfaceWrapper.interface, (dim,))],
                 [(ReadSentencesInterfaceWrapper.interface, (tgt_corpus,)),
                  (W2VInterfaceWrapper.interface, (dim,))])

    run_separate("3gram: d2v xgb",
                 [(D2VInterfaceWrapper.interface, (dim,))],
                 [(D2VInterfaceWrapper.interface, (dim,))])

    # A "3gram: pseudod2v xgb" experiment based on PseudoD2VInterfaceWrapper
    # was disabled in the original script and is not run.

    # The last two experiments apply the visualizers in a different order
    # (Stratified, Type, Categories, Full) than the joint experiment above.
    run_joint("3gram: W2V_1 xgb", W2V_1InterfaceWrapper.interface,
              [(ReadSentencesInterfaceWrapper.interface, (src_corpus,))],
              [(ReadSentencesInterfaceWrapper.interface, (tgt_corpus,))],
              [StratifiedVisualizer.interface, TypeVisualizer.interface,
               CategoriesVisualizer.interface, FullVisualizer.interface,
               EmbeddingSaver.interface])

    run_joint("3gram: D2V_1 xgb", D2V_1InterfaceWrapper.interface, [], [],
              [StratifiedVisualizer.interface, TypeVisualizer.interface,
               CategoriesVisualizer.interface, FullVisualizer.interface,
               EmbeddingSaver.interface])
def _append_embedding_line(pipeline, triples, corpus, dim, use_w2v, use_d2v):
    """Append the per-graph steps for one knowledge graph to ``pipeline``.

    The steps are appended in this order: load the triples, run
    ``GraphToolbox``, optionally read the sentence corpus and run
    ``W2VInterfaceWrapper``, optionally run ``D2VInterfaceWrapper``, and, when
    both embedders are enabled, run ``concat_combiner``.

    Args:
        pipeline: The ``Pipeline`` the steps are appended to.
        triples: Path of the ``.nt`` triples file of the graph.
        corpus: Path of the corpus file; only used when ``use_w2v`` is true.
        dim: Value passed to the embedding wrappers as their only argument.
        use_w2v: Whether to append the corpus-reading and W2V steps.
        use_d2v: Whether to append the D2V step.

    Returns:
        The value returned by the last ``pipeline.append_step`` call.
    """
    line = pipeline.append_step(load_kg_with_rdflib_ttl_interface, None,
                                PipelineDataTuple(triples))
    line = pipeline.append_step(GraphToolbox.interface, PipelineDataTuple(line),
                                PipelineDataTuple(triples))
    if use_w2v:
        line = pipeline.append_step(ReadSentencesInterfaceWrapper.interface,
                                    PipelineDataTuple(line), PipelineDataTuple(corpus))
        line = pipeline.append_step(W2VInterfaceWrapper.interface,
                                    PipelineDataTuple(line), PipelineDataTuple(dim))
    if use_d2v:
        line = pipeline.append_step(D2VInterfaceWrapper.interface,
                                    PipelineDataTuple(line), PipelineDataTuple(dim))
    if use_w2v and use_d2v:
        # Only the combined W2V+D2V variants append the concat step.
        line = pipeline.append_step(concat_combiner.interface,
                                    PipelineDataTuple(line), None)
    return line


def main():
    """Run six embedding/matching experiments on the SAP/Hilti data set.

    Any existing ``results.log`` next to the package is removed first. Each
    experiment then builds a fresh ``Pipeline`` for the source and target
    graph, optionally appends a ``muse`` step over both lines, appends a
    ``FlatMatcher`` step and executes the resulting ``Configuration``.

    Fixes relative to the previous version:
      * ``PipelineDataTule`` (typo, NameError at runtime) in the
        "w2v d2v concat xgb" experiment is now ``PipelineDataTuple``.
      * The bare ``except`` around the log-file removal is narrowed to
        ``OSError`` so that unrelated errors and interrupts are not swallowed.
    """
    logfile = os.path.join(package_directory, '..', 'results.log')
    try:
        os.remove(logfile)
    except OSError:
        # Best effort: a missing (or undeletable) log file must not stop the run.
        pass

    data_dir = os.path.join(package_directory, '..', 'data', 'sap_hilti_data',
                            'sap_hilti_full_strings')
    src_triples = os.path.join(data_dir, 'graph_triples_hilti_erp.nt')
    tgt_triples = os.path.join(data_dir, 'graph_triples_hilti_web.nt')
    src_corpus = os.path.join(data_dir, 'corpus_hilti_erp.txt')
    tgt_corpus = os.path.join(data_dir, 'corpus_hilti_web.txt')
    gold_mapping = os.path.join(data_dir, 'sap_hilti_gold.csv')
    dim = 20
    # The same classifier instance is handed to every experiment, as before.
    model = XGBClassifier()

    # (name, use_w2v, use_d2v, use_muse) -- kept in the original execution order.
    experiments = (
        ("w2v d2v concat muse xgb", True, True, True),
        ("w2v d2v concat xgb", True, True, False),
        ("w2v muse xgb", True, False, True),
        ("w2v xgb", True, False, False),
        ("d2v muse xgb", False, True, True),
        ("d2v xgb", False, True, False),
    )

    for name, use_w2v, use_d2v, use_muse in experiments:
        pipeline = Pipeline()
        line_a = _append_embedding_line(pipeline, src_triples, src_corpus, dim,
                                        use_w2v, use_d2v)
        line_b = _append_embedding_line(pipeline, tgt_triples, tgt_corpus, dim,
                                        use_w2v, use_d2v)

        if use_muse:
            line_ab = pipeline.append_step(muse.interface,
                                           PipelineDataTuple(line_a, line_b),
                                           PipelineDataTuple(gold_mapping))
            matcher_input = PipelineDataTuple(line_ab)
        else:
            # Without muse the matcher consumes both lines directly.
            matcher_input = PipelineDataTuple(line_a, line_b)

        pipeline.append_step(FlatMatcher.interface, matcher_input,
                             PipelineDataTuple(gold_mapping, model, logfile, name))

        configuration = Configuration(name, src_corpus, tgt_corpus, src_triples,
                                      tgt_triples, gold_mapping, logfile, dim, pipeline)
        configuration_handler = ConfigurationHandler()
        configuration_handler.execute(configuration)
def main():
    """Configure and execute the "jacc_no_schema_given" experiment.

    Input files are taken from ``data/sap_hilti_data/sap_hilti_full_strings``
    relative to the package directory. The gold standard is built from the
    simple and hard train/test CSV files, the matcher model is a
    ``LogisticRegression`` and no source/target properties are given.
    """
    data_root = os.path.join(package_directory, '..', 'data', 'sap_hilti_data',
                             'sap_hilti_full_strings')

    def data_file(filename):
        # Resolve a file name inside the data set directory.
        return os.path.join(data_root, filename)

    src_triples = data_file('graph_triples_hilti_erp.nt')
    tgt_triples = data_file('graph_triples_hilti_web.nt')
    src_corpus = data_file('corpus_hilti_erp.txt')
    tgt_corpus = data_file('corpus_hilti_web.txt')
    gold_mapping = InternalGoldStandard({
        'trainsets': [data_file('train_simple_sap_hilti.csv'),
                      data_file('train_hard_sap_hilti.csv')],
        'testsets': [data_file('test_simple_sap_hilti.csv'),
                     data_file('test_hard_sap_hilti.csv')],
    })
    dim = 20
    model = LogisticRegression()
    src_properties = None
    tgt_properties = None
    name = "jacc_no_schema_given"

    # Previously commented-out variants ("W2V_1 muse xgb with 50k ...") and the
    # optional corpus-reading, muse, TSNE and visualizer steps stay disabled.
    pipeline = Pipeline()

    # Source graph first, then target graph: load the triples, then GraphToolbox.
    graph_lines = []
    for triples in (src_triples, tgt_triples):
        loaded = pipeline.append_step(load_kg_with_rdflib_ttl_interface, None,
                                      PipelineDataTuple(triples))
        prepared = pipeline.append_step(GraphToolbox.interface,
                                        PipelineDataTuple(loaded),
                                        PipelineDataTuple(triples))
        graph_lines.append(prepared)
    src_line, tgt_line = graph_lines

    # NOTE(review): the result of this step was overwritten by the next
    # assignment in the original code and never passed on; the step is still
    # appended so the pipeline contents stay unchanged -- confirm this is intended.
    pipeline.append_step(PseudoD2V_1InterfaceWrapper_2.interface,
                         PipelineDataTuple(src_line, tgt_line),
                         PipelineDataTuple(dim))
    combined = pipeline.append_step(SimpleTriplesEmbedder_1.interface,
                                    PipelineDataTuple(src_line, tgt_line),
                                    PipelineDataTuple(dim))
    combined = pipeline.append_step(concat_combiner.interface,
                                    PipelineDataTuple(combined), None)
    combined = pipeline.append_step(FlatMatcher.interface,
                                    PipelineDataTuple(combined),
                                    PipelineDataTuple(model))
    combined = pipeline.append_step(EmbeddingSaver.interface,
                                    PipelineDataTuple(combined), None)

    configuration = Configuration(name, src_corpus, tgt_corpus, src_triples,
                                  tgt_triples, gold_mapping, dim, pipeline,
                                  src_properties, tgt_properties)
    handler = ConfigurationHandler()
    handler.execute(configuration)
def main():
    """Configure and execute the "test" walk-embedding experiment.

    Input files are taken from ``data/sap_hilti_data/balanced_walks`` relative
    to the package directory (the ``sap_hilti_full_strings`` alternative stays
    disabled). Source and target properties are both read from ``labels.txt``
    and the matcher model is a ``LogisticRegression``.
    """
    walks_dir = os.path.join(package_directory, '..', 'data', 'sap_hilti_data',
                             'balanced_walks')

    def walks_file(filename):
        # Resolve a file name inside the balanced_walks directory.
        return os.path.join(walks_dir, filename)

    src_triples = walks_file('graph_triples_hilti_erp.nt')
    tgt_triples = walks_file('graph_triples_hilti_web.nt')
    src_corpus = walks_file('corpus_hilti_erp.txt')
    tgt_corpus = walks_file('corpus_hilti_web.txt')
    gold_mapping = InternalGoldStandard({
        'trainsets': [walks_file('final_trainset.csv')],
        'testsets': [walks_file('possible_matches.csv')],
    })
    dim = 1
    model = LogisticRegression()  # XGBClassifier() was the disabled alternative
    labelfile = walks_file('labels.txt')
    # The label file is read twice, once per side, exactly as before.
    src_properties = StringMatcher_Interface.get_labels_from_file(labelfile)
    tgt_properties = StringMatcher_Interface.get_labels_from_file(labelfile)
    use_streams = False
    name = "test"

    pipeline = Pipeline()

    def add_graph_line(triples):
        # Load one graph's triples and pass the result through GraphToolbox.
        loaded = pipeline.append_step(load_kg_with_rdflib_ttl_interface, None,
                                      PipelineDataTuple(triples))
        return pipeline.append_step(GraphToolbox.interface,
                                    PipelineDataTuple(loaded),
                                    PipelineDataTuple(triples))

    src_line = add_graph_line(src_triples)
    tgt_line = add_graph_line(tgt_triples)

    # Corpus reading, muse, TSNE, the visualizers and EmbeddingSaver stay disabled.
    joint = pipeline.append_step(WalkEmbedder_1.interface,
                                 PipelineDataTuple(src_line, tgt_line),
                                 PipelineDataTuple(dim, 'steps', False, 1))
    joint = pipeline.append_step(concat_combiner.interface,
                                 PipelineDataTuple(joint), None)
    joint = pipeline.append_step(FlatMatcher.interface,
                                 PipelineDataTuple(joint),
                                 PipelineDataTuple(model))
    joint = pipeline.append_step(StableRankMatcher.interface,
                                 PipelineDataTuple(joint), None)

    configuration = Configuration(name, src_corpus, tgt_corpus, src_triples,
                                  tgt_triples, gold_mapping, dim, pipeline,
                                  src_properties, tgt_properties, use_streams,
                                  False, True)
    handler = ConfigurationHandler()
    handler.execute(configuration)