def read_rdf(ont1, ont2):
    """Return the largebio data and reference frames for an ontology pair.

    The processed frames are cached as CSV files under ``data/``. When both
    cache files exist they are read back with pandas; otherwise the frames
    are built via ``u.load_rdf``, negative samples are added to the data
    frame via ``u.negative_sampling_target``, and both frames are written
    to the cache.

    Args:
        ont1: Name of the first ontology, interpolated into the file names.
        ont2: Name of the second ontology, interpolated into the file names.

    Returns:
        A ``(df_data, df_ref)`` tuple of pandas DataFrames.
    """
    data_csv = 'data/df_largebio_{}_{}.csv'.format(ont1, ont2)
    ref_csv = 'data/df_largebio_{}_{}_ref.csv'.format(ont1, ont2)

    # The cached path reads BOTH files, so both must be present for a cache
    # hit; a lone data CSV falls through and the pair is regenerated.
    if os.path.isfile(data_csv) and os.path.isfile(ref_csv):
        print('File already exists')
        return pd.read_csv(data_csv), pd.read_csv(ref_csv)

    # Locations of the alignments and of the reference alignment.
    res_dir = os.path.join("data", "largebio-results-2019")
    ref_path = os.path.join(
        "data",
        "oaei2019_umls_flagged_reference",
        "oaei_{}_{}_mappings_with_flagged_repairs.rdf".format(ont1, ont2))

    # Load rdf data.
    df_data, df_ref = u.load_rdf('largebio', res_dir, ref_path, ont1, ont2)

    # Negative sampling; `lb_measures` is a module-level name defined
    # outside this function.
    df_data = u.negative_sampling_target(lb_measures, df_data, df_ref)

    # Save results to csv so the next call can take the cached path.
    df_data.to_csv(data_csv, index=False)
    df_ref.to_csv(ref_csv, index=False)

    return df_data, df_ref
'measure_logmap', 'measure_logmaplt', 'measure_ontmat1', 'measure_sanom', 'measure_wiktionary' ]
# NOTE(review): the list closed above starts outside this excerpt; it is
# presumably `cf_measures`, the measure columns used below -- confirm.

# Cache file for the combined conference dataset, and the results directory
# handed to u.load_rdf for every ontology pair.
conference_data_processed_path = 'data/df_conference.csv'
res_dir = os.path.join('data', 'conference-data')

if not os.path.isfile(conference_data_processed_path):
    # No cache yet: build one frame per unordered pair of ontologies.
    dfs_data, dfs_refs = [], []
    for ont1, ont2 in itertools.combinations(cf_ontologies, 2):
        # Reference alignment file for this pair.
        ref_path = os.path.join(
            "data",
            "conference-ref-data",
            "{}-{}.rdf".format(ont1, ont2),
        )
        df_data, df_ref = u.load_rdf('conference', res_dir, ref_path, ont1, ont2)
        df_data = u.negative_sampling_target(cf_measures, df_data, df_ref)
        # Tag each row with the pair it came from, since all pairs are
        # concatenated into one frame below.
        df_data["ontologies"] = f"{ont1}-{ont2}"
        dfs_data.append(df_data)
        # The reference frames are collected but not used within this
        # excerpt; they are not written to the cache.
        dfs_refs.append(df_ref)
    df_conf = pd.concat(dfs_data, ignore_index=True)
    df_conf.to_csv(conference_data_processed_path, index=False)
else:
    # Cache hit: only df_conf is restored; dfs_data and dfs_refs are not
    # defined on this path.
    df_conf = pd.read_csv(conference_data_processed_path)

# Feature matrix (the cf_measures columns) and target (the 'label' column).
X_cf, y_cf = df_conf[cf_measures], df_conf['label']

# Fill missing values with 0.
X_cf = X_cf.fillna(0)

# binary features