def test_sources_file():
    """Sanity-check every source entry declared in ``_sources.yml``.

    For each source the test asserts that both ``sparqlendpoint`` and
    ``sparqlqueries`` are truthy.  It then walks the ``sparqlqueries``
    mapping and asserts that every entry has a truthy name and file, and
    that reading the file through ``relative_read_f`` gives truthy content.
    """
    srcsfile = '_sources.yml'
    for src, config in yaml_get_source(srcsfile).items():
        # Keep the lookup inside the assert: 'sparqlqueries' is only
        # touched when 'sparqlendpoint' is truthy (short-circuit).
        assert config['sparqlendpoint'] and config['sparqlqueries'], (
            f'{src} misses its sparqlendpoint or sparqlqueries in {srcsfile}')

        for query_name, query_file in config['sparqlqueries'].items():
            assert query_name and query_file
            content = relative_read_f(query_file)
            assert content, f'No content found in {query_file}'
def importdata(source: str, outformat: str, outfile: str, limit: int,
               write: bool):
    """Run every non-test SPARQL query configured for ``source``.

    The entry for ``source`` is looked up in ``_sources.yml`` first, so an
    unknown source raises ``KeyError``.  Only ``'wikidata'`` is acted on;
    any other configured source returns without doing anything.

    Args:
        source: Key of the source entry in ``_sources.yml``.
        outformat: Passed through to ``loop_sparql_results``.
        outfile: Accepted but not used by this function.
        limit: Passed through to ``loop_sparql_results``.
        write: Passed through to ``loop_sparql_results``.
    """
    source_dict = yaml_get_source('_sources.yml')[source]
    if source != 'wikidata':
        return

    createglobals(source='wikidata')
    for query_class in source_dict['sparqlqueries']:
        # Query names containing 'Test' are skipped.
        if 'Test' in query_class:
            continue
        print(loop_sparql_results(source='wikidata',
                                  class_=query_class,
                                  outformat=outformat,
                                  limit=limit,
                                  write=write))
def query(source: str, class_: str, limit=None, offset=None) -> Dict:
    """Run one configured SPARQL query and yield its result bindings.

    The endpoint URL and the query file are read from ``_sources.yml``:
    ``sparqlendpoint`` and ``sparqlqueries[class_]`` of the ``source``
    entry.  The query text is loaded with ``relative_read_f``.

    Args:
        source: Key of the source entry in ``_sources.yml``.
        class_: Key of the query inside that source's ``sparqlqueries``.
        limit: When not ``None``, ``LIMIT <limit>`` is appended to the query.
        offset: When not ``None``, ``OFFSET <offset>`` is appended.

    Yields:
        Each element of ``results['results']['bindings']`` from the
        converted JSON response, in order.

    Raises:
        KeyError: If ``source`` or ``class_`` is not configured, or the
            response lacks ``results``/``bindings``.

    Note:
        This is a generator function: no file is read and no request is
        sent until the caller starts iterating.  The ``-> Dict``
        annotation presumably describes each yielded binding, not the
        generator object itself.
    """
    source_dict = yaml_get_source('_sources.yml')[source]
    sparql_endpoint = source_dict['sparqlendpoint']
    sparql_f = source_dict['sparqlqueries'][class_]

    endpoint = SPARQLWrapper(endpoint=sparql_endpoint, agent=useragent)
    sparql_query = relative_read_f(sparql_f)

    # LIMIT and OFFSET are independent SPARQL solution modifiers.  They
    # used to be appended only when *both* were given, so a lone ``limit``
    # (or ``offset``) was silently ignored and the full result set fetched.
    if limit is not None:
        sparql_query += f'\nLIMIT {limit}'
    if offset is not None:
        sparql_query += f'\nOFFSET {offset}'

    endpoint.setQuery(sparql_query)
    endpoint.setReturnFormat(JSON)
    results = endpoint.query().convert()

    # Standard SPARQL JSON results layout; may be endpoint specific
    # (original note: "?wikidata specific?").
    yield from results['results']['bindings']
def _getmapping(mapping):
    """Load the mapping for ``mapping`` together with its inverse.

    Reads ``<mapping>/confident2wikidata_mapping.yml`` via
    ``yaml_get_source`` and obtains the inverse from
    ``invert_mapping(schema=mapping)``.  The inverse is also printed.

    Returns:
        A ``(mapping, inverted_mapping)`` tuple.
    """
    mapping_path = f'{mapping}/confident2wikidata_mapping.yml'
    forward_map = yaml_get_source(mapping_path)

    invert_confid_map = invert_mapping(schema=mapping)
    print(f'invert_confid_map: {invert_confid_map}')

    return forward_map, invert_confid_map
return foundfiles if __name__ == '__main__': sys.path.append(os.path.join(os.path.dirname(__file__), '../..')) from dataimports.file_utils import yaml_get_source, dict2yaml mappingfiles = find_files_recursively( filename='confident2wikidata_mapping.yml', startdir='dataimports', foundfiles=[]) mappingfile_path = mappingfiles[0] file_ = Path(mappingfile_path).parent.name / Path( Path(mappingfile_path).name) mapping = yaml_get_source(file_) for confi_prop, prop_dict in mapping.items(): # print(confi_prop, prop_dict) # confi_prop['external_prop'] # external_prop_dict = prop_dict['external_prop'] # keep values if prop_dict['external_prop']: external_prop_val = prop_dict['external_prop'] external_prop_URI = prop_dict['URI'] else: external_prop_val = '' external_prop_URI = '' # print(external_prop_URI, external_prop_val) # remove old keys prop_dict.pop('external_prop')