def test_sources_file():
    """Check that every source in ``_sources.yml`` is fully configured.

    Each source entry must provide a non-empty ``sparqlendpoint`` and a
    non-empty ``sparqlqueries`` mapping. Every query file named in that
    mapping is read through ``relative_read_f`` and must yield non-empty
    content.
    """
    srcsfile = '_sources.yml'
    sources_yaml_dict = yaml_get_source(srcsfile)
    for src, src_dict in sources_yaml_dict.items():
        # An empty entry (``None``) or an entry lacking a key altogether
        # should fail with the descriptive assertion below, not with a bare
        # TypeError/KeyError, hence ``or {}`` and ``.get()``.
        src_dict = src_dict or {}
        endpoint = src_dict.get('sparqlendpoint')
        queries = src_dict.get('sparqlqueries')
        assert endpoint and queries, \
            f'{src} misses its sparqlendpoint or sparqlqueries in {srcsfile}'

        for query_name, query_file in queries.items():
            assert query_name and query_file, \
                f'{src} has an empty query name or query file in {srcsfile}'
            sparql_query = relative_read_f(query_file)
            assert sparql_query, \
                f'No content found in {query_file}'
Example #2
0
def importdata(source: str, outformat: str, outfile: str, limit: int,
               write: bool):
    """Run all non-test SPARQL query classes of *source* and print summaries.

    The entry for *source* is looked up in ``_sources.yml`` first, so an
    unknown source raises ``KeyError``. Only ``'wikidata'`` is processed;
    any other configured source returns without doing anything.

    Args:
        source: Key of the source in ``_sources.yml``.
        outformat: Passed through to ``loop_sparql_results``.
        outfile: Accepted but not used by this function.
        limit: Passed through to ``loop_sparql_results``.
        write: Passed through to ``loop_sparql_results``.
    """
    source_dict = yaml_get_source('_sources.yml')[source]

    # Guard clause: nothing is implemented for sources other than wikidata.
    if source != 'wikidata':
        return

    createglobals(source='wikidata')

    # Query classes whose name contains 'Test' are skipped.
    non_test_classes = (
        name for name in source_dict['sparqlqueries'].keys()
        if 'Test' not in name
    )
    for query_class in non_test_classes:
        print(loop_sparql_results(source='wikidata',
                                  class_=query_class,
                                  outformat=outformat,
                                  limit=limit,
                                  write=write))
Example #3
0
def query(source: str, class_: str, limit=None, offset=None) -> Dict:
    """Run the SPARQL query configured for *class_* and yield its bindings.

    The endpoint URL and the query file are looked up under *source* in
    ``_sources.yml``. This is a generator: nothing is read or sent until it
    is first iterated, and each yielded item is one entry of
    ``results['results']['bindings']`` from the converted JSON response.

    Args:
        source: Key of the source in ``_sources.yml``.
        class_: Key into that source's ``sparqlqueries`` mapping.
        limit: If not ``None``, a ``LIMIT`` clause is appended to the query.
        offset: If not ``None``, an ``OFFSET`` clause is appended.

    Raises:
        KeyError: If *source* or *class_* is not configured.
    """
    sources_yaml = yaml_get_source('_sources.yml')
    source_dict = sources_yaml[source]
    sparql_endpoint = source_dict['sparqlendpoint']
    sparql_f = source_dict['sparqlqueries'][class_]
    endpoint = SPARQLWrapper(endpoint=sparql_endpoint, agent=useragent)
    sparql_query = relative_read_f(sparql_f)
    # Apply each modifier on its own: previously a limit given without an
    # offset was silently dropped and the query ran unbounded. With both
    # supplied the resulting query text is unchanged.
    if limit is not None:
        sparql_query += f'\nLIMIT {limit}'
    if offset is not None:
        sparql_query += f'\nOFFSET {offset}'
    endpoint.setQuery(sparql_query)
    endpoint.setReturnFormat(JSON)
    results = endpoint.query().convert()
    # NOTE(review): this response layout may be wikidata specific - confirm
    # before reusing with other endpoints.
    yield from results['results']['bindings']
 def _getmapping(mapping):
     """Load the mapping for *mapping* together with its inverted form.

     Reads ``<mapping>/confident2wikidata_mapping.yml`` via
     ``yaml_get_source``, obtains the inverse from ``invert_mapping``,
     prints the inverse, and returns both as
     ``(confid_mapping, invert_confid_map)``.
     """
     mapping_file = f'{mapping}/confident2wikidata_mapping.yml'
     forward = yaml_get_source(mapping_file)
     inverted = invert_mapping(schema=mapping)
     print(f'invert_confid_map: {inverted}')
     return forward, inverted
Example #5
0
    return foundfiles


if __name__ == '__main__':
    sys.path.append(os.path.join(os.path.dirname(__file__), '../..'))
    from dataimports.file_utils import yaml_get_source, dict2yaml

    mappingfiles = find_files_recursively(
        filename='confident2wikidata_mapping.yml',
        startdir='dataimports',
        foundfiles=[])
    mappingfile_path = mappingfiles[0]
    file_ = Path(mappingfile_path).parent.name / Path(
        Path(mappingfile_path).name)

    mapping = yaml_get_source(file_)
    for confi_prop, prop_dict in mapping.items():
        # print(confi_prop, prop_dict)
        # confi_prop['external_prop']
        # external_prop_dict = prop_dict['external_prop']

        # keep values
        if prop_dict['external_prop']:
            external_prop_val = prop_dict['external_prop']
            external_prop_URI = prop_dict['URI']
        else:
            external_prop_val = ''
            external_prop_URI = ''
            # print(external_prop_URI, external_prop_val)
        # remove old keys
        prop_dict.pop('external_prop')