Example #1
import csv

import pandas as pd
from pymantic import sparql

# query_generator() and extract_results() are project-specific helpers
# defined elsewhere in this repository.


def main():
    fips = pd.read_csv("./US_FIPS_Codes.csv", header=None)
    fips.columns = ['State', 'County', 'State_FIPS', 'County_FIPS']
    # Zero-pad to the standard widths: a 2-digit state code plus a 3-digit
    # county code gives the 5-digit county FIPS (e.g. "06" + "037" -> "06037").
    fips['State_FIPS'] = fips['State_FIPS'].apply(lambda x: str(x).zfill(2))
    fips['County_FIPS'] = fips['County_FIPS'].apply(lambda x: str(x).zfill(3))
    fips['FIPS'] = fips['State_FIPS'] + fips['County_FIPS']

    fips_list = fips['FIPS'].tolist()
    file_path = "OW_result.csv"
    # One client object is enough; it does not need to be recreated per county.
    server = sparql.SPARQLServer('http://128.111.106.227:7201/repositories/Covid-KG')

    for item in fips_list:
        if int(item) >= 39039:
            print("processing county: %s" % item)
            query_item = query_generator(item, "2021-01-09")
            result = server.query(query_item)
            result_list = extract_results(result)
            result_pd = pd.DataFrame(result_list, columns=['Model', 'Error'])
            ow_rows = result_pd.index[result_pd['Model'] == 'OliverWyman-Navigator'].tolist()
            if ow_rows:
                OW_index = ow_rows[0]
                OW_ratio = (OW_index + 1) / len(result_pd)  # index starts from 0, so +1
                row = [item, OW_index + 1, len(result_pd), OW_ratio]
            else:
                row = [item, '', '', '']
            with open(file_path, 'a', newline='') as f:
                csv.writer(f).writerow(row)
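If the output CSV should carry a header row, it could be written once, right after file_path is assigned in main(); a minimal sketch (the column names are illustrative labels, not from the original):

import os

# Hypothetical header setup; skip it when the file already exists.
if not os.path.exists(file_path):
    with open(file_path, 'w', newline='') as f:
        csv.writer(f).writerow(['FIPS', 'OW_rank', 'n_models', 'OW_ratio'])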
Example #2
    # Assumes module-level: from pandas import read_csv; from collections
    # import Counter; params_file and ParamUtils are defined in this project.
    def __init__(self,
                 params_tsv=params_file,
                 return_param_name=False) -> None:

        self.df_params = read_csv(params_tsv, sep='\t')
        # Remove authentication parameters
        self.df_params = self.df_params[self.df_params.is_auth_param == False]
        self.df_params['name'] = self.df_params['name'].apply(ParamUtils.normalize)
        self.values = {}
        self.pattern_values = {}
        self.wikidata_server = sparql.SPARQLServer(
            'http://localhost:9999/bigdata/namespace/wdq/sparql')
        self.return_param_name = return_param_name

        for index, row in self.df_params.iterrows():
            name, pattern, example, typ, count = (
                row['name'], row['pattern'], row['example'], row['type'], row['count'])

            if example == 'None':
                continue

            if (name, typ) not in self.values:
                self.values[(name, typ)] = Counter()
            self.values[(name, typ)].update({example: count})

            if pattern == 'None':
                continue

            if pattern not in self.pattern_values:
                self.pattern_values[pattern] = Counter()
            self.pattern_values[pattern].update({example: count})
Example #3
def getIncludedFromExcluded(to_exclude, endpoint):
    """Return all predicates in the store except those in to_exclude."""
    server = sparql.SPARQLServer(endpoint)
    result = server.query('select distinct ?p where {?s ?p ?o}')
    to_include = [b['p']['value'] for b in result['results']['bindings']]
    for element in to_exclude:
        to_include.remove(element)  # raises ValueError if element was not returned
    to_include = [(p, '') for p in to_include]
    return to_include
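If to_exclude can contain predicates the store did not return, list.remove raises ValueError; a tolerant sketch using a set difference instead (note it does not preserve the server's result order):

from pymantic import sparql

def getIncludedFromExcludedSafe(to_exclude, endpoint):
    # Hypothetical variant: silently ignores excluded predicates that are
    # absent from the query result, instead of raising ValueError.
    server = sparql.SPARQLServer(endpoint)
    result = server.query('select distinct ?p where {?s ?p ?o}')
    predicates = {b['p']['value'] for b in result['results']['bindings']}
    return [(p, '') for p in predicates - set(to_exclude)]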
Example #4
def reset():
    ts = sparql.SPARQLServer("http://localhost:9999/blazegraph/sparql")
    # Wipe every triple in the store
    ts.update('delete {?x ?y ?z} where {?x ?y ?z}')
    shutil.rmtree("meta\\demo\\dumontier\\corpus\\", ignore_errors=True)
    shutil.rmtree("meta\\demo\\dumontier\\CSVe\\", ignore_errors=True)
    # patha() is a project-specific path helper; truncate the auxiliary file.
    # The with-block closes the file, so no explicit close() is needed.
    with open(patha("meta\\DEMO\\Dumontier\\auxiliary\\auxiliary.txt"),
              'w') as br:
        br.write('')
Example #5
def work(identification, graph_name, step_to_do, redis_fn):
    global redis_connection, strict_redis_connection, sparql_server, step, step_graph
    step = step_to_do

    log('work ' + '[' + str(step) + ']')

    #for Collections
    step_graph = ConjunctiveGraph(sparqlstore.SPARQLStore(sparql_uri),
                                  graph_name)

    sparql_server = sparql.SPARQLServer(sparql_uri)
    redis_connection = redislite.Redis(redis_fn)
    strict_redis_connection = redislite.StrictRedis(redis_fn)

    gv_output_file_name = identification + '_' + str(step).zfill(7) + '.gv'

    if not list(subjects(RDF.type, kbdbg.frame)):
        log('no frames.' + '[' + str(step) + ']')
        put_last_bindings(step, [])
        return

    if step == global_start - 1:
        gv_output_file_name = 'dummy'
    try:
        os.unlink(gv_output_file_name)
    except FileNotFoundError:
        pass

    gv_output_file = open(gv_output_file_name, 'w')
    e = Emitter(gv_output_file, step)
    e.generate_gv_image()
    gv_output_file.close()

    if step == global_start - 1:
        return

    log('convert..' + '[' + str(step) + ']')
    #cmd, args = subprocess.check_output, ("convert", '-regard-warnings', "-extent", '6000x3000',  gv_output_file_name, '-gravity', 'NorthWest', '-background', 'white', gv_output_file_name + '.svg')
    cmd, args = subprocess.check_output, ("dot", '-Tsvg', gv_output_file_name,
                                          '-O')
    try:
        r = cmd(args, stderr=subprocess.STDOUT)
        if r != b"":
            raise RuntimeError('[' + str(step) + '] ' + str(r))
    except subprocess.CalledProcessError as e:
        # e.output is bytes; decode before concatenating with str
        log('[' + str(step) + '] ' + e.output.decode(errors='replace'))
    log('convert done.' + '[' + str(step) + ']')

    if stats:
        print('stats:')
        for i in stats:
            print(i)
        #stats.clear()

    redis_connection._cleanup()
    strict_redis_connection._cleanup()
Example #6
def query_from_files(kb, goal, nokbdbg, nolog, visualize, sparql_uri,
                     identification, base):
    global server, this

    pyin.kbdbg_file_name, pyin._rules_file_name, identification, base, this, runs_path = set_up(
        identification, base)
    pyin.this = this
    pyin.nolog = nolog
    common.nolog = nolog
    pyin.init_logging()
    common.log = pyin.log

    if sparql_uri != '':
        # max_workers / thread_name_prefix could be passed here if needed
        pyin.pool = ThreadPoolExecutor()
        server = sparql.SPARQLServer(sparql_uri)
        # "CLEAR GRAPHS" is not a valid SPARQL 1.1 Update form; CLEAR ALL is.
        server.update("""CLEAR ALL""")
        pyin.server = server

        new = """kbdbg:latest kbdbg:is <""" + this + ">"
        pyin.kbdbg(new, default=True)
        uuu = (
            pyin.prefixes +
            # WITH """ + default_graph + """
            """DELETE {kbdbg:latest kbdbg:is ?x} WHERE {kbdbg:latest kbdbg:is ?x}""")
        server.update(uuu)
        pyin.kbdbg_text('#' + uuu)
    if identification != "":
        nolog or pyin.kbdbg(
            '<' + this + "> kbdbg:has_run_identification " +
            rdflib.Literal(identification).n3(), True)

    rules, query_rule, goal_graph = pyin.load(kb, goal, identification, base)

    for result in query(rules, query_rule, goal_graph):
        print()
        r = ''
        for triple in result:
            r += triple.str()
        print(' RESULT : ' + r)
        print(' step :' + str(pyin.global_step_counter))
        sys.stdout.flush()
        nolog or pyin.kbdbg_text('#result: ' + r)
    print(' steps :' + str(pyin.global_step_counter))

    if sparql_uri != '':
        pyin.kbdbg("<" + this + "> kbdbg:is kbdbg:done", default=True)
        pyin.flush_sparql_updates()

    if sparql_uri != '':
        pyin.pool.shutdown()
Example #7
def requestSPARQL(gene, drug, verbose=True):
    """Return the result of the query built from gene and drug."""
    server = sparql.SPARQLServer(ADDRESS)
    q = query % {'gene': gene, 'drug': drug}
    if verbose:
        print(q)
    res = server.query(q)
    if verbose:
        print(res)
    return res


# Loading data to Blazegraph
# server.update('load <file:///tmp/data.n3>')

# Executing query

# for b in result['results']['bindings']:
#     print("%s %s" % (b['p']['value'], b['o']['value']))
Example #8
def foo():
    global embeddings
    global args
    data = request.json
    if 'query' not in data:
        return {"status": "error"}
    # dict.get() with a default is equivalent to the original
    # "data[k] if k in data else default" chains.
    response = json.dumps(processor.search(
        query=data['query'],
        fix_misspellings=data.get('fix-misspellings', False),
        use_embeddings=data.get('use-embeddings', False),
        w2v=embeddings[data.get('embeddings', W2V_EMBEDDINGS)],
        similar_tokens_score_weight=data.get('similar-tokens-score-weight', 0.5),
        similar_tokens_quantity=data.get('similar-tokens-quantity', 2),
        products_quantity=data.get('products-quantity', 5),
        similar_products_quantity=data.get('similar-products-quantity', 5),
        verbose=args.verbose,
        min_word_difference_ratio=data.get('min-word-difference-ratio', 50),
        sparql_server=sparql.SPARQLServer(f'http://{args.sparql_host}:9999/bigdata/sparql'),
        enable_good_type_diversity=data.get('enable-good-type-diversity', False),
        enable_product_type_diversity=data.get('enable-product-type-diversity', False),
        enable_developer_diversity=data.get('enable-developer-diversity', False),
        enable_transliteration=data.get('enable-transliteration', False)
    ))  # json.dumps already returns a str; no encode/decode round-trip needed
    return response, 200, {'Content-Type': 'application/json; charset=utf-8'}
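A client call against this handler might look as follows; the route and port are assumptions, only the JSON keys are taken from foo() above:

import requests

# Hypothetical URL; the payload keys mirror the handler's data.get() lookups.
resp = requests.post('http://localhost:5000/search', json={
    'query': 'wireless keyboard',
    'fix-misspellings': True,
    'use-embeddings': True,
    'products-quantity': 10,
})
print(resp.json())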
Example #9
# Opening of this update call reconstructed (the snippet begins mid-file);
# assumes g is an rdflib Graph with the foaf prefix bound.
g.update("""
    PREFIX dbpedia: <http://dbpedia.org/resource/>
    INSERT
        { ?s a dbpedia:Human . }
    WHERE
        { ?s a foaf:Person . }
    """)

print("After the UPDATE, there are {} triples in the graph".format(len(g)))

g.add((rdflib.URIRef("http://sib/person/serguei"), FOAF.givenName,
       rdflib.Literal("Serguei", datatype=XSD.string)))

for row in g.query("SELECT ?s WHERE { [] foaf:knows ?s .}"):
    print(row.s)

print("After the UPDATE, there are {} triples in the graph".format(len(g)))

from pymantic import sparql

server = sparql.SPARQLServer(
    'http://HOST:PORT/blazegraph/namespace/NS_NAME/sparql')

# Loading data to Blazegraph
server.update(
    'load <file:///opt/semantics-information-broker/data/blazegraph_prefix.n3>'
)

# Executing query
result = server.query(
    'select * where { <http://blazegraph.com/blazegraph> ?p ?o }')
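server.query returns the standard SPARQL JSON results structure, so the bindings can be walked the same way as in the later examples:

for b in result['results']['bindings']:
    print(b['p']['value'], b['o']['value'])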
Example #10
def connect_to_server(self):
    return sparql.SPARQLServer(self.blaze_graph_server)
Example #11
# Commented-out example query (the opening of its server.query(''' ... ''')
# call is missing from the snippet):
# prefix cnt: <http://www.w3.org/2011/content#>
# prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>


# select DISTINCT ?query ?path ?chars ?method ?headerName ?headerValue
# {  
#   ?query a http:Request.
#   ?query http:absolutePath ?path.
#   ?query http:methodName ?method.
#   ?query http:body ?body.
#   ?body cnt:chars ?chars.
#   ?query http:headers/(rdf:first|rdf:rest)* ?header.
#   ?header http:fieldName ?headerName.
#   ?header http:fieldValue ?headerValue.
# filter (?query = <http://voice.iot/request/get_device_state>)}
# ''')
# for b in result['results']['bindings']:
#     print(f"{b['p']['value']}, {b['o']['value']}")

from pymantic import sparql

server = sparql.SPARQLServer('http://25.29.130.188:9999/blazegraph/sparql')

# Loading data to Blazegraph
#server.update('load <file:///tmp/data.n3>')

# Executing query
result = server.query('select * where { <http://blazegraph.com/blazegraph> ?p ?o }')
for b in result['results']['bindings']:
    print(b['p']['value'], b['o']['value'])
Example #12
def __init__(self, url):
    self.server = sparql.SPARQLServer(url)
    self.prefixes = {}
Example #13
from pymantic import sparql
import csv, json
server = sparql.SPARQLServer('http://127.0.0.1:9999/blazegraph/sparql')

geo_file = open('edhGeographicData.json')
geo_str = geo_file.read()
geo_data = json.loads(geo_str)["features"]

# Executing query
result = server.query('''
    PREFIX dcterms: <http://purl.org/dc/terms/>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
    PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
    PREFIX nmo: <http://nomisma.org/ontology#>
    PREFIX epi: <http://edh-www.adw.uni-heidelberg.de/edh/ontology#>
    PREFIX dc: <http://purl.org/dc/terms/>
    PREFIX lawd: <http://lawd.info/ontology/>
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>
    PREFIX geo: <http://www.w3.org/2003/01/geo/wgs84_pos#>
    SELECT DISTINCT ?epigraph ?text ?place ?start ?end
    WHERE {
        ?epigraph epi:hasEditionText ?text .
        ?epigraph lawd:foundAt ?place .
        ?epigraph nmo:hasStartDate ?start .
        ?epigraph nmo:hasEndDate ?end .
    }''')
data = list()

count = 0
for b in result['results']['bindings']:
    count += 1
    print(count)
    dictio = dict()
    dictio["epigraph"] = b["epigraph"]["value"]
    dictio["text"] = b["text"]["value"]
    dictio["place"] = b["place"]["value"]
    dictio["start"] = b["start"]["value"]
    dictio["end"] = b["end"]["value"]
    dictio["id"] = b["epigraph"]["value"].replace(
        "http://edh-www.adw.uni-heidelberg.de/edh/inschrift/", "")
    for place in geo_data:
        if str(place["properties"]["uri"]).replace("https", "http") == str(
                b["place"]["value"]):
            # Note: GeoJSON coordinates are ordered [longitude, latitude];
            # swap these if the source file follows that convention.
            dictio["lat"] = place["geometry"]["coordinates"][0]
            dictio["lng"] = place["geometry"]["coordinates"][1]
    data.append(dictio)
Example #14
def __init__(self, ts_url):
    self.ts = sparql.SPARQLServer(ts_url)
Example #15
from pymantic import sparql

server = sparql.SPARQLServer('https://tender-hack-spb-manaslu.aa13q.ru/bigdata/sparql')

# Loading data to Blazegraph
server.update('load <file:///tmp/data.ttl>')


# Executing query
result = server.query('select * where { ?s ?p ?o }')
for b in result['results']['bindings']:
    print(b['s']['value'], b['p']['value'], b['o']['value'])
Example #16
def run(quiet, start, end, workers):
    global global_start, graphs_name_start, sparql_server, start_time, graphviz_pool
    if start:
        raise Exception("--start functionality needs updating")
    global_start = start
    if quiet:
        logger.setLevel(logging.INFO)
    sparql_server = sparql.SPARQLServer(sparql_uri)
    redis_fn = redislite.Redis().db
    info('redis is ' + redis_fn)
    if workers:
        worker_pool = ProcessPoolExecutor(max_workers=workers)
    graphviz_pool = ProcessPoolExecutor(max_workers=128)
    graphs_name_start = query_one('x', "{kbdbg:latest kbdbg:is ?x}")
    identification0 = query_one(
        'y', "{<" + graphs_name_start + "> kbdbg:has_run_identification ?y}")
    path = 'runs/' + fix_up_identification(identification0)
    info('output path:' + path)
    os.makedirs(path, exist_ok=True)
    identification = path + '/' + fix_up_identification(graphs_name_start)
    graph_list_position = graphs_name_start
    step_to_submit = -1
    done = False
    range_start = None
    start_time = time.perf_counter()
    range_size = 200000
    while not done:
        step_to_submit += 1
        if step_to_submit < start - 1:
            info("skipping [" + str(step_to_submit) + ']')
            continue
        if range_start is None:
            range_start = step_to_submit
        range_end = step_to_submit
        if range_end - range_start == range_size or (range_end >= end
                                                     and end != -1):
            args = (identification, 'step_graph_uri', range_start, range_end,
                    redis_fn)
            if not workers:
                work(*args)
            else:
                if check_futures() == 'end':
                    info("ending")
                    done = True
                    break
                while len(futures) > workers + 1:
                    info('sleeping')
                    time.sleep(10)
                    if check_futures() == 'end':
                        info("ending")
                        done = True
                        break
                info('submit ' + str(range_start) + '-' + str(range_end) +
                     ' (queue size: ' + str(len(futures)) + ')')
                fut = worker_pool.submit(work, *args)
                fut.step = step_to_submit
                futures.append(fut)
                log('submitted ')
                time.sleep(secs_per_frame)
                if check_futures() == 'end':
                    info("ending")
                    done = True
                    break
            range_start = range_end + 1
            # grow the batch geometrically, capped at 100000 steps
            range_size = min(range_size * 5, 100000)

        if range_start > end and end != -1:
            info("ending")
            done = True
            break
        log('loop' + str(step_to_submit))
    if workers:
        while len(futures) != 0:
            check_futures()
            info('waiting for workers to end')
            time.sleep(10)
        worker_pool.shutdown()
        check_futures()

    while len(graphviz_futures) != 0:
        check_futures2(graphviz_futures)
        info('waiting for graphviz workers to end')
        time.sleep(2)
    graphviz_pool.shutdown()
Example #17
def work(identification, graph_name, _range_start, _range_end, redis_fn):
    global redis_connection, strict_redis_connection, sparql_server, current_step, range_start, range_end, ss, just_unbound_bindings, frames_done_count, frame_templates, bnode_strings
    range_start, range_end = _range_start, _range_end
    sparql_server = sparql.SPARQLServer(sparql_uri)
    redis_connection = redislite.Redis(redis_fn)
    strict_redis_connection = redislite.StrictRedis(redis_fn)
    frame_templates = redis_collections.Dict(key='frames',
                                             redis=strict_redis_connection,
                                             writeback=True)
    bnode_strings = redis_collections.Dict(key='bnodes',
                                           redis=strict_redis_connection,
                                           writeback=True)

    raw = defaultdict(list)
    #todo limit queries with range_start, then uncomment this
    #if range_start != 0:
    #	raw = redis_load('checkpoint'+str(range_start - 1))

    raw['frames'] += list(
        query(('frame', 'parent', 'is_for_rule', 'step_finished',
               'step_created'), """WHERE
	{
		""" + frame_query() + """
		OPTIONAL {?frame kbdbg:has_parent ?parent}.
		?frame kbdbg:is_for_rule ?is_for_rule. 
	}"""))

    raw['bnodes'] += list(
        query(('bnode', 'frame', 'items', 'step_created', 'step_finished'),
              """WHERE
		{
		?bnode kbdbg:has_items ?items.
		?bnode kbdbg:has_parent ?frame.
		GRAPH ?g_created {?bnode rdf:type kbdbg:bnode}.
		""" + step_magic('_created') + """
		OPTIONAL {
			GRAPH ?g_finished{?frame kbdbg:is_finished true}.
			""" + step_bind('_finished') + """
		}
		}"""))

    raw['results'] += list(
        query(('uri', 'value', 'step_unbound'), """WHERE {GRAPH ?g_created 
			{
				?uri rdf:type kbdbg:result.
				?uri rdf:value ?value.
			}.""" + step_magic('_created') + """
			OPTIONAL {GRAPH ?g_unbound {?uri kbdbg:was_unbound true}.}.""" +
              step_bind('_unbound') + '}'))

    raw['bindings'] += list(
        query(('x', 'source', 'target', 'source_frame', 'target_frame',
               'source_is_bnode', 'target_is_bnode', 'source_term_idx',
               'target_term_idx', 'source_is_in_head', 'target_is_in_head',
               'source_arg_idx', 'target_arg_idx', 'stepbinding_unbound',
               'stepbinding_failed', 'stepbinding_created'), """WHERE 
		{
		GRAPH ?gbinding_created {?x rdf:type kbdbg:binding.}.
		""" + step_magic('binding_created') + """
		OPTIONAL {GRAPH ?gbinding_unbound {?x kbdbg:was_unbound true}.
		""" + step_bind('binding_unbound') + """
		}.
		OPTIONAL {GRAPH ?gbinding_failed  {?x kbdbg:failed true}.
		""" + step_bind('binding_failed') + """
		}.
		?x kbdbg:has_source ?source.
		?x kbdbg:has_target ?target.
		?source kbdbg:has_frame ?source_frame.
		?target kbdbg:has_frame ?target_frame.
		OPTIONAL {?source kbdbg:is_bnode ?source_is_bnode.}.
		OPTIONAL {?target kbdbg:is_bnode ?target_is_bnode.}.
		?source kbdbg:term_idx ?source_term_idx.
		?target kbdbg:term_idx ?target_term_idx.
		OPTIONAL {?source kbdbg:is_in_head ?source_is_in_head.}.
		OPTIONAL {?target kbdbg:is_in_head ?target_is_in_head.}.
		OPTIONAL {?source kbdbg:arg_idx ?source_arg_idx.}.
		OPTIONAL {?target kbdbg:arg_idx ?target_arg_idx.}.
		}"""))

    current_step = range_end
    redis_save('checkpoint' + str(range_end), filter_out_irrelevant_stuff(raw))
    current_step = '666'

    last_bindings = raw['bindings'][:]
    for i in range(range_start, range_end + 1):
        current_step = i
        ss = '[' + str(current_step) + ']'
        info('work ' + ss)
        state = filter_out_irrelevant_stuff(raw)
        if not state['frames']:
            info('no frames.' + ss)
            continue

        #wont work with euler-style steps
        #if last_bindings == state['bindings']:
        #	return 'end'

        #todo make emitter save data to output, not to file
        e = Emitter()
        e.gv("digraph frame" + str(current_step) +
             "{  ")  #splines=ortho;#gv("pack=true")
        e.do_frames(state['frames'])
        e.do_bnodes(state['bnodes'])
        e.do_results(state['results'])
        e.do_bindings(state['bindings'])
        e.gv("}")
        info('}..' + ss)

        args = (ss, identification + '_' + str(current_step).zfill(7) + '.gv',
                e.output)
        while len(graphviz_futures) > 10000:
            info('sleeping')
            time.sleep(10)
            check_futures2(graphviz_futures)

        graphviz_futures.append(graphviz_pool.submit(output, *args))
        check_futures2(graphviz_futures)

    #print_stats()
    redis_connection._cleanup()
    strict_redis_connection._cleanup()
Example #18
?b4 kbdbg:has_source ?s4. ?s4 kbdbg:has_frame ?f5. ?f5 kbdbg:is_for_rule :Rule8. 
?b4 kbdbg:has_target ?t4. ?t4 kbdbg:has_frame ?f0.
?b5 rdf:type kbdbg:binding. MINUS {?b5 kbdbg:failed true}. MINUS {?b5 kbdbg:was_unbound true}.  
?b5 kbdbg:has_source ?s5. ?s5 kbdbg:has_frame ?f0. 
?b5 kbdbg:has_target ?t5. ?t5 kbdbg:has_frame ?f6. ?f6 kbdbg:is_for_rule :Rule2.

?b6 rdf:type kbdbg:binding. MINUS {?b6 kbdbg:failed true}. MINUS {?b6 kbdbg:was_unbound true}.  
?b6 kbdbg:has_source ?s6. ?s6 kbdbg:has_frame ?f6. 
?b6 kbdbg:has_target ?t6. ?t6 kbdbg:has_frame ?f7. ?f7 kbdbg:is_for_rule :Rule10.


""".strip().splitlines()

sp = 'http://192.168.122.108:9999/blazegraph/sparql'

server = sparql.SPARQLServer(sp)
server.post_directly = True
r = server.query("""
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> 
PREFIX kbdbg: <http://kbd.bg/#> 
PREFIX : <file:///#> 
SELECT ?this WHERE {kbdbg:latest kbdbg:is ?this}""")
r = r['results']['bindings']
if not r:
    print("no kbdbg:latest")
    exit()
latest = URIRef(r[0]['this']['value'])
print('latest is ' + latest.n3())

r = server.query("""
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> 
Example #19
from pymantic import sparql
import os
from glob import glob

server = sparql.SPARQLServer(
    'http://stko-roy.geog.ucsb.edu:9999/blazegraph/namespace/covid19-prediction/sparql'
)

progress_file_list = []
progress_file = 'triple_progress_file.csv'
if os.path.isfile(progress_file):
    with open(progress_file, newline='') as fr:
        for line in fr:
            progress_file_list.append(line.strip())  # strip() already drops the newline

##### Import the main part of the data #########
PATH = './output'
EXT = "*.ttl"
file_names = [
    file for path, subdir, files in os.walk(PATH)
    for file in glob(os.path.join(path, EXT))
]

for file_name in file_names:
    if (file_name not in progress_file_list
            and 'LICENSE' not in file_name
            and 'README' not in file_name):
        file_command = 'load <file://' + os.path.abspath(file_name) + '>'
        print('loading %s' % (file_command))
        server.update(file_command)
        with open(progress_file, 'a') as fa:
            fa.write(file_name + '\n')
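SPARQL 1.1 Update's LOAD also accepts a target graph, which would keep each file's triples in a separate named graph; a sketch, with a made-up graph-IRI scheme:

# Hypothetical variant of the update above: one named graph per file.
graph_iri = 'http://example.org/graph/' + os.path.basename(file_name)
server.update('load <file://' + os.path.abspath(file_name) +
              '> into graph <' + graph_iri + '>')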
Example #20
import os
import re

from rdflib import URIRef, XSD, Namespace, Literal
from rdflib.namespace import OWL, DC, DCTERMS, RDF, RDFS
from rdflib.plugins.sparql import prepareQuery
from SPARQLWrapper import SPARQLWrapper, JSON
from web import form
from pymantic import sparql

# `conf` is the project's configuration module (provides artchivesEndpoint).

WD = Namespace("http://www.wikidata.org/entity/")
WDP = Namespace("http://www.wikidata.org/wiki/Property:")
OL = Namespace("http://openlibrary.org/works/")
ULAN = Namespace("http://vocab.getty.edu/ulan/")
AAT = Namespace("http://vocab.getty.edu/aat/")
PROV = Namespace("http://www.w3.org/ns/prov#")
base = 'https://w3id.org/artchives/'

server = sparql.SPARQLServer(conf.artchivesEndpoint)
dir_path = os.path.dirname(os.path.realpath(__file__))


def clean_to_uri(stringa):
    """Given a string, return a partial URI."""
    uri = re.sub('ä', 'a', stringa.strip().lower())
    uri = re.sub('à', 'a', uri)
    uri = re.sub('è', 'e', uri)
    uri = re.sub('é', 'e', uri)
    uri = re.sub('ì', 'i', uri)
    uri = re.sub('ò', 'o', uri)
    uri = re.sub('ù', 'u', uri)
    uri = re.sub(r'[^a-zA-Z\s]', '', uri)  # raw strings avoid invalid-escape warnings
    uri = re.sub(r'\s', '-', uri)
    return uri
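For instance, the function turns an accented name into a hyphenated ASCII slug:

>>> clean_to_uri("Università di Bologna")
'universita-di-bologna'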
Example #21
import os
import time
import sparql as SQ  # the sparql-client package (module name: sparql)
import requests
import json
from pymantic import sparql
from SPARQLWrapper import SPARQLWrapper, JSON

if __name__ == "__main__":
    q = 'ASK { ?s ?p ?o }'
    print('bootstrapping provesense... -', q)
    endpoint = os.getenv('SPARQL_ENDPOINT',
                         'http://blazegraph:9999/blazegraph/sparql')
    while True:
        try:
            bootstrapped = SQ.query(endpoint, q)
            server = sparql.SPARQLServer(endpoint)
            if not bootstrapped.hasresult():
                server.update('load <file:///tmp/bootstrap/prov-o.ttl>')
                server.update('load <file:///tmp/bootstrap/ssn.ttl>')
                server.update('load <file:///tmp/bootstrap/time.ttl>')
                server.update(
                    'load <file:///tmp/bootstrap/provesense-bootstrap.ttl>')
#                server.update('load <file:///tmp/blazegraph/sao.ttl>')
            break

        except Exception:
            # the endpoint is not up yet; keep retrying
            pass

        time.sleep(2)