def return_titolo():
    """Return the title data for all known documents, using a file cache.

    The cache lives in ``cacheDoc.json`` (relative to the current working
    directory). When that file exists and is non-empty, its raw text is
    returned unchanged. Otherwise the document URLs are collected from
    ``scraping_documenti()`` and from the remote SPARQL endpoint, handed to
    ``scraping_titolo()``, and the result is written to the cache file
    before being returned.

    Returns:
        The cached text, or the value produced by ``scraping_titolo()``.
        That value is written to the file as-is, so it is expected to be a
        ``str`` (presumably JSON text, given the file name -- confirm
        against ``scraping_titolo``).
    """
    cache_path = 'cacheDoc.json'

    # A missing cache file is treated the same as an empty one, so the very
    # first call can build the cache instead of failing on open().
    try:
        with open(cache_path, 'r') as cache_file:
            cached = cache_file.read()
    except FileNotFoundError:
        cached = ''

    if cached:
        return cached

    query = (
        "PREFIX fabio: <http://purl.org/spar/fabio/> "
        "SELECT DISTINCT ?doc WHERE { ?doc a fabio:Item . "
        "FILTER NOT EXISTS { ?doc a fabio:Item . FILTER regex(str(?doc), 'cited')} "
        "FILTER NOT EXISTS { ?doc a fabio:Item . FILTER regex(str(?doc), 'Reference')} "
        "FILTER NOT EXISTS { ?doc a fabio:Item . FILTER regex(str(?doc), '_ver')}}"
    )
    doc_from_scraping = scraping_documenti()
    doc_from_sparql = do_query_get(sparql_endpoint_remoto, query)

    # Scraped URLs are taken verbatim, in order.
    item_list = [d['url'] for d in doc_from_scraping]

    # SPARQL URLs are appended only when not already present; the set makes
    # each membership test O(1) instead of scanning the list.
    seen = set(item_list)
    for binding in doc_from_sparql['results']['bindings']:
        url = binding['doc']['value']
        if url not in seen:
            seen.add(url)
            item_list.append(url)

    data = scraping_titolo(item_list)

    # Open the cache for writing only after the data has been computed, so a
    # failure above never truncates an existing cache file.
    with open(cache_path, 'w') as cache_file:
        cache_file.write(data)
    return data
def return_documenti():
    """Return whatever ``scraping_documenti()`` produces, unchanged."""
    return scraping_documenti()