def return_titolo():
    """Return the JSON string of document titles, serving from cacheDoc.json when possible.

    Reads cacheDoc.json; if the file is non-empty, its raw contents are
    returned as-is (cache hit). On a cache miss, document URLs are collected
    from the scraper and from the remote SPARQL endpoint (deduplicated,
    scraper order first), their titles are scraped, and the result is written
    back to the cache before being returned.

    Returns:
        str: JSON-encoded list of {url, titolo} entries (format produced by
        scraping_titolo — assumed, TODO confirm against that helper).
    """
    # Use context managers so the file handle is released even on error.
    with open('cacheDoc.json', 'r') as read_file:
        cached = read_file.read()
    if cached:
        return cached

    # Cache miss: start from the scraped document URLs.
    item_list = [d['url'] for d in scraping_documenti()]

    # SPARQL query excluding 'cited', 'Reference' and '_ver' items.
    query = "PREFIX fabio: <http://purl.org/spar/fabio/> SELECT DISTINCT ?doc WHERE { ?doc a fabio:Item . FILTER NOT EXISTS { ?doc a fabio:Item . FILTER regex(str(?doc), 'cited')} FILTER NOT EXISTS { ?doc a fabio:Item . FILTER regex(str(?doc), 'Reference')} FILTER NOT EXISTS { ?doc a fabio:Item . FILTER regex(str(?doc), '_ver')}}"
    docFromSPARQL = do_query_get(sparql_endpoint_remoto, query)

    # Merge in endpoint URLs not already present; a set gives O(1) membership
    # tests instead of scanning the list for every binding.
    seen = set(item_list)
    for binding in docFromSPARQL['results']['bindings']:
        url = binding['doc']['value']
        if url not in seen:
            seen.add(url)
            item_list.append(url)

    data = scraping_titolo(item_list)
    with open('cacheDoc.json', 'w') as out_file:
        out_file.write(data)
    return data
def check_Documento_In_Cache():
    """Flask view: ensure the document given by the ?url query arg is cached.

    Looks the URL up in cacheDoc.json. If it is NOT yet cached, its title is
    scraped, the new entry is appended to cacheDoc.json, the document is
    pushed to the endpoint via rfrbDocToEndpoint, and the scraped JSON list
    (a one-element '[{...}]' string) is returned. If it IS already cached, a
    sentinel JSON list '[{"url": "no", "titolo": "no"}]' is returned instead.

    Returns:
        str: JSON-encoded one-element list (scraped entry or sentinel).
    """
    url_doc = request.args.get('url')

    with open('cacheDoc.json') as fp:
        cached = json.loads(fp.read())
    # Entries are dicts carrying at least a "url" key.
    already_cached = any(url_doc == entry["url"] for entry in cached)

    # scraping_titolo returns a JSON list string like '[{...}]'; strip the
    # outer brackets to get the single object's JSON text.
    # (Renamed from the original's `dict`, which shadowed the builtin.)
    scraped = scraping_titolo([url_doc])
    item = scraped[1:-1]

    if not already_cached:
        # Re-read the cache right before appending, as the original did, in
        # case it changed between the lookup above and this write.
        with open("cacheDoc.json", "r") as fp:
            value = json.loads(fp.read())
        value.append(json.loads(item))
        with open("cacheDoc.json", "w") as fp:
            json.dump(value, fp)
        rfrbDocToEndpoint(url_doc)
        return scraped

    # Already cached: return the "no"/"no" sentinel list.
    return json.dumps([{'url': "no", 'titolo': "no"}])