Esempio n. 1
0
def return_titolo():
    """Return the document-title listing, using ``cacheDoc.json`` as a cache.

    If the cache file exists and has non-blank content, that content is
    returned verbatim. Otherwise the document URLs are collected from
    ``scraping_documenti()`` and from a SPARQL query sent to
    ``sparql_endpoint_remoto``, handed to ``scraping_titolo()``, and the
    result is written to ``cacheDoc.json`` before being returned.

    Returns:
        The raw cache file content, or the value produced by
        ``scraping_titolo()`` (presumably a JSON string, since it is written
        to the cache file as text -- TODO confirm).
    """
    try:
        with open('cacheDoc.json', 'r') as cache_file:
            cached = cache_file.read()
    except FileNotFoundError:
        # A missing cache is treated like an empty one: rebuild it below.
        cached = ''

    # Blank content cannot be a usable listing, so it also triggers a rebuild.
    if cached.strip():
        return cached

    docFromScraping = scraping_documenti()
    query = (
        "PREFIX fabio: <http://purl.org/spar/fabio/> "
        "SELECT DISTINCT ?doc WHERE { ?doc a fabio:Item . "
        "FILTER NOT EXISTS { ?doc a fabio:Item . FILTER regex(str(?doc), 'cited')} "
        "FILTER NOT EXISTS { ?doc a fabio:Item . FILTER regex(str(?doc), 'Reference')} "
        "FILTER NOT EXISTS { ?doc a fabio:Item . FILTER regex(str(?doc), '_ver')}}"
    )
    docFromSPARQL = do_query_get(sparql_endpoint_remoto, query)

    # Scraped URLs come first; SPARQL results are appended only when new.
    item_list = [d['url'] for d in docFromScraping]
    seen = set(item_list)  # O(1) membership checks; list order is preserved
    for binding in docFromSPARQL['results']['bindings']:
        doc_url = binding['doc']['value']
        if doc_url not in seen:
            seen.add(doc_url)
            item_list.append(doc_url)

    data = scraping_titolo(item_list)
    with open('cacheDoc.json', 'w') as out_file:
        out_file.write(data)
    return data
Esempio n. 2
0
def check_Documento_In_Cache():
    """Add the document given by the ``url`` request argument to the cache.

    Reads ``cacheDoc.json`` (a JSON list of objects carrying a ``"url"``
    key). If the requested URL is already listed, nothing is scraped or
    written and a placeholder listing is returned. Otherwise the URL is
    passed to ``scraping_titolo()``, the resulting entry is appended to the
    cache file, and the URL is forwarded to ``rfrbDocToEndpoint()``.

    Returns:
        str: the string returned by ``scraping_titolo()`` for a newly added
        document, or the JSON text ``[{"url": "no", "titolo": "no"}]`` when
        the document was already cached.
    """
    url_doc = request.args.get('url')
    with open('cacheDoc.json') as fp:
        cached_docs = json.loads(fp.read())

    if any(url_doc == entry["url"] for entry in cached_docs):
        # Already cached: skip the scrape entirely and signal "nothing new".
        return json.dumps([{'url': "no", 'titolo': "no"}])

    lista = scraping_titolo([url_doc])  # string
    # Drop the first and last character of the scraped string (presumably
    # the enclosing "[" and "]" of a one-element JSON array -- TODO confirm)
    # so that the remainder parses as a single entry.
    item = lista[1:-1]
    cached_docs.append(json.loads(item))

    with open("cacheDoc.json", "w") as fp:
        json.dump(cached_docs, fp)

    rfrbDocToEndpoint(url_doc)
    return lista