Beispiel #1
0
def unes(targetURL):
    """Scrape the 20minutes front page and store each headline.

    Registers the newspaper through ``modele_donnees.insert_quotidien``,
    downloads ``targetURL``, extracts headline titles and links with XPath,
    and passes one record per headline to ``modele_donnees.insert_une``.

    Args:
        targetURL: Address of the page to download; it is also prepended to
            every extracted link to build the stored ``URL`` value.
    """
    modele_donnees.insert_quotidien(
        {'nom': '20minutes', 'url': 'http://www.20minutes.fr'})

    page = urllib.urlopen(targetURL)
    try:
        data = page.read().decode('utf8')
    finally:
        # Release the connection even when reading or decoding fails.
        page.close()

    # Parse once; both XPath queries run against the same tree.
    doc = lxml.html.document_fromstring(data)
    articles_href = doc.xpath('//article//h2/a/@href')
    article_titles = doc.xpath('//article//h2//text()')

    # Every headline of a run shares the same scrape date.
    date = time.strftime('%Y/%m/%d', time.localtime())

    # zip() stops at the shorter list, so a title without a matching link
    # (or the reverse) is dropped.
    for title, href in zip(article_titles, articles_href):
        # A new dict per headline, so nothing insert_une() keeps or adds to
        # one record can carry over into the next.
        une = {
            'titre': str(title.encode('utf8')),
            'URL': targetURL + str(href.encode('utf8')),
            'date': date,
        }
        quotidien = {'nom': '20minutes', 'url': 'http://www.20minutes.fr'}
        modele_donnees.insert_une(une, quotidien)
Beispiel #2
0
def unes(targetURL):
    """Scrape the Sud Ouest front page and store each headline.

    Registers the newspaper through ``modele_donnees.insert_quotidien``,
    downloads the Sud Ouest home page, extracts headline titles and links
    from the "essentiel" and "default" article sections, and passes one
    record per headline to ``modele_donnees.insert_une``.

    Args:
        targetURL: Prefix prepended to every extracted link to build the
            stored ``URL`` value.
    """
    modele_donnees.insert_quotidien(
        {'nom': 'sudouest', 'url': 'http://www.sudouest.fr'})

    # NOTE(review): the download address is hard-coded; targetURL is only
    # used as the link prefix below -- confirm this is intended.
    page = urllib.urlopen("http://www.sudouest.fr/")
    try:
        data = page.read().decode('utf8')
    finally:
        # Release the connection even when reading or decoding fails.
        page.close()

    # Parse once; all XPath queries run against the same tree.
    doc = lxml.html.document_fromstring(data)
    articles_href = doc.xpath(
        '//section[@class="articles essentiel "]//div[@class="article-wrapper"]/a/@href'
    ) + doc.xpath(
        '//section[@class="articles default "]//div[@class="article-wrapper"]/a/@href'
    )
    article_titles = doc.xpath(
        '//section[@class="articles essentiel "]//div[@class="article-wrapper"]/a/h2/text()'
    ) + doc.xpath(
        '//section[@class="articles default "]//div[@class="article-wrapper"]/a/h2/text()'
    )

    # Every headline of a run shares the same scrape date.
    date = time.strftime('%Y/%m/%d', time.localtime())

    # zip() stops at the shorter list, so a title without a matching link
    # (or the reverse) is dropped.
    for title, href in zip(article_titles, articles_href):
        # A new dict per headline, so nothing insert_une() keeps or adds to
        # one record can carry over into the next.
        une = {
            'titre': str(title.encode('utf8')),
            'URL': targetURL + str(href.encode('utf8')),
            'date': date,
        }
        quotidien = {'nom': 'sudouest', 'url': 'http://www.sudouest.fr'}
        modele_donnees.insert_une(une, quotidien)
Beispiel #3
0
def unes(targetURL):
    """Scrape the La Tribune front page and store each headline.

    Registers the newspaper through ``modele_donnees.insert_quotidien``,
    downloads ``targetURL``, extracts headline titles and links from the
    main article, the title wrappers and the title river, and passes one
    record per headline to ``modele_donnees.insert_une``.

    Args:
        targetURL: Address of the page to download. Extracted links are
            stored as found; this value is not prepended to them.
    """
    modele_donnees.insert_quotidien(
        {'nom': 'laTribune', 'url': 'http://www.latribune.fr'})

    page = urllib.urlopen(targetURL)
    try:
        data = page.read().decode('utf8')
    finally:
        # Release the connection even when reading or decoding fails.
        page.close()

    doc = lxml.html.document_fromstring(data)

    # NOTE(review): the main-article title query uses '//article' while the
    # matching link query uses '/article'; if they select different anchors
    # the title/link pairing below shifts -- confirm against the live page.
    titres = doc.xpath("//div[@class='main-article']//article/h2/a/text()")
    titres += doc.xpath('//div[@class="title-wrapper"]//a/text()')
    titres += doc.xpath('//div[@class="title-river"]//a/text()')
    liens = doc.xpath('//div[@class="main-article"]/article/h2/a/@href')
    liens += doc.xpath('//div[@class="title-wrapper"]//a/@href')
    liens += doc.xpath('//div[@class="title-river"]//a/@href')

    # Every headline of a run shares the same scrape date.
    date = time.strftime('%Y/%m/%d', time.localtime())

    # zip() stops at the shorter list, so a title without a matching link
    # (or the reverse) is dropped.
    for titre, lien in zip(titres, liens):
        # A new dict per headline, so nothing insert_une() keeps or adds to
        # one record can carry over into the next.
        une = {
            'titre': str(titre.encode('utf8')),
            'URL': str(lien.encode('utf8')),
            'date': date,
        }
        quotidien = {'nom': 'laTribune', 'url': 'http://www.latribune.fr'}
        modele_donnees.insert_une(une, quotidien)