예제 #1
0
def QueueViamichelin(country, assettype, source, starturl, pageurl, page):
    """Enqueue the assets listed on a ViaMichelin result page.

    Names and links are extracted with two independent XPath queries and
    paired by position; every pair with a non-empty link is handed to
    ``gL.dbEnqueue``.

    Args:
        country, assettype, source, starturl: passed through unchanged to
            ``gL.dbEnqueue``.
        pageurl: URL of the page being parsed; passed to ``gL.dbEnqueue``
            and reported in error log messages.
        page: parsed document exposing ``xpath()`` (presumably an lxml
            tree -- confirm against the caller).

    Returns:
        True when the page was processed; False when validation fails or
        any exception is raised (the exception is logged, not propagated).
    """
    if gL.trace:
        gL.log(gL.DEBUG)
    try:
        href = page.xpath('//a[@class="parseHref jsNodePoiLink"]//@href')
        test = page.xpath('//h2[@class="parseInnerText jsNodePoiTitle"]//text()')

        # Drop names that are empty or made only of spaces.
        nomi = [item for item in test if item.replace(" ", "") != '']

        # The messages report pageurl: the local `url` is only assigned
        # inside the loop below, so referencing it here used to raise
        # UnboundLocalError instead of logging the intended message.
        if nomi is None or href is None:
            msg = "%s - %s" % ("Parsing dei nomi / href senza risultati", pageurl)
            gL.log(gL.ERROR, msg)
            return False
        if len(nomi) > len(href):
            msg = "%s - %s" % ("Errore nel parsing dei nomi o di href", pageurl)
            gL.log(gL.ERROR, msg)
            return False

        # zip() advances both sequences on every iteration, so an empty
        # link skips only its own entry (the manual counter used to get
        # stuck on it).  len(nomi) <= len(href) is guaranteed above, so
        # every name is visited.
        for nome, link in zip(nomi, href):
            if not link:
                continue
            name = gL.StdName(nome)
            url = gL.SourceBaseUrl + link
            gL.dbEnqueue(country, assettype, source, starturl, pageurl, url, name)

    except Exception as err:
        gL.log(gL.ERROR, pageurl)
        gL.log(gL.ERROR, err)
        return False

    return True
예제 #2
0
def QueueTripadvisor(country, assettype, source, starturl, pageurl, page):
    """Enqueue the assets listed on a TripAdvisor result page.

    Each listing node is queried for its title text and its link; the
    normalised title and the link prefixed with ``gL.SourceBaseUrl`` are
    handed to ``gL.dbEnqueue``.

    Args:
        country, assettype, source, starturl: passed through unchanged to
            ``gL.dbEnqueue``.
        pageurl: URL of the page being parsed; passed to ``gL.dbEnqueue``
            and logged when an exception occurs.
        page: parsed document exposing ``xpath()`` (presumably an lxml
            tree -- confirm against the caller).

    Returns:
        True when every listing was processed; False as soon as any
        exception is raised (e.g. a listing without title or link), after
        logging ``pageurl`` and the exception.
    """
    if gL.trace:
        gL.log(gL.DEBUG)

    try:
        # Read the listing nodes and enqueue one asset per node.
        listings = page.xpath('//*[@class="listing" or @class="listing first"]')
        for entry in listings:
            raw_title = entry.xpath('.//*[@class="property_title "]//text()')[0]
            title = gL.StdName(raw_title)
            relative_link = entry.xpath('.//a[contains(@class,"property_title ")]/@href')[0]
            target = gL.SourceBaseUrl + relative_link
            # Insert or update the asset.
            gL.dbEnqueue(country, assettype, source, starturl, pageurl, target, title)
    except Exception as failure:
        gL.log(gL.ERROR, pageurl)
        gL.log(gL.ERROR, failure)
        return False
    else:
        return True
예제 #3
0
def QueueQristoranti(country, assettype, source, starturl, pageurl, page):
    """Enqueue the assets listed on a Qristoranti result page.

    Titles and links are read with page-wide XPath queries and paired by
    position with the ``contentTitle`` nodes; links are prefixed with
    ``http://`` plus the host name of ``starturl``.

    Args:
        country, assettype, source: passed through unchanged to
            ``gL.dbEnqueue``.
        starturl: passed to ``gL.dbEnqueue``; its host name is also used
            to build each asset link.
        pageurl: URL of the page being parsed; passed to ``gL.dbEnqueue``
            and logged when an exception occurs.
        page: parsed document exposing ``xpath()`` (presumably an lxml
            tree -- confirm against the caller).

    Returns:
        True when every entry was processed; False as soon as any
        exception is raised, after logging ``pageurl`` and the exception.
    """
    if gL.trace:
        gL.log(gL.DEBUG)
    try:
        # Read the list and enqueue one asset per entry.
        lista = page.xpath('//div[@class="contentTitle"]')
        if lista:
            # These queries and the URL parse do not depend on the loop
            # variable: evaluate them once instead of once per entry.
            names = page.xpath('//div[@class="contentTitle"]/a//text()')
            hrefs = page.xpath('//div[@class="contentTitle"]/a//@href')
            base = "http://" + urlparse(starturl).hostname

            for idx, _entry in enumerate(lista):
                # Positional pairing: a shorter names/hrefs list raises
                # IndexError, which is reported by the handler below.
                raw_name = names[idx]
                href = hrefs[idx]
                name = gL.StdName(raw_name)
                link = base + href
                gL.dbEnqueue(country, assettype, source, starturl, pageurl, link, name)

    except Exception as err:
        gL.log(gL.ERROR, pageurl)
        gL.log(gL.ERROR, err)
        return False

    return True
예제 #4
0
def QueueDuespaghi(country, assettype, source, starturl, pageurl, page):
    """Enqueue the assets listed on a 2spaghi result page.

    The ``clearfix`` anchors, their ``href`` attributes and their
    ``title`` attributes are read with three XPath queries and paired by
    position; every entry with a non-empty link is handed to
    ``gL.dbEnqueue``.

    Args:
        country, assettype, source, starturl: passed through unchanged to
            ``gL.dbEnqueue``.
        pageurl: URL of the page being parsed; passed to ``gL.dbEnqueue``
            and reported in error log messages.
        page: parsed document exposing ``xpath()`` (presumably an lxml
            tree -- confirm against the caller).

    Returns:
        True when the page was processed; False when validation fails or
        any exception is raised (the exception is logged, not propagated).
    """
    if gL.trace:
        gL.log(gL.DEBUG)
    try:
        lista = page.xpath('//a[@class="clearfix"]')
        href = page.xpath('//a[@class="clearfix"]/@href')
        nomi = page.xpath('//a[@class="clearfix"]/@title')

        # The messages report pageurl: the local `url` is only assigned
        # inside the loop below, so referencing it here used to raise
        # UnboundLocalError instead of logging the intended message.
        if nomi is None or href is None:
            msg = "%s - %s" % ("Parsing nomi / href senza risultati", pageurl)
            gL.log(gL.ERROR, msg)
            return False
        if len(nomi) != len(href):
            msg = "%s - %s" % ("Errore nel parsing dei nomi o di href", pageurl)
            gL.log(gL.ERROR, msg)
            return False
        # Anchors matched but no attributes extracted: checked once here
        # rather than on every loop iteration.
        if lista and (not nomi or not href):
            msg = "%s - %s" % ("Errore get ", pageurl)
            gL.log(gL.ERROR, msg)
            return False

        # enumerate() advances the index on every iteration, so an empty
        # link skips only its own entry (the manual counter used to get
        # stuck on it).  If there are more anchors than attributes the
        # resulting IndexError is reported by the handler below.
        for n, _anchor in enumerate(lista):
            link = href[n]
            if not link:
                continue
            name = gL.StdName(nomi[n])
            url = gL.SourceBaseUrl + link
            gL.dbEnqueue(country, assettype, source, starturl, pageurl, url, name)

    except Exception as err:
        gL.log(gL.ERROR, pageurl)
        gL.log(gL.ERROR, err)
        return False

    return True