コード例 #1
0
def scrapeBaramDom():
    """Scrape the baramdom.com front page and return its ads via adsToJson.

    Listings are read from the "box post" container of the front page; the
    description and price come from each ad's own page.  The split/index
    chains are unguarded, so markup that deviates from the expected layout
    raises IndexError.
    """
    # UTF-8 support
    reload(sys)
    sys.setdefaultencoding('utf-8')
    now = datetime.now()  # NOTE(review): never read in this function
    site = "http://www.baramdom.com"
    front_page = unicode(Downloader('http://www.baramdom.com/').get_content())
    post_box = xpath.get(front_page, '//div[@class="box post"]')
    ads = []
    for listing in xpath.search(post_box, '//div[@class="content"]'):
        link = site + xpath.get(listing, '//div[@class="post-title"]/h2/a/@href')
        title = xpath.get(listing, '//div[@class="post-title"]/h2/a')

        # Listings without a thumbnail get the site's placeholder image.
        thumb = xpath.get(listing, '//a[@class="grouped"]/img/@src')
        if thumb != "":
            imageUrl = site + thumb
        else:
            imageUrl = "http://www.baramdom.com/img/apartment_noimage.png"

        detail_page = unicode(Downloader(link).get_content())
        description = xpath.get(detail_page, '//p[@class="post_add_desc"]').strip()
        category = u"Недвижнини"

        # The headline is split on " во ": the left part carries the
        # subcategory (text after "ам "), the right part is the region.
        headline = xpath.get(listing, '//p[@class="add-title"]').strip().split(" во ")
        region = headline[1]
        country = u"Македонија"
        subcategory = headline[0].split("ам ")[1]

        price_tokens = xpath.get(
            detail_page, '//div[@class="post-add"]/p[@class="last"]').strip().split(" ")
        if len(price_tokens) == 3:
            # Three tokens are stored as "no price".
            value = currency = "/"
        else:
            value, currency = price_tokens[0], price_tokens[1]
            if currency == "Euro.":
                currency = "EUR"
            elif currency == u"Ден.":
                currency = "MKD"

        # Take the text after the first ">", keep its first word and flip
        # the three "-"-separated fields into reverse order.
        stamp = xpath.get(listing, '//div[@class="fl"]').strip().split(">")[1]
        fields = stamp.strip().split(" ")[0].split("-")
        date = fields[2] + "-" + fields[1] + "-" + fields[0]

        ads.append(Ad(link, title, imageUrl, description, category,
                      subcategory, value, currency, region, date, country))
    return adsToJson(ads)
#print scrapeBaramDom()
コード例 #2
0
def scrapeVipMarket5():
    """Scrape the vipmarket5.mk search page and return its ads via adsToJson.

    Each result row (tr.frame_content) supplies link, title, image, price
    and date; the ad's own page supplies description and region.  An empty
    description or region and a negotiable price are stored as "/".
    The split/index chains on price and date are unguarded and raise
    IndexError on unexpected markup.
    """
    # UTF-8 support
    reload(sys)
    sys.setdefaultencoding('utf-8')
    down = Downloader('http://www.vipmarket5.mk/search/')
    html = unicode(down.get_content())
    ads = []
    for row in xpath.search(html, '//tr[@class="frame_content"]'):
        link = "http://www.vipmarket5.mk" + xpath.get(row, '//div[@style="width:365px; height:90%; margin-top:10px;"]/b/a/@href')
        title = xpath.get(row, '//div[@style="width:365px; height:90%; margin-top:10px;"]/b/a')
        imageUrl = xpath.get(row, '//div[@style="overflow:hidden; width:150px; height: 146px; margin: 5px;"]/a/img/@src')

        cont = unicode(Downloader(link).get_content())
        description = xpath.get(cont, '//div[@class="feature"]/p').strip()
        if description == "":
            description = "/"

        # NOTE: the site exposes no categories.
        category = "/"
        subcategory = "/"

        price = xpath.get(row, '//div[@style="margin-top:5px; margin-left:10px;height:155px; overflow:hidden;"]/h4/a')
        if price == u"Цена:По договор":
            value = "/"
            currency = "/"
        else:
            # "<label>:<amount> <unit>" -> amount and unit
            price = price.split(":")[1].split(" ")
            value = price[0]
            if price[1] == "€":
                currency = "EUR"
            elif price[1] == "ден.":
                currency = "MKD"
            else:
                # Unknown unit: keep the site's text.  Previously `currency`
                # stayed unbound here (NameError on the first row) or kept
                # the value computed for the previous row.
                currency = price[1]

        # "<label>: dd.mm.yyyy" -> "yyyy-mm-dd"
        fields = xpath.get(row, '//b[@style="font-weight:bold;"]').split(": ")[1].split(".")
        date = fields[2] + "-" + fields[1] + "-" + fields[0]
        country = u"Македонија"

        region = xpath.get(cont, '//div[@style="float:left; width: 140px; overflow:hidden; font-family: Tahoma,Geneva,sans-serif; font-weight:bold"]')
        if region == "":
            region = "/"

        ads.append(Ad(link, title, imageUrl, description, category,
                      subcategory, value, currency, region, date, country))
    return adsToJson(ads)
#print scrapeVipMarket5()
コード例 #3
0
def scrapeNedviznostiMakedonija():
    """Scrape nedviznostimakedonija.com.mk search results into Ad JSON.

    Link, title, image and price come from each result box; description,
    region and date come from the ad's own page.  The price unit is mapped
    to "EUR" when it is the euro sign and to "MKD" otherwise.  Returns the
    result of adsToJson for the collected ads.
    """
    # UTF-8 support
    reload(sys)
    sys.setdefaultencoding('utf-8')
    now = datetime.now()  # NOTE(review): never read in this function
    base = "http://www.nedviznostimakedonija.com.mk/"
    title_anchor = '//a[@class="subjectLook nobackim"]'
    results_page = unicode(Downloader(
        'http://www.nedviznostimakedonija.com.mk/Default.aspx?search=1'
    ).get_content())

    ads = []
    for box in xpath.search(results_page, '//div[@class="boxesResultNewTop"]'):
        link = base + xpath.get(box, title_anchor + '/@href')
        title = xpath.get(box, title_anchor).strip()
        imageUrl = base + xpath.get(box, '//a[@class="nobackim"]/img/@src')

        detail = unicode(Downloader(link).get_content())
        description = xpath.get(
            detail, '//span[@id="Body1_DetailControl1_FormView1_Label5"]')
        category = u"Недвижнини"
        subcategory = "/"

        # "<amount with dots> <unit>": drop the dots from the amount.
        tokens = xpath.get(
            box,
            '//div[@style="float:right; color:#1b5474; font-size:14px; font-weight:bold;"]/span'
        ).split(" ")
        value = tokens[0].replace(".", "")
        currency = "EUR" if tokens[1] == "€" else "MKD"

        region = xpath.get(
            detail,
            '//span[@id="Body1_DetailControl1_FormView1_cityDescriptionLabel"]')
        country = u"Македонија"

        # "dd.mm.yyyy" -> "yyyy-mm-dd"
        fields = xpath.get(
            detail,
            '//span[@id="Body1_DetailControl1_FormView1_LabelDate"]').split(".")
        date = fields[2] + "-" + fields[1] + "-" + fields[0]

        ads.append(Ad(link, title, imageUrl, description, category,
                      subcategory, value, currency, region, date, country))
    return adsToJson(ads)


#print scrapeNedviznostiMakedonija()
コード例 #4
0
def scrapeOglasiRs():
    """Scrape the oglasi.rs search listing and return its ads via adsToJson.

    Link, title, image, price and date come from each li.clearfix item;
    the description (second paragraph of the description block) and the
    region (second list entry, text before "(") come from the ad's page.
    Index lookups are unguarded and raise IndexError on unexpected markup.
    """
    # UTF-8 support
    reload(sys)
    sys.setdefaultencoding('utf-8')
    now = datetime.now()  # NOTE(review): never read in this function
    listing_html = unicode(
        Downloader('http://www.oglasi.rs/pretraga/0/0/').get_content())

    ads = []
    for item in xpath.search(listing_html, '//li[@class="clearfix"]'):
        link = xpath.get(item, '//a[@class="ogl_id"]/@href')
        title = xpath.get(item, '//h2/a[@class="ogl_id"].text()')
        imageUrl = "http://oglasi.rs" + xpath.get(item, '//a[@class="ogl_id"]/img/@src')
        price_text = xpath.get(item, '//div[@class="ad-price"]/h3')

        # "dd.mm.yyyy" -> "yyyy-mm-dd"
        fields = xpath.get(item, '//div[@class="right-side"]/div/p/strong').split(".")
        date = fields[2] + "-" + fields[1] + "-" + fields[0]

        # "<amount> <currency>": strip thousands dots, drop the part after ",".
        tokens = price_text.split(" ")
        amount = tokens[0].replace(".", "")
        currency = tokens[1]
        value = amount.split(",")[0]

        detail = unicode(Downloader(link).get_content())
        paragraphs = xpath.search(detail, '//div[@class="description"]/p')
        description = paragraphs[1].strip()
        category = "/"
        subcategory = "/"

        info_lists = xpath.search(detail, '//div[@class="description"]/ul[@class="clearfix"]')
        entries = xpath.search(info_lists[0], '//li')
        region = entries[1].split("(")[0].strip()
        country = u"Србија"

        ads.append(Ad(link, title, imageUrl, description, category,
                      subcategory, value, currency, region, date, country))
    return adsToJson(ads)

#print scrapeOglasiRs()
コード例 #5
0
ファイル: Pazar3.py プロジェクト: PsyLee/scrapers
def scrapePazar3():
    """Fetch the pazar3.mk search JSON and return its ads via adsToJson.

    Both the "Items" and the "FirstPositionItems" lists of the response's
    "data" object are converted, in that order, with the same per-item
    logic (see _pazar3_build_ad).
    """
    reload(sys)
    sys.setdefaultencoding('utf-8')
    down = Downloader('http://www.pazar3.mk/mk/Listing/Home/Search?CookieLocationId=0&Location=0-%D0%A6%D0%B5%D0%BB%D0%B0-%D0%9C%D0%B0%D0%BA%D0%B5%D0%B4%D0%BE%D0%BD%D0%B8%D1%98%D0%B0&_=1404733181509')
    content = unicode(down.get_content())
    t = json.loads(content)["data"]
    now = datetime.now()
    ads = []
    for key in ("Items", "FirstPositionItems"):
        for f in t[key]:
            ads.append(_pazar3_build_ad(f, now))
    return adsToJson(ads)


# Month abbreviations as they appear in "CreateDate", January first.
# Matched with == (not a dict lookup) so that str/unicode comparison keeps
# working exactly as in the original if/elif chain.
_PAZAR3_MONTHS = ("јан", "фев", "мар", "апр", "мај", "јун",
                  "јул", "авг", "сеп", "окт", "ное", "дек")


def _pazar3_ymd(moment):
    """Format a datetime as "year-month-day" without zero padding."""
    return str(moment.year) + "-" + str(moment.month) + "-" + str(moment.day)


def _pazar3_date(created, now):
    """Convert a "CreateDate" text into the date string stored on the Ad.

    Two tokens are treated as "<day> <time>", where the words for today
    and yesterday are replaced by the matching calendar date.  Three or
    more tokens are treated as "<day> <month abbreviation> <time>" in the
    current year.  Anything that cannot be interpreted is returned as-is;
    previously an unknown month left the date unbound (NameError) or reused
    the previous item's date.
    """
    parts = created.split(" ")
    if len(parts) == 2:
        day, clock = parts
        if day == "Денес":
            day = _pazar3_ymd(now)
        elif day == "Вчера":
            day = _pazar3_ymd(now - timedelta(days=1))
        return day + " " + clock
    if len(parts) >= 3:
        for number, abbreviation in enumerate(_PAZAR3_MONTHS, 1):
            if parts[1] == abbreviation:
                return (str(now.year) + "-" + str(number) + "-" + parts[0]
                        + " " + parts[2])
    return created


def _pazar3_build_ad(f, now):
    """Build an Ad from one item dict of the Pazar3 search response."""
    title = f["Title"].replace("\"", "")
    category = f["Category"]["Name"]

    # Empty price / currency are stored as "/".
    value = "/" if f["Price"] == "" else f["Price"]
    if f["Currency"] == "ЕУР":
        currency = "EUR"
    elif f["Currency"] == "":
        currency = "/"
    elif f["Currency"] == u"МКД":
        currency = "MKD"
    else:
        currency = f["Currency"]

    region = f["Location"]["Name"]
    link = "http://www.pazar3.mk/mk/Listing/AdDetail/Index/" + f["IdSeo"]
    country = u"Македонија"

    # The image URL needs both fields; a None (or literal "None") in either
    # one means the ad has no image.
    imagedate = str(f["ImageDate"])
    imagetitle = str(f["ImageTitle"])
    if imagedate == 'None' or imagetitle == 'None':
        imageUrl = "/"
    else:
        imageUrl = "http://media.pazar3.mk/ImageHandler.ashx?date="+f["ImageDate"]+"&guid="+f["ImageTitle"]+"&width=300&height=225&isVideo=false"

    description = getDescription(link, '//div[@class="well well-small noback nomargin hidden-overflow"]')
    if description is None:
        description = "/"
    else:
        description = description.strip().replace("\"", "")

    subcategory = "/"
    date = _pazar3_date(f["CreateDate"], now)
    return Ad(link, title, imageUrl, description, category, subcategory,
              value, currency, region, date, country)

# print scrapePazar3()
コード例 #6
0
ファイル: Koli.py プロジェクト: PsyLee/scrapers
def scrapeKoli():
    """Scrape the koli.com.mk used-car list and return its ads via adsToJson.

    Every link with class "linkovi_desno_golemi" on the list page is
    followed and the ad is built from its detail page.  The ad date is
    derived from the relative age text in span#lblDenovi; when that text is
    not recognised the date stays an empty string.
    """
    reload(sys)
    sys.setdefaultencoding('utf-8')
    down = Downloader('http://koli.com.mk/polovni_lista.aspx')
    html = unicode(down.get_content())
    links = xpath.search(html, '//a[@class="linkovi_desno_golemi"]/@href')
    # Reference time for all ads.  It is never reassigned, so an older ad
    # can no longer shift the date of the ads processed after it.
    today = datetime.now()
    ads = []
    for l in links:
        link = "http://koli.com.mk/" + l
        page = unicode(Downloader(link).get_content())
        description = u"Опрема: " + xpath.get(
            page, '//span[@id="lblOprema"]') + " \nOpis: " + xpath.get(
                page, '//span[@id="lblOpis"]')
        title = xpath.get(page, '//span[@id="lblMarkaModel"].text()').strip()
        imageUrl = 'http://koli.com.mk/' + xpath.get(
            page, '//img[@id="slika"]/@src')
        subcategory = "/"
        category = u"Возила"
        region = xpath.get(page, '//span[@id="lblGrad"].text()')
        country = u"Македонија"
        value = xpath.get(page, '//span[@id="lblMomentalnaCena"]').strip()
        currency = "EUR"

        age_text = xpath.get(page, '//span[@id="lblDenovi"]').strip()
        days_back = _koli_age_in_days(age_text)
        if days_back is None:
            date = ""
        else:
            posted = today - timedelta(days=days_back)
            date = (str(posted.year) + "-" + str(posted.month) + "-"
                    + str(posted.day))

        ads.append(Ad(link, title, imageUrl, description, category,
                      subcategory, value, currency, region, date, country))

    return adsToJson(ads)


def _koli_age_in_days(age_text):
    """Return the age in days described by the text, or None if unknown.

    A single token is a bare unit (one second/minute/hour -> 0 days, one
    day -> 1, one month -> 30).  Otherwise the first token is a count and
    the second a plural unit; months count as 30 days each.
    """
    tokens = age_text.split(" ")
    if len(tokens) == 1:
        unit = tokens[0]
        if unit in (u"минута", u"час", u"секунда"):
            return 0
        if unit == u"ден":
            return 1
        if unit == u"месец":
            return 30
        return None

    unit = tokens[1]
    if unit == u"месеци":
        # Convert first, then multiply: int(text * 30) would repeat the
        # digits 30 times and produce an absurdly large number.
        return int(tokens[0]) * 30
    if unit == u"дена":
        return int(tokens[0])
    if unit in (u"минути", u"часа", u"секунди"):
        return 0
    return None
コード例 #7
0
ファイル: MobileBg.py プロジェクト: PsyLee/scrapers
def scrapeMobileBg():
    """Scrape a fixed set of mobile.bg result pages and return Ad JSON.

    Every page listed in `slinks` is downloaded; from the "tablereset"
    tables inside the search form the first three and last four are
    dropped and the rest are treated as listing rows.  Description and
    region come from each ad's own page.  The ad date is always the day
    of the scrape.
    """
    # cp1251 support
    reload(sys)
    sys.setdefaultencoding('cp1251')
    now = datetime.now()

    # Saved-search identifiers of the result pages, in scrape order.
    slinks = (
        '71wxzy', '71xw69', '71xwi1', '71xwr0', '71xx7g', '71xxjy',
        '71xzyr', '71y06e', '71y0dk', '71y0q6', '71y16v', '71y1ep',
        '71y2ih', '71y2x5', '71y34p', '71y3ex', '71y3wj', '71y449',
        '71y4wz', '71y5qh', '71y5yv', '71y6az', '71y6kg', '71y6qz',
    )
    linkovi = []
    for slink in slinks:
        page_url = ('http://www.mobile.bg/pcgi/mobile.cgi?act=3&slink='
                    + slink + '&f1=1')
        html = unicode(Downloader(page_url).get_content())
        tables = xpath.search(
            html, '//form[@name="search"]/table[@class="tablereset"]')
        linkovi.extend(tables[3:len(tables) - 4])

    ads = []
    for l in linkovi:
        link = xpath.get(l, '//td[@class="valgtop"]/a[@class="mmm"]/@href')
        title = xpath.get(l, '//td[@class="valgtop"]/a[@class="mmm"]').strip()
        imageUrl = xpath.get(l, '//a[@class="photoLink"]/img/@src')
        cont = unicode(Downloader(link).get_content())

        # Keep the text before the first embedded link.  A bare marker
        # means "no description"; otherwise the last 19 characters are cut.
        description = xpath.get(cont, '//td[@style="font-size:13px;"]').strip()
        description = description.split("<a href")[0]
        if description == "» ":
            description = "/"
        else:
            description = description[0:len(description)-19]
        description = description.replace("\"", "")

        category = u"Возила"
        subcategory = "/"

        price = xpath.get(l, '//span[@class="price"]').strip()
        if price == u"Договаряне":
            value = "/"
            currency = "/"
        else:
            # The amount may be split into space-separated digit groups;
            # the last token used is the currency.
            price = price.split(" ")
            if len(price) == 2:
                value = price[0]
                currency = price[1]
            elif len(price) == 3:
                value = price[0] + price[1]
                currency = price[2]
            else:
                value = price[0] + price[1] + price[2]
                currency = price[3]
            if currency == "лв.":
                currency = "BGN"

        # First word after the region label, with a trailing "<a" removed.
        region = xpath.get(cont, '//td[@style="padding:10px"]').strip()
        region = region.split("Регион: ")[1].split(" ")[0]
        region = region.replace("<a", "").strip()
        date = str(now.year)+"-"+str(now.month)+"-"+str(now.day)
        country = u"Бугарија"

        ads.append(Ad(link, title, imageUrl, description, category,
                      subcategory, value, currency, region, date, country))
    return adsToJson(ads)
#print scrapeMobileBg()
コード例 #8
0
ファイル: Mobile24.py プロジェクト: PsyLee/scrapers
def scrapeMobile24():
    """Scrape seven mobile24.mk vehicle sections and return Ad JSON.

    Listing rows are the tr.t0 and tr.t1 elements of each section page;
    the description comes from the ad's own page.  The ad date is always
    the day of the scrape.
    """
    # UTF-8 support
    reload(sys)
    sys.setdefaultencoding('utf-8')
    now = datetime.now()

    sections = ('avtomobili', 'motocikli', 'kombinja', 'kamioni',
                'prikolki', 'avtobusi', 'gumiiavtodelovi')
    rows = []
    for position, section in enumerate(sections):
        page = unicode(
            Downloader('http://www.mobile24.mk/' + section + '/').get_content())
        t0_rows = xpath.search(page, '//tr[@class="t0"]')
        t1_rows = xpath.search(page, '//tr[@class="t1"]')
        # Row order is kept as it has always been emitted: t0 before t1 on
        # the first section, t1 before t0 on all following ones.
        if position == 0:
            ordered = (t0_rows, t1_rows)
        else:
            ordered = (t1_rows, t0_rows)
        for batch in ordered:
            rows.extend(batch)

    today = str(now.year) + "-" + str(now.month) + "-" + str(now.day)
    ads = []
    for row in rows:
        link = xpath.get(row, '//a[@class="listing-title"]/@href')
        title = xpath.get(row, '//a[@class="listing-title"]/b')
        imageUrl = xpath.get(row, '//td[@class="image"]/a/img/@src')

        detail = unicode(Downloader(link).get_content())
        blocks = xpath.search(
            detail,
            '//div[@class="item-left"]/div[@class="fieldset rounded4"]/div')
        # With exactly four blocks the description is the second one,
        # otherwise the first.
        description = blocks[1] if len(blocks) == 4 else blocks[0]
        category = u"Возила"
        subcategory = "/"

        price_cell = xpath.get(row, '//td[@class="price"].text()')
        value = xpath.get(row, '//td[@class="price"]/span').replace(",", "")
        # The currency is the text between the second "span>" and the
        # following "<" of the price cell.
        currency = price_cell.split("span>")[2].split("<")[0]
        if currency == u"денари":
            currency = "MKD"
        if value == u"По договор":
            value = "/"
            currency = "/"

        region = xpath.get(row, '//span[@class="city"]')
        date = today
        country = u"Македонија"

        ads.append(Ad(link, title, imageUrl, description, category,
                      subcategory, value, currency, region, date, country))
    return adsToJson(ads)


# print scrapeMobile24()
コード例 #9
0
def scrapeReklama5():
    """Scrape the reklama5.mk search page and return its ads via adsToJson.

    All fields except the description come from the div.OglasResults
    blocks of the search page; the description is fetched per ad through
    getDescription and stored as "/" when that returns None.  A date whose
    first word is the word for "today" becomes "<y-m-d> <time>"; any other
    date text is passed through whitespace-normalised.
    """
    # UTF-8 support
    reload(sys)
    sys.setdefaultencoding('utf-8')
    down = Downloader('https://www.reklama5.mk/Search')
    html = unicode(down.get_content())

    requestedWebPageUrl = 'https://www.reklama5.mk'

    ads = []
    for advert in xpath.search(html, '//div[@class="OglasResults"]'):
        link = requestedWebPageUrl + xpath.get(
            advert, '//a[@class="SearchAdTitle"]/@href')
        title = xpath.get(
            advert,
            '//a[@class="SearchAdTitle"].text()').strip().replace("\"", "")

        # getDescription may return None (scrapePazar3 guards for it too);
        # chaining .strip() directly would raise AttributeError.
        description = getDescription(
            link, '//div[@class="oglasTitle"]/p[@class="oglasTitle"]')
        if description is None:
            description = "/"
        else:
            description = description.strip().replace("\"", "")

        subcategory = "/"
        imageUrl = xpath.get(advert, '//img[@class="thumbnail thumbs"]/@src')
        # Only the site-relative "no image" placeholder gets the host added.
        if imageUrl == "/Content/images/noImage2.jpg":
            imageUrl = requestedWebPageUrl + imageUrl

        price = xpath.get(advert, '//div[@class="text-left text-success"]')
        price = re.sub(r'\s+', ' ', price).strip().split(" ")
        # A two-word "by agreement" price becomes "/" for value and currency.
        if price[0] == "По":
            price[0] = "/"
        if price[1] == "Договор":
            price[1] = "/"
        value = price[0]
        currency = price[1]
        if currency == "€":
            currency = "EUR"
        if currency == u"МКД":
            currency = "MKD"

        region = xpath.get(advert, '//p[@class="clear-margin"]')
        region = region.split("&gt;")[0].strip()
        country = u"Македонија"

        # The same cell holds both the day word and the time, so it is read
        # once.  An empty cell no longer raises IndexError.
        date = xpath.get(advert,
                         '//div[@class="text-center clear-padding adDate"]')
        date = re.sub(r'\s+', ' ', date).strip()
        words = date.split()
        if words and words[0] == u"Денес":
            today = datetime.now()
            vreme = date.split(" ")[1]
            date = (str(today.year) + "-" + str(today.month) + "-"
                    + str(today.day) + " " + vreme)
        category = xpath.get(advert, '//p[@class="adCategoryName"]/a')

        ads.append(Ad(link, title, imageUrl, description, category,
                      subcategory, value, currency, region, date, country))

    return adsToJson(ads)
コード例 #10
0
def scrapePobarajOglasi():
    """Scrape page 1 of the pobaraj.com.mk ad list and return Ad JSON.

    Link, title, image, price and date come from each list item;
    description, category path and region come from the ad's own page.
    The date text "<day>, <time>" is converted to "<y-m-d> <time>" for
    today, yesterday and "<day> <month abbreviation>" in the current year;
    any other day text is passed through unchanged in front of the time.
    The split/index chains are unguarded and raise IndexError on
    unexpected markup.
    """
    # UTF-8 support
    reload(sys)
    sys.setdefaultencoding('utf-8')
    now = datetime.now()
    # Month abbreviations as shown by the site, January first.  Matched
    # with == (not a dict lookup) so str/unicode comparison behaves exactly
    # like the former if/elif chain.
    months = ("Јан", "Фев", "Мар", "Апр", "Мај", "Јун",
              "Јул", "Авг", "Сеп", "Окт", "Ное", "Дек")

    down = Downloader('http://www.pobaraj.com.mk/lista_na_oglasi/all/1')
    html = unicode(down.get_content())
    site = xpath.get(html, '//ul[@class="lista_na_oglasi"]')
    ads = []
    for l in xpath.search(site, '//li'):
        link = "http://www.pobaraj.com.mk" + xpath.get(
            l, '//a[@class="title"]/@href')
        title = xpath.get(l, '//a[@class="title"]')
        imageUrl = xpath.get(l, '//a[@class="photo"]/img/@src')
        cont = unicode(Downloader(link).get_content())
        description = xpath.get(cont,
                                '//div[@class="oglas_prikaz_opis"]').strip()
        if description == "":
            description = "/"

        # Breadcrumb links: the second is the category, the third (when
        # present) the subcategory.
        kategorii = xpath.search(cont, '//a[@class="pateka"]')
        category = kategorii[1]
        if len(kategorii) > 2:
            subcategory = kategorii[2]
        else:
            subcategory = "/"

        # Text before the nested "<div " and after the price label.
        price = xpath.get(l, '//div[@class="price"]').strip()
        price = price.split("<div ")[0].strip().split("Цена: ")[1]
        if price == u"по договор":
            value = "/"
            currency = "/"
        else:
            price = price.split(" ")
            value = price[0]
            if price[1] == u"денари":
                currency = "MKD"
            elif price[1] == u"евра":
                currency = "EUR"
            else:
                currency = price[1]

        # Cut the city out of the raw markup that follows the city label.
        region = xpath.get(cont, '//div[@class="oglas_prikaz_left"]').strip()
        region = region.split("Град:<")[1].split("<b class")[0]
        region = region.split("b>")[1].strip()
        country = u"Македонија"

        datum = xpath.get(l, '//div[@class="oglas_date"]').strip()
        datum = datum.split(": ")[1].split(", ")
        vreme = datum[1]
        datum = datum[0]
        if datum == u"Денес":
            day = str(now.year) + "-" + str(now.month) + "-" + str(now.day)
        elif datum == u"Вчера":
            da = now - timedelta(days=1)
            day = str(da.year) + "-" + str(da.month) + "-" + str(da.day)
        else:
            # Default to the site's own text: an unknown month used to leave
            # a list here, which crashed with TypeError on concatenation.
            day = datum
            tokens = datum.split(" ")
            if len(tokens) > 1:
                for number, abbreviation in enumerate(months, 1):
                    if tokens[1] == abbreviation:
                        day = (str(now.year) + "-" + str(number) + "-"
                               + tokens[0])
                        break
        date = day + " " + vreme

        ads.append(Ad(link, title, imageUrl, description, category,
                      subcategory, value, currency, region, date, country))
    return adsToJson(ads)


#print scrapePobarajOglasi()
コード例 #11
0
def _avtooglasiDate(datum, now):
    """Turn the tokenised "posted" label of a listing into a date string.

    Args:
        datum: the label split on single spaces.
        now: the datetime that relative labels are resolved against.

    Returns:
        "Y-M-D H:M"-style text for today/yesterday labels, "Y-M-D" for
        "N ... ago" labels, and the first two tokens joined by a space
        for anything else.
    """
    def ymd(moment):
        return str(moment.year) + "-" + str(moment.month) + "-" + str(moment.day)

    if datum[0] == u"Денес":  # "today"; the third token is appended as the time
        return ymd(now) + " " + datum[2]
    if datum[0] == u"Вчера":  # "yesterday"
        return ymd(now - timedelta(days=1)) + " " + datum[2]
    if datum[0] == u"пред":  # "N <unit> ago"
        if datum[2] == u"дена":  # days: exact offset
            days = int(datum[1])
        elif datum[1] == "1":
            # Any other unit is approximated: 30 days for a count of "1" ...
            days = 30
        else:
            # ... and 60 days for every larger count.
            days = 60
        return ymd(now - timedelta(days=days))
    # Not a relative label: the first two tokens are used as they are.
    return datum[0] + " " + datum[1]


def scrapeAvtooglasi():
    """Scrape the vehicle listing page of avtooglasi.com.mk.

    Thumbnails, prices and the remaining ad data live in three parallel
    element lists on the result page and are matched up by position.
    Each ad's detail page is fetched through ``getDescription`` to obtain
    its free-text description.

    Returns:
        The result of ``adsToJson`` applied to the collected ``Ad`` objects.

    Raises:
        KeyError: if the page has fewer thumbnails than ads.
        IndexError: if an ad lacks one of the expected fields.
    """
    # UTF-8 support
    reload(sys)
    sys.setdefaultencoding('utf-8')
    now = datetime.now()
    down = Downloader('http://www.avtooglasi.com.mk/rezultati/show/?vid=0&orderby=0')
    content = down.get_content()
    html = unicode(content)
    sliki = xpath.search(html, '//div[@class="resultLeft"]')
    ostanato = xpath.search(html, '//div[@class="oglasInfoTopContent"]')
    ceni = xpath.search(html, '//a[@class="btn btn-info btn-xs oglasInfoAdditionalPrice"]')

    # Thumbnail URL per result position.
    image_urls = {}
    for i, slika in enumerate(sliki):
        image_urls[i] = xpath.search(slika, '//a[@class="thumbnail resultImg"]/img/@src')[0]

    # (value, currency) per result position.
    prices = {}
    for i, cena in enumerate(ceni):
        price = xpath.get(cena, '//span/span').strip().split(" ")
        if len(price) > 1:
            # The two words of the "По договор" label each become the "/" placeholder.
            if price[0] == u"По":
                price[0] = "/"
            if price[1] == u"договор":
                price[1] = "/"
            currency = price[1]
            if currency == "&euro;":
                currency = "EUR"
            prices[i] = (price[0], currency)
        else:
            # A label without a separate currency token used to leave the
            # entry unset and crash later with KeyError.
            prices[i] = ("/", "/")

    category = u"Возила"
    subcategory = "/"
    country = u"Македонија"

    ads = []
    for i, advert in enumerate(ostanato):
        link = xpath.get(advert, '//a[@class="resultMainLink"]/@href')
        title = xpath.get(advert, '//a[@class="resultMainLink"]/span').strip().replace("\"", "")

        path = xpath.search(getDescription(link, '//div[@class="centerC"]'), '/div/div[@class="padded"]')
        detail_text = path[1]

        # Pipe-separated summary: token 0 is used as the region, tokens 1-5
        # are folded into the description and token 6 is the "posted" label.
        dodatok = xpath.get(advert, '//span[@class="oglasInfoAdditionalInfo"]')
        dodatok = dodatok.split(" | ")
        region = dodatok[0]
        description = (dodatok[1] + u" година, " + dodatok[2] + ", " + dodatok[3] + ", "
                       + dodatok[4] + ", " + dodatok[5] + ", " + detail_text)
        description = description.strip().replace("\"", "")

        date = _avtooglasiDate(dodatok[6].strip().split(" "), now)

        # Ads without a matching price element get the "/" placeholder.
        value, currency = prices.get(i, ("/", "/"))
        ad = Ad(link, title, image_urls[i], description, category, subcategory,
                value, currency, region, date, country)
        ads.append(ad)

    return adsToJson(ads)

# print scrapeAvtooglasi()
コード例 #12
0
ファイル: KupujemProdajem.py プロジェクト: PsyLee/scrapers
def scrapeKupujemProdajem():
    """Scrape the search result page of kupujemprodajem.com.

    Regular and highlighted result items are both processed. For each
    item the ad's own page is downloaded to read its category and
    thumbnail. Scraping is best effort: an item that cannot be parsed is
    skipped, and a failure while fetching or splitting the result page
    yields whatever was collected so far.

    Returns:
        The result of ``adsToJson`` applied to the collected ``Ad`` objects.
    """
    # UTF-8 support
    reload(sys)
    sys.setdefaultencoding('utf-8')
    now = datetime.now()
    ads = []
    try:
        down = Downloader(
            'http://www.kupujemprodajem.com/search.php?action=list&data[category_id]=&data[group_id]=&data[location_id]=&data[keywords]=&submit[search]=Tra%C5%BEi'
        )
        html = unicode(down.get_content())

        # Fields that are the same for every ad from this page.
        subcategory = "/"
        country = u"Србија"
        # No per-ad date is read; every ad is stamped with today's date.
        date = str(now.year) + "-" + str(now.month) + "-" + str(now.day)

        linkovi = xpath.search(html, '//div[@class="item clearfix"]')
        linkovi.extend(
            xpath.search(html, '//div[@class="item clearfix adHighlighted"]'))

        for l in linkovi:
            try:
                link = "http://www.kupujemprodajem.com/" + xpath.get(
                    l, '//a[@class="adName"]/@href')
                title = xpath.get(l, '//a[@class="adName"]')

                region = xpath.get(l, '//section[@class="locationSec"]').strip()
                region = region.split(" | ")[0]

                # Price text is "<amount>&nbsp;<currency>"; anything else is
                # treated as "no price".
                price = xpath.get(l, '//span[@class="adPrice"]').split("&nbsp;")
                if len(price) == 2:
                    # Drop the "." characters and everything after the first ",".
                    value = price[0].replace(".", "").split(",")[0]
                    currency = price[1]
                else:
                    value = "/"
                    currency = "/"

                if currency == "&euro;":
                    currency = "EUR"
                elif currency == "din":
                    currency = "DIN"

                # Category and thumbnail come from the ad's own page.
                detail = Downloader(link).get_content()

                category = xpath.get(detail, '//a[@class="crumbs"]')
                description = xpath.get(
                    l, '//section[@class="nameSec"]/p[@class="adDescription"]')
                category = category.split("|")[0].strip()

                imageUrl = xpath.get(
                    detail, '//div[@class="adThumbnailHolder"]/a/img/@src')
                # Collapse "//" to "/" and drop the first character.
                # NOTE(review): presumably strips the "//" of a
                # protocol-relative URL - confirm against the live markup.
                imageUrl = imageUrl.replace("//", "/")[1:]
                if imageUrl == "":
                    imageUrl = "/"

                description = description.replace("...<p>", "").strip()

                ad = Ad(link, title, imageUrl, description, category,
                        subcategory, value, currency, region, date, country)
                ads.append(ad)
            except Exception:
                # Best effort: skip an ad that cannot be fetched or parsed.
                # Unlike the former bare "except", KeyboardInterrupt and
                # SystemExit are no longer swallowed.
                continue
    except Exception:
        # Best effort: if the result page itself cannot be fetched or
        # split, return whatever has been collected (possibly nothing).
        pass

    return adsToJson(ads)


#print scrapeKupujemProdajem()
コード例 #13
0
ファイル: HaloOglasi.py プロジェクト: PsyLee/scrapers
def scrapeHaloOglasi():
    """Scrape the front-page listing of halooglasi.com.

    Every result entry contributes its link, thumbnail, teaser text,
    category breadcrumb, date and region. The ad's own page is
    downloaded to read the title and the price.

    Returns:
        The result of ``adsToJson`` applied to the collected ``Ad`` objects.
    """
    # UTF-8 support
    reload(sys)
    sys.setdefaultencoding('utf-8')
    now = datetime.now()  # NOTE(review): not used anywhere below

    site = "http://www.halooglasi.com"
    country = u"Србија"
    # Month abbreviations as they appear in the date label; an unknown
    # abbreviation is passed through unchanged.
    month_numbers = {
        "Jan": "1", "Feb": "2", "Mar": "3", "Apr": "4",
        "Maj": "5", "Jun": "6", "Jul": "7", "Avg": "8",
        "Sep": "9", "Okt": "10", "Nov": "11", "Dec": "12",
    }

    listing = Downloader(
        'http://www.halooglasi.com/naslovna.240.html?search_text=&sortColumn=VremeDodavanja'
    )
    html = unicode(listing.get_content())
    container = xpath.get(html, '//div[@class="results_container"]')

    ads = []
    for entry in xpath.search(container, '//div[@class="result_brza"]'):
        link = site + xpath.get(entry, '//div[@style="height:auto;"]/h2/a/@href')
        cont = unicode(Downloader(link).get_content())

        # The detail page is queried under two header classes; the second
        # is tried when the first yields no text.
        title = xpath.get(cont, '//div[@class="detail_bar_nek"]/h2').strip()
        if title == "":
            title = xpath.get(cont, '//div[@class="detail_bar"]/h2').strip()

        imageUrl = site + xpath.get(entry, '//a[@class="thumb"]/img/@src')
        description = xpath.get(entry, '//div[@class="text_ogl"]/p')

        # Breadcrumb text: second chunk after the whitespace separator,
        # split into its " > " levels.
        crumbs = xpath.get(entry, '//div[@class="brza_link"]').strip()
        crumbs = crumbs.split("\r\n\t\t\t\t\t\t\r\n\t\t\t\t\t\t")[1].split(" > ")
        category = crumbs[0]
        if len(crumbs) > 2:
            subcategory = crumbs[1]
        else:
            # With two levels the second one still carries markup; take
            # the text between "'>" and the next "<".
            subcategory = crumbs[1].split("'>")[1].split("<")[0]

        price = xpath.get(cont, '//div[@class="price"]').strip()  # price
        if price == "":
            price = xpath.get(cont, '//div[@class="price deal"]').strip()  # price deal
        price = price.replace(".", "").replace("din", " DIN").replace("&euro;", " EUR")
        if price == "Dogovor":
            value, currency = "/", "/"
        else:
            amount = price.split(" ")
            value = amount[0]
            currency = amount[1]

        # First span holds the date, second span the location.
        date_loc = xpath.search(entry, '//div[@class="datum_grad"]/h6/span')
        stamp = date_loc[0].strip().split("\r\n")[0].replace(".", "").split(" ")
        month = month_numbers.get(stamp[1], stamp[1])
        date = stamp[2] + "-" + month + "-" + stamp[0]

        region = date_loc[1].strip().split("&nbsp;")[0]

        ads.append(Ad(link, title, imageUrl, description, category, subcategory,
                      value, currency, region, date, country))

    return adsToJson(ads)


#print scrapeHaloOglasi()
コード例 #14
0
def _avtodeloviDate(dat, now):
    """Turn the tokenised "posted" label of a car-part ad into a date string.

    Args:
        dat: the label split on single spaces.
        now: the datetime that relative labels are resolved against.

    Returns:
        "Y-M-D H:M"-style text for today/yesterday labels and "Y-M-D"
        for everything else. Labels that are not recognised, including
        those with fewer than three tokens, map to 90 days before ``now``.
    """
    def ymd(moment):
        return str(moment.year) + "-" + str(moment.month) + "-" + str(moment.day)

    if len(dat) > 2:
        if dat[0] == u"Денес":  # "today"; the third token is appended as the time
            return ymd(now) + " " + dat[2]
        if dat[0] == u"Вчера":  # "yesterday"
            return ymd(now - timedelta(days=1)) + " " + dat[2]
        if dat[0] == u"пред":  # "N <unit> ago"
            if dat[2] == u"дена":  # days: exact offset
                days = int(dat[1])
            elif dat[2] == u"месец":  # singular "month"
                days = 30
            else:
                # Any other unit is approximated as two months.
                days = 60
            return ymd(now - timedelta(days=days))
    # Unrecognised label. Previously a label with three or more tokens that
    # matched none of the keywords left the date unassigned, so the ad
    # either crashed with NameError or inherited the previous ad's date.
    return ymd(now - timedelta(days=90))


def scrapeAvtodelovi():
    """Scrape the car-parts listing page of avtooglasi.com.mk.

    Link, subcategory and thumbnail are read from the listing table; the
    ad's own page is then downloaded for title, description, region,
    price and posting date.

    Returns:
        The result of ``adsToJson`` applied to the collected ``Ad`` objects.

    Raises:
        IndexError: if an ad lacks one of the expected elements.
    """
    # UTF-8 support
    reload(sys)
    sys.setdefaultencoding('utf-8')
    now = datetime.now()
    down = Downloader('http://www.avtooglasi.com.mk/avtodelovi/site/?page=0&orderby=0')
    content = down.get_content()
    html = unicode(content)
    celo = xpath.get(html, '//table[@class="table table-hover table-condensed"]')

    sliki = xpath.search(celo, '//div[@style="width: 120px;float:left;"]')

    category = u"Автоделови"
    country = u"Македонија"

    ads = []
    for t in sliki:
        link = xpath.search(t, '/a/@href')[0]
        # The anchor's title attribute is used as the subcategory.
        subcategory = xpath.search(t, '/a/@title')[0]
        imageUrl = xpath.search(t, '//img/@src')[0]

        detail_html = unicode(Downloader(link).get_content())
        # Named "detail" rather than "os" so the stdlib module name is not shadowed.
        detail = xpath.search(detail_html, '//div[@class="centerC"]')[0]

        title = xpath.search(detail, '//h3[@style="margin-top: 5px;"]')[0]
        title = title.strip().replace("\"", "")
        description = xpath.search(detail, '//div[@style="padding: 5px;"]')[1]
        description = description.strip().replace("\"", "")

        # Both condensed tables are needed: region is read from the second,
        # the posting date from the first.
        tables = xpath.search(detail, '//table[@class="table table-condensed"]')
        region = xpath.search(tables[1], '//tr/td/strong')[1].strip()

        v = xpath.get(detail, '//span[@class="label label-info"]').strip().split(" ")
        # Anything that is not marked as euro is taken to be denars.
        if v[1] == "&euro;":
            currency = "EUR"
        else:
            currency = "MKD"
        value = v[0]

        posted = xpath.search(tables[0], '//tr/td/strong')[1]
        date = _avtodeloviDate(posted.strip().split(" "), now)

        ad = Ad(link, title, imageUrl, description, category, subcategory,
                value, currency, region, date, country)
        ads.append(ad)

    return adsToJson(ads)

# print scrapeAvtodelovi()