コード例 #1
0
ファイル: getResource.py プロジェクト: surecc/githubtest
def grab_360buy_saveToModel(url, id_cate, id_s, localfile):
    """Scrape a 360buy listing page, save every item to the models and log it.

    The page is fetched with a browser-like User-Agent and parsed as gb18030.
    Every ``<li>`` found inside the first ``<div id="plist">`` is treated as
    one product: its picture and its price image are downloaded into an
    ``img`` directory next to ``localfile``, a ``Commidity`` and a ``Picture``
    row are saved, and one ``---``-separated line is appended to ``localfile``.

    Args:
        url: Address of the listing page to fetch.
        id_cate: Primary key of the ``models.Category`` assigned to each item.
        id_s: Primary key of the ``models.Seller`` assigned to each item.
        localfile: Path of the text file that receives one line per item. It
            is only created (and truncated) when the ``plist`` div is present.

    Returns:
        Always ``True``, including when the page has no ``plist`` div.

    Raises:
        IndexError: If an item contains divs but fewer than six of them.
        Whatever ``urllib2``, the model lookups or ``saveImg.saveImg`` raise
        is propagated unchanged.
    """
    request = urllib2.Request(url=url, headers={'User-Agent' : 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3' })
    response = urllib2.urlopen(request)
    try:
        HTML_response = response.read()
    finally:
        # Release the HTTP connection even when reading the body fails.
        response.close()
    soup = BeautifulSoup(HTML_response, from_encoding="gb18030")

    if not soup:
        return True
    tag_div = soup.find_all('div', id='plist')
    if not tag_div:
        return True

    tag_item_li = tag_div[0].find_all('li')
    # Look up the category and seller that every scraped item is attached to.
    m_cate = models.Category.objects.get(id=id_cate)
    m_s = models.Seller.objects.get(id=id_s)
    print(m_cate)
    print(type(m_s))

    # All images go into an 'img' directory beside the log file.
    path_dir = os.path.join(os.path.dirname(localfile), 'img')

    # The with-block closes the log file even if a download, a model save or
    # an unexpected page layout raises in the middle of the loop.
    with open(localfile, 'w') as myfile:
        for i, li in enumerate(tag_item_li, 1):
            div = li.find_all('div')
            if not div:
                print('it is empty of div.... f**k')
                continue

            print(str(i) + '........')
            # The first, fifth and sixth nested divs are read as the picture,
            # the name and the price of the item respectively.
            p_img = div[0]
            p_name = div[4]
            p_price = div[5]

            # Download the product picture.
            url_item = p_img.a['href']
            url_img = p_img.img['data-lazyload']
            path_img = os.path.join(path_dir, str(i) + '.jpg')
            saveImg.saveImg(url_img, path_img)

            # The price is read as an image URL and downloaded as a picture.
            url_price = p_price.img['data-lazyload']
            path_price = os.path.join(path_dir, str(i) + '_price.jpg')
            saveImg.saveImg(url_price, path_price)

            # Persist the item; the numeric price is stored as a 0.0
            # placeholder because only the price image is fetched here.
            name_text = str(p_name.a.contents)
            m_com = models.Commidity(url=url_item, price=0.0, name=name_text)
            m_com.categories = m_cate
            m_com.seller = m_s
            m_com.save()
            print(m_com)
            # NOTE(review): the Picture receives the Commidity id rather than
            # the instance - confirm this matches the field definition.
            m_p = models.Picture(dir=path_dir, commidity=m_com.id)
            m_p.save()
            print(m_p)

            # One log line per item: image path, name contents, price image.
            myfile.write(str(path_img) + '---')
            myfile.write(name_text + '---')
            myfile.write(str(url_price) + '---')
            myfile.write('\r\n')
    return True
コード例 #2
0
ファイル: getResource.py プロジェクト: surecc/githubtest
def grab_360buy(url, localfile):
    """Scrape a 360buy listing page and log every item to ``localfile``.

    The page is parsed through ``makeSoup``. Every ``<li>`` inside the first
    ``<div id="plist">`` is treated as one product: its picture and its price
    image are downloaded into an ``img`` directory next to ``localfile`` under
    a random five-character base name, and one ``---``-separated line is
    appended to ``localfile``.

    Args:
        url: Address of the listing page to fetch.
        localfile: Path of the text file that receives one line per item. It
            is only created (and truncated) when the ``plist`` div is present.

    Returns:
        Always ``True``, including when nothing could be parsed.

    Raises:
        IndexError: If an item contains divs but fewer than four of them.
        Whatever ``makeSoup`` or ``saveImg.saveImg`` raise is propagated
        unchanged.
    """
    print(localfile)
    soup = makeSoup(url)
    if not soup:
        return True
    tag_div = soup.find_all('div', id='plist')
    if not tag_div:
        return True

    tag_item_li = tag_div[0].find_all('li')
    # All images go into an 'img' directory beside the log file.
    path_dir = os.path.join(os.path.dirname(localfile), 'img')

    # The with-block closes the log file even if a download or an unexpected
    # page layout raises in the middle of the loop.
    with open(localfile, 'w') as myfile:
        for i, li in enumerate(tag_item_li, 1):
            div = li.find_all('div')
            if not div:
                print('it is empty of div.... f**k')
                continue

            print(str(i) + '........')
            # The first, third and fourth nested divs are read as the picture,
            # the name and the price of the item respectively.
            p_img = div[0]
            p_name = div[2]
            p_price = div[3]
            print(p_img)
            print('-----------------')
            print(p_name)
            print('-----------------')
            print(p_price)

            # Download the product picture under a random base name.
            url_img = p_img.img['data-lazyload']
            img_name = getRandom.getRandomStr(5)
            path_img = os.path.join(path_dir, img_name + '.jpg')
            saveImg.saveImg(url_img, path_img)

            # Fetch the price image and write the log line only when all
            # three parts of the item are truthy.
            if p_price and p_name and p_img:
                url_price = p_price.img['data-lazyload']
                path_price = os.path.join(path_dir, img_name + '_price.jpg')
                saveImg.saveImg(url_price, path_price)
                # One log line per item: image path, name contents, price image.
                myfile.write(str(path_img) + '---')
                myfile.write(str(p_name.a.contents) + '---')
                myfile.write(str(url_price) + '---')
                myfile.write('\r\n')
    return True
コード例 #3
0
ファイル: views_taobao.py プロジェクト: surecc/githubtest
def getsoup(request):
    """Handle the scraping form: validate it, run the scraper, render the page.

    On a valid POST the submitted ``url`` is scraped according to ``website``:
    for ``'taobao'`` the links are grabbed into a text file and every item
    image from the JSON item list is downloaded; for ``'360buy'`` the listing
    is scraped with ``getResource.grab_360buy``. Output files live in the
    ``imgdb`` directory one level above this module and share a fresh UUID in
    their names. Any other ``website`` value does no scraping.

    Args:
        request: The incoming HTTP request.

    Returns:
        The rendered ``beautiful_soup.html`` response. After a valid POST the
        context also carries the UUID as ``ans``; for an invalid POST the
        bound form is rendered again, and for other methods an unbound form
        pre-filled with a default URL is rendered.
    """
    if request.method == 'POST':
        form = SoupForm(request.POST)
        if form.is_valid():
            clean_data = form.cleaned_data
            url = clean_data['url']
            website = clean_data['website']

            # A fresh UUID keeps the output files of separate requests apart.
            rd = getRandom.getUUID()
            # Build the paths from components rather than embedding '\\'
            # separators, so they resolve to ../imgdb on every platform
            # (the resulting string is unchanged on Windows).
            imgdb_dir = os.path.join(os.path.dirname(__file__), '..', 'imgdb')
            path_img = os.path.join(imgdb_dir, 'taobao_' + rd + '.jpg')
            localfile = os.path.join(imgdb_dir, 'url_' + rd + '.txt')

            if website == 'taobao':
                getResource.grabHref(url, localfile)
                data = taobao_lib.get_json(url)
                json_data = json.loads(data)
                for item in json_data['itemList']:
                    # NOTE(review): every item is saved to the same path_img,
                    # so each download overwrites the previous one - confirm
                    # whether one file per item was intended.
                    saveImg.saveImg(item['image'], path_img)
            elif website == '360buy':
                getResource.grab_360buy(url, localfile)
            return render_to_response('beautiful_soup.html',{'form': form, 'ans':rd})
    else:
        form = SoupForm(initial={'url':'http://www.baidu.com'})
    return render_to_response('beautiful_soup.html',{'form': form})