Esempio n. 1
0
def getImgList(url = defaultUrl):
    """Return the image entries for ``url`` with ``src`` rewritten to stored copies.

    The entries come from ``mySpider.getImg(url)``; if that call raises, a
    one-element placeholder list (the "no image" picture) is used instead.
    For every entry, ``src`` is replaced by the ``storage_url`` of the
    ``imgstorage`` record whose ``original_url`` equals the current ``src``.
    When that lookup fails, the storage endpoint is requested for the image
    and the lookup is retried once.  If the retry fails as well, a message
    is printed and the entry keeps its original ``src``.

    NOTE(review): presumably requesting the storage endpoint creates the
    ``imgstorage`` record as a side effect -- confirm against that view.

    :param url: page to collect images from; defaults to ``defaultUrl``.
    :returns: the entries obtained above, with ``src`` updated in place
        wherever a stored copy could be resolved.
    """
    try:
        urlDictList = mySpider.getImg(url)
    except Exception:
        # Best effort: any spider failure degrades to a placeholder entry.
        # ``Exception`` (not a bare ``except``) so that KeyboardInterrupt and
        # SystemExit still propagate.
        urlDictList = [{'href': 'evandjango.sinaapp.com',
                        'src': '/static/images/noImage.jpg',
                        'description': 'no description'}]
    storageUrl = u'http://6.evandjango.sinaapp.com/storageGet/'

    # An earlier attempt to store all images with one batched request failed,
    # so each image is handled individually below.

    # replace original_url with storage_url
    for each in urlDictList:
        try:    # already known to imgstorage
            each['src'] = imgstorage.objects.get(original_url=each['src']).storage_url
        except Exception:     # not known yet (or the lookup itself failed)
            try:
                # Ask the storage endpoint to store the image; the response
                # body is not needed, so release the connection right away.
                response = urllib2.urlopen(storageUrl + each['src'])
                response.close()
                each['src'] = imgstorage.objects.get(original_url=each['src']).storage_url
            except Exception:
                # Single-argument print() behaves the same on Python 2 and 3.
                print('stores failed or get url from mysql failed')
                # TODO: solve this later; the entry keeps its original src.
    return urlDictList
Esempio n. 2
0
def getImgList(url):
    """Return the cleaned site URL and the image entries found at ``url``.

    The entries come from ``mySpider.getImg(url)``; if that call raises, a
    one-element placeholder list (the "no image" picture) is used instead.
    The first element of the result is ``mySpider.urlClean(url)``.

    :param url: page to collect images from.
    :returns: tuple ``(fixed_site, urlDictList)``.
    """
    try:
        urlDictList = mySpider.getImg(url)
    except Exception:
        # Best effort: any spider failure degrades to a placeholder entry.
        # ``Exception`` (not a bare ``except``) so that KeyboardInterrupt and
        # SystemExit still propagate.
        urlDictList = [{'href': 'evandjango.sinaapp.com',
                        'src': '/static/images/noImage.jpg',
                        'description': 'no description'}]
    fixed_site = mySpider.urlClean(url)
    return fixed_site, urlDictList
Esempio n. 3
0
# Minimal usage example: call mySpider.getImg on a fixed page URL and keep
# the result in the module-level name ``urlImg``.
import mySpider
# Page passed to the spider (QQ news photo page).
url = 'http://news.qq.com/photo.shtml'
# NOTE(review): presumably the image records found on the page -- confirm
# against mySpider.getImg, which is not visible here.
urlImg = mySpider.getImg(url)