def getImgList(url=defaultUrl):
    """Return the image dicts scraped from ``url`` with ``src`` pointing at storage.

    The page is scraped with ``mySpider.getImg``. If scraping raises, a
    single placeholder entry (``href``/``src``/``description``) is used
    instead. For every entry, ``src`` is replaced by the ``storage_url`` of
    the ``imgstorage`` row whose ``original_url`` equals the current ``src``.
    When that lookup fails, the storage endpoint is requested for the image
    and the lookup is retried once. If the retry fails as well, the entry
    keeps its original ``src`` and a message is printed.

    Args:
        url: Page to scrape. Defaults to the module-level ``defaultUrl``.

    Returns:
        The list of dicts (scraped or placeholder), updated in place.
    """
    try:
        urlDictList = mySpider.getImg(url)
    except Exception:
        # Best-effort fallback: show a placeholder instead of failing.
        # ``Exception`` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        urlDictList = [{
            'href': 'evandjango.sinaapp.com',
            'src': '/static/images/noImage.jpg',
            'description': 'no description',
        }]

    storageUrl = u'http://6.evandjango.sinaapp.com/storageGet/'

    # NOTE: an earlier attempt to store all images with one batched POST to
    # storageUrl failed, so images are handled one at a time below.

    # Replace each original URL with its storage URL.
    for each in urlDictList:
        try:
            # Already recorded in the database: reuse the stored URL.
            each['src'] = imgstorage.objects.get(
                original_url=each['src']).storage_url
        except Exception:
            # Not recorded (or the lookup failed).
            try:
                # Presumably this request makes the server store the image
                # and record it -- TODO confirm against the storageGet view.
                response = urllib2.urlopen(storageUrl + each['src'])
                # The body is not needed; close so the connection is released.
                response.close()
                each['src'] = imgstorage.objects.get(
                    original_url=each['src']).storage_url
            except Exception:
                # TODO: solve this later; the entry keeps its original src.
                print('stores failed or get url from mysql failed')
    return urlDictList
def getImgList(url):
    """Scrape ``url`` for images and return them with the cleaned site URL.

    Args:
        url: Page to scrape; passed to ``mySpider.getImg`` and
            ``mySpider.urlClean``.

    Returns:
        A ``(fixed_site, urlDictList)`` tuple. ``fixed_site`` is the result
        of ``mySpider.urlClean(url)``. ``urlDictList`` is the result of
        ``mySpider.getImg(url)``, or a single placeholder entry
        (``href``/``src``/``description``) when scraping raises.

    Note:
        ``mySpider.urlClean`` is called outside the ``try``, so any
        exception it raises propagates to the caller.
    """
    try:
        urlDictList = mySpider.getImg(url)
    except Exception:
        # Best-effort fallback: show a placeholder instead of failing.
        # ``Exception`` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        urlDictList = [{
            'href': 'evandjango.sinaapp.com',
            'src': '/static/images/noImage.jpg',
            'description': 'no description',
        }]
    fixed_site = mySpider.urlClean(url)
    return fixed_site, urlDictList
import mySpider

# Page used for this manual run of the scraper.
url = 'http://news.qq.com/photo.shtml'

# Fetch the image entries for the page; runs at module level.
urlImg = mySpider.getImg(url)