def download(self, feed, url, filename, extname=None):
    """Download one image and save it under self.image_dir.

    Parameters:
        feed     -- feed identifier; a 'csdn' prefix triggers URL cleanup
        url      -- image URL as found in the article
        filename -- base name (without extension) for the stored file
        extname  -- file extension including the dot; when None it is
                    derived from the response Content-Type header

    Returns a 3-tuple (file_fullname, state, message); file_fullname is
    None on any failure.
    """
    # CSDN image URLs carry a query string that must be stripped before
    # fetching; other feeds are used as-is.
    if feed.startswith('csdn'):
        true_url = url.split('?')[0]
    else:
        true_url = url
    logger.info('downloading image \'%s\'', url)
    try:
        req = urllib2.Request(true_url)
        req.add_header('User-Agent', USER_AGENT)
        resp = urllib2.urlopen(req, None, DOWNLOAD_TIMEOUT)
        try:
            data = resp.read(-1)
            if extname is None:
                # in python3 this would be: resp.getheader('Content-Type')
                content_type = resp.info().getheader('Content-Type').lower()
                extname = self.get_extname(content_type)
                if extname is None:
                    logger.warning('unsupported content type \'%s\'', content_type)
                    return (None, STATE_BADURL,
                            'unsupported content type \'' + content_type + '\'')
        finally:
            # release the connection even if read()/header parsing raises
            resp.close()
        file_fullname = filename + extname
        # shard storage: <image_dir>/<c0>/<c1>/<file> by the first two
        # characters of the filename, to keep directories small
        fn = os.path.join(self.image_dir, file_fullname[:1],
                          file_fullname[1:2], file_fullname)
        # 'with' guarantees the file handle is closed even if write() fails
        with open(fn, 'wb') as f:
            f.write(data)
        return (file_fullname, STATE_SUCCESS, 'success')
    except Exception as e:
        # best-effort by design: any failure is logged and reported as a
        # state tuple rather than propagated to the caller
        logger.error('an error occurred while downloading %s', url)
        logger.exception(e)
        return (None, STATE_NETWORK_ERROR, 'network error')
def fetch_feed(self, feed):
    """Parse the feed at feed['url'] and hand any entries to self.store().

    Side effect: when articles are found, feed['type'] is set to the first
    three characters of the parser-reported version string. Every exception
    is logged and swallowed so a single bad feed cannot abort a fetch run.
    """
    feed_name = feed['name']
    try:
        logger.info('start fetch feed \'%s\'', feed_name)
        parsed = feedparser.parse(feed['url'])
        entries = parsed.entries
        if not entries:
            logger.info('get nothing from feed \'%s\'', feed_name)
        else:
            feed['type'] = parsed.version[:3]
            logger.info('get %d articles from feed \'%s\'', len(entries), feed_name)
            self.store(feed, entries)
    except Exception as e:
        logger.error('get error while fetching feed \'%s\'', feed_name)
        logger.exception(e)