def getPage(url, logger):
    """Fetch ``url`` and return its HTML parsed with BeautifulSoup.

    Args:
        url: Address of the page to download.
        logger: Logger exposing a ``logging``-style ``info`` method.

    Returns:
        The ``BeautifulSoup`` object built from the response body when the
        response code is 200; ``None`` when another code is returned or
        when any exception is raised while fetching or parsing.
    """
    try:
        page = UrlContent.getHtmlConnent(url)
        status = page.getcode()
        if status == 200:
            return BeautifulSoup(page.read())
        # A "%s" placeholder is required: passing the code as a bare extra
        # argument makes logging fail to format the record, so the status
        # would never reach the log.
        logger.info("The server returns : %s", status)
        return None
    except Exception:
        # Best-effort fetch: every failure is logged and reported as None.
        # Lazy %-args keep the handler from raising on a non-string url.
        logger.info('server Exception%s', url)
        return None
def getPage(url, logger):
    """Fetch ``url`` and return its HTML parsed with BeautifulSoup.

    Args:
        url: Address of the page to download.
        logger: Logger exposing a ``logging``-style ``info`` method.

    Returns:
        The ``BeautifulSoup`` object built from the response body when the
        response code is 200; ``None`` when another code is returned or
        when any exception is raised while fetching or parsing.
    """
    try:
        page = UrlContent.getHtmlConnent(url)
        status = page.getcode()
        if status == 200:
            return BeautifulSoup(page.read())
        # A "%s" placeholder is required: passing the code as a bare extra
        # argument makes logging fail to format the record, so the status
        # would never reach the log.
        logger.info("The server returns : %s", status)
        return None
    except Exception:
        # Best-effort fetch: every failure is logged and reported as None.
        # Lazy %-args keep the handler from raising on a non-string url.
        logger.info('server Exception%s', url)
        return None
def saveaudio(audios,db,es,logger,referer): if db == None: db = connectDB(logger) if es == None: es = getESCreate(logger) void_id = audios.get('void_id') url = 'http://www.ximalaya.com/tracks/'+void_id+'.json' logger.info(url) file_dir = UsePlatform() if file_dir ==None: #### 默认为Windows file_dir = file_dir_win if not os.path.exists(file_dir): os.mkdir(file_dir) dir = file_dir +void_id+'.txt' if os.path.exists(dir): logger.info('File already exists !' +void_id) return None page = UrlContent.getJSONConnent(url, referer) if page.getcode() != 200: return None page = page.read() #audios ={} compressedFile = StringIO.StringIO() if page != None: try: date = json.loads(page) if date['play_path_64'] != None: duration = date['duration'] #音频时长 audios['duration'] = duration play_count = date['play_count'] # 播放次数 audios['play_count'] = play_count title = date['title'] #音频标题 audios['title'] = title audios['void_id'] = void_id address = date['play_path_64'] if None != address: audios['address']=address formatsrc = address.split('.') if len(formatsrc)>0: try: outf =None format =formatsrc[len(formatsrc)-1] audios['format'] = format # dir = file_dir +void_id+'.txt' # if os.path.exists(dir): # print '文件已经存在!',void_id # logger.info('File already exists !' 
+void_id) # return None audio = UrlContent.getVoidConnent(address,referer) audios['audios_dir'] = dir outf = codecs.open(dir,'wb','utf-8') start = round(time.time()) while True: end = round(time.time()) if end - start > 700: # print '下载超时!',address logger.info('Download the timeout!') return None s = audio.read(1024*32) if len(s) == 0: break compressedFile.write(s) fs = GridFS(db,collection='audio') gf = fs.put(compressedFile.getvalue(),filename=title+'.'+format,format=format,playcount=play_count,size=compressedFile.len) audios['audio_id'] = str(gf) audios['size'] = compressedFile.len outf.write('audio_id : '+ audios.get('audio_id')+ ' title : ' + audios.get('title')) outf.flush() except Exception,e: # print '文件操作错误' ,e logger.info('File operations error') audios['tag'] = '0' finally: if outf != None: outf.close() compressedFile.close()
def saveaudio(audios, db, es, logger, referer): if db == None: db = connectDB(logger) if es == None: es = getESCreate(logger) void_id = audios.get('void_id') url = 'http://www.ximalaya.com/tracks/' + void_id + '.json' logger.info(url) file_dir = UsePlatform() if file_dir == None: #### 默认为Windows file_dir = file_dir_win if not os.path.exists(file_dir): os.mkdir(file_dir) dir = file_dir + void_id + '.txt' if os.path.exists(dir): logger.info('File already exists !' + void_id) return None page = UrlContent.getJSONConnent(url, referer) if page.getcode() != 200: return None page = page.read() #audios ={} compressedFile = StringIO.StringIO() if page != None: try: date = json.loads(page) if date['play_path_64'] != None: duration = date['duration'] #音频时长 audios['duration'] = duration play_count = date['play_count'] # 播放次数 audios['play_count'] = play_count title = date['title'] #音频标题 audios['title'] = title audios['void_id'] = void_id address = date['play_path_64'] if None != address: audios['address'] = address formatsrc = address.split('.') if len(formatsrc) > 0: try: outf = None format = formatsrc[len(formatsrc) - 1] audios['format'] = format # dir = file_dir +void_id+'.txt' # if os.path.exists(dir): # print '文件已经存在!',void_id # logger.info('File already exists !' +void_id) # return None audio = UrlContent.getVoidConnent(address, referer) audios['audios_dir'] = dir outf = codecs.open(dir, 'wb', 'utf-8') start = round(time.time()) while True: end = round(time.time()) if end - start > 700: # print '下载超时!',address logger.info('Download the timeout!') return None s = audio.read(1024 * 32) if len(s) == 0: break compressedFile.write(s) fs = GridFS(db, collection='audio') gf = fs.put(compressedFile.getvalue(), filename=title + '.' 
+ format, format=format, playcount=play_count, size=compressedFile.len) audios['audio_id'] = str(gf) audios['size'] = compressedFile.len outf.write('audio_id : ' + audios.get('audio_id') + ' title : ' + audios.get('title')) outf.flush() except Exception, e: # print '文件操作错误' ,e logger.info('File operations error') audios['tag'] = '0' finally: if outf != None: outf.close() compressedFile.close()