コード例 #1
0
def getPage(url, logger):
    """Fetch ``url`` and return its body parsed by BeautifulSoup.

    Args:
        url: Address handed to ``UrlContent.getHtmlConnent``.
        logger: Object exposing a logging-style ``info`` method.

    Returns:
        The ``BeautifulSoup`` object built from the response body when the
        response code is 200; ``None`` for any other status code or when
        any exception is raised while fetching or parsing.
    """
    try:
        page = UrlContent.getHtmlConnent(url)
        status = page.getcode()
        if status == 200:
            return BeautifulSoup(page.read())
        # The status code must be passed through a %s placeholder; without
        # one the logging module cannot merge the argument into the message
        # and the record is reported as a formatting error instead.
        logger.info("The server returns : %s", status)
        return None
    except Exception:
        # Best effort: every failure is logged and reported as "no page".
        logger.info('server Exception' + url)
        return None
コード例 #2
0
def getPage(url, logger):
    """Download a page and parse it with BeautifulSoup.

    Args:
        url: Address passed to ``UrlContent.getHtmlConnent``.
        logger: Object with a logging-style ``info`` method.

    Returns:
        A ``BeautifulSoup`` object for a response whose code is 200,
        otherwise ``None`` (non-200 status, or any exception raised while
        fetching or parsing the page).
    """
    try:
        page = UrlContent.getHtmlConnent(url)
        status = page.getcode()
        if status == 200:
            return BeautifulSoup(page.read())
        # A %s placeholder is required here: logging applies "msg % args",
        # so an argument without a placeholder breaks the record formatting
        # and the status code would never reach the log.
        logger.info("The server returns : %s", status)
        return None
    except Exception:
        # Any failure is logged and turned into a None result.
        logger.info('server Exception' + url)
        return None
コード例 #3
0
def saveaudio(audios,db,es,logger,referer):
    # Fetch the JSON metadata of one Ximalaya track, download its audio
    # stream into memory, store it through GridFS and write a marker file.
    #
    # audios  : dict describing the track. 'void_id' is read from it;
    #           'duration', 'play_count', 'title', 'void_id', 'address',
    #           'format', 'audios_dir', 'audio_id' and 'size' are written to
    #           it, and 'tag' is set to '0' when the download/store step
    #           raises.
    # db      : handle given to GridFS; connectDB(logger) is used when None.
    # es      : replaced by getESCreate(logger) when None; not used anywhere
    #           else in the visible part of this function.
    # logger  : object whose info() method receives progress messages.
    # referer : forwarded to the UrlContent request helpers.
    #
    # Every early exit visible here returns None (marker file already
    # exists, non-200 metadata response, download timeout).
    # NOTE(review): the outer "try" below has no except/finally clause in
    # this excerpt, so the function looks truncated - confirm against the
    # full source.
    
    if db == None:
        db = connectDB(logger)
    if es == None:
        es = getESCreate(logger)
    void_id = audios.get('void_id')
    # JSON endpoint for this track.
    url = 'http://www.ximalaya.com/tracks/'+void_id+'.json'
    logger.info(url)
    file_dir = UsePlatform()
    if file_dir ==None:  ####  defaults to Windows
        file_dir = file_dir_win
    if not os.path.exists(file_dir):
        os.mkdir(file_dir)
    # Marker file for this track: when it already exists the track is
    # skipped. (The name shadows the builtin dir().)
    dir = file_dir +void_id+'.txt' 
    if os.path.exists(dir):
        logger.info('File already exists !' +void_id)
        return None  
    page = UrlContent.getJSONConnent(url, referer)
    if page.getcode() != 200:
        return None
    page = page.read()
    #audios ={}
    # In-memory buffer that accumulates the downloaded audio bytes.
    compressedFile = StringIO.StringIO()     
    if page != None:
        try:
            # Parsed track metadata.
            date = json.loads(page)
            if date['play_path_64'] != None:
                duration = date['duration']   # audio duration
                audios['duration'] = duration
                play_count = date['play_count'] # play count
                audios['play_count'] = play_count
                title = date['title']  # audio title
                audios['title'] = title
                audios['void_id'] = void_id
                address = date['play_path_64']
                if None != address:
                    audios['address']=address
                    formatsrc = address.split('.')
                    # NOTE(review): assuming address is a string, split()
                    # always yields at least one element, so this check
                    # looks always true - confirm.
                    if len(formatsrc)>0:
                        try:
                            outf =None
                            # Text after the last '.' of the address is used
                            # as the format (shadows the builtin format()).
                            format =formatsrc[len(formatsrc)-1]
                            audios['format'] = format                           
#                            dir = file_dir +void_id+'.txt' 
#                             if os.path.exists(dir):
#                                  print 'File already exists!',void_id
#                                 logger.info('File already exists !' +void_id)
#                                 return None
                            audio = UrlContent.getVoidConnent(address,referer)
                            audios['audios_dir'] = dir
                            outf = codecs.open(dir,'wb','utf-8') 
                            # Start of the download, in whole seconds.
                            start = round(time.time())              
                            while True:
                                end = round(time.time())
                                # Give up once more than 700 seconds have
                                # passed; checked before each chunk read.
                                if end - start > 700:
#                                     print 'Download timed out!',address 
                                    logger.info('Download the timeout!')
                                    # The finally clause below still runs.
                                    return None
                                # Read the stream in 32 KiB chunks; an empty
                                # read ends the loop.
                                s = audio.read(1024*32)
                                if len(s) == 0:
                                    break 
                                compressedFile.write(s)                                                              
                            # Store the buffered bytes in the 'audio'
                            # GridFS collection, then record the returned id
                            # and the size on the audios dict.
                            fs = GridFS(db,collection='audio')
                            gf = fs.put(compressedFile.getvalue(),filename=title+'.'+format,format=format,playcount=play_count,size=compressedFile.len)                           
                            audios['audio_id'] = str(gf)
                            audios['size'] = compressedFile.len 
                            # The marker file receives the stored id and title.
                            outf.write('audio_id : '+ audios.get('audio_id')+ '  title : ' + audios.get('title'))
                            outf.flush()
                        except Exception,e:
#                             print 'File operation error' ,e 
                            # Any failure above is logged without details
                            # and flagged on the dict through 'tag'.
                            logger.info('File operations error')
                            audios['tag'] = '0'
                        finally:
                            # Always release the marker file (if it was
                            # opened) and the in-memory buffer.
                            if outf != None:
                                outf.close()   
                            compressedFile.close()
コード例 #4
0
def saveaudio(audios, db, es, logger, referer):
    # Fetch the JSON metadata of one Ximalaya track, download its audio
    # stream into memory, store it through GridFS and write a marker file.
    #
    # audios  : dict describing the track. 'void_id' is read from it;
    #           'duration', 'play_count', 'title', 'void_id', 'address',
    #           'format', 'audios_dir', 'audio_id' and 'size' are written to
    #           it, and 'tag' is set to '0' when the download/store step
    #           raises.
    # db      : handle given to GridFS; connectDB(logger) is used when None.
    # es      : replaced by getESCreate(logger) when None; not used anywhere
    #           else in the visible part of this function.
    # logger  : object whose info() method receives progress messages.
    # referer : forwarded to the UrlContent request helpers.
    #
    # Every early exit visible here returns None (marker file already
    # exists, non-200 metadata response, download timeout).
    # NOTE(review): the outer "try" below has no except/finally clause in
    # this excerpt, so the function looks truncated - confirm against the
    # full source.

    if db == None:
        db = connectDB(logger)
    if es == None:
        es = getESCreate(logger)
    void_id = audios.get('void_id')
    # JSON endpoint for this track.
    url = 'http://www.ximalaya.com/tracks/' + void_id + '.json'
    logger.info(url)
    file_dir = UsePlatform()
    if file_dir == None:  ####  defaults to Windows
        file_dir = file_dir_win
    if not os.path.exists(file_dir):
        os.mkdir(file_dir)
    # Marker file for this track: when it already exists the track is
    # skipped. (The name shadows the builtin dir().)
    dir = file_dir + void_id + '.txt'
    if os.path.exists(dir):
        logger.info('File already exists !' + void_id)
        return None
    page = UrlContent.getJSONConnent(url, referer)
    if page.getcode() != 200:
        return None
    page = page.read()
    #audios ={}
    # In-memory buffer that accumulates the downloaded audio bytes.
    compressedFile = StringIO.StringIO()
    if page != None:
        try:
            # Parsed track metadata.
            date = json.loads(page)
            if date['play_path_64'] != None:
                duration = date['duration']  # audio duration
                audios['duration'] = duration
                play_count = date['play_count']  # play count
                audios['play_count'] = play_count
                title = date['title']  # audio title
                audios['title'] = title
                audios['void_id'] = void_id
                address = date['play_path_64']
                if None != address:
                    audios['address'] = address
                    formatsrc = address.split('.')
                    # NOTE(review): assuming address is a string, split()
                    # always yields at least one element, so this check
                    # looks always true - confirm.
                    if len(formatsrc) > 0:
                        try:
                            outf = None
                            # Text after the last '.' of the address is used
                            # as the format (shadows the builtin format()).
                            format = formatsrc[len(formatsrc) - 1]
                            audios['format'] = format
                            #                            dir = file_dir +void_id+'.txt'
                            #                             if os.path.exists(dir):
                            #                                  print 'File already exists!',void_id
                            #                                 logger.info('File already exists !' +void_id)
                            #                                 return None
                            audio = UrlContent.getVoidConnent(address, referer)
                            audios['audios_dir'] = dir
                            outf = codecs.open(dir, 'wb', 'utf-8')
                            # Start of the download, in whole seconds.
                            start = round(time.time())
                            while True:
                                end = round(time.time())
                                # Give up once more than 700 seconds have
                                # passed; checked before each chunk read.
                                if end - start > 700:
                                    #                                     print 'Download timed out!',address
                                    logger.info('Download the timeout!')
                                    # The finally clause below still runs.
                                    return None
                                # Read the stream in 32 KiB chunks; an empty
                                # read ends the loop.
                                s = audio.read(1024 * 32)
                                if len(s) == 0:
                                    break
                                compressedFile.write(s)
                            # Store the buffered bytes in the 'audio'
                            # GridFS collection, then record the returned id
                            # and the size on the audios dict.
                            fs = GridFS(db, collection='audio')
                            gf = fs.put(compressedFile.getvalue(),
                                        filename=title + '.' + format,
                                        format=format,
                                        playcount=play_count,
                                        size=compressedFile.len)
                            audios['audio_id'] = str(gf)
                            audios['size'] = compressedFile.len
                            # The marker file receives the stored id and title.
                            outf.write('audio_id : ' + audios.get('audio_id') +
                                       '  title : ' + audios.get('title'))
                            outf.flush()
                        except Exception, e:
                            #                             print 'File operation error' ,e
                            # Any failure above is logged without details
                            # and flagged on the dict through 'tag'.
                            logger.info('File operations error')
                            audios['tag'] = '0'
                        finally:
                            # Always release the marker file (if it was
                            # opened) and the in-memory buffer.
                            if outf != None:
                                outf.close()
                            compressedFile.close()