Beispiel #1
0
    def upLoadPhotoToFtp(self, starId, strGrabStarCode, inforrationSources, headImgUrl, cleanAfterId, upLoadCount, currentStamp, starIdList):
        try :
            if upLoadCount % 1000 == 0:
                currentStamp = DateUtils.getSysTimeSecond()
            # 明星_抓取
            dateType = data_type_1_star + data_type_separator + data_type_2_grab
            if (inforrationSources == 0 and headImgUrl and headImgUrl != douban_default_star_photo) or (inforrationSources == 2 and headImgUrl and headImgUrl != iqiyi_default_star_photo):
                remoteFile, webFileUrl = UrlUtils().getRemoteFile(dateType, currentStamp, inforrationSources, headImgUrl)
                photoHeight, photoWidth = UrlUtils().upLoadFtp(remoteFile, webFileUrl, inforrationSources)
                displayStatus = 1
                if (photoHeight != 0 and photoWidth != 0) :
                    upLoadCount += 1
                    GrabStarPhoto().saveGrabStarPhoto(starId, strGrabStarCode, inforrationSources, webFileUrl, photoHeight,
                                                      photoWidth, remoteFile, cleanAfterId, displayStatus)
                    starIdList.append(starId)

            else:
                remoteFile = ""
                webFileUrl = headImgUrl
                photoHeight, photoWidth = 0, 0
                displayStatus = 2
                GrabStarPhoto().saveGrabStarPhoto(starId, strGrabStarCode, inforrationSources, webFileUrl, photoHeight,
                                              photoWidth, remoteFile, cleanAfterId, displayStatus)
                starIdList.append(starId)
            if inforrationSources == 0:
                # time.sleep(1)
                pass
        except BaseException as e:
            print("e.message:", str(e), "上传来源" + str(inforrationSources) + " id为"+ str(starId) + "明星图片到FTP出现异常")
        return upLoadCount, currentStamp
Beispiel #2
0
 def addMataImg(self, media_source_code,poster_img_url,ftpUrl,photoWidth,photoHeight):
     try:
         grabPoster = GrabPoster()
         # 序列id
         grabPoster.posterId = DBUtil().createPK("GRAB_POSTER")
         # 影片源id
         grabPoster.mediaSourceCode = media_source_code
         # 信息来源
         grabPoster.informationSources = 0
         # 海报url
         grabPoster.posterUrl = poster_img_url
         # 海报FTP
         grabPoster.posterFtpUrl = ftpUrl
         # 图片宽度
         grabPoster.posterWidth = photoWidth
         # 图片高度
         grabPoster.posterHeight = photoHeight
         # 抓取时间
         grabPoster.grabTime = DateUtils.getSysTimeFormat("%Y-%m-%d %H:%M:%S")
         # 清洗状态
         grabPoster.cleanStatus = 0
         # 清洗后id
         grabPoster.cleanAfterId = 0
         # 图片显示状态
         grabPoster.displayStatus = 1
         grabPoster.save(using="grab", force_insert=True)
     except BaseException as e:
         print("e.message:", str(e))
Beispiel #3
0
 def parseImageUpload(self, webUrl):
     ftpUrl = ''
     photoHeight = 0
     photoWidth = 0
     dateType = data_type_1_meta + data_type_separator + data_type_2_grab
     if self.imageUploadNum % 1000 == 0:
         currentStamp = DateUtils.getSysTimeSecond()
     # 取得FTP上传后地址
     ftpUrl, webFileUrl = UrlUtils().getRemoteFile(dateType, currentStamp, self.informationSources, webUrl)
     # 上传图片到FTP,返回图片尺寸
     photoHeight, photoWidth = UrlUtils().upLoadFtp(ftpUrl, webFileUrl, self.informationSources)
     self.imageUploadNum += 1
     return ftpUrl, photoHeight, photoWidth
Beispiel #4
0
 def addMataPF(self, media_source_code,mediaId,pingfen):
     try:
         grabMediaDynamic = GrabMediaDynamic()
         # 影片id
         grabMediaDynamic.mediaId = mediaId
         # 媒资数据源id
         grabMediaDynamic.mediaSourceCode = media_source_code
         # 评分
         grabMediaDynamic.mediaScore = pingfen
         # 信息来源
         grabMediaDynamic.informationSources = 0
         # 清洗状态
         grabMediaDynamic.cleanStatus = 0
         # 清洗后id
         grabMediaDynamic.cleanAfterId = 0
         # 抓取时间
         grabMediaDynamic.grabTime = DateUtils.getSysTimeFormat("%Y-%m-%d %H:%M:%S")
         grabMediaDynamic.save(using="grab", force_insert=True)
     except BaseException as e:
         print("e.message:", str(e))
Beispiel #5
0
    def queryItemMata(self, response):
        try:
            json_dict = json.loads(response)
            media_source_code = json_dict['id']
            cn_name = json_dict['title']
            genresNo = len(json_dict['genres'])
            if genresNo > 0:
                subordinate_type = json_dict['genres']
            else:
                subordinate_type = ""
            akaNo = len(json_dict['aka'])
            if akaNo > 0:
                alternate_name = json_dict['aka']
            else:
                alternate_name = ""
            subtypeNo = len(json_dict['subtype'])
            if subtypeNo > 0:
                media_type = json_dict['subtype']
            else:
                media_type = ""
            poster_img_url = json_dict['images']['large']
            media_intro = json_dict['summary']
            media_year = json_dict['year']
            if media_year == "" or media_year == None:
                media_year = 0
            grab_web_url = json_dict['share_url']
            pingfen = json_dict['rating']['average']
            total_season = json_dict['seasons_count']
            if total_season == "" or total_season == None:
                total_season = "1"
            current_season = json_dict['current_season']
            if current_season == "" or current_season == None:
               current_season = "1"
            current_season_number = json_dict['episodes_count']
            if current_season_number == "" or current_season_number is None:
                current_season_number = "1"
            directorsNo = len(json_dict['directors'])
            if directorsNo > 0:
                dy = []
                for n in range(directorsNo):
                    media_director = json_dict['directors'][n]['name']
                    dy.append(media_director)
            else:
                dy = ""
            zy = []
            castsNo = len(json_dict['casts'])
            if castsNo > 0:
                for i in range(castsNo):
                    media_actor = json_dict['casts'][i]['name']
                    zy.append(media_actor)
            else:
                zy = ""
            countriesNo = len(json_dict['countries'])
            if countriesNo > 0:
                production_contry = json_dict['countries'][0]
            else:
                production_contry = ''

            # -----保存媒资基础信息------
            grabMediaInfo = GrabMediaInfo()
            #设置主键id
            grabMediaInfo.mediaId = DBUtil().createPK("GRAB_MEDIA_INFO")
            # 媒资数据源id
            grabMediaInfo.mediaSourceCode = media_source_code
            # 信息来源
            grabMediaInfo.informationSources = 0
            #媒资父id 豆瓣没有剧集默认值为0
            grabMediaInfo.parentSourceCode = 0
            #
            grabMediaInfo.cleanParentMediaId = 0
            # 媒资类型
            grabMediaInfo.mediaType = media_type
            # 中文名
            grabMediaInfo.cnName = cn_name
            # 又名
            grabMediaInfo.alternateName = alternate_name
            # 发行年份
            grabMediaInfo.mediaYear = media_year
            # 简介
            grabMediaInfo.mediaIntro = media_intro
            # 影片类型
            grabMediaInfo.subordinateType = subordinate_type
            # 制片国家/地区
            grabMediaInfo.productionContry = production_contry
            # 总季数
            grabMediaInfo.totalSeason = int(total_season)
            # 当前季数
            grabMediaInfo.currentSeason = int(current_season)
            # 当前季的集数
            grabMediaInfo.currentSeasonNumber = int(current_season_number)
            # 海报url
            grabMediaInfo.posterImgUrl = poster_img_url
            # 导演
            grabMediaInfo.mediaDirector = dy
            # 演员
            grabMediaInfo.mediaActor = str(zy)
            # 是否清洗
            grabMediaInfo.cleanStatus = 0
            # 清洗后id
            grabMediaInfo.cleanAfterId = 0
            # 抓取时间
            grabMediaInfo.grabTime = DateUtils.getSysTimeFormat("%Y-%m-%d %H:%M:%S")
            # 抓取页面URL
            grabMediaInfo.grabWebUrl = grab_web_url
            if (media_type  == 'tv'):
                if ('真人秀' in subordinate_type):
                    # 协助媒资类型
                    grabMediaInfo.mediaAssistType = 'variety'
                elif ('音乐' in subordinate_type):
                    # 协助媒资类型
                    grabMediaInfo.mediaAssistType = 'variety'
                elif ('歌舞' in subordinate_type):
                    # 协助媒资类型
                    grabMediaInfo.mediaAssistType = 'variety'
                elif ('脱口秀' in subordinate_type):
                    # 协助媒资类型
                    grabMediaInfo.mediaAssistType = 'variety'
                elif ('动画' in subordinate_type):
                    # 协助媒资类型
                    grabMediaInfo.mediaAssistType = 'manga'
                elif ('儿童' in subordinate_type):
                    # 协助媒资类型
                    grabMediaInfo.mediaAssistType = 'manga'
                elif ('纪录片' in subordinate_type):
                    # 协助媒资类型
                    grabMediaInfo.mediaAssistType = 'documentary'
                else:
                    grabMediaInfo.mediaAssistType = 'tv'
            else:
                if ('纪录片' in subordinate_type):
                    # 协助媒资类型
                    grabMediaInfo.mediaAssistType = 'documentary'
                else:
                    grabMediaInfo.mediaAssistType = 'movie'
            # 保存数据库
            grabMediaInfo.save(using="grab", force_insert=True)
            # -----保存媒资图片------
            ftpUrl, photoHeight, photoWidth = DoubanMZAnalysis().parseImageUpload(poster_img_url)
            self.addMataImg(media_source_code, poster_img_url, ftpUrl, photoWidth, photoHeight)
            # -----保存媒资评分------
            self.addMataPF(media_source_code, grabMediaInfo.mediaId, pingfen)
            #-------保存媒资演员信息-------
            #DoubanMediaCelebrities().mediaCelebrities(media_source_code)
        except BaseException as e:
            print("e.message:", str(e))
            time.sleep(10)