def rename_file():
    # 1. Query the file names; 2. from the matching records, compute the new name and its hash
    logging.debug('begin')
    media_list = list(select_tmp())
    for media in media_list:
        try:
            id_media = 'no_id'
            file_name = get_file_name_by_download_url(media['download_url'])
            if media['media_type'] == consts.constant_manager.SUBTITLE:
                file_name = 'viki' + '_' + get_file_name_by_download_url(
                    media['original_url']) + '_' + media['language']
            media['new_absolute_path'] = '/data/dev_ant/' + file_name + '.' + media['file_type']
            media['new_hash_sign'] = get_hash_sign(file_name)
            # rename the file on disk
            os.rename(media['absolute_path'], media['new_absolute_path'])
            # persist the new path and hash
            id_media = media['id']
            download_media_json = {
                'id': id_media,
                'hash_sign': get_hash_sign(file_name),
                'absolute_path': media['new_absolute_path'],
            }
            update(DownloadMedia().from_json_to_obj(download_media_json))
            logging.debug('update success id %s' % media['id'])
        except Exception:
            traceback.print_exc()
            logging.error('update error id %s' % id_media)
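get_hash_sign is used throughout this section but not defined here; below is a minimal sketch, assuming it simply returns the MD5 hex digest of the file name (the project's actual helper may differ).

import hashlib


def get_hash_sign(file_name):
    # Assumed behaviour: an MD5 hex digest of the file name, used as a stable
    # key for de-duplication and status lookups. Hypothetical sketch, not the
    # project's actual implementation.
    return hashlib.md5(file_name.encode('utf-8')).hexdigest()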
def process(self, content):
    response_stream = from_string_to_json(get_and_download_stream_obj(content))
    if response_stream['type'] == consts.constant_manager.DOWNLOAD:
        for download_info in response_stream['download_file_list']:
            file_name = get_file_name_by_download_url(download_info['download_url'])
            if download_info['media_type'] == consts.constant_manager.SUBTITLE:
                file_name = response_stream['site'] + '_' + get_file_name_by_download_url(
                    response_stream['original_url']) + '_' + download_info['language']
            file_obj = DownloadFile(download_url=download_info['download_url'],
                                    file_name=file_name,
                                    site=response_stream['site'],
                                    original_url=response_stream['original_url'])
            download_media_json = {
                'video_url': response_stream['video_url'],
                'original_url': response_stream['original_url'],
                'download_url': download_info['download_url'],
                'media_quality': download_info['media_quality'],
                'episode': response_stream['episode'],
                'download_path': ConfigInit().get_config_by_option('download_path'),
                'media_name': response_stream['media_name'],
                'hash_sign': get_hash_sign(file_name),
                'media_type': download_info['media_type'],
                'site': response_stream['site'],
                'language': download_info['language'],
                'merged_sign': download_info['merged_sign'],
                'merged_order': download_info['merged_order'],
            }
            scheduler_db_save_queue(download_media_json)
            # todo: finer-grained management of download priority
            if int(download_info['priority']) > 50:
                scheduler_download_queue(file_obj.from_obj_to_json(), priority=True)
            else:
                scheduler_download_queue(file_obj.from_obj_to_json())
    return response_stream
def downloading(file_name):
    # todo: support other ways of verifying whether a file is being downloaded
    redis_queue = RedisMsgQueue()
    return get_hash_sign(file_name) in redis_queue.hash_get_all(
        consts.constant_manager.DOWNLOAD_STATUS_QUEUE_NAME)
def process(self, content):
    to_merged_medias_lists = from_string_to_json(content)
    merged_absolute_path = self.merge_media(to_merged_medias_lists)
    # todo: make this batch of database operations atomic
    if merged_absolute_path:
        download_media_merged_json = copy.deepcopy(to_merged_medias_lists[0])
        del_list = [
            'id', 'cloud_path', 'create_time', 'merged_status', 'update_time',
            'upload_status'
        ]
        for del_column in del_list:
            del download_media_merged_json[del_column]
        download_media_merged_json['absolute_path'] = merged_absolute_path
        download_media_merged_json['media_type'] = consts.constant_manager.MERGED
        download_media_merged_json['total_size'] = get_file_size(merged_absolute_path)
        download_media_merged_json['hash_sign'] = get_hash_sign(
            download_media_merged_json['merged_sign'])
        download_media_merged_json['merged_order'] = -1
        scheduler_db_save_queue(download_media_merged_json)
        for download_media_json in to_merged_medias_lists:
            if exist_file(download_media_json['absolute_path']):
                del_file(download_media_json['absolute_path'])
            download_media_json['merged_status'] = '1'
            for column in list(download_media_json.keys()):
                if download_media_json[column] == 'None':
                    del download_media_json[column]
            scheduler_db_save_queue(download_media_json)
def get_file_download_status(file_name):
    """
    Return the file's download status: not finished, finished, or downloading.
    :param file_name:
    :return: NOT_DOWNLOAD_OVER, DOWNLOAD_OVER, or DOWNLOADING
    """
    hash_sign = get_hash_sign(file_name)
    download_status = consts.constant_manager.NOT_DOWNLOAD_OVER
    if select_by_hash_sign(hash_sign):
        download_status = consts.constant_manager.DOWNLOAD_OVER
    elif downloading(file_name):
        download_status = consts.constant_manager.DOWNLOADING
    return download_status
def get_file_download_status(file_name):
    """
    Return the file's download status: not finished, finished, or downloading.
    :param file_name:
    :return: NOT_DOWNLOAD_OVER, DOWNLOAD_OVER, or DOWNLOADING
    """
    hash_sign = get_hash_sign(file_name)
    download_status = consts.constant_manager.NOT_DOWNLOAD_OVER
    if has_download_over(hash_sign):
        download_status = consts.constant_manager.DOWNLOAD_OVER
    # elif being_download(hash_sign):
    #     download_status = consts.constant_manager.DOWNLOADING
    return download_status
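A caller would typically consult get_file_download_status before scheduling work; a minimal sketch, assuming the constants and scheduler_download_queue helper shown elsewhere in this section (enqueue_if_needed itself is a hypothetical name):

def enqueue_if_needed(file_obj):
    # Skip files that are already downloaded or currently in flight;
    # otherwise push them back onto the download queue.
    status = get_file_download_status(file_obj.file_name)
    if status == consts.constant_manager.NOT_DOWNLOAD_OVER:
        scheduler_download_queue(file_obj.from_obj_to_json())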
def pre_parse_download_obj(self, download_file_obj):
    # type: (DownloadFile) -> bool
    """
    :param download_file_obj:
    :return:
    """
    headers_json = {}
    try:
        headers = {
            'User-Agent': random.choice(consts.constant_manager.USER_AGENTS)
        }
        response = requests.get(download_file_obj.download_url,
                                stream=True,
                                headers=headers)
        if response.status_code == 400:
            logging.error('invalid timestamp %s' % download_file_obj.download_url)
            return False
        headers_json = dict(response.headers)
        if 'mp4' in headers_json['Content-Type']:
            download_file_obj.file_type = 'mp4'
        elif 'text' in headers_json['Content-Type']:
            download_file_obj.file_type = 'txt'
        else:
            logging.error('unknown file_type in %s' % download_file_obj.download_url)
            return False
        download_file_obj.total_size = int(headers_json['Content-Length'])
    except Exception:
        traceback.print_exc()
        logging.error('pre_parse_download_obj error download_url %s' %
                      download_file_obj.download_url)
    if download_file_obj.file_name == '':
        download_file_obj.file_name = get_file_name_by_download_url(
            download_file_obj.download_url)
    download_file_obj.hash_sign = get_hash_sign(
        file_name=download_file_obj.file_name)
    if download_file_obj.download_path == '':
        download_file_obj.download_path = ConfigInit().get_download_path()
    # todo: resuming downloads of gzip-compressed files
    if headers_json.get('Content-Encoding') == 'gzip':
        download_file_obj.download_type = consts.constant_manager.RE_DOWNLOAD
    download_file_obj.absolute_path = (download_file_obj.download_path +
                                       download_file_obj.file_name + '.' +
                                       download_file_obj.file_type)
    return True
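A usage sketch for pre_parse_download_obj, assuming DownloadFile accepts the keyword arguments shown in process() above and that self is the downloader worker; the URLs are placeholders:

file_obj = DownloadFile(download_url='http://example.com/clip.mp4',
                        file_name='clip',
                        site='viki',
                        original_url='http://example.com/watch/1')
# pre_parse fills in file_type, total_size, hash_sign and absolute_path from
# the response headers before the actual download starts.
if self.pre_parse_download_obj(file_obj):
    logging.debug('ready to download %s -> %s' %
                  (file_obj.download_url, file_obj.absolute_path))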
def process(self, content):
    to_merged_medias_lists = from_string_to_json(content)
    merged_absolute_path = self.merge_media(to_merged_medias_lists)
    # todo: make this batch of database operations atomic
    if merged_absolute_path:
        download_media_json = copy.deepcopy(to_merged_medias_lists[0])
        del download_media_json['id']
        download_media_json['absolute_path'] = merged_absolute_path
        download_media_json['media_type'] = consts.constant_manager.MERGED
        download_media_json['total_size'] = get_file_size(merged_absolute_path)
        download_media_json['hash_sign'] = get_hash_sign(
            download_media_json['merged_sign'])
        download_media_json['download_status'] = ''
        download_media_json['merged_order'] = ''
        scheduler_db_save_queue(download_media_json)
        for download_media_json in to_merged_medias_lists:
            download_media_json['merged_status'] = '1'
            scheduler_db_save_queue(download_media_json)
def pre_parse_download_obj(self, download_file_obj):
    # type: (DownloadFile) -> None
    """
    :param download_file_obj:
    :return:
    """
    headers = {
        'User-Agent': random.choice(consts.constant_manager.USER_AGENTS)
    }
    response = requests.get(download_file_obj.download_url,
                            stream=True,
                            headers=headers)
    headers_json = dict(response.headers)
    if 'mp4' in headers_json['Content-Type']:
        download_file_obj.file_type = 'mp4'
    elif 'text' in headers_json['Content-Type']:
        download_file_obj.file_type = 'txt'
    else:
        logging.error('unknown file_type in %s' % download_file_obj.download_url)
    try:
        download_file_obj.total_size = int(headers_json['Content-Length'])
    except Exception:
        logging.error('can not get total_size from download_url %s' %
                      download_file_obj.download_url)
    if download_file_obj.file_name == '':
        download_file_obj.file_name = hash_md5(download_file_obj.download_url)
    download_file_obj.hash_sign = get_hash_sign(
        file_name=download_file_obj.file_name)
    if download_file_obj.download_path == '':
        download_file_obj.download_path = ConfigInit().get_download_path()
    # todo: resuming downloads of gzip-compressed files
    if headers_json.get('Content-Encoding') == 'gzip':
        download_file_obj.download_type = 'wb+'
    download_file_obj.absolute_path = (download_file_obj.download_path +
                                       download_file_obj.file_name + '.' +
                                       download_file_obj.file_type)