def end_callback():
    """Mark the current keyword as finished and close the task when nothing is pending.

    Uses ``keyword_id``, ``task_id``, ``db``, ``log`` and ``ExportData`` from
    the surrounding scope. When the query for keywords with
    ``finish_status = 601`` returns nothing, the collected rows are exported
    and the task row is set to ``task_status = 503``.
    """
    # Mark this keyword as finished (finish_status = 603).
    sql = 'update tab_ivms_task_keyword set finish_status = 603 where id = %d' % keyword_id
    db.update(sql)

    # If every keyword of this task is finished, mark the task as finished too.
    # Presumably 601 is the "not finished yet" status -- TODO confirm.
    sql = 'select t.* from tab_ivms_task_keyword t where task_id = %d and finish_status = 601' % task_id
    results = db.find(sql)
    if not results:
        # Export the data.
        key_map = {
            'program_id': 'vint_sequence.nextval',
            'search_type': 'int_search_type',
            'program_name': 'str_title',
            'program_url': 'str_url',
            'release_date': 'date_release_time',
            'image_url': 'str_image_url',
            'program_content': 'str_content',
            'task_id': 'vint_%d' % task_id,
            'keyword': 'str_keyword',
            'keyword_count': 'int_keyword_count',
            'check_status': 'vint_202'
        }

        export = ExportData('VA_content_info', 'tab_ivms_program_info', key_map, 'program_url')
        export.export_to_oracle()

        # Mark the task as finished.
        sql = 'update TAB_IVMS_TASK_INFO set task_status = 503 where task_id = %d' % task_id
        db.update(sql)

    # NOTE(review): the flattened source does not show whether this log call
    # belongs inside the branch above -- confirm against the original layout.
    log.info('\n********** VA end **********')
def end_callback():
    """Export weibo articles to Oracle and log that the run has ended.

    Rows are taken from ``WWA_weibo_info_info`` and written to
    ``tab_mvms_weibo_article_info``; only rows matching
    ``read_status == 0`` and ``image_pron_status == 2`` are selected.
    ``log`` and ``ExportData`` come from the surrounding scope.
    """
    # Target column -> "<type prefix>_<source field or literal>".
    article_columns = {
        'id': 'int__id',
        'release_time': 'date_release_time',
        'come_from': 'str_come_from',
        'content': 'clob_content',
        'image_url': 'str_image_url',
        'video_url': 'str_video_url',
        'transpond_count': 'int_transpond_count',
        'praise_count': 'int_praise_count',
        'check_status': 'vint_301',
        'weibo_id': 'int_weibo_id',
        'article_url': 'str_url',
        'violate_status': 'int_violate_id',
        'sensitive_id': 'int_sensitive_id',
        'record_time': 'date_record_time',
        'SEXY_IMAGE_STATUS': 'str_sexy_image_status',
    }
    row_filter = {'read_status': 0, "image_pron_status": 2}

    ExportData(
        'WWA_weibo_info_info',
        'tab_mvms_weibo_article_info',
        article_columns,
        unique_key='ARTICLE_url',
        condition=row_filter,
    ).export_to_oracle()

    log.info('\n********** WWA_weibo_info end **********')
def site_main():
    """Export unread rows from ``TIANJIN_APP_content_info`` into ``tab_app_program_info``.

    Only rows with ``read_status == 0`` are selected; ``ORIGINAL_URL`` is
    passed as the unique key.
    """
    # Target column -> "<type prefix>_<source field or literal>".
    column_map = {
        'id': 'int__id',
        'APP_ID': 'int_site_id',
        'CONTENT_ID': 'int__id',
        'RELEASE_TIME': 'date_release_time',
        'TITLE': 'str_title',
        'ORIGINAL_URL': 'str_url',
        'CONTENT': 'clob_content',
        'ABSTRACT_IMAGE_URL': 'str_image_url',
        'ABSTRACT_IMAGE_LOCAL_PATH': 'str_img_stor_path',
        'VIDEO_URL': 'str_video_url',
        # Disabled: 'VIDEO_LOCAL_PATH': 'str_video_local_path'
        'RECORD_TIME': 'date_record_time',
        # Disabled alternative: 'image_code': 'str_sexy_image_status'
        'image_code': 'vint_5',
    }
    # A second filter, "image_pron_status": 2, is currently disabled.
    unread_only = {'read_status': 0}

    exporter = ExportData()
    exporter.export_to_oracle(
        source_table='TIANJIN_APP_content_info',
        aim_table='tab_app_program_info',
        key_map=column_map,
        unique_key='ORIGINAL_URL',
        condition=unread_only,
    )
def main():
    """Export ``GameApp_content_info`` rows into ``tab_gams_app_info``, keyed by ``url``."""
    # Target column -> "<type prefix>_<source field or literal>".
    app_columns = {
        'id': 'vint_sequence.nextval',
        'title': 'str_title',
        'update_info': 'str_update_info',
        'author': 'str_author',
        'url': 'str_url',
        'app_url': 'str_app_url',
        'image_url': 'str_image_url',
        'classify': 'int_classify_id',
        'software_size': 'str_software_size',
        'tag': 'str_tag',
        'platform_type': 'vint_502',  # android
        'download_count': 'str_download_count',
        'record_time': 'date_record_time',
        'update_time': 'date_release_time',
        'site_id': 'int_site_id',
        'check_status': 'vint_201',
        'sexy_image_status': 'str_sexy_image_status',
        'sexy_image_url': 'str_sexy_image_url',
        'score': 'str_score',
        'summary': 'str_summary',
        'compatibility': 'str_platform',
        'language': 'int_language',
    }

    ExportData(
        source_table='GameApp_content_info',
        aim_table='tab_gams_app_info',
        key_map=app_columns,
        unique_key='url',
    ).export_to_oracle()
def main():
    """Export ``VA_content_info`` rows to ``tab_ivms_program_info`` every five minutes, forever.

    Each pass selects rows matching ``read_status == 0`` and
    ``image_pron_status == 2``. The loop never terminates on its own.
    """
    while True:
        # Rebuilt on every pass, exactly as a fresh ExportData is.
        program_columns = {
            'program_id': 'vint_sequence.nextval',
            'search_type': 'int_search_type',
            'program_name': 'str_title',
            'program_url': 'str_url',
            'release_date': 'date_release_time',
            'image_url': 'str_image_url',
            'program_content': 'str_content',
            'task_id': 'int_task_id',
            'keyword': 'str_keyword',
            'keyword_count': 'int_keyword_count',
            'check_status': 'vint_202',
            'SEXY_IMAGE_STATUS': 'int_sexy_image_status',
        }
        pending_filter = {'read_status': 0, "image_pron_status": 2}

        exporter = ExportData(
            'VA_content_info',
            'tab_ivms_program_info',
            program_columns,
            'program_url',
            condition=pending_filter,
        )
        exporter.export_to_oracle()

        time.sleep(300)  # export once every five minutes
def main():
    """Export ``WWA_search_app_content_info`` rows into ``TAB_MVMS_APP_INFO``, keyed by ``title``."""
    # Target column -> "<type prefix>_<source field or literal>".
    app_columns = {
        'id': 'int__id',
        'title': 'str_title',
        'update_info': 'str_update_info',
        'author': 'str_author',
        'url': 'str_url',
        'app_url': 'str_app_url',
        'image_url': 'str_image_url',
        'software_size': 'str_software_size',
        'tag': 'str_tag',
        'platform_type': 'vint_502',  # android
        'download_count': 'str_download_count',
        'record_time': 'date_record_time',
        'update_time': 'date_release_time',
        'site_id': 'int_site_id',
        'score': 'str_score',
        'summary': 'str_summary',
        'compatibility': 'str_platform',
        'language': 'int_language',
        'monitor_status': 'vint_401',
    }

    exporter = ExportData()
    exporter.export_to_oracle(
        source_table='WWA_search_app_content_info',
        aim_table='TAB_MVMS_APP_INFO',
        key_map=app_columns,
        unique_key='title',
    )
def main():
    """Template export call showing every supported ``key_map`` value prefix.

    The source and target table names are left empty here and are meant to
    be filled in by whoever copies this template.
    """
    # Each entry maps a target key to "<prefix>_<payload>":
    #   str_ / int_ / date_  -> value of the named source key, as str / int / date
    #   vint_ / vstr_        -> the literal value itself, as int / str
    #   sint_ / sstr_        -> result of the given SQL query, as int / str
    example_map = {
        'aim_key1': 'str_source_key2',
        'aim_key2': 'int_source_key3',
        'aim_key3': 'date_source_key4',
        'aim_key4': 'vint_id',
        'aim_key5': 'vstr_name',
        'aim_key6': 'sint_select id from xxx',
        'aim_key7': 'sstr_select name from xxx',
    }

    exporter = ExportData()
    exporter.export_to_oracle(
        source_table='',
        aim_table='',
        key_map=example_map,
        unique_key='url',
    )
def user_main():
    """Export ``WWA_weibo_user_info`` rows into ``tab_mvms_weibo_info``, keyed by ``account_url``."""
    # Target column -> "<type prefix>_<source field or literal>".
    user_columns = {
        'id': 'int__id',
        'name': 'str_name',
        'sex': 'int_sex',
        'summary': 'str_summary',
        'fans_count': 'int_fans_count',
        'blog_verified': 'str_blog_verified',
        'is_verified': 'int_is_verified',
        'account_url': 'str_url',
        'follow_count': 'int_follow_count',
        'image_url': 'str_image_url',
        'monitor_status': 'vint_401',
        'SEARCH_TYPE': 'vint_702',
        'region': 'str_area',
        'monitor_type': 'int_monitor_type',
    }

    ExportData(
        'WWA_weibo_user_info',
        'tab_mvms_weibo_info',
        user_columns,
        'account_url',
    ).export_to_oracle()
def main():
    """Export site rows and then article rows to Oracle with one ``ExportData`` instance.

    1. ``article_site_info`` -> ``OP_SITE_INFO``, keyed by ``url``.
    2. ``article_text_info`` -> ``OP_OPINION_INFO``, keyed by ``ourl``.
    """
    site_columns = {
        'id': 'int__id',
        'name': 'str_name',
        'url': 'str_url',
    }
    opinion_columns = {
        'id': 'int__id',
        'op_title': 'str_title',
        'ourl': 'str_url',
        'summary': 'str_content',
        # Disabled: 'op_author': 'str_author'
        # Disabled: 'found_time': 'date_release_time'
        # Disabled: 'creat_time': 'date_record_time'
        'site_id': 'int_site_id',
    }

    exporter = ExportData()
    exporter.export_to_oracle(
        source_table='article_site_info',
        aim_table='OP_SITE_INFO',
        key_map=site_columns,
        unique_key='url',
    )
    exporter.export_to_oracle(
        source_table='article_text_info',
        aim_table='OP_OPINION_INFO',
        key_map=opinion_columns,
        unique_key='ourl',
    )
def info_main():
    """Export unread ``ZHEJIANG_CZVIDEO_info`` rows into ``TAB_VIDEO_PROGRAM_INFO``.

    Only rows with ``read_status == 0`` are selected; ``PROGRAM_URL`` is
    passed as the unique key.
    """
    # Target column -> "<type prefix>_<source field or literal>".
    program_columns = {
        'id': 'int__id',
        'site_id': 'int_site_id',
        'program_name': 'str_title',
        'content': 'clob_content',
        'program_url': 'str_url',
        'release_date': 'date_release_time',
        'image_url': 'str_image_url',
        'image_code': 'int_sexy_image_status',
        'video_download_url': 'str_video_download_url',
        'find_date': 'date_record_time',
        'OUT_CHAIN_STATUS': 'int_is_out_link',
    }
    # Extra filters 'site_id': 1023 and "image_pron_status": 2 are currently disabled.
    unread_only = {'read_status': 0}

    exporter = ExportData(
        'ZHEJIANG_CZVIDEO_info',
        'TAB_VIDEO_PROGRAM_INFO',
        program_columns,
        unique_key='PROGRAM_URL',
        condition=unread_only,
    )
    exporter.export_to_oracle()
def main():
    """Reset anchors' live flag, then export anchor rows and violation rows to Oracle.

    Steps, in order:
      1. Set ``live_view = 0`` on every row of ``tab_nbsp_anchor_info``.
      2. Export ``LiveApp_anchor_info`` -> ``tab_nbsp_anchor_info`` with
         ``update_read_status=False``.
      3. Export rows of ``LiveApp_anchor_info`` that have a non-empty
         ``violate_content`` and ``read_status == 0`` ->
         ``tab_nbsp_violate_anchor_info`` with ``update_read_status=True``.
    """
    oracle = OracleDB()
    oracle.update('update tab_nbsp_anchor_info t set t.live_view = 0')

    # Anchor information.
    anchor_columns = {
        'id': 'vint_sequence.nextval',
        'room_id': 'int_room_id',
        'name': 'str_name',
        'sex': 'int_sex',
        'age': 'int_age',
        'address': 'str_address',
        'image_url': 'str_image_url',
        'fans_count': 'int_fans_count',
        'watched_count': 'int_watched_count',
        'room_url': 'str_room_url',
        'video_path': 'str_video_path',
        'site_id': 'int_site_id',
        'record_time': 'date_record_time',
        'live_view': 'int_live_view',
        'monitor_status': 'vint_401',
        'json_data_url': 'str_watched_count_url',
    }
    anchor_exporter = ExportData(
        source_table='LiveApp_anchor_info',
        aim_table='tab_nbsp_anchor_info',
        key_map=anchor_columns,
        unique_key='room_id',
        update_read_status=False,
        unique_key_mapping_source_key={'room_id': 'int_room_id'},
    )
    anchor_exporter.export_to_oracle()

    # Violation information.
    violation_columns = {
        'id': 'vint_sequence.nextval',
        'TASK_ID': 'int_task_id',
        'ANCHOR_ID': 'int_room_id',
        'FOUND_TIME': 'date_record_time',
        'CONTENT': 'str_violate_content',
        'VIOLATE_IMAGE_STATUS': 'str_sexy_image_status',
        'VIOLATE_IMAGE_URL': 'str_sexy_image_url',
    }
    violation_filter = {'violate_content': {'$ne': ''}, 'read_status': 0}
    violation_exporter = ExportData(
        source_table='LiveApp_anchor_info',
        aim_table='tab_nbsp_violate_anchor_info',
        key_map=violation_columns,
        unique_key='ANCHOR_ID',
        update_read_status=True,
        condition=violation_filter,
        unique_key_mapping_source_key={'ANCHOR_ID': 'int_room_id'},
    )
    violation_exporter.export_to_oracle()
def main():
    """Export program rows, then episode rows, to Oracle with one ``ExportData`` instance.

    1. ``PROGRAM_info`` -> ``tab_ntms_program_info``, keyed by ``PROGRAM_ID``.
    2. ``PROGRAM_EPISODE_info`` -> ``tab_ntms_program_episode_info``,
       keyed by ``EPISODE_URL``.
    """
    exporter = ExportData()

    # Program-level data.
    program_columns = {
        'PROGRAM_ID': 'int__id',
        'RELEASE_TIME': 'date_release_time',
        'EPISODE': 'str_episode',
        'ACTORS': 'str_actors',
        'DIRECTORS': 'str_directors',
        'PROGRAM_NAME': 'str_program_name',
        'SUMMARY': 'str_summary',
        'SITE_ID': 'int_site_id',
        'IMAGE_URL': 'str_image_url',
        'PROGRAM_URL': 'str_program_url',
    }
    exporter.export_to_oracle(
        source_table='PROGRAM_info',
        aim_table='tab_ntms_program_info',
        key_map=program_columns,
        unique_key='PROGRAM_ID',
    )

    # Episode-level data.
    episode_columns = {
        'ID': 'int__id',
        'PROGRAM_ID': 'int_program_id',
        'PROGRAM_EPISODE': 'str_episode_num',
        'TIME_LENGTH': 'str_time_length',
        'EPISODE_NAME': 'str_episode_name',
        'DOWNLOAD_STATUS': 'int_download_status',
        'DOWNLOAD_URL': 'str_download_url',
        # NOTE(review): PLAY_URL reads the same source field as DOWNLOAD_URL -- confirm this is intended.
        'PLAY_URL': 'str_download_url',
        'EPISODE_URL': 'str_episode_url',
        'UPDATE_TIME': 'date_record_time',
        'SUMMARY': 'str_summary',
        'IMAGE_URL': 'str_image_url',
        'sto_path': 'str_sto_path',
        'sto_id': 'int_sto_id',
    }
    exporter.export_to_oracle(
        source_table='PROGRAM_EPISODE_info',
        aim_table='tab_ntms_program_episode_info',
        key_map=episode_columns,
        unique_key='EPISODE_URL',
    )
from db.oracledb import OracleDB
from IOPM.vip_checked import VipChecked

# Request headers; note that "Host" is pinned to a fixed address.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
    "Accept-Encoding": "gzip, deflate",
    "Cache-Control": "max-age=0",
    "Connection": "keep-alive",
    "Accept-Language": "zh-CN,zh;q=0.8",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
    "Upgrade-Insecure-Requests": "1",
    "Host": "192.168.60.38:8001"
}

# Module-level singletons, created at import time.
db = OracleDB()
export_data = ExportData()
vip_checked = VipChecked()

STO_PER_SYNC_TIME = '.sync_time'
IOPM_SERVICE_ADDRESS = 'http://localhost:8080'

def get_url(time_lenght = 60):
    '''
    @summary:
    ---------
    @param time_lenght: length of the time window, in minutes
    ---------
    @result:
    '''

    current_date = tools.get_current_date()
import requests
import pymongo
from bs4 import BeautifulSoup
import sys
sys.path.append('../')  # must run before the ``utils`` imports below
import utils.tools as tools
from utils.export_data import ExportData

# Local MongoDB connection; ``db`` is the "gonggao" database.
client = pymongo.MongoClient("localhost", 27017)
db = client.gonggao

if __name__ == '__main__':
    # Unique index on ``url``. ``create_index`` replaces ``ensure_index``,
    # which was deprecated in PyMongo 3.0 and removed in 4.0; both accept
    # the same arguments here.
    db.gonggao_content.create_index('url', unique=True)
    export_data = ExportData()

    # Listing pages to crawl.
    urls = [
        'http://www.sapprft.gov.cn/sapprft/channels/6588.shtml',
        'http://www.sapprft.gov.cn/sapprft/channels/6588_2.shtml',
        'http://www.sapprft.gov.cn/sapprft/channels/6588_3.shtml',
        'http://www.sapprft.gov.cn/sapprft/channels/6588_4.shtml',
        'http://www.sapprft.gov.cn/sapprft/channels/6588_5.shtml'
    ]

    count = 0

    for url in urls:
        html, res = tools.get_html_by_requests(url)
        # <a class="fl"> and <span class="fr"> tags are pulled from each listing page.
        links = tools.get_tag(html, 'a', {'class': 'fl'})
        release_times = tools.get_tag(html, 'span', {'class': 'fr'})
def main():
    """Export app content rows and violation content rows to Oracle.

    1. ``VAApp_content_info`` -> ``TAB_MVMS_APP_CONTENT``.
    2. ``VAApp_vioation_content_info`` -> ``TAB_MVMS_VIOLATION_CONTENT``.

    Both exports pass ``ORIGINAL_URL`` as the unique key and share one
    ``ExportData`` instance.
    """
    # Content rows. Target column -> "<type prefix>_<source field>".
    key_map = {
        'id': 'int__id',
        'APP_ID': 'int_site_id',
        # Fixed: this key used to be 'COLUMN_ID ' (trailing space), which did
        # not match the 'COLUMN_ID' key used by the second map below.
        'COLUMN_ID': 'int_column_id',
        'CONTENT_ID': 'int__id',
        'RELEASE_TIME': 'date_release_time',
        'TITLE': 'str_title',
        'ORIGINAL_URL': 'str_url',
        'CONTENT': 'str_content',
        'ABSTRACT_IMAGE_URL': 'str_image_url',
        'ABSTRACT_IMAGE_LOCAL_PATH': 'str_img_stor_path',
        'VIDEO_URL': 'str_video_url',
        'VIDEO_LOCAL_PATH': 'str_video_stor_path',
        'ISAUDIO': 'int_is_audio',
        'ISDOWNLOAD': 'int_is_download',
        'CREATE_TIME': 'date_record_time',
        'VIOLATE_ID': 'int_violate_id',
        'SENSITIVE_ID': 'int_sensitive_id',
        'STORAGE_ID': 'int_storage_id',
        # Not exported: CHECK_STATUS, CHECK_TIME, CHECK_USER, DOWN_STO_ID
    }

    export_data = ExportData()
    export_data.export_to_oracle(
        source_table='VAApp_content_info',
        aim_table='TAB_MVMS_APP_CONTENT',
        key_map=key_map,
        unique_key='ORIGINAL_URL',
    )

    # Violation rows.
    key_map = {
        'id': 'int__id',
        'APP_ID': 'int_site_id',
        'COLUMN_ID': 'int_column_id',
        'CONTENT_ID': 'int__id',
        'RELEASE_TIME': 'date_release_time',
        'TITLE': 'str_title',
        'ORIGINAL_URL': 'str_url',
        'ABSTRACT_IMAGE_URL': 'str_image_url',
        'ABSTRACT_IMAGE_LOCAL_PATH': 'str_img_stor_path',
        'VIDEO_URL': 'str_video_url',
        'VIDEO_LOCAL_PATH': 'str_video_stor_path',
        'STORAGE_ID': 'int_storage_id',
        'VIOLATE_ID': 'int_violate_id',
        'SENSITIVE_ID': 'int_sensitive_id',
        'ISAUDIO': 'int_is_audio',
        'CREATE_TIME': 'date_record_time',
        # Not exported: CHECK_STATUS, CHECK_TIME, CHECK_USER, DOWN_STO_ID, CONTENT1
        'CONTENT': 'str_content',
    }

    # NOTE(review): the source table name is spelled "vioation" -- left as is;
    # confirm it matches the real table name.
    export_data.export_to_oracle(
        source_table='VAApp_vioation_content_info',
        aim_table='TAB_MVMS_VIOLATION_CONTENT',
        key_map=key_map,
        unique_key='ORIGINAL_URL',
    )
def main():
    """Export publications, their episodes and the site table to Oracle.

    1. ``WP_content_info`` -> ``TAB_LCMS_PUBLICATION_INFO``, keyed by ``name``.
    2. ``WP_content_episode_info`` -> ``tab_lcms_episode_info``, keyed by ``title``.
    3. ``WP_site_info`` -> ``TAB_LCMS_SITE_INFO``, keyed by ``id``.

    All three exports share one ``ExportData`` instance.
    """
    # Publication information.
    publication_columns = {
        'id': 'int__id',
        'name': 'str_title',
        'type': 'int_data_type',
        'page_url': 'str_url',
        'media_url': 'str_image_url',
        'visit_number': 'str_watched_count',
        'site_id': 'int_site_id',
        'VIOLATE_IMAGE_STATUS': 'str_sexy_image_status',
        'VIOLATE_IMAGE_URL': 'str_sexy_image_url',
        # Not exported: description, copyright_id, text_result, media_result,
        # media_confidence, sto_id, sto_path, sto_tran_id, sto_tran_path,
        # sto_fea_id, sto_fea_path, is_violation, violation_description,
        # check_status, check_time
        'author': 'str_author',
        # Not exported: cover_path
        'update_time': 'date_update_time',
        # Not exported: check_user_id
    }

    exporter = ExportData()
    exporter.export_to_oracle(
        source_table='WP_content_info',
        aim_table='TAB_LCMS_PUBLICATION_INFO',
        key_map=publication_columns,
        unique_key='name',
        unique_key_mapping_source_key={'name': 'str_title'},
    )

    # Episode information.
    episode_columns = {
        'id': 'int__id',
        'publication_id': 'int_content_id',
        'title': 'str_title',
        'video_url': 'str_video_url',
        'image_url': 'str_image_url',
        'watched_count': 'int_watched_count',
        'comment_count': 'int_comment_count',
        'release_time': 'date_release_time',
        'record_time': 'date_record_time',
    }
    exporter.export_to_oracle(
        source_table='WP_content_episode_info',
        aim_table='tab_lcms_episode_info',
        key_map=episode_columns,
        unique_key='title',
        unique_key_mapping_source_key={'title': 'str_title'},
    )

    # Site table.
    site_columns = {
        'id': 'int_site_id',
        'name': 'str_name',
        'url': 'str_url',
        'domain_name': 'str_domain',
        'ip': 'str_ip',
        'icp_number': 'str_icp',
        # Not exported: city_id, iffocus, status
        'create_time': 'date_record_time',
        # Not exported: update_time, description, crawl_status
        'ip_address': 'str_address',
        'license_key': 'str_video_license',
        # Not exported: hava_certificated, site_sto_id, site_sto_path, check_status
    }
    exporter.export_to_oracle(
        source_table='WP_site_info',
        aim_table='TAB_LCMS_SITE_INFO',
        key_map=site_columns,
        unique_key='id',
    )