Exemple #1
0
            def end_callback():
                """Mark the current keyword finished and close the task when none remain.

                Uses ``keyword_id``, ``task_id``, ``db`` and ``log`` from the
                surrounding scope. If no keyword of the task is still at
                finish_status 601, the collected rows are exported to Oracle and
                the task row is set to task_status 503.
                """
                # This keyword is done: finish_status -> 603.
                keyword_done_sql = 'update tab_ivms_task_keyword set finish_status = 603 where id = %d' % keyword_id
                db.update(keyword_done_sql)

                # Any keyword of this task still at 601 means the task is not finished yet.
                pending_sql = 'select t.* from tab_ivms_task_keyword t where task_id = %d and finish_status = 601' % task_id
                if db.find(pending_sql):
                    return

                # Every keyword is done: export the collected data.
                column_map = {
                    'program_id': 'vint_sequence.nextval',
                    'search_type': 'int_search_type',
                    'program_name': 'str_title',
                    'program_url': 'str_url',
                    'release_date': 'date_release_time',
                    'image_url': 'str_image_url',
                    'program_content': 'str_content',
                    'task_id': 'vint_%d' % task_id,
                    'keyword': 'str_keyword',
                    'keyword_count': 'int_keyword_count',
                    'check_status': 'vint_202'
                }
                exporter = ExportData('VA_content_info', 'tab_ivms_program_info',
                                      column_map, 'program_url')
                exporter.export_to_oracle()

                # Finally flag the task itself as done (task_status 503).
                task_done_sql = 'update TAB_IVMS_TASK_INFO set task_status = 503 where task_id = %d' % task_id
                db.update(task_done_sql)
                log.info('\n********** VA end **********')
Exemple #2
0
    def end_callback():
        """Export weibo articles from 'WWA_weibo_info_info' to Oracle and log completion.

        Only source rows matching read_status == 0 and image_pron_status == 2
        are selected; 'ARTICLE_url' is passed as the unique key.
        """
        # Target column -> typed source field (or literal for the vint_ entries).
        column_map = {
            'id': 'int__id',
            'release_time': 'date_release_time',
            'come_from': 'str_come_from',
            'content': 'clob_content',
            'image_url': 'str_image_url',
            'video_url': 'str_video_url',
            'transpond_count': 'int_transpond_count',
            'praise_count': 'int_praise_count',
            'check_status': 'vint_301',
            'weibo_id': 'int_weibo_id',
            'article_url': 'str_url',
            'violate_status': 'int_violate_id',
            'sensitive_id': 'int_sensitive_id',
            'record_time': 'date_record_time',
            'SEXY_IMAGE_STATUS': 'str_sexy_image_status'
        }
        row_filter = {'read_status': 0, "image_pron_status": 2}

        exporter = ExportData('WWA_weibo_info_info',
                              'tab_mvms_weibo_article_info',
                              column_map,
                              unique_key='ARTICLE_url',
                              condition=row_filter)
        exporter.export_to_oracle()
        log.info('\n********** WWA_weibo_info end **********')
Exemple #3
0
def site_main():
    """Export unread rows of 'TIANJIN_APP_content_info' into 'tab_app_program_info'."""
    column_map = {
        'id': 'int__id',
        'APP_ID': 'int_site_id',
        'CONTENT_ID': 'int__id',
        'RELEASE_TIME': 'date_release_time',
        'TITLE': 'str_title',
        'ORIGINAL_URL': 'str_url',
        'CONTENT': 'clob_content',
        'ABSTRACT_IMAGE_URL': 'str_image_url',
        'ABSTRACT_IMAGE_LOCAL_PATH': 'str_img_stor_path',
        'VIDEO_URL': 'str_video_url',
        # The VIDEO_LOCAL_PATH mapping ('str_video_local_path') is currently disabled.
        'RECORD_TIME': 'date_record_time',
        # image_code is a fixed literal; the alternative mapping from
        # 'str_sexy_image_status' is currently disabled.
        'image_code': 'vint_5'
    }
    # Only rows with read_status == 0 are selected; the additional
    # image_pron_status == 2 filter is currently disabled.
    unread_only = {'read_status': 0}

    ExportData().export_to_oracle(source_table='TIANJIN_APP_content_info',
                                  aim_table='tab_app_program_info',
                                  key_map=column_map,
                                  unique_key='ORIGINAL_URL',
                                  condition=unread_only)
Exemple #4
0
def main():
    """Export 'GameApp_content_info' rows into 'tab_gams_app_info', keyed on 'url'."""
    column_map = {
        'id': 'vint_sequence.nextval',
        'title': 'str_title',
        'update_info': 'str_update_info',
        'author': 'str_author',
        'url': 'str_url',
        'app_url': 'str_app_url',
        'image_url': 'str_image_url',
        'classify': 'int_classify_id',
        'software_size': 'str_software_size',
        'tag': 'str_tag',
        'platform_type': 'vint_502',  # 502 = android
        'download_count': 'str_download_count',
        'record_time': 'date_record_time',
        'update_time': 'date_release_time',
        'site_id': 'int_site_id',
        'check_status': 'vint_201',
        'sexy_image_status': 'str_sexy_image_status',
        'sexy_image_url': 'str_sexy_image_url',
        'score': 'str_score',
        'summary': 'str_summary',
        'compatibility': 'str_platform',
        'language': 'int_language',
    }

    ExportData(source_table='GameApp_content_info',
               aim_table='tab_gams_app_info',
               key_map=column_map,
               unique_key='url').export_to_oracle()
Exemple #5
0
def main():
    """Run forever, exporting 'VA_content_info' rows to Oracle every five minutes.

    Each pass builds a fresh mapping and a fresh ExportData; only rows with
    read_status == 0 and image_pron_status == 2 are selected.
    """
    while True:
        column_map = {
            'program_id': 'vint_sequence.nextval',
            'search_type': 'int_search_type',
            'program_name': 'str_title',
            'program_url': 'str_url',
            'release_date': 'date_release_time',
            'image_url': 'str_image_url',
            'program_content': 'str_content',
            'task_id': 'int_task_id',
            'keyword': 'str_keyword',
            'keyword_count': 'int_keyword_count',
            'check_status': 'vint_202',
            'SEXY_IMAGE_STATUS': 'int_sexy_image_status'
        }
        row_filter = {'read_status': 0, "image_pron_status": 2}

        exporter = ExportData('VA_content_info',
                              'tab_ivms_program_info',
                              column_map,
                              'program_url',
                              condition=row_filter)
        exporter.export_to_oracle()

        # Wait five minutes before the next export pass.
        time.sleep(300)
Exemple #6
0
def main():
    """Export 'WWA_search_app_content_info' rows into 'TAB_MVMS_APP_INFO', keyed on 'title'."""
    column_map = {
        'id': 'int__id',
        'title': 'str_title',
        'update_info': 'str_update_info',
        'author': 'str_author',
        'url': 'str_url',
        'app_url': 'str_app_url',
        'image_url': 'str_image_url',
        'software_size': 'str_software_size',
        'tag': 'str_tag',
        'platform_type': 'vint_502',  # 502 = android
        'download_count': 'str_download_count',
        'record_time': 'date_record_time',
        'update_time': 'date_release_time',
        'site_id': 'int_site_id',
        'score': 'str_score',
        'summary': 'str_summary',
        'compatibility': 'str_platform',
        'language': 'int_language',
        'monitor_status': 'vint_401'
    }

    ExportData().export_to_oracle(source_table='WWA_search_app_content_info',
                                  aim_table='TAB_MVMS_APP_INFO',
                                  key_map=column_map,
                                  unique_key='title')
Exemple #7
0
def main():
    """Template export showing every supported key_map value prefix.

    The source and target table names are left empty here.
    """
    column_map = {
        # <type>_<source key>: target column = value of the source key, with that type
        'aim_key1': 'str_source_key2',
        'aim_key2': 'int_source_key3',
        'aim_key3': 'date_source_key4',
        # v<type>_<value>: target column = the literal value, with that type
        'aim_key4': 'vint_id',
        'aim_key5': 'vstr_name',
        # s<type>_<sql>: target column = result of the SQL query, with that type
        'aim_key6': 'sint_select id from xxx',
        'aim_key7': 'sstr_select name from xxx'
    }

    ExportData().export_to_oracle(source_table='',
                                  aim_table='',
                                  key_map=column_map,
                                  unique_key='url')
def user_main():
    """Export 'WWA_weibo_user_info' rows into 'tab_mvms_weibo_info', keyed on 'account_url'."""
    column_map = {
        'id': 'int__id',
        'name': 'str_name',
        'sex': 'int_sex',
        'summary': 'str_summary',
        'fans_count': 'int_fans_count',
        'blog_verified': 'str_blog_verified',
        'is_verified': 'int_is_verified',
        'account_url': 'str_url',
        'follow_count': 'int_follow_count',
        'image_url': 'str_image_url',
        'monitor_status': 'vint_401',
        'SEARCH_TYPE': 'vint_702',
        'region': 'str_area',
        'monitor_type': 'int_monitor_type'
    }

    exporter = ExportData('WWA_weibo_user_info',
                          'tab_mvms_weibo_info',
                          column_map,
                          'account_url')
    exporter.export_to_oracle()
def main():
    """Export site rows and then article rows to Oracle with one shared exporter.

    'article_site_info' goes to 'OP_SITE_INFO' (unique on 'url') and
    'article_text_info' goes to 'OP_OPINION_INFO' (unique on 'ourl').
    """
    exporter = ExportData()

    # Sites.
    site_columns = {
        'id': 'int__id',
        'name': 'str_name',
        'url': 'str_url',
    }
    exporter.export_to_oracle(source_table='article_site_info',
                              aim_table='OP_SITE_INFO',
                              key_map=site_columns,
                              unique_key='url')

    # Articles. The op_author, found_time and creat_time mappings are
    # currently disabled.
    article_columns = {
        'id': 'int__id',
        'op_title': 'str_title',
        'ourl': 'str_url',
        'summary': 'str_content',
        'site_id': 'int_site_id'
    }
    exporter.export_to_oracle(source_table='article_text_info',
                              aim_table='OP_OPINION_INFO',
                              key_map=article_columns,
                              unique_key='ourl')
Exemple #10
0
def info_main():
    """Export unread 'ZHEJIANG_CZVIDEO_info' rows into 'TAB_VIDEO_PROGRAM_INFO'."""
    column_map = {
        'id': 'int__id',
        'site_id': 'int_site_id',
        'program_name': 'str_title',
        'content': 'clob_content',
        'program_url': 'str_url',
        'release_date': 'date_release_time',
        'image_url': 'str_image_url',
        'image_code': 'int_sexy_image_status',
        'video_download_url': 'str_video_download_url',
        'find_date': 'date_record_time',
        'OUT_CHAIN_STATUS': 'int_is_out_link'
    }
    # Only read_status == 0 is filtered on; the extra site_id == 1023 and
    # image_pron_status == 2 filters are currently disabled.
    unread_only = {'read_status': 0}

    exporter = ExportData('ZHEJIANG_CZVIDEO_info',
                          'TAB_VIDEO_PROGRAM_INFO',
                          column_map,
                          unique_key='PROGRAM_URL',
                          condition=unread_only)
    exporter.export_to_oracle()
Exemple #11
0
def main():
    """Reset anchor live flags in Oracle, then export anchors and their violations.

    Both exports read from 'LiveApp_anchor_info'. The anchor export does not
    update read_status; the violation export does, and only selects unread
    rows whose violate_content is not empty.
    """
    # Clear live_view on every anchor row before exporting.
    oracle = OracleDB()
    oracle.update('update  tab_nbsp_anchor_info t set t.live_view = 0')

    # Anchor information.
    anchor_columns = {
        'id': 'vint_sequence.nextval',
        'room_id': 'int_room_id',
        'name': 'str_name',
        'sex': 'int_sex',
        'age': 'int_age',
        'address': 'str_address',
        'image_url': 'str_image_url',
        'fans_count': 'int_fans_count',
        'watched_count': 'int_watched_count',
        'room_url': 'str_room_url',
        'video_path': 'str_video_path',
        'site_id': 'int_site_id',
        'record_time': 'date_record_time',
        'live_view': 'int_live_view',
        'monitor_status': 'vint_401',
        'json_data_url': 'str_watched_count_url'
    }
    anchor_exporter = ExportData(
        source_table='LiveApp_anchor_info',
        aim_table='tab_nbsp_anchor_info',
        key_map=anchor_columns,
        unique_key='room_id',
        update_read_status=False,
        unique_key_mapping_source_key={'room_id': 'int_room_id'})
    anchor_exporter.export_to_oracle()

    # Violation information.
    violation_columns = {
        'id': 'vint_sequence.nextval',
        'TASK_ID': 'int_task_id',
        'ANCHOR_ID': 'int_room_id',
        'FOUND_TIME': 'date_record_time',
        'CONTENT': 'str_violate_content',
        'VIOLATE_IMAGE_STATUS': 'str_sexy_image_status',
        'VIOLATE_IMAGE_URL': 'str_sexy_image_url'
    }
    violation_filter = {'violate_content': {'$ne': ''}, 'read_status': 0}
    violation_exporter = ExportData(
        source_table='LiveApp_anchor_info',
        aim_table='tab_nbsp_violate_anchor_info',
        key_map=violation_columns,
        unique_key='ANCHOR_ID',
        update_read_status=True,
        condition=violation_filter,
        unique_key_mapping_source_key={'ANCHOR_ID': 'int_room_id'})
    violation_exporter.export_to_oracle()
Exemple #12
0
def main():
    """Export program rows and then episode rows to Oracle with one shared exporter.

    'PROGRAM_info' goes to 'tab_ntms_program_info' (unique on 'PROGRAM_ID') and
    'PROGRAM_EPISODE_info' goes to 'tab_ntms_program_episode_info' (unique on
    'EPISODE_URL').
    """
    exporter = ExportData()

    # Program-level data.
    program_columns = {
        'PROGRAM_ID': 'int__id',
        'RELEASE_TIME': 'date_release_time',
        'EPISODE': 'str_episode',
        'ACTORS': 'str_actors',
        'DIRECTORS': 'str_directors',
        'PROGRAM_NAME': 'str_program_name',
        'SUMMARY': 'str_summary',
        'SITE_ID': 'int_site_id',
        'IMAGE_URL': 'str_image_url',
        'PROGRAM_URL': 'str_program_url'
    }
    exporter.export_to_oracle(source_table='PROGRAM_info',
                              aim_table='tab_ntms_program_info',
                              key_map=program_columns,
                              unique_key='PROGRAM_ID')

    # Episode-level data.
    episode_columns = {
        'ID': 'int__id',
        'PROGRAM_ID': 'int_program_id',
        'PROGRAM_EPISODE': 'str_episode_num',
        'TIME_LENGTH': 'str_time_length',
        'EPISODE_NAME': 'str_episode_name',
        'DOWNLOAD_STATUS': 'int_download_status',
        'DOWNLOAD_URL': 'str_download_url',
        # NOTE(review): PLAY_URL reuses the download_url source field - confirm intended.
        'PLAY_URL': 'str_download_url',
        'EPISODE_URL': 'str_episode_url',
        'UPDATE_TIME': 'date_record_time',
        'SUMMARY': 'str_summary',
        'IMAGE_URL': 'str_image_url',
        'sto_path': 'str_sto_path',
        'sto_id': 'int_sto_id'
    }
    exporter.export_to_oracle(source_table='PROGRAM_EPISODE_info',
                              aim_table='tab_ntms_program_episode_info',
                              key_map=episode_columns,
                              unique_key='EPISODE_URL')
Exemple #13
0
from db.oracledb import OracleDB
from IOPM.vip_checked import VipChecked

# Browser-like HTTP request headers; "Host" is pinned to 192.168.60.38:8001.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
    "Accept-Encoding": "gzip, deflate",
    "Cache-Control": "max-age=0",
    "Connection": "keep-alive",
    "Accept-Language": "zh-CN,zh;q=0.8",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
    "Upgrade-Insecure-Requests": "1",
    "Host": "192.168.60.38:8001"
}

# Module-level singletons, created once at import time.
db = OracleDB()
export_data = ExportData()
vip_checked = VipChecked()

# NOTE(review): presumably the name of a file/key holding the previous sync time - confirm at the usage site.
STO_PER_SYNC_TIME = '.sync_time'
# Base address of the IOPM service.
IOPM_SERVICE_ADDRESS = 'http://localhost:8080'

def get_url(time_lenght = 60):
    '''
    @summary: presumably builds a URL covering a time window - TODO confirm;
              only the first statement of the body is visible here
    ---------
    @param time_lenght: length of the time window, in minutes (default 60)
    ---------
    @result: not determinable from the visible part of the body
    '''

    # Current date as returned by the project's tools helper.
    current_date = tools.get_current_date()
Exemple #14
0
import requests
import pymongo
from bs4 import BeautifulSoup
import sys
sys.path.append('../')
import utils.tools as tools
from utils.export_data import ExportData

# Local MongoDB connection; all collections used below live in the 'gonggao' database.
client = pymongo.MongoClient("localhost", 27017)
db = client.gonggao

if __name__ == '__main__':
    # Enforce uniqueness of 'url' in the gonggao_content collection.
    # create_index replaces Collection.ensure_index, which was deprecated in
    # PyMongo 3.0 and removed in 4.0; the resulting index is the same.
    db.gonggao_content.create_index('url', unique=True)
    export_data = ExportData()

    # Listing pages to crawl (channel 6588, pages 1-5).
    urls = [
        'http://www.sapprft.gov.cn/sapprft/channels/6588.shtml',
        'http://www.sapprft.gov.cn/sapprft/channels/6588_2.shtml',
        'http://www.sapprft.gov.cn/sapprft/channels/6588_3.shtml',
        'http://www.sapprft.gov.cn/sapprft/channels/6588_4.shtml',
        'http://www.sapprft.gov.cn/sapprft/channels/6588_5.shtml'
    ]

    count = 0

    for url in urls:
        html, res = tools.get_html_by_requests(url)

        # <a class="fl"> and <span class="fr"> tags of the listing page; the
        # variable names suggest article links and release times.
        links = tools.get_tag(html, 'a', {'class': 'fl'})
        release_times = tools.get_tag(html, 'span', {'class': 'fr'})
def main():
    """Export app content rows and violation content rows to Oracle.

    'VAApp_content_info' goes to 'TAB_MVMS_APP_CONTENT' and
    'VAApp_vioation_content_info' goes to 'TAB_MVMS_VIOLATION_CONTENT'; both
    exports use 'ORIGINAL_URL' as the unique key and share one ExportData.
    """
    # Content rows.
    key_map = {
        'id': 'int__id',
        'APP_ID': 'int_site_id',
        # Fixed: the key used to be 'COLUMN_ID ' (trailing space), which did not
        # match the 'COLUMN_ID' key used by the violation mapping below.
        'COLUMN_ID': 'int_column_id',
        'CONTENT_ID': 'int__id',
        'RELEASE_TIME': 'date_release_time',
        'TITLE': 'str_title',
        'ORIGINAL_URL': 'str_url',
        'CONTENT': 'str_content',
        'ABSTRACT_IMAGE_URL': 'str_image_url',
        'ABSTRACT_IMAGE_LOCAL_PATH': 'str_img_stor_path',
        'VIDEO_URL': 'str_video_url',
        'VIDEO_LOCAL_PATH': 'str_video_stor_path',
        'ISAUDIO': 'int_is_audio',
        'ISDOWNLOAD': 'int_is_download',
        'CREATE_TIME': 'date_record_time',
        'VIOLATE_ID': 'int_violate_id',
        'SENSITIVE_ID': 'int_sensitive_id',
        'STORAGE_ID': 'int_storage_id',
        # Not mapped: CHECK_STATUS, CHECK_TIME, CHECK_USER, DOWN_STO_ID.
    }

    export_data = ExportData()
    export_data.export_to_oracle(source_table='VAApp_content_info',
                                 aim_table='TAB_MVMS_APP_CONTENT',
                                 key_map=key_map,
                                 unique_key='ORIGINAL_URL')

    # Violation content rows.
    key_map = {
        'id': 'int__id',
        'APP_ID': 'int_site_id',
        'COLUMN_ID': 'int_column_id',
        'CONTENT_ID': 'int__id',
        'RELEASE_TIME': 'date_release_time',
        'TITLE': 'str_title',
        'ORIGINAL_URL': 'str_url',
        'ABSTRACT_IMAGE_URL': 'str_image_url',
        'ABSTRACT_IMAGE_LOCAL_PATH': 'str_img_stor_path',
        'VIDEO_URL': 'str_video_url',
        'VIDEO_LOCAL_PATH': 'str_video_stor_path',
        'STORAGE_ID': 'int_storage_id',
        'VIOLATE_ID': 'int_violate_id',
        'SENSITIVE_ID': 'int_sensitive_id',
        'ISAUDIO': 'int_is_audio',
        'CREATE_TIME': 'date_record_time',
        # Not mapped: CHECK_STATUS, CHECK_TIME, CHECK_USER, DOWN_STO_ID, CONTENT1.
        'CONTENT': 'str_content',
    }
    export_data.export_to_oracle(source_table='VAApp_vioation_content_info',
                                 aim_table='TAB_MVMS_VIOLATION_CONTENT',
                                 key_map=key_map,
                                 unique_key='ORIGINAL_URL')
def main():
    """Export publications, their episodes and the site table to Oracle.

    One ExportData instance performs three exports in order:
    'WP_content_info' -> 'TAB_LCMS_PUBLICATION_INFO' (unique on 'name'),
    'WP_content_episode_info' -> 'tab_lcms_episode_info' (unique on 'title'),
    'WP_site_info' -> 'TAB_LCMS_SITE_INFO' (unique on 'id').
    """
    exporter = ExportData()

    # Publications. Target columns left unmapped: description, copyright_id,
    # text_result, media_result, media_confidence, sto_id, sto_path,
    # sto_tran_id, sto_tran_path, sto_fea_id, sto_fea_path, is_violation,
    # violation_description, check_status, check_time, cover_path,
    # check_user_id.
    publication_columns = {
        'id': 'int__id',
        'name': 'str_title',
        'type': 'int_data_type',
        'page_url': 'str_url',
        'media_url': 'str_image_url',
        'visit_number': 'str_watched_count',
        'site_id': 'int_site_id',
        'VIOLATE_IMAGE_STATUS': 'str_sexy_image_status',
        'VIOLATE_IMAGE_URL': 'str_sexy_image_url',
        'author': 'str_author',
        'update_time': 'date_update_time',
    }
    exporter.export_to_oracle(
        source_table='WP_content_info',
        aim_table='TAB_LCMS_PUBLICATION_INFO',
        key_map=publication_columns,
        unique_key='name',
        unique_key_mapping_source_key={'name': 'str_title'})

    # Episodes.
    episode_columns = {
        'id': 'int__id',
        'publication_id': 'int_content_id',
        'title': 'str_title',
        'video_url': 'str_video_url',
        'image_url': 'str_image_url',
        'watched_count': 'int_watched_count',
        'comment_count': 'int_comment_count',
        'release_time': 'date_release_time',
        'record_time': 'date_record_time'
    }
    exporter.export_to_oracle(
        source_table='WP_content_episode_info',
        aim_table='tab_lcms_episode_info',
        key_map=episode_columns,
        unique_key='title',
        unique_key_mapping_source_key={'title': 'str_title'})

    # Sites. Target columns left unmapped: city_id, iffocus, status,
    # update_time, description, crawl_status, hava_certificated, site_sto_id,
    # site_sto_path, check_status.
    site_columns = {
        'id': 'int_site_id',
        'name': 'str_name',
        'url': 'str_url',
        'domain_name': 'str_domain',
        'ip': 'str_ip',
        'icp_number': 'str_icp',
        'create_time': 'date_record_time',
        'ip_address': 'str_address',
        'license_key': 'str_video_license'
    }
    exporter.export_to_oracle(source_table='WP_site_info',
                              aim_table='TAB_LCMS_SITE_INFO',
                              key_map=site_columns,
                              unique_key='id')