def __init__(self):
    """Create the database helper and initialise paging and the app id."""
    # Database helper built from conf.db_conf.
    self.db_oper = db_helper_class(conf.db_conf)

    # Paging state: page size, then read offset.
    self.page_size, self.r_offset = 20, 0

    self.app_id = conf.app_conf.app_zygw_collect_id
def __init__(self):
    """Create the database helper and initialise the paging state."""
    self.db_oper = db_helper_class(conf.db_conf)
    self.page_size = 50  # page size
    self.r_offset = 0  # read offset
def __init__(self):
    """Create the database helper and set the regex patterns and cookies.

    Attributes set:
        db_oper: database helper built from ``conf.db_conf``.
        original_word: regex source matching the ``copyright_logo`` span
            whose text is the "original content" marker.
        intro: regex source capturing the body of a ``<p class="txt-info">``
            element in the named group ``intro``.
        cookie: dict of cookie names to values.
    """
    # Database object.
    self.db_oper = db_helper_class(conf.db_conf)

    # Regex patterns are raw strings so that ``\s`` reaches the regex engine
    # as written instead of relying on Python passing an unknown escape
    # sequence through (which warns on modern interpreters). The resulting
    # string values are unchanged.
    # "Original content" marker.
    self.original_word = r'''<span\sid="copyright_logo"\sclass="rich_media_meta\smeta_original_tag">原创</span>'''
    # Introduction paragraph.
    self.intro = r'''(?:<p\sclass="txt-info".*?>)(?P<intro>.*?)(?:</p>)'''

    # Cookie settings.
    # NOTE(review): these look like captured session values hard-coded in
    # source; they will expire and should probably come from configuration.
    self.cookie = {
        "SUV": "00175E2BDA6B097D57CBCDBE7DA71981",
        "IPLOC": "CN4401",
        "CXID": "860E089DD32C47EE1ECF55ED807B8483",
        "weixinIndexVisited": "1",
        "Hm_lvt_96d9d92b8a4aac83bc206b6c9fb2844a": "1474170419,1474181444",
        "pgv_pvi": "5441667072",
        "m": "3B3BF1EE5348A83F1126D107D0679A9B",
        "GOTO": "Af99046",
        "ld": "1kllllllll2gc8WVQx31FOkMUKPY9rSnLu7JKyllll9lllll4klll5@@@@@@@@@@",
        "ad": "IZllllllll2g8FjPlllllVkcVMklllllLu7JKyllllklllllpqxlw@@@@@@@@@@@",
        "SUID": "7D096BDA3320910A0000000057CD2EFF",
        "YYID": "3B3BF1EE5348A83F1126D107D0679A9B",
        "ABTEST": "6|1480328071|v1",
        "SNUID": "176301B06A6F2830D6BC3C936AE5FEE5",
        "SUIR": "176301B06A6F2830D6BC3C936AE5FEE5",
        "ppinf": "5|1480472199|1481681799|dHJ1c3Q6MToxfGNsaWVudGlkOjQ6MjAxN3x1bmlxbmFtZTo0NTolRTYlQTElOTElRTQlQkIlQTMlRTUlODUlOEIlRTclOUElODQlRTclOEMlQUJ8Y3J0OjEwOjE0ODA0NzIxOTl8cmVmbmljazo0NTolRTYlQTElOTElRTQlQkIlQTMlRTUlODUlOEIlRTclOUElODQlRTclOEMlQUJ8dXNlcmlkOjQ0OjBGRUY3RjYyNURFRURDNENENTQ1QTQzRUVGM0NGN0I1QHFxLnNvaHUuY29tfA",
        "pprdig": "RiQVGnfvU914niX77Vxk8xtJThF_ux3DA7UjmVNJrxBwrzpx6CWo38ZIEm4AfXZ3Ez6dKMnwA0pj3kXHiP8-Qytg8HVJfNCd0NwWrGOjgLT3CHcQtHeHy7oqhJhwGtjlIqataNhNA-JbcH6KTtliGMwLlDw7GmF_Odfl0Wz7JQE",
        "JSESSIONID": "aaa-ISi7b1d8ufz7S_UIv",
        "sct": "45",
        "usid": "NNV9tIXWQrAUgBuz",
        "clientId": "3A41D977ADC76C0D1A18D0A0AC666DB9",
    }
def __init__(self):
    """Store the interface URL, the database helper and the start offset."""
    # Interface URL.
    endpoint = "http://askapi.jianke.com/app/article/AddFromWeiXinArticle"
    self.interface = endpoint

    # Database object.
    self.db_oper = db_helper_class(conf.db_conf)

    # Offset.
    self.offset = 0
def __init__(self):
    """Start a hidden spynner browser and initialise database and counters."""
    # Browser object, created with a randomly chosen user agent and hidden.
    # (A proxy call and a show() call existed here only as disabled code.)
    self.m_browser = spynner.Browser(
        user_agent=comm.random_useragent.getRandomUAItem()
    )
    self.m_browser.hide()

    # Database object.
    self.db_oper = db_helper_class(conf.db_conf)

    self.page_size = 20  # page size
    self.r_offset = 0  # read offset
    self.curr_prog = 0  # current progress
    self.drop_count = 0  # total number of dropped records

    # App: Kangaiduo Tmall full product details.
    # TODO: switch to conf.app_conf.app_tmall_all_products_detail and
    # conf.class_conf.cls_tmall_all_products_detail; 999 is presumably a
    # placeholder until then.
    self.app_id = 999
    self.class_id = 999
def __init__(self):
    """Set up database access plus the HTTP request and image settings.

    Attributes set:
        db_oper: database helper built from ``conf.db_conf``.
        original_word: regex source matching the ``copyright_logo`` span
            whose text is the "original content" marker.
        intro: regex source capturing the body of a ``<p class="txt-info">``
            element in the named group ``intro``.
        cookie: dict of cookie names to values.
        headers: dict of request headers (Host is ``weixin.sogou.com``).
        IP: proxy mapping keyed by URL scheme.
        pictrue_uri: local directory where images are stored.
        pictrue_suffix: suffix of the image files on the server.
    """
    # Database object.
    self.db_oper = db_helper_class(conf.db_conf)

    # Regex patterns are raw strings so that ``\s`` reaches the regex engine
    # as written instead of relying on Python passing an unknown escape
    # sequence through (which warns on modern interpreters). The resulting
    # string values are unchanged.
    # "Original content" marker.
    self.original_word = r'''<span\sid="copyright_logo"\sclass="rich_media_meta\smeta_original_tag">原创</span>'''
    # Introduction paragraph.
    self.intro = r'''(?:<p\sclass="txt-info".*?>)(?P<intro>.*?)(?:</p>)'''

    # Cookie.
    # NOTE(review): these look like captured session values hard-coded in
    # source; they will expire and should probably come from configuration.
    self.cookie = {
        "SUV": "00175E2BDA6B097D57CBCDBE7DA71981",
        "CXID": "860E089DD32C47EE1ECF55ED807B8483",
        "weixinIndexVisited": "1",
        "Hm_lvt_96d9d92b8a4aac83bc206b6c9fb2844a": "1474170419,1474181444",
        "pgv_pvi": "5441667072",
        "m": "3B3BF1EE5348A83F1126D107D0679A9B",
        "GOTO": "Af99046",
        "ABTEST": "6|1480328071|v1",
        "sw_uuid": "798063453",
        "sg_uuid": "8945378958",
        "ssuid": "5959137290",
        "SUID": "7D096BDA3320910A0000000057CD2EFF",
        "YYID": "3B3BF1EE5348A83F1126D107D0679A9B",
        "SUIR": "2A5F3C8D57521489E59E561E57F0BC8D",
        "ad": "PZllllllll2g8FjPlllllVPweaclllllLu7JKyllll9llllljVxlw@@@@@@@@@@@",
        "SNUID": "0D781BAB7074326DFB22BCF37153560F",
        "PHPSESSID": "kps01kgi2681ridvjbt5kgnu13",
        "JSESSIONID": "aaaBC8pLZXqTT85C1JCJv",
        "sct": "82",
        "IPLOC": "CN4401",
        "usid": "NNV9tIXWQrAUgBuz",
        "clientId": "36239E5B32D161140C0797BA330D60E4",
        "ppinf": "5|1481254589|1482464189|dHJ1c3Q6MToxfGNsaWVudGlkOjQ6MjAxN3x1bmlxbmFtZToyNzolRTclQjIlODklRTclQkElQTIlRTclOEMlQUF8Y3J0OjEwOjE0ODEyNTQ1ODl8cmVmbmljazoyNzolRTclQjIlODklRTclQkElQTIlRTclOEMlQUF8dXNlcmlkOjQ0OkM0RTE3Q0YwNkQyNEU2RDNENEI2RkNCOTU4RDlFM0MwQHFxLnNvaHUuY29tfA",
        "pprdig": "JyxNQefqTXRhf21x-VU-PgZrGMZrYBHF4YL8YdZqZ600VXLrd3ndG7TH3VgOsWSaJz8XIwVR3CWqGUzgWM7SMJG25PeaI2hyhpWOlsx3y23cAPuazlv8e5PXQu0eRBxveRVuW-rbzTaczCHJQySveDKH60BVf3AJxjB1s3JP1Rc",
        "ld": "wkllllllll2gc8WVQx31FOP3J7IY9rSnLu7JKyllll9llllljylll5@@@@@@@@@@",
    }

    # Cookie header value, assembled on a single line. The original literal
    # contained a raw line break after the GOTO pair; a header value must
    # not contain CR/LF, so the pairs are joined with "; " only.
    cookie_pairs = (
        "SUV=00175E2BDA6B097D57CBCDBE7DA71981",
        "CXID=860E089DD32C47EE1ECF55ED807B8483",
        "weixinIndexVisited=1",
        "Hm_lvt_96d9d92b8a4aac83bc206b6c9fb2844a=1474170419,1474181444",
        "pgv_pvi=5441667072",
        "m=3B3BF1EE5348A83F1126D107D0679A9B",
        "GOTO=Af99046",
        "ABTEST=6|1480328071|v1",
        "sw_uuid=798063453",
        "sg_uuid=8945378958",
        "ssuid=5959137290",
        "SUID=7D096BDA3320910A0000000057CD2EFF",
        "YYID=3B3BF1EE5348A83F1126D107D0679A9B",
        "SUIR=2A5F3C8D57521489E59E561E57F0BC8D",
        "ad=PZllllllll2g8FjPlllllVPweaclllllLu7JKyllll9llllljVxlw@@@@@@@@@@@",
        "SNUID=0D781BAB7074326DFB22BCF37153560F",
        "ld=wkllllllll2gc8WVQx31FOP3J7IY9rSnLu7JKyllll9llllljylll5@@@@@@@@@@",
        "sct=83",
        "JSESSIONID=aaaaT7jc4kb2Tevy-LCJv",
        "clientId=044136D5677C6A4D386D3F3466DD6B92",
        "PHPSESSID=aivb167lgfhavakjhaf49j0d20",
        "usid=NNV9tIXWQrAUgBuz",
        "ppinf=5|1481276797|1482486397|dHJ1c3Q6MToxfGNsaWVudGlkOjQ6MjAxN3x1bmlxbmFtZToyNzolRTclQjIlODklRTclQkElQTIlRTclOEMlQUF8Y3J0OjEwOjE0ODEyNzY3OTd8cmVmbmljazoyNzolRTclQjIlODklRTclQkElQTIlRTclOEMlQUF8dXNlcmlkOjQ0OkM0RTE3Q0YwNkQyNEU2RDNENEI2RkNCOTU4RDlFM0MwQHFxLnNvaHUuY29tfA",
        "pprdig=ag8bzSGYb76QsS6dyxdJpo52aXhgH9BfTlDOl-Mw552saDnXmMRrt4DPZGQ1A8Q7sp9-mdQtOr9lpK0-9kYIurCYYXaGDYUDUm5ug2_oJuWVPXqOmBL4ZhNnbVVp8gGH1rmjn7FvmKXlPh0pThqlYYldDeYm7AcKPiC-b5ToaUk",
        "ppmdig=1481276797000000ecefe85006bd7f2e159fe5464c707c32",
        "IPLOC=CN8100",
    )

    # Request headers.
    # NOTE(review): some values in the Cookie header differ from self.cookie
    # (e.g. sct, JSESSIONID, clientId); confirm which set callers rely on.
    self.headers = {
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate, sdch",
        "Accept-Language": "zh-CN,zh;q=0.8",
        "Cache-Control": "max-age=0",
        "Cookie": "; ".join(cookie_pairs),
        "Host": "weixin.sogou.com",
        "If-Modified-Since": "Tue, 29 Nov 2016 05:15:10 GMT",
        "If-None-Match": "583d0ede-123",
        "Proxy-Connection": "keep-alive",
        "Referer": "http://weixin.sogou.com/weixin?type=2&query=cctv&ie=utf8&_sug_=n&_sug_type_=",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.22 Safari/537.36 SE 2.X MetaSr 1.0",
    }

    # Proxy IP (an alternative, 121.14.6.236:80, existed as disabled code).
    self.IP = {"http": "218.103.60.205:8080"}

    # Local image storage path.
    self.pictrue_uri = "F:\\pictrue\\"
    # Server image suffix.
    self.pictrue_suffix = ".jpg"
def __init__(self):
    """Initialise the image save root, paging state and database helper."""
    self.m_save_root = ""  # image save path
    self.page_size = 20  # page size
    self.off_size = 0  # offset size

    # Database helper built from conf.db_conf.
    self.m_db_oper = db_helper_class(conf.db_conf)
def __init__(self):
    """Set up database access, app/class ids, job status and the job id."""
    self.db_oper = db_helper_class(conf.db_conf)

    # App: Taobao flash sale.
    self.app_id = conf.app_conf.app_tbhdqg_collect_id
    # Category: Taobao flash-sale activity.
    self.class_id = conf.class_conf.cls_tbhdqg
    # Display name.
    self.web_name = "淘宝抢购"

    self.job_stat = JobSta()

    # Job id is "<timestamp>_<app_id>_<class_id>", where the timestamp is
    # the local time formatted as T + year/month/day/hour/minute.
    stamp = time.strftime('T%Y%m%d%H%M')
    self.job_id = "%s_%s_%s" % (stamp, self.app_id, self.class_id)
def __init__(self):
    """Start a hidden spynner browser and set database, paging and ids."""
    browser = spynner.Browser()
    browser.hide()
    self.m_browser = browser

    # Database object.
    self.db_oper = db_helper_class(conf.db_conf)

    self.page_size = 20  # page size
    self.r_offset = 0  # read offset

    self.app_id = conf.app_conf.app_360haoyao_id
    self.class_id = conf.class_conf.class_360haoyao_all
def __init__(self):
    """Create the database helper, counters and the HTML entity parser."""
    # Database object.
    self.db_oper = db_helper_class(conf.db_conf)

    self.page_size = 20  # page size
    self.r_offset = 0  # read offset
    self.curr_prog = 0  # number of products processed
    self.load_num = 0  # number of products stored

    # Parser used for HTML character entities.
    self.html_analysis = HTMLParser.HTMLParser()
def __init__(self):
    """Set up database access, regex patterns, proxy and image settings.

    Attributes set:
        db_oper: database helper built from ``conf.db_conf``.
        original_word: regex source matching the ``copyright_logo`` span
            whose text is the "original content" marker.
        intro: regex source capturing the body of a ``<p class="txt-info">``
            element in the named group ``intro``.
        IP: proxy mapping keyed by URL scheme.
        pictrue_uri: local directory where images are stored.
        pictrue_suffix: suffix of the image files on the server.
    """
    # Database object.
    self.db_oper = db_helper_class(conf.db_conf)

    # Regex patterns are raw strings so that ``\s`` reaches the regex engine
    # as written instead of relying on Python passing an unknown escape
    # sequence through (which warns on modern interpreters). The resulting
    # string values are unchanged.
    # "Original content" marker.
    self.original_word = r'''<span\sid="copyright_logo"\sclass="rich_media_meta\smeta_original_tag">原创</span>'''
    # Introduction paragraph.
    self.intro = r'''(?:<p\sclass="txt-info".*?>)(?P<intro>.*?)(?:</p>)'''

    # Proxy IP (an alternative, 121.14.6.236:80, existed as disabled code).
    self.IP = {"http": "218.103.60.205:8080"}

    # Local image storage path.
    self.pictrue_uri = "F:\\pictrue_1\\"
    # Server image suffix.
    self.pictrue_suffix = "_1.jpg"
def __init__(self):
    """Create the database helper and reset paging, counters and app id."""
    self.db_oper = db_helper_class(conf.db_conf)

    # Paging state: page size, then read offset.
    self.page_size, self.r_offset = 20, 0
    # Current progress, then total number of dropped records.
    self.curr_prog, self.drop_count = 0, 0

    # App: Tmall prescription drug details.
    self.app_id = conf.app_conf.app_tmfcy_detail_id
def __init__(self):
    """Start a hidden spynner browser and initialise database and counters."""
    # Browser object, created with a randomly chosen user agent and hidden.
    self.m_browser = spynner.Browser(
        user_agent=comm.random_useragent.getRandomUAItem()
    )
    self.m_browser.hide()

    self.db_oper = db_helper_class(conf.db_conf)

    self.page_size = 20  # page size
    self.r_offset = 0  # read offset
    self.curr_prog = 0  # current progress
    self.drop_count = 0  # total number of dropped records

    # App: Tmall health products details.
    self.app_id = conf.app_conf.app_tmall_health_prods_detail
    self.class_id = conf.class_conf.cls_tmall_health_prods_detail
def __init__(self):
    """Set up database access, paging, ids, job status and the job id."""
    self.db_oper = db_helper_class(conf.db_conf)

    self.page_size = 20  # page size
    self.r_offset = 0  # read offset
    self.kad_id = 100001

    # App and category ids, taken from the yyw "taozhuang" entries in conf.
    self.app_id = conf.app_conf.app_yyw_taozhang
    self.class_id = conf.class_conf.cls_web_taozhuang_yyw

    self.job_stat = JobSta()

    # Job id is "<timestamp>_<app_id>_<class_id>", where the timestamp is
    # the local time formatted as T + year/month/day/hour/minute.
    stamp = time.strftime('T%Y%m%d%H%M')
    self.job_id = "%s_%s_%s" % (stamp, self.app_id, self.class_id)

    # Display name (roughly "Yiyaowang treatment-course bundles").
    self.web_name = "壹药网疗程装套装"
    self.prog = 0
def __init__(self):
    """Initialise the loader settings to empty values and open the database."""
    # Text settings, all empty until configured.
    self.m_scan_path = ""  # scan path
    self.m_file_tmpl = ""  # file template
    self.m_fld_sep = ""  # field separator used in the file
    self.m_target_table = ""  # target table
    self.m_fld_list = ""  # field list
    self.m_load_rename = ""  # rename template

    # ZIP handling.
    self.m_iszip = False  # whether the input is a ZIP
    self.m_zip_path = ""  # ZIP path

    self.m_flds_count = 0  # number of fields

    # Database connection, built from buz_db_conf.
    self.db_oper = db_helper_class(buz_db_conf)
def __init__(self):
    """Create the database helper from conf.db_conf."""
    helper = db_helper_class(conf.db_conf)
    self.db_oper = helper
def __init__(self):
    """Create the database helper and initialise the paging state."""
    # Database object.
    self.db_oper = db_helper_class(conf.db_conf)

    # Paging state: page size, then read offset.
    self.page_size, self.r_offset = 20, 0
def __init__(self):
    """Set up database access, the original-content regex and image settings.

    Attributes set:
        db_oper: database helper built from ``conf.db_conf``.
        pictrue_suffix: suffix used for image files.
        original_word: regex source matching the ``copyright_logo`` span
            whose text is the "original content" marker.
        pictrue_uri: local directory used for images.
    """
    # Database object.
    self.db_oper = db_helper_class(conf.db_conf)

    self.pictrue_suffix = "_pc.jpg"

    # Raw string so that ``\s`` reaches the regex engine as written instead
    # of relying on Python passing an unknown escape sequence through (which
    # warns on modern interpreters). The resulting string value is unchanged.
    self.original_word = r'''<span\sid="copyright_logo"\sclass="rich_media_meta\smeta_original_tag">原创</span>'''

    self.pictrue_uri = "F:\\pictrue_pc\\"