def __init__(self, name, time=None):
    """Initialize a generic task with a display *name* and optional *time*.

    Creates a task-scoped logger and empty cookie/header stores that the
    concrete crawler fills in before issuing requests.
    """
    super().__init__()
    self.logger = ContextLogger('task')
    self.name = name
    self.time = time
    # Request state — populated later by subclasses.
    self.cookies = {}
    self.headers = {}
def __init__(self):
    """Initialize the LaGou job-listing crawl task.

    Builds the fixed header set LaGou's AJAX search endpoint expects
    (Referer, X-Requested-With, anti-forge fields, form content type).
    """
    super().__init__('LaGou数据爬取')
    self.logger = ContextLogger('task_lagou')
    self.is_crawl = False
    self.headers = {
        'Host': 'www.lagou.com',
        # Fixed: header names must not contain spaces — the original
        # 'Upgrade - Insecure - Requests' is malformed and ignored by servers.
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Accept-Encoding': 'gzip, deflate',
        'Referer': 'https://www.lagou.com/jobs/list_Python?px=new&city=%E5%85%A8%E5%9B%BD',
        'Connection': 'keep-alive',
        # NOTE(review): hard-coded session cookie (JSESSIONID/SEARCH_ID from
        # 2017) — almost certainly expired; should be obtained per session
        # rather than baked in. TODO confirm and refresh dynamically.
        'Cookie': 'user_trace_token=20171103191801-9206e24f-9ca2-40ab-95a3-23947c0b972a; _ga=GA1.2.545192972.1509707889; LGUID=20171103191805-a9838dac-c088-11e7-9704-5254005c3644; JSESSIONID=ABAAABAACDBABJB2EE720304E451B2CEFA1723CE83F19CC; _gat=1; LGSID=20171228225143-9edb51dd-ebde-11e7-b670-525400f775ce; PRE_UTM=; PRE_HOST=www.baidu.com; PRE_SITE=https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DKkJPgBHAnny1nUKaLpx2oDfUXv9ItIF3kBAWM2-fDNu%26ck%3D3065.1.126.376.140.374.139.129%26shh%3Dwww.baidu.com%26sht%3Dmonline_3_dg%26wd%3D%26eqid%3Db0ec59d100013c7f000000055a4504f6; PRE_LAND=https%3A%2F%2Fwww.lagou.com%2F; LGRID=20171228225224-b6cc7abd-ebde-11e7-9f67-5254005c3644; index_location_city=%E5%85%A8%E5%9B%BD; TG-TRACK-CODE=index_search; SEARCH_ID=3ec21cea985a4a5fa2ab279d868560c8',
        'X-Requested-With': 'XMLHttpRequest',
        'X-Anit-Forge-Token': 'None',
        'X-Anit-Forge-Code': '0',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    }
def __init__(self, name, time=None):
    """Initialize a task named *name*, optionally scheduled at *time*.

    Provides a baseline browser-like header set (attribute is ``header``,
    singular, as callers expect) and an empty cookie jar.
    """
    super().__init__()
    self.logger = ContextLogger('task')
    self.name = name
    self.time = time
    self.header = {
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:52.0) Gecko/20100101',
    }
    self.cookies = {}
def __init__(self, is_crawl=True):
    """Initialize the crawl helper.

    :param is_crawl: when True the instance is used for live crawling;
                     callers may pass False for query-only use.
    """
    self.ses = db.session
    self.is_crawl = is_crawl
    self.logger = ContextLogger('crawl')
    self.headers = {
        'Host': 'www.lagou.com',
        # Fixed: header names must not contain spaces — the original
        # 'Upgrade - Insecure - Requests' is malformed and ignored by servers.
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Accept-Encoding': 'gzip, deflate',
        'Referer': 'https://www.lagou.com/jobs/list_Python?px=new&city=%E5%85%A8%E5%9B%BD',
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        # NOTE(review): 'multipart/form-data' without a boundary parameter is
        # unusual for a default header — verify the endpoint actually needs it.
        'Content-Type': 'multipart/form-data',
    }
def __init__(self, name, time=None):
    """Initialize a task named *name*, optionally scheduled at *time*.

    Sets a browser-like default header set and an empty cookie jar for
    the concrete crawler to extend.
    """
    super().__init__()
    self.logger = ContextLogger('task')
    self.name = name
    self.time = time
    self.headers = {
        # Fixed: header names must not contain spaces — the original
        # 'Upgrade - Insecure - Requests' is malformed and ignored by servers.
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Accept-Encoding': 'gzip, deflate',
        'Referer': 'http://www.baidu.com',
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
    }
    self.cookies = {}
def __init__(self):
    """Initialize the greensnow threat-IP crawl task."""
    super().__init__('greensnow 数据爬取')
    # Shared 'threat_ip' logging context, like the other IP-feed tasks.
    self.logger = ContextLogger('threat_ip')
def __init__(self):
    """Initialize the malwaredomains threat-domain crawl task."""
    super().__init__('malwaredomains 数据爬取')
    # Shared 'threat_domain' logging context, like the other domain-feed tasks.
    self.logger = ContextLogger('threat_domain')
from app.engines import db
from app.models import Show
from datetime import datetime, timedelta
from app.logger import ContextLogger

logger = ContextLogger('show')


def calc_ip_show_update_count(count):
    """Add *count* to today's IP-type Show counter, creating the row if absent.

    :param count: number of updates to accumulate onto today's row.
    """
    format_time = datetime.now().strftime("%Y-%m-%d")
    show = db.session.query(Show).filter_by(format_time=format_time, type='ip').first()
    if show:
        show.update_of_count = show.update_of_count + count
    else:
        try:
            show = Show()
            show.type = 'ip'
            # Fixed: stamp the new row with today's date — without it the
            # filter_by(format_time=...) lookup above never matches again and
            # a fresh row is created on every call (unless the model sets a
            # default — TODO confirm against the Show model).
            show.format_time = format_time
            show.update_of_count = count
        except Exception as e:
            logger.warning("计算展示数据更新次数出错" + str(e))
    # NOTE(review): safe_commit is not imported in this view — presumably a
    # project-wide commit helper; verify it is in scope in the full module.
    safe_commit(show)


def calc_domain_show_update_count(count):
    """Add *count* to today's domain-type Show counter.

    NOTE(review): compared with :func:`calc_ip_show_update_count`, this
    function visibly lacks the create-and-commit path — likely truncated in
    this view; confirm against the full source before relying on it.
    """
    format_time = datetime.now().strftime("%Y-%m-%d")
    show = db.session.query(Show).filter_by(format_time=format_time, type='domain').first()
    if show:
        show.update_of_count = show.update_of_count + count
def __init__(self):
    """Initialize the dataplane threat-IP crawl task."""
    super().__init__('dataplane 数据爬取')
    # Shared 'threat_ip' logging context, like the other IP-feed tasks.
    self.logger = ContextLogger('threat_ip')
def __init__(self):
    """Initialize the FreeBuf article crawl task."""
    super().__init__('freebuf数据爬取')
    self.logger = ContextLogger('task_freebuf')
def __init__(self):
    """Initialize the Zhilian recruitment crawl task."""
    super().__init__('智联招聘数据爬取')
    self.logger = ContextLogger('task_zhilian')
def __init__(self):
    """Initialize the bambenekconsulting threat-domain crawl task."""
    super().__init__('bambenekconsulting 数据爬取')
    # Shared 'threat_domain' logging context, like the other domain-feed tasks.
    self.logger = ContextLogger('threat_domain')
from app.engines import db
from app.models import Patch, Vulner
from app.logger import ContextLogger

logger = ContextLogger('task')


def switch_id(patch):
    """Translate a vulnerability's patch info into stored Patch ids.

    :param patch: either the literal '暂无' (no patch) or a mapping whose
                  keys are patch titles.
    :return: 0 when there is no patch, otherwise a comma-separated string
             of matched Patch ids ('' when no title matched).
    """
    if patch == '暂无':
        return 0
    ids = []
    for key, value in patch.items():
        try:
            patch_id = db.session.query(Patch.id).filter_by(title=key).first()
            if patch_id:
                # Fixed: .first() returns a one-element Row, so the original
                # str(patch_id) produced text like '(5,)'; take the scalar id.
                # Ids are now joined with ',' — back-to-back concatenation
                # made multi-digit ids ambiguous.
                ids.append(str(patch_id[0]))
        except Exception as e:
            logger.warning('补丁:{0}查询报错'.format(patch) + str(e))
    return ','.join(ids)


if __name__ == '__main__':
    patch = db.session.query(Vulner.patch).filter_by(id=5555).first()[0]
    print(switch_id(patch))
def __init__(self):
    """Initialize the Hacker News crawl task."""
    super().__init__('hackernews数据爬取')
    self.logger = ContextLogger('task_hackernews')
def __init__(self):
    """Initialize the AlienVault threat-IP crawl task."""
    super().__init__('alienvault 数据爬取')
    # Shared 'threat_ip' logging context, like the other IP-feed tasks.
    self.logger = ContextLogger('threat_ip')
from app.utill.req import BaseReq
from app.logger import ContextLogger

logger = ContextLogger('crawl')
req = BaseReq(is_crawl=False)

# Enrich each IP row with geo/ISP data from the Taobao IP lookup service.
# NOTE(review): `ips` and `safe_commit` are defined elsewhere in this module —
# presumably an iterable of IP model rows and a commit helper; confirm.
for ip in ips:
    resp = req.get('http://ip.taobao.com/service/getIpInfo.php?ip=' + ip.ip)
    # Robustness: on an API failure the response carries no usable 'data'
    # payload; the original `resp['data']` raised and aborted the whole batch.
    data = resp.get('data') if isinstance(resp, dict) else None
    if not data:
        logger.warning('IP信息查询失败: ' + ip.ip)
        continue
    # The service uses 'XX' as a placeholder for unknown fields; strip it.
    for field in ('country', 'region', 'city', 'county', 'isp'):
        setattr(ip, field, data[field].replace('XX', ''))
    safe_commit(ip)
def __init__(self):
    """Initialize the blocklist threat-IP crawl task."""
    super().__init__('blocklist 数据爬取')
    # Shared 'threat_ip' logging context, like the other IP-feed tasks.
    self.logger = ContextLogger('threat_ip')
def __init__(self):
    """Initialize the netlab threat-domain crawl task."""
    super().__init__('netlab 数据爬取')
    # Shared 'threat_domain' logging context, like the other domain-feed tasks.
    self.logger = ContextLogger('threat_domain')
from app.engines import db
from app.logger import ContextLogger
from app.models.patch import Patch

logger = ContextLogger('task_cnnvd')


def get_patch(title):
    """Look up a Patch row by exact title.

    :param title: patch title to match exactly.
    :return: the Patch row, None when no row matches, or False when the
             query itself raised (the error is logged).
    """
    try:
        return db.session.query(Patch).filter(Patch.title == title).first()
    except Exception as e:
        logger.warning(e)
        return False


if __name__ == '__main__':
    print(get_patch('firefox-3.6.9.tests'))
def __init__(self):
    """Initialize the cinsscore threat-IP crawl task."""
    super().__init__('cinsscore 数据爬取')
    # Shared 'threat_ip' logging context, like the other IP-feed tasks.
    self.logger = ContextLogger('threat_ip')
def __init__(self, is_crawl=True):
    """Bind a DB session and crawl-scoped logger.

    :param is_crawl: when True the instance is used for live crawling;
                     pass False for query-only use.
    """
    self.logger = ContextLogger('crawl')
    self.ses = db.session
    self.is_crawl = is_crawl
def __init__(self):
    """Initialize the MaxMind threat-IP crawl task."""
    super().__init__('maxmind 数据爬取')
    # Shared 'threat_ip' logging context, like the other IP-feed tasks.
    self.logger = ContextLogger('threat_ip')
def __init__(self):
    """Initialize the zeustracker threat-domain crawl task."""
    super().__init__('zeustracker 数据爬取')
    # Shared 'threat_domain' logging context, like the other domain-feed tasks.
    self.logger = ContextLogger('threat_domain')
def __init__(self):
    """Initialize the cybercrime threat-domain crawl task."""
    super().__init__('cybercrime 数据爬取')
    # Shared 'threat_domain' logging context, like the other domain-feed tasks.
    self.logger = ContextLogger('threat_domain')
def __init__(self, name, time=None):
    """Initialize the SQLite sync task named *name*, optionally at *time*."""
    super().__init__()
    self.logger = ContextLogger('sync_sqlite')
    self.name = name
    self.time = time
def __init__(self):
    """Initialize the Emerging Threats threat-IP crawl task."""
    super().__init__('Emergingthreats 数据爬取')
    # Shared 'threat_ip' logging context, like the other IP-feed tasks.
    self.logger = ContextLogger('threat_ip')
def __init__(self):
    """Initialize the CNNVD vulnerability crawl task."""
    super().__init__('cnnvd数据爬取')
    self.logger = ContextLogger('task_cnnvd')
def __init__(self):
    """Initialize the dshield threat-domain crawl task."""
    super().__init__('dshield 数据爬取')
    # Shared 'threat_domain' logging context, like the other domain-feed tasks.
    self.logger = ContextLogger('threat_domain')
def __init__(self):
    """Initialize the rulez threat-IP crawl task."""
    super().__init__('rulez 数据爬取')
    # Shared 'threat_ip' logging context, like the other IP-feed tasks.
    self.logger = ContextLogger('threat_ip')
def __init__(self):
    """Initialize the githubusercontent threat-IP crawl task."""
    super().__init__('githubusercontent 数据爬取')
    # Shared 'threat_ip' logging context, like the other IP-feed tasks.
    self.logger = ContextLogger('threat_ip')