def getImages(self, url, title):
    try:
        r = self.get(url)
        html = r.text
        soup = BeautifulSoup(html, 'html.parser')
        p_images = soup.find_all('p', style='text-align:center')
        if len(p_images) > 0:
            # Create a new topic record
            now = int(time.time())
            topic_id = Mysql.create(
                "INSERT INTO lab_topic (title, create_time, update_time) VALUES ('%s', %s, %s)"
                % (title, now, now))
            images_values = []
            for p_image in p_images:
                url = p_image.img['src']
                p_name = p_image.img['alt']
                p_object = re.search(r'id=(\d*)\..*', p_name, re.I)
                p_id = int(p_object.group(1)) if p_object else 0
                # Get the file extension
                etc = os.path.splitext(url)[1]
                date = time.strftime('%Y%m%d', time.localtime(time.time()))
                old_name = date + str(round(time.time() * 1000)) + p_name
                name = hashlib.md5(old_name.encode(encoding='UTF-8')).hexdigest() + etc
                downloadPath = os.path.join(self.downloadPath, date)
                self.downloadImage(url, downloadPath, name)
                save_path = date + '/' + name
                images_values.append("('%s', %s, '%s', %s, %s, %s)"
                                     % (save_path, topic_id, p_name, p_id, now, now))
            create_sql = ('INSERT INTO lab_image (url, topic_id, name, p_id, create_time, update_time) VALUES '
                          + ','.join(images_values))
            Mysql.execute(create_sql)
            print('\033[1;32m--------------------已创建:', title, '\033[0m')
    except Exception as e:
        # TODO: log
        print(e)
        return
def createNews(self, url):
    try:
        r = self.get(url)
        html = r.text
        soup = BeautifulSoup(html, 'html.parser')
        # Get the link to the related article
        linkHtml = soup.find('a', class_='dec_img')
        viewUrl = str(linkHtml['href'])
        if self.oldUrl == viewUrl:
            return -2
        image = str(linkHtml.img['src'])
        subject = str(linkHtml['title']).replace("'", "''")
        introHtml = linkHtml.parent.parent
        intro = introHtml.find('p', class_='com_about').get_text().replace("'", "''")
        catalogName = introHtml.find('span', class_='bq_ico').get_text()
        if catalogName == '美图':
            # Skip items in the '美图' (gallery) category
            return -2
        category_id = self.getCatalog(catalogName)
        content = self.getNewsView(viewUrl)
        now = int(time.time())
        user_id = 1
        author = '网络'
        sql = ("INSERT INTO news (user_id, author, catalog_id, title, intro, content, cover, ctime, utime) "
               "VALUES (%s, '%s', %s, '%s', '%s', '%s', '%s', %s, %s)"
               % (user_id, author, category_id, subject, intro, content, image, now, now))
        newsID = Mysql.create(sql)
        return '{ "id": ' + str(newsID) + ', "url": "' + viewUrl + '"}'
    except Exception as e:
        # TODO: log
        # print(e)
        return -1
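# A minimal caller sketch (an assumption, not part of the original source), assuming
# getImages/createNews are methods of the DmzjCrawler class shown further down:
# createNews returns -2 when the article was already crawled or belongs to the skipped
# '美图' category, -1 on any error, and otherwise a JSON string with the new record's
# id and url. The crawler instantiation below is hypothetical.
import json

crawler = DmzjCrawler()  # hypothetical: the full constructor is not shown in this excerpt
result = crawler.createNews('https://news.dmzj.com/article/48293.html')
if result == -2:
    pass        # already crawled, or skipped category
elif result == -1:
    pass        # parsing or database error
else:
    news = json.loads(result)   # {"id": <newsID>, "url": <viewUrl>}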
def update_holidays(oldname, newname):
    # Decode '%20' back to spaces; str.replace returns a new string,
    # so the result must be assigned (the original call discarded it).
    old = oldname.replace('%20', ' ')
    new = newname.replace('%20', ' ')
    response = Mysql.update1(old, new)
    return jsonify(response)
def get_data():
    response = Mysql.select1()
    return Response(
        response=json.dumps(response),
        status=200,
        mimetype="application/json"
    )
def main():
    fname = config.log_path + 'article_parse.' + time.strftime("%Y%m%d")
    log.set_logger(level='DEBUG', when="D", limit=1, filename=fname)
    alist = Mongo().scan()
    if not alist:
        log.warning("no articles in mongodb")
        return False
    MyObj = Mysql()
    mobj = Mongo()
    for doc in alist:
        if Parse(MyObj).do(doc):
            mobj.update(doc.get("_id"), done=1)
            log.info("insert mysql success, url:%s" % doc.get('url'))
        else:
            mobj.update(doc.get("_id"), done=-1)
            log.warning("insert mysql failure, task_id:%s, url:%s"
                        % (doc.get('taskid'), doc.get('url')))
# -*- coding: utf-8 -*-
# Crawl pixiv images from 动漫之家 (dmzj); reference page: https://news.dmzj.com/article/48293.html
# TODO: set up the data tables and relations; upload images to third-party cloud storage

import requests
from bs4 import BeautifulSoup
import os
import time
import hashlib
import re
from db import Mysql

Mysql = Mysql()


class DmzjCrawler():
    # Download and task counters
    x = 0
    taskNum = 0
    # Page index to crawl; incremented while the process runs
    page = 60
    # Request headers
    headers = {
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'accept-encoding': 'gzip, deflate, br',
        'accept-language': 'zh-CN,zh;q=0.9',
        'cache-control': 'max-age=0',
        'cookie': 'UM_distinctid=165d8713f431c-03be91a8ec41a-54103715-1fa400-165d8713f4532b; show_tip_1=0',
        'referer': 'https://news.dmzj.com/article/12875.html',
        'upgrade-insecure-requests': '1',
               jpos2=data.jpos[2], jpos3=data.jpos[3], jpos4=data.jpos[4],
               jpos5=data.jpos[5], jpos6=data.jpos[6], jpos7=data.jpos[7],
               jpos8=data.jpos[8], jpos9=data.jpos[9], jpos10=data.jpos[10],
               jpos11=data.jpos[11], jpos12=data.jpos[12], jpos13=data.jpos[13],
               jpos14=data.jpos[14], jpos15=data.jpos[15])
    idx += 1
    db._commit()


def subscriber():
    # Subscribe to the raven state topic and block until shutdown
    rospy.init_node('raven_state_subscriber', anonymous=True)
    rospy.Subscriber("/ravenstate", raven_state, callback)
    rospy.spin()


if __name__ == '__main__':
    db = Mysql(host='130.126.140.209', user='******', password='******', database='raven')
    subscriber()
from binance.client import Client
import hmac
import hashlib
import json, requests
import traceback

from config import *
from utils.retry import retry
from utils.logger import Logger

logger = Logger.get_logger("market")

from db import Mysql, Trade

db = Mysql()

import redis

ex = 10


def market_factory(name):
    if name.lower() == 'binance':
        return Binance
    elif name.lower() == 'bibox':
        return Bibox


class Market:
    def __init__(self, product, basecoin):
        self.product = product.upper()
        self.basecoin = basecoin.upper()
        self.feecoin = ""
class SqliManage(object):
    def __init__(self, sqlmapapiurl, adminid):
        self.sqlmapapiurl = sqlmapapiurl  # SQLMAP API server address
        self.adminid = adminid            # SQLMAP API adminid
        self.mysql = Mysql(conf.db_host, conf.db_port, conf.db_user, conf.db_pass, conf.db_name)

    # Get the current task list
    def _get_task_list(self):
        checkurl = self.sqlmapapiurl + '/admin/' + self.adminid + '/list'
        resp = json.loads(do_get(checkurl))
        return resp['tasks']

    # Write scan results to the database
    def _item2db(self, taskid):
        dataurl = self.sqlmapapiurl + '/scan/' + taskid + '/data'
        resp = json.loads(do_get(dataurl))
        data = resp['data']
        if data != []:
            dset = "data='%s', sqli=1" % base64.b64encode(str(data[0]))
        else:
            logurl = self.sqlmapapiurl + '/scan/' + taskid + '/log'
            resp = json.loads(do_get(logurl))
            log = resp['log']
            dset = "data='%s', sqli=0" % base64.b64encode(str(log))
        where = "taskid='%s'" % taskid
        self.mysql.update('sub_sqli', dset, where)
        return

    # Delete a task
    def _delete_task(self, taskid):
        deleteurl = self.sqlmapapiurl + '/task/' + taskid + '/delete'
        do_get(deleteurl)
        return

    # Handle results of finished tasks
    def handle_result(self):
        tasklist = self._get_task_list()
        for taskid, state in tasklist.items():
            if state == 'terminated':
                self._item2db(taskid)
        return

    # Insert the initial record for a SQLi task
    def _task2db(self, taskid, url, body, psw):
        self.mysql.insert('sub_sqli', ('taskid', 'url', 'body', 'hash'), (taskid, url, body, psw))
        return

    # Create a SQLi scan task
    def send2sqlmap(self, url, user_agent='', cookie='', body=''):
        flag, psw = self._is_need_sqli_test(url, body)
        if not flag:
            return False
        newurl = self.sqlmapapiurl + '/task/new'
        resp = json.loads(do_get(newurl))
        taskid = resp['taskid']
        log('send2sqlmap', 'task is created. id : %s' % taskid)
        data = {}
        data['url'] = url
        if cookie != '' and cookie != []:
            data['cookie'] = cookie[0]
            data['headers'] = 'User-Agent: ' + user_agent[0]
        if body != '':
            data['data'] = body
        if url[0:5] == 'https':
            forcesslurl = self.sqlmapapiurl + '/option/' + taskid + '/set'
            do_post(url=forcesslurl, data='{"forceSSL" : true}')
        starturl = self.sqlmapapiurl + '/scan/' + taskid + '/start'
        do_post(url=starturl, data=json.dumps(data))
        log('send2sqlmap', 'task is started. id : %s' % taskid)
        self._task2db(taskid, url, body, psw)
        return True

    # Check whether this request needs a SQLi test
    def _is_need_sqli_test(self, url, body):
        parsedurl = urlparse(url)
        if parsedurl.query == '' and body == '':
            return False, ''
        paramlist = parsedurl.query.split('&')
        paramstring = ''
        for param in paramlist:
            paramstring += str(param.split('=')[0])
        test = parsedurl.netloc + parsedurl.path + parsedurl.params + paramstring + body
        m = hashlib.md5()
        m.update(test)
        psw = m.hexdigest()
        # Skip requests whose fingerprint is already in the database
        for one in self.mysql.select(('hash',), 'sub_sqli'):
            if psw == one[0]:
                return False, ''
        # Only test targets whose domain is in the whitelist file
        f = open('plugins/mysub/config/targetdomain', 'r')
        domains = f.readlines()
        f.close()
        for one in domains:
            if one[:-1] in parsedurl.netloc:
                return True, psw
        return False, ''

    # Get vulnerable results
    def get_sqli_result(self):
        return self.mysql.select(('url', 'body', 'data'), 'sub_sqli', 'sqli=1')

    # Get non-vulnerable results
    def get_no_sqli_result(self):
        return self.mysql.select(('url', 'body', 'data'), 'sub_sqli', 'sqli=0')

    # Get tasks that are still running
    def get_scaning_list(self):
        return self.mysql.select(('url', 'body'), 'sub_sqli', 'sqli is NULL')

    # Force cleanup of unfinished tasks
    def tasks_clean(self):
        tasklist = self._get_task_list()
        for taskid in tasklist:
            self._delete_task(taskid)
        self.mysql.delete('sub_sqli', 'sqli is NULL')
        return

    # Clear the whole table
    def clean_db(self):
        self.mysql.delete('sub_sqli')
        return
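# A minimal usage sketch (an assumption, not part of the original source): the
# sqlmapapi URL, adminid, and request values below are hypothetical placeholders.
manager = SqliManage('http://127.0.0.1:8775', 'adminid-placeholder')

# Queue a request for testing; returns False if it is filtered out
# (no parameters, already fingerprinted, or domain not in the whitelist file).
manager.send2sqlmap('http://example.com/item.php?id=1',
                    user_agent=['Mozilla/5.0'], cookie=['sid=abc'])

# Later, persist the results of any terminated scans and read them back.
manager.handle_result()
for url, body, data in manager.get_sqli_result():
    log('report', 'sqli found: %s' % url)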
def del_holidays(name):
    # str.replace returns a new string, so assign the result (the original call discarded it)
    name = name.replace('%20', ' ')
    response = Mysql.delete1(name)
    return jsonify(response)
def add_holidays():
    # Insert every holiday from the Calendarific payload; only the response of the
    # last insert is returned (this mirrors the original behaviour).
    for dic in calendarifics['response']['holidays']:
        response = Mysql.insert(dic)
    return jsonify(response)
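# A hedged wiring sketch (an assumption, not shown in the original source): the route
# paths and app object below are hypothetical, illustrating how handlers such as
# get_data, add_holidays, update_holidays, and del_holidays could be exposed with Flask,
# which the use of jsonify and Response suggests.
from flask import Flask

app = Flask(__name__)

app.add_url_rule('/holidays', 'get_data', get_data, methods=['GET'])
app.add_url_rule('/holidays/add', 'add_holidays', add_holidays, methods=['POST'])
app.add_url_rule('/holidays/<oldname>/<newname>', 'update_holidays', update_holidays, methods=['PUT'])
app.add_url_rule('/holidays/<name>', 'del_holidays', del_holidays, methods=['DELETE'])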
# -*- coding: utf-8 -*-
import top.api
import taobao_env
from db import Mysql

sm = Mysql("sm")
sql = "select name,pic from item where status=1 and name is not null and name !=''"

req = top.api.TaobaokeItemsDetailGetRequest()
# items, chunk = [1,2,3,4,5,6,7,8,9], 3
# zip(*[iter(items)]*chunk)
req.fields = "click_url,shop_click_url,seller_credit_score,num_iid,title,nick,pic_url,price"

ret = sm.select(sql)
print len(ret)

size = 10
for num in [ret[i: i + size] for i in range(0, len(ret), size)]:
    req.num_iids = ",".join([x["name"] for x in num])
    try:
        resp = req.getResponse()
        for taobaoke_item_detail in resp.get("taobaoke_items_detail_get_response").get("taobaoke_item_details").get("taobaoke_item_detail"):
            tbPath = taobaoke_item_detail.get("click_url")
            item = taobaoke_item_detail.get("item")
            num_iid = item.get("num_iid")
            price = item.get("price")
            pic_url = item.get("pic_url")
            title = item.get("title")
            sql = "update item set tbPath='%s',newPrice=%s where name=%s" % (tbPath, price, num_iid)
            print sql
            sm.query(sql)
            sm.commit()
        print(resp)
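# Side note on the batching idiom above (illustration only, not from the original
# source): the slice comprehension splits the result set into chunks of `size`,
# which is why num_iids carries at most 10 item names per Taobao API call.
rows = list(range(25))
batches = [rows[i: i + 10] for i in range(0, len(rows), 10)]
assert [len(b) for b in batches] == [10, 10, 5]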
class WeiboComment(object):
    """View a given user's comments under one Weibo post.

    weibo_url is the URL of the Weibo detail page;
    user is the nickname of the user whose comments are highlighted.
    """

    def __init__(self, weibo_url=WEIBO_URL, user=USERNAME, proxies=PROXIES,
                 timeout=TIMEOUT, headers=HEADERS):
        self.chrome_options = Options()
        self.chrome_options.add_argument("--headless")
        self.chrome_options.add_argument("--disable-gpu")
        self.proxies = proxies
        self.today = f"{today.month}月{today.day}日"
        self.urls = []
        self.source = ''
        self.timeout = timeout
        self.user = user
        self.weibo_url = weibo_url
        self.cookies = {}
        self.headers = headers

    def get_cookies(self):
        # Render the page with headless Chrome, dump the cookies to disk
        driver = webdriver.Chrome(chrome_options=self.chrome_options)
        driver.get(self.weibo_url)
        time.sleep(5)
        self.source = driver.page_source
        logger.debug(self.source)
        _cookies = driver.get_cookies()
        for cookie in _cookies:
            self.cookies[cookie['name']] = cookie['value']
        with open(COOKIES, 'wb') as f:
            pickle.dump(self.cookies, f)
        driver.quit()

    def _cookies(self):
        # Reuse pickled cookies when they still work; otherwise fetch fresh ones
        if os.path.exists(COOKIES):
            with open(COOKIES, 'rb') as f:
                _cookies = pickle.load(f)
            _res = requests.get(self.weibo_url, cookies=_cookies, headers=self.headers)
            if not _res.history:
                self.cookies.update(_cookies)
                self.source = _res.text
                logger.info(self.source)
            else:
                self.get_cookies()
        else:
            self.get_cookies()

    def _base(self):
        # Derive the comment count, page count, and weibo id from the page source
        try:
            comments = int(re.findall(r'count=\\"(\d+)', self.source)[0])
            if comments % 20 == 0:
                pages = comments // 20
            else:
                pages = comments // 20 + 1
        except IndexError:
            logger.error(f"no comments count\n{self.source}")
            sys.exit(1)
        try:
            weibo_id = re.findall(r'%3D(\d+)&title', self.source)[0]
            logger.info(f"Weibo_id:{weibo_id}")
        except IndexError:
            logger.error(f"no weibo id\n{self.source}")
            sys.exit(2)
        if MYSQL:
            self.db = Mysql(weibo_id)
            self.db.create_table(self.weibo_url)
        logger.info(f'总共{pages}页,{comments}评论')
        for page in range(1, pages + 1):
            url = f'https://www.weibo.com/aj/v6/comment/big?ajwvr=6&id={weibo_id}&filter=all&page={page}'
            self.urls.append(url)

    @staticmethod
    def exception_handler(request, exception):
        logger.error(f"{exception}\n{request.url}")
        return None

    def getcomments(self, urls=None):
        if urls:
            self.urls = urls
        ss = requests.Session()
        tasks = (grequests.get(url, session=ss, headers=self.headers,
                               cookies=self.cookies, timeout=self.timeout,
                               proxies=choice(self.proxies))
                 for url in self.urls)
        bs = grequests.map(tasks, size=5, exception_handler=self.exception_handler, gtimeout=3)
        for b in bs:
            _page = bs.index(b)
            if not b:
                continue
            if b.status_code == 200:
                logger.info(f"{b.url} --- {b.status_code}")
                _offset = 0
                d = b.json()
                c_html = d['data']['html']
                c = etree.HTML(c_html.encode('unicode_escape'))
                logger.info(f'第{_page + 1}页')
                logger.debug(f'{c_html}')
                uc = c.xpath('//div[@class="WB_text"]')
                dt = c.xpath('//div[@class="WB_from S_txt2"]')
                for i, j in zip(uc, dt):
                    _offset += 1
                    user, comment = i.xpath('string(.)').encode(
                        'utf-8').decode('unicode_escape').strip().split(':', 1)
                    c_time = j.xpath('string(.)').encode('utf-8').decode(
                        'unicode_escape').strip()
                    if '今天' in c_time:
                        c_time = c_time.replace('今天', self.today)
                    if MYSQL:
                        self.db.add(user, comment, c_time, page=_page, offset=_offset)
                    if user == self.user:
                        logger.info(f'{user}:{comment}')
                logger.info(f"该页有{_offset}条评论")
            else:
                logger.error(f"{b.url} --- {b.status_code}")

    def run(self):
        self._cookies()
        self._base()
        self.getcomments()
        if MYSQL:
            self.db.close()
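# A minimal usage sketch (an assumption, not part of the original source). It relies on
# the module-level config constants (WEIBO_URL, USERNAME, PROXIES, TIMEOUT, HEADERS,
# COOKIES, MYSQL) already being defined, exactly as the class itself does.
if __name__ == '__main__':
    wc = WeiboComment()   # or WeiboComment(weibo_url='...', user='...')
    wc.run()              # load cookies, build the page URLs, then pull every comment page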