Example #1
 def getImages(self, url, title):
     try:
         r = self.get(url)
         html = r.text
         soup = BeautifulSoup(html, 'html.parser')
         p_images = soup.find_all('p', style='text-align:center')
         if len(p_images) > 0:
             # Create a new topic record
             now = int(time.time())
             topic_id = Mysql.create("INSERT INTO lab_topic (title, create_time, update_time) VALUES ('%s', %s, %s)" % (title, now, now))
             images_values = [] 
             for p_image in p_images:
                 url = p_image.img['src']
                 p_name = p_image.img['alt']
                 p_object = re.search( r'id=(\d*)\..*', p_name, re.I)
                 p_id = int(p_object.group(1)) if p_object else 0
                 # Get the file extension
                 etc = os.path.splitext(url)[1]
                 date = time.strftime('%Y%m%d',time.localtime(time.time()))
                 old_name = date + str(round(time.time() * 1000)) + p_name
                 name = hashlib.md5(old_name.encode(encoding='UTF-8')).hexdigest() + etc
                 downloadPath = os.path.join(self.downloadPath, date)
                 self.downloadImage(url, downloadPath, name)
                 save_path = date + '/' + name
                 images_values.append("('%s', %s, '%s', %s, %s, %s)" % (save_path, topic_id, p_name, p_id, now, now))
             create_sql = 'INSERT INTO lab_image (url, topic_id, name, p_id, create_time, update_time) VALUES ' + (','.join(images_values))
             Mysql.execute(create_sql)
             print('\033[1;32m--------------------已创建:', title, '\033[0m')
     except Exception as e:
         # TODO: log
         print(e)
         return
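The lab_image INSERT above splices values straight into the SQL string, so a quote in p_name or title would break the statement. A minimal sketch of the same batch insert with parameterized queries, assuming a PyMySQL-style connection object named conn (an assumption; the project's Mysql wrapper is not shown here and may expose a different API):

def insert_images(conn, rows):
    # rows is an iterable of 6-tuples: (url, topic_id, name, p_id, create_time, update_time)
    sql = ("INSERT INTO lab_image "
           "(url, topic_id, name, p_id, create_time, update_time) "
           "VALUES (%s, %s, %s, %s, %s, %s)")
    with conn.cursor() as cursor:
        # executemany lets the driver escape every value, so quotes in names cannot break the SQL
        cursor.executemany(sql, rows)
    conn.commit()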
Example #2
 def createNews(self, url):
     try:
         r = self.get(url)
         html = r.text
         soup = BeautifulSoup(html, 'html.parser')
         # Get the link to the related article
         linkHtml = soup.find('a', class_='dec_img')
         viewUrl = str(linkHtml['href'])
         if self.oldUrl == viewUrl:
             return -2
         image = str(linkHtml.img['src'])
         subject = str(linkHtml['title']).replace("'", "''")
         introHtml = linkHtml.parent.parent
         intro = introHtml.find('p', class_='com_about').get_text().replace("'", "''")
         catalogName = introHtml.find('span', class_='bq_ico').get_text()
         if catalogName == '美图':
             # Skip items in the gallery ('美图') category
             return -2
         category_id = self.getCatalog(catalogName)
         content = self.getNewsView(viewUrl)
         now = int(time.time())
         user_id = 1
         author = '网络'
         sql = "INSERT INTO news (user_id, author, catalog_id, title, intro, content, cover, ctime, utime) VALUES (%s, '%s', %s, '%s', '%s', '%s', '%s', %s, %s)" % (user_id, author, category_id, subject, intro, content, image, now, now)
         newsID = Mysql.create(sql)
         return '{ "id": ' +  str(newsID) + ', "url": "' + viewUrl + '"}'
     except Exception as e:
         # TODO: log
         # print(e)
         return -1
Example #3
def update_holidays(oldname, newname):
    # str.replace returns a new string, so the result must be assigned back
    a = oldname.replace('%20', ' ')
    b = newname.replace('%20', ' ')
    response = Mysql.update1(a, b)
    return jsonify(response)
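Swapping '%20' for a space only handles one escape; if other percent-encoded characters can appear in the names, urllib.parse.unquote decodes them all. A small alternative sketch (not what the example above does), reusing the same Mysql.update1 and jsonify calls:

from urllib.parse import unquote

def update_holidays(oldname, newname):
    # unquote decodes every percent-encoded sequence, not just %20
    return jsonify(Mysql.update1(unquote(oldname), unquote(newname)))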
Example #4
def get_data():
    response = Mysql.select1()
    return Response(
        response=json.dumps(response),
        status=200,
        mimetype="application/json"
    )
Example #5
 def _base(self):
     try:
         comments = int(re.findall(r'count=\\"(\d+)', self.source)[0])
         if comments % 20 == 0:
             pages = comments // 20
         else:
             pages = comments // 20 + 1
     except IndexError:
         logger.error(f"no comments count\n{self.source}")
         sys.exit(1)
     try:
         weibo_id = re.findall(r'%3D(\d+)&title', self.source)[0]
         logger.info(f"Weibo_id:{weibo_id}")
     except IndexError:
         logger.error(f"no weibo id\n{self.source}")
         sys.exit(2)
     if MYSQL:
         self.db = Mysql(weibo_id)
         self.db.create_table(self.weibo_url)
     logger.info(f'总共{pages}页,{comments}评论')
     for page in range(1, pages + 1):
         url = f'https://www.weibo.com/aj/v6/comment/big?ajwvr=6&id={weibo_id}&filter=all&page={page}'
         self.urls.append(url)
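The page count in _base is a ceiling division of the comment count by 20 (comments shown per page); the if/else can be collapsed into one expression, shown here only as an equivalent sketch:

import math

comments = 41
pages = math.ceil(comments / 20)   # 41 comments -> 3 pages, same result as the if/else above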
Example #6
def main():
    fname = config.log_path + 'article_parse.' + time.strftime("%Y%m%d")
    log.set_logger(level='DEBUG', when="D", limit=1, filename=fname)
    alist = Mongo().scan()
    if not alist:
        log.warn("no articles in mongodb")
        return False

    MyObj = Mysql()

    mobj = Mongo()
    for doc in alist:
        if Parse(MyObj).do(doc):
            mobj.update(doc.get("_id"), done=1)
            log.info("insert mysql success, url:%s" % doc.get('url'))
        else:
            mobj.update(doc.get("_id"), done=-1)
            log.warning("insert mysql failure, task_id:%s, url:%s" %
                        (doc.get('taskid'), doc.get('url')))
Example #7
# -*- coding: utf-8 -*-
# Crawl pixiv images from dmzj (动漫之家); reference page: https://news.dmzj.com/article/48293.html
# TODO: create the data tables and relations, and upload the images to third-party cloud storage

import requests
from bs4 import BeautifulSoup
import os
import time
import hashlib
import re
from db import Mysql

Mysql = Mysql()  # note: this instance shadows the imported Mysql class

class DmzjCrawler():

    # Download counters
    x = 0
    taskNum = 0
    # Page index to crawl; incremented while the process runs
    page = 60
    # Request headers
    headers = {
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'accept-encoding': 'gzip, deflate, br',
        'accept-language': 'zh-CN,zh;q=0.9',
        'cache-control': 'max-age=0',
        'cookie': 'UM_distinctid=165d8713f431c-03be91a8ec41a-54103715-1fa400-165d8713f4532b; show_tip_1=0',
        'referer': 'https://news.dmzj.com/article/12875.html',
        'upgrade-insecure-requests': '1',
Example #8
			jpos2=data.jpos[2],
			jpos3=data.jpos[3],
			jpos4=data.jpos[4],
			jpos5=data.jpos[5],
			jpos6=data.jpos[6],
			jpos7=data.jpos[7],
			jpos8=data.jpos[8],
			jpos9=data.jpos[9],
			jpos10=data.jpos[10],
			jpos11=data.jpos[11],
			jpos12=data.jpos[12],
			jpos13=data.jpos[13],
			jpos14=data.jpos[14],
			jpos15=data.jpos[15])
    idx += 1
    db._commit()


def subscriber():
    rospy.init_node('raven_state_subscriber', anonymous=True)

    rospy.Subscriber("/ravenstate", raven_state, callback)


    rospy.spin()

if __name__ == '__main__':
    db = Mysql(host='130.126.140.209', user='******', password='******', database='raven')
    subscriber()
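The excerpt above starts in the middle of the callback that stores each /ravenstate message, so it will not run as shown. Purely as a hypothetical sketch of the missing part, with db.insert_state standing in for whatever insert helper the original uses (the name and signature are assumptions):

idx = 0

def callback(data):
    # Hypothetical sketch: persist the joint positions from one raven_state message.
    global idx
    db.insert_state(idx, list(data.jpos))  # insert helper name and signature are assumptions
    idx += 1
    db._commit()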
Example #9
from binance.client import Client

import hmac
import hashlib
import json, requests
import traceback

from config import *
from utils.retry import retry
from utils.logger import Logger
logger = Logger.get_logger("market")

from db import Mysql, Trade
db = Mysql()

import redis
ex = 10


def market_factory(name):
    if name.lower() == 'binance':
        return Binance
    elif name.lower() == 'bibox':
        return Bibox


class Market:
    def __init__(self, product, basecoin):
        self.product = product.upper()
        self.basecoin = basecoin.upper()
        self.feecoin = ""
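market_factory returns a class rather than an instance, so the caller constructs it; a short usage sketch, assuming Binance subclasses Market (the excerpt ends before Binance and Bibox are defined):

MarketClass = market_factory('binance')   # returns the Binance class
market = MarketClass('eth', 'btc')        # Market.__init__ upper-cases product and basecoin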
Example #10
 def __init__(self, sqlmapapiurl, adminid):
     self.sqlmapapiurl = sqlmapapiurl  # SQLMAP API service URL
     self.adminid = adminid  # SQLMAP API admin id
     self.mysql = Mysql(conf.db_host, conf.db_port, conf.db_user,
                        conf.db_pass, conf.db_name)
Example #11
class SqliManage(object):
    def __init__(self, sqlmapapiurl, adminid):
        self.sqlmapapiurl = sqlmapapiurl  # SQLMAP API service URL
        self.adminid = adminid  # SQLMAP API admin id
        self.mysql = Mysql(conf.db_host, conf.db_port, conf.db_user,
                           conf.db_pass, conf.db_name)

    # Get the current task list
    def _get_task_list(self):
        checkurl = self.sqlmapapiurl + '/admin/' + self.adminid + '/list'
        resp = json.loads(do_get(checkurl))
        return resp['tasks']

    # Store the vulnerability result in the database
    def _item2db(self, taskid):
        dataurl = self.sqlmapapiurl + '/scan/' + taskid + '/data'
        resp = json.loads(do_get(dataurl))
        data = resp['data']
        if data != []:
            dset = "data='%s', sqli=1" % base64.b64encode(str(data[0]))
        else:
            logurl = self.sqlmapapiurl + '/scan/' + taskid + '/log'
            resp = json.loads(do_get(logurl))
            log = resp['log']
            dset = "data='%s', sqli=0" % base64.b64encode(str(log))
        where = "taskid='%s'" % taskid
        self.mysql.update('sub_sqli', dset, where)
        return

    # Delete a task
    def _delete_task(self, taskid):
        deleteurl = self.sqlmapapiurl + '/task/' + taskid + '/delete'
        do_get(deleteurl)
        return

    # Handle results of finished tasks
    def handle_result(self):
        tasklist = self._get_task_list()
        for taskid, state in tasklist.items():
            if state == 'terminated':
                self._item2db(taskid)
        return

    # Record a newly created SQLi task in the database
    def _task2db(self, taskid, url, body, psw):
        self.mysql.insert('sub_sqli', ('taskid', 'url', 'body', 'hash'),
                          (taskid, url, body, psw))
        return

    # Create a SQLi scan task
    def send2sqlmap(self, url, user_agent='', cookie='', body=''):
        flag, psw = self._is_need_sqli_test(url, body)
        if not flag:
            return False
        newurl = self.sqlmapapiurl + '/task/new'
        resp = json.loads(do_get(newurl))
        taskid = resp['taskid']
        log('send2sqlmap', 'task is created. id : %s' % taskid)
        data = {}
        data['url'] = url
        if cookie != '' and cookie != []:
            data['cookie'] = cookie[0]
        if user_agent != '' and user_agent != []:
            data['headers'] = 'User-Agent: ' + user_agent[0]
        if body != '':
            data['data'] = body
        if url[0:5] == 'https':
            forcesslurl = self.sqlmapapiurl + '/option/' + taskid + '/set'
            do_post(url=forcesslurl, data='{"forceSSL" : true}')
        starturl = self.sqlmapapiurl + '/scan/' + taskid + '/start'
        do_post(url=starturl, data=json.dumps(data))
        log('send2sqlmap', 'task is started. id : %s' % taskid)
        self._task2db(taskid, url, body, psw)
        return True

    # Check whether this request needs a SQLi test
    def _is_need_sqli_test(self, url, body):
        parsedurl = urlparse(url)
        if parsedurl.query == '' and body == '':
            return False, ''
        paramlist = parsedurl.query.split('&')
        paramstring = ''
        for param in paramlist:
            paramstring += str(param.split('=')[0])
        test = parsedurl.netloc + parsedurl.path + parsedurl.params + paramstring + body
        m = hashlib.md5()
        m.update(test)
        psw = m.hexdigest()
        for one in self.mysql.select(('hash',), 'sub_sqli'):  # one-element tuple, matching the other select calls
            if psw == one[0]:
                return False, ''
        f = open('plugins/mysub/config/targetdomain', 'r')
        domains = f.readlines()
        f.close()
        for one in domains:
            if one[:-1] in parsedurl.netloc:
                return True, psw
        return False, ''

    # Get results flagged as vulnerable
    def get_sqli_result(self):
        return self.mysql.select(('url', 'body', 'data'), 'sub_sqli', 'sqli=1')

    # Get results flagged as not vulnerable
    def get_no_sqli_result(self):
        return self.mysql.select(('url', 'body', 'data'), 'sub_sqli', 'sqli=0')

    # Get the list of scans still in progress
    def get_scaning_list(self):
        return self.mysql.select(('url', 'body'), 'sub_sqli', 'sqli is NULL')

    # Force-clean leftover tasks
    def tasks_clean(self):
        tasklist = self._get_task_list()
        for taskid in tasklist:
            self._delete_task(taskid)
        self.mysql.delete('sub_sqli', 'sqli is NULL')
        return

    # Clear the table
    def clean_db(self):
        self.mysql.delete('sub_sqli')
        return
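_is_need_sqli_test fingerprints a request by hashing the host, path, URL params, the parameter names (values stripped) and the body, so the same endpoint is only queued once even when parameter values differ. A small worked sketch of that fingerprint for a made-up URL:

import hashlib
try:
    from urllib.parse import urlparse   # Python 3
except ImportError:
    from urlparse import urlparse       # Python 2, which the class above appears to target

url = 'http://example.com/item.php?id=1&cat=2'   # made-up URL, empty body
parsed = urlparse(url)
names = ''.join(p.split('=')[0] for p in parsed.query.split('&'))        # 'idcat'
fingerprint = parsed.netloc + parsed.path + parsed.params + names + ''   # trailing '' is the body
digest = hashlib.md5(fingerprint.encode('utf-8')).hexdigest()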
Example #12
def del_holidays(name):
    # str.replace returns a new string, so the result must be assigned back
    a = name.replace('%20', ' ')
    response = Mysql.delete1(a)
    return jsonify(response)
Example #13
def add_holidays():
    for dic in calendarifics['response']['holidays']:
        response = Mysql.insert(dic)
    # return after the loop so every holiday is inserted, not just the first
    return jsonify(response)
Example #14
# -*- coding: utf-8 -*-
import top.api
import taobao_env
from db import Mysql
sm = Mysql("sm")


sql = "select name,pic from item where status=1 and name is not null and name !=''"
req = top.api.TaobaokeItemsDetailGetRequest()
#  items, chunk = [1,2,3,4,5,6,7,8,9], 3
#  zip(*[iter(items)]*chunk)
req.fields = "click_url,shop_click_url,seller_credit_score,num_iid,title,nick,pic_url,price"
ret = sm.select(sql)
print len(ret)
size = 10
for num in [ret[i: i + size] for i in range(0, len(ret), size)]:
    req.num_iids = ",".join([x["name"] for x in num])
    try:
        resp = req.getResponse()
        for taobaoke_item_detail in resp.get("taobaoke_items_detail_get_response").get("taobaoke_item_details").get("taobaoke_item_detail"):
            tbPath = taobaoke_item_detail.get("click_url")
            item = taobaoke_item_detail.get("item")
            num_iid = item.get("num_iid")
            price = item.get("price")
            pic_url = item.get("pic_url")
            title = item.get("title")
            sql = "update item set tbPath='%s',newPrice=%s where name=%s" % (tbPath, price, num_iid)
            print sql
            sm.query(sql)
        sm.commit()
        print(resp)
    except Exception as e:
        # minimal error handling: log the failure and continue with the next chunk
        print e
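Both the commented zip(*[iter(items)]*chunk) hint and the slice comprehension above split the result set into groups of ten num_iids per API call; a tiny sketch comparing the two idioms:

items = [1, 2, 3, 4, 5, 6, 7, 8, 9]
chunk = 3
# Slice-based chunking (what the loop above uses): keeps a partial final group.
by_slice = [items[i: i + chunk] for i in range(0, len(items), chunk)]
# The zip idiom from the comment: same grouping, but it silently drops a partial tail.
by_zip = list(zip(*[iter(items)] * chunk))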
Example #15
class WeiboComment(object):
    """Look up one user's comments under a given Weibo post.

        weibo_url is the URL of the Weibo detail page.
        user is the nickname of the user whose comments to find.
    """
    def __init__(self,
                 weibo_url=WEIBO_URL,
                 user=USERNAME,
                 proxies=PROXIES,
                 timeout=TIMEOUT,
                 headers=HEADERS):
        self.chrome_options = Options()
        self.chrome_options.add_argument("--headless")
        self.chrome_options.add_argument("--disable-gpu")
        self.proxies = proxies
        self.today = f"{today.month}月{today.day}日"
        self.urls = []
        self.source = ''
        self.timeout = timeout
        self.user = user
        self.weibo_url = weibo_url
        self.cookies = {}
        self.headers = headers

    def get_cookies(self):
        driver = webdriver.Chrome(chrome_options=self.chrome_options)
        driver.get(self.weibo_url)
        time.sleep(5)
        self.source = driver.page_source
        logging.debug(self.source)
        _cookies = driver.get_cookies()
        for cookie in _cookies:
            self.cookies[cookie['name']] = cookie['value']
        with open(COOKIES, 'wb') as f:
            pickle.dump(self.cookies, f)
        driver.quit()

    def _cookies(self):
        if os.path.exists(COOKIES):
            with open(COOKIES, 'rb') as f:
                _cookies = pickle.load(f)
            _res = requests.get(self.weibo_url,
                                cookies=_cookies,
                                headers=self.headers)
            if not _res.history:
                self.cookies.update(_cookies)
                self.source = _res.text
                logging.info(self.source)
            else:
                self.get_cookies()
        else:
            self.get_cookies()

    def _base(self):
        try:
            comments = int(re.findall(r'count=\\"(\d+)', self.source)[0])
            if comments % 20 == 0:
                pages = comments // 20
            else:
                pages = comments // 20 + 1
        except IndexError:
            logger.error(f"no comments count\n{self.source}")
            sys.exit(1)
        try:
            weibo_id = re.findall(r'%3D(\d+)&title', self.source)[0]
            logger.info(f"Weibo_id:{weibo_id}")
        except IndexError:
            logger.error(f"no weibo id\n{self.source}")
            sys.exit(2)
        if MYSQL:
            self.db = Mysql(weibo_id)
            self.db.create_table(self.weibo_url)
        logger.info(f'总共{pages}页,{comments}评论')
        for page in range(1, pages + 1):
            url = f'https://www.weibo.com/aj/v6/comment/big?ajwvr=6&id={weibo_id}&filter=all&page={page}'
            self.urls.append(url)

    @staticmethod
    def exception_handler(request, exception):
        logger.error(f"{exception}\n{request.url}")
        return None

    def getcomments(self, urls=None):
        if urls:
            self.urls = urls
        ss = requests.Session()
        tasks = (grequests.get(url,
                               session=ss,
                               headers=self.headers,
                               cookies=self.cookies,
                               timeout=self.timeout,
                               proxies=choice(self.proxies))
                 for url in self.urls)
        bs = grequests.map(tasks,
                           size=5,
                           exception_handler=self.exception_handler,
                           gtimeout=3)
        for _page, b in enumerate(bs):  # enumerate gives the true page index even if two responses compare equal
            if not b:
                continue
            if b.status_code == 200:
                logger.info(f"{b.url} --- {b.status_code}")
                _offset = 0
                d = b.json()
                c_html = d['data']['html']
                c = etree.HTML(c_html.encode('unicode_escape'))
                logger.info(f'第{_page + 1}页')
                logger.debug(f'{c_html}')
                uc = c.xpath('//div[@class="WB_text"]')
                dt = c.xpath('//div[@class="WB_from S_txt2"]')
                for i, j in zip(uc, dt):
                    _offset += 1
                    user, comment = i.xpath('string(.)').encode(
                        'utf-8').decode('unicode_escape').strip().split(
                            ':', 1)
                    c_time = j.xpath('string(.)').encode('utf-8').decode(
                        'unicode_escape').strip()
                    if '今天' in c_time:
                        c_time = c_time.replace('今天', self.today)
                    if MYSQL:
                        self.db.add(user,
                                    comment,
                                    c_time,
                                    page=_page,
                                    offset=_offset)
                    if user == self.user:
                        logger.info(f'{user}:{comment}')
                logger.info(f"该页有{_offset}条评论")
            else:
                logger.error(f"{b.url} --- {b.status_code}")

    def run(self):
        self._cookies()
        self._base()
        self.getcomments()
        if MYSQL:
            self.db.close()
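A minimal way to drive the class, assuming the module-level constants it defaults to (WEIBO_URL, USERNAME, PROXIES, TIMEOUT, HEADERS, plus MYSQL and COOKIES) are defined in the surrounding config:

if __name__ == '__main__':
    wc = WeiboComment()   # defaults come from the config constants noted above
    wc.run()              # load or fetch cookies, build the comment-page URLs, then scrape them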