Ejemplo n.º 1
0
class WXSearch():
    """Fetch wxindex values for a keyword and store the rows not yet saved.

    The values are requested from the ``getwxindex`` endpoint on
    search.weixin.qq.com for a window running from 90 days ago to 1 day ago.
    """

    def __init__(self):
        self.orm = ORM()
        self.session = self.orm.getSession()
        # Only used in run() to call isExistById().
        self.wxindex = WXIndex()

    def run(self, keywords):
        """Download the wxindex series for ``keywords`` and persist new rows.

        Each comma-separated value in the response is paired with a date from
        getDateList(); a row is added through ``self.orm`` unless
        ``self.wxindex.isExistById`` reports that its id already exists.

        :param keywords: the search keyword sent as the ``query`` parameter.
        :return: None. Failures reported by the service are logged.

        Errors raised by the HTTP request or by JSON decoding are not caught
        here and propagate to the caller.
        """
        logging.info(keywords)
        now = time.time()
        end_time = str('%.3f' % (now - 24 * 3600))
        start_time = str('%.3f' % (now - 90 * 24 * 3600))
        # Fixed value sent as the `_` parameter (presumably a cache-busting
        # timestamp -- TODO confirm whether it should be regenerated).
        o = 1490609811174
        headers = {
            'Cookie':
            WXIndexModel.getCookies().encode('utf8'),
            'Referer':
            'https://search.weixin.qq.com/cgi-bin/h5/wxindex/detail.html?q=%s&pass_ticket=zQE7LtY4Pl0uRAOeXONqdXkfeSp62IazVw4GAqC2u4nOO8pTXBfIL92x2f3h2BMe'
            % (keywords)
        }
        # Let requests build the query string so that characters such as
        # '&', '#' or spaces inside the keyword are escaped instead of
        # corrupting the request. A list of pairs keeps the parameter order.
        params = [
            ('query', keywords),
            ('start_time', start_time),
            ('end_time', end_time),
            ('_', o),
        ]
        response = requests.get(
            'https://search.weixin.qq.com/cgi-bin/searchweb/getwxindex',
            params=params,
            headers=headers)
        body = json.loads(response.text)

        if body.get('retcode') != 0:
            logging.error(body.get('msg'))
            return

        # `data` or `wxindex` may be missing/null in the payload; treat that
        # the same as an empty series instead of raising AttributeError.
        data = body.get('data') or {}
        raw_index = data.get('wxindex')
        if not raw_index:
            # Message text: "this entry has not been indexed".
            logging.error('%s:该词条未被收录' % keywords)
            return

        date_list = self.getDateList()
        wx_list = raw_index.split(',')
        try:
            # zip() stops at the shorter list, so extra dates or values are
            # silently ignored.
            for date, value in zip(date_list, wx_list):
                record = self._buildRecord(keywords, date, value)
                if not self.wxindex.isExistById(record.id):
                    self.orm.add(record)
        except Exception as e:
            # The first failing row stops the remaining rows from being
            # processed; rows added before it are kept.
            traceback.print_exc()
            logging.error(e)

    def _buildRecord(self, keywords, date, value):
        """Build one WXIndex row for ``keywords`` on ``date`` with ``value``."""
        record = WXIndex()
        record.keyword = keywords
        record.date = date
        record.wx_index = float(value)
        # NOTE(review): .decode('utf8') assumes keyword and date are byte
        # strings (Python 2 str) -- confirm before running on Python 3.
        raw_id = '%s%s%s' % (record.keyword.decode('utf8'),
                             record.date.decode('utf8'),
                             record.wx_index)
        # The row id is the MD5 hex digest of keyword + date + value.
        record.id = hashlib.md5(raw_id.encode('gb2312')).hexdigest()
        record.date_update = datetime.datetime.now().strftime(
            '%Y-%m-%d %H:%M:%S')
        return record

    def getDateList(self):
        """Return the 90 dates before today as ``YYYY-MM-DD`` strings.

        The list is in ascending order, starting 90 days ago and ending
        yesterday.
        """
        end_date = datetime.datetime.now()
        start_date = end_date - datetime.timedelta(days=90)
        day_count = (end_date - start_date).days
        return [
            (start_date + datetime.timedelta(days=offset)).strftime("%Y-%m-%d")
            for offset in range(day_count)
        ]
Ejemplo n.º 2
0
# coding:utf8
from sqlalchemy.orm import relationship, backref
from sqlalchemy import func
from sqlalchemy.sql.elements import and_, or_
from sqlalchemy import Column, String, FLOAT, INTEGER, ForeignKey, DateTime, BOOLEAN, TEXT, UniqueConstraint, Index, \
    TIMESTAMP, DATE
import datetime

from sqlalchemy.util import column_dict

from CuteScrapy.util.MysqlUtils import ORM

# Base class for the models declared in this module, obtained from the
# project's ORM helper.
Base = ORM.getBase()
# Module-level ORM helper instance.
orm = ORM()


class News(Base):
    """ORM model mapped to the ``news`` table."""
    __tablename__ = 'news'
    id = Column(String(100), primary_key=True)
    site = Column(String(100))
    type = Column(String(100))
    title = Column(TEXT)
    keyword = Column(String(100))
    summary = Column(TEXT)
    content = Column(TEXT)
    positive = Column(FLOAT)
    negative = Column(FLOAT)
    page_url = Column(TEXT)
    status = Column(INTEGER)
    publish_time = Column(TIMESTAMP)  # publish time
    comment_time = Column(TIMESTAMP)  # comment time
Ejemplo n.º 3
0
 def __init__(self):
     """Create the ORM helper used by this instance."""
     self.orm = ORM()
Ejemplo n.º 4
0
# coding:utf8
from CuteScrapy.util.CommonParser import CommonParser
from CuteScrapy.util.MysqlUtils import ORM
from sqlalchemy import Column, String, FLOAT, INTEGER, ForeignKey, DateTime, BOOLEAN, TEXT, UniqueConstraint, Index, \
    TIMESTAMP
from datetime import datetime

__author__ = 'HuijunZhang'

# Base class for the models declared in this module, obtained from the
# project's ORM helper.
Base = ORM.getBase()
# Module-level ORM helper; Proxy.getProxyData() uses it to obtain a session.
orm = ORM()


class Proxy(Base):
    """ORM model mapped to the ``proxy`` table."""
    __tablename__ = 'proxy'
    id = Column(String(100), primary_key=True)
    site = Column(String(100))  # site
    ip = Column(String(100))  # ip
    port = Column(String(10))  # port
    type = Column(String(100))  # type: http, https, socks4/5
    site_conn_time = Column(String(100))  # connection time
    province = Column(String(100))
    city = Column(String(100))
    anonymity = Column(BOOLEAN)  # high anonymity
    # Both timestamps default to datetime.now, evaluated when a row is inserted.
    date_update = Column(DateTime, default=datetime.now)
    date_create = Column(DateTime, default=datetime.now)

    @classmethod
    def getProxyData(cls, _type='HTTP'):
        """Query all rows whose ``type`` column equals ``_type``.

        :param _type: value compared against the ``type`` column
            (defaults to ``'HTTP'``).
        """
        # A session is obtained from the module-level ORM helper.
        session = orm.getSession()
        result = session.query(cls).filter(cls.type == _type).all()
Ejemplo n.º 5
0
 def __init__(self):
     """Create the ORM helper, a session obtained from it, and a WXIndex instance."""
     self.orm = ORM()
     self.session = self.orm.getSession()
     self.wxindex = WXIndex()
Ejemplo n.º 6
0
 def isExistsMoviesByid(cls, id):
     """Return the first ``cls`` row whose ``Movies.id`` equals ``id``.

     A new session is opened for the lookup and closed before returning.

     :param id: value compared against ``Movies.id``.
     :return: the matching row, or None when the query finds no row. The
         result is the row object itself, not a boolean.
     """
     session = ORM().getSession()
     try:
         return session.query(cls).filter(Movies.id == id).first()
     finally:
         # Close the session even when the query raises, so a failed lookup
         # does not leave the session open.
         session.close()
Ejemplo n.º 7
0
# coding:utf8
import requests
import json

import time

import logging

from flask import jsonify

from CuteScrapy.item.ModelItem import ModelItem
from CuteScrapy.model.news import NewsModel
from CuteScrapy.util.MysqlUtils import ORM

# Module-level ORM helper and a session obtained from it; both are created
# when this module is imported.
orm = ORM()
session = orm.getSession()


class SubmitJson2mysql():
    def __init__(self):
        self.json = [{
            'id': 2,
            'keywords': u'禁言',
            'type': None,
            'site': 'weixin',
            'sentiment': False,
            'mail_group': 0
        }, {
            'id': 1,
            'keywords': u'余额宝1',
            'type': None,