class WXSearch():
    """Fetch the WeChat index ("wxindex") series for a keyword and store it.

    ``run`` queries the ``getwxindex`` endpoint on search.weixin.qq.com for a
    window that starts 90 days ago and ends 24 hours ago, pairs the returned
    comma-separated values with the dates from ``getDateList`` and adds one
    ``WXIndex`` row per pair unless a row with the same id already exists.
    """

    # Seconds to wait for the endpoint; without a timeout ``requests`` can
    # block indefinitely on an unresponsive server.
    REQUEST_TIMEOUT = 30

    # Number of daily entries produced by ``getDateList``.
    WINDOW_DAYS = 90

    def __init__(self):
        self.orm = ORM()
        self.session = self.orm.getSession()
        self.wxindex = WXIndex()

    def run(self, keywords):
        """Download and persist the index values for ``keywords``.

        :param keywords: search term, interpolated as-is into the request URL
            and the Referer header.
        :raises: network errors from ``requests`` and JSON decoding errors
            propagate to the caller; errors while building or storing rows
            are logged and swallowed.
        """
        logging.info(keywords)
        now = time.time()
        end_time = str('%.3f' % (now - 24 * 3600))
        start_time = str('%.3f' % (now - 90 * 24 * 3600))
        # Value sent as the ``_`` query parameter; hard-coded in the original
        # (presumably a cache-busting timestamp -- TODO confirm).
        cache_buster = 1490609811174
        headers = {
            'Cookie': WXIndexModel.getCookies().encode('utf8'),
            'Referer': 'https://search.weixin.qq.com/cgi-bin/h5/wxindex/detail.html?q=%s&pass_ticket=zQE7LtY4Pl0uRAOeXONqdXkfeSp62IazVw4GAqC2u4nOO8pTXBfIL92x2f3h2BMe' % (keywords)
        }
        url = 'https://search.weixin.qq.com/cgi-bin/searchweb/getwxindex?query=%s&start_time=%s&end_time=%s&_=%s' % (
            keywords, start_time, end_time, cache_buster)
        response = requests.get(url, headers=headers,
                                timeout=self.REQUEST_TIMEOUT)
        body = json.loads(response.text)

        if body.get('retcode') != 0:
            logging.error(body.get('msg'))
            return

        # ``data`` may be absent or null in the payload; treat that the same
        # as an empty index string instead of raising AttributeError.
        raw_index = (body.get('data') or {}).get('wxindex')
        if not raw_index:
            logging.error('%s:该词条未被收录' % keywords)
            return

        date_list = self.getDateList()
        wx_list = raw_index.split(',')
        try:
            # zip() stops at the shorter list, pairing values with dates
            # starting from the oldest date.
            for date_str, value in zip(date_list, wx_list):
                record = WXIndex()
                record.keyword = keywords
                record.date = date_str
                record.wx_index = float(value)
                # NOTE(review): ``.decode`` assumes Python 2 byte strings;
                # kept unchanged so generated ids stay stable.
                raw_id = '%s%s%s' % (record.keyword.decode('utf8'),
                                     record.date.decode('utf8'),
                                     record.wx_index)
                record.id = hashlib.md5(
                    raw_id.encode('gb2312')).hexdigest()
                record.date_update = datetime.datetime.now().strftime(
                    '%Y-%m-%d %H:%M:%S')
                if not self.wxindex.isExistById(record.id):
                    self.orm.add(record)
        except Exception as e:
            # Best effort: log the failure with its traceback and stop
            # processing the remaining items, as the original did.
            logging.exception(e)

    def getDateList(self):
        """Return ``YYYY-MM-DD`` strings for the 90 days before now.

        The list is ordered oldest first; it starts 90 days ago and ends with
        yesterday (the current day is not included).
        """
        end_date = datetime.datetime.now()
        start_date = end_date - datetime.timedelta(days=self.WINDOW_DAYS)
        return [
            (start_date + datetime.timedelta(days=offset)).strftime("%Y-%m-%d")
            for offset in range(self.WINDOW_DAYS)
        ]
# coding:utf8 from sqlalchemy.orm import relationship, backref from sqlalchemy import func from sqlalchemy.sql.elements import and_, or_ from sqlalchemy import Column, String, FLOAT, INTEGER, ForeignKey, DateTime, BOOLEAN, TEXT, UniqueConstraint, Index, \ TIMESTAMP, DATE import datetime from sqlalchemy.util import column_dict from CuteScrapy.util.MysqlUtils import ORM Base = ORM.getBase() orm = ORM() class News(Base): __tablename__ = 'news' id = Column(String(100), primary_key=True) site = Column(String(100)) type = Column(String(100)) title = Column(TEXT) keyword = Column(String(100)) summary = Column(TEXT) content = Column(TEXT) positive = Column(FLOAT) negative = Column(FLOAT) page_url = Column(TEXT) status = Column(INTEGER) publish_time = Column(TIMESTAMP) # 发布时间 comment_time = Column(TIMESTAMP) # 评论时间
def __init__(self):
    """Instantiate an ``ORM`` helper and keep it on the instance as ``self.orm``."""
    self.orm = ORM()
# coding:utf8 from CuteScrapy.util.CommonParser import CommonParser from CuteScrapy.util.MysqlUtils import ORM from sqlalchemy import Column, String, FLOAT, INTEGER, ForeignKey, DateTime, BOOLEAN, TEXT, UniqueConstraint, Index, \ TIMESTAMP from datetime import datetime __author__ = 'HuijunZhang' Base = ORM.getBase() orm = ORM() class Proxy(Base): __tablename__ = 'proxy' id = Column(String(100), primary_key=True) site = Column(String(100)) # 站点 ip = Column(String(100)) # ip port = Column(String(10)) # 端口 type = Column(String(100)) # 类型:http,https,socks4/5 site_conn_time = Column(String(100)) # 连接时间 province = Column(String(100)) city = Column(String(100)) anonymity = Column(BOOLEAN) # 高匿 date_update = Column(DateTime, default=datetime.now) date_create = Column(DateTime, default=datetime.now) @classmethod def getProxyData(cls, _type='HTTP'): session = orm.getSession() result = session.query(cls).filter(cls.type == _type).all()
def __init__(self):
    """Set up the database helpers used by this object.

    Stores an ``ORM`` instance, a session obtained from it, and a
    ``WXIndex`` instance on ``self``.
    """
    orm_helper = ORM()
    self.orm = orm_helper
    self.session = orm_helper.getSession()
    self.wxindex = WXIndex()
def isExistsMoviesByid(cls, id):
    """Return the first ``cls`` row whose ``Movies.id`` equals ``id``.

    Despite the name, the result is the row object itself (truthy when a
    match exists) or ``None`` when there is no match.

    :param id: value compared against ``Movies.id``.
    :return: the first matching row, or ``None``.
    """
    session = ORM().getSession()
    try:
        return session.query(cls).filter(Movies.id == id).first()
    finally:
        # Release the session even when the query raises, so a failed lookup
        # does not leak it.
        session.close()
# coding:utf8 import requests import json import time import logging from flask import jsonify from CuteScrapy.item.ModelItem import ModelItem from CuteScrapy.model.news import NewsModel from CuteScrapy.util.MysqlUtils import ORM orm = ORM() session = orm.getSession() class SubmitJson2mysql(): def __init__(self): self.json = [{ 'id': 2, 'keywords': u'禁言', 'type': None, 'site': 'weixin', 'sentiment': False, 'mail_group': 0 }, { 'id': 1, 'keywords': u'余额宝1', 'type': None,