class Post(db.Model, ResourceMixin):
    """A post with a title and content, owned by an account (table ``post``)."""

    __tablename__ = 'post'

    id = db.Column(db.Integer, primary_key=True)
    title = db.Column(db.String(100), nullable=False)
    content = db.Column(db.String(1024), nullable=False)

    # ===================== Relationships ==================================
    account_id = db.Column(db.Integer, db.ForeignKey('account.id'),
                           nullable=False)
    account = db.relationship('Account',
                              backref=db.backref('posts', lazy=True))

    @classmethod
    def find(cls, identity):
        """Return the first post whose title or id equals ``identity``.

        Returns None when no row matches.
        """
        matches_title = cls.title == identity
        matches_id = cls.id == identity
        return Post.query.filter(matches_title | matches_id).first()

    @classmethod
    def get_all_posts(cls, page=None):
        """Return posts ordered by ``create_date``, newest first.

        Args:
            page: Page number to fetch; when None every post is returned.

        Returns:
            A list of posts when ``page`` is None, otherwise the result of
            ``paginate`` for that page with 5 posts per page.
        """
        newest_first = Post.query.order_by(Post.create_date.desc())
        if page is not None:
            return newest_first.paginate(page=page, per_page=5)
        return newest_first.all()
class SimilarBooks(db.Model):
    """Row of ``similar_books``: links a Goodreads book id to a similar one."""

    __tablename__ = "similar_books"

    id = db.Column(db.Integer,
                   primary_key=True,
                   autoincrement=True,
                   nullable=True)
    goodreads_book_id = db.Column(db.Integer)
    sim_goodreads_book_id = db.Column(db.Integer)

    @classmethod
    def get_sim_ids(cls, gid):
        """Collect the similar-book ids recorded for one book.

        Args:
            gid: Goodreads book id, matched against ``goodreads_book_id``.

        Returns:
            list: The ``sim_goodreads_book_id`` value of every matching row.
        """
        matches = cls.query.filter_by(goodreads_book_id=gid)
        sim_ids = []
        for row in matches:
            # Read from the instance dict, exactly as loaded by the query.
            sim_ids.append(vars(row).get("sim_goodreads_book_id"))
        return sim_ids
class BookTags(db.Model):
    """Row of ``book_tags``: one tag, with its count, attached to a book."""

    __tablename__ = "book_tags"

    id = db.Column(db.Integer,
                   primary_key=True,
                   autoincrement=True,
                   nullable=True)
    goodreads_book_id = db.Column(db.Integer)
    tag_id = db.Column(db.Integer)
    tag_name = db.Column(db.String(120))
    count = db.Column(db.Integer)

    @classmethod
    def get_tags(cls, gid, n=5):
        """Fetch the highest-count tags of one book.

        Args:
            gid: Goodreads book id, matched against ``goodreads_book_id``.
            n: Maximum number of rows to return.

        Returns:
            list: The instance ``__dict__`` of each selected row, ordered by
            ``count`` descending.
        """
        top_tags = (cls.query
                    .filter_by(goodreads_book_id=gid)
                    .order_by(cls.count.desc())
                    .limit(n))
        return [vars(tag) for tag in top_tags]
class Ratings(db.Model):
    """Row of ``ratings``: a rating value given by a user to a book."""

    __tablename__ = "ratings"

    id = db.Column(
        db.Integer,
        primary_key=True,
        autoincrement=True,
        nullable=True,
    )
    user_id = db.Column(db.Integer)
    book_id = db.Column(db.Integer)
    rating = db.Column(db.Integer)
class BookTags(db.Model):
    """ORM mapping of the ``book_tags`` table."""

    # Name of the mapped table.
    __tablename__ = "book_tags"

    # Surrogate row id.
    id = db.Column(db.Integer,
                   primary_key=True,
                   autoincrement=True,
                   nullable=True)
    # Id of the book in the Goodreads dataset.
    goodreads_book_id = db.Column(db.Integer)
    # Id of the tag in the dataset.
    tag_id = db.Column(db.Integer)
    # Name of the tag that corresponds to ``tag_id``.
    tag_name = db.Column(db.String(120))
    count = db.Column(db.Integer)

    @classmethod
    def get_tags(cls, gid, n=5):
        """Fetch the highest-count tags of one book.

        Args:
            gid: Goodreads book id, matched against ``goodreads_book_id``.
            n: Maximum number of rows to return.

        Returns:
            list: The instance ``__dict__`` of each selected row, ordered by
            ``count`` descending.
        """
        for_book = cls.query.filter_by(goodreads_book_id=gid)
        ranked = for_book.order_by(cls.count.desc()).limit(n)
        return list(map(vars, ranked))
class Author(db.Model):
    """An author, identified by name, with a dynamic relationship to ``Book``."""

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(100), nullable=False)
    # lazy='dynamic' exposes the related books as a query; the backref adds
    # an ``author`` attribute on ``Book``.
    book = db.relationship(
        'Book',
        backref='author',
        lazy='dynamic',
    )

    def __init__(self, name):
        """Create an author with the given ``name``."""
        self.name = name
class Book(db.Model):
    """A book with a title and description, linked to its author by id."""

    id = db.Column(db.Integer, primary_key=True)
    title = db.Column(db.String(100), nullable=False)
    description = db.Column(db.String(200), nullable=False)
    author_id = db.Column(
        db.Integer,
        db.ForeignKey('author.id'),
        nullable=False,
    )

    def __init__(self, title, description, author_id):
        """Create a book from its title, description and author id."""
        self.title = title
        self.description = description
        self.author_id = author_id
class ArticleData(BaseModel, db.Model):
    """Per-article, per-day visit statistics (table ``article_data``)."""

    __tablename__ = 'article_data'

    id = db.Column(db.INT, primary_key=True)
    article_id = db.Column(db.INT, db.ForeignKey('article.id'))
    visit_date = db.Column(db.Date, default=date.today)
    pv = db.Column(db.INT, default=0)
    uv = db.Column(db.INT, default=0)
    # Callable default: each row gets its own dict instead of sharing one.
    ext_property = db.Column(db.JSON, default=dict)
    is_available = db.Column(db.INT, default=1)
    # Pass the callable, not its result, so the timestamp is taken at insert
    # time rather than once when the module is imported.
    create_ts = db.Column(db.TIMESTAMP, default=datetime.now)
    update_ts = db.Column(db.TIMESTAMP, default=datetime.now)

    @classmethod
    def statistics_article_data(cls):
        """Recompute per-article page views for today and yesterday.

        For each of the two days, counts ``VisitorLog`` rows per url, resolves
        each url to an article through ``Article.query_or_create`` and stores
        the count as ``pv`` on the matching ``ArticleData`` row.
        """

        def _statistics(vd):
            """Aggregate and store page views for the single day ``vd``."""
            begin = time.time()
            VL = VisitorLog
            items = (db.session.query(VL.url, func.count(VL.url))
                     .filter(VL.visit_date == vd)
                     .group_by(VL.url)
                     .all())
            for url, count in items:
                article = Article.query_or_create(url=url)
                # query_or_create returns None for urls it does not track.
                if article:
                    ad = cls.query_or_create(article_id=article.id,
                                             visit_date=vd)
                    ad.pv = count
                    db.session.add(ad)
                    db.session.commit()
            logger.debug('statistics_article_data time: %s',
                         (time.time() - begin))

        _statistics(date.today())
        _statistics(date.today() - timedelta(days=1))
class ResourceMixin(object):
    """Mixin adding creation/update timestamps and save/delete helpers."""

    # Pass the callable, not its result: ``datetime.now()`` would be evaluated
    # once at import time and every row would share that single timestamp.
    create_date = db.Column(db.DateTime, default=datetime.now)
    update_date = db.Column(db.DateTime, default=datetime.now)

    def save(self):
        """Add this instance to the session and commit."""
        db.session.add(self)
        db.session.commit()

    def delete(self):
        """Delete this instance through the session and commit."""
        db.session.delete(self)
        db.session.commit()
class Product(db.Model):
    """A product with a unique name, description, price and quantity."""

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(10), unique=True)
    description = db.Column(db.String(200))
    price = db.Column(db.Float)
    qty = db.Column(db.Integer)

    def __init__(self, name, description, price, qty):
        """Create a product from its name, description, price and quantity."""
        self.name = name
        self.description = description
        self.price = price
        self.qty = qty
class Record(db.Model):
    """A score stored under a string id (table ``record``)."""

    __tablename__ = 'record'

    id = db.Column(db.String(30), primary_key=True)
    score = db.Column(db.Integer, nullable=False, default=0)

    def save(self):
        """Add this record to the session, commit, and return it."""
        session = db.session
        session.add(self)
        session.commit()
        return self

    @classmethod
    def find(cls, id):
        """Return the first record whose id equals ``id``, or None."""
        matching = Record.query.filter(cls.id == id)
        return matching.first()
class AutoId(BaseModel, db.Model):
    """Id generation backed by the SQL function ``func_auto_id``."""

    __tablename__ = 'auto_id'

    id = db.Column(db.INT, primary_key=True)
    shard_id = db.Column(db.INT, default=0)
    item_id = db.Column(db.INT, default=1)

    @classmethod
    def generate_id(cls, shard_id=0, item_id=1):
        """Return the ``id`` produced by ``func_auto_id(shard_id, item_id)``."""
        rows = BaseDB.query('select func_auto_id(%s, %s) as id',
                            [shard_id, item_id])
        first_row = rows[0]
        return first_row['id']

    @classmethod
    def generate_str_id(cls, suffix='W'):
        """Return ``<YYYYmmddHHMMSS>-<id>`` using a fresh ``func_auto_id(0, 1)``.

        ``suffix`` is accepted but not used by this method.
        """
        rows = BaseDB.query('select func_auto_id(%s, %s) as id', [0, 1])
        # The timestamp is taken after the id has been fetched.
        stamp = format(datetime.now(), '%Y%m%d%H%M%S')
        new_id = rows[0]["id"]
        return f'{stamp}-{new_id}'
class Task(db.Model):
    """A uniquely named task with creation and modification timestamps."""

    id = db.Column(db.Integer, primary_key=True)
    task_name = db.Column(db.String(80), unique=True)
    date_created = db.Column(db.DateTime, default=datetime.datetime.utcnow)
    date_modified = db.Column(db.DateTime, default=datetime.datetime.utcnow)

    def __init__(self, task_name):
        """Create a task called ``task_name``."""
        self.task_name = task_name

    def __repr__(self):
        # f-string so the task name is interpolated; without the prefix the
        # placeholder text was returned literally.
        return f'<Task {self.task_name}>'

    def to_json(self):
        """Return the task's columns as a plain dict.

        The two date values are returned as stored, not converted to strings.
        """
        return {
            'id': self.id,
            'task_name': self.task_name,
            'date_created': self.date_created,
            'date_modified': self.date_modified,
        }
class Category(BaseModel, db.Model):
    """Article category derived from the names in ``FILE_LIST``."""

    __tablename__ = 'category'

    id = db.Column(db.BIGINT, primary_key=True)
    name = db.Column(db.String)
    content = db.Column(db.String, default="")
    # Callable default: each row gets its own dict instead of sharing one.
    ext = db.Column(db.JSON, default=dict)
    is_del = db.Column(db.INT, default=0)
    # Pass the callable, not its result, so the timestamp is taken at insert
    # time rather than once when the module is imported.
    create_ts = db.Column(db.TIMESTAMP, default=datetime.now)
    update_ts = db.Column(db.TIMESTAMP, default=datetime.now)

    @classmethod
    def get_categorys_md(cls):
        """Return a ``Markdown`` object listing every category as a link."""
        lines = ['# 分类']
        lines.extend('- [{}](/{})'.format(cg, cg)
                     for cg in cls.get_categorys())
        return Markdown(content='\n'.join(lines))

    @classmethod
    def get_categorys(cls):
        """Return the sorted, de-duplicated category names.

        A category is the part of each ``FILE_LIST`` entry before its first
        ``-``. Names starting with ``.`` (hidden files) are dropped.
        """
        names = {entry.split('-', 1)[0] for entry in FILE_LIST}
        return sorted(name for name in names if not name.startswith('.'))

    @classmethod
    def get_md(cls, category, level):
        """Generate the Markdown article list shown under a category heading.

        Args:
            category: Text of the heading line.
            level: Heading depth, i.e. the number of ``#`` characters.

        Returns:
            str: The heading followed by one bullet per file, bullets sorted
            in descending order.
        """
        # NOTE(review): ``category`` is only used for the heading; the file
        # list is not filtered by it. Confirm whether that is intended.
        file_list = sorted(f for f in FILE_LIST if 'item' not in f)
        items = []
        for file in file_list:
            mf = Markdown(file)
            profix = '专辑' if 'album' in mf.route else '文章'
            found = re.findall(RE_DATE, mf.route)
            published = found[0] if found else ''
            items.append('- {} [{}] [{}]({})'.format(published, profix,
                                                     mf.title, mf.route))
        items.sort(reverse=True)
        heading = '{} {}'.format('#' * level, category)
        return '\n'.join([heading] + items)

    @classmethod
    def sync_data(cls):
        """Insert one row per category with its Markdown stored in ``ext['md']``."""
        for cg in cls.get_categorys():
            content = cls.get_md(cg, 1)
            row = cls(name=cg, ext=dict(md=content))
            db.session.add(row)
            db.session.commit()
class Nav(BaseModel, db.Model):
    """A named navigation entry pointing at a url (table ``nav``)."""

    __tablename__ = 'nav'

    id = db.Column(db.BIGINT, primary_key=True)
    name = db.Column(db.String, default="")
    url = db.Column(db.String, default="")
    # Callable default: each row gets its own dict instead of sharing one.
    ext_property = db.Column(db.JSON, default=dict)
    is_available = db.Column(db.INT, default=1)
    # Pass the callable, not its result, so the timestamp is taken at insert
    # time rather than once when the module is imported.
    create_ts = db.Column(db.TIMESTAMP, default=datetime.now)
    update_ts = db.Column(db.TIMESTAMP, default=datetime.now)
class CallHistory(db.Model, ResourceMixin):
    """A call between an HR account and an applicant (table ``call_history``)."""

    __tablename__ = 'call_history'

    id = db.Column(db.Integer, primary_key=True)
    # Pass the callable, not its result, so the timestamp is taken at insert
    # time. The right-hand side still resolves to the imported ``datetime``
    # because the column name is only bound after this statement runs.
    datetime = db.Column(db.DateTime, nullable=False, default=datetime.now)

    # ======================== RELATIONSHIPS =================================
    hr_id = db.Column(db.Integer, db.ForeignKey('account.id'))
    hr = db.relationship('Account', backref=db.backref('calls', lazy=True))
    applicant_id = db.Column(db.Integer, db.ForeignKey('applicant.id'))
    applicant = db.relationship('Applicant',
                                backref=db.backref('calls', lazy=True))

    @classmethod
    def find_call(cls, id):
        """Return a query filtered to the call whose id equals ``id``.

        The query itself is returned, not a row; the caller is expected to
        finish it (for example with ``.first()``).
        """
        return CallHistory.query.filter(cls.id == id)

    def save(self):
        """Stamp the call with the current time, then add and commit it."""
        # Inside a method ``datetime`` is the module-level import, not the
        # column defined on the class.
        self.datetime = datetime.now()
        db.session.add(self)
        db.session.commit()
class Crawler(BaseModel, db.Model):
    """A url record in the ``crawler`` table."""

    __tablename__ = 'crawler'

    id = db.Column(db.INT, primary_key=True)
    url = db.Column(db.String)
    # Callable default: each row gets its own dict instead of sharing one.
    ext = db.Column(db.JSON, default=dict)
    is_del = db.Column(db.INT, default=0)
    # Pass the callable, not its result, so the timestamp is taken at insert
    # time rather than once when the module is imported.
    create_ts = db.Column(db.TIMESTAMP, default=datetime.now)
    update_ts = db.Column(db.TIMESTAMP, default=datetime.now)
class CityWeather(db.Model):
    """Air-quality reading for a city (table ``cityweather``)."""

    __tablename__ = 'cityweather'

    # Row id.
    id = db.Column(db.Integer, primary_key=True)
    time = db.Column(db.String(32))
    # City name.
    city = db.Column(db.String(128))
    # AQI value.
    AQI = db.Column(db.String(32))
    # Air quality.
    quality = db.Column(db.String(8))
    # PM2.5 value.
    pm = db.Column(db.String(32))
class Config(BaseModel, db.Model):
    """A named configuration value."""

    id = db.Column(db.INT, primary_key=True)
    name = db.Column(db.String)
    value = db.Column(db.String, default='')
    # Callable default: each row gets its own dict instead of sharing one.
    ext = db.Column(db.JSON, default=dict)
    is_del = db.Column(db.INT, default=0)
    # Pass the callable, not its result, so the timestamp is taken at insert
    # time rather than once when the module is imported.
    create_ts = db.Column(db.TIMESTAMP, default=datetime.now)
    update_ts = db.Column(db.TIMESTAMP, default=datetime.now)

    @classmethod
    def sync(cls, **kwargs):
        """Create or update the entry identified by ``kwargs['name']``.

        When an item with that name exists it is updated with ``kwargs``;
        otherwise a new item is created from ``kwargs``.
        """
        existing = cls.query_item(name=kwargs.get('name'))
        if existing:
            cls.update_by_id(id=existing.id, **kwargs)
        else:
            cls.create(**kwargs)
class FileMetadata(db.Model):
    """Metadata of a file: size, name, hashes and type.

    Mapped to table ``filemetadata`` in schema ``filemetadata``.
    """

    __tablename__ = "filemetadata"
    __table_args__ = {"schema": "filemetadata"}

    id = db.Column(db.Integer, primary_key=True)
    size = db.Column(db.String)
    file_name = db.Column(db.String)
    # Both digests are declared unique.
    sha1 = db.Column(db.String, unique=True)
    md5 = db.Column(db.String, unique=True)
    type = db.Column(db.String, default="unknown/unknown")
class VisitUser(BaseModel, db.Model):
    """A visitor identified by the md5 of its ip and user agent."""

    __tablename__ = 'visit_user'

    id = db.Column(db.INT, primary_key=True)
    ip = db.Column(db.String)
    user_agent = db.Column(db.String, default="")
    md5 = db.Column(db.String, default="")
    # Callable default: each row gets its own dict instead of sharing one.
    ext = db.Column(db.JSON, default=dict)
    is_del = db.Column(db.INT, default=0)
    # Pass the callable, not its result, so the timestamp is taken at insert
    # time rather than once when the module is imported.
    create_ts = db.Column(db.TIMESTAMP, default=datetime.now)
    update_ts = db.Column(db.TIMESTAMP, default=datetime.now)

    @classmethod
    def generate_md5(cls, ip, user_agent):
        """Return the md5 of ``"<ip>;<user_agent>"``."""
        return security.md5('{};{}'.format(ip, user_agent))

    @classmethod
    def log(cls):
        """Record a visit for the current request and return the visit log.

        Looks the visitor up by the md5 of the request's ip and user agent,
        creating it first if it does not exist yet.
        """
        ip = request.remote_addr
        user_agent = request.user_agent
        md5 = cls.generate_md5(ip, user_agent)
        user = cls.query_item(md5=md5)
        if not user:
            user = cls(ip=ip, user_agent=str(user_agent), md5=md5,
                       ext=dict(request.headers))
            user.create_self()
        return user.visit()

    def visit(self):
        """Create a ``VisitLog`` entry for the current request."""
        return VisitLog.create(visit_user_id=self.id,
                               url=request.url,
                               method=request.method,
                               path=request.path,
                               referrer=request.referrer,
                               domain=request.headers.get('host'),
                               ext=dict(request.headers))
class VisitorLogDate(BaseModel, db.Model):
    """Daily site-wide visit totals (table ``visitor_log_date``)."""

    __tablename__ = 'visitor_log_date'

    id = db.Column(db.INT, primary_key=True)
    visit_date = db.Column(db.Date, default=date.today)
    pv = db.Column(db.INT, default=0)
    uv = db.Column(db.INT, default=0)
    # Callable default: each row gets its own dict instead of sharing one.
    ext_property = db.Column(db.JSON, default=dict)
    is_available = db.Column(db.INT, default=1)
    # Pass the callable, not its result, so the timestamp is taken at insert
    # time rather than once when the module is imported.
    create_ts = db.Column(db.TIMESTAMP, default=datetime.now)
    update_ts = db.Column(db.TIMESTAMP, default=datetime.now)

    @classmethod
    def statistics_visitor(cls):
        """Recompute ``pv`` and ``uv`` for today and yesterday.

        For each day, groups matching ``VisitorLog`` rows by ``md5``: ``uv`` is
        the number of groups and ``pv`` the total number of rows.
        """

        def _statistics(query_day):
            """Compute and store the totals for the single day ``query_day``."""
            VL = VisitorLog
            sql = ('is_bot = 0 and is_available = 1 and url like :url and '
                   'visit_date = :date')
            res = (db.session.query(VL.md5, func.count(VL.md5))
                   .filter(text(sql))
                   .params(date=query_day, url='https://wxnacy.com%')
                   .group_by(VL.md5)
                   .all())
            uv = len(res)
            pv = sum(row[1] for row in res)
            item = cls.query_item(visit_date=query_day)
            if not item:
                item = cls.create(visit_date=query_day)
            item.pv = pv
            item.uv = uv
            item.update_self()
            logger.debug('statistics_visitor %s', item)

        _statistics(date.today())
        _statistics(date.today() - timedelta(days=1))
class Jobs(db.Model):
    """A job posting stored in the ``data`` table."""

    __tablename__ = "data"

    sno = db.Column(db.Integer, primary_key=True)
    company = db.Column(db.String(100))
    location = db.Column(db.String(100))
    role = db.Column(db.String(60))
    type = db.Column(db.String(30))
    sector = db.Column(db.String(50))
    link = db.Column(db.String(200))

    def __init__(self, sno, company, location, role, type, sector, link):
        """Create a job row from its column values.

        Also sets ``self.list`` to a new empty list on the instance.
        """
        self.sno = sno
        self.company = company
        self.location = location
        self.role = role
        self.type = type
        self.sector = sector
        self.link = link
        self.list = []
class User(BaseModel, db.Model):
    """A user account with a name and password (table ``user``)."""

    __tablename__ = 'user'

    id = db.Column(db.BIGINT, primary_key=True)
    name = db.Column(db.String(32), default="")
    password = db.Column(db.String(256), default="")
    # Callable default: each row gets its own dict instead of sharing one.
    ext_property = db.Column(db.JSON, default=dict)
    is_available = db.Column(db.INT, default=1)
    # Pass the callable, not its result, so the timestamp is taken at insert
    # time rather than once when the module is imported.
    create_ts = db.Column(db.TIMESTAMP, default=datetime.now)
    update_ts = db.Column(db.TIMESTAMP, default=datetime.now)

    def is_active(self):
        """Return True when the user has an id."""
        return self.id is not None

    def is_authenticated(self):
        """Return True when the user has an id."""
        return self.id is not None

    def get_id(self):
        """Return the user's id."""
        return self.id

    def is_anonymous(self):
        """Return True when the user has no id."""
        return self.id is None
class Books(db.Model):
    """ORM mapping of the ``books`` table."""

    __tablename__ = "books"

    id = db.Column(db.Integer,
                   primary_key=True,
                   autoincrement=True,
                   nullable=True)
    book_id = db.Column(db.Integer)
    goodreads_book_id = db.Column(db.Integer)
    isbn13 = db.Column(db.Integer)
    authors = db.Column(db.String(200))
    original_publication_year = db.Column(db.Integer)
    original_title = db.Column(db.String(120))
    title = db.Column(db.String(400))
    language_code = db.Column(db.String(40))
    average_rating = db.Column(db.Float)
    ratings_count = db.Column(db.Integer)
    image_url = db.Column(db.String(200))

    @classmethod
    def get_paginated(cls, page):
        """Return the books on ``page``, 12 per page.

        ``error_out=False`` means an out-of-range page does not abort the
        request.
        """
        # Keyword arguments work on every Flask-SQLAlchemy version; newer
        # releases no longer accept these positionally.
        pagination = cls.query.paginate(page=page, per_page=12,
                                        error_out=False)
        return pagination.items

    @classmethod
    def get_list(cls, ids):
        """Return a query for the books whose ``book_id`` is in ``ids``."""
        return cls.query.filter(cls.book_id.in_(ids))

    @classmethod
    def get_list_from_goodreads_ids(cls, ids):
        """Return a query for the books whose ``goodreads_book_id`` is in ``ids``."""
        return cls.query.filter(cls.goodreads_book_id.in_(ids))

    @classmethod
    def get_book(cls, id):
        """Return one book's instance dict with its tag names added.

        Args:
            id: Value matched against the ``book_id`` column.

        Returns:
            dict or None: The book's ``__dict__`` with an extra ``"tags"`` key
            holding up to 10 tag names, or None when no book matches or an
            error occurs (errors are logged).
        """
        try:
            res = cls.query.filter_by(book_id=id).first()
            # Check before touching ``res`` so a missing book is a plain None
            # instead of an AttributeError logged as an error.
            if res is None:
                return None
            tags = BookTags.get_tags(res.__dict__.get("goodreads_book_id"),
                                     n=10)
            res.__dict__["tags"] = [tag.get("tag_name") for tag in tags]
            return res.__dict__
        except Exception as e:
            logging.error(e)
            return None
class Account(db.Model, UserMixin, ResourceMixin):
    """A user account with credentials, profile data and sign-in tracking."""

    __tablename__ = 'account'

    id = db.Column(db.Integer, primary_key=True)
    first_name = db.Column(db.String(50), nullable=False)
    last_name = db.Column(db.String(50), nullable=False)
    username = db.Column(db.String(30), nullable=False, unique=True)
    email = db.Column(db.String(50), nullable=False, unique=True)
    password = db.Column(db.String(1024), nullable=False)
    # Stored in the database column named ``is_active``.
    active = db.Column('is_active', db.Boolean(), nullable=False,
                       server_default='1')
    profile_pic = db.Column(db.String(300), nullable=False,
                            default='static/images/default.jpg')

    # Sign-in tracking.
    sign_in_count = db.Column(db.Integer, nullable=False, default=0)
    current_sign_in_date = db.Column(db.DateTime)
    current_sign_in_ip = db.Column(db.String(200))
    last_sign_in_date = db.Column(db.DateTime)
    last_sign_in_ip = db.Column(db.String(200))

    @classmethod
    def find(cls, identity):
        """Return the first account whose email or username is ``identity``.

        Returns None when no row matches.
        """
        by_email = cls.email == identity
        by_username = cls.username == identity
        return Account.query.filter(by_email | by_username).first()

    def is_active(self):
        """Return the value of the ``active`` column."""
        # NOTE(review): if UserMixin is Flask-Login's, it defines is_active as
        # a property; as a method the bare attribute is always truthy.
        # Confirm how callers use it before changing.
        return self.active

    def serialize_token(self, expiration=300):
        """Return a signed token string carrying this account's email.

        Args:
            expiration: Passed to the serializer as its expiry setting.
        """
        secret = app.config['SECRET_KEY']
        signer = TimedJSONWebSignatureSerializer(secret, expiration)
        raw_token = signer.dumps({'email': self.email})
        return raw_token.decode('utf-8')

    @classmethod
    def deserialize_token(cls, token):
        """Return the account named in ``token``, or None on any failure."""
        signer = TimedJSONWebSignatureSerializer(app.config['SECRET_KEY'])
        try:
            payload = signer.loads(token)
            return Account.find(payload.get('email'))
        except Exception:
            return None

    def update_activity_tracking(self, ip_address):
        """Record a sign-in from ``ip_address`` and save the account.

        The previous "current" date and ip are moved to the "last" fields
        before the new values are written.
        """
        self.sign_in_count = self.sign_in_count + 1

        # Shift current -> last first; the order matters.
        self.last_sign_in_date = self.current_sign_in_date
        self.last_sign_in_ip = self.current_sign_in_ip

        self.current_sign_in_date = datetime.now()
        self.current_sign_in_ip = ip_address

        self.save()

    def get_posts(self, page=None):
        """Return this account's posts.

        Args:
            page: Page number to fetch; when None the ``posts`` relationship
                is returned as is.

        Returns:
            ``self.posts`` when ``page`` is None, otherwise the result of
            ``paginate`` (5 per page) over this account's posts ordered by
            ``create_date`` descending.
        """
        if page is None:
            return self.posts
        own_posts = Post.query.filter(Post.account_id == self.id)
        newest_first = own_posts.order_by(Post.create_date.desc())
        return newest_first.paginate(page=page, per_page=5)
class User(BaseModel, db.Model):
    """A user who registers and logs in with an email and password."""

    __tablename__ = 'user'

    id = db.Column(db.INT, primary_key=True)
    name = db.Column(db.String, default='')
    email = db.Column(db.String, default='')
    mobile = db.Column(db.String, default='')
    password = db.Column(db.String, default='')
    status = db.Column(db.String, default='')
    type = db.Column(db.String, default='')
    # Callable default: each row gets its own dict instead of sharing one.
    ext_property = db.Column(db.JSON, default=dict)
    is_available = db.Column(db.INT, default=1)
    # Pass the callable, not its result, so the timestamp is taken at insert
    # time rather than once when the module is imported.
    create_ts = db.Column(db.TIMESTAMP, default=datetime.now)
    update_ts = db.Column(db.TIMESTAMP, default=datetime.now)

    @property
    def real_mobile(self):
        """Return the mobile number, or '' for a 7-character value.

        ``create`` fills ``mobile`` with a random 7-character string, so a
        value of that length is treated as "no real number".
        """
        if len(self.mobile) == 7:
            return ''
        return self.mobile

    def format(self):
        """Return the formatted user dict for API output.

        The password is removed, ``mobile`` is replaced by ``real_mobile`` and
        a freshly generated authorization value is added under
        ``BaseConfig.HEAD_AUTHORIZATION``.
        """
        item = super().format()
        item.pop('password')
        item['mobile'] = self.real_mobile
        item[BaseConfig.HEAD_AUTHORIZATION] = self.generate_authorization()
        return item

    def generate_authorization(self):
        """Return the AES-encrypted string ``"<id>;<unix time>;"``."""
        return aes.encrypt(f'{self.id};{int(time.time())};')

    @classmethod
    def create(cls, **kw):
        """Create a user, defaulting name to the email and mobile to a random string."""
        kw['name'] = kw['email']
        kw['mobile'] = ID.random_str(7)
        return super().create(**kw)

    @property
    def authorization(self):
        """A freshly generated authorization value."""
        return self.generate_authorization()

    @classmethod
    def generate_password(cls, password):
        """Return the stored form of ``password``: md5 of it plus a fixed suffix."""
        return security.md5(f'{password};!@#$%')

    @classmethod
    def login(cls, email, password):
        """Look up a user by email and password.

        Returns:
            tuple: ``(200, user)`` on success, or ``(401, message)`` when no
            user matches.
        """
        pw = cls.generate_password(password)
        item = cls.query_item(email=email, password=pw)
        if not item:
            return 401, '邮箱或密码错误'
        return 200, item

    @classmethod
    def register(cls, email, password):
        """Create a user for ``email`` unless one already exists.

        Returns:
            tuple: ``(200, user)`` on success, or ``(403, message)`` when the
            email is already registered.
        """
        if cls.query_item(email=email):
            return 403, '邮箱已注册,请直接登录'
        pw = cls.generate_password(password)
        return 200, cls.create(email=email, password=pw)

    @classmethod
    def get_user_from_authorization(cls, authorization):
        """Return the user encoded in ``authorization``, or None.

        None is returned when decryption fails (the traceback is logged) or
        when the decrypted text contains no ``;``.
        """
        try:
            plain = aes.decrypt(authorization)
            if ';' not in plain:
                return None
        except Exception:
            # Narrower than a bare ``except`` so KeyboardInterrupt and
            # SystemExit are not swallowed.
            logger.error(traceback.format_exc())
            return None
        user_id = plain.split(';')[0]
        return cls.query_by_id(user_id)
class VisitorLog(BaseModel, db.Model):
    """One recorded page visit (table ``visitor_log``)."""

    __tablename__ = 'visitor_log'

    id = db.Column(db.INT, primary_key=True)
    ip = db.Column(db.String, default='')
    url = db.Column(db.String, default='')
    referrer = db.Column(db.String, default='')
    user_agent = db.Column(db.String, default='')
    os = db.Column(db.String, default='')
    device = db.Column(db.String, default='')
    device_type = db.Column(db.String, default='')
    browser = db.Column(db.String, default='')
    md5 = db.Column(db.String, default='')
    visit_date = db.Column(db.Date, default=date.today)
    is_bot = db.Column(db.INT, default=0)
    # Callable default: each row gets its own dict instead of sharing one.
    ext_property = db.Column(db.JSON, default=dict)
    is_available = db.Column(db.INT, default=1)
    # Pass the callable, not its result, so the timestamp is taken at insert
    # time rather than once when the module is imported.
    create_ts = db.Column(db.TIMESTAMP, default=datetime.now)
    update_ts = db.Column(db.TIMESTAMP, default=datetime.now)

    @property
    def region(self):
        """``"<country>-<city>"`` built from ``ext_property``."""
        ext = self.ext_property
        return '{}-{}'.format(ext.get('country'), ext.get('city'))

    @classmethod
    def query_items(cls, **kw):
        """Return one day's non-bot, available visits to the site, newest first.

        Args:
            **kw: ``date`` selects the day; it defaults to today's ISO date.
        """
        query_date = kw.get('date', date.today().isoformat())
        logger.debug(query_date)
        sql = ('is_bot = 0 and is_available = 1 and '
               'url like "%wxnacy.com%" and visit_date = :date')
        return (cls.query.filter(text(sql))
                .params(date=query_date)
                .order_by(desc(cls.create_ts))
                .all())

    @classmethod
    def query_rank(cls, **kw):
        """Return ``(url, visit count)`` rows for one day, most visited first.

        Args:
            **kw: ``day`` selects the day; it defaults to today.
        """
        logger.debug(kw)
        qd = kw['day'] if 'day' in kw else date.today()
        # Order by the labelled aggregate itself; the model has no ``rank``
        # attribute, so referencing ``cls.rank`` raised AttributeError.
        rank = func.count(cls.url).label('rank')
        return (db.session.query(cls.url, rank)
                .filter(cls.visit_date == qd)
                .group_by(cls.url)
                .order_by(rank.desc())
                .all())

    @classmethod
    def query_hot(cls):
        """Return ``(url, visit count)`` rows for the last 7 days, most visited first."""
        rank = func.count(cls.url).label('rank')
        since = date.today() - timedelta(days=7)
        return (db.session.query(cls.url, rank)
                .filter(cls.visit_date >= since)
                .group_by(cls.url)
                .order_by(rank.desc())
                .all())

    @classmethod
    def visit(cls, **kw):
        """Create a visit row from request data.

        ``kw`` must contain ``user_agent`` and ``ip``. The user agent is
        parsed to fill os, device, browser, device type and the bot flag; the
        md5 of ``"<ip>;<user_agent>"`` and today's date are added as well.

        Returns:
            The result of ``VisitorLog.create``.
        """
        begin = time.time()
        ua = parse(kw['user_agent'])
        kw['os'] = ua.os.family
        kw['device'] = ua.device.family
        kw['browser'] = ua.browser.family
        if ua.is_mobile:
            device_type = 'mobile'
        elif ua.is_tablet:
            device_type = 'tablet'
        else:
            device_type = 'pc'
        kw['device_type'] = device_type
        kw['is_bot'] = ua.is_bot
        kw['md5'] = security.md5('{};{}'.format(kw['ip'], kw['user_agent']))
        kw['visit_date'] = date.today()
        res = VisitorLog.create(**kw)
        logger.debug('visit time: %s', (time.time() - begin))
        return res
class Article(BaseModel, db.Model):
    """A site article with crawled metadata and page-view counters."""

    __tablename__ = 'article'

    id = db.Column(db.BIGINT, primary_key=True)
    name = db.Column(db.String, default="")
    url = db.Column(db.String, default="")
    publish_date = db.Column(db.DATE, default='2017-08-04')
    tag = db.Column(db.String, default="")
    # Number of views.
    pv = db.Column(db.INT, default=0, doc="观看数量")
    # Initial view count.
    init_pv = db.Column(db.INT, default=0, doc="初始值")
    # Callable default: each row gets its own dict instead of sharing one.
    ext_property = db.Column(db.JSON, default=dict)
    is_available = db.Column(db.INT, default=1)
    # Pass the callable, not its result, so the timestamp is taken at insert
    # time rather than once when the module is imported.
    create_ts = db.Column(db.TIMESTAMP, default=datetime.now)
    update_ts = db.Column(db.TIMESTAMP, default=datetime.now)

    DOMAIN = 'https://wxnacy.com'

    @classmethod
    def query_or_create(cls, url, **kw):
        """Return the article for ``url``, creating it if needed.

        Urls outside the site, the home page, and anything under
        ``/archives`` or ``/page`` are ignored. The query string, fragment and
        a trailing ``index.html`` are stripped before the lookup.

        Returns:
            The article, or None when the url is ignored.
        """
        domain = cls.DOMAIN
        if not url.startswith(domain):
            return None
        if url in (domain, domain + '/'):
            return None
        if url.startswith((domain + '/archives', domain + '/page')):
            return None
        url = url.split('?', 1)[0]
        url = url.split('#', 1)[0]
        if url.endswith('index.html'):
            # Strip the suffix itself; searching for the first occurrence
            # would cut too early if the text also appears earlier in the path.
            url = url[:-len('index.html')]
        kw['url'] = url
        return super().query_or_create(**kw)

    @classmethod
    def crawler(cls, url, **kw):
        """Fetch metadata for ``url`` and store it on its article.

        Returns:
            The article, or None when ``query_or_create`` ignores the url.
        """
        item = cls.query_or_create(url=url)
        if not item:
            return None
        params = cls.get_crawler_data(url)
        cls.update_by_id(item.id, **params)
        return item

    def crawler_self(self):
        """Re-fetch this article's metadata, store it, and return self."""
        params = Article.get_crawler_data(self.url)
        print(params)
        Article.update_by_id(self.id, **params)
        return self

    @classmethod
    def crawler_article(cls):
        """Crawl the first 23 listing pages and each article linked from them."""

        def _crawler(url):
            """Crawl every ``a.article-title`` link found on the page at ``url``."""
            res = requests.get(url)
            soup = BeautifulSoup(res.content, 'html.parser')
            for link in soup.find_all('a', class_='article-title'):
                item_url = '{}{}'.format(cls.DOMAIN, link.attrs['href'])
                print(item_url)
                cls.crawler(url=item_url)

        for i in range(23):
            # The first listing page is the site root; later ones are /page/N.
            if i == 0:
                url = 'https://wxnacy.com'
            else:
                url = '{}/page/{}'.format(cls.DOMAIN, i + 1)
            _crawler(url)

    def create_init_pv(self):
        """Fetch this article's page-view count from busuanzi into ``init_pv``.

        The response is expected to be a JSONP callback wrapper; when it has
        that shape the ``page_pv`` value (default 0) is stored and saved.
        """
        url = "https://busuanzi.ibruce.info/busuanzi?jsonpCallback=BusuanziCallback_768395723167"
        res = requests.get(url, headers=dict(referer=self.url))
        suffix = ');}catch(e){}'
        prefix = 'try{BusuanziCallback_768395723167('
        body = res.content.decode('utf-8')
        if body.startswith(prefix) and body.endswith(suffix):
            # Keep only the JSON between the callback wrapper's two ends.
            json_text = body[len(prefix):-len(suffix)]
            data = json.loads(json_text)
            print(data)
            print(data.get('page_pv', 0))
            self.init_pv = data.get('page_pv', 0)
            print(self.init_pv)
            self.update_self()

    @classmethod
    def statistics_init_pv(cls):
        """Fetch ``init_pv`` for every article where it is still 0."""
        items = Article.query_items(init_pv=0)
        print(len(items))
        for item in items:
            item.create_init_pv()

    @classmethod
    def statistics_article(cls):
        """Re-crawl the metadata of every article and log the elapsed time."""
        begin = time.time()
        for item in Article.query_items():
            item.crawler_self()
        logger.debug('statistics_article time: %s', (time.time() - begin))

    @classmethod
    def get_crawler_data(cls, url):
        """Download ``url`` and extract article metadata from its meta tags.

        Returns:
            dict: ``publish_date`` always; ``name`` and ``tag`` when the
            matching meta tags are present. The publish date comes from the
            url path when it has enough segments, else from
            ``og:updated_time``, else ``'2017-08-04'``.
        """
        params = {}
        res = requests.get(url)
        soup = BeautifulSoup(res.content, 'html.parser')
        publish_date = '2017-08-04'
        for meta in soup.find_all('meta'):
            attrs = meta.attrs
            if attrs.get('property') == 'og:title':
                params['name'] = attrs['content']
            elif attrs.get('name') == 'keywords':
                params['tag'] = attrs['content']
            elif attrs.get('property') == 'og:updated_time':
                publish_date = attrs['content'][0:10]
        parts = url.split('/')
        if len(parts) > 5:
            # Segments 3-5 of the split url are used as year, month and day.
            publish_date = '{}-{}-{}'.format(parts[3], parts[4], parts[5])
        params['publish_date'] = publish_date
        return params

    @classmethod
    def statistics_pv(cls):
        """Set each article's ``pv`` to the sum of its ``ArticleData.pv`` rows.

        Articles whose sum is empty or zero are left unchanged.
        """
        AD = ArticleData
        for item in cls.query_items():
            # NOTE(review): the result of this call is discarded; it looks
            # redundant but is kept in case query_items has side effects.
            ArticleData.query_items(article_id=item.id)
            res = (db.session.query(func.sum(AD.pv))
                   .filter_by(article_id=item.id)
                   .all())
            total = res[0][0]
            if total:
                item.pv = total
                db.session.add(item)
                db.session.commit()
class Test(BaseModel, db.Model):
    """Model for the ``test`` table; it has only a primary key."""

    __tablename__ = 'test'

    id = db.Column(
        db.INT,
        primary_key=True,
    )