Ejemplo n.º 1
0
class Post(db.Model, ResourceMixin):
    """A titled piece of content that belongs to one account.

    ``create_date``, used for ordering in ``get_all_posts``, is not declared
    in this class; presumably it is supplied by ``ResourceMixin``.
    """

    __tablename__ = 'post'

    id = db.Column(db.Integer, primary_key=True)

    title = db.Column(db.String(100), nullable=False)
    content = db.Column(db.String(1024), nullable=False)

    # ===================== Relationships ==================================
    # Owning account (required). The backref exposes the reverse collection
    # on the account side under the name ``posts``.
    account_id = db.Column(
        db.Integer,
        db.ForeignKey('account.id'),
        nullable=False,
    )
    account = db.relationship(
        'Account',
        backref=db.backref('posts', lazy=True),
    )

    @classmethod
    def find(cls, identity):
        """Return the first post whose title or id equals ``identity``.

        Args:
            identity: Value compared against both ``title`` and ``id``.

        Returns:
            The first matching row, or ``None`` when nothing matches.
        """
        by_title = cls.title == identity
        by_id = cls.id == identity
        return Post.query.filter(by_title | by_id).first()

    @classmethod
    def get_all_posts(cls, page=None):
        """Return posts ordered newest first.

        Args:
            page: When ``None`` every post is returned as a list; otherwise
                the given page is returned with 5 posts per page.

        Returns:
            A list of posts, or the result of ``paginate`` when ``page`` is
            given.
        """
        newest_first = Post.query.order_by(Post.create_date.desc())
        if page is not None:
            return newest_first.paginate(page=page, per_page=5)
        return newest_first.all()
Ejemplo n.º 2
0
class SimilarBooks(db.Model):
    """Maps a Goodreads book id to the id of a book considered similar."""

    __tablename__ = "similar_books"

    # NOTE(review): nullable=True on a primary-key column is contradictory;
    # left untouched so the declared schema does not change - confirm intent.
    id = db.Column(db.Integer,
                   primary_key=True,
                   autoincrement=True,
                   nullable=True)
    goodreads_book_id = db.Column(db.Integer)
    sim_goodreads_book_id = db.Column(db.Integer)

    @classmethod
    def get_sim_ids(cls, gid):
        """Get the list of similar book ids for a given book.

        Args:
            gid: Goodreads book id provided in the dataset, matched against
                the Integer column `goodreads_book_id`.

        Returns:
            list: The `sim_goodreads_book_id` value of every matching row
            (empty when no row matches).
        """
        # Read the mapped attribute rather than poking at ``__dict__``, which
        # bypasses SQLAlchemy's attribute instrumentation.
        return [
            row.sim_goodreads_book_id
            for row in cls.query.filter_by(goodreads_book_id=gid)
        ]
Ejemplo n.º 3
0
class BookTags(db.Model):
    """One row per (book, tag) pair together with how often the tag was used."""

    __tablename__ = "book_tags"

    # NOTE(review): nullable=True on a primary key looks unintended - confirm.
    id = db.Column(
        db.Integer,
        primary_key=True,
        autoincrement=True,
        nullable=True,
    )
    goodreads_book_id = db.Column(db.Integer)
    tag_id = db.Column(db.Integer)
    tag_name = db.Column(db.String(120))
    count = db.Column(db.Integer)

    @classmethod
    def get_tags(cls, gid, n=5):
        """Return the most-used tags of a book.

        Args:
            gid: Goodreads book id, matched against `goodreads_book_id`.
            n: Maximum number of tags to return (default 5).

        Returns:
            list: Up to ``n`` dicts ordered by `count` descending. Each dict
            is the raw ``__dict__`` of the loaded row object, so it holds the
            loaded column values (e.g. `tag_id`, `tag_name`, `count`) and,
            for SQLAlchemy-mapped objects, normally the internal
            `_sa_instance_state` entry as well.
        """
        for_book = cls.query.filter_by(goodreads_book_id=gid)
        most_used = for_book.order_by(cls.count.desc()).limit(n)
        return [vars(row) for row in most_used]
Ejemplo n.º 4
0
class Ratings(db.Model):
    """A single rating given by a user to a book."""

    __tablename__ = "ratings"

    # NOTE(review): nullable=True on a primary key looks unintended - confirm.
    id = db.Column(
        db.Integer,
        primary_key=True,
        autoincrement=True,
        nullable=True,
    )
    # Plain integer columns: no foreign-key constraints are declared here.
    user_id = db.Column(db.Integer)
    book_id = db.Column(db.Integer)
    rating = db.Column(db.Integer)
Ejemplo n.º 5
0
class BookTags(db.Model):
    """ORM class for the `book_tags` table."""

    # Name of the table this class is mapped to.
    __tablename__ = "book_tags"

    # Surrogate row id.
    # NOTE(review): nullable=True on a primary key looks unintended - confirm.
    id = db.Column(
        db.Integer,
        primary_key=True,
        autoincrement=True,
        nullable=True,
    )
    # Id of the book in the Goodreads dataset.
    goodreads_book_id = db.Column(db.Integer)
    # Id of the tag in the dataset.
    tag_id = db.Column(db.Integer)
    # Name of the tag that corresponds to `tag_id`.
    tag_name = db.Column(db.String(120))
    count = db.Column(db.Integer)

    @classmethod
    def get_tags(cls, gid, n=5):
        """Return the most-used tags of a book.

        Args:
            gid: Goodreads book id, matched against `goodreads_book_id`.
            n: Maximum number of tags to return (default 5).

        Returns:
            list: Up to ``n`` dicts ordered by `count` descending. Each dict
            is the raw ``__dict__`` of the loaded row object, so it holds the
            loaded column values (e.g. `tag_id`, `tag_name`, `count`) and,
            for SQLAlchemy-mapped objects, normally the internal
            `_sa_instance_state` entry as well.
        """
        for_book = cls.query.filter_by(goodreads_book_id=gid)
        most_used = for_book.order_by(cls.count.desc()).limit(n)
        return [vars(row) for row in most_used]
Ejemplo n.º 6
0
class Author(db.Model):
    """An author, identified by a required name."""

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(100), nullable=False)
    # Relationship to 'Book'; the backref adds an ``author`` attribute on the
    # other side. lazy='dynamic' makes ``book`` a query object instead of a
    # pre-loaded list.
    book = db.relationship(
        'Book',
        backref='author',
        lazy='dynamic',
    )

    def __init__(self, name):
        """Create an author with the given ``name``."""
        self.name = name
Ejemplo n.º 7
0
class Book(db.Model):
    """A book with a title, a description and a mandatory author reference."""

    id = db.Column(db.Integer, primary_key=True)
    title = db.Column(db.String(100), nullable=False)
    description = db.Column(db.String(200), nullable=False)
    # Foreign key into the `author` table; a book cannot exist without one.
    author_id = db.Column(
        db.Integer,
        db.ForeignKey('author.id'),
        nullable=False,
    )

    def __init__(self, title, description, author_id):
        """Store the three required fields on the new instance."""
        self.title = title
        self.description = description
        self.author_id = author_id
Ejemplo n.º 8
0
class ArticleData(BaseModel, db.Model):
    """Per-article, per-day visit counters."""

    __tablename__ = 'article_data'
    id = db.Column(db.INT, primary_key=True)
    article_id = db.Column(db.INT, db.ForeignKey('article.id'))
    visit_date = db.Column(db.Date, default=date.today)
    pv = db.Column(db.INT, default=0)
    uv = db.Column(db.INT, default=0)
    # Callable default: every row gets its own dict instead of sharing one.
    ext_property = db.Column(db.JSON, default=dict)
    is_available = db.Column(db.INT, default=1)
    # Pass the callable, not its result: ``datetime.now()`` would be evaluated
    # once at import time and every row would get that same stale timestamp.
    create_ts = db.Column(db.TIMESTAMP, default=datetime.now)
    update_ts = db.Column(db.TIMESTAMP, default=datetime.now)

    @classmethod
    def statistics_article_data(cls):
        """Recompute the per-article ``pv`` for today and for yesterday.

        For each day, visits in ``VisitorLog`` are counted per URL; every URL
        that ``Article.query_or_create`` resolves to an article gets its
        ``ArticleData`` row for that day updated with the count. One commit
        is issued per day.
        """
        def _statistics(vd):
            begin = time.time()
            VL = VisitorLog
            # (url, number of log rows) for the requested day.
            items = db.session.query(VL.url, func.count(
                VL.url)).filter(VL.visit_date == vd).group_by(VL.url).all()
            for url, visits in items:
                article = Article.query_or_create(url=url)
                if article:
                    ad = cls.query_or_create(article_id=article.id,
                                             visit_date=vd)
                    ad.pv = visits
                    db.session.add(ad)
            db.session.commit()

            logger.debug('statistics_article_data time: %s',
                         (time.time() - begin))

        # Resolve "today" once so both days come from the same instant.
        today = date.today()
        for vd in (today, today - timedelta(days=1)):
            _statistics(vd)
Ejemplo n.º 9
0
class ResourceMixin(object):
    """Mixin adding creation/update timestamps and save/delete helpers."""

    # Pass the callable, not its result: ``datetime.now()`` would be evaluated
    # once at import time and reused as the default for every row.
    create_date = db.Column(db.DateTime, default=datetime.now)
    # NOTE(review): nothing here refreshes update_date on UPDATE (no
    # ``onupdate``); confirm whether that is intended.
    update_date = db.Column(db.DateTime, default=datetime.now)

    def save(self):
        """Add this object to the session and commit immediately."""
        db.session.add(self)
        db.session.commit()

    def delete(self):
        """Delete this object through the session and commit immediately."""
        db.session.delete(self)
        db.session.commit()
Ejemplo n.º 10
0
class Product(db.Model):
    """A product with a unique short name, a price and a stock quantity."""

    id = db.Column(db.Integer, primary_key=True)
    # Unique, at most 10 characters.
    name = db.Column(db.String(10), unique=True)
    description = db.Column(db.String(200))
    # NOTE(review): stored as a float; confirm that is acceptable for prices.
    price = db.Column(db.Float)
    qty = db.Column(db.Integer)

    def __init__(self, name, description, price, qty):
        """Populate all four product fields."""
        self.qty = qty
        self.price = price
        self.description = description
        self.name = name
Ejemplo n.º 11
0
class Record(db.Model):
    """A score keyed by a string id of up to 30 characters."""

    __tablename__ = 'record'

    id = db.Column(db.String(30), primary_key=True)
    score = db.Column(db.Integer, default=0, nullable=False)

    def save(self):
        """Add this record to the session, commit, and return the record."""
        session = db.session
        session.add(self)
        session.commit()
        return self

    @classmethod
    def find(cls, id):
        """Return the record whose primary key equals ``id``, or ``None``."""
        matching = Record.query.filter(cls.id == id)
        return matching.first()
Ejemplo n.º 12
0
class AutoId(BaseModel, db.Model):
    """Id generator backed by the database function ``func_auto_id``."""

    __tablename__ = 'auto_id'
    id = db.Column(db.INT, primary_key=True)
    shard_id = db.Column(db.INT, default=0)
    item_id = db.Column(db.INT, default=1)

    @classmethod
    def generate_id(cls, shard_id=0, item_id=1):
        """Call ``func_auto_id(shard_id, item_id)`` and return its result.

        Returns:
            The ``id`` field of the first row returned by ``BaseDB.query``.
        """
        rows = BaseDB.query('select func_auto_id(%s, %s) as id',
                            [shard_id, item_id])
        first_row = rows[0]
        return first_row['id']

    @classmethod
    def generate_str_id(cls, suffix='W'):
        """Return ``'<YYYYmmddHHMMSS>-<auto id>'`` for shard 0, item 1.

        Args:
            suffix: Accepted but not used by the current implementation.
        """
        rows = BaseDB.query('select func_auto_id(%s, %s) as id', [0, 1])
        auto_id = rows[0]["id"]
        return f'{datetime.now():%Y%m%d%H%M%S}-{auto_id}'
Ejemplo n.º 13
0
class Task(db.Model):
    """A uniquely named task with creation and modification timestamps."""

    id = db.Column(db.Integer, primary_key=True)
    task_name = db.Column(db.String(80), unique=True)
    date_created = db.Column(db.DateTime, default=datetime.datetime.utcnow)
    # NOTE(review): only a default is declared (no ``onupdate``), so this is
    # not refreshed automatically when the row changes - confirm intent.
    date_modified = db.Column(db.DateTime, default=datetime.datetime.utcnow)

    def __init__(self, task_name):
        """Create a task with the given name."""
        self.task_name = task_name

    def __repr__(self):
        """Return ``<Task NAME>``.

        The original literal lacked the ``f`` prefix and therefore always
        produced the text ``<Task {self.task_name}>`` verbatim.
        """
        return f'<Task {self.task_name}>'

    def to_json(self):
        """Return the task's fields as a dict.

        The two date entries are returned as stored on the instance (they are
        not converted to strings here).
        """
        return {
            'id': self.id,
            'task_name': self.task_name,
            'date_created': self.date_created,
            'date_modified': self.date_modified
        }
Ejemplo n.º 14
0
class Category(BaseModel, db.Model):
    """A content category whose listing is derived from ``FILE_LIST``."""

    __tablename__ = 'category'
    id = db.Column(db.BIGINT, primary_key=True)
    name = db.Column(db.String)
    content = db.Column(db.String, default="")
    # Callable default: every row gets its own dict instead of sharing one.
    ext = db.Column(db.JSON, default=dict)
    is_del = db.Column(db.INT, default=0)
    # Pass the callable, not its result: ``datetime.now()`` would be evaluated
    # once at import time and reused as the default for every row.
    create_ts = db.Column(db.TIMESTAMP, default=datetime.now)
    update_ts = db.Column(db.TIMESTAMP, default=datetime.now)

    @classmethod
    def get_categorys_md(cls):
        """Return a ``Markdown`` object listing every category as a link.

        The content is a heading line followed by one ``- [name](/name)``
        bullet per category returned by ``get_categorys``.
        """
        lines = ['# 分类']  # heading text means "Categories"
        lines.extend('- [{}](/{})'.format(cg, cg)
                     for cg in cls.get_categorys())
        return Markdown(content='\n'.join(lines))

    @classmethod
    def get_categorys(cls):
        """Return the sorted, de-duplicated category names.

        A category name is the part of each ``FILE_LIST`` entry before the
        first ``-``. Names starting with ``.`` (such as ``.DS_Store``) are
        dropped.
        """
        names = {entry.split('-', 1)[0] for entry in FILE_LIST}
        return sorted(name for name in names if not name.startswith('.'))

    @classmethod
    def get_md(cls, category, level):
        """Build the Markdown text for a category's article list.

        Args:
            category: Text placed in the heading line.
            level: Heading depth, i.e. the number of ``#`` characters.

        Returns:
            str: The heading followed by one bullet per listed file, in
            reverse-sorted order.

        NOTE(review): ``category`` is only used for the heading; every
        ``FILE_LIST`` entry without 'item' in it is listed regardless of
        category. Confirm whether filtering by category was intended.
        """
        file_list = sorted(name for name in FILE_LIST if 'item' not in name)
        article = ['{} {}'.format('#' * level, category)]
        items = []
        for file_name in file_list:
            mf = Markdown(file_name)
            # '专辑' = "album", '文章' = "article"
            profix = '专辑' if 'album' in mf.route else '文章'
            found = re.findall(RE_DATE, mf.route)
            item_date = found[0] if found else ''
            items.append('- {} [{}] [{}]({})'.format(item_date, profix,
                                                     mf.title, mf.route))
        # Sorting once after the loop gives the same final order as
        # re-sorting after every append, without the repeated work.
        items.sort(reverse=True)
        article.extend(items)

        return '\n'.join(article)

    @classmethod
    def sync_data(cls):
        """Insert one row per category with its Markdown under ``ext['md']``.

        All rows are added to the session and committed together.
        """
        for cg in cls.get_categorys():
            content = cls.get_md(cg, 1)
            db.session.add(cls(name=cg, ext=dict(md=content)))
        db.session.commit()
Ejemplo n.º 15
0
class Nav(BaseModel, db.Model):
    """A navigation entry: a display name and the URL it points to."""

    __tablename__ = 'nav'
    id = db.Column(db.BIGINT, primary_key=True)
    name = db.Column(db.String, default="")
    url = db.Column(db.String, default="")
    # Callable default: every row gets its own dict instead of sharing one.
    ext_property = db.Column(db.JSON, default=dict)
    is_available = db.Column(db.INT, default=1)
    # Pass the callable, not its result: ``datetime.now()`` would be evaluated
    # once at import time and reused as the default for every row.
    create_ts = db.Column(db.TIMESTAMP, default=datetime.now)
    update_ts = db.Column(db.TIMESTAMP, default=datetime.now)
Ejemplo n.º 16
0
class CallHistory(db.Model, ResourceMixin):
    """A call between an HR account and an applicant."""

    __tablename__ = 'call_history'

    id = db.Column(db.Integer, primary_key=True)
    # Pass the callable, not its result: ``datetime.now()`` would be evaluated
    # once at import time and reused as the default for every row.
    # The right-hand side is evaluated before the class attribute is bound,
    # so ``datetime`` here still refers to the imported name. Below this line
    # the class body sees the Column under that name.
    datetime = db.Column(db.DateTime, nullable=False, default=datetime.now)

    # ======================== RELATIONSHIPS =================================
    hr_id = db.Column(db.Integer, db.ForeignKey('account.id'))
    hr = db.relationship('Account', backref=db.backref('calls', lazy=True))

    applicant_id = db.Column(db.Integer, db.ForeignKey('applicant.id'))
    applicant = db.relationship('Applicant', backref=db.backref('calls', lazy=True))

    @classmethod
    def find_call(cls, id):
        """Return a query filtered to the call with the given ``id``.

        Note that the query itself is returned, not a row: callers must call
        ``.first()`` / ``.one()`` etc. on the result themselves.
        """
        return CallHistory.query.filter(cls.id == id)

    def save(self):
        """Stamp the call with the current time, then add and commit it."""
        # Inside a method the class scope is not searched, so ``datetime``
        # resolves to the module-level import, not the column above.
        self.datetime = datetime.now()
        db.session.add(self)
        db.session.commit()
Ejemplo n.º 17
0
class Crawler(BaseModel, db.Model):
    """A URL record with a JSON ``ext`` field and a deletion flag."""

    __tablename__ = 'crawler'
    id = db.Column(db.INT, primary_key=True)
    url = db.Column(db.String)
    # Callable default: every row gets its own dict instead of sharing one.
    ext = db.Column(db.JSON, default=dict)
    is_del = db.Column(db.INT, default=0)
    # Pass the callable, not its result: ``datetime.now()`` would be evaluated
    # once at import time and reused as the default for every row.
    create_ts = db.Column(db.TIMESTAMP, default=datetime.now)
    update_ts = db.Column(db.TIMESTAMP, default=datetime.now)
Ejemplo n.º 18
0
class CityWeather(db.Model):
    """Air-quality reading for a city; all measurements are stored as text."""

    __tablename__ = 'cityweather'

    # Row id
    id = db.Column(db.Integer, primary_key=True)
    time = db.Column(db.String(32))
    # City name
    city = db.Column(db.String(128))
    # AQI
    AQI = db.Column(db.String(32))
    # Air quality
    quality = db.Column(db.String(8))
    # PM2.5
    pm = db.Column(db.String(32))
Ejemplo n.º 19
0
class Config(BaseModel, db.Model):
    """A named configuration value."""

    id = db.Column(db.INT, primary_key=True)
    name = db.Column(db.String)
    value = db.Column(db.String, default='')
    # Callable default: every row gets its own dict instead of sharing one.
    ext = db.Column(db.JSON, default=dict)
    is_del = db.Column(db.INT, default=0)
    # Pass the callable, not its result: ``datetime.now()`` would be evaluated
    # once at import time and reused as the default for every row.
    create_ts = db.Column(db.TIMESTAMP, default=datetime.now)
    update_ts = db.Column(db.TIMESTAMP, default=datetime.now)

    @classmethod
    def sync(cls, **kwargs):
        """Create or update the entry identified by ``kwargs['name']``.

        If ``query_item(name=...)`` finds an entry it is updated by id with
        all of ``kwargs``; otherwise a new entry is created from ``kwargs``.
        """
        item = cls.query_item(name=kwargs.get('name'))
        if item:
            cls.update_by_id(id=item.id, **kwargs)
        else:
            cls.create(**kwargs)
Ejemplo n.º 20
0
class FileMetadata(db.Model):
    """ORM mapping of the ``filemetadata.filemetadata`` table.

    Stores a file's name, size, MIME-like type and its SHA-1 / MD5 digests.
    """

    __tablename__ = "filemetadata"
    # The table lives in a schema of the same name.
    __table_args__ = {"schema": "filemetadata"}

    id = db.Column(db.Integer, primary_key=True)
    # Note: size is declared as a string column, not a number.
    size = db.Column(db.String)
    file_name = db.Column(db.String)
    # Both digests carry a uniqueness constraint.
    sha1 = db.Column(db.String, unique=True)
    md5 = db.Column(db.String, unique=True)
    type = db.Column(db.String, default="unknown/unknown")
Ejemplo n.º 21
0
class VisitUser(BaseModel, db.Model):
    """A visitor, identified by the hash of IP address and user agent."""

    __tablename__ = 'visit_user'
    id = db.Column(db.INT, primary_key=True)
    ip = db.Column(db.String)
    user_agent = db.Column(db.String, default="")
    md5 = db.Column(db.String, default="")
    # Callable default: every row gets its own dict instead of sharing one.
    ext = db.Column(db.JSON, default=dict)
    is_del = db.Column(db.INT, default=0)
    # Pass the callable, not its result: ``datetime.now()`` would be evaluated
    # once at import time and reused as the default for every row.
    create_ts = db.Column(db.TIMESTAMP, default=datetime.now)
    update_ts = db.Column(db.TIMESTAMP, default=datetime.now)

    @classmethod
    def generate_md5(cls, ip, user_agent):
        """Return ``security.md5`` of the string ``'<ip>;<user_agent>'``."""
        return security.md5('{};{}'.format(ip, user_agent))

    @classmethod
    def log(cls):
        """Record a visit for the current request.

        The visitor is looked up by the hash of the request's remote address
        and user agent; if none exists one is created from the request data.
        The visit itself is then logged through ``visit``.

        Returns:
            Whatever ``visit`` returns for the resolved visitor.
        """
        headers = request.headers
        ip = request.remote_addr
        user_agent = request.user_agent

        md5 = cls.generate_md5(ip, user_agent)
        user = cls.query_item(md5=md5)
        if not user:
            user = cls(ip=ip,
                       user_agent=str(user_agent),
                       md5=md5,
                       ext=dict(headers))
            user.create_self()
        return user.visit()

    def visit(self):
        """Create a ``VisitLog`` entry for this visitor from the request.

        Stores the request URL, method, path, referrer, ``host`` header and a
        copy of all headers.
        """
        return VisitLog.create(visit_user_id=self.id,
                               url=request.url,
                               method=request.method,
                               path=request.path,
                               referrer=request.referrer,
                               domain=request.headers.get('host'),
                               ext=dict(request.headers))
Ejemplo n.º 22
0
class VisitorLogDate(BaseModel, db.Model):
    """Site-wide page-view (pv) and unique-visitor (uv) totals per day."""

    __tablename__ = 'visitor_log_date'
    id = db.Column(db.INT, primary_key=True)
    visit_date = db.Column(db.Date, default=date.today)
    pv = db.Column(db.INT, default=0)
    uv = db.Column(db.INT, default=0)
    # Callable default: every row gets its own dict instead of sharing one.
    ext_property = db.Column(db.JSON, default=dict)
    is_available = db.Column(db.INT, default=1)
    # Pass the callable, not its result: ``datetime.now()`` would be evaluated
    # once at import time and reused as the default for every row.
    create_ts = db.Column(db.TIMESTAMP, default=datetime.now)
    update_ts = db.Column(db.TIMESTAMP, default=datetime.now)

    @classmethod
    def statistics_visitor(cls):
        """Recompute pv/uv for today and for yesterday.

        For each day, non-bot, available ``VisitorLog`` rows whose URL starts
        with ``https://wxnacy.com`` are grouped by ``md5``: ``uv`` is the
        number of groups and ``pv`` the total number of rows. The day's row
        is created if missing and then updated.
        """
        def _statistics(query_day):

            VL = VisitorLog
            sql = 'is_bot = 0 and is_available = 1 and url like :url and\
                visit_date = :date'

            # One (md5, hits) tuple per distinct visitor hash.
            res = db.session.query(VL.md5, func.count(VL.md5)).filter(
                text(sql)).params(date=query_day,
                                  url='https://wxnacy.com%').group_by(
                                      VL.md5).all()
            uv = len(res)
            pv = sum(hits for _, hits in res)

            item = cls.query_item(visit_date=query_day)
            if not item:
                item = cls.create(visit_date=query_day)
            item.pv = pv
            item.uv = uv
            item.update_self()
            logger.debug('statistics_visitor %s', item)

        # Resolve "today" once so both days come from the same instant.
        today = date.today()
        for query_day in (today, today - timedelta(days=1)):
            _statistics(query_day)
Ejemplo n.º 23
0
class Jobs(db.Model):
    """A job posting stored in the table named ``data``."""

    __tablename__ = "data"

    # Serial number; doubles as the primary key.
    sno = db.Column(db.Integer, primary_key=True)
    company = db.Column(db.String(100))
    location = db.Column(db.String(100))
    role = db.Column(db.String(60))
    type = db.Column(db.String(30))
    sector = db.Column(db.String(50))
    link = db.Column(db.String(200))

    def __init__(self, sno, company, location, role, type, sector, link):
        """Set every column value and start with an empty ``list`` attribute."""
        # ``list`` is a plain instance attribute; no column of that name is
        # declared on this class.
        self.list = []
        self.sno = sno
        self.company = company
        self.location = location
        self.role = role
        self.type = type
        self.sector = sector
        self.link = link
Ejemplo n.º 24
0
class User(BaseModel, db.Model):
    """A user account exposing login-status helper methods."""

    __tablename__ = 'user'
    id = db.Column(db.BIGINT, primary_key=True)
    name = db.Column(db.String(32), default="")
    password = db.Column(db.String(256), default="")
    # Callable default: every row gets its own dict instead of sharing one.
    ext_property = db.Column(db.JSON, default=dict)
    is_available = db.Column(db.INT, default=1)
    # Pass the callable, not its result: ``datetime.now()`` would be evaluated
    # once at import time and reused as the default for every row.
    create_ts = db.Column(db.TIMESTAMP, default=datetime.now)
    update_ts = db.Column(db.TIMESTAMP, default=datetime.now)

    # The helpers below test an instance value, so identity comparison with
    # None is the correct form (``!= None`` is only needed when building SQL
    # expressions on the class-level column).

    def is_active(self):
        """Return True when the user has an id."""
        return self.id is not None

    def is_authenticated(self):
        """Return True when the user has an id."""
        return self.id is not None

    def get_id(self):
        """Return the user's id."""
        return self.id

    def is_anonymous(self):
        """Return True when the user has no id."""
        return self.id is None
Ejemplo n.º 25
0
class Books(db.Model):
    """ORM class for the `books` table."""

    __tablename__ = "books"
    # NOTE(review): nullable=True on a primary key looks unintended - confirm.
    id = db.Column(db.Integer, primary_key=True, autoincrement=True, nullable=True)
    book_id = db.Column(db.Integer)
    goodreads_book_id = db.Column(db.Integer)
    # NOTE(review): a 13-digit ISBN does not fit a 32-bit integer column on
    # every backend - confirm the column type used in the database.
    isbn13 = db.Column(db.Integer)
    authors = db.Column(db.String(200))
    original_publication_year = db.Column(db.Integer)
    original_title = db.Column(db.String(120))
    title = db.Column(db.String(400))
    language_code = db.Column(db.String(40))
    average_rating = db.Column(db.Float)
    ratings_count = db.Column(db.Integer)
    image_url = db.Column(db.String(200))

    @classmethod
    def get_paginated(cls, page):
        """Return the books on ``page`` (12 per page) as a list.

        ``error_out=False`` means an out-of-range page does not abort the
        request. Keyword arguments are used because newer Flask-SQLAlchemy
        releases no longer accept these parameters positionally.
        """
        return cls.query.paginate(page=page, per_page=12,
                                  error_out=False).items

    @classmethod
    def get_list(cls, ids):
        """Return a query for the books whose `book_id` is in ``ids``."""
        return cls.query.filter(cls.book_id.in_(ids))

    @classmethod
    def get_list_from_goodreads_ids(cls, ids):
        """Return a query for the books whose `goodreads_book_id` is in ``ids``."""
        return cls.query.filter(cls.goodreads_book_id.in_(ids))

    @classmethod
    def get_book(cls, id):
        """Return one book as a dict, enriched with its top tag names.

        Args:
            id: Value matched against `book_id`.

        Returns:
            dict | None: The row object's ``__dict__`` with an extra
            ``"tags"`` entry holding up to 10 tag names, or ``None`` when no
            such book exists or any error occurs (errors are logged).
        """
        try:
            res = cls.query.filter_by(book_id=id).first()
            # Check for a missing book *before* touching it; previously the
            # None check came too late and a missing book was reported as an
            # error through the except branch.
            if res is None:
                return None
            tags = BookTags.get_tags(res.goodreads_book_id, n=10)
            res.__dict__["tags"] = [x.get("tag_name") for x in tags]
            return res.__dict__
        except Exception as e:
            # Deliberate best-effort: callers get None instead of an error.
            logging.error(e)
            return None
Ejemplo n.º 26
0
class Account(db.Model, UserMixin, ResourceMixin):
    """A user account with credentials, profile data and sign-in tracking."""

    __tablename__ = 'account'

    id = db.Column(db.Integer, primary_key=True)

    first_name = db.Column(db.String(50), nullable=False)
    last_name = db.Column(db.String(50), nullable=False)
    username = db.Column(db.String(30), nullable=False, unique=True)
    email = db.Column(db.String(50), nullable=False, unique=True)
    password = db.Column(db.String(1024), nullable=False)
    # Stored in the database column ``is_active``; defaults to true on the
    # server side.
    active = db.Column(
        'is_active',
        db.Boolean(),
        nullable=False,
        server_default='1',
    )
    profile_pic = db.Column(
        db.String(300),
        nullable=False,
        default='static/images/default.jpg',
    )

    # Sign-in bookkeeping, maintained by ``update_activity_tracking``.
    sign_in_count = db.Column(db.Integer, nullable=False, default=0)
    current_sign_in_date = db.Column(db.DateTime)
    current_sign_in_ip = db.Column(db.String(200))
    last_sign_in_date = db.Column(db.DateTime)
    last_sign_in_ip = db.Column(db.String(200))

    @classmethod
    def find(cls, identity):
        """Return the first account whose email or username is ``identity``.

        Returns ``None`` when nothing matches.
        """
        by_email = cls.email == identity
        by_username = cls.username == identity
        return Account.query.filter(by_email | by_username).first()

    def is_active(self):
        """Return the value of the ``active`` flag."""
        return self.active

    def serialize_token(self, expiration=300):
        """Return a signed, time-limited token carrying this account's email.

        Args:
            expiration: Passed to the serializer as its expiry value
                (default 300).

        Returns:
            str: The serialized token decoded as UTF-8.
        """
        serializer = TimedJSONWebSignatureSerializer(
            app.config['SECRET_KEY'], expiration)
        token_bytes = serializer.dumps({'email': self.email})
        return token_bytes.decode('utf-8')

    @classmethod
    def deserialize_token(cls, token):
        """Return the account referenced by ``token``, or ``None``.

        Any failure while loading the token or looking up the account yields
        ``None``.
        """
        serializer = TimedJSONWebSignatureSerializer(app.config['SECRET_KEY'])

        try:
            payload = serializer.loads(token)
            return Account.find(payload.get('email'))
        except Exception:
            return None

    def update_activity_tracking(self, ip_address):
        """Record a new sign-in from ``ip_address`` and save the account.

        The previous "current" date/IP become the "last" ones, the current
        ones are set to now / ``ip_address``, and the counter is incremented.
        """
        # Shift current -> last before overwriting the current values.
        self.last_sign_in_date = self.current_sign_in_date
        self.last_sign_in_ip = self.current_sign_in_ip
        self.current_sign_in_date = datetime.now()
        self.current_sign_in_ip = ip_address
        self.sign_in_count = self.sign_in_count + 1
        self.save()

    def get_posts(self, page=None):
        """Return this account's posts.

        Args:
            page: When ``None`` the ``posts`` collection is returned as is;
                otherwise the given page (5 per page) of posts ordered by
                ``create_date`` descending.
        """
        if page is None:
            return self.posts
        own_posts = Post.query.filter(Post.account_id == self.id)
        newest_first = own_posts.order_by(Post.create_date.desc())
        return newest_first.paginate(page=page, per_page=5)
Ejemplo n.º 27
0
class User(BaseModel, db.Model):
    """A user account with email/password login and an encrypted auth token."""

    __tablename__ = 'user'
    id = db.Column(db.INT, primary_key=True)
    name = db.Column(db.String, default='')
    email = db.Column(db.String, default='')
    mobile = db.Column(db.String, default='')
    password = db.Column(db.String, default='')
    status = db.Column(db.String, default='')
    type = db.Column(db.String, default='')
    # Callable default: every row gets its own dict instead of sharing one.
    ext_property = db.Column(db.JSON, default=dict)
    is_available = db.Column(db.INT, default=1)
    # Pass the callable, not its result: ``datetime.now()`` would be evaluated
    # once at import time and reused as the default for every row.
    create_ts = db.Column(db.TIMESTAMP, default=datetime.now)
    update_ts = db.Column(db.TIMESTAMP, default=datetime.now)

    @property
    def real_mobile(self):
        """Return the mobile number, or '' for a 7-character value.

        ``create`` stores a random 7-character string in ``mobile``, so a
        value of that length is treated as "no real number".
        """
        if len(self.mobile) == 7:
            return ''
        return self.mobile

    def format(self):
        """Return the serialized user without its password.

        Starts from the parent's ``format`` result, removes ``password``,
        replaces ``mobile`` with ``real_mobile`` and adds a freshly generated
        authorization token under ``BaseConfig.HEAD_AUTHORIZATION``.
        """
        item = super().format()
        # Tolerate a parent format() that already omits the password.
        item.pop('password', None)
        item['mobile'] = self.real_mobile
        item[BaseConfig.HEAD_AUTHORIZATION] = self.generate_authorization()
        return item

    def generate_authorization(self):
        """Return ``aes.encrypt`` of ``'<id>;<unix seconds>;'``."""
        return aes.encrypt(f'{self.id};{int(time.time())};')

    @classmethod
    def create(cls, **kw):
        """Create a user; requires ``email`` in ``kw``.

        ``name`` is set to the email and ``mobile`` to a random 7-character
        placeholder before delegating to the parent's ``create``.
        """
        kw['name'] = kw['email']
        kw['mobile'] = ID.random_str(7)

        return super().create(**kw)

    @property
    def authorization(self):
        """A freshly generated authorization token for this user."""
        return self.generate_authorization()

    @classmethod
    def generate_password(cls, password):
        """Return the stored form of ``password``.

        NOTE(review): this hashes with ``security.md5`` and a fixed suffix,
        which looks like a weak password-hashing scheme. It is left unchanged
        because altering it would invalidate stored passwords - flagging for
        a planned migration.
        """
        return security.md5(f'{password};!@#$%')

    @classmethod
    def login(cls, email, password):
        """Check credentials.

        Returns:
            tuple: ``(200, user)`` on success, otherwise ``(401, message)``.
        """
        pw = cls.generate_password(password)
        item = cls.query_item(email=email, password=pw)
        if not item:
            # Message: "wrong email or password"
            return 401, '邮箱或密码错误'
        return 200, item

    @classmethod
    def register(cls, email, password):
        """Register a new user.

        Returns:
            tuple: ``(200, user)`` on success, or ``(403, message)`` when the
            email is already registered.
        """
        if cls.query_item(email=email):
            # Message: "email already registered, please log in"
            return 403, '邮箱已注册,请直接登录'
        pw = cls.generate_password(password)
        item = cls.create(email=email, password=pw)
        return 200, item

    @classmethod
    def get_user_from_authorization(cls, authorization):
        """Return the user encoded in an authorization token, or ``None``.

        The token is decrypted with ``aes.decrypt``; the text before the
        first ``;`` is used as the user id. Decryption errors are logged and
        yield ``None``.
        """
        try:
            plain = aes.decrypt(authorization)
            if ';' not in plain:
                return None
        except Exception:
            # ``Exception`` instead of a bare except, so KeyboardInterrupt
            # and SystemExit are no longer swallowed.
            logger.error(traceback.format_exc())
            return None
        user_id = plain.split(';')[0]
        return cls.query_by_id(user_id)
Ejemplo n.º 28
0
class VisitorLog(BaseModel, db.Model):
    """One logged page visit with parsed user-agent details."""

    __tablename__ = 'visitor_log'
    id = db.Column(db.INT, primary_key=True)
    ip = db.Column(db.String, default='')
    url = db.Column(db.String, default='')
    referrer = db.Column(db.String, default='')
    user_agent = db.Column(db.String, default='')
    os = db.Column(db.String, default='')
    device = db.Column(db.String, default='')
    device_type = db.Column(db.String, default='')
    browser = db.Column(db.String, default='')
    md5 = db.Column(db.String, default='')
    visit_date = db.Column(db.Date, default=date.today)
    is_bot = db.Column(db.INT, default=0)
    # Callable default: every row gets its own dict instead of sharing one.
    ext_property = db.Column(db.JSON, default=dict)
    is_available = db.Column(db.INT, default=1)
    # Pass the callable, not its result: ``datetime.now()`` would be evaluated
    # once at import time and reused as the default for every row.
    create_ts = db.Column(db.TIMESTAMP, default=datetime.now)
    update_ts = db.Column(db.TIMESTAMP, default=datetime.now)

    @property
    def region(self):
        """``'<country>-<city>'`` built from ``ext_property``."""
        ext = self.ext_property
        return '{}-{}'.format(ext.get('country'), ext.get('city'))

    @classmethod
    def query_items(cls, **kw):
        """Return one day's non-bot visits to wxnacy.com, newest first.

        Args:
            **kw: ``date`` selects the day; defaults to today's ISO date.
        """
        query_date = kw.get('date', date.today().isoformat())
        logger.debug(query_date)
        sql = 'is_bot = 0 and is_available = 1 and url like "%wxnacy.com%"\
                and visit_date = :date'

        items = cls.query.filter(text(sql)).params(date=query_date).order_by(
            desc(cls.create_ts)).all()
        return items

    @classmethod
    def query_rank(cls, **kw):
        """Return ``(url, visit count)`` rows for one day, most visited first.

        Args:
            **kw: ``day`` selects the day; defaults to today.
        """
        logger.debug(kw)
        qd = kw['day'] if 'day' in kw else date.today()

        # Order by the labelled aggregate itself. The previous code referred
        # to ``cls.rank``, which is not an attribute of this model; this form
        # matches ``query_hot``.
        rank = func.count(cls.url).label('rank')
        res = db.session.query(cls.url, rank).filter(
            cls.visit_date == qd).group_by(
                cls.url).order_by(rank.desc()).all()

        return res

    @classmethod
    def query_hot(cls):
        """Return ``(url, visit count)`` rows for the last 7 days, most visited first."""

        rank = func.count(cls.url).label('rank')
        res = db.session.query(cls.url, rank).filter(
            cls.visit_date >= date.today() - timedelta(days=7)).group_by(
                cls.url).order_by(rank.desc()).all()

        return res

    @classmethod
    def visit(cls, **kw):
        """Create a log row from raw visit data.

        ``kw`` must contain ``user_agent`` and ``ip``. The user agent is
        parsed to fill ``os``, ``device``, ``browser``, ``device_type`` and
        ``is_bot``; ``md5`` is the hash of ``'<ip>;<user_agent>'`` and
        ``visit_date`` is today.

        Returns:
            The result of ``VisitorLog.create``.
        """
        begin = time.time()
        ua = parse(kw['user_agent'])
        kw['os'] = ua.os.family
        kw['device'] = ua.device.family
        kw['browser'] = ua.browser.family
        # Mobile takes precedence over tablet; anything else counts as pc.
        if ua.is_mobile:
            device_type = 'mobile'
        elif ua.is_tablet:
            device_type = 'tablet'
        else:
            device_type = 'pc'
        kw['device_type'] = device_type
        kw['is_bot'] = ua.is_bot
        kw['md5'] = security.md5('{};{}'.format(kw['ip'], kw['user_agent']))
        kw['visit_date'] = date.today()

        res = VisitorLog.create(**kw)
        end = time.time()
        logger.debug('visit time: %s', (end - begin))
        return res
Ejemplo n.º 29
0
class Article(BaseModel, db.Model):
    """An article of the site at ``DOMAIN`` with crawled metadata and view counts."""

    __tablename__ = 'article'
    id = db.Column(db.BIGINT, primary_key=True)
    name = db.Column(db.String, default="")
    url = db.Column(db.String, default="")
    # NOTE(review): the default is a string, not a date object; confirm the
    # database driver in use accepts that for a DATE column.
    publish_date = db.Column(db.DATE, default='2017-08-04')
    tag = db.Column(db.String, default="")
    # doc text means "view count"
    pv = db.Column(db.INT, default=0, doc="观看数量")
    # doc text means "initial value"
    init_pv = db.Column(db.INT, default=0, doc="初始值")
    # Callable default: every row gets its own dict instead of sharing one.
    ext_property = db.Column(db.JSON, default=dict)
    is_available = db.Column(db.INT, default=1)
    # Pass the callable, not its result: ``datetime.now()`` would be evaluated
    # once at import time and reused as the default for every row.
    create_ts = db.Column(db.TIMESTAMP, default=datetime.now)
    update_ts = db.Column(db.TIMESTAMP, default=datetime.now)

    DOMAIN = 'https://wxnacy.com'

    @classmethod
    def query_or_create(cls, url, **kw):
        """Return the article for ``url`` (creating it if needed), or ``None``.

        ``None`` is returned for URLs outside ``DOMAIN``, for the home page,
        and for ``/archives`` and ``/page`` listing URLs. Otherwise the query
        string, the fragment and a trailing ``index.html`` are stripped and
        the lookup is delegated to the parent's ``query_or_create``.
        """
        domain = cls.DOMAIN
        if not url.startswith(domain):
            return None
        if url in (domain, domain + '/'):
            return None
        if url.startswith((domain + '/archives', domain + '/page')):
            return None
        url = url.split('?', 1)[0]
        url = url.split('#', 1)[0]
        if url.endswith('index.html'):
            # Strip the suffix itself; searching with index() would cut at an
            # earlier 'index.html' occurring inside the path.
            url = url[:-len('index.html')]

        kw['url'] = url
        return super().query_or_create(**kw)

    @classmethod
    def crawler(cls, url, **kw):
        """Fetch metadata for ``url`` and store it on its article.

        Returns:
            The article, or ``None`` when ``url`` is not an article URL.
        """
        item = cls.query_or_create(url=url)
        if not item:
            return None
        params = cls.get_crawler_data(url)

        cls.update_by_id(item.id, **params)
        return item

    def crawler_self(self):
        """Re-crawl this article's own URL and store the metadata."""
        params = Article.get_crawler_data(self.url)
        print(params)
        Article.update_by_id(self.id, **params)
        return self

    @classmethod
    def crawler_article(cls, pages=23):
        """Crawl every article linked from the site's listing pages.

        Args:
            pages: Number of listing pages to walk (the home page counts as
                page 1). Defaults to 23, the previously hard-coded value.
        """
        def _crawler(url):
            res = requests.get(url)
            soup = BeautifulSoup(res.content, 'html.parser')
            for link in soup.find_all('a', class_='article-title'):
                item_url = '{}{}'.format(cls.DOMAIN, link.attrs['href'])
                print(item_url)
                cls.crawler(url=item_url)

        for i in range(pages):
            if i == 0:
                url = cls.DOMAIN
            else:
                url = '{}/page/{}'.format(cls.DOMAIN, i + 1)
            _crawler(url)

    def create_init_pv(self):
        """Fetch this article's page-view count from busuanzi into ``init_pv``.

        The JSONP response is only used when it has the expected wrapper;
        otherwise nothing is changed.
        """
        url = "https://busuanzi.ibruce.info/busuanzi?jsonpCallback=BusuanziCallback_768395723167"
        res = requests.get(url, headers=dict(referer=self.url))
        suffix = ');}catch(e){}'
        prefix = 'try{BusuanziCallback_768395723167('
        body = res.content.decode('utf-8')
        if body.startswith(prefix) and body.endswith(suffix):
            # Slice by the known wrapper lengths rather than searching, so a
            # suffix-like sequence inside the payload cannot truncate it.
            json_text = body[len(prefix):-len(suffix)]
            res = json.loads(json_text)
            print(res)
            print(res.get('page_pv', 0))
            self.init_pv = res.get('page_pv', 0)
            print(self.init_pv)
            self.update_self()

    @classmethod
    def statistics_init_pv(cls):
        """Fill ``init_pv`` for every article where it is still 0."""
        items = Article.query_items(init_pv=0)
        print(len(items))
        for item in items:
            item.create_init_pv()

    @classmethod
    def statistics_article(cls):
        """Re-crawl every article and log how long it took."""
        begin = time.time()
        for item in Article.query_items():
            item.crawler_self()

        logger.debug('statistics_article time: %s', (time.time() - begin))

    @classmethod
    def get_crawler_data(cls, url):
        """Download ``url`` and extract article metadata from its meta tags.

        Returns:
            dict: ``name`` (from ``og:title``) and ``tag`` (from
            ``keywords``) when present, plus ``publish_date``. The date is
            built from URL path segments 3-5 when the URL has more than five
            ``/``-separated parts; otherwise it is the first 10 characters
            of ``og:updated_time``, falling back to ``'2017-08-04'``.
        """
        params = {}
        res = requests.get(url)
        soup = BeautifulSoup(res.content, 'html.parser')
        pd = '2017-08-04'
        for meta in soup.find_all('meta'):
            attrs = meta.attrs
            if attrs.get('property') == 'og:title':
                params['name'] = attrs['content']
            elif attrs.get('name') == 'keywords':
                params['tag'] = attrs['content']
            elif attrs.get('property') == 'og:updated_time':
                pd = attrs['content'][0:10]

        dp = url.split('/')
        if len(dp) > 5:
            # The date embedded in the URL wins over the meta tag.
            pd = '{}-{}-{}'.format(dp[3], dp[4], dp[5])
        params['publish_date'] = pd
        return params

    @classmethod
    def statistics_pv(cls):
        """Set each article's ``pv`` to the sum of its ``ArticleData.pv``.

        Articles whose sum is empty or zero are left untouched. All changes
        are committed together at the end.
        """
        AD = ArticleData
        for item in cls.query_items():
            # A per-item ``ArticleData.query_items`` call whose result was
            # discarded used to sit here; it has been dropped.
            res = db.session.query(func.sum(
                AD.pv)).filter_by(article_id=item.id).all()
            total = res[0][0]
            if total:
                item.pv = total
                db.session.add(item)
        db.session.commit()
Ejemplo n.º 30
0
class Test(BaseModel, db.Model):
    __tablename__ = 'test'
    id = db.Column(db.INT, primary_key=True)