Esempio n. 1
0
def verify_page_path(path, no):
    '''
    check whether a list page may be processed

    Args:
        path: page path, only used in the debug log
        no: page number, converted with int()

    Returns:
        True when the page number does not exceed MAXPAGE, otherwise False
    '''
    logger.debug(f'verify page {path} , args {no}')
    return int(no) <= MAXPAGE
Esempio n. 2
0
 def saveit(tag_info):
     '''
     get the tag identified by (type, value) of tag_info, creating it if needed

     The link of tag_info is only passed as default for a newly created row.

     Returns:
         the tag object returned by Tag.get_or_create
     '''
     lookup = dict(type_=tag_info.type, value=tag_info.value)
     tag, is_new = Tag.get_or_create(defaults={'url': tag_info.link}, **lookup)
     if is_new:
         logger.debug(f'save tag:  {tag}')
     return tag
Esempio n. 3
0
def load_db():
    '''
    handle the db upload form and render the load_db page

    On submit the uploaded file is stored as 'uploaded.db' in the data path,
    the user tags are loaded from it and a download job is added for every
    fanhao that is reported as missing.  The outcome is passed to the template
    as msg (success) or errmsg (failure).
    '''
    msg = ''
    errmsg = ''
    if request.POST.submit:
        upload = request.files.get('dbfile')
        if not upload:
            errmsg = '请上传数据库文件'
        else:
            logger.debug(upload.filename)
            saved_path = get_data_path('uploaded.db')
            upload.save(saved_path, overwrite=True)
            logger.debug(f'uploaded file saved to {saved_path}')
            try:
                tag_file_added, missed_fanhaos = load_tags_db()
            except DBError:
                errmsg = '数据库文件错误, 请检查文件是否正确上传'
            else:
                # schedule a download for every fanhao that is not known yet
                urls = list(map(bus_spider.get_url_by_fanhao, missed_fanhaos))
                add_download_job(urls)
                msg = (
                    f'上传 {tag_file_added} 条用户打标数据, {len(missed_fanhaos)} 个番号, '
                    '  注意: 需要下载其他数据才能开始建模, 请等候一定时间'
                )
    return template('load_db', path=request.path, msg=msg, errmsg=errmsg)
Esempio n. 4
0
def verify_fanhao(path, fanhao):
    '''
    verify fanhao before add it to queue

    Returns:
        True when Item.get_by_fanhao finds nothing for fanhao, otherwise False
    '''
    is_new = Item.get_by_fanhao(fanhao) is None
    logger.debug(
        f'verify {fanhao}: , exists:{not is_new}, skip {path}')
    return is_new
Esempio n. 5
0
 def saveit(item, tag):
     '''
     create the ItemTag row linking item and tag

     Returns:
         the created ItemTag, or None when anything went wrong
         (the exception is logged, not raised)
     '''
     try:
         link = ItemTag.create(item=item, tag=tag)
         logger.debug(f'save tag_item: {link}')
         return link
     except Exception as err:
         logger.exception(err)
         return None
Esempio n. 6
0
 def saveit(fanhao, path):
     '''
     create a LocalItem for fanhao stored at path

     Returns:
         the created LocalItem, or None when the insert raised IntegrityError
     '''
     try:
         created = LocalItem.create(item=fanhao, path=path)
         logger.debug(f'save LocalItem: {fanhao}')
     except IntegrityError:
         logger.debug(f'LocalItem exists: {fanhao}')
         return None
     return created
Esempio n. 7
0
 def update_play(id):
     '''
     record one more view of the LocalItem with the given id

     Sets last_view_date to get_now_time() and increments view_times,
     then returns the row fetched with LocalItem.get_by_id.
     '''
     changes = {
         LocalItem.last_view_date: get_now_time(),
         LocalItem.view_times: LocalItem.view_times + 1,
     }
     query = LocalItem.update(changes).where(LocalItem.id == id)
     nrows = query.execute()
     logger.debug(f'update LocalItem {id} : rows:{nrows}')
     return LocalItem.get_by_id(id)
Esempio n. 8
0
def add_job(job_func, args):
    '''
    add a job to scheduler

    The job is triggered once, 10 seconds from now, and is called with
    (loop, True) followed by the given args.

    Args:
        job_func: callable handed to the scheduler
        args: tuple of extra positional arguments (it is concatenated to a tuple)
    '''
    job_args = (loop, True) + args
    logger.debug(job_args)
    run_at = datetime.now() + timedelta(seconds=10)
    scheduler.add_job(job_func,
                      trigger=DateTrigger(run_date=run_at),
                      args=job_args)
Esempio n. 9
0
    def saveit(rate_type, rate_value, item):
        '''
        create an ItemRate row for item

        Args:
            rate_type: value stored as rate_type
            rate_value: value stored as rate_value
            item: value stored as item

        Returns:
            the created ItemRate, or None when creation failed
            (the exception is logged, not raised)
        '''
        # bind the result up front: when create() raises, the handler only logs
        # and the function must still be able to return
        item_rate = None
        try:
            item_rate = ItemRate.create(item=item,
                                        rate_type=rate_type,
                                        rate_value=rate_value)
            logger.debug(f'save ItemRate: {item_rate}')
        except Exception as ex:
            logger.exception(ex)

        return item_rate
Esempio n. 10
0
    def saveit(tag_info):
        '''
        create a Tag from tag_info, falling back to the stored one

        Returns:
            the newly created Tag, or, when the insert raised IntegrityError,
            the Tag fetched by the value of tag_info
        '''
        try:
            created = Tag.create(type_=tag_info.type,
                                 value=tag_info.value,
                                 url=tag_info.link)
            logger.debug(f'save tag:  {created}')
            return created
        except IntegrityError:
            return Tag.get(Tag.value == tag_info.value)
Esempio n. 11
0
def save(meta_info, tags):
    '''
    save an item together with its tags

    When the item already exists (ExistError) nothing else is saved.

    Args:
        meta_info: dict with the item data, must contain 'title'
        tags: iterable of tag infos, each saved and linked to the item
    '''
    # read the title before saving, it is only needed for the log message
    item_title = meta_info['title']
    try:
        item = Item.saveit(meta_info)
    except ExistError:
        logger.debug(f'item exists: {item_title}')
        return
    for tag_info in tags:
        ItemTag.saveit(item, Tag.saveit(tag_info))
Esempio n. 12
0
 def saveit(rate_type, rate_value, fanhao):
     '''
     create an ItemRate row for fanhao

     Returns:
         the created ItemRate, or None when the insert raised IntegrityError
     '''
     try:
         created = ItemRate.create(item=fanhao,
                                   rate_type=rate_type,
                                   rate_value=rate_value)
         logger.debug(f'save ItemRate: {created}')
     except IntegrityError:
         logger.debug(f'ItemRate exists: {fanhao}')
         return None
     return created
Esempio n. 13
0
def process_item(text, path, fanhao):
    '''
    process item page

    Parses text into meta data and tags, stores path as the url of the
    meta data and saves both.

    Args:
        text: content of the item page
        path: stored as 'url' in the meta data
        fanhao: only used for the log / progress output
    '''
    logger.debug(f'process item {fanhao}')
    meta, tags = parse_item(text)
    meta.update(url=path)
    save(meta, tags)
    print(f'item {fanhao} is processed')
Esempio n. 14
0
def load_tags_db():
    '''
    load user tags data from uploaded db file

    The file is read from get_data_path('uploaded.db'); the function itself
    takes no arguments.

    Returns:
        tuple (tag_file_added, missed_fanhaos):
        number of rows for which ItemRate.saveit returned something truthy,
        and the list of fanhaos for which Item.get_by_fanhao found nothing

    Raises:
        DBError: when opening the file / listing its tables raises DatabaseError
    '''
    db_name = get_data_path('uploaded.db')
    try:
        db_upload = SqliteDatabase(db_name)
        db_upload.get_tables()
    except DatabaseError:
        raise DBError()

    sql_old = '''select item_rate.rate_value, item.fanhao
                from item_rate inner
                join item on item_rate.item_id = item.id
                where item_rate.rate_type=1 '''

    sql_new = '''select item_rate.rate_value, item.fanhao
                from item_rate inner
                join item on item_rate.item_id = item.fanhao
                where item_rate.rate_type=1 '''

    # probe with the old join first: if it yields a row the file is treated
    # as an old style db, otherwise the new join is used
    probe_row = db_upload.execute_sql(sql_old).fetchone()
    chosen_sql = sql_old if probe_row else sql_new
    tag_data = list(db_upload.execute_sql(chosen_sql).fetchall())

    missed_fanhaos = []
    tag_file_added = 0
    with db_upload.atomic():
        for rate_value, fanhao in tag_data:
            if ItemRate.saveit(RATE_TYPE.USER_RATE, rate_value, fanhao):
                tag_file_added += 1
            if not Item.get_by_fanhao(fanhao):
                # add to get from spider
                missed_fanhaos.append(fanhao)
    logger.debug(tag_data)
    logger.info(f'added user tag rate: {tag_file_added}')
    logger.info(f'added fanhao to download: {len(missed_fanhaos)}')
    return tag_file_added, missed_fanhaos
Esempio n. 15
0
 def saveit(meta_info):
     '''
     create an Item from meta_info

     The keys 'release_date', 'fanhao', 'title' and 'url' are popped from
     meta_info (the dict of the caller is modified); whatever is left is
     stored as json in the meta_info field.

     Returns:
         the created Item

     Raises:
         ExistError: when the insert raised IntegrityError
     '''
     release_date = date.fromisoformat(meta_info.pop('release_date'))
     fields = dict(fanhao=meta_info.pop('fanhao'),
                   title=meta_info.pop('title'),
                   url=meta_info.pop('url'),
                   release_date=release_date)
     # only the remaining keys end up in the json blob
     fields['meta_info'] = json.dumps(meta_info)
     try:
         item = Item.create(**fields)
         logger.debug(f'save item:  {item}')
     except IntegrityError:
         raise ExistError()
     return item
Esempio n. 16
0
def save(meta_info, tags):
    '''
    save an item together with its tags

    Tags are saved inside one db.atomic() block, the item-tag links inside a
    second one.  When the item already exists (ExistError) nothing else is
    saved.

    Args:
        meta_info: dict with the item data, must contain 'title'
        tags: iterable of tag infos
    '''
    # read the title before saving, it is only needed for the log message
    item_title = meta_info['title']
    try:
        item = Item.saveit(meta_info)
    except ExistError:
        logger.debug(f'item exists: {item_title}')
        return
    with db.atomic():
        # keep only the tags that were actually returned
        tag_objs = [saved for saved in map(Tag.saveit, tags) if saved]
    with db.atomic():
        for tag_obj in tag_objs:
            ItemTag.saveit(item, tag_obj)
Esempio n. 17
0
def tag(fanhao):
    '''
    save the rate submitted for fanhao, then redirect back to the tag page

    When the form was submitted, the item rate found for fanhao gets the
    submitted value; if none is found a new USER_RATE item rate is saved.
    The redirect goes to /tagit with the current page / like query values,
    plus the posted form id as anchor when there is one.
    '''
    # default for requests without a submitted form, otherwise building the
    # redirect url below fails on an unbound name
    formid = ''
    if request.POST.submit:
        formid = request.POST.formid
        item_rate = ItemRate.get_by_fanhao(fanhao)
        rate_value = request.POST.submit
        if not item_rate:
            rate_type = RATE_TYPE.USER_RATE
            ItemRate.saveit(rate_type, rate_value, fanhao)
            logger.debug(f'add new item_rate for fanhao:{fanhao}')
        else:
            item_rate.rate_value = rate_value
            item_rate.save()
            logger.debug(f'updated item_rate for fanhao:{fanhao}')
    page = int(request.query.get('page', 1))
    like = request.query.get('like')
    url = f'/tagit?page={page}&like={like}'
    if formid:
        url += f'#{formid}'
    redirect(url)
Esempio n. 18
0
def correct(fanhao):
    '''
    confirm or correct the rate of fanhao, then redirect back to the index page

    When the form was submitted and an item rate is found for fanhao, its
    rate_type is set to USER_RATE; if the submitted value is 0 the rate_value
    is flipped as well (0 becomes 1, anything else becomes 0).
    The redirect goes to / with the current page / like query values, plus
    the posted form id as anchor when there is one.
    '''
    # default for requests without a submitted form, otherwise building the
    # redirect url below fails on an unbound name
    formid = ''
    if request.POST.submit:
        formid = request.POST.formid
        is_correct = int(request.POST.submit)
        item_rate = ItemRate.get_by_fanhao(fanhao)
        if item_rate:
            item_rate.rate_type = RATE_TYPE.USER_RATE
            if not is_correct:
                rate_value = item_rate.rate_value
                rate_value = 1 if rate_value == 0 else 0
                item_rate.rate_value = rate_value
            item_rate.save()
            logger.debug(
                f'updated item fanhao: {fanhao}, {"and correct the rate_value" if not is_correct else ""}'
            )
    page = int(request.query.get('page', 1))
    like = int(request.query.get('like', 1))
    url = f'/?page={page}&like={like}'
    if formid:
        url += f'#{formid}'
    redirect(url)
Esempio n. 19
0
def recommend():
    '''
    use trained model to recommend items

    Every predicted item gets a SYSTEM_RATE item rate holding the prediction.

    Returns:
        tuple (total, count): number of predicted items and how many of them
        were predicted as 1; None when there is no data to predict
    '''
    ids, X = prepare_predict_data()
    if len(X) == 0:
        logger.error('no data for recommend')
        return
    total = len(ids)
    count = 0
    for item_id, label in zip(ids, predict(X)):
        if label == 1:
            count += 1
        ItemRate(rate_type=RATE_TYPE.SYSTEM_RATE,
                 rate_value=label,
                 item_id=item_id).save()
    logger.debug(f'predicted {total} items, recommended {count}')
    return total, count
Esempio n. 20
0
def add_local_fanhao(fanhao, tag_like):
    '''
    add fanhaos entered by the user, optionally with a local path and a like

    Args:
        fanhao: str - one row per line, each row is either 'fanhao' or
            ',' separated 'fanhao,path'; rows without a recognizable
            fanhao are ignored
        tag_like: when truthy, a USER_RATE / LIKE item rate is saved for
            every fanhao

    Returns:
        tuple (missed_fanhaos, local_file_added, tag_file_added):
        fanhaos for which Item.get_by_fanhao found nothing, number of local
        items saved and number of item rates saved
    '''
    items = []
    missed_fanhaos = []
    local_file_added = 0
    tag_file_added = 0
    pattern = r'([A-Z]+)-?([0-9]+)'
    for row in fanhao.splitlines():
        # split on the first ',' only, so that a path which itself contains
        # commas stays in one piece instead of breaking the unpacking
        raw_fanhao, _, raw_path = row.partition(',')
        match = re.search(pattern, raw_fanhao.strip().upper())
        if not match:
            continue
        series, num = match.groups()
        matched_fanhao = f'{series}-{num}'
        path = raw_path.strip() if raw_path else None
        logger.debug(f'matched fanhao {matched_fanhao}')
        items.append((matched_fanhao, path))
    with db.atomic():
        for item_fanhao, path in items:
            # if path is not None, add to local item
            if path:
                local_item = LocalItem.saveit(item_fanhao, path)
                if local_item:
                    local_file_added += 1
            # if tag_like is True, add it to item_rate table
            if tag_like:
                item_rate = ItemRate.saveit(RATE_TYPE.USER_RATE,
                                            RATE_VALUE.LIKE, item_fanhao)
                if item_rate:
                    tag_file_added += 1
            if not Item.get_by_fanhao(item_fanhao):
                # add to get from spider
                missed_fanhaos.append(item_fanhao)
    logger.debug(f'missed_fanhaos:{missed_fanhaos}')
    logger.debug(f'tag_file_added:{tag_file_added}')
    logger.debug(f'local_file_added:{local_file_added}')
    return missed_fanhaos, local_file_added, tag_file_added
Esempio n. 21
0
def process_page(text, path, no):
    '''
    process list page

    Only reports the page: logs the length of text and prints the page number.
    '''
    size = len(text)
    logger.debug(f'page {no} has length {size}')
    print(f'process page {no}')
Esempio n. 22
0
def local_play(id):
    '''
    record a view of the local item with the given id and redirect to its path
    '''
    file_path = LocalItem.update_play(id).path
    logger.debug(file_path)
    redirect(file_path)