Example #1
0
def admin_save(request):
    """Create or update an article from the posted admin form, then redirect.

    When the form carries a non-empty ``article_id`` the stored article's
    category, content and tags are updated. Otherwise a new article is saved
    under a freshly generated UUID. Errors raised by the update/save call are
    printed rather than re-raised, so the redirect happens either way.

    Args:
        request: the incoming request. Its ``POST`` mapping must provide
            ``title``, ``category``, ``content`` and ``tags`` (comma
            separated), plus ``editor`` when a new article is created.

    Returns:
        The result of ``redirect('admin_page', id=uid)`` where ``uid`` is the
        updated or newly created article id.
    """
    form = request.POST
    # .get() so that a form without an ``article_id`` field is treated as a
    # new article instead of raising on the missing key.
    uid = form.get('article_id')

    if uid:  # update
        article = ArticleModel(id=uid, title=form['title'])
        try:
            article.update(
                actions=[
                    ArticleModel.category.set(form['category']),
                    ArticleModel.content.set(form['content']),
                    ArticleModel.tags.set(form['tags'].split(',')),
                ],
                # Only update when an item with this id is actually stored.
                condition=(ArticleModel.id == uid))
        except Exception as e:
            # Best effort: report the failure and still redirect.
            print(str(e))
    else:  # save
        uid = str(uuid.uuid1())
        article = ArticleModel(id=uid,
                               title=form['title'],
                               category=form['category'],
                               content=form['content'],
                               create_time=str(datetime.now()),
                               editor=form['editor'],
                               tags=form['tags'].split(','))
        try:
            article.save()
        except Exception as e:
            # Best effort: report the failure and still redirect.
            print(str(e))

    return redirect('admin_page', id=uid)
Example #2
0
    def PUT(self, id):
        """Create or update an article from the JSON request body.

        The body is parsed as JSON. ``code``, ``title`` and ``remark`` fall
        back to ``'code'``, ``'title'`` and ``''`` when absent; ``posttime``
        is parsed as ``YYYY-MM-DD`` and falls back to the current time when
        missing or malformed. An ``id`` of ``'0'`` means "no id": the article
        is added as a new row. Any other truthy id updates the matching row.
        The session is committed in both cases.
        """
        payload = json.loads(web.data())

        def field(key, fallback):
            # Submitted value when the key is present, otherwise the fallback.
            if key in payload:
                return payload[key]
            return fallback

        record_id = None if id == '0' else id
        code = field('code', 'code')
        title = field('title', 'title')
        try:
            posttime = datetime.datetime.strptime(payload['posttime'], '%Y-%m-%d')
        except (KeyError, ValueError):
            posttime = datetime.datetime.now()
        remark = field('remark', '')

        articleobj = ArticleModel(record_id, code, title, posttime, remark)
        if record_id:
            changes = {
                'code': code,
                'title': title,
                'posttime': posttime,
                'remark': remark
            }
            matching = Session.query(ArticleModel).filter(
                ArticleModel.id == record_id)
            matching.update(changes)
        else:
            Session.add(articleobj)
        Session.commit()
Example #3
0
 def listCategoryWithCount(limit=BaseService.LIMIT):
     """Return the category listing with an article count on every entry.

     Up to ``limit`` categories are fetched through ``BaseService.list``. For
     each entry, ``ArticleModel`` is scanned for articles whose category equals
     the entry's ``'category_name'`` and the scan's ``total_count`` is stored
     under the entry's ``'cnt'`` key. The (mutated) listing is returned.
     """
     listing = BaseService.list(model_name='CategoryModel', limit=limit)
     for entry in listing['data']:
         matches = ArticleModel.scan(
             ArticleModel.category == entry['category_name'])
         # Exhaust the scan before reading its total_count.
         for _ in matches:
             pass
         entry['cnt'] = matches.total_count
     return listing
    def searchArticleByCat(cat, limit=None, last_evaluated_key=None):
        """Scan for articles whose category equals ``cat``.

        ``limit`` and ``last_evaluated_key`` are passed straight to the scan.

        Returns a dict with ``'data'`` (the ``to_dict()`` form of every
        article yielded) and ``'lek'`` (the scan's ``last_evaluated_key`` as
        read right after the last yielded article, or ``None`` when the scan
        yielded nothing).
        """
        scan = ArticleModel.scan(
            ArticleModel.category == cat,
            limit=limit,
            last_evaluated_key=last_evaluated_key)
        page = {'data': [], 'lek': None}
        for article in scan:
            # Read the key per item, while the scan is still positioned on it.
            page['lek'] = scan.last_evaluated_key
            page['data'].append(article.to_dict())
        return page
    def searchArticleByKeyword(keyword, limit=None, last_evaluated_key=None):
        """Scan for articles matching ``keyword``.

        An article matches when its category starts with ``keyword`` or its
        title or content contains it. ``limit`` and ``last_evaluated_key`` are
        passed straight to the scan.

        Returns a dict with ``'data'`` (the ``to_dict()`` form of every
        article yielded) and ``'lek'`` (the scan's ``last_evaluated_key`` as
        read right after the last yielded article, or ``None`` when the scan
        yielded nothing).
        """
        in_category = ArticleModel.category.startswith(keyword)
        in_title = ArticleModel.title.contains(keyword)
        in_content = ArticleModel.content.contains(keyword)
        scan = ArticleModel.scan(
            in_category | in_title | in_content,
            limit=limit,
            last_evaluated_key=last_evaluated_key)
        page = {'data': [], 'lek': None}
        for article in scan:
            # Read the key per item, while the scan is still positioned on it.
            page['lek'] = scan.last_evaluated_key
            page['data'].append(article.to_dict())
        return page
    def searchArticleByTag(tag, limit=None, last_evaluated_key=None):
        """Scan for articles whose tags contain ``tag``.

        ``limit`` and ``last_evaluated_key`` are passed straight to the scan.

        Returns a dict with ``'data'`` (the ``to_dict()`` form of every
        article yielded) and ``'lek'`` (the scan's ``last_evaluated_key`` as
        read right after the last yielded article, or ``None`` when the scan
        yielded nothing).
        """
        scan = ArticleModel.scan(
            ArticleModel.tags.contains(tag),
            limit=limit,
            last_evaluated_key=last_evaluated_key)
        page = {'data': [], 'lek': None}
        for article in scan:
            # Read the key per item, while the scan is still positioned on it.
            page['lek'] = scan.last_evaluated_key
            page['data'].append(article.to_dict())
        return page
Example #7
0
def crawl(indexUrl='',
          baseDir=BASE_DIR,
          listElem='a',
          itemElem=''):
    """Crawl all listing pages, store their items, then export articles as HTML.

    Phase 1: build the page URLs from ``indexUrl``/``listElem``, fetch each
    non-empty URL as a ``Page`` and call ``save_to_db()`` on every item found.
    A ``requests.ConnectionError`` while fetching is reported and the page's
    current ``items`` are still processed.

    Phase 2: select the ``ArticleModel`` rows whose ``tags`` equal
    ``'Python'`` and write each row's summary, wrapped in a minimal UTF-8
    HTML document, to ``"<n> <title>.html"`` inside ``baseDir`` (``n`` is the
    1-based position of the row).

    This is Python 2 code (byte-string ``decode``); the ``print(...)`` calls
    are written so that they behave the same as the former print statements.

    Args:
        indexUrl: URL of the index page to start from.
        baseDir: directory the HTML files are written to.
        listElem: passed to ``buildURLs`` together with ``indexUrl``.
        itemElem: passed to every ``Page``.
    """
    print('start crawl url: %s' % indexUrl)
    urls = buildURLs(indexUrl, listElem)

    for url in urls:
        if not url:
            continue
        p = Page(url, itemElem)
        try:
            print('fetching ' + p.url)
            p.fetch()
        except requests.ConnectionError:
            print("Network Error")
        print('{} items found'.format(len(p.items)))
        for item in p.items:
            print(item)
            item.save_to_db()

    # Named ``articles`` rather than ``list`` so the builtin is not shadowed.
    articles = ArticleModel.select().where(ArticleModel.tags == 'Python')
    for i, item in enumerate(articles, 1):
        content = Utils.replace_charentity(item.summary)
        content = content.replace(r'src="', 'src="' + BASE_URL)

        # Decode the title before using it in a path, and drop '/' so it is
        # not read as a directory separator.
        title = item.title.decode('utf-8').replace("/", " ")
        fileName = "%d " % i + title + '.html'

        tool.FileUtils().mkdir(baseDir)
        # os.path.join keeps the file inside baseDir even when baseDir has no
        # trailing separator (plain concatenation would not).
        filePath = os.path.join(baseDir, fileName)
        if os.path.exists(filePath):
            # NOTE(review): an existing file is removed and the article is
            # then skipped, not rewritten -- confirm this is intended.
            os.remove(filePath)
            print("removing {}".format(fileName))
            continue

        try:
            with open(filePath, 'wb') as f:
                f.write('<!DOCTYPE html>')
                f.write('<head>')
                # Declare the charset to avoid wrong decoding in the browser.
                f.write('<meta content="text/html; charset=UTF-8" http-equiv="Content-Type" />')
                f.write('</head>')
                f.write('<body>')
                # Encode explicitly: the file is binary and declared UTF-8, so
                # do not rely on the interpreter's default codec.
                f.write(content.decode('utf-8').encode('utf-8'))
                f.write('</body>')
                f.write('</html>')
        except requests.ConnectionError:
            print("Network Error")
        except requests.exceptions.RequestException:
            print("Error")
Example #8
0
    def major(self):
        """Crawl every news category and persist articles, comments and replies.

        For each category the article briefs are fetched. For each article the
        author, the article and the author/article behaviour records are
        stored; the same is then done for each comment of the article and each
        reply of the comment. After a comment or a reply has been stored, its
        user additionally gets simulated "browse" behaviour on articles
        returned by ``get_same_category_art`` for one or two randomly chosen
        categories.

        Every step is best-effort. A failing step is printed and logged with
        its traceback and the current article/comment/reply is skipped; a
        failing browse simulation is only reported. Only ``Exception``
        subclasses are handled this way, so ``KeyboardInterrupt`` and
        ``SystemExit`` still stop the crawl.
        """
        categories = ['news_society', 'news_entertainment', 'news_tech', 'news_military', 'news_sports', 'news_car',
                      'news_finance', 'news_world', 'news_fashion', 'news_travel', 'news_discovery', 'news_baby',
                      'news_regimen', 'news_story', 'news_essay', 'news_game', 'news_history', 'news_food']

        for category in categories:
            print("当前类别: %s" % category)
            logging.info("当前类别: %s" % category)

            # --- article briefs of this category ---
            try:
                arts_brief_json = self.__art_pro.get_arts_brief_json_by_category(category)
                logging.info('%s arts_brief_json 获取 成功' % category)
            except Exception:
                print('%s arts_brief_json 获取 失败' % category)
                logging.exception('%s arts_brief_json 获取 失败' % category)
                continue

            for art_i, art_brief_json in enumerate(arts_brief_json):
                print("当前新闻: %d/%d %s" % (art_i, len(arts_brief_json), category))
                logging.info("当前新闻: %d/%d %s" % (art_i, len(arts_brief_json), category))

                # --- article author ---
                art_cus_mod = CusMod.CustomerModel()
                try:
                    self.__cus_pro.set_art_cus(art_brief_json, art_cus_mod)
                    self.__cus_dao.insert_then_get_cus(art_cus_mod)
                    self.__cus_dao.update_cus_feature(category, art_cus_mod.cus_id, flag=True)
                    logging.info("%s-%d art_cus 处理 成功" % (category, art_i))
                except Exception:
                    print("%s-%d art_cus 处理 失败" % (category, art_i))
                    logging.exception("%s-%d art_cus 处理 失败" % (category, art_i))
                    continue

                # --- article ---
                art_mod = ArtMod.ArticleModel()
                try:
                    self.__art_pro.set_art(art_brief_json, category, art_cus_mod.cus_id, art_mod)
                    if self.__art_dao.is_art_exist(art_mod.art_spider):
                        print("art 已存在")
                        continue
                    # The article does not exist yet: insert it, then read back its id.
                    self.__art_dao.insert_art(art_mod)
                    art_mod.art_id = self.__art_dao.search_art_id_by_spider(art_mod.art_spider)
                    logging.info("%s-%d art 操作 成功" % (category, art_i))
                except Exception:
                    print("%s-%d art 操作 失败" % (category, art_i))
                    logging.exception("%s-%d art 操作 失败" % (category, art_i))
                    continue

                # --- article / author behaviour ---
                try:
                    if self.__art_dao.check_art_cus_relationship(art_mod.art_id, art_cus_mod.cus_id):
                        self.__cus_dao.insert_cus_behavior(
                            art_cus_mod.cus_id, art_cus_mod.cus_id, 1, art_mod.art_id, 1,
                            art_mod.art_id, cbr_time=art_mod.art_time
                        )
                        self.__cus_dao.insert_cus_behavior(
                            art_cus_mod.cus_id, art_cus_mod.cus_id, 2, art_mod.art_id, 1,
                            art_mod.art_id
                        )
                        self.__cus_dao.update_cus_feature(category, art_cus_mod.cus_id)
                        self.__art_dao.update_art_feature(1, art_mod.art_id, art_mod.art_time)
                    logging.info("%s-%d rt-cus 行为 1 数据库操作 成功" % (category, art_i))
                except Exception:
                    print("%s-%d rt-cus 行为 1 数据库操作 失败" % (category, art_i))
                    logging.exception("%s-%d rt-cus 行为 1 数据库操作 失败" % (category, art_i))
                    continue

                # --- comments of this article ---
                try:
                    coms_json = self.__com_pro.get_coms_json(art_brief_json)
                    if coms_json is None:
                        continue
                    logging.info("%s-%d coms_json 获取 成功" % (category, art_i))
                except Exception:
                    print("\t%s-%d coms_json 获取 失败" % (category, art_i))
                    logging.exception("%s-%d coms_json 获取 失败" % (category, art_i))
                    continue

                for com_i, com_json in enumerate(coms_json):
                    print("\t当前评论: %d/%d" % (com_i, len(coms_json)))
                    logging.info("当前评论: %d/%d" % (com_i, len(coms_json)))

                    # --- comment user ---
                    com_cus_mod = CusMod.CustomerModel()
                    try:
                        self.__cus_pro.set_com_cus(com_json, com_cus_mod)
                        self.__cus_dao.insert_then_get_cus(com_cus_mod)
                        self.__cus_dao.update_cus_feature(category, com_cus_mod.cus_id, flag=True)
                        # Success path: used to log the failure text by mistake.
                        logging.info("%s-%d-%d com_cus 处理 成功" % (category, art_i, com_i))
                    except Exception:
                        print("\t%s-%d-%d com_cus 处理 错误" % (category, art_i, com_i))
                        logging.exception("%s-%d-%d com_cus 处理 错误" % (category, art_i, com_i))
                        continue

                    # --- comment ---
                    com_mod = ComMod.CommentModel()
                    try:
                        self.__com_pro.set_com(com_json, art_mod.art_id, com_cus_mod.cus_id, com_mod)
                        if self.__com_dao.is_com_exist(com_mod.com_spider):
                            print("com 已存在")
                            continue
                        # The comment does not exist yet: insert it, then read back its id.
                        self.__com_dao.insert_com(com_mod)
                        com_mod.com_id = self.__com_dao.search_com_id_by_spider(com_mod.com_spider)
                        # Success path: used to log the failure text by mistake.
                        logging.info("%s-%d-%d com 处理 成功" % (category, art_i, com_i))
                    except Exception:
                        print("\t%s-%d-%d com 处理 失败" % (category, art_i, com_i))
                        logging.exception("%s-%d-%d com 处理 失败" % (category, art_i, com_i))
                        continue

                    # --- comment / user behaviour ---
                    try:
                        if self.__com_dao.check_com_cus_relationship(art_mod.art_id, com_mod.com_id, com_cus_mod.cus_id):
                            self.__cus_dao.insert_cus_behavior(
                                com_cus_mod.cus_id, art_cus_mod.cus_id, 5, art_mod.art_id, 2,
                                com_mod.com_id, cbr_time=com_mod.com_time
                            )
                            self.__cus_dao.insert_cus_behavior(
                                com_cus_mod.cus_id, art_cus_mod.cus_id, 2, art_mod.art_id, 1,
                                art_mod.art_id
                            )
                            self.__cus_dao.update_cus_feature(category, com_cus_mod.cus_id)
                            self.__art_dao.update_art_feature(4, art_mod.art_id, art_mod.art_time)
                        logging.info("%s-%d-%d art-cus 行为 4 数据库操作 成功" % (category, art_i, com_i))
                    except Exception:
                        print("\t%s-%d-%d art-cus 行为 4 数据库操作 失败" % (category, art_i, com_i))
                        logging.exception("%s-%d-%d art-cus 行为 4 数据库操作 失败" % (category, art_i, com_i))
                        continue

                    # --- simulated browsing for the comment user ---
                    try:
                        rand_category_num = random.randint(1, 2)
                        rand_cates = random.sample(categories, rand_category_num)
                        for rand_cate in rand_cates:
                            result_list = self.__art_dao.get_same_category_art(art_mod.art_id, rand_cate)
                            if result_list is not None:
                                for back_art in result_list:
                                    try:
                                        self.__cus_dao.insert_cus_behavior(
                                            com_cus_mod.cus_id, back_art[1], 2, back_art[0], 1, back_art[0]
                                        )
                                        self.__cus_dao.update_cus_feature(rand_cate, com_cus_mod.cus_id, update_num=1)
                                        self.__art_dao.update_art_feature(6, back_art[0], art_mod.art_time)
                                    except Exception:
                                        # Deliberately best-effort: skip this article only.
                                        continue
                                print("\t%d 用户模拟浏览操作 数量 %d 完成" % (com_cus_mod.cus_id, len(result_list)))
                                logging.info("%d 模拟浏览操作 数量 %d 完成" % (com_cus_mod.cus_id, len(result_list)))
                    except Exception:
                        # No ``continue``: the replies below are still processed.
                        print("\t%d 用户模拟浏览操作 失败" % com_cus_mod.cus_id)
                        logging.exception("%d 用户模拟浏览操作 失败" % com_cus_mod.cus_id)

                    # --- replies of this comment ---
                    try:
                        reps_json = self.__rep_pro.get_reps_json(com_json)
                        if reps_json is None:
                            continue
                        logging.info("%s-%d-%d reps_json 获取 成功" % (category, art_i, com_i))
                    except Exception:
                        print("\t\t%s-%d-%d reps_json 获取 失败" % (category, art_i, com_i))
                        logging.exception("%s-%d-%d reps_json 获取 失败" % (category, art_i, com_i))
                        continue

                    for rep_i, rep_json in enumerate(reps_json):
                        # --- reply user ---
                        rep_cus_mod = CusMod.CustomerModel()
                        try:
                            self.__cus_pro.set_rep_cus(rep_json, rep_cus_mod)
                            self.__cus_dao.insert_then_get_cus(rep_cus_mod)
                            self.__cus_dao.update_cus_feature(category, rep_cus_mod.cus_id, flag=True)
                            logging.info("%s-%d-%d-%d rep_cus 处理 成功" % (category, art_i, com_i, rep_i))
                        except Exception:
                            print("\t\t%s-%d-%d-%d rep_cus 处理 失败" % (category, art_i, com_i, rep_i))
                            logging.exception("%s-%d-%d-%d rep_cus 处理 失败" % (category, art_i, com_i, rep_i))
                            continue

                        # --- reply ---
                        rep_mod = RepMod.ReplyModel()
                        try:
                            self.__rep_pro.set_rep(rep_json, art_mod.art_id,
                                                   com_mod.com_id, rep_cus_mod.cus_id, rep_mod)
                            if self.__rep_dao.is_rep_exist(rep_mod.rep_spider):
                                print("rep 已存在")
                                continue
                            self.__rep_dao.search_rep_rep_by_spyder(rep_json, rep_mod)
                            self.__rep_dao.insert_rep(rep_mod)
                            rep_mod.rep_id = self.__rep_dao.search_rep_id_by_spider(rep_mod.rep_spider)
                            logging.info("%s-%d-%d-%d rep 处理 成功" % (category, art_i, com_i, rep_i))
                        except Exception:
                            print("\t\t%s-%d-%d-%d rep 处理 失败" % (category, art_i, com_i, rep_i))
                            logging.exception("%s-%d-%d-%d rep 处理 失败" % (category, art_i, com_i, rep_i))
                            continue

                        # --- reply / user behaviour ---
                        try:
                            if self.__rep_dao.check_rep_cus_relationship(art_mod.art_id, rep_mod.rep_id,
                                                                         rep_cus_mod.cus_id):
                                self.__cus_dao.insert_cus_behavior(
                                    rep_cus_mod.cus_id, art_cus_mod.cus_id, 8, art_mod.art_id, 3,
                                    rep_mod.rep_id, cbr_time=rep_mod.rep_time
                                )
                                self.__cus_dao.insert_cus_behavior(
                                    rep_cus_mod.cus_id, art_cus_mod.cus_id, 2, art_mod.art_id, 1,
                                    art_mod.art_id
                                )
                                self.__cus_dao.update_cus_feature(category, rep_cus_mod.cus_id)
                                self.__art_dao.update_art_feature(5, art_mod.art_id, art_mod.art_time)
                            logging.info("%s-%d-%d-%d art-cus 行为 5 数据库操作 成功" % (category, art_i, com_i, rep_i))
                        except Exception:
                            print("\t\t%s-%d-%d-%d art-cus 行为 5 数据库操作 失败" % (category, art_i, com_i, rep_i))
                            logging.exception("%s-%d-%d-%d art-cus 行为 5 数据库操作 失败" % (category, art_i, com_i, rep_i))
                            continue

                        # --- simulated browsing for the reply user ---
                        try:
                            rand_category_num = random.randint(1, 2)
                            rand_cates = random.sample(categories, rand_category_num)
                            for rand_cate in rand_cates:
                                result_list = self.__art_dao.get_same_category_art(art_mod.art_id, rand_cate)
                                if result_list is not None:
                                    for back_art in result_list:
                                        try:
                                            self.__cus_dao.insert_cus_behavior(
                                                rep_cus_mod.cus_id, back_art[1], 2, back_art[0], 1, back_art[0]
                                            )
                                            self.__cus_dao.update_cus_feature(rand_cate, rep_cus_mod.cus_id, update_num=1)
                                            self.__art_dao.update_art_feature(6, back_art[0], art_mod.art_time)
                                        except Exception:
                                            # Deliberately best-effort: skip this article only.
                                            continue
                                    print("\t\t%d 用户模拟浏览操作 数量 %d 完成" % (rep_cus_mod.cus_id, len(result_list)))
                                    logging.info("%d 用户模拟浏览操作 数量 %d 完成" % (rep_cus_mod.cus_id, len(result_list)))
                        except Exception:
                            print("\t\t%d 用户模拟浏览操作 失败" % rep_cus_mod.cus_id)
                            logging.exception("%d 用户模拟浏览操作 失败" % rep_cus_mod.cus_id)
Example #9
0
    def major(self):
        """Crawl every news category and persist articles, comments and replies.

        For each category the article briefs are fetched. For each article the
        author, the article and an author/article behaviour record are stored;
        the same is then done for each comment of the article and each reply
        of the comment. Every step is wrapped in a bare ``except``: a failure
        is printed and the current category/article/comment/reply is skipped.
        """
        categories = [
            'news_society', 'news_entertainment', 'news_tech', 'news_military',
            'news_sports', 'news_car', 'news_finance', 'news_world',
            'news_fashion', 'news_travel', 'news_discovery', 'news_baby',
            'news_regimen', 'news_story', 'news_essay', 'news_game',
            'news_history', 'news_food'
        ]
        for category in categories:
            print("\n当前类别: %s" % category)
            # Fetch the article briefs of this category.
            """ 处理 art """
            try:
                arts_brief_json = self.__art_pro.get_arts_brief_json_by_category(
                    category)
                if len(arts_brief_json) != 0:
                    print("新闻总长度: %d" % len(arts_brief_json))
                # (success print disabled)
            except:
                print('arts_brief_json 获取 失败')
                continue

            for art_brief_json in arts_brief_json:
                # art_cus: fill the author model and make sure it is stored
                art_cus_mod = CusMod.CustomerModel()
                try:
                    self.__cus_pro.set_art_cus(art_brief_json, art_cus_mod)
                    self.__cus_dao.group_check_insert_cus_then_search_id(
                        art_cus_mod)
                    # (success print disabled)
                except:
                    print("art_cus 处理 失败")
                    continue
                # art: fill the article model and insert it unless it exists
                art_mod = ArtMod.ArticleModel()
                try:
                    self.__art_pro.set_art(art_brief_json, category,
                                           art_cus_mod.cus_id, art_mod)
                    if not self.__art_dao.is_art_exist(art_mod.art_spider):
                        # the article does not exist yet
                        self.__art_dao.insert_art(art_mod)
                    else:
                        # already stored: skip this article entirely
                        print("art 已存在")
                        continue
                    # read back the id of the article just inserted
                    art_mod.art_id = self.__art_dao.search_art_id_by_spider(
                        art_mod.art_spider)
                    # (success print disabled)
                except:
                    print("art 操作 失败")
                    continue
                # art cus behavior: recorded only when the relationship check passes
                try:
                    if self.__art_dao.check_art_cus_relationship(
                            art_mod.art_id, art_cus_mod.cus_id):
                        self.__cus_dao.insert_cus_behavior(
                            1, art_mod.art_id, art_cus_mod.cus_id,
                            art_mod.art_time)
                    else:
                        pass
                    # (success print disabled)
                except:
                    print("art-cus 行为 1 数据库操作 失败")
                    continue
                # Fetch the comments of this article.
                """ handel the coms """
                try:
                    coms_json = self.__com_pro.get_coms_json(art_brief_json)
                    if len(coms_json) != 0:
                        print("回复总长 %d" % len(coms_json))
                except:
                    print("coms_json 获取 失败")
                    continue

                for com_json in coms_json:
                    # com_cus: fill the comment user model and make sure it is stored
                    com_cus_mod = CusMod.CustomerModel()
                    try:
                        self.__cus_pro.set_com_cus(com_json, com_cus_mod)
                        self.__cus_dao.group_check_insert_cus_then_search_id(
                            com_cus_mod)
                        # (success print disabled)
                    except:
                        print("com_cus 处理 错误")
                        continue
                    # com: fill the comment model and insert it unless it exists
                    com_mod = ComMod.CommentModel()
                    try:
                        self.__com_pro.set_com(com_json, art_mod.art_id,
                                               com_cus_mod.cus_id, com_mod)
                        if not self.__com_dao.is_com_exist(com_mod.com_spider):
                            # the comment does not exist yet
                            self.__com_dao.insert_com(com_mod)
                        else:
                            # already stored: skip this comment and its replies
                            print("com 已存在")
                            continue
                        # read back the id of the comment just inserted
                        com_mod.com_id = self.__com_dao.search_com_id_by_spider(
                            com_mod.com_spider)
                        self.__art_dao.update_art_com_number(art_mod.art_id)
                        # (success print disabled)
                    except:
                        print("com 处理 失败")
                        continue
                    # com cus behavior: recorded only when the relationship check passes
                    try:
                        if self.__com_dao.check_com_cus_relationship(
                                art_mod.art_id, com_mod.com_id,
                                com_cus_mod.cus_id):
                            self.__cus_dao.insert_cus_behavior(
                                4, art_mod.art_id, com_cus_mod.cus_id,
                                com_mod.com_time)
                        else:
                            pass
                        # (success print disabled)
                    except:
                        print("art-cus 行为 4 数据库操作 失败")
                        continue
                    # Fetch the replies of this comment.
                    """ handel the reps """
                    try:
                        reps_json = self.__rep_pro.get_reps_json(com_json)
                        if len(reps_json) != 0:
                            print("回复总长 %d" % len(reps_json))
                    except:
                        print("reps_json 获取 失败")
                        continue

                    for rep_json in reps_json:
                        # rep_cus: fill the reply user model and make sure it is stored
                        rep_cus_mod = CusMod.CustomerModel()
                        try:
                            self.__cus_pro.set_rep_cus(rep_json, rep_cus_mod)
                            self.__cus_dao.group_check_insert_cus_then_search_id(
                                rep_cus_mod)
                            # (success print disabled)
                        except:
                            print("rep_cus 处理 失败")
                            continue
                        # rep: fill the reply model and insert it unless it exists
                        rep_mod = RepMod.ReplyModel()
                        try:
                            self.__rep_pro.set_rep(rep_json, art_mod.art_id,
                                                   com_mod.com_id,
                                                   rep_cus_mod.cus_id, rep_mod)
                            if not self.__rep_dao.is_rep_exist(
                                    rep_mod.rep_spider):
                                self.__rep_dao.search_rep_rep_by_spyder(
                                    rep_json, rep_mod)
                                self.__rep_dao.insert_rep(rep_mod)
                            else:
                                # already stored: skip this reply
                                print("rep 已存在")
                                continue
                            # read back the id of the reply just inserted
                            rep_mod.rep_id = self.__rep_dao.search_rep_id_by_spider(
                                rep_mod.rep_spider)
                            # (success print disabled)
                        except:
                            print("rep 处理 失败")
                            continue

                        # rep cus behavior: recorded only when the relationship check passes
                        try:
                            if self.__rep_dao.check_rep_cus_relationship(
                                    art_mod.art_id, rep_mod.rep_id,
                                    rep_cus_mod.cus_id):
                                self.__cus_dao.insert_cus_behavior(
                                    5, art_mod.art_id, rep_cus_mod.cus_id,
                                    rep_mod.rep_time)
                            else:
                                pass
                            # (success print disabled)
                        except:
                            print("art-cus 行为 5 数据库操作 失败")
                            continue