예제 #1
0
    async def getCrawledNews(self):
        # 抓取新闻数
        # 含相关新闻的抓取新闻数
        date = []
        crawledNews = []
        crawledNewsWithRelative = []

        end_time = arrow.get(
                str(self.date) + ' 17:00:00+00:00',
                'YYYYMMDD HH:mm:ss')
        start_time = end_time.replace(days=-1)

        sql = '''
            SELECT
                count(*) AS crawledNews
            FROM
                News
            WHERE
                [CreatedTime]>='{start}'
            AND
                [CreatedTime]<'{end}';
        '''.format(
                start=start_time,
                end=end_time
            )

        res = mssql.query(sql, LOCAL)
        for item in res:
            crawledNews.append(int(item['crawledNews']))

        sql = '''
            SELECT
                count(distinct(RelativeNews.LeftNewsId))
            AS
                crawledNewsWithRelative
            FROM
                RelativeNews
            JOIN
                News
            ON
                RelativeNews.LeftNewsId=News.NewsId
            WHERE
                News.CreatedTime>='{start}'
            AND
                News.CreatedTime<'{end}'
        '''.format(
                start=start_time,
                end=end_time)
        res = mssql.query(sql, LOCAL)
        for item in res:
            date.append(self.date)
            crawledNewsWithRelative.append(
                    int(item['crawledNewsWithRelative']))

        await self.insert(
            date=date,
            table='crawledNews',
            crawledNews=crawledNews,
            crawledNewsWithRelative=crawledNewsWithRelative
        )
예제 #2
0
    async def getNewsByTitle(self, title):
        """
        搜索新闻,使用sql like 匹配标题
        :param title: 搜索新闻标题关键字
        :return: 包含匹配结果的数组
        """
        ret = []

        sql = '''
            SELECT
                TOP 100 News.*, Tags.Name
            FROM
                {table}
            LEFT JOIN
                NewsTags
            ON
                News.NewsId = NewsTags.NewsId
            AND
                NewsTags.Score = 1
            LEFT JOIN
                Tags
            ON
                NewsTags.TagId = Tags.Id
            WHERE
                UPPER(title) LIKE '%{title}%'
            ORDER BY CreatedTime DESC
        '''.format(table='News', title=title.upper())

        res = query(sql, self.LOCAL)
        for item in res:
            ret.append(item)

        return ret
예제 #3
0
    async def getCountNewsByType(self):
        # 抓取新闻类型
        # 抓取新闻数量
        date = []
        newsType = []
        newsCount = []

        end_time = arrow.get(
                str(self.date) + ' 17:00:00+00:00',
                'YYYYMMDD HH:mm:ss')
        start_time = end_time.replace(days=-1)

        sql = '''
            SELECT
                count(*) AS Count,
                [Type]
            FROM
                [News]
            WHERE
                [CreatedTime]>='{start}'
            AND
                [CreatedTime]<'{end}'
            GROUP BY [Type]
        '''.format(
                start=start_time,
                end=end_time)

        res = mssql.query(sql, LOCAL)
        for item in res:
            date.append(self.date)
            newsType.append(int(item['Type']))
            newsCount.append(int(item['Count']))

        if len(date) == 0:
            date.append(self.date)
            newsType.append(0)
            newsCount.append(0)

        await self.insert(
            date=date,
            table='countNewsByType',
            newsType=newsType,
            newsCount=newsCount
        )
예제 #4
0
    async def getCommentUser(self):
        # 评论用户
        # 评论用户比例
        date = []
        commentUser = []
        totalComments = []
        commentsDict = dict()

        end_time = arrow.get(
                str(self.date) + ' 17:00:00+00:00',
                'YYYYMMDD HH:mm:ss')
        start_time = end_time.replace(days=-1)

        sql = '''
            SELECT
                [UserId],
                count(*) AS Count
            FROM
                Comments
            WHERE
                [Timestamp]>='{start}'
            AND
                [Timestamp]<'{end}'
            GROUP BY [UserId]
        '''.format(
                start=start_time,
                end=end_time
            )

        res = mssql.query(sql, LOCAL)
        for item in res:
            commentsDict.update({item['UserId']: item['Count']})

        date.append(self.date)
        commentUser.append(len(commentsDict))
        totalComments.append(sum(commentsDict.values()))

        await self.insert(
            date=date,
            table='commentUser',
            commentUser=commentUser,
            totalComments=totalComments
        )
예제 #5
0
    async def getFetchReadByCategory(self):
        # 新闻阅读数
        # 新闻展示数
        # 新闻类型
        date = []
        readCount = []
        fetchCount = []
        categoryName = []
        categoryIdNameDict = {-1: 'Recommand'}

        sql = 'SELECT [CategoryId], [Name] FROM Categories'
        res = mssql.query(sql, LOCAL)
        for item in res:
            categoryIdNameDict.update({item['CategoryId']: item['Name']})

        content = self.readFile('fetch_read_by_category')
        for line in content:
            try:
                categoryId = int(line[2])
                if categoryId not in categoryIdNameDict.keys():
                    continue
                date.append(self.date)
                readCount.append(int(line[0]))
                fetchCount.append(int(line[1]))
                categoryName.append(categoryIdNameDict[categoryId])
            except ValueError:
                pass

        if len(date) == 0:
            date.append(self.date)
            readCount.append(0)
            fetchCount.append(0)
            categoryName.append('')

        await self.insert(
            date=date,
            table='fetchReadByCategory',
            readCount=readCount,
            fetchCount=fetchCount,
            categoryName=categoryName
        )
예제 #6
0
    async def getQueryData(self, parameters):

        start_date = parameters.get('start_date')
        end_date = parameters.get('end_date')

        click_index = parameters.get('click_index')
        tag = parameters.get('tag')
        requestcategoryid = parameters.get('requestcategoryid')
        newstype = parameters.get('newstype')
        mediaid = parameters.get('mediaid')

        print(click_index, tag, requestcategoryid, newstype, mediaid)

        categoryid = parameters.get('categoryid')
        gb = parameters.get('gb')
        with_relative = parameters.get('with_relative')
        medias = dict()
        sql = 'SELECT * FROM Media'

        res = mssql.query(sql, MELOCAL)
        for dt in res:
            medias[str(dt['Id'])] = dt['Name']

        for i, column in enumerate(gb):
            if column == 'date':
                gb[i] = 'left(date, 8)'
        if start_date == '':
            self.finish(json.dumps([]))
        else:

            sfrom = 'FROM {}.{}_dashboard_data'.format(DATABASE, self.LOCAL)
            select = ('CAST(SUM(presents) AS UNSIGNED) AS present,'
                      'CAST(SUM(clicked) AS UNSIGNED) AS click ')
            where = 'WHERE date<=%s and date>=%s' % (end_date, start_date)

            for index, item in enumerate(gb):
                if item == 'clickindex':
                    gb[index] = 'click_index'

            gb = ','.join(gb)
            groupby = '%s ' % gb
            if len(with_relative) > 0 and with_relative[0] == '1':
                where += ' and tag like "RelativeNews:%"'
            elif len(with_relative) > 0 and with_relative[0] == '-1':
                where += ' and tag not like "RelativeNews:%"'

            for vari in [
                    'click_index', 'tag', 'requestcategoryid', 'newstype',
                    'mediaid', 'categoryid'
            ]:
                if (eval(vari) is not None and len(eval(vari)) != 0
                        and eval(vari)[0] != ''):

                    where += ' and  %s in (%s)' % (vari, ','.join(
                        ['"%s"' % i for i in eval(vari)[0].split(',')]))

            if gb != '':
                select += ', %s' % gb

            if groupby.strip() != '':
                groupby = 'GROUP BY ' + groupby
            sql = 'SELECT %s  %s %s  %s' % (select, sfrom, where, groupby)

            data = DB(**DBCONFIG).query(sql)
            res = []

            if len(data) > 0 and data[0].get('categoryid') is not None:
                sql = '''
                    SELECT
                        CategoryId, Name
                    FROM
                        Categories
                '''.format(categoryid)
                category_id_name = mssql.query(sql, self.LOCAL)
                category_id_name_dict = ({
                    item['CategoryId']: item['Name']
                    for item in category_id_name
                })
                for item in data:
                    item.update({
                        'CategoryName':
                        category_id_name_dict.get(int(item.get('categoryid')))
                    })

            for dts in data:
                if "mediaid" in gb:
                    mid = dts['mediaid']
                    dts['mediaid'] = '%s-%s' % (
                        mid, medias.get(str(mid), 'not know'))
                if dts['present'] == 0:
                    dts['ctr'] = 0
                else:
                    dts['ctr'] = '{0:%}'.format(dts['click'] / dts['present'])

                for key in [
                        'date', 'left(date, 8)', 'right(date, 2)',
                        'click_index', 'tag', 'requestcategoryid', 'newstype',
                        'mediaid', 'categoryid'
                ]:
                    dts.setdefault(key, '0')

                res.append(dts)
            return res