Python ClusterMessageObj Examples

Programming Language: Python

Namespace/Package Name: entity

Examples at hotexamples.com: 6

Python ClusterMessageObj - 6 examples found. These are the top-rated real-world Python examples of entity.ClusterMessageObj, extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

ClusterMessageObj(6)

__dict__(1)

Frequently Used Methods

ClusterMessageObj (6)

__dict__ (1)

Example #1

Show file

    def get_summary_corpus(self, start_time, end_time):
        """
        Fetch the abstracts flagged as lined and wrap them as cluster messages.

        The query joins ``news_abstract_result`` with ``base_data`` and keeps
        the rows with ``language_type = 1`` and ``is_lined = 1`` whose
        ``publishtime`` lies in the half-open interval
        ``[start_time, end_time)``.

        :param start_time: inclusive lower bound on ``publishtime``.
        :param end_time: exclusive upper bound on ``publishtime``.
        :return: list of ``ClusterMessageObj``, one per fetched row, built
            from the row id, the UTF-8 encoded abstract, the formatted
            publish time, an empty string and the UTF-8 encoded site name.
        """
        # NOTE(review): both bounds are %-formatted straight into the SQL
        # text, so callers must only pass trusted values.
        query = """
            SELECT bd.id, bd.publishtime, bd.site_name, nar.abstract  
            FROM news_abstract_result nar, base_data bd 
            WHERE nar.bd_id = bd.id 
            AND bd.publishtime >= '%s' AND bd.publishtime < '%s' 
            AND nar.language_type = 1 
            AND nar.is_lined = 1""" % (start_time, end_time)

        fetched_rows = self.fetch_all(query)

        timestamp_format = "%Y-%m-%d %H:%M:%S"
        corpus = []
        for record in fetched_rows:
            message_id = record['id']
            abstract_text = record['abstract'].encode('utf-8')
            published_at = record['publishtime'].strftime(
                timestamp_format).decode('utf-8')
            source_site = record['site_name'].encode('utf-8')

            # The abstract takes the second positional slot; the fourth
            # positional argument is left as an empty string.
            corpus.append(
                ClusterMessageObj(message_id, abstract_text, published_at, '',
                                  source_site))
        return corpus

Example #2

Show file

    def parse_corpus_records(records, language_type=None):
        """
        Convert fetched rows into ``ClusterMessageObj`` instances.

        :param records: iterable of rows supporting lookup by the keys
            ``id``, ``title``, ``content``, ``publishtime`` and ``site_name``.
        :param language_type: when not ``None``, a row is kept only if
            ``BaseDataView.is_valid_language(language_type, content)`` is
            truthy for its UTF-8 encoded content; when ``None`` every row
            is kept.
        :return: list of ``ClusterMessageObj`` for the rows that were kept.
        """
        timestamp_format = "%Y-%m-%d %H:%M:%S"
        messages = []
        for record in records:
            message_id = record['id']
            title_text = record['title'].encode('utf-8')
            body_text = record['content'].encode('utf-8')
            published_at = record['publishtime'].strftime(
                timestamp_format).decode('utf-8')
            source_site = record['site_name'].encode('utf-8')

            message = ClusterMessageObj(message_id, title_text, published_at,
                                        body_text, source_site)

            # Language filtering: skip the row only when a language was
            # requested and the content does not validate against it.
            if (language_type is not None and
                    not BaseDataView.is_valid_language(language_type,
                                                       body_text)):
                continue
            messages.append(message)
        return messages

Example #3

Show file

    def parse_corpus_records(records):
        """
        Convert fetched rows into ``ClusterMessageObj`` instances.

        :param records: iterable of rows supporting lookup by the keys
            ``id``, ``title``, ``content``, ``publishtime`` and ``site_name``.
        :return: list with one ``ClusterMessageObj`` per row, in input order.
        """
        timestamp_format = "%Y-%m-%d %H:%M:%S"

        def build_message(record):
            # Text columns are encoded to UTF-8; the publish time is
            # formatted first and then decoded from UTF-8.
            message_id = record['id']
            title_text = record['title'].encode('utf-8')
            body_text = record['content'].encode('utf-8')
            published_at = record['publishtime'].strftime(
                timestamp_format).decode('utf-8')
            source_site = record['site_name'].encode('utf-8')
            return ClusterMessageObj(message_id, title_text, published_at,
                                     body_text, source_site)

        return [build_message(record) for record in records]

Example #4

Show file

def query_string(querystring):
    """
    Fetch every document matching ``querystring`` as ``ClusterMessageObj``.

    A first GET request reads the ``total`` field to learn how many
    documents exist; the documents are then fetched page by page (100 per
    request) and the ``id``, ``title`` and ``pubtime`` fields of every entry
    in the response's ``doc`` list are wrapped in a ``ClusterMessageObj``.

    :param querystring: path segment appended verbatim to the platform URI.
    :return: list of ``ClusterMessageObj`` built with the keyword arguments
        ``messageId``, ``messageTitle`` and ``messagePublishtime``.
    """
    messages = []
    # Request URI prefix shared by the count request and the page requests.
    prefix_req_uri = "http://saas1:5000/enterprise_saas_platform/saas_platform/" + querystring + "/"
    suffix_uri_total = "display=id/1/1"
    start_time = datetime.now()
    # One placeholder per argument: the original format string had a single
    # %s for a two-element tuple, which raised TypeError on every call.
    logger.info('starting query_string, {prefix_req_uri: %s, start_time: %s}' %
                (prefix_req_uri, start_time.strftime('%Y-%m-%d %H:%M:%S')))

    # Ask for the total number of matching documents.
    total_uri = prefix_req_uri + suffix_uri_total
    total_resp = requests.get(total_uri)
    total = json.loads(total_resp.text)["total"]

    # Number of documents requested per page.
    page_size = 100

    # Walk the result set one page at a time.
    display_uri_data = "display=id&title&pubtime/"
    for offset in range(0, total, page_size):
        page_uri_data = str(offset) + "/" + str(page_size)
        data_uri = prefix_req_uri + display_uri_data + page_uri_data
        data_resp = requests.get(data_uri)

        # Collect every document of this page into the result list.
        for doc in json.loads(data_resp.text)["doc"]:
            messages.append(
                ClusterMessageObj(messageId=doc["id"],
                                  messageTitle=doc["title"],
                                  messagePublishtime=doc["pubtime"]))

    logger.info(
        'end query_string: {prefix_req_uri: %s, total: %d, lost_seconds: %ds}'
        % (prefix_req_uri, total, (datetime.now() - start_time).seconds))
    return messages

Example #5

Show file

def dic_clusterobj(dic):
    """
    Build a ``ClusterMessageObj`` whose attribute dictionary is ``dic``.

    The object is created without constructor arguments and ``dic`` itself
    (not a copy) is then installed as its ``__dict__``.

    :param dic: dictionary mapping attribute names to values.
    :return: the newly created ``ClusterMessageObj``.
    """
    cluster_message = ClusterMessageObj()
    # Swap in the whole attribute dictionary at once.
    setattr(cluster_message, '__dict__', dic)
    return cluster_message

Example #6

Show file

def _encode_utf8_or_keep(value, field_name):
    """Return ``value`` encoded as UTF-8, or ``value`` unchanged (with a debug log) when encoding fails."""
    try:
        return value.encode('utf-8')
    except Exception as e:
        # Best effort: a field that cannot be encoded is passed on as-is.
        logger.debug("Error: %s.encode('utf-8'), {exception: %s}" %
                     (field_name, e))
        return value


def get_involved_china_corpus(start_time, end_time, language_type, group_id):
    """
    Fetch the clustering corpus of rows with ``involved_china = 1``.

    Queries ``base_data_view`` for rows published in the half-open interval
    ``[start_time, end_time)`` with the given language and group ids,
    grouped by ``titlehash``, and wraps each row in a ``ClusterMessageObj``.

    Errors derived from ``Exception`` raised while querying or converting
    rows are logged and the rows collected so far are returned.

    :param start_time: inclusive lower bound on ``publishtime``.
    :param end_time: exclusive upper bound on ``publishtime``.
    :param language_type: value compared against the ``language_type`` column.
    :param group_id: text placed after ``group_id IN`` in the query.
    :return: list of ``ClusterMessageObj`` (possibly empty or partial).
    """
    data = []
    start_timestamp = time.time()

    # SQL query. The original comment states that only the first paragraph
    # of the content is taken (presumably handled by the view - TODO confirm).
    # NOTE(review): all four arguments are %-formatted straight into the SQL
    # text, so callers must only pass trusted values.
    sql = """
    SELECT id, title, content, publishtime, site_name 
    FROM base_data_view 
    WHERE publishtime >= '%s' AND publishtime < '%s' 
    AND language_type = %s 
    AND group_id IN %s 
    AND involved_china = 1 
    group by titlehash """ % (start_time, end_time, language_type, group_id)

    logger.debug("starting get_involved_china_corpus, {sql: %s}." % sql)

    # Open the database connection.
    # Intranet host: 10.30.248.210, public host: 47.93.162.134
    # NOTE(review): connection settings and credentials are hard-coded here;
    # they should be moved to configuration.
    db = MySQLdb.Connection(host='10.30.248.210',
                            user='******',
                            passwd='Wi$eWeb123',
                            db='wjbdb',
                            charset='utf8',
                            port=5720)

    # Obtain a cursor to run the query with.
    cursor = db.cursor()

    try:
        cursor.execute(sql)

        # Column order follows the SELECT list of the query above.
        for message_id, title, content, published, site_name in cursor:
            title = _encode_utf8_or_keep(title, 'title')
            content = _encode_utf8_or_keep(content, 'content')
            publish_time = published.strftime(
                "%Y-%m-%d %H:%M:%S").decode('utf-8')
            site_name = _encode_utf8_or_keep(site_name, 'site_name')

            data.append(
                ClusterMessageObj(message_id, title, publish_time, content,
                                  site_name))

        logger.debug(
            "ending get_involved_china_corpus, {data length: %s, cost_times: %ds}"
            % (len(data), time.time() - start_timestamp))
    except Exception as e:
        logger.error("Error: get_involved_china_corpus, {exception: %s}" % e)
    finally:
        # Always release the cursor and the connection; the connection is
        # closed even if closing the cursor fails.
        try:
            cursor.close()
        finally:
            db.close()

    # Returning here instead of inside ``finally`` keeps KeyboardInterrupt,
    # SystemExit and cleanup errors from being silently discarded.
    return data