def load_extractors(self, topic_id=-1):
        '''Load extractor configuration rows from MySQL into ``self.modules_config``.

        Each row of the ``extractors`` table is converted to a dict with
        ``dbrecord_to_dict`` and stored in ``self.modules_config`` keyed by the
        integer value of its ``f_topic_id`` column.

        :param topic_id: when different from ``-1`` only rows with this
            ``f_topic_id`` are queried; ``-1`` (the default) loads every row.
        :return: ``self.modules_config.keys()`` on success (this includes ids
            cached by earlier calls), or ``None`` when the query or the row
            conversion fails.
        '''
        if topic_id != -1:
            sql = '''select f_topic_id, f_module_path, f_module_name, f_extractor_name from extractors where f_topic_id=%s'''
            params = [str(topic_id)]
        else:
            sql = '''select f_topic_id, f_module_path, f_module_name, f_extractor_name from extractors'''
            params = None

        mysql_db = get_mysql_conn(self.conf)
        try:
            cursor = mysql_db.cursor()
            if params is None:
                cursor.execute(sql)
            else:
                cursor.execute(sql, params)
            rows = cursor.fetchall()
            description = cursor.description
            for row in rows:
                extractor_info = dbrecord_to_dict(description, row)
                # Use a separate name so the ``topic_id`` argument is not
                # overwritten while iterating.
                row_topic_id = int(extractor_info.get("f_topic_id", -1))
                self.modules_config[row_topic_id] = extractor_info
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit are not swallowed.
            log.error("query data from mysql error, sql:%s, ret:%s" % (sql, traceback.format_exc()))
            return None
        finally:
            # Single place that releases the connection on every path.
            mysql_db.close()

        log.info("load_modules_success, moudle:%s" % json.dumps(self.modules_config))

        return self.modules_config.keys()
    def add_topic(self, topic_info):
        '''Insert one row into the ``topic`` table.

        :param topic_info: mapping that must provide ``id``, ``table_name``,
            ``schema`` and ``primary_keys``; ``name`` is optional and defaults
            to an empty string.
        :return: the object created by ``toolsutil.result()``. ``MSG`` is
            always set; ``CODE`` is set to ``-10000`` when ``topic_info`` is
            missing a required key or when the insert/commit fails. On success
            ``CODE`` keeps whatever ``toolsutil.result()`` initialised it to.
        '''
        resp = toolsutil.result()
        try:
            topic_id = topic_info['id']
            table_name = topic_info['table_name']
            schema = topic_info['schema']
            primary_keys = topic_info['primary_keys']
        except Exception:
            err_msg = "topic_info error,\ttopic_info:%s\tret:%s" % (topic_info, traceback.format_exc())
            log.error(err_msg)
            resp['MSG'] = err_msg
            resp['CODE'] = -10000
            return resp

        sql = '''INSERT INTO topic (id, name, table_name, `schema`, primary_keys) VALUES(%s,%s,%s,%s,%s)'''
        param = (topic_id, topic_info.get('name', ''), table_name, schema, primary_keys)

        mysql_db = get_mysql_conn(self.conf)
        try:
            cursor = mysql_db.cursor()
            cursor.execute(sql, param)
            # Commit inside the try so a failing commit is reported through
            # ``resp`` like any other database error instead of escaping.
            mysql_db.commit()
        except Exception:
            resp['MSG'] = "insert data into mysql error,\ttopic_info:%s\tsql:%s\tret:%s" % (
                topic_info, sql, traceback.format_exc())
            resp['CODE'] = -10000
            log.error(resp['MSG'])
            mysql_db.rollback()
            return resp
        finally:
            # Always release the connection, even if rollback itself raises.
            mysql_db.close()

        resp['MSG'] = "insert data into mysql success, topic_info:%s, sql:%s" % (topic_info, sql)
        return resp
# Example no. 3
# 0
 def __init__(self, conf, mongodb_conf, redis_tasks, log):
     '''Store the logger and open the MySQL and MongoDB connections.

     :param conf: configuration passed to ``get_mysql_conn``.
     :param mongodb_conf: mapping providing the ``host`` and ``port`` used to
         build the ``pymongo.MongoClient``.
     :param redis_tasks: not used in the visible part of this constructor.
     :param log: logger stored as ``self.log``.
     :raises SystemExit: with status ``1`` when creating the MongoDB client
         fails (the error is logged first).
     '''
     self.log = log
     # Initialise MySQL.
     self.mysql_conn = get_mysql_conn(conf)
     # Initialise MongoDB.
     self.mongodb_conf = mongodb_conf
     try:
         self.mongo_conn = pymongo.MongoClient(host=mongodb_conf['host'],
                                               port=mongodb_conf['port'])
     except Exception:
         self.log.error("connect mongodb fail:%s" % traceback.format_exc())
         # ``exit()`` is the interactive helper installed by ``site`` and is
         # not guaranteed to exist; raising SystemExit is the portable form.
         raise SystemExit(1)
    def insert_extractor(self, extractor_info):
        '''Create an extractor source file from the template and register it in MySQL.

        Steps:
          1. Copy ``self.src_dir`` to
             ``<basic_path>i_entity_extractor/extractors/<target_dir_name>``
             via ``self.copy_files``, then write
             ``<target_dir_name>_extractor.py`` from ``template_extractor.py``
             with ``TemplateExtractor`` replaced by the generated class name,
             and delete the template copy.
          2. Insert a row describing the extractor into the ``extractors``
             table.

        :param extractor_info: mapping that must provide ``topic_id`` and
            ``target_dir_name``; ``extractor_name`` is optional and defaults
            to an empty string.
        :return: the object created by ``toolsutil.result()``. ``MSG`` is
            always set; ``CODE`` is set to ``-10000`` when the input is
            invalid, the copy fails, or the insert/commit fails.
        '''

        resp = toolsutil.result()
        try:
            topic_id = extractor_info['topic_id']
            target_dir_name = extractor_info['target_dir_name']
            extractor_name = extractor_info.get('extractor_name', '')
        except Exception:
            err_msg = "extractor_info error,\textractor_info:%s\tret:%s" % (extractor_info, traceback.format_exc())
            log.error(err_msg)
            resp['MSG'] = err_msg
            resp['CODE'] = -10000
            return resp

        module_path = "extractors/" + target_dir_name + "/" + target_dir_name + "_extractor.py"

        # Class name is the directory name with its first letter upper-cased
        # plus "Extractor"; an empty directory name falls back to the template.
        if target_dir_name:
            module_name = target_dir_name[0].upper() + target_dir_name[1:] + "Extractor"
        else:
            module_name = "TemplateExtractor"

        # 1. Create the extractor's default files and default code.
        target_dir = self.basic_path + "i_entity_extractor/extractors/" + target_dir_name
        ret = self.copy_files(self.src_dir, target_dir)
        if not ret:
            # NOTE(review): no exception is active here, so the traceback part
            # of this message carries no real stack information.
            resp['MSG'] = "copy_files_error,\textractor_info:%s\tret:%s" % (extractor_info, traceback.format_exc())
            resp['CODE'] = -10000
            return resp

        target_file = target_dir + "/" + target_dir_name + "_extractor.py"
        source_file = target_dir + "/template_extractor.py"
        # Read the template fully before opening the target, and use context
        # managers so both file handles are closed deterministically.
        with open(source_file, "rb") as template:
            template_code = template.read()
        with open(target_file, "wb") as generated:
            generated.write(template_code.replace("TemplateExtractor", module_name))
        os.remove(source_file)

        # 2. Add the extractor configuration to the database.
        sql = '''INSERT INTO extractors (f_topic_id, f_module_path, f_module_name, f_extractor_name) VALUES(%s,%s,%s,%s)'''
        mysql_db = get_mysql_conn(self.conf)
        try:
            cursor = mysql_db.cursor()
            cursor.execute(sql, (topic_id, module_path, module_name, extractor_name))
            # Commit inside the try so a failing commit is reported through
            # ``resp`` instead of escaping with the connection still open.
            mysql_db.commit()
        except Exception:
            resp['MSG'] = "insert data into mysql error,\textractor_info:%s\tsql:%s\tret:%s" % (
                extractor_info, sql, traceback.format_exc())
            resp['CODE'] = -10000
            mysql_db.rollback()
            return resp
        finally:
            # Always release the connection, even if rollback itself raises.
            mysql_db.close()

        resp['MSG'] = "insert data into mysql success, extractor_info:%s, sql:%s" % (extractor_info, sql)
        return resp
    def load_topics(self, topic_id=-1):
        '''Load topic definitions from MySQL into ``self.all_topics``.

        Every row of the ``topic`` table becomes a dict with the keys
        ``topic_id`` (int), ``name`` (UTF-8 encoded), ``table_name``,
        ``schema`` (parsed JSON, or ``None`` when parsing fails) and
        ``primary_keys`` (parsed JSON, or ``[[]]`` when the column is empty),
        stored in ``self.all_topics`` under the integer topic id.

        :param topic_id: when different from ``-1`` only the topic with this
            id is queried; ``-1`` (the default) loads every topic.
        :return: ``self.all_topics.keys()`` on success (this includes ids
            cached by earlier calls), or ``None`` when the query fails.
        '''
        if topic_id != -1:
            sql = '''select id, name, table_name, `schema`, primary_keys from topic where id=%s'''
            params = [str(topic_id)]
        else:
            sql = '''select id, name, table_name, `schema`, primary_keys from topic'''
            params = None

        mysql_db = get_mysql_conn(self.conf)
        try:
            cursor = mysql_db.cursor()
            if params is None:
                cursor.execute(sql)
            else:
                cursor.execute(sql, params)
            rows = cursor.fetchall()
            # Capture the column metadata while the connection is still open.
            description = cursor.description
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit are not swallowed.
            log.error("query data from mysql error, sql:%s, ret:%s" % (sql, traceback.format_exc()))
            return None
        finally:
            # Single place that releases the connection on every path.
            mysql_db.close()

        for row in rows:
            topic_info = dbrecord_to_dict(description, row)
            topic = {}
            try:
                topic['schema'] = json.loads(topic_info['schema'])
            except Exception:
                # A topic whose schema cannot be parsed is still registered,
                # with ``schema`` set to None.
                topic['schema'] = None
                log.warning('schema parse failed[%s], topic[%s] is not writable!' % (
                    traceback.format_exc(), topic_info['name'].encode('utf8')))

            # Use a separate name so the ``topic_id`` argument is not
            # overwritten while iterating.
            loaded_id = int(topic_info['id'])
            topic['topic_id'] = loaded_id
            topic['name'] = topic_info['name'].encode('utf8')
            topic['table_name'] = topic_info.get('table_name')
            topic['primary_keys'] = [[]]
            if topic_info.get('primary_keys'):
                topic['primary_keys'] = json.loads(topic_info.get('primary_keys'))
            self.all_topics[loaded_id] = topic

        # ``list(...)`` keeps the dump working where ``keys()`` returns a view
        # instead of a list.
        log.info("load_topic_success, topic_info_keys:%s" % json.dumps(list(self.all_topics.keys())))

        return self.all_topics.keys()