コード例 #1
0
class Update_data():
    """Copy the documents of a MongoDB collection into a Solr core.

    The MongoDB database name doubles as the ``scene`` value and the
    collection name as the ``topic`` value of every indexed document.
    """

    def __init__(self,
                 db_name,
                 solr_name=SOLR_CORE_NAME,
                 mongodb_ip='127.0.0.1',
                 solr_ip='127.0.0.1',
                 port=27017):
        """Open the MongoDB database and build the Solr client.

        Args:
            db_name: MongoDB database to read from.
            solr_name: Solr core that receives the documents.
            mongodb_ip: Host of the MongoDB server.
            solr_ip: Host of the Solr server (port 8999 is fixed).
            port: MongoDB port.
        """
        mongo_client = MongoClient(mongodb_ip, port)
        self.db_name = db_name
        self.db = mongo_client[db_name]
        self.solr_url = 'http://' + solr_ip + ':8999/solr'
        self.solr_core = solr_name
        self.solr = SOLR(self.solr_url)

    def write_data2solr(self, collection_name):
        """Replace the Solr documents of ``collection_name`` with fresh ones.

        Existing Solr documents matching this scene/topic pair are deleted
        first.  Documents from 'refuse2chat' and 'sentiment' are indexed
        as-is; every other document is indexed once per entry of its
        'equal_questions' list, with that entry stored under 'question',
        'question_ik' and 'question_cn'.

        Raises:
            KeyError: If an expanded document has no 'equal_questions' key.
        """
        stale_query = ('scene_str:' + self.db_name +
                       ' AND topic_str:' + collection_name)
        self.solr.delete_solr_by_query(self.solr_core, stale_query)

        index_as_is = collection_name in ['refuse2chat', 'sentiment']
        for record in self.db[collection_name].find():
            doc = record.copy()
            doc['scene'] = self.db_name
            doc['topic'] = collection_name
            doc['_id'] = str(doc['_id'])
            if index_as_is:
                self.solr.update_solr(doc, self.solr_core)
                continue
            # An empty super_intention is stored as the literal 'null'.
            if doc.get('super_intention') == '':
                doc['super_intention'] = 'null'
            # The same dict is re-sent for each question with only the
            # question fields overwritten.
            for question in doc.pop('equal_questions'):
                doc['question'] = question
                doc['question_ik'] = question
                doc['question_cn'] = question
                self.solr.update_solr(doc, self.solr_core)
コード例 #2
0
ファイル: utils.py プロジェクト: zhengxin2016/corpus
class BaseClass():
    """Hold loaded data for one collection and push it to MongoDB and Solr.

    ``self.data`` starts empty; this class only writes whatever has been
    placed there.
    """

    def __init__(self,
                 ip,
                 port,
                 db_name,
                 collection_name,
                 solr_name=SOLR_CORE_NAME):
        """Open the target collection and build the Solr client.

        Args:
            ip: Host used for both MongoDB and Solr (Solr port 8999 is fixed).
            port: MongoDB port.
            db_name: MongoDB database name; also used in ``dirpath``.
            collection_name: Collection inside ``db_name``.
            solr_name: Solr core that receives the documents.
        """
        self.db_name = db_name
        self.collection_name = collection_name
        self.dirpath = 'data/' + db_name
        self.db = MongoClient(ip, port)[db_name]
        self.collection = self.db[collection_name]
        self.data = []
        self.solr_core = solr_name
        self.solr_url = 'http://' + ip + ':8999/solr'
        self.solr = SOLR(self.solr_url)

    def write_data2mongodb(self):
        """Drop the collection, insert ``self.data`` and recreate its indexes.

        'refuse2chat' and 'sentiment' are indexed on 'question'; every other
        collection is indexed on 'group' and 'label'.
        """
        self.collection.drop()
        self.collection.insert(self.data)
        if self.collection_name in ['refuse2chat', 'sentiment']:
            indexed_fields = ['question']
        else:
            indexed_fields = ['group', 'label']
        for field in indexed_fields:
            self.collection.create_index(field)

    def write_data2solr(self):
        """Replace the Solr documents of this collection with fresh ones.

        Existing Solr documents matching this scene/topic pair are deleted
        first.  Documents from 'refuse2chat' and 'sentiment' are indexed
        as-is; every other document is indexed once per entry of its
        'equal_questions' list.

        Raises:
            KeyError: If an expanded document has no 'equal_questions' key.
        """
        topic = self.collection_name
        stale_query = 'scene_str:' + self.db_name + ' AND topic_str:' + topic
        self.solr.delete_solr_by_query(self.solr_core, stale_query)

        index_as_is = topic in ['refuse2chat', 'sentiment']
        for record in self.collection.find():
            doc = record.copy()
            doc['scene'] = self.db_name
            doc['topic'] = topic
            doc['_id'] = str(doc['_id'])
            if index_as_is:
                self.solr.update_solr(doc, self.solr_core)
                continue
            # An empty super_intention is stored as the literal 'null'.
            if doc.get('super_intention') == '':
                doc['super_intention'] = 'null'
            # The same dict is re-sent for each question with only the
            # question fields overwritten.
            for question in doc.pop('equal_questions'):
                doc['question'] = question
                doc['question_ik'] = question
                doc['question_cn'] = question
                self.solr.update_solr(doc, self.solr_core)
コード例 #3
0
ファイル: load_data.py プロジェクト: zhengxin2016/corpus
class Mongodb():
    """Write data into a MongoDB database (plus its '_test' twin) and Solr.

    ``db`` is the database ``db_name`` and ``db_test`` is the database
    ``db_name + '_test'``; ``write`` keeps both in step.
    """

    def __init__(self, db_name, ip='127.0.0.1', port=27017):
        """Connect to MongoDB and build the Solr client.

        Args:
            db_name: Name of the primary database.
            ip: Host used for both MongoDB and Solr (Solr port 8999 is fixed).
            port: MongoDB port.
        """
        self.db_name = db_name
        # One client is enough to reach both databases on the same server.
        client = MongoClient(ip, port)
        self.db = client[db_name]
        self.db_test = client[db_name + '_test']
        self.solr_url = 'http://' + ip + ':8999/solr'
        self.solr_core = SOLR_CORE_NAME
        self.solr = SOLR(self.solr_url)

    def write(self, collection, data):
        """Replace ``collection`` with ``data`` in both databases.

        Returns:
            1 on success, 0 if any step raised (the traceback is printed).
            Interpreter-exit signals such as KeyboardInterrupt are not
            swallowed and propagate to the caller.
        """
        try:
            self.db[collection].drop()
            self.db[collection].insert(data)
            self.db_test[collection].drop()
            self.db_test[collection].insert(data)
            return 1
        except Exception:
            traceback.print_exc()
            return 0

    def write_data2solr(self, collection):
        """Replace the Solr documents of ``collection`` with fresh ones.

        Existing Solr documents matching this scene/topic pair are deleted
        first.  Documents from 'instruction' are indexed as-is; every other
        document is indexed once per entry of its 'questions' list, with
        that entry stored under 'question', 'question_ik' and 'question_cn'.

        Raises:
            KeyError: If an expanded document has no 'questions' key.
        """
        query = 'scene_str:' + self.db_name + ' AND topic_str:' + collection
        self.solr.delete_solr_by_query(self.solr_core, query)
        for x in self.db[collection].find():
            data_one = x.copy()
            data_one['scene'] = self.db_name
            data_one['topic'] = collection
            data_one['_id'] = str(data_one['_id'])
            if collection in ['instruction']:
                self.solr.update_solr(data_one, self.solr_core)
                continue
            # An empty super_intention is stored as the literal 'null'.
            if data_one.get('super_intention') == '':
                data_one['super_intention'] = 'null'
            # The same dict is re-sent for each question with only the
            # question fields overwritten.
            for q in data_one.pop('questions'):
                data_one['question'] = q
                data_one['question_ik'] = q
                data_one['question_cn'] = q
                self.solr.update_solr(data_one, self.solr_core)
コード例 #4
0
class Update():
    """Rebuild the Solr index from every collection of a MongoDB database."""

    def __init__(self, ip, db_name):
        """Connect to MongoDB and Solr on localhost.

        Args:
            ip: Accepted but not used; both servers are addressed on
                127.0.0.1.  NOTE(review): confirm this is intentional.
            db_name: MongoDB database to read; also the ``scene`` value.
        """
        self.db_name = db_name
        mongo_client = MongoClient('127.0.0.1', 27017)
        self.db = mongo_client[db_name]
        self.core_name = SOLR_CORE_NAME
        self.solr_url = 'http://127.0.0.1:8999/solr'
        self.solr = SOLR(self.solr_url)

    def check_solr_core(self):
        """Create the Solr core if it does not exist yet."""
        if self.solr.solr_core_exists(self.core_name):
            return
        self.solr.create_solr_core(self.core_name)

    def update_data(self, collection):
        """Delete and re-index every document of ``collection`` in Solr.

        A document with an 'equal_questions' list (checked first) or a
        'questions' list is indexed once per list entry; a document with
        neither is indexed once as-is.  Empty documents are skipped.
        """
        def index_document(source):
            if not source:
                return
            doc = source.copy()
            doc['_id'] = str(doc['_id'])
            doc['scene'] = self.db_name
            doc['topic'] = collection
            # An empty super_intention is stored as the literal 'null'.
            if doc.get('super_intention') == '':
                doc['super_intention'] = 'null'
            for list_key in ('equal_questions', 'questions'):
                if list_key not in doc:
                    continue
                # Only the first matching list is expanded; the same dict
                # is re-sent with the question fields overwritten.
                for question in doc.pop(list_key):
                    doc['question'] = question
                    doc['question_ik'] = question
                    doc['question_cn'] = question
                    self.solr.update_solr(doc, self.core_name)
                return
            self.solr.update_solr(doc, self.core_name)

        stale_query = 'scene_str:' + self.db_name + ' AND topic_str:' + collection
        self.solr.delete_solr_by_query(self.core_name, stale_query)
        # Read the whole collection before indexing starts.
        documents = list(self.db[collection].find())
        for document in documents:
            index_document(document)

    def update(self):
        """Re-index every collection except 'log'.

        Returns:
            1 on success, 0 if anything raised (the traceback is printed).
        """
        try:
            names = self.db.collection_names()
            try:
                names.remove('log')
            except ValueError:
                pass
            for name in names:
                print('start ' + name)
                self.update_data(name)
        except Exception:
            traceback.print_exc()
            return 0
        return 1
コード例 #5
0
class Update():
    """Replay logged MongoDB changes (create/update/delete) into Solr."""

    def __init__(self, ip, db_name):
        """Connect to MongoDB and Solr on localhost.

        Args:
            ip: Accepted but not used; both servers are addressed on
                127.0.0.1.  NOTE(review): confirm this is intentional.
            db_name: MongoDB database to read; also the ``scene`` value.
        """
        self.db_name = db_name
        self.db = MongoClient('127.0.0.1', 27017)[db_name]
        self.core_name = SOLR_CORE_NAME
        self.solr_url = 'http://127.0.0.1:8999/solr'
        self.solr = SOLR(self.solr_url)

    def load_log(self, server_name):
        """Return the log entries pending for ``server_name``, sorted by 'time'.

        'develop' selects status '0', 'master' selects status '1' and any
        other name selects status '3'.
        """
        # Log entry fields: _id, collection, cmd, ids, comment, status, time
        status_by_server = {'develop': '0', 'master': '1'}
        query = {'status': status_by_server.get(server_name, '3')}
        return list(self.db.log.find(query).sort('time'))

    def check_solr_core(self):
        """Create the Solr core if it does not exist yet."""
        if not self.solr.solr_core_exists(self.core_name):
            self.solr.create_solr_core(self.core_name)

    def update_data(self, collection, cmd, _id):
        """Apply one logged command for document ``_id`` to Solr.

        Args:
            collection: MongoDB collection the document lives in.
            cmd: 'create', 'update' or 'delete'.
            _id: Document id as a string.

        Returns:
            None for a recognised command, 0 for an unknown one.
        """
        def insert_automata(data, collection):
            # Documents of the 'automata' database: the 'automata'
            # collection is indexed once per question, 'instruction' as-is,
            # anything else is not indexed.
            if collection in ['automata']:
                questions = data['questions'].copy()
                data.pop('questions')
                for q in questions:
                    data['question'] = q
                    self.solr.update_solr(data, self.core_name)
            elif collection in ['instruction']:
                self.solr.update_solr(data, self.core_name)
            else:
                return None

        def insert(collection, _id):
            data = self.db[collection].find_one({'_id': ObjectId(_id)})
            if not data:
                return
            data_one = data.copy()
            data_one['_id'] = str(data_one['_id'])
            data_one['scene'] = self.db_name
            data_one['topic'] = collection
            if self.db_name == 'automata':
                # The helper needs the collection name to pick its branch;
                # calling it without that argument raised TypeError.
                return insert_automata(data_one, collection)
            if collection in ['refuse2chat', 'sentiment']:
                self.solr.update_solr(data_one, self.core_name)
                return None
            # An empty super_intention is stored as the literal 'null'.
            if 'super_intention' in data_one:
                if data_one['super_intention'] == '':
                    data_one['super_intention'] = 'null'
            data_one.pop('equal_questions')
            for q in data['equal_questions']:
                data_one['question'] = q
                data_one['question_ik'] = q
                data_one['question_cn'] = q
                self.solr.update_solr(data_one, self.core_name)

        if cmd == 'create':
            insert(collection, _id)
        elif cmd == 'update':
            # An update is a delete followed by a fresh insert.
            self.solr.delete_solr_by_query(self.core_name, '_id_str:' + _id)
            insert(collection, _id)
        elif cmd == 'delete':
            self.solr.delete_solr_by_query(self.core_name, '_id_str:' + _id)
        else:
            return 0

    def update(self, server_name):
        """Replay all pending log entries for ``server_name``.

        After an entry is applied its status is advanced: to '1' on
        'develop', to '2' on 'master'.

        Returns:
            1 when all entries were applied (or none were pending); 0 if an
            exception occurred (the traceback is printed) or ``server_name``
            is neither 'develop' nor 'master'.
        """
        try:
            logs = self.load_log(server_name)
            if not logs:
                print('no update!')
                return 1
            for log in logs:
                if log['cmd'] == 'create':
                    self.check_solr_core()
                for _id in log['ids']:
                    self.update_data(log['collection'], log['cmd'], _id)
                if server_name == 'develop':
                    value = {'status': '1'}
                elif server_name == 'master':
                    value = {'status': '2'}
                else:
                    # NOTE(review): the entry above has already been applied
                    # to Solr at this point but is not marked as processed.
                    return 0
                self.db.log.update_one({'_id': log['_id']}, {'$set': value})
            return 1
        except Exception:
            traceback.print_exc()
            return 0
コード例 #6
0
class Data_backup():
    """Dump a MongoDB database to disk and restore it into MongoDB and Solr."""

    def __init__(self, db_name):
        """Connect to MongoDB and Solr on localhost for database ``db_name``."""
        self.db_name = db_name
        self.client = MongoClient('127.0.0.1', 27017)
        self.db = self.client[self.db_name]
        self.solr_url = 'http://127.0.0.1:8999/solr'
        self.solr = SOLR(self.solr_url)
        self.core_name = SOLR_CORE_NAME

    def data_dump(self, datapath, log_id):
        """Run ``mongodump`` for the database into ``datapath/log_id``.

        An existing ``datapath/log_id`` directory is removed first.

        Returns:
            1 if ``mongodump`` exited with status 0, otherwise 0.
        """
        if not os.path.exists(datapath):
            os.mkdir(datapath)
        dirpath = os.path.join(datapath, log_id)
        if os.path.exists(dirpath):
            shutil.rmtree(dirpath)
        os.mkdir(dirpath)
        cmd_dump = 'mongodump -d ' + self.db_name + ' -o ' + dirpath
        try:
            status = os.system(cmd_dump)
        except Exception:
            traceback.print_exc()
            return 0
        # os.system reports a failed command through its return value, not
        # through an exception, so the status has to be checked explicitly.
        return 0 if status else 1

    def mongodb_restore(self, dirpath):
        """Drop the database and its '_test' twin, then restore both.

        Both databases are restored from ``dirpath/<db_name>``.

        Returns:
            1 if both ``mongorestore`` runs exited with status 0, else 0.
        """
        self.client.drop_database(self.db_name)
        self.client.drop_database(self.db_name + '_test')
        dbpath = os.path.join(dirpath, self.db_name)
        cmd_restore1 = 'mongorestore -d ' + self.db_name + ' ' + dbpath
        cmd_restore2 = 'mongorestore -d ' + self.db_name + '_test ' + dbpath
        if os.system(cmd_restore1):
            return 0
        if os.system(cmd_restore2):
            return 0
        return 1

    def solr_restore(self):
        """Rebuild the Solr documents of every collection except 'log'.

        For each collection the matching scene/topic documents are deleted
        and re-created.  Documents from 'refuse2chat' and 'sentiment' are
        indexed as-is; every other document is indexed once per entry of
        its 'equal_questions' list.

        Returns:
            1 on success, 0 if indexing raised (the traceback is printed).
        """
        collections = self.db.collection_names()
        if 'log' in collections:
            collections.remove('log')
        try:
            for collection in collections:
                query = '(scene_str:' + self.db_name + \
                        ' AND topic_str:' + collection + ')'
                self.solr.delete_solr_by_query(self.core_name, query)
                for data in self.db[collection].find():
                    data_one = data.copy()
                    data_one['scene'] = self.db_name
                    data_one['topic'] = collection
                    data_one['_id'] = str(data_one['_id'])
                    if collection in ['refuse2chat', 'sentiment']:
                        self.solr.update_solr(data_one, self.core_name)
                        # Move on to the next document; leaving the loop
                        # here would restore only the first one.
                        continue
                    data_one.pop('equal_questions')
                    for q in data['equal_questions']:
                        data_one['question'] = q
                        self.solr.update_solr(data_one, self.core_name)
            return 1
        except Exception:
            traceback.print_exc()
            return 0

    def data_restore(self, dirpath, _id):
        """Restore MongoDB from ``dirpath/_id`` and then rebuild Solr.

        Returns:
            1 if both steps succeeded, otherwise 0.  Solr is not touched
            when the MongoDB restore fails.
        """
        dirpath = os.path.join(dirpath, _id)
        return self.mongodb_restore(dirpath) and self.solr_restore()