Ejemplo n.º 1
0
 def __init__(self, db_name):
     """Open the local MongoDB database ``db_name`` and a Solr client.

     Both services are addressed at 127.0.0.1 (MongoDB on port 27017,
     Solr on port 8999); only the database name is configurable.

     Args:
         db_name: Name of the MongoDB database to work on.
     """
     self.db_name = db_name
     mongo = MongoClient('127.0.0.1', 27017)
     self.client = mongo
     self.db = mongo[db_name]
     solr_endpoint = 'http://127.0.0.1:8999/solr'
     self.solr_url = solr_endpoint
     self.solr = SOLR(solr_endpoint)
     self.core_name = SOLR_CORE_NAME
Ejemplo n.º 2
0
 def __init__(self, db_name, ip='127.0.0.1', port=27017):
     """Open ``db_name`` and its ``_test`` twin, plus a Solr client.

     Args:
         db_name: Name of the MongoDB database; the twin database is
             named ``db_name + '_test'``.
         ip: Host used for both MongoDB and Solr (Solr on port 8999).
         port: MongoDB port.
     """
     self.db_name = db_name
     # One MongoClient per database handle.
     primary_client = MongoClient(ip, port)
     self.db = primary_client[db_name]
     test_client = MongoClient(ip, port)
     self.db_test = test_client[db_name + '_test']
     self.solr_url = ''.join(('http://', ip, ':8999/solr'))
     self.solr_core = SOLR_CORE_NAME
     self.solr = SOLR(self.solr_url)
Ejemplo n.º 3
0
 def __init__(self,
              db_name,
              solr_name=SOLR_CORE_NAME,
              mongodb_ip='127.0.0.1',
              solr_ip='127.0.0.1',
              port=27017):
     """Open a MongoDB database and a Solr client on separate hosts.

     Args:
         db_name: Name of the MongoDB database.
         solr_name: Name of the Solr core to use.
         mongodb_ip: Host of the MongoDB server.
         solr_ip: Host of the Solr server (port 8999).
         port: MongoDB port.
     """
     self.db_name = db_name
     mongo = MongoClient(mongodb_ip, port)
     self.db = mongo[db_name]
     endpoint = ''.join(('http://', solr_ip, ':8999/solr'))
     self.solr_url = endpoint
     self.solr_core = solr_name
     self.solr = SOLR(endpoint)
Ejemplo n.º 4
0
 def __init__(self,
              ip,
              port,
              db_name,
              collection_name,
              solr_name=SOLR_CORE_NAME):
     """Bind to one MongoDB collection and the matching Solr core.

     Args:
         ip: Host used for both MongoDB and Solr (Solr on port 8999).
         port: MongoDB port.
         db_name: Name of the MongoDB database; also used to build
             ``dirpath`` as ``'data/' + db_name``.
         collection_name: Collection inside ``db_name``.
         solr_name: Name of the Solr core to use.
     """
     self.dirpath = 'data/' + db_name
     self.db_name = db_name
     self.collection_name = collection_name
     database = MongoClient(ip, port)[db_name]
     self.db = database
     self.collection = database[collection_name]
     # Starts empty; nothing in this method fills it.
     self.data = []
     endpoint = ''.join(('http://', ip, ':8999/solr'))
     self.solr_url = endpoint
     self.solr_core = solr_name
     self.solr = SOLR(endpoint)
Ejemplo n.º 5
0
class Update_data():
    """Re-index collections of one MongoDB database into a Solr core."""

    def __init__(self,
                 db_name,
                 solr_name=SOLR_CORE_NAME,
                 mongodb_ip='127.0.0.1',
                 solr_ip='127.0.0.1',
                 port=27017):
        """Open the MongoDB database and create the Solr client.

        Args:
            db_name: Name of the MongoDB database.
            solr_name: Name of the Solr core to write to.
            mongodb_ip: Host of the MongoDB server.
            solr_ip: Host of the Solr server (port 8999).
            port: MongoDB port.
        """
        self.db_name = db_name
        mongo = MongoClient(mongodb_ip, port)
        self.db = mongo[db_name]
        self.solr_url = ''.join(('http://', solr_ip, ':8999/solr'))
        self.solr_core = solr_name
        self.solr = SOLR(self.solr_url)

    def write_data2solr(self, collection_name):
        """Replace the Solr documents of ``collection_name``.

        Existing Solr documents with this scene (database name) and topic
        (collection name) are deleted first.  Documents from
        ``refuse2chat`` and ``sentiment`` are indexed as they are; any
        other document is indexed once per entry of its
        ``equal_questions`` list.

        Args:
            collection_name: MongoDB collection to index.

        Raises:
            KeyError: If a document outside the two special collections
                has no ``equal_questions`` field.
        """
        core = self.solr_core
        stale = ('scene_str:' + self.db_name +
                 ' AND topic_str:' + collection_name)
        self.solr.delete_solr_by_query(core, stale)
        indexed_as_is = collection_name in ('refuse2chat', 'sentiment')
        for record in self.db[collection_name].find():
            doc = record.copy()
            doc.update(scene=self.db_name,
                       topic=collection_name,
                       _id=str(record['_id']))
            if indexed_as_is:
                self.solr.update_solr(doc, core)
                continue
            # An empty super_intention is stored as the literal 'null'.
            if doc.get('super_intention') == '':
                doc['super_intention'] = 'null'
            # The same dict is re-sent for every question variant.
            for question in doc.pop('equal_questions'):
                for field in ('question', 'question_ik', 'question_cn'):
                    doc[field] = question
                self.solr.update_solr(doc, core)
Ejemplo n.º 6
0
class BaseClass():
    """Hold the data of one collection and push it to MongoDB and Solr."""

    def __init__(self,
                 ip,
                 port,
                 db_name,
                 collection_name,
                 solr_name=SOLR_CORE_NAME):
        """Bind to one MongoDB collection and the matching Solr core.

        Args:
            ip: Host used for both MongoDB and Solr (Solr on port 8999).
            port: MongoDB port.
            db_name: Name of the MongoDB database; also used to build
                ``dirpath`` as ``'data/' + db_name``.
            collection_name: Collection inside ``db_name``.
            solr_name: Name of the Solr core to use.
        """
        self.dirpath = 'data/' + db_name
        self.db_name = db_name
        self.collection_name = collection_name
        database = MongoClient(ip, port)[db_name]
        self.db = database
        self.collection = database[collection_name]
        # Documents later written by write_data2mongodb().
        self.data = []
        endpoint = ''.join(('http://', ip, ':8999/solr'))
        self.solr_url = endpoint
        self.solr_core = solr_name
        self.solr = SOLR(endpoint)

    def write_data2mongodb(self):
        """Drop the collection, insert ``self.data`` and create indexes.

        ``refuse2chat`` and ``sentiment`` are indexed on ``question``;
        every other collection on ``group`` and ``label``.
        """
        self.collection.drop()
        self.collection.insert(self.data)
        if self.collection_name in ['refuse2chat', 'sentiment']:
            index_fields = ('question',)
        else:
            index_fields = ('group', 'label')
        for field in index_fields:
            self.collection.create_index(field)

    def write_data2solr(self):
        """Replace the Solr documents of this collection.

        Existing Solr documents with this scene (database name) and topic
        (collection name) are deleted first.  Documents from
        ``refuse2chat`` and ``sentiment`` are indexed as they are; any
        other document is indexed once per entry of its
        ``equal_questions`` list.

        Raises:
            KeyError: If a document outside the two special collections
                has no ``equal_questions`` field.
        """
        core = self.solr_core
        topic = self.collection_name
        stale = 'scene_str:' + self.db_name + ' AND topic_str:' + topic
        self.solr.delete_solr_by_query(core, stale)
        for record in self.collection.find():
            doc = record.copy()
            doc.update(scene=self.db_name,
                       topic=topic,
                       _id=str(record['_id']))
            if topic in ['refuse2chat', 'sentiment']:
                self.solr.update_solr(doc, core)
                continue
            # An empty super_intention is stored as the literal 'null'.
            if doc.get('super_intention') == '':
                doc['super_intention'] = 'null'
            # The same dict is re-sent for every question variant.
            for question in doc.pop('equal_questions'):
                for field in ('question', 'question_ik', 'question_cn'):
                    doc[field] = question
                self.solr.update_solr(doc, core)
Ejemplo n.º 7
0
class Mongodb():
    """Write collections to MongoDB (live and ``_test``) and to Solr."""

    def __init__(self, db_name, ip='127.0.0.1', port=27017):
        """Open ``db_name`` and its ``_test`` twin, plus a Solr client.

        Args:
            db_name: Name of the MongoDB database; the twin database is
                named ``db_name + '_test'``.
            ip: Host used for both MongoDB and Solr (Solr on port 8999).
            port: MongoDB port.
        """
        self.db_name = db_name
        # A single client serves both databases; a second connection to
        # the same server is not needed.
        client = MongoClient(ip, port)
        self.db = client[db_name]
        self.db_test = client[db_name + '_test']
        self.solr_url = 'http://' + ip + ':8999/solr'
        self.solr_core = SOLR_CORE_NAME
        self.solr = SOLR(self.solr_url)

    def write(self, collection, data):
        """Replace ``collection`` in both databases with ``data``.

        Args:
            collection: Name of the collection to rewrite.
            data: Document(s) handed to the collection's ``insert``.

        Returns:
            1 on success, 0 if any step raised (the traceback is printed).
        """
        try:
            for database in (self.db, self.db_test):
                database[collection].drop()
                database[collection].insert(data)
            return 1
        except Exception:
            # Exception, not a bare except, so that KeyboardInterrupt and
            # SystemExit still propagate.
            traceback.print_exc()
            return 0

    def write_data2solr(self, collection):
        """Replace the Solr documents of ``collection``.

        Existing Solr documents with this scene (database name) and topic
        (collection name) are deleted first.  Documents of the
        ``instruction`` collection are indexed as they are; any other
        document is indexed once per entry of its ``questions`` list.

        Args:
            collection: MongoDB collection to index.

        Raises:
            KeyError: If a non-``instruction`` document has no
                ``questions`` field.
        """
        core = self.solr_core
        query = 'scene_str:' + self.db_name + ' AND topic_str:' + collection
        self.solr.delete_solr_by_query(core, query)
        for record in self.db[collection].find():
            doc = record.copy()
            doc['scene'] = self.db_name
            doc['topic'] = collection
            doc['_id'] = str(doc['_id'])
            if collection == 'instruction':
                self.solr.update_solr(doc, core)
                continue
            # An empty super_intention is stored as the literal 'null'.
            if doc.get('super_intention') == '':
                doc['super_intention'] = 'null'
            # The same dict is re-sent for every question variant.
            for question in doc.pop('questions'):
                doc['question'] = question
                doc['question_ik'] = question
                doc['question_cn'] = question
                self.solr.update_solr(doc, core)
Ejemplo n.º 8
0
class Update():
    """Replay logged MongoDB changes into the Solr core."""

    def __init__(self, ip, db_name):
        """Open the local MongoDB database and create the Solr client.

        Args:
            ip: Accepted but not used here.
                NOTE(review): both services are addressed at 127.0.0.1
                regardless of this value; confirm whether that is
                intended.
            db_name: Name of the MongoDB database.
        """
        self.db_name = db_name
        self.db = MongoClient('127.0.0.1', 27017)[db_name]
        self.core_name = SOLR_CORE_NAME
        self.solr_url = 'http://127.0.0.1:8999/solr'
        self.solr = SOLR(self.solr_url)

    def load_log(self, server_name):
        """Return the pending log entries for ``server_name``.

        Log fields: _id, collection, cmd, ids, comment, status, time.
        ``develop`` reads entries with status '0', ``master`` those with
        status '1', any other name those with status '3'.

        Args:
            server_name: Name of the server being updated.

        Returns:
            The matching log documents as a list, sorted by ``time``.
        """
        if server_name == 'develop':
            status = '0'
        elif server_name == 'master':
            status = '1'
        else:
            status = '3'
        return list(self.db.log.find({'status': status}).sort('time'))

    def check_solr_core(self):
        """Create the Solr core if it does not exist yet."""
        if not self.solr.solr_core_exists(self.core_name):
            self.solr.create_solr_core(self.core_name)

    def _index_automata(self, data, collection):
        """Index one document of the ``automata`` database.

        ``automata`` documents are indexed once per entry of
        ``questions``; ``instruction`` documents as they are; documents
        of any other collection are ignored.
        """
        if collection == 'automata':
            for question in data.pop('questions'):
                data['question'] = question
                self.solr.update_solr(data, self.core_name)
        elif collection == 'instruction':
            self.solr.update_solr(data, self.core_name)

    def _index_document(self, collection, _id):
        """Load document ``_id`` from ``collection`` and index it in Solr."""
        data = self.db[collection].find_one({'_id': ObjectId(_id)})
        if not data:
            return
        doc = data.copy()
        doc['_id'] = str(doc['_id'])
        doc['scene'] = self.db_name
        doc['topic'] = collection
        if self.db_name == 'automata':
            # The collection name must be passed on; the original call
            # omitted it and raised TypeError.
            self._index_automata(doc, collection)
            return
        if collection in ['refuse2chat', 'sentiment']:
            self.solr.update_solr(doc, self.core_name)
            return
        # An empty super_intention is stored as the literal 'null'.
        if doc.get('super_intention') == '':
            doc['super_intention'] = 'null'
        # The same dict is re-sent for every question variant.
        for question in doc.pop('equal_questions'):
            doc['question'] = question
            doc['question_ik'] = question
            doc['question_cn'] = question
            self.solr.update_solr(doc, self.core_name)

    def update_data(self, collection, cmd, _id):
        """Apply one logged command for one document to Solr.

        Args:
            collection: MongoDB collection the document lives in.
            cmd: ``'create'``, ``'update'`` or ``'delete'``.
            _id: Document id as a string.

        Returns:
            0 for an unknown ``cmd``; otherwise None.
        """
        if cmd == 'create':
            self._index_document(collection, _id)
        elif cmd == 'update':
            self.solr.delete_solr_by_query(self.core_name, '_id_str:' + _id)
            self._index_document(collection, _id)
        elif cmd == 'delete':
            self.solr.delete_solr_by_query(self.core_name, '_id_str:' + _id)
        else:
            return 0

    def update(self, server_name):
        """Replay all pending log entries for ``server_name``.

        After an entry has been applied its status is advanced
        ('develop' -> '1', 'master' -> '2').

        Args:
            server_name: Name of the server being updated.

        Returns:
            1 when all entries were applied or none were pending; 0 when
            an exception occurred (traceback printed) or, for any other
            server name, after the first entry has been applied.
        """
        try:
            logs = self.load_log(server_name)
            if not logs:
                print('no update!')
                return 1
            if server_name == 'develop':
                value = {'status': '1'}
            elif server_name == 'master':
                value = {'status': '2'}
            else:
                value = None
            for log in logs:
                if log['cmd'] == 'create':
                    self.check_solr_core()
                for _id in log['ids']:
                    self.update_data(log['collection'], log['cmd'], _id)
                if value is None:
                    # Unknown server: no follow-up status is defined.
                    return 0
                self.db.log.update_one({'_id': log['_id']}, {'$set': value})
            return 1
        except Exception:
            traceback.print_exc()
            return 0
Ejemplo n.º 9
0
class Update():
    """Rebuild the Solr index from every collection of one database."""

    def __init__(self, ip, db_name):
        """Open the local MongoDB database and create the Solr client.

        Args:
            ip: Accepted but not used here.
                NOTE(review): both services are addressed at 127.0.0.1
                regardless of this value; confirm whether that is
                intended.
            db_name: Name of the MongoDB database.
        """
        self.db_name = db_name
        mongo = MongoClient('127.0.0.1', 27017)
        self.db = mongo[db_name]
        self.core_name = SOLR_CORE_NAME
        endpoint = 'http://127.0.0.1:8999/solr'
        self.solr_url = endpoint
        self.solr = SOLR(endpoint)

    def check_solr_core(self):
        """Create the Solr core if it does not exist yet."""
        core_present = self.solr.solr_core_exists(self.core_name)
        if not core_present:
            self.solr.create_solr_core(self.core_name)

    def update_data(self, collection):
        """Replace the Solr documents of ``collection``.

        Existing Solr documents with this scene (database name) and topic
        (collection name) are deleted, then every MongoDB document is
        indexed: once per entry of ``equal_questions`` if present,
        otherwise once per entry of ``questions`` if present, otherwise
        once as it is.

        Args:
            collection: MongoDB collection to index.
        """
        core = self.core_name
        scope = 'scene_str:' + self.db_name + ' AND topic_str:' + collection
        self.solr.delete_solr_by_query(core, scope)
        # Read everything before indexing starts.
        records = list(self.db[collection].find())
        for record in records:
            if not record:
                continue
            doc = record.copy()
            doc['_id'] = str(doc['_id'])
            doc['scene'] = self.db_name
            doc['topic'] = collection
            # An empty super_intention is stored as the literal 'null'.
            if doc.get('super_intention') == '':
                doc['super_intention'] = 'null'
            if 'equal_questions' in doc:
                variants_key = 'equal_questions'
            elif 'questions' in doc:
                variants_key = 'questions'
            else:
                self.solr.update_solr(doc, core)
                continue
            # The same dict is re-sent for every question variant.
            for question in doc.pop(variants_key):
                for field in ('question', 'question_ik', 'question_cn'):
                    doc[field] = question
                self.solr.update_solr(doc, core)

    def update(self):
        """Re-index every collection except ``log``.

        Returns:
            1 on success, 0 if an exception occurred (traceback printed).
        """
        try:
            names = self.db.collection_names()
            if 'log' in names:
                names.remove('log')
            for name in names:
                print('start ' + name)
                self.update_data(name)
        except Exception:
            traceback.print_exc()
            return 0
        return 1
Ejemplo n.º 10
0
 def __init__(self, ip='127.0.0.1', solr_core=SOLR_CORE_NAME):
     """Create a Solr client for the server at ``ip`` (port 8999).

     Args:
         ip: Host of the Solr server.
         solr_core: Name of the Solr core to query.
     """
     endpoint = ''.join(('http://', ip, ':8999/solr'))
     self.solr_url = endpoint
     self.solr_core = solr_core
     self.solr = SOLR(endpoint)
Ejemplo n.º 11
0
class SearchData():
    """Query helper for question/answer documents stored in a Solr core."""

    def __init__(self, ip='127.0.0.1', solr_core=SOLR_CORE_NAME):
        """Create a Solr client for the server at ``ip`` (port 8999).

        Args:
            ip: Host of the Solr server.
            solr_core: Name of the Solr core to query.
        """
        self.solr_url = 'http://' + ip + ':8999/solr'
        self.solr_core = solr_core
        self.solr = SOLR(self.solr_url)

    @staticmethod
    def _scoped_select(select, scene_topic):
        """Restrict ``select`` to the given scenes and topics.

        ``scene_topic`` maps a scene name to a list of topic names; an
        empty list means "every topic of that scene".  With no scenes at
        all ``select`` is returned unchanged rather than prefixed with an
        empty ``()`` group.
        """
        if not scene_topic:
            return select
        clauses = []
        for scene in scene_topic:
            clause = 'scene_str:' + scene
            topics = scene_topic[scene]
            if topics:
                clause = ('(' + clause + ' AND (topic_str:' +
                          ' OR topic_str:'.join(topics) + '))')
            clauses.append(clause)
        return '(' + ' OR '.join(clauses) + ') AND ' + select

    @staticmethod
    def _first_values(doc):
        """Return ``doc`` with every field reduced to its first value."""
        return {key: values[0] for key, values in doc.items()}

    def search_answer(self, select='*:*', scene_topic=None):
        """Return one answer for the first document matching ``select``.

        Args:
            select: Solr query string.
            scene_topic: Optional mapping of scene name to list of topic
                names used to restrict the query.

        Returns:
            A dict with keys ``answer`` (a random entry of the document's
            ``answers``), ``emotion``, ``media`` and ``timeout``.  All
            four values are None when nothing matched or the query failed
            (the traceback is printed).
        """
        try:
            fields = ['answers', 'emotion_url', 'media', 'timeout']
            select = self._scoped_select(select, scene_topic)
            docs = self.solr.query_solr(self.solr_core, select, fields,
                                        1).docs
            data = list(docs)[0]
            return {
                'answer': random.choice(data['answers']),
                'emotion': data['emotion_url'][0],
                'media': data['media'][0],
                'timeout': data['timeout'][0]
            }
        except Exception:
            traceback.print_exc()
            return {
                'answer': None,
                'emotion': None,
                'media': None,
                'timeout': None
            }

    def search_questions(self,
                         select='*:*',
                         scene_topic=None,
                         fields=None,
                         max_num=10):
        """Return up to ``max_num`` documents matching ``select``.

        Args:
            select: Solr query string.
            scene_topic: Optional mapping of scene name to list of topic
                names used to restrict the query.
            fields: Fields to fetch; defaults to ``['question']``.
            max_num: Maximum number of documents passed to the query.

        Returns:
            A list of dicts in which every field holds only its first
            value, or None if the query failed (traceback printed).
        """
        try:
            if fields is None:
                fields = ['question']
            select = self._scoped_select(select, scene_topic)
            docs = self.solr.query_solr(self.solr_core, select, fields,
                                        max_num).docs
            return [self._first_values(doc) for doc in docs]
        except Exception:
            traceback.print_exc()
            return None

    def sale_id2description(self, _id, scene):
        """Return the ``description`` field of document ``_id`` in ``scene``.

        Returns:
            The field as stored in the Solr document, or None when nothing
            matched or the query failed (traceback printed).
        """
        try:
            select = 'scene_str:' + scene + ' AND _id_str:' + _id
            docs = self.solr.query_solr(self.solr_core, select,
                                        ['description'], 1).docs
            return list(docs)[0]['description']
        except Exception:
            traceback.print_exc()
            return None

    def sale_type2answers(self, scene, t=''):
        """Return up to 20 documents of ``scene`` with the given type.

        Args:
            scene: Scene name to search in.
            t: Type to match; when empty, any document that has a type.

        Returns:
            A list of dicts in which every field holds only its first
            value, or None if the query failed (traceback printed).
        """
        try:
            # Without a type, match any document carrying one.
            type_filter = t if t else '*'
            select = 'scene_str:' + scene + ' AND type_str:' + type_filter
            fields = [
                '_id', 'answers', 'type', 'emotion_url', 'media', 'timeout'
            ]
            max_num = 20
            docs = self.solr.query_solr(self.solr_core, select, fields,
                                        max_num).docs
            return [self._first_values(doc) for doc in docs]
        except Exception:
            traceback.print_exc()
            return None
Ejemplo n.º 12
0
class Data_backup():
    """Dump and restore one MongoDB database and its Solr documents."""

    def __init__(self, db_name):
        """Open the local MongoDB database and create the Solr client.

        Both services are addressed at 127.0.0.1 (MongoDB on port 27017,
        Solr on port 8999).

        Args:
            db_name: Name of the MongoDB database.
        """
        self.db_name = db_name
        self.client = MongoClient('127.0.0.1', 27017)
        self.db = self.client[self.db_name]
        self.solr_url = 'http://127.0.0.1:8999/solr'
        self.solr = SOLR(self.solr_url)
        self.core_name = SOLR_CORE_NAME

    @staticmethod
    def _run_tool(argv):
        """Run an external command; return True if it exited with 0.

        The command is passed as an argument list, so names containing
        spaces or shell metacharacters are not interpreted by a shell.
        """
        import subprocess  # local import keeps this block self-contained
        try:
            return subprocess.call(argv) == 0
        except OSError:
            # Typically the executable could not be started.
            traceback.print_exc()
            return False

    def data_dump(self, datapath, log_id):
        """Dump the database with ``mongodump`` into ``datapath/log_id``.

        An existing ``datapath/log_id`` directory is removed first.

        Args:
            datapath: Directory that holds all dumps.
            log_id: Name of the sub-directory for this dump.

        Returns:
            1 if ``mongodump`` exited successfully, otherwise 0.
        """
        dirpath = os.path.join(datapath, log_id)
        if os.path.exists(dirpath):
            shutil.rmtree(dirpath)
        os.makedirs(dirpath)
        dumped = self._run_tool(
            ['mongodump', '-d', self.db_name, '-o', dirpath])
        return 1 if dumped else 0

    def mongodb_restore(self, dirpath):
        """Restore the database and its ``_test`` twin from ``dirpath``.

        Both databases are dropped and then rebuilt with ``mongorestore``
        from ``dirpath/<db_name>``.

        Returns:
            1 if both restores succeeded, otherwise 0.
        """
        test_db_name = self.db_name + '_test'
        self.client.drop_database(self.db_name)
        self.client.drop_database(test_db_name)
        dbpath = os.path.join(dirpath, self.db_name)
        for target in (self.db_name, test_db_name):
            if not self._run_tool(['mongorestore', '-d', target, dbpath]):
                return 0
        return 1

    def solr_restore(self):
        """Rebuild the Solr documents of every collection except ``log``.

        Documents from ``refuse2chat`` and ``sentiment`` are indexed as
        they are; any other document is indexed once per entry of its
        ``equal_questions`` list.

        Returns:
            1 on success, 0 if an exception occurred (traceback printed).
        """
        collections = self.db.collection_names()
        if 'log' in collections:
            collections.remove('log')
        try:
            for collection in collections:
                query = '(scene_str:' + self.db_name + \
                        ' AND topic_str:' + collection + ')'
                self.solr.delete_solr_by_query(self.core_name, query)
                for data in self.db[collection].find():
                    doc = data.copy()
                    doc['scene'] = self.db_name
                    doc['topic'] = collection
                    doc['_id'] = str(doc['_id'])
                    if collection in ['refuse2chat', 'sentiment']:
                        self.solr.update_solr(doc, self.core_name)
                        # Move on to the next document; a break here
                        # would restore only the first one.
                        continue
                    # The same dict is re-sent for every question variant.
                    for question in doc.pop('equal_questions'):
                        doc['question'] = question
                        self.solr.update_solr(doc, self.core_name)
            return 1
        except Exception:
            traceback.print_exc()
            return 0

    def data_restore(self, dirpath, _id):
        """Restore MongoDB and then Solr from the dump ``dirpath/_id``.

        Returns:
            A truthy value only if both steps succeeded; Solr is not
            touched when the MongoDB restore fails.
        """
        dirpath = os.path.join(dirpath, _id)
        return self.mongodb_restore(dirpath) and self.solr_restore()
Ejemplo n.º 13
0
class SearchSolr():
    """Load store instruction/intent records from a Solr core."""

    def __init__(self, ip='127.0.0.1', solr_core=SOLR_CORE_NAME):
        """Create a Solr client for the server at ``ip`` (port 8999).

        Args:
            ip: Host of the Solr server.
            solr_core: Name of the Solr core to query.
        """
        self.solr_url = 'http://' + ip + ':8999/solr'
        self.solr_core = solr_core
        self.solr = SOLR(self.solr_url)

    @staticmethod
    def _instruction_record(x):
        """Convert one Solr document into an instruction record."""
        has_media = 'media' in x
        return {
            'store_id': x['store_id'][0],
            'category': x['category'][0],
            'instruction': x['instruction'][0],
            'entities': x['entities'] if 'entities' in x else [''],
            'answers': x['answer'],
            'emotion_name': 'null',
            'emotion_url': 'null',
            'media': x['media'][0] if has_media else 'null',
            # Documents with media get a 15 timeout, others 0.
            'timeout': '15' if has_media else '0',
        }

    @staticmethod
    def _intent_record(x):
        """Convert one Solr document into an intent record."""
        return {
            'store_id': x['store_id'][0],
            'category': x['category'][0],
            'intent': x['intent'],
            'questions': x['question'],
            'entities': x['entities'] if 'entities' in x else '',
        }

    def load_data(self, select='*:*', fields=None, max_num=10, flag=False):
        """Query Solr and convert the matching documents.

        Args:
            select: Solr query string.
            fields: Fields to fetch; defaults to an empty list.
            max_num: Maximum number of documents passed to the query.
            flag: When True, return one instruction record per document.
                Otherwise return intent records merged by
                ``intent + '|' + entities``: the first document of a
                group supplies the record, and each later one adds its
                first question to that record's ``questions``.

        Returns:
            A list of record dicts, or None if anything failed (the
            traceback is printed).
        """
        try:
            docs = self.solr.query_solr(
                self.solr_core, select, [] if fields is None else fields,
                max_num).docs
            # Kept as an equality test so only True (or 1) selects this
            # branch, as before.
            if flag == True:  # noqa: E712
                return [self._instruction_record(x) for x in docs]
            grouped = {}
            for x in docs:
                record = self._intent_record(x)
                # NOTE(review): this concatenation needs intent and
                # entities to be strings; confirm the Solr schema.
                key = record['intent'] + '|' + record['entities']
                if key in grouped:
                    grouped[key]['questions'].append(x['question'][0])
                else:
                    grouped[key] = record
            return list(grouped.values())
        except Exception:
            traceback.print_exc()
            return None