Esempio n. 1
0
 def test_server_registration(self):
   """Starting a server makes its client retrievable and lists its model key."""
   # Stand-in for whatever `importlib.import_module` would return; it exposes
   # the stub class as an attribute named 'SharedModelStub' (presumably the
   # name start_server looks up on the imported module -- confirm).
   ModuleStub = namedtuple('ModuleStub', 'SharedModelStub')
   # bulk_run is patched out for the duration of the test (presumably so no
   # real server loop is started -- confirm against SharedModel).
   with patch('importlib.import_module') as mock_import, \
        patch('app.main.lib.shared_models.shared_model.SharedModel.bulk_run'):
     mock_import.return_value = ModuleStub(SharedModelStub=SharedModelStub)
     SharedModel.start_server('SharedModelStub', SharedModelStub.model_key)
     self.assertIsInstance(SharedModel.get_client(SharedModelStub.model_key), SharedModelStub)
     # assertIn reports both operands on failure, unlike assertTrue(x in y).
     self.assertIn(SharedModelStub.model_key, SharedModel.get_servers())
Esempio n. 2
0
 def post(self):
     """Index the request text into the similarity index.

     Reads the JSON request body:
       - 'text': stored as the document's 'content'.
       - 'model' (optional, default 'elasticsearch'): for any other value the
         text is vectorized through the matching SharedModel client and the
         vector is stored under 'vector_<length>' together with the model key.
       - 'context' (optional): stored verbatim under 'context'.

     Returns {'success': bool}, True when Elasticsearch reports the document
     as 'created'.
     """
     payload = request.json
     model_key = payload['model'] if 'model' in payload else 'elasticsearch'
     es = Elasticsearch(app.config['ELASTICSEARCH_URL'])

     document = {'content': payload['text']}
     if model_key.lower() != 'elasticsearch':
         client = SharedModel.get_client(model_key)
         embedding = client.get_shared_model_response(payload['text'])
         # The field name encodes the vector length.
         document['vector_' + str(len(embedding))] = embedding
         document['model'] = model_key
     if 'context' in payload:
         document['context'] = payload['context']

     outcome = es.index(body=document,
                        index=app.config['ELASTICSEARCH_SIMILARITY'])
     # Refresh right after indexing so the new document is searchable.
     es.indices.refresh(index=app.config['ELASTICSEARCH_SIMILARITY'])
     return {'success': outcome['result'] == 'created'}
Esempio n. 3
0
    def get(self):
        """Search the similarity index for documents similar to the request text.

        Reads the JSON request body:
          - 'text': the text to search for.
          - 'model' (optional, default 'elasticsearch'): 'elasticsearch' runs a
            full-text match; any other value vectorizes the text through the
            matching SharedModel client and scores via the 'similarity' script.
          - 'threshold' (optional, default 0.9): match strictness.
          - 'language' (optional, full-text mode only): selects an analyzer.
          - 'context' (optional): key/value pairs that must all match on the
            nested 'context' field.

        Returns {'result': <list of Elasticsearch hits>}.
        """
        params = request.json
        model_key = params['model'] if 'model' in params else 'elasticsearch'
        es = Elasticsearch(app.config['ELASTICSEARCH_URL'], timeout=30)
        threshold = params['threshold'] if 'threshold' in params else 0.9

        if model_key.lower() != 'elasticsearch':
            client = SharedModel.get_client(model_key)
            embedding = client.get_shared_model_response(params['text'])
            similarity_clause = {
                'function_score': {
                    'min_score': threshold,
                    'query': {'match_all': {}},
                    'functions': [{
                        'script_score': {
                            'script': {
                                'source': 'similarity',
                                'lang': 'meedan_scripts',
                                'params': {'vector': embedding},
                            },
                        },
                    }],
                },
            }
            # Restrict results to documents indexed with the same model.
            model_clause = {'match': {'model': {'query': model_key}}}
            conditions = [similarity_clause, model_clause]
        else:
            content_match = {
                'query': params['text'],
                'minimum_should_match': str(int(round(threshold * 100))) + '%',
            }
            # FIXME: `analyzer` and `minimum_should_match` don't play well together.
            if 'language' in params:
                content_match['analyzer'] = language_to_analyzer(params['language'])
                del content_match['minimum_should_match']
            conditions = [{'match': {'content': content_match}}]

        if 'context' in params:
            wanted = params['context']
            context_matches = [
                {'match': {'context.' + key: wanted[key]}}
                for key in wanted
            ]
            conditions.append({
                'nested': {
                    'score_mode': 'none',
                    'path': 'context',
                    'query': {'bool': {'must': context_matches}},
                },
            })

        found = es.search(
            body={'query': {'bool': {'must': conditions}}},
            doc_type='_doc',
            index=app.config['ELASTICSEARCH_SIMILARITY']
        )
        return {'result': found['hits']['hits']}
Esempio n. 4
0
 def post(self):
     """Return the model's similarity for two JSON-encoded vectors.

     Reads 'model', 'vector1' and 'vector2' from the JSON request body; the
     two vectors are JSON strings that are decoded into numpy arrays.
     Returns {'similarity': <value of model.similarity(...)>}.
     """
     payload = request.json
     client = SharedModel.get_client(payload['model'])
     first, second = (
         np.asarray(json.loads(payload[field]))
         for field in ('vector1', 'vector2')
     )
     return {'similarity': client.similarity(first, second)}
Esempio n. 5
0
 def post(self):
     """Vectorize the request text with the requested shared model.

     Reads 'model' and 'text' from the JSON request body and returns
     {'vector': <JSON-encoded model response>}.
     """
     payload = request.json
     client = SharedModel.get_client(payload['model'])
     return {'vector': json.dumps(client.get_shared_model_response(payload['text']))}
Esempio n. 6
0
    def get(self):
        """Search the similarity index for documents similar to the request text.

        Reads the JSON request body:
          - 'text': the text to search for.
          - 'model' (optional, default 'elasticsearch'): 'elasticsearch' runs a
            full-text match; any other value vectorizes the text through the
            matching SharedModel client and ranks documents by cosine
            similarity against the 'vector_<length>' field.
          - 'threshold' (optional, default 0.9): match strictness. In vector
            mode it is interpreted as a minimum cosine similarity.
          - 'language' (optional, full-text mode only): selects an analyzer.
          - 'context' (optional): key/value pairs that must all match on the
            nested 'context' field.

        Returns {'result': <list of Elasticsearch hits>}.
        """
        model_key = 'elasticsearch'
        if 'model' in request.json:
            model_key = request.json['model']
        es = Elasticsearch(app.config['ELASTICSEARCH_URL'], timeout=30)
        threshold = 0.9
        if 'threshold' in request.json:
            threshold = request.json['threshold']

        if model_key.lower() == 'elasticsearch':
            content_match = {
                'query': request.json['text'],
                'minimum_should_match': str(int(round(threshold * 100))) + '%'
            }
            # FIXME: `analyzer` and `minimum_should_match` don't play well together.
            if 'language' in request.json:
                content_match['analyzer'] = language_to_analyzer(
                    request.json['language'])
                del content_match['minimum_should_match']
            body = {'query': {'bool': {'must': [{'match': {'content': content_match}}]}}}
            # Extra filters go alongside the content match.
            must_clauses = body['query']['bool']['must']
        else:
            model = SharedModel.get_client(model_key)
            vector = model.get_shared_model_response(request.json['text'])
            # Only documents indexed with the same model are candidates.
            must_clauses = [{
                'match': {
                    'model': {
                        'query': model_key,
                    }
                }
            }]
            body = {
                'query': {
                    'script_score': {
                        # The script adds 1.0 to the cosine similarity (scores
                        # must be non-negative), so the cut-off has to be
                        # shifted by the same amount; otherwise a threshold of
                        # 0.9 would accept any cosine >= -0.1.
                        'min_score': float(threshold) + 1.0,
                        'query': {
                            'bool': {
                                'must': must_clauses
                            }
                        },
                        'script': {
                            'source':
                            "cosineSimilarity(params.query_vector, 'vector_" +
                            str(len(vector)) + "') + 1.0",
                            'params': {
                                'query_vector': vector
                            }
                        }
                    }
                }
            }

        if 'context' in request.json:
            requested_context = request.json['context']
            matches = [
                {'match': {'context.' + key: requested_context[key]}}
                for key in requested_context
            ]
            # must_clauses is the live list inside `body` in both modes.
            must_clauses.append({
                'nested': {
                    'score_mode': 'none',
                    'path': 'context',
                    'query': {
                        'bool': {
                            'must': matches
                        }
                    }
                }
            })

        result = es.search(body=body,
                           index=app.config['ELASTICSEARCH_SIMILARITY'])
        return {'result': result['hits']['hits']}