def check_if_has_texta_mapping(self):
    """Verify that the index lists the mapping named by ``self.TEXTA``.

    Requests ``<es_url>/<index>`` through ``ES_Manager.plain_get`` and looks
    for ``self.TEXTA`` under the ``'mappings'`` entry of this index.

    Raises:
        CheckCritical: if ``self.TEXTA`` is not among the index mappings.
    """
    index_url = '{0}/{1}'.format(self.es_url, self._index)
    index_info = ES_Manager.plain_get(index_url)
    known_mappings = index_info[self._index]['mappings']

    if self.TEXTA in known_mappings:
        self.maybe_print('OK', 'Mapping [{0}] is present'.format(self.TEXTA))
        return

    raise CheckCritical('Mapping [{0}] was not found'.format(self.TEXTA))
def check_index_present(self):
    """Verify that ``self._index`` appears in the ``/_aliases`` listing.

    Raises:
        CheckCritical: if the index name is not a key of the response.
    """
    aliases_url = '{0}/_aliases'.format(self.es_url)
    aliases = ES_Manager.plain_get(aliases_url)

    if self._index in aliases.keys():
        self.maybe_print('OK', 'Index {0} is present'.format(self._index))
        return

    raise CheckCritical('Index {0} was not found'.format(self._index))
def get_texta_link_facts_by_id(self, doc_id):
    """Fetch the ``texta_link.facts`` field of one document.

    Args:
        doc_id: identifier placed in the request URL after index and type.

    Returns:
        * ``None`` when the response's ``'found'`` entry is falsy, or when
          an expected key is missing from the response;
        * ``[]`` when the document was found but the response carries no
          ``'fields'`` entry;
        * otherwise the value of ``response['fields']['texta_link.facts']``.
    """
    url_template = '{0}/{1}/{2}/{3}?fields=texta_link.facts'
    request_url = url_template.format(
        self.es_url, self._index, self._type, doc_id)
    response = ES_Manager.plain_get(request_url)

    try:
        if not response['found']:
            return None
        if 'fields' not in response:
            return []
        return response['fields']['texta_link.facts']
    except KeyError:
        # Any missing key is reported the same way as "no document".
        return None
def main():
    """Command-line entry point: ``python <script> port <command> [args]``.

    Supported commands (``port`` is the port of the server on localhost):

    * ``--indexes``                    print every key of ``/_aliases``.
    * ``--maps index_name``            print the mapping names of an index.
    * ``--check index_name``           run ``FactsCheck.check_all`` and
                                       ``summary`` and print the elapsed time.
    * ``--link index_name map_name``   run ``FactsLink.link_all`` for the
                                       mapping (``texta`` is rejected) and
                                       print the elapsed time.

    Any exception raised while parsing arguments or running a command is
    printed as ``--- Error: ...`` and followed by the help listing. An
    unrecognised command also ends with the help listing.
    """
    args = sys.argv
    script_name = args[0]

    # Rows are handed to print_help unchanged: [flag, number, usage example].
    # The second row used to be labelled '--check' although it documents
    # the '--maps' command.
    commands = [
        ['--indexes', 0, 'python {0} port --indexes'.format(script_name)],
        ['--maps', 1, 'python {0} port --maps index_name'.format(script_name)],
        ['--check', 1,
         'python {0} port --check index_name'.format(script_name)],
        ['--link', 1,
         'python {0} port --link index_name map_name'.format(script_name)],
    ]

    # NOTE: every print below takes a single parenthesised argument, which
    # the Python 2 print statement renders exactly like the function form.
    try:
        # int() accepts the same input as long() and formats identically.
        port = int(args[1])
        command = args[2]
        es_url = 'http://localhost:{0}'.format(port)

        if command == '--indexes':
            request_url = '{0}/_aliases'.format(es_url)
            response = ES_Manager.plain_get(request_url)
            for index_name in response.keys():
                print(index_name)
            return

        if command == '--maps':
            _index = u'{0}'.format(args[3])
            request_url = '{0}/{1}'.format(es_url, _index)
            # plain_get's result is used directly as a dict by every other
            # caller in this file; the extra .json() call here was a bug.
            response = ES_Manager.plain_get(request_url)
            for mapping_name in response[_index]['mappings'].keys():
                print(mapping_name)
            return

        if command == '--check':
            _index = u'{0}'.format(args[3])
            print('Checking... URL: {0}/{1} \n'.format(es_url, _index))
            start_time = time.time()
            check = FactsCheck(es_url, _index)
            check.check_all()
            check.summary()
            end_time = time.time()
            print('\n... total time: {0:2.2f} [min]'.format(
                (end_time - start_time) / 60.0))
            return

        if command == '--link':
            _index = u'{0}'.format(args[3])
            _type = u'{0}'.format(args[4])
            if _type == u'texta':
                raise Exception('Mapping link cant be texta!')
            print('Linking... URL: {0}/{1} - mapping: {2} \n'.format(
                es_url, _index, _type))
            start_time = time.time()
            link = FactsLink(es_url, _index, _type)
            link.link_all()
            end_time = time.time()
            print('\n... total time: {0:2.2f} [min]'.format(
                (end_time - start_time) / 60.0))
            return

    except Exception as e:
        # Top-level boundary of the script: report the problem, then fall
        # through to the usage listing below.
        print('--- Error: {0} \n'.format(e))

    # Reached only when no command returned: either an error was reported
    # above or the command was not recognised.
    print_help(commands)
def _check_element(self, _id, fact):
    """Validate one fact record against the document it refers to.

    Args:
        _id: identifier of the fact, used only in messages.
        fact: mapping with the keys ``'doc_type'``, ``'fact'``,
            ``'doc_path'``, ``'doc_id'`` and ``'spans'`` (``'spans'`` is a
            JSON-encoded list).

    Raises:
        CheckError: if the fact name is empty or contains a dot, the
            referenced document is not found, ``spans`` does not decode to
            a list, ``doc_path`` cannot be resolved inside the document
            source, or no entry of the document's ``texta_link.facts``
            contains ``doc_path``.

    Conditions that are only suspicious (fact name longer than 100
    characters, empty spans, a span end beyond the field length) are
    recorded through ``self._set_warning`` and do not abort the check.
    """
    doc_type = fact['doc_type']
    fact_name = fact['fact']
    doc_path = fact['doc_path']
    doc_id = fact['doc_id']
    raw_spans = fact['spans']

    # Check fact name size
    if len(fact_name) == 0:
        raise CheckError('Fact _id:{0} has empty fact_name'.format(_id))

    # Check fact name with dots
    if '.' in fact_name:
        raise CheckError('Fact _id:{0} contains dot (.) - {1}'.format(
            _id, fact_name))

    # Check fact name max size (warning)
    if len(fact_name) > 100:
        self._set_warning('Fact _id:{0} has long fact_name'.format(_id))

    # Check doc_id and recover document.
    # Built from self.es_url (not a hard-coded localhost:9200) so the check
    # targets the same server as the other requests of this object.
    request_url = '{0}/{1}/{2}/{3}'.format(
        self.es_url, self._index, doc_type, doc_id)
    response = ES_Manager.plain_get(request_url)
    if not response['found']:
        raise CheckError(
            'Fact _id:{0} has an invalid document [doc_id:{1}]'.format(
                _id, doc_id))

    # Decode spans. An explicit isinstance test replaces the former assert,
    # which would be skipped when Python runs with -O.
    invalid_spans_msg = 'Fact _id:{0} has invalid spans field '.format(_id)
    try:
        spans = json.loads(raw_spans)
    except (TypeError, ValueError):
        raise CheckError(invalid_spans_msg)
    if not isinstance(spans, list):
        raise CheckError(invalid_spans_msg)

    _source = response['_source']

    # Check spans
    if len(spans) == 0:
        self._set_warning('Fact _id:{0} has empty spans'.format(_id))

    # Check doc_path: walk the dotted path down the document source.
    doc = _source
    try:
        for part in doc_path.split('.'):
            doc = doc[part]
    except (KeyError, TypeError):
        # TypeError: the path continues into a value that is not a mapping
        # (e.g. a string or a list); that is an invalid path as well.
        raise CheckError(
            'Fact _id:{0} has invalid doc_path [doc_path:{1}]'.format(
                _id, doc_path))

    # Check fact link
    linked_facts = []
    if 'texta_link' in _source and 'facts' in _source['texta_link']:
        linked_facts = _source['texta_link']['facts']
    if not any(doc_path in fact_link for fact_link in linked_facts):
        raise CheckError(
            'Fact _id:{0} is not linked with document [doc_id:{1}]'.format(
                _id, doc_id))

    # Check spanned content. Skipped for empty spans (already warned about
    # above): max() of an empty sequence would raise ValueError.
    if spans:
        len_field = len(doc) + 1
        max_span = max(s[1] for s in spans)
        if max_span > len_field:
            self._set_warning(
                'Fact _id:{0} has likely a wrong span'.format(_id))
def check_version(self):
    """Report the ES version number read from the server root URL.

    Requests ``self.es_url`` through ``ES_Manager.plain_get`` and prints
    ``response['version']['number']`` with ``self.maybe_print``.
    """
    root_url = '{0}'.format(self.es_url)
    server_info = ES_Manager.plain_get(root_url)
    version_number = server_info['version']['number']
    self.maybe_print('OK', 'ES version {0}'.format(version_number))