Beispiel #1
0
    def check_if_has_texta_mapping(self):
        """Ensure the checked index contains the ``self.TEXTA`` mapping.

        Fetches ``<es_url>/<index>`` through ``ES_Manager.plain_get`` and
        looks for ``self.TEXTA`` among the entries under
        ``response[<index>]['mappings']``.

        Raises:
            CheckCritical: if the mapping is not listed for the index.
        """
        index_info = ES_Manager.plain_get(
            '{0}/{1}'.format(self.es_url, self._index))
        if self.TEXTA in index_info[self._index]['mappings']:
            self.maybe_print(
                'OK', 'Mapping [{0}] is present'.format(self.TEXTA))
            return

        raise CheckCritical(
            'Mapping [{0}] was not found'.format(self.TEXTA))
Beispiel #2
0
    def check_index_present(self):
        """Ensure ``self._index`` is among the keys of ``<es_url>/_aliases``.

        Raises:
            CheckCritical: if the index name is not a key of the response.
        """
        aliases = ES_Manager.plain_get('{0}/_aliases'.format(self.es_url))
        known_indexes = aliases.keys()
        if self._index in known_indexes:
            self.maybe_print(
                'OK', 'Index {0} is present'.format(self._index))
            return

        raise CheckCritical('Index {0} was not found'.format(self._index))
Beispiel #3
0
 def get_texta_link_facts_by_id(self, doc_id):
     """Fetch the ``texta_link.facts`` field of a single document.

     Requests ``<es_url>/<index>/<type>/<doc_id>?fields=texta_link.facts``
     through ``ES_Manager.plain_get``.

     Returns:
         ``response['fields']['texta_link.facts']`` when the response has a
         ``fields`` entry; otherwise an empty list if ``response['found']``
         is truthy and ``None`` if it is falsy. ``None`` is also returned
         when any of the looked-up keys is missing.
     """
     request_url = '{0}/{1}/{2}/{3}?fields=texta_link.facts'.format(
         self.es_url, self._index, self._type, doc_id)
     response = ES_Manager.plain_get(request_url)
     try:
         # A found document without the requested field yields [].
         linked_facts = [] if response['found'] else None
         if 'fields' in response:
             linked_facts = response['fields']['texta_link.facts']
     except KeyError:
         return None
     return linked_facts
Beispiel #4
0
def main():
    """Command-line entry point: ``python <script> port <command> [args]``.

    ``port`` selects the Elasticsearch instance at
    ``http://localhost:<port>``. Supported commands:

    * ``--indexes`` -- print every key of the ``/_aliases`` response.
    * ``--maps index_name`` -- print the mapping names of ``index_name``.
    * ``--check index_name`` -- run ``FactsCheck.check_all()`` and
      ``summary()`` on the index and print the elapsed time in minutes.
    * ``--link index_name map_name`` -- run ``FactsLink.link_all()`` for
      the given mapping and print the elapsed time in minutes.
      ``map_name`` must not be ``texta``.

    Any exception (including missing or malformed arguments) is printed as
    ``--- Error: ...``. Whenever no command ran to completion, the usage
    help is printed through ``print_help``.
    """
    args = sys.argv
    script_name = args[0]
    # Each entry is [flag, number, usage example] and is handed to
    # print_help() as-is. The second entry used to be labelled '--check'
    # although its usage text documents '--maps'.
    commands = [
        ['--indexes', 0, 'python {0} port --indexes'.format(script_name)],
        ['--maps', 1,
         'python {0} port --maps index_name'.format(script_name)],
        ['--check', 1,
         'python {0} port --check index_name'.format(script_name)],
        ['--link', 1,
         'python {0} port --link index_name map_name'.format(script_name)],
    ]
    try:
        # int() accepts the same input as long() on Python 2 and also
        # exists on Python 3.
        port = int(args[1])
        c = args[2]
        es_url = 'http://localhost:{0}'.format(port)

        if c == '--indexes':
            request_url = '{0}/_aliases'.format(es_url)
            response = ES_Manager.plain_get(request_url)
            for k in response.keys():
                print(k)
            return

        if c == '--maps':
            _index = u'{0}'.format(args[3])
            request_url = '{0}/{1}'.format(es_url, _index)
            # plain_get()'s result is used directly as a parsed mapping by
            # every other call in this file, so no extra .json() call here.
            response = ES_Manager.plain_get(request_url)
            for k in response[_index]['mappings'].keys():
                print(k)
            return

        if c == '--check':
            _index = u'{0}'.format(args[3])
            print('Checking... URL: {0}/{1} \n'.format(es_url, _index))
            start_time = time.time()
            check = FactsCheck(es_url, _index)
            check.check_all()
            check.summary()
            end_time = time.time()
            print('\n... total time: {0:2.2f} [min]'.format(
                (end_time - start_time) / 60.0))
            return

        if c == '--link':
            _index = u'{0}'.format(args[3])
            _type = u'{0}'.format(args[4])
            if _type == u'texta':
                raise Exception('Mapping link cant be texta!')
            print('Linking... URL: {0}/{1} - mapping: {2} \n'.format(
                es_url, _index, _type))
            start_time = time.time()
            link = FactsLink(es_url, _index, _type)
            link.link_all()
            end_time = time.time()
            print('\n... total time: {0:2.2f} [min]'.format(
                (end_time - start_time) / 60.0))
            return

    except Exception as e:
        # Top-level boundary: report the problem, then fall through to help.
        print('--- Error: {0} \n'.format(e))
    print_help(commands)
Beispiel #5
0
    def _check_element(self, _id, fact):
        """Validate one fact record against the document it points to.

        Args:
            _id: identifier of the fact, used only in messages.
            fact: mapping with the keys ``doc_type``, ``fact``,
                ``doc_path``, ``doc_id`` and ``spans`` (``spans`` is a
                JSON-encoded list).

        Raises:
            CheckError: if the fact name is empty or contains a dot, the
                referenced document is not found, ``spans`` is not a JSON
                list, ``doc_path`` cannot be resolved inside the document
                source, or no entry of the document's
                ``texta_link.facts`` contains ``doc_path``.
            KeyError: if ``fact`` lacks one of the expected keys.

        Non-fatal findings (a fact name longer than 100 characters, empty
        spans, a span end beyond the field length) are recorded through
        ``self._set_warning``.
        """
        doc_type = fact['doc_type']
        fact_name = fact['fact']
        doc_path = fact['doc_path']
        doc_id = fact['doc_id']
        spans = fact['spans']

        # Check fact name size
        if len(fact_name) == 0:
            error_msg = 'Fact _id:{0} has empty fact_name'.format(_id)
            raise CheckError(error_msg)

        # Check fact name with dots
        if '.' in fact_name:
            error_msg = 'Fact _id:{0} contains dot (.) - {1}'.format(
                _id, fact_name)
            raise CheckError(error_msg)

        # Check fact name max size (warning)
        if len(fact_name) > 100:
            warning_msg = 'Fact _id:{0} has long fact_name'.format(_id)
            self._set_warning(warning_msg)

        # Check doc_id and recover document. Use the configured es_url
        # rather than a fixed localhost:9200 so the lookup hits the same
        # instance as the other checks.
        request_url = '{0}/{1}/{2}/{3}'.format(
            self.es_url, self._index, doc_type, doc_id)
        response = ES_Manager.plain_get(request_url)
        if not response['found']:
            error_msg = 'Fact _id:{0} has an invalid document [doc_id:{1}]'.format(
                _id, doc_id)
            raise CheckError(error_msg)

        # Spans must decode to a JSON list. An explicit isinstance check is
        # used instead of assert, which is removed under ``python -O``.
        invalid_spans_msg = 'Fact _id:{0} has invalid spans field '.format(_id)
        try:
            spans = json.loads(spans)
        except (TypeError, ValueError):
            raise CheckError(invalid_spans_msg)
        if not isinstance(spans, list):
            raise CheckError(invalid_spans_msg)

        _source = response['_source']

        # Check spans
        if len(spans) == 0:
            warning_msg = 'Fact _id:{0} has empty spans'.format(_id)
            self._set_warning(warning_msg)

        # Check doc_path: walk the dotted path down the document source.
        doc = _source
        try:
            for p in doc_path.split('.'):
                doc = doc[p]
        except (KeyError, TypeError):
            # TypeError: the path descends into a value that cannot be
            # indexed by a string key, so the path is just as invalid.
            error_msg = 'Fact _id:{0} has invalid doc_path [doc_path:{1}]'.format(
                _id, doc_path)
            raise CheckError(error_msg)

        # Check fact link
        if 'texta_link' in _source and 'facts' in _source['texta_link']:
            is_linked = any(doc_path in fact_link
                            for fact_link in _source['texta_link']['facts'])
        else:
            is_linked = False
        if not is_linked:
            error_msg = 'Fact _id:{0} is not linked with document [doc_id:{1}]'.format(
                _id, doc_id)
            raise CheckError(error_msg)

        # Check spanned content. Empty spans were already reported above;
        # skip them here because max() of an empty sequence raises
        # ValueError.
        if spans:
            len_field = len(doc) + 1
            max_span = max(s[1] for s in spans)
            if max_span > len_field:
                warning_msg = 'Fact _id:{0} has likely a wrong span'.format(_id)
                self._set_warning(warning_msg)
Beispiel #6
0
 def check_version(self):
     """Report the ``version.number`` value returned by the ES root URL."""
     root_info = ES_Manager.plain_get('{0}'.format(self.es_url))
     version_number = root_info['version']['number']
     self.maybe_print('OK', 'ES version {0}'.format(version_number))