Esempio n. 1
0
    def __init__(self, url, index, mappings=None, clean=False, insecure=True,
                 analyzers=None, aliases=None):
        """Set up the connection to an ElasticSearch index.

        The major version of the instance is detected first, since it is
        needed to ask `analyzers` and `mappings` for their definitions.
        Index creation, analyzers, mappings and aliases are then delegated
        to the corresponding methods of this class.

        :param url: ES url
        :param index: index name
        :param mappings: an instance of the Mapping class
        :param clean: if True, deletes an existing index and create it again
        :param insecure: support https with invalid certificates
        :param analyzers: analyzers for ElasticSearch
        :param aliases: list of aliases, defined as strings, to be added to the index
        """
        # Major version of the Elasticsearch instance found at `url`
        self.major = self.check_instance(url, insecure)
        version_msg = "Found version of ES instance at {}: {}.".format(
            anonymize_url(url), self.major)
        logger.debug(version_msg)

        self.url = url
        self.index = self.safe_index(index)  # valid index name for elastic
        self.index_url = self.url + "/" + self.index
        self.aliases = aliases

        # time to wait to complete a bulk operation
        self.wait_bulk_seconds = 2

        self.requests = grimoire_con(insecure)

        # The analyzer settings are handed to create_index, and applied again
        # through update_analyzers right after it
        analyzer_settings = (
            analyzers.get_elastic_analyzers(es_major=self.major)['items']
            if analyzers else None
        )
        self.create_index(analyzer_settings, clean)
        if analyzers:
            self.update_analyzers(analyzer_settings)

        if mappings:
            self.create_mappings(
                mappings.get_elastic_mappings(es_major=self.major))

        for alias in aliases or ():
            if not self.alias_in_use(alias):
                self.add_alias(alias)
                continue

            # An alias reported as already in use is skipped, not re-added
            logger.debug(
                "Alias {} won't be set on {}, it already exists on {}".format(
                    alias,
                    anonymize_url(self.index_url),
                    anonymize_url(self.url)))
Esempio n. 2
0
    def check_instance(url, insecure):
        """Check that an Elasticsearch instance answers at `url`.

        A GET is issued against `url`. The response must have status 200 and
        a JSON body with a `version.number` field, from which the major
        version (the text before the first dot) is extracted.

        :param url: url of the instance to check
        :param insecure: don't verify ssl connection (boolean)

        :returns: major version of Elasticsearch, as string.
        :raises ElasticError: if the GET does not return 200, or if the major
            version can't be read from the response body.
        """
        res = grimoire_con(insecure).get(url)
        if res.status_code != 200:
            # Anonymize the url here too, as the other error path does, so
            # that the raw url never ends up in logs or exception messages
            msg = "Got {} from url {}".format(res.status_code,
                                              anonymize_url(url))
            logger.error(msg)
            raise ElasticError(cause=msg)

        try:
            version_str = res.json()['version']['number']
            version_major = version_str.split('.')[0]
            return version_major
        except Exception:
            # Any failure while decoding the body means that this is not the
            # expected welcome document: report it as an ElasticError
            msg = "Could not read proper welcome message from url {}, {}".format(
                anonymize_url(url), res.text)
            logger.error(msg)
            raise ElasticError(cause=msg)
Esempio n. 3
0
    def _check_instance(url, insecure):
        """Probe `url` and return the major version of Elasticsearch found.

        The url is fetched with a GET. The JSON body of the answer is
        expected to have a `version.number` field; the text before its
        first dot is the value returned.

        :value      url: url of the instance to check
        :value insecure: don't verify ssl connection (boolean)
        :returns:        major version of Elasticsearch, as string.
        :raises ElasticConnectException: if the answer is not a 200, or if
            the version can't be read from its body.
        """
        response = grimoire_con(insecure).get(url)

        if response.status_code != 200:
            logger.error("Didn't get 200 OK from url %s", url)
            raise ElasticConnectException

        try:
            version_number = response.json()['version']['number']
            major = version_number.split('.')[0]
        except Exception:
            # Whatever went wrong, the body is logged to help diagnosing it
            logger.error(
                "Could not read proper welcome message from url %s", url)
            logger.error("Message read: %s", response.text)
            raise ElasticConnectException

        return major
Esempio n. 4
0
 def __init__(self, config):
     """Store the configuration and the SortingHat database settings.

     :param config: configuration object; its ``get_conf()`` result must
         have a 'sortinghat' section with the 'database', 'user',
         'password' and 'host' keys
     """
     self.backend_section = None
     self.config = config
     self.conf = config.get_conf()

     # All the SortingHat connection settings live in the same section
     sh_conf = self.conf['sortinghat']
     self.db_sh = sh_conf['database']
     self.db_user = sh_conf['user']
     self.db_password = sh_conf['password']
     self.db_host = sh_conf['host']

     # 30m retry
     self.grimoire_con = grimoire_con(conn_retries=12)
Esempio n. 5
0
    def __init__(self, url, index, mappings=None, clean=False,
                 insecure=True, analyzers=None, aliases=None):
        """Connect to ElasticSearch and get the index ready to be used.

        :param url: url of the ElasticSearch instance
        :param index: index name, it is passed through `safe_index`
        :param mappings: object providing `get_elastic_mappings`, optional
        :param clean: remove already existing index
        :param insecure: support https with invalid certificates
        :param analyzers: sent as the request body when the index is created
        :param aliases: aliases to add to the index, unless already in use
        :raises ElasticWriteException: if a missing index can't be created.
            When recreating an existing index (`clean`), failures surface
            through `raise_for_status()` of the responses instead.
        """
        # Get major version of Elasticsearch instance
        self.major = self._check_instance(url, insecure)
        logger.debug("Found version of ES instance at %s: %s.",
                     self.anonymize_url(url), self.major)

        self.url = url
        self.index = self.safe_index(index)  # valid index name for elastic
        self.index_url = self.url + "/" + self.index
        self.aliases = aliases

        # time to wait to complete a bulk operation
        self.wait_bulk_seconds = 2

        self.requests = grimoire_con(insecure)

        # Anything but a 200 is taken as "the index does not exist yet"
        probe = self.requests.get(self.index_url)
        json_headers = {"Content-Type": "application/json"}

        if probe.status_code != 200:
            creation = self.requests.put(self.index_url, data=analyzers,
                                         headers=json_headers)
            if creation.status_code != 200:
                logger.error("Can't create index %s (%s)",
                             self.anonymize_url(self.index_url),
                             creation.status_code)
                raise ElasticWriteException()
            logger.info("Created index " + self.anonymize_url(self.index_url))
        elif clean:
            # The index exists: drop it and create it again from scratch
            self.requests.delete(self.index_url).raise_for_status()
            recreation = self.requests.put(self.index_url, data=analyzers,
                                           headers=json_headers)
            recreation.raise_for_status()
            logger.info("Deleted and created index " + self.anonymize_url(self.index_url))

        if mappings:
            self.create_mappings(
                mappings.get_elastic_mappings(es_major=self.major))

        for alias in aliases or ():
            if not self.alias_in_use(alias):
                self.add_alias(alias)
                continue

            # An alias reported as already in use is skipped, not re-added
            logger.debug("Alias %s won't be set on %s, it already exists on %s",
                         alias,
                         self.anonymize_url(self.index_url),
                         self.anonymize_url(self.url))
Esempio n. 6
0
    def __init__(self, config):
        """Store the configuration and the optional SortingHat settings.

        When the configuration has no (or an empty) 'sortinghat' section,
        all the SortingHat related attributes are set to None.

        :param config: configuration object providing ``get_conf()``
        """
        self.backend_section = None
        self.config = config
        self.conf = config.get_conf()

        sh_section = self.conf.get('sortinghat', None)
        if sh_section:
            self.db_sh = sh_section['database']
            self.db_user = sh_section['user']
            self.db_password = sh_section['password']
            self.db_host = sh_section['host']
            self.db_unaffiliate_group = sh_section['unaffiliated_group']
        else:
            self.db_sh = None
            self.db_user = None
            self.db_password = None
            self.db_host = None
            self.db_unaffiliate_group = None

        # Connection settings gathered in a single dict; the port is always
        # left as None
        self.sh_kwargs = {
            'user': self.db_user,
            'password': self.db_password,
            'database': self.db_sh,
            'host': self.db_host,
            'port': None,
        }

        # 30m retry
        self.grimoire_con = grimoire_con(conn_retries=12)
Esempio n. 7
0
 def __init__(self, config):
     """Keep the configuration and create the connection object.

     :param config: a Config object; the result of its ``get_conf()`` is
         kept in ``self.conf``
     """
     conf = config.get_conf()
     retrying_con = grimoire_con(conn_retries=12)  # 30m retry

     self.config = config
     self.conf = conf
     self.grimoire_con = retrying_con
Esempio n. 8
0
import json
import logging
import os
import tempfile

import requests

from grimoire_elk.elk import load_identities
from grimoire_elk.enriched.gerrit import GerritEnrich
from grimoire_elk.enriched.git import GitEnrich
from grimoire_elk.enriched.utils import grimoire_con

# Module-level logger, named after this module
logger = logging.getLogger(__name__)

# Connection object created once, at import time, and used for the HTTP GET
# issued by fetch_track_items (presumably a requests session - confirm
# against grimoire_con)
requests_ses = grimoire_con()


def fetch_track_items(upstream_file_url, data_source):
    """ The file format is:

    # Upstream contributions, bitergia will crawl this and extract the relevant information
    # system is one of Gerrit, Bugzilla, Launchpad (insert more)
    ---
    -
      url: https://review.openstack.org/169836
      system: Gerrit
    """

    track_uris = []
    req = requests_ses.get(upstream_file_url)
Esempio n. 9
0
 def __init__(self, config):
     """Store the configuration and build the connection object.

     :param config: a Config object; ``self.conf`` holds what its
         ``get_conf()`` returns
     """
     loaded_conf = config.get_conf()
     connection = grimoire_con(conn_retries=12)  # 30m retry

     self.config = config
     self.conf = loaded_conf
     self.grimoire_con = connection