コード例 #1
0
def get_backend_id(backend_name, backend_params):
    """Return the unique id of the backend selected by ``backend_name``.

    ``backend_params`` are forwarded to the connector's BackendCmd class,
    which builds the actual backend instance.

    :raises RuntimeError: when no connector is registered under the name
    """
    connectors = get_connectors()
    if backend_name not in connectors:
        raise RuntimeError("Unknown backend %s" % backend_name)
    # Position 3 of the connector tuple holds the BackendCmd class
    backend_cmd = connectors[backend_name][3](*backend_params)
    return backend_cmd.backend.unique_id
コード例 #2
0
ファイル: rest.py プロジェクト: jgbarah/GrimoireELK
def get_backend_id(backend_name, backend_params):
    """Instantiate the backend named ``backend_name`` and return its unique id.

    The connector registry maps each backend name to a tuple whose fourth
    element is the BackendCmd class; ``backend_params`` are its positional
    arguments.

    :raises RuntimeError: if ``backend_name`` is not a known connector
    """
    registry = get_connectors()
    if backend_name not in registry:
        raise RuntimeError("Unknown backend %s" % backend_name)

    klass = registry[backend_name][3]  # BackendCmd for the connector
    backend = klass(*backend_params).backend
    return backend.unique_id
コード例 #3
0
    def test_enrich(self, sortinghat=False, projects=False):
        """Test enrich all sources.

        For each registered connector, read the raw (ocean) index and
        enrich its items into a fresh "test_<name>_enrich" index.

        :param sortinghat: enrich using a SortingHat identities database
        :param projects: enrich using a projects mapping database
        """
        config = configparser.ConfigParser()
        config.read(CONFIG_FILE)
        es_con = dict(config.items('ElasticSearch'))['url']
        logging.info("Enriching data in: %s", es_con)
        connectors = get_connectors()
        for con in sorted(connectors.keys()):
            perceval_backend = None
            ocean_index = "test_" + con
            enrich_index = "test_" + con + "_enrich"
            # Reuse the raw index populated by the data-load tests
            clean = False
            ocean_backend = connectors[con][1](perceval_backend)
            elastic_ocean = get_elastic(es_con, ocean_index, clean,
                                        ocean_backend)
            ocean_backend.set_elastic(elastic_ocean)
            # Always start from a fresh enriched index
            clean = True
            if not sortinghat and not projects:
                enrich_backend = connectors[con][2]()
            elif sortinghat and not projects:
                enrich_backend = connectors[con][2](
                    db_sortinghat=DB_SORTINGHAT)
            elif not sortinghat and projects:
                enrich_backend = connectors[con][2](
                    db_projects_map=DB_PROJECTS)
            else:
                # Bug fix: with both flags set no branch matched, leaving
                # enrich_backend undefined and raising NameError below
                enrich_backend = connectors[con][2](
                    db_sortinghat=DB_SORTINGHAT,
                    db_projects_map=DB_PROJECTS)
            elastic_enrich = get_elastic(es_con, enrich_index, clean,
                                         enrich_backend)
            enrich_backend.set_elastic(elastic_enrich)
            if sortinghat:
                # Load SH identities
                load_identities(ocean_backend, enrich_backend)
            enrich_count = enrich_backend.enrich_items(ocean_backend)

            if enrich_count is not None:
                logging.info("Total items enriched %i ", enrich_count)
コード例 #4
0
ファイル: test_enrich.py プロジェクト: alpgarcia/GrimoireELK
    def test_add_alias(self):
        """Test whether add_alias properly works"""

        # The ElasticSearch url comes from the test configuration file
        config = configparser.ConfigParser()
        config.read(CONFIG_FILE)
        es_con = dict(config.items('ElasticSearch'))['url']

        # Throw-away index used only by this test
        tmp_index = "test-add-aliases"
        tmp_index_url = es_con + "/" + tmp_index

        # NOTE(review): enrich_backend is built here but self._enrich is the
        # object exercised below — confirm this fixture wiring is intended
        enrich_backend = get_connectors()["git"][2]()
        elastic_enrich = get_elastic(es_con, tmp_index, True, enrich_backend)
        self._enrich.set_elastic(elastic_enrich)

        # add alias
        with self.assertLogs(logger, level='INFO') as cm:
            self._enrich.elastic.add_alias(DEMOGRAPHICS_ALIAS)

        # First creation is logged at INFO with the anonymized index url
        self.assertEqual(cm.output[0],
                         'INFO:grimoire_elk.elastic:Alias %s created on %s.'
                         % (DEMOGRAPHICS_ALIAS, anonymize_url(tmp_index_url)))

        # The alias must now be reported by the ES _alias endpoint
        r = self._enrich.requests.get(self._enrich.elastic.index_url + "/_alias", headers=HEADER_JSON, verify=False)
        self.assertIn(DEMOGRAPHICS_ALIAS, r.json()[self._enrich.elastic.index]['aliases'])

        # add alias again: re-adding an existing alias is a no-op at DEBUG
        with self.assertLogs(logger, level='DEBUG') as cm:
            self._enrich.elastic.add_alias(DEMOGRAPHICS_ALIAS)

        self.assertEqual(cm.output[0],
                         'DEBUG:grimoire_elk.elastic:Alias %s already exists on %s.'
                         % (DEMOGRAPHICS_ALIAS, anonymize_url(tmp_index_url)))

        # Clean up the temporary index
        requests.delete(tmp_index_url, verify=False)
コード例 #5
0
    def setUp(self):
        """Build a GitHub enrich backend connected to the SortingHat DB."""
        # The name of the connector is needed only to get access to the
        # SortingHat DB; position 2 of the connector tuple is the enricher
        enricher_klass = get_connectors()["github"][2]
        self.enrich_backend = enricher_klass(
            db_sortinghat=DB_SORTINGHAT,
            db_user=self.db_user,
            db_password=self.db_password)
コード例 #6
0
ファイル: config.py プロジェクト: olblak/mordred
    def get_backend_sections(cls):
        """Return the names of all supported backend sections.

        A backend name may carry an extra ":<param>" suffix so that several
        entries for the same backend can coexist with different configs.
        """
        extra_backends = ["apache", "google_hits", "remo:activities"]
        return list(get_connectors()) + extra_backends
コード例 #7
0
def find_ds_mapping(data_source, es_major_version):
    """
    Find the mapping given a perceval data source

    :param data_source: name of the perceval data source
    :param es_major_version: string with the major version for Elasticsearch
    :return: a dict with the mappings (raw and enriched)
    """

    def get_mappings_for(backend):
        # Combine the backend-specific "items" mapping with the general one
        # (this code used to be duplicated for raw and enriched)
        mapping = json.loads(
            backend.mapping.get_elastic_mappings(es_major_version)['items'])
        return [mapping, find_general_mappings(es_major_version)]

    mappings = {"raw": None,
                "enriched": None}

    # Backend connectors
    connectors = get_connectors()

    try:
        raw_klass = connectors[data_source][1]
        enrich_klass = connectors[data_source][2]
    except KeyError:
        print("Data source not found", data_source)
        sys.exit(1)

    # Mapping for raw index
    backend = raw_klass(None)
    if backend:
        mappings['raw'] = get_mappings_for(backend)

    # Mapping for enriched index
    backend = enrich_klass(None)
    if backend:
        mappings['enriched'] = get_mappings_for(backend)

    return mappings
コード例 #8
0
ファイル: config.py プロジェクト: zhquan/mordred
    def get_backend_sections(cls):
        """List backend section names: GrimoireELK connectors plus extras.

        A backend name can include an extra ":<param>" suffix to allow
        several entries of the same backend with different configs.
        """
        sections = list(get_connectors().keys())
        sections.extend(["apache", "google_hits", "remo:activities"])
        return sections
コード例 #9
0
    def test_refresh_project(self):
        """Test refresh project field for all sources"""
        # self.test_enrich_sh() # Load the identities in ES
        config = configparser.ConfigParser()
        config.read(CONFIG_FILE)
        es_con = dict(config.items('ElasticSearch'))['url']

        # Optional credentials for the projects mapping database
        db_user = ''
        db_password = ''
        if 'Database' in config:
            section = config['Database']
            if 'user' in section:
                db_user = section['user']
            if 'password' in section:
                db_password = section['password']

        logging.info("Refreshing data in: %s", es_con)
        connectors = get_connectors()
        for name in sorted(connectors.keys()):
            enrich_backend = connectors[name][2](db_projects_map=DB_PROJECTS,
                                                 db_user=db_user,
                                                 db_password=db_password)
            # Reuse the enriched index built by the enrich tests
            elastic_enrich = get_elastic(es_con, "test_" + name + "_enrich",
                                         False, enrich_backend)
            enrich_backend.set_elastic(elastic_enrich)
            logging.info("Refreshing projects fields in enriched index %s",
                         elastic_enrich.index_url)
            self.__refresh_projects(enrich_backend)
コード例 #10
0
 def test_read_data(self):
     """Test load all sources JSON"""
     config = configparser.ConfigParser()
     config.read(CONFIG_FILE)
     # Every registered connector must ship a data/<name>.json fixture
     # that parses as valid JSON
     for name in sorted(get_connectors()):
         fixture = os.path.join("data", name + ".json")
         with open(fixture) as handle:
             json.load(handle)
コード例 #11
0
    def get_study_sections(cls):
        """Collect the names of the studies offered by the enrich backends.

        A study name could include an extra ":<param>" suffix to allow
        several entries with different configs.
        """
        study_names = set()
        for backends in get_connectors().values():
            # Position 2 of the connector tuple is the enrich backend class
            for study in backends[2]().studies:
                study_names.add(study.__name__)

        return tuple(study_names)
コード例 #12
0
    def setUpClass(cls):
        """Read the test configuration and cache fixtures shared by tests."""
        cls.config = configparser.ConfigParser()
        cls.config.read(CONFIG_FILE)
        cls.es_con = dict(cls.config.items('ElasticSearch'))['url']
        cls.connectors = get_connectors()

        # Sorting hat settings: credentials are optional ([Database] section)
        cls.db_user = ''
        cls.db_password = ''
        if 'Database' in cls.config:
            section = cls.config['Database']
            if 'user' in section:
                cls.db_user = section['user']
            if 'password' in section:
                cls.db_password = section['password']
コード例 #13
0
    def test_enrich(self, sortinghat=False, projects=False):
        """Test enrich all sources.

        For each connector, read the raw (ocean) index and enrich its items
        into a fresh "test_<name>_enrich" index, asserting that every raw
        item produced exactly one enriched item.

        :param sortinghat: enrich using a SortingHat identities database
        :param projects: enrich using a projects mapping database
        """
        config = configparser.ConfigParser()
        config.read(CONFIG_FILE)
        es_con = dict(config.items('ElasticSearch'))['url']
        # Optional credentials from the [Database] section
        db_user = ''
        db_password = ''
        if 'Database' in config:
            if 'user' in config['Database']:
                db_user = config['Database']['user']
            if 'password' in config['Database']:
                db_password = config['Database']['password']
        logging.info("Enriching data in: %s", es_con)
        connectors = get_connectors()
        for con in sorted(connectors.keys()):
            perceval_backend = None
            ocean_index = "test_" + con
            enrich_index = "test_" + con + "_enrich"
            # Reuse the raw index populated by the data-load tests
            clean = False
            ocean_backend = connectors[con][1](perceval_backend)
            elastic_ocean = get_elastic(es_con, ocean_index, clean,
                                        ocean_backend)
            ocean_backend.set_elastic(elastic_ocean)
            # Always start from a fresh enriched index
            clean = True
            if not sortinghat and not projects:
                enrich_backend = connectors[con][2]()
            elif sortinghat and not projects:
                enrich_backend = connectors[con][2](
                    db_sortinghat=DB_SORTINGHAT,
                    db_user=db_user,
                    db_password=db_password)
            elif not sortinghat and projects:
                enrich_backend = connectors[con][2](
                    db_projects_map=DB_PROJECTS,
                    db_user=db_user,
                    db_password=db_password)
            else:
                # Bug fix: with both flags set no branch matched, leaving
                # enrich_backend undefined and raising NameError below
                enrich_backend = connectors[con][2](
                    db_sortinghat=DB_SORTINGHAT,
                    db_projects_map=DB_PROJECTS,
                    db_user=db_user,
                    db_password=db_password)
            elastic_enrich = get_elastic(es_con, enrich_index, clean,
                                         enrich_backend)
            enrich_backend.set_elastic(elastic_enrich)
            if sortinghat:
                # Load SH identities
                load_identities(ocean_backend, enrich_backend)
            raw_count = sum(1 for _ in ocean_backend.fetch())
            enrich_count = enrich_backend.enrich_items(ocean_backend)

            self.assertEqual(raw_count, enrich_count)
コード例 #14
0
 def test_data_load(self):
     """Test load all sources JSON data into ES"""
     config = configparser.ConfigParser()
     config.read(CONFIG_FILE)
     es_con = dict(config.items('ElasticSearch'))['url']
     logging.info("Loading data in: %s", es_con)
     connectors = get_connectors()
     for name in sorted(connectors):
         with open(os.path.join("data", name + ".json")) as fixture:
             items = json.load(fixture)
             # Build a fresh raw index (clean=True) for this data source
             perceval_backend = None
             ocean_backend = connectors[name][1](perceval_backend)
             elastic_ocean = get_elastic(es_con, "test_" + name, True,
                                         ocean_backend)
             ocean_backend.set_elastic(elastic_ocean)
             self.__data2es(items, ocean_backend)
コード例 #15
0
 def test_refresh_identities(self):
     """Test refresh identities for all sources"""
     # self.test_enrich_sh() # Load the identities in ES
     config = configparser.ConfigParser()
     config.read(CONFIG_FILE)
     es_con = dict(config.items('ElasticSearch'))['url']
     logging.info("Refreshing data in: %s", es_con)
     connectors = get_connectors()
     for name in sorted(connectors):
         enrich_backend = connectors[name][2](db_sortinghat=DB_SORTINGHAT)
         # Reuse the enriched index built by the enrich tests (clean=False)
         elastic_enrich = get_elastic(es_con, "test_" + name + "_enrich",
                                      False, enrich_backend)
         enrich_backend.set_elastic(elastic_enrich)
         logging.info("Refreshing identities fields in enriched index %s",
                      elastic_enrich.index_url)
         self.__refresh_identities(enrich_backend)
コード例 #16
0
    def test_data_load_error(self):
        """Test whether malformed items are rejected when inserting data"""

        config = configparser.ConfigParser()
        config.read(CONFIG_FILE)
        es_con = dict(config.items('ElasticSearch'))['url']
        logging.info("Loading data in: %s", es_con)
        functest = get_connectors()['functest']
        with open(os.path.join("data", "functest_wrong.json")) as fixture:
            items = json.load(fixture)
            # Fresh raw index (clean=True) for the functest data source
            perceval_backend = None
            ocean_backend = functest[1](perceval_backend)
            elastic_ocean = get_elastic(es_con, "test_functest", True,
                                        ocean_backend)
            ocean_backend.set_elastic(elastic_ocean)

            # Some items are malformed, so fewer than len(items) are inserted
            inserted = self.__data2es(items, ocean_backend)
            self.assertGreater(len(items), inserted)
コード例 #17
0
def find_perceval_backend(es_url, index):
    """Detect the data source of an index and build a matching backend.

    The first document of ``index`` is fetched and inspected to decide
    whether the index is raw or enriched, and for which data source.

    :param es_url: url of the ElasticSearch instance
    :param index: name of the index to inspect
    :return: an enrich/ocean backend instance, or None for data sources
             that are detected but have no connector here (twitter,
             googleSearchHits, apache)
    """

    backend = None

    # Backend connectors
    connectors = get_connectors()

    # Get the first item to detect the data source and raw/enriched type
    res = requests.get('%s/%s/_search?size=1' % (es_url, index))
    first_item = res.json()['hits']['hits'][0]['_source']
    fields = first_item.keys()
    if 'metadata__enriched_on' in fields:
        enrich_class = first_item['metadata__gelk_backend_name']
        logging.debug("Detected enriched index for %s", enrich_class)
        # Time to get the mapping
        con_name = get_connector_name_from_cls_name(enrich_class)
        logging.debug("Getting the mapping for %s", con_name)
        klass = connectors[con_name][2]
        backend = klass()
    elif 'perceval_version' in fields:
        logging.debug("Detected raw index for %s", first_item['backend_name'])
        con_name = get_connector_name_from_cls_name(first_item['backend_name'])
        klass = connectors[con_name][1]
        backend = klass(None)
    elif 'retweet_count' in fields:
        con_name = 'twitter'
        logging.debug("Detected raw index for %s", con_name)
    elif 'type' in fields and first_item['type'] == 'googleSearchHits':
        logging.debug("Detected raw index for googleSearchHits")
    elif 'httpversion' in fields:
        logging.debug("Detected raw index for apache")
    else:
        # Fixed garbled message ("Can not find is the index if raw ...")
        logging.error("Can not find if the index is raw or enriched: %s",
                      index)
        sys.exit(1)

    return backend
コード例 #18
0
    def fetch(self):
        """Yield the data sources selected by the current state.

        With no state (or an empty one), one DataSource per connector
        supported by GrimoireELK is yielded. Otherwise the state narrows
        the result to explicit data sources, repository views, projects
        or the projects of an ecosystem — checked in that order.
        """
        if not self.state or self.state.is_empty():
            # No filter: every supported connector becomes a DataSource
            for name in list(gelk_utils.get_connectors()):
                yield DataSource(name=name)
        elif self.state.data_sources:
            selected = DataSource.objects.filter(name__in=self.state.data_sources)
            for data_source in selected:
                yield data_source
        elif self.state.repository_views:
            views = RepositoryView.objects.filter(id__in=self.state.repository_views)
            yield from self.__fetch_from_repository_views(views)
        elif self.state.projects:
            projects = Project.objects.filter(name__in=self.state.projects)
            yield from self.__fetch_from_projects(projects)
        elif self.state.eco_name:
            ecosystem = Ecosystem.objects.get(name=self.state.eco_name)
            yield from self.__fetch_from_projects(ecosystem.projects.all())
コード例 #19
0
 def setUp(self):
     """Locate the test data directories and load the connector registry."""
     tests_dir = os.path.dirname(os.path.realpath(__file__))
     self.__tests_dir = tests_dir
     self.__events_dir = os.path.join(tests_dir, "data/events/")
     self.connectors = get_connectors()
コード例 #20
0
    def __get_backends(self):
        """Return GrimoireELK backend names plus extra non-gelk backends."""
        backends = list(get_connectors().keys())
        backends.append("google_hits")
        return backends
コード例 #21
0
 def test_init(self):
     """Test whether the backends can be loaded """
     connectors = get_connectors()
     self.assertEqual(len(connectors), NUMBER_BACKENDS)
コード例 #22
0
from grimoire_elk.utils import get_connectors

# Dump the registry of supported GrimoireELK connectors
connectors = get_connectors()
print(connectors)