def main():
    """Command-line entry point of sirmordred

    Either writes a sample configuration file (when a template path was
    given) or loads the given configuration files, sets up the logs,
    optionally restricts the execution to the requested phases and
    starts SirMordred.

    :returns: 0 after creating the sample config file, 1 when no option
        was given or the configuration could not be consumed, and `None`
        once `SirMordred.start()` returns
    """
    args = parse_args()

    # Template mode: write the sample file and stop
    if args.config_template_file is not None:
        template_path = args.config_template_file
        Config.create_config_file(template_path)
        Logger.info("Sample config file created in {}".format(template_path))
        return 0

    if args.config_files is None:
        Logger.error("Option -t or -c is required")
        return 1

    try:
        # The first file is passed on its own, the remaining ones as a list
        config = Config(args.config_files[0], args.config_files[1:])
        config_dict = config.get_conf()
        general_conf = config_dict['general']
        logger = setup_logs(general_conf['logs_dir'], general_conf['debug'])
    except RuntimeError as error:
        # The logger may not exist yet at this point, hence the plain print
        print("Error while consuming configuration: {}".format(error))
        return 1

    if args.phases:
        logger.info("Executing sirmordred for phases: {}".format(args.phases))
        # HACK: the internal dict of Config is modified directly
        # In manual phases execute sirmordred as an script
        config_dict['general']['update'] = False
        phases_conf = config_dict['phases']
        for phase in phases_conf:
            # Only the phases requested on the command line stay enabled
            phases_conf[phase] = phase in args.phases

    SirMordred(config).start()
    def test_run(self):
        """Check that the enrichment task runs and fills the enriched index"""
        config = Config(CONF_FILE)
        conf = config.get_conf()

        # The projects have to be loaded before enriching
        TaskProjects(config).execute()
        enrich_task = TaskEnrich(config, backend_section=GIT_BACKEND_SECTION)
        self.assertEqual(enrich_task.execute(), None)

        # Build the URLs of the raw and enriched indexes
        collection_url = conf['es_collection']['url']
        enrichment_url = conf['es_enrichment']['url']
        raw_index = collection_url + "/" + conf[GIT_BACKEND_SECTION]['raw_index']
        enrich_index = enrichment_url + "/" + conf[GIT_BACKEND_SECTION]['enriched_index']

        # Ask each index for its total number of items
        totals = []
        for index_url in (raw_index, enrich_index):
            response = requests.get(index_url + "/_search?size=0")
            totals.append(response.json()['hits']['total'])
        raw_items, enriched_items = totals

        # the number of raw items is bigger since the enriched items are generated based on:
        # https://github.com/VizGrimoire/GrimoireLib
        # --filters-raw-prefix data.files.file:grimoirelib_alch data.files.file:README.md
        # see [git] section in tests/test-projects.json
        self.assertGreater(raw_items, enriched_items)
예제 #3
0
    def test_create_config_file(self):
        """Test whether a config file is correctly created"""
        import os

        config = Config(CONF_FULL)

        # tempfile.mktemp() is deprecated: it only returns a name, so another
        # process may claim the path before the file is written. A private
        # temporary directory avoids that race and is removed when the block
        # exits, so the generated file is not left behind.
        with tempfile.TemporaryDirectory(prefix='mordred_') as tmp_dir:
            tmp_path = os.path.join(tmp_dir, 'mordred_config')
            config.create_config_file(tmp_path)

            # Load the generated file back as a configuration.
            # NOTE(review): no assertion on the loaded contents is made here;
            # consider comparing it against `config`.
            Config(tmp_path)
예제 #4
0
    def test_set_param_not_found(self):
        """Check that an error is logged when the param does not exist"""
        expected_entry = 'ERROR:sirmordred.config:Config section twitter and param acme not exists'
        config = Config(CONF_FULL)

        with self.assertLogs(logger, level='ERROR') as captured:
            config.set_param("twitter", "acme", "true")
            # The most recent log entry must be the error about the param
            last_entry = captured.output[-1]
            self.assertEqual(last_entry, expected_entry)
예제 #5
0
    def test_set_param(self):
        """Check that set_param modifies the value of an existing param"""
        section, param = "twitter", "collect"
        config = Config(CONF_FULL)

        # The param is falsy before the change ...
        value_before = config.conf[section][param]
        self.assertFalse(value_before)

        # ... and truthy once it has been set to "true"
        config.set_param(section, param, "true")
        value_after = config.conf[section][param]
        self.assertTrue(value_after)
예제 #6
0
    def test__get_projects_from_url(self):
        """Check that the projects are downloaded from a URL"""
        setup_http_server()

        # Point the projects section of the configuration to the URL
        config = Config(CONF_FILE)
        config.set_param('projects', 'projects_url', 'http://localhost/projects.json')

        task = TaskProjects(config)
        outcome = task.execute()
        self.assertEqual(outcome, None)

        # The main project must be among the loaded projects
        self.assertIn(URL_PROJECTS_MAIN, task.get_projects())
예제 #7
0
    def test_get_data_sources(self):
        """Test whether all data sources are properly retrieved"""

        config = Config(CONF_FULL)

        expected = ['askbot', 'bugzilla', 'bugzillarest', 'confluence', 'discourse', 'dockerhub', 'functest',
                    'gerrit', 'git', 'gitlab', 'github', 'google_hits', 'groupsio', 'hyperkitty', 'jenkins', 'jira',
                    'mbox', 'meetup', 'mediawiki', 'mozillaclub', 'nntp', 'phabricator', 'pipermail', 'puppetforge',
                    'redmine', 'remo', 'rss', 'stackexchange', 'slack', 'supybot', 'telegram', 'twitter']
        data_sources = config.get_data_sources()

        self.assertEqual(len(data_sources), len(expected))
        # list.sort() sorts in place and returns None, so comparing the
        # results of two .sort() calls always passes (None == None).
        # Compare sorted copies so the contents are really checked.
        self.assertEqual(sorted(data_sources), sorted(expected))
    def setUp(self):
        """Recreate the SortingHat database before each test"""
        sh_conf = Config(CONF_FILE).get_conf()['sortinghat']

        # Connection settings come from the [sortinghat] section; no port is set
        connection = {key: sh_conf[key] for key in ('user', 'password', 'database', 'host')}
        connection['port'] = None
        self.sh_kwargs = connection

        # Drop the database to start from an empty state, then create it again
        Database.drop(**self.sh_kwargs)
        Database.create(**self.sh_kwargs)
        self.sh_db = Database(**self.sh_kwargs)
예제 #9
0
    def test_compose_p2o_params(self):
        """Check the p2o params composed for a backend and a repository"""
        task = Task(Config(CONF_FILE))

        stackexchange_url = "https://stackoverflow.com/questions/tagged/example"
        wiki_urls = ("https://wiki-archive.opendaylight.org "
                     "https://wiki-archive.opendaylight.org/view")

        # Each case: (backend, repository string, expected params)
        cases = [
            ("stackexchange", stackexchange_url, {'url': stackexchange_url}),
            ("mediawiki", wiki_urls, {'url': wiki_urls}),
            # A trailing --name=value filter becomes an extra param
            ("mediawiki", wiki_urls + " --filter-no-collection=true",
             {'url': wiki_urls, "filter-no-collection": "true"}),
        ]

        for backend, repository, expected in cases:
            params = task._compose_p2o_params(backend, repository)
            self.assertDictEqual(params, expected)
예제 #10
0
def micro_mordred(cfg_path, backend_sections, repos_to_check, raw, identities_load, identities_merge, enrich, panels):
    """Run the requested Mordred phases with the configuration file (`cfg_path`).

    The enabled phases are executed in this order: raw collection,
    identities loading, identities merging, enrichment and panels upload.

    :param cfg_path: the path of a Mordred configuration file
    :param backend_sections: the backend sections where the raw/enrich/identities phases will be executed
    :param repos_to_check: process a repository only if it is in this list, or `None` for all repos
    :param raw: if true, it activates the collection of raw data
    :param identities_load: if true, it activates the identities loading process
    :param identities_merge: if true, it activates the identities merging process
    :param enrich: if true, it activates the enrichment of the raw data
    :param panels: if true, it activates the upload of all panels declared in the configuration file
    """
    config = Config(cfg_path)

    def run_per_backend(phase):
        # Apply a per-backend phase to every requested backend section
        for section in backend_sections:
            phase(config, section, repos_to_check)

    if raw:
        run_per_backend(get_raw)

    if identities_load:
        get_identities_load(config)

    if identities_merge:
        get_identities_merge(config)

    if enrich:
        run_per_backend(get_enrich)

    if panels:
        get_panels(config)
예제 #11
0
def micro_mordred(cfg_path, backend_sections, raw, arthur, identities_load,
                  identities_merge, enrich, panels):
    """Run the requested Mordred phases with the configuration file (`cfg_path`).

    The enabled phases are executed in this order: raw collection,
    identities loading, identities merging, enrichment and panels upload.

    :param cfg_path: the path of a Mordred configuration file
    :param backend_sections: the backend sections where the raw/enrich/identities phases will be executed
    :param raw: if true, it activates the collection of raw data
    :param arthur: if true, it enables Arthur to collect the raw data
    :param identities_load: if true, it activates the identities loading process
    :param identities_merge: if true, it activates the identities merging process
    :param enrich: if true, it activates the enrichment of the raw data
    :param panels: if true, it activates the upload of all panels declared in the configuration file
    """
    config = Config(cfg_path)

    # Raw collection, one backend section at a time
    if raw:
        for section in backend_sections:
            get_raw(config, section, arthur)

    # Identities are handled once, not per backend section
    if identities_load:
        get_identities_load(config)
    if identities_merge:
        get_identities_merge(config)

    # Enrichment, one backend section at a time
    if enrich:
        for section in backend_sections:
            get_enrich(config, section)

    if panels:
        get_panels(config)
예제 #12
0
    def test_backend_params(self):
        """Check the perceval params composed for the GitHub backend section"""
        expected_params = [
            'grimoirelab',
            'perceval',
            '--api-token',
            'XXXXX',
            '--sleep-time',
            '300',
            '--sleep-for-rate',
            '--category',
            'issue',
            '--archive-path',
            '/tmp/test_github_archive',
        ]

        config = Config(CONF_FILE)
        task = TaskRawDataCollection(config, backend_section=GITHUB_BACKEND_SECTION)
        params = task._compose_perceval_params(GITHUB_BACKEND_SECTION, GITHUB_REPO)

        # Same number of params, and every composed one is an expected one
        self.assertEqual(len(params), len(expected_params))
        for param in params:
            self.assertIn(param, expected_params)
예제 #13
0
    def _get_repos_by_backend(self):
        """Return a dict mapping each enabled backend section to its repositories

        The sections are the keys of each project that start with one of
        the names returned by `Config.get_backend_sections()`. The
        repositories of all the projects are accumulated per section, and
        only the sections present in `self.conf` are returned.

        :returns: dict of backend section -> list of repositories
        """
        projects = TaskProjects.get_projects()
        # str.startswith accepts a tuple of prefixes, so the known sections
        # are collected once instead of once per project key
        known_sections = tuple(Config.get_backend_sections())

        output = {}
        for pro in projects:
            # the set removes duplicates; sorting gives a stable section order
            backend_sections = sorted({
                sect for sect in projects[pro]
                if sect and sect.startswith(known_sections)
            })

            for backend_section in backend_sections:
                # Accumulate into a list owned by `output`. Storing the
                # project's list itself and extending it with `+=` would
                # modify the data returned by TaskProjects.get_projects().
                output.setdefault(backend_section, []).extend(
                    projects[pro][backend_section])

        # backend could be in project/repo file but not enabled in
        # sirmordred conf file
        enabled = {
            section: repos
            for section, repos in output.items()
            if section in self.conf
        }

        # logger.debug('repos to be retrieved: %s ', enabled)
        return enabled
예제 #14
0
파일: micro.py 프로젝트: valeriocos/mordred
def micro_mordred(cfg_path, backend_sections, raw, arthur, identities, enrich,
                  panels):
    """Run the requested phases for the backend sections of a Mordred configuration file.

    The enabled phases are executed in this order: raw collection,
    identities, enrichment and panels upload.

    :param cfg_path: the path of a Mordred configuration file
    :param backend_sections: the backend sections where the raw and/or enrich phases will be executed
    :param raw: if true, it activates the collection of raw data
    :param arthur: if true, it enables Arthur to collect the raw data
    :param identities: if true, it activates the identities merge in SortingHat
    :param enrich: if true, it activates the collection of enrich data
    :param panels: if true, it activates the upload of panels
    """
    config = Config(cfg_path)

    # Raw collection, one backend section at a time
    if raw:
        for section in backend_sections:
            get_raw(config, section, arthur)

    # Identities are handled once, not per backend section
    if identities:
        get_identities(config)

    # Enrichment, one backend section at a time
    if enrich:
        for section in backend_sections:
            get_enrich(config, section)

    if panels:
        get_panels(config)
    def test_execute_from_archive(self):
        """Check that the data can be fetched from archives and then enriched"""
        # proj_file -> 'test-projects-archive.json' stored within the conf file
        config = Config('archives-test.cfg')

        backend_sections = [
            'askbot', 'bugzilla', 'bugzillarest', 'confluence', 'discourse',
            'dockerhub', 'gerrit', 'github:issue', 'github:pull',
            'gitlab:issue', 'gitlab:merge', 'google_hits', 'jenkins', 'jira',
            'mediawiki', 'meetup', 'mozillaclub', 'nntp', 'phabricator',
            'redmine', 'remo', 'rss', 'stackexchange', 'slack', 'telegram',
            'twitter',
        ]

        # The projects have to be loaded first
        TaskProjects(config).execute()

        # Collect all the backend sections ...
        for section in backend_sections:
            TaskRawDataCollection(config, backend_section=section).execute()

        # ... and only then enrich them
        for section in backend_sections:
            enrich_task = TaskEnrich(config, backend_section=section)
            self.assertEqual(enrich_task.execute(), None)
예제 #16
0
    def test_compose_p2o_params(self):
        """Check the p2o params composed for a backend and a repository"""
        repo_url = "https://stackoverflow.com/questions/tagged/example"
        task = Task(Config(CONF_FILE))

        # A plain repository URL ends up as the only param
        params = task._compose_p2o_params("stackexchange", repo_url)
        self.assertEqual(params, {'url': repo_url})
    def test_create_dashboard_multi_ds_kibiter_6(self):
        """ Test the creation of dashboards with filtered data sources (version 6.1.0) """
        config = Config(CONF_FILE)
        es_url = config.conf['es_enrichment']['url']
        es_kibana_url = urljoin(es_url + "/", '.kibana')
        kibiter_api_url = urljoin(config.conf['panels']['kibiter_url'],
                                  KIBANA_SETTINGS_URL)

        headers = {"Content-Type": "application/json", "kbn-xsrf": "true"}

        # Register the .kibana index URL
        httpretty.register_uri(httpretty.GET, es_kibana_url, body={}, status=200)

        # Register the two settings URLs that receive POST requests
        for setting in ('/defaultIndex', '/timepicker:timeDefaults'):
            httpretty.register_uri(httpretty.POST,
                                   kibiter_api_url + setting,
                                   body={},
                                   status=200,
                                   forcing_headers=headers)

        MockedTaskPanels.VERSION = '6.1.0'
        MockedTaskPanels(config).execute()
예제 #18
0
    def _get_repos_by_backend(self):
        """Return a dict mapping each enabled backend section to its repositories

        For every section returned by `Config.get_backend_sections()`, the
        repositories listed under its backend (`Task.get_backend`) are
        accumulated over all the projects. Only the sections present in
        `self.conf` are returned.

        :returns: dict of backend section -> list of repositories
        """
        projects = TaskProjects.get_projects()

        output = {}
        for backend_section in Config.get_backend_sections():
            # The backend does not depend on the project: resolve it once
            backend = Task.get_backend(backend_section)
            for pro in projects:
                if backend in projects[pro]:
                    # Accumulate into a list owned by `output`. Storing the
                    # project's list itself and extending it with `+=` would
                    # modify the data returned by TaskProjects.get_projects()
                    # (and be shared by every section using that backend).
                    output.setdefault(backend_section, []).extend(
                        projects[pro][backend])

        # backend could be in project/repo file but not enabled in
        # sirmordred conf file
        enabled = {
            section: repos
            for section, repos in output.items()
            if section in self.conf
        }

        # logger.debug('repos to be retrieved: %s ', enabled)
        return enabled
예제 #19
0
    def test_initialization(self):
        """Check that the task keeps the config it was created with"""
        config = Config(CONF_FILE)

        projects_task = TaskProjects(config)
        self.assertEqual(projects_task.config, config)
예제 #20
0
    def test_init_studies(self):
        """Check that the sections and params of the studies are loaded"""
        config = Config(CONF_SLIM)

        self.assertIsNotNone(config.conf)
        self.assertIsNone(config.raw_conf)
        self.assertEqual(config.conf_list, [CONF_SLIM])

        top_sections = config.conf.keys()
        demography = config.conf['enrich_demography:git'].keys()
        areas_of_code = config.conf['enrich_areas_of_code:git'].keys()
        onion_git = config.conf['enrich_onion:git'].keys()
        onion_github = config.conf['enrich_onion:github'].keys()

        self.assertEqual(len(config.conf.keys()), 18)

        # Each entry: (names that must be present, where to look for them)
        expectations = [
            (('general', 'projects', 'es_collection', 'es_enrichment',
              'sortinghat', 'panels', 'phases'), top_sections),

            (('git', 'enrich_demography:git'), top_sections),
            (('date_field', 'author_field'), demography),

            (('enrich_areas_of_code:git',), top_sections),
            (('in_index', 'out_index', 'sort_on_field', 'no_incremental'),
             areas_of_code),

            (('enrich_onion:git',), top_sections),
            (('in_index', 'out_index', 'data_source', 'contribs_field',
              'timeframe_field', 'sort_on_field', 'no_incremental'),
             onion_git),

            (('github:issues', 'github:pulls', 'enrich_onion:github'),
             top_sections),
            # NOTE(review): 'in_index_prs' is checked twice; the second one
            # may have been meant to be 'out_index_prs' -- confirm
            (('in_index_iss', 'in_index_prs', 'out_index_iss', 'in_index_prs',
              'data_source_iss', 'data_source_prs', 'contribs_field',
              'timeframe_field', 'sort_on_field', 'no_incremental'),
             onion_github),
        ]

        for names, container in expectations:
            for name in names:
                self.assertIn(name, container)
예제 #21
0
    def test_create_dashboard_stackexchange(self, mock_get_dashboard_name, mock_import_dashboard):
        """ Check the creation of a dashboard whose data sources include stackexchange """
        # The mocked dashboard name lookup returns an empty name
        mock_get_dashboard_name.return_value = ''

        panels_task = TaskPanels(Config(CONF_FILE))
        data_sources = ["stackexchange"]
        panels_task.create_dashboard(None, data_sources=data_sources)
예제 #22
0
    def test_create_dashboard_multi_ds(self, mock_get_dashboard_name):
        """ Check the creation of dashboards with filtered data sources """
        # The mocked dashboard name lookup returns an empty name
        mock_get_dashboard_name.return_value = ''

        panels_task = TaskPanels(Config(CONF_FILE))
        panels_task.execute()
예제 #23
0
    def test_get_collection_url(self):
        """Check that a backend can overwrite the collection url"""
        task = Task(Config(CONF_FILE))
        task.backend_section = "stackexchange"

        collection_url = task._get_collection_url()
        self.assertEqual(collection_url, COLLECTION_URL_STACKEXCHANGE)
    def test_contains(self):
        """Check which section names are reported as contained in a Config"""
        config = Config(CONF_FULL)
        config.conf = {
            'backend:param1': {
                'shared_param': 'value 1',
                'unique_to_1': 'value 2',
            },
            'backend:param2': {
                'shared_param': 'value 3',
                'unique_to_2': 'value 4',
            },
            'backend:param1:param2': {
                'param_combo': 'value 5',
            }
        }

        def check_contained(sections):
            for section in sections:
                self.assertIn(section, config)

        def check_missing(sections):
            for section in sections:
                self.assertNotIn(section, config)

        # Directly contained
        check_contained(['backend:param1', 'backend:param2', 'backend:param1:param2'])

        # Implicitly contained
        check_contained(['backend:param2:param1', 'backend:fake:param1', 'backend:param1:fake'])

        # Not contained
        check_missing(['backend', 'backend:fake', 'fake-backend:param1'])

        # Set the previously missing backend param
        config.conf['backend'] = {
            'arbitrary_data': 'idc',
        }

        # Contained after backend specified
        check_contained(['backend', 'backend:fake', 'backend:param1'])

        # Still not contained
        check_missing(['fake-backend:param1', 'fake-backend'])
예제 #25
0
    def test_init(self):
        """Check the attributes of a freshly created Config"""
        config = Config(CONF_FULL)

        # The parsed conf is available, the raw one is not kept
        self.assertIsNotNone(config.conf)
        self.assertIsNone(config.raw_conf)

        # Only the given file is listed, and it yields 47 sections
        self.assertEqual(config.conf_list, [CONF_FULL])
        section_names = config.conf.keys()
        self.assertEqual(len(section_names), 47)
    def test_execute_no_collection(self):
        """Check that the raw data is not downloaded when --filter-no-collection is true"""
        config = Config(CONF_FILE_NO_COLL)
        cfg = config.get_conf()
        collection_task = TaskRawDataCollection(config, backend_section=GIT_BACKEND_SECTION)

        # The projects have to be loaded before collecting
        TaskProjects(config).execute()
        outcome = collection_task.execute()
        self.assertIsNotNone(outcome)

        # Check that the filter --filter-no-collection works
        search_url = (cfg['es_collection']['url'] + "/" +
                      cfg[GIT_BACKEND_SECTION]['raw_index'] + "/_search?size=0")
        response = requests.get(search_url, verify=False)
        self.assertEqual(response.json()['hits']['total'], 40)
    def test_execute(self):
        """Check that the raw collection task runs and stores the items"""
        config = Config(CONF_FILE)
        cfg = config.get_conf()
        collection_task = TaskRawDataCollection(config, backend_section=GIT_BACKEND_SECTION)

        # The projects have to be loaded before collecting
        TaskProjects(config).execute()
        outcome = collection_task.execute()
        self.assertIsNotNone(outcome)

        # Check that the raw index holds the expected number of items
        search_url = (cfg['es_collection']['url'] + "/" +
                      cfg[GIT_BACKEND_SECTION]['raw_index'] + "/_search?size=0")
        response = requests.get(search_url, verify=False)
        self.assertEqual(response.json()['hits']['total'], 3603)
    def test_execute(self):
        """Check that the raw collection task can be run"""
        config = Config(CONF_FILE)
        collection_task = TaskRawDataCollection(config, backend_section=GIT_BACKEND_SECTION)

        # The projects have to be loaded before collecting
        TaskProjects(config).execute()

        outcome = collection_task.execute()
        self.assertEqual(outcome, None)
    def test_initialization(self):
        """Check that the task keeps its config and backend section"""
        config = Config(CONF_FILE)

        enrich_task = TaskEnrich(config, backend_section=GIT_BACKEND_SECTION)

        self.assertEqual(enrich_task.config, config)
        self.assertEqual(enrich_task.backend_section, GIT_BACKEND_SECTION)
    def test_execute(self):
        """Test whether the Task could be run"""
        # Database preparation belongs in the class-level setUp(): a setUp
        # function nested inside this test would never be called, so none
        # is defined here.

        config = Config(CONF_FILE)
        cfg = config.get_conf()
        # We need to load the projects
        TaskProjects(config).execute()
        backend_section = GIT_BACKEND_SECTION
        task = TaskEnrich(config, backend_section=backend_section)
        self.assertEqual(task.execute(), None)

        # Check that the enrichment went well
        es_collection = cfg['es_collection']['url']
        es_enrichment = cfg['es_enrichment']['url']
        raw_index = es_collection + "/" + cfg[GIT_BACKEND_SECTION]['raw_index']
        enrich_index = es_enrichment + "/" + cfg[GIT_BACKEND_SECTION][
            'enriched_index']

        # Ask both indexes for their total number of items
        r = requests.get(raw_index + "/_search?size=0", verify=False)
        raw_items = r.json()['hits']['total']
        r = requests.get(enrich_index + "/_search?size=0", verify=False)
        enriched_items = r.json()['hits']['total']

        # Every raw item must have been enriched
        self.assertEqual(raw_items, enriched_items)