Beispiel #1
0
 def test_scan(self):
     """
     Run the internal ``_scan`` query on unit/repo1 with default
     params and compare the ids of the returned objects with the
     expected ones (order is not significant).
     """
     wanted_ids = ["c1_e1", "c1_e2", "c1_e3", "c1_e4", "c1_e5"]
     query_params = set_params({})
     found_ids = []
     for doc in queries._scan(
         self.eldb.es, self.eldb.index, "unit/repo1", query_params
     ):
         found_ids.append(doc["id"])
     # assertCountEqual ignores ordering but not duplicates.
     self.assertCountEqual(found_ids, wanted_ids)
Beispiel #2
0
 def test_scan(self):
     """
     Check the ids returned by the internal ``_scan`` query for
     unit/repo1 (default params), regardless of their order.
     """
     query_params = set_params({})
     hits = queries._scan(
         self.eldb.es,
         self.eldb.index,
         'unit/repo1',
         query_params,
     )
     hit_ids = [hit['id'] for hit in hits]
     self.assertCountEqual(
         hit_ids,
         ['c1_e1', 'c1_e2', 'c1_e3', 'c1_e4', 'c1_e5'],
     )
Beispiel #3
0
 def test_unknown_query(self):
     """
     Running the query named "unknown" must raise
     UnknownQueryException.
     """
     query_params = set_params({})
     with self.assertRaises(UnknownQueryException):
         self.eldb.run_named_query("unknown", "unit/repo1", query_params)
Beispiel #4
0
 def test_all_queries(self):
     """
     Run every query listed in ``queries.public_queries`` on
     unit/repo1 with default params and require each result to be
     a dict, list, tuple or int.
     """
     accepted_types = (dict, list, tuple, int)
     rejected = []
     for name in queries.public_queries:
         query_params = set_params({})
         result = self.eldb.run_named_query(
             name, "unit/repo1", query_params
         )
         if isinstance(result, accepted_types):
             continue
         # Keep the offending result so the failure message shows it.
         rejected.append((name, result))
     self.assertEqual(rejected, [])
Beispiel #5
0
def do_query(index, repository_fullname, args, name):
    """Run the named query and return its result through ``jsonify``.

    The database handle is created for ``index`` without creating the
    index (``create=False``); the Elasticsearch address is read from the
    ``ELASTIC_CONN`` environment variable, falling back to
    ``localhost:9200``.

    Returns the ``jsonify``-ed query result, or an
    ``(error message, 404)`` tuple when the query raises
    InvalidIndexError.
    """
    query_params = utils.set_params(args)
    elastic_conn = os.getenv("ELASTIC_CONN", "localhost:9200")
    db = ELmonocleDB(
        elastic_conn=elastic_conn,
        index=index,
        prefix=CHANGE_PREFIX,
        create=False,
    )
    try:
        result = db.run_named_query(name, repository_fullname, query_params)
    except InvalidIndexError:
        # The message reports the "index" request argument rather than
        # the ``index`` parameter.
        return "Invalid index: %s" % request.args.get("index"), 404
    else:
        return jsonify(result)
Beispiel #6
0
    def test_approvals_param(self):
        """
        Exercise the ``approvals`` param of the changes_and_events
        query on unit/repo[12] (with gte=2020-01-01), once with a
        single approval value and once with a comma separated list.
        """
        # (approvals value, expected total, expected ids)
        cases = [
            ("Code-Review+2", 2, ["c1", "c1_e4"]),
            (
                "CHANGES_REQUESTED,APPROVED",
                4,
                ["c2", "c2_e4", "c3", "c3_e2"],
            ),
        ]
        for approvals, expected_total, expected_ids in cases:
            query_params = set_params(
                {"approvals": approvals, "gte": "2020-01-01"}
            )
            result = self.eldb.run_named_query(
                "changes_and_events", "unit/repo[12]", query_params
            )
            self.assertEqual(result["total"], expected_total, result)
            found_ids = [entry["id"] for entry in result["items"]]
            self.assertCountEqual(found_ids, expected_ids)
Beispiel #7
0
 def test_change_and_events(self):
     """
     Run changes_and_events on unit/repo1 and inspect the first
     item of type "Change": it must be flagged as including tests
     and as having issue tracker links, the first of which points
     to issue #42.
     """
     query_params = set_params({})
     result = self.eldb.run_named_query(
         'changes_and_events', 'unit/repo1', query_params
     )
     self.assertEqual(result['total'], 6)
     changes = [
         item for item in result['items'] if item['type'] == 'Change'
     ]
     first_change = changes[0]
     self.assertTrue(first_change['tests_included'])
     self.assertTrue(first_change['has_issue_tracker_links'])
     first_link = first_change['issue_tracker_links'][0]
     self.assertListEqual(
         first_link,
         ['#42', 'https://github.com/unit/repo1/issues/42'],
     )
Beispiel #8
0
    def test_approvals_param(self):
        """
        Check how changes_and_events (on unit/repo[12], with
        gte=2020-01-01) is scoped by the ``approvals`` param.
        """

        def check(approvals, expected_total, expected_ids):
            # Run the query for one approvals value and verify both
            # the reported total and the returned ids (any order).
            query_params = set_params({
                'approvals': approvals,
                'gte': '2020-01-01',
            })
            result = self.eldb.run_named_query(
                'changes_and_events', 'unit/repo[12]', query_params
            )
            self.assertEqual(result['total'], expected_total, result)
            self.assertCountEqual(
                [entry['id'] for entry in result['items']],
                expected_ids,
            )

        check('Code-Review+2', 2, ['c1', 'c1_e4'])
        check(
            'CHANGES_REQUESTED,APPROVED',
            4,
            ['c2', 'c2_e4', 'c3', 'c3_e2'],
        )
Beispiel #9
0
 def test_change_and_events(self):
     """
     Query changes_and_events for unit/repo1 (default params) and
     verify the total plus a few attributes of the first returned
     item whose type is "Change".
     """
     result = self.eldb.run_named_query(
         "changes_and_events", "unit/repo1", set_params({})
     )
     self.assertEqual(result["total"], 6)
     only_changes = list(
         filter(lambda item: item["type"] == "Change", result["items"])
     )
     change = only_changes[0]
     for flag in ("tests_included", "has_issue_tracker_links"):
         self.assertTrue(change[flag])
     expected_link = ["#42", "https://github.com/unit/repo1/issues/42"]
     self.assertListEqual(change["issue_tracker_links"][0], expected_link)
 def test_project_param(self):
     """
     Run last_changes on every repository (".*") scoped to the
     project "mytestproject", whose definition only sets a file
     regex, and expect a total of one.
     """
     query_params = set_params({"project": "mytestproject"})
     project = ProjectDefinition(
         name="mytestproject",
         repository_regex=None,
         branch_regex=None,
         file_regex=r".*backend.py",
     )
     # The project definitions are injected directly into the params.
     query_params["_project_defs"] = [project]
     result = self.eldb.run_named_query("last_changes", ".*", query_params)
     self.assertEqual(result["total"], 1, result)
Beispiel #11
0
 def test_events_histo(self):
     """
     Run events_histo on unit/repo1 with gte=2020-01-01 and
     lte=2020-01-02 and compare the result with the expected
     (buckets, value) tuple.
     """
     expected_buckets = [
         {"doc_count": 4, "key": 1577836800000, "key_as_string": "2020-01-01"},
         {"doc_count": 1, "key": 1577923200000, "key_as_string": "2020-01-02"},
     ]
     expected = (expected_buckets, 2.5)
     query_params = set_params({"gte": "2020-01-01", "lte": "2020-01-02"})
     result = self.eldb.run_named_query(
         "events_histo", "unit/repo1", query_params
     )
     difference = DeepDiff(result, expected)
     if not difference:
         return
     raise DiffException(difference)
Beispiel #12
0
 def test_events_top_authors(self):
     """
     Run events_top_authors on unit/repo1 with default params and
     compare the whole result with the expected stats.
     """
     # (author, doc_count) pairs in the expected order.
     ranking = (("jane", 3), ("john", 2))
     expected = {
         "count_avg": 2.5,
         "count_median": 2.5,
         "items": [
             {"doc_count": count, "key": author}
             for author, count in ranking
         ],
         "total": 2,
         "total_hits": 5,
     }
     result = self.eldb.run_named_query(
         "events_top_authors", "unit/repo1", set_params({})
     )
     difference = DeepDiff(result, expected)
     if difference:
         raise DiffException(difference)
Beispiel #13
0
 def test_events_top_authors(self):
     """
     Check the full result of the events_top_authors query for
     unit/repo1 (default params).
     """
     query_params = set_params({})
     result = self.eldb.run_named_query(
         'events_top_authors', 'unit/repo1', query_params
     )
     expected_items = [
         {'doc_count': 3, 'key': 'jane'},
         {'doc_count': 2, 'key': 'john'},
     ]
     expected = dict(
         count_avg=2.5,
         count_median=2.5,
         items=expected_items,
         total=2,
         total_hits=5,
     )
     difference = DeepDiff(result, expected)
     if not difference:
         return
     raise DiffException(difference)
Beispiel #14
0
 def test_events_histo(self):
     """
     Run events_histo on unit/repo1 with gte=2020-01-01 and
     lte=2020-01-02; the expected result holds three buckets
     (the first one, 2019-12-31, being empty) and a second value.
     """
     # (doc_count, key, key_as_string) for each expected bucket.
     bucket_rows = (
         (0, 1577750400000, '2019-12-31'),
         (4, 1577836800000, '2020-01-01'),
         (1, 1577923200000, '2020-01-02'),
     )
     expected = (
         [
             {'doc_count': count, 'key': key, 'key_as_string': day}
             for count, key, day in bucket_rows
         ],
         1.6666666666666667,
     )
     query_params = set_params({'gte': '2020-01-01', 'lte': '2020-01-02'})
     result = self.eldb.run_named_query(
         'events_histo', 'unit/repo1', query_params
     )
     difference = DeepDiff(result, expected)
     if difference:
         raise DiffException(difference)
Beispiel #15
0
def do_query(index, repository_fullname, args, name):
    """Run the named query and return its result through ``jsonify``.

    The database handle is created for ``index`` without creating the
    index (``create=False``). All Elasticsearch connection settings are
    read from ``ELASTIC_*`` environment variables.

    Returns the ``jsonify``-ed query result, or an
    ``(error message, 404)`` tuple when the query raises
    InvalidIndexError.
    """
    env = os.getenv
    query_params = utils.set_params(args)
    # NOTE(review): the environment values below are handed over as raw
    # strings (or None when unset), including the boolean-looking ones;
    # confirm ELmonocleDB converts them as intended.
    db = ELmonocleDB(
        elastic_conn=env("ELASTIC_CONN", "localhost:9200"),
        index=index,
        prefix=CHANGE_PREFIX,
        create=False,
        user=env("ELASTIC_USER"),
        password=env("ELASTIC_PASSWORD"),
        use_ssl=env("ELASTIC_USE_SSL"),
        verify_certs=env("ELASTIC_INSECURE"),
        ssl_show_warn=env("ELASTIC_SSL_SHOW_WARN"),
    )
    try:
        result = db.run_named_query(name, repository_fullname, query_params)
    except InvalidIndexError:
        # The message reports the "index" request argument rather than
        # the ``index`` parameter.
        return "Invalid index: %s" % request.args.get("index"), 404
    else:
        return jsonify(result)
Beispiel #16
0
 def test_repos_top_merged(self):
     """
     Run repos_top on unit/repo[12] with state=MERGED and compare
     the whole result with the expected stats.
     """
     # (repository, doc_count) pairs in the expected order.
     ranking = (("unit/repo2", 2), ("unit/repo1", 1))
     expected = {
         "items": [
             {"key": repo, "doc_count": count} for repo, count in ranking
         ],
         "count_avg": 1.5,
         "count_median": 1.5,
         "total": 2,
         "total_hits": 3,
     }
     query_params = set_params({"state": "MERGED"})
     result = self.eldb.run_named_query(
         "repos_top", "unit/repo[12]", query_params
     )
     difference = DeepDiff(result, expected)
     if difference:
         raise DiffException(difference)
Beispiel #17
0
 def test_repos_top_merged(self):
     """
     Check the full result of the repos_top query for
     unit/repo[12] when scoped to state=MERGED.
     """
     result = self.eldb.run_named_query(
         'repos_top', 'unit/repo[12]', set_params({'state': 'MERGED'})
     )
     expected_items = [
         {'key': 'unit/repo2', 'doc_count': 2},
         {'key': 'unit/repo1', 'doc_count': 1},
     ]
     expected = dict(
         items=expected_items,
         count_avg=1.5,
         count_median=1.5,
         total=2,
         total_hits=3,
     )
     difference = DeepDiff(result, expected)
     if not difference:
         return
     raise DiffException(difference)
Beispiel #18
0
 def test_events_top(self):
     """
     Run the internal ``_events_top`` query (called with "type")
     on unit/repo1 and compare the whole result with the expected
     stats.
     """
     # (doc_count, key) for each expected item, in order.
     ranking = (
         (2, "ChangeReviewedEvent"),
         (1, "ChangeCommentedEvent"),
         (1, "ChangeCreatedEvent"),
         (1, "ChangeMergedEvent"),
     )
     expected = {
         "count_avg": 1.25,
         "count_median": 1.0,
         "items": [
             {"doc_count": count, "key": key} for count, key in ranking
         ],
         "total": 4,
         "total_hits": 5,
     }
     query_params = set_params({})
     result = queries._events_top(
         self.eldb.es, self.eldb.index, "unit/repo1", "type", query_params
     )
     difference = DeepDiff(result, expected)
     if difference:
         raise DiffException(difference)
Beispiel #19
0
 def test_events_top(self):
     """
     Check the full result of the internal ``_events_top`` query
     (called with 'type') for unit/repo1 with default params.
     """
     query_params = set_params({})
     result = queries._events_top(
         self.eldb.es,
         self.eldb.index,
         'unit/repo1',
         'type',
         query_params,
     )
     expected_items = [
         {'doc_count': 2, 'key': 'ChangeReviewedEvent'},
         {'doc_count': 1, 'key': 'ChangeCommentedEvent'},
         {'doc_count': 1, 'key': 'ChangeCreatedEvent'},
         {'doc_count': 1, 'key': 'ChangeMergedEvent'},
     ]
     expected = dict(
         count_avg=1.25,
         count_median=1.0,
         items=expected_items,
         total=4,
         total_hits=5,
     )
     difference = DeepDiff(result, expected)
     if not difference:
         return
     raise DiffException(difference)
Beispiel #20
0
def main():
    """Command line entry point of the ``monocle`` tool.

    Builds the argument parser (one sub-command per crawler driver, plus
    ``dbmanage`` and ``dbquery``), configures logging from ``--loglevel``
    and dispatches on the selected sub-command:

    * a command ending in ``_crawler`` runs a MonocleCrawler,
    * ``dbmanage`` deletes the events of a repository,
    * ``dbquery`` runs a named query and pretty-prints its result.

    Returns:
        1 when no sub-command was given (after printing the usage),
        otherwise None.
    """
    parser = argparse.ArgumentParser(prog='monocle')
    parser.add_argument(
        '--loglevel', help='logging level', default='INFO')
    subparsers = parser.add_subparsers(title='Subcommands',
                                       description='valid subcommands',
                                       dest="command")

    for crawler_driver in (pullrequest, review):
        parser_crawler = subparsers.add_parser(
            crawler_driver.name, help=crawler_driver.help)
        # type=int keeps a user supplied value consistent with the
        # integer default instead of handing over a string.
        parser_crawler.add_argument(
            '--loop-delay', help='Request last updated events every N secs',
            default=900, type=int)
        parser_crawler.add_argument(
            '--host', help='Base url of the code review server',
            required=True)
        # Each driver registers its own extra options.
        crawler_driver.init_crawler_args_parser(parser_crawler)

    parser_dbmanage = subparsers.add_parser(
        'dbmanage', help='Database manager')
    parser_dbmanage.add_argument(
        '--delete-repository',
        help='Delete events related to a repository (regexp)',
        required=True)

    parser_dbquery = subparsers.add_parser(
        'dbquery', help='Run an existsing query on stored events')
    parser_dbquery.add_argument(
        '--interval', help='Histogram interval',
        default="3h")
    parser_dbquery.add_argument(
        '--name', help='The query name',
        required=True)
    parser_dbquery.add_argument(
        '--repository', help='Scope to events of a repository (regexp)',
        required=True)
    parser_dbquery.add_argument(
        '--gte', help='Scope to events created after date')
    parser_dbquery.add_argument(
        '--lte', help='Scope to events created before date')
    parser_dbquery.add_argument(
        '--on_cc_gte',
        help='Scope to events related to changes created after date')
    parser_dbquery.add_argument(
        '--on_cc_lte',
        help='Scope to events related to changes created before date')
    parser_dbquery.add_argument(
        '--ec-same-date',
        help='Scope to events related to changes created during the '
        'same date bondaries defined by gte/lte arguments',
        action='store_true')
    parser_dbquery.add_argument(
        '--type', help='Scope to events types list (comma separated)')
    parser_dbquery.add_argument(
        '--authors', help='Scope to authors (comma separated)')
    parser_dbquery.add_argument(
        '--approval', help='Scope to events with approval')
    # Same reasoning as --loop-delay: keep the value an int.
    parser_dbquery.add_argument(
        '--size', help='Return maximum of size results',
        default=10, type=int)
    parser_dbquery.add_argument(
        '--exclude-authors', help='Authors exclude list (comma separated)')

    args = parser.parse_args()

    logging.basicConfig(
        level=getattr(logging, args.loglevel.upper()))

    if not args.command:
        parser.print_usage()
        return 1

    # The three cases below are mutually exclusive.
    if args.command.endswith("_crawler"):
        crawler = MonocleCrawler(args)
        crawler.run()
    elif args.command == "dbmanage":
        if args.delete_repository:
            db = ELmonocleDB()
            db.delete_repository(args.delete_repository)
    elif args.command == "dbquery":
        db = ELmonocleDB()
        params = utils.set_params(args)
        # Leading '^' characters are stripped from the repository regexp
        # before it is handed to the query.
        ret = db.run_named_query(
            args.name,
            args.repository.lstrip('^'),
            params)
        pprint(ret)
Beispiel #21
0
    def test_most_active_authors_stats(self):
        """
        Run most_active_authors_stats on every repository (".*"),
        first with default params and then scoped to the author
        "jane", comparing each full result with the expected stats.
        """

        def stats(avg, median, ranking, total, total_hits):
            # Build one expected per-event-type entry; ``ranking`` is a
            # sequence of (author, doc_count) pairs in expected order.
            return {
                'count_avg': avg,
                'count_median': median,
                'items': [
                    {'doc_count': count, 'key': author}
                    for author, count in ranking
                ],
                'total': total,
                'total_hits': total_hits,
            }

        def check(query_args, expected):
            # Run the query and raise when the result differs.
            query_params = set_params(query_args)
            result = self.eldb.run_named_query(
                'most_active_authors_stats', '.*', query_params
            )
            difference = DeepDiff(result, expected)
            if difference:
                raise DiffException(difference)

        # The int/float spelling of each number is kept as is on purpose.
        check({}, {
            'ChangeCommentedEvent': stats(
                1, 1.0, [('jane', 1), ('steve', 1)], 2, 2),
            'ChangeCreatedEvent': stats(
                1.3333333333333333, 1,
                [('jane', 2), ('john', 1), ('steve', 1)], 3, 4),
            'ChangeMergedEvent': stats(
                1, 1, [('jane', 1), ('john', 1), ('steve', 1)], 3, 3),
            'ChangeReviewedEvent': stats(
                1.3333333333333333, 1,
                [('john', 2), ('jane', 1), ('steve', 1)], 3, 4),
        })

        check({'authors': 'jane'}, {
            'ChangeCommentedEvent': stats(1, 1, [('jane', 1)], 1, 1),
            'ChangeCreatedEvent': stats(2, 2, [('jane', 2)], 1, 2),
            'ChangeMergedEvent': stats(1, 1, [('jane', 1)], 1, 1),
            'ChangeReviewedEvent': stats(1, 1, [('jane', 1)], 1, 1),
        })
Beispiel #22
0
def main() -> None:
    """Command line entry point of the ``monocle`` tool.

    Parses the global Elasticsearch options and one of three
    sub-commands, then dispatches on it:

    * ``crawler`` loads and validates the YAML configuration, builds the
      GitHub and Gerrit crawlers described for each tenant and starts
      them,
    * ``dbmanage`` deletes a repository or the index, updates identities
      and/or runs a migration process,
    * ``dbquery`` runs a named query and pretty-prints its result.

    Exits with status 1 when no sub-command is given, when the crawler
    configuration file is not accessible, when ``--update-idents`` is
    used without ``--config``, or when the requested query is unknown.
    """
    parser = argparse.ArgumentParser(prog="monocle")
    parser.add_argument("--loglevel", help="logging level", default="INFO")
    parser.add_argument(
        "--elastic-timeout",
        help="Elasticsearch connection retry timeout",
        default=10,
        type=int,
    )
    parser.add_argument("--elastic-conn",
                        help="Elasticsearch connection info",
                        default="localhost:9200")
    parser.add_argument(
        "--use-ssl",
        help="Use https protocol for communication with Elasticsearch",
        action="store_true",
    )
    # store_false: args.insecure is True unless the flag is given, which
    # is why it can be passed straight to ``verify_certs`` below.
    parser.add_argument(
        "--insecure",
        help="Skip SSL CA cert validation",
        action="store_false",
    )
    # store_false as well: the value is True unless the flag is given.
    parser.add_argument(
        "--ssl_show_warn",
        help="Skip showing a SSL warning message if it is not signed "
        "by CA authority",
        action="store_false",
    )
    parser.add_argument(
        "--elastic-user",
        help="Username for Elasticsearch authorization",
    )
    parser.add_argument(
        "--elastic-password",
        help="Password for Elasticsearch authorization",
    )
    subparsers = parser.add_subparsers(title="Subcommands",
                                       description="valid subcommands",
                                       dest="command")

    parser_crawler = subparsers.add_parser("crawler",
                                           help="Threaded crawlers pool")
    parser_crawler.add_argument("--config",
                                help="Configuration file of the crawlers pool",
                                required=True)

    parser_dbmanage = subparsers.add_parser("dbmanage",
                                            help="Database manager")
    parser_dbmanage.add_argument("--config",
                                 help="Configuration file",
                                 required=False)
    parser_dbmanage.add_argument(
        "--delete-repository",
        help="Delete events related to a repository (regexp)",
    )
    parser_dbmanage.add_argument(
        "--delete-index",
        help="Delete the index",
        action="store_true",
    )
    parser_dbmanage.add_argument("--index",
                                 help="The Elastisearch index name",
                                 required=True)
    parser_dbmanage.add_argument(
        "--run-migrate",
        help="Run the migration process",
    )

    parser_dbmanage.add_argument(
        "--update-idents",
        help="Update identities",
        action="store_true",
    )

    parser_dbquery = subparsers.add_parser(
        "dbquery", help="Run an existsing query on stored events")
    parser_dbquery.add_argument("--index",
                                help="The Elastisearch index name",
                                required=True)
    parser_dbquery.add_argument("--name", help="The query name", required=True)
    parser_dbquery.add_argument(
        "--repository",
        help="Scope to events of repositories (regexp)",
        required=True)
    parser_dbquery.add_argument(
        "--target-branch",
        help="Scope to events of a target branches (regexp)")
    parser_dbquery.add_argument("--gte",
                                help="Scope to events created after date")
    parser_dbquery.add_argument("--lte",
                                help="Scope to events created before date")
    parser_dbquery.add_argument(
        "--on_cc_gte",
        help="Scope to events related to changes created after date")
    parser_dbquery.add_argument(
        "--on_cc_lte",
        help="Scope to events related to changes created before date")
    parser_dbquery.add_argument(
        "--ec-same-date",
        help="Scope to events related to changes created during the "
        "same date bondaries defined by gte/lte arguments",
        action="store_true",
    )
    parser_dbquery.add_argument(
        "--type", help="Scope to events types list (comma separated)")
    parser_dbquery.add_argument(
        "--files", help="Scope to changes containing this file regexp")
    parser_dbquery.add_argument(
        "--state",
        help="Scope to changes with state (comma separated)",
    )
    parser_dbquery.add_argument("--change-ids",
                                help="Scope to change ids (comma separated)")
    parser_dbquery.add_argument("--authors",
                                help="Scope to authors (comma separated)")
    parser_dbquery.add_argument(
        "--approvals",
        help="Scope to objects with approvals (comma separated)")
    parser_dbquery.add_argument(
        "--exclude-approvals", help="Approvals exclude list (comma separated)")
    # type=int (as for --elastic-timeout) keeps a user supplied value
    # consistent with the integer default instead of passing a string.
    parser_dbquery.add_argument("--size",
                                help="Return maximum of size results",
                                default=10,
                                type=int)
    parser_dbquery.add_argument(
        "--from",
        help="Starting index of the elements to retrieve",
        default=0,
        type=int)
    parser_dbquery.add_argument("--exclude-authors",
                                help="Authors exclude list (comma separated)")
    parser_dbquery.add_argument(
        "--tests-included",
        help="Scope to changes containing tests",
        action="store_true",
    )
    parser_dbquery.add_argument(
        "--self-merged",
        help="Scope to changes merged by their authors",
        action="store_true",
    )
    parser_dbquery.add_argument(
        "--has-issue-tracker-links",
        help="Scope to changes containing an issue tracker link",
        choices=["generic", "github.com", "altassian.net"],
    )
    parser_dbquery.add_argument(
        "--task-priority",
        help="Scope to changes related to task priorities (comma separated)",
    )
    parser_dbquery.add_argument(
        "--task-severity",
        help="Scope to changes related to task severities (comma separated)",
    )
    parser_dbquery.add_argument(
        "--task-issue-type",
        help="Scope to changes related to task type (comma separated)",
    )

    parser_dbquery.add_argument(
        "--task-score",
        help="Scope to changes related to task score '<op>: <val>'",
    )

    args = parser.parse_args()

    logging.basicConfig(
        level=getattr(logging, args.loglevel.upper()),
        format="%(asctime)s - %(name)s - %(thread)d - %(threadName)s - " +
        "%(levelname)s - %(message)s",
    )
    log = logging.getLogger(__name__)

    if not args.command:
        parser.print_usage()
        sys.exit(1)

    # Connection options shared by every ELmonocleDB created below.
    elastic_options = dict(
        elastic_conn=args.elastic_conn,
        user=args.elastic_user,
        password=args.elastic_password,
        use_ssl=args.use_ssl,
        verify_certs=args.insecure,
        ssl_show_warn=args.ssl_show_warn,
    )

    if args.command == "crawler":
        realpath = os.path.expanduser(args.config)
        if not os.path.isfile(realpath):
            log.error("Unable to access config: %s" % realpath)
            sys.exit(1)
        # Close the configuration file as soon as it has been read.
        with open(realpath) as config_file:
            configdata = yaml.safe_load(config_file.read())
        config.validate(configdata, config.schema)
        tpool: List[Union[Crawler, GroupCrawler]] = []
        # One GroupCrawler per token: crawlers sharing a token are grouped.
        group = {}
        app = None
        if os.getenv("APP_ID") and os.getenv("APP_KEY_PATH"):
            app = application.get_app(os.getenv("APP_ID"),
                                      os.getenv("APP_KEY_PATH"))
        for tenant in configdata["tenants"]:
            idents_config = config.get_idents_config(configdata,
                                                     tenant["index"])
            for crawler_item in tenant.get("crawler",
                                           {}).get("github_orgs", []):
                tg = pullrequest.TokenGetter(crawler_item["name"],
                                             crawler_item.get("token"), app)
                github_c_args = pullrequest.GithubCrawlerArgs(
                    command="github_crawler",
                    org=crawler_item["name"],
                    updated_since=crawler_item["updated_since"],
                    loop_delay=tenant["crawler"]["loop_delay"],
                    repository=crawler_item.get("repository"),
                    base_url=utils.strip_url(crawler_item["base_url"]),
                    token_getter=tg,
                    db=ELmonocleDB(
                        index=tenant["index"],
                        timeout=args.elastic_timeout,
                        **elastic_options,
                    ),
                    idents_config=idents_config,
                )
                gid = crawler_item.get("token")
                if not gid:
                    if app:
                        # No token, if we have a app then get the token from the app
                        gid = app.get_token(org=crawler_item["name"])
                    else:
                        log.info("Skip crawler because no token: %s" %
                                 github_c_args)
                        continue
                if gid not in group:
                    group[gid] = GroupCrawler()
                    tpool.append(group[gid])
                if github_c_args.repository:
                    repositories = [github_c_args.repository]
                else:
                    log.info("Discovering repositories in %s ..." %
                             github_c_args.org)
                    # No repository specified for that organization so
                    # try to discover all of them
                    rf = organization.RepositoriesFetcher(
                        graphql.GithubGraphQLQuery(token_getter=tg))
                    repos = rf.get(github_c_args.org)
                    repositories = [
                        repo["name"] for repo in repos
                        if not repo["isArchived"]
                    ]
                    log.info("Found %s repositories in %s ..." %
                             (len(repositories), github_c_args.org))
                for repository in repositories:
                    # NOTE(review): the same github_c_args object is
                    # mutated and handed to every Runner; confirm that
                    # Runner reads args.repository at construction time,
                    # otherwise all runners end up on the last repository.
                    github_c_args.repository = repository
                    group[gid].add_crawler(Runner(github_c_args))
            for crawler_item in tenant.get("crawler",
                                           {}).get("gerrit_repositories", []):
                gerrit_c_args = review.GerritCrawlerArgs(
                    command="gerrit_crawler",
                    repository=crawler_item["name"],
                    updated_since=crawler_item["updated_since"],
                    loop_delay=tenant["crawler"]["loop_delay"],
                    base_url=utils.strip_url(crawler_item["base_url"]),
                    insecure=crawler_item.get("insecure", False),
                    login=crawler_item.get("login"),
                    password=crawler_item.get("password"),
                    db=ELmonocleDB(
                        index=tenant["index"],
                        timeout=args.elastic_timeout,
                        **elastic_options,
                    ),
                    prefix=crawler_item.get("prefix"),
                    idents_config=idents_config,
                )
                tpool.append(Crawler(gerrit_c_args))
        log.info("%d configured threads" % len(tpool))
        for cthread in tpool:
            cthread.start()

    if args.command == "dbmanage":

        if args.update_idents and not args.config:
            log.error("Please provide the --config option")
            sys.exit(1)
        if args.update_idents:
            # Close the configuration file as soon as it has been parsed.
            with open(args.config) as config_file:
                idents_config = config.get_idents_config(
                    yaml.safe_load(config_file), args.index)
        else:
            idents_config = []
        db = ELmonocleDB(
            index=args.index,
            idents_config=idents_config,
            **elastic_options,
        )
        if args.delete_repository:
            db.delete_repository(args.delete_repository)
        if args.delete_index:
            db.delete_index()
        if args.update_idents:
            db.update_idents()
        if args.run_migrate:
            try:
                migrate.run_migrate(args.run_migrate, args.elastic_conn,
                                    args.index)
            except migrate.NotAvailableException:
                log.error("Error: %s is not a valid migration process" %
                          args.run_migrate)

    if args.command == "dbquery":
        db = ELmonocleDB(
            index=args.index,
            **elastic_options,
        )
        params = utils.set_params(args)
        try:
            # Leading '^' characters are stripped from the repository
            # regexp before it is handed to the query.
            ret = db.run_named_query(args.name, args.repository.lstrip("^"),
                                     params)
        except UnknownQueryException as err:
            log.error("Unable to run query: %s" % err)
            sys.exit(1)
        pprint(ret)
    def test_task_params(self):
        """
        Exercise the task related params (task_priority, task_type,
        task_score) on the last_changes query, plus task_priority on
        changes_and_events, across every repository (".*").
        """

        def check_last_changes(cases):
            # Each case is (query args, expected total); the first
            # mismatch fails the test.
            for query_args, expected_total in cases:
                query_params = set_params(query_args)
                result = self.eldb.run_named_query(
                    "last_changes", ".*", query_params
                )
                self.assertEqual(result["total"], expected_total, result)

        check_last_changes([
            ({"task_priority": "HIGH"}, 1),
            ({"task_priority": "HIGH,MEDIUM,LOW"}, 2),
            ({"task_type": "BUG"}, 2),
            ({"task_type": "BUG,CLIENT_IMPACT"}, 2),
            ({"task_priority": "LOW", "task_type": "BUG,CLIENT_IMPACT"}, 1),
        ])

        # Same priority filter on changes_and_events: here the order of
        # the returned ids is checked too.
        query_params = set_params({"task_priority": "HIGH"})
        result = self.eldb.run_named_query(
            "changes_and_events", ".*", query_params
        )
        self.assertEqual(result["total"], 2, result)
        returned_ids = [entry["id"] for entry in result["items"]]
        self.assertListEqual(returned_ids, ["c1", "c1_e2"])

        check_last_changes([
            ({"task_score": "> 10"}, 1),
            ({"task_score": ">= 10"}, 2),
            ({"task_score": "< 10"}, 0),
            ({"task_score": "== 50"}, 1),
            ({"task_score": "== 51"}, 0),
        ])
Beispiel #24
0
 def test_self_merged_param(self):
     """
     Test the self_merged param: the single match is merged by its author
     """
     filters = {"state": "MERGED", "self_merged": True}
     params = set_params(filters)
     result = self.eldb.run_named_query("last_changes", "unit/repo[12]", params)
     self.assertEqual(result["total"], 1)
     change = result["items"][0]
     self.assertEqual(change["author"], change["merged_by"])
Beispiel #25
0
    def test_changes_lifecycle_stats(self):
        """
        Test changes_lifecycle_stats query
        """
        # The three daily buckets expected for the 2020-01-01..2020-01-03
        # window, as (epoch milliseconds, label) pairs.
        days = (
            (1577836800000, "2020-01-01"),
            (1577923200000, "2020-01-02"),
            (1578009600000, "2020-01-03"),
        )

        def histo(counts, mean):
            # Build one expected histogram entry: the per-day buckets
            # paired with a trailing number. In every case below that
            # number equals sum(counts) / 3; it is passed in literally so
            # the int 0 / float distinction DeepDiff checks is preserved.
            buckets = [
                {"doc_count": count, "key": key, "key_as_string": label}
                for count, (key, label) in zip(counts, days)
            ]
            return (buckets, mean)

        def check(filters, wanted):
            # Run the query on all repositories and fail on any difference.
            params = set_params(filters)
            got = self.eldb.run_named_query("changes_lifecycle_stats", ".*", params)
            delta = DeepDiff(got, wanted)
            if delta:
                raise DiffException(delta)

        window = {"gte": "2020-01-01", "lte": "2020-01-03"}

        all_authors = {
            "ChangeCommitForcePushedEvent": {"authors_count": 0, "events_count": 0},
            "ChangeCommitPushedEvent": {"authors_count": 1, "events_count": 1},
            "ChangeCreatedEvent": {"authors_count": 2, "events_count": 2},
            "abandoned": 0,
            "self_merged": 0,
            "commits": 1.0,
            "duration": 86400.0,
            "duration_variability": 0.0,
            "histos": {
                "ChangeAbandonedEvent": histo((0, 0, 0), 0),
                "ChangeCommitForcePushedEvent": histo((0, 0, 0), 0),
                "ChangeCommitPushedEvent": histo((0, 0, 1), 0.3333333333333333),
                "ChangeCreatedEvent": histo((1, 0, 1), 0.6666666666666666),
                "ChangeMergedEvent": histo((0, 1, 0), 0.3333333333333333),
            },
            "merged": 1,
            "opened": 1,
            "ratios": {
                "abandoned/created": 0.0,
                "iterations/created": 1.5,
                "merged/created": 50.0,
                "self_merged/created": 0.0,
            },
            "tests": 50.0,
        }

        # No author filter, then a filter naming both authors: the second
        # run is expected to return exactly the same statistics.
        check(dict(window), all_authors)
        check(dict(window, authors="john,jane"), all_authors)

        john_only = {
            "ChangeCommitForcePushedEvent": {"authors_count": 0, "events_count": 0},
            "ChangeCommitPushedEvent": {"authors_count": 0, "events_count": 0},
            "ChangeCreatedEvent": {"authors_count": 1, "events_count": 1},
            "abandoned": 0,
            "self_merged": 0,
            "commits": 1.0,
            "duration": 86400.0,
            "duration_variability": 0.0,
            "histos": {
                "ChangeAbandonedEvent": histo((0, 0, 0), 0),
                "ChangeCommitForcePushedEvent": histo((0, 0, 0), 0),
                "ChangeCommitPushedEvent": histo((0, 0, 0), 0),
                "ChangeCreatedEvent": histo((1, 0, 0), 0.3333333333333333),
                "ChangeMergedEvent": histo((0, 1, 0), 0.3333333333333333),
            },
            "merged": 1,
            "opened": 0,
            "ratios": {
                "abandoned/created": 0.0,
                "iterations/created": 1.0,
                "merged/created": 100.0,
                "self_merged/created": 0.0,
            },
            "tests": 100.0,
        }

        # Restricting to a single author narrows the statistics.
        check(dict(window, authors="john"), john_only)
Beispiel #26
0
def main():
    """
    Print merged changes and their comments as a pipe-separated event log.

    Runs the "last_merged_changes" query and, for every change, prints one
    "A" line stamped with its creation time and one "M" line stamped with
    its merge time. Then scans "ChangeCommentedEvent" events and prints
    one "M" line per comment on a change seen in the first pass; comments
    on other changes are reported on stderr as "not merged".

    Each line has the form ``<epoch>|<author>|<A or M>|/<repository>/<title>|``.
    NOTE(review): this looks like Gource's custom log format - confirm.

    Timestamps returned by the database end in "Z" (UTC), so they are
    parsed as timezone-aware UTC values. A naive datetime would make
    ``timestamp()`` interpret them in the host's local timezone and shift
    every emitted epoch by the local UTC offset.
    """
    parser_dbquery = argparse.ArgumentParser(prog=sys.argv[0])
    parser_dbquery.add_argument(
        "--repository",
        help="Scope to events of a repository (regexp)",
        default=r".*")
    parser_dbquery.add_argument("--gte",
                                help="Scope to events created after date")
    parser_dbquery.add_argument("--lte",
                                help="Scope to events created before date")
    parser_dbquery.add_argument("--size",
                                help="Return maximum of size results",
                                default=1000)
    parser_dbquery.add_argument("--exclude-authors",
                                help="Authors exclude list (comma separated)")
    args = parser_dbquery.parse_args()

    def parse_utc(value, fmt="%Y-%m-%dT%H:%M:%SZ"):
        # example: 2020-02-24T19:05:13Z
        # strptime() returns a naive datetime; attach UTC explicitly so
        # timestamp() and comparisons are independent of the host timezone.
        parsed = datetime.datetime.strptime(value, fmt)
        return parsed.replace(tzinfo=datetime.timezone.utc)

    def emit(when, entry, action, change_title):
        # One event-log line; the trailing "|" leaves the last field empty.
        print("%.0f|%s|%s|/%s/%s|" % (
            when.timestamp(),
            entry["author"],
            action,
            entry["repository_fullname"],
            change_title,
        ))

    # A leading "^" anchor is stripped before the regexp is sent to the db.
    repository = args.repository.lstrip("^")

    db = ELmonocleDB()
    params = utils.set_params(args)
    data = db.run_named_query("last_merged_changes", repository, params)

    # --lte is inclusive: keep everything merged up to the end of that day.
    lte_time = (parse_utc(args.lte, "%Y-%m-%d") + datetime.timedelta(days=1)
                if args.lte else None)

    # Titles of the emitted changes, keyed by repository name and number,
    # so that comments can be attached to the right change below.
    title = {}

    for entry in data:
        created_time = parse_utc(entry["created_at"])
        merge_time = parse_utc(entry["merged_at"])
        if lte_time and merge_time > lte_time:
            continue

        emit(created_time, entry, "A", entry["title"])
        emit(merge_time, entry, "M", entry["title"])
        title[entry["repository_fullname_and_number"]] = entry["title"]

    params["etype"] = ("ChangeCommentedEvent", )
    data = db.run_named_query("_scan", repository, params)

    for entry in data:
        created_time = parse_utc(entry["created_at"])
        try:
            emit(created_time, entry, "M",
                 title[entry["repository_fullname_and_number"]])
        except KeyError:
            # The comment belongs to a change that was not emitted above.
            print(
                "%s not merged" % entry["repository_fullname_and_number"],
                file=sys.stderr,
            )
Beispiel #27
0
    def test_most_active_authors_stats(self):
        """
        Test query: most_active_authors_stats
        """

        def stats(avg, median, ranking, hits):
            # Expected block for one event type. `ranking` lists
            # (author, doc_count) pairs in the expected order; in every
            # case of this test "total" equals the number of ranked authors.
            return {
                "count_avg": avg,
                "count_median": median,
                "items": [{"doc_count": count, "key": who} for who, count in ranking],
                "total": len(ranking),
                "total_hits": hits,
            }

        def check(filters, wanted):
            # Run the query on all repositories and fail on any difference.
            params = set_params(filters)
            got = self.eldb.run_named_query("most_active_authors_stats", ".*", params)
            delta = DeepDiff(got, wanted)
            if delta:
                raise DiffException(delta)

        # Without filters every author shows up in the rankings.
        check(
            {},
            {
                "ChangeCommentedEvent": stats(
                    1, 1.0, [("jane", 1), ("steve", 1)], 2
                ),
                "ChangeCreatedEvent": stats(
                    1.3333333333333333, 1, [("jane", 2), ("john", 1), ("steve", 1)], 4
                ),
                "ChangeMergedEvent": stats(
                    1, 1, [("jane", 1), ("john", 1), ("steve", 1)], 3
                ),
                "ChangeReviewedEvent": stats(
                    1.3333333333333333, 1, [("john", 2), ("jane", 1), ("steve", 1)], 4
                ),
            },
        )

        # Scoped to a single author only that author remains.
        check(
            {"authors": "jane"},
            {
                "ChangeCommentedEvent": stats(1, 1, [("jane", 1)], 1),
                "ChangeCreatedEvent": stats(2, 2, [("jane", 2)], 2),
                "ChangeMergedEvent": stats(1, 1, [("jane", 1)], 1),
                "ChangeReviewedEvent": stats(1, 1, [("jane", 1)], 1),
            },
        )
Beispiel #28
0
def main():
    """
    Entry point of the ``monocle`` command line tool.

    Three subcommands are available:

    * ``crawler``: load and validate the YAML configuration given by
      ``--config``, build one crawler per configured GitHub organization
      or Gerrit repository, and start them.
    * ``dbmanage``: delete the events of a repository from an index.
    * ``dbquery``: run a named query on an index and pretty-print the
      result.

    Returns 1 (after printing the usage) when no subcommand is given and
    None otherwise. Exits with status 1 when the crawler configuration
    file cannot be accessed or when the requested query is unknown.
    """
    parser = argparse.ArgumentParser(prog='monocle')
    parser.add_argument('--loglevel', help='logging level', default='INFO')
    parser.add_argument(
        '--elastic-timeout',
        help='Elasticsearch connection retry timeout',
        default=10,
        type=int,
    )
    parser.add_argument('--elastic-conn',
                        help='Elasticsearch connection info',
                        default='localhost:9200')
    subparsers = parser.add_subparsers(title='Subcommands',
                                       description='valid subcommands',
                                       dest="command")

    parser_crawler = subparsers.add_parser('crawler',
                                           help='Threaded crawlers pool')
    parser_crawler.add_argument('--config',
                                help='Configuration file of the crawlers pool',
                                required=True)

    parser_dbmanage = subparsers.add_parser('dbmanage',
                                            help='Database manager')
    parser_dbmanage.add_argument(
        '--delete-repository',
        help='Delete events related to a repository (regexp)',
        required=True,
    )
    parser_dbmanage.add_argument('--index',
                                 help='The Elastisearch index name',
                                 required=True)

    parser_dbquery = subparsers.add_parser(
        'dbquery', help='Run an existsing query on stored events')
    parser_dbquery.add_argument('--index',
                                help='The Elastisearch index name',
                                required=True)
    parser_dbquery.add_argument('--name', help='The query name', required=True)
    parser_dbquery.add_argument(
        '--repository',
        help='Scope to events of repositories (regexp)',
        required=True)
    parser_dbquery.add_argument(
        '--target-branch',
        help='Scope to events of a target branches (regexp)')
    parser_dbquery.add_argument('--gte',
                                help='Scope to events created after date')
    parser_dbquery.add_argument('--lte',
                                help='Scope to events created before date')
    parser_dbquery.add_argument(
        '--on_cc_gte',
        help='Scope to events related to changes created after date')
    parser_dbquery.add_argument(
        '--on_cc_lte',
        help='Scope to events related to changes created before date')
    parser_dbquery.add_argument(
        '--ec-same-date',
        help='Scope to events related to changes created during the '
        'same date bondaries defined by gte/lte arguments',
        action='store_true',
    )
    parser_dbquery.add_argument(
        '--type', help='Scope to events types list (comma separated)')
    parser_dbquery.add_argument(
        '--files', help='Scope to changes containing this file regexp')
    parser_dbquery.add_argument(
        '--state',
        help='Scope to changes having this state',
        choices=['OPEN', 'CLOSED', 'MERGED'],
    )
    parser_dbquery.add_argument('--change-ids',
                                help='Scope to change ids (comma separated)')
    parser_dbquery.add_argument('--authors',
                                help='Scope to authors (comma separated)')
    parser_dbquery.add_argument('--approval',
                                help='Scope to events with approval')
    parser_dbquery.add_argument('--size',
                                help='Return maximum of size results',
                                default=10)
    parser_dbquery.add_argument(
        '--from', help='Starting index of the elements to retrieve', default=0)
    parser_dbquery.add_argument('--exclude-authors',
                                help='Authors exclude list (comma separated)')
    parser_dbquery.add_argument(
        '--tests-included',
        help='Scope to changes containing tests',
        action='store_true',
    )
    parser_dbquery.add_argument(
        '--has-issue-tracker-links',
        help='Scope to changes containing an issue tracker link',
        choices=['generic', 'github.com', 'altassian.net'],
    )

    args = parser.parse_args()

    logging.basicConfig(
        level=getattr(logging, args.loglevel.upper()),
        format="%(asctime)s - %(name)s - %(threadName)s - " +
        "%(levelname)s - %(message)s",
    )
    log = logging.getLogger(__name__)

    if not args.command:
        parser.print_usage()
        return 1

    if args.command == "crawler":
        realpath = os.path.expanduser(args.config)
        if not os.path.isfile(realpath):
            log.error('Unable to access config: %s', realpath)
            sys.exit(1)
        # Read through a context manager so the file handle is closed
        # deterministically instead of being left to the garbage collector.
        with open(realpath) as config_file:
            configdata = yaml.safe_load(config_file.read())
        validate(instance=configdata, schema=config.schema)
        tpool = []
        # GitHub crawlers sharing the same token are put in one
        # GroupCrawler, so there is one pool entry per distinct token.
        group = {}
        for tenant in configdata['tenants']:
            for crawler_item in tenant['crawler'].get('github_orgs', []):
                c_args = pullrequest.GithubCrawlerArgs(
                    command='github_crawler',
                    index=tenant['index'],
                    org=crawler_item['name'],
                    updated_since=crawler_item['updated_since'],
                    loop_delay=tenant['crawler']['loop_delay'],
                    token=crawler_item['token'],
                    repository=crawler_item.get('repository'),
                    base_url=crawler_item['base_url'],
                )
                log.info('args=%s', c_args)
                if crawler_item['token'] not in group:
                    group[crawler_item['token']] = GroupCrawler()
                    tpool.append(group[crawler_item['token']])
                group[crawler_item['token']].add_crawler(
                    Runner(
                        c_args,
                        elastic_conn=args.elastic_conn,
                        elastic_timeout=args.elastic_timeout,
                    ))
            # Each Gerrit repository gets its own pool entry.
            for crawler_item in tenant['crawler'].get('gerrit_repositories',
                                                      []):
                c_args = review.GerritCrawlerArgs(
                    command='gerrit_crawler',
                    index=tenant['index'],
                    repository=crawler_item['name'],
                    updated_since=crawler_item['updated_since'],
                    loop_delay=tenant['crawler']['loop_delay'],
                    base_url=crawler_item['base_url'],
                )
                tpool.append(
                    Crawler(
                        c_args,
                        elastic_conn=args.elastic_conn,
                        elastic_timeout=args.elastic_timeout,
                    ))
        log.info('%d configured threads', len(tpool))
        for cthread in tpool:
            cthread.start()

    if args.command == "dbmanage":
        if args.delete_repository:
            db = ELmonocleDB(elastic_conn=args.elastic_conn, index=args.index)
            db.delete_repository(args.delete_repository)

    if args.command == "dbquery":
        db = ELmonocleDB(elastic_conn=args.elastic_conn, index=args.index)
        params = utils.set_params(args)
        try:
            # A leading "^" anchor is stripped from the repository regexp.
            ret = db.run_named_query(args.name, args.repository.lstrip('^'),
                                     params)
        except UnknownQueryException as err:
            log.error('Unable to run query: %s', err)
            sys.exit(1)
        pprint(ret)
Beispiel #29
0
def main():
    """
    Entry point of the ``monocle`` command line tool.

    Three subcommands are available:

    * ``crawler``: load and validate the YAML configuration given by
      ``--config``, build the GitHub and Gerrit crawlers it describes
      (discovering the repositories of a GitHub organization when none is
      configured), and start them.
    * ``dbmanage``: delete the events of a repository and/or delete the
      whole index.
    * ``dbquery``: run a named query on an index and pretty-print the
      result.

    When the ``APP_ID`` and ``APP_KEY_PATH`` environment variables are both
    set, an application object is created and used to obtain tokens for
    GitHub organizations that have no token configured.

    Returns 1 (after printing the usage) when no subcommand is given and
    None otherwise. Exits with status 1 when the crawler configuration
    file cannot be accessed or when the requested query is unknown.
    """
    parser = argparse.ArgumentParser(prog='monocle')
    parser.add_argument('--loglevel', help='logging level', default='INFO')
    parser.add_argument(
        '--elastic-timeout',
        help='Elasticsearch connection retry timeout',
        default=10,
        type=int,
    )
    parser.add_argument(
        '--elastic-conn', help='Elasticsearch connection info', default='localhost:9200'
    )
    subparsers = parser.add_subparsers(
        title='Subcommands', description='valid subcommands', dest="command"
    )

    parser_crawler = subparsers.add_parser('crawler', help='Threaded crawlers pool')
    parser_crawler.add_argument(
        '--config', help='Configuration file of the crawlers pool', required=True
    )

    parser_dbmanage = subparsers.add_parser('dbmanage', help='Database manager')
    parser_dbmanage.add_argument(
        '--delete-repository', help='Delete events related to a repository (regexp)',
    )
    parser_dbmanage.add_argument(
        '--delete-index', help='Delete the index', action='store_true',
    )
    parser_dbmanage.add_argument(
        '--index', help='The Elastisearch index name', required=True
    )

    parser_dbquery = subparsers.add_parser(
        'dbquery', help='Run an existsing query on stored events'
    )
    parser_dbquery.add_argument(
        '--index', help='The Elastisearch index name', required=True
    )
    parser_dbquery.add_argument('--name', help='The query name', required=True)
    parser_dbquery.add_argument(
        '--repository', help='Scope to events of repositories (regexp)', required=True
    )
    parser_dbquery.add_argument(
        '--target-branch', help='Scope to events of a target branches (regexp)'
    )
    parser_dbquery.add_argument('--gte', help='Scope to events created after date')
    parser_dbquery.add_argument('--lte', help='Scope to events created before date')
    parser_dbquery.add_argument(
        '--on_cc_gte', help='Scope to events related to changes created after date'
    )
    parser_dbquery.add_argument(
        '--on_cc_lte', help='Scope to events related to changes created before date'
    )
    parser_dbquery.add_argument(
        '--ec-same-date',
        help='Scope to events related to changes created during the '
        'same date bondaries defined by gte/lte arguments',
        action='store_true',
    )
    parser_dbquery.add_argument(
        '--type', help='Scope to events types list (comma separated)'
    )
    parser_dbquery.add_argument(
        '--files', help='Scope to changes containing this file regexp'
    )
    parser_dbquery.add_argument(
        '--state',
        help='Scope to changes having this state',
        choices=['OPEN', 'CLOSED', 'MERGED'],
    )
    parser_dbquery.add_argument(
        '--change-ids', help='Scope to change ids (comma separated)'
    )
    parser_dbquery.add_argument('--authors', help='Scope to authors (comma separated)')
    parser_dbquery.add_argument(
        '--approvals', help='Scope to objects with approvals (comma separated)'
    )
    parser_dbquery.add_argument(
        '--exclude-approvals', help='Approvals exclude list (comma separated)'
    )
    parser_dbquery.add_argument(
        '--size', help='Return maximum of size results', default=10
    )
    parser_dbquery.add_argument(
        '--from', help='Starting index of the elements to retrieve', default=0
    )
    parser_dbquery.add_argument(
        '--exclude-authors', help='Authors exclude list (comma separated)'
    )
    parser_dbquery.add_argument(
        '--tests-included',
        help='Scope to changes containing tests',
        action='store_true',
    )
    parser_dbquery.add_argument(
        '--has-issue-tracker-links',
        help='Scope to changes containing an issue tracker link',
        choices=['generic', 'github.com', 'altassian.net'],
    )

    args = parser.parse_args()

    logging.basicConfig(
        level=getattr(logging, args.loglevel.upper()),
        format="%(asctime)s - %(name)s - %(thread)d - %(threadName)s - "
        + "%(levelname)s - %(message)s",
    )
    log = logging.getLogger(__name__)

    if not args.command:
        parser.print_usage()
        return 1

    if args.command == "crawler":
        realpath = os.path.expanduser(args.config)
        if not os.path.isfile(realpath):
            log.error('Unable to access config: %s', realpath)
            sys.exit(1)
        # Read through a context manager so the file handle is closed
        # deterministically instead of being left to the garbage collector.
        with open(realpath) as config_file:
            configdata = yaml.safe_load(config_file.read())
        validate(instance=configdata, schema=config.schema)
        tpool = []
        # GitHub crawlers sharing the same token are put in one
        # GroupCrawler, so there is one pool entry per distinct token.
        group = {}
        app = None
        if os.getenv('APP_ID') and os.getenv('APP_KEY_PATH'):
            app = application.get_app(os.getenv('APP_ID'), os.getenv('APP_KEY_PATH'))
        for tenant in configdata['tenants']:
            for crawler_item in tenant['crawler'].get('github_orgs', []):
                tg = pullrequest.TokenGetter(
                    crawler_item['name'], crawler_item.get('token'), app
                )
                c_args = pullrequest.GithubCrawlerArgs(
                    command='github_crawler',
                    org=crawler_item['name'],
                    updated_since=crawler_item['updated_since'],
                    loop_delay=tenant['crawler']['loop_delay'],
                    repository=crawler_item.get('repository'),
                    base_url=crawler_item['base_url'],
                    token_getter=tg,
                    db=ELmonocleDB(
                        elastic_conn=args.elastic_conn,
                        index=tenant['index'],
                        timeout=args.elastic_timeout,
                    ),
                )
                # gid is the key used to group crawlers: the configured
                # token or, failing that, the token obtained from the app.
                gid = crawler_item.get('token')
                if not gid:
                    if app:
                        # No token, if we have a app then get the token from the app
                        gid = app.get_token(org=crawler_item['name'])
                    else:
                        log.info('Skip crawler because no token: %s', c_args)
                        continue
                if gid not in group:
                    group[gid] = GroupCrawler()
                    tpool.append(group[gid])
                if c_args.repository:
                    repositories = [c_args.repository]
                else:
                    log.info('Discovering repositories in %s ...', c_args.org)
                    # No repository specified for that organization so
                    # try to discover all of them
                    rf = organization.RepositoriesFetcher(
                        graphql.GithubGraphQLQuery(token_getter=tg)
                    )
                    repos = rf.get(c_args.org)
                    # Archived repositories are left out.
                    repositories = [
                        repo['name'] for repo in repos if not repo['isArchived']
                    ]
                    log.info(
                        'Found %s repositories in %s ...',
                        len(repositories),
                        c_args.org,
                    )
                for repository in repositories:
                    # NOTE(review): the same c_args object is mutated and
                    # handed to every Runner; this is only correct if Runner
                    # reads `repository` at construction time - confirm.
                    c_args.repository = repository
                    group[gid].add_crawler(Runner(c_args))
            # Each Gerrit repository gets its own pool entry.
            for crawler_item in tenant['crawler'].get('gerrit_repositories', []):
                c_args = review.GerritCrawlerArgs(
                    command='gerrit_crawler',
                    repository=crawler_item['name'],
                    updated_since=crawler_item['updated_since'],
                    loop_delay=tenant['crawler']['loop_delay'],
                    base_url=crawler_item['base_url'],
                    insecure=crawler_item.get('insecure', False),
                    login=crawler_item.get('login'),
                    password=crawler_item.get('password'),
                    db=ELmonocleDB(
                        elastic_conn=args.elastic_conn,
                        index=tenant['index'],
                        timeout=args.elastic_timeout,
                    ),
                )
                tpool.append(Crawler(c_args))
        log.info('%d configured threads', len(tpool))
        for cthread in tpool:
            cthread.start()

    if args.command == "dbmanage":
        db = ELmonocleDB(elastic_conn=args.elastic_conn, index=args.index)
        if args.delete_repository:
            db.delete_repository(args.delete_repository)
        if args.delete_index:
            db.delete_index()

    if args.command == "dbquery":
        db = ELmonocleDB(elastic_conn=args.elastic_conn, index=args.index)
        params = utils.set_params(args)
        try:
            # A leading "^" anchor is stripped from the repository regexp.
            ret = db.run_named_query(args.name, args.repository.lstrip('^'), params)
        except UnknownQueryException as err:
            log.error('Unable to run query: %s', err)
            sys.exit(1)
        pprint(ret)
Beispiel #30
0
def query(name):
    """
    Run the named query with the parameters of the current request.

    The repository scope is read from the ``repository`` request argument
    and the remaining query parameters are derived from all request
    arguments. The query result is returned as a JSON response.
    """
    request_args = request.args
    repo_scope = request_args.get('repository')
    query_params = utils.set_params(request_args)
    outcome = ELmonocleDB().run_named_query(name, repo_scope, query_params)
    return jsonify(outcome)