Example #1
def test_ingest_hg_pushlog(jm, test_base_dir,
                           test_repository, mock_post_json,
                           activate_responses):
    """ingesting a number of pushes should populate result set and revisions"""

    pushlog_path = os.path.join(test_base_dir, 'sample_data', 'hg_pushlog.json')
    with open(pushlog_path) as f:
        pushlog_content = f.read()
    pushlog_fake_url = "http://www.thisismypushlog.com"
    push_num = 10
    responses.add(responses.GET, pushlog_fake_url,
                  body=pushlog_content, status=200,
                  content_type='application/json')

    process = HgPushlogProcess()

    process.run(pushlog_fake_url, jm.project)

    pushes_stored = jm.get_dhub().execute(
        proc="jobs_test.selects.result_set_ids",
        return_type='tuple'
    )

    assert len(pushes_stored) == push_num

    revisions_stored = jm.get_dhub().execute(
        proc="jobs_test.selects.revision_ids",
        return_type='tuple'
    )

    assert len(revisions_stored) == 15
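These tests depend on an activate_responses fixture to patch HTTP calls; outside pytest, the responses library's own decorator gives the same effect. A minimal self-contained sketch, assuming an illustrative URL and payload:

import json

import requests
import responses


@responses.activate
def fetch_fake_pushlog():
    # Register a canned reply, then make a real requests call against it.
    responses.add(responses.GET, "http://www.thisismypushlog.com",
                  body=json.dumps({"lastpushid": 0, "pushes": {}}),
                  status=200, content_type='application/json')
    return requests.get("http://www.thisismypushlog.com").json()


assert fetch_fake_pushlog() == {"lastpushid": 0, "pushes": {}}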
Example #2
def test_empty_json_pushes(jm, test_base_dir,
                           test_repository, mock_post_json,
                           activate_responses):
    """
    Gracefully handle getting an empty list of pushes from json-pushes

    """

    pushlog_fake_url = "http://www.thisismypushlog.com/?full=1&version=2"

    # respond with an empty list of pushes
    empty_push_json = json.dumps({"lastpushid": 123, "pushes": {}})
    responses.add(responses.GET, pushlog_fake_url,
                  body=empty_push_json, status=200,
                  content_type='application/json',
                  match_querystring=True,
                  )

    process = HgPushlogProcess()
    process.run(pushlog_fake_url, jm.project)

    pushes_stored = jm.get_dhub().execute(
        proc="jobs_test.selects.result_set_ids",
        return_type='tuple'
    )

    assert len(pushes_stored) == 0
Example #3
def test_empty_json_pushes(jm, test_base_dir,
                           test_repository,
                           activate_responses):
    """
    Gracefully handle getting an empty list of pushes from json-pushes

    """

    pushlog_fake_url = "http://www.thisismypushlog.com/?full=1&version=2"

    # respond with an empty list of pushes
    empty_push_json = json.dumps({"lastpushid": 123, "pushes": {}})
    responses.add(responses.GET, pushlog_fake_url,
                  body=empty_push_json, status=200,
                  content_type='application/json',
                  match_querystring=True,
                  )

    process = HgPushlogProcess()
    process.run(pushlog_fake_url, jm.project)

    pushes_stored = jm.get_dhub().execute(
        proc="jobs_test.selects.result_set_ids",
        return_type='tuple'
    )

    assert len(pushes_stored) == 0
Example #4
    def handle(self, *args, **options):
        taskId = options["taskId"]
        if taskId:
            root_url = options["root_url"]
            loop.run_until_complete(handleTaskId(taskId, root_url))
        else:
            project = options["project"]
            changeset = options["changeset"]

            # get reference to repo
            repo = Repository.objects.get(name=project, active_status='active')
            fetch_push_id = None

            # make sure all tasks are run synchronously / immediately
            settings.CELERY_TASK_ALWAYS_EAGER = True

            # get hg pushlog
            pushlog_url = '%s/json-pushes/?full=1&version=2' % repo.url

            # ingest this particular revision for this project
            process = HgPushlogProcess()
            # Use the actual push SHA, in case the changeset specified was a tag
            # or branch name (e.g. tip). HgPushlogProcess returns the full SHA.
            process.run(pushlog_url, project, changeset=changeset, last_push_id=fetch_push_id)

            # XXX: Need logic to get from project/revision to taskGroupId
            taskGroupId = 'ZYnMSfwCS5Cc_Wi_e-ZlSA'
            logger.info("## START ##")
            loop.run_until_complete(processTasks(taskGroupId, repo.tc_root_url))
            logger.info("## END ##")
Example #5
def test_ingest_hg_pushlog_cache_last_push(test_repository, test_base_dir,
                                           activate_responses):
    """
    ingesting a number of pushes should cache the top revision of the last push
    """

    pushlog_path = os.path.join(test_base_dir, 'sample_data',
                                'hg_pushlog.json')
    with open(pushlog_path) as f:
        pushlog_content = f.read()
    pushlog_fake_url = "http://www.thisismypushlog.com"
    responses.add(responses.GET,
                  pushlog_fake_url,
                  body=pushlog_content,
                  status=200,
                  content_type='application/json')

    process = HgPushlogProcess()
    process.run(pushlog_fake_url, test_repository.name)

    pushlog_dict = json.loads(pushlog_content)
    pushes = pushlog_dict['pushes']
    max_push_id = max([int(k) for k in pushes.keys()])

    cache_key = "{}:last_push_id".format(test_repository.name)
    assert cache.get(cache_key) == max_push_id
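For reference, a standalone sketch of how the cached value checked above is derived from a json-pushes v2 payload; the payload and repository name are illustrative:

import json

sample = json.dumps({"lastpushid": 2, "pushes": {"1": {}, "2": {}}})
pushes = json.loads(sample)['pushes']
max_push_id = max(int(k) for k in pushes)  # push ids arrive as string keys
cache_key = "{}:last_push_id".format("some-repo")
assert (cache_key, max_push_id) == ("some-repo:last_push_id", 2)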
Example #6
    def _handle(self, *args, **options):
        if len(args) != 2:
            raise CommandError("Need to specify (only) branch and changeset")

        (project, changeset) = args

        # get reference to repo
        repo = Repository.objects.get(name=project, active_status='active')

        # make sure all tasks are run synchronously / immediately
        settings.CELERY_ALWAYS_EAGER = True

        # get hg pushlog
        pushlog_url = '%s/json-pushes/?full=1&version=2' % repo.url

        # ingest this particular revision for this project
        process = HgPushlogProcess()
        # Use the actual push SHA, in case the changeset specified was a tag
        # or branch name (e.g. tip). HgPushlogProcess returns the full SHA.
        push_sha = process.run(pushlog_url, project, changeset=changeset)

        Builds4hJobsProcess().run(project_filter=project,
                                  revision_filter=push_sha,
                                  job_group_filter=options['filter_job_group'])
        PendingJobsProcess().run(project_filter=project,
                                 revision_filter=push_sha,
                                 job_group_filter=options['filter_job_group'])
        RunningJobsProcess().run(project_filter=project,
                                 revision_filter=push_sha,
                                 job_group_filter=options['filter_job_group'])
Example #7
def test_ingest_hg_pushlog(jm, initial_data, test_base_dir, test_repository,
                           mock_post_json_data, activate_responses):
    """ingesting a number of pushes should populate result set and revisions"""

    pushlog_path = os.path.join(test_base_dir, 'sample_data',
                                'hg_pushlog.json')
    with open(pushlog_path) as f:
        pushlog_content = f.read()
    pushlog_fake_url = "http://www.thisismypushlog.com"
    responses.add(responses.GET,
                  pushlog_fake_url,
                  body=pushlog_content,
                  status=200,
                  content_type='application/json')

    process = HgPushlogProcess()

    process.run(pushlog_fake_url, jm.project)

    pushes_stored = jm.get_jobs_dhub().execute(
        proc="jobs_test.selects.result_set_ids", return_type='tuple')

    assert len(pushes_stored) == 10

    revisions_stored = jm.get_jobs_dhub().execute(
        proc="jobs_test.selects.revision_ids", return_type='tuple')

    assert len(revisions_stored) == 15
Example #8
def fetch_hg_push_log(repo_name, repo_url):
    """
    Run an HgPushlog ETL process
    """
    newrelic.agent.add_custom_parameter("repo_name", repo_name)
    process = HgPushlogProcess()
    process.run(repo_url + '/json-pushes/?full=1&version=2', repo_name)
Example #9
def test_ingest_hg_pushlog_cache_last_push(jm, initial_data, test_repository,
                                           test_base_dir, mock_post_json_data,
                                           activate_responses):
    """
    ingesting a number of pushes should cache the top revision of the last push
    """

    pushlog_path = os.path.join(test_base_dir, 'sample_data',
                                'hg_pushlog.json')
    with open(pushlog_path) as f:
        pushlog_content = f.read()
    pushlog_fake_url = "http://www.thisismypushlog.com"
    responses.add(responses.GET,
                  pushlog_fake_url,
                  body=pushlog_content,
                  status=200,
                  content_type='application/json')

    process = HgPushlogProcess()
    process.run(pushlog_fake_url, jm.project)

    pushlog_dict = json.loads(pushlog_content)
    max_push_id = max([int(k) for k in pushlog_dict.keys()])
    last_push = pushlog_dict[str(max_push_id)]
    last_push_revision = last_push["changesets"][0]["node"]

    assert cache.get("test_treeherder:last_push") == last_push_revision
Example #10
    def _handle(self, *args, **options):
        if len(args) != 2:
            raise CommandError("Need to specify (only) branch and changeset")

        (project, changeset) = args

        # get reference to repo
        rdm = RefDataManager()
        repos = filter(lambda x: x['name'] == project,
                       rdm.get_all_repository_info())
        if not repos:
            raise CommandError("No project found named '%s'" % project)
        repo = repos[0]

        # make sure all tasks are run synchronously / immediately
        settings.CELERY_ALWAYS_EAGER = True

        # get hg pushlog
        pushlog_url = '%s/json-pushes/?full=1&version=2' % repo['url']

        # ingest this particular revision for this project
        process = HgPushlogProcess()
        # Use the actual push SHA, in case the changeset specified was a tag
        # or branch name (e.g. tip). HgPushlogProcess returns the full SHA.
        push_sha = process.run(pushlog_url, project, changeset=changeset)[:12]

        Builds4hJobsProcess().run(project_filter=project,
                                  revision_filter=push_sha,
                                  job_group_filter=options['filter_job_group'])
        PendingJobsProcess().run(project_filter=project,
                                 revision_filter=push_sha,
                                 job_group_filter=options['filter_job_group'])
        RunningJobsProcess().run(project_filter=project,
                                 revision_filter=push_sha,
                                 job_group_filter=options['filter_job_group'])
Example #11
    def _handle(self, *args, **options):
        if len(args) != 2:
            raise CommandError("Need to specify (only) branch and changeset")

        (project, changeset) = args

        # get reference to repo
        rdm = RefDataManager()
        repos = filter(lambda x: x['name'] == project,
                       rdm.get_all_repository_info())
        if not repos:
            raise CommandError("No project found named '%s'" % project)
        repo = repos[0]

        # make sure all tasks are run synchronously / immediately
        settings.CELERY_ALWAYS_EAGER = True

        # get hg pushlog
        pushlog_url = '%s/json-pushes/?full=1' % repo['url']

        # ingest this particular revision for this project
        process = HgPushlogProcess()
        process.run(pushlog_url, project, changeset=changeset)

        self._process_all_objects_for_project(project)

        Builds4hJobsProcess().run(filter_to_project=project,
                                  filter_to_revision=changeset)
        PendingJobsProcess().run(filter_to_project=project,
                                 filter_to_revision=changeset)
        RunningJobsProcess().run(filter_to_project=project,
                                 filter_to_revision=changeset)

        self._process_all_objects_for_project(project)
Example #12
    def _handle(self, *args, **options):
        if len(args) != 2:
            raise CommandError("Need to specify (only) branch and changeset")

        (project, changeset) = args

        # get reference to repo
        rdm = RefDataManager()
        repos = filter(lambda x: x['name'] == project,
                       rdm.get_all_repository_info())
        if not repos:
            raise CommandError("No project found named '%s'" % project)
        repo = repos[0]

        # make sure all tasks are run synchronously / immediately
        settings.CELERY_ALWAYS_EAGER = True

        # get hg pushlog
        pushlog_url = '%s/json-pushes/?full=1&version=2' % repo['url']

        # ingest this particular revision for this project
        process = HgPushlogProcess()
        # Use the actual push SHA, in case the changeset specified was a tag
        # or branch name (e.g. tip). HgPushlogProcess returns the full SHA.
        push_sha = process.run(pushlog_url, project, changeset=changeset)[:12]

        Builds4hJobsProcess().run(project_filter=project,
                                  revision_filter=push_sha,
                                  job_group_filter=options['filter_job_group'])
        PendingJobsProcess().run(project_filter=project,
                                 revision_filter=push_sha,
                                 job_group_filter=options['filter_job_group'])
        RunningJobsProcess().run(project_filter=project,
                                 revision_filter=push_sha,
                                 job_group_filter=options['filter_job_group'])
Example #13
def fetch_hg_push_log(repo_name, repo_url):
    """
    Run an HgPushlog ETL process
    """
    newrelic.agent.add_custom_parameter("repo_name", repo_name)
    process = HgPushlogProcess()
    process.run(repo_url + '/json-pushes/?full=1&version=2', repo_name)
Example #14
    def handle(self, *args, **options):
        typeOfIngestion = options["ingestion_type"][0]
        root_url = options["root_url"]

        if typeOfIngestion == "task":
            assert options["taskId"]
            loop.run_until_complete(handleTaskId(options["taskId"], root_url))
        elif typeOfIngestion == "pr":
            assert options["prUrl"]
            pr_url = options["prUrl"]
            splitUrl = pr_url.split("/")
            org = splitUrl[3]
            repo = splitUrl[4]
            pulse = {
                "exchange": "exchange/taskcluster-github/v1/pull-request",
                "routingKey": "primary.{}.{}.synchronize".format(org, repo),
                "payload": {
                    "repository": repo,
                    "organization": org,
                    "action": "synchronize",
                    "details": {
                        "event.pullNumber": splitUrl[6],
                        "event.base.repo.url": "https://github.com/{}/{}.git".format(org, repo),
                        "event.head.repo.url": "https://github.com/{}/{}.git".format(org, repo),
                    },
                }
            }
            PushLoader().process(pulse["payload"], pulse["exchange"], root_url)
        elif typeOfIngestion == "git-push":
            raise Exception("This is not yet implemented")
        elif typeOfIngestion == "push":
            if not options["enable_eager_celery"]:
                logger.info(
                    "If you want all logs to be parsed use --enable-eager-celery"
                )
            else:
                # Make sure all tasks are run synchronously / immediately
                settings.CELERY_TASK_ALWAYS_EAGER = True

            # get reference to repo and ingest this particular revision for this project
            project = options["project"]
            commit = options["commit"]
            repo = Repository.objects.get(name=project, active_status="active")
            pushlog_url = "%s/json-pushes/?full=1&version=2" % repo.url
            process = HgPushlogProcess()
            process.run(pushlog_url, project, changeset=commit, last_push_id=None)

            if options["ingest_all_tasks"]:
                gecko_decision_task = get_decision_task_id(project, commit, repo.tc_root_url)
                logger.info("## START ##")
                loop.run_until_complete(processTasks(gecko_decision_task, repo.tc_root_url))
                logger.info("## END ##")
            else:
                logger.info(
                    "You can ingest all tasks for a push with -a/--ingest-all-tasks."
                )
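The pr branch above assumes a fixed GitHub pull-request URL layout; a sketch of the index arithmetic it relies on, using an illustrative URL:

pr_url = "https://github.com/mozilla/treeherder/pull/123"
split_url = pr_url.split("/")
# ['https:', '', 'github.com', 'mozilla', 'treeherder', 'pull', '123']
org, repo, pull_number = split_url[3], split_url[4], split_url[6]
assert (org, repo, pull_number) == ("mozilla", "treeherder", "123")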
Example #15
def _ingest_hg_push(project, revision, fetch_push_id=None):
    # get reference to repo
    repo = Repository.objects.get(name=project, active_status="active")
    # get hg pushlog
    pushlog_url = "%s/json-pushes/?full=1&version=2" % repo.url
    # ingest this particular revision for this project
    process = HgPushlogProcess()
    # Use the actual push SHA, in case the changeset specified was a tag
    # or branch name (e.g. tip). HgPushlogProcess returns the full SHA.
    process.run(pushlog_url,
                project,
                changeset=revision,
                last_push_id=fetch_push_id)
Example #16
def test_ingest_hg_pushlog_already_stored(test_repository, test_base_dir,
                                          activate_responses):
    """test that trying to ingest a push already stored doesn't doesn't affect
    all the pushes in the request,
    e.g. trying to store [A,B] with A already stored, B will be stored"""

    pushlog_path = os.path.join(test_base_dir, 'sample_data',
                                'hg_pushlog.json')
    with open(pushlog_path) as f:
        pushlog_json = json.load(f)
    pushes = list(pushlog_json['pushes'].values())
    first_push, second_push = pushes[0:2]

    pushlog_fake_url = "http://www.thisismypushlog.com/?full=1&version=2"

    # store the first push only
    first_push_json = json.dumps({
        "lastpushid": 1,
        "pushes": {
            "1": first_push
        }
    })
    responses.add(
        responses.GET,
        pushlog_fake_url,
        body=first_push_json,
        status=200,
        content_type='application/json',
        match_querystring=True,
    )

    process = HgPushlogProcess()
    process.run(pushlog_fake_url, test_repository.name)

    assert Push.objects.count() == 1

    # store both first and second push
    first_and_second_push_json = json.dumps({
        "lastpushid": 2,
        "pushes": {
            "1": first_push,
            "2": second_push
        }
    })

    responses.add(
        responses.GET,
        pushlog_fake_url + "&startID=1",
        body=first_and_second_push_json,
        status=200,
        content_type='application/json',
        match_querystring=True,
    )

    process = HgPushlogProcess()

    process.run(pushlog_fake_url, test_repository.name)

    assert Push.objects.count() == 2
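The second responses.add above registers the incremental form of the pushlog URL: after the first run, the ETL is expected to re-query with startID set to the highest stored push id so only newer pushes are returned. A sketch of that URL construction, with illustrative values:

base_url = "http://www.thisismypushlog.com/?full=1&version=2"
last_stored_push_id = 1
incremental_url = "{}&startID={}".format(base_url, last_stored_push_id)
assert incremental_url.endswith("?full=1&version=2&startID=1")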
Example #17
    def _handle(self, *args, **options):
        project = options['project']
        changeset = options['changeset']

        if not options['last_n_pushes'] and not changeset:
            raise CommandError('must specify --last-n-pushes or a positional '
                               'changeset argument')

        # get reference to repo
        repo = Repository.objects.get(name=project, active_status='active')

        if options['last_n_pushes']:
            last_push_id = last_push_id_from_server(repo)
            fetch_push_id = max(1, last_push_id - options['last_n_pushes'])
            logger.info('last server push id: %d; fetching push %d and newer' %
                        (last_push_id, fetch_push_id))
        else:
            fetch_push_id = None

        # make sure all tasks are run synchronously / immediately
        settings.CELERY_ALWAYS_EAGER = True

        # get hg pushlog
        pushlog_url = '%s/json-pushes/?full=1&version=2' % repo.url

        # ingest this particular revision for this project
        process = HgPushlogProcess()
        # Use the actual push SHA, in case the changeset specified was a tag
        # or branch name (e.g. tip). HgPushlogProcess returns the full SHA.
        push_sha = process.run(pushlog_url,
                               project,
                               changeset=changeset,
                               last_push_id=fetch_push_id)

        # Only perform additional processing if fetching a single changeset
        # because we only have the sha1 of the tip-most push in "last N pushes"
        # mode and can't filter appropriately.
        if not fetch_push_id:
            group_filter = options['filter_job_group']
            Builds4hJobsProcess().run(project_filter=project,
                                      revision_filter=push_sha,
                                      job_group_filter=group_filter)
            PendingJobsProcess().run(project_filter=project,
                                     revision_filter=push_sha,
                                     job_group_filter=group_filter)
            RunningJobsProcess().run(project_filter=project,
                                     revision_filter=push_sha,
                                     job_group_filter=group_filter)
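A worked example of the --last-n-pushes arithmetic used in Examples #17, #18 and #24; the numbers are illustrative, and max(1, ...) keeps the id from dropping below the first push:

last_push_id = 120   # what last_push_id_from_server(repo) might report
last_n_pushes = 10
fetch_push_id = max(1, last_push_id - last_n_pushes)
assert fetch_push_id == 110
assert max(1, 5 - 10) == 1  # asking for more pushes than exist floors at 1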
Example #18
    def _handle(self, *args, **options):
        project = options['project']
        changeset = options['changeset']

        if not options['last_n_pushes'] and not changeset:
            raise CommandError('must specify --last-n-pushes or a positional '
                               'changeset argument')

        # get reference to repo
        repo = Repository.objects.get(name=project, active_status='active')

        if options['last_n_pushes']:
            last_push_id = last_push_id_from_server(repo)
            fetch_push_id = max(1, last_push_id - options['last_n_pushes'])
            logger.info('last server push id: %d; fetching push %d and newer'
                        % (last_push_id, fetch_push_id))
        else:
            fetch_push_id = None

        # make sure all tasks are run synchronously / immediately
        settings.CELERY_ALWAYS_EAGER = True

        # get hg pushlog
        pushlog_url = '%s/json-pushes/?full=1&version=2' % repo.url

        # ingest this particular revision for this project
        process = HgPushlogProcess()
        # Use the actual push SHA, in case the changeset specified was a tag
        # or branch name (e.g. tip). HgPushlogProcess returns the full SHA.
        push_sha = process.run(pushlog_url, project, changeset=changeset,
                               last_push_id=fetch_push_id)

        # Only perform additional processing if fetching a single changeset
        # because we only have the sha1 of the tip-most push in "last N pushes"
        # mode and can't filter appropriately.
        if not fetch_push_id:
            group_filter = options['filter_job_group']
            Builds4hJobsProcess().run(project_filter=project,
                                      revision_filter=push_sha,
                                      job_group_filter=group_filter)
            PendingJobsProcess().run(project_filter=project,
                                     revision_filter=push_sha,
                                     job_group_filter=group_filter)
            RunningJobsProcess().run(project_filter=project,
                                     revision_filter=push_sha,
                                     job_group_filter=group_filter)
Example #19
def test_ingest_hg_pushlog_already_stored(jm, initial_data, test_base_dir,
                                          test_repository, mock_post_json_data,
                                          activate_responses):
    """test that trying to ingest a push already stored doesn't doesn't affect
    all the pushes in the request,
    e.g. trying to store [A,B] with A already stored, B will be stored"""

    pushlog_path = os.path.join(test_base_dir, 'sample_data',
                                'hg_pushlog.json')
    with open(pushlog_path) as f:
        pushlog_content = f.read()
    pushes = list(json.loads(pushlog_content).values())
    first_push, second_push = pushes[0:2]

    pushlog_fake_url = "http://www.thisismypushlog.com/?full=1"

    # store the first push only
    first_push_json = json.dumps({"1": first_push})
    responses.add(
        responses.GET,
        pushlog_fake_url,
        body=first_push_json,
        status=200,
        content_type='application/json',
        match_querystring=True,
    )

    process = HgPushlogProcess()
    process.run(pushlog_fake_url, jm.project)

    pushes_stored = jm.get_jobs_dhub().execute(
        proc="jobs_test.selects.result_set_ids", return_type='tuple')

    assert len(pushes_stored) == 1

    # store both first and second push
    first_and_second_push_json = json.dumps({
        "1": first_push,
        "2": second_push
    })
    responses.add(responses.GET,
                  pushlog_fake_url +
                  "&fromchange=2c25d2bbbcd6ddbd45962606911fd429e366b8e1",
                  body=first_and_second_push_json,
                  status=200,
                  content_type='application/json',
                  match_querystring=True)

    process = HgPushlogProcess()

    process.run(pushlog_fake_url, jm.project)

    pushes_stored = jm.get_jobs_dhub().execute(
        proc="jobs_test.selects.result_set_ids", return_type='tuple')

    assert len(pushes_stored) == 2
Example #20
def test_ingest_hg_pushlog(jm, initial_data, test_base_dir,
                           test_repository, mock_post_json,
                           activate_responses, pulse_resultset_consumer):
    """ingesting a number of pushes should populate result set and revisions"""

    pushlog_path = os.path.join(test_base_dir, 'sample_data', 'hg_pushlog.json')
    with open(pushlog_path) as f:
        pushlog_content = f.read()
    pushlog_fake_url = "http://www.thisismypushlog.com"
    push_num = 10
    responses.add(responses.GET, pushlog_fake_url,
                  body=pushlog_content, status=200,
                  content_type='application/json')

    process = HgPushlogProcess()

    process.run(pushlog_fake_url, jm.project)

    pushes_stored = jm.get_dhub().execute(
        proc="jobs_test.selects.result_set_ids",
        return_type='tuple'
    )

    assert len(pushes_stored) == push_num

    rev_to_push = set()
    for push in json.loads(pushlog_content).values():
        # Add each rev to the set; remember we shorten them all down to 12 chars
        rev_to_push.add(push['changesets'][-1]['node'][0:12])

    # Ensure for each push we sent a pulse notification...
    for _ in range(0, push_num):
        message = pulse_resultset_consumer.get(block=True, timeout=2)
        content = json.loads(message.body)
        assert content['revision'] in rev_to_push
        # Ensure we don't match the same revision twice...
        rev_to_push.remove(content['revision'])

    revisions_stored = jm.get_dhub().execute(
        proc="jobs_test.selects.revision_ids",
        return_type='tuple'
    )

    assert len(revisions_stored) == 15
Example #21
def test_ingest_hg_pushlog(jm, initial_data, test_base_dir,
                           test_repository, mock_post_json_data,
                           activate_responses, pulse_resultset_consumer):
    """ingesting a number of pushes should populate result set and revisions"""

    pushlog_path = os.path.join(test_base_dir, 'sample_data', 'hg_pushlog.json')
    with open(pushlog_path) as f:
        pushlog_content = f.read()
    pushlog_fake_url = "http://www.thisismypushlog.com"
    push_num = 10
    responses.add(responses.GET, pushlog_fake_url,
                  body=pushlog_content, status=200,
                  content_type='application/json')

    process = HgPushlogProcess()

    process.run(pushlog_fake_url, jm.project)

    pushes_stored = jm.get_jobs_dhub().execute(
        proc="jobs_test.selects.result_set_ids",
        return_type='tuple'
    )

    assert len(pushes_stored) == push_num

    rev_to_push = set()
    for push in json.loads(pushlog_content).values():
        # Add each rev to the set; remember we shorten them all down to 12 chars
        rev_to_push.add(push['changesets'][-1]['node'][0:12])

    # Ensure for each push we sent a pulse notification...
    for _ in range(0, push_num):
        message = pulse_resultset_consumer.get(block=True, timeout=2)
        content = json.loads(message.body)
        assert content['revision'] in rev_to_push
        # Ensure we don't match the same revision twice...
        rev_to_push.remove(content['revision'])

    revisions_stored = jm.get_jobs_dhub().execute(
        proc="jobs_test.selects.revision_ids",
        return_type='tuple'
    )

    assert len(revisions_stored) == 15
Example #22
def test_ingest_hg_pushlog(test_repository, test_base_dir,
                           activate_responses):
    """ingesting a number of pushes should populate result set and revisions"""

    pushlog_path = os.path.join(test_base_dir, 'sample_data', 'hg_pushlog.json')
    with open(pushlog_path) as f:
        pushlog_content = f.read()
    pushlog_fake_url = "http://www.thisismypushlog.com"
    responses.add(responses.GET, pushlog_fake_url,
                  body=pushlog_content, status=200,
                  content_type='application/json')

    process = HgPushlogProcess()

    process.run(pushlog_fake_url, test_repository.name)

    # should be 10 pushes, 15 revisions
    assert Push.objects.count() == 10
    assert Commit.objects.count() == 15
Example #23
def test_ingest_hg_pushlog(test_repository, test_base_dir,
                           activate_responses):
    """ingesting a number of pushes should populate push and revisions"""

    pushlog_path = os.path.join(test_base_dir, 'sample_data', 'hg_pushlog.json')
    with open(pushlog_path) as f:
        pushlog_content = f.read()
    pushlog_fake_url = "http://www.thisismypushlog.com"
    responses.add(responses.GET, pushlog_fake_url,
                  body=pushlog_content, status=200,
                  content_type='application/json')

    process = HgPushlogProcess()

    process.run(pushlog_fake_url, test_repository.name)

    # should be 10 pushes, 15 revisions
    assert Push.objects.count() == 10
    assert Commit.objects.count() == 15
Example #24
    def _handle(self, *args, **options):
        project = options['project']
        changeset = options['changeset']

        if not options['last_n_pushes'] and not changeset:
            raise CommandError('must specify --last-n-pushes or a positional '
                               'changeset argument')

        # get reference to repo
        repo = Repository.objects.get(name=project, active_status='active')

        if options['last_n_pushes']:
            last_push_id = last_push_id_from_server(repo)
            fetch_push_id = max(1, last_push_id - options['last_n_pushes'])
            logger.info('last server push id: %d; fetching push %d and newer',
                        last_push_id, fetch_push_id)
        else:
            fetch_push_id = None

        # make sure all tasks are run synchronously / immediately
        settings.CELERY_ALWAYS_EAGER = True

        # get hg pushlog
        pushlog_url = '%s/json-pushes/?full=1&version=2' % repo.url

        # ingest this particular revision for this project
        process = HgPushlogProcess()
        # Use the actual push SHA, in case the changeset specified was a tag
        # or branch name (e.g. tip). HgPushlogProcess returns the full SHA.
        process.run(pushlog_url,
                    project,
                    changeset=changeset,
                    last_push_id=fetch_push_id)

        # Only perform additional processing if fetching a single changeset
        # because we only have the sha1 of the tip-most push in "last N pushes"
        # mode and can't filter appropriately.
        if not fetch_push_id:
            raise CommandError(
                'This command is not yet able to ingest Taskcluster jobs automatically. '
                'Please manually configure pulse job ingestion using this guide: '
                'https://treeherder.readthedocs.io/pulseload.html')
Example #25
def test_ingest_hg_pushlog(jm, initial_data, test_base_dir, test_repository,
                           mock_post_json_data):
    """ingesting a number of pushes should populate result set and revisions"""

    pushlog = os.path.join(test_base_dir, 'sample_data', 'hg_pushlog.json')
    process = HgPushlogProcess()

    process.run("file://{0}".format(pushlog), jm.project)

    pushes_stored = jm.get_jobs_dhub().execute(
        proc="jobs_test.selects.result_set_ids", return_type='tuple')

    assert len(pushes_stored) == 10

    revisions_stored = jm.get_jobs_dhub().execute(
        proc="jobs_test.selects.revision_ids", return_type='tuple')

    assert len(revisions_stored) == 15

    jm.disconnect()
Example #26
def test_empty_json_pushes(test_repository, test_base_dir,
                           activate_responses):
    """
    Gracefully handle getting an empty list of pushes from json-pushes

    """

    pushlog_fake_url = "http://www.thisismypushlog.com/?full=1&version=2"

    # respond with an empty list of pushes
    empty_push_json = json.dumps({"lastpushid": 123, "pushes": {}})
    responses.add(responses.GET, pushlog_fake_url,
                  body=empty_push_json, status=200,
                  content_type='application/json',
                  match_querystring=True,
                  )

    process = HgPushlogProcess()
    process.run(pushlog_fake_url, test_repository.name)

    assert Push.objects.count() == 0
Example #27
def test_empty_json_pushes(test_repository, test_base_dir,
                           activate_responses):
    """
    Gracefully handle getting an empty list of pushes from json-pushes

    """

    pushlog_fake_url = "http://www.thisismypushlog.com/?full=1&version=2"

    # respond with an empty list of pushes
    empty_push_json = json.dumps({"lastpushid": 123, "pushes": {}})
    responses.add(responses.GET, pushlog_fake_url,
                  body=empty_push_json, status=200,
                  content_type='application/json',
                  match_querystring=True,
                  )

    process = HgPushlogProcess()
    process.run(pushlog_fake_url, test_repository.name)

    assert Push.objects.count() == 0
Example #28
    def _handle(self, *args, **options):
        project = options['project']
        changeset = options['changeset']

        if not options['last_n_pushes'] and not changeset:
            raise CommandError('must specify --last-n-pushes or a positional '
                               'changeset argument')

        # get reference to repo
        repo = Repository.objects.get(name=project, active_status='active')

        if options['last_n_pushes']:
            last_push_id = last_push_id_from_server(repo)
            fetch_push_id = max(1, last_push_id - options['last_n_pushes'])
            logger.info('last server push id: %d; fetching push %d and newer',
                        last_push_id, fetch_push_id)
        else:
            fetch_push_id = None

        # make sure all tasks are run synchronously / immediately
        settings.CELERY_TASK_ALWAYS_EAGER = True

        # get hg pushlog
        pushlog_url = '%s/json-pushes/?full=1&version=2' % repo.url

        # ingest this particular revision for this project
        process = HgPushlogProcess()
        # Use the actual push SHA, in case the changeset specified was a tag
        # or branch name (e.g. tip). HgPushlogProcess returns the full SHA.
        process.run(pushlog_url, project, changeset=changeset, last_push_id=fetch_push_id)

        # Only perform additional processing if fetching a single changeset
        # because we only have the sha1 of the tip-most push in "last N pushes"
        # mode and can't filter appropriately.
        if not fetch_push_id:
            raise CommandError(
                'This command is not yet able to ingest Taskcluster jobs automatically. '
                'Please manually configure pulse job ingestion using this guide: '
                'https://treeherder.readthedocs.io/pulseload.html'
            )
Example #29
def test_ingest_hg_pushlog_cache_last_push(jm, initial_data, test_repository,
                                           test_base_dir, mock_post_json,
                                           activate_responses):
    """
    ingesting a number of pushes should cache the top revision of the last push
    """

    pushlog_path = os.path.join(test_base_dir, 'sample_data',
                                'hg_pushlog.json')
    with open(pushlog_path) as f:
        pushlog_content = f.read()
    pushlog_fake_url = "http://www.thisismypushlog.com"
    responses.add(responses.GET, pushlog_fake_url, body=pushlog_content,
                  status=200, content_type='application/json')

    process = HgPushlogProcess()
    process.run(pushlog_fake_url, jm.project)

    pushlog_dict = json.loads(pushlog_content)
    pushes = pushlog_dict['pushes']
    max_push_id = max([int(k) for k in pushes.keys()])

    assert cache.get("test_treeherder:last_push_id") == max_push_id
Example #30
def test_ingest_hg_pushlog(jm, initial_data, test_base_dir,
                           test_repository, mock_post_json_data):
    """ingesting a number of pushes should populate result set and revisions"""

    pushlog = os.path.join(test_base_dir, 'sample_data', 'hg_pushlog.json')
    process = HgPushlogProcess()

    process.run("file://{0}".format(pushlog), jm.project)

    pushes_stored = jm.get_jobs_dhub().execute(
        proc="jobs_test.selects.result_set_ids",
        return_type='tuple'
    )

    assert len(pushes_stored) == 10

    revisions_stored = jm.get_jobs_dhub().execute(
        proc="jobs_test.selects.revision_ids",
        return_type='tuple'
    )

    assert len(revisions_stored) == 15

    jm.disconnect()
Example #31
def test_ingest_hg_pushlog_already_stored(jm, initial_data, test_base_dir,
                                          test_repository, mock_post_json, activate_responses):
    """test that trying to ingest a push already stored doesn't doesn't affect
    all the pushes in the request,
    e.g. trying to store [A,B] with A already stored, B will be stored"""

    pushlog_path = os.path.join(test_base_dir, 'sample_data', 'hg_pushlog.json')
    with open(pushlog_path) as f:
        pushlog_content = f.read()
    pushes = list(json.loads(pushlog_content).values())
    first_push, second_push = pushes[0:2]

    pushlog_fake_url = "http://www.thisismypushlog.com/?full=1"

    # store the first push only
    first_push_json = json.dumps({"1": first_push})
    responses.add(responses.GET, pushlog_fake_url,
                  body=first_push_json, status=200,
                  content_type='application/json',
                  match_querystring=True,
                  )

    process = HgPushlogProcess()
    process.run(pushlog_fake_url, jm.project)

    pushes_stored = jm.get_dhub().execute(
        proc="jobs_test.selects.result_set_ids",
        return_type='tuple'
    )

    assert len(pushes_stored) == 1

    # store both first and second push
    first_and_second_push_json = json.dumps(
        {"1": first_push, "2": second_push}
    )
    responses.add(
        responses.GET,
        pushlog_fake_url + "&fromchange=2c25d2bbbcd6ddbd45962606911fd429e366b8e1",
        body=first_and_second_push_json,
        status=200, content_type='application/json',
        match_querystring=True)

    process = HgPushlogProcess()

    process.run(pushlog_fake_url, jm.project)

    pushes_stored = jm.get_dhub().execute(
        proc="jobs_test.selects.result_set_ids",
        return_type='tuple'
    )

    assert len(pushes_stored) == 2
Example #32
def test_ingest_hg_pushlog_already_stored(jm, test_base_dir,
                                          test_repository, activate_responses):
    """test that trying to ingest a push already stored doesn't doesn't affect
    all the pushes in the request,
    e.g. trying to store [A,B] with A already stored, B will be stored"""

    pushlog_path = os.path.join(test_base_dir, 'sample_data', 'hg_pushlog.json')
    with open(pushlog_path) as f:
        pushlog_json = json.load(f)
    pushes = list(pushlog_json['pushes'].values())
    first_push, second_push = pushes[0:2]

    pushlog_fake_url = "http://www.thisismypushlog.com/?full=1&version=2"

    # store the first push only
    first_push_json = json.dumps({"lastpushid": 1, "pushes": {"1": first_push}})
    responses.add(responses.GET, pushlog_fake_url,
                  body=first_push_json, status=200,
                  content_type='application/json',
                  match_querystring=True,
                  )

    process = HgPushlogProcess()
    process.run(pushlog_fake_url, jm.project)

    assert Push.objects.count() == 1

    # store both first and second push
    first_and_second_push_json = json.dumps(
        {"lastpushid": 2, "pushes": {"1": first_push, "2": second_push}}
    )

    responses.add(
        responses.GET,
        pushlog_fake_url + "&startID=1",
        body=first_and_second_push_json,
        status=200, content_type='application/json',
        match_querystring=True)

    process = HgPushlogProcess()

    process.run(pushlog_fake_url, jm.project)

    assert Push.objects.count() == 2
Example #33
    def handle(self, *args, **options):
        typeOfIngestion = options["ingestion_type"][0]
        root_url = options["root_url"]

        if typeOfIngestion == "task":
            assert options["taskId"]
            loop.run_until_complete(handleTaskId(options["taskId"], root_url))
        elif typeOfIngestion == "pr":
            assert options["prUrl"]
            pr_url = options["prUrl"]
            splitUrl = pr_url.split("/")
            org = splitUrl[3]
            repo = splitUrl[4]
            pulse = {
                "exchange": "exchange/taskcluster-github/v1/pull-request",
                "routingKey": "primary.{}.{}.synchronize".format(org, repo),
                "payload": {
                    "repository": repo,
                    "organization": org,
                    "action": "synchronize",
                    "details": {
                        "event.pullNumber":
                        splitUrl[6],
                        "event.base.repo.url":
                        "https://github.com/{}/{}.git".format(org, repo),
                        "event.head.repo.url":
                        "https://github.com/{}/{}.git".format(org, repo),
                    },
                }
            }
            PushLoader().process(pulse["payload"], pulse["exchange"], root_url)
        elif typeOfIngestion == "git-push":
            raise Exception("This is not yet implemented")
        elif typeOfIngestion == "push":
            project = options["project"]
            commit = options["commit"]

            # get reference to repo
            repo = Repository.objects.get(name=project, active_status="active")
            fetch_push_id = None

            # make sure all tasks are run synchronously / immediately
            settings.CELERY_TASK_ALWAYS_EAGER = True

            # get hg pushlog
            pushlog_url = "%s/json-pushes/?full=1&version=2" % repo.url

            # ingest this particular revision for this project
            process = HgPushlogProcess()
            # Use the actual push SHA, in case the changeset specified was a tag
            # or branch name (e.g. tip). HgPushlogProcess returns the full SHA.
            process.run(pushlog_url,
                        project,
                        changeset=commit,
                        last_push_id=fetch_push_id)

            if options["ingest_all_tasks"]:
                # XXX: Need logic to get from project/revision to taskGroupId
                logger.info("## START ##")
                # loop.run_until_complete(processTasks("ZYnMSfwCS5Cc_Wi_e-ZlSA", repo.tc_root_url))
                logger.info("## END ##")
                raise Exception(
                    "This is not yet implemented. You can still use it by changing the code to "
                    "grab the task group ID for your push.")
            else:
                logger.info(
                    "When implemented you will be able to use --ingest-all-tasks to ingest "
                    "all tasks associated to this push.")
Example #34
def fetch_hg_push_log(repo_name, repo_url):
    """
    Run an HgPushlog ETL process
    """
    process = HgPushlogProcess()
    process.run(repo_url + '/json-pushes/?full=1&version=2', repo_name)
Example #35
    def handle(self, *args, **options):
        typeOfIngestion = options["ingestion_type"][0]
        root_url = options["root_url"]

        if typeOfIngestion == "task":
            assert options["taskId"]
            loop.run_until_complete(handleTaskId(options["taskId"], root_url))
        elif typeOfIngestion == "pr":
            assert options["prUrl"]
            pr_url = options["prUrl"]
            splitUrl = pr_url.split("/")
            org = splitUrl[3]
            repo = splitUrl[4]
            pulse = {
                "exchange": "exchange/taskcluster-github/v1/pull-request",
                "routingKey": "primary.{}.{}.synchronize".format(org, repo),
                "payload": {
                    "repository": repo,
                    "organization": org,
                    "action": "synchronize",
                    "details": {
                        "event.pullNumber":
                        splitUrl[6],
                        "event.base.repo.url":
                        "https://github.com/{}/{}.git".format(org, repo),
                        "event.head.repo.url":
                        "https://github.com/{}/{}.git".format(org, repo),
                    },
                }
            }
            PushLoader().process(pulse["payload"], pulse["exchange"], root_url)
        elif typeOfIngestion.find("git") > -1:
            if not os.environ.get("GITHUB_TOKEN"):
                logger.warning(
                    "If you don't set up GITHUB_TOKEN you might hit Github's rate limiting. See docs for info."
                )

            if typeOfIngestion == "git-push":
                ingest_git_push(options["project"], options["commit"])
            elif typeOfIngestion == "git-pushes":
                ingest_git_pushes(options["project"], options["dryRun"])
        elif typeOfIngestion == "push":
            if not options["enable_eager_celery"]:
                logger.info(
                    "If you want all logs to be parsed use --enable-eager-celery"
                )
            else:
                # Make sure all tasks are run synchronously / immediately
                settings.CELERY_TASK_ALWAYS_EAGER = True

            # get reference to repo and ingest this particular revision for this project
            project = options["project"]
            commit = options["commit"]

            if not options['last_n_pushes'] and not commit:
                raise CommandError(
                    'must specify --last-n-pushes or a positional commit argument'
                )
            elif options['last_n_pushes'] and options['ingest_all_tasks']:
                raise CommandError(
                    'Can\'t specify --last-n-pushes and --ingest-all-tasks at the same time'
                )
            elif options['last_n_pushes'] and options['commit']:
                raise CommandError(
                    'Can\'t specify --last-n-pushes and commit/revision at the same time'
                )
            # get reference to repo
            repo = Repository.objects.get(name=project, active_status="active")
            fetch_push_id = None

            if options['last_n_pushes']:
                last_push_id = last_push_id_from_server(repo)
                fetch_push_id = max(1, last_push_id - options['last_n_pushes'])
                logger.info(
                    'last server push id: %d; fetching push %d and newer',
                    last_push_id, fetch_push_id)
            elif options["ingest_all_tasks"]:
                gecko_decision_task = get_decision_task_id(
                    project, commit, repo.tc_root_url)
                logger.info("## START ##")
                loop.run_until_complete(
                    processTasks(gecko_decision_task, repo.tc_root_url))
                logger.info("## END ##")
            else:
                logger.info(
                    "You can ingest all tasks for a push with -a/--ingest-all-tasks."
                )

            # get hg pushlog
            pushlog_url = "%s/json-pushes/?full=1&version=2" % repo.url
            # ingest this particular revision for this project
            process = HgPushlogProcess()
            # Use the actual push SHA, in case the changeset specified was a tag
            # or branch name (e.g. tip). HgPushlogProcess returns the full SHA.
            process.run(pushlog_url,
                        project,
                        changeset=commit,
                        last_push_id=fetch_push_id)
Example #36
def fetch_hg_push_log(repo_name, repo_url):
    """
    Run an HgPushlog ETL process
    """
    process = HgPushlogProcess()
    process.run(repo_url + '/json-pushes/?full=1', repo_name)
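Note the two pushlog shapes these examples exercise: ?full=1 (Examples #19, #31 and the function above) returns a bare mapping of push id to push, while ?full=1&version=2 wraps it in an envelope with lastpushid. A sketch of both, with illustrative payloads:

v1_payload = {"1": {"changesets": []}}  # json-pushes/?full=1
v2_payload = {"lastpushid": 1,          # json-pushes/?full=1&version=2
              "pushes": {"1": {"changesets": []}}}
assert v2_payload["pushes"] == v1_payload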