def test_ingest_hg_pushlog(jm, test_base_dir, test_repository,
                           mock_post_json, activate_responses):
    """ingesting a number of pushes should populate result set and revisions"""
    pushlog_path = os.path.join(test_base_dir, 'sample_data', 'hg_pushlog.json')
    with open(pushlog_path) as f:
        pushlog_content = f.read()
    pushlog_fake_url = "http://www.thisismypushlog.com"
    push_num = 10

    responses.add(responses.GET, pushlog_fake_url,
                  body=pushlog_content, status=200,
                  content_type='application/json')

    process = HgPushlogProcess()
    process.run(pushlog_fake_url, jm.project)

    pushes_stored = jm.get_dhub().execute(
        proc="jobs_test.selects.result_set_ids",
        return_type='tuple'
    )

    assert len(pushes_stored) == push_num

    revisions_stored = jm.get_dhub().execute(
        proc="jobs_test.selects.revision_ids",
        return_type='tuple'
    )

    assert len(revisions_stored) == 15
def test_empty_json_pushes(jm, test_base_dir, test_repository,
                           mock_post_json, activate_responses):
    """
    Gracefully handle getting an empty list of pushes from json-pushes
    """
    pushlog_fake_url = "http://www.thisismypushlog.com/?full=1&version=2"

    # respond with an empty list of pushes
    empty_push_json = json.dumps({"lastpushid": 123, "pushes": {}})
    responses.add(responses.GET, pushlog_fake_url,
                  body=empty_push_json, status=200,
                  content_type='application/json',
                  match_querystring=True,
                  )

    process = HgPushlogProcess()
    process.run(pushlog_fake_url, jm.project)

    pushes_stored = jm.get_dhub().execute(
        proc="jobs_test.selects.result_set_ids",
        return_type='tuple'
    )

    assert len(pushes_stored) == 0
def test_empty_json_pushes(jm, test_base_dir, test_repository,
                           activate_responses):
    """
    Gracefully handle getting an empty list of pushes from json-pushes
    """
    pushlog_fake_url = "http://www.thisismypushlog.com/?full=1&version=2"

    # respond with an empty list of pushes
    empty_push_json = json.dumps({"lastpushid": 123, "pushes": {}})
    responses.add(responses.GET, pushlog_fake_url,
                  body=empty_push_json, status=200,
                  content_type='application/json',
                  match_querystring=True,
                  )

    process = HgPushlogProcess()
    process.run(pushlog_fake_url, jm.project)

    pushes_stored = jm.get_dhub().execute(
        proc="jobs_test.selects.result_set_ids",
        return_type='tuple'
    )

    assert len(pushes_stored) == 0
def handle(self, *args, **options):
    taskId = options["taskId"]
    if taskId:
        root_url = options["root_url"]
        loop.run_until_complete(handleTaskId(taskId, root_url))
    else:
        project = options["project"]
        changeset = options["changeset"]

        # get reference to repo
        repo = Repository.objects.get(name=project, active_status='active')
        fetch_push_id = None

        # make sure all tasks are run synchronously / immediately
        settings.CELERY_TASK_ALWAYS_EAGER = True

        # get hg pushlog
        pushlog_url = '%s/json-pushes/?full=1&version=2' % repo.url

        # ingest this particular revision for this project
        process = HgPushlogProcess()
        # Use the actual push SHA, in case the changeset specified was a tag
        # or branch name (eg tip). HgPushlogProcess returns the full SHA.
        process.run(pushlog_url, project, changeset=changeset,
                    last_push_id=fetch_push_id)

        # XXX: Need logic to get from project/revision to taskGroupId
        taskGroupId = 'ZYnMSfwCS5Cc_Wi_e-ZlSA'
        logger.info("## START ##")
        loop.run_until_complete(processTasks(taskGroupId, repo.tc_root_url))
        logger.info("## END ##")
def test_ingest_hg_pushlog_cache_last_push(test_repository, test_base_dir,
                                           activate_responses):
    """
    ingesting a number of pushes should cache the top revision of the last push
    """
    pushlog_path = os.path.join(test_base_dir, 'sample_data', 'hg_pushlog.json')
    with open(pushlog_path) as f:
        pushlog_content = f.read()
    pushlog_fake_url = "http://www.thisismypushlog.com"
    responses.add(responses.GET, pushlog_fake_url,
                  body=pushlog_content, status=200,
                  content_type='application/json')

    process = HgPushlogProcess()
    process.run(pushlog_fake_url, test_repository.name)

    pushlog_dict = json.loads(pushlog_content)
    pushes = pushlog_dict['pushes']
    max_push_id = max([int(k) for k in pushes.keys()])

    cache_key = "{}:last_push_id".format(test_repository.name)
    assert cache.get(cache_key) == max_push_id
def _handle(self, *args, **options):
    if len(args) != 2:
        raise CommandError("Need to specify (only) branch and changeset")
    (project, changeset) = args

    # get reference to repo
    repo = Repository.objects.get(name=project, active_status='active')

    # make sure all tasks are run synchronously / immediately
    settings.CELERY_ALWAYS_EAGER = True

    # get hg pushlog
    pushlog_url = '%s/json-pushes/?full=1&version=2' % repo.url

    # ingest this particular revision for this project
    process = HgPushlogProcess()
    # Use the actual push SHA, in case the changeset specified was a tag
    # or branch name (eg tip). HgPushlogProcess returns the full SHA.
    push_sha = process.run(pushlog_url, project, changeset=changeset)

    Builds4hJobsProcess().run(project_filter=project,
                              revision_filter=push_sha,
                              job_group_filter=options['filter_job_group'])
    PendingJobsProcess().run(project_filter=project,
                             revision_filter=push_sha,
                             job_group_filter=options['filter_job_group'])
    RunningJobsProcess().run(project_filter=project,
                             revision_filter=push_sha,
                             job_group_filter=options['filter_job_group'])
def test_ingest_hg_pushlog(jm, initial_data, test_base_dir,
                           test_repository, mock_post_json_data,
                           activate_responses):
    """ingesting a number of pushes should populate result set and revisions"""
    pushlog_path = os.path.join(test_base_dir, 'sample_data', 'hg_pushlog.json')
    pushlog_content = open(pushlog_path).read()
    pushlog_fake_url = "http://www.thisismypushlog.com"
    responses.add(responses.GET, pushlog_fake_url,
                  body=pushlog_content, status=200,
                  content_type='application/json')

    process = HgPushlogProcess()
    process.run(pushlog_fake_url, jm.project)

    pushes_stored = jm.get_jobs_dhub().execute(
        proc="jobs_test.selects.result_set_ids",
        return_type='tuple')

    assert len(pushes_stored) == 10

    revisions_stored = jm.get_jobs_dhub().execute(
        proc="jobs_test.selects.revision_ids",
        return_type='tuple')

    assert len(revisions_stored) == 15
def fetch_hg_push_log(repo_name, repo_url):
    """
    Run a HgPushlog etl process
    """
    newrelic.agent.add_custom_parameter("repo_name", repo_name)
    process = HgPushlogProcess()
    process.run(repo_url + '/json-pushes/?full=1&version=2', repo_name)
def test_ingest_hg_pushlog_cache_last_push(jm, initial_data, test_repository,
                                           test_base_dir, mock_post_json_data,
                                           activate_responses):
    """
    ingesting a number of pushes should cache the top revision of the last push
    """
    pushlog_path = os.path.join(test_base_dir, 'sample_data', 'hg_pushlog.json')
    pushlog_content = open(pushlog_path).read()
    pushlog_fake_url = "http://www.thisismypushlog.com"
    responses.add(responses.GET, pushlog_fake_url,
                  body=pushlog_content, status=200,
                  content_type='application/json')

    process = HgPushlogProcess()
    process.run(pushlog_fake_url, jm.project)

    pushlog_dict = json.loads(pushlog_content)
    max_push_id = max([int(k) for k in pushlog_dict.keys()])
    last_push = pushlog_dict[str(max_push_id)]
    last_push_revision = last_push["changesets"][0]["node"]

    assert cache.get("test_treeherder:last_push") == last_push_revision
def _handle(self, *args, **options):
    if len(args) != 2:
        raise CommandError("Need to specify (only) branch and changeset")
    (project, changeset) = args

    # get reference to repo
    rdm = RefDataManager()
    repos = filter(lambda x: x['name'] == project,
                   rdm.get_all_repository_info())
    if not repos:
        raise CommandError("No project found named '%s'" % project)
    repo = repos[0]

    # make sure all tasks are run synchronously / immediately
    settings.CELERY_ALWAYS_EAGER = True

    # get hg pushlog
    pushlog_url = '%s/json-pushes/?full=1&version=2' % repo['url']

    # ingest this particular revision for this project
    process = HgPushlogProcess()
    # Use the actual push SHA, in case the changeset specified was a tag
    # or branch name (eg tip). HgPushlogProcess returns the full SHA.
    push_sha = process.run(pushlog_url, project, changeset=changeset)[:12]

    Builds4hJobsProcess().run(project_filter=project,
                              revision_filter=push_sha,
                              job_group_filter=options['filter_job_group'])
    PendingJobsProcess().run(project_filter=project,
                             revision_filter=push_sha,
                             job_group_filter=options['filter_job_group'])
    RunningJobsProcess().run(project_filter=project,
                             revision_filter=push_sha,
                             job_group_filter=options['filter_job_group'])
def _handle(self, *args, **options):
    if len(args) != 2:
        raise CommandError("Need to specify (only) branch and changeset")
    (project, changeset) = args

    # get reference to repo
    rdm = RefDataManager()
    repos = filter(lambda x: x['name'] == project,
                   rdm.get_all_repository_info())
    if not repos:
        raise CommandError("No project found named '%s'" % project)
    repo = repos[0]

    # make sure all tasks are run synchronously / immediately
    settings.CELERY_ALWAYS_EAGER = True

    # get hg pushlog
    pushlog_url = '%s/json-pushes/?full=1' % repo['url']

    # ingest this particular revision for this project
    process = HgPushlogProcess()
    process.run(pushlog_url, project, changeset=changeset)

    self._process_all_objects_for_project(project)

    Builds4hJobsProcess().run(filter_to_project=project,
                              filter_to_revision=changeset)
    PendingJobsProcess().run(filter_to_project=project,
                             filter_to_revision=changeset)
    RunningJobsProcess().run(filter_to_project=project,
                             filter_to_revision=changeset)

    self._process_all_objects_for_project(project)
def handle(self, *args, **options):
    typeOfIngestion = options["ingestion_type"][0]
    root_url = options["root_url"]

    if typeOfIngestion == "task":
        assert options["taskId"]
        loop.run_until_complete(handleTaskId(options["taskId"], root_url))
    elif typeOfIngestion == "pr":
        assert options["prUrl"]
        pr_url = options["prUrl"]
        splitUrl = pr_url.split("/")
        org = splitUrl[3]
        repo = splitUrl[4]
        pulse = {
            "exchange": "exchange/taskcluster-github/v1/pull-request",
            "routingKey": "primary.{}.{}.synchronize".format(org, repo),
            "payload": {
                "repository": repo,
                "organization": org,
                "action": "synchronize",
                "details": {
                    "event.pullNumber": splitUrl[6],
                    "event.base.repo.url": "https://github.com/{}/{}.git".format(org, repo),
                    "event.head.repo.url": "https://github.com/{}/{}.git".format(org, repo),
                },
            }
        }
        PushLoader().process(pulse["payload"], pulse["exchange"], root_url)
    elif typeOfIngestion == "git-push":
        raise Exception("This is not yet implemented")
    elif typeOfIngestion == "push":
        if not options["enable_eager_celery"]:
            logger.info(
                "If you want all logs to be parsed use --enable-eager-celery"
            )
        else:
            # Make sure all tasks are run synchronously / immediately
            settings.CELERY_TASK_ALWAYS_EAGER = True

        # get reference to repo and ingest this particular revision for this project
        project = options["project"]
        commit = options["commit"]
        repo = Repository.objects.get(name=project, active_status="active")
        pushlog_url = "%s/json-pushes/?full=1&version=2" % repo.url
        process = HgPushlogProcess()
        process.run(pushlog_url, project, changeset=commit, last_push_id=None)

        if options["ingest_all_tasks"]:
            gecko_decision_task = get_decision_task_id(project, commit, repo.tc_root_url)
            logger.info("## START ##")
            loop.run_until_complete(processTasks(gecko_decision_task, repo.tc_root_url))
            logger.info("## END ##")
        else:
            logger.info(
                "You can ingest all tasks for a push with -a/--ingest-all-tasks."
            )
def _ingest_hg_push(project, revision, fetch_push_id=None):
    # get reference to repo
    repo = Repository.objects.get(name=project, active_status="active")

    # get hg pushlog
    pushlog_url = "%s/json-pushes/?full=1&version=2" % repo.url

    # ingest this particular revision for this project
    process = HgPushlogProcess()
    # Use the actual push SHA, in case the changeset specified was a tag
    # or branch name (eg tip). HgPushlogProcess returns the full SHA.
    process.run(pushlog_url, project, changeset=revision,
                last_push_id=fetch_push_id)
def test_ingest_hg_pushlog_already_stored(test_repository, test_base_dir,
                                          activate_responses):
    """test that trying to ingest a push already stored doesn't affect
    the other pushes in the request, e.g. trying to store [A,B] with A
    already stored, B will be stored"""
    pushlog_path = os.path.join(test_base_dir, 'sample_data', 'hg_pushlog.json')
    with open(pushlog_path) as f:
        pushlog_json = json.load(f)
    pushes = list(pushlog_json['pushes'].values())
    first_push, second_push = pushes[0:2]

    pushlog_fake_url = "http://www.thisismypushlog.com/?full=1&version=2"

    # store the first push only
    first_push_json = json.dumps({
        "lastpushid": 1,
        "pushes": {"1": first_push}
    })
    responses.add(
        responses.GET, pushlog_fake_url,
        body=first_push_json, status=200,
        content_type='application/json',
        match_querystring=True,
    )

    process = HgPushlogProcess()
    process.run(pushlog_fake_url, test_repository.name)

    assert Push.objects.count() == 1

    # store both first and second push
    first_and_second_push_json = json.dumps({
        "lastpushid": 2,
        "pushes": {"1": first_push, "2": second_push}
    })
    responses.add(
        responses.GET,
        pushlog_fake_url + "&startID=1",
        body=first_and_second_push_json, status=200,
        content_type='application/json',
        match_querystring=True,
    )

    process = HgPushlogProcess()
    process.run(pushlog_fake_url, test_repository.name)

    assert Push.objects.count() == 2
def _handle(self, *args, **options):
    project = options['project']
    changeset = options['changeset']
    if not options['last_n_pushes'] and not changeset:
        raise CommandError('must specify --last-n-pushes or a positional '
                           'changeset argument')

    # get reference to repo
    repo = Repository.objects.get(name=project, active_status='active')

    if options['last_n_pushes']:
        last_push_id = last_push_id_from_server(repo)
        fetch_push_id = max(1, last_push_id - options['last_n_pushes'])
        logger.info('last server push id: %d; fetching push %d and newer'
                    % (last_push_id, fetch_push_id))
    else:
        fetch_push_id = None

    # make sure all tasks are run synchronously / immediately
    settings.CELERY_ALWAYS_EAGER = True

    # get hg pushlog
    pushlog_url = '%s/json-pushes/?full=1&version=2' % repo.url

    # ingest this particular revision for this project
    process = HgPushlogProcess()
    # Use the actual push SHA, in case the changeset specified was a tag
    # or branch name (eg tip). HgPushlogProcess returns the full SHA.
    push_sha = process.run(pushlog_url, project, changeset=changeset,
                           last_push_id=fetch_push_id)

    # Only perform additional processing if fetching a single changeset,
    # because we only have the sha1 of the tip-most push in "last N pushes"
    # mode and can't filter appropriately.
    if not fetch_push_id:
        group_filter = options['filter_job_group']
        Builds4hJobsProcess().run(project_filter=project,
                                  revision_filter=push_sha,
                                  job_group_filter=group_filter)
        PendingJobsProcess().run(project_filter=project,
                                 revision_filter=push_sha,
                                 job_group_filter=group_filter)
        RunningJobsProcess().run(project_filter=project,
                                 revision_filter=push_sha,
                                 job_group_filter=group_filter)
def test_ingest_hg_pushlog_already_stored(jm, initial_data, test_base_dir,
                                          test_repository, mock_post_json_data,
                                          activate_responses):
    """test that trying to ingest a push already stored doesn't affect
    the other pushes in the request, e.g. trying to store [A,B] with A
    already stored, B will be stored"""
    pushlog_path = os.path.join(test_base_dir, 'sample_data', 'hg_pushlog.json')
    pushlog_content = open(pushlog_path).read()
    pushes = json.loads(pushlog_content).values()
    first_push, second_push = pushes[0:2]

    pushlog_fake_url = "http://www.thisismypushlog.com/?full=1"

    # store the first push only
    first_push_json = json.dumps({"1": first_push})
    responses.add(
        responses.GET, pushlog_fake_url,
        body=first_push_json, status=200,
        content_type='application/json',
        match_querystring=True,
    )

    process = HgPushlogProcess()
    process.run(pushlog_fake_url, jm.project)

    pushes_stored = jm.get_jobs_dhub().execute(
        proc="jobs_test.selects.result_set_ids",
        return_type='tuple')

    assert len(pushes_stored) == 1

    # store both first and second push
    first_and_second_push_json = json.dumps({
        "1": first_push,
        "2": second_push
    })
    responses.add(responses.GET,
                  pushlog_fake_url + "&fromchange=2c25d2bbbcd6ddbd45962606911fd429e366b8e1",
                  body=first_and_second_push_json, status=200,
                  content_type='application/json',
                  match_querystring=True)

    process = HgPushlogProcess()
    process.run(pushlog_fake_url, jm.project)

    pushes_stored = jm.get_jobs_dhub().execute(
        proc="jobs_test.selects.result_set_ids",
        return_type='tuple')

    assert len(pushes_stored) == 2
def test_ingest_hg_pushlog(jm, initial_data, test_base_dir,
                           test_repository, mock_post_json,
                           activate_responses, pulse_resultset_consumer):
    """ingesting a number of pushes should populate result set and revisions"""
    pushlog_path = os.path.join(test_base_dir, 'sample_data', 'hg_pushlog.json')
    with open(pushlog_path) as f:
        pushlog_content = f.read()
    pushlog_fake_url = "http://www.thisismypushlog.com"
    push_num = 10

    responses.add(responses.GET, pushlog_fake_url,
                  body=pushlog_content, status=200,
                  content_type='application/json')

    process = HgPushlogProcess()
    process.run(pushlog_fake_url, jm.project)

    pushes_stored = jm.get_dhub().execute(
        proc="jobs_test.selects.result_set_ids",
        return_type='tuple'
    )

    assert len(pushes_stored) == push_num

    rev_to_push = set()
    for push in json.loads(pushlog_content).values():
        # Add each rev to the set; remember we shorten them all down to 12 chars.
        rev_to_push.add(push['changesets'][-1]['node'][0:12])

    # Ensure for each push we sent a pulse notification...
    for _ in range(0, push_num):
        message = pulse_resultset_consumer.get(block=True, timeout=2)
        content = json.loads(message.body)
        assert content['revision'] in rev_to_push
        # Ensure we don't match the same revision twice...
        rev_to_push.remove(content['revision'])

    revisions_stored = jm.get_dhub().execute(
        proc="jobs_test.selects.revision_ids",
        return_type='tuple'
    )

    assert len(revisions_stored) == 15
def test_ingest_hg_pushlog(jm, initial_data, test_base_dir,
                           test_repository, mock_post_json_data,
                           activate_responses, pulse_resultset_consumer):
    """ingesting a number of pushes should populate result set and revisions"""
    pushlog_path = os.path.join(test_base_dir, 'sample_data', 'hg_pushlog.json')
    with open(pushlog_path) as f:
        pushlog_content = f.read()
    pushlog_fake_url = "http://www.thisismypushlog.com"
    push_num = 10

    responses.add(responses.GET, pushlog_fake_url,
                  body=pushlog_content, status=200,
                  content_type='application/json')

    process = HgPushlogProcess()
    process.run(pushlog_fake_url, jm.project)

    pushes_stored = jm.get_jobs_dhub().execute(
        proc="jobs_test.selects.result_set_ids",
        return_type='tuple'
    )

    assert len(pushes_stored) == push_num

    rev_to_push = set()
    for push in json.loads(pushlog_content).values():
        # Add each rev to the set; remember we shorten them all down to 12 chars.
        rev_to_push.add(push['changesets'][-1]['node'][0:12])

    # Ensure for each push we sent a pulse notification...
    for _ in range(0, push_num):
        message = pulse_resultset_consumer.get(block=True, timeout=2)
        content = json.loads(message.body)
        assert content['revision'] in rev_to_push
        # Ensure we don't match the same revision twice...
        rev_to_push.remove(content['revision'])

    revisions_stored = jm.get_jobs_dhub().execute(
        proc="jobs_test.selects.revision_ids",
        return_type='tuple'
    )

    assert len(revisions_stored) == 15
def test_ingest_hg_pushlog(test_repository, test_base_dir, activate_responses):
    """ingesting a number of pushes should populate result set and revisions"""
    pushlog_path = os.path.join(test_base_dir, 'sample_data', 'hg_pushlog.json')
    with open(pushlog_path) as f:
        pushlog_content = f.read()
    pushlog_fake_url = "http://www.thisismypushlog.com"
    responses.add(responses.GET, pushlog_fake_url,
                  body=pushlog_content, status=200,
                  content_type='application/json')

    process = HgPushlogProcess()
    process.run(pushlog_fake_url, test_repository.name)

    # should be 10 pushes, 15 revisions
    assert Push.objects.count() == 10
    assert Commit.objects.count() == 15
def test_ingest_hg_pushlog(test_repository, test_base_dir, activate_responses):
    """ingesting a number of pushes should populate push and revisions"""
    pushlog_path = os.path.join(test_base_dir, 'sample_data', 'hg_pushlog.json')
    with open(pushlog_path) as f:
        pushlog_content = f.read()
    pushlog_fake_url = "http://www.thisismypushlog.com"
    responses.add(responses.GET, pushlog_fake_url,
                  body=pushlog_content, status=200,
                  content_type='application/json')

    process = HgPushlogProcess()
    process.run(pushlog_fake_url, test_repository.name)

    # should be 10 pushes, 15 revisions
    assert Push.objects.count() == 10
    assert Commit.objects.count() == 15
def _handle(self, *args, **options):
    project = options['project']
    changeset = options['changeset']
    if not options['last_n_pushes'] and not changeset:
        raise CommandError('must specify --last-n-pushes or a positional '
                           'changeset argument')

    # get reference to repo
    repo = Repository.objects.get(name=project, active_status='active')

    if options['last_n_pushes']:
        last_push_id = last_push_id_from_server(repo)
        fetch_push_id = max(1, last_push_id - options['last_n_pushes'])
        logger.info('last server push id: %d; fetching push %d and newer',
                    last_push_id, fetch_push_id)
    else:
        fetch_push_id = None

    # make sure all tasks are run synchronously / immediately
    settings.CELERY_ALWAYS_EAGER = True

    # get hg pushlog
    pushlog_url = '%s/json-pushes/?full=1&version=2' % repo.url

    # ingest this particular revision for this project
    process = HgPushlogProcess()
    # Use the actual push SHA, in case the changeset specified was a tag
    # or branch name (eg tip). HgPushlogProcess returns the full SHA.
    process.run(pushlog_url, project, changeset=changeset,
                last_push_id=fetch_push_id)

    # Only perform additional processing if fetching a single changeset,
    # because we only have the sha1 of the tip-most push in "last N pushes"
    # mode and can't filter appropriately.
    if not fetch_push_id:
        raise CommandError(
            'This command is not yet able to ingest Taskcluster jobs automatically. '
            'Please manually configure pulse job ingestion using this guide: '
            'https://treeherder.readthedocs.io/pulseload.html')
def test_ingest_hg_pushlog(jm, initial_data, test_base_dir,
                           test_repository, mock_post_json_data):
    """ingesting a number of pushes should populate result set and revisions"""
    pushlog = os.path.join(test_base_dir, 'sample_data', 'hg_pushlog.json')

    process = HgPushlogProcess()
    process.run("file://{0}".format(pushlog), jm.project)

    pushes_stored = jm.get_jobs_dhub().execute(
        proc="jobs_test.selects.result_set_ids",
        return_type='tuple')

    assert len(pushes_stored) == 10

    revisions_stored = jm.get_jobs_dhub().execute(
        proc="jobs_test.selects.revision_ids",
        return_type='tuple')

    assert len(revisions_stored) == 15

    jm.disconnect()
def test_empty_json_pushes(test_repository, test_base_dir, activate_responses):
    """
    Gracefully handle getting an empty list of pushes from json-pushes
    """
    pushlog_fake_url = "http://www.thisismypushlog.com/?full=1&version=2"

    # respond with an empty list of pushes
    empty_push_json = json.dumps({"lastpushid": 123, "pushes": {}})
    responses.add(responses.GET, pushlog_fake_url,
                  body=empty_push_json, status=200,
                  content_type='application/json',
                  match_querystring=True,
                  )

    process = HgPushlogProcess()
    process.run(pushlog_fake_url, test_repository.name)

    assert Push.objects.count() == 0
def _handle(self, *args, **options):
    project = options['project']
    changeset = options['changeset']
    if not options['last_n_pushes'] and not changeset:
        raise CommandError('must specify --last-n-pushes or a positional '
                           'changeset argument')

    # get reference to repo
    repo = Repository.objects.get(name=project, active_status='active')

    if options['last_n_pushes']:
        last_push_id = last_push_id_from_server(repo)
        fetch_push_id = max(1, last_push_id - options['last_n_pushes'])
        logger.info('last server push id: %d; fetching push %d and newer',
                    last_push_id, fetch_push_id)
    else:
        fetch_push_id = None

    # make sure all tasks are run synchronously / immediately
    settings.CELERY_TASK_ALWAYS_EAGER = True

    # get hg pushlog
    pushlog_url = '%s/json-pushes/?full=1&version=2' % repo.url

    # ingest this particular revision for this project
    process = HgPushlogProcess()
    # Use the actual push SHA, in case the changeset specified was a tag
    # or branch name (eg tip). HgPushlogProcess returns the full SHA.
    process.run(pushlog_url, project, changeset=changeset,
                last_push_id=fetch_push_id)

    # Only perform additional processing if fetching a single changeset,
    # because we only have the sha1 of the tip-most push in "last N pushes"
    # mode and can't filter appropriately.
    if not fetch_push_id:
        raise CommandError(
            'This command is not yet able to ingest Taskcluster jobs automatically. '
            'Please manually configure pulse job ingestion using this guide: '
            'https://treeherder.readthedocs.io/pulseload.html'
        )
def test_ingest_hg_pushlog_cache_last_push(jm, initial_data, test_repository,
                                           test_base_dir, mock_post_json,
                                           activate_responses):
    """
    ingesting a number of pushes should cache the top revision of the last push
    """
    pushlog_path = os.path.join(test_base_dir, 'sample_data', 'hg_pushlog.json')
    with open(pushlog_path) as f:
        pushlog_content = f.read()
    pushlog_fake_url = "http://www.thisismypushlog.com"
    responses.add(responses.GET, pushlog_fake_url,
                  body=pushlog_content, status=200,
                  content_type='application/json')

    process = HgPushlogProcess()
    process.run(pushlog_fake_url, jm.project)

    pushlog_dict = json.loads(pushlog_content)
    pushes = pushlog_dict['pushes']
    max_push_id = max([int(k) for k in pushes.keys()])

    assert cache.get("test_treeherder:last_push_id") == max_push_id
def test_ingest_hg_pushlog(jm, initial_data, test_base_dir,
                           test_repository, mock_post_json_data):
    """ingesting a number of pushes should populate result set and revisions"""
    pushlog = os.path.join(test_base_dir, 'sample_data', 'hg_pushlog.json')

    process = HgPushlogProcess()
    process.run("file://{0}".format(pushlog), jm.project)

    pushes_stored = jm.get_jobs_dhub().execute(
        proc="jobs_test.selects.result_set_ids",
        return_type='tuple'
    )

    assert len(pushes_stored) == 10

    revisions_stored = jm.get_jobs_dhub().execute(
        proc="jobs_test.selects.revision_ids",
        return_type='tuple'
    )

    assert len(revisions_stored) == 15

    jm.disconnect()
def test_ingest_hg_pushlog_already_stored(jm, initial_data, test_base_dir,
                                          test_repository, mock_post_json,
                                          activate_responses):
    """test that trying to ingest a push already stored doesn't affect
    the other pushes in the request, e.g. trying to store [A,B] with A
    already stored, B will be stored"""
    pushlog_path = os.path.join(test_base_dir, 'sample_data', 'hg_pushlog.json')
    with open(pushlog_path) as f:
        pushlog_content = f.read()
    pushes = json.loads(pushlog_content).values()
    first_push, second_push = pushes[0:2]

    pushlog_fake_url = "http://www.thisismypushlog.com/?full=1"

    # store the first push only
    first_push_json = json.dumps({"1": first_push})
    responses.add(responses.GET, pushlog_fake_url,
                  body=first_push_json, status=200,
                  content_type='application/json',
                  match_querystring=True,
                  )

    process = HgPushlogProcess()
    process.run(pushlog_fake_url, jm.project)

    pushes_stored = jm.get_dhub().execute(
        proc="jobs_test.selects.result_set_ids",
        return_type='tuple'
    )

    assert len(pushes_stored) == 1

    # store both first and second push
    first_and_second_push_json = json.dumps(
        {"1": first_push, "2": second_push}
    )
    responses.add(
        responses.GET,
        pushlog_fake_url + "&fromchange=2c25d2bbbcd6ddbd45962606911fd429e366b8e1",
        body=first_and_second_push_json, status=200,
        content_type='application/json',
        match_querystring=True)

    process = HgPushlogProcess()
    process.run(pushlog_fake_url, jm.project)

    pushes_stored = jm.get_dhub().execute(
        proc="jobs_test.selects.result_set_ids",
        return_type='tuple'
    )

    assert len(pushes_stored) == 2
def test_ingest_hg_pushlog_already_stored(jm, test_base_dir, test_repository,
                                          activate_responses):
    """test that trying to ingest a push already stored doesn't affect
    the other pushes in the request, e.g. trying to store [A,B] with A
    already stored, B will be stored"""
    pushlog_path = os.path.join(test_base_dir, 'sample_data', 'hg_pushlog.json')
    with open(pushlog_path) as f:
        pushlog_json = json.load(f)
    pushes = list(pushlog_json['pushes'].values())
    first_push, second_push = pushes[0:2]

    pushlog_fake_url = "http://www.thisismypushlog.com/?full=1&version=2"

    # store the first push only
    first_push_json = json.dumps({"lastpushid": 1,
                                  "pushes": {"1": first_push}})
    responses.add(responses.GET, pushlog_fake_url,
                  body=first_push_json, status=200,
                  content_type='application/json',
                  match_querystring=True,
                  )

    process = HgPushlogProcess()
    process.run(pushlog_fake_url, jm.project)

    assert Push.objects.count() == 1

    # store both first and second push
    first_and_second_push_json = json.dumps(
        {"lastpushid": 2,
         "pushes": {"1": first_push, "2": second_push}}
    )
    responses.add(
        responses.GET,
        pushlog_fake_url + "&startID=1",
        body=first_and_second_push_json, status=200,
        content_type='application/json',
        match_querystring=True)

    process = HgPushlogProcess()
    process.run(pushlog_fake_url, jm.project)

    assert Push.objects.count() == 2
def handle(self, *args, **options):
    typeOfIngestion = options["ingestion_type"][0]
    root_url = options["root_url"]

    if typeOfIngestion == "task":
        assert options["taskId"]
        loop.run_until_complete(handleTaskId(options["taskId"], root_url))
    elif typeOfIngestion == "pr":
        assert options["prUrl"]
        pr_url = options["prUrl"]
        splitUrl = pr_url.split("/")
        org = splitUrl[3]
        repo = splitUrl[4]
        pulse = {
            "exchange": "exchange/taskcluster-github/v1/pull-request",
            "routingKey": "primary.{}.{}.synchronize".format(org, repo),
            "payload": {
                "repository": repo,
                "organization": org,
                "action": "synchronize",
                "details": {
                    "event.pullNumber": splitUrl[6],
                    "event.base.repo.url": "https://github.com/{}/{}.git".format(org, repo),
                    "event.head.repo.url": "https://github.com/{}/{}.git".format(org, repo),
                },
            }
        }
        PushLoader().process(pulse["payload"], pulse["exchange"], root_url)
    elif typeOfIngestion == "git-push":
        raise Exception("This is not yet implemented")
    elif typeOfIngestion == "push":
        project = options["project"]
        commit = options["commit"]

        # get reference to repo
        repo = Repository.objects.get(name=project, active_status="active")
        fetch_push_id = None

        # make sure all tasks are run synchronously / immediately
        settings.CELERY_TASK_ALWAYS_EAGER = True

        # get hg pushlog
        pushlog_url = "%s/json-pushes/?full=1&version=2" % repo.url

        # ingest this particular revision for this project
        process = HgPushlogProcess()
        # Use the actual push SHA, in case the changeset specified was a tag
        # or branch name (eg tip). HgPushlogProcess returns the full SHA.
        process.run(pushlog_url, project, changeset=commit,
                    last_push_id=fetch_push_id)

        if options["ingest_all_tasks"]:
            # XXX: Need logic to get from project/revision to taskGroupId
            logger.info("## START ##")
            # loop.run_until_complete(processTasks("ZYnMSfwCS5Cc_Wi_e-ZlSA", repo.tc_root_url))
            logger.info("## END ##")
            raise Exception(
                "This is not yet implemented. You can still use it by changing the code to "
                "grab the task group ID for your push.")
        else:
            logger.info(
                "When implemented you will be able to use --ingest-all-tasks to ingest "
                "all tasks associated to this push.")
def fetch_hg_push_log(repo_name, repo_url):
    """
    Run a HgPushlog etl process
    """
    process = HgPushlogProcess()
    process.run(repo_url + '/json-pushes/?full=1&version=2', repo_name)
def handle(self, *args, **options):
    typeOfIngestion = options["ingestion_type"][0]
    root_url = options["root_url"]

    if typeOfIngestion == "task":
        assert options["taskId"]
        loop.run_until_complete(handleTaskId(options["taskId"], root_url))
    elif typeOfIngestion == "pr":
        assert options["prUrl"]
        pr_url = options["prUrl"]
        splitUrl = pr_url.split("/")
        org = splitUrl[3]
        repo = splitUrl[4]
        pulse = {
            "exchange": "exchange/taskcluster-github/v1/pull-request",
            "routingKey": "primary.{}.{}.synchronize".format(org, repo),
            "payload": {
                "repository": repo,
                "organization": org,
                "action": "synchronize",
                "details": {
                    "event.pullNumber": splitUrl[6],
                    "event.base.repo.url": "https://github.com/{}/{}.git".format(org, repo),
                    "event.head.repo.url": "https://github.com/{}/{}.git".format(org, repo),
                },
            }
        }
        PushLoader().process(pulse["payload"], pulse["exchange"], root_url)
    elif typeOfIngestion.find("git") > -1:
        if not os.environ.get("GITHUB_TOKEN"):
            logger.warning(
                "If you don't set up GITHUB_TOKEN you might hit Github's rate limiting. "
                "See docs for info."
            )
        if typeOfIngestion == "git-push":
            ingest_git_push(options["project"], options["commit"])
        elif typeOfIngestion == "git-pushes":
            ingest_git_pushes(options["project"], options["dryRun"])
    elif typeOfIngestion == "push":
        if not options["enable_eager_celery"]:
            logger.info(
                "If you want all logs to be parsed use --enable-eager-celery"
            )
        else:
            # Make sure all tasks are run synchronously / immediately
            settings.CELERY_TASK_ALWAYS_EAGER = True

        # get reference to repo and ingest this particular revision for this project
        project = options["project"]
        commit = options["commit"]

        if not options['last_n_pushes'] and not commit:
            raise CommandError(
                'must specify --last_n_pushes or a positional commit argument'
            )
        elif options['last_n_pushes'] and options['ingest_all_tasks']:
            raise CommandError(
                'Can\'t specify last_n_pushes and ingest_all_tasks at same time'
            )
        elif options['last_n_pushes'] and options['commit']:
            raise CommandError(
                'Can\'t specify last_n_pushes and commit/revision at the same time'
            )

        # get reference to repo
        repo = Repository.objects.get(name=project, active_status="active")
        fetch_push_id = None

        if options['last_n_pushes']:
            last_push_id = last_push_id_from_server(repo)
            fetch_push_id = max(1, last_push_id - options['last_n_pushes'])
            logger.info('last server push id: %d; fetching push %d and newer',
                        last_push_id, fetch_push_id)
        elif options["ingest_all_tasks"]:
            gecko_decision_task = get_decision_task_id(project, commit, repo.tc_root_url)
            logger.info("## START ##")
            loop.run_until_complete(processTasks(gecko_decision_task, repo.tc_root_url))
            logger.info("## END ##")
        else:
            logger.info(
                "You can ingest all tasks for a push with -a/--ingest-all-tasks."
            )

        # get hg pushlog
        pushlog_url = "%s/json-pushes/?full=1&version=2" % repo.url

        # ingest this particular revision for this project
        process = HgPushlogProcess()
        # Use the actual push SHA, in case the changeset specified was a tag
        # or branch name (eg tip). HgPushlogProcess returns the full SHA.
        process.run(pushlog_url, project, changeset=commit,
                    last_push_id=fetch_push_id)
def fetch_hg_push_log(repo_name, repo_url):
    """
    Run a HgPushlog etl process
    """
    process = HgPushlogProcess()
    process.run(repo_url + '/json-pushes/?full=1', repo_name)