def sync_label(self, owner, repo, name, children=False, requestor_id=None):
    """
    Fetch one label from the GitHub API and hand it to ``process_label``.

    ``children`` is accepted but not used by this function.

    Returns the ``name`` of the processed label. If the fetch raises
    ``NotFound``, a new ``NotFound`` carrying a descriptive message and a
    payload identifying the label is raised instead.
    """
    url = "/repos/{owner}/{repo}/labels/{name}".format(
        owner=owner, repo=repo, name=name,
    )
    try:
        response = fetch_url_from_github(url, requestor_id=requestor_id)
    except NotFound:
        # Re-raise with a message and payload that identify the missing label.
        message = "Label {name} on {owner}/{repo} not found".format(
            name=name, owner=owner, repo=repo,
        )
        details = {
            "type": "label",
            "name": name,
            "owner": owner,
            "repo": repo,
        }
        raise NotFound(message, details)

    payload = response.json()
    try:
        label = process_label(
            payload, via="api", fetched_at=datetime.now(), commit=True,
        )
    except IntegrityError as exc:
        # Multiple workers tried to insert the same label simultaneously.
        # Ask for a retry.
        self.retry(exc=exc)
    return label.name
def sync_page_of_labels(self, owner, repo, children=False, requestor_id=None,
                        per_page=100, page=1):
    """
    Fetch one page of a repository's labels and process each entry.

    ``children`` is accepted but not used by this function.

    Returns a list with the ``name`` of every label processed on this page.
    """
    url = (
        "/repos/{owner}/{repo}/labels?"
        "per_page={per_page}&page={page}"
    ).format(owner=owner, repo=repo, per_page=per_page, page=page)
    response = fetch_url_from_github(url, requestor_id=requestor_id)
    # One timestamp is shared by every label on the page.
    fetched_at = datetime.now()
    entries = response.json()

    names = []
    known_repo_id = None
    for entry in entries:
        try:
            label = process_label(
                entry, via="api", fetched_at=fetched_at, commit=True,
                repo_id=known_repo_id,
            )
            # Remember the first truthy repo_id and pass it to later calls.
            if not known_repo_id:
                known_repo_id = label.repo_id
            names.append(label.name)
        except IntegrityError as exc:
            self.retry(exc=exc)
    return names
def sync_pull_request(self, owner, repo, number, children=False,
                      requestor_id=None):
    """
    Fetch one pull request from the GitHub API and process it.

    When ``children`` is truthy, also queues
    ``spawn_page_tasks_for_pull_request_files`` for this pull request.

    Returns the ``id`` of the processed pull request. If the fetch raises
    ``NotFound``, a new ``NotFound`` with a descriptive message and an
    identifying payload is raised instead.
    """
    url = "/repos/{owner}/{repo}/pulls/{number}".format(
        owner=owner, repo=repo, number=number,
    )
    try:
        response = fetch_url_from_github(url, requestor_id=requestor_id)
    except NotFound:
        # Re-raise with a message and payload identifying the pull request.
        message = "PR {owner}/{repo}#{number} not found".format(
            owner=owner, repo=repo, number=number,
        )
        details = {
            "type": "pull_request",
            "owner": owner,
            "repo": repo,
            "number": number,
        }
        raise NotFound(message, details)

    payload = response.json()
    try:
        pr = process_pull_request(
            payload, via="api", fetched_at=datetime.now(), commit=True,
        )
    except IntegrityError as exc:
        self.retry(exc=exc)

    if children:
        spawn_page_tasks_for_pull_request_files.delay(
            owner, repo, number,
            children=children, requestor_id=requestor_id,
        )
    return pr.id
def sync_milestone(self, owner, repo, number, children=False,
                   requestor_id=None):
    """
    Fetch one milestone from the GitHub API and process it.

    ``children`` is accepted but not used by this function.

    Returns the ``number`` of the processed milestone. If the fetch raises
    ``NotFound``, a new ``NotFound`` with a descriptive message and an
    identifying payload is raised instead.
    """
    url = "/repos/{owner}/{repo}/milestones/{number}".format(
        owner=owner, repo=repo, number=number,
    )
    try:
        response = fetch_url_from_github(url, requestor_id=requestor_id)
    except NotFound:
        # Re-raise with a message and payload identifying the milestone.
        message = "Milestone #{number} on {owner}/{repo} not found".format(
            number=number, owner=owner, repo=repo,
        )
        details = {
            "type": "milestone",
            "number": number,
            "owner": owner,
            "repo": repo,
        }
        raise NotFound(message, details)

    payload = response.json()
    try:
        milestone = process_milestone(
            payload, via="api", fetched_at=datetime.now(), commit=True,
        )
    except IntegrityError as exc:
        # Multiple workers tried to insert the same milestone simultaneously.
        # Ask for a retry.
        self.retry(exc=exc)
    return milestone.number
def sync_page_of_pull_requests(self, owner, repo, state="all", children=False,
                               requestor_id=None, per_page=100, page=1):
    """
    Fetch one page of a repository's pull requests and process each entry.

    When ``children`` is truthy, ``spawn_page_tasks_for_pull_request_files``
    is queued for every pull request on the page.

    Returns a list with the ``id`` of every pull request processed.
    """
    url_template = (
        "/repos/{owner}/{repo}/pulls?"
        "state={state}&per_page={per_page}&page={page}"
    )
    url = url_template.format(
        owner=owner, repo=repo, state=state, per_page=per_page, page=page,
    )
    response = fetch_url_from_github(url, requestor_id=requestor_id)
    # One timestamp is shared by every pull request on the page.
    fetched_at = datetime.now()
    entries = response.json()

    pr_ids = []
    for entry in entries:
        try:
            pr = process_pull_request(
                entry, via="api", fetched_at=fetched_at, commit=True,
            )
            pr_ids.append(pr.id)
        except IntegrityError as exc:
            self.retry(exc=exc)

        if children:
            spawn_page_tasks_for_pull_request_files.delay(
                owner, repo, pr.number,
                children=children, requestor_id=requestor_id,
            )
    return pr_ids
def sync_page_of_milestones(self, owner, repo, state="all", children=False,
                            requestor_id=None, per_page=100, page=1):
    """
    Fetch one page of a repository's milestones and process each entry.

    ``children`` is accepted but not used by this function.

    Returns a list with the ``number`` of every milestone processed.
    """
    url_template = (
        "/repos/{owner}/{repo}/milestones?"
        "state={state}&per_page={per_page}&page={page}"
    )
    url = url_template.format(
        owner=owner, repo=repo, state=state, per_page=per_page, page=page,
    )
    response = fetch_url_from_github(url, requestor_id=requestor_id)
    # One timestamp is shared by every milestone on the page.
    fetched_at = datetime.now()
    entries = response.json()

    numbers = []
    known_repo_id = None
    for entry in entries:
        try:
            milestone = process_milestone(
                entry, via="api", fetched_at=fetched_at, commit=True,
                repo_id=known_repo_id,
            )
            # Remember the first truthy repo_id and pass it to later calls.
            if not known_repo_id:
                known_repo_id = milestone.repo_id
            numbers.append(milestone.number)
        except IntegrityError as exc:
            self.retry(exc=exc)
    return numbers
def sync_page_of_pull_request_files(self, owner, repo, number,
                                    pull_request_id=None, children=False,
                                    requestor_id=None, per_page=100, page=1):
    """
    Fetch one page of the files of a pull request and process each entry.

    If ``pull_request_id`` is falsy it is looked up with
    ``PullRequest.get(owner, repo, number)``. ``children`` is accepted but
    not used by this function.

    Returns a list with the ``sha`` of every file processed; entries for
    which ``process_pull_request_file`` raises ``NothingToDo`` are skipped.
    """
    if not pull_request_id:
        pull_request_id = PullRequest.get(owner, repo, number).id

    url_template = (
        "/repos/{owner}/{repo}/pulls/{number}/files?"
        "per_page={per_page}&page={page}"
    )
    url = url_template.format(
        owner=owner, repo=repo, number=number, per_page=per_page, page=page,
    )
    response = fetch_url_from_github(url, requestor_id=requestor_id)
    # One timestamp is shared by every file on the page.
    fetched_at = datetime.now()
    entries = response.json()

    shas = []
    for entry in entries:
        try:
            prf = process_pull_request_file(
                entry, via="api", fetched_at=fetched_at, commit=True,
                pull_request_id=pull_request_id,
            )
            shas.append(prf.sha)
        except IntegrityError as exc:
            self.retry(exc=exc)
        except NothingToDo:
            # Deliberately ignored: move on to the next file.
            continue
    return shas
def sync_page_of_labels(self, owner, repo, children=False, requestor_id=None,
                        per_page=100, page=1):
    """
    Sync a single page of labels for ``owner``/``repo``.

    Each entry of the fetched page is passed to ``process_label``; the
    ``children`` argument is accepted but unused here.

    Returns the list of processed label names, in page order.
    """
    url_template = (
        "/repos/{owner}/{repo}/labels?"
        "per_page={per_page}&page={page}"
    )
    page_url = url_template.format(
        owner=owner, repo=repo, per_page=per_page, page=page,
    )
    page_response = fetch_url_from_github(page_url, requestor_id=requestor_id)
    # Every label on this page is stamped with the same fetch time.
    fetched_at = datetime.now()

    label_names = []
    cached_repo_id = None
    for raw_label in page_response.json():
        try:
            processed = process_label(
                raw_label, via="api", fetched_at=fetched_at, commit=True,
                repo_id=cached_repo_id,
            )
            # Keep the first truthy repo_id for the following iterations.
            cached_repo_id = cached_repo_id or processed.repo_id
            label_names.append(processed.name)
        except IntegrityError as exc:
            self.retry(exc=exc)
    return label_names
def sync_page_of_milestones(self, owner, repo, state="all", children=False,
                            requestor_id=None, per_page=100, page=1):
    """
    Sync a single page of milestones for ``owner``/``repo``.

    Each entry of the fetched page is passed to ``process_milestone``; the
    ``children`` argument is accepted but unused here.

    Returns the list of processed milestone numbers, in page order.
    """
    page_url = (
        "/repos/{owner}/{repo}/milestones?"
        "state={state}&per_page={per_page}&page={page}"
    ).format(
        owner=owner, repo=repo, state=state, per_page=per_page, page=page,
    )
    page_response = fetch_url_from_github(page_url, requestor_id=requestor_id)
    # Every milestone on this page is stamped with the same fetch time.
    fetched_at = datetime.now()

    milestone_numbers = []
    cached_repo_id = None
    for raw_milestone in page_response.json():
        try:
            processed = process_milestone(
                raw_milestone, via="api", fetched_at=fetched_at, commit=True,
                repo_id=cached_repo_id,
            )
            # Keep the first truthy repo_id for the following iterations.
            cached_repo_id = cached_repo_id or processed.repo_id
            milestone_numbers.append(processed.number)
        except IntegrityError as exc:
            self.retry(exc=exc)
    return milestone_numbers
def sync_issue(self, owner, repo, number, children=False, requestor_id=None):
    """
    Fetch one issue from the GitHub API and process it.

    ``children`` is currently ignored.

    Returns the ``id`` of the processed issue. If the fetch raises
    ``NotFound``, a new ``NotFound`` with a descriptive message and an
    identifying payload is raised instead.
    """
    url = "/repos/{owner}/{repo}/issues/{number}".format(
        owner=owner, repo=repo, number=number,
    )
    try:
        response = fetch_url_from_github(url, requestor_id=requestor_id)
    except NotFound:
        # Re-raise with a message and payload identifying the issue.
        message = "Issue {owner}/{repo}#{number} not found".format(
            owner=owner, repo=repo, number=number,
        )
        details = {
            "type": "issue",
            "owner": owner,
            "repo": repo,
            "number": number,
        }
        raise NotFound(message, details)

    payload = response.json()
    try:
        issue = process_issue(
            payload, via="api", fetched_at=datetime.now(), commit=True,
        )
    except IntegrityError as exc:
        self.retry(exc=exc)
    # The `children` argument is ignored for now.
    return issue.id
def sync_page_of_repository_hooks(self, owner, repo, children=False,
                                  requestor_id=None, per_page=100, page=1):
    """
    Fetch one page of a repository's hooks and process each entry.

    ``children`` is accepted but not used by this function.

    Returns a list with the ``id`` of every hook processed on this page.
    """
    url_template = "/repos/{owner}/{repo}/hooks?per_page={per_page}&page={page}"
    url = url_template.format(
        owner=owner, repo=repo, per_page=per_page, page=page,
    )
    response = fetch_url_from_github(url, requestor_id=requestor_id)
    # One timestamp is shared by every hook on the page.
    fetched_at = datetime.now()
    entries = response.json()

    hook_ids = []
    for entry in entries:
        try:
            hook = process_repository_hook(
                entry, via="api", fetched_at=fetched_at, commit=True,
                requestor_id=requestor_id,
            )
            hook_ids.append(hook.id)
        except IntegrityError as exc:
            self.retry(exc=exc)
    return hook_ids
def sync_page_of_repositories_for_user(self, username, type="all",
                                       children=False, requestor_id=None,
                                       per_page=100, page=1):
    """
    Fetch one page of a user's repositories and process each entry.

    When ``requestor_id`` refers to the user named ``username``, the
    ``/user/repos`` endpoint is used instead of ``/users/{username}/repos``.

    When ``children`` is truthy, page-spawning tasks for issues, labels,
    milestones and pull requests are queued for every repository, and the
    repository-hooks task is queued only if the requestor's
    ``UserRepoAssociation`` for that repository has ``can_admin`` set.

    Returns a list with the ``id`` of every repository processed.
    """
    url_template = (
        "/users/{username}/repos?type={type}&per_page={per_page}&page={page}"
    )
    if requestor_id:
        requestor = User.query.get(int(requestor_id))
        assert requestor
        if requestor.login == username:
            # we can use the API for getting your *own* repos
            url_template = (
                "/user/repos?type={type}&per_page={per_page}&page={page}"
            )
    # The unused ``username`` keyword is harmless for the /user/repos template.
    url = url_template.format(
        username=username, type=type, per_page=per_page, page=page,
    )

    response = fetch_url_from_github(
        url, requestor_id=requestor_id,
        headers={"Accept": "application/vnd.github.moondragon+json"},
    )
    # One timestamp is shared by every repository on the page.
    fetched_at = datetime.now()
    entries = response.json()

    repo_ids = []
    for entry in entries:
        try:
            repo = process_repository(
                entry, via="api", fetched_at=fetched_at, commit=True,
                requestor_id=requestor_id,
            )
            repo_ids.append(repo.id)
        except IntegrityError as exc:
            self.retry(exc=exc)

        if not children:
            continue

        owner = repo.owner_login
        always_spawned = (
            spawn_page_tasks_for_issues,
            spawn_page_tasks_for_labels,
            spawn_page_tasks_for_milestones,
            spawn_page_tasks_for_pull_requests,
        )
        for spawner in always_spawned:
            spawner.delay(
                owner, repo.name,
                children=children, requestor_id=requestor_id,
            )
        # only try to get repo hooks if the requestor is an admin on this repo
        assoc = UserRepoAssociation.query.get((requestor_id, repo.id))
        if assoc and assoc.can_admin:
            spawn_page_tasks_for_repository_hooks.delay(
                owner, repo.name,
                children=children, requestor_id=requestor_id,
            )
    return repo_ids
def sync_repository(self, owner, repo, children=False, requestor_id=None):
    """
    Fetch one repository from the GitHub API and process it.

    ``owner`` and ``repo`` are the values interpolated into the
    ``/repos/{owner}/{repo}`` URL. When ``children`` is truthy, the
    page-spawning tasks for issues, labels, milestones, pull requests and
    repository hooks are queued for this repository.

    Returns the ``id`` of the processed repository. If the fetch raises
    ``NotFound``, a new ``NotFound`` with a descriptive message and an
    identifying payload is raised instead.
    """
    repo_url = "/repos/{owner}/{repo}".format(owner=owner, repo=repo)
    try:
        resp = fetch_url_from_github(repo_url, requestor_id=requestor_id)
    except NotFound:
        # add more context
        msg = "Repo {owner}/{repo} not found".format(owner=owner, repo=repo)
        raise NotFound(msg, {
            "type": "repository",
            "owner": owner,
            "repo": repo,
        })
    repo_data = resp.json()
    try:
        # Bind the processed object to its own name. Rebinding ``repo`` here
        # would make the child tasks below receive the object instead of the
        # repository name that they format into URLs and lock names.
        repo_obj = process_repository(
            repo_data, via="api", fetched_at=datetime.now(), commit=True,
            requestor_id=requestor_id,
        )
    except IntegrityError as exc:
        self.retry(exc=exc)

    if children:
        # ``repo`` is still the repository name passed in by the caller.
        spawn_page_tasks_for_issues.delay(
            owner, repo, children=children, requestor_id=requestor_id,
        )
        spawn_page_tasks_for_labels.delay(
            owner, repo, children=children, requestor_id=requestor_id,
        )
        spawn_page_tasks_for_milestones.delay(
            owner, repo, children=children, requestor_id=requestor_id,
        )
        spawn_page_tasks_for_pull_requests.delay(
            owner, repo, children=children, requestor_id=requestor_id,
        )
        # NOTE(review): sync_page_of_repositories_for_user only spawns the
        # hooks task when the requestor can admin the repo; this function
        # spawns it unconditionally -- confirm that is intended.
        spawn_page_tasks_for_repository_hooks.delay(
            owner, repo, children=children, requestor_id=requestor_id,
        )

    return repo_obj.id
def spawn_page_tasks_for_user_repositories(
        username, type="all", children=False, requestor_id=None, per_page=100,
):
    """
    Queue one ``sync_page_of_repositories_for_user`` task per page of results.

    A ``Mutex`` row named from ``LOCK_TEMPLATE`` is inserted first. If such a
    row already exists, or committing it raises ``IntegrityError``, nothing
    is queued and ``False`` is returned.

    The page count is read from the ``last`` link of a HEAD request
    (defaulting to 1). The page tasks are grouped and chained with
    ``user_repositories_scanned``.

    Returns ``False`` when the lock could not be taken, otherwise the result
    of calling ``.delay()`` on the chained workflow.
    """
    # acquire lock or fail (we're already in a transaction)
    lock_name = LOCK_TEMPLATE.format(username=username)
    if Mutex.query.get(lock_name):
        return False
    db.session.add(Mutex(name=lock_name, user_id=requestor_id))
    try:
        db.session.commit()
    except IntegrityError:
        return False
    logger.info("Lock {name} set by {requestor_id}".format(
        name=lock_name, requestor_id=requestor_id,
    ))

    url_template = "/users/{username}/repos?type={type}&per_page={per_page}"
    if requestor_id:
        requestor = User.query.get(int(requestor_id))
        assert requestor
        if requestor.login == username:
            # we can use the API for getting your *own* repos
            url_template = "/user/repos?type={type}&per_page={per_page}"
    # The unused ``username`` keyword is harmless for the /user/repos template.
    first_page_url = url_template.format(
        username=username, type=type, per_page=per_page,
    )

    head_response = fetch_url_from_github(
        first_page_url, method="HEAD", requestor_id=requestor_id,
        headers={"Accept": "application/vnd.github.moondragon+json"},
    )
    last_link = head_response.links.get('last', {})
    last_url = URLObject(last_link.get('url', ""))
    page_count = int(last_url.query.dict.get('page', 1))

    page_tasks = (
        sync_page_of_repositories_for_user.s(
            username=username, type=type,
            children=children, requestor_id=requestor_id,
            per_page=per_page, page=page,
        )
        for page in xrange(1, page_count + 1)
    )
    finisher = user_repositories_scanned.si(
        username=username, requestor_id=requestor_id,
    )
    workflow = group(page_tasks) | finisher
    return workflow.delay()
def spawn_page_tasks_for_milestones(owner, repo, state="all", children=False,
                                    requestor_id=None, per_page=100):
    """
    Queue one ``sync_page_of_milestones`` task per page of milestones.

    A ``Mutex`` row named from ``LOCK_TEMPLATE`` is inserted first. If such a
    row already exists, or committing it raises ``IntegrityError``, nothing
    is queued and ``False`` is returned.

    The page count is read from the ``last`` link of a HEAD request against
    the milestones listing (defaulting to 1). The page tasks are grouped and
    chained with ``milestones_scanned``. ``children`` is accepted but not
    forwarded to the page tasks.

    Returns ``False`` when the lock could not be taken, otherwise the result
    of calling ``.delay()`` on the chained workflow.
    """
    # acquire lock or fail (we're already in a transaction)
    lock_name = LOCK_TEMPLATE.format(owner=owner, repo=repo)
    existing = Mutex.query.get(lock_name)
    if existing:
        return False
    lock = Mutex(name=lock_name, user_id=requestor_id)
    db.session.add(lock)
    try:
        db.session.commit()
    except IntegrityError:
        return False
    else:
        logger.info("Lock {name} set by {requestor_id}".format(
            name=lock_name, requestor_id=requestor_id,
        ))

    # The page count must come from the milestones listing, which is the
    # endpoint sync_page_of_milestones pages through. Counting pages of
    # /pulls would spawn the wrong number of page tasks.
    milestone_list_url = (
        "/repos/{owner}/{repo}/milestones?"
        "state={state}&per_page={per_page}"
    ).format(
        owner=owner, repo=repo,
        state=state, per_page=per_page,
    )
    resp = fetch_url_from_github(
        milestone_list_url, method="HEAD", requestor_id=requestor_id,
    )
    # With no "last" link, fall back to a single page.
    last_page_url = URLObject(resp.links.get('last', {}).get('url', ""))
    last_page_num = int(last_page_url.query.dict.get('page', 1))
    g = group(
        sync_page_of_milestones.s(
            owner=owner, repo=repo, state=state, requestor_id=requestor_id,
            per_page=per_page, page=page,
        ) for page in xrange(1, last_page_num + 1)
    )
    finisher = milestones_scanned.si(
        owner=owner, repo=repo, requestor_id=requestor_id,
    )
    return (g | finisher).delay()
def sync_pull_request(self, owner, repo, number, children=False,
                      requestor_id=None):
    """
    Sync a single pull request of ``owner``/``repo`` from the GitHub API.

    The fetched JSON is passed to ``process_pull_request``. With a truthy
    ``children``, ``spawn_page_tasks_for_pull_request_files`` is queued too.

    Returns the processed pull request's ``id``. A ``NotFound`` from the
    fetch is replaced by a ``NotFound`` carrying a message and a payload
    describing which pull request was missing.
    """
    pull_url = "/repos/{owner}/{repo}/pulls/{number}".format(
        owner=owner, repo=repo, number=number,
    )
    try:
        api_response = fetch_url_from_github(
            pull_url, requestor_id=requestor_id,
        )
    except NotFound:
        # Attach context about what exactly was not found.
        context = {
            "type": "pull_request",
            "owner": owner,
            "repo": repo,
            "number": number,
        }
        text = "PR {owner}/{repo}#{number} not found".format(
            owner=owner, repo=repo, number=number,
        )
        raise NotFound(text, context)

    raw_pr = api_response.json()
    try:
        pr = process_pull_request(
            raw_pr, via="api", fetched_at=datetime.now(), commit=True,
        )
    except IntegrityError as exc:
        self.retry(exc=exc)

    if children:
        spawn_page_tasks_for_pull_request_files.delay(
            owner, repo, number,
            children=children, requestor_id=requestor_id,
        )
    return pr.id
def spawn_page_tasks_for_pull_requests(owner, repo, state="all",
                                       children=False, requestor_id=None,
                                       per_page=100):
    """
    Queue one ``sync_page_of_pull_requests`` task per page of pull requests.

    A ``Mutex`` row named from ``LOCK_TEMPLATE`` is inserted first. If such a
    row already exists, or committing it raises ``IntegrityError``, nothing
    is queued and ``False`` is returned.

    The page count is read from the ``last`` link of a HEAD request
    (defaulting to 1). The page tasks are grouped and chained with
    ``pull_requests_scanned``.

    Returns ``False`` when the lock could not be taken, otherwise the result
    of calling ``.delay()`` on the chained workflow.
    """
    # acquire lock or fail (we're already in a transaction)
    lock_name = LOCK_TEMPLATE.format(owner=owner, repo=repo)
    if Mutex.query.get(lock_name):
        return False
    db.session.add(Mutex(name=lock_name, user_id=requestor_id))
    try:
        db.session.commit()
    except IntegrityError:
        return False
    logger.info("Lock {name} set by {requestor_id}".format(
        name=lock_name, requestor_id=requestor_id,
    ))

    url_template = (
        "/repos/{owner}/{repo}/pulls?"
        "state={state}&per_page={per_page}"
    )
    first_page_url = url_template.format(
        owner=owner, repo=repo, state=state, per_page=per_page,
    )
    head_response = fetch_url_from_github(
        first_page_url, method="HEAD", requestor_id=requestor_id,
    )
    last_link = head_response.links.get('last', {})
    last_url = URLObject(last_link.get('url', ""))
    page_count = int(last_url.query.dict.get('page', 1))

    page_tasks = (
        sync_page_of_pull_requests.s(
            owner=owner, repo=repo, state=state,
            children=children, requestor_id=requestor_id,
            per_page=per_page, page=page
        )
        for page in xrange(1, page_count + 1)
    )
    finisher = pull_requests_scanned.si(
        owner=owner, repo=repo, requestor_id=requestor_id,
    )
    workflow = group(page_tasks) | finisher
    return workflow.delay()
def spawn_page_tasks_for_pull_request_files(owner, repo, number,
                                            children=False, requestor_id=None,
                                            per_page=100):
    """
    Queue one ``sync_page_of_pull_request_files`` task per page of files.

    A ``Mutex`` row named from ``LOCK_TEMPLATE`` is inserted first. If such a
    row already exists, or committing it raises ``IntegrityError``, nothing
    is queued and ``False`` is returned.

    The pull request is looked up with ``PullRequest.get`` so its ``id`` can
    be handed to every page task. The page count is read from the ``last``
    link of a HEAD request (defaulting to 1). The page tasks are grouped and
    chained with ``pull_request_files_scanned``.

    Returns ``False`` when the lock could not be taken, otherwise the result
    of calling ``.delay()`` on the chained workflow.
    """
    # acquire lock or fail (we're already in a transaction)
    lock_name = LOCK_TEMPLATE.format(owner=owner, repo=repo, number=number)
    if Mutex.query.get(lock_name):
        return False
    db.session.add(Mutex(name=lock_name, user_id=requestor_id))
    try:
        db.session.commit()
    except IntegrityError:
        return False

    pr = PullRequest.get(owner, repo, number)

    url_template = (
        "/repos/{owner}/{repo}/pulls/{number}/files?"
        "per_page={per_page}"
    )
    first_page_url = url_template.format(
        owner=owner, repo=repo, number=number, per_page=per_page,
    )
    head_response = fetch_url_from_github(
        first_page_url, method="HEAD", requestor_id=requestor_id,
    )
    last_link = head_response.links.get('last', {})
    last_url = URLObject(last_link.get('url', ""))
    page_count = int(last_url.query.dict.get('page', 1))

    page_tasks = (
        sync_page_of_pull_request_files.s(
            owner=owner, repo=repo, number=number, pull_request_id=pr.id,
            children=children, requestor_id=requestor_id,
            per_page=per_page, page=page,
        )
        for page in xrange(1, page_count + 1)
    )
    finisher = pull_request_files_scanned.si(
        owner=owner, repo=repo, number=number, requestor_id=requestor_id,
    )
    workflow = group(page_tasks) | finisher
    return workflow.delay()
def sync_page_of_pull_requests(self, owner, repo, state="all", children=False,
                               requestor_id=None, per_page=100, page=1):
    """
    Sync a single page of pull requests for ``owner``/``repo``.

    Each entry of the fetched page is passed to ``process_pull_request``.
    With a truthy ``children``, ``spawn_page_tasks_for_pull_request_files``
    is queued for each pull request as well.

    Returns the list of processed pull request ids, in page order.
    """
    page_url = (
        "/repos/{owner}/{repo}/pulls?"
        "state={state}&per_page={per_page}&page={page}"
    ).format(
        owner=owner, repo=repo, state=state, per_page=per_page, page=page,
    )
    page_response = fetch_url_from_github(page_url, requestor_id=requestor_id)
    # Every pull request on this page is stamped with the same fetch time.
    fetched_at = datetime.now()

    processed_ids = []
    for raw_pr in page_response.json():
        try:
            pr = process_pull_request(
                raw_pr, via="api", fetched_at=fetched_at, commit=True,
            )
            processed_ids.append(pr.id)
        except IntegrityError as exc:
            self.retry(exc=exc)

        if children:
            spawn_page_tasks_for_pull_request_files.delay(
                owner, repo, pr.number,
                children=children, requestor_id=requestor_id,
            )
    return processed_ids
def sync_user(self, username, children=False, requestor_id=None):
    """
    Fetch one user from the GitHub API and process it.

    When ``requestor_id`` refers to the user named ``username``, the
    ``/user`` endpoint is used instead of ``/users/{username}``. When
    ``children`` is truthy, ``spawn_page_tasks_for_user_repositories`` is
    queued for this user.

    Returns the ``id`` of the processed user. If the fetch raises
    ``NotFound``, a new ``NotFound`` with a descriptive message and an
    identifying payload is raised instead.
    """
    url = "/users/{username}".format(username=username)
    if requestor_id:
        requestor = User.query.get(int(requestor_id))
        assert requestor
        if requestor.login == username:
            # we can use the API for getting the authenticated user
            url = "/user"

    try:
        response = fetch_url_from_github(url, requestor_id=requestor_id)
    except NotFound:
        # Re-raise with a message and payload identifying the user.
        message = "User @{username} not found".format(username=username)
        details = {
            "type": "user",
            "username": username,
        }
        raise NotFound(message, details)

    payload = response.json()
    try:
        user = process_user(
            payload, via="api", fetched_at=datetime.now(), commit=True,
        )
    except IntegrityError as exc:
        # Multiple workers tried to insert the same user simultaneously.
        # Ask for a retry.
        self.retry(exc=exc)

    if children:
        spawn_page_tasks_for_user_repositories.delay(
            username, children=children, requestor_id=requestor_id,
        )
    return user.id
def sync_repository_hook(self, owner, repo, hook_id, children=False,
                         requestor_id=None):
    """
    Fetch one repository hook from the GitHub API and process it.

    ``children`` is accepted but not used by this function.

    Returns the ``id`` of the processed hook. If the fetch raises
    ``NotFound``, a new ``NotFound`` with a descriptive message and an
    identifying payload is raised instead.
    """
    url = "/repos/{owner}/{repo}/hooks/{hook_id}".format(
        owner=owner, repo=repo, hook_id=hook_id,
    )
    try:
        response = fetch_url_from_github(url, requestor_id=requestor_id)
    except NotFound:
        # Re-raise with a message and payload identifying the hook.
        message = "Hook #{hook_id} for {owner}/{repo} not found".format(
            hook_id=hook_id, owner=owner, repo=repo,
        )
        details = {
            "type": "repo_hook",
            "owner": owner,
            "repo": repo,
            "hook_id": hook_id,
        }
        raise NotFound(message, details)

    payload = response.json()
    try:
        hook = process_repository_hook(
            payload, via="api", fetched_at=datetime.now(), commit=True,
            requestor_id=requestor_id,
        )
    except IntegrityError as exc:
        self.retry(exc=exc)
    return hook.id
def sync_page_of_issues(self, owner, repo, state="all", children=False,
                        requestor_id=None, per_page=100, page=1):
    """
    Fetch one page of a repository's issues and process each entry.

    ``children`` is currently ignored.

    Returns a list with the ``id`` of every issue processed on this page.
    """
    url_template = (
        "/repos/{owner}/{repo}/issues?"
        "state={state}&per_page={per_page}&page={page}"
    )
    url = url_template.format(
        owner=owner, repo=repo, state=state, per_page=per_page, page=page,
    )
    response = fetch_url_from_github(url, requestor_id=requestor_id)
    # One timestamp is shared by every issue on the page.
    fetched_at = datetime.now()
    entries = response.json()

    issue_ids = []
    for entry in entries:
        try:
            issue = process_issue(
                entry, via="api", fetched_at=fetched_at, commit=True,
            )
            # The `children` argument is ignored for now.
            issue_ids.append(issue.id)
        except IntegrityError as exc:
            self.retry(exc=exc)
    return issue_ids
def spawn_page_tasks_for_user_repositories(
        username, type="all", children=False, requestor_id=None, per_page=100,
):
    """
    Fan out the repository sync for ``username`` into one task per page.

    The function first tries to take a ``Mutex`` lock whose name comes from
    ``LOCK_TEMPLATE``; it returns ``False`` without queueing anything when
    the lock row already exists or when committing it raises
    ``IntegrityError``.

    Otherwise a HEAD request determines the last page number (1 when no
    ``last`` link is present), one ``sync_page_of_repositories_for_user``
    signature is created per page, and the group is chained with
    ``user_repositories_scanned`` before being dispatched with ``.delay()``,
    whose result is returned.
    """
    # acquire lock or fail (we're already in a transaction)
    mutex_name = LOCK_TEMPLATE.format(username=username)
    already_locked = Mutex.query.get(mutex_name)
    if already_locked:
        return False
    new_lock = Mutex(name=mutex_name, user_id=requestor_id)
    db.session.add(new_lock)
    try:
        db.session.commit()
    except IntegrityError:
        return False
    logger.info("Lock {name} set by {requestor_id}".format(
        name=mutex_name, requestor_id=requestor_id,
    ))

    listing_url = "/users/{username}/repos?type={type}&per_page={per_page}".format(
        username=username, type=type, per_page=per_page,
    )
    if requestor_id:
        requestor = User.query.get(int(requestor_id))
        assert requestor
        requesting_self = requestor.login == username
        if requesting_self:
            # we can use the API for getting your *own* repos
            listing_url = "/user/repos?type={type}&per_page={per_page}".format(
                type=type, per_page=per_page,
            )

    head = fetch_url_from_github(
        listing_url, method="HEAD", requestor_id=requestor_id,
        headers={"Accept": "application/vnd.github.moondragon+json"},
    )
    final_page_url = URLObject(head.links.get('last', {}).get('url', ""))
    final_page = int(final_page_url.query.dict.get('page', 1))

    per_page_jobs = group(
        sync_page_of_repositories_for_user.s(
            username=username, type=type,
            children=children, requestor_id=requestor_id,
            per_page=per_page, page=page_number,
        )
        for page_number in xrange(1, final_page + 1)
    )
    on_done = user_repositories_scanned.si(
        username=username, requestor_id=requestor_id,
    )
    return (per_page_jobs | on_done).delay()
def sync_page_of_repositories_for_user(self, username, type="all",
                                       children=False, requestor_id=None,
                                       per_page=100, page=1):
    """
    Sync a single page of the repositories belonging to ``username``.

    The ``/user/repos`` endpoint replaces ``/users/{username}/repos`` when
    the requestor looked up from ``requestor_id`` has ``login == username``.
    Each entry of the page is passed to ``process_repository``.

    With a truthy ``children``, the issue, label, milestone and pull request
    page-spawning tasks are queued per repository; the repository-hooks task
    is queued only when the requestor's ``UserRepoAssociation`` has
    ``can_admin`` set.

    Returns the list of processed repository ids, in page order.
    """
    page_url = (
        "/users/{username}/repos?type={type}&per_page={per_page}&page={page}"
    ).format(username=username, type=type, per_page=per_page, page=page)
    if requestor_id:
        requestor = User.query.get(int(requestor_id))
        assert requestor
        requesting_self = requestor.login == username
        if requesting_self:
            # we can use the API for getting your *own* repos
            page_url = (
                "/user/repos?type={type}&per_page={per_page}&page={page}"
            ).format(type=type, per_page=per_page, page=page)

    page_response = fetch_url_from_github(
        page_url, requestor_id=requestor_id,
        headers={"Accept": "application/vnd.github.moondragon+json"},
    )
    # Every repository on this page is stamped with the same fetch time.
    fetched_at = datetime.now()

    processed_ids = []
    for raw_repo in page_response.json():
        try:
            repo = process_repository(
                raw_repo, via="api", fetched_at=fetched_at, commit=True,
                requestor_id=requestor_id,
            )
            processed_ids.append(repo.id)
        except IntegrityError as exc:
            self.retry(exc=exc)

        if children:
            owner = repo.owner_login
            # Keyword arguments shared by every child task below.
            child_kwargs = {
                "children": children,
                "requestor_id": requestor_id,
            }
            spawn_page_tasks_for_issues.delay(
                owner, repo.name, **child_kwargs
            )
            spawn_page_tasks_for_labels.delay(
                owner, repo.name, **child_kwargs
            )
            spawn_page_tasks_for_milestones.delay(
                owner, repo.name, **child_kwargs
            )
            spawn_page_tasks_for_pull_requests.delay(
                owner, repo.name, **child_kwargs
            )
            # only try to get repo hooks if the requestor is an admin on
            # this repo
            assoc = UserRepoAssociation.query.get((requestor_id, repo.id))
            if assoc and assoc.can_admin:
                spawn_page_tasks_for_repository_hooks.delay(
                    owner, repo.name, **child_kwargs
                )
    return processed_ids