def get_thread_url_for_submission(commitfest_id, submission_id):
    """Given a Commitfest ID and a submission ID, return the URL of the
    'whole thread' page in the mailing list archives, or None if no
    suitable thread could be found."""
    # find all the threads and latest message times
    result = None
    url = "https://commitfest.postgresql.org/%s/%s/" % (commitfest_id, submission_id)
    candidates = []
    candidate = None
    for line in cfbot_util.slow_fetch(url).splitlines():
        groups = re.search(
            """Latest at <a href="https://www.postgresql.org/message-id/([^"]+)">(2[^<]+)""",
            line)
        if groups:
            # (timestamp, message-id); timestamp first so sorting orders by date
            candidate = (groups.group(2), groups.group(1))
        # we'll only take threads that are followed by evidence that there is
        # at least one attachment
        groups = re.search("""Latest attachment .* <button type="button" """, line)
        if groups and candidate is not None:
            # guard against an attachment line appearing before any thread
            # line: appending None would break the sort/indexing below
            candidates.append(candidate)
    # take the one with the most recent email
    if candidates:
        candidates.sort()
        result = "https://www.postgresql.org/message-id/flat/" + candidates[-1][1]
    return result
def pull_build_results(conn):
    """Record results for pending AppVeyor builds.

    For every build_result row for provider 'appveyor' that has no result
    yet, look the build up in the AppVeyor API (fetched lazily, at most
    once per call) and update the row's result, URL and modified time.
    """
    builds = None
    cursor = conn.cursor()
    cursor.execute("""SELECT id, commitfest_id, submission_id, ci_commit_id FROM build_result WHERE provider = 'appveyor' AND result IS NULL""")
    for row_id, commitfest_id, submission_id, ci_commit_id in cursor.fetchall():
        # lazily fetch data from appveyor only when we first need it
        if builds is None:
            builds = {}
            for item in json.loads(cfbot_util.slow_fetch(cfbot_config.APPVEYOR_API_BUILDS))["builds"]:
                builds[(item["branch"], item["commitId"])] = (item["status"], item["version"])
        branch = "commitfest/%s/%s" % (commitfest_id, submission_id)
        key = (branch, ci_commit_id)
        if key in builds:
            status, build_id = builds[key]
            # map AppVeyor's status vocabulary to ours; any other status
            # (queued, running, ...) stays NULL so we keep polling
            result = {"success": "success", "failed": "failure"}.get(status)
            url = cfbot_config.APPVEYOR_BUILD_URL % build_id
            cursor.execute("""UPDATE build_result SET result = %s, url = %s, modified = now() WHERE id = %s""",
                           (result, url, row_id))
            conn.commit()
def get_submissions_for_commitfest(commitfest_id):
    """Given a Commitfest ID, return a list of Submission objects."""
    # This scrapes the commitfest index page line by line.  The table cells
    # for one submission appear on consecutive HTML lines, so we run a small
    # state machine: seeing the "state" cell arms next_line_has_version,
    # which in turn arms next_line_has_authors, etc.
    result = []
    parser = HTMLParser.HTMLParser()
    url = "https://commitfest.postgresql.org/%s/" % (commitfest_id, )
    next_line_has_version = False
    next_line_has_authors = False
    next_line_has_latest_email = False
    state = None
    latest_email = None
    authors = ""
    for line in cfbot_util.slow_fetch(url).splitlines():
        # a link like <a href="1234/">Patch name</a> starts a new submission
        groups = re.search('\<a href="([0-9]+)/"\>([^<]+)</a>', line)
        if groups:
            submission_id = groups.group(1)
            name = parser.unescape(groups.group(2))
        if next_line_has_version:
            # the version cell itself carries no data we use; skip to authors
            next_line_has_version = False
            next_line_has_authors = True
            continue
        if next_line_has_authors:
            next_line_has_authors = False
            groups = re.search("<td>([^<]*)</td>", line)
            if groups:
                authors = groups.group(1)
                # strip parenthesized parts, e.g. "(reviewer)" annotations
                authors = re.sub(" *\\([^)]*\\)", "", authors)
                continue
        if next_line_has_latest_email:
            next_line_has_latest_email = False
            groups = re.search(
                '<td style="white-space: nowrap;">(.*)<br/>(.*)</td>', line)
            if groups:
                latest_email = groups.group(1) + " " + groups.group(2)
                # both halves empty -> no email recorded for this submission
                if latest_email == ' ':
                    latest_email = None
                # the latest-email cell is the last one we need, so emit the
                # accumulated Submission here
                result.append(
                    Submission(submission_id, commitfest_id, name, state,
                               authors.split(", "), latest_email))
        # a state label cell, e.g. "Needs review"; arms the version flag
        groups = re.search(
            '<td><span class="label label-[^"]*">([^<]+)</span></td>', line)
        if groups:
            state = groups.group(1)
            next_line_has_version = True
            continue
        # a latest-email cell spotted one line early: arm the email flag so
        # the capturing regex above runs on the next line
        groups = re.search('<td style="white-space: nowrap;">.*<br/>.*</td>', line)
        if groups:
            next_line_has_latest_email = True
            continue
        # any unrecognized line resets the pending-cell flags
        next_line_has_authors = False
        next_line_has_latest_email = False
    return result
def get_thread_url_for_submission(commitfest_id, submission_id):
    """Given a Commitfest ID and a submission ID, return the URL of the
    'whole thread' page in the mailing list archives."""
    # if there is more than one, we'll take the furthest down on the page...
    # TODO: look at the dates instead!
    page_url = "https://commitfest.postgresql.org/%s/%s/" % (commitfest_id, submission_id)
    pattern = '<dt><a href="(https://www.postgresql.org/message-id/flat/[^"]+)"'
    thread_url = None
    for html_line in cfbot_util.slow_fetch(page_url).splitlines():
        match = re.search(pattern, html_line)
        if match:
            thread_url = match.group(1)
    return thread_url
def get_current_commitfest_id():
    """Find the ID of the current open or next future Commitfest.

    Scrapes the commitfest front page; if several commitfests match, the
    last one on the page wins.  Raises Exception if none is found.
    """
    result = None
    for line in cfbot_util.slow_fetch(
            "https://commitfest.postgresql.org").splitlines():
        # raw string: '\(' in a non-raw literal is an invalid escape sequence
        groups = re.search(
            r'<a href="/([0-9]+)/">[0-9]+-[0-9]+</a> \((Open|In Progress) ',
            line)
        if groups:
            result = int(groups.group(1))
    if result is None:
        raise Exception("Could not determine the current Commitfest ID")
    return result
def pull_build_results(conn):
    """Record results for pending Travis builds.

    For every build_result row for provider 'travis' that has no result
    yet, look the build up in the Travis API (fetched lazily, at most once
    per call) and update the row's result, URL and modified time.
    """
    builds = None
    cursor = conn.cursor()
    cursor.execute("""SELECT id, commitfest_id, submission_id, ci_commit_id FROM build_result WHERE provider = 'travis' AND result IS NULL""")
    for row_id, commitfest_id, submission_id, ci_commit_id in cursor.fetchall():
        # lazily fetch data from travis only when we first need it
        if builds is None:
            builds = {}
            for item in json.loads(
                    cfbot_util.slow_fetch(cfbot_config.TRAVIS_API_BUILDS)):
                builds[(item["branch"], item["commit"])] = (item["result"],
                                                            item["state"],
                                                            item["id"])
        branch = "commitfest/%s/%s" % (commitfest_id, submission_id)
        key = (branch, ci_commit_id)
        if key in builds:
            result, state, build_id = builds[key]
            if result == 0:
                # Travis result code 0 means success
                result = "success"
            elif result is None:
                # no result; normally this means we're done, but...
                if state == "finished":
                    # if we're finished then this means we timed out
                    result = "failure"
                else:
                    # still running: leave NULL so we keep polling
                    result = None
            else:
                result = "failure"
            url = cfbot_config.TRAVIS_BUILD_URL % build_id
            cursor.execute(
                """UPDATE build_result SET result = %s, url = %s, modified = now() WHERE id = %s""",
                (result, url, row_id))
            conn.commit()
def get_latest_patches_from_thread_url(thread_url):
    """Given a 'whole thread' URL from the archives, find the last message
    that had at least one attachment called something.patch.  Return the
    message ID and the list of URLs to fetch all the patches, or
    (None, None) when no usable patch set was found."""
    selected_message_attachments = []
    selected_message_id = None
    message_attachments = []
    message_id = None
    for line in cfbot_util.slow_fetch(thread_url).splitlines():
        groups = re.search(
            '<a href="(/message-id/attachment/[^"]*\\.(diff|patch|patch\\.gz|tar\\.gz|tgz|tar\\.bz2))">',
            line)
        if groups:
            message_attachments.append("https://www.postgresql.org" + groups.group(1))
            # remember the most recent message that carried attachments
            selected_message_attachments = message_attachments
            selected_message_id = message_id
        else:
            groups = re.search(
                '<td><a href="/message-id/[^"]+">([^"]+)</a></td>', line)
            if groups:
                # a new message begins: reset the per-message attachment list
                message_id = groups.group(1)
                message_attachments = []
    # if there is a tarball attachment, there must be only one attachment,
    # otherwise give up on this thread (we don't know how to combine patches
    # and tarballs)
    if selected_message_attachments is not None:
        if any(x.endswith((".tgz", ".tar.gz", ".tar.bz2"))
               for x in selected_message_attachments):
            if len(selected_message_attachments) > 1:
                selected_message_id = None
                selected_message_attachments = None
    # if there are multiple patch files, they had better follow the convention
    # of leading numbers, otherwise we don't know how to apply them in the
    # right order
    return selected_message_id, selected_message_attachments
def process_submission(conn, commitfest_id, submission_id):
    """Fetch the latest patch set for a submission, try to apply it in a
    fresh patchburner jail, push a branch with CI control files on success,
    and record the outcome in the build_result and submission tables."""
    cursor = conn.cursor()
    # ask the patchburner controller where its working directories live
    template_repo_path = patchburner_ctl("template-repo-path").strip()
    burner_repo_path = patchburner_ctl("burner-repo-path").strip()
    patch_dir = patchburner_ctl("burner-patch-path").strip()
    #print "got %s" % update_patchbase_tree()
    update_patchbase_tree(template_repo_path)
    commit_id = get_commit_id(template_repo_path)
    logging.info("processing submission %d, %d" % (commitfest_id, submission_id))
    # create a fresh patchburner jail
    patchburner_ctl("destroy")
    patchburner_ctl("create")
    # find out where to put the patches so the jail can see them
    # fetch the patches from the thread and put them in the patchburner's
    # filesystem
    time.sleep(10) # argh, try to close race against slow archives
    thread_url = cfbot_commitfest_rpc.get_thread_url_for_submission(
        commitfest_id, submission_id)
    message_id, patch_urls = cfbot_commitfest_rpc.get_latest_patches_from_thread_url(
        thread_url)
    for patch_url in patch_urls:
        parsed = urlparse.urlparse(patch_url)
        filename = os.path.basename(parsed.path)
        dest = os.path.join(patch_dir, filename)
        with open(dest, "w+") as f:
            f.write(cfbot_util.slow_fetch(patch_url))
    # apply the patches inside the jail
    output, rcode = patchburner_ctl("apply", want_rcode=True)
    # write the patch output to a public log file
    log_file = "patch_%d_%d.log" % (commitfest_id, submission_id)
    with open(os.path.join(cfbot_config.WEB_ROOT, log_file), "w+") as f:
        f.write(output)
    log_url = cfbot_config.CFBOT_APPLY_URL % log_file
    # did "patch" actually succeed?
    if rcode != 0:
        # we failed to apply the patches
        # NOTE(review): log_url is passed in the position that the success
        # path passes ci_commit_id — confirm this argument order is intended
        insert_build_result(conn, commitfest_id, submission_id, 'apply',
                            message_id, commit_id, log_url, 'failure', log_url)
    else:
        # we applied the patch; now make it into a branch with a commit on it
        # including the CI control files for all enabled providers
        for d in cfbot_config.CI_PROVIDERS:
            for f in os.listdir(d):
                s = os.path.join(d, f)
                if os.path.isfile(s):
                    shutil.copy(s, os.path.join(burner_repo_path, f))
        branch = make_branch(conn, burner_repo_path, commitfest_id,
                             submission_id, message_id)
        # push it to the remote monitored repo, if configured
        if cfbot_config.GIT_REMOTE_NAME:
            logging.info("pushing branch %s" % branch)
            my_env = os.environ.copy()
            my_env["GIT_SSH_COMMAND"] = cfbot_config.GIT_SSH_COMMAND
            subprocess.check_call(
                "cd %s && git push -q -f %s %s" %
                (burner_repo_path, cfbot_config.GIT_REMOTE_NAME, branch),
                env=my_env, shell=True)
        # record the build status
        ci_commit_id = get_commit_id(burner_repo_path)
        insert_build_result(conn, commitfest_id, submission_id, 'apply',
                            message_id, commit_id, ci_commit_id, 'success',
                            log_url)
        # create placeholder results for the CI providers (we'll start polling them)
        for provider in cfbot_config.CI_PROVIDERS:
            insert_build_result(conn, commitfest_id, submission_id, provider,
                                message_id, commit_id, ci_commit_id, None,
                                None)
    # record that we have processed this commit ID and message ID
    #
    # Unfortunately we also have to clobber last_message_id to avoid getting
    # stuck in a loop, because sometimes the commitfest app reports a change
    # in last email date before the new email is visible in the flat thread (!),
    # which means that we can miss a new patch.  Doh.  Need something better
    # here (don't really want to go back to polling threads aggressively...)
    cursor.execute(
        """UPDATE submission SET last_message_id = %s, last_branch_message_id = %s, last_branch_commit_id = %s, last_branch_time = now() WHERE commitfest_id = %s AND submission_id = %s""",
        (message_id, message_id, commit_id, commitfest_id, submission_id))
    conn.commit()
    # tear the jail down again now that we're done with it
    patchburner_ctl("destroy")