def nudge_study_index(request):
    """Support method to update the oti index in response to GitHub webhooks

    This examines the JSON payload of a GitHub webhook to see which studies
    have been added, modified, or removed. Then it calls oti's index service
    to (re)index the NexSON for those studies, or to delete a study's
    information if it was deleted from the docstore.
    Finally, we clear the cached study list (the response to find_studies
    with no args).
    N.B. This depends on a GitHub webhook on the chosen docstore.
    """
    payload = extract_posted_data(request)
    add_or_update_ids, modified, remove_ids = github_payload_to_amr(payload,
                                                                    harvest_study_ids_from_paths)
    add_or_update_ids.update(modified)
    sds = get_phylesystem_doc_store(request)
    # this check will not be sufficient if we have multiple shards
    opentree_docstore_url = sds.remote_docstore_url
    if payload['repository']['url'] != opentree_docstore_url:
        raise httpexcept(HTTPBadRequest, "wrong repo for this API instance")
    otindex_base_url = get_otindex_base_url(request)
    msg = ""
    if add_or_update_ids:
        msg += otindex_call(add_or_update_ids, otindex_base_url, 'add_update')
    if remove_ids:
        msg += otindex_call(remove_ids, otindex_base_url, 'remove')
    # TODO: check returned IDs against our original list... what if something failed?
    github_webhook_url = "{}/settings/hooks".format(opentree_docstore_url)
    full_msg = format_gh_webhook_response(github_webhook_url, msg)
    if msg:
        raise httpexcept(HTTPInternalServerError, full_msg)
    return full_msg
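# Illustrative sketch (not part of the API): the GitHub push payload that
# github_payload_to_amr() consumes carries a "commits" list, and each commit
# lists "added", "modified", and "removed" file paths. The field names below
# follow GitHub's documented push-event schema; the study IDs and repo URL
# are hypothetical.
#
# example_payload = {
#     "repository": {"url": "https://github.com/OpenTreeOfLife/phylesystem-1"},
#     "commits": [
#         {"added": ["study/ot_123/ot_123.json"],
#          "modified": ["study/ot_456/ot_456.json"],
#          "removed": []},
#     ],
# }
#
# On such a payload, github_payload_to_amr would yield roughly
# ({'ot_123'}, {'ot_456'}, set()), and nudge_study_index folds the modified
# IDs into the add/update set before calling otindex.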
def put_document(request):
    """Open Tree API method for updating an existing resource.

    See `finish_write_operation` for a description of the response.
    """
    document, put_args = extract_write_args(request)
    if put_args.get('starting_commit_SHA') is None:
        msg = 'PUT operation expects a "starting_commit_SHA" argument with the SHA of the parent'
        raise httpexcept(HTTPBadRequest, msg)
    if put_args.get('doc_id') is None:
        raise httpexcept(HTTPBadRequest, 'PUT operation expects a URL that ends with a document ID')
    umbrella = umbrella_from_request(request)
    return finish_write_operation(request, umbrella, document, put_args)
def nudge_taxon_index(request):
    """Support method to update the taxon index (taxomachine) in response to GitHub webhooks

    This examines the JSON payload of a GitHub webhook to see which taxa have
    been added, modified, or removed. Then it calls the appropriate index
    service to (re)index those taxa, or to delete a taxon's information if it
    was deleted in an amendment.
    TODO: Clear any cached taxon list.
    N.B. This depends on a GitHub webhook on the taxonomic-amendments docstore!
    """
    payload = extract_posted_data(request)
    tads = get_taxon_amendments_doc_store(request)
    amendments_repo_url = tads.remote_docstore_url
    if payload['repository']['url'] != amendments_repo_url:
        raise httpexcept(HTTPBadRequest, "wrong repo for this API instance")
    added_ids, modified_ids, removed_ids = github_payload_to_amr(payload,
                                                                 harvest_ott_ids_from_paths)
    msg_list = []
    # build a working URL, gather the amendment body, and nudge the index!
    amendments_api_base_url = get_taxonomy_api_base_url(request)
    nudge_url = "{b}v3/taxonomy/process_additions".format(b=amendments_api_base_url)
    for doc_id in added_ids:
        try:
            amendment_blob = tads.return_document(doc_id=doc_id)[0]
        except:
            msg_list.append("retrieval of {} failed".format(doc_id))
        else:
            # Extra weirdness required here, as neo4j needs an encoded *string*
            # of the amendment JSON, within a second JSON wrapper :-/
            postable_blob = {"addition_document": json.dumps(amendment_blob)}
            postable_string = json.dumps(postable_blob)
            try:
                do_http_post_json(url=nudge_url, data=postable_string)
            except:
                msg_list.append("nudge of taxonomy processor failed for {}".format(doc_id))
    # LATER: add handlers for modified and removed taxa?
    if modified_ids:
        raise httpexcept(HTTPBadRequest, "We don't currently re-index modified taxa!")
    if removed_ids:
        raise httpexcept(HTTPBadRequest, "We don't currently re-index removed taxa!")
    # N.B. If we had any cached amendment results, we'd clear them now
    # api_utils.clear_matching_cache_keys(...)
    github_webhook_url = "{}/settings/hooks".format(amendments_repo_url)
    msg = '\n'.join(msg_list)
    full_msg = format_gh_webhook_response(github_webhook_url, msg)
    if msg == '':
        return full_msg
    raise httpexcept(HTTPInternalServerError, full_msg)
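# Illustrative sketch (values hypothetical) of the double JSON encoding used
# above: the amendment is serialized to a *string*, and that string is then
# embedded as a field inside a second JSON wrapper, e.g.
#
#   amendment_blob = {"taxa": [{"ott_id": 9999999, "name": "Example taxon"}]}
#   json.dumps({"addition_document": json.dumps(amendment_blob)})
#
# produces
#
#   '{"addition_document": "{\\"taxa\\": [{\\"ott_id\\": 9999999, ...}]}"}'
#
# so the neo4j endpoint receives the amendment as an escaped string field,
# not as a nested JSON object.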
def post_document(request):
    """Open Tree API method for creating a new resource.

    See `extract_write_args` for a description of the arguments that are
    extracted from the request object.
    See `finish_write_operation` for a description of the response.
    """
    document, post_args = extract_write_args(request)
    if post_args.get('doc_id') is not None:
        msg = 'POST operation does not expect a URL that ends with a document ID'
        raise httpexcept(HTTPBadRequest, msg)
    umbrella = umbrella_from_request(request)
    return finish_write_operation(request, umbrella, document, post_args)
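# Hypothetical request sketch (URLs, tokens, and SHAs invented) of the
# contract enforced by post_document above and put_document earlier:
# POST creates a new document, so the URL must *not* end with a document ID;
# PUT updates an existing one, so it needs both the ID in the URL and the
# parent commit SHA:
#
#   curl -X POST https://api.example.org/v3/collection \
#        -d '{"auth_token": "...", "json": {...}}'
#
#   curl -X PUT https://api.example.org/v3/collection/some-owner/some-id \
#        -d '{"auth_token": "...", "starting_commit_SHA": "a1b2c3d", "json": {...}}'
#
# Violating either expectation yields the HTTPBadRequest raised in the
# handlers above.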
def delete_document(request):
    """Does the work of the delete views.

    Returns the value from peyotl.TypeAwareDocStore.delete_document.
    Uses the "starting_commit_SHA", "commit_msg", and "doc_id" arguments,
    plus the auth info from `authenticate`.
    """
    args = extract_write_args(request, require_document=False)[1]
    parent_sha = args['starting_commit_SHA']
    commit_msg = args['commit_msg']
    auth_info = args['auth_info']
    doc_id = args['doc_id']
    umbrella = umbrella_from_request(request)
    try:
        x = umbrella.delete_document(doc_id, auth_info, parent_sha, commit_msg=commit_msg)
    except GitWorkflowError as err:
        _LOG.exception("delete exception")
        raise httpexcept(HTTPInternalServerError, err.msg)
    return x
def include_tree_in_synth(request):
    """Adds a (study_id, tree_id) pair to the last (default) collection used in synthesis.

    See `collection_args_helper` for the args used.
    :raises HTTPNotFound: if the (study_id, tree_id) pair is not in the set of studies.
    :return: collection that is the concatenation of all trees queued for synthesis.
    """
    study_id, tree_id, auth_info = collection_args_helper(request)[1:]
    # examine this study and tree, to confirm it exists *and* to capture its name
    sds = get_phylesystem_doc_store(request)
    try:
        found_study = sds.return_doc(study_id, commit_sha=None, return_WIP_map=False)[0]
        match_list = extract_tree_nexson(found_study, tree_id=tree_id)
        if len(match_list) != 1:
            raise KeyError('tree id not found')
        found_tree = match_list[0][1]
        found_tree_name = found_tree.get('@label') or tree_id
    except:
        # report a missing/misidentified tree
        msg = "Specified tree '{t}' in study '{s}' not found! Save this study and try again?"
        msg = msg.format(s=study_id, t=tree_id)
        _LOG.exception(msg)
        raise httpexcept(HTTPNotFound, msg)
    x = synth_collection_helper(request)
    cds, coll_id_list, current_synth_coll = x[0], x[1], x[3]
    if cds.collection_includes_tree(current_synth_coll, study_id, tree_id):
        return current_synth_coll
    commit_msg = "Added via API (include_tree_in_synth)"
    ref = found_study.get('nexml', {}).get('^ot:studyPublicationReference', '')
    comment = commit_msg + " from {p}".format(p=ref)
    decision = cds.create_tree_inclusion_decision(study_id=study_id,
                                                  tree_id=tree_id,
                                                  name=found_tree_name,
                                                  comment=comment)
    # find the default synth-input collection and append the new decision to it
    default_collection_id = coll_id_list[-1]
    append_tree_to_collection_helper(request,
                                     cds,
                                     default_collection_id,
                                     decision,
                                     auth_info,
                                     commit_msg=commit_msg)
    return trees_in_synth(request)
def render_markdown(request):
    """Reads a markdown str "src" from the JSON body of the HTTP request and returns the HTML version.

    :param request: request with a data["src"] field
    :return: HTML representation of the markdown
    :raises HTTPBadRequest: if the input is not found.
    """
    data = extract_posted_data(request)
    try:
        src = data['src']
    except KeyError:
        raise httpexcept(HTTPBadRequest, '"src" parameter not found in POST')

    # noinspection PyUnusedLocal
    def add_blank_target(attrs, new=False):  # pylint: disable=W0613
        """Hook to add target="_blank" to links created by bleach.linkify"""
        attrs['target'] = '_blank'
        return attrs

    h = markdown.markdown(src)
    ct = ['p', 'a', 'hr', 'i', 'em', 'b', 'div', 'ul', 'ol', 'li',
          'h1', 'h2', 'h3', 'h4']
    h = bleach.clean(h, tags=ct)
    h = bleach.linkify(h, callbacks=[add_blank_target])
    return Response(h)
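# Hypothetical usage sketch: POSTing
#   {"src": "*hi* <script>alert(1)</script> http://example.com"}
# would roughly flow through the pipeline above as:
#
#   h = markdown.markdown(src)    # '<p><em>hi</em> <script>...</script> http://example.com</p>'
#   h = bleach.clean(h, tags=ct)  # <script> is not in the ct whitelist, so it is neutralized
#   h = bleach.linkify(h, ...)    # the bare URL becomes <a target="_blank" href="...">
#
# so callers get sanitized HTML in which only whitelisted tags survive and
# all generated links open in a new tab.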
def exclude_tree_from_synth(request):
    """Removes a (study_id, tree_id) pair from the last (default) collection used in synthesis.

    See `collection_args_helper` for the args used.
    :raises HTTPNotFound: if the (study_id, tree_id) pair is not in the set of studies.
    :return: collection that is the concatenation of all trees queued for synthesis.
    """
    study_id, tree_id, auth_info = collection_args_helper(request)[1:]
    cds, coll_id_list, coll_list, current_synth_coll = synth_collection_helper(request)
    if not cds.collection_includes_tree(current_synth_coll, study_id, tree_id):
        return current_synth_coll
    needs_push = {}
    for coll_id, coll in itertools.izip(coll_id_list, coll_list):
        if cds.collection_includes_tree(coll, study_id, tree_id):
            try:
                msg = "Updated via API (exclude_tree_from_synth)"
                r = cds.purge_tree_from_collection(coll_id,
                                                   study_id=study_id,
                                                   tree_id=tree_id,
                                                   auth_info=auth_info,
                                                   commit_msg=msg)
                commit_return = r
            except GitWorkflowError as err:
                raise httpexcept(HTTPInternalServerError, err.msg)
            except:
                raise httpexcept(HTTPBadRequest, err_body(traceback.format_exc()))
def post_study_document(request):
    """POST of a new study.

    See `extract_write_args` for a description of the arguments that are
    extracted from the request object. The content of the new study is
    populated based on the value of "import_method":
        "import-method-TREEBASE_ID" should be accompanied by a "treebase_id" argument
        "import-method-PUBLICATION_DOI" or "import-method-PUBLICATION_REFERENCE" values
            should be accompanied by a "publication_DOI" or "publication_reference" argument
        "import-method-POST" is used to indicate that the body of the POST
            should contain the study
    See `finish_write_operation` for a description of the response.
    """
    request.matchdict['resource_type'] = 'study'
    document, post_args = extract_write_args(request,
                                             study_post=True,
                                             require_document=False)
    if post_args.get('doc_id') is not None:
        msg = 'POST operation does not expect a URL that ends with a document ID'
        raise httpexcept(HTTPBadRequest, msg)
    umbrella = umbrella_from_request(request)
    import_method = post_args['import_method']
    nsv = umbrella.document_schema.schema_version
    cc0_agreement = post_args['cc0_agreement']
    publication_doi = post_args['publication_DOI']
    publication_doi_for_crossref = None
    if publication_doi:
        # if a URL or something other than a valid DOI was entered, don't submit it to the crossref API
        publication_doi_for_crossref = make_valid_doi(publication_doi) or None
    publication_ref = post_args['publication_reference']
    if import_method == 'import-method-TREEBASE_ID':
        treebase_id = post_args['treebase_id']
        if not treebase_id:
            msg = "A treebase_id argument is required when import_method={}".format(import_method)
            raise httpexcept(HTTPBadRequest, msg)
        try:
            treebase_number = int(treebase_id.upper().lstrip('S'))
        except:
            msg = 'Invalid treebase_id="{}"'.format(treebase_id)
            raise httpexcept(HTTPBadRequest, msg)
        try:
            document = import_nexson_from_treebase(treebase_number,
                                                   nexson_syntax_version=nsv)
        except:
            msg = "Unexpected error parsing the file obtained from TreeBASE. " \
                  "Please report this bug to the Open Tree of Life developers."
            raise httpexcept(HTTPBadRequest, msg)
    elif import_method in ('import-method-PUBLICATION_DOI',
                           'import-method-PUBLICATION_REFERENCE'):
        if not (publication_ref or publication_doi_for_crossref):
            msg = 'Did not find a valid DOI in "publication_DOI" or a reference in ' \
                  '"publication_reference" arguments.'
            raise httpexcept(HTTPBadRequest, msg)
        document = import_nexson_from_crossref_metadata(doi=publication_doi_for_crossref,
                                                        ref_string=publication_ref,
                                                        include_cc0=cc0_agreement)
    elif import_method == 'import-method-POST':
        if not document:
            msg = 'Could not read a NexSON from the body of the POST, but ' \
                  'import_method="import-method-POST" was used.'
            raise httpexcept(HTTPBadRequest, msg)
    else:
        document = umbrella.document_schema.create_empty_doc()
        if cc0_agreement:
            add_cc0_waiver(nexson=document)
    return finish_write_operation(request, umbrella, document, post_args)
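# Illustrative sketch of the TreeBASE-ID normalization above (IDs hypothetical):
#
#   int("S2152".upper().lstrip('S'))   # -> 2152
#   int("s2152".upper().lstrip('S'))   # -> 2152  (case-insensitive via .upper())
#   int("2152".upper().lstrip('S'))    # -> 2152  (bare number also accepted)
#   int("SX".upper().lstrip('S'))      # -> ValueError, reported as Invalid treebase_id
#
# i.e. any leading "S"/"s" prefix is stripped and the remainder must parse as an int.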
def get_document(request):
    """Implementation of the GET methods for a resource or part of a resource.

    See the `subresource_request_helper` documentation for details on which
    arguments are accepted.
    Depending on the request parameters, the return type can be either text
    (e.g. a newick string if just one tree from a study is requested, or an
    "external" format if translation is requested), or JSON. If no translation
    to an alternative schema is requested, the document will be returned as
    the "data" field of a JSON object. In that case, other keys will be:
        "sha" -> the git SHA for the commit returned.
        "branch2sha" -> dict describing the work-in-progress branches for the document.
        "url" -> the requested url
        "commentHTML" -> HTML converted from a markdown comment (if any) embedded in the doc.
        "version_history" -> an optional return.
        "external_url" -> if requested, this will be the same URL as a call to `external_url`.
        "shardName" -> text description of the shard that holds the document.
    If the resource requested is a study JSON, then the DOI field of the
    document will be used to look for duplicate studies, which are reported
    in a "duplicateStudyIDs" field.
    """
    resource_type = request.matchdict['resource_type']
    umbrella = umbrella_from_request(request)
    subresource_req_dict, params = subresource_request_helper(request)
    doc_id = params['doc_id']
    triple = umbrella.is_plausible_transformation(subresource_req_dict)
    is_plausible, reason_or_converter, out_syntax = triple
    if not is_plausible:
        raise httpexcept(HTTPBadRequest, 'Impossible request: {}'.format(reason_or_converter))
    transformer = reason_or_converter
    parent_sha = params.get('starting_commit_SHA')
    _LOG.debug('parent_sha = {}'.format(parent_sha))
    version_history = None
    try:
        if (out_syntax == 'JSON') and params['version_history']:
            r, version_history = umbrella.return_document_and_history(doc_id,
                                                                      commit_sha=parent_sha,
                                                                      return_WIP_map=True)
        else:
            r = umbrella.return_document(doc_id,
                                         commit_sha=parent_sha,
                                         return_WIP_map=True)
    except:
        _LOG.exception('GET failed')
        raise HTTPNotFound('{r} document {i} GET failure'.format(r=resource_type, i=doc_id))
    # noinspection PyBroadException
    try:
        document_blob, head_sha, wip_map = r
    except:
        _LOG.exception('GET failed')
        raise httpexcept(HTTPBadRequest, err_body(traceback.format_exc()))
    if transformer is None:
        result_data = document_blob
    else:
        try:
            result_data = transformer(umbrella, doc_id, document_blob, head_sha)
        except KeyError as x:
            raise httpexcept(HTTPNotFound, 'subresource not found: {}'.format(x))
        except ValueError as y:
            raise httpexcept(HTTPBadRequest, 'subresource not found: {}'.format(y.message))
        except:
            msg = "Exception in coercing the document to the requested type. "
            _LOG.exception(msg)
            raise httpexcept(HTTPBadRequest, err_body(msg))
    if subresource_req_dict['output_is_json']:
        result = {'sha': head_sha,
                  'data': result_data,
                  'branch2sha': wip_map,
                  'url': request.url,
                  }
        try:
            comment_html = render_markdown(umbrella.get_markdown_comment(result_data))
        except:
            comment_html = ''  # pylint: disable=R0204
        result['commentHTML'] = comment_html
        if version_history is not None:
            result['version_history'] = version_history
            # TODO get rid of the camelCase version
            result['versionHistory'] = version_history
        if resource_type == 'study':
            duplicate_study_ids = []
            try:
                study_doi = document_blob['nexml']['^ot:studyPublication']['@href']
            except:
                pass  # no DOI
            else:
                try:
                    oti_domain = get_otindex_base_url(request)
                    duplicate_study_ids = find_studies_by_doi(oti_domain, study_doi)
                except:
                    _LOG.exception('Call to find_studies_by_doi failed')
                else:
                    try:
                        duplicate_study_ids.remove(doc_id)
                    except:
                        pass
            if duplicate_study_ids:
                result['duplicateStudyIDs'] = duplicate_study_ids
        return result
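# Illustrative sketch (IDs, SHAs, and URL hypothetical) of the JSON envelope
# that get_document returns when no schema translation is requested, per the
# docstring above:
#
# {
#     "sha": "a1b2c3d...",                     # git commit SHA of the returned version
#     "data": {...},                           # the NexSON document itself
#     "branch2sha": {"master": "a1b2c3d..."},  # work-in-progress branch map
#     "url": "https://api.example.org/v3/study/ot_123",
#     "commentHTML": "<p>...</p>",             # rendered markdown comment, if any
#     "version_history": [...],                # only if requested
#     "duplicateStudyIDs": ["ot_456"],         # only for studies sharing a DOI
# }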