Exemple #1
0
def nudge_study_index(request):
    """Support method to update oti index in response to GitHub webhooks.

    This examines the JSON payload of a GitHub webhook to see which studies have
    been added, modified, or removed. Then it calls oti's index service to
    (re)index the NexSON for those studies, or to delete a study's information if
    it was deleted from the docstore.

    NOTE(review): earlier documentation said the cached study list is cleared
    afterwards; no cache clearing happens in this function body -- confirm
    whether that is done elsewhere.

    N.B. This depends on a GitHub webhook on the chosen docstore.

    :param request: the webhook request; its posted JSON must hold
        payload['repository']['url'].
    :return: the formatted webhook response message when no index call
        reported a problem.
    :raises HTTPBadRequest if the payload comes from a repository other than
        this instance's docstore.
    :raises HTTPInternalServerError if any index call returned a non-empty message.
    """
    payload = extract_posted_data(request)
    add_or_update_ids, modified, remove_ids = github_payload_to_amr(payload,
                                                                    harvest_study_ids_from_paths)
    # Modified studies are (re)indexed exactly like added ones, so merge the two
    # collections of IDs. `update` adds each ID; `add` would try to insert the
    # whole collection as a single element.
    add_or_update_ids.update(modified)
    sds = get_phylesystem_doc_store(request)
    # this check will not be sufficient if we have multiple shards
    opentree_docstore_url = sds.remote_docstore_url
    if payload['repository']['url'] != opentree_docstore_url:
        raise httpexcept(HTTPBadRequest, "wrong repo for this API instance")
    otindex_base_url = get_otindex_base_url(request)
    # presumably otindex_call returns an empty string on success and an error
    # description otherwise -- verify against its definition
    msg = ""
    if add_or_update_ids:
        msg += otindex_call(add_or_update_ids, otindex_base_url, 'add_update')
    if remove_ids:
        msg += otindex_call(remove_ids, otindex_base_url, 'remove')
    # TODO: check returned IDs against our original list... what if something failed?

    github_webhook_url = "{}/settings/hooks".format(opentree_docstore_url)
    full_msg = format_gh_webhook_response(github_webhook_url, msg)
    if msg:
        # any accumulated message is treated as a failure report
        raise httpexcept(HTTPInternalServerError, full_msg)
    return full_msg
Exemple #2
0
def put_document(request):
    """Handle a PUT that updates an existing resource.

    The write arguments are pulled from the request by `extract_write_args`;
    both "starting_commit_SHA" and "doc_id" must be present (not None).
    See `finish_write_operation` for description of the response.

    :raises HTTPBadRequest if either required argument is missing.
    """
    document, put_args = extract_write_args(request)
    # (argument name, complaint) pairs, checked in this order
    required_args = (
        ('starting_commit_SHA',
         'PUT operation expects a "starting_commit_SHA" argument with the SHA of the parent'),
        ('doc_id',
         'PUT operation expects a URL that ends with a document ID'),
    )
    for arg_name, complaint in required_args:
        if put_args.get(arg_name) is None:
            raise httpexcept(HTTPBadRequest, complaint)
    return finish_write_operation(request,
                                  umbrella_from_request(request),
                                  document,
                                  put_args)
Exemple #3
0
def nudge_taxon_index(request):
    """Support method to update taxon index (taxomachine) in response to GitHub webhooks.

    This examines the JSON payload of a GitHub webhook to see which taxa have
    been added, modified, or removed. For every added amendment it fetches the
    amendment document and POSTs it to the taxonomy "process_additions" service.
    Modified and removed amendments are not handled: if the payload lists any,
    HTTPBadRequest is raised (after the additions have been attempted).

    TODO: Clear any cached taxon list.

    N.B. This depends on a GitHub webhook on the taxonomic-amendments docstore!

    :param request: the webhook request; its posted JSON must hold
        payload['repository']['url'].
    :return: the formatted webhook response message when every addition succeeded.
    :raises HTTPBadRequest if the payload is from the wrong repository, or if it
        lists modified or removed amendments.
    :raises HTTPInternalServerError if retrieving or posting any added amendment failed.
    """
    payload = extract_posted_data(request)
    tads = get_taxon_amendments_doc_store(request)
    amendments_repo_url = tads.remote_docstore_url
    if payload['repository']['url'] != amendments_repo_url:
        raise httpexcept(HTTPBadRequest, "wrong repo for this API instance")
    added_ids, modified_ids, removed_ids = github_payload_to_amr(payload,
                                                                 harvest_ott_ids_from_paths)
    msg_list = []
    # build a working URL, gather amendment body, and nudge the index!
    amendments_api_base_url = get_taxonomy_api_base_url(request)
    nudge_url = "{b}v3/taxonomy/process_additions".format(b=amendments_api_base_url)
    for doc_id in added_ids:
        # Failures are collected (best effort) so one bad amendment does not stop
        # the rest; `except Exception` keeps SystemExit/KeyboardInterrupt
        # propagating instead of being recorded as a failed retrieval.
        try:
            amendment_blob = tads.return_document(doc_id=doc_id)[0]
        except Exception:
            msg_list.append("retrieval of {} failed".format(doc_id))
        else:
            # Extra weirdness required here, as neo4j needs an encoded *string*
            # of the amendment JSON, within a second JSON wrapper :-/
            postable_blob = {"addition_document": json.dumps(amendment_blob)}
            postable_string = json.dumps(postable_blob)
            try:
                do_http_post_json(url=nudge_url, data=postable_string)
            except Exception:
                msg_list.append("nudge of taxonomy processor failed for {}".format(doc_id))
    # LATER: add handlers for modified and removed taxa?
    if modified_ids:
        raise httpexcept(HTTPBadRequest, "We don't currently re-index modified taxa!")
    if removed_ids:
        raise httpexcept(HTTPBadRequest, "We don't currently re-index removed taxa!")
    # N.B. If we had any cached amendment results, we'd clear them now
    # api_utils.clear_matching_cache_keys(...)
    github_webhook_url = "{}/settings/hooks".format(amendments_repo_url)
    msg = '\n'.join(msg_list)
    full_msg = format_gh_webhook_response(github_webhook_url, msg)
    if msg == '':
        return full_msg
    raise httpexcept(HTTPInternalServerError, full_msg)
Exemple #4
0
def post_document(request):
    """Open Tree API methods relating to creating a new resource.

    See `extract_write_args` for a description of the arguments that are extracted from the
    request object.
    See `finish_write_operation` for description of the response.

    :raises HTTPBadRequest if the request URL carried a document ID: a POST creates
        a new document, so it must not name an existing one.
    """
    document, post_args = extract_write_args(request)
    # Reject the request when a doc_id IS supplied (the previous `is None` test was
    # inverted relative to the error message and to post_study_document).
    if post_args.get('doc_id') is not None:
        msg = 'POST operation does not expect a URL that ends with a document ID'
        raise httpexcept(HTTPBadRequest, msg)
    umbrella = umbrella_from_request(request)
    return finish_write_operation(request, umbrella, document, post_args)
Exemple #5
0
def delete_document(request):
    """Does the work of the delete views.

    Uses the "starting_commit_SHA", "commit_msg", "doc_id" and "auth_info" entries
    of the write arguments extracted from the request (no document body is required).

    :return: whatever the umbrella's `delete_document` returns
        (per the original note: peyotl.TypeAwareDocStore.delete_document).
    :raises HTTPInternalServerError if the deletion raises GitWorkflowError.
    """
    args = extract_write_args(request, require_document=False)[1]
    parent_sha = args['starting_commit_SHA']
    commit_msg = args['commit_msg']
    auth_info = args['auth_info']
    doc_id = args['doc_id']
    umbrella = umbrella_from_request(request)
    try:
        x = umbrella.delete_document(doc_id, auth_info, parent_sha, commit_msg=commit_msg)
    except GitWorkflowError as err:
        _LOG.exception("delete exception")
        raise httpexcept(HTTPInternalServerError, err.msg)
    # Hand the deletion result back to the caller instead of discarding it.
    return x
Exemple #6
0
def include_tree_in_synth(request):
    """Adds a (study_id, tree_id) pair to the last (default) collection used in synthesis.

    See `collection_args_helper` for args used.
    If the current synthesis collection already includes the tree, that collection
    is returned unchanged and nothing is written.

    :raises HTTPNotFound if the (study_id, tree_id) is not in the set of studies
        (any failure while looking the tree up is reported this way).
    :return collection that is the concatenation of all trees queued for synthesis.
    """
    study_id, tree_id, auth_info = collection_args_helper(request)[1:]
    # examine this study and tree, to confirm it exists *and* to capture its name
    sds = get_phylesystem_doc_store(request)
    try:
        found_study = sds.return_doc(study_id, commit_sha=None, return_WIP_map=False)[0]
        match_list = extract_tree_nexson(found_study, tree_id=tree_id)
        if len(match_list) != 1:
            raise KeyError('tree id not found')
        found_tree = match_list[0][1]
        # fall back to the tree ID when the tree has no (or an empty) label
        found_tree_name = found_tree.get('@label') or tree_id
    except Exception:  # report a missing/misidentified tree
        msg = "Specified tree '{t}' in study '{s}' not found! Save this study and try again?"
        # Fill in the IDs before logging so the log entry identifies the tree too.
        msg = msg.format(s=study_id, t=tree_id)
        _LOG.exception(msg)
        raise httpexcept(HTTPNotFound, msg)
    x = synth_collection_helper(request)
    cds, coll_id_list, current_synth_coll = x[0], x[1], x[3]
    if cds.collection_includes_tree(current_synth_coll, study_id, tree_id):
        return current_synth_coll
    commit_msg = "Added via API (include_tree_in_synth)"
    # the decision's comment cites the study's publication reference, if it has one
    ref = found_study.get('nexml', {}).get('^ot:studyPublicationReference', '')
    comment = commit_msg + " from {p}"
    comment = comment.format(p=ref)
    decision = cds.create_tree_inclusion_decision(study_id=study_id,
                                                  tree_id=tree_id,
                                                  name=found_tree_name,
                                                  comment=comment)
    # the last collection ID in the list is treated as the default synth-input collection
    default_collection_id = coll_id_list[-1]
    append_tree_to_collection_helper(request,
                                     cds,
                                     default_collection_id,
                                     decision,
                                     auth_info,
                                     commit_msg=commit_msg)
    return trees_in_synth(request)
Exemple #7
0
def render_markdown(request):
    """Convert the markdown found in the "src" field of the posted data to sanitized HTML.

    The markdown is rendered, restricted to a whitelist of tags, and then
    linkified with every link set to open in a new window/tab.

    :param request: with data["src"] field
    :return: a Response wrapping the HTML representation of the markdown
    :raises HTTPBadRequest if the input is not found.
    """
    # noinspection PyUnusedLocal
    def _force_blank_target(attrs, new=False):  # pylint: disable=W0613
        """bleach.linkify callback: sets target="_blank" on the link attributes."""
        attrs['target'] = '_blank'
        return attrs

    posted = extract_posted_data(request)
    try:
        markdown_src = posted['src']
    except KeyError:
        raise httpexcept(HTTPBadRequest, '"src" parameter not found in POST')

    # only these tags survive the cleaning step
    allowed_tags = ['p', 'a', 'hr', 'i', 'em', 'b', 'div', 'ul', 'ol', 'li', 'h1', 'h2', 'h3', 'h4']
    raw_html = markdown.markdown(markdown_src)
    safe_html = bleach.clean(raw_html, tags=allowed_tags)
    linked_html = bleach.linkify(safe_html, callbacks=[_force_blank_target])
    return Response(linked_html)
Exemple #8
0
def exclude_tree_from_synth(request):
    """Removes a (study_id, tree_id) pair to the last (default) collection used in synthesis.
    See `collection_args_helper` for args used.
    :raises HTTPNotFound if the (study_id, tree_id) is not in the set of studies.
    :return collection that is the concatenation of all trees queued for synthesis.
    """
    study_id, tree_id, auth_info = collection_args_helper(request)[1:]
    cds, coll_id_list, coll_list, current_synth_coll = synth_collection_helper(request)
    if not cds.collection_includes_tree(current_synth_coll, study_id, tree_id):
        return current_synth_coll
    needs_push = {}
    for coll_id, coll in itertools.izip(coll_id_list, coll_list):
        if cds.collection_includes_tree(coll, study_id, tree_id):
            try:
                msg = "Updated via API (exclude_tree_from_synth)"
                r = cds.purge_tree_from_collection(coll_id,
                                                   study_id=study_id,
                                                   tree_id=tree_id,
                                                   auth_info=auth_info,
                                                   commit_msg=msg)
                commit_return = r
            except GitWorkflowError, err:
                raise httpexcept(HTTPInternalServerError, err.msg)
            except:
Exemple #9
0
def post_study_document(request):
    """POST of a new study.

    See `extract_write_args` for a description of the arguments that are extracted from the
    request object.
    The content of the new study is populated based on the value of "import_method":
        "import-method-TREEBASE_ID" should be accompanied by a "treebase_id" argument
        "import-method-PUBLICATION_DOI" or "import-method-PUBLICATION_REFERENCE" values
            should be accompanied by a "publication_DOI" or "publication_reference" argument
        "import-method-POST" is used to indicate that the body of the POST should contain the study
    Any other value creates an empty document (with a CC0 waiver added when
    "cc0_agreement" is truthy).

    See `finish_write_operation` for description of the response.

    :raises HTTPBadRequest if the URL ends with a document ID, if the argument
        required by the chosen import method is missing or invalid, or if the
        TreeBASE import fails.
    """
    # this view always deals with studies, whatever the route matched
    request.matchdict['resource_type'] = 'study'
    document, post_args = extract_write_args(request, study_post=True, require_document=False)
    if post_args.get('doc_id') is not None:
        msg = 'POST operation does not expect a URL that ends with a document ID'
        raise httpexcept(HTTPBadRequest, msg)
    umbrella = umbrella_from_request(request)
    import_method = post_args['import_method']
    nsv = umbrella.document_schema.schema_version
    cc0_agreement = post_args['cc0_agreement']
    publication_doi = post_args['publication_DOI']
    publication_doi_for_crossref = None
    if publication_doi:
        # if a URL or something other than a valid DOI was entered, don't submit it to crossref API
        publication_doi_for_crossref = make_valid_doi(publication_doi) or None
    publication_ref = post_args['publication_reference']
    if import_method == 'import-method-TREEBASE_ID':
        treebase_id = post_args['treebase_id']
        if not treebase_id:
            msg = "A treebase_id argument is required when import_method={}".format(import_method)
            raise httpexcept(HTTPBadRequest, msg)
        # accept IDs written with a leading "S"/"s" prefix as well as bare numbers
        try:
            treebase_number = int(treebase_id.upper().lstrip('S'))
        except Exception:
            msg = 'Invalid treebase_id="{}"'.format(treebase_id)
            raise httpexcept(HTTPBadRequest, msg)
        try:
            document = import_nexson_from_treebase(treebase_number, nexson_syntax_version=nsv)
        except Exception:
            msg = "Unexpected error parsing the file obtained from TreeBASE. " \
                  "Please report this bug to the Open Tree of Life developers."
            raise httpexcept(HTTPBadRequest, msg)
    elif import_method in ('import-method-PUBLICATION_DOI',
                           'import-method-PUBLICATION_REFERENCE'):
        # either a usable DOI or a free-text reference is enough to proceed
        if not (publication_ref or publication_doi_for_crossref):
            msg = 'Did not find a valid DOI in "publication_DOI" or a reference in ' \
                  '"publication_reference" arguments.'
            raise httpexcept(HTTPBadRequest, msg)
        document = import_nexson_from_crossref_metadata(doi=publication_doi_for_crossref,
                                                        ref_string=publication_ref,
                                                        include_cc0=cc0_agreement)
    elif import_method == 'import-method-POST':
        if not document:
            msg = 'Could not read a NexSON from the body of the POST, but ' \
                  'import_method="import-method-POST" was used.'
            raise httpexcept(HTTPBadRequest, msg)
    else:
        # unrecognized or absent import method: start from an empty document
        document = umbrella.document_schema.create_empty_doc()
        if cc0_agreement:
            add_cc0_waiver(nexson=document)
    return finish_write_operation(request, umbrella, document, post_args)
Exemple #10
0
def get_document(request):
    """Implementation of the GET methods for a resource or part of a resource.

    See `subresource_request_helper` documentation for details on which arguments
        are accepted.
    Depending on the request parameters, the return type can be either text (e.g. a newick
        string if just one tree from a study is requested, or an "external" format if
        translation is requested), or JSON.
    If no translation to an alternative schema is requested, the document will be returned
        as the "data" field of a JSON object. In that case, other keys will be:
        "sha" -> the git SHA for the commit returned.
        "branch2sha" -> dict describing the work-in-progress branch for the document.
        "url" -> the requested url
        "commentHTML" -> HTML converted from markdown comment (if any) embedded in doc.
        "version_history" -> is an optional return.
        "external_url" -> if requested this will be the same URL as a call to `external_url`
        "shardName" -> text description of the shard that holds the document.
    If the resource requested is a study JSON, then the doi field of the document will be
        used to
        if resource_type == 'study':
            duplicate_study_ids = []
            try:
                study_doi = document_blob['nexml']['^ot:studyPublication']['@href']
            except:
                pass  # no DOI
            else:
                try:
                    oti_domain = get_otindex_base_url(request)
                    duplicate_study_ids = find_studies_by_doi(oti_domain, study_doi)
                except:
                    _LOG.exception('Call to find_studies_by_doi failed')
                else:
                    try:
                        duplicate_study_ids.remove(doc_id)
                    except:
                        pass
            if duplicate_study_ids:
                result['duplicateStudyIDs'] = duplicate_study_ids


    """
    resource_type = request.matchdict['resource_type']
    umbrella = umbrella_from_request(request)
    subresource_req_dict, params = subresource_request_helper(request)
    doc_id = params['doc_id']
    triple = umbrella.is_plausible_transformation(subresource_req_dict)
    is_plausible, reason_or_converter, out_syntax = triple
    if not is_plausible:
        raise httpexcept(HTTPBadRequest, 'Impossible request: {}'.format(reason_or_converter))
    transformer = reason_or_converter
    parent_sha = params.get('starting_commit_SHA')
    _LOG.debug('parent_sha = {}'.format(parent_sha))
    version_history = None
    try:
        if (out_syntax == 'JSON') and params['version_history']:
            r, version_history = umbrella.return_document_and_history(doc_id,
                                                                      commit_sha=parent_sha,
                                                                      return_WIP_map=True)
        else:
            r = umbrella.return_document(doc_id, commit_sha=parent_sha, return_WIP_map=True)
    except:
        _LOG.exception('GET failed')
        raise HTTPNotFound('{r} document {i} GET failure'.format(r=resource_type, i=doc_id))
    # noinspection PyBroadException
    try:
        document_blob, head_sha, wip_map = r
    except:
        _LOG.exception('GET failed')
        raise httpexcept(HTTPBadRequest, err_body(traceback.format_exc()))
    if transformer is None:
        result_data = document_blob
    else:
        try:
            result_data = transformer(umbrella, doc_id, document_blob, head_sha)
        except KeyError, x:
            raise httpexcept(HTTPNotFound, 'subresource not found: {}'.format(x))
        except ValueError, y:
            raise httpexcept(HTTPBadRequest, 'subresource not found: {}'.format(y.message))
Exemple #11
0
 except:
     _LOG.exception('GET failed')
     raise httpexcept(HTTPBadRequest, err_body(traceback.format_exc()))
 if transformer is None:
     result_data = document_blob
 else:
     try:
         result_data = transformer(umbrella, doc_id, document_blob, head_sha)
     except KeyError, x:
         raise httpexcept(HTTPNotFound, 'subresource not found: {}'.format(x))
     except ValueError, y:
         raise httpexcept(HTTPBadRequest, 'subresource not found: {}'.format(y.message))
     except:
         msg = "Exception in coercing to the document to the requested type. "
         _LOG.exception(msg)
         raise httpexcept(HTTPBadRequest, err_body(msg))
 if subresource_req_dict['output_is_json']:
     result = {'sha': head_sha,
               'data': result_data,
               'branch2sha': wip_map,
               'url': request.url,
               }
     try:
         comment_html = render_markdown(umbrella.get_markdown_comment(result_data))
     except:
         comment_html = ''  # pylint: disable=R0204
     result['commentHTML'] = comment_html
     try:
         if version_history is not None:
             result['version_history'] = version_history
             # TODO get rid of camelCaseVersion