Esempio n. 1
0
def get_file_tree(project, file_path=None):
    """
    Get a file listing from the git remote

    Fetches from the remote (skipped when is_a_test() reports a test setup) so the
    index is up to date, then runs `git ls-files`, optionally restricted to
    file_path, and converts the listing with path_list_to_tree().

    Returns the tree as a JSON response, a 400 JSON response when the project has no
    configuration, or a 500 JSON response when a git command fails.
    """
    config = get_project_config(project)
    if config is None:
        return jsonify({"msg": "No such project."}), 400

    ls_files_command = ["ls-files"]
    if file_path is not None:
        # "--" ends git's option parsing, so a caller-supplied path that starts with
        # "-" is always treated as a pathspec and never as a command-line option.
        ls_files_command += ["--", file_path]

    # Fetch changes (to update index) but don't merge, and then run ls-files to get file listing.
    try:
        if not is_a_test(project):
            run_git_command(project, ["fetch"])
        output = run_git_command(project, ls_files_command)
        file_listing = [
            s.strip().decode('utf-8', 'ignore') for s in output.splitlines()
        ]
    except subprocess.CalledProcessError as e:
        return jsonify({
            "msg": "Git file listing failed.",
            "reason": str(e.output)
        }), 500
    tree = path_list_to_tree(file_listing)
    return jsonify(tree)
Esempio n. 2
0
def get_gallery_image(project, collection_id, file_name):
    """
    Return a gallery image from a media collection of the project

    Looks up image_path for the media_collection row matching the project and
    collection_id, then reads <file_root>/media/<image_path>/<file_name> and returns
    its bytes with content type image/jpeg.

    Returns a 404 response if the lookup fails or the file cannot be read.
    """
    logger.info("Getting galleries")
    try:
        project_id = get_project_id_from_name(project)
        config = get_project_config(project)
        connection = db_engine.connect()
        try:
            sql = sqlalchemy.sql.text(
                "SELECT image_path as image_path from media_collection WHERE project_id = :p_id AND id = :id "
            ).bindparams(p_id=project_id, id=collection_id)
            result = connection.execute(sql).fetchone()
            # dict(None) raises when no row matched; the outer handler turns that into a 404
            result = dict(result)
        finally:
            # always release the connection, also when the query fails or no row matched
            connection.close()
        file_path = safe_join(config["file_root"], "media",
                              str(result['image_path']),
                              "{}".format(str(file_name)))
        try:
            with open(file_path, mode="rb") as img_file:
                content = img_file.read()
            return Response(content, status=200, content_type="image/jpeg")
        except Exception:
            logger.exception(f"Failed to read from image file at {file_path}")
            return Response("File not found: " + file_path,
                            status=404,
                            content_type="text/json")
    except Exception:
        logger.exception("Failed to parse gallery image request.")
        return Response("Couldn't get gallery file.",
                        status=404,
                        content_type="text/json")
Esempio n. 3
0
def get_title(project, collection_id, publication_id, lang="swe"):
    """
    Get title page for a given publication @TODO: remove publication_id, it is not needed?

    The title XML file name is built from collection_id, lang and the "int"/"ext"
    version selected by the show_internally_published config value, and is
    transformed with title.xsl through get_content().

    Returns a 400 JSON response for an unknown project, a 403 JSON response with the
    status message when the collection may not be shown, otherwise a 200 JSON
    response with the id and transformed content.
    """
    config = get_project_config(project)
    if config is None:
        return jsonify({"msg": "No such project."}), 400

    can_show, message = get_collection_published_status(
        project, collection_id)
    if not can_show:
        denied = {
            "id": "{}_{}".format(collection_id, publication_id),
            "error": message
        }
        return jsonify(denied), 403

    logger.info("Getting XML for {} and transforming...".format(
        request.full_path))
    if config["show_internally_published"]:
        version = "int"
    else:
        version = "ext"
    # TODO get original_filename from publication_collection_title table? how handle language/version
    filename = "{}_tit_{}_{}.xml".format(collection_id, lang, version)
    content = get_content(project, "tit", filename, "title.xsl", None)
    title_id = "{}_{}_tit".format(collection_id, publication_id)
    # " id=" attributes in the transformed content are renamed to " data-id="
    return jsonify({
        "id": title_id,
        "content": content.replace(" id=", " data-id=")
    }), 200
Esempio n. 4
0
def get_song_file(project, file_type, file_name):
    """
    Retrieve a single file from project root that belongs to a song
    It can be musicxml, midi

    file_type selects the folder and extension: "musicxml" maps to
    <file_root>/musicxml/<file_name>.xml and "midi" to
    <file_root>/midi-files/<file_name>.mid. The file is sent as an attachment whose
    download name carries the same extension.

    Returns a 400 JSON response for an unknown project and a 404 response when the
    file cannot be sent (including an unrecognised file_type).
    """
    config = get_project_config(project)
    if config is None:
        return jsonify({"msg": "No such project."}), 400
    file_path = ""
    type_name = str(file_type)
    if 'musicxml' in type_name:
        file_name = "{}.xml".format(str(file_name))
        file_path = safe_join(config["file_root"], "musicxml", file_name)
    elif 'midi' in type_name:
        # give the download name its extension, just like the musicxml branch does
        file_name = "{}.mid".format(str(file_name))
        file_path = safe_join(config["file_root"], "midi-files", file_name)

    try:
        return send_file(file_path,
                         as_attachment=True,
                         mimetype='application/octet-stream',
                         attachment_filename=file_name)
    except Exception:
        logger.exception(f"Failed sending file from {file_path}")
        return Response("File not found.",
                        status=404,
                        content_type="text/json")
Esempio n. 5
0
def file_exists_in_file_root(project, file_path):
    """
    Tell whether file_path exists below the file_root of the given project.

    Returns False when the project has no configuration, otherwise the result of
    os.path.exists() on the path joined with safe_join().
    """
    config = get_project_config(project)
    if config is not None:
        full_path = safe_join(config["file_root"], file_path)
        return os.path.exists(full_path)
    return False
Esempio n. 6
0
def is_a_test(project):
    """
    Returns True if running in debug mode and project git_repository not configured, indicating that this is a test

    "Not configured" covers a missing project config, a config without a
    git_repository key, and a git_repository set to None. Debug mode means the
    FLASK_DEBUG environment variable equals 1. In every other case False is returned.
    """
    config = get_project_config(project)
    if (config is not None and "git_repository" in config
            and config["git_repository"] is not None):
        # a repository is configured, so this is never treated as a test
        return False
    return int(os.environ.get("FLASK_DEBUG", 0)) == 1
Esempio n. 7
0
def update_config(project):
    """
    Overwrite config.json in the project's file_root with the JSON body of the request.

    Returns a 400 JSON response for an unknown project, otherwise a JSON
    acknowledgement once the file has been written.
    """
    config = get_project_config(project)
    if config is None:
        return jsonify({"msg": "No such project."}), 400

    payload = request.get_json()
    target_path = os.path.join(config["file_root"], "config.json")
    with open(target_path, "w") as config_file:
        json.dump(payload, config_file)
    return jsonify({"msg": "received"})
Esempio n. 8
0
def get_config_file(project):
    """
    Return the contents of config.json from the project's file_root as JSON.

    Returns a 400 JSON response for an unknown project and an empty JSON object
    when the file does not exist.
    """
    config = get_project_config(project)
    if config is None:
        return jsonify({"msg": "No such project."}), 400

    config_path = os.path.join(config["file_root"], "config.json")
    if os.path.exists(config_path):
        with open(config_path) as config_file:
            stored = json.load(config_file)
        return jsonify(stored)
    return jsonify({})
Esempio n. 9
0
def run_git_command(project, command):
    """
    Run a git command with the project's file_root as working tree (git -C <file_root> ...)

    Returns the combined stdout/stderr output of the command; a non-zero exit status
    raises subprocess.CalledProcessError.
    @type project: str
    @type command: list
    """
    git_root = get_project_config(project)["file_root"]
    full_command = ["git", "-C", git_root, *command]
    return subprocess.check_output(full_command, stderr=subprocess.STDOUT)
Esempio n. 10
0
def check_project_config(project):
    """
    Validate the webfiles repository settings in the project config.

    Returns a (bool, str) tuple: True and an OK message when the config is usable,
    otherwise False and a message naming the first missing piece. git_repository is
    only required when is_a_test() does not report a test setup.
    """
    config = get_project_config(project)
    if config is None:
        return False, "Project config not found."
    if not is_a_test(project) and "git_repository" not in config:
        return False, "git_repository not in project config."

    # remaining keys are mandatory in every setup; checked in this order
    required = (
        ("git_branch", "git_branch information not in project config."),
        ("file_root", "file_root information not in project config."),
    )
    for key, problem in required:
        if key not in config:
            return False, problem
    return True, "Project config OK."
Esempio n. 11
0
def get_pdf_file(project, collection_id, file_type, download_name,
                 use_download_name):
    """
    Retrieve a single file from project root
    Currently only PDF or ePub

    The file is looked up in <file_root>/downloads/<collection_id>/. With
    use_download_name set and a pdf file_type the file is <download_name>.pdf (a
    trailing ".pdf" in download_name is not doubled); otherwise it is
    <collection_id>.pdf or <collection_id>.epub depending on file_type.

    Returns a 400 JSON response for an unknown project, a 404 JSON response when the
    collection is not in the database, and a 404 response when the file cannot be sent.
    """
    config = get_project_config(project)
    if config is None:
        return jsonify({"msg": "No such project."}), 400
    connection = db_engine.connect()
    try:
        # Check that the collection exists
        statement = sqlalchemy.sql.text(
            "SELECT * FROM publication_collection WHERE id=:coll_id").bindparams(
                coll_id=collection_id)
        row = connection.execute(statement).fetchone()
    finally:
        # release the connection on every path, including the 404 return below
        connection.close()
    if row is None:
        return jsonify({
            "msg":
            "Desired publication collection was not found in database!"
        }), 404

    file_path = ""

    if use_download_name and 'pdf' in str(file_type):
        if '.pdf' in str(download_name):
            direct_download_name = download_name.split('.pdf')[0]
        else:
            direct_download_name = download_name

        file_path = safe_join(config["file_root"], "downloads", collection_id,
                              "{}.pdf".format(direct_download_name))
    elif 'pdf' in str(file_type):
        file_path = safe_join(config["file_root"], "downloads", collection_id,
                              "{}.pdf".format(int(collection_id)))
    elif 'epub' in str(file_type):
        file_path = safe_join(config["file_root"], "downloads", collection_id,
                              "{}.epub".format(int(collection_id)))

    try:
        return send_file(file_path,
                         attachment_filename=download_name,
                         conditional=True)
    except Exception:
        logger.exception(f"Failed sending file from {file_path}")
        return Response("File not found.",
                        status=404,
                        content_type="text/json")
Esempio n. 12
0
def get_static_pages_as_json(project, language):
    """
    Return the hierarchy of the project's md/<language> folder as JSON.

    Returns a 400 JSON response for an unknown project and aborts with 404 when the
    folder does not exist.
    """
    config = get_project_config(project)
    if config is None:
        return jsonify({"msg": "No such project."}), 400

    logger.info(
        "Getting static content from /{}/static-pages-toc/{}".format(
            project, language))
    folder_path = safe_join(config["file_root"], "md", language)

    if not os.path.exists(folder_path):
        logger.info("did not find {}".format(folder_path))
        abort(404)

    hierarchy = path_hierarchy(project, folder_path, language)
    return jsonify(hierarchy), 200
Esempio n. 13
0
def get_json_file(project, folder, file_name):
    """
    Return <file_root>/<folder>/<file_name>.json of the project as a JSON response.

    Returns a 400 JSON response for an unknown project and a 404 response when the
    file cannot be read or parsed.
    """
    config = get_project_config(project)
    if config is None:
        return jsonify({"msg": "No such project."}), 400

    json_name = "{}.json".format(str(file_name))
    file_path = safe_join(config["file_root"], folder, json_name)
    try:
        with open(file_path) as json_file:
            parsed = json.load(json_file)
        return jsonify(parsed), 200
    except Exception:
        logger.exception(f"Failed to read JSON file at {file_path}")
        return Response("File not found.",
                        status=404,
                        content_type="text/json")
Esempio n. 14
0
def get_html_contents_as_json(project, filename):
    """
    Return the contents of <file_root>/html/<filename>.html wrapped in JSON.

    Returns a 400 JSON response for an unknown project and aborts with 404 when the
    file does not exist.
    """
    config = get_project_config(project)
    if config is None:
        return jsonify({"msg": "No such project."}), 400

    logger.info("Getting static content from /{}/html/{}".format(
        project, filename))
    file_path = safe_join(config["file_root"], "html",
                          "{}.html".format(filename))
    if not os.path.exists(file_path):
        abort(404)

    with io.open(file_path, encoding="UTF-8") as html_file:
        contents = html_file.read()
    return jsonify({"filename": filename, "content": contents}), 200
Esempio n. 15
0
def get_type_gallery_image(project, connection_type, connection_id):
    """
    Return the thumbnail of one image connected to a tag, location or subject

    connection_type must be 'tag', 'location' or 'subject'; it is validated against
    that list before being used to build table and column names in the query. The
    first matching non-deleted media row is used, and ".jpg" in its front image file
    name is replaced with "_thumb.jpg".

    Returns the image bytes with content type image/jpeg, or a 404 response when the
    connection type is unknown, the lookup fails or the file cannot be read.
    """
    logger.info("Getting gallery file")
    if connection_type not in ['tag', 'location', 'subject']:
        return Response("Couldn't get media connection data.",
                        status=404,
                        content_type="text/json")
    type_column = "{}_id".format(connection_type)
    try:
        project_id = get_project_id_from_name(project)
        config = get_project_config(project)
        connection = db_engine.connect()
        try:
            sql = f"SELECT mcol.image_path, m.image_filename_front FROM media_connection mcon " \
                  f"JOIN {connection_type} t ON t.id = mcon.{type_column} " \
                  f"JOIN media m ON m.id = mcon.media_id " \
                  f"JOIN media_collection mcol ON mcol.id = m.media_collection_id " \
                  f"WHERE t.id = :id " \
                  f"AND t.project_id = :p_id " \
                  f"AND mcol.deleted != 1 AND t.deleted != 1 AND m.deleted != 1 AND mcon.deleted != 1 LIMIT 1"
            sql = sqlalchemy.sql.text(sql).bindparams(p_id=project_id,
                                                      id=connection_id)
            result = connection.execute(sql).fetchone()
            # dict(None) raises when no row matched; the outer handler turns that into a 404
            result = dict(result)
        finally:
            # always release the connection, also when the query fails or no row matched
            connection.close()
        file_path = safe_join(
            config["file_root"], "media", str(result['image_path']),
            str(result['image_filename_front']).replace(".jpg", "_thumb.jpg"))
        try:
            with open(file_path, mode="rb") as img_file:
                content = img_file.read()
            return Response(content, status=200, content_type="image/jpeg")
        except Exception:
            logger.exception(f"Failed to read from image file at {file_path}")
            return Response("File not found: " + file_path,
                            status=404,
                            content_type="text/json")
    except Exception:
        logger.exception("Failed to parse gallery image request.")
        return Response("Couldn't get type file.",
                        status=404,
                        content_type="text/json")
Esempio n. 16
0
def get_collections(project):
    """
    List the publication collections of a project as JSON, ordered by name

    Only collections whose published value is at least 1 (when the
    show_internally_published config value is set) or at least 2 (otherwise) are
    included.

    Returns a 400 JSON response for an unknown project.
    """
    config = get_project_config(project)
    if config is None:
        return jsonify({"msg": "No such project."}), 400

    logger.info("Getting collections /{}/collections".format(project))
    connection = db_engine.connect()
    try:
        status = 1 if config["show_internally_published"] else 2
        project_id = get_project_id_from_name(project)
        sql = sqlalchemy.sql.text(
            """ SELECT id, name as title, published, date_created, date_modified, date_published_externally, legacy_id,
            project_id, publication_collection_title_id, publication_collection_introduction_id, name FROM publication_collection WHERE project_id = :p_id AND published>=:p_status ORDER BY name """
        )
        statement = sql.bindparams(p_status=status, p_id=project_id)
        results = [
            dict(row) for row in connection.execute(statement).fetchall()
        ]
    finally:
        # release the connection even if the lookup or the query raises
        connection.close()
    return jsonify(results)
Esempio n. 17
0
def update_files_in_git_repo(project, specific_file=False):
    """
    Bring the project's local repository up to date with origin/<git_branch>

    Always fetches first. With specific_file given, only that file is checked out
    from the remote branch; otherwise the changed files are listed and the remote
    branch is merged.

    Returns (False, reason) when the project is unknown or a git command fails,
    (True, specific_file) after a single-file checkout, or (True, list of changed
    file names) after a merge.
    """
    config = get_project_config(project)
    if config is None:
        return False, "No such project."
    remote_branch = "origin/{}".format(config["git_branch"])

    # Fetch the latest remote state without touching the working tree yet
    try:
        run_git_command(project, ["fetch"])
    except subprocess.CalledProcessError as e:
        return False, str(e.output)

    if specific_file:
        # Single-file update: check out just that file from the remote branch, which
        # is faster when changes to other files are of no interest right now
        try:
            run_git_command(
                project, ["checkout", remote_branch, "--", specific_file])
        except subprocess.CalledProcessError as e:
            return False, str(e.output)
        return True, specific_file

    # Full update: collect the names of changed files, then merge the remote branch
    try:
        output = run_git_command(project, [
            "show", "--pretty=format:", "--name-only", ".." + remote_branch
        ])
        new_and_changed_files = []
        for raw_line in output.splitlines():
            new_and_changed_files.append(
                raw_line.strip().decode('utf-8', 'ignore'))
        run_git_command(project, ["merge", remote_branch])
    except subprocess.CalledProcessError as e:
        return False, str(e.output)
    return True, new_and_changed_files
Esempio n. 18
0
def get_md_contents_as_json(project, fileid):
    """
    Return the contents of a markdown file identified by a dash-separated fileid

    Each dash-separated part of fileid becomes a "<part>*" path component below
    <file_root>/md, and the first file matching that glob pattern is returned as
    JSON together with the fileid.

    Returns a 400 JSON response for an unknown project and aborts with 404 when no
    file matches or the file cannot be read.
    """
    config = get_project_config(project)
    if config is None:
        return jsonify({"msg": "No such project."}), 400

    parts = fileid.split("-")
    path_tmp = fileid
    if len(parts) > 4:
        if "0" in parts[4]:
            # keep only the first five parts
            path_tmp = "-".join(parts[:5])
        else:
            # NOTE(review): parts[3] is dropped here and "0" is prefixed to parts[4];
            # behaviour kept as is - confirm this is intended
            path_tmp = "-".join(parts[:3]) + "-0" + parts[4]
    path = "*/".join(path_tmp.split("-")) + "*"

    file_path_query = safe_join(config["file_root"], "md", path)

    # abort() is called outside the try blocks so that an ordinary 404 is not
    # caught and logged as an unexpected exception
    try:
        matches = list(glob.iglob(file_path_query))
    except Exception:
        logger.exception("Error fetching: {}".format(file_path_query))
        abort(404)

    if not matches:
        logger.info(
            "Not found {} (md_contents fetch)".format(file_path_query))
        abort(404)

    file_path = matches[0]
    logger.info("Finding {} (md_contents fetch)".format(file_path))
    if not os.path.exists(file_path):
        abort(404)

    try:
        with io.open(file_path, encoding="UTF-8") as md_file:
            contents = md_file.read()
    except Exception:
        logger.exception("Error fetching: {}".format(file_path_query))
        abort(404)
    return jsonify({"fileid": fileid, "content": contents}), 200
Esempio n. 19
0
def get_file(project, file_path):
    """
    Return the latest version of a file from the project's git repository

    Unless is_a_test() reports a test setup, the file is first synced from the
    remote. The file contents are returned base64-encoded in a JSON response
    together with the file path.

    Returns a 400 JSON response for an unknown project, a 500 JSON response when the
    git configuration is faulty or the sync fails, and a 404 JSON response when the
    file does not exist in the file root.
    """
    config = get_project_config(project)
    if config is None:
        return jsonify({"msg": "No such project."}), 400
    # TODO swift and/or S3 support for large files (images/facsimiles)
    config_ok, config_reason = check_project_config(project)
    if not config_ok:
        return jsonify({
            "msg": "Error in git configuration, check configuration file.",
            "reason": config_reason
        }), 500

    if not is_a_test(project):
        # Pull the requested file from the remote into the local API repository
        updated, update_reason = update_files_in_git_repo(project, file_path)
        if not updated:
            return jsonify({
                "msg": "Git update failed to execute properly.",
                "reason": update_reason
            }), 500

    if not file_exists_in_file_root(project, file_path):
        return jsonify(
            {"msg":
             "The requested file was not found in the git repository."}), 404

    # Read the raw bytes and hand them to the client as a base64 string in JSON
    full_path = safe_join(config["file_root"], file_path)
    with io.open(full_path, mode="rb") as requested_file:
        encoded = base64.b64encode(requested_file.read())
    return jsonify({
        "file": encoded.decode("utf-8"),
        "filepath": file_path
    })
0
def get_facsimile_page_image(project, facsimile_type, facs_id, facs_nr):
    """
    Return a facsimile or song example page image as image/jpeg

    For facsimile_type 'facsimile' the file is
    <file_root>/facsimiles/<facs_id>/4/<facs_nr>.jpg, for 'song-example' it is
    <file_root>/song-example-images/<facs_id>/<facs_nr>.jpg. Any other type yields
    an empty path and therefore a 404.

    Returns a 400 JSON response for an unknown project and a 404 response when the
    request cannot be interpreted or the file cannot be read.
    """
    config = get_project_config(project)
    if config is None:
        return jsonify({"msg": "No such project."}), 400

    logger.info("Getting facsimile page image")
    try:
        if facsimile_type == 'facsimile':
            # facsimile pages are always served from zoom level 4
            file_path = safe_join(config["file_root"], "facsimiles", facs_id,
                                  "4", "{}.jpg".format(int(facs_nr)))
        elif facsimile_type == 'song-example':
            file_path = safe_join(config["file_root"], "song-example-images",
                                  facs_id, "{}.jpg".format(int(facs_nr)))
        else:
            # TODO placeholder page image file?
            file_path = ""

        try:
            with open(file_path, mode="rb") as img_file:
                content = img_file.read()
            return Response(content, status=200, content_type="image/jpeg")
        except Exception:
            logger.exception(f"Failed to read facsimile page from {file_path}")
            return Response("File not found: " + file_path,
                            status=404,
                            content_type="text/json")
    except Exception:
        logger.exception(
            f"Failed to interpret facsimile page image request {request.url}")
        return Response("Couldn't get facsimile page.",
                        status=404,
                        content_type="text/json")
Esempio n. 21
0
def upload_facsimile_file(project, collection_id, page_number):
    """
    Upload a facsimile file in image format.

    Endpoint accepts requests with enctype=multipart/form-data
    Endpoint assumes facsimile is provided as form parameter named 'facsimile'
    (for example, curl -F 'facsimile=@path/to/local/file' https://api.sls.fi/digitaledition/<project>/facsimiles/<collection_id>/<page_number>)

    ---
    First and foremost, only accept images. Reject with 400 anything that allowed_facsimile() doesn't accept.
    Then, attempt to convert image to 4 different "zoom levels" of .jpg with imagemagick

    Lastly, store the images in root/facsimiles/<collection_id>/<zoom_level>/<page_number>.jpg
    Where zoom_level is determined by FACSIMILE_IMAGE_SIZES in generics.py (1-4)

    Returns a JSON "OK" message on success, a 400 JSON response for an unknown
    project or a missing/invalid file, a 404 JSON response when the facsimile
    collection is not in the database, and a 500 JSON response when resizing fails.
    """
    # TODO OpenStack Swift support for ISILON file storage - config param for root 'facsimiles' path
    # ensure temporary facsimile upload folder exists
    os.makedirs(FACSIMILE_UPLOAD_FOLDER, exist_ok=True)
    config = get_project_config(project)
    if config is None:
        return jsonify({"msg": "No such project."}), 400
    if request.files is None:
        return jsonify({"msg": "Request.files is none!"}), 400
    if "facsimile" not in request.files:
        return jsonify({"msg":
                        "No file provided in request (facsimile)!"}), 400
    # get a folder path for the facsimile collection from the database if set, otherwise use project file root
    connection = db_engine.connect()
    try:
        collection_check_statement = sqlalchemy.sql.text(
            "SELECT * FROM publication_facsimile_collection WHERE deleted != 1 AND id=:coll_id"
        ).bindparams(coll_id=collection_id)
        row = connection.execute(collection_check_statement).fetchone()
    finally:
        # release the connection on every path, including the 404 return below
        connection.close()
    if row is None:
        return jsonify(
            {"msg":
             "Desired facsimile collection was not found in database!"}), 404
    elif row.folder_path != '' and row.folder_path is not None:
        collection_folder_path = safe_join(row.folder_path, collection_id)
    else:
        collection_folder_path = safe_join(config["file_root"], "facsimiles",
                                           collection_id)

    # handle received file
    uploaded_file = request.files["facsimile"]
    # if user selects no file, some libraries send a POST with an empty file and filename
    if uploaded_file.filename == "":
        return jsonify({"msg":
                        "No file provided in uploaded_file.filename!"}), 400

    if uploaded_file and allowed_facsimile(uploaded_file.filename):
        # handle potentially malicious filename and save file to temp folder
        temp_path = os.path.join(FACSIMILE_UPLOAD_FOLDER,
                                 secure_filename(uploaded_file.filename))
        uploaded_file.save(temp_path)

        # resize file using imagemagick
        resize = convert_resize_uploaded_facsimile(temp_path,
                                                   collection_folder_path,
                                                   page_number)

        if resize:
            return jsonify({"msg": "OK"})
        else:
            return jsonify({"msg":
                            "Failed to resize uploaded facsimile!"}), 500
    else:
        return jsonify({
            "msg":
            f"Invalid facsimile provided. Allowed filetypes are {ALLOWED_EXTENSIONS_FOR_FACSIMILE_UPLOAD}. TIFF files are preferred."
        }), 400
Esempio n. 22
0
def handle_toc(project, collection_id):
    """
    Read (GET) or replace (PUT) the table of contents of a collection

    The table of contents is stored as <file_root>/toc/<collection_id>.json.

    GET returns the raw file contents with status 200 and aborts with 404 when the
    file is missing or cannot be read.

    PUT requires a JWT identity with access to the project (in debug mode the test
    user has access to every project). The JSON payload is written to a ".new" file
    which then replaces the old file, and the result is committed with
    git_commit_and_push_file(). Responses: 403 without identity or access, 400
    without JSON payload, 500 when saving or committing fails.

    Returns a 400 JSON response for an unknown project. Other request methods fall
    through without a return value.
    """
    config = get_project_config(project)
    if config is None:
        return jsonify({"msg": "No such project."}), 400

    if request.method == "GET":
        logger.info(
            f"Getting table of contents for /{project}/toc/{collection_id}"
        )
        file_path_query = safe_join(config["file_root"], "toc",
                                    f'{collection_id}.json')

        # abort() is called outside the try blocks so that an ordinary 404 is not
        # caught and logged as an unexpected exception
        try:
            matches = list(glob.iglob(file_path_query))
        except Exception:
            logger.exception(f"Error fetching {file_path_query}")
            abort(404)

        if not matches:
            logger.warning(f"File {file_path_query} not found on disk.")
            abort(404)

        file_path = matches[0]
        logger.info(f"Finding {file_path} (toc collection fetch)")
        if not os.path.exists(file_path):
            abort(404)

        try:
            with io.open(file_path, encoding="UTF-8") as json_file:
                contents = json_file.read()
        except Exception:
            logger.exception(f"Error fetching {file_path_query}")
            abort(404)
        return contents, 200
    elif request.method == "PUT":
        # uploading a new table of contents requires authorization and project permission
        identity = get_jwt_identity()
        if identity is None:
            return jsonify({"msg": "Missing Authorization Header"}), 403

        authorized = False
        # in debug mode, test user has access to every project
        if int(os.environ.get(
                "FLASK_DEBUG",
                0)) == 1 and identity["sub"] == "*****@*****.**":
            authorized = True
        elif identity["projects"] is not None and project in identity[
                "projects"]:
            authorized = True

        if not authorized:
            return jsonify({"msg": "No access to this project."}), 403

        logger.info(
            f"Processing new table of contents for /{project}/toc/{collection_id}"
        )
        data = request.get_json()
        if not data:
            return jsonify({"msg": "No JSON in payload."}), 400
        file_path = safe_join(config["file_root"], "toc",
                              f"{collection_id}.json")
        new_file_path = f"{file_path}.new"
        try:
            # save new toc as file_path.new
            with open(new_file_path, "w", encoding="utf-8") as outfile:
                json.dump(data, outfile)
        except Exception as ex:
            # if we fail to save the file, make sure it doesn't exist before returning an error
            try:
                os.remove(new_file_path)
            except FileNotFoundError:
                pass
            return jsonify({
                "msg": "Failed to save JSON data to disk.",
                # an exception object is not JSON serializable; send its text
                "reason": str(ex)
            }), 500

        # os.replace overwrites an existing destination on every platform,
        # whereas os.rename fails on some OSes when the target already exists
        os.replace(new_file_path, file_path)

        # get author and construct git commit message
        author_email = identity["sub"]
        author = "{} <{}>".format(
            author_email.split("@")[0], author_email)
        message = "TOC update by {}".format(author_email)

        # git commit (and possibly push) file
        commit_result = git_commit_and_push_file(
            project, author, message, file_path)
        if commit_result:
            return jsonify(
                {"msg": f"Saved new toc as {file_path}"})
        return jsonify({
            "msg":
            "git commit failed! Possible configuration fault or git conflict."
        }), 500
Esempio n. 23
0
def get_facsimile_file(project, collection_id, number, zoom_level):
    """
    Retrieve a single facsimile image file from project root

    Facsimile files are stored as follows: root/facsimiles/<collection_id>/<zoom_level>/<page_number>.jpg
    The collection_id these are sorted by is the publication_facsimile_collection id, stored as publication_id in the old database structure?

    However, the first page of a publication is not necessarily 1.jpg, as facsimiles often contain title pages and blank pages
    Thus, calling for facsimiles/1/1/1 may require fetching a file from root/facsimiles/1/1/5.jpg

    The published value of the publication linked to the facsimile collection is
    checked first: 0 is never served, 1 only when the show_internally_published
    config value is set. When the collection row has a folder_path, that folder is
    used instead of <file_root>/facsimiles.

    Returns the image bytes with content type image/jpeg, a 400 JSON response for an
    unknown project, or a 404 JSON response when the database checks fail or the
    file cannot be read.
    """
    # TODO OpenStack Swift support for ISILON file storage - config param for root 'facsimiles' path
    config = get_project_config(project)
    if config is None:
        return jsonify({"msg": "No such project."}), 400

    file_not_in_db = {"msg": "Desired facsimile file not found in database."}
    connection = db_engine.connect()
    try:
        check_statement = sqlalchemy.sql.text(
            "SELECT published FROM publication WHERE deleted != 1 AND id = "
            "(SELECT publication_id FROM publication_facsimile WHERE deleted != 1 AND publication_facsimile_collection_id=:coll_id LIMIT 1)"
        ).bindparams(coll_id=collection_id)
        row = connection.execute(check_statement).fetchone()
        if row is None:
            return jsonify(file_not_in_db), 404

        try:
            status = int(row[0])
        except ValueError:
            logger.exception(f"Couldn't convert {row[0]} to integer.")
            return jsonify(file_not_in_db), 404
        except Exception:
            logger.exception(
                f"Unknown exception handling {row} during facsimile file fetch."
            )
            return jsonify(file_not_in_db), 404

        if status == 0:
            return jsonify(file_not_in_db), 404
        if status == 1 and not config["show_internally_published"]:
            return jsonify(file_not_in_db), 404

        statement = sqlalchemy.sql.text(
            "SELECT * FROM publication_facsimile_collection WHERE deleted != 1 AND id=:coll_id"
        ).bindparams(coll_id=collection_id)
        row = connection.execute(statement).fetchone()
        if row is None:
            return jsonify({
                "msg":
                "Desired facsimile collection was not found in database!"
            }), 404
        elif row.folder_path != '' and row.folder_path is not None:
            file_path = safe_join(row.folder_path, collection_id, zoom_level,
                                  "{}.jpg".format(int(number)))
        else:
            file_path = safe_join(config["file_root"], "facsimiles",
                                  collection_id, zoom_level,
                                  "{}.jpg".format(int(number)))
    finally:
        # release the connection on every path, including all early 404 returns
        connection.close()

    try:
        with open(file_path, mode="rb") as img_file:
            content = img_file.read()
        return Response(content, status=200, content_type="image/jpeg")
    except Exception:
        logger.exception(f"Exception reading facsimile at {file_path}")
        return jsonify({"msg": "Desired facsimile file not found."}), 404
Esempio n. 24
0
def get_facsimiles(project, publication_id, section_id=None):
    """
    Get the facsimile listing for a publication, optionally limited to one section.

    Each returned entry is the joined database row (publication_facsimile,
    publication_facsimile_collection and publication) as a dict, extended with:
    - publication_facsimile_id: id of the publication_facsimile row
    - start_url: the collection's folder_path if set, otherwise a path built from
      "digitaledition", the project name, "facsimile" and the collection id
    - first_page: start_page_number (0 if unset) + page_nr
    - last_page: the page before the next facsimile (by page_nr) in the same
      collection, or number_of_pages if there is no later facsimile

    :param project: project name, used to look up the project configuration
    :param publication_id: publication id; if it contains "_", the part after the first "_" is used
    :param section_id: optional section id; any "ch" in it is stripped before querying
    :return: (JSON response, HTTP status) - 400 for a missing publication_id or unknown project
    """
    config = get_project_config(project)
    if publication_id is None or str(publication_id) == "undefined":
        # Return a proper JSON error like every other branch; a bare (False, "...") tuple
        # is not a valid response body.
        return jsonify({"msg": "No such publication_id."}), 400
    if config is None:
        return jsonify({"msg": "No such project."}), 400

    logger.info("Getting facsimiles /{}/facsimiles/{}".format(
        project, publication_id))

    sql = 'select *, f.id as publication_facsimile_id from publication_facsimile as f \
        left join publication_facsimile_collection as fc on fc.id=f.publication_facsimile_collection_id \
        left join publication p on p.id=f.publication_id \
        where f.deleted != 1 and fc.deleted != 1 and f.publication_id=:p_id \
        '
    clauses = [sql]

    if config["show_internally_published"]:
        clauses.append("and p.published>0")
    elif config["show_unpublished"]:
        clauses.append("and p.published>2")

    # str() so that a non-string id does not raise on the membership test
    publication_ref = str(publication_id)
    if '_' in publication_ref:
        pub_id = publication_ref.split('_')[1]
    else:
        pub_id = publication_id
    bind_values = {"p_id": pub_id}

    if section_id is not None:
        clauses.append("and f.section_id = :section")
        bind_values["section"] = str(section_id).replace('ch', '')

    clauses.append("ORDER BY f.priority")
    statement = sqlalchemy.sql.text(" ".join(clauses)).bindparams(**bind_values)

    # Looks up the next facsimile (by page number) in the same collection
    next_facsimile_sql = "SELECT * FROM publication_facsimile WHERE deleted != 1 AND publication_facsimile_collection_id=:fc_id AND page_nr>:page_nr ORDER BY page_nr ASC LIMIT 1"

    result = []
    connection = db_engine.connect()
    try:
        for row in connection.execute(statement).fetchall():
            facsimile = dict(row)
            if row.folder_path != '' and row.folder_path is not None:
                facsimile["start_url"] = row.folder_path
            else:
                facsimile["start_url"] = safe_join(
                    "digitaledition", project, "facsimile",
                    str(row["publication_facsimile_collection_id"]))
            pre_pages = row["start_page_number"] or 0

            facsimile["first_page"] = pre_pages + row["page_nr"]

            next_statement = sqlalchemy.sql.text(next_facsimile_sql).bindparams(
                fc_id=row["publication_facsimile_collection_id"],
                page_nr=row["page_nr"])
            # The query is LIMIT 1, so a single fetchone() is enough
            next_row = connection.execute(next_statement).fetchone()
            if next_row is not None:
                facsimile["last_page"] = pre_pages + next_row["page_nr"] - 1
            elif "last_page" not in facsimile:
                facsimile["last_page"] = row["number_of_pages"]

            result.append(facsimile)
    finally:
        # Always release the connection, even if a query or row lookup fails
        connection.close()

    return jsonify(result), 200
Esempio n. 25
0
def update_file(project, file_path):
    """
    Add new or update existing file in git remote.

    PUT data MUST be in JSON format

    PUT data MUST contain the following:
    file: xml file data in base64, to be created or updated in git repository

    PUT data MAY contain the following override information:
    author: email of the person authoring this change, if not given, JWT identity is used instead
    message: commit message for this change, if not given, generic "File update by <author>" message is used instead
    force: boolean value, if True uses force-push to override errors and possibly mangle the git remote to get the update through

    Returns a JSON response, with status:
    400 if the project is unknown or the request data is missing/invalid
    409 if the file has changed in the remote since the last update (and force is not set)
    500 if the git configuration, file path or any git command fails
    """
    config = get_project_config(project)
    if config is None:
        return jsonify({"msg": "No such project."}), 400
    # Check if request has valid JSON and set author/message/force accordingly
    request_data = request.get_json()
    if not request_data:
        return jsonify({"msg": "No JSON in PUT request."}), 400
    elif "file" not in request_data:
        return jsonify({"msg": "No file in JSON data."}), 400

    author_email = request_data.get("author", get_jwt_identity()["sub"])
    message = request_data.get("message",
                               "File update by {}".format(author_email))
    force = bool(request_data.get("force", False))

    # git commit requires author info to be in the format "Name <email>"
    # As we only have an email address to work with, split email on @ and give first part as name
    # - foo@bar.org becomes "foo <foo@bar.org>"
    author = "{} <{}>".format(author_email.split("@")[0], author_email)

    # Decode the base64 string from the request into raw binary data.
    # binascii.Error is a ValueError subclass; non-string input raises TypeError.
    try:
        file_bytes = base64.b64decode(request_data["file"])
    except (ValueError, TypeError):
        return jsonify({"msg": "File data in JSON is not valid base64."}), 400

    # verify git config
    config_okay = check_project_config(project)
    if not config_okay[0]:
        return jsonify({
            "msg": "Error in git configuration, check configuration file.",
            "reason": config_okay[1]
        }), 500

    # fetch latest changes from remote
    if not is_a_test(project):
        try:
            run_git_command(project, ["fetch"])
        except subprocess.CalledProcessError as e:
            return jsonify({
                "msg": "Git fetch failed to execute properly.",
                "reason": str(e.output)
            }), 500

        # check if desired file has changed in remote since last update
        # if so, fail and return both user file and repo file to user, unless force=True
        try:
            output = run_git_command(project, [
                "show", "--pretty=format:", "--name-only",
                "..origin/{}".format(config["git_branch"])
            ])
            new_and_changed_files = [
                s.strip().decode('utf-8', 'ignore')
                for s in output.splitlines()
            ]
        except subprocess.CalledProcessError as e:
            return jsonify({
                "msg": "Git show failed to execute properly.",
                "reason": str(e.output)
            }), 500
        repo_path = safe_join(config["file_root"], file_path)
        if repo_path in new_and_changed_files and not force:
            with io.open(repo_path, mode="rb") as repo_file:
                file_bytestring = base64.b64encode(repo_file.read())
            return jsonify({
                "msg":
                "File {} has been changed in git repository since last update, please manually check file changes."
                .format(file_path),
                "your_file": request_data["file"],
                "repo_file": file_bytestring.decode("utf-8")
            }), 409

        # merge in latest changes so that the local repository is updated
        try:
            run_git_command(
                project, ["merge", "origin/{}".format(config["git_branch"])])
        except subprocess.CalledProcessError as e:
            return jsonify({
                "msg": "Git merge failed to execute properly.",
                "reason": str(e.output)
            }), 500

    # check the status of the git repo, so we know if we need to git add later
    file_exists = file_exists_in_file_root(project, file_path)

    # Secure filename and save new file to local repo
    # Could be more secure...
    if file_path.find('.xml') <= 0:
        return jsonify({"msg": "File path error"}), 500
    filename = safe_join(config["file_root"], file_path)
    if not filename:
        # Never continue to add/commit/push when the target path could not be resolved safely
        return jsonify({"msg": "File path error"}), 500
    with io.open(filename, mode="wb") as new_file:
        new_file.write(file_bytes)

    # Add file to local repo if it wasn't already in the repository
    if not file_exists:
        try:
            run_git_command(project, ["add", filename])
        except subprocess.CalledProcessError as e:
            return jsonify({
                "msg": "Git add failed to execute properly.",
                "reason": str(e.output)
            }), 500

    # Commit changes to local repo, noting down user and commit message
    # NOTE(review): an already-existing file is not staged above and this commit has no
    # pathspec or -a; confirm that run_git_command/file_exists_in_file_root make this work.
    try:
        run_git_command(
            project, ["commit", "--author={}".format(author), "-m", message])
    except subprocess.CalledProcessError as e:
        return jsonify({
            "msg": "Git commit failed to execute properly.",
            "reason": str(e.output)
        }), 500

    # push new commit to remote repository
    if not is_a_test(project):
        try:
            if force:
                run_git_command(project, ["push", "-f"])
            else:
                run_git_command(project, ["push"])
        except subprocess.CalledProcessError as e:
            return jsonify({
                "msg": "Git push failed to execute properly.",
                "reason": str(e.output)
            }), 500

    return jsonify({"msg": "File updated successfully in repository."})
Esempio n. 26
0
def git_commit_and_push_file(project, author, message, file_path, force=False):
    """
    Stage, commit and (for non-test projects) push a single file in the project's git repository.

    For non-test projects the remote is fetched first; if the file has changed in the remote
    since the last update and force is not set, nothing is committed. Otherwise the remote
    branch is merged in before the file is added and committed.

    :param project: project name, used to look up the project and git configuration
    :param author: value passed to git as --author (git expects the form "Name <email>")
    :param message: commit message
    :param file_path: path of the file to add, passed as-is to "git add"
    :param force: if True, ignore remote changes to the file and push with -f
    :return: True if the file was committed (and pushed, unless this is a test project), False on any failure
    """
    # verify git config
    config_okay = check_project_config(project)
    if not config_okay[0]:
        logger.error("Error in git config, check project configuration!")
        return False

    config = get_project_config(project)

    # fetch latest changes from remote
    if not is_a_test(project):
        try:
            run_git_command(project, ["fetch"])
        except subprocess.CalledProcessError:
            logger.exception("Git fetch failed to execute properly.")
            return False

        # check if desired file has changed in remote since last update
        # if so, fail unless force=True
        try:
            output = run_git_command(project, [
                "show", "--pretty=format:", "--name-only",
                "..origin/{}".format(config["git_branch"])
            ])
            new_and_changed_files = [
                s.strip().decode('utf-8', 'ignore')
                for s in output.splitlines()
            ]
        except subprocess.CalledProcessError as e:
            logger.error("Git show failed to execute properly.")
            logger.error(str(e.output))
            return False

        if safe_join(config["file_root"],
                     file_path) in new_and_changed_files and not force:
            logger.error(
                "File {} has been changed in git repository since last update, please manually check file changes."
                .format(file_path))
            return False

        # merge in latest changes so that the local repository is updated
        try:
            run_git_command(
                project, ["merge", "origin/{}".format(config["git_branch"])])
        except subprocess.CalledProcessError as e:
            logger.error("Git merge failed to execute properly.")
            logger.error(str(e.output))
            return False

    # git add file
    try:
        run_git_command(project, ["add", file_path])
    except subprocess.CalledProcessError as e:
        logger.error("Git add failed to execute properly!")
        logger.error(str(e.output))
        return False

    # Commit changes to local repo, noting down user and commit message
    try:
        run_git_command(
            project, ["commit", "--author={}".format(author), "-m", message])
    except subprocess.CalledProcessError as e:
        logger.error("Git commit failed to execute properly.")
        logger.error(str(e.output))
        # A failed commit must not be reported as success or followed by a push
        return False
    else:
        logger.info("git commit of {} succeeded".format(file_path))

    # push new commit to remote repository
    if not is_a_test(project):
        try:
            if force:
                run_git_command(project, ["push", "-f"])
            else:
                run_git_command(project, ["push"])
        except subprocess.CalledProcessError as e:
            logger.error("Git push failed to execute properly.")
            logger.error(str(e.output))
            return False
        else:
            logger.info("git push of {} succeeded".format(file_path))
    # if we reach this point, the file has been commited (and possibly pushed)
    return True
Esempio n. 27
0
def get_comments(project,
                 collection_id,
                 publication_id,
                 note_id=None,
                 section_id=None):
    """
    Get comments file text for a given publication

    The comments file is "<legacy_id>_com.xml" when the publication's comment has a legacy_id
    and no original_filename, otherwise "<collection_id>_<publication_id>_com.xml". The matching
    "_est.xml" file is passed to the transformation as the estDocument parameter.

    :param project: project name, used to look up the project configuration
    :param collection_id: collection id; also used as bookId when the collection has no legacy id
    :param publication_id: publication id
    :param note_id: if given (and section_id is not), transform with notes.xsl for this note
    :param section_id: if given, transform with com.xsl limited to this section
    :return: (JSON response, HTTP status) - 400 for an unknown project, 403 if the publication may not be shown
    """
    config = get_project_config(project)
    if config is None:
        return jsonify({"msg": "No such project."}), 400

    can_show, message = get_published_status(project, collection_id,
                                             publication_id)
    if not can_show:
        return jsonify({
            "id": "{}_{}".format(collection_id, publication_id),
            "error": message
        }), 403

    logger.info("Getting XML for {} and transforming...".format(
        request.full_path))
    select = "SELECT legacy_id FROM publication_comment WHERE id IN (SELECT publication_comment_id FROM publication WHERE id = :p_id) \
                        AND legacy_id IS NOT NULL AND original_filename IS NULL"

    statement = sqlalchemy.sql.text(select).bindparams(p_id=publication_id)

    connection = db_engine.connect()
    try:
        result = connection.execute(statement).fetchone()
        if result is not None:
            base_name = result["legacy_id"]
        else:
            base_name = "{}_{}".format(collection_id, publication_id)
    finally:
        # Single close point; also releases the connection if the query fails
        connection.close()

    bookId = get_collection_legacy_id(collection_id)
    if bookId is None:
        bookId = collection_id
    bookId = '"{}"'.format(bookId)

    # Build both names from the base name instead of replacing "com" in the comments
    # filename, which would also rewrite any "com" inside the legacy id itself.
    filename = "{}_com.xml".format(base_name)
    est_filename = "{}_est.xml".format(base_name)
    logger.debug("Filename (com) for {} is {}".format(
        publication_id, filename))

    params = {
        "estDocument":
        '"file://{}"'.format(
            safe_join(config["file_root"], "xml", "est", est_filename)),
        "bookId":
        bookId
    }

    if section_id is not None:
        # A section request always uses com.xsl; note_id is ignored in this case
        xsl_file = "com.xsl"
        params["sectionId"] = '"{}"'.format(section_id)
    elif note_id is not None:
        xsl_file = "notes.xsl"
        params["noteId"] = '"{}"'.format(note_id)
    else:
        xsl_file = "com.xsl"

    content = get_content(project, "com", filename, xsl_file, params)

    data = {
        "id": "{}_{}_com".format(collection_id, publication_id),
        "content": content
    }
    return jsonify(data), 200