Example #1
0
    def delete_resource(self, r_id: str) -> None:
        """
        Delete a resource from the LIBREary.

        This method deletes the canonical copy and removes the corresponding entry in the `resources`
            table.

        The canonical copy is only deleted when the checksum reported by the
        canonical adapter equals the checksum stored in the `resources` row.
        If no row exists for `r_id`, nothing is deleted and the method returns.

        :param r_id - the UUID of the resource you're deleting

        :raises ChecksumMismatchException - when the canonical copy's actual
            checksum differs from the stored one; nothing is deleted in that case.
        """

        # `execute` hands back a cursor rather than a row, so the row has to be
        # fetched explicitly before it can be indexed.
        resource_info = self.cursor.execute(
            "select * from resources where id=?", (r_id, )).fetchone()
        if resource_info is None:
            # No metadata row means there is no stored checksum to verify
            # against, so there is nothing that can safely be deleted.
            logger.debug(f"No resource {r_id} in resources database")
            return
        # Column index 4 of the row is treated as the stored checksum.
        canonical_checksum = resource_info[4]

        canonical_adapter = AdapterManager.create_adapter(
            self.canonical_adapter_type, self.canonical_adapter_id,
            self.config_dir)
        checksum = canonical_adapter.get_actual_checksum(r_id)

        # Guard clause: refuse to delete a copy that fails verification.
        if checksum != canonical_checksum:
            logger.error("Checksum Mismatch")
            raise ChecksumMismatchException

        logger.debug(f"Deleting canonical copy of object {r_id}")
        canonical_adapter._delete_canonical(r_id)

        logger.debug(f"Deleting object {r_id} from resources database")
        self.cursor.execute("delete from resources where id=?", (r_id, ))
        self.conn.commit()
Example #2
0
    def delete_resource(self, r_id: str) -> None:
        """
        Delete a resource from the LIBREary.

        This method deletes the canonical copy and removes the corresponding entry in the `resources`
            table.

        Behaviour by case:
        - The canonical adapter has no copy: only the metadata entry is removed.
        - A copy exists and its checksum matches the stored one: the copy and
          the metadata entry are both removed.
        - A copy exists but the checksum differs, or there is no metadata
          record to verify it against: nothing is deleted.

        :param r_id - the UUID of the resource you're deleting

        :raises ChecksumMismatchException - when the canonical copy cannot be
            verified against a stored checksum.
        """

        try:
            resource_info = self.metadata_man.get_resource_info(r_id)[0]
            # Index 4 of the resource record is treated as the stored checksum.
            canonical_checksum = resource_info[4]
        except IndexError:
            logger.debug(f"Already deleted {r_id}")
            # Bind a sentinel so the comparison below cannot fail with
            # UnboundLocalError when a canonical copy still exists.
            canonical_checksum = None

        canonical_adapter = AdapterManager.create_adapter(
            self.canonical_adapter_type, self.canonical_adapter_id, self.config_dir, self.config["metadata"])

        try:
            checksum = canonical_adapter.get_actual_checksum(r_id)
        except NoCopyExistsException:
            # Nothing stored in the canonical adapter; just drop the metadata.
            self.metadata_man.delete_resource(r_id)
            return

        if canonical_checksum is None:
            # A copy exists but there is no stored checksum to check it
            # against, so it must not be deleted unverified.
            logger.error(
                f"Cannot verify canonical copy of {r_id}: no metadata record")
            raise ChecksumMismatchException

        if checksum != canonical_checksum:
            raise ChecksumMismatchException

        logger.debug(f"Deleting canonical copy of object {r_id}")
        canonical_adapter._delete_canonical(r_id)

        logger.debug(f"Deleting object {r_id} from resources database")
        self.metadata_man.delete_resource(r_id)
Example #3
0
    def ingest(self, current_file_path: str, levels: List[str],
               description: str, delete_after_store: bool = False, metadata_schema: List = [], metadata: List = []) -> str:
        """
        Ingest an object to LIBREary. This method:
        - Creates the canonical copy of the object
        - Creates the entry in the `resources` table describing the resource
        - Optionally stores per-object metadata
        - Optionally, delete the file out of the dropbox dir.

        :param current_file_path - path of the file to ingest. The part after
            the last "/" is used as the stored filename.
        :param levels - levels to label the object with; stored as a single
            comma-separated string
        :param description - description stored alongside the resource
        :param delete_after_store - if True, the file at `current_file_path`
            is removed once it has been stored
        :param metadata_schema - schema for the object's metadata. Only used
            when it is non-empty and has the same length as `metadata`
        :param metadata - metadata values for the object, see `metadata_schema`

        Returns the newly generated UUID (as a string) of the ingested object.
        """
        # NOTE: the list defaults in the signature are shared between calls.
        # They are only ever read here and never forwarded when empty.
        filename = current_file_path.split("/")[-1]

        # Hash in fixed-size chunks inside a context manager: the file handle
        # is closed deterministically and large files are not loaded whole.
        sha1_hash = hashlib.sha1()
        with open(current_file_path, "rb") as source_file:
            for chunk in iter(lambda: source_file.read(1024 * 1024), b""):
                sha1_hash.update(chunk)
        checksum = sha1_hash.hexdigest()

        canonical_adapter = AdapterManager.create_adapter(
            self.canonical_adapter_type, self.canonical_adapter_id, self.config_dir, self.config["metadata"])

        obj_uuid = str(uuid.uuid4())

        logger.debug(f"Ingesting resource {obj_uuid} with filename {filename}")

        canonical_adapter_locator = canonical_adapter._store_canonical(
            current_file_path, obj_uuid, checksum, filename)

        levels_csv = ",".join(str(level) for level in levels)

        # Ingest to db
        self.metadata_man.ingest_to_db(
            canonical_adapter_locator,
            levels_csv,
            filename,
            checksum,
            obj_uuid,
            description)

        # Ingest file metadata:
        if metadata_schema and len(metadata_schema) == len(metadata):
            self.metadata_man.set_object_metadata_schema(
                obj_uuid, metadata_schema)
            self.metadata_man.set_all_object_metadata(obj_uuid, metadata)
        elif metadata_schema or metadata:
            # Lengths disagree: make the skipped metadata visible instead of
            # dropping it silently.
            logger.warning(
                f"Metadata for {obj_uuid} not stored: schema has "
                f"{len(metadata_schema)} fields but {len(metadata)} values were given")

        # Remove the source file once it has been stored, if requested.
        if delete_after_store:
            os.remove(current_file_path)

        return obj_uuid
Example #4
0
    def ingest(self,
               current_file_path: str,
               levels: List[str],
               description: str,
               delete_after_store: bool = False) -> str:
        """
        Ingest an object to LIBREary. This method:
        - Creates the canonical copy of the object
        - Creates the entry in the `resources` table describing the resource
        - Optionally, delete the file out of the dropbox dir.

        :param current_file_path - path of the file to ingest. The part after
            the last "/" is used as the stored filename.
        :param levels - levels to label the object with; stored as a single
            comma-separated string
        :param description - description stored alongside the resource
        :param delete_after_store - if True, the file at `current_file_path`
            is removed once it has been stored

        Returns the newly generated UUID (as a string) of the ingested object.
        """
        filename = current_file_path.split("/")[-1]

        # Hash in fixed-size chunks inside a context manager: the file handle
        # is closed deterministically and large files are not loaded whole.
        sha1_hash = hashlib.sha1()
        with open(current_file_path, "rb") as source_file:
            for chunk in iter(lambda: source_file.read(1024 * 1024), b""):
                sha1_hash.update(chunk)
        checksum = sha1_hash.hexdigest()

        canonical_adapter = AdapterManager.create_adapter(
            self.canonical_adapter_type, self.canonical_adapter_id,
            self.config_dir)

        obj_uuid = str(uuid.uuid4())

        logger.debug(f"Ingesting resource {obj_uuid} with filename {filename}")

        canonical_adapter_locator = canonical_adapter._store_canonical(
            current_file_path, obj_uuid, checksum, filename)

        levels_csv = ",".join(str(level) for level in levels)

        # Ingest to db. The leading None fills the first column of the row.
        self.cursor.execute(
            "insert into resources values (?, ?, ?, ?, ?, ?, ?)",
            (None, canonical_adapter_locator, levels_csv, filename, checksum,
             obj_uuid, description))

        self.conn.commit()

        # Remove the source file once it has been stored, if requested.
        if delete_after_store:
            os.remove(current_file_path)

        return obj_uuid
Example #5
0
import json

from libreary.adapter_manager import AdapterManager

# Directory holding the LIBRE-ary configuration used by this example script.
CONFIG_DIR = "/Users/ben/Desktop/libre-ary/config"


# Load the main config inside a context manager so the file handle is closed
# as soon as the JSON has been parsed.
with open("{}/{}".format(CONFIG_DIR, "config.json")) as config_file:
    config = json.load(config_file)
am = AdapterManager(config)
a = am.set_additional_adapter("drive", "GoogleDriveAdapter")
# print(a._store_canonical("/Users/ben/Desktop/grace.jpg", "34", "6b4f683d08d5431b5f8d1c8f4071610d5cab758d", "grace.jpg"))
# a.retrieve("34")


print(am.verify_adapter("drive"))
Example #6
0
import json

from libreary.adapter_manager import AdapterManager
from libreary.config_parser import ConfigParser

# Directory holding the LIBRE-ary configuration used by this example script.
CONFIG_DIR = "/Users/ben/Desktop/libre-ary/libreary/config"

# Load the adapter manager config inside a context manager so the file handle
# is closed as soon as the JSON has been parsed.
with open("{}/{}".format(CONFIG_DIR, "adapter_manager_config.json")) as config_file:
    config = json.load(config_file)
am = AdapterManager(config)
am.create_adapter("LocalAdapter", "local1")

print(am.verify_adapter("local1"))
Example #7
0
import json

from libreary.adapter_manager import AdapterManager
from libreary.config_parser import ConfigParser

# Directory holding the LIBRE-ary configuration used by this example script.
CONFIG_DIR = "/Users/ben/Desktop/libre-ary/config"


# Load the main config inside a context manager so the file handle is closed
# as soon as the JSON has been parsed.
with open("{}/{}".format(CONFIG_DIR, "config.json")) as config_file:
    config = json.load(config_file)
am = AdapterManager(config)
am.set_additional_adapter("s3", "S3Adapter")

print(am.verify_adapter("s3"))
Example #8
0
    def __init__(self, config_dir: str):
        """
        Constructor for Libreary object.
            :param config_dir - a string pointing to a directory
            containing Libreary configuration. In the config directory
            must be a file called `config.json` which contains main
            configuration, and a separate config file for each adapter
            you plan to use. More detail on adapter configs can be found
            in the adapters constructor documentation.


            The structure of `config_dir/config.json` should be as follows:
            ```{json}
            {
                "metadata": {
                    "db_file": "path to SQLite3 DB file for metadata"
                },
                "adapters": # List of adapters - each entry should look like:
                [{
                    "type":"AdapterType (name of class)",
                    "id": "Adapter Identifier"
                }],
                "options": {
                    "dropbox_dir": "Path to dropbox directory, where files you want to ingest should be placed",
                    "output_dir": "Path to directory you want files to be retrieved to",
                    "config_dir": "Path to config directory"
                },
                "canonical_adapter":"Adapter Identifier for Canonical Adapter"
            }
            ```

            The canonical adapter is the adapter which will store the "canonical" copy
            of each resource, which will then be used as the "real" version of that digital object.

            This object creates an adapter manager and an ingester. For more information,
            see those classes.

            The output and dropbox directories are volatile and should not be used for object storage.

            :raises KeyError - if a required key is missing from the config.
                The error is logged and re-raised with the missing key's name.
        """
        # Config stuff
        self.config_dir = config_dir
        self.config_path = "{}/config.json".format(self.config_dir)
        # Context manager closes the config file as soon as it is parsed.
        with open(self.config_path) as config_file:
            self.config = json.load(config_file)

        try:
            # Metadata stuff
            # Index rather than .get(): a missing "db_file" must be reported
            # as an invalid config (KeyError) and not surface as a TypeError
            # from os.path.realpath(None).
            self.metadata_db = os.path.realpath(
                self.config['metadata']["db_file"])
            self.conn = sqlite3.connect(self.metadata_db)
            self.cursor = self.conn.cursor()

            # Directories we care about
            self.dropbox_dir = self.config["options"]["dropbox_dir"]
            self.ret_dir = self.config["options"]["output_dir"]

            # Objects we need
            self.adapter_man = AdapterManager(self.config)
            self.ingester = Ingester(self.config)
            logger.debug("LIBREary configuration valid. Proceeding.")
        except KeyError:
            logger.error("Invalid LIBREary config. Exiting.")
            # Bare raise keeps the original exception, including which key
            # was missing, instead of replacing it with an empty KeyError.
            raise
Example #9
0
class Libreary:
    """
    This is the user-facing class for LIBRE-ary. Users of LIBRE-ary should only interact
    with this class directly. LIBRE-ary objects are able to handle all of the
    functionality of this module. Developers should feel free to extend the
    functionality of this class and are encouraged to submit pull requests
    to the main repository.

    This class currently contains the following methods:

    - ingest (load a resource into LIBRE-ary)
    - retrieve (retrieve a copy of an object)
    - delete (delete an object)
    - update (update an object) - not yet implemented
    - search (search for information about objects) - not yet implemented
    - run_check (check all resources to verify integrity) - not yet implemented
    - check_single_resource (check only a single resource) - not yet implemented
    - add_level (add a level to the metadata database)
    """

    def __init__(self, config_dir: str):
        """
        Constructor for Libreary object.
            :param config_dir - a string pointing to a directory
            containing Libreary configuration. In the config directory
            must be a file called `config.json` which contains main
            configuration, and a separate config file for each adapter
            you plan to use. More detail on adapter configs can be found
            in the adapters constructor documentation.


            The structure of `config_dir/config.json` should be as follows:
            ```{json}
            {
                "metadata": {
                    "db_file": "path to SQLite3 DB file for metadata"
                },
                "adapters": # List of adapters - each entry should look like:
                [{
                    "type":"AdapterType (name of class)",
                    "id": "Adapter Identifier"
                }],
                "options": {
                    "dropbox_dir": "Path to dropbox directory, where files you want to ingest should be placed",
                    "output_dir": "Path to directory you want files to be retrieved to",
                    "config_dir": "Path to config directory"
                },
                "canonical_adapter":"Adapter Identifier for Canonical Adapter"
            }
            ```

            The canonical adapter is the adapter which will store the "canonical" copy
            of each resource, which will then be used as the "real" version of that digital object.

            This object creates an adapter manager and an ingester. For more information,
            see those classes.

            The output and dropbox directories are volatile and should not be used for object storage.

            :raises KeyError - if a required key is missing from the config.
                The error is logged and re-raised with the missing key's name.
        """
        # Config stuff
        self.config_dir = config_dir
        self.config_path = "{}/config.json".format(self.config_dir)
        # Context manager closes the config file as soon as it is parsed.
        with open(self.config_path) as config_file:
            self.config = json.load(config_file)

        try:
            # Metadata stuff
            # Index rather than .get(): a missing "db_file" must be reported
            # as an invalid config (KeyError) and not surface as a TypeError
            # from os.path.realpath(None).
            self.metadata_db = os.path.realpath(
                self.config['metadata']["db_file"])
            self.conn = sqlite3.connect(self.metadata_db)
            self.cursor = self.conn.cursor()

            # Directories we care about
            self.dropbox_dir = self.config["options"]["dropbox_dir"]
            self.ret_dir = self.config["options"]["output_dir"]

            # Objects we need
            self.adapter_man = AdapterManager(self.config)
            self.ingester = Ingester(self.config)
            logger.debug("LIBREary configuration valid. Proceeding.")
        except KeyError:
            logger.error("Invalid LIBREary config. Exiting.")
            # Bare raise keeps the original exception, including which key
            # was missing, instead of replacing it with an empty KeyError.
            raise

    def run_check(self, deep: bool = False) -> List[str]:
        """
        Check all of the objects in the LIBRE-ary. The intended process is:

        For each object:
            Get canonical copy and actual checksum. Make sure canonical copy matches expected checksum
            If it doesn't:
                Attempt to recover canonical copy
            Get a list of all levels that the object has been labelled as:
            For each level:
                Get a list of adapters it is stored in:
                For each adapter:
                    Check to make sure that copy's checksum matches what it should:
                    If it doesn't:
                        Attempt to recover it.

        NOTE: not yet implemented. The method currently only logs the request
        and returns None.

        :param deep specifies whether to use a deep search. A deep search will calculate actual checksums
        of each copy of each object, while a shallow one will trust that the checksum in the metadata
        database matches that of the actual object.
        """
        logger.debug(f"Running check of all objects in LIBREary. Deep: {deep}")

    def ingest(self,
               current_file_path: str,
               levels: List[str],
               description: str,
               delete_after_store: bool = False) -> str:
        """
        Ingest a new object to the LIBRE-ary. This:
            1. Creates an entry in the `resources` table in the metadata db
            2. Creates an object UUID
            3. Ingests the canonical copy
            4. Sends copies to adapters which match specified levels
            5. Returns object ID

        :param current_file_path - the current path to the file you wish to ingest
        :param levels - a list of names of levels. These levels must exist in the
            `levels` table in the metadata db
        :param description - a description of this object. This is useful when you
            want to search for objects later
        :param delete_after_store - Boolean. If True, the file is deleted once
            it has been sent to the adapters.

        Returns the UUID of the ingested object.
        """

        # Don't want ingester to delete it, because then AM will need to
        # retrieve. Deletion is deferred to the adapter manager instead.
        obj_id = self.ingester.ingest(current_file_path,
                                      levels,
                                      description,
                                      delete_after_store=False)
        self.adapter_man.send_resource_to_adapters(
            obj_id, delete_after_send=delete_after_store)
        logger.debug(
            f"Ingesting object {obj_id} to LIBREary. Description: {description}"
        )
        return obj_id

    def retrieve(self, r_id: str) -> str:
        """
        Retrieve an object. This will save a copy of the object
            as `<output_dir>/<object_filename>`

        The output and dropbox directories are volatile and should not be used for object storage.

        Adapters and other objects frequently may delete or write files in these directories.

        :param r_id - The resource UUID that corresponds to the object you'd like to retrieve.

        Returns a path to the retrieved object.
        """
        logger.debug(f"Retrieving object {r_id}")
        return self.adapter_man.retrieve_by_preference(r_id)

    def delete(self, r_id: str) -> None:
        """
        Delete an object from LIBRE-ary. This:
            1. Deletes the resource from all of the adapters it was stored in
            2. Deletes the resource from the metadata db entirely
            3. Removes the canonical copy

        Be careful with this function, as there is no undo option.

        :param r_id - the UUID of the object you'd like to delete
        """
        logger.debug(f"Deleting object {r_id}")
        # Adapter copies go first; the ingester then removes the canonical
        # copy and the metadata entry.
        self.adapter_man.delete_resource_from_adapters(r_id)
        self.ingester.delete_resource(r_id)

    def update(self, r_id: str, updated_path: str) -> None:
        """
        Update a resource with a new object. Preserves UUID and all other metadata (levels, etc.)

        NOTE: not yet implemented. The method currently only logs the request.

        :param r_id - the UUID of the object you'd like to update
        :param updated_path - path to the contents of the updated object.

        """
        logger.debug(f"Updating object {r_id}")

    def search(self, search_term: str) -> List[str]:
        """
        Search the metadata db for information about resources.

        NOTE: not yet implemented. The method currently does nothing and
        returns None.

        :param search_term - a string with which to search against the metadata db.
            Can match UUID, filename, original path, or description.
        """
        pass

    def check_single_resource(self, r_id: str, deep: bool = False) -> bool:
        """
        Check a single object in the LIBRE-ary. The intended process is:


        Get canonical copy and actual checksum. Make sure canonical copy matches expected checksum
        If it doesn't:
            Attempt to recover canonical copy
        Get a list of all levels that the object has been labelled as:
        For each level:
            Get a list of adapters it is stored in:
            For each adapter:
                Check to make sure that copy's checksum matches what it should:
                If it doesn't:
                    Attempt to recover it.

        NOTE: not yet implemented. The method currently only logs the request
        and returns None.

        :param deep specifies whether to use a deep search. A deep search will calculate actual checksums
        of each copy of each object, while a shallow one will trust that the checksum in the metadata
        database matches that of the actual object.

        :param r_id - the resource ID of the object you'd like to check
        """
        logger.debug(f"Checking object {r_id}")

    def add_level(self,
                  name: str,
                  frequency: int,
                  adapters: List[dict],
                  copies=1) -> None:
        """
        Add a level to the metadata database.

        :param name - name for the level
        :param frequency - check frequency for level. Currently unimplemented
        :param adapters - list of dicts specifying adapters the level uses.
            It is stored as a JSON string. Example:
            ```{json}
            [
                {
                "id": "local1",
                "type":"LocalAdapter"
                },
                {
                "id": "local2",
                "type":"LocalAdapter"
                }
            ]

            ```
        :param copies - copies to store for each adapter. Currently, only 1 is supported
        """
        logger.debug(f"Adding new level: {name}")
        str_adapters = json.dumps(adapters)
        self.cursor.execute("insert into levels values (?, ?, ?, ?)",
                            (name, frequency, str_adapters, copies))
        self.conn.commit()
Example #10
0
import json

from libreary.adapter_manager import AdapterManager
from libreary.config_parser import ConfigParser

# Directory holding the LIBRE-ary configuration used by this example script.
CONFIG_DIR = "/Users/ben/Desktop/libre-ary/config"

# Load the adapter manager config inside a context manager so the file handle
# is closed as soon as the JSON has been parsed.
with open("{}/{}".format(CONFIG_DIR, "adapter_manager_config.json")) as config_file:
    config = json.load(config_file)
am = AdapterManager(config)

# print(am.adapters)
# print(am.send_resource_to_adapters("1277ccb6-051c-458d-9250-570b6e085d79"))
# print(am.retrieve_by_preference("1277ccb6-051c-458d-9250-570b6e085d79"))
# am.delete_resource_from_adapters("1277ccb6-051c-458d-9250-570b6e085d79")
print(am.restore_canonical_copy("1277ccb6-051c-458d-9250-570b6e085d79"))