Example #1
0
 def setUp(self):
     """Build three Records: two with equal content and one that differs."""
     def ham_files():
         # Built anew on each call so that no two Records share mutable state.
         return [{"uri": "ham.png", "mimetype": "png"},
                 {"uri": "ham.curve", "tags": ["hammy"]}]

     def eggs_data(foo_value):
         # Only the "foo" value varies between the Records created below.
         return {"foo": {"value": foo_value},
                 "bar": {"value": "1", "tags": ["in"]}}

     # record_one and record_two are constructed from equal arguments.
     self.record_one = Record(id="spam",
                              type="new_eggs",
                              data=eggs_data(12),
                              files=ham_files(),
                              user_defined={})
     self.record_two = Record(id="spam",
                              type="new_eggs",
                              data=eggs_data(12),
                              files=ham_files(),
                              user_defined={})
     # record_three has a different id, type, and "foo" value.
     self.record_three = Record(id="spam2",
                                type="super_eggs",
                                data=eggs_data(13),
                                files=ham_files(),
                                user_defined={})
Example #2
0
 def test_recorddao_delete_one(self):
     """Check that an inserted Record is gone after RecordDAO.delete()."""
     factory = self.create_dao_factory(test_db_dest=self.test_db_dest)
     dao = factory.create_record_dao()
     dao.insert(Record(id="rec_1", type="sample"))
     dao.delete("rec_1")
     # The only Record of this type was deleted, so nothing should remain.
     leftover = list(dao.get_all_of_type("sample"))
     self.assertEqual(leftover, [])
Example #3
0
 def test_recorddao_insert_retrieve(self):
     """Check that a Record read back from RecordDAO matches what was inserted."""
     dao = self.create_dao_factory().create_record_dao()
     egg_datum = {"value": 12, "units": None, "tags": ["runny"]}
     egg_file = {"uri": "eggs.brek", "mimetype": "egg", "tags": ["fried"]}
     original = Record(id="spam",
                       type="eggs",
                       data={"eggs": egg_datum},
                       files=[egg_file],
                       user_defined={})
     dao.insert(original)
     fetched = dao.get("spam")
     # Compare field by field rather than through __dict__ so that a failure
     # points at the specific part of the Record that did not round-trip.
     self.assertEqual(fetched.id, original.id)
     self.assertEqual(fetched.type, original.type)
     self.assertEqual(fetched.data, original.data)
     self.assertEqual(fetched.files, original.files)
     self.assertEqual(fetched.user_defined, original.user_defined)
Example #4
0
 def test_recorddao_delete_data_cascade(self):
     """Check that deleting a Record also removes its data and files."""
     dao = self.create_dao_factory(
         test_db_dest=self.test_db_dest).create_record_dao()
     doomed = Record(id="rec_1",
                     type="sample",
                     data={"eggs": {"value": 12, "tags": ["breakfast"]},
                           "flavor": {"value": "tasty"}},
                     files=[{"uri": "justheretoexist.png"}])
     dao.insert(doomed)
     dao.delete("rec_1")
     # Neither the data entries nor the file entry should be reachable now.
     leftover_data = dao.get_data_for_records(id_list=["rec_1"],
                                              data_list=["eggs", "flavor"])
     self.assertEqual(leftover_data, {})
     matches = dao.get_given_document_uri("justheretoexist.png",
                                          ids_only=True)
     leftover_files = list(matches)
     self.assertEqual(leftover_files, [])
Example #5
0
 def test_recorddao_delete_one_with_relationship(self):
     """Check that deleting a Record removes the relationship it is subject of."""
     factory = self.create_dao_factory(test_db_dest=self.test_db_dest)
     records = factory.create_record_dao()
     relationships = factory.create_relationship_dao()
     records.insert_many(
         [Record(id=rec_id, type="sample") for rec_id in ("rec_1", "rec_2")])
     relationships.insert(subject_id="rec_1",
                          object_id="rec_2",
                          predicate="dupes")
     records.delete("rec_1")
     # The relationship whose subject was deleted must be gone...
     self.assertFalse(relationships.get(subject_id="rec_1"))
     # ...while the Record on the object side of it must survive.
     survivors = list(records.get_all_of_type("sample", ids_only=True))
     self.assertEqual(survivors, ["rec_2"])
Example #6
0
 def test_find_file_from_sina_records(self):
     """Check store.find(file_uri=...) matches files on Records inserted via sina."""
     # connect() returns a pair; only the store is needed here.
     store, _ = self.connect(sync=False)
     sina_records = store.get_sina_records()

     with_mimetype = Record("foo", type="blah")
     with_mimetype.add_file("setup.py", mimetype="py")
     sina_records.insert(with_mimetype)

     without_mimetype = Record("bar", type="blah")
     without_mimetype.add_file("smefile")
     sina_records.insert(without_mimetype)

     # Each uri was attached to exactly one Record.
     for uri in ("setup.py", "smefile"):
         hits = list(store.find(file_uri=uri))
         self.assertEqual(len(hits), 1)
Example #7
0
    def test_recorddao_delete_many(self):
        """Check that delete_many() removes several Records and what hangs off them."""
        factory = self.create_dao_factory(test_db_dest=self.test_db_dest)
        records = factory.create_record_dao()
        relationships = factory.create_relationship_dao()
        all_ids = ["rec_1", "rec_2", "rec_3", "rec_4"]
        records.insert_many(
            [Record(id=rec_id, type="sample") for rec_id in all_ids])
        # (subject, object, predicate) triples, inserted in this order.
        triples = (("rec_1", "rec_2", "dupes"),
                   ("rec_2", "rec_2", "is"),
                   ("rec_3", "rec_4", "dupes"),
                   ("rec_4", "rec_4", "is"))
        for subject, target, predicate in triples:
            relationships.insert(subject_id=subject,
                                 object_id=target,
                                 predicate=predicate)

        # Remove every Record but the last one in a single call.
        records.delete_many(["rec_1", "rec_2", "rec_3"])
        survivors = list(records.get_all_of_type("sample", ids_only=True))
        self.assertEqual(survivors, ["rec_4"])

        # Asking for data on all ids must now give the same answer as asking
        # for the surviving Record alone (acts as cascade test).
        data_for_all = records.get_data_for_records(
            id_list=all_ids, data_list=["eggs", "flavor"])
        data_for_survivor = records.get_data_for_records(
            id_list=["rec_4"], data_list=["eggs", "flavor"])
        self.assertEqual(data_for_all, data_for_survivor)

        # Relationships touching deleted Records are gone; only the
        # rec_4 -> rec_4 relationship is left pointing at rec_4.
        self.assertFalse(relationships.get(object_id="rec_2"))
        self.assertFalse(relationships.get(subject_id="rec_3"))
        pointing_at_survivor = relationships.get(object_id="rec_4")
        self.assertEqual(len(pointing_at_survivor), 1)
Example #8
0
 def setUp(self):
     """Create two Records whose data contain list values, for use in tests."""
     def ham_files():
         # A new list per Record so the two Records never share file dicts.
         return [{"uri": "ham.png", "mimetype": "png"},
                 {"uri": "ham.curve", "tags": ["hammy"]}]

     def bar_datum():
         return {"value": "1", "tags": ["in"]}

     # Homogeneous lists: all numbers, or all strings.
     homogeneous = {"list_scalars": {"value": [1, 2, 3]},
                    "list_strings": {"value": ['apple', 'orange']},
                    "bar": bar_datum()}
     self.record_one = Record(id="spam",
                              type="new_eggs",
                              data=homogeneous,
                              files=ham_files(),
                              user_defined={})

     # Lists named "bad_list*": mixed string/number, and one holding a dict.
     heterogeneous = {"bad_list": {"value": ['bad', 3]},
                      "bad_list_2": {"value": [1, 2, {'not': 'allowed'}]},
                      "bar": bar_datum()}
     self.record_two = Record(id="spam2",
                              type="new_eggs",
                              data=heterogeneous,
                              files=ham_files(),
                              user_defined={})
Example #9
0
def create_kosh_users(record_handler, users=None):
    """Add Kosh users to the Kosh store.

    A user record is created only for usernames that no existing record of
    the store's user type already carries; existing users are left alone.

    :param record_handler: The sina records object
    :type record_handler: sina.records
    :param users: list of usernames to add. When None (the default), the
                  value of the ``USER`` environment variable (or "default"
                  if it is unset) followed by "anonymous" is used.
    :type users: list, optional
    :raises IndexError: if the store holds no "__kosh_storeinfo__" record
    """
    if users is None:
        # Resolved at call time rather than at import time, so the current
        # environment is honoured and no list is shared between calls.
        users = [os.environ.get("USER", "default"), "anonymous"]

    store_info = list(record_handler.find_with_type([
        "__kosh_storeinfo__",
    ]))[0]
    user_type = store_info["data"]["users_type"]["value"]

    for user in users:
        existing = list(
            record_handler.find(types=[
                user_type,
            ], data={"username": user}))
        if existing:
            # Already known to the store: nothing to create.
            continue
        # The id is derived from the username, so the same username always
        # maps to the same record id. MD5 is an identifier here, not a
        # security measure.
        uid = hashlib.md5(user.encode()).hexdigest()
        user_record = Record(id=uid, type=user_type)
        user_record.add_data("username", user)
        record_handler.insert(user_record)
Example #10
0
    def __init__(self, source_dir):
        """
        Store the source directory and seed the Record list with the QC Records.

        :param source_dir: path to the "files" subdirectory we create during conversion.
                           Because we've already copied important files into there, it acts as both
                           source and destination.
        """
        self.source_dir = source_dir
        # Begin with one "qc" Record per (id, description) pair in QC_DATA;
        # these describe how the quality control numbers work.
        qc_records = []
        for qid, desc in QC_DATA:
            description = {"desc": {"value": desc}}
            qc_records.append(Record(qid, "qc", data=description))
        self.records = qc_records
        self.relationships = []
Example #11
0
    def add_experiment(self, experiment_id):
        """
        Append an "exp" Record listing the CSV file and the supplemental files.

        :param experiment_id: experiment id string
        """
        def in_source_dir(name):
            # Every file of the experiment is addressed inside source_dir.
            return os.path.join(self.source_dir, name)

        exp_files = {
            in_source_dir(CSV_NAME): {"mimetype": "text/csv", "tags": ["data"]}
        }
        # Supplemental files are keyed by their basename inside source_dir.
        exp_files.update(
            (in_source_dir(os.path.basename(extra_file)),
             {"mimetype": mimetype, "tags": [tag]})
            for extra_file, tag, mimetype in SUPPLEMENTAL_FILES)
        experiment = Record(experiment_id, "exp", files=exp_files)
        self.records.append(experiment)
Example #12
0
 def setUp(self):
     """Insert six "sample" Records holding list data, for testing get_list."""
     self.record_dao = sina_sql.DAOFactory().create_record_dao()

     def sample(rec_id, name, values):
         # Each Record carries a single list-valued datum.
         return Record(id=rec_id,
                       type="sample",
                       data={name: {"value": values}})

     # Numeric lists stored under "eggs".
     self.record_1 = sample("rec_1", "eggs", [0, 1, 2, 3])
     self.record_2 = sample("rec_2", "eggs", [1, 2, 3, 4, 5])
     self.record_3 = sample("rec_3", "eggs", [4, 5, 6, 7])
     # String lists stored under "spam".
     self.record_4 = sample("rec_4", "spam", ["awesome", "canned", "zebra"])
     self.record_5 = sample("rec_5", "spam", ["fried", "toasted", "zebra"])
     self.record_6 = sample("rec_6", "spam", ["tree", "honey"])

     everything = [self.record_1, self.record_2, self.record_3,
                   self.record_4, self.record_5, self.record_6]
     self.record_dao.insert_many(everything)
Example #13
0
    def add_observation(self, obs_id, obs_filename, data):
        """
        Append an "obs" Record built from one observation file and its data.

        :param obs_id: the id of the observation to create
        :param obs_filename: the file that contains its data (and only its data)
        :param data: a dictionary of datum_name: val that we want to assign to this observation.
        """
        observation = Record(obs_id,
                             "obs",
                             files={obs_filename: {"mimetype": "text/plain"}})
        for name, val in data.items():
            if name not in ("o2_qc", "ph_qc"):
                # Regular measurement: stored as a float, with units looked
                # up by name (None when UNITS has no entry for it).
                observation.add_data(name,
                                     float(val),
                                     units=UNITS.get(name, None))
                continue
            # Quality-control entries keep their value as given and get
            # the "qc" tag.
            observation.add_data(name, val, tags=["qc"])
        self.records.append(observation)
Example #14
0
File: dataset.py Project: LLNL/kosh
    def associate(self, uri, mime_type, metadata={},
                  id_only=True, long_sha=False, absolute_path=True):
        """associates a uri/mime_type with this dataset

        :param uri: uri(s) to access content
        :type uri: str or list of str
        :param mime_type: mime type associated with this file
        :type mime_type: str or list of str
        :param metadata: metadata to associate with file, defaults to {}.
                         When `uri` is a list this is either one dict
                         (applied to every uri) or a sequence with one dict
                         per uri. Each dict is copied before use and is
                         never modified by this method.
        :type metadata: dict or list of dict, optional
        :param id_only: do not return kosh file object, just its id
        :type id_only: bool
        :param long_sha: Do we compute the long sha on this or not?
        :type long_sha: bool
        :param absolute_path: if file exists should we store its absolute_path
        :type absolute_path: bool
        :raises TypeError: if a uri is already associated with a mime type
                           different from the one requested
        :return: With `id_only` (the default) the id(s) of the associated
                 source(s), otherwise the corresponding Kosh object(s).
                 A single value when `uri` is a string, a list otherwise.
        :rtype: id, list of ids, KoshSinaObject or list of KoshSinaObject
        """

        rec = self.get_record()
        # Need to remember we touched associated files
        now = time.time()

        # Normalize the three inputs to parallel lists, one entry per uri
        if isinstance(uri, basestring):
            uris = [uri, ]
            metadatas = [metadata, ]
            mime_types = [mime_type, ]
            single_element = True
        else:
            uris = uri
            if isinstance(metadata, dict):
                metadatas = [metadata, ] * len(uris)
            else:
                metadatas = metadata
            if isinstance(mime_type, basestring):
                mime_types = [mime_type, ] * len(uris)
            else:
                mime_types = mime_type
            single_element = False

        new_recs = []
        kosh_file_ids = []

        for i, uri in enumerate(uris):
            try:
                meta = metadatas[i].copy()
                if os.path.exists(uri):
                    if long_sha:
                        meta["long_sha"] = compute_long_sha(uri)
                    if absolute_path:
                        uri = os.path.abspath(uri)
                    if not os.path.isdir(uri) and "fast_sha" not in meta:
                        meta["fast_sha"] = compute_fast_sha(uri)
                rec["user_defined"]["{uri}___associated_last_modified".format(
                    uri=uri)] = now
                # We need to check if the uri was already associated somewhere
                tmp_uris = list(self.__store__.find(
                    types=[self.__store__._sources_type, ], uri=uri, ids_only=True))

                if len(tmp_uris) == 0:
                    Id = uuid.uuid4().hex
                    rec_obj = Record(id=Id, type=self.__store__._sources_type)
                else:
                    rec_obj = self.__store__.get_record(tmp_uris[0])
                    Id = rec_obj.id
                    existing_mime = rec_obj["data"]["mime_type"]["value"]
                    mime_type = mime_types[i]
                    if existing_mime != mime_types[i]:
                        rec["files"][uri]["mime_type"] = existing_mime
                        raise TypeError("source {} is already associated with another dataset with mimetype"
                                        " '{}' you specified mime_type '{}'".format(uri, existing_mime, mime_types[i]))
                rec.add_file(uri, mime_types[i])
                rec["files"][uri]["kosh_id"] = Id
                meta["uri"] = uri
                meta["mime_type"] = mime_types[i]
                meta["associated"] = [self.id, ]
                for key in meta:
                    rec_obj.add_data(key, meta[key])
                    last_modif_att = "{name}_last_modified".format(name=key)
                    rec_obj["user_defined"][last_modif_att] = time.time()
                if not self.__store__.__sync__:
                    rec_obj["user_defined"]["last_update_from_db"] = time.time()
                    self.__store__.__sync__dict__[Id] = rec_obj
                new_recs.append(rec_obj)
            except TypeError:
                # Mime type conflicts must reach the caller; re-raise so the
                # generic handler below does not swallow them.
                raise
            except Exception:
                # file already in there
                # Let's get the matching id
                # NOTE(review): this handler relies on `rec_obj` having been
                # bound by the try block above; a failure before that point
                # leaves it unbound or stale from a previous uri - confirm.
                stored_mime = rec_obj["data"]["mime_type"]["value"]
                if stored_mime != mime_types[i]:
                    # Report the values for *this* uri; they are always bound
                    # here, unlike locals only set on some paths of the try.
                    raise TypeError("file {} is already associated with this dataset with mimetype"
                                    " '{}' you specified mime_type '{}'".format(uri, stored_mime, mime_types[i]))
                else:
                    Id = rec["files"][uri]["kosh_id"]
                    if len(metadatas[i]) != 0:
                        warnings.warn(
                            "uri {} was already associated, metadata will "
                            "stay unchanged\nEdit object (id={}) directly to update attributes.".format(uri, Id))
            kosh_file_ids.append(Id)

        if self.__store__.__sync__:
            self.__store__.lock()
            try:
                self.__store__.__record_handler__.insert(new_recs)
            finally:
                # Always release the store, even if the insert fails
                self.__store__.unlock()
            self._update_record(rec)
        else:
            self._update_record(rec, self.__store__._added_unsync_mem_store)

        # Since we changed the associated, we need to cleanup
        # the features cache
        self.__dict__["__features__"][None] = {}

        if id_only:
            if single_element:
                return kosh_file_ids[0]
            else:
                return kosh_file_ids

        kosh_files = []
        for Id in kosh_file_ids:
            self.__dict__["__features__"][Id] = {}
            kosh_file = KoshSinaObject(Id=Id,
                                       kosh_type=self.__store__._sources_type,
                                       store=self.__store__,
                                       metadata=metadata,
                                       record_handler=self.__record_handler__)
            kosh_files.append(kosh_file)

        if single_element:
            return kosh_files[0]
        else:
            return kosh_files
Example #15
0
def populate_database_with_data(record_dao):
    """
    Insert a fixed set of seven test Records through the given RecordDAO.

    :param record_dao: The RecordDAO used to insert records into a database.
    """
    def fill(rec, data=(), files=None):
        # Assign the data entries one by one, in order, then the files.
        for name, datum in data:
            rec.data[name] = datum
        if files is not None:
            rec.files = files
        return rec

    spam = Record(id="spam", type="run")
    for key, value in (("application", "breakfast_maker"),
                       ("user", "******"),
                       ("version", "1.4.0")):
        spam[key] = value
    fill(spam,
         (("spam_scal", {"value": 10, "units": "pigs", "tags": ["hammy"]}),
          ("spam_scal_2", {"value": 200}),
          ("val_data", {"value": "runny", "tags": ["edible"]}),
          ("val_data_2", {"value": "double yolks"})),
         files=[{"uri": "beep.wav"}, {"uri": "beep.pong"}])

    spam_2 = fill(Record(id="spam2", type="run"),
                  (("spam_scal", {"value": 10.99999}),),
                  files=[{"uri": "beep/png"}])

    spam_3 = fill(Record(id="spam3", type="foo"),
                  (("spam_scal", {"value": 10.5}),
                   ("spam_scal_2", {"value": 10.5}),
                   ("val_data", {"value": "chewy", "tags": ["edible"]}),
                   ("val_data_2", {"value": "double yolks"})),
                  files=[{"uri": "beeq.png"}])

    spam_4 = fill(Record(id="spam4", type="bar"),
                  (("val_data_2", {"value": "double yolks"}),),
                  files=[{"uri": "beep.png"}])

    spam_5 = fill(Record(id="spam5", type="run"),
                  (("spam_scal_3", {"value": 46}),
                   ("val_data_3", {"value": "sugar"}),
                   ("val_data_list_1", {"value": [0, 9.3]}),
                   ("val_data_list_2", {"value": ['eggs', 'pancake']})),
                  files=[{"uri": "beep.wav",
                          "tags": ["output", "eggs"],
                          "mimetype": 'audio/wav'}])

    # The last two Records get data only; their files are left untouched.
    spam_6 = fill(Record(id="spam6", type="spamrec"),
                  (("val_data_3", {"value": "syrup"}),
                   ("val_data_list_1", {"value": [8, 20]}),
                   ("val_data_list_2", {"value": ['eggs', 'yellow']})))

    eggs = fill(Record(id="eggs", type="eggrec"),
                (("eggs_scal", {"value": 0}),))

    record_dao.insert_many(
        [spam, spam_2, spam_3, spam_4, spam_5, spam_6, eggs])
Example #16
0
def update_store_and_get_info_record(records, ensemble_predicate=None):
    """Obtain the sina record containing store info
    If necessary update store to latest standards
    :param records: The sina store "records" object
    :type records: sina.datastore.DataStore.RecordOperations
    :param ensemble_predicate: The predicate for the relationship to an ensemble
    :type ensemble_predicate: str
    :returns: sina record for store info
    :rtype: Record
    :raises RuntimeError: if the single store info record asks for a newer
                          Kosh version than the one running
    """
    def version_key(text):
        """Turn a dotted version string into a comparable tuple of ints.

        Components starting with 'g' (presumably a git hash suffix) count
        as 0 so they keep their position. Trailing zeros are dropped so
        that "1.2" and "1.2.0" compare equal.
        """
        parts = [0 if part.startswith("g") else int(part)
                 for part in text.split(".")]
        while parts and parts[-1] == 0:
            parts.pop()
        return tuple(parts)

    can_write = hasattr(records, "insert")  # Readonly can't insert

    # First let's see if this store contains a dedicated record
    # describing this store specs
    store_info = list(records.find_with_type("__kosh_storeinfo__"))
    if len(store_info) > 1:
        # There is a small chance that the store was created on multiple processors
        # simultaneously and that these are identical, let's try to recover
        # that was true for a small period in Kosh dev branch
        rec = store_info[0]
    elif not store_info:
        # ok it's the old type or a new store, let's try to upgrade it for next time
        # and add the store info
        # Because of mpi ranks issues let's fix the id
        rec = Record(id="__kosh_store_info__", type="__kosh_storeinfo__")
        if can_write:
            # It's possible many ranks will try to create this record
            # They are all identical, let's allow the error
            try:
                records.insert(rec)
            except Exception:
                pass
    else:
        rec = store_info[0]
        # A record without a minimum version imposes no constraint; the
        # default is filled in below like every other missing entry.
        if "kosh_min_version" in rec["data"]:
            current = version()
            required = rec["data"]["kosh_min_version"]["value"]
            # Compared component by component as integers, so multi-digit
            # components such as "1.10" are ordered correctly.
            if version_key(current) < version_key(required):
                raise RuntimeError(
                    "This Kosh store requires Kosh version greater than {}, you have {}"
                    .format(required, current))

    if ensemble_predicate is None:
        ensemble_predicate = "is a member of ensemble"
    # Entries every store info record must carry, with the value used
    # when one is missing. Order matters: it is the insertion order.
    defaults = (
        ("sources_type", "file"),
        ("users_type", "user"),
        ("groups_type", "group"),
        ("loaders_type", "koshloader"),
        ("ensembles_type", "kosh_ensemble"),
        ("ensemble_predicate", ensemble_predicate),
        ("kosh_min_version", "1.2.1"),
        ("reserved_types", [
            "__kosh_storeinfo__", "file", "user", "group", "kosh_ensemble",
            "koshloader"
        ]),
    )
    need_update = False
    for name, value in defaults:
        if name not in rec["data"]:
            rec.add_data(name, value)
            need_update = True

    # Whatever was stored, the reserved types must be exactly this set
    expected_reserved = [
        '__kosh_storeinfo__', 'file', 'group', 'kosh_ensemble',
        'koshloader', 'user'
    ]
    if sorted(rec["data"]["reserved_types"]["value"]) != expected_reserved:
        rec["data"]["reserved_types"]["value"] = list(expected_reserved)
        need_update = True

    if need_update and can_write:
        # Replace the stored record by delete-then-insert. Both steps are
        # best effort on purpose.
        try:
            records.delete(rec.id)
        except Exception:  # in case multi-processors interfere with each others
            pass
        try:
            records.insert(rec)
        except Exception:  # in case multi-processors interfere with each others
            pass
    return rec
Example #17
0
    def __init__(self,
                 Id,
                 store,
                 kosh_type,
                 record_handler,
                 protected=[],
                 metadata={},
                 schema=None,
                 record=None):
        """__init__ sina object base class

        :param Id: id to use for unique identification, if None is passed set for you via uuid4()
        :type Id: str
        :param store: Kosh store associated
        :type store: KoshSinaStore
        :param kosh_type: type of Kosh object (dataset, file, project, ...)
        :type kosh_type: str
        :param record_handler: sina record handler object
        :type record_handler: RecordDAO
        :param protected: list of protected parameters, e.g internal params not to be stored
        :type protected: list, optional
        :param metadata: dictionary of attributes/value to initialize object with, defaults to {}
        :type metadata: dict, optional
        :param schema: stored as-is on the instance under "__schema__"
        :param record: sina record to prevent looking it up again and again in sina
        :type record: Record
        """
        def register_new_record(new_id):
            """Create an empty record for `new_id` and hand it to the store."""
            new_record = Record(id=new_id, type=kosh_type)
            if store.__sync__:
                store.lock()
                try:
                    store.__record_handler__.insert(new_record)
                finally:
                    # Never leave the store locked if the insert fails
                    store.unlock()
            else:
                new_record["user_defined"]["last_update_from_db"] = time.time()
                self.__store__.__sync__dict__[new_id] = new_record

        # Internal state is written straight into __dict__, not through
        # attribute assignment. `protected` and `metadata` are only read
        # here, never modified.
        self.__dict__["__store__"] = store
        self.__dict__["__schema__"] = schema
        self.__dict__["__record_handler__"] = record_handler
        self.__dict__["__protected__"] = [
            "id", "__type__", "__protected__", "__record_handler__",
            "__store__", "id", "__schema__"
        ] + protected
        self.__dict__["__type__"] = kosh_type
        if Id is None:
            # Brand new object: pick an id and create its record
            Id = uuid.uuid4().hex
            register_new_record(Id)
            self.__dict__["id"] = Id
        else:
            # The id must be set first: get_record() below is called on self
            self.__dict__["id"] = Id
            if record is None:
                try:
                    self.get_record()
                except Exception:  # record exists nowhere
                    # Exception rather than BaseException, so Ctrl-C and
                    # interpreter exit are not mistaken for a missing record.
                    register_new_record(Id)

        for att, value in metadata.items():
            setattr(self, att, value)