def setUp(self):
    """Create some Records to compare."""
    def build(rec_id, rec_type, foo_value):
        # Build a Record from fresh dict literals each call so the three
        # Records never share mutable state.
        return Record(id=rec_id,
                      type=rec_type,
                      data={"foo": {"value": foo_value},
                            "bar": {"value": "1", "tags": ["in"]}},
                      files=[{"uri": "ham.png", "mimetype": "png"},
                             {"uri": "ham.curve", "tags": ["hammy"]}],
                      user_defined={})

    # record_one and record_two are identical; record_three differs in
    # id, type, and the "foo" value.
    self.record_one = build("spam", "new_eggs", 12)
    self.record_two = build("spam", "new_eggs", 12)
    self.record_three = build("spam2", "super_eggs", 13)
def test_recorddao_delete_one(self):
    """Test that RecordDAO is deleting correctly."""
    dao = self.create_dao_factory(
        test_db_dest=self.test_db_dest).create_record_dao()
    dao.insert(Record(id="rec_1", type="sample"))
    dao.delete("rec_1")
    # After deletion nothing of type "sample" should remain.
    self.assertEqual(list(dao.get_all_of_type("sample")), [])
def test_recorddao_insert_retrieve(self):
    """Test that RecordDAO is inserting and getting correctly."""
    dao = self.create_dao_factory().create_record_dao()
    original = Record(id="spam",
                      type="eggs",
                      data={"eggs": {"value": 12,
                                     "units": None,
                                     "tags": ["runny"]}},
                      files=[{"uri": "eggs.brek",
                              "mimetype": "egg",
                              "tags": ["fried"]}],
                      user_defined={})
    dao.insert(original)
    fetched = dao.get("spam")
    # Test one definition of Record equivalence, attribute by attribute
    # (instead of __dict__) so a failure pinpoints which part didn't
    # round-trip.
    for attr in ("id", "type", "data", "files", "user_defined"):
        self.assertEqual(getattr(fetched, attr), getattr(original, attr))
def test_recorddao_delete_data_cascade(self):
    """Test that deletion of a Record correctly cascades to data and files."""
    record_dao = self.create_dao_factory(
        test_db_dest=self.test_db_dest).create_record_dao()
    record_dao.insert(
        Record(id="rec_1",
               type="sample",
               data={"eggs": {"value": 12, "tags": ["breakfast"]},
                     "flavor": {"value": "tasty"}},
               files=[{"uri": "justheretoexist.png"}]))
    record_dao.delete("rec_1")
    # Deleting the Record must also remove its data and file entries.
    self.assertEqual(
        record_dao.get_data_for_records(id_list=["rec_1"],
                                        data_list=["eggs", "flavor"]),
        {})
    self.assertEqual(
        list(record_dao.get_given_document_uri("justheretoexist.png",
                                               ids_only=True)),
        [])
def test_recorddao_delete_one_with_relationship(self):
    """Test that RecordDAO deletions include relationships."""
    factory = self.create_dao_factory(test_db_dest=self.test_db_dest)
    record_dao = factory.create_record_dao()
    relationship_dao = factory.create_relationship_dao()
    record_dao.insert_many([Record(id="rec_1", type="sample"),
                            Record(id="rec_2", type="sample")])
    relationship_dao.insert(subject_id="rec_1",
                            object_id="rec_2",
                            predicate="dupes")
    record_dao.delete("rec_1")
    # The relationship rooted at rec_1 must be gone ...
    self.assertFalse(relationship_dao.get(subject_id="rec_1"))
    # ... while rec_2 itself must survive the deletion.
    self.assertEqual(
        list(record_dao.get_all_of_type("sample", ids_only=True)),
        ["rec_2"])
def test_find_file_from_sina_records(self):
    """Test that datasets are found by file uri after inserting raw sina records."""
    store, kosh_db = self.connect(sync=False)
    handler = store.get_sina_records()
    # One record with a mimetype on its file, one without.
    with_mime = Record("foo", type="blah")
    with_mime.add_file("setup.py", mimetype="py")
    handler.insert(with_mime)
    without_mime = Record("bar", type="blah")
    without_mime.add_file("smefile")
    handler.insert(without_mime)
    # Each uri should match exactly one dataset.
    for uri in ("setup.py", "smefile"):
        self.assertEqual(len(list(store.find(file_uri=uri))), 1)
def test_recorddao_delete_many(self):
    """Test that RecordDAO can delete many at once."""
    factory = self.create_dao_factory(test_db_dest=self.test_db_dest)
    record_dao = factory.create_record_dao()
    relationship_dao = factory.create_relationship_dao()
    all_ids = ["rec_1", "rec_2", "rec_3", "rec_4"]
    record_dao.insert_many(
        [Record(id=rec_id, type="sample") for rec_id in all_ids])
    relationship_dao.insert(subject_id="rec_1",
                            object_id="rec_2",
                            predicate="dupes")
    relationship_dao.insert(subject_id="rec_2",
                            object_id="rec_2",
                            predicate="is")
    relationship_dao.insert(subject_id="rec_3",
                            object_id="rec_4",
                            predicate="dupes")
    relationship_dao.insert(subject_id="rec_4",
                            object_id="rec_4",
                            predicate="is")
    # Delete several
    record_dao.delete_many(["rec_1", "rec_2", "rec_3"])
    self.assertEqual(
        list(record_dao.get_all_of_type("sample", ids_only=True)),
        ["rec_4"])
    # Make sure expected data entries were deleted as well (acts as cascade
    # test): querying all four ids must now return the same as querying only
    # the survivor.
    for_all = record_dao.get_data_for_records(id_list=all_ids,
                                              data_list=["eggs", "flavor"])
    for_one = record_dao.get_data_for_records(id_list=["rec_4"],
                                              data_list=["eggs", "flavor"])
    self.assertEqual(for_all, for_one)
    # Make sure expected Relationships were deleted
    self.assertFalse(relationship_dao.get(object_id="rec_2"))
    self.assertFalse(relationship_dao.get(subject_id="rec_3"))
    self.assertEqual(len(relationship_dao.get(object_id="rec_4")), 1)
def setUp(self):
    """Create records used for testing."""
    def build(rec_id, data):
        # Shared pieces: a "bar" datum and the two ham files. Added here so
        # each Record gets fresh dict/list objects.
        data["bar"] = {"value": "1", "tags": ["in"]}
        return Record(id=rec_id,
                      type="new_eggs",
                      data=data,
                      files=[{"uri": "ham.png", "mimetype": "png"},
                             {"uri": "ham.curve", "tags": ["hammy"]}],
                      user_defined={})

    # record_one holds valid list data; record_two holds malformed lists.
    self.record_one = build(
        "spam",
        {"list_scalars": {"value": [1, 2, 3]},
         "list_strings": {"value": ['apple', 'orange']}})
    self.record_two = build(
        "spam2",
        {"bad_list": {"value": ['bad', 3]},
         "bad_list_2": {"value": [1, 2, {'not': 'allowed'}]}})
def create_kosh_users(record_handler, users=None):
    """Add Kosh user(s) to the Kosh store.

    For each requested username that does not already have a user record,
    creates one (id = md5 of the username) and inserts it.

    :param record_handler: The sina records object
    :type record_handler: sina.records
    :param users: list of usernames to add; when None, defaults to the
                  current ``$USER`` (or "default" if unset) plus
                  "anonymous", resolved at call time.
    :type users: list or None
    """
    if users is None:
        # Resolved here rather than in the signature: a list default would be
        # a shared mutable object, and os.environ.get("USER") in the
        # signature is frozen at import time, ignoring later environment
        # changes.
        users = [os.environ.get("USER", "default"), "anonymous"]
    # The store-info record tells us which record type holds users.
    store_info = list(record_handler.find_with_type([
        "__kosh_storeinfo__",
    ]))[0]
    user_type = store_info["data"]["users_type"]["value"]
    # Create users that do not exist yet.
    for user in users:
        existing = list(
            record_handler.find(types=[
                user_type,
            ], data={"username": user}))
        if len(existing) == 0:
            # Deterministic id so every rank/process derives the same one.
            uid = hashlib.md5(user.encode()).hexdigest()
            user_record = Record(id=uid, type=user_type)
            user_record.add_data("username", user)
            record_handler.insert(user_record)
def __init__(self, source_dir):
    """
    Get our first Records created and set up initial info.

    :param source_dir: path to the "files" subdirectory we create during
                       conversion. Because we've already copied important
                       files into there, it acts as both source and
                       destination.
    """
    self.source_dir = source_dir
    # We start off with a few Records detailing how the quality control
    # numbers work.
    self.records = []
    for qc_id, qc_desc in QC_DATA:
        self.records.append(
            Record(qc_id, "qc", data={"desc": {"value": qc_desc}}))
    self.relationships = []
def add_experiment(self, experiment_id):
    """
    Add an experiment Record.

    :param experiment_id: experiment id string
    """
    # The main CSV plus every supplemental file, keyed by destination path.
    files = {
        os.path.join(self.source_dir, CSV_NAME): {
            "mimetype": "text/csv",
            "tags": ["data"]
        }
    }
    for supplemental, tag, mime in SUPPLEMENTAL_FILES:
        dest = os.path.join(self.source_dir, os.path.basename(supplemental))
        files[dest] = {"mimetype": mime, "tags": [tag]}
    self.records.append(Record(experiment_id, "exp", files=files))
def setUp(self):
    """Set up data for testing get_list."""
    factory = sina_sql.DAOFactory()
    self.record_dao = factory.create_record_dao()
    # (record id, datum name, list value) for each sample Record.
    specs = [
        ("rec_1", "eggs", [0, 1, 2, 3]),
        ("rec_2", "eggs", [1, 2, 3, 4, 5]),
        ("rec_3", "eggs", [4, 5, 6, 7]),
        ("rec_4", "spam", ["awesome", "canned", "zebra"]),
        ("rec_5", "spam", ["fried", "toasted", "zebra"]),
        ("rec_6", "spam", ["tree", "honey"]),
    ]
    records = [
        Record(id=rec_id, type="sample", data={name: {"value": value}})
        for rec_id, name, value in specs
    ]
    (self.record_1, self.record_2, self.record_3,
     self.record_4, self.record_5, self.record_6) = records
    self.record_dao.insert_many(records)
def add_observation(self, obs_id, obs_filename, data):
    """
    Add an observation Record.

    :param obs_id: the id of the observation to create
    :param obs_filename: the file that contains its data (and only its data)
    :param data: a dictionary of datum_name: val that we want to assign to
                 this observation.
    """
    record = Record(obs_id,
                    "obs",
                    files={obs_filename: {"mimetype": "text/plain"}})
    for name, val in data.items():
        # QC entries stay as-is (tagged); everything else is numeric data
        # with optional units.
        if name in ("o2_qc", "ph_qc"):
            record.add_data(name, val, tags=["qc"])
        else:
            record.add_data(name, float(val), units=UNITS.get(name, None))
    self.records.append(record)
def associate(self, uri, mime_type, metadata={}, id_only=True,
              long_sha=False, absolute_path=True):
    """associates a uri/mime_type with this dataset

    ``uri`` and ``mime_type`` may each be a single value or a list. When
    ``uri`` is a list, ``metadata`` may be one dict (applied to every uri)
    or a list of dicts (one per uri).

    NOTE(review): ``metadata={}`` is a mutable default argument; it is only
    read here (copied per-uri), but ``metadata=None`` would be safer —
    confirm no caller depends on the shared default object.

    :param uri: uri(s) to access content
    :type uri: str or list of str
    :param mime_type: mime type associated with this file
    :type mime_type: str or list of str
    :param metadata: metadata to associate with file, defaults to {}
    :type metadata: dict, optional
    :param id_only: do not return kosh file object, just its id
    :type id_only: bool
    :param long_sha: Do we compute the long sha on this or not?
    :type long_sha: bool
    :param absolute_path: if file exists should we store its absolute_path
    :type absolute_path: bool
    :return: A (list) Kosh Sina File(s)
    :rtype: list of KoshSinaFile or KoshSinaFile
    """
    rec = self.get_record()
    # Need to remember we touched associated files
    now = time.time()
    # Normalize uri/metadata/mime_type into parallel lists; remember whether
    # the caller passed a single uri so the return shape matches the input.
    if isinstance(uri, basestring):
        uris = [uri, ]
        metadatas = [metadata, ]
        mime_types = [mime_type, ]
        single_element = True
    else:
        uris = uri
        if isinstance(metadata, dict):
            # One dict for all uris (shared by reference; copied per-uri below).
            metadatas = [metadata, ] * len(uris)
        else:
            metadatas = metadata
        if isinstance(mime_type, basestring):
            mime_types = [mime_type, ] * len(uris)
        else:
            mime_types = mime_type
        single_element = False
    new_recs = []       # source records to bulk-insert when store is synced
    kosh_file_ids = []  # one kosh id per uri, in input order
    for i, uri in enumerate(uris):
        try:
            # Per-uri copy so a shared metadata dict is never mutated.
            meta = metadatas[i].copy()
            if os.path.exists(uri):
                if long_sha:
                    meta["long_sha"] = compute_long_sha(uri)
                if absolute_path:
                    uri = os.path.abspath(uri)
                if not os.path.isdir(uri) and "fast_sha" not in meta:
                    meta["fast_sha"] = compute_fast_sha(uri)
            rec["user_defined"]["{uri}___associated_last_modified".format(
                uri=uri)] = now
            # We need to check if the uri was already associated somewhere
            tmp_uris = list(self.__store__.find(
                types=[self.__store__._sources_type, ],
                uri=uri, ids_only=True))
            if len(tmp_uris) == 0:
                # Brand-new source: mint an id and a fresh source record.
                Id = uuid.uuid4().hex
                rec_obj = Record(id=Id, type=self.__store__._sources_type)
            else:
                # Source already known to the store (possibly via another
                # dataset); the mime types must agree.
                rec_obj = self.__store__.get_record(tmp_uris[0])
                Id = rec_obj.id
                existing_mime = rec_obj["data"]["mime_type"]["value"]
                mime_type = mime_types[i]
                if existing_mime != mime_types[i]:
                    rec["files"][uri]["mime_type"] = existing_mime
                    raise TypeError("source {} is already associated with another dataset with mimetype"
                                    " '{}' you specified mime_type '{}'".format(uri, existing_mime,
                                                                                mime_types[i]))
            rec.add_file(uri, mime_types[i])
            rec["files"][uri]["kosh_id"] = Id
            meta["uri"] = uri
            meta["mime_type"] = mime_types[i]
            meta["associated"] = [self.id, ]
            for key in meta:
                rec_obj.add_data(key, meta[key])
                last_modif_att = "{name}_last_modified".format(name=key)
                rec_obj["user_defined"][last_modif_att] = time.time()
            if not self.__store__.__sync__:
                rec_obj["user_defined"]["last_update_from_db"] = time.time()
                self.__store__.__sync__dict__[Id] = rec_obj
            new_recs.append(rec_obj)
        except TypeError as err:
            # Mime-type conflicts are real errors; propagate them.
            raise(err)
        except Exception:
            # file already in there
            # Let's get the matching id
            # NOTE(review): reaching here presumably means the uri is
            # already attached to THIS dataset (rec.add_file or an earlier
            # step raised) — confirm which call is expected to fail.
            if rec_obj["data"]["mime_type"]["value"] != mime_types[i]:
                raise TypeError("file {} is already associated with this dataset with mimetype"
                                " '{}' you specified mime_type '{}'".format(uri, existing_mime,
                                                                            mime_type))
            else:
                Id = rec["files"][uri]["kosh_id"]
                if len(metadatas[i]) != 0:
                    warnings.warn(
                        "uri {} was already associated, metadata will "
                        "stay unchanged\nEdit object (id={}) directly to update attributes.".format(uri, Id))
        kosh_file_ids.append(Id)
    if self.__store__.__sync__:
        # Synced store: persist the new source records under the store lock.
        self.__store__.lock()
        self.__store__.__record_handler__.insert(new_recs)
        self.__store__.unlock()
        self._update_record(rec)
    else:
        self._update_record(rec, self.__store__._added_unsync_mem_store)
    # Since we changed the associated, we need to cleanup
    # the features cache
    self.__dict__["__features__"][None] = {}
    if id_only:
        if single_element:
            return kosh_file_ids[0]
        else:
            return kosh_file_ids
    # Caller wants Kosh file objects, not bare ids: wrap each id.
    kosh_files = []
    for Id in kosh_file_ids:
        self.__dict__["__features__"][Id] = {}
        kosh_file = KoshSinaObject(Id=Id,
                                   kosh_type=self.__store__._sources_type,
                                   store=self.__store__,
                                   metadata=metadata,
                                   record_handler=self.__record_handler__)
        kosh_files.append(kosh_file)
    if single_element:
        return kosh_files[0]
    else:
        return kosh_files
def populate_database_with_data(record_dao):
    """
    Add test data to a database in a backend-independent way.

    :param record_dao: The RecordDAO used to insert records into a database.
    """
    def fill_data(record, entries):
        # Copy each datum into the record, preserving insertion order.
        for name, datum in entries.items():
            record.data[name] = datum

    spam_record = Record(id="spam", type="run")
    spam_record["application"] = "breakfast_maker"
    spam_record["user"] = "******"
    spam_record["version"] = "1.4.0"
    fill_data(spam_record, {
        "spam_scal": {"value": 10, "units": "pigs", "tags": ["hammy"]},
        "spam_scal_2": {"value": 200},
        "val_data": {"value": "runny", "tags": ["edible"]},
        "val_data_2": {"value": "double yolks"},
    })
    spam_record.files = [{"uri": "beep.wav"}, {"uri": "beep.pong"}]

    spam_record_2 = Record(id="spam2", type="run")
    fill_data(spam_record_2, {"spam_scal": {"value": 10.99999}})
    spam_record_2.files = [{"uri": "beep/png"}]

    spam_record_3 = Record(id="spam3", type="foo")
    fill_data(spam_record_3, {
        "spam_scal": {"value": 10.5},
        "spam_scal_2": {"value": 10.5},
        "val_data": {"value": "chewy", "tags": ["edible"]},
        "val_data_2": {"value": "double yolks"},
    })
    spam_record_3.files = [{"uri": "beeq.png"}]

    spam_record_4 = Record(id="spam4", type="bar")
    fill_data(spam_record_4, {"val_data_2": {"value": "double yolks"}})
    spam_record_4.files = [{"uri": "beep.png"}]

    spam_record_5 = Record(id="spam5", type="run")
    fill_data(spam_record_5, {
        "spam_scal_3": {"value": 46},
        "val_data_3": {"value": "sugar"},
        "val_data_list_1": {"value": [0, 9.3]},
        "val_data_list_2": {"value": ['eggs', 'pancake']},
    })
    spam_record_5.files = [{
        "uri": "beep.wav",
        "tags": ["output", "eggs"],
        "mimetype": 'audio/wav'
    }]

    spam_record_6 = Record(id="spam6", type="spamrec")
    fill_data(spam_record_6, {
        "val_data_3": {"value": "syrup"},
        "val_data_list_1": {"value": [8, 20]},
        "val_data_list_2": {"value": ['eggs', 'yellow']},
    })

    egg_record = Record(id="eggs", type="eggrec")
    fill_data(egg_record, {"eggs_scal": {"value": 0}})

    record_dao.insert_many([
        spam_record, spam_record_2, spam_record_3, spam_record_4,
        spam_record_5, spam_record_6, egg_record
    ])
def update_store_and_get_info_record(records, ensemble_predicate=None):
    """Obtain the sina record containing store info
    If necessary update store to latest standards

    :param records: The sina store "records" object
    :type records: sina.datastore.DataStore.RecordOperations
    :param ensemble_predicate: The predicate for the relationship to an ensemble
    :type ensemble_predicate: str
    :returns: sina record for store info
    :rtype: Record
    """
    # First let's see if this store contains a dedicated record
    # describing this store specs
    store_info = list(records.find_with_type("__kosh_storeinfo__"))
    if len(store_info) > 1:
        # There is a small chance that the store was created on multiple processors
        # simultaneously and that these are identical, let's try to recover
        # that was true for a small period in Kosh dev branch
        rec = store_info[0]
    elif len(store_info) == 0:
        # ok it's the old type or a new store, let's try to upgrade it for next time
        # and add the store info
        # Because of mpi ranks issues let's fix the id
        rec = Record(id="__kosh_store_info__", type="__kosh_storeinfo__")
        # NOTE(review): this freshly created rec has no "kosh_min_version"
        # datum, yet it is read below — presumably another code path
        # populates it first for brand-new stores; confirm this cannot
        # raise KeyError here.
        if hasattr(records, "insert"):  # Readonly can't insert
            # It's possible many ranks will try to create this record
            # They are all identical, let's allow the error
            try:
                records.insert(rec)
            except Exception:
                pass
    else:
        rec = store_info[0]
    # Collapse the dotted version string into a single comparable float
    # (major + minor/10 + patch/100 ...); components starting with 'g'
    # (presumably git-hash suffixes of dev builds) are skipped.
    # This will fail if we get to version x.10
    # revisit then...
    ver = sum([
        float(x) / 10**i for i, x in enumerate(version().split("."))
        if x[0] != 'g'
    ])
    min_ver = rec["data"]["kosh_min_version"]["value"]
    min_ver = sum(
        [float(x) / 10**i for i, x in enumerate(min_ver.split("."))])
    if ver < min_ver:
        raise RuntimeError(
            "This Kosh store requires Kosh version greater than {}, you have {}"
            .format(min_ver, version()))
    # Backfill any store-configuration entries this (possibly older) record
    # is missing; only write back if something actually changed.
    need_update = False
    if "sources_type" not in rec["data"]:
        rec.add_data("sources_type", "file")
        need_update = True
    if "users_type" not in rec["data"]:
        rec.add_data("users_type", "user")
        need_update = True
    if "groups_type" not in rec["data"]:
        rec.add_data("groups_type", "group")
        need_update = True
    if "loaders_type" not in rec["data"]:
        rec.add_data("loaders_type", "koshloader")
        need_update = True
    if "ensembles_type" not in rec["data"]:
        rec.add_data("ensembles_type", "kosh_ensemble")
        need_update = True
    if "ensemble_predicate" not in rec["data"]:
        # Caller-provided predicate wins; otherwise use the default phrase.
        if ensemble_predicate is None:
            rec.add_data("ensemble_predicate", "is a member of ensemble")
        else:
            rec.add_data("ensemble_predicate", ensemble_predicate)
        need_update = True
    if "kosh_min_version" not in rec["data"]:
        rec.add_data("kosh_min_version", "1.2.1")
        need_update = True
    if "reserved_types" not in rec["data"]:
        rec.add_data("reserved_types", [
            "__kosh_storeinfo__", "file", "user", "group", "kosh_ensemble",
            "koshloader"
        ])
        need_update = True
    # Force the reserved-types list to the current canonical set
    # (order-insensitive comparison via sorted()).
    if sorted(rec["data"]["reserved_types"]["value"]) != [
            '__kosh_storeinfo__', 'file', 'group', 'kosh_ensemble',
            'koshloader', 'user'
    ]:
        rec["data"]["reserved_types"]["value"] = [
            '__kosh_storeinfo__', 'file', 'group', 'kosh_ensemble',
            'koshloader', 'user'
        ]
        need_update = True
    if need_update and hasattr(records, "insert"):
        # Replace the stored record with the upgraded one; both steps are
        # best-effort since multiple ranks may race on the same store.
        try:
            records.delete(rec.id)
        except Exception:
            # in case multi-processors interfere with each others
            pass
        try:
            records.insert(rec)
        except Exception:
            # in case multi-processors interfere with each others
            pass
    return rec
def __init__(self, Id, store, kosh_type, record_handler, protected=None,
             metadata=None, schema=None, record=None):
    """__init__ sina object base class

    :param Id: id to use for unique identification, if None is passed one is
               generated for you via uuid4()
    :type Id: str
    :param store: Kosh store associated
    :type store: KoshSinaStore
    :param kosh_type: type of Kosh object (dataset, file, project, ...)
    :type kosh_type: str
    :param record_handler: sina record handler object
    :type record_handler: RecordDAO
    :param protected: list of protected parameters, e.g internal params not
                      to be stored; defaults to an empty list
    :type protected: list, optional
    :param metadata: dictionary of attributes/value to initialize object
                     with; defaults to an empty dict
    :type metadata: dict, optional
    :param record: sina record to prevent looking it up again and again in sina
    :type record: Record
    """
    # None-sentinels instead of mutable defaults ([] / {}) so calls never
    # share the same list/dict object.
    if protected is None:
        protected = []
    if metadata is None:
        metadata = {}
    self.__dict__["__store__"] = store
    self.__dict__["__schema__"] = schema
    self.__dict__["__record_handler__"] = record_handler
    # Attributes below are never persisted to the sina record.
    # (Original list carried "id" twice; the duplicate is dropped.)
    self.__dict__["__protected__"] = [
        "id", "__type__", "__protected__", "__record_handler__",
        "__store__", "__schema__"
    ] + protected
    self.__dict__["__type__"] = kosh_type
    if Id is None:
        # Fresh object: mint an id and create its backing record.
        Id = uuid.uuid4().hex
        record = Record(id=Id, type=kosh_type)
        if store.__sync__:
            store.lock()
            store.__record_handler__.insert(record)
            store.unlock()
        else:
            record["user_defined"]["last_update_from_db"] = time.time()
            self.__store__.__sync__dict__[Id] = record
        self.__dict__["id"] = Id
    else:
        self.__dict__["id"] = Id
        if record is None:
            try:
                record = self.get_record()
            except BaseException:  # record exists nowhere
                # Not found anywhere: create and register a new record.
                record = Record(id=Id, type=kosh_type)
                if store.__sync__:
                    store.lock()
                    store.__record_handler__.insert(record)
                    store.unlock()
                else:
                    self.__store__.__sync__dict__[Id] = record
                    record["user_defined"][
                        "last_update_from_db"] = time.time()
    # Apply caller-supplied attributes last, once the object is wired up.
    for att, value in metadata.items():
        setattr(self, att, value)