def test_datetime_utils():
    assert (
        to_isoformat_ceil_ms(datetime(2019, 12, 13, 0, 23, 11, 9515))
        == "2019-12-13T00:23:11.010"
    )
    assert to_isoformat_ceil_ms("2019-12-13T00:23:11.010") == "2019-12-13T00:23:11.010"
    assert to_dt("2019-12-13T00:23:11.010") == datetime(2019, 12, 13, 0, 23, 11, 10000)
    assert to_dt(datetime(2019, 12, 13, 0, 23, 11, 10000)) == datetime(
        2019, 12, 13, 0, 23, 11, 10000
    )
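# A minimal sketch of helpers consistent with the assertions above, using only the
# standard library; the library's actual to_isoformat_ceil_ms/to_dt implementations
# may differ.
import math
from datetime import datetime, timedelta


def to_isoformat_ceil_ms(dt):
    """Format to ISO-8601 with millisecond precision, rounding microseconds up."""
    if isinstance(dt, str):
        # Assume strings are already in the desired format
        return dt
    ms = math.ceil(dt.microsecond / 1000)
    return (dt.replace(microsecond=0) + timedelta(milliseconds=ms)).isoformat(
        timespec="milliseconds"
    )


def to_dt(value):
    """Parse an ISO-8601 string into a datetime; pass datetimes through unchanged."""
    return value if isinstance(value, datetime) else datetime.fromisoformat(value)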
def rebuild_metadata_from_index(self, index_query: dict = None):
    """
    Read data from the index store and populate the metadata of the S3 bucket.
    Forces all keys to lower case to be Minio-compatible.

    Args:
        index_query: query on the index store
    """
    qq = {} if index_query is None else index_query
    for index_doc in self.index.query(qq):
        key_ = self.sub_dir + index_doc[self.key]
        s3_object = self.s3_bucket.Object(key_)
        # make sure the keys are all lower case
        new_meta = {str(k).lower(): v for k, v in s3_object.metadata.items()}
        for k, v in index_doc.items():
            new_meta[str(k).lower()] = v
        new_meta.pop("_id")
        if self.last_updated_field in new_meta:
            new_meta[self.last_updated_field] = str(
                to_isoformat_ceil_ms(new_meta[self.last_updated_field])
            )
        # s3_object.metadata.update(new_meta)
        s3_object.copy_from(
            CopySource={"Bucket": self.s3_bucket.name, "Key": key_},
            Metadata=new_meta,
            MetadataDirective="REPLACE",
        )
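# Sketch of the underlying boto3 pattern used above: S3/Minio object metadata cannot
# be edited in place, so it is rewritten by copying the object onto itself with
# MetadataDirective="REPLACE". Bucket name, key, and metadata values are placeholders.
import boto3

s3 = boto3.resource("s3")
obj = s3.Bucket("my-bucket").Object("tasks/mp-1")
obj.copy_from(
    CopySource={"Bucket": "my-bucket", "Key": "tasks/mp-1"},
    Metadata={"task_id": "mp-1", "last_updated": "2019-12-13T00:23:11.010"},
    MetadataDirective="REPLACE",
)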
def write_doc_to_s3(self, doc: Dict, search_keys: List[str]):
    """
    Write the data to S3 and return the metadata to be inserted into the index db.

    Args:
        doc: the document
        search_keys: list of keys to pull from the docs and be inserted into the index db
    """
    search_doc = {k: doc[k] for k in search_keys}
    search_doc[self.key] = doc[self.key]  # Ensure key is in metadata
    if self.sub_dir != "":
        search_doc["sub_dir"] = self.sub_dir

    # Remove MongoDB _id from search
    if "_id" in search_doc:
        del search_doc["_id"]

    data = msgpack.packb(doc, default=monty_default)

    if self.compress:
        # Compress with zlib if chosen
        search_doc["compression"] = "zlib"
        data = zlib.compress(data)

    if self.last_updated_field in search_doc:
        search_doc[self.last_updated_field] = str(
            to_isoformat_ceil_ms(search_doc[self.last_updated_field])
        )

    self.s3_bucket.put_object(
        Key=self.sub_dir + str(doc[self.key]),
        Body=data,
        Metadata=search_doc,
    )

    return search_doc
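# Hypothetical caller, assuming a store whose index exposes a Mongo-like update();
# the document list, search keys, and key name below are illustrative only.
search_docs = [
    store.write_doc_to_s3(doc, search_keys=["task_id", "formula"]) for doc in docs
]
store.index.update(search_docs, key="task_id")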
def write_doc_to_s3(self, doc: Dict, search_keys: List[str]):
    """
    Write the data to S3 and return the metadata to be inserted into the index db.

    Args:
        doc: the document
        search_keys: list of keys to pull from the docs and be inserted into the index db
    """
    s3_bucket = self._get_bucket()

    search_doc = {k: str(doc[k]) for k in search_keys}
    search_doc[self.key] = doc[self.key]  # Ensure key is in metadata
    if self.sub_dir != "":
        search_doc["sub_dir"] = self.sub_dir

    # Remove MongoDB _id from search
    if "_id" in search_doc:
        del search_doc["_id"]

    # to make hashing more meaningful, make sure last updated field is removed
    lu_info = doc.pop(self.last_updated_field, None)
    data = msgpack.packb(doc, default=monty_default)

    if self.compress:
        # Compress with zlib if chosen
        search_doc["compression"] = "zlib"
        data = zlib.compress(data)

    if lu_info is not None:
        # need this conversion for the AWS metadata insert
        search_doc[self.last_updated_field] = str(to_isoformat_ceil_ms(lu_info))

    s3_bucket.put_object(
        Key=self.sub_dir + str(doc[self.key]),
        Body=data,
        Metadata=search_doc,
    )

    if lu_info is not None:
        search_doc[self.last_updated_field] = lu_info

    if self.store_hash:
        hasher = sha1()
        hasher.update(data)
        obj_hash = hasher.hexdigest()
        search_doc["obj_hash"] = obj_hash

    return search_doc
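# Sketch of how the stored hash could be verified on read, assuming the caller has
# the boto3 bucket and the corresponding index document; the object key and field
# names are illustrative. The hash covers the raw (possibly compressed) payload.
from hashlib import sha1

body = s3_bucket.Object("tasks/mp-1").get()["Body"].read()
if sha1(body).hexdigest() != index_doc["obj_hash"]:
    raise ValueError("S3 object no longer matches the hash recorded in the index")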