Code example #1
from datetime import datetime

# Helpers under test; in maggma these live in maggma.utils
from maggma.utils import to_dt, to_isoformat_ceil_ms


def test_datetime_utils():
    assert (
        to_isoformat_ceil_ms(datetime(2019, 12, 13, 0, 23, 11, 9515))
        == "2019-12-13T00:23:11.010"
    )
    assert to_isoformat_ceil_ms("2019-12-13T00:23:11.010") == "2019-12-13T00:23:11.010"

    assert to_dt("2019-12-13T00:23:11.010") == datetime(2019, 12, 13, 0, 23, 11, 10000)
    assert to_dt(datetime(2019, 12, 13, 0, 23, 11, 10000)) == datetime(
        2019, 12, 13, 0, 23, 11, 10000
    )
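
The assertions above pin down the contract: strings pass through unchanged, and a datetime's microseconds are rounded up to whole milliseconds (9515 us becomes .010). A minimal sketch that satisfies them, not necessarily the library's actual implementation (fromisoformat needs Python 3.7+):

import math
from datetime import datetime, timedelta
from typing import Union

def to_isoformat_ceil_ms(dt: Union[datetime, str]) -> str:
    # Strings are assumed to be formatted already; pass them through.
    if isinstance(dt, str):
        return dt
    # Round microseconds up to the nearest millisecond.
    ms = math.ceil(dt.microsecond / 1000)
    base = dt.replace(microsecond=0)
    if ms == 1000:  # the ceiling rolled over into the next second
        base += timedelta(seconds=1)
        ms = 0
    return f"{base.isoformat(sep='T')}.{ms:03d}"

def to_dt(value: Union[datetime, str]) -> datetime:
    # Parse ISO-8601 strings back to datetime; pass datetimes through.
    return value if isinstance(value, datetime) else datetime.fromisoformat(value)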
Code example #2
    def rebuild_metadata_from_index(self, index_query: dict = None):
        """
        Read data from the index store and populate the metadata of the S3 bucket
        Force all of the keys to be lower case to be Minio compatible
        Args:
            index_query: query on the index store
        """

        qq = {} if index_query is None else index_query
        for index_doc in self.index.query(qq):
            key_ = self.sub_dir + index_doc[self.key]
            s3_object = self.s3_bucket.Object(key_)
            # make sure the keys are all lower case
            new_meta = {
                str(k).lower(): v
                for k, v in s3_object.metadata.items()
            }
            for k, v in index_doc.items():
                new_meta[str(k).lower()] = v
            new_meta.pop("_id", None)  # drop Mongo's _id if present
            if self.last_updated_field in new_meta:
                new_meta[self.last_updated_field] = str(
                    to_isoformat_ceil_ms(new_meta[self.last_updated_field]))
            # S3 metadata cannot be mutated in place; copy the object onto itself with REPLACE
            s3_object.copy_from(
                CopySource={
                    "Bucket": self.s3_bucket.name,
                    "Key": key_
                },
                Metadata=new_meta,
                MetadataDirective="REPLACE",
            )
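
The trick in this method is that S3 object metadata is immutable: the only way to rewrite it is to copy the object onto itself with MetadataDirective="REPLACE". A standalone sketch of that pattern; the bucket name, key, and added field are hypothetical:

import boto3

s3 = boto3.resource("s3")
obj = s3.Object("my-bucket", "some/key")

# Lower-case the existing keys (MinIO compatibility) and merge in new fields.
new_meta = {str(k).lower(): v for k, v in obj.metadata.items()}
new_meta["task_id"] = "mp-1234"  # hypothetical index field

# Copy the object over itself, replacing its metadata wholesale.
obj.copy_from(
    CopySource={"Bucket": obj.bucket_name, "Key": obj.key},
    Metadata=new_meta,
    MetadataDirective="REPLACE",
)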
Code example #3
    def write_doc_to_s3(self, doc: Dict, search_keys: List[str]):
        """
        Write the data to s3 and return the metadata to be inserted into the index db

        Args:
            doc: the document
            search_keys: list of keys to pull from the docs and be inserted into the
            index db
        """
        # boto3 requires all metadata values to be strings
        search_doc = {k: str(doc[k]) for k in search_keys}
        search_doc[self.key] = doc[self.key]  # Ensure key is in metadata
        if self.sub_dir != "":
            search_doc["sub_dir"] = self.sub_dir

        # Remove MongoDB _id from search
        if "_id" in search_doc:
            del search_doc["_id"]

        data = msgpack.packb(doc, default=monty_default)

        if self.compress:
            # Compress with zlib if chosen
            search_doc["compression"] = "zlib"
            data = zlib.compress(data)

        if self.last_updated_field in search_doc:
            search_doc[self.last_updated_field] = str(
                to_isoformat_ceil_ms(search_doc[self.last_updated_field]))

        self.s3_bucket.put_object(Key=self.sub_dir + str(doc[self.key]),
                                  Body=data,
                                  Metadata=search_doc)

        return search_doc
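
For reference, a reader has to undo both steps in reverse. A round-trip sketch of the serialization used above, assuming monty's matching object_hook; the sample doc is illustrative only:

import zlib
import msgpack
from monty.msgpack import default as monty_default, object_hook as monty_object_hook

doc = {"task_id": "mp-1234", "energy": -1.23}

# Pack with monty's default hook (handles datetimes and MSONable objects), then compress.
data = zlib.compress(msgpack.packb(doc, default=monty_default))

# Read side: decompress first, then unpack with the matching object_hook.
restored = msgpack.unpackb(zlib.decompress(data), object_hook=monty_object_hook, raw=False)
assert restored == doc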
Code example #4
    def write_doc_to_s3(self, doc: Dict, search_keys: List[str]):
        """
        Write the data to s3 and return the metadata to be inserted into the index db

        Args:
            doc: the document
            search_keys: list of keys to pull from the docs and be inserted into the
            index db
        """
        s3_bucket = self._get_bucket()

        search_doc = {k: str(doc[k]) for k in search_keys}
        search_doc[self.key] = doc[self.key]  # Ensure key is in metadata
        if self.sub_dir != "":
            search_doc["sub_dir"] = self.sub_dir

        # Remove MongoDB _id from search
        if "_id" in search_doc:
            del search_doc["_id"]

        # to make hashing more meaningful, remove the last-updated field before
        # packing (note: this mutates the caller's doc)
        lu_info = doc.pop(self.last_updated_field, None)
        data = msgpack.packb(doc, default=monty_default)

        if self.compress:
            # Compress with zlib if chosen
            search_doc["compression"] = "zlib"
            data = zlib.compress(data)

        if lu_info is not None:
            # the field was popped from doc above, so test lu_info directly;
            # need this conversion for aws metadata insert
            search_doc[self.last_updated_field] = str(
                to_isoformat_ceil_ms(lu_info))

        s3_bucket.put_object(Key=self.sub_dir + str(doc[self.key]),
                             Body=data,
                             Metadata=search_doc)

        if lu_info is not None:
            # restore the raw value so the index db gets the original datetime
            search_doc[self.last_updated_field] = lu_info

        if self.store_hash:
            hasher = sha1()
            hasher.update(data)
            obj_hash = hasher.hexdigest()
            search_doc["obj_hash"] = obj_hash
        return search_doc
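
Because obj_hash is the SHA-1 of the payload exactly as stored (after compression, if any), a reader can check integrity before decompressing. A hypothetical helper:

from hashlib import sha1

def payload_matches_index(data: bytes, index_doc: dict) -> bool:
    # data is the raw S3 body, still compressed if "compression" was set.
    return sha1(data).hexdigest() == index_doc.get("obj_hash")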