def import_customer(last_pull_date):
    """
    ETL function that imports new customer data.

    Requests the ``CustomerRevision`` documents whose ``lastUpdate`` is later
    than ``last_pull_date`` and passes each one to ``insert_customer``. All
    inserts are committed together; any error rolls the whole batch back.

    :param last_pull_date: date from which retrieval begins (handed to
        ``parse`` before being sent as the ``$gt`` bound)
    :return: None
    """
    if not _check_webservice_status():
        logger.error(
            "Web service not available, not ETL integration of Customer")
        return

    url_target = "{0}/find/CustomerRevision".format(__etl_http_link)
    # NOTE(review): assuming JSONOptions is bson.json_util's, 2 is
    # DatetimeRepresentation.ISO8601 -- confirm and prefer the named constant.
    params = dumps({"lastUpdate": {"$gt": parse(last_pull_date)}},
                   json_options=JSONOptions(datetime_representation=2))
    r = requests.get(url_target,
                     params={"query": params},
                     auth=(__user_etl_mongo, __etl_etl_mongo))

    if r.status_code != 200:
        # Previously a failed request was ignored without any trace.
        logger.error(
            "ETL Customer request failed since {0}: status_code {1}".format(
                last_pull_date, r.status_code))
        return

    try:
        for elem in loads(_decompress(r.content)):
            insert_customer(DB_scoped_session, elem, mapping)
        DB_scoped_session.commit()
        logger.info(
            'Successful ETL Customer since {}'.format(last_pull_date))
    except Exception as e:
        # Task boundary: log with the traceback, then undo the partial batch.
        logger.exception(
            "Error in ETL task (import_customer)({0}): {1}".format(
                last_pull_date, e))
        DB_scoped_session.rollback()
    finally:
        DB_scoped_session.close()
def create_tests():
    """Generate one test per scenario entry and attach it to
    ``MotorTransactionTest``.

    Walks ``_TEST_PATH``, loads every ``.json`` file as a scenario definition
    and, for each item in its ``'tests'`` list, builds a test with
    ``create_test``. Each test is wrapped with ``env.require`` so it is gated
    on its own ``skipReason``, and additionally on ``env.secondaries`` when
    the generated name contains ``'secondary'``.
    """
    assert os.path.isdir(_TEST_PATH)

    for dirpath, _, filenames in os.walk(_TEST_PATH):
        dirname = os.path.split(dirpath)[-1]

        for filename in filenames:
            test_type, ext = os.path.splitext(filename)
            if ext != '.json':
                continue

            with open(os.path.join(dirpath, filename)) as scenario_stream:
                opts = JSONOptions(document_class=ScenarioDict)
                scenario_def = json_util.loads(scenario_stream.read(),
                                               json_options=opts)

            # Construct test from scenario.
            for test in scenario_def['tests']:
                test_name = 'test_%s_%s_%s' % (
                    dirname,
                    test_type.replace("-", "_"),
                    str(test['description'].replace(" ", "_")))

                new_test = create_test(scenario_def, test)

                # Bind this iteration's skip reason as a default argument.
                # A bare ``lambda: not test.get(...)`` would look up ``test``
                # only when called, i.e. after the loop has moved on, so every
                # generated test would use the last scenario's skipReason.
                skip_reason = test.get('skipReason')
                new_test = env.require(
                    lambda skip_reason=skip_reason: not skip_reason,
                    skip_reason,
                    new_test)

                if 'secondary' in test_name:
                    new_test = env.require(lambda: env.secondaries,
                                           'No secondaries',
                                           new_test)

                # In Python 2, cast test_name from unicode to str.
                new_test.__name__ = str(test_name)
                setattr(MotorTransactionTest, new_test.__name__, new_test)
def test_json_options_with_options(self):
    """Check that ``with_options`` replaces the named fields and keeps the rest."""
    number_long = DatetimeRepresentation.NUMBERLONG
    iso8601 = DatetimeRepresentation.ISO8601
    java_legacy = UuidRepresentation.JAVA_LEGACY

    # Overriding datetime_representation.
    dt_opts = JSONOptions(datetime_representation=number_long,
                          json_mode=JSONMode.LEGACY)
    self.assertEqual(dt_opts.datetime_representation, number_long)
    dt_changed = dt_opts.with_options(datetime_representation=iso8601,
                                      json_mode=JSONMode.LEGACY)
    self.assertEqual(dt_changed.datetime_representation, iso8601)

    # Overriding strict_number_long.
    strict_opts = JSONOptions(strict_number_long=True,
                              json_mode=JSONMode.LEGACY)
    self.assertEqual(strict_opts.strict_number_long, True)
    relaxed_opts = strict_opts.with_options(strict_number_long=False)
    self.assertEqual(relaxed_opts.strict_number_long, False)

    # Overriding uuid_representation, then document_class on top of it.
    canonical = json_util.CANONICAL_JSON_OPTIONS
    self.assertNotEqual(canonical.uuid_representation, java_legacy)
    with_uuid = canonical.with_options(uuid_representation=java_legacy)
    self.assertEqual(with_uuid.uuid_representation, java_legacy)
    self.assertEqual(with_uuid.document_class, dict)

    with_son = with_uuid.with_options(document_class=SON)
    self.assertEqual(with_son.uuid_representation, java_legacy)
    self.assertEqual(with_son.document_class, SON)
def test_numberlong(self):
    """Check decoding of ``$numberLong`` and both encodings of ``Int64``."""
    wrapped = '{"weight": {"$numberLong": "65535"}}'
    document = {"weight": Int64(65535)}

    # Decoding the wrapped form yields an Int64.
    decoded = json_util.loads(wrapped)
    self.assertEqual(decoded['weight'], Int64(65535))

    # With default options the value is written as a bare number.
    self.assertEqual(json_util.dumps(document), '{"weight": 65535}')

    # With strict_number_long the wrapped form is written back.
    strict = JSONOptions(strict_number_long=True, json_mode=JSONMode.LEGACY)
    encoded = json_util.dumps(document, json_options=strict)
    self.assertEqual(encoded, wrapped)
def test_uuid(self):
    """Check UUID encoding/decoding across option sets and representations."""
    subtype_3 = '{"uuid": {"$binary": "9HrBC1jMQ3KlZw4CssPUeQ==", "$type": "03"}}'
    subtype_4 = '{"uuid": {"$binary": "9HrBC1jMQ3KlZw4CssPUeQ==", "$type": "04"}}'

    doc = {'uuid': uuid.UUID('f47ac10b-58cc-4372-a567-0e02b2c3d479')}
    python_legacy = UuidRepresentation.PYTHON_LEGACY
    uuid_legacy_opts = LEGACY_JSON_OPTIONS.with_options(
        uuid_representation=python_legacy)
    self.round_trip(doc, json_options=uuid_legacy_opts)

    # Legacy options write the hex "$uuid" form.
    legacy_text = json_util.dumps(doc, json_options=LEGACY_JSON_OPTIONS)
    self.assertEqual(
        '{"uuid": {"$uuid": "f47ac10b58cc4372a5670e02b2c3d479"}}',
        legacy_text)

    # Strict options with PYTHON_LEGACY write binary subtype 03.
    strict_legacy = STRICT_JSON_OPTIONS.with_options(
        uuid_representation=python_legacy)
    self.assertEqual(subtype_3,
                     json_util.dumps(doc, json_options=strict_legacy))

    # strict_uuid with STANDARD writes binary subtype 04.
    strict_standard = JSONOptions(strict_uuid=True,
                                  json_mode=JSONMode.LEGACY,
                                  uuid_representation=STANDARD)
    self.assertEqual(subtype_4,
                     json_util.dumps(doc, json_options=strict_standard))

    self.assertEqual(
        doc, json_util.loads(subtype_3, json_options=uuid_legacy_opts))

    representations = (set(ALL_UUID_REPRESENTATIONS) -
                       {UuidRepresentation.UNSPECIFIED})
    for uuid_representation in representations:
        options = JSONOptions(strict_uuid=True,
                              json_mode=JSONMode.LEGACY,
                              uuid_representation=uuid_representation)
        self.round_trip(doc, json_options=options)
        # Ignore UUID representation when decoding BSON binary subtype 4.
        self.assertEqual(doc,
                         json_util.loads(subtype_4, json_options=options))
def write_record(self, obj: dict) -> None:
    """
    Log the document at INFO level, serialized with relaxed JSON options.

    The serialized document is used both as the log message and as the
    ``record`` entry of the ``Attributes`` extra, next to ``self.identifier``.

    :param obj: dict. The document to serialize and log.
    :return: None
    """
    opts = JSONOptions(
        strict_number_long=False,
        datetime_representation=DatetimeRepresentation.ISO8601,
        json_mode=JSONMode.RELAXED)
    # Pass the options explicitly: they used to be built and then discarded,
    # so the output silently followed the library's default options instead.
    obj_str = json_util.dumps(obj, json_options=opts)
    logger.info(extra=dict(Func='Record',
                           Op='Tail',
                           Attributes={
                               'identifier': self.identifier,
                               'record': obj_str
                           }),
                msg=obj_str)
def assertBinaryUUID(self, val):
    """Assert that ``val`` is a ``Binary`` whose subtype is ``UUID_SUBTYPE``."""
    self.assertIsInstance(val, Binary)
    self.assertEqual(val.subtype, UUID_SUBTYPE)


# Location of JSON test files.
BASE = os.path.join(
    os.path.dirname(os.path.realpath(__file__)),
    'client-side-encryption')
SPEC_PATH = os.path.join(BASE, 'spec')

OPTS = CodecOptions(uuid_representation=STANDARD)

# Use SON to preserve the order of fields while parsing json. Use tz_aware
# =False to match how CodecOptions decodes dates.
JSON_OPTS = JSONOptions(
    document_class=SON,
    uuid_representation=STANDARD,
    tz_aware=False)


def read(*paths):
    """Return the text of the file at ``paths`` joined under ``BASE``."""
    target = os.path.join(BASE, *paths)
    with open(target) as fp:
        contents = fp.read()
    return contents


def json_data(*paths):
    """Read the file at ``paths`` and parse it with ``JSON_OPTS``."""
    text = read(*paths)
    return json_util.loads(text, json_options=JSON_OPTS)


def bson_data(*paths):
    """Parse the file at ``paths`` and encode the result with ``OPTS``."""
    document = json_data(*paths)
    return encode(document, codec_options=OPTS)
def handler(event, context):
    """
    Perform a complete backup of a MongoDB database to a S3 bucket.

    Each collection will be stored in a separate JSON file, as output by
    `mongodump`: one JSON document per line, with dates written in ISO-8601
    form.

    Required environment values are MONGO_URI, BUCKET_NAME.
    Optionally MONGO_DATABASE, BUCKET_FOLDER and COLLECTION_BLACKLIST can be
    set. Two further switches are read:

    * MONGO_URI_IS_ENCRYPTED -- when set, MONGO_URI is treated as a
      base64-encoded KMS ciphertext and decrypted before use.
    * IN_MEMORY -- when set, each collection is buffered in memory and
      uploaded directly instead of going through ``temp_filepath``.

    :param event: invocation event (not used)
    :param context: invocation context (not used)
    :return: None
    """
    collection_blacklist = environ.get("COLLECTION_BLACKLIST")
    bucket_folder = environ.get("BUCKET_FOLDER", "backups")
    bucket_name = environ["BUCKET_NAME"]

    db_uri = environ["MONGO_URI"]
    if environ.get("MONGO_URI_IS_ENCRYPTED"):
        from base64 import b64decode
        kms = boto.client("kms")
        decrypted = kms.decrypt(CiphertextBlob=b64decode(db_uri))
        db_uri = decrypted["Plaintext"].decode()

    db_name = environ.get("MONGO_DATABASE")
    if db_name is None:
        # Fall back to the database named in the URI path.
        from urllib.parse import urlparse
        loc = urlparse(db_uri)
        db_name = loc.path.strip("/")

    LOGGER.info("Backing up collections from database %s in bucket %s",
                db_name, bucket_name)

    # Check that the given bucket actually exists.
    s3.meta.client.head_bucket(Bucket=bucket_name)

    json_options = JSONOptions(
        datetime_representation=DatetimeRepresentation.ISO8601)
    skip = ({s.strip() for s in collection_blacklist.split(",")}
            if collection_blacklist else set())

    client = MongoClient(db_uri)
    try:
        database = client.get_database(db_name)
        # collection_names() was removed in PyMongo 4; list_collection_names()
        # is its replacement.
        eligible_collections = [
            name for name in database.list_collection_names()
            if name not in skip
        ]

        def write_all_docs(collection_name, writer):
            """Write every document of the collection as one JSON line."""
            for doc in database.get_collection(collection_name).find():
                writer.write(dumps(doc, json_options=json_options) + "\n")

        for collection_name in eligible_collections:
            key = "{}/{}.json".format(bucket_folder, collection_name)
            if environ.get("IN_MEMORY"):
                writer = StringIO()
                write_all_docs(collection_name, writer)
                s3.Bucket(bucket_name).put_object(
                    Body=writer.getvalue().encode(),
                    Key=key,
                )
            else:
                # NOTE(review): temp_filepath is not defined in this function;
                # presumably a module-level constant -- confirm.
                with open(temp_filepath, "w") as writer:
                    write_all_docs(collection_name, writer)
                # Upload only after the file is closed so all data is flushed.
                s3.Bucket(bucket_name).upload_file(temp_filepath, key)
            LOGGER.info("Done backing up collection %s", collection_name)
    finally:
        # Release the connection pool even when a dump or upload fails.
        client.close()
import json import math from datetime import datetime from enum import Enum from typing import Any, Tuple from uuid import UUID import numpy as np from bson import SON, ObjectId from bson.json_util import JSONOptions, JSONMode, dumps, loads __all__ = ['json_dumps', 'json_loads'] JSON_OPTIONS = JSONOptions(json_mode=JSONMode.RELAXED) JSON_OPTIONS.strict_uuid = False # do not move it to the constructor above! def _json_convert(o: Any, no_dollar_field: bool = False) -> Any: if isinstance(o, bool): # special fix: bool is subclass of int, we do not want to convert True # to 1, so we need to fix it. o = o elif isinstance(o, Enum): o = _json_convert(o.value, no_dollar_field) elif hasattr(o, 'items'): o = SON((k, _json_convert(v, no_dollar_field)) for k, v in o.items()) elif hasattr(o, '__iter__') and not isinstance(o, (str, bytes, np.ndarray)): o = [_json_convert(v, no_dollar_field) for v in o] elif isinstance(o, str): o = str(o)
"key": "key", "endpoint": "example.com" } encrypter.create_data_key("aws", master_key=master_key) self.assertEqual("example.com:443", mock_key_vault.kms_endpoint) def read(filename, **kwargs): with open(os.path.join(DATA_DIR, filename), **kwargs) as fp: return fp.read() OPTS = CodecOptions(uuid_representation=STANDARD) # Use SON to preserve the order of fields while parsing json. JSON_OPTS = JSONOptions(document_class=SON, uuid_representation=STANDARD) def json_data(filename): return json_util.loads(read(filename), json_options=JSON_OPTS) def bson_data(filename): return BSON.encode(json_data(filename), codec_options=OPTS) def http_data(filename): data = read(filename, mode='rb') return data.replace(b'\n', b'\r\n')
def test_datetime(self):
    """Check ``$date`` decoding, the dumps formats and the timezone options."""
    tz_aware_opts = json_util.DEFAULT_JSON_OPTIONS.with_options(
        tz_aware=True)

    # only millis, not micros
    aware_sample = datetime.datetime(2009, 12, 9, 15, 49, 45, 191000, utc)
    self.round_trip({"date": aware_sample}, json_options=tz_aware_opts)
    naive_sample = datetime.datetime(2009, 12, 9, 15, 49, 45, 191000)
    self.round_trip({"date": naive_sample})

    # Every spelling below denotes the epoch.
    epoch_spellings = (
        '{"dt": { "$date" : "1970-01-01T00:00:00.000+0000"}}',
        '{"dt": { "$date" : "1970-01-01T00:00:00.000000+0000"}}',
        '{"dt": { "$date" : "1970-01-01T00:00:00.000+00:00"}}',
        '{"dt": { "$date" : "1970-01-01T00:00:00.000000+00:00"}}',
        '{"dt": { "$date" : "1970-01-01T00:00:00.000000+00"}}',
        '{"dt": { "$date" : "1970-01-01T00:00:00.000Z"}}',
        '{"dt": { "$date" : "1970-01-01T00:00:00.000000Z"}}',
        '{"dt": { "$date" : "1970-01-01T00:00:00Z"}}',
        '{"dt": {"$date": "1970-01-01T00:00:00.000"}}',
        '{"dt": { "$date" : "1970-01-01T00:00:00"}}',
        '{"dt": { "$date" : "1970-01-01T00:00:00.000000"}}',
        '{"dt": { "$date" : "1969-12-31T16:00:00.000-0800"}}',
        '{"dt": { "$date" : "1969-12-31T16:00:00.000000-0800"}}',
        '{"dt": { "$date" : "1969-12-31T16:00:00.000-08:00"}}',
        '{"dt": { "$date" : "1969-12-31T16:00:00.000000-08:00"}}',
        '{"dt": { "$date" : "1969-12-31T16:00:00.000000-08"}}',
        '{"dt": { "$date" : "1970-01-01T01:00:00.000+0100"}}',
        '{"dt": { "$date" : "1970-01-01T01:00:00.000000+0100"}}',
        '{"dt": { "$date" : "1970-01-01T01:00:00.000+01:00"}}',
        '{"dt": { "$date" : "1970-01-01T01:00:00.000000+01:00"}}',
        '{"dt": { "$date" : "1970-01-01T01:00:00.000000+01"}}',
    )
    for text in epoch_spellings:
        decoded_aware = json_util.loads(text, json_options=tz_aware_opts)
        self.assertEqual(EPOCH_AWARE, decoded_aware["dt"])
        decoded_naive = json_util.loads(text)
        self.assertEqual(EPOCH_NAIVE, decoded_naive["dt"])

    # Numeric "$date" values, bare and wrapped in "$numberLong".
    dtm = datetime.datetime(1, 1, 1, 1, 1, 1, 0, utc)
    for text in ('{"dt": {"$date": -62135593139000}}',
                 '{"dt": {"$date": {"$numberLong": "-62135593139000"}}}'):
        decoded = json_util.loads(text, json_options=tz_aware_opts)
        self.assertEqual(dtm, decoded["dt"])

    # Test dumps format
    pre_number_long = '{"dt": {"$date": {"$numberLong": "-62135593138990"}}}'
    post_iso = '{"dt": {"$date": "1972-01-01T01:01:01.010Z"}}'
    pre_epoch = {"dt": datetime.datetime(1, 1, 1, 1, 1, 1, 10000, utc)}
    post_epoch = {"dt": datetime.datetime(1972, 1, 1, 1, 1, 1, 10000, utc)}

    legacy = {"json_options": LEGACY_JSON_OPTIONS}
    strict = {"json_options": STRICT_JSON_OPTIONS}
    # (expected text, document, keyword arguments for dumps). The default
    # cases pass no json_options at all rather than json_options=None.
    dump_cases = (
        (pre_number_long, pre_epoch, {}),
        (post_iso, post_epoch, {}),
        ('{"dt": {"$date": -62135593138990}}', pre_epoch, legacy),
        ('{"dt": {"$date": 63075661010}}', post_epoch, legacy),
        (pre_number_long, pre_epoch, strict),
        (post_iso, post_epoch, strict),
    )
    for expected, document, dump_kwargs in dump_cases:
        self.assertEqual(expected, json_util.dumps(document, **dump_kwargs))

    number_long_options = JSONOptions(
        datetime_representation=DatetimeRepresentation.NUMBERLONG,
        json_mode=JSONMode.LEGACY)
    self.assertEqual(
        '{"dt": {"$date": {"$numberLong": "63075661010"}}}',
        json_util.dumps(post_epoch, json_options=number_long_options))
    self.assertEqual(
        pre_number_long,
        json_util.dumps(pre_epoch, json_options=number_long_options))

    # ISO8601 mode assumes naive datetimes are UTC
    pre_epoch_naive = {"dt": datetime.datetime(1, 1, 1, 1, 1, 1, 10000)}
    post_epoch_naive = {
        "dt": datetime.datetime(1972, 1, 1, 1, 1, 1, 10000)
    }
    self.assertEqual(
        pre_number_long,
        json_util.dumps(pre_epoch_naive, json_options=STRICT_JSON_OPTIONS))
    self.assertEqual(
        post_iso,
        json_util.dumps(post_epoch_naive, json_options=STRICT_JSON_OPTIONS))

    # Test tz_aware and tzinfo options
    offset_text = '{"dt": {"$date": "1972-01-01T01:01:01.010+0000"}}'
    self.assertEqual(
        datetime.datetime(1972, 1, 1, 1, 1, 1, 10000, utc),
        json_util.loads(offset_text, json_options=tz_aware_opts)["dt"])
    self.assertEqual(
        datetime.datetime(1972, 1, 1, 1, 1, 1, 10000, utc),
        json_util.loads(
            offset_text,
            json_options=JSONOptions(tz_aware=True, tzinfo=utc))["dt"])
    self.assertEqual(
        datetime.datetime(1972, 1, 1, 1, 1, 1, 10000),
        json_util.loads(
            offset_text,
            json_options=JSONOptions(tz_aware=False))["dt"])
    self.round_trip(pre_epoch_naive,
                    json_options=JSONOptions(tz_aware=False))

    # Test a non-utc timezone
    pacific = FixedOffset(-8 * 60, 'US/Pacific')
    aware_datetime = {
        "dt": datetime.datetime(2002, 10, 27, 6, 0, 0, 10000, pacific)
    }
    self.assertEqual(
        '{"dt": {"$date": "2002-10-27T06:00:00.010-0800"}}',
        json_util.dumps(aware_datetime, json_options=STRICT_JSON_OPTIONS))

    legacy_pacific = JSONOptions(json_mode=JSONMode.LEGACY,
                                 tz_aware=True,
                                 tzinfo=pacific)
    self.round_trip(aware_datetime, json_options=legacy_pacific)

    iso_pacific = JSONOptions(
        datetime_representation=DatetimeRepresentation.ISO8601,
        json_mode=JSONMode.LEGACY,
        tz_aware=True,
        tzinfo=pacific)
    self.round_trip(aware_datetime, json_options=iso_pacific)
USER_DEFINED_SUBTYPE, UuidRepresentation, STANDARD) from bson.code import Code from bson.dbref import DBRef from bson.int64 import Int64 from bson.max_key import MaxKey from bson.min_key import MinKey from bson.objectid import ObjectId from bson.regex import Regex from bson.timestamp import Timestamp from bson.tz_util import FixedOffset, utc from test import unittest, IntegrationTest STRICT_JSON_OPTIONS = JSONOptions( strict_number_long=True, datetime_representation=DatetimeRepresentation.ISO8601, strict_uuid=True, json_mode=JSONMode.LEGACY) class TestJsonUtil(unittest.TestCase): def round_tripped(self, doc, **kwargs): return json_util.loads(json_util.dumps(doc, **kwargs), **kwargs) def round_trip(self, doc, **kwargs): self.assertEqual(doc, self.round_tripped(doc, **kwargs)) def test_basic(self): self.round_trip({"hello": "world"}) def test_json_options_with_options(self):
def get_formatted_params(self, params):
    """Return ``params`` serialized by ``dumps`` with datetime representation 2."""
    # NOTE(review): assuming JSONOptions is bson.json_util's, 2 corresponds to
    # DatetimeRepresentation.ISO8601 -- confirm.
    options = JSONOptions(datetime_representation=2)
    formatted = dumps(params, json_options=options)
    return formatted
def import_consolidation(serial_number, last_pull_date):
    """
    ETL function that imports new consolidation data.

    Runs an aggregation on ``IBAutomation`` for the given serial number and
    the consolidation information types received after ``last_pull_date``,
    bulk-saves the constructed rows and flags the customer's last revision as
    monitored. When the import committed and ``__ACTIVATED_PURGE`` is set, the
    remote documents received before ``last_pull_date`` are deleted.

    :param serial_number: serial number to retrieve
    :param last_pull_date: date from which retrieval begins (handed to
        ``parse`` before being sent as a bound)
    :return: None
    """
    if not _check_webservice_status():
        logger.error(
            "Web service not available, not ETL integration of consolidation for {}"
            .format(serial_number))
        return

    # Information types shared by the import query and the purge query.
    information_types = [
        "EltXAConsolidation", "EltMGConsolidation", "EltCTConsolidation",
        "EltRFConsolidation"
    ]
    # NOTE(review): assuming JSONOptions is bson.json_util's, 2 is
    # DatetimeRepresentation.ISO8601 -- confirm and prefer the named constant.
    json_options = JSONOptions(datetime_representation=2)

    launch_purge = False
    url_target = "{0}/aggregate/IBAutomation".format(__etl_http_link)
    params = dumps([{
        "$unwind": "$data.resultRows"
    }, {
        "$match": {
            "data.serialNumber": serial_number,
            "data.dwibInformationType": {
                "$in": information_types
            },
            "receptionDate": {
                "$gt": parse(last_pull_date)
            }
        }
    }], json_options=json_options)
    r = requests.get(url_target,
                     params={"query": params},
                     auth=(__user_etl_mongo, __etl_etl_mongo))

    if r.status_code == 200:
        try:
            rows = [
                construct_consolidation(elem, mapping)
                for elem in loads(_decompress(r.content))
            ]
            DB_scoped_session.bulk_save_objects(rows)
            # No count() beforehand: an UPDATE that matches no row is a no-op.
            # "== True" is required here; it builds the SQL expression.
            DB_scoped_session.query(Customer).filter(
                Customer.serial_number == serial_number,
                Customer.is_last == True).update({"is_monitored": 1})
            DB_scoped_session.commit()
            launch_purge = True
            logger.info(
                'Successful ETL consolidation for {0} since {1}'.format(
                    serial_number, last_pull_date))
        except Exception as e:
            # Task boundary: log with the traceback, then undo the batch.
            logger.exception(
                "Error in ETL task (import_consolidation)({0}-{1}): {2}".
                format(serial_number, last_pull_date, e))
            DB_scoped_session.rollback()
        finally:
            DB_scoped_session.close()
    else:
        # Previously a failed request was ignored without any trace.
        logger.error(
            "ETL consolidation request failed for {0} since {1}: "
            "status_code {2}".format(serial_number, last_pull_date,
                                     r.status_code))

    # Purge only after a committed import, so remote data is never deleted
    # unless the newer documents were stored locally.
    if launch_purge and __ACTIVATED_PURGE:
        url_target = "{0}/delete/IBAutomation".format(__etl_http_link)
        params = dumps(
            {
                "data.serialNumber": serial_number,
                "data.dwibInformationType": {
                    "$in": information_types
                },
                "receptionDate": {
                    "$lt": parse(last_pull_date)
                }
            },
            json_options=json_options)
        r = requests.delete(url_target,
                            params={"query": params},
                            auth=(__user_etl_mongo, __etl_etl_mongo))
        logger.warning(
            "ETL consolidation purge task ->status_code {0} : {1}".format(
                r.status_code, r.content))
"key": "key", "endpoint": "example.com" } encrypter.create_data_key("aws", master_key=master_key) self.assertEqual("example.com:443", mock_key_vault.kms_endpoint) def read(filename, **kwargs): with open(os.path.join(DATA_DIR, filename), **kwargs) as fp: return fp.read() OPTS = CodecOptions(uuid_representation=UuidRepresentation.UNSPECIFIED) # Use SON to preserve the order of fields while parsing json. JSON_OPTS = JSONOptions(document_class=SON, uuid_representation=UuidRepresentation.UNSPECIFIED) def json_data(filename): return json_util.loads(read(filename), json_options=JSON_OPTS) def bson_data(filename): return bson.encode(json_data(filename), codec_options=OPTS) def http_data(filename): data = read(filename, mode='rb') return data.replace(b'\n', b'\r\n')