def test_request(self):
    """
    Send one authenticated record, fill the rest of the batch with copies,
    then fetch the published link and verify the original record is present.
    """
    # MAKE SOME DATA
    data = {
        "constant": "this is a test",
        "random-data": convert.bytes2base64(Random.bytes(100))
    }

    # unwrap() DUE TO BUG https://github.com/kumar303/mohawk/issues/21
    client = Client(settings.url, unwrap(settings.hawk))
    link, id = client.send(data)
    Log.note("Success! Located at {{link}} id={{id}}", link=link, id=id)

    # FILL THE REST OF THE FILE
    # BUG FIX: log message typo "Add ing" -> "Adding"
    Log.note("Adding {{num}} more...", num=99 - id)
    for i in range(id + 1, storage.BATCH_SIZE):
        l, k = client.send(data)
        if l != link:
            # EVERY RECORD IN THE SAME BATCH MUST SHARE ONE LINK
            Log.error("Expecting rest of data to have same link")

    # TEST LINK HAS DATA
    raw_content = requests.get(link).content
    content = convert.zip2bytes(raw_content)
    for line in convert.utf82unicode(content).split("\n"):
        data = convert.json2value(line)
        if data.etl.id == id:
            Log.note("Data {{id}} found", id=id)
            break
    else:
        # for/else: ONLY RUNS WHEN THE LOOP NEVER break'ed
        Log.error("Expecting to find data at link")
def test_public_request(self):
    """
    POST an anonymous (public) record matching the server pattern; expect 200
    and a (link, id) pair back.

    :return: (link, id) OF THE STORED RECORD
    """
    # MAKE SOME DATA
    data = {
        "a": {  # MATCHES SERVER PATTERN
            "b": "good",
            "c": [
                {"k": "good", "m": 1},
                {"k": 2, "m": 2}
            ]
        },
        "constant": "this is a test",
        "random-data": convert.bytes2base64(Random.bytes(100))
    }

    content = json.dumps(data)
    response = requests.post(
        url=settings.url,
        data=content,
        headers={
            'Content-Type': CONTENT_TYPE
        }
    )

    self.assertEqual(response.status_code, 200, "Expecting 200")

    about = json.loads(response.content)
    # BUG FIX: Log.note WAS UNREACHABLE (AFTER return) AND REFERENCED
    # UNDEFINED link/id; EXTRACT THE VALUES, LOG, THEN RETURN
    link, id = about['link'], about['etl']['id']
    Log.note("Data located at {{link}} id={{id}}", link=link, id=id)
    return link, id
def test_public_request_too_big(self):
    """
    An oversized public payload (500 random bytes) must be rejected:
    either the POST itself fails or the status is not 200.
    """
    # BUILD A PAYLOAD LARGER THAN THE SERVER ALLOWS
    payload = {
        "a": {  # MATCHES SERVER PATTERN
            "b": "good",
            "c": [
                {"k": "good", "m": 1},
                {"k": 2, "m": 2}
            ]
        },
        "constant": "this is a test",
        "random-data": convert.bytes2base64(Random.bytes(500))
    }
    body = json.dumps(payload)

    def poster():
        # POST AND DEMAND SUCCESS; EXPECTED TO RAISE ONE WAY OR ANOTHER
        reply = requests.post(
            url=settings.url,
            data=body,
            headers={'Content-Type': CONTENT_TYPE}
        )
        self.assertEqual(reply.status_code, 200, "Expecting 200")

    self.assertRaises(Exception, poster)
def _deep_json_to_string(value, depth):
    """
    :param value: SOME STRUCTURE
    :param depth: THE MAX DEPTH OF PROPERTIES, DEEPER WILL BE STRING-IFIED
    :return: FLATTER STRUCTURE
    """
    # GUARD-CLAUSE DISPATCH; ORDER MATTERS (MAPPINGS BEFORE SEQUENCES, ETC.)
    if is_data(value):
        if depth == 0:
            # DEPTH EXHAUSTED: COLLAPSE THE WHOLE MAPPING TO CLIPPED JSON
            return strings.limit(value2json(value), LOG_STRING_LENGTH)
        return {
            key: _deep_json_to_string(item, depth - 1)
            for key, item in value.items()
        }
    if is_sequence(value):
        return strings.limit(value2json(value), LOG_STRING_LENGTH)
    if isinstance(value, number_types):
        return value
    if is_text(value):
        return strings.limit(value, LOG_STRING_LENGTH)
    if is_binary(value):
        # BINARY IS LOGGED AS (CLIPPED) BASE64
        return strings.limit(bytes2base64(value), LOG_STRING_LENGTH)
    if isinstance(value, (date, datetime)):
        return datetime2unix(value)
    return strings.limit(value2json(value), LOG_STRING_LENGTH)
def test_missing_auth(self):
    """
    POSTing without Hawk credentials must be refused with 403.
    """
    # MAKE SOME DATA
    payload = {
        "constant": "this is a test",
        "random-data": convert.bytes2base64(Random.bytes(100))
    }

    body = convert.unicode2utf8(convert.value2json(payload))
    response = requests.post(settings.bad_url, data=body)
    self.assertEqual(response.status_code, 403)
def save(self, query):
    """
    SAVE query TO ES FOR LATER RECOVERY
    :param query: QUERY TO SAVE (query.meta IS RESTORED BEFORE RETURN)
    :return: SHORT HASH TO USE FOR RECOVERY
    """
    # BUG FIX: query.meta WAS CLOBBERED PERMANENTLY, MUTATING THE CALLER'S
    # OBJECT; SAVE AND RESTORE IT (CONSISTENT WITH THE NEWER save())
    meta, query.meta = query.meta, None
    json = convert.value2json(query)
    query.meta = meta
    # RENAMED FROM "hash", WHICH SHADOWED THE BUILTIN
    digest = convert.unicode2utf8(json)

    # TRY MANY HASHES AT ONCE: ITERATED sha1 GIVES A CHAIN OF CANDIDATES
    hashes = [None] * HASH_BLOCK_SIZE
    for i in range(HASH_BLOCK_SIZE):
        digest = hashlib.sha1(digest).digest()
        hashes[i] = digest

    # 6 BYTES OF EACH DIGEST, URL-SAFE ("/" -> "_")
    short_hashes = [
        convert.bytes2base64(h[0:6]).replace("/", "_")
        for h in hashes
    ]
    available = {h: True for h in short_hashes}

    existing = self.es.query({
        "from": {
            "type": "elasticsearch",
            "settings": self.es.settings
        },
        "where": {"terms": {"hash": short_hashes}},
        "meta": {"timeout": "2second"}
    })
    for e in Cube(select=existing.select, edges=existing.edges, data=existing.data).values():
        if e.query == json:
            # IDENTICAL QUERY ALREADY SAVED; REUSE ITS HASH
            return e.hash
        available[e.hash] = False

    # THIS WILL THROW AN ERROR IF THERE ARE NONE, HOW UNLUCKY!
    best = [h for h in short_hashes if available[h]][0]
    self.queue.add({
        "id": best,
        "value": {
            "hash": best,
            "create_time": Date.now(),
            "last_used": Date.now(),
            "query": json
        }
    })
    Log.note("Saved query as {{hash}}", hash=best)
    return best
def encrypt(text, _key, salt=None):
    """
    AES-256-CBC ENCRYPT text WITH _key
    RETURN JSON OF ENCRYPTED DATA   {"salt":s, "length":l, "data":d}

    :param text: unicode TO ENCRYPT (OTHER TYPES ARE REJECTED)
    :param _key: 256-BIT KEY; str IS COERCED TO bytearray
    :param salt: 16-BYTE IV; RANDOM IF NOT PROVIDED
    :return: JSON STRING WITH type/salt/length/data PROPERTIES
    """
    from pyLibrary.queries import jx

    if not isinstance(text, unicode):
        Log.error("only unicode is encrypted")
    if _key is None:
        Log.error("Expecting a key")
    if isinstance(_key, str):
        _key = bytearray(_key)
    if salt is None:
        salt = Random.bytes(16)

    data = bytearray(text.encode("utf8"))

    # Initialize encryption using key and iv
    key_expander_256 = key_expander.KeyExpander(256)
    expanded_key = key_expander_256.expand(_key)
    aes_cipher_256 = aes_cipher.AESCipher(expanded_key)
    aes_cbc_256 = cbc_mode.CBCMode(aes_cipher_256, 16)
    aes_cbc_256.set_iv(salt)

    # output.length RECORDS THE PLAINTEXT BYTE COUNT SO decrypt() CAN
    # STRIP BLOCK PADDING
    output = Data()
    output.type = "AES256"
    output.salt = convert.bytes2base64(salt)
    output.length = len(data)

    # ENCRYPT IN 16-BYTE BLOCKS; jx.groupby(data, size=16) YIELDS THE CHUNKS
    # (NOTE(review): final-chunk padding is presumably handled by
    # jx.groupby/encrypt_block — confirm before changing this loop)
    encrypted = bytearray()
    for _, d in jx.groupby(data, size=16):
        encrypted.extend(aes_cbc_256.encrypt_block(d))
    output.data = convert.bytes2base64(encrypted)
    json = convert.value2json(output)

    if DEBUG:
        # ROUND-TRIP SELF-CHECK
        test = decrypt(json, _key)
        if test != text:
            Log.error("problem with encryption")

    return json
def encrypt(text, _key, salt=None):
    """
    AES-256-CBC ENCRYPT text WITH _key (OLDER VARIANT USING Dict)
    RETURN JSON OF ENCRYPTED DATA   {"salt":s, "length":l, "data":d}

    :param text: unicode TO ENCRYPT (OTHER TYPES ARE REJECTED)
    :param _key: 256-BIT KEY; str IS COERCED TO bytearray
    :param salt: 16-BYTE IV; RANDOM IF NOT PROVIDED
    :return: JSON STRING WITH type/salt/length/data PROPERTIES
    """
    from pyLibrary.queries import jx

    if not isinstance(text, unicode):
        Log.error("only unicode is encrypted")
    if _key is None:
        Log.error("Expecting a key")
    if isinstance(_key, str):
        _key = bytearray(_key)
    if salt is None:
        salt = Random.bytes(16)

    data = bytearray(text.encode("utf8"))

    # Initialize encryption using key and iv
    key_expander_256 = key_expander.KeyExpander(256)
    expanded_key = key_expander_256.expand(_key)
    aes_cipher_256 = aes_cipher.AESCipher(expanded_key)
    aes_cbc_256 = cbc_mode.CBCMode(aes_cipher_256, 16)
    aes_cbc_256.set_iv(salt)

    # output.length RECORDS THE PLAINTEXT BYTE COUNT SO decrypt() CAN
    # STRIP BLOCK PADDING
    output = Dict()
    output.type = "AES256"
    output.salt = convert.bytes2base64(salt)
    output.length = len(data)

    # ENCRYPT IN 16-BYTE BLOCKS; jx.groupby(data, size=16) YIELDS THE CHUNKS
    # (NOTE(review): final-chunk padding is presumably handled by
    # jx.groupby/encrypt_block — confirm before changing this loop)
    encrypted = bytearray()
    for _, d in jx.groupby(data, size=16):
        encrypted.extend(aes_cbc_256.encrypt_block(d))
    output.data = convert.bytes2base64(encrypted)
    json = convert.value2json(output)

    if DEBUG:
        # ROUND-TRIP SELF-CHECK
        test = decrypt(json, _key)
        if test != text:
            Log.error("problem with encryption")

    return json
def test_save_then_load(self):
    """
    Save a query (meta.save=True), then recover it through /find/{hash}
    and verify the stored bytes match.
    """
    test = {
        "data": [{"a": "b"}],
        "query": {
            "meta": {"save": True},
            "from": TEST_TABLE,
            "select": "a"
        },
        "expecting_list": {
            "meta": {"format": "list"},
            "data": ["b"]
        }
    }
    settings = self.utils.fill_container(test)

    # RENAMED FROM "bytes", WHICH SHADOWED THE BUILTIN
    query_bytes = unicode2utf8(value2json({
        "from": settings.index,
        "select": "a",
        "format": "list"
    }))
    # SAME SHORT-HASH SCHEME THE SERVICE USES: 6 BYTES OF sha1, URL-SAFE BASE64
    expected_hash = convert.bytes2base64(hashlib.sha1(query_bytes).digest()[0:6]).replace("/", "_")
    wrap(test).expecting_list.meta.saved_as = expected_hash

    self.utils.send_queries(test)

    # ENSURE THE QUERY HAS BEEN INDEXED
    Log.note("Flush saved query (with hash {{hash}})", hash=expected_hash)
    container = elasticsearch.Index(index="saved_queries", type=save_query.DATA_TYPE, kwargs=settings)
    container.flush(forced=True)
    with Timer("wait for 5 seconds"):
        Till(seconds=5).wait()

    url = URL(self.utils.testing.query)
    response = self.utils.try_till_response(url.scheme + "://" + url.host + ":" + text_type(url.port) + "/find/" + expected_hash, data=b'')
    self.assertEqual(response.status_code, 200)
    self.assertEqual(response.all_content, query_bytes)
def test_save_then_load(self):
    """
    Save a query (meta.save=True), then recover it through /find/{hash}
    and verify the stored bytes match.
    """
    test = {
        "data": [
            {"a": "b"}
        ],
        "query": {
            "meta": {"save": True},
            "from": TEST_TABLE,
            "select": "a"
        },
        "expecting_list": {
            "meta": {"format": "list"},
            "data": ["b"]
        }
    }
    settings = self.utils.fill_container(test)

    # RENAMED FROM "bytes", WHICH SHADOWED THE BUILTIN
    query_bytes = convert.unicode2utf8(convert.value2json({
        "from": settings.index,
        "select": "a",
        "format": "list"
    }))
    # SAME SHORT-HASH SCHEME THE SERVICE USES: 6 BYTES OF sha1, URL-SAFE BASE64
    expected_hash = convert.bytes2base64(hashlib.sha1(query_bytes).digest()[0:6]).replace("/", "_")
    wrap(test).expecting_list.meta.saved_as = expected_hash

    self.utils.send_queries(test)

    # ENSURE THE QUERY HAS BEEN INDEXED
    container = elasticsearch.Index(index="saved_queries", settings=settings)
    container.flush()
    Thread.sleep(seconds=5)

    url = URL(self.utils.service_url)
    response = self.utils.try_till_response(url.scheme + "://" + url.host + ":" + unicode(url.port) + "/find/" + expected_hash, data=b'')
    self.assertEqual(response.status_code, 200)
    self.assertEqual(response.all_content, query_bytes)
def test_recovery_of_empty_string(self):
    """
    Save a query whose where-clause uses an empty-string prefix, then
    recover it through /find/{hash} and verify the stored bytes match.
    """
    test = wrap({
        "data": [
            {"a": "bee"}
        ],
        "query": {
            "from": TEST_TABLE,
            "select": "a",
            "where": {"prefix": {"a": ""}},
            "format": "list"
        },
        "expecting_list": {
            "meta": {"format": "list"},
            "data": ["bee"]
        }
    })
    settings = self.utils.fill_container(test)

    # RENAMED FROM "bytes", WHICH SHADOWED THE BUILTIN
    query_bytes = value2json(test.query).encode('utf8')
    # SAME SHORT-HASH SCHEME THE SERVICE USES: 6 BYTES OF sha1, URL-SAFE BASE64
    expected_hash = convert.bytes2base64(hashlib.sha1(query_bytes).digest()[0:6]).replace("/", "_")
    test.expecting_list.meta.saved_as = expected_hash
    test.query.meta = {"save": True}

    self.utils.send_queries(test)

    # ENSURE THE QUERY HAS BEEN INDEXED
    Log.note("Flush saved query")
    container = elasticsearch.Index(index="saved_queries", kwargs=settings)
    container.flush(forced=True)
    with Timer("wait for 5 seconds"):
        Till(seconds=5).wait()

    url = URL(self.utils.testing.query)
    response = self.utils.try_till_response(url.scheme + "://" + url.host + ":" + text(url.port) + "/find/" + expected_hash, data=b'')
    self.assertEqual(response.status_code, 200)
    self.assertEqual(response.all_content, query_bytes)
def _deep_json_to_string(value, depth):
    """
    :param value: SOME STRUCTURE
    :param depth: THE MAX DEPTH OF PROPERTIES, DEEPER WILL BE STRING-IFIED
    :return: FLATTER STRUCTURE
    """
    def clipped_json(v):
        # SERIALIZE, THEN TRUNCATE TO LOG_STRING_LENGTH
        return strings.limit(value2json(v), LOG_STRING_LENGTH)

    if is_data(value):
        if depth == 0:
            # DEPTH EXHAUSTED: COLLAPSE THE MAPPING TO CLIPPED JSON
            return clipped_json(value)
        return {
            key: _deep_json_to_string(item, depth - 1)
            for key, item in value.items()
        }
    elif is_sequence(value):
        return clipped_json(value)
    elif isinstance(value, number_types):
        return value
    elif is_text(value):
        return strings.limit(value, LOG_STRING_LENGTH)
    elif is_binary(value):
        # BINARY IS LOGGED AS (CLIPPED) BASE64
        return strings.limit(bytes2base64(value), LOG_STRING_LENGTH)
    elif isinstance(value, (date, datetime)):
        return datetime2unix(value)
    else:
        return clipped_json(value)
def _deep_json_to_string(value, depth):
    """
    :param value: SOME STRUCTURE
    :param depth: THE MAX DEPTH OF PROPERTIES, DEEPER WILL BE STRING-IFIED
    :return: FLATTER STRUCTURE
    """
    # GUARD-CLAUSE DISPATCH; ORDER MATTERS (MAPPINGS BEFORE SEQUENCES, ETC.)
    if isinstance(value, Mapping):
        if depth == 0:
            # DEPTH EXHAUSTED: COLLAPSE THE WHOLE MAPPING TO CLIPPED JSON
            return strings.limit(value2json(value), LOG_STRING_LENGTH)
        return {
            key: _deep_json_to_string(item, depth - 1)
            for key, item in value.items()
        }
    if isinstance(value, (list, FlatList)):
        return strings.limit(value2json(value), LOG_STRING_LENGTH)
    if isinstance(value, number_types):
        return value
    if isinstance(value, text_type):
        return strings.limit(value, LOG_STRING_LENGTH)
    if isinstance(value, binary_type):
        # BINARY IS LOGGED AS (CLIPPED) BASE64
        return strings.limit(bytes2base64(value), LOG_STRING_LENGTH)
    return strings.limit(value2json(value), LOG_STRING_LENGTH)
# Smoke-test script: send one authenticated record via Client, then POST the
# configured example payload directly with requests.
import requests

from pyLibrary import convert, jsons
from pyLibrary.debugs.logs import Log
from pyLibrary.dot import unwrap
from pyLibrary.maths.randoms import Random
from modatasubmission import Client

# LOAD CLIENT SETTINGS (url, hawk CREDENTIALS, example PAYLOAD)
settings = jsons.ref.get("file://~/MoDataSubmissionClient.json")

data = {
    "constant": "this is a test",
    "random-data": convert.bytes2base64(Random.bytes(100))
}
# unwrap() NEEDED — presumably same mohawk issue noted elsewhere; confirm
link, id = Client(settings.url, unwrap(settings.hawk)).send(data)
Log.note("Success! Located at {{link}} id={{id}}", link=link, id=id)

# NOW POST THE EXAMPLE PAYLOAD DIRECTLY, WITHOUT THE Client WRAPPER
data = convert.unicode2utf8(convert.value2json(settings.example))
response = requests.post(
    settings.url,
    data=data,
    headers={
        'Content-Type': b'application/json'
    }
)
# NOTE(review): body of this if is truncated at the chunk boundary
if response.status_code != 200:
def save(self, query):
    """
    SAVE query TO ES FOR LATER RECOVERY
    :param query: QUERY TO SAVE (query.meta IS RESTORED BEFORE RETURN)
    :return: SHORT HASH TO USE FOR RECOVERY
    """
    # SERIALIZE WITHOUT meta, BUT RESTORE THE CALLER'S meta AFTERWARD
    meta, query.meta = query.meta, None
    json = convert.value2json(query)
    query.meta = meta
    # RENAMED FROM "hash", WHICH SHADOWED THE BUILTIN
    digest = json.encode("utf8")

    # TRY MANY HASHES AT ONCE: ITERATED sha1 GIVES A CHAIN OF CANDIDATES
    hashes = [None] * HASH_BLOCK_SIZE
    for i in range(HASH_BLOCK_SIZE):
        digest = hashlib.sha1(digest).digest()
        hashes[i] = digest

    # 6 BYTES OF EACH DIGEST, URL-SAFE ("/" -> "_")
    short_hashes = [convert.bytes2base64(h[0:6]).replace("/", "_") for h in hashes]
    available = {h: True for h in short_hashes}

    existing = self.es.query(
        {
            "from": "saved_queries",
            "where": {"terms": {"hash": short_hashes}},
            "meta": {"timeout": "2second"},
        }
    )
    for e in Cube(
        select=existing.select, edges=existing.edges, data=existing.data
    ).values():
        if e.query == json:
            # IDENTICAL QUERY ALREADY SAVED; REUSE ITS HASH
            return e.hash
        available[e.hash] = False

    # THIS WILL THROW AN ERROR IF THERE ARE NONE, HOW UNLUCKY!
    best = first(h for h in short_hashes if available[h])
    self.queue.add(
        {
            "id": best,
            "value": {
                "hash": best,
                "create_time": Date.now(),
                "last_used": Date.now(),
                "query": json,
            },
        }
    )

    if meta.testing:
        # BLOCK UNTIL THE SAVED QUERY IS SEARCHABLE, SO TESTS ARE DETERMINISTIC
        # (NOTE(review): unbounded wait if indexing never completes)
        while True:
            verify = self.es.query(
                {
                    "from": "saved_queries",
                    "where": {"terms": {"hash": best}},
                    "meta": {"timeout": "2second"},
                    "select": "query",
                    "format": "list",
                }
            )
            if verify.data:
                break
            Log.alert("wait for saved query")
            Till(seconds=1).wait()

    Log.note("Saved {{json}} query as {{hash}}", json=json, hash=best)
    return best