def key_from_base64(base64_key):
    return Binary(base64.b64decode(base64_key), UUID_SUBTYPE)
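# --- Hedged usage sketch (not part of the original snippet) ---
# Illustrates what key_from_base64() above expects and returns, assuming the
# same imports the snippet relies on: base64, uuid, and bson's Binary and
# UUID_SUBTYPE constant.
raw_key = uuid.uuid4().bytes                     # 16 raw key bytes
encoded_key = base64.b64encode(raw_key)          # base64 form, e.g. as stored in config
assert key_from_base64(encoded_key) == Binary(raw_key, UUID_SUBTYPE)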
def _authenticate_scram(credentials, sock_info, mechanism):
    """Authenticate using SCRAM."""
    username = credentials.username
    if mechanism == 'SCRAM-SHA-256':
        digest = "sha256"
        digestmod = hashlib.sha256
        data = saslprep(credentials.password).encode("utf-8")
    else:
        digest = "sha1"
        digestmod = hashlib.sha1
        data = _password_digest(username, credentials.password).encode("utf-8")
    source = credentials.source
    cache = credentials.cache

    # Make local
    _hmac = hmac.HMAC

    ctx = sock_info.auth_ctx.get(credentials)
    if ctx and ctx.speculate_succeeded():
        nonce, first_bare = ctx.scram_data
        res = ctx.speculative_authenticate
    else:
        nonce, first_bare, cmd = _authenticate_scram_start(credentials, mechanism)
        res = sock_info.command(source, cmd)

    server_first = res['payload']
    parsed = _parse_scram_response(server_first)
    iterations = int(parsed[b'i'])
    if iterations < 4096:
        raise OperationFailure("Server returned an invalid iteration count.")
    salt = parsed[b's']
    rnonce = parsed[b'r']
    if not rnonce.startswith(nonce):
        raise OperationFailure("Server returned an invalid nonce.")

    without_proof = b"c=biws,r=" + rnonce
    if cache.data:
        client_key, server_key, csalt, citerations = cache.data
    else:
        client_key, server_key, csalt, citerations = None, None, None, None
    # Salt and / or iterations could change for a number of different
    # reasons. Either changing invalidates the cache.
    if not client_key or salt != csalt or iterations != citerations:
        salted_pass = _hi(digest, data, standard_b64decode(salt), iterations)
        client_key = _hmac(salted_pass, b"Client Key", digestmod).digest()
        server_key = _hmac(salted_pass, b"Server Key", digestmod).digest()
        cache.data = (client_key, server_key, salt, iterations)
    stored_key = digestmod(client_key).digest()
    auth_msg = b",".join((first_bare, server_first, without_proof))
    client_sig = _hmac(stored_key, auth_msg, digestmod).digest()
    client_proof = b"p=" + standard_b64encode(_xor(client_key, client_sig))
    client_final = b",".join((without_proof, client_proof))

    server_sig = standard_b64encode(
        _hmac(server_key, auth_msg, digestmod).digest())

    cmd = SON([('saslContinue', 1),
               ('conversationId', res['conversationId']),
               ('payload', Binary(client_final))])
    res = sock_info.command(source, cmd)

    parsed = _parse_scram_response(res['payload'])
    if not compare_digest(parsed[b'v'], server_sig):
        raise OperationFailure("Server returned an invalid signature.")

    # A third empty challenge may be required if the server does not support
    # skipEmptyExchange: SERVER-44857.
    if not res['done']:
        cmd = SON([('saslContinue', 1),
                   ('conversationId', res['conversationId']),
                   ('payload', Binary(b''))])
        res = sock_info.command(source, cmd)
        if not res['done']:
            raise OperationFailure('SASL conversation failed to complete.')
def _element_to_bson(key, value, check_keys, uuid_subtype):
    if not isinstance(key, basestring):
        raise InvalidDocument("documents must have only string keys, "
                              "key was %r" % key)
    if check_keys:
        if key.startswith("$"):
            raise InvalidDocument("key %r must not start with '$'" % key)
        if "." in key:
            raise InvalidDocument("key %r must not contain '.'" % key)

    name = _make_c_string(key, True)
    if isinstance(value, float):
        return BSONNUM + name + struct.pack("<d", value)

    if _use_uuid:
        if isinstance(value, uuid.UUID):
            # Java Legacy
            if uuid_subtype == JAVA_LEGACY:
                # Python 3.0(.1) returns a bytearray instance for bytes (3.1
                # and newer just return a bytes instance). Convert that to
                # binary_type (here and below) for compatibility.
                from_uuid = binary_type(value.bytes)
                as_legacy_java = from_uuid[0:8][::-1] + from_uuid[8:16][::-1]
                value = Binary(as_legacy_java, subtype=OLD_UUID_SUBTYPE)
            # C# legacy
            elif uuid_subtype == CSHARP_LEGACY:
                # Microsoft GUID representation.
                value = Binary(binary_type(value.bytes_le),
                               subtype=OLD_UUID_SUBTYPE)
            # Python
            else:
                value = Binary(binary_type(value.bytes), subtype=uuid_subtype)

    if isinstance(value, Binary):
        subtype = value.subtype
        if subtype == 2:
            value = struct.pack("<i", len(value)) + value
        return (BSONBIN + name +
                struct.pack("<i", len(value)) + b(chr(subtype)) + value)
    if isinstance(value, Code):
        cstring = _make_c_string(value)
        if not value.scope:
            length = struct.pack("<i", len(cstring))
            return BSONCOD + name + length + cstring
        scope = _dict_to_bson(value.scope, False, uuid_subtype, False)
        full_length = struct.pack("<i", 8 + len(cstring) + len(scope))
        length = struct.pack("<i", len(cstring))
        return BSONCWS + name + full_length + length + cstring + scope
    if isinstance(value, binary_type):
        if PY3:
            # Python3 special case. Store 'bytes' as BSON binary subtype 0.
            return (BSONBIN + name +
                    struct.pack("<i", len(value)) + ZERO + value)
        cstring = _make_c_string(value)
        length = struct.pack("<i", len(cstring))
        return BSONSTR + name + length + cstring
    if isinstance(value, unicode):
        cstring = _make_c_string(value)
        length = struct.pack("<i", len(cstring))
        return BSONSTR + name + length + cstring
    if isinstance(value, dict):
        return BSONOBJ + name + _dict_to_bson(value, check_keys,
                                              uuid_subtype, False)
    if isinstance(value, (list, tuple)):
        as_dict = SON(zip([str(i) for i in range(len(value))], value))
        return BSONARR + name + _dict_to_bson(as_dict, check_keys,
                                              uuid_subtype, False)
    if isinstance(value, ObjectId):
        return BSONOID + name + value.binary
    if value is True:
        return BSONBOO + name + ONE
    if value is False:
        return BSONBOO + name + ZERO
    if isinstance(value, int):
        # TODO this is an ugly way to check for this...
        if value > MAX_INT64 or value < MIN_INT64:
            raise OverflowError("BSON can only handle up to 8-byte ints")
        if value > MAX_INT32 or value < MIN_INT32:
            return BSONLON + name + struct.pack("<q", value)
        return BSONINT + name + struct.pack("<i", value)
    # 2to3 will convert long to int here since there is no long in python3.
    # That's OK. The previous if block will match instead.
    if isinstance(value, long):
        if value > MAX_INT64 or value < MIN_INT64:
            raise OverflowError("BSON can only handle up to 8-byte ints")
        return BSONLON + name + struct.pack("<q", value)
    if isinstance(value, datetime.datetime):
        if value.utcoffset() is not None:
            value = value - value.utcoffset()
        millis = int(calendar.timegm(value.timetuple()) * 1000 +
                     value.microsecond / 1000)
        return BSONDAT + name + struct.pack("<q", millis)
    if isinstance(value, Timestamp):
        time = struct.pack("<I", value.time)
        inc = struct.pack("<I", value.inc)
        return BSONTIM + name + inc + time
    if value is None:
        return BSONNUL + name
    if isinstance(value, (RE_TYPE, Regex)):
        pattern = value.pattern
        flags = ""
        if value.flags & re.IGNORECASE:
            flags += "i"
        if value.flags & re.LOCALE:
            flags += "l"
        if value.flags & re.MULTILINE:
            flags += "m"
        if value.flags & re.DOTALL:
            flags += "s"
        if value.flags & re.UNICODE:
            flags += "u"
        if value.flags & re.VERBOSE:
            flags += "x"
        return BSONRGX + name + _make_c_string(pattern, True) + \
            _make_c_string(flags)
    if isinstance(value, DBRef):
        return _element_to_bson(key, value.as_doc(), False, uuid_subtype)
    if isinstance(value, MinKey):
        return BSONMIN + name
    if isinstance(value, MaxKey):
        return BSONMAX + name

    raise InvalidDocument("cannot convert value of type %s to bson" %
                          type(value))
def process_cursor(cursor):
    with open('agg-d-std.json') as f:
        std = json.load(f)
    client = MongoClient("localhost", 27017)
    db = client.dwh
    with tqdm(total=cursor[0], desc="Progress{}".format(cursor[2]),
              position=cursor[2], leave=True) as pbar:
        for doc in db.agg_d.find(
                {"date": {"$gte": datetime.datetime(year=2012, month=1, day=1)},
                 "target": {"$lte": 250000}},
                no_cursor_timeout=True).skip(cursor[1]).limit(cursor[0]):
            time_filter = {"$and": [
                {"date": {"$gte": doc["date"] - relativedelta(days=365)}},
                {"date": {"$lt": doc["date"]}}]}
            if db.agg_d.find({"id": doc["id"], **time_filter}).count() < 50:
                pbar.update(1)
                continue
            record = {
                "x": np.empty((365, 5)),
                "y": (doc["target"] - std["mean"]) / std["std"]
            }
            for i in range(365):
                date = doc["date"] - relativedelta(days=365 - i)
                wday = int(date.strftime("%w")) + 1
                record["x"][i][0] = -std["mean"] / std["std"]
                record["x"][i][1] = np.sin(2 * np.pi * wday / 7)
                record["x"][i][2] = np.cos(2 * np.pi * wday / 7)
                record["x"][i][3] = np.sin(2 * np.pi * date.month / 12)
                record["x"][i][4] = np.cos(2 * np.pi * date.month / 12)
            for day in db.agg_d.find({"id": doc["id"], **time_filter}):
                index = 365 - (doc["date"] - day["date"]).days
                record["x"][index][0] = (day["target"] - std["mean"]) / std["std"]
                wday = int(day["date"].strftime("%w")) + 1
                record["x"][index][1] = np.sin(2 * np.pi * wday / 7)
                record["x"][index][2] = np.cos(2 * np.pi * wday / 7)
                record["x"][index][3] = np.sin(2 * np.pi * doc["month"] / 12)
                record["x"][index][4] = np.cos(2 * np.pi * doc["month"] / 12)
            record["x"] = Binary(_pickle.dumps(record["x"].tolist()))
            if np.random.rand() < TRAIN_RATIO:
                db.train.insert(record)
            else:
                db.test.insert(record)
            pbar.update(1)
def checksum(self, item):
    sha = hashlib.sha1()
    sha.update(item.tostring())
    return Binary(sha.digest())
def upload_pgn(self, run_id, pgn_zip):
    self.pgndb.insert_one({"run_id": run_id, "pgn_zip": Binary(pgn_zip)})
    return {}
def test_sort_18(monty_sort, mongo_sort):
    docs = [
        {"a": ["x", True]},
        {"a": None},
        {"a": []},
        {"a": [5, []]},
        {"a": {"s": 7}},
        {"a": {"s": [9]}},
        {"a": {"s": 10}},
        {"a": 6},
        {"a": 4},
        {"a": [5, None]},
        {"a": [5, [1]]},
        {"a": [Decimal128("4.5"), Binary(b"0")]},
        {"a": [{"s": 5}, False]},
        {"a": [{"s": 9}]},
        {"a": [True, "y"]},
        {"a": []},
    ]
    sort = [("a", -1)]

    monty_c = monty_sort(docs, sort)
    mongo_c = mongo_sort(docs, sort)

    for i in range(len(docs)):
        assert next(mongo_c)["_id"] == next(monty_c)["_id"]
def _do_append(self, collection, version, symbol, item, previous_version):
    data = item.tostring()
    version['base_sha'] = previous_version['base_sha']
    version['up_to'] = previous_version['up_to'] + len(item)
    if len(item) > 0:
        version['segment_count'] = previous_version['segment_count'] + 1
        version['append_count'] = previous_version['append_count'] + 1
        version['append_size'] = previous_version['append_size'] + len(data)
    else:
        version['segment_count'] = previous_version['segment_count']
        version['append_count'] = previous_version['append_count']
        version['append_size'] = previous_version['append_size']

    # _CHUNK_SIZE is probably too big if we're only appending single rows of data -
    # perhaps something smaller, or also look at number of appended segments?
    if version['append_count'] < _APPEND_COUNT and version['append_size'] < _APPEND_SIZE:
        version['base_version_id'] = previous_version.get('base_version_id',
                                                          previous_version['_id'])

        if len(item) > 0:
            segment = {'data': Binary(data), 'compressed': False}
            segment['segment'] = version['up_to'] - 1
            try:
                collection.update_one({'symbol': symbol,
                                       'sha': checksum(symbol, segment)},
                                      {'$set': segment,
                                       '$addToSet': {'parent': version['base_version_id']}},
                                      upsert=True)
            except DuplicateKeyError:
                '''If we get a duplicate key error here, this segment has the same
                symbol/parent/segment as another chunk, but a different sha. This
                means that we have 'forked' history. If we concat_and_rewrite here,
                new chunks will have a different parent id (the _id of this version
                doc) ...so we can safely write them.
                '''
                self._concat_and_rewrite(collection, version, symbol, item, previous_version)
                return

            if 'segment_index' in previous_version:
                segment_index = self._segment_index(
                    item,
                    existing_index=previous_version.get('segment_index'),
                    start=previous_version['up_to'],
                    new_segments=[segment['segment'], ])
                if segment_index:
                    version['segment_index'] = segment_index
            logger.debug("Appended segment %d for parent %s" % (segment['segment'], version['_id']))
        else:
            if 'segment_index' in previous_version:
                version['segment_index'] = previous_version['segment_index']
    else:  # Too much data has been appended now, so rewrite (and compress/chunk).
        self._concat_and_rewrite(collection, version, symbol, item, previous_version)
def _do_write(self, collection, version, symbol, item, previous_version, segment_offset=0):
    sze = int(item.dtype.itemsize * np.prod(item.shape[1:]))

    # chunk and store the data by (uncompressed) size
    chunk_size = _CHUNK_SIZE / sze

    previous_shas = []
    if previous_version:
        previous_shas = set([x['sha'] for x in
                             collection.find({'symbol': symbol},
                                             projection={'sha': 1, '_id': 0},
                                             )])
    length = len(item)

    if segment_offset > 0 and 'segment_index' in previous_version:
        existing_index = previous_version['segment_index']
    else:
        existing_index = None

    segment_index = []
    i = -1

    # Compress
    idxs = xrange(int(np.ceil(float(length) / chunk_size)))
    chunks = [(item[i * chunk_size:(i + 1) * chunk_size]).tostring() for i in idxs]
    compressed_chunks = compress_array(chunks)

    # Write
    bulk = collection.initialize_unordered_bulk_op()
    for i, chunk in zip(idxs, compressed_chunks):
        segment = {'data': Binary(chunk), 'compressed': True}
        segment['segment'] = min((i + 1) * chunk_size - 1, length - 1) + segment_offset
        segment_index.append(segment['segment'])
        sha = checksum(symbol, segment)
        if sha not in previous_shas:
            segment['sha'] = sha
            bulk.find({'symbol': symbol, 'sha': sha, 'segment': segment['segment']}
                      ).upsert().update_one({'$set': segment,
                                             '$addToSet': {'parent': version['_id']}})
        else:
            bulk.find({'symbol': symbol, 'sha': sha, 'segment': segment['segment']}
                      ).update_one({'$addToSet': {'parent': version['_id']}})
    if i != -1:
        bulk.execute()

    segment_index = self._segment_index(item, existing_index=existing_index,
                                        start=segment_offset, new_segments=segment_index)
    if segment_index:
        version['segment_index'] = segment_index
    version['segment_count'] = i + 1
    version['append_size'] = 0
    version['append_count'] = 0

    self.check_written(collection, symbol, version)
def modify_segment(segment, item):
    segment['segment'] -= 2
    sha = hashlib.sha1()
    sha.update(item.encode('ascii'))
    segment['sha'] = Binary(sha.digest())
    segment.pop('_id')
def upload_scan_info(trackds, sc):
    try:
        #db.scans.insert([sc.original_json])
        atlases = []
        for label in sc.track_label_items:
            # Does this atlas already exist? If not, add it to the collection.
            atlas = None
            result = db.atlases.find({"name": label.name,
                                      "parameters": label.parameters})
            if result.count() != 0:
                atlas = result[0]["_id"]
            else:
                atlas = db.atlases.insert({"name": label.name,
                                           "parameters": label.parameters})
            atlases.append(atlas)
        db.scans.insert([{
            "scan_id": sc.scan_id,
            "subject_id": sc.subject_id,
            "gender": sc.scan_gender,
            "age": sc.scan_age,
            "study": sc.study,
            "group": sc.scan_group,
            "smoothing": sc.smoothing,
            "cutoff_angle": sc.cutoff_angle,
            "qa_threshold": sc.qa_threshold,
            "gfa_threshold": sc.gfa_threshold,
            "length_min": sc.length_min,
            "length_max": sc.length_max,
            "institution": sc.institution,
            "reconstruction": sc.reconstruction,
            "scanner": sc.scanner,
            "n_directions": sc.n_directions,
            "max_b_value": sc.max_b_value,
            "bvals": sc.bvals,
            "bvecs": sc.bvecs,
            "label": sc.label,
            "trk_space": sc.trk_space,
            "atlases": list(set(atlases)),
            "sls": len(trackds.tracks),
            "header": Binary(pickle.dumps(trackds.header, protocol=2)),
            "original_json": sc.original_json
        }])
    except Exception, e:
        print "Failed to upload scan info", e
        return False
def _do_write(self, collection, version, symbol, item, previous_version, segment_offset=0):
    row_size = int(item.dtype.itemsize * np.prod(item.shape[1:]))

    # chunk and store the data by (uncompressed) size
    rows_per_chunk = int(_CHUNK_SIZE / row_size)

    symbol_all_previous_shas, version_shas = set(), set()
    if previous_version:
        symbol_all_previous_shas.update(
            Binary(x['sha']) for x in collection.find({'symbol': symbol},
                                                      projection={'sha': 1, '_id': 0}))
    length = len(item)

    if segment_offset > 0 and 'segment_index' in previous_version:
        existing_index = previous_version['segment_index']
    else:
        existing_index = None

    segment_index = []

    # Compress
    idxs = xrange(int(np.ceil(float(length) / rows_per_chunk)))
    chunks = [(item[i * rows_per_chunk:(i + 1) * rows_per_chunk]).tostring() for i in idxs]
    compressed_chunks = compress_array(chunks)

    # Write
    bulk = []
    for i, chunk in zip(idxs, compressed_chunks):
        segment = {
            'data': Binary(chunk),
            'compressed': True,
            'segment': min((i + 1) * rows_per_chunk - 1, length - 1) + segment_offset,
        }
        segment_index.append(segment['segment'])
        sha = checksum(symbol, segment)
        segment_spec = {'symbol': symbol, 'sha': sha, 'segment': segment['segment']}

        if ARCTIC_FORWARD_POINTERS_CFG is FwPointersCfg.DISABLED:
            if sha not in symbol_all_previous_shas:
                segment['sha'] = sha
                bulk.append(pymongo.UpdateOne(segment_spec,
                                              {'$set': segment,
                                               '$addToSet': {'parent': version['_id']}},
                                              upsert=True))
            else:
                bulk.append(pymongo.UpdateOne(segment_spec,
                                              {'$addToSet': {'parent': version['_id']}}))
        else:
            version_shas.add(sha)
            # We only keep for the records the ID of the version which created the segment.
            # We also need the uniqueness of the parent field for the (symbol, parent, segment)
            # index, because upon mongo_retry "dirty_append == True", we compress and only the
            # SHA changes, which raises DuplicateKeyError if we don't have a unique
            # (symbol, parent, segment).
            set_spec = {'$addToSet': {'parent': version['_id']}}
            if sha not in symbol_all_previous_shas:
                segment['sha'] = sha
                set_spec['$set'] = segment
                bulk.append(pymongo.UpdateOne(segment_spec, set_spec, upsert=True))
            elif ARCTIC_FORWARD_POINTERS_CFG is FwPointersCfg.HYBRID:
                bulk.append(pymongo.UpdateOne(segment_spec, set_spec))
            # With FwPointersCfg.ENABLED we make zero updates on existing segment documents, but:
            #   - write only the new segment(s) documents
            #   - write the new version document
            # This helps with performance, as we update as few documents as necessary.
    if bulk:
        collection.bulk_write(bulk, ordered=False)

    segment_index = self._segment_index(item, existing_index=existing_index,
                                        start=segment_offset, new_segments=segment_index)
    if segment_index:
        version['segment_index'] = segment_index
    version['segment_count'] = len(chunks)
    version['append_size'] = 0
    version['append_count'] = 0

    _update_fw_pointers(collection, symbol, version, previous_version,
                        is_append=False, shas_to_add=version_shas)

    self.check_written(collection, symbol, version)
def _do_append(self, collection, version, symbol, item, previous_version, dirty_append):
    data = item.tostring()
    # Compatibility with Arctic 1.22.0 that didn't write base_sha into the version document
    version['base_sha'] = previous_version.get('base_sha', Binary(b''))
    version['up_to'] = previous_version['up_to'] + len(item)
    if len(item) > 0:
        version['segment_count'] = previous_version['segment_count'] + 1
        version['append_count'] = previous_version['append_count'] + 1
        version['append_size'] = previous_version['append_size'] + len(data)
    else:
        version['segment_count'] = previous_version['segment_count']
        version['append_count'] = previous_version['append_count']
        version['append_size'] = previous_version['append_size']

    # _CHUNK_SIZE is probably too big if we're only appending single rows of data -
    # perhaps something smaller, or also look at number of appended segments?
    if not dirty_append and version['append_count'] < _APPEND_COUNT and \
            version['append_size'] < _APPEND_SIZE:
        version['base_version_id'] = version_base_or_id(previous_version)

        if len(item) > 0:
            segment = {'data': Binary(data), 'compressed': False,
                       'segment': version['up_to'] - 1}
            sha = checksum(symbol, segment)
            try:
                # TODO: We could have a common handling with conditional spec-construction
                #       for the update spec. For now we kept unchanged the existing code
                #       which handles backwards pointers.
                if ARCTIC_FORWARD_POINTERS_CFG is FwPointersCfg.DISABLED:
                    collection.update_one({'symbol': symbol, 'sha': sha},
                                          {'$set': segment,
                                           '$addToSet': {'parent': version['base_version_id']}},
                                          upsert=True)
                else:
                    set_spec = {'$set': segment}
                    if ARCTIC_FORWARD_POINTERS_CFG is FwPointersCfg.HYBRID:
                        set_spec['$addToSet'] = {'parent': version['base_version_id']}
                    else:  # FwPointersCfg.ENABLED
                        # We only keep for the records the ID of the version which created
                        # the segment. We also need the uniqueness of the parent field for
                        # the (symbol, parent, segment) index, because upon mongo_retry
                        # "dirty_append == True", we compress and only the SHA changes,
                        # which raises DuplicateKeyError if we don't have a unique
                        # (symbol, parent, segment).
                        set_spec['$addToSet'] = {'parent': version['_id']}
                    collection.update_one({'symbol': symbol, 'sha': sha}, set_spec, upsert=True)

                _update_fw_pointers(collection, symbol, version, previous_version,
                                    is_append=True, shas_to_add=(sha, ))
            except DuplicateKeyError:
                '''If we get a duplicate key error here, this segment has the same
                symbol/parent/segment as another chunk, but a different sha. This
                means that we have 'forked' history. If we concat_and_rewrite here,
                new chunks will have a different parent id (the _id of this version
                doc) ...so we can safely write them.
                '''
                self._concat_and_rewrite(collection, version, symbol, item, previous_version)
                return

            if 'segment_index' in previous_version:
                segment_index = self._segment_index(
                    item,
                    existing_index=previous_version.get('segment_index'),
                    start=previous_version['up_to'],
                    new_segments=[segment['segment'], ])
                if segment_index:
                    version['segment_index'] = segment_index
            logger.debug("Appended segment %d for parent %s" % (segment['segment'], version['_id']))
        else:
            if 'segment_index' in previous_version:
                version['segment_index'] = previous_version['segment_index']
    else:  # Too much data has been appended now, so rewrite (and compress/chunk).
        self._concat_and_rewrite(collection, version, symbol, item, previous_version)
def object_hook(dct):
    if "$oid" in dct:
        return ObjectId(str(dct["$oid"]))
    if "$ref" in dct:
        return DBRef(dct["$ref"], dct["$id"], dct.get("$db", None))
    if "$date" in dct:
        dtm = dct["$date"]
        # mongoexport 2.6 and newer
        if isinstance(dtm, string_type):
            aware = datetime.datetime.strptime(
                dtm[:23], "%Y-%m-%dT%H:%M:%S.%f").replace(tzinfo=utc)
            offset = dtm[23:]
            if not offset or offset == 'Z':
                # UTC
                return aware
            else:
                if len(offset) == 5:
                    # Offset from mongoexport is in format (+|-)HHMM
                    secs = (int(offset[1:3]) * 3600 + int(offset[3:]) * 60)
                elif ':' in offset and len(offset) == 6:
                    # RFC-3339 format (+|-)HH:MM
                    hours, minutes = offset[1:].split(':')
                    secs = (int(hours) * 3600 + int(minutes) * 60)
                else:
                    # Not RFC-3339 compliant or mongoexport output.
                    raise ValueError("invalid format for offset")
                if offset[0] == "-":
                    secs *= -1
                return aware - datetime.timedelta(seconds=secs)
        # mongoexport 2.6 and newer, time before the epoch (SERVER-15275)
        elif isinstance(dtm, collections.Mapping):
            secs = float(dtm["$numberLong"]) / 1000.0
        # mongoexport before 2.6
        else:
            secs = float(dtm) / 1000.0
        return EPOCH_AWARE + datetime.timedelta(seconds=secs)
    if "$regex" in dct:
        flags = 0
        # PyMongo always adds $options but some other tools may not.
        for opt in dct.get("$options", ""):
            flags |= _RE_OPT_TABLE.get(opt, 0)
        return Regex(dct["$regex"], flags)
    if "$minKey" in dct:
        return MinKey()
    if "$maxKey" in dct:
        return MaxKey()
    if "$binary" in dct:
        if isinstance(dct["$type"], int):
            dct["$type"] = "%02x" % dct["$type"]
        subtype = int(dct["$type"], 16)
        if subtype >= 0xffffff80:  # Handle mongoexport values
            subtype = int(dct["$type"][6:], 16)
        return Binary(base64.b64decode(dct["$binary"].encode()), subtype)
    if "$code" in dct:
        return Code(dct["$code"], dct.get("$scope"))
    if "$uuid" in dct:
        return uuid.UUID(dct["$uuid"])
    if "$undefined" in dct:
        return None
    if "$numberLong" in dct:
        return Int64(dct["$numberLong"])
    if "$timestamp" in dct:
        tsp = dct["$timestamp"]
        return Timestamp(tsp["t"], tsp["i"])
    return dct
def __setitem__(self, url, result):
    """Save value for this URL"""
    # record = {'result': result, 'timestamp': datetime.utcnow()}
    record = {'result': Binary(zlib.compress(pickle.dumps(result))),
              'timestamp': datetime.utcnow()}
    self.db.webpage.update({'_id': url}, {'$set': record}, upsert=True)
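# --- Hedged counterpart sketch (not part of the original snippet) ---
# Reading a cached value back out just reverses the steps above: decompress
# with zlib, then unpickle. The self.db.webpage attribute mirrors the cache
# class assumed above; the method name is illustrative.
def __getitem__(self, url):
    """Load value for this URL, raising KeyError if it was never cached."""
    record = self.db.webpage.find_one({'_id': url})
    if record is None:
        raise KeyError(url + ' does not exist')
    # Binary behaves like bytes, so it can be passed straight to zlib/pickle.
    return pickle.loads(zlib.decompress(record['result']))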
def assignPiles(payload):
    username = payload['username']
    gameId = payload['gameId']
    games = Game.objects(gameID=gameId)
    game = games.get(gameID=gameId)

    # refactor this mess later
    dataA = [game.playerADataField for game in games]
    binDataA = dataA[0]
    dataB = [game.playerBDataField for game in games]
    binDataB = dataB[0]

    playerA = pickle.loads(binDataA)
    playerB = pickle.loads(binDataB)

    recipient_session_id = clients[username]
    piles = payload['piles']

    if username == playerA.name:
        for i in range(5):
            playerA.piles[i].cards = piles[i]
        game.playerADataField = Binary(pickle.dumps(playerA))
        game.save()
        if pilesReady(playerB.piles):
            emit('Both Ready', {
                "authorized": playerA.authorized,
                "piles": [len(playerB.piles[0].cards),
                          len(playerB.piles[1].cards),
                          len(playerB.piles[2].cards),
                          len(playerB.piles[3].cards),
                          len(playerB.piles[4].cards)]
            }, room=recipient_session_id)
            opponent_session_id = clients[playerB.name]
            emit('Both Ready', {
                "authorized": playerB.authorized,
                "piles": [len(playerA.piles[0].cards),
                          len(playerA.piles[1].cards),
                          len(playerA.piles[2].cards),
                          len(playerA.piles[3].cards),
                          len(playerA.piles[4].cards)]
            }, room=opponent_session_id)
        else:
            emit('Waiting', room=recipient_session_id)
            # TODO maybe run a method that'll emit a certain event that'll trigger if both
            # have assigned piles, else emit an event that'll say to wait
        print('playerA emitted')
    elif username == playerB.name:
        for i in range(5):
            playerB.piles[i].cards = piles[i]
        game.playerBDataField = Binary(pickle.dumps(playerB))
        game.save()
        if pilesReady(playerA.piles):
            emit('Both Ready', {
                "authorized": playerB.authorized,
                "piles": [len(playerA.piles[0].cards),
                          len(playerA.piles[1].cards),
                          len(playerA.piles[2].cards),
                          len(playerA.piles[3].cards),
                          len(playerA.piles[4].cards)]
            }, room=recipient_session_id)
            opponent_session_id = clients[playerA.name]
            emit('Both Ready', {
                "authorized": playerA.authorized,
                "piles": [len(playerB.piles[0].cards),
                          len(playerB.piles[1].cards),
                          len(playerB.piles[2].cards),
                          len(playerB.piles[3].cards),
                          len(playerB.piles[4].cards)]
            }, room=opponent_session_id)
        else:
            emit('Waiting', room=recipient_session_id)
            emit('todo', room=recipient_session_id)
        print('playerB emitted')
    else:
        print('Something is up.')
        # (tail of load_dataset(), whose beginning is not included in this snippet)
        X.extend(faces)
        y.extend(labels)
    return asarray(X), asarray(y)


def get_embedding(model, face_pixels):
    face_pixels = face_pixels.astype('float32')
    mean, std = face_pixels.mean(), face_pixels.std()
    face_pixels = (face_pixels - mean) / std
    samples = expand_dims(face_pixels, axis=0)
    yhat = model.predict(samples)
    return yhat[0]


# load train dataset
trainX, trainy = load_dataset('raw_dataset/')
model = load_model('model/facenet_keras.h5')
print('Loaded Model')

newTrainX = list()
data = []
for face_pixels, label in zip(trainX, trainy):
    embedding = get_embedding(model, face_pixels)
    mydict = {
        "name": label,
        "data": Binary(pickle.dumps(embedding, protocol=2))
    }
    data.append(mydict)

res = mycol.insert_many(data)
print(res)
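# --- Hedged retrieval sketch (not part of the original script) ---
# Reads the stored embeddings back out of the same collection. "mycol" above is
# assumed to be a pymongo collection; each stored value is a bson Binary
# wrapping a pickled numpy array, so pickle.loads() recovers it directly.
def load_embeddings(collection):
    names, embeddings = [], []
    for doc in collection.find({}, {"name": 1, "data": 1}):
        names.append(doc["name"])
        embeddings.append(pickle.loads(doc["data"]))  # Binary -> bytes -> ndarray
    return names, embeddings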
def startFight(payload):
    username = payload['username']
    battlingPiles = payload['battlingPiles']
    gameId = payload['gameId']
    recipient_session_id = clients[username]
    games = Game.objects(gameID=gameId)
    game = games.get(gameID=gameId)

    # refactor this mess later
    binDataA = game.playerADataField
    binDataB = game.playerBDataField
    playerA = pickle.loads(binDataA)
    playerB = pickle.loads(binDataB)

    if username == playerA.name:
        pileAId = battlingPiles[0]
        pileBId = battlingPiles[1]
        opponent_session_id = clients[playerB.name]
        pileA = []
        for card in playerA.piles[pileAId].cards:
            pileA.append(card['value'])
        pileB = []
        for card in playerB.piles[pileBId].cards:
            pileB.append(card['value'])
        winlossA = fight(pileA, pileB)
        if game.roundsLeft == 0 and game.trickNum == 4:
            if winlossA == winlossstate.WIN:
                playerA.tricksWon += 1
            elif winlossA == winlossstate.LOSS:
                playerB.tricksWon += 1
            if playerA.roundsWon < playerB.roundsWon:
                emit('game over', "lost", room=recipient_session_id)
                emit('game over', "won", room=opponent_session_id)
            elif playerA.roundsWon > playerB.roundsWon:
                emit('game over', "won", room=recipient_session_id)
                emit('game over', "lost", room=opponent_session_id)
            else:
                emit('game over', "drew", room=recipient_session_id)
                emit('game over', "drew", room=opponent_session_id)
        else:
            if game.trickNum == 4:
                resetDeck(game, playerA, playerB)
                resetHandling(playerA, playerB, recipient_session_id,
                              opponent_session_id, winlossA)
            else:
                fightHandling(game, playerA, playerB, pileAId, pileBId,
                              recipient_session_id, opponent_session_id, winlossA)
    else:
        pileAId = battlingPiles[1]
        pileBId = battlingPiles[0]
        opponent_session_id = clients[playerA.name]
        pileA = []
        for card in playerA.piles[pileAId].cards:
            pileA.append(card['value'])
        pileB = []
        for card in playerB.piles[pileBId].cards:
            pileB.append(card['value'])
        winlossB = fight(pileA, pileB)
        playerA.piles[pileAId].cards = []
        playerB.piles[pileBId].cards = []
        if game.roundsLeft == 0 and game.trickNum == 4:
            if winlossB == winlossstate.WIN:
                playerB.tricksWon += 1
            elif winlossB == winlossstate.LOSS:
                playerB.tricksWon += 1
            if playerA.roundsWon < playerB.roundsWon:
                emit('game over', "lost", room=recipient_session_id)
                emit('game over', "won", room=opponent_session_id)
            elif playerA.roundsWon > playerB.roundsWon:
                emit('game over', "won", room=recipient_session_id)
                emit('game over', "lost", room=opponent_session_id)
            else:
                emit('game over', "drew", room=recipient_session_id)
                emit('game over', "drew", room=opponent_session_id)
        else:
            if game.trickNum == 4:
                resetDeck(game, playerA, playerB)
                resetHandling(playerB, playerA, recipient_session_id,
                              opponent_session_id, winlossB)
            else:
                fightHandling(game, playerB, playerA, pileBId, pileAId,
                              recipient_session_id, opponent_session_id, winlossB)

    playerA.authorized = not playerA.authorized
    playerB.authorized = not playerB.authorized
    game.playerADataField = Binary(pickle.dumps(playerA))
    game.playerBDataField = Binary(pickle.dumps(playerB))
    game.save()
def __init__(self):
    # Ensure id is type 4, regardless of CodecOptions.uuid_representation.
    self.session_id = {'id': Binary(uuid.uuid4().bytes, 4)}
    self.last_use = monotonic.time()
    self._transaction_id = 0
    self.dirty = False
def __setitem__(self, url, result):
    # Wrap the zlib-compressed pickle string in Binary() before storing it.
    result = Binary(zlib.compress(pickle.dumps(result)))
    record = {'result': result, 'timestamp': datetime.utcnow()}
    # Insert or update the record; update() returns no useful value here.
    self.collect.update({'_id': url}, {'$set': record}, upsert=True)
def test_sort_19(monty_sort, mongo_sort):
    docs = [
        {"a": ["x", True]},
        {"a": None},
        {"a": []},
        {"a": [5, []]},
        {"a": {"s": 7}},
        {"a": {"s": [9]}},
        {"a": {"s": 10}},
        {"a": 6},
        {"a": 4},
        {"a": [5, None]},
        {"a": [5, [1]]},
        {"a": [Decimal128("4.5"), Binary(b"0")]},
        {"a": [{"s": 5}, False]},
        {"a": [{"s": 9}]},
        {"a": [True, "y"]},
        {"a": Binary(b"a")},
        {"a": b"bytes"},
        {"a": ["abc"]},
        {"a": "banana"},
        {"a": "appple"},
        {"a": [Regex("^a", "ix")]},
        {"a": Regex("^b")},
        {"a": Code("x", {"m": 0})},
        {"a": Code("y")},
        {"a": Code("y", {})},
        {"a": Code("y", {"m": 0})},
        {"a": MinKey()},
        {"a": MaxKey()},
        {"a": Timestamp(0, 1)},
        {"a": Timestamp(1, 1)},
        {"a": ObjectId(b"000000000000")},
        {"a": ObjectId(b"000000000001")},
        {"a": datetime(1900, 1, 1)},
        {"a": datetime(1900, 1, 2)},
    ]
    sort = [("a", 1)]

    monty_c = monty_sort(docs, sort)
    mongo_c = mongo_sort(docs, sort)

    for i in range(len(docs)):
        assert next(mongo_c)["_id"] == next(monty_c)["_id"]
def test_mongocrypt_options(self):
    schema_map = bson_data('schema-map.json')
    valid = [
        ({'local': {'key': b'1' * 96}}, None),
        ({'aws': {'accessKeyId': '', 'secretAccessKey': ''}}, schema_map),
        ({'aws': {'accessKeyId': 'foo', 'secretAccessKey': 'foo'}}, None),
        ({'aws': {'accessKeyId': 'foo', 'secretAccessKey': 'foo',
                  'sessionToken': 'token'}}, None),
        ({'aws': {'accessKeyId': 'foo', 'secretAccessKey': 'foo'},
          'local': {'key': b'1' * 96}}, None),
        ({'local': {'key': to_base64(b'1' * 96)}}, None),
        ({'local': {'key': Binary(b'1' * 96)}}, None),
        ({'gcp': {'email': '*****@*****.**', 'privateKey': b'1'}}, None),
        ({'gcp': {'email': '*****@*****.**', 'privateKey': to_base64(b'1')}}, None),
        ({'gcp': {'email': '*****@*****.**', 'privateKey': Binary(b'1')}}, None),
    ]
    for kms_providers, schema_map in valid:
        opts = MongoCryptOptions(kms_providers, schema_map)
        self.assertEqual(opts.kms_providers, kms_providers, msg=kms_providers)
        self.assertEqual(opts.schema_map, schema_map)
def test_read_object_backwards_compat():
    self = create_autospec(PickleStore)
    version = {'blob': Binary(compressHC(cPickle.dumps(object)))}
    assert PickleStore.read(self, sentinel.arctic_lib, version, sentinel.symbol) == object
def write(self, symbol, item, metadata=None, chunker=DateChunker(), audit=None, **kwargs):
    """
    Writes data from item to symbol in the database

    Parameters
    ----------
    symbol: str
        the symbol that will be used to reference the written data
    item: Dataframe or Series
        the data to write the database
    metadata: ?
        optional per symbol metadata
    chunker: Object of type Chunker
        A chunker that chunks the data in item
    audit: dict
        audit information
    kwargs:
        optional keyword args that are passed to the chunker. Includes:
        chunk_size:
            used by chunker to break data into discrete chunks.
            see specific chunkers for more information about this param.
    """
    if not isinstance(item, (DataFrame, Series)):
        raise Exception("Can only chunk DataFrames and Series")

    self._arctic_lib.check_quota()

    previous_shas = []
    doc = {}
    meta = {}

    doc[SYMBOL] = symbol
    doc[LEN] = len(item)
    doc[SERIALIZER] = self.serializer.TYPE
    doc[CHUNKER] = chunker.TYPE
    doc[USERMETA] = metadata

    sym = self._get_symbol_info(symbol)
    if sym:
        previous_shas = set([Binary(x[SHA]) for x in
                             self._collection.find({SYMBOL: symbol},
                                                   projection={SHA: True, '_id': False},
                                                   )])
    ops = []
    meta_ops = []
    chunk_count = 0

    for start, end, chunk_size, record in chunker.to_chunks(item, **kwargs):
        chunk_count += 1
        data = self.serializer.serialize(record)
        doc[CHUNK_SIZE] = chunk_size
        doc[METADATA] = {'columns': data[METADATA][COLUMNS] if COLUMNS in data[METADATA] else ''}
        meta = data[METADATA]

        for i in xrange(int(len(data[DATA]) / MAX_CHUNK_SIZE + 1)):
            chunk = {DATA: Binary(data[DATA][i * MAX_CHUNK_SIZE: (i + 1) * MAX_CHUNK_SIZE])}
            chunk[SEGMENT] = i
            chunk[START] = meta[START] = start
            chunk[END] = meta[END] = end
            chunk[SYMBOL] = meta[SYMBOL] = symbol
            dates = [chunker.chunk_to_str(start), chunker.chunk_to_str(end),
                     str(chunk[SEGMENT]).encode('ascii')]
            chunk[SHA] = self._checksum(dates, chunk[DATA])
            meta_ops.append(pymongo.ReplaceOne({SYMBOL: symbol, START: start, END: end},
                                               meta, upsert=True))
            if chunk[SHA] not in previous_shas:
                ops.append(pymongo.UpdateOne({SYMBOL: symbol, START: start, END: end,
                                              SEGMENT: chunk[SEGMENT]},
                                             {'$set': chunk}, upsert=True))
            else:
                # already exists, dont need to update in mongo
                previous_shas.remove(chunk[SHA])
    if ops:
        self._collection.bulk_write(ops, ordered=False)
    if meta_ops:
        self._mdata.bulk_write(meta_ops, ordered=False)

    doc[CHUNK_COUNT] = chunk_count
    doc[APPEND_COUNT] = 0

    if previous_shas:
        mongo_retry(self._collection.delete_many)({SYMBOL: symbol,
                                                   SHA: {'$in': list(previous_shas)}})

    mongo_retry(self._symbols.update_one)({SYMBOL: symbol}, {'$set': doc}, upsert=True)

    if audit is not None:
        audit['symbol'] = symbol
        audit['action'] = 'write'
        audit['chunks'] = chunk_count
        self._audit.insert_one(audit)
def test_basic_encode(self):
    self.assertRaises(TypeError, BSON.encode, 100)
    self.assertRaises(TypeError, BSON.encode, "hello")
    self.assertRaises(TypeError, BSON.encode, None)
    self.assertRaises(TypeError, BSON.encode, [])

    self.assertEqual(BSON.encode({}), BSON(b("\x05\x00\x00\x00\x00")))
    self.assertEqual(BSON.encode({"test": u"hello world"}),
                     b("\x1B\x00\x00\x00\x02\x74\x65\x73\x74\x00\x0C\x00"
                       "\x00\x00\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C"
                       "\x64\x00\x00"))
    self.assertEqual(BSON.encode({u"mike": 100}),
                     b("\x0F\x00\x00\x00\x10\x6D\x69\x6B\x65\x00\x64\x00"
                       "\x00\x00\x00"))
    self.assertEqual(BSON.encode({"hello": 1.5}),
                     b("\x14\x00\x00\x00\x01\x68\x65\x6C\x6C\x6F\x00\x00"
                       "\x00\x00\x00\x00\x00\xF8\x3F\x00"))
    self.assertEqual(BSON.encode({"true": True}),
                     b("\x0C\x00\x00\x00\x08\x74\x72\x75\x65\x00\x01\x00"))
    self.assertEqual(BSON.encode({"false": False}),
                     b("\x0D\x00\x00\x00\x08\x66\x61\x6C\x73\x65\x00\x00"
                       "\x00"))
    self.assertEqual(BSON.encode({"empty": []}),
                     b("\x11\x00\x00\x00\x04\x65\x6D\x70\x74\x79\x00\x05"
                       "\x00\x00\x00\x00\x00"))
    self.assertEqual(BSON.encode({"none": {}}),
                     b("\x10\x00\x00\x00\x03\x6E\x6F\x6E\x65\x00\x05\x00"
                       "\x00\x00\x00\x00"))
    self.assertEqual(BSON.encode({"test": Binary(b("test"), 0)}),
                     b("\x14\x00\x00\x00\x05\x74\x65\x73\x74\x00\x04\x00"
                       "\x00\x00\x00\x74\x65\x73\x74\x00"))
    self.assertEqual(BSON.encode({"test": Binary(b("test"), 2)}),
                     b("\x18\x00\x00\x00\x05\x74\x65\x73\x74\x00\x08\x00"
                       "\x00\x00\x02\x04\x00\x00\x00\x74\x65\x73\x74\x00"))
    self.assertEqual(BSON.encode({"test": Binary(b("test"), 128)}),
                     b("\x14\x00\x00\x00\x05\x74\x65\x73\x74\x00\x04\x00"
                       "\x00\x00\x80\x74\x65\x73\x74\x00"))
    self.assertEqual(BSON.encode({"test": None}),
                     b("\x0B\x00\x00\x00\x0A\x74\x65\x73\x74\x00\x00"))
    self.assertEqual(BSON.encode({"date": datetime.datetime(2007, 1, 8, 0, 30, 11)}),
                     b("\x13\x00\x00\x00\x09\x64\x61\x74\x65\x00\x38\xBE"
                       "\x1C\xFF\x0F\x01\x00\x00\x00"))
    self.assertEqual(BSON.encode({"regex": re.compile(b("a*b"), re.IGNORECASE)}),
                     b("\x12\x00\x00\x00\x0B\x72\x65\x67\x65\x78\x00\x61"
                       "\x2A\x62\x00\x69\x00\x00"))
    self.assertEqual(BSON.encode({"$where": Code("test")}),
                     b("\x16\x00\x00\x00\r$where\x00\x05\x00\x00\x00test"
                       "\x00\x00"))
    self.assertEqual(BSON.encode({"$field": Code("function(){ return true;}", scope=None)}),
                     b("+\x00\x00\x00\r$field\x00\x1a\x00\x00\x00"
                       "function(){ return true;}\x00\x00"))
    self.assertEqual(BSON.encode({"$field": Code("return function(){ return x; }",
                                                 scope={'x': False})}),
                     b("=\x00\x00\x00\x0f$field\x000\x00\x00\x00\x1f\x00"
                       "\x00\x00return function(){ return x; }\x00\t\x00"
                       "\x00\x00\x08x\x00\x00\x00\x00"))
    a = ObjectId(b("\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B"))
    self.assertEqual(BSON.encode({"oid": a}),
                     b("\x16\x00\x00\x00\x07\x6F\x69\x64\x00\x00\x01\x02"
                       "\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x00"))
    self.assertEqual(BSON.encode({"ref": DBRef("coll", a)}),
                     b("\x2F\x00\x00\x00\x03ref\x00\x25\x00\x00\x00\x02"
                       "$ref\x00\x05\x00\x00\x00coll\x00\x07$id\x00\x00"
                       "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x00"
                       "\x00"))
def __update(self, sym, item, metadata=None, combine_method=None, chunk_range=None, audit=None):
    '''
    helper method used by update and append since they very closely
    resemble each other. They really differ only by the combine method:
    append will combine existing data with new data (within a chunk),
    whereas update will replace existing data with new data (within a
    chunk).
    '''
    if not isinstance(item, (DataFrame, Series)):
        raise Exception("Can only chunk DataFrames and Series")

    self._arctic_lib.check_quota()

    symbol = sym[SYMBOL]

    if chunk_range is not None:
        self.delete(symbol, chunk_range)
        sym = self._get_symbol_info(symbol)

    ops = []
    meta_ops = []
    chunker = CHUNKER_MAP[sym[CHUNKER]]
    appended = 0
    new_chunks = 0
    for start, end, _, record in chunker.to_chunks(item, chunk_size=sym[CHUNK_SIZE]):
        # read out matching chunks
        df = self.read(symbol, chunk_range=chunker.to_range(start, end), filter_data=False)

        # assuming they exist, update them and store the original chunk
        # range for later use
        if len(df) > 0:
            record = combine_method(df, record)
            if record is None or record.equals(df):
                continue

            sym[APPEND_COUNT] += len(record) - len(df)
            appended += len(record) - len(df)
            sym[LEN] += len(record) - len(df)
        else:
            sym[CHUNK_COUNT] += 1
            new_chunks += 1
            sym[LEN] += len(record)

        data = SER_MAP[sym[SERIALIZER]].serialize(record)
        meta = data[METADATA]

        chunk_count = int(len(data[DATA]) / MAX_CHUNK_SIZE + 1)
        seg_count = self._collection.count({SYMBOL: symbol, START: start, END: end})
        # remove old segments for this chunk in case we now have fewer
        # segments than we did before
        if seg_count > chunk_count:
            self._collection.delete_many({SYMBOL: symbol, START: start, END: end,
                                          SEGMENT: {'$gte': chunk_count}})
        for i in xrange(chunk_count):
            chunk = {DATA: Binary(data[DATA][i * MAX_CHUNK_SIZE: (i + 1) * MAX_CHUNK_SIZE])}
            chunk[SEGMENT] = i
            chunk[START] = start
            chunk[END] = end
            chunk[SYMBOL] = symbol
            dates = [chunker.chunk_to_str(start), chunker.chunk_to_str(end),
                     str(chunk[SEGMENT]).encode('ascii')]
            sha = self._checksum(dates, data[DATA])
            chunk[SHA] = sha
            ops.append(pymongo.UpdateOne({SYMBOL: symbol, START: start, END: end,
                                          SEGMENT: chunk[SEGMENT]},
                                         {'$set': chunk}, upsert=True))
            meta_ops.append(pymongo.UpdateOne({SYMBOL: symbol, START: start, END: end},
                                              {'$set': meta}, upsert=True))
    if ops:
        self._collection.bulk_write(ops, ordered=False)
        self._mdata.bulk_write(meta_ops, ordered=False)

    sym[USERMETA] = metadata
    self._symbols.replace_one({SYMBOL: symbol}, sym)

    if audit is not None:
        if new_chunks > 0:
            audit['new_chunks'] = new_chunks
        if appended > 0:
            audit['appended_rows'] = appended
        self._audit.insert_one(audit)
def _authenticate_scram(credentials, sock_info, mechanism):
    """Authenticate using SCRAM."""
    username = credentials.username
    if mechanism == 'SCRAM-SHA-256':
        digest = "sha256"
        digestmod = hashlib.sha256
        data = saslprep(credentials.password).encode("utf-8")
    else:
        digest = "sha1"
        digestmod = hashlib.sha1
        data = _password_digest(username, credentials.password).encode("utf-8")
    source = credentials.source
    cache = credentials.cache

    # Make local
    _hmac = hmac.HMAC

    user = username.encode("utf-8").replace(b"=", b"=3D").replace(b",", b"=2C")
    nonce = standard_b64encode(
        (("%s" % (SystemRandom().random(),))[2:]).encode("utf-8"))
    first_bare = b"n=" + user + b",r=" + nonce

    cmd = SON([('saslStart', 1),
               ('mechanism', mechanism),
               ('payload', Binary(b"n,," + first_bare)),
               ('autoAuthorize', 1)])
    res = sock_info.command(source, cmd)

    server_first = res['payload']
    parsed = _parse_scram_response(server_first)
    iterations = int(parsed[b'i'])
    if iterations < 4096:
        raise OperationFailure("Server returned an invalid iteration count.")
    salt = parsed[b's']
    rnonce = parsed[b'r']
    if not rnonce.startswith(nonce):
        raise OperationFailure("Server returned an invalid nonce.")

    without_proof = b"c=biws,r=" + rnonce
    if cache.data:
        client_key, server_key, csalt, citerations = cache.data
    else:
        client_key, server_key, csalt, citerations = None, None, None, None
    # Salt and / or iterations could change for a number of different
    # reasons. Either changing invalidates the cache.
    if not client_key or salt != csalt or iterations != citerations:
        salted_pass = _hi(digest, data, standard_b64decode(salt), iterations)
        client_key = _hmac(salted_pass, b"Client Key", digestmod).digest()
        server_key = _hmac(salted_pass, b"Server Key", digestmod).digest()
        cache.data = (client_key, server_key, salt, iterations)
    stored_key = digestmod(client_key).digest()
    auth_msg = b",".join((first_bare, server_first, without_proof))
    client_sig = _hmac(stored_key, auth_msg, digestmod).digest()
    client_proof = b"p=" + standard_b64encode(_xor(client_key, client_sig))
    client_final = b",".join((without_proof, client_proof))

    server_sig = standard_b64encode(
        _hmac(server_key, auth_msg, digestmod).digest())

    cmd = SON([('saslContinue', 1),
               ('conversationId', res['conversationId']),
               ('payload', Binary(client_final))])
    res = sock_info.command(source, cmd)

    parsed = _parse_scram_response(res['payload'])
    if not compare_digest(parsed[b'v'], server_sig):
        raise OperationFailure("Server returned an invalid signature.")

    # Depending on how it's configured, Cyrus SASL (which the server uses)
    # requires a third empty challenge.
    if not res['done']:
        cmd = SON([('saslContinue', 1),
                   ('conversationId', res['conversationId']),
                   ('payload', Binary(b''))])
        res = sock_info.command(source, cmd)
        if not res['done']:
            raise OperationFailure('SASL conversation failed to complete.')
def run_operation(self, sessions, collection, operation):
    original_collection = collection
    name = camel_to_snake(operation['name'])
    if name == 'run_command':
        name = 'command'
    elif name == 'download_by_name':
        name = 'open_download_stream_by_name'
    elif name == 'download':
        name = 'open_download_stream'
    elif name == 'map_reduce':
        self.skipTest('PyMongo does not support mapReduce')
    elif name == 'count':
        self.skipTest('PyMongo does not support count')

    database = collection.database
    collection = database.get_collection(collection.name)
    if 'collectionOptions' in operation:
        collection = collection.with_options(
            **self.parse_options(operation['collectionOptions']))

    object_name = self.get_object_name(operation)
    if object_name == 'gridfsbucket':
        # Only create the GridFSBucket when we need it (for the gridfs
        # retryable reads tests).
        obj = GridFSBucket(database, bucket_name=collection.name)
    else:
        objects = {
            'client': database.client,
            'database': database,
            'collection': collection,
            'testRunner': self
        }
        objects.update(sessions)
        obj = objects[object_name]

    # Combine arguments with options and handle special cases.
    arguments = operation.get('arguments', {})
    arguments.update(arguments.pop("options", {}))
    self.parse_options(arguments)

    cmd = getattr(obj, name)

    with_txn_callback = functools.partial(self.run_operations, sessions,
                                          original_collection,
                                          in_with_transaction=True)
    prepare_spec_arguments(operation, arguments, name, sessions, with_txn_callback)

    if name == 'run_on_thread':
        args = {'sessions': sessions, 'collection': collection}
        args.update(arguments)
        arguments = args
    result = cmd(**dict(arguments))
    # Cleanup open change stream cursors.
    if name == "watch":
        self.addCleanup(result.close)

    if name == "aggregate":
        if arguments["pipeline"] and "$out" in arguments["pipeline"][-1]:
            # Read from the primary to ensure causal consistency.
            out = collection.database.get_collection(
                arguments["pipeline"][-1]["$out"],
                read_preference=ReadPreference.PRIMARY)
            return out.find()
    if 'download' in name:
        result = Binary(result.read())

    if isinstance(result, Cursor) or isinstance(result, CommandCursor):
        return list(result)

    return result
def text_uuid_to_binary(text_uuid):
    """Convert text TypedUUID to binary form"""
    try:
        return Binary(uuid.UUID(text_uuid).bytes, OLD_UUID_SUBTYPE)
    except Exception as exc:
        raise ValueError('Failed to convert text UUID to binary', exc)
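# --- Hedged companion sketch (not part of the original snippet) ---
# The inverse conversion, under the same assumption that OLD_UUID_SUBTYPE is
# the legacy binary UUID subtype used above; the function name is illustrative.
def binary_to_text_uuid(binary_uuid):
    """Convert a legacy-subtype Binary UUID back to its text form."""
    try:
        # Binary is a bytes subclass, so bytes() yields the raw 16 bytes.
        return str(uuid.UUID(bytes=bytes(binary_uuid)))
    except Exception as exc:
        raise ValueError('Failed to convert binary UUID to text', exc)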
def __update(self, sym, item, combine_method=None, chunk_range=None):
    '''
    helper method used by update and append since they very closely
    resemble each other. They really differ only by the combine method:
    append will combine existing data with new data (within a chunk),
    whereas update will replace existing data with new data (within a
    chunk).
    '''
    if not isinstance(item, (DataFrame, Series)):
        raise Exception("Can only chunk DataFrames and Series")

    symbol = sym[SYMBOL]

    if chunk_range is not None:
        self.delete(symbol, chunk_range)
        sym = self._get_symbol_info(symbol)

    bulk = self._collection.initialize_unordered_bulk_op()
    op = False
    chunker = CHUNKER_MAP[sym[CHUNKER]]

    for start, end, _, record in chunker.to_chunks(item, chunk_size=sym[CHUNK_SIZE]):
        # read out matching chunks
        df = self.read(symbol, chunk_range=chunker.to_range(start, end), filter_data=False)
        # assuming they exist, update them and store the original chunk
        # range for later use
        if len(df) > 0:
            record = combine_method(df, record)
            if record is None or record.equals(df):
                continue

            sym[APPEND_COUNT] += len(record)
            sym[LEN] += len(record) - len(df)
        else:
            sym[CHUNK_COUNT] += 1
            sym[LEN] += len(record)

        data = SER_MAP[sym[SERIALIZER]].serialize(record)
        op = True

        # remove old segments for this chunk in case we now have fewer
        # segments than we did before
        chunk_count = int(len(data[DATA]) / MAX_CHUNK_SIZE + 1)
        seg_count = self._collection.count({SYMBOL: symbol, START: start, END: end})
        if seg_count > chunk_count:
            # if chunk count is 1, the segment id will be -1, not 1
            self._collection.delete_many({SYMBOL: symbol, START: start, END: end,
                                          SEGMENT: {'$gt': seg_count if chunk_count > 1 else -1}})

        size_chunked = chunk_count > 1
        for i in xrange(chunk_count):
            chunk = {DATA: Binary(data[DATA][i * MAX_CHUNK_SIZE: (i + 1) * MAX_CHUNK_SIZE])}
            chunk[METADATA] = data[METADATA]
            if size_chunked:
                chunk[SEGMENT] = i
            else:
                chunk[SEGMENT] = -1
            chunk[START] = start
            chunk[END] = end
            chunk[SYMBOL] = symbol
            dates = [chunker.chunk_to_str(start), chunker.chunk_to_str(end),
                     str(chunk[SEGMENT]).encode('ascii')]
            sha = self._checksum(dates, data[DATA])
            chunk[SHA] = sha
            bulk.find({SYMBOL: symbol, START: start, END: end,
                       SEGMENT: chunk[SEGMENT]}).upsert().update_one({'$set': chunk})

    if op:
        bulk.execute()

    self._symbols.replace_one({SYMBOL: symbol}, sym)