Code example #1
File: helpers.py Project: terakilobyte/csfle-guides
 def key_from_base64(base64_key):
     return Binary(base64.b64decode(base64_key), UUID_SUBTYPE)
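
A minimal usage sketch for the helper above; the 96-byte placeholder key and the assertion are illustrative additions, not part of the csfle-guides project, and only pymongo's bson package is assumed:

import base64
from bson.binary import Binary, UUID_SUBTYPE

def key_from_base64(base64_key):
    return Binary(base64.b64decode(base64_key), UUID_SUBTYPE)

local_master_key = base64.b64encode(b"\x00" * 96)  # placeholder key material, not a real key
key = key_from_base64(local_master_key)
assert isinstance(key, Binary) and key.subtype == UUID_SUBTYPE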
Code example #2
def _authenticate_scram(credentials, sock_info, mechanism):
    """Authenticate using SCRAM."""
    username = credentials.username
    if mechanism == 'SCRAM-SHA-256':
        digest = "sha256"
        digestmod = hashlib.sha256
        data = saslprep(credentials.password).encode("utf-8")
    else:
        digest = "sha1"
        digestmod = hashlib.sha1
        data = _password_digest(username, credentials.password).encode("utf-8")
    source = credentials.source
    cache = credentials.cache

    # Make local
    _hmac = hmac.HMAC

    ctx = sock_info.auth_ctx.get(credentials)
    if ctx and ctx.speculate_succeeded():
        nonce, first_bare = ctx.scram_data
        res = ctx.speculative_authenticate
    else:
        nonce, first_bare, cmd = _authenticate_scram_start(
            credentials, mechanism)
        res = sock_info.command(source, cmd)

    server_first = res['payload']
    parsed = _parse_scram_response(server_first)
    iterations = int(parsed[b'i'])
    if iterations < 4096:
        raise OperationFailure("Server returned an invalid iteration count.")
    salt = parsed[b's']
    rnonce = parsed[b'r']
    if not rnonce.startswith(nonce):
        raise OperationFailure("Server returned an invalid nonce.")

    without_proof = b"c=biws,r=" + rnonce
    if cache.data:
        client_key, server_key, csalt, citerations = cache.data
    else:
        client_key, server_key, csalt, citerations = None, None, None, None

    # Salt and / or iterations could change for a number of different
    # reasons. Either changing invalidates the cache.
    if not client_key or salt != csalt or iterations != citerations:
        salted_pass = _hi(digest, data, standard_b64decode(salt), iterations)
        client_key = _hmac(salted_pass, b"Client Key", digestmod).digest()
        server_key = _hmac(salted_pass, b"Server Key", digestmod).digest()
        cache.data = (client_key, server_key, salt, iterations)
    stored_key = digestmod(client_key).digest()
    auth_msg = b",".join((first_bare, server_first, without_proof))
    client_sig = _hmac(stored_key, auth_msg, digestmod).digest()
    client_proof = b"p=" + standard_b64encode(_xor(client_key, client_sig))
    client_final = b",".join((without_proof, client_proof))

    server_sig = standard_b64encode(
        _hmac(server_key, auth_msg, digestmod).digest())

    cmd = SON([('saslContinue', 1), ('conversationId', res['conversationId']),
               ('payload', Binary(client_final))])
    res = sock_info.command(source, cmd)

    parsed = _parse_scram_response(res['payload'])
    if not compare_digest(parsed[b'v'], server_sig):
        raise OperationFailure("Server returned an invalid signature.")

    # A third empty challenge may be required if the server does not support
    # skipEmptyExchange: SERVER-44857.
    if not res['done']:
        cmd = SON([('saslContinue', 1),
                   ('conversationId', res['conversationId']),
                   ('payload', Binary(b''))])
        res = sock_info.command(source, cmd)
        if not res['done']:
            raise OperationFailure('SASL conversation failed to complete.')
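
The function above leans on _parse_scram_response; a minimal sketch of an equivalent parser is shown below, assuming only that SCRAM server messages are comma-separated key=value pairs (which matches the b'i', b's' and b'r' lookups above):

def _parse_scram_response(response):
    """Split a SCRAM server message such as b'r=...,s=...,i=4096' into a dict."""
    return dict(item.split(b"=", 1) for item in response.split(b","))

parsed = _parse_scram_response(b"r=clientnonceservernonce,s=c2FsdA==,i=4096")
assert int(parsed[b"i"]) == 4096 and parsed[b"s"] == b"c2FsdA=="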
Code example #3
def _element_to_bson(key, value, check_keys, uuid_subtype):
    if not isinstance(key, basestring):
        raise InvalidDocument("documents must have only string keys, "
                              "key was %r" % key)

    if check_keys:
        if key.startswith("$"):
            raise InvalidDocument("key %r must not start with '$'" % key)
        if "." in key:
            raise InvalidDocument("key %r must not contain '.'" % key)

    name = _make_c_string(key, True)
    if isinstance(value, float):
        return BSONNUM + name + struct.pack("<d", value)

    if _use_uuid:
        if isinstance(value, uuid.UUID):
            # Java Legacy
            if uuid_subtype == JAVA_LEGACY:
                # Python 3.0(.1) returns a bytearray instance for bytes (3.1
                # and newer just return a bytes instance). Convert that to
                # binary_type (here and below) for compatibility.
                from_uuid = binary_type(value.bytes)
                as_legacy_java = from_uuid[0:8][::-1] + from_uuid[8:16][::-1]
                value = Binary(as_legacy_java, subtype=OLD_UUID_SUBTYPE)
            # C# legacy
            elif uuid_subtype == CSHARP_LEGACY:
                # Microsoft GUID representation.
                value = Binary(binary_type(value.bytes_le),
                               subtype=OLD_UUID_SUBTYPE)
            # Python
            else:
                value = Binary(binary_type(value.bytes), subtype=uuid_subtype)

    if isinstance(value, Binary):
        subtype = value.subtype
        if subtype == 2:
            value = struct.pack("<i", len(value)) + value
        return (BSONBIN + name +
                struct.pack("<i", len(value)) + b(chr(subtype)) + value)
    if isinstance(value, Code):
        cstring = _make_c_string(value)
        if not value.scope:
            length = struct.pack("<i", len(cstring))
            return BSONCOD + name + length + cstring
        scope = _dict_to_bson(value.scope, False, uuid_subtype, False)
        full_length = struct.pack("<i", 8 + len(cstring) + len(scope))
        length = struct.pack("<i", len(cstring))
        return BSONCWS + name + full_length + length + cstring + scope
    if isinstance(value, binary_type):
        if PY3:
            # Python3 special case. Store 'bytes' as BSON binary subtype 0.
            return (BSONBIN + name +
                    struct.pack("<i", len(value)) + ZERO + value)
        cstring = _make_c_string(value)
        length = struct.pack("<i", len(cstring))
        return BSONSTR + name + length + cstring
    if isinstance(value, unicode):
        cstring = _make_c_string(value)
        length = struct.pack("<i", len(cstring))
        return BSONSTR + name + length + cstring
    if isinstance(value, dict):
        return BSONOBJ + name + _dict_to_bson(value, check_keys, uuid_subtype, False)
    if isinstance(value, (list, tuple)):
        as_dict = SON(zip([str(i) for i in range(len(value))], value))
        return BSONARR + name + _dict_to_bson(as_dict, check_keys, uuid_subtype, False)
    if isinstance(value, ObjectId):
        return BSONOID + name + value.binary
    if value is True:
        return BSONBOO + name + ONE
    if value is False:
        return BSONBOO + name + ZERO
    if isinstance(value, int):
        # TODO this is an ugly way to check for this...
        if value > MAX_INT64 or value < MIN_INT64:
            raise OverflowError("BSON can only handle up to 8-byte ints")
        if value > MAX_INT32 or value < MIN_INT32:
            return BSONLON + name + struct.pack("<q", value)
        return BSONINT + name + struct.pack("<i", value)
    # 2to3 will convert long to int here since there is no long in python3.
    # That's OK. The previous if block will match instead.
    if isinstance(value, long):
        if value > MAX_INT64 or value < MIN_INT64:
            raise OverflowError("BSON can only handle up to 8-byte ints")
        return BSONLON + name + struct.pack("<q", value)
    if isinstance(value, datetime.datetime):
        if value.utcoffset() is not None:
            value = value - value.utcoffset()
        millis = int(calendar.timegm(value.timetuple()) * 1000 +
                     value.microsecond / 1000)
        return BSONDAT + name + struct.pack("<q", millis)
    if isinstance(value, Timestamp):
        time = struct.pack("<I", value.time)
        inc = struct.pack("<I", value.inc)
        return BSONTIM + name + inc + time
    if value is None:
        return BSONNUL + name
    if isinstance(value, (RE_TYPE, Regex)):
        pattern = value.pattern
        flags = ""
        if value.flags & re.IGNORECASE:
            flags += "i"
        if value.flags & re.LOCALE:
            flags += "l"
        if value.flags & re.MULTILINE:
            flags += "m"
        if value.flags & re.DOTALL:
            flags += "s"
        if value.flags & re.UNICODE:
            flags += "u"
        if value.flags & re.VERBOSE:
            flags += "x"
        return BSONRGX + name + _make_c_string(pattern, True) + \
            _make_c_string(flags)
    if isinstance(value, DBRef):
        return _element_to_bson(key, value.as_doc(), False, uuid_subtype)
    if isinstance(value, MinKey):
        return BSONMIN + name
    if isinstance(value, MaxKey):
        return BSONMAX + name

    raise InvalidDocument("cannot convert value of type %s to bson" %
                          type(value))
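
As an illustrative, self-contained check of the layout produced by the float branch above (a standalone sketch, not part of the module): a BSON double element is the 0x01 type byte, the key as a C-string, then the value packed as a little-endian double.

import struct

def encode_double_element(key, value):
    name = key.encode("utf-8") + b"\x00"   # BSON C-string: UTF-8 bytes plus a NUL terminator
    return b"\x01" + name + struct.pack("<d", value)

assert encode_double_element("hello", 1.5) == b"\x01hello\x00\x00\x00\x00\x00\x00\x00\xf8?"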
Code example #4
def process_cursor(cursor):
    with open('agg-d-std.json') as f:
        std = json.load(f)

    client = MongoClient("localhost", 27017)
    db = client.dwh

    with tqdm(total=cursor[0],
              desc="Progress{}".format(cursor[2]),
              position=cursor[2],
              leave=True) as pbar:
        for doc in db.agg_d.find(
            {
                "date": {
                    "$gte": datetime.datetime(year=2012, month=1, day=1)
                },
                "target": {
                    "$lte": 250000
                }
            },
                no_cursor_timeout=True).skip(cursor[1]).limit(cursor[0]):

            time_filter = {
                "$and": [{
                    "date": {
                        "$gte": doc["date"] - relativedelta(days=365)
                    }
                }, {
                    "date": {
                        "$lt": doc["date"]
                    }
                }]
            }

            if db.agg_d.find({"id": doc["id"], **time_filter}).count() < 50:
                pbar.update(1)
                continue

            record = {
                "x": np.empty((365, 5)),
                "y": (doc["target"] - std["mean"]) / std["std"]
            }

            for i in range(365):
                date = doc["date"] - relativedelta(days=365 - i)
                wday = int(date.strftime("%w")) + 1
                record["x"][i][0] = -std["mean"] / std["std"]
                record["x"][i][1] = np.sin(2 * np.pi * wday / 7)
                record["x"][i][2] = np.cos(2 * np.pi * wday / 7)
                record["x"][i][3] = np.sin(2 * np.pi * date.month / 12)
                record["x"][i][4] = np.cos(2 * np.pi * date.month / 12)

            for day in db.agg_d.find({"id": doc["id"], **time_filter}):
                index = 365 - (doc["date"] - day["date"]).days
                record["x"][index][0] = (day["target"] -
                                         std["mean"]) / std["std"]
                wday = int(day["date"].strftime("%w")) + 1
                record["x"][index][1] = np.sin(2 * np.pi * wday / 7)
                record["x"][index][2] = np.cos(2 * np.pi * wday / 7)
                record["x"][index][3] = np.sin(2 * np.pi * doc["month"] / 12)
                record["x"][index][4] = np.cos(2 * np.pi * doc["month"] / 12)

            record["x"] = Binary(_pickle.dumps(record["x"].tolist()))

            if np.random.rand() < TRAIN_RATIO:
                db.train.insert(record)
            else:
                db.test.insert(record)

            pbar.update(1)
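
The cursor argument above is indexed as (limit, skip, position); one plausible driver, sketched under that assumption, splits the matching document count across a process pool. The worker count and the run_parallel name are hypothetical, not from the source:

from multiprocessing import Pool

def run_parallel(total_docs, workers=8):
    per_worker = total_docs // workers
    slices = [(per_worker, i * per_worker, i) for i in range(workers)]  # (limit, skip, position)
    with Pool(workers) as pool:
        pool.map(process_cursor, slices)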
Code example #5
 def checksum(self, item):
     sha = hashlib.sha1()
     sha.update(item.tostring())
     return Binary(sha.digest())
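
A usage sketch for the checksum method above, shown as a standalone function; numpy is assumed because tostring() is an array method, and tobytes() is its non-deprecated spelling:

import hashlib
import numpy as np
from bson.binary import Binary

def checksum(item):
    sha = hashlib.sha1()
    sha.update(item.tobytes())   # equivalent to the legacy tostring()
    return Binary(sha.digest())

digest = checksum(np.arange(10))
assert isinstance(digest, Binary) and len(digest) == 20  # SHA-1 digests are 20 bytes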
Code example #6
File: rundb.py Project: tomtor/fishtest
 def upload_pgn(self, run_id, pgn_zip):
     self.pgndb.insert_one({"run_id": run_id, "pgn_zip": Binary(pgn_zip)})
     return {}
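
A hedged read-back counterpart to upload_pgn above; the document shape comes from the insert, but the helper name and the standalone pgndb handle are assumptions:

def download_pgn(pgndb, run_id):
    doc = pgndb.find_one({"run_id": run_id})
    # Binary subclasses bytes, so the zipped PGN can be returned directly
    return bytes(doc["pgn_zip"]) if doc else None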
Code example #7
File: test_sorting.py Project: jonntd/MontyDB
def test_sort_18(monty_sort, mongo_sort):
    docs = [
        {
            "a": ["x", True]
        },
        {
            "a": None
        },
        {
            "a": []
        },
        {
            "a": [5, []]
        },
        {
            "a": {
                "s": 7
            }
        },
        {
            "a": {
                "s": [9]
            }
        },
        {
            "a": {
                "s": 10
            }
        },
        {
            "a": 6
        },
        {
            "a": 4
        },
        {
            "a": [5, None]
        },
        {
            "a": [5, [1]]
        },
        {
            "a": [Decimal128("4.5"), Binary(b"0")]
        },
        {
            "a": [{
                "s": 5
            }, False]
        },
        {
            "a": [{
                "s": 9
            }]
        },
        {
            "a": [True, "y"]
        },
        {
            "a": []
        },
    ]
    sort = [("a", -1)]

    monty_c = monty_sort(docs, sort)
    mongo_c = mongo_sort(docs, sort)

    for i in range(len(docs)):
        assert next(mongo_c)["_id"] == next(monty_c)["_id"]
Code example #8
    def _do_append(self, collection, version, symbol, item, previous_version):

        data = item.tostring()
        version['base_sha'] = previous_version['base_sha']
        version['up_to'] = previous_version['up_to'] + len(item)
        if len(item) > 0:
            version['segment_count'] = previous_version['segment_count'] + 1
            version['append_count'] = previous_version['append_count'] + 1
            version['append_size'] = previous_version['append_size'] + len(
                data)
        else:
            version['segment_count'] = previous_version['segment_count']
            version['append_count'] = previous_version['append_count']
            version['append_size'] = previous_version['append_size']

        #_CHUNK_SIZE is probably too big if we're only appending single rows of data - perhaps something smaller,
        #or also look at number of appended segments?
        if version['append_count'] < _APPEND_COUNT and version[
                'append_size'] < _APPEND_SIZE:
            version['base_version_id'] = previous_version.get(
                'base_version_id', previous_version['_id'])

            if len(item) > 0:

                segment = {'data': Binary(data), 'compressed': False}
                segment['segment'] = version['up_to'] - 1
                try:
                    collection.update_one(
                        {
                            'symbol': symbol,
                            'sha': checksum(symbol, segment)
                        }, {
                            '$set': segment,
                            '$addToSet': {
                                'parent': version['base_version_id']
                            }
                        },
                        upsert=True)
                except DuplicateKeyError:
                    '''If we get a duplicate key error here, this segment has the same symbol/parent/segment
                       as another chunk, but a different sha. This means that we have 'forked' history.
                       If we concat_and_rewrite here, new chunks will have a different parent id (the _id of this version doc)
                       ...so we can safely write them. 
                       '''
                    self._concat_and_rewrite(collection, version, symbol, item,
                                             previous_version)
                    return

                if 'segment_index' in previous_version:
                    segment_index = self._segment_index(
                        item,
                        existing_index=previous_version.get('segment_index'),
                        start=previous_version['up_to'],
                        new_segments=[
                            segment['segment'],
                        ])
                    if segment_index:
                        version['segment_index'] = segment_index
                logger.debug("Appended segment %d for parent %s" %
                             (segment['segment'], version['_id']))
            else:
                if 'segment_index' in previous_version:
                    version['segment_index'] = previous_version[
                        'segment_index']

        else:  # Too much data has been appended now, so rewrite (and compress/chunk).
            self._concat_and_rewrite(collection, version, symbol, item,
                                     previous_version)
Code example #9
    def _do_write(self,
                  collection,
                  version,
                  symbol,
                  item,
                  previous_version,
                  segment_offset=0):

        sze = int(item.dtype.itemsize * np.prod(item.shape[1:]))

        # chunk and store the data by (uncompressed) size
        chunk_size = _CHUNK_SIZE / sze

        previous_shas = []
        if previous_version:
            previous_shas = set([
                x['sha'] for x in collection.find(
                    {'symbol': symbol},
                    projection={
                        'sha': 1,
                        '_id': 0
                    },
                )
            ])

        length = len(item)

        if segment_offset > 0 and 'segment_index' in previous_version:
            existing_index = previous_version['segment_index']
        else:
            existing_index = None

        segment_index = []
        i = -1

        # Compress
        idxs = xrange(int(np.ceil(float(length) / chunk_size)))
        chunks = [(item[i * chunk_size:(i + 1) * chunk_size]).tostring()
                  for i in idxs]
        compressed_chunks = compress_array(chunks)

        # Write
        bulk = collection.initialize_unordered_bulk_op()
        for i, chunk in zip(idxs, compressed_chunks):
            segment = {'data': Binary(chunk), 'compressed': True}
            segment['segment'] = min(
                (i + 1) * chunk_size - 1, length - 1) + segment_offset
            segment_index.append(segment['segment'])
            sha = checksum(symbol, segment)
            if sha not in previous_shas:
                segment['sha'] = sha
                bulk.find({
                    'symbol': symbol,
                    'sha': sha,
                    'segment': segment['segment']
                }).upsert().update_one({
                    '$set': segment,
                    '$addToSet': {
                        'parent': version['_id']
                    }
                })
            else:
                bulk.find({
                    'symbol': symbol,
                    'sha': sha,
                    'segment': segment['segment']
                }).update_one({'$addToSet': {
                    'parent': version['_id']
                }})
        if i != -1:
            bulk.execute()

        segment_index = self._segment_index(item,
                                            existing_index=existing_index,
                                            start=segment_offset,
                                            new_segments=segment_index)
        if segment_index:
            version['segment_index'] = segment_index
        version['segment_count'] = i + 1
        version['append_size'] = 0
        version['append_count'] = 0

        self.check_written(collection, symbol, version)
Code example #10
 def modify_segment(segment, item):
     segment['segment'] -= 2
     sha = hashlib.sha1()
     sha.update(item.encode('ascii'))
     segment['sha'] = Binary(sha.digest())
     segment.pop('_id')
Code example #11
def upload_scan_info(trackds, sc):
    try:
        #db.scans.insert([sc.original_json])
        atlases = []
        for label in sc.track_label_items:
            # Does this atlas already exist? If not, add it to the collection.
            atlas = None
            result = db.atlases.find({
                "name": label.name,
                "parameters": label.parameters
            })
            if result.count() != 0:
                atlas = result[0]["_id"]
            else:
                atlas = db.atlases.insert({
                    "name": label.name,
                    "parameters": label.parameters
                })

            atlases.append(atlas)
        db.scans.insert([{
            "scan_id":
            sc.scan_id,
            "subject_id":
            sc.subject_id,
            "gender":
            sc.scan_gender,
            "age":
            sc.scan_age,
            "study":
            sc.study,
            "group":
            sc.scan_group,
            "smoothing":
            sc.smoothing,
            "cutoff_angle":
            sc.cutoff_angle,
            "qa_threshold":
            sc.qa_threshold,
            "gfa_threshold":
            sc.gfa_threshold,
            "length_min":
            sc.length_min,
            "length_max":
            sc.length_max,
            "institution":
            sc.institution,
            "reconstruction":
            sc.reconstruction,
            "scanner":
            sc.scanner,
            "n_directions":
            sc.n_directions,
            "max_b_value":
            sc.max_b_value,
            "bvals":
            sc.bvals,
            "bvecs":
            sc.bvecs,
            "label":
            sc.label,
            "trk_space":
            sc.trk_space,
            "atlases":
            list(set(atlases)),
            "sls":
            len(trackds.tracks),
            "header":
            Binary(pickle.dumps(trackds.header, protocol=2)),
            "original_json":
            sc.original_json
        }])
    except Exception, e:
        print "Failed to upload scan info", e
        return False
Code example #12
    def _do_write(self,
                  collection,
                  version,
                  symbol,
                  item,
                  previous_version,
                  segment_offset=0):

        row_size = int(item.dtype.itemsize * np.prod(item.shape[1:]))

        # chunk and store the data by (uncompressed) size
        rows_per_chunk = int(_CHUNK_SIZE / row_size)

        symbol_all_previous_shas, version_shas = set(), set()
        if previous_version:
            symbol_all_previous_shas.update(
                Binary(x['sha']) for x in collection.find({'symbol': symbol},
                                                          projection={
                                                              'sha': 1,
                                                              '_id': 0
                                                          }))

        length = len(item)

        if segment_offset > 0 and 'segment_index' in previous_version:
            existing_index = previous_version['segment_index']
        else:
            existing_index = None

        segment_index = []

        # Compress
        idxs = xrange(int(np.ceil(float(length) / rows_per_chunk)))
        chunks = [
            (item[i * rows_per_chunk:(i + 1) * rows_per_chunk]).tostring()
            for i in idxs
        ]
        compressed_chunks = compress_array(chunks)

        # Write
        bulk = []
        for i, chunk in zip(idxs, compressed_chunks):
            segment = {
                'data':
                Binary(chunk),
                'compressed':
                True,
                'segment':
                min((i + 1) * rows_per_chunk - 1, length - 1) + segment_offset,
            }
            segment_index.append(segment['segment'])
            sha = checksum(symbol, segment)
            segment_spec = {
                'symbol': symbol,
                'sha': sha,
                'segment': segment['segment']
            }

            if ARCTIC_FORWARD_POINTERS_CFG is FwPointersCfg.DISABLED:
                if sha not in symbol_all_previous_shas:
                    segment['sha'] = sha
                    bulk.append(
                        pymongo.UpdateOne(segment_spec, {
                            '$set': segment,
                            '$addToSet': {
                                'parent': version['_id']
                            }
                        },
                                          upsert=True))
                else:
                    bulk.append(
                        pymongo.UpdateOne(
                            segment_spec,
                            {'$addToSet': {
                                'parent': version['_id']
                            }}))
            else:
                version_shas.add(sha)

                # We only keep for the records the ID of the version which created the segment.
                # We also need the uniqueness of the parent field for the (symbol, parent, segment) index,
                # because upon mongo_retry "dirty_append == True", we compress and only the SHA changes
                # which raises DuplicateKeyError if we don't have a unique (symbol, parent, segment).
                set_spec = {'$addToSet': {'parent': version['_id']}}

                if sha not in symbol_all_previous_shas:
                    segment['sha'] = sha
                    set_spec['$set'] = segment
                    bulk.append(
                        pymongo.UpdateOne(segment_spec, set_spec, upsert=True))
                elif ARCTIC_FORWARD_POINTERS_CFG is FwPointersCfg.HYBRID:
                    bulk.append(pymongo.UpdateOne(segment_spec, set_spec))
                # With FwPointersCfg.ENABLED  we make zero updates on existing segment documents, but:
                #   - write only the new segment(s) documents
                #   - write the new version document
                # This helps with performance as we update as few documents as necessary

        if bulk:
            collection.bulk_write(bulk, ordered=False)

        segment_index = self._segment_index(item,
                                            existing_index=existing_index,
                                            start=segment_offset,
                                            new_segments=segment_index)
        if segment_index:
            version['segment_index'] = segment_index
        version['segment_count'] = len(chunks)
        version['append_size'] = 0
        version['append_count'] = 0

        _update_fw_pointers(collection,
                            symbol,
                            version,
                            previous_version,
                            is_append=False,
                            shas_to_add=version_shas)

        self.check_written(collection, symbol, version)
Code example #13
    def _do_append(self, collection, version, symbol, item, previous_version,
                   dirty_append):
        data = item.tostring()
        # Compatibility with Arctic 1.22.0 that didn't write base_sha into the version document
        version['base_sha'] = previous_version.get('base_sha', Binary(b''))
        version['up_to'] = previous_version['up_to'] + len(item)
        if len(item) > 0:
            version['segment_count'] = previous_version['segment_count'] + 1
            version['append_count'] = previous_version['append_count'] + 1
            version['append_size'] = previous_version['append_size'] + len(
                data)
        else:
            version['segment_count'] = previous_version['segment_count']
            version['append_count'] = previous_version['append_count']
            version['append_size'] = previous_version['append_size']

        # _CHUNK_SIZE is probably too big if we're only appending single rows of data - perhaps something smaller,
        # or also look at number of appended segments?
        if not dirty_append and version[
                'append_count'] < _APPEND_COUNT and version[
                    'append_size'] < _APPEND_SIZE:
            version['base_version_id'] = version_base_or_id(previous_version)

            if len(item) > 0:
                segment = {
                    'data': Binary(data),
                    'compressed': False,
                    'segment': version['up_to'] - 1
                }
                sha = checksum(symbol, segment)
                try:
                    # TODO: We could have a common handling with conditional spec-construction for the update spec.
                    #       For now we kept unchanged the existing code which handles backwards pointers.
                    if ARCTIC_FORWARD_POINTERS_CFG is FwPointersCfg.DISABLED:
                        collection.update_one({
                            'symbol': symbol,
                            'sha': sha
                        }, {
                            '$set': segment,
                            '$addToSet': {
                                'parent': version['base_version_id']
                            }
                        },
                                              upsert=True)
                    else:
                        set_spec = {'$set': segment}

                        if ARCTIC_FORWARD_POINTERS_CFG is FwPointersCfg.HYBRID:
                            set_spec['$addToSet'] = {
                                'parent': version['base_version_id']
                            }
                        else:  # FwPointersCfg.ENABLED
                            # We only keep for the records the ID of the version which created the segment.
                            # We also need the uniqueness of the parent field for the (symbol, parent, segment) index,
                            # because upon mongo_retry "dirty_append == True", we compress and only the SHA changes
                            # which raises DuplicateKeyError if we don't have a unique (symbol, parent, segment).
                            set_spec['$addToSet'] = {'parent': version['_id']}

                        collection.update_one({
                            'symbol': symbol,
                            'sha': sha
                        },
                                              set_spec,
                                              upsert=True)
                        _update_fw_pointers(collection,
                                            symbol,
                                            version,
                                            previous_version,
                                            is_append=True,
                                            shas_to_add=(sha, ))
                except DuplicateKeyError:
                    '''If we get a duplicate key error here, this segment has the same symbol/parent/segment
                       as another chunk, but a different sha. This means that we have 'forked' history.
                       If we concat_and_rewrite here, new chunks will have a different parent id (the _id of this version doc)
                       ...so we can safely write them.
                       '''
                    self._concat_and_rewrite(collection, version, symbol, item,
                                             previous_version)
                    return

                if 'segment_index' in previous_version:
                    segment_index = self._segment_index(
                        item,
                        existing_index=previous_version.get('segment_index'),
                        start=previous_version['up_to'],
                        new_segments=[
                            segment['segment'],
                        ])
                    if segment_index:
                        version['segment_index'] = segment_index
                logger.debug("Appended segment %d for parent %s" %
                             (segment['segment'], version['_id']))
            else:
                if 'segment_index' in previous_version:
                    version['segment_index'] = previous_version[
                        'segment_index']

        else:  # Too much data has been appended now, so rewrite (and compress/chunk).
            self._concat_and_rewrite(collection, version, symbol, item,
                                     previous_version)
Code example #14
def object_hook(dct):
    if "$oid" in dct:
        return ObjectId(str(dct["$oid"]))
    if "$ref" in dct:
        return DBRef(dct["$ref"], dct["$id"], dct.get("$db", None))
    if "$date" in dct:
        dtm = dct["$date"]
        # mongoexport 2.6 and newer
        if isinstance(dtm, string_type):
            aware = datetime.datetime.strptime(
                dtm[:23], "%Y-%m-%dT%H:%M:%S.%f").replace(tzinfo=utc)
            offset = dtm[23:]
            if not offset or offset == 'Z':
                # UTC
                return aware
            else:
                if len(offset) == 5:
                    # Offset from mongoexport is in format (+|-)HHMM
                    secs = (int(offset[1:3]) * 3600 + int(offset[3:]) * 60)
                elif ':' in offset and len(offset) == 6:
                    # RFC-3339 format (+|-)HH:MM
                    hours, minutes = offset[1:].split(':')
                    secs = (int(hours) * 3600 + int(minutes) * 60)
                else:
                    # Not RFC-3339 compliant or mongoexport output.
                    raise ValueError("invalid format for offset")
                if offset[0] == "-":
                    secs *= -1
                return aware - datetime.timedelta(seconds=secs)
        # mongoexport 2.6 and newer, time before the epoch (SERVER-15275)
        elif isinstance(dtm, collections.Mapping):
            secs = float(dtm["$numberLong"]) / 1000.0
        # mongoexport before 2.6
        else:
            secs = float(dtm) / 1000.0
        return EPOCH_AWARE + datetime.timedelta(seconds=secs)
    if "$regex" in dct:
        flags = 0
        # PyMongo always adds $options but some other tools may not.
        for opt in dct.get("$options", ""):
            flags |= _RE_OPT_TABLE.get(opt, 0)
        return Regex(dct["$regex"], flags)
    if "$minKey" in dct:
        return MinKey()
    if "$maxKey" in dct:
        return MaxKey()
    if "$binary" in dct:
        if isinstance(dct["$type"], int):
            dct["$type"] = "%02x" % dct["$type"]
        subtype = int(dct["$type"], 16)
        if subtype >= 0xffffff80:  # Handle mongoexport values
            subtype = int(dct["$type"][6:], 16)
        return Binary(base64.b64decode(dct["$binary"].encode()), subtype)
    if "$code" in dct:
        return Code(dct["$code"], dct.get("$scope"))
    if "$uuid" in dct:
        return uuid.UUID(dct["$uuid"])
    if "$undefined" in dct:
        return None
    if "$numberLong" in dct:
        return Int64(dct["$numberLong"])
    if "$timestamp" in dct:
        tsp = dct["$timestamp"]
        return Timestamp(tsp["t"], tsp["i"])
    return dct
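
A minimal usage sketch: a hook like the one above is meant to be passed to json.loads, so extended-JSON values come back as BSON types (the sample document is illustrative):

import json

doc = json.loads(
    '{"_id": {"$oid": "5f2d1a9c8f1b4c3a2e1d0f9a"}, "n": {"$numberLong": "42"}}',
    object_hook=object_hook)
assert isinstance(doc["_id"], ObjectId) and doc["n"] == 42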
Code example #15
File: mongo_cache.py Project: Alchemy2011/spider
 def __setitem__(self, url, result):
     """Save value for this URL
     """
     # record = {'result': result, 'timestamp': datetime.utcnow()}
     record = {'result': Binary(zlib.compress(pickle.dumps(result))), 'timestamp': datetime.utcnow()}
     self.db.webpage.update({'_id': url}, {'$set': record}, upsert=True)
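
A hedged counterpart to the cache write above: fetch, decompress and unpickle the stored result. The KeyError behaviour is an assumption about the cache API, not taken from the project:

 def __getitem__(self, url):
     """Load the cached value for this URL, raising KeyError if it is missing."""
     record = self.db.webpage.find_one({'_id': url})
     if record is None:
         raise KeyError(url + ' does not exist')
     return pickle.loads(zlib.decompress(record['result']))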
Code example #16
def assignPiles(payload):
    username = payload['username']
    gameId = payload['gameId']
    games = Game.objects(gameID=gameId)
    game = games.get(gameID=gameId)

    #refactor this mess later
    dataA = [game.playerADataField for game in games]
    binDataA = dataA[0]

    dataB = [game.playerBDataField for game in games]
    binDataB = dataB[0]

    playerA = pickle.loads(binDataA)
    playerB = pickle.loads(binDataB)
    recipient_session_id = clients[username]
    piles = payload['piles']

    if username == playerA.name:
        for i in range(5):
            playerA.piles[i].cards = piles[i]

        game.playerADataField = Binary(pickle.dumps(playerA))
        game.save()
        if pilesReady(playerB.piles):
            emit('Both Ready', {
                "authorized":
                playerA.authorized,
                "piles": [
                    len(playerB.piles[0].cards),
                    len(playerB.piles[1].cards),
                    len(playerB.piles[2].cards),
                    len(playerB.piles[3].cards),
                    len(playerB.piles[4].cards)
                ]
            },
                 room=recipient_session_id)
            opponent_session_id = clients[playerB.name]
            emit('Both Ready', {
                "authorized":
                playerB.authorized,
                "piles": [
                    len(playerA.piles[0].cards),
                    len(playerA.piles[1].cards),
                    len(playerA.piles[2].cards),
                    len(playerA.piles[3].cards),
                    len(playerA.piles[4].cards)
                ]
            },
                 room=opponent_session_id)

        else:
            emit('Waiting', room=recipient_session_id)

        #TODO maybe run method that'll emit certain event that'll trigger if both have assigned piles else emit event that'll say to wait
        print('playerA emitted')
    elif username == playerB.name:
        for i in range(5):
            playerB.piles[i].cards = piles[i]

        game.playerBDataField = Binary(pickle.dumps(playerB))
        game.save()

        if pilesReady(playerA.piles):
            emit('Both Ready', {
                "authorized":
                playerB.authorized,
                "piles": [
                    len(playerA.piles[0].cards),
                    len(playerA.piles[1].cards),
                    len(playerA.piles[2].cards),
                    len(playerA.piles[3].cards),
                    len(playerA.piles[4].cards)
                ]
            },
                 room=recipient_session_id)
            opponent_session_id = clients[playerA.name]
            emit('Both Ready', {
                "authorized":
                playerA.authorized,
                "piles": [
                    len(playerB.piles[0].cards),
                    len(playerB.piles[1].cards),
                    len(playerB.piles[2].cards),
                    len(playerB.piles[3].cards),
                    len(playerB.piles[4].cards)
                ]
            },
                 room=opponent_session_id)

        else:
            emit('Waiting', room=recipient_session_id)

        emit('todo', room=recipient_session_id)
        print('playerB emitted')
    else:
        print('Something is up.')
Code example #17
        X.extend(faces)
        y.extend(labels)
    return asarray(X), asarray(y)


def get_embedding(model, face_pixels):
    face_pixels = face_pixels.astype('float32')
    mean, std = face_pixels.mean(), face_pixels.std()
    face_pixels = (face_pixels - mean) / std
    samples = expand_dims(face_pixels, axis=0)
    yhat = model.predict(samples)
    return yhat[0]


# load train dataset
trainX, trainy = load_dataset('raw_dataset/')
model = load_model('model/facenet_keras.h5')
print('Loaded Model')
newTrainX = list()
data = []
for face_pixels, label in zip(trainX, trainy):
    embedding = get_embedding(model, face_pixels)
    mydict = {
        "name": label,
        "data": Binary(pickle.dumps(embedding, protocol=2))
    }
    data.append(mydict)
res = mycol.insert_many(data)

print(res)
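
A hedged read-back sketch for the documents inserted above: the pickled embedding is stored as bson.Binary, which pickle.loads accepts directly; the filter simply reuses the first training label:

stored = mycol.find_one({"name": trainy[0]})
embedding_back = pickle.loads(stored["data"])
print(embedding_back.shape)  # the embedding vector that was written (FaceNet typically emits 128 dimensions)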
Code example #18
def startFight(payload):
    username = payload['username']
    battlingPiles = payload['battlingPiles']
    gameId = payload['gameId']
    recipient_session_id = clients[username]
    games = Game.objects(gameID=gameId)
    game = games.get(gameID=gameId)
    #refactor this mess later
    binDataA = game.playerADataField
    binDataB = game.playerBDataField

    playerA = pickle.loads(binDataA)
    playerB = pickle.loads(binDataB)

    if username == playerA.name:
        pileAId = battlingPiles[0]
        pileBId = battlingPiles[1]
        opponent_session_id = clients[playerB.name]
        pileA = []
        for card in playerA.piles[pileAId].cards:
            pileA.append(card['value'])
        pileB = []
        for card in playerB.piles[pileBId].cards:
            pileB.append(card['value'])

        winlossA = fight(pileA, pileB)
        if game.roundsLeft == 0 and game.trickNum == 4:
            if winlossA == winlossstate.WIN:
                playerA.tricksWon += 1
            elif winlossA == winlossstate.LOSS:
                playerB.tricksWon += 1

            if playerA.roundsWon < playerB.roundsWon:
                emit('game over', "lost", room=recipient_session_id)
                emit('game over', "won", room=opponent_session_id)
            elif playerA.roundsWon > playerB.roundsWon:
                emit('game over', "won", room=recipient_session_id)
                emit('game over', "lost", room=opponent_session_id)
            else:
                emit('game over', "drew", room=recipient_session_id)
                emit('game over', "drew", room=opponent_session_id)
        else:
            if game.trickNum == 4:
                resetDeck(game, playerA, playerB)
                resetHandling(playerA, playerB, recipient_session_id,
                              opponent_session_id, winlossA)
            else:
                fightHandling(game, playerA, playerB, pileAId, pileBId,
                              recipient_session_id, opponent_session_id,
                              winlossA)
    else:
        pileAId = battlingPiles[1]
        pileBId = battlingPiles[0]
        opponent_session_id = clients[playerA.name]
        pileA = []
        for card in playerA.piles[pileAId].cards:
            pileA.append(card['value'])
        pileB = []
        for card in playerB.piles[pileBId].cards:
            pileB.append(card['value'])
        winlossB = fight(pileA, pileB)
        playerA.piles[pileAId].cards = []
        playerB.piles[pileBId].cards = []

        if game.roundsLeft == 0 and game.trickNum == 4:
            if winlossB == winlossstate.WIN:
                playerB.tricksWon += 1
            elif winlossB == winlossstate.LOSS:
                playerA.tricksWon += 1  # mirror of the branch above: the opponent takes the trick on a loss
            if playerA.roundsWon < playerB.roundsWon:
                emit('game over', "lost", room=recipient_session_id)
                emit('game over', "won", room=opponent_session_id)
            elif playerA.roundsWon > playerB.roundsWon:
                emit('game over', "won", room=recipient_session_id)
                emit('game over', "lost", room=opponent_session_id)
            else:
                emit('game over', "drew", room=recipient_session_id)
                emit('game over', "drew", room=opponent_session_id)
        else:
            if game.trickNum == 4:
                resetDeck(game, playerA, playerB)
                resetHandling(playerB, playerA, recipient_session_id,
                              opponent_session_id, winlossB)
            else:
                fightHandling(game, playerB, playerA, pileBId, pileAId,
                              recipient_session_id, opponent_session_id,
                              winlossB)

    playerA.authorized = not playerA.authorized
    playerB.authorized = not playerB.authorized
    game.playerADataField = Binary(pickle.dumps(playerA))
    game.playerBDataField = Binary(pickle.dumps(playerB))
    game.save()
Code example #19
 def __init__(self):
     # Ensure id is type 4, regardless of CodecOptions.uuid_representation.
     self.session_id = {'id': Binary(uuid.uuid4().bytes, 4)}
     self.last_use = monotonic.time()
     self._transaction_id = 0
     self.dirty = False
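
A minimal check of the subtype-4 encoding used above: the Binary payload round-trips back to the original uuid.UUID, independent of the driver's uuid_representation setting.

import uuid
from bson.binary import Binary

original = uuid.uuid4()
encoded = Binary(original.bytes, 4)   # subtype 4 = standard UUID
assert encoded.subtype == 4
assert uuid.UUID(bytes=bytes(encoded)) == original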
Code example #20
 def __setitem__(self, url, result):
     result = Binary(zlib.compress(
         pickle.dumps(result)))  # note: the compressed byte string is wrapped in Binary()
     record = {'result': result, 'timestamp': datetime.utcnow()}
     self.collect.update({'_id': url}, {'$set': record},
                         upsert=True)  # insert or update the record; update() has no return value
Code example #21
File: test_sorting.py Project: jonntd/MontyDB
def test_sort_19(monty_sort, mongo_sort):
    docs = [
        {
            "a": ["x", True]
        },
        {
            "a": None
        },
        {
            "a": []
        },
        {
            "a": [5, []]
        },
        {
            "a": {
                "s": 7
            }
        },
        {
            "a": {
                "s": [9]
            }
        },
        {
            "a": {
                "s": 10
            }
        },
        {
            "a": 6
        },
        {
            "a": 4
        },
        {
            "a": [5, None]
        },
        {
            "a": [5, [1]]
        },
        {
            "a": [Decimal128("4.5"), Binary(b"0")]
        },
        {
            "a": [{
                "s": 5
            }, False]
        },
        {
            "a": [{
                "s": 9
            }]
        },
        {
            "a": [True, "y"]
        },
        {
            "a": Binary(b"a")
        },
        {
            "a": b"bytes"
        },
        {
            "a": ["abc"]
        },
        {
            "a": "banana"
        },
        {
            "a": "appple"
        },
        {
            "a": [Regex("^a", "ix")]
        },
        {
            "a": Regex("^b")
        },
        {
            "a": Code("x", {"m": 0})
        },
        {
            "a": Code("y")
        },
        {
            "a": Code("y", {})
        },
        {
            "a": Code("y", {"m": 0})
        },
        {
            "a": MinKey()
        },
        {
            "a": MaxKey()
        },
        {
            "a": Timestamp(0, 1)
        },
        {
            "a": Timestamp(1, 1)
        },
        {
            "a": ObjectId(b"000000000000")
        },
        {
            "a": ObjectId(b"000000000001")
        },
        {
            "a": datetime(1900, 1, 1)
        },
        {
            "a": datetime(1900, 1, 2)
        },
    ]
    sort = [("a", 1)]

    monty_c = monty_sort(docs, sort)
    mongo_c = mongo_sort(docs, sort)

    for i in range(len(docs)):
        assert next(mongo_c)["_id"] == next(monty_c)["_id"]
Code example #22
 def test_mongocrypt_options(self):
     schema_map = bson_data('schema-map.json')
     valid = [({
         'local': {
             'key': b'1' * 96
         }
     }, None),
              ({
                  'aws': {
                      'accessKeyId': '',
                      'secretAccessKey': ''
                  }
              }, schema_map),
              ({
                  'aws': {
                      'accessKeyId': 'foo',
                      'secretAccessKey': 'foo'
                  }
              }, None),
              ({
                  'aws': {
                      'accessKeyId': 'foo',
                      'secretAccessKey': 'foo',
                      'sessionToken': 'token'
                  }
              }, None),
              ({
                  'aws': {
                      'accessKeyId': 'foo',
                      'secretAccessKey': 'foo'
                  },
                  'local': {
                      'key': b'1' * 96
                  }
              }, None), ({
                  'local': {
                      'key': to_base64(b'1' * 96)
                  }
              }, None), ({
                  'local': {
                      'key': Binary(b'1' * 96)
                  }
              }, None),
              ({
                  'gcp': {
                      'email': '*****@*****.**',
                      'privateKey': b'1'
                  }
              }, None),
              ({
                  'gcp': {
                      'email': '*****@*****.**',
                      'privateKey': to_base64(b'1')
                  }
              }, None),
              ({
                  'gcp': {
                      'email': '*****@*****.**',
                      'privateKey': Binary(b'1')
                  }
              }, None)]
     for kms_providers, schema_map in valid:
         opts = MongoCryptOptions(kms_providers, schema_map)
         self.assertEqual(opts.kms_providers,
                          kms_providers,
                          msg=kms_providers)
         self.assertEqual(opts.schema_map, schema_map)
Code example #23
File: test_pickle_store.py Project: ceallen/arctic
def test_read_object_backwards_compat():
    self = create_autospec(PickleStore)
    version = {'blob': Binary(compressHC(cPickle.dumps(object)))}
    assert PickleStore.read(self, sentinel.arctic_lib, version,
                            sentinel.symbol) == object
Code example #24
File: chunkstore.py Project: zoe0316/arctic
    def write(self, symbol, item, metadata=None, chunker=DateChunker(), audit=None, **kwargs):
        """
        Writes data from item to symbol in the database

        Parameters
        ----------
        symbol: str
            the symbol that will be used to reference the written data
        item: Dataframe or Series
            the data to write to the database
        metadata: ?
            optional per symbol metadata
        chunker: Object of type Chunker
            A chunker that chunks the data in item
        audit: dict
            audit information
        kwargs:
            optional keyword args that are passed to the chunker. Includes:
            chunk_size:
                used by chunker to break data into discrete chunks.
                see specific chunkers for more information about this param.
        """
        if not isinstance(item, (DataFrame, Series)):
            raise Exception("Can only chunk DataFrames and Series")

        self._arctic_lib.check_quota()

        previous_shas = []
        doc = {}
        meta = {}

        doc[SYMBOL] = symbol
        doc[LEN] = len(item)
        doc[SERIALIZER] = self.serializer.TYPE
        doc[CHUNKER] = chunker.TYPE
        doc[USERMETA] = metadata

        sym = self._get_symbol_info(symbol)
        if sym:
            previous_shas = set([Binary(x[SHA]) for x in self._collection.find({SYMBOL: symbol},
                                                                               projection={SHA: True, '_id': False},
                                                                               )])
        ops = []
        meta_ops = []
        chunk_count = 0

        for start, end, chunk_size, record in chunker.to_chunks(item, **kwargs):
            chunk_count += 1
            data = self.serializer.serialize(record)
            doc[CHUNK_SIZE] = chunk_size
            doc[METADATA] = {'columns': data[METADATA][COLUMNS] if COLUMNS in data[METADATA] else ''}
            meta = data[METADATA]

            for i in xrange(int(len(data[DATA]) / MAX_CHUNK_SIZE + 1)):
                chunk = {DATA: Binary(data[DATA][i * MAX_CHUNK_SIZE: (i + 1) * MAX_CHUNK_SIZE])}
                chunk[SEGMENT] = i
                chunk[START] = meta[START] = start
                chunk[END] = meta[END] = end
                chunk[SYMBOL] = meta[SYMBOL] = symbol
                dates = [chunker.chunk_to_str(start), chunker.chunk_to_str(end), str(chunk[SEGMENT]).encode('ascii')]
                chunk[SHA] = self._checksum(dates, chunk[DATA])

                meta_ops.append(pymongo.ReplaceOne({SYMBOL: symbol,
                                                    START: start,
                                                    END: end},
                                                   meta, upsert=True))

                if chunk[SHA] not in previous_shas:
                    ops.append(pymongo.UpdateOne({SYMBOL: symbol,
                                                  START: start,
                                                  END: end,
                                                  SEGMENT: chunk[SEGMENT]},
                                                 {'$set': chunk}, upsert=True))
                else:
                    # already exists, don't need to update in mongo
                    previous_shas.remove(chunk[SHA])

        if ops:
            self._collection.bulk_write(ops, ordered=False)
        if meta_ops:
            self._mdata.bulk_write(meta_ops, ordered=False)

        doc[CHUNK_COUNT] = chunk_count
        doc[APPEND_COUNT] = 0

        if previous_shas:
            mongo_retry(self._collection.delete_many)({SYMBOL: symbol, SHA: {'$in': list(previous_shas)}})

        mongo_retry(self._symbols.update_one)({SYMBOL: symbol},
                                              {'$set': doc},
                                              upsert=True)
        if audit is not None:
            audit['symbol'] = symbol
            audit['action'] = 'write'
            audit['chunks'] = chunk_count
            self._audit.insert_one(audit)
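
One plausible shape for the _checksum helper used above, offered as an assumption rather than Arctic's exact implementation: hash the chunk-identifying byte strings and the raw chunk payload, and wrap the SHA-1 digest in Binary so it can be stored and compared against previous_shas.

import hashlib
from bson.binary import Binary

def _checksum(fields, data):
    """SHA-1 over the chunk-identifying byte strings followed by the chunk payload."""
    sha = hashlib.sha1()
    for field in fields:
        sha.update(field)   # e.g. start date, end date and segment number, already as bytes
    sha.update(data)
    return Binary(sha.digest())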
Code example #25
    def test_basic_encode(self):
        self.assertRaises(TypeError, BSON.encode, 100)
        self.assertRaises(TypeError, BSON.encode, "hello")
        self.assertRaises(TypeError, BSON.encode, None)
        self.assertRaises(TypeError, BSON.encode, [])

        self.assertEqual(BSON.encode({}), BSON(b("\x05\x00\x00\x00\x00")))
        self.assertEqual(
            BSON.encode({"test": u"hello world"}),
            b("\x1B\x00\x00\x00\x02\x74\x65\x73\x74\x00\x0C\x00"
              "\x00\x00\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C"
              "\x64\x00\x00"))
        self.assertEqual(
            BSON.encode({u"mike": 100}),
            b("\x0F\x00\x00\x00\x10\x6D\x69\x6B\x65\x00\x64\x00"
              "\x00\x00\x00"))
        self.assertEqual(
            BSON.encode({"hello": 1.5}),
            b("\x14\x00\x00\x00\x01\x68\x65\x6C\x6C\x6F\x00\x00"
              "\x00\x00\x00\x00\x00\xF8\x3F\x00"))
        self.assertEqual(BSON.encode({"true": True}),
                         b("\x0C\x00\x00\x00\x08\x74\x72\x75\x65\x00\x01\x00"))
        self.assertEqual(
            BSON.encode({"false": False}),
            b("\x0D\x00\x00\x00\x08\x66\x61\x6C\x73\x65\x00\x00"
              "\x00"))
        self.assertEqual(
            BSON.encode({"empty": []}),
            b("\x11\x00\x00\x00\x04\x65\x6D\x70\x74\x79\x00\x05"
              "\x00\x00\x00\x00\x00"))
        self.assertEqual(
            BSON.encode({"none": {}}),
            b("\x10\x00\x00\x00\x03\x6E\x6F\x6E\x65\x00\x05\x00"
              "\x00\x00\x00\x00"))
        self.assertEqual(
            BSON.encode({"test": Binary(b("test"), 0)}),
            b("\x14\x00\x00\x00\x05\x74\x65\x73\x74\x00\x04\x00"
              "\x00\x00\x00\x74\x65\x73\x74\x00"))
        self.assertEqual(
            BSON.encode({"test": Binary(b("test"), 2)}),
            b("\x18\x00\x00\x00\x05\x74\x65\x73\x74\x00\x08\x00"
              "\x00\x00\x02\x04\x00\x00\x00\x74\x65\x73\x74\x00"))
        self.assertEqual(
            BSON.encode({"test": Binary(b("test"), 128)}),
            b("\x14\x00\x00\x00\x05\x74\x65\x73\x74\x00\x04\x00"
              "\x00\x00\x80\x74\x65\x73\x74\x00"))
        self.assertEqual(BSON.encode({"test": None}),
                         b("\x0B\x00\x00\x00\x0A\x74\x65\x73\x74\x00\x00"))
        self.assertEqual(
            BSON.encode({"date": datetime.datetime(2007, 1, 8, 0, 30, 11)}),
            b("\x13\x00\x00\x00\x09\x64\x61\x74\x65\x00\x38\xBE"
              "\x1C\xFF\x0F\x01\x00\x00\x00"))
        self.assertEqual(
            BSON.encode({"regex": re.compile(b("a*b"), re.IGNORECASE)}),
            b("\x12\x00\x00\x00\x0B\x72\x65\x67\x65\x78\x00\x61"
              "\x2A\x62\x00\x69\x00\x00"))
        self.assertEqual(
            BSON.encode({"$where": Code("test")}),
            b("\x16\x00\x00\x00\r$where\x00\x05\x00\x00\x00test"
              "\x00\x00"))
        self.assertEqual(
            BSON.encode(
                {"$field": Code("function(){ return true;}", scope=None)}),
            b("+\x00\x00\x00\r$field\x00\x1a\x00\x00\x00"
              "function(){ return true;}\x00\x00"))
        self.assertEqual(
            BSON.encode({
                "$field":
                Code("return function(){ return x; }", scope={'x': False})
            }),
            b("=\x00\x00\x00\x0f$field\x000\x00\x00\x00\x1f\x00"
              "\x00\x00return function(){ return x; }\x00\t\x00"
              "\x00\x00\x08x\x00\x00\x00\x00"))
        a = ObjectId(b("\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B"))
        self.assertEqual(
            BSON.encode({"oid": a}),
            b("\x16\x00\x00\x00\x07\x6F\x69\x64\x00\x00\x01\x02"
              "\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x00"))
        self.assertEqual(
            BSON.encode({"ref": DBRef("coll", a)}),
            b("\x2F\x00\x00\x00\x03ref\x00\x25\x00\x00\x00\x02"
              "$ref\x00\x05\x00\x00\x00coll\x00\x07$id\x00\x00"
              "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x00"
              "\x00"))
Code example #26
File: chunkstore.py Project: zoe0316/arctic
    def __update(self, sym, item, metadata=None, combine_method=None, chunk_range=None, audit=None):
        '''
        helper method used by update and append since they very closely
        resemble each other. They really differ only by the combine method.
        append will combine existing data with new data (within a chunk),
        whereas update will replace existing data with new data (within a
        chunk).
        '''
        if not isinstance(item, (DataFrame, Series)):
            raise Exception("Can only chunk DataFrames and Series")

        self._arctic_lib.check_quota()

        symbol = sym[SYMBOL]

        if chunk_range is not None:
            self.delete(symbol, chunk_range)
            sym = self._get_symbol_info(symbol)

        ops = []
        meta_ops = []
        chunker = CHUNKER_MAP[sym[CHUNKER]]

        appended = 0
        new_chunks = 0
        for start, end, _, record in chunker.to_chunks(item, chunk_size=sym[CHUNK_SIZE]):
            # read out matching chunks
            df = self.read(symbol, chunk_range=chunker.to_range(start, end), filter_data=False)
            # assuming they exist, update them and store the original chunk
            # range for later use
            if len(df) > 0:
                record = combine_method(df, record)
                if record is None or record.equals(df):
                    continue

                sym[APPEND_COUNT] += len(record) - len(df)
                appended += len(record) - len(df)
                sym[LEN] += len(record) - len(df)
            else:
                sym[CHUNK_COUNT] += 1
                new_chunks += 1
                sym[LEN] += len(record)

            data = SER_MAP[sym[SERIALIZER]].serialize(record)
            meta = data[METADATA]
            
            chunk_count = int(len(data[DATA]) / MAX_CHUNK_SIZE + 1)
            seg_count = self._collection.count({SYMBOL: symbol, START: start, END: end})
            # remove old segments for this chunk in case we now have less
            # segments than we did before
            if seg_count > chunk_count:
                self._collection.delete_many({SYMBOL: symbol,
                                              START: start,
                                              END: end,
                                              SEGMENT: {'$gte': chunk_count}})


            for i in xrange(chunk_count):
                chunk = {DATA: Binary(data[DATA][i * MAX_CHUNK_SIZE: (i + 1) * MAX_CHUNK_SIZE])}
                chunk[SEGMENT] = i
                chunk[START] = start
                chunk[END] = end
                chunk[SYMBOL] = symbol
                dates = [chunker.chunk_to_str(start), chunker.chunk_to_str(end), str(chunk[SEGMENT]).encode('ascii')]
                sha = self._checksum(dates, data[DATA])
                chunk[SHA] = sha
                ops.append(pymongo.UpdateOne({SYMBOL: symbol,
                                              START: start,
                                              END: end,
                                              SEGMENT: chunk[SEGMENT]},
                                             {'$set': chunk}, upsert=True))
                meta_ops.append(pymongo.UpdateOne({SYMBOL: symbol,
                                                   START: start,
                                                   END: end},
                                                  {'$set': meta}, upsert=True))
        if ops:
            self._collection.bulk_write(ops, ordered=False)
            self._mdata.bulk_write(meta_ops, ordered=False)

        sym[USERMETA] = metadata
        self._symbols.replace_one({SYMBOL: symbol}, sym)
        if audit is not None:
            if new_chunks > 0:
                audit['new_chunks'] = new_chunks
            if appended > 0:
                audit['appended_rows'] = appended
            self._audit.insert_one(audit)
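The docstring above notes that update and append share this helper and differ only in the combine method. Below is a minimal sketch of the two combine strategies it describes; the function names and pandas semantics are illustrative assumptions, not Arctic's actual helpers.

# Hedged sketch of the two combine strategies described in the docstring;
# names and exact semantics are illustrative, not Arctic's code.
import pandas as pd

def combine_for_update(existing, new):
    # "update": new rows replace existing rows with the same index,
    # untouched existing rows are kept.
    return new.combine_first(existing)

def combine_for_append(existing, new):
    # "append": existing and new data are simply concatenated within the chunk.
    return pd.concat([existing, new])

old = pd.DataFrame({"v": [1, 2]}, index=pd.to_datetime(["2020-01-01", "2020-01-02"]))
new = pd.DataFrame({"v": [9, 3]}, index=pd.to_datetime(["2020-01-02", "2020-01-03"]))
print(combine_for_update(old, new))  # value for 2020-01-02 becomes 9
print(combine_for_append(old, new))  # 2020-01-02 appears twice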
Code example #27
def _authenticate_scram(credentials, sock_info, mechanism):
    """Authenticate using SCRAM."""

    username = credentials.username
    if mechanism == 'SCRAM-SHA-256':
        digest = "sha256"
        digestmod = hashlib.sha256
        data = saslprep(credentials.password).encode("utf-8")
    else:
        digest = "sha1"
        digestmod = hashlib.sha1
        data = _password_digest(username, credentials.password).encode("utf-8")
    source = credentials.source
    cache = credentials.cache

    # Make local
    _hmac = hmac.HMAC

    user = username.encode("utf-8").replace(b"=", b"=3D").replace(b",", b"=2C")
    nonce = standard_b64encode(
        (("%s" % (SystemRandom().random(), ))[2:]).encode("utf-8"))
    first_bare = b"n=" + user + b",r=" + nonce

    cmd = SON([('saslStart', 1), ('mechanism', mechanism),
               ('payload', Binary(b"n,," + first_bare)), ('autoAuthorize', 1)])
    res = sock_info.command(source, cmd)

    server_first = res['payload']
    parsed = _parse_scram_response(server_first)
    iterations = int(parsed[b'i'])
    if iterations < 4096:
        raise OperationFailure("Server returned an invalid iteration count.")
    salt = parsed[b's']
    rnonce = parsed[b'r']
    if not rnonce.startswith(nonce):
        raise OperationFailure("Server returned an invalid nonce.")

    without_proof = b"c=biws,r=" + rnonce
    if cache.data:
        client_key, server_key, csalt, citerations = cache.data
    else:
        client_key, server_key, csalt, citerations = None, None, None, None

    # Salt and / or iterations could change for a number of different
    # reasons. Either changing invalidates the cache.
    if not client_key or salt != csalt or iterations != citerations:
        salted_pass = _hi(digest, data, standard_b64decode(salt), iterations)
        client_key = _hmac(salted_pass, b"Client Key", digestmod).digest()
        server_key = _hmac(salted_pass, b"Server Key", digestmod).digest()
        cache.data = (client_key, server_key, salt, iterations)
    stored_key = digestmod(client_key).digest()
    auth_msg = b",".join((first_bare, server_first, without_proof))
    client_sig = _hmac(stored_key, auth_msg, digestmod).digest()
    client_proof = b"p=" + standard_b64encode(_xor(client_key, client_sig))
    client_final = b",".join((without_proof, client_proof))

    server_sig = standard_b64encode(
        _hmac(server_key, auth_msg, digestmod).digest())

    cmd = SON([('saslContinue', 1), ('conversationId', res['conversationId']),
               ('payload', Binary(client_final))])
    res = sock_info.command(source, cmd)

    parsed = _parse_scram_response(res['payload'])
    if not compare_digest(parsed[b'v'], server_sig):
        raise OperationFailure("Server returned an invalid signature.")

    # Depending on how it's configured, Cyrus SASL (which the server uses)
    # requires a third empty challenge.
    if not res['done']:
        cmd = SON([('saslContinue', 1),
                   ('conversationId', res['conversationId']),
                   ('payload', Binary(b''))])
        res = sock_info.command(source, cmd)
        if not res['done']:
            raise OperationFailure('SASL conversation failed to complete.')
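The server payloads handled above are RFC 5802 messages: comma-separated attr=value pairs such as b"r=<nonce>,s=<base64 salt>,i=4096". A hedged sketch of what a parser in the spirit of _parse_scram_response could look like (an illustration, not PyMongo's implementation):

# Split a SCRAM payload into its attributes; values may themselves contain
# '=', so split each pair only on the first one.
def parse_scram_response(payload):
    return dict(item.split(b"=", 1) for item in payload.split(b","))

# parse_scram_response(b"r=abc123,s=QSXCR+Q6sek8bf92,i=4096")
# -> {b"r": b"abc123", b"s": b"QSXCR+Q6sek8bf92", b"i": b"4096"}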
Code example #28
    def run_operation(self, sessions, collection, operation):
        original_collection = collection
        name = camel_to_snake(operation['name'])
        if name == 'run_command':
            name = 'command'
        elif name == 'download_by_name':
            name = 'open_download_stream_by_name'
        elif name == 'download':
            name = 'open_download_stream'
        elif name == 'map_reduce':
            self.skipTest('PyMongo does not support mapReduce')
        elif name == 'count':
            self.skipTest('PyMongo does not support count')

        database = collection.database
        collection = database.get_collection(collection.name)
        if 'collectionOptions' in operation:
            collection = collection.with_options(
                **self.parse_options(operation['collectionOptions']))

        object_name = self.get_object_name(operation)
        if object_name == 'gridfsbucket':
            # Only create the GridFSBucket when we need it (for the gridfs
            # retryable reads tests).
            obj = GridFSBucket(database, bucket_name=collection.name)
        else:
            objects = {
                'client': database.client,
                'database': database,
                'collection': collection,
                'testRunner': self
            }
            objects.update(sessions)
            obj = objects[object_name]

        # Combine arguments with options and handle special cases.
        arguments = operation.get('arguments', {})
        arguments.update(arguments.pop("options", {}))
        self.parse_options(arguments)

        cmd = getattr(obj, name)

        with_txn_callback = functools.partial(self.run_operations,
                                              sessions,
                                              original_collection,
                                              in_with_transaction=True)
        prepare_spec_arguments(operation, arguments, name, sessions,
                               with_txn_callback)

        if name == 'run_on_thread':
            args = {'sessions': sessions, 'collection': collection}
            args.update(arguments)
            arguments = args
        result = cmd(**dict(arguments))

        # Cleanup open change stream cursors.
        if name == "watch":
            self.addCleanup(result.close)

        if name == "aggregate":
            if arguments["pipeline"] and "$out" in arguments["pipeline"][-1]:
                # Read from the primary to ensure causal consistency.
                out = collection.database.get_collection(
                    arguments["pipeline"][-1]["$out"],
                    read_preference=ReadPreference.PRIMARY)
                return out.find()
        if 'download' in name:
            result = Binary(result.read())

        if isinstance(result, Cursor) or isinstance(result, CommandCursor):
            return list(result)

        return result
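The spec files name operations in camelCase, and the runner converts them to PyMongo's snake_case method names via camel_to_snake before applying the special-case renames at the top. A hedged sketch of such a converter (the real test-utility implementation may differ):

# camelCase -> snake_case, e.g. "findOneAndUpdate" -> "find_one_and_update"
import re

def camel_to_snake(camel):
    # insert an underscore before every capital letter, then lowercase
    return re.sub(r"([A-Z])", r"_\1", camel).lower()

assert camel_to_snake("insertOne") == "insert_one"
assert camel_to_snake("findOneAndUpdate") == "find_one_and_update"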
Code example #29
def text_uuid_to_binary(text_uuid):
    """Convert text TypedUUID to binary form"""
    try:
        return Binary(uuid.UUID(text_uuid).bytes, OLD_UUID_SUBTYPE)
    except Exception as exc:
        raise ValueError('Failed to convert text UUID to binary', exc)
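A quick usage sketch of the helper above, assuming PyMongo's bson package for Binary and the legacy OLD_UUID_SUBTYPE (subtype 3):

import uuid
from bson.binary import Binary, OLD_UUID_SUBTYPE

text = "12345678-1234-5678-1234-567812345678"
as_binary = Binary(uuid.UUID(text).bytes, OLD_UUID_SUBTYPE)
assert as_binary.subtype == OLD_UUID_SUBTYPE                  # legacy subtype 3
assert uuid.UUID(bytes=bytes(as_binary)) == uuid.UUID(text)   # round-trips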
Code example #30
File: chunkstore.py Project: systemtrader/arctic
    def __update(self, sym, item, combine_method=None, chunk_range=None):
        '''
        Helper method used by update and append, which closely resemble
        each other and really differ only in the combine method:
        append will combine existing data with new data (within a chunk),
        whereas update will replace existing data with new data (within a
        chunk).
        '''
        if not isinstance(item, (DataFrame, Series)):
            raise Exception("Can only chunk DataFrames and Series")

        symbol = sym[SYMBOL]

        if chunk_range is not None:
            self.delete(symbol, chunk_range)
            sym = self._get_symbol_info(symbol)

        bulk = self._collection.initialize_unordered_bulk_op()
        op = False
        chunker = CHUNKER_MAP[sym[CHUNKER]]

        for start, end, _, record in chunker.to_chunks(
                item, chunk_size=sym[CHUNK_SIZE]):
            # read out matching chunks
            df = self.read(symbol,
                           chunk_range=chunker.to_range(start, end),
                           filter_data=False)
            # assuming they exist, update them and store the original chunk
            # range for later use
            if len(df) > 0:
                record = combine_method(df, record)
                if record is None or record.equals(df):
                    continue

                sym[APPEND_COUNT] += len(record)
                sym[LEN] += len(record) - len(df)
            else:
                sym[CHUNK_COUNT] += 1
                sym[LEN] += len(record)

            data = SER_MAP[sym[SERIALIZER]].serialize(record)
            op = True

            # remove old segments for this chunk in case we now have less
            # segments than we did before
            chunk_count = int(len(data[DATA]) / MAX_CHUNK_SIZE + 1)
            seg_count = self._collection.count({
                SYMBOL: symbol,
                START: start,
                END: end
            })
            if seg_count > chunk_count:
                # if chunk count is 1, the segment id will be -1, not 1
                self._collection.delete_many({
                    SYMBOL: symbol,
                    START: start,
                    END: end,
                    SEGMENT: {
                        '$gt': seg_count if chunk_count > 1 else -1
                    }
                })

            size_chunked = chunk_count > 1
            for i in xrange(chunk_count):
                chunk = {
                    DATA:
                    Binary(data[DATA][i * MAX_CHUNK_SIZE:(i + 1) *
                                      MAX_CHUNK_SIZE])
                }
                chunk[METADATA] = data[METADATA]
                if size_chunked:
                    chunk[SEGMENT] = i
                else:
                    chunk[SEGMENT] = -1
                chunk[START] = start
                chunk[END] = end
                chunk[SYMBOL] = symbol
                dates = [
                    chunker.chunk_to_str(start),
                    chunker.chunk_to_str(end),
                    str(chunk[SEGMENT]).encode('ascii')
                ]
                sha = self._checksum(dates, data[DATA])
                chunk[SHA] = sha
                bulk.find({
                    SYMBOL: symbol,
                    START: start,
                    END: end,
                    SEGMENT: chunk[SEGMENT]
                }).upsert().update_one({'$set': chunk})
        if op:
            bulk.execute()

        self._symbols.replace_one({SYMBOL: symbol}, sym)
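Both chunkstore variants above stamp each segment with a SHA computed by self._checksum(dates, data[DATA]). A hedged sketch of a checksum helper in that spirit (illustrative only; Arctic's real helper may differ):

import hashlib
from bson.binary import Binary

def checksum(fields, data):
    # Hash the chunk's string keys (start date, end date, segment id) followed
    # by the serialized payload, and wrap the digest for storage in MongoDB.
    sha = hashlib.sha1()
    for field in fields:   # each field is already an ASCII-encoded bytes value
        sha.update(field)
    sha.update(data)
    return Binary(sha.digest())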