def test_exception_wrapping(self):
    # No matter what exception is raised while trying to decode BSON,
    # the final exception always matches InvalidBSON and the original
    # traceback is preserved.

    # Invalid Python regex, though valid PCRE.
    # Causes an error in re.compile().
    bad_doc = BSON.encode({'r': Regex(r'[\w-\.]')})

    try:
        decode_all(bad_doc)
    except InvalidBSON:
        exc_type, exc_value, exc_tb = sys.exc_info()
        # Original re error was captured and wrapped in InvalidBSON.
        self.assertEqual(exc_value.args[0], 'bad character range')

        # Traceback includes bson module's call into re module.
        for filename, lineno, fname, text in traceback.extract_tb(exc_tb):
            if filename.endswith('re.py') and fname == 'compile':
                # Traceback was correctly preserved.
                break
        else:
            self.fail('Traceback not captured')
    else:
        self.fail('InvalidBSON not raised')
def rows_func(rows):
    try:
        bson_data = bson.decode_all(rows)[0]
        rows_data = bson_data['array']
        # key_indices = bson_data['keyindices']
        acc_wrapper._set_data(list(init_acc_values))
        for row in rows_data:
            row_wrapper.load_row(row)
            aggregator_function(acc_wrapper, row_wrapper)
        result = []
        for key_index in key_indices_wrapper:
            answer = rows_data[0][key_index]
            result.append(answer)
        result.extend(acc_wrapper._get_data())
        return numpy_to_bson_friendly(result)
    except Exception as e:
        try:
            e_msg = unicode(e)
        except:
            e_msg = u'<unable to get exception message>'
        try:
            e_row = unicode(bson.decode_all(rows)[0]['array'])
        except:
            e_row = u'<unable to get row data>'
        try:
            msg = base64.urlsafe_b64encode(
                (u'Exception: %s running UDF on row: %s' % (e_msg, e_row)).encode('utf-8'))
        except:
            msg = base64.urlsafe_b64encode(
                u'Exception running UDF, unable to provide details.'.encode('utf-8'))
        raise IaPyWorkerError(msg)
def main():
    print 'Are you sure you want to run this?'
    return
    db.command('dropDatabase')
    print 'Creating indices'
    print ' leden'
    Es.ensure_indices()
    print ' moderation'
    Es_mod.ensure_indices()
    print ' planning'
    Es_plan.ensure_indices()
    print ' poll'
    Es_poll.ensure_indices()
    print ' regl'
    Es_regl.ensure_indices()
    print ' subscriptions'
    Es_subscr.ensure_indices()
    print
    print 'Restoring data'
    print ' entities'
    for e in bson.decode_all(open('entities.bsons').read()):
        db['entities'].save(e)
    print ' relations'
    for e in bson.decode_all(open('relations.bsons').read()):
        db['relations'].save(e)
    print ' events'
    for e in bson.decode_all(open('events.bsons').read()):
        db['events'].save(e)
def translate_response(self, s=None, force_reply=False):
    """
    translate_response(s=None, force_reply=False)
    : translate incoming loco packet

    s : default to None, socket, None if want to using default socket
    force_reply : default to False, return result of command-sent-by-pykakao only
                  (other packets will be sent to handle_packet)
    """
    if not s:
        if not self.s:
            print "error translate_response: connection required"
            return None
        else:
            s = self.s

    result = {}

    head = s.recv(4)
    if not head:
        print "error translate_response: connection closed"
        s.close()
        s = None
        return None
    elif head == "\xFF\xFF\xFF\xFF":
        body = s.recv(18)

        result["packet_id"] = head
        result["status_code"] = body[0:2]
        result["command"] = body[2:13].replace("\x00", "")
        result["body_type"] = body[13:14]
        result["body_length"] = struct.unpack("I", body[14:18])[0]
        result["body"] = decode_all(s.recv(result["body_length"]))[0]
        return result
    else:
        body_length = struct.unpack("I", head)[0]
        body = self.dec_aes(s.recv(body_length))

        result["packet_id"] = body[0:4]
        result["status_code"] = body[4:6]
        result["command"] = body[6:17].replace("\x00", "")
        result["body_type"] = body[17:18]
        result["body_length"] = struct.unpack("I", body[18:22])[0]
        result["body"] = decode_all(body[22:])[0]

    if result["packet_id"] != "\xFF\xFF\xFF\xFF" and force_reply:
        self.handle_packet(result)
        return self.translate_response(s, force_reply)
    else:
        return result
def test_exception_wrapping(self):
    # No matter what exception is raised while trying to decode BSON,
    # the final exception always matches InvalidBSON.

    # {'s': '\xff'}, will throw attempting to decode utf-8.
    bad_doc = b'\x0f\x00\x00\x00\x02s\x00\x03\x00\x00\x00\xff\x00\x00\x00'

    with self.assertRaises(InvalidBSON) as context:
        decode_all(bad_doc)

    self.assertIn("codec can't decode byte 0xff", str(context.exception))
def test_legacy_csharp_uuid(self):
    data = self.csharp_data

    # Test decoding
    docs = bson.decode_all(data, CodecOptions(SON, False, PYTHON_LEGACY))
    for d in docs:
        self.assertNotEqual(d['newguid'], uuid.UUID(d['newguidstring']))

    docs = bson.decode_all(data, CodecOptions(SON, False, STANDARD))
    for d in docs:
        self.assertNotEqual(d['newguid'], uuid.UUID(d['newguidstring']))

    docs = bson.decode_all(data, CodecOptions(SON, False, JAVA_LEGACY))
    for d in docs:
        self.assertNotEqual(d['newguid'], uuid.UUID(d['newguidstring']))

    docs = bson.decode_all(data, CodecOptions(SON, False, CSHARP_LEGACY))
    for d in docs:
        self.assertEqual(d['newguid'], uuid.UUID(d['newguidstring']))

    # Test encoding
    encoded = b''.join([
        bson.BSON.encode(doc, False, CodecOptions(uuid_representation=PYTHON_LEGACY))
        for doc in docs])
    self.assertNotEqual(data, encoded)

    encoded = b''.join([
        bson.BSON.encode(doc, False, CodecOptions(uuid_representation=STANDARD))
        for doc in docs])
    self.assertNotEqual(data, encoded)

    encoded = b''.join([
        bson.BSON.encode(doc, False, CodecOptions(uuid_representation=JAVA_LEGACY))
        for doc in docs])
    self.assertNotEqual(data, encoded)

    encoded = b''.join([
        bson.BSON.encode(doc, False, CodecOptions(uuid_representation=CSHARP_LEGACY))
        for doc in docs])
    self.assertEqual(data, encoded)
def _mongodb_decode_wire_protocol(message):
    """ http://www.mongodb.org/display/DOCS/Mongo+Wire+Protocol """
    MONGO_OPS = {
        2001: 'msg',
        2002: 'insert',
        2003: 'reserved',
        2004: 'query',
        2005: 'get_more',
        2006: 'delete',
        2007: 'kill_cursors',
    }
    _, msg_id, _, opcode, _ = struct.unpack('<iiiii', message[:20])
    op = MONGO_OPS.get(opcode, 'unknown')
    zidx = 20
    collection_name_size = message[zidx:].find('\0')
    collection_name = message[zidx:zidx+collection_name_size]
    zidx += collection_name_size + 1
    skip, limit = struct.unpack('<ii', message[zidx:zidx+8])
    zidx += 8
    msg = ""
    try:
        if message[zidx:]:
            msg = bson.decode_all(message[zidx:], as_class=dict, tz_aware=False)
    except Exception, e:
        msg = 'invalid bson'
def test_backports(self):
    doc = BSON.encode({"tuple": (1, 2)})
    exp = {"tuple": [1, 2]}
    options = CodecOptions(uuid_representation=ALL_UUID_REPRESENTATIONS[0],
                           tz_aware=False, document_class=dict)

    self.assertEqual(
        {"tuple": [1, 2]},
        BSON.encode(
            {"tuple": (1, 2)}, codec_options=options,
            uuid_subtype=ALL_UUID_REPRESENTATIONS[1]).decode())
    self.assertEqual(exp, doc.decode(
        as_class=SON,
        tz_aware=True,
        uuid_subtype=ALL_UUID_REPRESENTATIONS[1],
        codec_options=options))
    self.assertEqual([exp], list(decode_iter(
        doc,
        as_class=SON,
        tz_aware=True,
        uuid_subtype=ALL_UUID_REPRESENTATIONS[1],
        codec_options=options)))
    self.assertEqual([exp], list(decode_file_iter(
        StringIO(doc),
        as_class=SON,
        tz_aware=True,
        uuid_subtype=ALL_UUID_REPRESENTATIONS[1],
        codec_options=options)))
    self.assertEqual([exp], decode_all(
        doc, SON, True, ALL_UUID_REPRESENTATIONS[1], True, options))
def _mongodb_decode_wire_protocol(message):
    """ http://www.mongodb.org/display/DOCS/Mongo+Wire+Protocol """
    MONGO_OPS = {
        1000: "msg",
        2001: "update",
        2002: "insert",
        2003: "reserved",
        2004: "query",
        2005: "get_more",
        2006: "delete",
        2007: "kill_cursors",
    }
    _, msg_id, _, opcode, _ = struct.unpack("<iiiii", message[:20])
    op = MONGO_OPS.get(opcode, "unknown")
    zidx = 20
    collection_name_size = message[zidx:].find("\0")
    collection_name = message[zidx : zidx + collection_name_size]
    if ".system." in collection_name:
        return
    zidx += collection_name_size + 1
    skip, limit = struct.unpack("<ii", message[zidx : zidx + 8])
    zidx += 8
    msg = ""
    try:
        if message[zidx:]:
            msg = bson.decode_all(message[zidx:])
    except:
        msg = "invalid bson"
    return {"op": op, "collection": collection_name, "msg_id": msg_id,
            "skip": skip, "limit": limit, "query": msg}
def _decode_docs(message, deep_decode):
    try:
        if deep_decode:
            return bson.decode_all(message)
        return [dict(not_decoded=True)]
    except InvalidBSON, e:
        return [dict(decode_error='invalid bson: %s' % e)]
def _checkForUpdate( self ):
    """ Update the agent if possible """
    res = urllib2.urlopen( self.settings.version_url % { 'key' : self.settings.mms_key } )

    resBson = None

    try:
        resBson = bson.decode_all( res.read() )
    finally:
        if res is not None:
            res.close()
            res = None

    if len(resBson) != 1:
        return

    versionResponse = resBson[0]

    if 'status' not in versionResponse or versionResponse['status'] != 'ok':
        return

    if 'agentVersion' not in versionResponse or 'authCode' not in versionResponse:
        return

    remoteAgentVersion = versionResponse['agentVersion']
    authCode = versionResponse['authCode']

    if authCode != hmac.new( self.settings.secret_key, remoteAgentVersion, digestmod=hashlib.sha1 ).hexdigest():
        self.logger.error( 'Invalid auth code - please confirm your secret key (defined on Settings page) is correct and hmac is properly installed - http://mms.10gen.com/help/' )
        return

    if self._shouldUpgradeAgent( self.settings.settingsAgentVersion, remoteAgentVersion ):
        self._upgradeAgent( remoteAgentVersion )
def _unpack_response(response, cursor_id=None, as_class=dict,
                     tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE):
    """Unpack a response from the database.

    Check the response for errors and unpack, returning a dictionary
    containing the response data.

    :Parameters:
      - `response`: byte string as returned from the database
      - `cursor_id` (optional): cursor_id we sent to get this response -
        used for raising an informative exception when we get cursor id not
        valid at server response
      - `as_class` (optional): class to use for resulting documents
    """
    response_flag = struct.unpack("<i", response[:4])[0]
    if response_flag & 1:
        # Shouldn't get this response if we aren't doing a getMore
        assert cursor_id is not None

        raise OperationFailure("cursor id '%s' not valid at server" %
                               cursor_id)
    elif response_flag & 2:
        error_object = bson.BSON(response[20:]).decode()
        if error_object["$err"].startswith("not master"):
            raise AutoReconnect(error_object["$err"])
        elif error_object.get("code") == 50:
            raise ExecutionTimeout(error_object["$err"],
                                   error_object["code"])
        raise OperationFailure("database error: %s" %
                               error_object["$err"])

    result = {}
    result["cursor_id"] = struct.unpack("<q", response[4:12])[0]
    result["starting_from"] = struct.unpack("<i", response[12:16])[0]
    result["number_returned"] = struct.unpack("<i", response[16:20])[0]
    result["data"] = bson.decode_all(response[20:],
                                     as_class, tz_aware, uuid_subtype)
    assert len(result["data"]) == result["number_returned"]
    return result
def parse_update(data, length):
    # struct OP_UPDATE {
    #     MsgHeader header;             // standard message header
    #     int32     ZERO;               // 0 - reserved for future use
    #     cstring   fullCollectionName; // "dbname.collectionname"
    #     int32     flags;              // bit vector. see below
    #     document  selector;           // the query to select the document
    #     document  update;             // specification of the update to perform
    # }
    val, pos = bson._get_int(data, 16)
    collection, pos = bson._get_c_string(data, pos)
    flags, pos = bson._get_int(data, pos)
    selector, update = "", ""
    try:
        o = bson.decode_all(data[pos:length])
        selector, update = o
    except Exception as e:
        logger.exception("exception on bson decode")

    # flags:
    # 0     Upsert       If set, the database will insert the supplied object into the
    #                    collection if no matching document is found.
    # 1     MultiUpdate  If set, the database will update all matching objects in the
    #                    collection. Otherwise only updates first matching doc.
    # 2-31  Reserved     Must be set to 0.
    upsert = check_bit(flags, 0)
    multi_update = check_bit(flags, 1)

    return Operation(operation=OP_UPDATE, upsert=upsert, multi_update=multi_update,
                     collection=collection, selector=selector, update=update)
def parse_insert(data, length):
    # struct {
    #     MsgHeader header;             // standard message header
    #     int32     flags;              // bit vector - see below
    #     cstring   fullCollectionName; // "dbname.collectionname"
    #     document* documents;          // one or more documents to insert into the collection
    # }
    #
    # flags:
    # 0     ContinueOnError  If set, the database will not stop processing a bulk insert if one
    #                        fails (eg due to duplicate IDs). This makes bulk insert behave
    #                        similarly to a series of single inserts, except lastError will be
    #                        set if any insert fails, not just the last one. If multiple errors
    #                        occur, only the most recent will be reported by getLastError.
    #                        (new in 1.9.1)
    # 1-31  Reserved         Must be set to 0.
    flags, pos = bson._get_int(data, 16)
    continue_on_error = check_bit(flags, 0)
    collection, pos = bson._get_c_string(data, pos)
    try:
        o = bson.decode_all(data[pos:])
    except bson.InvalidBSON as e:
        o = []
        logger.exception("exception on bson decode")

    return Operation(operation=OP_INSERT, collection=collection,
                     continue_on_error=continue_on_error, documents=o)
def parse_delete(data, length):
    # struct {
    #     MsgHeader header;             // standard message header
    #     int32     ZERO;               // 0 - reserved for future use
    #     cstring   fullCollectionName; // "dbname.collectionname"
    #     int32     flags;              // bit vector - see below for details.
    #     document  selector;           // query object. See below for details.
    # }
    #
    # flags:
    # 0     SingleRemove  If set, the database will remove only the first matching document in
    #                     the collection. Otherwise all matching documents will be removed.
    # 1-31  Reserved      Must be set to 0.
    zero, pos = bson._get_int(data, 16)
    collection, pos = bson._get_c_string(data, pos)
    flags, pos = bson._get_int(data, pos)
    single_remove = check_bit(flags, 0)
    try:
        o = bson.decode_all(data[pos:length])
        selector = o[0]
    except Exception as e:
        selector = ""
        logger.exception("exception on bson decode")

    return Operation(operation=OP_DELETE, collection=collection,
                     single_remove=single_remove, selector=selector)
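The wire-protocol parsers above all lean on the fact that bson.decode_all() accepts any number of back-to-back BSON documents in a single byte string and returns them as a list. A small self-contained sketch of that property; the selector and update documents are made up for illustration:

import bson

# Two documents concatenated exactly as they would appear after the
# fullCollectionName/flags fields of an OP_UPDATE body.
selector = bson.BSON.encode({'_id': 1})
update = bson.BSON.encode({'$set': {'name': 'test'}})
payload = selector + update

# decode_all() walks the buffer document by document.
docs = bson.decode_all(payload)
assert docs == [{'_id': 1}, {'$set': {'name': 'test'}}]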
def _unpack_response(response, cursor_id=None, as_class=dict, tz_aware=False):
    """Unpack a response from the database.

    Check the response for errors and unpack, returning a dictionary
    containing the response data.

    :Parameters:
      - `response`: byte string as returned from the database
      - `cursor_id` (optional): cursor_id we sent to get this response -
        used for raising an informative exception when we get cursor id not
        valid at server response
      - `as_class` (optional): class to use for resulting documents
    """
    response_flag = struct.unpack("<i", response[:4])[0]
    if response_flag & 1:
        raise InterfaceError("cursor not valid at server")
    elif response_flag & 2:
        error_object = bson.BSON(response[20:]).decode()
        if error_object["$err"] == "not master":
            raise DatabaseError("master has changed")
        raise DatabaseError("database error: %s" % error_object["$err"])

    result = {}
    result["cursor_id"] = struct.unpack("<q", response[4:12])[0]
    result["starting_from"] = struct.unpack("<i", response[12:16])[0]
    result["number_returned"] = struct.unpack("<i", response[16:20])[0]
    result["data"] = bson.decode_all(response[20:], as_class, tz_aware)
    assert len(result["data"]) == result["number_returned"]
    return result
def test_polarizationtodb(self):
    import bson
    import gzip

    reference_dir = os.path.abspath(os.path.join(ref_dir, "ferroelectric_wf"))

    with gzip.open(os.path.join(reference_dir, "tasks.bson.gz")) as f:
        coll_raw = f.read()

    coll = bson.decode_all(coll_raw)

    db = self.get_task_collection()
    for c in coll:
        db.insert(c)

    new_fw_spec = {'_fw_env': {"db_file": os.path.join(db_dir, "db.json")},
                   'tags': ['wfid_1494203093.06934658']}

    analysis = PolarizationToDb(db_file='>>db_file<<',
                                name="_polarization_post_processing")
    analysis.run_task(new_fw_spec)

    # Check recovered change in polarization
    coll = self.get_task_collection("polarization_tasks")
    d = coll.find_one()
    self.assertAlmostEqual(d['polarization_change_norm'], 46.288752795325244, 5)
def test_read_write_bson(self):
    self.coll.insert_many([{'_id': bson.objectid.ObjectId()} for i in range(1000)])
    bson_location = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), 'spark_output')
    self.sc.mongoRDD(CONNECTION_STRING).saveToBSON(bson_location)
    try:
        # 'part-r-00000.bson' is a file name generated by Spark.
        bson_file = os.path.join(bson_location, 'part-r-00000.bson')
        with open(bson_file, 'rb') as fd:
            documents = bson.decode_all(fd.read())
        self.assertEqual(1000, len(documents))

        # Try loading the BSON file into Spark as a separate RDD.
        bson_rdd = self.sc.BSONFileRDD(bson_file)
        self.assertEqual(1000, bson_rdd.count())

        # Also try the pair version.
        bson_pair_rdd = self.sc.BSONFilePairRDD(bson_file)
        self.assertEqual(1000, bson_pair_rdd.count())
        first_element = bson_pair_rdd.first()
        self.assertIsInstance(first_element, tuple)
        self.assertEqual(2, len(first_element))
    finally:
        try:
            shutil.rmtree(bson_location)
        except Exception:
            pass
def test_set(self):
    "success type (+OK)"
    self.query('DEL/hello')
    f = self.query('SET/hello/world.bson')
    self.assertTrue(f.headers.getheader('Content-Type') == 'application/bson')
    obj = bson.decode_all(f.read())
    self.assertTrue(obj == [{u'SET': [True, bson.Binary('OK', 0)]}])
def populate_main_sql_testdatabase(engine):
    meta = MetaData()

    table = Table('events', meta,
                  Column('id', Integer, primary_key=True, ),
                  Column('time', String(30)),
                  Column('source_ip', String(30)),
                  Column('source_port', String(30)),
                  Column('request_url', String(500)),
                  Column('request_raw', String(65536)),
                  Column('pattern', String(20)),
                  Column('filename', String(500)),
                  )

    meta.create_all(engine)

    insert_dicts = []
    data = open(os.path.join(file_dir, 'data/events_500.bson'), 'r').read()
    for item in bson.decode_all(data):
        new_item = {"source_ip": item["source_ip"],
                    "source_port": item["source_port"],
                    "request_url": item["request"]["url"],
                    "pattern": item["pattern"]}
        insert_dicts.append(new_item)

    conn = engine.connect()
    print "Inserted: {0}".format(len(insert_dicts))
    conn.execute(table.insert(), insert_dicts)
def _mongodb_decode_wire_protocol(message):
    """ http://www.mongodb.org/display/DOCS/Mongo+Wire+Protocol """
    MONGO_OPS = {
        1000: 'msg',
        2001: 'update',
        2002: 'insert',
        2003: 'reserved',
        2004: 'query',
        2005: 'get_more',
        2006: 'delete',
        2007: 'kill_cursors',
    }
    _, msg_id, _, opcode, _ = struct.unpack('<iiiii', message[:20])
    op = MONGO_OPS.get(opcode, 'unknown')
    zidx = 20
    collection_name_size = message[zidx:].find('\0')
    collection_name = message[zidx:zidx+collection_name_size]
    if '.system.' in collection_name:
        return
    zidx += collection_name_size + 1
    skip, limit = struct.unpack('<ii', message[zidx:zidx+8])
    zidx += 8
    msg = ""
    try:
        if message[zidx:]:
            msg = bson.decode_all(message[zidx:])
    except:
        msg = 'invalid bson'
    return {'op': op, 'collection': collection_name,
            'msg_id': msg_id, 'skip': skip,
            'limit': limit, 'query': msg}
def readTmpFile( processPid ):
    """ Read the temp file """
    fileName = os.path.join( tempfile.gettempdir(), 'mms-' + str( processPid ) )

    if not os.path.isfile( fileName ):
        return None

    f = open( fileName )

    try:
        fileContent = f.read()

        # Handle the legacy json files
        if fileContent.startswith( '{' ):
            os.remove( fileName )
            return None

        resBson = bson.decode_all( fileContent )

        if len(resBson) != 1:
            return None

        return resBson[0]
    finally:
        f.close()
def test_set(self):
    "success type (+OK)"
    self.query("DEL/hello")
    f = self.query("SET/hello/world.bson")
    self.assertTrue(f.headers.getheader("Content-Type") == "application/bson")
    obj = bson.decode_all(f.read())
    self.assertTrue(obj == [{u"SET": [True, bson.Binary("OK", 0)]}])
def test_treestore(self):
    output = romanesco.convert(
        "tree",
        {"format": "newick", "data": self.newick},
        {"format": "r.apetree"})
    output = romanesco.convert("tree", output, {"format": "treestore"})
    self.assertEqual(output["format"], "treestore")
    rows = bson.decode_all(output["data"])
    for d in rows:
        if "rooted" in d:
            root = d
    self.assertNotEqual(root, None)
    self.assertEqual(len(root["clades"]), 1)

    def findId(id):
        for d in rows:
            if d["_id"] == id:
                return d

    top = findId(root["clades"][0])
    self.assertEqual(len(top["clades"]), 2)
    internal = findId(top["clades"][0])
    rubribarbus = findId(top["clades"][1])
    ahli = findId(internal["clades"][0])
    allogus = findId(internal["clades"][1])
    self.assertEqual(internal["branch_length"], 2)
    self.assertEqual(ahli["name"], "ahli")
    self.assertEqual(ahli["branch_length"], 0)
    self.assertEqual(allogus["name"], "allogus")
    self.assertEqual(allogus["branch_length"], 1)
    self.assertEqual(rubribarbus["name"], "rubribarbus")
    self.assertEqual(rubribarbus["branch_length"], 3)
def test_treestore(self):
    output = convert(
        'tree',
        {'format': 'newick', 'data': self.newick},
        {'format': 'r.apetree'})
    output = convert('tree', output, {'format': 'treestore'})
    self.assertEqual(output['format'], 'treestore')
    rows = bson.decode_all(output['data'])
    for d in rows:
        if 'rooted' in d:
            root = d
    self.assertNotEqual(root, None)
    self.assertEqual(len(root['clades']), 1)

    def findId(id):
        for d in rows:
            if d['_id'] == id:
                return d

    top = findId(root['clades'][0])
    self.assertEqual(len(top['clades']), 2)
    internal = findId(top['clades'][0])
    rubribarbus = findId(top['clades'][1])
    ahli = findId(internal['clades'][0])
    allogus = findId(internal['clades'][1])
    self.assertEqual(internal['branch_length'], 2)
    self.assertEqual(ahli['name'], 'ahli')
    self.assertEqual(ahli['branch_length'], 0)
    self.assertEqual(allogus['name'], 'allogus')
    self.assertEqual(allogus['branch_length'], 1)
    self.assertEqual(rubribarbus['name'], 'rubribarbus')
    self.assertEqual(rubribarbus['branch_length'], 3)
def Parse_Messaged(path="rocketchat_message.bson", rID_name=None):
    """Parse the message file from the rocketchat db (rocketchat_message.bson)
    and return a table of the results."""
    # parsing messages
    if rID_name is None:
        room, rID_name = Parse_Rooms()
    bson_file = open(path, 'rb')
    messages = bson.decode_all(bson_file.read())
    res = []
    for i in messages:
        if 'u' in i:
            if 'msg' in i:
                if 'rid' in i:
                    tmp = []
                    tmp.append(i['rid'])
                    if i['rid'] in rID_name:
                        tmp.append(rID_name[i['rid']])
                    else:
                        tmp.append('None')
                    tmp.append(i['u']['username'])
                    tmp.append(i['ts'].isoformat())
                    tmp.append(i['msg'].replace('\t', ' '))
                    res.append(tmp)
    return res
def Parse_Rooms(path="rocketchat_room.bson"):
    """Parse the room file from rocket chat (rocketchat_room.bson) and return
    a table of the results plus a mapping of room IDs to room names."""
    bson_file = open(path, 'rb')
    rooms = bson.decode_all(bson_file.read())
    # Parsing Rooms
    room = []
    rID_name = {}
    for i in rooms:
        Type = i['t']
        creation_date = i['ts'].isoformat()
        nb_msg = i['msgs']
        rID = i['_id']
        if Type != 'd':
            name = i['name']
            if name == 'general':
                creator = None
            else:
                creator = i['u']['username']
            rID_name[rID] = name
        else:
            name = None
            creator = None
        users = i['usernames']
        tmp = [str(i) for i in [rID, name, creator, creation_date, Type,
                                nb_msg, ','.join(users)]]
        room.append(tmp)
    return room, rID_name
def test_raw_batches(self):
    c = self.collection
    yield c.delete_many({})
    yield c.insert_many({'_id': i} for i in range(4))

    find = partial(c.find_raw_batches, {})
    agg = partial(c.aggregate_raw_batches, [{'$sort': {'_id': 1}}])

    for method in find, agg:
        cursor = method().batch_size(2)
        yield cursor.fetch_next
        batch = cursor.next_object()
        self.assertEqual([{'_id': 0}, {'_id': 1}], bson.decode_all(batch))

        lst = yield method().batch_size(2).to_list(length=1)
        self.assertEqual([{'_id': 0}, {'_id': 1}], bson.decode_all(lst[0]))
async def test_iter_aggregate(self):
    collection = self.collection
    await collection.delete_many({})
    pipeline = [{'$sort': {'_id': 1}}]

    # Empty iterator.
    async for _ in collection.aggregate(pipeline):
        self.fail()

    for n_docs in 1, 2, 10:
        if n_docs:
            docs = [{'_id': i} for i in range(n_docs)]
            await collection.insert_many(docs)

        # Force extra batches to test iteration.
        j = 0
        cursor = collection.aggregate(pipeline).batch_size(3)
        async for doc in cursor:
            self.assertEqual(j, doc['_id'])
            j += 1

        self.assertEqual(j, n_docs)

        j = 0
        raw = collection.aggregate_raw_batches(pipeline).batch_size(3)
        async for batch in raw:
            j += len(bson.decode_all(batch))

        self.assertEqual(j, n_docs)
        await collection.delete_many({})
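The Motor tests above decode each raw batch with bson.decode_all(). The same pattern works with the synchronous PyMongo driver; a minimal sketch, assuming a local server and placeholder database and collection names:

import bson
from pymongo import MongoClient

client = MongoClient('mongodb://localhost:27017')  # placeholder URI
coll = client.test_db.test_coll                    # placeholder namespace

# Each item yielded by find_raw_batches() is one undecoded batch of BSON
# documents; decode_all() turns it back into a list of dicts.
docs = []
for batch in coll.find_raw_batches({}).sort('_id').batch_size(100):
    docs.extend(bson.decode_all(batch))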
def parse_query(data, length): # struct OP_QUERY { # MsgHeader header; // standard message header # int32 flags; // bit vector of query options. See below for details. # cstring fullCollectionName ; // "dbname.collectionname" # int32 numberToSkip; // number of documents to skip # int32 numberToReturn; // number of documents to return # // in the first OP_REPLY batch # document query; // query object. See below for details. # [ document returnFieldsSelector; ] // Optional. Selector indicating the fields # // to return. See below for details. # } # flags: # 0 Reserved Must be set to 0. # 1 TailableCursor Tailable means cursor is not closed when the last data is retrieved. # Rather, the cursor marks the final object's position. You can resume using the cursor later, from where it was located, # if more data were received. Like any "latent cursor", # the cursor may become invalid at some point (CursorNotFound) – for example if the final object it references were deleted. # 2 SlaveOk Allow query of replica slave. Normally these return an error except for namespace "local". # 3 OplogReplay Internal replication use only - driver should not set # 4 NoCursorTimeout The server normally times out idle cursors after an inactivity period (10 minutes) to prevent excess memory use. Set this option to prevent that. # 5 AwaitData Use with TailableCursor. If we are at the end of the data, block for a while rather than returning no data. After a timeout period, we do return as normal. # 6 Exhaust Stream the data down full blast in multiple "more" packages, on the assumption that the client will fully read all data queried. Faster when you are pulling a lot of data and know you want to pull it all down. Note: the client is not allowed to not read all the data unless it closes the connection. # 7 Partial Get partial results from a mongos if some shards are down (instead of throwing an error) # 8-31 Reserved Must be set to 0. flags, pos = bson._get_int(data, 16) tailable_cursor = check_bit(flags, 1) slave_ok = check_bit(flags, 2) oplog_replay = check_bit(flags, 3) no_cursor_timeout = check_bit(flags, 4) await_data = check_bit(flags, 5) exhaust = check_bit(flags, 6) partial = check_bit(flags, 7) collection, pos = bson._get_c_string(data, pos) number_to_skip, pos = bson._get_int(data, pos) number_to_return, pos = bson._get_int(data, pos) fields_to_return = None query = "" try: o = bson.decode_all(data[pos:length]) query = o[0] except bson.InvalidBSON as e: o = [] logger.exception("exception on bson decode") if len(o) == 2: fields_to_return = o[1] return Operation(operation=OP_QUERY,fields_to_return=fields_to_return, tailable_cursor = tailable_cursor, slave_ok = slave_ok, oplog_replay = oplog_replay, no_cursor_timeout = no_cursor_timeout, number_to_skip=number_to_skip, number_to_return=number_to_return, await_data = await_data, exhaust = exhaust, partial = partial, query=query)
def _unpack_response(response, cursor_id=None, codec_options=CodecOptions()):
    """Unpack a response from the database.

    Check the response for errors and unpack, returning a dictionary
    containing the response data.

    Can raise CursorNotFound, NotMasterError, ExecutionTimeout, or
    OperationFailure.

    :Parameters:
      - `response`: byte string as returned from the database
      - `cursor_id` (optional): cursor_id we sent to get this response -
        used for raising an informative exception when we get cursor id not
        valid at server response
      - `codec_options` (optional): an instance of
        :class:`~bson.codec_options.CodecOptions`
    """
    response_flag = struct.unpack("<i", response[:4])[0]
    if response_flag & 1:
        # Shouldn't get this response if we aren't doing a getMore
        assert cursor_id is not None

        raise CursorNotFound("cursor id '%s' not valid at server" %
                             cursor_id)
    elif response_flag & 2:
        error_object = bson.BSON(response[20:]).decode()
        if error_object["$err"].startswith("not master"):
            raise NotMasterError(error_object["$err"])
        elif error_object.get("code") == 50:
            raise ExecutionTimeout(error_object.get("$err"),
                                   error_object.get("code"),
                                   error_object)
        raise OperationFailure("database error: %s" %
                               error_object.get("$err"),
                               error_object.get("code"),
                               error_object)

    result = {}
    result["cursor_id"] = struct.unpack("<q", response[4:12])[0]
    result["starting_from"] = struct.unpack("<i", response[12:16])[0]
    result["number_returned"] = struct.unpack("<i", response[16:20])[0]
    result["data"] = bson.decode_all(response[20:], codec_options)
    assert len(result["data"]) == result["number_returned"]
    return result
def _unpack_response(response, cursor_id=None,
                     codec_options=_UNICODE_REPLACE_CODEC_OPTIONS):
    """Unpack a response from the database and decode the BSON document(s).

    Check the response for errors and unpack, returning a dictionary
    containing the response data.

    Can raise CursorNotFound, NotMasterError, ExecutionTimeout, or
    OperationFailure.

    :Parameters:
      - `response`: byte string as returned from the database
      - `cursor_id` (optional): cursor_id we sent to get this response -
        used for raising an informative exception when we get cursor id not
        valid at server response
      - `codec_options` (optional): an instance of
        :class:`~bson.codec_options.CodecOptions`
    """
    result = _raw_response(response, cursor_id)
    result["data"] = bson.decode_all(result["data"][0], codec_options)
    return result
def _unpack_response(response, cursor_id=None, as_class=dict,
                     tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE):
    """Unpack a response from the database.

    Check the response for errors and unpack, returning a dictionary
    containing the response data.

    :Parameters:
      - `response`: byte string as returned from the database
      - `cursor_id` (optional): cursor_id we sent to get this response -
        used for raising an informative exception when we get cursor id not
        valid at server response
      - `as_class` (optional): class to use for resulting documents
    """
    response_flag = struct.unpack("<i", response[:4])[0]
    if response_flag & 1:
        # Shouldn't get this response if we aren't doing a getMore
        assert cursor_id is not None

        raise OperationFailure("cursor id '%s' not valid at server" %
                               cursor_id)
    elif response_flag & 2:
        error_object = bson.BSON(response[20:]).decode()
        if error_object["$err"].startswith("not master"):
            raise AutoReconnect(error_object["$err"])
        raise OperationFailure("database error: %s" %
                               error_object["$err"], error_object)

    result = {}
    result["cursor_id"] = struct.unpack("<q", response[4:12])[0]
    result["starting_from"] = struct.unpack("<i", response[12:16])[0]
    result["number_returned"] = struct.unpack("<i", response[16:20])[0]
    result["data"] = bson.decode_all(response[20:],
                                     as_class, tz_aware, uuid_subtype)
    assert len(result["data"]) == result["number_returned"]
    return result
def _checkForUpdate(self):
    """ Update the agent if possible """
    res = urllib.request.urlopen(self.settings.version_url % {'key': self.settings.mms_key})

    resBson = None

    try:
        resBson = bson.decode_all(res.read())
    finally:
        if res is not None:
            res.close()
            res = None

    if len(resBson) != 1:
        return

    versionResponse = resBson[0]

    if 'status' not in versionResponse or versionResponse['status'] != 'ok':
        return

    if 'agentVersion' not in versionResponse or 'authCode' not in versionResponse:
        return

    remoteAgentVersion = versionResponse['agentVersion']
    authCode = versionResponse['authCode']

    if authCode != hmac.new(self.settings.secret_key,
                            remoteAgentVersion,
                            digestmod=hashlib.sha1).hexdigest():
        self.logger.error(
            'Invalid auth code - please confirm your secret key (defined on Settings page) is correct and hmac is properly installed - http://mms.10gen.com/help/'
        )
        return

    if self._shouldUpgradeAgent(self.settings.settingsAgentVersion, remoteAgentVersion):
        self._upgradeAgent(remoteAgentVersion)
def decode_wire_protocol(message):
    """ http://www.mongodb.org/display/DOCS/Mongo+Wire+Protocol """
    _, msg_id, _, opcode, _ = struct.unpack('<iiiii', message[:20])
    op = MONGO_OPS.get(opcode, 'unknown')
    zidx = 20
    collection_name_size = message[zidx:].find('\0')
    collection_name = message[zidx:zidx + collection_name_size]
    zidx += collection_name_size + 1

    skip, limit = struct.unpack('<ii', message[zidx:zidx + 8])
    zidx += 8

    try:
        msg = bson.decode_all(message[zidx:])
    except InvalidBSON:
        msg = 'invalid bson'
    return {
        'op': op,
        'collection': collection_name,
        'msg_id': msg_id,
        'skip': skip,
        'limit': limit,
        'query': msg,
    }
def row_func(row):
    try:
        row_wrapper.load_row(row)
        return row_function(row_wrapper)
    except Exception as e:
        try:
            e_msg = unicode(e)
        except:
            e_msg = u'<unable to get exception message>'
        try:
            e_row = unicode(bson.decode_all(row)[0]['array'])
        except:
            e_row = u'<unable to get row data>'
        try:
            msg = base64.urlsafe_b64encode(
                (u'Exception: %s running UDF on row: %s' % (e_msg, e_row)).encode('utf-8'))
        except:
            msg = base64.urlsafe_b64encode(
                u'Exception running UDF, unable to provide details.'.encode('utf-8'))
        raise IaPyWorkerError(msg)
def fill_db(self, collection_str):
    '''Check if collection is already in MongoDB
        If already in MongoDB: Do nothing
        Else: Load data into db from quiltdata (karrlab/datanator)

    Args:
        collection_str: name of collection (e.g. 'ecmdb', 'pax', etc)
    '''
    _, _, collection = self.con_db(collection_str)
    if collection.find({}).count() != 0:
        return collection
    else:
        manager = wc_utils.quilt.QuiltManager(path=self.cache_dirname, package='datanator')
        filename = collection_str + '.bson'
        manager.download_package(filename)
        with open((self.cache_dirname + '/' + filename), 'rb') as f:
            collection.insert(decode_all(f.read()))
        return collection
def test_basic_decode(self):
    self.assertEqual({"test": u("hello world")},
                     BSON(b"\x1B\x00\x00\x00\x0E\x74\x65\x73\x74\x00\x0C"
                          b"\x00\x00\x00\x68\x65\x6C\x6C\x6F\x20\x77\x6F"
                          b"\x72\x6C\x64\x00\x00").decode())
    self.assertEqual([{"test": u("hello world")}, {}],
                     decode_all(b"\x1B\x00\x00\x00\x0E\x74\x65\x73\x74"
                                b"\x00\x0C\x00\x00\x00\x68\x65\x6C\x6C"
                                b"\x6f\x20\x77\x6F\x72\x6C\x64\x00\x00"
                                b"\x05\x00\x00\x00\x00"))
    self.assertEqual([{"test": u("hello world")}, {}],
                     list(decode_iter(
                         b"\x1B\x00\x00\x00\x0E\x74\x65\x73\x74"
                         b"\x00\x0C\x00\x00\x00\x68\x65\x6C\x6C"
                         b"\x6f\x20\x77\x6F\x72\x6C\x64\x00\x00"
                         b"\x05\x00\x00\x00\x00")))
    self.assertEqual([{"test": u("hello world")}, {}],
                     list(decode_file_iter(StringIO(
                         b"\x1B\x00\x00\x00\x0E\x74\x65\x73\x74"
                         b"\x00\x0C\x00\x00\x00\x68\x65\x6C\x6C"
                         b"\x6f\x20\x77\x6F\x72\x6C\x64\x00\x00"
                         b"\x05\x00\x00\x00\x00"))))
async def test_iter_cursor(self):
    collection = self.collection
    await collection.delete_many({})

    for n_docs in 0, 1, 2, 10:
        if n_docs:
            docs = [{'_id': i} for i in range(n_docs)]
            await collection.insert_many(docs)

        # Force extra batches to test iteration.
        j = 0
        async for doc in collection.find().sort('_id').batch_size(3):
            self.assertEqual(j, doc['_id'])
            j += 1

        self.assertEqual(j, n_docs)

        j = 0
        raw_cursor = collection.find_raw_batches().sort('_id').batch_size(3)
        async for batch in raw_cursor:
            j += len(bson.decode_all(batch))

        await collection.delete_many({})
def parse_reply(data, length):
    # struct {
    #     MsgHeader header;         // standard message header
    #     int32     responseFlags;  // bit vector - see details below
    #     int64     cursorID;       // cursor id if client needs to do get more's
    #     int32     startingFrom;   // where in the cursor this reply is starting
    #     int32     numberReturned; // number of documents in the reply
    #     document* documents;      // documents
    # }
    flags, pos = bson._get_int(data, 16)
    cursor_id, pos = bson._get_long(data, pos, as_class=None, tz_aware=False, uuid_subtype=3)
    starting_from, pos = bson._get_int(data, pos)
    number_returned, pos = bson._get_int(data, pos)
    try:
        o = bson.decode_all(data[pos:length])
    except Exception as e:
        o = []
        logger.exception("exception on bson decode in parse_reply")
        logger.info(repr(data[pos:length]))

    return Operation(operation=OP_REPLY, cursor_id=cursor_id,
                     starting_from=starting_from,
                     number_returned=number_returned, documents=o)
def parse_pinterest(**kwargs):
    if os.path.isfile(BIN_DATA[PINTEREST]):
        LOG.info("Already processed, skipping.")
        return
    data_file = 'subset_iccv_board_pins.bson'
    source_file = os.path.join(DOWNLOAD[PINTEREST], data_file)
    if not glob(source_file):
        raise Exception("Cannot find pinterest dataset")
    LOG.info("Parsing pinterest")
    with open(source_file, 'rb') as f:
        bsob = bson.decode_all(f.read())

    map_id_pin = dict()
    map_pin_id = dict()
    map_board_id = dict()
    map_id_board = dict()

    pins = 0
    board_pin_pairs = []
    for i, board in enumerate(bsob):
        map_id_board[i] = board
        map_board_id[board['board_id']] = i
        for pin in board['pins']:
            if (pin not in map_pin_id):
                map_pin_id[pin] = pins
                map_id_pin[pins] = pin
                pins += 1
            board_pin_pairs.append((map_board_id[board['board_id']], map_pin_id[pin]))

    boards = [board for (board, pin) in board_pin_pairs]
    pins = [pin for (board, pin) in board_pin_pairs]
    m_sp = sp.csr_matrix(([1] * len(boards), (boards, pins)),
                         shape=(len(map_board_id), len(map_pin_id)))
    save_as_npz(m_sp, BIN_DATA[PINTEREST])
def _mongodb_decode_wire_protocol(message):
    """ http://www.mongodb.org/display/DOCS/Mongo+Wire+Protocol """
    MONGO_OPS = {
        1000: 'msg',
        2001: 'update',
        2002: 'insert',
        2003: 'reserved',
        2004: 'query',
        2005: 'get_more',
        2006: 'delete',
        2007: 'kill_cursors',
    }
    _, msg_id, _, opcode, _ = struct.unpack('<iiiii', message[:20])
    op = MONGO_OPS.get(opcode, 'unknown')
    zidx = 20
    collection_name_size = message[zidx:].find('\0')
    collection_name = message[zidx:zidx + collection_name_size]
    if '.system.' in collection_name:
        return
    zidx += collection_name_size + 1
    skip, limit = struct.unpack('<ii', message[zidx:zidx + 8])
    zidx += 8
    msg = ""
    try:
        if message[zidx:]:
            msg = bson.decode_all(message[zidx:])
    except:
        msg = 'invalid bson'
    return {
        'op': op,
        'collection': collection_name,
        'msg_id': msg_id,
        'skip': skip,
        'limit': limit,
        'query': msg
    }
def convert_tags(file_name='papers.bson'):
    """ Generates all the tags in the first run """
    file_name = f"{base_dir}/{file_name}"
    print("\n\nConverting tags")
    all_tags = set()
    count = 0

    # Making a list of all tags to be added
    with open(file_name, 'rb') as f:
        for doc in bson.decode_all(f.read()):
            if count % 1000 == 0:
                print(f'{count} papers scanned, total {len(all_tags)} found so far')
            tags = get_tags(doc)
            for tag_name in tags:
                # For the time being we ignore non-arxiv tags.
                # ArXiv tags are always of the form archive.subject
                # (https://arxiv.org/help/arxiv_identifier)
                if not re.match('[A-Za-z\\-]+\\.[A-Za-z\\-]+', tag_name):
                    continue
                all_tags.add(tag_name)
            count += 1

    print(f'Total {len(all_tags)} unique tags')
    for tag_name in all_tags:
        tag = Tag(name=tag_name, source='arXiv')
        db.session.add(tag)
    db.session.commit()
def convert_tweets(papers_map, file_name=f'tweets.bson'): file_name = f"{base_dir}/{file_name}" print("\n\nConverting tweets") # Ex # {'_id': '1000018920986808328', 'pids': ['1804.03984'], 'inserted_at_date': datetime.datetime(2020, 5, 1, 23, 46, 44, 341000), 'created_at_date': datetime.datetime(2018, 5, 25, 14, 21, 4), 'created_at_time': 1527258064.0, 'lang': 'en', 'text': 'Coolest part of @aggielaz et al\'s most recent emergent communication paper: when agents jointly learn "conceptual" reprs alongside communication protocol, these concepts are heavily biased by the natural statistics of the environment. https://t.co/K1X6ZSwH3G https://t.co/2eqav3ax6g', 'retweets': 2, 'likes': 5, 'replies': 0, 'user_screen_name': 'j_gauthier', 'user_name': 'Jon Gauthier', 'user_followers_count': 4304, 'user_following_count': 457} with open(file_name, 'rb') as f: count = 0 docs = bson.decode_all(f.read()) total_tweets = len(docs) tweets = [] for doc in docs: count += 1 if count % 3000 == 0: print(f'{count}/{total_tweets} tweets parsed') db.session.bulk_save_objects(tweets) db.session.commit() tweets = [] tweet = create_tweet(doc, papers_map) if tweet: tweets.append(tweet) db.session.bulk_save_objects(tweets) db.session.commit()
def restoreDump(self, file, collectionName):
    client = MongoClient(MONGODB_URI)
    db = client[MGI_DB]
    target_collection = db[collectionName]
    re = open(file, 'rb').read()
    target_collection.insert(decode_all(re))
def test_legacy_csharp_uuid(self): if not should_test_uuid: raise SkipTest("No uuid module") # Generated by the .net driver from_csharp = b('ZAAAABBfaWQAAAAAAAVuZXdndWlkABAAAAAD+MkoCd/Jy0iYJ7Vhl' 'iF3BAJuZXdndWlkc3RyaW5nACUAAAAwOTI4YzlmOC1jOWRmLTQ4Y2' 'ItOTgyNy1iNTYxOTYyMTc3MDQAAGQAAAAQX2lkAAEAAAAFbmV3Z3V' 'pZAAQAAAAA9MD0oXQe6VOp7mK4jkttWUCbmV3Z3VpZHN0cmluZwAl' 'AAAAODVkMjAzZDMtN2JkMC00ZWE1LWE3YjktOGFlMjM5MmRiNTY1A' 'ABkAAAAEF9pZAACAAAABW5ld2d1aWQAEAAAAAPRmIO2auc/Tprq1Z' 'oQ1oNYAm5ld2d1aWRzdHJpbmcAJQAAAGI2ODM5OGQxLWU3NmEtNGU' 'zZi05YWVhLWQ1OWExMGQ2ODM1OAAAZAAAABBfaWQAAwAAAAVuZXdn' 'dWlkABAAAAADISpriopuTEaXIa7arYOCFAJuZXdndWlkc3RyaW5nA' 'CUAAAA4YTZiMmEyMS02ZThhLTQ2NGMtOTcyMS1hZWRhYWQ4MzgyMT' 'QAAGQAAAAQX2lkAAQAAAAFbmV3Z3VpZAAQAAAAA98eg0CFpGlPihP' 'MwOmYGOMCbmV3Z3VpZHN0cmluZwAlAAAANDA4MzFlZGYtYTQ4NS00' 'ZjY5LThhMTMtY2NjMGU5OTgxOGUzAAA=') data = base64.b64decode(from_csharp) # Test decoding docs = bson.decode_all(data, SON, False, OLD_UUID_SUBTYPE) for d in docs: self.assertNotEqual(d['newguid'], uuid.UUID(d['newguidstring'])) docs = bson.decode_all(data, SON, False, UUID_SUBTYPE) for d in docs: self.assertNotEqual(d['newguid'], uuid.UUID(d['newguidstring'])) docs = bson.decode_all(data, SON, False, JAVA_LEGACY) for d in docs: self.assertNotEqual(d['newguid'], uuid.UUID(d['newguidstring'])) docs = bson.decode_all(data, SON, False, CSHARP_LEGACY) for d in docs: self.assertEqual(d['newguid'], uuid.UUID(d['newguidstring'])) # Test encoding encoded = b('').join([ bson.BSON.encode(doc, uuid_subtype=OLD_UUID_SUBTYPE) for doc in docs ]) self.assertNotEqual(data, encoded) encoded = b('').join( [bson.BSON.encode(doc, uuid_subtype=UUID_SUBTYPE) for doc in docs]) self.assertNotEqual(data, encoded) encoded = b('').join( [bson.BSON.encode(doc, uuid_subtype=JAVA_LEGACY) for doc in docs]) self.assertNotEqual(data, encoded) encoded = b('').join([ bson.BSON.encode(doc, uuid_subtype=CSHARP_LEGACY) for doc in docs ]) self.assertEqual(data, encoded) # Test insert and find client = get_client() client.pymongo_test.drop_collection('csharp_uuid') coll = client.pymongo_test.csharp_uuid coll.uuid_subtype = CSHARP_LEGACY coll.insert(docs) self.assertEqual(5, coll.count()) for d in coll.find(): self.assertEqual(d['newguid'], uuid.UUID(d['newguidstring'])) coll.uuid_subtype = OLD_UUID_SUBTYPE for d in coll.find(): self.assertNotEqual(d['newguid'], d['newguidstring']) client.pymongo_test.drop_collection('csharp_uuid')
def test_legacy_java_uuid(self): if not should_test_uuid: raise SkipTest("No uuid module") # Generated by the Java driver from_java = b('bAAAAAdfaWQAUCBQxkVm+XdxJ9tOBW5ld2d1aWQAEAAAAAMIQkfACFu' 'Z/0RustLOU/G6Am5ld2d1aWRzdHJpbmcAJQAAAGZmOTk1YjA4LWMwND' 'ctNDIwOC1iYWYxLTUzY2VkMmIyNmU0NAAAbAAAAAdfaWQAUCBQxkVm+' 'XdxJ9tPBW5ld2d1aWQAEAAAAANgS/xhRXXv8kfIec+dYdyCAm5ld2d1' 'aWRzdHJpbmcAJQAAAGYyZWY3NTQ1LTYxZmMtNGI2MC04MmRjLTYxOWR' 'jZjc5Yzg0NwAAbAAAAAdfaWQAUCBQxkVm+XdxJ9tQBW5ld2d1aWQAEA' 'AAAAPqREIbhZPUJOSdHCJIgaqNAm5ld2d1aWRzdHJpbmcAJQAAADI0Z' 'DQ5Mzg1LTFiNDItNDRlYS04ZGFhLTgxNDgyMjFjOWRlNAAAbAAAAAdf' 'aWQAUCBQxkVm+XdxJ9tRBW5ld2d1aWQAEAAAAANjQBn/aQuNfRyfNyx' '29COkAm5ld2d1aWRzdHJpbmcAJQAAADdkOGQwYjY5LWZmMTktNDA2My' '1hNDIzLWY0NzYyYzM3OWYxYwAAbAAAAAdfaWQAUCBQxkVm+XdxJ9tSB' 'W5ld2d1aWQAEAAAAAMtSv/Et1cAQUFHUYevqxaLAm5ld2d1aWRzdHJp' 'bmcAJQAAADQxMDA1N2I3LWM0ZmYtNGEyZC04YjE2LWFiYWY4NzUxNDc' '0MQAA') data = base64.b64decode(from_java) # Test decoding docs = bson.decode_all(data, SON, False, OLD_UUID_SUBTYPE) for d in docs: self.assertNotEqual(d['newguid'], uuid.UUID(d['newguidstring'])) docs = bson.decode_all(data, SON, False, UUID_SUBTYPE) for d in docs: self.assertNotEqual(d['newguid'], uuid.UUID(d['newguidstring'])) docs = bson.decode_all(data, SON, False, CSHARP_LEGACY) for d in docs: self.assertNotEqual(d['newguid'], uuid.UUID(d['newguidstring'])) docs = bson.decode_all(data, SON, False, JAVA_LEGACY) for d in docs: self.assertEqual(d['newguid'], uuid.UUID(d['newguidstring'])) # Test encoding encoded = b('').join([ bson.BSON.encode(doc, uuid_subtype=OLD_UUID_SUBTYPE) for doc in docs ]) self.assertNotEqual(data, encoded) encoded = b('').join( [bson.BSON.encode(doc, uuid_subtype=UUID_SUBTYPE) for doc in docs]) self.assertNotEqual(data, encoded) encoded = b('').join([ bson.BSON.encode(doc, uuid_subtype=CSHARP_LEGACY) for doc in docs ]) self.assertNotEqual(data, encoded) encoded = b('').join( [bson.BSON.encode(doc, uuid_subtype=JAVA_LEGACY) for doc in docs]) self.assertEqual(data, encoded) # Test insert and find client = get_client() client.pymongo_test.drop_collection('java_uuid') coll = client.pymongo_test.java_uuid coll.uuid_subtype = JAVA_LEGACY coll.insert(docs) self.assertEqual(5, coll.count()) for d in coll.find(): self.assertEqual(d['newguid'], uuid.UUID(d['newguidstring'])) coll.uuid_subtype = OLD_UUID_SUBTYPE for d in coll.find(): self.assertNotEqual(d['newguid'], d['newguidstring']) client.pymongo_test.drop_collection('java_uuid')
with open('sondages.json', 'w') as f:
    f.write(dumps(sondages))

from bson import decode_all
from data.reader import BSONInput

'''from pprint import pprint
for s in ['candidacies','candidates','elections']:
    globals()[s]=decode_all(open('data/'+s+'.bson').read())
    pprint(globals()[s])'''

candidatn = {}
candidate = {}
candidat = {}
election = {}

for d in decode_all(open('data/candidates.bson').read()):
    candidat[str(d['_id'])] = (d['first_name'] or '') + ' ' + d['last_name']

for d in decode_all(open('data/candidacies.bson').read()):
    #if d['published']==True:
    candidatn[str(d['_id'])] = candidat[str(d['candidate_ids'][0])]
    candidate[str(d['_id'])] = str(d[u'election_id'])

for d in decode_all(open('data/elections.bson').read()):
    election[str(d['_id'])] = d['name']

tags = {}
tagsname = {}
for d in decode_all(open('data/tags.bson').read()):
    tagsname[str(d['_id'])] = d['name']

from collections import Counter
async def validate_action(redis_conn, mongo, mysql_pool, action, data): result = 'Empty' if action == 'get_cookie': result = await redis_conn.execute('GET', data) if result: result = json.loads(result.decode()) else: result = 'Empty' elif action == 'get_cookies': cur = b'0' cookies = [] while cur: cur, keys = await redis_conn.scan(cur, match=data) for key in keys: cookies.append(key.decode()) if cookies: result = {'cookies': cookies} else: result = 'Empty' elif action == 'create_report': try: username = data['username'] email = data['email'] text = data['text'] except: return 'username, email, text is required' if username == '' or email == '' or text == '': return 'some field is empty ;(' id = ObjectId() try: await mongo.sufferers.insert_one({ '_id': id, 'username': username, 'email': email }) except Exception as ex: result = str(ex) key = ''.join(random.choice(string.digits) for _ in range(6)) + '00' des = DES.new(key, DES.MODE_ECB) padded_text = pad(str(text).encode()) encrypted_text = des.encrypt(padded_text) report_id = ObjectId() try: await mongo.reports.insert_one({ '_id': report_id, 'sufferer_id': ObjectId(id), 'username': username, 'encrypted_text': encrypted_text, 'private_key': key.encode() }) result = { 'status': 'ok', 'private_key': key, 'object_id': str(report_id), 'encrypted_text': base64.encodebytes(encrypted_text).decode().strip('\n') } except Exception as ex: result = str(ex) elif action == 'get_report': try: id = data except: return 'data is required' try: report = await mongo.reports.find_one({'_id': ObjectId(data)}) encrypted_text = base64.encodebytes( report['encrypted_text']).decode().strip('\n') result = { 'report': { 'username': report['username'], 'encrypted_text': encrypted_text } } except Exception as ex: result = str(ex) elif action == 'get_reports': reports = [] cursor = mongo.reports.find_raw_batches() async for batch in cursor: for item in bson.decode_all(batch): try: encrypted_text = base64.encodebytes( item['encrypted_text']).decode().strip('\n') reports.append({ 'username': item['username'], 'encrypted_text': encrypted_text }) except Exception as ex: result = str(ex) result = {'reports': reports} elif action == 'send_comment': try: username = data['username'] comment = data['comment'] private = data['private'] except: return 'username, comment, private is required' if username == '' or comment == '' or private == '': return 'some field is empty ;(' async with mysql_pool.acquire() as conn: async with conn.cursor() as cur: try: await cur.execute( "select count(*) from users where username=%s;", data['username']) was_registered, = await cur.fetchone() if not was_registered: await cur.execute( "insert into users (username) values (%s)", data['username']) await cur.execute( "insert into comments (private, text, author_id) values ({}, '{}', (select user_id from users where username='******'))" .format(data['private'], data['comment'], data['username'])) await conn.commit() result = {'ok': 'comment sent'} except Exception as ex: result = str(ex) elif action == 'get_comments': async with mysql_pool.acquire() as conn: async with conn.cursor() as cur: await cur.execute( 'select case private when (username) then username else \'anonymous\' end text, case private when (username) then text else \'private comment\' end text from comments inner join users on author_id=user_id limit 100' ) result = await cur.fetchall() elif action == 'get_my_comments': async with mysql_pool.acquire() as conn: async with conn.cursor() as cur: await cur.execute( 'select user_id from users 
where username = %s', data) user_id = await cur.fetchone() await cur.execute( 'select text from comments where author_id = %s', user_id) result = await cur.fetchall() return result
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage.data import imread
import json
import bson
import io

from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Conv2D, Flatten, Dense, MaxPooling2D, Dropout
from keras.utils import np_utils

f = open('train_example.bson', 'rb')
bs = f.read()  # reads bytes from the specified file
docs = bson.decode_all(bs)

docs = bson.decode_file_iter(open('train_example.bson', 'rb'))
data = pd.DataFrame.from_dict(docs)
data.head()

# Printing sample images for each category
for i in range(5):
    picture = imread(io.BytesIO(data.imgs[i][0]['picture']))
    plt.figure()
    plt.imshow(picture)

X = []
for i in range(data.shape[0]):
    X.append(imread(io.BytesIO(data.imgs[i][0]['picture'])))
X = np.array(X, dtype=np.float32) / 255.
def _upgradeAgent(self, newAgentVersion): """ Pull down the files, verify and then stop the current process """ res = urllib.request.urlopen(self.settings.upgrade_url % {'key': self.settings.mms_key}) resBson = None try: resBson = bson.decode_all(res.read()) finally: if res is not None: res.close() res = None if len(resBson) != 1: return upgradeResponse = resBson[0] if 'status' not in upgradeResponse or upgradeResponse[ 'status'] != 'ok' or 'files' not in upgradeResponse: return # Verify the auth codes for all files and names first. for fileInfo in upgradeResponse['files']: if fileInfo['fileAuthCode'] != hmac.new( self.settings.secret_key, fileInfo['file'], digestmod=hashlib.sha1).hexdigest(): self.logger.error( 'Invalid file auth code for upgrade - cancelling') return if fileInfo['fileNameAuthCode'] != hmac.new( self.settings.secret_key, fileInfo['fileName'], digestmod=hashlib.sha1).hexdigest(): self.logger.error( 'Invalid file name auth code for upgrade - cancelling') return # Write the files. for fileInfo in upgradeResponse['files']: fileContent = fileInfo['file'] fileName = fileInfo['fileName'] # If the user has a global username/password defined, make sure it is set in the new settings.py file. if fileName == 'settings.py' and getattr( self.settings, 'globalAuthUsername', None) is not None and getattr( self.settings, 'globalAuthPassword', None) is not None: fileContent = fileContent.replace( 'globalAuthPassword = None', 'globalAuthPassword=%r' % self.settings.globalAuthPassword) fileContent = fileContent.replace( 'globalAuthUsername = None', 'globalAuthUsername=%r' % self.settings.globalAuthUsername) fileSystemName = os.path.join(self.agentDir, fileName) newFile = open(fileSystemName, 'w') try: newFile.write(fileContent) finally: if newFile is not None: newFile.close() # Stop the current agent process try: self.processContainer.stopAgentProcess() self.settings.settingsAgentVersion = newAgentVersion self.logger.info( 'Agent upgraded to version: ' + newAgentVersion + ' - there is up to a five minute timeout before data will be sent again' ) except Exception as e: self.logger.error('Problem restarting agent process: ' + traceback.format_exc(e))
def decode_documents(buffer, start, content_size):
    docs = bson.decode_all(buffer[start:start + content_size], CODEC_OPTIONS)
    return docs, start + content_size
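A quick sketch of how a slicing helper like decode_documents() could be exercised on a buffer holding two concatenated documents; CODEC_OPTIONS is assumed here to be the bson default:

import bson
from bson.codec_options import CodecOptions

CODEC_OPTIONS = CodecOptions()  # assumed default codec options

# Build a buffer of two back-to-back BSON documents and decode them.
buf = bson.BSON.encode({'a': 1}) + bson.BSON.encode({'b': 2})
docs, next_offset = decode_documents(buf, 0, len(buf))
assert docs == [{'a': 1}, {'b': 2}]
assert next_offset == len(buf)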
def handle_io_in(self, data): offset = 0 while len(data) - offset >= 16: h = packets.MsgHeader(data[offset:offset+16]) # print(h.messageLength) # print(h.opCode) if len(data) - offset < h.messageLength: break if h.opCode == 2004: msg = packets.MsgQuery(data[offset+16:offset+h.messageLength]) # print(h.show()) # print(msg.show()) query = None field_selectors = [] if bson: for doc in bson.decode_all(msg.payload.load): if query is None: query = doc else: field_selectors.append(doc) res = self._handle_query(fullCollectionName=msg.fullCollectionName, query=query, field_selectors=field_selectors) # print(msg) # print(msg.payload) # print(msg.payload.load) payload = b"" for doc in res: payload += bson.BSON.encode(doc) pkg = packets.MsgHeader( responseTo=h.requestID, opCode=1 ) / packets.MsgReply( numberReturned=len(res) ) / Raw(payload) pkg.show() self.send(pkg.build()) elif h.opCode == 2010: msg = packets.MsgCommand(data[offset + 16:offset + h.messageLength]) docs = bson.decode_all(msg.payload.load) res = self._handle_command(msg.database, msg.commandName, docs[0], docs[1], docs[1:]) payload = b"" for doc in res: payload += bson.BSON.encode(doc) pkg = packets.MsgHeader( responseTo=h.requestID, opCode=2011 ) / packets.MsgCommandReply( ) / Raw(payload) pkg.show() self.send(pkg.build()) # print(h.payload) # ToDo: check length offset = offset + h.messageLength return offset
def ifilter(predicate, iterable):
    """Filter records and return decoded object so that batch processing can work correctly"""
    return (numpy_to_bson_friendly(bson.decode_all(item)[0]["array"])
            for item in iterable if predicate(item))
def ifilterfalse(predicate, iterable):
    """Filter records that do not match predicate and return decoded object so that batch processing can encode"""
    return (numpy_to_bson_friendly(bson.decode_all(item)[0]["array"])
            for item in iterable if not predicate(item))
import tensorflow as tf
import matplotlib.pyplot as plt
from skimage.data import imread  # or, whatever image library you prefer
from sklearn import preprocessing
from subprocess import check_output

# Output the files in the category
print(check_output(["ls", "./input"]).decode("utf8"))

# Ignore warnings
warnings.filterwarnings("ignore")

# Simple data processing
data = bson.decode_file_iter(open('./input/train_example.bson', 'rb'))

# read bson file into pandas DataFrame
with open('./input/train_example.bson', 'rb') as b:
    df = pd.DataFrame(bson.decode_all(b.read()))

# Get shape of first image
for e, pic in enumerate(df['imgs'][0]):
    picture = imread(io.BytesIO(pic['picture']))
pix_x, pix_y, rgb = picture.shape

n = len(df.index)  # cols of data in train set
X_ids = np.zeros((n, 1)).astype(int)
Y = np.zeros((n, 1)).astype(int)  # category_id for each row
X_images = np.zeros((n, pix_x, pix_y, rgb))  # m images are 180 by 180 by 3

i = 0
for c, d in enumerate(data):
    X_ids[i] = d['_id']
    Y[i] = d['category_id']
def load_row(self, data):
    self._set_data(bson.decode_all(data)[0]['array'])
def loadData(filename):
    b_file = open(filename, 'rb')
    bs = b_file.read()
    dictionary = bson.decode_all(bs)
    return dictionary
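loadData() reads the entire dump into memory before decoding. For large .bson files, bson.decode_file_iter() streams documents one at a time instead; a minimal sketch with a placeholder file name:

import bson

def load_data_streaming(filename):
    # Yield one decoded document at a time rather than holding the
    # whole dump in memory the way decode_all() does.
    with open(filename, 'rb') as f:
        for doc in bson.decode_file_iter(f):
            yield doc

for doc in load_data_streaming('dump.bson'):  # 'dump.bson' is a placeholder
    print(doc)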
from bson import BSON
from bson import decode_all
from pymongo import MongoClient

try:
    client = MongoClient('mongodb://*****:*****@server:27017/database',
                         ssl=True,
                         ssl_certfile='client.pem',
                         ssl_ca_certs='ca.pem')
    db = client.database                # DATABASE
    retrieve_request = db.Collection    # COLLECTION

    with open('Path to your backup.bson', 'rb') as f:
        retrieve_request.insert(decode_all(f.read()))

    client.close()
except Exception as e:
    print(str(e))
def _pullRemoteConf(self): """ Pull the remote configuration data """ uniqueHostnames = [] res = None try: res = urllib2.urlopen(self.confUrl) resBson = None try: resBson = bson.decode_all(res.read()) finally: if res is not None: res.close() res = None if len(resBson) != 1: return confResponse = resBson[0] if 'hosts' not in confResponse: self.mmsAgent.stopAll() return if 'disableDbstats' in confResponse: self.mmsAgent.disableDbstats = confResponse['disableDbstats'] else: self.mmsAgent.disableDbstats = False hosts = confResponse['hosts'] self.mmsAgent.serverHostDefsLock.acquire() try: # Extract the host information if hosts is not None: for host in hosts: hostDef, hostDefLast = self.mmsAgent.extractHostDef( host) hostKey = hostDef['hostKey'] uniqueHostnames.append(hostKey) if hostKey not in self.mmsAgent.serverHostDefs: self.mmsAgent.startMonitoringThreads(hostDef) else: self.mmsAgent.checkChangedHostDef( hostDef, hostDefLast) hostDef = None hostDefLast = None # Check to see if anything was removed for hostDef in self.mmsAgent.serverHostDefs.values(): if hostDef['hostKey'] not in uniqueHostnames: self.mmsAgent.stopAndClearHost(hostDef['hostKey']) finally: self.mmsAgent.serverHostDefsLock.release() except Exception, e: if res is not None: try: res.close() res = None except: pass self.logger.warning( "Problem pulling configuration data from MMS (check firewall and network): " + traceback.format_exc(e))