def test_equality(self):
    """Binary equality requires both payload and subtype to match."""
    plain = Binary("hello")
    subtyped = Binary("hello", 100)
    # Same bytes, different subtype -> unequal.
    self.assertNotEqual(plain, subtyped)
    self.assertEqual(subtyped, Binary("hello", 100))
    self.assertEqual(plain, Binary("hello"))
    # A trailing space changes the payload.
    self.assertNotEqual(plain, Binary("hello "))
def test_from_then_to_dict(self):
    """Round-trip documents through BSON and verify they come back intact."""

    def helper(doc):
        # Renamed from `dict`, which shadowed the builtin.
        self.assertEqual(doc, (BSON.from_dict(doc)).to_dict())

    helper({})
    helper({"test": u"hello"})
    # Plain (byte) strings must decode back as unicode.
    self.assert_(
        isinstance(
            BSON.from_dict({"hello": "world"}).to_dict()["hello"],
            types.UnicodeType))
    helper({"mike": -10120})
    helper({"long": long(10)})
    helper({"really big long": 2147483648})
    helper({u"hello": 0.0013109})
    helper({"something": True})
    helper({"false": False})
    helper({"an array": [1, True, 3.8, u"world"]})
    helper({"an object": {"test": u"something"}})
    helper({"a binary": Binary("test", 100)})
    helper({"a binary": Binary("test", 128)})
    helper({"a binary": Binary("test", 254)})
    helper({"another binary": Binary("test")})
    helper(SON([(u'test dst', datetime.datetime(1993, 4, 4, 2))]))
    helper({"big float": float(10000000000)})

    def from_then_to_dict(doc):
        return doc == (BSON.from_dict(doc)).to_dict()

    # Property-based check over randomly generated mongo documents.
    qcheck.check_unittest(self, from_then_to_dict, qcheck.gen_mongo_dict(3))
def test_repr(self):
    """repr() shows the escaped payload followed by the subtype."""
    simple = Binary("hello world")
    self.assertEqual(repr(simple), "Binary('hello world', 2)")
    # Non-printable bytes render as escape sequences.
    raw = Binary("\x08\xFF")
    self.assertEqual(repr(raw), "Binary('\\x08\\xff', 2)")
    custom = Binary("test", 100)
    self.assertEqual(repr(custom), "Binary('test', 100)")
def binary_value_test():
    """BinaryField wraps/unwraps bytes and Binary values symmetrically."""
    s = BinaryField()
    # b'foo' is already a byte string; the original's
    # bytes('foo'.encode('ascii')) double conversion was redundant
    # (equivalent in both Python 2 and 3).
    payload = b'foo'
    assert s.wrap(Binary(payload)) == Binary(payload)
    assert s.wrap(payload) == Binary(payload)
    assert s.unwrap(Binary(payload)) == Binary(payload)
def test_exceptions(self):
    """Binary rejects non-str payloads and out-of-range subtypes."""
    # Payload must be a byte string.
    for bad_payload in (None, u"hello", 5, 10.2):
        self.assertRaises(TypeError, Binary, bad_payload)
    # Subtype must be an integer...
    self.assertRaises(TypeError, Binary, "hello", None)
    self.assertRaises(TypeError, Binary, "hello", "100")
    # ...within [0, 255].
    self.assertRaises(ValueError, Binary, "hello", -1)
    self.assertRaises(ValueError, Binary, "hello", 256)
    # Boundary subtypes are accepted.
    self.assert_(Binary("hello", 0))
    self.assert_(Binary("hello", 255))
def _store_result(self, task_id, result, status, traceback=None):
    """Store return value and status of an executed task."""
    from pymongo.binary import Binary
    # Encoded payloads are stored as BSON binary blobs.
    document = {
        "_id": task_id,
        "status": status,
        "result": Binary(self.encode(result)),
        "date_done": datetime.utcnow(),
        "traceback": Binary(self.encode(traceback)),
    }
    self.collection.save(document, safe=True)
    return result
def _store_result(self, task_id, result, status, traceback=None):
    """Store return value and status of an executed task."""
    from pymongo.binary import Binary
    # Pickled payloads are stored as BSON binary blobs.
    document = {
        "_id": task_id,
        "status": status,
        "result": Binary(pickle.dumps(result)),
        "date_done": datetime.now(),
        "traceback": Binary(pickle.dumps(traceback)),
    }
    collection = self._get_database()[self.mongodb_taskmeta_collection]
    collection.save(document, safe=True)
    return result
def __flush_write_buffer(self):
    """Flush the write buffer contents out to a chunk."""
    data = self.__write_buffer.getvalue()
    if not data:
        return
    assert len(data) <= self.__chunk_size
    selector = {"files_id": self.__id, "n": self.__chunk_number}
    chunk_doc = {
        "files_id": self.__id,
        "n": self.__chunk_number,
        "data": Binary(data),
    }
    # Upsert so a partially written chunk is replaced in place.
    self.__collection.chunks.update(selector, chunk_doc, upsert=True)
    # Advance to the next chunk only once this one is completely full.
    if len(data) == self.__chunk_size:
        self.__chunk_number += 1
    self.__position += len(data)
    # Start over with an empty buffer.
    self.__write_buffer.close()
    self.__write_buffer = StringIO()
def test_binary(self):
    """Binary supports string operations but is a distinct type."""
    raw = "hello world"
    wrapped = Binary("hello world")
    # Inherited string behavior.
    self.assert_(wrapped.startswith("hello"))
    self.assert_(wrapped.endswith("world"))
    # Type identity: Binary is a Binary, a plain string is not.
    self.assert_(isinstance(wrapped, Binary))
    self.failIf(isinstance(raw, Binary))
def test_index_on_binary(self):
    """An index on a binary field is used for binary equality queries."""
    db = self.db
    db.drop_collection("test")
    for payload in ("def", "abc", "ghi"):
        db.test.save({"bin": Binary(payload)})
    # Without an index every document is scanned.
    plan = db.test.find({"bin": Binary("abc")}).explain()
    self.assertEqual(plan["nscanned"], 3)
    db.test.create_index("bin")
    # With the index only the matching document is scanned.
    plan = db.test.find({"bin": Binary("abc")}).explain()
    self.assertEqual(plan["nscanned"], 1)
def process_item(self, item, spider):
    """Route a scraped item to its collection, compressing HTML bodies."""
    collection = self.collections[item['model']]

    # Entity items carry their own _id and only record labels.
    if item['model'] == 'entities':
        collection.update_one(
            {'_id': item['_id']},
            {'$set': {'labels': item['labels'], 'done': True}})
        return item

    _id = hasher(item['lang'], item['name'])

    # Wikidata lookup result.
    if 'type' in item and item['type'] == 'wikidata':
        collection.update_one(
            {'_id': _id},
            {'$set': {'wikidata': item['wikidata']}})
        return item

    # Page was not found.
    if item['notFound']:
        collection.update_one(
            {'_id': _id},
            {'$set': {'notFound': True, 'done': True}})
        return item

    # Request was rejected.
    if item['badRequest']:
        collection.update_one(
            {'_id': _id},
            {'$set': {'badRequest': True, 'done': True}})
        return item

    # Normal page: store zlib-compressed HTML as a binary blob.
    compressed = Binary(zlib.compress(item['html']))
    collection.update_one(
        {'_id': _id},
        {'$set': {'html': compressed, 'done': True}})
    return item
def process_multicurl_response(self, ok, curl, ecode=None, emsg=None):
    """
    Process reponse returned from multicurl cycle.

    Finalizes the grab result for `task`, breaks the curl<->grab object
    cycle, and — when caching is enabled and this was a successful GET —
    writes the response body (optionally zlib-compressed as a BSON
    Binary) into the cache collection.
    """
    task = curl.task
    # Note: curl.grab == task.grab if task.grab is not None
    grab = curl.grab
    # NOTE(review): grab_original and url are bound here but never used
    # below — possibly kept for debugging; confirm before removing.
    grab_original = curl.grab_original
    url = task.url  # or grab.config['url']
    grab.process_request_result()
    # Break links, free resources
    curl.grab.curl = None
    curl.grab = None
    curl.task = None
    # Cache only successful (or 404) GET responses unless the task
    # explicitly disabled caching.
    if ok and self.use_cache and grab.request_method == 'GET' and not task.get(
            'disable_cache'):
        if grab.response.code < 400 or grab.response.code == 404:
            utf_body = grab.response.unicode_body().encode('utf-8')
            if self.use_cache_compression:
                body = Binary(zlib.compress(utf_body))
            else:
                body = utf_body
            # Cache key is either a sha1 of the UTF-8 URL or the raw URL.
            utf_url = task.url.encode('utf-8') if isinstance(
                task.url, unicode) else task.url
            item = {
                '_id': sha1(utf_url).hexdigest()
                if self.cache_key_hash else task.url,
                'url': task.url,
                'body': body,
                'head': grab.response.head,
                'response_code': grab.response.code,
                'cookies': grab.response.cookies,
            }
            #import pdb; pdb.set_trace()
            try:
                #self.mongo.cache.save(item, safe=True)
                self.cache.save(item, safe=True)
            except Exception, ex:
                # Oversized documents are silently skipped (MongoDB
                # rejects documents over the BSON size limit).
                if 'document too large' in unicode(ex):
                    pass
                else:
                    # NOTE(review): dropping into pdb on unexpected save
                    # errors is leftover debug code — should be replaced
                    # with logging/re-raise before production use.
                    import pdb
                    pdb.set_trace()
def store(self, *args, **nargs):
    """Persist a record as a BSON binary blob in MongoDB, then delegate."""
    key = args[0]
    data = args[1]
    name = data["name"]
    record_key = name + "_" + key
    log(self.__class__.__name__ + ": Storing %s" % (record_key))
    original = data
    from pymongo.binary import Binary
    # Serialize the whole record to BSON and wrap it as binary data.
    blob = Binary(self.BSON.from_dict(data))
    self.pymongo_db[name].save({
        "id": hashlib.md5(record_key).hexdigest(),
        "data": blob,
    })
    # Let the base class record the original (unserialized) data too.
    Storage.store(self, record_key, original)
def _save_taskset(self, taskset_id, result):
    """Save the taskset result."""
    from pymongo.binary import Binary
    document = {
        "_id": taskset_id,
        # Encoded result stored as a BSON binary blob.
        "result": Binary(self.encode(result)),
        "date_done": datetime.utcnow(),
    }
    self.collection.save(document, safe=True)
    return result
def _get_binary(data):
    """Decode a BSON binary element; return (value, remaining_bytes)."""
    (length, data) = _get_int(data)
    subtype = ord(data[0])
    data = data[1:]
    if subtype == 2:
        # Legacy binary (subtype 2) nests a second length prefix that
        # must equal the outer length minus the 4 prefix bytes.
        (inner_length, data) = _get_int(data)
        if inner_length != length - 4:
            raise InvalidBSON("invalid binary (st 2) - lengths don't match!")
        length = inner_length
    if subtype == 3 and _use_uuid:
        # Subtype 3 decodes to a UUID when uuid support is available.
        return (uuid.UUID(bytes=data[:length]), data[length:])
    return (Binary(data[:length], subtype), data[length:])
def _save_taskset(self, taskset_id, result):
    """Save the taskset result."""
    from pymongo.binary import Binary
    document = {
        "_id": taskset_id,
        # Encoded result stored as a BSON binary blob.
        "result": Binary(self.encode(result)),
        "date_done": datetime.utcnow(),
    }
    collection = self._get_database()[self.mongodb_taskmeta_collection]
    collection.save(document, safe=True)
    return result
def file_to_mongo(file):
    """Save an uploaded image, thumbnail it in place, and return its
    base64-encoded contents as a BSON Binary.

    The temporary file is deleted before returning.
    """
    # upload to a temp location
    path = os.path.join('static/temp', secure_filename(file.filename))
    file.save(path)
    # resize in place to fit within 300x222
    size = 300, 222
    img = Image.open(path)
    img.thumbnail(size, Image.ANTIALIAS)
    img.save(path)
    # convert: the original leaked the file handle by never closing it;
    # `with` guarantees the close even if read/encode raises.
    with open(path, "rb") as f:
        string = Binary(f.read().encode("base64"))
    # remove the temp file
    os.remove(path)
    return string
def store(self, key, value, expires=300):
    """Upsert a pickled value with an absolute expiry time.

    Returns True on success, False if the write failed.
    """
    expire_time = datetime.datetime.utcnow() + \
        datetime.timedelta(seconds=expires)
    update = {
        '$set': {
            "value": Binary(pickle.dumps(value)),
            "expires": expire_time,
        }
    }
    try:
        self._conn[self.collection].update(
            {'key': key}, update, upsert=True, safe=True)
    except OperationFailure:
        return False
    else:
        return True
def __flush_data(self, data):
    """Flush `data` to a chunk."""
    if not data:
        return
    assert len(data) <= self.chunk_size
    # Each chunk is keyed by the owning file id and a running index.
    self._chunks.insert({
        "files_id": self._file["_id"],
        "n": self._chunk_number,
        "data": Binary(data),
    })
    self._chunk_number += 1
    self._position += len(data)
def main(dbname, collname='sessions'):
    """Migrate session documents from the 0.1 to the 0.2 storage format.

    Scans every document not yet marked with the migration key, decodes
    its pickled/base64 'data' field, re-encodes each value that needs it
    as a BSON Binary, and writes the result back.  Failures are recorded
    on the document rather than aborting the whole run.
    """
    db = getattr(Connection(), dbname)
    coll = getattr(db, collname)
    nsuccess = nfail = ntotal = 0
    # Marker field distinguishing already-migrated documents.
    migratedkey = '__migrated_0_1__0_2'
    for doc in coll.find({migratedkey: {'$exists': False}}):
        ntotal += 1
        docid = doc['_id']
        stdout.write('%4d. Migrating doc %s... ' % (ntotal, docid))
        try:
            data = doc['data']
            # A non-string 'data' field means the old format is gone —
            # likely the migration already ran; bail out entirely.
            if not isinstance(data, basestring):
                stdout.write('abort\n')
                stderr.write('data field contains non-string value. Was the '
                             'migration run already?\n')
                exit(1)
            # Old format: base64-wrapped pickle of the whole dict.
            data = loads(decodestring(data))
            # New format: per-value encoding, only where needed.
            data = dict(
                (k, Binary(encodestring(dumps(v))) if needs_encode(v) else v)
                for (k, v) in data.iteritems())
            coll.update({'_id': docid}, {'$set': {
                migratedkey: True,
                'data': data
            }}, safe=True)
        except Exception, e:
            # Record the failure on the document so reruns skip it.
            nfail += 1
            stdout.write('fail\n')
            coll.update({'_id': docid}, {'$set': {
                migratedkey: 'failed: ' + str(e)
            }}, safe=True)
        else:
            nsuccess += 1
            stdout.write('ok\n')
def _element_to_bson(key, value, check_keys):
    """Encode one (key, value) pair as a BSON element byte string.

    Each branch returns the element's one-byte type tag, the C-string
    key name, and the type-specific payload.  Raises InvalidDocument /
    InvalidName for unencodable values or illegal keys.
    """
    if not isinstance(key, basestring):
        raise InvalidDocument("documents must have only string keys, "
                              "key was %r" % key)
    # Key restrictions only apply to top-level client documents.
    if check_keys:
        if key.startswith("$"):
            raise InvalidName("key %r must not start with '$'" % key)
        if "." in key:
            raise InvalidName("key %r must not contain '.'" % key)
    name = _make_c_string(key, True)
    # 0x01: double
    if isinstance(value, float):
        return "\x01" + name + struct.pack("<d", value)

    # Use Binary w/ subtype 3 for UUID instances
    try:
        import uuid
        if isinstance(value, uuid.UUID):
            value = Binary(value.bytes, subtype=3)
    except ImportError:
        pass

    # 0x05: binary (legacy subtype 2 carries an extra length prefix)
    if isinstance(value, Binary):
        subtype = value.subtype
        if subtype == 2:
            value = struct.pack("<i", len(value)) + value
        return "\x05%s%s%s%s" % (name, struct.pack(
            "<i", len(value)), chr(subtype), value)
    # 0x0F: code with scope
    if isinstance(value, Code):
        cstring = _make_c_string(value)
        scope = _dict_to_bson(value.scope, False, False)
        full_length = struct.pack("<i", 8 + len(cstring) + len(scope))
        length = struct.pack("<i", len(cstring))
        return "\x0F" + name + full_length + length + cstring + scope
    # 0x02: string (byte strings and unicode share the same encoding)
    if isinstance(value, str):
        cstring = _make_c_string(value)
        length = struct.pack("<i", len(cstring))
        return "\x02" + name + length + cstring
    if isinstance(value, unicode):
        cstring = _make_c_string(value)
        length = struct.pack("<i", len(cstring))
        return "\x02" + name + length + cstring
    # 0x03: embedded document
    if isinstance(value, dict):
        return "\x03" + name + _dict_to_bson(value, check_keys, False)
    # 0x04: array — encoded as a document with "0","1",... keys
    if isinstance(value, (list, tuple)):
        as_dict = SON(zip([str(i) for i in range(len(value))], value))
        return "\x04" + name + _dict_to_bson(as_dict, check_keys, False)
    # 0x07: ObjectId
    if isinstance(value, ObjectId):
        return "\x07" + name + value.binary
    # 0x08: boolean (checked before int since bool is an int subclass)
    if value is True:
        return "\x08" + name + "\x01"
    if value is False:
        return "\x08" + name + "\x00"
    if isinstance(value, (int, long)):
        # TODO this is a really ugly way to check for this...
        # 0x12 (int64) when the value exceeds 32-bit range, else 0x10.
        if value > 2**64 / 2 - 1 or value < -2**64 / 2:
            raise OverflowError("MongoDB can only handle up to 8-byte ints")
        if value > 2**32 / 2 - 1 or value < -2**32 / 2:
            return "\x12" + name + struct.pack("<q", value)
        return "\x10" + name + struct.pack("<i", value)
    # 0x09: UTC datetime, stored as milliseconds since the epoch
    if isinstance(value, datetime.datetime):
        millis = int(
            calendar.timegm(value.timetuple()) * 1000 +
            value.microsecond / 1000)
        return "\x09" + name + struct.pack("<q", millis)
    # 0x0A: null
    if value is None:
        return "\x0A" + name
    # 0x0B: regex — pattern plus sorted single-letter flag string
    if isinstance(value, _RE_TYPE):
        pattern = value.pattern
        flags = ""
        if value.flags & re.IGNORECASE:
            flags += "i"
        if value.flags & re.LOCALE:
            flags += "l"
        if value.flags & re.MULTILINE:
            flags += "m"
        if value.flags & re.DOTALL:
            flags += "s"
        if value.flags & re.UNICODE:
            flags += "u"
        if value.flags & re.VERBOSE:
            flags += "x"
        return "\x0B" + name + _make_c_string(pattern, True) + \
            _make_c_string(flags)
    # DBRefs are encoded via their document form.
    if isinstance(value, DBRef):
        return _element_to_bson(key, value.as_doc(), False)
    raise InvalidDocument("cannot convert value of type %s to bson" %
                          type(value))
# Pull published blog posts tagged 'act' or 'The Day in Transparency'
# from the WordPress database, transform them through a saucebrush
# pipeline, and load them into the MongoDB 'openhouse.blog' collection.
# NOTE(review): `conn` (the MySQL connection) is defined elsewhere in
# this file.
query = """ SELECT p.ID, p.post_author, u.user_login, p.post_date, p.post_date_gmt, p.post_modified, p.post_modified_gmt, p.post_content, p.post_title, p.post_category, p.post_excerpt, p.guid, p.post_type FROM oh_posts p INNER JOIN oh_users u ON p.post_author = u.ID INNER JOIN oh_term_relationships tr ON p.ID = tr.object_id INNER JOIN oh_term_taxonomy tt ON tr.term_taxonomy_id = tt.term_taxonomy_id INNER JOIN oh_terms t ON tt.term_id = t.term_id WHERE p.post_status = 'publish' AND p.post_type = 'post' AND (t.name = 'act' or t.name = 'The Day in Transparency') ORDER BY p.post_date DESC """
mongo = Connection()
saucebrush.run_recipe(
    MySQLSource(conn, query),
    MetaFilter(conn),
    TagFilter(conn),
    ContentFilter(),
    # Post bodies are stored as BSON binary rather than plain text.
    FieldModifier(('post_content', 'post_excerpt'), lambda x: Binary(x)),
    MongoDBEmitter('openhouse', 'blog', drop_collection=True, conn=mongo),
    #DebugEmitter(),
)
# Sanity check: list the migrated post titles.
for d in mongo['openhouse']['blog'].find():
    print "------", d['post_title']
conn.close()
def _hash(key, difs):
    """Return the DocStore hash of (key, difs) wrapped as a BSON Binary."""
    digest = DocStore._hash(key, difs)
    return Binary(digest)
def _make_html_page(self, response, lru, lrulinks):
    """Build a page document with a compressed body and its LRU links."""
    page = self._make_raw_page(response, lru)
    # 'zip' is the Python 2 zlib string codec.
    page['body'] = Binary(response.body.encode('zip'))
    page['lrulinks'] = lrulinks
    return page
def encode(self, sessiondict):
    """Encode session values, wrapping non-primitive ones as Binary."""
    encoded = {}
    for key, val in sessiondict.iteritems():
        if needs_encode(val):
            # Serialize via the base Store encoder and wrap for BSON.
            encoded[key] = Binary(Store.encode(self, val))
        else:
            encoded[key] = val
    return encoded
def test_subtype(self):
    """The default subtype is 2; an explicit subtype is preserved."""
    default = Binary("hello")
    self.assertEqual(default.subtype, 2)
    explicit = Binary("hello", 100)
    self.assertEqual(explicit.subtype, 100)
def __create_mongo_value_for_value(self, value):
    """Translate an App Engine datastore value into its MongoDB form.

    Wrapper datastore types become tagged {'class': ...} documents,
    Blob/ByteString become BSON Binary, and anything unrecognized is
    stored as-is.
    """
    if isinstance(value, datastore_types.Rating):
        return {
            'class': 'rating',
            'rating': int(value),
        }
    if isinstance(value, datastore_types.Category):
        return {
            'class': 'category',
            'category': str(value),
        }
    if isinstance(value, datastore_types.Key):
        return {
            'class': 'key',
            'path': self.__id_for_key(value._ToPb()),
        }
    if isinstance(value, types.ListType):
        # Recursively convert each element; also precompute sort keys so
        # list-valued properties can be ordered by their min/max element.
        list_for_db = [
            self.__create_mongo_value_for_value(v) for v in value
        ]
        sorted_list = sorted(value)
        # NOTE(review): an empty list raises IndexError on
        # sorted_list[0] — confirm callers never pass [].
        return {
            'class': 'list',
            'list': list_for_db,
            'ascending_sort_key':
            self.__create_mongo_value_for_value(sorted_list[0]),
            'descending_sort_key':
            self.__create_mongo_value_for_value(sorted_list[-1]),
        }
    if isinstance(value, users.User):
        return {
            'class': 'user',
            'email': value.email(),
        }
    if isinstance(value, datastore_types.Text):
        return {
            'class': 'text',
            'string': unicode(value),
        }
    # Raw blobs map directly to BSON binary.
    if isinstance(value, datastore_types.Blob):
        return Binary(value)
    if isinstance(value, datastore_types.ByteString):
        return {'class': 'bytes', 'value': Binary(value)}
    if isinstance(value, datastore_types.IM):
        return {
            'class': 'im',
            'protocol': value.protocol,
            'address': value.address,
        }
    if isinstance(value, datastore_types.GeoPt):
        return {
            'class': 'geopt',
            'lat': value.lat,
            'lon': value.lon,
        }
    if isinstance(value, datastore_types.Email):
        return {
            'class': 'email',
            'value': value,
        }
    if isinstance(value, datastore_types.BlobKey):
        return {
            'class': 'blobkey',
            'value': str(value),
        }
    # Plain Python values (int, float, str, datetime, ...) pass through.
    return value
def test_basic_from_dict(self):
    """BSON.from_dict produces exact, known byte strings per type.

    Each expected value below is the hand-computed BSON encoding:
    4-byte little-endian document length, elements (type tag, C-string
    key, payload), and a terminating NUL.
    """
    # Only dict-like documents can be encoded.
    self.assertRaises(TypeError, BSON.from_dict, 100)
    self.assertRaises(TypeError, BSON.from_dict, "hello")
    self.assertRaises(TypeError, BSON.from_dict, None)
    self.assertRaises(TypeError, BSON.from_dict, [])
    # Empty document: just the length prefix and trailing NUL.
    self.assertEqual(BSON.from_dict({}), BSON("\x05\x00\x00\x00\x00"))
    self.assertEqual(
        BSON.from_dict({"test": u"hello world"}),
        "\x1B\x00\x00\x00\x02\x74\x65\x73\x74\x00\x0C\x00\x00"
        "\x00\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64\x00"
        "\x00")
    self.assertEqual(
        BSON.from_dict({u"mike": 100}),
        "\x0F\x00\x00\x00\x10\x6D\x69\x6B\x65\x00\x64\x00\x00"
        "\x00\x00")
    self.assertEqual(
        BSON.from_dict({"hello": 1.5}),
        "\x14\x00\x00\x00\x01\x68\x65\x6C\x6C\x6F\x00\x00\x00"
        "\x00\x00\x00\x00\xF8\x3F\x00")
    self.assertEqual(BSON.from_dict({"true": True}),
                     "\x0C\x00\x00\x00\x08\x74\x72\x75\x65\x00\x01\x00")
    self.assertEqual(
        BSON.from_dict({"false": False}),
        "\x0D\x00\x00\x00\x08\x66\x61\x6C\x73\x65\x00\x00"
        "\x00")
    self.assertEqual(
        BSON.from_dict({"empty": []}),
        "\x11\x00\x00\x00\x04\x65\x6D\x70\x74\x79\x00\x05\x00"
        "\x00\x00\x00\x00")
    self.assertEqual(
        BSON.from_dict({"none": {}}),
        "\x10\x00\x00\x00\x03\x6E\x6F\x6E\x65\x00\x05\x00\x00"
        "\x00\x00\x00")
    # Binary default subtype (2) nests an inner length prefix.
    self.assertEqual(
        BSON.from_dict({"test": Binary("test")}),
        "\x18\x00\x00\x00\x05\x74\x65\x73\x74\x00\x08\x00\x00"
        "\x00\x02\x04\x00\x00\x00\x74\x65\x73\x74\x00")
    # Non-default subtype (128) has no inner length prefix.
    self.assertEqual(
        BSON.from_dict({"test": Binary("test", 128)}),
        "\x14\x00\x00\x00\x05\x74\x65\x73\x74\x00\x04\x00\x00"
        "\x00\x80\x74\x65\x73\x74\x00")
    self.assertEqual(BSON.from_dict({"test": None}),
                     "\x0B\x00\x00\x00\x0A\x74\x65\x73\x74\x00\x00")
    # Datetime encodes as milliseconds since the epoch (int64).
    self.assertEqual(
        BSON.from_dict({"date": datetime.datetime(2007, 1, 8, 0, 30, 11)}),
        "\x13\x00\x00\x00\x09\x64\x61\x74\x65\x00\x38\xBE\x1C"
        "\xFF\x0F\x01\x00\x00\x00")
    self.assertEqual(
        BSON.from_dict({"regex": re.compile("a*b", re.IGNORECASE)}),
        "\x12\x00\x00\x00\x0B\x72\x65\x67\x65\x78\x00\x61\x2A"
        "\x62\x00\x69\x00\x00")
    self.assertEqual(
        BSON.from_dict({"$where": Code("test")}),
        "\x1F\x00\x00\x00\x0F\x24\x77\x68\x65\x72\x65\x00\x12"
        "\x00\x00\x00\x05\x00\x00\x00\x74\x65\x73\x74\x00\x05"
        "\x00\x00\x00\x00\x00")
    a = ObjectId("\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B")
    self.assertEqual(
        BSON.from_dict({"oid": a}),
        "\x16\x00\x00\x00\x07\x6F\x69\x64\x00\x00\x01\x02\x03"
        "\x04\x05\x06\x07\x08\x09\x0A\x0B\x00")
    # DBRef encodes as an embedded {$ref, $id} document.
    self.assertEqual(
        BSON.from_dict({"ref": DBRef("coll", a)}),
        "\x2F\x00\x00\x00\x03ref\x00\x25\x00\x00\x00\x02$ref"
        "\x00\x05\x00\x00\x00coll\x00\x07$id\x00\x00\x01\x02"
        "\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x00\x00")
from pymongo import ASCENDING
from mongodb_benchmark_tools import post_data

# Sample documents of increasing size used by the benchmark runs.
small = {
    "integer": 5,
    "number": 5.05,
    "boolean": False,
    "array": ["test", "benchmark"],
}
# A medium document embeds the small one plus a binary payload.
medium = {
    "base_url": "http://www.example.com/test-me",
    "total_word_count": 6743,
    "access_time": datetime.datetime.utcnow(),
    "sub_object": small,
    "data": Binary("hello" * 40),
    "big_array": ["mongodb"] * 20,
}
# A large document embeds several medium ones.
large = {
    "bigger_array": [medium] * 5,
    "data": Binary("hello" * 500),
}


class Benchmark(object):
    """Base class for benchmarks; subclasses override setup() and run()."""

    name = "benchmark"
    description = "a benchmark"
    categories = []

    def setup(self):
        # Hook for per-benchmark initialization.
        pass

    def run(self, iterations):
        # Hook for the timed benchmark body.
        pass
# load svg data handle = rsvg.Handle(None, svg) # setup image image = cairo.ImageSurface(cairo.FORMAT_ARGB32, 250, 250) ctx = cairo.Context(image) ctx.fill() # scale to 250x250 scale = 250.0 / max(handle.get_property("height"), handle.get_property("width")) ctx.scale(scale, scale) # translate to center height = scale * handle.get_property("height") width = scale * handle.get_property("width") ctx.translate(125.0 - (0.5 * width), 125.0 - (0.5 * height)) # render image handle.render_cairo(ctx) # write png data png = StringIO() image.write_to_png(png) # insert diagram into document molecules_collection.update({"inchikey" : str(inchikey)}, {"$set" : {"diagram" : Binary(png.getvalue(), BINARY_SUBTYPE)}}, False)