def test_clear(self):
    for counter in range(10):
        self.collection.insert({'_id': 'test-' + str(counter),
                                'value': counter})
    my_dict = MongoDict()
    my_dict.clear()  # should use the collection's drop method
    #TODO: test `clear`'s call duration
    self.assertEquals(self.collection.find().count(), 0)
def test_set_item_should_save_data_in_collection(self):
    my_dict = MongoDict(**self.config)
    my_dict['python'] = 'rules'
    results = list(self.collection.find())
    self.assertEqual(len(results), 1)
    self.assertEqual(results[0]['_id'], 'python')
    self.assertEqual(decode(results[0]['v']), 'rules')
def test_set_item(self):
    my_dict = MongoDict(**self.config)
    my_dict['python'] = 'rules'
    results = list(self.collection.find())
    self.assertEquals(len(results), 1)
    self.assertEquals(results[0]['_id'], 'python')
    self.assertEquals(results[0]['value'], 'rules')
def test_customized_codec(self):
    self.collection.drop()
    self.config['codec'] = (lambda x: json.dumps(x).encode('utf-8'),
                            json.loads)
    data = {'first': (1, 2), 'test': [3, 4], 'test2': {'...': 456}}
    expected = [('first', [1, 2]), ('test', [3, 4]), ('test2', {'...': 456})]
    # JSON represents Python tuples as arrays and deserializes arrays as
    # lists, so our tuples come back as lists
    self.config['default'] = data
    my_dict = MongoDict(**self.config)
    for result in self.collection.find():
        key = result['_id']
        value = json.loads(result['v'].decode('utf-8'))
        pair = (key, value)
        self.assertIn(pair, expected)
        expected.remove(pair)
        # we need to remove every pair from this list (instead of just adding
        # it to a dict and then comparing the whole dicts) because JSON
        # encodes tuples as arrays and decodes arrays as lists, and we can't
        # have a list as a dict key since it is not hashable
    self.assertEqual(expected, [])
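# Not part of the original test suite: a minimal sketch of using the `codec`
# option exercised above to store values as JSON instead of the default
# pickle encoding. Connection parameters below are placeholders.
json_dict = MongoDict(host='localhost', port=27017, database='mongodict',
                      collection='json_store',
                      codec=(lambda obj: json.dumps(obj).encode('utf-8'),
                             lambda raw: json.loads(raw.decode('utf-8'))))
json_dict['answer'] = {'value': 42}
assert json_dict['answer'] == {'value': 42}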
def _pre_setup(self, *args, **kwargs):
    super(TestWithMongo, self)._pre_setup(*args, **kwargs)
    if 'test' not in gridfs_storage.database:
        error_message = (
            "We expect the mongodb database name to contain the "
            "string 'test' to make sure you don't mess up your production "
            "database. Are you sure you're using settings.test to run these "
            "tests?")
        raise ImproperlyConfigured(error_message)
    gridfs_storage._connection.drop_database(gridfs_storage.database)
    for doc in Document.objects.all():
        gridfs_storage.save(
            os.path.basename(doc.blob.name),
            StringIO("This is a test file with some test text."))
    self.store = MongoDict(
        host=settings.MONGODB_CONFIG['host'],
        port=settings.MONGODB_CONFIG['port'],
        database=settings.MONGODB_CONFIG['database'],
        collection=settings.MONGODB_CONFIG['analysis_collection'])
    filename = os.path.join(settings.PROJECT_ROOT,
                            'core/fixtures/mongodb/analysis.json')
    with open(filename, 'r') as mongo_fixture:
        for obj in json.load(mongo_fixture):
            self.store[obj['_id']] = obj['value']
def test_should_be_a_MutableMapping(self):
    my_dict = MongoDict()
    self.assertTrue(isinstance(my_dict, MutableMapping))
    expected_methods = ['setitem', 'getitem', 'delitem', 'iter', 'len']
    actual_methods = dir(my_dict)
    for method in expected_methods:
        self.assertIn('__{}__'.format(method), actual_methods)
class MongoDictStore(object):
    '''Sample Store based on MongoDict'''

    def __init__(self, **configuration):
        '''Instantiate a MongoDictStore

        `configuration` must have the keys:
        - host
        - port
        - database
        - collection (for MongoDict)
        - monitoring_collection
        '''
        self._dict = MongoDict(safe=True, **configuration)
        self._connection = Connection(configuration['host'],
                                      configuration['port'], safe=True)
        self._db = self._connection[configuration['database']]
        self._monitoring = self._db[configuration['monitoring_collection']]

    def retrieve(self, info):
        '''Retrieve data to pass to `WorkerClass.process`

        `info` has keys 'worker', 'worker_requires' and 'data':
        - 'data' comes from pipeline data
        - 'worker' is the worker name
        - 'worker_requires' is the 'requires' attribute of WorkerClass

        For MongoDictStore, 'data' must have an 'id' key
        '''
        data_id = info['data']['id']
        worker_input = {}
        for key in info['worker_requires']:
            mapped_key = 'id:{}:{}'.format(data_id, key)
            worker_input[key] = self._dict.get(mapped_key, None)
        return worker_input

    def save(self, info):
        '''Save information returned by `WorkerClass.process`

        `info` has keys 'worker', 'worker_requires', 'worker_result' and
        'data':
        - 'data' comes from pipeline data
        - 'worker' is the worker name
        - 'worker_requires' is the 'requires' attribute of WorkerClass
        - 'worker_result' is what WorkerClass.process returned
        '''
        data_id = info['data']['id']
        for key, value in info['worker_result'].items():
            mapped_key = 'id:{}:{}'.format(data_id, key)
            list_key = 'id:{}:_keys'.format(data_id)
            self._dict[mapped_key] = value
            if list_key not in self._dict:
                self._dict[list_key] = [key]
            else:
                data = self._dict[list_key]
                data.append(key)
                self._dict[list_key] = data

    def save_monitoring(self, data):
        self._monitoring.insert(data)
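# Not part of the original code: a hypothetical sketch of the `info` dicts
# described in the docstrings above. `store` is assumed to be an already
# configured MongoDictStore instance and the worker names are made up.
store.save({
    'data': {'id': 42},                       # pipeline data
    'worker': 'tokenizer',                    # worker name
    'worker_requires': ['text'],              # WorkerClass 'requires' attribute
    'worker_result': {'tokens': ['a', 'b']},  # what WorkerClass.process returned
})
worker_input = store.retrieve({'data': {'id': 42},
                               'worker': 'pos-tagger',
                               'worker_requires': ['tokens']})
# worker_input == {'tokens': ['a', 'b']}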
def test_get_item(self):
    self.collection.insert({'_id': 'testing', 'value': 123})
    self.collection.insert({'_id': 'bla bla bla', 'value': 3.14})
    my_dict = MongoDict(**self.config)
    self.assertEquals(my_dict['testing'], 123)
    self.assertEquals(my_dict['bla bla bla'], 3.14)
    with self.assertRaises(KeyError):
        temp = my_dict['non ecxiste']
def test_len(self):
    for counter in range(1000):
        self.collection.insert({'_id': 'test-' + str(counter),
                                'value': counter})
    my_dict = MongoDict(**self.config)
    self.assertEquals(self.collection.find().count(), len(my_dict))
def test_deletion_of_MongoDict_object_should_sync_data_even_without_safe(
        self):
    config = self.config.copy()
    config['safe'] = False
    my_dict = MongoDict(**config)
    for i in range(1000):
        my_dict['testing_' + str(i)] = str(i)
    del my_dict
    self.assertEqual(self.collection.find().count(), 1000)
def test_del_item(self):
    self.collection.insert({'_id': 'testing', 'value': 123})
    self.collection.insert({'_id': 'bla bla bla', 'value': 3.14})
    my_dict = MongoDict(**self.config)
    del my_dict['testing']
    results = list(self.collection.find())
    self.assertEquals(results, [{'_id': 'bla bla bla', 'value': 3.14}])
    with self.assertRaises(KeyError):
        del my_dict['non ecxiste']
def test_non_unicode_strings(self):
    my_dict = MongoDict()
    string_1 = u'Álvaro Justen'.encode('iso-8859-15')
    with self.assertRaises(UnicodeError):
        my_dict[string_1] = 123
    with self.assertRaises(UnicodeError):
        temp = my_dict[string_1]
    with self.assertRaises(UnicodeError):
        my_dict['python'] = string_1
def test_keys_method_should_not_raises_exception_if_more_than_16MB(self):
    '''Should not raise an exception if the sum of keys is greater than 16MB

    Bug reported by @andrebco:
    <https://github.com/turicas/mongodict/issues/10>
    '''
    my_dict = MongoDict(**self.config)
    key_template = ('python-rules' * 100000) + '{}'
    key_byte_count = 0
    key_count = 0
    keys = set()
    while key_byte_count < 20 * 1024 * 1024:  # 20MB > 16MB
        new_key = key_template.format(key_count)
        my_dict[new_key] = 'some value'
        key_byte_count += len(new_key)
        key_count += 1
        keys.add(new_key)
    dict_keys = my_dict.keys()
    self.assertEquals(len(keys), len(dict_keys))
    self.assertTrue(keys == set(dict_keys))
def properties(self):
    if self.id is None:
        raise ValueError("This document was not saved, so you cannot "
                         "retrieve its information from the backend.")
    if self._store is None:
        self._store = MongoDict(
            host=settings.MONGODB_CONFIG['host'],
            port=settings.MONGODB_CONFIG['port'],
            database=settings.MONGODB_CONFIG['database'],
            collection=settings.MONGODB_CONFIG['analysis_collection'])
    return StoreProxy(self.id, self._store)
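# Not from the original code base: hypothetical read access through the
# StoreProxy returned by `properties` above (the 'text' key is assumed to
# have been stored by the pipeline).
document = Document.objects.all()[0]
print(document.properties['text'])   # a single analysis result
print(document.properties.keys())    # every property stored for this document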
def test_get_item_should_retrieve_data_from_collection(self):
    self.collection.insert({'_id': 'testing', 'v': Binary(encode('123'))})
    self.collection.insert({'_id': 'bla bla bla',
                            'v': Binary(encode('3.14'))})
    my_dict = MongoDict(**self.config)
    self.assertEqual(my_dict['testing'], '123')
    self.assertEqual(my_dict['bla bla bla'], '3.14')
    with self.assertRaises(KeyError):
        temp = my_dict['non ecxiste']
def test_iter(self):
    for counter in range(10):
        self.collection.insert({'_id': 'test-' + str(counter),
                                'value': counter})
    my_dict = MongoDict(**self.config)
    keys = []
    for key in my_dict:
        keys.append(key)
    self.assertEquals(len(keys), 10)
    expected_keys = ['test-' + str(counter) for counter in range(10)]
    self.assertEquals(set(keys), set(expected_keys))
    self.assertEquals(set(my_dict.keys()), set(expected_keys))
    results = []
    for key, value in my_dict.iteritems():
        results.append((key, value))
    values = [x[1] for x in results]
    expected_values = list(range(10))
    self.assertEquals(set(values), set(expected_values))
    self.assertEquals(set(my_dict.values()), set(expected_values))
def test_default_items(self):
    my_dict = MongoDict(default={'answer': 42, 'spam': 'ham'},
                        **self.config)
    results = list(self.collection.find())
    self.assertEquals(len(results), 2)
    self.assertEquals(results[0]['_id'], 'answer')
    self.assertEquals(results[0]['value'], 42)
    self.assertEquals(results[1]['_id'], 'spam')
    self.assertEquals(results[1]['value'], 'ham')
def test_del_item_should_delete_pair_in_the_collection(self):
    self.collection.insert({'_id': 'testing', 'v': Binary(encode('123'))})
    self.collection.insert({'_id': 'bla bla bla',
                            'v': Binary(encode('3.14'))})
    my_dict = MongoDict(**self.config)
    del my_dict['testing']
    results = list(self.collection.find())
    self.assertEqual(results[0]['_id'], 'bla bla bla')
    self.assertEqual(decode(results[0]['v']), '3.14')
    with self.assertRaises(KeyError):
        del my_dict['non ecxiste']
def test_pickle_codec_should_return_same_objects(self):
    self.collection.drop()
    my_dict = MongoDict(**self.config)
    my_dict['int'] = 42
    my_dict['float'] = 3.14
    my_dict['string'] = 'python'
    # `key_1` and `key_2` are defined elsewhere in the test module: the same
    # text as unicode and bytes, respectively
    my_dict[key_2] = [3, 4]
    my_dict[key_1] = {123: 456}
    # since key_1 and key_2 hold the same information (one is unicode, the
    # other is bytes), the contents are overwritten in MongoDict
    self.assertEqual(my_dict['int'], 42)
    self.assertEqual(my_dict['float'], 3.14)
    self.assertEqual(my_dict['string'], 'python')
    self.assertEqual(my_dict[key_1], {123: 456})
def test_verify_if_index_is_created(self):
    self.config['index_type'] = 'invalid'
    with self.assertRaises(ValueError):
        MongoDict(**self.config)
    del self.config['index_type']

    self.collection.drop()
    my_dict = MongoDict(**self.config)  # default 'index_type' = 'key'
    indexes = extract_indexes(self.collection.index_information())
    expected_indexes = [[('_id', 1)]]
    self.assertEqual(indexes, expected_indexes)

    self.collection.drop()
    self.config['index_type'] = 'key-value'
    self.config['collection'] = 'index_test'
    self.collection = self.db[self.config['collection']]
    other_dict = MongoDict(**self.config)
    indexes = extract_indexes(self.collection.index_information())
    expected_indexes = set([(('_id', 1),), (('_id', 1), ('v', 1))])
    new_indexes = set()
    for index in indexes:
        new_indexes.add(tuple(index))
    self.assertEqual(new_indexes, expected_indexes)
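# Not part of the original tests: a sketch of picking the index type shown
# above at construction time (connection parameters are placeholders).
# 'key' (the default) keeps only the mandatory index on _id; 'key-value'
# additionally creates a compound index on (_id, v).
kv_dict = MongoDict(host='localhost', port=27017, database='mongodict',
                    collection='indexed_store', index_type='key-value')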
class StoreProxyTest(TestWithMongo):
    fixtures = ['users', 'corpora', 'documents']

    def setUp(self):
        self.document = Document.objects.all()[0]
        self.base_store = MongoDict(
            host=settings.MONGODB_CONFIG['host'],
            port=settings.MONGODB_CONFIG['port'],
            database=settings.MONGODB_CONFIG['database'],
            collection=settings.MONGODB_CONFIG['analysis_collection'])
        self.proxy = StoreProxy(self.document.id, self.base_store)

    def test_access_store_data_without_formating_the_key_with_document_id(self):
        self.assertEqual(
            self.proxy['text'],
            self.base_store['id:{}:text'.format(self.document.id)])

    def test_list_keys_based_on_the_available_properties(self):
        self.assertEqual(
            self.proxy.keys(),
            self.base_store['id:{}:_properties'.format(self.document.id)])

    def test_only_lists_items_in_store_that_are_related_from_this_document(self):
        """
        Since we're using UserDict.DictMixin as a parent class of StoreProxy,
        this behaviour comes from implementing the `keys` method.
        """
        expected_items = [(k.split(':')[-1], v)
                          for k, v in self.base_store.items()
                          if int(k.split(':')[1]) == self.document.id and
                          k.split(':')[-1] != u'_properties']
        self.assertItemsEqual(self.proxy.items(), expected_items)

    def test_proxy_is_read_only(self):
        with self.assertRaises(AttributeError):
            self.proxy["new key"] = "new value"

    def test_access_store_keys_for_a_document_that_does_not_have_entries_in_mongo(self):
        corpus = Corpus.objects.all()[0]
        document = Document.objects.create(blob=File(StringIO(), "filename"),
                                           owner=corpus.owner, corpus=corpus)
        with self.assertRaisesRegexp(KeyError, "Can't find information for "
                                               "document with id"):
            document.properties.keys()

    def test_access_inexistent_key_for_a_document_that_has_entries_in_mongo(self):
        document = Document.objects.all()[0]
        with self.assertRaisesRegexp(KeyError, "Can't find key .* for "
                                               "document with id .*$"):
            document.properties['invalid_key']
class Storage:

    def __init__(self):
        self.disconnect()

    @reporter("connect as user %s with password %s to database "
              "(host %s , port %n , database %s , collection %s )",
              defaults=["", "", "localhost", 27017, "snapmesh", "snapmesh"])
    def connect(self, username, password, host, port, database, collection):
        try:
            self.db = MongoDict(host, port, database, collection,
                                auth=(username, password))
            return "ok"
        except:
            return "error: " + str(sys.exc_info()[1])

    @command("disconnect")
    def disconnect(self):
        self.db = None

    @predicate("connected?")
    def is_connected(self):
        return self.db is not None

    @reporter("%s from the collection")
    def get(self, key):
        if self.db is None:
            return "not connected"
        if key in self.db:
            return self.db[key]
        else:
            return ""

    @command("%s as %s into the collection")
    def put(self, value, key):
        if self.db is None:
            return "not connected"
        self.db[key] = value

    @reporter("contents of the collection")
    def list(self):
        if self.db is None:
            return "not connected"
        return "\n".join(self.db.keys())
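# Not part of the original blocks: a hypothetical interactive session using
# the Storage blocks defined above (credentials and hosts are placeholders).
storage = Storage()
print(storage.connect('user', 'secret', 'localhost', 27017,
                      'snapmesh', 'snapmesh'))  # -> "ok" or an error string
storage.put('rules', 'python')  # stores the value 'rules' under key 'python'
print(storage.get('python'))    # -> 'rules'
print(storage.is_connected())   # -> True
storage.disconnect()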
class MongoDBStore(object):
    '''Store PyPLN workers' analysis in MongoDB, using MongoDict'''

    def __init__(self, **config):
        host, port, database = config['host'], config['port'], \
                               config['database']
        self._connection = Connection(host, port)
        self._db = self._connection[database]
        if 'username' in config and 'password' in config:
            self._db.authenticate(config['username'], config['password'])
        self._dict = MongoDict(host=host, port=port, database=database,
                               collection=config['analysis_collection'])
        #TODO: use auth on mongodict
        self._monitoring = self._db[config['monitoring_collection']]
        self._gridfs = GridFS(self._db, config['gridfs_collection'])

    def retrieve(self, info):
        '''Retrieve data to pass to `WorkerClass.process`

        `info` has keys 'worker', 'worker_requires' and 'data':
        - 'data' comes from pipeline data
        - 'worker' is the worker name
        - 'worker_requires' is the 'requires' attribute of WorkerClass
        '''
        data = info['data']
        result = {'_missing': []}
        if info['worker'] == 'Extractor':
            if '_id' not in data:
                raise ValueError('Invalid job data: missing "_id"')
            file_data = self._gridfs.get(ObjectId(data['_id']))
            result = {'length': file_data.length,
                      'md5': file_data.md5,
                      'filename': file_data.filename,
                      'upload_date': file_data.upload_date,
                      'contents': file_data.read()}
        else:
            if 'id' not in data:
                raise ValueError('Invalid job data: missing "id"')
            for key in info['worker_requires']:
                new_key = 'id:{}:{}'.format(data['id'], key)
                try:
                    result[key] = self._dict[new_key]
                except KeyError:
                    result['_missing'].append(key)
        return result

    def save(self, info):
        '''Save information returned by `WorkerClass.process`

        `info` has keys 'worker', 'worker_requires', 'worker_result' and
        'data':
        - 'data' comes from pipeline data
        - 'worker' is the worker name
        - 'worker_requires' is the 'requires' attribute of WorkerClass
        - 'worker_result' is what WorkerClass.process returned
        '''
        data = info['data']
        worker_result = info['worker_result']
        if 'id' not in data:
            raise ValueError('Invalid job data: missing "id"')
        # insert results
        for key, value in worker_result.items():
            new_key = 'id:{}:{}'.format(data['id'], key)
            if key == "_exception" or key == "_traceback":
                content = self._dict.get(new_key, [])
                if isinstance(content, str):
                    content = [content]
                content.append({'worker': info['worker'],
                                'traceback': worker_result[key]})
                self._dict[new_key] = content
            else:
                self._dict[new_key] = worker_result[key]
        # update property list for this document
        properties_key = 'id:{}:_properties'.format(data['id'])
        all_properties = self._dict.get(properties_key, []) + \
                         worker_result.keys()
        self._dict[properties_key] = list(set(all_properties))

    def save_monitoring(self, info):
        '''Save broker's monitoring information'''
        self._monitoring.insert(info)
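# Not part of the original store: a hypothetical call hitting the Extractor
# branch of `retrieve` above. `store` is assumed to be a configured
# MongoDBStore instance and the ObjectId string is a placeholder for a file
# previously uploaded to GridFS.
file_info = store.retrieve({'worker': 'Extractor',
                            'worker_requires': [],
                            'data': {'_id': '507f1f77bcf86cd799439011'}})
print(file_info['filename'], file_info['length'], file_info['md5'])
# Non-Extractor workers instead read keys of the form 'id:<doc id>:<property>'
# from MongoDict, as `save` above writes them.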
def type2test(self):
    collection_name = random_string()
    self.collections.append(collection_name)
    return MongoDict(host=MONGO_HOST, port=MONGO_PORT,
                     database=MONGO_DATABASE, collection=collection_name)
def test_duplication(self):
    my_dict = MongoDict()
    my_dict['python'] = 'rules'
    my_dict['python'] = 42
def test_in(self):
    self.collection.insert({'_id': 'testing', 'value': 123})
    my_dict = MongoDict(**self.config)
    self.assertIn('testing', my_dict)
    self.assertNotIn('python', my_dict)
def test_should_be_possible_to_assign_new_values_to_existing_keys(self):
    my_dict = MongoDict(**self.config)
    my_dict['python'] = 'rules'
    my_dict['python'] = '42'
    self.assertNotEqual(my_dict['python'], 'rules')
    self.assertEqual(my_dict['python'], '42')