def setup(self):
    """Prepare a Solr-backed test: load the 'cra' fixture and reindex."""
    # Solr integration test — bail out early when Solr is only stubbed.
    h.skip_if_stubbed_solr()
    super(TestClassifierController, self).setup()
    # Load the 'cra' fixture dataset, then rebuild the Solr index so
    # queries in the tests see the freshly loaded data.
    h.load_fixture('cra')
    h.clean_and_reindex_solr()
    # Mongo handle reused by the individual test methods.
    self.db = mongo.db()
def distinct_count(key, dataset_name):
    """Return the number of precomputed distinct documents for ``key``.

    ``key``
        The (top-level, or ``time.``-prefixed) key to look up.
    ``dataset_name``
        Name of the dataset whose ``distincts__<name>`` collection is used.

    The distincts collection is created lazily on first use.

    Returns: the number of matching documents (an int).
    """
    # Dotted keys address sub-documents; only 'time.' sub-keys are supported.
    assert ('.' not in key or key.startswith('time.'))
    collection_name = 'distincts__%s' % dataset_name
    db = mongo.db()
    if collection_name not in db.collection_names():
        # Build the precomputed distincts collection the first time around.
        update_distincts(dataset_name)
    # Bug fix: the original built this cursor and discarded it, so the
    # function always returned None. Return the cursor's count so callers
    # actually get the count the function name promises.
    return db[collection_name].find({'values': key}).count()
def _cmd_dropdataset(self):
    """Delete every mongo artefact belonging to one named dataset."""
    self._check_args_length(2)
    ds_name = self.args[1]
    log.warn("Dropping dataset '%s'", ds_name)
    from openspending.model import mongo
    db = mongo.db()

    # Remove the per-dataset documents from the shared collections first.
    log.info("Removing entries for dataset %s", ds_name)
    db.entry.remove({'dataset.name': ds_name})
    log.info("Removing dimensions for dataset %s", ds_name)
    db.dimension.remove({'dataset': ds_name})

    # Drop the dataset-private collections: distincts and all cubes.
    log.info("Removing distincts for dataset %s", ds_name)
    db['distincts__%s' % ds_name].drop()
    log.info("Removing cubes for dataset %s", ds_name)
    cube_prefix = 'cubes.%s.' % ds_name
    for cube_name in [name for name in db.collection_names()
                      if name.startswith(cube_prefix)]:
        db[cube_name].drop()

    # Finally remove the dataset record itself.
    log.info("Removing dataset object for dataset %s", ds_name)
    db.dataset.remove({'name': ds_name})
def test_compute_cube(self):
    """Computing the default cube for 'cra' creates its mongo collection."""
    # Fix: dropped the unused `from openspending.model import Dataset`
    # import the original carried.
    cra = h.load_fixture('cra')
    cube = Cube.configure_default_cube(cra)
    cube.compute()
    h.assert_true('cubes.cra.default' in mongo.db().collection_names())
def parent_color(obj):
    """Return the display color for ``obj``, inheriting up the parent chain.

    ``obj``
        A dict-like document; may carry a 'color' value and/or a 'parent'
        DBRef-style reference.

    An object's own 'color' wins; otherwise the parent chain is followed
    recursively. Falls back to ``DEFAULT_COLOR`` when no color is found or
    the parent reference cannot be resolved.
    """
    if 'color' in obj:
        return obj.get('color')
    parent_ref = obj.get('parent')
    if parent_ref:
        try:
            parent = db().dereference(parent_ref)
            return parent_color(parent)
        except Exception:
            # Fix: the original bare `except:` also swallowed SystemExit and
            # KeyboardInterrupt. A broken/dangling parent reference still
            # falls back to the default color below.
            pass
    return DEFAULT_COLOR
def test_loader_creates_indexes(self):
    """Instantiating a loader adds the expected indexes to entry/entity."""
    db = mongo.db()
    # Fresh collections start with only the implicit _id index (count 1).
    db.create_collection('entry')
    db.create_collection('entity')
    h.assert_equal(self._get_index_num(Entry), 1)
    h.assert_equal(self._get_index_num(Entity), 1)
    # Creating a loader is expected to ensure the additional indexes:
    # 8 more on entry, 1 more on entity (counts include the _id index).
    self._make_loader()
    h.assert_equal(self._get_index_num(Entry), 9)
    h.assert_equal(self._get_index_num(Entity), 2)
def distinct(key, dataset_name=None, **query):
    '''Return the distinct values for `key` across *Entry* objects matching
    *dataset_name* or ***query*.

    For a plain per-dataset lookup the precomputed mongodb distincts
    collection is used (created lazily if missing); there may be short time
    frames where it lags behind the datastore. Any extra *query* conditions,
    a missing *dataset_name*, or a non-aggregated key force a direct mongo
    query against the entries instead.

    ``key``
        The key of the field for which the distinct will be returned
    ``dataset_name``
        A dataset name
    ``**query``
        Parameters for an *AND* query. Only the *key* values of entries
        matching these conditions are counted. Use *dataset_name* rather
        than a condition here to restrict by dataset.

    Returns: A list of distinct values.
    '''
    # Keys that serverside_js/compute_distincts.js does not aggregate —
    # these always require a direct query.
    not_aggregated_keys = ['_id', 'name', 'amount', 'classifiers',
                           'entities', 'currency']
    use_precomputed = (dataset_name is not None
                       and not query
                       and key not in not_aggregated_keys)

    if use_precomputed:
        dataset = Dataset.c.find_one({'name': dataset_name}, as_class=dict)
        if not dataset:
            raise ValueError('Dataset "%s" does not exist' % dataset_name)
        collection_name = 'distincts__%s' % dataset_name
        db = mongo.db()
        if collection_name not in db.collection_names():
            # Lazily create the distincts collection on first use.
            update_distincts(dataset_name)
        distincts_collection = db[collection_name]
        log.info('use distincts collection %s' % collection_name)
        return distincts_collection.find({'value.keys': key}).distinct('_id')

    # Direct mongo query against the entries.
    if dataset_name is not None:
        query['dataset.name'] = dataset_name
    return Entry.c.find(query).distinct(key)
def test_fallback_for_missing_entity_name(self):
    """Cube computation uses an entity's ObjectId when its name is empty."""
    # We use the objectid of an entity as a fallback value for 'name'
    loader = self._make_loader()
    loader.create_dimension('name', 'Name', '')
    loader.create_dimension('label', 'Label', '')
    loader.create_dimension('from', 'From', '')
    # Entity deliberately created with an empty name to trigger the fallback.
    from_entity = self._make_entity(loader, name="", label='Entity w/o name')
    entry = {'name': 'Entry', 'label': 'Entry Label', 'from': from_entity,
             'time': {'from': {'year': 2009, 'day': 20090101}}}
    self._make_entry(loader, **entry)
    cube = Cube.configure_default_cube(loader.dataset)
    cube.compute()
    cube_collection = mongo.db()[cube.collection_name]
    h.assert_equal(cube_collection.find().count(), 1)
    # The cubed 'from' dimension must carry its _id as the name fallback.
    cube_from = cube_collection.find_one()['from']
    h.assert_equal(cube_from['name'], cube_from['_id'])
def remove_dataset(dataset_name):
    """Delete every mongo artefact belonging to the named dataset."""
    log.warn("Dropping dataset '%s'", dataset_name)
    from openspending.model import mongo
    db = mongo.db()

    # Per-dataset documents living in shared collections.
    log.info("Removing entries")
    db.entry.remove({'dataset.name': dataset_name})
    log.info("Removing dimensions")
    db.dimension.remove({'dataset': dataset_name})

    # Dataset-private collections: the distincts cache and every cube.
    log.info("Removing distincts")
    db['distincts__%s' % dataset_name].drop()
    log.info("Removing cubes")
    cube_prefix = 'cubes.%s.' % dataset_name
    for cube_name in [name for name in db.collection_names()
                      if name.startswith(cube_prefix)]:
        db[cube_name].drop()

    # Finally remove the dataset record itself.
    log.info("Removing dataset object for dataset %s", dataset_name)
    db.dataset.remove({'name': dataset_name})
def used_keys(dataset_name):
    """Return the distinct 'value' entries of a dataset's distincts cache.

    ``dataset_name``
        Name of the dataset whose ``distincts__<name>`` collection is read;
        the collection is created lazily on first use.

    Returns: a list of distinct 'value' documents from the cache.
    """
    collection_name = 'distincts__%s' % dataset_name
    db = mongo.db()
    if collection_name not in db.collection_names():
        # Build the precomputed distincts collection the first time around.
        update_distincts(dataset_name)
    # Bug fix: the original computed this and discarded it, so the function
    # always returned None — return the distinct values to the caller.
    return db[collection_name].distinct('value')