    def setup(self):
        h.skip_if_stubbed_solr()

        super(TestClassifierController, self).setup()
        h.load_fixture('cra')
        h.clean_and_reindex_solr()
        self.db = mongo.db()
Example #2
def distinct_count(key, dataset_name):
    assert ('.' not in key or key.startswith('time.'))
    collection_name = 'distincts__%s' % dataset_name
    db = mongo.db()
    if collection_name not in db.collection_names():
        update_distincts(dataset_name)
    # return the number of precomputed distinct values matching the key
    return db[collection_name].find({'values': key}).count()
Example #3
    def _cmd_dropdataset(self):
        self._check_args_length(2)

        ds_name = self.args[1]

        log.warn("Dropping dataset '%s'", ds_name)

        from openspending.model import mongo
        db = mongo.db()

        log.info("Removing entries for dataset %s", ds_name)
        db.entry.remove({'dataset.name': ds_name})

        log.info("Removing dimensions for dataset %s", ds_name)
        db.dimension.remove({'dataset': ds_name})

        log.info("Removing distincts for dataset %s", ds_name)
        db['distincts__%s' % ds_name].drop()

        log.info("Removing cubes for dataset %s", ds_name)
        cubes = filter(lambda x: x.startswith('cubes.%s.' % ds_name),
                       db.collection_names())
        for c in cubes:
            db[c].drop()

        log.info("Removing dataset object for dataset %s", ds_name)
        db.dataset.remove({'name': ds_name})
Example #4
    def test_compute_cube(self):
        from openspending.model import Dataset

        cra = h.load_fixture('cra')

        cube = Cube.configure_default_cube(cra)
        cube.compute()

        h.assert_true('cubes.cra.default' in mongo.db().collection_names())
Example #5
def parent_color(obj):
    if 'color' in obj:
        return obj.get('color')
    if 'parent' in obj and obj.get('parent'):
        try:
            parent = db().dereference(obj.get('parent'))
            return parent_color(parent)
        except Exception:
            # fall back to the default colour if the parent cannot be resolved
            pass
    return DEFAULT_COLOR
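A minimal usage sketch of parent_color, assuming db and DEFAULT_COLOR are imported from the same module as the snippet above; the 'nodes' collection name and the colour values are hypothetical, chosen only to illustrate how a child document without its own colour inherits the colour of its dereferenced parent.

from bson.dbref import DBRef

# hypothetical data: a parent with an explicit colour and a child that only
# carries a DBRef pointing at it (collection name and colours are assumptions)
parent_id = db().nodes.insert({'name': 'parent', 'color': '#ff0000'})
child = {'name': 'child', 'parent': DBRef('nodes', parent_id)}

assert parent_color(child) == '#ff0000'                      # inherited via dereference
assert parent_color({'name': 'orphan'}) == DEFAULT_COLOR     # no colour, no parent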
Example #6
    def test_loader_creates_indexes(self):
        db = mongo.db()
        db.create_collection('entry')
        db.create_collection('entity')
        h.assert_equal(self._get_index_num(Entry), 1)
        h.assert_equal(self._get_index_num(Entity), 1)

        self._make_loader()
        h.assert_equal(self._get_index_num(Entry), 9)
        h.assert_equal(self._get_index_num(Entity), 2)
Example #7
def distinct(key, dataset_name=None, **query):
    '''Return the distinct values of `key` for all *Entry* objects
    matching *dataset_name* or *query*. Where possible the result is read
    from a precomputed distincts collection, so there may be short time
    frames where it does not match the entries in the datastore (mongodb).

    ``key``
        The key of the field for which the distinct values will be returned
    ``dataset_name``
        The name of the dataset to restrict the query to
    ``**query``
        Parameters for an *AND* query. Only *Entry* objects matching these
        conditions will be considered. To restrict the query to a dataset
        **don't** add the condition here, use *dataset_name*.

    Returns: A list of distinct values.
    '''

    direct_mongo_query = False

    # the same keys used in serverside_js/compute_distincts.js
    not_aggregated_keys = ['_id', 'name', 'amount', 'classifiers',
                           'entities', 'currency']

    if ((dataset_name is None) or (len(query) > 0) or
        (key in not_aggregated_keys)):
        direct_mongo_query = True
    else:
        dataset = Dataset.c.find_one({'name': dataset_name},
                                    as_class=dict)
        if not dataset:
            raise ValueError('Dataset "%s" does not exist' % dataset_name)

    if not direct_mongo_query:
        collection_name = 'distincts__%s' % dataset_name
        db = mongo.db()

        if collection_name not in db.collection_names():
            # We need to create the distincts collection first
            update_distincts(dataset_name)
        distincts_collection = db[collection_name]
        log.info('use distincts collection %s' % collection_name)
        return distincts_collection.find({'value.keys': key}).distinct('_id')

    if direct_mongo_query:
        if dataset_name is not None:
            query['dataset.name'] = dataset_name
        return Entry.c.find(query).distinct(key)
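A brief usage sketch of distinct; the dataset name 'cra' and the keys used below are assumptions for illustration, not taken from the source.

# served from the precomputed 'distincts__cra' collection: the key is not in
# not_aggregated_keys and no extra query conditions are supplied
regions = distinct('region', dataset_name='cra')

# falls back to querying the entry collection directly, because 'amount' is in
# not_aggregated_keys and an extra AND condition is supplied
amounts = distinct('amount', dataset_name='cra', **{'time.from.year': 2009})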
Example #8
    def test_fallback_for_missing_entity_name(self):
        # We use the objectid of an entity as a fallback value for 'name'
        loader = self._make_loader()
        loader.create_dimension('name', 'Name', '')
        loader.create_dimension('label', 'Label', '')
        loader.create_dimension('from', 'From', '')

        from_entity = self._make_entity(loader, name="",
                                        label='Entity w/o name')
        entry = {'name': 'Entry',
                 'label': 'Entry Label',
                 'from': from_entity,
                 'time': {'from': {'year': 2009,
                                   'day': 20090101}}}
        self._make_entry(loader, **entry)
        cube = Cube.configure_default_cube(loader.dataset)
        cube.compute()

        cube_collection = mongo.db()[cube.collection_name]
        h.assert_equal(cube_collection.find().count(), 1)
        cube_from = cube_collection.find_one()['from']
        h.assert_equal(cube_from['name'], cube_from['_id'])
Example #9
def remove_dataset(dataset_name):
    log.warn("Dropping dataset '%s'", dataset_name)

    from openspending.model import mongo
    db = mongo.db()

    log.info("Removing entries")
    db.entry.remove({'dataset.name': dataset_name})

    log.info("Removing dimensions")
    db.dimension.remove({'dataset': dataset_name})

    log.info("Removing distincts")
    db['distincts__%s' % dataset_name].drop()

    log.info("Removing cubes")
    cubes = filter(lambda x: x.startswith('cubes.%s.' % dataset_name),
                   db.collection_names())
    for c in cubes:
        db[c].drop()

    log.info("Removing dataset object for dataset %s", dataset_name)
    db.dataset.remove({'name': dataset_name})
Example #10
def used_keys(dataset_name):
    collection_name = 'distincts__%s' % dataset_name
    db = mongo.db()
    if collection_name not in db.collection_names():
        update_distincts(dataset_name)
    return db[collection_name].distinct('value')