コード例 #1
0
ファイル: classifier.py プロジェクト: nomed/openspending
def create_classifier(name, taxonomy, label=u'', description=u'',
                      **classifier):
    '''Create or update a :class:`openspending.model.Classifier`.

    The ``name`` has to be unique within the ``taxonomy``. The classifier
    matching ``name`` and ``taxonomy`` is upserted with the values for
    ``label``, ``description`` and ``**classifier``.

    Arguments:

    ``name``
      Name of the classifier. (``unicode``)
    ``taxonomy``
      The taxonomy to which the classifier ``name`` belongs.
      (``unicode``)
    ``label``, ``description``, ``**classifier``
      Values stored on the classifier. ``label`` and ``description``
      are only written when they are non-empty, so omitting them does
      not overwrite values that are already stored.

    Returns: An :class:`openspending.model.Classifier` object

    Raises:
       AssertionError if more than one ``Classifier`` object with the
       name exists in the ``taxonomy``. Errors raised by
       ``check_rest_suffix(name)`` propagate unchanged.
    '''
    check_rest_suffix(name)
    query = {'name': name, 'taxonomy': taxonomy}
    # Raised explicitly (not via ``assert``) so the check also runs when
    # Python is started with -O.
    if Classifier.find(query).count() > 1:
        raise AssertionError("Ambiguous classifier name (%s) in (%s)" %
                             (name, taxonomy))
    if label:
        classifier['label'] = label
    if description:
        classifier['description'] = description
    # upsert=True: insert the classifier if the query matches nothing.
    Classifier.c.update(query, {"$set": classifier}, upsert=True)
    return Classifier.find_one(query)
コード例 #2
0
    def create_entity(self, name=None, label=u'', description=u'',
                      _cache=None, match_keys=('name', ), **entity):
        '''\
        Create or update an :class:`openspending.model.Entity` object in the
        database when this is called for the entity the first time.
        An existing entity is looked up with the entity's data for
        *match_keys*. By default we look up by name, but other data
        like company or tax ids may be more appropriate.

        The entity will only be created/updated if it is not in the
        ``_cache``; otherwise the cached object is returned and the
        database is not touched.

        ``name``
            Name of the entity.
        ``label, description``
            Label and description of the entity (unicodes). Both are
            always written, so their defaults replace stored values when
            an existing entity is updated without passing them.
        ``match_keys``
            The keys with which we try to find an existing entity
            in the database. default: ('name',). type: ``list`` or
            ``tuple``. A list is converted to a tuple internally.
        ``**entity``
            Keyword arguments that are saved in the entity.
        ``_cache``
          Use the given ``dict`` like object for caching.
          Normally not used by callers. It can be used to force an
          update of an entity that was created/updated by an earlier
          call. With ``None`` (default), the ``Loader`` uses internal
          caching.

        Returns: The created, updated or cached ``Entity`` object.

        Raises:

        :exc:`AssertionError`
            If the name ends with a suffix used for REST, e.g. .json.
            If match_keys is not list or tuple.
        :exc:`KeyError`
            If a given match_key is not present in the entity.
        '''
        # assertions
        check_rest_suffix(name)
        if not isinstance(match_keys, (list, tuple)):
            raise AssertionError('match_keys has to be list or tuple')
        # match_keys is used as a dictionary key below; a list would be
        # unhashable, so normalise to a tuple.
        match_keys = tuple(match_keys)

        entity.update({'name': name,
                       'label': label,
                       'description': description})

        # prepare a cache for the match_keys combination
        if _cache is None:
            _cache = self.entity_cache
        cache = _cache.setdefault(match_keys, {})
        cache_key = tuple([entity[key] for key in match_keys])

        if cache_key not in cache:
            # Look the entity up by exactly the values that form the
            # cache key.
            query = dict(zip(match_keys, cache_key))

            # Decide before the upsert whether this is a new entity, so
            # the change object records the right operation.
            if Entity.find_one(query) is None:
                operation = CREATE
            else:
                operation = UPDATE

            Entity.c.update(query, {"$set": entity}, upsert=True)
            new_entity = Entity.find_one(query)
            self._add_changeobj(Entity.c.name, new_entity['_id'],
                                new_entity, operation)
            cache[cache_key] = new_entity

        return cache[cache_key]
コード例 #3
0
    def __init__(self, dataset_name, unique_keys, label, description=u'',
                 metadata=None, currency=u'gbp', time_axis='time.from.year',
                 changeset=None):
        '''\
        Constructs a Loader for the :class:`openspending.model.Dataset`
        `dataset_name`. Calling the constructor creates or updates the
        `Dataset` object with `dataset_name`, `label`, `description`,
        `metadata` and `currency`. The Loader instance can only be used
        to create :class:`openspending.model.Entry` objects with the same set
        of `unique_keys`. If you need to create another type of
        ``Entry`` objects instantiate another ``Loader``.

        ``dataset_name``
            The unique name for the dataset (``unicode``).
        ``unique_keys``
            The keys for which all entries in the dataset are unique
            (``list``). For example if you have entries with payments
            that are identifiable by a *department* and a
            *consecutive number* that is unique within the *department*,
            you would pass in a list with the keys
            ``['department', 'consecutive_number']``.
        ``label``
            A label for the dataset that can be presented to the user.
        ``description``
            A description for the dataset that can be presented
            to the user.
        ``metadata``
            A ``dict`` with metadata that will be saved on the dataset.
            It is merged over the other dataset values, so its keys
            take precedence.
        ``currency``
            The default currency for the entries in the dataset. An
            individual currency can be set in :meth:`create_entry`.
            The currency is stored in upper case.
        ``time_axis``
            The time axis of the dataset. This is the time range for which
            all entries in the dataset can be analyzed. The default is
            'time.from.year' and should not be changed.
            fixme: add details and move possible values into constants in
            model.dataset.
        ``changeset``
            A :class:`openspending.model.Changeset` object. This is only
            required if you load a dataset with more than one loader.
            If you want to add manual changes to the changeset of your
            loader you can retrieve the changeset with *.changeset*.
            With ``None`` (default) a new changeset authored by
            'system' is created and saved.

        Raises:
            ``AssertionError``
                If ``dataset_name`` is not ``unicode``, if
                ``unique_keys`` is not a ``list``, or if more than one
                dataset with the name ``dataset_name`` exists already.
            ``ValueError``
                If a duplicated :class:`openspending.model.Entry` object
                is found (the entry has the same values for the
                ``unique_keys``) or two :class:`openspending.model.Entity`
                objects are found with the same name.
        '''
        # Validate explicitly (not via ``assert``) so the checks also run
        # when Python is started with -O.
        if not isinstance(dataset_name, unicode):
            raise AssertionError('dataset_name has to be unicode')
        if not isinstance(unique_keys, list):
            raise AssertionError('unique_keys has to be a list')
        check_rest_suffix(dataset_name)

        # create a changeset:
        if changeset is None:
            name = dataset_name
            if label:
                name = "%s (%s)" % (name, label)
            message = ('Load dataset %s. currency: %s, time axis: %s' %
                       (name, currency, time_axis))
            changeset = Changeset()
            changeset.author = 'system'
            changeset.message = message
            changeset.save()
        self.changeset = changeset

        # get the dataset
        q = {'name': dataset_name}
        # Count before the upsert so we know whether it creates or updates.
        dataset_count = Dataset.find(q).count()
        if dataset_count == 0:
            operation = CREATE
        elif dataset_count == 1:
            operation = UPDATE
        else:
            raise AssertionError("Ambiguous dataset name: %s" % dataset_name)
        data = {"label": label,
                "currency": currency.upper(),
                "description": description,
                "time_axis": time_axis}
        if metadata is not None:
            data.update(metadata)
        Dataset.c.update(q, {"$set": data}, upsert=True)
        self.dataset = Dataset.find_one(q)
        self._add_changeobj(Dataset.c.name, self.dataset.id, self.dataset,
                            operation)
        self.base_query = {"dataset._id": self.dataset.id}

        # caches
        self.entity_cache = {}
        self.classifier_cache = {}
        self.unique_keys = unique_keys

        # We need indexes to speed up lookups and updates
        # fixme: The entry.name index (last one) might be dropped when
        #        Base.by_id() changes. The 'name' field for entries is
        #        not interesting.
        index_specs = (
            (Entry, ['dataset._id']),
            (Entry, ['dataset.name']),
            (Entry, ['classifiers']),
            (Entry, ['entities']),
            (Entry, ['from._id']),
            (Entry, ['to._id']),
            (Entry, ['to._id', 'from._id', 'amount']),
            (Classifier, ['taxonomy', 'name']),
            (Dimension, ['dataset', 'key']),
            (Entity, ['name']),
            (Entry, ['name']),
        )
        for model, keys in index_specs:
            self.ensure_index(model, keys)

        # Make sure entries and entities are unique
        self.existing_entries = self._ensure_unique(Entry, self.unique_keys,
                                                    self.base_query)
        self._ensure_unique(Entity, ['name'])

        # info's needed to print statistics during the run
        self.num_entries = 0
        self.start_time = None