Beispiel #1
0
class CospreadDataRecords(SpreadsheetDataRecords):
    """Package records read from a "cospread"-format spreadsheet.

    On top of the base class this:

    * recognises column titles through a table of case-insensitive regexes
      and maps them to normalised titles (`title_normaliser`);
    * expands "spreading" titles (a title that heads several columns, with
      the per-column sub-title on the following row);
    * collates consecutive rows of the same package into one record with a
      list of `resources`.
    """

    def __init__(self, data, generate_names=False):
        """Set up the title tables, then hand over to the base class.

        - `data`: passed unchanged to the base class constructor; this class
          calls `get_num_rows()` and `get_row(index)` on it via `self._data`
          (presumably stored there by the base class -- confirm).
        - `generate_names`: when true, rows are grouped into packages by
          their "Title" column instead of "Package name".
        """
        self.generate_names = generate_names
        # cospread uses a list of alternative essential_titles
        essential_titles = ["Package name", "Abstract"]
        # NOTE(review): a few patterns are not fully anchored
        # ("Notes|Abstract$" only anchors the second alternative; some have no
        # trailing "$").  They are kept exactly as they were, because
        # tightening them could reject titles that are accepted today.
        title_patterns = (
            # ('Normalised title', 'regex of variations'),
            ("Package name", r"(Package name|Identifier)$"),
            ("Title", r"Title$"),
            ("CO Identifier", r"CO (Identifier|Reference)$"),
            ("Notes", r"Notes|Abstract$"),
            ("Date released", r"Date released$"),
            ("Date updated", r"Date updated$"),
            ("Date update future", r"Date to be published$"),
            ("Update frequency", r"Update frequency$"),
            ("Geographical Granularity - Standard", r"Geographical Granularity - Standard$"),
            ("Geographical Granularity - Other", r"Geographical Granularity - Other$"),
            ("Geographic coverage - England", r"Geographic coverage - England$"),
            ("Geographic coverage - N. Ireland", r"Geographic coverage - N. Ireland$"),
            ("Geographic coverage - Scotland", r"Geographic coverage - Scotland$"),
            ("Geographic coverage - Wales", r"Geographic coverage - Wales$"),
            ("Geographic coverage - Overseas", r"Geographic coverage - Overseas$"),
            ("Geographic coverage - Global", r"Geographic coverage - Global$"),
            ("Temporal Granularity - Standard", r"Temporal Granularity - Standard$"),
            ("Temporal Granularity - Other", r"Temporal Granularity - Other$"),
            ("Temporal Coverage - To", r"Temporal Coverage - To"),
            ("Temporal Coverage - From", r"Temporal Coverage - From$"),
            ("Categories", r"Categories$"),
            ("National Statistic", r"National Statistic$"),
            ("Precision", r"Precision$"),
            ("URL", r"URL$"),
            ("Download URL", r"(Download URL|Resources - URL)$"),
            ("File format", r"(Download |Resources - )?file format$"),
            ("Download Description", r"(Resources -|Download) Description$"),
            ("Taxonomy URL", r"Taxonomy URL$"),
            ("Department", r"Department$"),
            ("Agency responsible", r"Agency responsible$"),
            ("Published by", r"Published by$"),
            ("Published via", r"Published via$"),
            ("Contact - Permanent contact point", r"(Contact|Author) - Permanent contact point"),
            ("Contact - E-mail address.", r"(Contact|Author) - E-mail address.$"),
            # Raw string: the "\." must reach the regex engine as an escaped dot.
            ("Maintainer - ", r"Maintainer - (Blank unless not the author\.)?$"),
            ("Maintainer - E-mail address", r"Maintainer - E-mail address"),
            ("Licence", r"Licence$"),
            ("Tags", r"Tags$"),
            ("Mandate", r"Mandate$"),
        )
        # Compile once.  create_title_mapping() applies these with match(),
        # so every pattern is implicitly anchored at the start of the title,
        # and the first matching entry wins.
        self.title_normaliser = [
            (norm_title, re.compile(regex, re.I)) for norm_title, regex in title_patterns
        ]
        # Resource keys listed here may be absent from a row without error
        # (see `records`).
        self.optional_columns = [
            u"Temporal Coverage - To",
            u"Temporal Coverage - From",
            u"Download Description",
            u"National Statistic",
            u"Maintainer - E-mail address",
            u"Maintainer - ",
            u"Categories",
        ]
        # Titles that head several columns; find_titles() joins them with the
        # sub-title found on the following row.
        self.column_spreading_titles = [
            "Geographical Granularity",
            "Geographic coverage",
            "Temporal Granularity",
            "Temporal Coverage",
            "Author",
            "Maintainer",
            "Contact",
        ]
        # "<name> - Standard" / "<name> - Other" pairs that `records`
        # collapses into a single "<name>" value.
        self.standard_or_other_columns = ["Geographical Granularity", "Temporal Granularity"]
        # Columns that are moved into each entry of a record's "resources".
        self.resource_keys = ["Download URL", "File format", "Download Description"]
        super(CospreadDataRecords, self).__init__(data, essential_titles)

    def find_titles(self, essential_titles):
        """Locate the title row and build the list of column titles.

        Rows are scanned from the top until one contains at least one of
        `essential_titles` (as given, or lower-cased).  In that row:

        * a non-empty cell gives the title (stripped, double spaces
          collapsed to single);
        * a title listed in `column_spreading_titles` is combined with the
          cell below it as "<title> - <sub-title>";
        * an empty cell following a spreading title continues that title
          with its own sub-title; any other empty cell yields None.

        Returns `(titles, row_index + 1)`, i.e. the titles and the index of
        the row directly after the title row.

        Raises ImportException when no row contains an essential title.
        """
        assert isinstance(essential_titles, (list, tuple))
        # Build the set from two sets instead of concatenating sequences, so
        # that a tuple (which the assert above allows) works as well.
        essential_title_set = set(essential_titles) | set(
            title.lower() for title in essential_titles
        )
        for row_index in range(self._data.get_num_rows()):
            row = self._data.get_row(row_index)
            if not (essential_title_set & set(row)):
                continue
            # The row below the titles carries the sub-titles of spreading
            # columns.
            next_row = self._data.get_row(row_index + 1)
            titles = []
            last_title = None
            for col_index, row_val in enumerate(row):
                if not row_val:
                    title = None
                    if last_title in self.column_spreading_titles:
                        title = "%s - %s" % (last_title, next_row[col_index])
                else:
                    title = row_val.strip().replace("  ", " ")
                    last_title = title
                if title in self.column_spreading_titles:
                    title = "%s - %s" % (title, next_row[col_index])
                titles.append(title)
            return (titles, row_index + 1)
        raise ImportException("Could not find title row")

    def create_title_mapping(self):
        """Creates a mapping between the spreadsheet's actual column
        titles and the normalised versions.

        Results in self.title_map and self.title_reverse_map which are
        comprehensive for this spreadsheet.

        Raises AssertionError when a title matches none of the regexes in
        `title_normaliser`, or when two titles normalise to the same name
        (so the reverse map cannot cover every title).
        """
        self.title_map = OrderedDict()
        for title in self.titles:
            for norm_title, regex in self.title_normaliser:
                if regex.match(title):
                    self.title_map[title] = norm_title
                    break
            else:
                raise AssertionError("Did not recognise title: %r" % title)
        self.title_reverse_map = dict((v, k) for k, v in self.title_map.items())
        # check all keys map both ways
        unmatched_keys = set(self.title_map.keys()) - set(self.title_reverse_map.values())
        if unmatched_keys:
            msg = "Columns not identified by REs: %r" % (unmatched_keys,)
            msg += "\nColumns over identified by REs: %r" % (
                set(self.title_reverse_map.keys()) - set(self.title_map.values()),
            )
            raise AssertionError(msg)

    @property
    def records(self):
        """Returns package records.
        * Collates packages with download_url in multiple rows in resources.
        * Collapses 'Standard' / 'Other' column pairs into single value.
        * Renames columns to standard names.

        This is a generator over the base class's `records`.  Consecutive
        rows with the same package identity ("Package name", or "Title" when
        `generate_names` is set) are merged into one record whose
        "resources" list has one entry per row.

        Raises AssertionError when rows of one package disagree on a
        non-resource column, or when both the 'Standard' and 'Other' value
        of a pair are filled in.  A KeyError for a missing column is
        reported on stdout and re-raised.
        """
        current_record = None
        # Bound up-front so the KeyError handler below can always refer to it.
        record = None
        package_identity_column = "Title" if self.generate_names else "Package name"
        self.create_title_mapping()

        def get_record_key(record_, standard_key):
            # Rows are keyed by the spreadsheet's own titles; prefer that
            # title when the row has it, else fall back to the normalised one.
            alt_key = self.title_reverse_map.get(standard_key)
            if alt_key and alt_key in record_:
                return alt_key
            return standard_key

        try:
            for record in super(CospreadDataRecords, self).records:
                if (
                    current_record
                    and current_record[package_identity_column]
                    == record[get_record_key(record, package_identity_column)]
                ):
                    # this record is another resource for the current record.
                    keys_that_should_match = set(current_record.keys()) - set(
                        self.resource_keys + ["resources"] + self.standard_or_other_columns
                    )
                    for key in keys_that_should_match:
                        record_key = get_record_key(record, key)
                        # Explicit raise (not `assert`) so the check also
                        # runs under `python -O`.
                        if current_record[key] != record[record_key]:
                            raise AssertionError(
                                "Multiple resources for package %r, but value for key %r does not match: %r!=%r"
                                % (
                                    record[get_record_key(record, package_identity_column)],
                                    key,
                                    current_record[key],
                                    record[record_key],
                                )
                            )
                else:
                    # this record is new, so yield the old 'current_record' before
                    # making this record 'current_record'.
                    if current_record:
                        yield current_record
                    current_record = record.copy()
                    current_record["resources"] = []
                    # Collapse 'standard/other' columns into one
                    for column in self.standard_or_other_columns:
                        standard_column = "%s - Standard" % column
                        other_column = "%s - Other" % column
                        standard = current_record[standard_column]
                        other = current_record[other_column]
                        if standard == "Other (specify)" or standard is None:
                            value = other
                        else:
                            if other:
                                raise AssertionError(
                                    'Both "Standard" and "Other" values for column %r in record %r'
                                    % (column, current_record)
                                )
                            value = standard
                        current_record[column] = value
                        del current_record[standard_column]
                        del current_record[other_column]
                    # Rename column titles to normalised ones
                    for title in self.title_map.keys():
                        norm_title = self.title_map[title]
                        if norm_title != title:
                            current_record[norm_title] = current_record[title]
                            del current_record[title]
                # Put download_url into resources
                resource = OrderedDict()
                for key in self.resource_keys:
                    key_used = None
                    if key in record:
                        key_used = key
                    else:
                        alt_key = self.title_reverse_map.get(key)
                        if alt_key and alt_key in record:
                            key_used = alt_key
                    if key_used:
                        resource[key] = record[key_used]
                        del record[key_used]
                    elif key in self.optional_columns:
                        record[key] = None
                    else:
                        raise KeyError(key)
                current_record["resources"].append(resource)
        except KeyError as e:
            # Report which title was missing, then let the error propagate.
            # `record` / `current_record` may still be None if the failure
            # happened before the first row was processed.
            missing_title = e.args[0] if e.args else ""
            print(
                "Could not find spreadsheet title %r.\nrecord keys: %r.\ncurrent_record keys: %r"
                % (
                    missing_title,
                    list(record.keys()) if record is not None else None,
                    list(current_record.keys()) if current_record is not None else None,
                )
            )
            raise
        if current_record:
            yield current_record
Beispiel #2
0
class ModelRenderer(object):
    """
    The `ModelRenderer` class is the superclass for all classes needing to deal 
    with `model` access and supporting rendering capabilities.
    """
    prettify = staticmethod(prettify)

    def __init__(self, model, session=None, data=None, prefix=None):
        """ 
        - `model`: 
              a SQLAlchemy mapped class or instance.  New object creation
              should be done by passing the class, which will need a default
              (no-parameter) constructor.  After construction or binding of
              the :class:`~formalchemy.forms.FieldSet`, the instantiated object will be available as
              the `.model` attribute.

        - `session=None`: 
              the session to use for queries (for relations). If `model` is associated
              with a session, that will be used by default. (Objects mapped with a
              `scoped_session
              <http://www.sqlalchemy.org/docs/05/session.html#contextual-thread-local-sessions>`_
              will always have a session. Other objects will
              also have a session if they were loaded by a Query.)

        - `data=None`: 
              dictionary-like object of user-submitted data to validate and/or
              sync to the `model`. Scalar attributes should have a single
              value in the dictionary; multi-valued relations should have a
              list, even if there are zero or one values submitted.  Currently,
              pylons request.params() objects and plain dictionaries are known
              to work.

        - `prefix=None`:
              the prefix to prepend to html name attributes. This is useful to avoid 
              field name conflicts when there are two fieldsets creating objects
              from the same model in one html page.  (This is not needed when
              editing existing objects, since the object primary key is used as part
              of the field name.)


        Only the `model` parameter is required.

        After binding, :class:`~formalchemy.forms.FieldSet`'s `model` attribute will always be an instance.
        If you bound to a class, `FormAlchemy` will call its constructor with no
        arguments to create an appropriate instance. 

        .. NOTE::

          This instance will not be added to the current session, even if you are using `Session.mapper`.

        All of these parameters may be overridden by the `bind` or `rebind`
        methods.  The `bind` method returns a new instance bound as specified,
        while `rebind` modifies the current :class:`~formalchemy.forms.FieldSet` and has
        no return value. (You may not `bind` to a different type of SQLAlchemy
        model than the initial one -- if you initially bind to a `User`, you
        must subsequently bind `User`'s to that :class:`~formalchemy.forms.FieldSet`.)

        Typically, you will configure a :class:`~formalchemy.forms.FieldSet` once in
        your common form library, then `bind` specific instances later for editing. (The
        `bind` method is thread-safe; `rebind` is not.)  Thus:

        load stuff:

        >>> from formalchemy.tests import FieldSet, User, session

        now, in `library.py`

        >>> fs = FieldSet(User)
        >>> fs.configure(options=[]) # put all configuration stuff here
        
        and in `controller.py`
        
        >>> from library import fs
        >>> user = session.query(User).first()
        >>> fs2 = fs.bind(user)
        >>> html = fs2.render()

        The `render_fields` attribute is an OrderedDict of all the `Field`'s
        that have been configured, keyed by name. The order of the fields
        is the order in `include`, or the order they were declared
        in the SQLAlchemy model class if no `include` is specified.

        The `_fields` attribute is an OrderedDict of all the `Field`'s
        the ModelRenderer knows about, keyed by name, in their
        unconfigured state.  You should not normally need to access
        `_fields` directly.
        
        (Note that although equivalent `Field`'s (fields referring to
        the same attribute on the SQLAlchemy model) will equate with
        the == operator, they are NOT necessarily the same `Field`
        instance.  Stick to referencing `Field`'s from their parent
        `FieldSet` to always get the "right" instance.)

        Raises a plain `Exception` when `model` is falsy, or when the model's
        class is not mapped and defines no manual `Field` attributes.
        """
        # `_fields` must be assigned first: __setattr__ consults it for every
        # other attribute assignment.
        self._fields = OrderedDict()
        self._render_fields = OrderedDict()
        self.model = self.session = None
        self.prefix = prefix

        if not model:
            raise Exception('model parameter may not be None')
        # Called through the class so that subclass overrides of `rebind`
        # are not triggered from the constructor.
        ModelRenderer.rebind(self, model, session, data)

        if isinstance(self.model, type):
            cls = self.model
        else:
            cls = type(self.model)
        try:
            mapper = class_mapper(cls)
        except Exception:
            # this class is not managed by SA.  extract any raw Fields defined on it.
            # Attribute names are visited in case-insensitive alphabetical order.
            keys = sorted(cls.__dict__.keys(), key=lambda name: name.lower())
            for key in keys:
                field = cls.__dict__[key]
                if isinstance(field, fields.Field):
                    if field.name and field.name != key:
                        raise Exception('Fields in a non-mapped class have the same name as their attribute.  Do not manually give them a name.')
                    field.name = field.key = key
                    self.append(field)
            if not self._fields:
                raise Exception("not bound to a SA instance, and no manual Field definitions found")
        else:
            # SA class.
            # load synonyms so we can ignore them
            synonym_names = set(p.name for p in mapper.iterate_properties
                                if isinstance(p, SynonymProperty))
            # attributes we're interested in
            attrs = []
            for p in mapper.iterate_properties:
                attr = _get_attribute(cls, p)
                if ((isinstance(p, SynonymProperty) or attr.property.key not in synonym_names)
                    and not isinstance(attr.impl, DynamicAttributeImpl)):
                    attrs.append(attr)
            # sort relations last before storing in the OrderedDict
            # (list.sort is stable, so the mapper's order is kept within
            # each group)
            L = [fields.AttributeField(attr, self) for attr in attrs]
            L.sort(key=lambda field: field.is_relation)
            self._fields.update((field.key, field) for field in L)

    def append(self, field):
        """Append a Field to the FieldSet.

        By default, this Field will be included in the rendered form or table.

        The field is stored in the configured (render) fields when there are
        any, otherwise in the unconfigured fields.  Returns `self` so calls
        can be chained.  Raises ValueError if `field` is not a `Field`.
        """
        if not isinstance(field, fields.Field):
            # One-tuple so that a tuple argument cannot break the formatting.
            raise ValueError('Can only add Field objects; got %s instead' % (field,))
        field.parent = self
        _fields = self._render_fields or self._fields
        _fields[field.name] = field
        return self  # Cascade pattern

    def add(self, field):
        """Deprecated alias of `append`; emits a DeprecationWarning.

        Unlike `append`, this has no return value.
        """
        warnings.warn(DeprecationWarning('FieldSet.add is deprecated. Use FieldSet.append instead.'))
        self.append(field)

    def extend(self, fields):
        """Add a list of fields. By default, each Field will be included in the
        rendered form or table.  Returns `self` so calls can be chained."""
        for field in fields:
            self.append(field)
        return self  # Cascade pattern

    def insert(self, field, new_field):
        """Insert a new field before an existing field.

        - `field`: the existing field; `new_field` is placed just before it.
        - `new_field`: the `Field` to insert.

        Returns `self` so calls can be chained.

        Raises ValueError if `new_field` is not a `Field` or if `field` is
        not among the current fields; TypeError if `field` is not an
        `AbstractField`.
        """
        fields_ = self._render_fields or self._fields
        if not isinstance(new_field, fields.Field):
            # Report the offending object (new_field), not the anchor field.
            raise ValueError('Can only add Field objects; got %s instead' % (new_field,))
        if not isinstance(field, fields.AbstractField):
            # Report the offending object (field), not the field to insert.
            raise TypeError('field must be a Field. Got %r' % (field,))
        try:
            index = list(fields_.keys()).index(field.name)
        except ValueError:
            raise ValueError('%s not in fields' % field.name)
        items = list(fields_.items())
        new_field.parent = self
        items.insert(index, (new_field.name, new_field))
        # Rebuild whichever mapping `fields_` was taken from.
        if self._render_fields:
            self._render_fields = OrderedDict(items)
        else:
            self._fields = OrderedDict(items)
        return self  # Cascade pattern
            
    def modify(self, *args):
        """Modify fields with their new value, without modifying the order.

        Each argument replaces the configured field stored under the same
        key.  Returns `self`.  Raises ValueError if an argument's name is not
        among the configured (render) fields.
        """
        for override in args:
            if override.name not in self._render_fields:
                raise ValueError("Field %s isn't part of the fields to render, or you didn't configure your FieldSet yet" % (override,))
            # Assigning to an existing key keeps its position.  Nothing is
            # replaced when the override's key is not a configured key.
            if override.key in self._render_fields:
                self._render_fields[override.key] = override
        return self

    @property
    def render_fields(self):
        """
        The set of attributes that will be rendered, as a (ordered)
        dict of `{fieldname: Field}` pairs. If you haven't called configure
        with exclude/include, then this will be the list of default Fields
        as found by introspecting the SQLAlchemy model.
        """
        if not self._render_fields:
            self._render_fields = OrderedDict([(field.key, field) for field in self._get_fields()])
        return self._render_fields

    def configure(self, pk=False, exclude=[], include=[], options=[]):
        """
        The `configure` method specifies a set of attributes to be rendered.
        By default, all attributes are rendered except primary keys and
        foreign keys.  But, relations `based on` foreign keys `will` be
        rendered.  For example, if an `Order` has a `user_id` FK and a `user`
        relation based on it, `user` will be rendered (as a select box of
        `User`'s, by default) but `user_id` will not.

        Parameters:
          * `pk=False`:
                set to True to include primary key columns
          * `exclude=[]`:
                an iterable of attributes to exclude.  Other attributes will
                be rendered normally
          * `include=[]`:
                an iterable of attributes to include.  Other attributes will
                not be rendered
          * `options=[]`:
                an iterable of modified attributes.  The set of attributes to
                be rendered is unaffected
          * `global_validator=None`:
                global_validator` should be a function that performs
                validations that need to know about the entire form.
          * `focus=True`:
                the attribute (e.g., `fs.orders`) whose rendered input element
                gets focus. Default value is True, meaning, focus the first
                element. False means do not focus at all.

        NOTE(review): `global_validator` and `focus` are described above but
        are not accepted by this method's signature; presumably they are
        added by subclasses -- confirm.

        Only one of {`include`, `exclude`} may be specified.

        Note that there is no option to include foreign keys.  This is
        deliberate.  Use `include` if you really need to manually edit FKs.

        If `include` is specified, fields will be rendered in the order given
        in `include`.  Otherwise, fields will be rendered in alphabetical
        order.

        Examples: given a `FieldSet` `fs` bound to a `User` instance as a
        model with primary key `id` and attributes `name` and `email`, and a
        relation `orders` of related Order objects, the default will be to
        render `name`, `email`, and `orders`. To render the orders list as
        checkboxes instead of a select, you could specify::

        >>> from formalchemy.tests import FieldSet, User
        >>> fs = FieldSet(User)
        >>> fs.configure(options=[fs.orders.checkbox()])

        To render only name and email,

        >>> fs.configure(include=[fs.name, fs.email])
        
        or

        >>> fs.configure(exclude=[fs.orders])

        Of course, you can include modifications to a field in the `include`
        parameter, such as here, to render name and options-as-checkboxes:

        >>> fs.configure(include=[fs.name, fs.orders.checkbox()])
        """
        # The default lists are never mutated: _get_fields works on copies.
        self._render_fields = OrderedDict([(field.key, field) for field in self._get_fields(pk, exclude, include, options)])

    def bind(self, model=None, session=None, data=None):
        """
        Return a copy of this FieldSet or Grid, bound to the given
        `model`, `session`, and `data`. The parameters to this method are the
        same as in the constructor.

        Often you will create and `configure` a FieldSet or Grid at application
        startup, then `bind` specific instances to it for actual editing or display.

        Raises a plain `Exception` when none of the three parameters is
        given, or when no model is given and none is already set.
        """
        if not (model or session or data):
            raise Exception('must specify at least one of {model, session, data}')
        if not model:
            if not self.model:
                raise Exception('model must be specified when none is already set')
            # Without a primary key, bind to the class (rebind then creates
            # a fresh instance); otherwise keep the current instance.
            if fields._pk(self.model) is None:
                model = type(self.model)
            else:
                model = self.model
        # copy.copy causes a stacktrace on python 2.5.2/OSX + pylons.  unable to reproduce w/ simpler sample.
        mr = object.__new__(self.__class__)
        mr.__dict__ = dict(self.__dict__)
        # two steps so bind's error checking can work
        ModelRenderer.rebind(mr, model, session, data)
        mr._fields = OrderedDict([(key, renderer.bind(mr)) for key, renderer in self._fields.items()])
        if self._render_fields:
            mr._render_fields = OrderedDict([(field.key, field) for field in
                                             [field.bind(mr) for field in self._render_fields.values()]])
        return mr

    def rebind(self, model=None, session=None, data=None):
        """
        Like `bind`, but acts on this instance.  No return value.
        Not all parameters are treated the same; specifically, what happens if they are NOT specified is different:
           * if `model` is not specified, the old model is used
           * if `session` is not specified, FA tries to re-guess session from the model
           * if data is not specified, it is rebound to None.

        Raises ValueError for a model of a different type than the one
        already bound, or for a `session` that is neither a `Session` nor a
        `ScopedSession`; a plain `Exception` for the other failures (model
        class cannot be instantiated, mapped instance in a session without
        a primary key, unsupported `data`, explicit session differing from
        the model's own session).
        """
        original_model = model
        # Sentinel meaning "no session lookup was done on a freshly created
        # instance".  A lookup that returned None is different: it must
        # still overwrite self.session further down.
        _unset = object()
        instance_session = _unset
        if model:
            if isinstance(model, type):
                try:
                    model = model()
                except Exception:
                    raise Exception('%s appears to be a class, not an instance, but FormAlchemy cannot instantiate it.  (Make sure all constructor parameters are optional!)' % (model,))
                # take object out of session, if present
                try:
                    found_session = object_session(model)
                except AttributeError:
                    pass # non-SA object; doesn't need session
                else:
                    instance_session = found_session
                    if found_session:
                        found_session.expunge(model)
            elif object_session(model):
                # for instances of mapped classes, require that the instance have a PK already
                try:
                    class_mapper(type(model))
                except Exception:
                    pass
                else:
                    if fields._pk(model) is None:
                        raise Exception('Mapped instances to be bound must either have a primary key set or not be in a Session.  When creating a new object, bind the class instead [i.e., bind(User), not bind(User())]')
            if self.model and type(self.model) != type(model):
                raise ValueError('You can only bind to another object of the same type you originally bound to (%s), not %s' % (type(self.model), type(model)))
            self.model = model
            self._bound_pk = fields._pk(model)

        # Assign new data
        if data is None:
            self.data = None
        elif hasattr(data, 'getall') and hasattr(data, 'getone'):
            # already multidict-like; use as is
            self.data = data
        else:
            try:
                self.data = SimpleMultiDict(data)
            except Exception:
                # Report the rejected argument; self.data may not exist yet.
                raise Exception('unsupported data object %s.  currently only dicts and Paste multidicts are supported' % (data,))

        # Reset Field deserialization caches:
        _fields = self._render_fields or self._fields
        for f in _fields:
            self[f]._reset_cache()

        if session:
            if not isinstance(session, (Session, ScopedSession)):
                raise ValueError('Invalid SQLAlchemy session object %s' % (session,))
            self.session = session
        elif model:
            if instance_session is not _unset:
                # model may be a temporary object, expunged from its session -- grab the existing reference
                self.session = instance_session
            else:
                try:
                    o_session = object_session(model)
                except AttributeError:
                    pass # non-SA object
                else:
                    if o_session:
                        self.session = o_session
        # if we didn't just instantiate (in which case object_session will be None), 
        # the session should be the same as the object_session
        if self.session and model == original_model:
            try:
                o_session = object_session(self.model)
            except AttributeError:
                pass # non-SA object
            else:
                if o_session and self.session is not o_session:
                    raise Exception('You may not explicitly bind to a session when your model already belongs to a different one')

    def sync(self):
        """
        Sync (copy to the corresponding attributes) the data passed to the constructor or `bind` to the `model`.

        Afterwards the model is added to the session, if there is one.
        Raises a plain `Exception` when no data is bound.
        """
        if self.data is None:
            raise Exception("No data bound; cannot sync")
        for field in self.render_fields.values():
            field.sync()
        if self.session:
            self.session.add(self.model)

    def _raw_fields(self):
        """Return the unconfigured fields (the values of `_fields`)."""
        return self._fields.values()
    
    def _get_fields(self, pk=False, exclude=[], include=[], options=[]):
        """Return the list of fields to render for the given configuration.

        The parameters have the same meaning as in `configure`.  The result
        is `include` (or, when `include` is empty, every raw field that is
        not excluded, not a raw foreign key and -- unless `pk` is true -- not
        a non-collection primary key), with each field replaced by the equal
        field from `options` when there is one.

        Raises a plain `Exception` if both `include` and `exclude` are
        given; ValueError for a non-boolean `pk`, a non-iterable argument or
        a field this renderer does not know; TypeError for an element that
        is not an `AbstractField`.
        """
        # sanity check
        if include and exclude:
            raise Exception('Specify at most one of include, exclude')

        # help people who meant configure(include=[X]) but just wrote configure(X), resulting in pk getting the positional argument
        if pk not in [True, False]:
            raise ValueError('pk option must be True or False, not %s' % (pk,))

        # verify that options that should be lists of Fields, are
        known_fields = self._fields.values()
        checked = {}
        for name, candidate in (('include', include), ('exclude', exclude), ('options', options)):
            try:
                L = list(candidate)
            except Exception:
                raise ValueError('`%s` parameter should be an iterable' % name)
            for field in L:
                if not isinstance(field, fields.AbstractField):
                    raise TypeError('non-AbstractField object `%s` found in `%s`' % (field, name))
                if field not in known_fields:
                    raise ValueError('Unrecognized Field `%s` in `%s` -- did you mean to call append() first?' % (field, name))
            checked[name] = L
        # From here on use the materialised copies: a one-shot iterable
        # (e.g. a generator) was consumed by the validation above, and the
        # caller's objects must not be modified.
        include = checked['include']
        exclude = checked['exclude']
        options = checked['options']

        # if include is given, those are the fields used.  otherwise, include those not explicitly (or implicitly) excluded.
        if not include:
            ignore = list(exclude)
            if not pk:
                ignore.extend([wrapper for wrapper in self._raw_fields() if wrapper.is_pk and not wrapper.is_collection])
            ignore.extend([wrapper for wrapper in self._raw_fields() if wrapper.is_raw_foreign_key])
            include = [field for field in self._raw_fields() if field not in ignore]
            
        # in the returned list, replace any fields in `include` w/ the corresponding one in `options`, if present,
        # preserving the order given in `include`.
        options_dict = dict([(wrapper, wrapper) for wrapper in options])
        return [options_dict.get(wrapper, wrapper) for wrapper in include]
    
    def __getattr__(self, attrname):
        """Look up a field by name: configured fields first, then raw ones.

        Only invoked when normal attribute lookup fails.  Raises
        AttributeError when no field of that name exists.
        """
        # Go through the instance dict rather than `self._render_fields`:
        # while those attributes do not exist yet (e.g. on an object made
        # with object.__new__), plain attribute access would re-enter this
        # method and recurse without end.
        instance_dict = self.__dict__
        for store_name in ('_render_fields', '_fields'):
            store = instance_dict.get(store_name)
            if store is None:
                continue
            try:
                return store[attrname]
            except KeyError:
                pass
        raise AttributeError(attrname)

    __getitem__ = __getattr__

    def __setattr__(self, attrname, value):
        """Set an attribute, refusing to overwrite or add fields this way.

        Raises AttributeError when `attrname` is the name of a known field
        or `value` is an `AbstractField` (except for `_fields`, `__dict__`
        and `focus`).
        """
        if attrname not in ('_fields', '__dict__', 'focus') and \
           (attrname in self._fields or isinstance(value, fields.AbstractField)):
            raise AttributeError('Do not set field attributes manually.  Use append() or configure() instead')
        object.__setattr__(self, attrname, value)

    def __delattr__(self, attrname):
        """Remove a field from the configured (render) fields.

        Raises RuntimeError if the field exists only in the unconfigured
        fields, AttributeError if there is no such field at all.
        """
        if attrname in self._render_fields:
            del self._render_fields[attrname]
        elif attrname in self._fields:
            raise RuntimeError("You try to delete a field but your form is not configured")
        else:
            raise AttributeError("field %s does not exist" % attrname)

    __delitem__ = __delattr__

    def render(self, **kwargs):
        """Not implemented here; always raises NotImplementedError."""
        raise NotImplementedError()
Beispiel #3
0
class CospreadDataRecords(SpreadsheetDataRecords):
    def __init__(self, data, generate_names=False):
        '''Build the title-normalisation tables, then initialise the base class.

        :param data: spreadsheet data, passed unchanged to the base
            initialiser together with the essential titles.
        :param generate_names: stored on the instance; when true,
            `records` identifies packages by 'Title' rather than by
            'Package name'.
        '''
        self.generate_names = generate_names
        # cospread uses a list of alternative essential_titles
        essential_titles = ['Package name', 'Abstract']
        # Raw strings keep regex escapes such as `\.` intact without relying
        # on Python passing unknown string escapes through.
        # NOTE(review): the patterns are applied with `regex.match`, so they
        # are anchored at the start only.  'Notes|Abstract$' therefore
        # accepts any title *starting* with "Notes", and the patterns with
        # no trailing `$` accept trailing text -- confirm this is intended.
        title_patterns = (
            #('Normalised title', 'regex of variations'),
            ('Package name', r'(Package name|Identifier)$'),
            ('Title', r'Title$'),
            ('CO Identifier', r'CO (Identifier|Reference)$'),
            ('Notes', r'Notes|Abstract$'),
            ('Date released', r'Date released$'),
            ('Date updated', r'Date updated$'),
            ('Date update future', r'Date to be published$'),
            ('Update frequency', r'Update frequency$'),
            ('Geographical Granularity - Standard',
             r'Geographical Granularity - Standard$'),
            ('Geographical Granularity - Other',
             r'Geographical Granularity - Other$'),
            ('Geographic coverage - England',
             r'Geographic coverage - England$'),
            ('Geographic coverage - N. Ireland',
             r'Geographic coverage - N. Ireland$'),
            ('Geographic coverage - Scotland',
             r'Geographic coverage - Scotland$'),
            ('Geographic coverage - Wales', r'Geographic coverage - Wales$'),
            ('Geographic coverage - Overseas',
             r'Geographic coverage - Overseas$'),
            ('Geographic coverage - Global', r'Geographic coverage - Global$'),
            ('Temporal Granularity - Standard',
             r'Temporal Granularity - Standard$'),
            ('Temporal Granularity - Other', r'Temporal Granularity - Other$'),
            ('Temporal Coverage - To', r'Temporal Coverage - To'),
            ('Temporal Coverage - From', r'Temporal Coverage - From$'),
            ('Categories', r'Categories$'),
            ('National Statistic', r'National Statistic$'),
            ('Precision', r'Precision$'),
            ('URL', r'URL$'),
            ('Download URL', r'(Download URL|Resources - URL)$'),
            ('File format', r'(Download |Resources - )?file format$'),
            ('Download Description', r'(Resources -|Download) Description$'),
            ('Taxonomy URL', r'Taxonomy URL$'),
            ('Department', r'Department$'),
            ('Agency responsible', r'Agency responsible$'),
            ('Published by', r'Published by$'),
            ('Published via', r'Published via$'),
            ('Contact - Permanent contact point',
             r'(Contact|Author) - Permanent contact point'),
            ('Contact - E-mail address.',
             r'(Contact|Author) - E-mail address.$'),
            ('Maintainer - ',
             r'Maintainer - (Blank unless not the author\.)?$'),
            ('Maintainer - E-mail address', r'Maintainer - E-mail address'),
            ('Licence', r'Licence$'),
            ('Tags', r'Tags$'),
            ('Mandate', r'Mandate$'),
        )
        # Ordered list of (normalised title, compiled case-insensitive regex);
        # the first matching entry wins when titles are mapped.
        self.title_normaliser = [
            (norm_title, re.compile(pattern, re.I))
            for norm_title, pattern in title_patterns
        ]
        # Resource columns listed here may be absent without `records`
        # raising KeyError.
        self.optional_columns = [
            u'Temporal Coverage - To', u'Temporal Coverage - From',
            u'Download Description', u'National Statistic',
            u'Maintainer - E-mail address', u'Maintainer - ', u'Categories'
        ]
        # Titles that span several columns; find_titles() combines them with
        # the cell beneath each column.
        self.column_spreading_titles = [
            'Geographical Granularity', 'Geographic coverage',
            'Temporal Granularity', 'Temporal Coverage', 'Author',
            'Maintainer', 'Contact'
        ]
        # Columns that come as a '<name> - Standard' / '<name> - Other' pair.
        self.standard_or_other_columns = [
            'Geographical Granularity', 'Temporal Granularity'
        ]
        # Columns gathered into each package's 'resources' entries.
        self.resource_keys = [
            'Download URL', 'File format', 'Download Description'
        ]
        # Everything above is assigned before the base initialiser runs.
        # NOTE(review): presumably the base class calls back into
        # find_titles(), which reads column_spreading_titles -- confirm.
        super(CospreadDataRecords, self).__init__(data, essential_titles)

    def find_titles(self, essential_titles):
        '''Locate the title row and work out the title of every column.

        Rows are scanned from the top.  The title row is the first row
        containing any of `essential_titles`, either as given or
        lower-cased.  Within that row:

        * a non-blank cell is stripped and has double spaces collapsed;
        * a title listed in `self.column_spreading_titles` is combined
          with the cell beneath it as '<title> - <cell below>';
        * a blank cell whose most recent non-blank title is a spreading
          title is named the same way; any other blank cell gives None.

        :param essential_titles: list or tuple of title strings.
        :returns: tuple `(titles, row_index + 1)`: the list of column
            titles and the index of the row following the title row.
        :raises ImportException: if no row contains an essential title.
        '''
        assert isinstance(essential_titles, (list, tuple))
        # Build the set without `+`, which would fail for a tuple argument
        # (tuple + list is a TypeError) even though tuples are accepted above.
        essential_title_set = set(essential_titles)
        essential_title_set.update(
            title.lower() for title in essential_titles)
        spreading_titles = self.column_spreading_titles

        for row_index in range(self._data.get_num_rows()):
            row = self._data.get_row(row_index)
            if not essential_title_set & set(row):
                continue
            # The row beneath supplies the second half of spreading titles.
            next_row = self._data.get_row(row_index + 1)
            titles = []
            last_title = None
            for col_index, row_val in enumerate(row):
                if row_val:
                    title = row_val.strip().replace('  ', ' ')
                    last_title = title
                elif last_title in spreading_titles:
                    # Blank cell continuing a spreading title to its left.
                    title = '%s - %s' % (last_title, next_row[col_index])
                else:
                    title = None
                if title in spreading_titles:
                    title = '%s - %s' % (title, next_row[col_index])
                titles.append(title)
            return (titles, row_index + 1)
        raise ImportException('Could not find title row')

    def create_title_mapping(self):
        '''Creates a mapping between the spreadsheet\'s actual column
        titles and the normalised versions.

        Results in self.title_map and self.title_reverse_map which are
        comprehensive for this spreadsheet.

        Each title in self.titles is given the normalised title of the
        first entry in self.title_normaliser whose regex matches it.

        Raises AssertionError if a title matches no regex, or if two
        titles map to the same normalised title (the reverse map could
        then not hold both).
        '''
        self.title_map = OrderedDict()
        for title in self.titles:
            for norm_title, regex in self.title_normaliser:
                if regex.match(title):
                    self.title_map[title] = norm_title
                    break
            else:
                raise AssertionError('Did not recognise title: %r' % title)
        # .items() rather than .iteritems() so this runs on Python 2 and 3.
        self.title_reverse_map = dict(
            (norm_title, title)
            for title, norm_title in self.title_map.items())
        # check all keys map both ways
        # A title is missing from the reverse map's values only when a later
        # title took over its normalised title.
        unmatched_keys = set(self.title_map) - set(
            self.title_reverse_map.values())
        if unmatched_keys:
            # Normalised titles claimed by more than one column.  The former
            # expression (reverse-map keys minus title-map values) is empty
            # by construction, so it never reported anything.
            over_identified = set(
                self.title_map[title] for title in unmatched_keys)
            msg = 'Columns not identified by REs: %r' % unmatched_keys
            msg += '\nColumns over identified by REs: %r' % over_identified
            raise AssertionError(msg)

    @property
    def records(self):
        '''Returns package records.
        * Collates packages with download_url in multiple rows in resources.
        * Collapses 'Standard' / 'Other' column pairs into single value.
        * Renames columns to standard names.

        This is a generator over the base class's records.  Consecutive
        rows with the same package identity ('Package name', or 'Title'
        when self.generate_names is set) are merged into one record whose
        'resources' list holds one entry per row.  A record is yielded
        when a row for a different package arrives, and the last record
        after the final row.

        Raises KeyError (after printing the keys that were available) if
        a required column is missing, and AssertionError if rows of one
        package disagree on a non-resource value or a column has both a
        "Standard" and an "Other" value.
        '''
        current_record = None
        # Bound before the loop so the KeyError handler can always report,
        # even if the failure happens before the first row is read.
        record = None
        package_identity_column = 'Title' if self.generate_names else 'Package name'
        self.create_title_mapping()

        def get_record_key(record_, standard_key):
            # Use the spreadsheet's own column title while the row is still
            # keyed by it; otherwise use the normalised title.
            alt_key = self.title_reverse_map.get(standard_key)
            if alt_key and alt_key in record_:
                return alt_key
            return standard_key

        try:
            for record in super(CospreadDataRecords, self).records:
                identity_key = get_record_key(record, package_identity_column)
                if current_record and \
                        current_record[package_identity_column] == record[identity_key]:
                    # this record is another resource for the current record.
                    keys_that_should_match = set(current_record.keys()) - set(
                        self.resource_keys + ['resources'] +
                        self.standard_or_other_columns)
                    for key in keys_that_should_match:
                        record_key = get_record_key(record, key)
                        assert current_record[key] == record[record_key], \
                            'Multiple resources for package %r, but value for key %r does not match: %r!=%r' % (
                                record[identity_key], key,
                                current_record[key], record[record_key])
                else:
                    # this record is new, so yield the old 'current_record' before
                    # making this record 'current_record'.
                    if current_record:
                        yield current_record
                    current_record = record.copy()
                    current_record['resources'] = []
                    # Collapse 'standard/other' columns into one
                    for column in self.standard_or_other_columns:
                        standard_title = '%s - Standard' % column
                        other_title = '%s - Other' % column
                        standard = current_record[standard_title]
                        other = current_record[other_title]
                        if standard is None or standard == 'Other (specify)':
                            value = other
                        else:
                            assert not other, 'Both "Standard" and "Other" values for column %r in record %r' % (
                                column, current_record)
                            value = standard
                        current_record[column] = value
                        del current_record[standard_title]
                        del current_record[other_title]
                    # Rename column titles to normalised ones
                    for title, norm_title in self.title_map.items():
                        if norm_title != title:
                            current_record[norm_title] = current_record[title]
                            del current_record[title]
                # Put download_url into resources
                resource = OrderedDict()
                for key in self.resource_keys:
                    # Look under the normalised key first, then under the
                    # spreadsheet's own title for that column.
                    key_used = None
                    if key in record:
                        key_used = key
                    else:
                        alt_key = self.title_reverse_map.get(key)
                        if alt_key and alt_key in record:
                            key_used = alt_key
                    if key_used:
                        # Move the value out of the row and into the resource.
                        resource[key] = record[key_used]
                        del record[key_used]
                    elif key in self.optional_columns:
                        record[key] = None
                    else:
                        raise KeyError(key)
                current_record['resources'].append(resource)
        except KeyError as e:
            # Report what was available, then let the error propagate.  The
            # None guards keep this handler from masking the KeyError with
            # an AttributeError of its own.
            missing_title = e.args[0] if e.args else ''
            record_keys = list(record.keys()) if record is not None else None
            current_keys = list(current_record.keys()) \
                if current_record is not None else None
            print('Could not find spreadsheet title %r.\nrecord keys: %r.\ncurrent_record keys: %r' % (
                missing_title, record_keys, current_keys))
            raise
        if current_record:
            yield current_record