예제 #1
0
class ExportSchema(Document, UnicodeMixIn):
    """
    An export schema that can store intermittent contents of the export so
    that the entire doc list doesn't have to be used to generate the export.

    Properties:
        index: JSON value identifying the export; it is serialized with
            ``json.dumps`` to build the checkpoint view keys.
        schema: dict handed to ``get_headers`` to derive the tables.
        timestamp: checkpoint time; used as the lower bound when looking
            up the ids added since this checkpoint.
    """
    index = JsonProperty()
    schema = DictProperty()
    timestamp = TimeStampProperty()

    def __unicode__(self):
        """Return ``"<index as JSON>: <timestamp>"``."""
        return "%s: %s" % (json.dumps(self.index), self.timestamp)

    @classmethod
    def wrap(cls, data):
        """
        Wrap a raw document dict, normalizing one known-bad timestamp form.

        A timestamp string starting with ``'1-01-01'`` is replaced by the
        Unix epoch (``'1970-01-01T00:00:00Z'``) before delegating to the
        parent ``wrap``. ``data`` is modified in place in that case.
        """
        # ``or ''`` also covers a 'timestamp' key that is present but None,
        # which would otherwise fail on ``None.startswith``.
        raw_timestamp = data.get('timestamp') or ''
        if raw_timestamp.startswith('1-01-01'):
            data['timestamp'] = '1970-01-01T00:00:00Z'

        return super(ExportSchema, cls).wrap(data)

    @classmethod
    def last(cls, index):
        """
        Return the newest checkpoint stored for ``index``.

        Queries ``couchexport/schema_checkpoints`` in descending order with
        ``limit=1`` and returns whatever ``.one()`` yields for that result
        (presumably ``None`` when there is no checkpoint -- confirm against
        the view-result API).
        """
        # descending=True, so the high end of the key range is the startkey.
        return cls.view(
            "couchexport/schema_checkpoints",
            startkey=[json.dumps(index), {}],
            endkey=[json.dumps(index)],
            descending=True,
            limit=1,
            include_docs=True,
            reduce=False,
        ).one()

    @classmethod
    def get_all_checkpoints(cls, index):
        """
        Yield every checkpoint for ``index`` as a wrapped ``ExportSchema``.

        The ids are collected from the view first, then the documents are
        fetched through ``iter_docs`` and wrapped one at a time.
        """
        doc_ids = [
            result["id"] for result in cls.get_db().view(
                "couchexport/schema_checkpoints",
                startkey=[json.dumps(index)],
                endkey=[json.dumps(index), {}],
                reduce=False,
            )
        ]
        for doc in iter_docs(cls.get_db(), doc_ids):
            yield cls.wrap(doc)

    # Lazily-built cache backing the ``tables`` property.
    _tables = None

    @property
    def tables(self):
        """
        List of ``(table index, first header row)`` pairs for this schema.

        Computed from ``get_headers(self.schema, separator=".")`` on first
        access and cached on the instance afterwards.
        """
        if self._tables is None:
            # Imported here rather than at module level (presumably to
            # avoid a circular import with couchexport.export).
            from couchexport.export import get_headers
            headers = get_headers(self.schema, separator=".")
            self._tables = [(index, row[0]) for index, row in headers]
        return self._tables

    @property
    def table_dict(self):
        """``tables`` as a dict keyed by table index (rebuilt per access)."""
        return dict(self.tables)

    def get_columns(self, index):
        """
        Return the column list for table ``index``: ``'id'`` followed by the
        table's header ``data``.

        Raises:
            KeyError: if ``index`` is not one of this schema's tables.
        """
        return ['id'] + self.table_dict[index].data

    def get_all_ids(self, database=None):
        """
        Return the set of all doc ids in ``couchexport/schema_index`` for
        this schema's index.

        ``database`` defaults to ``self.get_db()``.
        """
        database = database or self.get_db()
        view_results = database.view(
            "couchexport/schema_index",
            reduce=False,
            **get_schema_index_view_keys(self.index)).all()
        return set(result['id'] for result in view_results)

    def get_new_ids(self, database=None):
        """
        Return the set of doc ids indexed at or after this checkpoint's
        timestamp.

        ``database`` defaults to ``self.get_db()``.

        Raises:
            AssertionError: if this schema has no timestamp.
        """
        database = database or self.get_db()
        # Raised explicitly (instead of via ``assert``) so the check still
        # runs under ``python -O``; the exception type is unchanged.
        if not self.timestamp:
            raise AssertionError(
                'exports without timestamps are no longer supported.')
        tag_as_list = force_tag_to_list(self.index)
        startkey = tag_as_list + [self.timestamp.isoformat()]
        endkey = tag_as_list + [{}]
        view_results = database.view("couchexport/schema_index",
                                     reduce=False,
                                     startkey=startkey,
                                     endkey=endkey)
        return set(result['id'] for result in view_results)

    def get_new_docs(self, database=None):
        """
        Iterate over the documents whose ids ``get_new_ids`` returns.

        ``database`` defaults to ``self.get_db()``.
        """
        database = database or self.get_db()
        # iter_docs takes the database first, then the ids (same call shape
        # as in get_all_checkpoints).
        return iter_docs(database, self.get_new_ids(database))
예제 #2
0
class ExportSchema(Document, UnicodeMixIn):
    """
    An export schema that can store intermittent contents of the export so
    that the entire doc list doesn't have to be used to generate the export.

    Properties:
        index: JSON value identifying the export; it is serialized with
            ``json.dumps`` to build the checkpoint view keys.
        seq: database sequence id stored as a string (semi-deprecated in
            favour of ``timestamp``).
        schema: dict handed to ``get_headers`` to derive the tables.
        timestamp: checkpoint time; when set, it is used instead of ``seq``
            to find the ids added since this checkpoint.
    """
    index = JsonProperty()
    seq = StringProperty()  # semi-deprecated
    schema = DictProperty()
    timestamp = TimeStampProperty()

    def __unicode__(self):
        """Return ``"<index as JSON>: <seq>"``."""
        return "%s: %s" % (json.dumps(self.index), self.seq)

    @property
    def is_bigcouch(self):
        """
        True when ``seq`` cannot be parsed as an integer.

        NOTE(review): a ``seq`` of None makes ``int()`` raise TypeError,
        which is not caught here -- confirm whether that can occur.
        """
        try:
            int(self.seq)
            return False
        except ValueError:
            return True

    @classmethod
    def wrap(cls, data):
        """
        Wrap a raw document dict, coercing an integer ``seq`` to a string.

        For documents without a timestamp this also reports the access via
        ``notify_exception`` and, when the stored seq is greater than the
        database's current ``update_seq``, resets ``seq`` to ``"0"`` and
        saves the document.

        Raises:
            Exception: if the document has no timestamp and the seq values
                cannot be parsed as integers.
        """
        # Older docs stored seq as a number; the property is a string.
        if isinstance(data.get('seq'), (int, long)):
            data['seq'] = unicode(data['seq'])
        ret = super(ExportSchema, cls).wrap(data)
        if not ret.timestamp:
            # these won't work on bigcouch so we want to know if this happens
            notify_exception(
                None, 'an export without a timestamp was accessed! %s (%s)' %
                (ret.index, ret._id))
            # this isn't the cleanest nor is it perfect but in the event
            # this doc traversed databases somehow and now has a bad seq
            # id, make sure to just reset it to 0.
            # This won't catch if the seq is bad but not greater than the
            # current one).
            current_seq = cls.get_db().info()["update_seq"]
            try:
                # NOTE(review): if ret.seq is None, int() raises TypeError,
                # which bypasses the handler below -- confirm whether that
                # case should also be reported as a bad-state export.
                if int(current_seq) < int(ret.seq):
                    ret.seq = "0"
                    ret.save()
            except ValueError:
                # seqs likely weren't ints (e.g. bigcouch)
                # this should never be possible (anything on bigcouch should
                # have a timestamp) so let's fail hard
                raise Exception(
                    'export %s is in a bad state (no timestamp or integer seq)'
                    % ret._id)
        # TODO? handle seq -> datetime migration
        return ret

    @classmethod
    def last(cls, index):
        """
        Return the newest checkpoint stored for ``index``.

        Looks up the ``by_timestamp`` keys first; if the checkpoint found
        that way has no timestamp, the ``by_seq`` keys are queried instead.
        Returns whatever ``.one()`` yields for the chosen query (a falsy
        value when nothing matched the first query).
        """
        # search first by timestamp, then fall back to seq id
        shared_kwargs = {
            'descending': True,
            'limit': 1,
            'include_docs': True,
            'reduce': False,
        }
        # descending=True, so the high end of the key range is the startkey.
        ret = cls.view("couchexport/schema_checkpoints",
                       startkey=['by_timestamp',
                                 json.dumps(index), {}],
                       endkey=['by_timestamp',
                               json.dumps(index)],
                       **shared_kwargs).one()
        if ret and not ret.timestamp:
            # we found a bunch of old checkpoints but they only
            # had seq ids, so use those instead
            ret = cls.view("couchexport/schema_checkpoints",
                           startkey=['by_seq', json.dumps(index), {}],
                           endkey=['by_seq', json.dumps(index)],
                           **shared_kwargs).one()
        return ret

    @classmethod
    def get_all_indices(cls):
        """
        Yield each distinct export index that has ``by_timestamp``
        checkpoints, decoded from its JSON key.

        Keys that are not valid JSON are skipped silently.
        """
        ret = cls.get_db().view("couchexport/schema_checkpoints",
                                startkey=['by_timestamp'],
                                endkey=['by_timestamp', {}],
                                reduce=True,
                                group=True,
                                group_level=2)
        for row in ret:
            # key is ['by_timestamp', <json-encoded index>] at group_level=2
            index = row['key'][1]
            try:
                yield json.loads(index)
            except ValueError:
                # ignore this for now - should just be garbage data
                # print "poorly formatted index key %s" % index
                pass

    @classmethod
    def get_all_checkpoints(cls, index):
        """
        Return the view results (with docs included) for all
        ``by_timestamp`` checkpoints of ``index``.
        """
        return cls.view("couchexport/schema_checkpoints",
                        startkey=['by_timestamp',
                                  json.dumps(index)],
                        endkey=['by_timestamp',
                                json.dumps(index), {}],
                        include_docs=True,
                        reduce=False)

    # Lazily-built cache backing the ``tables`` property.
    _tables = None

    @property
    def tables(self):
        """
        List of ``(table index, first header row)`` pairs for this schema.

        Computed from ``get_headers(self.schema, separator=".")`` on first
        access and cached on the instance afterwards.
        """
        if self._tables is None:
            # Imported here rather than at module level (presumably to
            # avoid a circular import with couchexport.export).
            from couchexport.export import get_headers
            headers = get_headers(self.schema, separator=".")
            self._tables = [(index, row[0]) for index, row in headers]
        return self._tables

    @property
    def table_dict(self):
        """``tables`` as a dict keyed by table index (rebuilt per access)."""
        return dict(self.tables)

    def get_columns(self, index):
        """
        Return the column list for table ``index``: ``'id'`` followed by the
        table's header ``data``.

        Raises:
            KeyError: if ``index`` is not one of this schema's tables.
        """
        return ['id'] + self.table_dict[index].data

    def get_all_ids(self, database=None):
        """
        Return the set of all doc ids in ``couchexport/schema_index`` for
        this schema's index.

        ``database`` defaults to ``self.get_db()``.
        """
        database = database or self.get_db()
        view_results = database.view(
            "couchexport/schema_index",
            reduce=False,
            **get_schema_index_view_keys(self.index)).all()
        return set(result['id'] for result in view_results)

    def get_new_ids(self, database=None):
        """
        Return the set of doc ids added since this checkpoint.

        Uses the timestamp-based lookup when a timestamp is set, otherwise
        the seq-based one. ``database`` defaults to ``self.get_db()``.
        """
        # TODO: deprecate/remove old way of doing this
        database = database or self.get_db()
        if self.timestamp:
            return self._ids_by_timestamp(database)
        return self._ids_by_seq(database)

    def _ids_by_seq(self, database):
        """
        Return the ids changed in ``database`` since ``self.seq`` that also
        belong to this schema's index.

        A seq of ``"0"`` or None means "everything": all ids for the index
        are returned.
        """
        # Pass ``database`` through so ids are looked up in the same
        # database the changes are read from, not the default one.
        if self.seq == "0" or self.seq is None:
            return self.get_all_ids(database)

        consumer = Consumer(database)
        view_results = consumer.fetch(since=self.seq)
        if view_results:
            include_ids = set(res["id"] for res in view_results["results"])
            return include_ids.intersection(self.get_all_ids(database))
        else:
            # sometimes this comes back empty. I think it might be a bug
            # in couchdbkit, but it's impossible to consistently reproduce.
            # For now, just assume this is fine.
            return set()

    def _ids_by_timestamp(self, database):
        """
        Return the set of doc ids in ``couchexport/schema_index`` keyed at
        or after this checkpoint's timestamp.
        """
        tag_as_list = force_tag_to_list(self.index)
        startkey = tag_as_list + [self.timestamp.isoformat()]
        endkey = tag_as_list + [{}]
        view_results = database.view("couchexport/schema_index",
                                     reduce=False,
                                     startkey=startkey,
                                     endkey=endkey)
        return set(result['id'] for result in view_results)

    def get_new_docs(self, database=None):
        """
        Iterate over the documents whose ids ``get_new_ids`` returns.

        ``database`` defaults to ``self.get_db()``.
        """
        database = database or self.get_db()
        # iter_docs is given the database first, then the ids; the previous
        # call passed only the ids.
        return iter_docs(database, self.get_new_ids(database))