Exemple #1
0
@attr.s
class IterationState:
    """Inspect, back up and restore the resumable iteration state.

    The iteration is identified by a resume key built from ``domain``,
    ``doc_type`` and the state db's ``unique_id``.
    """

    statedb = attr.ib()
    domain = attr.ib()
    doc_type = attr.ib()

    def __attrs_post_init__(self):
        """Create the iterator object whose saved state is managed here."""
        migration_id = self.statedb.unique_id
        resume_key = f"{self.domain}.{self.doc_type}.{migration_id}"
        # Only the resume key is supplied; the other arguments are None.
        self.itr = ResumableFunctionIterator(resume_key, None, None, None)

    @property
    def value(self):
        """Current iteration state as returned by its ``to_json()``."""
        return self.itr.state.to_json()

    def backup_resume_state(self, value):
        """Attempt to save iteration state in state db

        An existing entry under the same key is never overwritten: if it
        equals ``value`` the key is returned without writing, otherwise
        the save is refused.

        :param value: iteration state dict. See `self.value`
        :returns: resume key if saved else `None`
        """
        assert isinstance(value, dict), value
        key = f"resume-{value['timestamp']}"
        pretty_value = json.dumps(value, indent=2)
        log.info("saving resume state with key=%s : %s", key, pretty_value)
        old = self.statedb.get(key)
        if old is None:
            self.statedb.set(key, value)
            log.info("saved.")
        elif old != value:
            # `warning` replaces the deprecated `warn` alias.
            log.warning("NOT SAVED! refusing to overwrite:\n%s", old)
            return None
        return key

    def restore_resume_state(self, key):
        """Attempt to restore resume state represented by key

        The current state is backed up first; nothing is restored if
        that backup is refused.

        :param key: state db key of the resume state to restore.
        :returns: true if restored else false
        """
        new_state = self.statedb.get(key)
        if new_state is None:
            return False
        # Use a separate name so the `key` argument is not clobbered.
        backup_key = self.backup_resume_state(self.value)
        if backup_key is None:
            return False
        log.info("restoring iteration state: %s", new_state)
        self.itr._save_state_json(new_state)
        return True

    def drop_from_couch(self):
        """Delete resume state from Couch"""
        try:
            self.itr.couch_db.delete_doc(self.itr.iteration_id)
        except ResourceNotFound:
            # Nothing to delete: the state document does not exist.
            pass
Exemple #2
0
@attr.s
class Rewinder:
    """Rewind the resumable iteration state of one domain/doc type.

    ``move_to`` selects the rewind mode and is dispatched on construction:

    - ``case-<N>``: store ``N`` as ``self.offset`` and create a
      ``FormStats`` object in ``self.stats``.
    - ``resume-...``: back up the current iteration state and replace it
      with the state stored in the state db under the ``move_to`` key.

    Any other value raises ``NotImplementedError``.
    """

    statedb = attr.ib()
    domain = attr.ib()
    doc_type = attr.ib()
    move_to = attr.ib()

    def __attrs_post_init__(self):
        """Create the iterator and run the handler matching ``move_to``."""
        migration_id = self.statedb.unique_id
        resume_key = f"{self.domain}.{self.doc_type}.{migration_id}"
        # Only the resume key is supplied; the other arguments are None.
        self.itr = ResumableFunctionIterator(resume_key, None, None, None)
        handlers = [
            ("case_rewind", r"^case-(\d+)$"),
            ("resume_rewind", r"^resume-"),
        ]
        for method, regex in handlers:
            match = re.search(regex, self.move_to)
            if match:
                getattr(self, method)(match)
                break
        else:
            # No pattern matched: unsupported rewind target.
            raise NotImplementedError(self.move_to)

    def resume_rewind(self, match):
        """Restore the iteration state saved under ``self.move_to``.

        Exits the process with status 1 if no such state exists. The
        current state is saved as a resume state before being replaced.

        :param match: regex match for ``move_to`` (unused).
        """
        self.offset = None
        new_state = self.statedb.get(self.move_to)
        if new_state is None:
            # sys.exit() accepts a single argument; a string is printed
            # to stderr and the exit status is 1.
            sys.exit("resume state not found")
        old_state = self.itr.state
        self._save_resume_state(old_state.to_json())
        log.info("restoring iteration state: %s", new_state)
        self.itr._save_state_json(new_state)

    def case_rewind(self, match):
        """Store the numeric offset from ``case-<N>`` and create stats.

        :param match: regex match whose first group is the offset.
        """
        self.offset = int(match.group(1))
        self.stats = FormStats()

    def __iter__(self):
        """Yield ``get_received_on(doc, self.stats)`` for each fetched doc.

        Docs are read from the ``by_domain_doc_type_date/view`` view in
        descending order, starting at the current iteration state's
        ``startkey``/``startkey_docid``, in pages of 1000. The current
        state is saved as a resume state before iterating.

        NOTE(review): ``self.stats`` is only assigned by ``case_rewind``;
        iterating after a ``resume-...`` rewind would fail on it. Confirm
        callers only iterate in ``case-<N>`` mode.
        """
        log.info("preparing to rewind: %s", self.move_to)
        state_json = self.itr.state.to_json()
        self._save_resume_state(state_json)
        couch_db = XFormInstance.get_db()

        def data_function(**view_kwargs):
            return couch_db.view('by_domain_doc_type_date/view', **view_kwargs)

        args_provider = NoSkipArgsProvider({
            'startkey': state_json["kwargs"]["startkey"],
            'startkey_docid': state_json["kwargs"]["startkey_docid"],
            'endkey': [self.domain, self.doc_type],
            'descending': True,
            'limit': 1000,
            'include_docs': True,
            'reduce': False,
        })
        args, kwargs = args_provider.get_initial_args()
        while True:
            results = list(data_function(*args, **kwargs))
            results = args_provider.adjust_results(results, args, kwargs)
            if not results:
                break
            for result in results:
                yield get_received_on(result["doc"], self.stats)
            try:
                args, kwargs = args_provider.get_next_args(results[-1], *args, **kwargs)
            except StopIteration:
                break

    def save_state(self, received_on):
        """Save a resume state whose start key ends at ``received_on``.

        The last element of the state's three-part ``startkey`` is
        replaced with ``received_on``, ``startkey_docid`` is dropped, the
        timestamp is set to the current UTC time, and the result is
        stored in the state db via ``_save_resume_state``.

        :param received_on: new final ``startkey`` element; must have the
            same type as the value it replaces.
        """
        state = self.itr.state
        startkey = state.kwargs["startkey"]
        assert len(startkey) == 3, startkey
        assert isinstance(startkey[-1], type(received_on)), (startkey, received_on)
        startkey[-1] = received_on
        assert state.kwargs["startkey"] is startkey, (state.kwargs, startkey)
        # Tolerate a state that has no doc id (e.g. it was already removed
        # by an earlier call on the same state object).
        state.kwargs.pop("startkey_docid", None)
        state.timestamp = datetime.utcnow()
        self._save_resume_state(state.to_json())

    def _save_resume_state(self, state_json):
        """Store ``state_json`` in the state db unless it would overwrite.

        The key is ``resume-<timestamp>``. An existing, different value
        under that key is kept and a warning is logged; an existing equal
        value is left as is.

        :param state_json: iteration state dict with a ``timestamp`` item.
        """
        assert isinstance(state_json, dict), state_json
        key = f"resume-{state_json['timestamp']}"
        log.info("saving resume state. restore with: rewind --to=%s\n%s",
                 key, state_json)
        old = self.statedb.get(key)
        if old is None:
            # Write first so "saved." is only logged after success.
            self.statedb.set(key, state_json)
            log.info("saved.")
        elif old != state_json:
            # `warning` replaces the deprecated `warn` alias.
            log.warning("NOT SAVED! refusing to overwrite:\n%s", old)