Example #1
    def copy(self, keys, source, sample_only_filter=None, sample_size=None, done_copy=None):
        """
        :param keys: THE KEYS TO LOAD FROM source
        :param source: THE SOURCE (USUALLY S3 BUCKET)
        :param sample_only_filter: SOME FILTER, IN CASE YOU DO NOT WANT TO SEND EVERYTHING
        :param sample_size: FOR RANDOM SAMPLE OF THE source DATA
        :param done_copy: CALLBACK, ADDED TO queue, TO FINISH THE TRANSACTION
        :return: LIST OF SUB-keys PUSHED INTO ES
        """
        num_keys = 0
        queue = None
        for key in keys:
            timer = Timer("key")
            try:
                with timer:
                    for rownum, line in enumerate(source.read_lines(strip_extension(key))):
                        if not line:
                            continue

                        if rownum > 0 and rownum % 1000 == 0:
                            Log.note("Ingested {{num}} records from {{key}} in bucket {{bucket}}", num=rownum, key=key, bucket=source.name)

                        row, please_stop = fix(rownum, line, source, sample_only_filter, sample_size)
                        num_keys += 1

                        if queue == None:
                            queue = self._get_queue(row)
                        queue.add(row)

                        if please_stop:
                            break
            except Exception as e:
                done_copy = None
                Log.warning("Could not process {{key}} after {{duration|round(places=2)}} seconds", key=key, duration=timer.duration.seconds, cause=e)
    def copy(self, keys, source, sample_only_filter=None, sample_size=None, done_copy=None):
        """
        :param keys: THE KEYS TO LOAD FROM source
        :param source: THE SOURCE (USUALLY S3 BUCKET)
        :param sample_only_filter: SOME FILTER, IN CASE YOU DO NOT WANT TO SEND EVERYTHING
        :param sample_size: FOR RANDOM SAMPLE OF THE source DATA
        :param done_copy: CALLBACK, ADDED TO queue, TO FINISH THE TRANSACTION
        :return: LIST OF SUB-keys PUSHED INTO ES
        """
        num_keys = 0
        queue = None
        pending = []  # FOR WHEN WE DO NOT HAVE QUEUE YET
        for key in keys:
            timer = Timer("Process {{key}}", param={"key": key})
            try:
                with timer:
                    for rownum, line in enumerate(source.read_lines(strip_extension(key))):
                        if not line:
                            continue

                        if rownum > 0 and rownum % 1000 == 0:
                            Log.note("Ingested {{num}} records from {{key}} in bucket {{bucket}}", num=rownum, key=key, bucket=source.name)

                        row, please_stop = fix(rownum, line, source, sample_only_filter, sample_size)
                        num_keys += 1

                        if queue == None:
                            queue = self._get_queue(row)
                            if queue == None:
                                pending.append(row)
                                if len(pending) > 1000:
                                    self._get_queue(row)
                                    Log.error("first 1000 (key={{key}}) records have no indication what index to put data", key=tuple(keys)[0])
                                continue
                            elif queue is DATA_TOO_OLD:
                                break
                            if pending:
                                queue.extend(pending)
                                pending = []

                        queue.add(row)

                        if please_stop:
                            break
            except Exception as e:
                done_copy = None
                Log.warning("Could not process {{key}} after {{duration|round(places=2)}}seconds", key=key, duration=timer.duration.seconds, cause=e)

        if done_copy:
            if queue == None:
                done_copy()
            else:
                queue.add(done_copy)

        if pending:
            Log.error("Did not find an index to place the data for key={{key}}", key=tuple(keys)[0])

        Log.note("{{num}} keys from {{key|json}} added", num=num_keys, key=keys)
        return num_keys
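
A minimal usage sketch of the method above. The sink object and the key values are illustrative, not from the source project; only the copy() signature and the done_copy callback behaviour described in the docstring are taken from the code itself.

# Hypothetical call site (names are assumptions): push two S3 keys into whatever
# index _get_queue() selects, and log once the batch can be committed.
keys = ["123:4567", "123:4568"]        # illustrative sub-key format
num = sink.copy(
    keys,
    source,                            # assumed S3-bucket wrapper exposing read_lines()
    sample_only_filter=None,           # send everything
    done_copy=lambda: Log.note("batch {{keys|json}} committed", keys=keys),
)
Log.note("queued {{num}} records", num=num)
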
def get_all_s3(in_es, settings):
    # EVERYTHING FROM S3
    bucket = s3.Bucket(settings.source)
    prefixes = [p.name.rstrip(":") for p in bucket.list(prefix="", delimiter=":")]
    in_s3 = []
    for i, p in enumerate(prefixes):
        if i % 1000 == 0:
            Log.note("Scrubbed {{p|percent(decimal=1)}}",  p= i / len(prefixes))
        try:
            if int(p) not in in_es:
                in_s3.append(int(p))
            else:
                pass
        except Exception:
            Log.note("delete key {{key}}", key=p)
            bucket.delete_key(strip_extension(p))
    return in_s3
Example #4
def get_all_s3(in_es, settings):
    # EVERYTHING FROM S3
    bucket = s3.Bucket(settings.source)
    prefixes = [
        p.name.rstrip(":") for p in bucket.list(prefix="", delimiter=":")
    ]
    in_s3 = []
    for i, p in enumerate(prefixes):
        if i % 1000 == 0:
            Log.note("Scrubbed {{p|percent(decimal=1)}}", p=i / len(prefixes))
        try:
            if int(p) not in in_es:
                in_s3.append(int(p))
            else:
                pass
        except Exception:
            Log.note("delete key {{key}}", key=p)
            bucket.delete_key(strip_extension(p))
    return in_s3
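
A short sketch of how the scan above might be used, assuming get_all_s3 returns the in_s3 list it builds (as added above); the helper names and the sub-key listing are illustrative only.

# Hypothetical backfill driver (names are assumptions): re-copy every prefix that
# exists in S3 but is missing from ES, reusing the copy() method shown earlier.
in_es = get_ids_already_in_es(es)              # assumed helper returning a set of ints
missing = get_all_s3(in_es, settings)
for prefix in missing:
    keys = [k.name for k in bucket.list(prefix=unicode(prefix) + ":")]
    sink.copy(keys, source=bucket)
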
Example #5
    def copy(self, keys, source, sample_only_filter=None, sample_size=None):
        num_keys = 0
        for key in keys:
            try:
                for rownum, line in enumerate(
                        source.read_lines(strip_extension(key))):
                    if rownum == 0:
                        value = convert.json2value(line)
                        if len(line) > 1000000:
                            # Log.warning("Line {{num}} for key {{key}} is too long ({{length|comma}} bytes, {{num_tests}} subtests)", key=key, length=len(line), num=rownum, num_tests=len(value.result.subtests))
                            value.result.subtests = None
                            value.result.missing_subtests = True

                        _id, value = _fix(value)
                        row = {"id": _id, "value": value}
                        if sample_only_filter and Random.int(
                                int(1.0 / coalesce(
                                    sample_size, 0.01))) != 0 and qb.filter(
                                        [value], sample_only_filter):
                            # INDEX etl.id==0, BUT NO MORE
                            if value.etl.id != 0:
                                Log.error("Expecting etl.id==0")
                            num_keys += 1
                            self.queue.add(row)
                            break
                    elif len(line) > 1000000:
                        value = convert.json2value(line)
                        # Log.warning("Line {{num}} for key {{key}} is too long ({{length|comma}} bytes, {{num_tests}} subtests).", key=key, length=len(line), num=rownum, num_tests=len(value.result.subtests))
                        value.result.subtests = None
                        value.result.missing_subtests = True
                        _id, value = _fix(value)
                        row = {"id": _id, "value": value}
                    else:
                        #FAST
                        _id = strings.between(line, "_id\": \"",
                                              "\"")  # AVOID DECODING JSON
                        row = {"id": _id, "json": line}
                    num_keys += 1
                    self.queue.add(row)
            except Exception as e:
                Log.warning("Could not get queue for {{key}}",
                            key=key,
                            cause=e)
    def copy(self,
             keys,
             source,
             sample_only_filter=None,
             sample_size=None,
             done_copy=None):
        """
        :param keys: THE KEYS TO LOAD FROM source
        :param source: THE SOURCE (USUALLY S3 BUCKET)
        :param sample_only_filter: SOME FILTER, IN CASE YOU DO NOT WANT TO SEND EVERYTHING
        :param sample_size: FOR RANDOM SAMPLE OF THE source DATA
        :param done_copy: CALLBACK, ADDED TO queue, TO FINISH THE TRANSACTION
        :return: LIST OF SUB-keys PUSHED INTO ES
        """
        num_keys = 0
        queue = None
        for key in keys:
            timer = Timer("key")
            try:
                with timer:
                    for rownum, line in enumerate(
                            source.read_lines(strip_extension(key))):
                        if not line:
                            continue

                        row, please_stop = fix(rownum, line, source,
                                               sample_only_filter, sample_size)
                        num_keys += 1

                        if queue == None:
                            queue = self._get_queue(row)
                        queue.add(row)

                        if please_stop:
                            break
            except Exception as e:
                done_copy = None
                Log.warning(
                    "Could not process {{key}} after {{duration|round(places=2)}} seconds",
                    key=key,
                    duration=timer.duration.seconds,
                    cause=e)
    def copy(self, keys, source, sample_only_filter=None, sample_size=None):
        num_keys = 0
        for key in keys:
            try:
                for rownum, line in enumerate(source.read_lines(strip_extension(key))):
                    if rownum == 0:
                        value = convert.json2value(line)
                        if len(line) > 1000000:
                            # Log.warning("Line {{num}} for key {{key}} is too long ({{length|comma}} bytes, {{num_tests}} subtests)", key=key, length=len(line), num=rownum, num_tests=len(value.result.subtests))
                            value.result.subtests = None
                            value.result.missing_subtests = True

                        _id, value = _fix(value)
                        row = {"id": _id, "value": value}
                        if sample_only_filter and Random.int(int(1.0/coalesce(sample_size, 0.01))) != 0 and qb.filter([value], sample_only_filter):
                            # INDEX etl.id==0, BUT NO MORE
                            if value.etl.id != 0:
                                Log.error("Expecting etl.id==0")
                            num_keys += 1
                            self.queue.add(row)
                            break
                    elif len(line) > 1000000:
                        value = convert.json2value(line)
                        # Log.warning("Line {{num}} for key {{key}} is too long ({{length|comma}} bytes, {{num_tests}} subtests).", key=key, length=len(line), num=rownum, num_tests=len(value.result.subtests))
                        value.result.subtests = None
                        value.result.missing_subtests = True
                        _id, value = _fix(value)
                        row = {"id": _id, "value": value}
                    else:
                        #FAST
                        _id = strings.between(line, "_id\": \"", "\"")  # AVOID DECODING JSON
                        row = {"id": _id, "json": line}
                    num_keys += 1
                    self.queue.add(row)
            except Exception as e:
                Log.warning("Could not get queue for {{key}}", key=key, cause=e)
    def copy(self,
             keys,
             source,
             sample_only_filter=None,
             sample_size=None,
             done_copy=None):
        """
        :param keys: THE KEYS TO LOAD FROM source
        :param source: THE SOURCE (USUALLY S3 BUCKET)
        :param sample_only_filter: SOME FILTER, IN CASE YOU DO NOT WANT TO SEND EVERYTHING
        :param sample_size: FOR RANDOM SAMPLE OF THE source DATA
        :param done_copy: CALLBACK, ADDED TO queue, TO FINISH THE TRANSACTION
        :return: LIST OF SUB-keys PUSHED INTO ES
        """
        num_keys = 0
        queue = None
        pending = []  # FOR WHEN WE DO NOT HAVE QUEUE YET
        for key in keys:
            timer = Timer("Process {{key}}",
                          param={"key": key},
                          silent=not DEBUG)
            try:
                with timer:
                    for rownum, line in enumerate(
                            source.read_lines(strip_extension(key))):
                        if not line:
                            continue

                        if rownum > 0 and rownum % 1000 == 0:
                            Log.note(
                                "Ingested {{num}} records from {{key}} in bucket {{bucket}}",
                                num=rownum,
                                key=key,
                                bucket=source.name)

                        insert_me, please_stop = fix(key, rownum, line, source,
                                                     sample_only_filter,
                                                     sample_size)
                        if insert_me == None:
                            continue
                        value = insert_me['value']

                        if '_id' not in value:
                            Log.warning(
                                "expecting an _id in all S3 records. If missing, there can be duplicates"
                            )

                        if queue == None:
                            queue = self._get_queue(insert_me)
                            if queue == None:
                                pending.append(insert_me)
                                if len(pending) > 1000:
                                    if done_copy:
                                        done_copy()
                                    Log.error(
                                        "first 1000 (key={{key}}) records for {{alias}} have no indication what index to put data",
                                        key=tuple(keys)[0],
                                        alias=self.settings.index)
                                continue
                            elif queue is DATA_TOO_OLD:
                                break
                            if pending:
                                queue.extend(pending)
                                pending = []

                        num_keys += 1
                        queue.add(insert_me)

                        if please_stop:
                            break
            except Exception as e:
                if KEY_IS_WRONG_FORMAT in e:
                    Log.warning(
                        "Could not process {{key}} because bad format. Never trying again.",
                        key=key,
                        cause=e)
                    pass
                elif CAN_NOT_DECODE_JSON in e:
                    Log.warning(
                        "Could not process {{key}} because of bad JSON. Never trying again.",
                        key=key,
                        cause=e)
                    pass
                else:
                    Log.warning(
                        "Could not process {{key}} after {{duration|round(places=2)}}seconds",
                        key=key,
                        duration=timer.duration.seconds,
                        cause=e)
                    done_copy = None

        if done_copy:
            if queue == None:
                done_copy()
            elif queue is DATA_TOO_OLD:
                done_copy()
            else:
                queue.add(done_copy)

        if [
                p for p in pending
                if wrap(p).value.task.state not in ('failed', 'exception')
        ]:
            Log.error(
                "Did not find an index for {{alias}} to place the data for key={{key}}",
                key=tuple(keys)[0],
                alias=self.settings.index)

        Log.note("{{num}} keys from {{key|json}} added",
                 num=num_keys,
                 key=keys)
        return num_keys
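
The queue selection above depends on _get_queue() returning one of three things: None (no index is known yet, so rows are buffered in pending), the DATA_TOO_OLD sentinel (the whole key is skipped), or a queue exposing add() and extend(). A hedged sketch of that contract follows, treating the row as a plain dict; the age cutoff and the per-period routing are assumptions for illustration, not the repo's actual rule.

# Hedged sketch of the contract implied by the callers above.
DATA_TOO_OLD = "data too old"                  # sentinel; callers compare with `is`

def _get_queue(self, insert_me):
    etl = insert_me["value"].get("etl") or {}
    timestamp = etl.get("timestamp")
    if timestamp is None:
        return None                            # cannot route yet; caller buffers into pending
    if timestamp < self.oldest_accepted:       # assumed cutoff attribute
        return DATA_TOO_OLD                    # caller drops the whole key
    # assumed: one bulk queue per day, created lazily on first use
    period = int(timestamp // 86400)
    if period not in self.period_queues:
        self.period_queues[period] = self._new_queue(period)   # assumed factory
    return self.period_queues[period]
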
Example #9
    def _dispatch_work(self, source_block):
        """
        source_block POINTS TO THE bucket AND key TO PROCESS
        :return: False IF THERE IS NOTHING LEFT TO DO
        """
        source_keys = listwrap(coalesce(source_block.key, source_block.keys))

        if not isinstance(source_block.bucket, basestring):  # FIX MISTAKE
            source_block.bucket = source_block.bucket.bucket
        bucket = source_block.bucket
        work_actions = [w for w in self.settings.workers if w.source.bucket == bucket]

        if not work_actions:
            Log.note("No worker defined for records from {{bucket}}, {{action}}.\n{{message|indent}}",
                bucket= source_block.bucket,
                message= source_block,
                action= "skipping" if self.settings.keep_unknown_on_queue else "deleting")
            return not self.settings.keep_unknown_on_queue

        for action in work_actions:
            try:
                source_key = unicode(source_keys[0])
                if len(source_keys) > 1:
                    multi_source = action._source
                    source = ConcatSources([multi_source.get_key(k) for k in source_keys])
                    source_key = MIN(source_keys)
                else:
                    source = action._source.get_key(source_key)
                    source_key = source.key

                Log.note("Execute {{action}} on bucket={{source}} key={{key}}",
                    action= action.name,
                    source= source_block.bucket,
                    key= source_key)

                if action.transform_type == "bulk":
                    old_keys = set()
                else:
                    old_keys = action._destination.keys(prefix=source_block.key)

                new_keys = set(action._transformer(source_key, source, action._destination, resources=self.resources, please_stop=self.please_stop))

                #VERIFY KEYS
                if len(new_keys) == 1 and list(new_keys)[0] == source_key:
                    pass  # ok
                else:
                    etls = map(key2etl, new_keys)
                    etls = qb.sort(etls, "id")
                    for i, e in enumerate(etls):
                        if i != e.id:
                            Log.error("expecting keys to have dense order: {{ids}}", ids=etls.id)
                    #VERIFY KEYS EXIST
                    if hasattr(action._destination, "get_key"):
                        for k in new_keys:
                            action._destination.get_key(k)

                for n in action._notify:
                    for k in new_keys:
                        n.add(k)

                if action.transform_type == "bulk":
                    continue

                # DUE TO BUGS THIS INVARIANT IS NOW BROKEN
                # TODO: FIGURE OUT HOW TO FIX THIS (CHANGE NAME OF THE SOURCE BLOCK KEY?)
                # for n in new_keys:
                #     if not n.startswith(source_key):
                #         Log.error("Expecting new keys ({{new_key}}) to start with source key ({{source_key}})",  new_key= n,  source_key= source_key)

                if not new_keys and old_keys:
                    Log.alert("Expecting some new keys after etl of {{source_key}}, especially since there were old ones\n{{old_keys}}",
                        old_keys= old_keys,
                        source_key= source_key)
                    continue
                elif not new_keys:
                    Log.alert("Expecting some new keys after processing {{source_key}}",
                        old_keys= old_keys,
                        source_key= source_key)
                    continue

                for k in new_keys:
                    if len(k.split(".")) == 3 and action.destination.type!="test_result":
                        Log.error("two dots have not been needed yet, this is a consitency check")

                delete_me = old_keys - new_keys
                if delete_me:
                    if action.destination.bucket == "ekyle-test-result":
                        for k in delete_me:
                            action._destination.delete_key(k)
                    else:
                        Log.note("delete keys?\n{{list}}",  list= sorted(delete_me))
                        # for k in delete_me:
                # WE DO NOT PUT KEYS ON WORK QUEUE IF ALREADY NOTIFYING SOME OTHER
                # AND NOT GOING TO AN S3 BUCKET
                if not action._notify and isinstance(action._destination, (aws.s3.Bucket, S3Bucket)):
                    for k in old_keys | new_keys:
                        self.work_queue.add(Dict(
                            bucket=action.destination.bucket,
                            key=k
                        ))
            except Exception as e:
                if "Key {{key}} does not exist" in e:
                    err = Log.warning
                elif "multiple keys in {{bucket}}" in e:
                    err = Log.warning
                    if source_block.bucket=="ekyle-test-result":
                        for k in action._source.list(prefix=key_prefix(source_key)):
                            action._source.delete_key(strip_extension(k.key))
                elif "expecting keys to have dense order" in e:
                    err = Log.warning
                    if source_block.bucket=="ekyle-test-result":
                        # WE KNOW OF THIS ETL MISTAKE, REPROCESS
                        self.work_queue.add({
                            "key": unicode(key_prefix(source_key)),
                            "bucket": "ekyle-pulse-logger"
                        })
                elif "Expecting a pure key" in e:
                    err = Log.warning
                else:
                    err = Log.error

                err("Problem transforming {{action}} on bucket={{source}} key={{key}} to destination={{destination}}", {
                    "action": action.name,
                    "source": source_block.bucket,
                    "key": source_key,
                    "destination": coalesce(action.destination.name, action.destination.index)
                }, e)
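
_dispatch_work() above consumes a single source_block message naming a bucket and one or more keys. A hedged sketch of the loop that might feed it: pop() and commit() on the work queue are assumptions, while the message shape and the "ekyle-pulse-logger" bucket name come from the method itself.

# Hypothetical worker loop (not from the source repo).
def run_worker(etl, please_stop):
    while not please_stop:
        todo = etl.work_queue.pop()            # assumed to block until a message arrives
        if todo is None:
            continue
        # e.g. {"bucket": "ekyle-pulse-logger", "key": "123:4567"}
        try:
            etl._dispatch_work(todo)
            etl.work_queue.commit()            # assumed ack once the dispatch finished
        except Exception as e:
            Log.warning("dispatch of {{todo|json}} failed", todo=todo, cause=e)
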
    def copy(self, keys, source, sample_only_filter=None, sample_size=None, done_copy=None):
        """
        :param keys: THE KEYS TO LOAD FROM source
        :param source: THE SOURCE (USUALLY S3 BUCKET)
        :param sample_only_filter: SOME FILTER, IN CASE YOU DO NOT WANT TO SEND EVERYTHING
        :param sample_size: FOR RANDOM SAMPLE OF THE source DATA
        :param done_copy: CALLBACK, ADDED TO queue, TO FINISH THE TRANSACTION
        :return: LIST OF SUB-keys PUSHED INTO ES
        """
        num_keys = 0
        queue = None
        pending = []  # FOR WHEN WE DO NOT HAVE QUEUE YET
        for key in keys:
            timer = Timer("Process {{key}}", param={"key": key}, silent=not DEBUG)
            try:
                with timer:
                    for rownum, line in enumerate(source.read_lines(strip_extension(key))):
                        if not line:
                            continue

                        if rownum > 0 and rownum % 1000 == 0:
                            Log.note("Ingested {{num}} records from {{key}} in bucket {{bucket}}", num=rownum, key=key, bucket=source.name)

                        insert_me, please_stop = fix(key, rownum, line, source, sample_only_filter, sample_size)
                        if insert_me == None:
                            continue
                        value = insert_me['value']

                        if '_id' not in value:
                            Log.warning("expecting an _id in all S3 records. If missing, there can be duplicates")

                        if queue == None:
                            queue = self._get_queue(insert_me)
                            if queue == None:
                                pending.append(insert_me)
                                if len(pending) > 1000:
                                    if done_copy:
                                        done_copy()
                                    Log.error("first 1000 (key={{key}}) records for {{alias}} have no indication what index to put data", key=tuple(keys)[0], alias=self.settings.index)
                                continue
                            elif queue is DATA_TOO_OLD:
                                break
                            if pending:
                                queue.extend(pending)
                                pending = []

                        num_keys += 1
                        queue.add(insert_me)

                        if please_stop:
                            break
            except Exception as e:
                if KEY_IS_WRONG_FORMAT in e:
                    Log.warning("Could not process {{key}} because bad format. Never trying again.", key=key, cause=e)
                    pass
                elif CAN_NOT_DECODE_JSON in e:
                    Log.warning("Could not process {{key}} because of bad JSON. Never trying again.", key=key, cause=e)
                    pass
                else:
                    Log.warning("Could not process {{key}} after {{duration|round(places=2)}}seconds", key=key, duration=timer.duration.seconds, cause=e)
                    done_copy = None

        if done_copy:
            if queue == None:
                done_copy()
            elif queue is DATA_TOO_OLD:
                done_copy()
            else:
                queue.add(done_copy)

        if [p for p in pending if wrap(p).value.task.state not in ('failed', 'exception')]:
            Log.error("Did not find an index for {{alias}} to place the data for key={{key}}", key=tuple(keys)[0], alias=self.settings.index)

        Log.note("{{num}} keys from {{key|json}} added", num=num_keys, key=keys)
        return num_keys
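
The per-record cleanup in these copy() variants is delegated to a fix() helper that is not shown. A dependency-free sketch of the contract its call sites imply: it returns (insert_me, please_stop), where insert_me is None when the record should be dropped; the earlier variants call it without the leading key argument. The id fallback and the sampling rule below are plausible readings, not the repo's actual logic.

# Hedged, stdlib-only stand-in for fix(); the real helper lives in the source repo.
import json
import random

def fix(key, rownum, line, source, sample_only_filter=None, sample_size=None):
    value = json.loads(line)                               # decode one JSON record
    _id = value.get("_id") or "%s.%s" % (key, rownum)      # assumed id scheme, illustrative
    insert_me = {"id": _id, "value": value}
    if sample_only_filter is not None:
        # one plausible reading: keep roughly sample_size of the records, then stop early
        if random.random() > (sample_size or 0.01):
            return None, False                             # drop this record
        return insert_me, True                             # keep it, then stop reading the key
    return insert_me, False
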
Example #11
    def _dispatch_work(self, source_block):
        """
        source_block POINTS TO THE bucket AND key TO PROCESS
        :return: False IF THERE IS NOTHING LEFT TO DO
        """
        source_keys = listwrap(coalesce(source_block.key, source_block.keys))

        if not isinstance(source_block.bucket, basestring):  # FIX MISTAKE
            source_block.bucket = source_block.bucket.bucket
        bucket = source_block.bucket
        work_actions = [
            w for w in self.settings.workers if w.source.bucket == bucket
        ]

        if not work_actions:
            Log.note(
                "No worker defined for records from {{bucket}}, {{action}}.\n{{message|indent}}",
                bucket=source_block.bucket,
                message=source_block,
                action="skipping"
                if self.settings.keep_unknown_on_queue else "deleting")
            return not self.settings.keep_unknown_on_queue

        for action in work_actions:
            try:
                source_key = unicode(source_keys[0])
                if len(source_keys) > 1:
                    multi_source = action._source
                    source = ConcatSources(
                        [multi_source.get_key(k) for k in source_keys])
                    source_key = MIN(source_keys)
                else:
                    source = action._source.get_key(source_key)
                    source_key = source.key

                Log.note("Execute {{action}} on bucket={{source}} key={{key}}",
                         action=action.name,
                         source=source_block.bucket,
                         key=source_key)

                if action.transform_type == "bulk":
                    old_keys = set()
                else:
                    old_keys = action._destination.keys(
                        prefix=source_block.key)

                new_keys = set(
                    action._transformer(source_key,
                                        source,
                                        action._destination,
                                        resources=self.resources,
                                        please_stop=self.please_stop))

                #VERIFY KEYS
                if len(new_keys) == 1 and list(new_keys)[0] == source_key:
                    pass  # ok
                else:
                    etls = map(key2etl, new_keys)
                    etls = qb.sort(etls, "id")
                    for i, e in enumerate(etls):
                        if i != e.id:
                            Log.error(
                                "expecting keys to have dense order: {{ids}}",
                                ids=etls.id)
                    #VERIFY KEYS EXIST
                    if hasattr(action._destination, "get_key"):
                        for k in new_keys:
                            action._destination.get_key(k)

                for n in action._notify:
                    for k in new_keys:
                        n.add(k)

                if action.transform_type == "bulk":
                    continue

                # DUE TO BUGS THIS INVARIANT IS NOW BROKEN
                # TODO: FIGURE OUT HOW TO FIX THIS (CHANGE NAME OF THE SOURCE BLOCK KEY?)
                # for n in new_keys:
                #     if not n.startswith(source_key):
                #         Log.error("Expecting new keys ({{new_key}}) to start with source key ({{source_key}})",  new_key= n,  source_key= source_key)

                if not new_keys and old_keys:
                    Log.alert(
                        "Expecting some new keys after etl of {{source_key}}, especially since there were old ones\n{{old_keys}}",
                        old_keys=old_keys,
                        source_key=source_key)
                    continue
                elif not new_keys:
                    Log.alert(
                        "Expecting some new keys after processing {{source_key}}",
                        old_keys=old_keys,
                        source_key=source_key)
                    continue

                for k in new_keys:
                    if len(k.split(".")
                           ) == 3 and action.destination.type != "test_result":
                        Log.error(
                            "two dots have not been needed yet, this is a consitency check"
                        )

                delete_me = old_keys - new_keys
                if delete_me:
                    if action.destination.bucket == "ekyle-test-result":
                        for k in delete_me:
                            action._destination.delete_key(k)
                    else:
                        Log.note("delete keys?\n{{list}}",
                                 list=sorted(delete_me))
                        # for k in delete_me:
                # WE DO NOT PUT KEYS ON WORK QUEUE IF ALREADY NOTIFYING SOME OTHER
                # AND NOT GOING TO AN S3 BUCKET
                if not action._notify and isinstance(
                        action._destination, (aws.s3.Bucket, S3Bucket)):
                    for k in old_keys | new_keys:
                        self.work_queue.add(
                            Dict(bucket=action.destination.bucket, key=k))
            except Exception as e:
                if "Key {{key}} does not exist" in e:
                    err = Log.warning
                elif "multiple keys in {{bucket}}" in e:
                    err = Log.warning
                    if source_block.bucket == "ekyle-test-result":
                        for k in action._source.list(
                                prefix=key_prefix(source_key)):
                            action._source.delete_key(strip_extension(k.key))
                elif "expecting keys to have dense order" in e:
                    err = Log.warning
                    if source_block.bucket == "ekyle-test-result":
                        # WE KNOW OF THIS ETL MISTAKE, REPROCESS
                        self.work_queue.add({
                            "key":
                            unicode(key_prefix(source_key)),
                            "bucket":
                            "ekyle-pulse-logger"
                        })
                elif "Expecting a pure key" in e:
                    err = Log.warning
                else:
                    err = Log.error

                err(
                    "Problem transforming {{action}} on bucket={{source}} key={{key}} to destination={{destination}}",
                    {
                        "action":
                        action.name,
                        "source":
                        source_block.bucket,
                        "key":
                        source_key,
                        "destination":
                        coalesce(action.destination.name,
                                 action.destination.index)
                    }, e)