Example #1
0
    def process_item(self, item):
        """
        Run the user function on a single source item and wrap its result.

        The user function (``self.ufn``) is called inside a ``Timeout``
        context configured with ``self.timeout``. Any ``Exception`` raised
        there is logged with its traceback and replaced by an
        ``{"error": <message>}`` payload instead of propagating.

        Args:
            item: the source document; it is indexed by ``self.source.key``
                and ``self.source.lu_field``.

        Returns:
            A dict holding the item's key under ``self.target.key``, the
            converted last-updated value under ``self.target.lu_field``,
            ``"_process_time"`` (seconds) when ``self.store_process_time``
            is truthy, and finally every entry of the user function's
            result, which overrides wrapper fields of the same name.
        """

        self.logger.debug("Processing: {}".format(item[self.source.key]))

        time_start = time()

        try:
            with Timeout(seconds=self.timeout):
                # Call the user function directly rather than through the
                # ``__call__`` dunder.
                processed = self.ufn(item)
        except Exception as e:
            self.logger.error(traceback.format_exc())
            processed = {"error": str(e)}

        time_end = time()

        key, lu_field = self.source.key, self.source.lu_field

        out = {
            self.target.key: item[key],
            # lu_func[0] converts the source's last-updated value.
            self.target.lu_field: self.source.lu_func[0](item[lu_field]),
        }
        if self.store_process_time:
            out["_process_time"] = time_end - time_start

        # Applied last so the processed payload wins on key collisions.
        out.update(processed)
        return out
Example #2
0
    def process_item(self, item: List[Dict]) -> Dict[Tuple, Dict]:  # type: ignore
        """
        Apply the unary function to a group of source documents and
        annotate the outcome with bookkeeping fields.

        The function result is tagged ``"state": "successful"``; if anything
        inside the ``Timeout`` context raises an ``Exception``, the traceback
        is logged and the result becomes
        ``{"error": <message>, "state": "failed"}``.

        Fields added only when the result does not already contain them:
        the first group key under ``self.target.key``, all group keys under
        ``"<source key>s"``, the maximum converted last-updated value under
        ``self.target.last_updated_field``, and ``"_bt"`` set to
        ``datetime.utcnow()``. ``"_process_time"`` is written
        unconditionally when ``self.store_process_time`` is truthy.
        """

        source_key = self.source.key
        group_keys = [doc[source_key] for doc in item]

        self.logger.debug("Processing: {}".format(group_keys))

        started = time()

        try:
            with Timeout(seconds=self.timeout):
                result = self.unary_function(item)
                result["state"] = "successful"
        except Exception as exc:
            self.logger.error(traceback.format_exc())
            result = {"error": str(exc), "state": "failed"}

        finished = time()

        convert = self.source._lu_func[0]
        lu_field = self.source.last_updated_field
        converted = [convert(doc[lu_field]) for doc in item]

        defaults = {
            self.target.key: group_keys[0],
            f"{source_key}s": group_keys,
            self.target.last_updated_field: max(converted),
            "_bt": datetime.utcnow(),
        }
        # Values already present in the function's result take precedence.
        for field, value in defaults.items():
            result.setdefault(field, value)

        if self.store_process_time:
            result["_process_time"] = finished - started

        return result
Example #3
0
    def process_item(self, item: Dict):
        """
        Map one source document through the unary function and build the
        output document.

        The function result is copied into a new dict and tagged
        ``"state": "successful"``; the source key and source last-updated
        fields are then removed from it. If an ``Exception`` is raised, the
        traceback is logged and the result becomes
        ``{"error": <message>, "state": "failed"}``.

        The returned dict holds the item's key under ``self.target.key``,
        the converted last-updated value (``datetime.utcnow()`` is the
        fallback when the item lacks that field) under
        ``self.target.last_updated_field``, ``"_process_time"`` when
        ``self.store_process_time`` is truthy, and then the result entries.
        """

        self.logger.debug("Processing: {}".format(item[self.source.key]))

        started = time()

        try:
            with Timeout(seconds=self.timeout):
                result = dict(self.unary_function(item))
                result["state"] = "successful"

            # Strip the source's own bookkeeping fields from the payload.
            drop_fields = [self.source.key, self.source.last_updated_field]
            for field in drop_fields:
                result.pop(field, None)

        except Exception as exc:
            self.logger.error(traceback.format_exc())
            result = {"error": str(exc), "state": "failed"}

        elapsed = time() - started

        source_key = self.source.key
        lu_field = self.source.last_updated_field

        out = {self.target.key: item[source_key]}
        out[self.target.last_updated_field] = self.source._lu_func[0](
            item.get(lu_field, datetime.utcnow())
        )

        if self.store_process_time:
            out["_process_time"] = elapsed

        # Result entries are merged last and override the fields above.
        out.update(result)
        return out
Example #4
0
 def takes_too_long():
     """Sleep for two seconds inside a ``Timeout(seconds=1)`` context.

     NOTE(review): presumably exists to exercise the timeout path; confirm
     against the caller.
     """
     one_second_limit = Timeout(seconds=1)
     with one_second_limit:
         sleep(2)