Ejemplo n.º 1
0
class CouchdbPuller(OutputModule):
    """Output module that buffers incoming documents and writes them to
    CouchDB in bulk.

    Documents are collected in an in-memory dict keyed by document id and
    flushed with a single bulk update once ``bulk`` documents are buffered.
    """

    def __init__(self,
                 actor_config,
                 couchdb_url,
                 payload=None,
                 selection="data",
                 bulk=100,
                 parallel_streams=1,
                 native_events=False,
                 **kw):
        """Set up the inbox queue, the CouchDB handle and the bulk buffer.

        Args:
            actor_config: Passed unchanged to ``OutputModule.__init__``.
            couchdb_url: URL handed to ``Database`` to open the target
                database.
            bulk: Number of buffered documents that triggers a flush.
            payload, selection, parallel_streams, native_events, **kw:
                Not read in this class; presumably consumed by the
                ``OutputModule`` machinery -- TODO confirm.
        """
        OutputModule.__init__(self, actor_config)
        self.pool.createQueue("inbox")
        self.registerConsumer(self.consume, "inbox")
        self.couchdb = Database(couchdb_url)
        self._bulk_size = bulk
        # Pending documents keyed by document id, so a newer version of the
        # same document replaces the older one before it is flushed.
        self._bulk = {}

    def __save(self):
        """Flush the buffered documents to CouchDB with one bulk update.

        Per-document failures and unexpected exceptions are logged, not
        raised. The buffer is emptied in every case, so documents that
        failed to save are dropped rather than retried.

        Returns:
            Always ``False``.
        """
        self.logging.debug("Saving: {} docs".format(len(self._bulk)))
        try:
            response = self.couchdb.update(list(self._bulk.values()))
            # Each result is (success flag, document id, revision-or-error).
            for ok, doc_id, rest in response:
                if ok:
                    self.logging.info("Saved {}".format(doc_id))
                else:
                    self.logging.error(
                        "Error on save bulk. Type {}, message {}, doc {}".
                        format(rest, getattr(rest, 'message', ''), doc_id))
        except Exception as e:
            self.logging.error("Uncaught error {} on save bulk".format(e))
        finally:
            self._bulk = {}
            self.logging.debug("Cleaned bulk")

        return False

    def consume(self, event):
        """Add the event's document to the bulk buffer, flushing when full.

        The data to submit is decoded from JSON when it is not already a
        dict. Data that cannot be parsed, or that does not decode to a JSON
        object, is logged and skipped. When the document carries an ``id``
        or ``_id`` and already exists in the database, its current revision
        is attached so the bulk update overwrites it instead of conflicting.

        Args:
            event: The event taken from the ``inbox`` queue.
        """
        data = self.encode(self.getDataToSubmit(event))
        if not isinstance(data, dict):
            try:
                data = loads(data)
            except ValueError:
                self.logging.error(
                    "Unable to parse data from raw string. Skipping")
                # Actually skip: falling through would call .get() on the
                # unparsed string and raise AttributeError.
                return
            if not isinstance(data, dict):
                self.logging.error(
                    "Parsed data is not a JSON object. Skipping")
                return

        doc_id = data.get('id', data.get('_id'))
        if doc_id:
            data['_id'] = data['id'] = doc_id
            # One lookup instead of a membership test followed by a get:
            # get() yields None for a missing document, which also avoids
            # the race where the document vanishes between the two calls.
            existing = self.couchdb.get(doc_id)
            if existing is not None:
                rev = existing.rev
                data['_rev'] = rev
                self.logging.debug("Update revision in data {} to {}".format(
                    doc_id, rev))

        # Documents without an id are buffered under a random key.
        bulk_key = data['_id'] if '_id' in data else uuid4().hex
        self._bulk[bulk_key] = data
        self.logging.debug("Added {} to bulk queue. Size {}".format(
            doc_id, len(self._bulk)))

        if len(self._bulk) >= self._bulk_size:
            # Run the flush in its own greenlet and wait for it to finish.
            g = spawn(self.__save)
            g.join()
Ejemplo n.º 2
0
class CouchdbOutput(OutputModule):
    """Output module that writes incoming events to a CouchDB database.

    Bulk events are written with a single bulk update; single events are
    saved one document at a time. In both cases the current revision of an
    already-stored document is attached first so the write updates it.
    """

    def __init__(self,
                 actor_config,
                 couchdb_url,
                 payload=None,
                 selection="data",
                 parallel_streams=1,
                 native_events=False,
                 **kw):
        """Set up the inbox queue and the CouchDB handle.

        Args:
            actor_config: Passed unchanged to ``OutputModule.__init__``.
            couchdb_url: URL handed to ``Database`` to open the target
                database.
            selection: Read back in ``consume`` as ``self.kwargs.selection``
                to pick the document out of each event; presumably stored
                on ``self.kwargs`` by the base class -- TODO confirm.
            payload, parallel_streams, native_events, **kw: Not read in
                this class; presumably consumed by the ``OutputModule``
                machinery -- TODO confirm.
        """
        OutputModule.__init__(self, actor_config)
        self.pool.createQueue("inbox")
        self.registerConsumer(self.consume, "inbox")
        self.couchdb = Database(couchdb_url)

    def consume(self, event):
        """Write the event's document(s) to CouchDB.

        For a bulk event, every contained document is normalised so that
        ``id`` and ``_id`` agree, existing revisions are fetched with one
        ``_all_docs`` query, and all documents are sent in one bulk update
        whose per-document results are logged. Documents without any id are
        included in the update without an ``_id``.

        For a single event, the document's current revision (if it exists)
        is attached and the document is saved.

        Args:
            event: The event taken from the ``inbox`` queue.
        """
        if event.isBulk():
            bulk_docs = {}
            # Documents carrying neither 'id' nor '_id'. They used to raise
            # KeyError and abort the whole bulk; now they are submitted
            # without an '_id' (the database is expected to assign one).
            unkeyed_docs = []
            for e in extractBulkItems(event):
                doc = e.get(self.kwargs.selection)
                # Removes both keys; 'id' takes precedence over '_id'.
                doc_id = doc.pop('id', doc.pop('_id', ''))
                if doc_id:
                    doc['_id'] = doc['id'] = doc_id
                    bulk_docs[doc_id] = doc
                else:
                    unkeyed_docs.append(doc)

            if bulk_docs:
                # Fetch the current revisions of all keyed documents in one
                # request; rows for unknown keys are ignored.
                for row in self.couchdb.view(
                        '_all_docs', keys=list(bulk_docs)).rows:
                    if row.id in bulk_docs:
                        bulk_docs[row.id]['_rev'] = row['value']['rev']
            try:
                response = self.couchdb.update(
                    list(bulk_docs.values()) + unkeyed_docs)
                # Each result is (success flag, document id,
                # revision-or-error).
                for ok, doc_id, rest in response:
                    if ok:
                        self.logging.info("Saved {}".format(doc_id))
                    else:
                        self.logging.error(
                            "Error on save bulk. Type {}, message {}, doc {}".
                            format(rest, getattr(rest, 'message', ''), doc_id))
            except Exception as e:
                self.logging.error("Uncaught error {} on save bulk".format(e))
        else:
            data = event.get(self.kwargs.selection)
            doc_id = data.get('id', data.get('_id'))
            if doc_id:
                data['_id'] = data['id'] = doc_id
                # Look up by doc_id: the original passed the builtin ``id``
                # function here, so the revision was never fetched for the
                # right document. get() yields None when it does not exist.
                existing = self.couchdb.get(doc_id)
                if existing is not None:
                    rev = existing.rev
                    data['_rev'] = rev
                    self.logging.debug(
                        "Update revision in data {} to {}".format(doc_id, rev))
            self.couchdb.save(data)