Esempio n. 1
0
    def consume_batch_async(self, batch):
        """Create the CSV writer and emit the header row on first use.

        Nothing is set up when ``self.writer`` already exists. Otherwise the
        output target is ``sys.stdout``, unless ``self.spec`` ends in ".csv",
        in which case the part of the spec after the scheme prefix is opened
        as the output file.

        With the ``CSVSink.CSV_JSON_SCHEME`` prefix the columns are the sorted
        keys of the first message's JSON value, with 'id' prepended when the
        document has no 'id' key. For any other spec a fixed five-column
        header is written.

        Returns:
            ``(0, future)``, with the future already marked done, when the
            JSON scheme is used and the batch carries no messages;
            ``(error_message, None)`` when the output file cannot be opened.

        NOTE(review): the visible body ends once the header row is written,
        so every other path falls off the end and returns None. The code that
        writes the batch's messages presumably follows in the full source.
        """
        if not self.writer:
            if self.spec.startswith(CSVSink.CSV_JSON_SCHEME):
                if len(batch.msgs) <= 0:
                    # No document to derive the columns from yet; report the
                    # batch as done and try again on the next one.
                    future = pump.SinkBatchFuture(self, batch)
                    self.future_done(future, 0)
                    return 0, future

                # The value is the 8th element of a message tuple; indexing
                # (rather than unpacking exactly 8 names) tolerates messages
                # that carry extra trailing elements.
                val = batch.msgs[0][7]
                doc = json.loads(val)
                self.fields = sorted(doc.keys())
                if 'id' not in self.fields:
                    self.fields = ['id'] + self.fields
                scheme = CSVSink.CSV_JSON_SCHEME
                header = self.fields
            else:
                scheme = CSVSink.CSV_SCHEME
                header = ['id', 'flags', 'expiration', 'cas', 'value']

            csvfile = sys.stdout
            if self.spec.endswith(".csv"):
                path = self.spec[len(scheme):]
                try:
                    # Binary mode is what the Python 2 csv module expects.
                    # NOTE(review): on Python 3 csv.writer needs a text-mode
                    # file opened with newline='' -- confirm the interpreter.
                    csvfile = open(path, "wb")
                except IOError:
                    # `except IOError:` parses on both Python 2 and 3; the
                    # exception object itself was never used.
                    return "error: could not write csv to file:%s" % path, None
            self.writer = csv.writer(csvfile)
            self.writer.writerow(header)
Esempio n. 2
0
 def consume_batch_async(self, batch):
     """Pair ``batch`` with a new ``pump.SinkBatchFuture`` and queue it.

     Returns whatever ``self.push_next_batch`` returns for the batch and
     its future.
     """
     pending = pump.SinkBatchFuture(self, batch)
     return self.push_next_batch(batch, pending)
Esempio n. 3
0
 def consume_batch_async(
     self, batch: Optional[pump.Batch]
 ) -> Tuple[couchbaseConstants.PUMP_ERROR, Optional[pump.SinkBatchFuture]]:
     """Hand ``batch`` to ``push_next_batch`` together with a fresh future.

     A ``pump.SinkBatchFuture`` is built for this sink and batch, and the
     result of ``self.push_next_batch`` is returned unchanged.
     """
     batch_future = pump.SinkBatchFuture(self, batch)
     outcome = self.push_next_batch(batch, batch_future)
     return outcome
Esempio n. 4
0
    def consume_batch_async(
        self, batch: pump.Batch
    ) -> Tuple[couchbaseConstants.PUMP_ERROR, Optional[pump.SinkBatchFuture]]:
        """Write the mutations in ``batch`` as CSV rows.

        While ``self.writer`` is unset, the destination is prepared first:
        ``sys.stdout`` by default, or the file named by
        ``self.get_csvfile(<spec without its scheme prefix>)`` when
        ``self.spec`` ends in ".csv". A ``csv.writer`` is created and the
        header row written.

        * JSON mode (spec starts with ``CSVSink.CSV_JSON_SCHEME``): columns
          are the sorted keys of the first message's JSON value, with 'id'
          prepended when the document has no 'id' key. Each mutation whose
          value parses to a JSON object yields one row; the 'id' column holds
          the document key. A field missing from a document is written as an
          empty cell. Values that are empty, not valid JSON, or not a JSON
          object produce no row.
        * Raw mode (any other spec): a fixed eight-column header, and one row
          per mutation with the value written as-is.

        Messages for which ``self.skip(key, vbucket_id)`` is true are
        ignored, as are TAP/DCP deletes and ``CMD_GET``.

        Returns:
            ``(0, future)`` with the future already marked done on success
            (including a JSON-mode call with an empty batch before the
            writer exists), or ``(error_message, None)`` when the output
            file cannot be opened, a write raises ``IOError``, or a message
            carries an unknown command.
        """
        if not self.writer:
            self.csvfile = sys.stdout
            if self.spec.startswith(CSVSink.CSV_JSON_SCHEME):
                if len(batch.msgs) <= 0:
                    # No document to derive the columns from yet; report the
                    # batch as done and set up on a later batch.
                    future = pump.SinkBatchFuture(self, batch)
                    self.future_done(future, 0)
                    return 0, future

                # NOTE(review): the columns come from the first message only,
                # so its value must be a JSON object; anything else raises
                # here before any output is produced.
                first_val = batch.msgs[0][7]
                doc = json.loads(first_val)
                self.fields = sorted(doc.keys())
                if 'id' not in self.fields:
                    self.fields = ['id'] + self.fields
                scheme = CSVSink.CSV_JSON_SCHEME
                header = self.fields
            else:
                scheme = CSVSink.CSV_SCHEME
                header = [
                    'id', 'flags', 'expiration', 'cas', 'value', 'rev', 'vbid',
                    'dtype'
                ]

            if self.spec.endswith(".csv"):
                filename = self.get_csvfile(self.spec[len(scheme):])
                try:
                    # newline='' lets the csv module control line endings;
                    # without it "\r\n" is written as "\r\r\n" on platforms
                    # that translate newlines.
                    self.csvfile = open(
                        filename, "w", encoding='utf-8', newline='')
                except IOError:
                    return f'error: could not write csv to file: {filename}', None
            self.writer = csv.writer(self.csvfile)
            self.writer.writerow(header)

        mutation_cmds = (
            couchbaseConstants.CMD_TAP_MUTATION,
            couchbaseConstants.CMD_DCP_MUTATION,
        )
        ignored_cmds = (
            couchbaseConstants.CMD_TAP_DELETE,
            couchbaseConstants.CMD_DCP_DELETE,
            couchbaseConstants.CMD_GET,
        )

        # Length of the first non-skipped message; it decides for the whole
        # batch whether the extended fields (index 8 onwards) are read.
        msg_tuple_format = 0
        for msg in batch.msgs:
            cmd, vbucket_id, key, flg, exp, cas, meta, val_bytes = msg[:8]
            if self.skip(key, vbucket_id):
                continue
            if not msg_tuple_format:
                msg_tuple_format = len(msg)
            dtype = 0
            if msg_tuple_format > 8:
                _seqno, dtype, _nmeta, _conf_res = msg[8:12]
            if dtype > 2:
                # Best effort: if decompression fails the value is used
                # exactly as it was received.
                try:
                    val_bytes = snappy.uncompress(val_bytes)
                except Exception:
                    pass
            try:
                if cmd in mutation_cmds:
                    if self.fields:
                        if val_bytes:
                            try:
                                doc = json.loads(val_bytes)
                                if isinstance(doc, dict):
                                    # Documents need not share the first
                                    # document's keys; a missing field
                                    # becomes an empty cell.
                                    row = [
                                        pump.returnString(key)
                                        if field == 'id'
                                        else doc.get(field, '')
                                        for field in self.fields
                                    ]
                                    self.writer.writerow(row)
                            except ValueError:
                                # Not valid JSON: no row for this message.
                                pass
                    else:
                        self.writer.writerow([
                            pump.returnString(key), flg, exp, cas, val_bytes,
                            meta, vbucket_id, dtype
                        ])
                elif cmd in ignored_cmds:
                    pass
                else:
                    return f'error: CSVSink - unknown cmd: {cmd!s}', None
            except IOError:
                return "error: could not write csv to stdout", None

        future = pump.SinkBatchFuture(self, batch)
        self.future_done(future, 0)
        return 0, future
Esempio n. 5
0
                        #rev = self.convert_meta(meta)
                        self.writer.writerow(
                            [key, flg, exp, cas, val, meta, vbucket_id, dtype])
                elif cmd in [
                        couchbaseConstants.CMD_TAP_DELETE,
                        couchbaseConstants.CMD_DCP_DELETE
                ]:
                    pass
                elif cmd == couchbaseConstants.CMD_GET:
                    pass
                else:
                    return "error: CSVSink - unknown cmd: " + str(cmd), None
            except IOError:
                return "error: could not write csv to stdout", None

        future = pump.SinkBatchFuture(self, batch)
        self.future_done(future, 0)
        return 0, future

    def close(self):
        """Close the CSV output file and forget it.

        Nothing happens when no file is held or when output is going to
        ``sys.stdout``, which is left open.
        """
        handle = self.csvfile
        if handle is None:
            return
        if handle != sys.stdout:
            handle.close()
            self.csvfile = None

    def get_csvfile(self, base):
        extension = os.path.splitext(base)
        filename = extension[0]
        if self.bucket_name():
            filename = filename + "_" + urllib.quote_plus(self.bucket_name())
        if self.node_name():
            filename = filename + "_" + urllib.quote_plus(self.node_name())