def consume_batch_async(self, batch):
    """Lazily set up the CSV writer from the first batch (legacy variant).

    NOTE(review): this chunk appears truncated — only the writer
    initialization is visible here; the per-message write loop and the
    final (rc, future) return of the complete implementation are missing
    from this span.  Reconstructed faithfully from what is visible.

    Returns (error-string, None) if the output file cannot be opened,
    or (0, future) for an empty batch in JSON mode.
    """
    if not self.writer:
        csvfile = sys.stdout
        if self.spec.startswith(CSVSink.CSV_JSON_SCHEME):
            # Empty batch: nothing to derive a header from; finish now.
            if len(batch.msgs) <= 0:
                future = pump.SinkBatchFuture(self, batch)
                self.future_done(future, 0)
                return 0, future
            # Derive the CSV header from the first document's JSON keys,
            # forcing an 'id' column to the front.
            cmd, vbucket_id, key, flg, exp, cas, meta, val = batch.msgs[0]
            doc = json.loads(val)
            self.fields = sorted(doc.keys())
            if 'id' not in self.fields:
                self.fields = ['id'] + self.fields
            if self.spec.endswith(".csv"):
                try:
                    # BUG FIX: original used Python-2-only "except IOError, e"
                    # and binary mode "wb"; csv.writer needs text mode on
                    # Python 3.
                    csvfile = open(self.spec[len(CSVSink.CSV_JSON_SCHEME):],
                                   "w", encoding='utf-8')
                except IOError:
                    return ("error: could not write csv to file:%s" %
                            self.spec[len(CSVSink.CSV_JSON_SCHEME):]), None
            self.writer = csv.writer(csvfile)
            self.writer.writerow(self.fields)
        else:
            if self.spec.endswith(".csv"):
                try:
                    csvfile = open(self.spec[len(CSVSink.CSV_SCHEME):],
                                   "w", encoding='utf-8')
                except IOError:
                    return ("error: could not write csv to file:%s" %
                            self.spec[len(CSVSink.CSV_SCHEME):]), None
            self.writer = csv.writer(csvfile)
            # Fixed header for the non-JSON (raw value) mode.
            self.writer.writerow(['id', 'flags', 'expiration', 'cas', 'value'])
def consume_batch_async(self, batch):
    """Queue *batch* for asynchronous writing.

    Wraps the batch in a SinkBatchFuture and delegates to
    push_next_batch, returning its (rc, future) result.
    """
    future = pump.SinkBatchFuture(self, batch)
    return self.push_next_batch(batch, future)
def consume_batch_async(
        self, batch: Optional[pump.Batch]
) -> Tuple[couchbaseConstants.PUMP_ERROR, Optional[pump.SinkBatchFuture]]:
    """Queue *batch* for asynchronous writing.

    Creates a SinkBatchFuture for the batch and hands both to
    push_next_batch, whose (rc, future) result is returned unchanged.
    """
    future = pump.SinkBatchFuture(self, batch)
    return self.push_next_batch(batch, future)
def consume_batch_async(
        self, batch: pump.Batch
) -> Tuple[couchbaseConstants.PUMP_ERROR, Optional[pump.SinkBatchFuture]]:
    """Write a batch of mutations out as CSV rows.

    On the first batch, lazily chooses the output stream (stdout, or a
    per-bucket/per-node file derived via get_csvfile), writes the header
    row, and caches the csv.writer.  In csv-json mode the header is the
    sorted key set of the first document; otherwise a fixed column set.

    Returns (0, future) on success, or (error-string, None) on failure.
    """
    if not self.writer:
        self.csvfile = sys.stdout
        if self.spec.startswith(CSVSink.CSV_JSON_SCHEME):
            if len(batch.msgs) <= 0:
                # Nothing to derive a header from; complete immediately.
                future = pump.SinkBatchFuture(self, batch)
                self.future_done(future, 0)
                return 0, future
            # Header comes from the first document's JSON keys, with a
            # synthetic 'id' column forced to the front.
            cmd, vbucket_id, key, flg, exp, cas, meta, val_bytes = \
                batch.msgs[0][:8]
            doc = json.loads(val_bytes)
            self.fields = sorted(doc.keys())
            if 'id' not in self.fields:
                self.fields = ['id'] + self.fields
            if self.spec.endswith(".csv"):
                filename = self.get_csvfile(
                    self.spec[len(CSVSink.CSV_JSON_SCHEME):])
                try:
                    self.csvfile = open(filename, "w", encoding='utf-8')
                except IOError as e:
                    # BUG FIX: message previously said "(unknown)" — report
                    # the actual target filename and the OS error.
                    return f'error: could not write csv to file: {filename}: {e}', None
            self.writer = csv.writer(self.csvfile)
            self.writer.writerow(self.fields)
        else:
            if self.spec.endswith(".csv"):
                filename = self.get_csvfile(
                    self.spec[len(CSVSink.CSV_SCHEME):])
                try:
                    self.csvfile = open(filename, "w", encoding='utf-8')
                except IOError as e:
                    # BUG FIX: same garbled "(unknown)" message as above.
                    return f'error: could not write csv to file: {filename}: {e}', None
            self.writer = csv.writer(self.csvfile)
            self.writer.writerow(['id', 'flags', 'expiration', 'cas',
                                  'value', 'rev', 'vbid', 'dtype'])

    # Messages may be 8-tuples (legacy) or >=12-tuples carrying
    # seqno/dtype/nmeta/conflict-resolution extras; detect once per batch.
    msg_tuple_format = 0
    for msg in batch.msgs:
        cmd, vbucket_id, key, flg, exp, cas, meta, val_bytes = msg[:8]
        if self.skip(key, vbucket_id):
            continue
        if not msg_tuple_format:
            msg_tuple_format = len(msg)
        seqno = dtype = nmeta = 0
        if msg_tuple_format > 8:
            seqno, dtype, nmeta, conf_res = msg[8:12]
        if dtype > 2:
            # dtype > 2 indicates a snappy-compressed value; best-effort
            # decompress, falling back to the raw bytes on any failure.
            try:
                val_bytes = snappy.uncompress(val_bytes)
            except Exception:
                pass
        try:
            if cmd in [couchbaseConstants.CMD_TAP_MUTATION,
                       couchbaseConstants.CMD_DCP_MUTATION]:
                if self.fields:
                    # JSON mode: one column per inferred field.
                    if val_bytes and len(val_bytes) > 0:
                        try:
                            row = []
                            doc = json.loads(val_bytes)
                            if isinstance(doc, dict):
                                for field in self.fields:
                                    if field == 'id':
                                        row.append(pump.returnString(key))
                                    else:
                                        row.append(doc[field])
                                self.writer.writerow(row)
                        except ValueError:
                            # Value is not valid JSON; skip the row.
                            pass
                else:
                    #rev = self.convert_meta(meta)
                    self.writer.writerow([pump.returnString(key), flg, exp,
                                          cas, val_bytes, meta, vbucket_id,
                                          dtype])
            elif cmd in [couchbaseConstants.CMD_TAP_DELETE,
                         couchbaseConstants.CMD_DCP_DELETE]:
                pass
            elif cmd == couchbaseConstants.CMD_GET:
                pass
            else:
                return f'error: CSVSink - unknown cmd: {cmd!s}', None
        except IOError:
            return "error: could not write csv to stdout", None

    future = pump.SinkBatchFuture(self, batch)
    self.future_done(future, 0)
    return 0, future
# NOTE(review): this flattened span is residue of another copy of
# consume_batch_async — it begins mid-function (at the commented-out
# convert_meta call, running through the final "return 0, future") with no
# visible "def" header, then defines close(), and then a get_csvfile(self,
# base) that is cut off before its return statement.  Because the leading
# fragment is headless and the trailing def is truncated, the span is left
# byte-identical; reconcile against the canonical version of this file
# before untangling.
#rev = self.convert_meta(meta) self.writer.writerow( [key, flg, exp, cas, val, meta, vbucket_id, dtype]) elif cmd in [ couchbaseConstants.CMD_TAP_DELETE, couchbaseConstants.CMD_DCP_DELETE ]: pass elif cmd == couchbaseConstants.CMD_GET: pass else: return "error: CSVSink - unknown cmd: " + str(cmd), None except IOError: return "error: could not write csv to stdout", None future = pump.SinkBatchFuture(self, batch) self.future_done(future, 0) return 0, future def close(self): if self.csvfile is not None and self.csvfile != sys.stdout: self.csvfile.close() self.csvfile = None def get_csvfile(self, base): extension = os.path.splitext(base) filename = extension[0] if self.bucket_name(): filename = filename + "_" + urllib.quote_plus(self.bucket_name()) if self.node_name(): filename = filename + "_" + urllib.quote_plus(self.node_name())