def store_record(self, session, rec, transformer=None):
    """Serialize ``rec`` and pass it to the write task's 'store_data' call.

    The serialized form comes from the first available of: the
    ``transformer`` argument, ``self.inTransformer``, ``self.inWorkflow``.
    When none is set, the record's SAX events are utf8-encoded, its pickled
    ``elementHash`` is appended and the pieces are joined with
    ``nonTextToken``.

    ``rec.recordStore`` is always set to ``self.recordStore.id``.  When
    ``rec.id`` is None it is filled in from the ``data`` attribute of the
    message received from the write task.

    Returns ``rec``.  Raises ValueError when ``self.writeTask`` is None.
    """
    rec.recordStore = self.recordStore.id
    # Maybe add metadata, etc.

    if transformer is not None:
        # Allow custom transformer
        payload = transformer.process_record(session, rec).get_raw(session)
    elif self.inTransformer is not None:
        payload = self.inTransformer.process_record(session, rec).get_raw(session)
    elif self.inWorkflow is not None:
        payload = self.inWorkflow.process(session, rec).get_raw(session)
    else:
        events = []
        for event in rec.get_sax(session):
            events.append(event.encode('utf8'))
        events.append("9 " + pickle.dumps(rec.elementHash))
        payload = nonTextToken.join(events)

    checksum = self.generate_checkSum(session, payload)
    metadata = {}
    metadata['byteCount'] = rec.byteCount
    metadata['wordCount'] = rec.wordCount
    metadata['digest'] = checksum

    # Guard clause: nothing can be stored without a write task.
    if self.writeTask is None:
        raise ValueError('WriteTask is None... did you call begin_storing?')

    self.writeTask.call(self.recordStore, 'store_data', session,
                        rec.id, payload, metadata)
    reply = self.writeTask.recv()
    if rec.id is None:
        rec.id = reply.data
    return rec
def store_record(self, session, rec, transformer=None):
    """Serialize ``rec`` and hand it to the write task via 'store_data'.

    Serialization source, in priority order:
      1. the ``transformer`` argument, if given;
      2. ``self.inTransformer``;
      3. ``self.inWorkflow``;
      4. otherwise the record's utf8-encoded SAX events plus its pickled
         ``elementHash``, joined with ``nonTextToken``.

    Side effects: sets ``rec.recordStore`` to ``self.recordStore.id`` and,
    when ``rec.id`` is None, sets ``rec.id`` from the ``data`` attribute of
    the message received back from the write task.

    Returns:
        The same ``rec`` object.

    Raises:
        ValueError: if ``self.writeTask`` is None.
    """
    rec.recordStore = self.recordStore.id
    # Maybe add metadata, etc.

    # All None tests use identity (``is`` / ``is not``): ``!=`` / ``==``
    # dispatch to user-defined __ne__ / __eq__, which could misclassify a
    # real transformer, workflow or record id as (not) None.
    if transformer is not None:
        # Allow custom transformer
        doc = transformer.process_record(session, rec)
        data = doc.get_raw(session)
    elif self.inTransformer is not None:
        doc = self.inTransformer.process_record(session, rec)
        data = doc.get_raw(session)
    elif self.inWorkflow is not None:
        doc = self.inWorkflow.process(session, rec)
        data = doc.get_raw(session)
    else:
        # NOTE(review): concatenating "9 " with pickle.dumps() output
        # presumably relies on Python 2 byte strings -- confirm before
        # porting this branch.
        sax = [x.encode('utf8') for x in rec.get_sax(session)]
        sax.append("9 " + pickle.dumps(rec.elementHash))
        data = nonTextToken.join(sax)

    dig = self.generate_checkSum(session, data)
    md = {
        'byteCount': rec.byteCount,
        'wordCount': rec.wordCount,
        'digest': dig,
    }

    if self.writeTask is None:
        raise ValueError('WriteTask is None... did you call begin_storing?')

    self.writeTask.call(self.recordStore, 'store_data', session,
                        rec.id, data, md)
    msg = self.writeTask.recv()
    if rec.id is None:
        # Record had no identifier yet; take the one reported back.
        rec.id = msg.data
    return rec
def process_record(self, session, rec):
    """Return a StringDocument holding the serialized form of ``rec``.

    The record's SAX events are utf8-encoded, a final "9 " entry carrying
    the pickled ``rec.elementHash`` is appended, and everything is joined
    with ``nonTextToken``.
    """
    events = []
    for event in rec.get_sax(session):
        events.append(event.encode('utf8'))
    # Trailing entry: pickled element hash, prefixed with "9 ".
    events.append("9 " + pickle.dumps(rec.elementHash))
    return StringDocument(nonTextToken.join(events))
def process_record(self, session, rec):
    """Return a StringDocument wrapping only the identifier of ``rec``.

    Three entries are joined with ``nonTextToken``: '1 identifier {}',
    '3 ' followed by ``str(rec.id)``, and '2 identifier'.  ``session`` is
    not used.
    """
    events = ['1 identifier {}']
    events.append('3 ' + str(rec.id))
    events.append('2 identifier')
    return StringDocument(nonTextToken.join(events))