class DataDiskStorageWriter:
    """Write dataset records to three parallel files under ``dataset_path``.

    For every saved record one line goes to each of:

    * ``vdata.ixbz2``   - the record itself as JSON, written either through
      a ``forome_tools.ixbz2`` child process (``popen_mode`` true) or through
      an in-process ``FormatterIndexBZ2``;
    * ``fdata.json.gz`` - the value returned by ``filter_set.process``;
    * ``pdata.json.gz`` - the value returned by
      ``AnfisaConfig.getVariantSystemFields``.

    The object is a context manager; leaving the ``with`` block calls
    :meth:`close`.
    """

    def __init__(self, popen_mode, dataset_path, filter_set, trans_prep,
                 view_checker=None, report_mode=False):
        """Open all output streams.

        Args:
            popen_mode: when true, vdata is piped to a child Python process
                running ``forome_tools.ixbz2``; otherwise it is written
                in-process with ``FormatterIndexBZ2``.
            dataset_path: directory that receives the three output files.
            filter_set: object whose ``process(rec_no, record)`` is called
                for each record.
            trans_prep: object whose ``doRec(rec_no, record, flt_data,
                pre_data)`` is called for each record.
            view_checker: optional object whose ``regValue(rec_no, record)``
                is called for each record.
            report_mode: when true, ``close()`` echoes the child's stderr
                output to this process's stderr (popen mode only).
        """
        self.mPath = dataset_path
        self.mFilterSet = filter_set
        self.mTransPrep = trans_prep
        self.mViewChecker = view_checker
        self.mReportMode = report_mode
        self.mTotal = 0

        vdata_path = self.mPath + "/vdata.ixbz2"
        if popen_mode:
            # An argument list without a shell keeps paths containing spaces
            # or shell metacharacters intact and rules out command injection
            # through dataset_path.
            # bufsize is left at its default: line buffering (bufsize=1) is
            # unavailable for binary pipes; the text wrapper below does the
            # per-line flushing.
            # NOTE(review): the child's stderr is only drained in close();
            # presumably --calm keeps its output small - confirm.
            self.mVDataProc = Popen(
                [sys.executable, "-m", "forome_tools.ixbz2", "--calm",
                 "-o", vdata_path, "/dev/stdin"],
                stdin=PIPE, stderr=PIPE,
                universal_newlines=False, close_fds=True)
            self.mVDataOut = TextIOWrapper(
                self.mVDataProc.stdin, encoding="utf-8",
                line_buffering=True)
        else:
            self.mVDataProc = None
            self.mVDataOut = FormatterIndexBZ2(vdata_path)

        self.mFDataOut = gzip.open(
            self.mPath + "/fdata.json.gz", 'wt', encoding="utf-8")
        self.mPDataOut = gzip.open(
            self.mPath + "/pdata.json.gz", 'wt', encoding="utf-8")

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close all outputs; exceptions from the ``with`` body propagate."""
        self.close()

    def __enter__(self):
        """Return the writer itself for use in a ``with`` statement."""
        return self

    def close(self):
        """Finish the vdata stream and close the fdata/pdata files.

        In popen mode this waits for the child process to exit and, when
        report mode is on, prints the child's stderr output line by line to
        ``sys.stderr``. The gzip outputs are closed even if finishing the
        vdata stream raises.
        """
        try:
            if self.mVDataProc is not None:
                # communicate() closes the child's stdin, collects stderr
                # and waits for termination, so no separate wait() is needed.
                _, vreport_data = self.mVDataProc.communicate()
                if self.mReportMode:
                    for line in vreport_data.decode("utf-8").splitlines():
                        print(line, file=sys.stderr)
            else:
                self.mVDataOut.close()
        finally:
            try:
                self.mFDataOut.close()
            finally:
                self.mPDataOut.close()

    def getTotal(self):
        """Return the number of records saved so far."""
        return self.mTotal

    def saveRecord(self, record):
        """Process one record and append it to all three outputs.

        The record number passed to the collaborators is the count of
        records saved before this call.
        """
        rec_no = self.mTotal
        flt_data = self.mFilterSet.process(rec_no, record)
        if self.mViewChecker is not None:
            self.mViewChecker.regValue(rec_no, record)
        pre_data = AnfisaConfig.getVariantSystemFields(record)
        self.mTransPrep.doRec(rec_no, record, flt_data, pre_data)

        # The record is serialized only after the calls above, which receive
        # the same record object.
        record_line = json.dumps(record, ensure_ascii=False)
        if self.mVDataProc is not None:
            print(record_line, file=self.mVDataOut)
        else:
            self.mVDataOut.putLine(record_line)
        print(json.dumps(flt_data, ensure_ascii=False), file=self.mFDataOut)
        print(json.dumps(pre_data, ensure_ascii=False), file=self.mPDataOut)
        self.mTotal += 1