Example #1
0
class DataDiskStorageWriter:
    """Write the per-record outputs of a dataset into one directory.

    Three files are produced inside ``dataset_path``:

    * ``vdata.ixbz2`` - the JSON dump of every record, written either
      through a ``forome_tools.ixbz2`` child process (popen mode) or
      through an in-process ``FormatterIndexBZ2``;
    * ``fdata.json.gz`` - one JSON line per record with the value returned
      by ``filter_set.process()``;
    * ``pdata.json.gz`` - one JSON line per record with the value returned
      by ``AnfisaConfig.getVariantSystemFields()``.

    The instance works as a context manager: leaving the ``with`` block
    calls :meth:`close`.
    """

    def __init__(self,
                 popen_mode,
                 dataset_path,
                 filter_set,
                 trans_prep,
                 view_checker=None,
                 report_mode=False):
        """Open all output streams.

        Args:
            popen_mode: when true, ``vdata.ixbz2`` is produced by a child
                ``forome_tools.ixbz2`` process fed through its stdin;
                otherwise a ``FormatterIndexBZ2`` is used in-process.
            dataset_path: directory that receives the output files.
            filter_set: object whose ``process(rec_no, record)`` result is
                stored in ``fdata.json.gz``.
            trans_prep: object whose ``doRec(rec_no, record, flt_data,
                pre_data)`` is called for every record.
            view_checker: optional object whose ``regValue(rec_no, record)``
                is called for every record.
            report_mode: when true, the stderr output of the child process
                is echoed to ``sys.stderr`` on :meth:`close` (popen mode
                only).
        """
        self.mPath = dataset_path
        self.mFilterSet = filter_set
        self.mTransPrep = trans_prep
        self.mViewChecker = view_checker
        self.mReportMode = report_mode
        self.mTotal = 0

        if popen_mode:
            # The command is passed as an argument list without a shell, so
            # a dataset path (or interpreter path) containing spaces or
            # shell metacharacters is handed over verbatim instead of being
            # re-parsed by /bin/sh.
            # NOTE(review): stderr is a pipe that is drained only in
            # close(); presumably `--calm` keeps the child's output small
            # enough not to fill the pipe buffer - confirm.
            self.mVDataProc = Popen([sys.executable,
                                     "-m", "forome_tools.ixbz2",
                                     "--calm",
                                     "-o", self.mPath + "/vdata.ixbz2",
                                     "/dev/stdin"],
                                    stdin=PIPE,
                                    stderr=PIPE,
                                    bufsize=1,
                                    universal_newlines=False,
                                    close_fds=True)

            # Text layer over the child's binary stdin; line buffering
            # pushes every newline-terminated record down to the pipe
            # buffer as soon as it is printed.
            self.mVDataOut = TextIOWrapper(self.mVDataProc.stdin,
                                           encoding="utf-8",
                                           line_buffering=True)
        else:
            self.mVDataProc = None
            self.mVDataOut = FormatterIndexBZ2(self.mPath + "/vdata.ixbz2")

        self.mFDataOut = gzip.open(self.mPath + "/fdata.json.gz",
                                   'wt',
                                   encoding="utf-8")
        self.mPDataOut = gzip.open(self.mPath + "/pdata.json.gz",
                                   'wt',
                                   encoding="utf-8")

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close all outputs; exceptions from the ``with`` body propagate."""
        self.close()

    def __enter__(self):
        """Return the writer itself for use as a ``with`` target."""
        return self

    def close(self):
        """Finish the vdata stream and close both gzip outputs.

        In popen mode the child process is given end-of-input and awaited,
        and its stderr output is echoed to ``sys.stderr`` when report mode
        is on. The gzip files are closed even if finishing the vdata
        stream raises, so they are never left open; the exception itself
        still propagates to the caller.
        """
        try:
            if self.mVDataProc is not None:
                # communicate() flushes and closes the child's stdin, reads
                # its stderr to the end and waits for it to terminate, so
                # no separate wait() call is needed afterwards.
                _, vreport_data = self.mVDataProc.communicate()
                if self.mReportMode:
                    for line in vreport_data.decode("utf-8").splitlines():
                        print(line, file=sys.stderr)
            else:
                self.mVDataOut.close()
        finally:
            # Nested so that a failure while closing fdata cannot keep
            # pdata open either.
            try:
                self.mFDataOut.close()
            finally:
                self.mPDataOut.close()

    def getTotal(self):
        """Return the number of records saved so far."""
        return self.mTotal

    def saveRecord(self, record):
        """Process one record and append it to all three outputs.

        The record number handed to the filter set, the view checker and
        the transformation preparator is the count of records saved before
        this one (zero-based).
        """
        rec_no = self.mTotal
        flt_data = self.mFilterSet.process(rec_no, record)
        if self.mViewChecker is not None:
            self.mViewChecker.regValue(rec_no, record)
        pre_data = AnfisaConfig.getVariantSystemFields(record)
        self.mTransPrep.doRec(rec_no, record, flt_data, pre_data)

        # The record is dumped only after all the calls above, exactly as
        # before, so anything they change in it ends up in the output.
        record_json = json.dumps(record, ensure_ascii=False)
        if self.mVDataProc is not None:
            print(record_json, file=self.mVDataOut)
        else:
            self.mVDataOut.putLine(record_json)
        print(json.dumps(flt_data, ensure_ascii=False), file=self.mFDataOut)
        print(json.dumps(pre_data, ensure_ascii=False), file=self.mPDataOut)
        self.mTotal += 1