def test(self, infile):
    """Run the rs-line and GMAF parsers over every ``rs`` record in *infile*.

    Records are obtained by passing the object returned by
    ``anyfile(infile)`` to ``rec_handler``; records that do not start with
    ``'rs'`` are ignored.  For each remaining record,
    ``self._parse_rsline`` is called (its return value is discarded) and
    the result of ``self._parse_GMAF`` is collected, whether or not it is
    truthy.

    The number of falsy ``_parse_GMAF`` results is printed once all
    records have been processed.

    Returns:
        list: one ``_parse_GMAF`` result per processed record, in order.
    """
    handle = anyfile(infile)
    results = []
    failures = 0
    for record in rec_handler(handle):
        if record.startswith('rs'):
            rec_lines = record.strip().split('\n')
            self._parse_rsline(rec_lines)
            gmaf = self._parse_GMAF(rec_lines)
            # Falsy results are counted as errors but still kept in the
            # output so positions line up with the processed records.
            failures += 0 if gmaf else 1
            results.append(gmaf)
    print(failures)
    return results
def load_data(input_file):
    """Yield mapped documents for every ClinVar record found in *input_file*.

    The file is split into records by ``rec_handler`` using
    ``'</ClinVarSet>\\n'`` as the block terminator (the terminator is kept
    in each record).  Each record is parsed with ``clinvar.parseString``
    and then expanded through ``_map_line_to_json``, which may produce any
    number of documents per record.

    Args:
        input_file: passed straight to ``rec_handler``.

    Yields:
        Every item produced by ``_map_line_to_json`` for each parsed record.

    Raises:
        Exception: whatever ``clinvar.parseString`` raises; the offending
            record is printed before the exception is re-raised.
    """
    # The first two lines of the ClinVar XML file carry no useful
    # information, so they are skipped.
    cv_data = rec_handler(input_file, block_end='</ClinVarSet>\n',
                          skip=2, include_block_end=True)
    # Function-call form works on both Python 2 and Python 3 and matches
    # the print(...) style used elsewhere in this code.
    print(input_file)
    for record in cv_data:
        # A chunk that begins with the closing ReleaseSet tag is not a
        # real record (presumably the tail of the file); skip it.
        if record.startswith('\n</ReleaseSet>'):
            continue
        try:
            record_parsed = clinvar.parseString(record, silence=1)
        except Exception:
            # Dump the record that failed to parse to aid debugging, then
            # let the original exception propagate unchanged.
            print(record)
            raise
        # Plain loop rather than ``yield from`` to stay Python 2 compatible.
        for record_mapped in _map_line_to_json(record_parsed):
            yield record_mapped
def parse(self, infile):
    """Lazily yield one parsed document per ``rs`` record in *infile*.

    The base name of *infile* is printed first.  Records come from
    ``rec_handler(anyfile(infile))``; those not starting with ``'rs'`` are
    skipped.  Each remaining record is handed to
    ``self.parse_one_record``: a ``dict`` result is yielded, while any
    other result is treated as an error key and tallied.

    When the input is exhausted, the number of yielded documents and the
    error tally are printed together.

    Yields:
        dict: each dictionary returned by ``self.parse_one_record``.
    """
    print(os.path.basename(infile))
    handle = anyfile(infile)
    n_docs = 0
    error_tally = {}
    for record in rec_handler(handle):
        if not record.startswith('rs'):
            continue
        parsed = self.parse_one_record(record)
        if not isinstance(parsed, dict):
            # Non-dict results are used as keys to count failures by kind.
            error_tally[parsed] = error_tally.get(parsed, 0) + 1
            continue
        n_docs += 1
        yield parsed
    print(n_docs, error_tally)