def test(self, infile):
    """Run the GMAF parser over every 'rs' record of *infile*.

    Records are read through ``rec_handler(anyfile(infile))``; any record
    that does not start with ``'rs'`` is ignored.  For each remaining
    record the text is stripped and split into lines, which are passed to
    ``self._parse_rsline`` (return value discarded) and then to
    ``self._parse_GMAF``.

    The number of falsy ``_parse_GMAF`` results is printed once the input
    is exhausted.

    Returns:
        A list with one ``_parse_GMAF`` result per 'rs' record, in input
        order.  Falsy results are counted as errors but are still
        included in the list.
    """
    # NOTE(review): the handle returned by anyfile() is never closed
    # here -- confirm whether it needs explicit cleanup.
    handle = anyfile(infile)
    records = rec_handler(handle)
    results = []
    failures = 0
    for record in records:
        if record.startswith('rs'):
            record_lines = record.strip().split('\n')
            self._parse_rsline(record_lines)
            parsed = self._parse_GMAF(record_lines)
            if not parsed:
                failures += 1
            # Falsy results are kept so the output stays aligned with
            # the sequence of 'rs' records.
            results.append(parsed)
    print(failures)
    return results
def load_data(input_file):
    """Lazily yield mapped records parsed from *input_file*.

    The input is split into records by ``rec_handler`` using
    ``'</ClinVarSet>\\n'`` as the block terminator (kept in each record).
    Every record is parsed with ``clinvar.parseString`` and each item
    produced by ``_map_line_to_json`` for the parsed record is yielded.

    This is a generator: nothing is read or printed until iteration
    starts.

    Args:
        input_file: passed straight to ``rec_handler``.

    Yields:
        Whatever ``_map_line_to_json`` yields for each parsed record.

    Raises:
        Any exception raised by ``clinvar.parseString``; the offending
        record is printed before the exception propagates.
    """
    # The first two lines of the ClinVar XML carry no useful information,
    # so they are skipped.
    cv_data = rec_handler(input_file, block_end='</ClinVarSet>\n',
                          skip=2, include_block_end=True)
    # Function-call form works on both Python 2 and Python 3.
    print(input_file)
    for record in cv_data:
        # Skip the chunk that starts with the closing </ReleaseSet> tag
        # (presumably the tail of the file -- it is not a ClinVarSet).
        if record.startswith('\n</ReleaseSet>'):
            continue
        try:
            record_parsed = clinvar.parseString(record, silence=1)
        except Exception:
            # Dump the record that failed to parse to aid debugging,
            # then let the original error propagate unchanged.
            print(record)
            raise
        for record_mapped in _map_line_to_json(record_parsed):
            yield record_mapped
Exemple #3
0
def load_data(input_file):
    """Lazily yield mapped records parsed from *input_file*.

    ``rec_handler`` splits the input into records terminated by
    ``'</ClinVarSet>\\n'`` (the terminator is kept in each record).
    Each record is parsed with ``clinvar.parseString`` and every item
    that ``_map_line_to_json`` produces for it is yielded in turn.

    Being a generator, the function does no work (and prints nothing)
    until it is iterated.

    Args:
        input_file: forwarded unchanged to ``rec_handler``.

    Yields:
        The items produced by ``_map_line_to_json`` for each parsed
        record.

    Raises:
        Any exception raised by ``clinvar.parseString``; the record that
        triggered it is printed first.
    """
    # The first two lines of the ClinVar XML carry no useful information,
    # so they are skipped.
    cv_data = rec_handler(input_file,
                          block_end='</ClinVarSet>\n',
                          skip=2,
                          include_block_end=True)
    # Function-call form works on both Python 2 and Python 3.
    print(input_file)
    for record in cv_data:
        # Skip the chunk that starts with the closing </ReleaseSet> tag
        # (presumably the tail of the file -- it is not a ClinVarSet).
        if record.startswith('\n</ReleaseSet>'):
            continue
        try:
            record_parsed = clinvar.parseString(record, silence=1)
        except Exception:
            # Show the record that could not be parsed, then re-raise
            # the original error untouched.
            print(record)
            raise
        for record_mapped in _map_line_to_json(record_parsed):
            yield record_mapped
 def parse(self, infile):
     """Yield one parsed document per 'rs' record found in *infile*.

     The file name (without its directory) is printed first.  Records
     come from ``rec_handler(anyfile(infile))``; those that do not start
     with ``'rs'`` are skipped.  Each remaining record is passed to
     ``self.parse_one_record``:

     * a ``dict`` result is counted and yielded;
     * any other result is treated as an error key and tallied.

     After the input is exhausted, the number of yielded documents and
     the error tally are printed together.
     """
     print(os.path.basename(infile))
     yielded = 0
     error_tally = {}
     # NOTE(review): the handle returned by anyfile() is never closed
     # here -- confirm whether it needs explicit cleanup.
     for record in rec_handler(anyfile(infile)):
         if not record.startswith('rs'):
             continue
         outcome = self.parse_one_record(record)
         if not isinstance(outcome, dict):
             # Non-dict results are used directly as keys of the tally.
             error_tally[outcome] = error_tally.get(outcome, 0) + 1
             continue
         yielded += 1
         yield outcome
     print(yielded, error_tally)