Esempio n. 1
0
def download_all_structures(db_path, record_type=GlycanRecordWithTaxon):  # pragma: no cover
    """Download the GlycomeDB structure dump and load it into a record database.

    The gzip-compressed XML dump is fetched over HTTP, every ``<structure>``
    element is parsed from its ``<sequence>`` text with ``glycoct.loads``,
    round-tripped through ``str`` to check that the mass survives
    re-serialization, wrapped in ``record_type`` and handed to
    ``RecordDatabase.load_data``. Structures that fail at any step are not
    fatal: they are printed and collected, and the collected list is stored
    with ``db.set_metadata("misses", ...)`` before the final commit.

    Parameters
    ----------
    db_path:
        Passed straight through to ``RecordDatabase``.
    record_type:
        Callable invoked as ``record_type(glycan, taxa=..., id=...)`` to build
        each record; also forwarded to ``RecordDatabase``.

    Returns
    -------
    The ``RecordDatabase`` instance, after ``commit()`` has been called.

    Raises
    ------
    Whatever ``response.raise_for_status()`` raises for an unsuccessful HTTP
    response; errors from decompression/XML parsing of the dump as a whole
    also propagate. Per-structure errors are caught and recorded instead.
    """
    # ``response.content`` is bytes and GzipFile needs a binary file object,
    # so wrap it in BytesIO (valid on both Python 2 and 3). Imported locally
    # so the module's existing imports are left untouched.
    from io import BytesIO

    mass_tolerance = 0.00001

    response = requests.get(u'http://www.glycome-db.org/http-services/getStructureDump.action?user=eurocarbdb')
    response.raise_for_status()
    handle = gzip.GzipFile(fileobj=BytesIO(response.content))
    xml = etree.parse(handle)
    db = RecordDatabase(db_path, record_type=record_type)
    misses = []
    i = 0
    for structure in xml.iterfind(".//structure"):
        # Bind the identifier before entering the try block so the handler
        # always reports *this* structure: previously a failure while reading
        # the id left the name unbound (NameError on the first structure) or
        # pointing at the previous structure's id.
        glycomedb_id = structure.attrib.get('id')
        try:
            glycomedb_id = int(glycomedb_id)
            i += 1
            glycoct_str = structure.find("sequence").text
            taxa = [Taxon(t.attrib['ncbi'], None, None) for t in structure.iterfind(".//taxon")]
            glycan = glycoct.loads(glycoct_str)
            # Compare the absolute difference: a reparse that *loses* mass is
            # just as wrong as one that gains it.
            reparsed_mass = glycoct.loads(str(glycan)).mass()
            if abs(reparsed_mass - glycan.mass()) > mass_tolerance:
                raise Exception("Mass did not match on reparse")
            record = record_type(glycan, taxa=taxa, id=glycomedb_id)
            # Individual loads are not committed; one commit happens after
            # the loop.
            db.load_data(record, commit=False, set_id=False)
            if i % 1000 == 0:
                print(i, "Records parsed.")
        except Exception as e:
            # Deliberate best-effort: one bad structure must not abort the
            # whole import, so record it and move on.
            misses.append((glycomedb_id, e))
            print(glycomedb_id, e)
    db.set_metadata("misses", misses)
    db.commit()
    return db