Beispiel #1
0
from pyproto.utils import parse_iter_sdf, rlen

# Walk the ChEBI SDF dump and record every entry whose PubChem link list
# holds more than one 'CID:' reference, then print what was collected.
path_fn = '../../tmp/ChEBI_complete.sdf'

# Name of the SDF field that holds the PubChem cross-references.
attr = 'PubChem Database Links'

v_card = []  # [ChEBI ID, CID links] pairs for the flagged entries
pc = set()   # printed at the end; nothing in this script adds to it
i = 0        # running entry count; stays 0 if the SDF yields nothing

for i, entry in enumerate(parse_iter_sdf(path_fn), start=1):
    if attr in entry:
        # Keep only the links that point at a PubChem compound id.
        cid_links = [link for link in entry[attr] if link.startswith('CID:')]

        # rlen is a project helper (presumably the item count — confirm).
        if rlen(cid_links) > 1:
            v_card.append([entry['ChEBI ID'], cid_links])

    # Progress output every 5000 entries.
    if not i % 5000:
        print(i)

print("CHEBI SDF")
print(v_card)
print(pc)
    # NOTE(review): the loop header that owns this body is not visible in this
    # chunk, nor are the definitions of r, kegg_id, card, count, nchar and
    # parse_KEGG — confirm against the full file.
    i += 1

    # Progress output every 100 iterations.
    if i % 100 ==0:
        print(i)

    # Skip this iteration when r carries no content
    # (r is presumably a fetched response object — confirm).
    if r.content is None:
        continue

    # Decode the content as UTF-8 and hand it, with the id, to parse_KEGG,
    # which returns a (data, refs) pair.
    data, refs = parse_KEGG(kegg_id, r.content.decode('utf-8'))

    # Skip this iteration when either half of the pair is missing.
    if data is None or refs is None:
        continue

    # Update the per-attribute statistics over the items of both mappings.
    for attr, val in list(data.items())+list(refs.items()):
        # rlen is a project helper (presumably the number of values — confirm).
        c = rlen(val)
        # card[attr] tracks the largest rlen(val) seen for this attribute.
        # NOTE(review): card/count/nchar are indexed without a prior insert,
        # so they presumably are defaultdicts — confirm.
        if c > card[attr]:
            card[attr] = c
        # count[attr] counts how often the attribute had rlen(val) > 1.
        if c > 1:
            count[attr] += 1
        # nc is the string length for a str value, otherwise the length of
        # the longest element of val.
        if isinstance(val, str):
            nc = len(val)
        else:
            # NOTE(review): max() raises ValueError if val is empty.
            nc = max([len(f) for f in val])
        # nchar[attr] tracks the largest nc seen for this attribute.
        if nc > nchar[attr]:
            nchar[attr] = nc

# Print the collected statistics as plain dicts.
print("kegg:")
print(dict(card))
print(dict(count))
print(dict(nchar))