예제 #1
0
def make_opsin_input():
    """Write result names that have no tetko match to ``opsin_input.txt``.

    Every ``*.json`` file in ``../examples/mp/results`` that has a
    counterpart ``../examples/mp/tetko/<patent_id>.json`` is loaded.  Each
    entry of a result's ``'names'`` list is compared, after
    ``excess_normalize``, with the ``'name'`` of every tetko record from the
    counterpart file.  Names without a match are written to
    ``opsin_input.txt`` (in the current working directory), one per line and
    at most once across all processed files.

    The patent id is also printed for every ``.json`` result file visited.
    """
    # Function-scope import: ``io.open`` handles UTF-8 decoding/encoding
    # itself and behaves the same on Python 2 and Python 3, whereas calling
    # ``.decode`` on ``read()`` / writing encoded bytes only works on Python 2.
    import io

    result_dir = '../examples/mp/results'
    seen = set()
    with io.open('opsin_input.txt', 'w', encoding='utf8') as op_out:
        for filename in os.listdir(result_dir):
            if not filename.endswith('.json'):
                continue
            # Drops the last nine characters of the file name.
            # NOTE(review): presumably a fixed suffix that includes
            # '.json' -- confirm against the actual result file names.
            patent_id = filename[:-9]
            print(patent_id)
            tetko_file = '../examples/mp/tetko/%s.json' % patent_id
            if not os.path.isfile(tetko_file):
                continue
            result_path = '%s/%s' % (result_dir, filename)
            with io.open(result_path, encoding='utf8') as fin:
                results = json.load(fin)
            with io.open(tetko_file, encoding='utf8') as tin:
                tetko_results = json.load(tin)
            # Normalize the tetko names once per file rather than once per
            # (name, tetko record) pair.
            # Assumes excess_normalize returns a hashable value (e.g. a
            # string) -- TODO confirm.
            tetko_names = set(
                excess_normalize(tetko_result['name'])
                for tetko_result in tetko_results
            )
            for result in results:
                if 'names' not in result:
                    continue
                for name in result['names']:
                    if name in seen:
                        # Already written; no need to normalize it again.
                        continue
                    if excess_normalize(name) in tetko_names:
                        continue
                    seen.add(name)
                    # Unicode literal so the text-mode file accepts the
                    # line on Python 2 as well.
                    op_out.write(u'%s\n' % name)
예제 #2
0
def _get_standardized_result(result, tetko_results, n2s):
    """Resolve one melting-point result to a name/structure record.

    Names are tried longest first.  A name whose ``excess_normalize`` form
    equals that of a tetko record's ``'name'`` takes that record's
    ``'smiles'`` and ``'inchikey'``.  If no name matches a tetko record, the
    names are looked up in ``n2s`` and the first usable SMILES is converted
    to an InChIKey through ``Chem``.

    Args:
        result: Mapping with a ``'names'`` list and a ``'melting_points'``
            list; only the ``'value'`` of the first melting point is used.
        tetko_results: Iterable of mappings with ``'name'``, ``'smiles'``
            and ``'inchikey'`` keys.
        n2s: Mapping from name to SMILES string; entries with a falsy value
            are skipped.

    Returns:
        A dict with the keys ``'name'``, ``'smiles'``, ``'inchikey'``,
        ``'value'`` and ``'float_value'``, or ``None`` when the value does
        not convert to a truthy float or no name can be resolved.
    """
    names = sorted(result['names'], key=len, reverse=True)
    raw_value = result['melting_points'][0]['value']
    fv = float_value(standardize_value(raw_value))
    # NOTE(review): this truthiness test also rejects a value of 0.0;
    # confirm what float_value returns on failure before tightening it.
    if not fv:
        print('INVALID')
        print(names)
        print(raw_value)
        return None

    def build(name, smiles, inchikey):
        # Single place that defines the shape of the returned record.
        return {
            'name': name,
            'smiles': smiles,
            'inchikey': inchikey,
            'value': raw_value,
            'float_value': fv
        }

    # Index tetko records by normalized name once instead of re-normalizing
    # every record for every candidate name.  setdefault keeps the first
    # record for a given normalized name, matching the original scan order.
    # Assumes excess_normalize returns a hashable value -- TODO confirm.
    tetko_by_name = {}
    for tetko_result in tetko_results:
        tetko_by_name.setdefault(
            excess_normalize(tetko_result['name']), tetko_result)

    for name in names:
        key = excess_normalize(name)
        if key in tetko_by_name:
            match = tetko_by_name[key]
            return build(name, match['smiles'], match['inchikey'])

    # Fall back to the name -> SMILES mapping supplied by the caller.
    for name in names:
        if name not in n2s:
            print('MISSING: %s' % name)
            continue
        smiles = n2s[name]
        # Skip empty entries and SMILES containing '.', which in SMILES
        # notation separates disconnected components.
        if not smiles or '.' in smiles:
            continue
        mol = Chem.MolFromSmiles(smiles)
        if mol:
            inchikey = Chem.InchiToInchiKey(Chem.MolToInchi(mol))
            return build(name, smiles, inchikey)
    return None