def test_cache_cddd_embeddings():
    """
    Verify that CDDD embeddings fetched from ChEMBL are cached correctly.
    """
    num_recs = 1000

    logger.info('CDDD Embeddings Check!')
    cache_dir = tempfile.mkdtemp()
    logger.info('Creating cache at %s', cache_dir)

    # Write embeddings to the cache
    chem_data = ChEmblData(fp_type=Embeddings)
    chem_data.save_fingerprints(os.path.join(cache_dir, FINGER_PRINT_FILES),
                                num_recs=num_recs)

    # Verify the cache by reading the records back
    hdf_path = os.path.join(cache_dir, FINGER_PRINT_FILES)
    logger.info('Reading molecules from %s...', hdf_path)
    mol_df = dask.dataframe.read_hdf(hdf_path, 'fingerprints')
    mol_df = mol_df.compute()

    logger.info('Expected at most %s recs, found %s.', num_recs, mol_df.shape[0])
    assert mol_df.shape[0] <= num_recs, \
        ('Expected at most %d recs, found %d.' % (num_recs, mol_df.shape[0]))

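
# A minimal sketch of the same cache round-trip with automatic cleanup of the
# temporary directory, assuming ChEmblData, Embeddings, FINGER_PRINT_FILES and
# the os/tempfile/dask imports are available as in the test above. The helper
# name is hypothetical.
def _cache_and_read_embeddings(num_recs=1000):
    with tempfile.TemporaryDirectory() as cache_dir:
        hdf_path = os.path.join(cache_dir, FINGER_PRINT_FILES)
        ChEmblData(fp_type=Embeddings).save_fingerprints(hdf_path,
                                                         num_recs=num_recs)
        # Read the records back before the temporary directory is removed
        mol_df = dask.dataframe.read_hdf(hdf_path, 'fingerprints').compute()
    return mol_df
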
def test_add_molecule_GpuKmeansUmap():
    """
    Verify that molecules can be added to a GpuKmeansUmap workflow when the
    input is a cuDF dataframe.
    """
    _create_context()

    n_molecules, dao, mol_df = _fetch_chembl_test_dataset()

    if hasattr(mol_df, 'compute'):
        mol_df = mol_df.compute()

    mol_df = cudf.from_pandas(mol_df)
    n_molecules = mol_df.shape[0]

    # test_mol should contain both available and new molecules
    test_mol = mol_df[n_molecules - 20:]
    mols_tobe_added = test_mol['id'].to_array().tolist()

    chData = ChEmblData()
    logger.info('Fetching ChEMBL ids for %s', mols_tobe_added)
    mols_tobe_added = [
        str(row[0])
        for row in chData.fetch_chemblId_by_molregno(mols_tobe_added)
    ]
    logger.info('ChEMBL ids to be added %s', mols_tobe_added)

    # Molecules to be used for clustering
    mol_df = mol_df[:n_molecules - 10]

    wf = GpuKmeansUmap(n_molecules=n_molecules, dao=dao, pca_comps=64)
    wf.cluster(df_mol_embedding=mol_df)

    missing_mols, molregnos, df_embedding = wf.add_molecules(mols_tobe_added)
    assert len(missing_mols) == 10, \
        'Expected 10 missing molecules, found %d' % len(missing_mols)

    # TODO: Once the issue with add_molecule in a multi-GPU env. is fixed, the
    # number of missing_molregno found should be 0
    missing_mols, molregnos, df_embedding = wf.add_molecules(mols_tobe_added)
    assert len(missing_mols) == 0, \
        'Expected no missing molecules, found %d' % len(missing_mols)

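
# Note: Series.to_array() used above has been deprecated and later removed in
# newer cuDF releases. A hedged equivalent for such builds (Series.to_pandas()
# is assumed to be available, as in recent cuDF versions):
#
#     mols_tobe_added = test_mol['id'].to_pandas().tolist()
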
def __init__(self, fp_type):
    self.chem_data = ChEmblData(fp_type)

def cache(self):
    """
    Create fingerprint/embedding cache.
    """
    context = Context()
    data_dir = context.get_config('data_mount_path', default='/data')
    cache_dir = os.path.join(data_dir, 'cache')

    parser = argparse.ArgumentParser(description='Create cache')
    parser.add_argument('-ct', '--cache_type',
                        dest='cache_type',
                        type=str,
                        default='MorganFingerprint',
                        choices=['MorganFingerprint', 'Embeddings'],
                        help='Type of data preprocessing (MorganFingerprint or Embeddings)')
    parser.add_argument('-c', '--cache_directory',
                        dest='cache_directory',
                        type=str,
                        default=cache_dir,
                        help='Location to create fingerprint cache')
    parser.add_argument('--batch_size',
                        dest='batch_size',
                        type=int,
                        default=100000,
                        help='Chunk size used when writing the cache.')
    parser.add_argument('--n_cpu',
                        dest='n_cpu',
                        type=int,
                        default=12,
                        help='Number of CPU workers to use')
    parser.add_argument('-d', '--debug',
                        dest='debug',
                        action='store_true',
                        default=False,
                        help='Show debug messages')
    parser.add_argument('-m', '--n_mol',
                        dest='n_mol',
                        type=int,
                        default=-1,
                        help='Number of molecules for analysis. Use a negative number to use the whole dataset.')

    args = parser.parse_args(sys.argv[2:])

    if args.debug:
        logger.setLevel(logging.DEBUG)

    cluster = LocalCluster(dashboard_address=':9001',
                           n_workers=args.n_cpu,
                           threads_per_worker=4)
    client = Client(cluster)

    with client:
        task_start_time = datetime.now()

        if not os.path.exists(args.cache_directory):
            logger.info('Creating folder %s...', args.cache_directory)
            os.makedirs(args.cache_directory)

        if args.cache_type == 'MorganFingerprint':
            preprocess_type = MorganFingerprint
        elif args.cache_type == 'Embeddings':
            preprocess_type = Embeddings

        chem_data = ChEmblData(fp_type=preprocess_type)
        chem_data.save_fingerprints(os.path.join(args.cache_directory, FINGER_PRINT_FILES),
                                    num_recs=args.n_mol,
                                    batch_size=args.batch_size)

        logger.info('Fingerprint generated in (hh:mm:ss.ms) {}'.format(
            datetime.now() - task_start_time))
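
# A minimal sketch of driving this cache step programmatically. The entry-point
# script name 'startdash.py' and the `launcher` object (an instance of the
# class defining cache() above) are assumptions; the flags come from the
# argparse definition. cache() parses sys.argv[2:], so the script name and the
# subcommand must come first.
#
#     import sys
#     sys.argv = ['startdash.py', 'cache',
#                 '--cache_type', 'Embeddings',
#                 '--cache_directory', '/tmp/fp_cache',
#                 '--n_mol', '1000']
#     launcher.cache()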