import os

import molgrid
import pybel  # OpenBabel 2; on OpenBabel 3 use: from openbabel import pybel
from pytest import approx


def test_filemap_gninatyping():
    datadir = os.path.dirname(__file__) + '/data'
    m = pybel.readstring('smi', 'c1c(Cl)cccc1CO')
    m.addh()

    t = molgrid.FileMappedGninaTyper(datadir + '/recmap')
    names = list(t.get_type_names())
    assert len(names) == 14
    assert names[-2] == 'Zinc'

    # each atom maps to a (type index, radius) pair; a negative index
    # means the atom is not covered by the map
    typs = [t.get_atom_type_index(a.OBAtom) for a in m.atoms]
    assert len(typs) == 16

    ccnt = 0
    ocnt = 0
    neg = 0
    clcnt = 0
    for ti, r in typs:
        if ti < 0:
            neg += 1
        elif 'Carbon' in names[ti]:
            ccnt += 1
            assert r == approx(1.9)
        elif names[ti] == 'OxygenXSDonorAcceptor_OxygenXSDonor':
            ocnt += 1
            assert r == approx(1.7)
        # there aren't any nitrogens in this molecule
        elif names[ti] == 'Bromine_Iodine_Chlorine_Fluorine':
            clcnt += 1
            assert r == approx(1.8)

    assert ccnt == 7
    assert ocnt == 1
    assert neg == 7  # the seven hydrogens are untyped
    assert clcnt == 1
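
# A minimal sketch of the map file format that FileMappedGninaTyper reads:
# one channel per line, each line listing the gnina/smina atom type names
# merged into that channel. Judging by the assertions above, the channel
# name is the constituent type names joined with underscores. The two-line
# map below is hypothetical; real maps (like the 14-channel recmap used in
# the test) draw on the full smina type vocabulary.
import tempfile

def _demo_filemap_format():
    with tempfile.NamedTemporaryFile('w', suffix='.map', delete=False) as f:
        f.write('AliphaticCarbonXSHydrophobe AliphaticCarbonXSNonHydrophobe\n')
        f.write('Bromine Iodine Chlorine Fluorine\n')
    t = molgrid.FileMappedGninaTyper(f.name)
    assert t.num_types() == 2
    assert list(t.get_type_names())[1] == 'Bromine_Iodine_Chlorine_Fluorine'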
import caffe
import molgrid
import numpy as np

# parse_encode_type and least_prime_factor are helpers defined elsewhere
# in this module.

def make_model(
    encode_type='data',
    data_dim=24,
    resolution=0.5,
    data_options='',
    n_levels=0,
    conv_per_level=0,
    arch_options='',
    n_filters=32,
    width_factor=2,
    n_latent=None,
    loss_types='',
    batch_size=16,
    conv_kernel_size=3,
    latent_kernel_size=None,
    pool_type='a',
    unpool_type='n',
    growth_rate=16,
    rec_map='',
    lig_map='',
    rec_molcache='',
    lig_molcache='',
    loss_weight_L1=1.0,
    loss_weight_L2=1.0,
    loss_weight_KL=1.0,
    loss_weight_log=1.0,
    loss_weight_wass=1.0,
    verbose=False,
):
    molgrid_data, encoders, decoders = parse_encode_type(encode_type)

    # data options
    use_covalent_radius = 'c' in data_options
    binary_atoms = 'b' in data_options
    fixed_radius = 'f' in data_options

    # architecture options
    leaky_relu = 'l' in arch_options
    gaussian_output = 'g' in arch_options
    sigmoid_output = 's' in arch_options
    self_attention = 'a' in arch_options
    batch_disc = 'b' in arch_options
    dense_net = 'd' in arch_options
    init_conv_pool = 'i' in arch_options
    fully_conv = 'c' in arch_options

    assert len(decoders) <= 1
    assert pool_type in ['c', 'm', 'a']
    assert unpool_type in ['c', 'n']
    assert conv_kernel_size % 2 == 1
    assert not latent_kernel_size or latent_kernel_size % 2 == 1

    # determine number of rec and lig channels
    n_channels = dict()
    n_channels['rec'] = molgrid.FileMappedGninaTyper(rec_map).num_types()
    n_channels['lig'] = molgrid.FileMappedGninaTyper(lig_map).num_types()
    n_channels['data'] = n_channels['rec'] + n_channels['lig']

    net = caffe.NetSpec()

    # input
    if molgrid_data:
        net.data, net.label, net.aff = caffe.layers.MolGridData(ntop=3,
            include=dict(phase=caffe.TRAIN),
            source='TRAINFILE',
            root_folder='DATA_ROOT',
            has_affinity=True,
            batch_size=batch_size,
            dimension=(data_dim - 1)*resolution,
            resolution=resolution,
            binary_occupancy=binary_atoms,
            # either False, or just over half a voxel diagonal so that
            # every atom covers at least one grid point
            fixed_radius=fixed_radius and np.sqrt(3)*resolution/2 + 1e-6,
            shuffle=True,
            balanced=False,
            random_rotation=True,
            random_translate=2.0,
            radius_multiple=1.5,
            use_covalent_radius=use_covalent_radius,
            recmap=rec_map,
            ligmap=lig_map,
            recmolcache=rec_molcache,
            ligmolcache=lig_molcache,
        )
        net._ = caffe.layers.MolGridData(ntop=0,
            name='data',
            top=['data', 'label', 'aff'],
            include=dict(phase=caffe.TEST),
            source='TESTFILE',
            root_folder='DATA_ROOT',
            has_affinity=True,
            batch_size=batch_size,
            dimension=(data_dim - 1)*resolution,
            resolution=resolution,
            binary_occupancy=binary_atoms,
            fixed_radius=fixed_radius and np.sqrt(3)*resolution/2 + 1e-6,
            shuffle=False,
            balanced=False,
            random_rotation=False,
            random_translate=0.0,
            radius_multiple=1.5,
            use_covalent_radius=use_covalent_radius,
            recmap=rec_map,
            ligmap=lig_map,
            recmolcache=rec_molcache,
            ligmolcache=lig_molcache,
        )
        net.rec, net.lig = caffe.layers.Slice(net.data, ntop=2,
            name='slice_rec_lig',
            axis=1,
            slice_point=n_channels['rec'])
    else:
        # no molgrid_data layers, just input blobs
        net.rec = caffe.layers.Input(
            shape=dict(dim=[batch_size, n_channels['rec']] + [data_dim]*3))
        net.lig = caffe.layers.Input(
            shape=dict(dim=[batch_size, n_channels['lig']] + [data_dim]*3))
        net.data = caffe.layers.Concat(net.rec, net.lig, axis=1)
        if not decoders:
            # discriminative model, so need label input blob
            net.label = caffe.layers.Input(shape=dict(dim=[batch_size, n_latent]))

    # encoder(s)
    encoder_tops = []
    for variational, enc in encoders:
        curr_top = net[enc]
        curr_dim = data_dim
        curr_n_filters = n_channels[enc]
        next_n_filters = n_filters
        pool_factors = []

        if init_conv_pool:
            # initial conv and pooling
            conv = '{}_enc_init_conv'.format(enc)
            net[conv] = caffe.layers.Convolution(curr_top,
                num_output=next_n_filters,
                weight_filler=dict(type='xavier'),
                kernel_size=conv_kernel_size,
                pad=conv_kernel_size//2)
            curr_top = net[conv]
            curr_n_filters = next_n_filters
            relu = '{}_relu'.format(conv)
            net[relu] = caffe.layers.ReLU(curr_top,
                negative_slope=0.1*leaky_relu,
                in_place=True)

            pool = '{}_enc_init_pool'.format(enc)
            pool_factor = least_prime_factor(curr_dim)
            pool_factors.append(pool_factor)
            net[pool] = caffe.layers.Pooling(curr_top,
                pool=caffe.params.Pooling.AVE,
                kernel_size=pool_factor,
                stride=pool_factor)
            curr_top = net[pool]
            curr_dim = int(curr_dim//pool_factor)

        for i in range(n_levels):

            if i > 0:
                # pool between convolution blocks
                assert curr_dim > 1, 'nothing to pool at level {}'.format(i)
                pool = '{}_enc_level{}_pool'.format(enc, i)
                pool_factor = least_prime_factor(curr_dim)
                pool_factors.append(pool_factor)

                if pool_type == 'c':
                    # convolution with stride
                    net[pool] = caffe.layers.Convolution(curr_top,
                        num_output=curr_n_filters,
                        group=curr_n_filters,
                        weight_filler=dict(type='xavier'),
                        kernel_size=pool_factor,
                        stride=pool_factor,
                        engine=caffe.params.Convolution.CAFFE)
                elif pool_type == 'm':
                    # max pooling
                    net[pool] = caffe.layers.Pooling(curr_top,
                        pool=caffe.params.Pooling.MAX,
                        kernel_size=pool_factor,
                        stride=pool_factor)
                elif pool_type == 'a':
                    # average pooling
                    net[pool] = caffe.layers.Pooling(curr_top,
                        pool=caffe.params.Pooling.AVE,
                        kernel_size=pool_factor,
                        stride=pool_factor)

                curr_top = net[pool]
                curr_dim = int(curr_dim//pool_factor)
                next_n_filters = int(width_factor*curr_n_filters)

            if self_attention and i == 1:
                # self-attention over flattened spatial positions
                att = '{}_enc_level{}_att'.format(enc, i)
                att_f = '{}_f'.format(att)
                net[att_f] = caffe.layers.Convolution(curr_top,
                    num_output=curr_n_filters//8,
                    weight_filler=dict(type='xavier'),
                    kernel_size=1)
                att_g = '{}_g'.format(att)
                net[att_g] = caffe.layers.Convolution(curr_top,
                    num_output=curr_n_filters//8,
                    weight_filler=dict(type='xavier'),
                    kernel_size=1)
                att_s = '{}_s'.format(att)
                net[att_s] = caffe.layers.MatMul(net[att_f], net[att_g], transpose_a=True)
                att_B = '{}_B'.format(att)
                net[att_B] = caffe.layers.Softmax(net[att_s], axis=2)
                att_h = '{}_h'.format(att)
                net[att_h] = caffe.layers.Convolution(curr_top,
                    num_output=curr_n_filters,
                    weight_filler=dict(type='xavier'),
                    kernel_size=1)
                att_o = '{}_o'.format(att)
                net[att_o] = caffe.layers.MatMul(net[att_h], net[att_B], transpose_b=True)
                att_o_reshape = '{}_o_reshape'.format(att)
                net[att_o_reshape] = caffe.layers.Reshape(net[att_o],
                    shape=dict(dim=[batch_size, curr_n_filters] + [curr_dim]*3))
                curr_top = net[att_o_reshape]

            for j in range(conv_per_level):
                # convolutions
                conv = '{}_enc_level{}_conv{}'.format(enc, i, j)
                net[conv] = caffe.layers.Convolution(curr_top,
                    num_output=next_n_filters,
                    weight_filler=dict(type='xavier'),
                    kernel_size=conv_kernel_size,
                    pad=conv_kernel_size//2)
                if dense_net:
                    concat_tops = [curr_top, net[conv]]
                curr_top = net[conv]
                curr_n_filters = next_n_filters
                relu = '{}_relu'.format(conv)
                net[relu] = caffe.layers.ReLU(curr_top,
                    negative_slope=0.1*leaky_relu,
                    in_place=True)
                if dense_net:
                    concat = '{}_concat'.format(conv)
                    net[concat] = caffe.layers.Concat(*concat_tops, axis=1)
                    curr_top = net[concat]
                    curr_n_filters += next_n_filters

            if dense_net:
                # bottleneck conv
                conv = '{}_enc_level{}_bottleneck'.format(enc, i)
                next_n_filters = int(curr_n_filters//2) #TODO implement bottleneck_factor
                net[conv] = caffe.layers.Convolution(curr_top,
                    num_output=next_n_filters,
                    weight_filler=dict(type='xavier'),
                    kernel_size=1,
                    pad=0)
                curr_top = net[conv]
                curr_n_filters = next_n_filters

        if batch_disc:
            # batch discrimination: append per-example sums of squared
            # differences against every other example in the batch
            bd_f = '{}_enc_bd_f'.format(enc)
            net[bd_f] = caffe.layers.Reshape(curr_top,
                shape=dict(dim=[batch_size, 1, curr_n_filters*curr_dim**3]))
            bd_f_tile = '{}_tile'.format(bd_f)
            net[bd_f_tile] = caffe.layers.Tile(net[bd_f], axis=1, tiles=batch_size)
            bd_f_T = '{}_T'.format(bd_f)
            net[bd_f_T] = caffe.layers.Reshape(net[bd_f],
                shape=dict(dim=[1, batch_size, curr_n_filters*curr_dim**3]))
            bd_f_T_tile = '{}_tile'.format(bd_f_T)
            net[bd_f_T_tile] = caffe.layers.Tile(net[bd_f_T], axis=0, tiles=batch_size)
            bd_f_diff = '{}_diff'.format(bd_f)
            net[bd_f_diff] = caffe.layers.Eltwise(net[bd_f_tile], net[bd_f_T_tile],
                operation=caffe.params.Eltwise.SUM,
                coeff=[1, -1])
            bd_f_diff2 = '{}2'.format(bd_f_diff)
            net[bd_f_diff2] = caffe.layers.Eltwise(net[bd_f_diff], net[bd_f_diff],
                operation=caffe.params.Eltwise.PROD)
            bd_f_ssd = '{}_ssd'.format(bd_f)
            net[bd_f_ssd] = caffe.layers.Convolution(net[bd_f_diff2],
                param=dict(lr_mult=0, decay_mult=0),
                convolution_param=dict(
                    num_output=1,
                    weight_filler=dict(type='constant', value=1),
                    bias_term=False,
                    kernel_size=[1],
                    engine=caffe.params.Convolution.CAFFE))
            bd_f_ssd_reshape = '{}_reshape'.format(bd_f_ssd)
            net[bd_f_ssd_reshape] = caffe.layers.Reshape(net[bd_f_ssd],
                shape=dict(dim=[batch_size, curr_n_filters] + [curr_dim]*3))
            bd_o = '{}_bd_o'.format(enc)
            net[bd_o] = caffe.layers.Concat(curr_top, net[bd_f_ssd_reshape], axis=1)
            curr_top = net[bd_o]

        # latent space
        if variational:

            if fully_conv:
                # convolutional latent variables
                mean = '{}_latent_mean'.format(enc)
                net[mean] = caffe.layers.Convolution(curr_top,
                    num_output=n_latent,
                    weight_filler=dict(type='xavier'),
                    kernel_size=latent_kernel_size,
                    pad=latent_kernel_size//2)
                log_std = '{}_latent_log_std'.format(enc)
                net[log_std] = caffe.layers.Convolution(curr_top,
                    num_output=n_latent,
                    weight_filler=dict(type='xavier'),
                    kernel_size=latent_kernel_size,
                    pad=latent_kernel_size//2)
            else:
                mean = '{}_latent_mean'.format(enc)
                net[mean] = caffe.layers.InnerProduct(curr_top,
                    num_output=n_latent,
                    weight_filler=dict(type='xavier'))
                log_std = '{}_latent_log_std'.format(enc)
                net[log_std] = caffe.layers.InnerProduct(curr_top,
                    num_output=n_latent,
                    weight_filler=dict(type='xavier'))

            # reparameterization trick: sample = mean + std * noise
            std = '{}_latent_std'.format(enc)
            net[std] = caffe.layers.Exp(net[log_std])
            noise = '{}_latent_noise'.format(enc)
            noise_shape = [batch_size, n_latent]
            if fully_conv:
                noise_shape += [1]
            net[noise] = caffe.layers.DummyData(
                data_filler=dict(type='gaussian'),
                shape=dict(dim=noise_shape))
            noise_top = net[noise]
            if fully_conv:
                # broadcast noise sample along spatial axes
                noise_tile = '{}_latent_noise_tile'.format(enc)
                net[noise_tile] = caffe.layers.Tile(net[noise], axis=2, tiles=curr_dim**3)
                noise_reshape = '{}_latent_noise_reshape'.format(enc)
                net[noise_reshape] = caffe.layers.Reshape(net[noise_tile],
                    shape=dict(dim=[batch_size, n_latent, curr_dim, curr_dim, curr_dim]))
                noise_top = net[noise_reshape]
            std_noise = '{}_latent_std_noise'.format(enc)
            net[std_noise] = caffe.layers.Eltwise(noise_top, net[std],
                operation=caffe.params.Eltwise.PROD)
            sample = '{}_latent_sample'.format(enc)
            net[sample] = caffe.layers.Eltwise(net[std_noise], net[mean],
                operation=caffe.params.Eltwise.SUM)
            curr_top = net[sample]

            # K-L divergence
            mean2 = '{}_latent_mean2'.format(enc)
            net[mean2] = caffe.layers.Eltwise(net[mean], net[mean],
                operation=caffe.params.Eltwise.PROD)
            var = '{}_latent_var'.format(enc)
            net[var] = caffe.layers.Eltwise(net[std], net[std],
                operation=caffe.params.Eltwise.PROD)
            one = '{}_latent_one'.format(enc)
            one_shape = [batch_size, n_latent]
            if fully_conv:
                one_shape += [curr_dim]*3
            net[one] = caffe.layers.DummyData(
                data_filler=dict(type='constant', value=1),
                shape=dict(dim=one_shape))
            kldiv_term = '{}_latent_kldiv_term_sum'.format(enc)
            # per-element KL vs. N(0,1): 0.5*(mean^2 + var - 1) - log_std
            net[kldiv_term] = caffe.layers.Eltwise(
                net[one], net[log_std], net[mean2], net[var],
                operation=caffe.params.Eltwise.SUM,
                coeff=[-0.5, -1.0, 0.5, 0.5])
            kldiv_batch = '{}_latent_kldiv_batch_sum'.format(enc)
            net[kldiv_batch] = caffe.layers.Reduction(net[kldiv_term],
                operation=caffe.params.Reduction.SUM)
            kldiv_loss = 'kldiv_loss'
            net[kldiv_loss] = caffe.layers.Power(net[kldiv_batch],
                scale=1.0/batch_size,
                loss_weight=loss_weight_KL)

        else:
            if fully_conv:
                conv = '{}_latent_conv'.format(enc)
                net[conv] = caffe.layers.Convolution(curr_top,
                    num_output=n_latent,
                    weight_filler=dict(type='xavier'),
                    kernel_size=latent_kernel_size,
                    pad=latent_kernel_size//2)
                curr_top = net[conv]
            else:
                fc = '{}_latent_fc'.format(enc)
                net[fc] = caffe.layers.InnerProduct(curr_top,
                    num_output=n_latent,
                    weight_filler=dict(type='xavier'))
                curr_top = net[fc]

        encoder_tops.append(curr_top)

    if len(encoder_tops) > 1:
        # concat latent vectors
        net.latent_concat = caffe.layers.Concat(*encoder_tops, axis=1)
        curr_top = net.latent_concat

    if decoders:
        # decoder(s)
        dec_init_dim = curr_dim
        dec_init_n_filters = curr_n_filters
        decoder_tops = []

        for dec in decoders:
            label_top = net[dec]
            label_n_filters = n_channels[dec]
            next_n_filters = dec_init_n_filters if conv_per_level else n_channels[dec]

            if not fully_conv:
                # project and reshape the latent vector back to a 3D grid
                fc = '{}_dec_fc'.format(dec)
                net[fc] = caffe.layers.InnerProduct(curr_top,
                    num_output=next_n_filters*dec_init_dim**3,
                    weight_filler=dict(type='xavier'))
                relu = '{}_relu'.format(fc)
                net[relu] = caffe.layers.ReLU(net[fc],
                    negative_slope=0.1*leaky_relu,
                    in_place=True)
                reshape = '{}_reshape'.format(fc)
                net[reshape] = caffe.layers.Reshape(net[fc],
                    shape=dict(dim=[batch_size, next_n_filters] + [dec_init_dim]*3))
                curr_top = net[reshape]

            curr_n_filters = dec_init_n_filters
            curr_dim = dec_init_dim

            for i in reversed(range(n_levels)):

                if i < n_levels-1:
                    # upsample between convolution blocks
                    unpool = '{}_dec_level{}_unpool'.format(dec, i)
                    pool_factor = pool_factors.pop(-1)
                    if unpool_type == 'c':
                        # deconvolution with stride
                        net[unpool] = caffe.layers.Deconvolution(curr_top,
                            convolution_param=dict(
                                num_output=curr_n_filters,
                                group=curr_n_filters,
                                weight_filler=dict(type='xavier'),
                                kernel_size=pool_factor,
                                stride=pool_factor,
                                engine=caffe.params.Convolution.CAFFE))
                    elif unpool_type == 'n':
                        # nearest-neighbor interpolation
                        net[unpool] = caffe.layers.Deconvolution(curr_top,
                            param=dict(lr_mult=0, decay_mult=0),
                            convolution_param=dict(
                                num_output=curr_n_filters,
                                group=curr_n_filters,
                                weight_filler=dict(type='constant', value=1),
                                bias_term=False,
                                kernel_size=pool_factor,
                                stride=pool_factor,
                                engine=caffe.params.Convolution.CAFFE))
                    curr_top = net[unpool]
                    curr_dim = int(pool_factor*curr_dim)
                    next_n_filters = int(curr_n_filters//width_factor)

                for j in range(conv_per_level):
                    # convolutions
                    deconv = '{}_dec_level{}_deconv{}'.format(dec, i, j)
                    # final convolution has to produce the desired number
                    # of output channels
                    last_conv = (i == 0) and (j+1 == conv_per_level) \
                        and not (dense_net or init_conv_pool)
                    if last_conv:
                        next_n_filters = label_n_filters
                    net[deconv] = caffe.layers.Deconvolution(curr_top,
                        convolution_param=dict(
                            num_output=next_n_filters,
                            weight_filler=dict(type='xavier'),
                            kernel_size=conv_kernel_size,
                            pad=conv_kernel_size//2))
                    if dense_net:
                        concat_tops = [curr_top, net[deconv]]
                    curr_top = net[deconv]
                    curr_n_filters = next_n_filters
                    relu = '{}_relu'.format(deconv)
                    net[relu] = caffe.layers.ReLU(curr_top,
                        negative_slope=0.1*leaky_relu,
                        in_place=True)
                    if dense_net:
                        concat = '{}_concat'.format(deconv)
                        net[concat] = caffe.layers.Concat(*concat_tops, axis=1)
                        curr_top = net[concat]
                        curr_n_filters += next_n_filters

                if dense_net:
                    # bottleneck conv
                    conv = '{}_dec_level{}_bottleneck'.format(dec, i)
                    last_conv = (i == 0) and not init_conv_pool
                    if last_conv:
                        next_n_filters = label_n_filters
                    else:
                        next_n_filters = int(curr_n_filters//2) #TODO implement bottleneck_factor
                    net[conv] = caffe.layers.Deconvolution(curr_top,
                        convolution_param=dict(
                            num_output=next_n_filters,
                            weight_filler=dict(type='xavier'),
                            kernel_size=1,
                            pad=0))
                    curr_top = net[conv]
                    curr_n_filters = next_n_filters

                if self_attention and i == 1:
                    # self-attention, mirroring the encoder block
                    att = '{}_dec_level{}_att'.format(dec, i)
                    att_f = '{}_f'.format(att)
                    net[att_f] = caffe.layers.Convolution(curr_top,
                        num_output=curr_n_filters//8,
                        weight_filler=dict(type='xavier'),
                        kernel_size=1)
                    att_g = '{}_g'.format(att)
                    net[att_g] = caffe.layers.Convolution(curr_top,
                        num_output=curr_n_filters//8,
                        weight_filler=dict(type='xavier'),
                        kernel_size=1)
                    att_s = '{}_s'.format(att)
                    net[att_s] = caffe.layers.MatMul(net[att_f], net[att_g], transpose_a=True)
                    att_B = '{}_B'.format(att)
                    net[att_B] = caffe.layers.Softmax(net[att_s], axis=2)
                    att_h = '{}_h'.format(att)
                    net[att_h] = caffe.layers.Convolution(curr_top,
                        num_output=curr_n_filters,
                        weight_filler=dict(type='xavier'),
                        kernel_size=1)
                    att_o = '{}_o'.format(att)
                    net[att_o] = caffe.layers.MatMul(net[att_h], net[att_B], transpose_b=True)
                    att_o_reshape = '{}_o_reshape'.format(att)
                    net[att_o_reshape] = caffe.layers.Reshape(net[att_o],
                        shape=dict(dim=[batch_size, curr_n_filters] + [curr_dim]*3))
                    curr_top = net[att_o_reshape]

            if init_conv_pool:
                # final upsample and deconv
                unpool = '{}_dec_final_unpool'.format(dec)
                pool_factor = pool_factors.pop(-1)
                net[unpool] = caffe.layers.Deconvolution(curr_top,
                    param=dict(lr_mult=0, decay_mult=0),
                    convolution_param=dict(
                        num_output=curr_n_filters,
                        group=curr_n_filters,
                        weight_filler=dict(type='constant', value=1),
                        bias_term=False,
                        kernel_size=pool_factor,
                        stride=pool_factor,
                        engine=caffe.params.Convolution.CAFFE))
                curr_top = net[unpool]
                curr_dim = int(pool_factor*curr_dim)

                deconv = '{}_dec_final_deconv'.format(dec)
                next_n_filters = label_n_filters
                net[deconv] = caffe.layers.Deconvolution(curr_top,
                    convolution_param=dict(
                        num_output=next_n_filters,
                        weight_filler=dict(type='xavier'),
                        kernel_size=conv_kernel_size,
                        pad=conv_kernel_size//2))
                curr_top = net[deconv]
                curr_n_filters = next_n_filters
                relu = '{}_relu'.format(deconv)
                net[relu] = caffe.layers.ReLU(curr_top,
                    negative_slope=0.1*leaky_relu,
                    in_place=True)

            # output
            if gaussian_output:
                # fixed Gaussian blur on the generated density
                gauss_kernel_size = 7
                conv = '{}_dec_gauss_conv'.format(dec)
                net[conv] = caffe.layers.Convolution(curr_top,
                    param=dict(lr_mult=0, decay_mult=0),
                    num_output=label_n_filters,
                    group=label_n_filters,
                    weight_filler=dict(type='constant', value=0), # fill from saved weights
                    bias_term=False,
                    kernel_size=gauss_kernel_size,
                    pad=gauss_kernel_size//2,
                    engine=caffe.params.Convolution.CAFFE)
                curr_top = net[conv]

            # a separate output blob from the one used for the gen loss
            # is needed for GAN backprop
            gen = '{}_gen'.format(dec)
            if sigmoid_output:
                net[gen] = caffe.layers.Sigmoid(curr_top)
            else:
                net[gen] = caffe.layers.Power(curr_top)

    elif loss_types:
        # discriminative model
        label_top = net.label

        # output
        if sigmoid_output:
            if n_latent > 1:
                net.output = caffe.layers.Softmax(curr_top)
            else:
                net.output = caffe.layers.Sigmoid(curr_top)
        else:
            net.output = caffe.layers.Power(curr_top)

    # loss
    if 'e' in loss_types:
        net.L2_loss = caffe.layers.EuclideanLoss(
            curr_top, label_top,
            loss_weight=loss_weight_L2
        )

    if 'a' in loss_types:
        net.diff = caffe.layers.Eltwise(
            curr_top, label_top,
            operation=caffe.params.Eltwise.SUM,
            coeff=[1.0, -1.0]
        )
        net.abs_sum = caffe.layers.Reduction(
            net.diff,
            operation=caffe.params.Reduction.ASUM
        )
        net.L1_loss = caffe.layers.Power(
            net.abs_sum,
            scale=1.0/batch_size,
            loss_weight=loss_weight_L1
        )

    if 'f' in loss_types:
        fit = '{}_gen_fit'.format(dec)
        net[fit] = caffe.layers.Python(
            curr_top,
            module='generate',
            layer='AtomFittingLayer',
            param_str=str(dict(
                resolution=resolution,
                use_covalent_radius=True,
                gninatypes_file='/net/pulsar/home/koes/mtr22/gan/data/O_2_0_0.gninatypes'
            ))
        )
        net.fit_L2_loss = caffe.layers.EuclideanLoss(
            curr_top, net[fit],
            loss_weight=1.0
        )

    if 'F' in loss_types:
        fit = '{}_gen_fit'.format(dec)
        net[fit] = caffe.layers.Python(
            curr_top,
            module='layers',
            layer='AtomFittingLayer',
            param_str=str(dict(
                resolution=resolution,
                use_covalent_radius=True
            ))
        )
        net.fit_L2_loss = caffe.layers.EuclideanLoss(
            curr_top, net[fit],
            loss_weight=1.0
        )

    if 'c' in loss_types:
        net.chan_L2_loss = caffe.layers.Python(
            curr_top, label_top,
            module='layers',
            layer='ChannelEuclideanLossLayer',
            loss_weight=1.0
        )

    if 'm' in loss_types:
        net.mask_L2_loss = caffe.layers.Python(
            curr_top, label_top,
            module='layers',
            layer='MaskedEuclideanLossLayer',
            loss_weight=0.0
        )

    if 'x' in loss_types:
        if n_latent > 1 and not decoders:
            net.log_loss = caffe.layers.SoftmaxWithLoss(
                curr_top, label_top,
                loss_weight=loss_weight_log
            )
        else:
            net.log_loss = caffe.layers.SigmoidCrossEntropyLoss(
                curr_top, label_top,
                loss_weight=loss_weight_log
            )

    if 'w' in loss_types:
        # Wasserstein-style loss: Power computes 1 - 2*label, mapping
        # labels {0,1} to signs {+1,-1} before averaging the product
        net.wass_sign = caffe.layers.Power(
            label_top,
            scale=-2,
            shift=1
        )
        net.wass_prod = caffe.layers.Eltwise(
            net.wass_sign, curr_top,
            operation=caffe.params.Eltwise.PROD
        )
        net.wass_loss = caffe.layers.Reduction(
            net.wass_prod,
            operation=caffe.params.Reduction.MEAN,
            loss_weight=loss_weight_wass
        )

    if verbose:
        print('iterating over dict of net top blobs and layers')
        for k, v in net.tops.items():
            print('top name = ' + k)
            try:
                print('layer params = ' + repr(v.fn.params))
            except AttributeError:
                print('layer params = ' + repr(v.params))

    return net.to_proto()
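
# A minimal sketch of driving make_model. The arguments are hypothetical:
# the encode_type grammar is whatever parse_encode_type accepts (the token
# below is a guess at a variational ligand autoencoder), and
# 'my_rec_map'/'my_lig_map' are placeholder typing files that must exist.
if __name__ == '__main__':
    net_param = make_model(
        encode_type='_vl-l',   # hypothetical; see parse_encode_type
        data_dim=24,
        n_levels=3,
        conv_per_level=2,
        n_latent=128,
        loss_types='e',        # L2 reconstruction loss
        rec_map='my_rec_map',
        lig_map='my_lig_map',
    )
    # NetSpec.to_proto() returns a NetParameter; str() gives prototxt text
    with open('model.prototxt', 'w') as f:
        f.write(str(net_param))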
import sys

import molgrid
import numpy as np

sys.path.insert(0, '.')
import liGAN

rec_typer = molgrid.FileMappedGninaTyper('data/my_rec_map')
lig_typer = molgrid.FileMappedGninaTyper('data/my_lig_map')
lig_channels = liGAN.atom_types.get_channels_from_map(lig_typer)

print('loading data')
ex_provider = molgrid.ExampleProvider(
    rec_typer,
    lig_typer,
    data_root='data/molport',
    # the trailing `and ''` disables the molcaches while keeping the real
    # paths visible; `'...' and ''` always evaluates to ''
    recmolcache='data/molportFULL_rec.molcache2' and '',
    ligmolcache='data/molportFULL_lig.molcache2' and '',
    shuffle=True,
)
ex_provider.populate('data/molportFULL_rand_test0_1000.types')

batch_size = 1000
n_examples = ex_provider.size()
n_batches = n_examples // batch_size

type_counts = np.zeros(lig_typer.num_types())
mol_count = 0

for i in range(n_batches):
    for ex in ex_provider.next_batch(batch_size):
        # coord_sets[1] is the ligand; coord_sets[0] is the receptor
        struct = liGAN.atom_structs.AtomStruct.from_coord_set(
            ex.coord_sets[1], lig_channels
        )
        type_counts += struct.type_counts
        mol_count += 1  # tally the number of ligands accumulated
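
# Sketch of reporting the tallies accumulated above: mean count per molecule
# for each channel, labeled with the typer's channel names (assumes the loop
# ran and mol_count > 0).
print('{} molecules'.format(mol_count))
for name, total in zip(lig_typer.get_type_names(), type_counts):
    print('{:48s}{:10.4f}'.format(name, total / mol_count))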
p.add_argument("-o", "--output", type=str, default=None, help="Output file") p.add_argument("--dx", action="store_true", help="Output grids as DX files") args = p.parse_args() system = os.path.splitext(os.path.basename(args.sdf))[0] if args.output is None: args.output = f"{system}.pcd" resolution = args.resolution dimension = args.dimension gm = molgrid.GridMaker(resolution=resolution, dimension=dimension) t = molgrid.FileMappedGninaTyper(args.ligmap) # Grid dimensions (including types) gdims = gm.grid_dimensions(t.num_types()) # Pre-allocate grid # Only one example (batch size is 1) grid = torch.zeros(1, *gdims, dtype=torch.float32, device="cuda:0") obmol = next(pybel.readfile("sdf", args.sdf)) obmol.addh() print(obmol, end="") # Use OpenBabel molecule object (obmol.OBmol) instead of PyBel molecule (obmol) cs = molgrid.CoordinateSet(obmol.OBMol, t)
def get_channels_from_file(map_file, use_covalent_radius=False, name_prefix=''):
    import molgrid
    map_ = molgrid.FileMappedGninaTyper(map_file)
    return get_channels_from_map(map_, use_covalent_radius, name_prefix)
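
# Hypothetical usage ('data/my_lig_map' is a placeholder path); presumably
# returns one channel descriptor per line of the map file, in file order:
#
#   channels = get_channels_from_file('data/my_lig_map', name_prefix='Ligand')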