def main():
    parser = create_train_parser()
    args = parser.parse_args()
    args.__dict__['grid_config'] = feature_psp.grid_config
    if args.test_only:
        with open(os.path.join(args.model_dir, 'config.json')) as f:
            model_config = json.load(f)
        args.num_conv = model_config['num_conv']
        args.use_batch_norm = model_config['use_batch_norm']
        if 'grid_config' in model_config:
            args.__dict__['grid_config'] = util.dotdict(
                model_config['grid_config'])

    log_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    if args.debug:
        logging.basicConfig(level=logging.DEBUG, format=log_fmt)
    else:
        logging.basicConfig(level=logging.INFO, format=log_fmt)

    logging.info("Running 3D CNN PSP training...")

    if args.unobserved:
        args.output_dir = os.path.join(args.output_dir, 'None')
        os.makedirs(args.output_dir, exist_ok=True)
    else:
        # Use the first unused numbered subdirectory as this run's output dir.
        num = 0
        while True:
            dirpath = os.path.join(args.output_dir, str(num))
            if os.path.exists(dirpath):
                num += 1
            else:
                args.output_dir = dirpath
                logging.info('Creating output directory {:}'.format(
                    args.output_dir))
                os.mkdir(args.output_dir)
                break

    logging.info("\n" + str(json.dumps(args.__dict__, indent=4)) + "\n")

    # Save config
    with open(os.path.join(args.output_dir, 'config.json'), 'w') as f:
        json.dump(args.__dict__, f, indent=4)

    args.train_sharded = sh.Sharded.load(args.train_sharded)
    args.val_sharded = sh.Sharded.load(args.val_sharded)
    args.test_sharded = sh.Sharded.load(args.test_sharded)

    logging.info("Writing all output to {:}".format(args.output_dir))
    with tf.Session() as sess:
        np.random.seed(args.random_seed)
        tf.set_random_seed(args.random_seed)
        train_model(sess, args)
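# A hypothetical sketch (not the repository's create_train_parser) of the
# command-line arguments main() above reads. Flag names, types, and defaults
# are assumptions inferred from the attributes accessed on args.
import argparse

def create_train_parser_sketch():
    parser = argparse.ArgumentParser(description='3D CNN PSP training (sketch)')
    parser.add_argument('--train_sharded', type=str)
    parser.add_argument('--val_sharded', type=str)
    parser.add_argument('--test_sharded', type=str)
    parser.add_argument('--output_dir', type=str, default='out')
    parser.add_argument('--model_dir', type=str, default=None)
    parser.add_argument('--test_only', action='store_true')
    parser.add_argument('--unobserved', action='store_true')
    parser.add_argument('--debug', action='store_true')
    parser.add_argument('--random_seed', type=int, default=1234)  # arbitrary default for the sketch
    return parser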
import numpy as np

import atom3d.shard.shard as sh
import examples.cnn3d.subgrid_gen as subgrid_gen
import examples.cnn3d.util as util


grid_config = util.dotdict({
    # Mapping from elements to position in channel dimension.
    'element_mapping': {
        'C': 0,
        'O': 1,
        'N': 2,
        'S': 3,
    },
    # Radius of the grids to generate, in angstroms.
    'radius': 50.0,
    # Resolution of each voxel, in angstroms.
    'resolution': 1.0,
    # Number of directions to apply for data augmentation.
    'num_directions': 20,
    # Number of rolls to apply for data augmentation.
    'num_rolls': 20,
})


def df_to_feature(struct_df, grid_config, random_seed=None):
    pos = struct_df[['x', 'y', 'z']].astype(np.float32)
    center = util.get_center(pos)
    rot_mat = subgrid_gen.gen_rot_matrix(grid_config, random_seed=random_seed)
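    # Plausible completion of df_to_feature (an assumption; the name and
    # signature of the subgrid_gen helper may differ in the repository):
    # voxelize the structure around its center with the sampled rotation applied.
    grid = subgrid_gen.get_grid(
        struct_df, center, config=grid_config, rot_mat=rot_mat)
    return grid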
import examples.cnn3d.util as util


grid_config = util.dotdict({
    # Mapping from elements to position in channel dimension.
    'element_mapping': {
        'C': 0,
        'O': 1,
        'N': 2,
        'S': 3,
    },
    # Radius of the grids to generate, in angstroms.
    'radius': 17.0,
    # Resolution of each voxel, in angstroms.
    'resolution': 1.0,
    # Number of directions to apply for data augmentation.
    'num_directions': 20,
    # Number of rolls to apply for data augmentation.
    'num_rolls': 20,

    ### PPI specific
    # Number of negatives to sample per positive example. -1 means all.
    'neg_to_pos_ratio': 1,
    'neg_to_pos_ratio_testing': 1,
    # Max number of positive regions to take from a structure. -1 means all.
    'max_pos_regions_per_ensemble': 5,
    'max_pos_regions_per_ensemble_testing': 5,
    # Whether to use all negatives at test time.
    'full_test': False,
})
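# A minimal usage sketch (an assumption, not repository code): the input tensor
# shape implied by the config above. The exact voxel-count convention used by
# subgrid_gen may differ (e.g. with or without the +1 center voxel).
num_voxels_per_side = int(2 * grid_config.radius / grid_config.resolution) + 1
num_channels = len(grid_config.element_mapping)
# For radius 17.0 A at 1.0 A resolution this gives a 35 x 35 x 35 x 4 grid.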
def main():
    parser = create_train_parser()
    args = parser.parse_args()
    args.__dict__['grid_config'] = feature_qm9.grid_config
    if args.test_only or args.resume_training:
        # Keep the CLI values that should override the saved config.
        test_only = args.test_only
        resume_training = args.resume_training
        batch_size = args.batch_size
        unobserved = args.unobserved
        num_epochs = args.num_epochs
        model_dir = args.model_dir
        use_ckpt_num = args.use_ckpt_num
        with open(os.path.join(args.model_dir, 'config.json')) as f:
            model_config = json.load(f)
        args.__dict__ = model_config
        if 'grid_config' in model_config:
            args.__dict__['grid_config'] = util.dotdict(
                model_config['grid_config'])
        # Re-apply the preserved CLI values on top of the restored config.
        args.resume_training = resume_training
        args.test_only = test_only
        args.unobserved = unobserved
        args.num_epochs = num_epochs
        args.model_dir = model_dir
        args.use_ckpt_num = use_ckpt_num
        if args.test_only:
            args.batch_size = batch_size

    log_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    if args.debug:
        logging.basicConfig(level=logging.DEBUG, format=log_fmt)
    else:
        logging.basicConfig(level=logging.INFO, format=log_fmt)

    logging.info("Running 3D CNN QM9 training...")

    if args.unobserved:
        args.output_dir = os.path.join(args.output_dir, 'None')
        os.makedirs(args.output_dir, exist_ok=True)
    else:
        # Use the first unused numbered subdirectory as this run's output dir.
        num = 0
        while True:
            dirpath = os.path.join(args.output_dir, str(num))
            if os.path.exists(dirpath):
                num += 1
            else:
                try:
                    args.output_dir = dirpath
                    logging.info('Creating output directory {:}'.format(
                        args.output_dir))
                    os.mkdir(args.output_dir)
                    break
                except FileExistsError:
                    # Another process created this directory first; loop again
                    # and move on to the next number.
                    pass

    logging.info("\n" + str(json.dumps(args.__dict__, indent=4)) + "\n")

    # Save config
    with open(os.path.join(args.output_dir, 'config.json'), 'w') as f:
        json.dump(args.__dict__, f, indent=4)

    logging.info("Writing all output to {:}".format(args.output_dir))
    with tf.Session() as sess:
        tf.set_random_seed(args.random_seed)
        train_model(sess, args)
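# Entry point and typical invocations (a sketch: the script name and flag names
# are assumptions based on the attributes main() reads above; the repository's
# parser may name them differently):
#
#   python train_qm9.py --output_dir out/qm9 --num_epochs 5
#   python train_qm9.py --test_only --model_dir out/qm9/0   # reuse a saved config
if __name__ == '__main__':
    main()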
import examples.cnn3d.subgrid_gen as subgrid_gen
import examples.cnn3d.util as util


grid_config = util.dotdict({
    # Mapping from elements to position in channel dimension.
    'element_mapping': {
        'H': 0,
        'C': 1,
        'O': 2,
        'N': 3,
        'S': 4,
        'CL': 5,
        'F': 6,
    },
    # Radius of the grids to generate, in angstroms.
    'radius': 25.0,
    # Resolution of each voxel, in angstroms.
    'resolution': 1.0,
    # Number of directions to apply for data augmentation.
    'num_directions': 20,
    # Number of rolls to apply for data augmentation.
    'num_rolls': 20,
    # Number of negatives to sample per positive example. -1 means all.
    # positive = A (active), negative = I (inactive)
    'neg_to_pos_ratio': 1,
})


def __get_subunit_name(subunits, mode='inactive'):
    assert len(subunits) == 2
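    # Plausible continuation (an assumption, not the repository's code): return
    # the subunit whose name marks it as active or inactive. Checking 'inactive'
    # explicitly avoids the substring collision with 'active'.
    for name in subunits:
        if mode == 'inactive' and 'inactive' in name:
            return name
        if mode == 'active' and 'active' in name and 'inactive' not in name:
            return name
    raise ValueError('No subunit found for mode {:}'.format(mode))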
import examples.cnn3d.subgrid_gen as subgrid_gen
import examples.cnn3d.util as util


grid_config = util.dotdict({
    # Mapping from elements to position in channel dimension.
    'element_mapping': {
        'H': 0,
        'C': 1,
        'O': 2,
        'N': 3,
        'S': 4,
    },
    # Radius of the grids to generate, in angstroms.
    'radius': 25.0,
    # Resolution of each voxel, in angstroms.
    'resolution': 1.0,
    # Number of directions to apply for data augmentation.
    'num_directions': 20,
    # Number of rolls to apply for data augmentation.
    'num_rolls': 20,
    # Number of negatives to sample per positive example. -1 means all.
    'neg_to_pos_ratio': 1.0,
    # Max number of positive regions to take from a structure. -1 means all.
    'max_pos_per_shard': 200,
})


def __get_mutation_center(struct_df, label_info, center_at_mut=True):
    if center_at_mut:
        # Use CA position of the mutated residue as center for subgrid center
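        # Plausible continuation (an assumption, not the repository's code): the
        # label_info fields and atom-dataframe column names used below are
        # hypothetical, and numpy is assumed imported as np in this module.
        sel = ((struct_df['chain'] == label_info.chain) &
               (struct_df['residue'] == label_info.residue) &
               (struct_df['name'] == 'CA'))
        center = util.get_center(
            struct_df[sel][['x', 'y', 'z']].astype(np.float32))
    else:
        # Otherwise center on the whole structure.
        center = util.get_center(
            struct_df[['x', 'y', 'z']].astype(np.float32))
    return center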