Beispiel #1
0
def main():
    """Command-line entry point for the 3D CNN PSP training script.

    Steps, in order:
      1. Parse the command-line arguments and attach the default grid
         configuration from ``feature_psp``.
      2. In test-only mode, restore ``num_conv``, ``use_batch_norm`` and
         (if present) ``grid_config`` from ``<model_dir>/config.json``.
      3. Configure logging (DEBUG when ``args.debug`` is set, else INFO).
      4. Pick the output directory: ``<output_dir>/None`` for unobserved
         runs, otherwise the first free numbered sub-directory.
      5. Log and save the effective configuration as ``config.json``.
      6. Load the train/val/test sharded datasets and call ``train_model``
         inside a TensorFlow session with seeded RNGs.

    Raises:
        OSError: if the output directory cannot be created for a reason
            other than "already exists" (e.g. missing parent, permissions).
    """
    parser = create_train_parser()
    args = parser.parse_args()

    # Default grid settings; may be replaced below by a saved model config.
    args.__dict__['grid_config'] = feature_psp.grid_config

    if args.test_only:
        # Reuse the architecture settings the saved model was built with.
        with open(os.path.join(args.model_dir, 'config.json')) as f:
            model_config = json.load(f)
        args.num_conv = model_config['num_conv']
        args.use_batch_norm = model_config['use_batch_norm']
        if 'grid_config' in model_config:
            args.__dict__['grid_config'] = util.dotdict(
                model_config['grid_config'])

    log_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    log_level = logging.DEBUG if args.debug else logging.INFO
    logging.basicConfig(level=log_level, format=log_fmt)
    logging.info("Running 3D CNN PSP training...")

    if args.unobserved:
        args.output_dir = os.path.join(args.output_dir, 'None')
        os.makedirs(args.output_dir, exist_ok=True)
    else:
        # Claim the first free numbered sub-directory. Creating the directory
        # and reacting to FileExistsError (instead of checking existence
        # first) avoids a crash when two runs start at the same time and
        # race for the same index.
        num = 0
        while True:
            dirpath = os.path.join(args.output_dir, str(num))
            try:
                os.mkdir(dirpath)
            except FileExistsError:
                num += 1
                continue
            args.output_dir = dirpath
            logging.info('Creating output directory {:}'.format(
                args.output_dir))
            break

    logging.info("\n" + str(json.dumps(args.__dict__, indent=4)) + "\n")

    # Save config
    with open(os.path.join(args.output_dir, 'config.json'), 'w') as f:
        json.dump(args.__dict__, f, indent=4)

    # The dataset paths are replaced by loaded objects only after the config
    # has been dumped above, so the saved config still holds the plain paths.
    args.train_sharded = sh.Sharded.load(args.train_sharded)
    args.val_sharded = sh.Sharded.load(args.val_sharded)
    args.test_sharded = sh.Sharded.load(args.test_sharded)

    logging.info("Writing all output to {:}".format(args.output_dir))
    with tf.Session() as sess:
        np.random.seed(args.random_seed)
        tf.set_random_seed(args.random_seed)
        train_model(sess, args)
Beispiel #2
0
import atom3d.shard.shard as sh

import examples.cnn3d.subgrid_gen as subgrid_gen
import examples.cnn3d.util as util


# Module-level grid settings used when generating voxel grids.
# NOTE(review): util.dotdict presumably allows attribute-style access to
# these keys -- confirm against examples.cnn3d.util.
grid_config = util.dotdict({
    # Mapping from elements to position in channel dimension.
    'element_mapping': {
        'C': 0,
        'O': 1,
        'N': 2,
        'S': 3,
    },
    # Radius of the grids to generate, in angstroms.
    'radius': 50.0,
    # Resolution of each voxel, in angstroms.
    'resolution': 1.0,
    # Number of directions to apply for data augmentation.
    'num_directions': 20,
    # Number of rolls to apply for data augmentation.
    'num_rolls': 20,
})


def df_to_feature(struct_df, grid_config, random_seed=None):
    pos = struct_df[['x', 'y', 'z']].astype(np.float32)
    center = util.get_center(pos)

    rot_mat = subgrid_gen.gen_rot_matrix(grid_config, random_seed=random_seed)
Beispiel #3
0
import examples.cnn3d.util as util

# Module-level grid and sampling settings.
# NOTE(review): util.dotdict presumably allows attribute-style access to
# these keys -- confirm against examples.cnn3d.util.
grid_config = util.dotdict({
    # Mapping from elements to position in channel dimension.
    'element_mapping': {
        'C': 0,
        'O': 1,
        'N': 2,
        'S': 3
    },
    # Radius of the grids to generate, in angstroms.
    'radius': 17.0,
    # Resolution of each voxel, in angstroms.
    'resolution': 1.0,
    # Number of directions to apply for data augmentation.
    'num_directions': 20,
    # Number of rolls to apply for data augmentation.
    'num_rolls': 20,

    # --- PPI-specific settings ---
    # Number of negatives to sample per positive example. -1 means all.
    # NOTE(review): the '_testing' variant presumably applies at test time
    # -- confirm against the code that reads it.
    'neg_to_pos_ratio': 1,
    'neg_to_pos_ratio_testing': 1,
    # Max number of positive regions to take from a structure. -1 means all.
    'max_pos_regions_per_ensemble': 5,
    'max_pos_regions_per_ensemble_testing': 5,
    # Whether to use all negatives at test time.
    'full_test': False,
})

Beispiel #4
0
def main():
    """Command-line entry point for the 3D CNN QM9 training script.

    Steps, in order:
      1. Parse the command-line arguments and attach the default grid
         configuration from ``feature_qm9``.
      2. In test-only or resume mode, replace the argument set with the
         saved ``<model_dir>/config.json`` while keeping the run-control
         options given on the command line (``resume_training``,
         ``test_only``, ``unobserved``, ``num_epochs``, ``model_dir``,
         ``use_ckpt_num``; plus ``batch_size`` when testing only).
      3. Configure logging (DEBUG when ``args.debug`` is set, else INFO).
      4. Pick the output directory: ``<output_dir>/None`` for unobserved
         runs, otherwise the first free numbered sub-directory.
      5. Log and save the effective configuration as ``config.json``.
      6. Call ``train_model`` inside a TensorFlow session with a seeded
         TensorFlow RNG.

    Raises:
        OSError: if the output directory cannot be created for a reason
            other than "already exists" (e.g. missing parent, permissions).
    """
    parser = create_train_parser()
    args = parser.parse_args()

    # Default grid settings; may be replaced below by a saved model config.
    args.__dict__['grid_config'] = feature_qm9.grid_config

    if args.test_only or args.resume_training:
        # Remember the command-line values that must survive the wholesale
        # replacement of the argument dict by the saved config.
        cli_overrides = {
            key: getattr(args, key)
            for key in ('resume_training', 'test_only', 'unobserved',
                        'num_epochs', 'model_dir', 'use_ckpt_num')
        }
        batch_size = args.batch_size

        with open(os.path.join(args.model_dir, 'config.json')) as f:
            model_config = json.load(f)
        args.__dict__ = model_config
        if 'grid_config' in model_config:
            args.__dict__['grid_config'] = util.dotdict(
                model_config['grid_config'])

        for key, value in cli_overrides.items():
            setattr(args, key, value)
        # The command-line batch size is only honoured when testing; a
        # resumed training run keeps the batch size from the saved config.
        if args.test_only:
            args.batch_size = batch_size

    log_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    log_level = logging.DEBUG if args.debug else logging.INFO
    logging.basicConfig(level=log_level, format=log_fmt)
    logging.info("Running 3D CNN QM9 training...")

    if args.unobserved:
        args.output_dir = os.path.join(args.output_dir, 'None')
        os.makedirs(args.output_dir, exist_ok=True)
    else:
        # Claim the first free numbered sub-directory. Only "already exists"
        # moves on to the next index; any other failure (permissions, missing
        # parent, ...) propagates instead of being swallowed, which
        # previously caused an endless retry loop on the same path.
        num = 0
        while True:
            dirpath = os.path.join(args.output_dir, str(num))
            try:
                os.mkdir(dirpath)
            except FileExistsError:
                num += 1
                continue
            args.output_dir = dirpath
            logging.info('Creating output directory {:}'.format(args.output_dir))
            break

    logging.info("\n" + str(json.dumps(args.__dict__, indent=4)) + "\n")

    # Save config
    with open(os.path.join(args.output_dir, 'config.json'), 'w') as f:
        json.dump(args.__dict__, f, indent=4)

    logging.info("Writing all output to {:}".format(args.output_dir))
    with tf.Session() as sess:
        tf.set_random_seed(args.random_seed)
        train_model(sess, args)
Beispiel #5
0
import examples.cnn3d.subgrid_gen as subgrid_gen
import examples.cnn3d.util as util

# Module-level grid and sampling settings.
# NOTE(review): util.dotdict presumably allows attribute-style access to
# these keys -- confirm against examples.cnn3d.util.
grid_config = util.dotdict({
    # Mapping from elements to position in channel dimension.
    'element_mapping': {
        'H': 0,
        'C': 1,
        'O': 2,
        'N': 3,
        'S': 4,
        'CL': 5,
        'F': 6,
    },
    # Radius of the grids to generate, in angstroms.
    'radius': 25.0,
    # Resolution of each voxel, in angstroms.
    'resolution': 1.0,
    # Number of directions to apply for data augmentation.
    'num_directions': 20,
    # Number of rolls to apply for data augmentation.
    'num_rolls': 20,
    # Number of negatives to sample per positive example. -1 means all.
    # Here positive = A (active) and negative = I (inactive).
    'neg_to_pos_ratio': 1,
})


def __get_subunit_name(subunits, mode='inactive'):
    assert len(subunits) == 2
Beispiel #6
0
import examples.cnn3d.subgrid_gen as subgrid_gen
import examples.cnn3d.util as util


# Module-level grid and sampling settings.
# NOTE(review): util.dotdict presumably allows attribute-style access to
# these keys -- confirm against examples.cnn3d.util.
grid_config = util.dotdict({
    # Mapping from elements to position in channel dimension.
    'element_mapping': {
        'H': 0,
        'C': 1,
        'O': 2,
        'N': 3,
        'S': 4,
    },
    # Radius of the grids to generate, in angstroms.
    'radius': 25.0,
    # Resolution of each voxel, in angstroms.
    'resolution': 1.0,
    # Number of directions to apply for data augmentation.
    'num_directions': 20,
    # Number of rolls to apply for data augmentation.
    'num_rolls': 20,
    # Number of negatives to sample per positive example. -1 means all.
    'neg_to_pos_ratio': 1.0,
    # Max number of positive regions to take from a structure. -1 means all.
    # NOTE(review): the key name says "per shard" while the comment above
    # says "from a structure" -- confirm which unit the limit applies to.
    'max_pos_per_shard': 200,
})


def __get_mutation_center(struct_df, label_info, center_at_mut=True):
    if center_at_mut:
        # Use CA position of the mutated residue as center for subgrid center