Example #1
def train(infile, outfile, seed):
    print("infile:", infile)
    np.random.seed(seed)

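    # Load the pickled dataset dict; it supplies the target nuclei and MAX_N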
    d = pickle.load(open(infile, 'rb'))

    tgt_nucs = d['tgt_nucs']
    MAX_N = d['MAX_N']
    print("TGT_NUCS=", tgt_nucs)
    print("output is", outfile)
    USE_CUDA = True

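    # Collect per-nucleus scale factors: a scale applies to a target nucleus
    # when its key appears as a substring of the nucleus name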
    mu_scale = []
    std_scale = []
    for tn in tgt_nucs:
        for k, v in NUC_LOSS_SCALE.items():
            if k in tn:
                mu_scale.append(v)
        for k, v in NUC_STD_SCALE.items():
            if k in tn:
                std_scale.append(v)
    assert len(mu_scale) == len(tgt_nucs)
    assert len(std_scale) == len(tgt_nucs)
    print("NUC_LOSS_SCALE=", NUC_LOSS_SCALE)
    print("mu_scale=", mu_scale)
    print("std_scale=", std_scale)
    print("tgt_nucs=", tgt_nucs)

    ### Create datasets and data loaders

    BATCH_SIZE = 16

    dataset_hparams = graph_conv_many_nuc_util.DEFAULT_DATA_HPARAMS

    ds_train, ds_test = graph_conv_many_nuc_util.make_datasets(
        {'filename': infile}, dataset_hparams)

    dl_train = torch.utils.data.DataLoader(ds_train,
                                           batch_size=BATCH_SIZE,
                                           shuffle=True,
                                           pin_memory=True)
    dl_test = torch.utils.data.DataLoader(ds_test,
                                          batch_size=BATCH_SIZE,
                                          shuffle=True,
                                          pin_memory=True)

    net_params = net_params_base.copy()
    opt_params = opt_params_base.copy()

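    # Input feature width is inferred from a sample datapoint; the model
    # emits one output per target nucleus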
    net_params['g_feature_n'] = ds_test[0][1].shape[-1]
    net_params['OUT_DIM'] = len(tgt_nucs)

    use_std = True

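    # Build the network and optionally move it onto the GPU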
    net = nets.GraphVertModel(**net_params)

    net = move(net, USE_CUDA)

    for n, p in net.named_parameters():
        print(n, p.shape)
    loss_config = {
        'std_regularize': STD_REGULARIZE,
        'mu_scale': mu_scale,
        'std_scale': std_scale
    }

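    # Choose the criterion: an uncertainty-aware loss with per-nucleus
    # scaling, or a masked MSE when std is not modeled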
    if use_std:
        std_regularize = loss_config['std_regularize']
        mu_scale = move(torch.Tensor(loss_config['mu_scale']), USE_CUDA)
        std_scale = move(torch.Tensor(loss_config['std_scale']), USE_CUDA)
        criterion = nets.NormUncertainLoss(
            mu_scale, std_scale, std_regularize=std_regularize)
    else:
        criterion = nets.MaskedMSELoss()

    validate_func = create_uncertain_validate_func(tgt_nucs)

    optimizer = torch.optim.Adam(net.parameters(),
                                 lr=opt_params['lr'],
                                 amsgrad=opt_params['amsgrad'],
                                 eps=opt_params['eps'],
                                 weight_decay=opt_params['weight_decay'])

    scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer,
        step_size=opt_params['scheduler_step_size'],
        gamma=opt_params['scheduler_gamma'])

    MODEL_NAME = "{}.{:08d}".format(EXP_NAME, int(time.time() % 1e8))

    checkpoint_filename = os.path.join(CHECKPOINT_DIR,
                                       MODEL_NAME + ".{epoch_i:08d}")
    print("checkpoint:", checkpoint_filename)
    checkpoint_func = graph_conv_many_nuc_util.create_checkpoint_func(
        10, checkpoint_filename)

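    # Stream training metrics to TensorBoard under this run's model name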
    writer = SummaryWriter("{}/{}".format(TENSORBOARD_DIR, MODEL_NAME))

    metadata = {
        'dataset_hparams': dataset_hparams,
        'net_params': net_params,
        'opt_params': opt_params,
        'infile': infile,
        'tgt_nucs': tgt_nucs,
        'max_n': MAX_N,
        'loss_params': loss_config
    }

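    # Persist run metadata as both human-readable JSON and a pickle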
    json.dump(metadata,
              open(os.path.join(CHECKPOINT_DIR, MODEL_NAME + ".json"), 'w'),
              indent=4)
    print(json.dumps(metadata, indent=4))
    pickle.dump(metadata,
                open(os.path.join(CHECKPOINT_DIR, MODEL_NAME + ".meta"), 'wb'))

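    # Main training loop: up to MAX_EPOCHS with periodic validation and
    # checkpointing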
    graph_conv_many_nuc_util.generic_runner(net,
                                            optimizer,
                                            scheduler,
                                            criterion,
                                            dl_train,
                                            dl_test,
                                            MAX_EPOCHS=MAX_EPOCHS,
                                            USE_CUDA=USE_CUDA,
                                            writer=writer,
                                            validate_func=validate_func,
                                            checkpoint_func=checkpoint_func)

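    # Record the hyperparameters used for this run in the pipeline's output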
    pickle.dump(
        {
            'net_params': net_params,
            'opt_params': opt_params,
            'loss_params': loss_config
        }, open(outfile, 'wb'))
Example #2
def train(infiles, outfile, seed, cv_i):
    print("infiles:", infiles)
    meta_infile, data_infile = infiles

    np.random.seed(seed)

    config = pickle.load(open(meta_infile, 'rb'))['config']

    tgt_nucs = config['spectra_nuc']
    MAX_N = config['max_atom_n']
    print("TGT_NUCS=", tgt_nucs)
    print("output is", outfile)
    USE_CUDA = True

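    # Per-nucleus mu/std loss scaling, matched by substring against the
    # target nucleus names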
    mu_scale = []
    std_scale = []
    for tn in tgt_nucs:
        for k, v in NUC_LOSS_SCALE.items():
            if k in tn:
                mu_scale.append(v)
        for k, v in NUC_STD_SCALE.items():
            if k in tn:
                std_scale.append(v)
    assert len(mu_scale) == len(tgt_nucs)
    assert len(std_scale) == len(tgt_nucs)
    print("NUC_LOSS_SCALE=", NUC_LOSS_SCALE)
    print("mu_scale=", mu_scale)
    print("std_scale=", std_scale)
    print("tgt_nucs=", tgt_nucs)

    ### Create datasets and data loaders

    BATCH_SIZE = 32

    dataset_hparams = graph_conv_many_nuc_util.DEFAULT_DATA_HPARAMS.copy()

    # Use one-hot MMFF94 atom types instead of raw atomic numbers as features
    dataset_hparams['feat_vect_args']['mmff_atom_types_onehot'] = True
    dataset_hparams['feat_vect_args']['feat_atomicno'] = False
    # dataset_hparams['adj_args']['mat_power'] = [1, 2, 3, 4, 6, 8]
    # dataset_hparams['adj_args']['split_weights'] = None
    # dataset_hparams['adj_args']['edge_weighted'] = False

    ds_train, ds_test = graph_conv_many_nuc_util.make_datasets(
        {'filename': data_infile}, dataset_hparams, MAX_N, cv_i=cv_i)

    dl_train = torch.utils.data.DataLoader(ds_train,
                                           batch_size=BATCH_SIZE,
                                           shuffle=True,
                                           pin_memory=True)
    dl_test = torch.utils.data.DataLoader(ds_test,
                                          batch_size=BATCH_SIZE,
                                          shuffle=True,
                                          pin_memory=True)

    net_params = net_params_base.copy()
    opt_params = opt_params_base.copy()

    # Infer the input vertex-feature width from a sample datapoint
    net_params['vert_f_in'] = ds_test[0][1].shape[-1]
    net_params['OUT_DIM'] = len(tgt_nucs)
    net_params['MAX_N'] = MAX_N

    # net_name = 'nets.SGCModel'
    net_name = 'nets.RelNetFromS2S'

    # Resolve the network class from its dotted name; the name is also
    # recorded in the run metadata below
    net = eval(net_name)(**net_params)

    net = move(net, USE_CUDA)

    for n, p in net.named_parameters():
        print(n, p.shape)
    if USE_STD:
        # Uncertainty-aware loss with per-nucleus mean/std scaling
        loss_config = {'std_regularize': STD_REGULARIZE,
                       'mu_scale': mu_scale,
                       'std_scale': std_scale}

        std_regularize = loss_config['std_regularize']
        mu_scale = move(torch.Tensor(loss_config['mu_scale']), USE_CUDA)
        std_scale = move(torch.Tensor(loss_config['std_scale']), USE_CUDA)
        criterion = nets.NormUncertainLoss(mu_scale,
                                           std_scale,
                                           std_regularize=std_regularize)
    else:
        # Plain regression with a scaled Huber norm
        loss_config = {'norm': 'huber',
                       'scale': 0.1}

        criterion = nets.NoUncertainLoss(**loss_config)


    # Build the optimizer from opt_params, passing through only the kwargs
    # the chosen optimizer actually accepts (neither Adam nor Adamax takes
    # a momentum argument)
    opt_direct_params = {}
    optimizer_name = opt_params.get('optimizer', 'adam')
    if optimizer_name == 'adam':
        for p in ['lr', 'amsgrad', 'eps', 'weight_decay']:
            if p in opt_params:
                opt_direct_params[p] = opt_params[p]

        optimizer = torch.optim.Adam(net.parameters(), **opt_direct_params)
    elif optimizer_name == 'adamax':
        for p in ['lr', 'eps', 'weight_decay']:
            if p in opt_params:
                opt_direct_params[p] = opt_params[p]

        optimizer = torch.optim.Adamax(net.parameters(), **opt_direct_params)
    else:
        raise ValueError("unknown optimizer {}".format(optimizer_name))
    
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 
                                                step_size=opt_params['scheduler_step_size'], 
                                                gamma=opt_params['scheduler_gamma'])

    MODEL_NAME = "{}.{:08d}".format(EXP_NAME, int(time.time() % 1e8))

    checkpoint_filename = os.path.join(CHECKPOINT_DIR,
                                       MODEL_NAME + ".{epoch_i:08d}")
    print("checkpoint:", checkpoint_filename)
    checkpoint_func = graph_conv_many_nuc_util.create_checkpoint_func(
        CHECKPOINT_EVERY, checkpoint_filename)

    writer = SummaryWriter("{}/{}".format(TENSORBOARD_DIR, MODEL_NAME))
    validate_func = create_uncertain_validate_func(tgt_nucs, writer)

    metadata = {'dataset_hparams': dataset_hparams,
                'net_params': net_params,
                'opt_params': opt_params,
                'meta_infile': meta_infile,
                'data_infile': data_infile,
                'tgt_nucs': tgt_nucs,
                'max_n': MAX_N,
                'net_name': net_name,
                'batch_size': BATCH_SIZE,
                'loss_params': loss_config}

    json.dump(metadata, open(os.path.join(CHECKPOINT_DIR, MODEL_NAME + ".json"), 'w'), 
              indent=4)
    print(json.dumps(metadata, indent=4))
    print("MODEL_NAME=", MODEL_NAME)
    pickle.dump(metadata, 
                open(os.path.join(CHECKPOINT_DIR, MODEL_NAME + ".meta"), 'wb'))

    graph_conv_many_nuc_util.generic_runner(net, optimizer, scheduler, criterion,
                                            dl_train, dl_test,
                                            MAX_EPOCHS=MAX_EPOCHS,
                                            USE_CUDA=USE_CUDA, writer=writer,
                                            validate_func=validate_func,
                                            checkpoint_func=checkpoint_func)

    pickle.dump({'net_params': net_params,
                 'opt_params': opt_params,
                 'loss_params': loss_config},
                open(outfile, 'wb'))
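
For orientation, here is a minimal sketch of how a script like Example #2 might be invoked; the file names, seed, and fold index below are hypothetical placeholders, not values from the original pipeline.

if __name__ == "__main__":
    # Hypothetical driver: a (meta file, data file) pair, an output path,
    # an RNG seed, and a cross-validation fold index
    train(infiles=('dataset.meta', 'dataset.data'),
          outfile='train_run.out.pickle',
          seed=1234,
          cv_i=0)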