def fun(x):
    print("on %s print %g" % (MPI.COMM_WORLD.rank, x))
    seed = uuid.uuid4()
    run_number = str(seed)
    print('run_number: ' + run_number)
    #try:
    #    run_number = sys.argv[1]
    #except:
    #    "requires an input argument"
    #    raise

    L1orL2 = 'L1'
    TRAINING_ERROR = 'gaussian'
    hidden_layer1 = 50
    hidden_layer2 = 50
    hidden_layers = (hidden_layer1, hidden_layer2)

    setup_list = np.repeat([('CO','low','GCN'),('CO','high','binding_type'),('CO',1,'GCN')\
                                    ,('CO','high','combine_hollow_sites'),('CO','high','GCN')\
                                    ,('NO','low','GCN'),('NO','high','binding_type')\
                                    ,('NO','high','combine_hollow_sites')\
                                    ,('NO',1,'binding_type'),('NO',1,'combine_hollow_sites'),('NO',1,'GCN')\
                                    ,('C2H4','low','binding_type'),('C2H4','low','GCN')\
                                    ,('CO','low','binding_type'),('CO','low','combine_hollow_sites')\
                                    ,('CO',1,'binding_type'),('CO',1,'combine_hollow_sites')\
                                    ,('NO','low','binding_type'),('NO','low','combine_hollow_sites')],5,axis=0).tolist()

    which_setup = setup_list[x]
    ADSORBATE = which_setup[0]
    COVERAGE = which_setup[1]
    try:
        COVERAGE = float(COVERAGE)
    except:
        COVERAGE = COVERAGE
    TARGET = which_setup[2]
    epsilon = 10**-12
    alpha = 10**-3
    learning_rate = 0.0004
    if COVERAGE == 'high' and TARGET in [
            'binding_type', 'combine_hollow_sites'
    ]:
        NUM_TRAIN = 500
        training_sets = 100
        batch_size = 5
        epochs = 20

    else:
        NUM_TRAIN = 5000
        training_sets = 200
        batch_size = 50
        epochs = 10

    print('batch_size: ' + str(batch_size))
    print('learning_rate: ' + str(learning_rate))
    print('epsilon: ' + str(epsilon))
    print('alpha: ' + str(alpha))
    print('NUM_TRAIN: ' + str(NUM_TRAIN))
    print('training_sets: ' + str(training_sets))
    print('epochs: ' + str(epochs))
    print('hidden layers: ' + str(hidden_layers))
    #batch_size: 2297
    #learning_rate = 0.06651629336077657
    #epsilon = 0.02872678693613251
    #alpha = 0.001931666535492889
    #NUM_TRAIN = 100000
    #epochs=2
    #training_sets = 2
    if ADSORBATE == 'CO':
        INCLUDED_BINDING_TYPES = [1, 2, 3, 4]
        MAX_COVERAGES = [1, 0.7, 0.2, 0.2]
        BINDING_TYPE_FOR_GCN = [1]
        HIGH_FREQUENCY = 2200
        ENERGY_POINTS = 500
        GCN_ALL = False
    elif ADSORBATE == 'NO':
        INCLUDED_BINDING_TYPES = [1, 2, 3, 4]
        MAX_COVERAGES = [1, 1, 1, 1]
        BINDING_TYPE_FOR_GCN = [1]
        HIGH_FREQUENCY = 2000
        ENERGY_POINTS = 450
        GCN_ALL = False
    elif ADSORBATE == 'C2H4':
        INCLUDED_BINDING_TYPES = [1, 2]
        MAX_COVERAGES = [1, 1]
        BINDING_TYPE_FOR_GCN = [2]
        HIGH_FREQUENCY = 2000
        ENERGY_POINTS = 450
        if TARGET == 'GCN':
            GCN_ALL = True
        else:
            GCN_ALL = False
    print('ADSORBATE: ' + ADSORBATE)
    print('TARGET: ' + str(TARGET))
    print('COVERAGE: ' + str(COVERAGE))
    print('GCN_ALL: ' + str(GCN_ALL))

    work_dir = os.getcwd()
    cross_validation_path = os.path.join(
        work_dir,
        'cross_validation_' + ADSORBATE + '_' + TARGET + '_' + str(COVERAGE))
    print(cross_validation_path)
    CV_class = CROSS_VALIDATION(ADSORBATE=ADSORBATE,INCLUDED_BINDING_TYPES=INCLUDED_BINDING_TYPES\
                                ,cross_validation_path=cross_validation_path, VERBOSE=False)
    CV_SPLITS = 3
    CV_class.generate_test_cv_indices(CV_SPLITS=CV_SPLITS, BINDING_TYPE_FOR_GCN=BINDING_TYPE_FOR_GCN\
        , test_fraction=0.25, random_state=x, read_file=False, write_file=False)
    properties_dictionary = {'batch_size':batch_size, 'learning_rate_init':learning_rate\
    , 'epsilon':epsilon,'hidden_layer_sizes':hidden_layers,'regularization':L1orL2\
    ,'alpha':alpha, 'epochs_per_training_set':epochs,'training_sets':training_sets,'loss': 'wasserstein_loss'}
    CV_class.set_model_parameters(TARGET=TARGET, COVERAGE=COVERAGE\
    , MAX_COVERAGES = MAX_COVERAGES, NN_PROPERTIES=properties_dictionary\
    , NUM_TRAIN=NUM_TRAIN, NUM_VAL=10000, NUM_TEST=10000\
    , MIN_GCN_PER_LABEL=12, NUM_GCN_LABELS=10, GCN_ALL = GCN_ALL, TRAINING_ERROR = TRAINING_ERROR\
    , LOW_FREQUENCY=200, HIGH_FREQUENCY=HIGH_FREQUENCY, ENERGY_POINTS=ENERGY_POINTS)
    CV_RESULTS_FILE = ADSORBATE + '_' + TARGET + '_' + str(
        COVERAGE) + '_' + run_number
    CV_class.run_CV_multiprocess(write_file=True,
                                 CV_RESULTS_FILE=CV_RESULTS_FILE,
                                 num_procs=CV_SPLITS + 1)
Ejemplo n.º 2
0
                        HIGH_FREQUENCY = 3350
                        ENERGY_POINTS = 750
                        GCN_ALL = True

                    print('ADSORBATE: ' + ADSORBATE)
                    print('TARGET: ' + TARGET)
                    print('COVERAGE: ' + str(COVERAGE))
                    print('TRAINING_ERROR: ' + str(TRAINING_ERROR))
                    print('GCN_ALL: ' + str(GCN_ALL))

                    work_dir = os.getcwd()
                    cross_validation_path = os.path.join(
                        work_dir, 'cross_validation_' + ADSORBATE + '_' +
                        TARGET + '_' + str(COVERAGE))
                    print(cross_validation_path)
                    CV_class = CROSS_VALIDATION(ADSORBATE,INCLUDED_BINDING_TYPES\
                                                ,cross_validation_path, VERBOSE=True)
                    CV_SPLITS = 3
                    CV_class.generate_test_cv_indices(CV_SPLITS, BINDING_TYPE_FOR_GCN\
                        , test_fraction=0.25, random_state=0, read_file=False, write_file=False)
                    properties_dictionary = {'batch_size':batch_size, 'learning_rate_init':learning_rate\
                    , 'epsilon':epsilon,'hidden_layer_sizes':hidden_layers,'regularization':'L2'\
                    ,'alpha':alpha, 'epochs_per_training_set':epochs,'training_sets':training_sets,'loss': 'wasserstein_loss'}
                    CV_class.set_model_parameters(TARGET=TARGET, COVERAGE=COVERAGE\
                    , MAX_COVERAGES = MAX_COVERAGES, NN_PROPERTIES=properties_dictionary\
                    , NUM_TRAIN=NUM_TRAIN, NUM_VAL=100, NUM_TEST=100\
                    , MIN_GCN_PER_LABEL=12, NUM_GCN_LABELS=10, GCN_ALL = GCN_ALL, TRAINING_ERROR = TRAINING_ERROR\
                    , LOW_FREQUENCY=200, HIGH_FREQUENCY=HIGH_FREQUENCY, ENERGY_POINTS=ENERGY_POINTS)
                    CV_RESULTS_FILE = ADSORBATE + '_' + TARGET + '_' + str(
                        COVERAGE) + '_' + run_number
                    #CV_class.run_CV_multiprocess(write_file=True, CV_RESULTS_FILE = CV_RESULTS_FILE, num_procs=CV_SPLITS+1)
                    #CV_class.run_CV(write_file=True, CV_RESULTS_FILE = CV_RESULTS_FILE)
Ejemplo n.º 3
0
# -*- coding: utf-8 -*-
"""
Created on Tue Apr 25 11:46:31 2017

@author: lansford
"""

from __future__ import absolute_import, division, print_function
from jl_spectra_2_structure.cross_validation import CROSS_VALIDATION

CV_SPLITS = 3

#write data for CO
CV_class = CROSS_VALIDATION(ADSORBATE='CO',
                            INCLUDED_BINDING_TYPES=[1, 2, 3, 4])
CV_class.generate_test_cv_indices(CV_SPLITS=CV_SPLITS, BINDING_TYPE_FOR_GCN=[1]\
    , test_fraction=0.25, random_state=0, read_file=False, write_file=True)

#Write data for NO
CV_class = CROSS_VALIDATION(ADSORBATE='NO',
                            INCLUDED_BINDING_TYPES=[1, 2, 3, 4])
CV_class.generate_test_cv_indices(CV_SPLITS=CV_SPLITS, BINDING_TYPE_FOR_GCN=[1]\
    , test_fraction=0.25, random_state=0, read_file=False, write_file=True)

#write data for C2H4
CV_class = CROSS_VALIDATION(ADSORBATE='C2H4', INCLUDED_BINDING_TYPES=[1, 2])
CV_class.generate_test_cv_indices(CV_SPLITS=CV_SPLITS, BINDING_TYPE_FOR_GCN=[2]\
    , test_fraction=0.25, random_state=0, read_file=False, write_file=True)
Ejemplo n.º 4
0
def fun(x):
    print("on %s print %g" % (MPI.COMM_WORLD.rank, x))
    seed = uuid.uuid4()
    run_number = str(seed)
    print('run_number: ' + run_number)
    #try:
    #    run_number = sys.argv[1]
    #except:
    #    "requires an input argument"
    #    raise

    L1orL2 = np.random.RandomState().choice(['L1', 'L2'])
    TRAINING_ERROR = np.random.RandomState().choice(['gaussian', 0.003, None])
    hidden_layer1 = np.random.RandomState().randint(50, 151)
    hidden_layer2 = np.random.RandomState().randint(50, 151)
    hidden_layer_3 = np.random.RandomState().randint(50, 151)
    num_layers = np.random.RandomState().randint(2, 3 + 1)
    if num_layers == 2:
        hidden_layers = (hidden_layer1, hidden_layer2)
    elif num_layers == 3:
        hidden_layers = (hidden_layer1, hidden_layer2, hidden_layer_3)

    random_numbers = np.random.RandomState().random_sample(7)
    setup_list = [('CO','low','binding_type'),('CO','low','combine_hollow_sites')\
                                    ,('CO','low','GCN'),('CO','high','binding_type')\
                                    ,('CO','high','combine_hollow_sites'),('CO','high','GCN')\
                                    ,('CO',1,'binding_type'),('CO',1,'combine_hollow_sites'),('CO',1,'GCN')\
                                    ,('NO','low','binding_type'),('NO','low','combine_hollow_sites')\
                                    ,('NO','low','GCN'),('NO','high','binding_type')\
                                    ,('NO','high','combine_hollow_sites')\
                                    ,('NO',1,'binding_type'),('NO',1,'combine_hollow_sites'),('NO',1,'GCN')\
                                    ,('C2H4','low','binding_type'),('C2H4','low','GCN',False),('C2H4','low','GCN',True)]
    which_setup = setup_list[np.random.RandomState().choice(
        np.arange(len(setup_list)))]

    batch_size = int(10**(2 * random_numbers[0] + 1))
    learning_rate = 10**(random_numbers[1] - 4)
    epsilon = 10**(4 * random_numbers[2] - 14)
    alpha = 10**(5 * random_numbers[3] - 6)
    NUM_TRAIN = int(5 * 10**(random_numbers[4] + 3))
    training_sets = int(10**(random_numbers[5] + 2))
    epochs = 5
    if which_setup[1] == 'high' and which_setup[2] in [
            'binding_type', 'combine_hollow_sites'
    ]:
        NUM_TRAIN = 500
        batch_size = 5
        training_sets = 100
    print('batch_size: ' + str(batch_size))
    print('learning_rate: ' + str(learning_rate))
    print('epsilon: ' + str(epsilon))
    print('alpha: ' + str(alpha))
    print('NUM_TRAIN: ' + str(NUM_TRAIN))
    print('training_sets: ' + str(training_sets))
    print('epochs: ' + str(epochs))
    print('hidden layers: ' + str(hidden_layers))
    #batch_size: 2297
    #learning_rate = 0.06651629336077657
    #epsilon = 0.02872678693613251
    #alpha = 0.001931666535492889
    #NUM_TRAIN = 100000
    #epochs=2
    #training_sets = 2
    ADSORBATE = which_setup[0]
    if ADSORBATE == 'CO':
        INCLUDED_BINDING_TYPES = [1, 2, 3, 4]
        MAX_COVERAGES = [1, 0.7, 0.2, 0.2]
        BINDING_TYPE_FOR_GCN = [1]
        HIGH_FREQUENCY = 2200
        ENERGY_POINTS = 500
        GCN_ALL = False
    elif ADSORBATE == 'NO':
        INCLUDED_BINDING_TYPES = [1, 2, 3, 4]
        MAX_COVERAGES = [1, 1, 1, 1]
        BINDING_TYPE_FOR_GCN = [1]
        HIGH_FREQUENCY = 2000
        ENERGY_POINTS = 450
        GCN_ALL = False
    elif ADSORBATE == 'C2H4':
        INCLUDED_BINDING_TYPES = [1, 2]
        MAX_COVERAGES = [1, 1]
        BINDING_TYPE_FOR_GCN = [2]
        HIGH_FREQUENCY = 2000
        ENERGY_POINTS = 450
        if which_setup[2] == 'GCN':
            GCN_ALL = which_setup[3]
        else:
            GCN_ALL = False
    COVERAGE = which_setup[1]
    TARGET = which_setup[2]
    print('ADSORBATE: ' + ADSORBATE)
    print('TARGET: ' + str(TARGET))
    print('COVERAGE: ' + str(COVERAGE))
    print('GCN_ALL: ' + str(GCN_ALL))

    work_dir = os.getcwd()
    cross_validation_path = os.path.join(
        work_dir,
        'cross_validation_' + ADSORBATE + '_' + TARGET + '_' + str(COVERAGE))
    print(cross_validation_path)
    CV_class = CROSS_VALIDATION(ADSORBATE=ADSORBATE,INCLUDED_BINDING_TYPES=INCLUDED_BINDING_TYPES\
                                ,cross_validation_path=cross_validation_path, VERBOSE=True)
    CV_SPLITS = 3
    CV_class.generate_test_cv_indices(CV_SPLITS=CV_SPLITS, BINDING_TYPE_FOR_GCN=BINDING_TYPE_FOR_GCN\
        , test_fraction=0.25, random_state=0, read_file=True, write_file=False)
    properties_dictionary = {'batch_size':batch_size, 'learning_rate_init':learning_rate\
    , 'epsilon':epsilon,'hidden_layer_sizes':hidden_layers,'regularization':L1orL2\
    ,'alpha':alpha, 'epochs_per_training_set':epochs,'training_sets':training_sets,'loss': 'wasserstein_loss'}
    CV_class.set_model_parameters(TARGET=TARGET, COVERAGE=COVERAGE\
    , MAX_COVERAGES = MAX_COVERAGES, NN_PROPERTIES=properties_dictionary\
    , NUM_TRAIN=NUM_TRAIN, NUM_VAL=10000, NUM_TEST=10000\
    , MIN_GCN_PER_LABEL=12, NUM_GCN_LABELS=10, GCN_ALL = GCN_ALL, TRAINING_ERROR = TRAINING_ERROR\
    , LOW_FREQUENCY=200, HIGH_FREQUENCY=HIGH_FREQUENCY, ENERGY_POINTS=ENERGY_POINTS)
    CV_RESULTS_FILE = ADSORBATE + '_' + TARGET + '_' + str(
        COVERAGE) + '_' + run_number
    CV_class.run_CV_multiprocess(write_file=True,
                                 CV_RESULTS_FILE=CV_RESULTS_FILE,
                                 num_procs=CV_SPLITS + 1)