Example 1
def get_run_parameters(SB):
	#GET MISCELLANEOUS RUN PARAMETERS
	SB['src_path']		= 	path.dirname(path.realpath(__file__))
	SB['run_path']		= 	writer.run_path
	SB['start_time']	= 	writer.start_time
	writer.log("RUN PARAMETERS:")
	writer.log_dict(SB)
Example 2
def read_pot_file(SB):
    if (SB['pot_type'] == "NN"):
        file_path = SB['pot_file']
        writer.log("READING NEURAL NETWORK FILE:")
        if (path.exists(file_path)):
            lines = []
            with open(file_path, "r") as input_file:  #close the file when done
                for line in input_file:
                    parts = line.strip().split()  #strip trailing newline
                    if (len(parts) == 1):
                        parts = parts[0]  #don't save single numbers as arrays
                    if (parts != []): lines.append(parts)
            pot = neural.NN(lines, SB)  #send lines to NN class to create NN object
        else:
            raise ValueError("NN_FILE=" + str(file_path) + " DOES NOT EXIST")
        writer.log_dict(pot.info)
        SB.update(pot.info)
        SB['nn'] = pot
Example 3
def read_input(SB):

    #READ DEFAULT FILE
    file_path = SB['src_path'] + '/defaults.json'
    if (path.exists(file_path)):
        writer.log(["READING DEFAULT PARAMETERS USING FILE:", file_path])
        with open(file_path, "r") as read_file:  #READ DEFAULTS FILE
            input_data = load(read_file)
        writer.log_dict(input_data)
        SB.update(input_data)
    else:
        raise ValueError("DEFAULT_FILE=" + str(file_path) + " DOESNT EXIST")

    #READ INPUT FILE
    file_path = SB['input_file']
    if (path.exists(file_path)):
        writer.log([
            "OVERWRITING SELECT DEFAULTS USING INPUT FILE:", SB['input_file']
        ])
        with open(file_path, "r") as read_file:  #READ USER INPUT FILE
            input_data = load(read_file)
        writer.log_dict(input_data)
        SB.update(input_data)
    else:
        raise ValueError("INPUT_FILE=" + str(file_path) + " DOESNT EXIST")

    if (SB['use_cuda'] and not cuda.is_available()):
        writer.log("NOTE: CUDA IS NOT AVAILABLE (RE-SETTING)")
        writer.log("	use_cuda		:	False")
        SB['use_cuda'] = False

    if ('pot_type' not in SB.keys() or 'pot_file' not in SB.keys()
            or 'dataset_path' not in SB.keys()):
        raise ValueError(
            "INPUT FILE MUST CONTAIN THE FOLLOWING KEYS: pot_type, pot_file, dataset_path"
        )
    if (SB['pot_type'] != "NN"):
        raise ValueError("REQUESTED POT_TYPE=" + str(SB['pot_type']) +
                         " IS NOT AVAILABLE")
Example 4
def read_database(SB):
    file_path = SB['dataset_path']
    writer.log("READING DATASET FILE:")

    full_set = data.Dataset("full", SB)  #INITIALIZE FULL DATASET OBJECT

    SID = 0
    new_structure = True
    if (path.exists(file_path)):
        input_file = open(file_path, "r")
        for line in input_file:
            if (new_structure):
                lines = []
                new_structure = False
                counter = 1
                full_set.Ns += 1
            else:
                counter += 1
            parts = line.strip().split()
            if (len(parts) == 1):
                parts = parts[0]  #don't save single numbers as arrays
            lines.append(parts)
            #TODO: THIS IS HARDCODED FOR THE CURRENT POSCAR FORMAT (NEEDS TO BE GENERALIZED)
            if (counter == 6): Natom = int(parts)
            if (counter > 6):
                if (counter == 6 + 1 + Natom +
                        1):  #see dataset examples for format
                    full_set.Na += Natom
                    full_set.structures[SID] = data.Structure(lines, SID, SB)
                    #create structure object
                    GID = str(full_set.structures[SID].gid)
                    if (GID not in full_set.group_sids.keys()):
                        full_set.group_sids[GID] = []
                    full_set.group_sids[GID].append(
                        [full_set.structures[SID].v, SID])
                    new_structure = True
                    SID += 1
                    #print(lines)
        input_file.close()
    else:
        raise ValueError("DATABASE_FILE=" + str(file_path) + " DOESNT EXIST")

    full_set.sort_group_sids()
    writer.log(["	TOTAL NUMBER OF STRUCTURES:", full_set.Ns])
    writer.log(["	TOTAL NUMBER OF ATOMS:	", full_set.Na])
    SB['full_set'] = full_set
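
A quick check on the hard-coded block accounting above: per the counter logic, each structure occupies 6 header lines, one line, Natom lines, and one trailing line, so the block is complete when counter reaches 6 + 1 + Natom + 1.

# Illustrative arithmetic only, mirroring the counter test in read_database.
def structure_block_length(Natom):
    return 6 + 1 + Natom + 1   # e.g. 12 lines for a 4-atom structure

assert structure_block_length(4) == 12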
Example 5
    def add_neurons(self):
        #add N neurons to each hidden layer

        if (self.info['activation'] != 1):
            raise ValueError(
                "ERROR: CAN ONLY ADD NEURONS TO SHIFTD SIGMOID FUNCTION")

        #START FRESH EVERY TIME
        start_fresh = self.info['start_fresh']
        if (start_fresh):
            self.randomize()
            max_rand_wb = self.info['max_rand_wb']
        else:
            max_rand_wb = 1.0

        self.unset_grad()

        new_nfit = 0
        N_neuron_2_add = 2
        writer.log("ADDING " + str(N_neuron_2_add) + " NEURONS TO EACH LAYER")
        writer.log(["	original num_fit_param	=", self.info['num_fit_param']])

        for layer_add in range(1, len(self.info['nn_layers']) - 1):
            for neurons in range(0, N_neuron_2_add):
                for i in range(0, len(self.submatrices)):
                    layer = 2 * (i - 1)

                    if (layer_add == (i + 2.0) / 2):
                        #ADD ROW (WEIGHT MATRIX)
                        shp2 = self.submatrices[i].shape[1]
                        TMP = max_rand_wb * torch.empty(1, shp2).uniform_(
                            -1.0, 1.0)
                        self.submatrices[i] = torch.cat(
                            (self.submatrices[i], TMP))

                        #ADD BIAS
                        shp2 = self.submatrices[i + 1].shape[1]
                        TMP = max_rand_wb * torch.empty(1, shp2).uniform_(
                            -1.0, 1.0)
                        self.submatrices[i + 1] = torch.cat(
                            (self.submatrices[i + 1], TMP))

                        #ADD COLUMN (WEIGHT MATRIX)
                        shp1 = self.submatrices[i + 2].shape[0]
                        TMP = max_rand_wb * torch.empty(shp1, 1).uniform_(
                            -1.0, 1.0)
                        self.submatrices[i + 2] = torch.cat(
                            (self.submatrices[i + 2], TMP), 1)

                        self.info['nn_layers'][layer_add] += 1

        #COUNT NFIT
        for i in range(0, len(self.submatrices)):
            new_nfit += self.submatrices[i].shape[0] * self.submatrices[
                i].shape[1]

        self.info['num_fit_param'] = new_nfit
        writer.log(["	new num_fit_param	=", new_nfit])

        self.set_grad()
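
For orientation, a small sketch (the layer sizes are made up) of how num_fit_param changes when add_neurons grows each hidden layer by N_neuron_2_add = 2; the count follows the same weights-plus-biases sum used in the NN constructor (Example 6):

# Illustrative check only: parameter count as a function of nn_layers,
# matching the nfit loop in NN.__init__ (weights + biases per layer pair).
def count_fit_param(layers):
    return sum(layers[i - 1] * layers[i] + layers[i] for i in range(1, len(layers)))

print(count_fit_param([60, 20, 20, 1]))   # hypothetical layers before add_neurons -> 1661
print(count_fit_param([60, 22, 22, 1]))   # after adding 2 neurons per hidden layer -> 1871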
Example 6
    def __init__(self, lines, SB):

        info = {}
        info['lsp_type'] = int(lines[0][0])
        info['pot_type'] = str(SB['pot_type'])
        info['lsp_shift'] = float(lines[0][1])
        info['activation'] = int(lines[0][2])
        info['num_species'] = int(lines[1][0])
        info['species'] = str(lines[2][0])
        info['atomic_weight'] = float(lines[2][1])
        info['randomize_nn'] = bool(int(lines[3][0]))
        info['max_rand_wb'] = float(lines[3][1])
        info['cutoff_dist'] = float(lines[3][2])
        info['cutoff_range'] = float(lines[3][3])
        info['lsp_sigma'] = float(lines[3][4])
        info['N_lg_poly'] = int(lines[4][0])
        #map converts str list to int list
        info['lsp_lg_poly'] = list(map(int, lines[4][1:]))
        info['N_ro_val'] = int(lines[5][0])
        info['lsp_ro_val'] = list(map(float, lines[5][1:]))
        #map converts str list to float list
        info['ibaseline'] = bool(int(lines[6][0]))
        info['bop_param'] = list(map(float, lines[6][1:]))
        info['nn_layers'] = list(map(int, lines[7][1:]))
        info['cnst_final_bias'] = SB['cnst_final_bias']
        info['final_bias'] = SB['final_bias']
        info['start_fresh'] = SB['start_fresh']
        info['constrain_WB'] = SB['constrain_WB']

        #DETERMINE NUMBER OF FITTING PARAMETERS AND RANDOMIZE IF NEEDED
        nfit = 0
        layers = info['nn_layers']
        for i in range(1, len(layers)):
            nfit = nfit + layers[i - 1] * layers[i] + layers[i]
        info['num_fit_param'] = nfit

        self.info = info

        self.normalize_by_ro = SB["normalize_by_ro"]

        self.dtype = torch.FloatTensor
        if (SB['use_cuda']): self.dtype = torch.cuda.FloatTensor

        if (info['randomize_nn'] or SB['re_randomize']):
            writer.log(["	 RANDOMIZING NN MIN/MAX	=", info['max_rand_wb']])
            self.randomize()
        else:
            #always do LR ramp up when re-starting
            SB['ramp_LR'] = True
            WB = np.array(lines[8:]).astype(float)[:, 0]
            self.submatrices = self.extract_submatrices(WB)
            if (len(WB) != info['num_fit_param']):
                raise ValueError("INCORRECT NUMBER OF FITTING PARAMETERS")

        #SOME ERROR CHECKS
        if (info['num_species'] != 1):
            raise ValueError("NUM_SPECIES != 1 IN EURAL NETWORK FILE")
        if (len(info['nn_layers']) != int(lines[7][0])):
            raise ValueError(
                "NUMBER OF LAYERS IN NEURAL NETWORK FILE IS INCORRECT")

        if (int(lines[0][0]) not in [5, 6, 7, 20]):
            raise ValueError("REQUESTED LSP_TYPE=" + str(int(lines[0][0])) +
                             " NOT AVAILABLE")
        if (info['pot_type'] == 'PINN_BOP' and info['nn_layers'][-1] != 8):
            raise ValueError("ERROR: NN OUTPUT DIMENSION INCORRECT")
        if (info['pot_type'] == 'NN' and info['nn_layers'][-1] != 1):
            raise ValueError("ERROR: NN OUTPUT DIMENSION INCORRECT")
        if (info['N_ro_val'] != len(info['lsp_ro_val'])):
            raise ValueError("ERROR: N_ro_val != len(ro)")
        if (info['N_lg_poly'] != len(info['lsp_lg_poly'])):
            raise ValueError("ERROR: N_lg_poly != len(lsp_lg_poly)")
        if (info['nn_layers'][0] !=
                len(info['lsp_ro_val']) * len(info['lsp_lg_poly'])):
            raise ValueError(
                "ERROR: NN INPUT DIMENSION INCORRECT FOR Gi CHOICE")
Example 7
def set_optim():
	global optimizer  #module-level optimizer so closure() and the loop below can use it
	optimizer=optim.LBFGS(SB['nn'].submatrices, max_iter=SB['lbfgs_max_iter'], lr=SB['LR_f'])
set_optim()

def closure():
	global loss,OBE1,OBL1,OBLP,OB_DU,rmse,OBT
	optimizer.zero_grad(); loss=0.0 
	[rmse,OBE1,OB_DU,OBL1,OBLP]=training_set.compute_objective(SB)
	loss=OBE1+OB_DU+OBL1+OBLP
	loss.backward();
	OBE1=OBE1.item();	OB_DU=OB_DU.item();	OBLP=OBLP.item()
	OBL1=OBL1.item();	OBT=loss.item();
	return loss

#OPTIMIZATION LOOP
start=time();  
writer.log('STARTING FITTING LOOP:')
writer.log(["	INITIAL LR:",'%10.7s'%str(optimizer.param_groups[0]['lr'])])
N_TRY=1; 
while(t<max_iter):  

	optimizer.step(closure)
	if(SB['ramp_LR']): scheduler.step() #ADJUST LR 

	#CHECK CONVERGENCE
	if(str(OBE1)=='nan' or rmse>1000000): #START OVER
		writer.log("NOTE: THE OBJ FUNCTION BLEW UP (IM STARTING OVER)(MAYBE TRY SMALLER LR)")
		SB['nn'].unset_grad();	SB['nn'].randomize();	set_optim(); N_TRY=N_TRY+1

	delta1=abs(rmse_m1-rmse)
	delta2=abs(rmse_m2-rmse)
Example 8
def partition_data(SB):

	test_set=data.Dataset("test",SB) #INITIALIZE DATASET OBJECT
	training_set=data.Dataset("train",SB) #INITIALIZE DATASET OBJECT
	validation_set=data.Dataset("validate",SB) #INITIALIZE DATASET OBJECT
	no_dft_set=data.Dataset("no_dft",SB) #INITIALIZE DATASET OBJECT

	writer.log("PARTITIONING DATA:")
	writer.log(["	TOTAL NUMBER OF GROUPS=",len(SB['full_set'].group_sids.keys())])
	fraction_train=SB['fraction_train']
	train_edges=SB['train_edges']

	#ERROR CHECKS 
	if(fraction_train==0): 						
		raise ValueError("FRACTION_TRAIN=0 (CANT TRAIN WITHOUT TRAINING DATA)");
	if(fraction_train<0 or fraction_train>1): 
		ERR="BAD VALUE FOR FRACTION_TRAIN: (I.E. FRACTION_TRAIN<0 OR FRACTION_TRAIN>1)"	
		raise ValueError(ERR);
	if(SB['n_rand_GIDS']>=len(SB['full_set'].group_sids.keys())):
		ERR="N_RAND_GIDS IS LARGER THAN TOTAL NUMBER OF GIDS: USE N_RAND_GIDS<"  \
		+str(len(SB['full_set'].group_sids.keys()))
		raise ValueError(ERR);

	#-------------------------------------
	#TEST-SET (EXTRAPOLATION)
	#-------------------------------------

	if(SB['fix_rand_seed']): random.seed(a=412122, version=2)  #SAME RAND TEST SET EVERY TIME
			
	SB['test_set_gids']=[]					
	#COLLECT GID FOR TEST SET 
	if(SB['n_rand_GIDS']!=0):
			k=1
			while(k<=SB['n_rand_GIDS']):
				rand_GID=random.choice(list(SB['full_set'].group_sids.keys()))
				#if(rand_GID not in SB['test_set_gids'] ):
				keep=True  #keep unless an excluded tag matches
				for i1 in SB['exclude_from_test']:
					if(i1 in rand_GID):
						keep=False; break

				if(rand_GID not in SB['test_set_gids'] and keep and rand_GID != "NO_DFT"): #REMOVE
					#writer.log("	"+rand_GID)
					SB['test_set_gids'].append(rand_GID)
					k=k+1

	for key in SB['test_set_tags']:
		for GID in SB['full_set'].group_sids.keys(): 
			keep=True  #keep unless an excluded tag matches
			for i1 in SB['exclude_from_test']:
				if(i1 in GID):
					keep=False; break
			if(key in SB['test_set_gids']): keep=False
			if(key in GID and keep and GID != "NO_DFT"):
				SB['test_set_gids'].append(GID)

	writer.log("	TEST SET (UNTRAINED):")

	#COLLECT STRUCTURES FOR TEST SET
	for GID in SB['full_set'].group_sids.keys(): 
		if(GID in SB['test_set_gids']): 
			writer.log(["		GID		: ",GID])
			#test_set.group_sids[GID]= SB['full_set'].group_sids[GID] 
			for SID in SB['full_set'].group_sids[GID]:
				test_set.structures[SID] = SB['full_set'].structures[SID]
				test_set.Ns+=1;		
				test_set.Na+=SB['full_set'].structures[SID].N

	#EXTRAPOLATION
	if("NO_DFT" in SB['full_set'].group_sids.keys()):
		for SID in SB['full_set'].group_sids["NO_DFT"]:
			no_dft_set.structures[SID] = SB['full_set'].structures[SID]
			no_dft_set.Ns+=1;		
			no_dft_set.Na+=SB['full_set'].structures[SID].N

	#COLLECT WHAT'S LEFT (use for train+val)
	remainder=[] 
	for SID in SB['full_set'].structures.keys(): 
		if(SID not in test_set.structures.keys() and SID not in no_dft_set.structures.keys()):
			remainder.append(SID)

	#-------------------------------------
	#TRAIN-VALIDATION-SET (TRAIN+INTERPOLATION SET)
	#-------------------------------------
	#TRAINING SIDS (LIST OF DICTIONARY KEYS)
	train_indices=np.random.choice(len(remainder),int(fraction_train*len(remainder)), replace=False).tolist() #keys for training structures
	for i in train_indices: 
		training_set.structures[remainder[i]]= SB['full_set'].structures[remainder[i]] 
		training_set.Ns+=1;		
		training_set.Na+=SB['full_set'].structures[remainder[i]].N

	#ADD MIN/MAX VOLUME STRUCTURES IN EACH GROUP TO TRAINING SET
	if(train_edges):
		sid_2_add=[]  
		for i in SB['full_set'].group_sids.values():
			if(len(i)>4): #already sorted by volume 
				sid_2_add.append(i[0]);   sid_2_add.append(i[1])  
				sid_2_add.append(i[-2]);  sid_2_add.append(i[-1])   
		#print(sid_2_add)
		for SID in sid_2_add: 
			if(SID not in training_set.structures.keys() and SID not in test_set.structures.keys() \
			and SID not in no_dft_set.structures.keys()):  
				training_set.structures[SID]= SB['full_set'].structures[SID] 
				training_set.Ns+=1;		
				training_set.Na+=SB['full_set'].structures[SID].N

	#VALIDATION SIDS
	for SID in remainder: 
		if(SID not in training_set.structures.keys()):  
			validation_set.structures[SID]= SB['full_set'].structures[SID] 
			validation_set.Ns+=1;		
			validation_set.Na+=SB['full_set'].structures[SID].N

	#if(SB['full_set'].Ns != training_set.Ns+test_set.Ns+validation_set.Ns):
	#	raise ValueError("LOST A STRUCTURE IN DATA PARTITIONING");

	# if(test_SIDS==[]): test_SIDS=validation_SIDS #not ideal but test_SIDS cant be empty
	writer.log(["	N_train_structures	: ",training_set.Ns])
	writer.log(["	N_val_structures	: ",validation_set.Ns])
	writer.log(["	N_test_structures	: ",test_set.Ns])
	writer.log(["	N_combined		: ",training_set.Ns+test_set.Ns+validation_set.Ns])

	test_set.build_arrays(SB)
	training_set.build_arrays(SB)
	validation_set.build_arrays(SB)

	SB['training_set']=training_set;    SB['datasets']=['training_set']

	if(validation_set.Ns>0):
		SB['validation_set']=validation_set
		SB['datasets'].append('validation_set')
	if(test_set.Ns>0):
		SB['test_set']=test_set
		SB['datasets'].append('test_set')

	if("NO_DFT" in SB['full_set'].group_sids.keys()):
		no_dft_set.build_arrays(SB)
		SB['no_dft_set']=no_dft_set
		SB['datasets'].append('no_dft_set')
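
A worked example of the split arithmetic above (numbers are illustrative): with 100 structures left in remainder and fraction_train = 0.8, np.random.choice draws 80 SIDs for training, and whatever is not picked (and not pulled in by train_edges) becomes the validation set.

# Illustrative only: how fraction_train maps onto the remainder pool.
import numpy as np

remainder = list(range(100))                 # hypothetical leftover SIDs
fraction_train = 0.8
train_indices = np.random.choice(len(remainder), int(fraction_train * len(remainder)),
                                 replace=False).tolist()
print(len(train_indices))                    # 80 training structures
print(len(remainder) - len(train_indices))   # 20 validation structures (before train_edges)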
Example 9
def compute_all_lsps(SB):
	writer.log(["COMPUTING LOCAL STRUCTURE PARAMETERS (LSP):"])
	start = time.time();	
	for structure in SB['full_set'].structures.values():  
		structure.compute_lsp(SB); 
	writer.log(["	LSP CONSTRUCTION TIME (SEC)	=",time.time()-start])
Example 10
def compute_all_nbls(SB):
	writer.log(["COMPUTING NEIGHBOR LIST (NBL):"])
	start = time.time();	
	for structure in SB['full_set'].structures.values():  
		structure.compute_nbl(SB); 
	writer.log(["	NBL CONSTRUCTION TIME (SEC)	=",time.time()-start])