## DEFINING INPUT LOCATION
## THE SOLUTE SUBDIRECTORY IS INCLUDED ONLY WHEN want_full_path IS EXACTLY False
input_dir_path = (
    os.path.join(input_trajectory_location, solute, directory_name)
    if want_full_path is False
    else os.path.join(input_trajectory_location, directory_name)
)
print("Input dir path: %s" % (input_dir_path))

## PROCEEDING ONLY WHEN THE INPUT DIRECTORY EXISTS
if os.path.exists(input_dir_path):
    ## NAME OF THE PICKLE, BUILT FROM THE SYSTEM DESCRIPTORS
    pickle_name = convert_to_single_name(
        solute=solute,
        solvent=solvents,
        mass_fraction=mass_frac,
        temp=temperature,
    )

    ## FULL PATH TO THE PICKLE INSIDE THE OUTPUT DATABASE
    pickle_file_path = os.path.join(
        output_database_location,
        pickle_name + picklesuffix,
    )

    ### LOADING TRAJECTORY
    traj_data = import_tools.import_traj(
        directory=input_dir_path,  # directory holding the simulation files
        structure_file=gro_file,   # structure file
        xtc_file=xtc_file,         # trajectory file
    )

    ## RUNNING GRID INTERPOLATION
def load_xy_data(self):
    '''
    Collects the x data, class labels, and instance names for every
    solute / cosolvent / mass fraction combination that has a class value.
    INPUTS (read from self):
        class_file_path: path handed to pd.read_csv to get the class table
        solute_list, solvent_list, mass_frac_data: iterables that are
            looped over (in that nesting order) to form each instance
        database_path: directory joined with the instance name to locate
            the stored training data
        representation_type, representation_inputs: forwarded unchanged
            to combine_training_data
        verbose: when equal to True, one summary line is printed per
            loaded instance
    OUTPUTS (stored on self):
        x_data: [list] output of combine_training_data for each kept instance
        y_label: [list] class value for each kept instance
        instance_names: [list] name of each kept instance
        total_instances: [int] number of kept instances
    NOTES:
        - Instances whose class value stringifies to 'nan' are reported
          and skipped.
        - The label is appended before the stored data is loaded, exactly
          as in the original ordering.
    '''
    ## READING THE CLASS TABLE
    class_table = pd.read_csv(self.class_file_path)

    ## STARTING FROM EMPTY CONTAINERS
    self.x_data = []
    self.y_label = []
    self.instance_names = []

    ## LOOPING THROUGH SOLUTE -> COSOLVENT -> MASS FRACTION OF WATER
    for solute_name in self.solute_list:
        for cosolvent_name in self.solvent_list:
            for water_mass_frac in self.mass_frac_data:
                ## TEMPERATURE IS LOOKED UP PER SOLUTE
                instance_temp = SOLUTE_TO_TEMP_DICT[solute_name]

                ## CONVERTING THE INSTANCE TO ITS SINGLE-NAME NOMENCLATURE
                instance_name = convert_to_single_name(
                    solute=solute_name,
                    solvent=cosolvent_name,
                    mass_fraction=water_mass_frac,
                    temp=instance_temp,
                )

                ## FINDING THE CLASS VALUE FOR THIS INSTANCE
                class_value = locate_test_instance_value(
                    csv_file=class_table,
                    solute=solute_name,
                    cosolvent=cosolvent_name,
                    mass_frac_water=water_mass_frac,
                )

                ## SKIPPING INSTANCES WITHOUT A CLASS VALUE
                if str(class_value) == 'nan':
                    print(
                        "Training instance: %s, Class value: %s -- skipping!"
                        % (instance_name, class_value))
                    continue

                ## STORING THE LABEL
                self.y_label.append(class_value)

                ## DEFINING AND PRINTING THE FULL PATH TO THE STORED DATA
                stored_data_path = os.path.join(self.database_path,
                                                instance_name)
                print(stored_data_path)

                ## LOADING THE STORED DATA FOR THIS INSTANCE
                instance_data = load_pickle(stored_data_path)

                ## CHANGING THE DATA TO THE REQUESTED REPRESENTATION
                representation, extra_info = combine_training_data(
                    training_data_for_instance=instance_data,
                    representation_type=self.representation_type,
                    representation_inputs=self.representation_inputs,
                )

                ## PRINTING A SUMMARY LINE WHEN VERBOSE
                if self.verbose == True:
                    if extra_info is None:
                        summary = ("Instance: %s, Class value: %s"
                                   % (instance_name, class_value))
                    else:
                        summary = ("Instance: %s, Class value: %s, %s"
                                   % (instance_name, class_value, extra_info))
                    print(summary)

                ## STORING THE REPRESENTATION AND ITS NAME
                self.x_data.append(representation)
                self.instance_names.append(instance_name)

    ## FINDING TOTAL INSTANCES
    self.total_instances = len(self.instance_names)
(class_instance_value)) # Should output negative # if (class_instance_value == 'positive'): # y_label.append(1) # elif (class_instance_value == 'negative'): # y_label.append(0) if (str(class_instance_value) != 'nan'): y_label.append(class_instance_value) y_label.append(class_instance_value) y_label.append(class_instance_value) y_label.append(class_instance_value) y_label.append(class_instance_value) ## CONVERTING TRAINING INSTANCE NAME TO NOMENCLATURE training_instance_name = convert_to_single_name( solute=training_instance['solute'], solvent=training_instance['cosolvent'], mass_fraction=training_instance['mass_frac'], temp=training_instance['temp']) ## DEFINING FULL TRAINING PATH full_train_pickle_path = os.path.join(database_path, training_instance_name) ## EXTRACTION PROTOCOL A PARTICULAR TRAINING EXAMPLE training_data_for_instance = load_pickle( full_train_pickle_path) print(training_data_for_instance.shape ) # OUTPUT: (1001, 20, 20, 20, 3) ## I TAKE THE AVG, MANY OTHER WAYS ARE AVAILABLE ensemble_avg1 = np.average(