def main():
    """Combine the expanded pure and mixture training sets and save them.

    Two combined data sets are written into the ``training_sets`` directory,
    each as a ``.json`` file and a ``.csv`` file:

    * ``h_mix_rho_x_rho_pure`` - the pure set after calling
      ``filter_by_property_types("Density")`` merged with the mixture set.
    * ``h_mix_rho_x_rho_pure_h_vap`` - the unfiltered pure set merged with
      the mixture set.
    """
    # Set up logging
    logging.basicConfig(level=logging.INFO)

    output_directory = "training_sets"
    os.makedirs(output_directory, exist_ok=True)

    pure_path = (
        "../../../pure_optimisation/data_set_generation/expanded_set/training_set.json"
    )
    mixture_path = (
        "../../../mixture_optimisation/data_set_generation/"
        "expanded_set/training_sets/h_mix_rho_x_training_set.json"
    )

    # The same pure file is loaded twice so that one copy can be filtered
    # in place while the other copy is left untouched.
    unfiltered_pure = PhysicalPropertyDataSet.from_json(pure_path)

    density_only_pure = PhysicalPropertyDataSet.from_json(pure_path)
    density_only_pure.filter_by_property_types("Density")

    mixture_set = PhysicalPropertyDataSet.from_json(mixture_path)

    # (output file stem, pure data set to combine with the mixture set)
    combinations = (
        ("h_mix_rho_x_rho_pure", density_only_pure),
        ("h_mix_rho_x_rho_pure_h_vap", unfiltered_pure),
    )

    for file_stem, pure_set in combinations:

        # The pure set is always merged before the mixture set.
        combined_set = PhysicalPropertyDataSet()
        combined_set.merge(pure_set)
        combined_set.merge(mixture_set)

        combined_set.json(os.path.join(output_directory, file_stem + ".json"))

        combined_frame = combined_set.to_pandas()
        combined_frame.to_csv(os.path.join(output_directory, file_stem + ".csv"))
def main():
    """Build the pure component training set and write it to disk.

    The enthalpy of vaporization and pure density CSV files are each
    filtered down to a fixed list of SMILES patterns. For every substance in
    the density set, a single measurement is retained: the first one
    recorded at the state point which sorts closest to 298.15 K and 1 atm.
    The enthalpy of vaporization data is then merged in, and the result is
    saved as ``training_set.json`` and ``training_set.csv``.
    """
    training_set_smiles = [
        "CCO",
        "CC(=O)O",
        "COC=O",
        "CC(C)(C)O",
        "CC(C)O",
        "CO",
        "CCOC(C)=O",
        "CCOC(=O)CC(=O)OCC",
        "CC(C)CO",
        "CCCCO",
        "CCCCOC(C)=O",
        "CCCOC(C)=O",
    ]

    # Round-trip each pattern through `Component` so that the patterns are
    # standardized before they are used for filtering.
    smiles = [Component(pattern).smiles for pattern in training_set_smiles]

    data_root = os.path.join("..", "..", "..", "data_availability")

    # Load in the Hvap data
    h_vap_path = os.path.join(
        data_root, "sourced_h_vap_data", "enthalpy_of_vaporization_pure.csv"
    )
    h_vap_data_frame = filter_by_smiles(
        pandas.read_csv(h_vap_path),
        smiles_to_include=smiles,
        smiles_to_exclude=None,
    )
    h_vap_data_set = data_set_from_data_frame(h_vap_data_frame)

    # Load in the density data
    density_path = os.path.join(
        data_root,
        "data_by_environments",
        "alcohol_ester",
        "all_data",
        "density_pure.csv",
    )
    density_data_frame = filter_by_smiles(
        pandas.read_csv(density_path),
        smiles_to_include=smiles,
        smiles_to_exclude=None,
    )
    density_data_set = data_set_from_data_frame(density_data_frame)

    # Retain the density measurements which were made closest to 298.15K
    # and 1 atm.
    target_state_point = StatePoint(
        temperature=298.15 * unit.kelvin,
        pressure=1.0 * unit.atmosphere,
        mole_fractions=(1.0,),
    )
    distance_to_target = functools.partial(
        StatePoint.individual_distances, target_state_point
    )

    final_data_set = PhysicalPropertyDataSet()

    for substance in density_data_set.substances:

        # Group this substance's measurements by the state point at which
        # they were recorded.
        measurements_by_state = defaultdict(list)

        for measurement in density_data_set.properties_by_substance(substance):
            measured_state = StatePoint.from_physical_property(measurement)
            measurements_by_state[measured_state].append(measurement)

        # Rank the state points by their distance to the target state and
        # keep only the first measurement made at the closest one.
        ranked_states = sorted(measurements_by_state, key=distance_to_target)
        closest_state = ranked_states[0]

        final_data_set.add_properties(measurements_by_state[closest_state][0])

    final_data_set.merge(h_vap_data_set)

    final_data_set.json("training_set.json", format=True)
    final_data_set.to_pandas().to_csv("training_set.csv", index=False)
def main():
    """Combine the pure and mixture training sets and save each combination.

    Four combined data sets are written into the ``training_sets``
    directory, each as a ``.json`` file and a ``.csv`` file. Every
    combination merges one pure set (either unfiltered, or after calling
    ``filter_by_property_types("Density")``) with one mixture set (either
    the ``h_mix_density`` or the ``h_mix_v_excess`` training set).
    """
    # Set up logging
    logging.basicConfig(level=logging.INFO)

    output_directory = "training_sets"
    os.makedirs(output_directory, exist_ok=True)

    def save_combination(file_stem, pure_set, mixture_set):
        # Merge the pure set followed by the mixture set into a fresh data
        # set, then store the result as both JSON and CSV.
        combined_set = PhysicalPropertyDataSet()
        combined_set.merge(pure_set)
        combined_set.merge(mixture_set)

        combined_set.json(os.path.join(output_directory, file_stem + ".json"))
        combined_set.to_pandas().to_csv(
            os.path.join(output_directory, file_stem + ".csv")
        )

    pure_path = "../../pure_optimisation/data_set_generation/training_set.json"

    # The same pure file is loaded twice so that one copy can be filtered
    # in place while the other copy is left untouched.
    unfiltered_pure = PhysicalPropertyDataSet.from_json(pure_path)

    density_only_pure = PhysicalPropertyDataSet.from_json(pure_path)
    density_only_pure.filter_by_property_types("Density")

    v_excess_mixture = PhysicalPropertyDataSet.from_json(
        "../../mixture_optimisation/data_set_generation/"
        "training_sets/h_mix_v_excess_training_set.json"
    )
    binary_density_mixture = PhysicalPropertyDataSet.from_json(
        "../../mixture_optimisation/data_set_generation/"
        "training_sets/h_mix_density_training_set.json"
    )

    # Combinations which use the density-filtered pure set.
    save_combination(
        "h_mix_binary_density_pure_density",
        density_only_pure,
        binary_density_mixture,
    )
    save_combination(
        "h_mix_v_excess_pure_density",
        density_only_pure,
        v_excess_mixture,
    )

    # Combinations which use the unfiltered pure set.
    save_combination(
        "h_mix_binary_density_pure_density_h_vap",
        unfiltered_pure,
        binary_density_mixture,
    )
    save_combination(
        "h_mix_v_excess_pure_density_h_vap",
        unfiltered_pure,
        v_excess_mixture,
    )