def process_run():
    """Build a summary table for the campaign run in the current directory.

    The function works entirely relative to ``os.getcwd()``:

    * If ``error.json`` exists, its contents are printed (processing still
      continues afterwards).
    * If ``seed_data.pickle`` is missing, an error message naming the folder
      is printed and nothing else is done.
    * Otherwise the pickled results and ``discovered_unique_structures.json``
      are loaded, a ``StabilityAnalyzer`` is run over the discovered ids,
      DFT results for the chemical system are fetched from ``API_URL``, and
      everything is joined into one DataFrame.

    The chemical system name is taken from the last path component of the
    working directory.

    Returns:
        pandas.DataFrame with the columns 'Composition', 'delta_e',
        'stabilities', 'bandgap', 'bandgap_garcia_exp', 'structure',
        'chemsys', the columns produced by ``get_structure_data`` and 'url';
        or ``None`` when ``seed_data.pickle`` is not present.
    """
    folder = os.getcwd()
    if os.path.isfile("error.json"):
        error = loadfn("error.json")
        print("{} ERROR: {}".format(folder, error))

    required_files = ['seed_data.pickle']
    if not all(os.path.isfile(fn) for fn in required_files):
        # The folder must be passed to format(); without it the message
        # contains a literal "{}" and does not say which run failed.
        print("{} ERROR: no seed data, no analysis to be done".format(folder))
        return None

    analyzer = StabilityAnalyzer(hull_distance=0.2, parallel=True)
    # NOTE(review): pickle.load can execute arbitrary code; this must only
    # ever be pointed at seed files produced by trusted runs.
    with open("seed_data.pickle", "rb") as f:
        result_df = pickle.load(f)

    unique_structures = loadfn("discovered_unique_structures.json")
    all_result_ids = list(unique_structures)

    summary = result_df.loc[all_result_ids]
    summary = summary[['Composition', 'delta_e']]
    analyzer.analyze(result_df, all_result_ids=all_result_ids,
                     new_result_ids=all_result_ids)
    # Add stabilities
    summary['stabilities'] = pd.Series(analyzer.stabilities)

    chemsys = os.path.split(folder)[-1]

    # Get all DFT data
    response = requests.get('{}/synthesis-discovery/{}/dft-results'.format(
        API_URL, chemsys))
    data = json.loads(response.content.decode('utf-8'))
    data = pd.DataFrame(data)
    # Each entry of 'dft_results' is merged into one flat mapping; later
    # entries overwrite earlier ones on key collisions.
    aggregated = {}
    for result in data['dft_results']:
        aggregated.update(result)
    simulation_data = pd.DataFrame.from_dict(aggregated, orient='index')
    summary['bandgap'] = simulation_data['bandgap']

    # Apply garcia correction
    summary['bandgap_garcia_exp'] = 1.358 * summary['bandgap'] + 0.904
    summary['structure'] = pd.Series(unique_structures)
    summary['chemsys'] = [
        '-'.join(sorted(Composition(comp).as_dict().keys()))
        for comp in summary['Composition']
    ]

    # Add structure data
    symmetry_data = {
        key: get_structure_data(structure)
        for key, structure in unique_structures.items()
    }
    symmetry_df = pd.DataFrame.from_dict(symmetry_data, orient='index')
    summary = pd.concat([summary, symmetry_df], axis=1)
    summary['url'] = simulation_data['url']
    return summary
def test_analyze(self):
    """Check stability values after a seed pass and a follow-up pass.

    Rows matching "TiNO" are taken from ``test_df_analysis.csv``; two
    entries are held out, the analyzer is first run on the remaining rows
    with an empty seed, and then run on the held-out rows against the seed
    returned by the first pass. The 'stability' and 'is_stable' values of
    the two held-out entries in the final seed are asserted.
    """
    csv_path = os.path.join(CAMD_TEST_FILES, "test_df_analysis.csv")
    frame = pd.read_csv(csv_path, index_col="id")
    frame["Composition"] = frame["formula"]
    stability_analyzer = StabilityAnalyzer(hull_distance=0.1)

    # TODO: resolve drop_duplicates filtering mp data
    tino_rows = filter_dataframe_by_composition(frame, "TiNO")
    tino_rows = tino_rows.drop_duplicates(keep="last").dropna()

    # Hold two entries out so they can be fed in as "new" results later.
    held_out_ids = ["mp-30998", "mp-572822"]
    held_out_rows = tino_rows.loc[held_out_ids]
    initial_rows = tino_rows.drop(index=held_out_ids)

    # First pass: everything except the held-out rows, starting from an
    # empty seed. Only the returned seed is needed afterwards.
    _, first_seed = stability_analyzer.analyze(
        new_experimental_results=initial_rows,
        seed_data=pd.DataFrame(),
    )

    # Second pass: the held-out rows analyzed against the first seed.
    _, new_seed = stability_analyzer.analyze(
        new_experimental_results=held_out_rows,
        seed_data=first_seed,
    )

    stable_id, unstable_id = held_out_ids
    self.assertAlmostEqual(new_seed.loc[stable_id, "stability"], 0)
    self.assertAlmostEqual(new_seed.loc[unstable_id, "stability"], 0.52784795)
    self.assertTrue(new_seed.loc[stable_id, "is_stable"])
    self.assertFalse(new_seed.loc[unstable_id, "is_stable"])