Esempio n. 1
0
def process_run():
    """Build a summary table for the run stored in the current directory.

    If ``error.json`` exists in the working directory, its content is
    printed, and processing continues. If ``seed_data.pickle`` is missing,
    a message is printed and the function returns ``None``.

    Otherwise the pickled results are loaded, restricted to the ids found
    in ``discovered_unique_structures.json``, and augmented with:

    * ``stabilities`` taken from a ``StabilityAnalyzer`` run,
    * ``bandgap`` and ``url`` from the DFT results fetched from ``API_URL``
      for the chemical system named by the last component of the working
      directory path,
    * ``bandgap_garcia_exp``, a linear rescaling of ``bandgap``,
    * ``structure`` and ``chemsys`` columns,
    * the per-structure columns produced by ``get_structure_data``.

    Returns:
        The summary ``DataFrame``, or ``None`` when the seed data file is
        not present.
    """
    folder = os.getcwd()
    if os.path.isfile("error.json"):
        error = loadfn("error.json")
        print("{} ERROR: {}".format(folder, error))

    required_files = ['seed_data.pickle']
    if not all(os.path.isfile(fn) for fn in required_files):
        # The folder must be passed to format(); otherwise a literal "{}"
        # is printed and the failing run cannot be identified.
        print("{} ERROR: no seed data, no analysis to be done".format(folder))
        return None

    analyzer = StabilityAnalyzer(hull_distance=0.2, parallel=True)
    # NOTE(review): pickle.load can execute arbitrary code; only run this
    # on seed data produced by a trusted source.
    with open("seed_data.pickle", "rb") as f:
        result_df = pickle.load(f)

    unique_structures = loadfn("discovered_unique_structures.json")
    all_result_ids = list(unique_structures.keys())

    summary = result_df.loc[all_result_ids]
    summary = summary[['Composition', 'delta_e']]
    analyzer.analyze(result_df,
                     all_result_ids=all_result_ids,
                     new_result_ids=all_result_ids)
    # Add stabilities
    summary['stabilities'] = pd.Series(analyzer.stabilities)

    # The chemical system is the name of the directory being processed.
    chemsys = os.path.split(folder)[-1]
    # Get all DFT data
    # NOTE(review): no timeout is set on this request, so an unresponsive
    # server blocks indefinitely - consider passing timeout=...
    response = requests.get('{}/synthesis-discovery/{}/dft-results'.format(
        API_URL, chemsys))
    data = json.loads(response.content.decode('utf-8'))
    data = pd.DataFrame(data)
    # Merge the per-entry dicts into one mapping so it can be turned into
    # a frame with one row per key.
    aggregated = {}
    for result in data['dft_results']:
        aggregated.update(result)
    simulation_data = pd.DataFrame.from_dict(aggregated, orient='index')
    summary['bandgap'] = simulation_data['bandgap']
    # Apply garcia correction
    summary['bandgap_garcia_exp'] = 1.358 * summary['bandgap'] + 0.904
    summary['structure'] = pd.Series(unique_structures)
    summary['chemsys'] = [
        '-'.join(sorted(Composition(comp).as_dict().keys()))
        for comp in summary['Composition']
    ]

    # Add structure data
    symmetry_data = {
        key: get_structure_data(structure)
        for key, structure in unique_structures.items()
    }
    symmetry_df = pd.DataFrame.from_dict(symmetry_data, orient='index')
    summary = pd.concat([summary, symmetry_df], axis=1)
    summary['url'] = simulation_data['url']
    return summary
Esempio n. 2
0
 def test_analyze(self):
     """Check stabilities after analyzing seed data and two held-out rows.

     The analyzer is first run on the filtered seed data against an empty
     seed frame, then on the two held-out entries against the seed data
     returned by the first pass.
     """
     csv_path = os.path.join(CAMD_TEST_FILES, "test_df_analysis.csv")
     frame = pd.read_csv(csv_path, index_col="id")
     frame['Composition'] = frame['formula']
     analyzer = StabilityAnalyzer(hull_distance=0.1)

     filtered = filter_dataframe_by_composition(frame, "TiNO")
     # TODO: resolve drop_duplicates filtering mp data
     deduplicated = filtered.drop_duplicates(keep='last')
     cleaned = deduplicated.dropna()

     # Hold two entries back so they can be fed in as new results.
     held_out_ids = ["mp-30998", "mp-572822"]
     held_out = cleaned.loc[held_out_ids]
     remaining = cleaned.drop(index=held_out_ids)

     # First pass: everything except the held-out rows, with no prior seed.
     summary, first_seed = analyzer.analyze(
         new_experimental_results=remaining,
         seed_data=pd.DataFrame(),
     )
     # Second pass: the held-out rows on top of the first pass's seed data.
     summary, new_seed = analyzer.analyze(
         new_experimental_results=held_out,
         seed_data=first_seed,
     )

     stable_id, unstable_id = held_out_ids
     self.assertAlmostEqual(new_seed.loc[stable_id, 'stability'], 0)
     self.assertAlmostEqual(new_seed.loc[unstable_id, 'stability'],
                            0.52784795)
     self.assertTrue(new_seed.loc[stable_id, 'is_stable'])
     self.assertFalse(new_seed.loc[unstable_id, 'is_stable'])