def process_outputs(self, result: Result):
    """Handle a finished simulation task: store its records and fire workflow events.

    On success, increments the completed-molecule counter, possibly triggers
    retraining (every ``n_complete_before_retrain`` completions), appends the
    molecule and its QC records to on-disk JSON logs, updates the database,
    and sets ``self.done`` once ``n_to_evaluate`` molecules are finished.
    Every result (success or failure) is appended to ``simulation-results.json``.

    Args:
        result: Completed task whose ``args`` hold the SMILES string and whose
            ``value`` holds ``(opt_records, hess_records)`` on success
    """
    # Get basic task information
    smiles, = result.args

    # Release nodes for use by other processes
    self.rec.release("simulation", 1)

    # If successful, add to the database
    if result.success:
        # Mark that we've had another complete result
        self.n_evaluated += 1
        self.logger.info(f'Success! Finished screening {self.n_evaluated}/{self.n_to_evaluate} molecules')

        # Determine whether to start re-training
        if self.n_evaluated % self.n_complete_before_retrain == 0:
            if self.update_in_progress.is_set():
                # A previous training run is still going; do not overlap them
                self.logger.info(f'Waiting until previous training run completes.')
            else:
                self.logger.info(f'Starting retraining.')
                self.start_training.set()
        self.logger.info(f'{self.n_complete_before_retrain - self.n_evaluated % self.n_complete_before_retrain} results needed until we re-train again')

        # Store the data in a molecule data object
        data = MoleculeData.from_identifier(smiles=smiles)
        opt_records, hess_records = result.value
        for r in opt_records:
            data.add_geometry(r)
        for r in hess_records:
            data.add_single_point(r)
        # Refresh derived quantities before writing the record out
        data.update_thermochem()
        apply_recipes(data)

        # Attach the data source for the molecule
        data.subsets.append(self.search_space_name)

        # Add the IPs to the result object
        result.task_info["ip"] = data.oxidation_potential.copy()

        # Add to database (append-only JSON log, timestamped, then the DB itself)
        with open(self.output_dir.joinpath('moldata-records.json'), 'a') as fp:
            print(json.dumps([datetime.now().timestamp(), data.json()]), file=fp)
        self.database.update_molecule(data)

        # Write the raw QC records to disk
        with open(self.output_dir.joinpath('qcfractal-records.json'), 'a') as fp:
            for r in opt_records + hess_records:
                print(r.json(), file=fp)
        self.logger.info(f'Added complete calculation for {smiles} to database.')

        # Mark that we've completed one; stop once the target count is reached
        if self.n_evaluated >= self.n_to_evaluate:
            self.logger.info(f'No more molecules left to screen')
            self.done.set()
    else:
        self.logger.info(f'Computations failed for {smiles}. Check JSON file for stacktrace')

    # Write out the result to disk (value excluded to keep the log small)
    with open(self.output_dir.joinpath('simulation-results.json'), 'a') as fp:
        print(result.json(exclude={'value'}), file=fp)
    self.logger.info(f'Processed simulation task.')
def get_next_step(self, record: MoleculeData) -> Optional[str]:
    """Get the next fidelity level for a certain molecule given what we know about it

    Args:
        record: Molecule to be evaluated
    Returns:
        The name of the next level of fidelity needed for this computation.
        ``None`` if all have been completed
    """
    # Refresh the derived properties before inspecting them
    apply_recipes(record)

    # Select the property dictionary matching the configured oxidation state
    if self.oxidation_state == OxidationState.REDUCED:
        known = record.reduction_potential
    else:
        known = record.oxidation_potential

    # Nothing left to do once the final level has been reached
    if self.levels[-1] in known:
        return None

    # Scan from the highest fidelity downward; the first hit is the
    # highest level completed so far, so the next one in the chain follows it
    for idx in range(len(self.levels) - 1, -1, -1):
        if self.levels[idx] in known:
            return self.levels[idx + 1]

    # No level has been run yet: start at the bottom of the chain
    return self.levels[0]
def get_current_step(self, record: MoleculeData) -> str:
    """Get the current level of fidelity for a certain molecule

    Args:
        record: Molecule to be evaluated
    Returns:
        The name of the highest-level achieved so far.
        "base" if the molecule has yet to be assessed
    """
    # Refresh the derived properties before inspecting them
    apply_recipes(record)

    # Select the property dictionary matching the configured oxidation state
    known = record.reduction_potential if self.oxidation_state == OxidationState.REDUCED \
        else record.oxidation_potential

    # Highest completed level wins; fall back to 'base' if none are present
    return next((lvl for lvl in reversed(self.levels) if lvl in known), 'base')
def record_qc(self, result: Result):
    """Record a finished quantum-chemistry task in the database and on disk.

    On success, builds a molecule record from the returned optimization and
    single-point records, appends it to the in-memory database and on-disk
    JSON logs, and sets ``self.done`` once the database reaches
    ``self.target_size``. All results are appended to ``simulation-results.json``.

    Args:
        result: Completed task whose ``args`` hold the SMILES string and whose
            ``value`` holds ``(opt_records, hess_records)`` on success
    """
    # Get basic task information
    smiles, = result.args

    # Release nodes for use by other processes
    self.rec.release("simulation", self.nodes_per_qc)

    # If successful, add to the database
    if result.success:
        # Store the data in a molecule data object
        data = MoleculeData.from_identifier(smiles=smiles)
        opt_records, hess_records = result.value
        for r in opt_records:
            data.add_geometry(r)
        for r in hess_records:
            data.add_single_point(r)
        apply_recipes(data)  # Compute the IP

        # Add to database (timestamped append-only log, then the list itself)
        with open(self.output_dir.joinpath('moldata-records.json'), 'a') as fp:
            print(json.dumps([datetime.now().timestamp(), data.json()]), file=fp)
        self.database.append(data)

        # If the database is complete, set "done"
        if len(self.database) >= self.target_size:
            self.logger.info(f'Database has reached target size of {len(self.database)}. Exiting')
            self.done.set()

        # Write the raw QC records to disk
        with open(self.output_dir.joinpath('qcfractal-records.json'), 'a') as fp:
            for r in opt_records + hess_records:
                print(r.json(), file=fp)
        self.logger.info(f'Added complete calculation for {smiles} to database.')
    else:
        self.logger.info(f'Computations failed for {smiles}. Check JSON file for stacktrace')

    # Write out the result to disk (value excluded to keep the log small)
    with open(self.output_dir.joinpath('simulation-results.json'), 'a') as fp:
        print(result.json(exclude={'value'}), file=fp)
def update_molecule(self, molecule: MoleculeData) -> UpdateResult:
    """Update the data for a single molecule

    Args:
        molecule: Data for a certain molecule to be updated.
            All fields specified in this record will be updated or added to the matching document.
            No fields will be deleted by this operation.
    Returns:
        An update result
    """
    # Sanity-check the record against the schema before touching the database
    MoleculeData.validate(molecule)

    # Refresh derived quantities so the stored document is current
    molecule.update_thermochem()
    apply_recipes(molecule)

    # Build the update document and apply it, inserting if the molecule is new
    return self.collection.update_one(
        {'key': molecule.key},
        generate_update(molecule),
        upsert=True,
    )
def record_qc(self, result: Result):
    """Record a finished quantum-chemistry task and drive the retrain/re-order loop.

    On success, merges the returned records into the molecule's database entry,
    attaches the resulting ionization potentials to ``result.task_info``, and
    decrements either the retrain countdown (when the target output property is
    now available) or the re-evaluation countdown (otherwise). When a countdown
    reaches zero, the matching event (``start_training`` or ``start_inference``)
    is set. All results are appended to ``simulation-results.json``.

    Args:
        result: Completed task; ``task_info['inchi']`` identifies the molecule
            and ``value`` holds ``(opt_records, spe_records)`` on success
    """
    # Get basic task information
    inchi = result.task_info['inchi']
    self.logger.info(f'{result.method} computation for {inchi} finished')

    # Release nodes for use by other processes
    self.rec.release("simulation", self.nodes_per_qc)

    # If successful, add to the database
    if result.success:
        self.n_evaluated += 1

        # Check if we are done
        if self.n_evaluated >= self.n_to_evaluate:
            self.logger.info(f'We have evaluated as many molecules as requested. exiting')
            self.done.set()

        # Store the data in a molecule data object
        data = self.database.get_molecule_record(inchi=inchi)  # Get existing information
        opt_records, spe_records = result.value
        for r in opt_records:
            # overwrite=True: replace any stale geometry from a prior pass
            data.add_geometry(r, overwrite=True)
        for r in spe_records:
            data.add_single_point(r)
        apply_recipes(data)  # Compute the IP

        # Add ionization potentials to the task_info
        result.task_info['ips'] = data.oxidation_potential

        # Add to database (timestamped append-only log, then the DB itself)
        with open(self.output_dir.joinpath('moldata-records.json'), 'a') as fp:
            print(json.dumps([datetime.now().timestamp(), data.json()]), file=fp)
        self.database.update_molecule(data)

        # Decide which countdown this completion advances: the target output
        # property being present means a full high-fidelity result
        if self.output_property.split(".")[-1] in data.oxidation_potential:
            self.until_retrain -= 1
            self.logger.info(f'High fidelity complete. {self.until_retrain} before retraining')
        else:
            # Low-fidelity result: queue the molecule for re-ranking
            self.to_reevaluate.append(data)
            self.until_reevaluate -= 1
            self.logger.info(f'Low fidelity complete. {self.until_reevaluate} before re-ordering')

        # Check if we should re-do training
        if self.until_retrain <= 0 and not self.done.is_set():
            # If we have enough new high-fidelity data, retrain the models
            self.logger.info('Triggering training to start')
            self.start_training.set()
        elif self.until_reevaluate <= 0 and not (self.start_training.is_set() or self.done.is_set()):
            # Restart inference if we have had enough complete computations
            self.logger.info('Triggering inference to begin again')
            self.start_inference.set()

        # Write the raw QC records to disk
        with open(self.output_dir.joinpath('qcfractal-records.json'), 'a') as fp:
            for r in opt_records + spe_records:
                print(r.json(), file=fp)
        self.logger.info(f'Added complete calculation for {inchi} to database.')
    else:
        self.logger.info(f'Computations failed for {inchi}. Check JSON file for stacktrace')

    # Write out the result to disk (value excluded to keep the log small)
    with open(self.output_dir.joinpath('simulation-results.json'), 'a') as fp:
        print(result.json(exclude={'value'}), file=fp)
def process_outputs(self, result: Result):
    """Handle a finished simulation task in a multi-step recipe workflow.

    On success, merges the returned record into the molecule's database entry.
    If the recipe still requires more calculations after a relaxation, the
    molecule is re-queued at the front of the task queue; if the recipe is
    complete, counters/events for retraining and completion are updated and
    the molecule is written to the database. Failures are remembered in
    ``self.failed_molecules``. All results are appended to
    ``simulation-results.json``.

    Args:
        result: Completed task; ``task_info`` carries ``inchi`` and ``level``,
            and ``value`` holds a single QC record on success
    """
    # Release nodes for use by other processes
    self.rec.release("simulation", 1)

    # Unpack the task information
    inchi = result.task_info['inchi']
    method = result.method
    level = result.task_info['level']

    # If successful, add to the database
    self.logger.info(f'Completed {method} at {level} for {inchi}')
    if result.success:
        # Store the data in a molecule data object
        data = self.database.get_molecule_record(inchi=inchi)
        if method == 'relax_structure':
            data.add_geometry(result.value)
        else:
            data.add_single_point(result.value)
        data.update_thermochem()
        apply_recipes(data)

        # If there are still more computations left to complete a level, re-add it to the priority queue
        # This happens only if a new geometry was created
        cur_recipe = get_recipe_by_name(result.task_info['level'])
        try:
            to_run = cur_recipe.get_required_calculations(data, self.search_spec.oxidation_state)
        except KeyError:
            # Recipe cannot be evaluated yet; treat as nothing left to schedule
            to_run = []
        if len(to_run) > 0 and result.method == 'relax_structure':
            self.logger.info('Not yet done with the recipe. Re-adding to task queue')
            self.task_queue.put(_PriorityEntry(
                inchi=inchi,
                item=result.task_info,
                score=-np.inf  # Put it at the front of the queue
            ))
        elif len(to_run) == 0:
            # Mark that we've had another complete result
            self.n_evaluated += 1
            self.logger.info(f'Success! Finished screening {self.n_evaluated}/{self.n_to_evaluate} molecules')

            # Determine whether to start re-training
            if self.n_evaluated % self.n_complete_before_retrain == 0:
                if self.update_in_progress.is_set():
                    self.logger.info(f'Waiting until previous training run completes.')
                else:
                    self.logger.info(f'Starting retraining.')
                    self.start_training.set()
            self.logger.info(f'{self.n_complete_before_retrain - self.n_evaluated % self.n_complete_before_retrain}'
                             ' results needed until we re-train again')

            # Attach the data source for the molecule
            data.subsets.append(self.search_space_name)

            # Add the IPs to the result object
            result.task_info["ip"] = data.oxidation_potential.copy()
            result.task_info["ea"] = data.reduction_potential.copy()

            # Add to database (timestamped append-only log, then the DB itself)
            with open(self.output_dir.joinpath('moldata-records.json'), 'a') as fp:
                print(json.dumps([datetime.now().timestamp(), data.json()]), file=fp)
            self.database.update_molecule(data)

        # Write to disk
        # FIX: the record log previously opened 'qcfractal-records.json.gz' relative to the
        # current working directory, unlike every other output in this method which goes
        # under self.output_dir. Anchor it to the run's output directory.
        with gzip.open(self.output_dir.joinpath('qcfractal-records.json.gz'), 'at') as fp:
            print(result.value.json(), file=fp)
        self.logger.info(f'Added complete calculation for {inchi} to database.')
    else:
        # Remember the failure so the molecule is not re-selected
        self.failed_molecules.add(inchi)
        self.logger.info(f'Computations failed for {inchi}. Check JSON file for stacktrace')

    # Write out the result to disk (inputs stored as a string; value excluded to keep the log small)
    result.task_info['inputs'] = str(result.inputs)
    with open(self.output_dir.joinpath('simulation-results.json'), 'a') as fp:
        print(result.json(exclude={'inputs', 'value'}), file=fp)
    self.logger.info(f'Processed simulation task.')
def record_qc(self, result: Result):
    """Record a finished quantum-chemistry task, resubmitting on manager loss.

    Tasks that failed only because the Parsl/Colmena manager was lost are
    resubmitted verbatim. Successful results are merged into the molecule's
    database entry, drive the retrain and re-evaluation countdowns, and set
    the ``start_training``/``start_inference`` events when a countdown hits
    zero. All non-resubmitted results are appended to ``simulation-results.json``.

    Args:
        result: Completed task; ``task_info['inchi']`` identifies the molecule
            and ``value`` holds ``(opt_records, spe_records)`` on success
    """
    # Get basic task information
    inchi = result.task_info['inchi']
    self.logger.info(f'{result.method} computation for {inchi} finished')

    # Check if it failed due to a ManagerLost exception
    if result.failure_info is not None and \
            'Task failure due to loss of manager' in result.failure_info.exception:
        # If so, resubmit it (keep_inputs so the same task can be retried again);
        # note: nodes are NOT released here since the task goes straight back out
        self.logger.info('Task failed due to manager loss. Resubmitting, as this task could still succeed')
        self.queues.send_inputs(*result.args, input_kwargs=result.kwargs, task_info=result.task_info,
                                method=result.method, keep_inputs=True, topic='simulate')
        return

    # Release nodes for use by other processes
    self.rec.release("simulation", self.nodes_per_qc)

    # If successful, add to the database
    if result.success:
        self.n_evaluated += 1

        # Check if we are done
        if self.n_evaluated >= self.n_to_evaluate:
            self.logger.info(f'We have evaluated as many molecules as requested. exiting')
            self.done.set()

        # Write outputs to disk
        # NOTE(review): this path climbs two directories above output_dir —
        # presumably a log shared across runs; confirm before changing
        opt_records, spe_records = result.value
        with open(self.output_dir.joinpath('..', '..', 'qcfractal-records.json'), 'a') as fp:
            for r in opt_records + spe_records:
                r.extras['inchi'] = inchi  # Tag each record with its molecule
                print(r.json(), file=fp)

        # Store the data in a molecule data object
        data = self.database.get_molecule_record(inchi=inchi)  # Get existing information
        store_success = False
        try:
            for r in opt_records:
                # overwrite=True: replace any stale geometry from a prior pass
                data.add_geometry(r, overwrite=True)
            for r in spe_records:
                data.add_single_point(r)
            store_success = True
        except UnmatchedGeometry:
            # Geometry hash did not match any stored conformer; keep going with
            # whatever was stored so the rest of the bookkeeping still runs
            self.logger.warning(f'Failed to match {inchi} geometry to an existing record.'
                                ' Tell Logan his hashes are broken again!')
        apply_recipes(data)  # Compute the IP

        # Add ionization potentials to the task_info
        result.task_info['ips'] = data.oxidation_potential
        result.task_info['eas'] = data.reduction_potential

        # Add to database (timestamped append-only log, then the DB itself)
        with open(self.output_dir.joinpath('moldata-records.json'), 'a') as fp:
            print(json.dumps([datetime.now().timestamp(), data.json()]), file=fp)
        self.database.update_molecule(data)

        # Mark if we have completed a new record of the output property
        outputs = data.oxidation_potential if self.oxidize else data.reduction_potential
        if self.target_recipe.name in outputs:
            # All SPE are complete
            self.until_retrain -= 1
            self.logger.info(f'High fidelity complete. {self.until_retrain} before retraining')
        elif result.task_info['method'] != "compute_single_point" and store_success:
            # A lower-fidelity geometry step finished and stored cleanly
            self.until_reevaluate -= 1
            self.logger.info(f'Low fidelity complete. {self.until_reevaluate} before re-ordering')
            # Route to the re-evaluation queue matching the fidelity chain
            if result.method == 'compute_vertical':
                self.to_reevaluate['adiabatic'].append(data)
            else:
                self.to_reevaluate['normal'].append(data)

        # Check if we should re-do training or re-run inference
        if self.until_retrain <= 0 and not self.done.is_set():
            # If we have enough new high-fidelity data, retrain the models
            self.logger.info('Triggering training to start')
            self.start_training.set()
        elif self.until_reevaluate <= 0 and not (self.start_training.is_set() or self.done.is_set()):
            # Restart inference if we have had enough complete computations
            self.logger.info('Triggering inference to begin again')
            self.start_inference.set()

        self.logger.info(f'Added complete calculation for {inchi} to database.')
    else:
        self.logger.info(f'Computations failed for {inchi}. Check JSON file for stacktrace')

    # Write out the result to disk (value excluded to keep the log small)
    with open(self.output_dir.joinpath('simulation-results.json'), 'a') as fp:
        print(result.json(exclude={'value'}), file=fp)