def test_from_csv_2(self):
    csv_file = os.path.join(TEST_FILE_DIR, "PredictionDiagnostics_parameters.csv")

    # Test basic functionality
    with ScratchDir('.') as scratch_dir:
        makedirs_p(os.path.join(scratch_dir, "procedures"))
        makedirs_p(os.path.join(scratch_dir, "names"))
        generate_protocol_files_from_csv(csv_file, scratch_dir)
        self.assertEqual(len(os.listdir(os.path.join(scratch_dir, "procedures"))), 2)

        original = open(os.path.join(PROCEDURE_TEMPLATE_DIR, "diagnosticV2.000")).readlines()
        parsed = open(os.path.join(os.path.join(scratch_dir, "procedures"),
                                   "PredictionDiagnostics_000000.000")).readlines()
        self.assertFalse(list(difflib.unified_diff(original, parsed)))
        for line in difflib.unified_diff(original, parsed):
            self.assertIsNotNone(line)

        original = open(os.path.join(PROCEDURE_TEMPLATE_DIR, "diagnosticV3.000")).readlines()
        parsed = open(os.path.join(os.path.join(scratch_dir, "procedures"),
                                   "PredictionDiagnostics_000196.000")).readlines()
        diff = list(difflib.unified_diff(original, parsed))
        diff_expected = ['--- \n',
                         '+++ \n',
                         '@@ -27,7 +27,7 @@\n',
                         ' <SpecialType> </SpecialType>\n',
                         ' <Oper> = </Oper>\n',
                         ' <Step>002</Step>\n',
                         '- <Value>03:00:00</Value>\n',
                         '+ <Value>03:02:00</Value>\n',
                         ' </EndEntry>\n',
                         ' <EndEntry>\n',
                         ' <EndType>Voltage </EndType>\n']
        self.assertEqual(diff, diff_expected)
        for line in difflib.unified_diff(original, parsed):
            self.assertIsNotNone(line)

        _, namefile = os.path.split(csv_file)
        namefile = namefile.split('_')[0] + '_names_'
        namefile = namefile + datetime.datetime.now().strftime("%Y%m%d_%H%M") + '.csv'
        names_test = open(os.path.join(scratch_dir, "names", namefile)).readlines()
        self.assertEqual(names_test,
                         ['PredictionDiagnostics_000000_\n', 'PredictionDiagnostics_000196_\n'])
def cache_download(url, path):
    """
    Quick helper function to cache a generic download from a url
    in the CAMD local data directory

    Args:
        url (str): url for download
        path (str): path for download, is appended to the CAMD_CACHE location

    Returns:
        (None)
    """
    # Prep cache path and make necessary dirs
    cache_path = os.path.join(CAMD_CACHE, path)

    # Download and write file
    if not os.path.isfile(cache_path):
        makedirs_p(os.path.split(cache_path)[0])
        r = requests.get(url, stream=True)
        total_size = int(r.headers.get('content-length', 0))
        block_size = 1024  # 1 Kibibyte
        t = tqdm(total=total_size, unit='iB', unit_scale=True)
        with open(cache_path, 'wb') as f:
            for data in r.iter_content(block_size):
                t.update(len(data))
                f.write(data)
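# Usage sketch for cache_download (the URL and relative path below are hypothetical;
# assumes CAMD_CACHE is configured in the importing module). Not part of the original source.
if __name__ == "__main__":
    # Downloads the file once; subsequent calls reuse the cached copy under CAMD_CACHE.
    cache_download(
        url="https://example.com/datasets/example_dataset.csv",  # hypothetical URL
        path="datasets/example_dataset.csv",                     # stored under CAMD_CACHE
    )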
def test_from_csv_3(self):
    csv_file_list = os.path.join(TEST_FILE_DIR, "PreDiag_parameters - GP.csv")
    makedirs_p(os.path.join(TEST_FILE_DIR, "procedures"))
    makedirs_p(os.path.join(TEST_FILE_DIR, "names"))
    generate_protocol_files_from_csv(csv_file_list, TEST_FILE_DIR)
    self.assertEqual(len(os.listdir(os.path.join(TEST_FILE_DIR, "procedures"))), 192)
def main():
    all_s3_prefixes = get_all_s3_folders()
    makedirs_p("cache")
    os.chdir("cache")
    print(list(enumerate(all_s3_prefixes)))
    for run in all_s3_prefixes[27:]:
        local_folder = run.split('/')[-2]
        sync_s3_folder(run, local_folder=local_folder)
        update_run_w_structure(local_folder)
        update_s3(run, local_folder=local_folder)
def main():
    all_s3_prefixes = get_all_s3_folders()
    makedirs_p("cache")
    os.chdir("cache")
    import nose
    nose.tools.set_trace()
    for run in all_s3_prefixes:
        local_folder = run.split('/')[-2]
        sync_s3_folder(run, local_folder=local_folder)
        update_run(local_folder)
        update_s3(run, local_folder=local_folder)
def test_from_csv_3(self):
    csv_file_list = os.path.join(TEST_FILE_DIR, "PreDiag_parameters - GP.csv")
    makedirs_p(os.path.join(TEST_FILE_DIR, "procedures"))
    makedirs_p(os.path.join(TEST_FILE_DIR, "names"))
    generate_protocol_files_from_csv(csv_file_list, output_directory=TEST_FILE_DIR)

    if os.path.isfile(os.path.join(TEST_FILE_DIR, "procedures", ".DS_Store")):
        os.remove(os.path.join(TEST_FILE_DIR, "procedures", ".DS_Store"))

    self.assertEqual(
        len(os.listdir(os.path.join(TEST_FILE_DIR, "procedures"))), 265)
def test_from_csv(self):
    csv_file = os.path.join(TEST_FILE_DIR, "parameter_test.csv")

    # Test basic functionality
    with ScratchDir('.') as scratch_dir:
        makedirs_p(os.path.join(scratch_dir, "procedures"))
        makedirs_p(os.path.join(scratch_dir, "names"))
        new_files, result, message = generate_protocol_files_from_csv(
            csv_file, output_directory=scratch_dir)
        self.assertEqual(
            len(os.listdir(os.path.join(scratch_dir, "procedures"))), 3)
        self.assertEqual(result, "error")

    # Test avoid overwriting file functionality
    with ScratchDir('.') as scratch_dir:
        makedirs_p(os.path.join(scratch_dir, "procedures"))
        makedirs_p(os.path.join(scratch_dir, "names"))
        dumpfn({"hello": "world"}, os.path.join("procedures", "name_000007.000"))
        new_files, result, message = generate_protocol_files_from_csv(
            csv_file, output_directory=scratch_dir)
        post_file = loadfn(os.path.join("procedures", "name_000007.000"))
        self.assertEqual(post_file, {"hello": "world"})
        self.assertEqual(
            len(os.listdir(os.path.join(scratch_dir, "procedures"))), 3)
        self.assertEqual(result, "error")
        self.assertEqual(
            message,
            {
                'comment': 'Unable to find template: EXP-D3.000',
                'error': 'Not Found'
            })
def test_console_script(self):
    csv_file = os.path.join(TEST_FILE_DIR, "parameter_test.csv")

    # Test script functionality
    with ScratchDir('.') as scratch_dir:
        # Set BEEP_ROOT directory to scratch_dir
        os.environ['BEEP_ROOT'] = os.getcwd()
        makedirs_p("data-share/protocols/procedures")
        makedirs_p("data-share/protocols/names")

        # Test the script
        json_input = json.dumps(
            {"file_list": [csv_file], "mode": self.events_mode})
        os.system("generate_protocol '{}'".format(json_input))
        self.assertEqual(len(os.listdir('data-share/protocols/procedures')), 3)
def test_console_script(self):
    csv_file = os.path.join(TEST_FILE_DIR, "parameter_test.csv")

    # Test script functionality
    with ScratchDir(".") as scratch_dir:
        # Set BEEP_PROCESSING_DIR directory to scratch_dir
        os.environ["BEEP_PROCESSING_DIR"] = os.getcwd()
        procedures_path = os.path.join("data-share", "protocols", "procedures")
        names_path = os.path.join("data-share", "protocols", "names")
        makedirs_p(procedures_path)
        makedirs_p(names_path)

        # Test the script
        json_input = json.dumps({
            "file_list": [csv_file],
            "mode": self.events_mode
        })
        os.system("generate_protocol {}".format(os_format(json_input)))
        self.assertEqual(len(os.listdir(procedures_path)), 3)
def main():
    if S3_SYNC:
        all_s3_prefixes = get_all_s3_folders()
        makedirs_p("cache")
        os.chdir("cache")
        print(all_s3_prefixes)
        for run in all_s3_prefixes:
            local_folder = run.split('/')[-2]
            sync_s3_folder(run, local_folder=local_folder)
        os.chdir('..')

    all_dfs = []
    problem_folders = []
    local_folders = os.listdir('cache')
    # local_folders = ['Mn-S']
    for local_folder in tqdm(local_folders):
        with cd(os.path.join('cache', local_folder)):
            if CATCH_ERRORS:
                try:
                    all_dfs.append(process_run())
                    success = True
                except Exception as e:
                    print(e)
                    problem_folders.append(local_folder)
                    success = False
            else:
                all_dfs.append(process_run())
                success = True
        if success and all_dfs[-1] is not None:
            chemsys = os.path.split(local_folder)[-1]
            all_dfs[-1].to_csv("summaries/{}.csv".format(chemsys))

    output = pd.concat(all_dfs, axis=0)
    output = output.sort_values('stabilities')
    output.to_csv("summary.csv")
    import nose
    nose.tools.set_trace()
    print("problems:")
    print(problem_folders)
def cpc(self, dest_dir, force_overwrite=False):
    """
    Copies the control file and all files referenced here.
    Creates the destination folder if not already existing.

    Args:
        dest_dir (str): path to the destination folder.
        force_overwrite (bool): if True files already present in the
            destination folder will be overwritten.
    """
    dest_dir = os.path.abspath(dest_dir)
    makedirs_p(dest_dir)
    dest_control_path = os.path.join(dest_dir, "control")
    if not os.path.isfile(dest_control_path) or force_overwrite:
        self.to_file(dest_control_path)

    for fn in self.get_subfiles_list():
        dest_file_path = os.path.join(dest_dir, fn)
        # copy only if the file exists
        if os.path.isfile(fn) and (not os.path.isfile(dest_file_path) or force_overwrite):
            shutil.copy2(fn, dest_file_path)
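# Standalone sketch of the same copy-unless-present pattern that cpc implements,
# using makedirs_p from monty. The helper name and file paths are hypothetical,
# not part of the class above.
import os
import shutil
from monty.os import makedirs_p

def copy_if_missing(src, dest_dir, force_overwrite=False):
    makedirs_p(dest_dir)  # create the destination tree if needed
    dest = os.path.join(dest_dir, os.path.basename(src))
    if os.path.isfile(src) and (not os.path.isfile(dest) or force_overwrite):
        shutil.copy2(src, dest)  # preserve metadata, like cpc does
    return dest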
def test_from_csv(self):
    csv_file = os.path.join(TEST_FILE_DIR, "parameter_test.csv")

    # Test basic functionality
    with ScratchDir('.') as scratch_dir:
        makedirs_p(os.path.join(scratch_dir, "procedures"))
        makedirs_p(os.path.join(scratch_dir, "names"))
        generate_protocol_files_from_csv(csv_file, scratch_dir)
        self.assertEqual(len(os.listdir(os.path.join(scratch_dir, "procedures"))), 3)

    # Test avoid overwriting file functionality
    with ScratchDir('.') as scratch_dir:
        makedirs_p(os.path.join(scratch_dir, "procedures"))
        makedirs_p(os.path.join(scratch_dir, "names"))
        dumpfn({"hello": "world"}, "procedures/name_000007.000")
        generate_protocol_files_from_csv(csv_file, scratch_dir)
        post_file = loadfn("procedures/name_000007.000")
        self.assertEqual(post_file, {"hello": "world"})
        self.assertEqual(len(os.listdir(os.path.join(scratch_dir, "procedures"))), 3)
def launch_rocket_to_queue(launchpad, fworker, qadapter, launcher_dir='.', reserve=False,
                           strm_lvl='INFO', create_launcher_dir=False, fill_mode=False,
                           fw_id=None):
    """
    Submit a single job to the queue.

    Args:
        launchpad (LaunchPad)
        fworker (FWorker)
        qadapter (QueueAdapterBase)
        launcher_dir (str): The directory where to submit the job
        reserve (bool): Whether to queue in reservation mode
        strm_lvl (str): level at which to stream log messages
        create_launcher_dir (bool): Whether to create a subfolder launcher+timestamp, if needed
        fill_mode (bool): whether to submit jobs even when there is nothing to run
            (only in non-reservation mode)
        fw_id (int): specific fw_id to reserve (reservation mode only)
    """
    fworker = fworker if fworker else FWorker()
    launcher_dir = os.path.abspath(launcher_dir)
    l_logger = get_fw_logger('queue.launcher', l_dir=launchpad.logdir, stream_level=strm_lvl)

    l_logger.debug('getting queue adapter')
    qadapter = load_object(qadapter.to_dict())  # make a defensive copy, mainly for reservation mode
    fw, launch_id = None, None  # only needed in reservation mode

    if not os.path.exists(launcher_dir):
        raise ValueError('Desired launch directory {} does not exist!'.format(launcher_dir))

    if '--offline' in qadapter['rocket_launch'] and not reserve:
        raise ValueError("Must use reservation mode (-r option) of qlaunch "
                         "when using offline option of rlaunch!!")

    if reserve and 'singleshot' not in qadapter.get('rocket_launch', ''):
        raise ValueError('Reservation mode of queue launcher only works for singleshot Rocket Launcher!')

    if fill_mode and reserve:
        raise ValueError("Fill_mode cannot be used in conjunction with reserve mode!")

    if fw_id and not reserve:
        raise ValueError("qlaunch for specific fireworks may only be used in reservation mode.")

    if fill_mode or launchpad.run_exists(fworker):
        launch_id = None
        try:
            if reserve:
                if fw_id:
                    l_logger.debug('finding a FW to reserve...')
                fw, launch_id = launchpad.reserve_fw(fworker, launcher_dir, fw_id=fw_id)
                if not fw:
                    l_logger.info('No jobs exist in the LaunchPad for submission to queue!')
                    return False
                l_logger.info('reserved FW with fw_id: {}'.format(fw.fw_id))

                # update qadapter job_name based on FW name
                job_name = get_slug(fw.name)[0:QUEUE_JOBNAME_MAXLEN]
                qadapter.update({'job_name': job_name})

                if '_queueadapter' in fw.spec:
                    l_logger.debug('updating queue params using Firework spec..')
                    qadapter.update(fw.spec['_queueadapter'])

                # reservation mode includes --fw_id in rocket launch
                qadapter['rocket_launch'] += ' --fw_id {}'.format(fw.fw_id)

                # update launcher_dir if _launch_dir is selected in reserved fw
                if '_launch_dir' in fw.spec:
                    fw_launch_dir = os.path.expandvars(fw.spec['_launch_dir'])

                    if not os.path.isabs(fw_launch_dir):
                        fw_launch_dir = os.path.join(launcher_dir, fw_launch_dir)

                    launcher_dir = fw_launch_dir
                    makedirs_p(launcher_dir)
                    launchpad.change_launch_dir(launch_id, launcher_dir)
                elif create_launcher_dir:
                    # create launcher_dir
                    launcher_dir = create_datestamp_dir(launcher_dir, l_logger, prefix='launcher_')
                    launchpad.change_launch_dir(launch_id, launcher_dir)
            elif create_launcher_dir:
                # create launcher_dir
                launcher_dir = create_datestamp_dir(launcher_dir, l_logger, prefix='launcher_')

            # move to the launch directory
            l_logger.info('moving to launch_dir {}'.format(launcher_dir))
            with cd(launcher_dir):
                if '--offline' in qadapter['rocket_launch']:
                    setup_offline_job(launchpad, fw, launch_id)

                l_logger.debug('writing queue script')
                with open(SUBMIT_SCRIPT_NAME, 'w') as f:
                    queue_script = qadapter.get_script_str(launcher_dir)
                    f.write(queue_script)

                l_logger.info('submitting queue script')
                reservation_id = qadapter.submit_to_queue(SUBMIT_SCRIPT_NAME)
                if not reservation_id:
                    raise RuntimeError('queue script could not be submitted, check queue '
                                       'script/queue adapter/queue server status!')
                elif reserve:
                    launchpad.set_reservation_id(launch_id, reservation_id)
            return reservation_id

        except:
            log_exception(l_logger, 'Error writing/submitting queue script!')
            if reserve and launch_id is not None:
                try:
                    l_logger.info('Un-reserving FW with fw_id, launch_id: {}, {}'.format(
                        fw.fw_id, launch_id))
                    launchpad.cancel_reservation(launch_id)
                    launchpad.forget_offline(launch_id)
                except:
                    log_exception(l_logger,
                                  'Error unreserving FW with fw_id {}'.format(fw.fw_id))
            return False

    else:
        l_logger.info('No jobs exist in the LaunchPad for submission to queue!')
        return None  # note: this is a hack (rather than False) to indicate a soft failure to rapidfire()
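# Hedged usage sketch for launch_rocket_to_queue: the adapter file name is hypothetical
# and a working FireWorks configuration (LaunchPad + queue adapter YAML) is assumed.
from fireworks import LaunchPad, FWorker
from fireworks.utilities.fw_serializers import load_object_from_file

launchpad = LaunchPad.auto_load()                     # reads the configured my_launchpad.yaml
qadapter = load_object_from_file("my_qadapter.yaml")  # hypothetical adapter file
launch_rocket_to_queue(launchpad, FWorker(), qadapter,
                       launcher_dir=".", reserve=True,  # reservation mode reserves one FW per submission
                       create_launcher_dir=True)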
def abinit_to_phonopy(anaddbnc, supercell_matrix, symmetrize_tensors=False, output_dir_path=None,
                      prefix_outfiles="", symprec=1e-5, set_masses=False):
    """
    Converts the interatomic force constants (IFC), born effective charges (BEC) and dielectric
    tensor obtained from anaddb to the phonopy format. Optionally writes the standard phonopy
    files to a selected directory: FORCE_CONSTANTS, BORN (if BECs are available),
    POSCAR of the unit cell, POSCAR of the supercell.

    The conversion is performed taking the IFC in the Wigner–Seitz supercell with weights
    as produced by anaddb and reorganizes them in a standard supercell multiple of the
    unit cell. Operations are vectorized using numpy. This may lead to the allocation of
    large arrays in case of very large supercells.

    Performs a check to verify if the two codes identify the same symmetries and it gives a
    warning in case of failure. Mismatching symmetries may lead to incorrect conversions.

    Args:
        anaddbnc: an instance of AnaddbNcFile. Should contain the output of the IFC analysis,
            the BEC and the dielectric tensor.
        supercell_matrix: the supercell matrix used for phonopy. Any choice is acceptable, however
            the best agreement between the abinit and phonopy results is obtained if this is set to
            a diagonal matrix with on the diagonal the ngqpt used to generate the anaddb.nc.
        symmetrize_tensors: if True the tensors will be symmetrized in the Phonopy object and
            in the output files. This will apply to IFC, BEC and dielectric tensor.
        output_dir_path: a path to a directory where the phonopy files will be created
        prefix_outfiles: a string that will be added as a prefix to the name of the written files
        symprec: distance tolerance in Cartesian coordinates to find crystal symmetry in phonopy.
            It might be that the value should be tuned so that it leads to the same symmetries
            as in the abinit calculation.
        set_masses: if True the atomic masses used by abinit will be added to the PhonopyAtoms
            and will be present in the returned Phonopy object. This should improve compatibility
            among abinit and phonopy results if frequencies need to be calculated.

    Returns:
        An instance of a Phonopy object that contains the IFC, BEC and dielectric tensor data.
    """
    ifc = anaddbnc.ifc
    nac_params = None
    becs = None
    epsinf = None
    if anaddbnc.becs is not None and anaddbnc.epsinf is not None:
        becs = anaddbnc.becs.values
        epsinf = anaddbnc.epsinf

        # according to the phonopy website 14.399652 is not the coefficient for abinit
        # probably it relies on the other conventions in the output.
        nac_params = {"born": becs, "dielectric": epsinf, "factor": 14.399652}

    s = anaddbnc.structure

    phon_at = get_phonopy_structure(s)
    if set_masses:
        phon_at.masses = [anaddbnc.amu[n] for n in phon_at.numbers]

    # use phonopy to get the proper supercell given by the primitive and the matrix
    # and convert it to pymatgen
    phonon = Phonopy(phon_at, supercell_matrix, primitive_matrix=np.eye(3),
                     nac_params=nac_params, symprec=symprec)
    phon_supercell = phonon.get_supercell()
    supercell = get_pmg_structure(phon_supercell)

    abi_hall_num = s.abi_spacegroup.get_spglib_hall_number()
    spglib_hall_num = phonon.symmetry.dataset["hall_number"]
    if abi_hall_num != spglib_hall_num:
        warnings.warn("The hall number obtained based on the DDB symmetries differs "
                      f"from the one calculated with spglib: {abi_hall_num} versus "
                      f"{spglib_hall_num}. The conversion may be incorrect. Try changing symprec.")

    # convert to phonopy units
    at_cart = ifc.atoms_cart_coord * abu.Bohr_Ang
    ifccc = ifc.ifc_cart_coord * abu.Ha_eV / abu.Bohr_Ang ** 2
    weights = ifc.ifc_weights
    latt = supercell.lattice

    ifcph = np.zeros((len(s), len(supercell), 3, 3))

    # loop over the atoms in the primitive cell
    # other operations are vectorized using numpy arrays. Some array may require large allocations
    for i, (site, c_list, w_list) in enumerate(zip(s, at_cart, weights)):

        ind_w = np.where(w_list > 0)
        ifccc_loc = ifccc[i, ind_w[0]]

        w_list = w_list[ind_w]
        c_list = c_list[ind_w]

        # align the coordinates of the first atom in the list (the site under consideration)
        # with the site in the primitive cell.
        c_list = c_list - c_list[0] + site.coords

        # convert to fractional coordinates as needed by the Lattice to get the distances
        f_list = latt.get_fractional_coords(c_list)
        sc_fcoords = supercell.frac_coords

        # construct the list of sites of the supercell that are closer to sites in
        # the primitive cell
        dist_and_img = [latt.get_distance_and_image(f_list[0], fc) for fc in sc_fcoords]
        # the function gives the translation of the image, but it should be applied to the coordinates.
        # Only the positions are needed
        nearest_sc_fcoords = [fc + trasl for (_, trasl), fc in zip(dist_and_img, sc_fcoords)]

        # divide by the corresponding weights. Elements with weights 0 were discarded above
        ifccc_loc = np.transpose(ifccc_loc, (0, 2, 1)) / w_list[:, None, None]

        # create an array with all the possible pairs
        # instantiating this array seems slow but seems still faster than the required loops
        coord_pairs = np.array(list(itertools.product(nearest_sc_fcoords, f_list)))

        # find the pairs that match between the coordinates of the modified supercell and the f_list
        ind_match = np.where(np.abs(coord_pairs[:, 0] - coord_pairs[:, 1]).sum(axis=1) < 1e-6)[0]

        # set the ifc for phonopy in the final array corresponding to the matching indices.
        n_points_f_list = len(f_list)
        ifcph[i, ind_match // n_points_f_list] = ifccc_loc[ind_match % n_points_f_list]

    phonon.set_force_constants(ifcph)
    if symmetrize_tensors:
        phonon.symmetrize_force_constants()

    if output_dir_path:
        makedirs_p(output_dir_path)

        fc_filepath = os.path.join(output_dir_path, prefix_outfiles + "FORCE_CONSTANTS")
        write_FORCE_CONSTANTS(phonon.get_force_constants(), fc_filepath)

        if becs is not None and epsinf is not None:
            born_filepath = os.path.join(output_dir_path, prefix_outfiles + "BORN")
            write_BORN(phon_at, borns=becs, epsilon=epsinf, filename=born_filepath,
                       symmetrize_tensors=symmetrize_tensors)

        poscar_filepath = os.path.join(output_dir_path, prefix_outfiles + "POSCAR")
        poscar = Poscar(s)
        poscar.write_file(poscar_filepath, significant_figures=15)

        supercell_filepath = os.path.join(output_dir_path, prefix_outfiles + "supercell_POSCAR")
        superce_poscar = Poscar(supercell)
        superce_poscar.write_file(supercell_filepath, significant_figures=15)

    return phonon
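# Hedged usage sketch for abinit_to_phonopy: the anaddb.nc path is hypothetical and
# abipy's AnaddbNcFile is assumed to be able to read it.
import numpy as np
from abipy.dfpt.anaddbnc import AnaddbNcFile

anaddbnc = AnaddbNcFile.from_file("run_anaddb.nc")  # hypothetical anaddb output
phonon = abinit_to_phonopy(anaddbnc,
                           supercell_matrix=np.diag([2, 2, 2]),  # ideally the ngqpt used for anaddb.nc
                           output_dir_path="phonopy_files",
                           set_masses=True)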
def test_parameterization(self):
    filename = "formationV1.mps"
    bcs = Settings.from_file(os.path.join(BIOLOGIC_TEMPLATE_DIR, filename))
    protocol_params_df = pd.read_csv(
        os.path.join(TEST_FILE_DIR, "data-share", "raw", "parameters",
                     "Form_parameters - GP.csv"))
    test_name = "test.mps"
    with ScratchDir(".") as scratch_dir:
        makedirs_p(os.path.join(scratch_dir, "settings"))
        for index, protocol_params in protocol_params_df.iterrows():
            template = protocol_params["template"]
            filename_prefix = "_".join([
                protocol_params["project_name"],
                "{:06d}".format(protocol_params["seq_num"]),
            ])
            if template == "formationV1.mps":
                bcs = Settings.from_file(
                    os.path.join(BIOLOGIC_TEMPLATE_DIR, filename))
                self.assertEqual(bcs.get("Metadata.Cycle Definition"),
                                 "Charge/Discharge alternance")
                bcs = bcs.formation_protocol_bcs(protocol_params)
                self.assertEqual(bcs.get("Technique.1.Step.2.ctrl1_val"),
                                 float(round(0.2 * 0.1, 3)))
                self.assertEqual(bcs.get("Technique.1.Step.3.lim1_value"),
                                 float(round(60, 3)))
                self.assertEqual(bcs.get("Technique.1.Step.4.lim1_value"),
                                 float(round(30, 3)))
                self.assertEqual(bcs.get("Technique.1.Step.5.ctrl1_val"),
                                 float(round(0.2 * 0.2, 3)))
                self.assertEqual(bcs.get("Technique.1.Step.6.lim1_value"),
                                 float(round(30, 3)))
                self.assertEqual(bcs.get("Technique.1.Step.7.lim1_value"),
                                 float(round(30, 3)))
                self.assertEqual(bcs.get("Technique.1.Step.8.ctrl1_val"),
                                 float(round(0.2 * 0.2, 3)))
                self.assertEqual(bcs.get("Technique.1.Step.8.lim1_value"),
                                 float(round(3.0, 3)))
                self.assertEqual(bcs.get("Technique.1.Step.9.ctrl1_val"),
                                 float(round(0.2 * 0.5, 3)))
                self.assertEqual(bcs.get("Technique.1.Step.10.lim1_value"),
                                 float(round(3.9, 3)))
                self.assertEqual(bcs.get("Technique.1.Step.11.ctrl_repeat"),
                                 int(1))
                self.assertEqual(bcs.get("Technique.1.Step.12.ctrl1_val"),
                                 float(round(0.2 * 1, 3)))
                self.assertEqual(bcs.get("Technique.1.Step.13.lim1_value"),
                                 float(round(3.5, 3)))
                self.assertEqual(bcs.get("Technique.1.Step.14.ctrl1_val"),
                                 float(round(3.5, 3)))

                test_name = "{}.mps".format(filename_prefix)
                test_name = os.path.join(scratch_dir, "settings", test_name)
                bcs.to_file(test_name)

        self.assertEqual(
            len(os.listdir(os.path.join(scratch_dir, "settings"))), 16)
        original = open(os.path.join(BIOLOGIC_TEMPLATE_DIR, filename),
                        encoding="ISO-8859-1").readlines()
        parsed = open(test_name, encoding="ISO-8859-1").readlines()
        udiff = list(difflib.unified_diff(original, parsed))
        for line in udiff:
            print(line)
        self.assertTrue(udiff)  # Assert that last file is not the same as the template
def install_config_files(workdir=None, force_reinstall=False):
    """
    Install pre-defined configuration files for the TaskManager and the Scheduler
    in the workdir directory.

    Args:
        workdir: Directory where configuration files should be produced. Use ~/.abinit/abipy/ if None.
        force_reinstall: Allow overwriting pre-existent configuration files. By default, the function
            raises RuntimeError if configuration files are already present.
    """
    workdir = os.path.join(os.path.expanduser("~"), ".abinit", "abipy") if workdir is None else workdir
    print("Installing configuration files in directory:", workdir)
    from monty.os import makedirs_p
    makedirs_p(workdir)

    scheduler_path = os.path.join(workdir, "scheduler.yaml")
    scheduler_yaml = """
# The launcher will stop submitting jobs when the
# number of jobs in the queue is >= Max number of jobs
max_njobs_inqueue: 2

# Maximum number of cores that can be used by the scheduler.
max_ncores_used: 2

# number of hours to wait.
#hours: 0

# number of minutes to wait.
#minutes: 0

# number of seconds to wait.
seconds: 2

# Send mail to the specified address (accepts string or list of strings).
# PRO TIP: the scheduler WILL try to send an email after a default time of 4 days. If you
# comment out the mailto address, this will cause the scheduler to terminate, with
# potentially nefarious effects on your running jobs. If you do not wish to receive
# emails, a work around is to set the variable `remindme_s` below to something very
# large (say, 100 days).
#mailto: [email protected]

# verbosity level (int, default 0)
#verbose: 0

# The scheduler will shutdown when the number of python exceptions is > max_num_pyexcs
#max_num_pyexcs: 2

# The scheduler will shutdown when the number of Abinit errors is > max_num_abierrs
#max_num_abierrs: 0

# The scheduler will shutdown when the total number of tasks launched is > safety_ratio * tot_num_tasks.
#safety_ratio: 5

# Send an e-mail to mailto every remindme_s seconds.
#remindme_s: 345600
"""

    manager_path = os.path.join(workdir, "manager.yaml")
    manager_yaml = """
qadapters:
    - priority: 1
      queue:
        qname: abipy
        qtype: shell
      job:
        mpi_runner: mpirun
        pre_run:
            - export OMP_NUM_THREADS=1
            # IMPORTANT: Change the below line so that the abinit executable is in PATH
            #- export PATH=$HOME/git_repos/abinit/_build/src/98_main:$PATH
            #- ulimit -s unlimited; ulimit -n 2048
      limits:
        min_cores: 1
        max_cores: 2
        timelimit: 0:10:0
      hardware:
        num_nodes: 1
        sockets_per_node: 1
        cores_per_socket: 2
        mem_per_node: 4 Gb
"""

    # Write configuration files.
    if not os.path.isfile(scheduler_path) or force_reinstall:
        with open(scheduler_path, "wt") as fh:
            fh.write(scheduler_yaml)
        print("Scheduler configuration file written to:", scheduler_path)
    else:
        raise RuntimeError(
            "Configuration file: %s already exists.\nUse force_reinstall option to overwrite it"
            % scheduler_path)

    if not os.path.isfile(manager_path) or force_reinstall:
        with open(manager_path, "wt") as fh:
            fh.write(manager_yaml)
        print("Manager configuration file written to:", manager_path)
    else:
        raise RuntimeError(
            "Configuration file: %s already exists.\nUse force_reinstall option to overwrite it"
            % manager_path)

    print("""
Configuration files installed successfully.
Please edit the configuration options according to your installation.
In particular, edit the `pre_run` section in manager.yml
so that the abinit executable is in $PATH.
""")

    return 0
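# Hedged usage sketch: install the default scheduler.yaml/manager.yaml into a scratch
# directory instead of ~/.abinit/abipy (the directory name below is hypothetical).
install_config_files(workdir="./abipy_config_test")
# Running again against the same directory raises RuntimeError unless overwriting is allowed:
install_config_files(workdir="./abipy_config_test", force_reinstall=True)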
def fit(self, X, y, index=None, columns=None, tasks=None):
    """Fit a SISSO regression based on inputs X and output y.

    This method supports Multi-Task SISSO. For Single-Task SISSO, y must have a
    shape (n_samples) or (n_samples, 1). For Multi-Task SISSO, y must have a
    shape (n_samples, n_tasks). The arrays will be reshaped to fit SISSO's
    input files. For example, with 10 samples and 3 properties, the output
    array (y) will be reshaped to (30, 1). The input array (X) is left unchanged.
    It is also possible to provide samples without an output for some properties
    by setting that property to NaN. In that case, the corresponding values in the
    input (X) and output (y) arrays will be removed from the SISSO inputs.
    In the previous example, if 2 of the samples have NaN for the first property,
    1 sample has NaN for the second property and 4 samples have NaN for the third
    property, the final output array (y) will have a shape (30-2-1-4, 1), i.e.
    (23, 1), while the final input array (X) will have a shape (23, n_features).

    Args:
        X: Feature vectors as an array-like of shape (n_samples, n_features).
        y: Target values as an array-like of shape (n_samples,)
            or (n_samples, n_tasks).
        index: List of string identifiers for each sample. If None, "sampleN"
            with N=[1, ..., n_samples] will be used.
        columns: List of string names of the features. If None, "featN"
            with N=[1, ..., n_features] will be used.
        tasks: When Multi-Task SISSO is used, this is the list of string names
            that will be used for each task/property. If None, "taskN"
            with N=[1, ..., n_tasks] will be used.
    """
    if not self.use_custodian:
        raise NotImplementedError

    self.sisso_in = SISSOIn.from_sisso_keywords(  # pylint: disable=W0201
        ptype=1,
        ntask=self.ntask,
        task_weighting=self.task_weighting,
        desc_dim=self.desc_dim,
        restart=self.restart,
        rung=self.rung,
        opset=self.opset,
        maxcomplexity=self.maxcomplexity,
        dimclass=self.dimclass,
        maxfval_lb=self.maxfval_lb,
        maxfval_ub=self.maxfval_ub,
        subs_sis=self.subs_sis,
        method=self.method,
        L1L0_size4L0=self.L1L0_size4L0,
        fit_intercept=self.fit_intercept,
        metric=self.metric,
        nm_output=self.nm_output,
        isconvex=self.isconvex,
        width=self.width,
        nvf=self.nvf,
        vfsize=self.vfsize,
        vf2sf=self.vf2sf,
        npf_must=self.npf_must,
        L1_max_iter=self.L1_max_iter,
        L1_tole=self.L1_tole,
        L1_dens=self.L1_dens,
        L1_nlambda=self.L1_nlambda,
        L1_minrmse=self.L1_minrmse,
        L1_warm_start=self.L1_warm_start,
        L1_weighted=self.L1_weighted,
    )

    # Set up columns. These columns are used by the SISSO model wrapper afterwards
    # for the prediction
    if columns is None and isinstance(X, pd.DataFrame):
        columns = list(X.columns)
    self.columns = columns or [  # pylint: disable=W0201
        "feat{:d}".format(ifeat) for ifeat in range(1, X.shape[1] + 1)
    ]
    if len(self.columns) != X.shape[1]:
        raise ValueError("Columns should be of the size of the second axis of X.")

    # Set up data
    X = np.array(X)
    y = np.array(y)
    if y.ndim == 1 or (y.ndim == 2 and y.shape[1] == 1):  # Single-Task SISSO
        self.ntasks = 1  # pylint: disable=W0201
        index = index or [
            "sample{:d}".format(ii) for ii in range(1, X.shape[0] + 1)
        ]
        if len(index) != len(y) or len(index) != len(X):
            raise ValueError("Index, X and y should have same size.")
        nsample = None
    elif y.ndim == 2 and y.shape[1] > 1:  # Multi-Task SISSO
        self.ntasks = y.shape[1]  # pylint: disable=W0201
        samples_index = index or [
            "sample{:d}".format(ii) for ii in range(1, X.shape[0] + 1)
        ]
        tasks = tasks or [
            "task{:d}".format(ii) for ii in range(1, self.ntasks + 1)
        ]
        newX = np.zeros((0, X.shape[1]))
        newy = np.array([])
        index = []
        nsample = []
        for itask in range(self.ntasks):
            yadd = y[:, itask]
            nanindices = np.argwhere(np.isnan(yadd)).flatten()
            totake = [ii for ii in range(len(yadd)) if ii not in nanindices]
            newy = np.concatenate([newy, np.take(yadd, indices=totake)])
            newX = np.row_stack([newX, np.take(X, indices=totake, axis=0)])
            nsample.append(len(totake))
            index.extend([
                "{}_{}".format(sample_index, tasks[itask])
                for i_sample, sample_index in enumerate(samples_index)
                if i_sample in totake
            ])
        X = newX
        y = newy
    else:
        raise ValueError("Wrong shapes.")
    data = pd.DataFrame(X, index=index, columns=self.columns)
    data.insert(0, "target", y)
    data.insert(0, "identifier", index)

    # Set up SISSODat and SISSOIn
    sisso_dat = SISSODat(
        data=data, features_dimensions=self.features_dimensions, nsample=nsample
    )
    self.sisso_in.set_keywords_for_SISSO_dat(sisso_dat=sisso_dat)

    # Run SISSO
    if self.run_dir is None:
        makedirs_p("SISSO_runs")
        timestamp = get_timestamp()
        self.run_dir = tempfile.mkdtemp(
            suffix=None, prefix=f"SISSO_dir_{timestamp}_", dir="SISSO_runs"
        )
    else:
        makedirs_p(self.run_dir)
    with cd(self.run_dir):
        self.sisso_in.to_file(filename="SISSO.in")
        sisso_dat.to_file(filename="train.dat")
        job = SISSOJob()
        c = Custodian(jobs=[job], handlers=[], validators=[])
        c.run()
        self.sisso_out = SISSOOut.from_file(  # pylint: disable=W0201
            filepath="SISSO.out"
        )

    # Clean run directory
    if self.clean_run_dir:  # TODO: add check here to not remove "." if the user passes . ?
        shutil.rmtree(self.run_dir)
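# Hedged usage sketch for fit(): assumes the enclosing class is pysisso's SISSORegressor
# (constructed with default keywords here, which is an assumption) and that the SISSO
# executable is available, since fit() launches it through Custodian.
import numpy as np
import pandas as pd

X = pd.DataFrame(np.random.rand(20, 3), columns=["a", "b", "c"])
y = 2.0 * X["a"] - 0.5 * X["b"] + 0.1          # single-task target, shape (n_samples,)

# model = SISSORegressor(desc_dim=2, rung=1)   # hypothetical keyword choices
# model.fit(X, y)                              # writes SISSO.in/train.dat in run_dir and runs SISSO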
def test_makedirs_p(self):
    makedirs_p(self.test_dir_path)
    self.assertTrue(os.path.exists(self.test_dir_path))
    makedirs_p(self.test_dir_path)
    self.assertRaises(OSError, makedirs_p, os.path.join(test_dir, "myfile_txt"))
def test_from_csv_2(self):
    csv_file = os.path.join(TEST_FILE_DIR, "PredictionDiagnostics_parameters.csv")

    # Test basic functionality
    with ScratchDir(".") as scratch_dir:
        makedirs_p(os.path.join(scratch_dir, "procedures"))
        makedirs_p(os.path.join(scratch_dir, "names"))
        new_files, result, message = generate_protocol_files_from_csv(
            csv_file, output_directory=scratch_dir)
        self.assertEqual(result, "success")
        self.assertEqual(message, {
            "comment": "Generated 2 protocols",
            "error": ""
        })
        self.assertEqual(
            len(os.listdir(os.path.join(scratch_dir, "procedures"))), 2)

        original = open(
            os.path.join(PROCEDURE_TEMPLATE_DIR, "diagnosticV2.000")).readlines()
        parsed = open(
            os.path.join(
                os.path.join(scratch_dir, "procedures"),
                "PredictionDiagnostics_000000.000",
            )).readlines()
        self.assertFalse(list(difflib.unified_diff(original, parsed)))
        for line in difflib.unified_diff(original, parsed):
            self.assertIsNotNone(line)

        original = open(
            os.path.join(PROCEDURE_TEMPLATE_DIR, "diagnosticV3.000")).readlines()
        parsed = open(
            os.path.join(
                os.path.join(scratch_dir, "procedures"),
                "PredictionDiagnostics_000196.000",
            )).readlines()
        diff = list(difflib.unified_diff(original, parsed))
        diff_expected = [
            "--- \n",
            "+++ \n",
            "@@ -27,7 +27,7 @@\n",
            " <SpecialType> </SpecialType>\n",
            " <Oper> = </Oper>\n",
            " <Step>002</Step>\n",
            "- <Value>03:00:00</Value>\n",
            "+ <Value>03:12:00</Value>\n",
            " </EndEntry>\n",
            " <EndEntry>\n",
            " <EndType>Voltage </EndType>\n",
        ]
        self.assertEqual(diff, diff_expected)
        for line in difflib.unified_diff(original, parsed):
            self.assertIsNotNone(line)

        _, namefile = os.path.split(csv_file)
        namefile = namefile.split("_")[0] + "_names_"
        namefile = (namefile +
                    datetime.datetime.now().strftime("%Y%m%d_%H%M") + ".csv")
        names_test = open(
            os.path.join(scratch_dir, "names", namefile)).readlines()
        self.assertEqual(
            names_test,
            [
                "PredictionDiagnostics_000000_\n",
                "PredictionDiagnostics_000196_\n"
            ],
        )
def generate_protocol_files_from_csv(csv_filename, output_directory=None):
    """
    Generates a set of protocol files from csv filename input by
    reading protocol file input corresponding to each line of the csv file.
    Writes a csv file that records the names of the generated protocol files.

    Args:
        csv_filename (str): CSV containing protocol file parameters.
        output_directory (str): directory in which to place the output files
    """
    # Read csv file
    protocol_params_df = pd.read_csv(csv_filename)

    successfully_generated_files = []
    file_generation_failures = []
    names = []
    result = ""
    message = {"comment": "", "error": ""}
    if output_directory is None:
        output_directory = PROCEDURE_TEMPLATE_DIR

    # Create required directories if not already present
    for subdir in ["settings", "procedures", "names"]:
        subdir_path = os.path.abspath(os.path.join(output_directory, subdir))
        if not os.path.exists(subdir_path):
            makedirs_p(subdir_path)

    for index, protocol_params in protocol_params_df.iterrows():
        template = protocol_params["template"]
        protocol = None

        # Filename for the output
        filename_prefix = "_".join(
            [
                protocol_params["project_name"],
                "{:06d}".format(protocol_params["seq_num"]),
            ]
        )

        if ".000" in template:  # Extension for maccor procedure files
            template_fullpath = os.path.join(PROCEDURE_TEMPLATE_DIR, template)
            template_length = template_detection(template_fullpath)
            if "diagnostic_parameter_set" in protocol_params:
                # For parameters that include diagnostics, load those values
                diag_params_df = pd.read_csv(
                    os.path.join(PROCEDURE_TEMPLATE_DIR, "PreDiag_parameters - DP.csv")
                )
                diagnostic_params = diag_params_df[
                    diag_params_df["diagnostic_parameter_set"]
                    == protocol_params["diagnostic_parameter_set"]
                ].squeeze()

            if template_length == 23 and template == "EXP.000":
                # length and name for initial procedure files
                protocol = Procedure.from_exp(
                    **protocol_params[["cutoff_voltage", "charge_rate", "discharge_rate"]]
                )
            elif template_length == 72:
                # length for V1 and V1 diagnostic templates without ending diagnostics
                protocol = Procedure.from_regcyclev2(protocol_params)
                protocol.add_procedure_diagcyclev2(
                    protocol_params["capacity_nominal"], diagnostic_params
                )
            elif template_length == 96:
                # template length for diagnostic type cycling
                mwf_dir = os.path.join(output_directory, "mwf_files")
                if protocol_params["project_name"] == "RapidC":
                    # Project with charging waveform
                    waveform_name = insert_charging_parametersv1(
                        protocol_params, waveform_directory=mwf_dir)
                    protocol = Procedure.generate_procedure_chargingv1(
                        index, protocol_params, waveform_name, template=template_fullpath)
                elif protocol_params["project_name"] == "Drive":
                    # Project with discharging waveform
                    waveform_name = insert_driving_parametersv1(
                        protocol_params, waveform_directory=mwf_dir)
                    protocol = Procedure.generate_procedure_drivingv1(
                        index, protocol_params, waveform_name, template=template_fullpath)
                else:
                    # Use the default parameterization for PreDiag/Prediction Diagnostic projects
                    protocol = Procedure.generate_procedure_regcyclev3(
                        index, protocol_params, template=template_fullpath)
                    protocol.generate_procedure_diagcyclev3(
                        protocol_params["capacity_nominal"], diagnostic_params
                    )
            else:
                # Case where it is not possible to match the procedure template
                failure = {
                    "comment": "Unable to find template: " + template,
                    "error": "Not Found",
                }
                file_generation_failures.append(failure)
                warnings.warn("Unsupported file template {}, skipping.".format(template))
                result = "error"
                continue

            filename = "{}.000".format(filename_prefix)
            filename = os.path.join(output_directory, "procedures", filename)

        elif ".mps" in template and template == "formationV1.mps":
            # biologic settings template and formation project
            protocol = Settings.from_file(os.path.join(BIOLOGIC_TEMPLATE_DIR, template))
            protocol = protocol.formation_protocol_bcs(protocol_params)
            filename = "{}.mps".format(filename_prefix)
            filename = os.path.join(output_directory, "settings", filename)
        elif ".sdu" in template:  # No schedule file templates implemented
            failure = {
                "comment": "Schedule file generation is not yet implemented",
                "error": "Not Implemented"
            }
            file_generation_failures.append(failure)
            logger.warning("Schedule file generation not yet implemented")
            result = "error"
            continue
        else:  # Unable to match to any known template format
            failure = {
                "comment": "Unable to find template: " + template,
                "error": "Not Found",
            }
            file_generation_failures.append(failure)
            warnings.warn("Unsupported file template {}, skipping.".format(template))
            result = "error"
            continue

        logger.info(filename)
        protocol.to_file(filename)
        successfully_generated_files.append(filename)
        names.append(filename_prefix + "_")

    # This block of code produces the file containing all of the run file
    # names produced in this function call. This is to make starting tests easier
    _, namefile = os.path.split(csv_filename)
    namefile = namefile.split("_")[0] + "_names_"
    namefile = namefile + datetime.datetime.now().strftime("%Y%m%d_%H%M") + ".csv"

    names_dir = os.path.join(output_directory, "names")
    os.makedirs(names_dir, exist_ok=True)

    with open(os.path.join(names_dir, namefile), "w", newline="") as outputfile:
        wr = csv.writer(outputfile)
        for name in names:
            wr.writerow([name])

    num_generated_files = len(successfully_generated_files)
    num_generation_failures = len(file_generation_failures)
    num_files = num_generated_files + num_generation_failures

    message = {
        "comment": "Generated {} of {} protocols".format(num_generated_files, num_files),
        "error": ""
    }
    if not result:
        result = "success"
    else:
        message["error"] = "Failed to generate {} of {} protocols".format(
            num_generation_failures, num_files)
        logger.error(message["error"])

    return successfully_generated_files, file_generation_failures, result, message