def distribute_Q_over_procs(self, num_procs):
    """
    split the Q point indices 0 ... total_Qsteps-1 into contiguous chunks, one per process.

    every process gets total_Qsteps // num_procs points. the remainder is handed out
    one extra point at a time to the lowest ranks (NOT all to rank 0), so rank 0 always
    holds at least as many Q points as any other rank.

    args:
        num_procs: number of processes to spread the Q points over.

    sets:
        self.Q_on_procs: list with one entry per process; entry ii is the list of
            Q point indices assigned to process ii.

    raises:
        PSF_exception: if any process would end up with no Q points.
    """

    # Num_Q_Per_Processor_ARRay: the even share everywhere, +1 on the first 'extra' ranks.
    # this is exactly what dealing the points out round-robin produces.
    share, extra = divmod(self.total_Qsteps, num_procs)
    nQpp_arr = np.full(num_procs, share, dtype=int)
    nQpp_arr[:extra] += 1

    # if any procs have 0 Q, print error message and exit.
    # '<=' also rejects a nonsensical (negative) number of Q points.
    if nQpp_arr.min() <= 0:
        message = 'atleast one processor will do 0 Qpoints.\n' \
                  ' increase number of procs or decrease number of Q points'
        raise PSF_exception(message)

    # print parallelism info
    lines = [f'process: 0 Q points: {nQpp_arr[0]:g}\n']
    lines.extend(f' process: {ii:g} Q points: {nQpp_arr[ii]:g}\n'
                 for ii in range(1, num_procs))
    print_stdout(''.join(lines), msg_type='Q points on each process')

    # put the Qpoint indices for each proc in the list
    self.Q_on_procs = []
    shift = 0
    for num_Q in nQpp_arr:
        stop = shift + int(num_Q)
        self.Q_on_procs.append(list(range(shift, stop)))
        shift = stop
def parse_input(self, input_file):
    """
    read the input file and overwrite the current attribute values with whatever the
    file specifies.

    each attribute is passed to the matching self._parse_* helper together with its
    current value, which the helper receives as the default.

    args:
        input_file: path to the text input file.

    raises:
        PSF_exception: if the input file cannot be opened or read. the checks done in
            self._check_file() and self._check_variables() may raise as well.
    """

    # test that the input file exists/isnt broken. only catch i/o and decoding problems
    # so that e.g. a KeyboardInterrupt is not reported as a missing file.
    try:
        with open(input_file, 'r') as inp:
            self.input_txt = inp.readlines()
    except (OSError, ValueError) as err:
        message = f'input file \'{input_file}\' not found'
        raise PSF_exception(message) from err

    # check the key_words in the file, remove empty lines and comments
    self._check_file()

    # get the variables from file
    self.traj_file = self._parse_str('traj_file', self.traj_file)
    self.outfile_prefix = self._parse_str('outfile_prefix', self.outfile_prefix)
    self.output_dir = self._parse_str('output_dir', self.output_dir)
    self.save_progress = self._parse_bool('save_progress', self.save_progress)
    self.parse_custom = self._parse_bool('parse_custom', self.parse_custom)
    self.dt = self._parse_float('dt', self.dt)
    self.stride = self._parse_int('stride', self.stride)
    self.total_steps = self._parse_int('total_steps', self.total_steps)
    self.num_atoms = self._parse_int('num_atoms', self.num_atoms)
    self.num_processes = self._parse_int('num_processes', self.num_processes)
    self.supercell = self._parse_int_list('supercell', self.supercell)
    self.lattice_vectors = self._parse_float_list('lattice_vectors',
                                                  self.lattice_vectors)
    self.unwrap_pos = self._parse_bool('unwrap_pos', self.unwrap_pos)
    self.recalculate_cell_lengths = self._parse_bool(
        'recalculate_cell_lengths', self.recalculate_cell_lengths)
    self.ins_xlengths = self._parse_float_list('ins_xlengths', self.ins_xlengths)
    self.types = self._parse_str_list('types', self.types)
    self.exp_type = self._parse_str('exp_type', self.exp_type)
    self.Qpoints_file = self._parse_str('Qpoints_file', self.Qpoints_file)
    self.Qmin = self._parse_float_list('Qmin', self.Qmin)
    self.Qmax = self._parse_float_list('Qmax', self.Qmax)
    self.total_Qsteps = self._parse_int_list('total_Qsteps', self.total_Qsteps)
    self.num_blocks = self._parse_int('num_blocks', self.num_blocks)

    # the default for 'blocks' is every block, so it has to be built AFTER num_blocks
    # has been read and BEFORE 'blocks' itself is parsed.
    self.blocks = list(range(self.num_blocks))
    self.blocks = self._parse_int_list('blocks', self.blocks)

    self.compute_bragg = self._parse_bool('compute_bragg', self.compute_bragg)
    self.compute_timeavg = self._parse_bool('compute_timeavg', self.compute_timeavg)
    self.compute_sqw = self._parse_bool('compute_sqw', self.compute_sqw)

    # check that input variables are valid where applicable
    self._check_variables()
def _check_file(self):
    """
    check the key_words in the input file and remove comments/blank lines.

    reads self.input_txt (the raw lines of the input file), validates every key word
    against self.key_words, records each key word in self.doubles, and finally replaces
    self.input_txt with the cleaned lines (blank lines and comment-only lines dropped,
    trailing '# ...' comments and surrounding whitespace removed).

    raises:
        PSF_exception: if a key word is not in self.key_words, or if a key word appears
            more than once.
    """

    cleaned_txt = []
    for line in self.input_txt:

        # skip blank lines and lines that are only a comment
        stripped = line.strip()
        if not stripped or stripped.startswith('#'):
            continue

        # drop any trailing comment; the key word is whatever is left of the '='
        tmp_line = line.split('#')[0].strip()
        key_word = tmp_line.split('=')[0].strip()

        if key_word not in self.key_words:
            message = f'key word \'{key_word}\' is unknown. check the input file'
            raise PSF_exception(message)

        if key_word in self.doubles:
            message = f'key word \'{key_word}\' appears more than once in the input file'
            raise PSF_exception(message)

        self.doubles.append(key_word)
        cleaned_txt.append(tmp_line)

    # store the cleaned lines; previously the filtered list was built and then thrown away
    self.input_txt = cleaned_txt
def __init__(self, invars):
    """
    open the hdf5 trajectory file named by invars.traj_file read-only and keep the
    handle on self.handle.

    had to open it on each process rather than open and copy. mpi4py complains when
    trying to pass open files, atleast using the 'pickle' versions of send & recv.

    args:
        invars: object providing the traj_file attribute (path of the hdf5 file).

    raises:
        PSF_exception: if the file cannot be opened.
    """

    try:
        self.handle = h5py.File(invars.traj_file, 'r')
    except Exception as err:
        # the message must be an f-string, otherwise the literal text
        # '{invars.traj_file}' is shown instead of the file name
        message = f'file \'{invars.traj_file}\' seems borked'
        raise PSF_exception(message) from err
def _parse_bool(self, key_word, default):
    """
    get bool variable from file.

    looks through self.input_txt for lines of the form 'key_word = value' and converts
    the value with bool(int(value)), i.e. 0 is False and any other integer is True.
    anything after a '#' on the line is ignored.

    args:
        key_word: name of the variable to look for.
        default: value returned unchanged if key_word is not in the file.

    returns:
        the parsed bool, or default if key_word does not appear.

    raises:
        PSF_exception: if the value cannot be interpreted as an integer.
    """

    return_value = default
    for line in self.input_txt:

        if line.split('=')[0].strip() != key_word:
            continue

        # value is what follows the last '=', minus any trailing comment
        raw_value = line.split('=')[-1].split('#')[0].strip()
        try:
            return_value = bool(int(raw_value))
        except ValueError as err:
            message = f'key word \'{key_word}\' seems wrongs.'
            raise PSF_exception(message) from err

    return return_value
def _parse_str_list(self, key_word, default):
    """
    get list of strings from file.

    looks through self.input_txt for lines of the form 'key_word = a b c' and splits the
    value on whitespace. anything after a '#' on the line is ignored.

    args:
        key_word: name of the variable to look for.
        default: value returned unchanged if key_word is not in the file.

    returns:
        list of str with the whitespace separated entries, or default if key_word does
        not appear.
    """

    return_value = default
    for line in self.input_txt:

        if line.split('=')[0].strip() != key_word:
            continue

        # value is what follows the last '=', minus any trailing comment. str.split()
        # already yields strings, so no conversion (and no error handling) is needed.
        raw_value = line.split('=')[-1].split('#')[0].strip()
        return_value = raw_value.split()

    return return_value
def _Qpoints_from_list(self, invars):
    """
    read the Q points from a text file: 1 Q point per line, the 3 coords separated by
    whitespace (the file is read with np.loadtxt using its default delimiter).
    Overwrites other definitions for Q slices if a file name is given.

    args:
        invars: object providing the Qpoints_file attribute (path of the text file).

    sets:
        self.total_reduced_Q: array of shape [number of Q points, 3].
        self.total_Qsteps: number of Q points.

    raises:
        PSF_exception: if the file cannot be read or does not contain 3 coords per
            Q point.
    """

    message = f'Qpoints file \'{invars.Qpoints_file}\' is broken'

    try:
        reduced_Q = np.loadtxt(invars.Qpoints_file)  # read the Q points
        if reduced_Q.ndim == 1:
            # if only 1 Q, reshape to avoid breaking stuff later
            reduced_Q = reduced_Q.reshape((1, 3))
    except (OSError, ValueError, TypeError) as err:
        raise PSF_exception(message) from err

    # anything that is not [num_Q, 3] would break later, so report it here
    if reduced_Q.ndim != 2 or reduced_Q.shape[1] != 3:
        raise PSF_exception(message)

    self.total_reduced_Q = reduced_Q
    self.total_Qsteps = reduced_Q.shape[0]  # number of Q points
def _loop_over_blocks(self, invars, Qpoints, lattice, traj_file):
    """
    contains outer loop over blocks.

    for every block index in invars.blocks this method:
      1. calls traj_file.parse_trajectory(invars, self) to read the block
         (presumably this fills self.atom_types and self.box_lengths, which are used
         right afterwards -- confirm against parse_trajectory).
      2. checks that the number of distinct atom types in the first row of
         self.atom_types equals invars.num_types.
      3. calls self.xlengths_tools.map_types_to_data(invars, self).
      4. divides self.box_lengths by invars.supercell to get the cell lengths and, if
         invars.recalculate_cell_lengths is set, rebuilds the lattice and Q points from them.
      5. starts invars.num_processes processes that each run self._loop_over_Q, then
         takes one result tuple per process off self.mp_queue and adds it into
         self.sqw / self.bragg / self.timeavg (only the ones requested in invars).
      6. optionally saves the running average (accumulated array / self.counter).
      7. prints timing info and increments self.counter.

    args:
        invars: input variables; this method reads blocks, num_types, supercell,
            recalculate_cell_lengths, num_processes, compute_sqw, compute_bragg,
            compute_timeavg, save_progress and outfile_prefix from it.
        Qpoints: provides Q_on_procs, reduced_Q and reconvert_Q_points(lattice).
        lattice: its lattice_vectors attribute is overwritten and recompute_lattice()
            is called when the cell lengths are recalculated.
        traj_file: provides parse_trajectory(invars, self).

    raises:
        PSF_exception: if the number of atom types in the trajectory does not match
            invars.num_types.

    original author notes --
    info about scattering lengths: there should be 1 length per TYPE, in order of types.
    e.g. for 4 types = 1,2,3,4 there should be for lengths atom 1 : length 1,
    atom 2 : lenght 2, etc... i am also assuming that dump_modify sort id was used so that
    the order of atoms is the same for each step. this can be changed easily if not the
    case using the atom_types variable, but that will slow down the calc a little.

    the b_array variable has shape [num_steps, num_atoms] to vectorize calculating the
    neutron weighted density-density correlation fn
    (NOTE(review): b_array is not referenced in this method itself -- presumably it is
    used in _loop_over_Q; confirm.)
    """

    for block_index in invars.blocks:  # loop over blocks to 'ensemble' average

        # used below
        self.block_index = block_index

        # print progress and start timer
        start_time = timer()
        message = '\n............................................'
        print_stdout(message)
        message = f' now on block {self.counter} out of {self.num_blocks}'
        print_stdout(message, msg_type='NOTE')

        # get the positions from file
        traj_file.parse_trajectory(invars, self)

        # check that the number of b's defined in input file are consistent with traj file
        if np.unique(self.atom_types[0, :]).shape[0] != invars.num_types:
            message = 'number of types in input file doesnt match simulation'
            raise PSF_exception(message)

        # look up ins scattering lengths OR parameters to compute xray form factors.
        self.xlengths_tools.map_types_to_data(invars, self)

        # box lengths read from traj file, divided by the supercell size along each axis
        a = self.box_lengths[0] / invars.supercell[0]
        b = self.box_lengths[1] / invars.supercell[1]
        c = self.box_lengths[2] / invars.supercell[2]

        # print box lengths read from traj file to compare to input file
        message = f'cell lengths from hdf5 file: {a:2.3f} {b:2.3f} {c:2.3f} Angstrom'
        print_stdout(message, msg_type='NOTE')

        # recall, only ortho lattice vectors used (for now)
        if invars.recalculate_cell_lengths:  # optionally recalculates from avg in MD file
            lattice.lattice_vectors = np.array([[a, 0, 0], [0, b, 0], [0, 0, c]])
            lattice.recompute_lattice()  # recompute reciprocal lattice
            Qpoints.reconvert_Q_points(lattice)  # convert Q to 1/A in new basis

        Q_start_time = timer()  # track time per Q not including the read/write time

        message = ('printing progess for process 0, which has >= the number of Q on other' \
                   ' processes.\n -- now entering loop over Q -- ')
        print_stdout(message, msg_type='NOTE')

        # -----------------------------------------------------------------------------------------
        # ------------- multiprocessing part. -------------
        # -----------------------------------------------------------------------------------------

        # a Queue to hold the retured SQW data
        self.mp_queue = mp.Queue()

        # a container to hold the processes
        procs = []

        # loop over processes, setting up the loop over Q on each.
        for pp in range(invars.num_processes):
            procs.append(
                mp.Process(target=self._loop_over_Q, args=(pp, invars, Qpoints)))

        # now start running the function on each process
        for proc in procs:
            proc.start()

        # note, doing it this way with the queue 'blocks' until the next processes adds to queue if
        # it is empty. I dont know if this will freeze the whole calculation or just the background
        # proc that is running the queue. anyway, everything with the queue has to be done before
        # joining the procs or the data will corrupt/crash the program.

        # get the stuff calculated on each proc: exactly one item is taken off the queue per
        # started process. the last element of each item is used as the index into
        # Qpoints.Q_on_procs, so the order in which processes finish does not matter.
        for pp in range(invars.num_processes):

            # i think this is FIFO
            sqw_pp, bragg_pp, timeavg_pp, proc = self.mp_queue.get()
            Q_inds = Qpoints.Q_on_procs[proc]

            # now put it into main arrays if requested
            if invars.compute_sqw:
                self.sqw[:, Q_inds] = self.sqw[:, Q_inds] + sqw_pp
            if invars.compute_bragg:
                self.bragg[Q_inds] = self.bragg[Q_inds] + bragg_pp
            if invars.compute_timeavg:
                self.timeavg[Q_inds] = self.timeavg[Q_inds] + timeavg_pp

        # now close the queue and rejoin its proc
        self.mp_queue.close()
        self.mp_queue.join_thread()

        # wait here for all to finish before moving on to the next block
        # (note: this loop rebinds 'proc', which above held the index taken from the queue)
        for proc in procs:
            proc.join()

        # -----------------------------------------------------------------------------------------
        # ------------ end of multiprocessing part ----------------
        # -----------------------------------------------------------------------------------------

        # optionally save progress. the accumulated arrays are divided by self.counter
        # before writing, i.e. what is saved is the average over the blocks done so far.
        if invars.save_progress:
            if self.counter != self.num_blocks:  # dont write if this is the last block

                if invars.compute_sqw:
                    f_name = invars.outfile_prefix + f'_SQW_B{block_index}.hdf5'
                    mod_io.save_sqw(invars, Qpoints.reduced_Q, self.meV,
                                    self.sqw / self.counter, f_name)

                if invars.compute_bragg:
                    f_name = invars.outfile_prefix + f'_BRAGG_B{block_index}.hdf5'
                    mod_io.save_bragg(invars, Qpoints.reduced_Q,
                                      self.bragg / self.counter, f_name)

                if invars.compute_timeavg:
                    f_name = invars.outfile_prefix + f'_TIMEAVG_B{block_index}.hdf5'
                    mod_io.save_timeavg(invars, Qpoints.reduced_Q,
                                        self.timeavg / self.counter, f_name)

        # print timing to screen
        end_time = timer()
        elapsed_time = end_time - start_time
        Q_time = end_time - Q_start_time  # everything after the trajectory was read
        io_time = elapsed_time - Q_time  # everything before Q_start_time

        # time per Qpoint, using the number of Q points on process 0
        Q_time = Q_time / len(Qpoints.Q_on_procs[0])  # avg over all Q
        message = f' avg time per Q-point: {Q_time:2.3f} seconds'
        print_stdout(message, msg_type='TIMING')

        # time spent in i/o
        message = f' total io time: {io_time:2.3f} seconds'
        print_stdout(message)

        # total time for in this method
        message = (
            f' total time for this block: {elapsed_time:2.3f} seconds'
            f' ({elapsed_time/60:2.3f} minutes)')
        print_stdout(message)

        # update the block counter
        self.counter = self.counter + 1
def _check_variables(self):
    """
    where applicable, do some checks on input variables and exit if need be.

    besides checking, this method also:
      - converts self.lattice_vectors to a 3x3 numpy array
      - sets self.num_types (from ins_xlengths if given, otherwise from types)
      - sets self.num_processes to the cpu count if it was None
      - sets self.num_Qpath (number of Qmin vertices)
      - creates self.output_dir (including parent directories) if it doesnt exist

    raises:
        PSF_exception: if any of the checks fail.
    """

    # check that the lattice vectors make sense
    try:
        self.lattice_vectors = np.array(self.lattice_vectors).reshape((3, 3))
    except (TypeError, ValueError) as err:
        message = 'lattice vectors seem wrong. should be a list of 9 floats with no commas'
        raise PSF_exception(message) from err

    # check that lattice vectors are ortho
    # the issue is that positions etc. are in cartesian coords with ortho boxes. different lattice
    # vectors should work, but i haven't tested it yet. it will be necessary to convert Q in 1/A
    # to cartesian coordinates so that the vectorized multiplication done in mod_sqw._loop_over_blocks
    # works.
    off_diagonal = self.lattice_vectors[~np.eye(3, dtype=bool)]
    if np.any(off_diagonal != 0):
        message = 'only ortho. lattice vectors are currently supported. \n' \
                  ' contact the author at [email protected] if you need this feature'
        raise PSF_exception(message)

    # print the traj file
    message = f'reading trajectories from file \'{self.traj_file}\''
    print_stdout(message, msg_type='NOTE')

    # check for user defined scattering lenghts (only for ins).
    # the comparison with False is kept as '!=' on purpose: it is the original test for
    # 'no lengths given' and the default value is not visible from here.
    if self.ins_xlengths != False:
        self.num_types = len(self.ins_xlengths)
        message = 'using user specified scattering lengths (only works for ins, ignored for xray)'
        print_stdout(message, msg_type='NOTE')
    else:
        self.num_types = len(self.types)
        message = 'using scattering lengths from mod_xlengts'
        print_stdout(message, msg_type='NOTE')

    # check that the requested number of processes makes sense
    if self.num_processes is None:
        # os.cpu_count() returns None when the count cant be determined; fall back to 1
        # instead of crashing on the comparison below
        self.num_processes = os.cpu_count() or 1
    if self.num_processes < 1:
        message = 'requested number of processes should be 1 or larger'
        raise PSF_exception(message)

    # check experiment type
    if self.exp_type not in ['xray', 'ins']:
        message = 'experiment type should be either \'xray\' or \'ins\''
        raise PSF_exception(message)
    message = f'the experiment type is \'{self.exp_type}\''
    print_stdout(message, msg_type='NOTE')

    # check that Q paths opts make sense
    if len(self.Qmin) % 3:
        message = 'each vertex for the Q path should have 3 coords'
        raise PSF_exception(message)
    self.num_Qpath = len(self.Qmin) // 3
    if len(self.Qmin) != len(self.Qmax):
        message = 'variable Qmin and Qmax should have same number of vertices'
        raise PSF_exception(message)
    if len(self.total_Qsteps) != self.num_Qpath:
        message = 'number of steps should equal number of paths'
        raise PSF_exception(message)

    # check that the requested blocks make sense. an empty list is rejected explicitly,
    # otherwise max() would crash with a ValueError.
    if (len(self.blocks) == 0 or max(self.blocks) >= self.num_blocks
            or len(self.blocks) > self.num_blocks):
        message = 'variable blocks should be a list of the blocks to calculate'
        raise PSF_exception(message)

    # if the output dir. doesnt exist, create it. makedirs also creates missing parent
    # directories and exist_ok avoids a crash if it appears in the meantime.
    if not os.path.exists(self.output_dir):
        message = f'creating directory \'{self.output_dir}\''
        print_stdout(message, msg_type='NOTE')
        os.makedirs(self.output_dir, exist_ok=True)

    # check that atleast one of compute_* is not False
    if not (self.compute_sqw or self.compute_timeavg or self.compute_bragg):
        message = (
            'there is nothing to do! set atleast one of compute_sqw, \n compute_timeavg,'
            ' or compute_bragg to 1 in the input file')
        raise PSF_exception(message)

    # check if traj file opens
    if not os.path.exists(self.traj_file):
        message = f'file \'{self.traj_file}\' not found'
        raise PSF_exception(message)