def test_h5reader_shuffle_reproducible():
    file_list = [h5file]
    reader = hdf5.H5Reader(file=file_list, shuffle=True, shuffle_reproducible=True, verbose=True)
    writer = dummy.DummyWriter(source=reader, verbose=True)
    writer.dump()


def test_h5reader():
    reader = hdf5.H5Reader(file=h5file, verbose=True)
    writer = dummy.DummyWriter(source=reader, verbose=True)
    writer.dump()


def test_h5reader_file_list():
    file_list = [h5file, h5file, h5file]
    reader = hdf5.H5Reader(file=file_list, verbose=True)
    writer = dummy.DummyWriter(source=reader, verbose=True)
    writer.dump()
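
# The tests above assume a module-level `h5file` that points at an existing
# HDF5 input file, defined elsewhere in the test suite.  The helper below is
# a minimal, hedged sketch of how such a file could be generated with plain
# h5py; the path and dataset layout are illustrative assumptions only, not
# the package's actual test-data format.
def _make_dummy_h5file(path, n_frames=3):
    """Write a tiny HDF5 file with one group of random datasets per frame."""
    import h5py
    import numpy as np
    with h5py.File(path, "w") as fp:
        for i in range(n_frames):
            grp = fp.create_group(str(i))
            grp.create_dataset("coordinates", data=np.random.rand(10, 3))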
def __next__(self):
    """
    Self-consistent solvent matching, implemented as a Python generator.
    Implementation follows <lsz/shell-sc-solv-match.py>.
    """
    for obj in next(self.src):
        if obj is not None:
            assert isinstance(obj, base.Container)
            if self.g_scaled_file is not None:
                # --- read g_scaled and rho from a previous calculation ---
                reader = hdf5.H5Reader(filename=self.g_scaled_file)
                for frm in next(reader):
                    obj_g_scaled = frm
                    break
                del reader
                g_dict = obj_g_scaled.get_data(base.loc_solv_match + '/g_scaled')
                rho_dict = obj_g_scaled.get_data(base.loc_solv_match + '/rho')
            else:
                # --- compute g_scaled and rho ---
                # obtain information from the pipeline log
                dr = obj.query_meta('histograms/histogram/dr')
                assert (dr is not None)
                # ---
                self.geometry = obj.get_geometry()
                assert (self.geometry is not None)
                geometry_param = obj.query_meta(self.geometry)
                assert (geometry_param is not None)
                # ---
                virtual_param = obj.query_meta('VirtualParticles')
                if (virtual_param is not None):
                    self.x_particle_method = virtual_param['method']
                    xrho = virtual_param['x_density']
                # --- obtain the shell volume from the pipeline log
                if (self.geometry in ['Sphere', 'Cuboid', 'Ellipsoid']):
                    V_shell = geometry_param['shell_volume']
                    VSqr_shell = V_shell ** 2
                elif (self.geometry == 'ReferenceStructure' or
                      self.geometry == 'MultiReferenceStructure'):
                    # V_shell is determined below
                    V_shell = None
                else:
                    raise NotImplementedError('Geometry ' + self.geometry +
                                              ' not implemented for self-consistent solvent matching')
                # --- read and prepare the g-function for matching
                g_header = (rdf.readHeader(self.g_ascii_file)).rstrip('\n').split()
                assert (self.g_match in g_header)
                _g_el_set = set([])
                for item in g_header:
                    if (item == '#'):
                        continue
                    pair = item.split(',')
                    _g_el_set.add(pair[0])
                    _g_el_set.add(pair[1])
                # obsolete
                g_elements = sorted(list(_g_el_set))
                # ---
                g_table_0 = np.loadtxt(self.g_ascii_file)
                # TODO: Assert that histograms and rdfs have the same bin size;
                # otherwise, generate a new rdf by interpolation.
                g_dr = (g_table_0[1:, 0] - g_table_0[:-1, 0]).mean()
                # print(g_dr)
                # Taper the noise for self.g_noise_fraction < 1; determine and
                # set g_infty in the last bins.
                g_table_0_smooth = rdf.smooth(g_table_0, g_dr, self.g_plateau_fraction,
                                              self.g_noise_fraction, verb=False)
                # np.savetxt("g_table_0_smooth.dat", g_table_0_smooth)
                if (self.debug):
                    obj.put_data(base.loc_solv_match + '/g_table_0_smooth', g_table_0_smooth)
                g_table_0 = g_table_0_smooth
                _radii = obj.get_data(base.loc_histograms + '/radii')
                # Extend the rdf in distance AFTER noise tapering; rdf values at
                # the largest distances are set to g_infty
                # (see the padding sketch after this method).
                if _radii.shape[0] > g_table_0.shape[0]:
                    new_g_table = np.zeros((_radii.shape[0], g_table_0.shape[1]))
                    new_g_table[:g_table_0.shape[0], :] = g_table_0
                    new_g_table[:, 0] = _radii
                    tmp = g_table_0[-1, 1:]
                    new_g_table[g_table_0.shape[0]:, 1:] = tmp[np.newaxis, :]
                    g_table_0 = new_g_table
                    # np.savetxt("g_table_0_smooth_extended.dat", new_g_table)
                    if (self.debug):
                        obj.put_data(base.loc_solv_match + '/g_table_0_smooth_extended', new_g_table)
                # if do_g_extension:
                #     g_dr_0 = g_table_0[-1, 0] - g_table_0[-2, 0]
                #     g_nr_0 = g_table_0.shape[0]
                #     g_nrow = g_extension_factor * g_nr_0
                #     g_ncol = g_table_0.shape[1]
                #     g_table = np.zeros((g_nrow, g_ncol))
                #     g_table[0:g_nr_0, :] = g_table_0[0:g_nr_0, :]
                #     for idx in range(g_nr_0, g_nrow):
                #         g_table[idx, 0] = g_table[idx - 1, 0] + g_dr_0
                #         g_table[idx, 1:] = g_table[idx - 1, 1:]
                # else:
                #     g_table = g_table_0
                g_table = g_table_0
                # ---
                assert (len(g_header) == g_table.shape[1])
                g_idx = g_header.index(self.g_match)
                g_org = g_table[:, [0, g_idx]]
                if (self.debug):
                    obj.put_data(base.loc_solv_match + '/g_org', g_org)
                rho_g_org = g_org[0, 1]  # rho value stored at [0, 1] (code by JK)
                # --- split g_table into a dict holding the individual arrays
                g_dict = {}
                g_dict['radii'] = g_table[:, 0]
                for i in range(1, len(g_header)):
                    g_dict[g_header[i]] = g_table[:, i]
                # --- calculate the particle number and density of the matching solvent;
                # get_shell also merges the virtual particles X1 and X2
                shell = selection.get_shell(obj)
                # --- multiref: set the properly (volume-weighted) averaged shell H_{xx}(r)
                # if (virtual_param is not None):
                if (virtual_param is not None and self.geometry == 'MultiReferenceStructure'):
                    shell.put_data(base.loc_histograms + "/X,X",
                                   obj.get_data(base.loc_shell_Hxx + "/X.s,X.s"))
                # --- determine V_shell and VSqr_shell for the reference and multiref structure cases
                # JK: Can/should this be moved to the Average filter?
                if (self.geometry == 'ReferenceStructure' or self.geometry == 'MultiReferenceStructure'):
                    nx = (shell.get_data(base.loc_nr_particles + '/X')).mean()
                    V_shell = old_div(nx, xrho)
                    VSqr_shell = V_shell ** 2
                    if self.geometry == 'MultiReferenceStructure':
                        nxSqr = ((shell.get_data(base.loc_nr_particles + '/X')) ** 2).mean()
                        VSqr_shell = old_div(nxSqr, xrho ** 2)
                # ---
                # print("###", self.g_match, shell.get_keys(base.loc_histograms))
                assert (self.g_match in shell.get_keys(base.loc_histograms))
                pair = self.g_match.split(',')
                assert (pair[0] == pair[1])
                assert (pair[0] in shell.get_keys(base.loc_nr_particles))
                # print(shell.particles[pair[0]])
                n_match_avg = (shell.get_data(base.loc_nr_particles + '/' + pair[0])).mean()
                rho_match = old_div(n_match_avg, V_shell)
                # JK: Should we instead use <n_i/V_i> averaged over frames for multiref?
                # Use a SciPy interpolator object to operate on the reference
                # g function.  Warning: linear interpolation!
                # (See the interpolation sketch after this method.)
                g_int = sint.interp1d(g_org[:, 0], g_org[:, 1])
                # --- solvent-matching calculation
                _radii = obj.get_data(base.loc_histograms + '/radii')
                pShell = np.zeros_like(_radii)
                H = np.zeros_like(_radii)
                gAct = np.zeros_like(_radii)
                # ---
                if (self.geometry == 'Sphere') and (self.x_particle_method is None):
                    R = geometry_param['radius']
                    sw = geometry_param['shell_width']
                    for i, r in enumerate(_radii):
                        pShell[i] = rdf.PSh(R - sw, R, r)
                        H[i] = pShell[i] * g_int(_radii[i])
                else:
                    histgrms = shell.get_data(base.loc_histograms)
                    pShell = histgrms['X,X'].copy()
                    pShell /= pShell.sum()
                    pShell /= dr
                    for i, r in enumerate(_radii):
                        if (pShell[i] > 0.0):
                            gAct[i] /= pShell[i]
                        else:
                            gAct[i] = 0.0
                        if (_radii[i] < g_dict['radii'][0]) or (_radii[i] >= g_dict['radii'][-1]):
                            H[i] = 0.0
                        else:
                            H[i] = pShell[i] * g_int(_radii[i])
                # ---
                pre_factor = rho_match ** 2 * VSqr_shell * dr / 2.
                # print("### pre_factor =", pre_factor)
                H[:] *= pre_factor
                histo = shell.get_data(base.loc_histograms + '/' + self.g_match)
                # least-squares fit of histo ~ scale_factor * H
                # (see the least-squares sketch after this method)
                scale_factor = old_div(np.sum(histo[:] * H[:]), np.sum(H[:] ** 2))
                # print("### scale_factor =", scale_factor)
                obj.put_data(base.loc_solv_match + '/scale_factor', scale_factor)
                if (self.debug):
                    obj.put_data(base.loc_solv_match + '/pre_factor', pre_factor)
                    obj.put_data(base.loc_solv_match + '/scale_factor', scale_factor)
                    obj.put_data(base.loc_solv_match + '/histo', histo)
                    obj.put_data(base.loc_solv_match + '/pShell', pShell)
                    obj.put_data(base.loc_solv_match + '/H', H)
                # ---
                H *= scale_factor
                gAct /= scale_factor
                if (self.debug):
                    obj.put_data(base.loc_solv_match + '/H_scaled', H)
                    obj.put_data(base.loc_solv_match + '/gAct', gAct)
                # ---
                rho_dict = {}
                for name in g_elements:
                    avg = (shell.get_data(base.loc_nr_particles + '/' + name)).mean()
                    rho_dict[name] = old_div(avg, V_shell)
                if (self.debug):
                    obj.put_data(base.loc_solv_match + '/rho_g_org', rho_g_org)
                    obj.put_data(base.loc_solv_match + '/rho_match', rho_match)
                # --- patch the zeroth element of each g array with the density
                # --> Do we really want to keep this convention?
                for key in rho_dict:
                    pair = key + ',' + key
                    assert (pair in g_dict)
                    (g_dict[pair])[0] = rho_dict[key]
                if (self.debug):
                    obj.put_data(base.loc_solv_match + '/g_original', g_dict)
                # --- final rescaled g functions used by delta_h
                for key in g_dict:
                    if (key == 'radii'):
                        continue
                    else:
                        (g_dict[key])[1:] *= scale_factor
            obj.put_data(base.loc_solv_match + '/g_scaled', g_dict)
            obj.put_data(base.loc_solv_match + '/rho', rho_dict)
            obj.put_meta(self.get_meta())
            if self.verb:
                print("Solvent.next() :", obj.i)
            yield obj
        else:
            yield None
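
# Illustrative sketch (not part of the filter class): the g-table extension in
# __next__ pads the tabulated rdf out to the full histogram range by repeating
# the last row, i.e. the plateau value g_infty reached after noise tapering.
# A self-contained numpy demonstration of that padding step, with made-up data:
def _extend_g_table_demo():
    import numpy as np
    radii = np.arange(10, dtype=np.float64)                # histogram radii (longer)
    g_table = np.column_stack((np.arange(6, dtype=np.float64),
                               np.linspace(0.5, 1.0, 6)))  # shorter rdf table
    if radii.shape[0] > g_table.shape[0]:
        new_g_table = np.zeros((radii.shape[0], g_table.shape[1]))
        new_g_table[:g_table.shape[0], :] = g_table
        new_g_table[:, 0] = radii
        # broadcast the last g values (the plateau) into the padded rows
        new_g_table[g_table.shape[0]:, 1:] = g_table[-1, 1:][np.newaxis, :]
        g_table = new_g_table
    return g_table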
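
# Illustrative sketch (not part of the filter class): sint.interp1d builds a
# linear interpolator over the tabulated reference g(r).  By default it raises
# ValueError outside the tabulated range, which is why __next__ explicitly
# zeroes H[i] for radii outside [g_dict['radii'][0], g_dict['radii'][-1]]
# instead of calling the interpolator there.
def _interp_demo():
    import numpy as np
    from scipy import interpolate as sint
    r = np.linspace(0.0, 5.0, 11)
    g = np.ones_like(r)
    g_int = sint.interp1d(r, g)  # linear interpolation by default
    assert g_int(2.3) == 1.0
    try:
        g_int(6.0)               # outside the tabulated range
    except ValueError:
        pass
    return g_int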
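
# Illustrative sketch (not part of the filter class): the scale factor in
# __next__ is the closed-form least-squares solution of histo ~ s * H, i.e.
# the s minimizing sum((histo - s * H)**2), namely s = sum(histo*H) / sum(H**2).
# A tiny self-contained check with made-up numbers:
def _scale_factor_demo():
    import numpy as np
    H = np.array([0.0, 1.0, 2.0, 3.0])
    histo = 2.5 * H + np.array([0.0, 0.01, -0.02, 0.01])  # noisy multiple of H
    scale_factor = np.sum(histo * H) / np.sum(H ** 2)
    assert abs(scale_factor - 2.5) < 0.1
    return scale_factor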