def saveClassAttributes(clss, form, save_addr):
    """ Save class attributes.

    :Parameters:
        clss : instance
            Handle of the instance to be saved.
        form : str
            Format to save in ('h5' or 'mat').
        save_addr : str
            The address to save the attributes in.
    """
    save_addr = u.appendformat(save_addr, form)

    if form == 'mat':
        sio.savemat(save_addr, clss.__dict__)
    elif form in ('h5', 'hdf5'):
        try:
            dictdump.dicttoh5(clss.__dict__, save_addr)
        except Exception:
            # Fall back to the alternative HDF5 writer if dicttoh5 fails.
            dio.save(save_addr, clss.__dict__, compression=None)
    else:
        raise NotImplementedError
def saveClassAttributes(clss, form, save_addr):
    """ Save class attributes.

    **Parameters**

    clss: instance
        Handle of the instance to be saved.
    form: str
        Format to save in ('h5'/'hdf5', 'mat', or 'dmp'/'dump').
    save_addr: str
        The address to save the attributes in.
    """
    save_addr = u.appendformat(save_addr, form)

    if form == 'mat':
        sio.savemat(save_addr, clss.__dict__)
    elif form in ('h5', 'hdf5'):
        try:
            dictdump.dicttoh5(clss.__dict__, save_addr)
        except Exception:
            # Fall back to the alternative HDF5 writer if dicttoh5 fails.
            dio.save(save_addr, clss.__dict__, compression=None)
    elif form in ('dmp', 'dump'):
        # Pickle the whole instance, not just its attribute dict.
        with open(save_addr, 'wb') as fh:
            pickle.dump(clss, fh)
    else:
        raise NotImplementedError
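# A minimal usage sketch for either variant above. The class Foo and the
# target path are hypothetical; u.appendformat is assumed to append the
# matching file extension, as in the functions above.
class Foo:
    def __init__(self):
        self.a = 1
        self.b = [1, 2, 3]

obj = Foo()
saveClassAttributes(obj, 'h5', '/tmp/foo_attrs')   # -> /tmp/foo_attrs.h5 via dicttoh5
saveClassAttributes(obj, 'mat', '/tmp/foo_attrs')  # -> /tmp/foo_attrs.mat via scipy.io
saveClassAttributes(obj, 'dmp', '/tmp/foo_attrs')  # -> pickled instance (second variant only)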
def athena_to_hdf5(
    filename,
    fileout=None,
    overwrite=False,
    match=None,
    do_preedge=True,
    do_bkg=True,
    do_fft=True,
    use_hashkey=False,
    _larch=None,
):
    """Read Athena project file (.prj) and write to HDF5 (.h5)

    Arguments:
        filename (string): name of Athena Project file
        fileout (None or string): name of the output file [None -> filename_root.h5]
        overwrite (boolean): force overwrite if fileout exists [False]
        match (string): pattern to use to limit imported groups (see Note 1)
        do_preedge (bool): whether to do pre-edge subtraction [True]
        do_bkg (bool): whether to do XAFS background subtraction [True]
        do_fft (bool): whether to do XAFS Fast Fourier transform [True]
        use_hashkey (bool): whether to use Athena's hash key as the group name
                            instead of the Athena label [False]

    Returns:
        None, writes HDF5 file.

    Notes:
        1. There is currently a bug in h5py: track_order is ignored for the
           root group: https://github.com/h5py/h5py/issues/1471
    """
    aprj = AthenaProject(_larch=_larch)
    aprj.read(
        filename,
        match=match,
        do_preedge=do_preedge,
        do_bkg=do_bkg,
        do_fft=do_fft,
        use_hashkey=use_hashkey,
    )
    adict = aprj.as_dict()

    if fileout is None:
        froot = filename.split(".")[0]
        fileout = f"{froot}.h5"

    if os.path.isfile(fileout) and os.access(fileout, os.R_OK):
        _logger.info(f"{fileout} exists")
        _fileExists = True
        if overwrite is False:
            _logger.info(f"overwrite is {overwrite} -> nothing to do!")
            return
    else:
        _fileExists = False

    if overwrite and _fileExists:
        os.remove(fileout)

    h5out = h5py.File(fileout, mode="a", track_order=True)
    create_ds_args = {"track_order": True}
    dicttoh5(adict, h5out, create_dataset_args=create_ds_args)
    h5out.close()
    _logger.info(f"Athena project converted to {fileout}")
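# A hypothetical call for the converter above: the project file name and the
# match pattern are placeholders, not from the source.
athena_to_hdf5("fe_samples.prj", fileout="fe_samples.h5",
               overwrite=True, match="Fe*")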
def run(input_dir, output_dir, affinity_data):
    subdir_name = os.listdir(input_dir)
    start_tot = timer()
    create_ds_args = {
        'compression': "lzf",
        'shuffle': True,
        'fletcher32': True
    }
    if affinity_data:
        affinity = pd.read_csv(affinity_data)

    for j, dirname in enumerate(subdir_name):
        torch.cuda.empty_cache()
        if j % 10 == 0:
            progress = '{}/{} structures voxelized ({}%)'.format(
                j, len(subdir_name), round(j / len(subdir_name) * 100, 2))
            print('\r' + progress)

        input_filename = os.path.join(input_dir, dirname, dirname + '_pocket.mol2')
        output_filename = os.path.join(output_dir, dirname + '.h5')
        if not os.path.isfile(output_filename):
            voxel = voxelize(input_filename, channels=channel_list,
                             bin_size=2.0, num_bins=50, ligand=True)
            if affinity_data:
                if dirname in affinity.values:
                    affinity_value = affinity.loc[
                        affinity['pdbid'] == dirname, '-logKd/Ki'].iloc[0]
                    voxel['affinity'] = affinity_value
                else:
                    # No affinity entry for this structure; store NaN.
                    voxel['affinity'] = float('nan')
            else:
                # Without an affinity table, skip writing this structure.
                continue
            dicttoh5(voxel, dirname + ".h5", h5path='/', overwrite_data=True,
                     create_dataset_args=create_ds_args)
            shutil.move(dirname + '.h5', output_filename)
        else:
            continue

    end_tot = timer()
    print(end_tot - start_tot)
    return
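# The create_dataset_args dict above is forwarded by silx's dicttoh5 to
# h5py's create_dataset, so any dataset-creation keyword works. A minimal
# sketch (file name and array are hypothetical; gzip is used here as the
# portable alternative to lzf):
import numpy as np
from silx.io.dictdump import dicttoh5

dicttoh5({"grid": np.zeros((50, 50, 50), dtype=np.float32)}, "voxels.h5",
         create_dataset_args={"compression": "gzip", "shuffle": True,
                              "fletcher32": True})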
def addinfogroup(fout, name, datadict):
    # Create info group when not there
    if "processing" not in fout:
        ginfo = newNXentry(fout, "processing")
    else:
        ginfo = fout["processing"]

    # Add new info group
    index = len(ginfo.keys()) + 1
    name = "%d.%s" % (index, name)
    newgroup = ginfo.create_group(name)
    newgroup.attrs["NX_class"] = "NXprocess"
    newgroup.attrs["program"] = "spectrocrunch"
    newgroup.attrs["version"] = pkg_resources.require("SpectroCrunch")[0].version
    newgroup.attrs["sequence_index"] = index
    newgroup.attrs["date"] = timestamp()

    dicttoh5(datadict, fout, h5path=newgroup.name)
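# A sketch of how the helper above builds up numbered NXprocess entries
# (assumes newNXentry and timestamp from the same module; the file name is
# hypothetical):
import h5py

with h5py.File("processed.h5", "a") as fout:
    addinfogroup(fout, "align", {"shift": [0.1, 0.2]})
    addinfogroup(fout, "crop", {"roi": [0, 10, 0, 10]})
    # -> groups /processing/1.align and /processing/2.crop,
    #    each tagged NX_class = "NXprocess"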
def profiles_zbins(snapdir, redshifts, Rvir_allsnaps,
                   zmin=1, zmax=4, zbinwidth=0.5, outfile=None):
    '''Compute profiles for all snapshots in each redshift bin.
    In each redshift bin, the virial radius at the median (center) snapshot
    is used.

    Parameters:
        `snapdir`: directory with snapshots
        `redshifts`: 1d array where `redshifts[i]` is the redshift at snapshot `i`
        `Rvir_allsnaps`: dictionary where `Rvir_allsnaps[i]` is the virial
            radius (in kpc) at snapshot `i`
        `zmin`, `zmax`, `zbinwidth`: redshift bins will be created with edges
            `z=[z0,z0+zbinwidth)`, where `z0` is in `np.arange(zmin,zmax,zbinwidth)`

    Returns:
        Dictionary where each key is a redshift bin, and each item is a list of
        `(rmid, logTavgbins, rhoavgbins)` calculated for each snapshot in that
        redshift bin.
        Output will be saved to disk as HDF5 (via `dicttoh5`) if `outfile` is
        passed (output file path/name).
    '''
    allprofiles = {}
    for z0 in np.arange(zmin, zmax, zbinwidth):
        allprofiles[str(z0)] = {}
        z1 = z0 + zbinwidth
        print(f'Beginning bin from z={z0} to {z1}.')
        snapnums_bin = np.flatnonzero(
            inrange(redshifts, (z0, z1), right_bound_inclusive=False))
        snapnum_median = snapnums_bin[len(snapnums_bin) // 2]
        Rvir = Rvir_allsnaps[snapnum_median]
        print(f'Median redshift is {redshifts[snapnum_median]} with snapnum '
              f'{snapnum_median} and virial radius {Rvir} kpc.')
        print(f'Computing profiles for snapshots {snapnums_bin.min()} to '
              f'{snapnums_bin.max()}.')
        for snapnum in tqdm(snapnums_bin):
            p0 = load_p0(snapdir, snapnum, Rvir=Rvir, loud=0)
            allprofiles[str(z0)][str(snapnum)] = profiles(p0)

    if outfile:
        dicttoh5(allprofiles, outfile, mode='w')

    return allprofiles
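# A hypothetical driver for the function above; the snapshot directory and
# the redshift/virial-radius inputs are placeholders.
redshifts = np.load("redshifts.npy")  # one redshift per snapshot
Rvir_allsnaps = {i: rv for i, rv in enumerate(np.load("rvir_kpc.npy"))}
allprofiles = profiles_zbins("output/", redshifts, Rvir_allsnaps,
                             zmin=1, zmax=4, zbinwidth=0.5,
                             outfile="profiles_zbins.h5")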
def save_to_h5(self, fname):
    """Dump dictionary representation to HDF5 file"""
    dicttoh5(self.__dict__, fname)
    self._logger.info("RixsData saved to {0}".format(fname))
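# A matching loader sketch using silx's h5todict; whether the real class
# pairs save_to_h5 with exactly this method is an assumption.
from silx.io.dictdump import h5todict

def load_from_h5(self, fname):
    """Restore attributes previously written by save_to_h5 (sketch)."""
    self.__dict__.update(h5todict(fname))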
def dict_to_hdf5(file, save_dict):
    create_ds_args = {'compression': "gzip", 'fletcher32': True}
    dictdump.dicttoh5(save_dict, file, h5path="/", mode='w',
                      create_dataset_args=create_ds_args)
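# A round trip with the helper above; the file name and payload are
# hypothetical. Nested dicts map to nested HDF5 groups and come back as
# nested dicts via h5todict.
import numpy as np
from silx.io import dictdump

dict_to_hdf5("example.h5", {"spectrum": np.arange(10),
                            "meta": {"ene_unit": "eV"}})
restored = dictdump.h5todict("example.h5")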
def get_rixs_13ide(sample_name, scan_name, rixs_no='001', data_dir='.',
                   out_dir=None, counter_signal='ROI1', counter_norm=None,
                   interp_ene_in=True, save_rixs=False):
    """Build RIXS map as X,Y,Z 1D arrays

    Parameters
    ----------
    sample_name : str
    scan_name : str
    rixs_no : str, optional
        length 3 string, ['001']
    data_dir : str, optional
        path to the data ['.']
    out_dir : str, optional
        path to save the data [None -> data_dir]
    counter_signal : str
        name of the data column to use as signal
    counter_norm : str
        name of the data column to use as normalization
    interp_ene_in : bool
        perform interpolation of ene_in to the energy step of ene_out [True]
    save_rixs : bool
        if True -> save outdict to disk (in 'out_dir')

    Returns
    -------
    outdict : dict
        {
        '_x': array, energy in
        '_y': array, energy out
        '_z': array, signal
        'writer_name': str,
        'writer_version': str,
        'writer_timestamp': str,
        'filename_all': list,
        'filename_root': str,
        'sample_name': str,
        'scan_name': str,
        'counter_all': str,
        'counter_signal': str,
        'counter_norm': str,
        'ene_grid': float,
        'ene_unit': str,
        }
    """
    _writer = 'get_rixs_13ide'
    _writer_version = '1.5'  #: used for reading back in RixsData.load_from_h5()
    _writer_timestamp = '{0:04d}-{1:02d}-{2:02d}_{3:02d}{4:02d}'.format(
        *time.localtime())

    if out_dir is None:
        out_dir = data_dir
    fnstr = "{0}_{1}".format(scan_name, sample_name)
    grepstr = "{0}*.{1}".format(fnstr, rixs_no)
    fnames = glob.glob(os.path.join(data_dir, grepstr))
    enes = np.sort(np.array([_parse_header(fname)['Analyzer.energy']
                             for fname in fnames]))
    estep = round(np.average(enes[1:] - enes[:-1]), 2)

    fname0 = fnames[0]
    header = _parse_header(fname0)
    cols = header['columns']
    ix = cols.index('Energy')
    iz = cols.index(counter_signal)
    # Only look up the normalization column when one is requested.
    i0 = cols.index(counter_norm) if counter_norm is not None else None

    if interp_ene_in:
        dat = np.loadtxt(fname0)
        x0 = dat[:, ix]
        xnew = np.arange(x0.min(), x0.max() + estep, estep)

    for ifn, fname in enumerate(fnames):
        dat = np.loadtxt(fname)
        x = dat[:, ix]
        y = np.ones_like(x) * enes[ifn]
        if counter_norm is not None:
            z = dat[:, iz] / dat[:, i0]
        else:
            z = dat[:, iz]
        if interp_ene_in:
            y = np.ones_like(xnew) * enes[ifn]
            z = np.interp(xnew, x, z)
            x = xnew
        if ifn == 0:
            _xcol = x
            _ycol = y
            _zcol = z
        else:
            _xcol = np.append(x, _xcol)
            _ycol = np.append(y, _ycol)
            _zcol = np.append(z, _zcol)
        _logger.info("Loaded scan {0}: {1} eV".format(ifn + 1, enes[ifn]))

    outdict = {
        '_x': _xcol,
        '_y': _ycol,
        '_z': _zcol,
        'writer_name': _writer,
        'writer_version': _writer_version,
        'writer_timestamp': _writer_timestamp,
        'filename_root': fnstr,
        'filename_all': fnames,
        'counter_all': cols,
        'counter_signal': counter_signal,
        'counter_norm': counter_norm,
        'sample_name': sample_name,
        'scan_name': scan_name,
        'ene_grid': estep,
        'ene_unit': 'eV',
    }

    if save_rixs:
        fnout = "{0}_rixs.h5".format(fnstr)
        dicttoh5(outdict, os.path.join(out_dir, fnout))
        _logger.info("RIXS saved to {0}".format(fnout))

    return outdict
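# A hypothetical call against 13-ID-E style scan files; the sample/scan
# names, directories, and counter names are placeholders.
rixs = get_rixs_13ide("FeO", "scan01", rixs_no="001", data_dir="./data",
                      counter_signal="ROI1", counter_norm="I0",
                      interp_ene_in=True, save_rixs=True)
x, y, z = rixs["_x"], rixs["_y"], rixs["_z"]  # flat 1D arrays of the RIXS plane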