def _can_load(f):
    try:
        return h5py.is_hdf5(f) or h5py.is_hdf5(f + ".0.hdf5")
    except AttributeError:
        if "hdf5" in f:
            warnings.warn("It looks like you're trying to load HDF5 files, "
                          "but python's HDF support (h5py module) is missing.",
                          RuntimeWarning)
        return False
def _can_load(cls, f):
    if hasattr(h5py, "is_hdf5"):
        if h5py.is_hdf5(f):
            return cls._test_for_hdf5_key(f)
        elif h5py.is_hdf5(f + ".0.hdf5"):
            return cls._test_for_hdf5_key(f + ".0.hdf5")
        else:
            return False
    else:
        if "hdf5" in f:
            warnings.warn("It looks like you're trying to load HDF5 files, "
                          "but python's HDF support (h5py module) is missing.",
                          RuntimeWarning)
        return False
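# Both `_can_load` variants above guard against a missing or stubbed-out h5py
# module: the first catches the AttributeError raised when `h5py` is only a
# placeholder, the second checks hasattr() up front. A minimal sketch of the
# import pattern they appear to assume (hypothetical, for illustration only):
try:
    import h5py
except ImportError:
    h5py = None  # hasattr(h5py, "is_hdf5") is False; attribute access raises AttributeError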
def __init__(self, filename, groupname, index=None):
    """Initializes a h5features reader to read a group in a HDF5 file."""
    # check filename
    if not h5py.is_hdf5(filename):
        raise IOError('{} is not a HDF5 file'.format(filename))
    self.filename = filename

    # open the HDF5 file for reading
    self.h5file = h5py.File(self.filename, 'r')

    # access the requested group
    if groupname not in self.h5file:
        raise IOError('{} is not a valid group in {}'
                      .format(groupname, self.filename))
    self.groupname = groupname
    self.group = self.h5file[groupname]

    # get the version of the file being read
    self.version = self._read_version()

    # read the index from the group if not provided
    if index is None:
        # choose the index class according to the file version
        if self.version == '0.1':
            index_class = IndexV0_1()
        elif self.version == '1.0':
            index_class = IndexV1_0()
        else:
            index_class = Index()
        self.index = index_class.read(self.group)
    else:
        self.index = index
def load(filename):
    """Load a file as an `h5py.File`-like object.

    Formats supported:
    - h5 files, if the `h5py` module is installed
    - Spec files, if the `SpecFile` module is installed

    :param str filename: A filename
    :raises: IOError if the file can't be loaded as an h5py.File-like object
    :rtype: h5py.File
    """
    if not os.path.isfile(filename):
        raise IOError("Filename '%s' must be a file path" % filename)

    if not h5py_missing:
        if h5py.is_hdf5(filename):
            return h5py.File(filename)

    try:
        from . import spech5
        return spech5.SpecH5(filename)
    except ImportError:
        logger.debug("spech5 can't be loaded.", exc_info=True)
    except IOError:
        logger.debug("File '%s' can't be read as spec file.", filename,
                     exc_info=True)

    raise IOError("File '%s' can't be read as HDF5" % filename)
def getUserInfo(user_name):
    """getUserInfo: return user data"""
    log = logging.getLogger("h5serv")
    if not user_name:
        return None
    log.info("get info for user: [" + user_name + "]")
    filename = config.get('password_file')
    if not filename:
        log.error("no config for password_file")
        raise HTTPError(500, message="bad configuration")
    # verify the password file exists and is a valid HDF5 file
    if not op.isfile(filename):
        log.error("password file is missing")
        raise HTTPError(500, message="bad configuration")
    if not h5py.is_hdf5(filename):
        log.error("password file is invalid")
        raise HTTPError(500, message="bad configuration")
    with h5py.File(filename, 'r') as f:
        if user_name not in f.attrs:
            return None
        data = f.attrs[user_name]
        return data
def h5repack(infile, h5repack_path, fs_strategy='FSM_AGGR', outfile=None):
    if not isinstance(infile, str):
        raise TypeError('Input infile must be a string')
    if not h5py.is_hdf5(infile):
        raise IOError('Input infile is not a HDF5 file')
    if not isinstance(h5repack_path, str):
        raise TypeError('Input h5repack_path must be a string')
    if not isinstance(fs_strategy, str):
        raise TypeError('Input fs_strategy must be a string')
    if fs_strategy.upper() not in ['FSM_AGGR', 'PAGE', 'AGGR', 'NONE']:
        raise ValueError('Invalid value specified in fs_strategy')
    fs_strategy = fs_strategy.upper()
    if outfile is None:
        outfile = infile
    elif not isinstance(outfile, str):
        raise TypeError('outfile must be a string')

    # initialise so the returned results are defined even if an early step fails
    mv_result = h5repack_result = rm_result = None
    try:
        if outfile == infile:
            tmpfile = infile + '.tmp'
            mv_result = subprocess.call('mv {0} {1}'.format(infile, tmpfile), shell=True)
        else:
            tmpfile = infile
        h5repack_result = subprocess.call('{0} -S {1} {2} {3}'.format(
            h5repack_path, fs_strategy, tmpfile, outfile), shell=True)
        if h5repack_result != 0:
            # problem with h5repack, just rename tmpfile back to original
            rm_result = subprocess.call('mv {0} {1}'.format(tmpfile, outfile), shell=True)
        else:
            # h5repack is successful, remove the tmpfile
            rm_result = subprocess.call('rm {0}'.format(tmpfile), shell=True)
    except Exception as x:
        return (mv_result, h5repack_result, rm_result, x)
    else:
        return (mv_result, h5repack_result, rm_result, None)
def __init__(self, filename, groupname=None):
    # open the file for reading
    if not os.path.exists(filename) or not h5py.is_hdf5(filename):
        raise IOError('{} is not a HDF5 file'.format(filename))
    self.h5file = h5py.File(filename, 'r')

    # open the requested group in the file
    if groupname is None:
        # expect only one group in the file
        groups = list(self.h5file.keys())
        if not len(groups) == 1:
            raise IOError('groupname is None and cannot be guessed in {}.'
                          .format(filename))
        groupname = groups[0]
    elif groupname not in self.h5file:
        raise IOError('{} is not a valid group in {}'
                      .format(groupname, filename))
    self.group = self.h5file[groupname]

    # load h5features attributes and datasets
    self.version = read_version(self.group)
    self.items = read_items(self.group, self.version)
    self._index = read_index(self.group, self.version)
    self.dformat = self.group.attrs['format']

    if self.dformat == 'sparse':
        self.dim = self.group.attrs['dim']

    self.frames = (self.group['lines'] if self.version == '0.1'
                   else self.group['frames'])[...]
def open_file(filename, f_start=None, f_stop=None, t_start=None, t_stop=None,
              load_data=True, max_load=1.):
    """Open an HDF5 or filterbank file.

    Returns an instance of a Reader to read data from the file.

    ==================  ==================================================
    Filename extension  File type
    ==================  ==================================================
    h5, hdf5            HDF5 format
    fil                 fil format
    *other*             Will raise NotImplementedError
    ==================  ==================================================
    """
    filename = os.path.expandvars(os.path.expanduser(filename))
    if not os.path.isfile(filename):
        raise IOError("No such file or directory: " + filename)

    # Get file extension to determine type
    ext = filename.split(".")[-1].strip().lower()
    if six.PY3:
        ext = bytes(ext, 'ascii')

    if h5py.is_hdf5(filename):
        # Open HDF5 file
        return H5Reader(filename, f_start=f_start, f_stop=f_stop,
                        t_start=t_start, t_stop=t_stop,
                        load_data=load_data, max_load=max_load)
    elif sigproc.is_filterbank(filename):
        # Open FIL file
        return FilReader(filename, f_start=f_start, f_stop=f_stop,
                         t_start=t_start, t_stop=t_stop,
                         load_data=load_data, max_load=max_load)
    else:
        raise NotImplementedError('Cannot open this type of file with Waterfall')
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if opts.level <= 0:
        option_parser.error("level must be greater than zero!")

    collapse_f = make_collapse_f(opts.metadata_category, opts.level,
                                 opts.ignore)
    table = load_table(opts.input_fp)

    if h5py.is_hdf5(opts.input_fp):
        # metadata are not deserializing correctly. Duct tape it.
        update_d = {}
        for i, md in zip(table.ids(axis='observation'),
                         table.metadata(axis='observation')):
            update_d[i] = {k: json.loads(v[0]) for k, v in md.items()}
        table.add_metadata(update_d, axis='observation')

    result = table.collapse(collapse_f, axis='observation', one_to_many=True,
                            norm=False,
                            one_to_many_md_key=opts.metadata_category)

    if opts.format_tab_delimited:
        f = open(opts.output_fp, 'w')
        f.write(result.to_tsv(header_key=opts.metadata_category,
                              header_value=opts.metadata_category,
                              metadata_formatter=lambda s: '; '.join(s)))
        f.close()
    else:
        format_fs = {opts.metadata_category: vlen_list_of_str_formatter}
        write_biom_table(result, opts.output_fp, format_fs=format_fs)
def can_convert_h5_to(dst_type, h5_filename=None):
    """Whether conversion from this particular h5 file to dst_type is supported.

    @param dst_type: name of the target type
    @type dst_type: string
    @param h5_filename: h5 filename or None
    @type h5_filename: string
    @return: True if possible
    """
    if dst_type in ('matlab', 'octave'):
        return True

    if h5_filename and h5_filename.endswith('.h5') and h5py.is_hdf5(h5_filename):
        try:
            h5 = h5py.File(h5_filename, 'r')
            if dst_type == 'libsvm':
                # libsvm requires data/label
                ordering = set(('label', 'data'))
                if ordering.issubset(set(h5['data'].keys())):
                    return True
                # TODO check if this is sparse data / ndarray data
            elif dst_type in ('csv', 'arff', 'rdata'):
                # csv/arff/RData support everything except sparse data
                for k in h5['data'].keys():
                    if k.endswith('_indptr') or k.endswith('_indices'):
                        return False
                return True
        except:
            pass

    return False
def getFileHandle(self, inputfile):
    try:
        self._HDF5 = False
        if HDF5SUPPORT:
            if h5py.is_hdf5(inputfile):
                self._HDF5 = True
                try:
                    return HDF5Stack1D.HDF5Stack1D(self._filelist,
                                                   self.selection)
                except:
                    raise
        ffile = self.__tryEdf(inputfile)
        if ffile is None:
            ffile = self.__tryLucia(inputfile)
        if ffile is None:
            if inputfile[-3:] == "DAT":
                ffile = self.__tryAifira(inputfile)
        if ffile is None:
            if LispixMap.isLispixMapFile(inputfile):
                ffile = LispixMap.LispixMap(inputfile, native=False)
        if ffile is None:
            del ffile
            ffile = SpecFileLayer.SpecFileLayer()
            ffile.SetSource(inputfile)
        return ffile
    except:
        raise IOError("I do not know what to do with file %s" % inputfile)
def canhandle(url):
    if not url.startswith('file://'):
        return False
    path = url.replace('file://', '')
    if not h5py.is_hdf5(path):
        return False
    return True
def __init__(self, filename, chunk_size=0.1, version='1.1'):
    """Initialize an HDF5 file for writing h5features.

    Parameters
    ----------
    filename : str
        The name of the HDF5 file to write on. For clarity you should
        use a *.h5 extension but this is not required.
    chunk_size : float, optional
        The size in MB of a chunk in the file. Default is 0.1 MB. A
        chunk size below 8 kB is not allowed as it results in poor
        performance.

    Raises
    ------
    IOError if the file exists but is not HDF5.
    IOError if the chunk size is below 8 kB.
    """
    if not is_supported_version(version):
        raise IOError('version {} is not supported'.format(version))
    self.version = version

    # raise if the file exists but is not HDF5
    if os.path.isfile(filename) and not h5py.is_hdf5(filename):
        raise IOError('{} is not a HDF5 file.'.format(filename))
    self.filename = filename

    if chunk_size < 0.008:
        raise IOError('chunk size is below 8 kB')
    self.chunk_size = chunk_size
def get_attribute_types(fname):
    if not h5py.is_hdf5(fname):
        return ""
    types = set()
    dt = h5py.special_dtype(vlen=str)
    try:
        h5 = h5py.File(fname, 'r')
        have_type = '/data_descr/types' in h5
        all_types = set(h5['/data_descr/types']) if have_type else set()
        for o in h5['/data_descr/ordering']:
            indptr_name = '/data/' + o + '_indptr'
            indices_name = '/data/' + o + '_indices'
            if indptr_name in h5 and indices_name in h5:
                types.add('Sparse Matrix')
            else:
                if have_type and o in all_types:
                    types.add(h5['/data_descr/types'][o])
                else:
                    t = h5['/data/' + o].dtype
                    if t == dt:
                        types.add("String")
                    elif t in (numpy.int64, numpy.int32):
                        types.add("Integer")
                    elif t in (numpy.float64, numpy.float32):
                        types.add("Floating Point")
                    else:
                        types.add(str(t))
        h5.close()
    except:
        pass
    return ','.join(list(types))
def pre_process(self, args=''):
    parser = argparse.ArgumentParser()
    parser.add_argument('--output', default='processed',
                        help="directory to save the pre-processed data to")
    parser.add_argument('--single', action='store_true',
                        help="process single layer only")
    args = parser.parse_args(args)

    renamer = Renamer()
    params = Parameters()
    for item in self.items:
        index = renamer.pattern_match(item).groupdict()['index']
        run_type = params.determine_run_type(index)
        if run_type != 'single layer' and args.single:
            print("{} is not single layer, skipping.".format(item))
        elif not h5py.is_hdf5(item):
            print("{} is not hdf5!".format(item))
        else:
            try:
                self._pre_process(item, args.output, args.single)
                logging.info('Processed {}'.format(item))
            except Exception:
                print("Failed to process {}".format(item))
                logging.exception('Could not process {}'.format(item))
def dump_mcscf(mol, chkfile, mo_coeff, mcscf_energy=None, e_cas=None,
               ci_vector=None, iter_micro_tot=None, iter_macro=None,
               converged=None):
    """Dumps MCSCF/CASSCF calculation to checkpoint file."""
    if h5py.is_hdf5(chkfile):
        fh5 = h5py.File(chkfile)
        if 'mcscf' in fh5:
            del fh5['mcscf']
    else:
        fh5 = h5py.File(chkfile, 'w')

    if 'mol' not in fh5:
        fh5['mol'] = format(mol.pack())
    fh5['mcscf/mo_coeff'] = mo_coeff

    def store(key, val):
        if val is not None:
            fh5[key] = val

    store('mcscf/mcscf_energy', mcscf_energy)
    store('mcscf/e_cas', e_cas)
    store('mcscf/ci_vector', ci_vector)
    store('mcscf/iter_macro', iter_macro)
    store('mcscf/iter_micro_tot', iter_micro_tot)
    store('mcscf/converged', converged)
    fh5.close()
def open_file(self, filename=GlobalDefaults.file_resultdatafile):
    """Load a given file that contains the results from another simulation.

    :param filename: The filename (optionally with filepath) of the file
                     we try to load. If not given the default value from
                     `GlobalDefaults` is used.
    """
    # Try to open the file or raise an exception if it does not exist.
    if os.path.lexists(filename):
        if hdf.is_hdf5(filename):
            self._srf = hdf.File(filename)
        else:
            raise IOError("File '" + str(filename) + "' is not a hdf5 file")
    else:
        raise IOError("File '" + str(filename) + "' does not exist!")

    # Check if the file format can be read by the IOManager
    if "file_version" not in self._srf.attrs.keys():
        raise IOError("Unsupported file format without version number")

    if self._srf.attrs["file_version"] != self._hdf_file_version:
        raise IOError("Unsupported file format version "
                      + str(self._srf.attrs["file_version"]))

    # Initialize the internal book keeping data
    self._block_ids = [s[len(self._prefixb):] for s in self._srf.keys()
                       if s.startswith(self._prefixb)]
    self._block_count = len(self._block_ids)

    self._group_ids = [s[len(self._prefixg):] for s in self._srf.keys()
                       if s.startswith(self._prefixg)]
    self._group_count = len(self._group_ids)

    # Load the simulation parameters from data block 0.
    self._parameters = self.load_parameters(blockid="global")
def read_vaspdump(path, h5dump=None):
    # NOTE read_hfdump returns the integrals in MO representation
    clustdump = os.path.join(path, 'FCIDUMP.CLUST.GTO')
    jdump = os.path.join(path, 'JDUMP')
    kdump = os.path.join(path, 'KDUMP')
    fockdump = os.path.join(path, 'FOCKDUMP')

    if h5py.is_hdf5(clustdump):
        f = h5py.File(clustdump, 'r')
        dic = {}
        for k, v in f.items():
            if v.shape:  # I'm ndarray
                dic[k] = numpy.array(v)
            else:
                dic[k] = v.value
        f.close()
    else:
        hfdic = read_hfdump(jdump, kdump, fockdump)
        dic = read_clustdump(clustdump, hfdic)
        mo_coeff = dic['MO_COEFF']
        hfdic['HCORE'] = reduce(numpy.dot, (mo_coeff, hfdic['HCORE'], mo_coeff.T))
        hfdic['J'] = reduce(numpy.dot, (mo_coeff, hfdic['J'], mo_coeff.T))
        hfdic['K'] = reduce(numpy.dot, (mo_coeff, hfdic['K'], mo_coeff.T))
        dic.update(hfdic)
        if h5dump is None:
            h5dump = clustdump + '.h5'
        f = h5py.File(h5dump, 'w')
        for k, v in dic.items():
            sys.stdout.write('h5dump %s\n' % k)
            f[k] = v
        f.close()
    return dic
def getUserName(userid):
    """getUserName: return user name for given user id

    #todo: may need to be optimized to support large number of users
    """
    log = logging.getLogger("h5serv")
    log.info("get user name for userid: [" + str(userid) + "]")
    filename = config.get('password_file')
    if not filename:
        log.error("no config for password_file")
        raise HTTPError(500, message="bad configuration")
    # verify the password file exists and is a valid HDF5 file
    if not op.isfile(filename):
        log.error("password file is missing")
        raise HTTPError(500, message="bad configuration")
    if not h5py.is_hdf5(filename):
        log.error("password file is invalid")
        raise HTTPError(500, message="bad configuration")
    with h5py.File(filename, 'r') as f:
        for attr_name in f.attrs:
            attr = f.attrs[attr_name]
            if attr['userid'] == userid:
                return attr_name
    return None
def readIn_generator_QMP(file_path, rootname='QMP', coefsite=False, conjugate=False):
    if not _h5.is_hdf5(file_path):
        raise Exception(' <HDF5> {file} is not a valid hdf5 file'.format(file=file_path))
    f = _h5.File(file_path, "r")
    if coefsite:
        coefonsite_gen = (_ for _ in f['/' + rootname + '_coefsite'].value)
    for per_site in f['/' + rootname].itervalues():
        block_array = dict()
        if coefsite:
            coefonsite = coefonsite_gen.next()
        for m in per_site.itervalues():
            line = m.name.split('/')[-1]
            # if transpose:
            #     # I do not know how to implement transpose here
            #     pass
            # else:
            key = tuple(int(line[_:(_ + 2)]) for _ in range(0, len(line), 2))
            if coefsite and conjugate:
                block_array[key] = coefonsite * m.value.conjugate()
            elif coefsite and not conjugate:
                block_array[key] = coefonsite * m.value
            elif not coefsite and conjugate:
                block_array[key] = m.value.conjugate()
            else:
                block_array[key] = m.value
        yield block_array
    f.close()
def dump_mcscf(mol, chkfile, e_tot, mo_coeff, ncore, ncas, mo_occ=None,
               mo_energy=None, e_cas=None, ci_vector=None):
    """Dumps MCSCF/CASSCF calculation to checkpoint file."""
    if h5py.is_hdf5(chkfile):
        fh5 = h5py.File(chkfile)
        if 'mcscf' in fh5:
            del fh5['mcscf']
    else:
        fh5 = h5py.File(chkfile, 'w')

    if 'mol' not in fh5:
        fh5['mol'] = format(mol.pack())
    fh5['mcscf/mo_coeff'] = mo_coeff

    def store(key, val):
        if val is not None:
            fh5[key] = val

    store('mcscf/e_tot', e_tot)
    store('mcscf/e_cas', e_cas)
    store('mcscf/ci', ci_vector)
    store('mcscf/ncore', ncore)
    store('mcscf/ncas', ncas)
    store('mcscf/mo_occ', mo_occ)
    store('mcscf/mo_energy', mo_energy)
    fh5.close()
def __init__(self, filename):
    global config

    super(GadgetHDFSnap, self).__init__()

    self._filename = filename

    if not h5py.is_hdf5(filename):
        h1 = h5py.File(filename + ".0.hdf5", "r")
        numfiles = h1['Header'].attrs['NumFilesPerSnapshot']
        self._hdf = HdfFileGenerator(filename, numfiles)
    else:
        self._hdf = [h5py.File(filename, "r")]

    self._family_slice = {}

    self._loadable_keys = set([])
    self._family_arrays = {}
    self._arrays = {}
    self.properties = {}

    # determine which particle types are in the output
    my_type_map = {}

    for fam, g_types in _type_map.iteritems():
        my_types = []
        for x in g_types:
            # Get all keys from all hdf files
            for hdf in self._hdf:
                if x in hdf.keys():
                    my_types.append(x)
                    break
        if len(my_types):
            my_type_map[fam] = my_types

    sl_start = 0
    for x in my_type_map:
        l = 0
        for name in my_type_map[x]:
            for hdf in self._hdf:
                # Skip PartType if not in this hdf file
                try:
                    l += hdf[name]['Coordinates'].shape[0]
                    k = self._get_hdf_allarray_keys(hdf[name])
                    self._loadable_keys = self._loadable_keys.union(set(k))
                except KeyError as e:
                    if 'PartType' in name:
                        continue
                    else:
                        raise e
        self._family_slice[x] = slice(sl_start, sl_start + l)
        sl_start += l

    self._loadable_keys = [_translate_array_name(x, reverse=True)
                           for x in self._loadable_keys]
    self._num_particles = sl_start

    self._my_type_map = my_type_map

    self._decorate()
def load_file_as_h5py(filename):
    """Load a file as an h5py.File object.

    :param str filename: A filename
    :raises: IOError if the file can't be loaded as an h5py.File like object
    :rtype: h5py.File
    """
    if not os.path.isfile(filename):
        raise IOError("Filename '%s' must be a file path" % filename)

    if h5py.is_hdf5(filename):
        return h5py.File(filename)

    try:
        from ..io import spech5
        return spech5.SpecH5(filename)
    except ImportError:
        _logger.debug("spech5 can't be loaded.", exc_info=True)
    except IOError:
        _logger.debug("File '%s' can't be read as spec file.", filename,
                      exc_info=True)

    try:
        from silx.io import fabioh5
        return fabioh5.File(filename)
    except ImportError:
        _logger.debug("fabioh5 can't be loaded.", exc_info=True)
    except Exception:
        _logger.debug("File '%s' can't be read as fabio file.", filename,
                      exc_info=True)

    raise IOError("Format of filename '%s' is not supported" % filename)
def OpenDatabase(self, DatabaseName, PRINT=False):
    """
    Open a specific database.

    :param DatabaseName: name of the database to open
    :type DatabaseName: str
    :param PRINT: whether to print a status message (True = yes, False = no)
    :type PRINT: bool
    """
    self.file = []
    try:
        test = h5.is_hdf5(DatabaseName)
        if test:
            self.file = h5.File(DatabaseName, mode="a")
            if PRINT:
                print '{0} database opened'.format(DatabaseName)
        if not test:
            print '{0} does not exist'.format(DatabaseName)
    except IOError:
        raise DatabaseError('{0} database does not exist'.format(DatabaseName))
def __init__(self, file_path):
    assert h5py.is_hdf5(file_path)
    self.file_path = file_path

    f = h5py.File(file_path, "r")
    stars_snap = f['PartType4']
    stars_coords = stars_snap['Coordinates']
    stars_mags = stars_snap['GFM_StellarPhotometrics']

    self.stars_coords = (stars_coords - numpy.mean(stars_coords, axis=0)) / numpy.std(stars_coords, axis=0)
    self.stars_mags = {'U': stars_mags[:, 0],
                       'B': stars_mags[:, 1],
                       'V': stars_mags[:, 2],
                       'K': stars_mags[:, 3],
                       'g': stars_mags[:, 4],
                       'r': stars_mags[:, 5],
                       'i': stars_mags[:, 6],
                       'z': stars_mags[:, 7]}

    self.image = 0.
    self.image_parameters = {'alpha': None, 'beta': None, 'gamma': None,
                             'intensity': None, 'scale': None,
                             'xshift': None, 'yshift': None,
                             'bg': None, 'psf_size': None}
def read(self):
    """Get data and description in-memory.

    Retrieve contents from file.

    @return: example names, ordering and the examples
    @rtype: dict of: list of names, list of ordering and dict of examples
    """
    # we want the exception handled elsewhere
    if not h5py.is_hdf5(self.fname):
        return

    h5 = h5py.File(self.fname, 'r')

    contents = {
        'name': h5.attrs['name'],
        'comment': h5.attrs['comment'],
        'mldata': h5.attrs['mldata'],
    }

    if contents['comment'] == 'Task file':
        contents['task'] = dict()
        contents['ordering'] = list()
        group = 'task'
        for field in ml2h5.task.task_data_fields:
            if field in h5[group]:
                contents['ordering'].append(field)
    else:
        contents['data'] = dict()
        contents['ordering'] = h5['/data_descr/ordering'][...].tolist()
        group = 'data'
    contents['group'] = group

    if '/%s_descr/names' % group in h5:
        contents['names'] = h5['/%s_descr/names' % group][...].tolist()

    if '/%s_descr/types' % group in h5:
        contents['types'] = h5['/%s_descr/types' % group][...]

    for name in contents['ordering']:
        vname = '/%s/%s' % (group, name)
        sp_indices = vname + '_indices'
        sp_indptr = vname + '_indptr'

        if sp_indices in h5['/%s' % group] and sp_indptr in h5['/%s' % group]:
            contents[group][name] = csc_matrix(
                (h5[vname], h5[sp_indices], h5[sp_indptr]))
        else:
            d = numpy.array(h5[vname], order='F')
            try:
                d = d['vlen']
            except:
                pass
            contents[group][name] = d
    h5.close()
    return contents
def readIn_generator_coefficient(file_path, rootname='QMP'):
    if not _h5.is_hdf5(file_path):
        raise Exception(' <HDF5> {file} is not a valid hdf5 file'.format(file=file_path))
    f = _h5.File(file_path, "r")
    coefonsite_gen = (_ for _ in f['/' + rootname + '_coefsite'].value)
    for coefsite in coefonsite_gen:
        yield coefsite
    f.close()
def _create_h5file(erifile, dataname):
    if h5py.is_hdf5(erifile):
        feri = h5py.File(erifile)
        if dataname in feri:
            del feri[dataname]
    else:
        feri = h5py.File(erifile, 'w')
    return feri
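# A minimal usage sketch for the `_create_h5file` helper above; the file and
# dataset names here are hypothetical and not part of the original project.
# The returned handle is either an existing HDF5 file with the stale dataset
# removed, or a freshly created file.
def _demo_create_h5file():
    feri = _create_h5file('eri_scratch.h5', 'eri_mo')
    feri.create_dataset('eri_mo', data=[[0.0] * 4] * 4)  # placeholder payload
    feri.close()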
def is_workspace_file(path):
    '''Return True if the file along the given path is a workspace file'''
    if not h5py.is_hdf5(path):
        return False
    h5file = h5py.File(path, mode="r")
    if not HDF5FileList.has_file_list(h5file):
        return False
    return HDF5Dict.has_hdf5_dict(h5file)
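# A sketch (not the original project's code) of the same workspace check
# written with a context manager, so the HDF5 handle is released even when a
# check fails early; it assumes the same HDF5FileList/HDF5Dict helpers as above.
def is_workspace_file_safe(path):
    '''Return True if the file along the given path is a workspace file.'''
    if not h5py.is_hdf5(path):
        return False
    with h5py.File(path, mode="r") as h5file:
        if not HDF5FileList.has_file_list(h5file):
            return False
        return HDF5Dict.has_hdf5_dict(h5file)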
def _try_h5(fname):
    """Check whether the given file is in hdf5 format.

    @param fname: name of file to determine format for
    @type fname: string
    """
    return h5py.is_hdf5(fname)
def __init__(self, *args, **kwds): '''Instantiate a data tool frame module_name: name of module to instantiate measurements_file_name: name of measurements file ''' assert kwds.has_key( "module_name"), "DataToolFrame() needs a module_name argument" assert kwds.has_key( "measurements_file_name" ), "DataToolFrame() needs a measurements_file_name argument" module_name = kwds["module_name"] measurements_file_name = kwds["measurements_file_name"] kwds_copy = kwds.copy() del kwds_copy["module_name"] del kwds_copy["measurements_file_name"] kwds_copy["title"] = "%s data tool" % module_name wx.Frame.__init__(self, *args, **kwds_copy) self.module = instantiate_module(module_name) self.pipeline = cpp.Pipeline() if h5py.is_hdf5(measurements_file_name): self.workspace = cpw.Workspace(self.pipeline, self.module, None, None, None, None) self.workspace.load(measurements_file_name, True) self.measurements = self.workspace.measurements else: self.pipeline.load(measurements_file_name) self.load_measurements(measurements_file_name) self.workspace = cpw.Workspace(self.pipeline, self.module, None, None, self.measurements, None) self.module.module_num = len(self.pipeline.modules()) + 1 self.pipeline.add_module(self.module) self.sizer = wx.BoxSizer(wx.VERTICAL) module_panel = wx.lib.scrolledpanel.ScrolledPanel( self, -1, style=wx.SUNKEN_BORDER) module_panel.BackgroundColour = cpprefs.get_background_color() self.BackgroundColour = cpprefs.get_background_color() self.module_view = ModuleView(module_panel, self.workspace, True) self.module_view.set_selection(self.module.module_num) def on_change(caller, event): setting = event.get_setting() proposed_value = event.get_proposed_value() setting.value = proposed_value self.pipeline.edit_module(event.get_module().module_num, False) self.module_view.reset_view() self.module_view.request_validation() self.module_view.add_listener(on_change) # # Add a panel for the "run" button # panel = wx.Panel(self) panel_sizer = wx.BoxSizer(wx.HORIZONTAL) button = wx.Button(panel, label="Run") self.sizer.Add(module_panel, 1, wx.EXPAND) self.sizer.Add(panel, 0, wx.EXPAND) panel_sizer.AddStretchSpacer() panel_sizer.Add(button, 0, wx.RIGHT, button.Size[1]) panel.SetSizer(panel_sizer) wx.EVT_BUTTON(self, button.Id, self.on_run) # # Add a file menu # file_menu = wx.Menu() file_menu.Append(ID_FILE_LOAD_MEASUREMENTS, "&Load measurements") file_menu.Append(ID_FILE_SAVE_MEASUREMENTS, "&Save measurements") file_menu.Append(ID_FILE_EXIT, "E&xit") self.MenuBar = wx.MenuBar() self.MenuBar.Append(file_menu, "&File") self.Bind(wx.EVT_MENU, self.on_load_measurements, id=ID_FILE_LOAD_MEASUREMENTS) self.Bind(wx.EVT_MENU, self.on_save_measurements, id=ID_FILE_SAVE_MEASUREMENTS) self.Bind(wx.EVT_MENU, self.on_exit, id=ID_FILE_EXIT) accelerators = wx.AcceleratorTable([ (wx.ACCEL_CMD, ord("W"), ID_FILE_EXIT), (wx.ACCEL_CMD, ord("O"), ID_FILE_LOAD_MEASUREMENTS), (wx.ACCEL_CMD, ord("S"), ID_FILE_SAVE_MEASUREMENTS) ]) self.SetAcceleratorTable(accelerators) # # Add an image menu # image_menu = wx.Menu() image_menu.Append(ID_IMAGE_CHOOSE, "&Choose") self.MenuBar.Append(image_menu, "&Image") self.Bind(wx.EVT_MENU, self.on_image_choose, id=ID_IMAGE_CHOOSE) self.SetSizer(self.sizer) self.Size = (self.module_view.get_max_width(), self.Size[1]) module_panel.Layout() self.Show() self.tbicon = wx.TaskBarIcon() self.tbicon.SetIcon(get_cp_icon(), "CellProfiler2.0") self.SetIcon(get_cp_icon())
def run_pipeline_headless(options, args): """ Run a CellProfiler pipeline in headless mode """ if options.first_image_set is not None: if not options.first_image_set.isdigit(): raise ValueError("The --first-image-set option takes a numeric argument") else: image_set_start = int(options.first_image_set) else: image_set_start = None image_set_numbers = None if options.last_image_set is not None: if not options.last_image_set.isdigit(): raise ValueError("The --last-image-set option takes a numeric argument") else: image_set_end = int(options.last_image_set) if image_set_start is None: image_set_numbers = numpy.arange(1, image_set_end + 1) else: image_set_numbers = numpy.arange(image_set_start, image_set_end + 1) else: image_set_end = None if (options.pipeline_filename is not None) and ( not options.pipeline_filename.lower().startswith("http") ): options.pipeline_filename = os.path.expanduser(options.pipeline_filename) pipeline = cellprofiler.pipeline.Pipeline() initial_measurements = None try: if h5py.is_hdf5(options.pipeline_filename): initial_measurements = cellprofiler.measurement.load_measurements( options.pipeline_filename, image_numbers=image_set_numbers ) except: logging.root.info("Failed to load measurements from pipeline") if initial_measurements is not None: pipeline_text = initial_measurements.get_experiment_measurement( cellprofiler.pipeline.M_PIPELINE ) pipeline_text = pipeline_text.encode("us-ascii") pipeline.load(six.moves.StringIO(pipeline_text)) if not pipeline.in_batch_mode(): # # Need file list in order to call prepare_run # with h5py.File(options.pipeline_filename, "r") as src: if cellprofiler.utilities.hdf5_dict.HDF5FileList.has_file_list(src): cellprofiler.utilities.hdf5_dict.HDF5FileList.copy( src, initial_measurements.hdf5_dict.hdf5_file ) else: pipeline.load(options.pipeline_filename) if options.groups is not None: kvs = [x.split("=") for x in options.groups.split(",")] groups = dict(kvs) else: groups = None file_list = cellprofiler.preferences.get_image_set_file() if file_list is not None: pipeline.read_file_list(file_list) elif options.image_directory is not None: pathnames = [] os.path.walk( os.path.abspath(options.image_directory), lambda pathnames, dirname, fnames: pathnames.append( [ os.path.join(dirname, fname) for fname in fnames if os.path.isfile(os.path.join(dirname, fname)) ] ), pathnames, ) pathnames = sum(pathnames, []) pipeline.add_pathnames_to_file_list(pathnames) # # Fixup CreateBatchFiles with any command-line input or output directories # if pipeline.in_batch_mode(): create_batch_files = [ m for m in pipeline.modules() if m.is_create_batch_module() ] if len(create_batch_files) > 0: create_batch_files = create_batch_files[0] if options.output_directory is not None: create_batch_files.custom_output_directory.value = ( options.output_directory ) if options.image_directory is not None: create_batch_files.default_image_directory.value = ( options.image_directory ) use_hdf5 = len(args) > 0 and not args[0].lower().endswith(".mat") measurements = pipeline.run( image_set_start=image_set_start, image_set_end=image_set_end, grouping=groups, measurements_filename=None if not use_hdf5 else args[0], initial_measurements=initial_measurements, ) if len(args) > 0 and not use_hdf5: pipeline.save_measurements(args[0], measurements) if options.done_file is not None: if measurements is not None and measurements.has_feature( cellprofiler.measurement.EXPERIMENT, cellprofiler.pipeline.EXIT_STATUS ): done_text = measurements.get_experiment_measurement( 
cellprofiler.pipeline.EXIT_STATUS ) exit_code = 0 if done_text == "Complete" else -1 else: done_text = "Failure" exit_code = -1 fd = open(options.done_file, "wt") fd.write("%s\n" % done_text) fd.close() elif not measurements.has_feature( cellprofiler.measurement.EXPERIMENT, cellprofiler.pipeline.EXIT_STATUS ): # The pipeline probably failed exit_code = 1 else: exit_code = 0 if measurements is not None: measurements.close() return exit_code
def run_pipeline_headless(options, args): '''Run a CellProfiler pipeline in headless mode''' if sys.platform == 'darwin': if options.start_awt: import bioformats from javabridge import activate_awt activate_awt() if not options.first_image_set is None: if not options.first_image_set.isdigit(): raise ValueError( "The --first-image-set option takes a numeric argument") else: image_set_start = int(options.first_image_set) else: image_set_start = None image_set_numbers = None if not options.last_image_set is None: if not options.last_image_set.isdigit(): raise ValueError( "The --last-image-set option takes a numeric argument") else: image_set_end = int(options.last_image_set) if image_set_start is None: image_set_numbers = np.arange(1, image_set_end + 1) else: image_set_numbers = np.arange(image_set_start, image_set_end + 1) else: image_set_end = None if ((options.pipeline_filename is not None) and (not options.pipeline_filename.lower().startswith('http'))): options.pipeline_filename = os.path.expanduser( options.pipeline_filename) from cellprofiler.pipeline import Pipeline, EXIT_STATUS, M_PIPELINE import cellprofiler.measurements as cpmeas pipeline = Pipeline() initial_measurements = None try: if h5py.is_hdf5(options.pipeline_filename): initial_measurements = cpmeas.load_measurements( options.pipeline_filename, image_numbers=image_set_numbers) except: logging.root.info("Failed to load measurements from pipeline") if initial_measurements is not None: pipeline_text = \ initial_measurements.get_experiment_measurement( M_PIPELINE) pipeline_text = pipeline_text.encode('us-ascii') pipeline.load(StringIO(pipeline_text)) if not pipeline.in_batch_mode(): # # Need file list in order to call prepare_run # from cellprofiler.utilities.hdf5_dict import HDF5FileList with h5py.File(options.pipeline_filename, "r") as src: if HDF5FileList.has_file_list(src): HDF5FileList.copy(src, initial_measurements.hdf5_dict.hdf5_file) else: pipeline.load(options.pipeline_filename) if options.groups is not None: kvs = [x.split('=') for x in options.groups.split(',')] groups = dict(kvs) else: groups = None use_hdf5 = len(args) > 0 and not args[0].lower().endswith(".mat") measurements = pipeline.run( image_set_start=image_set_start, image_set_end=image_set_end, grouping=groups, measurements_filename=None if not use_hdf5 else args[0], initial_measurements=initial_measurements) if len(args) > 0 and not use_hdf5: pipeline.save_measurements(args[0], measurements) if options.done_file is not None: if (measurements is not None and measurements.has_feature(cpmeas.EXPERIMENT, EXIT_STATUS)): done_text = measurements.get_experiment_measurement(EXIT_STATUS) else: done_text = "Failure" fd = open(options.done_file, "wt") fd.write("%s\n" % done_text) fd.close() if measurements is not None: measurements.close()
def getStackFromPattern(self, filepattern, begin, end, increment=None, imagestack=None, fileindex=0): #get the first filename filename = filepattern % tuple(begin) if not os.path.exists(filename): raise IOError("Filename %s does not exist." % filename) #get the file list args = self.getFileListFromPattern(filepattern, begin, end, increment=increment) #get the file type f = open(args[0], 'rb') #read 10 characters line = f.read(10) f.close() if hasattr(line, "decode"): # convert to string ignoring errors line = line.decode("utf-8", "ignore") specfile = False marCCD = False if line.startswith("II") or line.startswith("MM"): marCCD = True if line[0] == "\n": line = line[1:] if line.startswith("{") or marCCD: if imagestack is None: if marCCD: imagestack = True if imagestack: #prevent any modification fileindex = 0 if filepattern is not None: #this dows not seem to put any trouble #(because of no redimensioning attempt) if False and (len(begin) != 1): raise IOError("EDF stack redimensioning not supported yet") stack = QStack(imagestack=imagestack) elif line.startswith('Spectral'): stack = OmnicMap.OmnicMap(args[0]) elif line.startswith('#\tDate:'): stack = LuciaMap.LuciaMap(args[0]) elif args[0][-4:].upper() in ["PIGE", "PIXE"]: stack = SupaVisioMap.SupaVisioMap(args[0]) elif args[0][-3:].upper() in ["RBS"]: stack = SupaVisioMap.SupaVisioMap(args[0]) elif args[0][-3:].lower() in [".h5", "nxs", "hdf", "hdf5"]: if not HDF5: raise IOError(\ "No HDF5 support while trying to read an HDF5 file") stack = QHDF5Stack1D.QHDF5Stack1D(args) elif args[0].upper().endswith("RAW.GZ")or\ args[0].upper().endswith("EDF.GZ")or\ args[0].upper().endswith("CCD.GZ")or\ args[0].upper().endswith("RAW.BZ2")or\ args[0].upper().endswith("EDF.BZ2")or\ args[0].upper().endswith("CCD.BZ2"): if imagestack is None: imagestack = True stack = QStack(imagestack=imagestack) else: if HDF5: if h5py.is_hdf5(args[0]): stack = QHDF5Stack1D.QHDF5Stack1D(args) else: stack = QSpecFileStack() specfile = True else: stack = QSpecFileStack() specfile = True if specfile and (len(begin) == 2): if increment is None: increment = [1] * len(begin) shape = (len(range(begin[0], end[0] + 1, increment[0])), len(range(begin[1], end[1] + 1, increment[1]))) stack.loadFileList(args, fileindex=fileindex, shape=shape) else: stack.loadFileList(args, fileindex=fileindex) return stack
def general(eri, mo_coeffs, erifile, dataname='eri_mo', ioblk_size=IOBLK_SIZE, compact=True, verbose=logger.NOTE): '''For the given four sets of orbitals, transfer arbitrary spherical AO integrals to MO integrals on disk. Args: eri : 8-fold reduced eri vector mo_coeffs : 4-item list of ndarray Four sets of orbital coefficients, corresponding to the four indices of (ij|kl) erifile : str or h5py File or h5py Group object To store the transformed integrals, in HDF5 format. Kwargs dataname : str The dataset name in the erifile (ref the hierarchy of HDF5 format http://www.hdfgroup.org/HDF5/doc1.6/UG/09_Groups.html). By assigning different dataname, the existed integral file can be reused. If the erifile contains the dataname, the new integrals data will overwrite the old one. ioblk_size : float or int The block size for IO, large block size may **not** improve performance compact : bool When compact is True, depending on the four oribital sets, the returned MO integrals has (up to 4-fold) permutation symmetry. If it's False, the function will abandon any permutation symmetry, and return the "plain" MO integrals Pseudocode / algorithm: u = mu v = nu l = lambda o = sigma Assume eri's are 8-fold reduced. nij/nkl_pair = npair or i*j/k*l if only transforming a subset First half transform: Initialize half_eri of size (nij_pair,npair) For lo = 1 -> npair Unpack row lo Unpack row lo to matrix E_{uv}^{lo} Transform C_ui^+*E*C_nj -> E_{ij}^{lo} Ravel or pack E_{ij}^{lo} Save E_{ij}^{lo} -> half_eri[:,lo] Second half transform: Initialize h5d_eri of size (nij_pair,nkl_pair) For ij = 1 -> nij_pair Load and unpack half_eri[ij,:] -> E_{lo}^{ij} Transform C_{lk}E_{lo}^{ij}C_{ol} -> E_{kl}^{ij} Repack E_{kl}^{ij} Save E_{kl}^{ij} -> h5d_eri[ij,:] Each matrix is indexed by the composite index ij x kl, where ij/kl is either npair or ixj/kxl, if only a subset of MOs are being transformed. Since entire rows or columns need to be read in, the arrays are chunked such that IOBLK_SIZE = row/col x chunking col/row. For example, for the first half transform, we would save in nij_pair x IOBLK_SIZE/nij_pair, then load in IOBLK_SIZE/nkl_pair x npair for the second half transform. ------ kl -----> |jxl | ij | | v As a first guess, the chunking size is jxl. If the super-rows/cols are larger than IOBLK_SIZE, then the chunk rectangle jxl is trimmed accordingly. The pathological limiting case is where the dimensions nao_pair, nij_pair, or nkl_pair are so large that the arrays are chunked 1x1, in which case IOBLK_SIZE needs to be increased. ''' log = logger.new_logger(None, verbose) log.info('******** ao2mo disk, custom eri ********') nmoi = mo_coeffs[0].shape[1] nmoj = mo_coeffs[1].shape[1] nmok = mo_coeffs[2].shape[1] nmol = mo_coeffs[3].shape[1] nao = mo_coeffs[0].shape[0] nao_pair = nao * (nao + 1) // 2 if compact and iden_coeffs(mo_coeffs[0], mo_coeffs[1]): ij_red = False nij_pair = nmoi * (nmoi + 1) // 2 else: ij_red = True nij_pair = nmoi * nmoj if compact and iden_coeffs(mo_coeffs[2], mo_coeffs[3]): kl_red = False nkl_pair = nmok * (nmok + 1) // 2 else: kl_red = True nkl_pair = nmok * nmol dtype = numpy.result_type(eri, *mo_coeffs) typesize = dtype.itemsize / 1e6 # in MB chunks_half = (max( 1, numpy.minimum(int(ioblk_size // (nao_pair * typesize)), nmoj)), max( 1, numpy.minimum(int(ioblk_size // (nij_pair * typesize)), nmol))) ''' ideally, the final transformed eris should have a chunk of nmoj x nmol to optimize read operations. 
However, I'm chunking the row size so that the write operations during the transform can be done as fast as possible. ''' chunks_full = (numpy.minimum(int(ioblk_size // (nkl_pair * typesize)), nmoj), nmol) if isinstance(erifile, str): if h5py.is_hdf5(erifile): feri = h5py.File(erifile, 'a') if dataname in feri: del (feri[dataname]) else: feri = h5py.File(erifile, 'w', libver='latest') else: assert (isinstance(erifile, h5py.Group)) feri = erifile h5d_eri = feri.create_dataset(dataname, (nij_pair, nkl_pair), dtype.char, chunks=chunks_full) feri_swap = lib.H5TmpFile(libver='latest') half_eri = feri_swap.create_dataset(dataname, (nij_pair, nao_pair), dtype.char, chunks=chunks_half) log.debug('Memory information:') log.debug(' IOBLK_SIZE (MB): {}'.format(ioblk_size)) log.debug(' jxl {}x{}, half eri chunk dim {}x{}'.format( nmoj, nmol, chunks_half[0], chunks_half[1])) log.debug(' jxl {}x{}, full eri chunk dim {}x{}'.format( nmoj, nmol, chunks_full[0], chunks_full[1])) log.debug(' Final disk eri size (MB): {:.3g}, chunked {:.3g}'.format( nij_pair * nkl_pair * typesize, numpy.prod(chunks_full) * typesize)) log.debug( ' Half transformed eri size (MB): {:.3g}, chunked {:.3g}'.format( nij_pair * nao_pair * typesize, numpy.prod(chunks_half) * typesize)) log.debug(' RAM buffer for half transform (MB): {:.3g}'.format( nij_pair * chunks_half[1] * typesize * 2)) log.debug(' RAM buffer for full transform (MB): {:.3g}'.format( typesize * chunks_full[0] * nkl_pair * 2 + chunks_half[0] * nao_pair * typesize * 2)) def save1(piece, buf): start = piece * chunks_half[1] stop = (piece + 1) * chunks_half[1] if stop > nao_pair: stop = nao_pair half_eri[:, start:stop] = buf[:, :stop - start] return def load2(piece): start = piece * chunks_half[0] stop = (piece + 1) * chunks_half[0] if stop > nij_pair: stop = nij_pair if start >= nij_pair: start = stop - 1 return half_eri[start:stop, :] def prefetch2(piece): start = piece * chunks_half[0] stop = (piece + 1) * chunks_half[0] if stop > nij_pair: stop = nij_pair if start >= nij_pair: start = stop - 1 buf_prefetch[:stop - start, :] = half_eri[start:stop, :] return def save2(piece, buf): start = piece * chunks_full[0] stop = (piece + 1) * chunks_full[0] if stop > nij_pair: stop = nij_pair h5d_eri[start:stop, :] = buf[:stop - start, :] return # transform \mu\nu -> ij cput0 = time.clock(), time.time() Cimu = mo_coeffs[0].conj().transpose() buf_write = numpy.empty((nij_pair, chunks_half[1])) buf_out = numpy.empty_like(buf_write) wpiece = 0 with lib.call_in_background(save1) as async_write: for lo in range(nao_pair): if lo % chunks_half[1] == 0 and lo > 0: #save1(wpiece,buf_write) buf_out, buf_write = buf_write, buf_out async_write(wpiece, buf_out) wpiece += 1 buf = lib.unpack_row(eri, lo) uv = lib.unpack_tril(buf) uv = Cimu.dot(uv).dot(mo_coeffs[1]) if ij_red: ij = numpy.ravel(uv) # grabs by row else: ij = lib.pack_tril(uv) buf_write[:, lo % chunks_half[1]] = ij # final write operation & cleanup save1(wpiece, buf_write) log.timer('(uv|lo) -> (ij|lo)', *cput0) uv = None ij = None buf = None # transform \lambda\sigma -> kl cput1 = time.clock(), time.time() Cklam = mo_coeffs[2].conj().transpose() buf_write = numpy.empty((chunks_full[0], nkl_pair)) buf_out = numpy.empty_like(buf_write) buf_read = numpy.empty((chunks_half[0], nao_pair)) buf_prefetch = numpy.empty_like(buf_read) rpiece = 0 wpiece = 0 with lib.call_in_background(save2, prefetch2) as (async_write, prefetch): buf_read = load2(rpiece) prefetch(rpiece + 1) for ij in range(nij_pair): if ij % chunks_full[0] == 0 and ij > 
0: #save2(wpiece,buf_write) buf_out, buf_write = buf_write, buf_out async_write(wpiece, buf_out) wpiece += 1 if ij % chunks_half[0] == 0 and ij > 0: #buf_read = load2(rpiece) buf_read, buf_prefetch = buf_prefetch, buf_read rpiece += 1 prefetch(rpiece + 1) lo = lib.unpack_tril(buf_read[ij % chunks_half[0], :]) lo = Cklam.dot(lo).dot(mo_coeffs[3]) if kl_red: kl = numpy.ravel(lo) else: kl = lib.pack_tril(lo) buf_write[ij % chunks_full[0], :] = kl save2(wpiece, buf_write) log.timer('(ij|lo) -> (ij|kl)', *cput1) if isinstance(erifile, str): feri.close() return erifile
def sample_filter(peak_matrix: Union[PeakMatrix, str], min_fraction: float, within: bool = False, rsd_thres: Union[float, None] = None, qc_label: Union[str, None] = None, labels: Union[str, None] = None): """ Removes peaks from the input PeakMatrix object (or .hdf5 file that were detected in fewer-than a user-defined minimum number of study samples. There are many and varied reasons why a peak may not have been detected in all study samples, including: * due to having an intensity (concentration) close to the signal-to-noise limit of the system; * due to having been present in only one of the study classes (e.g. a drug administered to the ‘treatment’ class samples); * due to ion suppression/enhancement effects in the mass spectrometer source region; etc. :param peak_matrix: PeakMatrix object or path to .hdf5 file :param min_fraction: Minimum fraction - a numeric value between 0 and 1 indicating the proportion of study samples in which a peak must have a recorded intensity value in order for it to be retained in the output peak intensity matrix; e.g. 0.5 means that at least 50% of samples (whether assessed across all classes, or within each class individually) must have a recorded intensity value for a specific peak in order for it to be retained in the output peak matrix. :param within: Apply sample filter within each sample class * **False** - check across ALL classes simultaneously whether greater-than the user-defined “Minimum fraction” of samples contained an intensity value for a specific mass spectral peak. * **True** - check within EACH class separately whether greater-than the user-defined “Minimum fraction” of samples contained an intensity value for a specific mass spectral peak. .. warning:: if in ANY class a peak is detected in greater-than the user-defined minimum fraction of samples, then the peak is retained in the output peak matrix. For classes in which this condition is not met, the peak intensity recorded for that peak (if any) will still be presented in the output peak matrix. If no peak intensity was recorded in a sample, then a ‘0’ is inserted in to the peak matrix. :param rsd_thres: Relative standard deviation threshold - A numerical value equal-to or greater-than 0. If greater than 0, then peaks whose intensity values have a percent relative standard deviation (otherwise termed the percent coefficient of variation) greater-than this value are excluded from the output PeakMatrix object. :param qc_label: Label for the QC samples - a string indicating the name of the class to be used for filtering, i.e. the “reference” class. This string must have been included in the “classLabel” column of the metadata file associated with the process_sans or replicate_filter function(s). 
:param labels: Path to a metadata file :return: PeakMatrix object """ if not isinstance(peak_matrix, PeakMatrix): if h5py.is_hdf5(peak_matrix): peak_matrix = hdf5_portal.load_peak_matrix_from_hdf5(peak_matrix) else: peak_matrix = txt_portal.load_peak_matrix_from_txt(peak_matrix) if labels is not None: if not os.path.isfile(labels): raise IOError("{} does not exist".format(labels)) peak_matrix = update_labels(peak_matrix, labels) if qc_label is not None: if Tag(qc_label, 'classLabel') not in peak_matrix.peaklist_tags: raise IOError("QC label ({}) does not exist".format(qc_label)) peak_matrix = filter_fraction(peak_matrix, min_fraction, within_classes=within, class_tag_type="classLabel") if rsd_thres is not None: peak_matrix = filter_rsd(peak_matrix, rsd_thres, Tag(qc_label, "classLabel")) return peak_matrix
def assert_h5_format(self, path):
    if h5py is not None:
        self.assertTrue(
            h5py.is_hdf5(path),
            'Model saved at path {} is not a valid hdf5 file.'.format(path))
def visualize_file():
    """Open the HDF5 file and display the result"""
    try:
        import pylab as p
    except ImportError:
        print("Whoops! Matplotlib is required to view the fractal.")
        raise

    f = h5py.File('mandelbrot.hdf5', 'r')
    dset = f['mandelbrot']
    a = dset[...]
    p.imshow(a.transpose())

    print("Displaying fractal. Close window to exit program.")
    try:
        p.show()
    finally:
        f.close()


if __name__ == '__main__':
    if not h5py.is_hdf5('mandelbrot.hdf5'):
        run_calculation()
    else:
        print('Fractal found in "mandelbrot.hdf5". Delete file to re-run calculation.')
    visualize_file()
import os, datetime

from spacepy import DOT_FLN, help
from spacepy.toolbox import loadpickle

try:
    import h5py
    _ext = '.h5'
except ImportError:
    _ext = '.pkl'

#dotfln = os.environ['HOME']+'/.spacepy'
omnifln = os.path.join(DOT_FLN, 'data', 'omnidata{0}'.format(_ext))
omni2fln = os.path.join(DOT_FLN, 'data', 'omni2data{0}'.format(_ext))
testfln = os.path.join('data', 'OMNItest{0}'.format(_ext))

if _ext == '.h5':
    presentQD = h5py.is_hdf5(omnifln)
    presentO2 = h5py.is_hdf5(omni2fln)
    if not (presentQD and presentO2):
        print("Qin-Denton/OMNI2 data not found in current format. This module has limited functionality.")
        print("Run spacepy.toolbox.update(QDomni=True) to download data")
else:
    presentQD = os.path.isfile(omnifln)
    presentO2 = os.path.isfile(omni2fln)
    if not (presentQD and presentO2):
        print("No Qin-Denton/OMNI2 data found. This module has limited functionality.")
        print("Run spacepy.toolbox.update(QDomni=True) to download data")
    else:
def merge_files(destination, sources, force_headless=False): is_headless = force_headless or get_headless() if not is_headless: import wx if len(sources) == 0: return if not is_headless: progress = wx.ProgressDialog( "Writing " + destination, "Loading " + sources[0], maximum=len(sources) * 4 + 1, style=wx.PD_CAN_ABORT | wx.PD_APP_MODAL | wx.PD_ELAPSED_TIME | wx.PD_REMAINING_TIME, ) count = 0 try: pipeline = cpp.Pipeline() has_error = [False] def callback(caller, event): if isinstance(event, cpp.LoadExceptionEvent): has_error = True wx.MessageBox( message="Could not load %s: %s" % (sources[0], event.error), caption="Failed to load %s" % sources[0], ) has_error[0] = True pipeline.add_listener(callback) pipeline.load(sources[0]) if has_error[0]: return if destination.lower().endswith(".h5"): mdest = cpmeas.Measurements(filename=destination, multithread=False) h5_dest = True else: mdest = cpmeas.Measurements(multithread=False) h5_dest = False for source in sources: if not is_headless: count += 1 keep_going, skip = progress.Update(count, "Loading " + source) if not keep_going: return if h5py.is_hdf5(source): msource = cpmeas.Measurements( filename=source, mode="r", multithread=False ) else: msource = cpmeas.load_measurements(source) dest_image_numbers = mdest.get_image_numbers() source_image_numbers = msource.get_image_numbers() if len(dest_image_numbers) == 0 or len(source_image_numbers) == 0: offset_source_image_numbers = source_image_numbers else: offset_source_image_numbers = ( np.max(dest_image_numbers) - np.min(source_image_numbers) + source_image_numbers + 1 ) for object_name in msource.get_object_names(): if object_name in mdest.get_object_names(): destfeatures = mdest.get_feature_names(object_name) else: destfeatures = [] for feature in msource.get_feature_names(object_name): if object_name == cpmeas.EXPERIMENT: if not mdest.has_feature(object_name, feature): src_value = msource.get_experiment_measurement(feature) mdest.add_experiment_measurement(feature, src_value) continue src_values = msource.get_measurement( object_name, feature, image_set_number=source_image_numbers ) mdest[ object_name, feature, offset_source_image_numbers ] = src_values destset = set(destfeatures) if not is_headless: keep_going, skip = progress.Update( count + 1, "Saving to " + destination ) if not keep_going: return if not h5_dest: pipeline.save_measurements(destination, mdest) finally: if not is_headless: progress.Destroy()
def __init__(self, filename, f_start=None, f_stop=None, t_start=None, t_stop=None, load_data=True, max_load=1.): """ Constructor. Args: filename (str): filename of blimpy file. f_start (float): start frequency, in MHz f_stop (float): stop frequency, in MHz t_start (int): start time bin t_stop (int): stop time bin """ super(H5Reader, self).__init__() if filename and os.path.isfile(filename) and h5py.is_hdf5(filename): #These values may be modified once code for multi_beam and multi_stokes observations are possible. self.freq_axis = 2 self.time_axis = 0 self.beam_axis = 1 # Place holder self.stokes_axis = 4 # Place holder self.filename = filename self.filestat = os.stat(filename) self.filesize = self.filestat.st_size / (1024.0**2) self.load_data = load_data self.h5 = h5py.File(self.filename, mode='r') self.read_header() self.file_size_bytes = os.path.getsize(self.filename) # In bytes self.n_ints_in_file = self.h5["data"].shape[self.time_axis] # self.n_channels_in_file = self.h5["data"].shape[self.freq_axis] # self.n_beams_in_file = self.header[ 'nifs'] #Placeholder for future development. self.n_pols_in_file = 1 #Placeholder for future development. self._n_bytes = int(self.header['nbits'] / 8) #number of bytes per digit. self._d_type = self._setup_dtype() self.file_shape = (self.n_ints_in_file, self.n_beams_in_file, self.n_channels_in_file) if self.header['foff'] < 0: self.f_end = self.header['fch1'] self.f_begin = self.f_end + self.n_channels_in_file * self.header[ 'foff'] else: self.f_begin = self.header['fch1'] self.f_end = self.f_begin + self.n_channels_in_file * self.header[ 'foff'] self.t_begin = 0 self.t_end = self.n_ints_in_file #Taking care all the frequencies are assigned correctly. self._setup_selection_range(f_start=f_start, f_stop=f_stop, t_start=t_start, t_stop=t_stop, init=True) #Convert input frequencies into what their corresponding channel number would be. self._setup_chans() #Update frequencies ranges from channel number. self._setup_freqs() #Applying data size limit to load. if max_load is not None: if max_load > 1.0: logger.warning( 'Setting data limit > 1GB, please handle with care!') self.MAX_DATA_ARRAY_SIZE = max_load * MAX_DATA_ARRAY_SIZE_UNIT else: self.MAX_DATA_ARRAY_SIZE = MAX_DATA_ARRAY_SIZE_UNIT if self.file_size_bytes > self.MAX_DATA_ARRAY_SIZE: self.large_file = True else: self.large_file = False if self.load_data: if self.large_file: #Only checking the selection, if the file is too large. if self.f_start or self.f_stop or self.t_start or self.t_stop: if self.isheavy(): logger.warning( "Selection size of %.2f GB, exceeding our size limit %.2f GB. Instance created, header loaded, but data not loaded, please try another (t,v) selection." % (self._calc_selection_size() / (1024.**3), self.MAX_DATA_ARRAY_SIZE / (1024.**3))) self._init_empty_selection() else: self.read_data() else: logger.warning( "The file is of size %.2f GB, exceeding our size limit %.2f GB. Instance created, header loaded, but data not loaded. You could try another (t,v) selection." % (self.file_size_bytes / (1024.**3), self.MAX_DATA_ARRAY_SIZE / (1024.**3))) self._init_empty_selection() else: self.read_data() else: logger.debug("Skipping loading data ...") self._init_empty_selection() else: raise IOError("Need a file to open, please give me one!")
def main(args): '''Run CellProfiler args - command-line arguments, e.g. sys.argv ''' import cellprofiler.preferences as cpprefs if any([arg.startswith('--work-announce') for arg in args]): # # Go headless ASAP # cpprefs.set_headless() for i, arg in enumerate(args): if arg == "--ij-plugins-directory" and len(args) > i + 1: cpprefs.set_ij_plugin_directory(args[i + 1]) break import cellprofiler.analysis_worker cellprofiler.analysis_worker.aw_parse_args() cellprofiler.analysis_worker.main() sys.exit(0) options, args = parse_args(args) # # Important to go headless ASAP # if not options.show_gui: import cellprofiler.preferences as cpprefs cpprefs.set_headless() # What's there to do but run if you're running headless? # Might want to change later if there's some headless setup options.run_pipeline = True if options.jvm_heap_size != None: from cellprofiler.preferences import set_jvm_heap_mb set_jvm_heap_mb(options.jvm_heap_size, False) set_log_level(options) if not hasattr(sys, "frozen") and options.code_statistics: print_code_statistics() return if options.print_groups_file is not None: print_groups(options.print_groups_file) return if options.batch_commands_file is not None: get_batch_commands(options.batch_commands_file) return if options.run_ilastik: run_ilastik() return if options.add_message_for_user: if len(args) != 3: sys.stderr.write("Usage: (for add_message-for-user)\n") sys.stderr.write( "CellProfiler --add-message-for-user <caption> <message> <pipeline-or-project>\n" ) sys.stderr.write("where:\n") sys.stderr.write(" <caption> - the message box caption\n") sys.stderr.write( " <message> - the message displayed inside the message box\n" ) sys.stderr.write( " <pipeline-or-project> - the path to the pipeline or project file to modify\n" ) return caption = args[0] message = args[1] path = args[2] import h5py using_hdf5 = h5py.is_hdf5(path) if using_hdf5: import cellprofiler.measurements as cpmeas m = cpmeas.Measurements(filename=path, mode="r+") pipeline_text = m[cpmeas.EXPERIMENT, "Pipeline_Pipeline"] else: with open(path, "r") as fd: pipeline_text = fd.read() header, body = pipeline_text.split("\n\n", 1) pipeline_text = header + \ ("\nMessageForUser:%s|%s\n\n" % (caption, message)) + body if using_hdf5: m[cpmeas.EXPERIMENT, "Pipeline_Pipeline"] = pipeline_text m.close() else: with open(path, "w") as fd: fd.write(pipeline_text) print "Message added to %s" % path return # necessary to prevent matplotlib trying to use Tkinter as its backend. 
# has to be done before CellProfilerApp is imported from matplotlib import use as mpluse mpluse('WXAgg') if (not hasattr(sys, 'frozen')) and options.fetch_external_dependencies: import external_dependencies external_dependencies.fetch_external_dependencies( options.overwrite_external_dependencies) if (not hasattr(sys, 'frozen')) and options.build_extensions: build_extensions() if options.build_and_exit: return if options.output_html: from cellprofiler.gui.html.manual import generate_html webpage_path = options.output_directory if options.output_directory else None generate_html(webpage_path) return if options.print_measurements: print_measurements(options) return if options.omero_credentials is not None: set_omero_credentials_from_string(options.omero_credentials) if options.plugins_directory is not None: cpprefs.set_plugin_directory(options.plugins_directory, globally=False) if options.ij_plugins_directory is not None: cpprefs.set_ij_plugin_directory(options.ij_plugins_directory, globally=False) if options.temp_dir is not None: if not os.path.exists(options.temp_dir): os.makedirs(options.temp_dir) cpprefs.set_temporary_directory(options.temp_dir, globally=False) if not options.allow_schema_write: cpprefs.set_allow_schema_write(False) # # After the crucial preferences are established, we can start the VM # from cellprofiler.utilities.cpjvm import cp_start_vm cp_start_vm() try: if options.show_gui: import wx wx.Log.EnableLogging(False) from cellprofiler.cellprofilerapp import CellProfilerApp from cellprofiler.workspace import is_workspace_file show_splashbox = (options.pipeline_filename is None and (not options.new_project) and options.show_splashbox) if options.pipeline_filename: if is_workspace_file(options.pipeline_filename): workspace_path = os.path.expanduser( options.pipeline_filename) pipeline_path = None else: pipeline_path = os.path.expanduser( options.pipeline_filename) workspace_path = None elif options.new_project: workspace_path = False pipeline_path = None else: workspace_path = None pipeline_path = None App = CellProfilerApp( 0, check_for_new_version=(options.pipeline_filename is None), show_splashbox=show_splashbox, workspace_path=workspace_path, pipeline_path=pipeline_path) if options.data_file is not None: cpprefs.set_data_file(os.path.abspath(options.data_file)) if options.image_set_file is not None: cpprefs.set_image_set_file(options.image_set_file, False) from cellprofiler.utilities.version import version_string, version_number logging.root.info("Version: %s / %d" % (version_string, version_number)) if options.run_pipeline and not options.pipeline_filename: raise ValueError("You must specify a pipeline filename to run") if options.output_directory: if not os.path.exists(options.output_directory): os.makedirs(options.output_directory) cpprefs.set_default_output_directory(options.output_directory) if options.image_directory: cpprefs.set_default_image_directory(options.image_directory) if options.show_gui: if options.run_pipeline: App.frame.pipeline_controller.do_analyze_images() App.MainLoop() return elif options.run_pipeline: run_pipeline_headless(options, args) except Exception, e: logging.root.fatal("Uncaught exception in CellProfiler.py", exc_info=True) raise
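# A hedged sketch of the --add-message-for-user branch above, using plain h5py in
# place of CellProfiler's Measurements API: the pipeline text is assumed (for
# illustration only) to live in a root attribute called "pipeline"; real project
# files store it through cpmeas.Measurements as shown in main().
import h5py

def add_message_for_user(path, caption, message, attr="pipeline"):
    if h5py.is_hdf5(path):
        with h5py.File(path, "r+") as f:
            text = f.attrs[attr]
            if isinstance(text, bytes):
                text = text.decode("utf-8")
            header, body = text.split("\n\n", 1)
            f.attrs[attr] = header + "\nMessageForUser:%s|%s\n\n" % (caption, message) + body
    else:
        with open(path, "r") as fd:
            text = fd.read()
        header, body = text.split("\n\n", 1)
        with open(path, "w") as fd:
            fd.write(header + "\nMessageForUser:%s|%s\n\n" % (caption, message) + body)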
def merge_peaklists(source: Sequence[PeakList], filelist: Union[str, None] = None):
    """
    Extracts and merges PeakList objects from one or more lists, PeakMatrix objects or .hdf5 files into one or more lists of PeakList objects. If more than one output list is produced, users can control which subset of peaklists is exported to which list.

    :param source: List of (lists/tuples of) PeakList objects, PeakMatrix objects, or .hdf5 files

    :param filelist: A tab-delimited text file containing metadata to determine which peaklists are exported together:

        **Example of a filelist** - the optional multilist column determines which peaklists are exported together.

        +-----------------+------------+-----------+-------+----------------+-----------+-------+
        | filename        | classLabel | replicate | batch | injectionOrder | multilist | [...] |
        +-----------------+------------+-----------+-------+----------------+-----------+-------+
        | sample_rep1.raw | sample     | 1         | 1     | 1              | 1         | [...] |
        +-----------------+------------+-----------+-------+----------------+-----------+-------+
        | sample_rep2.raw | sample     | 2         | 1     | 2              | 1         | [...] |
        +-----------------+------------+-----------+-------+----------------+-----------+-------+
        | sample_rep3.raw | sample     | 3         | 1     | 3              | 1         | [...] |
        +-----------------+------------+-----------+-------+----------------+-----------+-------+
        | sample_rep4.raw | sample     | 4         | 1     | 4              | 1         | [...] |
        +-----------------+------------+-----------+-------+----------------+-----------+-------+
        | blank_rep1.raw  | blank      | 1         | 1     | 5              | 2         | [...] |
        +-----------------+------------+-----------+-------+----------------+-----------+-------+
        | blank_rep2.raw  | blank      | 2         | 1     | 6              | 2         | [...] |
        +-----------------+------------+-----------+-------+----------------+-----------+-------+
        | blank_rep3.raw  | blank      | 3         | 1     | 7              | 2         | [...] |
        +-----------------+------------+-----------+-------+----------------+-----------+-------+
        | blank_rep4.raw  | blank      | 4         | 1     | 8              | 2         | [...] |
        +-----------------+------------+-----------+-------+----------------+-----------+-------+
        | ...             | ...        | ...       | ...   | ...            | ...       | [...] |
        +-----------------+------------+-----------+-------+----------------+-----------+-------+

    :return: Nested lists of PeakList objects (e.g. [[pl_01, pl_02], [pl_03, pl_04, pl_05]])
    """

    if not isinstance(source, list):
        raise IOError(
            "Incorrect input: list of lists of peaklists, list of peak matrix objects or list of .HDF5 files expected.")

    pls_merged = []
    for s in source:
        if isinstance(s, list) or isinstance(s, tuple):
            if isinstance(s[0], PeakList):
                pls_merged.extend(s)
            else:
                raise IOError("Incorrect Object in list. PeakList Object expected.")
        elif isinstance(s, PeakMatrix):
            pls = s.extract_peaklists()
            pls_merged.extend(pls)
        elif h5py.is_hdf5(s):
            f = h5py.File(s, 'r')
            if "mz" in f:
                pm = hdf5_portal.load_peak_matrix_from_hdf5(s)
                pls = pm.extract_peaklists()
            else:
                pls = hdf5_portal.load_peaklists_from_hdf5(s)
            f.close()
            pls_merged.extend(pls)
        else:
            raise IOError(
                "Incorrect input: list of lists of peaklists, list of peak matrix objects or list of HDF5 files expected.")

    if filelist is not None:
        fl = validate_metadata(filelist)
        pls_merged = update_metadata_and_labels(pls_merged, fl)

        if 'multilist' in list(fl.keys()):
            # make sure the peaklists are in the correct order (need to be sorted ascending)
            order_indx = np.argsort([i.metadata['multilist'] for i in pls_merged])
            nlists = [fl['multilist'][i] for i in order_indx]
            pls_merged = [pls_merged[i] for i in order_indx]

            # get the break points of the different lists to join together
            bp = list(np.cumsum(np.unique(nlists, return_counts=True)[1]))
            bp = bp[:-1]

            # break up the list into a list of lists
            pls_merged = partition(pls_merged, bp)

    return pls_merged
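# A small sketch of the dispatch used above: peek into an .hdf5 file and decide
# whether it holds a peak matrix (an "mz" entry at the root, as merge_peaklists
# assumes) or a set of peaklists. The loader names referenced above are the dimspy
# portal functions; the example path here is hypothetical.
import h5py

def sniff_dimspy_hdf5(path):
    if not h5py.is_hdf5(path):
        raise IOError("{} is not an HDF5 file".format(path))
    with h5py.File(path, 'r') as f:
        return "peak_matrix" if "mz" in f else "peaklists"

# e.g. kind = sniff_dimspy_hdf5("study_peaklists.hdf5")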
def main(): if os.name == 'nt': print("Sorry, this utility is not supported on Windows!") return -1 parser = argparse.ArgumentParser() parser.add_argument('-u', "--user", help='user id') parser.add_argument('-p', "--passwd", help='user password') args = parser.parse_args() filename = None passwd = None username = None filename = config.get('password_file') if not filename: print("no password file in config") return -1 if not args.user: print("no userid supplied") return -1 username = args.user if username.find(':') != -1: print("invalid username (':' is not allowed)") return -1 if username.find('/') != -1: print("invalid username ('/' is not allowed)") return -1 if args.passwd: passwd = args.passwd if len(passwd) < 4: print("password must be at least 4 characters long") return -1 else: passwd = generate_temp_password() # verify file exists and is writable if not op.isfile(filename): print("password file:", filename, " does not exist") return -1 if not h5py.is_hdf5(filename): print("invalid password file") return -1 if not os.access(filename, os.W_OK): print("password file is not writable") return -1 f = h5py.File(filename, 'r+') if 'user_type' not in f: print("invalid password file") return -1 user_type = f['user_type'] now = int(time.time()) # add a new user if username in f.attrs: print("user already exists") return -1 # create userid 1 greater than previous used userid = len(f.attrs) + 1 data = np.empty((), dtype=user_type) data['pwd'] = encrypt_pwd(passwd) data['state'] = 'A' data['userid'] = userid data['ctime'] = now data['mtime'] = now f.attrs.create(username, data, dtype=user_type) f.close() datapath = config.get('datapath') if not op.isdir(datapath): print("data directory not found") return -1 userpath = op.join(datapath, config.get('home_dir')) if not op.isdir(userpath): os.mkdir(userpath) userdir = op.join(userpath, username) if op.isdir(userdir): print("user directory already exists") return -1 # create user directory os.mkdir(userdir) # link to "public" directory link_name = op.join(userdir, "public") # create symlink to public directory os.symlink("../../public", link_name) print(passwd) return
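# A hedged, standalone sketch of the password-file layout the script above relies
# on: one compound record per user, stored as an HDF5 attribute keyed by username.
# Field names and sizes here are illustrative assumptions, not h5serv's exact schema.
import time
import numpy as np
import h5py

user_type = np.dtype([("pwd", "S64"), ("state", "S1"),
                      ("userid", "i8"), ("ctime", "i8"), ("mtime", "i8")])

def add_user(passwd_file, username, pwd_hash):
    """Append a user record; pwd_hash is assumed to be a bytes digest <= 64 bytes."""
    now = int(time.time())
    with h5py.File(passwd_file, "a") as f:
        if username in f.attrs:
            raise ValueError("user already exists")
        rec = np.zeros((), dtype=user_type)
        rec["pwd"], rec["state"] = pwd_hash, b"A"
        rec["userid"], rec["ctime"], rec["mtime"] = len(f.attrs) + 1, now, now
        f.attrs.create(username, rec, dtype=user_type)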
def __init__( self, hdf5_file, root, num_classes=None, classes=None, load_as_images=True, replicas_per_sample=1, **kwargs ): assert h5py.is_hdf5(hdf5_file) super(HDF5Dataset, self).__init__(root=root, **kwargs) self.replicas_per_sample = replicas_per_sample self._hdf5_file = hdf5_file self._load_as_images = load_as_images with h5py.File(name=self._hdf5_file, mode="r") as hdf5: hdf5_root = hdf5[root] self._classes = { posixpath.join("/", root, g): i for i, g in enumerate(hdf5_root.keys()) } # Load image file names from indices self._images = None index_file = Path(hdf5_file).with_suffix(".__hdf5_index__") if index_file.exists(): with h5py.File(name=index_file, mode="r") as hdf5_idx: if root in hdf5_idx: images = hdf5_idx[root]["images"][()] # convert null terminated strings to unicode self._images = images.astype("U").tolist() if self._images is None: # Create index with image file names. Depending on the size and # location of the hdf5 this process may take a few minutes. self._images = [] for class_name in self._classes: group = hdf5_root[class_name] if self._load_as_images: files = filter(is_image_file, group) else: files = filter(is_tensor_file, group) # Construct the absolute path name within the HDF5 file path_names = map( partial(posixpath.join, "/", root, class_name), files) self._images.extend(path_names) # Convert from python string to null terminated string index_data = np.array(self._images, dtype="S") # Save cache with h5py.File(name=index_file, mode="a") as hdf5_idx: hdf5_idx_root = hdf5_idx.require_group(root) hdf5_idx_root.create_dataset("images", data=index_data) total_classes = len(self._classes) # Select subset of the dataset filtering only images from the given classes if classes is not None: self._classes = { posixpath.join("/", root, g): i for i, g in enumerate(classes) } # Limit dataset size by num_classes if num_classes is not None: self._classes = dict(itertools.islice(self._classes.items(), num_classes)) if len(self._classes) != total_classes: self._images = list(filter( lambda x: posixpath.dirname(x) in self._classes.keys(), self._images)) # Lazy open hdf5 file on __getitem__ of each dataloader worker. # See https://github.com/pytorch/pytorch/issues/11887 self._hdf5 = None
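# A short sketch of the lazy-open pattern noted at the end of __init__ above: keep
# only the path at construction time and open the HDF5 handle on first access inside
# each dataloader worker (see github.com/pytorch/pytorch/issues/11887). The class
# name is hypothetical.
import h5py

class LazyH5:
    def __init__(self, path):
        self.path = path
        self._h5 = None  # opened lazily, once per worker process

    def __getitem__(self, key):
        if self._h5 is None:
            self._h5 = h5py.File(self.path, "r")
        return self._h5[key][()]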
def blank_filter(peak_matrix: Union[PeakMatrix, str], blank_label: str, min_fraction: float = 1.0, min_fold_change: float = 1.0, function: str = "mean", rm_samples: bool = True, labels: Union[str, None] = None): """ :param peak_matrix: PeakMatrix object :param blank_label: Label for the blank samples - a string indicating the name of the class to be used for filtering (e.g. blank), i.e. the “reference” class. This string must have been included in the “classLabel” column of the metadata file associated with the process_sans or replicate_filter function(s). :param min_fraction: A numeric value ranging from 0 to 1. Setting this value to None or 0 will skip this filtering step. A value greater than 0 requires that for each peak in the peak intensity matrix, at least this proportion of non-reference samples have to have an intensity value that exceeds the product of: (A) the average intensity of “reference” class intensities and (B) the user-defined “min_fold_change”. If this condition is not met, the peak is removed from the peak intensity matrix. :param min_fold_change: A numeric value from 0 upwards. When minimum fraction filtering is enabled, this value defines the minimum required ratio between the intensity of a peak in a “non-reference” sample and the average intensity of the “reference” sample(s). Peaks with ratios exceeding this threshold are considered to have been reliably detected in a “non-reference” sample. :param function: Function to calculate the 'reference' intensity * **mean** - corresponds to using the non-weighted average of “reference” sample peak intensities (NA values are ignored) in calculating the “reference” to “non-reference” peak intensity ratio. * **median** - corresponds to using the median of “reference” sample peak intensities (NA values are ignored) in calculating the “reference” to “non-reference” peak intensity ratio. * **max** corresponds to the use of the maximum intensity among “reference” sample peak intensities (NA values are ignored) in calculating the “reference” to “non-reference” peak intensity ratio. :param rm_samples: Remove blank samples from the output peak matrix: * **True** - samples belonging to the user-defined “reference” class are removed from the output peak matrix * **False** - samples belonging to the user-defined “reference” class are retained in the output peak matrix. :param labels: Path to the metadata file :return: PeakMatrix object """ if min_fraction < 0.0 or min_fraction > 1.0: raise ValueError("Provide a value between 0. and 1.") if min_fold_change < 0: raise ValueError("Provide a value larger than zero.") if function not in ("mean", "median", "max"): raise ValueError("Mean, median or max intensity") if not isinstance(peak_matrix, PeakMatrix): if h5py.is_hdf5(peak_matrix): peak_matrix = hdf5_portal.load_peak_matrix_from_hdf5(peak_matrix) else: peak_matrix = txt_portal.load_peak_matrix_from_txt(peak_matrix) if labels is not None: peak_matrix = update_labels(peak_matrix, labels) if not any([Tag(blank_label, 'classLabel') in x for x in peak_matrix.peaklist_tags]): raise IOError("Blank label ({}) does not exist".format(blank_label)) return filter_blank_peaks(peak_matrix, Tag(blank_label, 'classLabel'), min_fraction, min_fold_change, function, rm_samples)
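# A toy numpy illustration of the blank-filter rule described in the docstring above
# (not dimspy's implementation): a peak is kept when at least `min_fraction` of the
# non-blank samples exceed `min_fold_change` times the mean blank intensity for that
# peak. The intensity values are made up for illustration.
import numpy as np

def keep_peaks(blank_intensities, sample_intensities, min_fraction=1.0, min_fold_change=1.0):
    # blank_intensities: (n_blank, n_peaks); sample_intensities: (n_sample, n_peaks)
    reference = np.nanmean(blank_intensities, axis=0)
    above = sample_intensities > min_fold_change * reference
    return np.mean(above, axis=0) >= min_fraction

blanks = np.array([[100.0, 50.0], [120.0, 60.0]])
samples = np.array([[400.0, 40.0], [350.0, 45.0], [500.0, 30.0]])
print(keep_peaks(blanks, samples))  # peak 1 kept, peak 2 removed -> [ True False]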
def convertToOPMD(input_file): """ Take native wpg output and rewrite in openPMD conformant way. @param input_file: The hdf5 file to be converted. @type: string @example: input_file = "prop_out.h5" """ # Check input file. if not h5py.is_hdf5(input_file): raise IOError("Not a valid hdf5 file: %s. " % (input_file)) # Open in and out files. with h5py.File(input_file, 'r') as h5: with h5py.File(input_file.replace(".h5", ".opmd.h5"), 'w') as opmd_h5: # Get number of time slices in wpg output, assuming horizontal and vertical polarizations have same dimensions, which is always true for wpg output. data_shape = h5['data/arrEhor'].value.shape # Branch off if this is a non-time dependent calculation in frequency domain. if data_shape[2] == 1 and h5['params/wDomain'].value == "frequency": # Time independent calculation in frequency domain. _convert_from_frequency_representation(h5, opmd_h5, data_shape) return number_of_x_meshpoints = data_shape[0] number_of_y_meshpoints = data_shape[1] number_of_time_steps = data_shape[2] time_max = h5['params/Mesh/sliceMax'].value #s time_min = h5['params/Mesh/sliceMin'].value #s time_step = abs(time_max - time_min) / number_of_time_steps #s photon_energy = h5['params/photonEnergy'].value # eV photon_energy = photon_energy * e # Convert to J # Copy misc and params from original wpg output. opmd_h5.create_group('history/parent') try: h5.copy('/params', opmd_h5['history/parent']) h5.copy('/misc', opmd_h5['history/parent']) h5.copy('/history', opmd_h5['history/parent']) # Some keys may not exist, e.g. if the input file comes from a non-simex wpg run. except KeyError: pass except: raise sum_x = 0.0 sum_y = 0.0 for it in range(number_of_time_steps): # Write opmd # Setup the root attributes for iteration 0 opmd.setup_root_attr(opmd_h5) full_meshes_path = opmd.get_basePath( opmd_h5, it) + opmd_h5.attrs["meshesPath"] # Setup basepath. time = time_min + it * time_step opmd.setup_base_path(opmd_h5, iteration=it, time=time, time_step=time_step) opmd_h5.create_group(full_meshes_path) meshes = opmd_h5[full_meshes_path] # Path to the E field, within the h5 file. full_e_path_name = b"E" meshes.create_group(full_e_path_name) E = meshes[full_e_path_name] # Create the dataset (2d cartesian grid) E.create_dataset( b"x", (number_of_x_meshpoints, number_of_y_meshpoints), dtype=numpy.complex64, compression='gzip') E.create_dataset( b"y", (number_of_x_meshpoints, number_of_y_meshpoints), dtype=numpy.complex64, compression='gzip') # Write the common metadata for the group E.attrs["geometry"] = numpy.string_("cartesian") # Get grid geometry. nx = h5['params/Mesh/nx'].value xMax = h5['params/Mesh/xMax'].value xMin = h5['params/Mesh/xMin'].value dx = (xMax - xMin) / nx ny = h5['params/Mesh/ny'].value yMax = h5['params/Mesh/yMax'].value yMin = h5['params/Mesh/yMin'].value dy = (yMax - yMin) / ny E.attrs["gridSpacing"] = numpy.array([dx, dy], dtype=numpy.float64) E.attrs["gridGlobalOffset"] = numpy.array( [h5['params/xCentre'].value, h5['params/yCentre'].value], dtype=numpy.float64) E.attrs["gridUnitSI"] = numpy.float64(1.0) E.attrs["dataOrder"] = numpy.string_("C") E.attrs["axisLabels"] = numpy.array([b"x", b"y"]) E.attrs["unitDimension"] = \ numpy.array([1.0, 1.0, -3.0, -1.0, 0.0, 0.0, 0.0 ], dtype=numpy.float64) # L M T I theta N J # E is in volts per meters: V / m = kg * m / (A * s^3) # -> L * M * T^-3 * I^-1 # Add time information E.attrs[ "timeOffset"] = 0. 
# Time offset with respect to basePath's time # Write attribute that is specific to each dataset: # - Staggered position within a cell E["x"].attrs["position"] = numpy.array([0.0, 0.5], dtype=numpy.float32) E["y"].attrs["position"] = numpy.array([0.5, 0.0], dtype=numpy.float32) # - Conversion factor to SI units # WPG writes E fields in units of sqrt(W/mm^2), i.e. it writes E*sqrt(c * eps0 / 2). # Unit analysis: # [E] = V/m # [eps0] = As/Vm # [c] = m/s # ==> [E^2 * eps0 * c] = V**2/m**2 * As/Vm * m/s = V*A/m**2 = W/m**2 = [Intensity] # Converting to SI units by dividing by sqrt(c*eps0/2)*1e3, 1e3 for conversion from mm to m. c = 2.998e8 # m/s eps0 = 8.854e-12 # As/Vm E["x"].attrs["unitSI"] = numpy.float64( 1.0 / math.sqrt(0.5 * c * eps0) / 1.0e3) E["y"].attrs["unitSI"] = numpy.float64( 1.0 / math.sqrt(0.5 * c * eps0) / 1.0e3) # Copy the fields. Ex = h5['data/arrEhor'][:, :, it, 0] + 1j * h5['data/arrEhor'][:, :, it, 1] Ey = h5['data/arrEver'][:, :, it, 0] + 1j * h5['data/arrEver'][:, :, it, 1] E["x"][:, :] = Ex E["y"][:, :] = Ey # Get area element. dA = dx * dy ### Number of photon fields. # Path to the number of photons. full_nph_path_name = b"Nph" meshes.create_group(full_nph_path_name) Nph = meshes[full_nph_path_name] # Create the dataset (2d cartesian grid) Nph.create_dataset( b"x", (number_of_x_meshpoints, number_of_y_meshpoints), dtype=numpy.float32, compression='gzip') Nph.create_dataset( b"y", (number_of_x_meshpoints, number_of_y_meshpoints), dtype=numpy.float32, compression='gzip') # Write the common metadata for the group Nph.attrs["geometry"] = numpy.string_("cartesian") Nph.attrs["gridSpacing"] = numpy.array([dx, dy], dtype=numpy.float64) Nph.attrs["gridGlobalOffset"] = numpy.array( [h5['params/xCentre'].value, h5['params/yCentre'].value], dtype=numpy.float64) Nph.attrs["gridUnitSI"] = numpy.float64(1.0) Nph.attrs["dataOrder"] = numpy.string_("C") Nph.attrs["axisLabels"] = numpy.array([b"x", b"y"]) Nph.attrs["unitDimension"] = \ numpy.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], dtype=numpy.float64) # Add time information Nph.attrs[ "timeOffset"] = 0. # Time offset with respect to basePath's time # Nph - Staggered position within a cell Nph["x"].attrs["position"] = numpy.array([0.0, 0.5], dtype=numpy.float32) Nph["y"].attrs["position"] = numpy.array([0.5, 0.0], dtype=numpy.float32) Nph["x"].attrs["unitSI"] = numpy.float64(1.0) Nph["y"].attrs["unitSI"] = numpy.float64(1.0) # Calculate number of photons via intensity and photon energy. # Since fields are stored as sqrt(W/mm^2), have to convert to W/m^2 (factor 1e6 below). number_of_photons_x = numpy.round( abs(Ex)**2 * dA * time_step * 1.0e6 / photon_energy) number_of_photons_y = numpy.round( abs(Ey)**2 * dA * time_step * 1.0e6 / photon_energy) sum_x += number_of_photons_x.sum(axis=-1).sum(axis=-1) sum_y += number_of_photons_y.sum(axis=-1).sum(axis=-1) Nph["x"][:, :] = number_of_photons_x Nph["y"][:, :] = number_of_photons_y ### Phases. 
# Path to phases full_phases_path_name = b"phases" meshes.create_group(full_phases_path_name) phases = meshes[full_phases_path_name] # Create the dataset (2d cartesian grid) phases.create_dataset( b"x", (number_of_x_meshpoints, number_of_y_meshpoints), dtype=numpy.float32, compression='gzip') phases.create_dataset( b"y", (number_of_x_meshpoints, number_of_y_meshpoints), dtype=numpy.float32, compression='gzip') # Write the common metadata for the group phases.attrs["geometry"] = numpy.string_("cartesian") phases.attrs["gridSpacing"] = numpy.array([dx, dy], dtype=numpy.float64) phases.attrs["gridGlobalOffset"] = numpy.array( [h5['params/xCentre'].value, h5['params/yCentre'].value], dtype=numpy.float64) phases.attrs["gridUnitSI"] = numpy.float64(1.0) phases.attrs["dataOrder"] = numpy.string_("C") phases.attrs["axisLabels"] = numpy.array([b"x", b"y"]) phases.attrs["unitDimension"] = numpy.array( [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], dtype=numpy.float64) phases["x"].attrs["unitSI"] = numpy.float64(1.0) phases["y"].attrs["unitSI"] = numpy.float64(1.0) # Add time information phases.attrs[ "timeOffset"] = 0. # Time offset with respect to basePath's time # phases positions. - Staggered position within a cell phases["x"].attrs["position"] = numpy.array( [0.0, 0.5], dtype=numpy.float32) phases["y"].attrs["position"] = numpy.array( [0.5, 0.0], dtype=numpy.float32) phases["x"][:, :] = numpy.angle(Ex) phases["y"][:, :] = numpy.angle(Ey) print "Found %e and %e photons for horizontal and vertical polarization, respectively." % ( sum_x, sum_y) opmd_h5.close() h5.close()
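# A toy numpy check of the photon bookkeeping in convertToOPMD above, using WPG's
# native field units: |E|^2 is an intensity in W/mm^2, so multiplying by 1e6
# (mm^-2 -> m^-2), the cell area dA and the slice duration gives energy, and dividing
# by the photon energy gives a photon count. All numbers are made up for illustration.
import numpy
e = 1.602176634e-19                      # J per eV
photon_energy = 8000.0 * e               # 8 keV in J
Ex = numpy.full((4, 4), 1.0e3 + 0.0j)    # field samples in sqrt(W/mm^2)
dA = (1.0e-6) ** 2                       # 1 um x 1 um cells, in m^2
time_step = 1.0e-15                      # 1 fs slices
n_photons = numpy.round(numpy.abs(Ex) ** 2 * dA * time_step * 1.0e6 / photon_energy)
print(n_photons.sum())                   # total photons in this slice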
def getStack(self, filelist=None, imagestack=None): if filelist in [None, []]: filelist, filefilter = self._getStackOfFiles(getfilter=True) else: filefilter = "" if not len(filelist): return None if filefilter in ["", "All Files (*)"]: if HDF5: if h5py.is_hdf5(filelist[0]): filefilter = "HDF5" fileindex = 0 begin = None end = None aifirafile = False if len(filelist): PyMcaDirs.inputDir = os.path.dirname(filelist[0]) #if we are dealing with HDF5, no more tests needed if not filefilter.upper().startswith('HDF5'): f = open(filelist[0], 'rb') #read 10 characters if sys.version < '3.0': line = f.read(10) else: try: line = str(f.read(10).decode()) except UnicodeDecodeError: #give a dummy value line = " " f.close() omnicfile = False if filefilter.upper().startswith('HDF5'): stack = QHDF5Stack1D.QHDF5Stack1D(filelist) omnicfile = True elif filefilter.upper().startswith('OPUS-DPT'): stack = OpusDPTMap.OpusDPTMap(filelist[0]) omnicfile = True elif filefilter.upper().startswith("AIFIRA"): stack = AifiraMap.AifiraMap(filelist[0]) omnicfile = True aifirafile = True elif filefilter.upper().startswith("SUPAVISIO"): stack = SupaVisioMap.SupaVisioMap(filelist[0]) omnicfile = True elif filefilter.upper().startswith("TEXTIMAGE"): imagestack = True fileindex = 0 stack = TextImageStack.TextImageStack(imagestack=True) elif filefilter.upper().startswith("IMAGE") and\ (filelist[0].upper().endswith("TIF") or\ filelist[0].upper().endswith("TIFF")): stack = TiffStack.TiffStack(imagestack=True) elif filefilter.upper().startswith("RENISHAW"): stack = RenishawMap.RenishawMap(filelist[0]) omnicfile = True elif filefilter == "" and\ (filelist[0].upper().endswith("TIF") or\ filelist[0].upper().endswith("TIFF")): stack = TiffStack.TiffStack(imagestack=True) elif filefilter.upper().startswith("IMAGE"): if imagestack is None: imagestack = True fileindex = 0 stack = QStack(imagestack=imagestack) elif line[0] == "{": if filelist[0].upper().endswith("RAW"): if imagestack is None: imagestack = True stack = QStack(imagestack=imagestack) elif line[0:2] in ["II", "MM"]: if imagestack is None: imagestack = True stack = QStack(imagestack=imagestack) elif line.startswith('Spectral'): stack = OmnicMap.OmnicMap(filelist[0]) omnicfile = True elif line.startswith('#\tDate'): stack = LuciaMap.LuciaMap(filelist[0]) omnicfile = True elif filelist[0].upper().endswith("RAW.GZ")or\ filelist[0].upper().endswith("EDF.GZ")or\ filelist[0].upper().endswith("CCD.GZ")or\ filelist[0].upper().endswith("RAW.BZ2")or\ filelist[0].upper().endswith("EDF.BZ2")or\ filelist[0].upper().endswith("CCD.BZ2")or\ filelist[0].upper().endswith(".CBF"): if imagestack is None: imagestack = True stack = QStack(imagestack=imagestack) elif filelist[0].upper().endswith(".RTX"): stack = RTXMap.RTXMap(filelist[0]) omnicfile = True elif filelist[0][-4:].upper() in ["PIGE", "PIGE"]: stack = SupaVisioMap.SupaVisioMap(filelist[0]) omnicfile = True elif filelist[0][-3:].upper() in ["RBS"]: stack = SupaVisioMap.SupaVisioMap(filelist[0]) omnicfile = True elif filelist[0][-3:].upper() in ["SPE"] and\ (line[0] not in ['$', '#']): #Roper Scientific format #handle it as MarCCD stack stack = QStack(imagestack=True) elif MRCMap.isMRCFile(filelist[0]): stack = MRCMap.MRCMap(filelist[0]) omnicfile = True imagestack = True elif LispixMap.isLispixMapFile(filelist[0]): stack = LispixMap.LispixMap(filelist[0]) omnicfile = True elif RenishawMap.isRenishawMapFile(filelist[0]): # This is dangerous. 
# Any .txt file with four columns would be accepted as a Renishaw Map; on the other hand, I do not know how to handle that case as a stack. stack = RenishawMap.RenishawMap(filelist[0]) omnicfile = True elif OmdaqLmf.isOmdaqLmf(filelist[0]): stack = OmdaqLmf.OmdaqLmf(filelist[0]) omnicfile = True elif JcampOpusStack.isJcampOpusStackFile(filelist[0]): stack = JcampOpusStack.JcampOpusStack(filelist[0]) omnicfile = True else: stack = QSpecFileStack() if len(filelist) == 1: if not omnicfile: try: stack.loadIndexedStack(filelist[0], begin, end, fileindex=fileindex) except: msg = qt.QMessageBox() msg.setIcon(qt.QMessageBox.Critical) msg.setInformativeText("%s" % sys.exc_info()[1]) msg.setDetailedText(traceback.format_exc()) msg.exec_() if _logger.getEffectiveLevel() == logging.DEBUG: raise elif len(filelist): if not omnicfile: try: stack.loadFileList(filelist, fileindex=fileindex) except: msg = qt.QMessageBox() msg.setIcon(qt.QMessageBox.Critical) msg.setText("%s" % sys.exc_info()[1]) if QTVERSION < '4.0.0': msg.exec_loop() else: msg.exec_() if _logger.getEffectiveLevel() == logging.DEBUG: raise if aifirafile: masterStack = DataObject.DataObject() masterStack.info = copy.deepcopy(stack.info) masterStack.data = stack.data[:, :, 0:1024] masterStack.info['Dim_2'] = int(masterStack.info['Dim_2'] / 2) slaveStack = DataObject.DataObject() slaveStack.info = copy.deepcopy(stack.info) slaveStack.data = stack.data[:, :, 1024:] slaveStack.info['Dim_2'] = int(slaveStack.info['Dim_2'] / 2) return [masterStack, slaveStack] else: return stack
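# A compact sketch of the format sniffing getStack performs: try HDF5 first, then
# peek at the first bytes of the file. The returned labels are illustrative only;
# the real method instantiates the matching stack class instead of returning a name.
import h5py

def sniff_stack_format(path):
    if h5py.is_hdf5(path):
        return "HDF5"
    with open(path, "rb") as f:
        head = f.read(10)
    if head[:1] == b"{":
        return "EDF/RAW header"
    if head[:2] in (b"II", b"MM"):
        return "TIFF-like"
    if head.startswith(b"Spectral"):
        return "Omnic map"
    return "unknown (fall back to SpecFile)"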
def bin_fast5_file(f5_path, tax_annot_res_dir, sens, min_qual, min_qlen, min_pident, min_coverage, no_trash):
    # Function bins FAST5 file without untwisting.
    #
    # :param f5_path: path to FAST5 file meant to be processed;
    # :type f5_path: str;
    # :param tax_annot_res_dir: path to directory containing taxonomic annotation;
    # :type tax_annot_res_dir: str;
    # :param sens: binning sensitivity;
    # :type sens: str;
    # :param min_qual: threshold for quality filter;
    # :type min_qual: float;
    # :param min_qlen: threshold for length filter;
    # :type min_qlen: int (or None, if this filter is disabled);
    # :param min_pident: threshold for alignment identity filter;
    # :type min_pident: float (or None, if this filter is disabled);
    # :param min_coverage: threshold for alignment coverage filter;
    # :type min_coverage: float (or None, if this filter is disabled);
    # :param no_trash: logical value. True if user does NOT want to output trash files;
    # :type no_trash: bool;

    outdir_path = os.path.dirname(
        logging.getLoggerClass().root.handlers[0].baseFilename)

    seqs_pass = 0  # counter for sequences that pass filters
    QL_seqs_fail = 0  # counter for too short or too low-quality sequences
    align_seqs_fail = 0  # counter for sequences that align to their best hit with too low identity or coverage

    srt_file_dict = dict()

    new_dpath = glob("{}{}*{}*".format(tax_annot_res_dir, os.sep, get_checkstr(f5_path)))[0]
    tsv_res_fpath = get_res_tsv_fpath(new_dpath)
    taxonomy_path = os.path.join(tax_annot_res_dir, "taxonomy", "taxonomy.tsv")
    resfile_lines = configure_resfile_lines(tsv_res_fpath, sens, taxonomy_path)

    # Configure path to "classification not found" file
    classif_not_found_fpath = get_classif_not_found_fpath(f5_path, outdir_path)

    # Make filter for quality and length
    QL_filter = get_QL_filter(f5_path, min_qual, min_qlen)

    # Configure path to trash file
    if not no_trash:
        QL_trash_fpath = get_QL_trash_fpath(f5_path, outdir_path, min_qual, min_qlen)
    else:
        QL_trash_fpath = None
    # end if

    # Make filter for identity and coverage
    align_filter = get_align_filter(min_pident, min_coverage)

    # Configure path to this trash file
    if not no_trash:
        align_trash_fpath = get_align_trash_fpath(f5_path, outdir_path, min_pident, min_coverage)
    else:
        align_trash_fpath = None
    # end if

    # File validation:
    #   RuntimeError will be raised if FAST5 file is broken.
    try:
        # File existence checking is performed while parsing CL arguments.
        # Therefore, this if-statement will trigger only if f5_path's file is not a valid HDF5 file.
        if not h5py.is_hdf5(f5_path):
            raise RuntimeError("file is not of HDF5 (i.e. not FAST5) format")
        # end if

        from_f5 = h5py.File(f5_path, 'r')

        for _ in from_f5:
            break
        # end for
    except RuntimeError as runterr:
        printlog_error_time("FAST5 file is broken")
        printlog_error("Reading the file `{}` crashed.".format(os.path.basename(f5_path)))
        printlog_error("Reason: {}".format(str(runterr)))
        printlog_error("Omitting this file...")
        print()
        # Return zeroes -- inc_val won't be incremented and this file will be omitted
        return (0, 0, 0)
    # end try

    # singleFAST5 and multiFAST5 files should be processed in different ways
    # "Raw" group always in singleFAST5 root and never in multiFAST5 root
    if "Raw" in from_f5.keys():
        f5_cpy_func = copy_single_f5
    else:
        f5_cpy_func = copy_read_f5_2_f5
    # end if

    for _, read_name in enumerate(fast5_readids(from_f5)):
        try:
            hit_names, *vals_to_filter = resfile_lines[sys.intern(
                fmt_read_id(read_name))[1:]]  # omit 'read_' in the beginning of FAST5 group's name
        except KeyError:
            # Place this sequence into the "classification not found" file
            if classif_not_found_fpath not in srt_file_dict.keys():
                srt_file_dict = update_file_dict(srt_file_dict, classif_not_found_fpath)
            # end if
            f5_cpy_func(from_f5, read_name, srt_file_dict[classif_not_found_fpath])
            continue
        # end try

        # If read is found in TSV file:
        if not QL_filter(vals_to_filter):
            QL_seqs_fail += 1
            # Get name of result FAST5 file to write this read in
            if QL_trash_fpath not in srt_file_dict.keys():
                srt_file_dict = update_file_dict(srt_file_dict, QL_trash_fpath)
            # end if
            f5_cpy_func(from_f5, read_name, srt_file_dict[QL_trash_fpath])
        elif not align_filter(vals_to_filter):
            align_seqs_fail += 1
            # Get name of result FAST5 file to write this read in
            if align_trash_fpath not in srt_file_dict.keys():
                srt_file_dict = update_file_dict(srt_file_dict, align_trash_fpath)
            # end if
            f5_cpy_func(from_f5, read_name, srt_file_dict[align_trash_fpath])
        else:
            for hit_name in hit_names.split("&&"):  # there can be multiple hits for single query sequence
                # Get name of result FAST5 file to write this read in
                binned_file_path = os.path.join(outdir_path, "{}.fast5".format(hit_name))
                if binned_file_path not in srt_file_dict.keys():
                    srt_file_dict = update_file_dict(srt_file_dict, binned_file_path)
                # end if
                f5_cpy_func(from_f5, read_name, srt_file_dict[binned_file_path])
            # end for
            seqs_pass += 1
        # end if
    # end for

    from_f5.close()

    # Close all binned files
    for file_obj in filter(lambda x: x is not None, srt_file_dict.values()):
        file_obj.close()
    # end for

    return (seqs_pass, QL_seqs_fail, align_seqs_fail)
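# A standalone sketch of the single-vs-multi FAST5 check used above: a single-read
# FAST5 has a "Raw" group at the root, while a multi-read FAST5 holds one "read_..."
# group per read. The path below is hypothetical.
import h5py

def fast5_layout(f5_path):
    if not h5py.is_hdf5(f5_path):
        raise RuntimeError("file is not of HDF5 (i.e. not FAST5) format")
    with h5py.File(f5_path, 'r') as f5:
        return "single-read" if "Raw" in f5.keys() else "multi-read"

# e.g. layout = fast5_layout("reads.fast5")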
def load(filename: str, lazy: bool = False, **kwargs): """Load an :class:`~kikuchipy.signals.EBSD` or :class:`~kikuchipy.signals.EBSDMasterPattern` object from a supported file format. This function is a modified version of :func:`hyperspy.io.load`. Parameters ---------- filename Name of file to load. lazy Open the data lazily without actually reading the data from disk until required. Allows opening arbitrary sized datasets. Default is False. kwargs Keyword arguments passed to the corresponding kikuchipy reader. See their individual documentation for available options. Returns ------- kikuchipy.signals.EBSD, kikuchipy.signals.EBSDMasterPattern, \ list of kikuchipy.signals.EBSD or \ list of kikuchipy.signals.EBSDMasterPattern Examples -------- Import nine patterns from an HDF5 file in a directory `DATA_DIR` >>> import kikuchipy as kp >>> s = kp.load(DATA_DIR + "/patterns.h5") >>> s <EBSD, title: patterns My awes0m4 ..., dimensions: (3, 3|60, 60)> """ if not os.path.isfile(filename): raise IOError(f"No filename matches '{filename}'.") # Find matching reader for file extension extension = os.path.splitext(filename)[1][1:] readers = [] for plugin in plugins: if extension.lower() in plugin.file_extensions: readers.append(plugin) if len(readers) == 0: raise IOError( f"Could not read '{filename}'. If the file format is supported, please " "report this error") elif len(readers) > 1 and is_hdf5(filename): reader = _plugin_from_footprints(filename, plugins=readers) else: reader = readers[0] # Get data and metadata (from potentially multiple signals if an h5ebsd # file) signal_dicts = reader.file_reader(filename, lazy=lazy, **kwargs) signals = [] for signal in signal_dicts: signals.append(_dict2signal(signal, lazy=lazy)) directory, filename = os.path.split(os.path.abspath(filename)) filename, extension = os.path.splitext(filename) signals[-1].tmp_parameters.folder = directory signals[-1].tmp_parameters.filename = filename signals[-1].tmp_parameters.extension = extension.replace(".", "") if len(signals) == 1: signals = signals[0] return signals
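# A simplified sketch of the reader-selection logic in load() above: match plugins
# by file extension, and only when several readers match an HDF5 file, fall back to
# a footprint check inside the file. `plugins`, `_plugin_from_footprints` and
# `is_hdf5` are the module-level objects referenced in load(); they are passed in
# here only to make the control flow explicit.
import os

def _select_reader(filename, plugins, footprint_resolver, is_hdf5):
    ext = os.path.splitext(filename)[1][1:].lower()
    matches = [p for p in plugins if ext in p.file_extensions]
    if not matches:
        raise IOError(f"Could not read '{filename}'.")
    if len(matches) > 1 and is_hdf5(filename):
        return footprint_resolver(filename, plugins=matches)
    return matches[0]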
os.mkdir(test_output_folder) logger.info("test output folder: %s" % test_output_folder) # retrieve the list of BAG files in the test/data folder bag_paths = list() for root, _, files in os.walk(test_data_folder): for f in files: if f.endswith(".bag"): bag_paths.append(os.path.join(root, f)) logger.info("nr. of available BAG files: %d" % len(bag_paths)) # select an input from the list of BAG files bag_path = bag_paths[0] # change this index to select another bag file if not h5py.is_hdf5(bag_path): raise RuntimeError( "The passed BAG file is not recognized as a valid HDF5 format") logger.info("input BAG file: %s" % bag_path) # setup comparison parameters copyBaseBag = False ziptype = None # To test with compression, set this to "gzip" or "lzf". test_suffix = "CMP" if ziptype != None: test_suffix += "_" + ziptype # open the input BAG in reading mode (and check the presence of the BAG_root group) fid = h5py.File(bag_path, 'r') try: