Example #1
 def _can_load(f) :
     try :
         if h5py.is_hdf5(f) or h5py.is_hdf5(f+".0.hdf5") :
             return True
         else :
             return False
     except AttributeError :
         if "hdf5" in f :
             warnings.warn("It looks like you're trying to load HDF5 files, but python's HDF support (h5py module) is missing.", RuntimeWarning)
         return False
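Most of the examples in this collection share the same basic pattern: call h5py.is_hdf5() as a cheap signature check before trying to open a file. A minimal, self-contained sketch of that pattern (the helper name open_if_hdf5 is illustrative and not taken from any example):

import h5py

def open_if_hdf5(path):
    # is_hdf5() returns True only for an existing file with an HDF5 signature,
    # so this check never raises for ordinary non-HDF5 files.
    if h5py.is_hdf5(path):
        return h5py.File(path, 'r')
    return None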
Example #2
    def _can_load(cls, f):

        if hasattr(h5py, "is_hdf5"):
            if h5py.is_hdf5(f):
                return cls._test_for_hdf5_key(f)
            elif h5py.is_hdf5(f+".0.hdf5"):
                return cls._test_for_hdf5_key(f+".0.hdf5")
            else:
                return False
        else:
            if "hdf5" in f:
                warnings.warn(
                    "It looks like you're trying to load HDF5 files, but python's HDF support (h5py module) is missing.", RuntimeWarning)
            return False
Example #3
    def __init__(self, filename, groupname, index=None):
        """Initializes a h5features reader to read a group in a HDF5 file."""

        # check filename
        if not h5py.is_hdf5(filename):
            raise IOError('{} is not a HDF5 file'.format(filename))
        self.filename = filename

        # open the HDF5 file for reading
        self.h5file = h5py.File(self.filename, 'r')

        # access to the requested group
        if not groupname in self.h5file:
            raise IOError('{} is not a valid group in {}'
                          .format(groupname, self.filename))
        self.groupname = groupname
        self.group = self.h5file[groupname]

        # Get the version of the file being read
        self.version = self._read_version()

        # read the index from group if not provided
        if index is None:
            # Choose the good index according to file version
            if self.version == '0.1':
                index_class = IndexV0_1()
            elif self.version == '1.0':
                index_class = IndexV1_0()
            else:
                index_class = Index()

            self.index = index_class.read(self.group)
        else:
            self.index = index
Example #4
def load(filename):
    """
    Load a file as an `h5py.File`-like object.

    Formats supported:
    - h5 files, if `h5py` module is installed
    - Spec files if `SpecFile` module is installed

    :param str filename: A filename
    :raises: IOError if the file can't be loaded as an h5py.File like object
    :rtype: h5py.File
    """
    if not os.path.isfile(filename):
        raise IOError("Filename '%s' must be a file path" % filename)

    if not h5py_missing:
        if h5py.is_hdf5(filename):
            return h5py.File(filename)

    try:
        from . import spech5
        return spech5.SpecH5(filename)
    except ImportError:
        logger.debug("spech5 can't be loaded.", exc_info=True)
    except IOError:
        logger.debug("File '%s' can't be read as spec file.", filename, exc_info=True)

    raise IOError("File '%s' can't be read as HDF5" % filename)
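Example #4 tries readers in order: native HDF5 first, then a spec-file fallback. A hedged, generic sketch of the same fallback idea, where fallback_loaders is an assumed caller-supplied sequence of callables rather than anything from silx:

import h5py

def load_with_fallbacks(filename, fallback_loaders=()):
    # Prefer native HDF5; otherwise try each fallback loader in turn.
    if h5py.is_hdf5(filename):
        return h5py.File(filename, 'r')
    for loader in fallback_loaders:
        try:
            return loader(filename)
        except (ImportError, IOError):
            continue
    raise IOError("File '%s' can't be read by any available loader" % filename)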
Example #5
def getUserInfo(user_name):
    """
      getUserInfo: return user data
    """
    log = logging.getLogger("h5serv")
    userid = None

    if not user_name:
        return None

    log.info("get info for user: [" + user_name + "]")
    filename = config.get('password_file')
    if not filename:
        log.error("no config for password_file")
        raise HTTPError(500, message="bad configuration")
    # verify file exists and is writable
    if not op.isfile(filename):
        log.error("password file is missing")
        raise HTTPError(500, message="bad configuration")

    if not h5py.is_hdf5(filename):
        log.error("password file is invalid")
        raise HTTPError(500, message="bad configuration")

    with h5py.File(filename, 'r') as f:
        if user_name not in f.attrs:
            return None

        data = f.attrs[user_name]
        return data
Example #6
def h5repack(infile, h5repack_path, fs_strategy='FSM_AGGR', outfile=None):
    if not isinstance(infile,str):
        raise TypeError('Input infile must be a string')
    if not h5py.is_hdf5(infile):
        raise IOError('Input infile is not a HDF5 file')
    if not isinstance(h5repack_path, str):
        raise TypeError('Input h5repack_path must be a string')
    if not isinstance(fs_strategy, str):
        raise TypeError('Input fs_strategy must be a string')
    if fs_strategy.upper() not in ['FSM_AGGR', 'PAGE', 'AGGR', 'NONE']:
        raise ValueError('Invalid value specified in fs_strategy')
    fs_strategy = fs_strategy.upper()
    if outfile is None:
        outfile = infile
    else:
        if not isinstance(outfile, str):
            raise TypeError('outfile must be a string')

    try:
        if outfile == infile:
            tmpfile = infile + '.tmp'
            mv_result = subprocess.call('mv {0} {1}'.format(infile, tmpfile), shell=True)
        else:
            tmpfile = infile
        h5repack_result = subprocess.call('{0} -S {1} {2} {3}'.format(h5repack_path, fs_strategy, tmpfile, outfile), shell=True)
        if h5repack_result != 0: # problem with h5repack, just rename tmpfile back to original
            rm_result = subprocess.call('mv {0} {1}'.format(tmpfile, outfile), shell=True)
        else: # h5repack is successful, remove the tmpfile
            rm_result = subprocess.call('rm {0}'.format(tmpfile), shell=True)
    except Exception as x:
        return (mv_result, h5repack_result, rm_result, x)
    else:
        return (mv_result, h5repack_result, rm_result, None)
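Example #6 builds shell command strings with shell=True. As a sketch only, the same calls can be made with argument lists so paths containing spaces are passed through safely; the file names below are hypothetical and h5repack is assumed to be on the PATH:

import subprocess

status = subprocess.call(['h5repack', '-S', 'FSM_AGGR', 'input.hdf5', 'repacked.hdf5'])
if status != 0:
    raise RuntimeError('h5repack exited with status %d' % status)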
Example #7
    def __init__(self, filename, groupname=None):
        # open the file for reading
        if not os.path.exists(filename) or not h5py.is_hdf5(filename):
            raise IOError('{} is not a HDF5 file'.format(filename))
        self.h5file = h5py.File(filename, 'r')

        # open the requested group in the file
        if groupname is None:
            # expect only one group in the file
            groups = list(self.h5file.keys())
            if not len(groups) == 1:
                raise IOError('groupname is None and cannot be guessed in {}.'
                              .format(filename))
            groupname = groups[0]
        elif not groupname in self.h5file:
            raise IOError('{} is not a valid group in {}'
                          .format(groupname, filename))
        self.group = self.h5file[groupname]

        # load h5features attributes and datasets
        self.version = read_version(self.group)
        self.items = read_items(self.group, self.version)
        self._index = read_index(self.group, self.version)

        self.dformat = self.group.attrs['format']
        if self.dformat == 'sparse':
            self.dim = self.group.attrs['dim']
            self.frames = (self.group['lines'] if self.version == '0.1'
                           else self.group['frames'])[...]
Example #8
def open_file(filename, f_start=None, f_stop=None, t_start=None, t_stop=None, load_data=True, max_load=1.):
    """Open a HDF5 or filterbank file

    Returns instance of a Reader to read data from file.

    ================== ==================================================
    Filename extension File type
    ================== ==================================================
    h5, hdf5           HDF5 format
    fil                fil format
    *other*            Will raise NotImplementedError
    ================== ==================================================

    """
    if not os.path.isfile(filename):
        type(filename)
        print(filename)
        raise IOError("No such file or directory: " + filename)

    filename = os.path.expandvars(os.path.expanduser(filename))
    # Get file extension to determine type
    ext = filename.split(".")[-1].strip().lower()

    if six.PY3:
        ext = bytes(ext, 'ascii')

    if h5py.is_hdf5(filename):
        # Open HDF5 file
        return H5Reader(filename, f_start=f_start, f_stop=f_stop, t_start=t_start, t_stop=t_stop,
                        load_data=load_data, max_load=max_load)
    elif sigproc.is_filterbank(filename):
        # Open FIL file
        return FilReader(filename, f_start=f_start, f_stop=f_stop, t_start=t_start, t_stop=t_stop, load_data=load_data, max_load=max_load)
    else:
        raise NotImplementedError('Cannot open this type of file with Waterfall')
Example #9
def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)

    if opts.level <= 0:
        option_parser.error("level must be greater than zero!")

    collapse_f = make_collapse_f(opts.metadata_category, opts.level,
                                 opts.ignore)
    table = load_table(opts.input_fp)

    if h5py.is_hdf5(opts.input_fp):
        # metadata are not deserializing correctly. Duct tape it.
        update_d = {}
        for i, md in zip(table.ids(axis='observation'),
                         table.metadata(axis='observation')):
            update_d[i] = {k: json.loads(v[0]) for k, v in md.items()}
        table.add_metadata(update_d, axis='observation')

    result = table.collapse(collapse_f, axis='observation', one_to_many=True,
                            norm=False,
                            one_to_many_md_key=opts.metadata_category)

    if(opts.format_tab_delimited):
        f = open(opts.output_fp, 'w')
        f.write(result.to_tsv(header_key=opts.metadata_category,
                              header_value=opts.metadata_category,
                              metadata_formatter=lambda s: '; '.join(s)))
        f.close()
    else:
        format_fs = {opts.metadata_category: vlen_list_of_str_formatter}
        write_biom_table(result, opts.output_fp, format_fs=format_fs)
Example #10
def can_convert_h5_to(dst_type, h5_filename=None):
    """Whether conversion from this particular h5 file to dst type is supported

    @param h5_filename: h5 filename or None
    @type h5_filename: string
    @param dst_type: name of type
    @type dst_type: string
    @return: True if possible
    """

    if dst_type in ('matlab', 'octave'):
        return True

    if h5_filename and h5_filename.endswith('.h5') and h5py.is_hdf5(h5_filename):
        try:
            h5 = h5py.File(h5_filename, 'r')

            if dst_type=='libsvm': # libsvm requires data/label
                ordering=set(('label','data'))
                if ordering.issubset(set(h5['data'].keys())):
                    return True # TODO check if this is sparse data / ndarray data
            elif dst_type in ('csv', 'arff', 'rdata'): # csv/arff/RData support everything except sparse data
                for k in h5['data'].keys():
                    if k.endswith('_indptr') or k.endswith('_indices'):
                        return False
                return True
        except:
            pass
    return False
Example #11
 def getFileHandle(self,inputfile):
     try:
         self._HDF5 = False
         if HDF5SUPPORT:
             if h5py.is_hdf5(inputfile):
                 self._HDF5 = True
                 try:
                     return HDF5Stack1D.HDF5Stack1D(self._filelist,
                                                   self.selection)
                 except:
                     raise
         ffile = self.__tryEdf(inputfile)
         if ffile is None:
             ffile = self.__tryLucia(inputfile)
         if ffile is None:
             if inputfile[-3:] == "DAT":
                 ffile = self.__tryAifira(inputfile)
         if ffile is None:
             if LispixMap.isLispixMapFile(inputfile):
                 ffile = LispixMap.LispixMap(inputfile, native=False)
         if (ffile is None):
             del ffile
             ffile   = SpecFileLayer.SpecFileLayer()
             ffile.SetSource(inputfile)
         return ffile
     except:
         raise IOError("I do not know what to do with file %s" % inputfile)
Example #12
 def canhandle(url):
     if not url.startswith('file://'):
         return False
     path = url.replace('file://','')
     if not h5py.is_hdf5(path):
         return False
     return True
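A short usage sketch for Example #12; the path is hypothetical:

canhandle('file:///data/sample.h5')   # True only if /data/sample.h5 exists and is an HDF5 file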
Example #13
    def __init__(self, filename, chunk_size=0.1, version='1.1'):
        """Initialize an HDF5 file for writing h5features.

        Parameters
        ----------

        filename : str --- The name of the HDF5 file to write on. For
            clarity you should use a *.h5 extension but this is not
            required.

        chunk_size : float, optional --- The size in Mo of a chunk in
            the file. Default is 0.1 Mo. A chunk size below 8 Ko is
            not allowed as it results in poor performance.

        Raise
        -----

        IOError if the file exists but is not HDF5.
        IOError if the chunk size is below 8 Ko.

        """
        if not is_supported_version(version):
            raise IOError('version {} is not supported'.format(version))
        self.version = version

        # Raise if the file exists but is not HDF5
        if os.path.isfile(filename) and not h5py.is_hdf5(filename):
            raise IOError('{} is not a HDF5 file.'.format(filename))
        self.filename = filename

        if chunk_size < 0.008:
            raise IOError('chunk size is below 8 Ko')
        self.chunk_size = chunk_size
Example #14
def get_attribute_types(fname):
    if not h5py.is_hdf5(fname):
        return ""

    types=set()
    dt = h5py.special_dtype(vlen=str)
    try:
        h5 = h5py.File(fname, 'r')
        have_type = '/data_descr/types' in h5
        all_types = set(h5['/data_descr/types'])
        for o in h5['/data_descr/ordering']:
            indptr_name='/data/' + o + '_indptr'
            indices_name='/data/' + o + '_indices'
            if indptr_name in h5 and indices_name in h5:
                types.add('Sparse Matrix')
            else:
                if have_type and o in all_types:
                    types.add(h5['/data_descr/types'][o])
                else:
                    t=h5['/data/' + o].dtype
                    if t==dt:
                        types.add("String")
                    elif t in (numpy.int64, numpy.int32):
                        types.add("Integer")
                    elif t in (numpy.float64, numpy.float32):
                        types.add("Floating Point")
                    else:
                        types.add(str(t))
        h5.close()
    except:
        pass

    return ','.join(list(types))
Example #15
    def pre_process(self, args=''):
        parser = argparse.ArgumentParser()
        parser.add_argument('--output', default='processed',
                            help="directory to save the pre-processed data to")
        parser.add_argument('--single', action='store_true',
                            help="process single layer only")
        args = parser.parse_args(args)

        renamer = Renamer()
        params = Parameters()

        for item in self.items:
            index = renamer.pattern_match(item).groupdict()['index']
            run_type = params.determine_run_type(index)
            if run_type != 'single layer' and args.single:
                print "{} is not single layer, skipping.".format(item)

            elif not h5py.is_hdf5(item):
                print "{} is not hdf5!".format(item)

            else:
                try:
                    self._pre_process(item, args.output, args.single)
                    logging.info('Processed {}'.format(item))
                except Exception:
                    print("Failed to process {}".format(item))
                    logging.exception('Could not process {}'.format(item))
Example #16
def dump_mcscf(mol, chkfile, mo_coeff,
               mcscf_energy=None, e_cas=None,
               ci_vector=None,
               iter_micro_tot=None, iter_macro=None,
               converged=None,
              ):
    """Dumps MCSCF/CASSCF calculation to checkpoint file.
    """
    if h5py.is_hdf5(chkfile):
        fh5 = h5py.File(chkfile)
        if 'mcscf' in fh5:
            del(fh5['mcscf'])
    else:
        fh5 = h5py.File(chkfile, 'w')
    if 'mol' not in fh5:
        fh5['mol'] = format(mol.pack())
    fh5['mcscf/mo_coeff'] = mo_coeff
    def store(key, val):
        if val is not None: fh5[key] = val
    store('mcscf/mcscf_energy', mcscf_energy)
    store('mcscf/e_cas', e_cas)
    store('mcscf/ci_vector', ci_vector)
    store('mcscf/iter_macro', iter_macro)
    store('mcscf/iter_micro_tot', iter_micro_tot)
    store('mcscf/converged', converged)
    fh5.close()
Example #17
    def open_file(self, filename=GlobalDefaults.file_resultdatafile):
        """Load a given file that contains the results from another simulation.

        :param filename: The filename (optionally with filepath) of the file we try to load.
                         If not given the default value from `GlobalDefaults` is used.
        """
        # Try to open the file or raise an exception if it does not exist.
        if os.path.lexists(filename):
            if hdf.is_hdf5(filename):
                self._srf = hdf.File(filename)
            else:
                raise IOError("File '" + str(filename) + "' is not a hdf5 file")
        else:
            raise IOError("File '" + str(filename) + "' does not exist!")

        # Check if the file format can be read by the IOManager
        if not "file_version" in self._srf.attrs.keys():
            raise IOError("Unsupported file format without version number")

        if self._srf.attrs["file_version"] != self._hdf_file_version:
            raise IOError("Unsupported file format version " + str(self._srf.attrs["file_version"]))

        # Initialize the internal book keeping data
        self._block_ids = [s[len(self._prefixb) :] for s in self._srf.keys() if s.startswith(self._prefixb)]
        self._block_count = len(self._block_ids)

        self._group_ids = [s[len(self._prefixg) :] for s in self._srf.keys() if s.startswith(self._prefixg)]
        self._group_count = len(self._group_ids)

        # Load the simulation parameters from data block 0.
        self._parameters = self.load_parameters(blockid="global")
Example #18
def read_vaspdump(path, h5dump=None):
#NOTE read_hfdump returns the integrals in MO representation
    clustdump = os.path.join(path, 'FCIDUMP.CLUST.GTO')
    jdump     = os.path.join(path, 'JDUMP')
    kdump     = os.path.join(path, 'KDUMP')
    fockdump  = os.path.join(path, 'FOCKDUMP')
    if h5py.is_hdf5(clustdump):
        f = h5py.File(clustdump, 'r')
        dic = {}
        for k,v in f.items():
            if v.shape: # I'm ndarray
                dic[k] = numpy.array(v)
            else:
                dic[k] = v.value
        f.close()
    else:
        hfdic = read_hfdump(jdump, kdump, fockdump)
        dic = read_clustdump(clustdump, hfdic)
        mo_coeff = dic['MO_COEFF']
        hfdic['HCORE'] = reduce(numpy.dot, (mo_coeff, hfdic['HCORE'], mo_coeff.T))
        hfdic['J'] = reduce(numpy.dot, (mo_coeff, hfdic['J'], mo_coeff.T))
        hfdic['K'] = reduce(numpy.dot, (mo_coeff, hfdic['K'], mo_coeff.T))
        dic.update(hfdic)
        if h5dump is None:
            h5dump = clustdump+'.h5'
        f = h5py.File(h5dump, 'w')
        for k,v in dic.items():
            sys.stdout.write('h5dump %s\n' % k)
            f[k] = v
        f.close()
    return dic
Example #19
def getUserName(userid):
    """
      getUserName: return user name for given user id
      #todo: may need to be optimized to support large number of users

    """
    log = logging.getLogger("h5serv")

    log.info("get user name for userid: [" + str(userid) + "]")
    filename = config.get('password_file')
    if not filename:
        log.error("no config for password_file")
        raise HTTPError(500, message="bad configuration")
    # verify file exists and is writable
    if not op.isfile(filename):
        log.error("password file is missing")
        raise HTTPError(500, message="bad configuration")

    if not h5py.is_hdf5(filename):
        log.error("password file is invalid")
        raise HTTPError(500, message="bad configuration")

    with h5py.File(filename, 'r') as f:
        for attr_name in f.attrs:
            attr = f.attrs[attr_name]
            if attr['userid'] == userid:
                return attr_name

    return None
Example #20
def readIn_generator_QMP(file_path,rootname='QMP',coefsite=False,conjugate=False):
    if not _h5.is_hdf5(file_path):
        raise Exception(' <HDF5> {file} is not a valid hdf5 file'.format(file=file_path))
    f = _h5.File(file_path, "r")
    if coefsite:
        coefonsite_gen = (_ for _ in f['/'+rootname+'_coefsite'].value)
    for per_site in f['/'+rootname].itervalues():
        block_array = dict()
        if coefsite:
            coefonsite = coefonsite_gen.next()
        for m in per_site.itervalues():
            line = m.name.split('/')[-1]
            # if transpose:
            #     # I do not know how to implement transpose here ='/
            #     pass
            # else:
            if (coefsite and conjugate):
                block_array[tuple(int(line[_:(_+2)]) for _ in range(0, len(line), 2))] = coefonsite*m.value.conjugate()
            elif (coefsite and not conjugate):
                block_array[tuple(int(line[_:(_+2)]) for _ in range(0, len(line), 2))] = coefonsite*m.value
            elif (not coefsite and conjugate):
                block_array[tuple(int(line[_:(_+2)]) for _ in range(0, len(line), 2))] = m.value.conjugate()
            else:
                block_array[tuple(int(line[_:(_+2)]) for _ in range(0, len(line), 2))] = m.value
        yield block_array
    f.close()
Example #21
def dump_mcscf(mol, chkfile, e_tot, mo_coeff, ncore, ncas,
               mo_occ=None, mo_energy=None,
               e_cas=None, ci_vector=None):
    """Dumps MCSCF/CASSCF calculation to checkpoint file.
    """
    if h5py.is_hdf5(chkfile):
        fh5 = h5py.File(chkfile)
        if 'mcscf' in fh5:
            del(fh5['mcscf'])
    else:
        fh5 = h5py.File(chkfile, 'w')
    if 'mol' not in fh5:
        fh5['mol'] = format(mol.pack())

    fh5['mcscf/mo_coeff'] = mo_coeff
    def store(key, val):
        if val is not None: fh5[key] = val
    store('mcscf/e_tot', e_tot)
    store('mcscf/e_cas', e_cas)
    store('mcscf/ci', ci_vector)
    store('mcscf/ncore', ncore)
    store('mcscf/ncas', ncas)
    store('mcscf/mo_occ', mo_occ)
    store('mcscf/mo_energy', mo_energy)
    fh5.close()
Example #22
    def __init__(self, filename):

        global config
        super(GadgetHDFSnap, self).__init__()

        self._filename = filename

        if not h5py.is_hdf5(filename):
            h1 = h5py.File(filename + ".0.hdf5", "r")
            numfiles = h1['Header'].attrs['NumFilesPerSnapshot']
            self._hdf = HdfFileGenerator(filename, numfiles)
        else:
            self._hdf = [h5py.File(filename, "r")]

        self._family_slice = {}

        self._loadable_keys = set([])
        self._family_arrays = {}
        self._arrays = {}
        self.properties = {}

        # determine which particle types are in the output

        my_type_map = {}

        for fam, g_types in _type_map.iteritems():
            my_types = []
            for x in g_types:
                # Get all keys from all hdf files
                for hdf in self._hdf:
                    if x in hdf.keys():
                        my_types.append(x)
                        break
            if len(my_types):
                my_type_map[fam] = my_types

        sl_start = 0
        for x in my_type_map:
            l = 0
            for name in my_type_map[x]:
                for hdf in self._hdf:
                    # Skip PartType if not in this hdf file
                    try:
                        l += hdf[name]['Coordinates'].shape[0]
                        k = self._get_hdf_allarray_keys(hdf[name])
                        self._loadable_keys = self._loadable_keys.union(set(k))
                    except KeyError as e:
                        if 'PartType' in name: continue
                        else: raise e
            self._family_slice[x] = slice(sl_start, sl_start + l)

            sl_start += l

        self._loadable_keys = [_translate_array_name(
            x, reverse=True) for x in self._loadable_keys]
        self._num_particles = sl_start

        self._my_type_map = my_type_map

        self._decorate()
Example #23
def load_file_as_h5py(filename):
    """
    Load a file as an h5py.File object

    :param str filename: A filename
    :raises: IOError if the file can't be loaded as an h5py.File like object
    :rtype: h5py.File
    """
    if not os.path.isfile(filename):
        raise IOError("Filename '%s' must be a file path" % filename)

    if h5py.is_hdf5(filename):
        return h5py.File(filename)

    try:
        from ..io import spech5
        return spech5.SpecH5(filename)
    except ImportError:
        _logger.debug("spech5 can't be loaded.", filename, exc_info=True)
    except IOError:
        _logger.debug("File '%s' can't be read as spec file.", filename, exc_info=True)

    try:
        from silx.io import fabioh5
        return fabioh5.File(filename)
    except ImportError:
        _logger.debug("fabioh5 can't be loaded.", filename, exc_info=True)
    except Exception:
        _logger.debug("File '%s' can't be read as fabio file.", filename, exc_info=True)

    raise IOError("Format of filename '%s' is not supported" % filename)
Example #24
    def OpenDatabase(self, DatabaseName, PRINT=False):
        """

        Open a specific database.

        :param DatabaseName: name of the database to open
        :type DatabaseName: str
        :param PRINT: select whether to print message or not. \n
                        0 = Yes \n
                        1 = No \n
        :type PRINT: int

        """

        self.file = []
        try:
            test = h5.is_hdf5(DatabaseName)

            if test:
                self.file = h5.File(DatabaseName, mode="a")
                if PRINT == True:
                    print '{0} database opened'.format(DatabaseName)

            if not test:
                print '{0} does not exist'.format(DatabaseName)
                
        except IOError:
            raise DatabaseError('{0} database does not exist'.format(
                            DatabaseName))
Example #25
	def __init__(self, file_path):
		assert h5py.is_hdf5(file_path)

		self.file_path = file_path

		f = h5py.File(file_path, "r")
		stars_snap = f['PartType4']
		stars_coords = stars_snap['Coordinates']
		stars_mags = stars_snap['GFM_StellarPhotometrics']

		self.stars_coords = (stars_coords - numpy.mean(stars_coords, axis=0)) / numpy.std(stars_coords, axis=0)
		self.stars_mags = {'U': stars_mags[:,0],
						   'B': stars_mags[:,1],
						   'V': stars_mags[:,2],
						   'K': stars_mags[:,3],
						   'g': stars_mags[:,4],
						   'r': stars_mags[:,5],
						   'i': stars_mags[:,6],
						   'z': stars_mags[:,7]}
		self.image = 0.
		self.image_parameters = {'alpha' : None,
						   		 'beta' : None,
						   		 'gamma' : None,
						   		 'intensity' : None,
						   		 'scale' : None,
						   		 'xshift' : None,
						   		 'yshift' : None,
						   		 'bg' : None,
						   		 'psf_size': None}
Example #26
    def read(self):
        """Get data and description in-memory 

        Retrieve contents from file.

        @return: example names, ordering and the examples
        @rtype: dict of: list of names, list of ordering and dict of examples
        """
        # we want the exception handled elsewhere

        if not h5py.is_hdf5(self.fname):
            return

        h5 = h5py.File(self.fname, 'r')

        contents = {
            'name': h5.attrs['name'],
            'comment': h5.attrs['comment'],
            'mldata': h5.attrs['mldata'],
        }

        if contents['comment']=='Task file':
            contents['task']=dict()
            contents['ordering']=list()
            group='task'
            for field in ml2h5.task.task_data_fields:
                if field in h5[group]:
                    contents['ordering'].append(field)
        else:
            contents['data']=dict()
            contents['ordering']=h5['/data_descr/ordering'][...].tolist()
            group='data'

        contents['group']=group

        if '/%s_descr/names' % group in h5:
           contents['names']=h5['/%s_descr/names' % group][...].tolist()

        if '/%s_descr/types' % group in h5:
            contents['types'] = h5['/%s_descr/types' % group ][...]

        for name in contents['ordering']:
            vname='/%s/%s' % (group, name)
            sp_indices=vname+'_indices'
            sp_indptr=vname+'_indptr'

            if sp_indices in h5['/%s' % group] and sp_indptr in h5['/%s' % group]:
                contents[group][name] = csc_matrix((h5[vname], h5[sp_indices], h5[sp_indptr]))
            else:
                d = numpy.array(h5[vname],order='F')

                try:
                    d=d['vlen']
                except:
                    pass
                contents[group][name] = d
        h5.close()
        return contents
Example #27
def readIn_generator_coefficient(file_path, rootname='QMP'):
    if not _h5.is_hdf5(file_path):
        raise Exception(' <HDF5> {file} is not a valid hdf5 file'.format(file=file_path))
    f = _h5.File(file_path, "r")
    coefonsite_gen = (_ for _ in f['/'+rootname+'_coefsite'].value)
    for coefsite in coefonsite_gen:
        yield coefsite
    f.close()
Example #28
def _create_h5file(erifile, dataname):
    if h5py.is_hdf5(erifile):
        feri = h5py.File(erifile)
        if dataname in feri:
            del(feri[dataname])
    else:
        feri = h5py.File(erifile, 'w')
    return feri
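Examples #16, #21 and #28 share an open-or-create pattern: reuse an existing HDF5 file and drop a stale dataset, or create the file from scratch. A hedged sketch of that pattern with an explicit file mode (the original snippets call h5py.File without one):

import h5py

def open_or_create(path, dataname):
    # Reuse an existing HDF5 file in append mode, replacing the named dataset;
    # otherwise create a fresh file.
    if h5py.is_hdf5(path):
        f = h5py.File(path, 'a')
        if dataname in f:
            del f[dataname]
    else:
        f = h5py.File(path, 'w')
    return f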
Example #29
def is_workspace_file(path):
    '''Return True if the file along the given path is a workspace file'''
    if not h5py.is_hdf5(path):
        return False
    h5file = h5py.File(path, mode="r")
    if not HDF5FileList.has_file_list(h5file):
        return False
    return HDF5Dict.has_hdf5_dict(h5file)
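Example #29 leaves its h5py.File handle open. A hedged variant that releases the handle via a context manager; HDF5FileList and HDF5Dict are the same CellProfiler helpers used in the example above:

import h5py

def is_workspace_file_closed(path):
    # Same checks as Example #29, but the file is closed even if a check fails.
    if not h5py.is_hdf5(path):
        return False
    with h5py.File(path, mode='r') as h5file:
        return (HDF5FileList.has_file_list(h5file)
                and HDF5Dict.has_hdf5_dict(h5file))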
Example #30
def _try_h5(fname):
    """Try if given file is in hdf5 format

    @param fname: name of file to determine format for
    @type fname: string
    """

    return h5py.is_hdf5(fname)
Example #31
    def __init__(self, *args, **kwds):
        '''Instantiate a data tool frame
        
        module_name: name of module to instantiate
        measurements_file_name: name of measurements file
        '''
        assert kwds.has_key(
            "module_name"), "DataToolFrame() needs a module_name argument"
        assert kwds.has_key(
            "measurements_file_name"
        ), "DataToolFrame() needs a measurements_file_name argument"
        module_name = kwds["module_name"]
        measurements_file_name = kwds["measurements_file_name"]

        kwds_copy = kwds.copy()
        del kwds_copy["module_name"]
        del kwds_copy["measurements_file_name"]
        kwds_copy["title"] = "%s data tool" % module_name
        wx.Frame.__init__(self, *args, **kwds_copy)
        self.module = instantiate_module(module_name)
        self.pipeline = cpp.Pipeline()
        if h5py.is_hdf5(measurements_file_name):
            self.workspace = cpw.Workspace(self.pipeline, self.module, None,
                                           None, None, None)
            self.workspace.load(measurements_file_name, True)
            self.measurements = self.workspace.measurements
        else:
            self.pipeline.load(measurements_file_name)
            self.load_measurements(measurements_file_name)
            self.workspace = cpw.Workspace(self.pipeline, self.module, None,
                                           None, self.measurements, None)

        self.module.module_num = len(self.pipeline.modules()) + 1
        self.pipeline.add_module(self.module)

        self.sizer = wx.BoxSizer(wx.VERTICAL)

        module_panel = wx.lib.scrolledpanel.ScrolledPanel(
            self, -1, style=wx.SUNKEN_BORDER)
        module_panel.BackgroundColour = cpprefs.get_background_color()
        self.BackgroundColour = cpprefs.get_background_color()

        self.module_view = ModuleView(module_panel, self.workspace, True)
        self.module_view.set_selection(self.module.module_num)

        def on_change(caller, event):
            setting = event.get_setting()
            proposed_value = event.get_proposed_value()
            setting.value = proposed_value
            self.pipeline.edit_module(event.get_module().module_num, False)
            self.module_view.reset_view()
            self.module_view.request_validation()

        self.module_view.add_listener(on_change)

        #
        # Add a panel for the "run" button
        #
        panel = wx.Panel(self)
        panel_sizer = wx.BoxSizer(wx.HORIZONTAL)
        button = wx.Button(panel, label="Run")

        self.sizer.Add(module_panel, 1, wx.EXPAND)
        self.sizer.Add(panel, 0, wx.EXPAND)

        panel_sizer.AddStretchSpacer()
        panel_sizer.Add(button, 0, wx.RIGHT, button.Size[1])
        panel.SetSizer(panel_sizer)

        wx.EVT_BUTTON(self, button.Id, self.on_run)
        #
        # Add a file menu
        #
        file_menu = wx.Menu()
        file_menu.Append(ID_FILE_LOAD_MEASUREMENTS, "&Load measurements")
        file_menu.Append(ID_FILE_SAVE_MEASUREMENTS, "&Save measurements")
        file_menu.Append(ID_FILE_EXIT, "E&xit")
        self.MenuBar = wx.MenuBar()
        self.MenuBar.Append(file_menu, "&File")
        self.Bind(wx.EVT_MENU,
                  self.on_load_measurements,
                  id=ID_FILE_LOAD_MEASUREMENTS)
        self.Bind(wx.EVT_MENU,
                  self.on_save_measurements,
                  id=ID_FILE_SAVE_MEASUREMENTS)
        self.Bind(wx.EVT_MENU, self.on_exit, id=ID_FILE_EXIT)
        accelerators = wx.AcceleratorTable([
            (wx.ACCEL_CMD, ord("W"), ID_FILE_EXIT),
            (wx.ACCEL_CMD, ord("O"), ID_FILE_LOAD_MEASUREMENTS),
            (wx.ACCEL_CMD, ord("S"), ID_FILE_SAVE_MEASUREMENTS)
        ])
        self.SetAcceleratorTable(accelerators)
        #
        # Add an image menu
        #
        image_menu = wx.Menu()
        image_menu.Append(ID_IMAGE_CHOOSE, "&Choose")
        self.MenuBar.Append(image_menu, "&Image")
        self.Bind(wx.EVT_MENU, self.on_image_choose, id=ID_IMAGE_CHOOSE)

        self.SetSizer(self.sizer)
        self.Size = (self.module_view.get_max_width(), self.Size[1])
        module_panel.Layout()
        self.Show()
        self.tbicon = wx.TaskBarIcon()
        self.tbicon.SetIcon(get_cp_icon(), "CellProfiler2.0")
        self.SetIcon(get_cp_icon())
Example #32
def run_pipeline_headless(options, args):
    """
    Run a CellProfiler pipeline in headless mode
    """
    if options.first_image_set is not None:
        if not options.first_image_set.isdigit():
            raise ValueError("The --first-image-set option takes a numeric argument")
        else:
            image_set_start = int(options.first_image_set)
    else:
        image_set_start = None

    image_set_numbers = None

    if options.last_image_set is not None:
        if not options.last_image_set.isdigit():
            raise ValueError("The --last-image-set option takes a numeric argument")
        else:
            image_set_end = int(options.last_image_set)

            if image_set_start is None:
                image_set_numbers = numpy.arange(1, image_set_end + 1)
            else:
                image_set_numbers = numpy.arange(image_set_start, image_set_end + 1)
    else:
        image_set_end = None

    if (options.pipeline_filename is not None) and (
        not options.pipeline_filename.lower().startswith("http")
    ):
        options.pipeline_filename = os.path.expanduser(options.pipeline_filename)

    pipeline = cellprofiler.pipeline.Pipeline()

    initial_measurements = None

    try:
        if h5py.is_hdf5(options.pipeline_filename):
            initial_measurements = cellprofiler.measurement.load_measurements(
                options.pipeline_filename, image_numbers=image_set_numbers
            )
    except:
        logging.root.info("Failed to load measurements from pipeline")

    if initial_measurements is not None:
        pipeline_text = initial_measurements.get_experiment_measurement(
            cellprofiler.pipeline.M_PIPELINE
        )

        pipeline_text = pipeline_text.encode("us-ascii")

        pipeline.load(six.moves.StringIO(pipeline_text))

        if not pipeline.in_batch_mode():
            #
            # Need file list in order to call prepare_run
            #

            with h5py.File(options.pipeline_filename, "r") as src:
                if cellprofiler.utilities.hdf5_dict.HDF5FileList.has_file_list(src):
                    cellprofiler.utilities.hdf5_dict.HDF5FileList.copy(
                        src, initial_measurements.hdf5_dict.hdf5_file
                    )
    else:
        pipeline.load(options.pipeline_filename)

    if options.groups is not None:
        kvs = [x.split("=") for x in options.groups.split(",")]

        groups = dict(kvs)
    else:
        groups = None

    file_list = cellprofiler.preferences.get_image_set_file()

    if file_list is not None:
        pipeline.read_file_list(file_list)
    elif options.image_directory is not None:
        pathnames = []

        os.path.walk(
            os.path.abspath(options.image_directory),
            lambda pathnames, dirname, fnames: pathnames.append(
                [
                    os.path.join(dirname, fname)
                    for fname in fnames
                    if os.path.isfile(os.path.join(dirname, fname))
                ]
            ),
            pathnames,
        )

        pathnames = sum(pathnames, [])

        pipeline.add_pathnames_to_file_list(pathnames)

    #
    # Fixup CreateBatchFiles with any command-line input or output directories
    #
    if pipeline.in_batch_mode():
        create_batch_files = [
            m for m in pipeline.modules() if m.is_create_batch_module()
        ]

        if len(create_batch_files) > 0:
            create_batch_files = create_batch_files[0]

            if options.output_directory is not None:
                create_batch_files.custom_output_directory.value = (
                    options.output_directory
                )

            if options.image_directory is not None:
                create_batch_files.default_image_directory.value = (
                    options.image_directory
                )

    use_hdf5 = len(args) > 0 and not args[0].lower().endswith(".mat")

    measurements = pipeline.run(
        image_set_start=image_set_start,
        image_set_end=image_set_end,
        grouping=groups,
        measurements_filename=None if not use_hdf5 else args[0],
        initial_measurements=initial_measurements,
    )

    if len(args) > 0 and not use_hdf5:
        pipeline.save_measurements(args[0], measurements)

    if options.done_file is not None:
        if measurements is not None and measurements.has_feature(
            cellprofiler.measurement.EXPERIMENT, cellprofiler.pipeline.EXIT_STATUS
        ):
            done_text = measurements.get_experiment_measurement(
                cellprofiler.pipeline.EXIT_STATUS
            )

            exit_code = 0 if done_text == "Complete" else -1
        else:
            done_text = "Failure"

            exit_code = -1

        fd = open(options.done_file, "wt")
        fd.write("%s\n" % done_text)
        fd.close()
    elif not measurements.has_feature(
        cellprofiler.measurement.EXPERIMENT, cellprofiler.pipeline.EXIT_STATUS
    ):
        # The pipeline probably failed
        exit_code = 1
    else:
        exit_code = 0

    if measurements is not None:
        measurements.close()

    return exit_code
Example #33
def run_pipeline_headless(options, args):
    '''Run a CellProfiler pipeline in headless mode'''

    if sys.platform == 'darwin':
        if options.start_awt:
            import bioformats
            from javabridge import activate_awt
            activate_awt()

    if not options.first_image_set is None:
        if not options.first_image_set.isdigit():
            raise ValueError(
                "The --first-image-set option takes a numeric argument")
        else:
            image_set_start = int(options.first_image_set)
    else:
        image_set_start = None

    image_set_numbers = None
    if not options.last_image_set is None:
        if not options.last_image_set.isdigit():
            raise ValueError(
                "The --last-image-set option takes a numeric argument")
        else:
            image_set_end = int(options.last_image_set)
            if image_set_start is None:
                image_set_numbers = np.arange(1, image_set_end + 1)
            else:
                image_set_numbers = np.arange(image_set_start,
                                              image_set_end + 1)
    else:
        image_set_end = None

    if ((options.pipeline_filename is not None)
            and (not options.pipeline_filename.lower().startswith('http'))):
        options.pipeline_filename = os.path.expanduser(
            options.pipeline_filename)
    from cellprofiler.pipeline import Pipeline, EXIT_STATUS, M_PIPELINE
    import cellprofiler.measurements as cpmeas
    pipeline = Pipeline()
    initial_measurements = None
    try:
        if h5py.is_hdf5(options.pipeline_filename):
            initial_measurements = cpmeas.load_measurements(
                options.pipeline_filename, image_numbers=image_set_numbers)
    except:
        logging.root.info("Failed to load measurements from pipeline")
    if initial_measurements is not None:
        pipeline_text = \
            initial_measurements.get_experiment_measurement(
                M_PIPELINE)
        pipeline_text = pipeline_text.encode('us-ascii')
        pipeline.load(StringIO(pipeline_text))
        if not pipeline.in_batch_mode():
            #
            # Need file list in order to call prepare_run
            #
            from cellprofiler.utilities.hdf5_dict import HDF5FileList
            with h5py.File(options.pipeline_filename, "r") as src:
                if HDF5FileList.has_file_list(src):
                    HDF5FileList.copy(src,
                                      initial_measurements.hdf5_dict.hdf5_file)
    else:
        pipeline.load(options.pipeline_filename)
    if options.groups is not None:
        kvs = [x.split('=') for x in options.groups.split(',')]
        groups = dict(kvs)
    else:
        groups = None
    use_hdf5 = len(args) > 0 and not args[0].lower().endswith(".mat")
    measurements = pipeline.run(
        image_set_start=image_set_start,
        image_set_end=image_set_end,
        grouping=groups,
        measurements_filename=None if not use_hdf5 else args[0],
        initial_measurements=initial_measurements)
    if len(args) > 0 and not use_hdf5:
        pipeline.save_measurements(args[0], measurements)
    if options.done_file is not None:
        if (measurements is not None
                and measurements.has_feature(cpmeas.EXPERIMENT, EXIT_STATUS)):
            done_text = measurements.get_experiment_measurement(EXIT_STATUS)
        else:
            done_text = "Failure"
        fd = open(options.done_file, "wt")
        fd.write("%s\n" % done_text)
        fd.close()
    if measurements is not None:
        measurements.close()
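Examples #32 and #33 use is_hdf5 to decide whether the pipeline argument is an HDF5 measurements file or a plain-text pipeline. A stripped-down sketch of that dispatch, with load_measurements and load_pipeline standing in for the CellProfiler loaders:

import h5py

def load_pipeline_argument(path, load_measurements, load_pipeline):
    # HDF5 files carry embedded measurements; anything else is treated as a
    # plain-text pipeline definition.
    if h5py.is_hdf5(path):
        return load_measurements(path)
    return load_pipeline(path)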
Example #34
    def getStackFromPattern(self,
                            filepattern,
                            begin,
                            end,
                            increment=None,
                            imagestack=None,
                            fileindex=0):
        #get the first filename
        filename = filepattern % tuple(begin)
        if not os.path.exists(filename):
            raise IOError("Filename %s does not exist." % filename)
        #get the file list
        args = self.getFileListFromPattern(filepattern,
                                           begin,
                                           end,
                                           increment=increment)

        #get the file type
        f = open(args[0], 'rb')
        #read 10 characters
        line = f.read(10)
        f.close()
        if hasattr(line, "decode"):
            # convert to string ignoring errors
            line = line.decode("utf-8", "ignore")

        specfile = False
        marCCD = False
        if line.startswith("II") or line.startswith("MM"):
            marCCD = True
        if line[0] == "\n":
            line = line[1:]
        if line.startswith("{") or marCCD:
            if imagestack is None:
                if marCCD:
                    imagestack = True
            if imagestack:
                #prevent any modification
                fileindex = 0
            if filepattern is not None:
                # this does not seem to cause any trouble
                #(because of no redimensioning attempt)
                if False and (len(begin) != 1):
                    raise IOError("EDF stack redimensioning not supported yet")
            stack = QStack(imagestack=imagestack)
        elif line.startswith('Spectral'):
            stack = OmnicMap.OmnicMap(args[0])
        elif line.startswith('#\tDate:'):
            stack = LuciaMap.LuciaMap(args[0])
        elif args[0][-4:].upper() in ["PIGE", "PIXE"]:
            stack = SupaVisioMap.SupaVisioMap(args[0])
        elif args[0][-3:].upper() in ["RBS"]:
            stack = SupaVisioMap.SupaVisioMap(args[0])
        elif args[0][-3:].lower() in [".h5", "nxs", "hdf", "hdf5"]:
            if not HDF5:
                raise IOError(\
                    "No HDF5 support while trying to read an HDF5 file")
            stack = QHDF5Stack1D.QHDF5Stack1D(args)
        elif args[0].upper().endswith("RAW.GZ")or\
             args[0].upper().endswith("EDF.GZ")or\
             args[0].upper().endswith("CCD.GZ")or\
             args[0].upper().endswith("RAW.BZ2")or\
             args[0].upper().endswith("EDF.BZ2")or\
             args[0].upper().endswith("CCD.BZ2"):
            if imagestack is None:
                imagestack = True
            stack = QStack(imagestack=imagestack)
        else:
            if HDF5:
                if h5py.is_hdf5(args[0]):
                    stack = QHDF5Stack1D.QHDF5Stack1D(args)
                else:
                    stack = QSpecFileStack()
                    specfile = True
            else:
                stack = QSpecFileStack()
                specfile = True

        if specfile and (len(begin) == 2):
            if increment is None:
                increment = [1] * len(begin)
            shape = (len(range(begin[0], end[0] + 1, increment[0])),
                     len(range(begin[1], end[1] + 1, increment[1])))
            stack.loadFileList(args, fileindex=fileindex, shape=shape)
        else:
            stack.loadFileList(args, fileindex=fileindex)
        return stack
Example #35
def general(eri,
            mo_coeffs,
            erifile,
            dataname='eri_mo',
            ioblk_size=IOBLK_SIZE,
            compact=True,
            verbose=logger.NOTE):
    '''For the given four sets of orbitals, transfer arbitrary spherical AO
    integrals to MO integrals on disk.
    Args:
        eri : 8-fold reduced eri vector
        mo_coeffs : 4-item list of ndarray
            Four sets of orbital coefficients, corresponding to the four
            indices of (ij|kl)
        erifile : str or h5py File or h5py Group object
            To store the transformed integrals, in HDF5 format.
    Kwargs
        dataname : str
            The dataset name in the erifile (ref the hierarchy of HDF5 format
            http://www.hdfgroup.org/HDF5/doc1.6/UG/09_Groups.html).  By assigning
            different datanames, an existing integral file can be reused.  If
            the erifile already contains the dataname, the new integral data will
            overwrite the old one.
        ioblk_size : float or int
            The block size for IO, large block size may **not** improve performance
        compact : bool
            When compact is True, depending on the four orbital sets, the
            returned MO integrals has (up to 4-fold) permutation symmetry.
            If it's False, the function will abandon any permutation symmetry,
            and return the "plain" MO integrals


    Pseudocode / algorithm:
        u = mu
        v = nu
        l = lambda
        o = sigma

        Assume eri's are 8-fold reduced.
        nij/nkl_pair = npair or i*j/k*l if only transforming a subset

        First half transform:
            Initialize half_eri of size (nij_pair,npair)
                For lo = 1 -> npair
                    Unpack row lo
                    Unpack row lo to matrix E_{uv}^{lo}
                    Transform C_ui^+*E*C_nj -> E_{ij}^{lo}
                    Ravel or pack E_{ij}^{lo}
                    Save E_{ij}^{lo} -> half_eri[:,lo]

        Second half transform:
            Initialize h5d_eri of size (nij_pair,nkl_pair)
                For ij = 1 -> nij_pair
                    Load and unpack half_eri[ij,:] -> E_{lo}^{ij}
                    Transform C_{lk}E_{lo}^{ij}C_{ol} -> E_{kl}^{ij}
                    Repack E_{kl}^{ij}
                    Save E_{kl}^{ij} -> h5d_eri[ij,:]

        Each matrix is indexed by the composite index ij x kl, where ij/kl is
        either npair or ixj/kxl, if only a subset of MOs are being transformed.
        Since entire rows or columns need to be read in, the arrays are chunked
        such that IOBLK_SIZE = row/col x chunking col/row. For example, for the
        first half transform, we would save in nij_pair x IOBLK_SIZE/nij_pair,
        then load in IOBLK_SIZE/nkl_pair x npair for the second half transform.

        ------ kl ----->
        |jxl
        |
        ij
        |
        |
        v

        As a first guess, the chunking size is jxl. If the super-rows/cols are
        larger than IOBLK_SIZE, then the chunk rectangle jxl is trimmed
        accordingly. The pathological limiting case is where the dimensions
        nao_pair, nij_pair, or nkl_pair are so large that the arrays are
        chunked 1x1, in which case IOBLK_SIZE needs to be increased.

    '''
    log = logger.new_logger(None, verbose)
    log.info('******** ao2mo disk, custom eri ********')

    nmoi = mo_coeffs[0].shape[1]
    nmoj = mo_coeffs[1].shape[1]
    nmok = mo_coeffs[2].shape[1]
    nmol = mo_coeffs[3].shape[1]
    nao = mo_coeffs[0].shape[0]

    nao_pair = nao * (nao + 1) // 2
    if compact and iden_coeffs(mo_coeffs[0], mo_coeffs[1]):
        ij_red = False
        nij_pair = nmoi * (nmoi + 1) // 2
    else:
        ij_red = True
        nij_pair = nmoi * nmoj
    if compact and iden_coeffs(mo_coeffs[2], mo_coeffs[3]):
        kl_red = False
        nkl_pair = nmok * (nmok + 1) // 2
    else:
        kl_red = True
        nkl_pair = nmok * nmol

    dtype = numpy.result_type(eri, *mo_coeffs)
    typesize = dtype.itemsize / 1e6  # in MB
    chunks_half = (max(
        1, numpy.minimum(int(ioblk_size // (nao_pair * typesize)), nmoj)),
                   max(
                       1,
                       numpy.minimum(int(ioblk_size // (nij_pair * typesize)),
                                     nmol)))
    '''
    ideally, the final transformed eris should have a chunk of nmoj x nmol to
    optimize read operations. However, I'm chunking the row size so that the
    write operations during the transform can be done as fast as possible.
    '''
    chunks_full = (numpy.minimum(int(ioblk_size // (nkl_pair * typesize)),
                                 nmoj), nmol)

    if isinstance(erifile, str):
        if h5py.is_hdf5(erifile):
            feri = h5py.File(erifile, 'a')
            if dataname in feri:
                del (feri[dataname])
        else:
            feri = h5py.File(erifile, 'w', libver='latest')
    else:
        assert (isinstance(erifile, h5py.Group))
        feri = erifile

    h5d_eri = feri.create_dataset(dataname, (nij_pair, nkl_pair),
                                  dtype.char,
                                  chunks=chunks_full)

    feri_swap = lib.H5TmpFile(libver='latest')
    half_eri = feri_swap.create_dataset(dataname, (nij_pair, nao_pair),
                                        dtype.char,
                                        chunks=chunks_half)

    log.debug('Memory information:')
    log.debug('  IOBLK_SIZE (MB): {}'.format(ioblk_size))
    log.debug('  jxl {}x{}, half eri chunk dim  {}x{}'.format(
        nmoj, nmol, chunks_half[0], chunks_half[1]))
    log.debug('  jxl {}x{}, full eri chunk dim {}x{}'.format(
        nmoj, nmol, chunks_full[0], chunks_full[1]))
    log.debug('  Final disk eri size (MB): {:.3g}, chunked {:.3g}'.format(
        nij_pair * nkl_pair * typesize,
        numpy.prod(chunks_full) * typesize))
    log.debug(
        '  Half transformed eri size (MB): {:.3g}, chunked {:.3g}'.format(
            nij_pair * nao_pair * typesize,
            numpy.prod(chunks_half) * typesize))
    log.debug('  RAM buffer for half transform (MB): {:.3g}'.format(
        nij_pair * chunks_half[1] * typesize * 2))
    log.debug('  RAM buffer for full transform (MB): {:.3g}'.format(
        typesize * chunks_full[0] * nkl_pair * 2 +
        chunks_half[0] * nao_pair * typesize * 2))

    def save1(piece, buf):
        start = piece * chunks_half[1]
        stop = (piece + 1) * chunks_half[1]
        if stop > nao_pair:
            stop = nao_pair
        half_eri[:, start:stop] = buf[:, :stop - start]
        return

    def load2(piece):
        start = piece * chunks_half[0]
        stop = (piece + 1) * chunks_half[0]
        if stop > nij_pair:
            stop = nij_pair
            if start >= nij_pair:
                start = stop - 1
        return half_eri[start:stop, :]

    def prefetch2(piece):
        start = piece * chunks_half[0]
        stop = (piece + 1) * chunks_half[0]
        if stop > nij_pair:
            stop = nij_pair
            if start >= nij_pair:
                start = stop - 1
        buf_prefetch[:stop - start, :] = half_eri[start:stop, :]
        return

    def save2(piece, buf):
        start = piece * chunks_full[0]
        stop = (piece + 1) * chunks_full[0]
        if stop > nij_pair:
            stop = nij_pair
        h5d_eri[start:stop, :] = buf[:stop - start, :]
        return

    # transform \mu\nu -> ij
    cput0 = time.clock(), time.time()
    Cimu = mo_coeffs[0].conj().transpose()
    buf_write = numpy.empty((nij_pair, chunks_half[1]))
    buf_out = numpy.empty_like(buf_write)
    wpiece = 0
    with lib.call_in_background(save1) as async_write:
        for lo in range(nao_pair):
            if lo % chunks_half[1] == 0 and lo > 0:
                #save1(wpiece,buf_write)
                buf_out, buf_write = buf_write, buf_out
                async_write(wpiece, buf_out)
                wpiece += 1
            buf = lib.unpack_row(eri, lo)
            uv = lib.unpack_tril(buf)
            uv = Cimu.dot(uv).dot(mo_coeffs[1])
            if ij_red:
                ij = numpy.ravel(uv)  # grabs by row
            else:
                ij = lib.pack_tril(uv)
            buf_write[:, lo % chunks_half[1]] = ij
    # final write operation & cleanup
    save1(wpiece, buf_write)
    log.timer('(uv|lo) -> (ij|lo)', *cput0)
    uv = None
    ij = None
    buf = None

    # transform \lambda\sigma -> kl
    cput1 = time.clock(), time.time()
    Cklam = mo_coeffs[2].conj().transpose()
    buf_write = numpy.empty((chunks_full[0], nkl_pair))
    buf_out = numpy.empty_like(buf_write)
    buf_read = numpy.empty((chunks_half[0], nao_pair))
    buf_prefetch = numpy.empty_like(buf_read)
    rpiece = 0
    wpiece = 0
    with lib.call_in_background(save2, prefetch2) as (async_write, prefetch):
        buf_read = load2(rpiece)
        prefetch(rpiece + 1)
        for ij in range(nij_pair):
            if ij % chunks_full[0] == 0 and ij > 0:
                #save2(wpiece,buf_write)
                buf_out, buf_write = buf_write, buf_out
                async_write(wpiece, buf_out)
                wpiece += 1
            if ij % chunks_half[0] == 0 and ij > 0:
                #buf_read = load2(rpiece)
                buf_read, buf_prefetch = buf_prefetch, buf_read
                rpiece += 1
                prefetch(rpiece + 1)
            lo = lib.unpack_tril(buf_read[ij % chunks_half[0], :])
            lo = Cklam.dot(lo).dot(mo_coeffs[3])
            if kl_red:
                kl = numpy.ravel(lo)
            else:
                kl = lib.pack_tril(lo)
            buf_write[ij % chunks_full[0], :] = kl
    save2(wpiece, buf_write)
    log.timer('(ij|lo) -> (ij|kl)', *cput1)

    if isinstance(erifile, str):
        feri.close()
    return erifile
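
The commented-out save1/save2 calls above mark the synchronous fallback for the asynchronous, double-buffered writes. A minimal, generic sketch of the same ping-pong pattern (plain threading, not the pyscf lib API) could look like this:

import threading

def double_buffered_pipeline(pieces, compute, write):
    # Overlap the write of piece i with the computation of piece i+1.
    pending = None
    for i, piece in enumerate(pieces):
        buf = compute(piece)            # produce the next buffer
        if pending is not None:
            pending.join()              # wait for the previous write to finish
        pending = threading.Thread(target=write, args=(i, buf))
        pending.start()
    if pending is not None:
        pending.join()                  # flush the last write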
Beispiel #36
0
def sample_filter(peak_matrix: Union[PeakMatrix, str], min_fraction: float, within: bool = False,
                  rsd_thres: Union[float, None] = None, qc_label: Union[str, None] = None, labels: Union[str, None] = None):
    """
    Removes peaks from the input PeakMatrix object (or .hdf5 file) that were detected in fewer than a user-defined
    minimum number of study samples.

    There are many and varied reasons why a peak may not have been detected in all study samples, including:
        * due to having an intensity (concentration) close to the signal-to-noise limit of the system;
        * due to having been present in only one of the study classes (e.g. a drug administered to the ‘treatment’
          class samples);
        * due to ion suppression/enhancement effects in the mass spectrometer source region; etc.

    :param peak_matrix: PeakMatrix object or path to .hdf5 file

    :param min_fraction: Minimum fraction - a numeric value between 0 and 1 indicating the proportion of study
        samples in which a peak must have a recorded intensity value in order for it to be retained in the output peak
        intensity matrix; e.g. 0.5 means that at least 50% of samples (whether assessed across all classes, or within
        each class individually) must have a recorded intensity value for a specific peak in order for it to be retained
        in the output peak matrix.

    :param within: Apply sample filter within each sample class

        * **False** - check across ALL classes simultaneously whether greater than the user-defined “Minimum fraction”
          of samples contained an intensity value for a specific mass spectral peak.
        * **True** - check within EACH class separately whether greater than the user-defined “Minimum fraction” of
          samples contained an intensity value for a specific mass spectral peak.

        .. warning::
            If in ANY class a peak is detected in greater than the user-defined minimum fraction of samples, then
            the peak is retained in the output peak matrix. For classes in which this condition is not met, the
            peak intensity recorded for that peak (if any) will still be presented in the output peak matrix.
            If no peak intensity was recorded in a sample, then a ‘0’ is inserted into the peak matrix.

    :param rsd_thres: Relative standard deviation threshold - A numerical value equal to or greater than 0.
        If greater than 0, then peaks whose intensity values have a percent relative standard deviation (otherwise termed
        the percent coefficient of variation) greater than this value are excluded from the output PeakMatrix object.

    :param qc_label: Label for the QC samples - a string indicating the name of the class to be used for
        filtering, i.e. the “reference” class. This string must have been included in the “classLabel”
        column of the metadata file associated with the process_scans or replicate_filter function(s).

    :param labels: Path to a metadata file

    :return: PeakMatrix object
    """

    if not isinstance(peak_matrix, PeakMatrix):
        if h5py.is_hdf5(peak_matrix):
            peak_matrix = hdf5_portal.load_peak_matrix_from_hdf5(peak_matrix)
        else:
            peak_matrix = txt_portal.load_peak_matrix_from_txt(peak_matrix)

    if labels is not None:
        if not os.path.isfile(labels):
            raise IOError("{} does not exist".format(labels))
        peak_matrix = update_labels(peak_matrix, labels)

    if qc_label is not None:
        if Tag(qc_label, 'classLabel') not in peak_matrix.peaklist_tags:
            raise IOError("QC label ({}) does not exist".format(qc_label))

    peak_matrix = filter_fraction(peak_matrix, min_fraction, within_classes=within, class_tag_type="classLabel")

    if rsd_thres is not None:
        peak_matrix = filter_rsd(peak_matrix, rsd_thres, Tag(qc_label, "classLabel"))
    return peak_matrix
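
A minimal usage sketch; the file names, QC label and thresholds below are placeholders, assuming the peak matrix was previously saved with hdf5_portal:

pm = sample_filter("peak_matrix.hdf5", min_fraction=0.8, within=True,
                   rsd_thres=30.0, qc_label="QC", labels="metadata.txt")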
Beispiel #37
0
    def assert_h5_format(self, path):
        if h5py is not None:
            self.assertTrue(
                h5py.is_hdf5(path),
                'Model saved at path {} is not a valid hdf5 file.'.format(
                    path))
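
A hypothetical call site inside a Keras test case; model, self.get_temp_dir() and the save call are assumptions about the surrounding test class:

path = os.path.join(self.get_temp_dir(), "model.h5")
model.save(path)             # the .h5 suffix selects the HDF5 format
self.assert_h5_format(path)  # verify the saved file really is HDF5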
Beispiel #38
0

def visualize_file():
    """ Open the HDF5 file and display the result """
    try:
        import pylab as p
    except ImportError:
        print("Whoops! Matplotlib is required to view the fractal.")
        raise

    f = h5py.File('mandelbrot.hdf5', 'r')
    dset = f['mandelbrot']
    a = dset[...]
    p.imshow(a.transpose())

    print("Displaying fractal. Close window to exit program.")
    try:
        p.show()
    finally:
        f.close()


if __name__ == '__main__':
    if not h5py.is_hdf5('mandelbrot.hdf5'):
        run_calculation()
    else:
        print(
            'Fractal found in "mandelbrot.hdf5". Delete file to re-run calculation.'
        )
    visualize_file()
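
run_calculation() is referenced above but not shown; a self-contained sketch of what it might do (grid size, bounds and iteration count are arbitrary choices) is:

import numpy as np
import h5py

def run_calculation(nx=1024, ny=768, maxiter=100):
    # Escape-time counts for the Mandelbrot set on a rectangular grid.
    x = np.linspace(-2.0, 1.0, nx)
    y = np.linspace(-1.25, 1.25, ny)
    c = x[:, None] + 1j * y[None, :]
    z = np.zeros_like(c)
    counts = np.zeros(c.shape, dtype=np.int32)
    for _ in range(maxiter):
        mask = np.abs(z) <= 2.0
        z[mask] = z[mask] ** 2 + c[mask]
        counts[mask] += 1
    with h5py.File('mandelbrot.hdf5', 'w') as f:
        f.create_dataset('mandelbrot', data=counts)   # dataset read by visualize_file()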
Beispiel #39
0
import os, datetime
from spacepy import DOT_FLN, help
from spacepy.toolbox import loadpickle
try:
    import h5py
    _ext = '.h5'
except ImportError:
    _ext = '.pkl'

#dotfln = os.environ['HOME']+'/.spacepy'
omnifln = os.path.join(DOT_FLN, 'data', 'omnidata{0}'.format(_ext))
omni2fln = os.path.join(DOT_FLN, 'data', 'omni2data{0}'.format(_ext))
testfln = os.path.join('data', 'OMNItest{0}'.format(_ext))

if _ext == '.h5':
    presentQD = h5py.is_hdf5(omnifln)
    presentO2 = h5py.is_hdf5(omni2fln)
    if not (presentQD and presentO2):
        print(
            "Qin-Denton/OMNI2 data not found in current format. This module has limited functionality."
        )
        print("Run spacepy.toolbox.update(QDomni=True) to download data")
else:
    presentQD = os.path.isfile(omnifln)
    presentO2 = os.path.isfile(omni2fln)
    if not (presentQD and presentO2):
        print(
            "No Qin-Denton/OMNI2 data found. This module has limited functionality."
        )
        print("Run spacepy.toolbox.update(QDomni=True) to download data")
    else:
Beispiel #40
0
    def merge_files(destination, sources, force_headless=False):
        is_headless = force_headless or get_headless()
        if not is_headless:
            import wx
        if len(sources) == 0:
            return
        if not is_headless:
            progress = wx.ProgressDialog(
                "Writing " + destination,
                "Loading " + sources[0],
                maximum=len(sources) * 4 + 1,
                style=wx.PD_CAN_ABORT
                | wx.PD_APP_MODAL
                | wx.PD_ELAPSED_TIME
                | wx.PD_REMAINING_TIME,
            )
        count = 0
        try:
            pipeline = cpp.Pipeline()
            has_error = [False]

            def callback(caller, event):
                if isinstance(event, cpp.LoadExceptionEvent):
                    wx.MessageBox(
                        message="Could not load %s: %s" % (sources[0], event.error),
                        caption="Failed to load %s" % sources[0],
                    )
                    has_error[0] = True

            pipeline.add_listener(callback)

            pipeline.load(sources[0])
            if has_error[0]:
                return
            if destination.lower().endswith(".h5"):
                mdest = cpmeas.Measurements(filename=destination, multithread=False)
                h5_dest = True
            else:
                mdest = cpmeas.Measurements(multithread=False)
                h5_dest = False
            for source in sources:
                if not is_headless:
                    count += 1
                    keep_going, skip = progress.Update(count, "Loading " + source)
                    if not keep_going:
                        return
                if h5py.is_hdf5(source):
                    msource = cpmeas.Measurements(
                        filename=source, mode="r", multithread=False
                    )
                else:
                    msource = cpmeas.load_measurements(source)
                dest_image_numbers = mdest.get_image_numbers()
                source_image_numbers = msource.get_image_numbers()
                if len(dest_image_numbers) == 0 or len(source_image_numbers) == 0:
                    offset_source_image_numbers = source_image_numbers
                else:
                    offset_source_image_numbers = (
                        np.max(dest_image_numbers)
                        - np.min(source_image_numbers)
                        + source_image_numbers
                        + 1
                    )
                for object_name in msource.get_object_names():
                    if object_name in mdest.get_object_names():
                        destfeatures = mdest.get_feature_names(object_name)
                    else:
                        destfeatures = []
                    for feature in msource.get_feature_names(object_name):
                        if object_name == cpmeas.EXPERIMENT:
                            if not mdest.has_feature(object_name, feature):
                                src_value = msource.get_experiment_measurement(feature)
                                mdest.add_experiment_measurement(feature, src_value)
                            continue
                        src_values = msource.get_measurement(
                            object_name, feature, image_set_number=source_image_numbers
                        )
                        mdest[
                            object_name, feature, offset_source_image_numbers
                        ] = src_values
                    destset = set(destfeatures)
            if not is_headless:
                keep_going, skip = progress.Update(
                    count + 1, "Saving to " + destination
                )
                if not keep_going:
                    return
            if not h5_dest:
                pipeline.save_measurements(destination, mdest)
        finally:
            if not is_headless:
                progress.Destroy()
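
A hypothetical headless invocation; the file names are placeholders and assume CellProfiler measurement files:

merge_files("combined.h5", ["run1.h5", "run2.h5"], force_headless=True)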
Beispiel #41
0
    def __init__(self,
                 filename,
                 f_start=None,
                 f_stop=None,
                 t_start=None,
                 t_stop=None,
                 load_data=True,
                 max_load=1.):
        """ Constructor.

        Args:
            filename (str): filename of blimpy file.
            f_start (float): start frequency, in MHz
            f_stop (float): stop frequency, in MHz
            t_start (int): start time bin
            t_stop (int): stop time bin
        """
        super(H5Reader, self).__init__()

        if filename and os.path.isfile(filename) and h5py.is_hdf5(filename):

            #These values may be modified once code for multi_beam and multi_stokes observations are possible.
            self.freq_axis = 2
            self.time_axis = 0
            self.beam_axis = 1  # Place holder
            self.stokes_axis = 4  # Place holder

            self.filename = filename
            self.filestat = os.stat(filename)
            self.filesize = self.filestat.st_size / (1024.0**2)
            self.load_data = load_data
            self.h5 = h5py.File(self.filename, mode='r')
            self.read_header()
            self.file_size_bytes = os.path.getsize(self.filename)  # In bytes
            self.n_ints_in_file = self.h5["data"].shape[self.time_axis]  #
            self.n_channels_in_file = self.h5["data"].shape[self.freq_axis]  #
            self.n_beams_in_file = self.header[
                'nifs']  #Placeholder for future development.
            self.n_pols_in_file = 1  #Placeholder for future development.
            self._n_bytes = int(self.header['nbits'] /
                                8)  #number of bytes per digit.
            self._d_type = self._setup_dtype()
            self.file_shape = (self.n_ints_in_file, self.n_beams_in_file,
                               self.n_channels_in_file)

            if self.header['foff'] < 0:
                self.f_end = self.header['fch1']
                self.f_begin = self.f_end + self.n_channels_in_file * self.header[
                    'foff']
            else:
                self.f_begin = self.header['fch1']
                self.f_end = self.f_begin + self.n_channels_in_file * self.header[
                    'foff']

            self.t_begin = 0
            self.t_end = self.n_ints_in_file

            #Taking care all the frequencies are assigned correctly.
            self._setup_selection_range(f_start=f_start,
                                        f_stop=f_stop,
                                        t_start=t_start,
                                        t_stop=t_stop,
                                        init=True)
            #Convert input frequencies into what their corresponding channel number would be.
            self._setup_chans()
            #Update frequencies ranges from channel number.
            self._setup_freqs()

            #Applying data size limit to load.
            if max_load is not None:
                if max_load > 1.0:
                    logger.warning(
                        'Setting data limit > 1GB, please handle with care!')
                self.MAX_DATA_ARRAY_SIZE = max_load * MAX_DATA_ARRAY_SIZE_UNIT
            else:
                self.MAX_DATA_ARRAY_SIZE = MAX_DATA_ARRAY_SIZE_UNIT

            if self.file_size_bytes > self.MAX_DATA_ARRAY_SIZE:
                self.large_file = True
            else:
                self.large_file = False

            if self.load_data:
                if self.large_file:
                    #Only checking the selection, if the file is too large.
                    if self.f_start or self.f_stop or self.t_start or self.t_stop:
                        if self.isheavy():
                            logger.warning(
                                "Selection size of %.2f GB, exceeding our size limit %.2f GB. Instance created, header loaded, but data not loaded, please try another (t,v) selection."
                                % (self._calc_selection_size() /
                                   (1024.**3), self.MAX_DATA_ARRAY_SIZE /
                                   (1024.**3)))
                            self._init_empty_selection()
                        else:
                            self.read_data()
                    else:
                        logger.warning(
                            "The file is of size %.2f GB, exceeding our size limit %.2f GB. Instance created, header loaded, but data not loaded. You could try another (t,v) selection."
                            % (self.file_size_bytes /
                               (1024.**3), self.MAX_DATA_ARRAY_SIZE /
                               (1024.**3)))
                        self._init_empty_selection()
                else:
                    self.read_data()
            else:
                logger.debug("Skipping loading data ...")
                self._init_empty_selection()
        else:
            raise IOError("Need a file to open, please give me one!")
Beispiel #42
0
def main(args):
    '''Run CellProfiler

    args - command-line arguments, e.g. sys.argv
    '''
    import cellprofiler.preferences as cpprefs
    if any([arg.startswith('--work-announce') for arg in args]):
        #
        # Go headless ASAP
        #
        cpprefs.set_headless()
        for i, arg in enumerate(args):
            if arg == "--ij-plugins-directory" and len(args) > i + 1:
                cpprefs.set_ij_plugin_directory(args[i + 1])
                break
        import cellprofiler.analysis_worker
        cellprofiler.analysis_worker.aw_parse_args()
        cellprofiler.analysis_worker.main()
        sys.exit(0)

    options, args = parse_args(args)
    #
    # Important to go headless ASAP
    #
    if not options.show_gui:
        import cellprofiler.preferences as cpprefs
        cpprefs.set_headless()
        # What's there to do but run if you're running headless?
        # Might want to change later if there's some headless setup
        options.run_pipeline = True

    if options.jvm_heap_size != None:
        from cellprofiler.preferences import set_jvm_heap_mb
        set_jvm_heap_mb(options.jvm_heap_size, False)
    set_log_level(options)

    if not hasattr(sys, "frozen") and options.code_statistics:
        print_code_statistics()
        return

    if options.print_groups_file is not None:
        print_groups(options.print_groups_file)
        return

    if options.batch_commands_file is not None:
        get_batch_commands(options.batch_commands_file)
        return

    if options.run_ilastik:
        run_ilastik()
        return

    if options.add_message_for_user:
        if len(args) != 3:
            sys.stderr.write("Usage: (for add_message-for-user)\n")
            sys.stderr.write(
                "CellProfiler --add-message-for-user <caption> <message> <pipeline-or-project>\n"
            )
            sys.stderr.write("where:\n")
            sys.stderr.write("    <caption> - the message box caption\n")
            sys.stderr.write(
                "    <message> - the message displayed inside the message box\n"
            )
            sys.stderr.write(
                "    <pipeline-or-project> - the path to the pipeline or project file to modify\n"
            )
            return
        caption = args[0]
        message = args[1]
        path = args[2]

        import h5py
        using_hdf5 = h5py.is_hdf5(path)
        if using_hdf5:
            import cellprofiler.measurements as cpmeas
            m = cpmeas.Measurements(filename=path, mode="r+")
            pipeline_text = m[cpmeas.EXPERIMENT, "Pipeline_Pipeline"]
        else:
            with open(path, "r") as fd:
                pipeline_text = fd.read()
        header, body = pipeline_text.split("\n\n", 1)
        pipeline_text = header + \
            ("\nMessageForUser:%s|%s\n\n" % (caption, message)) + body
        if using_hdf5:
            m[cpmeas.EXPERIMENT, "Pipeline_Pipeline"] = pipeline_text
            m.close()
        else:
            with open(path, "w") as fd:
                fd.write(pipeline_text)
        print "Message added to %s" % path
        return

    # necessary to prevent matplotlib trying to use Tkinter as its backend.
    # has to be done before CellProfilerApp is imported
    from matplotlib import use as mpluse
    mpluse('WXAgg')

    if (not hasattr(sys, 'frozen')) and options.fetch_external_dependencies:
        import external_dependencies
        external_dependencies.fetch_external_dependencies(
            options.overwrite_external_dependencies)

    if (not hasattr(sys, 'frozen')) and options.build_extensions:
        build_extensions()
        if options.build_and_exit:
            return

    if options.output_html:
        from cellprofiler.gui.html.manual import generate_html
        webpage_path = options.output_directory if options.output_directory else None
        generate_html(webpage_path)
        return
    if options.print_measurements:
        print_measurements(options)
        return
    if options.omero_credentials is not None:
        set_omero_credentials_from_string(options.omero_credentials)
    if options.plugins_directory is not None:
        cpprefs.set_plugin_directory(options.plugins_directory, globally=False)
    if options.ij_plugins_directory is not None:
        cpprefs.set_ij_plugin_directory(options.ij_plugins_directory,
                                        globally=False)
    if options.temp_dir is not None:
        if not os.path.exists(options.temp_dir):
            os.makedirs(options.temp_dir)
        cpprefs.set_temporary_directory(options.temp_dir, globally=False)
    if not options.allow_schema_write:
        cpprefs.set_allow_schema_write(False)
    #
    # After the crucial preferences are established, we can start the VM
    #
    from cellprofiler.utilities.cpjvm import cp_start_vm
    cp_start_vm()
    try:
        if options.show_gui:
            import wx
            wx.Log.EnableLogging(False)
            from cellprofiler.cellprofilerapp import CellProfilerApp
            from cellprofiler.workspace import is_workspace_file
            show_splashbox = (options.pipeline_filename is None
                              and (not options.new_project)
                              and options.show_splashbox)

            if options.pipeline_filename:
                if is_workspace_file(options.pipeline_filename):
                    workspace_path = os.path.expanduser(
                        options.pipeline_filename)
                    pipeline_path = None
                else:
                    pipeline_path = os.path.expanduser(
                        options.pipeline_filename)
                    workspace_path = None
            elif options.new_project:
                workspace_path = False
                pipeline_path = None
            else:
                workspace_path = None
                pipeline_path = None
            App = CellProfilerApp(
                0,
                check_for_new_version=(options.pipeline_filename is None),
                show_splashbox=show_splashbox,
                workspace_path=workspace_path,
                pipeline_path=pipeline_path)

        if options.data_file is not None:
            cpprefs.set_data_file(os.path.abspath(options.data_file))
        if options.image_set_file is not None:
            cpprefs.set_image_set_file(options.image_set_file, False)

        from cellprofiler.utilities.version import version_string, version_number
        logging.root.info("Version: %s / %d" %
                          (version_string, version_number))

        if options.run_pipeline and not options.pipeline_filename:
            raise ValueError("You must specify a pipeline filename to run")

        if options.output_directory:
            if not os.path.exists(options.output_directory):
                os.makedirs(options.output_directory)
            cpprefs.set_default_output_directory(options.output_directory)

        if options.image_directory:
            cpprefs.set_default_image_directory(options.image_directory)

        if options.show_gui:
            if options.run_pipeline:
                App.frame.pipeline_controller.do_analyze_images()
            App.MainLoop()
            return

        elif options.run_pipeline:
            run_pipeline_headless(options, args)
    except Exception as e:
        logging.root.fatal("Uncaught exception in CellProfiler.py",
                           exc_info=True)
        raise
Beispiel #43
0
def merge_peaklists(source: Sequence[PeakList], filelist: Union[str, None] = None):
    """
    Extracts and exports specific PeakList objects from one or more lists or .hdf5 files
    to one or more lists or .hdf5 files. If more than one .hdf5 file is exported, users can control
    which subset of peaklists is exported to which list.

    :param source: List or tuple of Peaklist objects, or .hdf5 files
    :param filelist: A tab-delimited text file containing metadata to determine which peaklists are exported together:

        **Example of a filelist** - the optional multilist column determines which peaklists are exported together.

        +-----------------+------------+-----------+-------+----------------+-----------+-------+
        | filename        | classLabel | replicate | batch | injectionOrder | multilist | [...] |
        +-----------------+------------+-----------+-------+----------------+-----------+-------+
        | sample_rep1.raw | sample     | 1         | 1     | 1              | 1         | [...] |
        +-----------------+------------+-----------+-------+----------------+-----------+-------+
        | sample_rep2.raw | sample     | 2         | 1     | 2              | 1         | [...] |
        +-----------------+------------+-----------+-------+----------------+-----------+-------+
        | sample_rep3.raw | sample     | 3         | 1     | 3              | 1         | [...] |
        +-----------------+------------+-----------+-------+----------------+-----------+-------+
        | sample_rep4.raw | sample     | 4         | 1     | 4              | 1         | [...] |
        +-----------------+------------+-----------+-------+----------------+-----------+-------+
        | blank_rep1.raw  | blank      | 1         | 1     | 5              | 2         | [...] |
        +-----------------+------------+-----------+-------+----------------+-----------+-------+
        | blank_rep2.raw  | blank      | 2         | 1     | 6              | 2         | [...] |
        +-----------------+------------+-----------+-------+----------------+-----------+-------+
        | blank_rep3.raw  | blank      | 3         | 1     | 7              | 2         | [...] |
        +-----------------+------------+-----------+-------+----------------+-----------+-------+
        | blank_rep4.raw  | blank      | 4         | 1     | 8              | 2         | [...] |
        +-----------------+------------+-----------+-------+----------------+-----------+-------+
        | ...             | ...        | ...       | ...   | ...            | ...       | [...] |
        +-----------------+------------+-----------+-------+----------------+-----------+-------+

    :return: Nested lists of PeakList objects (e.g. [[pl_01, pl_02], [pl_03, pl_04, pl_05]])
    """

    if not isinstance(source, list):
        raise IOError(
            "Incorrect input: list of lists of peaklists, list of peak matrix objects or list of .HDF5 files expected.")

    pls_merged = []
    for s in source:
        if isinstance(s, list) or isinstance(s, tuple):
            if isinstance(s[0], PeakList):
                pls_merged.extend(s)
            else:
                raise IOError("Incorrect Object in list. Peaklist Object expected.")
        elif isinstance(s, PeakMatrix):
            pls = s.extract_peaklists()
            pls_merged.extend(pls)
        elif h5py.is_hdf5(s):
            f = h5py.File(s, 'r')
            if "mz" in f:
                pm = hdf5_portal.load_peak_matrix_from_hdf5(s)
                pls = pm.extract_peaklists()
            else:
                pls = hdf5_portal.load_peaklists_from_hdf5(s)
            f.close()
            pls_merged.extend(pls)
        else:
            raise IOError(
                "Incorrect input: list of lists of peaklists, list of peak matrix objects or list of HDF5 files expected.")

    if filelist is not None:
        fl = validate_metadata(filelist)
        pls_merged = update_metadata_and_labels(pls_merged, fl)

        if 'multilist' in list(fl.keys()):
            # make sure the peaklists are in the correct order (need to be sorted ascending)
            order_indx = np.argsort([i.metadata['multilist'] for i in pls_merged])
            nlists = [fl['multilist'][i] for i in order_indx]
            pls_merged = [pls_merged[i] for i in order_indx]

            # get the break points of the different lists to join together
            bp = list(np.cumsum(np.unique(nlists, return_counts=True)[1]))
            bp = bp[:-1]

            # break up the list into a list of lists
            pls_merged = partition(pls_merged, bp)

    return pls_merged
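
A minimal usage sketch; the .hdf5 file names and the tab-delimited filelist path are placeholders:

pls = merge_peaklists(["batch_01_peaklists.hdf5", "batch_02_peaklists.hdf5"],
                      filelist="filelist.tsv")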
Beispiel #44
0
def main():
    if os.name == 'nt':
        print("Sorry, this utility is not supported on Windows!")
        return -1
    parser = argparse.ArgumentParser()
    parser.add_argument('-u', "--user", help='user id')
    parser.add_argument('-p', "--passwd", help='user password')
    args = parser.parse_args()

    filename = None
    passwd = None
    username = None

    filename = config.get('password_file')
    if not filename:
        print("no password file in config")
        return -1

    if not args.user:
        print("no userid supplied")
        return -1

    username = args.user
    if username.find(':') != -1:
        print("invalid username (':' is not allowed)")
        return -1
    if username.find('/') != -1:
        print("invalid username ('/' is not allowed)")
        return -1

    if args.passwd:
        passwd = args.passwd
        if len(passwd) < 4:
            print("password must be at least 4 characters long")
            return -1
    else:
        passwd = generate_temp_password()

    # verify file exists and is writable
    if not op.isfile(filename):
        print("password file:", filename, " does not exist")
        return -1

    if not h5py.is_hdf5(filename):
        print("invalid password file")
        return -1

    if not os.access(filename, os.W_OK):
        print("password file is not writable")
        return -1

    f = h5py.File(filename, 'r+')
    if 'user_type' not in f:
        print("invalid password file")
        return -1

    user_type = f['user_type']

    now = int(time.time())

    # add a new user
    if username in f.attrs:
        print("user already exists")
        return -1

    # create userid 1 greater than previous used
    userid = len(f.attrs) + 1
    data = np.empty((), dtype=user_type)
    data['pwd'] = encrypt_pwd(passwd)
    data['state'] = 'A'
    data['userid'] = userid
    data['ctime'] = now
    data['mtime'] = now
    f.attrs.create(username, data, dtype=user_type)
    f.close()

    datapath = config.get('datapath')
    if not op.isdir(datapath):
        print("data directory not found")
        return -1

    userpath = op.join(datapath, config.get('home_dir'))
    if not op.isdir(userpath):
        os.mkdir(userpath)
    userdir = op.join(userpath, username)
    if op.isdir(userdir):
        print("user directory already exists")
        return -1

    # create user directory
    os.mkdir(userdir)

    # link to "public" directory
    link_name = op.join(userdir, "public")
    # create symlink to public directory
    os.symlink("../../public", link_name)

    print(passwd)
    return
Beispiel #45
0
    def __init__(
        self, hdf5_file, root,
        num_classes=None, classes=None, load_as_images=True,
        replicas_per_sample=1, **kwargs
    ):
        assert h5py.is_hdf5(hdf5_file)
        super(HDF5Dataset, self).__init__(root=root, **kwargs)

        self.replicas_per_sample = replicas_per_sample
        self._hdf5_file = hdf5_file
        self._load_as_images = load_as_images
        with h5py.File(name=self._hdf5_file, mode="r") as hdf5:
            hdf5_root = hdf5[root]
            self._classes = {
                posixpath.join("/", root, g): i
                for i, g in enumerate(hdf5_root.keys())
            }

            # Load image file names from indices
            self._images = None
            index_file = Path(hdf5_file).with_suffix(".__hdf5_index__")
            if index_file.exists():
                with h5py.File(name=index_file, mode="r") as hdf5_idx:
                    if root in hdf5_idx:
                        images = hdf5_idx[root]["images"][()]
                        # convert null terminated strings to unicode
                        self._images = images.astype("U").tolist()

            if self._images is None:
                # Create index with image file names. Depending on the size and
                # location of the hdf5 this process may take a few minutes.
                self._images = []
                for class_name in self._classes:
                    group = hdf5_root[class_name]

                    if self._load_as_images:
                        files = filter(is_image_file, group)
                    else:
                        files = filter(is_tensor_file, group)

                    # Construct the absolute path name within the HDF5 file
                    path_names = map(
                        partial(posixpath.join, "/", root, class_name), files)
                    self._images.extend(path_names)

                # Convert from python string to null terminated string
                index_data = np.array(self._images, dtype="S")

                # Save cache
                with h5py.File(name=index_file, mode="a") as hdf5_idx:
                    hdf5_idx_root = hdf5_idx.require_group(root)
                    hdf5_idx_root.create_dataset("images", data=index_data)

        total_classes = len(self._classes)

        # Select subset of the dataset filtering only images from the given classes
        if classes is not None:
            self._classes = {
                posixpath.join("/", root, g): i
                for i, g in enumerate(classes)
            }

        # Limit dataset size by num_classes
        if num_classes is not None:
            self._classes = dict(itertools.islice(self._classes.items(), num_classes))

        if len(self._classes) != total_classes:
            self._images = list(filter(
                lambda x: posixpath.dirname(x) in self._classes.keys(), self._images))

        # Lazy open hdf5 file on __getitem__ of each dataloader worker.
        # See https://github.com/pytorch/pytorch/issues/11887
        self._hdf5 = None
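
A hypothetical instantiation; the file name and layout (one HDF5 group per class under /train) are assumptions:

dataset = HDF5Dataset(hdf5_file="images.hdf5", root="train",
                      num_classes=10, load_as_images=True)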
Beispiel #46
0
def blank_filter(peak_matrix: Union[PeakMatrix, str], blank_label: str, min_fraction: float = 1.0,
                 min_fold_change: float = 1.0, function: str = "mean", rm_samples: bool = True,
                 labels: Union[str, None] = None):
    """

    :param peak_matrix: PeakMatrix object

    :param blank_label: Label for the blank samples - a string indicating the name of the class to be used for
        filtering (e.g. blank), i.e. the “reference” class. This string must have been included in the “classLabel”
        column of the metadata file associated with the process_scans or replicate_filter function(s).

    :param min_fraction: A numeric value ranging from 0 to 1. Setting this value to None or 0 will skip this
        filtering step. A value greater than 0 requires that, for each peak in the peak intensity matrix,
        at least this proportion of non-reference samples has an intensity value that exceeds the product
        of: (A) the average intensity of “reference” class intensities and (B) the user-defined “min_fold_change”.
        If this condition is not met, the peak is removed from the peak intensity matrix.

    :param min_fold_change: A numeric value from 0 upwards. When minimum fraction filtering is enabled, this value
        defines the minimum required ratio between the intensity of a peak in a “non-reference” sample and the average
        intensity of the “reference” sample(s). Peaks with ratios exceeding this threshold are considered to have been
        reliably detected in a “non-reference” sample.

    :param function: Function to calculate the 'reference' intensity

        * **mean** - corresponds to using the non-weighted average of “reference” sample peak intensities
          (NA values are ignored) in calculating the “reference” to “non-reference” peak intensity ratio.

        * **median** - corresponds to using the median of “reference” sample peak intensities (NA values are ignored)
          in calculating the “reference” to “non-reference” peak intensity ratio.

        * **max** - corresponds to the use of the maximum intensity among “reference” sample peak intensities
          (NA values are ignored) in calculating the “reference” to “non-reference” peak intensity ratio.

    :param rm_samples: Remove blank samples from the output peak matrix:

        * **True** - samples belonging to the user-defined “reference” class are removed from the output peak matrix
        * **False** - samples belonging to the user-defined “reference” class are retained in the output peak matrix.

    :param labels: Path to the metadata file

    :return: PeakMatrix object
    """

    if min_fraction < 0.0 or min_fraction > 1.0:
        raise ValueError("Provide a value between 0. and 1.")
    if min_fold_change < 0:
        raise ValueError("Provide a value larger than zero.")
    if function not in ("mean", "median", "max"):
        raise ValueError("Mean, median or max intensity")

    if not isinstance(peak_matrix, PeakMatrix):
        if h5py.is_hdf5(peak_matrix):
            peak_matrix = hdf5_portal.load_peak_matrix_from_hdf5(peak_matrix)
        else:
            peak_matrix = txt_portal.load_peak_matrix_from_txt(peak_matrix)

    if labels is not None:
        peak_matrix = update_labels(peak_matrix, labels)

    if not any([Tag(blank_label, 'classLabel') in x for x in peak_matrix.peaklist_tags]):
        raise IOError("Blank label ({}) does not exist".format(blank_label))

    return filter_blank_peaks(peak_matrix, Tag(blank_label, 'classLabel'), min_fraction, min_fold_change, function,
                              rm_samples)
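
A minimal usage sketch with placeholder file names and thresholds; "blank" must match a classLabel in the associated metadata:

pm = blank_filter("peak_matrix.hdf5", blank_label="blank", min_fraction=1.0,
                  min_fold_change=10.0, function="mean", rm_samples=True,
                  labels="metadata.txt")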
Beispiel #47
0
def convertToOPMD(input_file):
    """ Take native wpg output and rewrite in openPMD conformant way.
    @param input_file: The hdf5 file to be converted.
    @type: string
    @example: input_file = "prop_out.h5"
    """

    # Check input file.
    if not h5py.is_hdf5(input_file):
        raise IOError("Not a valid hdf5 file: %s. " % (input_file))

    # Open in and out files.
    with h5py.File(input_file, 'r') as h5:
        with h5py.File(input_file.replace(".h5", ".opmd.h5"), 'w') as opmd_h5:

            # Get number of time slices in wpg output, assuming horizontal and vertical polarizations have same dimensions, which is always true for wpg output.
            data_shape = h5['data/arrEhor'].value.shape

            # Branch off if this is a non-time dependent calculation in frequency domain.
            if data_shape[2] == 1 and h5['params/wDomain'].value == "frequency":
                # Time independent calculation in frequency domain.
                _convert_from_frequency_representation(h5, opmd_h5, data_shape)
                return

            number_of_x_meshpoints = data_shape[0]
            number_of_y_meshpoints = data_shape[1]
            number_of_time_steps = data_shape[2]

            time_max = h5['params/Mesh/sliceMax'].value  #s
            time_min = h5['params/Mesh/sliceMin'].value  #s
            time_step = abs(time_max - time_min) / number_of_time_steps  #s

            photon_energy = h5['params/photonEnergy'].value  # eV
            photon_energy = photon_energy * e  # Convert to J

            # Copy misc and params from original wpg output.
            opmd_h5.create_group('history/parent')
            try:
                h5.copy('/params', opmd_h5['history/parent'])
                h5.copy('/misc', opmd_h5['history/parent'])
                h5.copy('/history', opmd_h5['history/parent'])
            # Some keys may not exist, e.g. if the input file comes from a non-simex wpg run.
            except KeyError:
                pass
            except:
                raise

            sum_x = 0.0
            sum_y = 0.0
            for it in range(number_of_time_steps):
                # Write opmd
                # Setup the root attributes for iteration 0
                opmd.setup_root_attr(opmd_h5)

                full_meshes_path = opmd.get_basePath(
                    opmd_h5, it) + opmd_h5.attrs["meshesPath"]
                # Setup basepath.
                time = time_min + it * time_step
                opmd.setup_base_path(opmd_h5,
                                     iteration=it,
                                     time=time,
                                     time_step=time_step)
                opmd_h5.create_group(full_meshes_path)
                meshes = opmd_h5[full_meshes_path]

                # Path to the E field, within the h5 file.
                full_e_path_name = b"E"
                meshes.create_group(full_e_path_name)
                E = meshes[full_e_path_name]

                # Create the dataset (2d cartesian grid)
                E.create_dataset(
                    b"x", (number_of_x_meshpoints, number_of_y_meshpoints),
                    dtype=numpy.complex64,
                    compression='gzip')
                E.create_dataset(
                    b"y", (number_of_x_meshpoints, number_of_y_meshpoints),
                    dtype=numpy.complex64,
                    compression='gzip')

                # Write the common metadata for the group
                E.attrs["geometry"] = numpy.string_("cartesian")
                # Get grid geometry.
                nx = h5['params/Mesh/nx'].value
                xMax = h5['params/Mesh/xMax'].value
                xMin = h5['params/Mesh/xMin'].value
                dx = (xMax - xMin) / nx
                ny = h5['params/Mesh/ny'].value
                yMax = h5['params/Mesh/yMax'].value
                yMin = h5['params/Mesh/yMin'].value
                dy = (yMax - yMin) / ny
                E.attrs["gridSpacing"] = numpy.array([dx, dy],
                                                     dtype=numpy.float64)
                E.attrs["gridGlobalOffset"] = numpy.array(
                    [h5['params/xCentre'].value, h5['params/yCentre'].value],
                    dtype=numpy.float64)
                E.attrs["gridUnitSI"] = numpy.float64(1.0)
                E.attrs["dataOrder"] = numpy.string_("C")
                E.attrs["axisLabels"] = numpy.array([b"x", b"y"])
                E.attrs["unitDimension"] = \
                   numpy.array([1.0, 1.0, -3.0, -1.0, 0.0, 0.0, 0.0 ], dtype=numpy.float64)
                #            L    M     T     I  theta  N    J
                # E is in volts per meters: V / m = kg * m / (A * s^3)
                # -> L * M * T^-3 * I^-1

                # Add time information
                E.attrs[
                    "timeOffset"] = 0.  # Time offset with respect to basePath's time

                # Write attribute that is specific to each dataset:
                # - Staggered position within a cell
                E["x"].attrs["position"] = numpy.array([0.0, 0.5],
                                                       dtype=numpy.float32)
                E["y"].attrs["position"] = numpy.array([0.5, 0.0],
                                                       dtype=numpy.float32)

                # - Conversion factor to SI units
                # WPG writes E fields in units of sqrt(W/mm^2), i.e. it writes E*sqrt(c * eps0 / 2).
                # Unit analysis:
                # [E] = V/m
                # [eps0] = As/Vm
                # [c] = m/s
                # ==> [E^2 * eps0 * c] = V**2/m**2 * As/Vm * m/s = V*A/m**2 = W/m**2 = [Intensity]
                # Converting to SI units by dividing by sqrt(c*eps0/2)*1e3, 1e3 for conversion from mm to m.
                c = 2.998e8  # m/s
                eps0 = 8.854e-12  # As/Vm
                E["x"].attrs["unitSI"] = numpy.float64(
                    1.0 / math.sqrt(0.5 * c * eps0) / 1.0e3)
                E["y"].attrs["unitSI"] = numpy.float64(
                    1.0 / math.sqrt(0.5 * c * eps0) / 1.0e3)

                # Copy the fields.
                Ex = h5['data/arrEhor'][:, :, it,
                                        0] + 1j * h5['data/arrEhor'][:, :, it,
                                                                     1]
                Ey = h5['data/arrEver'][:, :, it,
                                        0] + 1j * h5['data/arrEver'][:, :, it,
                                                                     1]
                E["x"][:, :] = Ex
                E["y"][:, :] = Ey

                # Get area element.
                dA = dx * dy

                ### Number of photon fields.
                # Path to the number of photons.
                full_nph_path_name = b"Nph"
                meshes.create_group(full_nph_path_name)
                Nph = meshes[full_nph_path_name]

                # Create the dataset (2d cartesian grid)
                Nph.create_dataset(
                    b"x", (number_of_x_meshpoints, number_of_y_meshpoints),
                    dtype=numpy.float32,
                    compression='gzip')
                Nph.create_dataset(
                    b"y", (number_of_x_meshpoints, number_of_y_meshpoints),
                    dtype=numpy.float32,
                    compression='gzip')

                # Write the common metadata for the group
                Nph.attrs["geometry"] = numpy.string_("cartesian")
                Nph.attrs["gridSpacing"] = numpy.array([dx, dy],
                                                       dtype=numpy.float64)
                Nph.attrs["gridGlobalOffset"] = numpy.array(
                    [h5['params/xCentre'].value, h5['params/yCentre'].value],
                    dtype=numpy.float64)
                Nph.attrs["gridUnitSI"] = numpy.float64(1.0)
                Nph.attrs["dataOrder"] = numpy.string_("C")
                Nph.attrs["axisLabels"] = numpy.array([b"x", b"y"])
                Nph.attrs["unitDimension"] = \
                   numpy.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], dtype=numpy.float64)

                # Add time information
                Nph.attrs[
                    "timeOffset"] = 0.  # Time offset with respect to basePath's time
                # Nph - Staggered position within a cell
                Nph["x"].attrs["position"] = numpy.array([0.0, 0.5],
                                                         dtype=numpy.float32)
                Nph["y"].attrs["position"] = numpy.array([0.5, 0.0],
                                                         dtype=numpy.float32)
                Nph["x"].attrs["unitSI"] = numpy.float64(1.0)
                Nph["y"].attrs["unitSI"] = numpy.float64(1.0)

                # Calculate number of photons via intensity and photon energy.
                # Since fields are stored as sqrt(W/mm^2), have to convert to W/m^2 (factor 1e6 below).
                number_of_photons_x = numpy.round(
                    abs(Ex)**2 * dA * time_step * 1.0e6 / photon_energy)
                number_of_photons_y = numpy.round(
                    abs(Ey)**2 * dA * time_step * 1.0e6 / photon_energy)
                sum_x += number_of_photons_x.sum(axis=-1).sum(axis=-1)
                sum_y += number_of_photons_y.sum(axis=-1).sum(axis=-1)
                Nph["x"][:, :] = number_of_photons_x
                Nph["y"][:, :] = number_of_photons_y

                ### Phases.
                # Path to phases
                full_phases_path_name = b"phases"
                meshes.create_group(full_phases_path_name)
                phases = meshes[full_phases_path_name]

                # Create the dataset (2d cartesian grid)
                phases.create_dataset(
                    b"x", (number_of_x_meshpoints, number_of_y_meshpoints),
                    dtype=numpy.float32,
                    compression='gzip')
                phases.create_dataset(
                    b"y", (number_of_x_meshpoints, number_of_y_meshpoints),
                    dtype=numpy.float32,
                    compression='gzip')

                # Write the common metadata for the group
                phases.attrs["geometry"] = numpy.string_("cartesian")
                phases.attrs["gridSpacing"] = numpy.array([dx, dy],
                                                          dtype=numpy.float64)
                phases.attrs["gridGlobalOffset"] = numpy.array(
                    [h5['params/xCentre'].value, h5['params/yCentre'].value],
                    dtype=numpy.float64)
                phases.attrs["gridUnitSI"] = numpy.float64(1.0)
                phases.attrs["dataOrder"] = numpy.string_("C")
                phases.attrs["axisLabels"] = numpy.array([b"x", b"y"])
                phases.attrs["unitDimension"] = numpy.array(
                    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], dtype=numpy.float64)
                phases["x"].attrs["unitSI"] = numpy.float64(1.0)
                phases["y"].attrs["unitSI"] = numpy.float64(1.0)

                # Add time information
                phases.attrs[
                    "timeOffset"] = 0.  # Time offset with respect to basePath's time
                # phases positions. - Staggered position within a cell
                phases["x"].attrs["position"] = numpy.array(
                    [0.0, 0.5], dtype=numpy.float32)
                phases["y"].attrs["position"] = numpy.array(
                    [0.5, 0.0], dtype=numpy.float32)

                phases["x"][:, :] = numpy.angle(Ex)
                phases["y"][:, :] = numpy.angle(Ey)

            print "Found %e and %e photons for horizontal and vertical polarization, respectively." % (
                sum_x, sum_y)

            opmd_h5.close()
            h5.close()
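
Following the docstring's own example, a single call converts the wpg output; the .opmd.h5 name is derived inside the function:

convertToOPMD("prop_out.h5")   # writes prop_out.opmd.h5 next to the input file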
Beispiel #48
0
    def getStack(self, filelist=None, imagestack=None):
        if filelist in [None, []]:
            filelist, filefilter = self._getStackOfFiles(getfilter=True)
        else:
            filefilter = ""

        if not len(filelist):
            return None

        if filefilter in ["", "All Files (*)"]:
            if HDF5:
                if h5py.is_hdf5(filelist[0]):
                    filefilter = "HDF5"

        fileindex = 0
        begin = None
        end = None
        aifirafile = False
        if len(filelist):
            PyMcaDirs.inputDir = os.path.dirname(filelist[0])
            #if we are dealing with HDF5, no more tests needed
            if not filefilter.upper().startswith('HDF5'):
                f = open(filelist[0], 'rb')
                #read 10 characters
                if sys.version < '3.0':
                    line = f.read(10)
                else:
                    try:
                        line = str(f.read(10).decode())
                    except UnicodeDecodeError:
                        #give a dummy value
                        line = "          "
                f.close()
            omnicfile = False
            if filefilter.upper().startswith('HDF5'):
                stack = QHDF5Stack1D.QHDF5Stack1D(filelist)
                omnicfile = True
            elif filefilter.upper().startswith('OPUS-DPT'):
                stack = OpusDPTMap.OpusDPTMap(filelist[0])
                omnicfile = True
            elif filefilter.upper().startswith("AIFIRA"):
                stack = AifiraMap.AifiraMap(filelist[0])
                omnicfile = True
                aifirafile = True
            elif filefilter.upper().startswith("SUPAVISIO"):
                stack = SupaVisioMap.SupaVisioMap(filelist[0])
                omnicfile = True
            elif filefilter.upper().startswith("TEXTIMAGE"):
                imagestack = True
                fileindex = 0
                stack = TextImageStack.TextImageStack(imagestack=True)
            elif filefilter.upper().startswith("IMAGE") and\
                 (filelist[0].upper().endswith("TIF") or\
                  filelist[0].upper().endswith("TIFF")):
                stack = TiffStack.TiffStack(imagestack=True)
            elif filefilter.upper().startswith("RENISHAW"):
                stack = RenishawMap.RenishawMap(filelist[0])
                omnicfile = True
            elif filefilter == "" and\
                 (filelist[0].upper().endswith("TIF") or\
                  filelist[0].upper().endswith("TIFF")):
                stack = TiffStack.TiffStack(imagestack=True)
            elif filefilter.upper().startswith("IMAGE"):
                if imagestack is None:
                    imagestack = True
                fileindex = 0
                stack = QStack(imagestack=imagestack)
            elif line[0] == "{":
                if filelist[0].upper().endswith("RAW"):
                    if imagestack is None:
                        imagestack = True
                stack = QStack(imagestack=imagestack)
            elif line[0:2] in ["II", "MM"]:
                if imagestack is None:
                    imagestack = True
                stack = QStack(imagestack=imagestack)
            elif line.startswith('Spectral'):
                stack = OmnicMap.OmnicMap(filelist[0])
                omnicfile = True
            elif line.startswith('#\tDate'):
                stack = LuciaMap.LuciaMap(filelist[0])
                omnicfile = True
            elif filelist[0].upper().endswith("RAW.GZ")or\
                 filelist[0].upper().endswith("EDF.GZ")or\
                 filelist[0].upper().endswith("CCD.GZ")or\
                 filelist[0].upper().endswith("RAW.BZ2")or\
                 filelist[0].upper().endswith("EDF.BZ2")or\
                 filelist[0].upper().endswith("CCD.BZ2")or\
                 filelist[0].upper().endswith(".CBF"):
                if imagestack is None:
                    imagestack = True
                stack = QStack(imagestack=imagestack)
            elif filelist[0].upper().endswith(".RTX"):
                stack = RTXMap.RTXMap(filelist[0])
                omnicfile = True
            elif filelist[0][-4:].upper() in ["PIGE", "PIGE"]:
                stack = SupaVisioMap.SupaVisioMap(filelist[0])
                omnicfile = True
            elif filelist[0][-3:].upper() in ["RBS"]:
                stack = SupaVisioMap.SupaVisioMap(filelist[0])
                omnicfile = True
            elif filelist[0][-3:].upper() in ["SPE"] and\
                 (line[0] not in ['$', '#']):
                #Roper Scientific format
                #handle it as MarCCD stack
                stack = QStack(imagestack=True)
            elif MRCMap.isMRCFile(filelist[0]):
                stack = MRCMap.MRCMap(filelist[0])
                omnicfile = True
                imagestack = True
            elif LispixMap.isLispixMapFile(filelist[0]):
                stack = LispixMap.LispixMap(filelist[0])
                omnicfile = True
            elif RenishawMap.isRenishawMapFile(filelist[0]):
                # This is dangerous. Any .txt file with four
                # columns would be accepted as a Renishaw Map
                # by other hand, I do not know how to handle
                # that case as a stack.
                stack = RenishawMap.RenishawMap(filelist[0])
                omnicfile = True
            elif OmdaqLmf.isOmdaqLmf(filelist[0]):
                stack = OmdaqLmf.OmdaqLmf(filelist[0])
                omnicfile = True
            elif JcampOpusStack.isJcampOpusStackFile(filelist[0]):
                stack = JcampOpusStack.JcampOpusStack(filelist[0])
                omnicfile = True
            else:
                stack = QSpecFileStack()

        if len(filelist) == 1:
            if not omnicfile:
                try:
                    stack.loadIndexedStack(filelist[0],
                                           begin,
                                           end,
                                           fileindex=fileindex)
                except:
                    msg = qt.QMessageBox()
                    msg.setIcon(qt.QMessageBox.Critical)
                    msg.setInformativeText("%s" % sys.exc_info()[1])
                    msg.setDetailedText(traceback.format_exc())
                    msg.exec_()
                    if _logger.getEffectiveLevel() == logging.DEBUG:
                        raise
        elif len(filelist):
            if not omnicfile:
                try:
                    stack.loadFileList(filelist, fileindex=fileindex)
                except:
                    msg = qt.QMessageBox()
                    msg.setIcon(qt.QMessageBox.Critical)
                    msg.setText("%s" % sys.exc_info()[1])
                    if QTVERSION < '4.0.0':
                        msg.exec_loop()
                    else:
                        msg.exec_()
                    if _logger.getEffectiveLevel() == logging.DEBUG:
                        raise
        if aifirafile:
            masterStack = DataObject.DataObject()
            masterStack.info = copy.deepcopy(stack.info)
            masterStack.data = stack.data[:, :, 0:1024]
            masterStack.info['Dim_2'] = int(masterStack.info['Dim_2'] / 2)

            slaveStack = DataObject.DataObject()
            slaveStack.info = copy.deepcopy(stack.info)
            slaveStack.data = stack.data[:, :, 1024:]
            slaveStack.info['Dim_2'] = int(slaveStack.info['Dim_2'] / 2)
            return [masterStack, slaveStack]
        else:
            return stack
def bin_fast5_file(f5_path, tax_annot_res_dir, sens, min_qual, min_qlen,
                   min_pident, min_coverage, no_trash):
    # Function bins FAST5 file without untwisting.
    #
    # :param f5_path: path to FAST5 file meant to be processed;
    # :type f5_path: str;
    # :param tax_annot_res_dir: path to directory containing taxonomic annotation;
    # :type tax_annot_res_dir: str;
    # :param sens: binning sensitivity;
    # :type sens: str;
    # :param min_qual: threshold for quality filter;
    # :type min_qual: float;
    # :param min_qlen: threshold for length filter;
    # :type min_qlen: int (or None, if this filter is disabled);
    # :param min_pident: threshold for alignment identity filter;
    # :type min_pident: float (or None, if this filter is disabled);
    # :param min_coverage: threshold for alignment coverage filter;
    # :type min_coverage: float (or None, if this filter is disabled);
    # :param no_trash: logical value. True if user does NOT want to output trash files;
    # :type no_trash: bool;

    outdir_path = os.path.dirname(
        logging.getLoggerClass().root.handlers[0].baseFilename)

    seqs_pass = 0  # counter for sequences that pass the filters
    QL_seqs_fail = 0  # counter for sequences that are too short or of too low quality
    align_seqs_fail = 0  # counter for sequences that align to their best hit with too low identity or coverage

    srt_file_dict = dict()

    new_dpath = glob("{}{}*{}*".format(tax_annot_res_dir, os.sep,
                                       get_checkstr(f5_path)))[0]
    tsv_res_fpath = get_res_tsv_fpath(new_dpath)
    taxonomy_path = os.path.join(tax_annot_res_dir, "taxonomy", "taxonomy.tsv")
    resfile_lines = configure_resfile_lines(tsv_res_fpath, sens, taxonomy_path)

    # Configure path to "classification not found" file
    classif_not_found_fpath = get_classif_not_found_fpath(f5_path, outdir_path)

    # Make filter for quality and length
    QL_filter = get_QL_filter(f5_path, min_qual, min_qlen)
    # Configure path to trash file
    if not no_trash:
        QL_trash_fpath = get_QL_trash_fpath(
            f5_path,
            outdir_path,
            min_qual,
            min_qlen,
        )
    else:
        QL_trash_fpath = None
    # end if

    # Make filter for identity and coverage
    align_filter = get_align_filter(min_pident, min_coverage)
    # Configure path to this trash file
    if not no_trash:
        align_trash_fpath = get_align_trash_fpath(f5_path, outdir_path,
                                                  min_pident, min_coverage)
    else:
        align_trash_fpath = None
    # end if

    # File validation:
    #   RuntimeError will be raised if the FAST5 file is broken.
    #   (A standalone sketch of this validation follows this function.)
    try:
        # File existence checking is performed while parsing CL arguments.
        # Therefore, this branch triggers only if the file at f5_path is not a valid HDF5 file.
        if not h5py.is_hdf5(f5_path):
            raise RuntimeError("file is not of HDF5 (i.e. not FAST5) format")
        # end if

        from_f5 = h5py.File(f5_path, 'r')

        # Iterating once over the root group forces h5py to read the file
        # structure; a corrupted file raises RuntimeError here.
        for _ in from_f5:
            break
        # end for
    except RuntimeError as runterr:
        printlog_error_time("FAST5 file is broken")
        printlog_error("Reading the file `{}` crashed.".format(
            os.path.basename(f5_path)))
        printlog_error("Reason: {}".format(str(runterr)))
        printlog_error("Omitting this file...")
        print()
        # Return zeroes -- inc_val won't be incremented and this file will be omitted
        return (0, 0, 0)
    # end try

    # Single-FAST5 and multi-FAST5 files should be processed in different ways:
    # the "Raw" group is always in the root of a single-FAST5 file and never in the root of a multi-FAST5 file.
    if "Raw" in from_f5.keys():
        f5_cpy_func = copy_single_f5
    else:
        f5_cpy_func = copy_read_f5_2_f5
    # end if

    for read_name in fast5_readids(from_f5):

        try:
            hit_names, *vals_to_filter = resfile_lines[sys.intern(
                fmt_read_id(read_name)
            )[1:]]  # omit 'read_' in the beginning of FAST5 group's name
        except KeyError:
            # Place this sequence into the "classification not found" file
            if classif_not_found_fpath not in srt_file_dict.keys():
                srt_file_dict = update_file_dict(srt_file_dict,
                                                 classif_not_found_fpath)
            # end if
            f5_cpy_func(from_f5, read_name,
                        srt_file_dict[classif_not_found_fpath])
            continue
        # end try

        # If read is found in TSV file:
        if not QL_filter(vals_to_filter):
            QL_seqs_fail += 1
            # Get name of the result FAST5 file to write this read to
            if QL_trash_fpath not in srt_file_dict.keys():
                srt_file_dict = update_file_dict(srt_file_dict, QL_trash_fpath)
            # end if
            f5_cpy_func(from_f5, read_name, srt_file_dict[QL_trash_fpath])
        elif not align_filter(vals_to_filter):
            align_seqs_fail += 1
            # Get name of the result FAST5 file to write this read to
            if align_trash_fpath not in srt_file_dict.keys():
                srt_file_dict = update_file_dict(srt_file_dict,
                                                 align_trash_fpath)
            # end if
            f5_cpy_func(from_f5, read_name, srt_file_dict[align_trash_fpath])
        else:
            # There can be multiple hits for a single query sequence
            for hit_name in hit_names.split("&&"):
                # Get name of the result FAST5 file to write this read to
                binned_file_path = os.path.join(outdir_path,
                                                "{}.fast5".format(hit_name))
                if binned_file_path not in srt_file_dict.keys():
                    srt_file_dict = update_file_dict(srt_file_dict,
                                                     binned_file_path)
                # end if
                f5_cpy_func(from_f5, read_name,
                            srt_file_dict[binned_file_path])
            # end for
            seqs_pass += 1
        # end if
    # end for

    from_f5.close()

    # Close all binned files
    for file_obj in filter(lambda x: x is not None, srt_file_dict.values()):
        file_obj.close()
    # end for

    return (seqs_pass, QL_seqs_fail, align_seqs_fail)
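
The validation logic above (the `h5py.is_hdf5` check, the forced read, and the single- versus multi-FAST5 detection through the "Raw" group) can be exercised on its own. A minimal standalone sketch, assuming only h5py; the function name is illustrative and not part of this module.

import h5py


def probe_fast5(path):
    """Return 'single', 'multi', or None if the file cannot be read as FAST5."""
    if not h5py.is_hdf5(path):
        return None  # not an HDF5 container, hence not a FAST5 file
    try:
        with h5py.File(path, "r") as f5:
            keys = list(f5.keys())  # forces reading; a broken file raises here
    except RuntimeError:
        return None
    # A single-read FAST5 keeps "Raw" at the root;
    # a multi-read FAST5 keeps one "read_<id>" group per read instead.
    return "single" if "Raw" in keys else "multi"
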
Beispiel #50
0
def load(filename: str, lazy: bool = False, **kwargs):
    """Load an :class:`~kikuchipy.signals.EBSD` or
    :class:`~kikuchipy.signals.EBSDMasterPattern` object from a
    supported file format.

    This function is a modified version of :func:`hyperspy.io.load`.

    Parameters
    ----------
    filename
        Name of file to load.
    lazy
        Open the data lazily without actually reading the data from disk
        until required. Allows opening arbitrarily sized datasets. Default
        is False.
    kwargs
        Keyword arguments passed to the corresponding kikuchipy reader.
        See their individual documentation for available options.

    Returns
    -------
    kikuchipy.signals.EBSD, kikuchipy.signals.EBSDMasterPattern, \
        list of kikuchipy.signals.EBSD or \
        list of kikuchipy.signals.EBSDMasterPattern

    Examples
    --------
    Import nine patterns from an HDF5 file in a directory `DATA_DIR`

    >>> import kikuchipy as kp
    >>> s = kp.load(DATA_DIR + "/patterns.h5")
    >>> s
    <EBSD, title: patterns My awes0m4 ..., dimensions: (3, 3|60, 60)>
    """
    if not os.path.isfile(filename):
        raise IOError(f"No filename matches '{filename}'.")

    # Find matching reader for file extension
    extension = os.path.splitext(filename)[1][1:]
    readers = []
    for plugin in plugins:
        if extension.lower() in plugin.file_extensions:
            readers.append(plugin)
    if len(readers) == 0:
        raise IOError(
            f"Could not read '{filename}'. If the file format is supported, please "
            "report this error")
    elif len(readers) > 1 and is_hdf5(filename):
        reader = _plugin_from_footprints(filename, plugins=readers)
    else:
        reader = readers[0]

    # Get data and metadata (from potentially multiple signals if an h5ebsd
    # file)
    signal_dicts = reader.file_reader(filename, lazy=lazy, **kwargs)
    signals = []
    for signal in signal_dicts:
        signals.append(_dict2signal(signal, lazy=lazy))
        directory, filename = os.path.split(os.path.abspath(filename))
        filename, extension = os.path.splitext(filename)
        signals[-1].tmp_parameters.folder = directory
        signals[-1].tmp_parameters.filename = filename
        signals[-1].tmp_parameters.extension = extension.replace(".", "")

    if len(signals) == 1:
        signals = signals[0]

    return signals
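
The reader selection above matches plugins by file extension first and, when several plugins claim the same extension and the file is HDF5, falls back to inspecting the file contents (`_plugin_from_footprints`). A minimal sketch of that dispatch; the plugin objects and their `footprint` attribute are illustrative assumptions rather than kikuchipy's actual API.

import os

import h5py


def select_reader(filename, plugins):
    """Pick a reader for `filename` by extension, falling back to HDF5
    content inspection when several plugins claim the same extension."""
    extension = os.path.splitext(filename)[1][1:].lower()
    candidates = [p for p in plugins if extension in p.file_extensions]
    if not candidates:
        raise IOError(f"No reader found for '{filename}'")
    if len(candidates) > 1 and h5py.is_hdf5(filename):
        with h5py.File(filename, "r") as f:
            top_level_keys = set(f.keys())
        # Keep the plugin whose expected top-level group is present in the file.
        for plugin in candidates:
            if getattr(plugin, "footprint", None) in top_level_keys:
                return plugin
    return candidates[0]
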
if not os.path.exists(test_output_folder):
    os.mkdir(test_output_folder)
logger.info("test output folder: %s" % test_output_folder)

# retrieve the list of BAG files in the test/data folder

bag_paths = list()
for root, _, files in os.walk(test_data_folder):
    for f in files:
        if f.endswith(".bag"):
            bag_paths.append(os.path.join(root, f))
logger.info("nr. of available BAG files: %d" % len(bag_paths))

# select an input from the list of BAG files

bag_path = bag_paths[0]  # change this index to select another bag file
if not h5py.is_hdf5(bag_path):
    raise RuntimeError(
        "The passed BAG file is not recognized as a valid HDF5 format")
logger.info("input BAG file: %s" % bag_path)

# setup comparison parameters
copyBaseBag = False
ziptype = None  # To test with compression, set this to "gzip" or "lzf".
test_suffix = "CMP"
if ziptype is not None:
    test_suffix += "_" + ziptype
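# A minimal sketch of how `ziptype` would typically be applied when writing the
# comparison copy with h5py (the output path and array below are placeholders,
# not part of this script):
#
#   with h5py.File(bag_path + "_" + test_suffix + ".bag", "w") as out_fid:
#       out_fid.create_dataset("BAG_root/elevation", data=elevation_array,
#                              compression=ziptype)  # None disables compression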

# open the input BAG in reading mode (and check the presence of the BAG_root group)

fid = h5py.File(bag_path, 'r')
try: