Example #1
def writeBarcodeH5(labeledZmws, labeler, outFile, writeExtendedInfo=False):
    """Write a barcode file from a list of labeled ZMWs. In addition
    to labeledZmws, this function takes a
    pbbarcode.BarcodeLabeler."""
    bestScores = [z.toBestRecord() for z in labeledZmws]
    outDta = n.vstack(bestScores)
    outH5 = h5py.File(outFile, 'a')

    if BC_DS_PATH in outH5:
        del outH5[BC_DS_PATH]

    bestDS = outH5.create_dataset(BC_DS_PATH, data=outDta, dtype="int32")
    bestDS.attrs['movieName'] = labeler.movieName
    bestDS.attrs['barcodes'] = n.array(labeler.barcodeLabels,
                                       dtype=h5py.new_vlen(str))
    bestDS.attrs['columnNames'] = n.array([
        'holeNumber', 'nAdapters', 'barcodeIdx1', 'barcodeScore1',
        'barcodeIdx2', 'barcodeScore2'
    ],
                                          dtype=h5py.new_vlen(str))
    bestDS.attrs['scoreMode'] = labeler.scoreMode

    if writeExtendedInfo:
        # here we use the 'names' because each barcode is scored
        # individually.
        nBarcodes = len(labeler.barcodeNames)

        def makeArray(l, v):
            a = n.zeros(l, dtype=type(v))
            a.fill(v)
            return a

        def makeRecord(lZmw):
            zmws = makeArray(nBarcodes * lZmw.nScored, lZmw.holeNumber)
            adapters = n.concatenate([makeArray(nBarcodes, i) for i in \
                                          range(1, lZmw.nScored + 1)])
            idxs = n.concatenate([list(range(0, nBarcodes)) for i in \
                                      range(0, lZmw.nScored)])
            scores = n.concatenate(lZmw.allScores)
            return n.transpose(n.vstack((zmws, adapters, idxs, scores)))

        records = [makeRecord(lZmw) for lZmw in labeledZmws if lZmw.allScores]
        records = n.vstack(records)

        if BC_DS_ALL_PATH in outH5:
            del outH5[BC_DS_ALL_PATH]
        allDS = outH5.create_dataset(BC_DS_ALL_PATH,
                                     data=records,
                                     dtype='int32')
        allDS.attrs['movieName'] = labeler.movieName
        # note names versus labels.
        allDS.attrs['barcodes'] = n.array(labeler.barcodeNames,
                                          dtype=h5py.new_vlen(str))
        allDS.attrs['columnNames'] = n.array(
            ['holeNumber', 'adapter', 'barcodeIdx', 'score'],
            dtype=h5py.new_vlen(str))
    # close the file at the very end.
    outH5.close()
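All of the examples on this page lean on h5py's variable-length string dtype. As a hedged aside: h5py.new_vlen(str), used above, is the legacy h5py 1.x spelling; on newer h5py releases the same dtype is obtained with h5py.special_dtype(vlen=str) (or h5py.string_dtype() on recent versions). A minimal, self-contained sketch of the attribute-writing idiom from this example (file and dataset names are made up):

import h5py
import numpy as np

# Variable-length string dtype; the legacy spelling was h5py.new_vlen(str).
str_dt = h5py.special_dtype(vlen=str)

with h5py.File("barcodes_sketch.h5", "w") as f:      # hypothetical file name
    ds = f.create_dataset("BestBarcodes", data=np.zeros((4, 6), dtype="int32"))
    ds.attrs["columnNames"] = np.array(
        ["holeNumber", "nAdapters", "barcodeIdx1",
         "barcodeScore1", "barcodeIdx2", "barcodeScore2"], dtype=str_dt)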
Example #2
def create_neurohdf_file(filename, data):

    with closing(h5py.File(filename, 'w')) as hfile:
        hfile.attrs['neurohdf_version'] = '0.1'
        mcgroup = hfile.create_group("Microcircuit")
        mcgroup.attrs['node_type'] = 'irregular_dataset'
        vert = mcgroup.create_group("vertices")
        conn = mcgroup.create_group("connectivity")

        vert.create_dataset("id", data=data['vert']['id'])
        vert.create_dataset("location", data=data['vert']['location'])
        verttype=vert.create_dataset("type", data=data['vert']['type'])
        # create rec array with two columns, value and name
        my_dtype = np.dtype([('value', 'l'), ('name', h5py.new_vlen(str))])
        helpdict={VerticesTypeSkeletonRootNode['id']: VerticesTypeSkeletonRootNode['name'],
                  VerticesTypeSkeletonNode['id']: VerticesTypeSkeletonNode['name'],
                  VerticesTypeConnectorNode['id']: VerticesTypeConnectorNode['name']
        }
        arr=np.recarray( len(helpdict), dtype=my_dtype )
        for i,kv in enumerate(helpdict.items()):
            arr[i][0] = kv[0]
            arr[i][1] = kv[1]
        verttype.attrs['value_name']=arr

        vert.create_dataset("confidence", data=data['vert']['confidence'])
        vert.create_dataset("userid", data=data['vert']['userid'])
        vert.create_dataset("radius", data=data['vert']['radius'])
        vert.create_dataset("skeletonid", data=data['vert']['skeletonid'])
        vert.create_dataset("creation_time", data=data['vert']['creation_time'])
        vert.create_dataset("modification_time", data=data['vert']['modification_time'])

        conn.create_dataset("id", data=data['conn']['id'])
        if data['conn'].has_key('type'):
            conntype=conn.create_dataset("type", data=data['conn']['type'])
            helpdict={ConnectivityNeurite['id']: ConnectivityNeurite['name'],
                      ConnectivityPresynaptic['id']: ConnectivityPresynaptic['name'],
                      ConnectivityPostsynaptic['id']: ConnectivityPostsynaptic['name']
            }
            arr=np.recarray( len(helpdict), dtype=my_dtype )
            for i,kv in enumerate(helpdict.items()):
                arr[i][0] = kv[0]
                arr[i][1] = kv[1]
            conntype.attrs['value_name']=arr

        if data['conn'].has_key('skeletonid'):
            conn.create_dataset("skeletonid", data=data['conn']['skeletonid'])

        if data.has_key('meta'):
            metadata=mcgroup.create_group('metadata')
            # create recarray with two columns, skeletonid and string
            my_dtype = np.dtype([('skeletonid', 'l'), ('name', h5py.new_vlen(str))])
            arr=np.recarray( len(data['meta']), dtype=my_dtype )
            for i,kv in enumerate(data['meta'].items()):
                arr[i][0] = kv[0]
                arr[i][1] = kv[1]

            metadata.create_dataset('skeleton_name', data=arr )
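The id/name lookup table attached as the 'value_name' attribute above is a reusable pattern: a record array pairing an integer code with a variable-length string name. A minimal sketch with made-up ids (not the actual vertex-type constants), using h5py.special_dtype(vlen=str) as the modern equivalent of new_vlen(str):

import h5py
import numpy as np

str_dt = h5py.special_dtype(vlen=str)                 # legacy: h5py.new_vlen(str)
value_name_dtype = np.dtype([('value', 'l'), ('name', str_dt)])

id_to_name = {1: 'skeleton root node', 2: 'skeleton node', 3: 'connector node'}
arr = np.recarray(len(id_to_name), dtype=value_name_dtype)
for i, (value, name) in enumerate(id_to_name.items()):
    arr[i] = (value, name)

with h5py.File('microcircuit_sketch.h5', 'w') as f:   # hypothetical file name
    vert_type = f.create_dataset('vertices_type', data=np.array([1, 2, 3, 2, 2]))
    vert_type.attrs['value_name'] = arr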
Example #3
def create_neurohdf_file(filename, data):

    with closing(h5py.File(filename, 'w')) as hfile:
        hfile.attrs['neurohdf_version'] = '0.1'
        mcgroup = hfile.create_group("Microcircuit")
        mcgroup.attrs['node_type'] = 'irregular_dataset'
        vert = mcgroup.create_group("vertices")
        conn = mcgroup.create_group("connectivity")

        vert.create_dataset("id", data=data['vert']['id'])
        vert.create_dataset("location", data=data['vert']['location'])
        verttype=vert.create_dataset("type", data=data['vert']['type'])
        # create rec array with two columns, value and name
        my_dtype = np.dtype([('value', 'l'), ('name', h5py.new_vlen(str))])
        helpdict={VerticesTypeSkeletonRootNode['id']: VerticesTypeSkeletonRootNode['name'],
                  VerticesTypeSkeletonNode['id']: VerticesTypeSkeletonNode['name'],
                  VerticesTypeConnectorNode['id']: VerticesTypeConnectorNode['name']
        }
        arr=np.recarray( len(helpdict), dtype=my_dtype )
        for i,kv in enumerate(helpdict.items()):
            arr[i][0] = kv[0]
            arr[i][1] = kv[1]
        verttype.attrs['value_name']=arr

        vert.create_dataset("confidence", data=data['vert']['confidence'])
        vert.create_dataset("userid", data=data['vert']['userid'])
        vert.create_dataset("radius", data=data['vert']['radius'])
        vert.create_dataset("skeletonid", data=data['vert']['skeletonid'])
        vert.create_dataset("creation_time", data=data['vert']['creation_time'])
        vert.create_dataset("modification_time", data=data['vert']['modification_time'])

        conn.create_dataset("id", data=data['conn']['id'])
        if data['conn'].has_key('type'):
            conntype=conn.create_dataset("type", data=data['conn']['type'])
            helpdict={ConnectivityNeurite['id']: ConnectivityNeurite['name'],
                      ConnectivityPresynaptic['id']: ConnectivityPresynaptic['name'],
                      ConnectivityPostsynaptic['id']: ConnectivityPostsynaptic['name']
            }
            arr=np.recarray( len(helpdict), dtype=my_dtype )
            for i,kv in enumerate(helpdict.items()):
                arr[i][0] = kv[0]
                arr[i][1] = kv[1]
            conntype.attrs['value_name']=arr

        if data['conn'].has_key('skeletonid'):
            conn.create_dataset("skeletonid", data=data['conn']['skeletonid'])

        if data.has_key('meta'):
            metadata=mcgroup.create_group('metadata')
            # create recarray with two columns, skeletonid and string
            my_dtype = np.dtype([('skeletonid', 'l'), ('name', h5py.new_vlen(str))])
            arr=np.recarray( len(data['meta']), dtype=my_dtype )
            for i,kv in enumerate(data['meta'].items()):
                arr[i][0] = kv[0]
                arr[i][1] = kv[1]

            metadata.create_dataset('skeleton_name', data=arr )
Example #4
def writeBarcodeH5(labeledZmws, labeler, outFile,
                   writeExtendedInfo = False):
    """Write a barcode file from a list of labeled ZMWs. In addition
    to labeledZmws, this function takes a
    pbbarcode.BarcodeLabeler."""
    bestScores = map(lambda z: z.toBestRecord(), labeledZmws)
    outDta = n.vstack(bestScores)
    outH5 = h5.File(outFile, 'a')

    if BC_DS_PATH in outH5:
        del outH5[BC_DS_PATH]

    bestDS = outH5.create_dataset(BC_DS_PATH, data = outDta, dtype = "int32")
    bestDS.attrs['movieName'] = labeler.movieName
    bestDS.attrs['barcodes'] = n.array(labeler.barcodeLabels, dtype = h5.new_vlen(str))
    bestDS.attrs['columnNames'] = n.array(['holeNumber', 'nAdapters', 'barcodeIdx1',
                                           'barcodeScore1', 'barcodeIdx2', 'barcodeScore2'],
                                          dtype = h5.new_vlen(str))
    bestDS.attrs['scoreMode'] = labeler.scoreMode

    if writeExtendedInfo:
        # here we use the 'names' because each barcode is scored
        # individually.
        nBarcodes = len(labeler.barcodeNames)

        def makeArray(l, v):
            a = n.zeros(l, dtype = type(v))
            a.fill(v)
            return a

        def makeRecord(lZmw):
            zmws = makeArray(nBarcodes * lZmw.nScored, lZmw.holeNumber)
            adapters = n.concatenate([makeArray(nBarcodes, i) for i in \
                                          xrange(1, lZmw.nScored + 1)])
            idxs = n.concatenate([range(0, nBarcodes) for i in \
                                      xrange(0, lZmw.nScored)])
            scores = n.concatenate(lZmw.allScores)
            return n.transpose(n.vstack((zmws, adapters, idxs, scores)))

        records = [makeRecord(lZmw) for lZmw in labeledZmws if lZmw.allScores]
        records = n.vstack(records)

        if BC_DS_ALL_PATH in outH5:
            del outH5[BC_DS_ALL_PATH]
        allDS = outH5.create_dataset(BC_DS_ALL_PATH, data = records, dtype = 'int32')
        allDS.attrs['movieName'] = labeler.movieName
        # note names versus labels.
        allDS.attrs['barcodes'] = n.array(labeler.barcodeNames, dtype = h5.new_vlen(str))
        allDS.attrs['columnNames'] = n.array(['holeNumber', 'adapter', 'barcodeIdx', 'score'],
                                             dtype = h5.new_vlen(str))
    # close the file at the very end.
    outH5.close()
Example #5
    def add_mesh_from_string(self,
                             name,
                             shape_data,
                             scale=None,
                             insideMargin=None,
                             outsideMargin=None):
        """
        Add a mesh shape from a string.
        Accepted format : mesh encoded in VTK .vtp format
        """

        if name not in self._ref:

            shape = self._ref.create_dataset(name, (1, ),
                                             dtype=h5py.new_vlen(str))
            shape[:] = shape_data
            shape.attrs['id'] = self._number_of_shapes
            shape.attrs['type'] = 'vtp'
            if scale is not None:
                shape.attrs['scale'] = scale
            if insideMargin is not None:
                shape.attrs['insideMargin'] = insideMargin
            if outsideMargin is not None:
                shape.attrs['outsideMargin'] = outsideMargin
            self._number_of_shapes += 1
Example #6
    def add_occ_shape(self, name, occ_shape):
        """
        Add an OpenCascade TopoDS_Shape.
        """

        if name not in self._ref:

            from OCC.STEPControl import STEPControl_Writer, STEPControl_AsIs

            # step format is used for the storage.
            step_writer = STEPControl_Writer()

            step_writer.Transfer(occ_shape, STEPControl_AsIs)

            shape_data = None

            with tmpfile() as tmpf:

                step_writer.Write(tmpf[1])

                tmpf[0].flush()
                shape_data = str_of_file(tmpf[1])

                shape = self._ref.create_dataset(name, (1, ),
                                                 dtype=h5py.new_vlen(str))
                shape[:] = shape_data
                shape.attrs['id'] = self._number_of_shapes
                shape.attrs['type'] = 'step'
                self._number_of_shapes += 1
Example #7
    def add_interaction(self,
                        name,
                        body1_name,
                        contactor1_name=None,
                        body2_name=None,
                        contactor2_name=None,
                        distance_calculator='cadmbtb',
                        offset1=0.0,
                        offset2=0.0):
        """
        Add permanent interactions between two objects contactors.
        """
        if name not in self.permanent_interactions():
            pinter = self.permanent_interactions().create_dataset(
                name, (1, ), dtype=h5py.new_vlen(str))
            pinter.attrs['id'] = self._number_of_permanent_interactions
            pinter.attrs['type'] = 'permanent_interaction'
            pinter.attrs['body1_name'] = body1_name
            pinter.attrs['body2_name'] = body2_name
            if contactor1_name is not None:
                pinter.attrs['contactor1_name'] = contactor1_name
            if contactor2_name is not None:
                pinter.attrs['contactor2_name'] = contactor2_name
            pinter.attrs['distance_calculator'] = distance_calculator
            pinter.attrs['offset1'] = offset1
            pinter.attrs['offset2'] = offset2

            self._number_of_permanent_interactions += 1
Example #8
    def _save_hdf5(self, filename, group = "OrbitResponseMatrix"):
        """
        Save data in HDF5 format, inside the HDF5 group named by `group` (an h5py.Group object).

        Note
        -----
        h5py before v2.0 does not accept unicode directly.
        """
        h5zip = None # 'gzip' works in default install
        f = h5py.File(filename, 'w')
        grp = f.create_group(group)

        str_type = h5py.new_vlen(str)
        m, n = np.shape(self.m)
        dst = grp.create_dataset('m', (m,n), data=self.m, compression=h5zip)
        #
        name, spos, plane = zip(*self.bpm)
        name = [v.encode('ascii') for v in name]
        dst.attrs["bpm_name"] = name
        dst.attrs["bpm_field"] = plane
        name, spos, plane = zip(*self.trim)
        dst.attrs["cor_name"] = name
        dst.attrs["cor_field"] = plane
        if self.bpm_pv:
            dst.attrs["bpm_pv"] = self.bpm_pv
        if self.cor_pv:
            dst.attrs["cor_pv"] = self.cor_pv
        if self.cor_pvrb:
            dst.attrs["cor_pvrb"] = self.cor_pvrb
            
        f.close()
Example #9
    def addOccShape(self, name, occ_shape):
        """
        Add an OpenCascade TopoDS_Shape
        """

        if name not in self._ref:

            from OCC.STEPControl import STEPControl_Writer, STEPControl_AsIs

            # step format is used for the storage.
            step_writer = STEPControl_Writer()
            step_writer.Transfer(occ_shape, STEPControl_AsIs)

            shape_data = None

            with tmpfile() as tmpf:

                status = step_writer.Write(tmpf[1])

                tmpf[0].flush()
                shape_data = str_of_file(tmpf[1])

                shape = self._ref.create_dataset(name, (1,),
                                                 dtype=h5py.new_vlen(str))
                shape[:] = shape_data
                shape.attrs['id'] = self._number_of_shapes
                shape.attrs['type'] = 'step'
                self._shapeid[name] = shape.attrs['id']
                self._number_of_shapes += 1
Example #10
def filter_vocab(sample_hdf_fname, tdict_pkl_fname, filtered_hdf_fname):
    log.info("opening original samples file " + sample_hdf_fname)
    sample_hdfile = h5py.File(sample_hdf_fname, "r")    
        
    columns_selector, filtered_vocab = make_new_vocab(sample_hdfile, tdict_pkl_fname)
    
    log.info("creating filtered samples file " + filtered_hdf_fname)
    filtered_hdfile = h5py.File(filtered_hdf_fname, "w")    
    
    log.info("storing filtered vocabulary ({0} terms)".format(len(filtered_vocab)))
    # create new type for variable-length strings
    # see http://code.google.com/p/h5py/wiki/HowTo#Variable-length_strings
    str_type = h5py.new_vlen(str)
    # hdf5 can't handle unicode strings, so encode terms as utf-8 byte strings
    filtered_hdfile.create_dataset("vocab", 
                                   data=[t.encode("utf-8") for t in filtered_vocab],
                                   dtype=str_type)
    
    make_new_samples(sample_hdfile, filtered_hdfile, columns_selector)
            
    log.info("closing " + sample_hdf_fname)
    sample_hdfile.close()          

    log.info("closing " + filtered_hdf_fname)
    filtered_hdfile.close()                  
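A short usage sketch for the function above; the file names are illustrative, and make_new_vocab / make_new_samples are assumed to come from the same module:

filter_vocab("samples_de-en.hdf5",
             "translation_dict_de-en.pkl",
             "samples_de-en_filtered.hdf5")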
Example #11
def write_probe_map(h5, probe_map):
    if not "probe_map" in h5:
        h5.create_group("probe_map")

    probe_type = numpy.dtype(
        [
            ('hugo', 'S'),
            ('chrome', 'S'),
            ('start', 'i'),
            ('stop', 'i'),
            ('strand', 'S'),
        ])
    h_probe_type = h5py.new_vlen(probe_type)
    print probe_type
    probes = probe_map.geneMap.keys()
    probes.sort()
    pm_count = len(probes)

    ds = h5.create_dataset(
    "/probe_map/%s" % (probe_map['name']), [pm_count], dtype=h_probe_type)

    i = 0
    val = numpy.zeros(1, dtype=probe_type)
    for probe in probes:
        ds[i] = i
        i += 1
Example #12
def filter_sample_vocab(lang_pair):
    """
    Filter vocabulary words which do not occur in the translation lexicon.
    This reduces the size of the vocabulary and adjusts the context samples
    accordingly.
    
    Assumes that the vocab does NOT contain:
    - POS tags (i.e. lempos combination)
    - multi-word units (MWUs)
    """
    sample_hdf_fname = config["sample"][lang_pair]["samples_fname"]
    log.info("opening original samples file " + sample_hdf_fname)
    sample_hdfile = h5py.File(sample_hdf_fname, "r")

    filtered_hdf_fname = config["sample"][lang_pair]["samples_filt_fname"]
    log.info("creating filtered samples file " + filtered_hdf_fname)
    filtered_hdfile = h5py.File(filtered_hdf_fname, "w")

    tdict_pkl_fname = config["dict"][lang_pair]["pkl_fname"]
    columns_selector, filtered_vocab = make_new_vocab(sample_hdfile, tdict_pkl_fname)

    log.info("storing filtered vocabulary")
    # create new type for variable-length strings
    # see http://code.google.com/p/h5py/wiki/HowTo#Variable-length_strings
    str_type = h5py.new_vlen(str)
    # hdf5 can't handle unicode strings, so encode terms as utf-8 byte strings
    filtered_hdfile.create_dataset("vocab", data=[t.encode("utf-8") for t in filtered_vocab], dtype=str_type)

    make_new_samples(sample_hdfile, filtered_hdfile, columns_selector)

    log.info("closing " + sample_hdf_fname)
    sample_hdfile.close()

    log.info("closing " + filtered_hdf_fname)
    filtered_hdfile.close()
Example #13
    def _createDatasetInFile(self, hdf5File, datasetName, roi):
        shape = tuple(roi[1] - roi[0])
        chunks = self._description.chunks
        if chunks is not None:
            # chunks must not be bigger than the data in any dim
            chunks = numpy.minimum(chunks, shape)
            chunks = tuple(chunks)
        compression = self._description.compression
        compression_opts = self._description.compression_opts

        dtype = self._description.dtype
        if dtype == object:
            dtype = h5py.new_vlen(str)
        dataset = hdf5File.create_dataset(datasetName,
                                          shape=shape,
                                          dtype=dtype,
                                          chunks=chunks,
                                          compression=compression,
                                          compression_opts=compression_opts)

        # Set data attributes
        if self._description.drange is not None:
            dataset.attrs['drange'] = self._description.drange
        if _use_vigra:
            dataset.attrs['axistags'] = vigra.defaultAxistags(
                self._description.axes).toJSON()
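The dtype == object branch above is how callers request string storage: an object dtype in the dataset description is mapped to the variable-length string type before the dataset is created. A standalone sketch of that mapping, independent of the blockwise-file machinery (file and dataset names are illustrative):

import h5py

def as_h5_dtype(dtype):
    # Map numpy's object dtype to a variable-length string type;
    # h5py.new_vlen(str) is the legacy spelling of h5py.special_dtype(vlen=str).
    if dtype == object:
        return h5py.special_dtype(vlen=str)
    return dtype

with h5py.File('blocks_sketch.h5', 'w') as f:
    ds = f.create_dataset('labels', shape=(3,), dtype=as_h5_dtype(object))
    ds[:] = ['background', 'cell', 'membrane']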
Example #14
    def _createDatasetInFile(self, hdf5File, datasetName, roi):
        shape = tuple( roi[1] - roi[0] )
        chunks = self._description.chunks
        if chunks is not None:
            # chunks must not be bigger than the data in any dim
            chunks = numpy.minimum( chunks, shape )
            chunks = tuple(chunks)
        compression = self._description.compression
        compression_opts = self._description.compression_opts
        
        dtype=self._description.dtype
        if dtype == object:
            dtype = h5py.new_vlen(str)
        dataset = hdf5File.create_dataset( datasetName,
                                 shape=shape,
                                 dtype=dtype,
                                 chunks=chunks,
                                 compression=compression,
                                 compression_opts=compression_opts )

        # Set data attributes
        if self._description.drange is not None:
            dataset.attrs['drange'] = self._description.drange
        if _use_vigra:
            dataset.attrs['axistags'] = vigra.defaultAxistags( self._description.axes ).toJSON()
Example #15
    def _createEmptyDataset_(self, parent, dataset_name, shape, dtype,
                             **kwargs):
        """ Creates an empty dataset in the data file and returns a pointer
        to it. Raises IOError exception if the dataset already exists.
        """
        dataset_key = self.hdfObjectKey(dataset_name)
        if dataset_key in parent.keys():
            errmsg = "'%s' dataset already exists in current data file."
            raise IOError, errmsg % dataset_name

        create_args = {}
        attributes = {}
        for name in kwargs:
            if name in DATASET_CREATE_ARGS:
                create_args[safestring(name)] = safevalue(kwargs[name])
            else:
                attributes[safestring(name)] = safevalue(kwargs[name])

        if 'created' not in attributes:
            attributes['created'] = self._timestamp_()

        if dtype == N.dtype(object):
            create_args['dtype'] = h5py.new_vlen(str)
        else:
            create_args['dtype'] = dtype

        dataset = parent.create_dataset(dataset_key, shape, **create_args)

        for attr_name, attr_value in attributes.items():
            dataset.attrs[attr_name] = attr_value

        return dataset
Example #16
    def _save_hdf5(self, filename, group="OrbitResponseMatrix"):
        """
        Save data in HDF5 format, inside the HDF5 group named by `group` (an h5py.Group object).

        Note
        -----
        h5py before v2.0 does not accept unicode directly.
        """
        h5zip = None  # 'gzip' works in default install
        f = h5py.File(filename, 'w')
        grp = f.create_group(group)

        str_type = h5py.new_vlen(str)
        m, n = np.shape(self.m)
        dst = grp.create_dataset('m', (m, n), data=self.m, compression=h5zip)
        #
        name, plane = zip(*self.bpm)
        name = [v.encode('ascii') for v in name]
        dst.attrs["bpm_name"] = name
        dst.attrs["bpm_field"] = plane
        name, plane = zip(*self.cor)
        dst.attrs["cor_name"] = name
        dst.attrs["cor_field"] = plane
        if self.bpm_pv:
            dst.attrs["bpm_pv"] = self.bpm_pv
        if self.cor_pv:
            dst.attrs["cor_pv"] = self.cor_pv
        if self.cor_pvrb:
            dst.attrs["cor_pvrb"] = self.cor_pvrb

        f.close()
Example #17
 def _recursive_write(self, f, struct):
     
     for thing in dir(struct):
         #Skip everything starting with '_'
         if thing.startswith('_'):
             continue
         try:
             #Get the attribute
             value = getattr(struct, thing)
         except (AttributeError,ValueError) as E:
             print(thing, E)
             #If it can't get the attribute, just go to the next thing
             continue
         
         #If it is an integer, floating point value, or numpy array
         if isinstance(value,(int, float)):
             #Save it as a value, go to next thing
             f.create_dataset(thing, data = value)
             continue
         elif isinstance(value, np.ndarray):
             
             if not value.shape: # value.shape is an empty tuple
                 # It's a one-element numpy array
                 f.create_dataset(thing, data = value)
             else:
                 #Save it with compression, go to next thing
                 f.create_dataset(thing, data = value, compression = 'gzip')
             continue
         elif isinstance(value, basestring):
             str_type = h5py.new_vlen(str)
             f.create_dataset(thing, dtype=str_type, data = value)
             continue
         
         import inspect
         #Skip methods, functions, built-in functions, routines, and modules
         if (inspect.ismethod(value)
             or inspect.isfunction(value)
             or inspect.isbuiltin(value)
             or inspect.isroutine(value)
             or inspect.ismodule(value)):
                 continue
         
         if type(value) is types.DictType:
             dict_group = f.create_group(thing)
             # Recurse into the entries in the dictionary by turning the 
             # dictionary into a class
             self._recursive_write(dict_group, StubClass(value))
         
         elif isinstance(value, (list,tuple)):
             dict_group = f.create_group(thing)
             #Convert to numpy array
             #List/Tuple to a class
             cls = StubClass({str(i):v for i,v in enumerate(value)})
             #Write class recursively
             self._recursive_write(dict_group, cls)
         else:
             f.create_group(thing)
             #Recurse into the class
             self._recursive_write(f[thing], value)
Example #18
def generate_random_hdf5(fname, templatefn):
    """
    sample HDF5 file generation
    Generates sample HDF5 files with random data and metadata

    Parameters
    ----------
    fname: str
        Name of the HDF5 to be generated
    templatefn : str
        Name of the configuration file describing HDF5 structure

    Examples
    --------
    >>> fname = generate_random_hdf5('fname.h5', 'template.cfg')
    """

    # read configuration file
    config = ConfigParser.SafeConfigParser()
    config.read(templatefn)

    ini = config._sections

    # open hdf5 file
    f = h5py.File(fname, 'w')

    # fill data and metadata
    meta = config.items("h5meta")
    strdt = h5py.new_vlen(str)
    for val in meta:
        # If it is data, generate a random dataset
        if val[0][-5:] == "/data":
            shape_tmp = map(int, val[1].strip("(").strip(")").split(","))
            shape = [i for i in shape_tmp]
            data = np.random.rand(*shape)
            group = f.require_group(val[0].rsplit('/', 1)[0])
            dset = group.require_dataset(val[0].rsplit('/', 1)[1],
                                         shape,
                                         dtype=float)
            dset[:] = data
            continue

        if len(val[0].split('/')) > 1:
            groupname, dsetname = val[0].rsplit('/', 1)
            group = f.require_group(groupname)
            dset = group.require_dataset(dsetname, (1, ), strdt)
        else:
            dset = f.require_dataset(val[0], (1, ), strdt)
        rndval = val[1].split(',')[random.randint(0,
                                                  len(val[1].split(',')) -
                                                  1)].strip()
        print dset, rndval
        dset[...] = rndval
        ini['h5meta'][val[0]] = rndval
    f.close()

    # what could I return?
    print "Done"
    return 0
Example #19
def save_xml_str_to_hdf5_dataset(file_path,
                                 xml='',
                                 dataset_name='something.xml'):
    # Write the xml file...
    with h5py.File(file_path, 'a') as f:
        str_type = h5py.new_vlen(str)
        ds = f.create_dataset(dataset_name, shape=(2, ), dtype=str_type)
        ds[:] = xml
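A short usage sketch; the path, XML payload, and dataset name are illustrative. Note that ds[:] = xml broadcasts the same string into both elements of the (2,) dataset:

save_xml_str_to_hdf5_dataset('run_output.h5',
                             xml='<settings><threshold>0.5</threshold></settings>',
                             dataset_name='settings.xml')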
Example #20
 def _recursive_write(self, f, struct):
     
     for thing in dir(struct):
         #Skip everything starting with '_'
         if thing.startswith('_'):
             continue
         try:
             #Get the attribute
             value = getattr(struct, thing)
             
         except AttributeError:
             #If it can't get the attribute, just go to the next thing
             continue
         
         #If it is an integer, floating point value, or numpy array
         if isinstance(value,(int, float)):
             #Save it as a value, go to next thing
             f.create_dataset(thing, data = value)
             continue
         elif isinstance(value, np.ndarray):
             
             if not value.shape: # value.shape is an empty tuple
                 # It's a one-element numpy array
                 f.create_dataset(thing, data = value)
             else:
                 #Save it with compression, go to next thing
                 f.create_dataset(thing, data = value, compression = 'gzip')
             continue
         elif isinstance(value, basestring):
             str_type = h5py.new_vlen(str)
             f.create_dataset(thing, dtype=str_type, data = value)
             continue
         
         import inspect
         #Skip methods, functions, built-in functions and routines
         if (inspect.ismethod(value)
             or inspect.isfunction(value)
             or inspect.isbuiltin(value)
             or inspect.isroutine(value)):
                 continue
         
         if type(value) is types.DictType:
             dict_group = f.create_group(thing)
             # Recurse into the entries in the dictionary by turning the 
             # dictionary into a class
             self._recursive_write(dict_group, StubClass(value))
         
         elif isinstance(value, (list,tuple)):
             dict_group = f.create_group(thing)
             #Convert to numpy array
             #List/Tuple to a class
             cls = StubClass({str(i):v for i,v in enumerate(value)})
             #Write class recursively
             self._recursive_write(dict_group, cls)
         else:
             f.create_group(thing)
             #Recurse into the class
             self._recursive_write(f[thing], value)
Example #21
    def link_file(self, label, array_idx, filename):
        dtype = h5py.new_vlen(type(filename))
        dataset = self._get_dataset(label, dtype=dtype)

        # And update the HDF5 dataset with the new data
        try:
            self._set_data_point(dataset, array_idx, filename)
        except:
            logger.error(u'Error updating dataset', exc_info=True)
Example #22
    def add_plugin_source(self, name, filename):
        """
        Add C source plugin
        """

        if name not in self._plugins:
            plugin_src = self._plugins.create_dataset(name, (1,),
                                                      dtype=h5py.new_vlen(str))
            plugin_src[:] = str_of_file(filename)
            plugin_src.attrs['filename'] = filename
Example #23
    def add_plugin_source(self, name, filename):
        """
        Add C source plugin
        """

        if name not in self._plugins:
            plugin_src = self._plugins.create_dataset(name, (1, ),
                                                      dtype=h5py.new_vlen(str))
            plugin_src[:] = str_of_file(filename)
            plugin_src.attrs['filename'] = filename
Example #24
def labelAlignments():
    logging.info("Labeling alignments using: %s" % runner.args.inputFofn)
    bcFofn = BarcodeH5Fofn(runner.args.inputFofn)

    with CmpH5Reader(runner.args.cmpH5) as cmpH5:
        bcDS = n.zeros((len(cmpH5), 5), dtype="int32")

        for (i, aln) in enumerate(cmpH5):
            bcReader = bcFofn.readerForMovie(aln.movieInfo.Name)
            try:
                lZmw = bcReader.labeledZmwFromHoleNumber(aln.HoleNumber)
                if lZmw.nScored < runner.args.minNumBarcodes or \
                        lZmw.averageScore < runner.args.minAvgBarcodeScore or \
                        lZmw.scoreRatio < runner.args.minScoreRatio:
                    lZmw = None
            except KeyError:
                lZmw = None

            if lZmw:
                bcDS[i, :] = n.array([
                    lZmw.nScored, lZmw.bestIdx, lZmw.bestScore,
                    lZmw.secondBestIdx, lZmw.secondBestScore
                ])
            else:
                # either no barcode was found for this guy or they got
                # filtered, hence the NULL_BARCODE
                bcDS[i, :] = n.array([
                    0,
                    len(bcReader.barcodeLabels), 0,
                    len(bcReader.barcodeLabels), 0
                ])

    # write to the cmp.h5 file.
    H5 = h5.File(runner.args.cmpH5, 'r+')
    if BC_INFO_ID in H5:
        del H5[BC_INFO_ID]
    if BC_INFO_NAME in H5:
        del H5[BC_INFO_NAME]

    # we use the first one to get the labels, if somehow they
    # don't have all of the same stuff that will be an issue.
    bcLabels = n.concatenate(
        (bcFofn.barcodeLabels, n.array([BARCODE_DELIMITER])))
    H5.create_dataset(BC_INFO_ID,
                      data=n.array(range(0, len(bcLabels))),
                      dtype='int32')
    H5.create_dataset(BC_INFO_NAME, data=bcLabels, dtype=h5.new_vlen(str))
    if BC_ALN_INFO_DS in H5:
        del H5[BC_ALN_INFO_DS]
    bcDS = H5.create_dataset(BC_ALN_INFO_DS, data=bcDS, dtype='int32')
    bcDS.attrs['ColumnNames'] = n.array(
        ['count', 'index1', 'score1', 'index2', 'score2'])
    #force BarcodeMode to have numpy dtype for CmpH5Sort 'extra datasets' routine
    bcDS.attrs['BarcodeMode'] = n.array(bcFofn.scoreMode)
    H5.close()
Example #25
def generate_random_hdf5(fname, templatefn):
    """
    sample HDF5 file generation
    Generates sample HDF5 files with random data and metadata

    Parameters
    ----------
    fname: str
        Name of the HDF5 to be generated
    templatefn : str
        Name of the configuration file describing HDF5 structure

    Examples
    --------
    >>> fname = generate_random_hdf5('fname.h5', 'template.cfg')
    """

    # read configuration file
    config = ConfigParser.SafeConfigParser()
    config.read(templatefn)

    ini = config._sections

    # open hdf5 file
    f = h5py.File(fname, 'w')

    # fill data and metadata
    meta = config.items("h5meta")
    strdt = h5py.new_vlen(str)
    for val in meta:
        # If it is data, generate a random dataset
        if val[0][-5:] == "/data":
            shape_tmp = map(int, val[1].strip("(").strip(")").split(","))
            shape = [i for i in shape_tmp]
            data = np.random.rand(*shape)
            group = f.require_group(val[0].rsplit('/', 1)[0])
            dset = group.require_dataset(val[0].rsplit('/', 1)[1], shape, dtype=float)
            dset[:] = data
            continue

        if len(val[0].split('/')) > 1:
            groupname, dsetname = val[0].rsplit('/', 1)
            group = f.require_group(groupname)
            dset = group.require_dataset(dsetname, (1, ), strdt)
        else:
            dset = f.require_dataset(val[0], (1, ), strdt)
        rndval = val[1].split(',')[random.randint(0, len(val[1].split(',')) - 1)].strip()
        print dset, rndval
        dset[...] = rndval
        ini['h5meta'][val[0]] = rndval
    f.close()

    # what could I return?
    print "Done"
    return 0
Example #26
 def _write_without_iterate(self, D, group_prefix="/"):
     for k in D.keys():
         if isinstance(D[k],dict):
             group_prefix_new = group_prefix + k + "/"
             log_debug(logger, "Writing group %s" % group_prefix_new)
             if k not in self._f[group_prefix]:
                 self._f.create_group(group_prefix_new)
             self._write_without_iterate(D[k], group_prefix_new)
         else:
             name = group_prefix + k
             log_debug(logger, "Writing dataset %s" % name)
             data = D[k]
             if k not in self._f[group_prefix]:
                 if numpy.isscalar(data):
                     maxshape = (None,)
                     shape = (self._chunksize,)
                     if (isinstance(data, str)):
                         dtype = numpy.dtype(type(data.encode('utf8')))
                     else:
                         dtype = numpy.dtype(type(data))
                     if dtype == "S":
                         dtype = h5py.new_vlen(str)
                     axes = "experiment_identifier:value"
                 else:
                     data = numpy.asarray(data)
                     try:
                         h5py.h5t.py_create(data.dtype, logical=1)
                     except TypeError:
                         log_warning(logger, "Could not save dataset %s. Conversion to numpy array failed" % name)
                         continue
                     maxshape = tuple([None]+list(data.shape))
                     shape = tuple([self._chunksize]+list(data.shape))
                     dtype = data.dtype
                     ndim = data.ndim
                     axes = "experiment_identifier"
                     if ndim == 1: axes = axes + ":x"
                     elif ndim == 2: axes = axes + ":y:x"
                     elif ndim == 3: axes = axes + ":z:y:x"
                 log_debug(logger, "Create dataset %s [shape=%s, dtype=%s]" % (name,str(shape),str(dtype)))
                 self._f.create_dataset(name, shape, maxshape=maxshape, dtype=dtype, **self._create_dataset_kwargs)
                 self._f[name].attrs.modify("axes",[axes.encode('utf8')])
             if self._f[name].shape[0] <= self._i:
                 if numpy.isscalar(data):
                     data_shape = []
                 else:
                     data_shape = data.shape
                 new_shape = tuple([self._chunksize*(self._i/self._chunksize+1)]+list(data_shape))
                 log_debug(logger, "Resize dataset %s [old shape: %s, new shape: %s]" % (name,str(self._f[name].shape),str(new_shape)))
                 self._f[name].resize(new_shape)
             log_debug(logger, "Write to dataset %s at stack position %i" % (name, self._i))
             if numpy.isscalar(data):
                 self._f[name][self._i] = data
             else:
                 self._f[name][self._i,:] = data[:]
Example #27
    def note(self, note):
        """Add a timestamped note to HDF file, in a dataset called 'notes'"""
        ts = datetime.datetime.now()
        try:
            ds = self['notes']
        except:
            ds = self.create_dataset('notes', (0,), maxshape=(None,), dtype=h5py.new_vlen(str))

        shape = list(ds.shape)
        shape[0] = shape[0] + 1
        ds.resize(shape)
        ds[-1] = str(ts) + ' -- ' + note
        self.flush()
Example #28
    def note(self, note):
        """Add a timestamped note to HDF file, in a dataset called 'notes'"""
        ts = datetime.datetime.now()
        try:
            ds = self['notes']
        except:
            ds = self.create_dataset('notes', (0,), maxshape=(None,), dtype=h5py.new_vlen(str))

        shape = list(ds.shape)
        shape[0] = shape[0] + 1
        ds.resize(shape)
        ds[-1] = str(ts) + ' -- ' + note
        self.flush()
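The same append idiom works outside the class; a minimal sketch with a hypothetical file name, spelling the dtype as h5py.special_dtype(vlen=str) (new_vlen(str) in legacy h5py):

import datetime
import h5py

with h5py.File('log_sketch.h5', 'a') as f:
    str_dt = h5py.special_dtype(vlen=str)
    if 'notes' not in f:
        # resizable, variable-length string dataset used as a running log
        f.create_dataset('notes', shape=(0,), maxshape=(None,), dtype=str_dt)
    notes = f['notes']
    notes.resize((notes.shape[0] + 1,))
    notes[-1] = '%s -- %s' % (datetime.datetime.now(), 'first note')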
Example #29
 def _write_without_iterate(self, D, group_prefix="/"):
     for k in D.keys():
         if isinstance(D[k],dict):
             group_prefix_new = group_prefix + k + "/"
             log.log_debug(logger, "Writing group %s" % group_prefix_new)
             if k not in self._f[group_prefix]:
                 self._f.create_group(group_prefix_new)
             self._write_without_iterate(D[k], group_prefix_new)
         else:
             name = group_prefix + k
             log.log_debug(logger, "Writing dataset %s" % name)
             data = D[k]
             if k not in self._f[group_prefix]:
                 if numpy.isscalar(data):
                     maxshape = (None,)
                     shape = (self._chunksize,)
                     dtype = numpy.dtype(type(data))
                     if dtype == "S":
                         dtype = h5py.new_vlen(str)
                     axes = "experiment_identifier:value"
                 else:
                     data = numpy.asarray(data)
                     try:
                         h5py.h5t.py_create(data.dtype, logical=1)
                     except TypeError:
                         log.log_warning(logger, "Could not save dataset %s. Conversion to numpy array failed" % name)
                         continue
                     maxshape = tuple([None]+list(data.shape))
                     shape = tuple([self._chunksize]+list(data.shape))
                     dtype = data.dtype
                     ndim = data.ndim
                     axes = "experiment_identifier"
                     if ndim == 1: axes = axes + ":x"
                     elif ndim == 2: axes = axes + ":y:x"
                     elif ndim == 3: axes = axes + ":z:y:x"
                 log.log_debug(logger, "Create dataset %s [shape=%s, dtype=%s]" % (name,str(shape),str(dtype)))
                 self._f.create_dataset(name, shape, maxshape=maxshape, dtype=dtype, **self._create_dataset_kwargs)
                 self._f[name].attrs.modify("axes",[axes])
             if self._f[name].shape[0] <= self._i:
                 if numpy.isscalar(data):
                     data_shape = []
                 else:
                     data_shape = data.shape
                 new_shape = tuple([self._chunksize*(self._i/self._chunksize+1)]+list(data_shape))
                 log.log_debug(logger, "Resize dataset %s [old shape: %s, new shape: %s]" % (name,str(self._f[name].shape),str(new_shape)))
                 self._f[name].resize(new_shape)
             log.log_debug(logger, "Write to dataset %s at stack position %i" % (name, self._i))
             if numpy.isscalar(data):
                 self._f[name][self._i] = data
             else:
                 self._f[name][self._i,:] = data[:]
Example #30
    def addMeshFromString(self, name, shape_data):
        """
        Add a mesh shape from a string.
        Accepted format : mesh encoded in VTK .vtp format
        """
        if name not in self._ref:

            shape = self._ref.create_dataset(name, (1,),
                                             dtype=h5py.new_vlen(str))
            shape[:] = shape_data
            shape.attrs['id'] = self._number_of_shapes
            shape.attrs['type'] = 'vtp'
            self._shapeid[name] = shape.attrs['id']
            self._number_of_shapes += 1
Example #31
def write_proceesed_data(prdata, grp):

  str_type = h5py.new_vlen(str)
  # columns: (hdf_path, samplerate, samplename, objref)
  meta_info = numpy.dtype([
      ('Path', str_type),
      ('SampleRate', numpy.int),
      ('SampleName', str_type),
      ('objref', h5py.h5t.special_dtype(ref=h5py.Reference))])

  if 'raw_sample_info' not in grp:  # simple case
    grp.create_dataset("raw_sample_info", (len(prdata),), meta_info,
                       numpy.array(prdata, dtype=meta_info),
                       chunks=True, maxshape=None)
Example #32
def labelAlignments():
    logging.info("Labeling alignments using: %s" % runner.args.inputFofn)
    bcFofn = BarcodeH5Fofn(runner.args.inputFofn)

    with CmpH5Reader(runner.args.cmpH5) as cmpH5:
        bcDS = n.zeros((len(cmpH5), 5), dtype = "int32")

        for (i, aln) in enumerate(cmpH5):
            bcReader = bcFofn.readerForMovie(aln.movieInfo.Name)
            try:
                lZmw = bcReader.labeledZmwFromHoleNumber(aln.HoleNumber)
                if lZmw.nScored < runner.args.minNumBarcodes or \
                        lZmw.averageScore < runner.args.minAvgBarcodeScore or \
                        lZmw.scoreRatio < runner.args.minScoreRatio:
                    lZmw = None
            except KeyError:
                lZmw = None

            if lZmw:
                bcDS[i,:] = n.array([lZmw.nScored, lZmw.bestIdx, lZmw.bestScore,
                                     lZmw.secondBestIdx, lZmw.secondBestScore])
            else:
                # either no barcode was found for this guy or they got
                # filtered, hence the NULL_BARCODE
                bcDS[i,:] = n.array([0, 
                                     len(bcReader.barcodeLabels), 0, 
                                     len(bcReader.barcodeLabels), 0])

    # write to the cmp.h5 file.
    H5 = h5.File(runner.args.cmpH5, 'r+')
    if BC_INFO_ID in H5:
        del H5[BC_INFO_ID]
    if BC_INFO_NAME in H5:
        del H5[BC_INFO_NAME]

    # we use the first one to get the labels, if somehow they
    # don't have all of the same stuff that will be an issue.
    bcLabels = n.concatenate((bcFofn.barcodeLabels, n.array([BARCODE_DELIMITER]))) 
    H5.create_dataset(BC_INFO_ID, data = n.array(range(0, len(bcLabels))), 
                      dtype = 'int32')
    H5.create_dataset(BC_INFO_NAME, data = bcLabels, dtype = h5.new_vlen(str))
    if BC_ALN_INFO_DS in H5:
        del H5[BC_ALN_INFO_DS]
    bcDS = H5.create_dataset(BC_ALN_INFO_DS, data = bcDS, dtype = 'int32')
    bcDS.attrs['ColumnNames'] = n.array(['count', 'index1', 'score1', 'index2', 
                                         'score2'])
    #force BarcodeMode to have numpy dtype for CmpH5Sort 'extra datasets' routine
    bcDS.attrs['BarcodeMode'] = n.array( bcFofn.scoreMode )
    H5.close()
Example #33
    def add_shape_data_from_file(self, name, filename):
        """
        Add shape data from a file.
        """
        if name not in self._ref:
            shape = self._ref.create_dataset(name, (1,),
                                             dtype=h5py.new_vlen(str))
            shape[:] = str_of_file(filename)
            shape.attrs['id'] = self._number_of_shapes
            try:
                shape.attrs['type'] = os.path.splitext(filename)[1][1:]
            except:
                shape.attrs['type'] = 'unknown'

            self._number_of_shapes += 1
Example #34
    def add_shape_data_from_file(self, name, filename):
        """
        Add shape data from a file.
        """
        if name not in self._ref:
            shape = self._ref.create_dataset(name, (1, ),
                                             dtype=h5py.new_vlen(str))
            shape[:] = str_of_file(filename)
            shape.attrs['id'] = self._number_of_shapes
            try:
                shape.attrs['type'] = os.path.splitext(filename)[1][1:]
            except:
                shape.attrs['type'] = 'unknown'

            self._number_of_shapes += 1
Example #35
def writeRegionsTable(  regions, fileName, 
                        types=PLS_REGION_TYPES,
                        descriptions=PLS_REGION_DESC,
                        sources=PLS_REGION_SRC ):
    """Writes out a pls.h5 file containing a regions table defined
    by the arguments to this function."""

    outFile = h5py.File( fileName, 'w' )
    
    shape = ( max( 1, len(regions) ), len(PlsRegion.TABLE_COLUMNS) )
    pd = outFile.create_group( "PulseData" )
    regionTable = pd.create_dataset( "Regions", shape, numpy.int32, maxshape=(None,shape[1]) )
    
    rows = numpy.zeros( shape=shape, dtype=numpy.int32 )
    for i, row in enumerate([ region.toTableRow() for region in regions ]):
        rows[i] = row
    regionTable[:] = rows

    regionTable.attrs[ "ColumnNames" ] = numpy.array( PlsRegion.TABLE_COLUMNS, dtype=h5py.new_vlen(str) )
    regionTable.attrs[ "RegionTypes" ] = numpy.array( types, dtype=h5py.new_vlen(str) )
    regionTable.attrs[ "RegionDescriptions" ] = numpy.array( descriptions, dtype=h5py.new_vlen(str) )
    regionTable.attrs[ "RegionSources" ] = numpy.array( sources, dtype=h5py.new_vlen(str) )

    outFile.close()
Example #36
    def __init__(self, fname, mode="a"):
        """ open the hdf5 file depending on the mode
            defaults are set
            
            NEW 2014: if the opened file exists and contains data, read it in
                      this is used in the pdlp script (not with pydlpoly)
        """
        self.verbose = 0
        #
        self.fname = fname
        self.mode = mode
        #
        self.h5file = h5py.File(fname, mode)
        #
        self.file_version = 1.1

        #
        if "version" in self.h5file.attrs.keys():
            if (self.mode == "a") or (self.mode == "w"):
                assert self.file_version == self.h5file.attrs[
                    "version"], "Existing file has a different version! Cannot add data"
        else:
            self.h5file.attrs["version"] = self.file_version
        # defaults
        self.pd = None
        self.stagelist = []
        self.track_data = None
        self.traj_nstep = 1
        self.rest_nstep = 1
        #
        #
        if "system" in self.h5file.keys():
            # ok there is some system so initialize data from here
            self.stagelist = self.h5file.keys()
            self.stagelist.remove("system")
            self.system = self.h5file["system"]
            self.natoms = self.system["elems"].shape[0]
            self.bcd = self.system.attrs["bcd"]
        else:
            self.system = self.h5file.require_group("system")
            self.natoms = 0
            self.bcd = 0
        #
        # helper object for hdf5 variable length strings
        self.str_dt = h5py.new_vlen(str)
        # track charges if floating_charges is True
        self.floating_charges = False
        return
Example #37
 def __set_tree(self):
     ''' Setup/Check the tree structure of the file'''
     self.param=self.f.require_group('param')  # Setup param group
     self.param.require_group('dim')           # Read parameters from dim.dat files
     self.param.require_group('init')            #Read parameters from params.log file
     self.param.require_group('run')
     self.param.require_group('index')        # Read parameters from index.pro file
     self.data=self.f.require_group('data')     #Setup the data group
     self.etc=self.f.require_group('etc')    #Setup the notes group
     self.etc.require_group('ext')
     try:
         dt=h5py.new_vlen(str)
         self.notes=self.etc.require_dataset('notes',(1,),dtype=dt,maxshape=(None,))
     except TypeError:     # additional notes already inserted
         self.notes=self.etc['notes']
     self.__accessed()
Example #38
 def addContactFromOccShape(self, name, occ_shape_name, contact_type,
                            index, collision_group=0, associated_shape=None):
     """
     Add contact reference from a previously added brep.
     """
     if name not in self._ref:
         shape = self._ref.create_dataset(name, (1,),
                                          dtype=h5py.new_vlen(str))
         shape.attrs['id'] = self._number_of_shapes
         shape.attrs['type'] = 'step'
         shape.attrs['contact'] = contact_type
         shape.attrs['step'] = occ_shape_name
         shape.attrs['index'] = index
         if associated_shape is not None:
             shape.attrs['associated_shape'] = associated_shape
         self._shapeid[name] = shape.attrs['id']
         self._number_of_shapes += 1
Example #39
    def write_to_dataset(self, name, data, i):
        if self.logger is not None:
            self.logger.debug("Write dataset %s of event %i." % (name, i))
        if name not in self.f:
            # print name
            t0 = time.time()
            if numpy.isscalar(data):
                if i == -1:
                    s = [1]
                else:
                    s = [self.N]
                t = numpy.dtype(type(data))
                if t == "S":
                    t = h5py.new_vlen(str)
                axes = "experiment_identifier:value"
            else:
                data = numpy.array(data)
                s = list(data.shape)
                ndims = len(s)
                axes = "experiment_identifier"
                if ndims == 1:
                    axes = axes + ":x"
                elif ndims == 2:
                    axes = axes + ":y:x"
                elif ndims == 3:
                    axes = axes + ":z:y:x"
                if i != -1:
                    s.insert(0, self.N)
                t = data.dtype
            self.f.create_dataset(name, s, t)
            self.f[name].attrs.modify("axes", [axes])
            t1 = time.time()
            if self.logger != None:
                self.logger.debug("Create dataset %s within %.1f sec.", name, t1 - t0)

        if i == -1:
            if numpy.isscalar(data):
                self.f[name][0] = data
            else:
                self.f[name][:] = data[:]
        else:
            if numpy.isscalar(data):
                self.f[name][i] = data
            else:
                self.f[name][i, :] = data[:]
Example #40
    def write_to_dataset(self,name,data,i):
        if self.logger is not None:
            self.logger.debug("Write dataset %s of event %i." % (name,i))
        if name not in self.f:
            #print name
            t0 = time.time()
            if numpy.isscalar(data):
                if i == -1:
                    s = [1]
                else:
                    s= [self.N]
                t=numpy.dtype(type(data))
                if t == "S":
                    t = h5py.new_vlen(str)
                axes = "experiment_identifier:value"
            else:
                data = numpy.array(data)
                s = list(data.shape)
                ndims = len(s)
                axes = "experiment_identifier"
                if ndims == 2: axes = axes + ":x"
                elif ndims == 3: axes = axes + ":y:x"
                elif ndims == 4: axes = axes + ":z:y:x"
                if i != -1:
                    s.insert(0,self.N)
                t=data.dtype
            self.f.create_dataset(name,s,t)
            self.f[name].attrs.modify("axes",[axes])
            t1 = time.time()
            if self.logger != None:
                self.logger.debug("Create dataset %s within %.1f sec.",name,t1-t0)

        if i == -1:
            if numpy.isscalar(data):
                self.f[name][0] = data
            else:
                self.f[name][:] = data[:]
        else:
            if numpy.isscalar(data):
                self.f[name][i] = data
            else:
                #print name,data,numpy.array(self.f[name]).shape
                self.f[name][i,:] = data[:]
Example #41
def _rdkfeats_writer(output_file=None, features=None):
    """Returns a (molindex, molid, smiles) processor that computes descriptors using RDKit and stores then in a h5 file.

    Parameters:
      - output_file: where the descriptors will be written; this file will be overwritten.
      - features: a list of the names of the RDKit features that will be computed
                  (by default all the descriptors exposed by the Descriptor class in RDKit)

    Returns:
      - a processor function ready to be used as a parameter to _molidsmiles_it.

    The h5 file has the following data:
      - 'rdkdescs': a float matrix num_mols x num_descs
                    this will all be nans if the computation failed completely
      - 'fnames': the name of the feature in each column (num_cols)
      - 'molids': the molid corresponding to each row (num_rows)
    """
    ensure_dir(op.dirname(output_file))
    h5 = h5py.File(output_file, mode='w', dtype=np.float32)
    computer = RDKitDescriptorsComputer(features)
    fnames = computer.fnames()
    nf = len(fnames)
    descs = h5.create_dataset('rdkdescs', (0, nf), maxshape=(None, nf), compression='lzf')
    str_type = h5py.new_vlen(str)
    h5.create_dataset('fnames', data=fnames)
    molids = h5.create_dataset('molids', shape=(0,), maxshape=(None,), dtype=str_type)

    def process(molid, smiles):
        if molid is _END_MOLID:
            h5.close()
            return
        ne = len(molids)
        try:
            molids.resize((ne + 1,))
            molids[ne] = molid
            mol = to_rdkit_mol(smiles)
            descs.resize((ne + 1, nf))
            descs[ne, :] = computer.compute(mol)[0]
        except:
            info('Failed molecule %s: %s' % (molid, smiles))
            descs[ne, :] = [np.nan] * nf

    return process
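
The HDF5 layout described in the docstring can be read back directly with h5py. A minimal sketch, assuming a file written by this processor exists at the placeholder path 'descs.h5':

import h5py
import numpy as np

with h5py.File('descs.h5', 'r') as h5:
    descs = h5['rdkdescs'][:]       # num_mols x num_descs float matrix
    fnames = list(h5['fnames'][:])  # feature name of each column
    molids = list(h5['molids'][:])  # molid of each row
    # molecules whose computation failed have an all-NaN row
    failed = np.all(np.isnan(descs), axis=1)
    print('%d molecules, %d descriptors, %d failures' %
          (len(molids), len(fnames), int(failed.sum())))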
Example #42
0
    def __init__(self, filename, mode):
        self._h5FileHandle = h5py.File(filename, mode)
        self._assignAlignmentIndexCol()
        self._refSeqName2Id = {}
        self._readGroupPath2Id = {}
        self.mode = mode
        self._vlType = h5py.new_vlen(str)

        if mode in ['a','w']:
            self._createIndexTables()
        if mode != 'w':
            self._globalIndex = self._h5FileHandle["AlignmentIndex"]
            self._refSeqName = self._h5FileHandle["RefSeqName"]
            self._refSeqID = self._h5FileHandle["RefSeqID"]
            self._readGroupPath = self._h5FileHandle["ReadGroupPath"]
            self._readGroupPathID = self._h5FileHandle["ReadGroupPathID"]
    
        self._updateRefSeqDict()
        self._updateReadGroupDict()
Example #43
0
    def add_brep_from_string(self, name, shape_data):
        """
        Add a brep contained in a string.
        """
        if name not in self._ref:
            shape = self._ref.create_dataset(name, (1, ),
                                             dtype=h5py.new_vlen(str))
            if type(shape_data) == str:
                # raw str
                shape[:] = shape_data
            else:
                # __getstate__ as with pythonocc
                shape[:] = shape_data[0]
                shape.attrs['occ_indx'] = shape_data[1]

            shape.attrs['id'] = self._number_of_shapes
            shape.attrs['type'] = 'brep'

            self._number_of_shapes += 1
Example #44
0
    def add_brep_from_string(self, name, shape_data):
        """
        Add a brep contained in a string.
        """
        if name not in self._ref:
            shape = self._ref.create_dataset(name, (1,),
                                             dtype=h5py.new_vlen(str))
            if type(shape_data) == str:
                # raw str
                shape[:] = shape_data
            else:
                # __getstate__ as with pythonocc
                shape[:] = shape_data[0]
                shape.attrs['occ_indx'] = shape_data[1]

            shape.attrs['id'] = self._number_of_shapes
            shape.attrs['type'] = 'brep'

            self._number_of_shapes += 1
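
The storage pattern behind add_brep_from_string can also be sketched standalone: a single-element variable-length string dataset holding the raw BREP text plus a couple of attributes. In the hedged sketch below, 'cylinder.brep' and 'shapes.h5' are placeholder file names, and the 'ref' group only stands in for the class's internal _ref group:

import h5py

with open('cylinder.brep', 'r') as f:
    brep_string = f.read()

with h5py.File('shapes.h5', 'w') as out:
    ref = out.create_group('ref')
    shape = ref.create_dataset('cylinder', (1,), dtype=h5py.new_vlen(str))
    shape[:] = brep_string
    shape.attrs['id'] = 0
    shape.attrs['type'] = 'brep'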
Example #45
0
    def _createDataset_(self, parent, dataset_name, numpy_array_or_shape,
                        **kwargs):
        """ Creates a new dataset in the data file and returns a pointer to
        it. Raises IOError exception if the dataset already exists.
        """
        dataset_key = self.hdfObjectKey(dataset_name)
        if dataset_key in parent.keys():
            errmsg = "'%s' dataset already exists in current data file."
            raise IOError, errmsg % dataset_name

        create_args = {}
        attributes = {}
        for name in kwargs:
            if name in DATASET_CREATE_ARGS:
                create_args[safestring(name)] = safevalue(kwargs[name])
            else:
                attributes[safestring(name)] = safevalue(kwargs[name])

        if 'created' not in attributes:
            attributes['created'] = self._timestamp_()

        if 'fillvalue' in create_args:
            if 'dtype' not in create_args:
                errmsg = "'dtype' is required for empty or extendable datasets."
                raise IOError, errmsg

            shape = numpy_array_or_shape
            dataset = parent.create_dataset(dataset_key, shape, **create_args)

        else:  #TODO: need to have a better set of checks here
            if 'dtype' not in create_args\
            and numpy_array_or_shape.dtype == N.dtype(object):
                create_args['dtype'] = h5py.new_vlen(str)

            dataset = parent.create_dataset(dataset_key,
                                            data=numpy_array_or_shape,
                                            **create_args)

        for attr_name, attr_value in attributes.items():
            dataset.attrs[attr_name] = attr_value

        return dataset
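
The fallback used here, storing object-dtype arrays as variable-length strings, works outside this class as well. A minimal standalone sketch (the file name and data are made up; on newer h5py, h5py.special_dtype(vlen=str) replaces h5py.new_vlen(str)):

import time
import h5py
import numpy as N

labels = N.array(['alpha', 'beta', 'gamma'], dtype=object)

with h5py.File('example.h5', 'w') as f:
    dtype = labels.dtype
    if dtype == N.dtype(object):
        # object arrays cannot be written directly; fall back to vlen str
        dtype = h5py.new_vlen(str)
    ds = f.create_dataset('labels', data=labels, dtype=dtype)
    ds.attrs['created'] = time.asctime()  # rough stand-in for _timestamp_()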
Example #46
0
    def add_mesh_from_string(self, name, shape_data, scale=None,
                             insideMargin=None, outsideMargin=None):
        """
        Add a mesh shape from a string.
        Accepted format : mesh encoded in VTK .vtp format
        """

        if name not in self._ref:

            shape = self._ref.create_dataset(name, (1,),
                                             dtype=h5py.new_vlen(str))
            shape[:] = shape_data
            shape.attrs['id'] = self._number_of_shapes
            shape.attrs['type'] = 'vtp'
            if scale is not None:
                shape.attrs['scale'] = scale
            if insideMargin is not None:
                shape.attrs['insideMargin'] = insideMargin
            if outsideMargin is not None:
                shape.attrs['outsideMargin'] = outsideMargin
            self._number_of_shapes += 1
Example #47
0
    def __init__(self, sys, filename, mode='w'):
        
        self.hdf = hd = h5py.File(filename, mode)

        if mode == 'w':
            # Variable-length string type
            str_type = h5py.new_vlen(str)
            tl = hd.create_dataset('types', (sys.n,), str_type)
            tl[:] = [at.type for at in sys.atoms]

            bsize = hd.create_dataset('boxsize', (1,), 'f')
            bsize[0] = sys.boxsize

            # The coordinate list
            cl = hd.create_dataset('coordlist', (1, sys.n, 3), 'f',
                                   maxshape=(None, sys.n, 3),
                                   compression='gzip', compression_opts=4)
            cl[0] = sys.rarray

            # The velocity list
            vl = hd.create_dataset('velocitylist', (1, sys.n, 3), 'f',
                                   maxshape=(None, sys.n, 3),
                                   compression='gzip', compression_opts=4)
            vl[0] = sys.varray
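
Because 'coordlist' and 'velocitylist' are created with maxshape=(None, sys.n, 3), further frames can be appended later by resizing along the first axis. A hedged sketch, assuming the same system object (called sys in this example, shadowing the stdlib name) and the placeholder file name 'traj.h5':

import h5py

with h5py.File('traj.h5', 'a') as hd:
    cl = hd['coordlist']
    vl = hd['velocitylist']
    nframes = cl.shape[0]

    # grow both datasets by one frame along the first (unlimited) axis
    cl.resize(nframes + 1, axis=0)
    vl.resize(nframes + 1, axis=0)

    cl[nframes] = sys.rarray  # current (n, 3) coordinates
    vl[nframes] = sys.varray  # current (n, 3) velocities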
Example #48
0
    def write_to_dataset(self, data, name, i=None):
        if self.logger != None:
            self.logger.debug("Write dataset %s of event %i." % (name, i))
        if name not in self.f:
            t0 = time.time()
            if numpy.isscalar(data):
                if i is None:
                    s = [1]
                else:
                    s = [self.N]
                t = numpy.dtype(type(data))
                if t == "S":
                    t = h5py.new_vlen(str)
                axes = "experiment_identifier:value"
            else:
                s = list(data.shape)
                if i is not None:
                    s.insert(0, self.N)
                t = data.dtype
                axes = "experiment_identifier:y:x"
            self.f.create_dataset(name, s, t)
            self.f[name].attrs.modify("axes", [axes])
            t1 = time.time()
            if self.logger != None:
                self.logger.debug("Create dataset %s within %.1f sec.", name,
                                  t1 - t0)

        if i is None:
            if numpy.isscalar(data):
                self.f[name][0] = data
            else:
                self.f[name][:] = data[:]
        else:
            if numpy.isscalar(data):
                self.f[name][i] = data
            else:
                self.f[name][i, :] = data[:]
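
For orientation, a hedged sketch of reading back a file produced by this writer. The dataset names and 'events.h5' are placeholders; the 'axes' attribute values are the ones assigned in the code above (stored as one-element arrays via attrs.modify):

import h5py

with h5py.File('events.h5', 'r') as f:
    energy = f['photon_energy_eV']   # per-event scalar -> dataset of shape (N,)
    image = f['assembled_image']     # per-event 2D array -> dataset of shape (N, y, x)
    print(energy.attrs['axes'])      # ['experiment_identifier:value']
    print(image.attrs['axes'])       # ['experiment_identifier:y:x']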
Example #49
0
    def _createDataset_(self, parent, dataset_name, numpy_array, attributes,
                        **kwargs):
        """ Creates a new dataset in the data file and returns a pointer to
        it. Raises IOError exception if the dataset already exists.
        """
        dataset_key = safeDataKey(dataset_name)
        if dataset_key in parent.keys():
            errmsg = "'%s' dataset already exists in current data file."
            raise IOError, errmsg % dataset_name

        create_args = {}
        for arg_name in kwargs:
            create_args[safe_name(arg_name)] = kwargs[arg_name]

        if 'maxshape' in create_args:
            if 'dtype' not in create_args:
                raise IOError, "'dtype' is required for extendable datasets."
            if len(numpy_array) != len(create_args['maxshape']):
                errmsg = '3rd argument must be the initial shape of the array.'
                raise IOError, errmsg
            initial_shape = numpy_array
            dataset = parent.create_dataset(dataset_key, initial_shape,
                                            **create_args)
        else:
            if 'dtype' not in create_args\
            and numpy_array.dtype == N.dtype(object):
                create_args['dtype'] = h5py.new_vlen(str)

            dataset = parent.create_dataset(dataset_key,
                                            data=numpy_array,
                                            **create_args)

        for attr_name, attr_value in attributes.items():
            if attr_name != 'dtype' and attr_value is not None:
                dataset.attrs[safeDataKey(attr_name)] = safevalue(attr_value)

        return dataset
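
The extendable branch above (maxshape plus an explicit dtype, since no data array is passed) follows the standard h5py pattern of creating an empty, growable dataset and resizing it as values arrive. A minimal standalone sketch with made-up names:

import h5py
import numpy as N

with h5py.File('extendable.h5', 'w') as f:
    # initial shape (0,), growable without bound along axis 0
    ds = f.create_dataset('values', (0,), maxshape=(None,), dtype='float32')
    for chunk in (N.arange(3.0), N.arange(5.0)):
        n = ds.shape[0]
        ds.resize((n + len(chunk),))
        ds[n:] = chunk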
Example #50
0
def write_exchange_dataset_from_stack(file_name, image_stack, energies):
    # Adds the necessary groups, datasets and attributes (Scientific Data Exchange standard HDF5 format).
    xnumber = np.arange(image_stack.shape[0] * 1.0)
    ynumber = np.arange(image_stack.shape[1] * 1.0)
    enumber = np.arange(image_stack.shape[2] * 1.0)
    #print xnumber.shape,ynumber.shape,xnumber,ynumber
    inumber = np.ones(image_stack.shape[2])
    comment = ''
    f1 = h5py.File(file_name, 'w')
    dset = f1.create_group("exchange")
    dset2 = dset.create_dataset("data", data=image_stack)
    dset2.attrs['axes'] = 'x:y'
    dset2.attrs['signal'] = 1
    dset3 = dset.create_dataset("energy", data=energies)
    dset3.attrs['units'] = 'eV'
    dset4 = dset.create_dataset("x", data=xnumber)
    dset5 = dset.create_dataset("y", data=ynumber)
    str_type = h5py.new_vlen(str)
    eset = f1.create_dataset("implements", shape=(1, ), dtype=str_type)
    eset[:] = 'information:exchange:spectromicroscopy'
    fset = f1.create_group("information")
    fset2 = fset.create_dataset("comment", shape=(1, ), dtype=str_type)
    fset2[:] = comment
    fset3 = fset.create_dataset("file_creation_datetime",
                                shape=(1, ),
                                dtype=str_type)
    fset3[:] = "2012-07-11T09:15"
    fset3.attrs['file_creation_datetime'] = 'time'
    gset = f1.create_group("spectromicroscopy")
    gset2 = gset.create_group("normalization")
    gset3 = gset2.create_dataset("white_spectrum", data=inumber)
    gset4 = gset2.create_dataset("white_spectrum_energy", data=enumber)
    gset4.attrs['units'] = 'eV'
    hset = f1.create_dataset("version", shape=(1, ), dtype=str_type)
    hset[:] = '1.0'
    f1.close()
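
Reading such a file back only requires the group and dataset names used above. A hedged sketch ('stack.h5' is a placeholder for the file_name argument):

import h5py

with h5py.File('stack.h5', 'r') as f:
    data = f['exchange/data'][:]
    energies = f['exchange/energy'][:]
    version = f['version'][0]
    print('stack shape %s, %d energies, format version %s' %
          (str(data.shape), len(energies), version))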
Example #51
0
    def add_interaction(self, name, body1_name, contactor1_name=None,
                        body2_name=None, contactor2_name=None,
                        distance_calculator='cadmbtb',
                        offset1=0.0, offset2=0.0):
        """
        Add permanent interactions between two objects contactors.
        """
        if name not in self.permanent_interactions():
            pinter = self.permanent_interactions().\
                      create_dataset(name, (1,),
                                     dtype=h5py.new_vlen(str))
            pinter.attrs['id'] = self._number_of_permanent_interactions
            pinter.attrs['type'] = 'permanent_interaction'
            pinter.attrs['body1_name'] = body1_name
            pinter.attrs['body2_name'] = body2_name
            if contactor1_name is not None:
                pinter.attrs['contactor1_name'] = contactor1_name
            if contactor2_name is not None:
                pinter.attrs['contactor2_name'] = contactor2_name
            pinter.attrs['distance_calculator'] = distance_calculator
            pinter.attrs['offset1'] = offset1
            pinter.attrs['offset2'] = offset2

            self._number_of_permanent_interactions += 1
Example #52
0
    def write_to_dataset(self, data, name, i=None):
        if self.logger != None:
            self.logger.debug("Write dataset %s of event %i." % (name,i))
        if name not in self.f:
            t0 = time.time()
            if numpy.isscalar(data):
                if i is None:
                    s = [1]
                else:
                    s = [self.N]
                t = numpy.dtype(type(data))
                if t == "S":
                    t = h5py.new_vlen(str)
                axes = "experiment_identifier:value"
            else:
                s = list(data.shape)
                if i is not None:
                    s.insert(0, self.N)
                t = data.dtype
                axes = "experiment_identifier:y:x"
            self.f.create_dataset(name, s, t)
            self.f[name].attrs.modify("axes", [axes])
            t1 = time.time()
            if self.logger != None:
                self.logger.debug("Create dataset %s within %.1f sec.",name,t1-t0)

        if i is None:
            if numpy.isscalar(data):
                self.f[name][0] = data
            else:
                self.f[name][:] = data[:]
        else:
            if numpy.isscalar(data):
                self.f[name][i] = data
            else:
                self.f[name][i, :] = data[:]
Example #53
0
    def postprocessClusterSubResult(self, roi, result, blockwise_fileset):
        """
        This function is only used by special cluster scripts.
        
        When the batch-processing mechanism was rewritten, this function broke.
        It could probably be fixed with minor changes.
        """
        # TODO: Here, we hard-code to select from the first lane only.
        opBatchClassify = self.opBatchClassify[0]
        
        from lazyflow.utility.io_util.blockwiseFileset import vectorized_pickle_dumps
        # Assume that roi always starts as a multiple of the blockshape
        block_shape = opBatchClassify.get_blockshape()
        assert all(block_shape == blockwise_fileset.description.sub_block_shape), "block shapes don't match"
        assert all((roi[0] % block_shape) == 0), "Sub-blocks must exactly correspond to the blockwise object classification blockshape"
        sub_block_index = roi[0] / blockwise_fileset.description.sub_block_shape

        sub_block_start = sub_block_index
        sub_block_stop = sub_block_start + 1
        sub_block_roi = (sub_block_start, sub_block_stop)
        
        # FIRST, remove all objects that lie outside the block (i.e. remove the ones in the halo)
        region_features = opBatchClassify.BlockwiseRegionFeatures( *sub_block_roi ).wait()
        region_features_dict = region_features.flat[0]
        region_centers = region_features_dict['Default features']['RegionCenter']

        opBlockPipeline = opBatchClassify._blockPipelines[ tuple(roi[0]) ]

        # Compute the block offset within the image coordinates
        halo_roi = opBlockPipeline._halo_roi

        translated_region_centers = region_centers + halo_roi[0][1:-1]

        # TODO: If this is too slow, vectorize this
        mask = numpy.zeros( region_centers.shape[0], dtype=numpy.bool_ )
        for index, translated_region_center in enumerate(translated_region_centers):
            # FIXME: Here we assume t=0 and c=0
            mask[index] = opBatchClassify.is_in_block( roi[0], (0,) + tuple(translated_region_center) + (0,) )
        
        # Always exclude the first object (it's the background??)
        mask[0] = False
        
        # Remove all 'negative' predictions, emit only 'positive' predictions
        # FIXME: Don't hardcode this?
        POSITIVE_LABEL = 2
        objectwise_predictions = opBlockPipeline.ObjectwisePredictions([]).wait()[0]
        assert objectwise_predictions.shape == mask.shape
        mask[objectwise_predictions != POSITIVE_LABEL] = False

        filtered_features = {}
        for feature_group, feature_dict in region_features_dict.items():
            filtered_group = filtered_features[feature_group] = {}
            for feature_name, feature_array in feature_dict.items():
                filtered_group[feature_name] = feature_array[mask]

        # SECOND, translate from block-local coordinates to global (file) coordinates.
        # Unfortunately, we've got multiple translations to perform here:
        # Coordinates in the region features are relative to their own block INCLUDING HALO,
        #  so we need to add the start of the block-with-halo as an offset.
        # BUT the image itself may be offset relative to the BlockwiseFileset coordinates
        #  (due to the view_origin setting), so we also need to add an offset for that, too

        # Get the image offset relative to the file coordinates
        image_offset = blockwise_fileset.description.view_origin
        
        total_offset_5d = halo_roi[0] + image_offset
        total_offset_3d = total_offset_5d[1:-1]

        filtered_features["Default features"]["RegionCenter"] += total_offset_3d
        filtered_features["Default features"]["Coord<Minimum>"] += total_offset_3d
        filtered_features["Default features"]["Coord<Maximum>"] += total_offset_3d

        # Finally, write the features to hdf5
        h5File = blockwise_fileset.getOpenHdf5FileForBlock( roi[0] )
        if 'pickled_region_features' in h5File:
            del h5File['pickled_region_features']

        # Must use str dtype
        dtype = h5py.new_vlen(str)
        dataset = h5File.create_dataset( 'pickled_region_features', shape=(1,), dtype=dtype )
        pickled_features = vectorized_pickle_dumps(numpy.array((filtered_features,)))
        dataset[0] = pickled_features

        object_centers_xyz = filtered_features["Default features"]["RegionCenter"].astype(int)
        object_min_coords_xyz = filtered_features["Default features"]["Coord<Minimum>"].astype(int)
        object_max_coords_xyz = filtered_features["Default features"]["Coord<Maximum>"].astype(int)
        object_sizes = filtered_features["Default features"]["Count"][:,0].astype(int)

        # Also, write out selected features as a 'point cloud' csv file.
        # (Store the csv file next to this block's h5 file.)
        dataset_directory = blockwise_fileset.getDatasetDirectory(roi[0])
        pointcloud_path = os.path.join( dataset_directory, "block-pointcloud.csv" )
        
        logger.info("Writing to csv: {}".format( pointcloud_path ))
        with open(pointcloud_path, "w") as fout:
            csv_writer = csv.DictWriter(fout, OUTPUT_COLUMNS, **CSV_FORMAT)
            csv_writer.writeheader()
        
            for obj_id in range(len(object_sizes)):
                fields = {}
                fields["x_px"], fields["y_px"], fields["z_px"], = object_centers_xyz[obj_id]
                fields["min_x_px"], fields["min_y_px"], fields["min_z_px"], = object_min_coords_xyz[obj_id]
                fields["max_x_px"], fields["max_y_px"], fields["max_z_px"], = object_max_coords_xyz[obj_id]
                fields["size_px"] = object_sizes[obj_id]

                csv_writer.writerow( fields )
                #fout.flush()
        
        logger.info("FINISHED csv export")
Example #54
0
    #processing potentials
    voltage_path = os.path.join(path_folder, 'v', 'tmp')
    voltage_name = 'v'
    h = read_analog(voltage_path, voltage_name, t_max, all_pops, h)
    print('Done processing all voltages')

    #processing input spike data
    spike_path = os.path.join(path_folder, 'input.dat')
    spike_name = 'input'
    h = read_digital(spike_path, spike_name, t_max, all_pops, h)
    print('Done processing all input spikes')

    #processing output spike data
    spike_path = os.path.join(path_folder, 'output.dat')
    spike_name = 'output'
    h = read_digital(spike_path, spike_name, t_max, all_pops, h)
    print('Done processing all output spikes')

    #processing log information
    log_file = os.path.join(path_folder, log_filename)
    f = open(log_file, 'r')
    str_type = h5py.new_vlen(str)
    #ds = h.create_dataset('/model/log.txt', data=f.read().replace('\n',''), dtype=str_type)
    ds = h.create_dataset('/model/log.txt', data=f.readlines(), dtype=str_type)
    f.close()

    # #process time data
    add_time(h, t_max, t_step)

    h.close()
Example #55
0
def _h5write(filename, mode, *args, **kwargs):
    """\
    _h5write(filename, mode, {'var1'=..., 'var2'=..., ...})
    _h5write(filename, mode, var1=..., var2=..., ...)
    _h5write(filename, mode, dict, var1=..., var2=...)
    
    Writes variables var1, var2, ... to the file filename. The file mode
    can be chosen according to the h5py documentation. The key-value
    arguments take precedence over the provided dictionary.

    supported variable types are:
    * scalars
    * numpy arrays
    * strings
    * lists
    * dictionaries

    (if the option UNSUPPORTED is equal to 'pickle', any other type
    is pickled and saved. UNSUPPORTED = 'ignore' silently eliminates
    unsupported types. Default is 'fail', which raises an error.) 
    
    The mode defaults to overwriting an existing file.
    """

    filename = os.path.abspath(os.path.expanduser(filename))

    ctime = time.asctime()
    mtime = ctime

    # Update input dictionary
    if args:
        d = args[0].copy()  # shallow copy
    else:
        d = {}
    d.update(kwargs)

    # List of object ids to make sure we are not saving something twice.
    ids = []

    # This is needed to store strings
    dt = h5py.new_vlen(str)

    def check_id(id):
        if id in ids:
            raise RuntimeError('Circular reference detected! Aborting save.')
        else:
            ids.append(id)

    def pop_id(id):
        ids[:] = [x for x in ids if x != id]

    #@sdebug
    def _store_numpy(group, a, name, compress=True):
        if compress:
            dset = group.create_dataset(name, data=a, compression='gzip')
        else:
            dset = group.create_dataset(name, data=a)
        dset.attrs['type'] = 'array'
        return dset

    #@sdebug
    def _store_string(group, s, name):
        dset = group.create_dataset(name, data=np.asarray(s), dtype=dt)
        dset.attrs['type'] = 'string'
        return dset

    #@sdebug
    def _store_unicode(group, s, name):
        dset = group.create_dataset(name,
                                    data=np.asarray(s.encode('utf8')),
                                    dtype=dt)
        dset.attrs['type'] = 'unicode'
        return dset

    #@sdebug
    def _store_list(group, l, name):
        check_id(id(l))
        arrayOK = len(set([type(x) for x in l])) == 1
        if arrayOK:
            try:
                # Try conversion to a numpy array
                la = np.array(l)
                if la.dtype.type is np.string_:
                    arrayOK = False
                else:
                    dset = _store_numpy(group, la, name)
                    dset.attrs['type'] = 'arraylist'
            except:
                arrayOK = False
        if not arrayOK:
            # inhomogeneous list: store all elements individually
            dset = group.create_group(name)
            for i, v in enumerate(l):
                _store(dset, v, '%05d' % i)
            dset.attrs['type'] = 'list'
        pop_id(id(l))
        return dset

    #@sdebug
    def _store_tuple(group, t, name):
        dset = _store_list(group, list(t), name)
        dset_type = dset.attrs['type']
        dset.attrs[
            'type'] = 'arraytuple' if dset_type == 'arraylist' else 'tuple'
        return dset

    #@sdebug
    def _store_dict(group, d, name):
        check_id(id(d))
        if any([type(k) not in [str, unicode] for k in d.keys()]):
            raise RuntimeError(
                'Only dictionaries with string keys are supported.')
        dset = group.create_group(name)
        dset.attrs['type'] = 'dict'
        for k, v in d.iteritems():
            if k.find('/') > -1:
                k = k.replace('/', h5options['SLASH_ESCAPE'])
                ndset = _store(dset, v, k)
                if ndset is not None:
                    ndset.attrs['escaped'] = '1'
            else:
                _store(dset, v, k)
        pop_id(id(d))
        return dset

    def _store_dict_new(group, d, name):
        check_id(id(d))
        dset = group.create_group(name)
        dset.attrs['type'] = 'dict'
        for i, kv in enumerate(d.iteritems()):
            _store(dset, kv, '%05d' % i)
        pop_id(id(d))
        return dset

    #@sdebug
    def _store_None(group, a, name):
        dset = group.create_dataset(name, data=np.zeros((1, )))
        dset.attrs['type'] = 'None'
        return dset

    #@sdebug
    def _store_pickle(group, a, name):
        apic = cPickle.dumps(a)
        dset = group.create_dataset(name, data=np.asarray(apic), dtype=dt)
        dset.attrs['type'] = 'pickle'
        return dset

    #@sdebug
    def _store(group, a, name):
        if type(a) is str:
            dset = _store_string(group, a, name)
        elif type(a) is unicode:
            dset = _store_unicode(group, a, name)
        elif type(a) is dict:
            dset = _store_dict(group, a, name)
        elif type(a) is list:
            dset = _store_list(group, a, name)
        elif type(a) is tuple:
            dset = _store_tuple(group, a, name)
        elif type(a) is np.ndarray:
            dset = _store_numpy(group, a, name)
        elif np.isscalar(a):
            dset = _store_numpy(group, np.asarray(a), name, compress=False)
            dset.attrs['type'] = 'scalar'
        elif a is None:
            dset = _store_None(group, a, name)
        else:
            if h5options['UNSUPPORTED'] == 'fail':
                raise RuntimeError('Unsupported data type : %s' % type(a))
            elif h5options['UNSUPPORTED'] == 'pickle':
                dset = _store_pickle(group, a, name)
            else:
                dset = None
        return dset

    # Open the file and save everything
    with h5py.File(filename, mode) as f:
        f.attrs['h5rw_version'] = h5options['H5RW_VERSION']
        f.attrs['ctime'] = ctime
        f.attrs['mtime'] = mtime
        for k, v in d.iteritems():
            _store(f, v, k)

    return
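
A hedged usage sketch for _h5write, exercising the supported types listed in the docstring ('results.h5' is a placeholder path and the variable names are made up):

import numpy as np

_h5write('results.h5', 'w',
         {'description': 'test run', 'counts': [1, 2, 3]},
         image=np.zeros((4, 4)),
         parameters={'exposure': 0.1, 'gain': 2})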
Example #56
0
def agglomeration(options, agglom_stack, supervoxels, prediction, image_stack,
                  session_location, sp_outs, master_logger):

    seg_thresholds = sorted(options.segmentation_thresholds)
    for threshold in seg_thresholds:
        if threshold != 0 or not options.use_neuroproof:
            master_logger.info("Starting agglomeration to threshold " +
                               str(threshold) + " with " +
                               str(agglom_stack.number_of_nodes()))
            agglom_stack.agglomerate(threshold)
            master_logger.info("Finished agglomeration to threshold " +
                               str(threshold) + " with " +
                               str(agglom_stack.number_of_nodes()))

            if options.inclusion_removal:
                inclusion_removal(agglom_stack, master_logger)

        segmentation = agglom_stack.get_segmentation()

        if options.h5_output:
            imio.write_image_stack(segmentation,
                                   session_location + "/agglom-" +
                                   str(threshold) + ".lzf.h5",
                                   compression='lzf')

        md5hex = hashlib.md5(' '.join(sys.argv)).hexdigest()
        file_base = os.path.abspath(session_location) + "/seg_data/seg-" + str(
            threshold) + "-" + md5hex + "-"
        transforms = imio.compute_sp_to_body_map(supervoxels, segmentation)
        seg_loc = file_base + "v1.h5"
        if not os.path.exists(session_location + "/seg_data"):
            os.makedirs(session_location + "/seg_data")
        imio.write_mapped_segmentation(supervoxels, transforms, seg_loc)

        if options.synapse_file is not None:
            h5temp = h5py.File(seg_loc, 'a')
            syn_data = json.load(open((options.synapse_file)))
            meta = syn_data['metadata']
            meta['username'] = "******"
            syn_data_str = json.dumps(syn_data, indent=4)
            str_type = h5py.new_vlen(str)
            ds = h5temp.create_dataset("synapse-annotations",
                                       data=syn_data_str,
                                       shape=(1, ),
                                       dtype=str_type)

        graph_loc = file_base + "graphv1.json"

        json_data = {}
        json_data['graph'] = graph_loc
        json_data['border'] = options.border_size
        subvolume = {}
        subvolume['segmentation-file'] = seg_loc
        subvolume['prediction-file'] = os.path.abspath(
            session_location) + "/STACKED_prediction.h5"

        gray_file_whole = os.path.abspath(glob.glob(options.image_stack)[0])
        gray_path = os.path.dirname(gray_file_whole)

        gray_file = os.path.basename(gray_file_whole)
        field_width = len(re.findall(r'\d', gray_file))
        field_rep = "%%0%dd" % field_width
        gray_file = re.sub(r'\d+', field_rep, gray_file)

        subvolume['grayscale-files'] = gray_path + "/" + gray_file

        # get extant
        x1 = options.border_size
        y1 = options.border_size
        z1 = options.border_size
        z2, y2, x2 = supervoxels.shape
        z2 = z2 - options.border_size - 1
        y2 = y2 - options.border_size - 1
        x2 = x2 - options.border_size - 1
        extant = re.findall(r'\d+-\d+_\d+-\d+_\d+-\d+', gray_path)
        if len(extant) > 0:
            bbox = extant[0]
            x1, x2, y1, y2, z1, z2 = re.findall(r'\d+', bbox)
        subvolume["far-upper-right"] = [int(x2), int(y2), int(z2)]
        subvolume["near-lower-left"] = [int(x1), int(y1), int(z1)]

        json_data['subvolumes'] = [subvolume]

        agglom_stack.write_plaza_json(graph_loc, options.synapse_file,
                                      (int(z1) - (options.border_size)))

        # write out json file
        json_str = json.dumps(json_data, indent=4)
        json_file = session_location + "/seg-" + str(
            threshold) + "-" + md5hex + "-v1.json"
        jw = open(json_file, 'w')
        jw.write(json_str)
Example #57
0
    def postprocessClusterSubResult(self, roi, result, blockwise_fileset):
        """
        This function is only used by special cluster scripts.
        
        When the batch-processing mechanism was rewritten, this function broke.
        It could probably be fixed with minor changes.
        """
        # TODO: Here, we hard-code to select from the first lane only.
        opBatchClassify = self.opBatchClassify[0]
        
        from lazyflow.utility.io.blockwiseFileset import vectorized_pickle_dumps
        # Assume that roi always starts as a multiple of the blockshape
        block_shape = opBatchClassify.get_blockshape()
        assert all(block_shape == blockwise_fileset.description.sub_block_shape), "block shapes don't match"
        assert all((roi[0] % block_shape) == 0), "Sub-blocks must exactly correspond to the blockwise object classification blockshape"
        sub_block_index = roi[0] / blockwise_fileset.description.sub_block_shape

        sub_block_start = sub_block_index
        sub_block_stop = sub_block_start + 1
        sub_block_roi = (sub_block_start, sub_block_stop)
        
        # FIRST, remove all objects that lie outside the block (i.e. remove the ones in the halo)
        region_features = opBatchClassify.BlockwiseRegionFeatures( *sub_block_roi ).wait()
        region_features_dict = region_features.flat[0]
        region_centers = region_features_dict['Default features']['RegionCenter']

        opBlockPipeline = opBatchClassify._blockPipelines[ tuple(roi[0]) ]

        # Compute the block offset within the image coordinates
        halo_roi = opBlockPipeline._halo_roi

        translated_region_centers = region_centers + halo_roi[0][1:-1]

        # TODO: If this is too slow, vectorize this
        mask = numpy.zeros( region_centers.shape[0], dtype=numpy.bool_ )
        for index, translated_region_center in enumerate(translated_region_centers):
            # FIXME: Here we assume t=0 and c=0
            mask[index] = opBatchClassify.is_in_block( roi[0], (0,) + tuple(translated_region_center) + (0,) )
        
        # Always exclude the first object (it's the background??)
        mask[0] = False
        
        # Remove all 'negative' predictions, emit only 'positive' predictions
        # FIXME: Don't hardcode this?
        POSITIVE_LABEL = 2
        objectwise_predictions = opBlockPipeline.ObjectwisePredictions([]).wait()[0]
        assert objectwise_predictions.shape == mask.shape
        mask[objectwise_predictions != POSITIVE_LABEL] = False

        filtered_features = {}
        for feature_group, feature_dict in region_features_dict.items():
            filtered_group = filtered_features[feature_group] = {}
            for feature_name, feature_array in feature_dict.items():
                filtered_group[feature_name] = feature_array[mask]

        # SECOND, translate from block-local coordinates to global (file) coordinates.
        # Unfortunately, we've got multiple translations to perform here:
        # Coordinates in the region features are relative to their own block INCLUDING HALO,
        #  so we need to add the start of the block-with-halo as an offset.
        # BUT the image itself may be offset relative to the BlockwiseFileset coordinates
        #  (due to the view_origin setting), so we also need to add an offset for that, too

        # Get the image offset relative to the file coordinates
        image_offset = blockwise_fileset.description.view_origin
        
        total_offset_5d = halo_roi[0] + image_offset
        total_offset_3d = total_offset_5d[1:-1]

        filtered_features["Default features"]["RegionCenter"] += total_offset_3d
        filtered_features["Default features"]["Coord<Minimum>"] += total_offset_3d
        filtered_features["Default features"]["Coord<Maximum>"] += total_offset_3d

        # Finally, write the features to hdf5
        h5File = blockwise_fileset.getOpenHdf5FileForBlock( roi[0] )
        if 'pickled_region_features' in h5File:
            del h5File['pickled_region_features']

        # Must use str dtype
        dtype = h5py.new_vlen(str)
        dataset = h5File.create_dataset( 'pickled_region_features', shape=(1,), dtype=dtype )
        pickled_features = vectorized_pickle_dumps(numpy.array((filtered_features,)))
        dataset[0] = pickled_features

        object_centers_xyz = filtered_features["Default features"]["RegionCenter"].astype(int)
        object_min_coords_xyz = filtered_features["Default features"]["Coord<Minimum>"].astype(int)
        object_max_coords_xyz = filtered_features["Default features"]["Coord<Maximum>"].astype(int)
        object_sizes = filtered_features["Default features"]["Count"][:,0].astype(int)

        # Also, write out selected features as a 'point cloud' csv file.
        # (Store the csv file next to this block's h5 file.)
        dataset_directory = blockwise_fileset.getDatasetDirectory(roi[0])
        pointcloud_path = os.path.join( dataset_directory, "block-pointcloud.csv" )
        
        logger.info("Writing to csv: {}".format( pointcloud_path ))
        with open(pointcloud_path, "w") as fout:
            csv_writer = csv.DictWriter(fout, OUTPUT_COLUMNS, **CSV_FORMAT)
            csv_writer.writeheader()
        
            for obj_id in range(len(object_sizes)):
                fields = {}
                fields["x_px"], fields["y_px"], fields["z_px"], = object_centers_xyz[obj_id]
                fields["min_x_px"], fields["min_y_px"], fields["min_z_px"], = object_min_coords_xyz[obj_id]
                fields["max_x_px"], fields["max_y_px"], fields["max_z_px"], = object_max_coords_xyz[obj_id]
                fields["size_px"] = object_sizes[obj_id]

                csv_writer.writerow( fields )
                #fout.flush()
        
        logger.info("FINISHED csv export")
Example #58
0
import numpy as np
import h5py
import argparse
import sys
import os

import west

# h5py storage types
vstr_dtype = h5py.new_vlen(str)
idtype = np.dtype([('iter_name', vstr_dtype), ('string_index', np.int32)])

print '-----------------------'
print os.path.basename(__file__)
print '-----------------------'
env = os.environ
for k in env:
    if 'WEST' in k:
        print k, env[k]

parser = argparse.ArgumentParser('get_strings',
                                 description='''\
        Retrieve strings from west.h5 file and write them to new file
        ''')

west.rc.add_args(parser)
parser.add_argument('-o', dest='h5out', help='name of output file')

args = parser.parse_args()
west.rc.process_args(args)
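
For illustration, a hedged sketch of writing records with the compound idtype defined above (the file name 'strings_index.h5' and the record values are made up; they are not part of this script):

records = np.zeros(2, dtype=idtype)
records['iter_name'] = ['iter_000001', 'iter_000002']
records['string_index'] = [0, 5]

out = h5py.File('strings_index.h5', 'w')
out.create_dataset('string_index', data=records)
out.close()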