Esempio n. 1
0
    def harvest(self, *filenames):
        """
        Extract the variable data from the provided files.

        The data read from each file is appended to ``self.data``. Data
        which comes back as a pd.Series is passed element-wise through
        ``self.transform`` first, any other container is appended as it is.
        If the variable is already harvested, nothing is done.

        Args:
            *filenames (str): the files to extract from. The file extension
                              has to be in REGISTERED_FILEEXTENSIONS

        Returns:
            None
        """
        if self.is_harvested:
            return None

        # collect the per-file pieces and concatenate once at the end:
        # repeated concat copies the accumulated data for every file, and
        # this way self.data stays untouched if a later file is rejected
        harvested = []
        for filename in filenames:
            ext = f.strip_all_endings(filename)[1]
            # NOTE: asserts are stripped when running with -O; they are
            # kept so that callers still get an AssertionError
            assert ext in REGISTERED_FILEEXTENSIONS, "Filetype {} not known!".format(ext)
            assert os.path.exists(filename), "File {} does not exist!".format(filename)
            filedata = self.harvest_single_file(filename, ext)

            # only series data gets transformed
            if isinstance(filedata, pd.Series):
                filedata = filedata.map(self.transform)
            harvested.append(filedata)

        if harvested:
            self.data = pd.concat([self.data] + harvested)

        self.declare_harvested()
        return None
Esempio n. 2
0
def harvest(filenames, definitions, **kwargs):
    """
    Extract the variable data from the provided files

    Args:
        filenames (list): the files to extract from. The file extension
                          has to be in REGISTERED_FILEEXTENSIONS
        definitions (list): where to find the variable in a file. For .h5
                            files, definition[0] is the name of the node
                            below the root and definition[1] the column;
                            the first definition which can be found is
                            used. For .root files the definition is
                            unpacked into the arguments of root2rec.

    Keyword Args:
        transformation (func): will be applied to the read out data

    Returns:
        pd.Series
    """
    # collect the per-file series and concatenate once at the end;
    # the empty series is kept as the first element so that the result
    # is an (empty) series even if no filenames are given
    chunks = [pd.Series()]
    for filename in filenames:
        filetype = f.strip_all_endings(filename)[1]
        # NOTE: asserts are stripped when running with -O; they are
        # kept so that callers still get an AssertionError
        assert filetype in REGISTERED_FILEEXTENSIONS, "Filetype {} not known!".format(filetype)
        assert os.path.exists(filename), "File {} does not exist!".format(filename)
        Logger.debug("Attempting to harvest {1} file {0}".format(filename,filetype))

        tmpdata = pd.Series()
        if filetype == ".h5":
            # NOTE(review): openFile/getNode are the legacy camelCase
            # pytables names - confirm the installed version provides them
            if isinstance(filename, tables.table.Table):
                hdftable = filename
            else:
                hdftable = tables.openFile(filename)
            try:
                for definition in definitions:
                    try:
                        tmpdata = hdftable.getNode("/" + definition[0]).col(definition[1])
                    except tables.NoSuchNodeError:
                        Logger.debug("Can not find definition {0} in {1}! ".format(definition, filename))
                        continue
                    tmpdata = pd.Series(tmpdata, dtype=n.float64)
                    Logger.debug("Found {} entries in table for {}{}".format(len(tmpdata),definition[0],definition[1]))
                    break
            finally:
                # close the file also if reading fails unexpectedly
                hdftable.close()

        elif filetype == ".root":
            # NOTE(review): unlike for .h5 all definitions are read and the
            # last one wins - confirm that this is intended
            for definition in definitions:
                tmpdata = pd.Series(rn.root2rec(filename, *definition))

        if "transformation" in kwargs:
            tmpdata = tmpdata.map(kwargs['transformation'])
        chunks.append(tmpdata)

    return pd.concat(chunks)