Example #1
class Particle(tables.IsDescription):
    identity = tables.StringCol(itemsize=22, dflt=" ",
                                pos=0)  # 22-character string
    idnumber = tables.Int16Col(dflt=1, pos=1)  # short integer
    speed = tables.Float32Col(dflt=1, pos=2)  # single-precision float
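
A minimal usage sketch, assuming the Particle description above; the file name, table name and values are illustrative, not part of the original example:

import tables

with tables.open_file("particles.h5", mode="w") as h5file:
    table = h5file.create_table("/", "readout", Particle, title="Particle readout")
    row = table.row
    row["identity"] = "particle: 0"
    row["idnumber"] = 0
    row["speed"] = 12.5
    row.append()
    table.flush()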
Example #2
def homogeneousTableInfo(input_handler, first_line, data):
    """Return useful information about the `tables.Table` being created.

    The `data` array is homogeneous, i.e. all fields have the same dtype.

    :Parameters:

    - `input_handler`: the file handler of the inspected `CSV` file
    - `first_line`: a ``numpy`` array which contains the first line of the 
      `CSV` file
    - `data`: a numpy array which contains the second line of the `CSV` file
    """

    has_header = False
    # If the dtype is a string, ask the user whether the table has a header.
    # Then find out the biggest itemsize
    if data.dtype.name.startswith('string'):
        answer = askForHelp(first_line)
        buf_size = 1024 * 1024
        read_fh = input_handler.readlines
        input_handler.seek(0)
        if answer == 'Header':
            # Skip the header
            has_header = True
            input_handler.readline()
        itemsize = 0
        buf = read_fh(buf_size)
        if not buf:
            # If the CSV file contains just one line
            itemsize = first_line.dtype.itemsize
        while buf:
            temp_file = tempfile.TemporaryFile()
            temp_file.writelines(buf)
            temp_file.seek(0)
            idata = numpy.genfromtxt(temp_file, delimiter=',', dtype=None)
            itemsize = max(itemsize, idata.dtype.itemsize)
            del idata
            temp_file.close()
            buf = read_fh(buf_size)
    elif first_line.dtype.name.startswith('string'):
        has_header = True

    # Iterate over the data fields and make the table description
    # If the CSV file contains just one field then first_line is a
    # scalar array and cannot be iterated so we reshape it
    if first_line.shape == ():
        first_line = first_line.reshape(1, )
    indices = range(0, first_line.shape[0])

    if has_header:
        if data.dtype.name.startswith('string'):
            descr = dict([(first_line[i], tables.StringCol(itemsize, pos=i))
                          for i in indices])
        else:
            descr = dict([(first_line[i],
                           tables.Col.from_dtype(data.dtype, pos=i))
                          for i in indices])
    else:
        if data.dtype.name.startswith('string'):
            descr = dict(
                [('f{0}'.format(field), tables.StringCol(itemsize)) \
                for field in indices])
        else:
            descr = dict(
                [('f{0}'.format(field), tables.Col.from_dtype(data.dtype)) \
                for field in indices])

    return descr, has_header
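
The returned description dictionary and header flag are meant to be passed on to PyTables table creation; a hedged sketch (h5file is assumed to be an already-open tables.File, and the node names are illustrative):

descr, has_header = homogeneousTableInfo(input_handler, first_line, data)
table = h5file.create_table("/imports", "csv_table", description=descr,
                            createparents=True)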
Example #3
class FloatScalar(tables.IsDescription):
    name = tables.StringCol(_max_name_len)
    value = tables.FloatCol()
Example #4
import csv
import logging
import time

import numpy as np
import tables

from six import print_

FORMAT = "%(asctime)-15s -8s %(message)s"

logging.basicConfig(format=FORMAT, level=logging.INFO)


class Usage(Exception):
    def __init__(self, msg):
        self.msg = msg


OHLCTableDescription = {
    'sid': tables.StringCol(14, pos=2),
    'dt': tables.Int64Col(pos=1),
    'open': tables.Float64Col(dflt=np.nan, pos=3),
    'high': tables.Float64Col(dflt=np.nan, pos=4),
    'low': tables.Float64Col(dflt=np.nan, pos=5),
    'close': tables.Float64Col(dflt=np.nan, pos=6),
    "volume": tables.Int64Col(dflt=0, pos=7)
}


def process_line(line):
    dt = np.datetime64(line["dt"]).astype(np.int64)
    sid = line["sid"]
    open_p = float(line["open"])
    high_p = float(line["high"])
    low_p = float(line["low"])
Example #5
class TileDescriptor(tb.IsDescription):
    id = tb.Int64Col(pos=0)
    name = tb.StringCol(8, pos=1)
    size = tb.Int64Col(pos=2)
    matchs_counted = tb.BoolCol(pos=3)
Example #6
class MovieInfo(PT.IsDescription):
    cam_id = PT.StringCol(16, pos=0)
    filename = PT.StringCol(255, pos=1)
    approx_start_frame = PT.Int64Col(pos=2)
    approx_stop_frame = PT.Int64Col(pos=3)
Example #7
class Record(tb.IsDescription):
    var1 = tb.StringCol(itemsize=4)  # 4-character String
    var2 = tb.IntCol()              # integer
    var3 = tb.Int16Col()            # short integer
    var4 = tb.FloatCol()            # double (double-precision)
    var5 = tb.Float32Col()          # float  (single-precision)
Example #8
def spotDesc(maxRGPLen):
    return { 
            'spot': tables.UInt32Col(),
            'RGP': tables.StringCol(itemsize=maxRGPLen)
        }
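
spotDesc, like the similar helpers in the later examples, returns a plain dictionary, which PyTables accepts directly as a table description; a minimal sketch (h5file, the node name and the itemsize of 64 are assumptions):

spot_table = h5file.create_table("/", "spots", description=spotDesc(64))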
Example #9
class Particle(tables.IsDescription):
    name = tables.StringCol(16, pos=1)  # 16-character String
    lati = tables.Int32Col(pos=2)  # integer
    longi = tables.Int32Col(pos=3)  # integer
    vector = tables.Int32Col(shape=(2, ), pos=4)  # Integer
    matrix2D = tables.Float64Col(shape=(2, 2), pos=5)
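
Multidimensional columns such as vector and matrix2D are filled with array-like values; a short sketch, assuming a table already created from this Particle description:

row = table.row
row["name"] = "particle: 0"
row["lati"] = 0
row["longi"] = 0
row["vector"] = [1, 2]
row["matrix2D"] = [[0.0, 1.0], [2.0, 3.0]]
row.append()
table.flush()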
Example #10
def graphDesc(maxGeneIDLen):
    return {
            'geneTarget': tables.StringCol(itemsize=maxGeneIDLen),
            'geneSource': tables.StringCol(itemsize=maxGeneIDLen)
        }
Example #11
def RGPDesc(maxRGPLen, maxGeneLen):
    return { 
            'RGP': tables.StringCol(itemsize=maxRGPLen),
            'gene': tables.StringCol(itemsize=maxGeneLen)
        }
Example #12
def gene2famDesc(geneFamNameLen, geneIDLen):
    return {
        "geneFam": tables.StringCol(itemsize = geneFamNameLen),
        "gene":tables.StringCol(itemsize= geneIDLen)
        }
Example #13
def geneFamDesc(maxNameLen, maxSequenceLength, maxPartLen):
    return {
        "name": tables.StringCol(itemsize=maxNameLen),
        "protein": tables.StringCol(itemsize=maxSequenceLength),
        "partition": tables.StringCol(itemsize=maxPartLen)
        }
Example #14
def geneSequenceDesc(geneIDLen, geneSeqLen, geneTypeLen):
    return {
        "gene":tables.StringCol(itemsize=geneIDLen),
        "dna":tables.StringCol(itemsize=geneSeqLen),
        "type":tables.StringCol(itemsize=geneTypeLen)
    }
Example #15
class CamSyncInfo(PT.IsDescription):
    cam_id = PT.StringCol(256, pos=0)
    camn = PT.UInt16Col(pos=1)
    hostname = PT.StringCol(2048, pos=2)
Example #16
class TimeSeriesGrid(tb.IsDescription):
    sat_name = tb.StringCol(20, pos=1)
    ref_time = tb.StringCol(20, pos=2)
    date = tb.StringCol(20, pos=3)
    year = tb.Int32Col(pos=4)
    month = tb.Int32Col(pos=5)
Example #17
class HostClockInfo(PT.IsDescription):
    remote_hostname = PT.StringCol(255, pos=0)
    start_timestamp = PT.FloatCol(pos=1)
    remote_timestamp = PT.FloatCol(pos=2)
    stop_timestamp = PT.FloatCol(pos=3)
Example #18
    def setup(self):
        """
        Setup the tables
        """
        from pywr.parameters import IndexParameter
        import tables

        # The first dimension is the number of timesteps.
        # The following dimensions are sized per scenario
        scenario_shape = list(self.model.scenarios.shape)
        shape = [len(self.model.timestepper)] + scenario_shape

        self.h5store = H5Store(self.h5file,
                               self.filter_kwds,
                               self.mode,
                               title=self.title,
                               metadata=self.metadata,
                               create_directories=self.create_directories)

        # Create a CArray for each node
        self._arrays = {}

        # Default to all nodes if None given.
        if self.nodes is None:
            nodes = [((self.where + "/" + n.name).replace("//", "/"), n)
                     for n in self.model.nodes.values()]
        else:
            nodes = []
            for n in self.nodes:

                try:
                    where, node = n
                except (TypeError, ValueError):
                    node = n
                    where = self.where + "/" + node

                # Accept a str, and lookup node by name instead.
                if isinstance(node, str):
                    node = self.model.nodes[node]
                # Otherwise assume it is a node object anyway

                where = where.replace("//", "/")
                nodes.append((where, node))

        if self.parameters is not None:
            nodes.extend(self.parameters)

        self._nodes = nodes

        for where, node in self._nodes:
            if isinstance(node, IndexParameter):
                atom = tables.Int32Atom()
            else:
                atom = tables.Float64Atom()
            group_name, node_name = where.rsplit("/", 1)
            if group_name == "":
                group_name = "/"
            self.h5store.file.create_carray(group_name,
                                            node_name,
                                            atom,
                                            shape,
                                            createparents=True)

        # Create scenario tables
        if self.scenarios is not None:
            group_name, node_name = self.scenarios.rsplit('/', 1)
            if group_name == "":
                group_name = "/"
            description = {
                # TODO make string length configurable
                'name': tables.StringCol(1024),
                'size': tables.Int64Col()
            }
            tbl = self.h5store.file.create_table(group_name,
                                                 node_name,
                                                 description=description,
                                                 createparents=True)
            # Now add the scenarios
            entry = tbl.row
            for scenario in self.model.scenarios.scenarios:
                entry['name'] = scenario.name.encode('utf-8')
                entry['size'] = scenario.size
                entry.append()
            tbl.flush()

            if self.model.scenarios.user_combinations is not None:
                description = {
                    s.name: tables.Int64Col()
                    for s in self.model.scenarios.scenarios
                }
                tbl = self.h5store.file.create_table(group_name,
                                                     'scenario_combinations',
                                                     description=description)
                entry = tbl.row
                for comb in self.model.scenarios.user_combinations:
                    for s, i in zip(self.model.scenarios.scenarios, comb):
                        entry[s.name] = i
                    entry.append()
                tbl.flush()

        self.h5store = None
Example #19
class ExperimentInfo(PT.IsDescription):
    uuid = PT.StringCol(32, pos=0)
Example #20
    def reset(self):
        import tables

        mode = "r+"  # always need to append, as file already created in setup
        self.h5store = H5Store(self.h5file, self.filter_kwds, mode)
        self._arrays = {}
        for where, node in self._nodes:
            self._arrays[node] = self.h5store.file.get_node(where)

        self._time_table = None
        # Create time table
        # This is created in reset so that the table is always recreated
        if self.time is not None:
            group_name, node_name = self.time.rsplit('/', 1)
            if group_name == "":
                group_name = "/"
            description = {
                c: tables.Int64Col()
                for c in ('year', 'month', 'day', 'index')
            }

            try:
                self.h5store.file.remove_node(group_name, node_name)
            except tables.NoSuchNodeError:
                pass
            finally:
                self._time_table = self.h5store.file.create_table(
                    group_name,
                    node_name,
                    description=description,
                    createparents=True)

        self._routes_flow_array = None
        if self.routes_flows is not None:
            # Create a CArray for the flows
            # The first dimension is the number of timesteps.
            # The second dimension is the number of routes
            # The following dimensions are sized per scenario
            scenario_shape = list(self.model.scenarios.shape)
            shape = [
                len(self.model.timestepper),
                len(self.model.solver.routes)
            ] + scenario_shape
            atom = tables.Float64Atom()

            try:
                self.h5store.file.remove_node(self.where, self.routes_flows)
            except tables.NoSuchNodeError:
                pass
            finally:
                self._routes_flow_array = self.h5store.file.create_carray(
                    self.where,
                    self.routes_flows,
                    atom,
                    shape,
                    createparents=True)

            # Create routes table. This must be done in reset
            if self.routes is not None:
                group_name, node_name = self.routes.rsplit('/', 1)
                if group_name == "":
                    group_name = "/"

                description = {
                    # TODO make string length configurable
                    'start': tables.StringCol(1024),
                    'end': tables.StringCol(1024),
                }
                try:
                    self.h5store.file.remove_node(group_name, node_name)
                except tables.NoSuchNodeError:
                    pass
                finally:
                    tbl = self.h5store.file.create_table(
                        group_name,
                        node_name,
                        description=description,
                        createparents=True)

                entry = tbl.row
                for route in self.model.solver.routes:
                    node_first = route[0]
                    node_last = route[-1]

                    if node_first.parent is not None:
                        node_first = node_first.parent
                    if node_last.parent is not None:
                        node_last = node_last.parent

                    entry['start'] = node_first.name.encode('utf-8')
                    entry['end'] = node_last.name.encode('utf-8')
                    entry.append()

                tbl.flush()
Example #21
    DATETIME       12                timestamp
    YEAR           13                number
    VARCHAR        15                string
    BIT            16                number
    NEWDECIMAL     246               number
    ENUM           247               string
    TINY_BLOB      249               binary
    MEDIUM_BLOB    250               binary
    LONG_BLOB      251               binary
    BLOB           252               binary
    VAR_STRING     253               string
    STRING         254               string
    ============== ================= =============
    
"""

import tables as tb
import numpy as np

# see also:
# http://mysql-python.sourceforge.net/MySQLdb-1.2.2/public/MySQLdb.constants.FIELD_TYPE-module.html
map_numpy = {'VAR_STRING': str, 'LONG': np.int16, 'FLOAT': np.float64}

# TODO: Figure out variable length string atoms with references in the actual table. Or calibrate string field width (or both).
map_pytables = {
    'VAR_STRING': tb.StringCol(
        32
    ),  # 32 is relatively arbitrary - pytables doesn't support variable length strings
    'LONG': tb.Int16Col(),
    'FLOAT': tb.Float16Col()
}
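
A hedged sketch of how map_pytables could be used to assemble a table description from a MySQL schema; the column listing, the h5file handle and the node names are hypothetical:

columns = [("label", "VAR_STRING"), ("count", "LONG"), ("score", "FLOAT")]
description = {name: map_pytables[ftype] for name, ftype in columns}
mysql_table = h5file.create_table("/", "mysql_dump", description=description)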
Example #22
class Info(tables.IsDescription):
    _v_pos = 2
    Name = tables.StringCol(16, dflt='sample string')
    Value = tables.Float64Col()
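
The _v_pos attribute only matters once Info is nested inside another description, following the PyTables nested-table pattern; a sketch with an illustrative outer class:

class NestedRecord(tables.IsDescription):
    key = tables.Int32Col(pos=0)
    timestamp = tables.Float64Col(pos=1)
    info = Info()  # nested description; _v_pos = 2 orders it within NestedRecord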
Example #23
def determine_chunkshape(size1, size2):
    """
    Return the optimum chunk shape for a dataset of size (size1, size2)
    and update the cache size to accommodate the dataset.
    """
    c1 = int(size1 / 64. + 1)
    c2 = int(size2 / 64. + 1)
    #    update_cache_size()
    return (c1, c2)
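
# For example (hypothetical sizes): determine_chunkshape(1000, 200) returns (16, 4),
# since int(1000 / 64. + 1) == 16 and int(200 / 64. + 1) == 4.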


# The dictionary used to define axes tables
FTICR_AXISvp9 = {
    "itype": tables.Int32Col(),
    "size": tables.Int32Col(),
    "FTICR": tables.StringCol(itemsize=16),
    "sampling": tables.StringCol(itemsize=16),
    "specwidth": tables.Float32Col(),
    "highmass": tables.Float32Col(),
    "offsetfreq": tables.Float32Col(),
    "left_point": tables.Int32Col(),
    "calibA": tables.Float64Col(),
    "calibB": tables.Float64Col(),
    "calibC": tables.Float64Col(),
    "highfreq": tables.Float32Col(),
    "lowfreq": tables.Float32Col(),
}


class HDF5File(object):
    """
Example #24
class info3(tables.IsDescription):
    name = tables.StringCol(10)
    value = tables.Time64Col()
    y4 = tables.Float64Col(dflt=1, shape=(2, 3))
    z4 = tables.UInt8Col(dflt=1)
Example #25
def heterogeneousTableInfo(input_handler, first_line, data):
    """Return useful information about the `tables.Table` being created.

    The `data` array is heterogeneous, i.e. not all fields have the same
    dtype.

    :Parameters:

    - `input_handler`: the file handler of the inspected `CSV` file
    - `first_line`: a numpy array which contains the first line of the `CSV` 
      file
    - `data`: a numpy array which contains the second line of the `CSV` file
    """

    has_header = False
    if first_line.dtype.name.startswith('string'):
        has_header = True

    # Stuff used for finding out itemsizes of string fields
    itemsizes = {}
    for field in range(0, len(data.dtype)):
        if data.dtype[field].name.startswith('string'):
            itemsizes[field] = 0

    # If a dtype is a string, find out its biggest itemsize
    if itemsizes:
        buf_size = 1024 * 1024
        read_fh = input_handler.readlines
        input_handler.seek(0)
        if has_header:
            # Skip the header
            input_handler.readline()
        buf = read_fh(buf_size)
        while buf:
            temp_file = tempfile.TemporaryFile()
            temp_file.writelines(buf)
            for field in itemsizes.keys():
                temp_file.seek(0)
                idata = numpy.genfromtxt(temp_file,
                                         delimiter=',',
                                         usecols=(field, ),
                                         dtype=None)
                itemsizes[field] = \
                    max(itemsizes[field], idata.dtype.itemsize)
                del idata
            temp_file.close()
            buf = read_fh(buf_size)

    if has_header:
        descr = {}
        for i in range(0, first_line.size):
            dtype = data.dtype.fields['f{0}'.format(i)][0]
            descr[first_line[i]] = tables.Col.from_dtype(dtype, pos=i)
        for i in itemsizes:
            descr[first_line[i]] = tables.StringCol(itemsizes[i], pos=i)
    else:
        descr = dict([(f, tables.Col.from_dtype(t[0]))
                      for f, t in data.dtype.fields.items()])
        for i in itemsizes:
            descr['f{0}'.format(i)] = tables.StringCol(itemsizes[i])

    return descr, has_header
Example #26
class LoadcaseTable(tables.IsDescription):
    title = tables.StringCol(64)
    subtitle = tables.StringCol(64)
    label = tables.StringCol(64)
    subcase_id = tables.StringCol(64)
Example #27
class SpoolEvent(tables.IsDescription):
    EventName = tables.StringCol(32)
    Time = tables.Time64Col()
    EventDescr = tables.StringCol(256)
Example #28
class TextLogDescription(PT.IsDescription):
    mainbrain_timestamp = PT.FloatCol(pos=0)
    cam_id = PT.StringCol(255, pos=1)
    host_timestamp = PT.FloatCol(pos=2)
    message = PT.StringCol(255, pos=3)
Example #29
class Record(tb.IsDescription):
    var1 = tb.IntCol(pos=1)
    var2 = tb.StringCol(itemsize=1, pos=2)
    var3 = tb.FloatCol(pos=3)
Example #30
class Weights(tables.IsDescription):
    # Store both the image name and its value, for scoring against the answer
    name = tables.StringCol(itemsize=32)
    value = tables.Float32Col(shape=(1, 512, 14, 14))  # float (single-precision)