class Particle(tables.IsDescription):
    """Row description for a particle table: a text identity plus an
    id number and a speed, in fixed column order 0, 1, 2."""
    identity = tables.StringCol(itemsize=22, dflt=" ", pos=0)  # character String
    idnumber = tables.Int16Col(dflt=1, pos=1)                  # short integer
    # BUG FIX: `speed` previously declared pos=1, colliding with `idnumber`;
    # column positions must be unique, and the 0,1,... pattern clearly
    # intends speed to be the third column.
    speed = tables.Float32Col(dflt=1, pos=2)                   # single-precision
def homogeneousTableInfo(input_handler, first_line, data):
    """Return useful information about the `tables.Table` being created.

    The `data` array is homogeneous, i.e. all fields have the same dtype.
    Returns a ``(descr, has_header)`` tuple where ``descr`` is a PyTables
    table description dict and ``has_header`` tells whether the first CSV
    line is a header row.

    :Parameters:

    - `input_handler`: the file handler of the inspected `CSV` file
    - `first_line`: a ``numpy`` array which contains the first line of the
      `CSV` file
    - `data`: a numpy array which contains the second line of the `CSV` file
    """
    has_header = False
    # If dtype is a string, ask to user if the table has a header or not.
    # Then find out the biggest itemsize
    if data.dtype.name.startswith('string'):
        # All-string data is ambiguous: only the user can say whether the
        # first row is a header (askForHelp is defined elsewhere in the file).
        answer = askForHelp(first_line)
        buf_size = 1024 * 1024  # scan the file in 1 MiB batches of lines
        read_fh = input_handler.readlines
        input_handler.seek(0)
        if answer == 'Header':
            # Skip the header
            has_header = True
            input_handler.readline()
        itemsize = 0
        buf = read_fh(buf_size)
        if not buf:
            # If the CSV file contains just one line
            itemsize = first_line.dtype.itemsize
        while buf:
            # Round-trip each batch through a temp file so genfromtxt can
            # infer the widest string itemsize seen anywhere in the file.
            temp_file = tempfile.TemporaryFile()
            temp_file.writelines(buf)
            temp_file.seek(0)
            idata = numpy.genfromtxt(temp_file, delimiter=',', dtype=None)
            itemsize = max(itemsize, idata.dtype.itemsize)
            del idata  # free the batch before reading the next one
            temp_file.close()
            buf = read_fh(buf_size)
    elif first_line.dtype.name.startswith('string'):
        # Numeric data but a string first row: it must be a header.
        has_header = True

    # Iterate over the data fields and make the table description
    # If the CSV file contains just one field then first_line is a
    # scalar array and cannot be iterated so we reshape it
    if first_line.shape == ():
        first_line = first_line.reshape(1, )
    indices = range(0, first_line.shape[0])

    if has_header:
        # Header values become the column names; `pos` keeps CSV order.
        if data.dtype.name.startswith('string'):
            descr = dict([(first_line[i], tables.StringCol(itemsize, pos=i))
                          for i in indices])
        else:
            descr = dict([(first_line[i],
                           tables.Col.from_dtype(data.dtype, pos=i))
                          for i in indices])
    else:
        # No header: synthesise numpy-style field names f0, f1, ...
        if data.dtype.name.startswith('string'):
            descr = dict(
                [('f{0}'.format(field), tables.StringCol(itemsize))
                 for field in indices])
        else:
            descr = dict(
                [('f{0}'.format(field), tables.Col.from_dtype(data.dtype))
                 for field in indices])

    return descr, has_header
class FloatScalar(tables.IsDescription):
    """Table row holding one named double-precision scalar."""
    # NOTE(review): `_max_name_len` is a module-level constant defined
    # elsewhere in the file.
    name = tables.StringCol(_max_name_len)  # scalar's label
    value = tables.FloatCol()               # double-precision value
import csv
import time
from six import print_

# Log-line layout used by basicConfig below.
# NOTE(review): the bare "-8s" is printed literally; it looks like a garbled
# "%(levelname)-8s" — confirm the intended format.
FORMAT = "%(asctime)-15s -8s %(message)s"
# NOTE(review): `logging`, `tables` and `np` are used here but not imported
# in this view — presumably imported earlier in the file.
logging.basicConfig(format=FORMAT, level=logging.INFO)


class Usage(Exception):
    """Exception carrying a usage/error message for the script entry point."""

    def __init__(self, msg):
        self.msg = msg  # human-readable message to display


# PyTables row description for an OHLC (open/high/low/close) bar table.
# `pos` fixes column order; prices default to NaN, volume to 0.
OHLCTableDescription = {'sid': tables.StringCol(14, pos=2),
                        'dt': tables.Int64Col(pos=1),
                        'open': tables.Float64Col(dflt=np.NaN, pos=3),
                        'high': tables.Float64Col(dflt=np.NaN, pos=4),
                        'low': tables.Float64Col(dflt=np.NaN, pos=5),
                        'close': tables.Float64Col(dflt=np.NaN, pos=6),
                        "volume": tables.Int64Col(dflt=0, pos=7)}


def process_line(line):
    """Convert one CSV row (a mapping of column name to string) into
    typed OHLC fields.

    NOTE(review): the body appears truncated in this view — it stops after
    parsing `low` and never builds/returns a record; confirm the remainder.
    """
    dt = np.datetime64(line["dt"]).astype(np.int64)  # timestamp as int64
    sid = line["sid"]
    open_p = float(line["open"])
    high_p = float(line["high"])
    low_p = float(line["low"])
class TileDescriptor(tb.IsDescription):
    """Schema for one tile record: numeric id, short name, size, and a
    flag recording whether its matches have been counted."""
    id = tb.Int64Col(pos=0)                 # unique tile identifier
    name = tb.StringCol(itemsize=8, pos=1)  # tile label, at most 8 bytes
    size = tb.Int64Col(pos=2)               # tile size
    matchs_counted = tb.BoolCol(pos=3)      # True once matches were tallied
class MovieInfo(PT.IsDescription):
    """Movie metadata row: camera, file name, approximate frame range."""
    cam_id = PT.StringCol(itemsize=16, pos=0)     # camera identifier
    filename = PT.StringCol(itemsize=255, pos=1)  # movie file name
    approx_start_frame = PT.Int64Col(pos=2)       # first frame (approximate)
    approx_stop_frame = PT.Int64Col(pos=3)        # last frame (approximate)
class Record(tb.IsDescription):
    """Sample row mixing a short string label with assorted numeric widths."""
    var1 = tb.StringCol(4)    # 4-character string
    var2 = tb.IntCol()        # default-width integer
    var3 = tb.Int16Col()      # 16-bit (short) integer
    var4 = tb.FloatCol()      # double-precision float
    var5 = tb.Float32Col()    # single-precision float
def spotDesc(maxRGPLen):
    """Build the PyTables description for the spot table.

    :param maxRGPLen: itemsize reserved for the RGP name column.
    """
    description = {}
    description['spot'] = tables.UInt32Col()
    description['RGP'] = tables.StringCol(itemsize=maxRGPLen)
    return description
class Particle(tables.IsDescription): name = tables.StringCol(16, pos=1) # 16-character String lati = tables.Int32Col(pos=2) # integer longi = tables.Int32Col(pos=3) # integer vector = tables.Int32Col(shape=(2, ), pos=4) # Integer matrix2D = tables.Float64Col(shape=(2, 2), pos=5)
def graphDesc(maxGeneIDLen):
    """Return the table description for gene-to-gene graph edges.

    :param maxGeneIDLen: itemsize reserved for each gene-id column.
    """
    # Both endpoint columns share the same string width.
    return {key: tables.StringCol(itemsize=maxGeneIDLen)
            for key in ('geneTarget', 'geneSource')}
def RGPDesc(maxRGPLen, maxGeneLen):
    """Return the description for the RGP-to-gene association table.

    :param maxRGPLen: itemsize reserved for the RGP column.
    :param maxGeneLen: itemsize reserved for the gene column.
    """
    desc = {}
    desc['RGP'] = tables.StringCol(itemsize=maxRGPLen)
    desc['gene'] = tables.StringCol(itemsize=maxGeneLen)
    return desc
def gene2famDesc(geneFamNameLen, geneIDLen):
    """Return the description mapping each gene to its gene family.

    :param geneFamNameLen: itemsize reserved for the family-name column.
    :param geneIDLen: itemsize reserved for the gene-id column.
    """
    return dict(
        geneFam=tables.StringCol(itemsize=geneFamNameLen),
        gene=tables.StringCol(itemsize=geneIDLen),
    )
def geneFamDesc(maxNameLen, maxSequenceLength, maxPartLen):
    """Return the description for the gene-family table.

    :param maxNameLen: itemsize for the family name column.
    :param maxSequenceLength: itemsize for the protein sequence column.
    :param maxPartLen: itemsize for the partition column.
    """
    widths = {
        "name": maxNameLen,
        "protein": maxSequenceLength,
        "partition": maxPartLen,
    }
    return {field: tables.StringCol(itemsize=width)
            for field, width in widths.items()}
def geneSequenceDesc(geneIDLen, geneSeqLen, geneTypeLen):
    """Return the description for the gene-sequence table.

    :param geneIDLen: itemsize for the gene-id column.
    :param geneSeqLen: itemsize for the DNA sequence column.
    :param geneTypeLen: itemsize for the type column.
    """
    sizes = (("gene", geneIDLen), ("dna", geneSeqLen), ("type", geneTypeLen))
    return {name: tables.StringCol(itemsize=size) for name, size in sizes}
class CamSyncInfo(PT.IsDescription):
    """Per-camera record: camera id string, camera number, and host name."""
    cam_id = PT.StringCol(256, pos=0)     # camera identifier string
    camn = PT.UInt16Col(pos=1)            # numeric camera number
    hostname = PT.StringCol(2048, pos=2)  # host the camera is attached to
class TimeSeriesGrid(tb.IsDescription):
    """Per-grid time-series metadata: satellite, reference time and date."""
    sat_name = tb.StringCol(itemsize=20, pos=1)  # satellite name
    ref_time = tb.StringCol(itemsize=20, pos=2)  # reference-time string
    date = tb.StringCol(itemsize=20, pos=3)      # date string
    year = tb.Int32Col(pos=4)                    # calendar year
    month = tb.Int32Col(pos=5)                   # calendar month
class HostClockInfo(PT.IsDescription):
    """Clock-sync sample: a remote host plus three timestamps bracketing
    the remote reading."""
    remote_hostname = PT.StringCol(itemsize=255, pos=0)  # host being probed
    start_timestamp = PT.FloatCol(pos=1)   # timestamp before the probe
    remote_timestamp = PT.FloatCol(pos=2)  # timestamp reported by the host
    stop_timestamp = PT.FloatCol(pos=3)    # timestamp after the probe
def setup(self):
    """Set up the HDF5 output tables.

    Opens the H5 store, creates one CArray per recorded node (or
    parameter) sized (timesteps, *scenario_shape), and optionally writes
    the scenario and scenario-combination tables.
    """
    from pywr.parameters import IndexParameter
    import tables

    # The first dimension is the number of timesteps.
    # The following dimensions are sized per scenario
    scenario_shape = list(self.model.scenarios.shape)
    shape = [len(self.model.timestepper)] + scenario_shape

    self.h5store = H5Store(self.h5file, self.filter_kwds, self.mode,
                           title=self.title, metadata=self.metadata,
                           create_directories=self.create_directories)

    # Create a CArray for each node
    self._arrays = {}

    # Default to all nodes if None given.
    if self.nodes is None:
        nodes = [((self.where + "/" + n.name).replace("//", "/"), n)
                 for n in self.model.nodes.values()]
    else:
        nodes = []
        for n in self.nodes:
            try:
                # Accept (where, node) pairs ...
                where, node = n
            except (TypeError, ValueError):
                # ... or a bare node / node name placed under self.where.
                node = n
                where = self.where + "/" + node

            # Accept a str, and lookup node by name instead.
            if isinstance(node, str):
                node = self.model.nodes[node]
            # Otherwise assume it is a node object anyway

            where = where.replace("//", "/")
            nodes.append((where, node))

    if self.parameters is not None:
        nodes.extend(self.parameters)

    self._nodes = nodes

    for where, node in self._nodes:
        # IndexParameter values are integers; everything else is float.
        if isinstance(node, IndexParameter):
            atom = tables.Int32Atom()
        else:
            atom = tables.Float64Atom()
        group_name, node_name = where.rsplit("/", 1)
        if group_name == "":
            group_name = "/"
        self.h5store.file.create_carray(group_name, node_name, atom, shape,
                                        createparents=True)

    # Create scenario tables
    if self.scenarios is not None:
        group_name, node_name = self.scenarios.rsplit('/', 1)
        if group_name == "":
            group_name = "/"
        description = {
            # TODO make string length configurable
            'name': tables.StringCol(1024),
            'size': tables.Int64Col()
        }
        tbl = self.h5store.file.create_table(group_name, node_name,
                                             description=description,
                                             createparents=True)
        # Now add the scenarios
        entry = tbl.row
        for scenario in self.model.scenarios.scenarios:
            entry['name'] = scenario.name.encode('utf-8')
            entry['size'] = scenario.size
            entry.append()
        tbl.flush()

        if self.model.scenarios.user_combinations is not None:
            # One Int64 column per scenario, one row per user combination.
            description = {s.name: tables.Int64Col()
                           for s in self.model.scenarios.scenarios}
            tbl = self.h5store.file.create_table(group_name,
                                                 'scenario_combinations',
                                                 description=description)
            entry = tbl.row
            for comb in self.model.scenarios.user_combinations:
                for s, i in zip(self.model.scenarios.scenarios, comb):
                    entry[s.name] = i
                entry.append()
            tbl.flush()

    # Dropping the reference closes the store; reset() reopens it.
    self.h5store = None
class ExperimentInfo(PT.IsDescription):
    """Single-column row holding an experiment identifier."""
    # 32 chars fits a UUID hex string without dashes — TODO confirm format.
    uuid = PT.StringCol(32, pos=0)
def reset(self):
    """Reopen the store in append mode and (re)create per-run tables.

    Rebuilds the node-array lookup and recreates the time table, the
    routes-flow CArray and the routes table so each run starts clean.
    """
    import tables
    mode = "r+"  # always need to append, as file already created in setup
    self.h5store = H5Store(self.h5file, self.filter_kwds, mode)
    self._arrays = {}
    for where, node in self._nodes:
        self._arrays[node] = self.h5store.file.get_node(where)

    self._time_table = None
    # Create time table
    # This is created in reset so that the table is always recreated
    if self.time is not None:
        group_name, node_name = self.time.rsplit('/', 1)
        if group_name == "":
            group_name = "/"
        description = {c: tables.Int64Col()
                       for c in ('year', 'month', 'day', 'index')}
        try:
            # Remove any stale table from a previous run before recreating.
            self.h5store.file.remove_node(group_name, node_name)
        except tables.NoSuchNodeError:
            pass
        finally:
            self._time_table = self.h5store.file.create_table(
                group_name, node_name, description=description,
                createparents=True)

    self._routes_flow_array = None
    if self.routes_flows is not None:
        # Create a CArray for the flows
        # The first dimension is the number of timesteps.
        # The second dimension is the number of routes
        # The following dimensions are sized per scenario
        scenario_shape = list(self.model.scenarios.shape)
        shape = [
            len(self.model.timestepper),
            len(self.model.solver.routes)
        ] + scenario_shape
        atom = tables.Float64Atom()
        try:
            self.h5store.file.remove_node(self.where, self.routes_flows)
        except tables.NoSuchNodeError:
            pass
        finally:
            self._routes_flow_array = self.h5store.file.create_carray(
                self.where, self.routes_flows, atom, shape,
                createparents=True)

    # Create routes table. This must be done in reset
    if self.routes is not None:
        group_name, node_name = self.routes.rsplit('/', 1)
        if group_name == "":
            group_name = "/"
        description = {
            # TODO make string length configurable
            'start': tables.StringCol(1024),
            'end': tables.StringCol(1024),
        }
        try:
            self.h5store.file.remove_node(group_name, node_name)
        except tables.NoSuchNodeError:
            pass
        finally:
            tbl = self.h5store.file.create_table(
                group_name, node_name, description=description,
                createparents=True)
        entry = tbl.row
        for route in self.model.solver.routes:
            node_first = route[0]
            node_last = route[-1]
            # Report the parent (e.g. aggregated/virtual owner) when present.
            if node_first.parent is not None:
                node_first = node_first.parent
            if node_last.parent is not None:
                node_last = node_last.parent
            entry['start'] = node_first.name.encode('utf-8')
            entry['end'] = node_last.name.encode('utf-8')
            entry.append()
        tbl.flush()
DATETIME 12 timestamp YEAR 13 number VARCHAR 15 string BIT 16 number NEWDECIMAL 246 number ENUM 247 string TINY_BLOB 249 binary MEDIUM_BLOB 250 binary LONG_BLOB 251 binary BLOB 252 binary VAR_STRING 253 string STRING 254 string ============== ================= ============= """ import tables as tb import numpy as np # see also: # http://mysql-python.sourceforge.net/MySQLdb-1.2.2/public/MySQLdb.constants.FIELD_TYPE-module.html map_numpy = {'VAR_STRING': str, 'LONG': np.int16, 'FLOAT': np.float} # TODO: Figure out variable length string atoms with references in the actual table. Or calibrate string field width (or both). map_pytables = { 'VAR_STRING': tb.StringCol( 32 ), # 32 is relatively arbitrary - pytables doesn't support variable length strings 'LONG': tb.Int16Col(), 'FLOAT': tb.Float16Col() }
class Info(tables.IsDescription):
    """Nested description holding one named double-precision value."""
    _v_pos = 2  # position of this (sub)description within its parent
    Name = tables.StringCol(16, dflt='sample string')  # label, with default
    Value = tables.Float64Col()                        # double-precision value
def determine_chunkshape(size1, size2):
    """
    returns optimum size for chunks for a dataset of file size1, size2
    and update cachesize for accommodating dataset

    Each axis is scaled down by 64 (integer-truncated, minimum 1).
    """
    c1 = int(size1 / 64. + 1)
    c2 = int(size2 / 64. + 1)
    # update_cache_size()
    return (c1, c2)


# The dictionary used to define axes tables
FTICR_AXISvp9 = {
    "itype": tables.Int32Col(),          # data type flag
    "size": tables.Int32Col(),           # axis length
    "FTICR": tables.StringCol(itemsize=16),
    "sampling": tables.StringCol(itemsize=16),
    "specwidth": tables.Float32Col(),    # spectral width
    "highmass": tables.Float32Col(),
    "offsetfreq": tables.Float32Col(),
    "left_point": tables.Int32Col(),
    "calibA": tables.Float64Col(),       # calibration coefficients A/B/C
    "calibB": tables.Float64Col(),
    "calibC": tables.Float64Col(),
    "highfreq": tables.Float32Col(),
    "lowfreq": tables.Float32Col(),
}


class HDF5File(object):
    """
class info3(tables.IsDescription):
    """Row schema: a short name, a timestamp, a 2x3 float field, a byte."""
    name = tables.StringCol(itemsize=10)           # label, up to 10 bytes
    value = tables.Time64Col()                     # 64-bit time value
    y4 = tables.Float64Col(shape=(2, 3), dflt=1)   # 2x3 doubles, default 1
    z4 = tables.UInt8Col(dflt=1)                   # unsigned byte, default 1
def heterogeneousTableInfo(input_handler, first_line, data):
    """Return useful information about the `tables.Table` being created.

    The `data` array is heterogeneous, i.e. not all fields have the same
    dtype. Returns a ``(descr, has_header)`` tuple where ``descr`` is a
    PyTables table description dict.

    :Parameters:

    - `input_handler`: the file handler of the inspected `CSV` file
    - `first_line`: a numpy array which contains the first line of the
      `CSV` file
    - `data`: a numpy array which contains the second line of the `CSV` file
    """
    has_header = False
    if first_line.dtype.name.startswith('string'):
        # An all-string first row over otherwise mixed data means a header.
        has_header = True

    # Stuff used for finding out itemsizes of string fields
    itemsizes = {}
    for field in range(0, len(data.dtype)):
        if data.dtype[field].name.startswith('string'):
            itemsizes[field] = 0

    # If a dtype is a string, find out its biggest itemsize
    if itemsizes:
        buf_size = 1024 * 1024  # scan the file in 1 MiB batches of lines
        read_fh = input_handler.readlines
        input_handler.seek(0)
        if has_header:
            # Skip the header
            input_handler.readline()
        buf = read_fh(buf_size)
        while buf:
            temp_file = tempfile.TemporaryFile()
            temp_file.writelines(buf)
            # For each string field, let genfromtxt infer the widest
            # itemsize seen in this batch and keep the running maximum.
            for field in itemsizes.keys():
                temp_file.seek(0)
                idata = numpy.genfromtxt(temp_file, delimiter=',',
                                         usecols=(field, ), dtype=None)
                itemsizes[field] = \
                    max(itemsizes[field], idata.dtype.itemsize)
                del idata
            temp_file.close()
            buf = read_fh(buf_size)

    if has_header:
        # Header values become column names; string fields are overridden
        # with StringCol sized by the scan above.
        descr = {}
        for i in range(0, first_line.size):
            dtype = data.dtype.fields['f{0}'.format(i)][0]
            descr[first_line[i]] = tables.Col.from_dtype(dtype, pos=i)
        for i in itemsizes:
            descr[first_line[i]] = tables.StringCol(itemsizes[i], pos=i)
    else:
        # No header: keep numpy's generated f0, f1, ... field names.
        descr = dict([(f, tables.Col.from_dtype(t[0]))
                      for f, t in data.dtype.fields.items()])
        for i in itemsizes:
            descr['f{0}'.format(i)] = tables.StringCol(itemsizes[i])

    return descr, has_header
class LoadcaseTable(tables.IsDescription):
    """Load-case metadata row: title, subtitle, label and subcase id."""
    title = tables.StringCol(64)
    subtitle = tables.StringCol(64)
    label = tables.StringCol(64)
    # NOTE(review): subcase_id is stored as a 64-byte string rather than an
    # integer — presumably to match an external format; confirm.
    subcase_id = tables.StringCol(64)
class SpoolEvent(tables.IsDescription):
    """One spool-event record: name, time, and a free-text description."""
    EventName = tables.StringCol(itemsize=32)    # event name, max 32 bytes
    Time = tables.Time64Col()                    # event time (Time64)
    EventDescr = tables.StringCol(itemsize=256)  # description, max 256 bytes
class TextLogDescription(PT.IsDescription):
    """Text-log row: main-brain and host timestamps, camera id, message."""
    mainbrain_timestamp = PT.FloatCol(pos=0)    # timestamp on the main brain
    cam_id = PT.StringCol(itemsize=255, pos=1)  # camera identifier
    host_timestamp = PT.FloatCol(pos=2)         # timestamp on the host
    message = PT.StringCol(itemsize=255, pos=3) # log message text
class Record(tb.IsDescription):
    """Three-column row: an integer, a one-character string and a float."""
    var1 = tb.IntCol(pos=1)  # integer
    # BUG FIX: the string-width keyword is `itemsize`; `length` is the
    # obsolete PyTables 1.x spelling and raises TypeError on modern PyTables.
    var2 = tb.StringCol(itemsize=1, pos=2)  # 1-character string
    var3 = tb.FloatCol(pos=3)  # double-precision float
class Weights(tables.IsDescription):
    # Stores both the image's name and its values, used for scoring
    # against the answers.  (Translated from the original Chinese comment.)
    name = tables.StringCol(itemsize=32)  # image name, up to 32 bytes
    # assumes one (1, 512, 14, 14) single-precision block per row — shape
    # suggests a conv feature map; TODO confirm with the producer.
    value = tables.Float32Col(shape=(1, 512, 14, 14))  # float (single-precision)