forked from sot/cheta
-
Notifications
You must be signed in to change notification settings - Fork 0
/
add_derived.py
executable file
·163 lines (136 loc) · 5.99 KB
/
add_derived.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
#!/usr/bin/env python
import re, os, sys
import cPickle as pickle
import optparse
import tables
from Chandra.Time import DateTime
import Ska.DBI
import pyyaks.logger
import pyyaks.context
import numpy as np
import Ska.engarchive.fetch as fetch
import Ska.engarchive.file_defs as file_defs
import Ska.engarchive.derived as derived
def get_options(args=None):
    """Parse command-line options for the add_derived script.

    :param args: optional list of argument strings; when None (the default)
        the arguments are taken from ``sys.argv[1:]`` as usual.
    :returns: ``(options, positional_args)`` tuple from
        ``OptionParser.parse_args``.
    """
    parser = optparse.OptionParser()
    parser.add_option("--data-root",
                      default=".",
                      help="Engineering archive root directory for MSID and arch files")
    parser.add_option("--start",
                      default="1999:201",
                      help="Start for initial data fetch")
    parser.add_option("--stop",
                      default="1999:260",
                      help="Stop for initial data fetch")
    parser.add_option("--content",
                      action='append',
                      # Without an explicit default, optparse leaves
                      # opt.content as None when --content is never given,
                      # which breaks iteration in main().  Empty list means
                      # "process all content types".
                      default=[],
                      help="Content type to process [match regex] (default = all)")
    return parser.parse_args(args)
def make_archfiles_db(filename, content_def):
    """Create the sqlite archfiles database ``filename`` with one seed row.

    Does nothing if ``filename`` already exists.  The seed row describes a
    zero-length placeholder archfile starting 60 seconds before ``opt.start``
    (rowstart == rowstop == 0), presumably so later update processing has a
    well-defined starting point -- confirm against the updater.

    :param filename: full path of the sqlite archfiles db file
    :param content_def: content definition dict; must provide the
        'content' and 'time_step' keys.
    """
    # Do nothing if it is already there
    if os.path.exists(filename):
        return

    datestart = DateTime(DateTime(opt.start).secs - 60)
    tstart = datestart.secs
    tstop = tstart  # zero-length interval on purpose
    year, doy = datestart.date.split(':')[:2]
    times, indexes = derived.times_indexes(tstart, tstop, content_def['time_step'])

    logger.info('Creating db {}'.format(filename))
    # Read the schema via a context manager so the file handle is not leaked
    # (the original open(...).read() relied on GC to close it).
    with open('archfiles_def.sql') as schema_file:
        archfiles_def = schema_file.read()
    db = Ska.DBI.DBI(dbi='sqlite', server=filename)
    db.execute(archfiles_def)
    archfiles_row = dict(filename='{}:0:1'.format(content_def['content']),
                         filetime=0,
                         year=year,
                         doy=doy,
                         tstart=tstart,
                         tstop=tstop,
                         rowstart=0,
                         rowstop=0,
                         startmjf=indexes[0],  # really index0
                         stopmjf=indexes[-1],  # really index1
                         date=datestart.date)
    db.insert(archfiles_row, 'archfiles')
def add_colname(filename, colname):
    """Add ``colname`` to the pickled set() in ``filename``.  Create the pickle
    as needed.

    :param filename: path of the colnames pickle file
    :param colname: column (MSID) name to add to the set
    """
    if not os.path.exists(filename):
        logger.info('Creating colnames pickle {}'.format(filename))
        # Pickle data must be written in binary mode (mandatory on Python 3,
        # harmless on Python 2); the original text-mode 'w' was incorrect.
        with open(filename, 'wb') as f:
            pickle.dump(set(), f)

    # Context manager closes the handle promptly instead of leaking it as the
    # original pickle.load(open(filename, 'r')) did.
    with open(filename, 'rb') as f:
        colnames = pickle.load(f)
    if colname not in colnames:
        logger.info('Adding colname {} to colnames pickle {}'.format(colname, filename))
        colnames.add(colname)
        with open(filename, 'wb') as f:
            pickle.dump(colnames, f)
def make_msid_file(colname, content, content_def):
    """Create the HDF5 data file for one derived MSID (no-op if it exists).

    The derived-parameter values are computed only to establish the correct
    dtype for the ``data`` array; both EArrays are created with zero rows
    (presumably filled later by the regular update process -- confirm).
    """
    ft['content'] = content
    ft['msid'] = colname
    filename = msid_files['data'].abs
    if os.path.exists(filename):
        return
    logger.info('Making MSID data file %s', filename)

    if colname == 'TIME':
        # For the synthetic TIME column the values come straight from the
        # content time grid.
        values, indexes = derived.times_indexes(opt.start, opt.stop,
                                                content_def['time_step'])
    else:
        calculator = content_def['classes'][colname]()
        dataset = calculator.fetch(opt.start, opt.stop)
        values = np.asarray(calculator.calc(dataset), dtype=calculator.dtype)

    # Finally make the actual MSID data file
    n_rows = int(20 * 3e7 / content_def['time_step'])
    h5shape = (0,)  # start empty; expectedrows just sizes the chunking
    h5type = tables.Atom.from_dtype(values.dtype)
    filters = tables.Filters(complevel=5, complib='zlib')
    h5 = tables.openFile(filename, mode='w', filters=filters)
    h5.createEArray(h5.root, 'data', h5type, h5shape, title=colname,
                    expectedrows=n_rows)
    h5.createEArray(h5.root, 'quality', tables.BoolAtom(), (0,), title='Quality',
                    expectedrows=n_rows)
    logger.info('Made {} shape={} with n_rows(1e6)={}'.format(colname, h5shape, n_rows / 1.0e6))
    h5.close()
def main():
    """Create initial archive files for all derived-parameter content types.

    For each content type this makes (as needed) the content directory, the
    archfiles sqlite db, the colnames / colnames_all pickles, and an empty
    HDF5 data file per derived MSID plus the TIME and QUALITY columns.
    """
    global opt, ft, msid_files, logger

    opt, args = get_options()
    ft = fetch.ft
    msid_files = pyyaks.context.ContextDict('add_derived.msid_files',
                                            basedir=opt.data_root)
    msid_files.update(file_defs.msid_files)
    logger = pyyaks.logger.get_logger(name='engarchive', level=pyyaks.logger.VERBOSE,
                                      format="%(asctime)s %(message)s")

    # Get the derived parameter classes
    dp_classes = (getattr(derived, x) for x in dir(derived) if x.startswith('DP_'))
    dp_classes = [x for x in dp_classes if hasattr(x, '__base__') and
                  issubclass(x, derived.DerivedParameter)]

    # Group the classes by content type; every group also gets a TIME entry.
    content_defs = {}
    for dp_class in dp_classes:
        colname = dp_class.__name__.upper()
        dp = dp_class()
        content = dp.content
        # With optparse action='append' and no --content option given,
        # opt.content is None (or [] if a default was supplied); either one
        # means "process all content types".  The original `opt.content == []`
        # test raised TypeError when opt.content was None.
        if not opt.content or any(re.match(x + r'\d+', content) for x in opt.content):
            dpd = content_defs.setdefault(content, {})
            dpd.setdefault('classes', {'TIME': None})
            dpd['content'] = content
            dpd['classes'][colname] = dp_class
            dpd['mnf_step'] = dp.mnf_step
            dpd['time_step'] = dp.time_step

    for content, content_def in content_defs.items():
        ft['content'] = content
        logger.info('CONTENT = {}'.format(content))

        # Make content directory
        if not os.path.exists(msid_files['contentdir'].rel):
            logger.info('Making directory {}'.format(msid_files['contentdir'].rel))
            os.mkdir(msid_files['contentdir'].rel)

        # Make the archfiles.db3 file (if needed)
        make_archfiles_db(msid_files['archfiles'].abs, content_def)

        for colname in content_def['classes']:
            ft['msid'] = colname
            logger.debug('MSID = {}'.format(colname))
            # Create colnames and colnames_all pickle files (if needed) and add colname
            add_colname(msid_files['colnames'].rel, colname)
            add_colname(msid_files['colnames_all'].rel, colname)

            make_msid_file(colname, content, content_def)

        add_colname(msid_files['colnames_all'].rel, 'QUALITY')


if __name__ == '__main__':
    main()