/
msdbconvert.py
143 lines (109 loc) · 4.45 KB
/
msdbconvert.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# import msdb to python data object
# http://stackoverflow.com/questions/17910657/pyodbc-error-of-data-source-name-not-found-or-no-default-driver-on-64-bit-pc
import os
import numpy as np
import pypyodbc
import h5py
def cursor_dtypes(cursor):
import numpy as np
import decimal
import datetime
dtypes = []
dtypes_str = []
column_info = cursor.description
for column in column_info:
print 'column0',column[0]
if column[1] == unicode:
if column[3] > 1000:
dtypes.append((column[0], 'S250'))
dtypes_str.append('list')
print column[0],column[3],column[1]
else:
dtypes.append((column[0], 'S%d' % column[3]))
dtypes_str.append('str')
elif column[1] == str:
dtypes.append((column[0], 'S%d' % column[3]))
dtypes_str.append('str')
elif column[1] == decimal.Decimal:
#dtypes.append((column[0], 'dd%d' % column[3]))
dtypes.append((column[0], 'f%d' % column[3]))
dtypes_str.append('dd')
elif column[1] == float:
dtypes.append((column[0], 'f4'))
dtypes_str.append('f')
elif column[1] == datetime.datetime:
dtypes.append((column[0], 'S10'))
dtypes_str.append('datetime')
elif column[1] == int:
dtypes.append((column[0], 'i4'))
dtypes_str.append('i')
dtypes = np.dtype(dtypes)
return dtypes, dtypes_str
def process_cursor(cursor, h5file, table):
'''Return numpy dtype array from pypyodbc cursor with SQL query'''
# Query for by table name
cursor.execute('Select * from '+table+';')
print 'pc1'
dtypes, dtypes_str = cursor_dtypes(cursor)
print 'pc2'
group_h5 = h5file.create_group(table)
print group_h5
# Get number of rows
num_rows = len(cursor.fetchall())
for i in range(len(dtypes)):
param = h5file.create_dataset(table+'/'+ dtypes.names[i],
(num_rows,),
dtype = dtypes[i],
maxshape = (None,),
fillvalue = None
)
print 'pc3'
# Query by table name
cursor.execute('Select * from '+table+';')
# Typecast and save data
for i in range(num_rows):
data = cursor.fetchone()
for j in range(len(dtypes)):
key_h5 = table + '/' + dtypes.names[j]
if data != None:
if ('S' in dtypes.descr[j][1]) or \
('U' in dtypes.descr[j][1]):
if data[j] == None:
data_str = ''
elif dtypes_str[j] == 'datetime':
data_str = data[j].strftime('%Y-%m-%d')
else:
data_str = data[j].encode('utf-8')
h5file[key_h5][i] = data_str
elif 'f' in dtypes.descr[j][1]:
try:
h5file[key_h5][i] = float(data[j])
except:
break
#h5file[key_h5][i] = -9999
else:
h5file[key_h5][i] = data[j]
if __name__ == '__main__':
#msdb_file = 'C:/Users/ryan/Desktop/hg37g.mdb'
msdb_file = 'C:/Users/ryan/Desktop/hg37.mdb'
msdb_name = os.path.splitext(os.path.split(msdb_file)[1])[0]
bin_file = 'E:/access2py/'+msdb_name+'.h5'
msdb_tables = ('cruise','ctd','deployments','diag','dive','haulout',
'haulout_orig','summary','tag_info','uplink')
#msdb_tables = ('ctd','deployments','dive','gps','haulout',
# 'haulout_orig','sms','summary','tag_info','uplink')
# If pandas/pickly object doesn't exist, create
#if not os.path.isfile(bin_file):
# Connect to MS Access database & create cursor
connection = pypyodbc.win_connect_mdb(msdb_file)
cursor = connection.cursor()
#h5file = tables.openFile(bin_file, mode='w', title=msdb_name)
h5file = h5py.File(bin_file, 'w')
for table in msdb_tables:
print table+'1'
# Append table to HDF5 File
process_cursor(cursor, h5file, table)
h5file.close()
connection.close()
#else:
# print bin_file+' already created.'