-
Notifications
You must be signed in to change notification settings - Fork 0
/
main3.py
116 lines (101 loc) · 4.03 KB
/
main3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# -*- coding: utf-8 -*-
"""
Created on Sun Nov 09 14:11:45 2014
@author: liu
"""
from io_routines import readMat2
from db_utilities import prepareInsert,prepareCreateTable,getSession
import numpy as np
import sys
import time
# If the per-subject pickle dump is ever re-enabled, prefer cPickle over
# pickle for speed:
#import cPickle as pickle

# Wall-clock reference used for the total run-time report at the end.
start_time = time.time()

# .mat files to load. Alternative list that also includes the Pieman data:
#fileNames = ['PiemanData.mat','HitchcockData.mat']
fileNames = ['HitchcockData.mat']

# Reference only (not executed): connecting to the cluster by hand instead
# of going through getSession.
#
#   auth_provider = PlainTextAuthProvider(username='cassandra', password='cassandra')
#   cluster = Cluster(['10.10.10.10'],auth_provider=auth_provider, protocol_version=2)
#   session = cluster.connect('engagement')
#   #session.set_keyspace('users')
#   # or you can do this instead
#   #session.execute('USE users')
#   session.execute()
#   cluster.shutdown()
def tryInsert(session, s, time, max_retries=None):
    """Execute statement ``s`` on ``session``, retrying until it succeeds.

    Each failed attempt prints the first 100 characters of the statement
    together with the current attempt number, then the statement is sent
    again.

    Args:
        session: object exposing ``execute(statement)``.
        s: the statement text to execute (must be a string; it is sliced
            for the failure message).
        time: attempt number to start counting from (callers pass 1).
            NOTE: this parameter shadows the ``time`` module inside the
            function; the name is kept for backward compatibility.
        max_retries: optional cap on the number of retries after the first
            attempt. ``None`` (the default) retries indefinitely.

    Raises:
        Exception: the last error raised by ``session.execute`` once
            ``max_retries`` retries have been used up.
    """
    attempt = time
    retries = 0
    # Iterate instead of recursing so a long run of failures cannot exhaust
    # the interpreter's recursion limit.
    while True:
        try:
            session.execute(s)
        except Exception:
            # ``Exception`` (not a bare except) lets KeyboardInterrupt and
            # SystemExit stop a run that is stuck retrying.
            if max_retries is not None and retries >= max_retries:
                raise
            print('failed to insert, retrying: '+s[0:100]+' '+str(attempt))
            retries += 1
            attempt += 1
        else:
            return
# Load every file listed in fileNames and insert its per-subject data into
# the '<file stem>total2' table, one batch statement per time point.
for fileName in fileNames:
    try:
        f, data = readMat2(fileName)
    except Exception as err:
        # Report the underlying reason: the failure is not necessarily a
        # missing file.
        sys.exit('not found file '+fileName+': exiting ('+str(err)+')')
    # NOTE(review): f is never closed here; confirm whether readMat2 expects
    # the caller to release it.

    try:
        # SECURITY(review): credentials are hard-coded in source control;
        # move them to configuration/environment and rotate the password.
        session = getSession(username = 'leolincoln',password='ll7713689')
        # Timeout in *seconds*; raising the default avoids timeout
        # exceptions on the large batch statements sent below.
        session.default_timeout = 30
    except Exception as err:
        # Nothing below can work without a session, so stop here instead of
        # failing later with a NameError on `session`.
        sys.exit('not able to start db session: ' + str(err))

    try:
        tableName = fileName[:-4]+'total2'
        tempCreate = prepareCreateTable(tableName)
        print('creating table:' + tempCreate)
        tryInsert(session,tempCreate,1)

        # Only the first 16 subjects are loaded.
        maxSubjects = 16
        for subject in range(min(len(data), maxSubjects)):
            data2 = np.array(f[data[subject][0]])
            start_time2 = time.time()
            # Layout notes:
            #   pieman.mat read from matlab gives 58 x 40 x 46 x 274 data:
            #     58  --> Z, from lower to top
            #     40  --> Y, from front to back
            #     46  --> X, from left to right
            #     274 --> time, from lower time to higher time
            #   hitchcockdata* gives 600 x 46 x 40 x 58 data references,
            #   ordered time -- x -- y -- z, which is the order indexed below.
            numT, numX, numY, numZ = data2.shape
            for t in range(numT):
                start_time3 = time.time()
                frame = data2[t]
                batchInsert = ['BEGIN BATCH']
                for z in range(numZ):
                    for y in range(numY):
                        for x in range(numX):
                            batchInsert.append(prepareInsert(
                                'engagement',
                                tableName,
                                [subject,x,y,z,t,frame[x, y, z]],
                                tableColumns = ' (subject,x,y,z,time,data) '))
                batchInsert.append(' APPLY BATCH;')
                # One batch statement per time point, retried on failure.
                tryInsert(session,' '.join(batchInsert),1)
                print("--- run time for t: %s seconds ---" % str(time.time() - start_time3))
            print('subject: %s' % subject)
            print("--- run time for subject: %s seconds ---" % str(time.time() - start_time2))
    finally:
        # After processing (or on error), shut down the session.
        session.shutdown()
print("--- total run time: %s seconds ---" % str(time.time() - start_time))