-
Notifications
You must be signed in to change notification settings - Fork 0
/
excelPreprocess.py
149 lines (104 loc) · 4.65 KB
/
excelPreprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#-------------------------------------------------------------------------------
# Name: module1
# Purpose: reformats the class lists with multiple sections into one excel spreadsheet
#
# Author: DBMS
#
# Created: 20/06/2014
# Copyright: (c) DBMS 2014
# Licence: <your licence>
#-------------------------------------------------------------------------------
import os
import xlrd
import xlwt
from xlutils.copy import copy
from dbFormat import findCol
def findFiles(location, extension):
''' #Searches a location for all files that have a specific extension
#Used to return all the excel workbooks (xls) locations to the main function
'''
workbooksList=[] #delcaration, stores the file locations of all the workbooks
for filename in os.listdir(location): #goes through every file in the folder
if filename[-3:]==extension:
workbooksList.append(location+"\\"+filename) #adds the file if the last 3 char are "xls"
return workbooksList
def findSheetAddresses(location):
excelExtension = "xls"
filesList = findFiles(location, excelExtension)
wbData = []
sheetAddress = []
for i in range(len(filesList)):
wbData.append(xlrd.open_workbook(filesList[i]))
sheetAddress.append(wbData[i].sheet_by_index(0))
return sheetAddress
def locateHeadings(headingsList,sheetAddress):
'''Generate the column numbers holding the headings
'''
headingsLocation=[]
for heading in headingsList:
headingsLocation.append(findCol(sheetAddress,heading))
return headingsLocation
def copyData(fileLocation,sheetAddress):
'''Copies all cells from the fileLocation workbook and outputs the cells
'''
oldWB=xlrd.open_workbook(fileLocation,on_demand=True)
numRowsOldWB=sheetAddress.nrows-1 #dimensions of the old workbook
numCellsOldWB=sheetAddress.ncols
cellData=[[0 for x in xrange(numCellsOldWB)]for x in xrange(numRowsOldWB)] #multiD array that will store all the data
curr_row=-1
while curr_row<numRowsOldWB: #incrementing through the rows of the WB
curr_row+=1
row = sheetAddress.row(curr_row)
curr_cell=-1
while curr_cell<numCellsOldWB: #incrementing through the columns of a row
curr_cell+=1
cellData[curr_row-1][curr_cell-1]=sheetAddress.cell_value(curr_row,curr_cell-1)
return cellData
def recreateWB(fileLocation,sheetAddress,cellData):
'''Copies the one workbook using xlutils
then adds onto the new workbook using the data copied using copyData
and currently saves as "1.xls"
'''
wb = xlrd.open_workbook(fileLocation,on_demand=True) #xlrd open workbook
numRows = sheetAddress.nrows
newWB = xlwt.Workbook() #xlwt open workbook
sheet1 = newWB.add_sheet("sheet1",cell_overwrite_ok=True)
newWB = copy(wb)
for i in range(len(cellData[:][:])): #iterate through the rows of the copied data
for j in range(len(cellData[1][:])):
newWB.get_sheet(0).write(i + numRows,j,cellData[i][j]) #copies the new data to the end of the old data
newWB.save("1.xls")
def checkCourseCode(filesList,sheetAddressList):
headings = ["Subject","Catalog Number","Term"] #these are the headings that need to be searched for
courseCodes = [[] for x in xrange(len(filesList))] #multiD array that stores subject, catalog, and term for each course
for i in range(len(sheetAddressList)):
xlrd.open_workbook(filesList[i],on_demand = True)
headingsLocation = locateHeadings(headings,sheetAddressList[i])
tempValues = []
courseCodes[i].append(filesList[i])
for dataCol in headingsLocation:
courseCodes[i].append(sheetAddressList[i].cell(2,dataCol).value)
return courseCodes
'''Grabs the subject. catalog # and term, along with location of the file
e.g. [u'ANAT', u' 215', 2139, 'C:\\Users\\DBMS\\Documents\\Daniel\\DBMS_Enrollment\\full_data\\ANAT 215 - Fall 2013.xls']
'''
def main():
progDirectory = os.getcwd()
dataLocation = progDirectory+"\\full_data"
excelExtension ="xls"
filesList = findFiles(dataLocation,excelExtension)
wbData = []
sheetAddress = []
for i in range(len(filesList)):
wbData.append(xlrd.open_workbook(filesList[i]))
sheetAddress.append(wbData[i].sheet_by_index(0))
checkCourseCode(filesList,sheetAddress)
try:
os.remove(progDirectory + "\\1.xls")
except WindowsError:
print "can't find"
else:
pass
#recreateWB(filesList[0],sheetAddress[0],copyData(filesList[0],sheetAddress[0]))
if __name__ == '__main__':
main()