def parseFile(writer, file):
    """Convert the ``row`` elements of a spreadsheet export into CSV rows.

    The markup is read through ``pyUtils.readLocalFile`` and parsed with
    ``Soup`` (presumably a BeautifulSoup alias -- confirm against the
    file's imports).  Rows are scanned in document order:

    * Until a header is found, rows are skipped unless their
      ``ss:styleid`` attribute equals ``'s5'``.  The first such row is the
      header: the normalised text of each ``data`` element is written as
      one CSV row.
    * After the header, only rows whose ``ss:autofitheight`` attribute is
      ``"0"`` are written, one value per ``cell`` element.  Empty numeric
      cells become ``-100``, empty non-numeric cells become ``''``,
      numeric text has ``','`` replaced by ``'.'``, and other text goes
      through ``pyUtils.returnNormalText``.

    NOTE(review): a second ``parseFile(file)`` is defined later in the
    visible source; if both live in the same module the later definition
    shadows this one -- confirm which is intended to be callable.

    Args:
        writer: object exposing ``writerow(list)`` (e.g. a ``csv.writer``).
        file: path of the file to read; also used to derive ``year``.
    """
    print("Reading file " + file)
    markup = pyUtils.readLocalFile(file)

    # Computed exactly as before but not used afterwards in this function.
    base_name = pyUtils.getFileNameWithoutExtension(file)
    year = base_name[pyUtils.getLastIndexOf(file, '-') + 1:]

    document = Soup(markup)

    # Attribute names are looked up in lower case.
    autofit_attr = 'ss:AutoFitHeight'.lower()
    type_attr = 'ss:Type'.lower()

    header_seen = False
    for row in document.findAll('row'):
        print(row)
        out_row = []

        if not header_seen:
            # Everything before the 's5'-styled header row is ignored.
            if row.get('ss:styleid', '') != 's5':
                continue
            header_seen = True
            for data_tag in row.findAll('data'):
                out_row.append(pyUtils.returnNormalText(data_tag.contents[0]))
            writer.writerow(out_row)
            continue

        # Data rows are only the ones flagged with AutoFitHeight == "0".
        if row.get(autofit_attr, '') != "0":
            continue
        print("autoheight is 0")

        for cell_tag in row.findAll('cell'):
            data_tag = cell_tag.data
            is_number = data_tag[type_attr].lower() == 'number'
            if len(data_tag.contents) == 0:
                # -100 marks an empty numeric cell; empty text stays ''.
                out_row.append(-100 if is_number else '')
            elif is_number:
                # Decimal comma -> decimal point; value is kept as text.
                out_row.append(data_tag.contents[0].replace(',', '.'))
            else:
                out_row.append(pyUtils.returnNormalText(data_tag.contents[0]))
        writer.writerow(out_row)
def parseFile(file):
    """Parse a spreadsheet export and register one record per data row.

    The markup is read through ``pyUtils.readLocalFile`` and parsed with
    ``Soup`` (presumably a BeautifulSoup alias -- confirm against the
    file's imports).  Only rows whose ``ss:autofitheight`` attribute is
    ``'0'`` are considered:

    * The first such row that also has ``ss:styleid == 's5'`` is the
      header.  The normalised text of each ``data`` element becomes a
      column name, and the positions of the columns whose raw text equals
      the module-level ``key_field_name`` / ``country_field_name`` are
      remembered.
    * Every later such row is turned into a ``{column name: value}`` dict
      (key and country columns excluded) and passed to
      ``addSchoolData(key, year, country, values)``.

    Value conversion: an empty numeric cell becomes ``-100``, an empty
    text cell becomes ``''``, numeric text is converted with ``float``
    after replacing ``','`` with ``'.'``, and other text goes through
    ``pyUtils.returnNormalText``.

    Args:
        file: path of the file to read.  ``year`` is taken from the part
            of the extension-less file name after the last ``'-'``.

    Raises:
        ValueError: if a data row is reached but the header row contained
            no column equal to ``key_field_name``.  (Previously this
            surfaced as an ``UnboundLocalError``.)
    """
    print("Reading file " + file)
    handler = pyUtils.readLocalFile(file)
    # NOTE(review): the '-' position is looked up in `file` but used to
    # slice the extension-less name; the two only line up if both strings
    # share the same prefix -- confirm against pyUtils.
    year = pyUtils.getFileNameWithoutExtension(file)[pyUtils.getLastIndexOf(file, '-') + 1:]
    soup = Soup(handler)

    # Attribute names are looked up in lower case.
    autofit_attr = 'ss:AutoFitHeight'.lower()
    type_attr = 'ss:Type'.lower()

    headers = []
    # Initialised up front so a sheet lacking one of these columns is
    # handled explicitly instead of failing with UnboundLocalError.
    key_field_column = None
    country_field_column = None
    found_headers = False

    for row in soup.findAll('row'):
        # Both the header row and data rows must carry AutoFitHeight == '0'.
        if row.get(autofit_attr, '') != '0':
            continue

        if not found_headers:
            if row.get('ss:styleid', '') != 's5':
                continue
            found_headers = True
            for i, cell in enumerate(row.findAll('data')):
                raw_name = cell.contents[0]
                headers.append(pyUtils.returnNormalText(raw_name))
                # Matching is done on the raw cell text, not the
                # normalised header name.
                if raw_name == key_field_name:
                    key_field_column = i
                if raw_name == country_field_name:
                    country_field_column = i
            continue

        if key_field_column is None:
            raise ValueError(
                "Key column %r not found in header row of %s"
                % (key_field_name, file)
            )

        cells = row.findAll('cell')
        school = {}
        country = ''
        for i, column in enumerate(cells):
            if i == key_field_column:
                # The key is passed to addSchoolData separately.
                continue
            data = column.data
            if i == country_field_column:
                # An empty country cell yields '' like any other empty
                # text cell instead of raising IndexError.
                if data.contents:
                    country = pyUtils.returnNormalText(data.contents[0])
                continue

            is_number = data[type_attr].lower() == 'number'
            if len(data.contents) == 0:
                # -100 marks an empty numeric cell; empty text stays ''.
                school[headers[i]] = -100 if is_number else ''
            elif is_number:
                # Source uses a decimal comma.
                school[headers[i]] = float(data.contents[0].replace(',', '.'))
            else:
                school[headers[i]] = pyUtils.returnNormalText(data.contents[0])

        addSchoolData(cells[key_field_column].data.contents[0], year, country, school)