def getDefaultFieldMap( fileName, sheetName, expectedFieldCol=None ):
	"""
	Find the header row of an Excel sheet and guess which column holds each expected field.

	fileName, sheetName:	Excel workbook and sheet to scan.
	expectedFieldCol:		optional {fieldName: column} hints used when no header matches a field.

	Returns (headers, fieldCol) where headers is the list of header strings
	(with a blank sentinel appended at the end meaning "unmapped") and
	fieldCol maps each name in Fields to its best-guess header index.

	Raises ValueError if no header row can be found.
	"""
	reader = GetExcelReader( fileName )
	headers, fieldCol = [], {}

	# Try to find the header columns.
	# Look for the first row with more than 4 non-blank columns.
	for r, row in enumerate(reader.iter_list(sheetName)):
		cols = sum( 1 for d in row if d and six.text_type(d).strip() )
		if cols > 4:
			headers = [six.text_type(h or '').strip() for h in row]
			break

	# If we haven't found a header row yet, assume the first non-empty row is the header.
	if not headers:
		for r, row in enumerate(reader.iter_list(sheetName)):
			cols = sum( 1 for d in row if d and six.text_type(d).strip() )
			if cols > 0:
				headers = [six.text_type(h or '').strip() for h in row]
				break

	# Ignore empty columns on the end.
	while headers and (not headers[-1] or headers[-1].isspace()):
		headers.pop()

	if not headers:
		# FIX: was the py2-only statement form "raise ValueError, u'...'".
		raise ValueError( u'{} {}::{}.'.format(_('Could not find a Header Row'), fileName, sheetName) )

	# Rename empty columns so as not to confuse the user.
	headers = [
		h if h else u'<{} {:03d}>'.format(_('Blank Header Column'), (c + 1))
		for c, h in enumerate(headers)
	]
	# Truncate overly long headers for display.
	headers = [h if len(h) < 32 else h[:29].strip() + u'...' for h in headers]

	# Set a blank final entry: selecting it means "field not mapped".
	headers.append( u'' )

	# Create a map for the field names we are looking for
	# and the headers we found in the Excel sheet.
	sStateField = 'State'
	sProvField = 'Prov'
	sStateProvField = 'StateProv'

	GetTranslation = _
	iNoMatch = len(headers) - 1		# Index of the blank sentinel.
	exactMatch = { h.lower(): (100.0, i) for i, h in enumerate(headers) }
	matchStrength = {}
	for c, f in enumerate(Fields):
		# Figure out some reasonable defaults for headers.
		# First look for a perfect match ignoring case.
		matchBest, iBest = exactMatch.get( f.lower(), (0.0, iNoMatch) )
		if not f.lower().startswith('tag'):
			# Then try the local translation of the header name.
			if matchBest < 2.0:
				fTrans = GetTranslation( f )
				matchBest, iBest = max(
					((Utils.approximateMatch(fTrans, h), i) for i, h in enumerate(headers)),
					key=lambda x: x[0]
				)
			# If that fails, try matching the untranslated header fields.
			if matchBest <= 0.34:
				matchBest, iBest = max(
					((Utils.approximateMatch(f, h), i) for i, h in enumerate(headers)),
					key=lambda x: x[0]
				)
		# If we don't get a high enough match, fall back to the caller's expected column, else blank.
		if matchBest <= 0.34:
			try:
				# BUG FIX: was expectedFieldCol[h] - "h" was a stale comprehension
				# variable (the last header), so the hint lookup never worked.
				iBest = min( expectedFieldCol[f], iNoMatch )
			except (TypeError, KeyError):
				iBest = iNoMatch
		fieldCol[f] = iBest
		matchStrength[f] = matchBest

	# If we already have a match for State or Prov, don't match on StateProv, etc.
	if matchStrength.get(sStateProvField, 0.0) > matchStrength.get(sStateField, 0.0):
		fieldCol[sStateField] = iNoMatch
		fieldCol[sProvField] = iNoMatch
	elif matchStrength.get(sProvField, 0.0) > matchStrength.get(sStateProvField, 0.0):
		fieldCol[sStateProvField] = iNoMatch
	elif matchStrength.get(sStateField, 0.0) > matchStrength.get(sStateProvField, 0.0):
		fieldCol[sStateProvField] = iNoMatch

	return headers, fieldCol
def read( self, alwaysReturnCache = False ):
	"""
	Read the rider data from the configured Excel sheet.

	Returns a dict {bibNumber: {field: value}} and updates the module-level
	stateCache/infoCache/errorCache.  If alwaysReturnCache is True and a cached
	result exists, return it without touching the file.  Also refreshes
	properties/categories sheets and clears dependent Model caches.
	"""
	# Check the cache.  Return the last info if the file has not been modified,
	# and the name, sheet and fields are the same.
	global stateCache
	global infoCache
	global errorCache

	self.readFromFile = False
	if alwaysReturnCache and infoCache is not None:
		return infoCache

	if stateCache and infoCache:
		try:
			state = (os.path.getmtime(self.fileName), self.fileName, self.sheetName, self.fieldCol)
			if state == stateCache:
				return infoCache
		except Exception:
			# FIX: was a bare "except:"; a stat failure just means "re-read the file".
			pass

	# Read the sheet and return the rider data.
	self.readFromFile = True
	try:
		reader = GetExcelReader( self.fileName )
		if self.sheetName not in reader.sheet_names():
			infoCache = {}
			errorCache = []
			return {}
	except (IOError, ValueError):
		infoCache = {}
		errorCache = []
		return {}

	info = {}
	rowInfo = []
	hasTags = False
	for r, row in enumerate(reader.iter_list(self.sheetName)):
		data = {}
		for field, col in self.fieldCol.items():
			if col < 0:					# Skip unmapped columns.
				continue
			try:
				try:
					data[field] = row[col].strip()
				except AttributeError:
					data[field] = row[col]

				if data[field] is None:	# FIX: was "== None".
					data[field] = u''

				if field == 'LastName':
					try:
						data[field] = six.text_type(data[field] or '').upper()
					except Exception:
						data[field] = _('Unknown')
				elif field.startswith('Tag'):
					# Normalize numeric tags to ints first, then to uppercase text.
					try:
						data[field] = int( data[field] )
					except (ValueError, TypeError):
						pass
					try:
						data[field] = six.text_type(data[field] or '').upper()
						hasTags = True
					except Exception:
						pass
				elif field == 'Gender':
					# Normalize and encode the gender information.
					try:
						genderFirstChar = six.text_type(data[field] or 'Open').strip().lower()[:1]
						if genderFirstChar in 'mhu':		# Men, Male, Hommes, Uomini
							data[field] = 'Men'
						elif genderFirstChar in 'wlfd':		# Women, Ladies, Female, Femmes, Donne
							data[field] = 'Women'
						else:
							data[field] = 'Open'			# Otherwise Open
					except Exception:
						data[field] = 'Open'
				else:
					if field in NumericFields:
						try:
							data[field] = float(data[field])
							# Collapse whole-number floats to ints.
							if data[field] == int(data[field]):
								data[field] = int(data[field])
						except ValueError:
							data[field] = 0
					else:
						data[field] = six.text_type(data[field])
			except IndexError:
				# Row is shorter than the mapped column; leave the field unset.
				pass

		try:
			num = int(float(data[Fields[0]]))
		except (ValueError, TypeError, KeyError):
			pass
		else:
			data[Fields[0]] = num
			info[num] = data
			rowInfo.append( (r+1, num, data) )	# Add one to the row to make error reporting consistent.

	# Fix all the tag formats
	FixTagFormat( info )

	# Check for duplicate numbers, duplicate tags and missing tags.
	numRow = {}

	# Collect how many tag fields we have.
	tagFields = []
	if hasTags:
		for tf in TagFields:
			if self.fieldCol.get(tf, -1) >= 0:
				tagFields.append( (tf, {}) )

	errors = []
	rowBib = {}
	for row, num, data in rowInfo:
		rowBib[row] = num

		if num in numRow:
			errors.append( (
				num,
				u'{}: {} {}: {} {} {}.'.format(
					_('Row'), row,
					_('Duplicate Bib#'), num,
					_('Same as row'), numRow[num],
				)
			) )
		else:
			numRow[num] = row

		for tField, tRow in tagFields:
			if tField not in data and tField == 'Tag':	# Don't check for missing Tag2s as they are optional.
				errors.append( (
					num,
					u'{}: {} {}: {} {}: {}'.format(
						_('Row'), row,
						_('Bib'), num,
						_('Missing field'), tField,
					)
				) )
				continue

			tag = six.text_type(data.get(tField,u'')).lstrip('0').upper()
			if tag:
				if tag in tRow:
					errors.append( (
						num,
						u'{}: {} {}: {} {}. {} {}: {} {}: {}'.format(
							_('Row'), row,
							_('Duplicate Field'), tField, tag,
							_('Same as'), _('Bib'), rowBib[tRow[tag]],
							_('Row'), tRow[tag]
						)
					) )
				else:
					tRow[tag] = row
			else:
				if tField == 'Tag':	# Don't check for empty Tag2s as they are optional.
					errors.append( (
						num,
						u'{}: {} {}: {} {}: {}'.format(
							_('Row'), row,
							_('Bib'), num,
							_('Missing Field'), tField,
						)
					) )

	stateCache = (os.path.getmtime(self.fileName), self.fileName, self.sheetName, self.fieldCol)
	infoCache = info
	errorCache = errors

	# Clear the tagNums cache
	try:
		Model.race.tagNums = None
	except AttributeError:
		pass

	# Do not read properties after the race has started to avoid overwriting local changes.
	if Model.race and Model.race.startTime:
		self.hasPropertiesSheet = False
		UnmatchedTagsUpdate()
	else:
		self.hasPropertiesSheet = ReadPropertiesFromExcel( reader )

	# Unconditionally update the categories from Excel.  This will overwrite any local changes.
	self.hasCategoriesSheet = ReadCategoriesFromExcel( reader )

	# Process any unknown tags with the updates.
	if self.hasCategoriesSheet and Model.race and Model.race.startTime:
		UnmatchedTagsUpdate()

	if not self.hasCategoriesSheet and self.initCategoriesFromExcel and (
			self.hasField('EventCategory') or any(self.hasField(f) for f in CustomCategoryFields)):
		MatchingCategory.PrologMatchingCategory()
		for bib, fields in infoCache.items():
			MatchingCategory.AddToMatchingCategory( bib, fields )
		MatchingCategory.EpilogMatchingCategory()

	try:
		Model.race.resetAllCaches()
	except Exception:
		# Best effort: there may be no current race.
		pass

	return infoCache
def GetCallups( fname, soundalike=True, useUciId=True, useLicense=True, callbackfunc=None, callbackupdate=None ):
	"""
	Read all sheets of the callup spreadsheet and compute the callup order.

	fname:			Excel workbook; must contain exactly one sheet named RegistrationSheet.
	soundalike, useUciId, useLicense:	matching options passed to each result Source.
	callbackfunc:	optional; receives (sources, errors) as sheets are processed.
	callbackupdate:	optional; receives progress text.

	Returns (registration_headers, callup_headers, callup_results, sources, errors).
	Raises ValueError if the registration sheet is missing or duplicated.
	"""
	if callbackupdate:
		callbackupdate( _('Reading spreadsheet...') )
	reader = GetExcelReader( fname )
	sheet_names = list( reader.sheet_names() )

	registration_sheet_count = sum( 1 for sheet in sheet_names if sheet == RegistrationSheet )
	if registration_sheet_count == 0:
		raise ValueError( u'{}: "{}"'.format('Spreadsheet is missing sheet', RegistrationSheet) )
	if registration_sheet_count > 1:
		raise ValueError( u'{}: "{}"'.format('Spreadsheet must have exactly one sheet named', RegistrationSheet) )

	if callbackupdate:
		callbackupdate( u'{}: {}'.format(_('Reading'), RegistrationSheet) )
	# FIX: removed a redundant second GetExcelReader(fname) call; "reader" is reused.
	registration = Source( fname, RegistrationSheet, False )
	registrationErrors = registration.read( reader )
	if callbackfunc:
		callbackfunc( [registration], [registrationErrors] )

	sources = []
	errors = []
	for sheet in sheet_names:
		if sheet == RegistrationSheet:
			continue
		if callbackfunc:
			callbackfunc( sources + [registration], errors + [registrationErrors] )
		if callbackupdate:
			callbackupdate( u'{}: {}'.format(_('Reading'), sheet) )
		source = Source( fname, sheet, soundalike=soundalike, useUciId=useUciId, useLicense=useLicense )
		errs = source.read( reader )
		sources.append( source )
		errors.append( errs )

	# Add a random sequence as a final sort order.
	registration.randomize_positions()
	sources.append( registration )
	errors.append( registrationErrors )

	if callbackfunc:
		callbackfunc( sources, errors )

	# For each rider, find their result in every source; sort by those results.
	for reg in registration.results:
		reg.result_vector = [source.find(reg) for source in sources]
	callup_order = sorted(
		registration.results,
		key=lambda reg: tuple(r.get_sort_key() for r in reg.result_vector)
	)

	# Randomize riders with no criteria (the final random-sequence source is excluded from the test).
	for i_random, reg in enumerate(callup_order):
		if all( r.get_status() == r.NoMatch for r in reg.result_vector[:-1] ):
			tail = callup_order[i_random:]
			random.seed()
			random.shuffle( tail )
			callup_order = callup_order[:i_random] + tail
			break

	callup_results = []
	registration_headers = registration.get_ordered_fields()

	# Also add the team code if there is one.
	if 'team_code' not in registration_headers:		# FIX: was "not 'team_code' in ...".
		for iSource, source in enumerate(sources):
			if 'team_code' in source.get_ordered_fields():
				try:
					# Insert team_code right after the team column.
					i_team = registration_headers.index('team')
					registration_headers = tuple(
						list(registration_headers[:i_team + 1]) + ['team_code'] + list(registration_headers[i_team + 1:])
					)
				except ValueError:
					registration_headers = tuple( list(registration_headers) + ['team_code'] )
				for reg in callup_order:
					try:
						reg.team_code = reg.result_vector[iSource].matches[0].team_code
					except Exception:
						# Best effort: rider may have no match in this source.
						pass
				break

	callup_headers = list(registration_headers) + [source.sheet_name for source in sources[:-1]]

	for reg in callup_order:
		row = [getattr(reg, f, u'') for f in registration_headers]
		row.extend( reg.result_vector[:-1] )
		callup_results.append( row )

	return registration_headers, callup_headers, callup_results, sources, errors
def setFileNameSheetName( self, fileName, sheetName ):
	"""
	Read the header row of (fileName, sheetName) and initialize the header
	choice controls with a best-guess column for each expected field.

	Raises ValueError if no header row can be found.
	"""
	reader = GetExcelReader( fileName )
	self.headers = None

	# Try to find the header columns.
	# Look for the first row with more than 4 columns.
	for r, row in enumerate(reader.iter_list(sheetName)):
		cols = sum( 1 for d in row if d )
		if cols > 4:
			self.headers = [six.text_type(h or '').strip() for h in row]
			break

	# If we haven't found a header row yet, assume the first non-empty row is the header.
	if not self.headers:
		for r, row in enumerate(reader.iter_list(sheetName)):
			cols = sum( 1 for d in row if d )
			if cols > 0:
				self.headers = [six.text_type(h or '').strip() for h in row]
				break

	# Ignore empty columns on the end.
	while self.headers and not self.headers[-1]:
		self.headers.pop()

	if not self.headers:
		raise ValueError( 'Could not find a Header Row %s::%s.' % (fileName, sheetName) )

	# Rename empty columns so as not to confuse the user.
	self.headers = [
		h if h else 'BlankHeaderName%03d' % (c + 1)
		for c, h in enumerate(self.headers)
	]

	# Create a map for the field names we are looking for
	# and the self.headers we found in the Excel sheet.
	iNoMatch = len(self.headers) - 1	# Last column is the "unmapped" choice.
	for c, f in enumerate(Fields):
		# Figure out a reasonable default column for each field.
		# (max() keeps the first occurrence on ties, matching the original strict-> scan.)
		matchBest, iBest = max(
			((Utils.approximateMatch(f, h), i) for i, h in enumerate(self.headers)),
			key=lambda x: x[0]
		)
		# If we don't get a high enough match, fall back to the expected column, else blank.
		if matchBest <= 0.34:
			try:
				# BUG FIX: was self.expectedFieldCol[h] - "h" leaked from the scan
				# loop as the last header, so the hint lookup keyed on the wrong name.
				iBest = min( self.expectedFieldCol[f], iNoMatch )
			except (TypeError, KeyError):
				iBest = iNoMatch

		self.choices[c].Clear()
		self.choices[c].AppendItems( self.headers )
		self.choices[c].SetSelection( iBest )

	self.gs.Layout()
	self.sp.SetAutoLayout( 1 )
	self.sp.SetupScrolling( scroll_y=False )
def onImportFromExcel( self, event ):
	"""
	Import rider info from the first sheet of an Excel file chosen by the user.

	The first row containing a recognized "Bib" header defines the field
	columns; subsequent rows with a numeric bib become Model.RiderInfo records
	which replace the race's rider info.
	"""
	# Explain the expected format; Cancel aborts the import.
	result = wx.MessageBox(
		u'Import from Excel\n\n'
		u'Reads the first sheet in the file.\n'
		u'Looks for the first row starting with "Bib","BibNum","Bib Num", "Bib #" or "Bib#".\n\n'
		u'Recognizes the following header fields (in any order, case insensitive):\n'
		u'\u2022 Bib|BibNum|Bib Num|Bib #|Bib#: Bib Number\n'
		u'\u2022 Points|Existing Points: Existing points at the start of the race.\n'
		u'\u2022 LastName|Last Name|LName: Last Name\n'
		u'\u2022 FirstName|First Name|FName: First Name\n'
		u'\u2022 Name: in the form "LastName, FirstName". Used only if no Last Name or First Name\n'
		u'\u2022 Team|Team Name|TeamName|Rider Team|Club|Club Name|ClubName|Rider Club: Team\n'
		u'\u2022 License|Licence: Regional License (not uci code)\n'
		u'\u2022 UCI ID|UCIID: UCI ID.\n'
		u'\u2022 Nat Code|NatCode|NationCode: 3 letter nation code.\n',
		u'Import from Excel',
		wx.OK | wx.CANCEL | wx.ICON_INFORMATION,
	)
	# BUG FIX: the original ignored the MessageBox result, so Cancel did not cancel.
	if result != wx.OK:
		return

	# Get the excel filename.
	openFileDialog = wx.FileDialog(
		self, "Open Excel file", "", "",
		"Excel files (*.xls,*.xlsx,*.xlsm)|*.xls;*.xlsx;*.xlsm",
		wx.FD_OPEN | wx.FD_FILE_MUST_EXIST,
	)
	if openFileDialog.ShowModal() == wx.ID_CANCEL:
		return

	excelFile = openFileDialog.GetPath()
	openFileDialog.Destroy()	# FIX: release the native dialog (was never destroyed).

	excel = GetExcelReader( excelFile )

	# Read the first sheet in the excel file.
	sheetName = excel.sheet_names()[0]

	riderInfo = []
	fm = None
	for row in excel.iter_list(sheetName):
		if fm:
			f = fm.finder( row )
			info = {
				'bib':             f('bib', u''),
				'first_name':      u'{}'.format(f('first_name', u'')).strip(),
				'last_name':       u'{}'.format(f('last_name', u'')).strip(),
				'license':         u'{}'.format(f('license_code', u'')).strip(),
				'team':            u'{}'.format(f('team', u'')).strip(),
				'uci_id':          u'{}'.format(f('uci_id', u'')).strip(),
				'nation_code':     u'{}'.format(f('nation_code', u'')).strip(),
				'existing_points': u'{}'.format(f('existing_points', u'0')).strip(),
			}
			info['bib'] = u'{}'.format(info['bib']).strip()
			if not info['bib']:
				# Skip rows without a bib.
				continue

			# Check for comma-separated name.
			name = u'{}'.format(f('name', u'')).strip()
			if name and not info['first_name'] and not info['last_name']:
				try:
					info['last_name'], info['first_name'] = name.split(',', 1)
				except Exception:
					pass

			# If there is a bib it must be numeric.
			try:
				info['bib'] = int( u'{}'.format(info['bib']).strip() )
			except ValueError:
				continue

			riderInfo.append( Model.RiderInfo(**info) )
		elif any( u'{}'.format(h).strip().lower() in self.bibHeader for h in row ):
			# Found the header row: build the field map and start reading riders.
			fm = standard_field_map()
			fm.set_headers( row )

	Model.race.setRiderInfo( riderInfo )
	self.updateGrid()
def ExtractRaceResultsExcel( raceInSeries ):
	"""
	Extract RaceResult records from every sheet of the race's Excel file.

	Each sheet is treated as a category named after the sheet (unless a row
	supplies its own category code).  Data rows follow a header row whose
	first cell is 'pos', 'pos.' or 'rank'; reading a sheet stops at the first
	row with an empty rank.

	Returns (True, 'success', raceResults).
	"""
	getReferenceName = SeriesModel.model.getReferenceName

	excel = GetExcelReader( raceInSeries.fileName )
	raceName = os.path.splitext(os.path.basename(raceInSeries.fileName))[0]
	raceResults = []
	posHeader = { u'pos', u'pos.', u'rank' }
	for sheetName in excel.sheet_names():
		fm = None
		categoryName = sheetName.strip()
		for row in excel.iter_list(sheetName):
			if fm:
				f = fm.finder( row )
				# FIX: six.text_type replaces py2-only "unicode" for consistency
				# with the rest of the file.
				info = {
					'raceDate':      None,
					'raceFileName':  raceInSeries.fileName,
					'raceName':      raceName,
					'raceOrganizer': u'',
					'raceInSeries':  raceInSeries,

					'bib':           f('bib', 99999),
					'rank':          f('pos', ''),
					'tFinish':       f('time', 0.0),

					'firstName':     six.text_type(f('first_name', u'')).strip(),
					'lastName':      six.text_type(f('last_name', u'')).strip(),
					'license':       six.text_type(f('license_code', u'')).strip(),
					'team':          six.text_type(f('team', u'')).strip(),

					'categoryName':  f('category_code', None),
				}

				info['rank'] = six.text_type(info['rank']).strip()
				if not info['rank']:	# If missing rank, assume end of input.
					break

				if info['categoryName'] is None:
					info['categoryName'] = categoryName
				info['categoryName'] = six.text_type(info['categoryName']).strip()

				try:
					info['rank'] = toInt( info['rank'] )
				except ValueError:
					pass
				if not isinstance(info['rank'], six.integer_types):	# FIX: was (long, int).
					continue

				# Check for comma-separated name.
				name = six.text_type(f('name', u'')).strip()
				if name and not info['firstName'] and not info['lastName']:
					try:
						info['lastName'], info['firstName'] = name.split(',', 1)
					except Exception:
						pass

				if not info['firstName'] and not info['lastName']:
					continue

				info['lastName'], info['firstName'] = getReferenceName( info['lastName'], info['firstName'] )

				# If there is a bib it must be numeric.
				try:
					info['bib'] = int( six.text_type(info['bib']).strip() )
				except ValueError:
					continue

				info['tFinish'] = (info['tFinish'] or 0.0)
				if isinstance(info['tFinish'], six.string_types) and ':' in info['tFinish']:	# FIX: was basestring.
					info['tFinish'] = Utils.StrToSeconds( info['tFinish'].strip() )
				else:
					try:
						# Convert Excel day fraction to seconds.
						info['tFinish'] = float( info['tFinish'] ) * 24.0 * 60.0 * 60.0
					except Exception:
						info['tFinish'] = 0.0

				raceResults.append( RaceResult(**info) )
			# FIX: guard "row and" prevents an IndexError on a completely empty row.
			elif row and six.text_type(row[0]).strip().lower() in posHeader:
				fm = standard_field_map()
				fm.set_headers( row )

	return True, 'success', raceResults