def _load_rel_db(self, dbf_file, rel_key):
    """
    Reads every row of the .dbf file at dbf_file via dbf.dict_reader.
    Returns a dict mapping each row's rel_key value -> that row.
    When several rows share a rel_key value, the last one read wins.
    """
    with open(dbf_file, 'rb') as handle:
        records = dbf.dict_reader(handle, strip_values=True)
        # Consume the reader while the file is still open.
        return dict((record[rel_key], record) for record in records)
def load_db(self, dbf_file, options):
    """
    Reads every row of the .dbf file at dbf_file and indexes the rows by
    the column named in options['field'].

    If options['group'] is truthy, each index entry is a dict keyed by the
    row's value in the options['group'] column (later rows replace earlier
    ones with the same pair of values). Otherwise each index entry is a
    list of rows in file order.

    Returns a defaultdict, so looking up a missing key creates an empty
    dict or list rather than raising KeyError.
    """
    group_column = options['group']
    index = defaultdict(dict) if group_column else defaultdict(list)
    with open(dbf_file, 'rb') as stream:
        for record in dbf.dict_reader(stream, strip_values=True):
            bucket = index[record[options['field']]]
            if group_column:
                bucket[record[group_column]] = record
            else:
                bucket.append(record)
    return index
def _load_rel_db(self, dbf_file, rel_key):
    """
    Loads a .dbf file into a dict keyed on the rel_key column.

    Each row (a dict) is stored under its rel_key value; a later row with
    the same value replaces the earlier one. Every row is reported through
    self.log, followed by the total row count and the number of distinct
    keys once the file has been read.
    """
    by_key = {}
    total = 0
    with open(dbf_file, "rb") as stream:
        for record in dbf.dict_reader(stream, strip_values=True):
            key = record[rel_key]
            by_key[key] = record
            total += 1
            full_name = record.get("FULLNAME", "unknown")
            self.log(" GOT DBF ROW %s for %s" % (key, full_name))
    self.log("Rows in %s: %d" % (dbf_file, total))
    self.log("Unique keys for %r: %d" % (rel_key, len(by_key)))
    return by_key
def parse_list(self, raw_zip_data):
    """
    Yields every data row found inside a ZIP archive.

    raw_zip_data is the raw content of the archive. Members are dispatched
    on their file extension (case-insensitive):

    * .zip -- parsed recursively with parse_list().
    * .dbf -- each row from dbf.dict_reader is yielded. If a ValueError is
      raised while reading, a warning is logged and the rest of that
      member is skipped.
    * .xls -- rows are yielded from every worksheet whose lower-cased name
      equals self.excel_sheet_name.

    Zero-byte members are skipped; any other extension is logged and
    ignored.
    """
    zf = zipfile.ZipFile(StringIO(raw_zip_data))
    for zi in zf.filelist:
        if zi.file_size == 0:
            continue  # Skip directories (and any other zero-byte entry).
        lowered = zi.filename.lower()
        if lowered.endswith('.zip'):
            for data in self.parse_list(zf.read(zi.filename)):
                yield data
        elif lowered.endswith('.dbf'):
            try:
                reader = dbf.dict_reader(StringIO(zf.read(zi.filename)))
                for row in reader:
                    yield row
            except ValueError:
                self.logger.warn(
                    'Skipping file %r: could not be parsed as DBF',
                    zi.filename)
        elif lowered.endswith('.xls'):
            # The Excel parser requires that the file be on the filesystem,
            # so write out a temp file.
            fd, filename = mkstemp()
            try:
                with os.fdopen(fd, 'wb') as fp:
                    fp.write(zf.read(zi.filename))
                # The workbook might have multiple worksheets, so we loop
                # over the ones we care about (by checking the worksheet's
                # name against self.excel_sheet_name).
                reader = excel.ExcelDictReader(filename, header_row_num=0,
                                               start_row_num=1)
                sheet_indexes = [
                    sheet.number for sheet in reader.workbook.sheets()
                    if self.excel_sheet_name == sheet.name.lower()
                ]
                for index in sheet_indexes:
                    reader.sheet_index = index
                    for row in reader:
                        yield row
            finally:
                # mkstemp() leaves deletion to the caller. Doing it in a
                # finally also covers a consumer that stops iterating early
                # (the generator is closed) or a parser error.
                try:
                    os.remove(filename)
                except OSError:
                    # Best-effort cleanup: don't mask the real outcome.
                    self.logger.warn(
                        'Could not remove temporary file %r', filename)
        else:
            self.logger.warn('Got unknown file type: %r', zi.filename)
def _load_rel_db(self, dbf_file, rel_key):
    """
    Builds a lookup table from a .dbf file: rel_key value -> row dict.

    Rows sharing a rel_key value overwrite one another, so the last row
    read is the one kept. Each row is logged via self.log as it is read;
    afterwards the row count and the number of unique keys are logged.
    """
    lookup = {}
    seen = 0
    with open(dbf_file, 'rb') as source:
        for entry in dbf.dict_reader(source, strip_values=True):
            lookup[entry[rel_key]] = entry
            seen = seen + 1
            message = " GOT DBF ROW %s for %s" % (
                entry[rel_key], entry.get('FULLNAME', 'unknown'))
            self.log(message)
    self.log("Rows in %s: %d" % (dbf_file, seen))
    self.log("Unique keys for %r: %d" % (rel_key, len(lookup)))
    return lookup
def _load_rel_db(self, dbf_file, rel_key):
    """
    Reads rows as dicts from a .dbf file.
    Returns a mapping of rel_key -> row dict.

    Rows that share a rel_key value overwrite one another (last one wins).
    When self.verbose is true, each row is printed as it is read, followed
    by the total row count and the number of unique keys.
    """
    db = {}
    rowcount = 0
    with open(dbf_file, 'rb') as f:
        for row in dbf.dict_reader(f, strip_values=True):
            db[row[rel_key]] = row
            rowcount += 1
            if self.verbose:
                # Parenthesised single-argument print: same output under the
                # Python 2 print statement, and valid syntax on Python 3.
                print(" GOT DBF ROW %s for %s" % (row[rel_key], row.get('FULLNAME', 'unknown')))
    if self.verbose:
        print("Rows in %s: %d" % (dbf_file, rowcount))
        print("Unique keys for %r: %d" % (rel_key, len(db)))
    return db
def parse_list(self, raw_zip_data):
    """
    Generator over the data rows contained in a ZIP archive.

    raw_zip_data holds the archive content. The archive may contain
    directories and/or files; files are handled by extension, ignoring
    case:

    * .zip -- recursed into via parse_list().
    * .dbf -- rows from dbf.dict_reader are yielded; a ValueError while
      reading logs a warning and abandons that file.
    * .xls -- rows are yielded from each worksheet whose lower-cased name
      matches self.excel_sheet_name.

    Entries of size zero are skipped, and files with any other extension
    are logged as unknown and ignored.
    """
    zf = zipfile.ZipFile(StringIO(raw_zip_data))
    for zi in zf.filelist:
        if zi.file_size == 0:
            continue  # Skip directories (and any other zero-byte entry).
        name = zi.filename.lower()
        if name.endswith('.zip'):
            for data in self.parse_list(zf.read(zi.filename)):
                yield data
        elif name.endswith('.dbf'):
            try:
                reader = dbf.dict_reader(StringIO(zf.read(zi.filename)))
                for row in reader:
                    yield row
            except ValueError:
                self.logger.warn('Skipping file %r: could not be parsed as DBF', zi.filename)
        elif name.endswith('.xls'):
            # The Excel parser requires that the file be on the filesystem,
            # so write out a temp file.
            fd, filename = mkstemp()
            try:
                with os.fdopen(fd, 'wb') as fp:
                    fp.write(zf.read(zi.filename))
                # The workbook might have multiple worksheets, so we loop
                # over the ones we care about (by checking the worksheet's
                # name against self.excel_sheet_name).
                reader = excel.ExcelDictReader(filename, header_row_num=0, start_row_num=1)
                sheet_indexes = [sheet.number for sheet in reader.workbook.sheets()
                                 if self.excel_sheet_name == sheet.name.lower()]
                for index in sheet_indexes:
                    reader.sheet_index = index
                    for row in reader:
                        yield row
            finally:
                # The temp file is ours to delete (mkstemp never does it).
                # The finally clause also runs if the consumer abandons the
                # generator part-way through or the parser raises.
                try:
                    os.remove(filename)
                except OSError:
                    # Cleanup is best-effort; report it and carry on.
                    self.logger.warn('Could not remove temporary file %r', filename)
        else:
            self.logger.warn('Got unknown file type: %r', zi.filename)