def _load_rel_db(self, dbf_file, rel_key):
     """
     Builds a lookup table from the rows of a .dbf file.

     Every row produced by dbf.dict_reader (called with strip_values=True)
     is stored under its rel_key value. When several rows share a value,
     the last one read replaces the earlier ones.
     Returns the resulting dict.
     """
     rows_by_key = {}
     with open(dbf_file, 'rb') as dbf_handle:
         for record in dbf.dict_reader(dbf_handle, strip_values=True):
             rows_by_key[record[rel_key]] = record
     return rows_by_key
 def _load_rel_db(self, dbf_file, rel_key):
     """
     Reads a .dbf file and indexes its rows by the value of rel_key.

     Returns a dict mapping each rel_key value to a row. The file is closed
     whether or not reading succeeds.
     """
     source = open(dbf_file, 'rb')
     try:
         reader = dbf.dict_reader(source, strip_values=True)
         # dict() keeps the last pair seen for a repeated key, so a later row
         # overrides an earlier row with the same rel_key value.
         return dict((entry[rel_key], entry) for entry in reader)
     finally:
         source.close()
 def load_db(self, dbf_file, options):
     """
     Loads the rows of a .dbf file, bucketed by the options['field'] column.

     If options['group'] is truthy, it names a second column and the result
     maps field value -> {group value: row}. Otherwise the result maps
     field value -> list of rows, in file order.
     Returns a defaultdict in both cases.
     """
     group_column = options['group']
     buckets = defaultdict(dict) if group_column else defaultdict(list)
     with open(dbf_file, 'rb') as handle:
         for record in dbf.dict_reader(handle, strip_values=True):
             bucket_key = record[options['field']]
             if group_column:
                 buckets[bucket_key][record[group_column]] = record
             else:
                 buckets[bucket_key].append(record)
     return buckets
 def _load_rel_db(self, dbf_file, rel_key):
     """
     Indexes the rows of a .dbf file by one of their fields.

     Each row is reported through self.log as it is read. Afterwards the
     number of rows read and the number of distinct rel_key values are
     logged as well.
     Returns a dict of rel_key value -> row; the last row read wins when a
     value repeats.
     """
     by_key = {}
     total = 0
     with open(dbf_file, "rb") as stream:
         records = dbf.dict_reader(stream, strip_values=True)
         # Counting from 1 leaves `total` equal to the number of rows read;
         # it stays 0 when the file has no rows.
         for total, record in enumerate(records, 1):
             key = record[rel_key]
             by_key[key] = record
             self.log(" GOT DBF ROW %s for %s" % (key, record.get("FULLNAME", "unknown")))
     self.log("Rows in %s: %d" % (dbf_file, total))
     self.log("Unique keys for %r: %d" % (rel_key, len(by_key)))
     return by_key
Exemple #5
0
    def parse_list(self, raw_zip_data):
        """
        Yields the rows of every data file found in a ZIP archive.

        raw_zip_data is the raw content of the archive. Entries are handled
        by (case-insensitive) file extension:

        * .zip -- parsed recursively with this same method.
        * .dbf -- rows come from dbf.dict_reader. If reading raises
          ValueError the file is logged and skipped; rows already yielded
          from it are not retracted.
        * .xls -- written to a temporary file and read with
          excel.ExcelDictReader, one pass per worksheet whose lowercased
          name equals self.excel_sheet_name. The temporary file is removed
          afterwards.
        * anything else -- logged as an unknown file type.

        Zero-length entries are skipped.
        """
        # The input is a ZIP file full of directories and/or files. Files can
        # be ZIP, DBF or XLS.
        zf = zipfile.ZipFile(StringIO(raw_zip_data))
        for zi in zf.filelist:
            if zi.file_size == 0:
                continue  # Skip directories.
            lowered_name = zi.filename.lower()
            if lowered_name.endswith('.zip'):
                for data in self.parse_list(zf.read(zi.filename)):
                    yield data
            elif lowered_name.endswith('.dbf'):
                try:
                    reader = dbf.dict_reader(StringIO(zf.read(zi.filename)))
                    for row in reader:
                        yield row
                except ValueError:
                    self.logger.warn(
                        'Skipping file %r: could not be parsed as DBF',
                        zi.filename)
            elif lowered_name.endswith('.xls'):
                # The Excel parser requires that the file be on the filesystem,
                # so write out a temp file.
                fd, filename = mkstemp()
                try:
                    fp = os.fdopen(fd, 'wb')
                    try:
                        fp.write(zf.read(zi.filename))
                    finally:
                        fp.close()

                    # The workbook might have multiple worksheets, so we loop
                    # over the ones we care about (by checking the worksheet's
                    # name against self.excel_sheet_name).
                    reader = excel.ExcelDictReader(filename,
                                                   header_row_num=0,
                                                   start_row_num=1)
                    sheet_indexes = [
                        sheet.number for sheet in reader.workbook.sheets()
                        if self.excel_sheet_name == sheet.name.lower()
                    ]
                    for index in sheet_indexes:
                        reader.sheet_index = index
                        for row in reader:
                            yield row
                finally:
                    # mkstemp() leaves deletion to the caller. This also runs
                    # when the consumer closes the generator early, so the
                    # temp file never outlives the parse.
                    try:
                        os.remove(filename)
                    except OSError:
                        # Best-effort cleanup: a leftover temp file must not
                        # mask the parse result or the original error.
                        self.logger.warn(
                            'Could not remove temp file %r', filename)
            else:
                self.logger.warn('Got unknown file type: %r', zi.filename)
 def _load_rel_db(self, dbf_file, rel_key):
     """
     Loads a .dbf file into a dict keyed by each row's rel_key value.

     Rows are read with dbf.dict_reader (strip_values=True) and every row is
     reported via self.log. Once the file has been read, the row count and
     the count of distinct keys are logged too.
     Returns the dict; a repeated key keeps only the last row read.
     """
     lookup = {}
     seen = 0
     with open(dbf_file, 'rb') as infile:
         for rec in dbf.dict_reader(infile, strip_values=True):
             lookup[rec[rel_key]] = rec
             seen += 1
             label = rec.get('FULLNAME', 'unknown')
             self.log(" GOT DBF ROW %s for %s" % (rec[rel_key], label))
     rows_message = "Rows in %s: %d" % (dbf_file, seen)
     self.log(rows_message)
     keys_message = "Unique keys for %r: %d" % (rel_key, len(lookup))
     self.log(keys_message)
     return lookup
Exemple #7
0
 def _load_rel_db(self, dbf_file, rel_key):
     """
     Reads rows as dicts from a .dbf file.
     Returns a mapping of rel_key -> row dict.

     When several rows share a rel_key value, the last one read wins. If
     self.verbose is true, every row is printed as it is read, followed by
     the total row count and the number of unique keys.
     """
     db = {}
     rowcount = 0
     with open(dbf_file, 'rb') as f:
         for row in dbf.dict_reader(f, strip_values=True):
             db[row[rel_key]] = row
             rowcount += 1
             if self.verbose:
                 # A single parenthesized argument prints the same text as a
                 # Python 2 print statement and as a Python 3 print() call.
                 print(" GOT DBF ROW %s for %s" % (row[rel_key], row.get('FULLNAME', 'unknown')))
     if self.verbose:
         print("Rows in %s: %d" % (dbf_file, rowcount))
         print("Unique keys for %r: %d" % (rel_key, len(db)))
     return db
    def parse_list(self, raw_zip_data):
        """
        Generates rows from all supported files inside a ZIP archive.

        raw_zip_data holds the raw archive content. Each non-empty entry is
        dispatched on its lowercased file name:

        * '.zip': the entry is itself parsed by a recursive call.
        * '.dbf': rows are read with dbf.dict_reader. A ValueError while
          reading logs a warning and moves on to the next entry; rows
          yielded before the error are kept.
        * '.xls': the entry is copied to a temporary file, read with
          excel.ExcelDictReader for every worksheet whose lowercased name
          matches self.excel_sheet_name, and the temporary file is then
          deleted.
        * otherwise: a warning about the unknown file type is logged.
        """
        # The input is a ZIP file full of directories and/or files. Files can
        # be ZIP, DBF or XLS.
        zf = zipfile.ZipFile(StringIO(raw_zip_data))
        for zi in zf.filelist:
            if zi.file_size == 0:
                continue # Skip directories.
            name = zi.filename.lower()
            if name.endswith('.zip'):
                for data in self.parse_list(zf.read(zi.filename)):
                    yield data
            elif name.endswith('.dbf'):
                try:
                    reader = dbf.dict_reader(StringIO(zf.read(zi.filename)))
                    for row in reader:
                        yield row
                except ValueError:
                    self.logger.warn('Skipping file %r: could not be parsed as DBF', zi.filename)
            elif name.endswith('.xls'):
                # The Excel parser requires that the file be on the filesystem,
                # so write out a temp file.
                fd, filename = mkstemp()
                try:
                    # The with-block closes the descriptor even if the write
                    # fails part-way.
                    with os.fdopen(fd, 'wb') as fp:
                        fp.write(zf.read(zi.filename))

                    # The workbook might have multiple worksheets, so we loop
                    # over the ones we care about (by checking the worksheet's
                    # name against self.excel_sheet_name).
                    reader = excel.ExcelDictReader(filename, header_row_num=0, start_row_num=1)
                    wanted = self.excel_sheet_name
                    sheet_indexes = [sheet.number for sheet in reader.workbook.sheets()
                                     if wanted == sheet.name.lower()]
                    for index in sheet_indexes:
                        reader.sheet_index = index
                        for row in reader:
                            yield row
                finally:
                    # mkstemp() never deletes the file it creates. Removing it
                    # here covers normal completion, errors, and a consumer
                    # that stops iterating early.
                    try:
                        os.remove(filename)
                    except OSError:
                        # Cleanup is best-effort; do not let it hide the
                        # outcome of the parse itself.
                        self.logger.warn('Could not remove temp file %r', filename)
            else:
                self.logger.warn('Got unknown file type: %r', zi.filename)