def get_records(rows, fields):
    """
    Truncate/pad empty/missing records to expected row length, canonicalize
    cell content, and return resulting record list.

    :param rows: iterable producing rows of content; every row is a list
        of cell values and is trimmed/padded in place
    :type rows: iterable of lists
    :param fields: collection of fields specified in JSON schema; the
        'datastore_id' and 'datastore_type' keys of each field are read
    :type fields: list or tuple

    :return: canonicalized records of specified upload data, one dict per
        row keyed by datastore_id
    :rtype: list of dicts

    Raises BadExcelData, prefixed with the spreadsheet row number, when a
    cell of that row cannot be canonicalized
    """
    width = len(fields)
    records = []
    for n, row in enumerate(rows):
        # trailing cells might be empty: trim row to fit
        while (row and len(row) > width and
                (row[-1] is None or row[-1] == '')):
            row.pop()
        # a completely empty row is left empty (it yields an empty record);
        # any other short row is padded with None placeholders so that
        # canonicalization happens once only, below
        if row:
            row.extend([None] * (width - len(row)))

        try:
            records.append(dict(
                (f['datastore_id'], _canonicalize(v, f['datastore_type']))
                for f, v in zip(fields, row)))
        except BadExcelData as e:
            # report the row as the user sees it in the spreadsheet:
            # 1-based and counted after the header rows
            raise BadExcelData('Row %d: ' % (n + HEADER_ROWS + 1) + e.message)

    return records
def _canonicalize(dirty, dstore_tag):
    """
    Normalize one raw cell value read through xlrd so that it lines up with
    the recombinant.json datastore type named by dstore_tag.

    :param dirty: raw cell content as read through xlrd
    :type dirty: object
    :param dstore_tag: datastore_type specifier in (JSON) schema for cell
    :type dstore_tag: str

    :return: the type's default for None or blank cells, otherwise the
        canonical text of the cell (the integer 0 when a numeric cell holds
        no digits at all)
    :rtype: unicode, int, or the type of the datastore type's default

    Raises BadExcelData on formula cells (non-numeric, non-money content
    starting with '=')
    """
    spec = datastore_type[dstore_tag]

    if dirty is None:
        return spec.default

    if isinstance(dirty, (float, int)):
        if spec.numeric:
            return unicode(dirty)  # FIXME ckan2.1 datastore?-- float(dirty)
        # JSON specifies text or money: content of origin is numeric string.
        # If xlrd has added .0 to present content as a float,
        # trim it before returning as numeric string
        whole = int(dirty)
        if whole == dirty:
            return unicode(whole)
        return unicode(dirty)

    if isinstance(dirty, basestring) and not dirty.strip():
        # Content trims to empty: default
        return spec.default

    if spec.numeric:
        # numeric type holding non-number content: drop a trailing
        # '.' + digits/spaces group, then keep only the remaining digits
        without_fraction = re.sub(r'\.[0-9 ]+$', '', unicode(dirty))
        digits = re.sub(r'[^0-9]', '', without_fraction)
        if digits:
            return unicode(digits)  # FIXME ckan2.1 datastore?-- float(dirty)
        return 0

    if spec.tag == 'money':
        # User has overridden Excel format string, probably adding currency
        # markers or digit group separators (e.g.,fr-CA uses 1$ (not $1)).
        # Truncate any trailing decimal digits, retain int
        # part, and cast as numeric string.
        without_fraction = re.sub(r'\.[0-9 ]+$', '', unicode(dirty))
        return unicode(re.sub(r'[^0-9]', '', without_fraction))

    if spec.tag == 'date' and isinstance(dirty, datetime):
        return u'%04d-%02d-%02d' % (dirty.year, dirty.month, dirty.day)

    text = unicode(dirty)
    if text.startswith('='):
        raise BadExcelData('Formulas are not supported')
    return text
def canonicalize(dirty, dstore_tag):
    """
    Normalize one raw cell value read through xlrd so that it lines up with
    the recombinant.json datastore type named by dstore_tag.

    :param dirty: raw cell content as read through xlrd
    :type dirty: object
    :param dstore_tag: datastore_type specifier in (JSON) schema for cell
    :type dstore_tag: str

    :return: the type's default for None or blank cells, otherwise the
        canonical text of the cell (the integer 0 when a numeric cell holds
        no digits before its first '.')
    :rtype: unicode, int, or the type of the datastore type's default

    Raises BadExcelData on formula cells (non-numeric, non-money content
    starting with '=')
    """
    spec = datastore_type[dstore_tag]
    non_digits = r'[^0-9]'

    if dirty is None:
        return spec.default

    if isinstance(dirty, (float, int, long)):
        return unicode(dirty)

    if isinstance(dirty, basestring) and not dirty.strip():
        # Content trims to empty: default
        return spec.default

    if spec.numeric:
        # numeric type holding non-number content: cut at the first '.',
        # then keep only the digits of what is left
        whole_part = unicode(dirty).split('.')[0]
        digits = re.sub(non_digits, '', whole_part)
        if digits:
            return unicode(digits)  # FIXME ckan2.1 datastore?-- float(dirty)
        return 0

    if spec.tag == 'money':
        # User has overridden Excel format string, probably adding currency
        # markers or digit group separators (e.g.,fr-CA uses 1$ (not $1)).
        # Accept only "DDDDD.DD", discard other characters
        dollars, sep, cents = unicode(dirty).rpartition('.')
        clean_dollars = re.sub(non_digits, '', dollars)
        clean_cents = re.sub(non_digits, '', cents)
        return clean_dollars + sep + clean_cents

    if spec.tag == 'date' and isinstance(dirty, datetime):
        return u'%04d-%02d-%02d' % (dirty.year, dirty.month, dirty.day)

    text = unicode(dirty)
    if text.startswith('='):
        raise BadExcelData('Formulas are not supported')
    return text
def upload(self, id):
    """
    Load the spreadsheet posted as 'xls_update' into the dataset `id`.

    On success a flash message is queued and the request is redirected to
    the package read page. When the upload is missing or rejected with
    BadExcelData, the result of self.preview_table() is returned with the
    error message attached.
    """
    geno = get_geno(self._get_package_type(id))
    lc = ckanapi.LocalCKAN(username=c.user)
    dataset = lc.action.package_show(id=id)

    try:
        posted = request.POST['xls_update']
        # an empty string is posted when no file was chosen
        if posted == '':
            raise BadExcelData('You must provide a valid file')

        _process_upload_file(lc, dataset, posted.file, geno)

        success_message = _(
            "Your file was successfully uploaded into the central system.")
        h.flash_success(success_message)

        read_url = h.url_for(controller='package', action='read', id=id)
        redirect(read_url)
    except BadExcelData as e:
        # show the preview table again, this time with the error on it
        org = lc.action.organization_show(id=dataset['owner_org'])
        first_resource = dataset['resources'][0]
        return self.preview_table(
            resource_name=first_resource['name'],
            owner_org=org['name'],
            errors=[e.message])
def upload(self, id):
    """
    Load the spreadsheet posted as 'xls_update' into the dataset `id`.

    On success a flash message is queued and the request is redirected to
    the package read page. When the upload is missing or rejected with
    BadExcelData, the edit template of the package type is rendered with
    the error message.
    """
    package_type = self._get_package_type(id)
    geno = get_geno(package_type)
    lc = ckanapi.LocalCKAN(username=c.user)
    dataset = lc.action.package_show(id=id)

    try:
        posted = request.POST['xls_update']
        # an empty string is posted when no file was chosen
        if posted == '':
            raise BadExcelData('You must provide a valid file')

        _process_upload_file(lc, dataset, posted.file, geno)

        success_message = _(
            "Your file was successfully uploaded into the central system.")
        h.flash_success(success_message)

        read_url = h.url_for(controller='package', action='read', id=id)
        redirect(read_url)
    except BadExcelData as e:
        # hand the dataset and the error to the edit template
        c.pkg_dict = dataset
        template = self._edit_template(package_type)
        return render(
            template,
            extra_vars={'errors': [e.message], 'action': 'edit'})
def _process_upload_file(lc, dataset, upload_file, geno):
    """
    Use lc.action.datastore_upsert to load data from upload_file

    Every sheet produced by read_excel(upload_file) is checked against the
    dataset's resource names, the dataset's organization and the column ids
    of the sheet's chromo before its rows are canonicalized with
    get_records and written to the matching resource.

    :param lc: object whose lc.action.datastore_upsert is called
    :param dataset: package dict; its 'organization' and 'resources'
        entries are read
    :type dataset: dict
    :param upload_file: uploaded file, passed on to read_excel
    :param geno: accepted for the caller's benefit; not used here

    raises BadExcelData on errors.
    """
    owner_org = dataset['organization']['name']

    # sheet name -> id of the resource the sheet is loaded into
    expected_sheet_names = dict(
        (resource['name'], resource['id'])
        for resource in dataset['resources'])

    upload_data = read_excel(upload_file)
    while True:
        try:
            sheet_name, org_name, column_names, rows = next(upload_data)
        except StopIteration:
            return
        except Exception:
            # XXX broad except because this can fail in all sorts of ways;
            # SystemExit/KeyboardInterrupt are not errors in the file and
            # are left to propagate untouched
            if asbool(config.get('debug', False)):
                # on debug we want the real error
                raise
            raise BadExcelData(
                _("The server encountered a problem processing the file "
                "uploaded. Please try copying your data into the latest "
                "version of the template and uploading again. If this "
                "problem continues, send your Excel file to "
                "[email protected] so we may investigate."))

        if sheet_name not in expected_sheet_names:
            raise BadExcelData(
                _('Invalid file for this data type. '
                'Sheet must be labeled "{0}", '
                'but you supplied a sheet labeled "{1}"').format(
                    '"/"'.join(sorted(expected_sheet_names)),
                    sheet_name))

        if org_name != owner_org:
            raise BadExcelData(
                _('Invalid sheet for this organization. '
                'Sheet must be labeled for {0}, '
                'but you supplied a sheet for {1}').format(
                    owner_org, org_name))

        # custom styles or other errors cause columns to be read
        # that actually have no data. strip them here to avoid error below.
        # An empty (or all-None) header ends up as [] and is rejected by the
        # column comparison below instead of failing with an IndexError.
        while column_names and column_names[-1] is None:
            column_names.pop()

        chromo = get_chromo(sheet_name)
        expected_columns = [f['datastore_id'] for f in chromo['fields']]
        if column_names != expected_columns:
            raise BadExcelData(
                _("This template is out of date. "
                "Please try copying your data into the latest "
                "version of the template and uploading again. If this "
                "problem continues, send your Excel file to "
                "[email protected] so we may investigate."))

        records = get_records(rows, chromo['fields'])
        # sheets with a primary key update existing rows, others only append
        method = 'upsert' if chromo.get('datastore_primary_key') else 'insert'
        try:
            lc.action.datastore_upsert(
                method=method,
                resource_id=expected_sheet_names[sheet_name],
                records=records,
                )
        except ValidationError as e:
            # because, where else would you put the error text?
            # XXX improve this in datastore, please
            pgerror = e.error_dict['info']['orig'][0].decode('utf-8')
            # remove some postgres-isms that won't help the user
            # when we render this as an error in the form
            pgerror = re.sub(r'\nLINE \d+:', u'', pgerror)
            pgerror = re.sub(r'\n *\^\n$', u'', pgerror)
            raise BadExcelData(
                _(u"Error while importing data: {0}").format(pgerror))