def detect_publisher(extras):
    """Return the most specific publisher recorded in a package's extras.

    Starts with the plain 'publisher' extra, then walks 'publisher_1'
    through 'publisher_5' in order, so the highest-numbered non-empty
    level wins.

    :param extras: dict of package extras (key -> value).
    :return: stripped publisher string, or None when none is present.
    """
    publisher = None

    if extras.get('publisher'):
        publisher = JsonExportBuilder.strip_if_string(extras['publisher'])

    # publisher_1 .. publisher_5 refine the organization hierarchy; the
    # deepest non-empty level overrides the ones before it. Strip once
    # instead of twice per key (original called strip_if_string both in
    # the test and in the assignment).
    for i in range(1, 6):
        value = extras.get('publisher_' + str(i))
        if value:
            stripped = JsonExportBuilder.strip_if_string(value)
            if stripped:
                publisher = stripped

    return publisher
def detect_publisher(extras):
    """Pick the publisher for a dataset out of its extras.

    Checks the 'publisher' extra first, then each numbered
    'publisher_N' extra for N = 1..5; later non-empty levels win.
    """
    strip = JsonExportBuilder.strip_if_string
    result = None

    if 'publisher' in extras and extras['publisher']:
        result = strip(extras['publisher'])

    for level in range(1, 6):
        field = 'publisher_%d' % level
        if field in extras and extras[field] and strip(extras[field]):
            result = strip(extras[field])

    return result
def write_zip(self, data, error=None, errors_json=None, zip_name='data'):
    """Build a zip archive in memory and return its raw bytes.

    The archive contains:
      * data.json (or draft_data.json when zip_name == 'draft') holding
        the exported catalog, or empty.json when there is no data;
      * errors.json when any per-dataset error records were collected;
      * errorlog.txt when a non-empty error log was captured.

    Also sets the HTTP response headers for a zip attachment download.

    :param data: python object to write to the data file.
    :param error: unicode string representing the content of the error log.
    :param errors_json: list of per-dataset error records, or None.
    :param zip_name: the base name to use for the zip file.
    :return: the finished zip archive as a byte string.
    """
    import zipfile

    buf = StringIO.StringIO()
    try:
        zf = zipfile.ZipFile(buf, mode='w')
        # try/finally (new vs. original): make sure both the ZipFile and
        # the underlying buffer are closed even if a writestr/json.dumps
        # call raises, instead of leaking them.
        try:
            data_file_name = 'data.json'
            if 'draft' == zip_name:
                data_file_name = 'draft_data.json'

            # Write the data file, or a placeholder when there is nothing
            # to return.
            if data:
                zf.writestr(
                    data_file_name,
                    json.dumps(
                        JsonExportBuilder.make_datajson_export_catalog(data),
                        ensure_ascii=False).encode('utf8'))
            else:
                zf.writestr('empty.json', '')

            # Merge errors accumulated on the instance with those passed
            # in by the caller (in-place += preserves the caller's list
            # identity, as before).
            if self._errors_json:
                if errors_json:
                    errors_json += self._errors_json
                else:
                    errors_json = self._errors_json

            # Errors in json format
            if errors_json:
                zf.writestr('errors.json',
                            json.dumps(errors_json).encode('utf8'))

            # Write the error log
            if error:
                zf.writestr('errorlog.txt', error.encode('utf8'))
        finally:
            # Closing the ZipFile flushes the central directory into buf.
            zf.close()

        buf.seek(0)
        binary = buf.read()
    finally:
        buf.close()

    response.content_type = 'application/octet-stream'
    response.content_disposition = 'attachment; filename="%s.zip"' % zip_name
    return binary
def write_zip(self, data, error=None, errors_json=None, zip_name='data'):
    """Assemble an in-memory zip download and return its bytes.

    Data: a python object to write to the data.json
    Error: unicode string representing the content of the error log.
    zip_name: the name to use for the zip file
    """
    import zipfile

    target = 'draft_data.json' if zip_name == 'draft' else 'data.json'

    memfile = StringIO.StringIO()
    archive = zipfile.ZipFile(memfile, mode='w')

    # Write the data file, or a placeholder when there is nothing to return.
    if data:
        catalog = JsonExportBuilder.make_datajson_export_catalog(data)
        payload = json.dumps(catalog, ensure_ascii=False).encode('utf8')
        archive.writestr(target, payload)
    else:
        archive.writestr('empty.json', '')

    # Fold instance-level errors into the caller-supplied list (in place).
    if self._errors_json:
        if errors_json:
            errors_json += self._errors_json
        else:
            errors_json = self._errors_json

    # Errors in json format
    if errors_json:
        archive.writestr('errors.json', json.dumps(errors_json).encode('utf8'))

    # Write the error log
    if error:
        archive.writestr('errorlog.txt', error.encode('utf8'))

    archive.close()
    memfile.seek(0)
    binary = memfile.read()
    memfile.close()

    response.content_type = 'application/octet-stream'
    response.content_disposition = 'attachment; filename="%s.zip"' % zip_name
    return binary
def make_draft(self, owner_org):
    """Build the draft data.json export for an organization as a zip.

    Collects every package whose 'publishing_status' extra is 'Draft',
    converts each one to a data.json entry, and records validation
    problems both as JSON error records and in a captured warning log.

    :param owner_org: organization whose packages are exported.
    :return: zip bytes produced by write_zip().
    """
    # Error handler for creating the error log: a temporary stream
    # handler so warnings emitted while building the export end up in
    # the downloadable error log.
    stream = StringIO.StringIO()
    eh = logging.StreamHandler(stream)
    eh.setLevel(logging.WARN)
    eh.setFormatter(logging.Formatter('%(asctime)s - %(message)s'))
    logger.addHandler(eh)

    try:
        # Build the data.json file.
        packages = self.get_packages(owner_org)

        errors_json = []
        output = []
        seen_identifiers = set()

        for pkg in packages:
            extras = dict((x['key'], x['value']) for x in pkg['extras'])

            # Only draft datasets belong in this export.
            if extras.get('publishing_status') != 'Draft':
                continue

            datajson_entry = JsonExportBuilder.make_datajson_export_entry(
                pkg, seen_identifiers)

            if 'errors' in datajson_entry:
                errors_json.append(datajson_entry)
                datajson_entry = None

            if datajson_entry and self.is_valid(datajson_entry):
                output.append(datajson_entry)
            else:
                publisher = self.detect_publisher(extras)
                # warning() instead of deprecated warn()
                logger.warning(
                    "Dataset id=[%s], title=[%s], organization=[%s] omitted\n",
                    pkg.get('id', None), pkg.get('title', None), publisher)

        # Get the error log
        eh.flush()
        error = stream.getvalue()
    finally:
        # Always detach the handler, even on error, so repeated calls do
        # not stack handlers (and duplicate log lines) on the shared logger.
        eh.close()
        logger.removeHandler(eh)
        stream.close()

    return self.write_zip(output, error, errors_json, zip_name='draft')
def make_draft(self, owner_org):
    """Export the organization's draft datasets as a zip via write_zip()."""
    # Capture warnings produced during the build into an in-memory log.
    log_stream = StringIO.StringIO()
    handler = logging.StreamHandler(log_stream)
    handler.setLevel(logging.WARN)
    handler.setFormatter(logging.Formatter('%(asctime)s - %(message)s'))
    logger.addHandler(handler)

    errors_json = []
    output = []

    for pkg in self.get_packages(owner_org):
        extras = {e['key']: e['value'] for e in pkg['extras']}

        is_draft = ('publishing_status' in extras
                    and extras['publishing_status'] == 'Draft')
        if not is_draft:
            continue

        entry = JsonExportBuilder.make_datajson_export_entry(pkg)

        if 'errors' in entry.keys():
            errors_json.append(entry)
            entry = None

        if entry and self.is_valid(entry):
            output.append(entry)
        else:
            publisher = self.detect_publisher(extras)
            logger.warn(
                "Dataset id=[%s], title=[%s], organization=[%s] omitted\n",
                pkg.get('id', None), pkg.get('title', None), publisher)

    # Pull the captured log text and restore the logger.
    handler.flush()
    error = log_stream.getvalue()
    handler.close()
    logger.removeHandler(handler)
    log_stream.close()

    return self.write_zip(output, error, errors_json, zip_name='draft')
def make_json(self):
    """Build the public data.json catalog as a list of export entries.

    Includes only datasets whose 'public_access_level' extra does not
    match 'Non-public'/'non-public'; datasets missing that required
    extra are logged and recorded in self._errors_json.

    :return: list of data.json entry dicts.
    """
    # Build the data.json file.
    packages = p.toolkit.get_action(
        "current_package_list_with_resources")(None, {})
    output = []

    # Create data.json only using public and public-restricted datasets;
    # datasets marked non-public are not exposed.
    for pkg in packages:
        extras = dict((x['key'], x['value']) for x in pkg['extras'])
        try:
            # Raises KeyError when 'public_access_level' is absent.
            if re.match(r'[Nn]on-public', extras['public_access_level']):
                continue

            datajson_entry = JsonExportBuilder.make_datajson_export_entry(pkg)
            if datajson_entry:
                output.append(datajson_entry)
            else:
                publisher = self.detect_publisher(extras)
                # warning() instead of deprecated warn()
                logger.warning(
                    "Dataset id=[%s], title=[%s], organization=[%s] omitted\n",
                    pkg.get('id', None), pkg.get('title', None), publisher)
        except KeyError:
            # Required field missing: record the dataset as erroneous
            # rather than aborting the whole export.
            publisher = self.detect_publisher(extras)
            logger.warning(
                "Dataset id=[%s], title=[%s], organization=[%s] missing required 'public_access_level' field",
                pkg.get('id', None), pkg.get('title', None), publisher)
            errors = ['Missing Required Field', ['public_access_level']]
            self._errors_json.append(OrderedDict([
                ('id', pkg.get('id')),
                ('name', pkg.get('name')),
                ('title', pkg.get('title')),
                ('organization', publisher),
                ('errors', errors),
            ]))
    return output
def make_json(self):
    """Assemble the public data.json catalog from all current packages."""
    fetch = p.toolkit.get_action("current_package_list_with_resources")
    packages = fetch(None, {})

    output = []
    seen_identifiers = set()

    # Create data.json only using public and public-restricted datasets;
    # datasets marked non-public are not exposed.
    for pkg in packages:
        extras = {e['key']: e['value'] for e in pkg['extras']}
        try:
            access_level = extras['public_access_level']
            if re.match(r'[Nn]on-public', access_level):
                continue

            entry = JsonExportBuilder.make_datajson_export_entry(
                pkg, seen_identifiers)
            if entry:
                output.append(entry)
            else:
                publisher = self.detect_publisher(extras)
                logger.warn(
                    "Dataset id=[%s], title=[%s], organization=[%s] omitted\n",
                    pkg.get('id', None), pkg.get('title', None), publisher)
        except KeyError:
            publisher = self.detect_publisher(extras)
            logger.warn(
                "Dataset id=[%s], title=[%s], organization=[%s] missing required 'public_access_level' field",
                pkg.get('id', None), pkg.get('title', None), publisher)
            errors = ['Missing Required Field', ['public_access_level']]
            self._errors_json.append(OrderedDict([
                ('id', pkg.get('id')),
                ('name', pkg.get('name')),
                ('title', pkg.get('title')),
                ('organization', publisher),
                ('errors', errors),
            ]))
    return output
def make_pdl(self, owner_org):
    """Build the public data listing (PDL) export for an organization.

    Skips draft and non-public datasets; datasets missing the required
    'public_access_level' extra are logged and recorded in
    self._errors_json. Validation problems are captured both as JSON
    error records and in a warning log, all shipped in the returned zip.

    :param owner_org: organization whose packages are exported.
    :return: zip bytes produced by write_zip().
    """
    # Error handler for creating the error log: warnings raised while
    # building the export are collected into the downloadable error log.
    stream = StringIO.StringIO()
    eh = logging.StreamHandler(stream)
    eh.setLevel(logging.WARN)
    eh.setFormatter(logging.Formatter('%(asctime)s - %(message)s'))
    logger.addHandler(eh)

    try:
        # Build the data.json file.
        packages = self.get_packages(owner_org)

        output = []
        errors_json = []
        seen_identifiers = set()

        # Create data.json only using public datasets; datasets marked
        # non-public are not exposed.
        for pkg in packages:
            extras = dict((x['key'], x['value']) for x in pkg['extras'])

            if extras.get('publishing_status') == 'Draft':
                continue

            try:
                # Raises KeyError when 'public_access_level' is absent.
                if re.match(r'[Nn]on-public', extras['public_access_level']):
                    continue

                datajson_entry = JsonExportBuilder.make_datajson_export_entry(
                    pkg, seen_identifiers)

                if 'errors' in datajson_entry:
                    errors_json.append(datajson_entry)
                    datajson_entry = None

                if datajson_entry and self.is_valid(datajson_entry):
                    output.append(datajson_entry)
                else:
                    publisher = self.detect_publisher(extras)
                    # warning() instead of deprecated warn()
                    logger.warning(
                        "Dataset id=[%s], title=[%s], organization=[%s] omitted\n",
                        pkg.get('id', None), pkg.get('title', None),
                        publisher)
            except KeyError:
                # Required field missing: record and keep going.
                publisher = self.detect_publisher(extras)
                logger.warning(
                    "Dataset id=[%s], title=['%s'], organization=['%s'] missing required 'public_access_level' field",
                    pkg.get('id', None), pkg.get('title', None), publisher)
                errors = ['Missing Required Field', ['public_access_level']]
                self._errors_json.append(OrderedDict([
                    ('id', pkg.get('id')),
                    ('name', pkg.get('name')),
                    ('title', pkg.get('title')),
                    ('organization', publisher),
                    ('errors', errors),
                ]))

        # Get the error log
        eh.flush()
        error = stream.getvalue()
    finally:
        # Always detach the handler, even on error, so repeated exports
        # don't accumulate handlers on the module logger.
        eh.close()
        logger.removeHandler(eh)
        stream.close()

    return self.write_zip(output, error, errors_json, zip_name='pdl')
def make_pdl(self, owner_org):
    """Export the organization's public datasets (PDL) as a zip."""
    # Route warnings emitted during the build into an in-memory log.
    log_stream = StringIO.StringIO()
    handler = logging.StreamHandler(log_stream)
    handler.setLevel(logging.WARN)
    handler.setFormatter(logging.Formatter('%(asctime)s - %(message)s'))
    logger.addHandler(handler)

    output = []
    errors_json = []

    # Create data.json only using public datasets; datasets marked
    # non-public are not exposed.
    for pkg in self.get_packages(owner_org):
        extras = {e['key']: e['value'] for e in pkg['extras']}

        if 'publishing_status' in extras and extras['publishing_status'] == 'Draft':
            continue

        try:
            if re.match(r'[Nn]on-public', extras['public_access_level']):
                continue

            entry = JsonExportBuilder.make_datajson_export_entry(pkg)

            if 'errors' in entry.keys():
                errors_json.append(entry)
                entry = None

            if entry and self.is_valid(entry):
                output.append(entry)
            else:
                publisher = self.detect_publisher(extras)
                logger.warn(
                    "Dataset id=[%s], title=[%s], organization=[%s] omitted\n",
                    pkg.get('id', None), pkg.get('title', None), publisher)
        except KeyError:
            publisher = self.detect_publisher(extras)
            logger.warn(
                "Dataset id=[%s], title=['%s'], organization=['%s'] missing required 'public_access_level' field",
                pkg.get('id', None), pkg.get('title', None), publisher)
            errors = ['Missing Required Field', ['public_access_level']]
            self._errors_json.append(OrderedDict([
                ('id', pkg.get('id')),
                ('name', pkg.get('name')),
                ('title', pkg.get('title')),
                ('organization', publisher),
                ('errors', errors),
            ]))

    # Pull the captured log text and restore the logger.
    handler.flush()
    error = log_stream.getvalue()
    handler.close()
    logger.removeHandler(handler)
    log_stream.close()

    return self.write_zip(output, error, errors_json, zip_name='pdl')