def validate_cm_form(req):
    """Handle the content-model validation upload form.

    For a POST whose form validates, the uploaded file is checked with
    ``usginmodels.validate_file`` against the content model version and
    feature type chosen in the form, and the results template is rendered.
    For any other request method an unbound form is rendered; a POST whose
    form does not validate re-renders the bound form.

    :param req: the incoming request; ``method``, ``POST`` and ``FILES``
        are read from it.
    :returns: the value returned by ``render`` for the selected template.
    """
    if req.method == 'POST':
        form = UploadFileForm(req.POST, req.FILES)
        if form.is_valid():
            uploadFile = req.FILES['file']
            cleaned = form.cleaned_data

            # Defaults describe a failed validation; only a successful
            # validate_file call overrides them.
            valid = False
            datastr = None

            if not uploadFile.name.endswith(".csv"):
                messages = "Only CSV files may be validated."
            else:
                try:
                    fileCSV = io.StringIO(unicode(uploadFile.read()), newline=None)
                except Exception:
                    messages = "Unable to read the CSV file. Check the file for invalid characters."
                else:
                    # Look up the URI of the selected model version.  It is
                    # initialised explicitly so that "no match" is a handled
                    # case instead of an unbound local.
                    uri = None
                    for m in usginmodels.get_models():
                        if m.title == cleaned["content_model"].title:
                            for v in m.versions:
                                if v.version == cleaned["version"].version:
                                    uri = v.uri
                                    break

                    if uri is None:
                        # No matching model/version: reported with the same
                        # generic failure message as a validation error.
                        messages = "Invalid Layer"
                    else:
                        try:
                            valid, messages, dataCorrected, long_fields, srs = usginmodels.validate_file(
                                fileCSV, uri, cleaned["feature_type"]
                            )
                            # Every cell is quoted and followed by a comma
                            # (including the last one); rows end with CRLF.
                            datastr = "".join(
                                "".join("\"" + str(ele) + "\"," for ele in line) + "\r\n"
                                for line in dataCorrected
                            )
                        except Exception:
                            valid = False
                            messages = "Invalid Layer"
                            datastr = None

            context = {
                "valid": valid,
                "messages": messages,
                "dataCorrected": datastr,
                "filepath": uploadFile.name
            }
            # Render the results as HTML
            return render(req, 'validation/cm-results-bootstrap.html', context)
    else:
        form = UploadFileForm()

    return render(req, 'validation/cm-form-bootstrap.html', {'form': form})
def contentmodel_checkFile(context, data_dict):
    '''Check whether the given csv file follows the specified content model.

    **Parameters (keys read from ``data_dict``):**

    :param cm_resource_url: the URL to the resource; it must contain a
        ``/storage/f/`` segment.
    :type cm_resource_url: string

    :param cm_layer: layer of the content model.
    :type cm_layer: string

    :param cm_uri: uri of the content model.
    :type cm_uri: string

    :param cm_version_url: uri of the content model version, passed to
        ``usginmodels.validate_file``.
    :type cm_version_url: string

    ``data_dict`` is also handed unchanged to ``contentmodel_get``.

    **Results:**

    :returns: ``{"valid": True, "messages": "Okay"}`` on success (or when
        any of layer/uri/version is the string ``none``), otherwise
        ``{"valid": False, "messages": [...]}``.
    :rtype: dictionary

    :raises IndexError: if ``cm_resource_url`` has no ``/storage/f/`` part.
    '''
    cm_resource_url = _get_or_bust(data_dict, 'cm_resource_url')
    modified_resource_url = cm_resource_url.replace("%3A", ":")
    truncated_url = modified_resource_url.split("/storage/f/")[1]
    csv_filename_withfile = get_url_for_file(truncated_url)

    validation_msg = []
    if csv_filename_withfile is None:
        # Translate the template first, then interpolate, so the catalog
        # lookup is done on the constant message id.
        msg = toolkit._("Can't find the full path of the resources from %s") % cm_resource_url
        # NOTE(review): the 'errorTYpe' spelling is kept as-is because it is
        # part of the returned payload; confirm with consumers before fixing.
        validation_msg.append({'row': 0, 'col': 0, 'errorTYpe': 'systemError', 'message': msg})
    else:
        log.info("filename full path: %s " % csv_filename_withfile)

    this_layer = _get_or_bust(data_dict, 'cm_layer')
    this_uri = _get_or_bust(data_dict, 'cm_uri')
    this_version_uri = _get_or_bust(data_dict, 'cm_version_url')

    # Each value is compared with 'none' individually.  Chaining the values
    # with `and` before a single `== 'none'` would only test the last one.
    if 'none' in (this_layer.lower(), this_uri.lower(), this_version_uri.lower()):
        log.debug("tier 2 data model/version/layer are none")
        return {"valid": True, "messages": "Okay"}

    log.debug("about to start schema reading")
    user_schema = contentmodel_get(context, data_dict)
    field_info_list = user_schema['version']['layers_info']

    # The field list is only logged; building it still verifies that the
    # schema has an entry for the requested layer.
    fieldModelList = []
    for field_info in field_info_list[this_layer]:
        field_type = field_info['type']
        if field_info['name'] is None and (len(field_type) == 0 or field_type.isspace()):
            log.debug("found a undefined field: %s" % str(field_info))
            continue
        fieldModelList.append(ContentModel_FieldInfoCell(field_info['optional'],
                                                         field_info['type'],
                                                         field_info['name'],
                                                         field_info['description']))

    log.debug(fieldModelList)
    log.debug("finish schema reading, find %s field information" % str(len(fieldModelList)))

    if not validation_msg:
        try:
            csv_filename = csv_filename_withfile.split("file://")[1]
            # The context manager closes the file even if validation raises.
            with open(csv_filename, 'rbU') as this_csv:
                valid, errors, dataCorrected, long_fields, srs = usginmodels.validate_file(
                    this_csv, this_version_uri, this_layer
                )
            if not valid:
                validation_msg.append({'valid': False})
        except Exception as e:
            log.debug("content model validation failed: %s" % e)
            validation_msg.append({'valid': False})

    log.debug(validation_msg)

    if not validation_msg:
        return {"valid": True, "messages": "Okay"}
    return {"valid": False, "messages": validation_msg}
def is_usgin_valid_data(key, data, errors, context):
    """Validate the first resource's file against its USGIN content model.

    The content model URI, version and layer are read from the
    ``resourceDescription`` entry of the JSON stored in the ``md_package``
    extra. If the stored file cannot be opened or validated it is removed,
    the resource is deleted through the ``resource_delete`` action and a
    message is appended to ``errors[key]``.

    :param key: key under which failures are recorded in ``errors``.
    :param data: flattened dict keyed by tuples such as
        ``('resources', 0, 'id')`` and ``('extras', n, 'value')``.
    :param errors: dict of lists; ``errors[key]`` receives the failure text.
    :param context: passed through to the ``resource_delete`` action.
    :returns: ``None`` when there is nothing to validate or after
        validation ran; ``{'valid': True}`` when any of uri/version/layer is
        the string ``none``.
    """
    resource_id = data.get(('resources', 0, 'id'), None)
    if resource_id is None:
        return

    # Find the extra whose key is 'md_package' and decode its value.
    md_package = None
    for k, v in data.items():
        if k[0] == 'extras' and v == 'md_package':
            raw_package = data.get((k[0], k[1], 'value'), None)
            if raw_package is not None:
                md_package = json.loads(raw_package)

    # Without package metadata there is no content model to check against.
    if md_package is None:
        return

    resourceDescription = md_package.get('resourceDescription', {})
    uri = resourceDescription.get('usginContentModel', None)
    version = resourceDescription.get('usginContentModelVersion', None)
    layer = resourceDescription.get('usginContentModelLayer', None)

    if None in [uri, version, layer]:
        return

    def get_file_path(res_id):
        # <storage_path>/resources/<id[0:3]>/<id[3:6]>/<id[6:]>
        storage_base = config.get('ckan.storage_path', 'default')
        return path.join(storage_base, 'resources', res_id[0:3], res_id[3:6], res_id[6:])

    # path.join always yields a non-empty string here, so no "path not
    # found" branch is needed.
    csv_file = get_file_path(resource_id)
    log.info("Filename full path: %s " % csv_file)

    if 'none' in [uri.lower(), version.lower(), layer.lower()]:
        log.debug("Start USGIN content model validation")
        log.debug("USGIN tier 2 data model/version/layer are none")
        return {'valid': True}

    # Pre-set so the result checks below are well defined when validation
    # raises before the tuple is unpacked.
    valid = False
    messages = None
    try:
        # The context manager closes the handle before any removal below.
        with open(csv_file, 'rbU') as csv_handle:
            valid, messages, dataCorrected, long_fields, srs = usginmodels.validate_file(
                csv_handle, version, layer)
    except Exception:
        valid = False
        log.info("The given format's file is not a CSV")
        try:
            os.remove(csv_file)
        except OSError as e:
            # E.g. the file never existed; still delete the resource and
            # record the error below.
            log.debug("Could not remove %s: %s" % (csv_file, e))
        get_action('resource_delete')(context, {'id': resource_id})
        errors[key].append(base._("The given format's file is not a CSV"))

    log.debug("Finished USGIN content model validation")

    if valid and messages:
        log.debug('With changes the USGIN document will be valid')
        h.flash_error(base._('With changes the USGIN document will be valid'))
    elif valid and not messages:
        log.debug("USGIN document is valid")
    else:
        log.debug('USGIN document is not valid')
        h.flash_error(base._('The USGIN document is not valid'))
def usginmodels_validate_file(context, data_dict):
    """Validate a resource's stored CSV file against its USGIN content model.

    The content model URI, version and layer are read from the
    ``resourceDescription`` entry of the package's ``md_package``. When the
    validator returns corrected data for a file that is invalid or valid
    only with changes, that data is written next to the stored file as
    ``<path>_CorrectedData``; the stored file itself is left untouched.

    :param context: passed to ``package_show``, ``get_md_package`` and
        ``get_file_path``.
    :param data_dict: must provide ``resource_id``, ``resource_name`` and
        ``package_id``.
    :returns: a dict with at least ``valid``; on a completed validation it
        also carries ``message``, ``dataCorrected``, ``long_fields``,
        ``srs``, ``resourceName`` and ``resourceId``. Early failures carry a
        ``message`` and/or ``log`` entry describing the problem.
    :rtype: dictionary
    """
    # Imported locally; the file handle below uses a different name so the
    # module is never shadowed.
    import csv

    resourceId = data_dict.get('resource_id', None)
    resourceName = data_dict.get('resource_name', None)
    packageId = data_dict.get('package_id', None)

    if None in [resourceId, packageId, resourceName]:
        log.info("Missing Package ID or Resource ID")
        return {
            'valid': False,
            'message': '',
            'log': 'Missing Package ID or Resource ID or Resource name',
            'resourceId': resourceId
        }

    pkg = get_action('package_show')(context, {'id': packageId})
    md_package = get_md_package(context, pkg)

    if md_package is None:
        log.info("Missing md_package")
        return {
            'valid': False,
            'message': '',
            'log': 'Missing md_package',
            'resourceName': resourceName,
            'resourceId': resourceId
        }

    resourceDescription = md_package.get('resourceDescription', {})
    uri = resourceDescription.get('usginContentModel', None)
    version = resourceDescription.get('usginContentModelVersion', None)
    layer = resourceDescription.get('usginContentModelLayer', None)

    if None in [uri, version, layer] or 'none' in [
            uri.lower(), version.lower(), layer.lower()]:
        log.info("Missing content model information (URI, Version, Layer)")
        return {
            'valid': False,
            'message': [
                'Missing content model information (URI, Version, Layer) or none given.'
            ],
            'resourceName': resourceName,
            'resourceId': resourceId
        }

    path_info = get_file_path(context, {'resourceId': resourceId, 'suffix': ''})
    csv_file = path_info.get('path', None)

    if not csv_file:
        log.info("Cannot find the full path of the resources from %s" % resourceName)
        return {
            'valid': False,
            'message': '',
            'log': "Cannot find the full path of the resources from %s" % resourceName,
            'resourceName': resourceName,
            'resourceId': resourceId
        }
    log.info("Filename full path: %s " % csv_file)

    log.debug("Start USGIN content model validation")
    try:
        # The context manager closes the file even when validation raises.
        with open(csv_file, 'rbU') as csv_handle:
            # True to fill default on row1
            valid, messages, dataCorrected, long_fields, srs = usginmodels.validate_file(
                csv_handle, version, layer, True)
    except Exception:
        log.info("the file format is not supported.")
        return {
            'valid': False,
            'message': ["the file format is not supported."],
            'resourceName': resourceName,
            'resourceId': resourceId
        }

    # No automatic erasing of content; the user fixes the file themselves
    # (https://github.com/REI-Systems/ckanext-metadata/issues/3).  The
    # corrected data goes to a separate file instead.
    if dataCorrected and (messages or not valid):
        NewFilePath = csv_file + '_CorrectedData'
        try:
            with open(NewFilePath, "wb") as f:
                csv.writer(f).writerows(dataCorrected)
        except Exception as e:
            # Best effort: a failed write must not fail the validation call.
            log.debug("%s: Couldn't write the corrected data file %s: %s"
                      % (resourceName, NewFilePath, e))
        else:
            log.debug("%s: The new corrected data file has been created %s"
                      % (resourceName, NewFilePath))

    if valid and messages:
        log.debug('%s: With changes the USGIN document will be valid' % resourceName)
    elif valid and not messages:
        log.debug("%s: USGIN document is valid" % resourceName)
    else:
        log.debug('%s: USGIN document is not valid' % resourceName)

    log.debug("%s: Finished USGIN content model validation." % resourceName)

    return {
        'valid': valid,
        'message': messages,
        'dataCorrected': dataCorrected,
        'long_fields': long_fields,
        'srs': srs,
        'resourceName': resourceName,
        'resourceId': resourceId
    }
def usginmodels_validate_file(context, data_dict):
    """Validate a resource's stored CSV file against its USGIN content model.

    The content model URI, version and layer are read from the
    ``resourceDescription`` entry of the package's ``md_package``. When the
    validator returns corrected data for a file that is invalid or valid
    only with changes, that data is written next to the stored file as
    ``<path>_CorrectedData``; the stored file itself is left untouched.

    :param context: passed to ``package_show``, ``get_md_package`` and
        ``get_file_path``.
    :param data_dict: must provide ``resource_id``, ``resource_name`` and
        ``package_id``.
    :returns: a dict with at least ``valid``; on a completed validation it
        also carries ``message``, ``dataCorrected``, ``long_fields``,
        ``srs``, ``resourceName`` and ``resourceId``. Early failures carry a
        ``message`` and/or ``log`` entry describing the problem.
    :rtype: dictionary
    """
    # Imported locally; the file handle below uses a different name so the
    # module is never shadowed.
    import csv

    resourceId = data_dict.get('resource_id', None)
    resourceName = data_dict.get('resource_name', None)
    packageId = data_dict.get('package_id', None)

    if None in [resourceId, packageId, resourceName]:
        log.info("Missing Package ID or Resource ID")
        return {'valid': False, 'message': '',
                'log': 'Missing Package ID or Resource ID or Resource name',
                'resourceId': resourceId}

    pkg = get_action('package_show')(context, {'id': packageId})
    md_package = get_md_package(context, pkg)

    if md_package is None:
        log.info("Missing md_package")
        return {'valid': False, 'message': '', 'log': 'Missing md_package',
                'resourceName': resourceName, 'resourceId': resourceId}

    resourceDescription = md_package.get('resourceDescription', {})
    uri = resourceDescription.get('usginContentModel', None)
    version = resourceDescription.get('usginContentModelVersion', None)
    layer = resourceDescription.get('usginContentModelLayer', None)

    if None in [uri, version, layer] or 'none' in [uri.lower(), version.lower(), layer.lower()]:
        log.info("Missing content model information (URI, Version, Layer)")
        return {'valid': False,
                'message': ['Missing content model information (URI, Version, Layer) or none given.'],
                'resourceName': resourceName, 'resourceId': resourceId}

    path_info = get_file_path(context, {'resourceId': resourceId, 'suffix': ''})
    csv_file = path_info.get('path', None)

    if not csv_file:
        log.info("Cannot find the full path of the resources from %s" % resourceName)
        return {'valid': False, 'message': '',
                'log': "Cannot find the full path of the resources from %s" % resourceName,
                'resourceName': resourceName, 'resourceId': resourceId}
    log.info("Filename full path: %s " % csv_file)

    log.debug("Start USGIN content model validation")
    try:
        # The context manager closes the file even when validation raises.
        with open(csv_file, 'rbU') as csv_handle:
            # True to fill default on row1
            valid, messages, dataCorrected, long_fields, srs = usginmodels.validate_file(
                csv_handle, version, layer, True)
    except Exception:
        log.info("the file format is not supported.")
        return {'valid': False, 'message': ["the file format is not supported."],
                'resourceName': resourceName, 'resourceId': resourceId}

    # No automatic erasing of content; the user fixes the file themselves
    # (https://github.com/REI-Systems/ckanext-metadata/issues/3).  The
    # corrected data goes to a separate file instead.
    if dataCorrected and (messages or not valid):
        NewFilePath = csv_file + '_CorrectedData'
        try:
            with open(NewFilePath, "wb") as f:
                csv.writer(f).writerows(dataCorrected)
        except Exception as e:
            # Best effort: a failed write must not fail the validation call.
            log.debug("%s: Couldn't write the corrected data file %s: %s"
                      % (resourceName, NewFilePath, e))
        else:
            log.debug("%s: The new corrected data file has been created %s"
                      % (resourceName, NewFilePath))

    if valid and messages:
        log.debug('%s: With changes the USGIN document will be valid' % resourceName)
    elif valid and not messages:
        log.debug("%s: USGIN document is valid" % resourceName)
    else:
        log.debug('%s: USGIN document is not valid' % resourceName)

    log.debug("%s: Finished USGIN content model validation." % resourceName)

    return {'valid': valid, 'message': messages, 'dataCorrected': dataCorrected,
            'long_fields': long_fields, 'srs': srs,
            'resourceName': resourceName, 'resourceId': resourceId}
def contentmodel_checkFile(context, data_dict):
    '''Check whether the given csv file follows the specified content model.

    **Parameters (keys read from ``data_dict``):**

    :param cm_resource_url: the URL to the resource; it must contain a
        ``/storage/f/`` segment.
    :type cm_resource_url: string

    :param cm_layer: layer of the content model.
    :type cm_layer: string

    :param cm_uri: uri of the content model.
    :type cm_uri: string

    :param cm_version_url: uri of the content model version, passed to
        ``usginmodels.validate_file``.
    :type cm_version_url: string

    ``data_dict`` is also handed unchanged to ``contentmodel_get``.

    **Results:**

    :returns: ``{"valid": True, "messages": "Okay"}`` on success (or when
        any of layer/uri/version is the string ``none``), otherwise
        ``{"valid": False, "messages": [...]}``.
    :rtype: dictionary

    :raises IndexError: if ``cm_resource_url`` has no ``/storage/f/`` part.
    '''
    cm_resource_url = _get_or_bust(data_dict, 'cm_resource_url')
    modified_resource_url = cm_resource_url.replace("%3A", ":")
    truncated_url = modified_resource_url.split("/storage/f/")[1]
    csv_filename_withfile = get_url_for_file(truncated_url)

    validation_msg = []
    if csv_filename_withfile is None:
        # Translate the template first, then interpolate, so the catalog
        # lookup is done on the constant message id.
        msg = toolkit._("Can't find the full path of the resources from %s") % cm_resource_url
        # NOTE(review): the 'errorTYpe' spelling is kept as-is because it is
        # part of the returned payload; confirm with consumers before fixing.
        validation_msg.append({
            'row': 0,
            'col': 0,
            'errorTYpe': 'systemError',
            'message': msg
        })
    else:
        log.info("filename full path: %s " % csv_filename_withfile)

    this_layer = _get_or_bust(data_dict, 'cm_layer')
    this_uri = _get_or_bust(data_dict, 'cm_uri')
    this_version_uri = _get_or_bust(data_dict, 'cm_version_url')

    # Each value is compared with 'none' individually.  Chaining the values
    # with `and` before a single `== 'none'` would only test the last one.
    selection = (this_layer.lower(), this_uri.lower(), this_version_uri.lower())
    if 'none' in selection:
        log.debug("tier 2 data model/version/layer are none")
        return {"valid": True, "messages": "Okay"}

    log.debug("about to start schema reading")
    user_schema = contentmodel_get(context, data_dict)
    field_info_list = user_schema['version']['layers_info']

    # The field list is only logged; building it still verifies that the
    # schema has an entry for the requested layer.
    fieldModelList = []
    for field_info in field_info_list[this_layer]:
        field_type = field_info['type']
        undefined = (field_info['name'] is None
                     and (len(field_type) == 0 or field_type.isspace()))
        if undefined:
            log.debug("found a undefined field: %s" % str(field_info))
            continue
        fieldModelList.append(
            ContentModel_FieldInfoCell(field_info['optional'],
                                       field_info['type'],
                                       field_info['name'],
                                       field_info['description']))

    log.debug(fieldModelList)
    log.debug("finish schema reading, find %s field information" % str(len(fieldModelList)))

    if not validation_msg:
        try:
            csv_filename = csv_filename_withfile.split("file://")[1]
            # The context manager closes the file even if validation raises.
            with open(csv_filename, 'rbU') as this_csv:
                valid, errors, dataCorrected, long_fields, srs = usginmodels.validate_file(
                    this_csv, this_version_uri, this_layer)
            if not valid:
                validation_msg.append({'valid': False})
        except Exception as e:
            log.debug("content model validation failed: %s" % e)
            validation_msg.append({'valid': False})

    log.debug(validation_msg)

    if validation_msg:
        return {"valid": False, "messages": validation_msg}
    return {"valid": True, "messages": "Okay"}
def main(argv=None):
    """Validate a spreadsheet/CSV against a USGIN schema and optionally convert it.

    Tool parameters are read through ``arcpy.GetParameterAsText`` (indices
    0-5: input file, sheet name, schema name, service name, layer name,
    validate-only flag). The data is validated with
    ``usginmodels.validate_file``. When the validate-only flag is the string
    ``"false"`` and the data is valid, a geodatabase, table, XY event layer
    and feature class are created from the corrected data.

    :param argv: unused; the tool parameters come from arcpy.
    :returns: ``None``.
    """
    # Don't allow overwriting
    arcpy.env.overwriteOutput = False

    # Get the parameters of the tool
    in_file = arcpy.GetParameterAsText(0)
    sheet_name = arcpy.GetParameterAsText(1)
    schema_name = arcpy.GetParameterAsText(2)
    service_name = arcpy.GetParameterAsText(3)
    layer_name = arcpy.GetParameterAsText(4)
    validate_only = arcpy.GetParameterAsText(5)

    # Get the path for the folder of the Excel file (used for output of GeoDB)
    path = os.path.dirname(in_file) + "\\"

    # Paste from development environment below here
    schema_uri = get_schema_uri(schema_name)
    layer_info = usginmodels.get_layer(schema_uri, layer_name)

    # If data is in a sheet in an Excel file convert to CSV, otherwise just read.
    # Only a handle opened here is closed here; whatever excel_to_csv
    # returns is left to its owner.
    own_handle = None
    if sheet_name != "N/A":
        csv_file = excel_to_csv(in_file, sheet_name)
    else:
        own_handle = open(in_file)
        csv_file = own_handle

    try:
        if csv_file:
            # Pass in the CSV, the schema to validate against and the layer name
            valid, messages, dataCorrected, long_fields, srs = usginmodels.validate_file(
                csv_file, schema_uri, layer_name)
            print_errors(valid, messages, dataCorrected)

            try:
                if validate_only == "false" and valid == True:
                    # The first and last schema fields are left out of the table.
                    table_fields = layer_info.fields[1:-1]
                    table = layer_name + "Table"
                    layer = layer_name + "Layer"

                    CreateGeodatabase(path, service_name)
                    arcpy.env.workspace = path + service_name + ".mdb"
                    MakeTable(table, table_fields, long_fields)
                    # Row 0 of the corrected data is skipped.
                    InsertData(table, dataCorrected[1:], table_fields)
                    CreateXYEventLayer(table, layer, srs)
                    CreateFeatureClass(layer, layer_name, srs)

                    # Make sure the final feature class has the same number of rows as the original table
                    rowsTemp = int(arcpy.GetCount_management(table).getOutput(0))
                    rowsFinal = int(arcpy.GetCount_management(layer_name).getOutput(0))
                    if rowsTemp != rowsFinal:
                        rowsDeleted = rowsTemp - rowsFinal
                        if rowsDeleted == 1:
                            arcpy.AddError(str(rowsDeleted) + " row was deleted when converting the table to the feature class.")
                        else:
                            arcpy.AddError(str(rowsDeleted) + " rows were deleted when converting the table to the feature class.")
                        arcpy.AddError("Check the Lat & Long values for errors.")
                        raise Exception("Conversion Failed.")
                    else:
                        arcpy.Delete_management(table)
                        arcpy.AddMessage("Conversion Successful!")
            except Exception as err:
                arcpy.AddError("Error: {0}".format(err))
    finally:
        # Release the input file even when validation or conversion raises.
        if own_handle is not None:
            own_handle.close()

    return
def is_usgin_valid_data(key, data, errors, context):
    """Validate the first resource's file against its USGIN content model.

    The content model URI and version are read from the JSON stored in the
    ``md_package`` extra, and the layer from the JSON stored under a
    resource's ``md_resource`` key. The outcome is reported through log
    messages and ``h.flash_error``.

    :param key: not used by this function.
    :param data: flattened dict keyed by tuples such as
        ``('resources', 0, 'id')`` and ``('extras', n, 'value')``.
    :param errors: not used by this function.
    :param context: not used by this function.
    :returns: ``None`` when there is nothing to validate or after
        validation ran; ``{'valid': True}`` when any of uri/version/layer is
        the string ``none``.
    """
    resource_id = data.get(('resources', 0, 'id'), None)
    if resource_id is None:
        return

    md_resource = None
    md_package = None
    for k, v in data.items():
        if k[0] == 'resources' and k[-1] == 'md_resource':
            md_resource = json.loads(v)
        elif k[0] == 'extras' and v == 'md_package':
            raw_package = data.get((k[0], k[1], 'value'), None)
            if raw_package is not None:
                md_package = json.loads(raw_package)

    # Without both metadata documents there is no content model to check.
    if md_package is None or md_resource is None:
        return

    uri = md_package.get('usginContentModel', None)
    version = md_package.get('usginContentModelVersion', None)
    layer = md_resource.get('usginContentModelLayer', None)

    if None in [uri, version, layer]:
        return

    def get_file_path(res_id):
        # <storage_path>/resources/<id[0:3]>/<id[3:6]>/<id[6:]>
        storage_base = config.get('ckan.storage_path', 'default')
        return path.join(storage_base, 'resources', res_id[0:3], res_id[3:6], res_id[6:])

    # path.join always yields a non-empty string here, so no "path not
    # found" branch is needed.
    csv_file = get_file_path(resource_id)
    log.info("Filename full path: %s " % csv_file)

    if 'none' in [uri.lower(), version.lower(), layer.lower()]:
        log.debug("Start USGIN content model validation")
        log.debug("USGIN tier 2 data model/version/layer are none")
        return {'valid': True}

    # The validator's messages get their own name so the `errors` parameter
    # is not shadowed; the context manager closes the file afterwards.
    with open(csv_file, 'rbU') as csv_handle:
        valid, messages, dataCorrected, long_fields, srs = \
            usginmodels.validate_file(csv_handle, version, layer)

    log.debug("Finished USGIN content model validation")

    # One if/elif/else chain, so a fully valid document never reaches the
    # "not valid" branch.
    if valid and not messages:
        log.debug("USGIN document is valid")
    elif valid and messages:
        log.debug('With changes the USGIN document will be valid')
        h.flash_error(base._('With changes the USGIN document will be valid'))
    else:
        log.debug('USGIN document is not valid')
        h.flash_error(base._('The USGIN document is not valid'))