Example #1
0
def validate_cm_form(req):
    """Validate an uploaded CSV file against a USGIN content model.

    For a POST request carrying a valid ``UploadFileForm``, the uploaded
    file is run through ``usginmodels.validate_file`` and the results
    template is rendered.  For any other request method a blank form is
    rendered; for an invalid POST the bound form is rendered again.

    :param req: request object; ``method``, ``POST`` and ``FILES`` are read.
    :returns: the result of ``render`` for the selected template.
    """
    if req.method == 'POST':
        form = UploadFileForm(req.POST, req.FILES)
        if form.is_valid():
            uploadFile = req.FILES['file']

            # Start from a "failed" result; only a successful validation
            # call below overrides ``valid`` and ``datastr``.
            valid = False
            datastr = None

            if not uploadFile.name.endswith(".csv"):
                messages = "Only CSV files may be validated."
            else:
                try:
                    fileCSV = io.StringIO(unicode(uploadFile.read()), newline=None)
                except Exception:
                    messages = "Unable to read the CSV file. Check the file for invalid characters."
                else:
                    # Resolve the URI of the selected content model version.
                    # ``uri`` stays None when nothing matches, instead of
                    # relying on a swallowed NameError further down.
                    uri = None
                    for m in usginmodels.get_models():
                        if m.title == form.cleaned_data["content_model"].title:
                            for v in m.versions:
                                if v.version == form.cleaned_data["version"].version:
                                    uri = v.uri
                                    break

                    if uri is None:
                        messages = "Invalid Layer"
                    else:
                        try:
                            valid, messages, dataCorrected, long_fields, srs = usginmodels.validate_file(
                                fileCSV,
                                uri,
                                form.cleaned_data["feature_type"]
                            )

                            # Serialise the corrected rows: every cell is
                            # quoted and followed by a comma, every row is
                            # terminated by CRLF.
                            datastr = "".join(
                                "".join("\"" + str(ele) + "\"," for ele in line) + "\r\n"
                                for line in dataCorrected
                            )
                        except Exception:
                            valid = False
                            messages = "Invalid Layer"
                            datastr = None

            context = {
                "valid": valid,
                "messages": messages,
                "dataCorrected": datastr,
                "filepath": uploadFile.name
            }
            # Render the results as HTML
            return render(req, 'validation/cm-results-bootstrap.html', context)
    else:
        form = UploadFileForm()
    return render(req, 'validation/cm-form-bootstrap.html', {'form': form})
Example #2
0
def contentmodel_checkFile(context, data_dict):
    '''Check whether the given csv file follows the specified content model.

    The file behind ``cm_resource_url`` is located on disk and validated
    with ``usginmodels.validate_file``.

    **Parameters (read from ``data_dict`` with ``_get_or_bust``):**
    :param cm_resource_url: the URL to the resource; the part after
        ``/storage/f/`` is used to locate the file.
    :type cm_resource_url: string

    :param cm_layer: layer of the content model.
    :type cm_layer: string

    :param cm_uri: uri of the content model.
    :type cm_uri: string

    :param cm_version_url: uri of the content model version; this is the
        value handed to the validator.
    :type cm_version_url: string

    **Results:**
    :returns: ``{"valid": True, "messages": "Okay"}`` on success or when
        any of layer/uri/version is the string ``none``; otherwise
        ``{"valid": False, "messages": [...]}``.
    :rtype: dictionary
    '''
    cm_resource_url = _get_or_bust(data_dict, 'cm_resource_url')
    modified_resource_url = cm_resource_url.replace("%3A", ":")
    truncated_url = modified_resource_url.split("/storage/f/")[1]
    csv_filename_withfile = get_url_for_file(truncated_url)
    validation_msg = []

    if csv_filename_withfile is None:
        # Translate the template first, then interpolate, so the message
        # catalogue lookup is done on the constant string.
        msg = toolkit._("Can't find the full path of the resources from %s") % cm_resource_url
        # NOTE(review): the key is spelled 'errorTYpe'; left unchanged
        # because callers may read it under that name.
        validation_msg.append({'row': 0, 'col': 0, 'errorTYpe': 'systemError', 'message': msg})
    else:
        log.info("filename full path: %s " % csv_filename_withfile)

    this_layer = _get_or_bust(data_dict, 'cm_layer')
    this_uri = _get_or_bust(data_dict, 'cm_uri')
    this_version_uri = _get_or_bust(data_dict, 'cm_version_url')

    # Each value has to be compared with 'none' individually; a chained
    # ``a and b and c == 'none'`` only compares the last operand.
    if 'none' in [this_layer.lower(), this_uri.lower(), this_version_uri.lower()]:
        log.debug("tier 2 data model/version/layer are none")
        return {"valid": True, "messages": "Okay"}

    log.debug("about to start schema reading")
    user_schema = contentmodel_get(context, data_dict)
    fieldModelList = []
    field_info_list = user_schema['version']['layers_info']

    for field_info in field_info_list[this_layer]:
        type_is_blank = len(field_info['type']) == 0 or field_info['type'].isspace()
        if field_info['name'] is None and type_is_blank:
            log.debug("found a undefined field: %s" % str(field_info))
            continue
        fieldModelList.append(ContentModel_FieldInfoCell(
            field_info['optional'],
            field_info['type'],
            field_info['name'],
            field_info['description']))

    log.debug(fieldModelList)
    log.debug("finish schema reading, find %s field information" % str(len(fieldModelList)))

    if not validation_msg:
        try:
            csv_filename = csv_filename_withfile.split("file://")[1]
            # ``with`` guarantees the handle is released even when the
            # validator raises.
            with open(csv_filename, 'rbU') as this_csv:
                valid, errors, dataCorrected, long_fields, srs = usginmodels.validate_file(
                    this_csv,
                    this_version_uri,
                    this_layer
                )
        except Exception:
            # Keep the best-effort result, but record why validation failed.
            log.exception("content model validation raised an exception")
            validation_msg.append({'valid': False})
        else:
            if not valid:
                validation_msg.append({'valid': False})

    log.debug(validation_msg)
    if not validation_msg:
        return {"valid": True, "messages": "Okay"}
    return {"valid": False, "messages": validation_msg}
Example #3
0
def is_usgin_valid_data(key, data, errors, context):
    """Validate the first resource's file against the package's USGIN model.

    Reads the resource id and name from the flattened ``data`` dict, takes
    the content model URI, version and layer from the ``md_package`` extra
    (under ``resourceDescription``) and runs ``usginmodels.validate_file``
    on the stored file.  If validation raises, the stored file and the
    resource are deleted and a message is appended to ``errors[key]``.

    :param key: key under which a failure message is appended to ``errors``.
    :param data: flattened dict keyed by tuples such as
        ``('resources', 0, 'id')``.
    :param errors: dict of message lists, indexed by ``key``.
    :param context: passed through to the ``resource_delete`` action.
    :returns: ``{'valid': True}`` when any of uri/version/layer is the
        string ``'none'`` (case-insensitive); ``None`` in every other case.
    """
    resource_id = data.get(('resources', 0, 'id'), None)
    if resource_id is None:
        return

    resource_name = data.get(('resources', 0, 'name'), None)

    # ``md_resource`` is not used further here; it is still parsed so that
    # malformed JSON is reported the same way as before.
    md_resource = None
    for k, v in data.items():
        if k[0] == 'resources' and k[-1] == 'md_resource':
            md_resource = json.loads(data.get(k, None))

    md_package = None
    for k, v in data.items():
        if k[0] == 'extras' and v == 'md_package':
            md_package = json.loads(data.get((k[0], k[1], 'value'), None))

    # Without package metadata there is no content model to check against.
    if md_package is None:
        return

    resourceDescription = md_package.get('resourceDescription', {})
    uri = resourceDescription.get('usginContentModel', None)
    version = resourceDescription.get('usginContentModelVersion', None)
    layer = resourceDescription.get('usginContentModelLayer', None)

    if None in [uri, version, layer]:
        return

    def get_file_path(res_id):
        # Storage layout: <storage_path>/resources/<3 chars>/<3 chars>/<rest>
        dir_1 = res_id[0:3]
        dir_2 = res_id[3:6]
        file_name = res_id[6:]
        storage_base = config.get('ckan.storage_path', 'default')
        return path.join(storage_base, 'resources', dir_1, dir_2, file_name)

    csv_file = get_file_path(resource_id)

    if csv_file:
        log.info("Filename full path: %s " % csv_file)
    else:
        log.info("Cannot find the full path of the resources from %s" % resource_name)

    if 'none' in [uri.lower(), version.lower(), layer.lower()]:
        log.debug("Start USGIN content model validation")
        log.debug("USGIN tier 2 data model/version/layer are none")
        return {'valid': True}

    # Defaults used when validation raises.
    valid = False
    messages = None
    try:
        # The handle is closed before the except clause runs, so the file
        # is no longer open when it is removed.
        with open(csv_file, 'rbU') as csv_handle:
            valid, messages, dataCorrected, long_fields, srs = usginmodels.validate_file(
                csv_handle, version, layer)
    except Exception:
        log.info("The given format's file is not a CSV")
        os.remove(csv_file)
        get_action('resource_delete')(context, {'id': resource_id})
        errors[key].append(base._("The given format's file is not a CSV"))

    log.debug("Finished USGIN content model validation")

    if valid and messages:
        log.debug('With changes the USGIN document will be valid')
        h.flash_error(base._('With changes the USGIN document will be valid'))
    elif valid and not messages:
        log.debug("USGIN document is valid")
    else:
        log.debug('USGIN document is not valid')
        h.flash_error(base._('The USGIN document is not valid'))
Example #4
0
def usginmodels_validate_file(context, data_dict):
    """Validate a resource's CSV file against its package's content model.

    ``data_dict`` must provide ``resource_id``, ``resource_name`` and
    ``package_id``.  The content model URI, version and layer are read from
    the package's ``md_package`` metadata (``resourceDescription``).  When
    validation fails or reports messages, and corrected data is available,
    the corrected rows are written to ``<file>_CorrectedData``; the
    original file is left untouched.

    :param context: passed to ``package_show``, ``get_md_package`` and
        ``get_file_path``.
    :param data_dict: dict with the three keys listed above.
    :returns: a dict that always contains ``valid``, ``message`` and
        ``resourceId``.  Early-exit results may add ``log`` and
        ``resourceName``; the final result adds ``dataCorrected``,
        ``long_fields``, ``srs`` and ``resourceName``.
    """
    resourceId = data_dict.get('resource_id', None)
    resourceName = data_dict.get('resource_name', None)
    packageId = data_dict.get('package_id', None)

    if None in [resourceId, packageId, resourceName]:
        log.info("Missing Package ID or Resource ID")
        return {
            'valid': False,
            'message': '',
            'log': 'Missing Package ID or Resource ID or Resource name',
            'resourceId': resourceId
        }

    pkg = get_action('package_show')(context, {'id': packageId})
    md_package = get_md_package(context, pkg)

    if md_package is None:
        log.info("Missing md_package")
        return {
            'valid': False,
            'message': '',
            'log': 'Missing md_package',
            'resourceName': resourceName,
            'resourceId': resourceId
        }

    resourceDescription = md_package.get('resourceDescription', {})
    uri = resourceDescription.get('usginContentModel', None)
    version = resourceDescription.get('usginContentModelVersion', None)
    layer = resourceDescription.get('usginContentModelLayer', None)

    # The None test must come first: ``.lower()`` is only safe on strings.
    if None in [uri, version, layer] or 'none' in [
            uri.lower(), version.lower(), layer.lower()]:
        log.info("Missing content model information (URI, Version, Layer)")
        return {
            'valid': False,
            'message': [
                'Missing content model information (URI, Version, Layer) or none given.'
            ],
            'resourceName': resourceName,
            'resourceId': resourceId
        }

    file_info = get_file_path(context, {'resourceId': resourceId, 'suffix': ''})
    csv_file = file_info.get('path', None)

    if not csv_file:
        log.info("Cannot find the full path of the resources from %s" %
                 resourceName)
        return {
            'valid': False,
            'message': '',
            'log': "Cannot find the full path of the resources from %s" %
            resourceName,
            'resourceName': resourceName,
            'resourceId': resourceId
        }
    log.info("Filename full path: %s " % csv_file)

    try:
        log.debug("Start USGIN content model validation")
        # ``with`` closes the handle on both the success and the failure
        # path; the handle gets its own name so it does not collide with
        # the ``csv`` module imported below.
        with open(csv_file, 'rbU') as csv_handle:
            valid, messages, dataCorrected, long_fields, srs = usginmodels.validate_file(
                csv_handle, version, layer, True)  # True to fill default on row1
    except Exception:
        log.info("the file format is not supported.")
        return {
            'valid': False,
            'message': ["the file format is not supported."],
            'resourceName': resourceName,
            'resourceId': resourceId
        }

    # Write the corrected data into a new file.  The uploaded file is never
    # overwritten, so the user can fix it themselves, see
    # https://github.com/REI-Systems/ckanext-metadata/issues/3
    if dataCorrected and ((valid and messages) or (not valid)):
        import csv
        corrected_path = csv_file + '_CorrectedData'
        try:
            with open(corrected_path, "wb") as f:
                csv.writer(f).writerows(dataCorrected)
            log.debug(
                "%s: The new corrected data file has been created %s" %
                (resourceName, corrected_path))
        except Exception:
            log.debug("%s: Couldn't write the corrected data file." %
                      resourceName)

    if valid and messages:
        log.debug('%s: With changes the USGIN document will be valid' %
                  resourceName)
    elif valid and not messages:
        log.debug("%s: USGIN document is valid" % resourceName)
    else:
        log.debug('%s: USGIN document is not valid' % resourceName)

    log.debug("%s: Finished USGIN content model validation." % resourceName)

    return {
        'valid': valid,
        'message': messages,
        'dataCorrected': dataCorrected,
        'long_fields': long_fields,
        'srs': srs,
        'resourceName': resourceName,
        'resourceId': resourceId
    }
Example #5
0
def usginmodels_validate_file(context, data_dict):
    """Validate a resource's CSV file against its package's content model.

    ``data_dict`` must provide ``resource_id``, ``resource_name`` and
    ``package_id``.  The content model URI, version and layer are read from
    the package's ``md_package`` metadata (``resourceDescription``).  When
    validation fails or reports messages, and corrected data is available,
    the corrected rows are written to ``<file>_CorrectedData``; the
    original file is left untouched.

    :param context: passed to ``package_show``, ``get_md_package`` and
        ``get_file_path``.
    :param data_dict: dict with the three keys listed above.
    :returns: a dict that always contains ``valid``, ``message`` and
        ``resourceId``.  Early-exit results may add ``log`` and
        ``resourceName``; the final result adds ``dataCorrected``,
        ``long_fields``, ``srs`` and ``resourceName``.
    """
    resourceId = data_dict.get('resource_id', None)
    resourceName = data_dict.get('resource_name', None)
    packageId = data_dict.get('package_id', None)

    if None in [resourceId, packageId, resourceName]:
        log.info("Missing Package ID or Resource ID")
        return {'valid': False, 'message': '', 'log': 'Missing Package ID or Resource ID or Resource name', 'resourceId': resourceId}

    pkg = get_action('package_show')(context, {'id': packageId})
    md_package = get_md_package(context, pkg)

    if md_package is None:
        log.info("Missing md_package")
        return {'valid': False, 'message': '', 'log': 'Missing md_package', 'resourceName': resourceName, 'resourceId': resourceId}

    resourceDescription = md_package.get('resourceDescription', {})
    uri = resourceDescription.get('usginContentModel', None)
    version = resourceDescription.get('usginContentModelVersion', None)
    layer = resourceDescription.get('usginContentModelLayer', None)

    # The None test must come first: ``.lower()`` is only safe on strings.
    if None in [uri, version, layer] or 'none' in [uri.lower(), version.lower(), layer.lower()]:
        log.info("Missing content model information (URI, Version, Layer)")
        return {'valid': False, 'message': ['Missing content model information (URI, Version, Layer) or none given.'], 'resourceName': resourceName, 'resourceId': resourceId}

    file_info = get_file_path(context, {'resourceId': resourceId, 'suffix': ''})
    csv_file = file_info.get('path', None)

    if not csv_file:
        log.info("Cannot find the full path of the resources from %s" % resourceName)
        return {'valid': False, 'message': '', 'log': "Cannot find the full path of the resources from %s" % resourceName, 'resourceName': resourceName, 'resourceId': resourceId}
    log.info("Filename full path: %s " % csv_file)

    try:
        log.debug("Start USGIN content model validation")
        # ``with`` closes the handle on both the success and the failure
        # path; the handle gets its own name so it does not collide with
        # the ``csv`` module imported below.
        with open(csv_file, 'rbU') as csv_handle:
            valid, messages, dataCorrected, long_fields, srs = usginmodels.validate_file(csv_handle, version, layer, True)  # True to fill default on row1
    except Exception:
        log.info("the file format is not supported.")
        return {'valid': False, 'message': ["the file format is not supported."], 'resourceName': resourceName, 'resourceId': resourceId}

    # Write the corrected data into a new file.  The uploaded file is never
    # overwritten, so the user can fix it themselves, see
    # https://github.com/REI-Systems/ckanext-metadata/issues/3
    if dataCorrected and ((valid and messages) or (not valid)):
        import csv
        corrected_path = csv_file + '_CorrectedData'
        try:
            with open(corrected_path, "wb") as f:
                csv.writer(f).writerows(dataCorrected)
            log.debug("%s: The new corrected data file has been created %s" % (resourceName, corrected_path))
        except Exception:
            log.debug("%s: Couldn't write the corrected data file." % resourceName)

    if valid and messages:
        log.debug('%s: With changes the USGIN document will be valid' % resourceName)
    elif valid and not messages:
        log.debug("%s: USGIN document is valid" % resourceName)
    else:
        log.debug('%s: USGIN document is not valid' % resourceName)

    log.debug("%s: Finished USGIN content model validation." % resourceName)

    return {'valid': valid, 'message': messages, 'dataCorrected': dataCorrected, 'long_fields': long_fields, 'srs': srs, 'resourceName': resourceName, 'resourceId': resourceId}
Example #6
0
def contentmodel_checkFile(context, data_dict):
    '''Check whether the given csv file follows the specified content model.

    The file behind ``cm_resource_url`` is located on disk and validated
    with ``usginmodels.validate_file``.

    **Parameters (read from ``data_dict`` with ``_get_or_bust``):**
    :param cm_resource_url: the URL to the resource; the part after
        ``/storage/f/`` is used to locate the file.
    :type cm_resource_url: string

    :param cm_layer: layer of the content model.
    :type cm_layer: string

    :param cm_uri: uri of the content model.
    :type cm_uri: string

    :param cm_version_url: uri of the content model version; this is the
        value handed to the validator.
    :type cm_version_url: string

    **Results:**
    :returns: ``{"valid": True, "messages": "Okay"}`` on success or when
        any of layer/uri/version is the string ``none``; otherwise
        ``{"valid": False, "messages": [...]}``.
    :rtype: dictionary
    '''
    cm_resource_url = _get_or_bust(data_dict, 'cm_resource_url')
    modified_resource_url = cm_resource_url.replace("%3A", ":")
    truncated_url = modified_resource_url.split("/storage/f/")[1]
    csv_filename_withfile = get_url_for_file(truncated_url)
    validation_msg = []

    if csv_filename_withfile is None:
        # Translate the template first, then interpolate, so the message
        # catalogue lookup is done on the constant string.
        msg = toolkit._(
            "Can't find the full path of the resources from %s"
        ) % cm_resource_url
        # NOTE(review): the key is spelled 'errorTYpe'; left unchanged
        # because callers may read it under that name.
        validation_msg.append({
            'row': 0,
            'col': 0,
            'errorTYpe': 'systemError',
            'message': msg
        })
    else:
        log.info("filename full path: %s " % csv_filename_withfile)

    this_layer = _get_or_bust(data_dict, 'cm_layer')
    this_uri = _get_or_bust(data_dict, 'cm_uri')
    this_version_uri = _get_or_bust(data_dict, 'cm_version_url')

    # Each value has to be compared with 'none' individually; a chained
    # ``a and b and c == 'none'`` only compares the last operand.
    lowered = [this_layer.lower(), this_uri.lower(), this_version_uri.lower()]
    if 'none' in lowered:
        log.debug("tier 2 data model/version/layer are none")
        return {"valid": True, "messages": "Okay"}

    log.debug("about to start schema reading")
    user_schema = contentmodel_get(context, data_dict)
    fieldModelList = []
    field_info_list = user_schema['version']['layers_info']

    for field_info in field_info_list[this_layer]:
        type_is_blank = (len(field_info['type']) == 0
                         or field_info['type'].isspace())
        if field_info['name'] is None and type_is_blank:
            log.debug("found a undefined field: %s" % str(field_info))
            continue
        fieldModelList.append(
            ContentModel_FieldInfoCell(field_info['optional'],
                                       field_info['type'],
                                       field_info['name'],
                                       field_info['description']))

    log.debug(fieldModelList)
    log.debug("finish schema reading, find %s field information" %
              str(len(fieldModelList)))

    if not validation_msg:
        try:
            csv_filename = csv_filename_withfile.split("file://")[1]
            # ``with`` guarantees the handle is released even when the
            # validator raises.
            with open(csv_filename, 'rbU') as this_csv:
                valid, errors, dataCorrected, long_fields, srs = usginmodels.validate_file(
                    this_csv, this_version_uri, this_layer)
        except Exception:
            # Keep the best-effort result, but record why validation failed.
            log.exception("content model validation raised an exception")
            validation_msg.append({'valid': False})
        else:
            if not valid:
                validation_msg.append({'valid': False})

    log.debug(validation_msg)
    if not validation_msg:
        return {"valid": True, "messages": "Okay"}
    return {"valid": False, "messages": validation_msg}
def main(argv=None):
    """Validate a CSV/Excel sheet against a USGIN layer and optionally convert it.

    Tool parameters are read with ``arcpy.GetParameterAsText`` (indexes 0-5):
    input file, sheet name, schema name, service name, layer name and the
    "validate only" flag.  When the flag is the string ``"false"`` and the
    data validates, a geodatabase is created next to the input file and
    the corrected data is loaded into a feature class.

    :param argv: unused; kept for the conventional entry-point signature.
    :returns: ``None``.
    """
    # Don't allow overwriting
    arcpy.env.overwriteOutput = False

    # Get the parameters of the tool
    in_file = arcpy.GetParameterAsText(0)
    sheet_name = arcpy.GetParameterAsText(1)
    schema_name = arcpy.GetParameterAsText(2)
    service_name = arcpy.GetParameterAsText(3)
    layer_name = arcpy.GetParameterAsText(4)
    validate_only = arcpy.GetParameterAsText(5)

    # Get the path for the folder of the Excel file (used for output of GeoDB)
    path = os.path.dirname(in_file) + "\\"

    schema_uri = get_schema_uri(schema_name)
    layer_info = usginmodels.get_layer(schema_uri, layer_name)

    # If data is in a sheet in an Excel file convert to CSV, otherwise just
    # read.  Only a handle opened here is closed here; whatever
    # ``excel_to_csv`` returns is left to its owner.
    opened_here = sheet_name == "N/A"
    if opened_here:
        csv_file = open(in_file)
    else:
        csv_file = excel_to_csv(in_file, sheet_name)

    if not csv_file:
        return

    try:
        # Pass in the CSV, the schema to validate against and the layer name
        valid, messages, dataCorrected, long_fields, srs = usginmodels.validate_file(csv_file, schema_uri, layer_name)
        print_errors(valid, messages, dataCorrected)

        try:
            if validate_only == "false" and valid == True:
                CreateGeodatabase(path, service_name)

                arcpy.env.workspace = path + service_name + ".mdb"
                table = layer_name + "Table"

                # The first and last schema fields are skipped.
                MakeTable(table, layer_info.fields[1:-1], long_fields)
                InsertData(table, dataCorrected[1:], layer_info.fields[1:-1])
                CreateXYEventLayer(table, layer_name + "Layer", srs)
                CreateFeatureClass(layer_name + "Layer", layer_name, srs)

                # Make sure the final feature class has the same number of
                # rows as the original table
                rowsTemp = int(arcpy.GetCount_management(table).getOutput(0))
                rowsFinal = int(arcpy.GetCount_management(layer_name).getOutput(0))
                if rowsTemp != rowsFinal:
                    rowsDeleted = rowsTemp - rowsFinal
                    if rowsDeleted == 1:
                        arcpy.AddError(str(rowsDeleted) + " row was deleted when converting the table to the feature class.")
                    else:
                        arcpy.AddError(str(rowsDeleted) + " rows were deleted when converting the table to the feature class.")
                    arcpy.AddError("Check the Lat & Long values for errors.")
                    raise Exception("Conversion Failed.")
                else:
                    arcpy.Delete_management(table)

                arcpy.AddMessage("Conversion Successful!")

        except Exception as err:
            arcpy.AddError("Error: {0}".format(err))
    finally:
        # Release the input file so it is not left open after the tool runs.
        if opened_here:
            csv_file.close()

    return
def is_usgin_valid_data(key, data, errors, context):
    """Validate the first resource's file against the package's USGIN model.

    Reads the resource id and name from the flattened ``data`` dict, takes
    the content model URI and version from the ``md_package`` extra and the
    layer from the resource's ``md_resource`` field, then runs
    ``usginmodels.validate_file`` on the stored file and reports the
    outcome through the log and ``h.flash_error``.

    :param key: validator key (not used by this implementation).
    :param data: flattened dict keyed by tuples such as
        ``('resources', 0, 'id')``.
    :param errors: validator errors dict (not modified here).
    :param context: validator context (not used by this implementation).
    :returns: ``{'valid': True}`` when any of uri/version/layer is the
        string ``'none'`` (case-insensitive); ``None`` in every other case.
    """
    resource_id = data.get(('resources', 0, 'id'), None)
    if resource_id is None:
        return

    resource_name = data.get(('resources', 0, 'name'), None)

    md_resource = None
    for k, v in data.items():
        if k[0] == 'resources' and k[-1] == 'md_resource':
            md_resource = json.loads(data.get(k, None))

    md_package = None
    for k, v in data.items():
        if k[0] == 'extras' and v == 'md_package':
            md_package = json.loads(data.get((k[0], k[1], 'value'), None))

    # Without both metadata documents there is no content model to check.
    if md_package is None or md_resource is None:
        return

    uri = md_package.get('usginContentModel', None)
    version = md_package.get('usginContentModelVersion', None)
    layer = md_resource.get('usginContentModelLayer', None)

    if None in [uri, version, layer]:
        return

    def get_file_path(res_id):
        # Storage layout: <storage_path>/resources/<3 chars>/<3 chars>/<rest>
        dir_1 = res_id[0:3]
        dir_2 = res_id[3:6]
        file_name = res_id[6:]
        storage_base = config.get('ckan.storage_path', 'default')
        return path.join(storage_base, 'resources', dir_1, dir_2, file_name)

    csv_file = get_file_path(resource_id)

    if csv_file:
        log.info("Filename full path: %s " % csv_file)
    else:
        log.info("Cannot find the full path of the resources from %s" % resource_name)

    if 'none' in [uri.lower(), version.lower(), layer.lower()]:
        log.debug("Start USGIN content model validation")
        log.debug("USGIN tier 2 data model/version/layer are none")
        return {'valid': True}

    # The validator's messages get their own name so they do not shadow the
    # ``errors`` argument; ``with`` closes the file when validation is done.
    with open(csv_file, 'rbU') as csv_handle:
        valid, messages, dataCorrected, long_fields, srs = \
            usginmodels.validate_file(csv_handle, version, layer)

    log.debug("Finished USGIN content model validation")

    # One chain, so a fully valid document does not also fall through to
    # the "not valid" branch.
    if valid and not messages:
        log.debug("USGIN document is valid")
    elif valid and messages:
        log.debug('With changes the USGIN document will be valid')
        h.flash_error(base._('With changes the USGIN document will be valid'))
    else:
        log.debug('USGIN document is not valid')
        h.flash_error(base._('The USGIN document is not valid'))