Esempio n. 1
0
def validation(csv, cust):
    """Validate ``csv`` and report every problem found to the owner.

    The source is inspected with ``Inspector`` using row and error limits of
    100000.  When the report is valid nothing else happens.  Otherwise, for
    every table in the report, one readable line per error is written to
    ``<name>_error_dump.txt`` (relative to the current working directory)
    and ``notification`` is called with the error records collected so far.

    Args:
        csv: Source handed unchanged to ``Inspector.inspect``.
        cust: Iterable of custom error definitions.  Each entry is a mapping
            with ``'columns'`` (column numbers the entry applies to),
            ``'name'`` (code reported instead of the inspector's code) and
            ``'message'`` (text appended to the readable error line).  May
            be empty, in which case the inspector's own codes and messages
            are used.

    Returns:
        None.
    """
    # Pass the limits to the constructor rather than re-running __init__ on
    # an instance that was already initialised with the defaults.
    inspector = Inspector(row_limit=100000,
                          error_limit=100000)  # arbitrary row limit
    report = inspector.inspect(csv)
    if report['valid']:
        # An error report is only sent if there are issues to be found.
        return

    # Errors with these codes always keep the inspector's code and message.
    never_customised = ('required-constraint', 'type-or-format-error')

    # NOTE(review): both accumulators span all tables, so the notification
    # for a later table also carries the errors of the earlier ones.  Kept
    # as in the original -- confirm whether a per-table reset is wanted.
    email_data = []
    pretty_lines = []

    for table in report['tables']:
        # Presumably the report stores the descriptor as a Python-literal
        # string; literal_eval turns it back into a dict -- TODO confirm.
        s = ast.literal_eval(table['datapackage'])
        filename = s['name'] + "_error_dump.txt"
        with open(filename, 'w', encoding='utf-8') as fp:
            for error in table['errors']:
                row = error['row-number']
                col = error['column-number']
                err_str = error['message']
                code = error['code']

                # Message text up to and including the second double quote;
                # the offending value is whatever sits between the quotes.
                # Both are empty when the message has fewer than two quotes.
                first_quote = err_str.find("\"")
                quoted = err_str[:err_str.find("\"", first_quote + 1) + 1]
                if code == 'required-constraint':
                    value = ''
                else:
                    value = quoted[first_quote + 1:-1]

                if code not in never_customised:
                    # Multiple custom codes are possible; the first entry
                    # covering this column wins.
                    for err in cust:
                        if col in err['columns']:
                            err_str = (quoted + " in row " + str(row)
                                       + " and column " + str(col)
                                       + err['message'])
                            code = err['name']
                            break

                # The dump file used to be created but left empty; record
                # the readable message there as well.
                fp.write(err_str + "\n")
                pretty_lines.append(err_str + "\n")
                email_data.append({
                    'code': code,
                    'row': row,
                    'col': col,
                    'value': value
                })
        # `owner` is not defined in this function; it is expected to be
        # available at module level.
        notification(owner, email_data, ''.join(pretty_lines), s['name'])
Esempio n. 2
0
def validation(datapackage, cust, path_and_filename, path):
    """Validate an uploaded datapackage and write error reports to disk.

    datapackage: dataset uploaded in datapackage
        A datapackage is a json like data structure, that contains the original
        dataset, as well as the schema that was generated previously, e.g.,
        {
            "name": filename,
            "title": filename,
            "resources": [
                    {
                        "name": filename,
                        "path": path_and_filename,
                        "schema": schema
                    }
                ]
        }

    cust: iterable of custom error definitions; each entry is a mapping with
        'columns' (column numbers it applies to) and 'message' (text
        appended to the readable error line). May be empty.
    path_and_filename: name of the file uploaded; used as the stem of the
        '<stem>_error_report.json' file.
    path: directory the report files are written to; "" means the current
        working directory.

    Returns the string "All clear" when the report is valid. Otherwise
    returns the list of error dicts of every table in the report, after
    writing one readable line per error to '<table name>_error_dump.txt'
    and the full error list as JSON to '<stem>_error_report.json'.
    """
    # Pass the limits to the constructor rather than re-running __init__ on
    # an instance that was already initialised with the defaults.
    inspector = Inspector(row_limit=100000,
                          error_limit=100000)  # arbitrary row limit
    report = inspector.inspect(datapackage)
    print(json.dumps(report, indent=4))

    if report['valid']:
        return "All clear"

    if path != "":
        path = path + '/'

    # Errors with these codes always keep the inspector's own message.
    never_customised = ('required-constraint', 'type-or-format-error')

    # Collected across all tables; the original returned from inside the
    # loop and therefore silently ignored every table after the first.
    all_errors = []
    for table in report['tables']:
        # Presumably the report stores the descriptor as a Python-literal
        # string; literal_eval turns it back into a dict -- TODO confirm.
        s = ast.literal_eval(table['datapackage'])
        filename = s['name'] + "_error_dump.txt"
        with open(path + filename, 'w', encoding='utf-8') as fp:
            for error in table['errors']:
                row = error['row-number']
                # The original tested for a 'col' key that is never read,
                # so the column was always blank; look up the real key.
                col = error.get('column-number', "")
                err_str = error['message']

                if error['code'] not in never_customised:
                    # This replaces certain error messages with better
                    # formatted, more human readable variants.  The first
                    # custom entry covering this column wins.
                    for err in cust:
                        if col in err['columns']:
                            # Keep the message up to and including the
                            # second double quote (the quoted value).
                            first_quote = err_str.find("\"")
                            quoted = err_str[:err_str.find(
                                "\"", first_quote + 1) + 1]
                            # Presumably discounts the header row -- TODO
                            # confirm.  Errors without a row stay as-is.
                            shown_row = row - 1 if row is not None else row
                            err_str = (quoted + " in row " + str(shown_row)
                                       + " and column " + str(col)
                                       + err['message'])
                            break

                # The dump file used to be created but left empty.
                fp.write(err_str + "\n")
        all_errors.extend(table['errors'])

    # TODO: an earlier draft also meant to copy the offending rows of the
    # uploaded CSV into '<stem>_errorlog.csv'; that was never enabled.
    with open(path + path_and_filename + '_error_report.json',
              'w') as fp:
        json.dump(all_errors, fp, indent=4)
    return all_errors