def loadObjectClass(filename):
    """Load object class definitions from a CSV file into the validator database.

    Arguments:
        filename -- path of the CSV file containing object class data
    """
    # CSV header name -> model field name.
    header_to_field = {
        "max_oc_code": "object_class_code",
        "max_object_class_name": "object_class_name",
    }
    # Loader options keyed by model field; skip_duplicates presumably drops
    # repeated codes (handled inside LoaderUtils.loadCsv).
    field_options = {"object_class_code": {"skip_duplicates": True}}
    LoaderUtils.loadCsv(filename, ObjectClass, ValidatorValidationInterface(),
                        header_to_field, field_options)
def loadCgac(filename):
    """Load CGAC agency codes and names from a CSV file into the validator database.

    Arguments:
        filename -- path of the CSV file containing CGAC data
    """
    # CSV header name -> model field name.
    header_to_field = {
        "cgac": "cgac_code",
        "agency": "agency_name",
    }
    # Loader options: cgac_code is zero-padded to 3 characters and duplicate
    # codes are skipped (both handled inside LoaderUtils.loadCsv).
    field_options = {"cgac_code": {"pad_to_length": 3, "skip_duplicates": True}}
    LoaderUtils.loadCsv(filename, CGAC, ValidatorValidationInterface(),
                        header_to_field, field_options)
def loadFields(filename):
    """Load TAS records from a CSV file, replacing any existing TAS data.

    Arguments:
        filename -- filename of csv file that holds TAS data

    Raises:
        ValueError -- if a row does not match TASLoader.FILE_SCHEMA
    """
    totalTASAdded = 0
    totalExistingTAS = 0
    # Step 1: clean out the database.
    database = ValidatorValidationInterface()
    database.deleteTAS()
    lastRecord = {}
    # Step 2: add the new data.
    # newline='' replaces the former 'rU' mode: the 'U' flag was deprecated in
    # Python 3 and removed in 3.11, and newline='' is what the csv module
    # documents for reading.
    with open(filename, newline='') as csvfile:
        # Skip the first line of the csv as it's just metadata;
        # the second line contains the headers.
        next(csvfile, None)
        reader = csv.DictReader(csvfile)
        # Fixed widths for zero-padding each TAS component.
        pad_widths = {"ATA": 3, "AID": 3, "BPOA": 4, "EPOA": 4,
                      "A": 1, "MAIN": 4, "SUB": 3}
        for index, record in enumerate(reader):
            # Progress indicator so long-running loads don't look hung.
            if index % 40000 == 0:
                print("".join(["Loading ... ", str(index)]))
            # Pad record components to their fixed widths.
            for key, width in pad_widths.items():
                record[key] = record[key].zfill(width)
            # Skip rows identical to the previous one; validate the rest.
            if not LoaderUtils.compareRecords(record, lastRecord,
                                              TASLoader.FILE_SCHEMA):
                if LoaderUtils.checkRecord(record, TASLoader.FILE_SCHEMA):
                    if database.addTAS(record["ATA"], record["AID"],
                                       record["BPOA"], record["EPOA"],
                                       record["A"], record["MAIN"],
                                       record["SUB"]):
                        totalTASAdded += 1
                    else:
                        # addTAS returned falsy: record already present.
                        totalExistingTAS += 1
                else:
                    raise ValueError('CSV File does not follow schema')
            else:
                totalExistingTAS += 1
            lastRecord = record
    # Step 3: report metrics for debugging.
    print("".join(["Total TAS added : ", str(totalTASAdded)]))
    print("".join(["Duplicate TAS in file :", str(totalExistingTAS)]))
    print("".join(["Total TAS in file : ",
                   str(totalExistingTAS + totalTASAdded)]))
def loadFields(filename):
    """Load schema file to create validation rules and removes existing schemas

    Arguments:
        filename -- filename of csv file that holds TAS data

    Raises:
        ValueError -- if a row does not match TASLoader.FILE_SCHEMA
    """
    added_count = 0
    duplicate_count = 0
    # Step 1: wipe any existing TAS entries.
    interface = ValidatorValidationInterface()
    interface.deleteTAS()
    previous = {}
    # Step 2: load the new data.
    with open(filename, 'rU') as csv_handle:
        # The first line is metadata only; headers are on the second line.
        next(csv_handle, None)
        for row_number, row in enumerate(csv.DictReader(csv_handle)):
            # Periodic progress message for long-running loads.
            if row_number % 40000 == 0:
                print("".join(["Loading ... ", str(row_number)]))
            # Zero-pad each fixed-width TAS component.
            row["ATA"] = row["ATA"].zfill(3)
            row["AID"] = row["AID"].zfill(3)
            row["BPOA"] = row["BPOA"].zfill(4)
            row["EPOA"] = row["EPOA"].zfill(4)
            row["A"] = row["A"].zfill(1)
            row["MAIN"] = row["MAIN"].zfill(4)
            row["SUB"] = row["SUB"].zfill(3)
            if LoaderUtils.compareRecords(row, previous, TASLoader.FILE_SCHEMA):
                # Identical to the previous row: count it as a duplicate.
                duplicate_count += 1
                previous = row
                continue
            if not LoaderUtils.checkRecord(row, TASLoader.FILE_SCHEMA):
                raise ValueError('CSV File does not follow schema')
            if interface.addTAS(row["ATA"], row["AID"], row["BPOA"],
                                row["EPOA"], row["A"], row["MAIN"],
                                row["SUB"]):
                added_count += 1
            else:
                # addTAS returned falsy: record already present.
                duplicate_count += 1
            previous = row
    # Step 3: report metrics for debugging.
    print("".join(["Total TAS added : ", str(added_count)]))
    print("".join(["Duplicate TAS in file :", str(duplicate_count)]))
    print("".join(["Total TAS in file : ",
                   str(duplicate_count + added_count)]))
def loadFields(fileTypeName, schemaFileName):
    """Load schema file to create validation rules and removes existing schemas

    Arguments:
        schemaFileName -- filename of csv file that holds schema definition
        fileTypeName -- the type of file that the schema represents

    Raises:
        ValueError -- if a row lacks the required schema columns
    """
    # Step 1: drop existing rules and columns for this file type.
    interface = ValidatorValidationInterface()
    interface.removeRulesByFileType(fileTypeName)
    interface.removeColumnsByFileType(fileTypeName)
    required_keys = ["fieldname", "required", "data_type"]
    # Step 2: register each column described by the schema CSV.
    with open(schemaFileName, 'rU') as schema_file:
        for row in csv.DictReader(schema_file):
            row = FieldCleaner.cleanRecord(row)
            if not LoaderUtils.checkRecord(row, required_keys):
                raise ValueError('CSV File does not follow schema')
            column_id = interface.addColumnByFileType(
                fileTypeName,
                FieldCleaner.cleanString(row["fieldname"]),
                row["required"],
                row["data_type"])
            if "field_length" in row:
                # When a field length is specified, create a rule for it.
                limit = row["field_length"].strip()
                if limit:
                    # Non-whitespace content present: add a LENGTH rule.
                    interface.addRule(
                        column_id, "LENGTH", limit,
                        "Field must be no longer than specified limit")
def loadProgramActivity(filename):
    """Load program activity definitions from a CSV file into the validator database.

    Arguments:
        filename -- path of the CSV file containing program activity data
    """
    # CSV header name -> model field name.
    header_to_field = {
        "year": "budget_year",
        "agency_id": "agency_id",
        "alloc_id": "allocation_transfer_id",
        "account": "account_number",
        "pa_code": "program_activity_code",
        "pa_name": "program_activity_name",
    }
    # Loader options: zero-pad the code fields to their fixed widths
    # (handled inside LoaderUtils.loadCsv).
    field_options = {
        "program_activity_code": {"pad_to_length": 4},
        "agency_id": {"pad_to_length": 3},
        "account_number": {"pad_to_length": 4},
        "allocation_transfer_id": {"pad_to_length": 3},
    }
    LoaderUtils.loadCsv(filename, ProgramActivity, ValidatorValidationInterface(),
                        header_to_field, field_options)
def loadFields(fileTypeName, schemaFileName):
    """Load schema file to create validation rules and removes existing schemas

    Arguments:
        schemaFileName -- filename of csv file that holds schema definition
        fileTypeName -- the type of file that the schema represents

    Raises:
        ValueError -- if a row lacks the required schema columns
    """
    # Step 1: clean out existing rules and columns for this file type.
    database = ValidatorValidationInterface()
    database.removeRulesByFileType(fileTypeName)
    database.removeColumnsByFileType(fileTypeName)
    # Step 2: add the new fields.
    # newline='' replaces the former 'rU' mode: the 'U' flag was deprecated in
    # Python 3 and removed in 3.11, and newline='' is what the csv module
    # documents for reading.
    with open(schemaFileName, newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        for record in reader:
            record = FieldCleaner.cleanRecord(record)
            if not LoaderUtils.checkRecord(
                    record, ["fieldname", "required", "data_type"]):
                raise ValueError('CSV File does not follow schema')
            columnId = database.addColumnByFileType(
                fileTypeName,
                FieldCleaner.cleanString(record["fieldname"]),
                record["required"],
                record["data_type"])
            if "field_length" in record:
                # When a field length is specified, create a rule for it.
                length = record["field_length"].strip()
                if length:
                    # Non-whitespace characters present: add a LENGTH rule.
                    database.addRule(
                        columnId, "LENGTH", length, "",
                        "Field must be no longer than specified limit")
def loadObjectClass(filename):
    """Load object class codes and names from a CSV into the validator database.

    Arguments:
        filename -- path of the CSV file containing object class data
    """
    LoaderUtils.loadCsv(
        filename,
        ObjectClass,
        ValidatorValidationInterface(),
        # CSV header -> model field mapping.
        {"max_oc_code": "object_class_code",
         "max_object_class_name": "object_class_name"},
        # Per-field loader options (duplicate handling done by LoaderUtils).
        {"object_class_code": {"skip_duplicates": True}})
def loadCgac(filename):
    """Load CGAC agency codes and names from a CSV into the validator database.

    Arguments:
        filename -- path of the CSV file containing CGAC data
    """
    LoaderUtils.loadCsv(
        filename,
        CGAC,
        ValidatorValidationInterface(),
        # CSV header -> model field mapping.
        {"cgac": "cgac_code",
         "agency": "agency_name"},
        # Per-field loader options (padding/duplicate handling done by LoaderUtils).
        {"cgac_code": {"pad_to_length": 3, "skip_duplicates": True}})
def loadProgramActivity(filename):
    """Load program activity records from a CSV into the validator database.

    Arguments:
        filename -- path of the CSV file containing program activity data
    """
    LoaderUtils.loadCsv(
        filename,
        ProgramActivity,
        ValidatorValidationInterface(),
        # CSV header -> model field mapping.
        {"year": "budget_year",
         "agency_id": "agency_id",
         "alloc_id": "allocation_transfer_id",
         "account": "account_number",
         "pa_code": "program_activity_code",
         "pa_name": "program_activity_name"},
        # Per-field loader options (padding done by LoaderUtils).
        {"program_activity_code": {"pad_to_length": 4},
         "agency_id": {"pad_to_length": 3},
         "account_number": {"pad_to_length": 4},
         "allocation_transfer_id": {"pad_to_length": 3}})