def load_data(uri, dateFormat):
    """Download a CSV price history and return its rows as dictionaries.

    The resource is read in full, decoded as UTF-8 and parsed with
    ``csv.DictReader``.  A leading byte-order mark is dropped while decoding
    so that it cannot end up inside the first column name.

    :param uri: location of the CSV resource (anything ``urlopen`` accepts)
    :param dateFormat: ``time.strptime`` format of the ``Date`` column
    :return: list of dicts with the keys ``date`` (``time.struct_time``) and
        ``open``, ``close``, ``high``, ``low``, ``volume`` (floats)
    :raises KeyError: if one of the expected columns is missing
    :raises ValueError: if a date or a number cannot be parsed
    """
    # Kept function-local, as in the original, so the block does not depend
    # on the module's import section.
    from csv import DictReader
    from io import StringIO
    from time import strptime
    from urllib.request import urlopen

    logging.info('loading data; uri: {0}'.format(uri))

    # The context manager guarantees the connection is released even when
    # reading fails.
    with urlopen(uri) as response:
        payload = response.read()

    # 'utf-8-sig' strips the BOM (if any) once, at the I/O boundary.
    text = payload.decode('utf-8-sig')

    # newline='' leaves line-ending handling to the csv module.
    reader = DictReader(StringIO(text, newline=''))

    return [
        {
            'date': strptime(row['Date'], dateFormat),
            'open': float(row['Open']),
            'close': float(row['Close']),
            'high': float(row['High']),
            'low': float(row['Low']),
            'volume': float(row['Volume']),
        }
        for row in reader
    ]
def read_certified_applications(csv_file_path):
    """
    Yield the rows of an input CSV file whose CASE_STATUS is 'CERTIFIED'.

    Steps performed while reading:
        1. The CSV dialect is sniffed from the first 16384 characters, so
           both comma-separated and semicolon-separated files are accepted.
        2. If the header contains 'LCA_CASE_NUMBER' (the previous LCA record
           layout) the field names are replaced with the ones returned by
           get_migrated_fieldnames().
        3. Rows with any other CASE_STATUS are skipped.

    This keeps the file-format handling apart from the analytical code.

    :param csv_file_path: path of the CSV file to read
    :return: generator of row dictionaries
    """
    print('Processing input file: {0}'.format(csv_file_path))

    with open(csv_file_path) as source:
        # Detect the dialect on a sample, then rewind for the real read
        sample = source.read(16384)
        detected = Sniffer().sniff(sample)
        source.seek(0)

        applications = DictReader(source, dialect=detected)

        # Quick-and-dirty migration of the previous LCA record layout
        if 'LCA_CASE_NUMBER' in applications.fieldnames:
            applications.fieldnames = get_migrated_fieldnames()

        # Only certified applications are passed on
        for application in applications:
            if application['CASE_STATUS'] == 'CERTIFIED':
                yield application
def upload(request):
    """
    Process an uploaded leave CSV and render the resulting table.

    Reads the 'school' (list) and 'minimum' (default "0") request variables,
    keeps the "AL" rows of the selected schools whose actual balance
    (allocated balance minus future bookings, truncated to int) is at least
    the minimum, and renders them with the 'leave/output.html' template.

    :param request: the request carrying the parameters and the 'datafile' upload
    :return: the rendered table, or the error response produced by
             get_variable_with_error / create_error_response
    """
    # request parameters (a non-None response is handed straight back)
    response, schools = get_variable_with_error(request, 'leave', 'school', as_list=True)
    if response is not None:
        return response

    response, minimum_str = get_variable_with_error(request, 'leave', 'minimum', def_value="0")
    if response is not None:
        return response
    minimum = int(minimum_str)

    csv = request.FILES['datafile']

    # column order and column titles of the output table
    order = [
        'school',
        'employee',
        'manager',
        'actual_balance',
        'leave_accrued',
        'future_leave_bookings',
        'current_balance',
        'current_allocated_balance',
    ]
    header = {
        'school': 'Faculty/School',
        'employee': 'Employee',
        'manager': 'Manager',
        'actual_balance': 'Actual balance',
        'leave_accrued': 'Leave accrued',
        'future_leave_bookings': 'Future bookings',
        'current_balance': 'Current balance',
        'current_allocated_balance': 'Current allocated balance',
    }

    result = []
    try:
        with open(csv.temporary_file_path(), encoding='ISO-8859-1') as csvfile:
            reader = DictReader(csvfile)
            # normalise the header: lower case, underscores instead of blanks
            reader.fieldnames = [name.lower().replace(" ", "_") for name in reader.fieldnames]
            for row in reader:
                school = row['main_clevel']
                # TODO any other types of leave to add/include?
                if school not in schools or row['leave_type'] != "AL":
                    continue

                rrow = {
                    'school': school,
                    'employee': row['employee_name'],
                    'manager': row['manager'],
                    'actual_balance': float(row['current_allocated_balance']) - float(row['future_leave_bookings']),
                    'current_balance': row['current_balance'],
                    'current_allocated_balance': row['current_allocated_balance'],
                    'leave_accrued': row['accrual'],
                    'future_leave_bookings': row['future_leave_bookings'],
                }
                if int(rrow['actual_balance']) >= minimum:
                    result.append(rrow)
    except Exception as ex:
        traceback.print_exc(file=sys.stdout)
        return create_error_response(request, 'leave', 'Failed to read uploaded CSV file: ' + str(ex))

    # highest balance first
    result.sort(key=actual_balance_key, reverse=True)

    # hand everything to the template
    context = applist.template_context('leave')
    context['table'] = result
    context['header'] = header
    context['order'] = order
    template = loader.get_template('leave/output.html')
    return HttpResponse(template.render(context, request))
def _read_file(self, key): resp = self.boto_client.get_object(Bucket=self.bucket, Key=key) with gz_open(resp['Body'], mode='rt') as gz_f: reader = DictReader(gz_f, delimiter=' ') reader.fieldnames = [ f.replace('-', '_') for f in reader.fieldnames ] yield from reader
def _read_file(self, key):
    """
    Yield the rows of a gzip-compressed, space-delimited object as dicts
    and, once every row has been consumed, add the object's size to the
    byte counters on ``self``.

    The object is fetched with ``self.boto_client.get_object`` from
    ``self.bucket``; dashes in the header names are replaced by underscores.

    :param key: key of the object to read
    :return: generator of row dictionaries
    """
    resp = self.boto_client.get_object(Bucket=self.bucket, Key=key)
    with gz_open(resp['Body'], mode='rt') as gz_f:
        records = DictReader(gz_f, delimiter=' ')
        # header names become identifier-friendly: 'a-b' -> 'a_b'
        records.fieldnames = [name.replace('-', '_') for name in records.fieldnames]
        for record in records:
            yield record

        # Reached only after the last row was handed out; the file must
        # still be open here because tell() is queried.
        with THREAD_LOCK:
            self.bytes_processed += gz_f.tell()
            self.compressed_bytes_processed += resp['ContentLength']
def csv2ics(filename, tz=None):
    """
    Print the events of a CSV file wrapped in a VCALENDAR block.

    The CSV dialect is sniffed from the complete file content, the header
    names are passed through transform_fieldname() and rows rejected by
    row_nonblank() are dropped before write_events() is called.

    :param filename: path of the CSV file to convert
    :param tz: passed through unchanged to write_events()
    """
    print_field("BEGIN", "VCALENDAR")
    # NOTE(review): binary mode suits the Python 2 csv module; under
    # Python 3 the csv module expects a text-mode file - confirm the
    # interpreter this script targets.
    with open(filename, "rb") as source:
        content = source.read()
        detected = Sniffer().sniff(content)
        source.seek(0)
        events = DictReader(source, dialect=detected)
        events.fieldnames = [transform_fieldname(column) for column in events.fieldnames]
        populated = []
        for event in events:
            if row_nonblank(event):
                populated.append(event)
        write_events(populated, tz)
    print_field("END", "VCALENDAR")
def csvInput(file, options, dialect='excel'):
    """
    Read a CSV file into a list of row dictionaries.

    Every field name is passed through ``options['alias']`` and each row is
    extended with the keys ``"file"`` (the path) and ``"format"`` (the
    result of ``fileType(file)``).

    :param file: path of the CSV file
    :param options: mapping with the keys ``'header'`` (comma-separated
        field names, or a false value to take them from the first line of
        the file) and ``'alias'`` (callable applied to every field name)
    :param dialect: csv dialect handed to ``DictReader``
    :return: list of row dictionaries
    """
    from csv import DictReader

    header = options['header']
    with open(file, 'r') as f:
        if header:
            reader = DictReader(f, dialect=dialect, fieldnames=header.split(','))
        else:
            reader = DictReader(f, dialect=dialect)
        # A real list is required: on Python 3 ``map`` returns a one-shot
        # iterator, which DictReader cannot use as its field names.
        reader.fieldnames = [options['alias'](name) for name in reader.fieldnames]
        entries = list(reader)

    # An explicit loop instead of ``map`` with a side-effecting lambda: the
    # latter never runs on Python 3 (lazy) and its tuple-parameter syntax
    # does not even compile there.
    if entries:
        # presumably fileType() depends only on the path, so one call serves
        # every row - verify against its definition
        extra = {"file": file, "format": fileType(file)}
        for entry in entries:
            entry.update(extra)
    return entries
def startup():
    """
    Print the usage help and build the list of products.

    When ``products.csv`` exists in the current working directory the JSON
    products are fetched from AWS, passed through ``parse_json`` and
    extended with the ``parse_csv`` result of the local CSV file.

    :return: the combined product list, or None (after logging a warning)
             when ``products.csv`` is not in the working directory
    """
    help_string = """
    The API is available at: http://localhost:5000/api/products/

    Example use:
    A GET request to http://localhost:5000/api/products/1234 would return the details
    of the product with id = 1234 (if it exists) in JSON format
    """
    print(help_string)

    if "products.csv" not in os.listdir(os.getcwd()):
        logging.warning("Please ensure the products.csv file is in the same directory as the app!")
        return None

    # Fetch json products from aws
    aws_url = "https://s3-eu-west-1.amazonaws.com/pricesearcher-code-tests/python-software-developer/products.json"
    response = requests.get(aws_url)
    try:
        # Read the fetched data as json and call parse function
        # to standardise the data into the form we require
        product_list = parse_json(response.json())
    except decoder.JSONDecodeError:
        logging.warning("Failed to retrieve json products from AWS, could not decode data into JSON")
        # Continue with the CSV products only; without this the name would
        # be unbound and the code below would die with a NameError.
        product_list = []
    else:
        # Check the json products keys (collected in order of first
        # appearance so the logged list stays readable)
        keys = []
        for product in product_list:
            for key in product:
                if key not in keys:
                    keys.append(key)
        if set(keys) != {"id", "name", "brand", "retailer", "price", "in_stock"}:
            logging.warning("Json products keys differ from required")
            logging.warning("Json keys: {}".format(list(keys)))

    # Use DictReader from csv module to turn the csv data into python dictionaries
    with open("products.csv") as csvfile:
        dr = DictReader(csvfile, delimiter=",")
        # Reassign the field names to match the json data.
        # NOTE(review): because the names are assigned before the first read,
        # DictReader does not consume a header line - if products.csv has
        # one it reaches parse_csv as a data row; confirm parse_csv copes.
        dr.fieldnames = ["id", "name", "brand", "retailer", "price", "in_stock"]
        # Parse the data to fit our expected scheme and add it to the list of products
        product_list.extend(parse_csv(list(dr)))
    return product_list
def import_supervisors(csv, encoding, email=None, delete=True):
    """
    Imports the supervisors (Jade Export).

    The Supervisors table is emptied first, then one record is saved per
    CSV row. Errors are logged and returned rather than raised.

    :param csv: the CSV file to import
    :type csv: str
    :param encoding: the file encoding (eg utf-8)
    :type encoding: str
    :param email: the (optional) email address to send a notification to
    :type email: str
    :param delete: whether to delete the data file
    :type delete: bool
    :return: None if successful, otherwise error message
    :rtype: str
    """
    result = None

    set_maintenance_mode(True)

    # empty table
    Supervisors.objects.all().delete()

    # import
    # Raw strings: '\/' is not a valid string escape and triggers a warning
    # on current Python versions. The compiled patterns are unchanged.
    p1 = re.compile(r'.*\/')  # everything up to and including the last '/'
    p2 = re.compile(r' .*')   # everything from the first blank onwards
    try:
        with open(csv, encoding=encoding) as csvfile:
            reader = DictReader(csvfile)
            # normalise the header: lower case, underscores instead of blanks
            reader.fieldnames = [name.lower().replace(" ", "_") for name in reader.fieldnames]
            count = 0
            for row in reader:
                count += 1
                truncate_strings(row, 250)
                encode_strings(row, 'utf-8')
                r = Supervisors()
                r.student_id = row['student'][row['student'].rfind(' ') + 1:]  # extract ID at end of string
                r.student = row['student']
                r.supervisor = row['supervisor']
                r.active_roles = row['active_roles']
                r.entity = row['entity']
                r.agreement_status = row['agreement_status']
                r.date_agreed = parse_supervisors_date('date_agreed', row['date_agreed'])
                r.completion_date = parse_supervisors_date('completion_date', row['completion_date'])
                r.proposed_enrolment_date = parse_supervisors_date(
                    'proposed_enrolment_date', row['proposed_enrolment_date'])
                r.proposed_research_topic = row['proposed_research_topic']
                # normalize title a bit
                title = row['title']
                title = title.lower()
                title = title.replace(".", "").replace("/", "").replace(" ", "")
                title = title.replace("associate", "a").replace("assoc", "a")
                # "pro" -> "prof" also turns "prof" into "proff", hence the
                # final replacement
                title = title.replace("professor", "prof").replace("pro", "prof").replace("proff", "prof")
                title = title.replace("doctor", "dr")
                title = title.replace("sir", "")
                r.title = title
                r.quals = row['quals']
                r.comments = row['comments']
                # active if not withdrawn
                r.active = ("removed" not in title) and ("replaced" not in title) and ("informal" not in title)
                # determine program type
                program = p2.sub('', p1.sub('', row['entity'])).upper()
                r.program = award_to_program(program)
                r.save()
                # progress
                if (count % 1000) == 0:
                    update_tablestatus(Supervisors._meta.db_table, "Imported " + str(count) + " rows...")
    except Exception:
        msg = traceback.format_exc()
        logger.error(msg=msg)
        result = msg
    finally:
        if delete:
            try:
                os.remove(csv)
            except Exception:
                msg = traceback.format_exc()
                logger.error(msg=msg)
                result = msg

    if email is not None:
        send_email(
            email, 'Import: supervisors',
            'Import succeeded' if (result is None) else 'Import failed: ' + result)

    return result
def import_coursedefs(year, csv, encoding, email=None, delete=True):
    """
    Imports the course definitions for a specific year (Brio/Hyperion export).

    The rows previously stored for the year are deleted first, then one
    record is saved per CSV row. Errors are logged and returned rather than
    raised.

    :param year: the year to import the results for (eg 2015)
    :type year: int
    :param csv: the CSV file to import (opened as plain text; this function
                does not decompress gzip files)
    :type csv: str
    :param encoding: the file encoding (eg utf-8)
    :type encoding: str
    :param email: the (optional) email address to send a notification to
    :type email: str
    :param delete: whether to delete the data file
    :type delete: bool
    :return: None if successful, otherwise error message
    :rtype: str
    """
    result = None

    set_maintenance_mode(True)

    # delete previous rows for year
    CourseDefs.objects.all().filter(year=year).delete()

    # import
    try:
        # "with" closes the file even when a row fails to import
        with open(csv, encoding=encoding) as csvfile:
            reader = DictReader(csvfile)
            # normalise the header: lower case, underscores instead of blanks
            reader.fieldnames = [name.lower().replace(" ", "_") for name in reader.fieldnames]
            count = 0
            for row in reader:
                count += 1
                truncate_strings(row, 250)
                encode_strings(row, 'utf-8')
                r = CourseDefs()
                r.year = year
                r.code = string_cell(row, ['papercode'])
                r.title = string_cell(row, ['papertitle'])
                r.description = string_cell(row, ['paperdescription'])
                r.type = string_cell(row, ['papertype'])
                r.stage = int_cell(row, ['paperstage'])
                r.points = float_cell(row, ['paperpoints'])
                r.delivery_mode = string_cell(row, ['paperdeliverymode'])
                r.owning_programme = string_cell(row, ['paperowningprogramme'])
                r.owning_programme_title = string_cell(row, ['paperowningprogrammetitle'])
                r.fw_level = int_cell(row, ['paperfwlevel'])
                r.hours_contact = int_cell(row, ['paperhourscontact'])
                r.hours_self_directed = int_cell(row, ['paperhoursselfdirected'])
                r.hours_other_directed = int_cell(row, ['paperhoursotherdirected'])
                r.funding_source = string_cell(row, ['paperfundingsource'])
                r.course_factor = float_cell(row, ['papercoursefactor'])
                r.cost_category_code = string_cell(row, ['papercostcategorycode'])
                r.cost_category = string_cell(row, ['papercostcategory'])
                r.funding_class_code = string_cell(row, ['paperfundingclasscode'])
                r.funding_class = string_cell(row, ['paperfundingclass'])
                r.individual_efts = int_cell(row, ['paperindividualefts'])
                r.nzsced_code = string_cell(row, ['nzscedcode'])
                r.nzsced_category = string_cell(row, ['nzscedcategory'])
                r.delivering_school_code = string_cell(row, ['paperdeliveringschoolcode'])
                r.delivering_school = string_cell(row, ['paperdeliveringschool'])
                r.delivering_dept_code = string_cell(row, ['paperdeliveringdeptcode'])
                r.delivering_dept = string_cell(row, ['paperdeliveringdept'])
                r.delivering_unit_code = string_cell(row, ['paperdeliveringunitcode'])
                r.delivering_unit = string_cell(row, ['paperdeliveringunit'])
                r.owning_school_code = string_cell(row, ['paperowningschoolcode'])
                r.owning_school = string_cell(row, ['paperowningschool'])
                r.owning_dept_code = string_cell(row, ['paperowningdepartmentcode'])
                r.owning_dept = string_cell(row, ['paperowningdepartment'])
                r.owning_unit_code = string_cell(row, ['paperowningunitcode'])
                r.owning_unit = string_cell(row, ['paperowningunit'])
                # NOTE(review): self_paced is read from the 'papertitle'
                # column, which looks like a copy/paste slip - confirm the
                # intended column name in the export before changing it.
                r.self_paced = bool_cell(row, ['papertitle'])
                r.online = bool_cell(row, ['paperonline'])
                r.active = bool_cell(row, ['paperactive'])
                r.pending = bool_cell(row, ['paperpending'])
                r.sub_status = string_cell(row, ['papersubstatus'])
                r.grade_method_code = string_cell(row, ['grademethodcode'])
                r.pbrf_eligibility = string_cell(row, ['pbrfeligibility'])
                r.coe_policy = string_cell(row, ['coepolicy'])
                r.report_academic_result = bool_cell(row, ['reportacademicresult'])
                r.internet_based = string_cell(row, ['paperinternetbased'])
                r.save()
                # progress
                if (count % 1000) == 0:
                    update_tablestatus(CourseDefs._meta.db_table, "Imported " + str(count) + " rows...")
    except Exception:
        msg = traceback.format_exc()
        logger.error(msg=msg)
        # Record the failure instead of returning right here, so that the
        # failure notification below is still sent.
        result = msg
    finally:
        if delete:
            try:
                os.remove(csv)
            except Exception:
                msg = traceback.format_exc()
                logger.error(msg=msg)
                # an import error, if any, stays the reported one
                if result is None:
                    result = msg

    if email is not None:
        send_email(
            email, 'Import: course definitions',
            'Import succeeded' if (result is None) else 'Import failed: ' + result)

    return result
def import_associatedrole(csv, encoding, email=None, delete=True):
    """
    Loads the associated role CSV (Jade Export) into the AssociatedRole table.

    The table is emptied first, then one record is saved per CSV row.
    Errors are logged and returned rather than raised.

    :param csv: the CSV file to import
    :type csv: str
    :param encoding: the file encoding (eg utf-8)
    :type encoding: str
    :param email: the (optional) email address to send a notification to
    :type email: str
    :param delete: whether to delete the data file
    :type delete: bool
    :return: None if successful, otherwise error message
    :rtype: str
    """
    result = None

    set_maintenance_mode(True)

    # start from an empty table
    AssociatedRole.objects.all().delete()

    try:
        with open(csv, encoding=encoding) as handle:
            rows = DictReader(handle)
            # normalise the header: lower case, underscores instead of blanks
            rows.fieldnames = [column.lower().replace(" ", "_") for column in rows.fieldnames]
            for imported, row in enumerate(rows, start=1):
                truncate_strings(row, 250)
                encode_strings(row, 'utf-8')

                record = AssociatedRole()
                record.role = row['role']
                record.person = row['person']
                record.entity = row['entity']
                record.valid_from = parse_associatedrole_date('valid_from', row['valid_from'])
                record.valid_to = parse_associatedrole_date('valid_to', row['valid_to'])
                # a role without end date is still active
                record.active = len(row['valid_to'].strip()) == 0

                # the student ID follows the first " - " of the entity
                if " - " in record.entity:
                    id_start = record.entity.index(" - ") + 3
                    record.student_id = record.entity[id_start:]

                # after "Award/" comes the award code, a blank and the student
                if "Award/" in record.entity:
                    award_start = record.entity.index("Award/") + 6
                    record.student = record.entity[award_start:]
                    blank = record.student.index(" ")
                    record.program = award_to_program(record.student[0:blank].upper())
                    record.student = record.student[blank + 1:]

                record.save()

                # progress feedback every 1000 rows
                if imported % 1000 == 0:
                    update_tablestatus(AssociatedRole._meta.db_table, "Imported {0} rows...".format(imported))
    except Exception:
        msg = traceback.format_exc()
        logger.error(msg=msg)
        result = msg
    finally:
        if delete:
            try:
                os.remove(csv)
            except Exception:
                msg = traceback.format_exc()
                logger.error(msg=msg)
                result = msg

    if email is not None:
        if result is None:
            body = 'Import succeeded'
        else:
            body = 'Import failed: ' + result
        send_email(email, 'Import: associated role', body)

    return result
def import_grade_results(year, csv, isgzip, encoding, email=None, delete=True):
    """
    Imports the grade results for a specific year (Brio/Hyperion export).

    The rows previously stored for the year are deleted first, then one
    record is saved per CSV row. Errors are logged and returned rather than
    raised.

    :param year: the year to import the results for (eg 2015)
    :type year: int
    :param csv: the CSV file to import, can be gzip compressed
    :type csv: str
    :param isgzip: true if GZIP compressed
    :type isgzip: bool
    :param encoding: the file encoding (eg utf-8)
    :type encoding: str
    :param email: the (optional) email address to send a notification to
    :type email: str
    :param delete: whether to delete the data file
    :type delete: bool
    :return: None if successful, otherwise error message
    :rtype: str
    """
    result = None

    set_maintenance_mode(True)

    # first non-empty query date found in the file (see end of function)
    query_date = None

    # delete previous rows for year
    GradeResults.objects.all().filter(year=year).delete()

    # import
    try:
        if isgzip:
            csvfile = gzip.open(csv, mode='rt', encoding=encoding)
        else:
            csvfile = open(csv, encoding=encoding)
        # "with" closes the file even when a row fails to import
        with csvfile:
            reader = DictReader(csvfile)
            # normalise the header: lower case, underscores instead of blanks
            reader.fieldnames = [name.lower().replace(" ", "_") for name in reader.fieldnames]
            count = 0
            for row in reader:
                count += 1
                truncate_strings(row, 250)
                encode_strings(row, 'utf-8')
                # Where several column names are listed, they are the
                # alternative names handed to the *_cell helpers.
                r = GradeResults()
                r.year = year
                r.student_id = string_cell(row, ['student_id'])
                r.name = string_cell(row, ['name'])
                r.title = string_cell(row, ['title'])
                r.prefered_given_name = string_cell(row, ['prefered_given_name'], defvalue='')
                r.given_name = string_cell(row, ['given_name'], defvalue='')
                r.other_given_names = string_cell(row, ['other_given_names'], defvalue='')
                r.family_name = string_cell(row, ['family_name'], defvalue='')
                r.previous_name = string_cell(row, ['previous_name'])
                r.address1 = string_cell(row, ['address1', 'address_line_1'])
                r.address2 = string_cell(row, ['address2', 'address_line_2'])
                r.address2a = string_cell(row, ['address2a'])
                r.address2b = string_cell(row, ['address2b'])
                r.address3 = string_cell(row, ['address3', 'address_line_3'])
                r.address4 = string_cell(row, ['address4', 'address_line_4'])
                r.postcode = string_cell(row, ['postcode', 'postal_area_code'])
                r.telephone = string_cell(row, ['telephone', 'perm_phone_number'])
                r.cellphone = string_cell(row, ['cellphone', 'perm_cellphone_number'])
                r.email = string_cell(row, ['email', 'perm_email_address'])
                r.hasdisability = int_cell(row, ['hasdisability'])
                r.isdomestic = int_cell(row, ['isdomestic', 'domestic_indicator'])
                r.is_domiciled_locally = int_cell(row, ['is_domiciled_locally'])
                r.citizenship = string_cell(row, ['citizenship'])
                r.residency_status = string_cell(row, ['residency_status'])
                r.origin = string_cell(row, ['origin'])
                r.gender = string_cell(row, ['gender'])
                r.ethnicity = string_cell(row, ['ethnicity'])
                r.ethnic_group = string_cell(row, ['ethnic_group'])
                r.all_ethnicities_string = string_cell(row, ['all_ethnicities_string'])
                r.all_iwi_string = string_cell(row, ['all_iwi_string'])
                r.dateofbirth = parse_grade_results_date(
                    'dateofbirth', string_cell(row, ['dateofbirth', 'date_of_birth']))
                r.dateofdeath = string_cell(row, ['dateofdeath'])
                r.waikato_1st = int_cell(row, ['waikato_1st'])
                r.nz_1st = int_cell(row, ['nz_1st'])
                r.last_year_sec = int_cell(row, ['last_year_sec'])
                r.sec_qual_year = int_cell(row, ['sec_qual_year'])
                r.last_sec_school = string_cell(row, ['last_sec_school'])
                r.last_sec_school_region = string_cell(row, ['last_sec_school_region'])
                r.highest_sec_qual = string_cell(row, ['highest_sec_qual'])
                r.main_activity = string_cell(row, ['main_activity'])
                r.award_title = string_cell(row, ['award_title', 'award'])
                r.prog_abbr = string_cell(row, ['prog_abbr', 'prog_-_abbr'])
                r.programme = string_cell(row, ['programme'])
                r.programme_type_code = string_cell(row, ['programme_type_code'])
                r.programme_type = string_cell(row, ['programme_type'])
                r.ishigherdegree = int_cell(row, ['ishigherdegree'])
                r.school_of_study = string_cell(row, ['school_of_study'])
                r.school_of_study_clevel = fix_org_unit(string_cell(row, ['school_of_study_clevel']))
                r.paper_master_code = string_cell(row, ['paper_master_code', 'paper_master'])
                r.paper_occurrence = string_cell(row, ['paper_occurrence'])
                r.paper_title = string_cell(row, ['paper_title'])
                r.occurrence_startdate = parse_grade_results_date(
                    'occurrence_startdate', string_cell(row, ['occurrence_startdate']))
                r.occurrence_startyear = int_cell(row, ['occurrence_startyear'])
                r.occurrence_startweek = int_cell(row, ['occurrence_startweek'])
                r.occurrence_enddate = parse_grade_results_date(
                    'occurrence_enddate', string_cell(row, ['occurrence_enddate']))
                r.stage = int_cell(row, ['stage'])
                r.credits = float_cell(row, ['credits'])
                r.student_credit_points = float_cell(row, ['student_credit_points', 'student_credits'])
                r.iscancelled = int_cell(row, ['iscancelled'])
                r.isoncampus = int_cell(row, ['isoncampus'])
                r.issemesteracourse = int_cell(row, ['issemesteracourse'])
                r.issemesterbcourse = int_cell(row, ['issemesterbcourse'])
                r.iswholeyearcourse = int_cell(row, ['iswholeyearcourse'])
                r.location_code = string_cell(row, ['location_code'])
                r.location = string_cell(row, ['location'])
                r.owning_school_clevel = fix_org_unit(string_cell(row, ['owning_school_clevel']))
                r.owning_school = string_cell(row, ['owning_school'])
                r.owning_department_clevel = string_cell(row, ['owning_department_clevel'])
                r.owning_department = string_cell(row, ['owning_department'])
                r.owning_level4_clevel = string_cell(row, ['owning_level4_clevel'])
                r.owning_level4_department = string_cell(row, ['owning_level4_department'])
                r.owning_level4or3_department = string_cell(row, ['owning_level4or3_department'])
                r.owning_level4or3_clevel = string_cell(row, ['owning_level4or3_clevel'])
                r.delivery_mode_code = string_cell(row, ['delivery_mode_code'])
                r.delivery_mode = string_cell(row, ['delivery_mode'])
                r.semester_code = string_cell(row, ['semester_code'])
                r.semester_description = string_cell(row, ['semester_description'])
                r.isselfpaced = int_cell(row, ['isselfpaced'])
                r.source_of_funding = string_cell(row, ['source_of_funding'])
                r.funding_category_code = string_cell(row, ['funding_category_code'])
                r.funding_category = string_cell(row, ['funding_category'])
                r.cost_category_code = string_cell(row, ['cost_category_code'])
                r.cost_category = string_cell(row, ['cost_category'])
                r.research_supplement_code = int_cell(row, ['research_supplement_code'])
                r.research_supplement = string_cell(row, ['research_supplement'])
                r.classification_code = float_cell(row, ['classification_code'])
                r.classification = string_cell(row, ['classification'])
                r.division = string_cell(row, ['division'])
                r.division_code = string_cell(row, ['division_code'])
                r.specified_programme = string_cell(row, ['specified_programme'])
                r.major = string_cell(row, ['major'])
                r.second_major = string_cell(row, ['second_major'])
                r.major2 = string_cell(row, ['major2'])
                r.second_major2 = string_cell(row, ['second_major2'])
                r.main_subject = string_cell(row, ['main_subject'])
                r.second_subject = string_cell(row, ['second_subject'])
                r.supporting_subject = string_cell(row, ['supporting_subject'])
                r.teaching_1 = string_cell(row, ['teaching_1'])
                r.teaching_2 = string_cell(row, ['teaching_2'])
                r.teaching_3 = string_cell(row, ['teaching_3'])
                r.teaching_4 = string_cell(row, ['teaching_4'])
                r.subject = string_cell(row, ['subject'])
                r.field = string_cell(row, ['field'])
                r.specialisation = string_cell(row, ['specialisation'])
                r.stream = string_cell(row, ['stream'])
                r.endorsement = string_cell(row, ['endorsement'])
                r.award_year = int_cell(row, ['award_year'])
                r.award_completion_status = string_cell(row, ['award_completion_status'])
                r.award_completion_date = parse_grade_results_date(
                    'award_completion_date', string_cell(row, ['award_completion_date']))
                r.award_completion_confirmed_date = parse_grade_results_date(
                    'award_completion_confirmed_date', string_cell(row, ['award_completion_confirmed_date']))
                r.admission_year = int_cell(row, ['admission_year'])
                r.admission_reason = string_cell(row, ['admission_reason'])
                r.admission_criteria = string_cell(row, ['admission_criteria'])
                r.admission_status = string_cell(row, ['admission_status'])
                r.grade = string_cell(row, ['grade'])
                r.grade_status = string_cell(row, ['grade_status'])
                r.result_status_code = string_cell(row, ['result_status_code'])
                r.result_status = string_cell(row, ['result_status'])
                r.grade_ranking = int_cell(row, ['grade_ranking'])
                r.mark = float_cell(row, ['mark'])
                r.moe_completion_code = int_cell(row, ['moe_completion_code'])
                r.iscontinuinggrade = int_cell(row, ['iscontinuinggrade'])
                r.ispassgrade = int_cell(row, ['ispassgrade'])
                r.query_date = parse_grade_results_date('query_date', string_cell(row, ['query_date']))
                # remember the first query date that could be parsed
                if (query_date is None) and (r.query_date is not None):
                    query_date = datetime.strptime(
                        parse_grade_results_date('query_date', string_cell(row, ['query_date'])),
                        "%Y-%m-%d")
                r.enr_year = int_cell(row, ['enr_year', 'enrolment_year'])
                r.enrolment_status = string_cell(row, ['enrolment_status'])
                r.final_grade = string_cell(row, ['final_grade'])
                r.final_grade_ranking = int_cell(row, ['final_grade_ranking'])
                r.final_grade_status = string_cell(row, ['final_grade_status'])
                r.final_grade_result_status = string_cell(row, ['final_grade_result_status'])
                r.papers_per_student = int_cell(row, ['papers_per_student'])
                r.credits_per_student = float_cell(row, ['credits_per_student'])
                r.gpa = float_cell(row, ['gpa'])
                r.ones = int_cell(row, ['ones'])
                r.allgradeones = int_cell(row, ['allgradeones'])
                r.passgradeones = int_cell(row, ['passgradeones'])
                r.retentionones = int_cell(row, ['retentionones'])
                r.award_completion_year = int_cell(row, ['award_completion_year'])
                r.personoid = float_cell(row, ['personoid'])
                r.courseoccurrenceoid = float_cell(row, ['courseoccurrenceoid'])
                r.awardenrolmentoid = float_cell(row, ['awardenrolmentoid'])
                r.enrolmentorcosuoid = float_cell(row, ['enrolmentorcosuoid'])
                r.isformalprogramme = int_cell(row, ['isformalprogramme'])
                r.citizenship_simple = string_cell(row, ['citizenship_simple', 'citizenship_code'])
                r.moe_pbrf_code = string_cell(row, ['moe_pbrf_code'])
                r.moe_pbrf = string_cell(row, ['moe_pbrf'])
                r.achievement_date = parse_grade_results_date(
                    'achievement_date', string_cell(row, ['achievement_date']))
                r.te_reo = int_cell(row, ['te_reo'])
                r.save()
                # progress
                if (count % 1000) == 0:
                    update_tablestatus(GradeResults._meta.db_table, "Imported " + str(count) + " rows...")
    except Exception:
        msg = traceback.format_exc()
        logger.error(msg=msg)
        # Record the failure instead of returning right here, so that the
        # failure notification below is still sent.
        result = msg
    finally:
        if delete:
            try:
                os.remove(csv)
            except Exception:
                msg = traceback.format_exc()
                logger.error(msg=msg)
                # an import error, if any, stays the reported one
                if result is None:
                    result = msg

    # Publish the query date as the table timestamp, but only after a clean
    # import and only when it falls in the current year.
    # NOTE(review): in the collapsed source it is ambiguous whether this
    # guard was commented out; it is treated as active here - confirm.
    if (result is None) and (query_date is not None) and (query_date.year == datetime.today().year):
        update_tablestatus(GradeResults._meta.db_table, timestamp=query_date)

    if email is not None:
        send_email(
            email, 'Import: grade results',
            'Import succeeded' if (result is None) else 'Import failed: ' + result)

    return result
def import_scholarships(csv, encoding, email=None, delete=True):
    """
    Loads the scholarships CSV (Jade Export) into the Scholarship table.

    The table is emptied first, then one record is saved per CSV row.
    Errors are logged and returned rather than raised.

    :param csv: the CSV file to import
    :type csv: str
    :param encoding: the file encoding (eg utf-8)
    :type encoding: str
    :param email: the (optional) email address to send a notification to
    :type email: str
    :param delete: whether to delete the data file
    :type delete: bool
    :return: None if successful, otherwise error message
    :rtype: str
    """
    result = None

    set_maintenance_mode(True)

    # start from an empty table
    Scholarship.objects.all().delete()

    try:
        with open(csv, encoding=encoding) as handle:
            rows = DictReader(handle)
            # normalise the header: lower case, underscores instead of blanks
            rows.fieldnames = [column.lower().replace(" ", "_") for column in rows.fieldnames]
            for imported, row in enumerate(rows, start=1):
                truncate_strings(row, 250)
                encode_strings(row, 'utf-8')

                record = Scholarship()
                record.student_id = row['person_id']
                record.name = row['template']
                record.status = row['status']
                record.decision = row['decision']
                record.year = int(row['year'])
                record.save()

                # progress feedback every 1000 rows
                if imported % 1000 == 0:
                    update_tablestatus(Scholarship._meta.db_table, "Imported {0} rows...".format(imported))
    except Exception:
        msg = traceback.format_exc()
        logger.error(msg=msg)
        result = msg
    finally:
        if delete:
            try:
                os.remove(csv)
            except Exception:
                msg = traceback.format_exc()
                logger.error(msg=msg)
                result = msg

    if email is not None:
        if result is None:
            body = 'Import succeeded'
        else:
            body = 'Import failed: ' + result
        send_email(email, 'Import: scholarships', body)

    return result
# Walk the tree below ``root`` and rewrite every mvol structure file
# (mvol-NNNN-NNNN-NNNN.txt / .struct.txt) whose tab-separated header is not
# all lower case.  The original file is kept next to it with the suffix
# ".old".
# NOTE(review): the source formatting does not show whether the file loop
# was nested under the directory match below; it is kept at directory level
# here (every visited directory is scanned) - confirm against the original.
for dirpath, dirnames, filenames in walk(root):
    # Raw strings keep the regular expressions free of invalid string
    # escapes ('\/', '\d'); the patterns themselves are unchanged.
    if match(r".*\/mvol\/\d{4}\/\d{4}\/\d{4}", dirpath):
        print("Stopping recursion at "+dirpath)
        # emptying the list in place stops walk() from descending further
        del dirnames[:]

    for filename in filenames:
        print("Scanning " + filename)
        if not match(r"mvol-\d{4}-\d{4}-\d{4}(?:\.struct)?\.txt$", filename):
            continue

        print("Acting on " + filename)
        path = join(dirpath, filename)
        try:
            with open(path, 'r') as f:
                reader = DictReader(f, delimiter='\t')
                # fieldnames is None for an empty file
                names = reader.fieldnames or []
                # Explicit checks instead of assert: asserts vanish under
                # "python -O" and print an empty message when they fire.
                if 'object' not in names and 'Object' not in names:
                    raise ValueError("No 'object' column in " + path)

                lowered = [x.lower() for x in names]
                if names == lowered:
                    # header is already lower case, nothing to rewrite
                    continue

                print("Editing " + path)
                reader.fieldnames = lowered
                with open(path + ".new", 'w') as out:
                    writer = DictWriter(out, fieldnames=lowered, delimiter='\t', quoting=QUOTE_MINIMAL)
                    writer.writeheader()
                    for row in reader:
                        # copy only the header columns; surplus cells of an
                        # over-long row would make DictWriter raise
                        writer.writerow({header: row[header] for header in lowered})

            # Both files are closed at this point, so the renames also work
            # on platforms that refuse to move open files.
            move(path, path + '.old')
            if isfile(path):
                raise OSError("Could not move " + path + " out of the way")
            move(path + '.new', path)
        except Exception as e:
            # best effort: report the problem and carry on with the next file
            print(e)