def run(self, driver, task, log):
    inp = task["source"]["file"]
    inp = compat.translate_unicode(inp)
    inp = "input/{}".format(inp)
    row_match = task["source"].get("row", None)
    value_match = task["source"].get("value", None)
    attr = task["source"].get("attr", None)
    mapping = task["source"].get("mapping", None)
    if row_match and value_match:
        if attr:
            record_set = etl.fromxml(inp, row_match, value_match, attr)
        else:
            record_set = etl.fromxml(inp, row_match, value_match)
    elif row_match and mapping:
        record_set = etl.fromxml(inp, row_match, mapping)
    else:
        raise ValueError('Incorrect parameter for source')
    if not etl.data(record_set).any():
        log.write("Task skipped. No rows on source")
    else:
        transform = TransformSubTask(task, log)
        record_set = transform.get_result(record_set)
        output_driver = driver.get_driver(task["target"]["connection"])
        db = output_driver.get_db()
        table = task["target"]["table"]
        table = compat.translate_unicode(table)
        if "schema" in task["target"]:
            schema_name = task["target"]["schema"]
            schema_name = compat.translate_unicode(schema_name)
        else:
            schema_name = None
        task_log = "log/xml-db_{}_{}.log".format(task["name"],
                                                 get_time_filename())
        with open(task_log, "w") as lg:
            if "truncate" in task["target"] and task["target"]["truncate"]:
                record_set.progress(10000, out=lg).todb(
                    output_driver.cursor(db), tablename=table,
                    schema=schema_name)
            else:
                record_set.progress(10000, out=lg).appenddb(
                    output_driver.cursor(db), tablename=table,
                    schema=schema_name)
        db.close()
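
# Illustrative sketch (not from the original project): a task dict with the
# shape the runner above reads. The key names mirror the lookups in the code;
# the concrete values here are hypothetical.
example_task = {
    "name": "orders_load",
    "source": {
        "file": "orders.xml",   # resolved to input/orders.xml
        "row": "order",         # element that marks one row
        "value": "field",       # element holding the values, and optionally:
        "attr": "v",            # attribute carrying each value
        # "mapping": {...},     # alternative to value/attr
    },
    "target": {
        "connection": "warehouse",
        "table": "orders",
        "schema": "staging",    # optional
        "truncate": True,       # True -> todb (replace); else appenddb
    },
}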
def test_fromxml_2():
    # initial data (text mode so we can write a str)
    f = NamedTemporaryFile(mode="w", delete=False)
    data = """<table>
        <tr>
            <td v='foo'/><td v='bar'/>
        </tr>
        <tr>
            <td v='a'/><td v='1'/>
        </tr>
        <tr>
            <td v='b'/><td v='2'/>
        </tr>
        <tr>
            <td v='c'/><td v='2'/>
        </tr>
    </table>"""
    f.write(data)
    f.close()

    actual = fromxml(f.name, "tr", "td", "v")
    expect = (("foo", "bar"),
              ("a", "1"),
              ("b", "2"),
              ("c", "2"))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
def test_fromxml():
    # initial data (text mode so we can write a str)
    f = NamedTemporaryFile(mode="w", delete=False)
    data = """<table>
        <tr>
            <td>foo</td><td>bar</td>
        </tr>
        <tr>
            <td>a</td><td>1</td>
        </tr>
        <tr>
            <td>b</td><td>2</td>
        </tr>
        <tr>
            <td>c</td><td>2</td>
        </tr>
    </table>"""
    f.write(data)
    f.close()

    actual = fromxml(f.name, 'tr', 'td')
    expect = (('foo', 'bar'),
              ('a', '1'),
              ('b', '2'),
              ('c', '2'))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
def test_fromxml_5():
    # initial data (text mode so we can write a str)
    f = NamedTemporaryFile(mode="w", delete=False)
    data = """<table>
        <row>
            <foo>a</foo><baz><bar v='1'/><bar v='3'/></baz>
        </row>
        <row>
            <foo>b</foo><baz><bar v='2'/></baz>
        </row>
        <row>
            <foo>c</foo><baz><bar v='2'/></baz>
        </row>
    </table>"""
    f.write(data)
    f.close()

    actual = fromxml(f.name, 'row', {'foo': 'foo', 'bar': ('baz/bar', 'v')})
    expect = (('foo', 'bar'),
              ('a', ('1', '3')),
              ('b', '2'),
              ('c', '2'))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
def dof9_emergency_teacher_import(data, input_file, destination):
    """Import the emergency teacher information from a Timetabler DOF9 file."""
    emergency_teachers = petl.fromxml(
        input_file,
        '{http://www.timetabling.com.au/DOV9}EmergencyTeachers/'
        '{http://www.timetabling.com.au/DOV9}EmergencyTeacher', {
            'EmergencyTeacherID': '{http://www.timetabling.com.au/DOV9}EmergencyTeacherID',
            'Code': '{http://www.timetabling.com.au/DOV9}Code',
            'FirstName': '{http://www.timetabling.com.au/DOV9}FirstName',
            'MiddleName': '{http://www.timetabling.com.au/DOV9}MiddleName',
            'LastName': '{http://www.timetabling.com.au/DOV9}LastName',
            'Salutation': '{http://www.timetabling.com.au/DOV9}Salutation',
            'Email': '{http://www.timetabling.com.au/DOV9}Email',
            'Address': '{http://www.timetabling.com.au/DOV9}Address',
            'Suburb': '{http://www.timetabling.com.au/DOV9}Suburb',
            'State': '{http://www.timetabling.com.au/DOV9}State',
            'Postcode': '{http://www.timetabling.com.au/DOV9}Postcode',
            'Phone': '{http://www.timetabling.com.au/DOV9}Phone',
            'Mobile': '{http://www.timetabling.com.au/DOV9}Mobile',
            'OtherPhone': '{http://www.timetabling.com.au/DOV9}OtherPhone',
            'Priority': '{http://www.timetabling.com.au/DOV9}Priority',
            'Notes': '{http://www.timetabling.com.au/DOV9}Notes',
            'SpareField1': '{http://www.timetabling.com.au/DOV9}SpareField1',
            'SpareField2': '{http://www.timetabling.com.au/DOV9}SpareField2',
            'SpareField3': '{http://www.timetabling.com.au/DOV9}SpareField3'
        })
    data.set(destination, emergency_teachers)
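
# The DOV9/TDV9 mappings above repeat the Clark-notation namespace prefix
# ('{http://...}Tag') for every field. A small helper, sketched here and not
# part of the original code, could build the same mapping from plain names.
DOV9 = '{http://www.timetabling.com.au/DOV9}'

def qualified(*fields):
    """Return a petl.fromxml mapping of plain names to namespaced elements."""
    return {name: DOV9 + name for name in fields}

# e.g. qualified('Code', 'FirstName') ==
#      {'Code': DOV9 + 'Code', 'FirstName': DOV9 + 'FirstName'}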
def DataIntegration(clinics_LOC, Services_LOC, Location_LOC):
    # Read the clinics.csv file
    fileData = pt.fromcsv(clinics_LOC)
    # Read the clinic_services.csv file
    servicesData = pt.fromcsv(Services_LOC)
    # Read the xml file cliniclocations.xml
    locationXML = pt.fromxml(Location_LOC, 'clinic', {
        "ClinicID": "ClinicID",
        "Lat": "Lat",
        "Lon": "Lon"
    })
    # Join the two csv tables using ClinicID as the key
    fileJoin = pt.join(servicesData, fileData, key="ClinicID")
    # Join the result with the xml table, again on ClinicID
    MainJoin = pt.join(fileJoin, locationXML, key="ClinicID")
    # Keep only the required columns
    result = pt.cut(MainJoin, 'ClinicServiceID', 'Service', 'ClinicID',
                    'Suburb', 'Postcode', 'Lat', 'Lon')
    # Create the final csv file, clinic_service_locations.csv
    pt.tocsv(result, "clinic_service_locations.csv")
    print('CSV file generated!')
def ptf9_staff_import(data, input_file, destination):
    """Import staff users from a Timetabler PTF9 export file."""
    staff_users = petl.fromxml(
        input_file,
        '{http://www.timetabling.com.au/TDV9}Teachers/'
        '{http://www.timetabling.com.au/TDV9}Teacher', {
            'TeacherID': '{http://www.timetabling.com.au/TDV9}TeacherID',
            'Code': '{http://www.timetabling.com.au/TDV9}Code',
            'FirstName': '{http://www.timetabling.com.au/TDV9}FirstName',
            'MiddleName': '{http://www.timetabling.com.au/TDV9}MiddleName',
            'LastName': '{http://www.timetabling.com.au/TDV9}LastName',
            'Salutation': '{http://www.timetabling.com.au/TDV9}Salutation',
            'DaysUnavailable': '{http://www.timetabling.com.au/TDV9}DaysUnavailable',
            'PeriodsUnavailable': '{http://www.timetabling.com.au/TDV9}PeriodsUnavailable',
            'ProposedLoad': '{http://www.timetabling.com.au/TDV9}ProposedLoad',
            'ActualLoad': '{http://www.timetabling.com.au/TDV9}ActualLoad',
            'FinalLoad': '{http://www.timetabling.com.au/TDV9}FinalLoad',
            'ConsecutivePeriods': '{http://www.timetabling.com.au/TDV9}ConsecutivePeriods',
            'MaxYardDutyLoad': '{http://www.timetabling.com.au/TDV9}MaxYardDutyLoad',
            'PeriodsOff': '{http://www.timetabling.com.au/TDV9}PeriodsOff',
            'Email': '{http://www.timetabling.com.au/TDV9}Email',
            'SpareField1': '{http://www.timetabling.com.au/TDV9}SpareField1',
            'SpareField2': '{http://www.timetabling.com.au/TDV9}SpareField2',
            'SpareField3': '{http://www.timetabling.com.au/TDV9}SpareField3',
        })
    data.set(destination, staff_users)
def run(self, driver, task, log):
    inp = task["source"]["file"]
    inp = compat.translate_unicode(inp)
    inp = "input/{}".format(inp)
    row_match = task["source"].get("row", None)
    value_match = task["source"].get("value", None)
    attr = task["source"].get("attr", None)
    mapping = task["source"].get("mapping", None)
    if row_match and value_match:
        if attr:
            record_set = etl.fromxml(inp, row_match, value_match, attr)
        else:
            record_set = etl.fromxml(inp, row_match, value_match)
    elif row_match and mapping:
        record_set = etl.fromxml(inp, row_match, mapping)
    else:
        raise ValueError('Incorrect parameter for source')
    if not etl.data(record_set).any():
        log.write("Task skipped. No rows on source")
    else:
        transform = TransformSubTask(task, log)
        record_set = transform.get_result(record_set)
        out = task["target"]["file"]
        out = compat.translate_unicode(out)
        out = "output/{}".format(out)
        separator = task["target"].get("delimiter", ";")
        separator = compat.translate_unicode(separator)
        enc = task["target"].get("encoding", "utf-8")
        task_log = "log/xml-csv_{}_{}.log".format(task["name"],
                                                  get_time_filename())
        with open(task_log, "w") as lg:
            if "truncate" in task["target"] and task["target"]["truncate"]:
                record_set.progress(10000, out=lg).tocsv(
                    out, encoding=enc, delimiter=separator)
            else:
                record_set.progress(10000, out=lg).appendcsv(
                    out, encoding=enc, delimiter=separator)
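
# For this CSV-target variant only the "target" block differs from the
# database task sketched earlier; again a hypothetical example based on the
# keys the code reads.
example_csv_target = {
    "file": "orders.csv",   # written to output/orders.csv
    "delimiter": ";",       # optional, defaults to ";"
    "encoding": "utf-8",    # optional, defaults to "utf-8"
    "truncate": True,       # True -> tocsv (overwrite); else appendcsv
}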
def test_teehtml():
    t1 = (('foo', 'bar'),
          ('a', 2),
          ('b', 1),
          ('c', 3))
    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)

    etl.wrap(t1).teehtml(f1.name).selectgt('bar', 1).topickle(f2.name)
    ieq(t1,
        etl.fromxml(f1.name, './/tr', ('th', 'td')).convertnumbers())
    ieq(etl.wrap(t1).selectgt('bar', 1),
        etl.frompickle(f2.name))
def test_teehtml_unicode():
    t1 = ((u'foo', u'bar'),
          (u'Արամ Խաչատրյան', 2),
          (u'Johann Strauß', 1),
          (u'Вагиф Сәмәдоғлу', 3))
    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)

    (etl.wrap(t1)
        .teehtml(f1.name, encoding='utf-8')
        .selectgt('bar', 1)
        .topickle(f2.name))
    ieq(t1,
        (etl.fromxml(f1.name, './/tr', ('th', 'td'), encoding='utf-8')
            .convertnumbers()))
    ieq(etl.wrap(t1).selectgt('bar', 1),
        etl.frompickle(f2.name))
def day_import(data, input_file, destination):
    """Import days from a Timetabler PTF9 export file."""
    new_days = petl.fromxml(
        input_file,
        '{http://www.timetabling.com.au/TDV9}Days/'
        '{http://www.timetabling.com.au/TDV9}Day', {
            'DayID': '{http://www.timetabling.com.au/TDV9}DayID',
            'Code': '{http://www.timetabling.com.au/TDV9}Code',
            'Name': '{http://www.timetabling.com.au/TDV9}Name',
        })
    data.set(destination, new_days)
def ptf9_student_enrollment_import(data, input_file, destination):
    """Import student enrollments from a Timetabler PTF9 export file."""
    student_enrollments = petl.fromxml(
        input_file,
        '{http://www.timetabling.com.au/TDV9}StudentLessons/'
        '{http://www.timetabling.com.au/TDV9}StudentLesson', {
            'StudentID': '{http://www.timetabling.com.au/TDV9}StudentID',
            'CourseID': '{http://www.timetabling.com.au/TDV9}CourseID',
            'LessonType': '{http://www.timetabling.com.au/TDV9}LessonType',
            'ClassCode': '{http://www.timetabling.com.au/TDV9}ClassCode',
            'RollClassCode': '{http://www.timetabling.com.au/TDV9}RollClassCode',
        })
    data.cat(destination, student_enrollments)
def teacher_timetable_import(data, input_file, destination):
    """Import the teacher timetable information from a PTF9 file."""
    teacher_timetables = petl.fromxml(
        input_file,
        '{http://www.timetabling.com.au/TDV9}Timetables/'
        '{http://www.timetabling.com.au/TDV9}Timetable', {
            'TimetableID': '{http://www.timetabling.com.au/TDV9}TimetableID',
            'RollClassID': '{http://www.timetabling.com.au/TDV9}RollClassID',
            'PeriodID': '{http://www.timetabling.com.au/TDV9}PeriodID',
            'ClassID': '{http://www.timetabling.com.au/TDV9}ClassID',
            'RoomID': '{http://www.timetabling.com.au/TDV9}RoomID',
            'TeacherID': '{http://www.timetabling.com.au/TDV9}TeacherID'
        })
    data.set(destination, teacher_timetables)
def etl(file):
    # Dispatch on file extension and load with the matching petl reader.
    import petl as etl
    import sqlite3

    tb1 = []
    if ".csv" in file:
        tb1 = etl.fromcsv(file)
    elif ".json" in file:
        tb1 = etl.fromjson(file)
    elif ".xls" in file:
        tb1 = etl.fromxls(file)
    elif ".xml" in file:
        tb1 = etl.fromxml(file, "row")
    return tb1
def room_import(data, input_file, destination):
    """Import rooms from a Timetabler PTF9 export file."""
    new_rooms = petl.fromxml(
        input_file,
        '{http://www.timetabling.com.au/TDV9}Rooms/'
        '{http://www.timetabling.com.au/TDV9}Room', {
            'RoomID': '{http://www.timetabling.com.au/TDV9}RoomID',
            'Code': '{http://www.timetabling.com.au/TDV9}Code',
            'Name': '{http://www.timetabling.com.au/TDV9}Name',
            'Seats': '{http://www.timetabling.com.au/TDV9}Seats',
            'SiteNo': '{http://www.timetabling.com.au/TDV9}SiteNo',
            'Notes': '{http://www.timetabling.com.au/TDV9}Notes',
        })
    data.set(destination, new_rooms)
def ptf9_staff_enrollment_import(data, input_file, destination):
    """Import staff enrolments from a Timetabler PTF9 export file."""
    staff_enrollments = petl.fromxml(
        input_file,
        '{http://www.timetabling.com.au/TDV9}Timetables/'
        '{http://www.timetabling.com.au/TDV9}Timetable', {
            'TimetableID': '{http://www.timetabling.com.au/TDV9}TimetableID',
            'RollClassID': '{http://www.timetabling.com.au/TDV9}RollClassID',
            'PeriodID': '{http://www.timetabling.com.au/TDV9}PeriodID',
            'ClassID': '{http://www.timetabling.com.au/TDV9}ClassID',
            'RoomID': '{http://www.timetabling.com.au/TDV9}RoomID',
            'TeacherID': '{http://www.timetabling.com.au/TDV9}TeacherID',
        })
    data.set(destination, staff_enrollments)
def student_timetable_import(data, input_file, destination):
    """Import the student timetable information from a PTF9 file."""
    student_timetables = petl.fromxml(
        input_file,
        '{http://www.timetabling.com.au/TDV9}StudentLessons/'
        '{http://www.timetabling.com.au/TDV9}StudentLesson', {
            'StudentID': '{http://www.timetabling.com.au/TDV9}StudentID',
            'CourseID': '{http://www.timetabling.com.au/TDV9}CourseID',
            'LessonType': '{http://www.timetabling.com.au/TDV9}LessonType',
            'ClassCode': '{http://www.timetabling.com.au/TDV9}ClassCode',
            'RollClassCode': '{http://www.timetabling.com.au/TDV9}RollClassCode'
        })
    data.set(destination, student_timetables)
def period_import(data, input_file, destination):
    """Import periods from a Timetabler PTF9 export file."""
    new_periods = petl.fromxml(
        input_file,
        '{http://www.timetabling.com.au/TDV9}Periods/'
        '{http://www.timetabling.com.au/TDV9}Period', {
            'PeriodID': '{http://www.timetabling.com.au/TDV9}PeriodID',
            'DayID': '{http://www.timetabling.com.au/TDV9}DayID',
            'Code': '{http://www.timetabling.com.au/TDV9}Code',
            'Name': '{http://www.timetabling.com.au/TDV9}Name',
            'Doubles': '{http://www.timetabling.com.au/TDV9}Doubles',
            'Triples': '{http://www.timetabling.com.au/TDV9}Triples',
            'Quadruples': '{http://www.timetabling.com.au/TDV9}Quadruples',
            'SiteMove': '{http://www.timetabling.com.au/TDV9}SiteMove',
            'Load': '{http://www.timetabling.com.au/TDV9}Load',
            'Index': '{http://www.timetabling.com.au/TDV9}Index',
            'Number': '{http://www.timetabling.com.au/TDV9}Number',
            'StartTime': '{http://www.timetabling.com.au/TDV9}StartTime',
            'FinishTime': '{http://www.timetabling.com.au/TDV9}FinishTime',
        })
    data.set(destination, new_periods)
def ptf9_student_import(data, input_file, destination):
    """Import student users from a Timetabler PTF9 export file."""
    student_users = petl.fromxml(
        input_file,
        '{http://www.timetabling.com.au/TDV9}Students/'
        '{http://www.timetabling.com.au/TDV9}Student', {
            'StudentID': '{http://www.timetabling.com.au/TDV9}StudentID',
            'Code': '{http://www.timetabling.com.au/TDV9}Code',
            'FirstName': '{http://www.timetabling.com.au/TDV9}FirstName',
            'MiddleName': '{http://www.timetabling.com.au/TDV9}MiddleName',
            'FamilyName': '{http://www.timetabling.com.au/TDV9}FamilyName',
            'Gender': '{http://www.timetabling.com.au/TDV9}Gender',
            'Email': '{http://www.timetabling.com.au/TDV9}Email',
            'House': '{http://www.timetabling.com.au/TDV9}House',
            'RollClass': '{http://www.timetabling.com.au/TDV9}RollClass',
            'YearLevel': '{http://www.timetabling.com.au/TDV9}YearLevel',
            'HomeGroup': '{http://www.timetabling.com.au/TDV9}HomeGroup',
            'WCSet': '{http://www.timetabling.com.au/TDV9}WCSet',
            'BOSCode': '{http://www.timetabling.com.au/TDV9}BOSCode',
            'SpareField1': '{http://www.timetabling.com.au/TDV9}SpareField1',
            'SpareField2': '{http://www.timetabling.com.au/TDV9}SpareField2',
            'SpareField3': '{http://www.timetabling.com.au/TDV9}SpareField3',
        })
    data.cat(destination, student_users)
d = '''<table>
    <tr>
        <td>foo</td><td>bar</td>
    </tr>
    <tr>
        <td>a</td><td>1</td>
    </tr>
    <tr>
        <td>b</td><td>2</td>
    </tr>
    <tr>
        <td>c</td><td>2</td>
    </tr>
</table>'''
with open('example1.xml', 'w') as f:
    f.write(d)

table1 = etl.fromxml('example1.xml', 'tr', 'td')
table1

# if the data values are stored in an attribute, provide the attribute name
# as an extra positional argument
d = '''<table>
    <tr>
        <td v='foo'/><td v='bar'/>
    </tr>
    <tr>
        <td v='a'/><td v='1'/>
    </tr>
    <tr>
        <td v='b'/><td v='2'/>
    </tr>
    <tr>
        <td v='c'/><td v='2'/>
    </tr>
</table>'''
with open('example2.xml', 'w') as f:
    f.write(d)

table2 = etl.fromxml('example2.xml', 'tr', 'td', 'v')
table2
def dof9_import_dates(data, input_file, teacher_absences_destination,
                      period_replacements_destination,
                      emergency_teacher_availables_destination,
                      emergency_teacher_yard_duty_available_destination,
                      yard_duty_replacements_destination,
                      room_replacements_destination,
                      room_edits_destination,
                      lesson_cancellations_destination,
                      yard_duty_cancellations_destination,
                      load_adjustments_destination,
                      room_availables_destination,
                      attendence_changes_destination):
    """Import the dates and definitions for the replacements that need to
    happen."""
    namespaces = {'x': 'http://www.timetabling.com.au/DOV9'}
    all_teacher_absences = petl.wrap([[]])
    all_period_replacements = petl.wrap([[]])
    all_emergency_teacher_availables = petl.wrap([[]])
    all_emergency_teacher_yard_duty_available = petl.wrap([[]])
    all_yard_duty_replacements = petl.wrap([[]])
    all_room_replacements = petl.wrap([[]])
    all_room_edits = petl.wrap([[]])
    all_lesson_cancellations = petl.wrap([[]])
    all_yard_duty_cancellations = petl.wrap([[]])
    all_load_adjustments = petl.wrap([[]])
    all_room_availables = petl.wrap([[]])
    all_attendence_changes = petl.wrap([[]])

    tree = lxml.etree.parse(open(input_file))

    # Read everything, one <Date> subtree at a time
    date_subtrees = tree.xpath(
        '/x:DailyOrganiserData/x:Dates/x:Date',
        namespaces={'x': 'http://www.timetabling.com.au/DOV9'})
    for date_subtree in date_subtrees:
        cf_date = date_subtree.findtext('x:Date', namespaces=namespaces)
        cf_datestring = date_subtree.findtext('x:DateString',
                                              namespaces=namespaces)
        cf_day = date_subtree.findtext('x:Day', namespaces=namespaces)
        add_common_fields = lambda i, cf_date=cf_date, cf_datestring=cf_datestring, cf_day=cf_day: i.addfield(
            'Date', cf_date).addfield('DateString', cf_datestring).addfield(
            'Day', cf_day)  # noqa
        subtree_data = petl.MemorySource(lxml.etree.tostring(date_subtree))

        if teacher_absences_destination is not None:
            teacher_absences = petl.fromxml(
                subtree_data,
                '{http://www.timetabling.com.au/DOV9}TeacherAbsences/'
                '{http://www.timetabling.com.au/DOV9}TeacherAbsence', {
                    'TeacherAbsenceID': '{http://www.timetabling.com.au/DOV9}TeacherAbsenceID',
                    'TeacherCode': '{http://www.timetabling.com.au/DOV9}TeacherCode',
                    'SessionNo': '{http://www.timetabling.com.au/DOV9}SessionNo',
                    'Precedes': '{http://www.timetabling.com.au/DOV9}Precedes',
                    'IsYardDuty': '{http://www.timetabling.com.au/DOV9}IsYardDuty',
                    'PeriodCode': '{http://www.timetabling.com.au/DOV9}PeriodCode',
                    'TeacherAbsenceReasonID': '{http://www.timetabling.com.au/DOV9}TeacherAbsenceID',
                    'Counted': '{http://www.timetabling.com.au/DOV9}Counted',
                    'Load': '{http://www.timetabling.com.au/DOV9}Load',
                    'ArchiveTimetableReference': '{http://www.timetabling.com.au/DOV9}ArchiveTimetableReference',
                    'ArchiveErrorType': '{http://www.timetabling.com.au/DOV9}ArchiveErrorType'
                })
            teacher_absences = add_common_fields(teacher_absences)
            all_teacher_absences = all_teacher_absences.cat(teacher_absences)

        if period_replacements_destination is not None:
            period_replacements = petl.fromxml(
                subtree_data,
                '{http://www.timetabling.com.au/DOV9}PeriodReplacements/'
                '{http://www.timetabling.com.au/DOV9}PeriodReplacement', {
                    'PeriodReplacementID': '{http://www.timetabling.com.au/DOV9}PeriodReplacementID',
                    'RollClassCode': '{http://www.timetabling.com.au/DOV9}RollClassCode',
                    'ClassCode': '{http://www.timetabling.com.au/DOV9}ClassCode',
                    'ClassGroupRowID': '{http://www.timetabling.com.au/DOV9}ClassGroupRowID',
                    'PeriodCode': '{http://www.timetabling.com.au/DOV9}PeriodCode',
                    'PeriodNo': '{http://www.timetabling.com.au/DOV9}PeriodNo',
                    'ReplacementTeacherCode': '{http://www.timetabling.com.au/DOV9}ReplacementTeacherCode',
                    'Load': '{http://www.timetabling.com.au/DOV9}Load',
                    'Count': '{http://www.timetabling.com.au/DOV9}Count',
                    'InLieu': '{http://www.timetabling.com.au/DOV9}InLieu',
                    'Notes': '{http://www.timetabling.com.au/DOV9}Notes',
                    'Index': '{http://www.timetabling.com.au/DOV9}Index',
                    'NotRequired': '{http://www.timetabling.com.au/DOV9}NotRequired',
                    'DuplicateReplacementID': '{http://www.timetabling.com.au/DOV9}DuplicateReplacementID',
                    'ReferenceTeacherCode': '{http://www.timetabling.com.au/DOV9}ReferenceTeacherCode',
                    'IsComposites': '{http://www.timetabling.com.au/DOV9}IsComposites',
                    'ArchiveTimetableReference': '{http://www.timetabling.com.au/DOV9}ArchiveTimetableReference',
                    'ArchiveErrorType': '{http://www.timetabling.com.au/DOV9}ArchiveErrorType'
                })
            period_replacements = add_common_fields(period_replacements)
            all_period_replacements = all_period_replacements.cat(
                period_replacements)

        if yard_duty_replacements_destination is not None:
            yard_duty_replacements = petl.fromxml(
                subtree_data,
                '{http://www.timetabling.com.au/DOV9}YardDutyReplacements/'
                '{http://www.timetabling.com.au/DOV9}YardDutyReplacement', {
                    'YardDutyReplacementID': '{http://www.timetabling.com.au/DOV9}YardDutyReplacementID',
                    'YardDutyCode': '{http://www.timetabling.com.au/DOV9}YardDutyCode',
                    'PeriodCode': '{http://www.timetabling.com.au/DOV9}PeriodCode',
                    'PeriodNo': '{http://www.timetabling.com.au/DOV9}PeriodNo',
                    'Precedes': '{http://www.timetabling.com.au/DOV9}Precedes',
                    'SessionNo': '{http://www.timetabling.com.au/DOV9}SessionNo',
                    'ReplacementTeacherCode': '{http://www.timetabling.com.au/DOV9}ReplacementTeacherCode',
                    'Load': '{http://www.timetabling.com.au/DOV9}Load',
                    'Count': '{http://www.timetabling.com.au/DOV9}Count',
                    'InLieu': '{http://www.timetabling.com.au/DOV9}InLieu',
                    'Notes': '{http://www.timetabling.com.au/DOV9}Notes',
                    'Index': '{http://www.timetabling.com.au/DOV9}Index',
                    'NotRequired': '{http://www.timetabling.com.au/DOV9}NotRequired',
                    'ActivityCode': '{http://www.timetabling.com.au/DOV9}ActivityCode',
                    'ReferenceTeacherCode': '{http://www.timetabling.com.au/DOV9}ReferenceTeacherCode',
                    'DuplicateReplacementID': '{http://www.timetabling.com.au/DOV9}DuplicateReplacementID',
                    'ArchiveTimetableReference': '{http://www.timetabling.com.au/DOV9}ArchiveTimetableReference',
                    'ArchiveErrorType': '{http://www.timetabling.com.au/DOV9}ArchiveErrorType'
                })
            yard_duty_replacements = add_common_fields(yard_duty_replacements)
            all_yard_duty_replacements = all_yard_duty_replacements.cat(
                yard_duty_replacements)

        if room_edits_destination is not None:
            room_edits = petl.fromxml(
                subtree_data,
                '{http://www.timetabling.com.au/DOV9}RoomEdits/'
                '{http://www.timetabling.com.au/DOV9}RoomEdit', {
                    'ClassCode': '{http://www.timetabling.com.au/DOV9}ClassCode',
                    'ClassGroupRowID': '{http://www.timetabling.com.au/DOV9}ClassGroupRowID',
                    'RollClassCode': '{http://www.timetabling.com.au/DOV9}RollClassCode',
                    'PeriodCode': '{http://www.timetabling.com.au/DOV9}PeriodCode',
                    'ReplacementRoomCode': '{http://www.timetabling.com.au/DOV9}ReplacementRoomCode',
                    'ArchiveTimetableReference': '{http://www.timetabling.com.au/DOV9}ArchiveTimetableReference',
                    'ArchiveErrorType': '{http://www.timetabling.com.au/DOV9}ArchiveErrorType'
                })
            room_edits = add_common_fields(room_edits)
            all_room_edits = all_room_edits.cat(room_edits)

    if teacher_absences_destination is not None:
        data.set(teacher_absences_destination, all_teacher_absences)
    if period_replacements_destination is not None:
        data.set(period_replacements_destination, all_period_replacements)
    if yard_duty_replacements_destination is not None:
        data.set(yard_duty_replacements_destination,
                 all_yard_duty_replacements)
    if room_edits_destination is not None:
        data.set(room_edits_destination, all_room_edits)
def createTableFromXML(xmlFile):
    return etl.fromxml(xmlFile, "store",
                       {"Name": "Name", "Suburb": "Suburb",
                        "Lat": "Lat", "Lon": "Lon"})
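
# Usage sketch for createTableFromXML; the file name and XML content below are
# hypothetical, shaped to match the 'store' row match and field mapping above.
sample = '''<stores>
    <store><Name>Acme</Name><Suburb>Carlton</Suburb>
           <Lat>-37.80</Lat><Lon>144.96</Lon></store>
</stores>'''
with open('stores.xml', 'w') as f:
    f.write(sample)
storesTable = createTableFromXML('stores.xml')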
def main(argv):
    urls = {
        'INDIVIDUALS': {
            'xmlroot': 'response/individuals/individual',
            'parse_dict': {
                'Family ID': ('family', 'id'),
                'Individual ID': ('.', 'id'),
                'Family Position': 'family_position',
                'Prefix': 'salutation',
                'First Name': 'first_name',
                'Middle Name': 'middle_name',
                'Last Name': 'last_name',
                'Legal Name': 'legal_first_name',
                'Active': 'active',
                'Campus': 'campus',
                'Email': 'email',
                'Mailing Street': ".//address[@type='mailing']/street_address",
                'Mailing City': ".//address[@type='mailing']/city",
                'Mailing State': ".//address[@type='mailing']/state",
                'Mailing Postal Code': ".//address[@type='mailing']/zip",
                'Mailing Country': ".//address[@type='mailing']/country",
                'Home Street': ".//address[@type='home']/street_address",
                'Home City': ".//address[@type='home']/city",
                'Home State': ".//address[@type='home']/state",
                'Home Postal Code': ".//address[@type='home']/zip",
                'Home Country': ".//address[@type='home']/country",
                'Other Street': ".//address[@type='other']/street_address",
                'Other City': ".//address[@type='other']/city",
                'Other State': ".//address[@type='other']/state",
                'Other Postal Code': ".//address[@type='other']/zip",
                'Other Country': ".//address[@type='other']/country",
                'Contact Phone': ".//phone[@type='contact']",
                'Home Phone': ".//phone[@type='home']",
                'Work Phone': ".//phone[@type='work']",
                'Mobile Phone': ".//phone[@type='mobile']",
                'Emergency Phone': ".//phone[@type='emergency']",
                'Birthday': 'birthday',
                'Anniversary': 'anniversary',
                'Gender': 'gender',
                'Giving Number': 'giving_number',
                'Marital Status': 'marital_status',
                'Membership Start Date': 'membership_date',
                'Membership End Date': 'membership_end',
                'Membership Type': 'membership_type',
                'Baptized': 'baptized',
                # 'School District': ??,
                # 'How They Heard': ??,
                # 'How They Joined': ??,
                # 'Reason Left Church': ??,
                # 'Job Title': ??,
                'Deceased': 'deceased',  # !!!
                'Baptism Date': ".//user_defined_date_fields/user_defined_date_field[label='Baptism Date']/date",
                'Baptized By': ".//user_defined_text_fields/user_defined_text_field[label='Baptized By']/text",
                'Confirmed Date': ".//user_defined_date_fields/user_defined_date_field[label='Confirmed Date']/date",
                'Confirmed': ".//user_defined_pulldown_fields/user_defined_pulldown_field[label='Confirmed']/selection",
                'Mailbox Number': ".//user_defined_text_fields/user_defined_text_field[label='Mailbox Number']/text",
                'Spirit Mailing': ".//user_defined_pulldown_fields/user_defined_pulldown_field[label='Spirit Mailing']/selection",
                'Photo Release': ".//user_defined_pulldown_fields/user_defined_pulldown_field[label='Photo Release']/selection",
                'Ethnicity': ".//user_defined_pulldown_fields/user_defined_pulldown_field[label='Ethnicity']/selection",
                'Transferred Frm': ".//user_defined_text_fields/user_defined_text_field[label='Transferred Frm']/text",
                'Transferred To': ".//user_defined_text_fields/user_defined_text_field[label='Transferred To']/text",
                'Pastr When Join': ".//user_defined_text_fields/user_defined_text_field[label='Pastr When Join']/text",
                'Pastr When Leav': ".//user_defined_text_fields/user_defined_text_field[label='Pastr When Leav']/text",
                'SK Indiv ID': ".//user_defined_text_fields/user_defined_text_field[label='SK Indiv ID']/text"
            }
        },
        'GROUPS': 'https://ingomar.ccbchurch.com/api.php?srv=group_profiles',
        'ACCOUNTS': 'https://ingomar.ccbchurch.com/api.php?srv=transaction_detail_type_list',
        'TRANSACTIONS': {
            'xmlroot': 'response/batches/batch/transactions/transaction',
            'parse_dict': {
                'Date': 'date',
                'Payment Type': 'payment_type',
                'Check Number': 'check_number',
                'Individual ID': ('individual', 'id'),
                'Account': './/transaction_details/transaction_detail/coa',
                'Amount': './/transaction_details/transaction_detail/amount',
                'Tax Deductible': './/transaction_details/transaction_detail/tax_deductible',
                'Note': './/transaction_details/transaction_detail/note'
            }
        }
    }

    parser = argparse.ArgumentParser(
        description="Parses XML file into CSV output")
    parser.add_argument("--type", required=True,
                        help='One of ' + ', '.join(urls.keys()))
    parser.add_argument("--xml-input-filename", required=True,
                        help="XML file to parse")
    parser.add_argument("--csv-output-filename", required=True,
                        help="CSV output file")
    args = parser.parse_args()

    table1 = petl.fromxml(args.xml_input_filename, urls[args.type]['xmlroot'],
                          urls[args.type]['parse_dict'])
    petl.tocsv(table1, args.csv_output_filename)
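
# Hypothetical invocation of the script above (the script name is a
# placeholder; the flag names come from the argparse setup):
#
#   python ccb_export.py --type INDIVIDUALS \
#       --xml-input-filename individuals.xml \
#       --csv-output-filename individuals.csv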
import petl as etl

# Open CSV file
stores = etl.fromcsv('stores.csv')

# Open XML document
locations = etl.fromxml('locations.xml', 'store',
                        {'Name': 'Name', 'Lat': 'Lat', 'Lon': 'Lon'})
print(locations)

# Set output
output_table = [["ID", "Name", "Suburb", "State", "Postcode"]]
store_id = 1

# Read through stores.csv to generate output_table
store = etl.cut(stores, 'Name', 'Suburb', 'State', 'Postcode').distinct()
print(store)
for s in etl.values(store, 'Name', 'Suburb', 'State', 'Postcode'):
    output_table.append([store_id] + list(s))  # unpack the row tuple
    store_id += 1
print(output_table)

# Merge and join XML and CSV together
merge_output = etl.join(stores, locations, key="Name")
print(merge_output)
store_table = etl.cut(merge_output, 'ID', 'Name', 'Suburb', 'State',
                      'Postcode', 'Lat', 'Lon')
print(etl.head(store_table, 5))

# Export to CSV file
etl.tocsv(merge_output, 'store_locations.csv')
table3 = etl.frompickle('pickel_file.p')
print('Pickle')
print(table3)

################### Reading Text Files #################################
text = 'a,1\nb,2\nc,2\n'
with open('example.txt', 'w') as f:
    f.write(text)
table4 = etl.fromtext('example.txt')
print(table4)

################ Reading XML Files ##################################
table5 = etl.fromxml('data.xml', 'tr', 'td')
print(table5)

################ Reading JSON Files ###############################
data = '''
[{"foo": "a", "bar": 1},
 {"foo": "b", "bar": 2},
 {"foo": "c", "bar": 2}]
'''
with open('example.json', 'w') as f:
    f.write(data)
table6 = etl.fromjson('example.json', header=['foo', 'bar'])
print(table6)
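
# For table5 above to load anything, data.xml must contain <tr>/<td> rows,
# e.g. (hypothetical content, mirroring the earlier fromxml examples):
xml = '''<table>
    <tr><td>foo</td><td>bar</td></tr>
    <tr><td>a</td><td>1</td></tr>
</table>'''
with open('data.xml', 'w') as f:
    f.write(xml)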
# List of source and target files for merge
healthcsvfile = './datafiles/Vic_Health_Care.csv'
locationxmlfile = './datafiles/Vic_Locations.xml'
mergedcsvfile = './datafiles/practice_locations.csv'

# xmlfields maps each output field name to the XML element it is read from
xmlfields = {'Town_name': 'Town', 'Latitude': 'Lat', 'Longitude': 'Lon'}  # type: Dict[str, str]
xmlparent = 'Town_location'
initialrow = ['Practice_Name', 'Latitude', 'Longitude', 'Town', 'State',
              'Post_Code']

# tables in memory created from xml and csv files
csvtable = petl.fromcsv(healthcsvfile)
xmltable = petl.fromxml(locationxmlfile, xmlparent, xmlfields)

# Find the row in xmltable matching the town from the csv
lktbl = petl.lookupone(xmltable, 'Town_name')  # type: Union[Dict[Any, Union[tuple[Any], Tuple[Any]]], Any]
nmdtbl = petl.namedtuples(csvtable)
finaltabl = [initialrow]
for lin in nmdtbl:
    tabl = lktbl[lin.Town]
    latitude = tabl[0]
    longitude = tabl[1]
    insertline = (str(lin.Practice_Name) + ',' + latitude + ',' + longitude +
                  ',' + str(lin.Town) + ',' + str(lin.State) + ',' +
                  str(lin.Postcode)).split(',')
    print(insertline)
    finaltabl.extend([insertline])
def test_fromxml_url():
    tbl = fromxml('http://feeds.bbci.co.uk/news/rss.xml', './/item', 'title')
    print(tbl)
    assert nrows(tbl) > 0
parser.add_argument('-e', action='store_true',
                    help='add quotes to header to fit SQL pattern')
parser.add_argument('config', help='path of config yaml file')
args = parser.parse_args()

info = yaml.load(open(args.config))
xml_file = args.f or info['xml']
csv_file = args.t or info['csv']

table = petl.empty()
# substitute namespace to keys
for key in (eval(Template(str(info['keys'])).substitute(**info['namespace']))
            if 'namespace' in info else info['keys']):
    # collect data from each key
    table = table.cat(petl.fromxml(xml_file, key['anchor'], key['select']))
if 'pks' in info:
    table = table.mergeduplicates(
        info['pks'] if len(info['pks']) > 1 else info['pks'][0])
if 'orderBy' in info:
    table = table.sort(info['orderBy'])
if 'skip' in info:
    table = table.tail(len(table) - info['skip'])
if 'first' in info:
    table = table.head(info['first'])
if 'replace' in info:
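
# A hypothetical config for the script above, shown as the dict yaml.load
# would return. Key names mirror the lookups in the code; the values are
# made up. The ${ns} placeholders are expanded by string.Template before
# the keys list is eval'd.
info = {
    'xml': 'input.xml',
    'csv': 'output.csv',
    'namespace': {'ns': 'http://example.com/schema'},
    'keys': [
        {'anchor': '{${ns}}Rows/{${ns}}Row',
         'select': {'ID': '{${ns}}ID', 'Name': '{${ns}}Name'}},
    ],
    'pks': ['ID'],    # optional: merge duplicate rows on these keys
    'orderBy': 'ID',  # optional sort field
    'skip': 1,        # optional: drop rows from the top
    'first': 100,     # optional: keep only the first rows
}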
import petl as etl
import csv

table1 = etl.fromcsv('covid.csv')

# importing data from xml file and creating table
table2 = etl.fromxml('Country_location.xml', './/tr', ('th', 'td'))
# print(table2)

# removing column country from table
table3 = etl.cutout(table2, 'country')

# merging the covid table with xml data
table4 = etl.join(table1, table3, key='location')
print(table4)

# writing result to csv file (newline='' avoids blank rows on Windows)
with open('covid_countries.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerows(table4)
# Read the csv files with petl
# services.csv
fileData = pt.fromcsv('services.csv')
# clinicservices.csv
servicesData = pt.fromcsv('clinicservices.csv')
# join the two tables using ServiceID as the key
fileJoin = pt.join(servicesData, fileData, key="ServiceID")

# clinics.csv
readCsv = pt.fromcsv('clinics.csv')
# join using ClinicID as the key
doubleJoin = pt.join(fileJoin, readCsv, key='ClinicID')

# read the xml file cliniclocations.xml
locationXML = pt.fromxml('cliniclocations.xml', 'clinic', {
    "ClinicID": "ClinicID",
    "Lat": "Lat",
    "Lon": "Lon"
})
# join with the xml table, again on ClinicID
doubleJoin2 = pt.join(doubleJoin, locationXML, key="ClinicID")

# remove whitespace from the Email field
cleanOne = substitute(doubleJoin2, 'Email', r'\s', '')
# append @myclinic.com.au to every email id
cleanTwo = substitute(cleanOne, 'Email', r'(^[\w]+$)', r'\1@myclinic.com.au')

# keep the required columns
result = pt.cut(cleanTwo, 'ClinicServiceID', 'ClinicID', 'ServiceID',
                'Service', 'Name', 'Suburb', 'State', 'Email', 'Lat', 'Lon')

# create the final csv file, clinicservicelocations.csv
pt.tocsv(result, "clinicservicelocations.csv")
print('CSV file generated!')