Example #1
    def run(self, driver, task, log):
        inp = task["source"]["file"]
        inp = compat.translate_unicode(inp)
        inp = "input/{}".format(inp)
        row_match = task["source"].get("row", None)
        value_match = task["source"].get("value", None)
        attr = task["source"].get("attr", None)
        mapping = task["source"].get("mapping", None)

        if row_match and value_match:
            if attr:
                record_set = etl.fromxml(inp, row_match, value_match, attr)
            else:
                record_set = etl.fromxml(inp, row_match, value_match)
        elif row_match and mapping:
            record_set = etl.fromxml(inp, row_match, mapping)
        else:
            raise ValueError('Incorrect parameter for source')

        if not etl.data(record_set).any():
            log.write("Task skipped. No rows on source")
        else:
            transform = TransformSubTask(task, log)
            record_set = transform.get_result(record_set)

            output_driver = driver.get_driver(task["target"]["connection"])
            db = output_driver.get_db()

            table = task["target"]["table"]
            table = compat.translate_unicode(table)
            if "schema" in task["target"]:
                schema_name = task["target"]["schema"]
                schema_name = compat.translate_unicode(schema_name)
            else:
                schema_name = None

            task_log = "log/xml-db_{}_{}.log".format(task["name"],
                                                     get_time_filename())
            with open(task_log, "w") as lg:
                if "truncate" in task["target"] and task["target"]["truncate"]:
                    record_set.progress(10000,
                                        out=lg).todb(output_driver.cursor(db),
                                                     tablename=table,
                                                     schema=schema_name)
                else:
                    record_set.progress(10000, out=lg).appenddb(
                        output_driver.cursor(db),
                        tablename=table,
                        schema=schema_name)
            db.close()
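For quick reference, a minimal sketch (hypothetical file names, data shaped like the test fixtures below) of the three fromxml call forms this task dispatches between:

import petl as etl

# element text as values: fromxml(source, row_match, value_match)
tbl_text = etl.fromxml('example1.xml', 'tr', 'td')

# attribute values: fromxml(source, row_match, value_match, attr)
tbl_attr = etl.fromxml('example1.xml', 'tr', 'td', 'v')

# mapping of field name -> element path, or (path, attribute) tuple
tbl_map = etl.fromxml('example2.xml', 'row',
                      {'foo': 'foo', 'bar': ('baz/bar', 'v')})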
Example #2
def test_fromxml_2():

    # initial data
    f = NamedTemporaryFile(delete=False)
    data = """<table>
    <tr>
        <td v='foo'/><td v='bar'/>
    </tr>
    <tr>
        <td v='a'/><td v='1'/>
    </tr>
    <tr>
        <td v='b'/><td v='2'/>
    </tr>
    <tr>
        <td v='c'/><td v='2'/>
    </tr>
</table>"""
    f.write(data)
    f.close()

    actual = fromxml(f.name, "tr", "td", "v")
    expect = (("foo", "bar"), ("a", "1"), ("b", "2"), ("c", "2"))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
Example #3
def test_fromxml():
    
    # initial data
    f = NamedTemporaryFile(delete=False)
    data = """<table>
    <tr>
        <td>foo</td><td>bar</td>
    </tr>
    <tr>
        <td>a</td><td>1</td>
    </tr>
    <tr>
        <td>b</td><td>2</td>
    </tr>
    <tr>
        <td>c</td><td>2</td>
    </tr>
</table>"""
    f.write(data)
    f.close()
    
    actual = fromxml(f.name, 'tr', 'td')
    expect = (('foo', 'bar'),
              ('a', '1'),
              ('b', '2'),
              ('c', '2'))
    ieq(expect, actual)
    ieq(expect, actual) # verify can iterate twice
Example #4
def test_fromxml_5():
    
    # initial data
    f = NamedTemporaryFile(delete=False)
    data = """<table>
    <row>
        <foo>a</foo><baz><bar v='1'/><bar v='3'/></baz>
    </row>
    <row>
        <foo>b</foo><baz><bar v='2'/></baz>
    </row>
    <row>
        <foo>c</foo><baz><bar v='2'/></baz>
    </row>
</table>"""
    f.write(data)
    f.close()
    
    actual = fromxml(f.name, 'row', {'foo': 'foo', 'bar': ('baz/bar', 'v')})
    expect = (('foo', 'bar'),
              ('a', ('1', '3')),
              ('b', '2'),
              ('c', '2'))
    ieq(expect, actual)
    ieq(expect, actual) # verify can iterate twice
Example #5
def test_fromxml_2():
    
    # initial data
    f = NamedTemporaryFile(delete=False)
    data = """<table>
    <tr>
        <td v='foo'/><td v='bar'/>
    </tr>
    <tr>
        <td v='a'/><td v='1'/>
    </tr>
    <tr>
        <td v='b'/><td v='2'/>
    </tr>
    <tr>
        <td v='c'/><td v='2'/>
    </tr>
</table>"""
    f.write(data)
    f.close()
    
    print open(f.name).read()
    actual = fromxml(f.name, 'tr', 'td', 'v')
    print actual
    expect = (('foo', 'bar'),
              ('a', '1'),
              ('b', '2'),
              ('c', '2'))
    ieq(expect, actual)
    ieq(expect, actual) # verify can iterate twice
Example #6
def dof9_emergency_teacher_import(data, input_file, destination):
    """Import the emergency teaher information from a Timetabler DOF9 file."""
    emergency_teachers = petl.fromxml(
        input_file,
        '{http://www.timetabling.com.au/DOV9}EmergencyTeachers/{http://www.timetabling.com.au/DOV9}EmergencyTeacher',
        {
            'EmergencyTeacherID':
            '{http://www.timetabling.com.au/DOV9}EmergencyTeacherID',
            'Code': '{http://www.timetabling.com.au/DOV9}Code',
            'FirstName': '{http://www.timetabling.com.au/DOV9}FirstName',
            'MiddleName': '{http://www.timetabling.com.au/DOV9}MiddleName',
            'LastName': '{http://www.timetabling.com.au/DOV9}LastName',
            'Salutation': '{http://www.timetabling.com.au/DOV9}Salutation',
            'Email': '{http://www.timetabling.com.au/DOV9}Email',
            'Address': '{http://www.timetabling.com.au/DOV9}Address',
            'Suburb': '{http://www.timetabling.com.au/DOV9}Suburb',
            'State': '{http://www.timetabling.com.au/DOV9}State',
            'Postcode': '{http://www.timetabling.com.au/DOV9}Postcode',
            'Phone': '{http://www.timetabling.com.au/DOV9}Phone',
            'Mobile': '{http://www.timetabling.com.au/DOV9}Mobile',
            'OtherPhone': '{http://www.timetabling.com.au/DOV9}OtherPhone',
            'Priority': '{http://www.timetabling.com.au/DOV9}Priority',
            'Notes': '{http://www.timetabling.com.au/DOV9}Notes',
            'SpareField1': '{http://www.timetabling.com.au/DOV9}SpareField1',
            'SpareField2': '{http://www.timetabling.com.au/DOV9}SpareField2',
            'SpareField3': '{http://www.timetabling.com.au/DOV9}SpareField3'
        })
    data.set(destination, emergency_teachers)
Example #7
def test_fromxml():

    # initial data
    f = NamedTemporaryFile(delete=False)
    data = """<table>
    <tr>
        <td>foo</td><td>bar</td>
    </tr>
    <tr>
        <td>a</td><td>1</td>
    </tr>
    <tr>
        <td>b</td><td>2</td>
    </tr>
    <tr>
        <td>c</td><td>2</td>
    </tr>
</table>"""
    f.write(data)
    f.close()

    actual = fromxml(f.name, 'tr', 'td')
    expect = (('foo', 'bar'), ('a', '1'), ('b', '2'), ('c', '2'))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
Example #8
def DataIntegration(clinics_LOC, Services_LOC, Location_LOC):
    # Reading the clinics.csv file
    fileData = pt.fromcsv(clinics_LOC)

    # Reading the clinic_services.csv file
    servicesData = pt.fromcsv(Services_LOC)

    # reading the xml file cliniclocations.xml
    locationXML = pt.fromxml(Location_LOC, 'clinic', {
        "ClinicID": "ClinicID",
        "Lat": "Lat",
        "Lon": "Lon"
    })

    # join the CSV tables using the built-in join function with ClinicID as the key
    fileJoin = pt.join(servicesData, fileData, key="ClinicID")

    # join the result with the XML location table using ClinicID as the key
    MainJoin = pt.join(fileJoin, locationXML, key="ClinicID")

    # acquire the required columns
    result = pt.cut(MainJoin, 'ClinicServiceID', 'Service', 'ClinicID',
                    'Suburb', 'Postcode', 'Lat', 'Lon')

    # creating the final csv file clinic_service_locations.csv
    pt.tocsv(result, "clinic_service_locations.csv")
    print('Csv file generated.!!!')
Example #9
def test_fromxml_2():

    # initial data
    f = NamedTemporaryFile(delete=False)
    data = """<table>
    <tr>
        <td v='foo'/><td v='bar'/>
    </tr>
    <tr>
        <td v='a'/><td v='1'/>
    </tr>
    <tr>
        <td v='b'/><td v='2'/>
    </tr>
    <tr>
        <td v='c'/><td v='2'/>
    </tr>
</table>"""
    f.write(data)
    f.close()

    print open(f.name).read()
    actual = fromxml(f.name, 'tr', 'td', 'v')
    print actual
    expect = (('foo', 'bar'), ('a', '1'), ('b', '2'), ('c', '2'))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
Example #10
def test_fromxml():

    # initial data
    f = NamedTemporaryFile(delete=False)
    data = """<table>
    <tr>
        <td>foo</td><td>bar</td>
    </tr>
    <tr>
        <td>a</td><td>1</td>
    </tr>
    <tr>
        <td>b</td><td>2</td>
    </tr>
    <tr>
        <td>c</td><td>2</td>
    </tr>
</table>"""
    f.write(data)
    f.close()

    actual = fromxml(f.name, "tr", "td")
    expect = (("foo", "bar"), ("a", "1"), ("b", "2"), ("c", "2"))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
Example #11
def ptf9_staff_import(data, input_file, destination):
    """Import staff users from a Timetabler PTF9 export file."""
    staff_users = petl.fromxml(
        input_file,
        '{http://www.timetabling.com.au/TDV9}Teachers/{http://www.timetabling.com.au/TDV9}Teacher',
        {
            'TeacherID': '{http://www.timetabling.com.au/TDV9}TeacherID',
            'Code': '{http://www.timetabling.com.au/TDV9}Code',
            'FirstName': '{http://www.timetabling.com.au/TDV9}FirstName',
            'MiddleName': '{http://www.timetabling.com.au/TDV9}MiddleName',
            'LastName': '{http://www.timetabling.com.au/TDV9}LastName',
            'Salutation': '{http://www.timetabling.com.au/TDV9}Salutation',
            'DaysUnavailable':
            '{http://www.timetabling.com.au/TDV9}DaysUnavailable',
            'PeriodsUnavailable':
            '{http://www.timetabling.com.au/TDV9}PeriodsUnavailable',
            'ProposedLoad': '{http://www.timetabling.com.au/TDV9}ProposedLoad',
            'ActualLoad': '{http://www.timetabling.com.au/TDV9}ActualLoad',
            'FinalLoad': '{http://www.timetabling.com.au/TDV9}FinalLoad',
            'ConsecutivePeriods':
            '{http://www.timetabling.com.au/TDV9}ConsecutivePeriods',
            'MaxYardDutyLoad':
            '{http://www.timetabling.com.au/TDV9}MaxYardDutyLoad',
            'PeriodsOff': '{http://www.timetabling.com.au/TDV9}PeriodsOff',
            'Email': '{http://www.timetabling.com.au/TDV9}Email',
            'SpareField1': '{http://www.timetabling.com.au/TDV9}SpareField1',
            'SpareField2': '{http://www.timetabling.com.au/TDV9}SpareField2',
            'SpareField3': '{http://www.timetabling.com.au/TDV9}SpareField3',
        })
    data.set(destination, staff_users)
Example #12
    def run(self, driver, task, log):
        inp = task["source"]["file"]
        inp = compat.translate_unicode(inp)
        inp = "input/{}".format(inp)
        row_match = task["source"].get("row", None)
        value_match = task["source"].get("value", None)
        attr = task["source"].get("attr", None)
        mapping = task["source"].get("mapping", None)

        if row_match and value_match:
            if attr:
                record_set = etl.fromxml(inp, row_match, value_match, attr)
            else:
                record_set = etl.fromxml(inp, row_match, value_match)
        elif row_match and mapping:
            record_set = etl.fromxml(inp, row_match, mapping)
        else:
            raise ValueError('Incorrect parameter for source')

        if not etl.data(record_set).any():
            log.write("Task skipped. No rows on source")
        else:
            transform = TransformSubTask(task, log)
            record_set = transform.get_result(record_set)

            out = task["target"]["file"]
            out = compat.translate_unicode(out)
            out = "output/{}".format(out)
            separator = task["target"].get("delimiter", ";")
            separator = compat.translate_unicode(separator)
            enc = task["target"].get("encoding", "utf-8")

            task_log = "log/xml-csv_{}_{}.log".format(task["name"],
                                                      get_time_filename())
            with open(task_log, "w") as lg:
                if "truncate" in task["target"] and task["target"]["truncate"]:
                    record_set.progress(10000,
                                        out=lg).tocsv(out,
                                                      encoding=enc,
                                                      delimiter=separator)
                else:
                    record_set.progress(10000,
                                        out=lg).appendcsv(out,
                                                          encoding=enc,
                                                          delimiter=separator)
Example #13
def test_teehtml():

    t1 = (('foo', 'bar'), ('a', 2), ('b', 1), ('c', 3))

    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)
    etl.wrap(t1).teehtml(f1.name).selectgt('bar', 1).topickle(f2.name)

    ieq(t1, etl.fromxml(f1.name, './/tr', ('th', 'td')).convertnumbers())
    ieq(etl.wrap(t1).selectgt('bar', 1), etl.frompickle(f2.name))
Example #14
def test_teehtml():

    t1 = (("foo", "bar"), ("a", 2), ("b", 1), ("c", 3))

    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)
    etl.wrap(t1).teehtml(f1.name).selectgt("bar", 1).topickle(f2.name)

    ieq(t1, etl.fromxml(f1.name, ".//tr", ("th", "td")).convertnumbers())
    ieq(etl.wrap(t1).selectgt("bar", 1), etl.frompickle(f2.name))
Example #15
def test_teehtml_unicode():

    t1 = ((u"foo", u"bar"), (u"Արամ Խաչատրյան", 2), (u"Johann Strauß", 1), (u"Вагиф Сәмәдоғлу", 3))

    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)
    (etl.wrap(t1).teehtml(f1.name, encoding="utf-8").selectgt("bar", 1).topickle(f2.name))

    ieq(t1, (etl.fromxml(f1.name, ".//tr", ("th", "td"), encoding="utf-8").convertnumbers()))
    ieq(etl.wrap(t1).selectgt("bar", 1), etl.frompickle(f2.name))
Example #16
def day_import(data, input_file, destination):
    """Import courses from a Timetabler PTF9 export file."""
    new_days = petl.fromxml(
        input_file,
        '{http://www.timetabling.com.au/TDV9}Days/{http://www.timetabling.com.au/TDV9}Day',
        {
            'DayID': '{http://www.timetabling.com.au/TDV9}DayID',
            'Code': '{http://www.timetabling.com.au/TDV9}Code',
            'Name': '{http://www.timetabling.com.au/TDV9}Name',
        })
    data.set(destination, new_days)
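The PTF9/DOF9 importers above spell out the namespace URI in Clark notation ('{uri}Tag') for every element. A small hypothetical helper (not part of these projects; the file name export.ptf9 is made up) could build those paths instead:

import petl

TDV9 = 'http://www.timetabling.com.au/TDV9'  # namespace used by the PTF9 exports above

def ns(*tags, uri=TDV9):
    # build a Clark-notation path, e.g. ns('Days', 'Day') -> '{...}Days/{...}Day'
    return '/'.join('{{{}}}{}'.format(uri, t) for t in tags)

new_days = petl.fromxml('export.ptf9', ns('Days', 'Day'),
                        {'DayID': ns('DayID'), 'Code': ns('Code'), 'Name': ns('Name')})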
Example #17
def ptf9_student_enrollment_import(data, input_file, destination):
    """Import student enrollments from a Timetabler PTF9 export file."""
    student_enrollments = petl.fromxml(input_file, '{http://www.timetabling.com.au/TDV9}StudentLessons/{http://www.timetabling.com.au/TDV9}StudentLesson',
                                       {
                                            'StudentID': '{http://www.timetabling.com.au/TDV9}StudentID',
                                            'CourseID': '{http://www.timetabling.com.au/TDV9}CourseID',
                                            'LessonType': '{http://www.timetabling.com.au/TDV9}LessonType',
                                            'ClassCode': '{http://www.timetabling.com.au/TDV9}ClassCode',
                                            'RollClassCode': '{http://www.timetabling.com.au/TDV9}RollClassCode',
                                       })
    data.cat(destination, student_enrollments)
Example #18
def teacher_timetable_import(data, input_file, destination):
    """Import the teacher timetable information from a PTF9 file."""
    teacher_timetables = petl.fromxml(input_file, '{http://www.timetabling.com.au/TDV9}Timetables/{http://www.timetabling.com.au/TDV9}Timetable', {
                             'TimetableID': '{http://www.timetabling.com.au/TDV9}TimetableID',
                             'RollClassID': '{http://www.timetabling.com.au/TDV9}RollClassID',
                             'PeriodID': '{http://www.timetabling.com.au/TDV9}PeriodID',
                             'ClassID': '{http://www.timetabling.com.au/TDV9}ClassID',
                             'RoomID': '{http://www.timetabling.com.au/TDV9}RoomID',
                             'TeacherID': '{http://www.timetabling.com.au/TDV9}TeacherID'
                             })
    data.set(destination, teacher_timetables)
Example #19
def etl(file):
    tb1 = list()
    import petl as etl
    import sqlite3
    if ".csv" in file:
        tb1 = etl.fromcsv(file)
    elif ".json" in file:
        tb1 = etl.fromjson(file)
    elif ".xls" in file:
        tb1 = etl.fromxls(file)
    elif ".xml" in file:
        tb1 = etl.fromxml(file, "row")
Example #20
def room_import(data, input_file, destination):
    """Import courses from a Timetabler PTF9 export file."""
    new_rooms = petl.fromxml(
        input_file,
        '{http://www.timetabling.com.au/TDV9}Rooms/{http://www.timetabling.com.au/TDV9}Room',
        {
            'RoomID': '{http://www.timetabling.com.au/TDV9}RoomID',
            'Code': '{http://www.timetabling.com.au/TDV9}Code',
            'Name': '{http://www.timetabling.com.au/TDV9}Name',
            'Seats': '{http://www.timetabling.com.au/TDV9}Seats',
            'SiteNo': '{http://www.timetabling.com.au/TDV9}SiteNo',
            'Notes': '{http://www.timetabling.com.au/TDV9}Notes',
        })
    data.set(destination, new_rooms)
Example #21
def ptf9_staff_enrollment_import(data, input_file, destination):
    """Import staff enrolments from a Timetabler PTF9 export file."""
    staff_enrollments = petl.fromxml(
        input_file,
        '{http://www.timetabling.com.au/TDV9}Timetables/{http://www.timetabling.com.au/TDV9}Timetable',
        {
            'TimetableID': '{http://www.timetabling.com.au/TDV9}TimetableID',
            'RollClassID': '{http://www.timetabling.com.au/TDV9}RollClassID',
            'PeriodID': '{http://www.timetabling.com.au/TDV9}PeriodID',
            'ClassID': '{http://www.timetabling.com.au/TDV9}ClassID',
            'RoomID': '{http://www.timetabling.com.au/TDV9}RoomID',
            'TeacherID': '{http://www.timetabling.com.au/TDV9}TeacherID',
        })
    data.set(destination, staff_enrollments)
Example #22
def test_teehtml_unicode():

    t1 = ((u'foo', u'bar'), (u'Արամ Խաչատրյան', 2), (u'Johann Strauß', 1),
          (u'Вагиф Сәмәдоғлу', 3))

    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)
    (etl.wrap(t1).teehtml(f1.name,
                          encoding='utf-8').selectgt('bar',
                                                     1).topickle(f2.name))

    ieq(t1, (etl.fromxml(f1.name, './/tr',
                         ('th', 'td'), encoding='utf-8').convertnumbers()))
    ieq(etl.wrap(t1).selectgt('bar', 1), etl.frompickle(f2.name))
Example #23
def student_timetable_import(data, input_file, destination):
    """Import the teacher timetable information from a PTF9 file."""
    student_timetables = petl.fromxml(
        input_file,
        '{http://www.timetabling.com.au/TDV9}StudentLessons/{http://www.timetabling.com.au/TDV9}StudentLesson',
        {
            'StudentID': '{http://www.timetabling.com.au/TDV9}StudentID',
            'CourseID': '{http://www.timetabling.com.au/TDV9}CourseID',
            'LessonType': '{http://www.timetabling.com.au/TDV9}LessonType',
            'ClassCode': '{http://www.timetabling.com.au/TDV9}ClassCode',
            'RollClassCode':
            '{http://www.timetabling.com.au/TDV9}RollClassCode'
        })
    data.set(destination, student_timetables)
Example #24
def period_import(data, input_file, destination):
    """Import courses from a Timetabler PTF9 export file."""
    new_periods = petl.fromxml(input_file, '{http://www.timetabling.com.au/TDV9}Periods/{http://www.timetabling.com.au/TDV9}Period', {
                             'PeriodID': '{http://www.timetabling.com.au/TDV9}PeriodID',
                             'DayID': '{http://www.timetabling.com.au/TDV9}DayID',
                             'Code': '{http://www.timetabling.com.au/TDV9}Code',
                             'Name': '{http://www.timetabling.com.au/TDV9}Name',
                             'Doubles': '{http://www.timetabling.com.au/TDV9}Doubles',
                             'Triples': '{http://www.timetabling.com.au/TDV9}Triples',
                             'Quadruples': '{http://www.timetabling.com.au/TDV9}Quadruples',
                             'SiteMove': '{http://www.timetabling.com.au/TDV9}SiteMove',
                             'Load': '{http://www.timetabling.com.au/TDV9}Load',
                             'Index': '{http://www.timetabling.com.au/TDV9}Index',
                             'Number': '{http://www.timetabling.com.au/TDV9}Number',
                             'StartTime': '{http://www.timetabling.com.au/TDV9}StartTime',
                             'FinishTime': '{http://www.timetabling.com.au/TDV9}FinishTime',
                             })
    data.set(destination, new_periods)
Example #25
def test_fromxml_5():

    # initial data
    f = NamedTemporaryFile(delete=False)
    data = """<table>
    <row>
        <foo>a</foo><baz><bar v='1'/><bar v='3'/></baz>
    </row>
    <row>
        <foo>b</foo><baz><bar v='2'/></baz>
    </row>
    <row>
        <foo>c</foo><baz><bar v='2'/></baz>
    </row>
</table>"""
    f.write(data)
    f.close()

    actual = fromxml(f.name, "row", {"foo": "foo", "bar": ("baz/bar", "v")})
    expect = (("foo", "bar"), ("a", ("1", "3")), ("b", "2"), ("c", "2"))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
Example #26
def test_fromxml_5():

    # initial data
    f = NamedTemporaryFile(delete=False)
    data = """<table>
    <row>
        <foo>a</foo><baz><bar v='1'/><bar v='3'/></baz>
    </row>
    <row>
        <foo>b</foo><baz><bar v='2'/></baz>
    </row>
    <row>
        <foo>c</foo><baz><bar v='2'/></baz>
    </row>
</table>"""
    f.write(data)
    f.close()

    actual = fromxml(f.name, 'row', {'foo': 'foo', 'bar': ('baz/bar', 'v')})
    expect = (('foo', 'bar'), ('a', ('1', '3')), ('b', '2'), ('c', '2'))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
Example #27
def ptf9_student_import(data, input_file, destination):
    """Import student users from a Timetabler PTF9 export file."""
    student_users = petl.fromxml(input_file, '{http://www.timetabling.com.au/TDV9}Students/{http://www.timetabling.com.au/TDV9}Student',
                                 {
                                   'StudentID': '{http://www.timetabling.com.au/TDV9}StudentID',
                                   'Code': '{http://www.timetabling.com.au/TDV9}Code',
                                   'FirstName': '{http://www.timetabling.com.au/TDV9}FirstName',
                                   'MiddleName': '{http://www.timetabling.com.au/TDV9}MiddleName',
                                   'FamilyName': '{http://www.timetabling.com.au/TDV9}FamilyName',
                                   'Gender': '{http://www.timetabling.com.au/TDV9}Gender',
                                   'Email': '{http://www.timetabling.com.au/TDV9}Email',
                                   'House': '{http://www.timetabling.com.au/TDV9}House',
                                   'RollClass': '{http://www.timetabling.com.au/TDV9}RollClass',
                                   'YearLevel': '{http://www.timetabling.com.au/TDV9}YearLevel',
                                   'HomeGroup': '{http://www.timetabling.com.au/TDV9}HomeGroup',
                                   'WCSet': '{http://www.timetabling.com.au/TDV9}WCSet',
                                   'BOSCode': '{http://www.timetabling.com.au/TDV9}BOSCode',
                                   'SpareField1': '{http://www.timetabling.com.au/TDV9}SpareField1',
                                   'SpareField2': '{http://www.timetabling.com.au/TDV9}SpareField2',
                                   'SpareField3': '{http://www.timetabling.com.au/TDV9}SpareField3',
                                 })
    data.cat(destination, student_users)
Example #28
File: xml.py Project: zli69/petl
        <td>foo</td><td>bar</td>
    </tr>
    <tr>
        <td>a</td><td>1</td>
    </tr>
    <tr>
        <td>b</td><td>2</td>
    </tr>
    <tr>
        <td>c</td><td>2</td>
    </tr>
</table>'''
with open('example1.xml', 'w') as f:
    f.write(d)

table1 = etl.fromxml('example1.xml', 'tr', 'td')
table1
# if the data values are stored in an attribute, provide the attribute name
# as an extra positional argument
d = '''<table>
    <tr>
        <td v='foo'/><td v='bar'/>
    </tr>
    <tr>
        <td v='a'/><td v='1'/>
    </tr>
    <tr>
        <td v='b'/><td v='2'/>
    </tr>
    <tr>
        <td v='c'/><td v='2'/>
Example #29
File: xml.py Project: DeanWay/petl
        <td>foo</td><td>bar</td>
    </tr>
    <tr>
        <td>a</td><td>1</td>
    </tr>
    <tr>
        <td>b</td><td>2</td>
    </tr>
    <tr>
        <td>c</td><td>2</td>
    </tr>
</table>'''
with open('example1.xml', 'w') as f:
    f.write(d)

table1 = etl.fromxml('example1.xml', 'tr', 'td')
table1
# if the data values are stored in an attribute, provide the attribute name
# as an extra positional argument
d = '''<table>
    <tr>
        <td v='foo'/><td v='bar'/>
    </tr>
    <tr>
        <td v='a'/><td v='1'/>
    </tr>
    <tr>
        <td v='b'/><td v='2'/>
    </tr>
    <tr>
        <td v='c'/><td v='2'/>
Example #30
def dof9_import_dates(
        data, input_file, teacher_absences_destination,
        period_replacements_destination,
        emergency_teacher_availables_destination,
        emergency_teacher_yard_duty_available_destination,
        yard_duty_replacements_destination, room_replacements_destination,
        room_edits_destination, lesson_cancellations_destination,
        yard_duty_cancellations_destination, load_adjustments_destination,
        room_availables_destination, attendence_changes_destination):
    """Import the dates and definitions for the replacements that need to happen."""
    namespaces = {'x': 'http://www.timetabling.com.au/DOV9'}

    all_teacher_absences = petl.wrap([[]])
    all_period_replacements = petl.wrap([[]])
    all_emergency_teacher_availables = petl.wrap([[]])
    all_emergency_teacher_yard_duty_available = petl.wrap([[]])
    all_yard_duty_replacements = petl.wrap([[]])
    all_room_replacements = petl.wrap([[]])
    all_room_edits = petl.wrap([[]])
    all_lesson_cancellations = petl.wrap([[]])
    all_yard_duty_cancellations = petl.wrap([[]])
    all_load_adjustments = petl.wrap([[]])
    all_room_availables = petl.wrap([[]])
    all_attendence_changes = petl.wrap([[]])

    tree = lxml.etree.parse(open(input_file))  # Read Everything
    date_subtrees = tree.xpath(
        '/x:DailyOrganiserData/x:Dates/x:Date',
        namespaces={'x': 'http://www.timetabling.com.au/DOV9'})
    for date_subtree in date_subtrees:
        cf_date = date_subtree.findtext('x:Date', namespaces=namespaces)
        cf_datestring = date_subtree.findtext('x:DateString',
                                              namespaces=namespaces)
        cf_day = date_subtree.findtext('x:Day', namespaces=namespaces)

        add_common_fields = lambda i, cf_date=cf_date, cf_datestring=cf_datestring, cf_day=cf_day: i.addfield(
            'Date', cf_date).addfield('DateString', cf_datestring).addfield(
                'Day', cf_day)  # noqa

        subtree_data = petl.MemorySource(lxml.etree.tostring(date_subtree))

        if teacher_absences_destination is not None:
            teacher_absences = petl.fromxml(
                subtree_data,
                '{http://www.timetabling.com.au/DOV9}TeacherAbsences/{http://www.timetabling.com.au/DOV9}TeacherAbsence',
                {
                    'TeacherAbsenceID':
                    '{http://www.timetabling.com.au/DOV9}TeacherAbsenceID',
                    'TeacherCode':
                    '{http://www.timetabling.com.au/DOV9}TeacherCode',
                    'SessionNo':
                    '{http://www.timetabling.com.au/DOV9}SessionNo',
                    'Precedes':
                    '{http://www.timetabling.com.au/DOV9}Precedes',
                    'IsYardDuty':
                    '{http://www.timetabling.com.au/DOV9}IsYardDuty',
                    'PeriodCode':
                    '{http://www.timetabling.com.au/DOV9}PeriodCode',
                    'TeacherAbsenceReasonID':
                    '{http://www.timetabling.com.au/DOV9}TeacherAbsenceID',
                    'Counted':
                    '{http://www.timetabling.com.au/DOV9}Counted',
                    'Load':
                    '{http://www.timetabling.com.au/DOV9}Load',
                    'ArchiveTimetableReference':
                    '{http://www.timetabling.com.au/DOV9}ArchiveTimetableReference',
                    'ArchiveErrorType':
                    '{http://www.timetabling.com.au/DOV9}ArchiveErrorType'
                })
            teacher_absences = add_common_fields(teacher_absences)
            all_teacher_absences = all_teacher_absences.cat(teacher_absences)

        if period_replacements_destination is not None:
            period_replacements = petl.fromxml(
                subtree_data,
                '{http://www.timetabling.com.au/DOV9}PeriodReplacements/{http://www.timetabling.com.au/DOV9}PeriodReplacement',
                {
                    'PeriodReplacementID':
                    '{http://www.timetabling.com.au/DOV9}PeriodReplacementID',
                    'RollClassCode':
                    '{http://www.timetabling.com.au/DOV9}RollClassCode',
                    'ClassCode':
                    '{http://www.timetabling.com.au/DOV9}ClassCode',
                    'ClassGroupRowID':
                    '{http://www.timetabling.com.au/DOV9}ClassGroupRowID',
                    'PeriodCode':
                    '{http://www.timetabling.com.au/DOV9}PeriodCode',
                    'PeriodNo':
                    '{http://www.timetabling.com.au/DOV9}PeriodNo',
                    'ReplacementTeacherCode':
                    '{http://www.timetabling.com.au/DOV9}ReplacementTeacherCode',
                    'Load':
                    '{http://www.timetabling.com.au/DOV9}Load',
                    'Count':
                    '{http://www.timetabling.com.au/DOV9}Count',
                    'InLieu':
                    '{http://www.timetabling.com.au/DOV9}InLieu',
                    'Notes':
                    '{http://www.timetabling.com.au/DOV9}Notes',
                    'Index':
                    '{http://www.timetabling.com.au/DOV9}Index',
                    'NotRequired':
                    '{http://www.timetabling.com.au/DOV9}NotRequired',
                    'DuplicateReplacementID':
                    '{http://www.timetabling.com.au/DOV9}DuplicateReplacementID',
                    'ReferenceTeacherCode':
                    '{http://www.timetabling.com.au/DOV9}ReferenceTeacherCode',
                    'IsComposites':
                    '{http://www.timetabling.com.au/DOV9}IsComposites',
                    'ArchiveTimetableReference':
                    '{http://www.timetabling.com.au/DOV9}ArchiveTimetableReference',
                    'ArchiveErrorType':
                    '{http://www.timetabling.com.au/DOV9}ArchiveErrorType'
                })
            period_replacements = add_common_fields(period_replacements)
            all_period_replacements = all_period_replacements.cat(
                period_replacements)

        if yard_duty_replacements_destination is not None:
            yard_duty_replacements = petl.fromxml(
                subtree_data,
                '{http://www.timetabling.com.au/DOV9}YardDutyReplacements/{http://www.timetabling.com.au/DOV9}YardDutyReplacement',
                {
                    'YardDutyReplacementID':
                    '{http://www.timetabling.com.au/DOV9}YardDutyReplacementID',
                    'YardDutyCode':
                    '{http://www.timetabling.com.au/DOV9}YardDutyCode',
                    'PeriodCode':
                    '{http://www.timetabling.com.au/DOV9}PeriodCode',
                    'PeriodNo':
                    '{http://www.timetabling.com.au/DOV9}PeriodNo',
                    'Precedes':
                    '{http://www.timetabling.com.au/DOV9}Precedes',
                    'SessionNo':
                    '{http://www.timetabling.com.au/DOV9}SessionNo',
                    'ReplacementTeacherCode':
                    '{http://www.timetabling.com.au/DOV9}ReplacementTeacherCode',
                    'Load':
                    '{http://www.timetabling.com.au/DOV9}Load',
                    'Count':
                    '{http://www.timetabling.com.au/DOV9}Count',
                    'InLieu':
                    '{http://www.timetabling.com.au/DOV9}InLieu',
                    'Notes':
                    '{http://www.timetabling.com.au/DOV9}Notes',
                    'Index':
                    '{http://www.timetabling.com.au/DOV9}Index',
                    'NotRequired':
                    '{http://www.timetabling.com.au/DOV9}NotRequired',
                    'ActivityCode':
                    '{http://www.timetabling.com.au/DOV9}ActivityCode',
                    'ReferenceTeacherCode':
                    '{http://www.timetabling.com.au/DOV9}ReferenceTeacherCode',
                    'DuplicateReplacementID':
                    '{http://www.timetabling.com.au/DOV9}DuplicateReplacementID',
                    'ArchiveTimetableReference':
                    '{http://www.timetabling.com.au/DOV9}ArchiveTimetableReference',
                    'ArchiveErrorType':
                    '{http://www.timetabling.com.au/DOV9}ArchiveErrorType'
                })
            yard_duty_replacements = add_common_fields(yard_duty_replacements)
            all_yard_duty_replacements = all_yard_duty_replacements.cat(
                yard_duty_replacements)

        if room_edits_destination is not None:
            room_edits = petl.fromxml(
                subtree_data,
                '{http://www.timetabling.com.au/DOV9}RoomEdits/{http://www.timetabling.com.au/DOV9}RoomEdit',
                {
                    'ClassCode':
                    '{http://www.timetabling.com.au/DOV9}ClassCode',
                    'ClassGroupRowID':
                    '{http://www.timetabling.com.au/DOV9}ClassGroupRowID',
                    'RollClassCode':
                    '{http://www.timetabling.com.au/DOV9}RollClassCode',
                    'PeriodCode':
                    '{http://www.timetabling.com.au/DOV9}PeriodCode',
                    'ReplacementRoomCode':
                    '{http://www.timetabling.com.au/DOV9}ReplacementRoomCode',
                    'ArchiveTimetableReference':
                    '{http://www.timetabling.com.au/DOV9}ArchiveTimetableReference',
                    'ArchiveErrorType':
                    '{http://www.timetabling.com.au/DOV9}ArchiveErrorType'
                })
            room_edits = add_common_fields(room_edits)
            all_room_edits = all_room_edits.cat(room_edits)

    if teacher_absences_destination is not None:
        data.set(teacher_absences_destination, all_teacher_absences)
    if period_replacements_destination is not None:
        data.set(period_replacements_destination, all_period_replacements)
    if yard_duty_replacements_destination is not None:
        data.set(yard_duty_replacements_destination,
                 all_yard_duty_replacements)
    if room_edits_destination is not None:
        data.set(room_edits_destination, all_room_edits)
Example #31
def createTableFromXML(xmlFile):
    return etl.fromxml(xmlFile, "store", {"Name": "Name", "Suburb": "Suburb", "Lat": "Lat", "Lon": "Lon"})
Example #32
def main(argv):

    urls = {
        'INDIVIDUALS': {
            'xmlroot': 'response/individuals/individual',
            'parse_dict': {
                'Family ID': ('family', 'id'),
                'Individual ID': ('.', 'id'),
                'Family Position': 'family_position',
                'Prefix': 'salutation',
                'First Name': 'first_name',
                'Middle Name': 'middle_name',
                'Last Name': 'last_name',
                'Legal Name': 'legal_first_name',
                'Active': 'active',
                'Campus': 'campus',
                'Email': 'email',

                'Mailing Street': ".//address[@type='mailing']/street_address",
                'Mailing City': ".//address[@type='mailing']/city",
                'Mailing State': ".//address[@type='mailing']/state",
                'Mailing Postal Code': ".//address[@type='mailing']/zip",
                'Mailing Country': ".//address[@type='mailing']/country",

                'Home Street': ".//address[@type='home']/street_address",
                'Home City': ".//address[@type='home']/city",
                'Home State': ".//address[@type='home']/state",
                'Home Postal Code': ".//address[@type='home']/zip",
                'Home Country': ".//address[@type='home']/country",

                'Other Street': ".//address[@type='other']/street_address",
                'Other City': ".//address[@type='other']/city",
                'Other State': ".//address[@type='other']/state",
                'Other Postal Code': ".//address[@type='other']/zip",
                'Other Country': ".//address[@type='other']/country",

                'Contact Phone': ".//phone[@type='contact']",
                'Home Phone': ".//phone[@type='home']",
                'Work Phone': ".//phone[@type='work']",
                'Mobile Phone': ".//phone[@type='mobile']",
                'Emergency Phone': ".//phone[@type='emergency']",

                'Birthday': 'birthday',
                'Anniversary': 'anniversary',
                'Gender': 'gender',
                'Giving Number': 'giving_number',
                'Marital Status': 'marital_status',
                'Membership Start Date': 'membership_date',
                'Membership End Date': 'membership_end',
                'Membership Type': 'membership_type',
                'Baptized': 'baptized',
                # 'School District': ??,
                # 'How They Heard': ??,
                # 'How They Joined': ??,
                # 'Reason Left Church': ??,
                # 'Job Title': ??,
                'Deceased': 'deceased',

                # !!!

                'Baptism Date': ".//user_defined_date_fields/user_defined_date_field[label='Baptism Date']/date",
                'Baptized By': ".//user_defined_text_fields/user_defined_text_field[label='Baptized By']/text",
                'Confirmed Date': ".//user_defined_date_fields/user_defined_date_field[label='Confirmed Date']/date",
                'Confirmed': ".//user_defined_pulldown_fields/user_defined_pulldown_field[label='Confirmed']/selection",
                'Mailbox Number': ".//user_defined_text_fields/user_defined_text_field[label='Mailbox Number']/text",
                'Spirit Mailing': ".//user_defined_pulldown_fields/user_defined_pulldown_field[label='Spirit Mailing']/selection",
                'Photo Release': ".//user_defined_pulldown_fields/user_defined_pulldown_field[label='Photo Release']/selection",
                'Ethnicity': ".//user_defined_pulldown_fields/user_defined_pulldown_field[label='Ethnicity']/selection",
                'Transferred Frm': ".//user_defined_text_fields/user_defined_text_field[label='Transferred Frm']/text",
                'Transferred To': ".//user_defined_text_fields/user_defined_text_field[label='Transferred To']/text",
                'Pastr When Join': ".//user_defined_text_fields/user_defined_text_field[label='Pastr When Join']/text",
                'Pastr When Leav': ".//user_defined_text_fields/user_defined_text_field[label='Pastr When Leav']/text",
                'SK Indiv ID': ".//user_defined_text_fields/user_defined_text_field[label='SK Indiv ID']/text"
            }
        },
        'GROUPS': 'https://ingomar.ccbchurch.com/api.php?srv=group_profiles',
        'ACCOUNTS': 'https://ingomar.ccbchurch.com/api.php?srv=transaction_detail_type_list',
        'TRANSACTIONS': {
            'xmlroot': 'response/batches/batch/transactions/transaction',
            'parse_dict': {
                'Date': 'date',
                'Payment Type': 'payment_type',
                'Check Number': 'check_number',
                'Individual ID': ('individual', 'id'),
                'Account': './/transaction_details/transaction_detail/coa',
                'Amount': './/transaction_details/transaction_detail/amount',
                'Tax Deductible': './/transaction_details/transaction_detail/tax_deductible',
                'Note': './/transaction_details/transaction_detail/note'
            }
        }
    }

    parser = argparse.ArgumentParser(description="Parses XML file into CSV output")
    parser.add_argument("--type", required=True, help='One of ' + ', '.join(urls.keys()))
    parser.add_argument("--xml-input-filename", required=True, help="XML file to parse")
    parser.add_argument("--csv-output-filename", required=True, help="CSV output file")
    args = parser.parse_args()

    table1 = petl.fromxml(args.xml_input_filename, urls[args.type]['xmlroot'], urls[args.type]['parse_dict'])
    petl.tocsv(table1, args.csv_output_filename)
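As in Examples #4 and #25, a 2-tuple value in the parse dict means (element path, attribute name): ('family', 'id') reads the id attribute of the family child element, while ('.', 'id') reads the id attribute of the matched row element itself. A minimal sketch of that form (the file name individuals.xml is hypothetical):

import petl

tbl = petl.fromxml('individuals.xml', 'response/individuals/individual',
                   {'Individual ID': ('.', 'id'),      # attribute on the matched element
                    'Family ID': ('family', 'id'),     # attribute on a child element
                    'First Name': 'first_name'})       # text of a child element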
Example #33
import petl as etl

# Open CSV file
stores = etl.fromcsv('stores.csv')

# Open XML document
locations = etl.fromxml('locations.xml', 'store', {'Name': 'Name', 'Lat': 'Lat', 'Lon': 'Lon'})
print(locations)

# Set output
output_table = [["ID", "Name", "Suburb", "State", "Postcode"]]

store_id = 1

# Read through the store.csv to generate output_table
store = etl.cut(stores, 'Name', 'Suburb', 'State', 'Postcode').distinct()
print(store)
for s in etl.values(store, 'Name', 'Suburb', 'State', 'Postcode'):
    output_table.append([store_id] + list(s))
    store_id += 1
print (output_table)

# Merge and join XML and CSV together
merge_output = etl.join(stores, locations, key="Name")
print(merge_output)

store_table = etl.cut(merge_output, 'ID', 'Name', 'Suburb', 'State', 'Postcode', 'Lat', 'Lon')
print(etl.head(store_table, 5))

# Export to CSV file
etl.tocsv(merge_output, 'store_locations.csv')
Example #34
table3 = etl.frompickle('pickel_file.p')
print('Pick')
print(table3)

###################Reading Text Files#################################

text = 'a,1\nb,2\nc,2\n'
with open('example.txt', 'w') as f:
    f.write(text)

table4 = etl.fromtext('example.txt')
print(table4)

################Reading XML files##################################

table5 = etl.fromxml('data.xml', 'tr', 'td')
print(table5)

################Reading JSON files###############################

data = '''
[{"foo": "a", "bar": 1},
{"foo": "b", "bar": 2},
{"foo": "c", "bar": 2}]
'''
with open('example.json', 'w') as f:
    f.write(data)

table6 = etl.fromjson('example.json', header=['foo', 'bar'])
print(table6)
Example #35
# List of source and target files for merge

healthcsvfile = './datafiles/Vic_Health_Care.csv'
locationxmlfile = './datafiles/Vic_Locations.xml'
mergedcsvfile = './datafiles/practice_locations.csv'

# xmlfields is a dictionary to be used as the fromxml field mapping

xmlfields = {'Town_name': 'Town', 'Latitude': 'Lat', 'Longitude': 'Lon'}  # type: Dict[str, str]
xmlparent = 'Town_location'
initialrow = ['Practice_Name', 'Latitude', 'Longitude', 'Town', 'State', 'Post_Code']

# tables in memory created from xml and csv files

csvtable = petl.fromcsv(healthcsvfile)
xmltable = petl.fromxml(locationxmlfile, xmlparent, xmlfields)

# Find the row in xmltable matching town from csv 
lktbl = petl.lookupone(xmltable, 'Town_name')  # type: Union[Dict[Any, Union[tuple[Any], Tuple[Any]]], Any]
nmdtbl = petl.namedtuples(csvtable)
finaltabl = [initialrow]

for lin in nmdtbl:
    tabl = lktbl[lin.Town]
    latitude = tabl[0]
    longitude = tabl[1]

    insertline = (str(lin.Practice_Name) + ',' + latitude + ',' + longitude + ',' + str(
        lin.Town) + ',' + str(lin.State) + ',' + str(lin.Postcode)).split(',')
    print insertline
    finaltabl.extend([insertline])
Example #36
def test_fromxml_url():

    tbl = fromxml('http://feeds.bbci.co.uk/news/rss.xml', './/item', 'title')
    print tbl
    assert nrows(tbl) > 0
Example #37
parser.add_argument('-e',
                    action='store_true',
                    help='add quotes to header to fit SQL pattern')
parser.add_argument('config', help='path of config yaml file')
args = parser.parse_args()

info = yaml.load(open(args.config))
xml_file = args.f or info['xml']
csv_file = args.t or info['csv']
table = petl.empty()

# substitute namespace to keys
for key in eval(Template(str(info['keys'])).substitute(
        **info['namespace'])) if 'namespace' in info else info['keys']:
    # collect data from each key
    table = table.cat(petl.fromxml(xml_file, key['anchor'], key['select']))

if 'pks' in info:
    table = table.mergeduplicates(
        info['pks'] if len(info['pks']) > 1 else info['pks'][0])

if 'orderBy' in info:
    table = table.sort(info['orderBy'])

if 'skip' in info:
    table = table.tail(len(table) - info['skip'])

if 'first' in info:
    table = table.head(info['first'])

if 'replace' in info:
Example #38
def test_fromxml_url():

    tbl = fromxml('http://feeds.bbci.co.uk/news/rss.xml', './/item', 'title')
    print tbl
    assert nrows(tbl) > 0
Example #39
import petl as etl
import csv

table1 = etl.fromcsv('covid.csv')

# importing data from xml file and creating table
table2 = etl.fromxml('Country_location.xml', './/tr', ('th', 'td'))
# print(table2)

# removing column country from table
table3 = etl.cutout(table2, 'country')

# merging the covid table with xml data
table4 = etl.join(table1, table3, key='location')
print(table4)

# writing result to csv file
with open('covid_countries.csv', 'w') as f:
    writer = csv.writer(f)
    writer.writerows(table4)
Example #40
# used to read the csv files with the petl framework
# the below code is used to read the services csv file
fileData = pt.fromcsv('services.csv')
# the below code is used to read the clinicservices csv file
servicesData = pt.fromcsv('clinicservices.csv')
# join the csv file using the inbuilt function join using ServiceID as main key
fileJoin = pt.join(servicesData, fileData, key="ServiceID")
# the below code is used to read the clinics csv file
readCsv = pt.fromcsv('clinics.csv')
# join the csv file using the inbuilt function join using ClinicID as main key
doubleJoin = pt.join(fileJoin, readCsv, key='ClinicID')
# reading the xml file cliniclocations.xml
locationXML = pt.fromxml('cliniclocations.xml', 'clinic', {
    "ClinicID": "ClinicID",
    "Lat": "Lat",
    "Lon": "Lon"
})
# join the csv file using the inbuilt function join using ClinicID as main key
doubleJoin2 = pt.join(doubleJoin, locationXML, key="ClinicID")
# removing the spaces from the email field
cleanOne = substitute(doubleJoin2, 'Email', '\s', '')
# appending @myclinic.com.au to every email id
cleanTwo = substitute(cleanOne, 'Email', '(^[\w]+$)', '\\1@myclinic.com.au')
# acquire the required columns
result = pt.cut(cleanTwo, 'ClinicServiceID', 'ClinicID', 'ServiceID',
                'Service', 'Name', 'Suburb', 'State', 'Email', 'Lat', 'Lon')
# creating the final csv file which is clinicservicelocations.csv
pt.tocsv(result, "clinicservicelocations.csv")
print('Csv file generated.!!!')