Beispiel #1
0
    def test_individual_xml_validates(self):
        # Check that the XML the directory API serves for one individual
        # matches the element structure this code relies on. The individual
        # is the first cnetid reported by
        # get_all_library_cnetids_from_directory().
        individual_dtd = """
        <!ELEMENT responseData    (response, totalResults, individuals)>
        <!ELEMENT response        (#PCDATA)>
        <!ELEMENT totalResults    (#PCDATA)>
        <!ELEMENT individuals     (individual+)>
        <!ELEMENT individual      (name, displayName, cnetid, chicagoid, contacts, resources)>
        <!ELEMENT name            (#PCDATA)>
        <!ELEMENT displayName     (#PCDATA)>
        <!ELEMENT cnetid          (#PCDATA)>
        <!ELEMENT chicagoid       (#PCDATA)>
        <!ELEMENT contacts        (contact)>
        <!ELEMENT contact         (title, division, department, subDepartment, email, phone, facultyExchange)>
        <!ELEMENT title           (#PCDATA)>
        <!ELEMENT division        (name, resources)>
        <!ELEMENT resources       (directoryURL, xmlURL)>
        <!ELEMENT directoryURL    (#PCDATA)>
        <!ELEMENT xmlURL          (#PCDATA)>
        <!ELEMENT department      (name, resources)>
        <!ELEMENT subDepartment   (name, resources)>
        <!ELEMENT email           (#PCDATA)>
        <!ELEMENT phone           (#PCDATA)>
        <!ELEMENT facultyExchange (#PCDATA)>
        """
        validator = etree.DTD(StringIO(individual_dtd))

        # Any library staff member will do; take the first one listed.
        first_cnetid = get_all_library_cnetids_from_directory()[0]
        individual_url = (
            'https://directory.uchicago.edu/api/v2/individuals/'
            + first_cnetid + '.xml')
        document = etree.XML(get_xml_from_directory_api(individual_url))

        is_valid = validator.validate(document)
        self.assertEqual(is_valid, True)
Beispiel #2
0
    def test_directory_xml_validates(self):
        # Check that the XML the directory API serves for division 16
        # matches the element structure this code relies on.
        division_dtd = """
        <!ELEMENT responseData    (response, totalResults, organizations)>
        <!ELEMENT response        (#PCDATA)>
        <!ELEMENT totalResults    (#PCDATA)>
        <!ELEMENT organizations   (organization+)>
        <!ELEMENT organization    (name, type, departments, members, resources)>
        <!ELEMENT name            (#PCDATA)>
        <!ELEMENT type            (#PCDATA)>
        <!ELEMENT departments     (department+)>
        <!ELEMENT department      (name, resources)>
        <!--      name (see above) -->
        <!ELEMENT resources       (directoryURL, xmlURL)>
        <!ELEMENT directoryURL    (#PCDATA)>
        <!ELEMENT xmlURL          (#PCDATA)>
        <!ELEMENT members         (member+)>
        <!ELEMENT member          (name, displayName, cnetid, chicagoid, title, email, phone, facultyExchange, resources)>
        <!--      name (see above) -->
        <!ELEMENT displayName     (#PCDATA)>
        <!ELEMENT cnetid          (#PCDATA)>
        <!ELEMENT chicagoid       (#PCDATA)>
        <!ELEMENT title           (#PCDATA)>
        <!ELEMENT email           (#PCDATA)>
        <!ELEMENT phone           (#PCDATA)>
        <!ELEMENT facultyExchange (#PCDATA)>
        <!--      resources (see above) -->
        """
        validator = etree.DTD(StringIO(division_dtd))

        division_url = 'https://directory.uchicago.edu/api/v2/divisions/16.xml'
        document = etree.XML(get_xml_from_directory_api(division_url))

        is_valid = validator.validate(document)
        self.assertEqual(is_valid, True)
def get_data(tree, node=None):
    """
    Recursively fill in the names and children of a directory tree.

    Every node is a dict with the keys 'name', 'xml' and 'children'. The
    XML at node['xml'] is fetched with get_xml_from_directory_api(), the
    node's 'name' is set from the organization name found in that XML, and
    one child node is created (and processed recursively) for each
    department or subdepartment the XML lists.

    Arguments:
        tree: the root node dict. It is modified in place.
        node: the node to process. When None, the root is processed.

    Returns:
        None. All results are stored in the tree itself.
    """
    if node is None:
        node = tree
    x = ElementTree.fromstring(get_xml_from_directory_api(node['xml']))

    # no matter what, name is in the same place- but sometimes it gets split
    # into chunks.
    node['name'] = x.find(".//organizations/organization/name").text

    if "/divisions/" in node['xml']:
        # division level: departments are attached to the division itself.
        parent_node = node
        child_path = ".//departments/department/resources/xmlURL"
    elif "/departments/" in node['xml']:
        # department level: attach subdepartments to whatever node
        # find_node_for_xml() reports for this URL, falling back to the
        # current node when it reports nothing.
        parent_node = find_node_for_xml(tree, node['xml']) or node
        child_path = ".//subDepartments/subDepartment/resources/xmlURL"
    else:
        # Any other URL has no children to collect.
        return

    for d in x.findall(child_path):
        child_node = {
            'name': '',
            'xml': d.text,
            'children': [],
        }
        parent_node['children'].append(child_node)
        get_data(tree, child_node)
Beispiel #4
0
def get_all_library_cnetids_from_directory(xml_string=None):
    """
    Return the sorted, de-duplicated cnetids found in directory XML.

    Arguments:
        xml_string: the XML text to parse. When it is empty or None, the
            XML is fetched from the directory API's divisions/16 URL.

    Returns:
        a sorted list of the distinct, non-empty text values of every
        <cnetid> element that is a direct child of a <member> element.
    """
    if not xml_string:
        xml_string = get_xml_from_directory_api(
            'https://directory.uchicago.edu/api/v2/divisions/16.xml')

    # get xml element tree.
    x = ElementTree.fromstring(xml_string)

    # get cnetids. An element with no text (e.g. <cnetid/>) has a .text of
    # None; skip those so sorting never compares None with a string.
    cnetids = {
        cnetid.text
        for cnetid in x.findall(".//member/cnetid")
        if cnetid.text
    }
    return sorted(cnetids)
Beispiel #5
0
def get_all_library_cnetids_from_directory(xml_string=None):
    """
    List the cnetids of all members named in a directory XML document.

    Arguments:
        xml_string: XML text to read. If it is None or empty, the document
            is fetched from the directory API's divisions/16 URL instead.

    Returns:
        the distinct, non-empty <cnetid> values found directly under
        <member> elements, as a sorted list of strings.
    """
    if not xml_string:
        xml_string = get_xml_from_directory_api(
            'https://directory.uchicago.edu/api/v2/divisions/16.xml')

    # get xml element tree.
    x = ElementTree.fromstring(xml_string)

    # get cnetids.
    cnetids = set()
    for cnetid in x.findall(".//member/cnetid"):
        # An empty element has no text (None). Leaving None in the set
        # would make sorted() fail when comparing it with a string.
        if cnetid.text:
            cnetids.add(cnetid.text)
    return sorted(cnetids)
Beispiel #6
0
 def _get_campus_directory_unit_names(self):
     """
     Report unit names in the campus directory.

     Every department listed for division 16 contributes its own name.
     Every subdepartment found in a department's XML contributes
     '<department name> - <subdepartment name>'. Runs of whitespace inside
     a name are collapsed to a single space and the ends are stripped.

     Returns:
         a set() of campus directory full names, as strings.
     """
     # Raw string: in a plain literal '\s' is an invalid escape sequence.
     # Compiled once because the pattern is used inside nested loops.
     whitespace = re.compile(r'\s+')

     unit_names = set()
     # NOTE(review): this URL has no '.xml' suffix, unlike the other
     # directory API URLs used by this code -- confirm that is intended.
     x = ElementTree.fromstring(
         get_xml_from_directory_api(
             'https://directory.uchicago.edu/api/v2/divisions/16'))
     for d in x.findall(".//departments/department"):
         department_name = whitespace.sub(' ', d.find('name').text).strip()
         unit_names.add(department_name)
         # Each department has its own XML document listing subdepartments.
         department_xml = d.find('resources/xmlURL').text
         x2 = ElementTree.fromstring(
             get_xml_from_directory_api(department_xml))
         for d2 in x2.findall(".//subDepartments/subDepartment"):
             subdepartment_name = whitespace.sub(
                 ' ', d2.find('name').text).strip()
             unit_names.add(department_name + ' - ' + subdepartment_name)
     return unit_names
def get_data(tree, node=None):
    """
    Walk the campus directory and populate a tree of organization nodes.

    A node is a dict with 'name', 'xml' and 'children' keys. For the node
    being processed, the XML at node['xml'] is retrieved through
    get_xml_from_directory_api(), its organization name is stored in
    node['name'], and a new child node is appended and processed in turn
    for each department (division URLs) or subdepartment (department
    URLs) the XML refers to.

    Arguments:
        tree: root node dict, updated in place.
        node: node to process; the root is used when this is None.

    Returns:
        None.
    """
    if node is None:
        node = tree
    x = ElementTree.fromstring(get_xml_from_directory_api(node['xml']))

    # no matter what, name is in the same place- but sometimes it gets split
    # into chunks.
    node['name'] = x.find(".//organizations/organization/name").text

    xml_url = node['xml']
    if "/divisions/" in xml_url:
        # division level: children go on the division node itself.
        parent_node = node
        url_path = ".//departments/department/resources/xmlURL"
    elif "/departments/" in xml_url:
        # department level: children go on the node find_node_for_xml()
        # returns for this URL, or on the current node if it returns
        # nothing.
        parent_node = find_node_for_xml(tree, xml_url)
        if not parent_node:
            parent_node = node
        url_path = ".//subDepartments/subDepartment/resources/xmlURL"
    else:
        # Neither a division nor a department: nothing further to walk.
        return

    for d in x.findall(url_path):
        child_node = {'name': '', 'xml': d.text, 'children': []}
        parent_node['children'].append(child_node)
        get_data(tree, child_node)
    def _staff_out_of_sync(self):
        """
        Compare staff data in the campus directory with Wagtail.

        Two sets of strings are built, one from the campus directory API
        and one from the live StaffPage objects. Each set holds the bare
        cnetid of every person plus one '<cnetid> -<value>- (<field>)'
        entry per official name, display name, position title, email,
        phone/faculty exchange pair and department.

        Returns: two sorted lists of strings. The first holds the entries
        found in Wagtail but not in the campus directory; the second holds
        the entries found in the campus directory but not in Wagtail.
        """
        def _entry(cnetid, value, field):
            text = str(value) if value is not None else ''
            return '{} -{}- ({})'.format(cnetid, self._clean(text), field)

        # Accounts that are deliberately left out of the comparison.
        # Former library directors can still be listed in the campus
        # directory without belonging in the library's staff listings, and
        # some staff have phone numbers tied to non-library jobs. Such
        # details can be added to the library website by hand, but they are
        # not kept in sync.
        excluded = ['judi']

        directory_entries = set()
        for cnetid in get_all_library_cnetids_from_directory():
            if cnetid in excluded:
                continue
            directory_entries.add(cnetid)
            individual_url = (
                'https://directory.uchicago.edu/api/v2/individuals/{}.xml'
                .format(cnetid))
            person = get_individual_info_from_directory(
                get_xml_from_directory_api(individual_url))
            for key in ('officialName', 'displayName', 'positionTitle'):
                directory_entries.add(_entry(cnetid, person[key], key))
            directory_entries.update(
                _entry(cnetid, address, 'email')
                for address in person['email'])
            directory_entries.update(
                _entry(cnetid, re.sub(r"\n", " ", pair),
                       'phoneFacultyExchange')
                for pair in person['phoneFacultyExchanges'])
            directory_entries.update(
                _entry(cnetid, unit, 'department')
                for unit in person['departments'])

        wagtail_entries = set()
        for page in StaffPage.objects.live():
            if page.cnetid in excluded:
                continue
            wagtail_entries.add(page.cnetid)
            wagtail_entries.add(
                _entry(page.cnetid, page.official_name, 'officialName'))
            wagtail_entries.add(
                _entry(page.cnetid, page.display_name, 'displayName'))
            wagtail_entries.add(
                _entry(page.cnetid, page.position_title, 'positionTitle'))
            wagtail_entries.update(
                _entry(page.cnetid, row.email, 'email')
                for row in page.staff_page_email.all())
            wagtail_entries.update(
                _entry(page.cnetid,
                       '{} {}'.format(row.faculty_exchange, row.phone_number),
                       'phoneFacultyExchange')
                for row in page.staff_page_phone_faculty_exchange.all())
            wagtail_entries.update(
                _entry(page.cnetid,
                       row.library_unit.get_campus_directory_full_name(),
                       'department')
                for row in page.staff_page_units.all())

        only_in_wagtail = sorted(wagtail_entries - directory_entries)
        only_in_directory = sorted(directory_entries - wagtail_entries)
        return only_in_wagtail, only_in_directory
    def handle(self, *args, **options):
        """
        Create or update the StaffPage for one cnetid from the campus
        directory, then bring that page's VCards in line with the
        directory data.

        The cnetid is read from options['cnetid']; the process exits with
        status 1 when that option is missing. The person's record is
        fetched from the directory API. An existing StaffPage with the
        same cnetid is updated, otherwise a new one is created under the
        first StaffIndexPage. VCards listed by the directory are created
        when absent, VCards no longer listed are deleted, and finally the
        page is made its own page maintainer and saved.

        Returns: None.
        """
        try:
            cnetid = options['cnetid']
        except KeyError:
            sys.exit(1)

        # Look the staff index up once and reuse it below.
        staff_index = StaffIndexPage.objects.first()
        staff_index_path = staff_index.path
        staff_index_depth = staff_index.depth

        xml_string = get_xml_from_directory_api(
            'https://directory.uchicago.edu/api/v2/individuals/' + cnetid +
            '.xml')
        info = get_individual_info_from_directory(xml_string)
        next_available_path = get_available_path_under(staff_index_path)
        slug = make_slug(info['displayName'])

        # NOTE(review): the update branch writes url_path under
        # '/loop/staff/' while the create branch writes it under '/staff/'.
        # Confirm which prefix is intended.
        if StaffPage.objects.filter(cnetid=info['cnetid']).exists():
            # Update a StaffPage. 'path' is deliberately not among the
            # defaults: setting it destroyed the staff index.
            StaffPage.objects.update_or_create(
                cnetid=info['cnetid'],
                defaults={
                    'title': info['displayName'],
                    'display_name': info['displayName'],
                    'official_name': info['officialName'],
                    'slug': slug,
                    'url_path': '/loop/staff/' + slug + '/',
                    'depth': staff_index_depth + 1,
                })
            staff_index.fix_tree(destructive=False)
        else:
            StaffPage.objects.create(
                title=info['displayName'],
                slug=slug,
                path=next_available_path,
                depth=len(next_available_path) // 4,
                numchild=0,
                url_path='/staff/' + slug + '/',
                cnetid=info['cnetid'],
                display_name=info['displayName'],
                official_name=info['officialName'],
            )

        # The page that was just created or updated; fetched once rather
        # than once per VCard.
        staff_page = StaffPage.objects.get(cnetid=info['cnetid'])

        # Add new VCards. Each directory entry is a dict, so optional
        # values have to be looked up as keys: hasattr() tests attributes
        # and never sees them. Missing or empty values are stored as ''.
        wanted = []
        for vcard in info['title_department_subdepartments_dicts']:
            faculty_exchange = vcard.get('facultyexchange') or ''
            email = vcard.get('email') or ''
            phone_number = vcard.get('phone') or ''
            phone_label = 'work' if phone_number else ''

            # Remember what the directory asked for, in the same shape the
            # deletion pass below builds from stored VCards.
            wanted.append((vcard['title'], vcard['department'],
                           faculty_exchange, email, phone_number))

            StaffPagePageVCards.objects.get_or_create(
                title=vcard['title'],
                unit=DirectoryUnit.objects.get(pk=vcard['department']),
                faculty_exchange=faculty_exchange,
                email=email,
                phone_label=phone_label,
                phone_number=phone_number,
                page=staff_page)

        # Delete unnecessary VCards: anything stored on the page that the
        # directory no longer lists.
        for existing in staff_page.vcards.all():
            key = (existing.title, existing.unit.id,
                   existing.faculty_exchange or '', existing.email or '',
                   existing.phone_number or '')
            if key not in wanted:
                existing.delete()

        # Re-read the page after the VCard changes before saving it.
        staff_page = StaffPage.objects.get(cnetid=info['cnetid'])
        staff_page.page_maintainer = staff_page
        staff_page.save()
    def handle(self, *args, **options):
        """
        Report differences between the staff listed by the University's
        directory API and the staff in Wagtail.

        The report lists cnetids present on only one side. For cnetids
        present on both sides it lists differing official names, differing
        display names, and VCards present on only one side.

        Returns: the report as a single newline-joined string; it is empty
        when no differences were found.
        """
        # api staff, wagtail staff
        api_staff = set(get_all_library_cnetids_from_directory())
        wag_staff = set(get_all_library_cnetids_from_wagtail())

        output = []

        # Sets have no defined order; sort so the report is reproducible.
        missing_in_api = wag_staff - api_staff
        if missing_in_api:
            output.append("THE FOLLOWING STAFF APPEAR IN WAGTAIL, BUT NOT IN THE UNIVERSITY'S API:")
            output.extend(sorted(missing_in_api))
            # if this happens, go into the user object and mark is_active False.

        missing_in_wagtail = api_staff - wag_staff
        if missing_in_wagtail:
            output.append("THE FOLLOWING STAFF APPEAR IN THE UNIVERSITY'S API, BUT NOT IN WAGTAIL:")
            output.extend(sorted(missing_in_wagtail))
            # if this happens, report that a new user needs to be created.

        for s in sorted(api_staff & wag_staff):
            xml_string = get_xml_from_directory_api(
                'https://directory.uchicago.edu/api/v2/individuals/' + s + '.xml')
            api = get_individual_info_from_directory(xml_string)
            wag = get_individual_info_from_wagtail(s)

            # format() is used rather than '+' so that a missing (None)
            # name is reported instead of raising a TypeError.
            if api['officialName'] != wag['officialName']:
                output.append("{}'s officialName is {}, not {}".format(
                    s, api['officialName'], wag['officialName']))

            if api['displayName'] != wag['displayName']:
                output.append("{}'s displayName is {}, not {}".format(
                    s, api['displayName'], wag['displayName']))
                # In the user management command, change the following things in the User object:
                # (note- in the User object, username = cnetid)
                # prompt a human for first_name, last_name.
                # In the StaffPage object,
                # check displayName and officialName.

            diffs = api['title_department_subdepartments'].difference(
                wag['title_department_subdepartments'])
            if diffs:
                output.append("THE FOLLOWING VCARDS APPEAR FOR " + s + " IN THE UNIVERSITY'S API, BUT NOT IN WAGTAIL:")
                output.extend(sorted(diffs))

            diffs = wag['title_department_subdepartments'].difference(
                api['title_department_subdepartments'])
            if diffs:
                output.append("THE FOLLOWING VCARDS APPEAR FOR " + s + " IN WAGTAIL, BUT NOT IN THE UNIVERSITY'S API:")
                output.extend(sorted(diffs))

        return "\n".join(output)
    def handle(self, *args, **options):
        """
        Build a text report of where Wagtail's staff data disagrees with
        the University's directory API.

        Reported, in order: cnetids found only in Wagtail, cnetids found
        only in the API, and then, for each cnetid found in both, any
        difference in official name or display name and any VCards found
        on only one side.

        Returns: the report lines joined with newlines (an empty string
        when there is nothing to report).
        """
        # api staff, wagtail staff
        api_staff = set(get_all_library_cnetids_from_directory())
        wag_staff = set(get_all_library_cnetids_from_wagtail())

        output = []

        # Set differences are sorted before reporting so that two runs
        # over the same data print the same report.
        missing_in_api = wag_staff.difference(api_staff)
        if missing_in_api:
            output.append(
                "THE FOLLOWING STAFF APPEAR IN WAGTAIL, BUT NOT IN THE UNIVERSITY'S API:"
            )
            output.extend(sorted(missing_in_api))
            # if this happens, go into the user object and mark is_active False.

        missing_in_wagtail = api_staff.difference(wag_staff)
        if missing_in_wagtail:
            output.append(
                "THE FOLLOWING STAFF APPEAR IN THE UNIVERSITY'S API, BUT NOT IN WAGTAIL:"
            )
            output.extend(sorted(missing_in_wagtail))
            # if this happens, report that a new user needs to be created.

        for s in sorted(api_staff.intersection(wag_staff)):
            xml_string = get_xml_from_directory_api(
                'https://directory.uchicago.edu/api/v2/individuals/' + s +
                '.xml')
            api = get_individual_info_from_directory(xml_string)
            wag = get_individual_info_from_wagtail(s)

            # Names are interpolated with format() so a None on either
            # side is printed rather than breaking string concatenation.
            for field in ('officialName', 'displayName'):
                if api[field] != wag[field]:
                    output.append("{}'s {} is {}, not {}".format(
                        s, field, api[field], wag[field]))
                    # When displayName differs: in the user management
                    # command, change the following things in the User object:
                    # (note- in the User object, username = cnetid)
                    # prompt a human for first_name, last_name.
                    # In the StaffPage object,
                    # check displayName and officialName.

            api_vcards = api['title_department_subdepartments']
            wag_vcards = wag['title_department_subdepartments']

            diffs = api_vcards.difference(wag_vcards)
            if diffs:
                output.append("THE FOLLOWING VCARDS APPEAR FOR " + s +
                              " IN THE UNIVERSITY'S API, BUT NOT IN WAGTAIL:")
                output.extend(sorted(diffs))

            diffs = wag_vcards.difference(api_vcards)
            if diffs:
                output.append("THE FOLLOWING VCARDS APPEAR FOR " + s +
                              " IN WAGTAIL, BUT NOT IN THE UNIVERSITY'S API:")
                output.extend(sorted(diffs))

        return "\n".join(output)
    def handle(self, *args, **options):
        """
        Create or update the StaffPage for one cnetid from the campus
        directory.

        The cnetid is read from options['cnetid']; the process exits with
        status 1 when that option is missing. The person's record is
        fetched from the directory API. An existing StaffPage with the
        same cnetid is updated, otherwise a new one is created under the
        first StaffIndexPage. The page is then made its own page
        maintainer and saved.

        Returns: None.
        """
        try:
            cnetid = options['cnetid']
        except KeyError:
            sys.exit(1)

        # Look the staff index up once and reuse it below.
        staff_index = StaffIndexPage.objects.first()
        staff_index_path = staff_index.path
        staff_index_depth = staff_index.depth

        xml_string = get_xml_from_directory_api(
            'https://directory.uchicago.edu/api/v2/individuals/' + cnetid +
            '.xml')
        info = get_individual_info_from_directory(xml_string)
        next_available_path = get_available_path_under(staff_index_path)
        slug = make_slug(info['displayName'])

        # NOTE(review): the update branch writes url_path under
        # '/loop/staff/' while the create branch writes it under '/staff/'.
        # Confirm which prefix is intended.
        if StaffPage.objects.filter(cnetid=info['cnetid']).exists():
            # Update a StaffPage. 'path' is deliberately not among the
            # defaults: setting it destroyed the staff index.
            StaffPage.objects.update_or_create(
                cnetid=info['cnetid'],
                defaults={
                    'title': info['displayName'],
                    'display_name': info['displayName'],
                    'official_name': info['officialName'],
                    'slug': slug,
                    'url_path': '/loop/staff/' + slug + '/',
                    'depth': staff_index_depth + 1,
                })
            staff_index.fix_tree(destructive=False)
        else:
            StaffPage.objects.create(
                title=info['displayName'],
                slug=slug,
                path=next_available_path,
                depth=len(next_available_path) // 4,
                numchild=0,
                url_path='/staff/' + slug + '/',
                cnetid=info['cnetid'],
                display_name=info['displayName'],
                official_name=info['officialName'],
            )

        # Add new contact information.
        # for contact_info in info['title_department_subdepartments_dicts']:
        # add new email addresses, phone faculty exchange pairs,
        # telephone numbers.

        # Delete unnecessary contact information.

        staff_page = StaffPage.objects.get(cnetid=info['cnetid'])
        staff_page.page_maintainer = staff_page
        staff_page.save()