コード例 #1
0
 def parse_video_format(self, record):
     """
     Extract the format information of an AV manifestation.

     Looks up the optional ``efg:gauge``, ``efg:aspectRatio``, ``efg:sound``
     and ``efg:colour`` children of ``efg:avManifestation/efg:format`` and
     resolves each value against its codelist (gauge and aspect ratio via
     ``codelists.fromCode``, sound and colour via
     ``codelists.fromDescription``).

     Returns a dict holding any of the keys ``gauge``, ``aspect_ratio``,
     ``sound`` and ``colour`` (only those that resolved), or None when the
     record has no format element. Values that do not resolve are reported
     in ``self.warnings`` and left out of the result.
     """
     node = record.find('./efg:avManifestation/efg:format', self.ns)
     if node is None:
         return None

     # (child element, result key, warning prefix, codelist lookup).
     # The lookups are lambdas so a codelist is only touched when the
     # corresponding element actually carries a value.
     fields = (
         ('efg:gauge', 'gauge', 'Invalid gauge for: ',
          lambda value: codelists.fromCode(value, codelists.GAUGE)),
         ('efg:aspectRatio', 'aspect_ratio', 'Invalid aspect ratio for: ',
          lambda value: codelists.fromCode(value,
                                           codelists.ASPECT_RATIO)),
         ('efg:sound', 'sound', 'Invalid format sound for: ',
          lambda value: codelists.fromDescription(
              value, codelists.VIDEO_SOUND)),
         ('efg:colour', 'colour', 'Invalid format colour for: ',
          lambda value: codelists.fromDescription(value,
                                                  codelists.COLOUR)),
     )

     video_format = {}
     for tag, key, warning, lookup in fields:
         element = node.find(tag, self.ns)
         if element is None or element.text is None:
             continue
         # Strip BEFORE testing the 'n/a' placeholder: a padded ' N/A ' or
         # a blank value must be treated as absent, not as an invalid code.
         value = element.text.strip()
         if not value or value.lower() == 'n/a':
             continue
         code_el = lookup(value)
         if code_el is None:
             self.warnings.append(warning + value)
         else:
             video_format[key] = code_el[0]
     log.debug(video_format)
     return video_format
コード例 #2
0
 def parse_descriptions(self, record):
     """
     Collect the ``efg:description`` children of the record.

     Every description becomes a dict with ``source_ref`` (the ``source``
     attribute) and ``text`` (the stripped element text). When the ``type``
     and ``lang`` attributes are present, are not 'n/a' and resolve against
     their codelists, ``description_type`` and ``language`` are added too;
     values that do not resolve are reported in ``self.warnings``.

     Descriptions are optional (change of October 2018), so the returned
     list may be empty.
     """
     parsed = []
     for element in record.findall("efg:description", self.ns):
         entry = {}

         kind = element.get('type')
         if not (kind is None or kind.lower() == 'n/a'):
             kind_match = codelists.fromDescription(
                 kind, codelists.DESCRIPTION_TYPES)
             if kind_match is not None:
                 entry['description_type'] = kind_match[0]
             else:
                 self.warnings.append('Invalid description type for: ' +
                                      kind)

         language = element.get('lang')
         if not (language is None or language.lower() == 'n/a'):
             # The language codelist is queried with the lower-cased value,
             # while the warning keeps the value as written in the XML.
             language_match = codelists.fromCode(language.lower(),
                                                 codelists.LANGUAGE)
             if language_match is not None:
                 entry['language'] = language_match[0]
             else:
                 self.warnings.append('Invalid description language for: ' +
                                      language)

         entry['source_ref'] = element.get('source')
         entry['text'] = element.text.strip()
         log.debug('description: {}'.format(entry))
         parsed.append(entry)
     return parsed
コード例 #3
0
    def parse_record_sources(self, record, audio_visual=False):
        """
        Build the list of record sources together with their providers.

        Every ``efg:recordSource`` child of the record yields one pair
        ``[<record source dict>, <provider dict>]``. The record source holds
        ``source_id``; the provider holds ``name``, ``identifier`` (the
        upper-cased ``id`` attribute) and ``scheme``.

        Only the first record source receives ``is_shown_at``, taken from
        ``self.get_record_source_url(record, audio_visual)``.

        Raises ValueError when the provider ``schemeID`` does not resolve
        against ``codelists.PROVIDER_SCHEMES``.
        """
        sources = []
        source_nodes = record.findall("./efg:recordSource", self.ns)
        for position, source_node in enumerate(source_nodes):
            source = {
                'source_id':
                source_node.find('efg:sourceID', self.ns).text.strip()
            }
            log.debug('record source [ID]: %s' % source['source_id'])

            # record provider
            provider_node = source_node.find('efg:provider', self.ns)
            provider = {
                'name': provider_node.text.strip(),
                'identifier': provider_node.get('id').upper(),
            }
            scheme_id = provider_node.get('schemeID')
            scheme = codelists.fromDescription(scheme_id,
                                               codelists.PROVIDER_SCHEMES)
            if scheme is None:
                raise ValueError('Invalid provider scheme value for [%s]' %
                                 scheme_id)
            provider['scheme'] = scheme[0]
            log.debug('Record Provider: {}'.format(provider))

            # The url is bound to the first element only. Naive, but enough
            # because ONLY one record source (that of the archive) is
            # expected here.
            if position == 0:
                source['is_shown_at'] = self.get_record_source_url(
                    record, audio_visual)
            sources.append([source, provider])
        return sources
コード例 #4
0
 def get_colour(self, record):
     """
     Return the colour of a non-AV manifestation, if any.

     Reads ``efg:nonAVManifestation/efg:colour`` and resolves its text with
     ``codelists.fromDescription`` against ``codelists.COLOUR``.

     Returns the first item of the matching codelist entry, or None when
     the element is absent, empty, set to 'n/a' or not resolvable. An
     unresolvable value is also reported in ``self.warnings``.
     """
     node = record.find('./efg:nonAVManifestation/efg:colour', self.ns)
     if node is None or node.text is None:
         return None
     value = node.text.strip()
     # Same placeholder convention as the colour of the AV format: blank
     # and 'n/a' mean "no value", not "invalid value".
     if not value or value.lower() == 'n/a':
         return None
     code_el = codelists.fromDescription(value, codelists.COLOUR)
     if code_el is None:
         # Report the offending XML value; the lookup result is None here
         # and has nothing to show.
         self.warnings.append('Invalid format colour for: ' + value)
         return None
     return code_el[0]
コード例 #5
0
    def parse_related_agents(self, record):
        """
        Extract related agents as a list of Agent props with their related
        contribution activities in the creation.
        e.g. [[<type 'dict'>, ['Director', 'Screenplay']], etc.]

        Agents come from the ``efg:relPerson`` (agent_type 'P') and
        ``efg:relCorporate`` (agent_type 'C') children of the record, persons
        first. Each props dict holds ``names`` (a one-item list with the
        stripped ``efg:name`` text) and ``agent_type``.

        An activity is taken from ``efg:type`` when it is present, not blank,
        not 'n/a' and known to ``codelists.TYPE_OF_ACTIVITY``; unknown values
        are reported in ``self.warnings``. Agents sharing a name are merged
        into the first occurrence, accumulating their activities.
        """
        # Pair every node with its agent type at the point where its origin
        # is known, instead of re-deriving it from the (namespaced) tag.
        typed_nodes = [(node, 'P')
                       for node in record.findall('./efg:relPerson', self.ns)]
        typed_nodes.extend(
            (node, 'C')
            for node in record.findall('./efg:relCorporate', self.ns))

        agents = []
        for agent_node, agent_type in typed_nodes:
            name = agent_node.find('efg:name', self.ns).text.strip()

            activities = []
            rel_agent_type = agent_node.find('efg:type', self.ns)
            if rel_agent_type is not None and rel_agent_type.text is not None:
                activity = rel_agent_type.text.strip()
                if activity and activity.lower() != 'n/a':
                    code_el = codelists.fromDescription(
                        activity, codelists.TYPE_OF_ACTIVITY)
                    if code_el is None:
                        self.warnings.append('Invalid agent activity for: ' +
                                             activity)
                    else:
                        # The activity is kept as written in the XML; the
                        # codelist is only used to validate it.
                        activities.append(activity)

            # de-duplicate agents (by name only, as before)
            for known_props, known_activities in agents:
                if name in known_props['names']:
                    log.debug('FOUND agent: ' + name)
                    known_activities.extend(activities)
                    log.debug('added activities: {}'.format(activities))
                    break
            else:
                agents.append([{
                    'names': [name],
                    'agent_type': agent_type
                }, activities])

        log.debug('agents: %s', [item[0]['names'][0] for item in agents])
        return agents
コード例 #6
0
 def get_rights_status(self, record, audio_visual=False):
     """
     Return the rights status of the manifestation.

     The ``efg:rightsStatus`` element is read from ``efg:avManifestation``
     when ``audio_visual`` is true, otherwise from
     ``efg:nonAVManifestation``, and resolved with
     ``codelists.fromDescription`` against ``codelists.RIGHTS_STATUS``.

     Returns the first item of the matching codelist entry.

     Raises ValueError when the element is absent or has no text, or when
     its text does not resolve against the codelist.
     """
     inpath = 'efg:avManifestation' if audio_visual else 'efg:nonAVManifestation'
     node = record.find("./" + inpath + "/efg:rightsStatus", self.ns)
     # An element without text carries no status: report it as missing
     # rather than failing with AttributeError on ``None.strip()``.
     status = (node.text or '').strip() if node is not None else ''
     if not status:
         raise ValueError("Rights status is missing")
     code_el = codelists.fromDescription(status, codelists.RIGHTS_STATUS)
     if code_el is None:
         raise ValueError('Invalid rights status description for: ' +
                          status)
     return code_el[0]
コード例 #7
0
 def get_non_av_specific_type(self, record):
     """
     Return the specific type of a non-AV manifestation.

     Reads ``efg:nonAVManifestation/efg:specificType`` and resolves its
     text with ``codelists.fromDescription`` against
     ``codelists.NON_AV_SPECIFIC_TYPES``.

     Returns the first item of the matching codelist entry.

     Raises ValueError when the element is absent or has no text, or when
     its text does not resolve against the codelist.
     """
     node = record.find('./efg:nonAVManifestation/efg:specificType',
                        self.ns)
     # An element without text carries no type: report it as missing
     # rather than failing with AttributeError on ``None.strip()``.
     specific_type = (node.text or '').strip() if node is not None else ''
     if not specific_type:
         raise ValueError('Non-AV specific type is missing')
     code_el = codelists.fromDescription(specific_type,
                                         codelists.NON_AV_SPECIFIC_TYPES)
     if code_el is None:
         raise ValueError('Invalid Non-AV specific type for: ' +
                          specific_type)
     return code_el[0]
コード例 #8
0
    def parse_languages(self, record, audio_visual=False):
        """
        Extract language and usage if any. It returns an array of arrays as in
        the following example:
        [['fr','03'],['fr','25'],['ca','25']]
        The second nested element corresponds to the usage code in the
        controlled codelist; it is None when the ``usage`` attribute is
        absent, 'n/a' or not resolvable.

        The ``efg:language`` elements are read from ``efg:avManifestation``
        when ``audio_visual`` is true, otherwise from
        ``efg:nonAVManifestation``. Elements that are empty or 'n/a' are
        skipped silently; languages and usages that do not resolve against
        their codelists are reported in ``self.warnings``.
        """
        inpath = 'efg:avManifestation' if audio_visual else 'efg:nonAVManifestation'
        languages = []
        for node in record.findall("./" + inpath + "/efg:language", self.ns):
            # Normalise once: an empty element has text None, and padded
            # values must still match the placeholder and the codelist.
            raw_lang = (node.text or '').strip()
            if not raw_lang or raw_lang.lower() == 'n/a':
                continue
            lang_code = codelists.fromCode(raw_lang.lower(),
                                           codelists.LANGUAGE)
            if lang_code is None:
                self.warnings.append('Invalid language for: ' + raw_lang)
                continue
            lang = lang_code[0]

            usage = node.get('usage')
            if usage is not None and usage.lower() == 'n/a':
                usage = None
            if usage is not None:
                code_el = codelists.fromDescription(
                    usage, codelists.LANGUAGE_USAGES)
                if code_el is None:
                    self.warnings.append('Invalid language usage for: ' +
                                         usage)
                    # The language is still kept, just without a usage.
                    usage = None
                else:
                    usage = code_el[0]

            log.debug("lang code: {}, usage code: {}".format(lang, usage))
            languages.append([lang, usage])
        return languages
コード例 #9
0
    def parse_keywords(self, record):
        """
        Extract the keywords of the record.

        Every ``efg:term`` inside an ``efg:keywords`` child yields a dict with
        ``term``, ``termID`` and ``schemeID`` (the ``scheme`` attribute of the
        keywords element), plus ``keyword_type`` and ``language`` when the
        ``type`` / ``lang`` attributes resolve against their codelists.

        - Keywords elements of type 'Project' are ignored.
        - For type 'Form' the term must resolve against ``codelists.FORM``;
          'n/a' and unknown terms are dropped (the latter with a warning).
        - Terms without text are dropped.
        - ``termID`` is the ``id`` attribute when it parses as an integer,
          otherwise None (with a warning when it is present but invalid).

        Values that do not resolve are reported in ``self.warnings``.
        """
        keywords = []
        for node in record.findall("efg:keywords", self.ns):
            ktype = node.get('type')
            # filter ktype with value 'Project'
            if ktype == 'Project':
                continue
            lang = node.get('lang')

            for term in node.findall('efg:term', self.ns):
                keyword = {}
                if ktype is not None and ktype.lower() != 'n/a':
                    code_el = codelists.fromDescription(
                        ktype, codelists.KEYWORD_TYPES)
                    if code_el is None:
                        self.warnings.append('Invalid keyword type for: ' +
                                             ktype)
                    else:
                        keyword['keyword_type'] = code_el[0]
                        log.debug('keyword [type]: %s' %
                                  keyword['keyword_type'])

                if lang is not None and lang.lower() != 'n/a':
                    lang_code = codelists.fromCode(lang.lower(),
                                                   codelists.LANGUAGE)
                    if lang_code is None:
                        # ``lang`` is the attribute value itself (a str).
                        self.warnings.append('Invalid keyword language for: ' +
                                             lang)
                    else:
                        keyword['language'] = lang_code[0]
                        log.debug('language: {}'.format(keyword['language']))

                # An empty element has text None; a term without text cannot
                # become a keyword.
                term_text = (term.text or '').strip()
                if not term_text:
                    continue

                if ktype == 'Form':
                    # check term from a controlled IMC list
                    if term_text.lower() == 'n/a':
                        continue
                    code_el = codelists.fromCode(term_text, codelists.FORM)
                    if code_el is None:
                        self.warnings.append('Invalid form type for: ' +
                                             term_text)
                        continue
                    keyword['term'] = code_el[0]
                else:
                    keyword['term'] = term_text

                log.debug('keyword: {}'.format(keyword['term']))

                # The term id is optional; it is kept as the original string
                # but must parse as an integer. The key is always present so
                # consumers see the same shape for missing and invalid ids.
                term_id = term.get('id')
                keyword['termID'] = None
                if term_id is not None:
                    try:
                        int(term_id)
                    except ValueError:
                        self.warnings.append('Invalid keyword term id for: ' +
                                             term_id +
                                             '. Expected integer.')
                    else:
                        keyword['termID'] = term_id
                keyword['schemeID'] = node.get('scheme')
                keywords.append(keyword)
        return keywords