コード例 #1
0
        def get_codes(self):
            """Return the authority codes of this item, computing them on first use.

            When ``self.codes`` is already truthy it is returned untouched.
            Otherwise it is initialised from a copy of ``CODES`` and filled with
            the ``VIAF``, ``SBN`` and ``LCCN`` values found in the
            'controllo_di_autorità' template returned by
            ``wtp.data_from_templates(self.name, lang='it')``. Values are stored
            UTF-8 encoded under the lowercase keys ``viaf``, ``sbn`` and ``lccn``.

            Any exception raised while fetching the templates is logged and the
            lookup proceeds as if the page had no templates.
            """
            # Cached result from an earlier call.
            if self.codes:
                return self.codes

            self.codes = CODES.copy()

            page_templates = []
            try:
                if self.name:
                    page_templates = wtp.data_from_templates(self.name, lang='it')
            except Exception as err:
                logger.error(err)

            # Collect every template whose normalised name is the authority
            # control one; only the first match is used.
            matches = []
            for template in page_templates:
                if normalize_string(template['name']) == 'controllo_di_autorità':
                    matches.append(template)

            if matches:
                authority_data = matches[0]['data']
            else:
                authority_data = {}

            # Template field -> key in self.codes, in the order they are read.
            field_to_code = (('VIAF', 'viaf'), ('SBN', 'sbn'), ('LCCN', 'lccn'))
            for field, code_key in field_to_code:
                raw_value = authority_data.get(field)
                if raw_value is not None:
                    self.codes[code_key] = raw_value.encode('utf-8')

            return self.codes
コード例 #2
0
ファイル: item.py プロジェクト: CristianCantoro/sbntoolkit
        def get_codes(self):
            """Return the authority codes of this item, computing them on first use.

            When ``self.codes`` is already truthy it is returned untouched.
            Otherwise it is initialised from a copy of ``CODES`` and filled with
            the ``VIAF``, ``SBN`` and ``LCCN`` values found in the
            'controllo_di_autorità' template returned by
            ``wtp.data_from_templates(self.name, lang='it')``. Values are stored
            UTF-8 encoded under the lowercase keys ``viaf``, ``sbn`` and ``lccn``.

            Any exception raised while fetching the templates is logged and the
            lookup proceeds as if the page had no templates.
            """
            # Cached result from an earlier call.
            if self.codes:
                return self.codes

            self.codes = CODES.copy()

            page_templates = []
            try:
                if self.name:
                    page_templates = wtp.data_from_templates(self.name, lang='it')
            except Exception as err:
                logger.error(err)

            # Collect every template whose normalised name is the authority
            # control one; only the first match is used.
            matches = []
            for template in page_templates:
                if normalize_string(template['name']) == 'controllo_di_autorità':
                    matches.append(template)

            if matches:
                authority_data = matches[0]['data']
            else:
                authority_data = {}

            # Template field -> key in self.codes, in the order they are read.
            field_to_code = (('VIAF', 'viaf'), ('SBN', 'sbn'), ('LCCN', 'lccn'))
            for field, code_key in field_to_code:
                raw_value = authority_data.get(field)
                if raw_value is not None:
                    self.codes[code_key] = raw_value.encode('utf-8')

            return self.codes
コード例 #3
0
    logger.debug('no. of keys already collected: {no}'.format(
        no=len(set(wikipedia.keys()))))
    logger.debug('no. of pages in it.wiki with authority control, still to get: {no}'.format(
        no=len(wikipages_to_get)))

    # Walk every page still to be fetched and read the VIAF / SBN codes from
    # its 'controllo_di_autorità' template, when the page has one.
    count = 0
    for page in wikipages_to_get:
        count += 1
        logger.debug(count)

        viaf_code = None
        sbn_code = None
        templates = []

        # Best effort: a page whose templates cannot be fetched is treated as
        # having none. Only Exception is caught so that KeyboardInterrupt and
        # SystemExit still propagate, and the failure is logged rather than
        # silently dropped.
        try:
            templates = wtp.data_from_templates(page, lang='it')
        except Exception as e:
            logger.error(e)

        # Only the first matching template is used.
        ac_template = [t for t in templates
                       if normalize_string(t['name']) == 'controllo_di_autorità']
        ac_data = ac_template[0]['data'] if ac_template else {}

        logger.debug('page: %s, ac_data: %s' % (page, ac_data))
        if ac_data.get('VIAF') is not None:
            logger.debug('VIAF from template')
            viaf_code = ac_data['VIAF']
        if ac_data.get('SBN') is not None:
            logger.debug('SBN from template')
            sbn_code = ac_data['SBN']
コード例 #4
0
ファイル: template.py プロジェクト: SpazioDati/Nuts4Nuts
    def analyze_templates(self):
        """Extract place candidates from the templates of ``self.page``.

        The page templates are fetched with ``wtp.data_from_templates``. For
        each template whose treated name is a key of
        ``TEMPLATES_TO_ANALYZE_IT``, the attributes listed there are read,
        reduced to a bare place name and looked up with ``NR.query``. A place
        is kept when at least one of its types is in ``ALLOWEDTYPES``.

        Returns:
            list: the ``PlaceCandidate`` objects that were kept, each with
            ``score`` set to ``1.0 / len(result)``. The list is empty when no
            place qualified or when fetching the templates raised
            ``ValueError``.
        """
        logger.debug(self.page)
        finalplaces = []
        try:
            templates = wtp.data_from_templates(self.page, self.lang)
        except ValueError:
            templates = []

        logger.debug(templates)

        for t in templates:
            name = self._treat(t['name'])
            if name not in TEMPLATES_TO_ANALYZE_IT:
                continue

            attributes = TEMPLATES_TO_ANALYZE_IT[name]
            tdata = {self._treat(k): v.lower() for k, v in t['data'].iteritems()}
            logger.debug(tdata)
            locations = [tdata[attr]
                         for attr in attributes
                         if (attr in tdata and tdata[attr] != '')]

            logger.debug(locations)
            for place in locations:
                logger.debug(place)
                place = place.replace('italia', '')
                place = place.strip().strip(',')

                # Keep only what follows the first PIXELS match.
                pixels = PIXELS.search(place)
                if pixels:
                    logger.debug(pixels.start())
                    logger.debug(pixels.end())
                    place = place[pixels.end():].strip()

                # Split on CAP and keep the piece chosen by _find_index.
                if CAP.search(place):
                    split = CAP.split(place)
                    split = [s.strip() for s in split if s.strip() != '']
                    not_address = self._find_index(split)
                    place = split[not_address]

                # Cut out the first PARENTHESIS match, then the first CURLY one.
                parenthesis = PARENTHESIS.search(place)
                if parenthesis:
                    place = place[:parenthesis.start()] + place[parenthesis.end():]

                curly = CURLY.search(place)
                if curly:
                    place = place[:curly.start()] + place[curly.end():]

                if ',' in place:
                    split = place.split(',')
                    split = [s.strip() for s in split if s.strip() != '']
                    not_address = self._find_index(split)
                    logger.debug(not_address)
                    place = split[not_address]

                place = place.split(' - ')[0]

                place = place.strip(',').strip()
                place = place.replace('[', '').replace(']', '')
                logger.debug(place)

                reconres = NR.query(query=place)

                # No result: retry with the last piece of a DI split.
                if not reconres and DI.search(place):
                    place = DI.split(place)[-1]
                    reconres = NR.query(query=place)

                # Start from empty values for every place, so that a place
                # with no reconciliation result cannot inherit the types and
                # fathers computed for the previous one.
                types = []
                fathers = []
                if reconres:
                    types = self._get_types(reconres)
                    fathers = self._get_fathers(reconres)

                logger.debug('place: name={name}, types={types}'.format(
                             name=place.encode('utf-8'),
                             types=types))

                if set(lau for lau, id_ in types).intersection(ALLOWEDTYPES):
                    candidate = PlaceCandidate(name=place.encode('utf-8').title(),
                                               fathers=fathers)
                    place_type = candidate.set_type_from_candidates(types)
                    candidate.set_id_from_candidates(types, place_type)
                    finalplaces.append(candidate)

        # Every candidate gets the same share of the total score.
        if finalplaces:
            score = 1.0 / float(len(finalplaces))
            for cand in finalplaces:
                cand.score = score

        return finalplaces