Beispiel #1
0
    def update_aliases_from_asc(self, filename):
        '''Rebuilds self._aliases from the entity table of the database at filename

        Every row of the entity table whose fourth column equals 1 contributes its
        second column as a name (presumably the label of a character entity; confirm
        against the table schema). Those names are handed to auto_expand_aliases and
        the alias -> name mapping it returns is regrouped into name -> [aliases].

        Args:
            filename: value passed to connect() to open the database.
        '''
        connection = connect(filename)
        try:
            rows = connection.cursor().execute('SELECT * FROM entity').fetchall()
        finally:
            # always release the database handle, even when the query fails
            connection.close()

        characters = {row[1]: [row[1]] for row in rows if row[3] == 1}

        self._aliases = {}
        for alias, fullname in auto_expand_aliases(characters).items():
            self._aliases.setdefault(fullname, []).append(alias)
Beispiel #2
0
    def get_characters(self, entity_id):
        '''Gets book's character data

        Follows every link under the "Characters" info box of the loaded page whose
        href contains '/characters/', fetches the linked page with open_url and reads
        the character's description and comma separated aliases from it. When
        prefs['expand_aliases'] is set, the aliases returned by auto_expand_aliases
        are appended to the matching characters.

        Args:
            entity_id: id given to the first character found; every further
                character gets the next integer.

        Returns:
            None when no page source is loaded, otherwise a dict mapping entity id to
            {'label': ..., 'description': ..., 'aliases': [...]}.
        '''
        if self._page_source is None:
            return None

        def squeeze(text):
            '''Collapses whitespace runs into single spaces and trims both ends'''
            return re.sub(r'\s+', ' ', text).strip()

        def recode(text):
            '''Applies the utf-8 -> latin-1 round trip when the string type offers decode()'''
            decode = getattr(text, 'decode', None)
            if decode is None:
                # Python 3 str has no decode(); calling it raised AttributeError for
                # every character, so the text is kept unchanged here.
                # NOTE(review): the round trip looks like a Python 2 mojibake
                # workaround - confirm whether Python 3 pages need a repair too.
                return text
            return str(decode('utf-8').encode('latin-1'))

        links = self._page_source.xpath(
            '//div[@class="clearFloats" and contains(., "Characters")]//div[@class="infoBoxRowItem"]//a'
        )
        character_data = {}
        for link in links:
            href = link.get('href')
            # get() yields None for anchors without an href and text is None when the
            # anchor starts with a child element; neither can name a character
            if not href or '/characters/' not in href or link.text is None:
                continue

            resp = open_url(self._connection, href)
            if not resp:
                continue

            char_page = html.fromstring(resp)
            if char_page is None:
                continue

            desc_nodes = char_page.xpath(
                '//div[@class="workCharacterAboutClear"]/text()')
            desc = squeeze(desc_nodes[0]) if desc_nodes else ''
            desc = recode(desc) if desc else 'No description found on Goodreads.'

            alias_nodes = char_page.xpath(
                '//div[@class="grey500BoxContent" and contains(.,"aliases")]/text()'
            )
            alias_list = []
            for node in alias_nodes:
                for part in node.split(','):
                    cleaned = squeeze(part)
                    if cleaned:
                        alias_list.append(cleaned)

            character_data[entity_id] = {
                'label': recode(link.text),
                'description': desc,
                'aliases': alias_list,
            }
            entity_id += 1

        if prefs['expand_aliases']:
            names_by_id = {
                ent_id: [data['label']] + data['aliases']
                for ent_id, data in character_data.items()
            }
            # only the per-character alias lists are mutated, never the mapping
            # being iterated, so no defensive copy of items() is needed
            for alias, ent_id in auto_expand_aliases(names_by_id).items():
                character_data[ent_id]['aliases'].append(alias)

        return character_data
    def get_characters(self, entity_id):
        '''Gets book's character data

        Walks the links inside the "Characters" info box of the loaded page, opens
        each one whose href contains '/characters/' through open_url and extracts the
        description and the comma separated aliases from the character page. If
        prefs['expand_aliases'] is set, aliases produced by auto_expand_aliases are
        added to the characters they belong to.

        NOTE: relies on the Python 2 only builtin unicode.

        Args:
            entity_id: id used for the first character; incremented by one for each
                character that is stored.

        Returns:
            None when no page source is loaded, otherwise a dict of
            entity id -> {'label': ..., 'description': ..., 'aliases': [...]}.
        '''
        if self._page_source is None:
            return None

        def normalize(text):
            '''Replaces whitespace runs with one space and strips the ends'''
            return re.sub(r'\s+', ' ', text).strip()

        anchors = self._page_source.xpath('//div[@class="clearFloats" and contains(., "Characters")]'
                                          '//div[@class="infoBoxRowItem"]//a')
        character_data = {}
        for anchor in anchors:
            href = anchor.get('href')
            # a missing href comes back as None, which cannot be searched with "in"
            if not href or '/characters/' not in href:
                continue
            # text is None when the anchor starts with a child element; there is
            # no label to store, so skip before spending a request on it
            if anchor.text is None:
                continue

            resp = open_url(self._connection, href)
            if not resp:
                continue

            char_page = html.fromstring(resp)
            if char_page is None:
                continue

            found = char_page.xpath('//div[@class="workCharacterAboutClear"]/text()')
            summary = normalize(found[0]) if found else ''
            if summary:
                desc = unicode(summary.decode('utf-8').encode('latin-1'))
            else:
                desc = u'No description found on Goodreads.'

            alias_text = char_page.xpath('//div[@class="grey500BoxContent" and contains(.,"aliases")]/text()')
            candidates = (normalize(piece) for chunk in alias_text for piece in chunk.split(','))
            alias_list = [candidate for candidate in candidates if candidate]

            character_data[entity_id] = {'label': unicode(anchor.text.decode('utf-8').encode('latin-1')),
                                         'description': desc,
                                         'aliases': alias_list}
            entity_id += 1

        if prefs['expand_aliases']:
            known_names = {}
            for ent_id, details in character_data.items():
                known_names[ent_id] = [details['label']] + details['aliases']

            for alias, ent_id in auto_expand_aliases(known_names).items():
                character_data[ent_id]['aliases'].append(alias)

        return character_data