def list_mylist3(xml_fh, completed=False):
    """Collect the ``<anime>`` entries of a mylist XML document.

    Args:
        xml_fh: Object exposing ``getvalue()`` that returns the XML text
            (presumably a StringIO-like buffer -- confirm against callers).
        completed: When truthy, only entries whose ``<status watched>``
            attribute equals ``'1'`` are returned; otherwise every entry
            is returned.

    Returns:
        A list of dicts, one per ``<anime>`` element, holding ``'type'``,
        ``keys.ANI_ID``, ``keys.EPS``, ``keys.COMPLETED`` and, when a
        ``<title type="main">`` exists, ``keys.ANI_NAME``.  Each dict is
        also passed to ``pass_year`` together with the ``year`` attribute
        (presumably to record year fields -- the helper is defined
        elsewhere).
    """
    soup = bs4.BeautifulSoup(xml_fh.getvalue(), features='xml')

    # Walk forward from <custom> through its following siblings and stop on
    # the <animes> element (or on the last sibling if none is named so).
    node = soup.find('root').find('custom')
    while node.nextSibling is not None:
        node = node.nextSibling
        if node.name == 'animes':
            break

    collected = []
    for anime in node.findAll('anime'):
        entry = {}
        pass_year(entry, anime['year'])
        entry['type'] = anime['type']
        entry[keys.ANI_ID] = anime['id']
        entry[keys.EPS] = anime.find('neps')['cnt']
        entry[keys.COMPLETED] = str(anime.find('status')['watched']) == '1'

        # The last <title type="main"> wins if several are present.
        for name_tag in anime.find('titles').findAll('title'):
            if name_tag['type'] == 'main':
                entry[keys.ANI_NAME] = name_tag.text

        if not completed or entry[keys.COMPLETED]:
            collected.append(entry)
    return collected
def list_company3(html_fh):
    """Parse the ``stafflist`` table of a company HTML page.

    Cells are visited row by row.  A ``name`` cell whose link href contains
    exactly one ``show=anime&aid=<digits>`` match starts a new entry; every
    later ``credit``/``year``/``type``/``eps`` cell is attached to the
    entry that is current at that point, until the next such name cell.

    Args:
        html_fh: Object exposing ``getvalue()`` that returns the HTML text
            (presumably a StringIO-like buffer -- confirm against callers).

    Returns:
        A list of dicts with ``'link'``, ``'name'`` and ``'anidb_id'``,
        plus, when the matching cells were seen, ``'credit'`` (list of
        cell texts), ``'type'`` and ``'episodes'``.  Year cells are handed
        to ``pass_year`` (helper defined elsewhere; presumably it stores
        year fields on the dict).

    Raises:
        AttributeError: If the page has no ``<table id="stafflist">``
            (``soup.find`` then yields ``None``).
    """
    titles = []
    # Placeholder that collects cells seen before the first anime link; it
    # always ends up at index 0 and is discarded before returning.
    title = {}
    # NOTE(review): no parser is named, so bs4 chooses one itself; left
    # unchanged because naming one could alter how the page is parsed.
    soup = bs4.BeautifulSoup(html_fh.getvalue())
    table = soup.find('table', id='stafflist')

    for tr in table.findAll('tr'):
        for td in tr.findAll('td'):
            # .get() rather than [] so that a cell without a class
            # attribute is skipped instead of raising KeyError.
            css_class = td.get('class')
            if css_class == ['name']:
                link = td.find('a')
                if link:
                    ani_id_data = re.findall(r'show=anime&aid=(\d+)',
                                             link['href'])
                    if len(ani_id_data) == 1:
                        # Close the entry built so far and start a new one.
                        titles.append(title)
                        title = {
                            'link': link['href'],
                            'name': link.text,
                            'anidb_id': ani_id_data[0],
                        }
            elif css_class == ['credit']:
                # dict.has_key() no longer exists on Python 3; setdefault
                # behaves the same on both Python 2 and 3.
                title.setdefault('credit', []).append(td.text)
            elif css_class == ['year']:
                pass_year(title, str(td.text))
            elif css_class == ['type']:
                title['type'] = str(td.text)
            elif css_class == ['eps']:
                title['episodes'] = str(td.text)

    # Flush the entry still being built, then drop the leading placeholder.
    titles.append(title)
    del titles[0]
    return titles
def list_person3(html_fh):
    """Parse the ``characterlist`` table of a person HTML page.

    One dict is built per ``<tr>``.  Inside a row, a ``name`` cell's link
    fills the anime fields when its href contains exactly one
    ``show=anime&aid=<digits>`` match and the character fields when it
    contains exactly one ``show=character&charid=<digits>`` match; ``year``,
    ``type`` and ``eps`` cells fill the remaining fields.

    Args:
        html_fh: Object exposing ``getvalue()`` that returns the HTML text
            (presumably a StringIO-like buffer -- confirm against callers).

    Returns:
        A list with one dict per table row except the first (presumably
        the header row).  Possible keys: ``keys.ANI_LINK``,
        ``keys.ANI_NAME``, ``keys.ANI_ID``, ``keys.CHAR_NAME``,
        ``keys.CHAR_LINK``, ``keys.CHAR_ID``, ``keys.TYPE``, ``keys.EPS``,
        plus whatever ``pass_year`` (defined elsewhere) stores for year
        cells.  An empty list is returned when the table has no rows.

    Raises:
        AttributeError: If the page has no ``<table id="characterlist">``
            (``soup.find`` then yields ``None``).
    """
    # Compiled once instead of once per name cell.
    ani_id_regexp = re.compile(r'show=anime&aid=(\d+)')
    char_id_regexp = re.compile(r'show=character&charid=(\d+)')

    soup = bs4.BeautifulSoup(html_fh.getvalue())
    table = soup.find('table', id='characterlist')

    titles = []
    for tr in table.findAll('tr'):
        title = {}
        for td in tr.findAll('td'):
            # .get() rather than [] so that a cell without a class
            # attribute is skipped instead of raising KeyError.
            css_class = td.get('class')
            if css_class == ['name']:
                link = td.find('a')
                if not link:
                    continue
                href = link['href']

                ani_id_data = ani_id_regexp.findall(href)
                if len(ani_id_data) == 1:
                    title[keys.ANI_LINK] = href
                    title[keys.ANI_NAME] = link.text
                    title[keys.ANI_ID] = ani_id_data[0]

                char_id_data = char_id_regexp.findall(href)
                if len(char_id_data) == 1:
                    title[keys.CHAR_NAME] = link.text
                    title[keys.CHAR_LINK] = href
                    title[keys.CHAR_ID] = char_id_data[0]
            elif css_class == ['year']:
                pass_year(title, str(td.text))
            elif css_class == ['type']:
                title[keys.TYPE] = str(td.text)
            elif css_class == ['eps']:
                title[keys.EPS] = str(td.text)
        titles.append(title)

    # Drop the first row's entry; slicing (unlike ``del titles[0]``) does
    # not raise IndexError when the table contained no rows at all.
    return titles[1:]