def get_user_history_results(data):
    """Extract a user's result history from table markup.

    Every ``tr`` element that PyQuery finds in ``data`` is treated as one
    table row.  The first matched row (index 0) is taken to be the column
    header and is skipped, so the keys of the returned dict start at 1.

    Args:
        data: Markup handed straight to ``PyQuery`` -- presumably the HTML
            of a results page; confirm against the caller.

    Returns:
        A dict mapping each row's position among the matched ``tr``
        elements to a dict with the keys ``title``, ``city``, ``date``,
        ``place`` and ``time`` (the text of cells 0-4, in that order) and
        ``results`` (the ``href`` of the link inside cell 5).
    """
    # Cell position -> output key for the plain-text columns.
    text_columns = ('title', 'city', 'date', 'place', 'time')
    user_history = {}

    for index, node in enumerate(PyQuery(data)('tr')):
        if index == 0:
            # Header row: carries column captions, not a result.
            continue

        # Query the cells once per row instead of once per column.
        cells = PyQuery(node)('td')
        record = {
            key: cells.eq(column).text()
            for column, key in enumerate(text_columns)
        }
        record['results'] = cells.eq(5)('a').attr('href')
        user_history[index] = record

    return user_history
row_header = PyQuery(row.find('td')[0]).text() if not row_header: # This is an intermediary header row to remind readers which column is which. continue target_cell_imgs = PyQuery(row.find('td')[column]).find('img[alt$=svg]') if not target_cell_imgs: continue row_sign_filenames = [] target_cell_imgs.each(lambda: row_sign_filenames.append( { 'type': 'sign filename', 'text': PyQuery(this).attr('alt').replace(' ', '_'), } )) filenames_to_remove = [] for filename in row_sign_filenames: # Download the SVG(s). try: image_page = PyQuery('http://en.wikipedia.org/wiki/File:{}'.format(filename['text'])) svg_link = image_page.find('a.internal').filter(lambda: this.text_content().strip() == 'Original file') svg_url = svg_link.attr('href') if not svg_url: print( 'Could not find URL for {table_header}: {row_header}: {filename}\n'.format( table_header=table_header,