def get_user_history_results(data):
    """Extract per-row history records from the table rows found in *data*.

    Every ``tr`` element in *data* is visited in document order. The row at
    index 0 is skipped (presumably a header row -- confirm against the page
    being scraped). For each remaining row a dict is built with:

    * ``title``, ``city``, ``date``, ``place``, ``time`` -- the text of the
      first five ``td`` cells, in that order;
    * ``results`` -- the ``href`` attribute taken from the ``a`` element(s)
      inside the sixth ``td`` cell.

    Returns:
        A dict mapping each row's index among the ``tr`` elements to its
        record dict. Index 0 is never present.
    """
    # Names of the text columns; position in the tuple == td cell position.
    text_columns = ('title', 'city', 'date', 'place', 'time')
    history_by_row = {}

    def collect_row(index, node):
        # Row 0 is never recorded.
        if index == 0:
            return

        cells = PyQuery(node)('td')
        record = {
            column: cells.eq(position).text()
            for position, column in enumerate(text_columns)
        }
        # The sixth cell carries a link rather than plain text.
        record['results'] = cells.eq(5)('a').attr("href")
        history_by_row[index] = record

    PyQuery(data)('tr').each(collect_row)
    return history_by_row
예제 #2
0
        row_header = PyQuery(row.find('td')[0]).text()

        if not row_header:
            # This is an intermediary header row to remind readers which column is which.
            continue

        target_cell_imgs = PyQuery(row.find('td')[column]).find('img[alt$=svg]')

        if not target_cell_imgs:
            continue

        row_sign_filenames = []

        target_cell_imgs.each(lambda: row_sign_filenames.append(
            {
                'type': 'sign filename',
                'text': PyQuery(this).attr('alt').replace(' ', '_'),
            }
        ))

        filenames_to_remove = []
        for filename in row_sign_filenames:
            # Download the SVG(s).
            try:
                image_page = PyQuery('http://en.wikipedia.org/wiki/File:{}'.format(filename['text']))
                svg_link = image_page.find('a.internal').filter(lambda: this.text_content().strip() == 'Original file')
                svg_url = svg_link.attr('href')

                if not svg_url:
                    print(
                        'Could not find URL for {table_header}: {row_header}: {filename}\n'.format(
                            table_header=table_header,