def parse_links(self, element):
    """Record the outgoing links of one Wikipedia ``page`` element.

    Redirect pages are ignored. For every other page the title and the
    revision text are UTF-8 encoded, the text is passed to
    ``links_counter.get_count_of_links`` and the result is handed to
    ``self.graph.add_links`` together with the title.

    Parsing is best-effort: any exception raised along the way (e.g. a
    page without a title, id, revision or text) makes the page be skipped
    instead of aborting the caller. The failure is reported at DEBUG level
    so that skipped pages can be diagnosed.

    :param element: the ``page`` element; it is queried through ``find``
        using the module-level namespace prefix ``ns``.
    :return: always ``None``.
    """
    # Function-scope import so this block does not rely on a file-level
    # ``import logging`` being present.
    import logging

    try:
        if element.find(ns + 'redirect') is not None:
            return None

        title = element.find(ns + 'title').text.encode('utf-8')
        # The id value itself is not used. The lookup is kept so that a
        # page lacking an <id> element is still skipped, as before.
        page_id = element.find(ns + 'id').text
        revision = element.find(ns + 'revision')
        text = revision.find(ns + 'text').text.encode('utf-8')

        count_of_links = links_counter.get_count_of_links(text)
        self.graph.add_links(title, count_of_links)
    except Exception:
        # Deliberately broad: one bad page must not stop the whole run.
        logging.getLogger(__name__).debug(
            'Skipping page that could not be parsed', exc_info=True)
        return None
# -*- coding: utf-8 -*-
# Script: reads the Wikipedia XML test snippet, turns every '<page>'-separated
# chunk into a dict (page id, title, linked articles) and writes the repr of
# the resulting list to 'new_mock_data.py' in the current working directory.
__author__ = 'Peter.Ogden'
__copyright__ = 'Copyright (C) 2015, Auto Trader UK'

from backend.wikiparse.parser.links_counter import get_count_of_links
from copy import copy  # no longer used below; kept as a file-level import

path = r'C:\Users\peter.ogden\PycharmProjects\jeph_pynode\backend\fixtures\test_wikipedia_snippet.xml'


def _page_from_chunk(chunk):
    """Build the mock-data dict for one '<page>'-separated chunk of XML text."""
    # Strings are immutable, so the copy() calls that used to wrap ``chunk``
    # returned the very same object and were dropped.
    page_id = chunk.split('</id>')[0].split('<id>')[-1]
    name = chunk.split('<title>')[-1].split('</title>')[0]
    # Everything after the last '<text' up to the next '</text>'; this still
    # includes the remainder of the opening tag (its attributes and '>').
    text = chunk.split('<text')[-1].split('</text>')[0]
    return {
        'pageid': page_id,
        'name': name,
        # list(...) keeps the written output a plain list on Python 3 as well,
        # where .keys() is a view (assumes get_count_of_links returns a dict).
        'linkedArticles': list(get_count_of_links(text).keys()),
    }


# Context manager so the fixture file handle is closed after reading.
with open(path, 'r') as xml_file:
    snippet = xml_file.read()

# NOTE(review): the text before the first '<page>' is also turned into an
# entry, exactly as in the original script - confirm whether that is wanted.
list_of_snippets = snippet.split('<page>')
return_item = [_page_from_chunk(chunk) for chunk in list_of_snippets]

with open('new_mock_data.py', 'w') as new_file:
    new_file.write(str(return_item))
# -*- coding: utf-8 -*-
# Script: reads the Wikipedia XML test snippet, turns every '<page>'-separated
# chunk into a dict (page id, title, linked articles) and writes the repr of
# the resulting list to 'new_mock_data.py' in the current working directory.
__author__ = 'Peter.Ogden'
__copyright__ = 'Copyright (C) 2015, Auto Trader UK'

from backend.wikiparse.parser.links_counter import get_count_of_links
from copy import copy  # no longer used below; kept as a file-level import

path = r'C:\Users\peter.ogden\PycharmProjects\jeph_pynode\backend\fixtures\test_wikipedia_snippet.xml'


def _page_from_chunk(chunk):
    """Build the mock-data dict for one '<page>'-separated chunk of XML text."""
    # Strings are immutable, so the copy() calls that used to wrap ``chunk``
    # returned the very same object and were dropped.
    page_id = chunk.split('</id>')[0].split('<id>')[-1]
    name = chunk.split('<title>')[-1].split('</title>')[0]
    # Everything after the last '<text' up to the next '</text>'; this still
    # includes the remainder of the opening tag (its attributes and '>').
    text = chunk.split('<text')[-1].split('</text>')[0]
    return {
        'pageid': page_id,
        'name': name,
        # list(...) keeps the written output a plain list on Python 3 as well,
        # where .keys() is a view (assumes get_count_of_links returns a dict).
        'linkedArticles': list(get_count_of_links(text).keys()),
    }


# Context manager so the fixture file handle is closed after reading.
with open(path, 'r') as xml_file:
    snippet = xml_file.read()

# NOTE(review): the text before the first '<page>' is also turned into an
# entry, exactly as in the original script - confirm whether that is wanted.
list_of_snippets = snippet.split('<page>')
return_item = [_page_from_chunk(chunk) for chunk in list_of_snippets]

with open('new_mock_data.py', 'w') as new_file:
    new_file.write(str(return_item))