Beispiel #1
0
    def parse_links(self, element):
        """
        Parses a wikipedia 'page', adding links from the article to the graph.

        Redirect pages are ignored. For any other page the title and the
        revision text are read from ``element``, UTF-8 encoded, and the text
        is passed to ``links_counter.get_count_of_links``; the result is
        handed to ``self.graph.add_links`` together with the title.

        :param element: the page element. It is queried with ``find`` using
            namespace-prefixed tag names (presumably an ElementTree element -
            confirm against the caller).
        :return: always None. Parsing is best-effort: if anything goes wrong
            while reading or recording the page, the page is skipped and the
            reason is logged at DEBUG level.
        """
        try:
            if element.find(ns + 'redirect') is not None:
                return None

            title = element.find(ns + 'title').text.encode('utf-8')

            # The id value itself is never used, but a page without an <id>
            # child was previously skipped (the lookup failed inside this
            # try). Keep that outcome without binding a local that shadows
            # the builtin ``id``.
            if element.find(ns + 'id') is None:
                return None

            revision = element.find(ns + 'revision')
            text = revision.find(ns + 'text').text.encode('utf-8')

            count_of_links = links_counter.get_count_of_links(text)
            self.graph.add_links(title, count_of_links)
        except Exception:
            # Every error was silently swallowed here before. The skip is
            # kept, but the traceback is now recorded so that dropped pages
            # can be diagnosed when DEBUG logging is enabled.
            import logging  # local: the file's import block is not visible from this method
            logging.getLogger(__name__).debug(
                'Skipping wikipedia page that could not be parsed',
                exc_info=True)
            return None
Beispiel #2
0
# -*- coding: utf-8 -*-

__author__ = 'Peter.Ogden'
__copyright__ = 'Copyright (C) 2015, Auto Trader UK'

from backend.wikiparse.parser.links_counter import get_count_of_links
from copy import copy

# Location of the Wikipedia XML fixture that the mock data is generated from.
path = r'C:\Users\peter.ogden\PycharmProjects\jeph_pynode\backend\fixtures\test_wikipedia_snippet.xml'

# Read the whole fixture. The context manager closes the handle, which the
# previous bare open() never did (and it no longer rebinds the name ``file``).
# NOTE(review): no encoding is passed, so the platform default applies -
# confirm that it matches the fixture's encoding.
with open(path, 'r') as source_file:
    snippet = source_file.read()

# str.split() always returns the text *before* the first '<page>' as element
# 0. That chunk is never a page, so it is dropped instead of being turned
# into a bogus record.
list_of_snippets = snippet.split('<page>')[1:]
return_item = []

for page_snippet in list_of_snippets:
    # Everything after the last '<text' up to the following '</text>'.
    # The remainder of the opening tag is still part of this string.
    text_body = page_snippet.split('<text')[-1].split('</text>')[0]

    new_page = {
        # Content of the first <id> element in the chunk.
        'pageid': page_snippet.split('</id>')[0].split('<id>')[-1],
        # Content of the last <title> element in the chunk.
        'name': page_snippet.split('<title>')[-1].split('</title>')[0],
        # list(...) keeps the written repr a plain list literal even where
        # .keys() returns a view object rather than a list.
        'linkedArticles': list(get_count_of_links(text_body).keys()),
    }
    return_item.append(new_page)

# Dump the repr of the collected records.
with open('new_mock_data.py', 'w') as new_file:
    new_file.write(str(return_item))

Beispiel #3
0
# -*- coding: utf-8 -*-

__author__ = 'Peter.Ogden'
__copyright__ = 'Copyright (C) 2015, Auto Trader UK'

from backend.wikiparse.parser.links_counter import get_count_of_links
from copy import copy

# Location of the Wikipedia XML fixture that the mock data is generated from.
path = r'C:\Users\peter.ogden\PycharmProjects\jeph_pynode\backend\fixtures\test_wikipedia_snippet.xml'

# Read the whole fixture inside a context manager so the handle is closed;
# the previous bare open() leaked it and rebound the name ``file``.
# NOTE(review): no encoding is passed, so the platform default applies -
# confirm that it matches the fixture's encoding.
with open(path, 'r') as fixture:
    snippet = fixture.read()

# Element 0 of the split is whatever precedes the first '<page>' tag. It is
# never a page, so it is skipped rather than emitted as a bogus record.
list_of_snippets = snippet.split('<page>')[1:]
return_item = []

for page_chunk in list_of_snippets:
    # Content of the first <id> element in the chunk.
    page_id = page_chunk.split('</id>')[0].split('<id>')[-1]
    # Content of the last <title> element in the chunk.
    page_name = page_chunk.split('<title>')[-1].split('</title>')[0]
    # Everything after the last '<text' up to the following '</text>'.
    # The remainder of the opening tag is still part of this string.
    page_text = page_chunk.split('<text')[-1].split('</text>')[0]

    return_item.append({
        'pageid': page_id,
        'name': page_name,
        # list(...) keeps the written repr a plain list literal even where
        # .keys() returns a view object rather than a list.
        'linkedArticles': list(get_count_of_links(page_text).keys()),
    })

# Dump the repr of the collected records.
with open('new_mock_data.py', 'w') as new_file:
    new_file.write(str(return_item))