def main():
    """Parse the file named by the first command-line argument and walk it.

    The file is read through ``ANTLRFileStream``, tokenised by ``JSONLexer``
    and parsed with ``JSONParser`` starting at its ``json`` rule. The parse
    tree is printed, walked once with an ``XMLEmitter`` listener, and then
    printed a second time.

    Raises:
        IndexError: If no file path was supplied on the command line.
    """
    ais = ANTLRFileStream(sys.argv[1])
    lexer = JSONLexer(ais)
    tokens = CommonTokenStream(lexer)
    parser = JSONParser(tokens)
    parser.setBuildParseTree(True)
    tree = parser.json()
    # Parenthesised single-argument print behaves the same under the Python 2
    # print statement and the Python 3 print function.
    print(tree)
    # Alternative dump: print(tree.toStringTree(parser))

    walker = ParseTreeWalker()
    converter = XMLEmitter()
    walker.walk(converter, tree)
    print(tree)
Exemple #2
0
def do_harvest(query, iterations):
    """Build a keyword graph from search results for ``query``.

    Each result page is fetched, every link inside the ``searchresult`` table
    is followed, and the "Titel", "Person(en)" and "Schlagwörter" fields of
    the linked book page are collected into ``book_data``. For a book with at
    least one keyword, every keyword is added to the graph as a node, and the
    first keyword is connected to each other keyword by an edge labelled with
    the book's title.

    Args:
        query: Search term substituted into the query format strings.
        iterations: Loop budget. Paging continues while this value is greater
            than the number of distinct keywords collected so far; it is
            decremented once per processed result page.

    Returns:
        Whatever ``JSONParser.JSONParser().tostring(graph)`` produces for the
        populated graph.
    """
    # NOTE(review): book_data is shared by all books; whether values from a
    # previous book can linger depends on the get_data_from_book_info*
    # helpers -- confirm they overwrite every field.
    book_data = {}
    current_position = 0
    query_string = QUERY_FORMAT_STRING.format(query)

    graph = Graph()
    parser = JSONParser.JSONParser()

    # Maps keyword -> graph node most recently created for that keyword.
    nodes = {}
    while iterations > len(nodes):
        page = requests.get(query_string)
        tree = html.fromstring(page.content)

        links = tree.xpath('//table[@id="searchresult"]//a/@href')

        # An empty result page means there is nothing further to harvest.
        if not links:
            break

        for link in links:
            book_info_response = requests.get(BASE_URL_DNB + link)
            get_data_from_book_info(book_data, book_info_response, "Titel")
            get_data_from_book_info(book_data, book_info_response,
                                    "Person(en)")
            get_data_from_book_info_link(book_data, book_info_response,
                                         "Schlagwörter")

            keywords = book_data['Schlagwörter']
            if len(keywords) > 0:
                for v in book_data.values():
                    print(v)

                for keyword in keywords:
                    nodes[keyword] = graph.add_node(keyword)

                # Star topology: the first keyword is the hub for this book.
                first_keyword = keywords[0]
                for keyword in keywords:
                    if keyword != first_keyword:
                        edge = graph.add_edge(nodes[first_keyword],
                                              nodes[keyword])
                        edge['label'] = book_data['Titel']

        # Advance the offset BEFORE building the next URL. Formatting first
        # would request offset 0 again on the second pass and keep every
        # later page one step behind.
        # NOTE(review): assumes QUERY_FORMAT_STRING_2's second placeholder is
        # a zero-based result offset -- confirm against its definition.
        current_position += len(links)
        query_string = QUERY_FORMAT_STRING_2.format(query,
                                                    str(current_position))
        iterations -= 1
    return parser.tostring(graph)