Exemple #1
0
def main():
    """Collect weatherbox templates from a Wikipedia dump and print them as JSON.

    The first positional command-line argument names the dump file to read;
    ``-`` selects standard input instead. Every page whose title matches
    ``Template:...[wW]eatherbox...`` is handed to ``wp.parse_weather_box``;
    results that are not ``None`` are stored under the part of the title
    after ``Template:``. The resulting mapping is written to standard output
    as indented JSON, and one progress line per matching title is written to
    standard error.

    Prints the usage text and exits with status 1 when no input argument is
    supplied.
    """
    usage = """
    python parse_weathrebox_templates wikipedia_dump.xml

    Create and output a table of weatherbox templates
    """
    num_args = 1
    parser = OptionParser(usage=usage)

    # No options are defined yet, so only the positional arguments matter.
    _options, args = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    read_stdin = args[0] == '-'
    f = sys.stdin if read_stdin else open(args[0], 'r')

    # ``match`` anchors at the start of the title, so only titles that begin
    # with "Template:" qualify.
    weatherbox_re = re.compile(r'Template:(.*[wW]eatherbox.*)')
    boxes = {}
    wb_counter = 0

    try:
        # ``counter`` is the 1-based index of the page within the dump.
        pages = wp.wikipedia_to_single_line_pages(f)
        for counter, page in enumerate(pages, 1):
            title = wp.parse_title(page)

            match = weatherbox_re.match(title)
            if match is None:
                continue

            wb_counter += 1
            template_title = match.group(1)

            weather_box = wp.parse_weather_box(page)
            if weather_box is not None:
                boxes[template_title] = weather_box

            # Explicit write instead of the Python 2 print statement so the
            # module also parses on Python 3; the emitted text is the same.
            sys.stderr.write(
                "Weatherbox counter: %s %s %s\n" % (counter, wb_counter, title)
            )
    finally:
        # Release the dump file even if parsing fails; never close stdin.
        if not read_stdin:
            f.close()

    sys.stdout.write(json.dumps(boxes, indent=2) + "\n")
def main():
    """Collect weatherbox templates from a Wikipedia dump and print them as JSON.

    The first positional command-line argument names the dump file to read;
    ``-`` selects standard input instead. Every page whose title matches
    ``Template:...[wW]eatherbox...`` is handed to ``wp.parse_weather_box``;
    results that are not ``None`` are stored under the part of the title
    after ``Template:``. The resulting mapping is written to standard output
    as indented JSON, and one progress line per matching title is written to
    standard error.

    Prints the usage text and exits with status 1 when no input argument is
    supplied.
    """
    usage = """
    python parse_weathrebox_templates wikipedia_dump.xml

    Create and output a table of weatherbox templates
    """
    num_args = 1
    parser = OptionParser(usage=usage)

    # No options are defined yet, so only the positional arguments matter.
    _options, args = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    read_stdin = args[0] == '-'
    f = sys.stdin if read_stdin else open(args[0], 'r')

    # ``match`` anchors at the start of the title, so only titles that begin
    # with "Template:" qualify.
    weatherbox_re = re.compile(r'Template:(.*[wW]eatherbox.*)')
    boxes = {}
    wb_counter = 0

    try:
        # ``counter`` is the 1-based index of the page within the dump.
        pages = wp.wikipedia_to_single_line_pages(f)
        for counter, page in enumerate(pages, 1):
            title = wp.parse_title(page)

            match = weatherbox_re.match(title)
            if match is None:
                continue

            wb_counter += 1
            template_title = match.group(1)

            weather_box = wp.parse_weather_box(page)
            if weather_box is not None:
                boxes[template_title] = weather_box

            # Explicit write instead of the Python 2 print statement so the
            # module also parses on Python 3; the emitted text is the same.
            sys.stderr.write(
                "Weatherbox counter: %s %s %s\n" % (counter, wb_counter, title)
            )
    finally:
        # Release the dump file even if parsing fails; never close stdin.
        if not read_stdin:
            f.close()

    sys.stdout.write(json.dumps(boxes, indent=2) + "\n")
def test_parse_weather_box():
    """Smoke-test ``wp.parse_weather_box`` against the Denver fixture.

    Each page yielded from ``test/data/denver_weatherbox.wiki`` (path is
    relative to the current working directory) is parsed and the result is
    dumped to standard error as JSON for manual inspection. Nothing is
    asserted; the test only fails if parsing or serialisation raises.
    """
    with open('test/data/denver_weatherbox.wiki', 'r') as f:
        for page in wp.wikipedia_to_single_line_pages(f):
            weather_box = wp.parse_weather_box(page)
            # Explicit write rather than the Python 2 print-chevron form,
            # which raises TypeError on Python 3; the output is unchanged.
            sys.stderr.write(json.dumps(weather_box) + "\n")
def test_parse_weather_box():
    """Smoke-test ``wp.parse_weather_box`` against the Denver fixture.

    Each page yielded from ``test/data/denver_weatherbox.wiki`` (path is
    relative to the current working directory) is parsed and the result is
    dumped to standard error as JSON for manual inspection. Nothing is
    asserted; the test only fails if parsing or serialisation raises.
    """
    with open('test/data/denver_weatherbox.wiki', 'r') as f:
        for page in wp.wikipedia_to_single_line_pages(f):
            weather_box = wp.parse_weather_box(page)
            # Explicit write rather than the Python 2 print-chevron form,
            # which raises TypeError on Python 3; the output is unchanged.
            sys.stderr.write(json.dumps(weather_box) + "\n")