Example #1
0
# Build a lookup from the two-column rows of the preceding query. The keys
# are the page names used to locate each page file and to match link titles;
# the values are the ids written as from_id / to_id below.
pages = dict(cursor.fetchall())

print(len(pages))
added = 0
tried = 0
try:
    counter = 0
    for page, page_id in pages.items():
        # Read and parse the page first so the file handle is closed before
        # any database work starts.
        with open(filepath(language, page)) as f:
            links = set(re.findall(links_re, f.read()))

        tried += len(links)
        for link in links:
            link_title = make_title(link)
            if link_title in pages:
                if not dry_run:
                    cursor.execute('INSERT INTO %s (from_id, to_id) VALUES (%s, %s)', (AsIs(table_name), page_id, pages[link_title]))
                # Counted even in dry-run mode, so the totals show what
                # would have been inserted.
                added += 1
        counter += 1

        if counter % 50 == 0:
            stdout.write('\r%d: added %d, tried %d' % (counter, added, tried))
            # The progress line has no trailing newline, so a line-buffered
            # stream would hold it back; flush to make it visible now.
            # NOTE(review): assumes `stdout` is sys.stdout (or another
            # object with flush()) -- confirm against the file's imports.
            stdout.flush()
except Exception:
    # Best-effort run: report the failure, then fall through to the summary.
    import traceback
    traceback.print_exc(file=stdout)
finally:
    print('\nadded %d, tried %d' % (added, tried))
Example #2
0
                payload, buffer = grab_partn(f, '<page>', '</page>', buffer)
            except EOFError:
                break

            counter+= 1    
            xml = ET.fromstring(payload)
            
            
            if xml.find('redirect'):
                continue
    
            text = xml.find('revision/text').text
            if not text:
                continue
                
            title = make_title(xml.find('title').text)
            try:
                coords = wiki_coord(text)
            except ValueError:
                continue
    
            if coords:
                save(title, text, coords, file_directory, args['--dry-run'])
                withcoord+=1
                written+=1
                continue
    

            # infobox = re.findall(ib_regex, text)
            
            # if len(infobox) == 0 or not infobox[0]:
Example #3
0
    # Experiment settings. In the visible code these values feed make_title()
    # (which names the output folder) and map_gpp_bnn() further down.
    exp_num = 8
    # Passed to map_gpp_bnn as n_data.
    x_num = 400
    # Passed to map_gpp_bnn as N_samples.
    samples = 20
    # Passed to map_gpp_bnn as layer_sizes.
    arch = [1, 20, 20, 1]
    # NOTE(review): `act` is only used for the title below; the map_gpp_bnn
    # call passes the name `rbf` as nonlinearity, not this string -- confirm
    # the two are meant to stay in sync.
    act = "rbf"
    # Passed to map_gpp_bnn as kernel.
    kern = "rbf"

    # In the visible code these three are only used to build the title.
    iters_1 = 40
    scale = -0.5
    step = 0.1

    # Output switches. `save_plot` is not referenced in the visible lines.
    save_plot = True
    save_during = False
    plot_during = True

    save_title = make_title(exp_num, x_num, samples, kern,
                            arch, act, iters_1, scale, step)

    # Output folder: <current working directory>/plots/exp/<save_title>.
    save_dir = os.path.join(os.getcwd(), 'plots', 'exp', save_title)

    # Create the folder up front only when saving during the run is enabled.
    if save_during:
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

    # map_gpp_bnn returns eight objects, unpacked here by position.
    num_weights, bnn_predict, unpack_params, \
    init_bnn_params, sample_bnn, sample_gpp, \
    kl, grad_kl = map_gpp_bnn(layer_sizes=arch, nonlinearity=rbf,
                              n_data=x_num, N_samples=samples, kernel=kern)

    # For live plotting: a figure with three subplots sharing the x axis,
    # with matplotlib interactive mode switched on.
    if plot_during:
        f, ax = plt.subplots(3, sharex=True)
        plt.ion()