# Build a page-title -> row-id lookup from the rows already selected on `cursor`,
# then scan each page's source for wiki links and insert (from_id, to_id) edges
# for every link whose target is itself a known page.
pages = dict(cursor.fetchall())
print(len(pages))

added = 0   # links that resolved to a known page (and were inserted unless dry_run)
tried = 0   # total links examined
try:
    counter = 0
    for page, page_id in pages.items():  # `page_id` (was `id`) avoids shadowing the builtin
        with open(filepath(language, page)) as f:
            # set() deduplicates repeated links within one page
            links = set(re.findall(links_re, f.read()))
        tried += len(links)
        for link in links:
            link_title = make_title(link)
            if link_title in pages:
                if not dry_run:
                    # AsIs splices the (code-supplied, trusted) table name into the
                    # SQL; the two ids are passed as ordinary bound parameters.
                    cursor.execute('INSERT INTO %s (from_id, to_id) VALUES (%s, %s)',
                                   (AsIs(table_name), page_id, pages[link_title]))
                added += 1
        counter += 1
        if counter % 50 == 0:
            # '\r' keeps the progress report on a single console line; flush so
            # the partial line actually appears while the loop is running.
            stdout.write('\r%d: added %d, tried %d' % (counter, added, tried))
            stdout.flush()
except Exception:
    # Top-level boundary of the script: report the failure, then still fall
    # through to the summary in `finally`.
    import traceback
    traceback.print_exc(file=stdout)
finally:
    print('\nadded %d, tried %d' % (added, tried,))
    # NOTE(review): this chunk starts inside an enclosing loop and `try:` that are
    # not visible here. grab_partn presumably pulls the next '<page>'...'</page>'
    # span out of the stream and raises EOFError at end of input — confirm
    # against the surrounding code.
    payload, buffer = grab_partn(f, '<page>', '</page>', buffer)
except EOFError:
    break
counter += 1
# Parse one <page> element of the XML dump.
xml = ET.fromstring(payload)
# Skip redirect pages entirely.
if xml.find('redirect'):
    continue
text = xml.find('revision/text').text
if not text:
    continue
title = make_title(xml.find('title').text)
try:
    # wiki_coord raises ValueError when the coordinate markup is malformed;
    # such pages are skipped.
    coords = wiki_coord(text)
except ValueError:
    continue
if coords:
    # Persist only pages that carry coordinates; honor the --dry-run flag.
    save(title, text, coords, file_directory, args['--dry-run'])
    withcoord += 1
    written += 1
    continue
# infobox = re.findall(ib_regex, text)
# if len(infobox) == 0 or not infobox[0]:
# ----- experiment configuration -----
exp_num = 8            # experiment index (recorded in the saved-plot title)
x_num = 400            # number of input locations
samples = 20           # number of function samples drawn per evaluation
arch = [1, 20, 20, 1]  # BNN layer sizes (1-d in, two hidden layers, 1-d out)
act = "rbf"            # activation name as recorded in the title; the model
                       # itself is built with the `rbf` nonlinearity below
kern = "rbf"           # GP prior kernel name
iters_1 = 40
scale = -0.5
step = 0.1

save_plot = True
save_during = False    # save intermediate plots while running
plot_during = True     # show a live figure while running

save_title = make_title(exp_num, x_num, samples, kern, arch, act, iters_1, scale, step)
save_dir = os.path.join(os.getcwd(), 'plots', 'exp', save_title)

if save_during:
    # exist_ok replaces the race-prone exists()-then-makedirs() pattern.
    os.makedirs(save_dir, exist_ok=True)

# Build the GP-prior-matched BNN machinery for the configured architecture.
num_weights, bnn_predict, unpack_params, \
init_bnn_params, sample_bnn, sample_gpp, \
kl, grad_kl = map_gpp_bnn(layer_sizes=arch, nonlinearity=rbf,
                          n_data=x_num, N_samples=samples, kernel=kern)

if plot_during:
    f, ax = plt.subplots(3, sharex=True)
    plt.ion()  # interactive mode: the figure updates without blocking the run