return { "inbound": inbound, "outbound": outbound, "others": others } while True: domains_to_visit = Domains.get_all_unvisited_domains(conn) if len(domains_to_visit) == 0: break for domain_row in domains_to_visit: result = collect_domain(domain_row["domain"]) domain_id = domain_row["domain_id"] Domains.visit_domain(conn, domain_id) Domains.insert_domains(conn, result["inbound"]) Links.insert_links(conn, result["outbound"], domain_id) print("Sleeping for 20 seconds ZzZzz") time.sleep(20)
for link in soup.find_all("a"): href = link.get("href") if href is None: continue if "start.bg" in href and "javascript:" not in href: inbound.add(href) elif "link.php" in href: outbound.add(href) else: others.add(href) return {"inbound": inbound, "outbound": outbound, "others": others} while True: domains_to_visit = Domains.get_all_unvisited_domains(conn) if len(domains_to_visit) == 0: break for domain_row in domains_to_visit: result = collect_domain(domain_row["domain"]) domain_id = domain_row["domain_id"] Domains.visit_domain(conn, domain_id) Domains.insert_domains(conn, result["inbound"]) Links.insert_links(conn, result["outbound"], domain_id)