예제 #1
0
    return {
        "inbound": inbound,
        "outbound": outbound,
        "others": others
    }


# Crawl loop: repeatedly fetch the batch of domains not yet visited and
# process each one, stopping once a batch comes back empty.
while True:
    domains_to_visit = Domains.get_all_unvisited_domains(conn)

    if not domains_to_visit:
        break

    for domain_row in domains_to_visit:
        result = collect_domain(domain_row["domain"])
        domain_id = domain_row["domain_id"]

        # This bookkeeping must run once per domain. Previously it sat
        # outside the loop, so only the last row of each batch was marked
        # visited and stored; every other collected result was thrown away
        # and the same domains were fetched again on the next pass.
        Domains.visit_domain(conn, domain_id)
        Domains.insert_domains(conn, result["inbound"])
        Links.insert_links(conn, result["outbound"], domain_id)

    # Pause between batches (kept at batch level, as in the original).
    print("Sleeping for 20 seconds ZzZzz")
    time.sleep(20)






예제 #2
0
    for link in soup.find_all("a"):
        href = link.get("href")

        if href is None:
            continue

        if "start.bg" in href and "javascript:" not in href:
            inbound.add(href)
        elif "link.php" in href:
            outbound.add(href)
        else:
            others.add(href)

    return {"inbound": inbound, "outbound": outbound, "others": others}


# Crawl loop: repeatedly fetch the batch of domains not yet visited and
# process each one, stopping once a batch comes back empty.
while True:
    domains_to_visit = Domains.get_all_unvisited_domains(conn)

    if not domains_to_visit:
        break

    for domain_row in domains_to_visit:
        result = collect_domain(domain_row["domain"])
        domain_id = domain_row["domain_id"]

        # This bookkeeping must run once per domain. Previously it sat
        # outside the loop, so only the last row of each batch was marked
        # visited and stored; every other collected result was thrown away
        # and the same domains were fetched again on the next pass.
        Domains.visit_domain(conn, domain_id)
        Domains.insert_domains(conn, result["inbound"])
        Links.insert_links(conn, result["outbound"], domain_id)