Example #1
0
# Condition: the profile headline must contain 'recruiter'; it is only enforced at depth >= 2
#crawler.add_crawl_from_connections(CrawlConditions({"headline": re.compile(r'recruiter')}, 2))
# '->' means "is connected to"
# e.g.: A (initial profile) -> AA (accountant for wonderful company) [depth=1] -> AAA (accountant for wonderful company) [depth=2] **IGNORED**
#                           -> AB (accountant for wonderful company) [depth=1] -> ABA (recruiter for wonderful company) [depth=2]  **OK**

# Short-profile target: someone called Patrick whose headline mentions
# 'wonderful company'.
patrick_criteria = {
    "fullname": re.compile(r'patrick'),
    "headline": re.compile(r'wonderful company'),
}
crawler.add_target_short_profile(CrawlTarget(patrick_criteria))

# Full-profile target: someone called Charles whose work location is France.
charles_criteria = {
    "fullname": re.compile(r'charles'),
    "fmt_location": re.compile(r'france'),
}
crawler.add_target_full_profile(CrawlTarget(charles_criteria))
# /!\ Use NO CAPITAL LETTERS in the patterns above (lowercase only)

# Main crawl loop: keeps going while fewer than max_profiles scans have been
# counted and the crawler reports that another profile is available.
while num_scans < max_profiles and crawler.has_next():
    # Every long_sleep_every scans (never before the first one), take a long
    # pause, slept in 60-second slices so progress can be printed.
    if num_scans != 0 and num_scans % long_sleep_every == 0:
        # Floor division + int() keeps this a valid range() argument even when
        # long_sleep_time is a float or "/" is true division (Python 3).
        # Any remainder below 60 seconds is not slept.
        num_loops = int(long_sleep_time // 60)
        for i in range(num_loops):
            # Single-argument print(...) prints the same text on Python 2 and 3.
            print("Waiting... %d/%d" % (i, num_loops))
            time.sleep(60)

    # Count this scan and pause between each profile
    num_scans += 1
    time.sleep(sleep_time)