Example 1
import os
from twittercrawler.crawlers import FriendsCollector  # assumed export, matching the crawler imports below
from twittercrawler.data_io import FileWriter

# api_key_file_path is assumed to point to a JSON file holding Twitter API credentials
def test_friends():
    fp = "friends_results.txt"
    crawler = FriendsCollector(limit=1)
    crawler.authenticate(api_key_file_path)
    crawler.connect_output([FileWriter(fp, clear=True)])
    user_id, cursor, cnt = crawler.collect(
        [187908577, 34156194, 66003384, 19248625])
    crawler.close()
    os.remove(fp)
    assert cnt > 0
Example 2
import os
from twittercrawler.crawlers import UserLookup  # assumed export, matching the crawler imports below
from twittercrawler.data_io import FileWriter

def test_lookup():
    fp = "lookup_results.txt"
    crawler = UserLookup()
    crawler.authenticate(api_key_file_path)
    crawler.connect_output([FileWriter(fp, clear=True)])
    query_idx, cnt = crawler.collect(
        screen_names=["ferencberes91", "Istvan_A_Seres"])
    crawler.close()
    os.remove(fp)
    assert cnt > 0
Example 3
import os
from twittercrawler.crawlers import PeopleCrawler
from twittercrawler.data_io import FileWriter

def test_people():
    fp = "people_results.txt"
    crawler = PeopleCrawler(limit=2)
    crawler.authenticate(api_key_file_path)
    crawler.connect_output([FileWriter(fp, clear=True)])
    search_params = {"q": "data scientist"}
    crawler.set_search_arguments(search_args=search_params)
    page, cnt = crawler.search()
    crawler.close()
    os.remove(fp)
    assert cnt > 0
Example 4
import os
from twittercrawler.crawlers import RecursiveCrawler
from twittercrawler.data_io import FileWriter

def test_recursive():
    fp = "recursive_results.txt"
    crawler = RecursiveCrawler(limit=2)
    crawler.authenticate(api_key_file_path)
    crawler.connect_output([FileWriter(fp, clear=True)])
    search_params = {
        "q": "#latest OR #news",
        "result_type": 'recent',
        "count": 5
    }
    crawler.set_search_arguments(search_args=search_params)
    success, max_id, latest_id, cnt = crawler.search(term_func=None)
    crawler.close()
    os.remove(fp)
    assert (success and cnt > 0)
Example 5
import os
from twittercrawler.crawlers import StreamCrawler  # assumed export, matching the crawler imports below
from twittercrawler.data_io import FileWriter, FileReader

def test_stream():
    fp = "stream_results.txt"
    crawler = StreamCrawler(sync_time=1, limit=10)
    crawler.authenticate(api_key_file_path)
    crawler.connect_output([FileWriter(fp, clear=True)])
    search_params = {
        "q": "#latest OR #news",
        "result_type": 'recent',
        "count": 5
    }
    crawler.set_search_arguments(search_args=search_params)
    crawler.search(90, None)
    crawler.close()
    results = FileReader(fp).read()
    os.remove(fp)
    assert len(results) > 0
Example 6
from twittercrawler.crawlers import RecursiveCrawler
from twittercrawler.data_io import FileWriter, FileReader
from twittercrawler.search import get_time_termination, get_id_termination
import datetime, time

# initialize
file_path = "recursive_results.txt"
recursive = RecursiveCrawler()
recursive.authenticate("../api_key.json")
recursive.connect_output([FileWriter(file_path, clear=True)])

# query
search_params = {
    "q": "#bitcoin OR #ethereum OR blockchain",
    "result_type": "recent",
    "lang": "en",
    "count": 100
}
recursive.set_search_arguments(search_args=search_params)

# termination (collect tweets from the last 5 minutes)
now = datetime.datetime.now()
time_str = (now - datetime.timedelta(seconds=300)).strftime("%a %b %d %H:%M:%S +0000 %Y")
print(time_str)
time_terminator = get_time_termination(time_str)

# run search - FIRST STAGE
success, max_id, latest_id, cnt = recursive.search(term_func=time_terminator)
print("\nFirst stage report:")
Example 7
from twittercrawler.crawlers import PeopleCrawler
from twittercrawler.data_io import FileWriter, SocketWriter, FileReader

# prepare writers
keys = ["name", "location", "description"]
file_path = "people_results.txt"
fw = FileWriter(file_path, clear=True, include_mask=keys)
sw = SocketWriter(7000, include_mask=keys)
# to receive the streamed records, run this in a separate terminal: telnet localhost 7000

# initialize
people = PeopleCrawler(limit=5)
people.authenticate("../api_key.json")
people.connect_output([fw, sw])

# query
search_params = {
    "q": "data scientist AND phd student",
}
people.set_search_arguments(search_args=search_params)

# run search
page, cnt = people.search()
print(page, cnt)

# close
people.close()

# load results
results_df = FileReader(file_path).read()
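As a small follow-up, the loaded results can be inspected and the temporary file removed; this assumes read() returns the written records as a list- or DataFrame-like collection (the _df suffix hints at pandas, but the length check below works either way).

import os

print(len(results_df))
os.remove(file_path)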