Example #1
0
def run_crawlers():
    """Run a crawler for each requested portal and return the merged data.

    Expects a JSON request body with:
        "portals": list of portal identifiers to crawl,
        "options": configuration passed to each crawler.

    Returns the flattened list of results as JSON, or a 400 JSON error
    response when a crawler cannot be created or fails while fetching.
    """
    # Parse the request body once instead of re-parsing it per field.
    payload = request.get_json(force=True)
    portals = payload["portals"]
    options = payload["options"]
    factory = CrawlerFactory()
    data = []
    for portal in portals:
        try:
            data.append(factory.create_crawler(portal, options).fetch_data())
        except AttributeError as err:
            # Unknown/unsupported portal. jsonify() cannot serialize an
            # exception object (it raised a TypeError and turned this
            # intended 400 into a 500), so send the message text instead.
            print(err)
            return jsonify({"message": str(err)}), 400
        except Exception as err:
            # Exception, not BaseException: let SystemExit and
            # KeyboardInterrupt propagate instead of being swallowed.
            print(err)
            return jsonify({"message": "Something went wrong in crawler"}), 400
    if not data:
        # Nothing crawled: return the empty list without flattening.
        return jsonify(data)
    return jsonify(flatten(data))
Example #2
0
'''
Created on 22 Jan 2012

@author: george
'''
import datetime
from crawlers.CrawlerFactory import CrawlerFactory
from database.model.tweets import TwoGroupsTweet
from mongoengine import *

# Build a Topsy crawler and collect UK/US-related tweets into the database.
f = CrawlerFactory()
t = f.get_crawler("topsy")

# Query string handed to the crawler's search API as-is.
search_hashtags = "uk OR #uk OR #UK or #usa OR #USA OR #US OR usa OR us"
t.search_for(search_hashtags)
# Fix: leading-zero integer literals (01) are a SyntaxError in Python 3 —
# month/day must be written as plain 1. Window: 23-25 Jan 2011, sampled
# in 1-day steps.
t.search_between(from_date=datetime.datetime(2011, 1, 23, 0, 0, 0),
                 to_date=datetime.datetime(2011, 1, 25, 0, 0, 0),
                 granularity_days=1,
                 granularity_hours=0,
                 granularity_mins=0)
# Store crawled items as TwoGroupsTweet documents, then run the crawl.
t.retrieve_items_of_type(TwoGroupsTweet)
t.crawl()

Example #3
0
 def test_construction_of_twitter_crawlers(self):
     """Smoke-test construction of a Twitter crawler via the factory.

     Builds the crawler, logs in, and fetches profile info for a screen
     name. NOTE(review): no assertions are visible here — the snippet may
     be truncated; confirm against the full test file.
     """
     factory = CrawlerFactory()
     t = factory.get_crawler("twitter")
     # Performs real authentication — presumably hits the Twitter API;
     # verify whether the test suite stubs this out.
     t.login()
     info = t.getUserInfoByScreenName("GeorgeEracleous")