def main():
    """Declare and parse the command-line options, configure logging, and
    look up ``["crunch"]["company_stats"]`` on a ``Connection`` opened
    against ``options.mongo_host``.

    Options declared here:
      - ``console``    (bool, default False)
      - ``mongo_host`` (default "localhost")
    """
    define("console", default=False, type=bool)
    define("mongo_host", default="localhost")
    parse_command_line()
    basicConfig(options=options)

    # Presumably a pymongo connection -> database -> collection chain;
    # confirm against the imports of this module.
    connection = Connection(options.mongo_host)
    database = connection["crunch"]
    db = database["company_stats"]
def main():
    """Parse the command-line options, open the three DB wrappers and run
    ``_create_csv`` on the stats and company databases.

    Options declared here:
      - ``console``    (bool, default False)
      - ``mongo_host`` (default 'localhost')

    @return: int -- always -1 (the function returns right after
             ``_create_csv``)
    """
    define("console", default=False, type=bool)
    define("mongo_host", default='localhost')
    parse_command_line()
    basicConfig(options=options)

    mongo_host = options.mongo_host
    country_db = CountryDB(mongo_host)
    company_db = CompanyDB(mongo_host)
    stats_db = StatsDB(mongo_host)

    # Disabled: per-country office counting.
    #   companies = company_db.find({}, fields={'offices': 1}, count=None)
    #   for company in adv_enumerate(companies):
    #       for office in company['offices']:
    #           country_db.increment(office['country_code'])
    #   print company_db
    # Disabled: _create_stats(stats_db, company_db)

    _create_csv(stats_db, company_db)
    return -1

    # NOTE(review): everything below is unreachable because of the
    # unconditional return above. It is kept as found; confirm whether the
    # early return is a leftover debugging short-circuit.
    per_category = defaultdict(int)
    per_year_month = defaultdict(int)
    per_year = defaultdict(int)
    for stats in adv_enumerate(stats_db.find()):
        per_category[get_dotted(stats, 'data.category_code')] += 1
        founded_at = get_dotted(stats, 'data.founded_at')
        if founded_at:
            # Disabled: if founded_at.year < 1995: print stats
            per_year_month[(founded_at.year, founded_at.month)] += 1
            per_year[founded_at.year] += 1

    print(sum(per_category.values()))
    print(per_year)
    print(sorted(per_year_month.items()))
def main():
    """Parse the command-line options, build the tornado application and
    run its IOLoop.

    Options declared here:
      - ``port``       (int, default 2111)
      - ``debug``      (bool, default False)
      - ``console``    (bool, default False)
      - ``loglevel``   (no default)
      - ``mongo_host`` (default 'localhost')

    Routes:
      - ``/companies.*?`` -> CompanyHandler
      - ``/stats.*?``     -> StatsHandler
    Both handlers receive ``mongo_host`` as an initialization argument.

    @return: int -- 0, once ``IOLoop.start()`` has returned
    """
    # The default is an int so that it agrees with the declared type=int.
    # The original used the string "2111", which left options.port a str
    # whenever --port was not given on the command line.
    define("port", default=2111, type=int)
    define("debug", default=False, type=bool)
    define("console", default=False, type=bool)
    define("loglevel")
    define("mongo_host", default='localhost')
    parse_command_line()
    basicConfig(options=options)

    # Setup tornado
    handlers = [
        (r'/companies.*?', CompanyHandler, {'mongo_host': options.mongo_host}),
        (r'/stats.*?', StatsHandler, {'mongo_host': options.mongo_host}),
    ]
    app_settings = {
        'debug': options.debug,
        # Static files are served from the "static" directory next to this
        # module.
        'static_path': os.path.join(os.path.dirname(__file__), "static"),
        'gzip': True,
    }
    application = tornado.web.Application(handlers, **app_settings)
    http_server = tornado.httpserver.HTTPServer(application)

    logging.info('traffic_server listening on port %s', options.port)
    # options.port is an int now, so the int() cast is no longer needed.
    http_server.listen(options.port)
    tornado.ioloop.IOLoop.instance().start()
    return 0
# NOTE(review): import order is kept exactly as in the original. grequests is
# believed to apply gevent monkey-patching on import, so moving it relative to
# pymongo or `from time import sleep` could change behaviour -- confirm before
# reordering.
import logging
import pymongo
from pymongo import Connection
import grequests
from time import sleep
import ujson

from loggingutils import basicConfig

basicConfig(console=True)

SEARCH_ENDPOINT = 'http://api.crunchbase.com/v/1/search.js?query=a&page=%s'
COMPANY_ENDPOINT = 'http://api.crunchbase.com/v/1/company/%s.js'


def _get_batch(urls, field=None):
    """
    Fetch every URL in ``urls`` as one grequests batch and decode each
    response body with ujson.

    When ``field`` is truthy, each decoded body is indexed with ``field`` and
    that value is returned for every response. Otherwise the whole decoded
    body is returned, and responses whose ``ok`` attribute is falsy are
    skipped.

    Any exception raised while fetching or decoding is logged, followed by a
    5 second sleep and a retry of the whole batch. Retries are unlimited, so
    this call does not return until one attempt succeeds.

    @param urls: list(str)
    @param field: str
    @return: list(dict)
    """
    # A loop replaces the original recursive retry so that a long outage
    # cannot grow the call stack until the recursion limit is hit.
    while True:
        try:
            pending = [grequests.get(u) for u in urls]
            responses = grequests.map(pending)
            if field:
                return [ujson.decode(r.text)[field] for r in responses]
            return [ujson.decode(r.text) for r in responses if r.ok]
        except Exception:
            logging.exception('exception received.. sleeping it off for 5 sec')
            sleep(5)