Ejemplo n.º 1
0
def main():
    """Register command-line options, set up logging and open the stats store.

    Declares the ``console`` and ``mongo_host`` options, parses the command
    line, hands the parsed options to ``basicConfig`` and then looks up
    ``["crunch"]["company_stats"]`` on a ``Connection`` to the configured
    host (presumably a MongoDB database and collection -- confirm).
    """
    # Option declarations must precede parse_command_line().
    define("console", default=False, type=bool)
    define("mongo_host", default="localhost")

    parse_command_line()
    basicConfig(options=options)

    # Same lookup as a single chained expression, just named step by step.
    connection = Connection(options.mongo_host)
    crunch = connection["crunch"]
    db = crunch["company_stats"]
Ejemplo n.º 2
0
def _print_founding_summary(stats_db):
    """Print category and founding-date tallies for the documents in stats_db.

    Iterates ``stats_db.find()`` and counts documents per
    ``data.category_code``, per founding ``(year, month)`` and per founding
    year. Documents with a falsy ``data.founded_at`` are counted in the
    category tally only. Three lines are printed: the total number of
    documents tallied by category, the per-year counts, and the sorted
    per-(year, month) counts.

    NOTE(review): this logic used to sit after an unconditional ``return``
    in ``main`` and was therefore never executed; it is kept here so it can
    be called deliberately.

    @param stats_db: object whose ``find()`` yields stats documents
    """
    categories = defaultdict(int)
    year_month = defaultdict(int)
    year = defaultdict(int)
    for stats in adv_enumerate(stats_db.find()):
        categories[get_dotted(stats, 'data.category_code')] += 1
        founded_at = get_dotted(stats, 'data.founded_at')
        if not founded_at:
            continue
        year_month[(founded_at.year, founded_at.month)] += 1
        year[founded_at.year] += 1

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(sum(categories.values()))
    print(year)
    print(sorted(year_month.items()))


def main():
    """Parse options, open the three stores and export the stats as CSV.

    Declares the ``console`` and ``mongo_host`` options, parses the command
    line, configures logging, builds ``CountryDB``, ``CompanyDB`` and
    ``StatsDB`` for the configured host and runs ``_create_csv``.

    @return: int, always -1
    """
    define("console", default=False, type=bool)
    define("mongo_host", default='localhost')

    parse_command_line()
    basicConfig(options=options)

    # country_db is only needed by the disabled per-country office count
    # (country_db.increment(office['country_code']) for each company office);
    # it is still constructed so any side effect of opening it is preserved.
    country_db = CountryDB(options.mongo_host)
    company_db = CompanyDB(options.mongo_host)
    stats_db = StatsDB(options.mongo_host)

    # Disabled one-off step: _create_stats(stats_db, company_db)
    _create_csv(stats_db, company_db)

    # NOTE(review): -1 is the value this function has always returned here;
    # confirm whether callers treat it as an exit status before changing it.
    # Call _print_founding_summary(stats_db) before returning to get the
    # category / founding-date tallies.
    return -1
Ejemplo n.º 3
0
def main():
    """Parse options, build the Tornado application and serve until stopped.

    Declares the ``port``, ``debug``, ``console``, ``loglevel`` and
    ``mongo_host`` options, parses the command line, configures logging,
    routes ``/companies...`` to ``CompanyHandler`` and ``/stats...`` to
    ``StatsHandler`` (both receive ``mongo_host``), then listens on the
    configured port and starts the IOLoop, which blocks.

    @return: int, 0 once the IOLoop has stopped
    """
    # The default is an int so that options.port has the declared type even
    # when --port is not given (the previous default was the string "2111").
    define("port", default=2111, type=int)
    define("debug", default=False, type=bool)
    define("console", default=False, type=bool)
    define("loglevel")
    define("mongo_host", default='localhost')

    parse_command_line()
    basicConfig(options=options)

    # Setup tornado
    handler_kwargs = {'mongo_host': options.mongo_host}
    handlers = [
        # Each handler gets its own copy of the initialisation kwargs.
        (r'/companies.*?', CompanyHandler, dict(handler_kwargs)),
        (r'/stats.*?', StatsHandler, dict(handler_kwargs)),
    ]
    app_settings = {
        'debug': options.debug,
        'static_path': os.path.join(os.path.dirname(__file__), "static"),
        'gzip': True,
    }
    application = tornado.web.Application(handlers, **app_settings)

    http_server = tornado.httpserver.HTTPServer(application)
    # Lazy %-args: the message is only formatted if the record is emitted.
    logging.info('traffic_server listening on port %s', options.port)
    # int() is kept as a guard in case the option arrives as a string.
    http_server.listen(int(options.port))
    tornado.ioloop.IOLoop.instance().start()

    return 0
Ejemplo n.º 4
0
import logging
import pymongo
from pymongo import Connection
import grequests
from time import sleep
import ujson

from loggingutils import basicConfig

# Runs at import time: calls the project's loggingutils.basicConfig helper
# with console=True.
basicConfig(console=True)

# URL template with one %s placeholder, used as the value of the "page"
# query parameter; the query term itself is fixed to "a".
SEARCH_ENDPOINT = 'http://api.crunchbase.com/v/1/search.js?query=a&page=%s'
# URL template with one %s placeholder for the path segment before ".js"
# (presumably a company identifier -- confirm against the callers).
COMPANY_ENDPOINT = 'http://api.crunchbase.com/v/1/company/%s.js'

def _get_batch(urls, field=None):
    """
    @param urls: list(str)
    @param field: str
    @return: list(dict)
    """
    try:
        rs = [grequests.get(u) for u in urls]
        responses = grequests.map(rs)
        if field:
            results_list = [ujson.decode(r.text)[field] for r in responses]
        else:
            results_list = [ujson.decode(r.text) for r in responses if r.ok]
    except Exception, e:
        logging.exception('exception received.. sleeping it off for 5 sec')
        sleep(5)
        return _get_batch(urls, field)