Exemple #1
0
def main():
    """Parse command-line options, open the databases and dump the stats CSV.

    Defines the ``console`` and ``mongo_host`` options, parses the command
    line, configures logging through ``basicConfig`` and then writes the CSV
    export via ``_create_csv``.

    The export reads from the stats store; ``_create_stats(stats_db,
    company_db)`` is the function that writes to it and is not run here.
    ``_print_stats_summary(stats_db)`` is likewise available but not run.

    Returns:
        Always ``-1``.
    """
    define("console", default=False, type=bool)
    define("mongo_host", default='localhost')

    parse_command_line()
    basicConfig(options=options)
    # Not used below. Kept because constructing it may have side effects
    # (e.g. opening a connection) -- TODO confirm and remove if it does not.
    country_db = CountryDB(options.mongo_host)
    company_db = CompanyDB(options.mongo_host)
    stats_db = StatsDB(options.mongo_host)
    _create_csv(stats_db, company_db)
    # NOTE(review): -1 is returned unconditionally; callers are not visible
    # here, so the value is preserved as-is.
    return -1


def _print_stats_summary(stats_db):
    """Print aggregate counts over every document in the stats store.

    Three lines are printed: the number of documents tallied by category,
    the per-year founding counts, and the sorted per-(year, month) founding
    counts. Documents without a ``data.founded_at`` value still count toward
    the category total but are left out of both date tallies.

    This code used to sit after an unconditional ``return`` in ``main`` and
    therefore never ran; it is kept here so it can be called explicitly.
    """
    categories = defaultdict(int)
    year_month = defaultdict(int)
    year = defaultdict(int)
    for stats in adv_enumerate(stats_db.find()):
        categories[get_dotted(stats, 'data.category_code')] += 1
        founded_at = get_dotted(stats, 'data.founded_at')
        if not founded_at:
            continue
        year_month[(founded_at.year, founded_at.month)] += 1
        year[founded_at.year] += 1

    # Single-argument print(...) behaves the same as the print statement.
    print(sum(categories.values()))
    print(year)
    print(sorted(year_month.items()))
Exemple #2
0
def _create_csv(stats_db, company_db):
    """Write every stats document to standard output as one CSV row.

    The first line is a fixed header. Each following line holds, in order:
    the document ``_id`` with spaces replaced by underscores, the number of
    countries, ``total_money_raised`` divided by 10**6, that same figure
    capped at 100, the days between founding and the first funding round,
    the number of funding rounds, the employee count, the founding year, the
    category code, and the first listed country (``'other'`` when it is not
    in ``COUNTRIES``).

    Missing ``countries`` or ``funding_rounds`` are treated as empty. A
    missing ``founded_at`` yields a year of ``None`` and 0 days to funding.
    ``None`` values are written as the text ``None``. Fields containing a
    comma or a quote are quoted by the ``csv`` module so they cannot shift
    columns.

    Args:
        stats_db: store whose ``find({}, None)`` yields documents carrying an
            ``_id`` string and a ``data`` mapping.
        company_db: not used by this function.

    Raises:
        TypeError: if a document has no ``total_money_raised`` value.
    """
    import csv
    import sys

    def _cell(value):
        # On Python 2, str() of a non-ASCII unicode value raises; emit UTF-8
        # bytes for it instead of silently dropping the whole row.
        try:
            return str(value)
        except UnicodeEncodeError:
            return value.encode('utf-8')

    stats_cursor = stats_db.find({}, None)
    # '\n' matches the line ending the former print statements produced.
    writer = csv.writer(sys.stdout, lineterminator='\n')
    writer.writerow('name,num_countries,funding,funding_capped,days_to_funding,num_rounds,employees,year,category,country'.split(','))
    for doc in adv_enumerate(stats_cursor, frequency=1000):
        name = doc['_id'].replace(' ', '_')
        data = doc['data']

        countries = data.get('countries') or []
        country = countries[0] if countries else None
        if country not in COUNTRIES:
            country = 'other'

        funding_rounds = data.get('funding_rounds') or []
        num_rounds = len(funding_rounds)

        founded_at = data.get('founded_at')
        year = founded_at.year if founded_at else None

        # Only companies founded after 1995 get a days-to-funding figure;
        # everything else keeps the default of 0.
        days_to_funding = 0
        if (funding_rounds and funding_rounds[0]['funded_at']
                and founded_at and founded_at.year > 1995):
            days_to_funding = (funding_rounds[0]['funded_at'] - founded_at).days

        funding = float(data.get('total_money_raised')) / 10**6
        funding_capped = min(funding, 100)
        employees = data.get('number_of_employees')
        category = data.get('category_code')

        # Pre-stringify every cell so None is written as 'None' and floats
        # keep str() formatting rather than the csv module's own conversion.
        writer.writerow([_cell(value) for value in (
            name, len(countries), funding, funding_capped, days_to_funding,
            num_rounds, employees, year, category, country)])
Exemple #3
0
def _create_stats(stats_db, company_db):
    """Build one stats document per company and save it to ``stats_db``.

    For every company returned by ``company_db.find({}, {}, count=None)`` a
    dict is assembled containing ``countries`` (the ``country_code`` of each
    entry in the company's ``offices``) plus one entry per callable in
    ``STATS``; each callable receives the company and returns a
    ``(stat_name, value)`` pair. The dict is saved under the company's
    ``name``.

    Args:
        stats_db: destination store; ``save(name, stats)`` is called once
            per company.
        company_db: source store queried for the companies.
    """
    cursor = company_db.find({}, {}, count=None)
    for record in adv_enumerate(cursor, frequency=1000):
        key = record.get('name')
        summary = {
            'countries': [site['country_code'] for site in record['offices']],
        }
        # Each STATS callable yields a (name, value) pair; later pairs
        # overwrite earlier keys, exactly like repeated item assignment.
        summary.update(compute(record) for compute in STATS)
        stats_db.save(key, summary)