def find_possible_issuers(con, log_file, company):
    """Return the issuer names in ``master_issuers`` that match *company*.

    The LIKE pattern is obtained from ``get_db_name_pattern(company)``.
    A readable copy of the statement, with the pattern interpolated, is
    handed to ``log_query`` together with *log_file*; the statement that is
    actually executed binds the pattern as a parameter instead.

    Returns a list holding the first column of every fetched row.
    """
    cursor = con.cursor()
    name_pattern = get_db_name_pattern(company)

    # Logging only: this interpolated text is never sent to the database.
    log_query(
        "select issuer_name from master_issuers where issuer_name like '%s'"
        % name_pattern,
        log_file)

    cursor.execute(
        "select issuer_name from master_issuers where issuer_name like ?",
        [name_pattern])

    issuer_names = []
    for row in cursor.fetchall():
        issuer_names.append(row[0])
    return issuer_names
def find_possible_issuers(con, log_file, company):
    """Return the issuer names in ``master_issuers`` that match *company*.

    The LIKE pattern is obtained from ``get_db_name_pattern(company)``.
    A readable copy of the statement, with the pattern interpolated, is
    handed to ``log_query`` together with *log_file*; the statement that is
    actually executed binds the pattern as a parameter instead.

    Returns a list holding the first column of every fetched row.
    """
    cursor = con.cursor()
    name_pattern = get_db_name_pattern(company)

    # Logging only: this interpolated text is never sent to the database.
    log_query(
        "select issuer_name from master_issuers where issuer_name like '%s'"
        % name_pattern,
        log_file)

    cursor.execute(
        "select issuer_name from master_issuers where issuer_name like ?",
        [name_pattern])

    issuer_names = []
    for row in cursor.fetchall():
        issuer_names.append(row[0])
    return issuer_names
# Per-company master issuer coverage. For every company, the number of
# distinct 6-character CUSIP prefixes in valid_items whose issuer_name matches
# the company's pattern is compared with the number of names returned by
# find_possible_issuers for that company.
print("Generating master issuer coverage stats...")
missing_companies = []
with open("queries.txt", "w") as query_log_file:
    for company in companies:
        # find_possible_issuers receives the log file so its query is recorded.
        possible_issuers = find_possible_issuers(con, query_log_file, company)
        company_master_issuers_count = len(possible_issuers)
        master_issuers_count += company_master_issuers_count

        # presumably fetch_count returns the single count produced by the
        # query -- confirm against its definition.
        crawled_issuers_for_company_count = fetch_count(
            con,
            '''select count(distinct substr(cusip, 1, 6)) from valid_items where issuer_name like ?''',
            [get_db_name_pattern(company)])

        if company_master_issuers_count <= 0:
            # Nothing to divide by: record the placeholder string and
            # remember the company as missing.
            percentage_issuers_crawled_for_company = "NaN"
            missing_companies.append(company)
        else:
            percentage_issuers_crawled_for_company = (
                Decimal(100)
                * Decimal(crawled_issuers_for_company_count)
                / Decimal(company_master_issuers_count)
            ).quantize(TWOPLACES)

        companies_coverage[company] = {
            'CRAWLED_ISSUERS_COUNT': crawled_issuers_for_company_count,
            'MASTER_ISSUERS_COUNT': company_master_issuers_count,
            'COVERAGE (%)': percentage_issuers_crawled_for_company,
        }
        #print "%s: %d of %d issuer numbers crawled. Coverage: %s" % (company,
        #    crawled_issuers_for_company_count,
print "Generating master issuer coverage stats..." query_log_file = open("queries.txt", "w") missing_companies = [] with query_log_file: for company in companies: possible_issuers = find_possible_issuers(con, query_log_file, company) master_issuers_count += len(possible_issuers) crawled_issuers_for_company_count = fetch_count( con, '''select count(distinct substr(cusip, 1, 6)) from valid_items where issuer_name like ?''', [get_db_name_pattern(company)]) company_master_issuers_count = len(possible_issuers) if company_master_issuers_count > 0: percentage_issuers_crawled_for_company = Decimal( 100) * Decimal(crawled_issuers_for_company_count ) / Decimal(company_master_issuers_count) percentage_issuers_crawled_for_company = percentage_issuers_crawled_for_company.quantize( TWOPLACES) else: percentage_issuers_crawled_for_company = "NaN" missing_companies.append(company) companies_coverage[company] = { 'CRAWLED_ISSUERS_COUNT': crawled_issuers_for_company_count, 'MASTER_ISSUERS_COUNT': company_master_issuers_count,