def find_possible_issuers(con, log_file, company):
    """Return the issuer names in ``master_issuers`` that match ``company``.

    The LIKE pattern comes from ``get_db_name_pattern(company)``. A
    human-readable copy of the query (pattern inlined) is handed to
    ``log_query`` together with ``log_file`` before the real statement runs.

    Args:
        con: Database connection; only ``con.cursor()`` is used here.
            NOTE(review): the ``?`` placeholder implies a qmark-paramstyle
            driver (e.g. sqlite3) -- confirm against the caller.
        log_file: Passed through unchanged to ``log_query``.
        company: Company identifier passed to ``get_db_name_pattern``.

    Returns:
        A list holding the first column (``issuer_name``) of every row
        returned by the query.
    """
    cursor = con.cursor()
    name_pattern = get_db_name_pattern(company)

    # This interpolated text is for the log only; it is never executed.
    log_query(
        "select issuer_name from master_issuers where issuer_name like '%s'"
        % name_pattern,
        log_file,
    )

    # The statement that actually runs binds the pattern as a parameter.
    cursor.execute(
        "select issuer_name from master_issuers where issuer_name like ?",
        [name_pattern],
    )

    issuer_names = []
    for row in cursor.fetchall():
        issuer_names.append(row[0])
    return issuer_names
예제 #2
0
def find_possible_issuers(con, log_file, company):
    """Look up master issuer names whose name matches the company's pattern.

    Steps, in order: open a cursor on ``con``, build the LIKE pattern with
    ``get_db_name_pattern(company)``, pass a readable rendering of the query
    to ``log_query`` (with ``log_file``), then execute the parameterized
    query and collect the results.

    Args:
        con: Database connection providing ``cursor()``.
        log_file: Forwarded as-is to ``log_query``.
        company: Value from which the LIKE pattern is derived.

    Returns:
        list: ``row[0]`` (the ``issuer_name`` column) for each fetched row.
    """
    db_cursor = con.cursor()
    like_pattern = get_db_name_pattern(company)

    # Logged form inlines the pattern so the log shows the effective query;
    # the executed form below keeps the pattern as a bound parameter.
    loggable_sql = "select issuer_name from master_issuers where issuer_name like '%s'" % like_pattern
    log_query(loggable_sql, log_file)

    bound_sql = "select issuer_name from master_issuers where issuer_name like ?"
    db_cursor.execute(bound_sql, [like_pattern])

    rows = db_cursor.fetchall()
    return [record[0] for record in rows]
    print "Generating master issuer coverage stats..."
    query_log_file = open("queries.txt", "w")

    missing_companies = []
    with query_log_file:
        for company in companies:

            possible_issuers = find_possible_issuers(con, query_log_file, company)
            master_issuers_count += len(possible_issuers)

            crawled_issuers_for_company_count = fetch_count(con,
                                                            '''select count(distinct substr(cusip, 1, 6))
                                                            from valid_items
                                                            where issuer_name like ?''',
                                                            [get_db_name_pattern(company)])
            company_master_issuers_count = len(possible_issuers)

            if company_master_issuers_count > 0:
                percentage_issuers_crawled_for_company = Decimal(100) * Decimal(crawled_issuers_for_company_count) / Decimal(company_master_issuers_count)
                percentage_issuers_crawled_for_company = percentage_issuers_crawled_for_company.quantize(TWOPLACES)
            else:
                percentage_issuers_crawled_for_company = "NaN"
                missing_companies.append(company)

            companies_coverage[company] = {'CRAWLED_ISSUERS_COUNT' : crawled_issuers_for_company_count,
                                           'MASTER_ISSUERS_COUNT' : company_master_issuers_count,
                                           'COVERAGE (%)' : percentage_issuers_crawled_for_company}

            #print "%s: %d of %d issuer numbers crawled. Coverage: %s"  % (company,
            #                                                              crawled_issuers_for_company_count,
예제 #4
0
    print "Generating master issuer coverage stats..."
    query_log_file = open("queries.txt", "w")

    missing_companies = []
    with query_log_file:
        for company in companies:

            possible_issuers = find_possible_issuers(con, query_log_file,
                                                     company)
            master_issuers_count += len(possible_issuers)

            crawled_issuers_for_company_count = fetch_count(
                con, '''select count(distinct substr(cusip, 1, 6))
                                                            from valid_items
                                                            where issuer_name like ?''',
                [get_db_name_pattern(company)])
            company_master_issuers_count = len(possible_issuers)

            if company_master_issuers_count > 0:
                percentage_issuers_crawled_for_company = Decimal(
                    100) * Decimal(crawled_issuers_for_company_count
                                   ) / Decimal(company_master_issuers_count)
                percentage_issuers_crawled_for_company = percentage_issuers_crawled_for_company.quantize(
                    TWOPLACES)
            else:
                percentage_issuers_crawled_for_company = "NaN"
                missing_companies.append(company)

            companies_coverage[company] = {
                'CRAWLED_ISSUERS_COUNT': crawled_issuers_for_company_count,
                'MASTER_ISSUERS_COUNT': company_master_issuers_count,