Exemple #1
0
def prime_raw_dataset_from_sql(topic_name="abortion"):
    """Build an encoded, de-duplicated text/label dataset for one topic.

    Loads the ``imdb_reviews/subwords8k`` dataset through ``tfds`` (its text
    encoder is reused for the SQL content), runs the two module-level query
    templates ``sql_republican_weight_request`` and
    ``sql_democrat_weight_request`` for ``topic_name``, merges both result
    sets with ``prime_sql_results`` and encodes every unique, non-empty
    ``content`` value.

    The ``SqlStorer`` connection is created on first use and cached in the
    module-level global ``ml_sql_retrieve``.

    Args:
        topic_name: Value substituted into both query templates with ``%``.

    Returns:
        A 5-tuple ``(train_dataset, test_dataset, encoded_entry_list,
        label_list, encoder)``. ``encoded_entry_list[i]`` and
        ``label_list[i]`` always describe the same entry.
    """
    global ml_sql_retrieve

    dataset, info = tfds.load('imdb_reviews/subwords8k',
                              with_info=True,
                              as_supervised=True)
    encoder = info.features['text'].encoder

    # Lazily open the shared connection the first time it is needed.
    if ml_sql_retrieve is None:
        ml_sql_retrieve = SqlStorer(db_name="BillInfoDB",
                                    table_name="ml_bill_info_table")
        ml_sql_retrieve.set_up_connection()

    # NOTE(review): topic_name is interpolated into raw SQL. Only pass
    # trusted values here, or switch to bound parameters if SqlStorer
    # supports them.
    republican_results = ml_sql_retrieve.execute_raw_query(
        raw_query_str=(sql_republican_weight_request % topic_name))
    democrat_results = ml_sql_retrieve.execute_raw_query(
        raw_query_str=(sql_democrat_weight_request % topic_name))

    primed_results = prime_sql_results(rep_sql_res=republican_results,
                                       dem_sql_res=democrat_results)

    train_dataset, test_dataset = dataset['train'], dataset['test']
    encoded_entry_list = []
    label_list = []
    seen_digests = set()

    for entry in primed_results:
        # Read both keys before touching any output list: an entry that has
        # content but no label must be skipped as a whole, otherwise the
        # encoded list and the label list drift out of alignment.
        try:
            text = entry["content"]
            label = entry["label"]
        except KeyError:
            continue

        if not text:  # Skip empty / missing content
            continue

        # Hash the content so exact duplicates are only added once.
        digest = hashlib.md5(text.encode('utf-8')).hexdigest()
        if digest in seen_digests:
            continue
        seen_digests.add(digest)

        encoded_entry_list.append(encoder.encode(text))
        label_list.append(label)

    return train_dataset, test_dataset, encoded_entry_list, label_list, encoder
Exemple #2
0
def request_candidates_bills():
    """Return a JSON string of bill/vote rows for one candidate.

    Reads two query-string parameters from ``request.args``:

    * ``VoteSmartCandID`` - candidate id matched against
      ``V.VoteSmartCandID``.
    * ``VoteSmartPrimaryCategoryId`` - one or more category ids separated
      by commas; a row matches when ``B.VoteSmartPrimaryCategoryId`` equals
      any of them.

    The query joins ``PoliticianInfo.bill_info_table`` with
    ``PoliticianInfo.politician_voting_record_table`` on ``VoteSmartBillId``
    and orders the rows by ``DateIntroduced``.

    Returns:
        JSON text (indent 4, sorted keys) of the value produced by
        ``convert_bills_response_2_detailed_dict`` for the query result, or
        of an empty list when the query returns nothing or the category
        parameter is missing. Values ``json`` cannot serialise natively
        (e.g. dates) are converted with ``str``.
    """

    def _sql_literal(value):
        # Escape backslashes first, then single quotes, so a request value
        # cannot terminate the surrounding '...' SQL string literal.
        # NOTE(review): bound parameters would be preferable if SqlStorer
        # exposes them; execute_raw_query only shows a raw-string interface.
        return str(value).replace("\\", "\\\\").replace("'", "''")

    bills_list = []

    cand_id = request.args.get("VoteSmartCandID")
    category_id = request.args.get("VoteSmartPrimaryCategoryId")
    print("Candidate ID: %s " % cand_id)

    # Without a category filter there is nothing to query; answer with an
    # empty list instead of failing on None.strip().
    if category_id is None:
        return json.dumps(bills_list, indent=4, sort_keys=True, default=str)

    categories = category_id.strip(',').split(',')
    print("Category ID: %s " % categories)

    # SQL Retriever
    # NOTE(review): the connection is never closed in this function -
    # confirm whether SqlStorer manages its own lifetime.
    poly_voting_record_sql_retriever = SqlStorer(
        db_name=cand_bills_db_name, table_name=cand_bills_table_name)
    poly_voting_record_sql_retriever.set_up_connection()

    # Specialized raw request query
    raw_query = "   SELECT  B.*,V.* FROM    PoliticianInfo.bill_info_table B \
                    inner join              PoliticianInfo.politician_voting_record_table V ON \
                    B.VoteSmartBillId = V.VoteSmartBillId \
                    where V.VoteSmartCandID='%s' and (" % _sql_literal(cand_id)

    # One equality test per requested category, OR-ed together.
    raw_query += " OR ".join(
        "B.VoteSmartPrimaryCategoryId='%s'" % _sql_literal(category)
        for category in categories)
    raw_query += ") "

    raw_query += "ORDER BY `DateIntroduced`"

    response = poly_voting_record_sql_retriever.execute_raw_query(
        raw_query, verbose=False)

    if response:
        bills_list = convert_bills_response_2_detailed_dict(response)

    # default=str works around datetime values not being JSON serializable:
    # https://stackoverflow.com/questions/11875770/how-to-overcome-datetime-datetime-not-json-serializable/36142844#36142844
    return json.dumps(bills_list, indent=4, sort_keys=True, default=str)