Example #1
def write_rates_df_to_table(table_name: str,
                            table: pd.DataFrame,
                            account_name: str = None,
                            account_key: str = None,
                            connection_string: str = None):

    # Set up connection to table service
    table_service = __connect(table_name, account_name, account_key,
                              connection_string)

    # Derive the PartitionKey and RowKey columns
    table["PartitionKey"] = table["Date"]
    table["Date"] = pd.to_datetime(table["Date"])
    table["RowKey"] = table["country_code"] + "_" + table["Maturity"].astype(
        str)

    # Group rows by PartitionKey in chunks of at most 100 rows (the batch limit) and commit each chunk
    for _, partition_df in table.groupby(
        ["PartitionKey", np.arange(len(table)) // 100]):
        batch = TableBatch()
        rates_list = json.loads(
            partition_df.to_json(date_format="iso", orient="records"))
        for rate in rates_list:
            batch.insert_or_replace_entity(rate)
        table_service.commit_batch(table_name, batch)
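Example #1 delegates connection setup to a private __connect helper that is not shown. A minimal sketch of what such a helper might do, assuming the legacy azure.cosmosdb.table SDK these examples use:

from azure.cosmosdb.table.tableservice import TableService

# Hypothetical sketch of the __connect helper assumed by Example #1: build a
# TableService from either a connection string or explicit account
# credentials, and ensure the target table exists before returning the client.
def __connect(table_name, account_name=None, account_key=None,
              connection_string=None):
    if connection_string is not None:
        table_service = TableService(connection_string=connection_string)
    else:
        table_service = TableService(account_name=account_name,
                                     account_key=account_key)
    # create_table is a no-op (returns False) when the table already exists
    table_service.create_table(table_name)
    return table_service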
Example #2
def uploadToTable():
    # filename and fixed_tablename are module-level globals in the source
    connection_string, container_name = get_credentials()
    blob = BlobClient.from_connection_string(conn_str=connection_string,
                                             container_name=container_name,
                                             blob_name=filename)
    blob_data = blob.download_blob().readall()
    json_file_content = blob_data.decode('utf8')
    jlist = json.loads(json_file_content)

    table_service = TableService(connection_string=connection_string)
    table_service.create_table(table_name=fixed_tablename)

    partition_key = 'Fishlanding'
    row = 1

    batch_list = []
    for jdict in jlist:
        task = {'PartitionKey': partition_key, 'RowKey': str(row)}
        for key in jdict:
            keyVal = key.replace(' ', '')
            if keyVal == '':
                continue  # skip keys that normalise to an empty string
            task[keyVal] = jdict[key]
        batch_list.append(task)
        row += 1

    separated_batch_list = list_of_groups(init_list=batch_list,
                                          childern_list_len=50)
    for children_list in separated_batch_list:
        batch = TableBatch()
        for task in children_list:
            batch.insert_or_replace_entity(task)
        table_service.commit_batch(table_name=fixed_tablename, batch=batch)

    return batch_list
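uploadToTable relies on a list_of_groups helper that is not shown. A plausible sketch matching the call sites (including their "childern_list_len" keyword spelling), which simply chunks the entity list:

# Hypothetical sketch of the list_of_groups helper used above: split
# init_list into sublists of at most childern_list_len items each.
def list_of_groups(init_list, childern_list_len):
    return [init_list[i:i + childern_list_len]
            for i in range(0, len(init_list), childern_list_len)]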
Example #3
def main(event: func.EventGridEvent):
    event_type = event.event_type
    event_subject = event.subject
    containername = event_subject.split("/")[-3]

    # credentials needed
    connection_string = os.getenv('AZURE_CONNECTION_STRING')
    container_name = os.getenv('ContainerName')

    # Only blob-created events in the configured container trigger processing
    if event_type == "Microsoft.Storage.BlobCreated" and containername == container_name:
        filename = event_subject.split("/")[-1]
        tablename = gettablename(
            filename=filename) + datetime.now().strftime("%H%M%S")
        table_service = TableService(connection_string=connection_string)
        table_service.create_table(table_name=tablename)

        # Download the blob data
        blob = BlobClient.from_connection_string(conn_str=connection_string,
                                                 container_name=container_name,
                                                 blob_name=filename)
        blob_data = blob.download_blob().readall()
        # crude fix-up for single-quoted JSON (fragile if values contain apostrophes)
        json_file_content = blob_data.decode('utf8').replace("'", '"')
        jlist = json.loads(json_file_content)

        # The partition key may change later; this default is for DEVELOPMENT purposes only
        partition_key = tablename
        row = 1
        batch_list = []
        for jdict in jlist:
            if 'Cruise_ID' in jdict:
                partition_key = jdict['Cruise_ID']
            task = {'PartitionKey': partition_key, 'RowKey': str(row)}
            for key in jdict:
                keyVal = key.replace(' ', '')
                if keyVal == '':
                    continue
                task[keyVal] = jdict[key]
            batch_list.append(task)
            row += 1

        separated_batch_list = list_of_groups(init_list=batch_list,
                                              childern_list_len=50)
        for children_list in separated_batch_list:
            batch = TableBatch()
            for task in children_list:
                batch.insert_or_replace_entity(task)
            table_service.commit_batch(table_name=tablename, batch=batch)
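The gettablename helper used above is also not shown. One hedged guess at its job, given that Azure table names must be alphanumeric and the caller appends a timestamp:

import os
import re

# Hypothetical sketch of gettablename: derive a Table-storage-safe base name
# from the blob filename by dropping the extension and any non-alphanumerics.
def gettablename(filename):
    base = os.path.splitext(filename)[0]
    return re.sub(r'[^A-Za-z0-9]', '', base) or 'blobdata'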
Example #4
def sendEntities():
    # The original hard-coded a full storage account key here; load the
    # connection string from configuration instead of embedding secrets.
    table_service = TableService(
        connection_string=os.getenv('AZURE_CONNECTION_STRING'))

    batch = TableBatch()
    inBatch = 0
    for a in bitcoins:
        batch.insert_or_replace_entity(a)
        inBatch += 1
        if inBatch > 99:
            table_service.commit_batch('HelpPage', batch)
            batch = TableBatch()
            inBatch = 0
    # commit any remainder; an exact multiple of 100 leaves nothing to send
    if inBatch > 0:
        table_service.commit_batch('HelpPage', batch)
Example #5
def sendEntities():
    table_service = TableService(
        connection_string='')

    batch = TableBatch()
    inBatch = 0
    for a in bitcoins:
        batch.insert_or_replace_entity(a)
        inBatch += 1
        if inBatch > 99:
            table_service.commit_batch('CryptoCompareAPIData', batch)
            batch = TableBatch()
            inBatch = 0
    # commit any remainder; an exact multiple of 100 leaves nothing to send
    if inBatch > 0:
        table_service.commit_batch('CryptoCompareAPIData', batch)
Example #6
def crawl_and_store(table_service, symbol):
    batch = TableBatch()

    batch_size = 0
    for each_trend in fetch(symbol):
        batch.insert_or_replace_entity(each_trend)
        batch_size += 1

        if batch_size >= 75:
            table_service.commit_batch('stocktrend', batch)
            batch = TableBatch()
            print("stored a batch, size:", batch_size)
            batch_size = 0

    if batch_size > 0:
        table_service.commit_batch('stocktrend', batch)
        print("stored a batch, size:", batch_size)
Example #7
def uploadentities():
    table_service = TableService(
        connection_string=tablestorageconnectionstring)

    batch = TableBatch()
    inBatch = 0
    for e in entities:
        batch.insert_or_replace_entity(e)
        inBatch += 1
        if inBatch > 99:
            print("Sending batch...")
            table_service.commit_batch('PredictionData', batch)
            print("Batch sent!")
            batch = TableBatch()
            inBatch = 0
    print("Sending batch...")
    table_service.commit_batch('PredictionData', batch)
    print("Batch sent!")
    batch = TableBatch()
    inBatch = 0
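Examples #4, #5 and #7 all repeat the same count-and-flush pattern. One illustrative way to factor it out (names are placeholders, not from the original sources):

from azure.cosmosdb.table.tablebatch import TableBatch

# Illustrative refactor of the shared pattern: accumulate up to 100 entities
# per TableBatch (the service limit for one batch) and skip the final commit
# when nothing is left over. As in the originals, every entity in a single
# batch must share the same PartitionKey.
def commit_in_batches(table_service, table_name, entities, batch_size=100):
    batch = TableBatch()
    count = 0
    for entity in entities:
        batch.insert_or_replace_entity(entity)
        count += 1
        if count == batch_size:
            table_service.commit_batch(table_name, batch)
            batch = TableBatch()
            count = 0
    if count > 0:
        table_service.commit_batch(table_name, batch)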
Example #8
def main(myblob: func.InputStream):
    logging.info(
        f"Python blob trigger function processed blob \n"
        f"Name: {myblob.name}\n"
        f"Blob Size: {myblob.length} bytes"
    )
    postcode_data = pandas.read_csv(
        "https://raw.githubusercontent.com/Elkfox/Australian-Postcode-Data/master/au_postcodes.csv",
        dtype={"postcode": str},
    )

    df = pandas.read_excel(
        io.BytesIO(myblob.read()),
        engine="openpyxl",
        true_values=("Y",),
        false_values=("n",),  # note the trailing comma: ("n") is just a string
    )

    df.dropna(subset=["ABN"], inplace=True)  # Remove entries with no ABN
    df.drop_duplicates(
        subset=["ABN"], keep="last", inplace=True
    )  # Remove duplicate entries and keep the last
    df["Town_City"] = df["Town_City"].apply(lambda x: str(x).title())
    df["Reporting_hours___Paid"].fillna(0, inplace=True)
    df["Reporting_hours___Unpaid"].fillna(0, inplace=True)
    df["Reporting_hours___Total"].fillna(0, inplace=True)
    check_columns = (
        "Operates_in_ACT",
        "Operates_in_NSW",
        "Operates_in_NT",
        "Operates_in_QLD",
        "Operates_in_SA",
        "Operates_in_TAS",
        "Operates_in_VIC",
        "Operates_in_WA",
        "Relief_of_poverty_sickness_or_the_needs_of_the_aged",
        "The_advancement_of_education",
        "The_advancement_of_religion",
        "The_provision_of_child_care_services",
        "Other_purposes_beneficial_to_the_community",
        "BASIC_RELIGIOUS",
        "Conducted_Activities",
        "Animal_Protection",
        "Aged_Care_Activities",
        "Civic_and_advocacy_activities",
        "Culture_and_arts",
        "Economic_social_and_community_development",
        "Emergency_Relief",
        "Employment_and_training",
        "Environmental_activities",
        "Grant_making_activities",
        "Higher_education",
        "Hospital_services_and_rehabilitation_activities",
        "Housing_activities",
        "Income_support_and_maintenance",
        "International_activities",
        "Law_and_legal_services",
        "Mental_health_and_crisis_intervention",
        "Political_activities",
        "Primary_and_secondary_education",
        "Religious_activities",
        "Research",
        "Social_services",
        "Sports",
        "Other_Educations",
        "other_health_service_delivery",
        "Other_recreation_and_social_club_activity",
        "Other",
        "Aboriginal_or_TSI",
        "Aged_Persons",
        "Children",
        "Communities_Overseas",
        "Ethnic_Groups",
        "Gay_Lesbian_Bisexual",
        "General_Community_in_Australia",
        "men",
        "Migrants_Refugees_or_Asylum_Seekers",
        "Pre_Post_Release_Offenders",
        "People_with_Chronic_Illness",
        "People_with_Disabilities",
        "People_at_risk_of_homelessness",
        "Unemployed_Persons",
        "Veterans_or_their_families",
        "Victims_of_crime",
        "Victims_of_Disasters",
        "Women",
        "Youth",
        "Other_charities",
        "Other_beneficiaries_not_listed",
        "Reporting_Obligations___ACT",
        "Reporting_Obligations___NSW",
        "Reporting_Obligations___NT",
        "Reporting_Obligations___QLD",
        "Reporting_Obligations___SA",
        "Reporting_Obligations___TAS",
        "Reporting_Obligations___VIC",
        "Reporting_Obligations___WA",
    )
    col_names = list(df.columns)
    for col in check_columns:
        if col in col_names:
            df[col].fillna(False, inplace=True)
    df["State"] = df["Postcode"].apply(map_postcode)
    df = pandas.merge(
        df,
        postcode_data,
        left_on=["Postcode", "Town_City"],
        right_on=["postcode", "place_name"],
        how="left",
    ).drop(columns=["postcode", "place_name", "state_name", "state_code", "accuracy"])
    # Connect to Azure Table Store
    table_service = TableService(
        account_name=os.getenv("TABLE_STORAGE_ACCOUNT_NAME"),
        account_key=os.getenv("TABLE_STORAGE_KEY"),
    )

    # Convert dataframe to list of dictionary
    records = df.to_dict(orient="records")

    # Batch in 100 rows
    for record_batch in (
        records[pos : pos + 100] for pos in range(0, len(records), 100)
    ):
        batch = TableBatch()
        for record in record_batch:
            entry = {
                "PartitionKey": f"ais",
                "RowKey": str(record["ABN"]),
            }
            for k, v in record.items():
                cleanCamelKey = camelKey(k)
                # Check that removing those characters didn't create a duplicate key
                if cleanCamelKey in entry:
                    raise ValueError(f"Key collision on {cleanCamelKey}")

                entry[cleanCamelKey] = v

            batch.insert_or_replace_entity(entry)

        table_service.commit_batch("ACN", batch)
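Example #8 assumes two helpers that are not shown: camelKey, which normalises column names into valid Table storage property names, and map_postcode, which maps an Australian postcode to a state. A hedged sketch of camelKey consistent with the collision check above:

import re

# Hypothetical sketch of camelKey: keep only letters and digits so the result
# is a valid Table storage property name; the caller guards against two
# columns collapsing onto the same cleaned key.
def camelKey(key):
    return re.sub(r'[^A-Za-z0-9]', '', str(key))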