Example #1
def sync_excel_blobs_and_az_tables():
    table_service = TableService(account_name=Config.TABLE_ACCOUNT_NAME,
                                 account_key=Config.TABLE_KEY)
    if not table_service.exists(Config.PRODUCT_TABLE_NAME):
        create_table(table_service, Config.PRODUCT_TABLE_NAME)
    # TODO: Consider insert_or_merge_entity() here instead https://docs.microsoft.com/en-us/python/api/azure-cosmosdb-table/azure.cosmosdb.table.tableservice.tableservice?view=azure-python
    delete_all_entries_in_table(table_service, Config.PRODUCT_TABLE_NAME)
    batch = TableBatch()

    table_data = get_metadata_blob_data()
    products_data = get_product_blobs_data()
    for p in table_data:
        try:
            p["cumulative"] = str(products_data[p["id"]]["cumulative"])
        except KeyError:
            p["cumulative"] = None
        batch.insert_entity(p)
    try:
        table_service.commit_batch(table_name=Config.PRODUCT_TABLE_NAME,
                                   batch=batch)
        print(
            f"Uploaded products data to '{Config.PRODUCT_TABLE_NAME}' table in '{Config.TABLE_ACCOUNT_NAME}' storage account"
        )

    except HeaderParsingError as e:
        print(e)
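
The TODO above points at insert_or_merge_entity(). A minimal sketch of that variant, reusing Config, create_table and get_metadata_blob_data from the example and skipping the cumulative lookup, would upsert row by row, which also sidesteps the single-partition, 100-operation limits of a batch:

def sync_excel_blobs_and_az_tables_merge():
    # Sketch only: merge rows in place instead of delete-all + re-insert,
    # so readers never see a half-empty table during a sync.
    table_service = TableService(account_name=Config.TABLE_ACCOUNT_NAME,
                                 account_key=Config.TABLE_KEY)
    if not table_service.exists(Config.PRODUCT_TABLE_NAME):
        create_table(table_service, Config.PRODUCT_TABLE_NAME)
    for p in get_metadata_blob_data():
        # Creates the entity if missing, merges changed properties otherwise.
        table_service.insert_or_merge_entity(Config.PRODUCT_TABLE_NAME, p)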
Example #2
def migrate_task_os(table_service: TableService) -> None:
    table_name = "Task"
    tasks = table_service.query_entities(
        table_name, select="PartitionKey,RowKey,os,config")
    partitionKey = None

    count = 0
    batch = TableBatch()
    for task in tasks:
        if partitionKey is not None and partitionKey != task.PartitionKey:
            # Flush when the partition changes: a batch may only span one
            # PartitionKey, and committing before the first row would send
            # an empty batch.
            table_service.commit_batch(table_name, batch)
            batch = TableBatch()

        partitionKey = task.PartitionKey
        if "os" not in task or (not task.os):
            config = json.loads(task.config)
            print(config)
            if "windows".lower() in config["vm"]["image"].lower():
                task["os"] = "windows"
            else:
                task["os"] = "linux"
            count = count + 1
        batch.merge_entity(task)
    table_service.commit_batch(table_name, batch)
    print("migrated %s rows" % count)
Example #3
def uploadToTable():
    connection_string, container_name = get_credentials()
    blob = BlobClient.from_connection_string(conn_str=connection_string,
                                             container_name=container_name,
                                             blob_name=filename)
    blob_data = blob.download_blob().readall()
    json_file_content = blob_data.decode('utf8')
    jlist = json.loads(json_file_content)

    table_service = TableService(connection_string=connection_string)
    table_service.create_table(table_name=fixed_tablename)

    partition_key = 'Fishlanding'
    row = 1

    batch_list = []
    for jdict in jlist:
        task = {'PartitionKey': partition_key, 'RowKey': str(row)}
        for key in jdict:
            keyVal = key.replace(' ', '')
            if keyVal == '':
                continue
            task[keyVal] = jdict[key]
        batch_list.append(task)
        row += 1

    separated_batch_list = list_of_groups(init_list=batch_list,
                                          childern_list_len=50)
    for children_list in separated_batch_list:
        batch = TableBatch()
        for task in children_list:
            batch.insert_or_replace_entity(task)
        table_service.commit_batch(table_name=fixed_tablename, batch=batch)

    return batch_list
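
list_of_groups is not defined in these examples; from both call sites it evidently splits a flat list into fixed-size chunks. A minimal sketch, keeping the parameter spelling used at the call sites:

def list_of_groups(init_list, childern_list_len):
    # Split init_list into sublists of at most childern_list_len items
    # (50 here, comfortably under the 100-operation batch cap).
    return [init_list[i:i + childern_list_len]
            for i in range(0, len(init_list), childern_list_len)]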
Example #4
def main(event: func.EventGridEvent):
    event_type = event.event_type
    event_subject = event.subject
    containername = event_subject.split("/")[-3]

    # credentials needed
    connection_string = os.getenv('AZURE_CONNECTION_STRING')
    container_name = os.getenv('ContainerName')

    # Only blob creations at container would trigger this function
    if event_type == "Microsoft.Storage.BlobCreated" and containername == container_name:
        filename = event_subject.split("/")[-1]
        tablename = gettablename(
            filename=filename) + datetime.now().strftime("%H%M%S")
        table_service = TableService(connection_string=connection_string)
        table_service.create_table(table_name=tablename)

        # Download the blob data
        blob = BlobClient.from_connection_string(conn_str=connection_string,
                                                 container_name=container_name,
                                                 blob_name=filename)
        blob_data = blob.download_blob().readall()
        json_file_content = blob_data.decode('utf8').replace("'", '"')
        jlist = json.loads(json_file_content)

        # The partition key might be changed later. This is only for DEVELOPMENT purpose
        partition_key = tablename
        row = 1
        batch_list = []
        for jdict in jlist:
            if 'Cruise_ID' in jdict:
                partition_key = jdict['Cruise_ID']
            task = {'PartitionKey': partition_key, 'RowKey': str(row)}
            for key in jdict:
                keyVal = key.replace(' ', '')
                if keyVal == '':
                    continue
                task[keyVal] = jdict[key]
            batch_list.append(task)
            row += 1

        separated_batch_list = list_of_groups(init_list=batch_list,
                                              childern_list_len=50)
        for children_list in separated_batch_list:
            batch = TableBatch()
            for task in children_list:
                batch.insert_or_replace_entity(task)
            table_service.commit_batch(table_name=tablename, batch=batch)
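
Rewriting single quotes to double quotes only works while no value contains a quote character itself. If the blobs really hold Python-style literals rather than JSON, ast.literal_eval is a safer fallback; a sketch (hypothetical helper):

import ast
import json

def parse_maybe_python_literal(raw_bytes):
    # Sketch: try strict JSON first, then fall back to parsing a Python
    # literal instead of rewriting quote characters in the raw text.
    text = raw_bytes.decode('utf8')
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        return ast.literal_eval(text)

# Usage: jlist = parse_maybe_python_literal(blob_data)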
Example #5
def sendEntities():
    # Account key redacted: real credentials belong in configuration, not source.
    table_service = TableService(
        connection_string='DefaultEndpointsProtocol=https;AccountName=sauokgp;AccountKey=<redacted>;EndpointSuffix=core.windows.net')

    batch = TableBatch()
    inBatch = 0
    for a in bitcoins:
        batch.insert_or_replace_entity(a)
        inBatch += 1
        if inBatch > 99:
            table_service.commit_batch('HelpPage', batch)
            batch = TableBatch()
            inBatch = 0
    if inBatch:
        table_service.commit_batch('HelpPage', batch)
Example #6
def sendEntities():
    table_service = TableService(
        connection_string='')

    batch = TableBatch()
    inBatch = 0
    for a in bitcoins:
        batch.insert_or_replace_entity(a)
        inBatch += 1
        if inBatch > 99:
            table_service.commit_batch('CryptoCompareAPIData', batch)
            batch = TableBatch()
            inBatch = 0
    if inBatch:
        table_service.commit_batch('CryptoCompareAPIData', batch)
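
The two sendEntities variants differ only in connection string and table name, so the flush-every-100 pattern could be factored out once; a sketch with a hypothetical helper name:

def upload_in_batches(table_service, table_name, entities):
    # Sketch: insert_or_replace entities in batches of at most 100,
    # the hard limit on an entity group transaction.
    batch, in_batch = TableBatch(), 0
    for entity in entities:
        batch.insert_or_replace_entity(entity)
        in_batch += 1
        if in_batch == 100:
            table_service.commit_batch(table_name, batch)
            batch, in_batch = TableBatch(), 0
    if in_batch:
        table_service.commit_batch(table_name, batch)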
Example #7
    def save(self, storage_account: str, key: str) -> None:
        from azure.cosmosdb.table.tableservice import TableService
        from azure.cosmosdb.table.models import Entity
        from azure.cosmosdb.table.tablebatch import TableBatch

        # Create the table storage object
        ts = TableService(account_name=storage_account, account_key=key)

        # Create the records from the data
        records = (self.data.assign(
            date_retrieved=lambda df: df['date_retrieved'].dt.date).rename(
                columns={
                    'date_retrieved': 'PartitionKey',
                    'date_forecast': 'RowKey'
                }).astype(str).to_dict(orient='records'))

        # Add the data to the batch and save it
        batch = TableBatch()
        list(map(batch.insert_entity, records))
        ts.commit_batch('WeatherNetworkForecasts', batch)
Example #8
def uploadentities():
    table_service = TableService(
        connection_string=tablestorageconnectionstring)

    batch = TableBatch()
    inBatch = 0
    for e in entities:
        batch.insert_or_replace_entity(e)
        inBatch += 1
        if inBatch > 99:
            print("Sending batch...")
            table_service.commit_batch('PredictionData', batch)
            print("Batch sent!")
            batch = TableBatch()
            inBatch = 0
    print("Sending batch...")
    table_service.commit_batch('PredictionData', batch)
    print("Batch sent!")
    batch = TableBatch()
    inBatch = 0
Example #9
def main(mytimer: func.TimerRequest) -> None:
    logging.info('Running server cleanup...')

    table_name = 'servers'
    table_service = TableService(
        connection_string=os.environ['AzureWebJobsStorage'])
    batch = TableBatch()

    servers = table_service.query_entities(table_name)

    for server in servers:
        last_heartbeat = server.lastHeartbeatTime
        current_time = datetime.now(timezone.utc)
        seconds_since_last_heartbeat = current_time - last_heartbeat

        if seconds_since_last_heartbeat.total_seconds() >= int(
                os.environ['HeartbeatTimeoutSeconds']):
            batch.delete_entity(server.PartitionKey, server.RowKey)

    table_service.commit_batch(table_name, batch)

    logging.info('Server clean complete!')
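
A TableBatch accepts only one PartitionKey and at most 100 operations, and committing an empty batch may fail, so the cleanup above breaks as soon as expired servers span partitions. A sketch (hypothetical helper name) that groups the deletes per partition:

def delete_expired(table_service, table_name, expired):
    # Sketch: one batch per partition, flushed at the 100-operation limit.
    by_partition = {}
    for server in expired:
        by_partition.setdefault(server.PartitionKey, []).append(server.RowKey)
    for pk, row_keys in by_partition.items():
        for i in range(0, len(row_keys), 100):
            batch = TableBatch()
            for rk in row_keys[i:i + 100]:
                batch.delete_entity(pk, rk)
            table_service.commit_batch(table_name, batch)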
Example #10
def main(req: func.HttpRequest) -> func.HttpResponse:
    logging.info('Starting bulk insert.')
    ret = dict()

    table_name = req.headers.get('name')

    values = req.get_json()

    if table_name:
        retrieved_secret = getConnectionString()

        table_service = TableService(connection_string=retrieved_secret.value)
        batch = TableBatch()
        for row in values['rows']:
            batch.insert_entity(row)

        table_service.commit_batch(table_name, batch)

        ret['result'] = "Success"
        return func.HttpResponse(json.dumps(ret), status_code=200)
    else:
        ret['result'] = 'Error'
        return func.HttpResponse(json.dumps(ret), status_code=400)
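
As a usage sketch (the endpoint URL and route below are assumptions), the function expects the table name in a name header and the rows, each carrying its own PartitionKey and RowKey and all in one partition given the single batch, in the JSON body:

import requests  # hypothetical caller

response = requests.post(
    "https://<function-app>.azurewebsites.net/api/bulk_insert",
    headers={"name": "tasktable"},
    json={"rows": [
        {"PartitionKey": "tasksTigard", "RowKey": "012", "priority": 300},
        {"PartitionKey": "tasksTigard", "RowKey": "013", "priority": 200},
    ]},
)
print(response.status_code, response.json())  # expects {"result": "Success"}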
Example #11
class AzureTable(object):
    def __init__(self, account_name: str, account_key: str, table_name: str,
                 partition_key_field: str, clustering_key_field: str):
        self.table = TableService(account_name=account_name,
                                  account_key=account_key)
        self.table_name = table_name
        self.partition_key_field = partition_key_field
        self.clustering_key_field = clustering_key_field

    @property
    def partition_key_name(self) -> str:
        return 'PartitionKey'

    @property
    def clustering_key_name(self) -> str:
        return 'RowKey'

    def get_payload(self, payload: dict):
        item = deepcopy(payload)
        partition_key = payload.get(self.partition_key_field)
        clustering_key = payload.get(self.clustering_key_field)
        if partition_key is None:
            raise PartitionKeyNotFoundError(
                'payload={} does not have a partition key'.format(payload))
        if clustering_key is None:
            raise ClusteringKeyNotFoundError(
                'payload={} does not have a clustering key'.format(payload))

        item.update({
            self.partition_key_name: partition_key,
            self.clustering_key_name: clustering_key
        })

        return item

    def create(self):
        return self.table.create_table(self.table_name)

    def insert(self, item: dict):
        return self.table.insert_entity(self.table_name,
                                        self.get_payload(item))

    def update(self, item: dict):
        pass

    def upsert(self, item: dict):
        pass

    def delete(self, key: str):
        pass

    def read(self, key: str):
        pass

    def insert_batch(self, items: list):
        batch = TableBatch()
        for item in items:
            batch.insert_entity(self.get_payload(item))

        return self.table.commit_batch(self.table_name, batch)

    def get(self, partition_key: str, clustering_key: str):
        return self.table.get_entity(self.table_name, partition_key,
                                     clustering_key)

    def get_by_partition(self, partition_key: str) -> list:
        return self.table.query_entities(self.table_name,
                                         filter="{} eq '{}'".format(
                                             self.partition_key_name,
                                             partition_key))
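
The update, upsert, delete and read methods above are still stubs. Under the same key conventions they map almost one-to-one onto TableService calls; a sketch of possible bodies, widening delete and read to take both keys since Table storage addresses an entity by the pair:

    def update(self, item: dict):
        # Replace an existing entity with the new payload.
        return self.table.update_entity(self.table_name, self.get_payload(item))

    def upsert(self, item: dict):
        # Insert the entity, or replace it if it already exists.
        return self.table.insert_or_replace_entity(self.table_name,
                                                   self.get_payload(item))

    def delete(self, partition_key: str, clustering_key: str):
        return self.table.delete_entity(self.table_name, partition_key,
                                        clustering_key)

    def read(self, partition_key: str, clustering_key: str):
        return self.table.get_entity(self.table_name, partition_key,
                                     clustering_key)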
Example #12
batch = TableBatch()
task008 = {
    'PartitionKey': 'tasksTigard',
    'RowKey': '008',
    'description': 'Go grocery shopping',
    'priority': 400
}
task009 = {
    'PartitionKey': 'tasksTigard',
    'RowKey': '009',
    'description': 'Clean the bathroom',
    'priority': 100
}
batch.insert_entity(task008)
batch.insert_entity(task009)
table_service.commit_batch('tasktable', batch)

task010 = {
    'PartitionKey': 'tasksTigard',
    'RowKey': '010',
    'description': 'Go grocery shopping',
    'priority': 400
}
task011 = {
    'PartitionKey': 'tasksTigard',
    'RowKey': '011',
    'description': 'Clean the bathroom',
    'priority': 100
}

with table_service.batch('tasktable') as batch:
    # The batch is committed automatically when the context manager exits.
    batch.insert_entity(task010)
    batch.insert_entity(task011)
Example #13
def main(myblob: func.InputStream):
    logging.info(
        f"Python blob trigger function processed blob \n"
        f"Name: {myblob.name}\n"
        f"Blob Size: {myblob.length} bytes"
    )
    postcode_data = pandas.read_csv(
        "https://raw.githubusercontent.com/Elkfox/Australian-Postcode-Data/master/au_postcodes.csv",
        dtype={"postcode": str},
    )

    df = pandas.read_excel(
        io.BytesIO(myblob.read()),
        engine="openpyxl",
        true_values=("Y",),
        false_values=("n",),
    )

    df.dropna(subset=["ABN"], inplace=True)  # Remove entries with no ABN
    df.drop_duplicates(
        subset=["ABN"], keep="last", inplace=True
    )  # Remove duplicate entries and keep the last
    df["Town_City"] = df["Town_City"].apply(lambda x: str(x).title())
    df["Reporting_hours___Paid"].fillna(0, inplace=True)
    df["Reporting_hours___Unpaid"].fillna(0, inplace=True)
    df["Reporting_hours___Total"].fillna(0, inplace=True)
    check_columns = (
        "Operates_in_ACT",
        "Operates_in_NSW",
        "Operates_in_NT",
        "Operates_in_QLD",
        "Operates_in_SA",
        "Operates_in_TAS",
        "Operates_in_VIC",
        "Operates_in_WA",
        "Relief_of_poverty_sickness_or_the_needs_of_the_aged",
        "The_advancement_of_education",
        "The_advancement_of_religion",
        "The_provision_of_child_care_services",
        "Other_purposes_beneficial_to_the_community",
        "BASIC_RELIGIOUS",
        "Conducted_Activities",
        "Animal_Protection",
        "Aged_Care_Activities",
        "Civic_and_advocacy_activities",
        "Culture_and_arts",
        "Economic_social_and_community_development",
        "Emergency_Relief",
        "Employment_and_training",
        "Environmental_activities",
        "Grant_making_activities",
        "Higher_education",
        "Hospital_services_and_rehabilitation_activities",
        "Housing_activities",
        "Income_support_and_maintenance",
        "International_activities",
        "Law_and_legal_services",
        "Mental_health_and_crisis_intervention",
        "Political_activities",
        "Primary_and_secondary_education",
        "Religious_activities",
        "Research",
        "Social_services",
        "Sports",
        "Other_Educations",
        "other_health_service_delivery",
        "Other_recreation_and_social_club_activity",
        "Other",
        "Aboriginal_or_TSI",
        "Aged_Persons",
        "Children",
        "Communities_Overseas",
        "Ethnic_Groups",
        "Gay_Lesbian_Bisexual",
        "General_Community_in_Australia",
        "men",
        "Migrants_Refugees_or_Asylum_Seekers",
        "Pre_Post_Release_Offenders",
        "People_with_Chronic_Illness",
        "People_with_Disabilities",
        "People_at_risk_of_homelessness",
        "Unemployed_Persons",
        "Veterans_or_their_families",
        "Victims_of_crime",
        "Victims_of_Disasters",
        "Women",
        "Youth",
        "Other_charities",
        "Other_beneficiaries_not_listed",
        "Reporting_Obligations___ACT",
        "Reporting_Obligations___NSW",
        "Reporting_Obligations___NT",
        "Reporting_Obligations___QLD",
        "Reporting_Obligations___SA",
        "Reporting_Obligations___TAS",
        "Reporting_Obligations___VIC",
        "Reporting_Obligations___WA",
    )
    col_names = list(df.columns)
    for col in check_columns:
        if col in col_names:
            df[col].fillna(False, inplace=True)
    df["State"] = df["Postcode"].apply(map_postcode)
    df = pandas.merge(
        df,
        postcode_data,
        left_on=["Postcode", "Town_City"],
        right_on=["postcode", "place_name"],
        how="left",
    ).drop(columns=["postcode", "place_name", "state_name", "state_code", "accuracy"])
    # Connect to Azure Table Store
    table_service = TableService(
        account_name=os.getenv("TABLE_STORAGE_ACCOUNT_NAME"),
        account_key=os.getenv("TABLE_STORAGE_KEY"),
    )

    # Convert dataframe to list of dictionary
    records = df.to_dict(orient="records")

    # Batch in 100 rows
    for record_batch in (
        records[pos : pos + 100] for pos in range(0, len(records), 100)
    ):
        batch = TableBatch()
        for record in record_batch:
            entry = {
                "PartitionKey": f"ais",
                "RowKey": str(record["ABN"]),
            }
            for k, v in record.items():
                cleanCamelKey = camelKey(k)
                # Check that removing those characters didn't create a duplicate key
                if cleanCamelKey in entry:
                    raise ValueError(f"Key collision on {cleanCamelKey}")

                entry[cleanCamelKey] = v

            batch.insert_or_replace_entity(entry)

        table_service.commit_batch("ACN", batch)
Example #14
from azure.cosmosdb.table.tableservice import TableService
from azure.cosmosdb.table.models import Entity

# Let's import the batch library
from azure.cosmosdb.table.tablebatch import TableBatch

# Connect to the Table service
table_service = TableService(connection_string=connect_str)
print("\nConnected to Table service")

# Let's perform a batch transaction, and add two course expiry dates
batch = TableBatch()
task001 = {
    'PartitionKey': 'Azure',
    'RowKey': '280',
    'expiryDate': '1 July 2020'
}
task002 = {
    'PartitionKey': 'Azure',
    'RowKey': '381',
    'expiryDate': '1 July 2020'
}

# Perform batch transaction using MERGE (could be update, insert, etc)
batch.merge_entity(task001)
batch.merge_entity(task002)
table_service.commit_batch(table_name, batch)

print("\nBatch transaction complete")
print("=======================")