Example #1
def transformation(store_name: str, sql_query: "sql_query",
                   sink_table_name: str):
    """
    Execute a SQL query on a database and store the results in another table in the same database
    """
    request = {
        "SqlTransformationParameters": {
            "DataStoreName": store_name,
            "SqlQuery": sql_query
        },
        "SinkTableName": sink_table_name
    }
    response = neuro_call("80", "DataMovementService", "SqlTransformation",
                          request)

    check_request = {"JobId": response["JobId"]}
    status = 0
    errormsg = ""
    while status == 0:
        time.sleep(1)
        response_c = neuro_call("80", "DataMovementService", "CheckJob",
                                check_request)
        status = response_c["Status"]
        if status > 1:
            errormsg = response_c["Message"]

    neuro_call("80", "DataMovementService", "FinaliseJob", check_request)

    if status != 1:
        raise Exception("Neuroverse error: " + errormsg)

    return {"JobId": response["JobId"], "TimeStamp": response["TimeStamp"]}
Example #2
def delete_datalake_file(store_name: str, table_name: str, file_name_including_partition: str):
    """
    Delete a file from a processed datalake table in Neuroverse
    """
    table_def = sm.get_table_definition(store_name, table_name)
    # Reverse-map the table's schema type id back to its schema type name.
    schema_type = list(sm.SCHEMA_TYPE_MAP.keys())[
        list(sm.SCHEMA_TYPE_MAP.values()).index(table_def["SchemaType"])]
    file_path = "/managed/" + schema_type + "/table/" + table_name + "/"
    file_path = file_path.lower()
    file_path += file_name_including_partition.strip('/')

    request = {"DataStoreName" : store_name, "TableName" : table_name, "FilePath" : file_path}
    response = neuro_call("80", "DataMovementService", "DataLakeDeleteFile", request)

    check_request = {"JobId" : response["JobId"]}
    status = 0
    errormsg = ""
    while status == 0:
        time.sleep(1)
        response_c = neuro_call("80", "DataMovementService", "CheckJob", check_request)
        status = response_c["Status"]
        if status > 1:
            errormsg = response_c["Message"]

    neuro_call("80", "DataMovementService", "FinaliseJob", check_request)

    if status != 1:
        raise Exception("Neuroverse error: " + errormsg)

    return {"JobId" : response["JobId"], "TimeStamp" : response["TimeStamp"]}
Example #3
def stream(source: "SourceParameters", sink: "SinkParameters"):
    """
    Stream data from a tabular data source to a tabular data sink
    """
    request = {"SourceParameters": source, "SinkParameters": sink}
    method = source["Type"] + "To" + sink["Type"]
    response = neuro_call("80", "DataMovementService", method, request)

    check_request = {"JobId": response["JobId"]}
    status = 0
    errormsg = ""
    while status == 0:
        time.sleep(1)
        response_c = neuro_call("80", "DataMovementService", "CheckJob",
                                check_request)
        status = response_c["Status"]
        if status > 1:
            errormsg = response_c["Message"]

    neuro_call("80", "DataMovementService", "FinaliseJob", check_request)

    if status != 1:
        raise Exception("Neuroverse error: " + errormsg)

    return {"JobId": response["JobId"], "TimeStamp": response["TimeStamp"]}
Example #4
def get_lines_in_datalake_csv(store_name: str, table_name: str, file_name_including_partition: str):
    """
    Get the number of lines for a file in a datalake
    """
    table_def = sm.get_table_definition(store_name, table_name)
    # Reverse-map the table's schema type id back to its schema type name.
    schema_type = list(sm.SCHEMA_TYPE_MAP.keys())[
        list(sm.SCHEMA_TYPE_MAP.values()).index(table_def["SchemaType"])]
    file_path = "/managed/" + schema_type + "/table/" + table_name + "/"
    file_path = file_path.lower()
    file_path += file_name_including_partition.strip('/')

    request = {"DataStoreName" : store_name, "TableName" : table_name, "FilePath" : file_path}
    response = neuro_call("80", "DataMovementService", "GetLinesInDataLakeCsvFile", request)

    check_request = {"JobId" : response["JobId"]}
    status = 0
    errormsg = ""
    while status == 0:
        time.sleep(1)
        response_c = neuro_call("80", "DataMovementService", "CheckJob", check_request)
        status = response_c["Status"]
        errormsg = response_c["Message"]

    neuro_call("80", "DataMovementService", "FinaliseJob", check_request)

    if status != 1:
        raise Exception("Neuroverse error: " + errormsg)

    return int(errormsg)
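A hypothetical call mirroring the delete example in Example #2:

# Count the lines in one partitioned CSV file.
n_lines = get_lines_in_datalake_csv("MyDataLake", "SensorReadings",
                                    "2020/01/15/readings.csv")
print(n_lines)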
Example #5
def delete_rows(store_name: str, table_name: str, where_clause: str = None):
    """
    Delete rows of a SQL table using a WHERE clause. If no WHERE clause is supplied, all rows are deleted
    """
    request = {
        "DataStoreName": store_name,
        "TableName": table_name,
        "WhereClause": where_clause
    }
    response = neuro_call("80", "DataMovementService", "SqlDelete", request)

    check_request = {"JobId": response["JobId"]}
    status = 0
    errormsg = ""
    while status == 0:
        time.sleep(1)
        response_c = neuro_call("80", "DataMovementService", "CheckJob",
                                check_request)
        status = response_c["Status"]
        if status > 1:
            errormsg = response_c["Message"]

    neuro_call("80", "DataMovementService", "FinaliseJob", check_request)

    if status != 1:
        raise Exception("Neuroverse error: " + errormsg)

    return None
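A minimal sketch (the store, table, and predicate are placeholders):

# Delete matching rows; omitting where_clause deletes every row in the table.
delete_rows("MySqlStore", "Sales", where_clause="Country = 'AU'")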
Example #6
def create_data_store(data_store_name: str,
                      data_store_type: "DataStoreType",
                      data_store_tier: "StoreTierType" = StoreTierType.Small):
    """
    Create a Neuroverse data store
    """
    request = {
        "StoreName": data_store_name,
        "DataStoreTypeId": data_store_type.value,
        "StoreTierTypeId": data_store_tier.value
    }
    neuro_call('80', 'datastoremanager', 'createdatastore', request)
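A hypothetical call; the DataStoreType member name is an assumption, since the enum's members are not shown in this excerpt:

create_data_store("MySqlStore", DataStoreType.Sql, StoreTierType.Small)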
Example #7
def add_table_indexes(store_name: str, table_name: str,
                      table_indexes: "List[index_definition]"):
    """
    Add indexes to a table in a Neuroverse SQL data store
    """
    table_def = get_table_definition(store_name, table_name)
    table_def["DestinationTableDefinitionIndexes"].append(table_indexes)
    neuro_call("80", "datapopulation", "UpdateDestinationTableDefinition",
               table_def)
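A sketch assuming an index_definition is a plain dict; its actual structure is not shown in this excerpt, so the keys below are placeholders:

# Hypothetical index definition.
indexes = [{"IndexName": "IX_Sales_Country", "IndexedColumns": ["Country"]}]
add_table_indexes("MySqlStore", "Sales", indexes)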
Example #8
def delete_data_store(data_store_name: str):
    """
    Delete a Neuroverse data store after interactive confirmation
    """
    datastores = [
        ds for ds in list_data_stores()['DataStores']
        if ds['StoreName'] == data_store_name
    ]
    if len(datastores) == 0:
        raise Exception("Data store doesn't exist")
    # Requires an interactive session for the confirmation prompt
    check = input("Are you sure you want to delete %s (y/n)" % data_store_name)
    if check == 'y':
        request = {'DataStoreId': datastores[0]['DataStoreId']}
        neuro_call('80', 'datastoremanager', 'deletedatastore', request)
        return "%s has been deleted" % data_store_name
Example #9
def sql_to_csv(store_name: str, sql_query: "sql_query", file_name: str):
    """
    Execute a SQL query and write the result to a CSV file in your notebook session
    """
    file_name = (os.getcwd().replace(home_directory(), "") + "/" +
                 file_name).strip('/')

    path_list = file_name.split('/')

    # Collapse each "segment/.." pair out of the relative path.
    indices = [i for i, x in enumerate(path_list) if x == ".."]
    new_indices = []

    for ind in indices:
        new_indices.append(ind - 1)
        new_indices.append(ind)

    new_path_list = []
    for i in range(0, len(path_list)):
        if i not in new_indices:
            new_path_list.append(path_list[i])

    file_name = "/".join(new_path_list)

    request = {
        "SqlParameters": {
            "DataStoreName": store_name,
            "SqlQuery": sql_query
        },
        "FileName": file_name
    }
    response = neuro_call("80", "DataMovementService",
                          "SqlQueryToCsvNotebookFileShare", request)

    check_request = {"JobId": response["JobId"]}
    status = 0
    errormsg = ""
    while status == 0:
        time.sleep(1)
        response_c = neuro_call("80", "DataMovementService", "CheckJob",
                                check_request)
        status = response_c["Status"]
        if status > 1:
            errormsg = response_c["Message"]

    neuro_call("80", "DataMovementService", "FinaliseJob", check_request)

    if status != 1:
        raise Exception("Neuroverse error: " + errormsg)

    return None
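A minimal sketch; the relative path and query are placeholders, and the path is resolved against the notebook's working directory as shown above:

sql_to_csv("MySqlStore", "SELECT * FROM Sales", "exports/sales.csv")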
Example #10
def list_views(store_name: str):
    """
    List SQL views
    """
    data_stores = neuro_call("80", "datastoremanager", "GetDataStores",
                             {"StoreName": store_name})["DataStores"]
    if len(data_stores) == 0:
        raise Exception("Data store doesn't exist")

    response = neuro_call("80",
                          "datapopulation",
                          "ListDataPopulationViews",
                          {"DataStoreId": data_stores[0]["DataStoreId"]},
                          controller="DataPopulationView")
    return response["Names"]
Example #11
def list_active_sessions():
    """
    Get list of active notebook sessions
    """

    return neuro_call("8080", "notebookmanagementservice",
                      "GetDetailedSessionList", None)
Example #12
def delete_view(store_name: str, view_name: str):
    """
    Delete a SQL view
    """
    data_stores = neuro_call("80", "datastoremanager", "GetDataStores",
                             {"StoreName": store_name})["DataStores"]
    if len(data_stores) == 0:
        raise Exception("Data store doesn't exist")

    neuro_call("80",
               "datapopulation",
               "DeleteDataPopulationView", {
                   "DataStoreId": data_stores[0]["DataStoreId"],
                   "Name": view_name
               },
               controller="DataPopulationView")
Example #13
def list_libraries(workspace_id: str = None, cluster_id: str = None, show_all: bool = False):
    """
    List the non-default libraries available on the cluster
    """
    list_jobs_response = neuro_call(
        "80", "sparkmanager", "ListClusterLibraries",
        {"WorkspaceId": workspace_id, "ClusterId": cluster_id})
    if show_all:
        return list_jobs_response["Libraries"]

    # Sort so repeated entries for the same library sit next to each other.
    tmp_libraries = sorted(
        list_jobs_response["Libraries"],
        key=lambda x: str(x['LibraryType']) + x['LibraryName'] + x['LibraryVersion'])
    libraries = []
    for lib in tmp_libraries:
        if lib['Status'] == 'INSTALLED' or lib['Status'] == 'PENDING':
            libraries.append(lib)
        elif (lib['Status'] == 'UNINSTALL_ON_RESTART' and libraries
              and libraries[-1]['LibraryType'] == lib['LibraryType']
              and libraries[-1]['LibraryName'] == lib['LibraryName']):
            # A pending uninstall downgrades an installed duplicate to PENDING;
            # otherwise it replaces the previous non-installed entry.
            if libraries[-1]['Status'] == 'INSTALLED':
                libraries[-1]['Status'] = 'PENDING'
                libraries.append(lib)
            else:
                libraries[-1] = lib
    return libraries
Example #14
def create_update_event_hub_raw_data_capture(
        namespace_name: str,
        event_hub_name: str,
        datalake_name: str,
        datetime_partition_level: "DateTimeLevels" = DateTimeLevels.NA,
        partition_id_level: "PartitionIdLevels" = PartitionIdLevels.NA,
        max_file_in_minutes: int = None,
        max_file_in_MB: int = None):
    """
    Create or update raw data capture from an event hub into a datalake store
    """
    endpoint = next(
        obj for obj in list_event_hubs(namespace_name)
        if obj["EventHubNamespace"] == namespace_name
        and obj["Name"] == event_hub_name and obj['EndpointTypeId'] == 2)
    datastore = neuro_call('80', 'datastoremanager', 'getdatastores',
                           {"StoreName": datalake_name})['DataStores'][0]
    request = {
        'EndPointId': endpoint['EndPointId'],
        'DataStoreId': datastore['DataStoreId'],
        'PartitionByDateTimeLevel': datetime_partition_level.value,
        'PartitionByIdLevel': partition_id_level.value,
        'FileTimeMinutesMax': max_file_in_minutes,
        'FileSizeMBMax': max_file_in_MB
    }
    neuro_call_v2(service='endpointmanager',
                  method='PutRawData',
                  requestbody=request,
                  controller="endpointmanagement")
Example #15
def sql_to_df(store_name: str, sql_query: "sql_query", use_pyodbc=True):
    """
    Execute a SQL query and load the result into a pandas DataFrame in the notebook
    """
    if use_pyodbc:
        connstrbits = neuro_call(
            '80', 'datastoremanager', 'GetDataStores',
            {'StoreName': store_name
             })['DataStores'][0]['ConnectionString'].split(';')
        server = connstrbits[0].split(':')[1].split(',')[0]
        database = connstrbits[1].split('=')[1]
        username = connstrbits[2].split('=')[1]
        password = connstrbits[3].split('=')[1]
        driver = '{ODBC Driver 13 for SQL Server}'
        with pyodbc.connect('DRIVER=' + driver + ';SERVER=' + server +
                            ';PORT=1433;DATABASE=' + database + ';UID=' +
                            username + ';PWD=' + password) as cnxn:
            return pandas.read_sql(build_sql(sql_query), cnxn)
    else:
        if not os.path.exists(home_directory() + "/tmp"):
            os.makedirs(home_directory() + "/tmp")

        file_name = str(uuid.uuid4()) + ".csv"

        # Depth of the current directory below home, used to climb back up.
        count = len(os.getcwd().replace(home_directory(), "").split('/')) - 1

        backs = ""
        for c in range(0, count):
            backs += "../"
        sql_to_csv(store_name, sql_query, backs + "tmp/" + file_name)

        df = pandas.read_csv(home_directory() + "/" + "tmp/" + file_name)
        os.remove(home_directory() + "/" + "tmp/" + file_name)
        return df
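A minimal sketch, again assuming a raw SQL string is an acceptable sql_query value:

# Load query results straight into a DataFrame over pyodbc.
df = sql_to_df("MySqlStore", "SELECT TOP 100 * FROM Sales")
print(df.head())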
Example #16
def submit_job(job_name: str, pyspark_script: str,
               script_parameters: "List[script_parameter]" = None,
               import_tables: "List[import_table]" = None,
               export_tables: "List[export_table]" = None,
               dependencies: "List[library]" = None,
               workspace_id: str = None, cluster_id: str = None,
               run_retry: bool = False, max_concurrent_runs: int = None):
    """
    Submit a spark job (template) and receive back the JobId
    """
    return neuro_call("80", "sparkmanager", "submitjob", 
                                     {
                                       "JobName" : job_name,
                                       "Script" : pyspark_script,
                                       "ScriptLanguage" : 0,
                                       "ScriptParameters" : script_parameters,
                                       "ImportTables" : import_tables,
                                       "ExportTables" : export_tables,
                                       "WorkspaceId" : workspace_id,
                                       "ClusterId" : cluster_id,
                                       "RunRetry" : run_retry,
                                       "MaxConcurrentRuns" : max_concurrent_runs,
                                       "LibraryDependencies" : dependencies
                                     }
                                    )
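A minimal sketch; the script body is a placeholder, and per the docstring the response is assumed to carry the JobId:

job = submit_job(job_name="nightly-aggregation",
                 pyspark_script="df = spark.range(10)\ndf.show()")
print(job)  # expected to include the JobId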
Example #17
def list_tables(store_name: str, table_name: str = '', schema_type: str = ''):
    """
    List existing tables in a Neuroverse data store
    """
    data_stores = neuro_call("80", "datastoremanager", "GetDataStores",
                             {"StoreName": store_name})["DataStores"]
    if len(data_stores) == 0:
        raise Exception("Data store doesn't exist")

    table_defs = neuro_call("80", "DataPopulation", "GetTableInfos",
                            {"DataStoreId": data_stores[0]["DataStoreId"]})

    return [{
        'TableId': table['TableId'],
        'TableName': table['TableName'],
        'SchemaType': SCHEMA_TYPE_MAP_REV[table['TableTypeId']]
    } for table in table_defs['TableInfos']]
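A usage sketch grounded in the return shape above (the store name is a placeholder):

for table in list_tables("MySqlStore"):
    print(table["TableName"], table["SchemaType"])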
Example #18
def get_view(store_name: str, view_name: str):
    """
    Get a SQL view
    """
    data_stores = neuro_call("80", "datastoremanager", "GetDataStores",
                             {"StoreName": store_name})["DataStores"]
    if len(data_stores) == 0:
        raise Exception("Data store doesn't exist")

    response = neuro_call("80",
                          "datapopulation",
                          "GetDataPopulationView", {
                              "DataStoreId": data_stores[0]["DataStoreId"],
                              "Name": view_name
                          },
                          controller="DataPopulationView")
    return {"Name": response["Name"], "Query": response["Query"]}
Example #19
def destroy_context(context_id: str):
    """
    Destroy an interactive spark context
    """
    neuro_call("80", "sparkmanager", "DestroyContext",
               {"ContextId": context_id})
Example #20
def cancel_command(command_id: str):
    """
    Cancel a running command in a context
    """
    neuro_call("80", "sparkmanager", "CancelCommand",
               {"CommandId": command_id})
Example #21
def cancel_run(run_id: str):
    """
    Cancel a running instance of a job
    """
    neuro_call("80", "sparkmanager", "CancelRun", {"RunId": run_id})
Example #22
def list_datalake_table_files_with_partitions(store_name: str, table_name: str):
    """
    List all the files associated with a datalake table in Neuroverse
    """
    request = {"DataStoreName" : store_name, "TableName" : table_name}
    files = neuro_call("80", "DataMovementService", "ListDataLakeTableFiles", request)["Files"]
    return_list = []
    for file in files:
        return_list.append(file.split(table_name.lower())[1])
    return return_list
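Because the returned paths are relative to the table folder, they can be fed back into delete_datalake_file from Example #2 (the names below are placeholders):

files = list_datalake_table_files_with_partitions("MyDataLake", "SensorReadings")
if files:
    delete_datalake_file("MyDataLake", "SensorReadings", files[0])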
Example #23
def delete_cluster(cluster_id: str = None, workspace_id: str = None):
    """
    Delete a cluster
    """
    neuro_call("80", "sparkmanager", "DeleteCluster",
               {"ClusterId": cluster_id, "WorkspaceId": workspace_id})
Example #24
def start_cluster(cluster_id: str = None, workspace_id: str = None):
    """
    Start a cluster
    """
    neuro_call("80", "sparkmanager", "StartCluster",
               {"ClusterId": cluster_id, "WorkspaceId": workspace_id})
Example #25
def get_job_details(job_id: str):
    """
    Get details about a submitted job
    """
    get_job_details_response = neuro_call(
        "80", "sparkmanager", "getjobdetails", {"JobId": job_id})
    return get_job_details_response["JobDetails"]
Example #26
def restart_cluster(cluster_id: str = None, workspace_id: str = None):
    """
    Restart a cluster
    Useful for downgrading libraries
    """
    neuro_call("80", "sparkmanager", "RestartCluster",
               {"ClusterId": cluster_id, "WorkspaceId": workspace_id})
Example #27
def list_jobs(workspace_id: str = None, cluster_id: str = None, max_returned: int = None):
    """
    List the jobs submitted to spark manager
    """
    list_jobs_response = neuro_call(
        "80", "sparkmanager", "listjobs", {
            "WorkspaceId": workspace_id,
            "ClusterId": cluster_id,
            "NumberReturned": max_returned
        })
    return list_jobs_response["JobSummaries"]
Example #28
def inspect_command(command_id: str):
    """
    Inspect the status and result of a command
    """
    inspect_command_response = neuro_call(
        "80", "sparkmanager", "InspectCommand", {"CommandId": command_id})
    # Strip the envelope's error fields before returning the payload.
    del inspect_command_response['Error']
    del inspect_command_response['ErrorCode']
    return inspect_command_response
Example #29
def list_commands(context_id: str):
    """
    List commands in a context
    """
    list_commands_response = neuro_call(
        "80", "sparkmanager", "ListCommands", {"ContextId": context_id})
    del list_commands_response['Error']
    del list_commands_response['ErrorCode']
    return list_commands_response
Example #30
def inspect_context(context_id: str):
    """
    Inspect status of an interactive spark context
    """
    inspect_context_response = neuro_call(
        "80", "sparkmanager", "InspectContext", {"ContextId": context_id})
    del inspect_context_response['Error']
    del inspect_context_response['ErrorCode']
    return inspect_context_response
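A hypothetical polling sketch; the "Status" key and the "PENDING" value are assumptions, since the response shape is not shown in this excerpt:

import time

def wait_for_context(context_id: str, poll_seconds: int = 5):
    # Poll until the context leaves its (assumed) "PENDING" status.
    state = inspect_context(context_id)
    while state.get("Status") == "PENDING":
        time.sleep(poll_seconds)
        state = inspect_context(context_id)
    return state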