def ds_create(self, df_up, name, description=''):
    """Create a DataSet with a fixed two-column STRING schema and
    stream-upload df_up; returns the new DataSet id."""
    dsr = DataSetRequest()
    dsr.name = name
    dsr.description = description
    dsr.schema = Schema([
        Column(ColumnType.STRING, 'tt1'),
        Column(ColumnType.STRING, 'tt2')
    ])
    new_ds_info = self.datasets.create(dsr)
    self.utilities.stream_upload(new_ds_info['id'], df_up,
                                 warn_schema_change=False)
    return new_ds_info['id']
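# A minimal usage sketch for ds_create() above, assuming it is exposed as a
# method on the pydomo Domo client (its use of self.datasets and
# self.utilities suggests this); the credentials and DataFrame contents below
# are placeholders.
import os
import pandas as pd
from pydomo import Domo

domo = Domo(os.environ['DOMO_CLIENT_ID'], os.environ['DOMO_CLIENT_SECRET'],
            api_host='api.domo.com')
df_up = pd.DataFrame({'tt1': ['alpha', 'beta'], 'tt2': ['one', 'two']})
new_id = domo.ds_create(df_up, 'Demo Upload', 'Two-column test DataSet')
print('Created DataSet', new_id)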
def streams(domo):
    '''Streams are useful for uploading massive data sources in chunks,
    in parallel. They are also useful with data sources that are
    constantly changing/growing.
    Streams Docs: https://developer.domo.com/docs/data-apis/data
    '''
    domo.logger.info("\n**** Domo API - Stream Examples ****\n")
    streams = domo.streams

    # Define a DataSet Schema to populate the Stream Request
    dsr = DataSetRequest()
    dsr.name = 'Leonhard Euler Party'
    dsr.description = 'Mathematician Guest List'
    dsr.schema = Schema([
        Column(ColumnType.STRING, 'Friend'),
        Column(ColumnType.STRING, 'Attending')
    ])

    # Build a Stream Request
    stream_request = CreateStreamRequest(dsr, UpdateMethod.APPEND)

    # Create a Stream w/DataSet
    stream = streams.create(stream_request)
    domo.logger.info("Created Stream {} containing the new DataSet {}".format(
        stream['id'], stream['dataSet']['id']))

    # Get a Stream's metadata
    retrieved_stream = streams.get(stream['id'])
    domo.logger.info("Retrieved Stream {} containing DataSet {}".format(
        retrieved_stream['id'], retrieved_stream['dataSet']['id']))

    # List Streams
    limit = 1000
    offset = 0
    stream_list = streams.list(limit, offset)
    domo.logger.info("Retrieved a list containing {} Stream(s)".format(
        len(stream_list)))

    # Update a Stream's metadata
    stream_update = CreateStreamRequest(dsr, UpdateMethod.REPLACE)
    updated_stream = streams.update(retrieved_stream['id'], stream_update)
    domo.logger.info("Updated Stream {} to update method: {}".format(
        updated_stream['id'], updated_stream['updateMethod']))

    # Search for Streams
    stream_property = 'dataSource.name:' + dsr.name
    searched_streams = streams.search(stream_property)
    domo.logger.info("Stream search: there are {} Stream(s) with the DataSet "
                     "title: {}".format(len(searched_streams), dsr.name))

    # Create an Execution (Begin an upload process)
    execution = streams.create_execution(stream['id'])
    domo.logger.info("Created Execution {} for Stream {}".format(
        execution['id'], stream['id']))

    # Get an Execution
    retrieved_execution = streams.get_execution(stream['id'], execution['id'])
    domo.logger.info("Retrieved Execution with id: {}".format(
        retrieved_execution['id']))

    # List Executions
    execution_list = streams.list_executions(stream['id'], limit, offset)
    domo.logger.info("Retrieved a list containing {} Execution(s)".format(
        len(execution_list)))

    # Upload Data: Multiple Parts can be uploaded in parallel
    part = 1
    csv = '"Pythagoras","FALSE"\n"Alan Turing","TRUE"'
    execution = streams.upload_part(stream['id'], execution['id'], part, csv)
    part = 2
    csv = '"George Boole","TRUE"'
    execution = streams.upload_part(stream['id'], execution['id'], part, csv)

    # Commit the execution (End an upload process)
    # Executions/commits are NOT atomic
    committed_execution = streams.commit_execution(stream['id'],
                                                   execution['id'])
    domo.logger.info("Committed Execution {} on Stream {}".format(
        committed_execution['id'], stream['id']))

    # Abort a specific Execution
    execution = streams.create_execution(stream['id'])
    aborted_execution = streams.abort_execution(stream['id'], execution['id'])
    domo.logger.info("Aborted Execution {} on Stream {}".format(
        aborted_execution['id'], stream['id']))

    # Abort any Execution on a given Stream
    streams.create_execution(stream['id'])
    streams.abort_current_execution(stream['id'])
    domo.logger.info("Aborted Executions on Stream {}".format(stream['id']))

    # Delete a Stream
    streams.delete(stream['id'])
    domo.logger.info("Deleted Stream {}; the associated DataSet must be "
                     "deleted separately".format(stream['id']))

    # Delete the associated DataSet
    domo.datasets.delete(stream['dataSet']['id'])
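# A sketch of the client setup and imports these example functions rely on.
# The import paths reflect the pydomo package layout assumed by the snippets
# (DataSetRequest, Schema, etc. under pydomo.datasets; stream types under
# pydomo.streams); the credentials below are placeholders.
import os
from pydomo import Domo
from pydomo.datasets import (DataSetRequest, Schema, Column, ColumnType,
                             Sorting, Policy, PolicyFilter, FilterOperator,
                             PolicyType)
from pydomo.streams import CreateStreamRequest, UpdateMethod

domo = Domo(os.environ['DOMO_CLIENT_ID'], os.environ['DOMO_CLIENT_SECRET'],
            api_host='api.domo.com')
streams(domo)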
get_issues_for_repo(repo_data)

# Close json output files
json_gh_repos_file.close()
json_zh_repos_file.close()

# Domo Create ghzh_repos_history dataset
if ghzh_repo_history_dsid != "":
    ds_id = ghzh_repo_history_dsid
else:
    domo.logger.info("\n**** Create Domo dataset ghzh_repos_history ****\n")
    dsr.name = "ghzh_repos_history"
    dsr.description = ""
    dsr.schema = Schema([
        Column(ColumnType.STRING, 'RepoName'),
        Column(ColumnType.STRING, 'PipelineName'),
        Column(ColumnType.STRING, 'IssueName'),
        Column(ColumnType.DECIMAL, 'IssueNumber'),
        Column(ColumnType.STRING, 'IssueLabels'),
        Column(ColumnType.STRING, 'IssueMilestones'),
        Column(ColumnType.STRING, 'IssueAssignees'),
        Column(ColumnType.STRING, 'IssueEpics'),
        Column(ColumnType.STRING, 'IssueUrl'),
        Column(ColumnType.STRING, 'IssueOpenClosed'),
        Column(ColumnType.DECIMAL, 'IssuePoints'),
        Column(ColumnType.STRING, 'RepoId'),
        Column(ColumnType.STRING, 'IssueId'),
        Column(ColumnType.DECIMAL, '_BATCH_ID_'),
        Column(ColumnType.DATETIME, '_BATCH_LAST_RUN_')
    ])
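    # Hedged sketch of the likely next step (not in the excerpt): create the
    # DataSet from the schema above and capture its id, mirroring the
    # datasets.create(dsr) pattern used in the other snippets.
    ds_id = domo.datasets.create(dsr)['id']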
# if new dataset, create one in DOMO and save the ID
if config['output_dataset_id'] is None:
    # Create an instance of the SDK Client
    domo = Domo(domo_config.domo_id, domo_config.domo_secret,
                api_host="api.domo.com")

    # define the dataset, name, description, schema
    dsr = DataSetRequest()
    dsr.name = config['output_filename'][:-4] + ' Cannibalization Results'
    dsr.description = ''

    # Valid column types are STRING, DECIMAL, LONG, DOUBLE, DATE, DATETIME.
    # cannibalization results schema
    dsr.schema = Schema([
        Column(ColumnType.DATETIME, 'run_at'),
        Column(ColumnType.STRING, 'device'),
        Column(ColumnType.LONG, 'cu'),
        Column(ColumnType.LONG, 'cc'),
        Column(ColumnType.LONG, 'ccs'),
        Column(ColumnType.DECIMAL, 'control_conv'),
        Column(ColumnType.LONG, 'tu'),
        Column(ColumnType.LONG, 'tc'),
        Column(ColumnType.LONG, 'tcs'),
        Column(ColumnType.DECIMAL, 'test_conv'),
        Column(ColumnType.DECIMAL, 'prob_cann'),
        Column(ColumnType.DECIMAL, 'conf_int_l'),
        Column(ColumnType.DECIMAL, 'conf_int_h'),
        Column(ColumnType.DATE, 'date_start'),
        Column(ColumnType.DATE, 'date_end')
    ])
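    # Hedged sketch of a plausible follow-up (not part of the excerpt):
    # create the DataSet and persist its id back into the config, mirroring
    # the conf-saving pattern used in the Sweethawk snippet further below.
    dataset = domo.datasets.create(dsr)
    config['output_dataset_id'] = dataset['id']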
def datasets(domo):
    '''DataSets are useful for data sources that only require occasional
    replacement. See the docs at
    https://developer.domo.com/docs/data-apis/data
    '''
    domo.logger.info("\n**** Domo API - DataSet Examples ****\n")
    datasets = domo.datasets

    # Define a DataSet Schema
    dsr = DataSetRequest()
    dsr.name = 'Leonhard Euler Party'
    dsr.description = 'Mathematician Guest List'
    dsr.schema = Schema([Column(ColumnType.STRING, 'Friend')])

    # Create a DataSet with the given Schema
    dataset = datasets.create(dsr)
    domo.logger.info("Created DataSet " + dataset['id'])

    # Get a DataSet's metadata
    retrieved_dataset = datasets.get(dataset['id'])
    domo.logger.info("Retrieved DataSet " + retrieved_dataset['id'])

    # List DataSets
    dataset_list = list(datasets.list(sort=Sorting.NAME))
    domo.logger.info("Retrieved a list containing {} DataSet(s)".format(
        len(dataset_list)))

    # Update a DataSet's metadata
    update = DataSetRequest()
    update.name = 'Leonhard Euler Party - Update'
    update.description = 'Mathematician Guest List - Update'
    update.schema = Schema([
        Column(ColumnType.STRING, 'Friend'),
        Column(ColumnType.STRING, 'Attending')
    ])
    updated_dataset = datasets.update(dataset['id'], update)
    domo.logger.info("Updated DataSet {}: {}".format(updated_dataset['id'],
                                                     updated_dataset['name']))

    # Import Data from a string
    csv_upload = '"Pythagoras","FALSE"\n"Alan Turing","TRUE"\n' \
                 '"George Boole","TRUE"'
    datasets.data_import(dataset['id'], csv_upload)
    domo.logger.info("Uploaded data to DataSet " + dataset['id'])

    # Export Data to a string
    include_csv_header = True
    csv_download = datasets.data_export(dataset['id'], include_csv_header)
    domo.logger.info("Downloaded data from DataSet {}:\n{}".format(
        dataset['id'], csv_download))

    # Export Data to a file (also returns a readable/writable file object)
    csv_file_path = './math.csv'
    include_csv_header = True
    csv_file = datasets.data_export_to_file(dataset['id'], csv_file_path,
                                            include_csv_header)
    csv_file.close()
    domo.logger.info("Downloaded data as a file from DataSet {}".format(
        dataset['id']))

    # Import Data from a file
    csv_file_path = './math.csv'
    datasets.data_import_from_file(dataset['id'], csv_file_path)
    domo.logger.info("Uploaded data from a file to DataSet {}".format(
        dataset['id']))

    # Personalized Data Policies (PDPs)
    # Build a Policy Filter (hide sensitive columns/values from users)
    pdp_filter = PolicyFilter()
    pdp_filter.column = 'Attending'  # The DataSet column to filter on
    pdp_filter.operator = FilterOperator.EQUALS
    pdp_filter.values = ['TRUE']  # The DataSet row value to filter on

    # Build the Personalized Data Policy (PDP)
    pdp_request = Policy()
    pdp_request.name = 'Only show friends attending the party'
    # A single PDP can contain multiple filters
    pdp_request.filters = [pdp_filter]
    pdp_request.type = PolicyType.USER
    # The affected user ids (restricted access by filter)
    pdp_request.users = [998, 999]
    # The affected group ids (restricted access by filter)
    pdp_request.groups = [99, 100]

    # Create the PDP
    pdp = datasets.create_pdp(dataset['id'], pdp_request)
    domo.logger.info("Created a Personalized Data Policy (PDP): "
                     "{}, id: {}".format(pdp['name'], pdp['id']))

    # Get a Personalized Data Policy (PDP)
    pdp = datasets.get_pdp(dataset['id'], pdp['id'])
    domo.logger.info("Retrieved a Personalized Data Policy (PDP):"
                     " {}, id: {}".format(pdp['name'], pdp['id']))

    # List Personalized Data Policies (PDP)
    pdp_list = datasets.list_pdps(dataset['id'])
    domo.logger.info(
        "Retrieved a list containing {} PDP(s) for DataSet {}".format(
            len(pdp_list), dataset['id']))

    # Update a Personalized Data Policy (PDP)
    # Negate the previous filter (logical NOT). Note that in this case you
    # must treat the object as a dictionary - `pdp_filter.not` is invalid
    # syntax.
    pdp_filter['not'] = True
    pdp_request.name = 'Only show friends not attending the party'
    # A single PDP can contain multiple filters
    pdp_request.filters = [pdp_filter]
    pdp = datasets.update_pdp(dataset['id'], pdp['id'], pdp_request)
    domo.logger.info(
        "Updated a Personalized Data Policy (PDP): {}, id: {}".format(
            pdp['name'], pdp['id']))

    # Delete a Personalized Data Policy (PDP)
    datasets.delete_pdp(dataset['id'], pdp['id'])
    domo.logger.info(
        "Deleted a Personalized Data Policy (PDP): {}, id: {}".format(
            pdp['name'], pdp['id']))

    # Delete a DataSet
    datasets.delete(dataset['id'])
    domo.logger.info("Deleted DataSet {}".format(dataset['id']))
def upload_csv(self, source, destination, engine, **kwargs):
    with open(source + "/metadata.json") as file:
        data = json.load(file)
    data_source = data["source"]
    table = data["table"]
    rows = data["rows"]
    columns = list(data["columns"])
    types = list(data["types"])

    if destination == DataSource.DOMO:
        domo = DomoAPI(self.logger, engine)
        if not self.dataset_id:
            # Create a new Dataset Schema
            if not self.dataset_name:
                self.dataset_name = table
            schema = dict(
                zip(columns,
                    DataSource.convert_to_domo_types(source=data_source,
                                                     types=types)))
            dsr = domo.create_dataset(
                schema=Schema([Column(schema[col], col) for col in schema]),
                name=self.dataset_name,
                description=self.dataset_desc)
        else:
            # Get existing Dataset Schema
            dsr = domo.get_dataset(self.dataset_id)

        # Search for existing Stream
        streams = domo.search_stream(self.dataset_name)

        # Build a Stream Request
        update_method = "APPEND" if "part" in kwargs else self.update_method
        domo.stream = streams[0] if streams else domo.create_stream(
            dsr, update_method)
        self.dataset_id = domo.stream["dataSet"]["id"]
        self.logger.info(f"Stream created: {domo.stream}")

        # Create an Execution
        domo.execution = domo.create_execution(domo.stream)
        self.logger.info(f"Execution created: {domo.execution}")

        # Begin upload process
        results = domo.upload(
            mode=Mode.PARALLEL,
            source=source + "/parts",
            columns=columns,
            np_types=DataSource.convert_to_np_types(source=data_source,
                                                    types=types),
            date_columns=DataSource.select_date_columns(columns, types),
            total_records=self.chunk_size if "part" in kwargs else rows,
            chunk_size=self.chunk_size,
            part=kwargs["part"] if "part" in kwargs else None)
    # elif destination == DataSource.HANA:
    #     pass
    # elif destination == DataSource.ORACLE:
    #     pass
    elif destination == DataSource.SNOWFLAKE:
        snowflake = SnowflakeAPI(self.logger, engine, self.sf_schema,
                                 self.sf_table)
        results = snowflake.upload(
            mode=Mode.SEQUENTIAL,
            source=source + "/parts",
            columns=columns,
            np_types=DataSource.convert_to_np_types(source=data_source,
                                                    types=types),
            date_columns=DataSource.select_date_columns(columns, types),
            total_records=self.chunk_size if "part" in kwargs else rows,
            chunk_size=self.chunk_size,
            part=kwargs["part"] if "part" in kwargs else None)
    else:
        self.logger.exception(
            "Unable to support provided data destination: {}".format(
                destination))
        raise Exception(
            "Unable to support provided data destination: {}".format(
                destination))

    if "merge" in kwargs and kwargs["merge"]:
        self.merge_csv(source, table)

    if "keep" in kwargs and not kwargs["keep"]:
        import shutil
        shutil.rmtree(source)

    return results
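# Hypothetical shape of the metadata.json file that upload_csv() reads,
# inferred from the keys accessed above. The field values are illustrative
# placeholders, and the type strings depend on the source system's own
# conventions (they are converted via DataSource.convert_to_domo_types).
import json

example_metadata = {
    "source": "postgres",        # source-system label (assumed)
    "table": "orders",           # source table name (assumed)
    "rows": 125000,              # total record count
    "columns": ["order_id", "created_at", "amount"],
    "types": ["INTEGER", "TIMESTAMP", "NUMERIC"],
}
with open("./export/metadata.json", "w") as fh:
    json.dump(example_metadata, fh, indent=2)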
data['date'] = pd.to_datetime(data['date'])
data['reason'] = data['reason'].replace(np.nan, '')
data['nps_reason'] = data['nps_reason'].replace(np.nan, '')

csv = StringIO()
data.to_csv(csv, index=False)

if "dsid" not in CONF:
    dsr = DataSetRequest()
    dsr.name = 'ZenDesk Sweethawk Surveys'
    dsr.description = 'Zendesk Surveys Exported from Sweethawk'
    dsr.schema = Schema([
        Column(ColumnType.LONG, 'ticket'),
        Column(ColumnType.STRING, 'brand'),
        Column(ColumnType.LONG, 'score'),
        Column(ColumnType.STRING, 'reason'),
        Column(ColumnType.DECIMAL, 'nps'),
        Column(ColumnType.STRING, 'nps_reason'),
        Column(ColumnType.DATE, 'date')
    ])
    dataset = domo.datasets.create(dsr)
    CONF['dsid'] = dataset['id']
    with open('conf.json', 'w') as f:
        f.write(json.dumps(CONF))

domo.datasets.data_import(CONF['dsid'], csv.getvalue())
            client_secret, logger_name='foo', log_level=logging.INFO,
            api_host=api_host)
dsr = DataSetRequest()
datasets = domo.datasets

# Id of the dataset when we upload.
final_dataset_id = "27fecc96-5313-485a-9cb2-31874c7c41a8"

# To create a dataset you need to create a schema.
# NOTE: Will throw an error if you have the wrong number of columns
data_schema = Schema([
    Column(ColumnType.STRING, "propertyid"),
    Column(ColumnType.LONG, "Check Requested"),
    Column(ColumnType.LONG, "Closed"),
    Column(ColumnType.LONG, "Doc Date"),
    Column(ColumnType.LONG, "Estoppel"),
    Column(ColumnType.LONG, "Execution"),
    Column(ColumnType.LONG, "Inventory"),
    Column(ColumnType.LONG, "Inventory - Active"),
    Column(ColumnType.LONG, "Inventory - New"),
    Column(ColumnType.LONG, "Inventory - Ready to Relist"),
    Column(ColumnType.LONG, "Inventory - Scheduled"),
    Column(ColumnType.LONG, "Inventory - Unsold"),
    Column(ColumnType.LONG, "Purchase Agreement"),
    Column(ColumnType.LONG, "Sale Date"),
    Column(ColumnType.LONG, "Transfered"),
    Column(ColumnType.LONG, "Welcome"),
def datasets(self):
    # DataSet Docs: https://developer.domo.com/docs/data-apis/data
    self.logger.info("\n**** Domo API - DataSet Examples ****\n")
    datasets = self.domo.datasets

    # Define a DataSet Schema
    dsr = DataSetRequest()
    dsr.name = 'Leonhard Euler Party'
    dsr.description = 'Mathematician Guest List'
    dsr.schema = Schema([Column(ColumnType.STRING, 'Friend')])

    # Create a DataSet with the given Schema
    dataset = datasets.create(dsr)
    self.logger.info("Created DataSet " + str(dataset.id))

    # Get a DataSet's metadata
    retrieved_dataset = datasets.get(dataset.id)
    self.logger.info("Retrieved DataSet " + str(retrieved_dataset.id))

    # List DataSets
    dataset_list = list(datasets.list(sort=Sorting.NAME))
    self.logger.info("Retrieved a list containing " + str(len(dataset_list))
                     + " DataSet(s)")

    # Update a DataSet's metadata
    update = DataSetRequest()
    update.name = 'Leonhard Euler Party - Update'
    update.description = 'Mathematician Guest List - Update'
    update.schema = Schema([
        Column(ColumnType.STRING, 'Friend'),
        Column(ColumnType.STRING, 'Attending')
    ])
    updated_dataset = datasets.update(dataset.id, update)
    self.logger.info("Updated DataSet " + str(updated_dataset.id) + " : "
                     + updated_dataset.name)

    # Import Data from a string
    csv_upload = "\"Pythagoras\",\"FALSE\"\n\"Alan Turing\",\"TRUE\"\n\"George Boole\",\"TRUE\""
    datasets.data_import(dataset.id, csv_upload)
    self.logger.info("Uploaded data to DataSet " + str(dataset.id))

    # Export Data to a string
    include_csv_header = True
    csv_download = datasets.data_export(dataset.id, include_csv_header)
    self.logger.info("Downloaded data as a string from DataSet "
                     + str(dataset.id) + ":\n" + str(csv_download))

    # Export Data to a file (also returns the readable/writable file object)
    csv_file_path = './math.csv'
    include_csv_header = True
    csv_file = datasets.data_export_to_file(dataset.id, csv_file_path,
                                            include_csv_header)
    csv_file.close()
    self.logger.info("Downloaded data as a file from DataSet "
                     + str(dataset.id))

    # Import Data from a file
    csv_file_path = './math.csv'
    datasets.data_import_from_file(dataset.id, csv_file_path)
    self.logger.info("Uploaded data from a file to DataSet "
                     + str(dataset.id))

    # Personalized Data Policies (PDPs)
    # Build a Policy Filter (hide sensitive columns/values from users)
    pdp_filter = PolicyFilter()
    pdp_filter.column = 'Attending'  # The DataSet column to filter on
    pdp_filter.operator = FilterOperator.EQUALS
    pdp_filter.values = ['TRUE']  # The DataSet row value to filter on

    # Build the Personalized Data Policy (PDP)
    pdp_request = Policy()
    pdp_request.name = 'Only show friends attending the party'
    # A single PDP can contain multiple filters
    pdp_request.filters = [pdp_filter]
    pdp_request.type = PolicyType.USER
    # The affected user ids (restricted access by filter)
    pdp_request.users = [998, 999]
    # The affected group ids (restricted access by filter)
    pdp_request.groups = [99, 100]

    # Create the PDP
    pdp = datasets.create_pdp(dataset.id, pdp_request)
    self.logger.info("Created a Personalized Data Policy (PDP): " + pdp.name
                     + ", id: " + str(pdp.id))

    # Get a Personalized Data Policy (PDP)
    retrieved_pdp = datasets.get_pdp(dataset.id, pdp.id)
    self.logger.info("Retrieved a Personalized Data Policy (PDP): "
                     + retrieved_pdp.name + ", id: " + str(retrieved_pdp.id))

    # List Personalized Data Policies (PDP)
    pdp_list = datasets.list_pdps(dataset.id)
    self.logger.info("Retrieved a list containing " + str(len(pdp_list))
                     + " PDP(s) for DataSet " + str(dataset.id))

    # Update a Personalized Data Policy (PDP)
    pdp_filter.NOT = True  # Negate the previous filter (logical NOT)
    pdp_request.name = 'Only show friends not attending the party'
    # A single PDP can contain multiple filters
    pdp_request.filters = [pdp_filter]
    pdp = datasets.update_pdp(dataset.id, pdp.id, pdp_request)
    self.logger.info("Updated a Personalized Data Policy (PDP): " + pdp.name
                     + ", id: " + str(pdp.id))

    # Delete a Personalized Data Policy (PDP)
    datasets.delete_pdp(dataset.id, pdp.id)
    self.logger.info("Deleted a Personalized Data Policy (PDP) " + pdp.name
                     + ", id: " + str(pdp.id))

    # Delete a DataSet
    datasets.delete(dataset.id)
    self.logger.info("Deleted DataSet " + str(dataset.id))
def streams(self):
    # Streams Docs: https://developer.domo.com/docs/data-apis/data
    self.logger.info("\n**** Domo API - Stream Examples ****\n")
    streams = self.domo.streams

    # Define a DataSet Schema to populate the Stream Request
    dsr = DataSetRequest()
    dsr.name = 'Leonhard Euler Party'
    dsr.description = 'Mathematician Guest List'
    dsr.schema = Schema([
        Column(ColumnType.STRING, 'Friend'),
        Column(ColumnType.STRING, 'Attending')
    ])

    # Build a Stream Request
    stream_request = CreateStreamRequest(dsr, UpdateMethod.APPEND)

    # Create a Stream w/DataSet
    stream = streams.create(stream_request)
    self.logger.info("Created Stream " + str(stream.id)
                     + " containing the new DataSet " + stream.dataSet.id)

    # Get a Stream's metadata
    retrieved_stream = streams.get(stream.id)
    self.logger.info("Retrieved Stream " + str(retrieved_stream.id)
                     + " containing DataSet " + retrieved_stream.dataSet.id)

    # List Streams
    limit = 1000
    offset = 0
    stream_list = streams.list(limit, offset)
    self.logger.info("Retrieved a list containing " + str(len(stream_list))
                     + " Stream(s)")

    # Update a Stream's metadata
    stream_update = CreateStreamRequest(dsr, UpdateMethod.REPLACE)
    updated_stream = streams.update(retrieved_stream.id, stream_update)
    self.logger.info("Updated Stream " + str(updated_stream.id)
                     + " to update method: " + updated_stream.updateMethod)

    # Search for Streams
    stream_property = 'dataSource.name: ' + dsr.name
    searched_streams = streams.search(stream_property)
    self.logger.info("Stream search: there are " + str(len(searched_streams))
                     + " Stream(s) with the DataSet title: " + dsr.name)

    # Create an Execution (Begin an upload process)
    execution = streams.create_execution(stream.id)
    self.logger.info("Created Execution " + str(execution.id)
                     + " for Stream " + str(stream.id))

    # Get an Execution
    retrieved_execution = streams.get_execution(stream.id, execution.id)
    self.logger.info("Retrieved Execution with id: "
                     + str(retrieved_execution.id))

    # List Executions
    execution_list = streams.list_executions(stream.id, limit, offset)
    self.logger.info("Retrieved a list containing " + str(len(execution_list))
                     + " Execution(s)")

    # Upload Data: Multiple Parts can be uploaded in parallel
    part = 1
    csv = "\"Pythagoras\",\"FALSE\"\n\"Alan Turing\",\"TRUE\"\n\"George Boole\",\"TRUE\""
    execution = streams.upload_part(stream.id, execution.id, part, csv)

    # Commit the execution (End an upload process)
    # (Executions/commits are NOT atomic)
    committed_execution = streams.commit_execution(stream.id, execution.id)
    self.logger.info("Committed Execution " + str(committed_execution.id)
                     + " on Stream " + str(stream.id))

    # Abort a specific Execution
    execution = streams.create_execution(stream.id)
    aborted_execution = streams.abort_execution(stream.id, execution.id)
    self.logger.info("Aborted Execution " + str(aborted_execution.id)
                     + " on Stream " + str(stream.id))

    # Abort any Execution on a given Stream
    streams.create_execution(stream.id)
    streams.abort_current_execution(stream.id)
    self.logger.info("Aborted Executions on Stream " + str(stream.id))

    # Delete a Stream
    streams.delete(stream.id)
    self.logger.info("Deleted Stream " + str(stream.id)
                     + "; the associated DataSet must be deleted separately")

    # Delete the associated DataSet
    self.domo.datasets.delete(stream.dataSet.id)
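# A hedged sketch of the surrounding context the two methods above assume: a
# small class holding a pydomo client as self.domo and its logger as
# self.logger. The class name and credential handling here are hypothetical,
# not taken from the original source.
import os
from pydomo import Domo


class DomoExamples:
    def __init__(self):
        # Placeholder credentials read from the environment (assumption).
        self.domo = Domo(os.environ['DOMO_CLIENT_ID'],
                         os.environ['DOMO_CLIENT_SECRET'],
                         api_host='api.domo.com')
        self.logger = self.domo.logger


examples = DomoExamples()
examples.datasets()
examples.streams()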