def get_columns(self):
    columns = []
    for name, column in self.get_class_members():
        column.name = to_identifier(name)
        column.schema_name = to_identifier(self.schema_name)
        columns.append(column)
    return columns
def __init__(self, title, folder=None, reference=None, schemaReference=None,
             dataType=None, datetime=False, format=None):
    self.title = to_title(title)
    self.folder = to_identifier(folder)
    self.folder_title = to_title(folder)
    self.reference = to_identifier(reference)
    self.schemaReference = to_identifier(schemaReference)
    self.dataType = dataType
    self.datetime = datetime
    self.format = format
def __init__(self, title, folder=None, reference=None, schemaReference=None,
             dataType=None, datetime=False, references_cp=None):
    self.title = to_title(title)
    self.folder = to_identifier(folder)
    self.folder_title = to_title(folder)
    self.reference = to_identifier(reference)
    self.schemaReference = to_identifier(schemaReference)
    self.dataType = dataType
    self.datetime = datetime
    # an attribute useful for labels,
    # to know if they reference a connection point
    self.references_cp = references_cp
def get_sli_manifest(self):
    '''Create JSON manifest from columns in schema.

    See populateColumnsFromSchema in AbstractConnector.java
    '''
    parts = []
    for column in self.get_columns():
        # special additional columns for date
        if isinstance(column, Date):
            parts.append(get_date_dt_column(column, self.schema_name))
            if column.datetime:
                parts.append(get_time_tm_column(column, self.schema_name))
                parts.append(get_tm_time_id_column(column, self.schema_name))
        parts.append(column.get_sli_manifest_part())

    return {
        "dataSetSLIManifest": {
            "parts": parts,
            "file": CSV_DATA_FILENAME,
            "dataSet": 'dataset.%s' % to_identifier(self.schema_name),
            "csvParams": {
                "quoteChar": '"',
                "escapeChar": '"',
                "separatorChar": ",",
                "endOfLine": "\n"
            }
        }
    }
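# Illustrative only: the approximate shape of the manifest returned by
# get_sli_manifest() for a hypothetical "Employees" schema with one attribute
# column; the real part layout comes from each column's get_sli_manifest_part().
EXAMPLE_SLI_MANIFEST = {
    "dataSetSLIManifest": {
        "parts": [
            {"populates": ["label.employees.department"],
             "columnName": "department",
             "mode": "FULL"},
        ],
        "file": "data.csv",  # CSV_DATA_FILENAME in the real module
        "dataSet": "dataset.employees",
        "csvParams": {"quoteChar": '"', "escapeChar": '"',
                      "separatorChar": ",", "endOfLine": "\n"},
    }
}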
def has_label(self, label_name, title=None, hyperlink=False):
    """
    Labels are particular because they are bound to columns.

    :param label_name: the name of the label to get
    :param title: if needed, the title of the label
    :param hyperlink: a boolean telling if we're looking for a hyperlink
    """
    col_uris = self.get_column_uris()
    label_identifier_re = r'label\.%(dataset)s\.[a-zA-Z_]+\.%(label_name)s' % {
        'dataset': to_identifier(self.schema_name),
        'label_name': label_name,
    }
    for col_uri in col_uris['attributes'] + col_uris['facts']:
        col_json = self.get_column_detail(col_uri)
        for display in col_json['content'].get('displayForms', []):
            if re.match(label_identifier_re, display['meta']['identifier']):
                if not title or display['meta']['title'] == title:
                    if hyperlink and display['content'].get('type', '') != "GDC.link":
                        return False
                    return True
    return False
def get_time_tm_column(self, full_upload):
    # note: '%(name)s' % self relies on the column supporting
    # mapping-style lookup (__getitem__)
    name = '%(name)s_tm' % self
    populates = 'tm.dt.%s.%s' % (to_identifier(self.schema_name), self.name)
    return {
        'populates': [populates],
        'columnName': name,
        'mode': 'FULL' if full_upload else 'INCREMENTAL'
    }
def get_date_dt_column(self, full_upload):
    # note: '%(name)s' % self relies on the column supporting
    # mapping-style lookup (__getitem__)
    name = '%(name)s_dt' % self
    populates = 'dt.%s.%s' % (to_identifier(self.schema_name), self.name)
    return {
        'populates': [populates],
        'columnName': name,
        'mode': 'FULL' if full_upload else 'INCREMENTAL'
    }
def upload_time(self, name):
    data_path = os.path.join(os.path.dirname(__file__), 'resources',
                             'connector', 'data.csv')
    manifest_path = os.path.join(os.path.dirname(__file__), 'resources',
                                 'connector', 'upload_info.json')
    with open(data_path) as f:
        data = f.read()
    with open(manifest_path) as f:
        sli_manifest = f.read()\
            .replace('%id%', text.to_identifier(name))\
            .replace('%name%', name)
    dir_name = self.connection.webdav.upload(data, sli_manifest)
    self.project.integrate_uploaded_data(dir_name, wait_for_finish=True)
    self.connection.webdav.delete(dir_name)
def get_maql(self, name=None, include_time=False):
    '''Get MAQL for date dimension.

    See generateMaqlCreate in DateDimensionConnector.java
    '''
    if not name:
        return self.DATE_MAQL

    maql = self.DATE_MAQL_ID % (text.to_identifier(name), name)

    if include_time:
        file_path = os.path.join(os.path.dirname(__file__), 'resources',
                                 'connector', 'TimeDimension.maql')
        with open(file_path) as f:
            time_dimension = f.read()\
                .replace('%id%', text.to_identifier(name))\
                .replace('%name%', name)
        maql = ''.join((maql, time_dimension))
    return maql
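# A minimal, self-contained sketch of the %id%/%name% substitution that
# get_maql() applies to the TimeDimension.maql template; the template line and
# the lower/underscore stand-in for text.to_identifier() are assumptions here.
if __name__ == '__main__':
    template = 'CREATE DATASET {dataset.%id%} VISUAL(TITLE "%name%");'
    name = 'Opportunity Analysis'
    rendered = (template.replace('%id%', name.lower().replace(' ', '_'))
                        .replace('%name%', name))
    print(rendered)
    # CREATE DATASET {dataset.opportunity_analysis} VISUAL(TITLE "Opportunity Analysis");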
def get_maql(self):
    if not self.alteration_state['nb_changes']:
        return ''
    hyperlink_change = (self.alteration_state['simple']
                        & self.alteration_state['hyperlink'])
    return self.column.get_alter_maql(
        schema_name=to_identifier(self.schema_name),
        name=self.col_name,
        new_attributes=self.new_attrs,
        hyperlink_change=hyperlink_change
    )
def has_column(self, col_name, attribute=False, fact=False, date=False,
               reference=False, title=None):
    """
    A function to check that a dataset has a specific column
    (attribute or fact), saved on GoodData.

    :param col_name: the name of the column
    :param attribute: a boolean that says if the column to look for is an attribute
    :param fact: a boolean that says if the column to look for is a fact
    :param date: a boolean that says if the column to look for is a date
    :param reference: a boolean that says if the column to look for is a reference
    :param title: if needed, the title to look for
    """
    col_uris = self.get_column_uris()
    if attribute:
        col_uris = col_uris['attributes']
    elif reference:
        col_uris = col_uris['dataLoadingColumns']
    else:
        col_uris = col_uris['facts']

    suffix = ''
    if date:
        prefix = 'dt.'
    elif reference:
        prefix = 'f_'
        suffix = '_id'
    elif attribute:
        prefix = 'attr.'
    elif fact:
        prefix = 'fact.'
    col_identifier = '%(prefix)s%(dataset)s.%(col_name)s%(suffix)s' % {
        'prefix': prefix,
        'dataset': to_identifier(self.schema_name),
        'col_name': col_name,
        'suffix': suffix,
    }
    for col_uri in col_uris:
        col_json = self.get_column_detail(col_uri)
        if col_json['meta']['identifier'] == col_identifier:
            if not title or col_json['meta']['title'] == title:
                return True
    return False
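# Self-contained sketch of the identifier scheme has_column() matches against;
# the schema and column names here are invented for illustration:
if __name__ == '__main__':
    samples = [('attr.', 'department', ''),   # attribute
               ('fact.', 'salary', ''),       # fact
               ('dt.', 'payday', ''),         # date
               ('f_', 'department', '_id')]   # reference
    for prefix, col_name, suffix in samples:
        print('%s%s.%s%s' % (prefix, 'employees', col_name, suffix))
    # attr.employees.department, fact.employees.salary,
    # dt.employees.payday, f_employees.department_id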
def get_metadata(self, name=None):
    """
    Retrieve the metadata for a given dataset.

    :param name: the dataset name
    """
    try:
        datasets = self.get_datasets_metadata().json()['dataSetsInfo']['sets']
    except KeyError:
        datasets = []
    identifier = ('dataset.%s' % to_identifier(name)) if name else self.identifier
    for dataset in datasets:
        if dataset['meta']['identifier'] == identifier:
            return dataset
    raise DataSetNotFoundError(
        'DataSet %(dataset)s not found',
        sets=datasets, project_name=name, dataset=name
    )
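# Illustrative shape of one entry in dataSetsInfo['sets'] that get_metadata()
# scans; only the 'meta' keys the code reads are shown, and the values are invented:
EXAMPLE_DATASET_INFO = {
    'meta': {
        'identifier': 'dataset.employees',
        'title': 'Employees',
    },
}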
def get_dlc_info(self, column_uris, sli_manifest, schema_name=None):
    """
    A function to build the dlc_info dictionary.

    :param column_uris: the column uris to query
    :param sli_manifest: used to retrieve the DLC info
    :param schema_name: the schema name to look for (defaults to self)
    """
    identifier = to_identifier(schema_name) or self.identifier
    dlc_info = []
    # dataLoadingColumns
    for column_uri in column_uris:
        column_json = self.get_column_detail(column_uri)
        info = retrieve_dlc_info(identifier, column_json, sli_manifest)
        if info:
            dlc_info.append(info)
    # each info entry is a (key, value) pair; deduplicate, letting
    # non-reference entries win over references
    unique_dlc_info = {}
    for key, value in dlc_info:
        if key not in unique_dlc_info or not value.get('is_ref', False):
            unique_dlc_info[key] = value
    return unique_dlc_info
def test_date(self):
    self.assertEqual('test', text.to_identifier('Test'))
def get_maql(self):
    return self.column.get_maql(to_identifier(self.schema_name),
                                self.col_name, self.label_references_cp)
def get_maql(self):
    maql = []

    maql.append("""
# THIS IS MAQL SCRIPT THAT GENERATES PROJECT LOGICAL MODEL.
# SEE THE MAQL DOCUMENTATION AT http://developer.gooddata.com/api/maql-ddl.html FOR MORE DETAILS

# CREATE DATASET. DATASET GROUPS ALL FOLLOWING LOGICAL MODEL ELEMENTS TOGETHER.
CREATE DATASET {dataset.%s} VISUAL(TITLE "%s");
""" % (to_identifier(self.schema_name), to_title(self.schema_name)))

    attribute_folders, fact_folders = self.get_folders()
    if attribute_folders or fact_folders:
        maql.append('# CREATE THE FOLDERS THAT GROUP ATTRIBUTES AND FACTS')
        for folder, folder_title in attribute_folders:
            maql.append('CREATE FOLDER {dim.%s} VISUAL(TITLE "%s") TYPE ATTRIBUTE;'
                        % (folder, folder_title))
        maql.append('')
        for folder, folder_title in fact_folders:
            maql.append('CREATE FOLDER {ffld.%s} VISUAL(TITLE "%s") TYPE FACT;'
                        % (folder, folder_title))
        maql.append('')

    maql.append('# CREATE ATTRIBUTES.\n# ATTRIBUTES ARE CATEGORIES THAT ARE USED FOR SLICING AND DICING THE NUMBERS (FACTS)')
    column_list = self.get_columns()
    for column in column_list:
        if isinstance(column, (Attribute, ConnectionPoint)) \
                or (isinstance(column, Date) and not column.schemaReference):
            maql.append(column.get_maql())

    maql.append('# CREATE FACTS\n# FACTS ARE NUMBERS THAT ARE AGGREGATED BY ATTRIBUTES.')
    for column in column_list:
        if isinstance(column, Fact):
            maql.append(column.get_maql())

    maql.append('# CREATE DATE FACTS\n# DATES ARE REPRESENTED AS FACTS\n# DATES ARE ALSO CONNECTED TO THE DATE DIMENSIONS')
    for column in column_list:
        if isinstance(column, Date) and column.schemaReference:
            maql.append(column.get_maql())

    maql.append('# CREATE REFERENCES\n# REFERENCES CONNECT THE DATASET TO OTHER DATASETS')
    for column in column_list:
        if isinstance(column, Reference):
            maql.append(column.get_maql())

    default_set = False
    for column in column_list:
        if isinstance(column, Label):
            maql.append(column.get_maql())
            if not default_set:
                maql.append(column.get_maql_default())
                default_set = True

    cp = False
    for column in column_list:
        if isinstance(column, ConnectionPoint):
            cp = True
            maql.append('# ADD LABEL TO CONNECTION POINT')
            maql.append(column.get_original_label_maql())

    # TODO: not sure where this came from in Department example, wild guess only!
    if not cp:
        maql.append('CREATE ATTRIBUTE {attr.%s.factsof} VISUAL(TITLE "Records of %s") AS KEYS {f_%s.id} FULLSET;'
                    % (to_identifier(self.schema_name), to_title(self.schema_name),
                       to_identifier(self.schema_name)))
        maql.append('ALTER DATASET {dataset.%s} ADD {attr.%s.factsof};'
                    % (to_identifier(self.schema_name),
                       to_identifier(self.schema_name)))

    maql.append("""# SYNCHRONIZE THE STORAGE AND DATA LOADING INTERFACES WITH THE NEW LOGICAL MODEL
SYNCHRONIZE {dataset.%s};
""" % to_identifier(self.schema_name))

    return '\n'.join(maql)
def get_maql(self):
    return self.column.get_drop_maql(to_identifier(self.schema_name),
                                     self.col_name)
def get_maql(self):
    maql = []

    maql.append("""
# THIS IS MAQL SCRIPT THAT GENERATES PROJECT LOGICAL MODEL.
# SEE THE MAQL DOCUMENTATION AT http://developer.gooddata.com/api/maql-ddl.html FOR MORE DETAILS

# CREATE DATASET. DATASET GROUPS ALL FOLLOWING LOGICAL MODEL ELEMENTS TOGETHER.
CREATE DATASET {dataset.%s} VISUAL(TITLE "%s");
""" % (to_identifier(self.schema_name), to_title(self.schema_name)))

    attribute_folders, fact_folders = self.get_folders()
    if attribute_folders or fact_folders:
        maql.append('# CREATE THE FOLDERS THAT GROUP ATTRIBUTES AND FACTS')
        for folder, folder_title in attribute_folders:
            maql.append('CREATE FOLDER {dim.%s} VISUAL(TITLE "%s") TYPE ATTRIBUTE;'
                        % (folder, folder_title))
        maql.append('')
        for folder, folder_title in fact_folders:
            maql.append('CREATE FOLDER {ffld.%s} VISUAL(TITLE "%s") TYPE FACT;'
                        % (folder, folder_title))
        maql.append('')

    maql.append('# CREATE ATTRIBUTES.\n# ATTRIBUTES ARE CATEGORIES THAT ARE USED FOR SLICING AND DICING THE NUMBERS (FACTS)')
    column_list = self.get_columns()
    for column in column_list:
        if isinstance(column, (Attribute, ConnectionPoint)) \
                or (isinstance(column, Date) and not column.schemaReference):
            maql.append(column.get_maql())

    maql.append('# CREATE FACTS\n# FACTS ARE NUMBERS THAT ARE AGGREGATED BY ATTRIBUTES.')
    for column in column_list:
        if isinstance(column, Fact):
            maql.append(column.get_maql())

    maql.append('# CREATE DATE FACTS\n# DATES ARE REPRESENTED AS FACTS\n# DATES ARE ALSO CONNECTED TO THE DATE DIMENSIONS')
    for column in column_list:
        if isinstance(column, Date) and column.schemaReference:
            maql.append(column.get_maql())

    maql.append('# CREATE REFERENCES\n# REFERENCES CONNECT THE DATASET TO OTHER DATASETS')
    for column in column_list:
        if isinstance(column, Reference):
            maql.append(column.get_maql())

    default_set = False
    for column in column_list:
        if isinstance(column, Label):
            maql.append(column.get_maql())
            if not default_set:
                maql.append(column.get_maql_default())
                default_set = True

    cp = False
    for column in column_list:
        if isinstance(column, ConnectionPoint):
            cp = True
            maql.append(column.get_original_label_maql())

    # TODO: not sure where this came from in Department example, wild guess only!
    if not cp:
        maql.append('ALTER ATTRIBUTE {attr.%s.%s} ADD LABELS {label.%s.%s} VISUAL(TITLE "%s") AS {f_%s.nm_%s};'
                    % (to_identifier(self.schema_name), to_identifier(self.schema_name),
                       to_identifier(self.schema_name), to_identifier(self.schema_name),
                       to_title(self.schema_name), to_identifier(self.schema_name),
                       to_identifier(self.schema_name)))

    maql.append("""# SYNCHRONIZE THE STORAGE AND DATA LOADING INTERFACES WITH THE NEW LOGICAL MODEL
SYNCHRONIZE {dataset.%s};
""" % to_identifier(self.schema_name))

    return '\n'.join(maql)
def get_time_tm_column(column, schema_name):
    name = '%s_tm' % column.name
    populates = 'tm.dt.%s.%s' % (to_identifier(schema_name), column.name)
    return {'populates': [populates], 'columnName': name, 'mode': 'FULL'}
def get_date_dt_column(column, schema_name):
    name = '%s_dt' % column.name
    populates = 'dt.%s.%s' % (to_identifier(schema_name), column.name)
    return {'populates': [populates], 'columnName': name, 'mode': 'FULL'}
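# Minimal usage sketch for the two module-level helpers above; the stub column
# and schema name are invented for illustration (to_identifier lowercases, as
# the test above asserts for 'Test' -> 'test'):
if __name__ == '__main__':
    class _StubColumn(object):
        name = 'created'

    print(get_date_dt_column(_StubColumn(), 'Employees'))
    # {'populates': ['dt.employees.created'], 'columnName': 'created_dt', 'mode': 'FULL'}
    print(get_time_tm_column(_StubColumn(), 'Employees'))
    # {'populates': ['tm.dt.employees.created'], 'columnName': 'created_tm', 'mode': 'FULL'}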