def update_tq_links(file_name, adv_name):
    """Repoint every zone 'url' in the file's XML so that the sixth path
    segment (index 5 after splitting on '/') becomes `adv_name`.

    Handles both packaged (zip) and unpacked files; unpacked files are
    addressed with the trailing character of `file_name` stripped.
    """
    packaged = zipfile.is_zipfile(file_name)
    if packaged:
        # pull the XML tree out of the packaged (zipped) file
        xml = xfile.get_xml_from_archive(file_name)
    else:
        # unpacked: the on-disk name drops the final character
        xml = xfile.xml_open(file_name[:-1])

    root = xml.getroot()
    for child in root:
        for zone in child.iter('zone'):
            url = zone.get('url')
            if url is None:
                # zones without a url attribute are left untouched
                continue
            parts = url.split('/')
            # swap in the new advertiser segment and rebuild the url
            parts[5] = adv_name
            zone.set('url', build_url(parts))

    if packaged:
        # save updated xml back into the zipped file
        xfile.save_into_archive(xml, file_name)
        write_log('output/run.log',
                  u" Links for " + file_name + " repointed",
                  print_log_value=False)
    else:
        xml.write(file_name[:-1], encoding="utf-8", xml_declaration=True)
def update_toc_links(file_name, adv_name):
    """Repoint table-of-contents zone URLs to the advertiser `adv_name`.

    Handles packaged (zip) and unpacked files. Each zone's 'url' is split
    on '/' and rewritten differently depending on what it links to
    (individual workbook, all views, sandbox web edit, or sandbox views).
    """
    if zipfile.is_zipfile(file_name):
        # extract the xml from the packaged file
        xml = xfile.get_xml_from_archive(file_name)
    else:
        # unpacked: the on-disk name drops the final character
        xml = xfile.xml_open(file_name[:-1])
    # get the root of the xml to prepare it for parsing
    root = xml.getroot()
    # iterate over the child nodes
    for child in root:
        # find all instances of the node 'zone'
        for zone in child.iter('zone'):
            # get the attribute 'url'
            url = zone.get('url')
            # only focus on those nodes that have the attribute
            if url is not None:
                # break up the url on '/'
                url_split = url.split('/')
                # determine what the url links to and rebuild it
                # NOTE(review): urls with fewer than 7 segments raise
                # IndexError here, which is NOT caught below — confirm all
                # zone urls are long enough, or that short ones never occur
                try:
                    if url_split[6] == 'workbooks':
                        # individual workbooks: swap the advertiser segment
                        # and re-resolve the workbook's content id
                        url_split[5] = adv_name
                        workbook_name = reverse_lookup(url_split[7])
                        new_workbook_id = get_content_id(
                            adv_name, project=None, workbook=workbook_name)
                        url_split[7] = new_workbook_id
                        new_url = build_url(url_split)
                        zone.set('url', new_url)
                    elif url_split[6] == 'views?order=name:asc':
                        # all views: only the advertiser segment changes
                        url_split[5] = adv_name
                        new_url = build_url(url_split)
                        zone.set('url', new_url)
                    elif url_split[3] == 't':
                        # sandbox web edit: advertiser lives one segment
                        # earlier in this url shape
                        url_split[4] = adv_name
                        new_url = build_url(url_split)
                        zone.set('url', new_url)
                    elif url_split[6] == 'projects':
                        # sandbox views: swap advertiser and re-resolve the
                        # Sandbox project's content id
                        url_split[5] = adv_name
                        url_split[7] = get_content_id(adv_name, project='Sandbox')
                        new_url = build_url(url_split)
                        zone.set('url', new_url)
                # if TypeError, then build a url that links to all views
                # (presumably raised inside the helpers above — TODO confirm
                # which call actually produces it)
                except TypeError:
                    url_split[5] = adv_name
                    new_url = build_url(url_split)
                    zone.set('url', new_url)
    if zipfile.is_zipfile(file_name):
        # save updated xml back into the zipped file
        xfile.save_into_archive(xml, file_name)
        write_log('output/run.log',
                  u" Links for " + file_name + " repointed",
                  print_log_value=False)
    else:
        xml.write(file_name[:-1], encoding="utf-8", xml_declaration=True)
def update_rs_datasources(file_name, adv_id):
    """Rewrite `client_ref_id` / `advertiser_id` filter values in each
    relation's query text to point at advertiser `adv_id`.

    Redshift connections get both ids replaced; MySQL connections only
    `advertiser_id`. The MIDemo account id '999999' is mapped to the demo
    account '877'. Handles packaged (zip) and unpacked files.
    """
    if zipfile.is_zipfile(file_name):
        # extract the xml from the packaged file
        xml = xfile.get_xml_from_archive(file_name)
    else:
        # unpacked: the on-disk name drops the final character
        xml = xfile.xml_open(file_name[:-1])
    root = xml.getroot()

    # compile once, outside the loops — these patterns are loop-invariant
    find_client_ref_id = re.compile(r'client_ref_id\s?=\s?\d+')
    find_advertiser_id = re.compile(r'advertiser_id\s?=\s?\d+')
    # handle MIDemo account: 999999 is backed by demo account 877
    target_id = '877' if adv_id == '999999' else adv_id

    for items in root:
        for conn in items.iter('relation'):
            connection = conn.get('connection')
            if re.search('redshift', str(connection)):
                # Update redshift connections: both ids appear in the query
                new_query = find_client_ref_id.sub(
                    'client_ref_id = ' + target_id, conn.text)
                conn.text = find_advertiser_id.sub(
                    'advertiser_id = ' + target_id, new_query)
            elif re.search('mysql', str(connection)):
                # update mysql connections: only advertiser_id is present
                conn.text = find_advertiser_id.sub(
                    'advertiser_id = ' + target_id, conn.text)

    # write the file
    if zipfile.is_zipfile(file_name):
        # save updated xml back into the zipped file
        xfile.save_into_archive(xml, file_name)
        write_log('output/run.log',
                  " Links for " + file_name + " repointed",
                  print_log_value=True)
    else:
        xml.write(file_name[:-1], encoding="utf-8", xml_declaration=True)
def parseDataSources(self):
    '''Parse the XML for each datasource (accepts either .tds or .tdsx format).

    Valid datasources are appended to `self.parsedDatasources`;
    federated ones are deleted from the working directory.
    '''
    # parse datasources differently depending on the file type (.tds or .tdsx)
    for datasource in os.listdir():
        # reset per file — the original left `contents` unbound on the
        # first non-datasource file (NameError) and stale on later ones,
        # which could re-append or wrongly delete based on a previous file
        contents = None
        if datasource.endswith('.tds'):
            contents = xml.parse(datasource).getroot()
        elif datasource.endswith('.tdsx'):
            contents = twbx.xml_open(datasource).getroot()
        # if there's no xml to parse (e.g. a static csv datasource), skip it
        if contents is None:
            continue
        # NOTE(review): assumes every parsed root carries a
        # 'formatted-name' attribute — a missing one raises here; confirm
        if not contents.attrib.get('formatted-name').startswith('federated.'):
            self.parsedDatasources.append(contents)
        else:
            print(f'{datasource} is invalid, removing from directory.')
            os.remove(datasource)
def __init__(self, filename):
    """Open the workbook at `filename`.

    This will handle packaged and unpacked workbook files automatically.
    This will also parse Data Sources and Worksheets for access.
    """
    self._filename = filename
    # parse the (possibly packaged) workbook XML once and keep both the
    # tree and its root handy
    tree = xml_open(self._filename, 'workbook')
    self._workbookTree = tree
    self._workbookRoot = tree.getroot()
    self._source_platform = self._workbookRoot.get('source-platform')
    self._source_build = self._workbookRoot.get('source-build')
    # datasource objects, an index over them, then the worksheets that
    # reference them
    self._datasources = self._prepare_datasources(self._workbookRoot)
    self._datasource_index = self._prepare_datasource_index(self._datasources)
    self._worksheets = self._prepare_worksheets(
        self._workbookRoot, self._datasource_index)
def __init__(self, filename):
    """Construct the object by parsing `filename`.

    The expected XML root tag is derived from the concrete class's
    lowercased name.
    """
    self._filename = filename
    root_tag = type(self).__name__.lower()
    self._workbookTree = xml_open(self._filename, root_tag)
    root = self._workbookTree.getroot()
    self._workbookRoot = root
    # datasource objects, a lookup index over them, then the worksheets
    self._datasources = self._prepare_datasources(root)
    self._datasource_index = self._prepare_datasource_index(self._datasources)
    self._worksheets = self._prepare_worksheets(root, self._datasource_index)
def __init__(self, filename):
    """Construct the object from the workbook file at `filename`."""
    self._filename = filename
    tree = xml_open(self._filename)
    self._workbookTree = tree
    self._workbookRoot = tree.getroot()
    # build the datasource objects, a lookup index, then the worksheets
    self._datasources = self._prepare_datasources(self._workbookRoot)
    self._datasource_index = self._prepare_datasource_index(self._datasources)
    self._worksheets = self._prepare_worksheets(
        self._workbookRoot, self._datasource_index)
def __init__(self, filename):
    """Open the workbook at `filename`.

    This will handle packaged and unpacked workbook files automatically.
    This will also parse Data Sources and Worksheets for access.
    """
    self._filename = filename
    # one parse of the (possibly packaged) workbook XML
    self._workbookTree = xml_open(self._filename, 'workbook')
    root = self._workbookTree.getroot()
    self._workbookRoot = root
    # datasources first, then an index over them, then the worksheets
    self._datasources = self._prepare_datasources(root)
    self._datasource_index = self._prepare_datasource_index(self._datasources)
    self._worksheets = self._prepare_worksheets(root, self._datasource_index)
def update_dates(file_name, min_date, max_date):
    """Set the 'Start Date' / 'End Date' parameter values (and their
    calculation formulas) in the workbook XML to `min_date` / `max_date`.

    Only columns of the connectionless 'Parameters' datasource are touched.
    Handles packaged (zip) and unpacked files.
    """
    if zipfile.is_zipfile(file_name):
        # extract the xml from the packaged file
        xml = xfile.get_xml_from_archive(file_name)
    else:
        xml = xfile.xml_open(file_name)
    root = xml.getroot()

    # format once, outside the loops — '#...#' is the date-literal form
    # the XML stores in these attributes
    min_literal = min_date.strftime("#%Y-%m-%d#")
    max_literal = max_date.strftime("#%Y-%m-%d#")

    for items in root:
        for datasources in items.iter('datasources'):
            for datasource in datasources.findall('datasource'):
                # only the connectionless Parameters datasource is touched
                if (datasource.get('name') == 'Parameters'
                        and datasource.get('hasconnection') == "false"):
                    for column in datasource:
                        caption = column.get('caption')
                        if caption == 'Start Date':
                            column.set('value', min_literal)
                            # NOTE(review): assumes a <calculation> child
                            # exists on these columns — confirm
                            column.find('calculation').set('formula', min_literal)
                        elif caption == 'End Date':
                            column.set('value', max_literal)
                            column.find('calculation').set('formula', max_literal)

    if zipfile.is_zipfile(file_name):
        # save updated xml back into the zipped file
        xfile.save_into_archive(xml, file_name)
    else:
        xml.write(file_name, encoding="utf-8", xml_declaration=True)
def audit_wb_fields(outfile, *args):
    """Audit the fields used by each workbook in `args`.

    Appends a per-dashboard breakdown of worksheets and their datasource
    columns to `outfile`, and writes 'Field_Frequency.csv' with each
    field's usage count across all workbooks (descending frequency).
    Temporary files 'freqterms.csv' and 'sorted_freq.csv' are removed.
    """
    with open(outfile, 'a') as text_file:
        master_col_list = []
        for file_name in args:
            if zipfile.is_zipfile(file_name):
                # extract the xml from the packaged file
                xml = xfile.get_xml_from_archive(file_name)
            else:
                xml = xfile.xml_open(file_name)
            root = xml.getroot()
            ws_data = dict()
            text_file.write('\n' + '------------ ' + file_name + '\n')
            for items in root:
                # first pass: map each worksheet to its datasources' columns
                for worksheets in items.iter('worksheets'):
                    for worksheet in worksheets.iter('worksheet'):
                        worksheet_name = worksheet.get('name')
                        ds_names = dict()
                        ds_list = dict()
                        for datasource in worksheet.iter('datasource'):
                            ds_names[datasource.get('name')] = datasource.get(
                                'caption')
                        # Parameters never carries a caption of its own
                        ds_names['Parameters'] = 'Parameters'
                        for data_dep in worksheet.iter(
                                'datasource-dependencies'):
                            data_dep_name = data_dep.get('datasource')
                            col_list = []
                            for column in data_dep.iter('column'):
                                column_name = column.get('name')
                                column_caption = column.get('caption')
                                if column_caption is None:
                                    # fall back to the bracketed name with
                                    # the surrounding '[' ']' stripped
                                    column_caption = column_name[1:-1]
                                col_list.append(column_caption)
                            ds_list[ds_names[data_dep_name]] = col_list
                            master_col_list += col_list
                        ws_data[worksheet_name] = ds_list
                # second pass: group worksheets under their dashboards and
                # dump the per-dashboard field usage
                for windows in items.iter('windows'):
                    viz_data = defaultdict(list)
                    for window in windows.iter('window'):
                        window_name = window.get('name')
                        window_class = window.get('class')
                        if window_class == 'dashboard':
                            for viewpoint in window.iter('viewpoint'):
                                viewpoint_name = viewpoint.get('name')
                                viz_data[window_name].append(viewpoint_name)
                    final_data = defaultdict(list)
                    for dashboard in viz_data:
                        for worksheet in viz_data[dashboard]:
                            final_data[dashboard].append(
                                {worksheet: ws_data[worksheet]})
                    for dashboard in final_data:
                        text_file.write(str(dashboard) + '\n')
                        for worksheets in final_data[dashboard]:
                            text_file.write(str(worksheets) + '\n')

        # tally each field's frequency across every workbook
        np_col_array = np.array(master_col_list)
        fields, counts = np.unique(np_col_array, return_counts=True)
        freq_array = np.column_stack((counts, fields))
        # renamed from `outfile` — the original shadowed the parameter
        with open('freqterms.csv', 'w') as freq_file:
            freq_file.write('frequency,field_name\n')
            for row in freq_array.tolist():
                freq_file.write(
                    str(row).replace("'", "", 2).replace(' ', '', 1)[1:-1]
                    + '\n')
        freq_df = pd.read_csv('freqterms.csv', delimiter=',', dtype={
            'frequency': int,
            'field_names': str
        })
        # DataFrame.sort was removed in pandas 0.20 — use sort_values
        freq_df.sort_values(['frequency', 'field_name'],
                            ascending=[False, True]).to_csv('sorted_freq.csv')
        # csv.reader/writer need text mode with newline='' on Python 3
        # (the original 'rb'/'wb' modes raise TypeError)
        with open('sorted_freq.csv', 'r', newline='') as source:
            rdr = csv.reader(source)
            with open('Field_Frequency.csv', 'w', newline='') as results:
                wtr = csv.writer(results)
                for row in rdr:
                    wtr.writerow((row[1], row[2]))
        os.remove('freqterms.csv')
        os.remove('sorted_freq.csv')
def from_file(cls, filename):
    """Build a datasource instance from a file (.tds or .tdsx)."""
    tree = xml_open(filename, 'datasource')
    return cls(tree.getroot(), filename)
# if field.description: # print(' the description is {}'.format(field.description)) for field in datasource.fields.values(): field_attributes = [ field.id, field.caption, field.alias, field.datatype, field.role, field.is_quantitative, field.is_ordinal, field.is_nominal, field.calculation, field.default_aggregation, field.description ] print(field_attributes) # file_name = "C:\\Users\\jj2362\\Desktop\\docs in\\standard frequent flyer.tds" file_name = "C:\\Users\\jj2362\\Desktop\\Sheet1 (Visual_Analytics_TOC_DataSimulated).tds" # file_name = "C:\\Users\\jj2362\\Desktop\\docs in\\Master.twb" file_type = xml_open(file_name) base = file_type.getroot() print(base.tag) if base.tag == 'datasource': document = tableaudocumentapi.Datasource.from_file(file_name) process_datasources(document) else: document = Workbook(file_name) for datasource in document.datasources: process_datasources(datasource) print("") # for datasource in workbook.datasources:
def from_file(cls, filename):
    """Create an instance by parsing `filename` (.tds).

    The expected XML root tag is the concrete class's lowercased name.
    """
    expected_root = cls.__name__.lower()
    tree = xml_open(filename, expected_root)
    return cls(tree.getroot(), filename)