Example #1
0
def update_tq_links(file_name, adv_name):
    """Repoint every 'zone' url in the workbook so path segment 5 names
    `adv_name`, then persist the document.

    Packaged (zip) files are updated inside the archive; otherwise the
    xml is written back to `file_name[:-1]`.
    """
    packaged = zipfile.is_zipfile(file_name)
    if packaged:
        # the xml lives inside the packaged archive
        tree = xfile.get_xml_from_archive(file_name)
    else:
        tree = xfile.xml_open(file_name[:-1])
    # walk every 'zone' node under each top-level child of the root
    for node in tree.getroot():
        for zone in node.iter('zone'):
            link = zone.get('url')
            # skip zones that carry no url attribute
            if link is None:
                continue
            parts = link.split('/')
            # swap the advertiser segment and rebuild the url
            parts[5] = adv_name
            zone.set('url', build_url(parts))
    if packaged:
        # save updated xml back into the zipped file
        xfile.save_into_archive(tree, file_name)
        write_log('output/run.log',
                  u"  Links for " + file_name + " repointed",
                  print_log_value=False)
    else:
        tree.write(file_name[:-1], encoding="utf-8", xml_declaration=True)
Example #2
0
def update_toc_links(file_name, adv_name):
    """Repoint the 'zone' urls of a table-of-contents workbook to `adv_name`.

    The url's path segments decide how it is rebuilt: individual workbook
    links, the all-views listing, sandbox web-edit links, and sandbox
    project views each rewrite different segments before `build_url` joins
    them back together.  Packaged (zip) files are saved back into the
    archive; otherwise the xml is written to `file_name[:-1]`.

    NOTE(review): only TypeError is handled below — presumably raised when
    `get_content_id` returns a non-string that `build_url` cannot join; a
    url with fewer than 7 segments would raise an uncaught IndexError.
    TODO confirm neither case occurs in real TOC files.
    """
    if zipfile.is_zipfile(file_name):
        # extract the xml from the packaged file
        xml = xfile.get_xml_from_archive(file_name)
    else:
        xml = xfile.xml_open(file_name[:-1])
    # get the root of the xml to prepare it for parsing
    root = xml.getroot()
    # iterate over the child nodes
    for child in root:
        # find all instances of the node 'zone'
        for zone in child.iter('zone'):
            # get the attribute 'url'
            url = zone.get('url')
            # only focus on those nodes that have the attribute
            if url is not None:
                # break up the url on '/'
                url_split = url.split('/')
                # determine what the url links to and rebuild it
                try:
                    if url_split[6] == 'workbooks':  # individual workbooks
                        url_split[5] = adv_name
                        # translate the old workbook id to its name, then
                        # look up the id on the target advertiser's site
                        workbook_name = reverse_lookup(url_split[7])
                        new_workbook_id = get_content_id(
                            adv_name, project=None, workbook=workbook_name)
                        url_split[7] = new_workbook_id
                        new_url = build_url(url_split)
                        zone.set('url', new_url)
                    elif url_split[6] == 'views?order=name:asc':  # all views
                        url_split[5] = adv_name
                        new_url = build_url(url_split)
                        zone.set('url', new_url)
                    elif url_split[3] == 't':  # sandbox web edit
                        # web-edit urls carry the site name one segment earlier
                        url_split[4] = adv_name
                        new_url = build_url(url_split)
                        zone.set('url', new_url)
                    elif url_split[6] == 'projects':  # sandbox views
                        url_split[5] = adv_name
                        url_split[7] = get_content_id(adv_name,
                                                      project='Sandbox')
                        new_url = build_url(url_split)
                        zone.set('url', new_url)
                # if TypeError, then build a url that links to all views
                except TypeError:
                    url_split[5] = adv_name
                    new_url = build_url(url_split)
                    zone.set('url', new_url)
    if zipfile.is_zipfile(file_name):
        # save updated xml back into the zipped file
        xfile.save_into_archive(xml, file_name)
        # print u" Links for " + file_name + " repointed"
        write_log('output/run.log',
                  u" Links for " + file_name + " repointed",
                  print_log_value=False)
    else:
        xml.write(file_name[:-1], encoding="utf-8", xml_declaration=True)
Example #3
0
def update_rs_datasources(file_name, adv_id):
    """Rewrite the advertiser id filters in a workbook's custom SQL.

    Every 'relation' node whose connection mentions redshift gets both its
    `client_ref_id = N` and `advertiser_id = N` predicates rewritten to
    `adv_id`; mysql connections get only `advertiser_id = N` rewritten.
    The MIDemo account (adv_id '999999') is a synthetic account backed by
    advertiser 877, so its queries are pointed at 877 instead.

    Packaged (zip) files are saved back into the archive; otherwise the
    xml is written to `file_name[:-1]`.
    """
    # collapse the duplicated MIDemo/else branches into one substitution id
    effective_id = '877' if adv_id == '999999' else adv_id
    # compile once, outside the relation loop
    find_client_ref_id = re.compile(r'client_ref_id\s?=\s?\d+')
    find_advertiser_id = re.compile(r'advertiser_id\s?=\s?\d+')
    if zipfile.is_zipfile(file_name):
        # extract the xml from the packaged file
        xml = xfile.get_xml_from_archive(file_name)
    else:
        xml = xfile.xml_open(file_name[:-1])
    # get the root of the xml to prepare it for parsing
    root = xml.getroot()
    for items in root:
        for conn in items.iter('relation'):
            connection = conn.get('connection')
            if re.search('redshift', str(connection)):
                # update redshift connections: both predicates
                new_query = find_client_ref_id.sub(
                    'client_ref_id = ' + effective_id, conn.text)
                conn.text = find_advertiser_id.sub(
                    'advertiser_id = ' + effective_id, new_query)
            elif re.search('mysql', str(connection)):
                # update mysql connections: advertiser_id only
                conn.text = find_advertiser_id.sub(
                    'advertiser_id = ' + effective_id, conn.text)
    # write the file
    if zipfile.is_zipfile(file_name):
        # save updated xml back into the zipped file
        xfile.save_into_archive(xml, file_name)
        write_log('output/run.log',
                  "  Links for " + file_name + " repointed",
                  print_log_value=True)
    else:
        xml.write(file_name[:-1], encoding="utf-8", xml_declaration=True)
Example #4
0
    def parseDataSources(self):
        '''Parse the XML for each datasource (accepts either .tds or .tdsx format).

        Valid (non-federated) datasource roots are appended to
        self.parsedDatasources; federated ones are deleted from disk.
        Files with neither extension (e.g. static csv) are skipped.
        '''

        # parse datasources differently depending on the file type (.tds or .tdsx)
        for datasource in os.listdir():
            # BUG FIX: reset per file — previously `contents` leaked from the
            # prior iteration, so a file matching neither extension was
            # processed (and possibly removed) using the previous file's xml
            contents = None
            if datasource.endswith('.tds'):
                contents = xml.parse(datasource).getroot()
            elif datasource.endswith('.tdsx'):
                contents = twbx.xml_open(datasource).getroot()

            # if there's no xml to parse (e.g. a static csv datasource), skip it
            if contents is not None:
                if not contents.attrib.get('formatted-name').startswith('federated.'):
                    self.parsedDatasources.append(contents)
                else:
                    print(f'{datasource} is invalid, removing from directory.')
                    os.remove(datasource)
    def __init__(self, filename):
        """Open the workbook at `filename`. Packaged and unpacked workbook
        files are both handled; datasources and worksheets are parsed and
        made available on the instance.
        """
        self._filename = filename
        # open the document and keep both the tree and its root around
        tree = xml_open(filename, 'workbook')
        root = tree.getroot()
        self._workbookTree = tree
        self._workbookRoot = root
        self._source_platform = root.get('source-platform')
        self._source_build = root.get('source-build')
        # build the datasource objects, then an index over them, and
        # finally the worksheets wired up to that index
        datasources = self._prepare_datasources(root)
        self._datasources = datasources
        self._datasource_index = self._prepare_datasource_index(datasources)
        self._worksheets = self._prepare_worksheets(root,
                                                    self._datasource_index)
    def __init__(self, filename):
        """Open `filename` and prepare datasources and worksheets.

        The xml element tag to open is derived from the concrete class
        name (lower-cased).
        """
        self._filename = filename
        # the document type to open follows the subclass name
        tag = self.__class__.__name__.lower()
        tree = xml_open(filename, tag)
        self._workbookTree = tree
        root = tree.getroot()
        self._workbookRoot = root
        # datasource objects -> lookup index -> worksheets using the index
        self._datasources = self._prepare_datasources(root)
        self._datasource_index = self._prepare_datasource_index(
            self._datasources)
        self._worksheets = self._prepare_worksheets(root,
                                                    self._datasource_index)
Example #7
0
    def __init__(self, filename):
        """Open `filename` and prepare datasources and worksheets."""
        self._filename = filename
        # parse the document and cache tree + root
        tree = xml_open(filename)
        root = tree.getroot()
        self._workbookTree = tree
        self._workbookRoot = root
        # datasource objects -> lookup index -> worksheets using the index
        datasources = self._prepare_datasources(root)
        self._datasources = datasources
        self._datasource_index = self._prepare_datasource_index(datasources)
        self._worksheets = self._prepare_worksheets(root,
                                                    self._datasource_index)
Example #8
0
    def __init__(self, filename):
        """Open the workbook at `filename`. Packaged and unpacked workbook
        files are both handled; datasources and worksheets are parsed for
        access on the instance.
        """
        self._filename = filename
        # open as a 'workbook' document and cache tree + root
        tree = xml_open(filename, 'workbook')
        root = tree.getroot()
        self._workbookTree = tree
        self._workbookRoot = root
        # datasource objects -> lookup index -> worksheets using the index
        datasources = self._prepare_datasources(root)
        self._datasources = datasources
        self._datasource_index = self._prepare_datasource_index(datasources)
        self._worksheets = self._prepare_worksheets(root,
                                                    self._datasource_index)
Example #9
0
def update_dates(file_name, min_date, max_date):
    """Set the 'Start Date' / 'End Date' parameters of a workbook.

    `min_date` / `max_date` are date-like objects (must support
    `strftime`); both the parameter column's 'value' attribute and its
    calculation formula are set to the Tableau date literal #YYYY-MM-DD#.
    Packaged (zip) files are saved back into the archive; otherwise the
    xml is written back to `file_name`.
    """
    def _apply_date(column, when):
        # Write the same #YYYY-MM-DD# literal to the parameter value and,
        # when present, to its calculation formula.
        stamp = when.strftime("#%Y-%m-%d#")
        column.set('value', stamp)
        calc = column.find('calculation')
        # ROBUSTNESS: the original assumed a <calculation> child always
        # exists and raised AttributeError otherwise; skip quietly instead
        if calc is not None:
            calc.set('formula', stamp)

    if zipfile.is_zipfile(file_name):
        # extract the xml from the packaged file
        xml = xfile.get_xml_from_archive(file_name)
    else:
        xml = xfile.xml_open(file_name)
    # get the root of the xml to prepare it for parsing
    root = xml.getroot()
    for items in root:
        for datasources in items.iter('datasources'):
            for datasource in datasources.findall('datasource'):
                # only the connection-less 'Parameters' datasource holds
                # the date parameters
                if (datasource.get('name') == 'Parameters'
                        and datasource.get('hasconnection') == "false"):
                    for columns in datasource:
                        column_name = columns.get('caption')
                        if column_name == 'Start Date':
                            _apply_date(columns, min_date)
                        elif column_name == 'End Date':
                            _apply_date(columns, max_date)
    if zipfile.is_zipfile(file_name):
        # save updated xml back into the zipped file
        xfile.save_into_archive(xml, file_name)
    else:
        xml.write(file_name, encoding="utf-8", xml_declaration=True)
Example #10
0
def audit_wb_fields(outfile, *args):
    """Audit the fields used by each workbook in `args`.

    For every workbook file a dashboard -> worksheet -> datasource/field
    breakdown is appended to `outfile`, and a 'Field_Frequency.csv' is
    produced listing how often each field caption occurs across all the
    workbooks.  Temporary files 'freqterms.csv' and 'sorted_freq.csv' are
    created and removed along the way.
    """
    with open(outfile, 'a') as text_file:
        master_col_list = []
        for file_name in args:
            if zipfile.is_zipfile(file_name):
                # extract the xml from the packaged file
                xml = xfile.get_xml_from_archive(file_name)
            else:
                xml = xfile.xml_open(file_name)
            # get the root of the xml to prepare it for parsing
            root = xml.getroot()
            ws_data = dict()
            text_file.write('\n' + '------------ ' + file_name + '\n')
            for items in root:
                for worksheets in items.iter('worksheets'):
                    for worksheet in worksheets.iter('worksheet'):
                        worksheet_name = worksheet.get('name')
                        # map datasource internal names to display captions
                        ds_names = dict()
                        ds_list = dict()
                        for datasource in worksheet.iter('datasource'):
                            ds_names[datasource.get('name')] = datasource.get(
                                'caption')
                        ds_names['Parameters'] = 'Parameters'

                        for data_dep in worksheet.iter(
                                'datasource-dependencies'):
                            data_dep_name = data_dep.get('datasource')

                            col_list = []
                            for column in data_dep.iter('column'):
                                column_name = column.get('name')
                                column_caption = column.get('caption')
                                if column_caption is None:
                                    # fall back to the internal name with
                                    # its surrounding brackets stripped
                                    column_caption = column_name[1:-1]
                                col_list.append(column_caption)
                            ds_list[ds_names[data_dep_name]] = col_list
                            master_col_list += col_list
                        ws_data[worksheet_name] = ds_list

                for windows in items.iter('windows'):
                    # dashboard name -> worksheets it displays
                    viz_data = defaultdict(list)
                    for window in windows.iter('window'):
                        window_name = window.get('name')
                        window_class = window.get('class')
                        if window_class == 'dashboard':
                            for viewpoint in window.iter('viewpoint'):
                                viewpoint_name = viewpoint.get('name')
                                viz_data[window_name].append(viewpoint_name)

                    final_data = defaultdict(list)
                    for dashboard in viz_data:
                        for worksheet in viz_data[dashboard]:
                            final_data[dashboard].append(
                                {worksheet: ws_data[worksheet]})
                    for dashboard in final_data:
                        text_file.write(str(dashboard) + '\n')
                        for worksheets in final_data[dashboard]:
                            text_file.write(str(worksheets) + '\n')

        # frequency of every field caption across all workbooks
        np_col_array = np.array(master_col_list)
        # np.unique already returns ndarrays; no need to re-wrap them
        fields, counts = np.unique(np_col_array, return_counts=True)
        freq_array = np.column_stack((counts, fields))

        # BUG FIX: the temp handle previously shadowed the `outfile` param
        with open('freqterms.csv', 'w') as freq_file:
            freq_file.write('frequency,field_name\n')
            for row in freq_array.tolist():
                freq_file.write(
                    str(row).replace("'", "", 2).replace(' ', '', 1)[1:-1] +
                    '\n')
        # BUG FIX: dtype key was 'field_names', which matches no column
        freq_df = pd.read_csv('freqterms.csv',
                              delimiter=',',
                              dtype={
                                  'frequency': int,
                                  'field_name': str
                              })
        # BUG FIX: DataFrame.sort was removed from pandas; use sort_values
        freq_df.sort_values(['frequency', 'field_name'],
                            ascending=[False, True]).to_csv('sorted_freq.csv')
        # BUG FIX: csv reader/writer need text-mode files with newline=''
        # on Python 3 ('rb'/'wb' raise TypeError)
        with open('sorted_freq.csv', 'r', newline='') as source:
            rdr = csv.reader(source)
            with open('Field_Frequency.csv', 'w', newline='') as results:
                wtr = csv.writer(results)
                for row in rdr:
                    wtr.writerow((row[1], row[2]))
        os.remove('freqterms.csv')
        os.remove('sorted_freq.csv')
    def from_file(cls, filename):
        """Initialize datasource from file (.tds or .tdsx).

        Opens `filename` as a 'datasource' document, extracts its xml root
        and builds an instance from that root plus the filename.
        """

        dsxml = xml_open(filename, 'datasource').getroot()
        return cls(dsxml, filename)
    #     if field.description:
    #         print('      the description is {}'.format(field.description))
    # print one list per field with every inspected attribute: id, caption,
    # alias, datatype, role, the quantitative/ordinal/nominal flags,
    # calculation, default aggregation and description
    for field in datasource.fields.values():
        field_attributes = [
            field.id, field.caption, field.alias, field.datatype, field.role,
            field.is_quantitative, field.is_ordinal, field.is_nominal,
            field.calculation, field.default_aggregation, field.description
        ]
        print(field_attributes)


# file_name = "C:\\Users\\jj2362\\Desktop\\docs in\\standard frequent flyer.tds"
file_name = "C:\\Users\\jj2362\\Desktop\\Sheet1 (Visual_Analytics_TOC_DataSimulated).tds"
# file_name = "C:\\Users\\jj2362\\Desktop\\docs in\\Master.twb"

# open the document and inspect its root tag to decide how to process it
file_type = xml_open(file_name)

base = file_type.getroot()
print(base.tag)

# a standalone datasource (.tds) is processed directly; anything else is
# treated as a workbook and each of its datasources is processed in turn
if base.tag == 'datasource':
    document = tableaudocumentapi.Datasource.from_file(file_name)
    process_datasources(document)
else:
    document = Workbook(file_name)
    for datasource in document.datasources:
        process_datasources(datasource)
        print("")

# for datasource in workbook.datasources:
    def from_file(cls, filename):
        """Build an instance from a .tds file on disk.

        The xml element tag to open is derived from the class name.
        """
        tag = cls.__name__.lower()
        root = xml_open(filename, tag).getroot()
        return cls(root, filename)
    def from_file(cls, filename):
        """Initialize datasource from file (.tds).

        Opens `filename` using the lower-cased class name as the element
        tag, extracts the xml root and builds an instance from it.
        """

        dsxml = xml_open(filename, cls.__name__.lower()).getroot()
        return cls(dsxml, filename)