Example #1
0
    def check_images(self, exclude=None):
        """
        Lists all the images in the handoff articles that don't point to the S3 documentation bucket.
        Ask writer to fix in source and html files.

        An image passes the check if its src contains 'zen-marketing-documentation' (the bucket
        behind https://zen-marketing-documentation.s3.amazonaws.com/docs/en) or
        'embed-ssl.wistia.com'. Everything else, including img tags without a src, is reported.

        :param exclude: list of article ids that use English images in localized versions
        :return: nothing. The results are printed
        """
        self.verify_handoff_exists()

        file_path = os.path.join(self.handoff_folder, 'articles')
        files = glob.glob(os.path.join(file_path, '*.html'))
        missing_images = []
        for file in files:
            if exclude and get_id_from_filename(file) in exclude:  # skip the excluded articles
                continue

            tree = create_tree(file)
            for image in tree.find_all('img'):
                # .get() because indexing an img tag that has no src attribute raises a KeyError
                src = image.get('src') or ''
                if 'zen-marketing-documentation' in src or 'embed-ssl.wistia.com' in src:
                    continue
                missing_images.append('* in {} -> {}'.format(
                    os.path.basename(file), src or '(no src attribute)'))

        if missing_images:
            print('The following images are not on S3:')
            print('\n'.join(missing_images) + '\n')
        else:
            print('All image links point to S3!\n')
Example #2
0
    def update_srcs(self, locales=None, write=True):
        """
        Points the English image links in the localized files at the localized images on S3.

        Only img tags whose src contains /docs/en/ are considered. A src is rewritten only if the
        file name of the image is listed in the image registry for the locale. Otherwise the link
        is reported and left as is.

        :param locales: locales to process. If not specified, self.locales is used
        :param write: if true, every processed file is rewritten with the updated tree
        :return: nothing
        """
        self.verify_handoff_exists(localized=True)
        registry = self.read_registry(self.image_registry)

        target_locales = self.locales if locales is None else locales
        english_src = re.compile('/docs/en/')

        for locale in target_locales:
            # pt-br links are rewritten to docs/pt; every other locale keeps its own name
            new_folder = 'docs/{}'.format('pt' if locale == 'pt-br' else locale)
            for file in self.get_localized_files(locale):
                # the part of the path after the upper-cased locale is enough to identify the file
                short_path = file.split(locale.upper())[1]
                print('\nOpening {}{} for srcs'.format(locale, short_path))
                tree = create_tree(file)
                for img in tree.find_all('img', src=english_src):
                    image_name = os.path.basename(img['src'])
                    if image_name in registry[locale]:
                        img['src'] = re.sub(r'docs/en', new_folder, img['src'])
                        print(' - Updated src  - {}'.format(img['src']))
                    else:
                        print(' - Not localized on S3 - {}'.format(img['src']))

                if write:
                    rewrite_file(file, tree)
Example #3
0
    def check_images(self, exclude=None):
        """
        Lists all the images in the handoff articles that don't point to the S3 documentation bucket.
        Ask writer to fix in source and html files.

        An image is accepted if its src contains 'zen-marketing-documentation' (the bucket behind
        https://zen-marketing-documentation.s3.amazonaws.com/docs/en) or 'embed-ssl.wistia.com'.
        Any other image is reported, and so is an img tag that has no src at all.

        :param exclude: list of article ids that use English images in localized versions
        :return: nothing. The results are printed
        """
        self.verify_handoff_exists()

        accepted_hosts = ('zen-marketing-documentation', 'embed-ssl.wistia.com')
        file_path = os.path.join(self.handoff_folder, 'articles')
        missing_images = []
        for file in glob.glob(os.path.join(file_path, '*.html')):
            if exclude and get_id_from_filename(file) in exclude:       # skip the excluded articles
                continue

            file_name = os.path.basename(file)
            for image in create_tree(file).find_all('img'):
                src = image.get('src') or ''        # image['src'] raises a KeyError if src is missing
                if any(host in src for host in accepted_hosts):
                    continue
                missing_images.append('* in {} -> {}'.format(file_name, src or '(no src attribute)'))

        if missing_images:
            print('The following images are not on S3:')
            print('\n'.join(missing_images) + '\n')
        else:
            print('All image links point to S3!\n')
Example #4
0
    def update_hrefs(self, locales=None, write=True):
        """
        Rewrites the en-us Help Center links in the localized files so they point to the locale.

        A link is rewritten only if the number at the start of the last segment of its href is
        listed in the article registry for the locale. Links to community posts and to article
        attachments are left alone. A '/#topic...' suffix is cut off the href before the lookup.

        :param locales: tuple of locales. If not specified, self.locales is used
        :param write: boolean. If true, every processed file is rewritten with the updated tree
        :return: nothing
        """
        self.verify_handoff_exists(localized=True)
        registry = self.read_registry(self.article_registry)

        target_locales = self.locales if locales is None else locales
        en_us_link = re.compile('/hc/en-us/')

        for locale in target_locales:
            localized_root = 'hc/{}'.format(locale)
            for file in self.get_localized_files(locale):
                short_path = file.split(locale.upper())[1]
                print('\nOpening {}{} for hrefs'.format(locale, short_path))
                tree = create_tree(file)
                for anchor in tree.find_all('a', href=en_us_link):
                    href = anchor['href']
                    # community posts and in-text attachment links are excluded
                    if '/community/posts/' in href or '/article_attachments/' in href:
                        continue
                    if '/#topic' in href:
                        href = href.split('/#topic')[0]
                        anchor['href'] = href

                    # keep what precedes the first '-', '#', '%' or '?' of the last path segment
                    segment = os.path.basename(href).replace('#', '-')
                    base_name = segment.split('-')[0].split('%')[0].split('?')[0]

                    try:
                        article_id = int(base_name)
                    except ValueError:
                        print(' - Link is not an id number - {}'.format(base_name))
                        continue

                    if article_id not in registry[locale]:      # no known loc target for the link
                        print(' - Not localized in HC - {}'.format(href))
                        continue

                    anchor['href'] = re.sub(r'hc/en-us', localized_root, href)
                    print(' - Updated xref - {}'.format(anchor['href']))

                if write:
                    rewrite_file(file, tree)
Example #5
0
    def copy_images(self, months=4, exclude=None):
        """
        Parses articles in handoff package and copies recently updated en-us images from s3 folder
        to handoff folder.

        An image is copied once, when a file with the same name exists in self.image_path and was
        modified in the last x months. It goes to images/fullsize if the first img tag that
        references it has a width attribute, and to images/resized otherwise. The names of the
        copied images and of the images that were not copied are printed at the end.

        :param months: number of months when en-us images were last updated
        :param exclude: list of article ids that use English images in localized versions
        :return: nothing
        """
        self.verify_handoff_exists()

        # Create both image folders in handoff folder. Each one is created on its own: if only the
        # images folder existed, shutil.copy would write a file named 'fullsize' or 'resized'.
        images_path = os.path.join(self.handoff_folder, 'images')
        images_path_fullsize = os.path.join(images_path, 'fullsize')
        images_path_resized = os.path.join(images_path, 'resized')
        os.makedirs(images_path_fullsize, exist_ok=True)
        os.makedirs(images_path_resized, exist_ok=True)

        # Get fnames of images in s3 folder that were updated in the last x months.
        # shift() does the relative date math; replace() with plural units is no longer supported.
        cutoff = arrow.utcnow().shift(months=-months)
        src_images = set()
        for file in glob.glob(os.path.join(self.image_path, '*.*')):
            if arrow.get(os.path.getmtime(file)) > cutoff:
                src_images.add(os.path.basename(file))

        # Parse each article in handoff_folder and copy recently updated images to handoff package
        file_path = os.path.join(self.handoff_folder, 'articles')
        files = glob.glob(os.path.join(file_path, '*.html'))
        copied = []
        skipped = []
        for file in files:
            if exclude and get_id_from_filename(file) in exclude:  # skip the excluded articles
                continue

            tree = create_tree(file)
            for image in tree.find_all('img'):
                src = image.get('src')
                if not src:  # an img tag without a src has nothing to copy
                    continue
                image_name = os.path.basename(src)
                if image_name not in src_images:
                    if image_name not in skipped:
                        skipped.append(image_name)
                    continue
                if image_name in copied:
                    continue
                src_image_path = os.path.join(self.image_path, image_name)
                if 'width' in image.attrs:
                    shutil.copy(src_image_path, images_path_fullsize)
                else:
                    shutil.copy(src_image_path, images_path_resized)
                copied.append(image_name)

        print(
            '\nImages copied (on s3 and updated in the last {} months): \n{}'.
            format(months, '\n'.join(copied)))
        print('\nImages not copied: \n{}'.format('\n'.join(skipped)))
Example #6
0
    def copy_images(self, months=4, exclude=None):
        """
        Parses articles in handoff package and copies recently updated en-us images from s3 folder
        to handoff folder.

        An image is copied once, when a file with the same name exists in self.image_path and was
        modified in the last x months. It goes to images/fullsize if the first img tag that
        references it has a width attribute, and to images/resized otherwise. The names of the
        copied images and of the images that were not copied are printed at the end.

        :param months: number of months when en-us images were last updated
        :param exclude: list of article ids that use English images in localized versions
        :return: nothing
        """
        self.verify_handoff_exists()

        # Create both image folders in handoff folder. Checking only for the images folder is not
        # enough: with a missing subfolder, shutil.copy writes a file named after the subfolder.
        images_path = os.path.join(self.handoff_folder, 'images')
        images_path_fullsize = os.path.join(images_path, 'fullsize')
        images_path_resized = os.path.join(images_path, 'resized')
        for folder in (images_path_fullsize, images_path_resized):
            os.makedirs(folder, exist_ok=True)

        # Get fnames of images in s3 folder that were updated in the last x months.
        # The cutoff is computed once with shift(); replace() no longer accepts plural units.
        cutoff = arrow.utcnow().shift(months=-months)
        src_images = {
            os.path.basename(file)
            for file in glob.glob(os.path.join(self.image_path, '*.*'))
            if arrow.get(os.path.getmtime(file)) > cutoff
        }

        # Parse each article in handoff_folder and copy recently updated images to handoff package
        file_path = os.path.join(self.handoff_folder, 'articles')
        copied = []
        skipped = []
        for file in glob.glob(os.path.join(file_path, '*.html')):
            if exclude and get_id_from_filename(file) in exclude:       # skip the excluded articles
                continue

            for image in create_tree(file).find_all('img'):
                src = image.get('src')
                if not src:                                             # img tag without a src
                    continue
                image_name = os.path.basename(src)
                if image_name in src_images:
                    if image_name not in copied:
                        has_width = 'width' in image.attrs
                        destination = images_path_fullsize if has_width else images_path_resized
                        shutil.copy(os.path.join(self.image_path, image_name), destination)
                        copied.append(image_name)
                elif image_name not in skipped:
                    skipped.append(image_name)

        print('\nImages copied (on s3 and updated in the last {} months): \n{}'.format(months, '\n'.join(copied)))
        print('\nImages not copied: \n{}'.format('\n'.join(skipped)))
Example #7
0
    def publish_handoff(self, locales=None, write=True):
        """
        Publishes the localized files of the handoff as article translations in the KB.

        The h1 of each file becomes the title of the translation and is removed from the tree.
        The rest of the tree becomes the body. If the locale is among the missing translations of
        the article, the translation is posted. Otherwise the existing translation is put.
        The glossary article (203661746) is always skipped.

        :param locales: locales to publish. If not specified, self.locales is used
        :param write: boolean. If false, nothing is posted or put
        :return: nothing
        :raises SystemExit: if the missing translations can't be retrieved or a file has no h1
        """
        self.verify_handoff_exists(localized=True)
        kb = KB(self.product)

        if locales is None:                 # if locales not specified, use all locales
            locales = self.locales

        for locale in locales:
            print('\nPushing \'{}\' translations ...\n'.format(locale))
            files = self.get_localized_files(locale)
            for file in files:
                article_id = get_id_from_filename(file)
                print('Publishing {}...'.format(article_id))

                if article_id == 203661746:                            # if glossary, paste in HC by hand
                    print('Glossary, 203661746, skipped. Enter manually.')
                    continue

                # # if included in a later loc handoff that has since been delivered, skip
                # later_handoff = [115012184908, 115014797387, 203664326, 115012399428, 115014417408, 115012258168,
                #                  231747367, 115012794168, 115014810447]
                # if article_id in later_handoff:
                #     print(f'{article_id} skipped. Was delivered in a later handoff.')
                #     continue

                # #  id changes since handoff
                # changes = {235723507: 203664366, 235651328: 216207658, 235721887: 224858627}
                # if article_id in changes:
                #     article_id = changes[article_id]

                missing_translations = kb.get_missing_translations(article_id)
                if missing_translations is None:
                    print('Error getting missing translations for article {}. Exiting'.format(article_id))
                    # exit() is a helper for the interactive shell and exits with status 0.
                    # SystemExit(1) reports the failure to the caller.
                    raise SystemExit(1)

                tree = create_tree(file)
                if tree.h1 is None:         # without this check the title lookup fails with an AttributeError
                    print('Error: no h1 title found in article {}. Exiting'.format(article_id))
                    raise SystemExit(1)
                title = ' '.join(tree.h1.stripped_strings)
                tree.h1.decompose()

                translation = {'title': title, 'body': str(tree), 'draft': False}
                if locale in missing_translations:          # no translation yet for the locale: post it
                    data = {'translation': {'locale': locale, **translation}}
                    if write:
                        print(f'- posting {article_id}')
                        kb.post_translation(article_id, payload=data)
                else:                                       # translation exists: put it
                    data = {'translation': translation}
                    if write:
                        print(f'- putting {article_id}')
                        kb.put_translation(article_id, locale, payload=data)
Example #8
0
 def update_post_from_file(self, post_id, filename):
     """
     Updates a community post with the content of a file in the posts folder of the staging folder.
     The text of the h1 becomes the title of the post and the h1 is removed from the tree.
     The rest of the tree becomes the details of the post.
     :param post_id: id of the post to update
     :param filename: name of the file in the posts folder
     :return: None
     :raises SystemExit: if the file has no h1 or if the h1 has no single string (inner tags, etc)
     """
     endpoint = '/posts/{}.json'.format(post_id)
     url = self.community_root + endpoint
     file = os.path.join(self.staging, 'posts', filename)
     tree = create_tree(file)
     # tree.h1 is None if the file has no h1. Check it first so the error message is printed
     # instead of an AttributeError.
     if tree.h1 is None or tree.h1.string is None:
         print('ERROR: title h1 problem in {} (extra inner tags, etc)'.format(filename))
         # exit() is a helper for the interactive shell and exits with status 0
         raise SystemExit(1)
     title = tree.h1.string.strip()
     tree.h1.decompose()
     data = {'post': {'title': title, 'details': str(tree)}}
     response = self.put_record(url, payload=data)
     if response:
         print('Successfully updated post {}'.format(post_id))
Example #9
0
    def update_hrefs(self, locales=None, write=True):
        """
        Localizes the links to en-us HC articles in the localized files.

        The href of a link is switched to the locale only if the number at the start of its last
        path segment is found in the article registry for the locale. Links to community posts
        and to article attachments are skipped. A '/#topic...' suffix is dropped from the href
        before the lookup.

        :param locales: tuple of locales. If not specified, self.locales is used
        :param write: boolean. If true, every processed file is rewritten with the updated tree
        :return: nothing
        """
        self.verify_handoff_exists(localized=True)
        registry = self.read_registry(self.article_registry)

        if locales is None:
            locales = self.locales          # default to all locales

        skipped_link_types = ('/community/posts/', '/article_attachments/')
        for locale in locales:
            for file in self.get_localized_files(locale):
                print('\nOpening {}{} for hrefs'.format(locale, file.split(locale.upper())[1]))
                tree = create_tree(file)
                for link in tree.find_all('a', href=re.compile('/hc/en-us/')):
                    # links to community posts and in-text attachment links stay as they are
                    if any(marker in link['href'] for marker in skipped_link_types):
                        continue

                    topic_free, marker, _ = link['href'].partition('/#topic')
                    if marker:                                  # href had a '/#topic...' suffix
                        link['href'] = topic_free

                    # the id is what precedes the first '-', '#', '%' or '?' of the last segment
                    base_name = os.path.basename(link['href']).replace('#', '-').split('-')[0]
                    for separator in ('%', '?'):
                        base_name = base_name.split(separator)[0]

                    try:
                        article_id = int(base_name)
                    except ValueError:
                        print(' - Link is not an id number - {}'.format(base_name))
                        continue

                    if article_id in registry[locale]:          # link has a known loc target
                        link['href'] = re.sub(r'hc/en-us', 'hc/{}'.format(locale), link['href'])
                        print(' - Updated xref - {}'.format(link['href']))
                    else:
                        print(' - Not localized in HC - {}'.format(link['href']))

                if write:
                    rewrite_file(file, tree)
Example #10
0
    def strip_comments(self, write=True):
        """
        Removes the html comments from the articles in the handoff folder.
        The voice pricing article (203661526) is never modified.

        :param write: if true, the files that had comments are rewritten without them
        :return: nothing. The number of comments stripped in each file is printed
        """
        self.verify_handoff_exists()

        print('\n')
        pattern = os.path.join(self.handoff_folder, 'articles', '*.html')
        for file in glob.glob(pattern):
            # voice pricing article - don't modify this file
            if get_id_from_filename(file) == 203661526:
                continue

            tree = create_tree(file)
            found = tree.find_all(text=lambda text: isinstance(text, Comment))
            for node in found:
                node.extract()
            count = len(found)

            # only touch the file if something was removed
            if write and count > 0:
                rewrite_file(file, tree)

            print('Stripped {} comments in {}'.format(count, os.path.basename(file)))
Example #11
0
    def push_staged(self, folder, locale='en-us', write=False):
        """
        Pushes id-named html files in a folder of the staging folder (self.settings['staging']).

        The h1 of each file becomes the title of the translation and is removed from the tree.
        If the locale is among the missing translations of the article, the translation is
        posted. Otherwise the existing translation is put. The glossary article (203661746)
        is always skipped.

        :param folder: Folder in the staging folder containing the files
        :param locale: Locale of the translations. Default is en-us
        :param write: Actually post or put the files.
        :return: None
        :raises SystemExit: if the folder doesn't exist, if the missing translations can't be
            retrieved, or if a file has no h1
        """
        path = os.path.join(os.sep, *self.settings['staging'].split('/'), folder)
        if not os.path.exists(path):
            print('Folder named "{}" not found in the staging folder.'.format(folder))
            # exit() is a helper for the interactive shell and exits with status 0.
            # SystemExit(1) reports the failure to the caller.
            raise SystemExit(1)
        files = glob.glob(os.path.join(path, '*.html'))
        for file in files:
            article_id = get_id_from_filename(file)
            if article_id == 203661746:
                print('Glossary, 203661746, skipped. Enter manually.')
                continue

            missing_translations = self.get_missing_translations(article_id)
            if missing_translations is None:
                print('Error getting missing translations for article {}. Exiting'.format(article_id))
                raise SystemExit(1)

            tree = create_tree(file)
            if tree.h1 is None:     # without this check the title lookup fails with an AttributeError
                print('Error: no h1 title found in article {}. Exiting'.format(article_id))
                raise SystemExit(1)
            title = ' '.join(tree.h1.stripped_strings)
            tree.h1.decompose()

            if locale in missing_translations:  # no translation yet for the locale: post it
                # NOTE(review): the post sends tree.body while the put below sends the whole
                # tree. Kept as is - confirm which one is intended.
                data = {'translation': {'locale': locale, 'title': title, 'body': str(tree.body), 'draft': False}}
                if write:
                    print(f'Posting translation {article_id}: {title}')
                    self.post_translation(article_id, payload=data)
            else:                               # translation exists: put it
                data = {'translation': {'title': title, 'body': str(tree), 'draft': False}}
                if write:
                    print(f'Putting translation {article_id}: {title}')
                    self.put_translation(article_id, locale, payload=data)
Example #12
0
    def update_srcs(self, locales=None, write=True):
        """
        Localizes the links to S3 images in the localized files.

        Looks at the img tags whose src contains /docs/en/. If the file name of the image is in
        the image registry for the locale, the src is switched to the folder of the locale.
        If not, the link is reported and kept.

        :param locales: locales to process. If not specified, self.locales is used
        :param write: if true, every processed file is rewritten with the updated tree
        :return: nothing
        """
        self.verify_handoff_exists(localized=True)
        registry = self.read_registry(self.image_registry)

        if locales is None:
            locales = self.locales          # default to all locales

        for locale in locales:
            for file in self.get_localized_files(locale):
                # only the part of the path after the upper-cased locale is printed
                print('\nOpening {}{} for srcs'.format(locale, file.split(locale.upper())[1]))
                tree = create_tree(file)
                for image in tree.find_all('img', src=re.compile('/docs/en/')):
                    old_src = image['src']
                    if os.path.basename(old_src) not in registry[locale]:
                        print(' - Not localized on S3 - {}'.format(old_src))
                        continue

                    # pt-br links are switched to docs/pt, the other locales to docs/<locale>
                    folder = 'pt' if locale == 'pt-br' else locale
                    image['src'] = re.sub(r'docs/en', 'docs/{}'.format(folder), old_src)
                    print(' - Updated src  - {}'.format(image['src']))

                if write:
                    rewrite_file(file, tree)
Example #13
0
 def take_image_inventory(self, backup_folder):
     """
     Reads the files written to a backup folder and create a JSON inventory of all images in the files.
     The inventory is a sorted list of the unique file names found in the src of the img tags.
     It's written to kb_image_inventory_<backup_folder>.json in self.product_cache.
     Prerequisites:
     1. Take an inventory of articles for the HC or the category (e.g., agent or admin categories in Support).
     See take_inventory() method.
     2. Write the files in the article inventory to the backups folder. See backup_inventory() method.
     3. Pass the backup folder name to this method.
     :param backup_folder: Folder in backups containing the article files
     :return: None
     """
     folder_path = os.path.join(os.sep, *self.settings['backups'].split('/'), backup_folder)
     image_names = set()
     for file in glob.glob(os.path.join(folder_path, '*.html')):
         tree = create_tree(file)
         for image in tree('img'):
             src = image.get('src')      # image['src'] raises a KeyError if the img tag has no src
             if src:
                 image_names.add(os.path.basename(src))
     # a set is not JSON-serializable. Sorting keeps the inventory identical from run to run.
     write_to_json(sorted(image_names), self.product_cache, f'kb_image_inventory_{backup_folder}.json')
Example #14
0
    def strip_comments(self, write=True):
        """
        Strips the html comments from every article in the handoff folder, except the voice
        pricing article (203661526).

        :param write: if true, the files that had comments are rewritten without them
        :return: nothing. The number of comments stripped in each file is printed
        """
        self.verify_handoff_exists()

        print('\n')
        articles = os.path.join(self.handoff_folder, 'articles')
        for file in glob.glob(os.path.join(articles, '*.html')):
            article_id = get_id_from_filename(file)
            if article_id == 203661526:  # voice pricing article - don't modify this file
                continue

            tree = create_tree(file)
            comments = tree.find_all(
                text=lambda text: isinstance(text, Comment))
            count = len(comments)
            for comment in comments:
                comment.extract()

            # files without comments are left untouched
            if write and count > 0:
                rewrite_file(file, tree)

            file_name = os.path.basename(file)
            print('Stripped {} comments in {}'.format(count, file_name))
Example #15
0
    def push_transformed_articles(self, folder, write=True):
        """
        Pushes dita-transformed html files to the appropriate KBs.
        Reads the yaml map in self.dita_map, a list of mappings with 'dita', 'id', and 'hc' keys.
        Looks for the transformed files in the transformed_files folder of the staging folder.
        The text of the h1 of a file becomes the title of the en-us translation and the body
        of the tree, without the h1, becomes its body.
        :param folder: The name of folder in transformed_files that contains the transformed files
        :param write: Boolean. If true, write changes to Help Centers
        :return: None, but writes the push results in json and xlsx files in the
            reports/<subdomain>/publishing folder of self.shared_folder
        """
        with open(self.dita_map, 'r') as f:
            # safe_load because the map is plain data. yaml.load() without a Loader is unsafe and
            # is rejected by recent PyYAML versions. An empty file gives an empty map.
            dita_map = yaml.safe_load(f) or []
        files_path = os.path.join(os.sep, *self.settings['staging'].split('/'), 'transformed_files', folder)
        files = glob.glob(os.path.join(files_path, '*.html'))
        results = []
        try:
            for file in files:
                dita_name = get_dita_name_from_filename(file)
                mapping = next((m for m in dita_map if m['dita'] == dita_name), None)
                if mapping is None:
                    # report the file itself. No mapping describes it, so no hc is known either
                    print('Skipping "{}" because it has no mapping'.format(dita_name))
                    results.append(
                        {'dita': dita_name, 'id': None, 'hc': None, 'pushed': False, 'notes': 'no mapping'})
                    continue

                # base of the result of this file. Each outcome below fills in pushed and notes
                result = {'dita': mapping['dita'], 'id': mapping['id'], 'hc': mapping['hc'], 'pushed': False,
                          'notes': ''}
                print('Updating "{}" ({}) in {} kb'.format(mapping['dita'], mapping['id'], mapping['hc']))
                tree = create_tree(file)
                if tree.h1 is None or tree.h1.string is None:
                    print('ERROR: title h1 problem in {} (extra inner tags, etc)'.format(mapping['dita']))
                    results.append(dict(result, notes='h1 problem'))
                    continue
                title = tree.h1.string.strip()
                tree.h1.decompose()
                data = {'translation': {'title': title, 'body': str(tree.body)}}
                hc_root = 'https://{}.zendesk.com/api/v2/help_center'.format(mapping['hc'])
                endpoint = '/articles/{}/translations/en-us.json'.format(mapping['id'])
                url = hc_root + endpoint
                print(url)
                if write:
                    response = self.put_record(url=url, payload=data)
                else:
                    print('Testing mode!')
                    response = 1            # any value other than None counts as pushed
                if response is not None:
                    results.append(dict(result, pushed=True))
                else:
                    results.append(dict(result, notes='request error'))
        finally:
            # Write the report once, after the loop, so that it includes every file (an h1 problem
            # in the last file used to be left out). The finally keeps the results gathered so far
            # if a push raises. Nothing is written if no file was processed.
            if results:
                now = arrow.now('US/Pacific')
                report_name = 'push_results_{}'.format(now.format('YYYY-MM-DD'))

                reports_path = os.path.join(self.shared_folder, 'reports', self.subdomain, 'publishing')
                write_to_json(results, reports_path, '{}.json'.format(report_name))
                write_to_excel(results, reports_path, '{}.xlsx'.format(report_name))
Example #16
0
    def publish_handoff(self, locales=None, write=True):
        """
        Publishes the localized files of the handoff as article translations in the KB.

        The h1 of each file becomes the title of the translation and is removed from the tree.
        The rest of the tree becomes the body. If the locale is among the missing translations of
        the article, the translation is posted. Otherwise the existing translation is put.
        The glossary article (203661746) is always skipped.

        :param locales: locales to publish. If not specified, self.locales is used
        :param write: boolean. If false, nothing is posted or put
        :return: nothing
        :raises SystemExit: if the missing translations can't be retrieved or a file has no h1
        """
        self.verify_handoff_exists(localized=True)
        kb = KB(self.product)

        if locales is None:  # if locales not specified, use all locales
            locales = self.locales

        for locale in locales:
            print('\nPushing \'{}\' translations ...\n'.format(locale))
            files = self.get_localized_files(locale)
            for file in files:
                article_id = get_id_from_filename(file)
                print('Publishing {}...'.format(article_id))

                if article_id == 203661746:  # if glossary, paste in HC by hand
                    print('Glossary, 203661746, skipped. Enter manually.')
                    continue

                # # if included in a later loc handoff that has since been delivered, skip
                # later_handoff = [115012184908, 115014797387, 203664326, 115012399428, 115014417408, 115012258168,
                #                  231747367, 115012794168, 115014810447]
                # if article_id in later_handoff:
                #     print(f'{article_id} skipped. Was delivered in a later handoff.')
                #     continue

                # #  id changes since handoff
                # changes = {235723507: 203664366, 235651328: 216207658, 235721887: 224858627}
                # if article_id in changes:
                #     article_id = changes[article_id]

                missing_translations = kb.get_missing_translations(article_id)
                if missing_translations is None:
                    print(
                        'Error getting missing translations for article {}. Exiting'
                        .format(article_id))
                    # exit() is a helper for the interactive shell and exits with status 0.
                    # SystemExit(1) reports the failure to the caller.
                    raise SystemExit(1)

                tree = create_tree(file)
                if tree.h1 is None:  # without this check the title lookup fails with an AttributeError
                    print('Error: no h1 title found in article {}. Exiting'.
                          format(article_id))
                    raise SystemExit(1)
                title = ' '.join(tree.h1.stripped_strings)
                tree.h1.decompose()

                if locale in missing_translations:  # no translation yet for the locale: post it
                    data = {
                        'translation': {
                            'locale': locale,
                            'title': title,
                            'body': str(tree),
                            'draft': False
                        }
                    }
                    if write:
                        print(f'- posting {article_id}')
                        kb.post_translation(article_id, payload=data)
                else:  # translation exists: put it
                    data = {
                        'translation': {
                            'title': title,
                            'body': str(tree),
                            'draft': False
                        }
                    }
                    if write:
                        print(f'- putting {article_id}')
                        kb.put_translation(article_id, locale, payload=data)