def take_section_inventory(self, section_id, src_locale='en-us'):
    """
    Writes an inventory of the non-draft articles in one Help Center section.

    Articles whose source locale differs from src_locale are left out, as are drafts.
    Each entry carries the article's dita name from the ditamap, or None when the
    ditamap has no mapping with the article's id.

    :param section_id: Id of the section to inventory
    :param src_locale: Source locale an article must have to be listed
    :return: nothing
    """
    ditamap = Ditamap().open()

    listing = self.get_record_list(
        self.kb_root + '/sections/{}/articles.json?include=users'.format(section_id))

    # Side-loaded users: author id -> display name (first occurrence of an id is kept)
    authors = {}
    for user in listing['users']:
        user_id = user['id']
        if user_id in authors:
            continue
        authors[user_id] = user['name']

    inventory = []  # list of id/title/author/section_id/url dicts
    for article in listing['articles']:
        if article['source_locale'] != src_locale or article['draft']:
            continue

        # First ditamap entry with this article id, if any
        dita_name = next(
            (entry['dita'] for entry in ditamap if entry['id'] == article['id']),
            None)

        record = {
            'id': article['id'],
            'title': article['title'],
            'dita': dita_name,
            'author': authors[article['author_id']],
            'section_id': section_id,
            'created_at': article['created_at'],
            'url': article['html_url'],
        }
        inventory.append(record)

    write_to_json(inventory, self.product_cache,
                  'section_{}_article_inventory.json'.format(section_id))
    print('Inventory written')
def take_inventory(self, categories=None, excel=False, drafts=False):
    """
    Gets a list of all en-us articles in the specified categories in Help Center.

    Skips any section restricted to a user segment whose user type is 'staff'.
    Draft articles are skipped unless drafts is set to something other than False.

    Make sure to get a fresh version of the ditamap on the Documentation Team Drive
    (under /All products/production)

    :param categories: List of category ids. Omit argument for all categories
    :param excel: Whether to export the inventory to an Excel file or not
    :param drafts: Whether or not to include draft articles in the inventory
    :return: nothing
    """
    if categories is None:
        categories = self.categories

    ditamap = Ditamap().open()
    # Index the ditamap once instead of rescanning it for every article.
    # setdefault keeps the first mapping for an id, like a first-match scan would.
    dita_names = {}
    for mapping in ditamap:
        dita_names.setdefault(mapping['id'], mapping['dita'])

    source_locale = 'en-us'
    kb_articles = []  # list of id/title/author/section_id/url dicts

    for category in categories:
        url = self.kb_root + f'/{source_locale}/categories/{category}/sections.json'
        section_list = self.get_record_list(url)

        for section in section_list['sections']:
            if section['locale'] != source_locale:
                continue

            # Sections limited to a staff-only user segment are not inventoried
            if section['user_segment_id']:
                url = self.kb_root + '/user_segments/{}.json'.format(section['user_segment_id'])
                user_segment = self.get_record(url)['user_segment']
                if user_segment['user_type'] == 'staff':
                    continue

            url = self.kb_root + f'/{source_locale}/sections/{section["id"]}/articles.json?include=users'
            article_list = self.get_record_list(url)

            # Side-loaded users: author id -> display name (first occurrence wins)
            user_names = {}
            for user in article_list['users']:
                user_names.setdefault(user['id'], user['name'])

            for article in article_list['articles']:
                if article['source_locale'] != source_locale:
                    continue
                if drafts is False and article['draft']:
                    continue

                kb_articles.append({
                    'id': article['id'],
                    'title': article['title'],
                    'dita': dita_names.get(article['id']),  # None when unmapped
                    'author': user_names[article['author_id']],
                    'section_id': article['section_id'],
                    'created_at': article['created_at'],
                    'edited_at': article['edited_at'],
                    'source_locale': article['source_locale'],
                    'url': article['html_url'],
                })

    write_to_json(kb_articles, self.product_cache, self.product_inventory)
    print('Inventory written to {}'.format(os.path.join(self.product_cache, self.product_inventory)))

    if excel is True:
        # Swap the extension properly rather than assuming it is exactly four characters long
        fname = os.path.splitext(self.product_inventory)[0] + '.xlsx'
        write_to_excel(kb_articles, self.product_cache, fname)
        print('Inventory of {} articles written to Excel in {}'.format(len(kb_articles), self.product_cache))
def write_files(self, loader='_loader.txt'):
    """
    Creates a handoff folder, gets specified en-us articles from HC, and writes them in the folder.

    Exits the program if the handoff folder already exists. Article ids are read one per line
    from the loader file in self.base_path; blank lines are ignored. Each article that is found
    and yields markup is written as <source_id>.html, a handoff_list.txt is written alongside
    the articles, and the ids from the loader are appended to handoff_log.json in the KB
    shared folder.

    :param loader: custom name of file with list of article ids to download
    :return: None
    """
    import sys  # local import so the block does not depend on the file's import list

    if os.path.exists(self.handoff_folder):
        print('A handoff with that name already exists. Exiting.')
        sys.exit()

    article_list = []  # e.g. [203660036, 203664356, 203663816]
    with open(os.path.join(self.base_path, loader), mode='r') as f:
        for line in f:
            entry = line.strip()
            if not entry:  # a stray blank line should not crash int()
                continue
            article_list.append(int(entry))

    file_path = os.path.join(self.handoff_folder, 'articles')
    os.makedirs(file_path)

    handoff_lines = ['{} handoff files:\n\n'.format(self.name)]
    kb = KB(self.product)
    for article in article_list:
        translation = kb.get_translation(article, 'en-us')
        if not translation:
            # Report the loader file actually used, not a hard-coded name
            print('{} in {} does not exist on Help Center'.format(article, loader))
            continue
        if translation['source_id'] == 203661526:  # skip voice pricing article
            continue

        markup = get_page_markup(translation)
        if markup is None:
            continue

        filename = '{}.html'.format(translation['source_id'])
        with open(os.path.join(file_path, filename), mode='w', encoding='utf-8') as f:
            f.write(markup)
        print('{} written'.format(filename))
        handoff_lines.append('{} - "{}"\n'.format(filename, translation['title']))

    with open(os.path.join(file_path, 'handoff_list.txt'), mode='w', encoding='utf-8') as f:
        f.write(''.join(handoff_lines))

    handoff_log = read_from_json(os.path.join(kb.shared_folder, 'handoff_log.json'))
    # setdefault: a product with no earlier handoffs has no key in the log yet
    handoff_log.setdefault(self.product, []).append({'date': self.name, 'files': article_list})
    write_to_json(handoff_log, kb.shared_folder, 'handoff_log.json')
    print('Package created at {}'.format(self.handoff_folder))
def get_articles_by_docs_team(self):
    """
    Adds newly created docs team articles to the team_articles cache.

    Reads the cached team articles, takes the latest 'created_at' among them as the cutoff,
    then scans the article inventory of each listed subdomain for articles whose author is in
    members.json and which were created after the cutoff. The combined list is written back
    as JSON and as an Excel file in the docs team cache. When the cache is empty there is no
    cutoff and every docs team article qualifies.

    :return: None
    """
    cache_file = os.path.join(self.docs_team_cache, 'team_articles.json')
    backup(cache_file)
    team_articles = read_from_json(cache_file)

    # max() runs on the raw 'created_at' values before parsing, as before;
    # presumably these are ISO 8601 strings that sort chronologically -- verify against the cache
    created_dates = [article['created_at'] for article in team_articles]
    start_date = arrow.get(max(created_dates)) if created_dates else None

    subdomains = 'support', 'chat', 'bime', 'help'
    docs_team = read_from_json(os.path.join(self.docs_team_cache, 'members.json'))

    for subdomain in subdomains:
        with open(os.path.join('cache', subdomain, self.product_inventory), mode='r') as f:
            inventory = json.load(f)
        for article in inventory:
            if article['author'] not in docs_team:
                continue
            if start_date is None or arrow.get(article['created_at']) > start_date:
                team_articles.append(article)

    # Same (data, folder, filename) call shape as every other write_to_json call in this file
    write_to_json(team_articles, self.docs_team_cache, 'team_articles.json')
    write_to_excel(team_articles, self.docs_team_cache, 'team_articles.xlsx')
    print('Articles written to docs_team cache')
def write_files(self, loader='_loader.txt'):
    """
    Creates a handoff folder, gets specified en-us articles from HC, and writes them in the folder.

    Exits the program if the handoff folder already exists. Article ids are read one per line
    from the loader file in self.base_path; blank lines are ignored. Each article that is found
    and yields markup is written as <source_id>.html, a handoff_list.txt is written alongside
    the articles, and the ids from the loader are appended to handoff_log.json in the KB
    shared folder.

    :param loader: custom name of file with list of article ids to download
    :return: None
    """
    import sys  # local import so the block does not depend on the file's import list

    if os.path.exists(self.handoff_folder):
        print('A handoff with that name already exists. Exiting.')
        sys.exit()

    article_list = []  # e.g. [203660036, 203664356, 203663816]
    with open(os.path.join(self.base_path, loader), mode='r') as f:
        for line in f:
            entry = line.strip()
            if not entry:  # a stray blank line should not crash int()
                continue
            article_list.append(int(entry))

    file_path = os.path.join(self.handoff_folder, 'articles')
    os.makedirs(file_path)

    handoff_lines = ['{} handoff files:\n\n'.format(self.name)]
    kb = KB(self.product)
    for article in article_list:
        translation = kb.get_translation(article, 'en-us')
        if not translation:
            # Report the loader file actually used, not a hard-coded name
            print('{} in {} does not exist on Help Center'.format(article, loader))
            continue
        if translation['source_id'] == 203661526:  # skip voice pricing article
            continue

        markup = get_page_markup(translation)
        if markup is None:
            continue

        filename = '{}.html'.format(translation['source_id'])
        with open(os.path.join(file_path, filename), mode='w', encoding='utf-8') as f:
            f.write(markup)
        print('{} written'.format(filename))
        handoff_lines.append('{} - "{}"\n'.format(filename, translation['title']))

    with open(os.path.join(file_path, 'handoff_list.txt'), mode='w', encoding='utf-8') as f:
        f.write(''.join(handoff_lines))

    handoff_log = read_from_json(os.path.join(kb.shared_folder, 'handoff_log.json'))
    # setdefault: a product with no earlier handoffs has no key in the log yet
    handoff_log.setdefault(self.product, []).append({'date': self.name, 'files': article_list})
    write_to_json(handoff_log, kb.shared_folder, 'handoff_log.json')
    print('Package created at {}'.format(self.handoff_folder))
def take_image_inventory(self, backup_folder):
    """
    Reads the files written to a backup folder and creates a JSON inventory of all images in the files.

    The inventory is the sorted list of unique image file names (the basename of each img src)
    and is written to kb_image_inventory_<backup_folder>.json in the product cache.

    Prerequisites:
    1. Take an inventory of articles for the HC or the category (e.g., agent or admin categories
       in Support). See take_inventory() method.
    2. Write the files in the article inventory to the backups folder. See backup_inventory() method.
    3. Pass the backup folder name to this method.

    :param backup_folder: Folder in backups containing the article files
    :return: None
    """
    folder_path = os.path.join(os.sep, *self.settings['backups'].split('/'), backup_folder)

    image_names = set()
    for file in glob.glob(os.path.join(folder_path, '*.html')):
        tree = create_tree(file)
        for image in tree('img'):
            src = image.get('src')
            if not src:  # an <img> without a usable src has no file name to record
                continue
            image_names.add(os.path.basename(src))

    # A set is not JSON-serializable; sorting also keeps the file stable between runs
    write_to_json(sorted(image_names), self.product_cache, f'kb_image_inventory_{backup_folder}.json')
def take_locale_inventory(self, locale='en-us', categories=None):
    """
    Gets a list of all articles in the specified locale and categories in Help Center.

    Skips any section restricted to a user segment whose user type is 'staff'. Also skips
    any articles in Draft mode. The inventory is written to kb_<locale>_article_inventory.json
    in the product cache.

    :param locale: The locale of the articles
    :param categories: List of category ids. Omit argument for all categories
    :return: nothing
    """
    if categories is None:
        categories = self.categories

    kb_articles = []  # list of id/title/author/section_id/url dicts

    for category in categories:
        url = self.kb_root + '/{}/categories/{}/sections.json'.format(locale, category)
        section_list = self.get_record_list(url)

        for section in section_list['sections']:
            # Sections limited to a staff-only user segment are not inventoried
            if section['user_segment_id']:
                url = self.kb_root + '/user_segments/{}.json'.format(section['user_segment_id'])
                user_segment = self.get_record(url)['user_segment']
                if user_segment['user_type'] == 'staff':
                    continue

            url = self.kb_root + '/{}/sections/{}/articles.json?include=users'.format(locale, section['id'])
            article_list = self.get_record_list(url)

            # Side-loaded users: author id -> display name (first occurrence wins)
            user_names = {}
            for user in article_list['users']:
                user_names.setdefault(user['id'], user['name'])

            for article in article_list['articles']:
                if article['draft']:
                    continue
                kb_articles.append({
                    'id': article['id'],
                    'title': article['title'],
                    'author': user_names[article['author_id']],
                    'section_id': article['section_id'],
                    'source_locale': article['source_locale'],
                    'created_at': article['created_at'],
                    'url': article['html_url'],
                })

    fname = f'kb_{locale}_article_inventory.json'
    write_to_json(kb_articles, self.product_cache, fname)
    # Report the locale-specific file that was just written, not the default product inventory
    print('Inventory written to {}'.format(os.path.join(self.product_cache, fname)))
def take_localized_article_inventory(self, inventory=True):
    """
    Writes to cache the list of en-us articles that have no missing translations.

    Reads the product inventory from cache, asks Help Center for the missing translations
    of every article whose source locale is en-us, and keeps the articles for which the
    'locales' list of the response is empty.

    Prerequisite: Run take_inventory() first to refresh the article data, or pass
    inventory=False to have this method run it.

    :param inventory: Whether or not a current inventory exists.
    :return: None
    """
    if inventory is False:
        print('Taking inventory...')
        self.take_inventory()

    inventory_path = os.path.join(self.product_cache, self.product_inventory)
    cached_articles = read_from_json(inventory_path)

    fully_localized = []
    for entry in cached_articles:
        if entry['source_locale'] == 'en-us':  # anything else is not one of ours
            url = self.kb_root + '/articles/{}/translations/missing.json'.format(entry['id'])
            print('Checking translations for {}'.format(entry['id']))
            response = self.get_record(url)
            if not response['locales']:
                fully_localized.append(entry)

    write_to_json(fully_localized, self.product_cache, 'kb_localized_article_inventory.json')
    print('{} articles in loc inventory'.format(len(fully_localized)))
def push_transformed_articles(self, folder, write=True):
    """
    Pushes dita-transformed html files to the appropriate KBs.

    Reads the ditamap.yml file in the /production folder on the Documentation Team Drive.
    Looks for the transformed files in the production/staging/transformed_files folder.

    For each file, the h1 text becomes the translation title and the remaining body becomes
    the translation body of the mapped article's en-us translation. Files with no ditamap
    entry, or with a missing or non-plain-text h1, are reported and not pushed.

    :param folder: The name of folder in /production/staging/transformed_files that contains
        the transformed files
    :param write: Boolean. If true, write changes to Help Centers
    :return: None, but prints the push results in json and xlsx files in
        /production/reports/publishing
    """
    with open(self.dita_map, 'r') as f:
        # NOTE(review): was yaml.load(f) with no Loader, which can construct arbitrary objects
        # and is rejected by PyYAML 6+. safe_load assumes the ditamap is plain lists/dicts/scalars.
        dita_map = yaml.safe_load(f) or []

    files_path = os.path.join(os.sep, *self.settings['staging'].split('/'), 'transformed_files', folder)
    files = glob.glob(os.path.join(files_path, '*.html'))

    results = []
    for file in files:
        dita_name = get_dita_name_from_filename(file)
        mapping = next((entry for entry in dita_map if entry['dita'] == dita_name), None)

        if mapping is None:
            # Name the file that was skipped; there is no mapping (and so no hc) to report
            print('Skipping "{}" because it has no mapping'.format(dita_name))
            results.append({'dita': dita_name, 'id': None, 'hc': None, 'pushed': False, 'notes': 'no mapping'})
            continue

        outcome = {'dita': mapping['dita'], 'id': mapping['id'], 'hc': mapping['hc']}
        print('Updating "{}" ({}) in {} kb'.format(mapping['dita'], mapping['id'], mapping['hc']))

        tree = create_tree(file)
        heading = tree.h1
        # .string is None when the h1 holds more than a single text node; a file with no h1
        # at all is reported the same way instead of raising AttributeError
        if heading is None or heading.string is None:
            print('ERROR: title h1 problem in {} (extra inner tags, etc)'.format(mapping['dita']))
            results.append({**outcome, 'pushed': False, 'notes': 'h1 problem'})
            continue

        title = heading.string.strip()
        heading.decompose()  # the title is sent separately, so drop it from the body
        data = {'translation': {'title': title, 'body': str(tree.body)}}

        hc_root = 'https://{}.zendesk.com/api/v2/help_center'.format(mapping['hc'])
        endpoint = '/articles/{}/translations/en-us.json'.format(mapping['id'])
        url = hc_root + endpoint
        print(url)

        if write:
            response = self.put_record(url=url, payload=data)
        else:
            print('Testing mode!')
            response = 1  # any non-None value counts as a successful push below

        if response is not None:
            results.append({**outcome, 'pushed': True, 'notes': ''})
        else:
            results.append({**outcome, 'pushed': False, 'notes': 'request error'})

    now = arrow.now('US/Pacific')
    report_name = 'push_results_{}'.format(now.format('YYYY-MM-DD'))
    reports_path = os.path.join(self.shared_folder, 'reports', self.subdomain, 'publishing')
    write_to_json(results, reports_path, '{}.json'.format(report_name))
    write_to_excel(results, reports_path, '{}.xlsx'.format(report_name))