def __get_attachment(self, page_id, filename):
    """
    Get page attachment

    :param page_id: confluence page id
    :param filename: attachment filename
    :return: AttachmentInfo namedtuple (id, hash) on success,
             False if no attachment with that filename exists
    """
    # FIX: quote the filename so names containing spaces or special
    # characters produce a valid query string (consistent with get_page
    # and __get_direct_child_pages, which already quote their parameters)
    url = '%s/rest/api/content/%s/child/attachment?filename=%s' \
          '&expand=metadata.properties.hash' \
          % (CONFLUENCE_API_URL, page_id, urllib.parse.quote_plus(filename))
    response = common.make_request_get(url)
    data = response.json()
    LOGGER.debug('data: %s', str(data))
    if len(data[u'results']) >= 1:
        data = data[u'results'][0]
        att_id = data[u'id']
        # the sha256 hash is stored by __upload_attachment as a content
        # property; it is absent for attachments uploaded by other tools
        att_hash = None
        props = data[u'metadata'][u'properties']
        if u'hash' in props:
            hash_prop = props[u'hash'][u'value']
            if u'sha256' in hash_prop:
                att_hash = hash_prop[u'sha256']
        att_info = collections.namedtuple('AttachmentInfo', ['id', 'hash'])
        return att_info(att_id, att_hash)
    return False
def main():
    """
    Main program
    :return:
    """
    LOGGER.info('----------------------------------')
    LOGGER.info('Markdown to Confluence Upload Tool')
    LOGGER.info('----------------------------------')
    LOGGER.info('Space Key: %s', SPACE_KEY)

    # snapshot the existing child pages so stale ones can be trimmed later
    if not NOTRACK:
        CHILD_PAGES.track_child_pages()

    # upload every documentation root under the configured ancestor
    ancestors = common.get_page_as_ancestor(ANCESTOR)
    for doc_root in DOCUMENTATION_ROOTS:
        upload_folder(doc_root, ancestors)

    # second pass for pages whose cross-references could not be
    # resolved the first time around
    RESOLVERS.resolve_missing_refs()

    # move pages that are no longer generated into the trash page
    if not NOTRACK and CHILD_PAGES.trash_needed():
        CHILD_PAGES.trim_child_pages(PAGE_API.create_trash())

    LOGGER.info('Markdown Converter completed successfully.')
def get_html(self, filepath):
    """
    Generate html from md file

    :param filepath: the file to translate to html
    :return: html translation
    """
    with codecs.open(filepath, 'r', 'utf-8') as mdfile:
        read = mdfile.read()
    read = macros.remove_collapsible_headings(read)
    html = markdown.markdown(read,
                             extensions=['markdown.extensions.tables',
                                         'markdown.extensions.fenced_code',
                                         'mdx_truly_sane_lists'])
    # drop the first rendered line (the title heading) -- the title is
    # supplied separately as the Confluence page title
    html = '\n'.join(html.split('\n')[1:])
    html = macros.add_note(html)
    html = macros.convert_info_macros(html)
    html = macros.convert_comment_block(html)
    html = macros.convert_code_block(html)
    html = macros.remove_empty_list_items(html)
    if CONTENTS:
        html = macros.add_contents(html)
    html = macros.process_refs(html)
    html = PAGE_CACHE.resolve_refs(html, filepath)
    if LOG_HTML:
        title = self.get_title(filepath)
        # NOTE(review): dirname() returns no trailing separator, so this
        # writes '<dir><title>.html' -- confirm whether os.path.join was
        # intended; path kept unchanged to preserve current behavior
        log_path = os.path.dirname(LOG_FILE) + title + '.html'
        # BUG FIX: use a context manager so the log file handle is closed
        # (it was previously leaked)
        with open(log_path, 'w+') as html_log_file:
            html_log_file.write('<h1>' + title + '</h1>')
            html_log_file.write(html)
    else:
        LOGGER.debug('file: %s\n\nhtml: %s\n\n', filepath, html)
    return html
def resolve_missing_refs(self):
    """
    Resolve missing refs
    """
    pending = PAGE_CACHE.get_refs_to_resolve_again()
    if not pending:
        return
    LOGGER.info('-- Attempting to resolve cross-references --')
    for page in pending:
        self.__update_page_refs_only(page)
def __label_page(self, page_id):
    """
    Attach a label to the page to indicate it was auto-generated
    """
    LOGGER.info("Labeling page %s", page_id)
    endpoint = '%s/rest/api/content/%s/label' % (CONFLUENCE_API_URL, page_id)
    payload = json.dumps([{"name": "md_to_conf"}])
    common.make_request_post(endpoint, data=payload)
def create_or_update_page(self, title, body, ancestors, filepath):
    """
    Create a new page

    :param title: confluence page title
    :param body: confluence page content
    :param ancestors: confluence page ancestor
    :param filepath: markdown file full path
    :return: created or updated page id
    """
    existing = PAGE_CACHE.get_page(title)
    if existing:
        # a page with this title already exists -- delegate to update
        return self.update_page(existing.id, title, body, existing.version,
                                ancestors, filepath)

    LOGGER.info('Creating page %s...', title)
    url = '%s/rest/api/content/' % CONFLUENCE_API_URL
    payload = {
        'type': 'page',
        'title': title,
        'space': {'key': SPACE_KEY},
        'body': {
            'storage': {
                'value': body,
                'representation': 'storage'
            }
        },
        'ancestors': ancestors
    }
    LOGGER.debug("data: %s", json.dumps(payload))
    data = common.make_request_post(url, data=json.dumps(payload)).json()

    page_id = data[u'id']
    LOGGER.info('Page created in %s with ID: %s.',
                data[u'space'][u'name'], page_id)
    LOGGER.info('URL: %s', '%s%s' % (CONFLUENCE_API_URL,
                                     data[u'_links'][u'webui']))

    # tag the page as generated by this tool
    self.__label_page(page_id)

    # pages that embed images need a second pass so the files get attached
    if re.search(r'<img(.*?)\/>', body):
        LOGGER.info('Attachments found, update procedure called.')
        return self.update_page(page_id, title, body,
                                data[u'version'][u'number'],
                                ancestors, filepath)
    return page_id
def deleteEmptyDirectories(path_, deletePath_) :
    """Recursively delete empty directories under the given path

    :param String path_: directory to scan
    :param bool deletePath_: when True, delete path_ itself if it ends
                             up empty after its children are processed
    """
    # FIX: loop variable renamed -- 'dir' shadowed the builtin
    for entry in os.listdir(path_) :
        entryPath = os.path.join(path_, entry)
        if os.path.isdir(entryPath) :
            deleteEmptyDirectories(entryPath, True)
    if deletePath_ :
        # re-list: the recursion above may have emptied this directory
        if len(os.listdir(path_)) == 0 :
            LOGGER.info('Deleting empty directory: %s', path_)
            if not SIMULATE_MODE :
                os.rmdir(path_)
def trim_child_pages(self, trash_ancestor):
    """
    Trim (delete) any child pages under the "active" children
    of the ANCESTOR which are not "active"
    """
    # remove any pages that are no longer needed; any top-level
    # page under the ancestor, and its children, are spared; but
    # any children under any page that we have touched are fair
    # game to be removed
    for parent in self.__ORIGINAL_CHILD_PAGES:
        if parent not in self.__ACTIVE_PAGES:
            LOGGER.info("Sparing original page: %s", parent)
            continue
        for child in self.__ORIGINAL_CHILD_PAGES[parent]:
            if child not in self.__ACTIVE_PAGES:
                self.__delete_page(child, trash_ancestor)
def updateEpisode(cursor_, video_, subEng_, subFr_, downloading) :
    """Update the database for a specific episode

    :param cursor_: sqlite3 cursor
    :param video_: video to update in database
    :param bool subEng_: is there an associated english sub for this episode
    :param bool subFr_: is there an associated french sub for this episode
    :param bool downloading: is this episode currently downloading
    """
    LOGGER.info('Updating database for %s', video_.name)
    # FIX: parameterized query -- the previous string interpolation broke
    # on series names containing quotes and was SQL-injectable; the INSERT
    # below already used placeholders
    cursor_.execute(
        'SELECT id FROM series WHERE name=? AND season=? AND episode=?',
        (video_.series, video_.season, video_.episode))
    row = cursor_.fetchone()
    foundId = row[0] if row is not None else None
    # NOTE(review): 'downloading' is accepted but the INSERT hard-codes the
    # downloading column to 0 -- kept as-is to preserve behavior; confirm
    # whether the parameter was meant to be stored
    cursor_.execute(
        'INSERT OR REPLACE INTO series '
        '(id, name, title, season, episode, sub_en, sub_fr, downloading) '
        'VALUES(?, ?, ?, ?, ?, ?, ?, 0)',
        (foundId, video_.series, video_.title, video_.season,
         str(video_.episode), str(int(subEng_)), str(int(subFr_))))
def check_for_errors(response):
    """
    Check response for errors and log help if necessary
    :param response: the received response
    :return
    """
    try:
        response.raise_for_status()
        return
    except requests.RequestException as err:
        LOGGER.error('err.response: %s', err)
        status = response.status_code
        if status == 404:
            # a 404 usually means the configuration is wrong, not the page
            LOGGER.error('Error: Page not found. Check the following are correct:')
            LOGGER.error('Space Key : %s', SPACE_KEY)
            LOGGER.error('Confluence URL : %s', CONFLUENCE_API_URL)
        else:
            LOGGER.error('Error: %d - %s', status, response.content)
        sys.exit(1)
def get_page(self, title):
    """
    Retrieve page details by title

    :param title: page tile
    :return: PageInfo namedtuple (id, version, link, ancestor, labels,
             body, title), or False when no page with that title exists
    """
    if title in self.__CACHED_PAGE_INFO:
        return self.__CACHED_PAGE_INFO[title]
    LOGGER.info('Retrieving page information: %s', title)
    url = '%s/rest/api/content?title=%s&spaceKey=%s' \
          '&expand=version,ancestors,metadata.labels,body.storage' \
          % (CONFLUENCE_API_URL, urllib.parse.quote_plus(title), SPACE_KEY)
    response = common.make_request_get(url)
    data = response.json()
    LOGGER.debug("data: %s", str(data))
    if len(data[u'results']) >= 1:
        data = data[u'results'][0]
        page_id = data[u'id']
        version_num = data[u'version'][u'number']
        link = '%s%s' % (CONFLUENCE_API_URL, data[u'_links'][u'webui'])
        ancestor = data[u'ancestors'][-1][u'id']
        # BUG FIX: materialize a list -- map() returns a one-shot iterator
        # on Python 3, and since this result is cached the labels would be
        # empty for every consumer after the first
        labels = [r[u'name']
                  for r in data[u'metadata'][u'labels'][u'results']]
        body = data[u'body'][u'storage'][u'value']
        # These properties do not round-trip; confluence adds them, so strip them out
        body = re.sub(' ac:schema-version="[^"]+"', '', body)
        body = re.sub(' ac:macro-id="[^"]+"', '', body)
        # Confluence replaces some quotes (but not all) with xml quotes
        body = re.sub('&quot;', '"', body)
        title = data[u'title']
        page_info = collections.namedtuple('PageInfo',
                                           ['id', 'version', 'link',
                                            'ancestor', 'labels',
                                            'body', 'title'])
        page = page_info(page_id, version_num, link, ancestor, labels,
                         body, title)
        self.__CACHED_PAGE_INFO[title] = page
        return page
    return False
def updateSeriesDB(dbFilePath_, seriesPath_) :
    """Update the database for a given path

    :param String dbFilePath_
    :param String seriesPath_
    """
    conn = sqlite3.connect(dbFilePath_)
    cursor = conn.cursor()
    LOGGER.info('*** START UPDATING DATABASE ***')
    engLang = babelfish.Language('eng')
    fraLang = babelfish.Language('fra')
    for video in subliminal.scan_videos(seriesPath_, True, True) :
        updateEpisode(cursor, video,
                      engLang in video.subtitle_languages,
                      fraLang in video.subtitle_languages,
                      True)
    conn.commit()
    conn.close()
def __get_direct_child_pages(self, page_id):
    """
    Retrieve every direct child page id
    :param page_id: page id
    :return: ids of immediate child pages
    """
    url = '%s/rest/api/content/search?cql=parent=%s' % \
        (CONFLUENCE_API_URL, urllib.parse.quote_plus(page_id))
    data = common.make_request_get(url).json()
    LOGGER.debug("data: %s", str(data))
    return [result[u'id'] for result in data[u'results']]
def __get_child_pages(self, page_id):
    """
    Retrieve details of the child pages by page id

    :param page_id: page id
    :return: the ids of all the child pages (direct and transitive)
    """
    if page_id in self.__CHILD_PAGES_CACHE:
        return self.__CHILD_PAGES_CACHE[page_id]
    LOGGER.info('Retrieving information of original child pages: %s', page_id)
    page_ids = self.__get_direct_child_pages(page_id)
    # BUG FIX: the loop variable previously shadowed the page_id
    # parameter, so the cache entry below was stored under the id of the
    # LAST child instead of the page that was actually requested
    for child_id in page_ids:
        child_pages = self.__get_child_pages(child_id)
        if child_pages:
            # append only ids not already collected, preserving order
            page_ids = page_ids + list(set(child_pages) - set(page_ids))
    self.__CHILD_PAGES_CACHE[page_id] = page_ids
    return page_ids
def __update_page_refs_only(self, filepath):
    """
    Update refs on a page without changing anything else about it
    :param filepath: markdown file full path
    """
    title = FILE_API.get_title(filepath)
    LOGGER.info('Updating page refs %s...', title)
    page = PAGE_CACHE.get_page(title)
    html = FILE_API.get_html(filepath)
    ancestors = common.get_page_as_ancestor(page.ancestor)
    LOGGER.info('.. title: %s .. version: %d .. ancestor: %s ..',
                title, page.version, page.ancestor)
    PAGE_API.update_page(page.id, title, html, page.version,
                         ancestors, filepath)
def downloadFromRSS() :
    """Scan the showrss feed and start a download for every episode
    that is not already present in the local database.
    """
    LOGGER.info('*** START DOWNLOADING SUBTITLES ***')
    conn = sqlite3.connect(DB_FILE_PATH)
    c = conn.cursor()
    feed = feedparser.parse('http://showrss.info/rss.php?user_id=244944&hd=null&proper=null')
    for item in feed['items'] :
        title = item['title']
        link = item['link']
        # guess series/season/episode metadata from the feed item title
        video = subliminal.Video.fromguess(title, guessit.guess_file_info(title))
        localDbInfo = database.getEpisodeInfo(c, video.series, video.season, video.episode)
        if localDbInfo is None :
            # FIX: parenthesized print -- the statement form is a syntax
            # error on Python 3 (output unchanged on Python 2)
            print('downloading %s' % title)
            downloadFromMagnetLink(link)
            database.updateEpisode(c, video, False, False, False)
        else :
            print('Skip %s: already in database' % title)
    conn.commit()
    conn.close()
def _make_request(callback, check_response = True):
    """
    Make a request
    """
    session = _session()
    session.headers.update({'Content-Type': 'application/json'})
    response = callback(session)
    # 401s happen intermittently; pause a second and retry, at most twice
    for _ in range(2):
        if response.status_code != 401:
            break
        time.sleep(1)
        response = callback(session)
    if check_response and response.status_code >= 400:
        LOGGER.error('Error Response Content: %s', response.content)
        response.raise_for_status()
    return response
def __delete_page(self, page_id, trash_ancestor):
    """
    Delete a page by moving it to the orphan folder
    :param page_id: confluence page id
    :return: None
    """
    LOGGER.info('Moving page %s to ORPHAN...', page_id)

    # fetch the current version and title so the move can bump the version
    info_url = '%s/rest/api/content/%s?expand=version' % (CONFLUENCE_API_URL,
                                                          page_id)
    data = common.make_request_get(info_url).json()
    LOGGER.debug("data: %s", str(data))
    page_id = data[u'id']

    move_payload = {
        "id": page_id,
        "type": "page",
        "title": data[u'title'],
        "version": {
            "number": data[u'version'][u'number'] + 1,
            "minorEdit": True
        },
        'ancestors': common.get_page_as_ancestor(trash_ancestor)
    }
    LOGGER.debug("data: %s", json.dumps(move_payload))
    put_url = '%s/rest/api/content/%s' % (CONFLUENCE_API_URL, page_id)
    common.make_request_put(put_url, data=json.dumps(move_payload))
def make_request_upload(url, file_to_upload):
    """
    Upload a file to a url
    """
    # deliberately not factored together with _make_request: the retry
    # loop is similar but the session setup and the call shape differ,
    # and merging them obscured the logic
    session = _session()
    session.headers.update({'X-Atlassian-Token': 'no-check'})
    response = session.post(url, files=file_to_upload)
    # 401s happen intermittently; pause a second and retry, at most twice
    attempts_left = 2
    while attempts_left and response.status_code == 401:
        attempts_left -= 1
        time.sleep(1)
        response = session.post(url, files=file_to_upload)
    if response.status_code >= 400:
        LOGGER.error('Error Response Content: %s', response.content)
        response.raise_for_status()
    return response
def downloadSubtitles(path_) :
    """Download all subtitles in the given path

    :param String path_
    """
    LOGGER.info('*** START DOWNLOADING SUBTITLES ***')
    if SIMULATE_MODE :
        return

    # make sure the dogpile cache directory exists before configuring it
    if not os.path.isdir(_cachePath) :
        os.makedirs(_cachePath)
    subliminal.cache_region.configure(
        'dogpile.cache.dbm',
        arguments={'filename': _cachePath + 'cachefile.dbm'})

    # scan the folder for videos and their existing subtitles, then
    # download the best english subtitles for them (Python 2: unicode())
    videos = subliminal.scan_videos(unicode(path_), subtitles=True,
                                    embedded_subtitles=True)
    subliminal.download_best_subtitles(videos, {babelfish.Language('eng')})
def createDB(dbFilePath_) :
    """Create table in the given database file

    :param String dbFilePath_: file path of the database file
    """
    # nothing to do if the database file already exists
    if os.path.isfile(dbFilePath_) :
        return
    LOGGER.info('Creating database file: %s', dbFilePath_)
    command = """create table series (id INTEGER PRIMARY KEY, name VARCHAR(50), title VARCHAR(255), season INTEGER, episode INTEGER, sub_en INTEGER, sub_fr INTEGER, downloading INTEGER)"""
    LOGGER.debug(command)
    conn = sqlite3.connect(dbFilePath_)
    c = conn.cursor()
    c.execute(command)
    # BUG FIX: commit before closing -- since Python 3.6 the sqlite3
    # module includes DDL in the implicit transaction, so close() without
    # commit() could discard the CREATE TABLE
    conn.commit()
    conn.close()
def get_title(self, filepath):
    """
    Returns confluence page title extracted from the markdown file
    :param filepath: full path to markdown file
    :return: confluence page title
    """
    cached = self.__TITLE_CACHE_BY_FILE.get(filepath)
    if cached is not None:
        return cached

    # the title is the first line of the file, minus any '#' heading marks
    with open(filepath, 'r') as mdfile:
        basetitle = mdfile.readline().lstrip('#').strip()

    # disambiguate: append ' (n)' until the title clashes with neither a
    # title already claimed by another file nor an unowned existing page
    title = basetitle
    suffix = 0
    while (title in self.__TITLE_CACHE_BY_FILE.values()) or \
          (PAGE_CACHE.is_page_unowned(title)):
        suffix += 1
        title = '%s (%d)' % (basetitle, suffix)

    self.__TITLE_CACHE_BY_FILE[filepath] = title
    LOGGER.info('Title: %s', title)
    return title
def moveFile(srcFilePath_, destFilePath_) :
    """Move a file, creating the destination directory if needed

    :param String srcFilePath_: file to move
    :param String destFilePath_: destination path (directories are created)
    """
    LOGGER.debug('Moving file:')
    LOGGER.debug('  From: %s', srcFilePath_)
    LOGGER.debug('  To:   %s', destFilePath_)
    if SIMULATE_MODE :
        return
    destDir = os.path.dirname(destFilePath_)
    if not os.path.isdir(destDir) :
        os.makedirs(destDir)
    # shutil.move works across filesystems, unlike os.rename
    shutil.move(srcFilePath_, destFilePath_)
def upload_folder(directory, ancestors):
    """
    Upload everything under a folder, recursively
    :param directory: folder to upload
    :param ancestors: parent page in ancestors format
    """
    LOGGER.info('Folder: %s', directory)

    # only process folders with at least one .md file somewhere beneath them
    is_md = lambda file : os.path.splitext(file)[1] == '.md'
    if not common.does_path_contain(directory, is_md):
        LOGGER.info('Skipping folder; no files found')
        return

    # ensure the directory has a landing page, and mark it active
    landing_doc = FILE_API.get_landing_page_doc_file(directory)
    landing_id = PAGE_API.create_dir_landing_page(landing_doc, ancestors)
    CHILD_PAGES.mark_page_active(landing_id)
    landing_ancestors = common.get_page_as_ancestor(landing_id)

    # upload every other .md file here with the landing page as ancestor
    for entry in os.scandir(directory):
        if not (entry.is_file() and is_md(entry)):
            continue
        if os.path.normpath(entry.path) == os.path.normpath(landing_doc):
            continue
        LOGGER.info('Markdown file: %s', entry.name)
        title = FILE_API.get_title(entry.path)
        html = FILE_API.get_html(entry.path)
        if SIMULATE:
            common.log_html(html, title)
        else:
            page_id = PAGE_API.create_or_update_page(title, html,
                                                     landing_ancestors,
                                                     entry.path)
            CHILD_PAGES.mark_page_active(page_id)

    # recurse into subdirectories under this directory's landing page
    for entry in os.scandir(directory):
        if entry.is_dir():
            upload_folder(entry.path, landing_ancestors)
def __upload_attachment(self, page_id, file, comment):
    """
    Upload an attachment

    :param page_id: confluence page id
    :param file: attachment file
    :param comment: attachment comment
    :return: boolean
    """
    # external URLs are not attachments
    if re.search('http.*', file):
        return False
    content_type = mimetypes.guess_type(file)[0]
    filename = os.path.basename(file)
    if not os.path.isfile(file):
        LOGGER.error('File %s cannot be found --> skip ', file)
        return False
    sha = FILE_API.get_sha_hash(file)
    attachment = self.__get_attachment(page_id, filename)
    if attachment:
        if sha == attachment.hash:
            LOGGER.info('File %s has not changed --> skip', file)
            return True
        LOGGER.debug('File %s has changed', file)
        url = '%s/rest/api/content/%s/child/attachment/%s/data' % \
            (CONFLUENCE_API_URL, page_id, attachment.id)
    else:
        LOGGER.debug('File %s is new', file)
        url = '%s/rest/api/content/%s/child/attachment/' % (
            CONFLUENCE_API_URL, page_id)

    LOGGER.info('Uploading attachment %s...', filename)
    # BUG FIX: open the file in a context manager so the handle is closed
    # after the upload (it was previously leaked, and was even opened on
    # the "unchanged --> skip" path above)
    with open(file, 'rb') as att_stream:
        file_to_upload = {
            'comment': comment,
            'file': (filename, att_stream, content_type, {
                'Expires': '0'
            })
        }
        response = common.make_request_upload(url, file_to_upload)
    data = response.json()
    LOGGER.debug('data: %s', str(data))

    # depending on create or update, sometimes you get a collection
    # and sometimes you get a single item
    if u'results' in data:
        data = data[u'results'][0]
    attachment_id = data['id']

    # Set the SHA hash metadata on the attachment so that it can be later compared
    # first, get the current version of the property if it exists
    url = '%s/rest/api/content/%s/property/hash' % (CONFLUENCE_API_URL,
                                                    attachment_id)
    response = common.make_request_get(url, False)
    if response.status_code == 200:
        data = response.json()
        LOGGER.debug('data: %s', str(data))
        version = data[u'version'][u'number']
    else:
        version = 0

    # then set the hash property
    page_json = {
        "value": {
            "sha256": sha
        },
        "version": {
            "number": version + 1,
            "minorEdit": True
        }
    }
    LOGGER.debug('data: %s', json.dumps(page_json))
    response = common.make_request_put(url, data=json.dumps(page_json))
    return True
def update_page(self, page_id, title, body, version, ancestors, filepath):
    """
    Update a page

    :param page_id: confluence page id
    :param title: confluence page title
    :param body: confluence page content
    :param version: confluence page version
    :param ancestors: confluence page ancestor
    :param filepath: markdown file full path
    :return: updated page id
    """
    LOGGER.info('Updating page %s...', title)

    # Add images and attachments
    body = self.__add_images(page_id, body, filepath)

    # See if the page actually needs to be updated or not
    existing = PAGE_CACHE.get_page(title)
    if existing:
        if title == existing.title and \
           body == existing.body and \
           ancestors[0]['id'] == existing.ancestor:
            LOGGER.info('No changes on the page; update not necessary')
            return page_id
        # FIX: corrected typo in the log message ('nessary' -> 'necessary')
        LOGGER.info('Changes detected; update necessary')
        if title != existing.title:
            LOGGER.debug('update required: title %s != %s',
                         title, existing.title)
        if body != existing.body:
            LOGGER.debug('update required: body %s != %s',
                         body, existing.body)
        if ancestors[0]['id'] != existing.ancestor:
            LOGGER.debug('update required: ancestor %s != %s',
                         ancestors[0]['id'], existing.ancestor)

    # the cached copy is now stale; drop it
    PAGE_CACHE.forget_page(title)

    url = '%s/rest/api/content/%s' % (CONFLUENCE_API_URL, page_id)
    page_json = {
        "id": page_id,
        "type": "page",
        "title": title,
        "space": {"key": SPACE_KEY},
        "body": {
            "storage": {
                "value": body,
                "representation": "storage"
            }
        },
        "version": {
            "number": version + 1,
            "minorEdit": True
        },
        'ancestors': ancestors
    }
    response = common.make_request_put(url, data=json.dumps(page_json))

    data = response.json()
    link = '%s%s' % (CONFLUENCE_API_URL, data[u'_links'][u'webui'])
    LOGGER.info("Page updated successfully.")
    LOGGER.info('URL: %s', link)
    return data[u'id']