Example #1
    def __get_attachment(self, page_id, filename):
        """
        Get page attachment

        :param page_id: confluence page id
        :param filename: attachment filename
        :return: attachment info in case of success, False otherwise
        """
        url = '%s/rest/api/content/%s/child/attachment?filename=%s' \
              '&expand=metadata.properties.hash' \
              % (CONFLUENCE_API_URL, page_id, filename)

        response = common.make_request_get(url)
        data = response.json()
        LOGGER.debug('data: %s', str(data))

        if len(data[u'results']) >= 1:
            data = data[u'results'][0]
            att_id = data[u'id']

            att_hash = None
            props = data[u'metadata'][u'properties']
            if u'hash' in props:
                hash_prop = props[u'hash'][u'value']
                if u'sha256' in hash_prop:
                    att_hash = hash_prop[u'sha256']

            att_info = collections.namedtuple('AttachmentInfo', ['id', 'hash'])
            attr_info = att_info(att_id, att_hash)
            return attr_info

        return False
Example #2
def main():
    """
    Main program

    :return:
    """
    LOGGER.info('----------------------------------')
    LOGGER.info('Markdown to Confluence Upload Tool')
    LOGGER.info('----------------------------------')

    LOGGER.info('Space Key: %s', SPACE_KEY)

    if not NOTRACK:
        CHILD_PAGES.track_child_pages()

    # upload everything under the ancestor
    root_ancestors = common.get_page_as_ancestor(ANCESTOR)

    for root in DOCUMENTATION_ROOTS:
        upload_folder(root, root_ancestors)

    # for any pages with refs that could not be resolved,
    # revisit them and try again
    RESOLVERS.resolve_missing_refs()

    if not NOTRACK and CHILD_PAGES.trash_needed():
        trash = PAGE_API.create_trash()
        CHILD_PAGES.trim_child_pages(trash)

    LOGGER.info('Markdown Converter completed successfully.')
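common.get_page_as_ancestor is used throughout these examples but not listed. A minimal sketch of what such a helper presumably does, assuming the standard Confluence REST 'ancestors' format (a single-element list of {'id': ...}, which matches the ancestors[0]['id'] accesses in the update code further down):

def get_page_as_ancestor(page_id):
    # Wrap a page id in the 'ancestors' structure expected by the Confluence REST API.
    return [{'id': page_id}]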
Example #3
    def get_html(self, filepath):
        """
        Generate html from md file

        :param filepath: the file to translate to html
        :return: html translation
        """
        with codecs.open(filepath, 'r', 'utf-8') as mdfile:
            read = mdfile.read()
            read = macros.remove_collapsible_headings(read)
            html = markdown.markdown(read, extensions=['markdown.extensions.tables',
                                                    'markdown.extensions.fenced_code',
                                                    'mdx_truly_sane_lists'])
        html = '\n'.join(html.split('\n')[1:])
        html = macros.add_note(html)
        html = macros.convert_info_macros(html)
        html = macros.convert_comment_block(html)
        html = macros.convert_code_block(html)
        html = macros.remove_empty_list_items(html)
        if CONTENTS:
            html = macros.add_contents(html)

        html = macros.process_refs(html)
        html = PAGE_CACHE.resolve_refs(html, filepath)
        if LOG_HTML:
            title = self.get_title(filepath)
            with open(os.path.join(os.path.dirname(LOG_FILE), title + '.html'), 'w+') as html_log_file:
                html_log_file.write('<h1>' + title + '</h1>')
                html_log_file.write(html)
        else:
            LOGGER.debug('file: %s\n\nhtml: %s\n\n', filepath, html)

        return html
Example #4
    def resolve_missing_refs(self):
        """
        Resolve missing refs
        """
        refs_to_resolve = PAGE_CACHE.get_refs_to_resolve_again()
        if len(refs_to_resolve) > 0:
            LOGGER.info('-- Attempting to resolve cross-references --')
            for page in refs_to_resolve:
                self.__update_page_refs_only(page)
Example #5
    def __label_page(self, page_id):
        """
        Attach a label to the page to indicate it was auto-generated
        """
        LOGGER.info("Labeling page %s", page_id)

        url = '%s/rest/api/content/%s/label' % (CONFLUENCE_API_URL, page_id)
        page_json = [{"name": "md_to_conf"}]

        common.make_request_post(url, data=json.dumps(page_json))
Example #6
    def create_or_update_page(self, title, body, ancestors, filepath):
        """
        Create a new page, or update it if it already exists

        :param title: confluence page title
        :param body: confluence page content
        :param ancestors: confluence page ancestor
        :param filepath: markdown file full path
        :return: created or updated page id
        """
        page = PAGE_CACHE.get_page(title)
        if page:
            return self.update_page(page.id, title, body, page.version,
                                    ancestors, filepath)
        else:
            LOGGER.info('Creating page %s...', title)

            url = '%s/rest/api/content/' % CONFLUENCE_API_URL
            new_page = {
                'type': 'page',
                'title': title,
                'space': {
                    'key': SPACE_KEY
                },
                'body': {
                    'storage': {
                        'value': body,
                        'representation': 'storage'
                    }
                },
                'ancestors': ancestors
            }
            LOGGER.debug("data: %s", json.dumps(new_page))

            response = common.make_request_post(url, data=json.dumps(new_page))

            data = response.json()
            space_name = data[u'space'][u'name']
            page_id = data[u'id']
            version = data[u'version'][u'number']
            link = '%s%s' % (CONFLUENCE_API_URL, data[u'_links'][u'webui'])

            LOGGER.info('Page created in %s with ID: %s.', space_name, page_id)
            LOGGER.info('URL: %s', link)

            # label the page
            self.__label_page(page_id)

            img_check = re.search(r'<img(.*?)\/>', body)
            if img_check:
                LOGGER.info('Attachments found, update procedure called.')
                return self.update_page(page_id, title, body, version,
                                        ancestors, filepath)
            else:
                return page_id
Example #7
def deleteEmptyDirectories(path_, deletePath_) :
	"""Recursively delete empty directories under the given path

	:param String path_: directory to scan
	:param bool deletePath_: also delete path_ itself if it ends up empty
	"""
	dirList = os.listdir(path_)
	for dir in dirList :
		dirPath = os.path.join(path_, dir)
		if ( os.path.isdir(dirPath) ) :
			deleteEmptyDirectories(dirPath, True)
	
	if deletePath_ :
		dirList = os.listdir(path_)
		if (len(dirList) == 0) :
			LOGGER.info('Deleting empty directory: %s', path_)
			if not SIMULATE_MODE :
				os.rmdir(path_)
Example #8
    def trim_child_pages(self, trash_ancestor):
        """
        Trim (delete) any child pages under the "active"
        children of the ANCESTOR which are not "active"
        """
        # remove any pages that are no longer needed; any top-level
        # page under the ancestor, and its children, are spared; but
        # any children under any page that we have touched are fair
        # game to be removed
        for original_child_page in self.__ORIGINAL_CHILD_PAGES:
            if original_child_page not in self.__ACTIVE_PAGES:
                LOGGER.info("Sparing original page: %s", original_child_page)
            else:
                for child in self.__ORIGINAL_CHILD_PAGES[original_child_page]:
                    if child not in self.__ACTIVE_PAGES:
                        self.__delete_page(child, trash_ancestor)
Example #9
def updateEpisode(cursor_, video_, subEng_, subFr_, downloading) :
	"""Update the database for a specific episode
	
	:param cursor_: sqlite3 cursor
	:param video_: video to update in database
	:param bool subEng_: is there an associated english sub for this episode
	:param bool subFr_: is there an associated french sub for this episode
	:param bool downloading: is this episode currently downloading
	"""
	LOGGER.info('Updating database for %s', video_.name)
	
	# use a parameterized query so quotes in the series name cannot break the statement
	command = 'SELECT id FROM series WHERE name=? AND season=? AND episode=?'
	cursor_.execute(command, (video_.series, video_.season, video_.episode))
	row = cursor_.fetchone()
	foundId = row[0] if row is not None else None
	cursor_.execute('INSERT OR REPLACE INTO series (id, name, title, season, episode, sub_en, sub_fr, downloading) VALUES(?, ?, ?, ?, ?, ?, ?, 0)', (foundId, video_.series, video_.title, video_.season, str(video_.episode), str(int(subEng_)), str(int(subFr_))))
Example #10
def check_for_errors(response):
    """
   Check response for errors and log help if necessary

   :param response: the received response
   :return
   """
    try:
        response.raise_for_status()
    except requests.RequestException as err:
        LOGGER.error('err.response: %s', err)
        if response.status_code == 404:
            LOGGER.error('Error: Page not found. Check the following are correct:')
            LOGGER.error('Space Key : %s', SPACE_KEY)
            LOGGER.error('Confluence URL : %s', CONFLUENCE_API_URL)
        else:
            LOGGER.error('Error: %d - %s', response.status_code, response.content)
        sys.exit(1)
Example #11
    def get_page(self, title):
        """
        Retrieve page details by title

        :param title: page title
        :return: Confluence page info
        """
        if title in self.__CACHED_PAGE_INFO:
            return self.__CACHED_PAGE_INFO[title]

        LOGGER.info('Retrieving page information: %s', title)
        url = '%s/rest/api/content?title=%s&spaceKey=%s' \
              '&expand=version,ancestors,metadata.labels,body.storage' \
              % (CONFLUENCE_API_URL, urllib.parse.quote_plus(title), SPACE_KEY)

        response = common.make_request_get(url)
        data = response.json()
        LOGGER.debug("data: %s", str(data))

        if len(data[u'results']) >= 1:
            data = data[u'results'][0]
            page_id = data[u'id']
            version_num = data[u'version'][u'number']
            link = '%s%s' % (CONFLUENCE_API_URL, data[u'_links'][u'webui'])
            ancestor = data[u'ancestors'][-1][u'id']
            labels = map(lambda r: r[u'name'],
                         data[u'metadata'][u'labels'][u'results'])
            body = data[u'body'][u'storage'][u'value']

            # These properties do not round-trip; confluence adds them, so strip them out
            body = re.sub(' ac:schema-version="[^"]+"', '', body)
            body = re.sub(' ac:macro-id="[^"]+"', '', body)
            # Confluence replaces some quotes (but not all) with xml quotes
            body = re.sub('&quot;', '"', body)

            title = data[u'title']

            page_info = collections.namedtuple('PageInfo',
                            ['id', 'version', 'link', 'ancestor', 'labels', 'body', 'title'])
            page = page_info(page_id, version_num, link, ancestor, labels, body, title)
            self.__CACHED_PAGE_INFO[title] = page
            return page

        return False
Example #12
def updateSeriesDB(dbFilePath_, seriesPath_) :
	"""Update the database for a given path
	
	:param String dbFilePath_: file path of the database file
	:param String seriesPath_: root directory containing the series to scan
	"""
	conn = sqlite3.connect(dbFilePath_)
	c = conn.cursor()
	
	LOGGER.info('*** START UPDATING DATABASE ***')
	videos = subliminal.scan_videos(seriesPath_, True, True)
	
	for video in videos :
		subEn = babelfish.Language('eng') in video.subtitle_languages
		subFr = babelfish.Language('fra') in video.subtitle_languages
		updateEpisode(c, video, subEn, subFr, True)
		
	conn.commit()
	conn.close()
Example #13
    def __get_direct_child_pages(self, page_id):
        """
        Retrieve every direct child page id

        :param page_id: page id
        :return: ids of immediate child pages
        """
        url = '%s/rest/api/content/search?cql=parent=%s' % \
            (CONFLUENCE_API_URL, urllib.parse.quote_plus(page_id))

        response = common.make_request_get(url)
        data = response.json()
        LOGGER.debug("data: %s", str(data))

        page_ids = []
        for result in data[u'results']:
            page_ids.append(result[u'id'])

        return page_ids
Example #14
    def __get_child_pages(self, page_id):
        """
        Retrieve details of the child pages by page id

        :param page_id: page id
        :return: the ids of all the child pages
        """
        if page_id in self.__CHILD_PAGES_CACHE:
            return self.__CHILD_PAGES_CACHE[page_id]

        LOGGER.info('Retrieving information of original child pages: %s', page_id)
        page_ids = self.__get_direct_child_pages(page_id)

        # iterate with a separate name so page_id still refers to the parent page below
        for child_id in page_ids:
            child_pages = self.__get_child_pages(child_id)
            if child_pages:
                page_ids = page_ids + list(set(child_pages) - set(page_ids))

        self.__CHILD_PAGES_CACHE[page_id] = page_ids
        return page_ids
Example #15
    def __update_page_refs_only(self, filepath):
        """
        Update refs on a page without changing anything else about it

        :param filepath: markdown file full path
        """
        title = FILE_API.get_title(filepath)
        LOGGER.info('Updating page refs %s...', title)

        # update the page
        page = PAGE_CACHE.get_page(title)
        html = FILE_API.get_html(filepath)
        version = page.version
        ancestors = common.get_page_as_ancestor(page.ancestor)

        LOGGER.info('.. title: %s .. version: %d .. ancestor: %s ..', title,
                    version, page.ancestor)

        PAGE_API.update_page(page.id, title, html, version, ancestors,
                             filepath)
Example #16
def downloadFromRSS() :
	"""Fetch the showrss.info feed, download new episodes and register them in the database"""
	LOGGER.info('*** START DOWNLOADING SUBTITLES ***')
	
	conn = sqlite3.connect(DB_FILE_PATH)
	c = conn.cursor()
	
	feed = feedparser.parse('http://showrss.info/rss.php?user_id=244944&hd=null&proper=null')
	for item in feed['items'] :
		title = item['title']
		link = item['link']
		video = subliminal.Video.fromguess(title, guessit.guess_file_info(title))
		localDbInfo = database.getEpisodeInfo(c, video.series, video.season, video.episode)
		if ( localDbInfo is None ) :
			print 'downloading %s' % title
			downloadFromMagnetLink(link)
			database.updateEpisode(c, video, False, False, False)
		else :
			print 'Skip %s: already in database' % title
		
	conn.commit()
	conn.close()
Example #17
def _make_request(callback, check_response=True):
    """
    Make a request
    """
    session = _session()
    session.headers.update({'Content-Type': 'application/json'})
    response = callback(session)

    # This happens intermittently; if it does, wait a second and try again
    retries = 2
    while retries and response.status_code == 401:
        retries = retries - 1
        time.sleep(1)
        response = callback(session)

    if check_response:
        if response.status_code >= 400:
            LOGGER.error('Error Response Content: %s', response.content)
        response.raise_for_status()

    return response
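common.make_request_get, make_request_post and make_request_put, called by the other examples, are not listed here. Assuming they are thin wrappers around _make_request with the matching HTTP verb, they might look roughly like this (a sketch under that assumption, not the project's verbatim code):

def make_request_get(url, check_response=True):
    return _make_request(lambda session: session.get(url), check_response)

def make_request_post(url, data=None):
    return _make_request(lambda session: session.post(url, data=data))

def make_request_put(url, data=None):
    return _make_request(lambda session: session.put(url, data=data))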
Example #18
    def __delete_page(self, page_id, trash_ancestor):
        """
        Delete a page by moving it to the orphan folder

        :param page_id: confluence page id
        :return: None
        """
        LOGGER.info('Moving page %s to ORPHAN...', page_id)
        url = '%s/rest/api/content/%s?expand=version' % (CONFLUENCE_API_URL,
                                                         page_id)

        response = common.make_request_get(url)
        data = response.json()
        LOGGER.debug("data: %s", str(data))

        page_id = data[u'id']
        version = data[u'version'][u'number']
        title = data[u'title']
        ancestors = common.get_page_as_ancestor(trash_ancestor)

        url = '%s/rest/api/content/%s' % (CONFLUENCE_API_URL, page_id)
        page_json = {
            "id": page_id,
            "type": "page",
            "title": title,
            "version": {
                "number": version + 1,
                "minorEdit": True
            },
            'ancestors': ancestors
        }
        LOGGER.debug("data: %s", json.dumps(page_json))

        common.make_request_put(url, data=json.dumps(page_json))
Example #19
def make_request_upload(url, file_to_upload):
    """
    Upload a file to a url
    """
    # this is different enough from the normal make_request
    # that factoring out the commonalities makes it hard
    # to follow the logic; I preferred to just duplicate
    session = _session()
    session.headers.update({'X-Atlassian-Token': 'no-check'})
    response = session.post(url, files=file_to_upload)

    # This happens intermittently; if it does, wait a second and try again
    retries = 2
    while retries and response.status_code == 401:
        retries = retries - 1
        time.sleep(1)
        response = session.post(url, files=file_to_upload)

    if response.status_code >= 400:
        LOGGER.error('Error Response Content: %s', response.content)
    response.raise_for_status()
    return response
Example #20
def downloadSubtitles(path_) :
	"""Download all subtitles in the given path

	:param String path_: directory to scan for videos and missing subtitles
	"""
	
	LOGGER.info('*** START DOWNLOADING SUBTITLES ***')

	if SIMULATE_MODE :
		return
	
	# configure the cache
	if (not os.path.isdir(_cachePath)) :
		os.makedirs(_cachePath)
	subliminal.cache_region.configure('dogpile.cache.dbm', arguments={'filename': _cachePath + 'cachefile.dbm'})
	
	# scan for videos in the folder and their subtitles
	upath = unicode(path_)
	videos = subliminal.scan_videos(upath, subtitles=True, embedded_subtitles=True)
	
	# download
	subs = subliminal.download_best_subtitles(videos, {babelfish.Language('eng')})
Example #21
def createDB(dbFilePath_) :
	"""Create table in the given database file
	
	:param String dbFilePath_: file path of the database file
	"""
	if os.path.isfile(dbFilePath_) :
		return
		#os.remove(dbFilePath_)

	LOGGER.info('Creating database file: %s', dbFilePath_)
	command = """create table series (id INTEGER PRIMARY KEY,
				name VARCHAR(50),
				title VARCHAR(255),
				season INTEGER,
				episode INTEGER,
				sub_en INTEGER,
				sub_fr INTEGER,
				downloading INTEGER)"""
	LOGGER.debug(command)
		
	conn = sqlite3.connect(dbFilePath_)
	c = conn.cursor()
	c.execute(command)
	conn.close()
Example #22
    def get_title(self, filepath):
        """
        Returns confluence page title extracted from the markdown file

        :param filepath: full path to the markdown file
        :return: confluence page title
        """
        if filepath in self.__TITLE_CACHE_BY_FILE:
            return self.__TITLE_CACHE_BY_FILE[filepath]
        with open(filepath, 'r') as mdfile:
            title = mdfile.readline().lstrip('#').strip()
            mdfile.seek(0)

        basetitle = title
        i = 0
        while (title in self.__TITLE_CACHE_BY_FILE.values()) or \
              (PAGE_CACHE.is_page_unowned(title)):
            i += 1
            title = '%s (%d)' % (basetitle, i)

        self.__TITLE_CACHE_BY_FILE[filepath] = title

        LOGGER.info('Title: %s', title)
        return title
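PAGE_CACHE.is_page_unowned is not shown. A plausible reading, given the 'md_to_conf' label attached in Example #5, is that a title is "unowned" when a page with that title already exists in Confluence but was not created by this tool, so get_title must pick a different title. A hypothetical sketch under that assumption:

    def is_page_unowned(self, title):
        # hypothetical: the page exists but lacks the 'md_to_conf' label set by __label_page
        page = self.get_page(title)
        return bool(page) and 'md_to_conf' not in list(page.labels)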
Example #23
def moveFile(srcFilePath_, destFilePath_) :
	"""Move a file, creating the destination directory if necessary

	:param String srcFilePath_: source file path
	:param String destFilePath_: destination file path
	"""
	LOGGER.debug('Moving file:')
	LOGGER.debug('  From: %s', srcFilePath_)
	LOGGER.debug('  To: %s', destFilePath_)
	
	if SIMULATE_MODE :
		return
	
	if (not os.path.isdir(os.path.dirname(destFilePath_))) :
		os.makedirs(os.path.dirname(destFilePath_))
	#os.rename(srcFilePath_, destFilePath_)
	shutil.move(srcFilePath_, destFilePath_)
Example #24
def upload_folder(directory, ancestors):
    """
    Upload everything under a folder, recursively

    :param directory: folder to upload
    :param ancestors: parent page in ancestors format
    """
    LOGGER.info('Folder: %s', directory)

    # there must be at least one .md file under this folder or a
    # subfolder in order for us to proceed with processing it
    if not common.does_path_contain(directory, lambda file: os.path.splitext(file)[1] == '.md'):
        LOGGER.info('Skipping folder; no files found')
        return

    # Make sure there is a landing page for the directory
    doc_file = FILE_API.get_landing_page_doc_file(directory)
    dir_landing_page_id = PAGE_API.create_dir_landing_page(doc_file, ancestors)
    CHILD_PAGES.mark_page_active(dir_landing_page_id)
    dir_landing_as_ancestors = common.get_page_as_ancestor(dir_landing_page_id)

    # Walk through all other .md files in this directory and upload them all with
    # the landing page as its ancestor
    for file in os.scandir(directory):
        if file.is_file() and os.path.splitext(file)[1] == '.md':
            if os.path.normpath(file.path) != os.path.normpath(doc_file):
                LOGGER.info('Markdown file: %s', file.name)
                title = FILE_API.get_title(file.path)
                html = FILE_API.get_html(file.path)

                if SIMULATE:
                    common.log_html(html, title)
                else:
                    page_id = \
                        PAGE_API.create_or_update_page(title, html, \
                            dir_landing_as_ancestors, file.path)
                    CHILD_PAGES.mark_page_active(page_id)

    # Walk through all subdirectories and recursively upload them,
    # using this directory's landing page as the ancestor for them
    for folder in os.scandir(directory):
        if folder.is_dir():
            upload_folder(folder.path, dir_landing_as_ancestors)
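common.does_path_contain is not included in these examples; a minimal sketch of such a helper, assuming it simply walks the directory tree and tests each file name against the predicate:

import os

def does_path_contain(path, predicate):
    # Return True if any file under path (recursively) satisfies the predicate.
    for _root, _dirs, files in os.walk(path):
        for name in files:
            if predicate(name):
                return True
    return False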
Example #25
    def __upload_attachment(self, page_id, file, comment):
        """
        Upload an attachment

        :param page_id: confluence page id
        :param file: attachment file
        :param comment: attachment comment
        :return: boolean
        """
        if re.search('http.*', file):
            return False

        content_type = mimetypes.guess_type(file)[0]
        filename = os.path.basename(file)

        if not os.path.isfile(file):
            LOGGER.error('File %s cannot be found --> skip ', file)
            return False

        sha = FILE_API.get_sha_hash(file)

        file_to_upload = {
            'comment': comment,
            'file': (filename, open(file, 'rb'), content_type, {
                'Expires': '0'
            })
        }

        attachment = self.__get_attachment(page_id, filename)
        if attachment:
            if sha == attachment.hash:
                LOGGER.info('File %s has not changed --> skip', file)
                return True
            else:
                LOGGER.debug('File %s has changed', file)

            url = '%s/rest/api/content/%s/child/attachment/%s/data' % \
                (CONFLUENCE_API_URL, page_id, attachment.id)
        else:
            LOGGER.debug('File %s is new', file)
            url = '%s/rest/api/content/%s/child/attachment/' % (
                CONFLUENCE_API_URL, page_id)

        LOGGER.info('Uploading attachment %s...', filename)
        response = common.make_request_upload(url, file_to_upload)

        data = response.json()
        LOGGER.debug('data: %s', str(data))

        # depending on create or update, sometimes you get a collection
        # and sometimes you get a single item
        if u'results' in data:
            data = data[u'results'][0]

        attachment_id = data['id']

        # Set the SHA hash metadata on the attachment so that it can be later compared

        # first, get the current version of the property if it exists
        url = '%s/rest/api/content/%s/property/hash' % (CONFLUENCE_API_URL,
                                                        attachment_id)
        response = common.make_request_get(url, False)

        if response.status_code == 200:
            data = response.json()
            LOGGER.debug('data: %s', str(data))
            version = data[u'version'][u'number']
        else:
            version = 0

        # then set the hash property
        page_json = {
            "value": {
                "sha256": sha
            },
            "version": {
                "number": version + 1,
                "minorEdit": True
            }
        }
        LOGGER.debug('data: %s', json.dumps(page_json))
        response = common.make_request_put(url, data=json.dumps(page_json))

        return True
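FILE_API.get_sha_hash is not shown either; assuming it returns the hex SHA-256 digest of the file contents (which is what the 'sha256' property stored above implies), the underlying computation might look like this, shown here as a plain function for brevity:

import hashlib

def get_sha_hash(filepath):
    # Return the hex SHA-256 digest of a file's contents, reading in chunks.
    sha = hashlib.sha256()
    with open(filepath, 'rb') as stream:
        for chunk in iter(lambda: stream.read(65536), b''):
            sha.update(chunk)
    return sha.hexdigest()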
Example #26
    def update_page(self, page_id, title, body, version, ancestors, filepath):
        """
        Update a page

        :param page_id: confluence page id
        :param title: confluence page title
        :param body: confluence page content
        :param version: confluence page version
        :param ancestors: confluence page ancestor
        :param filepath: markdown file full path
        :return: updated page id
        """
        LOGGER.info('Updating page %s...', title)

        # Add images and attachments
        body = self.__add_images(page_id, body, filepath)

        # See if the page actually needs to be updated or not
        existing = PAGE_CACHE.get_page(title)
        if existing:
            if title == existing.title and \
                body == existing.body and \
                ancestors[0]['id'] == existing.ancestor:
                LOGGER.info('No changes on the page; update not necessary')
                return page_id
            else:
                LOGGER.info('Changes detected; update necessary')
                if title != existing.title:
                    LOGGER.debug('update required: title %s != %s', title,
                                 existing.title)
                if body != existing.body:
                    LOGGER.debug('update required: body %s != %s', body,
                                 existing.body)
                if ancestors[0]['id'] != existing.ancestor:
                    LOGGER.debug('update required: ancestor %s != %s',
                                 ancestors[0]['id'], existing.ancestor)

        PAGE_CACHE.forget_page(title)

        url = '%s/rest/api/content/%s' % (CONFLUENCE_API_URL, page_id)
        page_json = {
            "id": page_id,
            "type": "page",
            "title": title,
            "space": {
                "key": SPACE_KEY
            },
            "body": {
                "storage": {
                    "value": body,
                    "representation": "storage"
                }
            },
            "version": {
                "number": version + 1,
                "minorEdit": True
            },
            'ancestors': ancestors
        }

        response = common.make_request_put(url, data=json.dumps(page_json))

        data = response.json()
        link = '%s%s' % (CONFLUENCE_API_URL, data[u'_links'][u'webui'])

        LOGGER.info("Page updated successfully.")
        LOGGER.info('URL: %s', link)
        return data[u'id']