# coding=utf-8
import logging
import os
import re
import sys
from pprint import pprint

from atlassian import Confluence

# Connection settings for the example. The values are placeholders; replace
# them with a real base URL and credentials before running.
CONFLUENCE_URL = "http://confluence.example.com"
CONFLUENCE_LOGIN = "******"
CONFLUENCE_PASSWORD = "******"

# DEBUG level makes the root logger emit everything, which is useful for
# following what the client does while experimenting.
logging.basicConfig(level=logging.DEBUG)

# Client used for the requests below; built from the settings above with an
# explicit timeout of 180.
confluence = Confluence(
    url=CONFLUENCE_URL,
    username=CONFLUENCE_LOGIN,
    password=CONFLUENCE_PASSWORD,
    timeout=180,
)

# Fetch the content of the space whose key is "SPACE" and pretty-print the
# entries stored under page -> results.
pgs = confluence.get_space_content("SPACE")
pprint(pgs["page"]["results"])

# Matches URLs hosted on docs.google.com or drive.google.com. The dots are
# escaped so that they only match a literal "." in the host name, and the
# pattern is compiled once instead of being re-evaluated for every URL.
GOOGLE_DRIVE_URL_PATTERN = re.compile(r"https?://(docs|drive)\.google\.com/")

if __name__ == "__main__":
    # Stop early with a readable message when the check fails.
    # NOTE(review): verify_environment_variables is defined outside this
    # excerpt; presumably it checks both variables named below - confirm.
    if not verify_environment_variables():
        sys.exit(
            "Make sure that the environment variables ATLASSIAN_EMAIL and "
            "ATLASSIAN_API_TOKEN are set."
        )

    # Credentials are read from the environment rather than hard-coded.
    confluence = Confluence(
        url="https://pyconjp.atlassian.net",
        username=os.getenv("ATLASSIAN_EMAIL"),
        password=os.getenv("ATLASSIAN_API_TOKEN"),
    )

    space_content = confluence.get_space_content("pyconjp")
    pages = space_content["page"]["results"]
    print(f"{len(pages)} pages")
    print("-" * 40)

    for page in pages:
        title = page["title"]
        html_body = page["body"]["storage"]["value"]
        # NOTE(review): trim_html_tags and extract_anchor_urls are defined
        # outside this excerpt; presumably they strip markup and collect the
        # link targets of the page body respectively - confirm.
        content = trim_html_tags(html_body)
        print(f"{title} ({len(content)} characters)")

        urls = extract_anchor_urls(html_body)
        for url in urls:
            # Pick out links that look like direct links to a shared drive.
            if GOOGLE_DRIVE_URL_PATTERN.match(url):
                print(url)