Example #1
0
def pass_defaults():
    """Return the default values ``dataobjs`` and ``SEP`` as a dict.

    ``SEP`` is the path separator; a lone backslash is doubled so that it
    survives being parsed by js on Windows
    (https://github.com/Uzay-G/archivy/issues/115).
    """
    items = data.get_items()
    js_separator = "\\\\" if sep == "\\" else sep
    return {"dataobjs": items, "SEP": js_separator}
Example #2
0
def parse_pocket():
    """Fetch bookmarks from the Pocket API, store them and redirect to "/".

    When the request carries ``new=1``, the stored Pocket ``code`` is first
    exchanged for an access token, which is written to the ``pocket_key``
    record. Bookmarks are then retrieved from Pocket (only those newer than
    the most recent stored pocket bookmark, when there is one) and every
    entry whose status is not 2 is inserted as a ``DataObj``.

    Returns:
        The result of ``redirect("/")``.
    """
    db = get_db()
    pocket = db.search(Query().type == "pocket_key")[0]
    if request.args.get("new") == "1":
        auth_data = {
            "consumer_key": pocket["consumer_key"],
            "code": pocket["code"]}
        resp = requests.post(
            "https://getpocket.com/v3/oauth/authorize",
            json=auth_data,
            headers={
                "X-Accept": "application/json",
                "Content-Type": "application/json"})
        # decode the response body once instead of once per field
        auth = resp.json()
        db.update(
            operations.set("access_token", auth["access_token"]),
            Query().type == "pocket_key")
        flash(f"{auth['username']} Signed in!")

    # re-read the record so that a freshly stored access token is picked up
    pocket = db.search(Query().type == "pocket_key")[0]

    pocket_data = {
        "consumer_key": pocket["consumer_key"],
        "access_token": pocket["access_token"],
        "sort": "newest"}

    # get date of latest call to pocket api
    epoch = datetime(1970, 1, 1)
    since = epoch
    for post in data.get_items(
            collections=["pocket_bookmark"],
            structured=False):
        date = datetime.strptime(post["date"].replace("-", "/"), "%x")
        since = max(date, since)

    # Compare against the sentinel *before* converting: the timestamp of a
    # naive epoch datetime is only 0 when the local timezone is UTC, so a
    # truthiness test on the timestamp would send a bogus `since` elsewhere.
    if since != epoch:
        pocket_data["since"] = datetime.timestamp(since)
    bookmarks = requests.post(
        "https://getpocket.com/v3/get",
        json=pocket_data).json()

    # api spec: https://getpocket.com/developer/docs/v3/retrieve
    # When nothing matches, `list` comes back as an empty list instead of a
    # dict, so `.values()` must not be called on it.
    pocket_list = bookmarks["list"]
    entries = pocket_list.values() if pocket_list else []
    for pocket_bookmark in entries:
        if int(pocket_bookmark["status"]) != 2:
            desc = pocket_bookmark["excerpt"] if int(
                pocket_bookmark["is_article"]) else None
            # NOTE(review): saved with type "pocket_bookmarks" while the
            # lookup above filters on "pocket_bookmark" -- confirm whether
            # the two are meant to match.
            bookmark = DataObj(
                desc=desc,
                url=pocket_bookmark["resolved_url"],
                date=datetime.now(),
                type="pocket_bookmarks")
            bookmark.process_bookmark_url()

            print(bookmark.insert())
    return redirect("/")
Example #3
0
def index():
    """Render ``home.html`` for the directory given by the ``path`` query arg."""
    current_path = request.args.get("path", "")
    listing = data.get_items(path=current_path)
    search_on = app.config["SEARCH_CONF"]["enabled"]
    context = {
        "title": "Home",
        "search_enabled": search_on,
        "dir": listing,
        "current_path": current_path,
        "new_folder_form": forms.NewFolderForm(),
        "delete_form": forms.DeleteFolderForm(),
    }
    return render_template("home.html", **context)
Example #4
0
def sync(force):
    """Save new Pocket bookmarks into the ``pocket`` directory.

    Reads the Pocket credentials from the ``pocket_key`` database record,
    asks the Pocket API for bookmarks and inserts every entry whose status
    is not 2 and whose URL has not been saved yet.

    Args:
        force: when truthy, the ``since`` filter is not sent to Pocket;
            URLs that are already saved are still skipped.
    """
    with app.app_context():
        db = get_db()

        # update pocket dictionary
        pocket = db.search(Query().type == "pocket_key")[0]

        pocket_data = {
            "consumer_key": pocket["consumer_key"],
            "access_token": pocket["access_token"],
            "sort": "newest",
        }

        # get date of latest call to pocket api
        epoch = datetime(1970, 1, 1)
        since = epoch
        create_dir("pocket")
        already_saved = set()
        for post in get_items(path="pocket/", structured=False):
            date = datetime.strptime(post["date"].replace("-", "/"), "%x")
            already_saved.add(post["url"])
            since = max(date, since)

        # only restrict the query when something was saved before
        if since != epoch and not force:
            pocket_data["since"] = datetime.timestamp(since)
        bookmarks = requests.post("https://getpocket.com/v3/get",
                                  json=pocket_data).json()

        # api spec: https://getpocket.com/developer/docs/v3/retrieve
        # for some reason, if the `list` attribute is empty it returns a list instead of a dict.
        if not bookmarks["list"]:
            click.echo("No new bookmarks.")
            return

        for pocket_bookmark in bookmarks["list"].values():
            url = pocket_bookmark.get("resolved_url",
                                      pocket_bookmark["given_url"])
            if int(pocket_bookmark["status"]) == 2 or url in already_saved:
                continue
            bookmark = DataObj(
                url=url,
                date=datetime.now(),
                type="pocket_bookmark",
                path="pocket",
            )
            try:
                bookmark.process_bookmark_url()
                click.echo(f"Saving {bookmark.title}...")
                bookmark.insert()
            except Exception:
                # Best effort per bookmark, but unlike a bare `except:` this
                # lets KeyboardInterrupt / SystemExit stop the sync.
                click.echo(
                    f"Could not save {bookmark.url} - website may already be down."
                )
        click.echo("Done!")
Example #5
0
def test_creating_bookmark_without_passing_path_saves_to_default_dir(
        test_app, client, mocked_responses):
    """A bookmark posted without a path ends up in the default bookmarks dir."""
    target_url = "http://example.org"
    default_dir = "bookmarks"

    mocked_responses.add(responses.GET, target_url, body="Example\n")
    test_app.config["DEFAULT_BOOKMARKS_DIR"] = default_dir
    create_dir(default_dir)

    resp = client.post("/api/bookmarks", json={"url": target_url})

    saved = get_items(structured=False)[0]
    # verify it was saved to default bookmark dir
    assert "bookmarks" in saved["path"]
Example #6
0
def index():
    """Render ``home.html`` for the requested directory.

    The ``path`` query argument (leading slashes stripped) selects the
    directory. If listing it raises ``FileNotFoundError``, an error is
    flashed and the client is redirected to "/".
    """
    current_path = request.args.get("path", "").lstrip("/")
    try:
        listing = data.get_items(path=current_path)
    except FileNotFoundError:
        flash("Directory does not exist.", "error")
        return redirect("/")

    page_title = current_path if current_path else "root"
    search_on = app.config["SEARCH_CONF"]["enabled"]
    context = {
        "title": page_title,
        "search_enabled": search_on,
        "dir": listing,
        "current_path": current_path,
        "new_folder_form": forms.NewFolderForm(),
        "delete_form": forms.DeleteFolderForm(),
    }
    return render_template("home.html", **context)
Example #7
0
def test_initialization_in_diff_than_curr_dir(test_app, cli_runner, click_cli):
    """Run ``init`` with an explicit data dir and check the config follows it.

    The command is fed a freshly created temp directory as data dir and
    declines both prompts that follow; the written config and the app's
    ``USER_DIR`` must then point at that directory.
    """
    conf_path = os.path.join(test_app.config["USER_DIR"], "config.yml")
    data_dir = mkdtemp()

    with cli_runner.isolated_filesystem():
        # input data dir - localhost - don't use ES and don't create user
        # NOTE(review): `cli` is not a parameter of this test (the fixture is
        # `click_cli`); presumably it is imported at module level -- confirm.
        res = cli_runner.invoke(cli, ["init"], input=f"{data_dir}\nn\nn\n\n")

    assert "Config successfully created" in res.output
    # read through a context manager so the file handle is closed promptly
    with open(conf_path) as conf_file:
        conf = conf_file.read()

    assert f"USER_DIR: {data_dir}" in conf

    # check initialization in random directory
    # has resulted in change of user dir
    assert data_dir == test_app.config["USER_DIR"]

    # verify dataobj creation works
    assert DataObj(type="note", title="Test note").insert()
    assert len(get_items(structured=False)) == 1
Example #8
0
def test_parse_pocket(test_app, client, mocked_responses, pocket_fixture):
    """Exercise the /parse_pocket endpoint.

    HTTP calls to the pocket API are mocked out
    """
    # fake website
    fake_page = """<html>
        <head><title>Example</title></head><body><p>
            Lorem ipsum dolor sit amet, consectetur adipiscing elit
        </p></body></html>
    """
    mocked_responses.add(responses.GET, "https://example.com/", body=fake_page)

    response: Flask.response_class = client.get('/parse_pocket?new=1')
    assert response.status_code == 302

    # exactly one file should have been stored
    stored = data.get_items()
    assert len(stored.child_files) == 1
Example #9
0
def test_initialization(test_app, cli_runner, click_cli):
    """Run ``init`` with defaults plus a new user and check the results.

    Verifies that no config exists beforehand, that the command reports
    success, that the user record and a test note can be stored, and that
    the written config holds the expected defaults without any
    Elasticsearch settings.
    """
    conf_path = os.path.join(test_app.config["USER_DIR"], "config.yml")
    # conf shouldn't exist
    assert not os.path.exists(conf_path)
    old_data_dir = test_app.config["USER_DIR"]

    with cli_runner.isolated_filesystem():
        # create user, localhost, and don't use ES
        res = cli_runner.invoke(
            click_cli, ["init"],
            input="\nn\ny\nusername\npassword\npassword\n\n")
        assert "Config successfully created" in res.output

        # verify user was created
        # Query conditions must be combined with `&`; Python's `and` would
        # silently keep only the second condition.
        user_query = (Query().type == "user") & (Query().username == "username")
        assert len(get_db().search(user_query))

        # verify dataobj creation works
        assert DataObj(type="note", title="Test note").insert()
        assert len(get_items(structured=False)) == 1

    with open(conf_path) as conf_file:
        conf = conf_file.read()

    # assert defaults are saved
    assert "PANDOC_HIGHLIGHT_THEME: pygments" in conf
    assert f"USER_DIR: {test_app.config['USER_DIR']}" in conf
    # previously a bare string literal, which is always truthy
    assert "HOST: 127.0.0.1" in conf
    # check ES config not saved
    assert "ELASTICSEARCH" not in conf

    # check initialization in random directory
    # has resulted in change of user dir
    assert old_data_dir != test_app.config["USER_DIR"]
Example #10
0
def index():
    """Render ``home.html`` with all data items and the search flag."""
    context = {
        "title": "Home",
        "dataobjs": data.get_items(),
        "search_enabled": Config.ELASTICSEARCH_ENABLED,
    }
    return render_template("home.html", **context)
Example #11
0
def pass_dataobjs():
    """Return a dict exposing all data items under the ``dataobjs`` key."""
    return {"dataobjs": data.get_items()}
Example #12
0
def index():
    """Render ``home.html`` titled "Home" with all data items."""
    items = data.get_items()
    context = {"title": "Home", "dataobjs": items}
    return render_template("home.html", **context)
Example #13
0
def pass_defaults():
    """Return a dict with all data items and the OS path separator."""
    return {"dataobjs": data.get_items(), "SEP": os.path.sep}
Example #14
0
# Module-level setup: prepares the data directory, optionally brings up
# Elasticsearch, configures the app and seeds the id counter.

# Directory under APP_PATH where data is kept; created if it is missing.
DIRNAME = app.config["APP_PATH"] + "/data/"
Path(DIRNAME).mkdir(parents=True, exist_ok=True)

if app.config["ELASTICSEARCH_ENABLED"]:
    # A non-zero return code from the status command is treated as
    # "elasticsearch is not running".
    elastic_running = subprocess.run("service elasticsearch status",
                                     shell=True,
                                     stdout=subprocess.DEVNULL).returncode
    if elastic_running != 0:
        # the restart goes through sudo, hence the password prompt
        print("Enter password to enable elasticsearch")
        subprocess.run("sudo service elasticsearch restart", shell=True)
    try:
        # create the index from the configured name and body
        print(extensions.elastic_client().indices.create(
            index=app.config["INDEX_NAME"], body=app.config["ELASTIC_CONF"]))
    except elasticsearch.ElasticsearchException:
        # NOTE(review): any ElasticsearchException is reported as "already
        # created", not only the index-exists case -- confirm this is intended.
        print("Elasticsearch index already created")

    # run_watcher is started on its own thread
    Thread(target=run_watcher).start()

# enable the jinja2 `do` extension
app.jinja_options["extensions"].append("jinja2.ext.do")

Scss(app)

# Find the largest id among the stored items (at least 1) and hand the
# following value to extensions.set_max_id.
cur_id = 1
for dataobj in data.get_items(structured=False):
    cur_id = max(cur_id, dataobj["id"])

extensions.set_max_id(cur_id + 1)

# Imported last -- presumably because routes needs the app configured
# above; confirm before moving this import.
from archivy import routes  # noqa:
Example #15
0
def hn_sync(save_comments, post_type, username, hn_password, force):
    """Log in to Hacker News and save the user's ``post_type`` posts.

    Pages through ``{BASE_URL}/{post_type}?id={username}``, looks every post
    up through the Algolia HN API and stores each story as a bookmark under
    ``hacker_news/{post_type}/``. Updates the module-level counters
    ``num_ask_hn``, ``num_links`` and ``num_links_processed`` and ends by
    calling ``finish()``.

    Args:
        save_comments: when truthy, comments are appended to every saved
            story (they are always appended for stories without a url).
        post_type: listing to fetch; also the sub-directory of
            ``hacker_news`` the posts are stored in.
        username: Hacker News account name.
        hn_password: password of that account.
        force: when falsy, ``finish()`` is called at the first post that is
            already saved; when truthy, saved posts are skipped and paging
            continues.
    """
    global num_ask_hn, num_links, num_links_processed
    with app.app_context():
        session = requests.Session()

        print("\nLogging in...")

        session.post(f"{BASE_URL}/login",
                     data={
                         "acct": username,
                         "pw": hn_password
                     })

        # a successful login is recognised by the presence of the `user` cookie
        if session.cookies.get("user", None) is None:
            print("Error logging in. Verify the credentials and try again.")
            sys.exit(1)
        print("Logged in successfully.\n")

        url = f"{BASE_URL}/{post_type}?id={username}&p="
        headers = {
            "User-Agent":
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:75.0) Gecko/20100101 Firefox/75.0",
        }

        i = 1

        # create folders in archivy to store content
        create_dir("hacker_news")
        create_dir("hacker_news/" + post_type)

        # store urls of previous posts
        seen_posts = {
            post["url"] for post in get_items(path=f"hacker_news/{post_type}/",
                                              structured=False)
        }
        while True:
            links_processed_prev = num_links_processed
            print(f"Getting results of page {i}")
            r = session.get(url + str(i), headers=headers)

            tree = BeautifulSoup(r.text, features="lxml")
            tree_subtext = tree.select(".subtext")
            post_links = tree.select(".titlelink")
            # Number of links on the page
            n = len(tree_subtext)

            if not n:
                print(f"Processing page {i}. No links found.")
                break

            for j, subtext in enumerate(tree_subtext):
                subtext_links = subtext.find_all("a")

                # The flag link may not be present in the subtext, so the
                # number of links could be either 3 or 4; the link to the
                # comments is always the last one.
                # get post id by parsing link to comments
                post_id = int(
                    subtext_links[-1]["href"].split("=")[1].split("&")[0])
                post_url = post_links[j]["href"]
                hn_link = f"{BASE_URL}/item?id={post_id}"

                already_seen = post_url in seen_posts or hn_link in seen_posts
                if already_seen and not force:
                    # we have already seen this upvoted story
                    # this means that all stories that follow will also be seen
                    finish()
                if already_seen and force:
                    print(f"{post_url} already saved.")
                    continue
                # call algolia api
                try:
                    res = requests.get(
                        f"https://hn.algolia.com/api/v1/items/{post_id}"
                    ).json()
                except Exception:
                    # Best effort per post, but unlike a bare `except:` this
                    # lets KeyboardInterrupt / SystemExit abort the sync.
                    print(f"Could not save {post_url}.")
                    continue
                # might return a 404 if not indexed, so we check if we got a response by calling .get()
                if res.get("type") and res["type"] == "story":
                    bookmark = DataObj(
                        path=f"hacker_news/{post_type}/",
                        date=datetime.utcfromtimestamp(res["created_at_i"]),
                        type="bookmark",
                    )
                    if res["url"]:
                        num_links += 1
                        bookmark.url = post_url
                        bookmark.process_bookmark_url()
                    else:
                        # no external url: the post is counted as an Ask HN
                        num_ask_hn += 1
                        bookmark.url = hn_link
                        bookmark.content = (res["title"].replace(
                            "<p>", "").replace("</p>", ""))

                    bookmark.title = res["title"]
                    bookmark.content = f"{res['points']} points on [Hacker News]({hn_link})\n\n{bookmark.content}"

                    # save comments if user requests it through option or if story is an ASK HN
                    if save_comments or not res["url"]:
                        bookmark.content += "\n\n## Comments from Hacker News"
                        for comment in res["children"]:
                            comments = "<ul>" + build_comments(
                                comment) + "</ul>"
                            bookmark.content += "\n\n" + html2text(
                                comments, bodywidth=0).replace("\n\n", "\n")
                    bookmark.insert()
                    num_links_processed += 1
                    print(f"Saving {res['title']}...")

            if n < 30:
                # no more links
                break
            elif links_processed_prev == num_links_processed:
                # throttling if no new links have been saved (when we're running force.)
                sleep(1)

            i += 1
        finish()
Example #16
0
def get_dataobjs():
    """Return every dataobj, unstructured and in json format, via jsonify."""
    all_items = data.get_items(json_format=True, structured=False)
    response = jsonify(all_items)
    return response
Example #17
0
def get_random_dataobj_title():
    """Echo the title of a randomly chosen dataobj.

    Echoes a short notice instead when there are no dataobjs at all.
    """
    with app.app_context():
        dataobjs = get_items(structured=False)
        if not dataobjs:
            click.echo("No dataobjs found.")
            return
        # randint includes both bounds, so the upper one must be the last
        # valid index rather than len(dataobjs).
        click.echo(dataobjs[randint(0, len(dataobjs) - 1)]["title"])
Example #18
0
def get_dataobjs():
    """Return every dataobj, unstructured and in json format, via jsonify."""
    all_items = get_items(json_format=True, structured=False)
    response = jsonify(all_items)
    return response