def pass_defaults():
    """Return the default values shared with templates.

    Returns:
        dict: ``dataobjs`` holds the result of ``data.get_items()`` and
        ``SEP`` holds the path separator, escaped for use inside JavaScript.
    """
    # A lone backslash (the Windows separator) breaks JS parsing, so it is
    # doubled (https://github.com/Uzay-G/archivy/issues/115).
    separator = (sep + "\\") if sep == "\\" else sep
    return {"dataobjs": data.get_items(), "SEP": separator}
def parse_pocket():
    """Import bookmarks from the Pocket API and redirect to the home page.

    When the request carries ``new=1`` the stored request ``code`` is first
    exchanged for an access token, which is saved on the ``pocket_key``
    record. Bookmarks are then fetched from Pocket (only those newer than the
    most recent locally stored Pocket bookmark, when there is one) and each
    one is processed and inserted as a ``DataObj``.

    Returns:
        The response produced by ``redirect("/")``.
    """
    db = get_db()
    pocket = db.search(Query().type == "pocket_key")[0]

    if request.args.get("new") == "1":
        auth_data = {
            "consumer_key": pocket["consumer_key"],
            "code": pocket["code"],
        }
        resp = requests.post(
            "https://getpocket.com/v3/oauth/authorize",
            json=auth_data,
            headers={
                "X-Accept": "application/json",
                "Content-Type": "application/json",
            },
        )
        # Decode the response body once and reuse it.
        auth = resp.json()
        db.update(
            operations.set("access_token", auth["access_token"]),
            Query().type == "pocket_key",
        )
        flash(f"{auth['username']} Signed in!")

    # update pocket dictionary (the access token may just have been stored)
    pocket = db.search(Query().type == "pocket_key")[0]
    pocket_data = {
        "consumer_key": pocket["consumer_key"],
        "access_token": pocket["access_token"],
        "sort": "newest",
    }

    # get date of latest call to pocket api
    since = datetime(1970, 1, 1)
    # NOTE(review): items are looked up in the "pocket_bookmark" collection
    # but created below with type "pocket_bookmarks" - confirm which is meant.
    for post in data.get_items(collections=["pocket_bookmark"], structured=False):
        date = datetime.strptime(post["date"].replace("-", "/"), "%x")
        since = max(date, since)

    since = datetime.timestamp(since)
    if since:
        pocket_data["since"] = since

    bookmarks = requests.post(
        "https://getpocket.com/v3/get", json=pocket_data
    ).json()

    # api spec: https://getpocket.com/developer/docs/v3/retrieve
    # When there is nothing to return, Pocket sends `list` as an empty JSON
    # array instead of an object, so `.values()` must not be called on it.
    pocket_items = bookmarks["list"].values() if bookmarks["list"] else []
    for pocket_bookmark in pocket_items:
        # status 2 marks an item that should be deleted (per the api spec)
        if int(pocket_bookmark["status"]) != 2:
            desc = (
                pocket_bookmark["excerpt"]
                if int(pocket_bookmark["is_article"])
                else None
            )
            bookmark = DataObj(
                desc=desc,
                url=pocket_bookmark["resolved_url"],
                date=datetime.now(),
                type="pocket_bookmarks",
            )
            bookmark.process_bookmark_url()
            print(bookmark.insert())

    return redirect("/")
def index():
    """Render the home page for the directory named by the ``path`` query arg.

    The directory listing comes from ``data.get_items(path=...)``; an absent
    ``path`` argument is treated as the empty string.
    """
    current_path = request.args.get("path", "")
    listing = data.get_items(path=current_path)
    context = {
        "title": "Home",
        "search_enabled": app.config["SEARCH_CONF"]["enabled"],
        "dir": listing,
        "current_path": current_path,
        "new_folder_form": forms.NewFolderForm(),
        "delete_form": forms.DeleteFolderForm(),
    }
    return render_template("home.html", **context)
def sync(force):
    """Fetch bookmarks from Pocket and save the new ones under ``pocket/``.

    Args:
        force: when truthy, the ``since`` filter is not sent, so Pocket is
            asked for every bookmark rather than only those newer than the
            latest locally saved one. URLs that are already saved are still
            skipped.
    """
    with app.app_context():
        db = get_db()

        # update pocket dictionary
        pocket = db.search(Query().type == "pocket_key")[0]
        pocket_data = {
            "consumer_key": pocket["consumer_key"],
            "access_token": pocket["access_token"],
            "sort": "newest",
        }

        # get date of latest call to pocket api
        since = datetime(1970, 1, 1)
        create_dir("pocket")
        already_saved = set()
        for post in get_items(path="pocket/", structured=False):
            date = datetime.strptime(post["date"].replace("-", "/"), "%x")
            already_saved.add(post["url"])
            since = max(date, since)

        # Only filter by date when at least one bookmark was found locally.
        if since != datetime(1970, 1, 1) and not force:
            since = datetime.timestamp(since)
            pocket_data["since"] = since

        bookmarks = requests.post(
            "https://getpocket.com/v3/get", json=pocket_data
        ).json()

        # api spec: https://getpocket.com/developer/docs/v3/retrieve
        # for some reason, if the `list` attribute is empty it returns a list
        # instead of a dict.
        if not bookmarks["list"]:
            click.echo("No new bookmarks.")
        else:
            for pocket_bookmark in bookmarks["list"].values():
                url = pocket_bookmark.get("resolved_url", pocket_bookmark["given_url"])
                if int(pocket_bookmark["status"]) != 2 and url not in already_saved:
                    bookmark = DataObj(
                        url=url,
                        date=datetime.now(),
                        type="pocket_bookmark",
                        path="pocket",
                    )
                    try:
                        bookmark.process_bookmark_url()
                        click.echo(f"Saving {bookmark.title}...")
                        bookmark.insert()
                    # Best effort per bookmark, but let KeyboardInterrupt and
                    # SystemExit through so the command can still be aborted.
                    except Exception:
                        click.echo(
                            f"Could not save {bookmark.url} - website may already be down."
                        )
            click.echo("Done!")
def test_creating_bookmark_without_passing_path_saves_to_default_dir(
    test_app, client, mocked_responses
):
    """A bookmark posted without a path lands in DEFAULT_BOOKMARKS_DIR."""
    default_dir = "bookmarks"
    mocked_responses.add(responses.GET, "http://example.org", body="Example\n")
    test_app.config["DEFAULT_BOOKMARKS_DIR"] = default_dir
    create_dir(default_dir)

    client.post("/api/bookmarks", json={"url": "http://example.org"})

    saved = get_items(structured=False)[0]
    # verify it was saved to default bookmark dir
    assert "bookmarks" in saved["path"]
def index():
    """Render the home page for the directory named by the ``path`` query arg.

    Leading slashes are stripped from the requested path. If
    ``data.get_items`` raises ``FileNotFoundError``, an error message is
    flashed and the user is redirected to ``/``. The page title is the path
    itself, or ``"root"`` when the path is empty.
    """
    current_path = request.args.get("path", "").lstrip("/")

    try:
        listing = data.get_items(path=current_path)
    except FileNotFoundError:
        flash("Directory does not exist.", "error")
        return redirect("/")

    context = {
        "title": current_path or "root",
        "search_enabled": app.config["SEARCH_CONF"]["enabled"],
        "dir": listing,
        "current_path": current_path,
        "new_folder_form": forms.NewFolderForm(),
        "delete_form": forms.DeleteFolderForm(),
    }
    return render_template("home.html", **context)
def test_initialization_in_diff_than_curr_dir(test_app, cli_runner, click_cli):
    """Running ``init`` with an explicit data dir makes it the user dir."""
    conf_path = os.path.join(test_app.config["USER_DIR"], "config.yml")
    data_dir = mkdtemp()

    with cli_runner.isolated_filesystem():
        # input data dir - localhost - don't use ES and don't create user
        res = cli_runner.invoke(cli, ["init"], input=f"{data_dir}\nn\nn\n\n")
        assert "Config successfully created" in res.output

        # Read through a context manager so the file handle is closed.
        with open(conf_path) as conf_file:
            conf = conf_file.read()
        assert f"USER_DIR: {data_dir}" in conf

        # check initialization in random directory
        # has resulted in change of user dir
        assert data_dir == test_app.config["USER_DIR"]

        # verify dataobj creation works
        assert DataObj(type="note", title="Test note").insert()
        assert len(get_items(structured=False)) == 1
def test_parse_pocket(test_app, client, mocked_responses, pocket_fixture):
    """Exercise the ``/parse_pocket`` endpoint.

    HTTP calls to the Pocket API are mocked out; the bookmarked website is
    faked here.
    """
    # fake website
    fake_page = """<html> <head><title>Example</title></head><body><p> Lorem ipsum dolor sit amet, consectetur adipiscing elit </p></body></html> """
    mocked_responses.add(responses.GET, "https://example.com/", body=fake_page)

    response: Flask.response_class = client.get("/parse_pocket?new=1")

    # the endpoint redirects and exactly one item has been stored
    assert response.status_code == 302
    assert len(data.get_items().child_files) == 1
def test_initialization(test_app, cli_runner, click_cli):
    """Running ``init`` creates the config, a user, and a new user dir."""
    conf_path = os.path.join(test_app.config["USER_DIR"], "config.yml")
    # conf shouldn't exist
    assert not os.path.exists(conf_path)

    old_data_dir = test_app.config["USER_DIR"]

    with cli_runner.isolated_filesystem():
        # create user, localhost, and don't use ES
        res = cli_runner.invoke(
            click_cli, ["init"], input="\nn\ny\nusername\npassword\npassword\n\n"
        )
        assert "Config successfully created" in res.output

        # verify user was created
        # TinyDB conditions must be combined with `&`; Python's `and` would
        # silently drop the first condition.
        user_query = (Query().type == "user") & (Query().username == "username")
        assert len(get_db().search(user_query))

        # verify dataobj creation works
        assert DataObj(type="note", title="Test note").insert()
        assert len(get_items(structured=False)) == 1

    with open(conf_path) as conf_file:
        conf = conf_file.read()

    # assert defaults are saved
    assert "PANDOC_HIGHLIGHT_THEME: pygments" in conf
    assert f"USER_DIR: {test_app.config['USER_DIR']}" in conf
    # A bare string is always truthy, so the membership test is required.
    assert "HOST: 127.0.0.1" in conf
    # check ES config not saved
    assert "ELASTICSEARCH" not in conf

    # check initialization in random directory
    # has resulted in change of user dir
    assert old_data_dir != test_app.config["USER_DIR"]
def index():
    """Render the home page with every dataobj and the search-enabled flag."""
    context = {
        "title": "Home",
        "dataobjs": data.get_items(),
        "search_enabled": Config.ELASTICSEARCH_ENABLED,
    }
    return render_template("home.html", **context)
def pass_dataobjs():
    """Return a dict exposing the result of ``data.get_items()`` as ``dataobjs``."""
    return {"dataobjs": data.get_items()}
def index():
    """Render the home page, passing it the result of ``data.get_items()``."""
    items = data.get_items()
    return render_template("home.html", dataobjs=items, title="Home")
def pass_defaults():
    """Return the default values shared with templates.

    Returns:
        dict: ``dataobjs`` holds the result of ``data.get_items()`` and
        ``SEP`` holds the platform path separator.
    """
    return {"dataobjs": data.get_items(), "SEP": os.path.sep}
# Module-level application setup: create the data directory, optionally bring
# up Elasticsearch, configure Jinja/SCSS, and seed the dataobj id counter.

# Directory that holds the data; created (with parents) if it is missing.
DIRNAME = app.config["APP_PATH"] + "/data/"
Path(DIRNAME).mkdir(parents=True, exist_ok=True)

if app.config["ELASTICSEARCH_ENABLED"]:
    # A non-zero return code from the status command triggers a restart,
    # which runs under sudo and may therefore prompt for a password.
    elastic_running = subprocess.run("service elasticsearch status", shell=True, stdout=subprocess.DEVNULL).returncode
    if elastic_running != 0:
        print("Enter password to enable elasticsearch")
        subprocess.run("sudo service elasticsearch restart", shell=True)
    try:
        print(extensions.elastic_client().indices.create(
            index=app.config["INDEX_NAME"],
            body=app.config["ELASTIC_CONF"]))
    except elasticsearch.ElasticsearchException:
        # Any ElasticsearchException raised here is reported as the index
        # already existing.
        print("Elasticsearch index already created")

    # NOTE(review): the watcher thread is assumed to start only when
    # Elasticsearch is enabled - confirm the nesting against the original file.
    Thread(target=run_watcher).start()

app.jinja_options["extensions"].append("jinja2.ext.do")
Scss(app)

# get max id
# The counter is set to one past the largest id found among existing
# dataobjs (2 when there are none, since cur_id starts at 1).
cur_id = 1
for dataobj in data.get_items(structured=False):
    cur_id = max(cur_id, dataobj["id"])
extensions.set_max_id(cur_id + 1)

# Imported last - presumably because the routes module needs `app` to be
# fully configured first (TODO confirm).
from archivy import routes  # noqa:
def hn_sync(save_comments, post_type, username, hn_password, force):
    """Save a user's Hacker News posts of ``post_type`` as bookmarks.

    Logs in to Hacker News, walks the paginated ``post_type`` listing of
    ``username`` and stores each story under ``hacker_news/<post_type>/``.
    Story details are fetched from the Algolia HN API.

    Args:
        save_comments: when truthy, comments are appended to every saved
            story (stories without a URL always get their comments saved).
        post_type: listing to fetch; used both in the HN URL and as the name
            of the sub-folder the bookmarks are stored in.
        username: Hacker News account name.
        hn_password: Hacker News account password.
        force: when falsy, ``finish()`` is called at the first post that is
            already saved; when truthy, already-saved posts are skipped and
            the remaining pages are still processed.
    """
    global num_ask_hn, num_links, num_links_processed
    with app.app_context():
        session = requests.Session()

        print("\nLogging in...")
        # Only the cookies set on the session matter, not the response.
        session.post(f"{BASE_URL}/login", data={"acct": username, "pw": hn_password})

        if session.cookies.get("user", None) is None:
            print("Error logging in. Verify the credentials and try again.")
            sys.exit(1)
        print("Logged in successfully.\n")

        url = f"{BASE_URL}/{post_type}?id={username}&p="
        headers = {
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:75.0) Gecko/20100101 Firefox/75.0",
        }

        i = 1

        # create folders in archivy to store content
        create_dir("hacker_news")
        create_dir("hacker_news/" + post_type)

        # store urls of previously saved posts
        seen_posts = {
            post["url"]
            for post in get_items(path=f"hacker_news/{post_type}/", structured=False)
        }

        while True:
            links_processed_prev = num_links_processed

            print(f"Getting results of page {i}")
            r = session.get(url + str(i), headers=headers)

            tree = BeautifulSoup(r.text, features="lxml")
            tree_subtext = tree.select(".subtext")
            post_links = tree.select(".titlelink")

            # Number of links on the page
            n = len(tree_subtext)
            if not n:
                print(f"Processing page {i}. No links found.")
                break

            for j, subtext in enumerate(tree_subtext):
                tree_subtext_each = subtext.find_all("a")

                # The flag link may not be present in the subtext, so the
                # number of links could be either 3 or 4; the link to the
                # comments is always the last one.
                # get post id by parsing link to comments
                post_id = int(
                    tree_subtext_each[-1]["href"].split("=")[1].split("&")[0]
                )
                post_url = post_links[j]["href"]
                hn_link = f"{BASE_URL}/item?id={post_id}"

                already_seen = post_url in seen_posts or hn_link in seen_posts
                if already_seen and not force:
                    # we have already seen this upvoted story
                    # this means that all stories that follow will also be seen
                    finish()
                if already_seen and force:
                    print(f"{post_url} already saved.")
                    continue

                # call algolia api
                try:
                    res = requests.get(
                        f"https://hn.algolia.com/api/v1/items/{post_id}"
                    ).json()
                # Best effort per post, but let KeyboardInterrupt and
                # SystemExit through so the sync can still be aborted.
                except Exception:
                    print(f"Could not save {post_url}.")
                    continue

                # might return a 404 if not indexed, so we check if we got a
                # response by calling .get()
                if res.get("type") == "story":
                    bookmark = DataObj(
                        path=f"hacker_news/{post_type}/",
                        date=datetime.utcfromtimestamp(res["created_at_i"]),
                        type="bookmark",
                    )
                    if res["url"]:
                        num_links += 1
                        bookmark.url = post_url
                        bookmark.process_bookmark_url()
                    else:
                        num_ask_hn += 1
                        bookmark.url = hn_link
                        bookmark.content = (
                            res["title"].replace("<p>", "").replace("</p>", "")
                        )

                    bookmark.title = res["title"]
                    bookmark.content = f"{res['points']} points on [Hacker News]({hn_link})\n\n{bookmark.content}"

                    # save comments if user requests it through option or if
                    # story is an ASK HN
                    if save_comments or not res["url"]:
                        bookmark.content += "\n\n## Comments from Hacker News"
                        for comment in res["children"]:
                            comments = "<ul>" + build_comments(comment) + "</ul>"
                            bookmark.content += "\n\n" + html2text(
                                comments, bodywidth=0
                            ).replace("\n\n", "\n")

                    bookmark.insert()
                    num_links_processed += 1
                    print(f"Saving {res['title']}...")

            if n < 30:
                # no more links
                break
            elif links_processed_prev == num_links_processed:
                # throttling if no new links have been saved (when we're
                # running force.)
                sleep(1)

            i += 1
        finish()
def get_dataobjs():
    """Return every dataobj as a JSON response.

    The items are requested from ``data.get_items`` with ``structured=False``
    and ``json_format=True`` and passed to ``jsonify``.
    """
    return jsonify(data.get_items(structured=False, json_format=True))
def get_random_dataobj_title():
    """Echo the title of a randomly chosen dataobj.

    Prints a short notice instead when there are no dataobjs to choose from.
    """
    with app.app_context():
        dataobjs = get_items(structured=False)
        if not dataobjs:
            click.echo("No dataobjs found.")
            return
        # randint is inclusive at both ends, so the upper bound must be the
        # last valid index rather than the length.
        click.echo(dataobjs[randint(0, len(dataobjs) - 1)]["title"])
def get_dataobjs():
    """Return every dataobj as a JSON response.

    The items are requested from ``get_items`` with ``structured=False`` and
    ``json_format=True`` and passed to ``jsonify``.
    """
    return jsonify(get_items(structured=False, json_format=True))