def create_bookmark():
    """
    Create a bookmark from the JSON body of the current request.

    **Parameters:**

    All parameters are sent through the JSON body.
    - **url** (required)
    - **desc**
    - **tags**
    - **path**

    Responds with the new bookmark id as JSON, or with an empty 400
    response when `insert()` returns a falsy value.
    """
    payload = request.get_json()
    obj = DataObj(
        url=payload["url"],
        desc=payload.get("desc"),
        tags=payload.get("tags"),
        path=payload.get("path", ""),
        type="bookmark",
    )
    # Run the object's URL-processing step before it is stored.
    obj.process_bookmark_url()
    created_id = obj.insert()
    if not created_id:
        return Response(status=400)
    return jsonify(bookmark_id=created_id)
def new_bookmark():
    """
    Show the "New Bookmark" form and store the bookmark on a valid submit.

    On a successful insert the user is redirected to the new data object;
    otherwise the form is rendered, pre-filled from the `url` and `path`
    query arguments.
    """
    form = forms.NewBookmarkForm()
    form.path.choices = [(dirname, dirname) for dirname in data.get_dirs()]

    if form.validate_on_submit():
        # The "not classified" choice is stored as an empty path.
        chosen_path = "" if form.path.data == "not classified" else form.path.data
        raw_tags = form.tags.data
        tag_list = [] if raw_tags == "" else raw_tags.split(",")
        obj = DataObj(
            url=form.url.data,
            tags=tag_list,
            path=chosen_path,
            type="bookmark",
        )
        obj.process_bookmark_url()
        created_id = obj.insert()
        if created_id:
            flash("Bookmark Saved!", "success")
            return redirect(f"/dataobj/{created_id}")

    # for bookmarklet
    form.url.data = request.args.get("url", "")
    requested_path = request.args.get("path", "not classified").strip("/")
    # handle empty argument
    form.path.data = requested_path or "not classified"
    return render_template("dataobjs/new.html", title="New Bookmark", form=form)
def create_note():
    """
    Create a note from the JSON body of the current request.

    **Parameters:**

    All parameters are sent through the JSON body.
    - **title** (required)
    - **content** (required)
    - **desc**
    - **tags**
    - **path**

    Responds with the new note id as JSON, or with an empty 400 response
    when `insert()` returns a falsy value.
    """
    payload = request.get_json()
    obj = DataObj(
        title=payload["title"],
        content=payload["content"],
        desc=payload.get("desc"),
        tags=payload.get("tags"),
        path=payload.get("path", ""),
        type="note",
    )
    created_id = obj.insert()
    if not created_id:
        return Response(status=400)
    return jsonify(note_id=created_id)
def new_bookmark():
    """
    Show the "New Bookmark" form and store the bookmark on a valid submit.

    The path field defaults to the `DEFAULT_BOOKMARKS_DIR` config value
    (falling back to "root directory"). The `url` and `path` query
    arguments pre-fill the form when it is rendered.
    """
    default_dir = app.config.get("DEFAULT_BOOKMARKS_DIR", "root directory")
    form = forms.NewBookmarkForm(path=default_dir)

    # The empty path is offered first, labelled "root directory".
    dir_choices = [(dirname, dirname) for dirname in data.get_dirs()]
    form.path.choices = [("", "root directory"), *dir_choices]

    if form.validate_on_submit():
        raw_tags = form.tags.data
        tag_list = [] if raw_tags == "" else raw_tags.split(",")
        obj = DataObj(
            url=form.url.data,
            tags=tag_list,
            path=form.path.data,
            type="bookmark",
        )
        obj.process_bookmark_url()
        created_id = obj.insert()
        if created_id:
            flash("Bookmark Saved!", "success")
            return redirect(f"/dataobj/{created_id}")

    # for bookmarklet
    form.url.data = request.args.get("url", "")
    # handle empty argument
    form.path.data = request.args.get("path", default_dir).strip("/")
    return render_template("dataobjs/new.html", title="New Bookmark", form=form)
def create_bookmark():
    """
    Create a bookmark from the JSON body of the current request.

    **Parameters:**

    All parameters are sent through the JSON body.
    - **url** (required)
    - **tags**
    - **path**

    `tags` falls back to an empty list and `path` to the
    `DEFAULT_BOOKMARKS_DIR` config value. Responds with the new bookmark
    id as JSON, or with an empty 400 response when `insert()` returns a
    falsy value.
    """
    payload = request.get_json()
    obj = DataObj(
        url=payload["url"],
        tags=payload.get("tags", []),
        path=payload.get("path", current_app.config["DEFAULT_BOOKMARKS_DIR"]),
        type="bookmark",
    )
    obj.process_bookmark_url()
    created_id = obj.insert()
    if not created_id:
        return Response(status=400)
    return jsonify(bookmark_id=created_id)
def test_new_bookmark(test_app):
    """The first bookmark inserted in the test app gets id 1."""
    fields = {
        "type": "bookmark",
        "tags": ["example"],
        "url": "http://example.org",
    }
    bookmark = DataObj(**fields)
    bookmark.process_bookmark_url()
    assert bookmark.insert() == 1
def parse_pocket():
    """
    Import bookmarks from the Pocket API and redirect to "/".

    When the request carries `new=1`, the stored consumer key and code are
    first exchanged for an access token, which is saved back to the
    `pocket_key` database record. The Pocket `get` endpoint is then queried
    and every returned item whose status is not 2 is stored as a data
    object.
    """
    db = get_db()
    pocket = db.search(Query().type == "pocket_key")[0]
    if request.args.get("new") == "1":
        auth_data = {
            "consumer_key": pocket["consumer_key"],
            "code": pocket["code"]}
        resp = requests.post(
            "https://getpocket.com/v3/oauth/authorize",
            json=auth_data,
            headers={
                "X-Accept": "application/json",
                "Content-Type": "application/json"})
        db.update(
            operations.set(
                "access_token",
                resp.json()["access_token"]),
            Query().type == "pocket_key")
        flash(f"{resp.json()['username']} Signed in!")

    # update pocket dictionary
    # (re-read the record so a token stored just above is picked up)
    pocket = db.search(Query().type == "pocket_key")[0]

    pocket_data = {
        "consumer_key": pocket["consumer_key"],
        "access_token": pocket["access_token"],
        "sort": "newest"}

    # get date of latest call to pocket api
    # NOTE(review): this reads collection "pocket_bookmark" while the objects
    # created below use type "pocket_bookmarks" - confirm that both refer to
    # the same collection, otherwise `since` never advances.
    since = datetime(1970, 1, 1)
    for post in data.get_items(
            collections=["pocket_bookmark"],
            structured=False):
        date = datetime.strptime(post["date"].replace("-", "/"), "%x")
        since = max(date, since)

    # NOTE(review): `since` becomes a float timestamp here; for a naive
    # datetime the value depends on the local timezone, so the truthiness
    # check below is presumably meant as "something was saved" - confirm.
    since = datetime.timestamp(since)
    if since:
        pocket_data["since"] = since
    bookmarks = requests.post(
        "https://getpocket.com/v3/get",
        json=pocket_data).json()

    # api spec: https://getpocket.com/developer/docs/v3/retrieve
    for pocket_bookmark in bookmarks["list"].values():
        # items with status 2 are skipped
        if int(pocket_bookmark["status"]) != 2:
            # only items flagged as articles keep their excerpt as description
            desc = pocket_bookmark["excerpt"] if int(
                pocket_bookmark["is_article"]) else None
            bookmark = DataObj(
                desc=desc,
                url=pocket_bookmark["resolved_url"],
                date=datetime.now(),
                type="pocket_bookmarks")
            bookmark.process_bookmark_url()
            print(bookmark.insert())
    return redirect("/")
def note_fixture(test_app):
    """Insert a sample note (with a description) in the app context and return it."""
    with test_app.app_context():
        note = DataObj(
            type="note",
            title="Test Note",
            desc="A note to test model functionality",
            tags=["testing", "archivy"],
            path="",
        )
        note.insert()
    return note
def note_fixture(test_app):
    """Insert a sample note in the app context and return it."""
    with test_app.app_context():
        note = DataObj(
            type="note",
            title="Test Note",
            tags=["testing", "archivy"],
            path="",
        )
        note.insert()
    return note
def create_note():
    """
    Create a note from the request's JSON body.

    `title` and `content` are read as required keys; `desc`, `tags` and
    `path` are optional. Returns the new note id as JSON, or an empty 400
    response when `insert()` returns a falsy value.
    """
    body = request.get_json()
    note_fields = {
        "title": body["title"],
        "content": body["content"],
        "desc": body.get("desc"),
        "tags": body.get("tags"),
        "path": body.get("path", ""),
        "type": "note",
    }
    created_id = DataObj(**note_fields).insert()
    if created_id:
        return jsonify(note_id=created_id)
    return Response(status=400)
def new_note():
    """
    Show the "New Note" form and store the note on a valid submit.

    Redirects to the new data object after a successful insert; otherwise
    renders the form.
    """
    form = NewNoteForm()
    form.path.choices = [(dirname, dirname) for dirname in data.get_dirs()]

    if form.validate_on_submit():
        note_fields = {
            "title": form.title.data,
            "desc": form.desc.data,
            "tags": form.tags.data,
            "path": form.path.data,
            "type": "note",
        }
        created_id = DataObj(**note_fields).insert()
        if created_id:
            flash("Note Saved!")
            return redirect(f"/dataobj/{created_id}")

    return render_template("/notes/new.html", title="New Note", form=form)
def create_bookmark():
    """
    Create a bookmark from the request's JSON body.

    `url` is read as a required key; `desc`, `tags` and `path` are
    optional. Returns the new bookmark id as JSON, or an empty 400
    response when `insert()` returns a falsy value.
    """
    body = request.get_json()
    bookmark_fields = {
        "url": body["url"],
        "desc": body.get("desc"),
        "tags": body.get("tags"),
        "path": body.get("path", ""),
        "type": "bookmarks",
    }
    obj = DataObj(**bookmark_fields)
    obj.process_bookmark_url()
    created_id = obj.insert()
    if created_id:
        return jsonify(bookmark_id=created_id)
    return Response(status=400)
def test_unformat_directory(test_app, cli_runner, click_cli, bookmark_fixture, note_fixture):
    """`unformat` on a data sub-directory reports the nested note it moved."""
    target_dir = mkdtemp()

    # create directory to store archivy note
    note_dir = "note-dir"
    create_dir(note_dir)
    nested_note = DataObj(type="note", title="Nested note", path=note_dir)
    nested_note.insert()

    # unformat directory
    source_dir = os.path.join(get_data_dir(), note_dir)
    res = cli_runner.invoke(cli, ["unformat", source_dir, target_dir])

    expected_line = (
        f"Unformatted and moved {nested_note.fullpath} "
        f"to {target_dir}/{note_dir}/{nested_note.title}"
    )
    assert expected_line in res.output
def update_item_frontmatter(dataobj_id, new_frontmatter):
    """
    Overwrite front matter entries of the item with id `dataobj_id`.

    Every key of `new_frontmatter` is written into the post's front
    matter, the file is saved, and the updated object is re-indexed and
    passed to the `on_edit` hook. The front matter has this layout:

    ---
    date: Str
    id: Str
    path: Str
    tags: List[Str]
    title: Str
    type: note/bookmark
    ---
    """
    from archivy.models import DataObj

    filename = get_by_id(dataobj_id)
    post = frontmatter.load(filename)
    for field in list(new_frontmatter):
        post[field] = new_frontmatter[field]

    serialized = frontmatter.dumps(post)
    with open(filename, "w", encoding="utf-8") as out_file:
        out_file.write(serialized)

    # Rebuild the object from the saved markdown so index and hooks see it.
    updated = DataObj.from_md(serialized)
    user_dir = current_app.config["USER_DIR"]
    updated.fullpath = str(filename.relative_to(user_dir))
    updated.index()
    load_hooks().on_edit(updated)
def new_bookmark():
    """
    Show the "New Bookmark" form and store the bookmark on a valid submit.

    Tags are taken from the comma-separated tags field. Redirects to the
    new data object after a successful insert; otherwise renders the form.
    """
    form = NewBookmarkForm()
    form.path.choices = [(dirname, dirname) for dirname in data.get_dirs()]

    if form.validate_on_submit():
        bookmark_fields = {
            "url": form.url.data,
            "desc": form.desc.data,
            "tags": form.tags.data.split(","),
            "path": form.path.data,
            "type": "bookmarks",
        }
        created_id = DataObj(**bookmark_fields).insert()
        if created_id:
            flash("Bookmark Saved!")
            return redirect(f"/dataobj/{created_id}")

    return render_template("dataobjs/new.html", title="New Bookmark", form=form)
def new_note():
    """
    Show the "New Note" form and store the note on a valid submit.

    Redirects to the new data object after a successful insert; otherwise
    renders the form.
    """
    form = forms.NewNoteForm()
    form.path.choices = [(dirname, dirname) for dirname in data.get_dirs()]

    if form.validate_on_submit():
        # The "not classified" choice is stored as an empty path.
        chosen_path = "" if form.path.data == "not classified" else form.path.data
        raw_tags = form.tags.data
        tag_list = [] if raw_tags == "" else raw_tags.split(",")
        obj = DataObj(
            title=form.title.data,
            desc=form.desc.data,
            tags=tag_list,
            path=chosen_path,
            type="note",
        )
        created_id = obj.insert()
        if created_id:
            flash("Note Saved!")
            return redirect(f"/dataobj/{created_id}")

    return render_template("/dataobjs/new.html", title="New Note", form=form)
def new_note():
    """
    Show the "New Note" form and store the note on a valid submit.

    Redirects to the new data object after a successful insert; otherwise
    renders the form with its path pre-filled from the `path` query
    argument.
    """
    form = forms.NewNoteForm()
    form.path.choices = [(dirname, dirname) for dirname in data.get_dirs()]

    if form.validate_on_submit():
        # The "not classified" choice is stored as an empty path.
        chosen_path = "" if form.path.data == "not classified" else form.path.data
        raw_tags = form.tags.data
        tag_list = [] if raw_tags == "" else raw_tags.split(",")
        obj = DataObj(
            title=form.title.data,
            tags=tag_list,
            path=chosen_path,
            type="note",
        )
        created_id = obj.insert()
        if created_id:
            flash("Note Saved!", "success")
            return redirect(f"/dataobj/{created_id}")

    requested_path = request.args.get("path", "not classified").strip("/")
    # handle empty argument
    form.path.data = requested_path or "not classified"
    return render_template("/dataobjs/new.html", title="New Note", form=form)
def new_bookmark():
    """
    Show the "New Bookmark" form and store the bookmark on a valid submit.

    Redirects to the new data object after a successful insert; otherwise
    renders the form.
    """
    form = forms.NewBookmarkForm()
    form.path.choices = [(dirname, dirname) for dirname in data.get_dirs()]

    if form.validate_on_submit():
        # The "not classified" choice is stored as an empty path.
        chosen_path = "" if form.path.data == "not classified" else form.path.data
        raw_tags = form.tags.data
        tag_list = [] if raw_tags == "" else raw_tags.split(",")
        obj = DataObj(
            url=form.url.data,
            desc=form.desc.data,
            tags=tag_list,
            path=chosen_path,
            type="bookmark",
        )
        obj.process_bookmark_url()
        created_id = obj.insert()
        if created_id:
            flash("Bookmark Saved!")
            return redirect(f"/dataobj/{created_id}")

    return render_template("dataobjs/new.html", title="New Bookmark", form=form)
def format_file(path: str):
    """
    Convert the plain markdown file at `path` into an archivy data object.

    The file's text becomes the content of a new note, which is stored
    through `DataObj.insert()`; the original file is then deleted. The
    previous documentation describes the result as a markdown file with
    yaml front matter and a filename of format "{id}-{old_filename}.md".

    Directories are processed recursively, entry by entry. A path that
    does not exist is silently ignored.

    Args:
        path: file or directory to format (a `Path` is accepted as well,
            which is what the recursive calls pass).
    """
    from archivy.models import DataObj

    data_dir = get_data_dir()
    path = Path(path)
    if not path.exists():
        return

    if path.is_dir():
        for filename in path.iterdir():
            format_file(filename)
        return

    # The context manager closes the handle even when reading fails.
    with path.open("r", encoding="utf-8") as new_file:
        file_contents = new_file.read()

    try:
        # get relative path of object in `data` dir
        datapath = path.parent.resolve().relative_to(data_dir)
    except ValueError:
        # file lives outside the data dir: store it at the top level
        datapath = Path()

    # Strip only a trailing ".md"; `str.replace` would also remove ".md"
    # from the middle of a name such as "notes.md.backup.md".
    title = path.name[:-3] if path.name.endswith(".md") else path.name

    note_dataobj = {
        "title": title,
        "content": file_contents,
        "type": "note",
        "path": str(datapath),
    }

    dataobj = DataObj(**note_dataobj)
    dataobj.insert()

    path.unlink()
    current_app.logger.info(
        f"Formatted and moved {str(datapath / path.name)} to {dataobj.fullpath}"
    )
def bookmark_fixture(test_app, mocked_responses):
    """
    Insert a bookmark for a mocked "https://example.com/" page and return it.

    The mocked page contains a title, a paragraph, a script tag, a
    root-relative image and root-relative links, so processing the
    bookmark URL runs against known HTML instead of the network.
    """
    # NOTE(review): `<a href"/empty-link">` has no `=`; presumably a
    # deliberately malformed link - confirm before "fixing" it.
    mocked_responses.add(responses.GET, "https://example.com/", body="""<html> <head><title>Example</title></head><body><p> Lorem ipsum dolor sit amet, consectetur adipiscing elit <script>console.log("this should be sanitized")</script> <img src="/images/image1.png"> <a href="/testing-absolute-url">link</a> <a href"/empty-link"></a> </p></body></html> """)

    datapoints = {
        "type": "bookmark",
        "title": "Test Bookmark",
        "tags": ["testing", "archivy"],
        "path": "",
        "url": "https://example.com/"
    }

    # the data object is processed and inserted inside the app context
    with test_app.app_context():
        bookmark = DataObj(**datapoints)
        bookmark.process_bookmark_url()
        bookmark.insert()
        return bookmark
def test_get_dataobjs(test_app, client: FlaskClient, bookmark_fixture):
    """`/api/dataobjs` lists both the fixture bookmark and a nested note."""
    create_dir("t")
    nested_note = DataObj(
        type="note",
        title="Nested Test Note",
        tags=["testing", "archivy"],
        path="t",
    )
    nested_note.insert()

    response: Flask.response_class = client.get("/api/dataobjs")
    print(response.data)
    assert response.status_code == 200

    listing = response.json
    assert isinstance(listing, list)
    # check it correctly gets nested note
    assert len(listing) == 2

    first_item = response.json[0]
    metadata = first_item["metadata"]
    assert metadata["title"] == "Example"
    assert metadata["id"] == 1
    assert first_item["content"].startswith("Lorem ipsum")
def update_item_md(dataobj_id, new_content):
    """
    Replace the body of the item with id `dataobj_id` by `new_content`.

    The front matter (eg tags, id, title) is left as loaded; only the
    content below it changes. The saved object is then re-indexed and
    passed to the `on_edit` hook.

    For example, given a dataobj like this:

    ```md
    ---
    id: 1
    title: Note
    ---

    # This is random
    ```

    calling `update_item_md(1, "# This is specific")` turns it into:

    ```md
    ---
    id: 1 # unchanged
    title: Note
    ---

    # This is specific
    ```
    """
    from archivy.models import DataObj

    filename = get_by_id(dataobj_id)
    post = frontmatter.load(filename)
    post.content = new_content

    serialized = frontmatter.dumps(post)
    with open(filename, "w", encoding="utf-8") as out_file:
        out_file.write(serialized)

    # Rebuild the object from the saved markdown so index and hooks see it.
    updated = DataObj.from_md(serialized)
    user_dir = current_app.config["USER_DIR"]
    updated.fullpath = str(filename.relative_to(user_dir))
    updated.index()
    load_hooks().on_edit(updated)
def index():
    """
    Index every markdown file found under `<USER_DIR>/data`.

    Does nothing except print a notice when search is disabled in
    `SEARCH_CONF`. For each file, the outcome of `DataObj.index()` is
    reported with `click.echo`.
    """
    data_dir = Path(app.config["USER_DIR"]) / "data"

    if not app.config["SEARCH_CONF"]["enabled"]:
        click.echo("Search must be enabled for this command.")
        return

    for filename in data_dir.rglob("*.md"):
        # Read as utf-8 (the encoding the write paths of this project use)
        # and let the context manager close the file even if parsing fails.
        with open(filename, encoding="utf-8") as cur_file:
            dataobj = DataObj.from_md(cur_file.read())

        if dataobj.index():
            click.echo(f"Indexed {dataobj.title}...")
        else:
            click.echo(f"Failed to index {dataobj.title}")
def sync(force):
    """
    Fetch bookmarks from the Pocket API and store the new ones under "pocket".

    Items already stored under `pocket/` (matched by URL) and items whose
    status is 2 are skipped. Unless `force` is set, only items newer than
    the most recent stored date are requested.

    Args:
        force: when truthy, do not send the `since` filter to the API.
    """
    with app.app_context():
        db = get_db()

        # update pocket dictionary
        pocket = db.search(Query().type == "pocket_key")[0]

        pocket_data = {
            "consumer_key": pocket["consumer_key"],
            "access_token": pocket["access_token"],
            "sort": "newest",
        }

        # get date of latest call to pocket api
        epoch = datetime(1970, 1, 1)
        since = epoch
        create_dir("pocket")
        already_saved = set()
        for post in get_items(path="pocket/", structured=False):
            date = datetime.strptime(post["date"].replace("-", "/"), "%x")
            already_saved.add(post["url"])
            since = max(date, since)

        # `since` still at the epoch means nothing has been saved yet
        if since != epoch and not force:
            pocket_data["since"] = datetime.timestamp(since)

        bookmarks = requests.post(
            "https://getpocket.com/v3/get", json=pocket_data
        ).json()

        # api spec: https://getpocket.com/developer/docs/v3/retrieve
        # for some reason, if the `list` attribute is empty it returns a list instead of a dict.
        if not bookmarks["list"]:
            click.echo("No new bookmarks.")
        else:
            for pocket_bookmark in bookmarks["list"].values():
                url = pocket_bookmark.get("resolved_url", pocket_bookmark["given_url"])
                if int(pocket_bookmark["status"]) != 2 and url not in already_saved:
                    bookmark = DataObj(
                        url=url,
                        date=datetime.now(),
                        type="pocket_bookmark",
                        path="pocket",
                    )
                    try:
                        bookmark.process_bookmark_url()
                        click.echo(f"Saving {bookmark.title}...")
                        bookmark.insert()
                    except Exception:
                        # Best effort per bookmark; unlike a bare `except:`
                        # this lets Ctrl-C / SystemExit stop the sync.
                        click.echo(
                            f"Could not save {bookmark.url} - website may already be down."
                        )
            click.echo("Done!")
def test_initialization_in_diff_than_curr_dir(test_app, cli_runner, click_cli):
    """`init` with an explicit data dir writes it to the config and the app."""
    conf_path = os.path.join(test_app.config["USER_DIR"], "config.yml")
    data_dir = mkdtemp()

    # NOTE(review): `cli` is not a parameter of this test; presumably it is
    # imported at module level - the `click_cli` fixture is requested but
    # not referenced in this body.
    with cli_runner.isolated_filesystem():
        # input data dir - localhost - don't use ES and don't create user
        res = cli_runner.invoke(cli, ["init"], input=f"{data_dir}\nn\nn\n\n")

    assert "Config successfully created" in res.output

    # Close the config file deterministically instead of leaking the handle.
    with open(conf_path) as conf_file:
        conf = conf_file.read()

    assert f"USER_DIR: {data_dir}" in conf

    # check initialization in random directory
    # has resulted in change of user dir
    assert data_dir == test_app.config["USER_DIR"]

    # verify dataobj creation works
    assert DataObj(type="note", title="Test note").insert()
    assert len(get_items(structured=False)) == 1
def test_bookmark_included_images_are_saved(test_app, client, mocked_responses):
    """With `save_images` enabled, an image referenced by the page is stored."""
    page_url = "https://example.com"
    mocked_responses.add(GET, page_url, body="""<html><img src='/image.png'></html>""")
    mocked_responses.add(
        GET,
        "https://example.com/image.png",
        body=open("docs/img/logo.png", "rb"),
    )
    test_app.config["SCRAPING_CONF"]["save_images"] = True

    bookmark = DataObj(type="bookmark", url=page_url)
    bookmark.process_bookmark_url()
    bookmark.insert()

    images_dir = Path(test_app.config["USER_DIR"]) / "images"
    saved_image = images_dir / "image.png"
    assert images_dir.exists()
    assert saved_image.exists()
def test_initialization(test_app, cli_runner, click_cli):
    """
    Run `init` with default answers plus a new user and check the outcome:
    the user record, a first data object, and the written config file.
    """
    conf_path = os.path.join(test_app.config["USER_DIR"], "config.yml")
    # conf shouldn't exist
    assert not os.path.exists(conf_path)

    old_data_dir = test_app.config["USER_DIR"]

    with cli_runner.isolated_filesystem():
        # create user, localhost, and don't use ES
        res = cli_runner.invoke(
            click_cli, ["init"], input="\nn\ny\nusername\npassword\npassword\n\n"
        )
        assert "Config successfully created" in res.output

        # verify user was created
        # TinyDB queries are combined with `&`; Python's `and` would simply
        # evaluate to the second query and drop the `type` condition.
        user_query = (Query().type == "user") & (Query().username == "username")
        assert len(get_db().search(user_query))

        # verify dataobj creation works
        assert DataObj(type="note", title="Test note").insert()
        assert len(get_items(structured=False)) == 1

    # Close the config file deterministically instead of leaking the handle.
    with open(conf_path) as conf_file:
        conf = conf_file.read()

    # assert defaults are saved
    assert "PANDOC_HIGHLIGHT_THEME: pygments" in conf
    assert f"USER_DIR: {test_app.config['USER_DIR']}" in conf
    # previously a bare string literal, which is always truthy
    assert "HOST: 127.0.0.1" in conf
    # check ES config not saved
    assert "ELASTICSEARCH" not in conf

    # check initialization in random directory
    # has resulted in change of user dir
    assert old_data_dir != test_app.config["USER_DIR"]
def hn_sync(save_comments, post_type, username, hn_password, force):
    """
    Log in to Hacker News and save the user's `post_type` posts as bookmarks.

    Listing pages are walked one by one. For every entry the item is
    fetched from the Algolia HN API and, when it is a story, stored under
    `hacker_news/<post_type>/`. Stories without an external URL are stored
    with their HN link and always get their comments appended.

    Args:
        save_comments: append the comment tree to every saved story.
        post_type: listing to read; used in the listing URL and as the
            storage sub-directory.
        username: Hacker News account name.
        hn_password: Hacker News password.
        force: when falsy, `finish()` is called at the first already-saved
            post; when truthy, saved posts are skipped and the walk goes on.

    Updates the module-level counters `num_ask_hn`, `num_links` and
    `num_links_processed`. Exits with status 1 when the login fails.
    """
    global num_ask_hn, num_links, num_links_processed
    with app.app_context():
        session = requests.Session()

        print("\nLogging in...")

        # Only the session cookie matters; the response itself is not used.
        session.post(f"{BASE_URL}/login", data={"acct": username, "pw": hn_password})

        if session.cookies.get("user", None) is None:
            print("Error logging in. Verify the credentials and try again.")
            sys.exit(1)
        print("Logged in successfully.\n")

        url = f"{BASE_URL}/{post_type}?id={username}&p="
        headers = {
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:75.0) Gecko/20100101 Firefox/75.0",
        }

        i = 1

        # create folders in archivy to store content
        create_dir("hacker_news")
        create_dir("hacker_news/" + post_type)

        # store urls of previous posts
        seen_posts = {
            post["url"]
            for post in get_items(path=f"hacker_news/{post_type}/", structured=False)
        }

        while True:
            links_processed_prev = num_links_processed
            print(f"Getting results of page {i}")
            r = session.get(url + str(i), headers=headers)

            tree = BeautifulSoup(r.text, features="lxml")
            tree_subtext = tree.select(".subtext")
            post_links = tree.select(".titlelink")
            # Number of links on the page
            n = len(tree_subtext)

            if not n:
                print(f"Processing page {i}. No links found.")
                break

            for j, subtext in enumerate(tree_subtext):
                tree_subtext_each = subtext.find_all("a")

                # This is to take care of situations where flag link may not be
                # present in the subtext. So number of links could be either 3
                # or 4.
                num_subtext = len(tree_subtext_each)
                # get post id by parsing link to comments
                comments_href = tree_subtext_each[num_subtext - 1]["href"]
                post_id = int(comments_href.split("=")[1].split("&")[0])
                post_url = post_links[j]["href"]
                hn_link = f"{BASE_URL}/item?id={post_id}"

                if post_url in seen_posts or hn_link in seen_posts:
                    if not force:
                        # we have already seen this upvoted story
                        # this means that all stories that follow will also be seen
                        finish()
                    else:
                        print(f"{post_url} already saved.")
                        continue

                # call algolia api
                try:
                    res = requests.get(
                        f"https://hn.algolia.com/api/v1/items/{post_id}"
                    ).json()
                except Exception:
                    # Best effort per post; unlike a bare `except:` this
                    # lets Ctrl-C / SystemExit stop the sync.
                    print(f"Could not save {post_url}.")
                    continue

                # might return a 404 if not indexed, so we check if we got a response by calling .get()
                if res.get("type") and res["type"] == "story":
                    bookmark = DataObj(
                        path=f"hacker_news/{post_type}/",
                        date=datetime.utcfromtimestamp(res["created_at_i"]),
                        type="bookmark",
                    )
                    if res["url"]:
                        num_links += 1
                        bookmark.url = post_url
                        bookmark.process_bookmark_url()
                    else:
                        # no external url: store the HN item itself
                        num_ask_hn += 1
                        bookmark.url = hn_link
                        bookmark.content = (
                            res["title"].replace("<p>", "").replace("</p>", "")
                        )

                    bookmark.title = res["title"]
                    bookmark.content = f"{res['points']} points on [Hacker News]({hn_link})\n\n{bookmark.content}"

                    # save comments if user requests it through option or if story is an ASK HN
                    if save_comments or not res["url"]:
                        bookmark.content += "\n\n## Comments from Hacker News"
                        for comment in res["children"]:
                            comments = "<ul>" + build_comments(comment) + "</ul>"
                            bookmark.content += "\n\n" + html2text(
                                comments, bodywidth=0
                            ).replace("\n\n", "\n")

                    bookmark.insert()
                    num_links_processed += 1
                    print(f"Saving {res['title']}...")

            if n < 30:
                # no more links
                break
            elif links_processed_prev == num_links_processed:
                # throttling if no new links have been saved (when we're running force.)
                sleep(1)

            i += 1
        finish()