def add_corpus(self, corpus, name, password, options, tlds=None):
    """Insert the document describing a new corpus, then set up its indexes.

    This is a generator: it first yields the result of inserting the
    document into the "corpus" collection, then yields the result of
    ``self.init_corpus_indexes(corpus)``.
    NOTE(review): presumably driven by a deferred-style coroutine decorator
    applied outside this view -- confirm.

    corpus   -- stored as the document "_id" and passed to
                ``init_corpus_indexes``
    name     -- stored as is under "name"
    password -- stored under "password" after going through ``salt()``
    options  -- stored as is under "options"
    tlds     -- stored as is under "tlds" (``None`` when omitted)
    """
    creation_ts = now_ts()
    # Resolve the insert method before building the document, as the
    # original single-expression call did.
    store = self.db()["corpus"].insert_one

    document = {
        "_id": corpus,
        "name": name,
        "password": salt(password),
        "options": options,
    }

    # Every statistics counter of a fresh corpus starts at zero.
    for counter in (
        "total_webentities",
        "webentities_in",
        "webentities_in_untagged",
        "webentities_in_uncrawled",
        "webentities_out",
        "webentities_undecided",
        "webentities_discovered",
        "total_crawls",
        "crawls_pending",
        "crawls_running",
        "total_pages",
        "total_pages_crawled",
        "total_pages_queued",
        "total_links_found",
    ):
        document[counter] = 0

    document["recent_changes"] = False
    document["last_index_loop"] = creation_ts
    document["links_duration"] = 1
    document["last_links_loop"] = 0
    # Both fields start as an empty dict, msgpack-packed and wrapped in Binary.
    document["tags"] = Binary(msgpack.packb({}))
    document["webentities_links"] = Binary(msgpack.packb({}))
    document["created_at"] = creation_ts
    document["last_activity"] = creation_ts
    document["tlds"] = tlds

    yield store(document)
    yield self.init_corpus_indexes(corpus)
def add_corpus(self, corpus, name, password, options, tlds=None):
    """Insert the document describing a new corpus, then set up its indexes.

    This is a generator: it first yields the result of inserting the
    document into the "corpus" collection (with ``safe=True``), then yields
    the result of ``self.init_corpus_indexes(corpus)``.
    NOTE(review): presumably driven by a deferred-style coroutine decorator
    applied outside this view -- confirm.

    corpus   -- stored as the document "_id" and passed to
                ``init_corpus_indexes``
    name     -- stored as is under "name"
    password -- stored under "password" after going through ``salt()``
    options  -- stored as is under "options"
    tlds     -- stored as is under "tlds" (``None`` when omitted)
    """
    creation_ts = now_ts()
    # Resolve the insert method before building the document, as the
    # original single-expression call did.
    store = self.db["corpus"].insert

    document = {
        "_id": corpus,
        "name": name,
        "password": salt(password),
        "options": options,
    }

    # Every statistics counter of a fresh corpus starts at zero.
    for counter in (
        "total_webentities",
        "webentities_in",
        "webentities_in_untagged",
        "webentities_in_uncrawled",
        "webentities_out",
        "webentities_undecided",
        "webentities_discovered",
        "total_crawls",
        "total_pages",
        "total_pages_crawled",
    ):
        document[counter] = 0

    document["created_at"] = creation_ts
    document["last_activity"] = creation_ts
    document["recent_changes"] = False
    document["last_index_loop"] = creation_ts
    document["links_duration"] = 1
    document["last_links_loop"] = 0
    document["tlds"] = tlds

    yield store(document, safe=True)
    yield self.init_corpus_indexes(corpus)
def add_corpus(self, corpus, name, password, options):
    """Insert the document describing a new corpus, then set up its indexes.

    This is a generator: it first yields the result of inserting the
    document into the "corpus" collection (with ``safe=True``), then yields
    the result of ``self.init_corpus_indexes(corpus)``.
    NOTE(review): presumably driven by a deferred-style coroutine decorator
    applied outside this view -- confirm.

    corpus   -- stored as the document "_id" and passed to
                ``init_corpus_indexes``
    name     -- stored as is under "name"
    password -- stored under "password" after going through ``salt()``
    options  -- stored as is under "options"
    """
    creation_ts = now_ts()
    # Resolve the insert method before building the document, as the
    # original single-expression call did.
    store = self.db["corpus"].insert

    document = {
        "_id": corpus,
        "name": name,
        "password": salt(password),
        "options": options,
    }

    # Every statistics counter of a fresh corpus starts at zero.
    for counter in (
        "total_webentities",
        "webentities_in",
        "webentities_out",
        "webentities_undecided",
        "webentities_discovered",
        "total_crawls",
        "total_pages",
        "total_pages_crawled",
    ):
        document[counter] = 0

    # All four timestamps share the single creation time taken above.
    for stamp in (
        "created_at",
        "last_activity",
        "last_index_loop",
        "last_links_loop",
    ):
        document[stamp] = creation_ts

    yield store(document, safe=True)
    yield self.init_corpus_indexes(corpus)