def read_ids():
    """Collect the docid of every document in the two evergreen tables.

    The "evergreen" table is read first and the "evergreen_golden" table
    second; each table is opened through its own client obtained from
    evergreen_utils.get_mongo_client().

    Returns:
        A list of docid values, "evergreen" entries first, followed by
        "evergreen_golden" entries. No de-duplication is performed.
    """
    collected = []
    for table_name in ("evergreen", "evergreen_golden"):
        client = evergreen_utils.get_mongo_client()
        table = evergreen_utils.get_table(client, table_name)
        collected.extend(entry["docid"] for entry in table.find({}))
    return collected
def move_junk_to_candidate_for_second_review(mc):
    """Send junked articles back to the candidate table for a second review.

    Loads documents from "evergreen_junk" that have status "1", delete
    reason "not selected by editor", a review_count other than "2" and an
    expiration_date later than today. Each one whose assigned_to value is a
    key of EDITOR_MAP is passed to evergreen_utils.move_article (junk ->
    candidate) and then has its review fields reset in
    "evergreen_candidate", until the mapped editor has received ASSIGN_MAX
    articles in this run.

    When DRY_RUN is set, the field update is printed instead of written.
    DRY_RUN is also forwarded to evergreen_utils.move_article.

    Args:
        mc: Mongo client handle passed through to the evergreen_utils helpers.
    """
    # Per-run tally of articles handed to each editor.
    # NOTE(review): assumes every value in EDITOR_MAP is one of these names;
    # any other value raises KeyError below -- confirm against EDITOR_MAP.
    editor_assignment_count = {"eric": 0, "edward": 0, "adrienne": 0, "emily": 0}
    candidates = []
    today = datetime.now()
    today_str = today.strftime("%Y-%m-%d")
    candidate_table = evergreen_utils.get_table(mc, "evergreen_candidate")

    evergreen_utils.load_evergreen_articles_to_list(
        mc,
        "evergreen_junk",
        candidates,
        {
            "status": "1",
            "delete_reason": "not selected by editor",
            "review_count": {"$ne": "2"},
            "expiration_date": {"$gt": today_str},
        },
    )

    # Move articles.
    for doc in candidates:
        # Fixed: the original tested the undefined name `dooc`, which raised
        # NameError on the first candidate.
        if "assigned_to" not in doc:
            continue
        if doc["assigned_to"] not in EDITOR_MAP:
            continue
        assigned_to = EDITOR_MAP[doc["assigned_to"]]
        if editor_assignment_count[assigned_to] >= ASSIGN_MAX:
            continue
        editor_assignment_count[assigned_to] += 1

        evergreen_utils.move_article(
            mc, doc, "evergreen_junk", "evergreen_candidate", DRY_RUN)

        # Reset the review state and mark this as the second review round.
        update_projector = {
            "assigned_to": assigned_to,
            "editor_score": "0",
            "last_modified": today,
            "delete_reason": "",
            "last_updated_by": "",
            "status": "0",
            "review_count": "2",
        }
        if DRY_RUN:
            # A single pre-formatted argument prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print("%s %s" % (doc["docid"], update_projector))
        else:
            candidate_table.update_one(
                {"docid": doc["docid"]}, {"$set": update_projector})
def update_adult(mc, news_data_table, table):
    """Mark articles in `table` whose news record has an 'adult' category.

    Articles that already have an "is_adult" key are skipped. For every
    other article, news_data_table is queried by _id == docid (projecting
    only cat_class); if any returned record has an entry equal to 'adult'
    in cat_class, the article's is_adult field is set to "1". When DRY_RUN
    is set, the docid and flag are printed instead of written.

    Args:
        mc: Mongo client handle passed to the evergreen_utils helpers.
        news_data_table: Collection queried for each article's cat_class.
        table: Name of the evergreen table to scan and update.
    """
    target = evergreen_utils.get_table(mc, table)
    for article in evergreen_utils.load_evergreen_articles(mc, table):
        if "is_adult" in article:
            continue

        article_id = article["docid"]
        records = news_data_table.find(
            {"_id": article_id}, projection=['cat_class'])

        # Count matches rather than stopping at the first one, so the whole
        # cursor is still consumed.
        adult_hits = 0
        for record in records:
            if 'cat_class' not in record:
                continue
            for category in record['cat_class']:
                if category == 'adult':
                    adult_hits += 1

        if not adult_hits:
            continue
        if DRY_RUN:
            # A single pre-formatted argument prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print("%s %s" % (article_id, True))
        else:
            target.update_one(
                {"docid": article_id}, {"$set": {"is_adult": "1"}})
def dedupe_table(mc, table):
    """Remove duplicate documents that share a docid within `table`.

    For each docid found in the table, all documents carrying it are
    loaded and ordered with get_key. The first document in that order is
    kept and the rest are deleted by _id. When DRY_RUN is set, the
    deletions are printed instead of performed.

    Args:
        mc: Mongo client handle passed to the evergreen_utils helpers.
        table: Name of the table to de-duplicate.
    """
    processed_docs = set()
    collection = evergreen_utils.get_table(mc, table)
    evergreens = evergreen_utils.load_evergreen_articles(mc, table, {})
    for doc in evergreens:
        docid = doc["docid"]
        if docid in processed_docs:
            continue

        docs = list(
            evergreen_utils.load_evergreen_articles(mc, table, {"docid": docid}))
        if len(docs) < 2:
            # Nothing to de-duplicate for this docid.
            continue

        # Sort all docs for the same docid, and keep the first one only.
        docs = sorted(docs, key=get_key)
        # Fixed: the original loop always acted on the stale `cur_doc` (the
        # last document loaded) instead of each sorted entry after the first.
        for duplicate in docs[1:]:
            if DRY_RUN:
                # A single pre-formatted argument prints the same text under
                # the Python 2 print statement and the Python 3 print function.
                print("deleting %s %s" % (duplicate["docid"], duplicate["_id"]))
            else:
                collection.delete_one({"_id": duplicate["_id"]})
        processed_docs.add(docid)