Exemple #1
0
def read_ids():
    """Collect the docid of every document in the evergreen tables.

    Reads the "evergreen" table first and the "evergreen_golden" table
    second, obtaining a client from evergreen_utils.get_mongo_client() for
    each one.

    Returns:
        A list of docid values: all "evergreen" entries followed by all
        "evergreen_golden" entries. Duplicates are not removed.

    Raises:
        KeyError: if a returned document has no "docid" field.
    """
    collected = []
    for table_name in ("evergreen", "evergreen_golden"):
        client = evergreen_utils.get_mongo_client()
        table = evergreen_utils.get_table(client, table_name)
        # An empty filter selects every document in the table.
        collected.extend(entry["docid"] for entry in table.find({}))
    return collected
def move_junk_to_candidate_for_second_review(mc):
  """Move editor-rejected junk articles back to the candidate table.

  Loads articles from "evergreen_junk" whose status is "1", whose
  delete_reason is "not selected by editor", whose review_count is not "2"
  and whose expiration_date string is greater than today's date, then moves
  each eligible one to "evergreen_candidate" and resets its review fields.

  An article is skipped when it has no "assigned_to" field, when that value
  is not a key of EDITOR_MAP, or when the mapped editor has already received
  ASSIGN_MAX articles during this call.

  Args:
    mc: client handle passed through to the evergreen_utils helpers
      (presumably a Mongo client -- confirm against evergreen_utils).

  Raises:
    KeyError: if EDITOR_MAP maps to an editor that is not listed in the
      per-editor counter below.
  """
  editor_assignment_count = {"eric": 0, "edward": 0, "adrienne": 0, "emily": 0}
  candidates = []
  today = datetime.now()
  today_str = today.strftime("%Y-%m-%d")
  candidate_table = evergreen_utils.get_table(mc, "evergreen_candidate")
  junk_filter = {
      "status": "1",
      "delete_reason": "not selected by editor",
      "review_count": {"$ne": "2"},
      "expiration_date": {"$gt": today_str},
  }
  # The helper is expected to fill `candidates` in place -- its return value
  # is not used here.
  evergreen_utils.load_evergreen_articles_to_list(mc, "evergreen_junk", candidates, junk_filter)
  # Move articles.
  for doc in candidates:
    if "assigned_to" not in doc:
      continue
    if doc["assigned_to"] not in EDITOR_MAP:
      continue
    assigned_to = EDITOR_MAP[doc["assigned_to"]]
    # Cap the number of articles handed to any single editor per run.
    if editor_assignment_count[assigned_to] >= ASSIGN_MAX:
      continue
    editor_assignment_count[assigned_to] += 1
    evergreen_utils.move_article(mc, doc, "evergreen_junk", "evergreen_candidate", DRY_RUN)
    # Reset the review state; review_count "2" keeps the article out of the
    # filter above on later runs.
    update_projector = {
        "assigned_to": assigned_to,
        "editor_score": "0",
        "last_modified": today,
        "delete_reason": "",
        "last_updated_by": "",
        "status": "0",
        "review_count": "2",
    }
    if DRY_RUN:
      # Single formatted argument so the output is the same on Python 2 and 3.
      print("%s %s" % (doc["docid"], update_projector))
    else:
      candidate_table.update_one({"docid": doc["docid"]}, {"$set": update_projector})
def update_adult(mc, news_data_table, table):
  """Set is_adult to "1" on articles whose news record has an 'adult' category.

  Articles that already carry an "is_adult" field are left alone. For every
  other article, the records in `news_data_table` whose "_id" equals the
  article's docid are checked; if any of them has a 'cat_class' entry equal
  to 'adult', the article is updated (or, when DRY_RUN is set, only printed).
  Articles without an adult category are not modified.

  Args:
    mc: client handle passed through to the evergreen_utils helpers.
    news_data_table: collection queried with find() for the 'cat_class'
      field of each article.
    table: name of the evergreen table whose articles are examined and
      updated.
  """
  evergreen_table = evergreen_utils.get_table(mc, table)
  evergreens = evergreen_utils.load_evergreen_articles(mc, table)
  for doc in evergreens:
    if "is_adult" in doc:
      continue
    docid = doc["docid"]
    news_dict = news_data_table.find({"_id": docid}, projection = ['cat_class'])
    is_adult = False
    for news in news_dict:
      if 'cat_class' in news:
        for cat in news['cat_class']:
          if cat == 'adult':
            is_adult = True
    if is_adult:
      if DRY_RUN:
        # Single formatted argument so the output is the same on Python 2 and 3.
        print("%s %s" % (docid, is_adult))
      else:
        evergreen_table.update_one({"docid": docid}, {"$set": {"is_adult": "1"}})
Exemple #4
0
def dedupe_table(mc, table):
  """Delete duplicate documents that share a docid, keeping one per docid.

  For each docid found in `table`, all documents with that docid are loaded
  and sorted with get_key; the first document in that order is kept and the
  remaining ones are deleted by "_id". When DRY_RUN is set the deletions are
  only printed.

  Args:
    mc: client handle passed through to the evergreen_utils helpers.
    table: name of the table to deduplicate.
  """
  processed_docs = set()
  collection = evergreen_utils.get_table(mc, table)
  evergreens = evergreen_utils.load_evergreen_articles(mc, table, {})
  for doc in evergreens:
    docid = doc["docid"]
    # Duplicates of a docid already handled show up again in the outer scan.
    if docid in processed_docs:
      continue
    docs = list(evergreen_utils.load_evergreen_articles(mc, table, {"docid": docid}))
    if len(docs) <= 1:
      continue
    # Sort all docs for the same docid, and keep the first one only.
    docs = sorted(docs, key = get_key)
    for duplicate in docs[1:]:
      if DRY_RUN:
        # Single formatted argument so the output is the same on Python 2 and 3.
        print("deleting %s %s" % (duplicate["docid"], duplicate["_id"]))
      else:
        collection.delete_one({"_id": duplicate["_id"]})
    processed_docs.add(docid)