def update_least_recently_updated(number_to_update, myredis, mydao):
    """Queue an update for the least recently updated items.

    Asks ``get_least_recently_updated_tiids_in_db`` for up to
    ``number_to_update`` items, stamps their docs through
    ``update_docs_with_updater_timestamp``, records a "Trigger:Update"
    mixpanel event and hands the tiids to ``item.start_item_update`` with a
    0.25 second delay.

    Returns the tiids that were handed to the updater.
    """
    queue_delay_in_seconds = 0.25

    tiids_to_update, docs = get_least_recently_updated_tiids_in_db(
        number_to_update, mydao)
    update_docs_with_updater_timestamp(docs, mydao)

    event_properties = {
        "Number Items": len(tiids_to_update),
        "Update Type": "Scheduled Least Recently",
    }
    mixpanel.track("Trigger:Update", event_properties)

    item.start_item_update(
        tiids_to_update,
        myredis,
        mydao,
        sleep_in_seconds=queue_delay_in_seconds)
    return tiids_to_update
def update_github():
    """Queue a metrics update for every item with a GitHub url alias.

    Pages through the "queues/by_alias" view, 500 rows at a time, between the
    keys ``["url", "https://github.0000000"]`` and
    ``["url", "https://github.zzzzzzzz"]``, and calls
    ``item.start_item_update`` once per row id with a 0.05 second delay.
    Progress is written to stdout and to ``logger``.
    """
    from totalimpact import item, tiredis
    myredis = tiredis.from_url(os.getenv("REDISTOGO_URL"), db=0)

    view_name = "queues/by_alias"
    # NOTE(review): this result is never read; presumably safe to delete if
    # db.view() is lazy -- confirm before removing the call.
    view_rows = db.view(view_name, include_docs=False)
    row_count = 0
    page_size = 500
    start_key = ["url", "https://github.0000000"]
    end_key = ["url", "https://github.zzzzzzzz"]

    from couch_paginator import CouchPaginator
    page = CouchPaginator(db, view_name, page_size, include_docs=False,
                          start_key=start_key, end_key=end_key)

    # Tracked explicitly so the per-page log line below cannot hit an unbound
    # loop variable when a page yields no rows.
    last_id = None
    while page:
        for row in page:
            last_id = row.id
            item.start_item_update([last_id], myredis, db, sleep_in_seconds=0.05)
            row_count += 1
            print(".")
        logger.info("%i. getting new page, last id was %s" % (row_count, last_id))
        if page.has_next:
            # NOTE(review): later pages ask for include_docs=True although only
            # row.id is used here; the first page uses include_docs=False.
            page = CouchPaginator(db, view_name, page_size,
                                  start_key=page.next, end_key=end_key,
                                  include_docs=True)
        else:
            page = None

    # Two spaces after "=" reproduce the output of the former
    # `print "number items = ", row_count` statement.
    print("number items =  %s" % row_count)
def update_github():
    """Queue a metrics update for every item with a GitHub url alias.

    Pages through the "queues/by_alias" view, 500 rows at a time, between the
    keys ``["url", "https://github.0000000"]`` and
    ``["url", "https://github.zzzzzzzz"]``, and calls
    ``item.start_item_update`` once per row id with a 0.05 second delay.
    Progress is written to stdout and to ``logger``.
    """
    from totalimpact import item, tiredis
    myredis = tiredis.from_url(os.getenv("REDISTOGO_URL"), db=0)

    view_name = "queues/by_alias"
    # NOTE(review): this result is never read; presumably safe to delete if
    # db.view() is lazy -- confirm before removing the call.
    view_rows = db.view(view_name, include_docs=False)
    row_count = 0
    page_size = 500
    start_key = ["url", "https://github.0000000"]
    end_key = ["url", "https://github.zzzzzzzz"]

    from couch_paginator import CouchPaginator
    page = CouchPaginator(db, view_name, page_size, include_docs=False,
                          start_key=start_key, end_key=end_key)

    # Tracked explicitly so the per-page log line below cannot hit an unbound
    # loop variable when a page yields no rows.
    last_id = None
    while page:
        for row in page:
            last_id = row.id
            item.start_item_update([last_id], myredis, db, sleep_in_seconds=0.05)
            row_count += 1
            print(".")
        logger.info(u"%i. getting new page, last id was %s" % (row_count, last_id))
        if page.has_next:
            # NOTE(review): later pages ask for include_docs=True although only
            # row.id is used here; the first page uses include_docs=False.
            page = CouchPaginator(db, view_name, page_size,
                                  start_key=page.next, end_key=end_key,
                                  include_docs=True)
        else:
            page = None

    # Two spaces after "=" reproduce the output of the former
    # `print "number items = ", row_count` statement.
    print("number items =  %s" % row_count)
def refresh_from_tiids(tiids, myredis):
    """Start a metrics refresh for each tiid in ``tiids``.

    Each item is loaded with ``Item.from_tiid``, converted with
    ``as_old_doc`` and its aliases are passed to
    ``item_module.start_item_update``. An AttributeError raised along the way
    is logged at debug level and that tiid is skipped (presumably the lookup
    yields None for an unknown tiid -- confirm).

    Returns ``tiids`` unchanged, including any that were skipped.
    """
    for tiid in tiids:
        try:
            old_doc = item_module.Item.from_tiid(tiid).as_old_doc()
            item_module.start_item_update(tiid, old_doc["aliases"], myredis)
        except AttributeError:
            message = u"couldn't find tiid {tiid} so not refreshing its metrics".format(
                tiid=tiid)
            logger.debug(message)
    return tiids
def products_refresh_post(tiids_string):
    """Start a metrics refresh for every tiid in a comma-separated string.

    Each tiid is loaded with ``Item.from_tiid``, converted with
    ``as_old_doc`` and its aliases are passed to
    ``item_module.start_item_update`` together with the module-level
    ``myredis``. An AttributeError raised along the way is logged at debug
    level and that tiid is skipped.

    Always returns a 200 response whose body is "true".
    """
    for tiid in tiids_string.split(","):
        try:
            old_doc = item_module.Item.from_tiid(tiid).as_old_doc()
            item_module.start_item_update(tiid, old_doc["aliases"], myredis)
        except AttributeError:
            message = u"couldn't find tiid {tiid} so not refreshing its metrics".format(
                tiid=tiid)
            logger.debug(message)

    return make_response("true", 200)
def products_refresh_post(tiids_string):
    """Kick off a metrics refresh for the tiids listed in ``tiids_string``.

    ``tiids_string`` is split on commas. For each piece the item is fetched
    via ``Item.from_tiid``, turned into an old-style doc and queued with
    ``item_module.start_item_update`` using the module-level ``myredis``.
    Tiids that raise AttributeError during this are logged at debug level and
    skipped.

    The response is always "true" with status 200.
    """
    requested_tiids = tiids_string.split(",")
    for tiid in requested_tiids:
        try:
            found_item = item_module.Item.from_tiid(tiid)
            aliases = found_item.as_old_doc()["aliases"]
            item_module.start_item_update(tiid, aliases, myredis)
        except AttributeError:
            logger.debug(
                u"couldn't find tiid {tiid} so not refreshing its metrics".format(
                    tiid=tiid))

    return make_response("true", 200)
def update_by_tiids(all_tiids, number_to_update, myredis):
    """Queue an update for the first ``number_to_update`` tiids.

    For each selected tiid the item is loaded, its aliases are passed to
    ``item_module.start_item_update``, and ``last_update_run`` is set to a
    single UTC ISO timestamp taken before the loop. The function sleeps 0.25
    seconds between items and commits the session once after the loop.

    Returns the list of tiids that were selected.
    """
    tiids_to_update = all_tiids[0:min(number_to_update, len(all_tiids))]
    now = datetime.datetime.utcnow().isoformat()

    # Report what is actually being updated: fewer than number_to_update
    # tiids may be available.
    print("updating {number_to_update} of them now".format(
        number_to_update=len(tiids_to_update)))

    QUEUE_DELAY_IN_SECONDS = 0.25
    for tiid in tiids_to_update:
        # can use this method because don't need metrics
        # NOTE(review): presumably query.get() yields None for an unknown
        # tiid, which would raise AttributeError on the next line -- confirm.
        item_obj = item_module.Item.query.get(tiid)
        item_doc = item_obj.as_old_doc()
        item_module.start_item_update(tiid, item_doc["aliases"], myredis)

        item_obj.last_update_run = now
        db.session.add(item_obj)
        time.sleep(QUEUE_DELAY_IN_SECONDS)

    db.session.commit()
    return tiids_to_update
def refresh_from_tiids(tiids, analytics_credentials, priority, myredis):
    """Mark the items for ``tiids`` as refreshing and queue them for update.

    Every Item whose tiid is in ``tiids`` gets ``set_last_refresh_start()``
    called and is added to the session. Its alias tuples are converted with
    ``item_module.alias_dict_from_tuples`` and collected. After one commit
    the collected dicts are passed to ``item_module.start_item_update`` with
    ``priority``. ``analytics_credentials`` is not used here.

    Returns ``tiids`` unchanged.
    """
    tiid_filter = item_module.Item.tiid.in_(tiids)
    matching_items = item_module.Item.query.filter(tiid_filter).all()

    dicts_to_refresh = []
    for item_obj in matching_items:
        try:
            tiid = item_obj.tiid
            item_obj.set_last_refresh_start()
            db.session.add(item_obj)
            alias_dict = item_module.alias_dict_from_tuples(item_obj.alias_tuples)
            dicts_to_refresh.append({"tiid": tiid, "aliases_dict": alias_dict})
        except AttributeError:
            message = u"couldn't find tiid {tiid} so not refreshing its metrics".format(
                tiid=tiid)
            logger.debug(message)

    db.session.commit()

    item_module.start_item_update(dicts_to_refresh, priority, myredis)
    return tiids
def collection_update(cid=""):
    """Queue an update for every item in collection ``cid``.

    Reads the collection from ``mydao`` and takes the values of its
    "alias_tiids" mapping. If anything fails while doing so, the exception is
    logged and the request is aborted with a 404. Otherwise a
    "Trigger:Update" mixpanel event is recorded and the tiids are passed to
    ``item_module.start_item_update``.

    Returns a 200 response with body "true" and mimetype "application/json".
    """
    # first, get the tiids in this collection:
    try:
        tiids = mydao.get(cid)["alias_tiids"].values()
    except Exception:
        logger.exception(
            "couldn't get tiids in POST collection '{cid}'".format(cid=cid))
        abort(404, "couldn't get tiids for this collection...maybe doesn't exist?")

    event_properties = {
        "Number Items": len(tiids),
        "Report Id": cid,
        "Update Type": "webapp",
    }
    mixpanel.track("Trigger:Update", event_properties, request)

    item_module.start_item_update(tiids, myredis, mydao)

    response = make_response("true", 200)
    response.mimetype = "application/json"
    return response
def refresh_from_tiids(tiids, analytics_credentials, priority, myredis):
    """Flag the items behind ``tiids`` as refreshing, then queue them.

    Looks up all Item rows whose tiid is in ``tiids``. For each one it calls
    ``set_last_refresh_start()``, adds the row to the session and builds a
    ``{"tiid": ..., "aliases_dict": ...}`` entry from its alias tuples. The
    session is committed once, then all entries go to
    ``item_module.start_item_update`` with ``priority``.
    ``analytics_credentials`` is accepted but not used.

    Returns the ``tiids`` argument as given.
    """
    item_objects = item_module.Item.query.filter(
        item_module.Item.tiid.in_(tiids)
    ).all()

    dicts_to_refresh = []
    for current in item_objects:
        try:
            tiid = current.tiid
            current.set_last_refresh_start()
            db.session.add(current)
            aliases = item_module.alias_dict_from_tuples(current.alias_tuples)
            dicts_to_refresh.append({"tiid": tiid, "aliases_dict": aliases})
        except AttributeError:
            logger.debug(
                u"couldn't find tiid {tiid} so not refreshing its metrics".format(
                    tiid=tiid))

    db.session.commit()

    item_module.start_item_update(dicts_to_refresh, priority, myredis)
    return tiids
def collection_metrics_refresh(cid=""):
    """Start a metrics refresh for every item in collection ``cid``.

    The tiids are the values of the collection doc's "alias_tiids" mapping.
    A TypeError or AttributeError while reading them is logged and reported
    through ``abort_custom`` with status 500. Each tiid is then loaded and
    queued with ``item_module.start_item_update``; a tiid that raises
    AttributeError is logged at debug level and skipped.

    Returns a 200 response with body "true".
    """
    # first, get the tiids in this collection:
    try:
        tiids = collection.get_collection_doc(cid)["alias_tiids"].values()
    except (TypeError, AttributeError):
        logger.exception(
            u"couldn't get tiids in POST collection '{cid}'".format(cid=cid))
        abort_custom(500, "Error doing collection_update")

    for tiid in tiids:
        try:
            old_doc = item_module.Item.from_tiid(tiid).as_old_doc()
            item_module.start_item_update(tiid, old_doc["aliases"], myredis)
        except AttributeError:
            message = u"couldn't find tiid {tiid} in {cid} so not refreshing its metrics".format(
                cid=cid, tiid=tiid)
            logger.debug(message)

    return make_response("true", 200)
def collection_metrics_refresh(cid=""):
    """Queue a metrics refresh for each item belonging to collection ``cid``.

    Reads the collection doc and uses the values of its "alias_tiids"
    mapping as the tiids. If that raises TypeError or AttributeError the
    error is logged and ``abort_custom(500, ...)`` is called. Every tiid is
    then looked up and queued with ``item_module.start_item_update``;
    tiids that raise AttributeError are logged at debug level and skipped.

    Returns a 200 response with body "true".
    """
    # first, get the tiids in this collection:
    try:
        collection_doc = collection.get_collection_doc(cid)
        tiids = collection_doc["alias_tiids"].values()
    except (TypeError, AttributeError):
        error_message = u"couldn't get tiids in POST collection '{cid}'".format(
            cid=cid)
        logger.exception(error_message)
        abort_custom(500, "Error doing collection_update")

    for tiid in tiids:
        try:
            found_item = item_module.Item.from_tiid(tiid)
            aliases = found_item.as_old_doc()["aliases"]
            item_module.start_item_update(tiid, aliases, myredis)
        except AttributeError:
            logger.debug(
                u"couldn't find tiid {tiid} in {cid} so not refreshing its metrics".format(
                    cid=cid, tiid=tiid))

    return make_response("true", 200)
def update_active_publisher_items(number_to_update, myredis, mydao):
    """Queue updates for items from the journals of active publishers.

    For every journal of every publisher in ``active_publishers``, collects
    the tiids and docs returned by
    ``get_least_recently_updated_items_from_doi_prefix`` for that journal's
    DOI prefix. The first ``number_to_update`` of them are stamped through
    ``update_docs_with_updater_timestamp``, reported to mixpanel as a
    "Trigger:Update" event and handed to ``item.start_item_update`` with a
    0.25 second delay.

    Returns the tiids that were handed to the updater.
    """
    QUEUE_DELAY_IN_SECONDS = 0.25
    min_year = 2011  # only update 2012-2013 right now

    all_tiids = []
    all_docs = []
    for publisher in active_publishers:
        for journal_dict in active_publishers[publisher]["journals"]:
            doi_prefix = journal_dict["doi_prefix"]
            (tiids_from_doi_prefix, docs_from_doi_prefix) = \
                get_least_recently_updated_items_from_doi_prefix(
                    min_year, doi_prefix, myredis, mydao)
            logger.info("doi prefix {prefix} has {num_tiids} items published since {min_year} last updated more than 24 hours ago".format(
                num_tiids=len(tiids_from_doi_prefix),
                prefix=doi_prefix,
                min_year=min_year))
            all_tiids += tiids_from_doi_prefix
            all_docs += docs_from_doi_prefix

    # Single-argument print keeps the same output text as the former
    # two-item print statement.
    print("recent items for active publishers that were last updated more than a day ago, n= %s" % len(all_tiids))

    tiids_to_update = all_tiids[0:min(number_to_update, len(all_tiids))]
    docs_to_update = all_docs[0:min(number_to_update, len(all_docs))]
    update_docs_with_updater_timestamp(docs_to_update, mydao)

    # Report the number actually queued: fewer than number_to_update items
    # may have been found.
    print("updating {number_to_update} of them now".format(
        number_to_update=len(tiids_to_update)))

    mixpanel.track("Trigger:Update", {"Number Items": len(tiids_to_update), "Update Type": "Scheduled Registered"})
    item.start_item_update(tiids_to_update, myredis, mydao, sleep_in_seconds=QUEUE_DELAY_IN_SECONDS)
    return tiids_to_update