예제 #1
0
def update_least_recently_updated(number_to_update, myredis, mydao):
    """Queue an update for the items returned as least recently updated.

    The tiids and docs come from
    ``get_least_recently_updated_tiids_in_db(number_to_update, mydao)``.
    The docs are handed to ``update_docs_with_updater_timestamp``, a
    "Trigger:Update" event is sent to mixpanel, and the tiids are passed to
    ``item.start_item_update`` with a 0.25 second ``sleep_in_seconds``.

    Returns the list of tiids that were handed to the updater.
    """
    queue_delay_in_seconds = 0.25

    selection = get_least_recently_updated_tiids_in_db(number_to_update, mydao)
    (tiids_to_update, docs) = selection
    update_docs_with_updater_timestamp(docs, mydao)

    event_properties = {
        "Number Items": len(tiids_to_update),
        "Update Type": "Scheduled Least Recently",
    }
    mixpanel.track("Trigger:Update", event_properties)

    item.start_item_update(
        tiids_to_update,
        myredis,
        mydao,
        sleep_in_seconds=queue_delay_in_seconds)
    return tiids_to_update
예제 #2
0
def update_github():
    from totalimpact import item, tiredis

    myredis = tiredis.from_url(os.getenv("REDISTOGO_URL"), db=0)

    view_name = "queues/by_alias"
    view_rows = db.view(view_name, include_docs=False)
    row_count = 0
    page_size = 500
    start_key = ["url", "https://github.0000000"]
    end_key = ["url", "https://github.zzzzzzzz"]

    from couch_paginator import CouchPaginator

    page = CouchPaginator(db, view_name, page_size, include_docs=False, start_key=start_key, end_key=end_key)

    while page:
        for row in page:
            tiid = row.id
            item.start_item_update([tiid], myredis, db, sleep_in_seconds=0.05)
            row_count += 1
            print "."
        logger.info("%i. getting new page, last id was %s" % (row_count, row.id))
        if page.has_next:
            page = CouchPaginator(db, view_name, page_size, start_key=page.next, end_key=end_key, include_docs=True)
        else:
            page = None

    print "number items = ", row_count
예제 #3
0
def update_github():
    from totalimpact import item, tiredis
    myredis = tiredis.from_url(os.getenv("REDISTOGO_URL"), db=0)

    view_name = "queues/by_alias"
    view_rows = db.view(view_name, include_docs=False)
    row_count = 0
    page_size = 500
    start_key = ["url", "https://github.0000000"]
    end_key = ["url", "https://github.zzzzzzzz"]

    from couch_paginator import CouchPaginator
    page = CouchPaginator(db, view_name, page_size, include_docs=False, start_key=start_key, end_key=end_key)

    while page:
        for row in page:
            tiid = row.id
            item.start_item_update([tiid], myredis, db, sleep_in_seconds=0.05)                        
            row_count += 1
            print "."
        logger.info(u"%i. getting new page, last id was %s" %(row_count, row.id))
        if page.has_next:
            page = CouchPaginator(db, view_name, page_size, start_key=page.next, end_key=end_key, include_docs=True)
        else:
            page = None

    print "number items = ", row_count
예제 #4
0
def refresh_from_tiids(tiids, myredis):
    """Start a metrics update for each tiid in ``tiids``.

    Every tiid is loaded with ``item_module.Item.from_tiid``; the "aliases"
    entry of its old-style doc is passed to ``item_module.start_item_update``
    together with ``myredis``. An AttributeError raised while handling a tiid
    is logged at debug level and that tiid is skipped.

    Returns ``tiids`` unchanged.
    """
    not_found_template = u"couldn't find tiid {tiid} so not refreshing its metrics"
    for current_tiid in tiids:
        try:
            old_doc = item_module.Item.from_tiid(current_tiid).as_old_doc()
            item_module.start_item_update(current_tiid, old_doc["aliases"], myredis)
        except AttributeError:
            logger.debug(not_found_template.format(tiid=current_tiid))
    return tiids
예제 #5
0
def products_refresh_post(tiids_string):
    """Start a metrics update for each tiid in a comma-separated string.

    ``tiids_string`` is split on "," and every piece is loaded with
    ``item_module.Item.from_tiid``; the "aliases" of its old-style doc are
    passed to ``item_module.start_item_update``. An AttributeError raised
    while handling a tiid is logged at debug level and that tiid is skipped.

    Returns the response built by ``make_response("true", 200)``.
    """
    for requested_tiid in tiids_string.split(","):
        try:
            product = item_module.Item.from_tiid(requested_tiid)
            aliases = product.as_old_doc()["aliases"]
            item_module.start_item_update(requested_tiid, aliases, myredis)
        except AttributeError:
            message = u"couldn't find tiid {tiid}  so not refreshing its metrics"
            logger.debug(message.format(tiid=requested_tiid))

    return make_response("true", 200)
예제 #6
0
def products_refresh_post(tiids_string):
    """Kick off a metrics refresh for every tiid named in ``tiids_string``.

    The string is split on commas. For each resulting tiid the item is
    fetched through ``item_module.Item.from_tiid``, converted with
    ``as_old_doc`` and its "aliases" are given to
    ``item_module.start_item_update``. If that raises AttributeError the
    tiid is reported at debug level and processing moves on to the next one.

    Returns ``make_response("true", 200)``.
    """
    not_found = u"couldn't find tiid {tiid}  so not refreshing its metrics"

    id_list = tiids_string.split(",")
    for one_tiid in id_list:
        try:
            legacy_doc = item_module.Item.from_tiid(one_tiid).as_old_doc()
            item_module.start_item_update(
                one_tiid, legacy_doc["aliases"], myredis)
        except AttributeError:
            logger.debug(not_found.format(tiid=one_tiid))

    response = make_response("true", 200)
    return response
예제 #7
0
def update_by_tiids(all_tiids, number_to_update, myredis):
    tiids_to_update = all_tiids[0:min(number_to_update, len(all_tiids))]
    now = datetime.datetime.utcnow().isoformat()

    print "updating {number_to_update} of them now".format(number_to_update=number_to_update)
    QUEUE_DELAY_IN_SECONDS = 0.25
    for tiid in tiids_to_update:
        item_obj = item_module.Item.query.get(tiid)  # can use this method because don't need metrics
        item_doc = item_obj.as_old_doc()
        item_module.start_item_update(tiid, item_doc["aliases"], myredis)
        item_obj.last_update_run = now
        db.session.add(item_obj)
        time.sleep(QUEUE_DELAY_IN_SECONDS)
    db.session.commit()
    return tiids_to_update
예제 #8
0
def refresh_from_tiids(tiids, analytics_credentials, priority, myredis):
    """Mark the items for ``tiids`` as refreshing and queue them for update.

    Items whose tiid is in ``tiids`` are loaded in one query. For each one,
    ``set_last_refresh_start`` is called, the item is added to
    ``db.session`` and a ``{"tiid", "aliases_dict"}`` entry is collected.
    An AttributeError raised while preparing an item is logged at debug
    level and that item is left out of the queued list. The session is
    committed once, then the collected entries are passed to
    ``item_module.start_item_update`` with ``priority`` and ``myredis``.

    ``analytics_credentials`` is accepted but not used in this function.

    Returns ``tiids`` unchanged.
    """
    item_objects = item_module.Item.query.filter(item_module.Item.tiid.in_(tiids)).all()
    dicts_to_refresh = []

    for item_obj in item_objects:
        # Reset per item so the except branch never reports a tiid left over
        # from a previous iteration, or trips over an unbound name.
        tiid = None
        try:
            tiid = item_obj.tiid
            item_obj.set_last_refresh_start()
            db.session.add(item_obj)
            alias_dict = item_module.alias_dict_from_tuples(item_obj.alias_tuples)
            dicts_to_refresh.append({"tiid": tiid, "aliases_dict": alias_dict})
        except AttributeError:
            logger.debug(u"couldn't find tiid {tiid} so not refreshing its metrics".format(
                tiid=tiid))

    db.session.commit()

    item_module.start_item_update(dicts_to_refresh, priority, myredis)
    return tiids
예제 #9
0
def collection_update(cid=""):
    """Start an item update for every tiid in the collection ``cid``.

    The collection is read with ``mydao.get(cid)`` and its tiids are the
    values of its "alias_tiids" entry. Any exception while reading them is
    logged and ``abort(404, ...)`` is called. Otherwise a "Trigger:Update"
    event is sent to mixpanel and the tiids are passed to
    ``item_module.start_item_update``.

    Returns a "true" response with status 200 and mimetype
    "application/json".
    """
    # first, get the tiids in this collection:
    try:
        tiids = mydao.get(cid)["alias_tiids"].values()
    except Exception:
        failure_note = "couldn't get tiids in POST collection '{cid}'".format(cid=cid)
        logger.exception(failure_note)
        abort(404, "couldn't get tiids for this collection...maybe doesn't exist?")

    event_properties = {
        "Number Items": len(tiids),
        "Report Id": cid,
        "Update Type": "webapp",
    }
    mixpanel.track("Trigger:Update", event_properties, request)
    item_module.start_item_update(tiids, myredis, mydao)

    resp = make_response("true", 200)
    resp.mimetype = "application/json"
    return resp
예제 #10
0
def refresh_from_tiids(tiids, analytics_credentials, priority, myredis):
    """Mark the items for ``tiids`` as refreshing and queue them for update.

    Items whose tiid is in ``tiids`` are loaded in one query. For each one,
    ``set_last_refresh_start`` is called, the item is added to
    ``db.session`` and a ``{"tiid", "aliases_dict"}`` entry is collected.
    An AttributeError raised while preparing an item is logged at debug
    level and that item is left out of the queued list. The session is
    committed once, then the collected entries are passed to
    ``item_module.start_item_update`` with ``priority`` and ``myredis``.

    ``analytics_credentials`` is accepted but not used in this function.

    Returns ``tiids`` unchanged.
    """
    item_objects = item_module.Item.query.filter(
        item_module.Item.tiid.in_(tiids)).all()
    dicts_to_refresh = []

    for item_obj in item_objects:
        # Reset per item so the except branch never reports a tiid left over
        # from a previous iteration, or trips over an unbound name.
        tiid = None
        try:
            tiid = item_obj.tiid
            item_obj.set_last_refresh_start()
            db.session.add(item_obj)
            alias_dict = item_module.alias_dict_from_tuples(
                item_obj.alias_tuples)
            dicts_to_refresh.append({"tiid": tiid, "aliases_dict": alias_dict})
        except AttributeError:
            logger.debug(
                u"couldn't find tiid {tiid} so not refreshing its metrics".
                format(tiid=tiid))

    db.session.commit()

    item_module.start_item_update(dicts_to_refresh, priority, myredis)
    return tiids
예제 #11
0
def collection_metrics_refresh(cid=""):
    """Start a metrics update for each tiid in the collection ``cid``.

    The tiids are the values of the "alias_tiids" entry of the doc returned
    by ``collection.get_collection_doc(cid)``. A TypeError or AttributeError
    while reading them is logged and ``abort_custom(500, ...)`` is called.
    Each tiid is then loaded with ``item_module.Item.from_tiid`` and its
    old-style "aliases" are passed to ``item_module.start_item_update``; an
    AttributeError for a tiid is logged at debug level and that tiid is
    skipped.

    Returns ``make_response("true", 200)``.
    """
    # first, get the tiids in this collection:
    try:
        tiids = collection.get_collection_doc(cid)["alias_tiids"].values()
    except (TypeError, AttributeError):
        lookup_failure = u"couldn't get tiids in POST collection '{cid}'"
        logger.exception(lookup_failure.format(cid=cid))
        abort_custom(500, "Error doing collection_update")

    missing_template = u"couldn't find tiid {tiid} in {cid} so not refreshing its metrics"
    for member_tiid in tiids:
        try:
            member = item_module.Item.from_tiid(member_tiid)
            aliases = member.as_old_doc()["aliases"]
            item_module.start_item_update(member_tiid, aliases, myredis)
        except AttributeError:
            logger.debug(missing_template.format(cid=cid, tiid=member_tiid))

    return make_response("true", 200)
예제 #12
0
def collection_metrics_refresh(cid=""):
    """Refresh metrics for all tiids listed in collection ``cid``.

    Reads the collection doc via ``collection.get_collection_doc(cid)`` and
    takes the values of its "alias_tiids" mapping. If that raises TypeError
    or AttributeError, the failure is logged with ``logger.exception`` and
    ``abort_custom(500, ...)`` is called. For every tiid, the item from
    ``item_module.Item.from_tiid`` is converted with ``as_old_doc`` and its
    "aliases" go to ``item_module.start_item_update``; an AttributeError is
    logged at debug level and the loop continues with the next tiid.

    Returns ``make_response("true", 200)``.
    """
    # first, get the tiids in this collection:
    try:
        collection_doc = collection.get_collection_doc(cid)
        alias_to_tiid = collection_doc["alias_tiids"]
        tiids = alias_to_tiid.values()
    except (TypeError, AttributeError):
        problem = u"couldn't get tiids in POST collection '{cid}'".format(cid=cid)
        logger.exception(problem)
        abort_custom(500, "Error doing collection_update")

    for each_tiid in tiids:
        try:
            legacy_doc = item_module.Item.from_tiid(each_tiid).as_old_doc()
            item_module.start_item_update(each_tiid, legacy_doc["aliases"], myredis)
        except AttributeError:
            note = u"couldn't find tiid {tiid} in {cid} so not refreshing its metrics"
            logger.debug(note.format(cid=cid, tiid=each_tiid))

    response = make_response("true", 200)
    return response
예제 #13
0
def update_active_publisher_items(number_to_update, myredis, mydao):
    all_tiids = []
    all_docs = []
    for publisher in active_publishers:
        for journal_dict in active_publishers[publisher]["journals"]:
            min_year = 2011  #only update 2012-2013 right now
            (tiids_from_doi_prefix, docs_from_doi_prefix) = get_least_recently_updated_items_from_doi_prefix(min_year, journal_dict["doi_prefix"], myredis, mydao)
            logger.info("doi prefix {prefix} has {num_tiids} items published since {min_year} last updated more than 24 hours ago".format(
                num_tiids=len(tiids_from_doi_prefix), prefix=journal_dict["doi_prefix"], min_year=min_year))
            all_tiids += tiids_from_doi_prefix
            all_docs += docs_from_doi_prefix

    print "recent items for active publishers that were last updated more than a day ago, n=", len(all_tiids)
    tiids_to_update = all_tiids[0:min(number_to_update, len(all_tiids))]
    docs_to_update = all_docs[0:min(number_to_update, len(all_docs))]
    response = update_docs_with_updater_timestamp(docs_to_update, mydao)        

    print "updating {number_to_update} of them now".format(number_to_update=number_to_update)
    QUEUE_DELAY_IN_SECONDS = 0.25
    mixpanel.track("Trigger:Update", {"Number Items":len(tiids_to_update), "Update Type":"Scheduled Registered"})
    item.start_item_update(tiids_to_update, myredis, mydao, sleep_in_seconds=QUEUE_DELAY_IN_SECONDS)

    return tiids_to_update