Example #1
0
def logistic_extrap(country, field, until, start, cutoff, k_frac):
    """Extrapolates the logistic fit of a field from start, only taking
    data from cutoff into account, until the fit reaches k_frac * k.

    The logistic parameters (r, k, p) are read from a cached pickle in DPATH
    when one exists for this country and the year of `until`; otherwise they
    are fitted on the production data from `cutoff` onwards.

    Args:
        country -> str:
            Name of the country, forwarded to get_production and used in the
            cache file name.
        field:
            Column of the production table to extrapolate; also the key of
            the cached parameter dictionary.
        until -> str:
            datelike string forwarded to get_production; its first four
            characters are used in the cache file name.
        start:
            datelike value converted to a Timestamp and handed to
            fit_logistic / compute_logistic as reference date.
        cutoff:
            Label from which the production series is sliced.
        k_frac:
            Fraction of the fitted parameter k at which the extrapolation
            stops.

    Return:
        (x, y) -> tuple:
            If the last observed value already exceeds k_frac * k, the
            observed index and values are returned unchanged. Otherwise only
            the extrapolated part is returned: the future dates (a list) and
            the fitted values that are still below k_frac * k.
    """
    prod = get_production(country, until)[field].dropna()[cutoff:]
    x, y = prod.index, prod.values
    nx = len(x)

    fname = os.path.join(DPATH,
                '%s_logistic_production_%s.pkl' % (country, until[:4]))
    if os.path.exists(fname):
        # The context manager closes the handle even when load() raises.
        # NOTE(review): the file is opened in text mode as before; if `load`
        # is pickle.load running under Python 3 this must become 'rb' --
        # confirm against the code that writes the cache.
        with open(fname) as f:
            rkp_dict = load(f)
        r, k, p = rkp_dict[field]
    else:
        r, k, p = fit_logistic(x, y, Timestamp(start))

    target = k_frac * k

    # Nothing to extrapolate: the data is already past the target level.
    if y[-1] > target:
        return (x, y)

    # Extend the time axis in blocks of 119 monthly steps and re-evaluate the
    # fit on the whole axis until its last value reaches the target.
    # NOTE(review): presumably never terminates cleanly when the fit cannot
    # reach k_frac * k (e.g. k_frac > 1) -- verify with callers.
    while y[-1] < target:
        x = list(x)
        last_date = x[-1]
        # The first generated date equals last_date, hence the [1:].
        future = date_range(last_date, periods=120, freq='MS')[1:]
        x.extend(future)
        y = compute_logistic(x, (r, k, p), Timestamp(start))

    # Keep only the extrapolated tail, truncated to the values below target.
    # Assumes compute_logistic returns an array supporting boolean masks.
    _y = y[nx:]
    _y = _y[_y < target]
    _x = x[nx:]
    _x = _x[:len(_y)]

    return (_x, _y)
Example #2
0
def rate_of_discoveries(country, until='2013-02-01', fit_style='logistic', 
    confid=None, show_plot=False, style='urr'):
    """Fits a logistic curve to the number of fields discovered up to time t. 
    This informs us about the underlying discovery mechanism. This mechanism can
    depend on size.

    For every category returned by classify_fields_according_to_urr, the
    number of fields whose first production date is on or before each month
    between START[country] and `until` is counted. The logistic parameters
    of each count series are read from a cached pickle in DPATH when the file
    exists, and fitted with fit_logistic otherwise.

    Args:
        country -> str:
            The string representing the name of the country.
        until -> str:
            datelike string useful for backtesting; end of the monthly
            activity range, and its first four characters are used in the
            cache file name.
        fit_style -> str:
            Not used by this function.
        confid:
            Forwarded to fit_logistic.
        show_plot -> bool:
            Forwarded to fit_logistic.
        style -> str:
            Forwarded to classify_fields_according_to_urr.

    Return:
        fit_params -> list:
            One (r, k, p) tuple per category, in the order of the categories.

    """
    production = get_production(country, until)
    fields = production.columns
    # First date with a non-missing production value, per field.
    start_of_prod = dict((field, production[field].dropna().index[0]) for 
                          field in fields)
    start_of_prod = Series(start_of_prod)
    categories = classify_fields_according_to_urr(country, until, style)

    activity = date_range(START[country], until, freq='MS')
    n_vs_ts = []
    for category in categories:
        starts = array(sorted(start_of_prod[category]))
        # Cumulative number of fields of this category started by each month.
        n = [(starts <= start).sum() for start in activity]
        n_vs_ts.append((activity, n))

    fname = os.path.join(DPATH, '%s_%s_rate.pkl' % (country, until[:4]))
    # Decide once whether the cache is used, so the loop below cannot see a
    # file that appeared after the load was skipped.
    use_cache = os.path.exists(fname)
    if use_cache:
        # The context manager closes the handle even when load() raises.
        # NOTE(review): the file is opened in text mode as before; if `load`
        # is pickle.load running under Python 3 this must become 'rb' --
        # confirm against the code that writes the cache.
        with open(fname) as f:
            rkp_dict = load(f)

    fit_params = []
    for i, (x, y) in enumerate(n_vs_ts):
        if use_cache:
            (r, k, p) = rkp_dict[i]
        else:
            (r, k, p) = fit_logistic(x, y, START[country], confid, show_plot)
        fit_params.append((r, k, p))

    return fit_params