Example 1
def irregular_vs_regular(w=8, l=10, save=False):
    uk_regular = 'WELTON'
    no_irregular = 'VALE'

    uk_prod = get_production('UK')[uk_regular].dropna()
    no_prod = get_production('NO')[no_irregular].dropna()

    fig = plt.figure(figsize=(w, l))
    ax = fig.add_subplot(211)
    uk_prod.plot(ax=ax, marker='x', ls='', color='b')
    l1 = ax.get_lines()[0]

    ax.set_ylabel('Production [barrels/day]')
    ax.set_title('Example of a regular field')
    ax.legend([l1], [uk_regular.lower().capitalize() + ' (UK)'])
   

    ax2 = fig.add_subplot(212)
    no_prod.plot(ax=ax2, marker='o', ls='', color='b')
    l2 = ax2.get_lines()[0]    

    ax2.set_xlabel('Time')
    ax2.set_ylabel('Production [barrels/day]')
    ax2.set_title('Example of an irregular field')
    ax2.legend([l2], [no_irregular.lower().capitalize() + ' (NO)'], 
               loc='upper left')

    fig.tight_layout()

    if save:
        os.chdir('article')
        fig.savefig('regular_and_irregular.pdf')
        os.chdir('..')
    else:
        plt.show()
Example 2
def rate_of_discoveries(w=8, l=8, save=False):
    
    fig = plt.figure(figsize=(w, l))
    fig2 = plt.figure(figsize=(w, l))

    rkps_uk = load(open('data/UK_2013_rate.pkl'))
    rkps_no = load(open('data/NO_2013_rate.pkl'))
    
    prod_uk = get_production('UK')
    prod_no = get_production('NO')

    start_of_prod_uk = dict((field, prod_uk[field].dropna().index[0]) for
                                 field in prod_uk)
    start_of_prod_uk = Series(start_of_prod_uk)
    start_of_prod_no = dict((field, prod_no[field].dropna().index[0]) for
                                 field in prod_no)
    start_of_prod_no = Series(start_of_prod_no)

    cats_uk = classify_fields_according_to_urr('UK', '2013-02-01', 'random')
    cats_no = classify_fields_according_to_urr('NO')

    activity_uk = date_range(START['UK'], '2013-02-01', freq='MS')
    n_vs_ts_uk = [] 
    for category in cats_uk:
        starts = array(sorted(start_of_prod_uk[category]))
        n = [(starts <= start).sum() for start in activity_uk]
        t = activity_uk
        n_vs_ts_uk.append((t, n))

    activity_no = date_range(START['NO'], '2013-02-01', freq='MS')
    n_vs_ts_no = [] 
    for category in cats_no:
        starts = array(sorted(start_of_prod_no[category]))
        n = [(starts <= start).sum() for start in activity_no]
        t = activity_no
        n_vs_ts_no.append((t, n))

    small_uk, medium_uk, big_uk = n_vs_ts_uk[0], n_vs_ts_uk[1], n_vs_ts_uk[2]
    small_no, medium_no, big_no = n_vs_ts_no[0], n_vs_ts_no[1], n_vs_ts_no[2]

    ax = fig.add_subplot(311)
    _ax = fig2.add_subplot(111)
    dates_uk = small_uk[0]
    dates_no = small_no[0]
    dates_plot_uk = date_range(START['UK'], '2025-01-01', freq='MS')
    dates_plot_no = date_range(START['NO'], '2025-01-01', freq='MS')
    x1 = array([float(d.toordinal())-START['UK'].toordinal() for d in 
                   dates_plot_uk])
    x2 = array([float(d.toordinal())-START['NO'].toordinal() for d in 
                   dates_plot_no])

    def logi((r, k, p), x):
        return k * p * exp(r * x) / (k + p * (exp(r * x) - 1))
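For reference, the nested logi helper above evaluates the closed-form solution of the logistic growth equation, which starts at p for x = 0 and saturates at the carrying capacity k. A minimal standalone check of that behaviour (not part of the original module, made-up parameters):

from numpy import exp, allclose

def logistic_curve(params, x):
    # same closed-form logistic solution as logi above
    r, k, p = params
    return k * p * exp(r * x) / (k + p * (exp(r * x) - 1))

assert allclose(logistic_curve((0.05, 100., 3.), 0.), 3.)   # starts at p
assert logistic_curve((0.05, 100., 3.), 500.) > 99.9        # saturates near k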
Example 3
def perturb_fit_parms(country, until='2013-02-01', 
    fit_style='stretched exponential', xmin='my_xmin', perturbation=0.1, 
    step=0.01, parm_names=['tau', 'beta']):
    """We perturb the parameters of the fit and look at the impact on the cost
    function.

    Args:
        country -> str
            The string representing the country.
        until -> str
            The datelike string used for backtesting.
        fit_style -> str
            The string representing the fitting style.
        xmin -> str
            The choice for the "left cutoff" when fitting.
        perturbation -> float
            The percentage by which the fit parameters are perturbed.
        step -> float
            The resolution of the perturbation.
        parm_names -> list
            The names of the parameters we wish to perturb.

    Return:
        res -> tuple of lists
            Each list represents the cost-function values for one perturbed
            parameter.
    """
    fit_parms = get_fit_parms(country=country, until=until, fit_style=fit_style,
                    xmin=xmin)
    production = get_production(country, until)
    changes = arange(1.-perturbation, 1+perturbation+step, step)
    res = {}
    print 'It will be implemented later.'
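The body above is still a stub. A minimal sketch of how the perturbation grid could be used, assuming fit_parms is a plain dict of parameter values and cost(parms) is a hypothetical helper that evaluates the fit's cost function (neither is defined in this module):

from numpy import arange

def perturb_one_parm(fit_parms, parm_name, cost, perturbation=0.1, step=0.01):
    # hypothetical helper: rescale one fitted parameter over a grid of
    # multipliers and record the resulting cost-function values
    changes = arange(1. - perturbation, 1. + perturbation + step, step)
    costs = []
    for change in changes:
        perturbed = dict(fit_parms)
        perturbed[parm_name] = perturbed[parm_name] * change
        costs.append(cost(perturbed))
    return list(changes), costs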
Example 4
def extend_all(country, until='2013-02-01', fit_style='stretched exponential',
    style='urr', xmin='my_xmin'):
    """Extends the production of all fields (regular, irregular, inactive and
    insufficient) and returns the results in single DataFrame.
    
     Args:
        country -> str
            The string representing the country we are dealing with. Ex: 'NO'
        until -> str
            The datetime like string used for backtesting.
        fit_style -> str
            The string representing the fit style.
        style -> str
            The style used for the extension of "bad" fields. Ex: 'random','urr'
        xmin -> str
            The choice for the "left cutoff" when fitting.

    Return:
        extended_prod -> DataFrame
            The DataFrame of the extended production.
    """
    regulars = extend_production(country, until, fit_style, xmin)
    bad_ones = extend_the_bad_ones(country, until, style, fit_style, xmin)
    production = get_production(country, until)
    classification = get_classification(country, until)
    fields = classification[classification['inactive']]['inactive'].index
    inactives = production[fields]
    extended_prod = concat((regulars, bad_ones, inactives), axis=1).sort(axis=1)

    return extended_prod
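A typical call, assuming the project's data files and helper functions (get_production, get_classification, extend_production, etc.) are available; the resulting DataFrame stacks regular, "bad" and inactive fields column-wise:

# hypothetical usage; requires the project's own data files
extended = extend_all('NO', until='2013-02-01', style='urr')
total_future = extended.sum(axis=1)   # aggregate, country-level production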
Example 5
def logistic_extrap(country, field, until, start, cutoff, k_frac):
    """Extrapolates the the logistic fit of a field from start, only taking
    data from cutoff into account, until the fit reaches k_frac * k.
    """
    prod = get_production(country, until)[field].dropna()[cutoff:]
    x, y = prod.index, prod.values
    nx = len(x)

    fname = os.path.join(DPATH, 
                '%s_logistic_production_%s.pkl' % (country, until[:4]))
    if os.path.exists(fname):
        f = open(fname)
        rkp_dict = load(f)
        f.close()
        r, k, p = rkp_dict[field]
    else:
        r, k, p = fit_logistic(x, y, Timestamp(start))
 

    if y[-1] > k_frac * k:
        return (x, y)

    while y[-1] < k_frac * k:
        x = list(x)
        last_date = x[-1]
        future = date_range(last_date, periods=120, freq='MS')[1:]
        x.extend(future)
        y = compute_logistic(x, (r, k, p), Timestamp(start))

    _y = y[nx:]
    _y = _y[_y < k_frac * k]
    _x = x[nx:]
    _x = _x[:len(_y)]

    return (_x, _y)
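A hypothetical call with placeholder dates (the real start and cutoff values come from the pickled logistic_extension files used elsewhere in the module); 'VALE' is the irregular Norwegian field used in the plotting example above:

# placeholder dates, purely illustrative
x_ext, y_ext = logistic_extrap('NO', 'VALE', '2013-02-01',
                               start='2005-01-01', cutoff='2005-01-01',
                               k_frac=0.95)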
Example 6
def stretched_exponential(w=8, l=10, save=False):
    field = 'WELTON'
    prod = get_production('UK')[field].dropna()
    fit_parms = get_fit_parms('UK', '2013-02-01').ix[field]    

    tau, beta, y0 = fit_parms['tau'], fit_parms['beta'], fit_parms['y0']
    x, y = prepare_xy(prod)
    yfit = y0 * exp(-(x/abs(tau))**beta)
    fit = Series(yfit, index=prod.index) 

    lower_cutoff = '1996-08-01'

    fig = plt.figure(figsize=(w, l))
    ax = fig.gca()

    prod.plot(ax=ax, marker='x', ls='')
    l1 = ax.get_lines()[0]
    l2, = ax.plot(fit[lower_cutoff:].index, fit[lower_cutoff:], '-k')
    
    ax.legend([l1, l2], [field.lower().capitalize() + ' (UK)',
                         r'$y = y_0\, e^{-(t/\tau)^\beta}$'])

    ax.set_xlabel('Time')
    ax.set_ylabel('Production [barrels/day]')

    fig.tight_layout()

    if save:
        os.chdir('article')
        fig.savefig('stretched_exponential.pdf')
        os.chdir('..')
    else:
        plt.show()
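The fitted curve above is the stretched exponential y = y0 * exp(-(t/tau)**beta). A self-contained sketch of the same functional form with made-up parameters:

from numpy import arange, exp

def stretched_exp(t, tau, beta, y0):
    # stretched-exponential decay, the same functional form as yfit above
    return y0 * exp(-(t / abs(tau)) ** beta)

t = arange(0., 240.)                                  # months since start of decline
y = stretched_exp(t, tau=60., beta=0.7, y0=5000.)     # illustrative values only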
Example 7
def rate_of_discoveries(country, until='2013-02-01', fit_style='logistic', 
    confid=None, show_plot=False, style='urr'):
    """Fits a logistic curve to the number of fields discovered up to time t. 
    This informs us about the underlying discovery mechanism. This mechanism can
    depend on size.

    Args:
        country -> str:
            The string representing the name of the country.
        until -> str:
            The datelike string used for backtesting.

    Return:
        fit_params -> list of tuples:
            The (r, k, p) logistic fit parameters, one tuple per size category.

    """
    production = get_production(country, until)
    fields = production.columns
    start_of_prod = dict((field, production[field].dropna().index[0]) for 
                          field in fields)
    start_of_prod = Series(start_of_prod)
    categories = classify_fields_according_to_urr(country, until, style)
    
    activity = date_range(START[country], until, freq='MS')
    n_vs_ts= []
    for category in categories:
        starts = array(sorted(start_of_prod[category]))
        n = [(starts <= start).sum() for start in activity]
        t = activity
        n_vs_ts.append((t, n))
 
    #return n_vs_ts
    fit_params = []
    fname = os.path.join(DPATH, '%s_%s_rate.pkl' % (country, until[:4]))
    if os.path.exists(fname):
        f = open(fname)
        rkp_dict = load(f)
        f.close()
    for i, n_vs_t in enumerate(n_vs_ts):
        x, y = n_vs_t[0], n_vs_t[1]
        if os.path.exists(fname):
            (r, k, p) = rkp_dict[i]
        else:
            #res = fit_logistic(x, y, START[country], confid, show_plot)
            (r, k, p) = fit_logistic(x, y, START[country], confid, show_plot)
        fit_params.append((r, k, p))
    
    return fit_params
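A hypothetical call; the returned triples correspond to the small, medium and big size categories used in the plotting example earlier:

# requires the project's production data and START dates
fit_params = rate_of_discoveries('NO', until='2013-02-01')
(r_small, k_small, p_small) = fit_params[0]   # small-field category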
Example 8
def extend_production(country, until='2013-02-01', 
    fit_style='stretched exponential', xmin='my_xmin'):
    """Extends the production of the different oil fields into the future, given
    the fit style.

    Args:
        country -> str
            The string representing the country we are dealing with. Ex: 'NO'
        until -> str
            The datetime like string used for backtesting.
        fit_style -> str
            The string representing the fit style.
        xmin -> str
            The choice for the "left cutoff" when fitting.

    Return:
        extended_prod -> DataFrame
            The DataFrame of the extended production.
    """
    fpath = os.path.join(DPATH, '%s_extended_%s_%s' % (country, until[:4], 
                fit_style[:2]))
    if os.path.exists(fpath):
        f = open(fpath)
        extended_production = load(f)
        f.close()
        return extended_production

    production = get_production(country=country, until=until)
    classification = get_classification(country=country, until=until)
    fields = classification[classification['regular']].index
    fit_parms = get_fit_parms(country=country, until=until, fit_style=fit_style,
                    xmin=xmin)

    extended_productions = []
    for field in fields:
        _prod = production[field]
        _extended_prod = _extend_production(_prod, fit_parms.ix[field],
            fit_style)
        extended_productions.append(_extended_prod)

    f = open(fpath, 'w')
    extended_production = concat(extended_productions, axis=1)
    dump(extended_production, f)
    f.close()
    return extended_production
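extend_production (and get_fit_parms below) follow the same pickle-on-disk caching pattern; a minimal standalone version of that idiom (hypothetical helper, Python 2 pickle):

import os
from pickle import load, dump

def cached(fpath, compute):
    # return the pickled result if it already exists on disk,
    # otherwise compute it, store it and return it
    if os.path.exists(fpath):
        f = open(fpath)
        result = load(f)
        f.close()
        return result
    result = compute()
    f = open(fpath, 'w')
    dump(result, f)
    f.close()
    return result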
Example 9
def get_fit_parms(country, until='2013-02-01', 
    fit_style='stretched exponential', xmin='my_xmin'):
    """Return a DataFrame with the fields as the index and name of the relevant 
    fit parameters as columns.

    Args:
        country -> str.
            The string representing the name of the country.
        until -> str.
            The string representing the date until which we take the monthly
            oil production into account. This is especially useful for 
            backtesting.
        fit_style -> str.
            The string representing the fit style.
        xmin -> str.
            The string representing the x-coordinate of the production 
            timeseries from which the fitting is done.

    Return:
        fit_parms -> DataFrame.
            The DataFrame containing the fit parameters. 
    """
    
    fit_parms = {}
    if fit_style == 'exponential':
        fit = fit_exponential
        columns = ['tau', 'y0']
    elif fit_style == 'stretched exponential':
        fit = fit_stretched_exponential
        columns = ['tau', 'beta', 'y0']
    elif fit_style == 'power law':
        fit = fit_power_law
        columns = ['alpha', 'y0']

    fpath = os.path.join(DPATH, '%s_fit_parms_%s_%s' % (country, until[:4], 
                fit_style[:2]))

    if os.path.exists(fpath):
        f = open(fpath)
        fit_parms = load(f)
        f.close()
        return fit_parms

    production = get_production(country, until)
    classification = get_classification(country, until)
    #Returns the fields that are regular (and thus fittable)
    fields = classification[classification['regular']].index

    #We define get_xmin, a function that returns the "left cutoff value" for the
    #fits.
    if xmin == 'my_xmin':
        def get_xmin(field):
            field_xmin = classification.ix[field]['from']
            return field_xmin

    #Filling the fit_parms DataFrame
    fit_parms = DataFrame(index=fields, columns=columns)
    for field in fields:
        fit_parms.ix[field] = fit(production[field], xmin=get_xmin(field))

    f = open(fpath, 'w')
    dump(fit_parms, f)
    f.close()
    return fit_parms
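A hypothetical call matching the plotting example earlier, which reads the stretched-exponential parameters of the UK field 'WELTON':

parms = get_fit_parms('UK', until='2013-02-01')
tau, beta, y0 = parms.ix['WELTON']   # one row of fit parameters per regular field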
Example 10
def extend_the_bad_ones(country, until='2013-02-01', style='urr',
    fit_style='stretched exponential', xmin='my_xmin'):
    """Extends the production of the irregular/insufficient oil fields into the
    future, given the fit style.

    Args:
        country -> str
            The string representing the country we are dealing with. Ex: 'NO'
        until -> str
            The datetime like string used for backtesting.
        style -> str
            The style used for the extension. Ex: 'random', 'urr'
        fit_style -> str
            The string representing the fit style.
        xmin -> str
            The choice for the "left cutoff" when fitting.

    Return:
        extended_prod -> DataFrame
            The DataFrame of the extended production.
    """
    ### Note: the is_logistic handling below is convoluted; it accounts for the
    ### possibility of a future rise in the oil production of irregular fields.

    #No URR data for UK
    if country == 'UK':
        assert style == 'random'

    production = get_production(country=country, until=until)
    classification = get_classification(country=country, until=until)
    fields = classification[classification['bad']].index
 
    if style == 'random':
        fname = os.path.join(DPATH, '%s_logistic_extension_%s.pkl' % (
                country, until[:4]))
        f = open(fname)
        logistic_start = load(f)
        f.close()
        #The "good fields" i.e. those who are regular enough for a fit. We will
        #sample the decay for our "bad fields" from the good ones.
        _extended_production = extend_production(country, until, fit_style,
                                   xmin)
        samples = choice(_extended_production.columns, len(fields))
        ss = []
        for field, sample in zip(fields, samples):
            is_logistic = False
            if field in logistic_start.keys():
                is_logistic = True
                start = logistic_start[field][0]
                cutoff = logistic_start[field][1]
                x, y = logistic_extrap(country, field, until, start, cutoff, 0.95)
            if is_logistic:
                tail_shape = _extended_production[sample][until:][1:].values
                tail = list(y) + list(y[-1]/tail_shape[0] * tail_shape)
                idx = _extended_production[until:][1:].index
                nmax = len(idx)
                tail = tail[:nmax]
                ntail = len(tail)
                tail = Series(tail, index=idx[:ntail], name=field)
            else:
                tail_shape = _extended_production[sample][until:][1:]
                #We need to scale the tail_shape to the field we want to extend.
                tail = production[field].dropna()[-1]/tail_shape[0] * tail_shape
                tail.name = field
            ss.append(tail)
            is_logistic = False
        future_prod = concat(ss, axis=1)
        extended_prod = concat((production[fields], future_prod))

        return extended_prod

    if style == 'urr':
        urr_estimates = classification['urr'].ix[fields] 
        if country == 'NO':
            urr_estimates *= 1e6 * M3_TO_BARRELS
        fname = os.path.join(DPATH, '%s_logistic_extension_%s.pkl' % (
                country, until[:4]))
        f = open(fname)
        logistic_start = load(f)
        f.close()

        ss = []
        for field in fields:
            prod_until_now = production[field].sum()
            prod_remaining = urr_estimates[field] - prod_until_now
            prod_now = production[field].dropna()[-1]

            is_logistic = False
            if field in logistic_start.keys():
                is_logistic = True
                start = logistic_start[field][0]
                cutoff = logistic_start[field][1]
                x, y = logistic_extrap('NO', field, until, 
                           logistic_start[field][0], logistic_start[field][1],
                           0.95)
                prod_now = y[-1]
                prod_remaining -= sum(y)            

            #comes from the constraint that sum over future equals remaining
            tau = prod_remaining / prod_now
            #time from start in months.
            if is_logistic:
                tnow = len(production[field].dropna()) + len(y)
            else:
                tnow = len(production[field].dropna())

            #comes from the constraint that p(tnow) = pnow
            y0 = prod_now * exp(tnow/tau)

            if is_logistic:
                lifetime = -tau * log(MIN_PROD/y0) + len(y)
            else:
                lifetime = -tau * log(MIN_PROD/y0)
            
            max_nfuture_months = 12 * (MAX_DATE.year - Timestamp(until).year) +\
                                      (MAX_DATE.month - Timestamp(until).month)
            if is_logistic:
                nfuture_months = min(max_nfuture_months, max(0, 
                    int(lifetime-tnow)+len(y)))
            else:
                nfuture_months = min(max_nfuture_months, 
                                     max(0, int(lifetime-tnow)))
       
            if nfuture_months == 0:
                ss.append(production[field])
                continue
            future = date_range(Timestamp(until) + MonthBegin(), 
                         periods=nfuture_months, freq='MS')
            if is_logistic:
                ly = list(y)
                ly.extend(y0 * exp(-arange(tnow+1, tnow+1+nfuture_months-len(y))/tau))
                prod_future = array(ly)
            else:
                prod_future = y0 * exp(-arange(tnow+1, tnow+1+nfuture_months)/tau)
            ss.append(Series(prod_future, index=future, name=field, dtype='float64'))
            is_logistic = False
        future_prod = concat(ss, axis=1)
        extended_prod = concat((production[fields], future_prod))
       
        return extended_prod

    if style == 'const':
        urr_estimates = classification['urr'].ix[fields]
        if country == 'NO':
            urr_estimates *= 1e6 * M3_TO_BARRELS
        ss = []
        for field in fields:
            prod_until_now = production[field].sum()
            prod_remaining = urr_estimates[field] - prod_until_now
            prod_now = production[field].dropna()[-5:].mean()
            n = prod_remaining / prod_now
            max_nfuture_months = 12 * (MAX_DATE.year - Timestamp(until).year) +\
                                      (MAX_DATE.month - Timestamp(until).month)
            nfuture_months = min(max_nfuture_months, max(0, int(n)))
 
            if nfuture_months == 0:
                ss.append(production[field])
                continue
            future = date_range(Timestamp(until) + MonthBegin(), 
                         periods=nfuture_months, freq='MS')
            prod_future = len(future) * [prod_now]
            ss.append(Series(prod_future, index=future, name=field))
        future_prod = concat(ss, axis=1)
        extended_prod = concat((production[fields], future_prod)) 

        return extended_prod
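In the 'urr' branch the exponential tail is pinned down by two constraints: its sum over the future equals the remaining reserves (giving tau = prod_remaining / prod_now) and it matches the current rate at t = tnow (giving y0 = prod_now * exp(tnow / tau)). A small numerical check of those constraints with made-up values:

from numpy import exp, arange, isclose

prod_now, prod_remaining, tnow = 1000., 120000., 300.   # illustrative values
tau = prod_remaining / prod_now
y0 = prod_now * exp(tnow / tau)

tail = y0 * exp(-arange(tnow + 1, tnow + 1 + 10000) / tau)
assert isclose(tail[0], prod_now * exp(-1. / tau))       # continues from the current rate
assert isclose(tail.sum(), prod_remaining, rtol=0.01)    # roughly exhausts the remaining URR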
Example 11
    x = arange(0., len(tot_prod))
    prod_multi = doublecycle(multi_parms['NO']['2003-02-01'], x)
    prod_multi = Series(prod_multi, index=tot_prod.index)

    if 'ax' not in kwargs.keys():
        fig = plt.figure(figsize=(w, l))
        ax = fig.gca()
    else:
        ax = kwargs['ax']

    tot_prod[:'2003-02-01'].plot(ax=ax, marker='o', ls='', color='k')
    prod_multi[:'2003-02-01'].plot(ax=ax, ls='-', color='r') 
    tot_prod['2003-02-01':END_DATE].plot(ax=ax, ls='-', color='b')
    prod_multi['2003-02-01':END_DATE].plot(ax=ax, ls='--', color='r')

    prod_2003_2013 = get_production('NO').sum(axis=1)['2003-02-01':].dropna()
    prod_2003_2013.plot(ax=ax, marker='x', ls='', color='k')

    l1, l2, l3, l4, l5 = ax.get_lines()
    ax.legend([l1, l5, l3, l4], ['Data up to 2003', 
        'Data from 2003 to 2013', 'Monte-Carlo forecast',
        'Hubbert forecast'], loc='upper right')

    print prod_multi['2013-02-01':].sum() / 1e9
    print tot_prod['2013-02-01':].sum() / 1e9

    ax.set_xlabel('Time')
    ax.set_ylabel('Norwegian oil production [barrels/day]')

    if 'ax' not in kwargs.keys():
        fig.tight_layout()