def test_grid(clrd):
    """Grid search over a Benktander pipeline matches the hand-built estimator chain."""
    # Aggregate the CLRD fixture to the medmal line of business.
    medmal = clrd.groupby("LOB").sum().loc["medmal"]
    paid = medmal["CumPaidLoss"]
    premium = medmal["EarnedPremDIR"].latest_diagonal
    # Assemble the development -> tail -> IBNR pipeline.
    pipe = cl.Pipeline(
        [
            ("dev", cl.Development()),
            ("tail", cl.TailCurve()),
            ("benk", cl.Benktander()),
        ]
    )
    # Single-point grid, scored on total IBNR of the fitted Benktander step.
    param_grid = {"benk__n_iters": [250], "benk__apriori": [1.00]}
    scoring = {"IBNR": lambda x: x.named_steps.benk.ibnr_.sum()}
    grid = cl.GridSearch(pipe, param_grid, scoring=scoring)
    grid.fit(paid, benk__sample_weight=premium)
    # The grid result must equal the same estimators fit manually in sequence.
    expected = (
        cl.Benktander(n_iters=250, apriori=1)
        .fit(
            cl.TailCurve().fit_transform(cl.Development().fit_transform(paid)),
            sample_weight=premium,
        )
        .ibnr_.sum()
    )
    assert grid.results_["IBNR"][0] == expected
def test_grid():
    """Grid search over a Benktander pipeline matches the hand-built estimator chain."""
    # Load data and aggregate the CLRD sample to the medmal line of business.
    clrd = cl.load_sample('clrd')
    medmal = clrd.groupby('LOB').sum().loc['medmal']
    paid = medmal['CumPaidLoss']
    premium = medmal['EarnedPremDIR'].latest_diagonal
    premium.rename('development', ['premium'])
    # Assemble the development -> tail -> IBNR pipeline.
    pipe = cl.Pipeline(
        [('dev', cl.Development()),
         ('tail', cl.TailCurve()),
         ('benk', cl.Benktander())])
    # Single-point grid, scored on total IBNR of the fitted Benktander step.
    param_grid = {'benk__n_iters': [250], 'benk__apriori': [1.00]}
    scoring = {'IBNR': lambda x: x.named_steps.benk.ibnr_.sum()}
    grid = cl.GridSearch(pipe, param_grid, scoring=scoring)
    grid.fit(paid, benk__sample_weight=premium)
    # The grid result must equal the same estimators fit manually in sequence.
    manual = cl.Benktander(n_iters=250, apriori=1).fit(
        cl.TailCurve().fit_transform(cl.Development().fit_transform(paid)),
        sample_weight=premium).ibnr_.sum()
    assert grid.results_['IBNR'][0] == manual
def test_pipeline():
    """Smoke-test every dev x tail x ibnr estimator combination through a Pipeline."""
    # Aggregate the CLRD sample to LOB level and derive case-incurred losses.
    tri = cl.load_sample('clrd').groupby('LOB').sum()[[
        'CumPaidLoss', 'IncurLoss', 'EarnedPremDIR'
    ]]
    tri['CaseIncurredLoss'] = tri['IncurLoss'] - tri['CumPaidLoss']
    X = tri[['CumPaidLoss', 'CaseIncurredLoss']]
    sample_weight = tri['EarnedPremDIR'].latest_diagonal
    # Candidate estimators for each pipeline stage.
    dev_options = [
        cl.Development(),
        cl.ClarkLDF(),
        cl.Trend(),
        cl.IncrementalAdditive(),
        cl.MunichAdjustment(
            paid_to_incurred=('CumPaidLoss', 'CaseIncurredLoss')),
        cl.CaseOutstanding(
            paid_to_incurred=('CumPaidLoss', 'CaseIncurredLoss')),
    ]
    tail_options = [
        cl.TailCurve(), cl.TailConstant(), cl.TailBondy(), cl.TailClark()]
    ibnr_options = [
        cl.Chainladder(), cl.BornhuetterFerguson(),
        cl.Benktander(n_iters=2), cl.CapeCod()]
    # Fit every combination end-to-end; any raised exception fails the test.
    for model in itertools.product(dev_options, tail_options, ibnr_options):
        print(model)
        pipeline = cl.Pipeline(
            steps=[('dev', model[0]), ('tail', model[1]), ('ibnr', model[2])])
        pipeline.fit_predict(
            X, sample_weight=sample_weight).ibnr_.sum(
                'origin').sum('columns').sum()
def test_pipeline_json_io():
    """Round-tripping a Pipeline through JSON preserves every step's parameters."""
    pipe = cl.Pipeline(
        steps=[('dev', cl.Development()), ('model', cl.BornhuetterFerguson())])
    pipe2 = cl.read_json(pipe.to_json())

    # Map step name -> estimator hyperparameters for comparison.
    def step_params(pipeline):
        return {name: est.get_params()
                for name, est in pipeline.get_params()['steps']}

    assert step_params(pipe) == step_params(pipe2)
def test_pipeline(tri, dev, tail, ibnr):
    """Fit one dev/tail/ibnr estimator combination end-to-end; a raise fails the test."""
    X = tri[['CumPaidLoss', 'CaseIncurredLoss']]
    sample_weight = tri['EarnedPremDIR'].latest_diagonal
    steps = [('dev', dev()), ('tail', tail()), ('ibnr', ibnr())]
    fitted = cl.Pipeline(steps=steps).fit_predict(X, sample_weight=sample_weight)
    # Collapse the IBNR triangle to a scalar to force full evaluation.
    fitted.ibnr_.sum('origin').sum('columns').sum()
def test_pipeline_json_io():
    """Serializing a Pipeline to JSON and back preserves step parameters."""
    original = cl.Pipeline(
        steps=[("dev", cl.Development()), ("model", cl.BornhuetterFerguson())]
    )
    restored = cl.read_json(original.to_json())
    # Compare per-step hyperparameters of the original and the round-trip copy.
    before = {
        name: est.get_params() for name, est in original.get_params()["steps"]
    }
    after = {
        name: est.get_params() for name, est in restored.get_params()["steps"]
    }
    assert before == after
def get_triangle_projections(triangles, average_methods=None, n_periods=None, grain='OYDY'):
    """Generate the main KPIs (ultimate loss, IBNR, loss development factors)
    for one triangle or a dict of triangles.

    Arguments:
        triangles: a dict of triangles or a single triangle.
        average_methods: method(s) used to derive the LDFs ('simple' or
            'volume' average); a list when several triangles are supplied,
            a scalar otherwise. Defaults to 'volume' for every triangle.
        n_periods: number of periods to look at per triangle (-1 means all
            periods, the default); list or scalar, like average_methods.
        grain: origin/development pattern, 'OxDy' with x and y in (Y, M, Q).

    Returns:
        A dict keyed by each triangle's first column name, each value being a
        dict with keys 'ldf' (loss development factors), 'cdf' (cumulative
        development factors), 'fit' (the fitted Chainladder model) and
        'full_triangle' (completed triangle with 'Ultimates' and 'IBNR'
        columns).
    """
    # Normalize the input so a single triangle is handled like a 1-entry dict.
    triangles_values = triangles.values() if isinstance(triangles, dict) else [triangles]
    triangles_keys = triangles.keys() if isinstance(triangles, dict) else [1]
    # Broadcast scalar/None settings into one-per-triangle lists.
    selected_average_methods = ['volume'] * len(triangles_keys) if average_methods is None else \
        average_methods if isinstance(average_methods, list) else [average_methods]
    selected_n_periods = [-1] * len(triangles_keys) if n_periods is None else \
        n_periods if isinstance(n_periods, list) else [n_periods]
    # Gets the different types of figures we are studying (asif cost, cost excl LL, count, etc.)
    triangles_names = [triangle.columns[0] for triangle in triangles_values]
    # Builds the triangle transformer with development attributes ; loops through the triangles
    triangles_dev = [
        cl.Pipeline([('dev', cl.Development(average=selected_average_methods[index],
                                            n_periods=selected_n_periods[index]))
                     ]).fit_transform(triangle.grain(grain))
        for index, triangle in enumerate(triangles_values)
    ]
    # Loops through the triangles_dev to derive the ldfs, cdfs and the fit method
    triangles_model = [(triangle_dev.ldf_, triangle_dev.cdf_, cl.Chainladder().fit(triangle_dev))
                       for triangle_dev in triangles_dev]
    # Loops through the triangles_model to build a dict with the name of the figures (claims cost, count, etc.)
    # as primary key and the main triangle characteristics as second keys
    # NOTE(review): the rename of column 9999 presumably maps the ultimate
    # development age produced by full_triangle_ to 'Ultimates' — confirm
    # against the chainladder version in use.
    return {value: {
        'ldf': triangles_model[index][0],
        'cdf': triangles_model[index][1],
        'fit': triangles_model[index][2],
        'full_triangle': pd.concat([triangles_model[index][2].full_triangle_.to_frame(),
                                    triangles_model[index][2].ibnr_.to_frame()]
                                   , axis=1).rename(columns={9999: 'Ultimates', value: 'IBNR'})
    } for index, value in enumerate(triangles_names)}
'1/1/2005', '1/1/2006', '1/1/2007', '1/1/2008' ], 'rate_change': [.02, .02, .02, .02, .05, .075, .15, .1, -.2, -.2] }) # Loss on-leveling factors tort_reform = pd.DataFrame({ 'date': ['1/1/2006', '1/1/2007'], 'rate_change': [-0.1067, -.25] }) # In addition to development, include onlevel estimator in pipeline for loss pipe = cl.Pipeline(steps=[('olf', cl.ParallelogramOLF(tort_reform, change_col='rate_change', date_col='date', vertical_line=True) ), ('dev', cl.Development( n_periods=2)), ('model', cl.CapeCod(trend=0.034))]) # Define X X = cl.load_sample('xyz')['Incurred'] # Separately apply on-level factors for premium sample_weight = cl.ParallelogramOLF(rate_history, change_col='rate_change', date_col='date', vertical_line=True).fit_transform( xyz['Premium'].latest_diagonal) # Fit Cod Estimator
==================================================== Basic Assumption Tuning with Pipeline and Gridsearch ==================================================== This example demonstrates testing multiple number of periods in the development transformer to see its influence on the overall ultimate estimate. """ import seaborn as sns sns.set_style('whitegrid') import chainladder as cl tri = cl.load_dataset('abc') # Set up Pipeline steps = [('dev', cl.Development()), ('chainladder', cl.Chainladder())] params = dict(dev__n_periods=[item for item in range(2, 11)]) pipe = cl.Pipeline(steps=steps) # Develop scoring function that returns an Ultimate/Incurred Ratio scoring = lambda x: x.named_steps.chainladder.ultimate_.sum( ) / tri.latest_diagonal.sum() # Run GridSearch grid = cl.GridSearch(pipe, params, scoring).fit(tri) # Plot Results grid.results_.plot(x='dev__n_periods', y='score', marker='o').set(ylabel='Ultimate / Incurred')