def test_score(self):
    """Fit the propensity model on synthetic data with a continuous, a
    binary, and a categorical confounder, then check that the recovered
    logistic coefficients are close to the generating values."""
    n = 100000
    z1 = np.random.normal(size=n)
    z2 = np.random.choice([0, 1], size=n)
    z3 = np.random.choice(['a', 'b', 'c'], size=n)
    # The categorical levels enter the true assignment logit via these values,
    # so the dummy coefficients should come out near 1 ('b') and 2 ('c').
    level_effect = {'a': 3, 'b': 4, 'c': 5}
    z3_effect = [level_effect[level] for level in z3]
    logit = z1 + z2 + z3_effect
    p_assign = np.exp(logit) / (1. + np.exp(logit))
    assignment = np.random.binomial(1, p_assign)
    outcome = np.random.normal(assignment)
    matcher = PropensityScoreMatching()
    X = pd.DataFrame({
        'z1': z1,
        'z2': z2,
        'z3': z3,
        'assignment': assignment,
        'outcome': outcome
    })
    matcher.score(X, {'z1': 'c', 'z2': 'o', 'z3': 'o'}, store_model_fit=True)
    fitted = matcher.model_fit.params
    assert 0.9 <= fitted['z1'] <= 1.1
    assert 0.9 <= fitted['z2'] <= 1.1
    assert 0.0 <= fitted['z3_b'] <= 2.0
    assert 1.0 <= fitted['z3_c'] <= 3.0
    assert 2.0 <= fitted['intercept'] <= 4.0
def sxxp_womenBoard_tobin():
    """Estimate the ATE of the percentage of women on the board on Tobin's Q
    for the SXXP universe, then persist the result to MySQL.

    M&M: A large percentage of women in the board could also affect
    negatively the performance.
    """
    data, types = getData("sxxp_fboard", "corp_gov_causal")
    # NOTE(review): the original computed `controlFor = stageAlgo(types)` but
    # never used it; dropped here — confirm stageAlgo has no required side effects.
    treatment = 'X..Women.on.Bd'
    target = 'Tobins.Q'
    matcher = PropensityScoreMatching()
    ATE_results = matcher.estimate_ATE(
        data, treatment, target,
        {'P.B': 'c', 'Asset': 'c', 'Tax': 'c', 'P.E': 'c'},
        bootstrap=True)
    # Write results to MySQL. Fixed: values are bound as query parameters
    # instead of %-interpolated into the SQL text (injection-prone and broken
    # by any quote character in the values), and the connection is closed
    # even if the insert raises.
    conn = MySQLdb.connect(host="localhost", user="******", passwd="", db="causal_results")
    try:
        cur = conn.cursor()
        cur.execute(
            "insert into akelleh_results values (%s,%s,%s,%s,%s,%s);",
            (str(now), "sxxp", treatment, target, str(ATE_results),
             "A large percentage of women in the board could also affect negatively the performance."))
        conn.commit()
    finally:
        conn.close()
    print("Done")
def spx_indepDirFinlL_azs():
    """Estimate the ATE of (independent lead director AND financial leverage
    > 2.5) on the Altman Z-score for the SPX universe, then persist it.

    M&M: ...but also the presence of an independent lead director in the
    company along with a financial leverage higher than 2.5 incur a higher
    risk of bankruptcy.

    Latest Results: (-0.47855609106276292, -0.4343301267327499, -0.3864914259963988)
    Comments: So this 'treatment' causes quite a dip in AZS, which is what
    MM are saying.
    """
    data, types = getData("spx_indepdirfincl", "corp_gov_causal")
    # NOTE(review): dropped unused `controlFor = stageAlgo(types)` — confirm
    # stageAlgo has no required side effects.
    treatment = 'Indep.Lead.Dir.Fincl..l'
    target = 'AZS'
    matcher = PropensityScoreMatching()
    ATE_results = matcher.estimate_ATE(
        data, treatment, target,
        {'P.EBITDA': 'c', 'P.B': 'c', 'Asset': 'c', 'Tax': 'c', 'P.E': 'c'},
        bootstrap=True)
    # Fixed: parameterized insert (was %-interpolated SQL, an injection risk
    # that also broke on quotes) and guaranteed connection close.
    conn = MySQLdb.connect(host="localhost", user="******", passwd="", db="causal_results")
    try:
        cur = conn.cursor()
        cur.execute(
            "insert into akelleh_results values (%s,%s,%s,%s,%s,%s);",
            (str(now), "spx", treatment, target, str(ATE_results),
             "...but also the presence of an independent lead director in the company along with a financial leverage higher than 2.5 incur a higher risk of bankruptcy."))
        conn.commit()
    finally:
        conn.close()
    print("Done")
def spx_fceo_tobin():
    """Estimate the ATE of having a female CEO on Tobin's Q for the SPX
    universe, then persist the result to MySQL.

    M&M: This is my own.

    Latest Results: (-0.48556936715099608, -0.3878325746547393, -0.29127847792553346)
    Comments: Non-zero influence?
    """
    data, types = getData("spx_fceo", "corp_gov_causal")
    # NOTE(review): dropped unused `controlFor = stageAlgo(types)` — confirm
    # stageAlgo has no required side effects.
    treatment = 'Feml.CEO.or.Equiv'
    target = 'Tobins.Q'
    matcher = PropensityScoreMatching()
    ATE_results = matcher.estimate_ATE(
        data, treatment, target,
        {'P.EBITDA': 'c', 'P.B': 'c', 'Asset': 'c', 'Tax': 'c', 'P.E': 'c'},
        bootstrap=True)
    # Fixed: parameterized insert (was %-interpolated SQL, an injection risk
    # that also broke on quotes) and guaranteed connection close.
    conn = MySQLdb.connect(host="localhost", user="******", passwd="", db="causal_results")
    try:
        cur = conn.cursor()
        cur.execute(
            "insert into akelleh_results values (%s,%s,%s,%s,%s,%s);",
            (str(now), "spx", treatment, target, str(ATE_results),
             "This is my own"))
        conn.commit()
    finally:
        conn.close()
    print("Done")
def test_match(self):
    """The single treated unit (score 3) should be matched to the three
    nearest control propensity scores: 2, 3, and 4."""
    psm = PropensityScoreMatching()
    frame = pd.DataFrame({'assignment': [1, 0, 0, 0, 0, 0],
                          'propensity score': [3, 1, 2, 3, 5, 4]})
    _, matched_controls = psm.match(frame, n_neighbors=3)
    assert set(matched_controls['propensity score'].values) == {2, 3, 4}
def eebp_finlL_tobins():
    """Estimate the ATE of the financial-leverage treatment on Tobin's Q for
    the EEBP universe, then persist the result to MySQL.

    M&M: ...and that a financial leverage less than 4 is needed in order to
    be on the upper side of the Tobin's Q ratio.
    """
    data, types = getData("eebp_fl", "corp_gov_causal")
    # NOTE(review): dropped unused `controlFor = stageAlgo(types)` — confirm
    # stageAlgo has no required side effects.
    treatment = 'Fincl.l.treatment'
    target = 'Tobins.Q'
    matcher = PropensityScoreMatching()
    ATE_results = matcher.estimate_ATE(
        data, treatment, target,
        {'P.B': 'c', 'Asset': 'c', 'Tax': 'c', 'P.E': 'c'},
        bootstrap=True)
    # Fixed: parameterized insert (was %-interpolated SQL, an injection risk
    # that also broke on quotes) and guaranteed connection close.
    conn = MySQLdb.connect(host="localhost", user="******", passwd="", db="causal_results")
    try:
        cur = conn.cursor()
        cur.execute(
            "insert into akelleh_results values (%s,%s,%s,%s,%s,%s);",
            (str(now), "eebp", treatment, target, str(ATE_results),
             "...and that a financial leverage less than 4 is needed in order to be on the upper side of the Tobins Q ratio"))
        conn.commit()
    finally:
        conn.close()
    print("Done")
def psm(tdf, i):
    """Estimate the ATE of each bias/misinfo cause on moderation for one
    data slice, treating the other causes plus metadata and linguistic
    features as confounders, and dump bootstrap samples to CSV.

    tdf: input DataFrame; i: slice index used in the output filename.
    """
    t0 = time.time()
    cause_set = [
        "bias_party", "bias_degree", "misinfo_factcheck", "misinfo_veracity"
    ]
    for cause in cause_set:
        # Base confounders: observed metadata counts plus linguistic markers.
        confound_dict = {
            "meta_like": "o",
            "meta_dislike": "o",
            "meta_view": "o",
            "linguist_swear": "u",
            "linguist_laugh": "u",
            "linguist_emoji": "u",
            "linguist_fake": "u",
            "linguist_administration": "u",
            "linguist_american": "u",
            "linguist_nation": "u",
            "linguist_personal": "u"
        }
        # Every cause other than the current treatment is also a confounder.
        confound_dict.update({other: "u" for other in cause_set if other != cause})
        matcher = PropensityScoreMatching()
        samples = matcher.estimate_ATE(tdf, cause, "moderated", confound_dict,
                                       bootstrap=True)
        samples.to_csv(os.path.join(ate_path, cause + "_per" + str(i) + ".csv"),
                       index=False)
    # Report total elapsed time for this slice.
    print(time.time() - t0)
def test_at_estimators(self):
    """The ATE estimate should recover the true effect (1.0) when treatment
    assignment follows a logistic model of three confounders."""
    n = 1000  # number of data points
    z1 = 0.5 * np.random.normal(size=n)
    z2 = 0.5 * np.random.normal(size=n)
    z3 = 0.5 * np.random.normal(size=n)
    score_arg = z1 + z2 + z3 + np.random.normal(size=n)
    # Propensity to receive treatment, P(d|z), logistic in the confounders.
    propensity = np.exp(score_arg) / (1. + np.exp(score_arg))
    d = np.random.binomial(1, propensity)
    # Effect of d is confounded by z; the true ATE is 1.
    y = np.random.normal(size=n) + (z1 + z2 + z3 + 1.) * d
    X = pd.DataFrame({'d': d, 'z1': z1, 'z2': z2, 'z3': z3,
                      'y': y, 'p': propensity})
    matcher = PropensityScoreMatching()
    ate = matcher.estimate_ATE(X, 'd', 'y', {'z1': 'c', 'z2': 'c', 'z3': 'c'})
    assert 0.9 <= ate <= 1.1
def test_at_estimators(self):
    """Over repeated simulations, ATT, ATC, and their average (ATE) should
    land near the true treatment effect of 3.1 (between 3.0 and 4.0)."""
    att_samples, atc_samples, ate_samples = [], [], []
    for _ in range(100):
        n = 1000
        df = pd.DataFrame(np.random.choice([0.25, 0.75], size=n), columns=['Z'])
        df.loc[:, 'assignment'] = np.random.binomial(1, p=df['Z'])
        df.loc[:, 'outcome'] = np.random.normal(3.1 * df['assignment'] + 2.0 * df['Z'])
        psm = PropensityScoreMatching()
        att = psm.estimate_ATT(df, 'assignment', 'outcome', {'Z': 'c'},
                               n_neighbors=10)
        # Flipping the assignment turns the ATT estimator into an ATC estimator.
        df.loc[:, 'inverted assignment'] = (df['assignment'] + 1) % 2
        atc = psm.estimate_ATT(df, 'inverted assignment', 'outcome', {'Z': 'c'},
                               n_neighbors=10)
        att_samples.append(att)
        atc_samples.append(atc)
        ate_samples.append((att + atc) / 2.)
    summary = pd.DataFrame({'att': att_samples, 'ate': ate_samples,
                            'atc': atc_samples})
    assert (3.0 <= summary.mean()).all()
    assert (summary.mean() <= 4.0).all()
def eebp_ageRange_tobins():
    """Estimate the ATE of the board members' age range on Tobin's Q for the
    EEBP universe, then persist the result to MySQL.

    M&M: we found that a smaller age range for the board members is
    positively related with the companies' performance.
    """
    data, types = getData("eebp_agerange", "corp_gov_causal")
    # NOTE(review): dropped unused `controlFor = stageAlgo(types)` — confirm
    # stageAlgo has no required side effects.
    treatment = 'BOD.Age.Rng'
    target = 'Tobins.Q'
    matcher = PropensityScoreMatching()
    ATE_results = matcher.estimate_ATE(
        data, treatment, target,
        {'P.B': 'c', 'Asset': 'c', 'Tax': 'c', 'P.E': 'c'},
        bootstrap=True)
    # Fixed: parameterized insert (was %-interpolated SQL, an injection risk
    # that also broke on quotes) and guaranteed connection close.
    conn = MySQLdb.connect(host="localhost", user="******", passwd="", db="causal_results")
    try:
        cur = conn.cursor()
        cur.execute(
            "insert into akelleh_results values (%s,%s,%s,%s,%s,%s);",
            (str(now), "eebp", treatment, target, str(ATE_results),
             "we found that a smaller age range for the board members is positively related with the companies performance"))
        conn.commit()
    finally:
        conn.close()
    print("Done")
def sxxp_indepDirFormerCEOBoard_tobin():
    """Estimate the ATE of having a lead independent director or a former
    CEO on the board on Tobin's Q for the SXXP universe, then persist it.

    M&M: the presence of an independent lead director or a former CEO in the
    board could be a sign of weaker performances, being negatively
    correlated with Tobin's Q.

    Latest Results: (0.012699075182737657, 0.05961530907139483, 0.099317124249211436)
    Comments: Nothing much, but is positive contrary to what MM say.
    """
    data, types = getData("sxxp_indepdirfceo", "corp_gov_causal")
    # NOTE(review): dropped unused `controlFor = stageAlgo(types)` — confirm
    # stageAlgo has no required side effects.
    treatment = 'Indep.Lead.Dir.Feml.CEO.or.Equiv'
    target = 'Tobins.Q'
    matcher = PropensityScoreMatching()
    ATE_results = matcher.estimate_ATE(
        data, treatment, target,
        {'P.B': 'c', 'Asset': 'c', 'Tax': 'c', 'P.E': 'c'},
        bootstrap=True)
    # Fixed: parameterized insert (was %-interpolated SQL, an injection risk
    # that also broke on quotes) and guaranteed connection close.
    conn = MySQLdb.connect(host="localhost", user="******", passwd="", db="causal_results")
    try:
        cur = conn.cursor()
        cur.execute(
            "insert into akelleh_results values (%s,%s,%s,%s,%s,%s);",
            (str(now), "sxxp", treatment, target, str(ATE_results),
             "the presence of an independent lead director or a former CEO in the board could be a sign of weaker performances, being negatively correlated with Tobins Q"))
        conn.commit()
    finally:
        conn.close()
    print("Done")
def test_match(self):
    """With one treated row at score 3, three-neighbor matching should pick
    the controls whose scores are 2, 3, and 4."""
    matcher = PropensityScoreMatching()
    assignments = [1, 0, 0, 0, 0, 0]
    scores = [3, 1, 2, 3, 5, 4]
    data = pd.DataFrame({'assignment': assignments,
                         'propensity score': scores})
    treated, controls = matcher.match(data, n_neighbors=3)
    matched_scores = set(controls['propensity score'].values)
    assert matched_scores == {2, 3, 4}
def test_match(self):
    """Interior treated scores (2 and 4) should be matched to their three
    nearest control scores."""
    matcher = PropensityScoreMatching()
    scores = [1, 2, 3, 4, 5]
    frame = pd.DataFrame({"assignment": [1] * 5 + [0] * 5,
                          "propensity score": scores + scores})
    treated, untreated = matcher.match(frame, n_neighbors=3)
    # 'matches' holds, per treated row, the control indices of its neighbors.
    idx = treated[treated["propensity score"] == 2]["matches"].values[0][0]
    assert set(untreated.iloc[idx]["propensity score"].values) == {1, 2, 3}
    idx = treated[treated["propensity score"] == 4]["matches"].values[0][0]
    assert set(untreated.iloc[idx]["propensity score"].values) == {3, 4, 5}
def test_match(self):
    """Each interior treated score should match the three closest control
    scores (2 -> {1,2,3}, 4 -> {3,4,5})."""
    psm = PropensityScoreMatching()
    frame = pd.DataFrame({
        'assignment': [1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
        'propensity score': [1, 2, 3, 4, 5, 1, 2, 3, 4, 5]
    })
    treated, controls = psm.match(frame, n_neighbors=3)
    for score, expected in ((2, {1, 2, 3}), (4, {3, 4, 5})):
        neighbor_idx = treated[treated['propensity score'] == score]['matches'].values[0][0]
        assert set(controls.iloc[neighbor_idx]['propensity score'].values) == expected
def test_score(self):
    """Check the logistic coefficients recovered by score() on data with one
    continuous and one categorical confounder."""
    n = 5000
    z1 = np.random.normal(size=n)
    z2 = np.random.choice(['a', 'b', 'c'], size=n)
    # Category levels drive the true assignment logit through these values.
    level_effect = {'a': 3, 'b': 4, 'c': 5}
    z2_effect = [level_effect[level] for level in z2]
    logit = z1 + z2_effect
    p_assign = np.exp(logit) / (1. + np.exp(logit))
    assignment = np.random.binomial(1, p_assign)
    outcome = np.random.normal(assignment)
    matcher = PropensityScoreMatching()
    X = pd.DataFrame({'z1': z1, 'z2': z2,
                      'assignment': assignment, 'outcome': outcome})
    matcher.score(X, {'z1': 'c', 'z2': 'o'}, store_model_fit=True)
    params = matcher.propensity_score_model.params
    assert 0.7 <= params['z1'] <= 1.3
    assert 0.0 <= params['z2_b'] <= 2.0
    assert 1.0 <= params['z2_c'] <= 3.0
    assert 2.0 <= params['intercept'] <= 4.0
def eebp_indepChaFCEO_azs():
    """Estimate the ATE of having an independent chairperson or a female CEO
    on the binary Altman Z-score class for the EEBP universe, then persist
    the result to MySQL.  (Docstring fixed: the original said "financial
    leverage on tobins", copied from another function.)

    M&M: ...to be on the "safe" zone of the Altman Z-score it is important
    to have an independent chairperson or even a woman as CEO.
    """
    data, types = getData("eebp_indepChFmlCEO", "corp_gov_causal")
    # Hand-picked confounders (stageAlgo output deliberately not used here).
    controlFor = {
        'P.B': 'c',
        'Fincl..l': 'c',
        'Asset': 'c',
        'Tax': 'c',
        'P.E': 'c',
        'OPM.T12M': 'c',
        'P.EBITDA': 'c',
        'EV.EBITDA.T12M': 'c',
        'ROC': 'c',
        'ROE': 'c',
        'BOD.Age.Rng': 'c',
        'Norm.NI.to.NI.for.Cmn..': 'c',
        'Cash.Gen.Cash.Reqd': 'c',
        'Bd.Avg.Age': 'c',
        'X5Yr.Avg.Adj.ROE': 'c',
        # 'Dvd.Yld' was deliberately excluded in the original.
        'EBITDA.Sh': 'c',
        'Net.Debt.to.EBITDA': 'c'
    }
    treatment = 'Indep.Chrprsn.Feml.CEO.or.Equiv'
    target = 'AZS.class.Binary'
    matcher = PropensityScoreMatching()
    ATE_results = matcher.estimate_ATE(data, treatment, target, controlFor,
                                       bootstrap=True)
    # Fixed: parameterized insert (was %-interpolated SQL, an injection risk
    # that also broke on quotes) and guaranteed connection close.
    conn = MySQLdb.connect(host="localhost", user="******", passwd="", db="causal_results")
    try:
        cur = conn.cursor()
        cur.execute(
            "insert into akelleh_results values (%s,%s,%s,%s,%s,%s);",
            (str(now), "eebp", treatment, target, str(ATE_results),
             "...to be on the safe zone of the Altman Z-score it is important to have an independent chairperson or even a woman as CEO."))
        conn.commit()
    finally:
        conn.close()
    print("Done")
def test_at_estimators(self):
    """estimate_ATE should recover the true ATE (1.0) under logistic
    treatment selection on three confounders."""
    n = 1000  # sample size
    # Confounders, each scaled down to keep propensities away from 0/1.
    z1 = 0.5 * np.random.normal(size=n)
    z2 = 0.5 * np.random.normal(size=n)
    z3 = 0.5 * np.random.normal(size=n)
    arg = z1 + z2 + z3 + np.random.normal(size=n)
    p = np.exp(arg) / (1. + np.exp(arg))  # P(d|z), logistic form
    d = np.random.binomial(1, p)
    # Outcome confounded by z; average unit effect is 1.
    y = np.random.normal(size=n) + (z1 + z2 + z3 + 1.) * d
    frame = pd.DataFrame({'d': d, 'z1': z1, 'z2': z2, 'z3': z3,
                          'y': y, 'p': p})
    psm = PropensityScoreMatching()
    estimate = psm.estimate_ATE(frame, 'd', 'y',
                                {'z1': 'c', 'z2': 'c', 'z3': 'c'})
    assert 0.9 <= estimate <= 1.1
def test_score(self):
    """score() should recover the generating logistic coefficients for a
    continuous, a binary, and a categorical confounder."""
    size = 100000
    cont = np.random.normal(size=size)
    binary = np.random.choice([0, 1], size=size)
    categorical = np.random.choice(["a", "b", "c"], size=size)
    mapping = {"a": 3, "b": 4, "c": 5}
    cat_numeric = [mapping[v] for v in categorical]
    arg = cont + binary + cat_numeric
    p_assign = np.exp(arg) / (1.0 + np.exp(arg))
    assignment = np.random.binomial(1, p_assign)
    outcome = np.random.normal(assignment)
    matcher = PropensityScoreMatching()
    frame = pd.DataFrame({"z1": cont, "z2": binary, "z3": categorical,
                          "assignment": assignment, "outcome": outcome})
    matcher.score(frame, {"z1": "c", "z2": "o", "z3": "o"},
                  store_model_fit=True)
    coef = matcher.model_fit.params
    # Dummy coefficients are relative to level "a" (true gaps: 1 and 2).
    assert 0.9 <= coef["z1"] <= 1.1
    assert 0.9 <= coef["z2"] <= 1.1
    assert 0.0 <= coef["z3_b"] <= 2.0
    assert 1.0 <= coef["z3_c"] <= 3.0
    assert 2.0 <= coef["intercept"] <= 4.0
def spx_wmOnBoard_tobin():
    """Estimate the ATE of women on the board (>20%) on Tobin's Q for the
    SPX universe, report common support and covariate balance before/after
    matching, and persist the result to MySQL.

    M&M: For the American companies inside the S&P 500 index, we found a
    positive correlation between the percentage higher than 20% of women in
    the board and the Tobin's Q ratio.

    Latest Results: (-0.094388682158789469, -0.04962212239013655, -0.0068850052276448973)
    Comments: So no real causal influence here, wrong sign.
    """
    data, types = getData("spx_fboard", "corp_gov_causal")
    # NOTE(review): dropped unused `controlFor = stageAlgo(types)` — confirm
    # stageAlgo has no required side effects.
    treatment = 'X..Women.on.Bd'
    target = 'Tobins.Q'
    confounders = {'P.EBITDA': 'c', 'P.B': 'c', 'Asset': 'c', 'Tax': 'c', 'P.E': 'c'}
    balance_columns = {'P.EBITDA': 'c', 'P.B': 'c', 'Asset': 'c', 'Tax': 'c',
                       'P.E': 'c', 'propensity score': 'c'}
    matcher = PropensityScoreMatching()
    ATE_results = matcher.estimate_ATE(data, treatment, target, confounders,
                                       bootstrap=True)
    matcher.check_support(data, treatment, confounders)
    print("")
    print("Balance before matching")
    print(matcher.assess_balance(data, treatment, balance_columns))
    print("")
    data = matcher.score(data, assignment=treatment, confounder_types=confounders)
    treated, control = matcher.match(data, assignment=treatment)
    print("")
    print("Balance after matching")
    # NOTE(review): DataFrame.append was removed in pandas 2.0; if this file
    # imports pandas, prefer pd.concat([treated, control]).
    print(matcher.assess_balance(treated.append(control), treatment,
                                 balance_columns))
    print("")
    # Fixed: parameterized insert (was %-interpolated SQL, an injection risk
    # that also broke on quotes) and guaranteed connection close.
    conn = MySQLdb.connect(host="localhost", user="******", passwd="", db="causal_results")
    try:
        cur = conn.cursor()
        cur.execute(
            "insert into akelleh_results values (%s,%s,%s,%s,%s,%s);",
            (str(now), "spx", treatment, target, str(ATE_results),
             "For the American companies inside the S and P 500 index, we found a positive correlation between the percentage higher than 20pct of women in the board and the Tobins Q ratio"))
        conn.commit()
    finally:
        conn.close()
    print("Done")
def test_at_estimators(self):
    """Repeated ATT/ATC estimation on simulated data: all three averaged
    estimators (att, atc, ate) should fall in [3.0, 4.0] around the true
    effect of 3.1."""
    results = {"att": [], "atc": [], "ate": []}
    for _ in range(100):
        n = 1000
        data = pd.DataFrame(np.random.choice([0.25, 0.75], size=n),
                            columns=["Z"])
        data.loc[:, "assignment"] = np.random.binomial(1, p=data["Z"])
        data.loc[:, "outcome"] = np.random.normal(
            3.1 * data["assignment"] + 2.0 * data["Z"])
        psm = PropensityScoreMatching()
        att = psm.estimate_ATT(data, "assignment", "outcome", {"Z": "c"},
                               n_neighbors=10)
        # Estimating ATT on the flipped assignment yields the ATC.
        data.loc[:, "inverted assignment"] = (data["assignment"] + 1) % 2
        atc = psm.estimate_ATT(data, "inverted assignment", "outcome",
                               {"Z": "c"}, n_neighbors=10)
        results["att"].append(att)
        results["atc"].append(atc)
        results["ate"].append((att + atc) / 2.0)
    summary = pd.DataFrame({"att": results["att"], "ate": results["ate"],
                            "atc": results["atc"]})
    assert (3.0 <= summary.mean()).all()
    assert (summary.mean() <= 4.0).all()
df = pd.read_csv(comments_path)
cause_set = [
    "bias_party", "bias_degree", "misinfo_factcheck", "misinfo_veracity"
]
# Estimate the ATE of each cause on moderation, treating metadata counts,
# linguistic markers, and all other causes as confounders.
for cause in cause_set:
    confound_dict = dict.fromkeys(["meta_like", "meta_dislike", "meta_view"], "o")
    confound_dict.update(dict.fromkeys(
        ["linguist_swear", "linguist_laugh", "linguist_emoji", "linguist_fake",
         "linguist_administration", "linguist_american", "linguist_nation",
         "linguist_personal"], "u"))
    for other_cause in cause_set:
        if other_cause != cause:
            confound_dict[other_cause] = "u"
    matcher = PropensityScoreMatching()
    samples = matcher.estimate_ATE(df, cause, "moderated", confound_dict,
                                   bootstrap=True)
    samples.to_csv(os.path.join(ate_path, cause + ".csv"), index=False)
    print(cause, confound_dict)
import pandas as pd
import numpy as np
from causality.estimation.parametric import PropensityScoreMatching

# Synthetic demo for PropensityScoreMatching: treatment d depends on three
# confounders through a logistic model, and the unit-level effect is
# y1 - y0 = z1 + z2 + z3, so the true ATE is E[z1 + z2 + z3] = 0.
N = 10000
z1 = np.random.normal(size=N)
z2 = np.random.normal(size=N)
z3 = np.random.normal(size=N)
# Logistic propensity; dividing by 4 keeps probabilities away from 0 and 1.
p_d = 1. / (1. + np.exp(-(z1 + z2 + z3)/4.))
d = np.random.binomial(1, p=p_d)
# NOTE(review): y0 is a single scalar draw shared by every unit (no size=N),
# so all control outcomes are identical — possibly intended
# np.random.normal(size=N); confirm before relying on this demo.
y0 = np.random.normal()
y1 = y0 + z1 + z2 + z3
# Observed outcome: potential outcome selected by the treatment indicator.
y = (d==1)*y1 + (d==0)*y0
X = pd.DataFrame({'d': d, 'z1': z1, 'z2': z2, 'z3': z3, 'y': y, 'y0': y0, 'y1': y1, 'p': p_d})
print(X.head(10))
matcher = PropensityScoreMatching()
# Estimate the ATE controlling for the three continuous ('c') confounders.
print(matcher.estimate_ATE(X, 'd', 'y', {'z1': 'c', 'z2': 'c', 'z3': 'c'}))
# Diagnostics: common support of propensity scores and covariate balance.
matcher.check_support(X, 'd', {'z1': 'c', 'z2': 'c', 'z3': 'c'})
print(matcher.assess_balance(X, 'd', {'z1': 'c', 'z2': 'c', 'z3': 'c'}))