'seniority', 'company_id_12', 'company_id_9', 'company_id_10', 'company_id_6', 'company_id_7', 'company_id_8', 'company_id_1', 'dept_design', 'company_id_11', 'company_id_5' ], axis=1)
# NOTE(review): the statement closed above starts before this section; it
# looks like the tail of a column-dropping call (axis=1) that produces
# data3 -- confirm against the preceding code.

# Preview the reduced frame.
data3.head()

# Cast every column to int64 before fitting.
# NOTE(review): if data3 is a pandas DataFrame, astype('int64') truncates
# fractional values and raises on NaN -- confirm all columns hold whole,
# non-missing numbers.
data4 = data3.astype('int64')
data4.info()
data4.columns

# Fit the model on the reduced feature set, using 'duration' as the duration
# column and 'observed' as the event column.
# NOTE(review): cph is created outside this section -- presumably a lifelines
# CoxPHFitter; confirm.
cph.fit(data4, duration_col='duration', event_col='observed', show_progress=True)
cph.print_summary()

# Author's observations on the fit:
# - The model finally converged once enough variables were removed; with the
#   larger feature set there were too many features for it to converge.
# - Salary looks like a big predictor of people leaving -- possibly the lower
#   the salary, the more likely they are to leave (still a question).
# - Working for company 2 also appears associated with leaving.
# - Being an engineer also appears associated with a higher chance of leaving.
# - Some aspects of this model suggest this way of modeling is not the best:
#   the relationship does not appear to be linear, so this model probably
#   should not be used.

# NOTE(review): score_ may be deprecated in newer lifelines releases in favour
# of concordance_index_ -- verify against the installed version.
cph.score_

# NOTE(review): presumably this runs lifelines' diagnostic checks of the
# model's assumptions against the training data -- confirm.
cph.check_assumptions(data4)

# Report the installed lifelines version.
# NOTE(review): pkg_resources is deprecated by setuptools;
# importlib.metadata.version("lifelines") is the standard-library replacement
# on Python 3.8+ -- consider switching once no other code needs pkg_resources.
import pkg_resources
import lifelines
pkg_resources.get_distribution("lifelines").version