def test_churn_predictor(self):
    """Smoke-test the churn predictor's public surface.

    Builds a small synthetic activity log (500 events across 5 users),
    trains a churn predictor on it, and checks that the model exposes
    exactly the expected set of public attributes/methods.
    """
    # Arrange: 500 timestamps spaced 20000s apart, users distributed so
    # that users 1-3 are the most active and user 5 the least.
    time = [1453845953 + 20000 * x for x in range(500)]
    user = [1, 2, 3, 4, 5] * 20 + [1, 2, 3, 4] * 25 + [1, 2, 3] * 100
    actions = turicreate.SFrame({
        "user_id": user,
        "timestamp": time,
        "action": [1, 2, 3, 4, 5] * 100,
    })

    def _unix_timestamp_to_datetime(x):
        import datetime
        return datetime.datetime.fromtimestamp(x)

    actions["timestamp"] = actions["timestamp"].apply(
        _unix_timestamp_to_datetime)
    actions = turicreate.TimeSeries(actions, "timestamp")

    # Act
    m = turicreate.churn_predictor.create(actions)
    actual = [x for x in dir(m) if not x.startswith("_")]

    # Assert.
    # NOTE(review): the original list contained '_list_fields' twice; the
    # duplicate is removed (set-style comparison makes it redundant). The
    # remaining '_list_fields' entry is underscore-prefixed and can never
    # appear in `actual` (which filters underscore names) — kept for
    # backward compatibility, but verify check_visible_modules semantics.
    expected = [
        "categorical_features",
        "evaluate",
        "extract_features",
        "get_feature_importance",
        "churn_period",
        "grace_period",
        "features",
        "get",
        "is_data_aggregated",
        "_list_fields",
        "lookback_periods",
        "model_options",
        "name",
        "num_features",
        "num_observations",
        "num_users",
        "numerical_features",
        "predict",
        "explain",
        "processed_training_data",
        "save",
        "show",
        "summary",
        "time_boundaries",
        "time_period",
        "trained_model",
        "trained_explanation_model",
        "get_churn_report",
        "get_activity_baseline",
        "views",
        "use_advanced_features",
        "user_id",
    ]
    check_visible_modules(actual, expected)
def test_churn_predictor(self):
    """Smoke-test the churn predictor's public surface.

    Builds a small synthetic activity log (500 events across 5 users),
    trains a churn predictor on it, and checks that the model exposes
    exactly the expected set of public attributes/methods.
    """
    # Arrange: 500 timestamps spaced 20000s apart, users distributed so
    # that users 1-3 are the most active and user 5 the least.
    time = [1453845953 + 20000 * x for x in range(500)]
    user = [1, 2, 3, 4, 5] * 20 + [1, 2, 3, 4] * 25 + [1, 2, 3] * 100
    actions = turicreate.SFrame({
        'user_id': user,
        'timestamp': time,
        'action': [1, 2, 3, 4, 5] * 100,
    })

    def _unix_timestamp_to_datetime(x):
        import datetime
        return datetime.datetime.fromtimestamp(x)

    actions['timestamp'] = actions['timestamp'].apply(
        _unix_timestamp_to_datetime)
    actions = turicreate.TimeSeries(actions, 'timestamp')

    # Act
    m = turicreate.churn_predictor.create(actions)
    actual = [x for x in dir(m) if not x.startswith('_')]

    # Assert.
    # NOTE(review): the original list contained '_list_fields' twice; the
    # duplicate is removed (set-style comparison makes it redundant). The
    # remaining '_list_fields' entry is underscore-prefixed and can never
    # appear in `actual` (which filters underscore names) — kept for
    # backward compatibility, but verify check_visible_modules semantics.
    expected = ['categorical_features', 'evaluate', 'extract_features',
                'get_feature_importance', 'churn_period', 'grace_period',
                'features', 'get', 'is_data_aggregated', '_list_fields',
                'lookback_periods', 'model_options', 'name',
                'num_features', 'num_observations', 'num_users',
                'numerical_features', 'predict', 'explain',
                'processed_training_data', 'save', 'show', 'summary',
                'time_boundaries', 'time_period', 'trained_model',
                'trained_explanation_model', 'get_churn_report',
                'get_activity_baseline', 'views', 'use_advanced_features',
                'user_id']
    check_visible_modules(actual, expected)
def get_venue_authors_links_timeseries(self):
    """Return a TimeSeries of co-authorship links, indexed by paper datetime.

    Self-joins the paper/author table on "Paper ID" so every pair of
    authors of the same paper yields one row; self-pairs are dropped.
    Because the network is directed, both (u, v) and (v, u) are kept.
    """
    a = self.authors_affilations_sframe["Paper ID", "Author ID"]
    a = self._all_papers_sf.join(a, on="Paper ID")
    a = a['datetime', 'Author ID', 'Paper publish year', 'Paper ID']
    # Self-join: one row per ordered pair of co-authors of a paper.
    links_sf = a.join(a, on="Paper ID")
    # NOTE(review): removed an unused fetch of
    # self.papers_sframe["Paper ID", "Paper publish year"] — its result
    # was never referenced.
    links_sf.rename({'Author ID': 'src_id', 'Author ID.1': 'dst_id'})
    links_sf = links_sf["src_id", "dst_id", "datetime"]
    # Drop self-links (an author paired with themselves).
    links_sf = links_sf[links_sf["src_id"] != links_sf["dst_id"]]
    # because this is a direct network we keep for each link both (u,v) and (v,u)
    return tc.TimeSeries(links_sf, index="datetime")
def get_venue_authors_timeseries(self):
    """Return a TimeSeries of authors keyed by their first publication year.

    For each author, computes the earliest ("mindate") and latest
    ("maxdate") publish year, converts both to January-1st datetimes,
    and returns a TimeSeries indexed on "mindate". Returns None when
    no authors are found.
    """
    papers = self._all_papers_sf["Paper ID", "Paper publish year"]
    authors = self.authors_affilations_sframe["Paper ID", "Author ID"]
    joined = papers.join(authors, on="Paper ID")["Author ID",
                                                 "Paper publish year"]
    per_author = joined.groupby("Author ID", {
        "mindate": agg.MIN("Paper publish year"),
        "maxdate": agg.MAX("Paper publish year"),
    })
    per_author.rename({"Author ID": "v_id"})

    def _year_to_datetime(y):
        # Map a bare publish year to midnight on January 1st of that year.
        return datetime(year=y, month=1, day=1)

    for column in ("mindate", "maxdate"):
        per_author[column] = per_author[column].apply(_year_to_datetime)
    if per_author.num_rows() == 0:
        return None
    return tc.TimeSeries(per_author, index="mindate")