def finish(cls):
    """Build PCA-reduced training and future-game feature sets and pickle them.

    Relies on names defined outside this function (``dv``, ``pca``,
    ``all_players``, ``all_games``, ``future_games``, ``last_known_players``,
    ``features``, ``labels_home``, ``labels_away``, ``futures_home``,
    ``futures_away``); the accumulator lists are appended to in place.

    Steps:
      1. Fit ``dv`` on ``all_players``.
      2. For every entry of ``all_games``, vectorize the names of the game's
         players, sum the resulting rows into one vector and append it to
         ``features``; the home/away scores go to ``labels_home`` /
         ``labels_away``.
      3. Fit ``pca`` on ``features``.
      4. For every entry of ``future_games``, vectorize the last known
         players of both teams, sum the rows, project the vector with
         ``pca`` and append it to ``futures_home`` (named after the home
         team) and ``futures_away`` (named after the away team). Both
         entries carry the same combined feature vector.
      5. Write the train/futures sets as gzip-compressed pickles next to the
         file returned by ``Transformer.get_pickle_filename(cls.__name__)``.

    Note: a game (or pair of teams) with no players yields no rows, so the
    summed vector stays ``None`` and the following ``.toarray()`` call fails.

    Args:
        cls: class whose ``__name__`` selects the output directory.
    """

    def _sum_rows(rows):
        # Collapse the per-player rows into a single vector for the roster.
        total = None
        for row in rows:
            # Identity test: `== None` on a matrix is an element-wise /
            # type-dependent comparison, not a reliable "unset" check.
            total = row if total is None else total + row
        return total

    def _dump_gzip(obj, path):
        # Close the gzip stream deterministically so its trailer is flushed.
        with gzip.open(path, 'wb') as handle:
            cPickle.dump(obj, handle, cPickle.HIGHEST_PROTOCOL)

    dv.fit(all_players)

    # The two trailing fields of each game tuple are not needed here.
    for game, _unused_a, _unused_b in all_games:
        # Read the name straight from the instance dict, as the original did.
        roster = [{'name': player.__dict__["name"]} for player in game.players]
        combined = _sum_rows(dv.transform(roster))
        features.append(combined.toarray()[0])
        labels_home.append(game.score_home)
        labels_away.append(game.score_away)

    pca.fit(features)

    for home, away, _week, _year in future_games:
        roster = last_known_players[home] + last_known_players[away]
        combined = _sum_rows(dv.transform(roster))
        # Keep the (1, n_features) shape: transform() expects 2-D input, and
        # older scikit-learn promoted a 1-D vector to this same shape anyway.
        sample = combined.toarray()
        # Two separate transform calls so the entries do not share an array.
        futures_home.append({'name': home, 'features': pca.transform(sample)})
        futures_away.append({'name': away, 'features': pca.transform(sample)})

    # Project the training matrix once; np.array() copies, so the two tuples
    # still hold independent arrays.
    reduced = pca.transform(features)
    train_home = (np.array(reduced), np.array(labels_home))
    train_away = (np.array(reduced), np.array(labels_away))

    dir_name = os.path.dirname(Transformer.get_pickle_filename(cls.__name__))
    train_home_name = os.path.join(dir_name, "train_home.pickle.gz")
    train_away_name = os.path.join(dir_name, "train_away.pickle.gz")
    future_home_name = os.path.join(dir_name, "futures_home.pickle.gz")
    future_away_name = os.path.join(dir_name, "futures_away.pickle.gz")

    _dump_gzip(train_home, train_home_name)
    _dump_gzip(train_away, train_away_name)
    _dump_gzip(futures_home, future_home_name)
    _dump_gzip(futures_away, future_away_name)
def finish(cls):
    """Persist the collected ``rows`` as both a CSV file and a pickle.

    ``rows`` is defined outside this function. It is loaded into a
    DataFrame whose index is labelled ``"index"``, then written to the paths
    that ``Transformer.get_csv_filename`` and
    ``Transformer.get_pickle_filename`` return for ``cls.__name__``.

    Args:
        cls: class whose ``__name__`` selects the output file names.
    """
    frame = pd.DataFrame.from_dict(rows)
    frame.index.name = "index"

    csv_path = Transformer.get_csv_filename(cls.__name__)
    frame.to_csv(csv_path)

    pickle_path = Transformer.get_pickle_filename(cls.__name__)
    frame.to_pickle(pickle_path)