def finish(cls):
    """Build per-game feature vectors, reduce them with ``pca`` and pickle them.

    Works entirely on names defined outside this function (``dv``, ``pca``,
    ``all_players``, ``all_games``, ``future_games``, ``last_known_players``
    and the ``features`` / ``labels_*`` / ``futures_*`` accumulator lists).

    Steps:
      1. Fit ``dv`` on ``all_players``.
      2. For every game in ``all_games``, transform its players' names with
         ``dv``, add the resulting rows together, and append the dense sum to
         ``features``; the home/away scores go to ``labels_home`` /
         ``labels_away``.
      3. Fit ``pca`` on ``features``.
      4. For every entry in ``future_games``, build the same kind of summed
         vector from the last known players of both teams and append its
         ``pca``-transformed form to ``futures_home`` and ``futures_away``.
      5. Write four gzip-compressed pickles next to the path returned by
         ``Transformer.get_pickle_filename(cls.__name__)``.

    NOTE(review): ``dv`` and ``pca`` look like a scikit-learn DictVectorizer
    and PCA, and the rows look like SciPy sparse matrices (``.toarray()`` is
    called on their sum) -- confirm against the defining module.

    NOTE(review): a game with no players leaves the summed vector as ``None``
    and the subsequent ``.toarray()`` call raises ``AttributeError``, as in
    the previous implementation.
    """

    def _sum_rows(rows):
        # Add all rows of ``rows`` together; returns None when there are none.
        total = None
        for row in rows:
            # Identity test on purpose: "== None" on an array-like object is
            # dispatched to that object's own __eq__ instead of checking
            # whether the accumulator has been initialised yet.
            total = row if total is None else total + row
        return total

    def _dump_gz(obj, path):
        # Pickle ``obj`` into a gzip file and close the handle deterministically
        # so the compressed stream is fully flushed to disk.
        with gzip.open(path, 'wb') as handle:
            cPickle.dump(obj, handle, cPickle.HIGHEST_PROTOCOL)

    dv.fit(all_players)

    # Each entry of all_games is a 3-tuple; only the game object is used here.
    for (g, _, _) in all_games:
        # Read "name" straight from the instance dict, exactly as before.
        this_games_players = [{'name': p.__dict__["name"]} for p in g.players]
        game_vector = _sum_rows(dv.transform(this_games_players))

        features.append(game_vector.toarray()[0])
        labels_home.append(g.score_home)
        labels_away.append(g.score_away)

    pca.fit(features)

    # Each entry of future_games is a 4-tuple; the last two fields are unused.
    for (home, away, _, _) in future_games:
        matchup_players = last_known_players[home] + last_known_players[away]
        dense_vector = _sum_rows(dv.transform(matchup_players)).toarray()[0]

        # Both sides get features computed from the same combined vector;
        # transform twice so the two entries do not share one array object.
        futures_home.append({
            'name': home,
            'features': pca.transform(dense_vector)
        })
        futures_away.append({
            'name': away,
            'features': pca.transform(dense_vector)
        })

    # Transform the training features once; np.array() gives each tuple its
    # own copy of the reduced matrix.
    reduced = pca.transform(features)
    train_home = (np.array(reduced), np.array(labels_home))
    train_away = (np.array(reduced), np.array(labels_away))

    dir_name = os.path.dirname(Transformer.get_pickle_filename(cls.__name__))
    _dump_gz(train_home, os.path.join(dir_name, "train_home.pickle.gz"))
    _dump_gz(train_away, os.path.join(dir_name, "train_away.pickle.gz"))
    _dump_gz(futures_home, os.path.join(dir_name, "futures_home.pickle.gz"))
    _dump_gz(futures_away, os.path.join(dir_name, "futures_away.pickle.gz"))
Exemple #2
0
 def finish(cls):
     """Persist the externally defined ``rows`` as a CSV file and a pickle.

     ``rows`` is loaded into a pandas DataFrame whose index is named
     ``"index"``. The frame is then written to the paths returned by
     ``Transformer.get_csv_filename`` and ``Transformer.get_pickle_filename``
     for this class's name, CSV first.
     """
     frame = pd.DataFrame.from_dict(rows)
     frame.index.name = "index"

     class_name = cls.__name__

     # Resolve each target path right before writing to it.
     csv_path = Transformer.get_csv_filename(class_name)
     frame.to_csv(csv_path)

     pickle_path = Transformer.get_pickle_filename(class_name)
     frame.to_pickle(pickle_path)