def calculate_TSNE(self, indecies=None, sample=5000, n_jobs=1, multicore=False, **kwargs):
    """
    Calculate a two-dimensional TSNE embedding and store it in ``self.tsne``.

    The embedding is computed on the rows of ``self.fractions`` (optionally
    subset and/or randomly downsampled), then joined to ``self.clusters`` on
    ``db_id`` so each embedded row carries its cluster annotation.

    Args:
        indecies (list): select a subset of rows by index label;
            if None use every row
        sample (int): number of cells to downsample to;
            if None use them all
        n_jobs (int): number of cpus to use (only used if multicore is True)
        multicore (bool): use the MulticoreTSNE package instead of sklearn
        **kwargs: pass any other arguments to TSNE

    Raises:
        ValueError: if the object is not open in a writable mode, or if an
            ``n_components`` other than 2 is requested.
    """
    if self.mode not in ('w', 'r+', 'a'):
        raise ValueError('cant write for readonly')

    # The result is stored as exactly two columns ('x' and 'y'). Reject any
    # other dimensionality now rather than after the expensive fit, where it
    # would only surface as a pandas shape-mismatch error.
    n_components = kwargs.get('n_components', 2)
    if n_components != 2:
        raise ValueError(
            "calculate_TSNE stores the embedding as 'x' and 'y'; "
            "n_components must be 2, got {!r}".format(n_components)
        )

    dsdata = self.fractions.copy()
    if indecies is not None:
        dsdata = dsdata.loc[indecies]
    if sample is not None:
        # Clamp so we never request more rows than are available.
        dsdata = dsdata.sample(n=min(sample, dsdata.shape[0]))

    if self.verbose:
        sys.stderr.write("executing TSNE decomposition on " + str(dsdata.shape[0]) + " cells\n")

    if multicore:
        # Imported lazily so the optional package is only required when used.
        from MulticoreTSNE import MulticoreTSNE as mTSNE
        if self.verbose:
            sys.stderr.write("Using MulticoreTSNE\n")
        embedding = mTSNE(n_jobs=n_jobs, **kwargs).fit_transform(dsdata)
    else:
        if self.verbose:
            sys.stderr.write("Using sklearn TSNE\n")
        embedding = TSNE(**kwargs).fit_transform(dsdata)

    # Re-attach the original row labels so the coordinates can be joined
    # back to the cluster table.
    tsne = pd.DataFrame(embedding, columns=['x', 'y'], index=dsdata.index)

    # NOTE(review): the merge key 'db_id' must appear as a column after
    # reset_index(), which presumably means the index of self.fractions is
    # named 'db_id' -- confirm against the caller.
    cluster_columns = ['cluster_id', 'k', 'db_id'] + self.groupby
    tsne = tsne.reset_index().merge(
        self.clusters.reset_index()[cluster_columns],
        on=['db_id'])
    tsne['cluster_name'] = tsne['cluster_id'].astype(str)
    self.tsne = tsne
"Off-Target", "Blocked", "Corners", "Offsides", "Free Kicks", \ "Saves", "Pass Accuracy %", "Passes", "Distance Covered (Kms)", \ "Fouls Committed", "Yellow Card", "Yellow & Red", "Red"] # , "1st Goal" names = ["Goal Scored", "On-Target", "Off-Target", "Ball Possession %", \ "Fouls Committed"] data_file_name = "../data/FIFA_2018_Statistics.csv" df = pd.read_csv(data_file_name) df = df[names] df_norm = (df - df.mean()) / (df.max() - df.min()) print(df_norm) input_data_mat = np.array(df_norm) df_embedded = TSNE(n_components=2).fit_transform(df_norm) # pca = PCA(n_components=2) # df_embedded = pca.fit(input_data_mat).transform(input_data_mat) print(df_embedded) df_embedded = pd.DataFrame(df_embedded) df_embedded.reset_index(inplace=True) df_embedded = df_embedded.rename(columns={0: "x", 1: "y"}) print(df_embedded) df_embedded.to_csv("../data/tsne-results.csv", index=0)