def create_info(self):
    """
    Create several info dataframes for the input data.

    Attributes set by this method:
        self.candidate_features ... result of ``hp.get_features`` for the
                                    ``self.top_n`` top-ranked values
        self.peaks_per_feature .... top peaks per feature for features that
                                    are hit by top peaks, ordered by the
                                    highest ``peak_height`` of each group
                                    (descending)
        self.top_peaks ............ top peaks with the features that are close

    After these are set, ``CandidateEnrichment.create_info`` is invoked on
    this instance.
    """
    # ``Series.sort`` no longer exists in pandas; ``sort_values`` returns a
    # sorted copy and leaves ``self.value_s`` untouched.
    ranked = self.value_s.sort_values(ascending=self.ascending)
    top_s = ranked.iloc[: self.top_n]

    self.candidate_features = hp.get_features(
        top_s,
        self.feature_df,
        feature_name=self.feature_name,
        max_dist=self.max_dist,
    )

    # Restrict the feature table to the candidates, then restore the
    # positional ("chrom", "start") index.
    # ``.ix`` was removed from pandas; ``.loc`` is the label-based equivalent.
    # NOTE(review): ``.loc`` raises KeyError for labels missing from the
    # index -- assumes every candidate feature occurs in ``self.feature_df``.
    by_feature = self.feature_df.reset_index().set_index(self.feature_name)
    sub_feature_df = (
        by_feature.loc[self.candidate_features]
        .reset_index()
        .set_index(["chrom", "start"])
    )

    self.peaks_per_feature = get_peaks(
        sub_feature_df, top_s, self.max_dist, feature_name=self.feature_name
    )

    # Group peak heights by the first element of each index entry, take the
    # maximum per group, and reorder the table so that the groups with the
    # highest peaks come first.
    features_sort_by_max = (
        self.peaks_per_feature["peak_height"]
        .groupby(lambda i: i[0])
        .max()
        .sort_values(ascending=False)
    )
    self.peaks_per_feature = self.peaks_per_feature.loc[features_sort_by_max.index]

    self.top_peaks = self.get_peak_info(top_s, self.peaks_per_feature)

    # The parent implementation is called explicitly on CandidateEnrichment
    # (not via super()).
    CandidateEnrichment.create_info(self)
def get_association(self, value_s):
    """
    Compute the association for the top-ranked entries of ``value_s``.

    ``value_s`` is sorted by value (direction given by ``self.ascending``),
    the first ``self.top_n`` entries are passed to ``hp.get_features`` to
    obtain candidate features, and those candidates are handed to
    ``CandidateEnrichment.get_association``.

    Parameters
    ----------
    value_s : pandas Series
        Values to rank. The caller's object is not modified.

    Returns
    -------
    Whatever ``CandidateEnrichment.get_association`` returns for the
    candidate features.
    """
    # ``Series.sort`` no longer exists in pandas; ``sort_values`` returns a
    # sorted copy. Chaining avoids holding a second name for the full
    # sorted copy.
    top_s = value_s.sort_values(ascending=self.ascending).iloc[: self.top_n]

    candidate_features = hp.get_features(
        top_s,
        self.feature_df,
        feature_name=self.feature_name,
        max_dist=self.max_dist,
    )
    return CandidateEnrichment.get_association(self, candidate_features)