Esempio n. 1
0
def ttest_location(channel, location, df):
    """
    Run ``stats.ttest`` for one location against its channel.

    The rows selected by ``df.loc[channel]`` and ``df.loc[location]`` are
    each flattened to 1-D with ``np.ravel``; the location values are passed
    as the first sample and the channel values as the second.

    Returns whatever ``stats.ttest`` returns for those two samples.
    """
    channel_values, location_values = (
        np.ravel(df.loc[label]) for label in (channel, location)
    )
    return stats.ttest(location_values, channel_values)
Esempio n. 2
0
    def run(self, experiment: Experiment) -> SPIAResult:
        """
        Run SPIA for the differentially expressed genes of ``experiment``.

        Genes whose ``ttest`` value is at or below ``self.threshold`` are
        treated as differentially expressed (DEGs). Every pathway that
        ``KEGGPathways.search_by_gene`` reports for a DEG is fetched and turned
        into per-interaction-type lists of ``[source_index, target_index]``
        pairs (indices into that pathway's ``row_names`` gene list), and the
        whole structure is handed to ``SPIA.calculate_spia``.

        Args:
            experiment: experiment providing the samples and fold changes.

        Returns: a list of pathways: pathway id, pathway name, pNDE, pPERT, pG, FDR correction,
            Bonferroni correction, status for each pathway. An empty result is
            returned when there are no DEGs or no pathways are found.

        """
        pvalue = ttest(experiment) <= self.threshold
        calc_f = experiment.calculate_fold_change()

        # Names of every tested gene (the reference set passed to SPIA).
        all_genes = [gene.name for gene in pvalue.index.tolist()]

        # `== True` is an element-wise mask here, not a truthiness test.
        significant = pvalue[pvalue == True].index.tolist()  # noqa: E712
        de = {gene.name: calc_f["FC"][gene] for gene in significant}

        if not de:
            # if there are no DEGs anywhere, the problem of finding the impact on various pathways is meaningless
            print('No differentialy expressed genes.')
            return SPIAResult([])

        db = KEGGPathways(self.organism)

        # Pathway id -> description; the first description seen for an id wins.
        pathways = {}
        for gene in de:
            for pathway_id, description in db.search_by_gene(gene).items():
                pathways.setdefault(pathway_id, description)

        if not pathways:
            print('No pathways found in database.')
            return SPIAResult([])

        pathway_data = {}
        for pathway_id in pathways:
            pathway = db.get_pathway(pathway_id)
            path_genes = list(set(pathway.nodes))
            # Position of each gene in `path_genes`; replaces repeated
            # O(n) `list.index` scans with O(1) lookups.
            gene_position = {
                gene: position for position, gene in enumerate(path_genes)
            }

            # Start with an empty edge list for every known relation type.
            interaction_list = {relation: [] for relation in rel}
            edge_types = get_edge_attributes(pathway, 'type')
            for edge, interaction in edge_types.items():
                relation = '_'.join(interaction)
                interaction_list.setdefault(relation, []).append([
                    gene_position[edge[0]],
                    gene_position[edge[1]],
                ])
            interaction_list['row_names'] = path_genes
            pathway_data[pathway_id] = interaction_list

        pathway_data['id2name'] = pathways

        s = SPIA.calculate_spia(de, all_genes, pathway_data)
        result = SPIAResult(s)
        if self.markdown:
            result.generate_markdown(self.markdown, 'Results of SPIA:')
        return result
Esempio n. 3
0
def test_ttest():
    """``ttest`` on an experiment returns a Series covering all of its genes."""
    expression = {
        'Tumour_1': {'BAD': 1.2345, 'FUCA2': 6.5432},
        'Tumour_2': {'BAD': 2.3456, 'FUCA2': 7.6543},
        'Normal_1': {'BAD': 6.3456, 'FUCA2': 11.6543},
        'Normal_2': {'BAD': 7.1111, 'FUCA2': 9.9711},
    }

    def build_collection(label, sample_names):
        # One Sample per name, wrapped in a collection with the given label.
        samples = [
            Sample.from_names(name, expression[name]) for name in sample_names
        ]
        return SampleCollection(label, samples)

    tumour = build_collection('Tumour', ('Tumour_1', 'Tumour_2'))
    normal = build_collection('Normal', ('Normal_1', 'Normal_2'))

    experiment = Experiment(case=tumour, control=normal)
    tt = ttest(experiment)

    assert isinstance(tt, pd.Series)
    reported_genes = list(tt.keys())
    assert all(gene in reported_genes for gene in experiment.get_all().genes)
 def test_ttest_norm(self):
     """``ttest`` matches ``1 - pvalue`` of ``ttest_ind(..., equal_var=False)``."""
     actual = ttest(self.cart_array2, self.cart_array3)
     reference = ttest_ind(
         self.cart_array2, self.cart_array3, equal_var=False)
     expected = 1 - reference.pvalue
     self.assertTrue(float_equals(actual, expected))
 def test_ttest_short2(self):
     """``ttest(cart_array4, cart_array3)`` equals ``c.single_item_cart_max``."""
     actual = ttest(self.cart_array4, self.cart_array3)
     expected = c.single_item_cart_max
     self.assertTrue(float_equals(expected, actual))
 def test_ttest_less(self):
     """``ttest(cart_array1, cart_array2)`` equals ``c.low_score``."""
     actual = ttest(self.cart_array1, self.cart_array2)
     expected = c.low_score
     self.assertTrue(float_equals(expected, actual))
    def run(self, experiment: Experiment) -> ImpactAnalysisResult:
        """
        Run impact analysis on ``experiment``.

        Stores the experiment's gene names in ``self.experiment_genes`` and
        its fold changes in ``self.FC``, excludes genes whose fold change is
        NaN, determines the differentially expressed genes (``self.degs``),
        and scores every pathway the database reports for those genes.

        Returns:
            list of pathways sorted by their impact factor. Each pathway in the list has values of FDR and
            Bonferroni corrections assigned. The result is empty when there
            are no differentially expressed genes or no pathways are found.
        """
        self.experiment_genes = {
            gene.name for gene in experiment.get_all().genes
        }

        # calculate fold change
        self.FC = experiment.calculate_fold_change()

        # remove genes for which fold change cannot be calculated correctly
        undefined_fc = self.FC['FC'][isnan(self.FC['FC'])].index
        experiment.exclude_genes(list(undefined_fc))

        if self.degs:
            # NOTE(review): `self.experiment_genes` holds gene *names* while
            # this filter tests `Gene` objects, and it keeps the supplied
            # genes that are NOT in that set - confirm this is intended.
            supplied = {}
            for name in self.degs:
                if Gene(name) not in self.experiment_genes:
                    supplied[Gene(name)] = True
            self.degs = pd.Series(supplied)
        else:
            # select differentially expressed genes
            significant = ttest(experiment) <= self.threshold
            self.degs = significant[significant == True]  # noqa: E712

        if self.degs.size == 0:
            # if there are no DEGs anywhere, the problem of finding the impact on various pathways is meaningless
            print('No differentialy expressed genes.')
            return ImpactAnalysisResult([])

        db = KEGGPathways(self.org)

        # pathway code -> description; first description seen for a code wins
        pathways = {}
        deg_names = [deg.name for deg in self.degs.index]
        for deg_name in deg_names:
            for code, description in db.search_by_gene(deg_name).items():
                pathways.setdefault(code, description)

        if not pathways:
            print('No pathways found in database.')
            return ImpactAnalysisResult([])

        res = pd.DataFrame(columns=['name', 'IF', 'pvalue'])
        for code, description in pathways.items():
            pathway = db.get_pathway(code)
            impact_factor, pval = self.calculate_impact_factor(
                experiment, pathway)
            # pathways without a usable score are left out of the table
            if impact_factor is None or pval is None:
                continue
            res.loc[len(res.index)] = [description, impact_factor, pval]

        corrections = self.calculate_corrections(res['pvalue'])
        res['FDR'], res['Bonferroni'] = corrections

        def impact_or_zero(pathway):
            # NaN impact factors are ranked as if they were 0
            return 0 if isnan(pathway.IF) else pathway.IF

        ifp_pathways = sorted(
            (IAPathway(res.loc[row]) for row in range(len(res.index))),
            key=impact_or_zero,
            reverse=True)

        result = ImpactAnalysisResult(ifp_pathways)
        if self.markdown:
            result.generate_markdown(self.markdown,
                                     'Results of Impact Analysis:')
        return result
    def get_group_figure(self):
        """Group average figure of skeleton

        Draws ``self.df['mean']`` per ``group`` with seaborn ``catplot``
        (stored in ``self.g``) and annotates the axes with:

        - x tick labels carrying the number of rows in each group
        - a horizontal line at each group's average
        - the ``ttest`` result for every pair of groups
        - an ANOVA result when there are more than two groups
        """
        plt.style.use('default')

        means = self.df['mean']
        # unique() keeps first-appearance order; reused for hue order,
        # tick labels, average lines and pairwise comparisons.
        groups = self.df.group.unique()

        self.g = sns.catplot(x='group',
                             y='mean',
                             hue='group',
                             hue_order=groups,
                             data=self.df)

        if means.mean() < 0.005:
            # Small-valued data: set the y range explicitly, padded by a
            # third of the standard deviation on BOTH sides so the highest
            # points are not clipped (the upper bound used to subtract it).
            margin = means.std() / 3
            self.g.ax.set_ylim(means.min() - margin, means.max() + margin)

        self.g.fig.set_size_inches(8, 4)
        self.g.fig.set_dpi(150)
        self.g.ax.set_ylabel(f'{self.modality}')
        self.g.ax.set_xlabel('Group')
        self.g.ax.set_title(
            f'Average {self.modality} in skeleton for all subjects',
            fontweight='bold')

        # tick labels to have number of rows in each group
        def get_ticklabels(tmp_df, group):
            row_count_for_group = len(tmp_df[tmp_df.group == group])
            return f'{group} ({row_count_for_group})'

        self.g.ax.set_xticklabels(
            [get_ticklabels(self.df, group) for group in groups])

        # Group the table once; used for average lines and comparisons.
        by_group = self.df.groupby('group')

        # average line: short horizontal segment centred on each group
        line_width = 0.3
        for num, group in enumerate(groups):
            average = by_group.get_group(group)['mean'].mean()
            self.g.ax.plot([num - line_width, num + line_width],
                           [average, average])

        # Add stat information to the graph, stacked downwards from
        # `height` (axes coordinates) just right of the plot area.
        height = 0.9
        two_groups_perm = list(combinations(groups, 2))

        if len(groups) == 2:
            height_step = 0.8 / len(two_groups_perm)
        else:
            # leave one extra slot for the ANOVA annotation
            height_step = 0.8 / (len(two_groups_perm) + 1)

        # two group comparisons
        # TODO: add ANCOVA
        for g1, g2 in two_groups_perm:
            g1_means = by_group.get_group(g1)['mean']
            g2_means = by_group.get_group(g2)['mean']

            t, p, dof = ttest(g1_means, g2_means)

            # trailing '*' marks p < 0.05
            star = '*' if p < 0.05 else ''
            text = (f'{g1} vs {g2}\nT ({int(dof)}) = {t:.2f}, '
                    f'P = {p:.2f}{star}')

            self.g.ax.text(1, height, text, transform=self.g.ax.transAxes)
            height -= height_step

        # ANOVA if there are more than two groups
        if len(groups) > 2:
            anova_df = anova(self.df, 'mean ~ group')
            f_val = anova_df.loc['group', 'F']
            dof = anova_df.loc['group', 'df']
            p = anova_df.loc['group', 'PR(>F)']

            star = '*' if p < 0.05 else ''
            text = (f'ANOVA\nF ({int(dof)}) = '
                    f'{f_val:.2f}, P = {p:.2f}{star}')
            self.g.ax.text(1, height, text, transform=self.g.ax.transAxes)
Esempio n. 9
0
    saveText(collegeData)

    return collegeData


collegeData = getFullData(colleges, reddit)

# --- Running T-Test ---
# Compare the polarity data of two colleges.
clg1Pol = clg.getPolarityData("princeton", reddit)
clg2Pol = clg.getPolarityData("mit", reddit)

print(stats.ttest(clg1Pol, clg2Pol))

# --- End T-Test ---

# --- Creating Bar Chart ---
# The first element of each college's entry is used as its bar height.
collegePolarity = [collegeData[college][0] for college in collegeData]

fig, ax = plt.subplots()
# The fourth bar is drawn in red, every other bar in blue.
rect = plt.bar(
    x=range(len(collegeData)),
    height=collegePolarity,
    color=["blue"] * 3 + ["red"] + ["blue"] * 16,
)

ax.set_ylabel('Average Sentiment score vs Colleges')
ax.set_title('Sentiment Score')
plt.xticks(rotation=90)