Example #1
0
 def generate_report(self, visualization='sankey'):
     """Build the knowledge-graph report and store it in ``self.report``.

     :param str visualization: ``'network'`` uses the plot returned by
         ``self.get_knowledge_graph_plot()``; ``'sankey'`` (default) draws
         the edge list of ``self.graph`` as a Sankey diagram. Any other
         value yields a report whose "Knowledge Graph" section is empty.
     """
     report = rp.Report(identifier="knowledge")
     # Defined up front: an unrecognised ``visualization`` used to leave
     # ``plots`` unbound, so building ``report.plots`` below raised
     # UnboundLocalError.
     plots = []
     if visualization == 'network':
         plots = [self.get_knowledge_graph_plot()]
     elif visualization == 'sankey':
         # The graph is generated on demand the first time it is needed.
         if self.graph is None:
             self.generate_knowledge_graph()
         # Edge attributes missing from the edge list are filled with 1.
         df = nx.to_pandas_edgelist(self.graph).fillna(1)
         sankey_args = {
             'source': 'source',
             'target': 'target',
             'source_colors': 'source_color',
             'target_colors': 'target_color',
             'hover': 'type',
             'pad': 10,
             'weight': 'weight',
             'orientation': 'h',
             'valueformat': '.0f',
             'width': 1600,
             'height': 2200,
             'font': 10,
             'title': 'Knowledge Graph'
         }
         plots = [
             viz.get_sankey_plot(df, self.identifier, args=sankey_args)
         ]
     report.plots = {("Knowledge Graph", "Knowledge Graph"): plots}
     self.report = report
Example #2
0
 def load_dataset_report(self, report_dir):
     """Load the dataset, then read its stored report into ``self.report``.

     The report is read from
     ``<directory of this module>/<report_dir>/<self.dataset_type>``.

     :param str report_dir: directory passed to ``self.load_dataset`` and
         joined onto this module's directory to locate the report.
     """
     self.load_dataset(report_dir)
     module_dir = os.path.abspath(os.path.dirname(__file__))
     dataset_dir = os.path.join(module_dir, report_dir, self.dataset_type)
     stored_report = rp.Report(self.dataset_type, {})
     stored_report.read_report(dataset_dir)
     self.report = stored_report
Example #3
0
    def generate_project_info_report(self):
        """Assemble and return the "project_info" report.

        The plots come, in this order, from the project-attributes,
        project-similarity and overlap plot generators; all of them are
        stored under the ("Project info", "Project Information") key.

        :return: the populated ``rp.Report`` instance.
        """
        project_report = rp.Report(identifier="project_info")

        # The attribute plots list is extended in place with the others.
        section_plots = self.generate_project_attributes_plot()
        for build_plots in (self.generate_project_similarity_plots,
                            self.generate_overlap_plots):
            section_plots.extend(build_plots())

        section_key = ("Project info", "Project Information")
        project_report.plots = {section_key: section_plots}

        return project_report
Example #4
0
 def load_project_report(self):
     """Load the project data and every stored report found on disk.

     Walks ``<directory of this module>/../../data/reports/<identifier>``
     and reads one report per sub-directory encountered. A report whose
     directory name matches a key of ``self.datasets`` is attached to that
     dataset; the "Knowledge" directory creates ``self.knowledge``; any
     other report is passed to ``self.update_report``.
     """
     self.load_project_data()
     module_dir = os.path.abspath(os.path.dirname(__file__))
     project_dir = os.path.join(module_dir, "../../data/reports/",
                                self.identifier)
     self.report = {}
     for parent_dir, subdirs, _files in os.walk(project_dir):
         for subdir in subdirs:
             stored = rp.Report(subdir, {})
             stored.read_report(os.path.join(parent_dir, subdir))
             if subdir in self.datasets:
                 self.datasets[subdir].report = stored
                 continue
             if subdir == "Knowledge":
                 self.knowledge = knowledge.Knowledge(self.identifier,
                                                      {'name': self.name},
                                                      report=stored)
                 continue
             self.update_report({subdir: stored})
Example #5
0
    def generate_report(self,
                        visualizations=('sankey',),
                        summarize=True,
                        method='betweenness',
                        inplace=True):
        """Build the knowledge-graph report and store it in ``self.report``.

        :param visualizations: iterable of plot names; ``'network'`` and
            ``'sankey'`` are recognised, other names are ignored. A single
            name given as a plain string is treated as a one-element list.
        :param summarize: forwarded to ``self.generate_knowledge_graph``.
        :param method: forwarded to ``self.generate_knowledge_graph``.
        :param inplace: forwarded to ``self.generate_knowledge_graph``.
        """
        # A bare string would otherwise be iterated character by character
        # and silently produce no plots.
        if isinstance(visualizations, str):
            visualizations = [visualizations]

        report = rp.Report(identifier="knowledge")
        plots = []
        G = None
        if self.graph is None:
            G = self.generate_knowledge_graph(summarize=summarize,
                                              method=method,
                                              inplace=inplace)

        # NOTE(review): when ``self.graph`` already exists, ``G`` stays None
        # and None is what the plot helpers receive -- presumably they fall
        # back to ``self.graph``; confirm against their implementations.
        for visualization in visualizations:
            if visualization == 'network':
                plots.append(self.get_knowledge_graph_plot(G))
            elif visualization == 'sankey':
                plots.append(self.generate_knowledge_sankey_plot(G))

        report.plots = {("Knowledge Graph", "Knowledge Graph"): plots}
        self.report = report
Example #6
0
    def generate_report(self):
        """Run the configured analyses and collect their plots in ``self.report``.

        ``self.configuration`` is walked section by section (the ``"args"``
        section is skipped). For every subsection the settings are obtained
        from ``self.extract_configuration``, the data is loaded, optionally
        replaced by the result of a stored query, and then either each
        configured analysis is run or, when no analysis is configured, the
        data itself is wrapped as the result. Plots are registered under
        keys whose first element is a running counter, so they keep the
        order in which they were produced. The executed steps are finally
        handed to ``self.add_configuration_to_report``.
        """
        self.report = rp.Report(identifier=self.dataset_type.capitalize(),
                                plots={})
        order = 1
        report_pipeline = {}
        if self.configuration is not None:
            for section in self.configuration:
                if section == "args":
                    continue
                report_step = {section: {}}
                for subsection in self.configuration[section]:
                    (description, data_names, analysis_types, plot_types,
                     store_analysis, args) = self.extract_configuration(
                         self.configuration[section][subsection])
                    if description is not None:
                        description = viz.get_markdown(description, args={})
                    report_step[section][subsection] = {
                        'data': data_names,
                        'analyses': [],
                        'args': {}
                    }
                    if isinstance(data_names, (dict, list)):
                        data = self.get_dataframes(data_names)
                    else:
                        data = self.get_dataframe(data_names)

                    # Nothing to analyse or plot without data.
                    if data is None or len(data) == 0:
                        continue

                    # NOTE(review): the stored query is only sent when
                    # ``args`` has a "use" entry, as in the original code;
                    # confirm that queries without placeholders are meant
                    # to be skipped.
                    if subsection in self.analysis_queries and "use" in args:
                        query = self.analysis_queries[subsection]
                        for r_id in args["use"]:
                            if r_id == "columns":
                                rep_str = args["use"][r_id].upper()
                                values = data.columns.unique().tolist()
                            elif r_id == "index":
                                rep_str = args["use"][r_id].upper()
                                values = data.index.unique().tolist()
                            elif r_id in data.columns:
                                rep_str = r_id.upper()
                                values = data[r_id].unique().tolist()
                            else:
                                # Unknown key: previously this fell through
                                # with ``rep_str``/``rep`` unbound (NameError)
                                # or left over from the previous key.
                                continue
                            rep = ",".join('"{}"'.format(i) for i in values)
                            query = query.replace(rep_str, rep)
                        data = self.send_query(query)

                    if description is not None:
                        self.report.update_plots({
                            (str(order), subsection + "_description", 'description'):
                            [description]
                        })
                        order += 1

                    if len(analysis_types) >= 1:
                        for analysis_type in analysis_types:
                            result = analytics_factory.Analysis(
                                subsection, analysis_type, args, data)
                            # The analysis object reports the type under
                            # which it stored its result.
                            analysis_type = result.analysis_type
                            if (analysis_type not in result.result
                                    or result.result[analysis_type] is None
                                    or len(result.result[analysis_type]) < 1):
                                continue
                            step = report_step[section][subsection]
                            step['analyses'].append(analysis_type)
                            step['args'] = result.args
                            report_pipeline.update(report_step)
                            if store_analysis:
                                if analysis_type.lower() in [
                                        "anova", "samr", "ttest", "ancova"
                                ]:
                                    reg_data = result.result[analysis_type]
                                    if not reg_data.empty:
                                        # NOTE(review): this rebinding also
                                        # changes the ``data`` passed to any
                                        # later analysis type of the same
                                        # subsection; kept as in the original.
                                        if isinstance(data, dict):
                                            data = data['processed']
                                        cols = ['group']
                                        if 'sample' in data.columns:
                                            cols.append('sample')
                                        if 'subject' in data.columns:
                                            cols.append('subject')
                                        # Columns of the rejected identifiers
                                        # plus the grouping columns.
                                        sig_hits = list(
                                            set(reg_data.loc[
                                                reg_data.rejected,
                                                "identifier"])) + cols
                                        sig_data = data[sig_hits]
                                        self.update_data({
                                            "regulated": sig_data,
                                            "regulation table": reg_data
                                        })
                                else:
                                    self.update_data({
                                        subsection + "_" + analysis_type:
                                        result.result[analysis_type]
                                    })
                            for plot_type in plot_types:
                                plots = result.get_plot(
                                    plot_type, subsection + "_" +
                                    analysis_type + "_" + plot_type)
                                self.report.update_plots({
                                    (str(order), subsection + "_" + analysis_type, plot_type):
                                    plots
                                })
                                order += 1
                    else:
                        # No analysis configured: wrap the data itself as the
                        # result so it can still be plotted.
                        name = "_".join(subsection.split(' '))
                        result = analytics_factory.Analysis(
                            self.identifier,
                            name,
                            args,
                            data,
                            result={name: data})
                        report_pipeline.update(report_step)
                        if store_analysis:
                            self.update_data({name: data})
                        for plot_type in plot_types:
                            plots = result.get_plot(plot_type,
                                                    name + "_" + plot_type)
                            self.report.update_plots({
                                (str(order), name, plot_type): plots
                            })
                            order += 1

        self.add_configuration_to_report(report_pipeline)