Example #1
def calculate_metric_or_summary(
    annotated_variant_data: AnnotatedVariantData,
    evaluation_data: EvaluationData,
    report: Union[Type[PerformanceMetric], Type[PerformanceSummary]]
) -> Dict[Plugin, Any]:
    """ Calculates a metrics or a summary for all plugins in the annotated variant data.

    Parameters
    ----------
    annotated_variant_data : AnnotatedVariantData
        The annotated variant data

    evaluation_data : EvaluationData
        The evaluation data

    report : Union[Type[PerformanceMetric], Type[PerformanceSummary]]
        The performance summary or metric that should be calculated

    Returns
    -------
    Dict[Plugin, Any]
        A dictionary where the keys are the plugins and the values are the results of the calculations

    """
    log.debug(f"Calculate {report.name()}")
    rv = {}
    for score in annotated_variant_data.scores:
        rv[score.plugin] = report.calculate(
            score, evaluation_data.interpreted_classes)
    return rv
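
A minimal usage sketch (not from the source): `annotated_variants` and `evaluation_data` are assumed to come from earlier pipeline steps, and `Sensitivity` is a hypothetical concrete PerformanceMetric subclass.

# Sketch only; `annotated_variants`, `evaluation_data`, and `Sensitivity`
# are illustrative assumptions, not part of the original code.
results = calculate_metric_or_summary(annotated_variants, evaluation_data,
                                      Sensitivity)
for plugin, value in results.items():
    print(f"{plugin.name}: {value}")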
Example #2
    def run(self, variant_information_table: DataFrame) -> DataFrame:
        """ Run the plugin on the `variant_information_table`

        Before running the plugin the :meth:`compatibility <vpmbench.plugin.Plugin.is_compatible_with_data>` of the
        data with the plugin is tested. Next the :meth:`~vpmbench.plugin.EntryPoint.run` method of the entry_point is
        called with the `variant_information_table`. The result of the entry_point is :meth:`validated
        <vpmbench.plugin.Plugin._validate_score_table>` to ensure that each variant from the
        variant_information_table is assigned a valid score. Finally, the score column is renamed using the
        :meth:`~vpmbench.plugin.Plugin.score_column_name`.

        The resulting DataFrame consists of two columns:

            * UID: The UID of the variants
            * :meth:`~vpmbench.plugin.Plugin.score_column_name`: The scores from the prioritization method

        Parameters
        ----------
        variant_information_table
            The variant information table

        Returns
        -------
        DataFrame
            The plugin result.

        """
        self.is_compatible_with_data(variant_information_table)
        log.debug(f"Invoke method: {self.name}")
        score_table = self.entry_point.run(variant_information_table)
        log.debug(f"Finish method: {self.name}")
        self._validate_score_table(variant_information_table, score_table)
        return score_table.rename(columns={"SCORE": self.score_column_name})
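
A short usage sketch, assuming a `plugin` loaded elsewhere and a prepared `variant_information_table` DataFrame (both assumptions):

# Sketch only; `plugin` and `variant_information_table` are assumed to exist.
plugin_result = plugin.run(variant_information_table)
# The result contains the "UID" column and the renamed score column.
print(plugin_result.head())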
Example #3
def invoke_methods(plugins: List[Plugin],
                   variant_data: DataFrame,
                   cpu_count: int = -1) -> AnnotatedVariantData:
    """ Invoke multiple prioritization methods given as a list of `plugins` on the `variant_data` in parallel.

    Calls :func:`vpmbench.api.invoke_method` for each plugin in `plugins` on the `variant_data`.
    The compatibility of the `plugins` with the `variant_data` is checked via :meth:`Plugin.is_compatible_with_data <vpmbench.plugin.Plugin.is_compatible_with_data>`.
    If `cpu_count` is -1, then (number of CPUs - 1) processes are used to run the plugins in parallel; set it to 1 to disable parallel execution.
    The resulting annotated variant data is constructed by collecting the outputs of the plugins and using them as input for :meth:`AnnotatedVariantData.from_results <vpmbench.data.AnnotatedVariantData.from_results>`.

    Parameters
    ----------
    plugins : List[Plugin]
        A list of plugins that should be invoked

    variant_data : pandas.DataFrame
        The variant data which should be processed by the plugins

    cpu_count : int
        The number of CPUs that should be used to invoke the plugins in parallel

    Returns
    -------
    AnnotatedVariantData
        The variant data annotated with the scores from the prioritization methods

    """
    # Check the compatibility of every plugin with the variant data up front;
    # a plain loop is used because map() is lazy in Python 3.
    for plugin in plugins:
        plugin.is_compatible_with_data(variant_data)
    if cpu_count == -1:
        cpu_count = mp.cpu_count() - 1
    log.info("Invoke methods")
    log.debug(f"#CPUs: {cpu_count}")
    pool = mp.Pool(cpu_count)
    jobs = [
        pool.apply_async(invoke_method, args=(plugin, variant_data))
        for plugin in plugins
    ]
    plugin_results = []
    for job in jobs:
        plugin_results.append(job.get())
    pool.close()
    return AnnotatedVariantData.from_results(variant_data, plugin_results)
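
A usage sketch, assuming `plugins` and `evaluation_data` were produced by `load_plugins` and `extract_evaluation_data`:

# Sketch only; `plugins` and `evaluation_data` are assumed from earlier steps.
# cpu_count=1 disables parallel execution; -1 uses (number of CPUs - 1).
annotated_variants = invoke_methods(plugins,
                                    evaluation_data.variant_data,
                                    cpu_count=1)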
Example #4
def extract_evaluation_data(
        evaluation_data_path: Union[str, Path],
        extractor: Type[Extractor] = ClinVarVCFExtractor) -> EvaluationData:
    """ Extract the EvaluationData from the evaluation input data.

    Parses the evaluation input data given by the `evaluation_data_path` using the `extractor`.

    Parameters
    ----------
    evaluation_data_path : Union[str, Path]
        The path to the evaluation input data
    extractor : Type[Extractor]
        The extractor that should be used to parse the evaluation input data

    Returns
    -------
    EvaluationData
        The evaluation data extracted from the file at `evaluation_data_path` using the `extractor`

    """
    log.info(f"Extract data from {evaluation_data_path} ")
    log.debug(f"Used extractor: {extractor}!")
    return extractor.extract(evaluation_data_path)
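
A minimal call sketch; the file path is illustrative, and the default ClinVarVCFExtractor is passed explicitly for clarity:

# Sketch only; "clinvar_variants.vcf" is an illustrative path.
evaluation_data = extract_evaluation_data("clinvar_variants.vcf",
                                          extractor=ClinVarVCFExtractor)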
Example #5
    @classmethod
    def extract(cls, file_path: Union[str, Path]) -> EvaluationData:
        """ Extract the :class:`~vpmbench.data.EvaluationData` from the file at `file_path`.

        This method calls :meth:`~vpmbench.extractor.Extractor._extract` and uses
        :meth:`vpmbench.data.EvaluationData.validate` to check if the evaluation data is valid.

        Parameters
        ----------
        file_path
            The path to the evaluation input data

        Returns
        -------
        EvaluationData
            The validated evaluation data

        Raises
        ------
        RuntimeError
            If the file can not be parsed

        SchemaErrors
            If the validation of the extracted data fails

        """
        try:
            table = cls._extract(file_path)
        except Exception as error:
            raise RuntimeError(
                f"Can't parse data at '{file_path}' with '{cls.__name__}'.\n"
                "Maybe the data does not exist or is not compatible with the extractor.\n"
                "If the data exists, use an absolute path."
            ) from error
        log.debug("Extracted Data:")
        log.debug(table.variant_data.head(10))
        table.validate()
        return table
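
The docstring suggests a template-method pattern: subclasses only provide `_extract`, while `extract` adds validation. A sketch of a custom extractor under that assumption; how an `EvaluationData` is actually constructed is not shown in the source, so the body stays a stub:

class MyCSVExtractor(Extractor):

    @classmethod
    def _extract(cls, file_path: Union[str, Path]) -> EvaluationData:
        # Parse the file and build the EvaluationData table here; the exact
        # construction of EvaluationData is an assumption left open.
        ...

# MyCSVExtractor.extract("variants.csv") would then parse the file via
# `_extract` and validate the result before returning it.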
Example #6
def run_pipeline(with_data: Union[str, Path],
                 reporting: List[Union[Type[PerformanceMetric],
                                       Type[PerformanceSummary]]],
                 using: Optional[Callable[[Plugin], bool]] = None,
                 extractor: Type[Extractor] = ClinVarVCFExtractor,
                 plugin_path: Union[str, Path] = DEFAULT_PLUGIN_PATH,
                 cpu_count: int = -1) -> PerformanceReport:
    """ Run the complete benchmark pipeline.

    Extracts the evaluation data from `with_data` using the `extractor`, loads the plugins from the `plugin_path`
    filtered with `using`, invokes the plugins on the variant data, and calculates the `reporting` metrics and
    summaries.

    Parameters
    ----------
    with_data : Union[str, Path]
        The path to the evaluation input data

    reporting : List[Union[Type[PerformanceMetric], Type[PerformanceSummary]]]
        The performance metrics and summaries that should be calculated

    using : Optional[Callable[[Plugin], bool]]
        The selection function that should be applied to filter the plugins

    extractor : Type[Extractor]
        The extractor that should be used to parse the evaluation input data

    plugin_path : Union[str, Path]
        The path to the plugin directory

    cpu_count : int
        The number of CPUs that should be used to invoke the plugins in parallel

    Returns
    -------
    PerformanceReport
        The performance report containing the evaluation data, the annotated variants, and the calculated reports

    """
    log.info("Run pipeline")
    log.debug(f'Starting time: {datetime.now().strftime("%d/%m/%Y %H:%M:%S")}')
    evaluation_data: EvaluationData = extract_evaluation_data(
        with_data, extractor)
    plugins: List[Plugin] = load_plugins(plugin_path, using)
    if len(plugins) == 0:
        raise RuntimeError(f"Can' find plugins in {plugin_path}")
    annotated_variants: AnnotatedVariantData = invoke_methods(
        plugins, evaluation_data.variant_data, cpu_count)
    reports = calculate_metrics_and_summaries(annotated_variants,
                                              evaluation_data, reporting)
    log.info("Stop pipeline")
    log.debug(
        f'Finishing time: {datetime.now().strftime("%d/%m/%Y %H:%M:%S")}')

    report = PerformanceReport(evaluation_data, annotated_variants, reports)
    return report
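
An end-to-end sketch of calling the pipeline; the paths, the metric class `Sensitivity`, and the "CADD" name filter are illustrative assumptions:

# Sketch only; all arguments are illustrative assumptions.
report = run_pipeline(with_data="clinvar_variants.vcf",
                      reporting=[Sensitivity],
                      using=lambda plugin: "CADD" in plugin.name,
                      plugin_path="./plugins",
                      cpu_count=1)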
Example #7
def load_plugins(plugin_path: Union[str, Path],
                 plugin_selection: Optional[Callable[[Plugin], bool]] = None) -> List[Plugin]:
    """ Load all plugins from the `plugin_directory` and applies the plugin selection to filter them.

    If `plugin_selection` is `None` all plugins in the `plugin_path` are returned.

    Parameters
    ----------
    plugin_path : Union[str, Path]
        The path to the plugin directory

    plugin_selection : Optional[Callable[[Plugin], bool]]
        The selection function that should be applied to filter the plugins

    Returns
    -------
    List[Plugin]
        The list of plugins loaded from the `plugin_path`

    """
    log.info(f"Load plugins from {plugin_path}")
    plugin_path = Path(plugin_path).resolve().absolute()
    log.debug(f"Absolute plugin path: {plugin_path}")
    found_plugins = [
        load_plugin(manifest)
        for manifest in plugin_path.glob("*/**/manifest.yaml")
    ]
    log.debug(
        f"Found {len(found_plugins)} plugins: {[plugin.name for plugin in found_plugins]}"
    )
    # Apply the selection only if one was provided; checking the `filter`
    # built-in instead would always be true.
    if plugin_selection is not None:
        filtered_plugins = list(filter(plugin_selection, found_plugins))
        log.debug(
            f"Returning {len(filtered_plugins)} filtered plugins: {[plugin.name for plugin in filtered_plugins]}"
        )
        return filtered_plugins
    log.debug(
        f"Returning {len(found_plugins)} plugins: {[plugin.name for plugin in found_plugins]}"
    )
    return found_plugins
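
A usage sketch of the selection mechanism; the directory and the predicate are illustrative assumptions:

# Sketch only; the path and the predicate are illustrative.
all_plugins = load_plugins("./plugins")
cadd_plugins = load_plugins("./plugins",
                            plugin_selection=lambda plugin: "CADD" in plugin.name)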