Esempio n. 1
0
 def get_gene_to_haplotypes_call(
         cls, full_call_data: FullCallData,
         panel: Panel) -> Dict[str, Set[HaplotypeCall]]:
     """Call haplotypes for every gene described by the panel.

     For each entry of ``panel.get_gene_infos()`` an info message is logged
     and the per-gene work is delegated to ``cls.__get_haplotypes_call``.

     Args:
         full_call_data: Call data passed unchanged to the per-gene caller.
         panel: Panel whose gene infos are iterated.

     Returns:
         A dict keyed by ``gene_info.gene`` holding the per-gene result of
         ``cls.__get_haplotypes_call``.
     """
     calls_per_gene = {}
     for gene_info in panel.get_gene_infos():
         logging.info(f"Calling haplotypes for {gene_info.gene}")
         haplotype_calls = cls.__get_haplotypes_call(full_call_data, gene_info)
         calls_per_gene[gene_info.gene] = haplotype_calls
     return calls_per_gene
Esempio n. 2
0
    def get_genotype_tsv_text(cls, pgx_analysis: PgxAnalysis, panel: Panel,
                              version: str) -> str:
        """Render the haplotype calls of an analysis as genotype TSV text.

        The first line is a header built from ``cls.GENOTYPE_TSV_COLUMNS``.
        Genes are written in sorted order. A gene with haplotype calls gets
        one row per call, sorted by haplotype name; a gene without calls gets
        a single placeholder row. Each row ends with the gene's drug names,
        the matching prescription-info URLs, the panel id and ``version``.

        Args:
            pgx_analysis: Analysis providing the gene -> haplotype calls
                mapping via ``get_gene_to_haplotype_calls()``.
            panel: Panel providing the genes, gene infos (with drugs),
                haplotype functions and the panel id.
            version: Version string written into every data row.

        Returns:
            The TSV text; fields are joined with ``cls.TSV_SEPARATOR`` and
            every line, including the last one, ends with a newline.

        Raises:
            AssertionError: If the genes in the analysis differ from the
                genes in the panel.
        """
        gene_to_haplotype_calls = pgx_analysis.get_gene_to_haplotype_calls()

        genes_in_analysis = set(gene_to_haplotype_calls)
        genes_in_panel = panel.get_genes()
        # Raised explicitly rather than via ``assert`` so the consistency
        # check is not stripped when Python runs with -O. The exception type
        # and message are the same as before.
        if genes_in_analysis != genes_in_panel:
            raise AssertionError(
                f"Gene lists inconsistent.\n"
                f"From analysis={sorted(genes_in_analysis)}\n"
                f"From panel={sorted(genes_in_panel)}")

        # Per gene: (joined drug names, joined prescription URLs). Both
        # strings use the same drug order so the positions correspond.
        gene_to_drug_info = {}
        for gene_info in panel.get_gene_infos():
            sorted_drugs = sorted(
                gene_info.drugs,
                key=lambda info: (info.name, info.url_prescription_info))
            gene_to_drug_info[gene_info.gene] = (
                cls.DRUG_SEPARATOR.join(drug.name for drug in sorted_drugs),
                cls.DRUG_SEPARATOR.join(
                    drug.url_prescription_info for drug in sorted_drugs),
            )

        lines = [cls.TSV_SEPARATOR.join(cls.GENOTYPE_TSV_COLUMNS)]
        for gene in sorted(gene_to_haplotype_calls):
            haplotype_calls = gene_to_haplotype_calls[gene]
            drug_names, drug_urls = gene_to_drug_info[gene]
            # Columns shared by every row of this gene.
            trailing_fields = [drug_names, drug_urls, panel.get_id(), version]

            if haplotype_calls:
                for haplotype_call in sorted(
                        haplotype_calls,
                        key=lambda call: call.haplotype_name):
                    leading_fields = [
                        gene,
                        haplotype_call.haplotype_name,
                        cls.__get_zygosity(haplotype_call),
                        panel.get_haplotype_function(
                            gene, haplotype_call.haplotype_name),
                    ]
                    lines.append(
                        cls.TSV_SEPARATOR.join(
                            leading_fields + trailing_fields))
            else:
                # No haplotype could be called for this gene: emit a single
                # placeholder row.
                leading_fields = [
                    gene,
                    cls.UNRESOLVED_HAPLOTYPE_STRING,
                    cls.NOT_APPLICABLE_ZYGOSITY_STRING,
                    UNKNOWN_FUNCTION_STRING,
                ]
                lines.append(
                    cls.TSV_SEPARATOR.join(leading_fields + trailing_fields))

        return "\n".join(lines) + "\n"
Esempio n. 3
0
    def __get_calls_for_panel_variants_without_calls(
            cls, simple_call_data: SimpleCallData,
            panel: Panel) -> FrozenSet[SimpleCall]:
        """Build placeholder reference calls for panel variants lacking a call.

        A panel rs id gets a placeholder call when it does not appear among
        the rs ids of the existing calls and none of its relevant coordinates
        is covered by an existing call. The placeholder assumes REF/REF
        relative to the reference assembly of ``simple_call_data`` and
        carries the ``SimpleCallFilter.NO_CALL`` filter.

        Args:
            simple_call_data: Existing calls plus their reference assembly.
            panel: Panel whose gene infos list the expected rs ids.

        Returns:
            A frozenset with one ``SimpleCall`` per uncovered panel rs id.
        """
        assembly = simple_call_data.reference_assembly

        # "." entries are excluded from the set of rs ids seen in the calls.
        seen_rs_ids = set()
        for call in simple_call_data.calls:
            for rs_id in call.rs_ids:
                if rs_id != ".":
                    seen_rs_ids.add(rs_id)

        covered_coordinates = set()
        for call in simple_call_data.calls:
            for coordinate in call.get_relevant_coordinates():
                covered_coordinates.add(coordinate)

        placeholder_calls = set()
        for gene_info in panel.get_gene_infos():
            for rs_id_info in gene_info.rs_id_infos:
                overlap = rs_id_info.get_relevant_coordinates(
                    assembly).intersection(covered_coordinates)
                if rs_id_info.rs_id in seen_rs_ids or overlap:
                    # Already called, or (partially) covered by another call.
                    continue

                # Assuming REF/REF relative to reference assembly
                start = rs_id_info.get_start_coordinate(assembly)
                ref_allele = rs_id_info.get_reference_allele(assembly)
                placeholder_calls.add(
                    SimpleCall(
                        start,
                        ref_allele,
                        (ref_allele, ref_allele),
                        gene_info.gene,
                        (rs_id_info.rs_id, ),
                        REF_CALL_ANNOTATION_STRING,
                        SimpleCallFilter.NO_CALL,
                    ))
        return frozenset(placeholder_calls)