コード例 #1
0
ファイル: effect_prediction.py プロジェクト: gnetsanet/isovar
def predicted_effects_for_variant(variant,
                                  transcript_id_whitelist=None,
                                  only_coding_changes=True):
    """
    Predict the effect of a variant on each transcript in
    `variant.transcripts`, optionally keeping only effects which change the
    protein sequence in a predictable way.

    Parameters
    ----------
    variant : varcode.Variant

    transcript_id_whitelist : set
        If given and non-empty, only transcripts whose `id` is in this set
        are considered.

    only_coding_changes : bool
        If True, incomplete transcripts are skipped, silent and non-coding
        effects are dropped, and only effects whose
        `mutant_protein_sequence` is not None are kept.

    Returns
    -------
    A varcode.EffectCollection when `only_coding_changes` is False,
    otherwise a plain list of effect objects.
    """
    effects = []
    for transcript in variant.transcripts:
        if only_coding_changes and not transcript.complete:
            logger.info(
                "Skipping transcript %s for variant %s because it's incomplete",
                transcript.name, variant)
            continue

        # An empty or missing whitelist means "allow every transcript".
        if transcript_id_whitelist and transcript.id not in transcript_id_whitelist:
            logger.info(
                "Skipping transcript %s for variant %s because it's not one of %d allowed",
                transcript.name, variant, len(transcript_id_whitelist))
            continue
        effects.append(variant.effect_on_transcript(transcript))

    effects = EffectCollection(effects)

    n_total_effects = len(effects)
    # Pass the values as logger arguments so the message is only formatted
    # when INFO logging is actually enabled.
    logger.info("Predicted total %d effects for variant %s",
                n_total_effects, variant)
    if not only_coding_changes:
        return effects

    nonsynonymous_coding_effects = effects.drop_silent_and_noncoding()
    logger.info(
        "Keeping %d/%d effects which affect protein coding sequence for %s: %s",
        len(nonsynonymous_coding_effects), n_total_effects, variant,
        nonsynonymous_coding_effects)

    # Effects without a mutant protein sequence are of no use to callers
    # that need the altered amino acid sequence, so they are dropped here.
    usable_effects = [
        effect for effect in nonsynonymous_coding_effects
        if effect.mutant_protein_sequence is not None
    ]
    logger.info(
        "Keeping %d effects with predictable AA sequences for %s: %s",
        len(usable_effects), variant, usable_effects)
    return usable_effects
コード例 #2
0
ファイル: load.py プロジェクト: arahuja/cohorts
    def _load_single_sample_effects(self, sample_idx, file_format_funcs,
                                    only_nonsynonymous, variant_type,
                                    merge_type):
        """
        Return the effects for one sample, using the cache when possible.

        On a cache miss the sample's variants are loaded, their effects are
        computed, and both the full and the nonsynonymous collections are
        written to their respective caches before returning.

        Parameters
        ----------
        sample_idx : int
            Index into `self.sample_ids`.
        file_format_funcs
            NOTE(review): not used by this method; the format functions are
            looked up from `self.variant_type_to_format_funcs[variant_type]`
            instead. Confirm whether that is intended.
        only_nonsynonymous : bool
            If True, return only the top-priority non-silent coding effect
            per variant; otherwise return all effects.
        variant_type, merge_type
            Used to build the cache file name and forwarded to
            `self._load_single_sample_variants`.

        Returns
        -------
        The cached object on a cache hit; otherwise the freshly computed
        effects (or nonsynonymous effects when `only_nonsynonymous` is True).
        """
        sample_id = self.sample_ids[sample_idx]

        cached_file_name = "%s-%s-effects.pkl" % (variant_type, merge_type)
        if only_nonsynonymous:
            cached = self.load_from_cache(
                self.cache_names["nonsynonymous_effect"], sample_id,
                cached_file_name)
        else:
            cached = self.load_from_cache(self.cache_names["effect"],
                                          sample_id, cached_file_name)
        if cached is not None:
            return cached

        variants = self._load_single_sample_variants(
            sample_idx, self.variant_type_to_format_funcs[variant_type],
            variant_type, merge_type)
        effects = variants.effects()
        # Materialize the per-variant effects as a list: on Python 3
        # `.values()` is a dict view, which cannot be pickled, and this
        # collection is written to a cache file right below.
        top_priority_effects = (
            effects.drop_silent_and_noncoding()
            .top_priority_effect_per_variant())
        nonsynonymous_effects = EffectCollection(
            list(top_priority_effects.values()))

        # Populate both caches regardless of which flavor was requested.
        self.save_to_cache(effects, self.cache_names["effect"], sample_id,
                           cached_file_name)
        self.save_to_cache(nonsynonymous_effects,
                           self.cache_names["nonsynonymous_effect"], sample_id,
                           cached_file_name)

        if only_nonsynonymous:
            return nonsynonymous_effects
        return effects
コード例 #3
0
 def top_priority_maybe(effect_collection):
     """
     Reduce the collection to one top-priority effect per variant, unless
     the enclosing scope's all_effects flag is set, in which case the
     collection is returned untouched.
     """
     if not all_effects:
         per_variant = effect_collection.top_priority_effect_per_variant()
         return EffectCollection(list(per_variant.values()))
     return effect_collection
コード例 #4
0
def filter_effects(effect_collection, variant_collection, patient, filter_fn,
                   all_effects, **kwargs):
    """Filter an effect collection and optionally reduce it to one effect per variant.

    Parameters
    ----------
    effect_collection : varcode.EffectCollection
    variant_collection : varcode.VariantCollection
    patient : cohorts.Patient
    filter_fn : function
        Called with a FilterableEffect (plus **kwargs); effects for which it
        returns a true value are kept. If falsy, no filtering is done.
    all_effects : boolean
        If True, return every surviving effect. If False, keep only the
        top-priority effect of each variant.

    Returns
    -------
    varcode.EffectCollection
        The effects that passed the filter (the input object itself when
        there is no filter_fn and all_effects is True).
    """
    def as_filterable(effect):
        # Bundle an effect with the context that filter functions expect.
        return FilterableEffect(effect=effect,
                                variant_collection=variant_collection,
                                patient=patient)

    def passes_filter(effect):
        # An effect is kept if the filter accepts it or its alternate_effect
        # (when it has one). Both are always evaluated.
        primary_result = filter_fn(as_filterable(effect), **kwargs)
        if not hasattr(effect, "alternate_effect"):
            return primary_result
        alternate_result = filter_fn(
            as_filterable(effect.alternate_effect), **kwargs)
        return primary_result or alternate_result

    surviving = effect_collection
    if filter_fn:
        surviving = EffectCollection(
            [effect for effect in effect_collection if passes_filter(effect)])

    if all_effects:
        return surviving
    # Collapse to a single, top-priority effect per variant.
    top_effects = surviving.top_priority_effect_per_variant()
    return EffectCollection(list(top_effects.values()))
コード例 #5
0
def protein_change_effects_from_args(args):
    """
    Build Substitution effects from the protein changes given in `args`.

    Each entry of `args.protein_change` is a (gene name, protein change
    string) pair. The change string is expected to look like "T790M":
    reference amino acids, a base-1 position, then alternate amino acids.

    Entries whose change string cannot be parsed, or whose gene name is not
    known to the genome, are skipped with a warning. Entries for which no
    protein-coding transcript carries the reference amino acids at the given
    position are skipped silently.

    Returns an EffectCollection holding one Substitution per usable entry.
    """
    genome = genome_from_args(args)
    valid_gene_names = set(genome.gene_names())
    substitution_regex = re.compile("([A-Z]+)([0-9]+)([A-Z]+)")
    effects = []
    for gene_name, protein_change_string in args.protein_change:
        match_obj = substitution_regex.match(protein_change_string)
        if match_obj is None:
            # logging.warn is a deprecated alias; use logging.warning.
            logging.warning("Unable to parse protein modification: '%s'",
                            protein_change_string)
            continue

        ref, base1_pos, alt = match_obj.groups()

        base1_pos = int(base1_pos)

        if gene_name not in valid_gene_names:
            logging.warning(
                "Invalid gene name '%s' in protein modification: '%s'",
                gene_name, protein_change_string)
            continue

        # Half-open, 0-based slice covering the reference amino acids.
        start_offset = base1_pos - 1
        end_offset = start_offset + len(ref)

        candidate_transcripts = []
        for candidate_gene in genome.genes_by_name(gene_name):
            for candidate_transcript in candidate_gene.transcripts:
                if not candidate_transcript.is_protein_coding:
                    continue
                protein_sequence = candidate_transcript.protein_sequence
                if protein_sequence is None:
                    continue
                if len(protein_sequence) < end_offset:
                    # protein sequence too short for this modification
                    # e.g. EGFR T790M can't happen in an EGFR transcript
                    # with only 789 amino acids
                    continue

                seq_at_pos = protein_sequence[start_offset:end_offset]
                if seq_at_pos != ref:
                    # if this transcript doesn't have the same reference amino
                    # acids as the change then skip it and use a different
                    # transcript
                    continue
                candidate_transcripts.append(candidate_transcript)
        if candidate_transcripts:
            transcript = best_transcript(candidate_transcripts)
            effects.append(
                Substitution(variant=None,
                             transcript=transcript,
                             aa_ref=ref,
                             aa_alt=alt,
                             aa_mutation_start_offset=start_offset))
    return EffectCollection(effects)
コード例 #6
0
def missense_snv_count(cohort, **kwargs):
    """
    Count, per sample, the nonsynonymous effects whose type is exactly
    Substitution, and hand that per-sample counter to `count`.

    Extra keyword arguments are forwarded to `cohort.load_effects`.
    Samples with no loaded effects are counted as NaN.
    """
    effects_by_sample = cohort.load_effects(only_nonsynonymous=True, **kwargs)

    # Keep only effects that are exactly Substitution (subclasses excluded).
    missense_by_sample = {
        sample: EffectCollection(
            [effect for effect in effects if type(effect) == Substitution])
        for sample, effects in effects_by_sample.items()
    }

    def count_func(sample):
        if sample not in missense_by_sample:
            return np.nan
        return len(missense_by_sample[sample])

    return count(cohort, count_func, count_col="missense_snv_count")
コード例 #7
0
ファイル: effect_prediction.py プロジェクト: Al3n70rn/isovar
def predicted_coding_effects_with_mutant_sequence(
        variant,
        transcript_id_whitelist=None):
    """
    For a given variant, return the predicted mutation effects on complete
    transcripts where this variant results in a predictable non-synonymous
    change to the protein sequence.

    Parameters
    ----------
    variant : varcode.Variant

    transcript_id_whitelist : set
        If given and non-empty, only transcripts whose `id` is in this set
        are considered.

    Returns
    -------
    A plain list of effect objects whose `mutant_protein_sequence` is not
    None (not an EffectCollection).
    """
    effects = []
    for transcript in variant.transcripts:
        if not transcript.complete:
            logger.info(
                "Skipping transcript %s for variant %s because it's incomplete",
                transcript.name,
                variant)
            continue

        # An empty or missing whitelist means "allow every transcript".
        if transcript_id_whitelist and transcript.id not in transcript_id_whitelist:
            logger.info(
                "Skipping transcript %s for variant %s because it's not one of %d allowed",
                transcript.name,
                variant,
                len(transcript_id_whitelist))
            continue
        effects.append(variant.effect_on_transcript(transcript))

    effects = EffectCollection(effects)

    n_total_effects = len(effects)
    # Pass the values as logger arguments so the message is only formatted
    # when INFO logging is actually enabled.
    logger.info(
        "Predicted total %d effects for variant %s",
        n_total_effects,
        variant)

    nonsynonymous_coding_effects = effects.drop_silent_and_noncoding()
    logger.info(
        "Keeping %d/%d effects which affect protein coding sequence for %s: %s",
        len(nonsynonymous_coding_effects),
        n_total_effects,
        variant,
        nonsynonymous_coding_effects)

    # Drop effects for which no mutant protein sequence is available.
    usable_effects = [
        effect
        for effect in nonsynonymous_coding_effects
        if effect.mutant_protein_sequence is not None
    ]
    logger.info(
        "Keeping %d effects with predictable AA sequences for %s: %s",
        len(usable_effects),
        variant,
        usable_effects)
    return usable_effects
def test_tcga_effect_collection_to_dict():
    # Round trip: to_dict() followed by from_dict() must give back an
    # equal collection.
    original = tcga_ov_effects
    restored = EffectCollection.from_dict(original.to_dict())
    eq_(original, restored)
def test_wustle_effect_collection_to_json():
    # Round trip: to_json() followed by from_json() must give back an
    # equal collection.
    original = ov_wustle_effects
    restored = EffectCollection.from_json(original.to_json())
    eq_(original, restored)
def test_tcga_effect_collection_to_json():
    # Round trip: to_json() followed by from_json() must give back an
    # equal collection.
    original = tcga_ov_effects
    restored = EffectCollection.from_json(original.to_json())
    eq_(original, restored)
def test_wustle_effect_collection_to_dict():
    # Round trip: to_dict() followed by from_dict() must give back an
    # equal collection.
    original = ov_wustle_effects
    restored = EffectCollection.from_dict(original.to_dict())
    eq_(original, restored)
コード例 #12
0
def test_tcga_effect_collection_to_dict():
    # Serializing to a dict and rebuilding from it should be lossless
    # as far as equality is concerned.
    expected = tcga_ov_effects
    as_dict = expected.to_dict()
    eq_(expected, EffectCollection.from_dict(as_dict))
コード例 #13
0
def test_wustle_effect_collection_to_json():
    # Serializing to JSON and rebuilding from it should be lossless
    # as far as equality is concerned.
    expected = ov_wustle_effects
    as_json = expected.to_json()
    eq_(expected, EffectCollection.from_json(as_json))
コード例 #14
0
def test_tcga_effect_collection_to_json():
    # Serializing to JSON and rebuilding from it should be lossless
    # as far as equality is concerned.
    expected = tcga_ov_effects
    as_json = expected.to_json()
    eq_(expected, EffectCollection.from_json(as_json))
コード例 #15
0
def test_wustle_effect_collection_to_dict():
    # Serializing to a dict and rebuilding from it should be lossless
    # as far as equality is concerned.
    expected = ov_wustle_effects
    as_dict = expected.to_dict()
    eq_(expected, EffectCollection.from_dict(as_dict))