Exemple #1
0
def permute_cnas(cnas, gene2chromo, chromo2genes):
    """Return a list of permuted CNA Mutations, built per sample and per chromosome.

    The input CNAs are grouped by sample. For each sample, the blocks on every
    chromosome are obtained from get_cna_blocks_for_sample(); every block is
    then placed at the gene indices returned by get_block_indices(), and one
    Mutation carrying the block's mut_type is emitted for each of those indices.

    Arguments:
    cnas -- iterable of CNA records; each must expose a ``sample`` attribute.
    gene2chromo -- passed through unchanged to get_cna_blocks_for_sample().
    chromo2genes -- mapping from chromosome to an indexable sequence of genes;
                    also passed through to get_cna_blocks_for_sample().

    Returns a list of Mutation objects (empty when ``cnas`` is empty).

    """
    samples2cnas = defaultdict(list)
    for cna in cnas:
        samples2cnas[cna.sample].append(cna)

    permuted_cnas = []
    for sample, sample_cnas in samples2cnas.items():
        chromo2blocks = get_cna_blocks_for_sample(sample_cnas, gene2chromo,
                                                  chromo2genes)
        # items() rather than the Python 2-only iteritems(): the latter raises
        # AttributeError on Python 3, while items() works on both.
        for chromo, blocks in chromo2blocks.items():
            genes = chromo2genes[chromo]
            # Shared across all blocks of this chromosome and handed to
            # get_block_indices() on every call.
            invalid_indices = []
            for block in blocks:
                permuted_indices = get_block_indices(len(genes),
                                                     len(block.genes),
                                                     invalid_indices)
                for index in permuted_indices:
                    permuted_cnas.append(
                        Mutation(sample, genes[index], block.mut_type))

                # Mark the chosen indices plus one position on either side as
                # invalid -- presumably so later blocks neither overlap nor
                # touch this one; confirm against get_block_indices().
                invalid_indices.extend(permuted_indices)
                invalid_indices.extend([min(permuted_indices) - 1,
                                        max(permuted_indices) + 1])

    return permuted_cnas
Exemple #2
0
def permute_snvs(samples, tested_genes, gene2length, bmr, gene2bmr):
    """Randomly generate SNV Mutations for every (sample, gene) pair.

    For each pair, one value is drawn from random.random() and an SNV Mutation
    is emitted when that value is <= 1 - (1 - rate) ** length, where ``rate``
    is the gene's entry in gene2bmr (falling back to ``bmr``) and ``length`` is
    the gene's entry in gene2length.

    Arguments:
    samples -- iterable of sample identifiers.
    tested_genes -- iterable of genes to consider for every sample.
    gene2length -- mapping from gene to its length; must contain every tested gene.
    bmr -- default rate used for genes that are missing from gene2bmr.
    gene2bmr -- mapping from gene to a gene-specific rate.

    Returns a list of Mutation objects with mut_type SNV.

    """

    def mutation_probability(gene):
        # Look up the rate first, then the length, as a KeyError on a missing
        # length must surface only after the rate lookup succeeded.
        rate = gene2bmr.get(gene, bmr)
        length = gene2length[gene]
        return 1 - pow(1 - rate, length)

    result = []
    for sample in samples:
        for gene in tested_genes:
            threshold = mutation_probability(gene)
            # Exactly one random draw per (sample, gene) pair.
            if random.random() <= threshold:
                result.append(Mutation(sample, gene, SNV))

    return result
Exemple #3
0
def load_snvs(snv_file, gene_wlst=None, sample_wlst=None):
    """Read SNVs from a TSV file and return them as Mutation tuples with mut_type == SNV.

    Arguments:
    snv_file -- path to a TSV file in which the first column of each line is a sample ID
                and every following column names a gene mutated in that sample.
                Lines starting with "#" are ignored.
    gene_wlst -- whitelist of allowed genes (default None), checked with include().
                 Genes not in this list are ignored. If None, all genes are kept.
    sample_wlst -- whitelist of allowed samples (default None), checked with include().
                   Samples not in this list are ignored. If None, all samples are kept.

    Returns a list of Mutation objects, in file order.

    """
    mutations = []
    with open(snv_file) as handle:
        for raw_line in handle:
            if raw_line.startswith("#"):
                continue

            # rstrip() removes the newline together with any trailing tabs,
            # so empty trailing columns never show up as gene names.
            fields = raw_line.rstrip().split("\t")
            sample_id = fields[0]
            if not include(sample_id, sample_wlst):
                continue

            for gene_name in fields[1:]:
                if include(gene_name, gene_wlst):
                    mutations.append(Mutation(sample_id, gene_name, SNV))

    return mutations
Exemple #4
0
def load_cnas(cna_file, gene_wlst=None, sample_wlst=None):
    """Read CNAs from a TSV file and return them as Mutation tuples with mut_type == AMP or DEL.

    Arguments:
    cna_file -- path to a TSV file in which the first column of each line is a sample ID
                and every following column holds a gene name followed by "(A)" or "(D)",
                indicating an amplification or deletion of that gene in the sample.
                Lines starting with '#' are ignored.
    gene_wlst -- whitelist of allowed genes (default None), checked with include().
                 Genes not in this list are ignored. If None, all genes are kept.
    sample_wlst -- whitelist of allowed samples (default None), checked with include().
                   Samples not in this list are ignored. If None, all samples are kept.

    Returns a list of Mutation objects, in file order; the mutation type of each
    entry is whatever get_mut_type() returns for the raw column text.

    """
    mutations = []
    with open(cna_file) as handle:
        for raw_line in handle:
            if raw_line.startswith("#"):
                continue

            fields = raw_line.rstrip().split("\t")
            sample_id = fields[0]
            if not include(sample_id, sample_wlst):
                continue

            for entry in fields[1:]:
                # The gene name is everything before the first "(".
                gene_name = entry.split("(")[0]
                if include(gene_name, gene_wlst):
                    mutations.append(
                        Mutation(sample_id, gene_name, get_mut_type(entry)))

    return mutations
Exemple #5
0
def load_inactivating_snvs(inactivating_snvs_file,
                           gene_wlst=None,
                           sample_wlst=None):
    """Load inactivating SNVs from a file and return as a list of Mutation tuples with
    mut_type == INACTIVE_SNV.

    Arguments:
    inactivating_snvs_file -- path to a whitespace-delimited file listing inactivating SNVs
                              where the first column of each line is a gene name and the
                              second column is a sample ID. Lines starting with "#" and
                              blank (empty or whitespace-only) lines are ignored.
    gene_wlst -- whitelist of allowed genes (default None), checked with include().
                 Genes not in this list are ignored. If None, all genes are kept.
    sample_wlst -- whitelist of allowed samples (default None), checked with include().
                   Samples not in this list are ignored. If None, all samples are kept.

    Returns a list of Mutation objects, in file order.

    Raises IndexError if a non-blank, non-comment line has fewer than two columns.

    """
    mutations = []
    with open(inactivating_snvs_file) as f:
        for line in f:
            if line.startswith("#"):
                continue

            fields = line.split()
            # A blank line splits to [], which previously crashed with
            # IndexError on fields[1]; skip it like the other loaders do.
            if not fields:
                continue

            gene, sample = fields[0], fields[1]
            if include(sample, sample_wlst) and include(gene, gene_wlst):
                mutations.append(Mutation(sample, gene, INACTIVE_SNV))

    return mutations
Exemple #6
0
def get_invalidated_mutation(mutation):
    """Return a new Mutation copying sample, gene and mut_type, with False as fourth argument.

    Arguments:
    mutation -- object exposing ``sample``, ``gene`` and ``mut_type`` attributes.

    The input mutation is not modified.

    """
    sample, gene, mut_type = mutation.sample, mutation.gene, mutation.mut_type
    # NOTE(review): the fourth positional field is presumably a validity flag;
    # its name is not visible here -- confirm against the Mutation definition.
    return Mutation(sample, gene, mut_type, False)