Ejemplo n.º 1
0
def get_mutation_rep_time(mutation_file, sample_name):
    """Return replication timings of the mutated nucleotides of one sample.

    For every chromosome listed by ``core.get_genome_file_names()`` the
    substitutions ('subs') of ``sample_name`` whose final nucleotide is in
    ``core.FINAL_NUCL`` are read from ``mutation_file``.  A mutation is kept
    only when the genome slice ``[positionFrom - 2, positionFrom + 1)``
    is one of ``core.MOTIFS``; for each kept mutation the result of
    ``core.calculate_replication_timing(chromosome, position)`` is collected.

    Progress (sample name, then each chromosome) is written to stdout.

    Args:
        mutation_file: forwarded unchanged to ``core.read_mutations``.
        sample_name: the single sample whose mutations are considered.

    Returns:
        list with one replication timing per kept mutation.  A timing of
        -1 is reported on stdout and still appended to the list.
    """
    sys.stdout.write("\nConsidering {0} sample: ".format(sample_name))
    mutation_rep_time = []
    # Chromosomes are handled one at a time to avoid memory overflow:
    # the whole genome held as a str is ~3.1 GB.
    genome_file_names = core.get_genome_file_names()
    for chromosome in genome_file_names:
        sys.stdout.write(chromosome + ', ')
        sys.stdout.flush()
        # FIXME (original author): the mutations are queried again for every
        # chromosome - inefficient.
        mutations_list = core.read_mutations(mutation_file,
                                             mutation_type='subs',
                                             chromosome=chromosome,
                                             sample_names=[sample_name],
                                             final_nucleotides=core.FINAL_NUCL)
        with open(genome_file_names[chromosome]) as genome_file:
            # Must be bound to `genome`: the slice and the `del` below use
            # that name (it was previously misspelled, causing a NameError).
            genome = genome_file.read()
        for _, mutation in mutations_list.iterrows():
            position = mutation['positionFrom']
            motif = genome[position - 2: position + 1]
            if motif not in core.MOTIFS:
                continue
            rep_time = core.calculate_replication_timing(chromosome,
                                                         position)
            if rep_time == -1:
                # Adjacent literals instead of a backslash continuation so
                # that source indentation does not leak into the message.
                print('\nuncalculatable replication time at '
                      '{0}:{1}'.format(chromosome, position))
            mutation_rep_time.append(rep_time)
        # Drop the chromosome text before the next one is loaded.
        del genome
    return mutation_rep_time
Ejemplo n.º 2
0
def filterMutations(mutations_file, catalogFileName, out_file):
    """Write the substitutions of the catalog's samples to ``out_file``.

    The first line of ``catalogFileName`` is read as a tab-separated header;
    its first field is a column label (per the original comment,
    "Mutation type") and every following field is taken as a sample name.
    ``core.read_mutations`` is then asked for the 'subs' mutations of those
    samples, and the result is written tab-separated, without header or
    index.

    Args:
        mutations_file: forwarded unchanged to ``core.read_mutations``.
        catalogFileName: path of the catalog whose header lists the samples.
        out_file: destination passed to ``to_csv``.

    Returns:
        None.
    """
    with open(catalogFileName) as catalogFile:
        # Strip only the line terminator; slicing off the last character
        # would truncate the final sample name when the header has no
        # trailing newline and would leave '\r' behind on CRLF files.
        header = catalogFile.readline().rstrip('\r\n')
    # Skip the leading column label; the rest are the sample names.
    genomeSampleNames = header.split('\t')[1:]

    mutations = core.read_mutations(mutations_file, mutation_type='subs',
                                    sample_names=genomeSampleNames)
    mutations.to_csv(out_file, sep='\t', header=False, index=False)
Ejemplo n.º 3
0
def check_chromosome(genome_file, mutations_file, chromosome):
    """Check that mutations on ``chromosome`` agree with the genome text.

    The whole of ``genome_file`` is read into memory.  For each 'subs'
    mutation returned by ``core.read_mutations`` for ``chromosome`` the
    character at index ``positionFrom - 1`` of that text is compared with
    the mutation's ``initialNucl``.

    Args:
        genome_file: path of the file holding the chromosome sequence.
        mutations_file: forwarded unchanged to ``core.read_mutations``.
        chromosome: chromosome identifier used to select the mutations and
            to label the messages.

    Returns:
        None.  A success line is printed when every mutation matches.

    Raises:
        SystemExit: on the first mismatch, carrying a message with the
            chromosome, position and both nucleotides.
    """
    with open(genome_file, 'r') as f:
        genome_sequence = f.read()

    mutations_list = core.read_mutations(mutations_file,
                                         mutation_type='subs',
                                         chromosome=chromosome)
    # Adjacent literals instead of a backslash continuation so that source
    # indentation does not end up inside the error message.
    message = ('\n{0}:{1} nucleotide in genome ({2}) and '
               'mutations file ({3}) not equal')
    for _, mutation in mutations_list.iterrows():
        position = mutation['positionFrom']
        # positionFrom is used as 1-based here, hence the -1 offset.
        genome_nucleotide = genome_sequence[position - 1]
        mutation_nucleotide = mutation['initialNucl']
        if genome_nucleotide != mutation_nucleotide:
            sys.exit(message.format(chromosome, position,
                                    genome_nucleotide,
                                    mutation_nucleotide))
    print("Chromosome {0} check succeed".format(chromosome))