def get_mutation_rep_time(mutation_file, sample_name):
    """Return replication timings of the mutated positions of one sample.

    For every chromosome listed by ``core.get_genome_file_names()`` the
    substitutions of ``sample_name`` are read with ``core.read_mutations``
    (restricted to ``core.FINAL_NUCL`` final nucleotides).  A mutation is
    kept only if the three-character window around it in the chromosome
    text is one of ``core.MOTIFS``.

    Args:
        mutation_file: passed through unchanged to ``core.read_mutations``.
        sample_name: name of the sample whose mutations are considered.

    Returns:
        A list with one value of ``core.calculate_replication_timing`` per
        retained mutation.  Values equal to ``-1`` are reported on stdout
        but still included in the list.
    """
    sys.stdout.write("\nConsidering {0} sample: ".format(sample_name))
    mutation_rep_time = []

    # Chromosomes are handled one at a time to keep memory usage bounded:
    # the original author notes the whole genome as a str is ~3.1 GB.
    genome_file_names = core.get_genome_file_names()
    for chromosome in genome_file_names:
        # Progress indicator; flushed because no newline is written.
        sys.stdout.write(chromosome + ', ')
        sys.stdout.flush()

        mutations_list = core.read_mutations(
            mutation_file,
            mutation_type='subs',
            chromosome=chromosome,
            sample_names=[sample_name],
            final_nucleotides=core.FINAL_NUCL)

        # The text must be bound to ``genome``: that is the name the slicing
        # and the ``del`` below use (it was previously stored as ``genom``,
        # so the first mutation raised NameError).
        with open(genome_file_names[chromosome]) as genome_file:
            genome = genome_file.read()

        # FIXME (original author): considering mutations_list 20 times -
        # inefficient.
        for _, mutation in mutations_list.iterrows():
            position = mutation['positionFrom']
            # Three-character window ending at index ``position``
            # (exclusive); presumably ``position`` is 1-based so the mutated
            # nucleotide is the middle character - TODO confirm.
            motif = genome[position - 2: position + 1]
            if motif in core.MOTIFS:
                rep_time = core.calculate_replication_timing(chromosome,
                                                             position)
                if rep_time == -1:
                    # Single-argument print(...) behaves the same on
                    # Python 2 and Python 3.
                    print('\nuncalculatable replication time at {0}:{1}'
                          .format(chromosome, position))
                mutation_rep_time.append(rep_time)

        # Drop the chromosome text before the next one is loaded.
        del genome
    return mutation_rep_time
def get_motif_rep_time(chromosome):
    """Return replication timings of all motif positions on a chromosome.

    The chromosome text is read from the file that
    ``core.get_genome_file_names()`` maps ``chromosome`` to.  Every
    occurrence (overlapping ones included) of every motif in ``core.MOTIFS``
    contributes one value of ``core.calculate_replication_timing``.

    Args:
        chromosome: key into the mapping returned by
            ``core.get_genome_file_names()``.

    Returns:
        A list of timings, grouped by motif in ``core.MOTIFS`` order and,
        within a motif, ordered by position.  Values equal to ``-1`` are
        reported on stdout but still included.
    """
    timings = []
    sequence_path = core.get_genome_file_names()[chromosome]
    with open(sequence_path, 'r') as handle:
        sequence = handle.read()

    for motif in core.MOTIFS:
        start = sequence.find(motif, 0)
        while start >= 0:
            # str.find returns the index of the motif's first character:
            # +1 moves to the motif's center, and another +1 accounts for
            # str indices starting at 0.
            center = start + 2
            timing = core.calculate_replication_timing(chromosome, center)
            if timing == -1:
                print('\nuncalculatable replication time at {0}:{1}'
                      .format(chromosome, center))
            timings.append(timing)
            # Resume one character further on so overlapping hits are found.
            start = sequence.find(motif, start + 1)
    return timings