# Build the whitespace-separated argument string for the in-process
# extension-correction step. This direct call stands in for the former
# subprocess invocation of extension_correction.py.
# A ' -d ' flag leads the string when double_stranded is set.
str_ec = ' -d ' if double_stranded else ' '
str_ec += ''.join([
    sample_name_input, 'algo_input/k1mer.dict_org ',
    sample_name_input, 'algo_input/k1mer.dict ',
    str(hyp_min_weight), ' ',
    str(hyp_min_length), ' ',
    comp_directory_name, " ",
    str(comp_size_threshold), " ",
    str(nJobs), " ",
    reads_string,
])

dontWriteToFile = True
# NOTE(review): the string is split on whitespace, so any path that contains
# a space would arrive as several arguments -- confirm paths are space-free.
k1mer_dictionary, reads = extension_correction(str_ec.split(), dontWriteToFile)

# Gets kmers from k1mers.
# This step is disabled: it is wrapped in a string literal and never executes.
'''if run_jellyfish or run_extension_corr: run_cmd('python ' + shannon_dir + 'kp1mer_to_kmer.py ' + sample_name_input+'algo_input/k1mer.dict ' + sample_name_input+'algo_input/kmer.dict')'''

# Runs gpmetis to split components larger than "partition_size" into
# partitions of size "partition_size", and gathers the k1mers, kmers and
# reads belonging to each partition.
(components_broken, new_components, contig_weights, rps) = kmers_for_component(
    k1mer_dictionary, kmer_directory, reads, reads_files,
    base_directory_name, contig_file_extension, get_partition_kmers,
    double_stranded, paired_end, use_second_iteration, partition_size,
    overload, K, gpmetis_path, penalty, only_reads, inMem, nJobs)

# Empty both containers in place now that partitioning has returned.
k1mer_dictionary.clear()
components_broken.clear()

# Tally partitions for the log: entries containing 'remaining' versus the rest.
num_remaining = 0
num_non_remaining = 0
for part in new_components:
    if 'remaining' not in part:
        num_non_remaining += 1
    else:
        num_remaining += 1

# Update the log only when it already exists; it is opened in append mode.
# NOTE(review): the code that writes to and closes f_log is not visible in
# this fragment.
if os.path.exists(comp_directory_name + "/before_sp_log.txt"):
    f_log = open(comp_directory_name + "/before_sp_log.txt", 'a')
# NOTE(review): the line below is a whitespace-collapsed fragment and is left
# untouched. It opens with the tail of a call whose beginning is not visible
# here, and on one physical line everything after the first '#' is parsed as a
# comment, so the original line breaks and indentation must be restored before
# it can run.
# Read in order, the statements it contains are:
#   1. the closing arguments of a call (k1mer.dict path, hyp_min_weight,
#      hyp_min_length, comp_directory_name, comp_size_threshold);
#   2. when run_jellyfish or run_extension_corr is set, a run_cmd() of
#      kp1mer_to_kmer.py from algo_input/k1mer.dict to algo_input/kmer.dict;
#   3. a kmers_for_component(...) call unpacked into components_broken and
#      new_components (the literal False sits where another excerpt passes
#      use_second_iteration -- presumably the same flag, TODO confirm);
#   4. a loop counting entries of new_components that contain "remaining"
#      (num_remaining) versus those that do not (num_non_remaining).
sample_name_input + 'algo_input/k1mer.dict ' + str(hyp_min_weight) + ' ' + str(hyp_min_length) + ' ' + comp_directory_name + " " + str(comp_size_threshold)) # Gets kmers from k1mers if run_jellyfish or run_extension_corr: run_cmd('python ' + shannon_dir + 'kp1mer_to_kmer.py ' + sample_name_input + 'algo_input/k1mer.dict ' + sample_name_input + 'algo_input/kmer.dict') # Runs gpmetis to partition components of size above "partition_size" into partitions of size "partition_size" # Gets k1mers, kmers, and reads for each partition [components_broken, new_components ] = kmers_for_component(kmer_directory, reads_files, base_directory_name, r1_contig_file_extension, r1_new_kmer_tag, r1_graph_file_extension, get_og_comp_kmers, get_partition_kmers, double_stranded, paired_end, False, partition_size, overload, K, gpmetis_path) # This counts remaining and non-remaining partitions for log. num_remaining = 0 num_non_remaining = 0 for part in new_components: if "remaining" in part: num_remaining += 1 else: num_non_remaining += 1 # If "use_second_partition", rerun gpmetis with a penalization for contig edges broken in old partitioning # This to give a new partitioning for each component of size above "partition_size" # Gets k1mers, kmers, and reads for each partition
# Disabled cleanup step, kept for reference:
#run_cmd('rm ' + base_directory_name+"/component*contigs.txt")

# Run extension correction as a subprocess. A ' -d ' flag precedes the file
# arguments when double_stranded is set.
str_ec = ' -d ' if double_stranded else ' '
run_cmd(''.join([
    'python ', shannon_dir, 'extension_correction.py ', str_ec,
    sample_name_input, 'algo_input/k1mer.dict_org ',
    sample_name_input, 'algo_input/k1mer.dict ',
    str(hyp_min_weight), ' ',
    str(hyp_min_length), ' ',
    comp_directory_name, " ",
    str(comp_size_threshold),
]))

# Gets kmers from k1mers (only when one of the upstream steps was requested).
if run_jellyfish or run_extension_corr:
    run_cmd(''.join([
        'python ', shannon_dir, 'kp1mer_to_kmer.py ',
        sample_name_input, 'algo_input/k1mer.dict ',
        sample_name_input, 'algo_input/kmer.dict',
    ]))

# Runs gpmetis to split components larger than "partition_size" into
# partitions of size "partition_size", and gathers the k1mers, kmers and
# reads belonging to each partition.
# NOTE(review): the literal False is passed positionally right after
# paired_end -- presumably the second-iteration flag for this first pass;
# confirm against kmers_for_component's signature.
(components_broken, new_components) = kmers_for_component(
    kmer_directory, reads_files, base_directory_name,
    r1_contig_file_extension, r1_new_kmer_tag, r1_graph_file_extension,
    get_og_comp_kmers, get_partition_kmers, double_stranded, paired_end,
    False, partition_size, overload, K, gpmetis_path)

# Tally partitions for the log: entries containing "remaining" versus the rest.
num_remaining = 0
num_non_remaining = 0
for part in new_components:
    if "remaining" not in part:
        num_non_remaining += 1
    else:
        num_remaining += 1

# Optional second pass, taken when use_second_iteration is set. Per the
# original notes, gpmetis is rerun with a penalty on contig edges broken by
# the first partitioning, giving a new partitioning for each component larger
# than "partition_size", and k1mers, kmers and reads are gathered again for
# each partition. Only the first statement of this branch is visible here.
if use_second_iteration:
    r2_graph_file_extension = "r2.txt"
# Build the whitespace-separated argument string for the in-process
# extension-correction step. This direct call stands in for the former
# subprocess invocation of extension_correction.py.
# A ' -d ' flag leads the string when double_stranded is set.
str_ec = ' -d ' if double_stranded else ' '
str_ec += ''.join([
    sample_name_input, 'algo_input/k1mer.dict_org ',
    sample_name_input, 'algo_input/k1mer.dict ',
    str(hyp_min_weight), ' ',
    str(hyp_min_length), ' ',
    comp_directory_name, " ",
    str(comp_size_threshold), " ",
    str(nJobs), " ",
    reads_string,
])

dontWriteToFile = True
# NOTE(review): the string is split on whitespace, so any path that contains
# a space would arrive as several arguments -- confirm paths are space-free.
k1mer_dictionary, reads = extension_correction(str_ec.split(), dontWriteToFile)

# Gets kmers from k1mers.
# This step is disabled: it is wrapped in a string literal and never executes.
'''if run_jellyfish or run_extension_corr: run_cmd('python ' + shannon_dir + 'kp1mer_to_kmer.py ' + sample_name_input+'algo_input/k1mer.dict ' + sample_name_input+'algo_input/kmer.dict')'''

# Runs gpmetis to split components larger than "partition_size" into
# partitions of size "partition_size", and gathers the k1mers, kmers and
# reads belonging to each partition.
(components_broken, new_components, contig_weights, rps) = kmers_for_component(
    k1mer_dictionary, kmer_directory, reads, reads_files,
    base_directory_name, contig_file_extension, get_partition_kmers,
    double_stranded, paired_end, use_second_iteration, partition_size,
    overload, K, gpmetis_path, penalty, only_reads, inMem, nJobs)

# Empty both containers in place now that partitioning has returned.
k1mer_dictionary.clear()
components_broken.clear()

# Tally partitions for the log: entries containing 'remaining' versus the rest.
num_remaining = 0
num_non_remaining = 0
for part in new_components:
    if 'remaining' not in part:
        num_non_remaining += 1
    else:
        num_remaining += 1

# Update the log only when it already exists; it is opened in append mode.
# NOTE(review): the code that writes to and closes f_log is not visible in
# this fragment.
if os.path.exists(comp_directory_name + "/before_sp_log.txt"):
    f_log = open(comp_directory_name + "/before_sp_log.txt", 'a')