Example #1
    def lzw_compute(input_files, threshold_readlength, slice_step=NUM_SLICES):
        """Spawn subprocesses on NUM_SLICES of the input files, then coalesce the
        scores into a temp file, and return that file's name."""

        temp_file_names = [f"lzwslice_{slice_step}_{slice_start}.txt" for slice_start in range(slice_step + 1)]
        for tfn in temp_file_names:
            assert not os.path.exists(tfn)

        @run_in_subprocess
        def lzw_compute_slice(slice_start):
            """For each read, or read pair, in input_files, such that read_index % slice_step == slice_start,
            output the lzw score for the read, or the min lzw score for the pair."""
            lzw_score = PipelineStepRunLZW.lzw_score
            with open(temp_file_names[slice_start], "a") as slice_output:
                for i, reads in enumerate(fasta.synchronized_iterator(input_files)):
                    if i % slice_step == slice_start:
                        lzw_min_score = min(lzw_score(r.sequence, threshold_readlength) for r in reads)
                        slice_output.write(str(lzw_min_score) + "\n")

        # slices run in parallel
        mt_map(lzw_compute_slice, range(slice_step))

        slice_outputs = temp_file_names[:-1]
        coalesced_score_file = temp_file_names[-1]
        # paste -d '\n' interleaves the slice files line by line, restoring the
        # original read order; exhausted (shorter) slices produce blank lines,
        # which grep -v filters out.
        command.execute("paste -d '\n' " + " ".join(slice_outputs) + " | grep -v ^$ > " + coalesced_score_file)
        for tfn in slice_outputs:
            os.remove(tfn)
        return coalesced_score_file
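
Both examples follow the same slicing idiom: assign work by index modulo the number of slices, run each slice in its own subprocess, write one temp file per slice, then stitch the slices back together in input order. The sketch below is a minimal, self-contained rendering of that idiom using only the standard library; multiprocessing.Pool stands in for the project's mt_map and run_in_subprocess helpers, and score is a hypothetical placeholder for lzw_score.

    import os
    from multiprocessing import Pool

    NUM_SLICES = 4

    def score(record):
        # Hypothetical stand-in for lzw_score: fraction of distinct characters.
        return len(set(record)) / max(len(record), 1)

    def compute_slice(args):
        input_file, slice_idx = args
        slice_name = f"slice_{NUM_SLICES}_{slice_idx}.txt"
        with open(input_file) as records, open(slice_name, "w") as out:
            for i, record in enumerate(records):
                if i % NUM_SLICES == slice_idx:  # round-robin assignment
                    out.write(f"{score(record.strip())}\n")
        return slice_name

    def compute_all(input_file, output_file):
        # Pool.map plays the role of mt_map: one worker process per slice.
        with Pool(NUM_SLICES) as pool:
            slice_names = pool.map(compute_slice,
                                   [(input_file, s) for s in range(NUM_SLICES)])
        # Interleave the slice files line by line to restore input order,
        # mirroring the paste -d '\n' | grep -v ^$ step above.
        handles = [open(fn) for fn in slice_names]
        with open(output_file, "w") as out:
            while handles:
                for h in list(handles):
                    line = h.readline()
                    if line:
                        out.write(line)
                    else:
                        h.close()
                        handles.remove(h)
        for fn in slice_names:
            os.remove(fn)

    if __name__ == "__main__":
        compute_all("reads.txt", "scores.txt")  # file names are illustrative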
Example #2
    def calc_contig2coverage(bam_filename):
        # PySAM pileup is CPU-intensive. Each CPU core is assigned a slice of
        # the input BAM file on which to perform pileup. The slice contigs are
        # selected by contig_idx modulo num_slices. Each slice gets its own
        # pair of temporary output files, one in CSV format and one in JSON.
        # In the end, these slice outputs are concatenated. This is a similar
        # pattern to run_lzw above.
        # Halve the logical core count (rounding up), presumably to estimate
        # physical cores on machines with hyperthreading.
        num_physical_cpu = (cpu_count() + 1) // 2
        num_slices = num_physical_cpu
        # One temp file per slice, plus one extra slot each for the collated
        # outputs. Note slice_idx rather than the builtin name slice.
        output_csv_filenames = [
            f"tmp_slice_{num_slices}_{slice_idx}.csv"
            for slice_idx in range(num_slices + 1)
        ]
        output_json_filenames = [
            f"tmp_slice_{num_slices}_{slice_idx}.json"
            for slice_idx in range(num_slices + 1)
        ]
        for fn in output_csv_filenames + output_json_filenames:
            if os.path.exists(fn):
                os.remove(fn)

        @run_in_subprocess
        def compute_slice(slice_idx):
            with open(output_csv_filenames[slice_idx], "w") as output_csv, \
                 open(output_json_filenames[slice_idx], "w") as output_json, \
                 pysam.AlignmentFile(bam_filename, "rb") as input_bam:  # noqa: E126
                for contig_idx, contig_name in enumerate(input_bam.references):
                    if contig_idx % num_slices == slice_idx:
                        PipelineStepGenerateCoverageStats._process_contig(
                            input_bam, output_csv, output_json, contig_name)

        # Compute pileup for each slice
        with LongRunningCodeSection(
                "PipelineStepGenerateCoverageStats.calc_contig2coverage.mt_map"
        ):
            mt_map(compute_slice, range(num_slices))
        # Output CSV headers
        with open(output_csv_filenames[-1], "w") as ocsv:
            ocsv.write(",".join(COVERAGE_STATS_SCHEMA))
            ocsv.write("\n")
        # Output the JSON dict's opening brace
        with open(output_json_filenames[-1], "w") as ojson:
            ojson.write("{")
        # Collate CSV slices
        command.execute(
            command_patterns.ShellScriptCommand(
                script=
                r'''cat "${individual_slice_outputs[@]}" >> "${collated_csv}";''',  # note >> for appending
                named_args={
                    'collated_csv': output_csv_filenames[-1],
                    'individual_slice_outputs': output_csv_filenames[:-1]
                }))
        for tfn in output_csv_filenames[:-1]:
            os.remove(tfn)
        # Collate JSON slices, replacing final ", " with "}"
        command.execute(
            command_patterns.ShellScriptCommand(
                script=
                r'''cat "${individual_slice_outputs[@]}" | sed 's=, $=}=' >> "${collated_json}";''',  # note >> for appending
                named_args={
                    'collated_json': output_json_filenames[-1],
                    'individual_slice_outputs': output_json_filenames[:-1]
                }))
        for tfn in output_json_filenames[:-1]:
            os.remove(tfn)
        return (output_csv_filenames[-1], output_json_filenames[-1])
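
The JSON collation at the end hinges on a small formatting contract: each slice entry is written as `"contig_name": {...}, ` with a trailing comma-space and no newline, the extra temp file contributes the opening brace, and the sed substitution s=, $=}= turns the final trailing ", " into the closing brace. A stand-alone illustration of the trick, assuming single-line fragments and made-up contig data:

    import json

    # Pretend per-slice JSON fragments, each entry followed by ", ".
    slice_fragments = [
        '"contig_1": {"coverage": 12.5}, "contig_3": {"coverage": 4.0}, ',
        '"contig_2": {"coverage": 7.25}, ',
    ]

    collated = "{" + "".join(slice_fragments)  # header file supplies "{"
    assert collated.endswith(", ")
    collated = collated[:-2] + "}"             # the sed 's=, $=}=' substitution
    print(json.loads(collated))                # parses as a valid dict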