def main():
    """Write the top-n covered positions for one or more positions files.

    Validates CLI arguments, ensures the output directory exists, then runs
    WriteTopN over every ``.positions`` file in --positions_directory, or over
    the single --positions_file.
    """
    args = parse_args()
    assert os.path.isdir(args.output_dir), "{} is not a directory".format(
        args.output_dir)
    assert os.path.exists(args.bam), "{} does not exist".format(args.bam)
    if args.positions_directory is not None:
        # bug fix: the error message previously formatted args.positions_file,
        # which is None (or unrelated) in this branch
        assert os.path.exists(args.positions_directory), \
            "{} does not exist".format(args.positions_directory)
    else:
        assert args.positions_file is not None, \
            "Must pass in --positions_file or --positions_directory"
        assert os.path.exists(args.positions_file), "{} does not exist".format(
            args.positions_file)
    output_dir = args.output_dir
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    bam = args.bam
    top_n = WriteTopN(bam_file=bam)
    n = args.n
    if args.positions_directory is not None:
        for positions_file in list_dir(args.positions_directory, ext="positions"):
            print(f"Running on {positions_file}")
            top_n.write_top_n_covered_positions_file(positions_file, output_dir, n)
    else:
        print(f"Running on {args.positions_file}")
        top_n.write_top_n_covered_positions_file(args.positions_file, output_dir, n)
def main():
    """Build (or load) a sequencing-summary DataFrame, then print stats and plots.

    Either reloads a previously pickled summary (--from_pickle) or computes a
    fresh one from an alignment file + readdb + fast5 directory, pickling the
    result into the output directory for reuse.
    """
    t_start = timer()
    args = parse_args()
    assert os.path.isdir(args.output_dir), "{} is not a directory".format(args.output_dir)

    if args.from_pickle is not None:
        print("Loading sequencing summary info from pickle")
        summary_pd = pd.read_pickle(args.from_pickle)
    else:
        assert args.fast5_dir is not None, "Must select fast5_dir if not loading from pickle file"
        fast5s = list_dir(args.fast5_dir, ext='fast5')
        assert len(fast5s) > 0, "Check fast5_dir. No files with fast5 extension found: {}".format(args.fast5_dir)
        print("Creating alignment summary info")
        summary_kwargs = dict(number=args.number,
                              pass_threshold=args.quality_threshold,
                              gap_size=10,
                              verbose=False)
        summary_pd = multiprocess_get_summary_info(args.alignment_file, args.readdb,
                                                   [args.fast5_dir], summary_kwargs,
                                                   worker_count=args.workers,
                                                   debug=args.debug)
        # cache the computed summary so later runs can use --from_pickle
        summary_pd.to_pickle(os.path.join(args.output_dir, "summary_info.pkl"))

    print("Printing summary stats and creating plots")
    print_summary_information(summary_pd, pass_threshold=args.quality_threshold)
    plot_summary_information(summary_pd, output_dir=args.output_dir)
    t_stop = timer()
    print("Running Time = {} seconds".format(t_stop - t_start), file=sys.stderr)
def get_top_kmers_from_directory(kmer_dir, output_dir, n, random=False):
    """Get the top n kmers from a directory of kmer tables

    Concatenates all tsv files in kmer_dir, keeps the n rows with the largest
    value in column 3, writes them to output_dir/<kmer>.tsv, and removes
    kmer_dir when done.

    :param kmer_dir: path to directory with kmer files
    :param output_dir: path to output dir
    :param n: number of kmers to collect
    :param random: boolean option to select random kmers instead of top n
        (NOTE(review): currently unused in this function — confirm intent)
    """
    output_file = os.path.join(kmer_dir, "all_kmers.tsv")
    kmer = os.path.basename(kmer_dir)
    concatenate_files(list_dir(kmer_dir, ext="tsv"), output_file)
    with open(output_file, "r") as fh:
        data = [x.split() for x in fh.readlines()]
    # sort data
    if len(data) < n:
        print("Not enough events for kmer {}. {}".format(kmer, len(data)))
        largest = data
    else:
        print("Getting events for kmer {}. {}".format(kmer, n))
        # bug fix: column 3 is read from text so e[3] is a string; comparing
        # strings ranks lexicographically (e.g. "9" > "10"). Compare as floats.
        largest = heapq.nlargest(n, data, key=lambda e: float(e[3]))
    # write data
    output_file = os.path.join(output_dir, kmer + ".tsv")
    with open(output_file, "w") as fh2:
        for line in largest:
            fh2.write("\t".join(line) + "\n")
    # the per-kmer scratch directory is no longer needed
    shutil.rmtree(kmer_dir)
def generate_top_n_kmers_from_sa_output(assignment_files, working_dir, output_file, n,
                                        alphabet="ACGT", kmer_len=5, min_prob=0.8,
                                        worker_count=1, random=False, complement=False,
                                        remove=False, alignment=False):
    """Get the most probable n events for each kmer"""
    # one scratch sub-directory per kmer
    per_kmer_dirs = make_kmer_directories(working_dir, alphabet, kmer_len,
                                          complement=complement)
    # bucket the signalAlign assignment rows into those per-kmer directories
    multiprocess_split_sa_tsv_file(assignment_files, per_kmer_dirs, alphabet, kmer_len,
                                   min_prob=min_prob, remove=remove,
                                   worker_count=worker_count, alignment=alignment)
    # keep only the best n events per kmer
    multiprocess_get_top_kmers_from_directory(per_kmer_dirs, working_dir, n,
                                              random=random, worker_count=worker_count)
    # merge all per-kmer results into a single output file
    concatenate_files(list_dir(working_dir, ext="tsv"), output_file, remove_files=True)
    return output_file
def __init__(self, sa_full_tsv_dir, variants="ATGC", verbose=False, processes=2):
    """Marginalize over all posterior probabilities to give a per position read probability

    :param sa_full_tsv_dir: directory of full output from signalAlign
    :param variants: bases to track probabilities
    :param verbose: if True, workers print progress information
    :param processes: number of worker processes used for aggregation
    """
    self.sa_full_tsv_dir = sa_full_tsv_dir
    # sorted so per-variant column order is deterministic regardless of input order
    self.variants = sorted(variants)
    self.columns = merge_lists([['contig', 'position', 'strand', 'forward_mapped'],
                                list(self.variants)])
    # signalAlign emits separate per-direction files; collect each set
    self.forward_tsvs = list_dir(self.sa_full_tsv_dir, ext=".forward.tsv")
    self.backward_tsvs = list_dir(self.sa_full_tsv_dir, ext=".backward.tsv")
    self.verbose = verbose
    self.worker_count = processes
    # aggregation results, populated by _multiprocess_aggregate_all_variantcalls
    self.aggregate_position_probs = pd.DataFrame()
    self.per_position_data = pd.DataFrame()
    self.per_read_data = pd.DataFrame()
    # aggregation runs eagerly at construction time; truthy when data was found
    self.has_data = self._multiprocess_aggregate_all_variantcalls()
def main():
    """Re-segment DNA and RNA test reads with both speedy and minknow
    event-detection parameter sets and print re-segmentation accuracy per read.

    NOTE: paths are hard-coded to a local development machine.
    """
    start = timer()
    dna_reads = "/Users/andrewbailey/CLionProjects/nanopore-RNN/test_files/minion-reads/canonical/"
    rna_reads = "/Users/andrewbailey/CLionProjects/nanopore-RNN/test_files/minion-reads/rna_reads"
    # bug fix: each of these four dicts was assigned twice in a row; the first
    # assignments were dead code and have been removed. The effective values
    # are kept (note rna_minknow thresholds end up as (1.9, 1.0)).
    rna_minknow_params = dict(window_lengths=(5, 10), thresholds=(1.9, 1.0), peak_height=1.2)
    rna_speedy_params = dict(min_width=5, max_width=40, min_gain_per_sample=0.008, window_width=800)
    dna_minknow_params = dict(window_lengths=(5, 10), thresholds=(2.0, 1.1), peak_height=1.2)
    dna_speedy_params = dict(min_width=5, max_width=80, min_gain_per_sample=0.008, window_width=800)
    rna_files = list_dir(rna_reads, ext='fast5')
    dna_files = list_dir(dna_reads, ext='fast5')
    print("MAX RNA SKIPS: Speedy")
    for fast5_path in rna_files:
        print(fast5_path)
        f5fh = resegment_reads(fast5_path, rna_speedy_params, speedy=True, overwrite=True)
        print(get_resegment_accuracy(f5fh))
    print("MAX RNA SKIPS: Minknow")
    for fast5_path in rna_files:
        f5fh = resegment_reads(fast5_path, rna_minknow_params, speedy=False, overwrite=True)
        print(get_resegment_accuracy(f5fh))
    print("MAX DNA SKIPS: speedy")
    for fast5_path in dna_files:
        print(fast5_path)
        f5fh = resegment_reads(fast5_path, dna_speedy_params, speedy=True, overwrite=True)
        print(get_resegment_accuracy(f5fh))
    print("MAX DNA SKIPS:Minknow")
    for fast5_path in dna_files:
        f5fh = resegment_reads(fast5_path, dna_minknow_params, speedy=False, overwrite=True)
        print(get_resegment_accuracy(f5fh))
    stop = timer()
    print("Running Time = {} seconds".format(stop - start), file=sys.stderr)
def test_tar_gz(self):
    """Round-trip a directory through tar_gz/untar_gz and verify the contents."""
    with captured_output() as (_, _):
        input_dir = os.path.join(self.HOME, "tests/test_files/test_tar_dir")
        dir_data = os.path.join(self.HOME, "tests/test_files/test_tar_dir/test.fa")
        with tempfile.TemporaryDirectory() as tempdir:
            # case 1: archive to an explicit file path, then extract and compare
            archive = tar_gz(input_dir, os.path.join(tempdir, "test.tgz"))
            extracted = untar_gz(archive, out_dir=tempdir)
            self.assertTrue(
                filecmp.cmp(list_dir(os.path.join(extracted, "test_tar_dir"))[0],
                            dir_data))
            # case 2: archive to a directory (name inferred), then extract and compare
            archive = tar_gz(input_dir, tempdir)
            extracted = untar_gz(archive, out_dir=tempdir)
            self.assertTrue(
                filecmp.cmp(list_dir(os.path.join(extracted, "test_tar_dir"))[0],
                            dir_data))
def aggregate_deepmod_data(deepmod_output_dir):
    """Concatenate all DeepMod bed files in a directory into one DataFrame.

    Adds an "E" column (modified fraction) and its complement "C" (canonical
    fraction), both derived from the modification_percentage column.

    :param deepmod_output_dir: directory containing DeepMod ``.bed`` output files
    :return: single pandas DataFrame of all beds concatenated
    """
    deepmod_bed_data = []
    for bed in list_dir(deepmod_output_dir, ext="bed"):
        data = parse_deepmod_bed(bed)
        # hoisted: compute the shared fraction once instead of twice per file
        modified_fraction = data["modification_percentage"] / 100
        data["E"] = modified_fraction
        data["C"] = 1 - modified_fraction
        deepmod_bed_data.append(data)
    return pd.concat(deepmod_bed_data)
def setUpClass(cls):
    """Locate repo-relative test fixtures and copy read directories into a tmp dir."""
    super(SignalAlignmentTest, cls).setUpClass()
    # repo root: four levels up from this test file
    cls.HOME = '/'.join(os.path.abspath(__file__).split("/")[:-4])
    # reference sequences
    cls.reference = os.path.join(
        cls.HOME, "tests/test_sequences/pUC19_SspI_Zymo.fa")
    cls.ecoli_reference = os.path.join(
        cls.HOME, "tests/test_sequences/E.coli_K12.fasta")
    # canonical E. coli R9 reads and the specific files some tests rely on
    cls.fast5_dir = os.path.join(
        cls.HOME, "tests/minion_test_reads/canonical_ecoli_R9")
    cls.files = [
        "miten_PC_20160820_FNFAD20259_MN17223_mux_scan_AMS_158_R9_WGA_Ecoli_08_20_16_83098_ch138_read23_strand.fast5",
        "miten_PC_20160820_FNFAD20259_MN17223_sequencing_run_AMS_158_R9_WGA_Ecoli_08_20_16_43623_ch101_read456_strand.fast5",
        "miten_PC_20160820_FNFAD20259_MN17223_sequencing_run_AMS_158_R9_WGA_Ecoli_08_20_16_43623_ch101_read544_strand1.fast5",
        "miten_PC_20160820_FNFAD20259_MN17223_sequencing_run_AMS_158_R9_WGA_Ecoli_08_20_16_43623_ch103_read333_strand1.fast5"
    ]
    cls.fast5_paths = list_dir(cls.fast5_dir, ext="fast5")
    cls.fast5_bam = os.path.join(
        cls.HOME, "tests/minion_test_reads/canonical_ecoli_R9/canonical_ecoli.bam")
    cls.fast5_readdb = os.path.join(
        cls.HOME, "tests/minion_test_reads/canonical_ecoli_R9/canonical_ecoli.readdb"
    )
    cls.template_hmm = os.path.join(
        cls.HOME, "models/testModelR9_acgt_template.model")
    cls.path_to_bin = os.path.join(cls.HOME, 'bin')
    # scratch area; NOTE(review): no matching cleanup is visible in this block —
    # confirm tearDownClass removes cls.tmp_directory
    cls.tmp_directory = tempfile.mkdtemp()
    cls.test_dir = os.path.join(cls.tmp_directory, "test")
    dna_dir = os.path.join(cls.HOME, "tests/minion_test_reads/1D/")
    # copy file to tmp directory
    shutil.copytree(dna_dir, cls.test_dir)
    cls.readdb = os.path.join(
        cls.HOME, "tests/minion_test_reads/oneD.fastq.index.readdb")
    cls.bam = os.path.join(cls.HOME, "tests/minion_test_reads/oneD.bam")
    # RNA edge-case fixtures
    cls.rna_bam = os.path.join(
        cls.HOME, "tests/minion_test_reads/RNA_edge_cases/rna_reads.bam")
    cls.rna_readdb = os.path.join(
        cls.HOME, "tests/minion_test_reads/RNA_edge_cases/rna_reads.readdb")
    cls.test_dir_rna = os.path.join(cls.tmp_directory, "test_rna")
    cls.rna_reference = os.path.join(
        cls.HOME, "tests/test_sequences/fake_rna_ref.fa")
    rna_dir = os.path.join(cls.HOME, "tests/minion_test_reads/RNA_edge_cases/")
    # copy file to tmp directory
    shutil.copytree(rna_dir, cls.test_dir_rna)
    # used to test runSignalAlign with config file
    cls.config_file = os.path.join(cls.HOME, "tests/runSignalAlign-config.json")
    cls.default_args = create_dot_dict(load_json(cls.config_file))
def __init__(self, variant_tsv_dir, variants="ATGC", verbose=False):
    """Marginalize over all posterior probabilities to give a per position read probability

    :param variant_tsv_dir: directory of variantCaller output from signalAlign
    :param variants: bases to track probabilities
    :param verbose: if True, print progress information during aggregation
    """
    self.variant_tsv_dir = variant_tsv_dir
    # sorted so per-variant column order is deterministic regardless of input order
    self.variants = sorted(variants)
    self.columns = merge_lists([['contig', 'position', 'strand', 'forward_mapped'],
                                list(self.variants)])
    # variantCaller output files to aggregate
    self.variant_tsvs = list_dir(self.variant_tsv_dir, ext=".vc.tsv")
    # aggregation results, populated by _aggregate_all_variantcalls
    self.aggregate_position_probs = pd.DataFrame()
    self.per_position_data = pd.DataFrame()
    self.verbose = verbose
    self.per_read_data = pd.DataFrame()
    # aggregation runs eagerly at construction time; truthy when data was found
    self.has_data = self._aggregate_all_variantcalls()
def multiprocess_get_distance_from_guide(in_dir, sa_number, threshold, label, worker_count=1, debug=False):
    """Collect guide-alignment distance data for every fast5 in a directory.

    :param threshold: probability threshold
    :param sa_number: SA embedded number to grab sa variant data
    :param label: what is the correct nucleotide call for a given variant called read
    :param in_dir: input directory with subdirectories assumed to have fast5s in them
    :param worker_count: number of workers to use
    :param debug: run serially in-process so any error is raised immediately
    :return: list of per-read results
    """
    # grab aligned segment
    fast5s = list_dir(in_dir, ext="fast5")
    if debug:
        # serial path: invoke the wrapper directly, keeping non-None results
        output = []
        for path in fast5s:
            result = get_distance_from_guide_alignment_wrapper(path, sa_number,
                                                               threshold, label)
            if result is not None:
                output.append(result)
    else:
        service_kwargs = {"sa_number": sa_number,
                          "threshold": threshold,
                          "label": label}
        _total, _failure, _messages, output = multithread.run_service2(
            get_distance_from_guide_service, fast5s,
            service_kwargs, ["f5_path"], worker_count)
    return output
def main():
    """Remove SignalAlign analyses from every fast5 in a directory and report totals."""
    args = parse_args()
    total = 0
    files = 0
    errors = 0
    for f5_file in list_dir(args.dir, ext="fast5"):
        try:
            total += remove_sa_analyses(f5_file)
            files += 1
        except KeyError as e:
            # best-effort: keep going, count and report the failure
            errors += 1
            print("FAILED {}: {}".format(f5_file, e))
    # bug fix: message previously read "Deleted ... datasets deleted from ...";
    # also report the error count, which was tracked but never surfaced
    print("Deleted {} SignalAlign analysis datasets from {} files ({} failures)".format(
        total, files, errors))
def multiprocess_get_gaps_from_reads(in_dir, sa_number, gap_threshold, label, worker_count=1,
                                     alignment_threshold=0.5, debug=False):
    """Multiprocess for calculating the number of gaps in a signalalign alignment.
    Must have MEA alignment

    :param gap_threshold: gap-length threshold
    :param sa_number: SA embedded number to grab sa variant data
    :param label: what is the correct nucleotide call for a given variant called read
    :param in_dir: input directory with subdirectories assumed to have fast5s in them
    :param worker_count: number of workers to use
    :param alignment_threshold: minimum alignment probability (default 0.5)
    :param debug: run serially in-process so any error is raised immediately
    :return: list of per-read results
    """
    if debug:
        output = []
        for f5_path in list_dir(in_dir, ext="fast5"):
            data = get_gap_lengths_and_read_length(
                f5_path, sa_number, label, gap_threshold,
                alignment_threshold=alignment_threshold)
            if data is not None:
                output.append(data)
    else:
        # bug fix: this branch previously ran unconditionally (no else), so in
        # debug mode the serially-collected output was overwritten by the
        # multiprocess run. Sibling multiprocess_get_distance_from_guide has
        # the correct if/else shape.
        filter_reads_args = {
            "sa_number": sa_number,
            "gap_threshold": gap_threshold,
            "label": label,
            "alignment_threshold": alignment_threshold
        }
        # NOTE(review): this dispatches get_distance_from_guide_service, which
        # looks copy-pasted from the distance-from-guide variant — confirm a
        # gaps-specific service was not intended here.
        total, failure, messages, output = multithread.run_service2(
            get_distance_from_guide_service, list_dir(in_dir, ext="fast5"),
            filter_reads_args, ["f5_path"], worker_count)
    return output
def test_MarginalizeFullVariants(self):
    """Per-position and per-read C/E probabilities must sum to 1 on gi_ecoli."""
    for test_file in list_dir(self.variant_files, ext="forward.tsv"):
        marginalizer = MarginalizeFullVariants(read_in_alignment_file(test_file),
                                               variants="CE",
                                               read_name=os.path.basename(test_file),
                                               forward_mapped=True)
        # per-position probabilities
        for _, row in marginalizer.get_data().iterrows():
            self.assertEqual(row["contig"], "gi_ecoli")
            self.assertAlmostEqual(row["E"] + row["C"], 1)
        # per-read summary probabilities
        for _, row in marginalizer.per_read_calls.iterrows():
            self.assertAlmostEqual(row["E"] + row["C"], 1)
            self.assertEqual(row["contig"], "gi_ecoli")
def main(args=None):
    """Go through fast5 files and if there are two basecalled tables, compare and flag if different"""
    start = timer()
    # get args (allow an injected namespace for programmatic use)
    args = parse_args() if args is None else args
    # get fast5s: a whole directory, or the single file given by --f5_path
    if args.dir:
        f5_locations = list_dir(args.dir, ext="fast5")
    else:
        f5_locations = [args.f5_path]
    assert len(args.basecall) == 2, "Must select two basecalled sections for comparison. " \
                                    "You selected {} basecalled tables".format(len(args.basecall))
    verify_load_from_raw(args, f5_locations)
    stop = timer()
    print("Running Time = {} seconds".format(stop - start), file=sys.stderr)
def test_variant_calling_with_multiple_paths_rna(self):
    """Run multithreaded signalAlign variant calling (m6a) on RNA edge-case reads."""
    with tempfile.TemporaryDirectory() as tempdir:
        new_dir = os.path.join(tempdir, "new_dir")
        # defensive: tempdir is fresh, but clear any stale copy just in case
        if os.path.exists(new_dir):
            shutil.rmtree(new_dir)
        working_folder = FolderHandler()
        working_folder.open_folder(os.path.join(tempdir, "test_dir"))
        # work on a copy so the shared fixture reads are not modified
        shutil.copytree(self.test_dir_rna, new_dir)
        args = create_signalAlignment_args(
            alignment_file=self.rna_bam,
            bwa_reference=self.rna_reference,
            # strand-specific references with the m6a-substituted positions
            forward_reference=os.path.join(
                self.HOME,
                "tests/test_sequences/fake_rna_replace/forward.fake_rna_atg.fake_rna_ref.fa"
            ),
            backward_reference=os.path.join(
                self.HOME,
                "tests/test_sequences/fake_rna_replace/backward.fake_rna_atg.fake_rna_ref.fa"
            ),
            in_templateHmm=os.path.join(
                self.HOME, "models/fake_testModelR9p4_5mer_acfgt_RNA.model"),
            path_to_bin=self.path_to_bin,
            destination=working_folder.path,
            embed=False,
            output_format="full",
            filter_reads=0,
            twoD_chemistry=False,
            delete_tmp=True,
            degenerate="m6a",
            check_for_temp_file_existance=False)
        # debug=True runs workers in-process so failures surface directly
        multithread_signal_alignment(args, list_dir(new_dir, ext="fast5"),
                                     worker_count=8,
                                     forward_reference=None,
                                     debug=True,
                                     filter_reads_to_string_wrapper=None)
        # expect one output per edge-case read
        self.assertEqual(len(os.listdir(working_folder.path)), 2)
def test_read_in_alignment_file(self):
    """read_in_alignment_file should load every expected column with 16852 rows."""
    assignments_dir = os.path.join(
        self.HOME, "tests/test_alignments/ecoli1D_test_alignments_sm3")
    data = read_in_alignment_file(list_dir(assignments_dir)[0])
    expected_rows = 16852
    # previously 17 copy-pasted assertions; loop over the expected columns instead
    columns = ["contig", "reference_index", "reference_kmer", "read_file",
               "strand", "event_index", "event_mean", "event_noise",
               "event_duration", "aligned_kmer", "scaled_mean_current",
               "scaled_noise", "posterior_probability", "descaled_event_mean",
               "ont_model_mean", "path_kmer"]
    for column in columns:
        self.assertEqual(len(data[column]), expected_rows)
    self.assertEqual(len(data), expected_rows)
def test_list_dir(self):
    """Test list_dir function"""
    with captured_output() as (_, _):
        fake_path = "asdf/adsf/"
        fake_str = 0  # not a string: invalid input type
        # invalid inputs should trigger list_dir's internal assertions
        # (NOTE(review): the second call is unreachable once the first raises)
        with self.assertRaises(AssertionError):
            list_dir(fake_str)
            list_dir(fake_path)
        with tempfile.TemporaryDirectory() as tempdir:
            path = os.path.join(tempdir, "test.csv")
            with open(path, "w") as tmp:
                tmp.write("atest")
            # no filter: the single created file is returned
            files = list_dir(tempdir)
            self.assertEqual(files[0], path)
            # extension filter with no matches yields an empty list
            files = list_dir(tempdir, ext='tsv')
            self.assertEqual(files, [])
def main():
    """Convert a directory of csv tables into signalAlign models, in parallel."""
    args = parse_args()
    print(args)
    assert os.path.isdir(args.dir), "{} is not a directory".format(args.dir)
    assert os.path.isdir(args.output_dir), "{} is not a directory".format(
        args.output_dir)
    assert os.path.exists(args.base_model), "{} does not exist".format(
        args.base_model)

    # transitions are taken from the base model; only emissions come from the csvs
    base_model = HmmModel(args.base_model, rna=args.rna)
    shared_kwargs = {
        "output_dir": args.output_dir,
        "transition_probs": base_model.transitions,
        "state_number": 3,
        "rna": args.rna
    }
    service = BasicService(convert_csv_to_sa_model,
                           service_name="multiprocess_convert_csv_to_sa_model")
    total, failure, messages, output = run_service(
        service.run, list_dir(args.dir, ext="csv"),
        shared_kwargs, ["csv_file"], args.num_threads)
def main():
    """Plot a segmented-event comparison for the first canonical DNA test read."""
    start = timer()
    # alternative re-segmentation parameter sets (used only by the
    # commented-out resegment_reads call below)
    minknow_params = dict(window_lengths=(5, 10), thresholds=(2.0, 1.1), peak_height=1.2)
    speedy_params = dict(min_width=5, max_width=30, min_gain_per_sample=0.008, window_width=800)
    # NOTE: hard-coded local paths; swap the list_dir call to use RNA reads
    dna_reads = "/Users/andrewbailey/CLionProjects/nanopore-RNN/test_files/minion-reads/canonical/"
    rna_reads = "/Users/andrewbailey/CLionProjects/nanopore-RNN/test_files/minion-reads/rna_reads"
    fast5_files = list_dir(dna_reads, ext='fast5')
    # files = list_dir(rna_reads, ext='fast5')
    first_read = fast5_files[0]
    print(first_read)
    handle = Fast5(first_read)
    # handle = resegment_reads(first_read, minknow_params, speedy=False, overwrite=True)
    plot_segmented_comparison(handle, window_size=3000)
    stop = timer()
    print("Running Time = {} seconds".format(stop - start), file=sys.stderr)
def main():
    """Sweep a directory of HMM model files: for each model, run signalAlign
    (via the runSignalAlign CLI) and then convert its output to bed files
    (via the embed_main sa2bed CLI), skipping models whose output already exists."""
    args = parse_args()
    print(args)
    assert os.path.isdir(args.dir), "{} is not a directory".format(args.dir)
    assert os.path.isdir(args.output_dir), "{} is not a directory".format(
        args.output_dir)
    assert os.path.exists(args.base_model), "{} does not exist".format(
        args.base_model)
    # fast5 dirs are optional, but must be given as a pair
    if args.original_fast5_dir or args.target_fast5_dir:
        assert args.original_fast5_dir and args.target_fast5_dir, \
            "Both args.original_fast5_dir and args.target_fast5_dir must be specified if one is specified"
        assert os.path.isdir(
            args.original_fast5_dir), "{} is not a directory".format(
            args.original_fast5_dir)
        assert os.path.isdir(
            args.target_fast5_dir), "{} is not a directory".format(
            args.target_fast5_dir)
    models = list_dir(args.dir, ext="model")
    # base config: mutated per-model below, then saved as a new json config
    sa_base_model = load_json(args.base_model)
    created_models_dir = os.path.join(args.output_dir, "created_models")
    if not os.path.exists(created_models_dir):
        os.mkdir(created_models_dir)
    # shell command templates for the two pipeline stages
    execute = "runSignalAlign run --config {}"
    embed_main_execute = "embed_main sa2bed -d {}/tempFiles_alignment/{}/ -a {} -o {}/{}.bed -t {} -c {} --overwrite"
    running = True
    if args.rna:
        embed_main_execute += " --rna"
    for model in models:
        # copy subdirectory example: restore pristine fast5s before each run,
        # since signalAlign may modify/embed data into them
        if args.original_fast5_dir and args.target_fast5_dir:
            print("Copy fast5s from original dir to target dir")
            copy_tree(args.original_fast5_dir, args.target_fast5_dir)
        # run sa
        try:
            running = True
            base_name = os.path.splitext(os.path.basename(model))[0]
            output_dir = os.path.join(args.output_dir, base_name)
            if os.path.exists(output_dir):
                # output exists: only re-run if any sample's alignment dir is
                # missing or empty (i.e. a previous run did not finish)
                running = False
                for x in sa_base_model["samples"]:
                    sub_dir_path = os.path.join(
                        output_dir, "tempFiles_alignment/" + x["name"])
                    if os.path.exists(sub_dir_path):
                        if len(list_dir(sub_dir_path, ext="tsv")) == 0:
                            running = True
                    else:
                        running = True
            if running:
                # point the base config at this model and a per-model output dir
                sa_base_model["template_hmm_model"] = model
                sa_base_model["output_dir"] = output_dir
                new_model_file = os.path.join(created_models_dir,
                                              base_name + ".json")
                save_json(sa_base_model, new_model_file)
                check_call(execute.format(new_model_file).split())
            else:
                print(output_dir, " exists already: continuing")
                continue
            # run sa2bed
            variants_dir = os.path.join(output_dir, "variant_calls")
            if not os.path.exists(variants_dir):
                os.mkdir(variants_dir)
            for sample in sa_base_model["samples"]:
                check_call(
                    embed_main_execute.format(output_dir, sample["name"],
                                              sa_base_model["ambig_model"],
                                              variants_dir, sample["name"],
                                              sa_base_model["job_count"],
                                              args.variants).split())
        except Exception as e:
            # best-effort sweep: log the failure and move on to the next model
            print(e)
            continue
def main(config=None):
    """Plot event to reference labelled ONT nanopore reads

    :param config: optional pre-loaded config dict; when None, the config is
        read from the --config CLI argument
    """
    start = timer()
    if config is None:
        args = parse_args()
        # load model files
        assert os.path.exists(args.config), "Config file does not exist: {}".format(args.config)
        config = load_json(args.config)
    # re-bind args to the (dot-accessible) config for the rest of the function
    args = create_dot_dict(config)
    threshold = args.threshold
    all_data = []
    names = []
    for experiment in args.plot:
        names.append(experiment.name)
        experiment_data = []
        for sample in experiment.samples:
            # NOTE(review): tsvs and f5s are computed but never used below —
            # possibly leftovers from an earlier data path; confirm before removing
            tsvs = None
            f5s = None
            if sample.variant_tsvs is not None:
                tsvs = list_dir(sample.variant_tsvs, ext="vc.tsv")
            if sample.embedded_fast5_dir is not None:
                f5s = list_dir(sample.embedded_fast5_dir, ext="fast5")
            data = multiprocess_get_distance_from_guide(sample.embedded_fast5_dir,
                                                        experiment.sa_number,
                                                        threshold,
                                                        sample.label,
                                                        worker_count=7,
                                                        debug=False)
            experiment_data.extend([x for x in data if x is not None])
        all_data.append(pd.concat(experiment_data))
    # split deltas/starts by classification correctness ("true_false" column)
    true_deltas = pd.concat([data[data["true_false"]]["guide_delta"] for data in all_data])
    true_starts = pd.concat([data[data["true_false"]]["raw_start"] for data in all_data])
    false_deltas = pd.concat([data[[not x for x in data["true_false"]]]["guide_delta"] for data in all_data])
    false_starts = pd.concat([data[[not x for x in data["true_false"]]]["raw_start"] for data in all_data])
    # deviation vs. time-from-start, grouped by correctness
    plot_deviation_vs_time_from_start([true_deltas, false_deltas],
                                      [true_starts, false_starts],
                                      ["True", "False"],
                                      os.path.join(args.save_fig_dir,
                                                   "raw_start_vs_alignment_deviation_accuracy.png"))
    # deviation vs. time-from-start, grouped by experiment
    plot_deviation_vs_time_from_start([x["guide_delta"] for x in all_data],
                                      [x["raw_start"] for x in all_data],
                                      names,
                                      os.path.join(args.save_fig_dir,
                                                   "raw_start_vs_alignment_deviation.png"))
    # build per-experiment correct/wrong series for the remaining plots
    new_names = []
    new_data = []
    for name, data in zip(names, all_data):
        new_data.append(data[data["true_false"]]["guide_delta"])
        new_names.append(name + "_correct")
        new_data.append(data[[not x for x in data["true_false"]]]["guide_delta"])
        new_names.append(name + "_wrong")
    plot_alignment_deviation(new_data,
                             new_names,
                             bins=np.arange(-10000, 10000, 100),
                             save_fig_path=os.path.join(args.save_fig_dir,
                                                        "alignment_deviation_hist.png"))
    plot_violin_classication_alignment_deviation(new_data,
                                                 new_names,
                                                 save_fig_path=os.path.join(args.save_fig_dir,
                                                                            "alignment_deviation_violin.png"))
    plot_classification_accuracy_vs_deviation(all_data,
                                              names,
                                              save_fig_path=os.path.join(args.save_fig_dir,
                                                                         "classification_accuracy_vs_deviation.png"))
    stop = timer()
    print("Running Time = {} seconds".format(stop - start), file=sys.stderr)