def main(argv):
    """Parse the command-line options and return the output directory.

    Accepted options:
      -h                     print the usage line and exit
      -o, --outdir <outdir>  use <outdir> as the output directory
      -c, --config <config>  derive the output directory from <config>
                             by calling create_out_dir(<config>)

    When several -o/-c options are given, the last one wins.

    Args:
        argv: command-line arguments without the program name.

    Returns:
        The selected output directory.

    Exits with status 2 when argv is empty, cannot be parsed, or does not
    contain an -o/-c option (previously the last case crashed with an
    UnboundLocalError on the return statement).
    """
    usage = "python post_process_results.py [ -o <outdir> | -c <config> ]"
    usage_error = "[Error] - Expected call format:\n " + usage

    if not argv:
        print(usage_error)
        sys.exit(2)

    try:
        opts, _ = getopt.getopt(argv, "ho:c:", ["outdir=", "config="])
    except getopt.GetoptError:
        print(usage_error)
        sys.exit(2)

    # Sentinel: stays None when neither -o nor -c was supplied.
    out_dir = None
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ('-o', '--outdir'):
            out_dir = arg
        elif opt in ('-c', '--config'):
            out_dir = create_out_dir(arg)

    if out_dir is None:
        # Only positional arguments were given: nothing selects an output dir.
        print(usage_error)
        sys.exit(2)

    return out_dir
def init_ouput_dirs(self):
    """Populate the output directory path attributes.

    The base output directory comes from
    custom_utils.create_out_dir(self.config_file); the remaining paths are
    plain string concatenations below it. This method itself creates no
    directories.

    Sets:
        self.out_dir
        self.ml_data_dir                 -> <out_dir>/ml_data
        self.clinvar_feature_table_dir   -> <ml_data_dir>/clinvar_feature_tables
        self.out_models_dir              -> <ml_data_dir>/models
    """
    base_dir = custom_utils.create_out_dir(self.config_file)
    ml_dir = base_dir + '/ml_data'

    self.out_dir = base_dir
    self.ml_data_dir = ml_dir
    self.clinvar_feature_table_dir = ml_dir + '/clinvar_feature_tables'
    self.out_models_dir = ml_dir + '/models'
def init_ouput_dirs(self):
    """Populate the output directory attributes and create the raw_seq dir.

    The base output directory comes from
    custom_utils.create_out_dir(self.config_file); the remaining paths are
    string concatenations below it. Only self.raw_seq_dir is created here.

    Sets:
        self.out_dir
        self.ml_data_dir                 -> <out_dir>/ml_data
        self.clinvar_feature_table_dir   -> <ml_data_dir>/clinvar_feature_tables
        self.out_models_dir              -> <ml_data_dir>/models
        self.raw_seq_dir                 -> <ml_data_dir>/raw_seq

    Raises:
        FileExistsError: if the raw_seq path exists but is not a directory.
    """
    # Init output dir structure
    self.out_dir = custom_utils.create_out_dir(self.config_file)
    self.ml_data_dir = self.out_dir + '/ml_data'
    self.clinvar_feature_table_dir = self.ml_data_dir + '/clinvar_feature_tables'
    self.out_models_dir = self.ml_data_dir + '/models'
    self.raw_seq_dir = self.ml_data_dir + '/raw_seq'

    # exist_ok=True replaces the separate os.path.exists() check, which could
    # race with another process creating the same directory in between.
    os.makedirs(self.raw_seq_dir, exist_ok=True)
def __init__(self, config_file, input_features, chrom, NTHREADS=20):
    """Read the run parameters and prepare the ml_data directory tree.

    Args:
        config_file: passed to custom_utils.get_config_params() and
            custom_utils.create_out_dir().
        input_features: stored unchanged as self.input_features.
        chrom: stored as self.chrom; str(chrom) names the per-chromosome
            predictions directory ('.../jarvis_predictions/chr<chrom>').
        NTHREADS: stored unchanged as self.NTHREADS (default 20).

    Raises:
        KeyError: if a required config key is missing, or if hg_version is
            not a key of self.grch ('hg19' or 'hg38').
        FileExistsError: if one of the output paths exists but is not a
            directory.
    """
    print("Initialising new JarvisDataPreprocessing object...")

    self.input_features = input_features
    self.chrom = chrom
    self.NTHREADS = NTHREADS

    # ==== Read config parameters ====
    config_params = custom_utils.get_config_params(config_file)

    self.hg_version = config_params['hg_version']
    print('\n\nhg_version:', self.hg_version)
    # Maps the hg build name to the GRCh number used in the reference path.
    self.grch = {'hg19': '37', 'hg38': '38'}

    pathogenic_set = config_params['pathogenic_set']
    benign_set = config_params['benign_set']
    self.patho_benign_sets = pathogenic_set + '_' + benign_set

    self.win_len = config_params['win_len']
    #self.win_len = int(config_params['win_len'] / 2)
    self.Y_label = config_params['Y_label']

    # ==== Define dir structure ====
    out_dir = custom_utils.create_out_dir(config_file)
    self.ml_data_dir = out_dir + '/ml_data'
    self.seq_out_dir = self.ml_data_dir + '/raw_seq'
    self.feature_tables_dir = self.ml_data_dir + '/clinvar_feature_tables'
    self.jarvis_predictions_dir = self.ml_data_dir + '/jarvis_predictions'
    self.jarvis_predictions_per_chr_dir = (
        self.jarvis_predictions_dir + '/chr' + str(self.chrom))

    # exist_ok=True replaces the per-directory os.path.exists() checks, which
    # could race when several instances are initialised at the same time
    # (presumably one per chromosome -- TODO confirm against the callers).
    for directory in (
            self.ml_data_dir,
            self.seq_out_dir,
            self.feature_tables_dir,
            self.jarvis_predictions_dir,
            self.jarvis_predictions_per_chr_dir,
    ):
        os.makedirs(directory, exist_ok=True)

    # Specify input (static) files
    grch_build = self.grch[self.hg_version]
    self.human_ref_genome_2bit = (
        '../' + self.hg_version + '/homo_sapiens_GRCh' + grch_build
        + '_FASTA/hsa' + grch_build + '.2bit')
def init_dirs(self):
    """Dir initialisation.

    Resolves the base output directory with
    create_out_dir(self.config_file, create_dirs=False), derives the ml_data
    sub-paths from it, and creates the 'clinvar-out' and 'models'
    directories (including any missing parents).

    Sets:
        self.out_dir
        self.ml_data_dir                 -> <out_dir>/ml_data
        self.clinvar_feature_table_dir   -> <ml_data_dir>/clinvar_feature_tables
        self.clinvar_ml_out_dir          -> <ml_data_dir>/clinvar-out  (created)
        self.base_out_models_dir         -> <ml_data_dir>/models       (created)

    Raises:
        FileExistsError: if a directory to create exists as a non-directory.
    """
    self.out_dir = create_out_dir(self.config_file, create_dirs=False)
    self.ml_data_dir = self.out_dir + '/ml_data'
    self.clinvar_feature_table_dir = self.ml_data_dir + '/clinvar_feature_tables'

    # exist_ok=True replaces the separate os.path.exists() checks, which could
    # race with another process creating the same directory in between.
    self.clinvar_ml_out_dir = self.ml_data_dir + '/clinvar-out'
    os.makedirs(self.clinvar_ml_out_dir, exist_ok=True)

    self.base_out_models_dir = self.ml_data_dir + '/models'
    os.makedirs(self.base_out_models_dir, exist_ok=True)
# NOTE(review): excerpt of a longer run of statements; config_params,
# hg_version, grch, dataset, chrom, population, single_nt_offset and
# config_file are defined before this point and are not visible here.
kmer = config_params['kmer']  # 7 or 3
all_variants_upper_thres = config_params['all_variants_upper_thres']  # e.g. 200 (filter out windows with more than 200 variants before fitting regression)
MAF_thres = config_params['MAF_thres']  # e.g. 0.0001 (Minor Allele Frequency)
variants_table_dir = config_params['variants_table_dir']  # gnomad-filtered_variant_tables-all-PASS_ONLY-NO_SEGDUP-NO_LCR-high_conf_regions

# ----------------------
# Input locations: reference genome (.2bit) and the filtered variants table
# for this chromosome / dataset / population.
human_ref_genome_2bit = '../' + hg_version + '/homo_sapiens_GRCh' + grch[hg_version] + '_FASTA/hsa' + grch[hg_version] + '.2bit'
data_dir = '../' + dataset + '/out/' + variants_table_dir
print('> data_dir: ' + data_dir)
filtered_vcf = data_dir + '/chr' + chrom + '_' + dataset + '_table.' + population + '.txt.filtered'

# ----------------------
# Create out_MAF{threshold} dir to store output results (plots and gwRVIS csv files)
out_dir = create_out_dir(config_file)
print('> out_dir: ' + out_dir)

# os.makedirs(..., exist_ok=True) already tolerates an existing directory, so
# the former os.path.exists() guards were redundant (and racy) and are dropped.
var_ratios_dir = out_dir + '/var_ratios'
os.makedirs(var_ratios_dir, exist_ok=True)

gwrvis_dir = out_dir + '/gwrvis_scores'
os.makedirs(gwrvis_dir, exist_ok=True)

gwrvis_nt_offset_dir = gwrvis_dir + '/single_nt_offset_' + str(single_nt_offset)
os.makedirs(gwrvis_nt_offset_dir, exist_ok=True)

plots_dir = out_dir + '/plots_per_chrom'
# NOTE(review): the line below is a whitespace-collapsed excerpt and is left
# untouched. It opens with the last statements of a definition whose header is
# not visible here: `boxplot_fig` is saved through a PdfPages object to
# "Whole_genome_gwRVIS_Boxplots.pdf" under `full_genome_out`, and the PdfPages
# object is closed. The script entry point follows: it reads the config file
# path from sys.argv[1] and the input classes file from sys.argv[2], resolves
# the output dir with create_out_dir(), derives the 'gwrvis_distribution/data'
# and 'full_genome_out' paths below it, builds the chromosome list '1'..'22',
# and loads 'genomic_classes' and 'filter_plot_outliers' from the config
# parameters returned by get_config_params().
pp = PdfPages(full_genome_out + "/Whole_genome_gwRVIS_Boxplots.pdf") pp.savefig(boxplot_fig) pp.close() if __name__ == '__main__': config_file = sys.argv[1] # 'config.log' input_classes_file = sys.argv[2] # 'input_classes.txt' # ----- Init directories and global variables ----- out_dir = create_out_dir(config_file) # get output dir - already generated print('Output dir: ' + out_dir) gwrvis_distr_data_dir = out_dir + '/gwrvis_distribution/data' full_genome_out = out_dir + '/full_genome_out' ucne_dir = '../UCNEbase' chroms = [str(ch) for ch in list(range(1,23))] # ---- Read run parameters from config file and store into a dictionary ----- run_params = get_config_params(config_file) genomic_classes_log = run_params['genomic_classes'] filter_plot_outliers = run_params['filter_plot_outliers']
# NOTE(review): the line below is a whitespace-collapsed excerpt and is left
# untouched. It opens with the tail of a function whose header is not visible
# here: vals[-2] is appended to `genomic_classes` and the list is returned.
# The script entry point follows: it takes the config file path from
# sys.argv[1] and converts sys.argv[2] through int() and bool() into DL_MODELS
# (so "0" gives False and any other integer gives True). It derives the
# 'ml_data', 'clinvar_feature_tables' and 'clinvar-out' paths from the output
# dir, then builds a MetricsBenchmark for every class returned by
# infer_avail_genomic_classes() and skips a class when
# read_metrics_from_saved_files() returns -1.
genomic_classes.append(vals[-2]) #genomic_classes = ['intergenic', 'utr', 'intergenic_utr_lincrna_ucne_vista'] return genomic_classes if __name__ == '__main__': config_file = sys.argv[1] DL_MODELS = bool(int(sys.argv[2])) # Read dir structure out_dir = custom_utils.create_out_dir(config_file) ml_data_dir = out_dir + '/ml_data' clinvar_feature_table_dir = ml_data_dir + '/clinvar_feature_tables' clinvar_out_dir = ml_data_dir + '/clinvar-out' # Infer available genomic classes genomic_classes = infer_avail_genomic_classes() for genomic_class in genomic_classes: print('\n> Genomic class:', genomic_class) bench = MetricsBenchmark(config_file, genomic_class) ret = bench.read_metrics_from_saved_files() if ret == -1: continue