Ejemplo n.º 1
0
def main(argv):
    """Parse the command-line arguments and return the output directory.

    Args:
        argv: Argument list without the program name (e.g. ``sys.argv[1:]``).

    Returns:
        The value given with ``-o/--outdir``, or the result of
        ``create_out_dir(<config>)`` for ``-c/--config``. If both options are
        given, the one appearing last on the command line wins.

    Exits:
        With status 2 (after printing the expected call format) when ``argv``
        is empty, contains an unknown option, or selects no output directory.
        With the default status after printing the usage for ``-h``.
    """
    usage = "python post_process_results.py [ -o <outdir> | -c <config> ]"
    usage_error = "[Error] - Expected call format:\n    " + usage

    if not argv:
        print(usage_error)
        sys.exit(2)

    try:
        opts, _ = getopt.getopt(argv, "ho:c:", ["outdir=", "config="])
    except getopt.GetoptError:
        print(usage_error)
        sys.exit(2)

    out_dir = None
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ('-o', '--outdir'):
            out_dir = arg
        elif opt in ('-c', '--config'):
            out_dir = create_out_dir(arg)

    if out_dir is None:
        # Only positional arguments were supplied, so neither -o nor -c set
        # the output directory; report it as a usage error instead of
        # failing with an UnboundLocalError on the return below.
        print(usage_error)
        sys.exit(2)

    return out_dir
Ejemplo n.º 2
0
    def init_ouput_dirs(self):
        """Derive the output-directory path attributes from the config file.

        Sets ``out_dir`` (as returned by ``custom_utils.create_out_dir``),
        ``ml_data_dir``, ``clinvar_feature_table_dir`` and ``out_models_dir``.
        This method only builds the path strings for the sub-directories; it
        does not create them itself.
        """
        base_dir = custom_utils.create_out_dir(self.config_file)
        self.out_dir = base_dir

        # Everything else lives under <out_dir>/ml_data
        ml_dir = base_dir + '/ml_data'
        self.ml_data_dir = ml_dir
        self.clinvar_feature_table_dir = ml_dir + '/clinvar_feature_tables'
        self.out_models_dir = ml_dir + '/models'
Ejemplo n.º 3
0
	def init_ouput_dirs(self):
		"""Set the output directory attributes and create the raw-sequence dir.

		Sets ``out_dir`` (as returned by ``custom_utils.create_out_dir``),
		``ml_data_dir``, ``clinvar_feature_table_dir``, ``out_models_dir`` and
		``raw_seq_dir``. Only ``raw_seq_dir`` is created by this method.
		"""
		# Init output dir structure
		self.out_dir = custom_utils.create_out_dir(self.config_file)
		self.ml_data_dir = self.out_dir + '/ml_data'
		self.clinvar_feature_table_dir = self.ml_data_dir + '/clinvar_feature_tables'
		self.out_models_dir = self.ml_data_dir + '/models'

		self.raw_seq_dir = self.ml_data_dir + '/raw_seq'
		# exist_ok=True replaces the separate os.path.exists() check, which
		# could fail if another process created the directory between the
		# check and the makedirs call.
		os.makedirs(self.raw_seq_dir, exist_ok=True)
Ejemplo n.º 4
0
    def __init__(self, config_file, input_features, chrom, NTHREADS=20):
        """Read the run configuration and prepare the output directory tree.

        Args:
            config_file: Config file path, passed to
                ``custom_utils.get_config_params`` and
                ``custom_utils.create_out_dir``.
            input_features: Stored unchanged as ``self.input_features``.
            chrom: Chromosome identifier; its ``str`` form names the
                per-chromosome predictions directory (``chr<chrom>``).
            NTHREADS: Stored unchanged as ``self.NTHREADS``.

        Raises:
            KeyError: If the configured ``hg_version`` is neither 'hg19' nor
                'hg38' (lookup in ``self.grch``).
        """
        print("Initialising new JarvisDataPreprocessing object...")

        self.input_features = input_features
        self.chrom = chrom
        self.NTHREADS = NTHREADS

        # ==== Read config parameters ====
        config_params = custom_utils.get_config_params(config_file)
        self.hg_version = config_params['hg_version']
        print('\n\nhg_version:', self.hg_version)
        # Maps the hg build name to the GRCh release number used in file names
        self.grch = {'hg19': '37', 'hg38': '38'}

        pathogenic_set = config_params['pathogenic_set']
        benign_set = config_params['benign_set']

        self.patho_benign_sets = pathogenic_set + '_' + benign_set
        self.win_len = config_params['win_len']
        #self.win_len = int(config_params['win_len'] / 2)
        self.Y_label = config_params['Y_label']

        # ==== Define dir structure ====
        out_dir = custom_utils.create_out_dir(config_file)

        self.ml_data_dir = out_dir + '/ml_data'
        self.seq_out_dir = self.ml_data_dir + '/raw_seq'
        self.feature_tables_dir = self.ml_data_dir + '/clinvar_feature_tables'
        self.jarvis_predictions_dir = self.ml_data_dir + '/jarvis_predictions'
        self.jarvis_predictions_per_chr_dir = (
            self.jarvis_predictions_dir + '/chr' + str(self.chrom))

        # exist_ok=True replaces the separate os.path.exists() checks, which
        # could fail when several processes (e.g. one per chromosome) try to
        # create the shared directories at the same time.
        for directory in (
                self.ml_data_dir,
                self.seq_out_dir,
                self.feature_tables_dir,
                self.jarvis_predictions_dir,
                self.jarvis_predictions_per_chr_dir,
        ):
            os.makedirs(directory, exist_ok=True)

        # Specify input (static) files
        grch_release = self.grch[self.hg_version]
        self.human_ref_genome_2bit = (
            '../' + self.hg_version + '/homo_sapiens_GRCh' + grch_release
            + '_FASTA/hsa' + grch_release + '.2bit')
Ejemplo n.º 5
0
    def init_dirs(self):
        """Set the directory attributes and create the output directories.

        Sets ``out_dir`` (from ``create_out_dir`` called with
        ``create_dirs=False``), ``ml_data_dir``, ``clinvar_feature_table_dir``,
        ``clinvar_ml_out_dir`` and ``base_out_models_dir``. Only the last two
        directories are created by this method.
        """
        self.out_dir = create_out_dir(self.config_file, create_dirs=False)
        self.ml_data_dir = self.out_dir + '/ml_data'

        self.clinvar_feature_table_dir = self.ml_data_dir + '/clinvar_feature_tables'

        # exist_ok=True replaces the separate os.path.exists() checks, which
        # could fail if another process created the directory between the
        # check and the makedirs call.
        self.clinvar_ml_out_dir = self.ml_data_dir + '/clinvar-out'
        os.makedirs(self.clinvar_ml_out_dir, exist_ok=True)

        self.base_out_models_dir = self.ml_data_dir + '/models'
        os.makedirs(self.base_out_models_dir, exist_ok=True)
	kmer = config_params['kmer']			# 7 or 3
	all_variants_upper_thres = config_params['all_variants_upper_thres']	# e.g. 200 (filter out windows with more than 200 variants before fitting regression)
	MAF_thres = config_params['MAF_thres']	        # e.g. 0.0001 (Minor Allele Frequency)
	variants_table_dir = config_params['variants_table_dir']	# gnomad-filtered_variant_tables-all-PASS_ONLY-NO_SEGDUP-NO_LCR-high_conf_regions
	# ----------------------


	human_ref_genome_2bit = '../' + hg_version + '/homo_sapiens_GRCh' + grch[hg_version] + '_FASTA/hsa' + grch[hg_version] +'.2bit'
	data_dir = '../' + dataset + '/out/' + variants_table_dir
	print('> data_dir: ' + data_dir)
	filtered_vcf = data_dir + '/chr' + chrom + '_' + dataset + '_table.' + population + '.txt.filtered'
	# ----------------------


	# Create out_MAF{threshold} dir to store output results (plots and gwRVIS csv files)
	out_dir = create_out_dir(config_file)
	print('> out_dir: ' + out_dir)

	var_ratios_dir = out_dir + '/var_ratios'
	if not os.path.exists(var_ratios_dir):     
		os.makedirs(var_ratios_dir, exist_ok=True)

	gwrvis_dir = out_dir + '/gwrvis_scores'
	if not os.path.exists(gwrvis_dir):     
		os.makedirs(gwrvis_dir, exist_ok=True)
	
	gwrvis_nt_offset_dir = gwrvis_dir + '/single_nt_offset_' + str(single_nt_offset)
	if not os.path.exists(gwrvis_nt_offset_dir):     
		os.makedirs(gwrvis_nt_offset_dir, exist_ok=True)

	plots_dir = out_dir + '/plots_per_chrom'
	pp = PdfPages(full_genome_out + "/Whole_genome_gwRVIS_Boxplots.pdf")
	pp.savefig(boxplot_fig)
	pp.close()





# Script entry point. Expects two positional command-line arguments:
# the config file and the input-classes file.
if __name__ == '__main__':

	config_file = sys.argv[1]	# e.g. 'config.log'
	input_classes_file = sys.argv[2]	# e.g. 'input_classes.txt'


	# ----- Init directories and global variables -----
	# NOTE(review): the original comment says the output dir is "already generated";
	# whether create_out_dir also creates it is not visible here -- confirm.
	out_dir = create_out_dir(config_file)	# get output dir - already generated
	print('Output dir: ' + out_dir)


	# Paths below are plain string concatenations; nothing is created here.
	gwrvis_distr_data_dir = out_dir + '/gwrvis_distribution/data'
	full_genome_out = out_dir + '/full_genome_out'
	ucne_dir = '../UCNEbase'
	
	# Chromosome labels '1' .. '22' as strings
	chroms = [str(ch) for ch in list(range(1,23))]


	# ---- Read run parameters from config file and store into a dictionary -----
	run_params = get_config_params(config_file)

	genomic_classes_log = run_params['genomic_classes']
	filter_plot_outliers = run_params['filter_plot_outliers']
		genomic_classes.append(vals[-2])

	#genomic_classes = ['intergenic', 'utr', 'intergenic_utr_lincrna_ucne_vista']

	return genomic_classes 



# Script entry point. Expects two positional command-line arguments:
# the config file and an integer flag for DL_MODELS.
if __name__ == '__main__':

	config_file = sys.argv[1]
	# The second argument is parsed as an integer and converted to a bool
	# ('0' -> False, any other integer -> True).
	DL_MODELS = bool(int(sys.argv[2]))


	# Read dir structure
	out_dir = custom_utils.create_out_dir(config_file)
	ml_data_dir = out_dir + '/ml_data' 		
	clinvar_feature_table_dir = ml_data_dir + '/clinvar_feature_tables'
	clinvar_out_dir = ml_data_dir + '/clinvar-out'

	# Infer available genomic classes
	genomic_classes = infer_avail_genomic_classes()
		

	# Process each genomic class in turn
	for genomic_class in genomic_classes:
		print('\n> Genomic class:', genomic_class)

		bench = MetricsBenchmark(config_file, genomic_class)
		ret = bench.read_metrics_from_saved_files()
		# A return value of -1 skips this genomic class.
		# NOTE(review): presumably -1 signals that no saved metrics were found -- confirm.
		if ret == -1:
			continue