def calc_enrichment(args):
    """Run an enrichment analysis on the top SNPs of a GWAS result.

    Keys read from ``args``:

    * ``genotype_folder``: passed to ``_load_genotype_``.
    * ``genes_file``: iterable of paths to JSON files; every file is parsed
      and stored under its base file name.
    * ``gwas_file``: GWAS result file; loaded as CSV when the extension is
      ``.csv`` and as HDF5 for any other extension.
    * ``top_snps_count``: number of SNPs requested from the GWAS result.
    * ``window_size`` / ``permutation_count``: forwarded unchanged to
      ``enrichment.enrichment``.

    The value returned by ``enrichment.enrichment`` is written to stdout as
    JSON and is also returned to the caller.
    """
    genotype_folder = args['genotype_folder']
    genes_files = args['genes_file']
    gwas_file = args['gwas_file']

    log.info('Retrieving genes')
    genes = {}
    for genes_file in genes_files:
        with open(genes_file, 'r') as f:
            genes[os.path.basename(genes_file)] = json.load(f)

    _, input_ext = os.path.splitext(gwas_file)
    log.info('Opening GWAS file')
    if input_ext == '.csv':
        gwas_result = result.load_from_csv(gwas_file)
    else:
        gwas_result = result.load_from_hdf5(gwas_file)

    log.info('Loading genotype file')
    genotype_data = _load_genotype_(genotype_folder)

    top_snps_count = args['top_snps_count']
    top_snps = gwas_result.get_top_snps(top_snps_count)
    # NOTE(review): this sorts in place by the 'scores' field before the
    # first ``top_snps_count`` entries are kept - confirm that this order
    # really puts the intended "top" SNPs first.
    top_snps.sort(order=['scores'])
    top_snps = top_snps[:top_snps_count]

    pval = enrichment.enrichment(genes, genotype_data, top_snps,
                                 args['window_size'],
                                 args['permutation_count'])
    # Call form of print: with a single argument the output is the same on
    # Python 2, and the module stays importable on Python 3.
    print(json.dumps(pval))
    return pval
def calc_enrichment(args):
    """Run an enrichment analysis on the top SNPs of a GWAS result.

    Keys read from ``args``:

    * ``genotype_folder``: passed to ``_load_genotype_``.
    * ``genes_file``: iterable of paths to JSON files; every file is parsed
      and stored under its base file name.
    * ``gwas_file``: GWAS result file; loaded as CSV when the extension is
      ``.csv`` and as HDF5 for any other extension.
    * ``top_snps_count``: number of SNPs requested from the GWAS result.
    * ``window_size`` / ``permutation_count``: forwarded unchanged to
      ``enrichment.enrichment``.

    The value returned by ``enrichment.enrichment`` is written to stdout as
    JSON and is also returned to the caller.
    """
    genotype_folder = args['genotype_folder']
    genes_files = args['genes_file']
    gwas_file = args['gwas_file']

    log.info('Retrieving genes')
    genes = {}
    for genes_file in genes_files:
        with open(genes_file, 'r') as f:
            genes[os.path.basename(genes_file)] = json.load(f)

    _, input_ext = os.path.splitext(gwas_file)
    log.info('Opening GWAS file')
    if input_ext == '.csv':
        gwas_result = result.load_from_csv(gwas_file)
    else:
        gwas_result = result.load_from_hdf5(gwas_file)

    log.info('Loading genotype file')
    genotype_data = _load_genotype_(genotype_folder)

    top_snps_count = args['top_snps_count']
    top_snps = gwas_result.get_top_snps(top_snps_count)
    # NOTE(review): this sorts in place by the 'scores' field before the
    # first ``top_snps_count`` entries are kept - confirm that this order
    # really puts the intended "top" SNPs first.
    top_snps.sort(order=['scores'])
    top_snps = top_snps[:top_snps_count]

    pval = enrichment.enrichment(genes, genotype_data, top_snps,
                                 args['window_size'],
                                 args['permutation_count'])
    # Call form of print: with a single argument the output is the same on
    # Python 2, and the module stays importable on Python 3.
    print(json.dumps(pval))
    return pval
def plot(args):
    """Load a GWAS result file and hand it to ``plotting.plot_gwas_result``.

    Keys read from ``args``: ``file`` (input path), ``marker_size``,
    ``chr`` (optional; ignored when missing, ``None`` or empty), ``fdr``,
    ``colored``, ``output`` and ``macs``.

    Raises:
        Exception: if the extension of ``args['file']`` is not listed in
            ``SUPPORTED_FILE_EXT``.
    """
    input_path = args['file']
    extension = os.path.splitext(input_path)[1]
    if extension not in SUPPORTED_FILE_EXT:
        raise Exception(
            'The input file must have one of the supported extensions: %s'
            % str(SUPPORTED_FILE_EXT))

    marker_size = args['marker_size']

    # Restrict the plot to one chromosome only when a usable value is given.
    chromosome = args['chr'] if 'chr' in args else None
    if chromosome is None or chromosome == '':
        chrs = None
    else:
        chrs = [chromosome]

    if extension in HDF5_FILE_EXT:
        load = result.load_from_hdf5
    else:
        load = result.load_from_csv
    gwas_result = load(input_path)

    fdr = args['fdr']
    color_map = ['b', 'g', 'r', 'c', 'm'] if args['colored'] else None

    plotting.plot_gwas_result(
        gwas_result,
        args['output'],
        chrs,
        args['macs'],
        marker_size=marker_size,
        fdr=fdr,
        color_map=color_map)
def calc_ld(args):
    """Calculate LD for a list of SNP positions and save the result.

    ``args['positions']`` is interpreted in one of two ways:

    * If it matches ``CHR_POS_PATTERN`` (group 1: chromosome, group 2:
      position), the SNPs whose index lies within ``args['range']`` of the
      index returned by ``get_pos_ix`` are used.
    * Otherwise it is treated as the path of a GWAS result file, and the
      SNPs returned by ``get_top_snps(range)`` are used.

    The (chromosome, position) pairs are sorted, passed to
    ``calculate_ld`` on the genotype loaded from ``args['genotype_file']``
    and written with ``_save_ld_data`` to ``args['output_file']``.

    Raises:
        ValueError: if ``positions`` is not a chr/position string and the
            path does not exist.
        Exception: if the GWAS file extension is not listed in
            ``SUPPORTED_FILE_EXT``.
    """
    positions = args['positions']
    # Named ``window`` (not ``range``) so the builtin is not shadowed.
    window = int(args['range'])
    chr_pos_match = CHR_POS_PATTERN.match(positions)
    genotype_data = genotype.load_hdf5_genotype_data(args['genotype_file'])

    # The historical key is misspelled ('acession_file'). It is still tried
    # first so existing callers behave as before; the correctly spelled key
    # is accepted as a fallback so the filter is not silently skipped.
    # NOTE(review): confirm which key the argument parser actually emits.
    accession_file = args.get('acession_file')
    if accession_file is None:
        accession_file = args.get('accession_file')
    if accession_file is not None:
        accessions = _load_accessions(accession_file)
        genotype_data.filter_accessions(accessions)
        # NOTE(review): assumed to run only after an accession filter -
        # confirm against the intended behaviour.
        genotype_data.filter_non_binary()

    if chr_pos_match:
        chromosome = chr_pos_match.group(1)
        position = int(chr_pos_match.group(2))
        log.info(
            'Calculating LD for chr %s and position %s with +/- range of %s'
            % (chromosome, position, window))
        # Only the first of the three returned values is needed here.
        abs_ix, _, _ = genotype_data.get_pos_ix(chromosome, position)
        min_ix = max(0, abs_ix - window)
        max_ix = min(abs_ix + window, genotype_data.genome_length)
        chr_pos_list = zip(genotype_data.chromosomes[min_ix:max_ix],
                           genotype_data.positions[min_ix:max_ix])
    else:
        if not os.path.exists(positions):
            raise ValueError('%s path does not exist' % positions)
        _, ext = os.path.splitext(positions)
        if ext not in SUPPORTED_FILE_EXT:
            raise Exception(
                'The input file must have one of the supported extensions: %s'
                % str(SUPPORTED_FILE_EXT))
        if ext in HDF5_FILE_EXT:
            gwas_result = result.load_from_hdf5(positions)
        else:
            gwas_result = result.load_from_csv(positions)
        gwas_data = gwas_result.get_top_snps(window)
        chr_pos_list = zip(map(str, gwas_data['chr']),
                           gwas_data['positions'])

    # sorted() also materialises the pairs into a list, which is required
    # because the list is consumed twice below.
    chr_pos_list = sorted(chr_pos_list, key=itemgetter(0, 1))
    ld_data = genotype_data.calculate_ld(chr_pos_list)
    _save_ld_data(args['output_file'], ld_data, chr_pos_list)
def qq_plot(args):
    """Load a GWAS result file and pass it to ``plotting.plot_qq``.

    Keys read from ``args``: ``file`` (input path) and ``output``.

    Raises:
        Exception: if the extension of ``args['file']`` is not listed in
            ``SUPPORTED_FILE_EXT``.
    """
    source = args['file']
    suffix = os.path.splitext(source)[1]
    if suffix not in SUPPORTED_FILE_EXT:
        raise Exception(
            'The input file must have one of the supported extensions: %s'
            % str(SUPPORTED_FILE_EXT))

    # Every supported extension that is not an HDF5 one is read as CSV.
    if suffix in HDF5_FILE_EXT:
        load = result.load_from_hdf5
    else:
        load = result.load_from_csv
    gwas_result = load(source)

    plotting.plot_qq(gwas_result, args['output'])
def qq_plot(args):
    """Load a GWAS result file and pass it to ``plotting.plot_qq``.

    Keys read from ``args``: ``file`` (input path) and ``output``.

    Raises:
        Exception: if the extension of ``args['file']`` is not listed in
            ``SUPPORTED_FILE_EXT``.
    """
    source = args['file']
    suffix = os.path.splitext(source)[1]
    if suffix not in SUPPORTED_FILE_EXT:
        raise Exception(
            'The input file must have one of the supported extensions: %s'
            % str(SUPPORTED_FILE_EXT))

    # Every supported extension that is not an HDF5 one is read as CSV.
    if suffix in HDF5_FILE_EXT:
        load = result.load_from_hdf5
    else:
        load = result.load_from_csv
    gwas_result = load(source)

    plotting.plot_qq(gwas_result, args['output'])
def convert(args):
    """Convert a GWAS result file between CSV and HDF5.

    ``args['inputfile']`` is loaded and written to ``args['outputfile']``:
    a ``.csv`` input is saved as HDF5, any other supported input is loaded
    as HDF5 and saved as CSV.

    Raises:
        Exception: if both files have the same extension, if either
            extension is not listed in ``SUPPORTED_FILE_EXT``, or if
            neither file is a ``.csv`` file.
    """
    input_path = args['inputfile']
    output_path = args['outputfile']
    _, input_ext = os.path.splitext(input_path)
    _, output_ext = os.path.splitext(output_path)

    if input_ext == output_ext:
        raise Exception(
            'use different file extension for input (%s) and output file (%s)'
            % (input_ext, output_ext))
    if input_ext not in SUPPORTED_FILE_EXT:
        raise Exception(
            'The input file must have one of the supported extensions: %s'
            % str(SUPPORTED_FILE_EXT))
    if output_ext not in SUPPORTED_FILE_EXT:
        raise Exception(
            'The output file must have one of the supported extensions: (%s)'
            % ', '.join(SUPPORTED_FILE_EXT))

    if input_ext == '.csv':
        gwas_result = result.load_from_csv(input_path)
        gwas_result.save_as_hdf5(output_path)
    else:
        # The input is loaded as HDF5 and written as CSV. If more than one
        # non-CSV extension is supported, two different non-CSV extensions
        # pass the checks above; refuse rather than write CSV data into a
        # file whose extension is not .csv.
        if output_ext != '.csv':
            raise Exception(
                'cannot convert %s to %s: one of the two files must be a '
                '.csv file' % (input_ext, output_ext))
        gwas_result = result.load_from_hdf5(input_path)
        gwas_result.save_as_csv(output_path)
def plot(args):
    """Load a GWAS result file and hand it to ``plotting.plot_gwas_result``.

    Keys read from ``args``: ``file`` (input path), ``marker_size``,
    ``chr`` (optional; ignored when missing, ``None`` or empty), ``fdr``,
    ``colored``, ``output`` and ``macs``.

    Raises:
        Exception: if the extension of ``args['file']`` is not listed in
            ``SUPPORTED_FILE_EXT``.
    """
    input_path = args['file']
    extension = os.path.splitext(input_path)[1]
    if extension not in SUPPORTED_FILE_EXT:
        raise Exception(
            'The input file must have one of the supported extensions: %s'
            % str(SUPPORTED_FILE_EXT))

    marker_size = args['marker_size']

    # Restrict the plot to one chromosome only when a usable value is given.
    chromosome = args['chr'] if 'chr' in args else None
    if chromosome is None or chromosome == '':
        chrs = None
    else:
        chrs = [chromosome]

    if extension in HDF5_FILE_EXT:
        load = result.load_from_hdf5
    else:
        load = result.load_from_csv
    gwas_result = load(input_path)

    fdr = args['fdr']
    color_map = ['b', 'g', 'r', 'c', 'm'] if args['colored'] else None

    plotting.plot_gwas_result(
        gwas_result,
        args['output'],
        chrs,
        args['macs'],
        marker_size=marker_size,
        fdr=fdr,
        color_map=color_map)
def calc_ld(args):
    """Calculate LD for a list of SNP positions and save the result.

    ``args['positions']`` is interpreted in one of two ways:

    * If it matches ``CHR_POS_PATTERN`` (group 1: chromosome, group 2:
      position), the SNPs whose index lies within ``args['range']`` of the
      index returned by ``get_pos_ix`` are used.
    * Otherwise it is treated as the path of a GWAS result file, and the
      SNPs returned by ``get_top_snps(range)`` are used.

    The (chromosome, position) pairs are sorted, passed to
    ``calculate_ld`` on the genotype loaded from ``args['genotype_file']``
    and written with ``_save_ld_data`` to ``args['output_file']``.

    Raises:
        ValueError: if ``positions`` is not a chr/position string and the
            path does not exist.
        Exception: if the GWAS file extension is not listed in
            ``SUPPORTED_FILE_EXT``.
    """
    positions = args['positions']
    # Named ``window`` (not ``range``) so the builtin is not shadowed.
    window = int(args['range'])
    chr_pos_match = CHR_POS_PATTERN.match(positions)
    genotype_data = genotype.load_hdf5_genotype_data(args['genotype_file'])

    # The historical key is misspelled ('acession_file'). It is still tried
    # first so existing callers behave as before; the correctly spelled key
    # is accepted as a fallback so the filter is not silently skipped.
    # NOTE(review): confirm which key the argument parser actually emits.
    accession_file = args.get('acession_file')
    if accession_file is None:
        accession_file = args.get('accession_file')
    if accession_file is not None:
        accessions = _load_accessions(accession_file)
        genotype_data.filter_accessions(accessions)
        # NOTE(review): assumed to run only after an accession filter -
        # confirm against the intended behaviour.
        genotype_data.filter_non_binary()

    if chr_pos_match:
        chromosome = chr_pos_match.group(1)
        position = int(chr_pos_match.group(2))
        log.info(
            'Calculating LD for chr %s and position %s with +/- range of %s'
            % (chromosome, position, window))
        # Only the first of the three returned values is needed here.
        abs_ix, _, _ = genotype_data.get_pos_ix(chromosome, position)
        min_ix = max(0, abs_ix - window)
        max_ix = min(abs_ix + window, genotype_data.genome_length)
        chr_pos_list = zip(genotype_data.chromosomes[min_ix:max_ix],
                           genotype_data.positions[min_ix:max_ix])
    else:
        if not os.path.exists(positions):
            raise ValueError('%s path does not exist' % positions)
        _, ext = os.path.splitext(positions)
        if ext not in SUPPORTED_FILE_EXT:
            raise Exception(
                'The input file must have one of the supported extensions: %s'
                % str(SUPPORTED_FILE_EXT))
        if ext in HDF5_FILE_EXT:
            gwas_result = result.load_from_hdf5(positions)
        else:
            gwas_result = result.load_from_csv(positions)
        gwas_data = gwas_result.get_top_snps(window)
        chr_pos_list = zip(map(str, gwas_data['chr']),
                           gwas_data['positions'])

    # sorted() also materialises the pairs into a list, which is required
    # because the list is consumed twice below.
    chr_pos_list = sorted(chr_pos_list, key=itemgetter(0, 1))
    ld_data = genotype_data.calculate_ld(chr_pos_list)
    _save_ld_data(args['output_file'], ld_data, chr_pos_list)
def convert(args):
    """Convert a GWAS result file between CSV and HDF5.

    ``args['inputfile']`` is loaded and written to ``args['outputfile']``:
    a ``.csv`` input is saved as HDF5, any other supported input is loaded
    as HDF5 and saved as CSV.

    Raises:
        Exception: if both files have the same extension, if either
            extension is not listed in ``SUPPORTED_FILE_EXT``, or if
            neither file is a ``.csv`` file.
    """
    input_path = args['inputfile']
    output_path = args['outputfile']
    _, input_ext = os.path.splitext(input_path)
    _, output_ext = os.path.splitext(output_path)

    if input_ext == output_ext:
        raise Exception(
            'use different file extension for input (%s) and output file (%s)'
            % (input_ext, output_ext))
    if input_ext not in SUPPORTED_FILE_EXT:
        raise Exception(
            'The input file must have one of the supported extensions: %s'
            % str(SUPPORTED_FILE_EXT))
    if output_ext not in SUPPORTED_FILE_EXT:
        raise Exception(
            'The output file must have one of the supported extensions: (%s)'
            % ', '.join(SUPPORTED_FILE_EXT))

    if input_ext == '.csv':
        gwas_result = result.load_from_csv(input_path)
        gwas_result.save_as_hdf5(output_path)
    else:
        # The input is loaded as HDF5 and written as CSV. If more than one
        # non-CSV extension is supported, two different non-CSV extensions
        # pass the checks above; refuse rather than write CSV data into a
        # file whose extension is not .csv.
        if output_ext != '.csv':
            raise Exception(
                'cannot convert %s to %s: one of the two files must be a '
                '.csv file' % (input_ext, output_ext))
        gwas_result = result.load_from_hdf5(input_path)
        gwas_result.save_as_csv(output_path)