def pkg_requirements():
    """Report, through ``bomb``, any required package that is not installed.

    The installed distributions are read from ``pip freeze`` run with the
    current interpreter. Names are compared case-insensitively, and only
    the packages that are actually missing are named in the message.
    """
    required = ['biopython', 'matplotlib']
    frozen = subprocess.check_output(
        [sys.executable, '-m', 'pip', 'freeze'])
    # Every whitespace-separated token is inspected, so both
    # `name==version` and `name @ location` entries contribute their name.
    installed = {
        token.decode().split('==')[0].lower() for token in frozen.split()
    }
    missing = [name for name in required if name.lower() not in installed]
    if missing:
        bomb(
            f"install package(s) {missing} with: pip install {' '.join(missing)}\n"
        )
def check_path():
    """Validate the input directory given on the command line.

    Returns:
        The result of ``bomb`` when the path is not a directory or when
        none of the collected input files has a vcf/plink suffix;
        ``None`` when the path passes both checks.
    """
    # Parse the command line once instead of on every attribute access.
    path = my_parser().filePath
    if not isdir(path):
        return bomb(f'No files found at path = {path}\n')

    # From here on `path` is known to be a directory, so only the
    # collected file names still need checking.
    suffixes = ('vcf', 'vcf.gz', 'ped', 'map')
    if not any(item.endswith(suffixes) for item in flatten(file_list)):
        return bomb(
            f'No vcf or plink files found at path = {abspath(path)}\n'
        )
    return None
def check_dups():
    """Check the collected inputs for duplicates and log details to Error.txt.

    The checks run in this order:

    1. SNPs occurring more than once within a file.
    2. Samples occurring more than once within a file.
    3. When more than one input file is listed: samples shared between
       files, then differing chromosome sets between files.

    Each failed check is reported through ``bomb``. ``Error.txt`` is
    created (or truncated) on every call, even when nothing is wrong.

    Reads the module-level names ``within_file_snps_dups``,
    ``within_file_sample_dups``, ``between_file_sample_dups``,
    ``between_file_chroms`` and ``file_list``, which are defined outside
    this function.
    NOTE(review): the first four are used as mappings (``.items()`` /
    ``.values()``); presumably keyed by input file -- confirm where they
    are built.
    """
    from collections import Counter

    # The context manager guarantees the report is flushed and closed even
    # if `bomb` terminates the program part-way through.
    with open('Error.txt', 'w') as err:
        # SNPs occurring more than once in same file
        dict0 = {
            k: {snp for snp, seen in Counter(v).items() if seen > 1}
            for k, v in within_file_snps_dups.items()
            if len(v) > len(set(v))
        }
        if dict0:
            err.write('SNPs occurring more than once in same file')
            err.write('\n------------------------------------------\n')
            for k, v in dict0.items():
                err.write('{:<3} {:<5} {:<8}\n'.format(k, '>>', ' '.join(v)))
            bomb(
                '> duplicate SNP found. SNP should be unique in each file\n'
                ' > check `less -S Error.txt` for optional additional information\n'
            )

        # sample occurring more than once in same file
        if within_file_sample_dups:
            err.write('\n--------------------------------------------\n')
            err.write('sample occurring more than once in same file')
            err.write('\n--------------------------------------------\n')
            for k, v in within_file_sample_dups.items():
                err.write('{:<3} {:<5} {:<8}\n'.format(k, '>>', ' '.join(v)))
            bomb(
                '> duplicate sample(s) found. Sample(s) should be unique in each file\n'
                ' > check `less -S Error.txt` for optional additional information\n'
            )

        # sample occurring more than once two or more files
        if len(file_list) > 1:
            if between_file_sample_dups:
                err.write(
                    '\n----------------------------------------------------\n')
                err.write(
                    'sample occurring more than once in two or more files')
                err.write(
                    '\n----------------------------------------------------\n')
                err.write(' '.join(find_common(between_file_sample_dups)))
                # The check fails when `set_counter`'s result differs from
                # the plain per-entry lengths.
                sets0 = set_counter(between_file_sample_dups.values())
                sets1 = [len(i) for i in between_file_sample_dups.values()]
                if sets0 != sets1:
                    bomb(
                        '> sample(s) found in more than one file. '
                        'Sample(s) should be unique to each file\n'
                        ' > check `less -S Error.txt` for optional additional information\n'
                    )
            result_chrom = set_counter(between_file_chroms.values())
            if sum(result_chrom) > 0:
                bomb(
                    '> files have differing number of chromosomes or chromosome ids\n'
                    ' > check `less -S Error.txt` for optional additional information\n'
                )
import timeit import re from datetime import datetime from Bio import bgzf from funtools import flatten, bomb from parse_args import my_parser from recode_dict import fimpute_2_vcf start = timeit.default_timer() try: samples = open(my_parser().samples, "r") except FileNotFoundError: bomb(f"Missing argument or '{my_parser().samples}' may be empty\n") try: snp_info = open(my_parser().snps, "r") except FileNotFoundError: bomb(f"Missing argument or '{my_parser().snps}' may be empty\n") try: geno_info = open(my_parser().geno, "r") except FileNotFoundError: bomb(f"Missing argument or '{my_parser().geno}' may be empty\n") try: allele_info = open(my_parser().allele, "r") except FileNotFoundError: bomb(f"Missing argument or '{my_parser().allele}' may be empty\n") toto = my_parser().type_ if not my_parser().out: bomb('Missing argument, "-o PREFIX", "--out PREFIX"\n'
from os.path import abspath, isdir from funtools import file_by_size, to_mat, line_count, find_common, bomb, std_capture, flatten, set_counter from parse_args import my_parser # get a list of input files if not isdir(my_parser().filePath): bomb(f'No files found at path = {abspath(my_parser().filePath)}\n') vcf_list = [ file_ for file_ in file_by_size(my_parser().filePath, ['vcf']) if file_.endswith(('vcf', 'vcf.gz')) ] plink_list = to_mat(sorted(file_by_size(my_parser().filePath, ["ped", "map"])), 2) files = vcf_list + plink_list file_list = [] # first check if len(files) > 10: bomb('FImpute does not support imputing more than 10 chips simultaneously') elif len(files) < 1: bomb(f'No files found at path = {abspath(my_parser().filePath)}\n') # sort file from largest smallest ''' for file in vcf_list + plink_list: if str(file).endswith(("vcf", "vcf.gz")): unsorted_file_list.append([line_count(file), file]) else: unsorted_file_list.append([line_count(file[0]), file]) for item in sorted(unsorted_file_list, key=lambda x: (int(x[0])), reverse=True):
pos] = [bta, snp, cm, pos, ref, alt] elif snps_list[bta + ":" + pos] == [alt + '_' + ref]: print( f'Warning: Allele flipped for SNP {snp} in PLINK file(s)' ) elif snps_list[bta + ":" + pos][4] != ref: print( f'Warning: Possible erroneous allele for SNP {snp} in PLINK file(s)' f'normalize with `bcftools norm`') except IndexError: print( f'Warning: SNP {snp} in PLINK file(s) not indexed as `chrom_pos_ref_alt`' ) # raise SystemExit else: bomb(f'Recheck files at {file_list}') raise SystemExit # Create a comprehensive snps file mark = [row for row in [list(flatten(i)) for i in list(mark_tot.values())]] # Sort the snps by chrom then pos and write index mark = sorted(mark, key=lambda x: (int(x[1]), int(x[2]))) for row in mark: mark_out.write(' '.join(str(e) for e in row) + '\n') # Write a marker list with ref/alt information for row in [list(flatten(i)) for i in list(snps_list.values())]: allele_out.write('\t'.join(row) + '\n') if len(mark) - len(snps_list) > 0:
def _missing_options(required, argv):
    """Return the option pairs of *required* absent from *argv*, in order."""
    return [pair for pair in required if not any(arg in pair for arg in argv)]


def main():
    """Dispatch the command line to the matching conversion module.

    The worker modules (``geno2fi``, ``fi2geno``, ``snpinfo``,
    ``geno_corr``) are imported lazily, inside the branch that needs them,
    and nothing else from them is used here: the import itself is the
    only interaction with them.

    Branches, tested in this order:

    * no arguments            -> print the usage message
    * ``-h`` / ``--help``     -> import ``geno2fi``
    * any ``-D`` / ``-O``     -> validate inputs, then import ``geno2fi``
    * any fimpute option      -> import ``fi2geno``
    * a ``bim`` / ``map`` arg -> import ``snpinfo``
    * a ``vcf`` / ``vcf.gz``  -> import ``geno_corr``
    * anything else           -> ``bomb`` with an "unknown argument" message

    Raises:
        SystemExit: at the end of every recognised branch.
    """
    args1 = [('-D', '--DIR'), ('-O', '--OUT')]
    args2 = [('-g', '--geno'), ('-s', '--snps'), ('-o', '--out'),
             ('-n', '--samples'), ('-t', '--type'), ('-a', '--alleles')]
    argv = sys.argv[1:]

    if len(sys.argv) == 1:
        from parse_args import msg
        print(msg())
        raise SystemExit
    elif sys.argv[1] == '-h' or sys.argv[1] == '--help':
        import geno2fi  # noqa: F401
        raise SystemExit
    elif any(opt in argv for pair in args1 for opt in pair):
        # Options are listed in declaration order and only when truly
        # absent, so the message is deterministic.
        z = _missing_options(args1, argv)
        if z:
            bomb(f'Missing argument when trying to convert to fimpute:\n'
                 f' required args: {z}\n'
                 f' try `./snprecode -h` for complete arguments list\n')
        from check_path import check_path
        # Validate once and reuse the outcome instead of re-running the check.
        path_problem = check_path()
        if not path_problem:
            print("File path OK...")
        else:
            print(path_problem)
        from garbage import check_dups, file_list
        if not check_dups():
            print("File check complete...\nFiles to be processed...\n")
            print('\n'.join(map(str, file_list)))
        import geno2fi  # noqa: F401
        raise SystemExit
    elif any(opt in argv for pair in args2 for opt in pair):
        z = _missing_options(args2, argv)
        if z:
            bomb(f"Missing argument when trying to convert from fimpute\n"
                 f" required args: {z}\n"
                 f" try: `./snprecode -h` for complete arguments list\n")
        import fi2geno  # noqa: F401
        raise SystemExit
    elif any(item.endswith(('bim', 'map')) for item in argv):
        import snpinfo  # noqa: F401
        raise SystemExit
    elif any(item.endswith(('vcf', 'vcf.gz')) for item in argv):
        import geno_corr  # noqa: F401
        raise SystemExit
    else:
        bomb('Unknown argument(s)\n try ./snprecode -h')
def py_version():
    """Report, through ``bomb``, an interpreter older than Python 3.6.

    Returns:
        The result of ``bomb`` when the running interpreter is older than
        3.6, otherwise ``None``.
    """
    # Compare (major, minor) as integers. Slicing the version string to
    # three characters reads "3.10" as 3.1 and wrongly rejects every
    # release from 3.10 onwards.
    major, minor = python_version().split('.')[:2]
    if (int(major), int(minor)) < (3, 6):
        return bomb(
            "Python version not satisfied, install Python V3.6 or later\n")
    return None
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from parse_args import my_parser
from os.path import abspath, isdir
from funtools import bomb, file_by_size, to_mat, flatten

# get a list of input files
# vcf files are kept as single entries; ped/map files are passed through
# `to_mat(..., 2)` before being appended to the same list.
try:
    vcf_list = [
        file_ for file_ in file_by_size(my_parser().filePath, ['vcf'])
        if file_.endswith(('vcf', 'vcf.gz'))
    ]
    plink_list = to_mat(
        sorted(file_by_size(my_parser().filePath, ["ped", "map"])), 2)
    file_list = vcf_list + plink_list
except FileNotFoundError:
    bomb(f'No files found at path = {abspath(my_parser().filePath)}\n')


def check_path():
    """Validate the input directory given on the command line.

    Returns:
        The result of ``bomb`` when the path is not a directory or when
        none of the collected input files has a vcf/plink suffix;
        ``None`` when the path passes both checks.
    """
    # Parse the command line once instead of on every attribute access.
    path = my_parser().filePath
    if not isdir(path):
        return bomb(f'No files found at path = {path}\n')

    # From here on `path` is known to be a directory, so only the
    # collected file names still need checking.
    suffixes = ('vcf', 'vcf.gz', 'ped', 'map')
    if not any(item.endswith(suffixes) for item in flatten(file_list)):
        return bomb(
            f'No vcf or plink files found at path = {abspath(path)}\n'
        )
    return None
from funtools import allelic_r2, open_by_suffix, bomb from parse_args import my_parser from recode_dict import recode start = timeit.default_timer() # inputs in_files = [ item.name for item in my_parser().file if item.name.endswith(('vcf', 'vcf.gz')) ] if len(in_files) != 2: bomb( '''two vcf files required to calculate genotype correlation\n\ttry: `snprecode --file vcf1 vcf2`''' ) ''' file1 = my_parser().file[0].name file2 = my_parser().file[1].name ''' file1 = in_files[0] file2 = in_files[1] mat1 = open_by_suffix(file1) mat2 = open_by_suffix(file2) # outputs file_txt = 'genotype_R2.txt' file_plot = "genotype_R2.pdf" with open(file_txt, 'w') as outfile: