コード例 #1
0
ファイル: check_modules.py プロジェクト: AMarete/SnpRecode
def pkg_requirements():
    """Report through ``bomb`` when a required package is not installed.

    The installed distributions are read from the output of ``pip freeze``
    executed with the interpreter that is running this script.
    """
    required = ['biopython', 'matplotlib']
    frozen = subprocess.check_output([sys.executable, '-m', 'pip', 'freeze'])
    # Every whitespace-separated token is cut at the first '==' so that only
    # the part in front of the version pin is kept.
    installed_packages = {
        token.decode().split('==')[0] for token in frozen.split()
    }
    if any(name not in installed_packages for name in required):
        bomb(
            f"install package(s) {required} with: pip install {' '.join(required)}\n"
        )
コード例 #2
0
def check_path():
    """Check the input directory taken from the parsed command line.

    Returns:
        The result of ``bomb`` when the path is not a directory, or when it
        is a directory but ``file_list`` holds no vcf/plink file name;
        ``None`` otherwise.
    """
    if not isdir(my_parser().filePath):
        return bomb(f'No files found at path = {my_parser().filePath}\n')

    wanted_suffixes = ('vcf', 'vcf.gz', 'ped', 'map')
    if isdir(my_parser().filePath) and not any(
            name.endswith(wanted_suffixes) for name in flatten(file_list)):
        return bomb(
            f'No vcf or plink files found at path = {abspath(my_parser().filePath)}\n'
        )
    return None
コード例 #3
0
def check_dups():
    """Report duplicated SNPs, duplicated samples and chromosome mismatches.

    Works on the module-level collections ``within_file_snps_dups``,
    ``within_file_sample_dups``, ``between_file_sample_dups``,
    ``between_file_chroms`` and ``file_list``.  Details are written to
    ``Error.txt`` in the current working directory (the file is truncated on
    every call) and each detected problem is reported through ``bomb``.

    Returns:
        None.
    """
    from collections import Counter

    # The context manager closes (and therefore flushes) Error.txt on every
    # exit path, including an exception raised from inside ``bomb``.
    with open('Error.txt', 'w') as err:
        # SNPs occurring more than once in same file.  Counter tallies each
        # list in a single pass instead of calling list.count() per element.
        dict0 = {}
        for k, v in within_file_snps_dups.items():
            repeated = {snp for snp, seen in Counter(v).items() if seen > 1}
            if repeated:
                dict0[k] = repeated

        if dict0:
            err.write('SNPs occurring more than once in same file')
            err.write('\n------------------------------------------\n')
            for k, v in dict0.items():
                err.write('{:<3} {:<5} {:<8}\n'.format(k, '>>', ' '.join(v)))
            bomb(
                '> duplicate SNP found. SNP should be unique in each file\n'
                '       > check `less -S Error.txt` for optional additional information\n'
            )

        # sample occurring more than once in same file
        if within_file_sample_dups:
            err.write('\n--------------------------------------------\n')
            err.write('sample occurring more than once in same file')
            err.write('\n--------------------------------------------\n')
            for k, v in within_file_sample_dups.items():
                err.write('{:<3} {:<5} {:<8}\n'.format(k, '>>', ' '.join(v)))
            bomb(
                '> duplicate sample(s) found. Sample(s) should be unique in each file\n'
                '       > check `less -S Error.txt` for optional additional information\n'
            )

        # sample occurring more than once two or more files
        if len(file_list) > 1:
            if between_file_sample_dups:
                err.write(
                    '\n----------------------------------------------------\n')
                err.write(
                    'sample occurring more than once in two or more files')
                err.write(
                    '\n----------------------------------------------------\n')
                err.write(' '.join(find_common(between_file_sample_dups)))

            # The set_counter() result is compared with the plain per-file
            # lengths; any difference is reported as a shared sample.
            sets0 = set_counter(between_file_sample_dups.values())
            sets1 = [len(i) for i in between_file_sample_dups.values()]
            if sets0 != sets1:
                bomb(
                    '> sample(s) found in more than one file. Sample(s) should be unique to each file\n'
                    '       > check `less -S Error.txt` for optional additional information\n'
                )

            # NOTE(review): nothing about chromosomes is written to Error.txt
            # in this function although the message points the user there.
            result_chrom = set_counter(between_file_chroms.values())
            if sum(result_chrom) > 0:
                bomb(
                    '> files have differing number of chromosomes or chromosome ids\n'
                    '       > check `less -S Error.txt` for optional additional information\n'
                )
コード例 #4
0
ファイル: fi2geno.py プロジェクト: AMarete/SnpRecode
import timeit
import re
from datetime import datetime
from Bio import bgzf
from funtools import flatten, bomb
from parse_args import my_parser
from recode_dict import fimpute_2_vcf

start = timeit.default_timer()


def _open_input(path):
    """Open *path* for reading, reporting a failure through ``bomb``.

    ``TypeError`` is handled together with ``FileNotFoundError`` because
    ``open(None)`` raises ``TypeError``; with only ``FileNotFoundError``
    caught, the "Missing argument" part of the message could never be shown
    for an option that was left out (assuming the parser yields ``None``
    for it -- TODO confirm against ``my_parser``).

    Returns:
        The open text-mode file object, or ``None`` if ``bomb`` returns
        instead of terminating the program.
    """
    try:
        return open(path, "r")
    except (FileNotFoundError, TypeError):
        bomb(f"Missing argument or '{path}' may be empty\n")


# The four input files named on the command line.
samples = _open_input(my_parser().samples)
snp_info = _open_input(my_parser().snps)
geno_info = _open_input(my_parser().geno)
allele_info = _open_input(my_parser().allele)

toto = my_parser().type_

if not my_parser().out:
    bomb('Missing argument, "-o PREFIX", "--out PREFIX"\n'
コード例 #5
0
from os.path import abspath, isdir
from funtools import file_by_size, to_mat, line_count, find_common, bomb, std_capture, flatten, set_counter
from parse_args import my_parser

# Collect the input files from the directory given on the command line.
if not isdir(my_parser().filePath):
    bomb(f'No files found at path = {abspath(my_parser().filePath)}\n')

# vcf candidates: only names ending in 'vcf' or 'vcf.gz' are kept
vcf_list = [
    name
    for name in file_by_size(my_parser().filePath, ['vcf'])
    if name.endswith(('vcf', 'vcf.gz'))
]
# plink candidates: the sorted ped/map names are handed to to_mat(..., 2)
plink_list = to_mat(
    sorted(file_by_size(my_parser().filePath, ["ped", "map"])), 2)
files = vcf_list + plink_list
file_list = []

# First check: the number of inputs has to be between 1 and 10.
if len(files) < 1:
    bomb(f'No files found at path = {abspath(my_parser().filePath)}\n')
elif len(files) > 10:
    bomb('FImpute does not support imputing more than 10 chips simultaneously')

# sort file from largest smallest
'''
for file in vcf_list + plink_list:
    if str(file).endswith(("vcf", "vcf.gz")):
        unsorted_file_list.append([line_count(file), file])
    else:
        unsorted_file_list.append([line_count(file[0]), file])

for item in sorted(unsorted_file_list, key=lambda x: (int(x[0])), reverse=True):
コード例 #6
0
                                      pos] = [bta, snp, cm, pos, ref, alt]
                        elif snps_list[bta + ":" + pos] == [alt + '_' + ref]:
                            print(
                                f'Warning: Allele flipped for SNP {snp} in PLINK file(s)'
                            )
                        elif snps_list[bta + ":" + pos][4] != ref:
                            print(
                                f'Warning: Possible erroneous allele for SNP {snp} in PLINK file(s)'
                                f'normalize with `bcftools norm`')
                    except IndexError:
                        print(
                            f'Warning: SNP {snp} in PLINK file(s) not indexed as `chrom_pos_ref_alt`'
                        )
                        # raise SystemExit
        else:
            bomb(f'Recheck files at {file_list}')
            raise SystemExit

    # Create a comprehensive snps file
    mark = [row for row in [list(flatten(i)) for i in list(mark_tot.values())]]
    # Sort the snps by chrom then pos and write index
    mark = sorted(mark, key=lambda x: (int(x[1]), int(x[2])))

    for row in mark:
        mark_out.write(' '.join(str(e) for e in row) + '\n')

    # Write a marker list with ref/alt information
    for row in [list(flatten(i)) for i in list(snps_list.values())]:
        allele_out.write('\t'.join(row) + '\n')

    if len(mark) - len(snps_list) > 0:
コード例 #7
0
ファイル: __main__.py プロジェクト: AMarete/SnpRecode
def _missing_options(option_groups, argv):
    """Return the groups of *option_groups* that have no flag in *argv*.

    Each group is a tuple of alternative spellings of one option.  The
    result keeps the order of *option_groups*, so the message built from it
    is the same on every run.
    """
    return [
        group for group in option_groups
        if not any(flag in argv for flag in group)
    ]


def main():
    """Select and run the sub-program that matches ``sys.argv``.

    The work is done by the imported modules themselves (they are imported
    only for their import-time effects); every recognised branch ends with
    ``SystemExit``.  Unrecognised arguments are reported through ``bomb``.

    Raises:
        SystemExit: after the selected branch has finished.
    """
    args1 = [('-D', '--DIR'), ('-O', '--OUT')]
    args2 = [('-g', '--geno'), ('-s', '--snps'), ('-o', '--out'),
             ('-n', '--samples'), ('-t', '--type'), ('-a', '--alleles')]
    argv = sys.argv[1:]

    # No arguments at all: print the usage text.
    if not argv:
        from parse_args import msg
        print(msg())
        raise SystemExit

    if argv[0] in ('-h', '--help'):
        import geno2fi  # noqa: F401
        raise SystemExit

    # Conversion to FImpute format: every option of args1 is required.
    if any(flag in argv for group in args1 for flag in group):
        z = _missing_options(args1, argv)
        if z:
            bomb(f'Missing argument when trying to convert to fimpute:\n'
                 f'       required args: {z}\n'
                 f'       try `./snprecode -h` for complete arguments list\n')

        from check_path import check_path
        # Evaluate the check once and reuse its result for the report.
        path_problem = check_path()
        if not path_problem:
            print("File path OK...")
        else:
            print(path_problem)

        from garbage import check_dups, file_list
        if not check_dups():
            print("File check complete...\nFiles to be processed...\n")
            print('\n'.join(map(str, file_list)))

        import geno2fi  # noqa: F401
        raise SystemExit

    # Conversion from FImpute format: every option of args2 is required.
    if any(flag in argv for group in args2 for flag in group):
        z = _missing_options(args2, argv)
        if z:
            bomb(f"Missing argument when trying to convert from fimpute\n"
                 f"       required args: {z}\n"
                 f"       try: `./snprecode -h` for complete arguments list\n")

        import fi2geno  # noqa: F401
        raise SystemExit

    if any(item.endswith(('bim', 'map')) for item in argv):
        import snpinfo  # noqa: F401
        raise SystemExit

    if any(item.endswith(('vcf', 'vcf.gz')) for item in argv):
        import geno_corr  # noqa: F401
        raise SystemExit

    bomb('Unknown argument(s)\n       try ./snprecode -h')
コード例 #8
0
ファイル: check_modules.py プロジェクト: AMarete/SnpRecode
def py_version():
    """Report through ``bomb`` when the interpreter is older than Python 3.6.

    Returns:
        The result of ``bomb`` when the interpreter is too old, otherwise
        ``None``.
    """
    import sys

    # Compare (major, minor) as integers.  Parsing the first three characters
    # of the version string as a float reads "3.10" as 3.1 and would reject
    # every interpreter from 3.10 onwards.
    if sys.version_info[:2] < (3, 6):
        return bomb(
            "Python version not satisfied, install Python V3.6 or later\n")
    return None
コード例 #9
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from parse_args import my_parser
from os.path import abspath, isdir
from funtools import bomb, file_by_size, to_mat, flatten

# Build the module-level lists of input files; a missing directory is
# reported through bomb().
try:
    vcf_list = [
        name
        for name in file_by_size(my_parser().filePath, ['vcf'])
        if name.endswith(('vcf', 'vcf.gz'))
    ]
    # the sorted ped/map names are handed to to_mat(..., 2)
    plink_list = to_mat(
        sorted(file_by_size(my_parser().filePath, ["ped", "map"])), 2)
except FileNotFoundError:
    bomb(f'No files found at path = {abspath(my_parser().filePath)}\n')
else:
    file_list = vcf_list + plink_list


# validate the input path and the files found in it
def check_path():
    """Validate the directory named by the parsed ``filePath`` argument.

    Returns:
        The value returned by ``bomb`` if the path is not a directory or if
        no entry of ``file_list`` ends in a vcf/plink suffix; ``None`` when
        both checks pass.
    """
    if not isdir(my_parser().filePath):
        return bomb(f'No files found at path = {my_parser().filePath}\n')

    suffixes = ('vcf', 'vcf.gz', 'ped', 'map')
    if isdir(my_parser().filePath) and not any(
            entry.endswith(suffixes) for entry in flatten(file_list)):
        return bomb(
            f'No vcf or plink files found at path = {abspath(my_parser().filePath)}\n'
        )
    return None
コード例 #10
0
from funtools import allelic_r2, open_by_suffix, bomb
from parse_args import my_parser
from recode_dict import recode

start = timeit.default_timer()

# inputs: names of the vcf / vcf.gz files passed through the `file` argument
in_files = [
    handle.name
    for handle in my_parser().file
    if handle.name.endswith(('vcf', 'vcf.gz'))
]

if len(in_files) != 2:
    bomb('two vcf files required to calculate genotype correlation\n\t'
         'try: `snprecode --file vcf1 vcf2`')

# The two names come from the filtered list above rather than straight from
# my_parser().file[0] / my_parser().file[1].
file1 = in_files[0]
file2 = in_files[1]
mat1 = open_by_suffix(file1)
mat2 = open_by_suffix(file2)

# outputs
file_txt = 'genotype_R2.txt'
file_plot = "genotype_R2.pdf"

with open(file_txt, 'w') as outfile: