Esempio n. 1
0
def check_path():
    """Check that the input path is a directory containing VCF or PLINK files.

    Reads the ``filePath`` attribute of the object returned by
    ``my_parser()`` and the module-level ``file_list``.

    Returns:
        The value returned by ``bomb(...)`` when the path is not a directory,
        or when no entry of the flattened ``file_list`` ends with ``'vcf'``,
        ``'vcf.gz'``, ``'ped'`` or ``'map'``; ``None`` otherwise.
    """
    # Resolve the argument once; the original re-invoked the parser for
    # every use of the path.
    file_path = my_parser().filePath

    if not isdir(file_path):
        return bomb(f'No files found at path = {file_path}\n')

    # Past the guard above the path is known to be a directory, so only the
    # file-type check remains. any() stops at the first matching file.
    wanted_suffixes = ('vcf', 'vcf.gz', 'ped', 'map')
    if not any(item.endswith(wanted_suffixes) for item in flatten(file_list)):
        return bomb(
            f'No vcf or plink files found at path = {abspath(file_path)}\n'
        )
    return None
Esempio n. 2
0
            genotypes.append(row)
        elif toto == 2:
            for n, a in enumerate(geno):
                try:
                    geno_tot.append(plink_ACGT[plink_info[n][0]][int(a)])
                except KeyError:
                    pass
            geno_out.write(
                '%s %s \n' %
                (' '.join([sample_id, sample_id, '0', '0', '2', '-9'
                           ]), ' '.join(geno_tot)))

# Continue writing the VCF body: one tab-separated line per SNP, made of the
# entry from `vcfsnps` followed by that SNP's genotype for every sample.
if toto == 1:
    # `genotypes` holds one sequence per sample; transpose it so that each
    # element groups the calls of a single SNP across all samples.
    genotypes_by_snp = list(zip(*genotypes))
    for snp_fields, snp_calls in zip(vcfsnps, genotypes_by_snp):
        flattened_parts = [
            list(flatten(snp_fields)),
            list(flatten(snp_calls)),
        ]
        geno_out.write('\t'.join(flatten(flattened_parts)) + '\n')

# Map file for Ped
if toto == 2:
    map_out = open(fo + ".map", "w")
    map_ped = []
    for marker in snps:  # snps is a list need to ammend this
        try:
            ms = vcf_a1a2[marker][0:3]
            ms.insert(0, '0')
            ms = [ms[i] for i in [1, 3, 0, 2]]
            map_ped.append(ms)
        except KeyError:
            ms = snps[marker]
            ms.insert(0, '0')
Esempio n. 3
0
# Per-file bookkeeping filled in by the scan below.
within_file_sample_dups = {}  # key = file, value = set of sample IDs repeated inside that file
between_file_sample_dups = {}  # key = file, value = set of all sample IDs in that file
between_file_chroms = {}  # key = file, value = set of chromosome names in that file

# For every VCF input, shell out to pull the sample IDs (columns 10+ of the
# '#CHR...' header line) and the first two columns of every non-'#' line.
# NOTE(review): file names are spliced into the shell command unquoted, so
# paths containing spaces or shell metacharacters will break the pipeline.
for index, file_ in enumerate(file_list):
    if str(file_).endswith("vcf.gz"):
        command0 = "zcat< " + file_ + "|grep -m 1  '#CHR' |cut -f10-"
        command1 = "zcat< " + file_ + "| grep -v '#'|cut -f1-2"

        # Run the record pipeline once and reuse its output; the original
        # executed it twice, decompressing the whole file each time.
        vcf_records = list(std_capture(command1))
        chroms = [i[0] for i in vcf_records]
        snps = [':'.join(i) for i in vcf_records]

        within_file_snps_dups[file_] = snps
        between_file_chroms[file_] = set(chroms)

        samples = list(flatten(std_capture(command0)))
        if len(samples) > len(set(samples)):
            # Single pass: a sample met for the second time is a duplicate
            # (replaces the quadratic samples.count() scan).
            seen_samples = set()
            repeated_samples = set()
            for sample_name in samples:
                if sample_name in seen_samples:
                    repeated_samples.add(sample_name)
                seen_samples.add(sample_name)
            within_file_sample_dups[file_] = repeated_samples
        between_file_sample_dups[file_] = set(samples)

    # "*.vcf.gz" does not end with "vcf", so this branch only handles
    # uncompressed files.
    if str(file_).endswith("vcf"):
        command0 = "cat " + file_ + "|grep -m 1  '#CHR' |cut -f10-"
        command1 = "cat " + file_ + "| grep -v '#'|cut -f1-2"

        # Same single-run capture as in the compressed branch.
        vcf_records = list(std_capture(command1))
        chroms = [i[0] for i in vcf_records]
        snps = [':'.join(i) for i in vcf_records]

        within_file_snps_dups[file_] = snps
        between_file_chroms[file_] = set(chroms)
Esempio n. 4
0
        except ValueError:
            chrom, snpname, cm, pos = line.strip().split()

        key = chrom + ":" + pos

        if key not in markers:
            markers[key] = [snpname] + mark_chip
        if key in markers:
            value = list(markers[key])
            value[chip + 1] = snp_number + 1
            markers[key] = value

# Build the combined SNP table from `markers` and start the snpinfo file.
# Each `markers` item flattens to [key, marker name, remaining values...],
# where key is split on ":" into chromosome and position; the output row is
# [marker name, chromosome, position, remaining values...].
mark = []
for marker_item in markers.items():
    flat_item = list(flatten(marker_item))
    locus_key = flat_item[0]
    marker_name = flat_item[1]
    locus_parts = locus_key.split(":")
    mark.append(
        [marker_name, locus_parts[0], locus_parts[1]] + flat_item[2:]
    )

# Debug aid: print every row of `mark` here to inspect the unsorted table.

# Order rows numerically by chromosome, then by position.
mark.sort(key=lambda record: (int(record[1]), int(record[2])))

# Open the output and write the header; the handle stays open on purpose
# so that later code can keep writing to it.
f_out = "snpinfo.txt"
mark_out = open(f_out, "w")
mark_out.write('marker bta pos chips...\n')
Esempio n. 5
0
            geno = []
            chip = index + 1
            snp_number = 0

            with open_by_suffix(file_list[index]) as f:
                for line in f:
                    if "##" in line: continue
                    # CHROM,POS,ID,REF,ALT,QUAL,FILTER,INFO,FORMAT,samples...
                    bta, pos, snp, ref, alt, qual, filter_, info, format_, genotype = line.strip(
                    ).split("\t", 9)
                    bta = bta.replace('Chr', '')
                    geno.append(
                        list(
                            flatten([
                                vcf_2_fimpute.get(i, i) for i in [
                                    ii.split(':')[0].replace("|", "/")
                                    for ii in genotype.split('\t')
                                ]
                            ])))

                    if bta != '#CHROM ' and pos != 'POS':
                        if bta + ":" + pos not in snps_list:
                            snps_list[bta + ":" +
                                      pos] = [bta, snp, "0", pos, ref, alt]
                        elif bta + ":" + pos in snps_list and snps_list[
                                bta + ":" + pos][4].lower() != ref.lower():
                            print(
                                'Some REF/ALT may be flipped \n'
                                'Normalize VCF files e.g. `bcftools norm -f [REF_GENOME] ... `\n'
                            )
                            raise SystemExit