Esempio n. 1
0
    def convert(self):
        """Convert the VCF file to PLINK format with vcftools, then fix the map.

        Starts from ``vcftools --vcf <vcf_file> --plink --out <prefix>`` and
        appends one option per enabled filter attribute (``thin``, ``snp``,
        per-individual removals, ``maf``, ``mac``, ``bi``, ``removeInds`` and
        ``discard``), in that order. The command is run through ``SysCall``
        and ``self.fix_map()`` is called afterwards.
        """
        # Per-individual removal flags are only computed when ``ind`` lies
        # strictly between 0 and 1; otherwise nothing is added for them.
        indiv_flags = ""
        if 0.0 < self.ind < 1.0:
            indiv_flags = self.get_ind_coverage()

        pieces = ["vcftools --vcf ", self.vcf_file, " --plink --out ", self.prefix]

        if self.thin > 0:
            pieces.append(" --thin " + str(self.thin))
        if 0.0 < self.snp < 1.0:
            pieces.append(" --max-missing " + str(self.snp))
        if indiv_flags:
            # Already formatted as " --remove-indv <name>" fragments.
            pieces.append(indiv_flags)
        if 0.0 < self.maf < 1.0:
            pieces.append(" --maf " + str(self.maf))
        if self.mac > 0:
            pieces.append(" --mac " + str(self.mac))
        # The explicit comparisons with True are kept on purpose: truthy
        # values that are not equal to True must not enable these options.
        if self.bi == True:
            pieces.append(" --min-alleles 2 --max-alleles 2")
        if self.removeInds == True:
            pieces.append(" --remove " + str(self.removeFile))
        # An empty ``discard`` simply contributes nothing.
        pieces.extend(" --remove-indv " + str(ind) for ind in self.discard)

        SysCall("".join(pieces)).run_program()

        self.fix_map()
Esempio n. 2
0
    def loglik(self):
        """Collect log-likelihood values from ``*stdout`` files into loglik.txt.

        Scans the current working directory for files whose name ends with
        ``stdout``. The K value is read from the file name: the second-to-last
        ``_``-separated field is split on ``.`` and its last piece is used.
        For every line starting with ``Loglikelihood:`` one tab-separated
        record (K value, last whitespace-separated token of the line) is
        written to ``loglik.txt``. The file is then sorted numerically on its
        first column by running ``sort`` through ``SysCall``.

        Raises:
            IndexError: if a matching file name contains no ``_``.
        """
        # Context managers guarantee both handles are closed even when a file
        # name cannot be parsed or a read fails part-way through.
        with open("loglik.txt", 'wb') as out:
            for fn in os.listdir("."):
                if not fn.endswith("stdout"):
                    continue

                k_fields = fn.split("_")[-2].split(".")
                kval = k_fields[-1]
                print(k_fields)

                with open(fn, 'r') as log:
                    for line in log:
                        if line.startswith("Loglikelihood:"):
                            record = kval + "\t" + line.split()[-1] + "\n"
                            # Output is opened in binary mode, so encode once
                            # per record.
                            out.write(record.encode())

        print("Sorting log(likelihood) values...")
        command = "sort -n -k1 -o loglik.txt loglik.txt"

        sortCall = SysCall(command)
        sortCall.run_program()
Esempio n. 3
0
    def admix(self):
        """Run ADMIXTURE ``rep`` times for every K from ``minK`` to ``maxK``.

        Each run gets a random seed drawn with ``np.random.randint(1000000)``
        and is executed through ``SysCall.run_admixture``. After a run, every
        file in the current directory whose name contains
        ``<prefix>.<K>.`` is renamed so that ``_<rep>`` precedes its
        extension; renamed files whose extension ends in ``Q`` are recorded in
        ``self.qfiles[str(K)]``. Finally ``self.qfiles`` is dumped as JSON to
        ``<prefix>.qfiles.json``.
        """
        for k in range(self.minK, self.maxK + 1):
            # Substring shared by the output files of this K value.
            marker = self.prefix + "." + str(k) + "."

            for rep in range(self.rep):
                seed = np.random.randint(1000000)
                command = "admixture -j{0} -s {1} --cv={2} {3}.ped {4}".format(
                    self.NP, seed, self.cv, self.prefix, k)

                SysCall(command).run_admixture(self.prefix, k, rep)

                # Tag this replicate's output files with "_<rep>" so the next
                # replicate does not overwrite them.
                for current in os.listdir("."):
                    if marker not in current:
                        continue
                    stem, ext = os.path.splitext(current)
                    tagged = stem + "_" + str(rep) + ext
                    if ext.endswith("Q"):
                        self.qfiles[str(k)].append(tagged)
                    os.rename(current, tagged)

        # Persist the mapping of K value -> list of .Q files.
        with open(self.prefix + ".qfiles.json", 'w') as handle:
            json.dump(self.qfiles, handle)
Esempio n. 4
0
    def get_indlist(self):
        """Write the list of individuals in the VCF file to ``vcf_indlist.txt``.

        Runs ``vcf-query -l <vcf_file> > vcf_indlist.txt`` through ``SysCall``.

        Raises:
            SystemExit: with exit status 1 if the call fails.
        """
        list_command = "vcf-query -l " + self.vcf_file + " > vcf_indlist.txt"

        try:
            SysCall(list_command).run_program()
        # SystemExit is listed in case SysCall reports failure by exiting
        # (its implementation is not visible here). KeyboardInterrupt is
        # deliberately left to propagate instead of being reported as a
        # vcf-query failure.
        except (Exception, SystemExit):
            print(
                "vcf-query failed to capture list of individuals from VCF file."
            )
            print("Check that vcf-query is installed on your system.")
            # A bare ``raise SystemExit`` would exit with status 0 (success);
            # use a non-zero status so calling scripts can detect the failure.
            raise SystemExit(1)
Esempio n. 5
0
    def runDistruct(self):
        """Run distruct once for every ``drawparams*`` file found in ``self.nd``.

        The directory is listed first, then the process changes its working
        directory to ``self.nd`` (it is not changed back) and runs
        ``distruct -d <file>; echo`` through ``SysCall`` for each match.
        """
        print("Now running distruct for all drawparams files...")
        entries = os.listdir(self.nd)

        os.chdir(self.nd)

        param_files = [entry for entry in entries if entry.startswith("drawparams")]
        for param_file in param_files:
            # NOTE(review): the trailing "; echo" presumably masks distruct's
            # exit status (see the warning printed below) - confirm.
            SysCall("distruct -d " + str(param_file) + "; echo").run_program()

        print("WARNING: Check that distruct ran properly.")
        print(
            "This program does not check the exit status of DISTRUCT because its exit status always equals 1."
        )
        print("")
Esempio n. 6
0
	def evalAdmix(self, minK, maxK, np):
		"""Run evalAdmix on every recorded .Q file for K in [minK, maxK].

		For each file name in ``self.qfiles[str(K)]`` the matching ``.P`` and
		``.corres`` names are derived by replacing the last ``.``-separated
		field, and ``evalAdmix`` is launched through ``SysCall``.

		Args:
			minK: smallest K value (converted with ``int``).
			maxK: largest K value, inclusive (converted with ``int``).
			np: value passed to evalAdmix's ``-P`` option.
		"""
		for k in range(int(minK), int(maxK) + 1):
			for qf in self.qfiles[str(k)]:
				print(qf)

				# Everything except the final extension is shared by the
				# .Q, .P and .corres file names.
				stem = qf.split(".")[:-1]
				pf = ".".join(stem + ["P"])
				eAf = ".".join(stem + ["corres"])

				command = " ".join([
					"evalAdmix",
					"-plink", self.prefix,
					"-fname", pf,
					"-qname", qf,
					"-o", eAf,
					"-P", str(np),
				])

				SysCall(command).run_program()
Esempio n. 7
0
    def get_ind_coverage(self):
        """Build ``--remove-indv`` flags for individuals with too much missing data.

        Runs ``vcftools --missing-indv`` (adding ``--remove <removeFile>`` when
        ``removeInds`` is set) and reads the resulting ``<prefix>.imiss``
        report. The first non-blank line is treated as a header. For every
        other non-blank line, if the fifth column exceeds ``self.ind`` the
        individual named in the first column is recorded in
        ``self.blacklist`` and a removal flag is added to the result.

        Returns:
            str: concatenated `` --remove-indv <name>`` fragments, or an empty
            string when no individual exceeds the threshold.

        Exits the program with status 1 if the report cannot be opened, read
        or parsed.
        """
        vcf_command = "vcftools --vcf " + self.vcf_file + " --missing-indv --out " + self.prefix
        if (self.removeInds == True):
            vcf_command = vcf_command + " --remove " + str(self.removeFile)

        call = SysCall(vcf_command)
        call.run_program()

        fname = self.prefix + ".imiss"
        flags = []
        # open() is inside the try block so that a missing or unreadable
        # report is handled by the IOError branch instead of escaping as an
        # uncaught traceback. The with-statement closes the file.
        try:
            with open(fname, 'r') as fh:
                header_skipped = False
                for line in fh:
                    line = line.strip()
                    if not line:
                        continue
                    if not header_skipped:
                        header_skipped = True
                        continue
                    fields = line.split()
                    # NOTE(review): column 5 is presumably vcftools' F_MISS
                    # (fraction of missing genotypes) - confirm.
                    if float(fields[4]) > self.ind:
                        print("Removing individual %s: %s missing data" %
                              (fields[0], fields[4]))
                        self.blacklist[fields[0]] = 1
                        flags.append(" --remove-indv " + str(fields[0]))
        except IOError as e:
            print("Could not read file %s: %s" % (fname, e))
            sys.exit(1)
        except Exception as e:
            print("Unexpected error reading file %s: %s" % (fname, e))
            sys.exit(1)

        return "".join(flags)
Esempio n. 8
0
    def print_cv(self):
        """Gather the CV lines of all ``<prefix>*.stdout`` files into one file.

        Runs ``grep -h CV`` over the matching files through ``SysCall`` and
        redirects the output to ``<prefix>_cv_summary.txt``.
        """
        print("Printing CV values...")
        sources = self.prefix + "*.stdout"
        summary = self.prefix + "_cv_summary.txt"

        SysCall("grep -h CV " + sources + " > " + summary).run_program()
Esempio n. 9
0
	def makeBED(self):
		"""Run ``plink --make-bed`` on ``self.prefix``, writing to the same prefix."""
		arguments = ["plink", "--file", self.prefix, "--make-bed", "--out", self.prefix]
		SysCall(" ".join(arguments)).run_program()
Esempio n. 10
0
	def recodePlink(self):
		"""Run plink with ``--recode12`` on ``self.prefix``, writing to the same prefix.

		The command also passes ``--noweb`` and ``--allow-extra-chr 0``.
		"""
		template = "plink --file {0} --noweb --allow-extra-chr 0 --recode12 --out {0}"
		SysCall(template.format(self.prefix)).run_program()
Esempio n. 11
0
	def recodeStructure(self):
		"""Run plink with ``--recode structure`` on ``self.prefix``.

		Input and output share the same prefix; ``--allow-extra-chr 0`` is
		passed as well.
		"""
		options = " --allow-extra-chr 0 --recode structure --out "
		command = "".join(["plink --file ", self.prefix, options, self.prefix])
		SysCall(command).run_program()