def convert(self):
    """Convert the VCF file to PLINK format with vcftools, then fix the map.

    Builds one ``vcftools --plink`` command line from the filter settings
    stored on the instance, runs it through ``SysCall`` and finally calls
    ``self.fix_map()``.  A filter flag is appended only when its setting
    passes the range check next to it, and flags keep a fixed order.
    """
    # get_ind_coverage() hands back a ready-made string of
    # " --remove-indv <name>" flags; the empty list is the "nothing to
    # remove" placeholder used when the individual filter is disabled.
    remove = []
    if 0.0 < self.ind < 1.0:
        remove = self.get_ind_coverage()

    base = "vcftools --vcf " + self.vcf_file + " --plink --out " + self.prefix

    flags = []
    if self.thin > 0:
        flags.append(" --thin " + str(self.thin))
    if 0.0 < self.snp < 1.0:
        flags.append(" --max-missing " + str(self.snp))
    if len(remove) > 0:
        flags.append(remove)
    if 0.0 < self.maf < 1.0:
        flags.append(" --maf " + str(self.maf))
    if self.mac > 0:
        flags.append(" --mac " + str(self.mac))
    if self.bi == True:
        flags.append(" --min-alleles 2 --max-alleles 2")
    if self.removeInds == True:
        flags.append(" --remove " + str(self.removeFile))
    if len(self.discard) > 0:
        flags.extend(" --remove-indv " + str(ind) for ind in self.discard)

    SysCall(base + "".join(flags)).run_program()
    self.fix_map()
def loglik(self):
    """Gather log-likelihood values from ``*stdout`` files into loglik.txt.

    Every file in the current directory whose name ends with ``stdout`` is
    scanned for lines starting with ``Loglikelihood:``.  For each such line
    a record ``<K>\\t<last token of the line>\\n`` is written to
    ``loglik.txt``, where ``<K>`` is taken from the file name.  The result
    file is then sorted numerically on its first column with ``sort``.

    Both the output file and each input file are managed by ``with`` so
    they are closed even when parsing a file name or a line raises.

    Raises:
        IndexError: if a matching file name contains no underscore.
    """
    with open("loglik.txt", 'wb') as fh:
        for fn in os.listdir("."):
            if not fn.endswith("stdout"):
                continue
            with open(fn, 'r') as temp:
                # K is the last dot-separated field of the second-to-last
                # underscore-separated field of the file name.
                fnlist2 = fn.split("_")[-2].split(".")
                kval = fnlist2[-1]
                print(fnlist2)
                for line in temp:
                    if line.startswith("Loglikelihood:"):
                        record = kval + "\t" + line.split()[-1] + "\n"
                        fh.write(record.encode())

    # loglik.txt is closed (and flushed) before sort rewrites it in place.
    print("Sorting log(likelihood) values...")
    command = "sort -n -k1 -o loglik.txt loglik.txt"
    sortCall = SysCall(command)
    sortCall.run_program()
def admix(self):
    """Run admixture for each K from ``minK`` to ``maxK``, ``rep`` times each.

    Every run gets a fresh random seed.  After a run, each file in the
    current directory whose name contains ``<prefix>.<K>.`` is renamed so
    that ``_<replicate>`` sits just before its extension; renamed files
    whose extension ends in ``Q`` are recorded in ``self.qfiles`` under the
    key ``str(K)``.  When all runs are done ``self.qfiles`` is dumped as
    JSON to ``<prefix>.qfiles.json``.
    """
    for k in range(self.minK, self.maxK + 1):
        for rep in range(self.rep):
            command_string = " ".join([
                "admixture",
                "-j" + str(self.NP),
                "-s",
                str(np.random.randint(1000000)),
                "--cv=" + str(self.cv),
                self.prefix + ".ped",
                str(k),
            ])

            # Run admixture for this K / replicate.
            SysCall(command_string).run_admixture(self.prefix, k, rep)

            # Tag the fresh output files with the replicate number.
            marker = self.prefix + "." + str(k) + "."
            for filename in os.listdir("."):
                if marker not in filename:
                    continue
                stem, extension = os.path.splitext(filename)
                renamed = stem + "_" + str(rep) + extension
                if extension.endswith("Q"):
                    self.qfiles[str(k)].append(renamed)
                os.rename(filename, renamed)

    # Persist the K -> .Q file names mapping.
    with open(self.prefix + ".qfiles.json", 'w') as json_file:
        json.dump(self.qfiles, json_file)
def get_indlist(self):
    """Write the individuals listed in the VCF file to vcf_indlist.txt.

    Runs ``vcf-query -l`` on ``self.vcf_file`` with its output redirected
    to ``vcf_indlist.txt``.  If running the command raises anything at
    all, two hint messages are printed and ``SystemExit`` is raised.
    """
    query = " ".join(["vcf-query", "-l", self.vcf_file, ">", "vcf_indlist.txt"])
    try:
        SysCall(query).run_program()
    except:
        # Intentionally bare: every exception, including BaseException
        # subclasses such as SystemExit, ends with the hints below.
        print(
            "vcf-query failed to capture list of individuals from VCF file."
        )
        print("Check that vcf-query is installed on your system.")
        raise SystemExit
def runDistruct(self):
    """Run distruct on every ``drawparams*`` file found in ``self.nd``.

    The directory is listed first, then made the current working
    directory (it is not changed back afterwards), and distruct is
    invoked once per matching file.  A reminder to check the results is
    printed at the end.
    """
    print("Now running distruct for all drawparams files...")
    entries = os.listdir(self.nd)
    os.chdir(self.nd)

    for entry in (e for e in entries if e.startswith("drawparams")):
        # "; echo" is appended to the shell command after distruct.
        SysCall("distruct -d " + str(entry) + "; echo").run_program()

    print("WARNING: Check that distruct ran properly.")
    print(
        "This program does not check the exit status of DISTRUCT because its exit status always equals 1."
    )
    print("")
def evalAdmix(self, minK, maxK, np):
    """Run evalAdmix on every recorded .Q file for K in [minK, maxK].

    Args:
        minK: lowest K to process (converted with ``int``).
        maxK: highest K to process, inclusive (converted with ``int``).
        np: value passed to evalAdmix's ``-P`` option.  Inside this method
            the name hides any module-level ``np``.

    For each file name in ``self.qfiles[str(K)]`` the text after the last
    dot is swapped for ``P`` (the ``-fname`` input) and for ``corres``
    (the ``-o`` output); the file name itself is printed before the run.
    """
    for k in range(int(minK), int(maxK) + 1):
        for qf in self.qfiles[str(k)]:
            print(qf)

            # Everything up to (not including) the last dot-separated field.
            stem = qf.split(".")[:-1]
            pf = ".".join(stem + ["P"])
            eAf = ".".join(stem + ["corres"])

            evalAdmix_str_com = " ".join([
                "evalAdmix",
                "-plink", self.prefix,
                "-fname", pf,
                "-qname", qf,
                "-o", eAf,
                "-P", str(np),
            ])
            SysCall(evalAdmix_str_com).run_program()
def get_ind_coverage(self):
    """Find individuals whose missing-data fraction exceeds ``self.ind``.

    Runs ``vcftools --missing-indv`` (adding ``--remove <removeFile>`` when
    ``self.removeInds`` is set), then reads ``<prefix>.imiss``.  The first
    non-blank line is treated as the header; for every later line whose
    fifth whitespace-separated column is greater than ``self.ind`` the
    individual named in the first column is reported, stored in
    ``self.blacklist`` and added to the result.

    Returns:
        str: concatenated `` --remove-indv <name>`` flags, or ``""`` when
        no individual exceeds the cutoff.

    Exits with status 1 (after printing a message) if the ``.imiss`` file
    cannot be opened, read or parsed.  Opening the file happens inside the
    ``try`` so that a missing file is reported by the same handler as any
    other read error.
    """
    vcf_command = "vcftools --vcf " + self.vcf_file + " --missing-indv --out " + self.prefix
    if (self.removeInds == True):
        vcf_command = vcf_command + " --remove " + str(self.removeFile)
    call = SysCall(vcf_command)
    call.run_program()

    fname = self.prefix + ".imiss"
    flags = []
    try:
        with open(fname, 'r') as fh:
            header_seen = False
            for raw in fh:
                line = raw.strip()
                if not line:
                    continue
                if not header_seen:
                    # First non-blank line is the column header.
                    header_seen = True
                    continue
                stuff = line.split()
                if float(stuff[4]) > self.ind:
                    print("Removing individual %s: %s missing data" %
                          (stuff[0], stuff[4]))
                    self.blacklist[stuff[0]] = 1
                    flags.append(" --remove-indv " + str(stuff[0]))
    except IOError as e:
        print("Could not read file %s: %s" % (fname, e))
        sys.exit(1)
    except Exception as e:
        print("Unexpected error reading file %s: %s" % (fname, e))
        sys.exit(1)
    return "".join(flags)
def print_cv(self):
    """Collect lines containing ``CV`` from the stdout files into one file.

    Greps ``<prefix>*.stdout`` (without file-name prefixes, ``-h``) and
    redirects the matches to ``<prefix>_cv_summary.txt``.
    """
    print("Printing CV values...")
    grep_line = " ".join([
        "grep", "-h", "CV",
        self.prefix + "*.stdout",
        ">",
        self.prefix + "_cv_summary.txt",
    ])
    SysCall(grep_line).run_program()
def makeBED(self):
    """Run plink with ``--make-bed``, reading and writing under ``self.prefix``."""
    stem = self.prefix
    argv = ["plink", "--file", stem, "--make-bed", "--out", stem]
    SysCall(" ".join(argv)).run_program()
def recodePlink(self):
    """Run plink with ``--recode12``, reading and writing under ``self.prefix``.

    The command also passes ``--noweb`` and ``--allow-extra-chr 0``.
    """
    stem = self.prefix
    argv = [
        "plink", "--file", stem,
        "--noweb",
        "--allow-extra-chr", "0",
        "--recode12",
        "--out", stem,
    ]
    SysCall(" ".join(argv)).run_program()
def recodeStructure(self):
    """Run plink with ``--recode structure``, reading and writing under ``self.prefix``.

    The command also passes ``--allow-extra-chr 0``.
    """
    stem = self.prefix
    argv = [
        "plink", "--file", stem,
        "--allow-extra-chr", "0",
        "--recode", "structure",
        "--out", stem,
    ]
    SysCall(" ".join(argv)).run_program()