def main(argv):
    '''
    Drive the duplicate-SNP concordance workflow for one PLINK fileset.

    The steps run strictly in this order: parse the command line, read the
    repair dictionary, list the map file, look for duplicates, build and run
    an extract script and an exclude script, rewrite both resulting .bim
    files, merge the two filesets and summarize the diff.

    Args:
        argv -- accepted but not consulted; options are always parsed
                from sys.argv[1:]

    Reads the module globals map_loc, flip_loc and repair_loc
    (presumably populated by cl_arguments -- confirm against that function).
    '''
    global map_loc, flip_loc, repair_loc
    cl_arguments(sys.argv[1:])

    repairs = pc_toolbox.read_dict(repair_loc)
    sorted_info = list_info(map_loc)

    # Every intermediate file shares the map file's path minus its extension.
    stem = os.path.splitext(map_loc)[0]
    dup_list_loc = stem + '_extractList.txt'
    excluded_fileset = stem + "_excludeDup"
    extracted_fileset = stem + "_extractDup"
    extract_script_loc = stem + '_extractScript.txt'
    exclude_script_loc = stem + '_excludeScript.txt'
    merge_script_loc = stem + '_mergeScript.txt'
    merged_fileset = stem + '_DupMerge'

    find_dups(sorted_info, dup_list_loc, repairs)

    # Write and run the script that extracts the non-rs duplicates.
    plink_extract(stem, dup_list_loc, extracted_fileset,
                  extract_script_loc, flip_loc)
    plink(extract_script_loc)

    # Write and run the script that excludes the non-rs duplicates.
    plink_exclude(stem, dup_list_loc, excluded_fileset, exclude_script_loc)
    plink(exclude_script_loc)

    # Convert the map files of both results to chr:position names only.
    for fileset in (extracted_fileset, excluded_fileset):
        uniform_map(fileset + '.bim')

    # Concordance check: merge the two filesets via a generated script.
    plink_merge(excluded_fileset, extracted_fileset,
                merged_fileset, merge_script_loc)
    plink(merge_script_loc)

    # Summarize the results of the diff file.
    summarize_diff(merged_fileset)
def fix_map(map_loc, repair_loc):
    '''
    Rewrite the SNP-name column of a map/bim file in place.

    The original file is first copied to a backup whose path is built from
    the base path plus '~' (passed through rename_as_necessary) and the
    original extension. Each line of the backup is then split on whitespace
    and written back to map_loc, tab-separated, with its second field
    chosen as follows:
        * names beginning with 'rs' are kept unchanged;
        * names found in the repair dictionary are replaced by the
          dictionary's value;
        * every other name becomes chr<field 1>:<field 4>.

    Args:
        map_loc -- filepath of bim or map file
        repair_loc -- filepath handed to pc_toolbox.read_dict to load the
                      repair dictionary (presumably old name -> new name;
                      confirm against that helper)
    '''
    stem, suffix = os.path.splitext(map_loc)
    backup_loc = str(rename_as_necessary(stem + '~', suffix)) + suffix
    shutil.copy(map_loc, backup_loc)

    repairs = pc_toolbox.read_dict(repair_loc)

    # Read from the backup copy because map_loc itself is truncated on open.
    with open(backup_loc, mode='r') as source, \
            open(map_loc, mode='w') as target:
        for record in source:
            fields = record.split()
            snp = fields[1]
            if not snp.startswith('rs'):
                if snp in repairs:
                    fields[1] = repairs[snp]
                else:
                    fields[1] = 'chr' + fields[0] + ':' + fields[3]
            target.write('\t'.join(fields) + '\n')