コード例 #1
0
def main(argv):
    '''
    Drives the duplicate-SNP check for the map file named by the global
    map_loc: builds an extract list, splits the dataset into an
    "extract" and an "exclude" fileset with plink, rewrites both .bim
    files, merges the two filesets and summarizes the merge output.

    Args:
        argv -- command line arguments.
                NOTE(review): this parameter is currently not used; the
                function parses sys.argv[1:] instead -- confirm whether
                callers expect argv to be honored.
    '''
    global map_loc, flip_loc, repair_loc
    # Presumably fills in the globals declared above -- verify against
    # cl_arguments.
    cl_arguments(sys.argv[1:])

    repair_dict = pc_toolbox.read_dict(repair_loc)
    sort_fo = list_info(map_loc)

    # Every intermediate file shares the map file's path minus extension.
    stem = os.path.splitext(map_loc)[0]
    extract_loc = stem + '_extractList.txt'
    exclude_bfile = stem + "_excludeDup"
    extract_bfile = stem + "_extractDup"
    extract_script = stem + '_extractScript.txt'
    exclude_script = stem + '_excludeScript.txt'
    merge_script = stem + '_mergeScript.txt'
    merge_out = stem + '_DupMerge'

    find_dups(sort_fo, extract_loc, repair_dict)

    # Write, then run, the script that extracts the non-rs duplicates.
    plink_extract(stem, extract_loc, extract_bfile, extract_script, flip_loc)
    plink(extract_script)

    # Write, then run, the script that excludes the non-rs duplicates.
    plink_exclude(stem, extract_loc, exclude_bfile, exclude_script)
    plink(exclude_script)

    # Convert the map files from both results to use only chr:position form.
    for bfile in (extract_bfile, exclude_bfile):
        uniform_map(bfile + '.bim')

    # Run the concordance check by merging the two filesets.
    plink_merge(exclude_bfile, extract_bfile, merge_out, merge_script)
    plink(merge_script)

    # Summarize the results of the diff file.
    summarize_diff(merge_out)
コード例 #2
0
def fix_map(map_loc, repair_loc):
    '''
    Rewrites the SNP names (second column) of a bim or map file in place.

    The original file is first copied to a backup path built from
    rename_as_necessary(<basepath>~, ext), then map_loc is overwritten
    line by line from that copy. For each line the SNP name is chosen as:
        1. unchanged, if it begins with 'rs';
        2. the repair dictionary's value, if the name is a key in it;
        3. otherwise chr<chromosome#>:<basepair position>, built from
           the first and fourth columns.
    Output fields are tab-separated. Blank lines are skipped.

    Args:
        map_loc -- filepath of bim or map file
        repair_loc -- filepath passed to pc_toolbox.read_dict; the
                      result is used as a lookup from an existing SNP
                      name to its replacement name

    '''
    (basepath, ext) = os.path.splitext(map_loc)
    orig_rename = basepath + '~'
    new_loc = str(rename_as_necessary(orig_rename, ext)) + ext
    # Keep a copy of the untouched file; it is also the read source below,
    # because map_loc itself is truncated when opened for writing.
    shutil.copy(map_loc, new_loc)
    repair_dict = pc_toolbox.read_dict(repair_loc)

    with open(new_loc, mode='r') as original_map, \
            open(map_loc, mode='w') as new_map:
        for orig_line in original_map:
            fields = orig_line.split()
            if not fields:
                # A blank (e.g. trailing) line has no columns to index;
                # skip it instead of failing with IndexError mid-rewrite.
                continue
            snp = fields[1]
            if not snp.startswith('rs'):
                if snp in repair_dict:
                    fields[1] = repair_dict[snp]
                else:
                    fields[1] = 'chr' + fields[0] + ':' + fields[3]
            new_map.write('\t'.join(fields) + '\n')