def find_leastP_SNPs(gene_region_list, table_loc, out_base, freq_loc, keep_loc, table_type, annot_dict):
    '''
    Write one summary row per gene region to <out_base>_yank.tbl.

    For each region, read_table() supplies a (gene_info, zero_list, lw_snp)
    triple. The region's gene_info is extended with allele-frequency fields
    and written out in ORDER_LIST column order; any zero_list rows go to a
    separate <out_base>_yank_<ID>.txt file.

    Args:
        gene_region_list -- iterable of regions, each passed to read_table()
        table_loc -- table path handed to read_table() and
                     create_condition_list()
        out_base -- path prefix for every output file written here
        freq_loc -- frequency file path handed to pc_toolbox.retrieve_freq()
        keep_loc -- path that receives one gene_info['lz'] name per line
        table_type -- passed through to read_table()
        annot_dict -- passed through to read_table()
    Returns:
        (z_list, lw_list): z_list as accumulated by create_condition_list(),
        lw_list holding every lw_snp that was not None.
    '''
    title_line = '\t'.join(TITLE_LIST)
    zero_line = '\t'.join(['snp_name','locuszoom_snp','p-value','|z-score|','weight'])
    missing_freq = {'maf':'NA','control_a1':'NA','control_a2':'NA'}

    z_list = list()
    lw_list = list()
    out_file = out_base + '_yank.tbl'
    # Context managers guarantee that both the keep file and the table are
    # closed even if a helper raises part-way through the region list.
    with open(keep_loc, mode="w") as keep:
        # The header is written and closed first so it is already on disk
        # before rows are appended (out_file is also handed to
        # create_condition_list below).
        with open(out_file, mode="w") as out_text:
            out_text.write(title_line + '\n')
        with open(out_file, mode="a") as out_text:
            for gene in gene_region_list:
                gene_info, zero_list, lw_snp = read_table(table_loc, gene, table_type, annot_dict)
                keep.write(gene_info['lz'] + '\n')

                if zero_list != []:
                    zero_path = out_base + '_yank_' + gene_info['ID'] + '.txt'
                    with open(zero_path, mode='w') as zeros:
                        zeros.write(zero_line + '\n')
                        for zero in zero_list:
                            zeros.write('\t'.join(zero) + '\n')

                # Look the SNP up by its own name first, then by its
                # LocusZoom name; fall back to 'NA' when neither is found.
                freq_info = pc_toolbox.retrieve_freq(freq_loc, gene_info['snp'])
                if freq_info is None:
                    freq_info = pc_toolbox.retrieve_freq(freq_loc, gene_info['lz'])
                if freq_info is None:
                    gene_info.update(missing_freq)
                else:
                    gene_info.update({'maf':freq_info[0],
                                      'control_a1':freq_info[1],
                                      'control_a2':freq_info[2]})

                # Every field is followed by a tab, so each row ends in a
                # trailing tab; kept as-is for downstream readers.
                for item in ORDER_LIST:
                    print(gene_info)
                    out_text.write(str(gene_info[item]) + '\t')
                out_text.write('\n')

                if lw_snp is not None:
                    lw_list.append(lw_snp)
                z_list = create_condition_list(gene_info, table_loc, out_file, z_list)
    print(z_list)
    print(lw_list)
    return z_list, lw_list
def update_with_freq(freq_loc, snp_info, pop, aa_freq_loc=None):
    '''
    Add allele-frequency fields to snp_info in place and return it.

    When pop is 'UK' and no aa_freq_loc is given, the keys 'maf',
    'control_a1' and 'control_a2' are filled from freq_loc. In every other
    case the work is delegated to update_with_aa_freq().

    Args:
        freq_loc -- frequency file path handed to pc_toolbox.retrieve_freq()
        snp_info -- dict with at least the keys 'snp' and 'lz'
        pop -- population label; only 'UK' selects the single-file path
        aa_freq_loc -- optional second frequency file path
    Returns:
        The same snp_info dict, updated.
    '''
    print(snp_info)
    print("3")
    if pop != 'UK' or aa_freq_loc is not None:
        return update_with_aa_freq(aa_freq_loc, freq_loc, snp_info)

    missing_freq = {'maf':'NA','control_a1':'NA','control_a2':'NA'}
    # A placeholder SNP name cannot be looked up at all.
    if snp_info['snp'] == 'NA':
        snp_info.update(missing_freq)
        return snp_info

    # Try the SNP's own name first, then its LocusZoom name.
    freq_info = pc_toolbox.retrieve_freq(freq_loc, snp_info['snp'])
    if freq_info is None:
        freq_info = pc_toolbox.retrieve_freq(freq_loc, snp_info['lz'])

    if freq_info is None:
        snp_info.update(missing_freq)
    else:
        snp_info.update({'maf':freq_info[0],
                         'control_a1':freq_info[1],
                         'control_a2':freq_info[2]})
    return snp_info
def update_with_aa_freq(aa_freq_loc,uk_freq_loc, snp_info):
    '''
    Add AA_* and UK_* allele-frequency fields to snp_info in place.

    For each of the two frequency files the SNP is looked up by
    snp_info['snp'] and, failing that, by snp_info['lz']. The result fills
    '<prefix>maf', '<prefix>control_a1' and '<prefix>control_a2', where the
    prefix is 'AA_' or 'UK_'; a failed lookup fills them with 'NA'.

    When snp_info['snp'] is the placeholder 'NA', all six keys are set to
    'NA'. (Previously only the AA_* keys were set on that path, so the
    result lacked the UK_* keys that every other path provides.)

    Args:
        aa_freq_loc -- frequency file path for the AA_* fields
        uk_freq_loc -- frequency file path for the UK_* fields
        snp_info -- dict with at least the keys 'snp' and 'lz'
    Returns:
        The same snp_info dict, updated.
    '''
    print(snp_info)
    print("3")
    fields = ('maf', 'control_a1', 'control_a2')
    sources = (('AA_', aa_freq_loc), ('UK_', uk_freq_loc))

    if snp_info['snp'] == 'NA':
        for prefix, _ in sources:
            for field in fields:
                snp_info[prefix + field] = 'NA'
        return snp_info

    for prefix, source_loc in sources:
        # Try the SNP's own name first, then its LocusZoom name.
        freq_info = pc_toolbox.retrieve_freq(source_loc, snp_info['snp'])
        if freq_info is None:
            freq_info = pc_toolbox.retrieve_freq(source_loc, snp_info['lz'])
        if freq_info is None:
            freq_info = ('NA', 'NA', 'NA')
        for position, field in enumerate(fields):
            snp_info[prefix + field] = freq_info[position]
    print(snp_info)
    return snp_info
def read_log(log_summary,log_file,freq_loc,map_loc, repair_loc,annot_dict):
    '''
    Extract relevant information from log file and append it to a summary file.

    The log is scanned line by line and a running record is filled in from
    the line prefixes below. One tab-separated row is appended to the
    summary each time a 'Found:' line is reached.

        '$$$'  range info (chr, band, refgene, start, end, flag)
        '%%%'  start of a loop through plink_association.py
        ':::'  final loop count
        '&&&'  a SNP in the condition list
        'Found:'  name of LocusZoom's reference SNP (triggers the write)
        'Odds Ratio' / 'Regression Coefficient', 'Coefficient T-Statistic',
        'A1:', 'Confidence Interval'  statistics for the current loop

    Args:
        log_summary -- path to which summary information is appended
        log_file -- path to log file which has been formatted as above
        freq_loc -- frequency file path handed to pc_toolbox.retrieve_freq()
        map_loc -- unused (kept for interface compatibility)
        repair_loc -- unused (kept for interface compatibility)
        annot_dict -- mapping from SNP name to an object with a .name
                      attribute, used for the 'im' column
    Returns:
        Nothing.

    '''
    import re

    # Placeholder for every field that has no value yet.
    BLANK = '--'
    order_list = ['chr','band','ref','start','end','flag','clist','im','sig','pval','OR','lo',
                  'hi','t','aa1','maf','ma1','ma2','lz','ca','total']
    # Per-loop fields wiped after each summary row; range info is kept.
    reset_list = ['clist','im','sig','pval','OR','lo',
                  'hi','t','aa1','maf','ma1','ma2','lz','ca']
    output_dict = {'chr':'chr','band':'band','ref':'refgene','start':'start','end':'end','flag':'--',
                   'clist':BLANK,'im':BLANK,'sig':BLANK,'pval':BLANK, 'OR':BLANK,'lo':BLANK,
                   'hi':BLANK,'t':BLANK,'aa1':BLANK,'maf':BLANK,'ma1':BLANK,'ma2':BLANK,
                   'lz':BLANK,'ca':BLANK,'total':'--'}
    condition_list = []

    number = r'[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?'
    interval_pattern = re.compile(r'^(' + number + r')\s*-\s*(' + number + r')$')

    def split_interval(text):
        # Split 'lo-hi' into its bounds. The pattern copes with negative
        # bounds and exponents, where splitting on the first '-' would cut
        # a number in half; anything else uses the plain first-'-' split.
        match = interval_pattern.match(text)
        if match:
            return match.group(1), match.group(2)
        lo, _, hi = text.partition('-')
        return lo.strip(), hi.strip()

    def write_row(summary):
        # str() keeps the join from failing on any non-string field.
        row = [str(output_dict[key]) for key in order_list]
        print(row)
        summary.write('\t'.join(row)+'\n')

    with open(log_file, mode='r') as log, open(log_summary, mode='a') as summary:
        for logline in log:
            #if logline begins '$$$', it contains the range info
            if logline.startswith('$$$'):
                range_info = mb_range(logline)
                output_dict['band']=range_info.band
                output_dict['chr']=range_info.chro
                output_dict['ref']=range_info.gene
                output_dict['start']=range_info.start
                output_dict['end']=range_info.end
                output_dict['flag']=range_info.flag

            #if logline begins '%%%', info pertains to loop through plink_association.py
            elif logline.startswith('%%%'):
                #if there is unprinted (and thus unwiped) loop information
                #in output_dict, LocusZoom did not spit out
                #a 'Found: ' phrase with reference SNP name. This is bad news!
                if output_dict['sig'] != BLANK:
                    output_dict['lz'] = 'ERROR'
                    print('ERROR: LZ did not announce a reference SNP!')
                    print(output_dict)
                    write_row(summary)
                    # NOTE(review): the per-loop fields are not wiped here,
                    # so 'lz' and any statistics the next loop does not
                    # overwrite carry over into its row -- confirm intent.
                #gather info pertaining to this loop through plink
                snp_pv_loop = snp_p_loop(logline)
                sig_snp = snp_pv_loop[0]
                output_dict['sig']=sig_snp
                output_dict['pval']=snp_pv_loop[1]
                output_dict['ca']=snp_pv_loop[2]
                #look up the allele frequency of the snp in the freq file;
                #a failed lookup gives None, recorded here as 'NA'
                freq_info = pc_toolbox.retrieve_freq(freq_loc, sig_snp)
                if freq_info is None:
                    freq_info = ('NA','NA','NA')
                maf,ma1,ma2 = freq_info
                output_dict['maf']=maf
                output_dict['ma1']=ma1
                output_dict['ma2']=ma2
                #look up the original Immunochip SNP name in the annotation dict
                output_dict['im']=annot_dict[sig_snp].name
            #if logline begins with Odds Ratio etc.
            elif logline.startswith(('Regression Coefficient','Odds Ratio')):
                output_dict['OR']=logline.strip().partition(':')[2]
            elif logline.startswith('Coefficient T-Statistic'):
                output_dict['t']=logline.strip().partition(':')[2]
            elif logline.startswith('A1:'):
                output_dict['aa1']=logline.strip().partition(':')[2]
            #if logline begins 'Found: ', obtain name of LZ's reference SNP
            elif logline.startswith('Found:'):
                output_dict['lz']=one_value(logline)
                if condition_list:
                    output_dict['clist']='['+','.join(condition_list)+']'
                else:
                    output_dict['clist']=BLANK
                write_row(summary)
                condition_list = []
                #if output_dict['total'] is '--',
                #then this is NOT last loop through plink_association.py.
                #(if it were, p_a wouldn't give a ref SNP, and LZ would,
                #leading to an expected mismatch.)
                if output_dict['total']=='--' and output_dict['lz']!=output_dict['sig']:
                    print('ERROR: plink_association and Locus Zoom did not identify the same reference SNP!')
                    summary.write('ERROR: PLINK_ASSOCIATION AND LOCUS ZOOM IDENTIFIED DIFFERENT REFERENCE SNPs! \n')
                output_dict['total']='--'
                for key in reset_list:
                    output_dict[key]=BLANK

            #if logline begins ':::', this is final loop through p_a
            elif logline.startswith(':::'):
                final_loopcount = one_value(logline)
                output_dict['total']=final_loopcount
                output_dict['ca']=final_loopcount
            #if logline begins '&&&', it contains a snp in condition list
            elif logline.startswith('&&&'):
                condition_list.append(one_value(logline))
                print(condition_list)
            elif logline.startswith('Confidence Interval'):
                want = logline.partition(':')[2].strip()
                lo, hi = split_interval(want)
                output_dict['lo']=lo
                output_dict['hi']=hi
def main(argv):
    '''
    Build the conditional-analysis result table for one region.

    After cl_arguments(argv) has run, every SNP returned by
    make_snp_pos_list() is processed in turn: a PLINK script is optionally
    written and run, the result for snpstar is extracted with
    filter_result(), and one row is appended to the region table. The
    finished table is copied into ResultTables and summarized into
    SummaryTables.

    PLINK is run for a SNP when hit1 is set (the script's last argument is
    then hitstring) or when multi is not set (last argument snpstar). With
    multi set and hit1 unset no script is run and an existing
    .assoc.logistic file is read.

    Args:
        argv -- command-line arguments, passed to cl_arguments()
    Returns:
        Nothing.
    '''
    # presumably these module globals are populated by cl_arguments() --
    # TODO confirm; none of them is assigned in this function.
    global outfolder, assoc, chromosome,snpstar
    global out_flag, user_script_loc
    global range_start_bp, range_end_bp, hit_index, hitstring
    global region_id, build, single, ldfolder, freq_loc, multi, hit1

    cl_arguments(argv)
    annot_dict_loc = fix_it.locate_annot_dict(build)
    annot_dict = fix_it.build_annot_dict('LOG',annot_dict_loc)
    snpstar_im = annot_dict[snpstar].name

    # Single-digit hit indices are zero-padded to two characters.
    str_hit_index = str(hit_index)
    if hit_index < 10:
        str_hit_index = '0'+str_hit_index

    table_folder = os.path.join(outfolder, 'ResultTables')
    summary_folder = os.path.join(outfolder, 'SummaryTables')
    chr_folder = os.path.join(outfolder, 'chr{0}'.format(chromosome))
    reg_folder = os.path.join(chr_folder, region_id)
    for folder in (table_folder, summary_folder, chr_folder, reg_folder):
        if not os.path.exists(folder):
            os.makedirs(folder)

    placeholder = ''
    if multi:
        placeholder = '_'+snpstar+'_'+str_hit_index
    super_outbase = os.path.join(reg_folder, region_id)
    list_loc = super_outbase + '_snps.list'

    table_name = region_id+placeholder+'.tbl'
    table_loc = os.path.join(chr_folder, table_name)
    new_table_loc = os.path.join(table_folder, table_name)
    summary_table_loc = os.path.join(summary_folder, table_name)
    print(table_loc)
    snplist = make_snp_pos_list(assoc,list_loc)
    ld_loc = os.path.join(ldfolder,'chr{0}'.format(chromosome),
                              '{0}_r2_0.ld'.format(region_id))
    with open(table_loc, mode="w") as table:
        table.write('\t'.join(['SNP*','SNP*_pos','SNP*_im','conditional_snp',
                               'csnp_im','csnp_pos','SNP*_pvalue','OR','ci_lo','ci_hi',
                               'a1',"r2","csnp_freq","csnp_freq_a1"])+'\n')

    banner = '''
**********************************************************************
    ************************************************************
    The data will be conditioned on the following SNP:
    %%%             {0}
    This is snp #{1} in a list of {2}.
        ******************************************************
**********************************************************************
'''
    run_plink = hit1 or not multi
    # The two PLINK modes differ only in the last write_script argument.
    script_target = hitstring if hit1 else snpstar

    for index, snp_tuple in enumerate(snplist, 1):
        snp = snp_tuple[0]
        snp_pos = snp_tuple[1]
        snp_im = annot_dict[snp].name
        # ':' is replaced by '_' in the per-SNP output file names.
        corrected_snp = snp.replace(':','_')
        outbase = super_outbase+'_'+corrected_snp
        script_loc = outbase + '.script'
        assoc_out = outbase +'.assoc.logistic'
        if run_plink:
            write_script(outbase,script_loc, user_script_loc, snp, single, script_target)
            plink(script_loc)
            print(banner.format(snp,index,len(snplist)))

        info = filter_result(assoc_out,snpstar)
        snp_freq = pc_toolbox.retrieve_freq(freq_loc, snp)
        if snp_freq is None:
            # A failed frequency lookup is recorded as 'NA' instead of
            # crashing on the index below.
            snp_freq = ('NA','NA')
        r2 = pc_toolbox.retrieve_r2(snpstar,snp,ld_loc)
        if r2 is None:
            r2 = "???"

        with open(table_loc, mode='a') as table:
            table.write('\t'.join([snpstar,info.pos, snpstar_im,
                                    snp,snp_im,snp_pos,str(info.p),
                                    info.OR,info.lo,info.hi,
                                    info.a1,r2,snp_freq[0],snp_freq[1]])+'\n')
    shutil.copy(table_loc, new_table_loc)
    summarize_table(new_table_loc, summary_table_loc, snpstar)