def load_manual_threshold_somatics(jsm_file_name, threshold):
    '''
    Load a list of rows containing somatics based on pre-specified probability threshold.

    The somatic score of a row is ``row['p_aa_ab'] + row['p_aa_bb']``. Every row whose score is at least `threshold`
    is passed through `format_rows` and kept. Chromosomes are visited in sorted name order.

    Returns the formatted rows in ascending score order; rows with equal scores keep the order in which they were
    read.
    '''
    reader = JointSnvMixReader(jsm_file_name)

    scored_rows = []

    # Release the reader even when reading or formatting a row raises.
    try:
        for chr_name in sorted(reader.get_chr_list()):
            # Progress output. The parenthesised form prints the same text under Python 2 and Python 3.
            print(chr_name)

            for row in reader.get_rows(chr_name):
                score = row['p_aa_ab'] + row['p_aa_bb']

                if score >= threshold:
                    scored_rows.append((score, format_rows(row, chr_name)))
    finally:
        reader.close()

    # A single stable sort keyed on the score alone gives the same order as inserting each row to the right of equal
    # scores, without the quadratic cost of repeated list.insert calls and without ever comparing the rows.
    scored_rows.sort(key=lambda scored_row: scored_row[0])

    return [row for _, row in scored_rows]
def load_auto_threshold_somatics(jsm_file_name):
    '''
    Load a list of rows containing somatics based on automatically determined probability threshold. Threshold is
    determined based on inflection point method.

    The somatic score of a row is ``row['p_aa_ab'] + row['p_aa_bb']``. Only scores strictly above 1e-6 are considered,
    and at most the 1e5 highest scoring rows are retained (in ascending score order) while reading. The largest gap
    between two consecutive retained scores is then located, and the formatted rows from the lower side of that gap
    onwards are returned. With fewer than two retained rows, all retained rows are returned.
    '''
    n = int(1e5)
    threshold = 1e-6

    reader = JointSnvMixReader(jsm_file_name)

    scores = []
    rows = []

    # Release the reader even when reading or formatting a row raises.
    try:
        for chr_name in sorted(reader.get_chr_list()):
            # Progress output. The parenthesised form prints the same text under Python 2 and Python 3.
            print(chr_name)

            for row in reader.get_rows(chr_name):
                score = row['p_aa_ab'] + row['p_aa_bb']

                if score <= threshold:
                    continue

                # Only reject a score below the current minimum once the buffer is full. Rejecting it while there is
                # still room would make the first retained score a floor and the result depend on input order.
                if len(scores) >= n and score < scores[0]:
                    continue

                insert_position = bisect.bisect(scores, score)

                scores.insert(insert_position, score)
                rows.insert(insert_position, format_rows(row, chr_name))

                # Evict the lowest score to keep the buffer at no more than n entries.
                if len(scores) > n:
                    scores.pop(0)
                    rows.pop(0)
    finally:
        reader.close()

    max_diff = 0
    index = 0

    # Find the first position with the largest jump to the next score.
    for i, (lower, upper) in enumerate(zip(scores, scores[1:])):
        diff = upper - lower

        if diff > max_diff:
            max_diff = diff
            index = i

    # NOTE(review): the slice starts at the row just below the largest gap, so that row is included in the result.
    # Confirm whether index + 1 was intended.
    return rows[index:]
def load_somatics(jsm_file_name):
    '''
    Load the somatic scores selected by the automatically determined (largest gap) threshold.

    The somatic score of a row is ``row['p_aa_ab'] + row['p_aa_bb']``. Chromosomes listed in `excluded_chrom` are
    skipped. Only scores strictly above 1e-6 are considered, and at most the 1e5 highest scores are retained (in
    ascending order) while reading. The largest gap between two consecutive retained scores is then located, and the
    scores from the lower side of that gap onwards are returned. With fewer than two retained scores, all retained
    scores are returned.
    '''
    n = int(1e5)
    threshold = 1e-6

    reader = JointSnvMixReader(jsm_file_name)

    scores = []

    # Release the reader even when reading a row raises.
    try:
        for chr_name in sorted(reader.get_chr_list()):
            if chr_name in excluded_chrom:
                continue

            # Progress output. The parenthesised form prints the same text under Python 2 and Python 3.
            print(chr_name)

            for row in reader.get_rows(chr_name):
                score = row['p_aa_ab'] + row['p_aa_bb']

                if score <= threshold:
                    continue

                # Only reject a score below the current minimum once the buffer is full. Rejecting it while there is
                # still room would make the first retained score a floor and the result depend on input order.
                if len(scores) >= n and score < scores[0]:
                    continue

                bisect.insort(scores, score)

                # Evict the lowest score to keep the buffer at no more than n entries.
                if len(scores) > n:
                    scores.pop(0)
    finally:
        reader.close()

    max_diff = 0
    index = 0

    # Find the first position with the largest jump to the next score.
    for i, (lower, upper) in enumerate(zip(scores, scores[1:])):
        diff = upper - lower

        if diff > max_diff:
            max_diff = diff
            index = i

    # NOTE(review): the slice starts at the score just below the largest gap, so that score is included in the result.
    # Confirm whether index + 1 was intended.
    return scores[index:]