def load_manual_threshold_somatics(jsm_file_name, threshold):
    '''
    Load a list of rows containing somatics based on pre-specified probability
    threshold.

    The score of a row is row['p_aa_ab'] + row['p_aa_bb']. Rows whose score is
    at least `threshold` are passed through format_rows and returned ordered by
    ascending score; rows with equal scores keep the order in which they were
    read (chromosomes are visited in sorted name order).

    jsm_file_name -- path handed to JointSnvMixReader.
    threshold     -- minimum score (inclusive) for a row to be kept.

    Returns the list of formatted rows.
    '''
    reader = JointSnvMixReader(jsm_file_name)

    scored_rows = []

    try:
        for chr_name in sorted(reader.get_chr_list()):
            print(chr_name)

            for row in reader.get_rows(chr_name):
                score = row['p_aa_ab'] + row['p_aa_bb']

                if score >= threshold:
                    scored_rows.append((score, format_rows(row, chr_name)))
    finally:
        # Release the reader even when reading or formatting a row fails.
        reader.close()

    # One stable sort on the score alone yields the same ordering as inserting
    # every row at its bisect (right) position, without the per-row list shift.
    scored_rows.sort(key=lambda pair: pair[0])

    return [row for _, row in scored_rows]
def load_auto_threshold_somatics(jsm_file_name):
    '''
    Load a list of rows containing somatics based on automatically determined
    probability threshold. Threshold is determined based on inflection point
    method.

    The score of a row is row['p_aa_ab'] + row['p_aa_bb']. While reading, at
    most n = 1e5 rows are retained, kept sorted by ascending score, and a score
    at or below 1e-6 is never retained. The retained scores are then scanned for
    the largest gap between neighbours, and the rows from the lower side of that
    gap upwards are returned.

    jsm_file_name -- path handed to JointSnvMixReader.

    Returns the list of formatted rows, ordered by ascending score.
    '''
    n = int(1e5)
    threshold = 1e-6

    reader = JointSnvMixReader(jsm_file_name)

    scores = []
    rows = []

    try:
        for chr_name in sorted(reader.get_chr_list()):
            print(chr_name)

            for row in reader.get_rows(chr_name):
                score = row['p_aa_ab'] + row['p_aa_bb']

                insert_position = bisect.bisect(scores, score)

                # A score below the current minimum is dropped outright, even
                # when fewer than n rows are held; only the very first entry of
                # an empty list is accepted unconditionally.
                if insert_position == 0 and scores:
                    continue

                scores.insert(insert_position, score)
                rows.insert(insert_position, format_rows(row, chr_name))

                # Evict the smallest entry when it does not clear the threshold
                # or when the list has outgrown its capacity.
                if scores[0] <= threshold or len(scores) > n:
                    scores.pop(0)
                    rows.pop(0)
    finally:
        # Release the reader even when reading or formatting a row fails.
        reader.close()

    # Locate the first occurrence of the largest jump between adjacent scores.
    max_diff = 0
    index = 0

    for i in range(len(scores) - 1):
        diff = scores[i + 1] - scores[i]

        if diff > max_diff:
            max_diff = diff
            index = i

    # NOTE(review): the slice starts at the entry just below the gap, so that
    # row is included in the result -- confirm this is intended.
    return rows[index:]
def main(jsm_file_name, prefix):
    '''
    Read the parameters stored in a JSM file and hand them to recursive_plot.

    Side effect: rebinds the module-level globals `a` and `b` to column vectors
    of shape (n + 1, 1) with n = 100, where `a` holds 0, 1, ..., n and `b`
    holds n - a.
    NOTE(review): presumably recursive_plot reads these globals -- confirm.

    jsm_file_name -- path handed to JointSnvMixReader.
    prefix        -- passed through unchanged to recursive_plot.
    '''
    reader = JointSnvMixReader(jsm_file_name)

    try:
        parameters = reader.get_parameters()
    finally:
        # Release the reader even when the parameters cannot be read.
        reader.close()

    n = 100

    global a, b

    a = np.linspace(0, n, n + 1)
    b = n - a

    # Turn both 1-d grids into single-column 2-d arrays.
    a = a.reshape((a.size, 1))
    b = b.reshape((b.size, 1))

    recursive_plot(parameters, prefix)
def load_somatics(jsm_file_name):
    '''
    Collect the highest row scores from a JSM file, cut at the largest gap.

    The score of a row is row['p_aa_ab'] + row['p_aa_bb']. Chromosomes listed
    in the module-level `excluded_chrom` are skipped. While reading, at most
    n = 1e5 scores are retained, kept sorted ascending, and a score at or below
    1e-6 is never retained. The retained scores are then scanned for the
    largest gap between neighbours, and the scores from the lower side of that
    gap upwards are returned.

    jsm_file_name -- path handed to JointSnvMixReader.

    Returns the list of scores in ascending order.
    '''
    n = int(1e5)
    threshold = 1e-6

    reader = JointSnvMixReader(jsm_file_name)

    scores = []

    try:
        for chr_name in sorted(reader.get_chr_list()):
            if chr_name in excluded_chrom:
                continue

            print(chr_name)

            for row in reader.get_rows(chr_name):
                score = row['p_aa_ab'] + row['p_aa_bb']

                insert_position = bisect.bisect(scores, score)

                # A score below the current minimum is dropped outright, even
                # when fewer than n scores are held; only the very first entry
                # of an empty list is accepted unconditionally.
                if insert_position == 0 and scores:
                    continue

                scores.insert(insert_position, score)

                # Evict the smallest entry when it does not clear the threshold
                # or when the list has outgrown its capacity.
                if scores[0] <= threshold or len(scores) > n:
                    scores.pop(0)
    finally:
        # Release the reader even when reading a chromosome fails.
        reader.close()

    # Locate the first occurrence of the largest jump between adjacent scores.
    max_diff = 0
    index = 0

    for i in range(len(scores) - 1):
        diff = scores[i + 1] - scores[i]

        if diff > max_diff:
            max_diff = diff
            index = i

    # NOTE(review): the slice starts at the entry just below the gap, so that
    # score is included in the result -- confirm this is intended.
    return scores[index:]
def get_position_probabilities(jsm_file_name, positions):
    '''
    Print the JSM record of every requested position as a tab-separated line.

    Chromosomes are processed in sorted order. For each coordinate the record
    returned by reader.get_position is printed with its first field replaced by
    "<chromosome>:<first field>". A message is printed instead when the lookup
    raises KeyError or returns a false value.

    jsm_file_name -- path handed to JointSnvMixReader.
    positions     -- mapping of chromosome name to an iterable of coordinates.
    '''
    reader = JointSnvMixReader(jsm_file_name)

    try:
        for chromosome, coordinates in sorted(positions.items()):
            for coord in coordinates:
                try:
                    jsm_data = reader.get_position(chromosome, coord)
                except KeyError:
                    # Spacing matches what the former two-argument print emitted.
                    print("{0}  not in jsm file.".format(chromosome))
                    continue

                if jsm_data:
                    row = list(jsm_data)
                    row[0] = chromosome + ":" + str(row[0])

                    print("\t".join([str(x) for x in row]))
                else:
                    # Format the coordinate instead of concatenating it, so a
                    # numeric coordinate no longer raises TypeError here.
                    print("{0}:{1}  not in jsm file.".format(chromosome, coord))
    finally:
        # Release the reader even when a lookup fails unexpectedly.
        reader.close()
def main(args):
    '''
    Write the rows of one genotype class from a JSM file to a tab-separated
    call file.

    The header from get_header() is written first. Chromosomes are then visited
    in sorted order, and the first field of every row is rewritten as
    "<chromosome>:<first field>" before the row is written.

    args -- object providing:
        jsm_file_name  -- path handed to JointSnvMixReader.
        call_file_name -- path of the output file (overwritten).
        argmax         -- when true, rows come from
                          get_genotype_rows_by_argmax; otherwise from
                          get_genotype_rows_by_prob using prob_threshold.
        genotype_class -- passed through to the reader.
        prob_threshold -- only read when argmax is false.
    '''
    reader = JointSnvMixReader(args.jsm_file_name)

    try:
        # The with-block guarantees the call file is flushed and closed.
        with open(args.call_file_name, 'w') as call_file:
            writer = csv.writer(call_file, delimiter="\t")

            writer.writerow(get_header())

            for chr_name in sorted(reader.get_chr_list()):
                if args.argmax:
                    rows = reader.get_genotype_rows_by_argmax(
                        chr_name, args.genotype_class)
                else:
                    rows = reader.get_genotype_rows_by_prob(
                        chr_name, args.genotype_class, args.prob_threshold)

                # Skip chromosomes without matching rows. The previous test was
                # inverted and skipped every chromosome that HAD rows. len() is
                # used because `rows` exposes tolist() and may not support
                # plain truth testing.
                if len(rows) == 0:
                    continue

                formatted_rows = []

                for row in rows.tolist():
                    row = list(row)
                    row[0] = "{0}:{1}".format(chr_name, row[0])
                    formatted_rows.append(row)

                writer.writerows(formatted_rows)
    finally:
        # Release the reader even when reading or writing fails.
        reader.close()
def extract_jsm_parameters(args):
    '''
    Read the parameters stored in a JSM file and pass them to recursive_print.

    args -- object providing `jsm_file_name`, the path handed to
            JointSnvMixReader.
    '''
    reader = JointSnvMixReader(args.jsm_file_name)

    try:
        parameters = reader.get_parameters()
    finally:
        # Release the reader even when the parameters cannot be read.
        reader.close()

    recursive_print(parameters)