def test_alt_chr_pos_to_chromorder():
    """Check position-0 genome coordinates for un-prefixed names in 'b37'."""
    expected_by_chrom = (
        ('6', 1062541960),
        ('7', 1233657027),
        ('8', 1392795690),
    )
    for chrom, expected in expected_by_chrom:
        assert nc.chr_pos_to_genome_pos(chrom, 0, 'b37') == expected
def test_chr_pos_to_chromorder():
    """Check position-0 genome coordinates when no assembly is passed."""
    expected_by_chrom = (
        ('chr6', 1062541960),
        ('chr7', 1233657027),
        ('chr8', 1392795690),
    )
    for chrom, expected in expected_by_chrom:
        assert nc.chr_pos_to_genome_pos(chrom, 0) == expected
def test_chr_pos_to_genome_pos():
    """Test the coordinate conversion for position 10 on chr1 and chr2."""
    # No assembly argument is given, so the library default is exercised.
    assert nc.chr_pos_to_genome_pos('chr1', 10) == 10
    assert nc.chr_pos_to_genome_pos('chr2', 10) == 249250631
def line_to_dict(line):
    """Convert one whitespace-separated input line into an entry dict.

    The column indices (``chr1_col``, ``from1_col``, ``to1_col``,
    ``chr2_col``, ``from2_col``, ``to2_col``), ``assembly`` and
    ``importance_column`` are read from the enclosing scope.

    Returns a dict with the keys ``xs`` and ``ys`` (start/end genome
    positions of the two regions), ``uid``, ``chrOffset``, ``importance``
    and ``fields`` (the unmodified input line).

    Raises:
        KeyError: if the chromosome lookup fails; the message names the
            current assembly and the two chromosome fields of the line.
    """
    fields = line.split()

    def genome_pos(chrom_col, pos_col):
        # Translate one (chromosome, position) column pair of this line.
        return nc.chr_pos_to_genome_pos(
            fields[chrom_col], int(fields[pos_col]), assembly)

    entry = {}
    try:
        entry['xs'] = [
            genome_pos(chr1_col, from1_col),
            genome_pos(chr1_col, to1_col),
        ]
        entry['ys'] = [
            genome_pos(chr2_col, from2_col),
            genome_pos(chr2_col, to2_col),
        ]
    except KeyError:
        raise KeyError(
            "ERROR converting chromosome position to genome position. "
            "Please make sure you've specified the correct assembly "
            "using the --assembly option. "
            "Current assembly: {}, chromosomes: {},{}".format(
                assembly, fields[chr1_col], fields[chr2_col]))

    entry['uid'] = slugid.nice().decode('utf-8')
    # Difference between the genome-wide and the per-chromosome start.
    entry['chrOffset'] = entry['xs'][0] - int(fields[from1_col])

    if importance_column is None:
        # Default importance: the longer of the two region extents.
        x_extent = entry['xs'][1] - entry['xs'][0]
        y_extent = entry['ys'][1] - entry['ys'][0]
        entry['importance'] = max(x_extent, y_extent)
    elif importance_column == 'random':
        entry['importance'] = random.random()
    else:
        # NOTE(review): this indexes the dict built above (keys so far: xs,
        # ys, uid, chrOffset), not the parsed line fields -- confirm that is
        # intended rather than a lookup into the input columns.
        entry['importance'] = float(entry[importance_column])

    entry['fields'] = line
    return entry
def END_ABS(self, CHROM, END):
    """Return the genome position of END on chromosome "chr" + CHROM.

    Uses the chromosome info that ``nc.get_chrominfo`` returns for "hg38".
    """
    hg38_info = nc.get_chrominfo("hg38")
    chrom_name = "chr" + CHROM
    return nc.chr_pos_to_genome_pos(chrom_name, END, hg38_info)
def START_ABS(self, CHROM, START):
    """Return the genome position of START on chromosome "chr" + CHROM.

    Uses the chromosome info that ``nc.get_chrominfo`` returns for "hg38".
    """
    hg38_info = nc.get_chrominfo("hg38")
    chrom_name = "chr" + CHROM
    return nc.chr_pos_to_genome_pos(chrom_name, START, hg38_info)
def test_dm3_chr_pos_genome_pos():
    """Check genome coordinates of position 100 on two 'dm3' chromosomes."""
    expected_by_chrom = {
        'chr2L': 100,
        'chr2R': 23380516,
    }
    for chrom, expected in expected_by_chrom.items():
        assert nc.chr_pos_to_genome_pos(chrom, 100, 'dm3') == expected
def test_test3chroms_chr_pos_genome_pos():
    """Check genome coordinates of position 100 in the 'test3chroms' assembly."""
    expected_by_chrom = {
        'chr1': 100,
        'chr2': 1100,
    }
    for chrom, expected in expected_by_chrom.items():
        assert nc.chr_pos_to_genome_pos(chrom, 100, 'test3chroms') == expected
def POS_ABS(self, CHROM, POS):
    """Return the genome position of POS on chromosome 'chr' + CHROM.

    Uses the chromosome info that ``nc.get_chrominfo`` returns for 'hg38'.
    """
    hg38_info = nc.get_chrominfo('hg38')
    chrom_name = 'chr' + CHROM
    return nc.chr_pos_to_genome_pos(chrom_name, POS, hg38_info)
def test_test3chroms_chr_pos_genome_pos():
    """Verify 'test3chroms' genome coordinates for position 100 on chr1/chr2."""
    assembly = 'test3chroms'
    cases = [('chr1', 100), ('chr2', 1100)]
    for chrom, expected in cases:
        actual = nc.chr_pos_to_genome_pos(chrom, 100, assembly)
        assert actual == expected
def test_alt_chr_pos_to_chromorder():
    """Verify 'b37' genome coordinates of position 0 on chromosomes 6-8."""
    assembly = 'b37'
    cases = [
        ('6', 1062541960),
        ('7', 1233657027),
        ('8', 1392795690),
    ]
    for chrom, expected in cases:
        actual = nc.chr_pos_to_genome_pos(chrom, 0, assembly)
        assert actual == expected
def test_chr_pos_to_chromorder():
    """Verify genome coordinates of position 0 on chr6-chr8, no assembly given."""
    cases = [
        ('chr6', 1062541960),
        ('chr7', 1233657027),
        ('chr8', 1392795690),
    ]
    for chrom, expected in cases:
        actual = nc.chr_pos_to_genome_pos(chrom, 0)
        assert actual == expected
def test_chr_pos_to_genome_pos():
    """Check position 100 with the default assembly and with 'grch37'."""
    # Each case pairs the positional arguments with the expected coordinate.
    cases = [
        (('chr1', 100), 100),
        (('chr2', 100), 249250621 + 100),
        (('1', 100, 'grch37'), 100),
    ]
    for call_args, expected in cases:
        assert nc.chr_pos_to_genome_pos(*call_args) == expected
def test_dm3_chr_pos_genome_pos():
    """Verify 'dm3' genome coordinates for position 100 on chr2L and chr2R."""
    assembly = 'dm3'
    cases = [('chr2L', 100), ('chr2R', 23380516)]
    for chrom, expected in cases:
        actual = nc.chr_pos_to_genome_pos(chrom, 100, assembly)
        assert actual == expected
def test_clodius_aggregate_bedgraph1():
    """Aggregate the dm3 sample bedgraph and spot-check the resulting hitile.

    Runs ``cca.bedgraph`` through click's ``CliRunner`` on
    ``sample_data/dm3_values.tsv``, then asserts on individual entries of the
    ``values_0`` dataset and on the sum of the data returned by
    ``cht.get_data(f, 0, 0)``.
    """
    import numpy as np

    input_file = op.join(testdir, 'sample_data', 'dm3_values.tsv')
    output_file = '/tmp/dm3_values.hitile'

    runner = clt.CliRunner()
    result = runner.invoke(
        cca.bedgraph,
        [input_file, '--output-file', output_file, '--assembly', 'dm3'])
    # Check the command itself first so a failed run is not masked by
    # assertions against a missing or stale output file.
    assert result.exit_code == 0

    # Open read-only and close deterministically; the test only inspects data.
    with h5py.File(output_file, 'r') as f:
        # TODO: Make assertions about result (e.g. f['meta'].attrs['max-zoom'])
        values = f['values_0']

        # genome positions are 0 based as stored in hitile files
        assert np.isnan(values[8])
        assert values[9] == 1
        assert values[10] == 1
        assert values[13] == 1
        assert np.isnan(values[14])
        assert np.isnan(values[15])

        chrom_info = nc.get_chrominfo('dm3')
        chr_2r_pos = nc.chr_pos_to_genome_pos('chr2R', 0, chrom_info)

        assert np.isnan(values[chr_2r_pos + 28])
        assert values[chr_2r_pos + 29] == 77
        assert values[chr_2r_pos + 38] == 77
        assert values[chr_2r_pos + 39] == 0

        d = cht.get_data(f, 0, 0)
        assert 1.0 < np.nansum(d) < 10.0

    return

    # NOTE(review): everything below is unreachable because of the ``return``
    # above. It is kept as the author left it (a disabled 'test3chroms'
    # scenario); note that it asserts zeros where the active section asserts
    # NaN, so confirm the expectations before re-enabling it.
    input_file = op.join(testdir, 'sample_data', 'test3chroms_values.tsv')
    output_file = '/tmp/test3chroms_values.hitile'

    runner = clt.CliRunner()
    result = runner.invoke(cca.bedgraph, [
        input_file, '--output-file', output_file, '--assembly', 'test3chroms'
    ])
    assert result.exit_code == 0

    with h5py.File(output_file, 'r') as f:
        # TODO: Make assertions about result
        values = f['values_0']

        # genome positions are 0 based as stored in hitile files
        assert values[8] == 0
        assert values[9] == 1
        assert values[10] == 1
        assert values[13] == 1
        assert values[14] == 0
        assert values[15] == 0

        chr2_pos = nc.chr_pos_to_genome_pos('chr2', 0, 'test3chroms')

        assert values[chr2_pos + 28] == 0
        assert values[chr2_pos + 29] == 77
        assert values[chr2_pos + 38] == 77
        assert values[chr2_pos + 39] == 0

        d = cht.get_data(f, 0, 0)
        assert sum(d) == 770 + 880 + 5
def main():
    """Translate chromosome/position columns of stdin into genome positions.

    Reads whitespace-separated lines from ``sys.stdin``. For every
    ``chrom_col:pos_col`` pair given via ``--columns`` (1-based), the position
    column (a single value or a comma-separated list) is replaced by the
    value(s) returned from ``nc.chr_pos_to_genome_pos`` and the chromosome
    column is replaced by ``genome(<chrom>)`` or, if ``--new-chrom`` is
    given, by that value. All other columns are passed through. Output lines
    are tab-joined and written to stdout.

    Lines whose conversion raises ``KeyError`` are reported on stderr and
    skipped. A ``BrokenPipeError`` on stdout stops processing.
    """
    parser = argparse.ArgumentParser(description="""

    python chr_pos_to_genome_pos.py -c 1:2,3:4

    Convert chromosome,position pairs to genome_positions. Assumes that the
    coordinates refer to the hg19 assembly (unless otherwise specified).

    Example:

       2       NM_000014       chr12   -       9220303 9268825

    -> python scripts/chr_pos_to_genome_pos.py -c 3:5,3:6

       2       NM_000014       genome  -       2115405269      2115453791

    --------------------------------

    This also works with space-delimited fields:

        chr5    56765,56766

    ->python scripts/chr_pos_to_genome_pos.py -c 1:2

        genome  881683465,881683466

""")

    parser.add_argument('-a', '--assembly', default='hg19')
    parser.add_argument('-s', '--chromsizes-file', default=None)
    parser.add_argument('-n', '--new-chrom', default=None)
    # The previous default '1,2' parsed into two single-column "pairs" and
    # raised IndexError on the first input line; '1:2' is the same intent in
    # the syntax the parser actually accepts.
    parser.add_argument(
        '-c', '--columns', default='1:2',
        help="Which columns to translate to genome positions. "
             "Column pairs should be 1-based and separated by colons")

    args = parser.parse_args()

    # Parse the column specification once instead of once per input line and
    # convert it to 0-based (chromosome index, position index) tuples.
    column_pairs = []
    for spec in args.columns.split(','):
        try:
            cols = [int(col) for col in spec.split(':')]
        except ValueError:
            parser.error("invalid column pair: {!r}".format(spec))
        if len(cols) < 2:
            parser.error(
                "column pair {!r} must have the form CHROM:POS".format(spec))
        column_pairs.append((cols[0] - 1, cols[1] - 1))

    if args.chromsizes_file is not None:
        chrom_info = nc.get_chrominfo_from_file(args.chromsizes_file)
    else:
        chrom_info = nc.get_chrominfo(args.assembly)

    for line in sys.stdin:
        line_parts = line.strip().split()
        translated_positions = {}
        translated_chroms = {}

        try:
            for chrom_idx, pos_idx in column_pairs:
                # The position column may hold a comma separated list of
                # values (although it doesn't need to).
                chrom = line_parts[chrom_idx]
                poss = line_parts[pos_idx].strip(",").split(',')
                translated_positions[pos_idx] = ",".join(
                    str(nc.chr_pos_to_genome_pos(chrom, int(pos), chrom_info))
                    for pos in poss)
                translated_chroms[chrom_idx] = chrom
        except KeyError as ke:
            print("KeyError:", ke, line.strip(), file=sys.stderr)
            continue

        line_output = []
        for i, part in enumerate(line_parts):
            if i in translated_chroms:
                # Replace the chromosome identifier (e.g. 'chr1'). Use the
                # chromosome recorded for THIS column; the loop variable from
                # the translation step only holds the last pair's chromosome.
                if args.new_chrom is None:
                    line_output.append(
                        'genome({})'.format(translated_chroms[i]))
                else:
                    line_output.append(args.new_chrom)
            elif i in translated_positions:
                # This column contained a position: emit the translated one.
                line_output.append(translated_positions[i])
            else:
                # Untranslated columns are output as they are.
                line_output.append(part)

        try:
            print("\t".join(map(str, line_output)))
        except BrokenPipeError:
            # Output is probably being run through "head" or something similar
            break
def test_chr_pos_to_genome_pos():
    """Verify position 100 with no assembly given and with 'grch37'."""
    chr1_pos = nc.chr_pos_to_genome_pos('chr1', 100)
    assert chr1_pos == 100

    chr2_pos = nc.chr_pos_to_genome_pos('chr2', 100)
    assert chr2_pos == 249250621 + 100

    grch37_pos = nc.chr_pos_to_genome_pos('1', 100, 'grch37')
    assert grch37_pos == 100
def main():
    """Translate chromosome/position columns of stdin into genome positions.

    Reads whitespace-separated lines from ``sys.stdin``. For every
    ``chrom_col:pos_col`` pair given via ``--columns`` (1-based), the position
    column (a single value or a comma-separated list) is replaced by the
    value(s) returned from ``nc.chr_pos_to_genome_pos`` and the chromosome
    column is replaced by ``genome(<chrom>)`` or, if ``--new-chrom`` is
    given, by that value. All other columns are passed through. Output lines
    are tab-joined and written to stdout.

    Lines whose conversion raises ``KeyError`` are reported on stderr and
    skipped. A ``BrokenPipeError`` on stdout stops processing.
    """
    parser = argparse.ArgumentParser(description="""

    python chr_pos_to_genome_pos.py -c 1:2,3:4

    Convert chromosome,position pairs to genome_positions. Assumes that the
    coordinates refer to the hg19 assembly (unless otherwise specified).

    Example:

       2       NM_000014       chr12   -       9220303 9268825

    -> python scripts/chr_pos_to_genome_pos.py -c 3:5,3:6

       2       NM_000014       genome  -       2115405269      2115453791

    --------------------------------

    This also works with space-delimited fields:

        chr5    56765,56766

    ->python scripts/chr_pos_to_genome_pos.py -c 1:2

        genome  881683465,881683466

""")

    parser.add_argument('-a', '--assembly', default='hg19')
    parser.add_argument('-s', '--chromsizes-file', default=None)
    parser.add_argument('-n', '--new-chrom', default=None)
    # The previous default '1,2' parsed into two single-column "pairs" and
    # raised IndexError on the first input line; '1:2' is the same intent in
    # the syntax the parser actually accepts.
    parser.add_argument(
        '-c', '--columns', default='1:2',
        help="Which columns to translate to genome positions. "
             "Column pairs should be 1-based and separated by colons")

    args = parser.parse_args()

    # Parse the column specification once instead of once per input line and
    # convert it to 0-based (chromosome index, position index) tuples.
    column_pairs = []
    for spec in args.columns.split(','):
        try:
            cols = [int(col) for col in spec.split(':')]
        except ValueError:
            parser.error("invalid column pair: {!r}".format(spec))
        if len(cols) < 2:
            parser.error(
                "column pair {!r} must have the form CHROM:POS".format(spec))
        column_pairs.append((cols[0] - 1, cols[1] - 1))

    if args.chromsizes_file is not None:
        chrom_info = nc.get_chrominfo_from_file(args.chromsizes_file)
    else:
        chrom_info = nc.get_chrominfo(args.assembly)

    for line in sys.stdin:
        line_parts = line.strip().split()
        translated_positions = {}
        translated_chroms = {}

        try:
            for chrom_idx, pos_idx in column_pairs:
                # The position column may hold a comma separated list of
                # values (although it doesn't need to).
                chrom = line_parts[chrom_idx]
                poss = line_parts[pos_idx].strip(",").split(',')
                translated_positions[pos_idx] = ",".join(
                    str(nc.chr_pos_to_genome_pos(chrom, int(pos), chrom_info))
                    for pos in poss)
                translated_chroms[chrom_idx] = chrom
        except KeyError as ke:
            print("KeyError:", ke, line.strip(), file=sys.stderr)
            continue

        line_output = []
        for i, part in enumerate(line_parts):
            if i in translated_chroms:
                # Replace the chromosome identifier (e.g. 'chr1'). Use the
                # chromosome recorded for THIS column; the loop variable from
                # the translation step only holds the last pair's chromosome.
                if args.new_chrom is None:
                    line_output.append(
                        'genome({})'.format(translated_chroms[i]))
                else:
                    line_output.append(args.new_chrom)
            elif i in translated_positions:
                # This column contained a position: emit the translated one.
                line_output.append(translated_positions[i])
            else:
                # Untranslated columns are output as they are.
                line_output.append(part)

        try:
            print("\t".join(map(str, line_output)))
        except BrokenPipeError:
            # Output is probably being run through "head" or something similar
            break