Example #1
0
def test_alt_chr_pos_to_chromorder():
    """Position 0 of chromosomes '6', '7' and '8' in 'b37' maps to the expected genome positions."""
    expected_starts = (
        ('6', 1062541960),
        ('7', 1233657027),
        ('8', 1392795690),
    )
    for chrom, genome_start in expected_starts:
        assert nc.chr_pos_to_genome_pos(chrom, 0, 'b37') == genome_start
Example #2
0
def test_chr_pos_to_chromorder():
    """Position 0 of chr6, chr7 and chr8 maps to the expected genome positions when no assembly is passed."""
    expected_starts = (
        ('chr6', 1062541960),
        ('chr7', 1233657027),
        ('chr8', 1392795690),
    )
    for chrom, genome_start in expected_starts:
        assert nc.chr_pos_to_genome_pos(chrom, 0) == genome_start
Example #3
0
def test_chr_pos_to_genome_pos():
    '''
    Check the chromosome-position to genome-position conversion for
    position 10 on chr1 and chr2 (no assembly argument passed).
    '''
    assert nc.chr_pos_to_genome_pos('chr1', 10) == 10
    assert nc.chr_pos_to_genome_pos('chr2', 10) == 249250631
Example #4
0
    def line_to_dict(line):
        """
        Convert one whitespace-delimited line describing a pair of regions
        into an entry dictionary.

        The column indices (``chr1_col``, ``from1_col``, ``to1_col``,
        ``chr2_col``, ``from2_col``, ``to2_col``), ``assembly`` and
        ``importance_column`` are read from the enclosing scope.

        :param line: the raw input line
        :return: a dict with the keys ``xs`` and ``ys`` (start/end genome
            positions of the first and second region), ``uid``,
            ``chrOffset``, ``importance`` and ``fields`` (the raw line)
        :raises KeyError: if a chromosome name cannot be converted to a
            genome position for the current assembly
        """
        parts = line.split()
        d = {}
        try:
            d['xs'] = [
                nc.chr_pos_to_genome_pos(parts[chr1_col],
                                         int(parts[from1_col]), assembly),
                nc.chr_pos_to_genome_pos(parts[chr1_col],
                                         int(parts[to1_col]), assembly)
            ]
            d['ys'] = [
                nc.chr_pos_to_genome_pos(parts[chr2_col],
                                         int(parts[from2_col]), assembly),
                nc.chr_pos_to_genome_pos(parts[chr2_col],
                                         int(parts[to2_col]), assembly)
            ]
        except KeyError:
            # Re-raise with a message that points at the most likely cause.
            error_str = (
                "ERROR converting chromosome position to genome position. "
                "Please make sure you've specified the correct assembly "
                "using the --assembly option. "
                "Current assembly: {}, chromosomes: {},{}".format(
                    assembly, parts[chr1_col], parts[chr2_col]))
            raise KeyError(error_str)

        d['uid'] = slugid.nice().decode('utf-8')

        # Offset that turns a position on the first chromosome into a genome
        # position.
        d['chrOffset'] = d['xs'][0] - int(parts[from1_col])

        if importance_column is None:
            # No importance column given: use the longer of the two extents.
            d['importance'] = max(d['xs'][1] - d['xs'][0],
                                  d['ys'][1] - d['ys'][0])
        elif importance_column == 'random':
            d['importance'] = random.random()
        else:
            # The value has to come from the parsed line. The previous lookup
            # in `d` could only ever find 'xs', 'ys', 'uid' or 'chrOffset'
            # and raised a KeyError for any column number.
            # NOTE(review): assumes importance_column is a 1-based column
            # number as given on the command line -- confirm against caller.
            d['importance'] = float(parts[int(importance_column) - 1])

        d['fields'] = line

        return d
 def END_ABS(self, CHROM, END):
     """Convert END on chromosome "chr" + CHROM to a genome position using the "hg38" chromosome info."""
     hg38_chroms = nc.get_chrominfo("hg38")
     chrom_name = "chr" + CHROM
     return nc.chr_pos_to_genome_pos(chrom_name, END, hg38_chroms)
 def START_ABS(self, CHROM, START):
     """Convert START on chromosome "chr" + CHROM to a genome position using the "hg38" chromosome info."""
     hg38_chroms = nc.get_chrominfo("hg38")
     chrom_name = "chr" + CHROM
     return nc.chr_pos_to_genome_pos(chrom_name, START, hg38_chroms)
Example #7
0
def test_dm3_chr_pos_genome_pos():
    """Position 100 on chr2L and chr2R of the 'dm3' assembly maps to the expected genome positions."""
    assembly = 'dm3'

    chr2l_pos = nc.chr_pos_to_genome_pos('chr2L', 100, assembly)
    assert chr2l_pos == 100

    chr2r_pos = nc.chr_pos_to_genome_pos('chr2R', 100, assembly)
    assert chr2r_pos == 23380516
Example #8
0
def test_test3chroms_chr_pos_genome_pos():
    """Position 100 on chr1 and chr2 of the 'test3chroms' assembly maps to 100 and 1100."""
    assembly = 'test3chroms'

    chr1_pos = nc.chr_pos_to_genome_pos('chr1', 100, assembly)
    assert chr1_pos == 100

    chr2_pos = nc.chr_pos_to_genome_pos('chr2', 100, assembly)
    assert chr2_pos == 1100
Example #9
0
 def POS_ABS(self, CHROM, POS):
     """Convert POS on chromosome 'chr' + CHROM to a genome position using the 'hg38' chromosome info."""
     hg38_chroms = nc.get_chrominfo('hg38')
     chrom_name = 'chr' + CHROM
     return nc.chr_pos_to_genome_pos(chrom_name, POS, hg38_chroms)
Example #10
0
def test_test3chroms_chr_pos_genome_pos():
    """Check 'test3chroms' conversions: chr1:100 -> 100 and chr2:100 -> 1100."""
    expected = (('chr1', 100), ('chr2', 1100))
    for chrom, genome_pos in expected:
        assert nc.chr_pos_to_genome_pos(chrom, 100, 'test3chroms') == genome_pos
Example #11
0
def test_alt_chr_pos_to_chromorder():
    """Check the genome positions of position 0 on chromosomes '6', '7' and '8' of 'b37'."""
    assembly = 'b37'

    chr6_start = nc.chr_pos_to_genome_pos('6', 0, assembly)
    assert chr6_start == 1062541960

    chr7_start = nc.chr_pos_to_genome_pos('7', 0, assembly)
    assert chr7_start == 1233657027

    chr8_start = nc.chr_pos_to_genome_pos('8', 0, assembly)
    assert chr8_start == 1392795690
Example #12
0
def test_chr_pos_to_chromorder():
    """Check the genome positions of position 0 on chr6, chr7 and chr8 when no assembly is passed."""
    chr6_start = nc.chr_pos_to_genome_pos('chr6', 0)
    assert chr6_start == 1062541960

    chr7_start = nc.chr_pos_to_genome_pos('chr7', 0)
    assert chr7_start == 1233657027

    chr8_start = nc.chr_pos_to_genome_pos('chr8', 0)
    assert chr8_start == 1392795690
Example #13
0
def test_chr_pos_to_genome_pos():
    """Check position 100 on chr1 and chr2 (no assembly passed) and on '1' in 'grch37'."""
    # Value the chr2 assertion adds to the position; taken from the test data.
    chr2_offset = 249250621

    chr1_pos = nc.chr_pos_to_genome_pos('chr1', 100)
    assert chr1_pos == 100

    chr2_pos = nc.chr_pos_to_genome_pos('chr2', 100)
    assert chr2_pos == chr2_offset + 100

    grch37_pos = nc.chr_pos_to_genome_pos('1', 100, 'grch37')
    assert grch37_pos == 100
Example #14
0
def test_dm3_chr_pos_genome_pos():
    """Check 'dm3' conversions: chr2L:100 -> 100 and chr2R:100 -> 23380516."""
    expected = (('chr2L', 100), ('chr2R', 23380516))
    for chrom, genome_pos in expected:
        assert nc.chr_pos_to_genome_pos(chrom, 100, 'dm3') == genome_pos
Example #15
0
def test_clodius_aggregate_bedgraph1():
    """
    Run the ``bedgraph`` aggregation command on the dm3 sample data and
    spot-check the values written to the resulting hitile file.
    """
    # Kept as a local import, as in the original test body.
    import numpy as np

    input_file = op.join(testdir, 'sample_data', 'dm3_values.tsv')
    output_file = '/tmp/dm3_values.hitile'

    runner = clt.CliRunner()
    result = runner.invoke(
        cca.bedgraph,
        [input_file, '--output-file', output_file, '--assembly', 'dm3'])

    # Check the exit status before reading the output so that a failed run is
    # reported as such (with the command output) instead of as an HDF5 error.
    assert result.exit_code == 0, result.output

    # Open read-only and make sure the handle is closed when the checks end.
    with h5py.File(output_file, 'r') as f:
        # TODO: Make assertions about f['meta'].attrs['max-zoom']
        values = f['values_0']

        # genome positions are 0 based as stored in hitile files
        assert np.isnan(values[8])
        assert values[9] == 1
        assert values[10] == 1
        assert values[13] == 1
        assert np.isnan(values[14])
        assert np.isnan(values[15])

        chrom_info = nc.get_chrominfo('dm3')
        chr_2r_pos = nc.chr_pos_to_genome_pos('chr2R', 0, chrom_info)

        assert np.isnan(values[chr_2r_pos + 28])
        assert values[chr_2r_pos + 29] == 77
        assert values[chr_2r_pos + 38] == 77
        assert values[chr_2r_pos + 39] == 0

        d = cht.get_data(f, 0, 0)
        total = np.nansum(d)
        assert 1.0 < total < 10.0

    return

    # NOTE(review): everything below is unreachable because of the `return`
    # above. It looks like a disabled second scenario (test3chroms); it is
    # kept here so it can be re-enabled deliberately -- confirm whether it
    # should be restored as its own test or removed.
    input_file = op.join(testdir, 'sample_data', 'test3chroms_values.tsv')
    output_file = '/tmp/test3chroms_values.hitile'

    runner = clt.CliRunner()
    result = runner.invoke(cca.bedgraph, [
        input_file, '--output-file', output_file, '--assembly', 'test3chroms'
    ])

    assert result.exit_code == 0, result.output

    with h5py.File(output_file, 'r') as f:
        # TODO: Make assertions about f['meta'].attrs['max-zoom']
        values = f['values_0']

        # genome positions are 0 based as stored in hitile files
        assert values[8] == 0
        assert values[9] == 1
        assert values[10] == 1
        assert values[13] == 1
        assert values[14] == 0
        assert values[15] == 0

        chr2_pos = nc.chr_pos_to_genome_pos('chr2', 0, 'test3chroms')

        assert values[chr2_pos + 28] == 0
        assert values[chr2_pos + 29] == 77
        assert values[chr2_pos + 38] == 77
        assert values[chr2_pos + 39] == 0

        d = cht.get_data(f, 0, 0)
        assert sum(d) == 770 + 880 + 5
Example #16
0
def main():
    """
    Read lines from stdin, translate the requested (chromosome, position)
    column pairs to genome positions and print the rewritten lines to stdout.

    Chromosome sizes come from ``--chromsizes-file`` when given, otherwise
    from the assembly named by ``--assembly``. Each translated chromosome
    column is replaced by ``genome(<chrom>)`` (or by ``--new-chrom`` when
    given) and each translated position column by its genome position(s).
    Lines whose conversion raises a KeyError are reported on stderr and
    skipped.
    """
    parser = argparse.ArgumentParser(description="""
    
    python chr_pos_to_genome_pos.py -c 1:2,3:4

    Convert chromosome,position pairs to genome_positions. Assumes that the
    coordinates refer to the hg19 assembly (unless otherwise specified).

    Example:

    2       NM_000014       chr12   -       9220303 9268825

    -> python scripts/chr_pos_to_genome_pos.py -c 3:5,3:6

    2       NM_000014       genome  -       2115405269      2115453791

    --------------------------------

    This also works with space-delimited fields:

    chr5    56765,56766

    ->python scripts/chr_pos_to_genome_pos.py -c 1:2

    genome  881683465,881683466

""")

    parser.add_argument('-a', '--assembly', default='hg19')
    parser.add_argument('-s', '--chromsizes-file', default=None)
    parser.add_argument('-n', '--new-chrom', default=None)
    # The default has to be a colon-separated pair: the previous default
    # '1,2' was parsed as two one-element pairs and failed with an
    # IndexError on the first input line.
    parser.add_argument(
        '-c',
        '--columns',
        default='1:2',
        help="Which columns to translate to genome positions. "
        "Column pairs should be 1-based and separated by colons")

    args = parser.parse_args()

    if args.chromsizes_file is not None:
        chrom_info = nc.get_chrominfo_from_file(args.chromsizes_file)
    else:
        chrom_info = nc.get_chrominfo(args.assembly)

    # Parse the column specification once, before reading any input: it does
    # not depend on the lines. Columns are 1-based on the command line and
    # are stored here as 0-based (chromosome index, position index) pairs.
    column_pairs = []
    for pair_spec in args.columns.split(','):
        try:
            columns = [int(col) for col in pair_spec.split(':')]
        except ValueError:
            columns = []
        if len(columns) < 2:
            parser.error(
                "invalid column pair {!r}: expected "
                "<chrom_column>:<position_column>".format(pair_spec))
        column_pairs.append((columns[0] - 1, columns[1] - 1))

    for line in sys.stdin:
        line_parts = line.strip().split()
        translated_positions = {}
        translated_chroms = {}

        try:
            for chrom_col, pos_col in column_pairs:
                # the position column may hold a comma separated list of
                # values (although it doesn't actually need to)
                chrom = line_parts[chrom_col]
                poss = line_parts[pos_col].strip(",").split(',')
                genome_pos = ",".join(
                    str(nc.chr_pos_to_genome_pos(chrom, int(pos), chrom_info))
                    for pos in poss)

                # remember which columns were translated so that they are
                # replaced in the output
                translated_positions[pos_col] = genome_pos
                translated_chroms[chrom_col] = chrom
        except KeyError as ke:
            print("KeyError:", ke, line.strip(), file=sys.stderr)
            continue

        line_output = []
        for i, part in enumerate(line_parts):
            if i in translated_chroms:
                # replace the chromosome identifier (e.g. 'chr1') to indicate
                # that the positions are genome positions; use the chromosome
                # recorded for *this* column rather than whichever pair
                # happened to be processed last
                if args.new_chrom is None:
                    line_output.append(
                        'genome({})'.format(translated_chroms[i]))
                else:
                    line_output.append(args.new_chrom)
            elif i in translated_positions:
                # this column contained a position: emit the translated one
                line_output.append(translated_positions[i])
            else:
                # untouched column: output it as is
                line_output.append(part)

        try:
            print("\t".join(line_output))
        except BrokenPipeError:
            # Output is probably being run through "head" or something similar
            break
Example #17
0
def test_chr_pos_to_genome_pos():
    """Position 100 maps as expected on chr1 and chr2 (no assembly passed) and on '1' in 'grch37'."""
    cases = (
        (('chr1', 100), 100),
        (('chr2', 100), 249250621 + 100),
        (('1', 100, 'grch37'), 100),
    )
    for call_args, genome_pos in cases:
        assert nc.chr_pos_to_genome_pos(*call_args) == genome_pos
def main():
    """
    Translate (chromosome, position) column pairs of the lines read from
    stdin into genome positions and write the rewritten lines to stdout.

    Chromosome sizes are loaded from ``--chromsizes-file`` if it is given and
    from the ``--assembly`` name otherwise. A translated chromosome column is
    printed as ``genome(<chrom>)`` unless ``--new-chrom`` supplies a
    replacement; a translated position column is printed as its genome
    position(s). A line whose conversion raises a KeyError is reported on
    stderr and produces no output.
    """
    parser = argparse.ArgumentParser(description="""
    
    python chr_pos_to_genome_pos.py -c 1:2,3:4

    Convert chromosome,position pairs to genome_positions. Assumes that the
    coordinates refer to the hg19 assembly (unless otherwise specified).

    Example:

    2       NM_000014       chr12   -       9220303 9268825

    -> python scripts/chr_pos_to_genome_pos.py -c 3:5,3:6

    2       NM_000014       genome  -       2115405269      2115453791

    --------------------------------

    This also works with space-delimited fields:

    chr5    56765,56766

    ->python scripts/chr_pos_to_genome_pos.py -c 1:2

    genome  881683465,881683466

""")

    parser.add_argument('-a', '--assembly', default='hg19')
    parser.add_argument('-s', '--chromsizes-file', default=None)
    parser.add_argument('-n', '--new-chrom', default=None)
    # '1:2' replaces the old default '1,2', which split into two one-element
    # "pairs" and raised an IndexError as soon as a line was processed.
    parser.add_argument('-c', '--columns', default='1:2',
            help="Which columns to translate to genome positions. "
            "Column pairs should be 1-based and separated by colons")

    args = parser.parse_args()

    if args.chromsizes_file is not None:
        chrom_info = nc.get_chrominfo_from_file(args.chromsizes_file)
    else:
        chrom_info = nc.get_chrominfo(args.assembly)

    # The column specification is the same for every line, so parse it once
    # up front. Command-line columns are 1-based; keep them as 0-based
    # (chromosome index, position index) tuples.
    translate_pairs = []
    for spec in args.columns.split(','):
        try:
            cols = [int(col) for col in spec.split(':')]
        except ValueError:
            cols = []
        if len(cols) < 2:
            parser.error(
                "invalid column pair {!r}: expected "
                "<chrom_column>:<position_column>".format(spec))
        translate_pairs.append((cols[0] - 1, cols[1] - 1))

    for line in sys.stdin:
        line_parts = line.strip().split()
        translated_positions = {}
        translated_chroms = {}

        try:
            for chrom_idx, pos_idx in translate_pairs:
                # assume that the position column is a comma separated list
                # of values (although it doesn't actually need to be)
                chrom = line_parts[chrom_idx]
                poss = line_parts[pos_idx].strip(",").split(',')
                genome_pos = ",".join(
                    str(nc.chr_pos_to_genome_pos(chrom, int(pos), chrom_info))
                    for pos in poss)

                # note that we've translated these columns so that the output
                # loop below substitutes them
                translated_positions[pos_idx] = genome_pos
                translated_chroms[chrom_idx] = chrom
        except KeyError as ke:
            print("KeyError:", ke, line.strip(), file=sys.stderr)
            continue

        line_output = []
        for i, part in enumerate(line_parts):
            if i in translated_chroms:
                # Label the column with the chromosome that was recorded for
                # this very column; the loop variable `chrom` only holds the
                # chromosome of the last pair processed.
                if args.new_chrom is None:
                    line_output.append(
                        'genome({})'.format(translated_chroms[i]))
                else:
                    line_output.append(args.new_chrom)
            elif i in translated_positions:
                # this column used to contain a position: emit the
                # translated position instead
                line_output.append(translated_positions[i])
            else:
                # not a translated column: output it as is
                line_output.append(part)

        try:
            print("\t".join(line_output))
        except BrokenPipeError:
            # Output is probably being run through "head" or something similar
            break