Esempio n. 1
0
def test_clodius_aggregate_bedpe2():
    """Aggregate galGal6.bed through ``cca._bedpe`` using a chromsizes file.

    The same BED columns are passed for both anchors, and no assembly name
    is given; chromosome sizes come from the galGal6 chromsizes file. The
    result is checked against the stored reference database.
    """
    sample_dir = op.join(testdir, 'sample_data')
    input_file = op.join(sample_dir, 'galGal6.bed')
    chromsizes_file = op.join(sample_dir, 'galGal6.chrom.sizes')
    expected_file = op.join(sample_dir, 'galGal6.bed.multires.db')

    bedpe_options = dict(
        chr1_col=1, chr2_col=1,
        from1_col=2, from2_col=2,
        to1_col=3, to2_col=3,
        importance_column=None,
        chromosome=None,
        chromsizes_filename=chromsizes_file,
        max_per_tile=100,
        tile_size=1024,
        has_header=True,
    )

    with tempfile.TemporaryDirectory() as workdir:
        aggregated_path = op.join(workdir, 'blah.bed2ddb')

        # Completing this call without an exception is part of what the
        # test verifies.
        cca._bedpe(input_file, aggregated_path, None, **bedpe_options)

        info = cdt.get_tileset_info(aggregated_path)

        produced_size = os.stat(aggregated_path).st_size
        reference_size = os.stat(expected_file).st_size

        assert info['max_length'] == 1065365426
        assert produced_size == reference_size
def aggregate_file(filename, filetype, assembly, chromsizes_filename,
                   has_header):
    """Aggregate a bed or bedpe file into a multi-resolution database.

    Files whose ``filetype`` is neither ``'bedfile'`` nor ``'bedpe'`` are
    passed through untouched.

    Args:
        filename: Path of the file to import.
        filetype: Declared type of ``filename``; ``'bedfile'`` and
            ``'bedpe'`` trigger aggregation.
        assembly: Assembly name forwarded to the aggregator, or ``None``.
        chromsizes_filename: Path to a chromosome sizes file, or ``None``.
        has_header: Whether the input file starts with a header line.

    Returns:
        A ``(path, filetype)`` tuple. For aggregated inputs ``path`` points
        at the newly written database (``'beddb'`` or ``'bed2ddb'``) inside
        a freshly created temporary directory that the caller is
        responsible for removing. Returns ``None`` (after printing a
        message to stderr) when aggregation is required but neither
        ``assembly`` nor ``chromsizes_filename`` was supplied.
    """
    if filetype not in ('bedfile', 'bedpe'):
        return (filename, filetype)

    if assembly is None and chromsizes_filename is None:
        print(
            'An assembly or set of chromosome sizes is required when importing '
            'bed files. Please specify one or the other using the --assembly or '
            '--chromsizes-filename parameters',
            file=sys.stderr)
        return

    # A TemporaryDirectory() context manager would delete the aggregated
    # file as soon as this function returned, handing the caller a path
    # that no longer exists. mkdtemp() leaves the directory in place.
    td = tempfile.mkdtemp()

    # basename() keeps the output inside the temporary directory even when
    # `filename` is absolute or contains directory components.
    base_name = op.basename(filename)

    if filetype == 'bedfile':
        output_file = op.join(td, base_name + '.beddb')

        print("Aggregating bedfile")
        cca._bedfile(filename,
                     output_file,
                     assembly,
                     importance_column='random',
                     has_header=has_header,
                     chromosome=None,
                     max_per_tile=50,
                     delimiter=None,
                     chromsizes_filename=chromsizes_filename,
                     offset=0,
                     tile_size=1024)

        # because we aggregated the file, the new filetype is beddb
        return (output_file, 'beddb')

    output_file = op.join(td, base_name + '.bed2ddb')

    print("Aggregating bedpe")
    cca._bedpe(filename,
               output_file,
               assembly,
               importance_column='random',
               has_header=has_header,
               chromosome=None,
               max_per_tile=50,
               chromsizes_filename=chromsizes_filename,
               tile_size=1024)

    # because we aggregated the file, the new filetype is bed2ddb
    return (output_file, 'bed2ddb')
Esempio n. 3
0
def test_clodius_aggregate_bedpe():
    """Smoke-test bedpe aggregation of isidro.bedpe with the b37 assembly.

    The aggregated database is written into a temporary directory and then
    queried for 2D tiles and tileset info; the query results are not
    asserted on yet.
    """
    bedpe_path = op.join(testdir, "sample_data", "isidro.bedpe")

    aggregate_options = {
        "importance_column": None,
        "chromosome": None,
        "max_per_tile": 100,
        "tile_size": 1024,
        "has_header": True,
    }

    with tempfile.TemporaryDirectory() as workdir:
        db_path = op.join(workdir, "isidro.bed2ddb")

        cca._bedpe(bedpe_path, db_path, "b37", **aggregate_options)
        """
        runner = clt.CliRunner()
        result = runner.invoke(
                cca.bedpe,
                [input_file,
                '--output-file', output_file,
                '--importance-column', 'random',
                '--has-header',
                '--assembly', 'b37'])

        # print('output:', result.output, result)
        assert(result.exit_code == 0)
        """

        # Single tile at the lowest zoom level.
        cdt.get_2d_tiles(db_path, 0, 0, 0)

        cdt.get_tileset_info(db_path)
        # TODO: Make assertions about result

        # 2x2 block of tiles at zoom level 1.
        cdt.get_2d_tiles(db_path, 1, 0, 0, numx=2, numy=2)
        # TODO: Make assertions about result

        cdt.get_tileset_info(db_path)
Esempio n. 4
0
def test_clodius_aggregate_bedpe():
    """Smoke-test bedpe aggregation of isidro.bedpe with the b37 assembly.

    The aggregated database is written into a per-run temporary directory
    instead of a fixed ``/tmp`` path, so the test is portable, cannot
    collide with a stale file from an earlier run, and cleans up after
    itself. The tile and tileset-info queries only need to complete without
    raising; their results are not asserted on yet.
    """
    # Imported locally so this block does not depend on a module-level
    # import being present.
    import tempfile

    input_file = op.join(testdir, 'sample_data', 'isidro.bedpe')

    with tempfile.TemporaryDirectory() as tmpdirname:
        output_file = op.join(tmpdirname, 'isidro.bed2ddb')

        # The click CLI entry point (cca.bedpe) is not exercised here; the
        # underlying implementation is called directly.
        cca._bedpe(input_file,
                   output_file,
                   'b37',
                   importance_column=None,
                   chromosome=None,
                   max_per_tile=100,
                   tile_size=1024,
                   has_header=True)

        # Single tile at the lowest zoom level.
        cdt.get_2d_tiles(output_file, 0, 0, 0)

        cdt.get_tileset_info(output_file)
        # TODO: Make assertions about result

        # 2x2 block of tiles at zoom level 1.
        cdt.get_2d_tiles(output_file, 1, 0, 0, numx=2, numy=2)
        # TODO: Make assertions about result

        cdt.get_tileset_info(output_file)
Esempio n. 5
0
def aggregate_file(filename, filetype, assembly, chromsizes_filename,
                   has_header, no_upload, tmp_dir):
    """Aggregate a bed or bedpe file into a multi-resolution database.

    Files whose ``filetype`` is neither ``'bedfile'`` nor ``'bedpe'`` are
    passed through untouched.

    Args:
        filename: Path of the file to import.
        filetype: Declared type of ``filename``; ``'bedfile'`` and
            ``'bedpe'`` trigger aggregation.
        assembly: Assembly name forwarded to the aggregator, or ``None``.
        chromsizes_filename: Path to a chromosome sizes file, or ``None``.
        has_header: Whether the input file starts with a header line.
        no_upload: When true the file was meant to be linked rather than
            uploaded, which is incompatible with aggregation.
        tmp_dir: Directory in which the aggregated database is written.

    Returns:
        A ``(path, filetype)`` tuple. For aggregated inputs ``path`` points
        at the new database inside ``tmp_dir`` and ``filetype`` is
        ``'beddb'`` or ``'bed2ddb'``. Returns ``None`` (after printing a
        message to stderr) when aggregation is required but neither
        ``assembly`` nor ``chromsizes_filename`` was supplied.

    Raises:
        Exception: If ``no_upload`` is set for a file that must be
            aggregated.
    """
    if filetype == 'bedfile':
        if no_upload:
            # Exception() accepts no keyword arguments; passing file= here
            # used to raise a TypeError and hide the real message.
            raise Exception(
                "Bedfile files need to be aggregated and cannot be linked. Consider not using the --link-file option")

        if assembly is None and chromsizes_filename is None:
            print(
                'An assembly or set of chromosome sizes is required when importing bed files. Please specify one or the other using the --assembly or --chromsizes-filename parameters',
                file=sys.stderr)
            return

        # basename() keeps the output inside tmp_dir even when `filename`
        # is absolute or contains directory components.
        output_file = op.join(tmp_dir, op.basename(filename) + '.beddb')

        print("Aggregating bedfile")
        cca._bedfile(filename,
                     output_file,
                     assembly,
                     importance_column='random',
                     has_header=has_header,
                     chromosome=None,
                     max_per_tile=50,
                     delimiter=None,
                     chromsizes_filename=chromsizes_filename,
                     offset=0,
                     tile_size=1024)

        # because we aggregated the file, the new filetype is beddb
        return (output_file, 'beddb')
    elif filetype == 'bedpe':
        if no_upload:
            # See the note in the bedfile branch: no file= keyword here.
            raise Exception(
                "Bedpe files need to be aggregated and cannot be linked. Consider not using the --link-file option")

        if assembly is None and chromsizes_filename is None:
            print(
                'An assembly or set of chromosome sizes is required when importing bed files. Please specify one or the other using the --assembly or --chromsizes-filename parameters',
                file=sys.stderr)
            return

        output_file = op.join(tmp_dir, op.basename(filename) + '.bed2ddb')

        print("Aggregating bedpe")
        cca._bedpe(filename,
                   output_file,
                   assembly,
                   importance_column='random',
                   has_header=has_header,
                   chromosome=None,
                   max_per_tile=50,
                   chromsizes_filename=chromsizes_filename,
                   tile_size=1024)

        # because we aggregated the file, the new filetype is bed2ddb
        return (output_file, 'bed2ddb')
    else:
        return (filename, filetype)