def test_add_info_table():
    """Check ``add_info_table`` against the ``test.info.added`` fixture."""
    source_path = os.path.join(HERE, 'data/test.manta.vcf')
    expected_path = os.path.join(HERE, 'data/test.info.added.manta.vcf')
    observed = viola.read_vcf(source_path)
    reference = viola.read_vcf(expected_path)

    # Use the svlen table as the payload, relabelling its three columns so
    # the last one is called 'test'.
    payload = observed.svlen
    payload.columns = ['id', 'value_idx', 'test']
    observed.add_info_table('test', payload, 1, 'Integer', 'test info')

    assert_vcf_equal(observed, reference)
Example #2
0
def test_drop_by_id():
    """``drop_by_id`` must accept both a single ID and a list of IDs."""
    original = viola.read_vcf(os.path.join(HERE, 'data/test.manta.vcf'))
    expected_single = viola.read_vcf(
        os.path.join(HERE, 'data/test.dropped.manta.vcf'))
    expected_multi = viola.read_vcf(
        os.path.join(HERE, 'data/test.dropped2.manta.vcf'))

    # Both drops start from the same source object.
    without_one = original.drop_by_id('test1')
    without_two = original.drop_by_id(['test2', 'test3'])

    assert_vcf_equal(without_one, expected_single)
    assert_vcf_equal(without_two, expected_multi)
Example #3
0
def generate_feature_matrix(input_dir, input_files, input_files_id, format_,
                            caller, svtype_col_name, as_breakpoint,
                            definitions, output):
    """
    Generate feature matrix from VCF or BEDPE files.
    """
    # NOTE: the docstring above is kept verbatim in case it is surfaced as
    # command help text; the detailed contract is documented here instead.
    #
    # Parameters (as used below):
    #   input_dir       -- directory handed to viola.read_*_multi; mutually
    #                      exclusive with input_files
    #   input_files     -- comma-separated list of input file paths
    #   input_files_id  -- optional comma-separated names, one per file in
    #                      input_files; defaults to 0..n-1
    #   format_         -- 'bedpe' selects the BEDPE readers, any other value
    #                      selects the VCF readers
    #   caller          -- forwarded as variant_caller to the VCF readers
    #   svtype_col_name -- forwarded to the BEDPE readers
    #   as_breakpoint   -- VCF only: convert breakends to breakpoints
    #   definitions     -- forwarded to classify_manual_svtype
    #   output          -- destination of the tab-separated result
    #
    # Returns None. When neither or both of input_dir / input_files are
    # given, nothing is read or written.

    # Exactly one input source is supported; anything else stays a no-op so
    # existing callers keep their behaviour.
    if (input_dir is None) == (input_files is None):
        return

    if format_ == 'bedpe':
        if input_files is None:
            data = viola.read_bedpe_multi(input_dir,
                                          svtype_col_name=svtype_col_name)
        else:
            ls_bedpe = [
                viola.read_bedpe(path, svtype_col_name=svtype_col_name)
                for path in input_files.split(',')
            ]
            if input_files_id is None:
                ls_names = range(len(ls_bedpe))
            else:
                ls_names = input_files_id.split(',')
            data = viola.MultiBedpe(ls_bedpe, ls_names)
    else:
        if input_files is None:
            data = viola.read_vcf_multi(input_dir,
                                        variant_caller=caller,
                                        as_breakpoint=as_breakpoint)
        else:
            ls_vcf = [
                viola.read_vcf(path, variant_caller=caller)
                for path in input_files.split(',')
            ]
            if as_breakpoint:
                ls_vcf = [vcf.breakend2breakpoint() for vcf in ls_vcf]
            if input_files_id is None:
                ls_names = range(len(ls_vcf))
            else:
                ls_names = input_files_id.split(',')
            # Vcf objects belong in a MultiVcf container (previously they
            # were wrapped in MultiBedpe, unlike the directory branch).
            data = viola.MultiVcf(ls_vcf, ls_names)

    result = data.classify_manual_svtype(definitions=definitions)
    result.to_csv(output, sep='\t')
def test_merge3():
    """Integrated merge of delly + manta must yield the four expected IDs."""
    manta_vcf = viola.read_vcf(StringIO(DATA1), variant_caller='manta')
    delly_vcf = viola.read_vcf(StringIO(DATA2), variant_caller='delly')

    # delly is listed first in the merge input.
    merged = viola.merge([delly_vcf, manta_vcf],
                         mode='confidence_intervals',
                         integration=True)

    expected_ids = {'manta_M1', 'delly_MD1', 'delly_MDL1', 'delly_D1'}
    assert merged.sv_count == 4
    assert merged.get_ids() == expected_ids
Example #5
0
def test_merge():
    """Smoke test: the method-style ``merge`` runs on all four callers."""
    fixtures = [
        ('manta', 'data/test.merge.manta.vcf'),
        ('delly', 'data/test.merge.delly.vcf'),
        ('lumpy', 'data/test.merge.lumpy.vcf'),
        ('gridss', 'data/test.merge.gridss.vcf'),
    ]
    caller_names = [name for name, _ in fixtures]
    loaded = [
        viola.read_vcf(os.path.join(HERE, relative), variant_caller=name)
        for name, relative in fixtures
    ]

    # The manta object (first entry) drives the merge; the result is not
    # inspected, only the absence of an exception matters.
    merged = loaded[0].merge(threshold=100,
                             ls_caller_names=caller_names,
                             ls_vcf=loaded)
Example #6
0
def test_merge2():
    """With manta listed first, multi-caller merged SVs carry manta IDs."""
    gridss_bp = viola.read_vcf(
        StringIO(DATA1), variant_caller='gridss').breakend2breakpoint()
    manta_bp = viola.read_vcf(
        StringIO(DATA2), variant_caller='manta').breakend2breakpoint()

    merged = viola.merge([manta_bp, gridss_bp],
                         mode='confidence_intervals',
                         integration=True)
    # Keep only records backed by more than one caller.
    supported = merged.filter('supportingcallercount > 1')

    expected_ids = {
        'manta_MantaDUP:TANDEM:88695:0:1:0:0:0',
        'manta_viola_breakpoint:0',
        'manta_MantaDUP:TANDEM:93040:0:1:0:0:0',
        'manta_viola_breakpoint:1',
        'manta_viola_breakpoint:2',
    }
    assert supported.sv_count == 5
    assert supported.get_ids() == expected_ids
Example #7
0
def test_merge1():
    """With gridss listed first, multi-caller merged SVs carry gridss IDs."""
    gridss_bp = viola.read_vcf(
        StringIO(DATA1), variant_caller='gridss').breakend2breakpoint()
    manta_bp = viola.read_vcf(
        StringIO(DATA2), variant_caller='manta').breakend2breakpoint()

    merged = viola.merge([gridss_bp, manta_bp],
                         mode='confidence_intervals',
                         integration=True)
    # Keep only records backed by more than one caller.
    supported = merged.filter('supportingcallercount > 1')

    expected_ids = {
        'gridss_viola_breakpoint:%d' % n for n in (0, 1, 6, 7, 8)
    }
    assert supported.sv_count == 5
    assert supported.get_ids() == expected_ids
Example #8
0
def test_copy():
    """``copy()`` must produce an equal VCF object for every supported caller."""
    # The manta fixture is read with the reader's default caller setting.
    originals = [
        viola.read_vcf(os.path.join(HERE, '../io/data/test.manta.vcf'))
    ]
    for caller_name, relative in (
            ('delly', '../io/data/test.delly.vcf'),
            ('lumpy', '../io/data/test.lumpy.vcf'),
            ('gridss', '../io/data/test.gridss.vcf'),
    ):
        originals.append(
            viola.read_vcf(os.path.join(HERE, relative),
                           variant_caller=caller_name))

    for original in originals:
        assert_vcf_equal(original, original.copy())
class TestToVcf:
    """Check ``to_vcf_like`` on a small Lumpy fixture parsed at class creation."""

    # Input records appended to the shared HEADER.
    body = """chr10	39984191	43	N	<INV>	.	.	SVTYPE=INV;STRANDS=++:4,--:2;SVLEN=176;END=39984367;CIPOS=-8,11;CIEND=-17,31;CIPOS95=-3,6;CIEND95=-1,15;IMPRECISE;SU=6;PE=6;SR=0	GT:SU:PE:SR	./.:3:3:0	./.:3:3:0
chr10	121422696	788	N	<DEL>	.	.	SVTYPE=DEL;STRANDS=+-:8;SVLEN=-21;END=121422717;CIPOS=-9,8;CIEND=-9,8;CIPOS95=0,0;CIEND95=0,0;SU=8;PE=0;SR=8	GT:SU:PE:SR	./.:8:0:8	./.:0:0:0
chr14	45812810	7294_1	N	[chr14:65332314[N	.	.	SVTYPE=BND;STRANDS=--:10;EVENT=7294;MATEID=7294_2;CIPOS=-7,6;CIEND=-7,6;CIPOS95=0,0;CIEND95=0,0;SU=10;PE=0;SR=10	GT:SU:PE:SR	./.:10:0:10	./.:0:0:0
chr14	65332314	7294_2	N	[chr14:45812810[N	.	.	SVTYPE=BND;STRANDS=--:10;SECONDARY;EVENT=7294;MATEID=7294_1;CIPOS=-7,6;CIEND=-7,6;CIPOS95=0,0;CIEND95=0,0;SU=10;PE=0;SR=10	GT:SU:PE:SR	./.:10:0:10	./.:0:0:0
chr15	60882465	8801	N	<DUP>	.	.	SVTYPE=DUP;STRANDS=-+:41;SVLEN=3099777;END=63982242;CIPOS=-3,1;CIEND=-3,2;CIPOS95=0,0;CIEND95=0,0;SU=41;PE=19;SR=22	GT:SU:PE:SR	./.:41:19:22	./.:0:0:0
"""
    # Expected records: the two-strand INV (id 43) appears as 43_1 / 43_2.
    expected_out = """chr10	39984191	43_1	N	<INV>	.	PASS	SVTYPE=INV;STRANDS=++:4;EVENT=43;SVLEN=176;END=39984367;CIPOS=-8,11;CIEND=-17,31;CIPOS95=-3,6;CIEND95=-1,15;IMPRECISE;SU=4;PE=6;SR=0;SUORG=6	GT:SU:PE:SR	./.:3:3:0	./.:3:3:0
chr10	39984191	43_2	N	<INV>	.	PASS	SVTYPE=INV;STRANDS=--:2;EVENT=43;SVLEN=176;END=39984367;CIPOS=-8,11;CIEND=-17,31;CIPOS95=-3,6;CIEND95=-1,15;IMPRECISE;SU=2;PE=6;SR=0;SUORG=6	GT:SU:PE:SR	./.:3:3:0	./.:3:3:0
chr10	121422696	788	N	<DEL>	.	PASS	SVTYPE=DEL;STRANDS=+-:8;SVLEN=-21;END=121422717;CIPOS=-9,8;CIEND=-9,8;CIPOS95=0,0;CIEND95=0,0;SU=8;PE=0;SR=8	GT:SU:PE:SR	./.:8:0:8	./.:0:0:0
chr14	45812810	7294_1	N	[chr14:65332314[N	.	PASS	SVTYPE=BND;STRANDS=--:10;EVENT=7294;MATEID=7294_2;CIPOS=-7,6;CIEND=-7,6;CIPOS95=0,0;CIEND95=0,0;SU=10;PE=0;SR=10	GT:SU:PE:SR	./.:10:0:10	./.:0:0:0
chr14	65332314	7294_2	N	[chr14:45812810[N	.	PASS	SVTYPE=BND;STRANDS=--:10;SECONDARY;EVENT=7294;MATEID=7294_1;CIPOS=-7,6;CIEND=-7,6;CIPOS95=0,0;CIEND95=0,0;SU=10;PE=0;SR=10	GT:SU:PE:SR	./.:10:0:10	./.:0:0:0
chr15	60882465	8801	N	<DUP>	.	PASS	SVTYPE=DUP;STRANDS=-+:41;SVLEN=3099777;END=63982242;CIPOS=-3,1;CIEND=-3,2;CIPOS95=0,0;CIEND95=0,0;SU=41;PE=19;SR=22	GT:SU:PE:SR	./.:41:19:22	./.:0:0:0
"""
    b = StringIO(HEADER + body)
    result = viola.read_vcf(b, variant_caller='lumpy')

    def test_to_vcf_like(self):
        """The VCF-like frame must equal ``expected_out`` parsed as a table."""
        observed = self.result.to_vcf_like()
        # The expected text has no header row, so column names are borrowed
        # from the frame under test.
        expected = pd.read_csv(StringIO(self.expected_out),
                               sep='\t',
                               names=observed.columns,
                               index_col=False)
        pd.testing.assert_frame_equal(observed, expected)
Example #10
0
def test_classify_manual_svtype():
    """Classify two identical breakpoint VCFs; check the per-SV table and count matrix."""
    first = viola.read_vcf(os.path.join(HERE, 'data/manta1.vcf'))
    second = first.copy()
    first = first.breakend2breakpoint()
    second = second.breakend2breakpoint()
    multi_vcf = viola.MultiVcf([first, second], ['vcf1', 'vcf2'])

    ls_names = [
        'small_del', 'large_del', 'small_dup', 'large_dup', 'small_inv', 'tra'
    ]
    ls_conditions = [
        small_del, large_del, small_dup, large_dup, small_inv, tra
    ]
    counts = multi_vcf.classify_manual_svtype(ls_conditions=ls_conditions,
                                              ls_names=ls_names)

    # Per-SV classification table, compared on an 'id' index.
    observed_types = multi_vcf.manual_sv_type
    observed_types.set_index('id', inplace=True)
    expected_types = pd.read_csv(
        StringIO(data_expected),
        sep='\t',
        names=('id', 'value_idx', 'manual_sv_type'),
    ).set_index('id')
    pd.testing.assert_frame_equal(observed_types,
                                  expected_types,
                                  check_like=True)

    # Count matrix: one row per patient; both rows are the same because the
    # two inputs are copies of one file.
    row = [2, 3, 1, 0, 2, 2, 1]
    expected_counts = pd.DataFrame(
        [list(row), list(row)],
        index=pd.Index(['vcf1', 'vcf2'], name='patients'),
        columns=pd.Index(ls_names + ['others'], name='manual_sv_type'),
    )
    pd.testing.assert_frame_equal(counts, expected_counts)
class TestReadVcfLumpy:
    """Check the tables produced when a Lumpy inversion record is read."""

    # Parsed once at class creation from a buffer defined elsewhere.
    vcf_lumpy_inv = viola.read_vcf(lumpy_body_inv_buf, variant_caller='lumpy')

    def test_read_vcf_lumpy_inv(self):
        """Positions, strands, su and suorg tables must match the expected CSVs."""
        parsed = self.vcf_lumpy_inv

        # positions table; the qual column is overwritten with None after
        # parsing the CSV text
        expected_positions = pd.read_csv(
            StringIO(
                """id,chrom1,pos1,chrom2,pos2,strand1,strand2,ref,alt,qual,svtype
43_1,chr10,39984191,chr10,39984367,+,+,N,<INV>,None,INV
43_2,chr10,39984192,chr10,39984367,-,-,N,<INV>,None,INV"""))
        expected_positions['qual'] = None
        pd.testing.assert_frame_equal(parsed.positions, expected_positions)

        # strands table
        expected_strands = pd.read_csv(
            StringIO("""id,value_idx,strands
43_1,0,++:4
43_2,0,--:2"""))
        pd.testing.assert_frame_equal(parsed.strands, expected_strands)

        # su table
        expected_su = pd.read_csv(
            StringIO("""id,value_idx,su
43_1,0,4
43_2,0,2"""))
        pd.testing.assert_frame_equal(parsed.su, expected_su)

        # suorg table
        expected_suorg = pd.read_csv(
            StringIO("""id,value_idx,suorg
43_1,0,6
43_2,0,6"""))
        pd.testing.assert_frame_equal(parsed.suorg, expected_suorg)
Example #12
0
def test_replace_svid():
    """``replace_svid`` must accept scalar IDs as well as parallel lists."""
    vcf = viola.read_vcf(StringIO(HEADER + body))
    vcf_expected1 = viola.read_vcf(StringIO(HEADER + body_expected1))
    vcf_expected2 = viola.read_vcf(StringIO(HEADER + body_expected2))

    # (ids to replace, replacement ids, expected outcome)
    scenarios = [
        ('test1', 'a', vcf_expected1),
        (['test1'], ['a'], vcf_expected1),
        (['test2', 'test3'], ['a', 'b'], vcf_expected2),
    ]
    for old_ids, new_ids, expected in scenarios:
        # Each scenario works on a fresh copy of the source object.
        candidate = vcf.copy()
        candidate.replace_svid(old_ids, new_ids)
        assert_vcf_equal(candidate, expected)
def test_breakend2breakpoint():
    """Lumpy breakend-to-breakpoint conversion, compared with a patched expectation."""
    source = viola.read_vcf(StringIO(HEADER + body), variant_caller='lumpy')
    vcf_expected = viola.read_vcf(StringIO(HEADER_expected + body_expected),
                                  variant_caller='lumpy')

    # Patch the expected object: the fourth record is set to INV in both the
    # positions and svtype tables, and the last two infos_meta index labels
    # are swapped.
    patched_positions = vcf_expected.get_table('positions')
    patched_positions.loc[3, 'svtype'] = 'INV'
    patched_svtype = vcf_expected.get_table('svtype')
    patched_svtype.iloc[3, 2] = 'INV'
    patched_infos_meta = vcf_expected.get_table('infos_meta')
    patched_infos_meta.index = list(range(17)) + [18, 17]

    for table_name, table in (
            ('positions', patched_positions),
            ('svtype', patched_svtype),
            ('infos_meta', patched_infos_meta),
    ):
        vcf_expected._odict_alltables[table_name] = table

    converted = source.breakend2breakpoint()
    assert_vcf_equal(converted, vcf_expected)
Example #14
0
    def test_filter_by_id(self):
        body_expected = """chr1	82550461	test1	G	<DEL>	.	MinSomaticScore	END=82554225;SVTYPE=DEL;SVLEN=-3764;IMPRECISE;CIPOS=-51,52;CIEND=-51,52;SOMATIC;SOMATICSCORE=10	PR:SR	21,0:10,0	43,4:15,3
chr1	60567906	test3	T	<DEL>	.	MinSomaticScore	END=60675940;SVTYPE=DEL;SVLEN=-108034;CIPOS=-44,44;CIEND=-38,39;SOMATIC;SOMATICSCORE=18	PR	23,0	44,6
"""
        expected_b = StringIO(HEADER + body_expected)
        obj_expected = viola.read_vcf(expected_b)
        obj_result = self.obj.filter_by_id(['test1', 'test3'])
        viola.testing.assert_vcf_equal(obj_expected, obj_result)
Example #15
0
def test_get_table():
    """``get_table`` returns known tables and raises for unknown names."""
    vcf = viola.read_vcf(os.path.join(HERE, 'data/test.manta.vcf'))

    # A known table name must be retrievable without an error; the table
    # itself is not inspected.
    vcf.get_table('positions')

    # An unknown table name must raise TableNotFoundError.
    with pytest.raises(TableNotFoundError):
        vcf.get_table("notexists")
Example #16
0
def vcf2bedpe(caller, info, filter_, format_, vcf):
    """
   Convert a VCF file into a BEDPE file.

   A VCF argument is the path to the input VCF file.
   """
    # A TextIOWrapper handle is read in full and re-wrapped in a StringIO
    # before parsing; anything else is handed to the reader unchanged.
    source = StringIO(vcf.read()) if isinstance(vcf, TextIOWrapper) else vcf
    vcf_obj = viola.read_vcf(source, variant_caller=caller)

    # `info` is an optional comma-separated list of INFO names, lower-cased
    # before being passed on.
    if info is None:
        ls_info_lower = []
    else:
        ls_info_lower = [name.lower() for name in info.split(',')]

    bedpe_like = vcf_obj.to_bedpe_like(custom_infonames=ls_info_lower,
                                       add_filters=filter_,
                                       add_formats=format_)
    rendered = bedpe_like.to_string(index=None)
    click.echo(rendered)
def test_merge():
    """Merge four callers by confidence intervals and check the resulting IDs."""
    manta_vcf = viola.read_vcf(
        os.path.join(HERE, 'data/test.merge.manta.vcf'),
        variant_caller='manta')
    delly_vcf = viola.read_vcf(
        os.path.join(HERE, 'data/test.merge.delly.vcf'),
        variant_caller='delly')
    lumpy_vcf = viola.read_vcf(
        os.path.join(HERE, 'data/test.merge.lumpy.vcf'),
        variant_caller='lumpy')
    gridss_vcf = viola.read_vcf(
        os.path.join(HERE, 'data/test.merge.gridss.vcf'),
        variant_caller='gridss')

    # Exercise the distance-matrix helper; its return value is not checked.
    combined = viola.TmpVcfForMerge(
        [manta_vcf, delly_vcf, gridss_vcf, lumpy_vcf],
        ['manta', 'delly', 'gridss', 'lumpy'])
    manta_vcf._generate_distance_matrix_by_confidence_intervals(combined)

    merged = viola.merge([manta_vcf, delly_vcf, lumpy_vcf, gridss_vcf],
                         mode='confidence_intervals')
    expected_ids = {
        'manta_M1', 'manta_MD1', 'manta_ML1', 'manta_MG1', 'manta_MDL1',
        'manta_MDG1', 'manta_MLG1', 'manta_MDLG1o', 'delly_D1', 'delly_DL1',
        'delly_DG1', 'delly_DLG1', 'lumpy_L1', 'lumpy_LG1', 'gridss_G1o'
    }
    assert merged.get_ids() == expected_ids
Example #18
0
class TestWriteVcf:
    """Write each caller's fixture with ``to_vcf`` and compare with a validation file.

    Output and validation paths are given relative to the current working
    directory, while the inputs are resolved against ``HERE``.
    """

    # Input fixtures, parsed once at class creation.
    manta_path = os.path.join(HERE, 'data/test.manta.vcf')
    lumpy_path = os.path.join(HERE, 'data/test.lumpy.vcf')
    delly_path = os.path.join(HERE, 'data/test.delly.vcf')
    gridss_path = os.path.join(HERE, 'data/test.gridss.vcf')
    vcf_manta = viola.read_vcf(manta_path)
    vcf_lumpy = viola.read_vcf(lumpy_path, variant_caller='lumpy')
    vcf_delly = viola.read_vcf(delly_path, variant_caller='delly')
    vcf_gridss = viola.read_vcf(gridss_path, variant_caller='gridss')

    def test_write_vcf_manta(self):
        """Written Manta VCF must equal its validation file."""
        written = 'tests/io/output/write_vcf_manta.vcf'
        self.vcf_manta.to_vcf(written)
        assert filecmp.cmp(written, 'tests/io/data/test.manta.validation.vcf')

    def test_write_vcf_lumpy(self):
        """Written Lumpy VCF must equal its validation file."""
        written = 'tests/io/output/write_vcf_lumpy.vcf'
        self.vcf_lumpy.to_vcf(written)
        assert filecmp.cmp(written, 'tests/io/data/test.lumpy.validation.vcf')

    def test_write_vcf_delly(self):
        """Written Delly VCF must equal its validation file."""
        written = 'tests/io/output/write_vcf_delly.vcf'
        self.vcf_delly.to_vcf(written)
        assert filecmp.cmp(written, 'tests/io/data/test.delly.validation.vcf')

    def test_write_vcf_gridss(self):
        """Written GRIDSS VCF must equal its validation file."""
        written = 'tests/io/output/write_vcf_gridss.vcf'
        self.vcf_gridss.to_vcf(written)
        assert filecmp.cmp(written,
                           'tests/io/data/test.gridss.validation.vcf')

    def test_write_info(self):
        """Output written with ``onlyinfo=True`` must equal ``test_info.vcf``."""
        written = 'tests/io/output/write_info.vcf'
        self.vcf_manta.to_vcf(written, onlyinfo=True)
        assert filecmp.cmp(written, 'tests/io/data/test_info.vcf')
Example #19
0
class TestAppendInfos:
    """Check that ``append_infos`` adds INFO columns to the positions table."""

    manta_path = os.path.join(HERE, 'data/test.manta.vcf')
    reader = viola.read_vcf(manta_path)
    positions = reader.get_table('positions')

    def test_append_svlen(self):
        """Appending 'svlen' adds a trailing, non-empty ``svlen_0`` column."""
        # Nothing is checked when the fixture has no svlen table.
        if 'svlen' not in self.reader.table_list:
            return

        appended = viola.Vcf.append_infos(self.reader,
                                          self.positions,
                                          ls_tablenames=['svlen'])
        last_column = appended.columns[-1]
        assert last_column == 'svlen_0'
        assert appended['svlen_0'].notnull().any()
Example #20
0
class TestFilters:
    """Exercise the private filter helpers and ``filter_by_id`` on four records."""

    # Four records (test1..test4) appended to the shared HEADER.
    body = """chr1	82550461	test1	G	<DEL>	.	MinSomaticScore	END=82554225;SVTYPE=DEL;SVLEN=-3764;IMPRECISE;CIPOS=-51,52;CIEND=-51,52;SOMATIC;SOMATICSCORE=10	PR:SR	21,0:10,0	43,4:15,3
chr1	22814216	test2	T	<INV>	.	MinSomaticScore	END=92581131;SVTYPE=INV;SVLEN=69766915;IMPRECISE;CIPOS=-51,51;CIEND=-89,90;SOMATIC;SOMATICSCORE=11;INV5	PR	24,0	35,5
chr1	60567906	test3	T	<DEL>	.	MinSomaticScore	END=60675940;SVTYPE=DEL;SVLEN=-108034;CIPOS=-44,44;CIEND=-38,39;SOMATIC;SOMATICSCORE=18	PR	23,0	44,6
chr1	69583190	test4	T	<DEL>	.	PASS	END=69590947;SVTYPE=DEL;SVLEN=-7757;IMPRECISE;CIPOS=-123,123;CIEND=-135,136;SOMATIC;SOMATICSCORE=47	PR	21,0	20,12
"""
    data = HEADER + body
    b = StringIO(data)
    obj = viola.read_vcf(b)

    def test_filter_infos(self):
        """INFO filters return the IDs that satisfy the comparison."""
        below_threshold = self.obj._filter_infos('svlen',
                                                 0,
                                                 operator="<",
                                                 threshold=10000)
        inversions = self.obj._filter_infos('svtype',
                                            0,
                                            operator='==',
                                            threshold='INV')
        assert below_threshold == {'test1', 'test3', 'test4'}
        assert inversions == {'test2'}

    def test_filter_infos_flag(self):
        """Flag filters select records with, or (``exclude=True``) without, the flag."""
        flagged = self.obj._filter_infos_flag('imprecise', exclude=False)
        unflagged = self.obj._filter_infos_flag('imprecise', exclude=True)
        assert flagged == {'test1', 'test2', 'test4'}
        assert unflagged == {'test3'}

    def test_filter_filters(self):
        """FILTER-column filters select PASS records or, with ``exclude=True``, the rest."""
        passing = self.obj._filter_filters('PASS', exclude=False)
        failing = self.obj._filter_filters('PASS', exclude=True)
        assert passing == {'test4'}
        assert failing == {'test1', 'test2', 'test3'}

    def test_filter_formats(self):
        """FORMAT filter: PR value at index 1 of sample mouse1_T greater than 5."""
        selected = self.obj._filter_formats('mouse1_T', 'PR', 1, '>', 5)
        assert selected == {'test3', 'test4'}

    def test_filter_by_id(self):
        """``filter_by_id`` must keep exactly the requested records."""
        # Expected content: only the test1 and test3 records.
        kept_records = """chr1	82550461	test1	G	<DEL>	.	MinSomaticScore	END=82554225;SVTYPE=DEL;SVLEN=-3764;IMPRECISE;CIPOS=-51,52;CIEND=-51,52;SOMATIC;SOMATICSCORE=10	PR:SR	21,0:10,0	43,4:15,3
chr1	60567906	test3	T	<DEL>	.	MinSomaticScore	END=60675940;SVTYPE=DEL;SVLEN=-108034;CIPOS=-44,44;CIEND=-38,39;SOMATIC;SOMATICSCORE=18	PR	23,0	44,6
"""
        expected = viola.read_vcf(StringIO(HEADER + kept_records))
        filtered = self.obj.filter_by_id(['test1', 'test3'])
        viola.testing.assert_vcf_equal(expected, filtered)
Example #21
0
class TestReadVcfLumpy:
    """Check the positions table produced when a Delly buffer is read.

    NOTE(review): the class name mentions Lumpy but the fixture and caller
    are Delly -- confirm whether the name is a copy/paste leftover.
    """

    # Parsed once at class creation from a buffer defined elsewhere.
    vcf_delly = viola.read_vcf(delly_body_buf, variant_caller='delly')

    def test_read_vcf_delly(self):
        """The positions table must match the expected CSV row for row."""
        parsed = self.vcf_delly

        expected_csv = StringIO(
            """id,chrom1,pos1,chrom2,pos2,strand1,strand2,ref,alt,qual,svtype
DEL00001,chr10,3485568,chr10,3485618,+,-,C,<DEL>,180,DEL
INV00001,chr10,8825910,chr10,32816609,-,-,A,<INV>,17,INV
DUP00001,chr10,11350704,chr10,122603091,-,+,G,<DUP>,29,DUP
INV00002,chr10,61016659,chr10,111424580,+,+,G,<INV>,10,INV
BND00001,chr11,15249914,chr10,72297333,+,+,A,A]chr10:72297333],193,BND
""")
        expected_positions = pd.read_csv(expected_csv)
        pd.testing.assert_frame_equal(parsed.positions, expected_positions)
Example #22
0
class TestToBedpe:
    """Write BEDPE files from a four-record fixture and compare with expected files."""

    # Four records (test1..test4) appended to the shared HEADER.
    body = """chr1	82550461	test1	G	<DEL>	.	MinSomaticScore	END=82554225;SVTYPE=DEL;SVLEN=-3764;IMPRECISE;CIPOS=-51,52;CIEND=-51,52;SOMATIC;SOMATICSCORE=10	PR:SR	21,0:10,0	43,4:15,3
chr1	22814216	test2	T	<INV>	.	MinSomaticScore	END=92581131;SVTYPE=INV;SVLEN=69766915;IMPRECISE;CIPOS=-51,51;CIEND=-89,90;SOMATIC;SOMATICSCORE=11;INV5	PR	24,0	35,5
chr1	60567906	test3	T	<DEL>	.	MinSomaticScore	END=60675940;SVTYPE=DEL;SVLEN=-108034;CIPOS=-44,44;CIEND=-38,39;SOMATIC;SOMATICSCORE=18	PR	23,0	44,6
chr1	69583190	test4	T	<DEL>	.	PASS	END=69590947;SVTYPE=DEL;SVLEN=-7757;IMPRECISE;CIPOS=-123,123;CIEND=-135,136;SOMATIC;SOMATICSCORE=47	PR	21,0	20,12
"""
    b = StringIO(HEADER + body)
    result = viola.read_vcf(b)

    def test_to_bedpe_like(self):
        """Plain BEDPE output must equal ``expected.bedpe``."""
        written = os.path.join(HERE, 'data/out.bedpe')
        reference = os.path.join(HERE, 'data/expected.bedpe')
        self.result.to_bedpe(written)
        assert filecmp.cmp(written, reference)

    def test_to_bedpe_like_with_info(self):
        """BEDPE output with the svlen INFO column must equal ``expected.svlen.bedpe``."""
        written = os.path.join(HERE, 'data/out.svlen.bedpe')
        reference = os.path.join(HERE, 'data/expected.svlen.bedpe')
        self.result.to_bedpe(written, custom_infonames=['svlen'])
        assert filecmp.cmp(written, reference)
import viola
import numpy as np
import os
# Absolute directory of this module; fixture paths below are resolved
# against it.
HERE = os.path.abspath(os.path.dirname(__file__))
# One parsed VCF per caller, loaded at import time from the 'data'
# directory next to this module.
delly = viola.read_vcf(os.path.join(HERE, 'data', "test.merge.delly.vcf"),
                       variant_caller="delly")
manta = viola.read_vcf(os.path.join(HERE, 'data', "test.merge.manta.vcf"),
                       variant_caller="manta")
gridss = viola.read_vcf(os.path.join(HERE, 'data', "test.merge.gridss.vcf"),
                        variant_caller="gridss")
lumpy = viola.read_vcf(os.path.join(HERE, 'data', "test.merge.lumpy.vcf"),
                       variant_caller="lumpy")


def test_merge_to_vcf_like():
    """Smoke test: an integrated, filtered merge can be written with ``to_vcf``."""
    combined = viola.merge([manta, gridss, delly, lumpy], integration=True)
    # Keep only records backed by more than one caller before writing.
    supported = combined.filter('supportingcallercount > 1')
    output_path = os.path.join(HERE, 'data/output_merged.vcf')
    supported.to_vcf(output_path)
##FILTER=<ID=MaxMQ0Frac,Description="For a small variant (<1000 bases) in the normal sample, the fraction of reads with MAPQ0 around either breakend exceeds 0.4">
##ALT=<ID=INV,Description="Inversion">
##ALT=<ID=DEL,Description="Deletion">
##ALT=<ID=INS,Description="Insertion">
##ALT=<ID=DUP:TANDEM,Description="Tandem Duplication">
#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	mouse1_N	mouse1_T
"""
# Module-level fixture: four records (test1..test4) appended to HEADER and
# parsed once at import time; `result` is the object under test below.
body = """chr1	82550461	test1	G	<DEL>	.	MinSomaticScore	END=82554225;SVTYPE=DEL;SVLEN=-3764;IMPRECISE;CIPOS=-51,52;CIEND=-51,52;SOMATIC;SOMATICSCORE=10	PR:SR	21,0:10,0	43,4:15,3
chr1	22814216	test2	T	<INV>	.	MinSomaticScore	END=92581131;SVTYPE=INV;SVLEN=69766915;IMPRECISE;CIPOS=-51,51;CIEND=-89,90;SOMATIC;SOMATICSCORE=11;INV5	PR	24,0	35,5
chr1	60567906	test3	T	<DEL>	.	MinSomaticScore	END=60675940;SVTYPE=DEL;SVLEN=-108034;CIPOS=-44,44;CIEND=-38,39;SOMATIC;SOMATICSCORE=18	PR	23,0	44,6
chr1	69583190	test4	T	<DEL>	.	PASS	END=69590947;SVTYPE=DEL;SVLEN=-7757;IMPRECISE;CIPOS=-123,123;CIEND=-135,136;SOMATIC;SOMATICSCORE=47	PR	21,0	20,12
"""
b = StringIO(HEADER + body)
result = viola.read_vcf(b)


def test_to_bedpe_like():
    """``to_bedpe_like`` must reproduce the expected ten-column BEDPE frame exactly."""
    expected_data = """chrom1\tstart1\tend1\tchrom2\tstart2\tend2\tname\tscore\tstrand1\tstrand2
chr1\t82550460\t82550461\tchr1\t82554225\t82554226\ttest1\t\t+\t-
chr1\t22814216\t22814217\tchr1\t92581131\t92581132\ttest2\t\t-\t-
chr1\t60567905\t60567906\tchr1\t60675940\t60675941\ttest3\t\t+\t-
chr1\t69583189\t69583190\tchr1\t69590947\t69590948\ttest4\t\t+\t-
"""
    expected = pd.read_csv(StringIO(expected_data), sep='\t')
    # The score field is empty for every row here, so the parsed column is
    # cast to object before comparing.
    expected['score'] = expected['score'].astype(object)

    observed = result.to_bedpe_like()
    pd.testing.assert_frame_equal(observed, expected, check_exact=True)
def test_breakend2breakpoint():
    """Breakend-to-breakpoint conversion must match the expected VCF."""
    source = viola.read_vcf(StringIO(HEADER + body))
    expected = viola.read_vcf(StringIO(HEADER_expected + body_expected))
    converted = source.breakend2breakpoint()
    assert_vcf_equal(converted, expected)
def test_remove_info_table():
    """Removing the 'test' INFO table must give back the plain manta fixture."""
    with_extra_info = viola.read_vcf(
        os.path.join(HERE, 'data/test.info.added.manta.vcf'))
    reference = viola.read_vcf(os.path.join(HERE, 'data/test.manta.vcf'))

    # remove_info_table is called for its effect on the object itself.
    with_extra_info.remove_info_table('test')

    assert_vcf_equal(with_extra_info, reference)
Example #27
0
def test_read_vcf_empty_lumpy():
    """Smoke test: reading the ``GRIDSS`` text with the gridss caller must not raise.

    NOTE(review): the function name says "lumpy" while the data and caller
    are GRIDSS -- confirm whether the name is a copy/paste leftover.
    """
    buffer = StringIO(GRIDSS)
    vcf = viola.read_vcf(buffer, variant_caller='gridss')
Example #28
0
import viola
import numpy as np
# One parsed VCF per caller, loaded at import time.
# NOTE(review): these are absolute paths under /shared_data, so the module
# only imports on a machine where that directory exists -- confirm whether
# the fixtures should be referenced relative to the test directory instead.
delly = viola.read_vcf("/shared_data/share/merge/test.merge.delly.vcf",
                       variant_caller="delly")
gridss = viola.read_vcf("/shared_data/share/merge/test.merge.gridss.vcf",
                        variant_caller="gridss")
lumpy = viola.read_vcf("/shared_data/share/merge/test.merge.lumpy.vcf",
                       variant_caller="lumpy")
manta = viola.read_vcf("/shared_data/share/merge/test.merge.manta.vcf",
                       variant_caller="manta")


def test_merge():
    """A merged VCF must compare equal to its own copy."""
    inputs = [delly, lumpy, manta, gridss]
    merged_all = gridss.merge(ls_vcf=inputs, threshold=100)
    duplicate = merged_all.copy()
    viola.testing.assert_vcf_equal(merged_all, duplicate)
Example #29
0
import os
import shutil
import viola

# Snakemake script body: convert the breakends of the input VCF to
# breakpoints with viola and write the result to the output path. The
# `snakemake` object is provided by the caller of this script.
vcf_in = str(snakemake.input)
vcf_org = vcf_in + '.org'
vcf_out = str(snakemake.output)
caller = str(snakemake.wildcards.prefix)

if caller == 'gridss':
    # Keep the untouched file as '<input>.org' and rewrite the input path
    # with every 'PARID' replaced by 'MATEID'.
    os.rename(vcf_in, vcf_org)
    with open(vcf_in, 'w') as new, open(vcf_org, 'r') as org:
        for line in org:
            # FIX INFO field: change PARID to MATEID
            new.write(line.replace('PARID', 'MATEID'))
try:
    sv = viola.read_vcf(vcf_in, variant_caller=caller).breakend2breakpoint()
    sv.to_vcf(vcf_out)
except Exception as err:
    # Best-effort fallback: pass the input through unchanged so the pipeline
    # can continue, but report the failure instead of hiding it.
    import sys
    sys.stderr.write(
        'viola conversion failed for %s (caller=%s): %r; '
        'copying input to output unchanged\n' % (vcf_in, caller, err))
    shutil.copyfile(vcf_in, vcf_out)
def test_single():
    """Smoke test: select one SV by ID through ``idx`` and print its positions."""
    parsed = viola.read_vcf(StringIO(HEADER + body), variant_caller='gridss')
    single_sv = parsed.idx['single_test']
    # Nothing is asserted; the table is only printed.
    print(single_sv.positions)