def test_classify_manual_svtype_from_file():
    """Classify two identical BEDPE inputs using a definition file.

    Checks both the per-record ``manual_sv_type`` table stored on the
    MultiBedpe object and the per-patient count matrix returned by
    ``classify_manual_svtype``.
    """
    patient_names = ['bedpe1', 'bedpe2']
    # Both patients are parsed from the same module-level fixture text.
    multibedpe = viola.MultiBedpe(
        [viola.read_bedpe(StringIO(data)) for _ in patient_names],
        patient_names,
    )

    # Definitions are read from a file, so no in-code condition list is
    # needed here (the previously built ``ls_conditions`` was never used).
    path = os.path.join(HERE, '../bedpe/data/example_definition.txt')
    result = multibedpe.classify_manual_svtype(definitions=path)

    # Per-record classification table, indexed by SV id for comparison.
    # A non-inplace ``set_index`` avoids mutating whatever frame the
    # ``manual_sv_type`` attribute hands back.
    manual_sv_type = multibedpe.manual_sv_type.set_index('id')
    manual_sv_type_expected = pd.read_csv(
        StringIO(data_expected),
        sep='\t',
        names=('id', 'value_idx', 'manual_sv_type'),
    ).set_index('id')
    # check_like=True: row/column order is not part of the contract here.
    pd.testing.assert_frame_equal(
        manual_sv_type, manual_sv_type_expected, check_like=True
    )

    # Per-patient count matrix: one row per patient, one column per class
    # plus the trailing 'others' bucket.
    ls_names = [
        'small_del', 'large_del', 'small_dup',
        'large_dup', 'small_inv', 'tra',
    ]
    counts = [2, 3, 1, 0, 2, 2, 2]
    result_expected = pd.DataFrame(
        [list(counts) for _ in patient_names],
        index=pd.Index(patient_names, name='patients'),
        columns=pd.Index(ls_names + ['others'], name='manual_sv_type'),
    )
    pd.testing.assert_frame_equal(result, result_expected)
def test_classify_manual_svtype_exclude_empty():
    """Classify a mix of populated and empty inputs with empty cases excluded.

    The expected count matrix only lists the two populated patients.
    """
    bedpe1 = viola.read_bedpe(StringIO(data))
    bedpe2 = viola.read_bedpe(StringIO(data))
    empty1 = viola.read_bedpe(StringIO(data_empty))
    empty2 = viola.read_bedpe(StringIO(data_empty))

    # Conditions and their labels, kept pairwise aligned.
    ls_conditions = [small_del, large_del, small_dup, large_dup, small_inv, tra]
    ls_names = [
        'small_del', 'large_del', 'small_dup',
        'large_dup', 'small_inv', 'tra',
    ]

    # Populated and empty patients are interleaved on purpose.
    members = [bedpe1, empty1, bedpe2, empty2]
    member_names = ['bedpe1', 'empty1', 'bedpe2', 'empty2']
    multibedpe = viola.MultiBedpe(members, member_names)

    result = multibedpe.classify_manual_svtype(
        ls_conditions=ls_conditions,
        ls_names=ls_names,
        exclude_empty_cases=True,
    )

    # Per-record table stored on the object after classification.
    manual_sv_type = multibedpe.manual_sv_type
    manual_sv_type.set_index('id', inplace=True)
    manual_sv_type_expected = pd.read_csv(
        StringIO(data_expected),
        sep='\t',
        names=('id', 'value_idx', 'manual_sv_type'),
    ).set_index('id')
    pd.testing.assert_frame_equal(
        manual_sv_type, manual_sv_type_expected, check_like=True
    )

    # Expected counts: identical rows for the two populated patients.
    per_patient_counts = [2, 3, 1, 0, 2, 2, 2]
    result_expected = pd.DataFrame(
        [per_patient_counts, per_patient_counts],
        index=pd.Index(['bedpe1', 'bedpe2'], name='patients'),
        columns=pd.Index(ls_names + ['others'], name='manual_sv_type'),
    )
    pd.testing.assert_frame_equal(result, result_expected)
class TestReadBedpe:
    """Tests for parsing a small two-record BEDPE fixture."""

    # Tab-separated BEDPE text: a header row followed by two records.
    data = """chrom1\tstart1\tend1\tchrom2\tstart2\tend2\tname\tscore\tstrand1\tstrand2
chr1\t10\t13\tchr2\t20\t30\ttest1\t10\t+\t+
chr3\t100\t130\tchr3\t210\t230\ttest2\t30\t-\t+
"""
    # Parsed once at class-definition time and shared by the tests below.
    b = StringIO(data)
    obj = viola.read_bedpe(b)

    def test_read_bedpe(self):
        """Parsing the fixture text must complete without raising."""
        viola.read_bedpe(StringIO(self.data))

    def test_svpos(self):
        """The 'positions' table of the parsed fixture matches the expected TSV."""
        expected_text = """id\tchrom1\tpos1\tchrom2\tpos2\tstrand1\tstrand2\tqual\tsvtype\tref\talt
test1\tchr1\t12\tchr2\t26\t+\t+\t10\tBND\tN\tN]chr2:26]
test2\tchr3\t116\tchr3\t221\t-\t+\t30\tDUP\tN\t<DUP>
"""
        actual = self.obj.get_table('positions')
        expected = pd.read_csv(StringIO(expected_text), sep="\t")
        pd.testing.assert_frame_equal(actual, expected)

    def test_create_alt_field_from_position(self):
        """Run the ALT-field builder on a two-row positions frame."""
        positions_text = """id\tchrom1\tpos1\tchrom2\tpos2\tstrand1\tstrand2\tsvtype\tref
test1\tchr1\t10\tchr2\t10\t+\t-\tBND\tN
test2\tchr1\t10\tchr1\t10\t+\t-\tDEL\tN
"""
        positions = pd.read_csv(StringIO(positions_text), sep="\t")
        # NOTE(review): the return value is not checked; as written this
        # only verifies that the call does not raise.
        viola.io.parser.create_alt_field_from_position(positions)
def test_classify_manual_svtype():
    """Classify a single BEDPE object with in-code conditions and names.

    Verifies the per-record ``manual_sv_type`` table and the returned
    per-class count series.
    """
    ls_names = [
        'small_del', 'large_del', 'small_dup',
        'large_dup', 'small_inv', 'tra',
    ]
    ls_conditions = [small_del, large_del, small_dup, large_dup, small_inv, tra]

    bedpe = viola.read_bedpe(StringIO(data))
    result = bedpe.classify_manual_svtype(
        ls_conditions=ls_conditions, ls_names=ls_names
    )

    # Per-record table, indexed by id so it can be compared to the fixture.
    manual_sv_type = bedpe.manual_sv_type
    manual_sv_type.set_index('id', inplace=True)
    manual_sv_type_expected = pd.read_csv(
        StringIO(data_expected),
        sep='\t',
        names=('id', 'value_idx', 'manual_sv_type'),
    ).set_index('id')
    pd.testing.assert_frame_equal(
        manual_sv_type, manual_sv_type_expected, check_like=True
    )

    # One count per named class plus the trailing 'others' bucket.
    result_expected = pd.Series(
        [2, 3, 1, 0, 2, 2, 2],
        index=ls_names + ['others'],
        name='manual_sv_type',
    )
    pd.testing.assert_series_equal(result, result_expected)
def test_remove_info():
    """Adding then removing an info table leaves the object equal to a prior copy."""
    bedpe = viola.read_bedpe(StringIO(data))
    # Snapshot taken before any modification.
    bedpe_copy = bedpe.copy()

    table_name = 'test'
    test_info = pd.DataFrame({
        'id': ['test1', 'test2'],
        'value_idx': [0, 0],
        'test': ['t', 'u'],
    })

    # Round trip: add the table, then remove it again.
    bedpe.add_info_table(table_name, test_info)
    bedpe.remove_info_table(table_name)

    viola.testing.assert_bedpe_equal(bedpe, bedpe_copy)
def test_to_bedpe():
    """Write BEDPE files (plain and with an 'svlen' column) and compare to references."""

    def here(relative):
        # Resolve a path relative to this test module's directory.
        return os.path.join(HERE, relative)

    bedpe = viola.read_bedpe(StringIO(data))

    # Both outputs are written before either comparison is made.
    bedpe.to_bedpe(here('data/output.bedpe'))
    bedpe.to_bedpe(here('data/output.svlen.bedpe'), custom_infonames=['svlen'])

    assert filecmp.cmp(here('data/output.bedpe'),
                       here('data/bedpe_expected.bedpe'))
    assert filecmp.cmp(here('data/output.svlen.bedpe'),
                       here('data/output.svlen.expected.bedpe'))
def generate_feature_matrix(input_dir, input_files, input_files_id, format_, caller, svtype_col_name, as_breakpoint, definitions, output):
    """
    Generate feature matrix from VCF or BEDPE files.
    """
    # NOTE(review): docstring kept verbatim in case a CLI framework
    # surfaces it as help text.
    #
    # Inputs come either from a directory (``input_dir``) or from a
    # comma-separated list of paths (``input_files``). Exactly one of the
    # two must be given; with neither or both, the function returns
    # without doing anything.
    from_dir = input_dir is not None
    from_files = input_files is not None
    if from_dir == from_files:
        return

    is_bedpe = format_ == 'bedpe'

    if from_dir:
        # Directory mode: the multi-file readers build the container.
        if is_bedpe:
            data = viola.read_bedpe_multi(input_dir, svtype_col_name=svtype_col_name)
        else:
            data = viola.read_vcf_multi(input_dir, variant_caller=caller, as_breakpoint=as_breakpoint)
    else:
        # File-list mode: pick the per-file loader once, then apply it
        # to every path in order.
        if is_bedpe:
            def load(path):
                return viola.read_bedpe(path, svtype_col_name=svtype_col_name)
        elif as_breakpoint:
            def load(path):
                return viola.read_vcf(path, variant_caller=caller).breakend2breakpoint()
        else:
            def load(path):
                return viola.read_vcf(path, variant_caller=caller)

        loaded = [load(path) for path in input_files.split(',')]

        # Without explicit ids, files are labelled by their position.
        if input_files_id is None:
            ls_names = range(len(loaded))
        else:
            ls_names = input_files_id.split(',')

        # NOTE(review): VCF inputs given as a file list are wrapped in
        # MultiBedpe as well -- confirm whether a VCF-specific container
        # was intended here.
        data = viola.MultiBedpe(loaded, ls_names)

    # Classify using the supplied definitions and write the table as TSV.
    result = data.classify_manual_svtype(definitions=definitions)
    result.to_csv(output, sep='\t')
def test_append_info():
    """Adding an info table registers it in both table maps and the info-key list."""
    bedpe = viola.read_bedpe(StringIO(data))
    test_info = pd.DataFrame({
        'id': ['test1', 'test2'],
        'value_idx': [0, 0],
        'test': ['t', 'u'],
    })

    bedpe.add_info_table('test', test_info)

    # The same frame must be retrievable from each internal mapping.
    for mapping_attr in ('_odict_alltables', '_odict_df_info'):
        stored = getattr(bedpe, mapping_attr)['test']
        pd.testing.assert_frame_equal(stored, test_info)

    assert 'test' in bedpe._ls_infokeys
def test_classify_manual_svtype_from_file():
    """Classify a single BEDPE object using definitions read from a file.

    Verifies the per-record ``manual_sv_type`` table and the returned
    per-class count series.
    """
    bedpe = viola.read_bedpe(StringIO(data))
    definition_path = os.path.join(HERE, 'data/example_definition.txt')
    result = bedpe.classify_manual_svtype(definitions=definition_path)

    # Per-record table, indexed by id so it can be compared to the fixture.
    manual_sv_type = bedpe.manual_sv_type
    manual_sv_type.set_index('id', inplace=True)
    manual_sv_type_expected = pd.read_csv(
        StringIO(data_expected),
        sep='\t',
        names=('id', 'value_idx', 'manual_sv_type'),
    ).set_index('id')
    pd.testing.assert_frame_equal(
        manual_sv_type, manual_sv_type_expected, check_like=True
    )

    # Expected counts per class, with the 'others' bucket last.
    ls_names = [
        'small_del', 'large_del', 'small_dup',
        'large_dup', 'small_inv', 'tra',
    ]
    result_expected = pd.Series(
        [2, 3, 1, 0, 2, 2, 2],
        index=ls_names + ['others'],
        name='manual_sv_type',
    )
    pd.testing.assert_series_equal(result, result_expected)
def test_copy():
    """A copy of a parsed BEDPE object compares equal to the original."""
    original = viola.read_bedpe(StringIO(data))
    duplicate = original.copy()
    viola.testing.assert_bedpe_equal(original, duplicate)
def test_read_bedpe(self):
    """Parsing the fixture text held in ``self.data`` must not raise."""
    viola.read_bedpe(StringIO(self.data))
def test_classify_manual_svtype_from_article():
    """Call ``classify_manual_svtype`` with ``definitions="article"``.

    A ``TypeError`` from the call is tolerated; any other exception
    propagates and fails the test.
    """
    bedpe = viola.read_bedpe(StringIO(data))
    try:
        bedpe.classify_manual_svtype(definitions="article")
    except TypeError:
        # NOTE(review): the test passes whether or not TypeError is
        # raised and makes no assertion on the result -- confirm which
        # outcome is actually expected.
        pass
def test_read_bedpe_with_empty():
    """A MultiBedpe can be built from two populated inputs plus an empty one."""
    populated = [viola.read_bedpe(StringIO(data)) for _ in range(2)]
    empty = viola.read_bedpe(StringIO(data_empty))

    # Construction itself is the check: it must complete without raising.
    viola.MultiBedpe(populated + [empty], ['bedpe1', 'bedpe2', 'empty'])