コード例 #1
0
def test_collect_columns_incorrect_number_of_names():
    """Check that main() raises ValueError on a names/inputs mismatch.

    The command line passes two sample paths but only a single name after
    ``-n``; main() is expected to raise ValueError with the message
    matched below.
    """
    from unittest import mock

    semicolon_dir = datadir / Path("semicolon")
    sample1 = str(semicolon_dir / Path("sample1.csv"))
    sample2 = str(semicolon_dir / Path("sample2.csv"))
    argv = ["script", "output", sample1, sample2, "-n", "sample1"]
    expected_message = ("The number of names did not match "
                        "the number of inputs.")
    # Patch sys.argv only for the duration of the call so the fake command
    # line does not leak into tests that run afterwards.
    with mock.patch.object(sys, "argv", argv):
        with pytest.raises(ValueError, match=expected_message):
            main()
コード例 #2
0
def test_main_stringtie_sum(tmpdir):
    """Run main() on the two stringtie abundance samples and check the output.

    The lines written to the output file are compared as a set, so the
    order of the rows in the file is not checked.

    Args:
        tmpdir: pytest temporary directory fixture; the output file is
            created inside it.
    """
    from unittest import mock

    stringtie_dir = datadir / Path("stringtie")
    sample1 = str(stringtie_dir / Path("sample1.abundance"))
    sample2 = str(stringtie_dir / Path("sample2.abundance"))
    gtf = str(datadir / Path("merged.gtf"))
    expected_result = {
        "feature\tref_gene_id\tgene_name\tsample1.abundance\t"
        "sample2.abundance\n",
        "MSTRG.1\tg_1;g_7\tgene_1;gene_7\t185151.953125\t85151.953125\n",
        "MSTRG.2\tg_2\tgene_2\t100160.070312\t160.070312\n",
        "MSTRG.3\tg_3\tgene_3\t91229.078125\t1229.078125\n",
        "MSTRG.4\tg_4\tgene_4\t184648.109375\t84648.109375\n",
        "MSTRG.5\tg_5\tgene_5\t104290.078125\t4290.078125\n",
        "MSTRG.6\tg_6\tgene_6\t89926.898438\t9927.898438\n",
    }

    output_file = tmpdir.join("output.tsv")
    argv = ["script", output_file.strpath, sample1, sample2, "-c", "7",
            "-H", "-g", gtf, "-a", "ref_gene_id", "gene_name", "-S"]
    # Patch sys.argv only for the duration of the call so the fake command
    # line does not leak into tests that run afterwards.
    with mock.patch.object(sys, "argv", argv):
        main()

    with output_file.open() as out_file:
        result = set(out_file)
    assert result == expected_result
コード例 #3
0
def test_main_semicolon(tmpdir):
    """Run main() on the two semicolon-separated samples and check the output.

    The lines written to the output file are compared as a set, so the
    order of the rows in the file is not checked.

    Args:
        tmpdir: pytest temporary directory fixture; the output file is
            created inside it.
    """
    from unittest import mock

    semicolon_dir = datadir / Path("semicolon")
    sample1 = str(semicolon_dir / Path("sample1.csv"))
    sample2 = str(semicolon_dir / Path("sample2.csv"))
    gtf = str(datadir / Path("merged.gtf"))
    expected_result = {
        "feature;ref_gene_id;transcript_id;sample1.csv;sample2.csv\n",
        "gene_1;g_1;t_1_2;1;10\n",
        "gene_2;g_2;t_2_1;2;20\n",
        "gene_3;g_3;t_3_1;3;30\n",
        "gene_4;g_4;\"t_4_2;t_4_1\";4;40\n",
        "gene_5;g_5;t_5_1;5;\n",
        "gene_6;g_6;t_6_1;;60\n",
    }

    output_file = tmpdir.join("output.tsv")
    argv = ["script", output_file.strpath, sample1, sample2, "-f", "1",
            "-c", "0", "-s", ";", "-H", "-g", gtf, "-a", "ref_gene_id",
            "transcript_id", "-F", "gene_name"]
    # Patch sys.argv only for the duration of the call so the fake command
    # line does not leak into tests that run afterwards.
    with mock.patch.object(sys, "argv", argv):
        main()

    with output_file.open() as out_file:
        result = set(out_file)
    assert result == expected_result
コード例 #4
0
def test_main_htseq(tmpdir):
    """Run main() on the two htseq samples with explicit column names.

    The names ``s1`` and ``s2`` are passed after ``-n`` and are expected to
    appear in the header line of the output. The lines written to the
    output file are compared as a set, so row order is not checked.

    Args:
        tmpdir: pytest temporary directory fixture; the output file is
            created inside it.
    """
    from unittest import mock

    htseq_dir = datadir / Path("htseq")
    sample1 = str(htseq_dir / Path("sample1.fragments_per_gene"))
    sample2 = str(htseq_dir / Path("sample2.fragments_per_gene"))
    expected_result = {
        "feature\ts1\ts2\n",
        "MSTRG.1\t2371\t0\n",
        "MSTRG.2\t381\t1\n",
        "MSTRG.3\t741\t7\n",
        "MSTRG.4\t2361\t2\n",
        "MSTRG.5\t382\t3\n",
        "MSTRG.6\t706\t7\n",
        "__alignment_not_unique\t131\t13\n",
        "__ambiguous\t2995\t295\n",
        "__no_feature\t0\t0\n",
        "__not_aligned\t5\t51\n",
        "__too_low_aQual\t0\t0\n",
    }
    output_file = tmpdir.join("output.tsv")
    argv = ["script", output_file.strpath, sample1, sample2, "-n", "s1",
            "s2"]
    # Patch sys.argv only for the duration of the call so the fake command
    # line does not leak into tests that run afterwards.
    with mock.patch.object(sys, "argv", argv):
        main()

    with output_file.open() as out_file:
        result = set(out_file)
    assert result == expected_result