コード例 #1
0
ファイル: show_ipd.py プロジェクト: lemene/mbio
def show_all_ipd(csvfile, mark="", alpha=1):
    """Scatter-plot the non-negative matrix entries of ``csvfile``, one colour per label.

    The matrix, labels, row names and column names are read with
    ``mcsv.load_matrix``. Only labels containing ``mark`` are plotted; each
    distinct label (in sorted order) gets its own scatter series, with the
    column name on the x axis and the matrix value on the y axis.

    When ``mark`` is exactly ``"left"`` or ``"right"`` and exactly two
    distinct labels match, only the columns for which ``ttest`` returns a
    truthy value when comparing the two label groups are plotted. Otherwise
    every column is plotted.

    Args:
        csvfile: Path passed through to ``mcsv.load_matrix``.
        mark: Substring a label must contain to be plotted. The default
            empty string matches every label.
        alpha: Forwarded to ``ttest`` as its last argument (presumably the
            significance level -- confirm against ``ttest``).
    """
    matrix, label, row, col = mcsv.load_matrix(csvfile)

    # Map each selected label to the index of its scatter series.
    labeltype = {
        name: i
        for i, name in enumerate(sorted({l for l in label if mark in l}))
    }
    x = [[] for _ in labeltype]
    y = [[] for _ in labeltype]

    # The original condition lacked parentheses, so ``mark == "left"`` took
    # this branch regardless of the label count and could fail on
    # ``lvalue[1]``. The two-group comparison needs exactly two labels.
    if mark in ("left", "right") and len(labeltype) == 2:
        lvalue = list(labeltype.keys())

        def group_values(column, name):
            # Values of one column for rows carrying ``name``; entries equal
            # to -1 are dropped (presumably a missing-value marker -- confirm).
            selected = column[label == name]
            return selected[selected != -1]

        # NOTE(review): the meaning of the constant 10 is defined by ``ttest``.
        index = [
            i for i, cv in enumerate(matrix.transpose())
            if ttest(group_values(cv, lvalue[0]), group_values(cv, lvalue[1]),
                     10, alpha)
        ]
    else:
        index = list(range(len(col)))

    for values, name, _ in zip(matrix[:, index], label, row):
        if name not in labeltype:
            continue
        series = labeltype[name]
        for value, column_name in zip(values, col[index]):
            if value >= 0:
                x[series].append(column_name)
                y[series].append(value)

    for i, (xi, yi) in enumerate(zip(x, y)):
        plt.scatter(xi, yi, color=scalarMap.to_rgba(i), s=3)
        print(i, scalarMap.to_rgba(i))

    plt.show()
コード例 #2
0
ファイル: ipd.py プロジェクト: lemene/mbio
def test_feature(csvfile, mark, ratio, alpha, count=1):
    """Report how stable the selected feature set is under random sub-sampling.

    For each of ``count`` rounds the data is split with
    ``random_split_data``, the matrix is passed through ``norm1``, and
    ``benjamini_hochberg_filter`` selects columns twice: once on all rows
    and once on the training rows only. The two column sets are compared
    with the Jaccard index and with the fraction of all-row columns that
    were also selected on the training rows. Summary statistics are printed.

    Args:
        csvfile: Path passed through to ``mcsv.load_matrix``.
        mark: Must be ``"left"`` or ``"right"``; labels containing it define
            the two groups being compared.
        ratio: Forwarded to ``random_split_data`` (converted with ``float``).
        alpha: Forwarded to ``benjamini_hochberg_filter`` as its last
            argument (converted with ``float``).
        count: Number of random splits to average over (converted with
            ``int``); must be at least 1.

    Raises:
        ValueError: If ``count`` is smaller than 1.
        AssertionError: If ``mark`` is not ``"left"``/``"right"`` or the
            number of matching distinct labels is not exactly two.
    """
    alpha = float(alpha)
    ratio = float(ratio)
    count = int(count)
    if count < 1:
        # Previously this surfaced late as an IndexError on ``all_size[0]``.
        raise ValueError("count must be at least 1")

    def fraction(numerator, denominator):
        # An empty selection makes the ratio undefined; report NaN instead
        # of raising ZeroDivisionError.
        return numerator / denominator if denominator else float("nan")

    matrix, label, row, col = mcsv.load_matrix(csvfile)

    # The labels never change, so the group lookup is built once up front.
    labeltype = {
        name: i
        for i, name in enumerate(sorted({l for l in label if mark in l}))
    }

    # The original assertion lacked parentheses and therefore accepted
    # ``mark == "left"`` with any number of labels.
    assert mark in ("left", "right") and len(labeltype) == 2

    lvalue = list(labeltype.keys())

    def selected_columns(data, labels):
        # Column indices kept by the Benjamini-Hochberg filter for two groups.
        keep = benjamini_hochberg_filter(data, labels == lvalue[0],
                                         labels == lvalue[1], alpha)
        return set(np.arange(data.shape[1])[keep])

    jaccard, train_size, all_size, train_in_all = 0, [], [], 0
    for _ in range(count):
        itrain, itest = random_split_data(ratio, matrix, label)
        # NOTE(review): ``matrix`` is re-normalised on every round, so later
        # rounds split and test an already-normalised matrix. Confirm that
        # ``norm1`` is idempotent or that this is intended.
        matrix = norm1(matrix)

        itest_all = selected_columns(matrix, label)
        itest_train = selected_columns(matrix[itrain, ], label[itrain, ])

        train_size.append(len(itest_train))
        all_size.append(len(itest_all))

        shared = len(itest_train.intersection(itest_all))
        train_in_all += fraction(shared, len(itest_all))
        jaccard += fraction(shared, len(itest_all.union(itest_train)))

    print(train_size)
    print("Size:", all_size[0], np.mean(train_size), np.var(train_size))
    print("Jaccard index:", jaccard / count, train_in_all / count)
コード例 #3
0
ファイル: show_ipd.py プロジェクト: lemene/mbio
def test_feature(csvfile, mark, alpha):
    """Print the ``ttest_matrix`` results for all rows and for a training split.

    The matrix is loaded with ``mcsv.load_matrix``, split with
    ``random_split_data`` using a fixed ratio of 0.8, and normalised with
    ``normalize``. ``ttest_matrix`` is then run on the normalised full
    matrix and on the training rows of the un-normalised copy, and both
    results are printed.

    Args:
        csvfile: Path passed through to ``mcsv.load_matrix``.
        mark: Must be ``"left"`` or ``"right"``; labels containing it define
            the two groups being compared.
        alpha: Forwarded to ``ttest_matrix`` as its last argument.

    Raises:
        AssertionError: If ``mark`` is not ``"left"``/``"right"`` or the
            number of matching distinct labels is not exactly two.
    """
    matrix, label, row, col = mcsv.load_matrix(csvfile)
    # Keep the raw values; ``matrix`` is rebound to the normalised data below.
    omatrix = matrix.copy()

    itrain, itest = random_split_data(0.8, matrix, label)
    matrix = normalize(matrix, label)

    labeltype = {
        name: i
        for i, name in enumerate(sorted({l for l in label if mark in l}))
    }

    # The original assertion lacked parentheses and therefore accepted
    # ``mark == "left"`` with any number of labels.
    assert mark in ("left", "right") and len(labeltype) == 2

    lvalue = list(labeltype.keys())

    itest_all = ttest_matrix(matrix, label, lvalue, alpha)
    # NOTE(review): the training run uses the un-normalised copy while the
    # full run uses the normalised matrix -- confirm this is intended.
    itest_train = ttest_matrix(omatrix[itrain, ], label[itrain], lvalue, alpha)

    print(itest_all)
    print(itest_train)
コード例 #4
0
ファイル: ipd.py プロジェクト: lemene/mbio
def summary_matrix(csvfile):
    """Print a short summary of the matrix stored in ``csvfile``.

    Output, in order: the matrix shape, the number of rows per label, and,
    for every distinct occurrence count of a row name, how many row names
    occur that often. Row names that occur four or more times are listed
    explicitly.

    Args:
        csvfile: Path passed through to ``mcsv.load_matrix``.
    """
    matrix, label, row, col = mcsv.load_matrix(csvfile)
    print("matrix.shape:", matrix.shape)

    # Occurrences of each label, in first-seen order.
    per_label = {}
    for name in label:
        per_label[name] = per_label.get(name, 0) + 1

    print("Label Count:", per_label.items())

    # Occurrences of each row name.
    per_row = {}
    for name in row:
        per_row[name] = per_row.get(name, 0) + 1

    # Invert the tally: occurrence count -> row names with that count.
    names_by_count = {}
    for name, times in per_row.items():
        names_by_count.setdefault(times, []).append(name)

    for times, names in names_by_count.items():
        print(times, len(names))
        if times >= 4:
            print(names)