def ge_test_extract_fun(data, n_device, batchsize, n_targets, model_path, n_extract,
                        num_workers=50,
                        out_dir='/home/abe/data/genome_data/data310'):
    """Export one test batch (target and model prediction) to CSV files.

    Iterates over the test set in its fixed order and, for the batch whose
    1-based position equals ``n_extract``, writes the ground-truth tensor to
    ``data_test_out{n}.csv`` and ``exp(model output)`` to ``data_out{n}.csv``
    inside ``out_dir``. Progress messages are appended to
    ``data_extract_log.txt`` in the current working directory.

    Args:
        data: Forwarded to ``ge_data.ge_test_dataset``.
        n_device: CUDA device index; the CPU is used when CUDA is unavailable.
        batchsize: Batch size handed to the ``DataLoader``.
        n_targets: Forwarded to ``ge_nn.Net``.
        model_path: Path of the saved ``state_dict`` to evaluate.
        n_extract: 1-based number of the batch to export.
        num_workers: Number of ``DataLoader`` worker processes.
        out_dir: Directory that receives the two CSV files; it must exist.

    Returns:
        None.
    """
    # Load the test data; shuffling is off so batch numbers are reproducible.
    test_set = ge_data.ge_test_dataset(data)
    test_loader = DataLoader(test_set, batch_size=batchsize, shuffle=False,
                             num_workers=num_workers)
    device_str = "cuda:{}".format(n_device)
    device = torch.device(device_str if torch.cuda.is_available() else "cpu")
    print("used device : ", device)

    # Load the trained model. map_location lets a checkpoint that was saved
    # from a GPU be restored on whichever device was selected above.
    test_model = ge_nn.Net(n_targets=n_targets)
    test_model.to(device)
    test_model.load_state_dict(torch.load(model_path, map_location=device))
    test_model.eval()

    def log(message):
        # Re-open per message so every line is on disk even if the run dies.
        with open('data_extract_log.txt', 'a') as f:
            f.write(message)

    extracted = False
    with torch.no_grad():
        # count is the 1-based test batch number.
        for count, (test_in, test_out) in enumerate(test_loader, start=1):
            if count != n_extract:
                # Only the requested batch needs a forward pass.
                log('data{}:data went through\n'.format(count))
                continue

            # The model output is exponentiated before export; presumably the
            # network predicts log-rates (the other test routines score it with
            # PoissonNLLLoss, whose default is log_input=True) - TODO confirm.
            out = torch.exp(test_model(test_in.to(device))).to("cpu")
            test_out = test_out.to("cpu")
            print(test_out.shape)
            print(out.shape)
            # Drop singleton dimensions (e.g. a batch axis of size 1).
            test_out = torch.squeeze(test_out)
            out = torch.squeeze(out)
            print(test_out.shape)
            print(out.shape)

            log('data{}:tensor detach numpy\n'.format(count))
            test_out = test_out.detach().numpy()
            out = out.detach().numpy()

            # Write the arrays row by row. The original author notes a shape of
            # (4229, 1024) here - presumably for batchsize == 1; verify.
            log('data{}:test out data csv write\n'.format(count))
            # newline='' is the csv-module recommended way to open the file.
            with open('{}/data_test_out{}.csv'.format(out_dir, count), 'w',
                      newline='') as fc:
                csv.writer(fc).writerows(test_out)

            log('data{}:model out data csv write 2\n'.format(count))
            with open('{}/data_out{}.csv'.format(out_dir, count), 'w',
                      newline='') as fc:
                csv.writer(fc).writerows(out)
            extracted = True

    if not extracted:
        # Make an out-of-range n_extract visible instead of finishing silently.
        print('warning: batch {} was not found in the test loader'.format(n_extract))
    print('data extract finished')
    # Trailing newline keeps successive runs on separate lines of the log.
    log('data extract finished\n')
# Example #2
def ge_test_fun(data, n_device, batchsize, n_targets, model_path, num_workers=50):
    """Evaluate a trained model on the test set and report loss and score.

    Computes the Poisson negative log-likelihood loss and
    ``ge_loss.log_r2_score`` for every test batch, averages both over the
    batches, prints the result and appends it to ``train_log.txt`` in the
    current working directory.

    Args:
        data: Forwarded to ``ge_data.ge_test_dataset``.
        n_device: CUDA device index; the CPU is used when CUDA is unavailable.
        batchsize: Batch size handed to the ``DataLoader``.
        n_targets: Forwarded to ``ge_nn.Net``.
        model_path: Path of the saved ``state_dict`` to evaluate.
        num_workers: Number of ``DataLoader`` worker processes.

    Returns:
        None.
    """
    # Load the test data.
    test_set = ge_data.ge_test_dataset(data)
    test_loader = DataLoader(test_set,
                             batch_size=batchsize,
                             shuffle=False,
                             num_workers=num_workers)
    device_str = "cuda:{}".format(n_device)
    device = torch.device(device_str if torch.cuda.is_available() else "cpu")
    print("used device : ", device)

    # Loss function.
    loss_fun = nn.PoissonNLLLoss()

    # Load the trained model. map_location lets a checkpoint that was saved
    # from a GPU be restored on whichever device was selected above.
    test_model = ge_nn.Net(n_targets=n_targets)
    test_model.to(device)
    test_model.load_state_dict(torch.load(model_path, map_location=device))
    test_model.eval()

    # Per-batch records.
    test_loss = []
    test_score = []
    with torch.no_grad():
        for test_in, test_out in test_loader:
            # Model input.
            test_in, test_out = test_in.to(device), test_out.to(device)
            out = test_model(test_in)
            # Loss.
            test_loss.append(loss_fun(out, test_out).item())
            # Score. The return type of log_r2_score is not visible here; if it
            # is a tensor, move it to host memory so np.average below also
            # works when the evaluation runs on a CUDA device.
            score = ge_loss.log_r2_score(out, test_out)
            if isinstance(score, torch.Tensor):
                score = score.detach().cpu().numpy()
            test_score.append(score)

    avr_test_loss = np.average(test_loss)
    avr_test_score = np.average(test_score)
    summary = 'test data loss:{}, test r2 score:{}'.format(avr_test_loss,
                                                           avr_test_score)
    print(summary)
    with open('train_log.txt', 'a') as f:
        # Trailing newline keeps successive appends on separate lines.
        f.write(summary + '\n')
# Example #3
def ge_test_plot_fun(data, n_device, batchsize, n_targets, model_path,
                     num_workers=50, plot_batch=310):
    """Evaluate the model on the test set and plot selected target tracks.

    Computes the Poisson negative log-likelihood loss and
    ``ge_loss.log_r2_score`` for every batch and prints their averages. For the
    batch whose 1-based position equals ``plot_batch``, bar charts of the first
    sample in the batch are saved for five target columns: the ground truth
    under ``result/test_out/`` and ``exp(model output)`` under
    ``result/model_out/``. Both directories must already exist.

    Args:
        data: Forwarded to ``ge_data.ge_test_dataset``.
        n_device: CUDA device index; the CPU is used when CUDA is unavailable.
        batchsize: Batch size handed to the ``DataLoader``.
        n_targets: Forwarded to ``ge_nn.Net``.
        model_path: Path of the saved ``state_dict`` to evaluate.
        num_workers: Number of ``DataLoader`` worker processes.
        plot_batch: 1-based number of the batch to plot. The original author
            picks it by looking the region up in ``test_data_label.bed``.

    Returns:
        None.
    """
    # (file-name suffix, index on the last tensor axis) of the tracks to plot,
    # in plotting order. The names presumably identify the assay behind each
    # target column - TODO confirm against the target table.
    tracks = (
        ('dnase', 543),
        ('H3K79me2', 956),
        ('H3K4me3', 955),
        ('H3K9ac', 1086),
        ('cage', 3421),
    )

    def save_bar(values, path):
        # Draw one wide, flat bar chart and release the figure afterwards so
        # figures do not pile up in matplotlib's global registry.
        fig = plt.figure(figsize=(10, 1))
        plt.bar(range(values.shape[0]), values.numpy())
        plt.savefig(path)
        plt.close(fig)

    # Load the test data.
    test_set = ge_data.ge_test_dataset(data)
    test_loader = DataLoader(test_set,
                             batch_size=batchsize,
                             shuffle=False,
                             num_workers=num_workers)
    device_str = "cuda:{}".format(n_device)
    device = torch.device(device_str if torch.cuda.is_available() else "cpu")
    print("used device : ", device)

    # Loss function.
    loss_fun = nn.PoissonNLLLoss()

    # Load the trained model. map_location lets a checkpoint that was saved
    # from a GPU be restored on whichever device was selected above.
    test_model = ge_nn.Net(n_targets=n_targets)
    test_model.to(device)
    test_model.load_state_dict(torch.load(model_path, map_location=device))
    test_model.eval()

    # Per-batch records.
    test_loss = []
    test_score = []
    with torch.no_grad():
        # count is the 1-based test batch number.
        for count, (test_in, test_out) in enumerate(test_loader, start=1):
            # Model input.
            test_in, test_out = test_in.to(device), test_out.to(device)
            out = test_model(test_in)
            # Loss.
            test_loss.append(loss_fun(out, test_out).item())
            # Score. The return type of log_r2_score is not visible here; if it
            # is a tensor, move it to host memory so np.average below also
            # works when the evaluation runs on a CUDA device.
            score = ge_loss.log_r2_score(out, test_out)
            if isinstance(score, torch.Tensor):
                score = score.detach().cpu().numpy()
            test_score.append(score)

            if count != plot_batch:
                continue

            # Plot only the requested batch. The output is exponentiated first;
            # PoissonNLLLoss defaults to log_input=True, so the raw output is
            # treated as a log-rate.
            target_cpu = test_out.to("cpu")
            rate_cpu = torch.exp(out).to("cpu")
            for name, index in tracks:
                save_bar(target_cpu[0, :, index],
                         'result/test_out/test_out_{}.png'.format(name))
                save_bar(rate_cpu[0, :, index],
                         'result/model_out/model_out_{}.png'.format(name))

    avr_test_loss = np.average(test_loss)
    avr_test_score = np.average(test_score)
    print('test data loss:{}, test r2 score:{}'.format(avr_test_loss,
                                                       avr_test_score))
# Example #4
def ge_test_peason_raw_fun(data, n_device, batchsize, n_targets, model_path,
                           num_workers=50):
    """Score the model on the test set with per-sample Pearson correlations.

    For every test sample, the ground truth and ``exp(model output)`` are
    passed through ``ge_loss.smoothing_damy`` and compared with
    ``ge_loss.pearsonR``. The scores of each batch are appended as rows to
    ``pearsonr.csv``; progress and the final summary are appended to
    ``pearson_test_log.txt``. Both files live in the current working directory.

    The summary reports the mean Poisson NLL loss over all batches and the
    mean / max / min of the scores of all samples. The indices reported next to
    max and min come from ``np.argmax`` / ``np.argmin``, i.e. they index the
    flattened score array.

    Args:
        data: Forwarded to ``ge_data.ge_test_dataset``.
        n_device: CUDA device index; the CPU is used when CUDA is unavailable.
        batchsize: Batch size handed to the ``DataLoader``.
        n_targets: Forwarded to ``ge_nn.Net`` and to the ``ge_loss`` helpers.
        model_path: Path of the saved ``state_dict`` to evaluate.
        num_workers: Number of ``DataLoader`` worker processes.

    Returns:
        None.

    Raises:
        ValueError: If the test loader yields no samples.
    """
    # Load the test data.
    test_set = ge_data.ge_test_dataset(data)
    test_loader = DataLoader(test_set,
                             batch_size=batchsize,
                             shuffle=False,
                             num_workers=num_workers)
    device_str = "cuda:{}".format(n_device)
    device = torch.device(device_str if torch.cuda.is_available() else "cpu")
    print("used device : ", device)

    # Loss function.
    loss_fun = nn.PoissonNLLLoss()

    # Load the trained model. map_location lets a checkpoint that was saved
    # from a GPU be restored on whichever device was selected above.
    test_model = ge_nn.Net(n_targets=n_targets)
    test_model.to(device)
    test_model.load_state_dict(torch.load(model_path, map_location=device))
    test_model.eval()

    def log(message):
        # Re-open per message so every line is on disk even if the run dies.
        with open('pearson_test_log.txt', 'a') as f:
            f.write(message)

    test_loss = []     # one loss value per batch
    all_scores = []    # one pearsonR result per sample, across all batches
    count = 0          # 1-based running test sample number
    with torch.no_grad():
        for test_in, test_out in test_loader:
            # Model input.
            test_in, test_out = test_in.to(device), test_out.to(device)
            out = test_model(test_in)
            # Loss.
            test_loss.append(loss_fun(out, test_out).item())

            # Smoothing and correlation are done on the CPU, on the
            # exponentiated output.
            test_out = test_out.to("cpu")
            out = torch.exp(out).to("cpu")

            # Walk over the samples actually present in this batch, so a short
            # final batch needs no special casing.
            batch_scores = []
            for target, pred in zip(torch.split(test_out, 1, dim=0),
                                    torch.split(out, 1, dim=0)):
                count += 1
                log('data{}:squeez\n'.format(count))
                t = torch.squeeze(target)
                o = torch.squeeze(pred)
                s_t = ge_loss.smoothing_damy(t, n_targets)
                s_o = ge_loss.smoothing_damy(o, n_targets)
                # Pearson correlation (the original author notes n_targets
                # values per sample).
                s_t = torch.stack(s_t)
                s_o = torch.stack(s_o)
                log('data{}:pearson\n'.format(count))
                batch_scores.append(ge_loss.pearsonR(s_o, s_t, n_targets))

            # Append this batch's scores to the CSV, one row per sample.
            log('data{}:pearson csv write\n'.format(count))
            print(len(batch_scores))
            # newline='' is the csv-module recommended way to open the file.
            with open('pearsonr.csv', 'a', newline='') as fp:
                csv.writer(fp).writerows(batch_scores)
            all_scores.extend(batch_scores)

    if not all_scores:
        raise ValueError('test loader produced no samples; nothing to score')

    avr_test_loss = np.average(test_loss)
    avr_test_score = np.mean(all_scores)
    best, best_index = np.max(all_scores), np.argmax(all_scores)
    worst, worst_index = np.min(all_scores), np.argmin(all_scores)
    # NOTE: the label says "r2 score" but the value is the mean Pearson score;
    # the text is kept as-is so existing logs stay comparable.
    print(
        'test data loss:{}, test r2 score:{}, \n max:{} index{}:, \n min:{} index{}'
        .format(avr_test_loss, avr_test_score, best, best_index, worst,
                worst_index))
    # Trailing newline keeps successive runs on separate lines of the log.
    log('test data loss:{}, test r2 score:{}, max:{} index{}:, min:{} index{}'
        .format(avr_test_loss, avr_test_score, best, best_index, worst,
                worst_index) + '\n')