def runPrismX(clusterCount: int):
    """Run the PrismX correlation, scoring and training steps, timing each one.

    The steps are executed in order:

    1. ``px.createCorrelationMatrices`` on ``mouse_matrix.h5``, writing into
       ``correlation_<clusterCount>_folder_q``.
    2. ``px.correlation_scores`` for the loaded gene set library, writing
       into ``prediction_<clusterCount>_folder_q``.
    3. ``px.trainModel`` on those two folders; the returned model is pickled
       to ``gobp_model_<clusterCount>.pkl`` in the current directory.

    After each step the elapsed wall-clock time since the start of the
    function is printed as ``T1``, ``T2`` and ``T3`` (cumulative seconds,
    not per-step durations).

    Args:
        clusterCount: Passed to ``px.createCorrelationMatrices`` as
            ``clusterCount`` and used to build the folder and model names.

    Returns:
        None.
    """
    # The expression matrix must already be present in the working directory.
    # It can be fetched once with:
    #   urllib.request.urlretrieve(
    #       "https://mssm-seq-matrix.s3.amazonaws.com/mouse_matrix.h5",
    #       "mouse_matrix.h5")
    start = time.time()

    correlationFolder = f"correlation_{clusterCount}_folder_q"
    predictionFolder = f"prediction_{clusterCount}_folder_q"

    # Entry 110 of the library listing is the gene set library used for
    # training.  NOTE(review): the index depends on the ordering returned by
    # px.list_libraries() -- confirm it still points at the intended library.
    libs = px.list_libraries()
    gmt_file = px.load_library(libs[110])

    px.createCorrelationMatrices(
        "mouse_matrix.h5",
        correlationFolder,
        clusterCount=clusterCount,
        sampleCount=5000,
        correlationSampleCount=5000,
        verbose=True,
    )
    t1 = time.time() - start
    print(f"T1: {t1}")

    px.correlation_scores(gmt_file, correlationFolder, predictionFolder, verbose=True)
    t2 = time.time() - start
    print(f"T2: {t2}")

    model = px.trainModel(
        predictionFolder,
        correlationFolder,
        gmt_file,
        training_size=300000,
        test_train_split=0.1,
        sample_positive=40000,
        sample_negative=200000,
        random_state=42,
        verbose=True,
    )
    # Use a context manager so the pickle file is flushed and closed
    # deterministically instead of whenever the handle is garbage collected.
    with open(f"gobp_model_{clusterCount}.pkl", "wb") as model_file:
        pickle.dump(model, model_file)
    t3 = time.time() - start
    print(f"T3: {t3}")
# Standard library
import os
import pickle
import time
import urllib.request

# Third-party
import feather
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from memory_profiler import memory_usage
from progress.bar import Bar

# PrismX
import prismx as px
from prismx.loaddata import get_genes
from prismx.utils import load_json, get_config, get_data_path, read_gmt

# --- Prediction run -------------------------------------------------------
# Applies the previously pickled model ("gobp_model_100.pkl") to one gene set
# library, using the existing correlation folder for 100 clusters.

# Folders and output location used by the prediction step.
correlationFolder = "correlation_100_folder"
predictionFolder = "prediction_100_folder"
outfolder = "prismxresult_100"

# Show the available libraries, then pick entry 28 of the listing; that entry
# is used both as the output name and to load the gene set library itself.
px.print_libraries()
libs = px.list_libraries()
outname = libs[28]
gmt_file = px.load_library(outname)

px.predict_gmt(
    "gobp_model_100.pkl",
    gmt_file,
    correlationFolder,
    predictionFolder,
    outfolder,
    outname,
    step_size=1000,
    verbose=True,
)