Example #1
0
def runPrismX(clusterCount: int):
    """Run the PrismX pipeline for one cluster count and pickle the model.

    The steps, in order, are: build correlation matrices from
    ``mouse_matrix.h5``, compute correlation scores for the selected gene
    set library, train a model, and write it to
    ``gobp_model_<clusterCount>.pkl`` in the current working directory.

    After each step the elapsed wall-clock time since the start of the call
    is printed as ``T1``, ``T2`` and ``T3`` (the values are cumulative, not
    per-step durations).

    Args:
        clusterCount: Forwarded to ``px.createCorrelationMatrices`` and used
            to name the working folders and the output pickle file.
    """
    # "mouse_matrix.h5" must already exist in the working directory. The
    # download below is disabled; re-enable it to fetch the file.
    #urllib.request.urlretrieve("https://mssm-seq-matrix.s3.amazonaws.com/mouse_matrix.h5", "mouse_matrix.h5")
    start = time.time()
    correlationFolder = f"correlation_{clusterCount}_folder_q"
    predictionFolder = f"prediction_{clusterCount}_folder_q"

    # NOTE(review): index 110 is hard-coded; which library it selects depends
    # on the order returned by px.list_libraries() -- confirm it is stable.
    libs = px.list_libraries()
    gmt_file = px.load_library(libs[110])

    px.createCorrelationMatrices("mouse_matrix.h5",
                                 correlationFolder,
                                 clusterCount=clusterCount,
                                 sampleCount=5000,
                                 correlationSampleCount=5000,
                                 verbose=True)
    t1 = time.time() - start
    print(f"T1: {t1}")

    px.correlation_scores(gmt_file,
                          correlationFolder,
                          predictionFolder,
                          verbose=True)
    t2 = time.time() - start
    print(f"T2: {t2}")

    model = px.trainModel(predictionFolder,
                          correlationFolder,
                          gmt_file,
                          training_size=300000,
                          test_train_split=0.1,
                          sample_positive=40000,
                          sample_negative=200000,
                          random_state=42,
                          verbose=True)

    # Use a context manager so the file is flushed and closed even if
    # pickling raises; the bare open() used previously leaked the handle.
    with open(f"gobp_model_{clusterCount}.pkl", "wb") as model_file:
        pickle.dump(model, model_file)
    t3 = time.time() - start
    print(f"T3: {t3}")
Example #2
0
import urllib.request
import prismx as px
import pickle
from memory_profiler import memory_usage
import os
import time
import matplotlib.pyplot as plt
import pandas as pd
import feather
from prismx.utils import load_json, get_config, get_data_path, read_gmt
from prismx.loaddata import get_genes
from progress.bar import Bar
import numpy as np

# Show the available libraries, then select entry 28 of the listing. The
# selected entry is used both to load the library and as the output name.
px.print_libraries()
libs = px.list_libraries()
outname = libs[28]
gmt_file = px.load_library(outname)

# Input folders and the result folder passed to the prediction step.
# NOTE(review): the "_100" suffix presumably matches the cluster count used
# when "gobp_model_100.pkl" was produced -- confirm against the training run.
outfolder = "prismxresult_100"
predictionFolder = "prediction_100_folder"
correlationFolder = "correlation_100_folder"

# Run predictions for the selected library with the pickled model.
px.predict_gmt(
    "gobp_model_100.pkl",
    gmt_file,
    correlationFolder,
    predictionFolder,
    outfolder,
    outname,
    step_size=1000,
    verbose=True,
)