Example #1
    def load_from_hdf5_raw(self, dname="mimic-cancer", cohort=False, nrows=None):

        data_dict = loadDataset(dname)
        print("Loaded dataset {0}".format(dname))
        
        self.x_train = data_dict['train_x']
        self.x_test = data_dict['test_x']
        self.x_valid = data_dict['valid_x']
        

        if cohort:
            self.y_valid = data_dict['valid_c']
            self.y_train = data_dict['train_c']
            self.y_test = data_dict['test_c']

        else:
            self.y_valid = data_dict['valid_y']
            self.y_train = data_dict['train_y']
            self.y_test = data_dict['test_y']
        

        if nrows is not None:
            print("Truncating rows")

            self.x_train = self.x_train[0:nrows]
            self.x_test = self.x_test[0:nrows]
            self.x_valid = self.x_valid[0:nrows]

            self.y_valid = self.y_valid[0:nrows]
            self.y_train = self.y_train[0:nrows]
            self.y_test = self.y_test[0:nrows]

        print("Set up train/valid/test split")
Example #2
    def load_from_hdf5_latent(self, dname="mimic-cancer", feat_name='mu', ssi=False, cohort=False, nrows=None):

        # Load the latent representation instead of the raw features
        representations = loadHDF5('/data/ml2/vishakh/SHARED/representations.h5')

        # we need the labels anyway and we still care which class
        data_dict = loadDataset(dname)
        if ssi:
            feat_name = 'ssi-'+feat_name
            
        #only xs change 
        self.x_train = representations['train-vae-' + feat_name]
        self.x_test = representations['test-vae-' + feat_name]
        self.x_valid = representations['valid-vae-' + feat_name]
        
        
        if cohort:
            self.y_valid = data_dict['valid_c']
            self.y_train = data_dict['train_c']
            self.y_test = data_dict['test_c']

        else:
            self.y_valid = data_dict['valid_y']
            self.y_train = data_dict['train_y']
            self.y_test = data_dict['test_y']
        

        if nrows is not None:
            print("Truncating rows")

            self.x_train = self.x_train[0:nrows]
            self.x_test = self.x_test[0:nrows]
            self.x_valid = self.x_valid[0:nrows]

            self.y_valid = self.y_valid[0:nrows]
            self.y_train = self.y_train[0:nrows]
            self.y_test = self.y_test[0:nrows]
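load_from_hdf5_latent expects representations.h5 to expose datasets named '<split>-vae-<feat_name>' (prefixed with 'ssi-' when ssi=True). A rough sketch of writing a file with that layout via h5py, purely to illustrate the expected keys (the path and shapes here are placeholders):

import h5py
import numpy as np

# Placeholder path; the snippet above reads /data/ml2/vishakh/SHARED/representations.h5.
with h5py.File('representations_example.h5', 'w') as f:
    rng = np.random.RandomState(0)
    for split in ('train', 'valid', 'test'):
        # One latent matrix per split, keyed the way the loader expects.
        f.create_dataset(split + '-vae-mu', data=rng.rand(100, 32))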
Example #3
def run_model():
    #load mnist dataset
    X_train, y_train, X_test, y_test, num_classes = load.loadDataset("mnist")

    #create model from textfile
    model = cMod.createModel(filename=file_path)

    #set number of epochs
    epochs = 50

    #print summary
    print(model.summary())

    #fit model and print results
    model.fit(X_train,
              y_train,
              validation_data=(X_test, y_test),
              epochs=epochs,
              batch_size=200)
    score = model.evaluate(X_test, y_test, verbose=0)
    print("Accuracy: %.2f%%" % (score[1] * 100))

    # save the model
    model.save(model_name)
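run_model relies on the module-level names file_path and model_name that are defined elsewhere in the script; a hedged sketch of how they might be set before calling it (both values are placeholders, not the original script's paths):

# Placeholder values; the original script defines these elsewhere.
file_path = 'myModel.txt'    # text file consumed by cMod.createModel
model_name = 'mnist_cnn.h5'  # where model.save() writes the trained model

if __name__ == '__main__':
    run_model()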
Example #4
# Sweep the number of selected features from 5 to 30, with a step size of 1
for num_fea in range(5,31,1):
    print(num_fea)
    # # Set the data directory
    # data_Dir = "../../dataset/TCGA_LUAD/TCGA_LUAD_logFC1.5_logFC0.5/methyGeneMerge2/"

    # Create a directory for each feature dimension
    nDim_DIR = data_Dir + "/merge_mrmr" + str(num_fea) + "/"
    if not os.path.exists(nDim_DIR):
        os.mkdir(nDim_DIR)

    # Load the discretized data
    # get the feature data and the label
    X = []
    y = []
    load.loadDataset(source_file_disc, X, y)
    X = np.array(X)
    y = np.array(y)

    # Load the raw data
    # get the feature data and the label
    X_raw = []
    y_raw = []
    load.loadDataset(source_fileT, X_raw, y_raw)
    X_raw = np.array(X_raw)
    y_raw = np.array(y_raw)

    # ten fold cross validation
    skf = StratifiedKFold(n_splits=10, random_state=14, shuffle=True)
    # get the number of folds
    n = skf.get_n_splits(X, y)
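The snippet stops right after get_n_splits; a minimal sketch of how the folds are typically consumed, reusing the X, y, and skf from above (the classifier here is an assumption, not part of the original pipeline):

from sklearn.linear_model import LogisticRegression

fold_scores = []
for train_idx, test_idx in skf.split(X, y):
    # Fit on the training portion of the fold and score on the held-out portion.
    clf = LogisticRegression(max_iter=1000)
    clf.fit(X[train_idx], y[train_idx])
    fold_scores.append(clf.score(X[test_idx], y[test_idx]))
print("mean 10-fold accuracy: %.3f" % (sum(fold_scores) / len(fold_scores)))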
Example #5
parser.add_argument('--num-samples',
                    type=int,
                    default=5,
                    help='number of predictions to make for each test item')
parser.add_argument('runme_path', help='path to relevant runme.sh script')
parser.add_argument('conf_path',
                    help='path to *-config.pkl file in checkpoints')
parser.add_argument('weight_path',
                    help='path to *-params.h5 file in checkpoints')
parser.add_argument('dest_h5', help='.h5 file to write predictions to')

if __name__ == '__main__':
    args = parser.parse_args()

    print('Loading dataset')
    ds_dict = loadDataset()
    if 'p2d' in ds_dict:
        dataset = ds_dict['p2d']
    else:
        dataset = ds_dict['p3d']

    print('Loading DKF')
    dkf = load_dkf(ds_dict, args.runme_path, args.conf_path, args.weight_path)

    print('Generating eval data')
    is_2d = isinstance(dataset, p2d_loader.P2DDataset)
    pred_usable = None
    if is_2d:
        result = dataset.get_ds_for_eval(train=False, discard_no_annos=True)
        for_cond, for_pred = result['conditioning'], result['prediction']
        pred_scales = result['prediction_scales']
Example #6
import os, time, sys
""" Add the higher level directory to PYTHONPATH to be able to access the models """
sys.path.append('../')
""" Change this to modify the loadDataset function """
from load import loadDataset
""" 
This will contain a hashmap where the 
parameters correspond to the default ones modified
by any command line options given to this script
"""
from parse_args_dkf import parse
params = parse()
""" Some utility functions from theanomodels """
from utils.misc import removeIfExists, createIfAbsent, mapPrint, saveHDF5, displayTime
""" Load the dataset into a hashmap. See load.py for details  """
dataset = loadDataset()
params['savedir'] += '-template'
createIfAbsent(params['savedir'])
""" Add dataset and NADE parameters to "params"
    which will become part of the model
"""
for k in ['dim_observations', 'data_type']:
    params[k] = dataset[k]
mapPrint('Options: ', params)
if params['use_nade']:
    params['data_type'] = 'binary_nade'
"""
import DKF + learn/evaluate functions
"""
start_time = time.time()
from stinfmodel.dkf import DKF
Example #7
import os, time, sys, addpaths

# Change this to modify the loadDataset function
from load import loadDataset

# This will contain a hashmap where the parameters correspond to the default
# ones modified by any command line options given to this script
from parse_args_dkf import parse
params = parse()

# Some utility functions from theanomodels
from utils.misc import removeIfExists, createIfAbsent, mapPrint, saveHDF5, displayTime

# Load the dataset into a hashmap. See load.py for details
dataset = loadDataset(use_cond=params['use_cond'])
if params['use_cond']:
    print('Using conditioning information')
    train_cond_vals = dataset['train_cond_vals']
    val_cond_vals = dataset['val_cond_vals']
    assert train_cond_vals.ndim == 3, train_cond_vals.shape
    params['dim_cond'] = train_cond_vals.shape[2]
else:
    train_cond_vals = val_cond_vals = None
params['savedir'] += '-h36m'
createIfAbsent(params['savedir'])

# Add dataset and NADE parameters to "params" which will become part of the
# model
for k in ['dim_observations', 'data_type']:
    params[k] = dataset[k]
mapPrint('Options: ', params)
Example #8
import load
import cMod

#load mnist dataset
XTrain, yTrain, XTest, yTest, numClasses = load.loadDataset("mnist")

#create model from textfile
model = cMod.createModel(filename='myM')

#set number of epochs
epochs = 10

#print summary
print(model.summary())

#fit model and print results
model.fit(XTrain,
          yTrain,
          validation_data=(XTest, yTest),
          epochs=epochs,
          batch_size=200)
score = model.evaluate(XTest, yTest, verbose=0)
print("Accuracy: %.2f%%" % (score[1] * 100))
Example #9
def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    assert len(inputs) == len(targets)
    if shuffle:
        indices = np.arange(len(inputs))
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt]


print("Loading data...")
X_train, y_train, X_val, y_val = load.loadDataset()

input_var = T.tensor4('inputs')
target_var = T.tensor4('target')

print("Building model and compiling functions...")

batchsize = 128

# Network
#network = Model.OneLayerMLP(batchsize, input_var)
network = model.simpleConv(input_var)

# Loss Function
prediction = lasagne.layers.get_output(network)
loss = T.mean(lasagne.objectives.squared_error(prediction, target_var))
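The listing ends at the loss definition; a rough sketch of how such a Lasagne/Theano script typically compiles and runs the training loop using the iterate_minibatches helper above (the learning rate and epoch count are arbitrary choices, and an `import theano` at the top of the script is assumed):

import theano

# Compile the update step and loop over minibatches (sketch, not the original script).
params = lasagne.layers.get_all_params(network, trainable=True)
updates = lasagne.updates.adam(loss, params, learning_rate=1e-3)
train_fn = theano.function([input_var, target_var], loss, updates=updates)

for epoch in range(10):
    epoch_loss, n_batches = 0.0, 0
    for x_batch, y_batch in iterate_minibatches(X_train, y_train, batchsize, shuffle=True):
        epoch_loss += train_fn(x_batch, y_batch)
        n_batches += 1
    print("epoch %d: training loss %.4f" % (epoch, epoch_loss / n_batches))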