Example #1
def test_luna3d():
    # path = '/mnt/sda3/data/kaggle-lung/lunapred/luna_scan_v3_dice-20170131-173443/'
    path = '/mnt/sda3/data/kaggle-lung/lunapred_el/luna_scan_v3_dice-20170201-231707/'
    files = os.listdir(path)
    print files
    x, y, p = [], [], []
    for f in files:
        if 'in' in f:
            x.append(f)
        elif 'tgt' in f:
            y.append(f)
        else:
            p.append(f)
    x = sorted(x)
    y = sorted(y)
    p = sorted(p)
    for xf, yf, pf in zip(x, y, p):
        x_batch = utils.load_pkl(path + xf)
        pred_batch = utils.load_pkl(path + pf)
        y_batch = utils.load_pkl(path + yf)
        print xf
        print yf
        print pf
        # plot_2d_animation(x_batch[0], y_batch[0], pred_batch[0])
        plot_slice_3d_3(x_batch[0,0],y_batch[0,0],pred_batch[0,0],0,'aa')
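Note: every example in this listing calls a load_pkl helper (variously utils.load_pkl, ut.load_pkl, k.load_pkl, or a bare load_pkl) whose definition is never shown. A minimal sketch of such a helper, assuming it simply unpickles one object from a file path (the projects' actual utils modules may differ):

import pickle

def load_pkl(path):
    # Sketch only: open the file in binary mode and return the unpickled object.
    with open(path, 'rb') as f:
        return pickle.load(f)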
Example #2
    def read_act_texts(self):
        text_data = load_pkl('data/%s_labeled_text_data.pkl' % self.domain)
        pos_data = load_pkl('data/%s_dependency.pkl' % self.domain)
        act_texts = []
        #ipdb.set_trace()
        for i in range(len(text_data)):
            act_text = {}
            act_text['tokens'] = text_data[i]['words']
            act_text['sents'] = text_data[i]['sents']
            act_text['acts'] = text_data[i]['acts']
            act_text['sent_acts'] = text_data[i]['sent_acts']
            act_text['word2sent'] = text_data[i]['word2sent']
            act_text['tags'] = np.ones(len(text_data[i]['words']),
                                       dtype=np.int32)
            act_text['act2related'] = {}
            for acts in text_data[i]['acts']:
                act_text['act2related'][acts['act_idx']] = acts['related_acts']
                act_text['tags'][
                    acts['act_idx']] = acts['act_type'] + 1  # 2, 3, 4
            act_text['pos'] = []
            for sent in pos_data[i]:
                for word, pos in sent:
                    act_text['pos'].append(self.pos_dict[pos])

            self.create_matrix(act_text)
            act_texts.append(act_text)
        act_indices = ten_fold_split_ind(len(act_texts), self.k_fold_indices,
                                         self.k_fold)
        act_data = index2data(act_indices, act_texts)
        return act_data
Example #3
def test_luna3d():
    # path = '/mnt/sda3/data/kaggle-lung/lunapred/luna_scan_v3_dice-20170131-173443/'
    path = '/mnt/sda3/data/kaggle-lung/lunapred_el/luna_scan_v3_dice-20170201-231707/'
    files = os.listdir(path)
    print files
    x, y, p = [], [], []
    for f in files:
        if 'in' in f:
            x.append(f)
        elif 'tgt' in f:
            y.append(f)
        else:
            p.append(f)
    x = sorted(x)
    y = sorted(y)
    p = sorted(p)
    for xf, yf, pf in zip(x, y, p):
        x_batch = utils.load_pkl(path + xf)
        pred_batch = utils.load_pkl(path + pf)
        y_batch = utils.load_pkl(path + yf)
        print xf
        print yf
        print pf
        # plot_2d_animation(x_batch[0], y_batch[0], pred_batch[0])
        plot_slice_3d_3(x_batch[0, 0], y_batch[0, 0], pred_batch[0, 0], 0,
                        'aa')
Example #4
 def __init__(self, *args, **kwargs):
     parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
     data_dir = os.path.join(parent_dir, 'data')
     fid_to_label = load_pkl(os.path.join(data_dir, 'fid_to_label.pkl'))
     fid_to_text = load_pkl(os.path.join(data_dir, 'fid_to_text.pkl'))
     fids_path = os.path.join(data_dir, 'fids.pkl')
     label_to_int_path = os.path.join(data_dir, 'label_to_int.pkl')
     fids = load_pkl(fids_path)
     label_to_int = load_pkl(label_to_int_path)
     paths = [os.path.join(data_dir, 'images', 'img%s.jpg' % fid) for fid in fids]
     targets = [label_to_int[fid_to_label[fid]] for fid in fids]
     num_labels = len(np.unique(targets))
     grouped_paths = [[] for _ in range(num_labels)]
     for path, label in zip(paths, targets):
         grouped_paths[label].append(path)
     grouped_texts = [[] for _ in range(num_labels)]
     for fid, label in zip(fids, targets):
         grouped_texts[label].append(fid_to_text[fid])
     characters_list = []
     for i, paths in enumerate(grouped_paths):
         characters_list.append(paths)
     characters_list = np.array(characters_list)
     texts_list = []
     for i, texts in enumerate(grouped_texts):
         texts_list.append(texts)
     texts_list = np.array(texts_list)
     AbstractMetaOmniglot.__init__(self, characters_list, texts_list, *args, **kwargs)
Example #5
    def __init__(self, alpha, parts, numpy=False, sparse=True):
        '''
        alpha: tuple of ints, the weak partition of 8 into 3 parts
        parts: tuple of ints, the partitions of each part of alpha
        numpy: bool, if True load the dense numpy pickle of the cached S_8 mod S_alpha irreps
        sparse: bool, if True (and numpy is False) load the sparse pickle instead
        '''
        self.alpha = alpha
        self.parts = parts
        self.cos_reps = coset_reps(sn(8), young_subgroup_perm(alpha))
        self.cyc_irrep_func = cyclic_irreps(alpha)
        self.yor_dict = None

        # cache orientation tuple -> cyclic irrep
        self.cyclic_irreps_re = {}
        self.cyclic_irreps_im = {}
        self.fill_cyclic_irreps()

        # also cache the cyclic irreps
        if numpy:
            pkl_loc = IRREP_LOC_FMT.format(alpha, parts)
            self.yor_dict = load_pkl(pkl_loc)
        elif sparse:
            pkl_loc = IRREP_SP_LOC_FMT.format(alpha, parts)
            self.yor_dict = load_sparse_pkl(pkl_loc)
        else:
            # TODO: deprecate
            print('neither sparse nor numpy')
            pkl_loc = IRREP_LOC_FMT.format(alpha, parts)
            self.np_yor_dict = load_pkl(pkl_loc)
Example #6
def main():
    size = 600
    k = 4

    # (U, s, V) = utils.load_pkl(SVD_PATH)
    eig_face = utils.load_pkl(U4_PATH)
    x_mean = utils.load_pkl(X_MAEN)
    # eig_face = U.T

    print('- A3')
    # en = eig_face[:k]
    # en = np.matrix(eig_face[:k])
    en = eig_face
    # en = np.matrix(eig_face)
    print('  * encoder shape:', en.shape)
    sample = get_sample()

    sampleT = np.matrix(sample - x_mean).T
    print('  * sampleT matrix shape:', sampleT.shape)
    construct_matrix = en * sampleT
    result = en.T * construct_matrix
    result = np.array(result.T) + x_mean
    print('  * result shape:', result.shape)
    faces = result.reshape(-1, size, size, 3)
    faces -= np.min(faces)
    faces /= np.max(faces)
    faces = (faces * 255).astype(np.uint8)
    print('  * faces shape:', faces.shape)
    # print(faces)
    io.imsave(OUTPUT, faces.reshape(size, size, 3))
Example #7
def load_dataset(name, as_image=False):
    if name == "mnist_small":
        return ut.load_pkl("datasets/mnist_small.pickle")

    if name == "cifar_small":
        return ut.load_pkl("datasets/cifar_small.pickle")

    if name == "digits":
        digits = load_digits()
        X = digits["data"]
        y = digits["target"]
        X /= 255.

        X, y = shuffle(X, y)

        Xtest = X[500:]
        ytest = y[500:]

        X = X[:500]
        y = y[:500]

        return {"X": X, "y": y, "Xtest": Xtest, "ytest": ytest}
    if name == "boat_images":
        # LOAD SMALL IMAGES
        imgList = glob.glob("datasets/boat_images/*.png")

        df = pd.read_csv("datasets/boat_images/coords.csv")

        X = np.zeros((len(imgList), 80, 80, 3))
        Y = np.zeros((len(imgList), 80, 80))
        for i, img in enumerate(imgList):
            X[i] = img_as_float(imread(img))

            flag = False
            yx_coors = []
            for _, row in df.iterrows():
                if img[img.rindex("/") +
                       1:] == row.image[row.image.rindex("/") + 1:]:
                    yx_coors += [(row.y, row.x)]
                    flag = True
            if flag == False:
                Y[i] = np.zeros((80, 80))
            else:
                #Y[i] =  np.ones((80, 80))*-1

                for y, x in yx_coors:
                    Y[i, y, x] = 1

        X = iu.last2first(X)
        Y = Y[:, np.newaxis]

        if as_image:
            return X, Y
        else:
            y = Y.sum(axis=1).sum(axis=1).sum(axis=1)

            return X, y.astype(int)
Example #8
    def read_arg_sents(self):
        """
        Read data for action argument extractor
        PS: pos_data is generated by the function "pos_tagging" in utils.py
        """
        indata = load_pkl('data/refined_%s_data.pkl' % self.domain)[-1]
        pos_data = load_pkl('data/%s_arg_pos.pkl' % self.domain)
        arg_sents = []
        # ipdb.set_trace()
        for i in range(len(indata)):
            for j in range(len(indata[i])):
                if len(indata[i][j]) == 0:
                    continue
                # -1 obj_ind refer to UNK
                words = indata[i][j]['last_sent'] + indata[i][j][
                    'this_sent'] + ['UNK']
                pos = [
                    self.pos_dict[p]
                    for w, p in pos_data[i][j][0] + pos_data[i][j][1]
                ] + [0]
                if len(words) != len(pos):
                    ipdb.set_trace()
                    print('len(words) != len(pos)')
                sent_len = len(words)
                act_inds = [
                    a['act_idx'] for a in indata[i][j]['acts']
                    if a['act_idx'] < self.num_words
                ]
                for k in range(len(indata[i][j]['acts'])):
                    act_ind = indata[i][j]['acts'][k]['act_idx']
                    obj_inds = indata[i][j]['acts'][k]['obj_idxs']
                    arg_sent = {}
                    arg_tags = np.ones(sent_len, dtype=np.int32)
                    if len(obj_inds[1]) == 0:
                        arg_tags[obj_inds[0]] = 2  # essential objects
                    else:
                        arg_tags[obj_inds[0]] = 4  # exclusive objects
                        arg_tags[obj_inds[1]] = 4  # exclusive objects
                    # generate distance representation
                    position = np.zeros(sent_len, dtype=np.int32)
                    position.fill(act_ind)
                    distance = np.abs(np.arange(sent_len) - position)

                    arg_sent['tokens'] = words
                    arg_sent['tags'] = arg_tags
                    arg_sent['pos'] = deepcopy(pos)
                    arg_sent['act_ind'] = act_ind
                    arg_sent['distance'] = distance
                    arg_sent['act_inds'] = act_inds
                    arg_sent['obj_inds'] = obj_inds
                    self.create_matrix(arg_sent)
                    arg_sents.append(arg_sent)
        # get k-fold split data
        arg_indices = ten_fold_split_ind(len(arg_sents), self.k_fold_indices,
                                         self.k_fold)
        arg_data = index2data(arg_indices, arg_sents)
        return arg_data
Example #9
def preprocess(args, data_name):
    from utils import remove_info
    from sklearn.model_selection import train_test_split
    from preprocessor import Preprocessor
    torch.manual_seed(42)

    print('[Info] Process csv...')
    # for train and valid csv
    trainset = pd.read_csv('../data/task2_trainset.csv', dtype=str)
    trainset = remove_info(trainset)
    trainset, validset = train_test_split(trainset,
                                          test_size=0.1,
                                          random_state=42)
    testset = pd.read_csv('../data/task2_public_testset.csv', dtype=str)
    testset = remove_info(testset)

    print('[Info] Loading node vectors...')
    train_node_vec = load_pkl('../data/node_vec.pkl')  # tensor of shape [7000, 128]
    train_node_vec, valid_node_vec = train_test_split(train_node_vec.numpy(),
                                                      test_size=0.1,
                                                      random_state=42)
    train_node_vec = torch.FloatTensor(train_node_vec)
    valid_node_vec = torch.FloatTensor(valid_node_vec)
    test_node_vec = load_pkl('../data/node_vec_test.pkl')
    test_node_vec = test_node_vec.type(torch.FloatTensor)

    print('[INFO] Make bert dataset...')
    preprocessor = Preprocessor(args.model)
    train_data = preprocessor.get_dataset(trainset, n_workers=12)
    valid_data = preprocessor.get_dataset(validset, n_workers=12)
    test_data = preprocessor.get_dataset(testset, n_workers=12)

    tfidf = get_tfidf([data['tokens'] for data in train_data] +
                      [data['tokens'] for data in valid_data] +
                      [data['tokens'] for data in test_data])

    train_data = BertDataset(train_data, train_node_vec, tfidf[:6300],
                             args.max_len)
    valid_data = BertDataset(valid_data, valid_node_vec, tfidf[6300:7000],
                             args.max_len)
    test_data = BertDataset(test_data, test_node_vec, tfidf[7000:],
                            args.max_len)

    print('[INFO] Save pickles...')
    if not os.path.exists('../dataset/'):
        os.makedirs('../dataset/')
    with open('../dataset/trainData_%s.pkl' % data_name, 'wb') as f:
        pickle.dump(train_data, f)
    with open('../dataset/validData_%s.pkl' % data_name, 'wb') as f:
        pickle.dump(valid_data, f)
    with open('../dataset/testData_%s.pkl' % data_name, 'wb') as f:
        pickle.dump(test_data, f)
Example #10
 def __init__(self, alpha, parts):
     self.alpha = alpha
     self.parts = parts
     self.values = -np.load(model_path(alpha, parts))
     self.idx_to_dist = load_pkl(
         os.path.join(get_prefix(), 'cube', 'cube2_pkls',
                      'idx_to_dist.pkl'))
     self.correct = np.load(
         os.path.join(get_prefix(), 'cube', 'fourier_eval_results',
                      str(alpha), str(parts), 'correct.npy'))
     self.cube_to_idx = load_pkl(
         os.path.join(get_prefix(), 'cube', 'cube2_pkls',
                      'cube_to_idx.pkl'))
Example #11
def load_pretrained_model(l_in):


    l = conv3d(l_in, 64)
    l = inrn_v2_red(l)
    l = inrn_v2(l)
    l = feat_red(l)
    l = inrn_v2(l)

    l = inrn_v2_red(l)
    l = inrn_v2(l)
    l = feat_red(l)
    l = inrn_v2(l)

    l = feat_red(l)

    l = dense(l, 128, name='dense_fpr')

    l_out = nn.layers.DenseLayer(l, num_units=2,
                                 W=nn.init.Constant(0.),
                                 nonlinearity=nn.nonlinearities.softmax)


    metadata = utils.load_pkl(os.path.join("/home/eavsteen/dsb3/storage/metadata/dsb3/models/ikorshun/","luna_c3-20170226-174919.pkl"))
    nn.layers.set_all_param_values(l_out, metadata['param_values'])

    return nn.layers.get_all_layers(l_out)[-3]
Example #12
def ultimate_evaluate(model):
    genres = ['action', 'drama', 'horror', 'romance']
    testingData = []
    testingLabels = []
    total = defaultdict.fromkeys(range(len(genres)), 0)
    correct = defaultdict.fromkeys(range(len(genres)), 0)
    yTrue, yPredict = [], []
    for genreIndex, genre in enumerate(genres):
        try:
            genreFeatures = load_pkl(genre + "_histogram_test")
            genreFeatures = np.array([np.array(f)
                                      for f in genreFeatures])  # numpy hack
        except Exception as e:
            print e
            return
        print "OK."
        for videoFeatures in genreFeatures:
            total[genreIndex] += 1
            d = defaultdict(int)
            predictedClasses = model.predict(
                videoFeatures)  #List of predictions, per-frame
            print predictedClasses
            for i in predictedClasses:
                d[i] += 1
            predictedGenre = max(d.iteritems(), key=lambda x: x[1])[0]
            yPredict.append(predictedGenre)
            yTrue.append(genreIndex)
            if predictedGenre == genreIndex:
                correct[genreIndex] += 1

    print correct, total

    confusionMatrix = confusion_matrix(yTrue, yPredict)
    print confusionMatrix
Example #13
def build_model():
    metadata_dir = utils.get_dir_path('models', pathfinder.METADATA_PATH)
    metadata_path = utils.find_model_metadata(
        metadata_dir,
        patch_class_config.__name__.split('.')[-1])
    print('loading model', metadata_path)
    print('please check if model pkl is the correct one')
    metadata = utils.load_pkl(metadata_path)

    print('Build model')
    model = patch_class_config.build_model()
    all_layers = nn.layers.get_all_layers(model.l_out)
    num_params = nn.layers.count_params(model.l_out)
    print('  number of parameters: %d' % num_params)
    print(string.ljust('  layer output shapes:', 36), )
    print(string.ljust('#params:', 10), )
    print('output shape:')
    for layer in all_layers:
        name = string.ljust(layer.__class__.__name__, 32)
        num_param = sum(
            [np.prod(p.get_value().shape) for p in layer.get_params()])
        num_param = string.ljust(num_param.__str__(), 10)
        print('    %s %s %s' % (name, num_param, layer.output_shape))

    nn.layers.set_all_param_values(model.l_out, metadata['param_values'])
    return model
Example #14
def main():
    # Target data
    filename = "120_kmeans_obj.pkl"

    kmeans = k.load_pkl(filename)
    spec, label = load_test_data()

    print("spec", spec.shape)
    print("label", label.shape)

    spec_ = np.empty((513, ), np.float32)
    for i in range(len(label)):
        spec_ = np.vstack((spec_, kmeans.cluster_centers_[label[i]]))
    spec_ = np.delete(spec_, 0, 0)

    print("compare data structure ----")
    print("spec: ", spec.shape)
    print("spec_: ", spec_.shape)

    print("spec data:", spec)
    print("spec_ data:", spec_)

    print("min-max spce_ data:", min_max(spec_))

    waveform = audio.inv_spectrogram(spec)
    waveform_ = audio.inv_spectrogram(spec_)
    waveformmm_ = audio.inv_spectrogram(min_max(spec_))

    audio.save_wav(waveform, 'ideal_out.wav')
    audio.save_wav(waveform_, 'ideal_out_.wav')
    audio.save_wav(waveformmm_, 'ideal_outmm_.wav')
Example #15
def split_transform(fsplit_lst, irrep_dict, alpha, parts, mem_dict=None):
    '''
    fsplit_lst: list of pkl file names of the distance values for a chunk of the total distance values
    irrep_dict: dict mapping permutation tuples to their irrep matrices
    alpha: weak partition
    parts: list/iterable of partitions of the parts of alpha
    '''
    print('     Computing transform on splits: {}'.format(fsplit_lst))
    cos_reps = coset_reps(sn(8), young_subgroup_perm(alpha))
    save_dict = {}
    cyc_irrep_func = cyclic_irreps(alpha)
    pid = os.getpid()

    for fsplit_pkl in fsplit_lst:
        # load_pkl opens the file itself, so a separate open() is not needed here
        pkl_dict = load_pkl(fsplit_pkl)  # dict of function values
        for perm_tup, tup_dict in pkl_dict.items():
            for tup, dists in tup_dict.items():
                dist_tot = sum(dists)
                perm_rep = irrep_dict[perm_tup]  # dict of (i, j) -> matrix
                block_cyclic_rep = block_cyclic_irreps(tup, cos_reps, cyc_irrep_func)
                mult_yor_block(perm_rep, dist_tot, block_cyclic_rep, save_dict)
        if mem_dict is not None:
            mem_dict[pid] = max(check_memory(verbose=False), mem_dict.get(pid, 0))
        del pkl_dict

    block_size = wreath_dim(parts)
    n_cosets = coset_size(alpha)
    mat = convert_yor_matrix(save_dict, block_size, n_cosets)
    return mat
Example #16
def load_pretrained_model(l_in):


    l = conv3d(l_in, 64)
    l = inrn_v2_red(l)
    l = inrn_v2(l)

    l = inrn_v2_red(l)
    l = inrn_v2(l)

    l = inrn_v2_red(l)
    l = inrn_v2_red(l)

    l = dense(drop(l), 128)

    l_out = nn.layers.DenseLayer(l, num_units=10,
                                 W=nn.init.Orthogonal(),
                                 b=nn.init.Constant(0.1),
                                 nonlinearity=nn.nonlinearities.softmax)



    metadata = utils.load_pkl(os.path.join("/mnt/storage/metadata/dsb3/models/eavsteen/","t_el_0-20170321-013339.pkl"))
    nn.layers.set_all_param_values(l_out, metadata['param_values'])

    return nn.layers.get_all_layers(l_out)[-3]
Example #17
 def from_config(cls, validation_ratio, validation_seed, dir_path, data_sampling_freq,
                 start_sampling_freq, end_sampling_freq, start_seq_len, num_channels, return_long):
     assert end_sampling_freq <= data_sampling_freq
     target_location = os.path.join(dir_path, '{}c_{}v_{}ss_{}es_{}l.npz'.format(num_channels, DATASET_VERSION,
                                                                                 start_sampling_freq,
                                                                                 end_sampling_freq, start_seq_len))
     if os.path.exists(target_location):
         print('loading dataset from file: {}'.format(target_location))
         given_data = np.load(target_location)
         given_data = [load_pkl(target_location + '.pkl'), [given_data['arr_{}'.format(i)]
                                                            for i in range(len(given_data.keys()))]]
     else:
         print('creating dataset from scratch')
         dataset = cls(None, dir_path, data_sampling_freq, start_sampling_freq,
                       end_sampling_freq, start_seq_len, num_channels, return_long)
         np.savez_compressed(target_location, *dataset.datas)
         save_pkl(target_location + '.pkl', dataset.data_pointers)
         given_data = [dataset.data_pointers, dataset.datas]
     return_datasets = []
     for i in range(2):
         return_datasets.append(cls(given_data, dir_path, data_sampling_freq, start_sampling_freq,
                                    end_sampling_freq, start_seq_len, num_channels, return_long))
     data_pointers = [x for x in return_datasets[0].data_pointers]
     np.random.seed(validation_seed)
     np.random.shuffle(data_pointers)
     return_datasets[0].data_pointers = data_pointers[:int((1 - validation_ratio) * len(data_pointers))]
     return_datasets[1].data_pointers = data_pointers[int((1 - validation_ratio) * len(data_pointers)):]
     return return_datasets[0], return_datasets[1]
Example #18
    def __init__(self, data_path, batch_size, transform_params, patient_ids=None, labels_path=None,
                 slice2roi_path=None, full_batch=False, random=True, infinite=False, view='sax',
                 data_prep_fun=data.transform_norm_rescale, **kwargs):

        if patient_ids:
            self.patient_paths = []
            for pid in patient_ids:
                self.patient_paths.append(data_path + '/%s/study/' % pid)
        else:
            self.patient_paths = glob.glob(data_path + '/*/study/')

        self.slice_paths = [sorted(glob.glob(p + '/%s_*.pkl' % view)) for p in self.patient_paths]
        self.slice_paths = list(itertools.chain(*self.slice_paths))
        self.slicepath2pid = {}
        for s in self.slice_paths:
            self.slicepath2pid[s] = int(utils.get_patient_id(s))

        self.nsamples = len(self.slice_paths)
        self.batch_size = batch_size
        self.rng = np.random.RandomState(42)
        self.full_batch = full_batch
        self.random = random
        self.infinite = infinite
        self.id2labels = data.read_labels(labels_path) if labels_path else None
        self.transformation_params = transform_params
        self.data_prep_fun = data_prep_fun
        self.slice2roi = utils.load_pkl(slice2roi_path) if slice2roi_path else None
Example #19
def main(args):
    # load particles
    particles = dataset.load_particles(args.mrcs, datadir=args.datadir)
    log(particles.shape)
    Nimg, D, D = particles.shape

    trans = utils.load_pkl(args.trans)
    if type(trans) is tuple:
        trans = trans[1]
    trans *= args.tscale
    assert np.all(trans <= 1), "ERROR: Old pose format detected. Translations must be in units of fraction of box."
    trans *= D # convert to pixels
    assert len(trans) == Nimg

    xx,yy = np.meshgrid(np.arange(-D/2,D/2),np.arange(-D/2,D/2))
    TCOORD = np.stack([xx, yy],axis=2)/D # DxDx2
    
    imgs = []
    for ii in range(Nimg):
        ff = fft.fft2_center(particles[ii])
        tfilt = np.dot(TCOORD,trans[ii])*-2*np.pi
        tfilt = np.cos(tfilt) + np.sin(tfilt)*1j
        ff *= tfilt
        img = fft.ifftn_center(ff)
        imgs.append(img)

    imgs = np.asarray(imgs).astype(np.float32)
    mrc.write(args.o, imgs)

    if args.out_png:
        plot_projections(args.out_png, imgs[:9])
Example #20
def build_model():
    l_in = nn.layers.InputLayer((None, ) + p_transform['patch_size'])
    l_dim = nn.layers.DimshuffleLayer(l_in, pattern=[0,'x',1,2,3])
    l_target = nn.layers.InputLayer((None, 1))

    l = conv3d(l_dim, 64)
    l = inrn_v2_red(l)
    l = inrn_v2(l)
    l = feat_red(l)
    l = inrn_v2(l)

    l = inrn_v2_red(l)
    l = inrn_v2(l)
    l = feat_red(l)
    l = inrn_v2(l)

    l = feat_red(l) 

    l_out = dense(l, 128)

    # l_out = nn.layers.DenseLayer(l, num_units=2,
    #                              W=nn.init.Constant(0.),
    #                              nonlinearity=nn.nonlinearities.softmax)

    metadata = utils.load_pkl(os.path.join("/home/eavsteen/dsb3/storage/metadata/dsb3/models/ikorshun/","luna_c3-20170226-174919.pkl"))

    for i in range(-20,0):
        print metadata['param_values'][i].shape

    nn.layers.set_all_param_values(l_out, metadata['param_values'][:-2])

    return namedtuple('Model', ['l_in', 'l_out', 'l_target'])(l_in, l_out, l_target)
Example #21
def create_dataloader(args, vocabs=None, val=False):
    argvalpfx = "val_" if val else ""
    paths = [getattr(args, f"{argvalpfx}{mode}_path") for mode in MODES]
    if vocabs is None:
        vocabs = [getattr(args, f"{mode}_vocab") for mode in MODES]
        vocabs = [utils.load_pkl(v) if v is not None else None for v in vocabs]
    dset = dataset.TextSequenceDataset(
        paths=paths,
        feats=["string", "tensor"],
        vocabs=vocabs,
        vocab_limit=args.vocab_limit,
        pad_eos=args.eos,
        pad_bos=args.bos,
        unk=args.unk,
    )
    if vocabs is None:
        vocabs = dset.vocabs
    collator = dataset.TextSequenceBatchCollator(
        pad_idxs=[len(v) for v in vocabs]
    )
    return td.DataLoader(
        dataset=dset,
        batch_size=args.batch_size,
        shuffle=False if val else args.shuffle,
        num_workers=args.data_workers,
        collate_fn=collator,
        pin_memory=args.pin_memory
    )
Example #22
def generate(args):
    devices = utils.get_devices(args.gpu)
    if args.seed is not None:
        utils.manual_seed(args.seed)

    logging.info("Loading data...")
    vocab = utils.load_pkl(args.vocab)

    logging.info("Initializing generation environment...")
    model = prepare_model(args, vocab)
    model = utils.to_device(model, devices)
    generator = Generator(
        model=model,
        device=devices[0],
        batch_size=args.batch_size,
        vocab=vocab,
        bos=args.bos,
        eos=args.eos,
        unk=args.unk,
        max_len=args.max_length
    )

    logging.info("Commencing generation...")
    samples = generator.generate(args.z_samples)
    if args.nearest_neighbors is not None:
        dataset = prepare_dataset(args, vocab)
        neighbors = nearest_neighbors(args, samples, dataset)
    else:
        neighbors = None
    save(args, samples, neighbors)

    logging.info("Done!")
Example #23
def test_th_pkl(np_pkl, th_pkl):
    print('Testing equivalence')
    np_dict = load_pkl(np_pkl)
    th_dict = load_sparse_pkl(th_pkl)
    compare(np_dict, th_dict)
    print('All equal between numpy and torch versions!!')
    check_memory()
Example #24
    def __init__(self, data_path, batch_size, transform_params, patient_ids=None, labels_path=None,
                 slice2roi_path=None, full_batch=False, random=True, infinite=True, min_slices=5, **kwargs):

        if patient_ids:
            patient_paths = []
            for pid in patient_ids:
                patient_paths.append(data_path + '/%s/study/' % pid)
        else:
            patient_paths = glob.glob(data_path + '/*/study/')

        self.pid2sax_slice_paths = defaultdict(list)
        self.pid2ch2_path, self.pid2ch4_path = {}, {}
        for p in patient_paths:
            pid = int(utils.get_patient_id(p))
            spaths = sorted(glob.glob(p + '/sax_*.pkl'), key=lambda x: int(re.search(r'/sax_(\d+)\.pkl$', x).group(1)))
            if len(spaths) > min_slices:
                self.pid2sax_slice_paths[pid] = spaths

                ch2_path = glob.glob(p + '/2ch_*.pkl')
                self.pid2ch2_path[pid] = ch2_path[0] if ch2_path else None
                ch4_path = glob.glob(p + '/4ch_*.pkl')
                self.pid2ch4_path[pid] = ch4_path[0] if ch4_path else None

        self.patient_ids = self.pid2sax_slice_paths.keys()
        self.nsamples = len(self.patient_ids)

        self.id2labels = data.read_labels(labels_path) if labels_path else None
        self.batch_size = batch_size
        self.rng = np.random.RandomState(42)
        self.full_batch = full_batch
        self.random = random
        self.batch_size = batch_size
        self.infinite = infinite
        self.transformation_params = transform_params
        self.slice2roi = utils.load_pkl(slice2roi_path) if slice2roi_path else None
Example #25
def init(node_type, checkpoint_path=''):
    """Initialize shared information and data for given node_type (e.g., 'worker').

    Optionally, can load a stored model from a checkpoint path to continue training or eval.
    """
    if checkpoint_path:
        nn = load_pkl(checkpoint_path)  # Load stored and pickled model
    else:
        nn = FeedForwardNeuralNetwork(  # Return new FFNN
            run_config['NUM_FEATURES'],
            run_config['NUM_OUTPUTS'],
            run_config['LAYERS'])

    if node_type == 'worker':
        data = run_config['TRAIN']  # Worker needs training data, and training hyper parameters
        return nn, run_config['NUM_WEIGHTS'], SERVER_INFO, run_config['LEARNING_RATE'], run_config['BATCH_SIZE'], data

    elif node_type == 'validator':
        data = run_config['VALIDATION']  # Validator only needs validation data
        return nn, run_config['NUM_WEIGHTS'], SERVER_INFO, data

    elif node_type == 'server':
        data = (run_config['TRAIN'], run_config['VALIDATION'], run_config['TEST'])  # Needs all data
        return nn, run_config['NUM_WEIGHTS'], SERVER_INFO, run_config['NUM_BATCHES'], data

    else:
        print('Error: node type not recognized!')
        exit()

    return None
Example #26
def build_model():
    l_in = nn.layers.InputLayer((None, ) + p_transform['patch_size'])
    l_dim = nn.layers.DimshuffleLayer(l_in, pattern=[0,'x',1,2,3])
    l_target = nn.layers.InputLayer((None, 1))

    l = conv3d(l_dim, 64)
    l = inrn_v2_red(l)
    l = inrn_v2(l)
    l = feat_red(l)
    l = inrn_v2(l)

    l = inrn_v2_red(l)
    l = inrn_v2(l)
    l = feat_red(l)
    l = inrn_v2(l)

    l = feat_red(l) 

    l_out = dense(l, 128)

    # l_out = nn.layers.DenseLayer(l, num_units=2,
    #                              W=nn.init.Constant(0.),
    #                              nonlinearity=nn.nonlinearities.softmax)

    metadata = utils.load_pkl(os.path.join("/home/eavsteen/dsb3/storage/metadata/dsb3/models/ikorshun/","luna_c3-20170226-174919.pkl"))

    for i in range(-20,0):
        print(metadata['param_values'][i].shape)

    nn.layers.set_all_param_values(l_out, metadata['param_values'][:-2])

    return namedtuple('Model', ['l_in', 'l_out', 'l_target'])(l_in, l_out, l_target)
Example #27
def create_model(genres):

    trainingData = []
    trainingLabels = []

    number_of_classes = len(genres)

    for genreIndex, genre in enumerate(genres):
        try:
            genreFeatures = load_pkl(genre + "_ultimate_OF")
        except Exception as e:
            print e
            return
        for videoFeatures in genreFeatures:
            print videoFeatures.shape
            if videoFeatures.shape == (0, ):
                continue
            for scene in videoFeatures:
                for sequence in sequencify(scene, 4):
                    trainingData.append(sequence)
                    trainingLabels.append(genreIndex)

    trainingData = np.array(trainingData)
    trainingLabels = np.array(trainingLabels)
    print trainingData.shape
    print trainingLabels.shape
    model = optical_flow_model(number_of_classes)
    model.compile(optimizer='sgd',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    model.fit(trainingData[0], [trainingLabels[0]])
Example #28
def build_model():
    metadata_dir = utils.get_dir_path('models', pathfinder.METADATA_PATH)
    metadata_path = utils.find_model_metadata(
        metadata_dir,
        patch_config.__name__.split('.')[-1])
    metadata = utils.load_pkl(metadata_path)

    print 'Build model'
    model = patch_config.build_model(patch_size=(window_size, window_size,
                                                 window_size))
    all_layers = nn.layers.get_all_layers(model.l_out)
    num_params = nn.layers.count_params(model.l_out)
    print '  number of parameters: %d' % num_params
    print string.ljust('  layer output shapes:', 36),
    print string.ljust('#params:', 10),
    print 'output shape:'
    for layer in all_layers:
        name = string.ljust(layer.__class__.__name__, 32)
        num_param = sum(
            [np.prod(p.get_value().shape) for p in layer.get_params()])
        num_param = string.ljust(num_param.__str__(), 10)
        print '    %s %s %s' % (name, num_param, layer.output_shape)

    nn.layers.set_all_param_values(model.l_out, metadata['param_values'])
    return model
Example #29
def load_pretrained_model(l_in):

    l = conv3d(l_in, 64)
    l = inrn_v2_red(l)
    l = inrn_v2(l)
    l = feat_red(l)
    l = inrn_v2(l)

    l = inrn_v2_red(l)
    l = inrn_v2(l)
    l = feat_red(l)
    l = inrn_v2(l)

    l = feat_red(l)

    l = dense(l, 128, name='dense_fpr')

    l_out = nn.layers.DenseLayer(l,
                                 num_units=2,
                                 W=nn.init.Constant(0.),
                                 nonlinearity=nn.nonlinearities.softmax)

    metadata = utils.load_pkl(
        os.path.join(
            "/home/eavsteen/dsb3/storage/metadata/dsb3/models/ikorshun/",
            "luna_c3-20170226-174919.pkl"))
    nn.layers.set_all_param_values(l_out, metadata['param_values'])

    return nn.layers.get_all_layers(l_out)[-3]
Example #30
def compute_user_avg_loc(city):
    """compute average latitude and longitude of businesses each user visited

    Arg:
        city - the city
    """
    print("\t[user] computing location features")
    # df = pd.read_csv(TRNTST_DIR + "{}/train_pos.csv".format(city))
    df = pd.read_csv(INPUT_DIR + "{}/user_business_interaction.csv".format(city))
    bus_profile = load_pkl(INPUT_DIR + "{}/city_business_profile.pkl".format(city))

    # df.assign(business_latitude=lambda x: bus_profile[x.business]["latitude"])
    # df.assign(business_longitude=lambda x: bus_profile[x.business]["longitude"])

    b_lat_dict = dict([(k, v["latitude"]) for k, v in bus_profile.items()])
    b_long_dict = dict([(k, v["longitude"]) for k, v in bus_profile.items()])

    df = df.assign(bus_lat=df.business.map(b_lat_dict))
    df = df.assign(bus_long=df.business.map(b_long_dict))

    # "ll": latitude and longitude
    print("\t[user] aggregating location (lat and long) by user")
    df_loc = df.groupby("user").agg({"bus_lat": ['max', 'min', 'mean'],
                                     "bus_long": ['max', 'min', 'mean']})

    # rename max, min, mean col to max_lat, min_lat, or mean_at. Same as `long`
    # while still maintaining the index as `user`
    user_lat = df_loc.bus_lat.reset_index()
    user_long = df_loc.bus_long.reset_index()
    user_loc = user_lat.join(user_long, on="user", how="outer",
                             lsuffix="_lat", rsuffix="_long")
    user_loc = user_loc.fillna(user_loc.mean())  # now `user` is column
    user_loc_dict = user_loc.set_index("user").to_dict(orient="index")
    dump_pkl(OUTPUT_DIR + "{}/city_user_loc.pkl".format(city), user_loc_dict)
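Note: compute_user_avg_loc above persists its result with a dump_pkl helper that is likewise not shown. A minimal sketch under the same assumption (a thin pickle wrapper; the argument order follows the call above, path first, although other examples in this listing, e.g. dump_pkl(textObj, 'plot_object_train'), pass the object first, so the helper is project-specific):

import pickle

def dump_pkl(path, obj):
    # Sketch only: pickle obj to path in binary mode.
    with open(path, 'wb') as f:
        pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)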
Example #31
def load_pretrained_model(l_in):

    l = conv3d(l_in, 64)
    l = inrn_v2_red(l)
    l = inrn_v2(l)

    l = inrn_v2_red(l)
    l = inrn_v2(l)

    l = inrn_v2_red(l)
    l = inrn_v2_red(l)

    l = dense(drop(l), 128)

    l_out = nn.layers.DenseLayer(l,
                                 num_units=10,
                                 W=nn.init.Orthogonal(),
                                 b=nn.init.Constant(0.1),
                                 nonlinearity=nn.nonlinearities.softmax)

    metadata = utils.load_pkl(
        os.path.join("/mnt/storage/metadata/dsb3/models/eavsteen/",
                     "t_el_0-20170321-013339.pkl"))
    nn.layers.set_all_param_values(l_out, metadata['param_values'])

    return nn.layers.get_all_layers(l_out)[-3]
Example #32
def load_pretrained_model(l_in):

    l = conv3d(l_in, 64)
    l = inrn_v2_red(l)
    l = inrn_v2(l)

    l = inrn_v2_red(l)
    l = inrn_v2(l)

    l = inrn_v2_red(l)
    l = inrn_v2_red(l)

    l = dense(drop(l), 512)

    l = nn.layers.DenseLayer(l,
                             1,
                             nonlinearity=nn.nonlinearities.sigmoid,
                             W=nn.init.Orthogonal(),
                             b=nn.init.Constant(0))

    metadata = utils.load_pkl(
        os.path.join(
            "/home/eavsteen/dsb3/storage/metadata/dsb3/models/eavsteen/",
            "r_fred_malignancy_2-20170328-230443.pkl"))
    nn.layers.set_all_param_values(l, metadata['param_values'])

    return l
Example #33
def parse_user():
    """Load users

    output: id2user.pkl,
            user2id.pkl,
            user.friend.pkl,
            user.profile.pkl
    """
    user_profile = {}
    user_friend = {}

    make_dir(PREPROCESS_DIR)

    print("\t[parse user] load user list")
    users_list = load_pkl(PREPROCESS_DIR + "users_list.pkl")
    users_list = set(users_list)

    print("\t[parse user] building user profiles")
    with open(DATA_DIR + "user.json", "r") as fin:
        for ind, ln in enumerate(fin):
            data = json.loads(ln)
            user_id = data['user_id']
            if user_id not in users_list:  # discard infrequent or irrelevant cities
                continue
            user_friend[user_id] = data['friends'].split(", ")
            del data['friends']
            del data['user_id']
            user_profile[user_id] = data

    # user adjacency and profile dictionary separately
    print(
        "\t[parse user] dumping user-friendship and user-profile information ..."
    )
    dump_pkl(PREPROCESS_DIR + "user_friend.pkl", user_friend)
    dump_pkl(PREPROCESS_DIR + "user_profile.pkl", user_profile)
Example #34
def _get_features_data(mode='val'):
    """ deprecated with old dataset """
    """ Includes every sample with plotFeatures, videoFeatures, movie_id and genreLabel """
    featureData = []
    allData = load_pkl(baseName+mode)
    plots = []

    """Process plot vectors"""

    for data in allData:
        movie_id = data['movie_id']
        plot = data['plot']
        plots.append(plot)

    if mode=='train':
        textObj = Text()
        plotFeatures_all = textObj.fit_transform(plots)
        dump_pkl(textObj, 'plot_object_train')
    else:
        try:
            textObj = load_pkl('plot_object_train')
            plotFeatures_all = textObj.transform(plots).toarray()
        except:
            print "Please train the plots first."
            return

    plotIndex = -1
    for data in allData:
        plotIndex += 1
        movie_id =  data['movie_id']
        path = glob(video_resource+str(movie_id)+'.*')[0]       
        plot = data['plot']
        genreLabel = data['genreLabel']
        print plotIndex,"out of ",len(allData)
        print "Gathering features for",movie_id
        try:
            frames = list(get_frames(path, start_time=1000, end_time=200000, time_step=1000))
            videoFeatures = get_features_batch(frames, 'vgg16')
        except Exception as e:
            print e
            continue # Omit the movie if one of the feature is bad
            # videoFeatures = None
        plotFeatures = plotFeatures_all[plotIndex]

        featureData.append({'videoFeatures':videoFeatures, 'plotFeatures':plotFeatures, 'movie_id':movie_id, 'genreLabel':genreLabel})

    dump_pkl(featureData, 'feature_data_'+mode)
Example #35
def train_classifier(genres=[
    'Chases', 'Dance', 'Eating', 'Fight', 'Heated_Discussions',
    'Normal_Chatting', 'Romance', 'Running', 'Tragic'
],
                     model_name=default_model_name):

    trainingData = []
    trainingLabels = []
    num_of_classes = len(genres)
    print("Number of classes:", num_of_classes)
    for genreIndex, genre in enumerate(genres):
        try:
            genreFeatures = load_pkl(genre + "_ultimate_" + default_model_name)
            genreFeatures = np.array([np.array(f) for f in genreFeatures])
        except Exception as e:
            print(e)
            return
        print("OK.")
        for videoFeatures in genreFeatures:
            randomIndices = range(len(videoFeatures))
            selectedFeatures = np.array(videoFeatures[randomIndices])
            for feature in selectedFeatures:
                trainingData.append(feature)
                trainingLabels.append([genreIndex])
    trainingData = np.array(trainingData)
    trainingLabels = np.array(trainingLabels)
    trainingData = np.reshape(
        trainingData, (trainingData.shape[0], 1, trainingData.shape[1]))
    trainingLabels = np.reshape(
        trainingLabels, (trainingLabels.shape[0], 1, trainingLabels.shape[1]))
    print("Train data shape : ", trainingData.shape)
    print(trainingLabels.shape)
    model = lstm_model(num_of_classes, 1)
    model.compile(optimizer='sgd',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    batch_size = 32
    nb_epoch = 100
    history = model.fit(trainingData,
                        trainingLabels,
                        batch_size=batch_size,
                        nb_epoch=nb_epoch)
    modelOutPath = 'data/models/lstm' + model_name + '_' + str(
        num_of_classes) + "g_bs" + str(batch_size) + "_ep" + str(
            nb_epoch) + ".h5"
    model.save(modelOutPath)
    print("Model saved at", modelOutPath)

    plt.plot(history.history["acc"])
    plt.title("model accuracy")
    plt.ylabel("accuracy")
    plt.xlabel("epoch")
    plt.show()

    plt.plot(history.history["loss"])
    plt.title("model loss")
    plt.ylabel("loss")
    plt.xlabel("epoch")
    plt.show()
Example #36
def build_segmentation_model(l_in):
    metadata_dir = utils.get_dir_path('models', pathfinder.METADATA_PATH)
    metadata_path = utils.find_model_metadata(metadata_dir, patch_segmentation_config.__name__.split('.')[-1])
    metadata = utils.load_pkl(metadata_path)

    model = patch_segmentation_config.build_model(l_in=l_in, patch_size=p_transform['patch_size'])
    nn.layers.set_all_param_values(model.l_out, metadata['param_values'])
    return model
Example #37
def do_test(model_ckpt_path, test_data_path, result_path, word_dict_path,
            emb_dim, hid_dim):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    word_dict = load_pkl(word_dict_path)
    PAD_IDX = word_dict.word2idx("<PAD>")
    print("load data...")
    testData = SentenceDataset(load_pkl(test_data_path),
                               word_dict,
                               PAD_IDX,
                               training=False)
    print("load model...")
    model = get_model(word_dict.get_len(), word_dict.get_len(), emb_dim,
                      hid_dim, device)
    model.load_state_dict(torch.load(model_ckpt_path))
    model.to(device)
    print("predicting...")
    make_prediction(model, testData, word_dict, result_path, device)
Example #38
def test3():
    image_dir = utils.get_dir_path('analysis', pathfinder.METADATA_PATH)
    id2mm_shape = utils.load_pkl(image_dir + '/pid2mm.pkl')
    s = [(key, value) for (key, value) in sorted(id2mm_shape.items(), key=lambda x: x[1][0])]
    for i in xrange(5):
        print s[i]
    print '--------------------------'
    for i in xrange(1,6):
        print s[-i]
Example #39
def load_pretrained_model(l_in):

    l = conv3d(l_in, 64)
    l = inrn_v2_red(l)
    l = inrn_v2(l)

    l = inrn_v2_red(l)
    l = inrn_v2(l)

    l = inrn_v2_red(l)
    l = inrn_v2_red(l)

    l = dense(drop(l), 512)

    d_final_layers = {}
    final_layers = []
    unit_ptr = 0

    for obj_idx, obj_name in enumerate(cfg_prop.order_objectives):
        ptype = cfg_prop.property_type[obj_name]

        if ptype == 'classification':
            num_units = len(cfg_prop.property_bin_borders[obj_name])
            l_fin = nn.layers.DenseLayer(l, num_units=num_units,
                     W=nn.init.Orthogonal(),
                     b=nn.init.Constant(cfg_prop.init_values_final_units[obj_name]),
                     nonlinearity=nn.nonlinearities.softmax, name='dense_softmax_'+ptype+'_'+obj_name)

        elif ptype == 'continuous':
            l_fin = nn.layers.DenseLayer(l, num_units=1,
                    W=nn.init.Orthogonal(),
                    b=nn.init.Constant(cfg_prop.init_values_final_units[obj_name]),
                    nonlinearity=nn.nonlinearities.softplus, name='dense_softplus_'+ptype+'_'+obj_name)

        elif ptype == 'bounded_continuous':
            l_fin = nn.layers.DenseLayer(l, num_units=1,
                    W=nn.init.Orthogonal(),
                    b=nn.init.Constant(cfg_prop.init_values_final_units[obj_name]),
                    nonlinearity=nn.nonlinearities.sigmoid, name='dense_sigmoid_'+ptype+'_'+obj_name)
        else:
            raise ValueError('unknown property type: %s' % ptype)

        d_final_layers[obj_name] = l_fin
        final_layers.append(l_fin)

    l_out = nn.layers.ConcatLayer(final_layers, name = 'final_concat_layer')


    metadata = utils.load_pkl(os.path.join('/home/frederic/kaggle-dsb3/dsb/storage/metadata/dsb3/models/eavsteen/',"r_elias_28-20170331-230303.pkl"))
    nn.layers.set_all_param_values(l_out, metadata['param_values'])

    features = d_final_layers['malignancy']
    print 'features layer', features.name

    return features
Example #40
  def load_weight_from_pkl(self, cpu_mode=False):
    with tf.variable_scope('load_pred_from_pkl'):
      self.w_input = {}
      self.w_assign_op = {}

      for name in self.w.keys():
        self.w_input[name] = tf.placeholder('float32', self.w[name].get_shape().as_list(), name=name)
        self.w_assign_op[name] = self.w[name].assign(self.w_input[name])

    for name in self.w.keys():
      self.w_assign_op[name].eval({self.w_input[name]: load_pkl(os.path.join(self.weight_dir, "%s.pkl" % name))})

    self.update_target_q_network()
Example #41
def test_luna3d():
    image_dir = utils.get_dir_path('analysis', pathfinder.METADATA_PATH)
    image_dir = image_dir + '/test_luna/'
    utils.auto_make_dir(image_dir)

    id2zyxd = utils_lung.read_luna_annotations(pathfinder.LUNA_LABELS_PATH)

    luna_data_paths = [
        '/mnt/sda3/data/kaggle-lung/luna_test_patient/1.3.6.1.4.1.14519.5.2.1.6279.6001.877026508860018521147620598474.mhd']

    candidates = utils.load_pkl(
        '/mnt/sda3/data/kaggle-lung/luna_test_patient/1.3.6.1.4.1.14519.5.2.1.6279.6001.877026508860018521147620598474.pkl')

    candidates = candidates[:4]
    print candidates
    print '--------------'
    print id2zyxd['1.3.6.1.4.1.14519.5.2.1.6279.6001.877026508860018521147620598474']

    for k, p in enumerate(luna_data_paths):
        id = os.path.basename(p).replace('.mhd', '')
        print id
        img, origin, pixel_spacing = utils_lung.read_mhd(p)
        lung_mask = lung_segmentation.segment_HU_scan_ira(img)
        print np.min(lung_mask), np.max(lung_mask)
        x, annotations_tf, tf_matrix, lung_mask_out = data_transforms.transform_scan3d(data=img,
                                                                                       pixel_spacing=pixel_spacing,
                                                                                       p_transform=p_transform,
                                                                                       luna_annotations=candidates,
                                                                                       p_transform_augment=None,
                                                                                       luna_origin=origin,
                                                                                       lung_mask=lung_mask,
                                                                                       world_coord_system=False)

        print np.min(lung_mask_out), np.max(lung_mask_out)

        plot_slice_3d_2(x, lung_mask_out, 0, id)
        plot_slice_3d_2(x, lung_mask_out, 1, id)
        plot_slice_3d_2(x, lung_mask_out, 2, id)

        # for zyxd in annotations_tf:
        #     plot_slice_3d_2(x, lung_mask_out, 0, id, idx=zyxd)
        #     plot_slice_3d_2(x, lung_mask_out, 1, id, idx=zyxd)
        #     plot_slice_3d_2(x, lung_mask_out, 2, id, idx=zyxd)

        for i in xrange(136, x.shape[1]):
            plot_slice_3d_2(x, lung_mask_out, 1, str(id) + str(i), idx=np.array([200, i, 200]))
Example #42
def load_pretrained_model(l_in):

    l = conv3d(l_in, 64)
    l = inrn_v2_red(l)
    l = inrn_v2(l)

    l = inrn_v2_red(l)
    l = inrn_v2(l)

    l = inrn_v2_red(l)
    l = inrn_v2_red(l)

    l = drop(l, name='can_dropout')
    l = dense(l, 512, name='can_dense')

    final_layers = []
    for obj_idx, obj_name in enumerate(cfg_prop.order_objectives):
        ptype = cfg_prop.property_type[obj_name]
        if ptype == 'classification':
            num_units = len(cfg_prop.property_bin_borders[obj_name])
            l_fin = nn.layers.DenseLayer(l, num_units=num_units,
                     W=nn.init.Orthogonal(),
                     b=nn.init.Constant(cfg_prop.init_values_final_units[obj_name]),
                     nonlinearity=nn.nonlinearities.softmax, name='dense_'+ptype+'_'+obj_name)

        elif ptype == 'continuous':
            l_fin = nn.layers.DenseLayer(l, num_units=1,
                    W=nn.init.Orthogonal(),
                    b=nn.init.Constant(cfg_prop.init_values_final_units[obj_name]),
                    nonlinearity=nn.nonlinearities.softplus, name='dense_'+ptype+'_'+obj_name)

        else:
            raise ValueError('unknown property type: %s' % ptype)

        final_layers.append(l_fin)

    l_out = nn.layers.ConcatLayer(final_layers, name = 'final_concat_layer')


    metadata = utils.load_pkl(os.path.join("/home/eavsteen/dsb3/storage/metadata/dsb3/models/eavsteen/","r_elias_10-20170328-003348.pkl"))
    nn.layers.set_all_param_values(l_out, metadata['param_values'])

    features = nn.layers.get_all_layers(l_out)[(-2-len(final_layers))]
    print 'features layer', features.name

    return features
Example #43
def build_model():
    metadata_dir = utils.get_dir_path('models', pathfinder.METADATA_PATH)
    metadata_path = utils.find_model_metadata(metadata_dir, patch_class_config.__name__.split('.')[-1])
    metadata = utils.load_pkl(metadata_path)

    print 'Build model'
    model = patch_class_config.build_model()
    all_layers = nn.layers.get_all_layers(model.l_out)
    num_params = nn.layers.count_params(model.l_out)
    print '  number of parameters: %d' % num_params
    print string.ljust('  layer output shapes:', 36),
    print string.ljust('#params:', 10),
    print 'output shape:'
    for layer in all_layers:
        name = string.ljust(layer.__class__.__name__, 32)
        num_param = sum([np.prod(p.get_value().shape) for p in layer.get_params()])
        num_param = string.ljust(num_param.__str__(), 10)
        print '    %s %s %s' % (name, num_param, layer.output_shape)

    nn.layers.set_all_param_values(model.l_out, metadata['param_values'])
    return model
Example #44
def evaluate_trained(config, state, channel):
    config_path = config.load_trained.from_path + 'model_config.pkl'
    epoch = config.load_trained.epoch
    params_path = config.load_trained.from_path + 'model_params_e%d.pkl'%(epoch) 
    assert config_path is not None
    assert params_path is not None
    assert os.path.isfile(params_path)
    assert os.path.isfile(config_path)
    print 'load the config options from the best trained model'
    used_config = utils.load_pkl(config_path)
    action = config.load_trained.action
    assert action == 1
    from_path = config.load_trained.from_path
    epoch = config.load_trained.epoch
    save_model_path = config.load_trained.from_path
    set_config(config, used_config)
    config.load_trained.action = action
    config.load_trained.from_path = from_path
    config.load_trained.epoch = epoch
    config.save_model_path = save_model_path
    
    model_type = config.model
    # set up automatically some fields in config
    if config.dataset.signature == 'MNIST_binary_russ':
        config[model_type].n_in = 784
        config[model_type].n_out = 784
    # Also copy back from config into state.
    for key in config:
        setattr(state, key, config[key])

    print 'Model Type: %s'%model_type
    print 'Host:    %s' % socket.gethostname()
    print 'Command: %s' % ' '.join(sys.argv)
    
    print 'initializing data engine'
    input_dtype = 'float32'
    target_dtype = 'int32'
    data_engine = None
    deep_orderless_bernoulli_nade.evaluate_trained(state, data_engine, params_path, channel)
Example #45
    def __init__(self, data_path, batch_size, transform_params, patient_ids=None, labels_path=None,
                 slice2roi_path=None, full_batch=False, random=True, infinite=True, min_slices=0,
                 data_prep_fun=data.transform_norm_rescale,
                 **kwargs):

        if patient_ids:
            patient_paths = []
            for pid in patient_ids:
                patient_paths.append(data_path + '/%s/study/' % pid)
        else:
            patient_paths = glob.glob(data_path + '/*/study/')

        self.pid2slice_paths = defaultdict(list)
        nslices = []
        for p in patient_paths:
            pid = int(utils.get_patient_id(p))
            spaths = sorted(glob.glob(p + '/sax_*.pkl'), key=lambda x: int(re.search(r'/sax_(\d+)\.pkl$', x).group(1)))
            # consider patients only with min_slices
            if len(spaths) > min_slices:
                self.pid2slice_paths[pid] = spaths
                nslices.append(len(spaths))

        # take max number of slices
        self.nslices = int(np.max(nslices))

        self.patient_ids = self.pid2slice_paths.keys()
        self.nsamples = len(self.patient_ids)

        self.data_path = data_path
        self.id2labels = data.read_labels(labels_path) if labels_path else None
        self.batch_size = batch_size
        self.rng = np.random.RandomState(42)
        self.full_batch = full_batch
        self.random = random
        self.batch_size = batch_size
        self.infinite = infinite
        self.transformation_params = transform_params
        self.data_prep_fun = data_prep_fun
        self.slice2roi = utils.load_pkl(slice2roi_path) if slice2roi_path else None
Example #46
def load_pretrained_model(l_in):

    l = conv3d(l_in, 64)
    l = inrn_v2_red(l)
    l = inrn_v2(l)

    l = inrn_v2_red(l)
    l = inrn_v2(l)

    l = inrn_v2_red(l)
    l = inrn_v2_red(l)

    l = dense(drop(l), 512)

    l = nn.layers.DenseLayer(l,1,nonlinearity=nn.nonlinearities.sigmoid, W=nn.init.Orthogonal(),
                b=nn.init.Constant(0))


    metadata = utils.load_pkl(os.path.join("/home/eavsteen/dsb3/storage/metadata/dsb3/models/eavsteen/","r_fred_malignancy_2-20170328-230443.pkl"))
    nn.layers.set_all_param_values(l, metadata['param_values'])

    return l
Example #47
def build_model():
    l_in = nn.layers.InputLayer((None, n_candidates_per_patient, 1,) + p_transform['patch_size'])
    l_in_rshp = nn.layers.ReshapeLayer(l_in, (-1, 1,) + p_transform['patch_size'])
    l_target = nn.layers.InputLayer((batch_size,))

    base_n_filters = 128
    l = conv_prelu_layer(l_in_rshp, n_filters=base_n_filters)
    l = conv_prelu_layer(l, n_filters=base_n_filters)
    l = conv_prelu_layer(l, n_filters=base_n_filters)

    l = max_pool3d(l)

    l = conv_prelu_layer(l, n_filters=base_n_filters)
    l = conv_prelu_layer(l, n_filters=base_n_filters)
    l = conv_prelu_layer(l, n_filters=base_n_filters)
    l_enc = conv_prelu_layer(l, n_filters=base_n_filters)

    num_units_dense = 512
    l_d01 = dense_prelu_layer(l, num_units=512)
    l_d01 = nn.layers.ReshapeLayer(l_d01, (-1, n_candidates_per_patient, num_units_dense))
    l_d02 = dense_prelu_layer(l_d01, num_units=512)
    l_out = nn.layers.DenseLayer(l_d02, num_units=2,
                                 W=nn.init.Constant(0.),
                                 b=np.array([np.log((1397. - 362) / 1398), np.log(362. / 1397)], dtype='float32'),
                                 nonlinearity=nn.nonlinearities.softmax)

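    # initialise the encoder layers with pretrained weights from the luna_p8a1 model, checking shapes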
    metadata_dir = utils.get_dir_path('models', pathfinder.METADATA_PATH)
    metadata_path = utils.find_model_metadata(metadata_dir, 'luna_p8a1')
    metadata = utils.load_pkl(metadata_path)
    for p, pv in zip(nn.layers.get_all_params(l_enc), metadata['param_values']):
        if p.get_value().shape != pv.shape:
            raise ValueError("mismatch: parameter has shape %r but value to "
                             "set has shape %r" %
                             (p.get_value().shape, pv.shape))
        p.set_value(pv)

    return namedtuple('Model', ['l_in', 'l_out', 'l_target'])(l_in, l_out, l_target)
def test_luna3d():
    image_dir = utils.get_dir_path('analysis', pathfinder.METADATA_PATH)
    image_dir = image_dir + '/test_luna/'
    utils.auto_make_dir(image_dir)

    id2zyxd = utils_lung.read_luna_annotations(pathfinder.LUNA_LABELS_PATH)

    luna_data_paths = [
        'problem_patients/1.3.6.1.4.1.14519.5.2.1.6279.6001.877026508860018521147620598474.mhd']

    candidates = utils.load_pkl(
        'problem_patients/1.3.6.1.4.1.14519.5.2.1.6279.6001.877026508860018521147620598474.pkl')

    candidates = candidates[:4]
    print candidates
    print '--------------'
    print id2zyxd['1.3.6.1.4.1.14519.5.2.1.6279.6001.877026508860018521147620598474']

    for k, p in enumerate(luna_data_paths):
        id = os.path.basename(p).replace('.mhd', '')
        print id
        img, origin, pixel_spacing = utils_lung.read_mhd(p)
        lung_mask = lung_segmentation.segment_HU_scan_elias(img)
        x, annotations_tf, tf_matrix, lung_mask_out = data_transforms.transform_scan3d(data=img,
                                                                                       pixel_spacing=pixel_spacing,
                                                                                       p_transform=p_transform,
                                                                                       luna_annotations=candidates,
                                                                                       p_transform_augment=None,
                                                                                       luna_origin=origin,
                                                                                       lung_mask=lung_mask,
                                                                                       world_coord_system=False)

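        # plot the transformed scan against the output lung mask along all three axes at each annotation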
        for zyxd in annotations_tf:
            plot_slice_3d_2(x, lung_mask_out, 0, id, img_dir='./', idx=zyxd)
            plot_slice_3d_2(x, lung_mask_out, 1, id, img_dir='./', idx=zyxd)
            plot_slice_3d_2(x, lung_mask_out, 2, id, img_dir='./', idx=zyxd)
Beispiel #49
0
                pred = compute_output(ofs)
                pred_batch.append(pred)
            pred_batch = np.array(pred_batch).swapaxes(0, 1)

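            # trim the zero-padded timesteps flagged in the mask from each prediction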
            msk = m_shared.get_value(borrow=True)
            new_preds_batch = []
            for i in range(len(pred_batch)):
                l = len(msk[i][msk[i] == 0.])
                # when l == 0 there is no padding and pred_batch[i][:-0] would be empty
                new_preds_batch.append(pred_batch[i][:-l] if l > 0 else pred_batch[i])
            preds.extend(new_preds_batch)
            print "%i%%"%int(np.round((b+1)/float(n_batches)*100.))

            utils.save_pkl(preds, target_path)

    else: 
        preds = utils.load_pkl(target_path)
        assert len(preds) == len(split.test_idxs)

    print "Preparing csv files"
    pred_files = glob(csv_path + "Sample*")
    if len(pred_files) != len(split.test_idxs):
        for i, pred in enumerate(preds):

            pred = np.argmax(pred, axis=-1)

            # window = 10
            # new_pred = pred.copy()
            # for j in range(len(pred)-window):
            #     new_pred[j+window//2] = scipy.stats.mode(pred[j:j+window])[0][0]
            # pred = new_pred
# predictions path
predictions_dir = utils.get_dir_path('model-predictions', pathfinder.METADATA_PATH)
outputs_path = predictions_dir + '/%s' % config_name
outputs_img_path = predictions_dir + '/%s_img' % config_name
utils.auto_make_dir(outputs_img_path)

blob_files = sorted(glob.glob(outputs_path + '/*.pkl'))

p_transform = {'patch_size': (64, 64, 64),
               'mm_patch_size': (64, 64, 64),
               'pixel_spacing': (1., 1., 1.)
               }

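# for each patient, extract a patch around each of its ten highest-scoring blob candidates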
for p in blob_files:
    pid = utils_lung.extract_pid_filename(p, '.pkl')
    blobs = utils.load_pkl(p)
    blobs = np.asarray(sorted(blobs, key=lambda x: x[-1], reverse=True))

    img, pixel_spacing = utils_lung.read_dicom_scan(pathfinder.DATA_PATH + '/' + pid)
    print pid
    for blob in blobs[:10]:
        patch_center = blob[:3]
        p1 = blob[-1]
        print p1
        x, _ = data_transforms.transform_patch3d(data=img,
                                                 luna_annotations=None,
                                                 patch_center=patch_center,
                                                 p_transform=p_transform,
                                                 pixel_spacing=pixel_spacing,
                                                 luna_origin=None,
                                                 world_coord_system=False)
# first make an ordered list of objective functions
train_objectives = [config().d_objectives[obj_name] for obj_name in config().order_objectives]
test_objectives = [config().d_objectives_deterministic[obj_name] for obj_name in config().order_objectives]
# theano functions
print givens_train
iter_train = theano.function([idx], train_objectives, givens=givens_train, updates=updates)

print 'test_objectives'
print config().d_objectives_deterministic
print 'givens_valid'
print givens_valid
iter_validate = theano.function([], test_objectives, givens=givens_valid)

if config().restart_from_save:
    print 'Load model parameters for resuming'
    resume_metadata = utils.load_pkl(config().restart_from_save)
    nn.layers.set_all_param_values(model.l_out, resume_metadata['param_values'])
    start_chunk_idx = resume_metadata['chunks_since_start'] + 1
    chunk_idxs = range(start_chunk_idx, config().max_nchunks)

    lr = np.float32(utils.current_learning_rate(learning_rate_schedule, start_chunk_idx))
    print '  setting learning rate to %.7f' % lr
    learning_rate.set_value(lr)
    losses_eval_train = resume_metadata['losses_eval_train']
    losses_eval_valid = resume_metadata['losses_eval_valid']
else:
    chunk_idxs = range(config().max_nchunks)
    losses_eval_train = defaultdict(list)
    losses_eval_valid = defaultdict(list)
    start_chunk_idx = 0
Beispiel #52
0
import numpy as np
import theano
from itertools import izip
import lasagne as nn
import utils
import buffering
import utils_heart
from configuration import config, set_configuration, set_subconfiguration
from pathfinder import METADATA_PATH

if len(sys.argv) < 2:
    sys.exit("Usage: predict.py <metadata_path>")

metadata_path = sys.argv[1]
metadata_dir = utils.get_dir_path('train', METADATA_PATH)
metadata = utils.load_pkl(metadata_dir + '/%s' % metadata_path)
config_name = metadata['configuration']
if 'subconfiguration' in metadata:
    set_subconfiguration(metadata['subconfiguration'])

set_configuration(config_name)

# predictions paths
prediction_dir = utils.get_dir_path('predictions', METADATA_PATH)
prediction_path = prediction_dir + "/%s.pkl" % metadata['experiment_id']
prediction_mu_std_path = prediction_dir + "/%s_mu_sigma.pkl" % metadata['experiment_id']

print "Build model"
model = config().build_model()
all_layers = nn.layers.get_all_layers(model.l_top)
all_params = nn.layers.get_all_params(model.l_top)
Beispiel #53
0
theano.config.warn_float64 = 'raise'

if len(sys.argv) < 2:
    sys.exit("Usage: test_class_dsb.py <configuration_name> <valid|test>")

config_name = sys.argv[1]
set_configuration('configs_class_dsb', config_name)

set = sys.argv[2] if len(sys.argv) == 3 else 'test'

# metadata
metadata_dir = utils.get_dir_path('models', pathfinder.METADATA_PATH)
metadata_path = utils.find_model_metadata(metadata_dir, config_name)

metadata = utils.load_pkl(metadata_path)
expid = metadata['experiment_id']

# logs
logs_dir = utils.get_dir_path('logs', pathfinder.METADATA_PATH)
sys.stdout = logger.Logger(logs_dir + '/%s-%s.log' % (expid, set))
sys.stderr = sys.stdout

# predictions path
predictions_dir = utils.get_dir_path('model-predictions', pathfinder.METADATA_PATH)
output_pkl_file = predictions_dir + '/%s-%s.pkl' % (expid, set)

submissions_dir = utils.get_dir_path('submissions', pathfinder.METADATA_PATH)
output_csv_file = submissions_dir + '/%s-%s.csv' % (expid, set)

# if os.path.isfile(output_pkl_file):
Beispiel #54
0
            return np.asarray([0] * len(properties_included), dtype='float32')
        else:
            return np.asarray([0] * len(properties), dtype='float32')
    else:
        label = []
        properties_dict = annotation[-1]
        if len(properties_included) > 0:
            for p in properties_included:
                label.append(properties_dict[p] / 5.0)
        else:
            for p in properties:
                label.append(properties_dict[p])
    return label


rescale_params_hist_eq = utils.load_pkl("luna_rescale_params_hist_eq.pkl")


# data preparation function
def data_prep_function(data, pid, patch_center, pixel_spacing, luna_origin, p_transform,
                       p_transform_augment, world_coord_system, **kwargs):
    x, patch_annotation_tf = data_transforms.transform_patch3d(data=data,
                                                               luna_annotations=None,
                                                               patch_center=patch_center,
                                                               p_transform=p_transform,
                                                               p_transform_augment=p_transform_augment,
                                                               pixel_spacing=pixel_spacing,
                                                               luna_origin=luna_origin,
                                                               world_coord_system=world_coord_system)
    
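    # look up the histogram-equalisation bins and borders precomputed for this patient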
    bins, original_borders = rescale_params_hist_eq[pid]
Beispiel #55
0
import numpy as np
import hashlib

import utils
import utils_lung
import pathfinder

rng = np.random.RandomState(42)


tvt_ids = utils.load_pkl(pathfinder.VALIDATION_SPLIT_PATH)
train_pids, valid_pids, test_pids = tvt_ids['training'], tvt_ids['validation'], tvt_ids['test']
all_pids = train_pids + valid_pids + test_pids
print 'total number of pids', len(all_pids)

id2label = utils_lung.read_labels(pathfinder.LABELS_PATH)
id2label_test = utils_lung.read_test_labels(pathfinder.TEST_LABELS_PATH)
id2label.update(id2label_test)
n_patients = len(id2label)

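# partition patient ids into positive and negative groups by their label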
pos_ids = []
neg_ids = []

for pid, label in id2label.iteritems():
    if label == 1:
        pos_ids.append(pid)
    elif label == 0:
        neg_ids.append(pid)
    else:
        raise ValueError("unexpected label %r for patient %s" % (label, pid))
theano.config.warn_float64 = 'raise'

if len(sys.argv) < 2:
    sys.exit("Usage: test_luna_scan.py <configuration_name>")

config_name = sys.argv[1]
set_configuration('configs_fpred_scan', config_name)

# predictions path
predictions_dir = utils.get_dir_path('model-predictions', pathfinder.METADATA_PATH)
outputs_path = predictions_dir + '/%s' % config_name
pid2candidates_path = utils_lung.get_candidates_paths(outputs_path)
pid2candidates = {}
for k, v in pid2candidates_path.iteritems():
    pid2candidates[k] = utils.load_pkl(v)

pid2annotations = utils_lung.read_luna_annotations(pathfinder.LUNA_LABELS_PATH)

n_top = 1
tp_top_n = 0
fp_top_n = 0
tp = 0
n_pos = 0
idx1 = []
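# count detections and false positives among the n_top highest-ranked candidates of each scan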
for k in pid2candidates.iterkeys():
    print '----------------------------------------'
    print k
    n_true = len(pid2annotations[k])
    n_det_top = int(np.sum(pid2candidates[k][:n_top, 3]))
    n_fp_top = int(n_top - np.sum(pid2candidates[k][:n_top, 3]))
                                                 p_transform=p_transform,
                                                 p_transform_augment=p_transform_augment,
                                                 pixel_spacing=pixel_spacing)
    x = data_transforms.pixelnormHU(x)
    return x


data_prep_function_train = partial(data_prep_function, p_transform_augment=p_transform_augment,
                                   p_transform=p_transform)
data_prep_function_valid = partial(data_prep_function, p_transform_augment=None,
                                   p_transform=p_transform)

# data iterators
batch_size = 4

train_valid_ids = utils.load_pkl(pathfinder.VALIDATION_SPLIT_PATH)
train_pids, valid_pids, test_pids = train_valid_ids['training'], train_valid_ids['validation'], train_valid_ids['test']
print 'n train', len(train_pids)
print 'n valid', len(valid_pids)

train_data_iterator = data_iterators.DSBPatientsDataGenerator(data_path=pathfinder.DATA_PATH,
                                                              batch_size=batch_size,
                                                              transform_params=p_transform,
                                                              n_candidates_per_patient=n_candidates_per_patient,
                                                              data_prep_fun=data_prep_function_train,
                                                              id2candidates_path=id2candidates_path,
                                                              rng=rng,
                                                              patient_ids=train_pids,
                                                              return_patch_locs=True,
                                                              random=True, infinite=True)
Beispiel #58
0
def read_split(path):
    d = utils.load_pkl(path)
    print d['valid']
Beispiel #59
0
import utils
import logger
import theano.tensor as T
import buffering
from configuration import config, set_configuration, set_subconfiguration
import pathfinder

if len(sys.argv) < 2:
    sys.exit("Usage: train.py <meta_configuration_name>")

config_name = sys.argv[1]

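# a meta configuration wraps an already trained sub-model: locate that sub-model's metadata first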
subconfig_name = config_name.replace('meta_', '')
metadata_dir = utils.get_dir_path('train', pathfinder.METADATA_PATH)
submodel_metadata_path = utils.find_model_metadata(metadata_dir, subconfig_name)
submodel_metadata = utils.load_pkl(submodel_metadata_path)

assert subconfig_name == submodel_metadata['configuration']
set_subconfiguration(subconfig_name)
set_configuration(config_name)

expid = utils.generate_expid(config_name)
print
print "Experiment ID: %s" % expid
print

# meta metadata and logs paths
metadata_path = metadata_dir + '/%s.pkl' % expid
logs_dir = utils.get_dir_path('logs', pathfinder.METADATA_PATH)
sys.stdout = logger.Logger(logs_dir + '/%s.log' % expid)