def test_luna3d():
    # path = '/mnt/sda3/data/kaggle-lung/lunapred/luna_scan_v3_dice-20170131-173443/'
    path = '/mnt/sda3/data/kaggle-lung/lunapred_el/luna_scan_v3_dice-20170201-231707/'
    files = os.listdir(path)
    print files
    x, y, p = [], [], []
    for f in files:
        if 'in' in f:
            x.append(f)
        elif 'tgt' in f:
            y.append(f)
        else:
            p.append(f)

    x = sorted(x)
    y = sorted(y)
    p = sorted(p)

    for xf, yf, pf in zip(x, y, p):
        x_batch = utils.load_pkl(path + xf)
        pred_batch = utils.load_pkl(path + pf)
        y_batch = utils.load_pkl(path + yf)
        print xf
        print yf
        print pf
        # plot_2d_animation(x_batch[0], y_batch[0], pred_batch[0])
        plot_slice_3d_3(x_batch[0, 0], y_batch[0, 0], pred_batch[0, 0], 0, 'aa')
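# The snippets in this file lean on small pickle helpers (load_pkl, utils.load_pkl,
# save_pkl, dump_pkl) that are defined elsewhere. A minimal sketch of what such
# helpers are assumed to look like, built only on the standard pickle module;
# note that the exact names and argument order vary between snippets
# (e.g. dump_pkl(obj, path) in some, save_pkl(path, obj) in others).
import pickle


def load_pkl(path):
    # Load and return the object stored in a pickle file.
    with open(path, 'rb') as f:
        return pickle.load(f)


def save_pkl(obj, path):
    # Pickle an object to the given path.
    with open(path, 'wb') as f:
        pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)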
def read_act_texts(self):
    text_data = load_pkl('data/%s_labeled_text_data.pkl' % self.domain)
    pos_data = load_pkl('data/%s_dependency.pkl' % self.domain)
    act_texts = []
    # ipdb.set_trace()
    for i in range(len(text_data)):
        act_text = {}
        act_text['tokens'] = text_data[i]['words']
        act_text['sents'] = text_data[i]['sents']
        act_text['acts'] = text_data[i]['acts']
        act_text['sent_acts'] = text_data[i]['sent_acts']
        act_text['word2sent'] = text_data[i]['word2sent']
        act_text['tags'] = np.ones(len(text_data[i]['words']), dtype=np.int32)
        act_text['act2related'] = {}
        for acts in text_data[i]['acts']:
            act_text['act2related'][acts['act_idx']] = acts['related_acts']
            act_text['tags'][acts['act_idx']] = acts['act_type'] + 1  # 2, 3, 4
        act_text['pos'] = []
        for sent in pos_data[i]:
            for word, pos in sent:
                act_text['pos'].append(self.pos_dict[pos])
        self.create_matrix(act_text)
        act_texts.append(act_text)
    act_indices = ten_fold_split_ind(len(act_texts), self.k_fold_indices, self.k_fold)
    act_data = index2data(act_indices, act_texts)
    return act_data
def test_luna3d():
    # path = '/mnt/sda3/data/kaggle-lung/lunapred/luna_scan_v3_dice-20170131-173443/'
    path = '/mnt/sda3/data/kaggle-lung/lunapred_el/luna_scan_v3_dice-20170201-231707/'
    files = os.listdir(path)
    print files
    x, y, p = [], [], []
    for f in files:
        if 'in' in f:
            x.append(f)
        elif 'tgt' in f:
            y.append(f)
        else:
            p.append(f)

    x = sorted(x)
    y = sorted(y)
    p = sorted(p)

    for xf, yf, pf in zip(x, y, p):
        x_batch = utils.load_pkl(path + xf)
        pred_batch = utils.load_pkl(path + pf)
        y_batch = utils.load_pkl(path + yf)
        print xf
        print yf
        print pf
        # plot_2d_animation(x_batch[0], y_batch[0], pred_batch[0])
        plot_slice_3d_3(x_batch[0, 0], y_batch[0, 0], pred_batch[0, 0], 0, 'aa')
def __init__(self, *args, **kwargs):
    parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    data_dir = os.path.join(parent_dir, 'data')
    fid_to_label = load_pkl(os.path.join(data_dir, 'fid_to_label.pkl'))
    fid_to_text = load_pkl(os.path.join(data_dir, 'fid_to_text.pkl'))
    fids_path = os.path.join(data_dir, 'fids.pkl')
    label_to_int_path = os.path.join(data_dir, 'label_to_int.pkl')
    fids = load_pkl(fids_path)
    label_to_int = load_pkl(label_to_int_path)
    paths = [os.path.join(data_dir, 'images', 'img%s.jpg' % fid) for fid in fids]
    targets = [label_to_int[fid_to_label[fid]] for fid in fids]
    num_labels = len(np.unique(targets))

    grouped_paths = [[] for _ in range(num_labels)]
    for path, label in zip(paths, targets):
        grouped_paths[label].append(path)

    grouped_texts = [[] for _ in range(num_labels)]
    for fid, label in zip(fids, targets):
        grouped_texts[label].append(fid_to_text[fid])

    characters_list = []
    for i, paths in enumerate(grouped_paths):
        characters_list.append(paths)
    characters_list = np.array(characters_list)

    texts_list = []
    for i, texts in enumerate(grouped_texts):
        texts_list.append(texts)
    texts_list = np.array(texts_list)

    AbstractMetaOmniglot.__init__(self, characters_list, texts_list, *args, **kwargs)
def __init__(self, alpha, parts, numpy=False, sparse=True):
    '''
    alpha: tuple of ints, the weak partition of 8 into 3 parts
    parts: tuple of ints, the partitions of each part of alpha
    cached_loc: string, location of the cached pickle file of S_8 mod S_alpha irreps
    '''
    self.alpha = alpha
    self.parts = parts
    self.cos_reps = coset_reps(sn(8), young_subgroup_perm(alpha))
    self.cyc_irrep_func = cyclic_irreps(alpha)
    self.yor_dict = None

    # cache orientation tuple -> cyclic irrep
    self.cyclic_irreps_re = {}
    self.cyclic_irreps_im = {}
    self.fill_cyclic_irreps()  # also cache the cyclic irreps

    if numpy:
        pkl_loc = IRREP_LOC_FMT.format(alpha, parts)
        self.yor_dict = load_pkl(pkl_loc)
    elif sparse:
        pkl_loc = IRREP_SP_LOC_FMT.format(alpha, parts)
        self.yor_dict = load_sparse_pkl(pkl_loc)
    else:
        # TODO: deprecate
        print('neither sparse nor numpy')
        pkl_loc = IRREP_LOC_FMT.format(alpha, parts)
        self.np_yor_dict = load_pkl(pkl_loc)
def main():
    size = 600
    k = 4
    # (U, s, V) = utils.load_pkl(SVD_PATH)
    eig_face = utils.load_pkl(U4_PATH)
    x_mean = utils.load_pkl(X_MAEN)
    # eig_face = U.T
    print('- A3')
    # en = eig_face[:k]
    # en = np.matrix(eig_face[:k])
    en = eig_face
    # en = np.matrix(eig_face)
    print(' * encoder shape:', en.shape)

    sample = get_sample()
    sampleT = np.matrix(sample - x_mean).T
    print(' * sampleT matrix shape:', sampleT.shape)

    construct_matrix = en * sampleT
    result = en.T * construct_matrix
    result = np.array(result.T) + x_mean
    print(' * result shape:', result.shape)

    faces = result.reshape(-1, size, size, 3)
    faces -= np.min(faces)
    faces /= np.max(faces)
    faces = (faces * 255).astype(np.uint8)
    print(' * faces shape:', faces.shape)
    # print(faces)
    io.imsave(OUTPUT, faces.reshape(size, size, 3))
def load_dataset(name, as_image=False):
    if name == "mnist_small":
        return ut.load_pkl("datasets/mnist_small.pickle")

    if name == "cifar_small":
        return ut.load_pkl("datasets/cifar_small.pickle")

    if name == "digits":
        digits = load_digits()
        X = digits["data"]
        y = digits["target"]
        X /= 255.
        X, y = shuffle(X, y)
        Xtest = X[500:]
        ytest = y[500:]
        X = X[:500]
        y = y[:500]
        return {"X": X, "y": y, "Xtest": Xtest, "ytest": ytest}

    if name == "boat_images":
        # LOAD SMALL IMAGES
        imgList = glob.glob("datasets/boat_images/*.png")
        df = pd.read_csv("datasets/boat_images/coords.csv")
        X = np.zeros((len(imgList), 80, 80, 3))
        Y = np.zeros((len(imgList), 80, 80))
        for i, img in enumerate(imgList):
            X[i] = img_as_float(imread(img))
            flag = False
            yx_coors = []
            for _, row in df.iterrows():
                if img[img.rindex("/") + 1:] == row.image[row.image.rindex("/") + 1:]:
                    yx_coors += [(row.y, row.x)]
                    flag = True
            if flag == False:
                Y[i] = np.zeros((80, 80))
            else:
                # Y[i] = np.ones((80, 80))*-1
                for y, x in yx_coors:
                    Y[i, y, x] = 1

        X = iu.last2first(X)
        Y = Y[:, np.newaxis]
        if as_image:
            return X, Y
        else:
            y = Y.sum(axis=1).sum(axis=1).sum(axis=1)
            return X, y.astype(int)
def read_arg_sents(self):
    """
    Read data for the action argument extractor
    PS: pos_data is generated by the function ``pos_tagging" in utils.py
    """
    indata = load_pkl('data/refined_%s_data.pkl' % self.domain)[-1]
    pos_data = load_pkl('data/%s_arg_pos.pkl' % self.domain)
    arg_sents = []
    # ipdb.set_trace()
    for i in range(len(indata)):
        for j in range(len(indata[i])):
            if len(indata[i][j]) == 0:
                continue
            # obj_ind of -1 refers to UNK
            words = indata[i][j]['last_sent'] + indata[i][j]['this_sent'] + ['UNK']
            pos = [self.pos_dict[p] for w, p in pos_data[i][j][0] + pos_data[i][j][1]] + [0]
            # sanity check: word and POS sequences must align
            if len(words) != len(pos):
                ipdb.set_trace()
                print('len(words) != len(pos)')
            sent_len = len(words)
            act_inds = [a['act_idx'] for a in indata[i][j]['acts'] if a['act_idx'] < self.num_words]
            for k in range(len(indata[i][j]['acts'])):
                act_ind = indata[i][j]['acts'][k]['act_idx']
                obj_inds = indata[i][j]['acts'][k]['obj_idxs']
                arg_sent = {}
                arg_tags = np.ones(sent_len, dtype=np.int32)
                if len(obj_inds[1]) == 0:
                    arg_tags[obj_inds[0]] = 2  # essential objects
                else:
                    arg_tags[obj_inds[0]] = 4  # exclusive objects
                    arg_tags[obj_inds[1]] = 4  # exclusive objects
                # generate distance representation
                position = np.zeros(sent_len, dtype=np.int32)
                position.fill(act_ind)
                distance = np.abs(np.arange(sent_len) - position)
                arg_sent['tokens'] = words
                arg_sent['tags'] = arg_tags
                arg_sent['pos'] = deepcopy(pos)
                arg_sent['act_ind'] = act_ind
                arg_sent['distance'] = distance
                arg_sent['act_inds'] = act_inds
                arg_sent['obj_inds'] = obj_inds
                self.create_matrix(arg_sent)
                arg_sents.append(arg_sent)
    # get k-fold split data
    arg_indices = ten_fold_split_ind(len(arg_sents), self.k_fold_indices, self.k_fold)
    arg_data = index2data(arg_indices, arg_sents)
    return arg_data
def preprocess(args, data_name):
    from utils import remove_info
    from sklearn.model_selection import train_test_split
    from preprocessor import Preprocessor
    torch.manual_seed(42)

    print('[Info] Process csv...')
    # for train and valid csv
    trainset = pd.read_csv('../data/task2_trainset.csv', dtype=str)
    trainset = remove_info(trainset)
    trainset, validset = train_test_split(trainset, test_size=0.1, random_state=42)
    testset = pd.read_csv('../data/task2_public_testset.csv', dtype=str)
    testset = remove_info(testset)

    print('[Info] Loading node vectors...')
    train_node_vec = load_pkl('../data/node_vec.pkl')  # torch.Size([7000, 128])
    train_node_vec, valid_node_vec = train_test_split(train_node_vec.numpy(), test_size=0.1, random_state=42)
    train_node_vec = torch.FloatTensor(train_node_vec)
    valid_node_vec = torch.FloatTensor(valid_node_vec)
    test_node_vec = load_pkl('../data/node_vec_test.pkl')
    test_node_vec = test_node_vec.type(torch.FloatTensor)

    print('[INFO] Make bert dataset...')
    preprocessor = Preprocessor(args.model)
    train_data = preprocessor.get_dataset(trainset, n_workers=12)
    valid_data = preprocessor.get_dataset(validset, n_workers=12)
    test_data = preprocessor.get_dataset(testset, n_workers=12)
    tfidf = get_tfidf([data['tokens'] for data in train_data]
                      + [data['tokens'] for data in valid_data]
                      + [data['tokens'] for data in test_data])
    train_data = BertDataset(train_data, train_node_vec, tfidf[:6300], args.max_len)
    valid_data = BertDataset(valid_data, valid_node_vec, tfidf[6300:7000], args.max_len)
    test_data = BertDataset(test_data, test_node_vec, tfidf[7000:], args.max_len)

    print('[INFO] Save pickles...')
    if not os.path.exists('../dataset/'):
        os.makedirs('../dataset/')
    with open('../dataset/trainData_%s.pkl' % data_name, 'wb') as f:
        pickle.dump(train_data, f)
    with open('../dataset/validData_%s.pkl' % data_name, 'wb') as f:
        pickle.dump(valid_data, f)
    with open('../dataset/testData_%s.pkl' % data_name, 'wb') as f:
        pickle.dump(test_data, f)
def __init__(self, alpha, parts):
    self.alpha = alpha
    self.parts = parts
    self.values = -np.load(model_path(alpha, parts))
    self.idx_to_dist = load_pkl(
        os.path.join(get_prefix(), 'cube', 'cube2_pkls', 'idx_to_dist.pkl'))
    self.correct = np.load(
        os.path.join(get_prefix(), 'cube', 'fourier_eval_results',
                     str(alpha), str(parts), 'correct.npy'))
    self.cube_to_idx = load_pkl(
        os.path.join(get_prefix(), 'cube', 'cube2_pkls', 'cube_to_idx.pkl'))
def load_pretrained_model(l_in):
    l = conv3d(l_in, 64)

    l = inrn_v2_red(l)
    l = inrn_v2(l)
    l = feat_red(l)
    l = inrn_v2(l)

    l = inrn_v2_red(l)
    l = inrn_v2(l)
    l = feat_red(l)
    l = inrn_v2(l)

    l = feat_red(l)

    l = dense(l, 128, name='dense_fpr')

    l_out = nn.layers.DenseLayer(l, num_units=2,
                                 W=nn.init.Constant(0.),
                                 nonlinearity=nn.nonlinearities.softmax)

    metadata = utils.load_pkl(os.path.join(
        "/home/eavsteen/dsb3/storage/metadata/dsb3/models/ikorshun/",
        "luna_c3-20170226-174919.pkl"))
    nn.layers.set_all_param_values(l_out, metadata['param_values'])

    return nn.layers.get_all_layers(l_out)[-3]
def ultimate_evaluate(model):
    genres = ['action', 'drama', 'horror', 'romance']
    testingData = []
    testingLabels = []
    total = defaultdict.fromkeys(range(len(genres)), 0)
    correct = defaultdict.fromkeys(range(len(genres)), 0)
    yTrue, yPredict = [], []
    for genreIndex, genre in enumerate(genres):
        try:
            genreFeatures = load_pkl(genre + "_histogram_test")
            genreFeatures = np.array([np.array(f) for f in genreFeatures])  # numpy hack
        except Exception as e:
            print e
            return
        print "OK."
        for videoFeatures in genreFeatures:
            total[genreIndex] += 1
            d = defaultdict(int)
            predictedClasses = model.predict(videoFeatures)  # list of predictions, per-frame
            print predictedClasses
            for i in predictedClasses:
                d[i] += 1
            predictedGenre = max(d.iteritems(), key=lambda x: x[1])[0]
            yPredict.append(predictedGenre)
            yTrue.append(genreIndex)
            if predictedGenre == genreIndex:
                correct[genreIndex] += 1
    print correct, total
    confusionMatrix = confusion_matrix(yTrue, yPredict)
    print confusionMatrix
def build_model():
    metadata_dir = utils.get_dir_path('models', pathfinder.METADATA_PATH)
    metadata_path = utils.find_model_metadata(
        metadata_dir, patch_class_config.__name__.split('.')[-1])
    print('loading model', metadata_path)
    print('please check if model pkl is the correct one')
    metadata = utils.load_pkl(metadata_path)

    print('Build model')
    model = patch_class_config.build_model()
    all_layers = nn.layers.get_all_layers(model.l_out)
    num_params = nn.layers.count_params(model.l_out)
    print(' number of parameters: %d' % num_params)
    print(string.ljust(' layer output shapes:', 36), )
    print(string.ljust('#params:', 10), )
    print('output shape:')
    for layer in all_layers:
        name = string.ljust(layer.__class__.__name__, 32)
        num_param = sum([np.prod(p.get_value().shape) for p in layer.get_params()])
        num_param = string.ljust(num_param.__str__(), 10)
        print(' %s %s %s' % (name, num_param, layer.output_shape))

    nn.layers.set_all_param_values(model.l_out, metadata['param_values'])
    return model
def main():
    # Target data
    filename = "120_kmeans_obj.pkl"
    kmeans = k.load_pkl(filename)
    spec, label = load_test_data()
    print("spec", spec.shape)
    print("label", label.shape)

    spec_ = np.empty((513, ), np.float32)
    for i in range(len(label)):
        spec_ = np.vstack((spec_, kmeans.cluster_centers_[label[i]]))
    spec_ = np.delete(spec_, 0, 0)

    print("compare data structure ----")
    print("spec: ", spec.shape)
    print("spec_: ", spec_.shape)
    print("spec data:", spec)
    print("spec_ data:", spec_)
    print("min-max spec_ data:", min_max(spec_))

    waveform = audio.inv_spectrogram(spec)
    waveform_ = audio.inv_spectrogram(spec_)
    waveformmm_ = audio.inv_spectrogram(min_max(spec_))
    audio.save_wav(waveform, 'ideal_out.wav')
    audio.save_wav(waveform_, 'ideal_out_.wav')
    audio.save_wav(waveformmm_, 'ideal_outmm_.wav')
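# min_max() is called above but not defined in this snippet. A minimal sketch of
# the assumed helper: rescale an array linearly to the [0, 1] range.
import numpy as np


def min_max(x):
    x = np.asarray(x, dtype=np.float32)
    return (x - x.min()) / (x.max() - x.min())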
def split_transform(fsplit_lst, irrep_dict, alpha, parts, mem_dict=None):
    '''
    fsplit_lst: list of pkl file names of the distance values for a chunk of the total distance values
    irrep_dict: irrep dict
    alpha: weak partition
    parts: list/iterable of partitions of the parts of alpha
    '''
    print(' Computing transform on splits: {}'.format(fsplit_lst))
    cos_reps = coset_reps(sn(8), young_subgroup_perm(alpha))
    save_dict = {}
    cyc_irrep_func = cyclic_irreps(alpha)
    pid = os.getpid()

    for fsplit_pkl in fsplit_lst:
        with open(fsplit_pkl, 'r') as f:
            # dict of function values
            pkl_dict = load_pkl(fsplit_pkl)

        for perm_tup, tup_dict in pkl_dict.items():
            for tup, dists in tup_dict.items():
                dist_tot = sum(dists)
                perm_rep = irrep_dict[perm_tup]  # perm_rep is a dict of (i, j) -> matrix
                block_cyclic_rep = block_cyclic_irreps(tup, cos_reps, cyc_irrep_func)
                mult_yor_block(perm_rep, dist_tot, block_cyclic_rep, save_dict)
        if mem_dict is not None:
            mem_dict[pid] = max(check_memory(verbose=False), mem_dict.get(pid, 0))
        del pkl_dict

    block_size = wreath_dim(parts)
    n_cosets = coset_size(alpha)
    mat = convert_yor_matrix(save_dict, block_size, n_cosets)
    return mat
def load_pretrained_model(l_in):
    l = conv3d(l_in, 64)

    l = inrn_v2_red(l)
    l = inrn_v2(l)

    l = inrn_v2_red(l)
    l = inrn_v2(l)

    l = inrn_v2_red(l)
    l = inrn_v2_red(l)

    l = dense(drop(l), 128)

    l_out = nn.layers.DenseLayer(l, num_units=10,
                                 W=nn.init.Orthogonal(),
                                 b=nn.init.Constant(0.1),
                                 nonlinearity=nn.nonlinearities.softmax)

    metadata = utils.load_pkl(os.path.join(
        "/mnt/storage/metadata/dsb3/models/eavsteen/",
        "t_el_0-20170321-013339.pkl"))
    nn.layers.set_all_param_values(l_out, metadata['param_values'])

    return nn.layers.get_all_layers(l_out)[-3]
def from_config(cls, validation_ratio, validation_seed, dir_path, data_sampling_freq,
                start_sampling_freq, end_sampling_freq, start_seq_len, num_channels, return_long):
    assert end_sampling_freq <= data_sampling_freq
    target_location = os.path.join(dir_path, '{}c_{}v_{}ss_{}es_{}l.npz'.format(
        num_channels, DATASET_VERSION, start_sampling_freq, end_sampling_freq, start_seq_len))
    if os.path.exists(target_location):
        print('loading dataset from file: {}'.format(target_location))
        given_data = np.load(target_location)
        given_data = [load_pkl(target_location + '.pkl'),
                      [given_data['arr_{}'.format(i)] for i in range(len(given_data.keys()))]]
    else:
        print('creating dataset from scratch')
        dataset = cls(None, dir_path, data_sampling_freq, start_sampling_freq,
                      end_sampling_freq, start_seq_len, num_channels, return_long)
        np.savez_compressed(target_location, *dataset.datas)
        save_pkl(target_location + '.pkl', dataset.data_pointers)
        given_data = [dataset.data_pointers, dataset.datas]

    return_datasets = []
    for i in range(2):
        return_datasets.append(cls(given_data, dir_path, data_sampling_freq, start_sampling_freq,
                                   end_sampling_freq, start_seq_len, num_channels, return_long))

    data_pointers = [x for x in return_datasets[0].data_pointers]
    np.random.seed(validation_seed)
    np.random.shuffle(data_pointers)
    return_datasets[0].data_pointers = data_pointers[:int((1 - validation_ratio) * len(data_pointers))]
    return_datasets[1].data_pointers = data_pointers[int((1 - validation_ratio) * len(data_pointers)):]
    return return_datasets[0], return_datasets[1]
def __init__(self, data_path, batch_size, transform_params, patient_ids=None, labels_path=None,
             slice2roi_path=None, full_batch=False, random=True, infinite=False, view='sax',
             data_prep_fun=data.transform_norm_rescale, **kwargs):
    if patient_ids:
        self.patient_paths = []
        for pid in patient_ids:
            self.patient_paths.append(data_path + '/%s/study/' % pid)
    else:
        self.patient_paths = glob.glob(data_path + '/*/study/')

    self.slice_paths = [sorted(glob.glob(p + '/%s_*.pkl' % view)) for p in self.patient_paths]
    self.slice_paths = list(itertools.chain(*self.slice_paths))
    self.slicepath2pid = {}
    for s in self.slice_paths:
        self.slicepath2pid[s] = int(utils.get_patient_id(s))

    self.nsamples = len(self.slice_paths)
    self.batch_size = batch_size
    self.rng = np.random.RandomState(42)
    self.full_batch = full_batch
    self.random = random
    self.infinite = infinite
    self.id2labels = data.read_labels(labels_path) if labels_path else None
    self.transformation_params = transform_params
    self.data_prep_fun = data_prep_fun
    self.slice2roi = utils.load_pkl(slice2roi_path) if slice2roi_path else None
def main(args):
    # load particles
    particles = dataset.load_particles(args.mrcs, datadir=args.datadir)
    log(particles.shape)
    Nimg, D, D = particles.shape

    trans = utils.load_pkl(args.trans)
    if type(trans) is tuple:
        trans = trans[1]
    trans *= args.tscale
    assert np.all(trans <= 1), "ERROR: Old pose format detected. Translations must be in units of fraction of box."
    trans *= D  # convert to pixels
    assert len(trans) == Nimg

    xx, yy = np.meshgrid(np.arange(-D / 2, D / 2), np.arange(-D / 2, D / 2))
    TCOORD = np.stack([xx, yy], axis=2) / D  # DxDx2

    imgs = []
    for ii in range(Nimg):
        ff = fft.fft2_center(particles[ii])
        # translation in real space = phase shift in Fourier space
        tfilt = np.dot(TCOORD, trans[ii]) * -2 * np.pi
        tfilt = np.cos(tfilt) + np.sin(tfilt) * 1j
        ff *= tfilt
        img = fft.ifftn_center(ff)
        imgs.append(img)

    imgs = np.asarray(imgs).astype(np.float32)
    mrc.write(args.o, imgs)

    if args.out_png:
        plot_projections(args.out_png, imgs[:9])
def build_model():
    l_in = nn.layers.InputLayer((None, ) + p_transform['patch_size'])
    l_dim = nn.layers.DimshuffleLayer(l_in, pattern=[0, 'x', 1, 2, 3])
    l_target = nn.layers.InputLayer((None, 1))

    l = conv3d(l_dim, 64)

    l = inrn_v2_red(l)
    l = inrn_v2(l)
    l = feat_red(l)
    l = inrn_v2(l)

    l = inrn_v2_red(l)
    l = inrn_v2(l)
    l = feat_red(l)
    l = inrn_v2(l)

    l = feat_red(l)

    l_out = dense(l, 128)

    # l_out = nn.layers.DenseLayer(l, num_units=2,
    #                              W=nn.init.Constant(0.),
    #                              nonlinearity=nn.nonlinearities.softmax)

    metadata = utils.load_pkl(os.path.join(
        "/home/eavsteen/dsb3/storage/metadata/dsb3/models/ikorshun/",
        "luna_c3-20170226-174919.pkl"))
    for i in range(-20, 0):
        print metadata['param_values'][i].shape
    nn.layers.set_all_param_values(l_out, metadata['param_values'][:-2])

    return namedtuple('Model', ['l_in', 'l_out', 'l_target'])(l_in, l_out, l_target)
def create_dataloader(args, vocabs=None, val=False):
    argvalpfx = "val_" if val else ""
    paths = [getattr(args, f"{argvalpfx}{mode}_path") for mode in MODES]
    if vocabs is None:
        vocabs = [getattr(args, f"{mode}_vocab") for mode in MODES]
        vocabs = [utils.load_pkl(v) if v is not None else None for v in vocabs]
    dset = dataset.TextSequenceDataset(
        paths=paths,
        feats=["string", "tensor"],
        vocabs=vocabs,
        vocab_limit=args.vocab_limit,
        pad_eos=args.eos,
        pad_bos=args.bos,
        unk=args.unk,
    )
    if vocabs is None:
        vocabs = dset.vocabs
    collator = dataset.TextSequenceBatchCollator(
        pad_idxs=[len(v) for v in vocabs]
    )
    return td.DataLoader(
        dataset=dset,
        batch_size=args.batch_size,
        shuffle=False if val else args.shuffle,
        num_workers=args.data_workers,
        collate_fn=collator,
        pin_memory=args.pin_memory
    )
def generate(args):
    devices = utils.get_devices(args.gpu)
    if args.seed is not None:
        utils.manual_seed(args.seed)

    logging.info("Loading data...")
    vocab = utils.load_pkl(args.vocab)

    logging.info("Initializing generation environment...")
    model = prepare_model(args, vocab)
    model = utils.to_device(model, devices)
    generator = Generator(
        model=model,
        device=devices[0],
        batch_size=args.batch_size,
        vocab=vocab,
        bos=args.bos,
        eos=args.eos,
        unk=args.unk,
        max_len=args.max_length
    )

    logging.info("Commencing generation...")
    samples = generator.generate(args.z_samples)
    if args.nearest_neighbors is not None:
        dataset = prepare_dataset(args, vocab)
        neighbors = nearest_neighbors(args, samples, dataset)
    else:
        neighbors = None
    save(args, samples, neighbors)

    logging.info("Done!")
def test_th_pkl(np_pkl, th_pkl):
    print('Testing equivalence')
    np_dict = load_pkl(np_pkl)
    th_dict = load_sparse_pkl(th_pkl)
    compare(np_dict, th_dict)
    print('All equal between numpy and torch versions!!')
    check_memory()
def __init__(self, data_path, batch_size, transform_params, patient_ids=None, labels_path=None,
             slice2roi_path=None, full_batch=False, random=True, infinite=True, min_slices=5,
             **kwargs):
    if patient_ids:
        patient_paths = []
        for pid in patient_ids:
            patient_paths.append(data_path + '/%s/study/' % pid)
    else:
        patient_paths = glob.glob(data_path + '/*/study/')

    self.pid2sax_slice_paths = defaultdict(list)
    self.pid2ch2_path, self.pid2ch4_path = {}, {}
    for p in patient_paths:
        pid = int(utils.get_patient_id(p))
        spaths = sorted(glob.glob(p + '/sax_*.pkl'),
                        key=lambda x: int(re.search(r'/sax_(\d+)\.pkl$', x).group(1)))
        if len(spaths) > min_slices:
            self.pid2sax_slice_paths[pid] = spaths
            ch2_path = glob.glob(p + '/2ch_*.pkl')
            self.pid2ch2_path[pid] = ch2_path[0] if ch2_path else None
            ch4_path = glob.glob(p + '/4ch_*.pkl')
            self.pid2ch4_path[pid] = ch4_path[0] if ch4_path else None

    self.patient_ids = self.pid2sax_slice_paths.keys()
    self.nsamples = len(self.patient_ids)

    self.id2labels = data.read_labels(labels_path) if labels_path else None
    self.batch_size = batch_size
    self.rng = np.random.RandomState(42)
    self.full_batch = full_batch
    self.random = random
    self.batch_size = batch_size
    self.infinite = infinite
    self.transformation_params = transform_params
    self.slice2roi = utils.load_pkl(slice2roi_path) if slice2roi_path else None
def init(node_type, checkpoint_path=''):
    """Initialize shared information and data for the given node_type (e.g., 'worker').

    Optionally, load a stored model from a checkpoint path to continue training or evaluation.
    """
    if checkpoint_path:
        nn = load_pkl(checkpoint_path)  # load the stored, pickled model
    else:
        nn = FeedForwardNeuralNetwork(  # return a new FFNN
            run_config['NUM_FEATURES'],
            run_config['NUM_OUTPUTS'],
            run_config['LAYERS'])

    if node_type == 'worker':
        # Worker needs training data and training hyperparameters
        data = run_config['TRAIN']
        return nn, run_config['NUM_WEIGHTS'], SERVER_INFO, run_config['LEARNING_RATE'], \
            run_config['BATCH_SIZE'], data
    elif node_type == 'validator':
        # Validator only needs validation data
        data = run_config['VALIDATION']
        return nn, run_config['NUM_WEIGHTS'], SERVER_INFO, data
    elif node_type == 'server':
        # Server needs all data
        data = (run_config['TRAIN'], run_config['VALIDATION'], run_config['TEST'])
        return nn, run_config['NUM_WEIGHTS'], SERVER_INFO, run_config['NUM_BATCHES'], data
    else:
        print('Error: node type not recognized!')
        exit()
    return None
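# Hypothetical usage of init() above for a worker node; the unpacked names are
# assumptions matching the tuple returned in the 'worker' branch, and run_config
# and SERVER_INFO must already be defined in the importing module.
nn, num_weights, server_info, learning_rate, batch_size, train_data = init('worker')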
def build_model():
    l_in = nn.layers.InputLayer((None, ) + p_transform['patch_size'])
    l_dim = nn.layers.DimshuffleLayer(l_in, pattern=[0, 'x', 1, 2, 3])
    l_target = nn.layers.InputLayer((None, 1))

    l = conv3d(l_dim, 64)

    l = inrn_v2_red(l)
    l = inrn_v2(l)
    l = feat_red(l)
    l = inrn_v2(l)

    l = inrn_v2_red(l)
    l = inrn_v2(l)
    l = feat_red(l)
    l = inrn_v2(l)

    l = feat_red(l)

    l_out = dense(l, 128)

    # l_out = nn.layers.DenseLayer(l, num_units=2,
    #                              W=nn.init.Constant(0.),
    #                              nonlinearity=nn.nonlinearities.softmax)

    metadata = utils.load_pkl(os.path.join(
        "/home/eavsteen/dsb3/storage/metadata/dsb3/models/ikorshun/",
        "luna_c3-20170226-174919.pkl"))
    for i in range(-20, 0):
        print(metadata['param_values'][i].shape)
    nn.layers.set_all_param_values(l_out, metadata['param_values'][:-2])

    return namedtuple('Model', ['l_in', 'l_out', 'l_target'])(l_in, l_out, l_target)
def create_model(genres):
    trainingData = []
    trainingLabels = []
    number_of_classes = len(genres)
    for genreIndex, genre in enumerate(genres):
        try:
            genreFeatures = load_pkl(genre + "_ultimate_OF")
        except Exception as e:
            print e
            return
        for videoFeatures in genreFeatures:
            print videoFeatures.shape
            if videoFeatures.shape == (0, ):
                continue
            for scene in videoFeatures:
                for sequence in sequencify(scene, 4):
                    trainingData.append(sequence)
                    trainingLabels.append(genreIndex)

    trainingData = np.array(trainingData)
    trainingLabels = np.array(trainingLabels)
    print trainingData.shape
    print trainingLabels.shape

    model = optical_flow_model(number_of_classes)
    model.compile(optimizer='sgd',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    model.fit(trainingData[0], [trainingLabels[0]])
def build_model():
    metadata_dir = utils.get_dir_path('models', pathfinder.METADATA_PATH)
    metadata_path = utils.find_model_metadata(
        metadata_dir, patch_config.__name__.split('.')[-1])
    metadata = utils.load_pkl(metadata_path)

    print 'Build model'
    model = patch_config.build_model(patch_size=(window_size, window_size, window_size))
    all_layers = nn.layers.get_all_layers(model.l_out)
    num_params = nn.layers.count_params(model.l_out)
    print ' number of parameters: %d' % num_params
    print string.ljust(' layer output shapes:', 36),
    print string.ljust('#params:', 10),
    print 'output shape:'
    for layer in all_layers:
        name = string.ljust(layer.__class__.__name__, 32)
        num_param = sum([np.prod(p.get_value().shape) for p in layer.get_params()])
        num_param = string.ljust(num_param.__str__(), 10)
        print ' %s %s %s' % (name, num_param, layer.output_shape)

    nn.layers.set_all_param_values(model.l_out, metadata['param_values'])
    return model
def load_pretrained_model(l_in):
    l = conv3d(l_in, 64)

    l = inrn_v2_red(l)
    l = inrn_v2(l)
    l = feat_red(l)
    l = inrn_v2(l)

    l = inrn_v2_red(l)
    l = inrn_v2(l)
    l = feat_red(l)
    l = inrn_v2(l)

    l = feat_red(l)

    l = dense(l, 128, name='dense_fpr')

    l_out = nn.layers.DenseLayer(l, num_units=2,
                                 W=nn.init.Constant(0.),
                                 nonlinearity=nn.nonlinearities.softmax)

    metadata = utils.load_pkl(
        os.path.join(
            "/home/eavsteen/dsb3/storage/metadata/dsb3/models/ikorshun/",
            "luna_c3-20170226-174919.pkl"))
    nn.layers.set_all_param_values(l_out, metadata['param_values'])

    return nn.layers.get_all_layers(l_out)[-3]
def compute_user_avg_loc(city):
    """compute average latitude and longitude of businesses each user visited

    Arg:
        city - the city
    """
    print("\t[user] computing location features")
    # df = pd.read_csv(TRNTST_DIR + "{}/train_pos.csv".format(city))
    df = pd.read_csv(INPUT_DIR + "{}/user_business_interaction.csv".format(city))
    bus_profile = load_pkl(INPUT_DIR + "{}/city_business_profile.pkl".format(city))

    # df.assign(business_latitude=lambda x: bus_profile[x.business]["latitude"])
    # df.assign(business_longitude=lambda x: bus_profile[x.business]["longitude"])
    b_lat_dict = dict([(k, v["latitude"]) for k, v in bus_profile.items()])
    b_long_dict = dict([(k, v["longitude"]) for k, v in bus_profile.items()])
    df = df.assign(bus_lat=df.business.map(b_lat_dict))
    df = df.assign(bus_long=df.business.map(b_long_dict))

    # "ll": latitude and longitude
    print("\t[user] aggregating location (lat and long) by user")
    df_loc = df.groupby("user").agg({"bus_lat": ['max', 'min', 'mean'],
                                     "bus_long": ['max', 'min', 'mean']})

    # rename the max/min/mean columns to max_lat, min_lat, mean_lat (same for long)
    # while still maintaining the index as `user`
    user_lat = df_loc.bus_lat.reset_index()
    user_long = df_loc.bus_long.reset_index()
    user_loc = user_lat.join(user_long, on="user", how="outer", lsuffix="_lat", rsuffix="_long")
    user_loc = user_loc.fillna(user_loc.mean())

    # now `user` is a column
    user_loc_dict = user_loc.set_index("user").to_dict(orient="index")
    dump_pkl(OUTPUT_DIR + "{}/city_user_loc.pkl".format(city), user_loc_dict)
def load_pretrained_model(l_in):
    l = conv3d(l_in, 64)

    l = inrn_v2_red(l)
    l = inrn_v2(l)

    l = inrn_v2_red(l)
    l = inrn_v2(l)

    l = inrn_v2_red(l)
    l = inrn_v2_red(l)

    l = dense(drop(l), 128)

    l_out = nn.layers.DenseLayer(l, num_units=10,
                                 W=nn.init.Orthogonal(),
                                 b=nn.init.Constant(0.1),
                                 nonlinearity=nn.nonlinearities.softmax)

    metadata = utils.load_pkl(
        os.path.join("/mnt/storage/metadata/dsb3/models/eavsteen/",
                     "t_el_0-20170321-013339.pkl"))
    nn.layers.set_all_param_values(l_out, metadata['param_values'])

    return nn.layers.get_all_layers(l_out)[-3]
def load_pretrained_model(l_in):
    l = conv3d(l_in, 64)

    l = inrn_v2_red(l)
    l = inrn_v2(l)

    l = inrn_v2_red(l)
    l = inrn_v2(l)

    l = inrn_v2_red(l)
    l = inrn_v2_red(l)

    l = dense(drop(l), 512)

    l = nn.layers.DenseLayer(l, 1,
                             nonlinearity=nn.nonlinearities.sigmoid,
                             W=nn.init.Orthogonal(),
                             b=nn.init.Constant(0))

    metadata = utils.load_pkl(
        os.path.join(
            "/home/eavsteen/dsb3/storage/metadata/dsb3/models/eavsteen/",
            "r_fred_malignancy_2-20170328-230443.pkl"))
    nn.layers.set_all_param_values(l, metadata['param_values'])

    return l
def parse_user():
    """Load users

    output: id2user.pkl, user2id.pkl, user.friend.pkl, user.profile.pkl
    """
    user_profile = {}
    user_friend = {}
    make_dir(PREPROCESS_DIR)

    print("\t[parse user] load user list")
    users_list = load_pkl(PREPROCESS_DIR + "users_list.pkl")
    users_list = set(users_list)

    print("\t[parse user] building user profiles")
    with open(DATA_DIR + "user.json", "r") as fin:
        for ind, ln in enumerate(fin):
            data = json.loads(ln)
            user_id = data['user_id']
            if user_id not in users_list:
                # discard infrequent or irrelevant cities
                continue
            user_friend[user_id] = data['friends'].split(", ")
            del data['friends']
            del data['user_id']
            user_profile[user_id] = data

    # dump the user adjacency and profile dictionaries separately
    print("\t[parse user] dumping user-friendship and user-profile information ...")
    dump_pkl(PREPROCESS_DIR + "user_friend.pkl", user_friend)
    dump_pkl(PREPROCESS_DIR + "user_profile.pkl", user_profile)
def _get_features_data(mode='val'):
    """ deprecated with old dataset """
    """ Includes every sample with plotFeatures, videoFeatures, movie_id and genreLabel """
    featureData = []
    allData = load_pkl(baseName + mode)
    plots = []

    """Process plot vectors"""
    for data in allData:
        movie_id = data['movie_id']
        plot = data['plot']
        plots.append(plot)

    if mode == 'train':
        textObj = Text()
        plotFeatures_all = textObj.fit_transform(plots)
        dump_pkl(textObj, 'plot_object_train')
    else:
        try:
            textObj = load_pkl('plot_object_train')
            plotFeatures_all = textObj.transform(plots).toarray()
        except:
            print "Please train the plots first."
            return

    plotIndex = -1
    for data in allData:
        plotIndex += 1
        movie_id = data['movie_id']
        path = glob(video_resource + str(movie_id) + '.*')[0]
        plot = data['plot']
        genreLabel = data['genreLabel']
        print plotIndex, "out of ", len(allData)
        print "Gathering features for", movie_id
        try:
            frames = list(get_frames(path, start_time=1000, end_time=200000, time_step=1000))
            videoFeatures = get_features_batch(frames, 'vgg16')
        except Exception as e:
            print e
            # omit the movie if one of the features is bad
            # videoFeatures = None
            continue
        plotFeatures = plotFeatures_all[plotIndex]
        featureData.append({'videoFeatures': videoFeatures,
                            'plotFeatures': plotFeatures,
                            'movie_id': movie_id,
                            'genreLabel': genreLabel})

    dump_pkl(featureData, 'feature_data_' + mode)
def train_classifier(genres=['Chases', 'Dance', 'Eating', 'Fight', 'Heated_Discussions',
                             'Normal_Chatting', 'Romance', 'Running', 'Tragic'],
                     model_name=default_model_name):
    trainingData = []
    trainingLabels = []
    num_of_classes = len(genres)
    print("Number of classes:", num_of_classes)
    for genreIndex, genre in enumerate(genres):
        try:
            genreFeatures = load_pkl(genre + "_ultimate_" + default_model_name)
            genreFeatures = np.array([np.array(f) for f in genreFeatures])
        except Exception as e:
            print(e)
            return
        print("OK.")
        for videoFeatures in genreFeatures:
            randomIndices = range(len(videoFeatures))
            selectedFeatures = np.array(videoFeatures[randomIndices])
            for feature in selectedFeatures:
                trainingData.append(feature)
                trainingLabels.append([genreIndex])

    trainingData = np.array(trainingData)
    trainingLabels = np.array(trainingLabels)
    trainingData = np.reshape(trainingData, (trainingData.shape[0], 1, trainingData.shape[1]))
    trainingLabels = np.reshape(trainingLabels, (trainingLabels.shape[0], 1, trainingLabels.shape[1]))
    print("Train data shape : ", trainingData.shape)
    print(trainingLabels.shape)

    model = lstm_model(num_of_classes, 1)
    model.compile(optimizer='sgd',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    batch_size = 32
    nb_epoch = 100
    history = model.fit(trainingData, trainingLabels, batch_size=batch_size, nb_epoch=nb_epoch)

    modelOutPath = ('data/models/lstm' + model_name + '_' + str(num_of_classes) +
                    "g_bs" + str(batch_size) + "_ep" + str(nb_epoch) + ".h5")
    model.save(modelOutPath)
    print("Model saved at", modelOutPath)

    plt.plot(history.history["acc"])
    plt.title("model accuracy")
    plt.ylabel("accuracy")
    plt.xlabel("epoch")
    plt.show()

    plt.plot(history.history["loss"])
    plt.title("model loss")
    plt.ylabel("loss")
    plt.xlabel("epoch")
    plt.show()
def build_segmentation_model(l_in):
    metadata_dir = utils.get_dir_path('models', pathfinder.METADATA_PATH)
    metadata_path = utils.find_model_metadata(metadata_dir,
                                              patch_segmentation_config.__name__.split('.')[-1])
    metadata = utils.load_pkl(metadata_path)

    model = patch_segmentation_config.build_model(l_in=l_in, patch_size=p_transform['patch_size'])
    nn.layers.set_all_param_values(model.l_out, metadata['param_values'])
    return model
def do_test(model_ckpt_path, test_data_path, result_path, word_dict_path, emb_dim, hid_dim):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    word_dict = load_pkl(word_dict_path)
    PAD_IDX = word_dict.word2idx("<PAD>")

    print("load data...")
    testData = SentenceDataset(load_pkl(test_data_path), word_dict, PAD_IDX, training=False)

    print("load model...")
    model = get_model(word_dict.get_len(), word_dict.get_len(), emb_dim, hid_dim, device)
    model.load_state_dict(torch.load(model_ckpt_path))
    model.to(device)

    print("predicting...")
    make_prediction(model, testData, word_dict, result_path, device)
def test3():
    image_dir = utils.get_dir_path('analysis', pathfinder.METADATA_PATH)
    id2mm_shape = utils.load_pkl(image_dir + '/pid2mm.pkl')
    s = [(key, value) for (key, value) in sorted(id2mm_shape.items(), key=lambda x: x[1][0])]
    for i in xrange(5):
        print s[i]
    print '--------------------------'
    for i in xrange(1, 6):
        print s[-i]
def load_pretrained_model(l_in):
    l = conv3d(l_in, 64)

    l = inrn_v2_red(l)
    l = inrn_v2(l)

    l = inrn_v2_red(l)
    l = inrn_v2(l)

    l = inrn_v2_red(l)
    l = inrn_v2_red(l)

    l = dense(drop(l), 512)

    d_final_layers = {}
    final_layers = []
    unit_ptr = 0
    for obj_idx, obj_name in enumerate(cfg_prop.order_objectives):
        ptype = cfg_prop.property_type[obj_name]
        if ptype == 'classification':
            num_units = len(cfg_prop.property_bin_borders[obj_name])
            l_fin = nn.layers.DenseLayer(l, num_units=num_units,
                                         W=nn.init.Orthogonal(),
                                         b=nn.init.Constant(cfg_prop.init_values_final_units[obj_name]),
                                         nonlinearity=nn.nonlinearities.softmax,
                                         name='dense_softmax_' + ptype + '_' + obj_name)
        elif ptype == 'continuous':
            l_fin = nn.layers.DenseLayer(l, num_units=1,
                                         W=nn.init.Orthogonal(),
                                         b=nn.init.Constant(cfg_prop.init_values_final_units[obj_name]),
                                         nonlinearity=nn.nonlinearities.softplus,
                                         name='dense_softplus_' + ptype + '_' + obj_name)
        elif ptype == 'bounded_continuous':
            l_fin = nn.layers.DenseLayer(l, num_units=1,
                                         W=nn.init.Orthogonal(),
                                         b=nn.init.Constant(cfg_prop.init_values_final_units[obj_name]),
                                         nonlinearity=nn.nonlinearities.sigmoid,
                                         name='dense_sigmoid_' + ptype + '_' + obj_name)
        else:
            raise ValueError('unknown property type: %s' % ptype)
        d_final_layers[obj_name] = l_fin
        final_layers.append(l_fin)

    l_out = nn.layers.ConcatLayer(final_layers, name='final_concat_layer')

    metadata = utils.load_pkl(os.path.join(
        '/home/frederic/kaggle-dsb3/dsb/storage/metadata/dsb3/models/eavsteen/',
        "r_elias_28-20170331-230303.pkl"))
    nn.layers.set_all_param_values(l_out, metadata['param_values'])

    features = d_final_layers['malignancy']
    print 'features layer', features.name
    return features
def load_weight_from_pkl(self, cpu_mode=False):
    with tf.variable_scope('load_pred_from_pkl'):
        self.w_input = {}
        self.w_assign_op = {}

        for name in self.w.keys():
            self.w_input[name] = tf.placeholder('float32',
                                                self.w[name].get_shape().as_list(),
                                                name=name)
            self.w_assign_op[name] = self.w[name].assign(self.w_input[name])

    for name in self.w.keys():
        self.w_assign_op[name].eval(
            {self.w_input[name]: load_pkl(os.path.join(self.weight_dir, "%s.pkl" % name))})

    self.update_target_q_network()
def test_luna3d():
    image_dir = utils.get_dir_path('analysis', pathfinder.METADATA_PATH)
    image_dir = image_dir + '/test_luna/'
    utils.auto_make_dir(image_dir)

    id2zyxd = utils_lung.read_luna_annotations(pathfinder.LUNA_LABELS_PATH)

    luna_data_paths = [
        '/mnt/sda3/data/kaggle-lung/luna_test_patient/1.3.6.1.4.1.14519.5.2.1.6279.6001.877026508860018521147620598474.mhd']
    candidates = utils.load_pkl(
        '/mnt/sda3/data/kaggle-lung/luna_test_patient/1.3.6.1.4.1.14519.5.2.1.6279.6001.877026508860018521147620598474.pkl')
    candidates = candidates[:4]
    print candidates
    print '--------------'
    print id2zyxd['1.3.6.1.4.1.14519.5.2.1.6279.6001.877026508860018521147620598474']

    for k, p in enumerate(luna_data_paths):
        id = os.path.basename(p).replace('.mhd', '')
        print id
        img, origin, pixel_spacing = utils_lung.read_mhd(p)
        lung_mask = lung_segmentation.segment_HU_scan_ira(img)
        print np.min(lung_mask), np.max(lung_mask)

        x, annotations_tf, tf_matrix, lung_mask_out = data_transforms.transform_scan3d(
            data=img,
            pixel_spacing=pixel_spacing,
            p_transform=p_transform,
            luna_annotations=candidates,
            p_transform_augment=None,
            luna_origin=origin,
            lung_mask=lung_mask,
            world_coord_system=False)
        print np.min(lung_mask_out), np.max(lung_mask_out)

        plot_slice_3d_2(x, lung_mask_out, 0, id)
        plot_slice_3d_2(x, lung_mask_out, 1, id)
        plot_slice_3d_2(x, lung_mask_out, 2, id)
        # for zyxd in annotations_tf:
        #     plot_slice_3d_2(x, lung_mask_out, 0, id, idx=zyxd)
        #     plot_slice_3d_2(x, lung_mask_out, 1, id, idx=zyxd)
        #     plot_slice_3d_2(x, lung_mask_out, 2, id, idx=zyxd)
        for i in xrange(136, x.shape[1]):
            plot_slice_3d_2(x, lung_mask_out, 1, str(id) + str(i), idx=np.array([200, i, 200]))
def load_pretrained_model(l_in):
    l = conv3d(l_in, 64)

    l = inrn_v2_red(l)
    l = inrn_v2(l)

    l = inrn_v2_red(l)
    l = inrn_v2(l)

    l = inrn_v2_red(l)
    l = inrn_v2_red(l)

    l = drop(l, name='can_dropout')
    l = dense(l, 512, name='can_dense')

    final_layers = []
    for obj_idx, obj_name in enumerate(cfg_prop.order_objectives):
        ptype = cfg_prop.property_type[obj_name]
        if ptype == 'classification':
            num_units = len(cfg_prop.property_bin_borders[obj_name])
            l_fin = nn.layers.DenseLayer(l, num_units=num_units,
                                         W=nn.init.Orthogonal(),
                                         b=nn.init.Constant(cfg_prop.init_values_final_units[obj_name]),
                                         nonlinearity=nn.nonlinearities.softmax,
                                         name='dense_' + ptype + '_' + obj_name)
        elif ptype == 'continuous':
            l_fin = nn.layers.DenseLayer(l, num_units=1,
                                         W=nn.init.Orthogonal(),
                                         b=nn.init.Constant(cfg_prop.init_values_final_units[obj_name]),
                                         nonlinearity=nn.nonlinearities.softplus,
                                         name='dense_' + ptype + '_' + obj_name)
        else:
            raise ValueError('unknown property type: %s' % ptype)
        final_layers.append(l_fin)

    l_out = nn.layers.ConcatLayer(final_layers, name='final_concat_layer')

    metadata = utils.load_pkl(os.path.join(
        "/home/eavsteen/dsb3/storage/metadata/dsb3/models/eavsteen/",
        "r_elias_10-20170328-003348.pkl"))
    nn.layers.set_all_param_values(l_out, metadata['param_values'])

    features = nn.layers.get_all_layers(l_out)[(-2 - len(final_layers))]
    print 'features layer', features.name
    return features
def build_model():
    metadata_dir = utils.get_dir_path('models', pathfinder.METADATA_PATH)
    metadata_path = utils.find_model_metadata(metadata_dir,
                                              patch_class_config.__name__.split('.')[-1])
    metadata = utils.load_pkl(metadata_path)

    print 'Build model'
    model = patch_class_config.build_model()
    all_layers = nn.layers.get_all_layers(model.l_out)
    num_params = nn.layers.count_params(model.l_out)
    print ' number of parameters: %d' % num_params
    print string.ljust(' layer output shapes:', 36),
    print string.ljust('#params:', 10),
    print 'output shape:'
    for layer in all_layers:
        name = string.ljust(layer.__class__.__name__, 32)
        num_param = sum([np.prod(p.get_value().shape) for p in layer.get_params()])
        num_param = string.ljust(num_param.__str__(), 10)
        print ' %s %s %s' % (name, num_param, layer.output_shape)

    nn.layers.set_all_param_values(model.l_out, metadata['param_values'])
    return model
def evaluate_trained(config, state, channel):
    config_path = config.load_trained.from_path + 'model_config.pkl'
    epoch = config.load_trained.epoch
    params_path = config.load_trained.from_path + 'model_params_e%d.pkl' % (epoch)
    assert config_path is not None
    assert params_path is not None
    assert os.path.isfile(params_path)
    assert os.path.isfile(config_path)

    print 'load the config options from the best trained model'
    used_config = utils.load_pkl(config_path)
    action = config.load_trained.action
    assert action == 1
    from_path = config.load_trained.from_path
    epoch = config.load_trained.epoch
    save_model_path = config.load_trained.from_path
    set_config(config, used_config)
    config.load_trained.action = action
    config.load_trained.from_path = from_path
    config.load_trained.epoch = epoch
    config.save_model_path = save_model_path

    model_type = config.model
    # set up some fields in config automatically
    if config.dataset.signature == 'MNIST_binary_russ':
        config[model_type].n_in = 784
        config[model_type].n_out = 784
    # Also copy back from config into state.
    for key in config:
        setattr(state, key, config[key])

    print 'Model Type: %s' % model_type
    print 'Host: %s' % socket.gethostname()
    print 'Command: %s' % ' '.join(sys.argv)

    print 'initializing data engine'
    input_dtype = 'float32'
    target_dtype = 'int32'
    data_engine = None
    deep_orderless_bernoulli_nade.evaluate_trained(state, data_engine, params_path, channel)
def __init__(self, data_path, batch_size, transform_params, patient_ids=None, labels_path=None,
             slice2roi_path=None, full_batch=False, random=True, infinite=True, min_slices=0,
             data_prep_fun=data.transform_norm_rescale, **kwargs):
    if patient_ids:
        patient_paths = []
        for pid in patient_ids:
            patient_paths.append(data_path + '/%s/study/' % pid)
    else:
        patient_paths = glob.glob(data_path + '/*/study/')

    self.pid2slice_paths = defaultdict(list)
    nslices = []
    for p in patient_paths:
        pid = int(utils.get_patient_id(p))
        spaths = sorted(glob.glob(p + '/sax_*.pkl'),
                        key=lambda x: int(re.search(r'/sax_(\d+)\.pkl$', x).group(1)))
        # consider only patients with more than min_slices slices
        if len(spaths) > min_slices:
            self.pid2slice_paths[pid] = spaths
            nslices.append(len(spaths))

    # take the max number of slices
    self.nslices = int(np.max(nslices))

    self.patient_ids = self.pid2slice_paths.keys()
    self.nsamples = len(self.patient_ids)
    self.data_path = data_path
    self.id2labels = data.read_labels(labels_path) if labels_path else None
    self.batch_size = batch_size
    self.rng = np.random.RandomState(42)
    self.full_batch = full_batch
    self.random = random
    self.batch_size = batch_size
    self.infinite = infinite
    self.transformation_params = transform_params
    self.data_prep_fun = data_prep_fun
    self.slice2roi = utils.load_pkl(slice2roi_path) if slice2roi_path else None
def load_pretrained_model(l_in):
    l = conv3d(l_in, 64)

    l = inrn_v2_red(l)
    l = inrn_v2(l)

    l = inrn_v2_red(l)
    l = inrn_v2(l)

    l = inrn_v2_red(l)
    l = inrn_v2_red(l)

    l = dense(drop(l), 512)

    l = nn.layers.DenseLayer(l, 1,
                             nonlinearity=nn.nonlinearities.sigmoid,
                             W=nn.init.Orthogonal(),
                             b=nn.init.Constant(0))

    metadata = utils.load_pkl(os.path.join(
        "/home/eavsteen/dsb3/storage/metadata/dsb3/models/eavsteen/",
        "r_fred_malignancy_2-20170328-230443.pkl"))
    nn.layers.set_all_param_values(l, metadata['param_values'])

    return l
def build_model():
    l_in = nn.layers.InputLayer((None, n_candidates_per_patient, 1,) + p_transform['patch_size'])
    l_in_rshp = nn.layers.ReshapeLayer(l_in, (-1, 1,) + p_transform['patch_size'])
    l_target = nn.layers.InputLayer((batch_size,))

    base_n_filters = 128
    l = conv_prelu_layer(l_in_rshp, n_filters=base_n_filters)
    l = conv_prelu_layer(l, n_filters=base_n_filters)
    l = conv_prelu_layer(l, n_filters=base_n_filters)

    l = max_pool3d(l)

    l = conv_prelu_layer(l, n_filters=base_n_filters)
    l = conv_prelu_layer(l, n_filters=base_n_filters)
    l = conv_prelu_layer(l, n_filters=base_n_filters)

    l_enc = conv_prelu_layer(l, n_filters=base_n_filters)

    num_units_dense = 512
    l_d01 = dense_prelu_layer(l, num_units=512)
    l_d01 = nn.layers.ReshapeLayer(l_d01, (-1, n_candidates_per_patient, num_units_dense))
    l_d02 = dense_prelu_layer(l_d01, num_units=512)
    l_out = nn.layers.DenseLayer(l_d02, num_units=2,
                                 W=nn.init.Constant(0.),
                                 b=np.array([np.log((1397. - 362) / 1398), np.log(362. / 1397)],
                                            dtype='float32'),
                                 nonlinearity=nn.nonlinearities.softmax)

    metadata_dir = utils.get_dir_path('models', pathfinder.METADATA_PATH)
    metadata_path = utils.find_model_metadata(metadata_dir, 'luna_p8a1')
    metadata = utils.load_pkl(metadata_path)
    for p, pv in zip(nn.layers.get_all_params(l_enc), metadata['param_values']):
        if p.get_value().shape != pv.shape:
            raise ValueError("mismatch: parameter has shape %r but value to "
                             "set has shape %r" % (p.get_value().shape, pv.shape))
        p.set_value(pv)

    return namedtuple('Model', ['l_in', 'l_out', 'l_target'])(l_in, l_out, l_target)
def test_luna3d():
    image_dir = utils.get_dir_path('analysis', pathfinder.METADATA_PATH)
    image_dir = image_dir + '/test_luna/'
    utils.auto_make_dir(image_dir)

    id2zyxd = utils_lung.read_luna_annotations(pathfinder.LUNA_LABELS_PATH)

    luna_data_paths = [
        'problem_patients/1.3.6.1.4.1.14519.5.2.1.6279.6001.877026508860018521147620598474.mhd']
    candidates = utils.load_pkl(
        'problem_patients/1.3.6.1.4.1.14519.5.2.1.6279.6001.877026508860018521147620598474.pkl')
    candidates = candidates[:4]
    print candidates
    print '--------------'
    print id2zyxd['1.3.6.1.4.1.14519.5.2.1.6279.6001.877026508860018521147620598474']

    for k, p in enumerate(luna_data_paths):
        id = os.path.basename(p).replace('.mhd', '')
        print id
        img, origin, pixel_spacing = utils_lung.read_mhd(p)
        lung_mask = lung_segmentation.segment_HU_scan_elias(img)

        x, annotations_tf, tf_matrix, lung_mask_out = data_transforms.transform_scan3d(
            data=img,
            pixel_spacing=pixel_spacing,
            p_transform=p_transform,
            luna_annotations=candidates,
            p_transform_augment=None,
            luna_origin=origin,
            lung_mask=lung_mask,
            world_coord_system=False)

        for zyxd in annotations_tf:
            plot_slice_3d_2(x, lung_mask_out, 0, id, img_dir='./', idx=zyxd)
            plot_slice_3d_2(x, lung_mask_out, 1, id, img_dir='./', idx=zyxd)
            plot_slice_3d_2(x, lung_mask_out, 2, id, img_dir='./', idx=zyxd)
            pred = compute_output(ofs)
            pred_batch.append(pred)

        pred_batch = np.array(pred_batch).swapaxes(0, 1)
        msk = m_shared.get_value(borrow=True)
        new_preds_batch = []
        for i in range(len(pred_batch)):
            l = len(msk[i][msk[i] == 0.])
            new_preds_batch.append(pred_batch[i][:-l])
        preds.extend(new_preds_batch)
        print "%i%%" % int(np.round((b + 1) / float(n_batches) * 100.))

    utils.save_pkl(preds, target_path)
else:
    preds = utils.load_pkl(target_path)

assert len(preds) == len(split.test_idxs)

print "Preparing csv files"
pred_files = glob(csv_path + "Sample*")
if len(pred_files) != len(split.test_idxs):
    for i, pred in enumerate(preds):
        pred = np.argmax(pred, axis=-1)
        # window = 10
        # new_pred = pred.copy()
        # for j in range(len(pred)-window):
        #     new_pred[j+window//2] = scipy.stats.mode(pred[j:j+window])[0][0]
        # pred = new_pred
# predictions path
predictions_dir = utils.get_dir_path('model-predictions', pathfinder.METADATA_PATH)
outputs_path = predictions_dir + '/%s' % config_name
outputs_img_path = predictions_dir + '/%s_img' % config_name
utils.auto_make_dir(outputs_img_path)

blob_files = sorted(glob.glob(outputs_path + '/*.pkl'))

p_transform = {'patch_size': (64, 64, 64),
               'mm_patch_size': (64, 64, 64),
               'pixel_spacing': (1., 1., 1.)}

for p in blob_files:
    pid = utils_lung.extract_pid_filename(p, '.pkl')
    blobs = utils.load_pkl(p)
    blobs = np.asarray(sorted(blobs, key=lambda x: x[-1], reverse=True))
    img, pixel_spacing = utils_lung.read_dicom_scan(pathfinder.DATA_PATH + '/' + pid)
    print pid
    for blob in blobs[:10]:
        patch_center = blob[:3]
        p1 = blob[-1]
        print p1
        x, _ = data_transforms.transform_patch3d(data=img,
                                                 luna_annotations=None,
                                                 patch_center=patch_center,
                                                 p_transform=p_transform,
                                                 pixel_spacing=pixel_spacing,
                                                 luna_origin=None,
                                                 world_coord_system=False)
# first make an ordered list of objective functions
train_objectives = [config().d_objectives[obj_name] for obj_name in config().order_objectives]
test_objectives = [config().d_objectives_deterministic[obj_name] for obj_name in config().order_objectives]

# theano functions
print givens_train
iter_train = theano.function([idx], train_objectives, givens=givens_train, updates=updates)

print 'test_objectives'
print config().d_objectives_deterministic
print 'givens_valid'
print givens_valid
iter_validate = theano.function([], test_objectives, givens=givens_valid)

if config().restart_from_save:
    print 'Load model parameters for resuming'
    resume_metadata = utils.load_pkl(config().restart_from_save)
    nn.layers.set_all_param_values(model.l_out, resume_metadata['param_values'])
    start_chunk_idx = resume_metadata['chunks_since_start'] + 1
    chunk_idxs = range(start_chunk_idx, config().max_nchunks)

    lr = np.float32(utils.current_learning_rate(learning_rate_schedule, start_chunk_idx))
    print ' setting learning rate to %.7f' % lr
    learning_rate.set_value(lr)
    losses_eval_train = resume_metadata['losses_eval_train']
    losses_eval_valid = resume_metadata['losses_eval_valid']
else:
    chunk_idxs = range(config().max_nchunks)
    losses_eval_train = defaultdict(list)
    losses_eval_valid = defaultdict(list)
    start_chunk_idx = 0
import sys

import numpy as np
import theano
from itertools import izip
import lasagne as nn

import utils
import buffering
import utils_heart
from configuration import config, set_configuration, set_subconfiguration
from pathfinder import METADATA_PATH

if len(sys.argv) < 2:
    sys.exit("Usage: predict.py <metadata_path>")

metadata_path = sys.argv[1]
metadata_dir = utils.get_dir_path('train', METADATA_PATH)
metadata = utils.load_pkl(metadata_dir + '/%s' % metadata_path)

config_name = metadata['configuration']
if 'subconfiguration' in metadata:
    set_subconfiguration(metadata['subconfiguration'])
set_configuration(config_name)

# predictions paths
prediction_dir = utils.get_dir_path('predictions', METADATA_PATH)
prediction_path = prediction_dir + "/%s.pkl" % metadata['experiment_id']
prediction_mu_std_path = prediction_dir + "/%s_mu_sigma.pkl" % metadata['experiment_id']

print "Build model"
model = config().build_model()
all_layers = nn.layers.get_all_layers(model.l_top)
all_params = nn.layers.get_all_params(model.l_top)
theano.config.warn_float64 = 'raise'

if len(sys.argv) < 2:
    sys.exit("Usage: test_class_dsb.py <configuration_name> <valid|test>")

config_name = sys.argv[1]
set_configuration('configs_class_dsb', config_name)

set = sys.argv[2] if len(sys.argv) == 3 else 'test'

# metadata
metadata_dir = utils.get_dir_path('models', pathfinder.METADATA_PATH)
metadata_path = utils.find_model_metadata(metadata_dir, config_name)
metadata = utils.load_pkl(metadata_path)
expid = metadata['experiment_id']

# logs
logs_dir = utils.get_dir_path('logs', pathfinder.METADATA_PATH)
sys.stdout = logger.Logger(logs_dir + '/%s-%s.log' % (expid, set))
sys.stderr = sys.stdout

# predictions path
predictions_dir = utils.get_dir_path('model-predictions', pathfinder.METADATA_PATH)
output_pkl_file = predictions_dir + '/%s-%s.pkl' % (expid, set)

submissions_dir = utils.get_dir_path('submissions', pathfinder.METADATA_PATH)
output_csv_file = submissions_dir + '/%s-%s.csv' % (expid, set)

# if os.path.isfile(output_pkl_file):
            return np.asarray([0] * len(properties_included), dtype='float32')
        else:
            return np.asarray([0] * len(properties), dtype='float32')
    else:
        label = []
        properties_dict = annotation[-1]
        if len(properties_included) > 0:
            for p in properties_included:
                label.append(properties_dict[p] / 5.0)
        else:
            for p in properties:
                label.append(properties_dict[p])
        return label


rescale_params_hist_eq = utils.load_pkl("luna_rescale_params_hist_eq.pkl")


# data preparation function
def data_prep_function(data, pid, patch_center, pixel_spacing, luna_origin, p_transform,
                        p_transform_augment, world_coord_system, **kwargs):
    x, patch_annotation_tf = data_transforms.transform_patch3d(data=data,
                                                               luna_annotations=None,
                                                               patch_center=patch_center,
                                                               p_transform=p_transform,
                                                               p_transform_augment=p_transform_augment,
                                                               pixel_spacing=pixel_spacing,
                                                               luna_origin=luna_origin,
                                                               world_coord_system=world_coord_system)
    bins, original_borders = rescale_params_hist_eq[pid]
import numpy as np
import hashlib

import utils
import utils_lung
import pathfinder

rng = np.random.RandomState(42)

tvt_ids = utils.load_pkl(pathfinder.VALIDATION_SPLIT_PATH)
train_pids, valid_pids, test_pids = tvt_ids['training'], tvt_ids['validation'], tvt_ids['test']
all_pids = train_pids + valid_pids + test_pids
print 'total number of pids', len(all_pids)

id2label = utils_lung.read_labels(pathfinder.LABELS_PATH)
id2label_test = utils_lung.read_test_labels(pathfinder.TEST_LABELS_PATH)
id2label.update(id2label_test)
n_patients = len(id2label)

pos_ids = []
neg_ids = []
for pid, label in id2label.iteritems():
    if label == 1:
        pos_ids.append(pid)
    elif label == 0:
        neg_ids.append(pid)
    else:
        raise ValueError("weird shit is going down")
theano.config.warn_float64 = 'raise'

if len(sys.argv) < 2:
    sys.exit("Usage: test_luna_scan.py <configuration_name>")

config_name = sys.argv[1]
set_configuration('configs_fpred_scan', config_name)

# predictions path
predictions_dir = utils.get_dir_path('model-predictions', pathfinder.METADATA_PATH)
outputs_path = predictions_dir + '/%s' % config_name

pid2candidates_path = utils_lung.get_candidates_paths(outputs_path)
pid2candidates = {}
for k, v in pid2candidates_path.iteritems():
    pid2candidates[k] = utils.load_pkl(v)

pid2annotations = utils_lung.read_luna_annotations(pathfinder.LUNA_LABELS_PATH)

n_top = 1
tp_top_n = 0
fp_top_n = 0
tp = 0
n_pos = 0
idx1 = []
for k in pid2candidates.iterkeys():
    print '----------------------------------------'
    print k
    n_true = len(pid2annotations[k])
    n_det_top = int(np.sum(pid2candidates[k][:n_top, 3]))
    n_fp_top = int(n_top - np.sum(pid2candidates[k][:n_top, 3]))
                                               p_transform=p_transform,
                                               p_transform_augment=p_transform_augment,
                                               pixel_spacing=pixel_spacing)
    x = data_transforms.pixelnormHU(x)
    return x


data_prep_function_train = partial(data_prep_function, p_transform_augment=p_transform_augment,
                                   p_transform=p_transform)
data_prep_function_valid = partial(data_prep_function, p_transform_augment=None,
                                   p_transform=p_transform)

# data iterators
batch_size = 4

train_valid_ids = utils.load_pkl(pathfinder.VALIDATION_SPLIT_PATH)
train_pids, valid_pids, test_pids = train_valid_ids['training'], train_valid_ids['validation'], \
                                    train_valid_ids['test']
print 'n train', len(train_pids)
print 'n valid', len(valid_pids)

train_data_iterator = data_iterators.DSBPatientsDataGenerator(data_path=pathfinder.DATA_PATH,
                                                              batch_size=batch_size,
                                                              transform_params=p_transform,
                                                              n_candidates_per_patient=n_candidates_per_patient,
                                                              data_prep_fun=data_prep_function_train,
                                                              id2candidates_path=id2candidates_path,
                                                              rng=rng,
                                                              patient_ids=train_pids,
                                                              return_patch_locs=True,
                                                              random=True,
                                                              infinite=True)
def read_split(path):
    d = utils.load_pkl(path)
    print d['valid']
import utils
import logger
import theano.tensor as T
import buffering
from configuration import config, set_configuration, set_subconfiguration
import pathfinder

if len(sys.argv) < 2:
    sys.exit("Usage: train.py <meta_configuration_name>")

config_name = sys.argv[1]
subconfig_name = config_name.replace('meta_', '')

metadata_dir = utils.get_dir_path('train', pathfinder.METADATA_PATH)
submodel_metadata_path = utils.find_model_metadata(metadata_dir, subconfig_name)
submodel_metadata = utils.load_pkl(submodel_metadata_path)
assert subconfig_name == submodel_metadata['configuration']

set_subconfiguration(subconfig_name)
set_configuration(config_name)

expid = utils.generate_expid(config_name)
print
print "Experiment ID: %s" % expid
print

# meta metadata and logs paths
metadata_path = metadata_dir + '/%s.pkl' % expid
logs_dir = utils.get_dir_path('logs', pathfinder.METADATA_PATH)
sys.stdout = logger.Logger(logs_dir + '/%s.log' % expid)