def get_validation_split(data_file, training_file, validation_file, data_split=0.8, overwrite=False):
    """
    Splits the data into training and validation index lists.
    :param data_file: pytables hdf5 data file
    :param training_file: path of the pickle file used to store/load the training indices
    :param validation_file: path of the pickle file used to store/load the validation indices
    :param data_split: fraction of samples assigned to the training split
    :param overwrite: if True, recompute the split even if a previous one exists
    :return: (training_list, validation_list) of sample indices
    """
    if overwrite or not os.path.exists(training_file):
        print("Creating validation split...")
        nb_samples = data_file.root.data.shape[0]
        sample_list = list(range(nb_samples))
        training_list, validation_list = split_list(sample_list, split=data_split)
        pickle_dump(training_list, training_file)
        pickle_dump(validation_list, validation_file)
        return training_list, validation_list
    else:
        print("Loading previous validation split...")
        return pickle_load(training_file), pickle_load(validation_file)
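# Note: the snippets in this collection rely on small pickle helpers from a
# project-level `utils` module. Below is a minimal sketch consistent with how
# they are called here; each project ships its own variant, so this is an
# assumption, not any of the original implementations.
import pickle


def pickle_dump(item, out_file):
    """Serialize `item` to `out_file` in binary mode."""
    with open(out_file, "wb") as opened_file:
        pickle.dump(item, opened_file)


def pickle_load(in_file):
    """Load and return the object pickled in `in_file`."""
    with open(in_file, "rb") as opened_file:
        return pickle.load(opened_file)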
def load_data(data_type, train_on_cv=False, cv_random_state=42, cv_fold=5, cv_index=0): if data_type == 'train': if train_on_cv: data = pickle_load( format_filename(PROCESSED_DATA_DIR, TRAIN_CV_DATA_TEMPLATE, random=cv_random_state, fold=cv_fold, index=cv_index)) else: data = pickle_load( format_filename(PROCESSED_DATA_DIR, TRAIN_DATA_FILENAME)) elif data_type == 'dev': if train_on_cv: data = pickle_load( format_filename(PROCESSED_DATA_DIR, DEV_CV_DATA_TEMPLATE, random=cv_random_state, fold=cv_fold, index=cv_index)) else: data = pickle_load( format_filename(PROCESSED_DATA_DIR, DEV_DATA_FILENAME)) elif data_type == 'test': data = pickle_load( format_filename(PROCESSED_DATA_DIR, TEST_DATA_FILENAME)) else: raise ValueError('data type not understood: {}'.format(data_type)) return data
def fold_split(FG, seed=1234): labels = tuple(FG.labels) assert labels in [('AD', 'CN'), ('AD', 'MCI', 'CN')] subject_ids = pickle_load(FG.subject_ids_path) diagnosis = pickle_load(FG.diagnosis_path) X = np.array( [sid for sid in subject_ids['smri'] if diagnosis[sid] in labels]) y = np.array([diagnosis[x] for x in X]) skf = StratifiedKFold(n_splits=FG.fold, shuffle=True, random_state=seed) train, test = list(skf.split(X, y))[FG.running_fold] print( 'Train ', tuple( zip(labels, [len(X[train][y[train] == label]) for label in labels]))) print( 'Test ', tuple(zip(labels, [len(X[test][y[test] == label]) for label in labels]))) ratio = [ len(X[train][y[train] == label]) / len(X[train]) for label in labels ] ratio = torch.Tensor(2 * (1 - np.array(ratio))) return X, y, train, test, ratio
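# For intuition on the class-weight vector returned by fold_split above: with a
# hypothetical 60/40 split of 'AD'/'CN' training subjects, 2 * (1 - frequency)
# gives the rarer class the larger weight. This is only an illustration of the
# arithmetic, not part of the original code.
import numpy as np
import torch

class_frequencies = np.array([0.6, 0.4])   # hypothetical 'AD' / 'CN' frequencies
weights = torch.Tensor(2 * (1 - class_frequencies))
print(weights)  # tensor([0.8000, 1.2000]) -- the minority class is weighted higher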
def gen_product_sentences(): order_products_prior = pickle_load(config.order_products_prior_path)[[ 'order_id', 'product_id', 'add_to_cart_order' ]].copy() order_products_train = pickle_load(config.order_products_train_path)[[ 'order_id', 'product_id', 'add_to_cart_order' ]].copy() order_products_train["product_id"] = order_products_train[ "product_id"].astype(str) order_products_prior["product_id"] = order_products_prior[ "product_id"].astype(str) product_sentences_train = order_products_train.sort_values([ 'order_id', 'add_to_cart_order' ]).groupby('order_id').apply(lambda order: order['product_id'].tolist()) product_sentences_prior = order_products_prior.sort_values([ 'order_id', 'add_to_cart_order' ]).groupby('order_id').apply(lambda order: order['product_id'].tolist()) product_sentences = product_sentences_prior.append( product_sentences_train).values return product_sentences
def get_name_type_for_subject(entity2name_path, entity2type_path, triple_path, save_dir): entity2name = pickle_load(entity2name_path) entity2type = pickle_load(entity2type_path) triples = pickle_load(triple_path) has_name_count = 0 has_type_count = 0 subject2name = {} subject2type = {} for index, subject in enumerate(triples.keys()): if subject in entity2name: subject2name[subject] = entity2name[subject] has_name_count += 1 if subject in entity2type: subject2type[subject] = entity2type[subject] has_type_count += 1 print(has_name_count, len(triples.keys())) print(has_type_count, len(triples.keys())) pickle_save(subject2name, os.path.join(save_dir, 'trim_subject2name.pkl')) pickle_save(subject2type, os.path.join(save_dir, 'trim_subject2type.pkl'))
def load_agents_params(params, agents_list): agents, states, traces, trajectories, dataframes = {}, {}, {}, {}, {} # states['all_states'] = {} environment = create_test_env(params.env, n_envs=params.n_envs, is_atari=params.is_atari, stats_path=params.stats_path, seed=0, log_dir=params.log_dir, should_render=not params.no_render, hyperparams=params.hyperparams) for a in agents_list: """load agent model""" model_path = os.path.join(params.agents_dir, a, params.env + ".pkl") agents[a] = ALGOS[a].load(model_path, env=environment) """load agent states traces and trajectories""" params_path = os.path.join(params.stt_dir, a) for file in os.listdir(params_path): if file.startswith('States'): states[a] = pickle_load(os.path.join(params_path, file)) elif file.startswith('Traces'): traces[a] = pickle_load(os.path.join(params_path, file)) else: trajectories[a] = pickle_load(os.path.join(params_path, file)) # states['all_states'] = {**states['all_states'], **states[a]} # The number of "same" states between algos is very small because the id of a state is based on the # full observation, therefore taking into account the current score. """importance dataframes""" data = { 'state': list(states[a].keys()), 'q_values': [x.observed_actions for x in states[a].values()] } df = pd.DataFrame(data) # top "important_state_percentage" of the states sorted by importance dataframes[a] = compute_states_importance(df, compare_to=params.state_importance).sort_values( ['importance'], ascending=False).head(int(df.shape[0] * params.important_state_percentage)) return agents, states, traces, trajectories, dataframes
def prepare_training_data(): X_train, y_train = pickle_load('data/train.pkl') X_valid, y_valid = pickle_load('data/valid.pkl') X_test, y_test = pickle_load('data/test.pkl') trainer = Trainer(mp_pool=Pool(8)) trainer.prepare_data(X_train, y_train, X_valid, y_valid, X_test, y_test) trainer.save('data2')
def load_idx2cate(): idx2cate1 = pickle_load( format_filename(PROCESSED_DATA_DIR, IDX2TOKEN_TEMPLATE, level='cate1')) idx2cate2 = pickle_load( format_filename(PROCESSED_DATA_DIR, IDX2TOKEN_TEMPLATE, level='cate2')) idx2cate3 = pickle_load( format_filename(PROCESSED_DATA_DIR, IDX2TOKEN_TEMPLATE, level='cate3')) return idx2cate1, idx2cate2, idx2cate3
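# `format_filename` (used in load_idx2cate and load_data above) is assumed to
# fill a filename template with keyword fields and join it with a base
# directory. A minimal sketch of that assumption; the template value shown in
# the trailing comment is hypothetical.
import os


def format_filename(base_dir, template, **kwargs):
    """Hypothetical helper: render `template` with `kwargs` and prepend `base_dir`."""
    return os.path.join(base_dir, template.format(**kwargs))

# e.g. if IDX2TOKEN_TEMPLATE were 'idx2token_{level}.pkl':
# format_filename(PROCESSED_DATA_DIR, IDX2TOKEN_TEMPLATE, level='cate1')
# -> os.path.join(PROCESSED_DATA_DIR, 'idx2token_cate1.pkl')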
def __init__(self):
    ## fixme: replace the static part with a dict precomputed offline
    from annoy import AnnoyIndex
    from utils import pickle_load
    self.ann_index = AnnoyIndex(300, metric='euclidean')
    self.ann_index.load('data/index/annoy.euclidean.idx')
    self.id2word = pickle_load('data/index/id2word.pkl')
    self.word2id = pickle_load('data/index/word2id.pkl')
def get_pca_features(y, npca_comp=40, tss=0.1): ''' Get pca features Args y - 1D array mono sound_data npca_comp - int number of pca components needed to extract pca features tss - float clip length of sound(sec) return : pca_features - 2D array(ntseg, npca_comp) ''' # length of sound(y) sound_len = len(y) # length of time segment tseg_len = int(tss*SR) # number of time segments in the music ntseg = sound_len//tseg_len pca_basis_path = get_pca_basis_path(tss) try : content = pickle_load(pca_basis_path) # if pca basis with tss doesn't exist, generate pca basis with tss except FileNotFoundError: print("FileNotFoundError : {}".format(pca_basis_path)) pca_generator(clip_sec = tss) content = pickle_load(pca_basis_path) w = content['w'] # eigen value v = content['v'] # eigen vector print("Number of pca vectors : {}".format(len(w))) window = int(SR*tss) nfft = window win_len = window # win_len<=nfft hop_len = window D = librosa.core.stft(y, n_fft=nfft, win_length=win_len, hop_length=hop_len) D_T = D.T ntime, nfreq = D_T.shape pca_features = np.zeros((ntseg, npca_comp)) for i in range(min(ntime, ntseg)): for j in range(npca_comp): pca_features[i][j] = abs(complex_dot(D_T[i], v[j])) return pca_features
def load_input_data(data_folder, data_kind, level, use_text_input, use_text_input_l, use_text_input_r, use_text_input_r_with_pad, use_aspect_input, use_aspect_text_input, use_loc_input, use_offset_input, use_mask): dirname = os.path.join('./data', data_folder) input_data = [] if use_text_input: input_data.append( pickle_load( os.path.join(dirname, '{}_{}_input.pkl'.format(data_kind, level)))) if use_text_input_l: input_data.append( pickle_load( os.path.join(dirname, '{}_{}_input_l.pkl'.format(data_kind, level)))) if use_text_input_r: input_data.append( pickle_load( os.path.join(dirname, '{}_{}_input_r.pkl'.format(data_kind, level)))) if use_text_input_r_with_pad: input_data.append( pickle_load( os.path.join( dirname, '{}_{}_input_r_with_pad.pkl'.format(data_kind, level)))) if use_aspect_input: input_data.append( pickle_load( os.path.join(dirname, '{}_aspect_input.pkl'.format(data_kind)))) if use_aspect_text_input: input_data.append( pickle_load( os.path.join(dirname, '{}_{}_aspect_input.pkl'.format(data_kind, level)))) if use_loc_input: input_data.append( pickle_load( os.path.join(dirname, '{}_{}_pos_input.pkl'.format(data_kind, level)))) if use_offset_input: input_data.append( pickle_load( os.path.join(dirname, '{}_{}_offset_input.pkl'.format(data_kind, level)))) if use_mask: input_data.append( pickle_load( os.path.join(dirname, '{}_{}_mask.pkl'.format(data_kind, level)))) if len(input_data) == 1: input_data = input_data[0] if len(input_data) == 0: raise Exception('No Input!') return input_data
def __init__(self): self.cdir = os.path.abspath(os.path.dirname(__file__)) self.unigram_fixing = UnigramFixing() self.vn_vocab, self.vn_bare_vocab, self.vn_long_vocab, self.vn_long_bare_vocab \ = vnbarenorm.load_vn_vocab() self.vn_special_words = vnbarenorm.load_special_words() self.vn_true_bare_bigram_hie_ddict = utils.pickle_load("%s/models/data/out/hierachical_true_dict.pkl"%self.cdir) self.vn_true_bare_bigram_f_ddict = utils.pickle_load("%s/models/data/out/hierachical_true_first_ab_dict.pkl"%self.cdir) self.hard_fixing_map = vnbarenorm.load_hard_fixing() self.load_one_fix()
def align(params, load_prefix, save_path): """ Align depth and color images. Save everything into a single pickle object. Parameters ---------- params: Camera intrinsic parameters. load_prefix: Path to load data. Will load color stream from `load_prefix`_color.avi, depth stream from `load_prefix`_depth.pkl, and body stream from `load_prefix`_body.pkl. save_path: Path to save result data. """ color_src = cv2.VideoCapture(load_prefix + '_color.avi') depth_src = pickle_load(load_prefix + '_depth.pkl') body_src = pickle_load(load_prefix + '_body.pkl') depth_height = depth_src[0].shape[0] depth_width = depth_src[0].shape[1] h_coord = np.tile(np.reshape(np.arange(1, depth_width + 1), [1, -1]), [depth_height, 1]) - params['cx_d'] v_coord = np.tile(np.reshape(np.arange(1, depth_height + 1), [-1, 1]), [1, depth_width]) - params['cy_d'] pcloud_frames = [] depth_frames = [] color_frames = [] body_frames = [] for depth, body in tqdm(zip(depth_src, body_src)): _, color = color_src.read() pcloud = depth_to_world(depth, params, h_coord, v_coord) pcloud_frames.append(pcloud) color = world_to_color(params, pcloud, color) color_frames.append(color) body_frames.append(body) depth_frames.append(depth) data = { 'pclouds': pcloud_frames, 'depths': depth_frames, 'colors': color_frames, 'bodies': body_frames } pickle_save(save_path, data)
def load_model_with_weights(fname, custom_layers=None):
    """ Loads model and scaler from .zip """
    if custom_layers is None:
        custom_layers = {}
    with zipfile.ZipFile(fname, mode='r') as zipf:
        # First, instantiate model from config
        json_str = zipf.read('config.json')
        config = json.loads(json_str)
        metadata = config['metadata']
        del config['metadata']
        model = keras.models.model_from_config(config, custom_objects=custom_layers)
        # Unzip weights.hdf5 to a temporary file and load it
        fd, tmpfname = tempfile.mkstemp()
        os.close(fd)  # only the path is needed; close the low-level descriptor
        # weights - extract to tmpfname (binary mode, the archive member is bytes)
        with open(tmpfname, 'wb') as wf:
            with zipf.open('weights.hdf5') as zwf:
                wf.write(zwf.read())
        model.load_weights(tmpfname)
        # scaler - extract to tmpfname, then unpickle it
        with open(tmpfname, 'wb') as sf:
            with zipf.open('scaler.pickle') as zsf:
                sf.write(zsf.read())
        scaler = utils.pickle_load(tmpfname)
        os.remove(tmpfname)
    return model, scaler, metadata
def show_results(res_paths): results = {} for path in res_paths: result = pickle_load(path) for k, v in result.items(): if k not in results.keys(): results[k] = result[k] results = collections.OrderedDict(sorted(results.items())) fig, ax = plt.subplots(figsize=(9, 5.5)) colors = cm.Dark2(np.linspace(0, 1, len(results))) count = 0 for k, res in results.items(): mean, std = np.nanmean(res, axis=0), np.nanstd(res, axis=0) # ax.errorbar(np.arange(mean.shape[0]), mean, yerr=std, color=colors[count], label=k, fmt='-o') plt.plot(np.arange(mean.shape[0]) + 1, mean, '-o', color=colors[count], label=k) count += 1 print(np.array_str(mean[8:], precision=3)) print("Average precision of %s for future prediction: %f" % (k, mean[8:].mean())) # Now add the legend with some customizations. legend = ax.legend(loc='upper right') ax.set_xlabel("time step") ax.set_ylabel("average precision") plt.axvline(x=8.5, color='r', linestyle='--') plt.text(3, 0.1, 'tracking', fontsize=18, color='grey') plt.text(11, 0.1, 'prediction', fontsize=18, color='grey') plt.show()
def stats():
    vocaburary = Vocabulary.load(Q_VOCAB_NAME)
    kvs = []
    # sort tokens by (document frequency, token id), ascending
    for key, value in sorted(vocaburary.doc_freq.items(), key=lambda kv: (kv[1], kv[0])):
        kvs.append([key, value])
    TOP_TOKEN = set()
    question_list = utils.pickle_load("question_tokens.dat")
    # keep the 199 most frequent tokens (taken from the end of the sorted list)
    for i in range(1, 200):
        key, value = kvs[-i][0], kvs[-i][1]
        # print(vocaburary.id_2_token[key], value)
        TOP_TOKEN.add(key)
    f_pattern = open("q_pattern.dat", "w", encoding="utf-8")
    for question in question_list:
        pattern = []
        origin = []
        counting = 0
        for token in question:
            if token in TOP_TOKEN:
                s_token = vocaburary.id_2_token[token]
                counting += 1
            else:
                s_token = "*"
            pattern.append(s_token)
            w = vocaburary.id_2_token[token]
            origin.append(w)
        if counting > 0:
            pattern = " ".join(pattern)
            origin = " ".join(origin)
            f_pattern.write("%s\t|\t%s\n" % (pattern, origin))
    f_pattern.close()
def debug_test_set(): clf = pickle_load(os.path.join('models6', 'strong_classifier_276.pkl')) trainer = Trainer(mp_pool=Pool(8)) trainer.load_data('data') print("Strong classifier test metrics:") predictions = clf.classify_batch(trainer.test_ds.X_integral) print(Metrics(predictions, trainer.test_ds.y))
def find_papers(args): y, m, d = args.report_date.split('-') y = int(y) m = int(m) d = int(d) if 'none' in args.filter_primary_category: pcates = ['cs.CV', 'cs.AI', 'cs.LG', 'stat.ML', 'cs.RO'] elif '+' in args.filter_primary_category: pcates = args.filter_primary_category.replace(' ', '').split('+') else: pcates = [args.filter_primary_category] assert type(pcates) == list, 'typeError: primary category list' sort_by = date_sort_by(args.date_sort_by) db = pickle_load(args.db_path) print('database has {} entries'.format(len(db))) result = {} for k, v in db.items(): if (y == v[sort_by].tm_year) and (m == v[sort_by].tm_mon) and ( d == v[sort_by].tm_mday): if v['arxiv_primary_category']['term'] in pcates: result[k] = v print('found {} entries'.format(len(result))) return result, pcates
def load_input_data(data_folder, data_kind, level, use_text_input, use_aspect_input, use_aspect_text_input): dirname = os.path.join('./data', data_folder) # ./data/car/ input_data = [] if use_text_input: input_data.append(pickle_load(os.path.join(dirname, '{}_{}_input.pkl'.format(data_kind, level)))) if use_aspect_input: input_data.append(pickle_load(os.path.join(dirname, '{}_aspect_input.pkl'.format(data_kind)))) if use_aspect_text_input: input_data.append(pickle_load(os.path.join(dirname, '{}_{}_aspect_input.pkl'.format(data_kind, level)))) if len(input_data) == 1: input_data = input_data[0] if len(input_data) == 0: raise Exception('No Input!') return input_data
def eval_deprecated(): '''partially deprecated''' k = opt.k train_node = TrainNode(opt) idx2bin = {} dataset = utils.load_data('query') ### use dataset eventually dataset = dataset.to(device) dsnode_path = opt.dsnode_path + str(opt.n_clusters) #print('dsnode path {}'.format(dsnode_path)) dsnode = utils.pickle_load(dsnode_path) print('dsnode {}'.format(dsnode)) train_node.train(dataset, dsnode, idx2bin) #idx (of query) in entire dataset, bin is idx of leaf bin. eval_root = train_node.create_eval_tree() idx2bin = eval_root.idx2bin #eval root should contain dict for answers set indices and bin #, for evaluation. #serialize print('train.py - serializing model evaluation tree...') eval_root_path = osp.join(opt.data_dir, 'model_eval_root') ########### utils.pickle_dump(eval_root, eval_root_path) ## evaluate ## queryset = utils.load_data('query') neighbors = utils.load_data('answers') acc, probe_count = eval_model(eval_root, queryset, neighbors, opt) print('train.py - Query set prediction acc {} probe count {}'.format(acc, probe_count))
def run_validation_cases(validation_keys_file, model_file, training_modalities, labels, hdf5_file, output_label_map=False, output_dir=".", threshold=0.7, overlap=16, permute=False): validation_indices = pickle_load(validation_keys_file) model = load_old_model(model_file) data_file = tables.open_file(hdf5_file, "r") for index in validation_indices: # if 'subject_ids' in data_file.root: # case_directory = os.path.join(output_dir, data_file.root.subject_ids[index].decode('utf-8')) # else: case_directory = os.path.join(output_dir, "validation_case_{}".format(index)) run_validation_case(data_index=index, output_dir=case_directory, model=model, data_file=data_file, training_modalities=training_modalities, output_label_map=output_label_map, labels=labels, threshold=threshold, overlap=overlap, permute=permute) data_file.close()
def load_model(model_name): """ """ # Import model (HACK) sys.path.append(os.path.join(paths["trainer"], model_name)) from capsnet import CapsuleNet sys.path.pop() # Load model try: model_params = pickle_load( os.path.join(paths["trainer"], model_name, "outs", "model_params.pkl")) except FileNotFoundError: return None, None dataset = model_params.pop("dataset") _, model = CapsuleNet(**model_params) model_params["dataset"] = dataset # Clean modules (HACK) del sys.modules["capsnet"] return model, model_params
def __init__(self, config, sess): self.config = config self.sess = sess self.log = utils.Log() self.episode_length = int(self.config['META']['EPISODE_LENGTH']) self.action_dim = int(self.config['META']['ACTION_DIM']) self.statistic_dim = int(self.config['META']['STATISTIC_DIM']) self.reward_dim = int(self.config['META']['REWARD_DIM']) self.discount_factor = float(self.config['META']['DISCOUNT_FACTOR']) self.log_step = int(self.config['META']['LOG_STEP']) self.sample_episodes_per_batch = int(self.config['TPGR']['SAMPLE_EPISODES_PER_BATCH']) self.sample_users_per_batch = int(self.config['TPGR']['SAMPLE_USERS_PER_BATCH']) self.learning_rate = float(self.config['TPGR']['LEARNING_RATE']) self.l2_factor = float(self.config['TPGR']['L2_FACTOR']) self.entropy_factor = float(self.config['TPGR']['ENTROPY_FACTOR']) self.child_num = int(self.config['TPGR']['CHILD_NUM']) self.boundary_rating = float(self.config['ENV']['BOUNDARY_RATING']) self.eval_batch_size = int(self.config['TPGR']['EVAL_BATCH_SIZE']) self.train_batch_size = self.sample_episodes_per_batch * self.sample_users_per_batch self.result_file_path = '../data/result/result_log/' + time.strftime('%Y%m%d%H%M%S') + '_' + self.config['ENV']['ALPHA'] + '_' + self.config['ENV']['RATING_FILE'] self.rnn_file_path = '../data/run_time/%s_rnn_model_%s' % (self.config['ENV']['RATING_FILE'], self.config['TPGR']['RNN_MODEL_VS']) self.load_model = self.config['TPGR']['LOAD_MODEL'] == 'T' self.load_model_path = '../data/model/%s_tpgr_model_%s' % (self.config['ENV']['RATING_FILE'], self.config['TPGR']['MODEL_LOAD_VS']) self.save_model_path = '../data/model/%s_tpgr_model_%s' % (self.config['ENV']['RATING_FILE'], self.config['TPGR']['MODEL_SAVE_VS'].split('s')[0]) self.tree_file_path = '../data/run_time/%s_tree_model_%s_%s_c%d_%s' % (self.config['ENV']['RATING_FILE'], self.config['TPGR']['CLUSTERING_VECTOR_TYPE'].lower(), self.config['TPGR']['CLUSTERING_TYPE'].lower(), self.child_num, self.config['TPGR']['TREE_VS']) self.hidden_units = [int(item) for item in self.config['TPGR']['HIDDEN_UNITS'].split(',')] if self.config['TPGR']['HIDDEN_UNITS'].lower() != 'none' else [] self.forward_env = Env(self.config) self.user_num, self.item_num, self.r_matrix, self.user_to_rele_num = self.forward_env.get_init_data() self.boundry_user_id = self.forward_env.boundry_user_id self.test_user_num = int(self.user_num/self.eval_batch_size)*self.eval_batch_size-self.boundry_user_id self.bc_dim = int(math.ceil(math.log(self.item_num, self.child_num))) self.env = [Env(self.config, self.user_num, self.item_num, self.r_matrix, self.user_to_rele_num) for i in range(max(self.train_batch_size, self.eval_batch_size * int(math.ceil(self.user_num / self.eval_batch_size))))] ### self.rnn_input_dim = self.action_dim + self.reward_dim + self.statistic_dim self.rnn_output_dim = self.rnn_input_dim self.layer_units = [self.statistic_dim + self.rnn_output_dim] + self.hidden_units + [self.child_num] self.is_eval = False self.qs_mean_list = [] self.storage = [] self.training_steps = 0 if self.load_model: self.training_steps = int(self.config['TPGR']['MODEL_LOAD_VS'].split('s')[-1]) tree_model = utils.pickle_load(self.tree_file_path) self.id_to_code, self.code_to_id = (tree_model['id_to_code'], tree_model['code_to_id']) self.aval_val = self.get_aval() self.log.log('making graph') self.make_graph() self.sess.run(tf.global_variables_initializer()) self.log.log('graph made')
def predict(model, model_prediction_dir, label):
    print("Predicting...")
    model.eval()
    try:
        data_file = tables.open_file(config["pred_data_file"], "r")
        # load test idx
        test_idxs = pickle_load(config["test_file"])
        # for index in range(len(data_file.root.data)):
        for index in test_idxs:
            if "subject_ids" in data_file.root:
                case_dir = os.path.join(model_prediction_dir, data_file.root.subject_ids[index].decode('utf-8'))
            else:
                case_dir = os.path.join(model_prediction_dir, "pred_case_{}".format(index))
            if not os.path.exists(case_dir):
                os.makedirs(case_dir)
            data_array = np.asarray(data_file.root.data[index])[np.newaxis]
            affine = data_file.root.affine[index]
            assert data_array.shape == config["input_shape"], "Wrong data shape! Expected {0}, but got {1}.".format(config["input_shape"], data_array.shape)
            if config["cuda_devices"] is not None:
                inputs = torch.from_numpy(data_array)
                inputs = inputs.type(torch.FloatTensor)
                inputs = inputs.cuda()
            with torch.no_grad():
                if config["VAE_enable"]:
                    outputs, distr = model(inputs)
                else:
                    outputs = model(inputs)
            output_array = np.asarray(outputs.tolist())
            output_array = output_array > 0.5
            # for whole labels ben
            # output_image_whole = nib.Nifti1Image(output_array[0][0].astype('int'), affine)
            # output_image_enhancing = nib.Nifti1Image(output_array[0][1].astype('int'), affine)
            # output_image_core = nib.Nifti1Image(output_array[0][2].astype('int'), affine)
            # output_image_whole.to_filename(os.path.join(case_dir, "prediction_label_core.nii.gz"))
            # output_image_enhancing.to_filename(os.path.join(case_dir, "prediction_label_edema.nii.gz"))
            # output_image_core.to_filename(os.path.join(case_dir, "prediction_label_enhancing.nii.gz"))
            # for single label
            output_image = nib.Nifti1Image(output_array[0][0].astype('int'), affine)
            output_image.to_filename(os.path.join(case_dir, "prediction_label_{}.nii.gz".format(label)))
            # combine all the labels
            if len(os.listdir(case_dir)) == 3 and label == 4:
                output_image_combined = np.zeros(output_array[0][0].shape)
                # get the other two segs, i.e. prediction_label_1.nii.gz, prediction_label_2.nii.gz
                output_image_core = nib.load(os.path.join(case_dir, "prediction_label_1.nii.gz")).get_data()
                output_image_whole = nib.load(os.path.join(case_dir, "prediction_label_2.nii.gz")).get_data()
                # merge 3 segs into 1
                output_image_combined[output_image_whole == 1] = 2
                output_image_combined[output_image_core == 1] = 1
                output_image_combined[output_array[0][0].astype('int') == 1] = 4
                output_image_combined = nib.Nifti1Image(output_image_combined, affine)
                output_image_combined.to_filename(os.path.join(case_dir, "prediction_label_seg_whole.nii.gz"))
    finally:
        data_file.close()
def get_style_images(self, _id=""): fname = 'style_imgs_{}'.format(self.rst) if _id: fname += "_" + str(_id) return utils.pickle_load( os.path.join(self.base_dir, 'dataset/{}.pkl'.format(fname)))[:self.max_size]
def main(): args = parse_args() set_seeds(args.seed) # Because I keep forgetting to select the correct env assert args.env in args.history_file env = create_env(args.env) print("Loading environment", args.env) print("Action space:", env.action_space) print("Observation space:", env.observation_space) history = pickle_load(args.history_file) if args.best_agent: agents = get_best_agents_dedup(history, 1) eval_outfile = 'eval_best_agent' elif args.best_agent_ensemble: agents = get_best_agents_dedup(history, 3) eval_outfile = 'eval_best_agent_ensemble' elif args.last_agent: agents = [history['agents'][-1][0]] eval_outfile = 'eval_last_agent' elif args.last_agent_ensemble: agents = history['agents'][-1][:3] eval_outfile = 'eval_last_agent_ensemble' else: assert False dim_in, dim_out = get_input_output_dim(env) policies = [a.get_policy(dim_in, dim_out) for a in agents] policy = EnsemblePolicy(policies) if args.evaluate: total_rew = [] for i in range(args.evaluate): env.seed(args.seed + i) ep_rew, ep_len = run_episode(env, policy) total_rew.append(ep_rew) print( f"ep = {i + 1}/{args.evaluate} reward = {ep_rew:.2f} len = {ep_len}" ) print( f"mean_reward = {np.mean(total_rew):.2f} +- {np.std(total_rew):.2f}" ) np.save(Path(args.history_file).with_name(eval_outfile), total_rew) elif args.render: for i in count(): env.seed(args.seed + i) ep_rew, ep_len = run_episode(env, policy, render_human) print(f"ep = {i + 1}/inf reward = {ep_rew:.2f} len = {ep_len}") elif args.gif: render_gif = RenderGif() env.seed(args.seed) run_episode(env, policy, render_gif) render_gif.save(args.gif)
def debug_build_features(count_only=False): all_features = Trainer.build_features(2, 2) print(len(all_features)) if count_only: return for feature in all_features: print(feature) pickle_save(all_features, "all_features.pkl") feature_reloaded = pickle_load('all_features.pkl') assert feature_reloaded == all_features
def __init__(self, classifier_filename, detect_window_size=24, detect_window_step_size=24, n_process=1): self.classifier = pickle_load(classifier_filename) self.window_size = detect_window_size self.step_size = detect_window_step_size self.n_process = n_process self.pool = None if self.n_process <= 1 else Pool(self.n_process)
def __init__(self, base_dir, batch_size, mode=1, cls=1, prune=None, de_norm=False):
    TRAIN = 1
    TEST = 2
    self.base_dir = base_dir
    self.batch_size = batch_size
    ds_dir = os.path.join(self.base_dir, 'dataset/class_{}'.format(cls))
    if mode == TRAIN:
        self.x = utils.pickle_load(ds_dir + '/imgs_train.pkl')
        self.y = utils.pickle_load(ds_dir + '/marks_train.pkl')
    elif mode == TEST:
        self.x = utils.pickle_load(ds_dir + '/imgs_test.pkl')
        self.y = utils.pickle_load(ds_dir + '/marks_test.pkl')
    else:
        raise ValueError("Invalid option, should be one of {} or {}".format(TRAIN, TEST))
    if de_norm:
        self.x = utils.de_norm(self.x)
        self.y = utils.de_norm(self.y)
    self.labels = np.array([1 if np.sum(mask) > 0 else 0 for mask in self.y])
    if prune is not None:
        self.x, self.y, self.labels = utils.prune(self.x, self.y, self.labels, prune)
    self.x = utils.norm(self.x)
    self.y = utils.norm(self.y)
    self.classes = np.unique(self.labels)
    self.per_class_ids = {}
    ids = np.array(range(len(self.x)))
    for c in self.classes:
        self.per_class_ids[c] = ids[self.labels == c]
    print(Counter(self.labels))
def load_scaler(fname):
    """ Load just the scaler from a model .zip """
    with zipfile.ZipFile(fname, mode='r') as zipf:
        # scaler - extract to a temporary file, then unpickle it
        fd, tmpfname = tempfile.mkstemp()
        os.close(fd)  # only the path is needed; close the low-level descriptor
        with open(tmpfname, 'wb') as sf:
            with zipf.open('scaler.pickle') as zsf:
                sf.write(zsf.read())
        scaler = utils.pickle_load(tmpfname)
        os.remove(tmpfname)
    return scaler
def __init__ (self, bus_name, bus_path): self.current_status = None self.app_ids = [] self.applications = {} self.user_ids = [] self.users = [] self.status = {} self.updated_timestamp = None self.notification_num = 10 self.refresh_interval = 60 self.refresh_hanedler = None self.notification = [] self.session = None self.session_code = None self.api_key = '9103981b8f62c7dbede9757113372240' self.secret = '4cd1dffd6bf0d466bf9ffcf2dcf7805c' self.office_status = defs.NO_LOGIN self.last_nid = 0 self.prepare_directories () path = self.local_data_dir + "/cache.pickle" cache = utils.pickle_load (path) if cache != None: for skey in cache: setattr (self, skey, cache[skey]) try: dbus.service.Object.__init__ (self, bus_name, bus_path) except KeyError: logging.debug ("DBus interface registration failed - other wallbox running somewhere") pass path = self.local_data_dir + "/auth.pickle" fb = utils.restore_auth_status (path, self.api_key, self.secret) if fb != None: self.uid = fb.uid self.status_changed (defs.IS_LOGIN) self.fb = fb if self.uid not in self.user_ids: self.user_ids.append (self.uid) self.refresh_hanedler = gobject.timeout_add (self.refresh_interval * 1000, self._refresh) else: self.status_changed (defs.NO_LOGIN)
def latlng_to_shp(cityid): """ Converts sampling CSV (lat, long) into shapefiles """ if not override and path.isfile(path.join(LATLNGS_SHP_DIR, str(cityid) + '.shp')): print '%s.shp already exists. Skipping' % cityid return print 'Processing latlng data for city %s...' % cityid data = pickle_load(path.join(LATLNGS_DIR, str(cityid))) print 'Data loaded' writer = shapefile.Writer(shapefile.POINT) writer.autoBalance = 1 writer.field('price', 'N') for make in MAKES: writer.field(make[:10], 'N', 19, 9) count = 0 for (lat, lng) in data: writer.point(lng, lat) record = { 'price': data[(lat, lng)]['price'] } for make in MAKES: if 'makes' in data[(lat, lng)]: record[make[:10]] = str(data[(lat, lng)]['makes'].get(make, 0.0))[:19] else: record[make[:10]] = 0.0 writer.record(**record) count += 1 if (count % 5000) == 0: print '%s entries copied' % count shp_path = path.join(LATLNGS_SHP_DIR, str(cityid)) print 'Writing shapefile...' writer.save(shp_path) print 'Defining projection...' if projection: arcpy.DefineProjection_management(shp_path + '.shp', PROJECTION_FILE) print 'Written shapefile to %s' % shp_path
def _load_state(self): obj = utils.pickle_load(self.wdir+"/layerwisetrainer_state") self.layer_index = obj["layer_index"] self.iter = obj["iter"] self.loss = obj["loss"] self.mlp_best = obj["mlp_best"] self.mlp_crrnt = obj["mlp_crrnt"] #self.iters_without_impr = obj["iters_without_impr"] self.train_sets.set_state(obj["train_sets"]) out = StringIO.StringIO() print >>out, "\n********** Resuming from **********" print >>out, "layer_index", self.layer_index print >>out, "iter", self.iter print >>out, "loss", self.loss print >>out, "mlp_best", self.mlp_best print >>out, "mlp_crrnt", self.mlp_crrnt self.logger.info(out.getvalue()) load(self.model, self.mlp_crrnt, gradients=True)
def _load_state(self): obj = utils.pickle_load(self.wdir+"/trainer_state") self.iter = obj["iter"] self.done = obj["done"] self.loss = obj["loss"] self.rate = obj["rate"] self.mlp_best = obj["mlp_best"] self.halving = obj["halving"] self.wasAccepted = obj["wasAccepted"] self.train_sets.set_state(obj["train_sets"]) self.valid_sets.set_state(obj["valid_sets"]) out = StringIO.StringIO() print >>out, "\n********** Resuming from **********" print >>out, "iter", self.iter print >>out, "done", self.done print >>out, "loss", self.loss print >>out, "rate", self.rate print >>out, "mlp_best", self.mlp_best print >>out, "halving", self.halving print >>out, "wasAccepted", self.wasAccepted self.logger.info(out.getvalue())
def regist_amakan():
    amazon_url_list = utils.pickle_load('amazon_url_list.pickel')
    for url in amazon_url_list[22:]:
        try:
            # open the individual product page
            driver.get(amakan_product_url.format(url))
            print(driver.find_element_by_css_selector('.card-header').text)
            # grab the "read it" button
            elem = driver.find_element_by_css_selector('.wis-numbers > .waves-effect')
            # skip if it is already marked as "read"
            if 'active' not in elem.get_attribute('class'):
                elem.click()
        except Exception as e:
            # if anything fails, just print the error for now and move on
            print(url)
            print(e)
        # courtesy sleep of one second between requests
        sleep(1)
def compute_resampling(city_id): log('Computing resample points for %s', city_id) output_dir = join(RESAMPLE_DIR, str(city_id)) if not exists(output_dir): os.makedirs(output_dir) log('Created %s', output_dir) else: log('%s already exists!', output_dir) # find the county fips codes city = pickle_load(join(CITY_DIR, str(city_id))) city_name = city['name'].strip().title() state_name = city['state'].strip().title() log('Looking up state info for %s...', state_name) if 'Utah' in state_name: state_name = 'UT' # weird ass corner case.. state = us.states.lookup(state_name) fips_set = fips_codes_dict[(city['name'].lower(), state.fips)] log('County fips codes for %s, %s: %s', city_name, state.name, fips_set) road_shps = fetch_road_shp(output_dir, fips_set) all_roads_shp = join(output_dir, 'all_roads.shp') if not exists(all_roads_shp): log('Merging %s into %s', list(road_shps), all_roads_shp) arcpy.Merge_management(list(road_shps), all_roads_shp) log('Merge complete') boundary_shp = join(CITY_BOUNDARY_SHP_DIR, 'tl_2014_%s_place.shp' % state.fips) city_boundary_shp = join(output_dir, 'city_boundary.shp') if not exists(city_boundary_shp): log('Fetching city boundary from %s...', boundary_shp) query = "\"NAME\" Like '%%%s%%'" % city['name'].title() log('Using query %s', query) arcpy.MakeFeatureLayer_management(boundary_shp, 'temp', query) arcpy.CopyFeatures_management('temp', city_boundary_shp) log('City boundary %s fetched', city_boundary_shp) all_roads_clipped_shp = join(output_dir, 'all_roads_clipped.shp') if not exists(all_roads_clipped_shp): log('Clipping all_roads into city boundary...') arcpy.Clip_analysis(all_roads_shp, city_boundary_shp, all_roads_clipped_shp) log('Clip complete') all_roads_buffered_shp = join(output_dir, 'all_roads_buffered.shp') if not exists(all_roads_buffered_shp): log('Buffering into %s...', all_roads_buffered_shp) arcpy.Buffer_analysis(all_roads_clipped_shp, all_roads_buffered_shp, '20 Meter') log('Buffering complete') sample_grid_shp = join(output_dir, 'sample_grid.shp') sample_grid_labels_shp = join(output_dir, 'sample_grid_label.shp') if not exists(sample_grid_shp): log('Creating fishnet...') desc = arcpy.Describe(all_roads_buffered_shp) arcpy.CreateFishnet_management( sample_grid_shp, str(desc.extent.lowerLeft), str(desc.extent.XMin) + ' ' + str(desc.extent.YMax + 10), '0.0003', '0.0003', '0', '0', str(desc.extent.upperRight), 'LABELS', '#', 'POLYLINE' ) log('Fishnet creation complete') sample_grid_clipped_shp = join(output_dir, 'sample_grid_clipped.shp') if not exists(sample_grid_clipped_shp): log('Clipping fishnet to buffered roads...') arcpy.Clip_analysis(sample_grid_labels_shp, all_roads_buffered_shp, sample_grid_clipped_shp) log('Clip fishnet to buffered roads complete') ''' sample_grid_erased_shp = join(output_dir, 'sample_grid_erased.shp') if not exists(sample_grid_erased_shp): print 'Projecting using', PROJECTION_FILE # arcpy.DefineProjection_management(sample_grid_clipped_shp, PROJECTION_FILE) print 'Erasing existing samples from fishnet...' samples_shp = join(LATLNGS_SHP_DIR, str(city_id) + '.shp') arcpy.Erase_analysis(sample_grid_clipped_shp, samples_shp, sample_grid_erased_shp, '20 Meter') print 'Erasing existing samples from fishnet complete' ''' log('FINISHED computing resampling for %s: %s, %s', city_id, city_name, state.name) return sample_grid_clipped_shp log('======================END==========================')
import sys from os import path import time from census import Census, ALL from fcc.census_block_conversions import census_block_dict import shapefile from constants import CACHE_DIR, LATLNGS_SHP_DIR from utils import pickle_save, pickle_load YEAR = 2010 try: BLOCK_DATA_CACHE = pickle_load(path.join(CACHE_DIR, 'block_data')) except Exception as e: print 'CANNOT LOAD BLOCK DATA CACHE: ', str(e) sys.exit(1) CENSUS_DATA_CACHE = pickle_load(path.join(CACHE_DIR, 'census_data_cache')) CENSUS_API_KEY = '3a7f0def09f356fa48f84558824b09a009c1f6e2' INCOME_VARIABLE = 'B06011_001E' AGE_VARIABLE = 'B05004_004E' client = Census(CENSUS_API_KEY).acs def get_block_data(lat, lng, data): if not data.get((lat, lng)): data[(lat, lng)] = census_block_dict(lat, lng, year=YEAR) return data[(lat, lng)] def get_variable(lat, lng, data, var=INCOME_VARIABLE):
for cityid in CITY_IDS: city = City(cityid) print 'Processing', city.name, city.state name = city.name.title() state = city.state.title() area = city.area samples = city.samples polygon_path = path.join(SAMPLES_DIR, 'polygons', str(cityid)) polygon = pickle.load(open(polygon_path, 'r')) sample_area = polygon['area'] coverage = float(sample_area) / float(area) moran_indices = pickle_load(path.join(CACHE_DIR, 'moran_indices')) digest.append({ 'cityid': cityid, 'name': name, 'state': state, 'area': area, 'samples': samples, 'sample_area': sample_area, 'coverage': coverage, 'moran_index': moran_indices[cityid] }) pickle.dump(digest, open(path.join(STATS_DIR, 'all_cities'), 'wb')) print 'FINISHED'
def compute_city(cityid): data = pickle_load(path.join(CACHE_DIR, 'MA_census_data')) print data
from os import path from collections import defaultdict import shapefile from utils import pickle_save, pickle_load from constants import CACHE_DIR, LATLNGS_SHP_DIR try: BLOCK_DATA_CACHE = pickle_load(path.join(CACHE_DIR, 'block_data')) except Exception as e: print 'CANNOT LOAD BLOCK DATA CACHE: ', str(e) BLOCK_DATA_CACHE = {} def get_block_id(data): return data['Block']['FIPS'][:-1] def group_price_by_block(cityid): reader = shapefile.Reader(path.join(LATLNGS_SHP_DIR, str(cityid))) changed = False block_total = {} block_data = BLOCK_DATA_CACHE[cityid] for sr in reader.shapeRecords(): (lng, lat) = sr.shape.points[0] data = block_data.get((lat, lng)) if not data: continue
import sys
from os import path  # needed for the path.join calls below

import shapefile

from constants import CACHE_DIR, SHAPEFILE_DIR, CONVEXHULL_DIR, CITY_IDS, PROJECTION_FILE
from utils import pickle_load, pickle_save


def format_zipcode(zipcode):
    zipcode = str(zipcode)
    while len(zipcode) < 5:
        zipcode = '0' + zipcode
    return zipcode


if __name__ == '__main__':
    all_cars_data = pickle_load(path.join(CACHE_DIR, 'all_cars_data'))
    all_census_data = pickle_load(path.join(CACHE_DIR, 'all_census_data'))
    reader = shapefile.Reader(path.join(CACHE_DIR, 'zipcodes/zip_poly/zip_poly'))
    zipcodeList = {}
    for key, data in all_cars_data.iteritems():
        zipcode = data[94]
        cityid = int(data[95])
        formatted_zipcode = format_zipcode(zipcode)
        if formatted_zipcode not in zipcodeList:
            zipcodeList[formatted_zipcode] = 1
    print 'Reading polygon file...'
    zip_poly = {}
def comp_sketch(matrix, objective, load_N=False, save_N=False, N_dir='../N_file/', **kwargs): """ Given matrix A, the function comp_sketch computes a sketch for A and performs further operations on PA. It returns the total running time and the desired quantity. parameter: matrix: a RowMatrix object storing the matrix [A b] objective: either 'x' or 'N' 'x': the function returns the solution to the problem min_x || PA[:,:-1]x - PA[:,-1] ||_2 'N': the function returns a square matrix N such that PA[:,:-1]*inv(N) is a matrix with orthonormal columns load_N: load the precomputed N matrices if possible (it reduces the actual running time for sampling sketches) save_N: save the computed N matrices for future use sketch_type: either 'projection' or 'sampling' projection_type: cw, gaussian, rademacher or srdht c: projection size s: sampling size (for sampling sketch only) k: number of independent trials to run """ sketch_type = kwargs.get('sketch_type') if not os.path.exists(N_dir): os.makedirs(N_dir) if objective == 'x': if sketch_type == 'projection': projection = Projections(**kwargs) t = time.time() x = projection.execute(matrix, 'x', save_N) t = time.time() - t if save_N: logger.info('Saving N matrices from projections!') N = [a[0] for a in x] x = [a[1] for a in x] # saving N filename = N_dir + 'N_' + matrix.name + '_projection_' + kwargs.get('projection_type') + '_c' + str(int(kwargs.get('c'))) + '_k' + str(int(kwargs.get('k')))+ '.dat' data = {'N': N, 'time': t} pickle_write(filename,data) elif sketch_type == 'sampling': s = kwargs.get('s') new_N_proj = 0 N_proj_filename = N_dir + 'N_' + matrix.name + '_projection_' + kwargs.get('projection_type') + '_c' + str(int(kwargs.get('c'))) + '_k' + str(int(kwargs.get('k'))) +'.dat' if load_N and os.path.isfile(N_proj_filename): logger.info('Found N matrices from projections, loading them!') N_proj_filename = N_dir + 'N_' + matrix.name + '_projection_' + kwargs.get('projection_type') + '_c' + str(int(kwargs.get('c'))) + '_k' + str(int(kwargs.get('k'))) +'.dat' result = pickle_load(N_proj_filename) N_proj = result['N'] t_proj = result['time'] else: # otherwise, compute it t = time.time() projection = Projections(**kwargs) N_proj = projection.execute(matrix, 'N') t_proj = time.time() - t new_N_proj = 1 sampling = Sampling(N=N_proj) t = time.time() x = sampling.execute(matrix, 'x', s, save_N ) t = time.time() - t + t_proj if save_N and new_N_proj: logger.info('Saving N matrices from projections!') #filename = N_dir + 'N_' + matrix.name + '_projection_' + kwargs.get('projection_type') + '_c' + str(int(kwargs.get('c'))) + '_k' + str(int(kwargs.get('k'))) + '.dat' data = {'N': N_proj, 'time': t_proj} pickle_write(N_proj_filename,data) if save_N: logger.info('Saving N matrices from sampling!') N = [a[0] for a in x] x = [a[1] for a in x] filename = N_dir + 'N_' + matrix.name + '_sampling_s' + str(int(kwargs.get('s'))) + '_' + kwargs.get('projection_type') + '_c' + str(int(kwargs.get('c'))) + '_k' + str(int(kwargs.get('k'))) + '.dat' data = {'N': N, 'time': t} pickle_write(filename,data) else: raise ValueError('Please enter a valid sketch type!') return x, t elif objective == 'N': if sketch_type == 'projection': N_proj_filename = N_dir + 'N_' + matrix.name + '_projection_' + kwargs.get('projection_type') + '_c' + str(int(kwargs.get('c'))) + '_k' + str(int(kwargs.get('k'))) + '.dat' if load_N and os.path.isfile(N_proj_filename): logger.info('Found N matrices from projections, loading them!') result = pickle_load(N_proj_filename) N = result['N'] t = 
result['time'] else: t = time.time() projection = Projections(**kwargs) N = projection.execute(matrix, 'N') t = time.time() - t if save_N: logger.info('Saving N matrices from projections!') data = {'N': N, 'time': t} pickle_write(N_proj_filename,data) elif sketch_type == 'sampling': s = kwargs.get('s') new_N_proj = 0 new_N_samp = 0 N_samp_filename = N_dir + 'N_' + matrix.name + '_sampling_s' + str(int(kwargs.get('s'))) + '_' + kwargs.get('projection_type') + '_c' + str(int(kwargs.get('c'))) + '_k' + str(int(kwargs.get('k'))) + '.dat' N_proj_filename = N_dir + 'N_' + matrix.name + '_projection_' + kwargs.get('projection_type') + '_c' + str(int(kwargs.get('c'))) + '_k' + str(int(kwargs.get('k'))) + '.dat' if load_N and os.path.isfile(N_samp_filename): logger.info('Found N matrices from sampling, loading them!') result = pickle_load(N_samp_filename) N = result['N'] t = result['time'] elif load_N and os.path.isfile(N_proj_filename): logger.info('Found N matrices from projections, loading them!') result = pickle_load(N_proj_filename) N_proj = result['N'] t_proj = result['time'] sampling = Sampling(N=N_proj) t = time.time() N = sampling.execute(matrix, 'N', s) t = time.time() - t + t_proj new_N_samp = 1 else: t = time.time() projection = Projections(**kwargs) N_proj = projection.execute(matrix, 'N') t_proj = time.time() - t new_N_proj = 1 t = time.time() sampling = Sampling(N=N_proj) N = sampling.execute(matrix, 'N', s) t = time.time() - t + t_proj new_N_samp = 1 if save_N and new_N_proj: logger.info('Saving N matrices from projections!') data = {'N': N_proj, 'time': t_proj} pickle_write(N_proj_filename,data) if save_N and new_N_samp: logger.info('Saving N matrices from sampling!') data = {'N': N, 'time': t} pickle_write(N_samp_filename,data) else: raise ValueError('Please enter a valid sketch type!') return N, t else: raise ValueError('Please enter a valid objective!')
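# A usage sketch for comp_sketch based only on the parameters documented in its
# docstring; the RowMatrix construction and the values of c, s and k below are
# assumptions for illustration, not values from the original code.
# matrix = RowMatrix(...)  # a RowMatrix object storing [A b]

# least-squares solution via a sampling sketch (reuses a cached N matrix from a
# projection run under N_dir if one is found, otherwise computes it)
x_trials, t_x = comp_sketch(matrix, 'x', load_N=True, save_N=True,
                            sketch_type='sampling', projection_type='gaussian',
                            c=2000, s=500, k=5)

# conditioning matrix N via a projection sketch
N_trials, t_N = comp_sketch(matrix, 'N', sketch_type='projection',
                            projection_type='cw', c=2000, k=5)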