def main(argv):
    """Fit joint subunit models for the cell(s) selected by ``FLAGS.taskid``.

    Copies ``Off_parasol.mat`` from ``FLAGS.src_dir`` to ``FLAGS.tmp_dir`` when
    it is not there yet, loads stimulus, responses and masks from it, maps the
    task id to (cells, number of subunits, partitions) and calls
    ``jnt_model.Flat_clustering_jnt`` for every requested partition whose
    result file does not exist yet under ``FLAGS.save_path``.

    Task id layout (``n`` = 10 single-cell subunit counts):
      * ``[0, 107 * n)``: one cell (``taskid // n``), ``Nsub`` from 1..10,
        all 10 partitions.
      * next ``37 * 10`` ids: cells [39, 42, 44, 45], ``Nsub`` from 4..40.
      * next ``19 * 10`` ids: cells [39, 42], ``Nsub`` from 2..20.
      * next ``19 * 10`` ids: cells [44, 45], ``Nsub`` from 2..20.
      In the last three ranges each id selects a single partition.

    Args:
      argv: Command-line arguments (unused).

    Raises:
      ValueError: If ``FLAGS.taskid`` lies beyond the last task range.
    """
    # Copy data to the temporary directory if needed.
    dst = os.path.join(FLAGS.tmp_dir, 'Off_parasol.mat')
    if not gfile.Exists(dst):
        print('Started Copy')
        src = os.path.join(FLAGS.src_dir, 'Off_parasol.mat')
        if not gfile.IsDirectory(FLAGS.tmp_dir):
            gfile.MkDir(FLAGS.tmp_dir)
        gfile.Copy(src, dst)
        print('File copied to destination')
    else:
        print('File exists')

    # Load stimulus (masked movie), cell ids, spike responses and masks.
    data_file = h5py.File(dst, 'r')
    stimulus = np.array(data_file.get('maskedMovdd'))
    cells = data_file.get('cells')
    responses = np.array(data_file.get('Y'))
    total_mask_log = data_file.get('totalMaskAccept_log')
    print('Got data')

    # Map the task id to cells, number of subunits and partitions.
    nsub_list = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    n_single_cell_tasks = 107 * len(nsub_list)
    # (cells fitted jointly, candidate subunit counts) for each population
    # range; every range holds 10 partitions per subunit count.
    population_tasks = [
        ([39, 42, 44, 45], np.arange(4, 41)),
        ([39, 42], np.arange(2, 21)),
        ([44, 45], np.arange(2, 21)),
    ]
    taskid = FLAGS.taskid
    if taskid < n_single_cell_tasks:
        cell_index = int(taskid // len(nsub_list))
        cell_idx = [cell_index]
        cellid = cells[cell_index]
        Nsub = nsub_list[taskid % len(nsub_list)]
        partition_list = np.arange(10)
    else:
        task_id_effective = taskid - n_single_cell_tasks
        for cell_idx, nsub_list_pop in population_tasks:
            n_tasks = len(nsub_list_pop) * 10
            if task_id_effective < n_tasks:
                break
            task_id_effective -= n_tasks
        else:
            # Previously this fell through and failed later with a NameError.
            raise ValueError('taskid %d is out of range' % taskid)
        cellid = np.squeeze(cells[cell_idx])
        partition_list = [task_id_effective % 10]
        Nsub = nsub_list_pop[int(task_id_effective // 10)]

    print(cell_idx)
    print(Nsub)

    # Union of the selected cells' masks, reshaped to the 40 x 80 pixel grid.
    mask = (total_mask_log[cell_idx, :].sum(0) != 0)
    mask_matrix = np.reshape(mask != 0, [40, 80])

    # Make mask bigger.
    # NOTE(review): the slice end is exclusive, so this only adds one row
    # above / one column to the left of the bounding box (and selects nothing
    # if the box touches row or column 0). Left unchanged because altering it
    # changes which pixels are fitted - confirm the intent.
    r, c = np.where(mask_matrix)
    mask_matrix[r.min() - 1:r.max() + 1, c.min() - 1:c.max() + 1] = True
    mask = np.ndarray.flatten(mask_matrix)

    stim_use = stimulus[:, mask]
    resp_use = responses[:, cell_idx]
    print('Prepared data')

    # Last 10% of samples are test data. The seed is fixed so that every task
    # draws the same partitions.
    np.random.seed(23)
    frac_test = 0.1
    n_samples = stim_use.shape[0]
    n_train_validate = int(np.floor(n_samples * (1 - frac_test)))
    tms_test = np.arange(n_train_validate, n_samples)

    # Random train / validation partitions of the remaining samples.
    n_partitions = 10
    tms_train_validate = np.arange(0, n_train_validate)
    frac_validate = 0.1
    partitions = []
    for _ in range(n_partitions):
        perm = np.random.permutation(tms_train_validate)
        # Slice bounds must be integers; float bounds raise on current NumPy.
        n_train = int(np.floor((1 - frac_validate) * perm.shape[0]))
        partitions.append({
            'tms_train': perm[:n_train],
            'tms_validate': perm[n_train:],
            'tms_test': tms_test,
        })
    print('Made partitions')

    # Do fitting.
    for ipartition in partition_list:
        print(cell_idx, cellid, Nsub)

        ss = '_'.join([str(ic) for ic in cellid])
        save_filename = os.path.join(
            FLAGS.save_path,
            'Cell_%s_nsub_%d_part_%d_jnt.pkl' % (ss, Nsub, ipartition))
        if gfile.Exists(save_filename):
            continue  # This partition has already been fitted.

        print('Fitting started')
        op = jnt_model.Flat_clustering_jnt(
            stim_use, resp_use, Nsub,
            partitions[ipartition]['tms_train'],
            partitions[ipartition]['tms_validate'],
            steps_max=10000, eps=1e-9)
        K, b, _, lam_log, lam_log_test, fitting_phase, fit_params = op
        print('Fitting done')

        save_dict = {
            'K': K,
            'b': b,
            'lam_log': lam_log,
            'lam_log_test': lam_log_test,
            'fitting_phase': fitting_phase,
            'fit_params': fit_params,
        }
        # Pickle data is binary; the context manager guarantees the handle is
        # flushed and closed before the task reports success.
        with gfile.Open(save_filename, 'wb') as f:
            pickle.dump(save_dict, f)
        print('Saved results')
def load_data(context):
    """Load gzip-compressed delimited files and split them into train/validation.

    Every file under ``root`` whose name does not start with ``.`` or ``_`` is
    read line by line. Each line is assigned to the validation set when
    ``random.random() < valid_data_percentage`` and to the training set
    otherwise. ``root``, ``delimiter``, ``target_index``,
    ``valid_data_percentage`` and ``tprint`` are names defined outside this
    function.

    Args:
      context: Dict read for ``"feature_column_nums"`` (list of feature column
        indices, or None to use every column except ``target_index``) and
        ``"sample_weight_column_num"`` (weight column index; values outside
        the row give a weight of 1.0). ``context['feature_count']`` is set to
        the number of feature columns before returning.

    Returns:
      Tuple ``(train_data, train_target, valid_data, valid_target,
      training_data_sample_weight, valid_data_sample_weight)``; each element
      is a list with one entry (itself a list of floats) per row.
    """
    feature_column_nums = context["feature_column_nums"]
    sample_weight_column_num = context["sample_weight_column_num"]

    def new_bucket():
        return {"data": [], "target": [], "weight": [], "pos": 0, "neg": 0}

    train = new_bucket()
    valid = new_bucket()

    allFileNames = gfile.ListDirectory(root)
    # A real list (not a lazy filter object) so len() works on Python 3 too.
    normFileNames = [name for name in allFileNames
                     if not name.startswith(".") and not name.startswith("_")]
    tprint("Total input file count is " + str(len(normFileNames)) + ".")

    line_count = 0
    for file_count, normFileName in enumerate(normFileNames, 1):
        print("Now loading " + normFileName + " Progress: " + str(file_count)
              + "/" + str(len(normFileNames)) + ".")
        sys.stdout.flush()

        with gfile.Open(os.path.join(root, normFileName), 'rb') as f:
            # NOTE(review): StringIO only accepts the raw gzip bytes on
            # Python 2; a Python 3 port needs a bytes buffer and decoding.
            with gzip.GzipFile(fileobj=StringIO(f.read())) as gf:
                for line in gf:
                    line_count += 1
                    if line_count % 10000 == 0:
                        tprint("Total loading lines: " + str(line_count))

                    columns = line.split(delimiter)

                    if feature_column_nums is None:
                        # range() has no remove() on Python 3.
                        feature_column_nums = list(range(0, len(columns)))
                        feature_column_nums.remove(target_index)

                    # One random draw per row decides which split it joins.
                    if random.random() >= valid_data_percentage:
                        bucket = train
                    else:
                        bucket = valid

                    bucket["target"].append([float(columns[target_index])])
                    if columns[target_index] == "1":
                        bucket["pos"] += 1
                    else:
                        bucket["neg"] += 1

                    bucket["data"].append(
                        [float(columns[feature_column_num].strip('\n'))
                         for feature_column_num in feature_column_nums])

                    if 0 <= sample_weight_column_num < len(columns):
                        weight = float(
                            columns[sample_weight_column_num].strip('\n'))
                        if weight < 0.0:
                            print("Warning: weight is below 0. example:" + line)
                            weight = 1.0
                        bucket["weight"].append([weight])
                    else:
                        bucket["weight"].append([1.0])

    tprint("Total data count: " + str(line_count) + ".")
    tprint("Train pos count: " + str(train["pos"]) + ", neg count: "
           + str(train["neg"]) + ".")
    tprint("Valid pos count: " + str(valid["pos"]) + ", neg count: "
           + str(valid["neg"]) + ".")

    # With no input rows the feature columns were never inferred.
    if feature_column_nums is None:
        context['feature_count'] = 0
    else:
        context['feature_count'] = len(feature_column_nums)

    return (train["data"], train["target"], valid["data"], valid["target"],
            train["weight"], valid["weight"])
def testProfileBasic(self):
    """Compare `Profiler` dumps with `print_model_analysis` dumps.

    For the scope view without run metadata, and for the graph, code and op
    views after one traced step, both APIs must write identical output. After
    a second step is added to the profiler the full scope view must differ,
    while a view selecting only 'params' and 'float_ops' must still match.
    """
    ops.reset_default_graph()
    outfile = os.path.join(test.get_temp_dir(), 'dump')
    opts = model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS.copy()
    opts['account_type_regexes'] = ['.*']
    opts['select'] = ['params', 'float_ops', 'micros', 'bytes',
                      'device', 'op_types', 'occurrence']
    opts['output'] = 'file:outfile=' + outfile

    def read_dump():
        # Both APIs write to the same file, so it is re-read after each call.
        with gfile.Open(outfile, 'r') as dump:
            return dump.read()

    def traced_run():
        # Execute the model once, collecting a full trace into run_meta.
        sess.run(r,
                 options=config_pb2.RunOptions(
                     trace_level=config_pb2.RunOptions.FULL_TRACE),
                 run_metadata=run_meta)

    # Test the output without run_meta.
    sess = session.Session()
    r = lib.BuildFullModel()
    sess.run(variables.global_variables_initializer())

    profiler = model_analyzer.Profiler(sess.graph)
    profiler.profile_name_scope(opts)
    profiler_str = read_dump()
    model_analyzer.print_model_analysis(
        sess.graph, tfprof_cmd='scope', tfprof_options=opts)
    pma_str = read_dump()
    self.assertEqual(pma_str, profiler_str)

    # Test the output with run_meta.
    run_meta = config_pb2.RunMetadata()
    traced_run()
    profiler.add_step(1, run_meta)

    views = [
        (profiler.profile_graph, 'graph'),
        (profiler.profile_python_codes, 'code'),
        (profiler.profile_operations, 'op'),
    ]
    for profile_view, tfprof_cmd in views:
        profile_view(opts)
        profiler_str = read_dump()
        model_analyzer.print_model_analysis(
            sess.graph, tfprof_cmd=tfprof_cmd, run_meta=run_meta,
            tfprof_options=opts)
        pma_str = read_dump()
        self.assertEqual(pma_str, profiler_str)

    # Test the output difference between multi-step profile and 1-step
    # profile.
    traced_run()
    profiler.add_step(2, run_meta)
    profiler.profile_name_scope(opts)
    profiler_str = read_dump()
    model_analyzer.print_model_analysis(
        sess.graph, tfprof_cmd='scope', run_meta=run_meta,
        tfprof_options=opts)
    pma_str = read_dump()
    self.assertNotEqual(pma_str, profiler_str)

    opts2 = opts.copy()
    opts2['select'] = ['params', 'float_ops']
    profiler.profile_name_scope(opts2)
    profiler_str = read_dump()
    model_analyzer.print_model_analysis(
        sess.graph, tfprof_cmd='scope', run_meta=run_meta,
        tfprof_options=opts2)
    pma_str = read_dump()
    self.assertEqual(pma_str, profiler_str)
from tensorflow.contrib import lookup
from tensorflow.python.platform import gfile
import nltk as nl

# Figure out document lengths: the longest review, counted in NLTK tokens,
# is used as the maximum document length for the vocabulary processor.
length = [len(nl.word_tokenize(review)) for review in df['Review Text']]

MAX_DOCUMENT_LENGTH = max(length)
PADWORD = 'ZYXW'

vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(
    MAX_DOCUMENT_LENGTH)
vocab_processor.fit(lines)

with gfile.Open('vocab.tsv', 'wb') as f:
    f.write("{}\n".format(PADWORD))
    # Sort by the mapped index so the file order is deterministic instead of
    # following dict iteration order (arbitrary before Python 3.7).
    for word, index in sorted(vocab_processor.vocabulary_._mapping.items(),
                              key=lambda item: item[1]):
        f.write("{}\n".format(word))

x = np.array(list(vocab_processor.fit_transform(lines)))
vocabulary = vocab_processor.vocabulary_


def load_embedding_vectors_word2vec(vocabulary, filename, binary):
    # load embedding_vectors from the word2vec
    # NOTE(review): only the header parsing is visible here; the rest of the
    # loader (and any return value) appears to be missing - confirm.
    encoding = 'utf-8'
    with open(filename, "rb") as f:
        header = f.readline()
        # The first line holds two whitespace-separated integers.
        vocab_size, vector_size = map(int, header.split())
def _write_with_backup(filename, content):
    """Write `content` to `filename`, keeping the previous file as a backup.

    If `filename` already exists it is first renamed to `filename + '.old'`,
    overwriting any earlier backup.

    Args:
      filename: Path of the file to write.
      content: Data passed to the file's `write` method.
    """
    already_there = gfile.Exists(filename)
    if already_there:
        backup_name = filename + '.old'
        gfile.Rename(filename, backup_name, overwrite=True)

    with gfile.Open(filename, 'w') as out:
        out.write(content)
def restore(cls, filename):
    """Return the object unpickled from `filename`.

    Args:
      filename: Path of a file containing pickled data.

    Returns:
      Whatever object the pickle stream describes.
    """
    # NOTE: unpickling can execute arbitrary code; only use trusted files.
    with gfile.Open(filename, 'rb') as src:
        payload = src.read()
    return pickle.loads(payload)
def main(argv):
    """Fit subunits on white noise (WN), then rescale them on NSEM data.

    Steps:
      1. Copy ``Off_parasol.mat`` to ``FLAGS.tmp_dir`` if needed and load the
         WN stimulus, responses, time courses and masks from it.
      2. Load the NSEM movie and responses.
      3. Read line number ``FLAGS.taskid`` of ``FLAGS.task_params_file``; it
         has the form ``[cells];Nsub;projection_type;lam_proj;ipartition``.
      4. Fit with ``su_model.Flat_clustering_jnt`` on the chosen WN
         partition, then ``su_model.fit_scales`` on the NSEM data, and pickle
         the loss curves and parameters under ``FLAGS.save_path``.

    Args:
      argv: Command-line arguments (unused).

    Raises:
      ValueError: If the task parameter file has no line for the task id.
    """
    # Copy WN data to the temporary directory if needed.
    dst = os.path.join(FLAGS.tmp_dir, 'Off_parasol.mat')
    if not gfile.Exists(dst):
        print('Started Copy')
        src = os.path.join(FLAGS.src_dir, 'Off_parasol.mat')
        if not gfile.IsDirectory(FLAGS.tmp_dir):
            gfile.MkDir(FLAGS.tmp_dir)
        gfile.Copy(src, dst)
        print('File copied to destination')
    else:
        print('File exists')

    # Load WN stimulus (masked movie), cell ids, time courses, responses, masks.
    data_file = h5py.File(dst, 'r')
    stimulus = np.array(data_file.get('maskedMovdd'))
    cells = np.squeeze(np.array(data_file.get('cells')))
    ttf_log = data_file.get('ttf_log')
    responses = np.array(data_file.get('Y'))
    total_mask_log = np.array(data_file.get('totalMaskAccept_log'))
    print('Got WN data')

    # Get NSEM data. .mat files are binary, so open them in 'rb' mode and
    # close the handles once loaded.
    with gfile.Open('/home/bhaishahster/nsem_data/'
                    'pc2015_10_29_2/NSinterval_30_025.mat', 'rb') as f:
        dat_nsem_mov = sio.loadmat(f)
    stimulus_nsem = dat_nsem_mov['mov']
    stimulus_nsem = np.transpose(stimulus_nsem, [2, 1, 0])
    stimulus_nsem = np.reshape(stimulus_nsem, [stimulus_nsem.shape[0], -1])

    with gfile.Open('/home/bhaishahster/nsem_data/'
                    'pc2015_10_29_2/OFF_parasol_trial_resp'
                    '_data_NSEM_data039.mat', 'rb') as f:
        dat_nsem_resp = sio.loadmat(f)
    responses_nsem = dat_nsem_resp['resp_cell_log']
    print('Git NSEM data')

    # Read the line corresponding to this task.
    line = ''
    with gfile.Open(FLAGS.task_params_file, 'r') as f:
        for _ in range(FLAGS.taskid + 1):
            line = f.readline()
    # Strip only the newline: the last line of the file may not have one.
    line = line.rstrip('\n')
    if not line:
        raise ValueError('No task parameters found for taskid %d in %s' %
                         (FLAGS.taskid, FLAGS.task_params_file))
    print(line)

    # Get task parameters by parsing the line.
    line_split = line.split(';')
    cell_idx = [int(i) for i in line_split[0][1:-1].split(',')]
    Nsub = int(line_split[1])
    projection_type = line_split[2]
    lam_proj = float(line_split[3])
    ipartition = int(line_split[4])
    cell_idx_mask = cell_idx

    print(cell_idx)
    print(Nsub)
    print(cell_idx_mask)

    # Union of the selected cells' masks, reshaped to the 40 x 80 pixel grid.
    mask = (total_mask_log[cell_idx_mask, :].sum(0) != 0)
    mask_matrix = np.reshape(mask != 0, [40, 80])

    # Make mask bigger.
    # NOTE(review): the slice end is exclusive, so this only adds one row
    # above / one column to the left of the bounding box (and selects nothing
    # if the box touches row or column 0). Left unchanged because altering it
    # changes which pixels are fitted - confirm the intent.
    r, c = np.where(mask_matrix)
    mask_matrix[r.min() - 1:r.max() + 1, c.min() - 1:c.max() + 1] = True
    neighbor_mat = su_model.get_neighbormat(mask_matrix, nbd=1)
    mask = np.ndarray.flatten(mask_matrix)

    ## WN preprocess
    stim_use_wn = stimulus[:, mask]
    resp_use_wn = responses[:, cell_idx]

    # Last 10% of samples are test data. The seed is fixed so that every task
    # draws the same partitions.
    np.random.seed(23)
    frac_test = 0.1
    n_samples = stim_use_wn.shape[0]
    n_train_validate = int(np.floor(n_samples * (1 - frac_test)))
    tms_test = np.arange(n_train_validate, n_samples)

    # Random train / validation partitions of the remaining samples.
    n_partitions = 10
    tms_train_validate = np.arange(0, n_train_validate)
    frac_validate = 0.1
    partitions_wn = []
    for _ in range(n_partitions):
        perm = np.random.permutation(tms_train_validate)
        # Slice bounds must be integers; float bounds raise on current NumPy.
        n_train = int(np.floor((1 - frac_validate) * perm.shape[0]))
        partitions_wn.append({
            'tms_train': perm[:n_train],
            'tms_validate': perm[n_train:],
            'tms_test': tms_test,
        })
    print('Made partitions')
    print('WN data preprocessed')

    ## NSEM preprocess
    stim_use_nsem = stimulus_nsem[:, mask]
    ttf_use = np.array(ttf_log[cell_idx, :]).astype(np.float32).squeeze()
    stim_use_nsem = filterMov_time(stim_use_nsem, ttf_use)
    resp_use_nsem = np.array(
        responses_nsem[cell_idx][0, 0]).astype(np.float32).T

    # Remove first 30 frames due to convolution artifact.
    stim_use_nsem = stim_use_nsem[30:, :]
    resp_use_nsem = resp_use_nsem[30:, :]

    n_trials = resp_use_nsem.shape[1]
    t_nsem = resp_use_nsem.shape[0]
    tms_train_1tr_nsem = np.arange(np.floor(t_nsem / 2))
    tms_test_1tr_nsem = np.arange(np.ceil(t_nsem / 2), t_nsem)

    # Repeat in time dimension, divide into training and testing.
    stim_use_nsem = np.tile(stim_use_nsem.T, n_trials).T
    resp_use_nsem = np.ndarray.flatten(resp_use_nsem.T)
    resp_use_nsem = np.expand_dims(resp_use_nsem, 1)

    tms_train_nsem = np.array([])
    tms_test_nsem = np.array([])
    for itrial in range(n_trials):
        # Shift the single-trial indices to this trial's block of samples.
        tms_train_nsem = np.append(tms_train_nsem,
                                   tms_train_1tr_nsem + itrial * t_nsem)
        tms_test_nsem = np.append(tms_test_nsem,
                                  tms_test_1tr_nsem + itrial * t_nsem)
    tms_train_nsem = tms_train_nsem.astype(int)
    tms_test_nsem = tms_test_nsem.astype(int)
    print('NSEM data preprocessed')

    ss = '_'.join([str(cells[ic]) for ic in cell_idx])
    result_name = ('Cell_%s_nsub_%d_%s_%.3f_part_%d_jnt.pkl' %
                   (ss, Nsub, projection_type, lam_proj, ipartition))
    save_filename = os.path.join(FLAGS.save_path, result_name)
    save_filename_partial = os.path.join(FLAGS.save_path_partial, result_name)

    ## Do fitting
    # Fit SU on WN.
    print('Fitting started on WN')
    op = su_model.Flat_clustering_jnt(
        stim_use_wn, resp_use_wn, Nsub,
        partitions_wn[ipartition]['tms_train'],
        partitions_wn[ipartition]['tms_validate'],
        steps_max=10000, eps=1e-9,
        projection_type=projection_type,
        neighbor_mat=neighbor_mat,
        lam_proj=lam_proj, eps_proj=0.01,
        save_filename_partial=save_filename_partial,
        fitting_phases=[1])
    _, _, _, lam_log_wn, lam_log_test_wn, _, fit_params_wn = op
    print('Fitting done on WN')

    # Fit on NSEM, starting from the WN fit.
    op = su_model.fit_scales(
        stim_use_nsem[tms_train_nsem, :], resp_use_nsem[tms_train_nsem, :],
        stim_use_nsem[tms_test_nsem, :], resp_use_nsem[tms_test_nsem, :],
        Ns=Nsub,
        K=fit_params_wn[0][0], b=fit_params_wn[0][1],
        params=fit_params_wn[0][2], lr=0.1, eps=1e-9)
    K_nsem, b_nsem, nl_params_nsem, lam_log_nsem, lam_log_test_nsem = op

    # Collect results and save.
    fit_params = fit_params_wn + [[K_nsem, b_nsem, nl_params_nsem]]
    lam_log = [lam_log_wn, np.array(lam_log_nsem)]
    lam_log_test = [lam_log_test_wn, np.array(lam_log_test_nsem)]

    save_dict = {
        'lam_log': lam_log,
        'lam_log_test': lam_log_test,
        'fit_params': fit_params,
    }
    # Pickle data is binary; the context manager guarantees the handle is
    # flushed and closed.
    with gfile.Open(save_filename, 'wb') as f:
        pickle.dump(save_dict, f)
    print('Saved results')
def read_data_sets(train_dir,
                   fake_data=False,
                   one_hot=False,
                   dtype=dtypes.float32,
                   reshape=True,
                   validation_size=5000,
                   seed=None):
    """Download (if needed) and load the train/validation/test datasets.

    Args:
      train_dir: Directory passed to `base.maybe_download` for the archives.
      fake_data: If true, return three fake `DataSet` objects and download
        nothing.
      one_hot: Forwarded to `extract_labels` (and to the fake `DataSet`s).
      dtype: Forwarded to `DataSet`.
      reshape: Forwarded to `DataSet`.
      validation_size: Number of leading training examples moved into the
        validation set.
      seed: Forwarded to `DataSet`.

    Returns:
      A `base.Datasets` with `train`, `validation` and `test` members.

    Raises:
      ValueError: If `validation_size` is negative or larger than the number
        of training images.
    """
    if fake_data:
        fake_kwargs = dict(fake_data=True, one_hot=one_hot, dtype=dtype,
                           seed=seed)
        train = DataSet([], [], **fake_kwargs)
        validation = DataSet([], [], **fake_kwargs)
        test = DataSet([], [], **fake_kwargs)
        return base.Datasets(train=train, validation=validation, test=test)

    def fetch(archive_name, extractor, **extractor_kwargs):
        # Download the archive when missing, then parse it with `extractor`.
        local_file = base.maybe_download(archive_name, train_dir,
                                         SOURCE_URL + archive_name)
        with gfile.Open(local_file, 'rb') as archive:
            return extractor(archive, **extractor_kwargs)

    train_images = fetch('train-images-idx3-ubyte.gz', extract_images)
    train_labels = fetch('train-labels-idx1-ubyte.gz', extract_labels,
                         one_hot=one_hot)
    test_images = fetch('t10k-images-idx3-ubyte.gz', extract_images)
    test_labels = fetch('t10k-labels-idx1-ubyte.gz', extract_labels,
                        one_hot=one_hot)

    n_train_images = len(train_images)
    if validation_size < 0 or validation_size > n_train_images:
        raise ValueError(
            'Validation size should be between 0 and {}. Received: {}.'.format(
                len(train_images), validation_size))

    # The first `validation_size` training examples become the validation set.
    validation_images = train_images[:validation_size]
    validation_labels = train_labels[:validation_size]
    train_images = train_images[validation_size:]
    train_labels = train_labels[validation_size:]

    options = dict(dtype=dtype, reshape=reshape, seed=seed)
    return base.Datasets(
        train=DataSet(train_images, train_labels, **options),
        validation=DataSet(validation_images, validation_labels, **options),
        test=DataSet(test_images, test_labels, **options))
def load_data(data_file):
    """Load gzip-compressed delimited files and split them into train/validation.

    Each line is assigned to the validation set when ``random.random() <
    VALID_TRAINING_DATA_RATIO`` and to the training set otherwise.
    ``DELIMITER``, ``VALID_TRAINING_DATA_RATIO``, ``target_column_num`` and
    ``sample_weight_column_num`` are names defined outside this function. The
    global ``feature_column_nums`` is filled in from the first row (all
    columns except the target and, when non-negative, the sample-weight
    column) if it is None.

    Args:
      data_file: Comma-separated list of file paths.

    Returns:
      Dict with keys ``train_data``, ``train_target``, ``valid_data``,
      ``valid_target``, ``train_data_sample_weight``,
      ``valid_data_sample_weight`` (lists with one entry per row) and
      ``feature_count``.
    """
    global feature_column_nums

    data_file_list = data_file.split(",")
    logging.info("input data %s" % data_file_list)
    logging.info("SELECTED_COLUMN_NUMS" + str(feature_column_nums))

    def new_bucket():
        return {"data": [], "target": [], "weight": [], "pos": 0, "neg": 0}

    train = new_bucket()
    valid = new_bucket()

    line_count = 0
    for file_count, currentFile in enumerate(data_file_list, 1):
        logging.info("Now loading " + currentFile + " Progress: "
                     + str(file_count) + "/" + str(len(data_file_list)) + ".")

        with gfile.Open(currentFile, 'rb') as f:
            # NOTE(review): StringIO only accepts the raw gzip bytes on
            # Python 2; a Python 3 port needs a bytes buffer and decoding.
            with gzip.GzipFile(fileobj=StringIO(f.read())) as gf:
                for line in gf:
                    line_count += 1
                    if line_count % 10000 == 0:
                        logging.info("Total loading lines: " + str(line_count))

                    columns = line.split(DELIMITER)

                    if feature_column_nums is None:
                        # range() has no remove() on Python 3.
                        feature_column_nums = list(range(0, len(columns)))
                        feature_column_nums.remove(target_column_num)
                        if sample_weight_column_num >= 0:
                            feature_column_nums.remove(sample_weight_column_num)

                    # One random draw per row decides which split it joins.
                    if random.random() >= VALID_TRAINING_DATA_RATIO:
                        bucket = train
                    else:
                        bucket = valid

                    bucket["target"].append([float(columns[target_column_num])])
                    if columns[target_column_num] == "1":
                        bucket["pos"] += 1
                    else:
                        bucket["neg"] += 1

                    single_row = []
                    for feature_column_num in feature_column_nums:
                        raw_value = columns[feature_column_num].strip('\n')
                        try:
                            single_row.append(float(raw_value))
                        except ValueError:
                            # NOTE(review): the bad value is skipped, so this
                            # row ends up shorter than the others - confirm
                            # whether a placeholder should be stored instead.
                            logging.info("Could not convert "
                                         + str(raw_value + " to float"))
                            logging.info("feature_column_num: "
                                         + str(feature_column_num))
                    bucket["data"].append(single_row)

                    if 0 <= sample_weight_column_num < len(columns):
                        weight = float(
                            columns[sample_weight_column_num].strip('\n'))
                        if weight < 0.0:
                            logging.info(
                                "Warning: weight is below 0. example:" + line)
                            weight = 1.0
                        bucket["weight"].append([weight])
                    else:
                        bucket["weight"].append([1.0])

    logging.info("Total data count: " + str(line_count) + ".")
    logging.info("Train pos count: " + str(train["pos"]) + ", neg count: "
                 + str(train["neg"]) + ".")
    logging.info("Valid pos count: " + str(valid["pos"]) + ", neg count: "
                 + str(valid["neg"]) + ".")

    # With no input rows the feature columns were never inferred.
    if feature_column_nums is None:
        feature_count = 0
    else:
        feature_count = len(feature_column_nums)

    return {
        "train_data": train["data"],
        "train_target": train["target"],
        "valid_data": valid["data"],
        "valid_target": valid["target"],
        "train_data_sample_weight": train["weight"],
        "valid_data_sample_weight": valid["weight"],
        "feature_count": feature_count,
    }
def txt_line_iterator(path):
    """Yield every line of the file at `path` with surrounding whitespace removed."""
    with gfile.Open(path) as text_file:
        for raw_line in text_file:
            stripped = raw_line.strip()
            yield stripped
def _save_vocab_file(vocab_file, subtoken_list):
    """Write one single-quoted subtoken per line to `vocab_file`."""
    with gfile.Open(vocab_file, mode="w") as out:
        for token in subtoken_list:
            quoted_line = "'%s'\n" % token
            out.write(quoted_line)
def testSelectEverthingDetail(self):
    """Profile the small model with every column selected and check the dump.

    Verifies the header line, and for the 'Conv2D ' and 'DW (3x3x3x6' rows
    that timing, device placement, float ops, op count, input shapes and
    parameter counts were recorded.
    """
    ops.reset_default_graph()
    dev = '/gpu:0' if test.is_gpu_available() else '/cpu:0'
    outfile = os.path.join(test.get_temp_dir(), 'dump')

    selected_columns = [
        'micros', 'bytes', 'params', 'float_ops', 'occurrence', 'device',
        'op_types', 'input_shapes'
    ]
    opts_builder = builder(builder.trainable_variables_parameter())
    opts_builder = opts_builder.with_file_output(outfile)
    opts_builder = opts_builder.with_accounted_types(['.*'])
    opts = opts_builder.select(selected_columns).build()

    def parenthesized_fields(row):
        # Comma-separated fields between the first '(' and the first ')'.
        return row[row.find('(') + 1:row.find(')')].split(',')

    def assert_both_groups_positive(pattern, text):
        # Both captured numbers of `pattern` must be greater than zero.
        mat = re.search(pattern, text)
        self.assertGreater(float(mat.group(1)), 0.0)
        self.assertGreater(float(mat.group(2)), 0.0)

    config = config_pb2.ConfigProto()
    with session.Session(config=config) as sess, ops.device(dev):
        x = lib.BuildSmallModel()

        sess.run(variables.global_variables_initializer())
        run_meta = config_pb2.RunMetadata()
        trace_options = config_pb2.RunOptions(
            trace_level=config_pb2.RunOptions.FULL_TRACE)
        sess.run(x, options=trace_options, run_metadata=run_meta)

        model_analyzer.profile(sess.graph, run_meta, options=opts)

        with gfile.Open(outfile, 'r') as f:
            outputs = f.read().split('\n')

        # pylint: disable=line-too-long
        self.assertEqual(
            outputs[0],
            'node name | # parameters | # float_ops | requested bytes | total execution time | accelerator execution time | cpu execution time | assigned devices | op types | op count (run|defined) | input shapes'
        )
        # pylint: enable=line-too-long

        for row in outputs[1:]:
            if row.find('Conv2D ') > 0:
                metrics = parenthesized_fields(row)

                # Make sure time is profiled.
                gap = 1 if test.is_gpu_available() else 2
                for i in range(3, 6, gap):
                    assert_both_groups_positive('(.*)[um]s/(.*)[um]s',
                                                metrics[i])

                # Make sure device is profiled.
                if test.is_gpu_available():
                    self.assertTrue(metrics[6].find('gpu') > 0)
                    self.assertFalse(metrics[6].find('cpu') > 0)
                else:
                    self.assertFalse(metrics[6].find('gpu') > 0)
                    self.assertTrue(metrics[6].find('cpu') > 0)

                # Make sure float_ops is profiled.
                assert_both_groups_positive('(.*)k/(.*)k flops',
                                            metrics[1].strip())

                # Make sure op_count is profiled.
                self.assertEqual(metrics[8].strip(), '1/1|1/1')

                # Make sure input_shapes is profiled.
                self.assertEqual(metrics[9].strip(), '0:2x6x6x3|1:3x3x3x6')

            if row.find('DW (3x3x3x6') > 0:
                metrics = parenthesized_fields(row)
                assert_both_groups_positive('(.*)/(.*) params',
                                            metrics[1].strip())
def testComplexCodeView(self):
    """Profile the full model in 'code' view and check the dump and the tree.

    The dump is compared (each line cut to 80 characters) with the expected
    text, and the returned root node is checked for its totals and for the
    names of its eight children.
    """
    ops.reset_default_graph()
    outfile = os.path.join(test.get_temp_dir(), 'dump')

    opts_builder = builder(builder.trainable_variables_parameter())
    opts_builder = opts_builder.with_file_output(outfile)
    opts_builder = opts_builder.with_accounted_types(['.*'])
    opts_builder = opts_builder.with_node_names(
        show_name_regexes=['.*model_analyzer_testlib.py.*'])
    opts_builder = opts_builder.account_displayed_op_only(False)
    opts = opts_builder.select(['params', 'float_ops']).build()

    # pylint: disable=line-too-long
    expected_dump = (
        'node name | # parameters | # float_ops\n'
        '_TFProfRoot (--/2.84k params, --/91.04k flops)\n'
        ' model_analyzer_testlib.py:58:BuildFullModel:seq.append(array_... (0/1.80k para\n'
        ' model_analyzer_testlib.py:35:BuildSmallModel:image = array_ops... (0/0 param\n'
        ' model_analyzer_testlib.py:39:BuildSmallModel:initializer=init_... (0/4 param\n'
        ' model_analyzer_testlib.py:43:BuildSmallModel:initializer=init_... (0/648 par\n'
        ' model_analyzer_testlib.py:44:BuildSmallModel:x = nn_ops.conv2d... (0/0 param\n'
        ' model_analyzer_testlib.py:48:BuildSmallModel:initializer=init_... (0/1.15k p\n'
        ' model_analyzer_testlib.py:49:BuildSmallModel:x = nn_ops.conv2d... (0/0 param\n'
        ' model_analyzer_testlib.py:58:BuildFullModel:seq.append(array_... (gradient) (0\n'
        ' model_analyzer_testlib.py:44:BuildSmallModel:x = nn_ops.conv2d... (gradient)\n'
        ' model_analyzer_testlib.py:49:BuildSmallModel:x = nn_ops.conv2d... (gradient)\n'
        ' model_analyzer_testlib.py:62:BuildFullModel:cell, array_ops.c... (0/1.04k para\n'
        ' model_analyzer_testlib.py:62:BuildFullModel:cell, array_ops.c... (gradient) (0\n'
        ' model_analyzer_testlib.py:64:BuildFullModel:target = array_op... (0/0 params, \n'
        ' model_analyzer_testlib.py:65:BuildFullModel:loss = nn_ops.l2_... (0/0 params, \n'
        ' model_analyzer_testlib.py:65:BuildFullModel:loss = nn_ops.l2_... (gradient) (0\n'
        ' model_analyzer_testlib.py:67:BuildFullModel:return sgd_op.min... (0/0 params, \n'
    )
    expected_children = [
        'model_analyzer_testlib.py:58:BuildFullModel:seq.append(array_...',
        'model_analyzer_testlib.py:58:BuildFullModel:seq.append(array_... (gradient)',
        'model_analyzer_testlib.py:62:BuildFullModel:cell, array_ops.c...',
        'model_analyzer_testlib.py:62:BuildFullModel:cell, array_ops.c... (gradient)',
        'model_analyzer_testlib.py:64:BuildFullModel:target = array_op...',
        'model_analyzer_testlib.py:65:BuildFullModel:loss = nn_ops.l2_...',
        'model_analyzer_testlib.py:65:BuildFullModel:loss = nn_ops.l2_... (gradient)',
        'model_analyzer_testlib.py:67:BuildFullModel:return sgd_op.min...',
    ]
    # pylint: enable=line-too-long

    with session.Session() as sess:
        x = lib.BuildFullModel()

        sess.run(variables.global_variables_initializer())
        run_meta = config_pb2.RunMetadata()
        trace_options = config_pb2.RunOptions(
            trace_level=config_pb2.RunOptions.FULL_TRACE)
        sess.run(x, options=trace_options, run_metadata=run_meta)

        tfprof_node = model_analyzer.profile(
            sess.graph, run_meta, cmd='code', options=opts)

        with gfile.Open(outfile, 'r') as f:
            lines = f.read().split('\n')
        # Slicing clamps to the line length, so short lines stay intact.
        result = '\n'.join(line[:80] for line in lines)
        self.assertEqual(expected_dump, result)

        self.assertLess(0, tfprof_node.total_exec_micros)
        self.assertEqual(2844, tfprof_node.total_parameters)
        self.assertEqual(91040, tfprof_node.total_float_ops)
        self.assertEqual(8, len(tfprof_node.children))
        self.assertEqual('_TFProfRoot', tfprof_node.name)
        # The loop has exactly as many entries as the asserted child count.
        for position, expected_name in enumerate(expected_children):
            self.assertEqual(expected_name,
                             tfprof_node.children[position].name)
def _restore(self, path):
    """Rebuild this estimator from the files stored under `path`.

    A new graph is created from `graph.pbtxt`, the tensors/ops named in
    `endpoints` are re-bound, a saver is built from `saver.pbtxt`, a new
    session is opened and the latest checkpoint is restored into it. Any
    model provided earlier is ignored.

    Args:
      path: Path to checkpoints and other information.

    Raises:
      ValueError: If the endpoints, graph definition, saver definition or
        checkpoint files are missing.
    """
    # Currently Saver requires absolute path to work correctly.
    path = os.path.abspath(path)

    def read_required(basename, missing_message):
        # Return the text of path/basename, or raise if the file is absent.
        filename = os.path.join(path, basename)
        if not os.path.exists(filename):
            raise ValueError(missing_message)
        with gfile.Open(filename) as handle:
            return handle.read()

    self._graph = ops.Graph()
    with self._graph.as_default():
        endpoints = read_required(
            'endpoints',
            "Restore folder doesn't contain endpoints.").split('\n')

        graph_text = read_required(
            'graph.pbtxt',
            "Restore folder doesn't contain graph definition.")
        graph_def = graph_pb2.GraphDef()
        text_format.Merge(graph_text, graph_def)
        (self._inp, self._out, self._model_predictions,
         self._model_loss) = importer.import_graph_def(
             graph_def, name='', return_elements=endpoints)

        saver_text = read_required(
            'saver.pbtxt',
            "Restore folder doesn't contain saver definition.")
        saver_def = train.SaverDef()
        text_format.Merge(saver_text, saver_def)
        self._saver = train.Saver(saver_def=saver_def)

        # Restore trainer.
        self._global_step = self._graph.get_tensor_by_name('global_step:0')
        self._train = self._graph.get_operation_by_name('train')

        # Restore summaries.
        self._summaries = self._graph.get_operation_by_name(
            'MergeSummary/MergeSummary')

        # Restore session.
        if not isinstance(self._config, RunConfig):
            self._config = RunConfig(verbose=self.verbose)
        self._session = session.Session(self._config.tf_master,
                                        config=self._config.tf_config)

        checkpoint_path = train.latest_checkpoint(path)
        if checkpoint_path is None:
            raise ValueError(
                "Missing checkpoint files in the %s. Please "
                "make sure you are you have checkpoint file that describes "
                "latest checkpoints and appropriate checkpoints are there. "
                "If you have moved the folder, you at this point need to "
                "update manually update the paths in the checkpoint file."
                % path)
        self._saver.restore(self._session, checkpoint_path)

    # Set to be initialized.
    self._initialized = True
def spike_triggered_clustering(X, Y, Ns, tms_tr, tms_tst, K=None, b=None,
                               steps_max=10000, eps=1e-6,
                               projection_type=None, neighbor_mat=None,
                               lam_proj=0, eps_proj=0.01,
                               save_filename_partial=None,
                               fitting_phases=(1, 2, 3)):
  """Subunit estimation using spike triggered clustering.

  The fitting proceeds in three phases -
  First phase: Ignoring the output nonlinearity and soft-clustering of
    spike triggered stimuli to estimate K and b.
  Second phase: Fix K, optimize b and output nonlinearity by gradient descent.
  Third phase : Optimize K, b and the nonlinearity by gradient descent.

  If `save_filename_partial` names an existing checkpoint, phase one resumes
  from it; if the checkpoint cannot be read, fitting silently restarts from a
  fresh initialization (best effort). During phase one a checkpoint is
  written every 100 iterations when `save_filename_partial` is given.

  Args:
    X : Stimulus (dims: # samples x # pixels).
    Y: Responses (dims: # samples x # cells).
    Ns: Number of subunits (scalar).
    tms_tr: Sample indices used for training (dims: # training samples).
    tms_tst: Samples indices for validation (dims: # validation samples).
    K: Initial subunit filter (dims: # pixels x Ns).
    b: Initial weights for different subunits (dims: Ns x # cells).
    steps_max: Maximum number of steps for first phase.
    eps: Threshold change of loss, for convergence.
    projection_type: Regularization type ('lnl1' or 'l1').
    neighbor_mat: Adjacency matrix for pixels (dims: # pixels x # pixels).
      Defaults to the identity when `projection_type` is 'lnl1'.
    lam_proj: Regularization strength.
    eps_proj: Hyperparameter for 'lnl1' regularization.
    save_filename_partial: Checkpoint filename, or None to disable
      checkpointing.
    fitting_phases: Phases of fitting to be applied
      (sequence with elements from {1, 2, 3}).

  Returns:
    K : Final subunit filters, (dims: # n_pix x #SU).
    b : Final subunit weights for different cells (dims: # SU x # cells).
    alpha: Softmax weights for each stimulus and
      different subunits (dims: # samples x Ns), as computed for the last
      cell in the last phase-one iteration; None if phase one never ran.
    lam_log: Training loss curve.
    lam_log_test: Validation loss curve.
    fitting_phase: The phase (1/2/3) corresponding to each iteration.
    fit_params: Outputs (K, b, nonlinearity parameters)
      after each fitting phase.
  """
  # X is T x mask.
  X_tr = X[tms_tr, :]
  Y_tr = Y[tms_tr, :]
  X_test = X[tms_tst, :]
  Y_test = Y[tms_tst, :]

  Tlen = Y_tr.shape[0]
  N1 = X_tr.shape[1]
  n_cells = Y.shape[1]
  sample_idx = np.arange(Tlen).astype(np.int64)
  # Stimulus second-moment matrix over the training samples.
  Sigma = np.dot(X_tr.transpose(), X_tr) / float(X_tr.shape[0])

  if projection_type == 'lnl1' and neighbor_mat is None:
    neighbor_mat = np.eye(N1)

  # Load previously saved data. The file name is optional, so only touch the
  # file system when one was actually provided.
  resumed = False
  if (save_filename_partial is not None and
      gfile.Exists(save_filename_partial)):
    try:
      # NOTE: pickle executes arbitrary code on load; the checkpoint must
      # come from a trusted location.
      with gfile.Open(save_filename_partial, 'rb') as partial_file:
        data = pickle.load(partial_file)
      K = data['K']
      b = data['b']
      lam_log = data['lam_log']
      lam_log_test = data['lam_log_test']
      irepeat_start = data['irepeat']
      lam = lam_log[-1]
      lam_test = lam_log_test[-1]
      lam_min = data['lam_min']
      K_min = data['K_min']
      b_min = data['b_min']
      resumed = True
    except Exception:  # pylint: disable=broad-except
      # Deliberate best effort: an unreadable or incomplete checkpoint falls
      # back to a fresh start (keeping any K / b obtained so far).
      resumed = False

  if not resumed:
    # Initialize filters. `rng` is not defined in this function; presumably a
    # module-level random state -- TODO confirm.
    if K is None:
      K = 2 * rng.rand(N1, Ns) - 0.5
    K_min = np.copy(K)
    if b is None:
      b = 2 * rng.rand(Ns, n_cells) - 0.5
    b_min = np.copy(b)
    lam_log = np.zeros((0, n_cells))
    lam_log_test = np.zeros((0, n_cells))
    lam = np.inf
    lam_test = np.inf
    lam_min = np.inf
    irepeat_start = 0

  # Defined up front so that skipping phase one (or resuming past
  # `steps_max`) neither breaks the later phases nor the return statement.
  alpha = None
  nl_params = np.repeat(np.expand_dims(np.array([1.0, 0.0]), 1), n_cells, 1)

  fitting_phase = np.array([])
  fit_params = []

  # Find subunits - no output NL
  if 1 in fitting_phases:
    for irepeat in range(irepeat_start, int(steps_max)):

      # Periodically checkpoint the state of the fit.
      if irepeat % 100 == 99 and save_filename_partial is not None:
        save_dict = {'K': K, 'b': b, 'lam_log': lam_log,
                     'lam_log_test': lam_log_test, 'irepeat': irepeat,
                     'K_min': K_min, 'b_min': b_min, 'lam_min': lam_min}
        with gfile.Open(save_filename_partial, 'wb') as partial_file:
          pickle.dump(save_dict, partial_file)

      # compute reweighted L1 weights (from K before this iteration's update)
      if projection_type == 'lnl1':
        wts = 1 / (neighbor_mat.dot(np.abs(K)) + eps_proj)

      # test data
      _, lam_test = compute_fr_loss(K, b, X_test, Y_test)
      lam_log_test = np.append(lam_log_test, np.expand_dims(lam_test, 0), 0)

      # train data
      lam_prev = np.copy(lam)
      _, lam = compute_fr_loss(K, b, X_tr, Y_tr)
      lam_log = np.append(lam_log, np.expand_dims(lam, 0), 0)

      # Track the parameters with the lowest summed training loss.
      if np.sum(lam) <= np.sum(lam_min):
        K_min = np.copy(K)
        b_min = np.copy(b)
        lam_min = np.copy(lam)

      K_new_list_nr = []
      mean_ass_f_list = []
      # The projection of the stimulus on K is shared by all cells.
      stim_proj = np.dot(X_tr, K)
      for icell in range(n_cells):
        t_sp = sample_idx[Y_tr[:, icell] != 0]
        Y_tsp = Y_tr[t_sp, icell]

        # Soft assignment of every sample to the subunits.
        f = np.exp(stim_proj + b[:, icell])
        alpha = (f.transpose() / f.sum(1)).transpose()
        xx = (Y_tsp.transpose() * alpha[t_sp, :].T).T
        sta_f = X_tr[t_sp, :].transpose().dot(xx)
        mean_ass_f = xx.sum(0)

        K_new_list_nr.append(np.linalg.solve(Sigma, sta_f))
        mean_ass_f_list.append(mean_ass_f)

      K_new_list_nr = np.array(K_new_list_nr)
      K_new_list_dr = np.array(mean_ass_f_list)
      mean_ass_f_list = K_new_list_dr.T  # dims: Ns x # cells

      # recompute ??
      K = np.mean(K_new_list_nr, 0) / np.mean(K_new_list_dr, 0)

      # Soft thresholding for K
      if projection_type == 'lnl1':
        K = (np.maximum(K - (wts * lam_proj), 0) -
             np.maximum(- K - (wts * lam_proj), 0))

      if projection_type == 'l1':
        K = np.maximum(K - lam_proj, 0) - np.maximum(- K - lam_proj, 0)

      # 1.0 / Tlen keeps this a true division under Python 2 as well.
      b = (np.log((1.0 / Tlen) * mean_ass_f_list) -
           np.expand_dims(np.diag(0.5 * K.transpose().dot(Sigma.dot(K))), 1))

      if np.sum(np.abs(lam_prev - lam)) < eps:
        # The losses are appended to logs with one column per cell, so they
        # are summed before being formatted as scalars.
        print('Subunits fitted, Train loss: %.7f, '
              'Test loss: %.7f after %d iterations' % (np.sum(lam),
                                                       np.sum(lam_test),
                                                       irepeat))
        break

    fitting_phase = np.append(fitting_phase, np.ones(lam_log.shape[0]))
    fit_params += [[np.copy(K_min), np.copy(b_min), nl_params]]

  # fit NL + b + Kscale
  if 2 in fitting_phases:
    K, b, nl_params, loss_log, loss_log_test = fit_scales(
        X_tr, Y_tr, X_test, Y_test, Ns=Ns, K=K, b=b, params=nl_params,
        lr=0.001, eps=eps)

    lam_log = np.append(lam_log, np.array(loss_log), 0)
    lam_log_test = np.append(lam_log_test, np.array(loss_log_test), 0)
    fitting_phase = np.append(fitting_phase,
                              2 * np.ones(np.array(loss_log).shape[0]))
    fit_params += [[np.copy(K), np.copy(b), nl_params]]

  # Fit all params
  if 3 in fitting_phases:
    K, b, nl_params, loss_log, loss_log_test = fit_all(
        X_tr, Y_tr, X_test, Y_test, Ns=Ns, K=K, b=b, train_phase=3,
        params=nl_params, lr=0.001, eps=eps)

    lam_log = np.append(lam_log, np.array(loss_log), 0)
    lam_log_test = np.append(lam_log_test, np.array(loss_log_test), 0)
    fitting_phase = np.append(fitting_phase,
                              3 * np.ones(np.array(loss_log).shape[0]))
    fit_params += [[np.copy(K), np.copy(b), nl_params]]

  return K, b, alpha, lam_log, lam_log_test, fitting_phase, fit_params
def format_tensor(tensor,
                  tensor_name,
                  np_printoptions,
                  print_all=False,
                  tensor_slicing=None,
                  highlight_options=None,
                  include_numeric_summary=False,
                  write_path=None):
  """Build the rich-text rendering of a tensor, or of a slice of it.

  Args:
    tensor: (numpy ndarray) The tensor value.
    tensor_name: (str) Name of the tensor, e.g., the tensor's debug watch key.
    np_printoptions: (dict) Numpy tensor formatting options. Its "threshold"
      entry is overwritten in place by this function.
    print_all: (bool) Whether the tensor is to be displayed in its entirety,
      instead of printing ellipses, even if its number of elements exceeds
      the default numpy display threshold.
      (Note: Even if this is set to true, the screen output can still be cut
       off by the UI frontend if it consist of more lines than the frontend
       can handle.)
    tensor_slicing: (str or None) Slicing of the tensor, e.g., "[:, 1]". If
      None, no slicing will be performed on the tensor.
    highlight_options: (tensor_format.HighlightOptions) options to highlight
      elements of the tensor. See the doc of tensor_format.format_tensor()
      for more details.
    include_numeric_summary: Whether a text summary of the numeric values (if
      applicable) will be included.
    write_path: A path to save the tensor value (after any slicing) to
      (optional). `numpy.save()` is used to save the value.

  Returns:
    An instance of `debugger_cli_common.RichTextLines` representing the
    (potentially sliced) tensor.
  """
  # Start from the unsliced tensor and narrow it down only on request.
  value, sliced_name = tensor, tensor_name
  if tensor_slicing:
    # Validate the indexing.
    value = command_parser.evaluate_tensor_slice(tensor, tensor_slicing)
    sliced_name = tensor_name + tensor_slicing

  auxiliary_message = None
  if write_path:
    with gfile.Open(write_path, "wb") as output_file:
      np.save(output_file, value)
    # Report where the value went and how large the written file is.
    saved_size = bytes_to_readable_str(gfile.Stat(write_path).length)
    line = debugger_cli_common.RichLine("Saved value to: ")
    line += debugger_cli_common.RichLine(write_path, font_attr="bold")
    line += " (%sB)" % saved_size
    auxiliary_message = debugger_cli_common.rich_text_lines_from_rich_line_list(
        [line, debugger_cli_common.RichLine("")])

  np_printoptions["threshold"] = (
      value.size if print_all else DEFAULT_NDARRAY_DISPLAY_THRESHOLD)

  return tensor_format.format_tensor(
      value,
      sliced_name,
      include_metadata=True,
      include_numeric_summary=include_numeric_summary,
      auxiliary_message=auxiliary_message,
      np_printoptions=np_printoptions,
      highlight_options=highlight_options)
def main(argv):
  """Fit subunits jointly for the cells selected by the current task.

  Reads line number `FLAGS.taskid` of `FLAGS.task_params_file`, whose
  ';'-separated fields are: a bracketed, comma-separated list of cell indices
  (into the listing of `FLAGS.src_dir`), the number of subunits, the
  projection type, the regularization strength and the partition index.
  The data file of every selected cell is copied to `FLAGS.tmp_dir` (unless
  already there) and loaded; a population mask, response matrix and stimulus
  matrix are assembled and handed to `get_su_nsub`.

  Args:
    argv: Command line arguments (unused).
  """
  # parse task params
  # read line corresponding to task
  with gfile.Open(FLAGS.task_params_file, 'r') as f:
    for _ in range(FLAGS.taskid + 1):
      line = f.readline()
  print(line)

  # get task parameters by parsing the line.
  line_split = line.split(';')
  cells = gfile.ListDirectory(FLAGS.src_dir)
  cell_idx = line_split[0][1:-1].split(',')  # strip the enclosing brackets
  nsub = int(line_split[1])
  projection_type = line_split[2]
  lam_proj = float(line_split[3])
  # int() ignores surrounding whitespace, so the trailing newline needs no
  # manual stripping (which would eat a digit when the newline is absent).
  ipartition = int(line_split[4])

  # Copy data for all the data
  cell_str_final = ''
  dst_log = []
  for icell in cell_idx:
    cell_string = cells[int(icell)]
    cell_str_final += cell_string

    # copy data for the corresponding task
    dst = os.path.join(FLAGS.tmp_dir, cell_string)
    dst_log.append(dst)
    if not gfile.Exists(dst):
      print('Started Copy')
      src = os.path.join(FLAGS.src_dir, cell_string)
      if not gfile.IsDirectory(FLAGS.tmp_dir):
        gfile.MkDir(FLAGS.tmp_dir)
      gfile.Copy(src, dst)
      print('File %s copied to destination' % cell_string)
    else:
      print('File %s exists' % cell_string)

  # Load data for different cells
  stim_log = []
  resp_log = []
  mask_matrix_log = []
  for dst in dst_log:
    print('Loading %s' % dst)
    # Open read-only and close the file once the arrays have been copied out.
    with h5py.File(dst, 'r') as data:
      stimulus = np.array(data.get('stimulus'))
      response = np.squeeze(np.array(data.get('response')))
      mask_matrix = np.array(data.get('mask'))
    # drop the last frame so that it's the same size as the binned spike train
    stimulus = stimulus[:-1, :]
    response = np.expand_dims(response, 1)

    stim_log.append(stimulus)
    resp_log.append(response)
    mask_matrix_log.append(mask_matrix)

  # Prepare for fitting across multiple cells
  # Get total mask
  mask_matrix_pop = np.array(mask_matrix_log).sum(0) > 0

  # Get total response, truncated to the shortest recording among ALL loaded
  # cells (not just the first four).
  resp_len = min(resp.shape[0] for resp in resp_log)
  response_pop = np.zeros((resp_len, len(resp_log)))
  for icell, resp in enumerate(resp_log):
    response_pop[:, icell] = resp[:resp_len, 0]

  # Get total stimulus.
  n_pix = int(mask_matrix_pop.sum())
  stimulus_pop = np.zeros((resp_len, n_pix))

  # Find non-zero locations for each mask element
  nnz_log = [np.where(imask > 0) for imask in mask_matrix_log]
  nnz_pop = np.where(mask_matrix_pop > 0)

  for ipix in range(n_pix):
    print(ipix)
    r = nnz_pop[0][ipix]
    c = nnz_pop[1][ipix]

    # Average the pixel's stimulus over every cell whose mask contains it.
    stim_pix = np.zeros(resp_len)
    nc = 0
    for icell, nnz_cell in enumerate(nnz_log):
      pix_cell_bool = np.logical_and(nnz_cell[0] == r, nnz_cell[1] == c)
      if pix_cell_bool.sum() > 0:
        pix_cell = np.where(pix_cell_bool > 0)[0][0]
        stim_pix += stim_log[icell][:resp_len, pix_cell]
        nc += 1

    if nc == 0:
      print('Error')

    stim_pix = stim_pix / nc
    stimulus_pop[:, ipix] = stim_pix

  # Fit with a given number of subunits
  print('Starting fitting')
  get_su_nsub(stimulus_pop, response_pop, mask_matrix_pop, cell_str_final,
              nsub, projection_type, lam_proj, ipartition)
def convert(saved_model_dir,
            output_tflite=None,
            output_arrays=None,
            tag_set=None,
            signature_key=signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY,
            batch_size=1):
  """Convert a savedmodel to tflite flatbuffer.

  Args:
    saved_model_dir: Saved model directory to convert.
    output_tflite: File path to write result flatbuffer.
    output_arrays: Output tensor names to keep, given either as a
      comma-separated string (e.g. "out_a,out_b") or as a list/tuple of
      names. The default value is None, which means conversion keeps all
      output tensors. This is also used to filter tensors that are from Op
      currently not supported in tflite, e.g., Argmax).
    tag_set: This is the set of tags to get meta_graph_def in saved_model.
    signature_key: This is the signature key to extract inputs, outputs.
    batch_size: If input tensor shape has None at first dimension,
      e.g. (None,224,224,3), replace None with batch_size.

  Returns:
    The converted data. For example if tflite was the destination, then
    this will be a tflite flatbuffer in a bytes array.

  Raises:
    ValueError: If tag_set does not indicate any meta_graph_def in saved_model,
      or signature_key is not in relevant meta_graph_def,
      or input shape has None beyond 1st dimension, e.g., (1,None, None, 3),
      or given output_arrays are not valid causing empty outputs.
  """
  if tag_set is None:
    tag_set = set([tag_constants.SERVING])

  meta_graph = get_meta_graph_def(saved_model_dir, tag_set)
  signature_def = get_signature_def(meta_graph, signature_key)
  inputs, outputs = get_inputs_outputs(signature_def)

  graph = ops.Graph()
  with session.Session(graph=graph) as sess:

    loader.load(sess, meta_graph.meta_info_def.tags, saved_model_dir)

    in_tensors = [graph.get_tensor_by_name(input_) for input_ in inputs]

    # Users can use output_arrays to filter output tensors for conversion.
    # If output_arrays is None, we keep all output tensors. In future, we may
    # use tflite supported Op list and check whether op is custom Op to
    # automatically filter output arrays.
    # TODO(zhixianyan): Use tflite supported Op list to filter outputs.
    if output_arrays is not None:
      if hasattr(output_arrays, "split"):
        # Comma-separated string form.
        output_arrays = output_arrays.split(",")
      else:
        # Already a sequence of names, as the documentation advertises.
        output_arrays = list(output_arrays)
      out_tensors = [
          graph.get_tensor_by_name(output)
          for output in outputs
          if output.split(":")[0] in output_arrays
      ]
    else:
      out_tensors = [graph.get_tensor_by_name(output) for output in outputs]

    output_names = [node.split(":")[0] for node in outputs]

    if not out_tensors:
      raise ValueError(
          "No valid output tensors for '{}', possible values are '{}'".format(
              output_arrays, output_names))

    frozen_graph_def = tf_graph_util.convert_variables_to_constants(
        sess, graph.as_graph_def(), output_names)

    # Toco requires fully defined tensor shape, for input tensor with None in
    # their shape, e.g., (None, 224, 224, 3), we need to replace first None with
    # a given batch size. For shape with more None, e.g. (None, None, None, 3),
    # still be able to replace and convert, but require further investigation.
    # TODO(zhixianyan): Add supports for input tensor with more None in shape.
    for i, in_tensor in enumerate(in_tensors):
      shape = in_tensor.get_shape().as_list()
      if shape[0] is None:
        shape[0] = batch_size
      if None in shape[1:]:
        raise ValueError(
            "Only support None shape at 1st dim as batch_size. But tensor "
            "'{}' 's shape '{}' has None at other dimension. ".format(
                inputs[i], shape))
      in_tensor.set_shape(shape)

    result = lite.toco_convert(frozen_graph_def, in_tensors, out_tensors)

    if output_tflite is not None:
      with gfile.Open(output_tflite, "wb") as f:
        f.write(result)
      logging.info("Successfully converted to: %s", output_tflite)

    return result
def get_su_nsub(stimulus, response, mask_matrix, cell_string, nsub,
                projection_type, lam_proj, ipartition):
  """Get 'nsub' subunits.

  Splits the samples into a fixed test set (the last 10%) and ten random
  train/validation partitions of the remainder, fits the subunit model on
  partition `ipartition` and pickles the result under `FLAGS.save_path`.
  Nothing is fitted if the result file already exists.

  Args:
    stimulus: Stimulus array; its first axis indexes samples.
    response: Response array passed through to the fitting routine.
    mask_matrix: Pixel mask used to build the pixel neighborhood matrix.
    cell_string: String identifying the cell(s), used in the file names.
    nsub: Number of subunits to fit.
    projection_type: Regularization type forwarded to the fitting routine.
    lam_proj: Regularization strength forwarded to the fitting routine.
    ipartition: Index (0-9) of the train/validation partition to use.
  """
  # The seed fixes the partitions so that separate tasks agree on them.
  np.random.seed(95)  # 23 for _jnt.pkl, 46 for _jnt_2.pkl, 93 for _nov, _jan

  # Get a few (5) training, testing, validation partitions

  # get last 10% as test data
  n_samples = stimulus.shape[0]
  frac_test = 0.1
  n_train_validate = int(np.floor(n_samples * (1 - frac_test)))
  tms_test = np.arange(n_train_validate, n_samples)

  # Random partitions
  n_partitions = 10
  tms_train_validate = np.arange(0, n_train_validate)

  frac_validate = 0.1
  # Slice bounds must be integers; np.floor returns a float.
  n_train = int(np.floor((1 - frac_validate) * tms_train_validate.shape[0]))

  partitions = []
  for _ in range(n_partitions):
    perm = np.random.permutation(tms_train_validate)
    partitions.append({
        'tms_train': perm[:n_train],
        'tms_validate': perm[n_train:],
        'tms_test': tms_test
    })

  print('Made partitions')

  # do fitting for different lambdas
  neighbor_mat = su_model.get_neighbormat(mask_matrix, nbd=1)
  file_name = 'Cell_%s_nsub_%d_%s_%.6f_part_%d_%s.pkl' % (
      cell_string, nsub, projection_type, lam_proj, ipartition,
      FLAGS.save_suffix)
  save_name = os.path.join(FLAGS.save_path, file_name)
  save_filename_partial = os.path.join(FLAGS.save_path_partial, file_name)

  if not gfile.Exists(save_name):
    print(cell_string, nsub, projection_type, lam_proj, ipartition)
    op = su_model.Flat_clustering_jnt(
        stimulus,
        response,
        nsub,
        partitions[ipartition]['tms_train'],
        partitions[ipartition]['tms_validate'],
        steps_max=10000,
        eps=1e-9,
        projection_type=projection_type,
        neighbor_mat=neighbor_mat,
        lam_proj=lam_proj,
        eps_proj=0.01,
        save_filename_partial=save_filename_partial)

    # The third returned element is not saved.
    (k_f, b_f, _, loss_log_f, loss_log_test_f, fitting_phase_f,
     fit_params_f) = op

    print('Fitting done')
    save_dict = {
        'K': k_f,
        'b': b_f,
        'loss_log': loss_log_f,
        'loss_log_test': loss_log_test_f,
        'fitting_phase': fitting_phase_f,
        'fit_params': fit_params_f
    }
    # Close (and thereby flush) the file explicitly; pickles are binary data.
    with gfile.Open(save_name, 'wb') as save_file:
      pickle.dump(save_dict, save_file)
    print('Saved results')
def save(self, filename):
  """Pickle this object and write the resulting bytes to `filename`.

  Args:
    filename: Destination path, opened through `gfile` in binary mode.
  """
  with gfile.Open(filename, 'wb') as sink:
    serialized = pickle.dumps(self)
    sink.write(serialized)
def _load_graph_def_from_event_file(event_file_path):
  """Read one serialized Event from a file and return its graph definition.

  Args:
    event_file_path: Path of a file whose whole content is parsed as a single
      `event_pb2.Event` message.

  Returns:
    The `graph_pb2.GraphDef` parsed from the event's `graph_def` field.
  """
  with gfile.Open(event_file_path, "rb") as event_file:
    serialized_event = event_file.read()

  parsed_event = event_pb2.Event()
  parsed_event.ParseFromString(serialized_event)
  return graph_pb2.GraphDef.FromString(parsed_event.graph_def)
def extract_cifar10(local_url, data_dir):
  """
  Extracts the CIFAR-10 dataset and return numpy arrays with the different sets.

  When the four pre-processed ``.npy`` dumps already exist in ``data_dir``
  they are loaded directly. Otherwise the archive is unpacked (unless all
  batch files are already extracted), the batches are converted to arrays
  and the dumps are written for the next call.

  :param local_url: where the tar.gz archive is located locally
  :param data_dir: where to extract the archive's file
  :return: a tuple (train data, train labels, test data, test labels)
  """
  # These numpy dumps can be reloaded to avoid performing the pre-processing
  # if they exist in the working directory.
  # Changing the order of this list will ruin the indices below.
  preprocessed_files = ['/cifar10_train.npy',
                        '/cifar10_train_labels.npy',
                        '/cifar10_test.npy',
                        '/cifar10_test_labels.npy']

  all_preprocessed = all(
      gfile.Exists(data_dir + name) for name in preprocessed_files)

  if all_preprocessed:
    # Reload pre-processed training and testing data from numpy dumps.
    # .npy files are binary, so they are opened in binary mode.
    arrays = []
    for name in preprocessed_files:
      with gfile.Open(data_dir + name, mode='rb') as file_obj:
        arrays.append(np.load(file_obj))
    train_data, train_labels, test_data, test_labels = arrays

  else:
    # Do everything from scratch
    batches_dir = data_dir + "/cifar-10-batches-py/"

    # Define lists of all files we should extract
    train_files = ["data_batch_" + str(i) for i in range(1, 6)]
    test_file = "test_batch"
    cifar10_files = train_files + [test_file]

    # Check if all files have already been extracted. The batches live in
    # the directory created by the archive, not in the working directory.
    need_to_unpack = not all(
        gfile.Exists(batches_dir + name) for name in cifar10_files)

    # We have to unpack the archive
    if need_to_unpack:
      # NOTE(review): extractall() trusts the member paths stored in the
      # archive; only use it on archives from a trusted source.
      with tarfile.open(local_url, 'r:gz') as archive:
        archive.extractall(data_dir)

    # Load training images and labels
    images = []
    labels = []
    for name in train_files:
      # Unpickle dictionary and extract images and labels
      images_tmp, labels_tmp = unpickle_cifar_dic(batches_dir + name)

      # Append to lists
      images.append(images_tmp)
      labels.append(labels_tmp)

    # Convert to numpy arrays and reshape in the expected format
    train_data = np.asarray(images, dtype=np.float32).reshape((50000, 3, 32, 32))
    train_data = np.swapaxes(train_data, 1, 3)
    train_labels = np.asarray(labels, dtype=np.int32).reshape(50000)

    # Save so we don't have to do this again
    np.save(data_dir + preprocessed_files[0], train_data)
    np.save(data_dir + preprocessed_files[1], train_labels)

    # Load test images and labels
    test_images_raw, test_labels_raw = unpickle_cifar_dic(
        batches_dir + test_file)

    # Convert to numpy arrays and reshape in the expected format
    test_data = np.asarray(
        test_images_raw, dtype=np.float32).reshape((10000, 3, 32, 32))
    test_data = np.swapaxes(test_data, 1, 3)
    test_labels = np.asarray(test_labels_raw, dtype=np.int32).reshape(10000)

    # Save so we don't have to do this again
    np.save(data_dir + preprocessed_files[2], test_data)
    np.save(data_dir + preprocessed_files[3], test_labels)

  return train_data, train_labels, test_data, test_labels
def read_data_sets(train_dir,
                   fake_data=False,
                   one_hot=False,
                   dtype=dtypes.float32,
                   reshape=True,
                   validation_size=5000,
                   seed=None,
                   source_url=DEFAULT_SOURCE_URL):
  """Load a dataset and split it into train, validation and test sets.

  The dataset is selected by the last path component of `train_dir`:
  "cifar" (loaded through `tf.keras.datasets.cifar10`), "circles"
  (generated with scikit-learn), or "mnist" / "fashion" (idx archives
  downloaded into `train_dir` from `source_url`).

  Args:
    train_dir: Directory for the data; its last component names the dataset.
    fake_data: If True, return three empty fake `DataSet`s.
    one_hot: Whether "mnist" / "fashion" labels are one-hot encoded. Labels
      of "cifar" and "circles" are always converted with `to_categorical`.
    dtype: Forwarded to `DataSet`.
    reshape: Forwarded to `DataSet`.
    validation_size: Number of leading training examples moved to the
      validation set.
    seed: Forwarded to `DataSet`.
    source_url: Base URL of the idx archives; an empty value selects
      `DEFAULT_SOURCE_URL`.

  Returns:
    A `base.Datasets` with `train`, `validation` and `test` members.

  Raises:
    NotImplementedError: For "s-curve" and "swiss-roll", whose loading was
      never completed.
    ValueError: If the dataset name is unknown, or `validation_size` is not
      between 0 and the number of training examples.
  """
  if fake_data:

    def fake():
      return DataSet(
          [], [], fake_data=True, one_hot=one_hot, dtype=dtype, seed=seed)

    train = fake()
    validation = fake()
    test = fake()
    return base.Datasets(train=train, validation=validation, test=test)

  if not source_url:  # empty string check
    source_url = DEFAULT_SOURCE_URL

  # Tolerate a trailing slash, which would otherwise give an empty name.
  dataset = train_dir.rstrip("/").split("/")[-1]

  if dataset == "cifar":
    (train_images, train_labels), (test_images, test_labels) = \
        tf.keras.datasets.cifar10.load_data()
    train_labels = tf.keras.utils.to_categorical(train_labels, 10)
    test_labels = tf.keras.utils.to_categorical(test_labels, 10)
    train_images = train_images / 255.0
    test_images = test_images / 255.0

  elif dataset == "circles":
    from sklearn import datasets
    X, y = datasets.make_circles(n_samples=70000, factor=.5, noise=.05)
    X = (X - X.min()) / (X.max() - X.min())  # rescale to [0, 1]
    train_images = X
    train_labels = tf.keras.utils.to_categorical(y, 2)
    # The first 10000 samples form the test set, the rest the training set.
    test_images = train_images[:10000]
    test_labels = train_labels[:10000]
    train_images = train_images[10000:]
    train_labels = train_labels[10000:]

  elif dataset in ("s-curve", "swiss-roll"):
    # These branches only ever generated raw 3-d samples (the swiss-roll one
    # without importing sklearn) and never produced train/test splits, so
    # they always crashed further down. Fail with an explicit message.
    raise NotImplementedError(
        "Dataset '{}' is not supported yet.".format(dataset))

  elif dataset == "mnist" or dataset == "fashion":
    TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
    TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'
    TEST_IMAGES = 't10k-images-idx3-ubyte.gz'
    TEST_LABELS = 't10k-labels-idx1-ubyte.gz'

    local_file = base.maybe_download(TRAIN_IMAGES, train_dir,
                                     source_url + TRAIN_IMAGES)
    with gfile.Open(local_file, 'rb') as f:
      train_images = extract_images(f)

    local_file = base.maybe_download(TRAIN_LABELS, train_dir,
                                     source_url + TRAIN_LABELS)
    with gfile.Open(local_file, 'rb') as f:
      train_labels = extract_labels(f, one_hot=one_hot)

    local_file = base.maybe_download(TEST_IMAGES, train_dir,
                                     source_url + TEST_IMAGES)
    with gfile.Open(local_file, 'rb') as f:
      test_images = extract_images(f)

    local_file = base.maybe_download(TEST_LABELS, train_dir,
                                     source_url + TEST_LABELS)
    with gfile.Open(local_file, 'rb') as f:
      test_labels = extract_labels(f, one_hot=one_hot)

  else:
    raise ValueError(
        "Unknown dataset '{}' (taken from the last component of "
        "train_dir).".format(dataset))

  if not 0 <= validation_size <= len(train_images):
    raise ValueError(
        'Validation size should be between 0 and {}. Received: {}.'.format(
            len(train_images), validation_size))

  validation_images = train_images[:validation_size]
  validation_labels = train_labels[:validation_size]
  train_images = train_images[validation_size:]
  train_labels = train_labels[validation_size:]

  options = dict(dtype=dtype, reshape=reshape, seed=seed)

  train = DataSet(train_images, train_labels, **options)
  validation = DataSet(validation_images, validation_labels, **options)
  test = DataSet(test_images, test_labels, **options)

  return base.Datasets(train=train, validation=validation, test=test)