def store_and_or_load_data(dataset_info, outputdir):
    if dataset_info.endswith('.pkl'):
        save_path = dataset_info
    else:
        dataset = os.path.basename(dataset_info)
        data_dir = os.path.dirname(dataset_info)
        save_path = os.path.join(outputdir, dataset + '_Manager.pkl')
    if not os.path.exists(save_path):
        lock = lockfile.LockFile(save_path)
        while not lock.i_am_locking():
            try:
                lock.acquire(timeout=60)  # wait up to 60 seconds
            except lockfile.LockTimeout:
                lock.break_lock()
                lock.acquire()
        print('I locked', lock.path)
        # It is not yet certain whether the file already exists
        try:
            if not os.path.exists(save_path):
                D = CompetitionDataManager(dataset, data_dir, verbose=True,
                                           encode_labels=True)
                # pickles must be written/read in binary mode
                with open(save_path, 'wb') as fh:
                    pickle.dump(D, fh, -1)
            else:
                with open(save_path, 'rb') as fh:
                    D = pickle.load(fh)
        finally:
            lock.release()
    else:
        with open(save_path, 'rb') as fh:
            D = pickle.load(fh)
    return D
def load_model(modelfile):
    with open(modelfile, "rb") as f:
        layers = cPickle.load(f)
    # parameters are stored in a companion file, one pickle per layer
    with open("params_" + modelfile, "rb") as f:
        for layer_key in layers.keys():
            layers[layer_key].params = cPickle.load(f)
    n_of_layers = len(layers.keys())
    flstm = AttendedLSTM(input_dim=layers[0].input_dim,
                         output_dim=layers[0].outer_output_dim,
                         number_of_layers=n_of_layers,
                         hidden_dims=[layers[0].output_dim])
    flstm.build_loaded_model(layers)
    embedded_test, test_labels = WordEmbeddingLayer.load_embedded_data(
        path="../data/", name="test", representation="glove.840B.300d")
    binary_embedded_test = []
    binary_test_labels = []
    for i in np.arange(len(embedded_test)):
        if np.argmax(test_labels[i]) != 1:
            binary_embedded_test.append(embedded_test[i])
            binary_test_labels.append(np.eye(2)[np.argmax(test_labels[i]) // 2])
    flstm.test_dev(binary_embedded_test, binary_test_labels)
    return flstm
def extract_images(f):
    """Extract CIFAR-style image batches from a gzipped tar archive.

    Args:
        f: A file object that can be passed into a gzip reader.

    Returns:
        Lists of training images, training labels, test images and
        test labels.
    """
    train_images = []
    train_labels = []
    test_images = []
    test_labels = []
    print('Extracting', f.name)
    with tarfile.TarFile(fileobj=gzip.GzipFile(fileobj=f)) as tfile:
        for name in tfile.getnames():
            if 'data_batch' in name:
                fd = tfile.extractfile(name)
                data = cPickle.load(fd)
                for image, label in zip(data['data'], data['labels']):
                    train_images.append(np.array(image))
                    train_labels.append(label)
            elif 'test_batch' in name:
                fd = tfile.extractfile(name)
                data = cPickle.load(fd)
                for image, label in zip(data['data'], data['labels']):
                    test_images.append(np.array(image))
                    test_labels.append(label)
    return train_images, train_labels, test_images, test_labels
def predict_new():
    """
    An example of how to load a trained model and use it
    to predict labels.
    """
    # load the saved model (binary mode; it is a pickle)
    with open('../data/lr_best_model.pkl', 'rb') as f:
        classifier = pickle.load(f)

    # compile a predictor function
    predict_model = theano.function(
        inputs=[classifier.input],
        outputs=classifier.y_pred)

    # We can test it on some examples from the test set
    with gzip.open('../data/kaggle_test.pkl.gz', 'rb') as f:
        test_data = pickle.load(f)

    predicted_values = predict_model(test_data / 255)
    result = numpy.vstack((numpy.arange(predicted_values.shape[0]) + 1,
                           predicted_values))
    res = result.T
    numpy.savetxt("../data/result_lr.csv", res, fmt=('%d', '%d'),
                  delimiter=',', header='ImageId,Label')
def read_pickle_from_file(filename):
    with tf.gfile.Open(filename, 'rb') as f:
        if sys.version_info >= (3, 0):
            data_dict = pickle.load(f, encoding='bytes')
        else:
            data_dict = pickle.load(f)
    return data_dict
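# A minimal usage sketch for read_pickle_from_file above; the path is a
# hypothetical example, not from the original source. Note that with
# encoding='bytes' on Python 3 the dictionary keys come back as bytes
# (e.g. b'data'), not str.
batch = read_pickle_from_file('cifar-10-batches-py/data_batch_1')
print(sorted(batch.keys()))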
def sample(args):
    with open(os.path.join(args.save_dir, 'config.pkl'), 'rb') as f:
        saved_args = cPickle.load(f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'rb') as f:
        chars, vocab = cPickle.load(f)
    model = Model(saved_args, True)
    val_loss_file = args.save_dir + '/val_loss.json'
    with tf.Session() as sess:
        saver = tf.train.Saver(tf.all_variables())
        if os.path.exists(val_loss_file):
            with open(val_loss_file, "r") as text_file:
                text = text_file.read()
            loss_json = json.loads(text)
            # pick the checkpoint with the lowest validation loss
            # (sorted() also works on Python 3, where dict.keys()
            # has no sort method)
            losses = sorted(loss_json.keys(), key=float)
            loss = losses[0]
            model_checkpoint_path = loss_json[loss]['checkpoint_path']
            saver.restore(sess, model_checkpoint_path)
            result = model.sample(sess, chars, vocab, args.n, args.prime,
                                  args.sample_rule, args.temperature)
            print(result)
            # add this back in later, not sure why it's not working
            output = "/data/output/" + str(int(time.time())) + ".txt"
            with open(output, "w") as text_file:
                text_file.write(result)
            print(output)
def _parse_file(cls, path, pickle=False):
    """parse a .chain file into a list of the type [(L{Chain}, arr, arr, arr) ...]

    :param path: name of the file"""
    fname = path
    if fname.endswith(".gz"):
        fname = path[:-3]
    if fname.endswith('.pkl'):
        # you asked for the pickled file. I'll give it to you
        log.debug("loading pickled file %s ..." % fname)
        with open(fname, 'rb') as fd:
            return cPickle.load(fd)
    elif os.path.isfile("%s.pkl" % fname):
        # there is a cached version I can give to you
        log.info("loading pickled file %s.pkl ..." % fname)
        if os.stat(path).st_mtime > os.stat("%s.pkl" % fname).st_mtime:
            log.critical("*** pickled file %s.pkl is not up to date ***" % (path))
        with open("%s.pkl" % fname, 'rb') as fd:
            return cPickle.load(fd)

    data = fastLoadChain(path, cls._strfactory)
    if pickle and not os.path.isfile('%s.pkl' % fname):
        log.info("pickling to %s.pkl" % (fname))
        with open('%s.pkl' % fname, 'wb') as fd:
            cPickle.dump(data, fd)
    return data
def load_batch(fpath, label_key='labels'):
    """Internal utility for parsing CIFAR data.

    # Arguments
        fpath: path to the file to parse.
        label_key: key for label data in the retrieved dictionary.

    # Returns
        A tuple `(data, labels)`.
    """
    with open(fpath, 'rb') as f:
        if sys.version_info < (3,):
            d = cPickle.load(f)
        else:
            d = cPickle.load(f, encoding='bytes')
            # decode utf8 keys back to str
            d_decoded = {}
            for k, v in d.items():
                d_decoded[k.decode('utf8')] = v
            d = d_decoded
    data = d['data']
    labels = d[label_key]

    data = data.reshape(data.shape[0], 3, 32, 32)
    return data, labels
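# A minimal sketch (not from the original source) of how load_batch is
# typically combined to assemble the full CIFAR-10 training set from its
# five batches. The directory name is an assumption for illustration, and
# numpy as np / os are assumed to be imported.
def load_cifar10_train(dirname='cifar-10-batches-py'):
    xs, ys = [], []
    for i in range(1, 6):
        data, labels = load_batch(os.path.join(dirname, 'data_batch_%d' % i))
        xs.append(data)
        ys.append(labels)
    # five (10000, 3, 32, 32) batches -> one (50000, 3, 32, 32) array
    return np.concatenate(xs), np.concatenate(ys)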
def creator(path):
    archive_path = download.cached_download(url)

    train_x = numpy.empty((5, 10000, 3072), dtype=numpy.uint8)
    train_y = numpy.empty((5, 10000), dtype=numpy.uint8)
    test_y = numpy.empty(10000, dtype=numpy.uint8)

    dir_name = '{}-batches-py'.format(name)
    with tarfile.open(archive_path, 'r:gz') as archive:
        # training set
        for i in range(5):
            file_name = '{}/data_batch_{}'.format(dir_name, i + 1)
            d = pickle.load(archive.extractfile(file_name))
            train_x[i] = d['data']
            train_y[i] = d['labels']

        # test set
        file_name = '{}/test_batch'.format(dir_name)
        d = pickle.load(archive.extractfile(file_name))
        test_x = d['data']
        test_y[...] = d['labels']  # copy to array

    train_x = train_x.reshape(50000, 3072)
    train_y = train_y.reshape(50000)

    numpy.savez_compressed(path, train_x=train_x, train_y=train_y,
                           test_x=test_x, test_y=test_y)
    return {'train_x': train_x, 'train_y': train_y,
            'test_x': test_x, 'test_y': test_y}
def store_and_or_load_data(outputdir, dataset, data_dir):
    save_path = os.path.join(outputdir, dataset + '_Manager.pkl')
    if not os.path.exists(save_path):
        lock = lockfile.LockFile(save_path)
        while not lock.i_am_locking():
            try:
                lock.acquire(timeout=60)  # wait up to 60 seconds
            except lockfile.LockTimeout:
                lock.break_lock()
                lock.acquire()
        print('I locked', lock.path)
        # It is not yet certain whether the file already exists
        try:
            if not os.path.exists(save_path):
                D = SimpleDataManager(dataset, data_dir, verbose=True)
                # pickles must be written/read in binary mode
                with open(save_path, 'wb') as fh:
                    pickle.dump(D, fh, -1)
            else:
                with open(save_path, 'rb') as fh:
                    D = pickle.load(fh)
        finally:
            lock.release()
    else:
        with open(save_path, 'rb') as fh:
            D = pickle.load(fh)
    print('Loaded data')
    return D
def load_pickle(f):
    version = platform.python_version_tuple()
    if version[0] == '2':
        return pickle.load(f)
    elif version[0] == '3':
        return pickle.load(f, encoding='latin1')
    raise ValueError("invalid python version: {}".format(version))
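# Usage sketch for load_pickle above: the caller opens the file in binary
# mode and load_pickle picks the right pickle.load signature for the running
# interpreter. The path is a hypothetical example.
with open('data/batch.pkl', 'rb') as f:
    batch = load_pickle(f)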
def test_save_the_best():
    with NamedTemporaryFile() as dst, \
            NamedTemporaryFile() as dst_best:
        track_cost = TrackTheBest("cost")
        main_loop = MockMainLoop(
            extensions=[FinishAfter(after_n_epochs=1),
                        WriteCostExtension(),
                        track_cost,
                        Checkpoint(dst.name, after_batch=True,
                                   save_separately=['log'])
                        .add_condition(
                            "after_batch",
                            OnLogRecord(track_cost.notification_name),
                            (dst_best.name,))])
        main_loop.run()

        assert main_loop.log[4]['saved_to'] == (dst.name, dst_best.name)
        assert main_loop.log[5]['saved_to'] == (dst.name, dst_best.name)
        assert main_loop.log[6]['saved_to'] == (dst.name,)
        with open(dst_best.name, 'rb') as src:
            assert cPickle.load(src).log.status['iterations_done'] == 5
        root, ext = os.path.splitext(dst_best.name)
        log_path = root + "_log" + ext
        with open(log_path, 'rb') as src:
            assert cPickle.load(src).status['iterations_done'] == 5
def test_read_backward_compatibility():
    """Test backwards compatibility with a pickled file that's created with
    Python 2.7.3, Numpy 1.7.1_ahl2 and Pandas 0.14.1
    """
    fname = path.join(path.dirname(__file__), "data", "test-data.pkl")

    # For newer versions; verify that unpickling fails when using cPickle
    if PANDAS_VERSION >= LooseVersion("0.16.1"):
        if sys.version_info[0] >= 3:
            with pytest.raises(UnicodeDecodeError), open(fname) as fh:
                cPickle.load(fh)
        else:
            with pytest.raises(TypeError), open(fname) as fh:
                cPickle.load(fh)

    # Verify that PickleStore() uses a backwards compatible unpickler.
    store = PickleStore()
    with open(fname) as fh:
        # PickleStore compresses data with lz4
        version = {'blob': compressHC(fh.read())}
    df = store.read(sentinel.arctic_lib, version, sentinel.symbol)

    expected = pd.DataFrame(range(4), pd.date_range(start="20150101", periods=4))
    assert (df == expected).all().all()
def main():
    # Reading the configuration from stdin
    classifier = pickle.load(sys.stdin)
    info = pickle.load(sys.stdin)
    assert isinstance(classifier, (tmva.TMVAClassifier, tmva.TMVARegressor))
    assert isinstance(info, tmva._AdditionalInformation)
    tmva_process(classifier, info)
def _pickle_load(f):
    if sys.version_info > (3,):
        # python3
        return pickle.load(f, encoding='latin-1')
    else:
        # python2
        return pickle.load(f)
def __init__(self, experiment_name):
    self.engine = experiment.Experiment.get_engine(
        experiment_name, "sqlite"
    )
    SQLAlchemySession.configure(bind=self.engine)
    self.session = SQLAlchemySession()

    self.hdf5_file = h5py.File(
        os.path.join(experiment_name, "phenotypes.hdf5"), "r"
    )

    self.config = os.path.join(experiment_name, "configuration.yaml")
    if not os.path.isfile(self.config):
        self.config = None

    # Experiment info.
    filename = os.path.join(experiment_name, "experiment_info.pkl")
    with open(filename, "rb") as f:
        self.info = pickle.load(f)

    # Task info.
    self.task_info = {}
    path = os.path.join(experiment_name, "tasks")
    for task_dir in os.listdir(path):
        info_path = os.path.join(path, task_dir, "task_info.pkl")
        if os.path.isfile(info_path):
            with open(info_path, "rb") as f:
                self.task_info[task_dir] = pickle.load(f)

    # Correlation matrix.
    filename = os.path.join(experiment_name, "phen_correlation_matrix.npy")
    self.correlation_matrix = np.load(filename)
def __init__(self, path, random_seed, fold):
    np.random.seed(random_seed)
    self.path = path
    self.linkfile = path + 'allPostLinkMap.pickle'
    # self.edgelistfile = path + 'edgelist.txt'
    self.labelfile = path + 'allPostLabelMap.pickle'
    self.authorfile = path + 'allPostAuthorMap.pickle'
    self.authorattrifile = path + 'allAuthorAttrisProc.pickle'
    self.authorlinkfile = path + 'allAuthorLinks.pickle'
    self.textfile = path + 'allUserTextSkip.pickle2'
    self.foldfile = path + 'allFolds.pickle'
    self.threadfile = path + 'allThreadPost.pickle'
    self.embfile = path + 'node.emb'
    self.fold = fold
    self.nodes_infor = []
    self.node_map = {}
    with open(self.textfile, 'rb') as fin:
        # two objects are stored back to back in the same pickle file
        allTextEmbed = pickle.load(fin, encoding='latin1')
        self.allTextMap = pickle.load(fin, encoding='latin1')
    self.node_count = len(self.allTextMap)
    for i in range(self.node_count):
        self.add_node(i)
    self.read_label()
    self.read_text()
    self.read_link()
    self.label_count = len(self.label_map)
    # print('label count:', self.label_count)
    self.construct_data()
def load_data(dataset):
    print('... loading data')
    with gzip.open(dataset, 'rb') as f:
        try:
            # Python 3: old Python 2 pickles need an explicit encoding
            train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
        except TypeError:
            # Python 2: pickle.load() takes no encoding argument
            train_set, valid_set, test_set = pickle.load(f)

    def shared_dataset(data_xy, borrow=True):
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        return shared_x, T.cast(shared_y, 'int32')

    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
            (test_set_x, test_set_y)]
    return rval
def load(cls, fn, compress=True, *args, **kwargs):
    if compress and not fn.strip().lower().endswith('.gz'):
        fn = fn + '.gz'
    assert os.path.isfile(fn), 'File %s does not exist.' % (fn,)
    if compress:
        with gzip.open(fn, 'rb') as f:
            return pickle.load(f)
    with open(fn, 'rb') as f:
        return pickle.load(f)
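# Usage sketch for the load classmethod above, assuming a hypothetical
# MyModel class on which it is defined. With compress=True (the default),
# a missing '.gz' suffix is appended before the file is opened.
model = MyModel.load('checkpoints/model.pkl')  # actually reads model.pkl.gz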
def sample(args):
    with open(os.path.join(args.save_dir, 'config.pkl'), 'rb') as f:
        saved_args = cPickle.load(f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'rb') as f:
        chars, vocab = cPickle.load(f)
    model = Model(saved_args, True)
    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        ckpt = tf.train.get_checkpoint_state(args.save_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            ts = model.sample(sess, chars, vocab, args.n, args.prime,
                              args.sample)
            print("Sampled Output\n")
            print(ts)
            print("Converting Text to Speech")
            tts = gTTS(text=ts, lang='en-uk')
            tts.save("ts.mp3")
            audio = MP3("ts.mp3")
            audio_length = audio.info.length
            print("Speaker is Getting Ready")
            mixer.init()
            mixer.music.load('ts.mp3')
            mixer.music.play()
            time.sleep(audio_length + 5)
def get_data():
    """Get data in form suitable for episodic training.

    Returns:
        Train and test data as dictionaries mapping
        label to list of examples.
    """
    with tf.gfile.GFile(DATA_FILE_FORMAT % 'train', 'rb') as f:
        processed_train_data = pickle.load(f)
    with tf.gfile.GFile(DATA_FILE_FORMAT % 'test', 'rb') as f:
        processed_test_data = pickle.load(f)

    train_data = {}
    test_data = {}

    for data, processed_data in zip([train_data, test_data],
                                    [processed_train_data,
                                     processed_test_data]):
        for image, label in zip(processed_data['images'],
                                processed_data['labels']):
            if label not in data:
                data[label] = []
            data[label].append(image.reshape([-1]).astype('float32'))

    intersection = set(train_data.keys()) & set(test_data.keys())
    assert not intersection, 'Train and test data intersect.'

    ok_num_examples = [len(ll) == 20 for _, ll in train_data.items()]
    assert all(ok_num_examples), 'Bad number of examples in train data.'
    ok_num_examples = [len(ll) == 20 for _, ll in test_data.items()]
    assert all(ok_num_examples), 'Bad number of examples in test data.'

    logging.info('Number of labels in train data: %d.', len(train_data))
    logging.info('Number of labels in test data: %d.', len(test_data))

    return train_data, test_data
def Init(self):
    TFunctionApprox.Init(self)
    L = self.Locate
    if self.Params['data_x'] is not None:
        with open(L(self.Params['data_x']), 'rb') as f:
            self.DataX = pickle.load(f)
    if self.Params['data_y'] is not None:
        with open(L(self.Params['data_y']), 'rb') as f:
            self.DataY = pickle.load(f)
    self.C = []
    self.Closests = []
    self.CDists = []  # Distance to the closest point
    if self.Params['C'] is not None:
        self.C = copy.deepcopy(self.Params['C'])
    if self.Params['Closests'] is not None:
        self.Closests = copy.deepcopy(self.Params['Closests'])
    if self.Params['CDists'] is not None:
        self.CDists = copy.deepcopy(self.Params['CDists'])
    if self.Options['kernel'] == 'l2g':  # L2 norm Gaussian
        self.kernel = Gaussian
        self.dist = Dist
    elif self.Options['kernel'] == 'maxg':  # Max norm Gaussian
        self.kernel = GaussianM
        self.dist = DistM
    else:
        raise Exception('Undefined kernel type:', self.Options['kernel'])
    self.lazy_copy = True  # Assign True when DataX or DataY is updated.
    self.CheckPredictability()
def train(args):
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(args.init_from), \
            "%s must be a path" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from, "config.pkl")), \
            "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from, "chars_vocab.pkl")), \
            "chars_vocab.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same = ["model", "rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(args)[checkme], \
                "Command line argument and saved model disagree on '%s'" % checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'chars_vocab.pkl'), 'rb') as f:
            saved_chars, saved_vocab = cPickle.load(f)
        assert saved_chars == data_loader.chars, \
            "Data and loaded model disagree on character set!"
        assert saved_vocab == data_loader.vocab, \
            "Data and loaded model disagree on dictionary mappings!"

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    model = Model(args)

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
        for e in range(args.num_epochs):
            sess.run(tf.assign(model.lr,
                               args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            state = model.initial_state.eval()
            for b in range(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y,
                        model.initial_state: state}
                train_loss, state, _ = sess.run(
                    [model.cost, model.final_state, model.train_op], feed)
                end = time.time()
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}"
                      .format(e * data_loader.num_batches + b,
                              args.num_epochs * data_loader.num_batches,
                              e, train_loss, end - start))
                if (e * data_loader.num_batches + b) % args.save_every == 0 \
                        or (e == args.num_epochs - 1 and
                            b == data_loader.num_batches - 1):
                    # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path,
                               global_step=e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))
def load_data(data_file):
    with gzip.open(data_file, 'rb') as f:
        try:
            # Python 3: old Python 2 pickles need an explicit encoding
            train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
        except TypeError:
            # Python 2: pickle.load() takes no encoding argument
            train_set, valid_set, test_set = pickle.load(f)
    return (train_set, valid_set, test_set)
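# Usage sketch for load_data above with an MNIST-style pickle (the path is a
# hypothetical example): each of the three sets is an (inputs, targets) pair
# of numpy arrays.
train_set, valid_set, test_set = load_data('data/mnist.pkl.gz')
train_x, train_y = train_set
print(train_x.shape, train_y.shape)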
def _get_object_data(self):
    obj = None
    mime_data = cb.mimeData()
    if mime_data.hasFormat(PYTHON_TYPE):
        serialized_data = BytesIO(mime_data.data(PYTHON_TYPE).data())
        klass = load(serialized_data)  # the class is pickled first ...
        obj = load(serialized_data)    # ... followed by the instance
    return obj
def run_LogisticRegression():
    # start by importing Deep Learning Funcs
    funcs = DeepLearnFuncs()
    learning_rate = 0.0001
    n_epochs = 1000
    batch_size = 2
    dfLLdata = funcs.sgd_optimization(learning_rate, n_epochs, batch_size)

    ############
    # plotting likelihood or cost
    # the cost we minimize during training is the negative log likelihood
    ############
    x = dfLLdata['iter'].values
    y = dfLLdata['LL_iter'].values
    plt.figure()
    plt.plot(x, y, 'bo--')
    plt.xlabel('iterations', fontsize=14)
    plt.ylabel('negative log likelihood', fontsize=14)
    plt.title('LogReg: learning_rate = ' + str(learning_rate) +
              ' batch_size = ' + str(batch_size), fontsize=14)

    ############
    # plotting the 0-1 loss
    ############
    x = dfLLdata['iter'].values
    y = dfLLdata['0-1-loss'].values
    plt.figure()
    plt.plot(x, y, 'bo--')
    plt.xlabel('iterations')
    plt.ylabel('0-1-loss %')
    plt.title('LogReg: learning_rate = ' + str(learning_rate) +
              ' batch_size = ' + str(batch_size))

    ############
    # load the saved model (binary mode; these are pickles)
    ############
    with open('best_model.pkl', 'rb') as f:
        classifier = cPickle.load(f)

    # compile a predictor function
    predict_model = theano.function(
        inputs=[classifier.input],
        outputs=classifier.y_pred)

    # We can test it on some examples from the test set
    with open('test_set.pkl', 'rb') as f:
        test_set = cPickle.load(f)
    test_set_x, test_set_y = test_set
    test_set_x = test_set_x.get_value()

    with open('test_labels.pkl', 'rb') as f:
        test_labels = cPickle.load(f)

    predicted_values = predict_model(test_set_x[13:18])
    print("Predicted/label values for examples 13-18 in the test set:")
    print(predicted_values)
    print(test_labels[13:18])

    fig, ax = plt.subplots(ncols=1, nrows=1)
    img = np.reshape(test_set_x[13], (30, 30))
    ax.imshow(img, cmap="Greys_r")
    plt.show()
def load_data(dataset):
    ''' Loads the dataset

    :type dataset: string
    :param dataset: the path to the dataset (here MNIST)
    '''
    #############
    # LOAD DATA #
    #############
    print('... loading data')

    # Load the dataset
    with gzip.open(dataset, 'rb') as f:
        try:
            # Python 3: old Python 2 pickles need an explicit encoding
            train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
        except TypeError:
            # Python 2: pickle.load() takes no encoding argument
            train_set, valid_set, test_set = pickle.load(f)
    # train_set, valid_set, test_set format: tuple(input, target)
    # input is a numpy.ndarray of 2 dimensions (a matrix)
    # where each row corresponds to an example. target is a
    # numpy.ndarray of 1 dimension (vector) that has the same length as
    # the number of rows in the input. It should give the target
    # to the example with the same index in the input.

    def shared_dataset(data_xy, borrow=True):
        """ Function that loads the dataset into shared variables

        The reason we store our dataset in shared variables is to allow
        Theano to copy it into the GPU memory (when code is run on GPU).
        Since copying data into the GPU is slow, copying a minibatch every
        time it is needed (the default behaviour if the data is not in a
        shared variable) would lead to a large decrease in performance.
        """
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        # When storing data on the GPU it has to be stored as floats
        # therefore we will store the labels as ``floatX`` as well
        # (``shared_y`` does exactly that). But during our computations
        # we need them as ints (we use labels as index, and if they are
        # floats it doesn't make sense) therefore instead of returning
        # ``shared_y`` we will have to cast it to int. This little hack
        # lets us get around this issue
        return shared_x, T.cast(shared_y, 'int32')

    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
            (test_set_x, test_set_y)]
    return rval
def load_mnist(dataset, shared=False):
    """Loads the dataset

    :type dataset: string
    :param dataset: the path to the dataset (here MNIST)
    """
    ###############
    #  LOAD DATA  #
    ###############

    # Download the MNIST dataset if it is not present
    data_dir, data_file = os.path.split(dataset)
    if data_dir == "" and not os.path.isfile(dataset):
        # Check if dataset is in the data directory.
        new_path = os.path.join(
            os.path.split(__file__)[0],
            dataset
        )
        if os.path.isfile(new_path) or data_file == 'mnist.pkl.gz':
            dataset = new_path

    if (not os.path.isfile(dataset)) and data_file == 'mnist.pkl.gz':
        from six.moves import urllib
        origin = (
            'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
        )
        print('Downloading data from %s' % origin)
        urllib.request.urlretrieve(origin, dataset)

    print('... loading mnist')

    with gzip.open(dataset, 'rb') as f:
        try:
            # Python 3: old Python 2 pickles need an explicit encoding
            train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
        except TypeError:
            # Python 2: pickle.load() takes no encoding argument
            train_set, valid_set, test_set = pickle.load(f)
    # train_set, valid_set, test_set format: tuple(input, target)
    # input is a numpy.ndarray of 2 dimensions (a matrix)
    # where each row corresponds to an example. target is a
    # numpy.ndarray of 1 dimension (vector) that has the same length as
    # the number of rows in the input. It should give the target
    # to the example with the same index in the input.

    if shared:
        test_set_x, test_set_y = shared_dataset(test_set)
        valid_set_x, valid_set_y = shared_dataset(valid_set)
        train_set_x, train_set_y = shared_dataset(train_set)
    else:
        test_set_x, test_set_y = test_set
        valid_set_x, valid_set_y = valid_set
        train_set_x, train_set_y = train_set

    rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
            (test_set_x, test_set_y)]
    return rval
def unpickle(file):
    with open(file, 'rb') as fp:
        if sys.version_info.major == 2:
            data = pickle.load(fp)
        else:
            data = pickle.load(fp, encoding='latin-1')
    return data
def atisfold(fold):
    assert fold in range(5)
    filename = os.path.join(PREFIX, 'atis.fold' + str(fold) + '.pkl.gz')
    with gzip.open(filename, 'rb') as f:
        try:
            # Python 3: old Python 2 pickles need an explicit encoding
            train_set, valid_set, test_set, dicts = pickle.load(
                f, encoding='latin1')
        except TypeError:
            # Python 2: pickle.load() takes no encoding argument
            train_set, valid_set, test_set, dicts = pickle.load(f)
    return train_set, valid_set, test_set, dicts
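# Usage sketch for atisfold above. In the standard ATIS pickles, dicts maps
# names like 'words2idx' and 'labels2idx' to string-to-index dictionaries;
# that layout is an assumption about the dataset format, not shown above.
train_set, valid_set, test_set, dicts = atisfold(0)
idx2word = {v: k for k, v in dicts['words2idx'].items()}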
def load_params(self, file_):
    for k, v in pickle.load(file_).items():
        self.params_di[k].set_value(v)
def load(self, path):
    with open(path, 'rb') as f:
        load_lib = cPickle.load(f)
    for attr in ['pid', 'name', 'added', 'songs', 'playlists', 'folders']:
        self.__dict__[attr] = getattr(load_lib, attr)
    self._path = path
if __name__ == '__main__':
    with open('mysql_setting.yml', 'r') as f:
        # safe_load avoids executing arbitrary YAML tags
        setting = yaml.safe_load(f)
    connection = pymysql.connect(host=setting['host'],
                                 user=setting['user'],
                                 password=setting['password'],
                                 db='rakuten_recipe',
                                 charset='utf8mb4',
                                 cursorclass=pymysql.cursors.SSCursor)
    with open('result/category_recipe_ids_b.out', 'rb') as f:
        category_recipe_ids = pickle.load(f)
    vocab_set = set()
    for category, recipe_ids in category_recipe_ids.items():
        for recipe_id in recipe_ids:
            with connection.cursor() as cursor:
                # parameterized query instead of string formatting
                sql = "select title from recipes where recipe_id = %s;"
                cursor.execute(sql, (recipe_id,))
                result = cursor.fetchone()
def get_string_as_list(self, any_subgroup_string):
    from io import BytesIO
    S = BytesIO(any_subgroup_string.encode("ascii"))
    subgroups = pickle.load(S)
    return subgroups
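# Equivalent one-liner for get_string_as_list above: pickle.loads accepts a
# bytes object directly, so the BytesIO wrapper can be dropped.
subgroups = pickle.loads(any_subgroup_string.encode("ascii"))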
def unpickle(file_path):
    with open(file_path, 'rb') as cPickle_file:
        a = cPickle.load(cPickle_file)
    return a
def handler(dn, new, old, command):
    # type: (str, dict, dict, str) -> None
    # create tmp dir
    tmpDir = os.path.dirname(tmpFile)
    listener.setuid(0)
    try:
        if not os.path.exists(tmpDir):
            os.makedirs(tmpDir)
    except Exception as exc:
        ud.debug(ud.LISTENER, ud.ERROR,
                 "%s: could not create tmp dir %s (%s)" % (name, tmpDir, exc))
        return
    finally:
        listener.unsetuid()

    # modrdn stuff
    # 'r'+'a' -> renamed
    # command='r' and "not new and old"
    # command='a' and "new and not old"

    # write old object to pickle file
    oldObject = {}
    listener.setuid(0)
    try:
        # object was renamed -> save old object
        if command == "r" and old:
            with open(tmpFile, "wb") as fp:
                os.chmod(tmpFile, 0o600)
                pickle.dump({"dn": dn, "old": old}, fp)
        elif command == "a" and not old:
            if os.path.isfile(tmpFile):
                with open(tmpFile, "rb") as fp:
                    p = pickle.load(fp)
                oldObject = p.get("old", {})
                os.remove(tmpFile)
    except Exception as exc:
        if os.path.isfile(tmpFile):
            os.remove(tmpFile)
        ud.debug(ud.LISTENER, ud.ERROR,
                 "%s: could not read/write tmp file %s (%s)" % (name, tmpFile, exc))
    finally:
        listener.unsetuid()

    # update exports file
    lines = _read(lambda match: not match or match.group(1) != _quote(dn))
    if new and b'univentionShareNFS' in new.get('objectClass', []):
        path = new['univentionSharePath'][0].decode('UTF-8')
        options = [
            'rw' if new.get('univentionShareWriteable', [b''])[0] == b'yes' else 'ro',
            'root_squash' if new.get('univentionShareNFSRootSquash', [b''])[0] == b'yes' else 'no_root_squash',
            'async' if new.get('univentionShareNFSSync', [b''])[0] == b'async' else 'sync',
            'subtree_check' if new.get('univentionShareNFSSubTree', [b''])[0] == b'yes' else 'no_subtree_check',
        ] + [cs.decode('UTF-8') for cs in new.get('univentionShareNFSCustomSetting', [])]
        lines.append('%s -%s %s # LDAP:%s' % (
            _exports_escape(path),
            _quote(','.join(options)),
            _quote(' '.join(nfs_allowed.decode('ASCII')
                            for nfs_allowed in new.get('univentionShareNFSAllowed', [b'*']))),
            _quote(dn)
        ))
        _write(lines)

        listener.setuid(0)
        try:
            # object was renamed
            if not old and oldObject and command == "a":
                old = oldObject
            ret = univention.lib.listenerSharePath.createOrRename(
                old, new, listener.configRegistry)
            if ret:
                ud.debug(ud.LISTENER, ud.ERROR,
                         "%s: rename/create of sharePath for %s failed (%s)" % (name, dn, ret))
        finally:
            listener.unsetuid()
    else:
        _write(lines)
import numpy as np
from six.moves import cPickle

with open("train_cost.save", 'rb') as f:
    train_cost = cPickle.load(f)

with open("validate_cost.save", 'rb') as f:
    validate_cost = cPickle.load(f)

with open("best_learning_rate.save", 'rb') as f:
    best_learning_rate = cPickle.load(f)

print(train_cost)
print(validate_cost)
print(best_learning_rate)
def load_data(config):
    """
    Load the HASYv2 dataset.

    Note: the ``config`` argument is currently unused and ``mode`` is
    hard-coded to "fold-1".

    Parameters
    ----------
    mode : string, optional (default: "complete")
        - "complete" : Returns {'x': x, 'y': y} with all labeled data
        - "fold-1": Returns {'x_train': x_train, 'y_train': y_train,
                             'x_test': x_test, 'y_test': y_test}
        - "fold-2", ..., "fold-10": See "fold-1"

    Returns
    -------
    dict
        See "mode" parameter for details.
    """
    mode = 'fold-1'

    # Download if not already done
    fname = 'HASYv2.tar.bz2'
    origin = 'https://zenodo.org/record/259444/files/HASYv2.tar.bz2'
    fpath = get_file(fname, origin=origin, untar=False,
                     md5_hash='fddf23f36e24b5236f6b3a0880c778e3')
    path = os.path.dirname(fpath)

    # Extract content if not already done
    untar_fpath = os.path.join(path, "HASYv2")
    if not os.path.exists(untar_fpath):
        print('Extract contents from archive...')
        tfile = tarfile.open(fpath, 'r:bz2')
        try:
            tfile.extractall(path=untar_fpath)
        except (Exception, KeyboardInterrupt):
            if os.path.exists(untar_fpath):
                if os.path.isfile(untar_fpath):
                    os.remove(untar_fpath)
                else:
                    shutil.rmtree(untar_fpath)
            raise
        tfile.close()

    # Create pickle if not already done
    pickle_fpath = os.path.join(untar_fpath, "hasy-data.pickle")
    if not os.path.exists(pickle_fpath):
        # Load mapping from symbol names to indices
        symbol_csv_fpath = os.path.join(untar_fpath, "symbols.csv")
        symbol_id2index, labels = _generate_index(symbol_csv_fpath)
        globals()["labels"] = labels

        # Load data
        data_csv_fpath = os.path.join(untar_fpath, "hasy-data-labels.csv")
        data_csv = _load_csv(data_csv_fpath)
        x_compl = np.zeros((len(data_csv), 1, WIDTH, HEIGHT), dtype=np.uint8)
        y_compl = []
        s_compl = []
        path2index = {}

        # Load HASYv2 data
        for i, data_item in enumerate(data_csv):
            fname = os.path.join(untar_fpath, data_item['path'])
            s_compl.append(fname)
            x_compl[i, 0, :, :] = scipy.ndimage.imread(fname, flatten=False,
                                                       mode='L')
            label = symbol_id2index[data_item['symbol_id']]
            y_compl.append(label)
            path2index[fname] = i
        y_compl = np.array(y_compl, dtype=np.int64)

        data = {'x': x_compl,
                'y': y_compl,
                's': s_compl,
                'labels': labels,
                'path2index': path2index}

        # Store data as pickle to speed up later calls
        with open(pickle_fpath, 'wb') as f:
            pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL)
    else:
        with open(pickle_fpath, 'rb') as f:
            data = pickle.load(f)
        globals()["labels"] = data['labels']
        labels = data['labels']

    x_compl = data['x']
    y_compl = np.reshape(data['y'], (len(data['y']), 1))
    s_compl = data['s']
    path2index = data['path2index']

    if K.image_dim_ordering() == 'tf':
        x_compl = x_compl.transpose(0, 2, 3, 1)

    if mode == 'complete':
        return {'x': x_compl, 'y': y_compl}
    elif mode.startswith('fold-'):
        fold = int(mode.split("-")[1])
        if not (1 <= fold <= 10):
            raise NotImplementedError

        # Load fold
        fold_dir = os.path.join(untar_fpath,
                                "classification-task/fold-{}".format(fold))
        train_csv_fpath = os.path.join(fold_dir, "train.csv")
        test_csv_fpath = os.path.join(fold_dir, "test.csv")
        train_csv = _load_csv(train_csv_fpath)
        test_csv = _load_csv(test_csv_fpath)

        train_ids = np.array([path2index[row['path']] for row in train_csv])
        test_ids = np.array([path2index[row['path']] for row in test_csv])

        x_train = x_compl[train_ids]
        x_test = x_compl[test_ids]
        y_train = y_compl[train_ids]
        y_test = y_compl[test_ids]
        s_train = [s_compl[id_] for id_ in train_ids]
        s_test = [s_compl[id_] for id_ in test_ids]

        splitd = train_test_split(x_train, y_train, s_train,
                                  test_size=0.10, random_state=42,
                                  stratify=y_train)
        x_train, x_val, y_train, y_val, s_train, s_val = splitd

        data = {'x_train': x_train,
                'y_train': y_train,
                'x_test': x_test,
                'y_test': y_test,
                'x_val': x_val,
                'y_val': y_val,
                's_train': s_train,
                's_val': s_val,
                's_test': s_test,
                'labels': labels}
        return data
    else:
        raise NotImplementedError
            value=0.),
        'R': keras.preprocessing.sequence.pad_sequences(x['R'],
                                                        maxlen=maxTitleLen,
                                                        value=0.),
        'L': keras.utils.to_categorical(x['L'], num_classes=2)
    }


if __name__ == "__main__":
    path = u.getMostRecentOf("prepared-data/recommender-v1", "pkl")
    print("Loading dataset " + str(path) + "... ")
    with open(path, 'rb') as f:
        # five objects were dumped back to back into the same pickle file
        train = pickle.load(f)
        validate = pickle.load(f)
        test = pickle.load(f)
        ref_dict = pickle.load(f)
        word_mapper = pickle.load(f)

    train = prepSet(train)
    validate = prepSet(validate)
    test = prepSet(test)

    print("Train size: " + str(len(train['T'])))
    print("Test size: " + str(len(test['T'])))
    print("Validation size: " + str(len(validate['T'])))
    print(train['T'][1:5])
    print(test['T'][1:5])
def unpickle(filename):
    # six.moves.cPickle is Python 3's pickle here, so the encoding
    # argument is accepted
    with open(filename, 'rb') as fo:
        return cPickle.load(fo, encoding='latin1')
    ori_N_total = N_total  # number of items to simulate
    mt = flex.mersenne_twister(seed=0)
    random_orientations = []
    for iteration in range(ori_N_total):
        random_orientations.append(mt.random_double_r3_rotation_matrix())

    transmitted_info = dict(spectra=SS,
                            amplitudes=A,
                            orientations=random_orientations)

    #with open("confirm_P1_range_oxidized_intensities_dict.pickle", "rb") as F:  # Einsle oxidized
    #with open("confirm_P1_range_metallic_intensities_dict.pickle", "rb") as F:  # Einsle metallic
    with open("confirm_P1_range_reduced_intensities_dict.pickle", "rb") as F:  # Einsle reduced
        intensity_dict = pickle.load(F)
    transmitted_info["intensity_dict"] = intensity_dict
    print("finished setup in rank 0")
else:
    transmitted_info = None

if usingMPI:
    transmitted_info = comm.bcast(transmitted_info, root=0)
    comm.barrier()
    import os
    host = os.environ["HOST"]
    print("barrier from rank %d of %d" % (rank, size), host)

origin = col((1500, 1500))
position0 = col((1500, 3000)) - origin
nitem = 0
nall_spots = 0
def _load_datablocks(self, obj, check_format=True, directory=None):
    '''Create the datablock from a dictionary.'''
    from dxtbx.format.Registry import Registry
    from dxtbx.model import BeamFactory, DetectorFactory
    from dxtbx.model import GoniometerFactory, ScanFactory
    from dxtbx.serialize.filename import load_path
    from dxtbx.format.image import ImageBool, ImageDouble
    from dxtbx.format.FormatMultiImage import FormatMultiImage

    # If we have a list, extract for each dictionary in the list
    if isinstance(obj, list):
        return [self._load_datablocks(dd, check_format, directory)
                for dd in obj]
    elif not isinstance(obj, dict):
        raise InvalidDataBlockError(
            "Unexpected datablock type {} instead of dict".format(type(obj)))
    # Make sure the id signature is correct
    if not obj.get("__id__") == "DataBlock":
        raise InvalidDataBlockError(
            "Expected __id__ 'DataBlock', but found {}".format(
                repr(obj.get("__id__"))))

    # Get the list of models
    blist = obj.get('beam', [])
    dlist = obj.get('detector', [])
    glist = obj.get('goniometer', [])
    slist = obj.get('scan', [])

    def load_models(obj):
        try:
            beam = BeamFactory.from_dict(blist[obj['beam']])
        except Exception:
            beam = None
        try:
            dobj = dlist[obj['detector']]
            detector = DetectorFactory.from_dict(dobj)
        except Exception:
            detector = None
        try:
            gonio = GoniometerFactory.from_dict(glist[obj['goniometer']])
        except Exception:
            gonio = None
        try:
            scan = ScanFactory.from_dict(slist[obj['scan']])
        except Exception:
            scan = None
        return beam, detector, gonio, scan

    def load_external_lookups(imageset, iset):
        # Factors out the mask/gain/pedestal/dx/dy loading that was
        # repeated verbatim for every imageset type. External lookup
        # images are pickled, so the files are opened in binary mode.
        if 'mask' in imageset and imageset['mask'] is not None:
            imageset['mask'] = load_path(imageset['mask'], directory=directory)
            iset.external_lookup.mask.filename = imageset['mask']
            if check_format:
                with open(imageset['mask'], 'rb') as infile:
                    iset.external_lookup.mask.data = ImageBool(
                        pickle.load(infile))
        if 'gain' in imageset and imageset['gain'] is not None:
            imageset['gain'] = load_path(imageset['gain'], directory=directory)
            iset.external_lookup.gain.filename = imageset['gain']
            if check_format:
                with open(imageset['gain'], 'rb') as infile:
                    iset.external_lookup.gain.data = ImageDouble(
                        pickle.load(infile))
        if 'pedestal' in imageset and imageset['pedestal'] is not None:
            imageset['pedestal'] = load_path(imageset['pedestal'],
                                             directory=directory)
            iset.external_lookup.pedestal.filename = imageset['pedestal']
            if check_format:
                with open(imageset['pedestal'], 'rb') as infile:
                    iset.external_lookup.pedestal.data = ImageDouble(
                        pickle.load(infile))
        if 'dx' in imageset and imageset['dx'] is not None:
            imageset['dx'] = load_path(imageset['dx'], directory=directory)
            iset.external_lookup.dx.filename = imageset['dx']
            with open(imageset['dx'], 'rb') as infile:
                iset.external_lookup.dx.data = ImageDouble(pickle.load(infile))
        if 'dy' in imageset and imageset['dy'] is not None:
            imageset['dy'] = load_path(imageset['dy'], directory=directory)
            iset.external_lookup.dy.filename = imageset['dy']
            with open(imageset['dy'], 'rb') as infile:
                iset.external_lookup.dy.data = ImageDouble(pickle.load(infile))
        iset.update_detector_px_mm_data()

    # Loop through all the imagesets
    imagesets = []
    for imageset in obj['imageset']:
        ident = imageset['__id__']
        format_kwargs = imageset.get('params', {})
        if ident == 'ImageSweep':
            beam, detector, gonio, scan = load_models(imageset)
            if "template" in imageset:
                template = load_path(imageset['template'], directory=directory)
                i0, i1 = scan.get_image_range()
                iset = dxtbx.imageset.ImageSetFactory.make_sweep(
                    template, range(i0, i1 + 1), None,
                    beam, detector, gonio, scan, check_format,
                    format_kwargs=format_kwargs)
            elif "master" in imageset:
                template = load_path(imageset['master'], directory=directory)
                i0, i1 = scan.get_image_range()
                indices = imageset['images']
                if not check_format:
                    format_class = FormatMultiImage
                else:
                    format_class = None
                iset = dxtbx.imageset.ImageSetFactory.make_sweep(
                    template, list(range(i0, i1 + 1)),
                    format_class=format_class,
                    beam=beam, detector=detector,
                    goniometer=gonio, scan=scan,
                    check_format=check_format,
                    format_kwargs=format_kwargs)
            load_external_lookups(imageset, iset)
            imagesets.append(iset)
        elif ident == 'ImageSet' or ident == "ImageGrid":
            filenames = [image['filename'] for image in imageset['images']]
            indices = [image['image'] for image in imageset['images']
                       if 'image' in image]
            assert len(indices) == 0 or len(indices) == len(filenames)
            iset = dxtbx.imageset.ImageSetFactory.make_imageset(
                filenames, None, check_format, indices,
                format_kwargs=format_kwargs)
            if ident == "ImageGrid":
                grid_size = imageset['grid_size']
                iset = dxtbx.imageset.ImageGrid.from_imageset(iset, grid_size)
            for i, image in enumerate(imageset['images']):
                beam, detector, gonio, scan = load_models(image)
                iset.set_beam(beam, i)
                iset.set_detector(detector, i)
                iset.set_goniometer(gonio, i)
                iset.set_scan(scan, i)
            load_external_lookups(imageset, iset)
            imagesets.append(iset)
        else:
            raise RuntimeError('expected ImageSet/ImageSweep, got %s' % ident)

    # Return the datablock
    return DataBlock(imagesets)
def process_in_parallel(tag, total_range_size, binary, output_dir,
                        load_ckpt, load_detectron, opts=''):
    """Run the specified binary NUM_GPUS times in parallel, each time as a
    subprocess that uses one GPU. The binary must accept the command line
    arguments `--range {start} {end}` that specify a data processing range.
    """
    # Snapshot the current cfg state in order to pass to the inference
    # subprocesses
    cfg_file = os.path.join(output_dir, '{}_range_config.yaml'.format(tag))
    with open(cfg_file, 'w') as f:
        yaml.dump(cfg, stream=f)
    subprocess_env = os.environ.copy()
    processes = []
    NUM_GPUS = torch.cuda.device_count()
    subinds = np.array_split(range(total_range_size), NUM_GPUS)
    # Determine GPUs to use
    cuda_visible_devices = os.environ.get('CUDA_VISIBLE_DEVICES')
    if cuda_visible_devices:
        gpu_inds = list(map(int, cuda_visible_devices.split(',')))
        assert -1 not in gpu_inds, \
            'Hiding GPU indices using the \'-1\' index is not supported'
    else:
        gpu_inds = range(cfg.NUM_GPUS)
    gpu_inds = list(gpu_inds)
    # Run the binary in cfg.NUM_GPUS subprocesses
    for i, gpu_ind in enumerate(gpu_inds):
        start = subinds[i][0]
        end = subinds[i][-1] + 1
        subprocess_env['CUDA_VISIBLE_DEVICES'] = str(gpu_ind)
        cmd = ('python3 {binary} --range {start} {end} --cfg {cfg_file} '
               '--set {opts} --output_dir {output_dir}')
        if load_ckpt is not None:
            cmd += ' --load_ckpt {load_ckpt}'
        elif load_detectron is not None:
            cmd += ' --load_detectron {load_detectron}'
        cmd = cmd.format(binary=shlex_quote(binary), start=int(start),
                         end=int(end), cfg_file=shlex_quote(cfg_file),
                         output_dir=output_dir, load_ckpt=load_ckpt,
                         load_detectron=load_detectron,
                         opts=' '.join([shlex_quote(opt) for opt in opts]))
        logger.info('{} range command {}: {}'.format(tag, i, cmd))
        if i == 0:
            subprocess_stdout = subprocess.PIPE
        else:
            filename = os.path.join(
                output_dir, '%s_range_%s_%s.stdout' % (tag, start, end))
            subprocess_stdout = open(filename, 'w')
        p = subprocess.Popen(cmd, shell=True, env=subprocess_env,
                             stdout=subprocess_stdout,
                             stderr=subprocess.STDOUT, bufsize=1)
        processes.append((i, p, start, end, subprocess_stdout))

    # Log output from inference processes and collate their results
    outputs = []
    for i, p, start, end, subprocess_stdout in processes:
        log_subprocess_output(i, p, output_dir, tag, start, end)
        if isinstance(subprocess_stdout, IOBase):
            subprocess_stdout.close()
        range_file = os.path.join(output_dir,
                                  '%s_range_%s_%s.pkl' % (tag, start, end))
        with open(range_file, 'rb') as f:
            range_data = pickle.load(f)
        outputs.append(range_data)
    return outputs
        self.input_label_h5 = ''
        self.input_json = ''
        self.split = 'test'
        self.coco_json = ''
        self.id = ''
        self.pretrained_weight = 1  ##### xud


# In[3]:

os.environ['CUDA_VISIBLE_DEVICES'] = '1'
opt = Opt()
opt.infos_path = './log_st/infos_-best_attention_wehao.pkl'

# Load infos (binary mode; it is a pickle)
with open(opt.infos_path, 'rb') as f:
    infos = cPickle.load(f)


# In[4]:

# override and collect parameters
if len(opt.input_fc_dir) == 0:
    opt.input_fc_h5 = infos['opt'].input_fc_h5
    opt.input_att_h5 = infos['opt'].input_att_h5
    opt.input_label_h5 = infos['opt'].input_label_h5
if len(opt.input_json) == 0:
    opt.input_json = infos['opt'].input_json
if opt.batch_size == 0:
    opt.batch_size = infos['opt'].batch_size
if len(opt.id) == 0:
def __init__(self, datadir, work_params, plot=False, esd_plot=False,
             half_data_flag=0):
    casetag = work_params.output.prefix
    # read the ground truth values back in
    from six.moves import cPickle as pickle
    # it is assumed (for now) that the reference millers contain a complete
    # asymmetric unit of indices, within the (d_max,d_min) region of interest
    # and possibly outside the region.
    with open(os.path.join(datadir, casetag + "_miller.pickle"), "rb") as f:
        reference_millers = pickle.load(f)
    experiment_manager = read_experiments(work_params)

    with open(os.path.join(datadir, casetag + "_observation.pickle"), "rb") as f:
        obs = pickle.load(f)
    print("Read in %d observations" % (len(obs["observed_intensity"])))
    reference_millers.show_summary(prefix="Miller index file ")

    print(len(obs["frame_lookup"]), len(obs["observed_intensity"]),
          flex.max(obs['miller_lookup']), flex.max(obs['frame_lookup']))
    max_frameno = flex.max(obs["frame_lookup"])

    from iotbx import mtz
    mtz_object = mtz.object(file_name=work_params.scaling.mtz_file)
    #for array in mtz_object.as_miller_arrays():
    #    this_label = array.info().label_string()
    #    print(this_label, array.observation_type())
    I_sim = mtz_object.as_miller_arrays()[0].as_intensity_array()
    I_sim.show_summary()
    MODEL_REINDEX_OP = work_params.model_reindex_op
    I_sim = I_sim.change_basis(MODEL_REINDEX_OP).map_to_asu()

    # match up isomorphous (the simulated fake F's) with experimental unique set
    matches = miller.match_multi_indices(
        miller_indices_unique=reference_millers.indices(),
        miller_indices=I_sim.indices())

    print("original unique", len(reference_millers.indices()))
    print("isomorphous set", len(I_sim.indices()))
    print("pairs", len(matches.pairs()))
    iso_data = flex.double(len(reference_millers.indices()))

    for pair in matches.pairs():
        iso_data[pair[0]] = I_sim.data()[pair[1]]

    reference_data = miller.array(miller_set=reference_millers,
                                  data=iso_data)
    reference_data.set_observation_type_xray_intensity()

    FOBS = prepare_observations_for_scaling(
        work_params, obs=obs,
        reference_intensities=reference_data,
        files=experiment_manager.get_files(),
        half_data_flag=half_data_flag)

    I, I_visited, G, G_visited = I_and_G_base_estimate(FOBS, params=work_params)
    print("I length", len(I), "G length", len(G),
          "(Reference set; entire asymmetric unit)")
    assert len(reference_data.data()) == len(I)

    # presumably these assertions fail when half data are taken for CC1/2
    # or d_min is cut
    model_I = reference_data.data()[0:len(I)]

    T = Timer("%d frames" % (len(G),))

    mapper = mapper_factory(xscale6e)
    minimizer = mapper(I, G, I_visited, G_visited, FOBS, params=work_params,
                       experiments=experiment_manager.get_experiments())
    del T
    minimizer.show_summary()

    Fit = minimizer.e_unpack()
    Gstats = flex.mean_and_variance(Fit["G"].select(G_visited == 1))
    print("G mean and standard deviation:", Gstats.mean(),
          Gstats.unweighted_sample_standard_deviation())
    if "Bfactor" in work_params.levmar.parameter_flags:
        Bstats = flex.mean_and_variance(Fit["B"].select(G_visited == 1))
        print("B mean and standard deviation:", Bstats.mean(),
              Bstats.unweighted_sample_standard_deviation())
    show_correlation(Fit["I"], model_I, I_visited, "Correlation of I:")
    Fit_stddev = minimizer.e_unpack_stddev()

    # XXX FIXME known bug: the length of Fit["G"] could be smaller than the
    # length of experiment_manager.get_files(). Not sure if this has any
    # operational drawbacks. It's a result of half-dataset selection.
    if plot:
        plot_it(Fit["I"], model_I, mode="I")
        if "Rxy" in work_params.levmar.parameter_flags:
            show_histogram(Fit["Ax"], "Histogram of x rotation (degrees)")
            show_histogram(Fit["Ay"], "Histogram of y rotation (degrees)")
    print()

    if esd_plot:
        minimizer.esd_plot()

    from cctbx.examples.merging.show_results import show_overall_observations
    table1, self.n_bins, self.d_min = show_overall_observations(
        Fit["I"], Fit_stddev["I"], I_visited,
        reference_data, FOBS,
        title="Statistics for all reflections",
        work_params=work_params)

    self.FSIM = FOBS
    self.ordered_intensities = reference_data
    self.reference_millers = reference_millers
    self.Fit_I = Fit["I"]
    self.Fit_I_stddev = Fit_stddev["I"]
    self.I_visited = I_visited
    self.Fit = Fit
    self.experiments = experiment_manager
def load_pkl(file_name):
    with open(file_name, "rb") as pkl:
        data = pickle.load(pkl)
    return data
            pred_y = np.argmax(logit_outputs, axis=1)
            true_y = np.argmax(current_batch_labels_test, axis=1)
            true_cnt += sum(pred_y == true_y)
            test_batch_cnt += 1

        test_acc = true_cnt / float(test_batch_cnt * opt.batch_size)
        print("epoch {} test_acc {:.4f} test_num: {}".format(
            epoch, test_acc, test_batch_cnt * opt.batch_size))


if __name__ == '__main__':
    opt = opts.parse_opt()
    opt_dict = vars(opt)
    for k, v in opt_dict.items():
        print(k + ': \t' + str(v))

    with open('permuted_mnist_110.pkl', 'rb') as f:
        permuted_mnist = cPickle.load(f)

    if os.path.isdir(opt.rcst_model_save_path) is False:
        os.mkdir(opt.rcst_model_save_path)

    x_train_permuted = permuted_mnist['x_train_permuted']
    y_train = permuted_mnist['y_train']
    x_test_permuted = permuted_mnist['x_test_permuted']
    y_test = permuted_mnist['y_test']

    # training
    train(opt, x_train_permuted, y_train, x_test_permuted, y_test)
def test_run_once(self, mock_check_drive):
    mock_check_drive.side_effect = lambda r, d, mc: os.path.join(r, d)
    ou = object_updater.ObjectUpdater({
        'devices': self.devices_dir,
        'mount_check': 'false',
        'swift_dir': self.testdir,
        'interval': '1',
        'concurrency': '1',
        'node_timeout': '15'}, logger=self.logger)
    ou.run_once()
    async_dir = os.path.join(self.sda1, get_async_dir(POLICIES[0]))
    os.mkdir(async_dir)
    ou.run_once()
    self.assertTrue(os.path.exists(async_dir))
    # each run calls check_device
    self.assertEqual([
        mock.call(self.devices_dir, 'sda1', False),
        mock.call(self.devices_dir, 'sda1', False),
    ], mock_check_drive.mock_calls)
    mock_check_drive.reset_mock()

    ou = object_updater.ObjectUpdater({
        'devices': self.devices_dir,
        'mount_check': 'TrUe',
        'swift_dir': self.testdir,
        'interval': '1',
        'concurrency': '1',
        'node_timeout': '15'}, logger=self.logger)
    odd_dir = os.path.join(async_dir, 'not really supposed '
                           'to be here')
    os.mkdir(odd_dir)
    ou.run_once()
    self.assertTrue(os.path.exists(async_dir))
    self.assertEqual([
        mock.call(self.devices_dir, 'sda1', True),
    ], mock_check_drive.mock_calls)

    ohash = hash_path('a', 'c', 'o')
    odir = os.path.join(async_dir, ohash[-3:])
    mkdirs(odir)
    older_op_path = os.path.join(
        odir, '%s-%s' % (ohash, normalize_timestamp(time() - 1)))
    op_path = os.path.join(
        odir, '%s-%s' % (ohash, normalize_timestamp(time())))
    for path in (op_path, older_op_path):
        with open(path, 'wb') as async_pending:
            pickle.dump({'op': 'PUT', 'account': 'a',
                         'container': 'c',
                         'obj': 'o', 'headers': {
                             'X-Container-Timestamp':
                             normalize_timestamp(0)}},
                        async_pending)
    ou.run_once()
    self.assertTrue(not os.path.exists(older_op_path))
    self.assertTrue(os.path.exists(op_path))
    self.assertEqual(ou.logger.get_increment_counts(),
                     {'failures': 1, 'unlinks': 1})
    self.assertIsNone(pickle.load(open(op_path, 'rb')).get('successes'))

    bindsock = listen_zero()

    def accepter(sock, return_code):
        try:
            with Timeout(3):
                inc = sock.makefile('rb')
                out = sock.makefile('wb')
                out.write(b'HTTP/1.1 %d OK\r\nContent-Length: 0\r\n\r\n' %
                          return_code)
                out.flush()
                self.assertEqual(inc.readline(),
                                 b'PUT /sda1/0/a/c/o HTTP/1.1\r\n')
                headers = HeaderKeyDict()
                line = inc.readline()
                while line and line != b'\r\n':
                    headers[line.split(b':')[0]] = \
                        line.split(b':')[1].strip()
                    line = inc.readline()
                self.assertIn(b'x-container-timestamp', headers)
                self.assertIn(b'X-Backend-Storage-Policy-Index',
                              headers)
        except BaseException as err:
            return err
        return None

    def accept(return_codes):
        try:
            events = []
            for code in return_codes:
                with Timeout(3):
                    sock, addr = bindsock.accept()
                    events.append(spawn(accepter, sock, code))
            for event in events:
                err = event.wait()
                if err:
                    raise err
        except BaseException as err:
            return err
        return None

    event = spawn(accept, [201, 500, 500])
    for dev in ou.get_container_ring().devs:
        if dev is not None:
            dev['port'] = bindsock.getsockname()[1]
    ou.logger._clear()
    ou.run_once()
    err = event.wait()
    if err:
        raise err
    self.assertTrue(os.path.exists(op_path))
    self.assertEqual(ou.logger.get_increment_counts(), {'failures': 1})
    self.assertEqual([0],
                     pickle.load(open(op_path, 'rb')).get('successes'))

    event = spawn(accept, [404, 201])
    ou.logger._clear()
    ou.run_once()
    err = event.wait()
    if err:
        raise err
    self.assertTrue(os.path.exists(op_path))
    self.assertEqual(ou.logger.get_increment_counts(), {'failures': 1})
    self.assertEqual([0, 2],
                     pickle.load(open(op_path, 'rb')).get('successes'))

    event = spawn(accept, [201])
    ou.logger._clear()
    ou.run_once()
    err = event.wait()
    if err:
        raise err
    # we remove the async_pending and its containing suffix dir,
    # but not anything above that
    self.assertFalse(os.path.exists(op_path))
    self.assertFalse(os.path.exists(os.path.dirname(op_path)))
    self.assertTrue(
        os.path.exists(os.path.dirname(os.path.dirname(op_path))))
    self.assertEqual(ou.logger.get_increment_counts(),
                     {'unlinks': 1, 'successes': 1})
def load_data(dataset):
    ''' Loads the dataset

    :type dataset: string
    :param dataset: the path to the dataset (here MNIST)
    '''
    #############
    # LOAD DATA #
    #############

    # Download the MNIST dataset if it is not present
    data_dir, data_file = os.path.split(dataset)
    if data_dir == "" and not os.path.isfile(dataset):
        # Check if dataset is in the data directory.
        new_path = os.path.join(
            os.path.split(__file__)[0], "..", "data", dataset)
        if os.path.isfile(new_path) or data_file == 'mnist.pkl.gz':
            dataset = new_path

    if (not os.path.isfile(dataset)) and data_file == 'mnist.pkl.gz':
        from six.moves import urllib
        origin = (
            'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz')
        print('Downloading data from %s' % origin)
        urllib.request.urlretrieve(origin, dataset)

    print('... loading data')

    # Load the dataset
    with gzip.open(dataset, 'rb') as f:
        try:
            # Python 3: old Python 2 pickles need an explicit encoding
            train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
        except TypeError:
            # Python 2: pickle.load() takes no encoding argument
            train_set, valid_set, test_set = pickle.load(f)
    # train_set, valid_set, test_set format: tuple(input, target)
    # input is a numpy.ndarray of 2 dimensions (a matrix)
    # where each row corresponds to an example. target is a
    # numpy.ndarray of 1 dimension (vector) that has the same length as
    # the number of rows in the input. It should give the target
    # to the example with the same index in the input.

    def shared_dataset(data_xy, borrow=True):
        """ Function that loads the dataset into shared variables

        The reason we store our dataset in shared variables is to allow
        Theano to copy it into the GPU memory (when code is run on GPU).
        Since copying data into the GPU is slow, copying a minibatch every
        time it is needed (the default behaviour if the data is not in a
        shared variable) would lead to a large decrease in performance.
        """
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        # When storing data on the GPU it has to be stored as floats
        # therefore we will store the labels as ``floatX`` as well
        # (``shared_y`` does exactly that). But during our computations
        # we need them as ints (we use labels as index, and if they are
        # floats it doesn't make sense) therefore instead of returning
        # ``shared_y`` we will have to cast it to int. This little hack
        # lets us get around this issue
        return shared_x, T.cast(shared_y, 'int32')

    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
            (test_set_x, test_set_y)]
    return rval
import sys
import math
import time
import pickle  # needed for pickle.load below

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

DATA_PATH = 'medical_data/'
DATA_FILE = DATA_PATH + 'medical_data.pickle'
INCLUDE_TEST_SET = False

print("Loading datasets...")
with open(DATA_FILE, 'rb') as f:
    save = pickle.load(f)
    train_X = save['train_data']
    train_Y = save['train_labels']
    val_X = save['val_data']
    val_Y = save['val_labels']
    if INCLUDE_TEST_SET:
        test_X = save['test_data']
        test_Y = save['test_labels']
    del save  # hint to help gc free up memory

print('Training set', train_X.shape, train_Y.shape)
print('Validation set', val_X.shape, val_Y.shape)
if INCLUDE_TEST_SET:
    print('Test set', test_X.shape, test_Y.shape)
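For reference, a pickle with the layout this script expects could be produced like so (a sketch; the zero arrays are stand-ins for real data, and the key names match the loader above):

import numpy as np
import pickle

train_X = np.zeros((100, 64), dtype=np.float32)  # stand-in arrays
train_Y = np.zeros((100, 10), dtype=np.float32)
val_X = np.zeros((20, 64), dtype=np.float32)
val_Y = np.zeros((20, 10), dtype=np.float32)

save = {
    'train_data': train_X, 'train_labels': train_Y,
    'val_data': val_X, 'val_labels': val_Y,
}
with open('medical_data/medical_data.pickle', 'wb') as f:
    pickle.dump(save, f, pickle.HIGHEST_PROTOCOL)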
def load_dict(filename_):
    with open(filename_, 'rb') as f:
        ret_di = pickle.load(f)
    return ret_di
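load_dict has no matching writer in this snippet; a symmetric helper might look like this (a sketch, not part of the original code):

import pickle

def save_dict(filename_, di_):
    # Binary mode and an explicit protocol mirror load_dict's expectations.
    with open(filename_, 'wb') as f:
        pickle.dump(di_, f, protocol=pickle.HIGHEST_PROTOCOL)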
def make_invariance_sets():
    print "\nMaking invariance datasets..."
    with open(DATA_PATH + 'art_data.pickle', 'rb') as f:
        save = pickle.load(f)
        val_X = save['val_data']
        val_Y = save['val_labels']
        del save  # hint to help gc free up memory

    n = len(val_X)
    translated_val_X = np.ndarray((n, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS),
                                  dtype=np.float32)
    flipped_val_X = np.ndarray((n, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS),
                               dtype=np.float32)
    inverted_val_X = np.ndarray((n, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS),
                                dtype=np.float32)
    dark_val_X = np.ndarray((n, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS),
                            dtype=np.float32)
    bright_val_X = np.ndarray((n, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS),
                              dtype=np.float32)
    high_contrast_val_X = np.ndarray((n, IMAGE_SIZE, IMAGE_SIZE,
                                      NUM_CHANNELS), dtype=np.float32)
    low_contrast_val_X = np.ndarray((n, IMAGE_SIZE, IMAGE_SIZE,
                                     NUM_CHANNELS), dtype=np.float32)

    print "\tFlipping and inverting images..."
    val_X_RGB = (val_X * PIXEL_DEPTH) + PIXEL_DEPTH / 2.0
    for i in range(n):
        npimg = val_X_RGB[i, :, :, :]
        img = Image.fromarray(np.uint8(npimg))
        translated_val_X[i, :, :, :] = translate_img(npimg)
        flipped_val_X[i, :, :, :] = np.array(img.rotate(180))
        inverted_val_X[i, :, :, :] = np.array(PIL.ImageOps.invert(img))
        bright_mod = ImageEnhance.Brightness(img)
        dark_val_X[i, :, :, :] = bright_mod.enhance(0.75)
        bright_val_X[i, :, :, :] = bright_mod.enhance(1.5)
        contrast_mod = ImageEnhance.Contrast(img)
        # use the contrast enhancer here, not the brightness one
        low_contrast_val_X[i, :, :, :] = contrast_mod.enhance(0.75)
        high_contrast_val_X[i, :, :, :] = contrast_mod.enhance(1.5)

    print "\tScaling pixel values..."
    translated_val_X = scale_pixel_values(translated_val_X)
    flipped_val_X = scale_pixel_values(flipped_val_X)
    inverted_val_X = scale_pixel_values(inverted_val_X)
    dark_val_X = scale_pixel_values(dark_val_X)
    bright_val_X = scale_pixel_values(bright_val_X)
    high_contrast_val_X = scale_pixel_values(high_contrast_val_X)
    low_contrast_val_X = scale_pixel_values(low_contrast_val_X)

    print "\tPickling file..."
    save = {
        'translated_val_data': translated_val_X,
        'flipped_val_data': flipped_val_X,
        'inverted_val_data': inverted_val_X,
        'bright_val_data': bright_val_X,
        'dark_val_data': dark_val_X,
        'high_contrast_val_data': high_contrast_val_X,
        'low_contrast_val_data': low_contrast_val_X,
    }
    save_pickle_file('invariance_art_data.pickle', save)
def read_data(data_dir, force=False):
    def create_onehot_label(x):
        label = np.zeros((1, NUM_LABELS), dtype=np.float32)
        label[:, int(x)] = 1
        return label

    pickle_file = os.path.join(data_dir, "EmotionDetectorData.pickle")
    if force or not os.path.exists(pickle_file):
        train_filename = os.path.join(data_dir, "train.csv")
        data_frame = pd.read_csv(train_filename)
        data_frame['Pixels'] = data_frame['Pixels'].apply(
            lambda x: np.fromstring(x, sep=" ") / 255.0)
        data_frame = data_frame.dropna()
        print("Reading train.csv ...")

        train_images = np.vstack(data_frame['Pixels']).reshape(
            -1, IMAGE_SIZE, IMAGE_SIZE, 1)
        print(train_images.shape)
        train_labels = np.array(
            [list(map(create_onehot_label,
                      data_frame['Emotion'].values))]).reshape(-1, NUM_LABELS)
        print(train_labels.shape)

        permutations = np.random.permutation(train_images.shape[0])
        train_images = train_images[permutations]
        train_labels = train_labels[permutations]
        validation_percent = int(train_images.shape[0] * VALIDATION_PERCENT)
        validation_images = train_images[:validation_percent]
        validation_labels = train_labels[:validation_percent]
        train_images = train_images[validation_percent:]
        train_labels = train_labels[validation_percent:]

        print("Reading test.csv ...")
        test_filename = os.path.join(data_dir, "test.csv")
        data_frame = pd.read_csv(test_filename)
        data_frame['Pixels'] = data_frame['Pixels'].apply(
            lambda x: np.fromstring(x, sep=" ") / 255.0)
        data_frame = data_frame.dropna()
        test_images = np.vstack(data_frame['Pixels']).reshape(
            -1, IMAGE_SIZE, IMAGE_SIZE, 1)

        with open(pickle_file, "wb") as file:
            try:
                print('Pickling ...')
                save = {
                    "train_images": train_images,
                    "train_labels": train_labels,
                    "validation_images": validation_images,
                    "validation_labels": validation_labels,
                    "test_images": test_images,
                }
                pickle.dump(save, file, pickle.HIGHEST_PROTOCOL)
            except Exception:
                print("Unable to pickle file :/")

    with open(pickle_file, "rb") as file:
        save = pickle.load(file)
        train_images = save["train_images"]
        train_labels = save["train_labels"]
        validation_images = save["validation_images"]
        validation_labels = save["validation_labels"]
        test_images = save["test_images"]

    return (train_images, train_labels, validation_images,
            validation_labels, test_images)
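A usage sketch for the reader above (the data directory is hypothetical; force=True rebuilds the pickle from the CSVs instead of loading the cache):

train_images, train_labels, validation_images, validation_labels, \
    test_images = read_data('EmotionDetector/', force=False)
print(train_images.shape)  # (N, IMAGE_SIZE, IMAGE_SIZE, 1)
print(train_labels.shape)  # (N, NUM_LABELS)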
tf_summary_writer = tf and tf.summary.FileWriter(opt.checkpoint_path)

infos = {}
histories = {}
if opt.start_from is not None:
    if opt.load_best_score == 1:
        model_path = os.path.join(opt.start_from, 'model-best.pth')
        info_path = os.path.join(opt.start_from,
                                 'infos_' + opt.id + '-best.pkl')
    else:
        model_path = os.path.join(opt.start_from, 'model.pth')
        info_path = os.path.join(opt.start_from, 'infos_' + opt.id + '.pkl')

    # open old infos and check if models are compatible
    # (binary mode so cPickle behaves the same on every platform)
    with open(info_path, 'rb') as f:
        infos = cPickle.load(f)
        saved_model_opt = infos['opt']
        # opt.learning_rate = saved_model_opt.learning_rate

    print('Loading the model %s...' % (model_path))
    model.load_state_dict(torch.load(model_path))

    if os.path.isfile(
            os.path.join(opt.start_from, 'histories_' + opt.id + '.pkl')):
        with open(os.path.join(opt.start_from,
                               'histories_' + opt.id + '.pkl'), 'rb') as f:
            histories = cPickle.load(f)

if opt.decode_noc:
    model._reinit_word_weight(opt, dataset.ctoi, dataset.wtoi)
    inferred_labels = model.fit(normalized_flux[validate_set],
                                normalized_ivar[validate_set])
    inferred_labels = np.vstack(inferred_labels).T

    fig, ax = plt.subplots(3)
    for i, label_name in enumerate(model.vectorizer.label_names):
        ax[i].scatter(labelled_set[label_name][validate_set],
                      inferred_labels[:, i])

    raise a
"""

# Fit individual spectra using two different models.
with open("apogee-rg-individual-visit-normalized.pickle", "rb") as fp:
    individual_visit_spectra = pickle.load(fp, encoding="latin-1")

latex_labels = {
    "TEFF": "T_{\\rm eff}",
    "LOGG": "\\log{g}",
    "FE_H": "{\\rm [Fe/H]}",
}

models_to_compare = {
    # "model1": "gridsearch-2.0-3.0.model",
    "model2": "gridsearch-2.0-3.0-s2-heuristically-set.model",
}

for model_name, saved_filename in models_to_compare.items():
    scale_factor = saved_filename
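The encoding="latin-1" argument above is the standard way to unpickle Python 2 pickles containing NumPy arrays under Python 3: latin-1 maps every byte to a code point, so binary buffers survive the decode intact. A self-contained sketch of the pattern (the filename is hypothetical):

import pickle

with open("legacy-python2-arrays.pickle", "rb") as fp:
    # encoding applies only to Python 2 str instances inside the pickle;
    # 'latin-1' decodes them byte-for-byte, keeping NumPy buffers intact.
    data = pickle.load(fp, encoding="latin-1")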
def train(opt):
    # Load data
    loader = DataLoader(opt)
    opt.vocab_size = loader.vocab_size
    opt.seq_length = loader.seq_length

    # Tensorboard summaries (they're great!)
    tb_summary_writer = tb and tb.SummaryWriter(opt.checkpoint_path)

    # Load pretrained model, info file, histories file
    infos = {}
    histories = {}
    if opt.start_from is not None:
        with open(os.path.join(opt.start_from,
                               'infos_' + opt.id + '.pkl'), 'rb') as f:
            infos = cPickle.load(f)
            saved_model_opt = infos['opt']
            need_be_same = ["rnn_type", "rnn_size", "num_layers"]
            for checkme in need_be_same:
                assert vars(saved_model_opt)[checkme] == vars(opt)[checkme], \
                    "Command line argument and saved model disagree on '%s'" \
                    % checkme
        if os.path.isfile(os.path.join(opt.start_from,
                                       'histories_' + opt.id + '.pkl')):
            with open(os.path.join(opt.start_from,
                                   'histories_' + opt.id + '.pkl'),
                      'rb') as f:
                histories = cPickle.load(f)

    iteration = infos.get('iter', 0)
    epoch = infos.get('epoch', 0)
    val_result_history = histories.get('val_result_history', {})
    loss_history = histories.get('loss_history', {})
    lr_history = histories.get('lr_history', {})
    # ss_prob_history = histories.get('ss_prob_history', {})
    loader.iterators = infos.get('iterators', loader.iterators)
    loader.split_ix = infos.get('split_ix', loader.split_ix)
    if opt.load_best_score == 1:
        best_val_score = infos.get('best_val_score', None)

    # Create model
    model = models.setup(opt, reverse=False).cuda()
    pretrained_dict = torch.load(opt.model)
    model.load_state_dict(pretrained_dict, strict=False)
    back_model = models.setup(opt, reverse=True).cuda()
    d_pretrained_dict = torch.load('./log_xe/d_model.pth')
    back_model.load_state_dict(d_pretrained_dict, strict=False)
    dp_model = model
    dp_model.train()
    back_model.train()

    # Loss function
    crit = utils.LanguageModelCriterion()
    rl_crit = utils.RewardCriterion()

    # Optimizer and learning rate adjustment flag
    optimizer = utils.build_optimizer(
        chain(model.parameters(), back_model.parameters()), opt)
    # back_optimizer = utils.build_optimizer(back_model.parameters(), opt)
    update_lr_flag = True

    # Load the optimizer
    if vars(opt).get('start_from', None) is not None and \
            os.path.isfile(os.path.join(opt.start_from, "optimizer.pth")):
        optimizer.load_state_dict(
            torch.load(os.path.join(opt.start_from, 'optimizer.pth')))

    # Training loop
    while True:
        # Update learning rate once per epoch
        if update_lr_flag:
            # Assign the learning rate
            if epoch > opt.learning_rate_decay_start and \
                    opt.learning_rate_decay_start >= 0:
                frac = (epoch - opt.learning_rate_decay_start) // \
                    opt.learning_rate_decay_every
                decay_factor = opt.learning_rate_decay_rate ** frac
                opt.current_lr = opt.learning_rate * decay_factor
            else:
                opt.current_lr = opt.learning_rate
            utils.set_lr(optimizer, opt.current_lr)

            # Assign the scheduled sampling prob
            if epoch > opt.scheduled_sampling_start and \
                    opt.scheduled_sampling_start >= 0:
                frac = (epoch - opt.scheduled_sampling_start) // \
                    opt.scheduled_sampling_increase_every
                # opt.ss_prob = min(opt.scheduled_sampling_increase_prob
                #                   * frac, opt.scheduled_sampling_max_prob)
                # model.ss_prob = opt.ss_prob

            # If start self critical training
            if opt.self_critical_after != -1 and \
                    epoch >= opt.self_critical_after:
                sc_flag = True
                init_scorer(opt.cached_tokens)
            else:
                sc_flag = False

            update_lr_flag = False

        # Load data from train split (0)
        start = time.time()
        data = loader.get_batch('train')
        data_time = time.time() - start
        start = time.time()

        # Unpack data
        torch.cuda.synchronize()
        tmp = [data['fc_feats'], data['att_feats'], data['labels'],
               data['dist'], data['masks'], data['att_masks']]
        tmp = [_ if _ is None else torch.from_numpy(_).cuda() for _ in tmp]
        fc_feats, att_feats, labels, dist_label, masks, att_masks = tmp
        mid_data = data['labels']
        reverse_labels = np.reshape(mid_data, (-1, 6 * 30))
        reverse_labels = np.flip(reverse_labels, 1).copy()
        masks = masks.view((-1, 6 * 30))
        mid_mask = data['masks']
        reverse_masks = np.reshape(mid_mask, (-1, 6 * 30))
        reverse_masks = np.flip(reverse_masks, 1).copy()
        batchsize = fc_feats.size(0)

        # Forward pass and loss
        optimizer.zero_grad()
        # print(torch.sum(labels != 0), torch.sum(masks != 0))
        wordact, x_all = dp_model(fc_feats, att_feats, labels)
        wordact = F.log_softmax(wordact, 1)
        mask = masks[:, 1:].contiguous()
        wordact = wordact[:, :, :-1]
        # wordact_t = wordact.permute(0, 2, 1).contiguous()
        # wordact_t = wordact_t.view(
        #     wordact_t.size(0) * wordact_t.size(1), -1)
        labels = labels.contiguous().view(-1, 6 * 30).cpu()
        wordclass_v = labels[:, 1:]
        # wordclass_t = wordclass_v.contiguous().view(
        #     wordclass_v.size(0) * wordclass_v.size(1), 1)
        # maskids = torch.nonzero(mask.view(-1).cpu()).numpy().reshape(-1)
        # loss = F.cross_entropy(wordact_t[maskids, ...],
        #     wordclass_t[maskids, ...].contiguous().view(
        #         maskids.shape[0])).cuda()
        loss = crit(wordact.transpose(2, 1), wordclass_v, mask.cpu())

        # back_optimizer.zero_grad()
        labels_flip = torch.from_numpy(reverse_labels).cuda().view(
            (-1, 6, 30))
        wordact, x_all_flip = back_model(fc_feats, att_feats, labels_flip)
        wordact = F.log_softmax(wordact, 1)
        reverse_masks = torch.from_numpy(reverse_masks).cuda()
        reverse_masks = reverse_masks[:, 1:].contiguous()
        wordact = wordact[:, :, :-1]
        labels_flip = labels_flip.contiguous().view(-1, 6 * 30).cpu()
        wordclass_v = labels_flip[:, 1:]
        loss_flip = crit(wordact.transpose(2, 1), wordclass_v,
                         reverse_masks.cpu())

        x_all = x_all[:, :, :-1]
        x_all_flip = x_all_flip[:, :, :-1]
        idx = [i for i in range(x_all_flip.size()[2] - 1, -1, -1)]
        idx = torch.LongTensor(idx[1:])
        idx = Variable(idx).cuda()
        invert_backstates = x_all_flip.index_select(2, idx)
        invert_backstates = invert_backstates.detach()
        l2_loss = ((x_all[:, :, :-1] - invert_backstates) ** 2).mean()

        all_loss = loss.cuda()  # + loss_flip.cuda() + 3 * l2_loss
        all_loss.backward()
        # l2_loss.backward()

        utils.clip_gradient(optimizer, opt.grad_clip)
        optimizer.step()
        train_loss = loss.item()
        torch.cuda.synchronize()
        total_time = time.time() - start

        if iteration % opt.print_freq == 1:
            print('Read data:', time.time() - start)
            if not sc_flag:
                print("iter {} (epoch {}), train_loss = {:.3f}, "
                      "loss_reg = {:.3f}, loss_flip = {:.3f}, "
                      "data_time = {:.3f}, time/batch = {:.3f}"
                      .format(iteration, epoch, train_loss, l2_loss,
                              loss_flip, data_time, total_time))
            else:
                print("iter {} (epoch {}), avg_reward = {:.3f}, "
                      "data_time = {:.3f}, time/batch = {:.3f}"
                      .format(iteration, epoch, np.mean(reward[:, 0]),
                              data_time, total_time))

        # Update the iteration and epoch
        iteration += 1
        if data['bounds']['wrapped']:
            epoch += 1
            update_lr_flag = True

        # Write the training loss summary
        if (iteration % opt.losses_log_every == 0):
            add_summary_value(tb_summary_writer, 'train_loss',
                              train_loss, iteration)
            add_summary_value(tb_summary_writer, 'learning_rate',
                              opt.current_lr, iteration)
            # add_summary_value(tb_summary_writer,
            #                   'scheduled_sampling_prob',
            #                   model.ss_prob, iteration)
            if sc_flag:
                add_summary_value(tb_summary_writer, 'avg_reward',
                                  np.mean(reward[:, 0]), iteration)
            loss_history[iteration] = train_loss if not sc_flag \
                else np.mean(reward[:, 0])
            lr_history[iteration] = opt.current_lr
            # ss_prob_history[iteration] = model.ss_prob

        # Validate and save model
        if (iteration % opt.save_checkpoint_every == 0):
            checkpoint_path = os.path.join(opt.checkpoint_path, 'model.pth')
            torch.save(model.state_dict(), checkpoint_path)
            checkpoint_path = os.path.join(opt.checkpoint_path,
                                           'd_model.pth')
            torch.save(back_model.state_dict(), checkpoint_path)
            print("model saved to {}".format(checkpoint_path))
            optimizer_path = os.path.join(opt.checkpoint_path,
                                          'optimizer.pth')
            torch.save(optimizer.state_dict(), optimizer_path)

            # Evaluate model
            eval_kwargs = {'split': 'test', 'dataset': opt.input_json}
            eval_kwargs.update(vars(opt))
            val_loss, predictions, lang_stats = eval_utils.eval_split(
                dp_model, crit, loader, eval_kwargs)

            # Write validation result into summary
            add_summary_value(tb_summary_writer, 'validation loss',
                              val_loss, iteration)
            if lang_stats is not None:
                for k, v in lang_stats.items():
                    add_summary_value(tb_summary_writer, k, v, iteration)
            val_result_history[iteration] = {'loss': val_loss,
                                             'lang_stats': lang_stats,
                                             'predictions': predictions}

            # Our metric is CIDEr if available, otherwise validation loss
            if opt.language_eval == 1:
                current_score = lang_stats['CIDEr']
            else:
                current_score = -val_loss

            # Save model in checkpoint path
            best_flag = False
            if best_val_score is None or current_score > best_val_score:
                best_val_score = current_score
                best_flag = True
            checkpoint_path = os.path.join(opt.checkpoint_path, 'model.pth')
            torch.save(model.state_dict(), checkpoint_path)
            checkpoint_path = os.path.join(opt.checkpoint_path,
                                           'd_model.pth')
            torch.save(back_model.state_dict(), checkpoint_path)
            print("model saved to {}".format(checkpoint_path))
            optimizer_path = os.path.join(opt.checkpoint_path,
                                          'optimizer.pth')
            torch.save(optimizer.state_dict(), optimizer_path)

            # Dump miscellaneous information
            infos['iter'] = iteration
            infos['epoch'] = epoch
            infos['iterators'] = loader.iterators
            infos['split_ix'] = loader.split_ix
            infos['best_val_score'] = best_val_score
            infos['opt'] = opt
            infos['vocab'] = loader.get_vocab()
            histories['val_result_history'] = val_result_history
            histories['loss_history'] = loss_history
            histories['lr_history'] = lr_history
            # histories['ss_prob_history'] = ss_prob_history
            with open(os.path.join(opt.checkpoint_path,
                                   'infos_' + opt.id + '.pkl'), 'wb') as f:
                cPickle.dump(infos, f)
            with open(os.path.join(opt.checkpoint_path,
                                   'histories_' + opt.id + '.pkl'),
                      'wb') as f:
                cPickle.dump(histories, f)

            # Save model to unique file if new best model
            if best_flag:
                model_fname = 'model-best-i{:05d}-score{:.4f}.pth'.format(
                    iteration, best_val_score)
                infos_fname = 'model-best-i{:05d}-infos.pkl'.format(
                    iteration)
                checkpoint_path = os.path.join(opt.checkpoint_path,
                                               model_fname)
                torch.save(model.state_dict(), checkpoint_path)
                checkpoint_path = os.path.join(opt.checkpoint_path,
                                               'd_model-best.pth')
                torch.save(back_model.state_dict(), checkpoint_path)
                print("model saved to {}".format(checkpoint_path))
                with open(os.path.join(opt.checkpoint_path, infos_fname),
                          'wb') as f:
                    cPickle.dump(infos, f)
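To resume from the checkpoints written above, the two pickles can be read back in matching binary mode; load_checkpoint_state below is a hypothetical helper sketched to mirror the cPickle.dump calls in the training loop:

import os
from six.moves import cPickle

def load_checkpoint_state(checkpoint_path, run_id):
    # Mirrors the dump calls in train(); returns empty dicts
    # when no checkpoint exists yet.
    infos, histories = {}, {}
    info_path = os.path.join(checkpoint_path, 'infos_' + run_id + '.pkl')
    hist_path = os.path.join(checkpoint_path,
                             'histories_' + run_id + '.pkl')
    if os.path.isfile(info_path):
        with open(info_path, 'rb') as f:
            infos = cPickle.load(f)
    if os.path.isfile(hist_path):
        with open(hist_path, 'rb') as f:
            histories = cPickle.load(f)
    return infos, histories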
# THIS IMPLEMENTATION WILL INCLUDE A CONFUSION MATRIX
# The input can now handle images of different sizes
import numpy as np
from six.moves import cPickle as pickle
import time
import filters
import gabor_filter

with open('C:/data/train_data/data.pickle', 'rb') as f:
    tr_dat = pickle.load(f)
with open('C:/data/train_data/label.pickle', 'rb') as f:
    tr_lab = pickle.load(f)
with open('C:/data/test_data/data.pickle', 'rb') as f:
    te_dat = pickle.load(f)
with open('C:/data/test_data/label.pickle', 'rb') as f:
    te_lab = pickle.load(f)


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def sig_deri(x):
    # derivative of the sigmoid, expressed in terms of its output
    return x * (1 - x)


# 1st layer initialization
layer1_ch = 2
CNN_layer1_map = np.ndarray(shape=(layer1_ch, 8, 8))
CNN_layer1_weight = (np.random.rand(layer1_ch, 3, 3) - 0.5) * 2.5
CNN_layer1_bias = np.random.rand(layer1_ch, 8, 8)
CNN_layer1_stride = 2

# 2nd layer initialization
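A quick numerical check of the two helpers above; note that sig_deri expects the sigmoid output, not the raw input:

x = np.array([-2.0, 0.0, 2.0])
s = sigmoid(x)
print(s)            # approximately [0.119, 0.5, 0.881]
print(sig_deri(s))  # s * (1 - s): approximately [0.105, 0.25, 0.105]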
def pred_eval(
        config,
        predictor,
        test_data,
        imdb_test,
        vis=False,
        ignore_cache=None,
        logger=None,
        pairdb=None,
):
    """
    wrapper for calculating offline validation for faster data analysis
    in this example, all thresholds are set by hand
    :param predictor: Predictor
    :param test_data: data iterator, must be non-shuffle
    :param imdb_test: image database
    :param vis: controls visualization
    :param ignore_cache: ignore the saved cache file
    :param logger: the logger instance
    :return:
    """
    print(imdb_test.result_path)
    print("test iter size: ", config.TEST.test_iter)
    pose_err_file = os.path.join(
        imdb_test.result_path,
        imdb_test.name + "_pose_iter{}.pkl".format(config.TEST.test_iter))
    if os.path.exists(pose_err_file) and not ignore_cache and not vis:
        with open(pose_err_file, "rb") as fid:
            if six.PY3:
                [all_rot_err, all_trans_err, all_poses_est,
                 all_poses_gt] = cPickle.load(fid, encoding="latin1")
            else:
                [all_rot_err, all_trans_err, all_poses_est,
                 all_poses_gt] = cPickle.load(fid)
        imdb_test.evaluate_pose(config, all_poses_est, all_poses_gt, logger)
        pose_add_plots_dir = os.path.join(imdb_test.result_path, "add_plots")
        mkdir_if_missing(pose_add_plots_dir)
        imdb_test.evaluate_pose_add(
            config, all_poses_est, all_poses_gt,
            output_dir=pose_add_plots_dir, logger=logger)
        pose_arp2d_plots_dir = os.path.join(imdb_test.result_path,
                                            "arp_2d_plots")
        mkdir_if_missing(pose_arp2d_plots_dir)
        imdb_test.evaluate_pose_arp_2d(
            config, all_poses_est, all_poses_gt,
            output_dir=pose_arp2d_plots_dir, logger=logger)
        return

    assert vis or not test_data.shuffle
    assert config.TEST.BATCH_PAIRS == 1
    if not isinstance(test_data, PrefetchingIter):
        test_data = PrefetchingIter(test_data)

    num_pairs = len(pairdb)
    height = 480
    width = 640

    data_time, net_time, post_time = 0.0, 0.0, 0.0
    sum_EPE_all = 0.0
    num_inst_all = 0.0
    sum_EPE_viz = 0.0
    num_inst_viz = 0.0
    sum_EPE_vizbg = 0.0
    num_inst_vizbg = 0.0
    sum_PoseErr = [np.zeros((len(imdb_test.classes) + 1, 2))
                   for batch_idx in range(config.TEST.test_iter)]

    # num_cls x test_iter
    all_rot_err = [[[] for j in range(config.TEST.test_iter)]
                   for batch_idx in range(len(imdb_test.classes))]
    all_trans_err = [[[] for j in range(config.TEST.test_iter)]
                     for batch_idx in range(len(imdb_test.classes))]
    all_poses_est = [[[] for j in range(config.TEST.test_iter)]
                     for batch_idx in range(len(imdb_test.classes))]
    all_poses_gt = [[[] for j in range(config.TEST.test_iter)]
                    for batch_idx in range(len(imdb_test.classes))]

    num_inst = np.zeros(len(imdb_test.classes) + 1)

    K = config.dataset.INTRINSIC_MATRIX
    if config.TEST.test_iter > 1 or config.TEST.VISUALIZE:
        print("************* start setup render_glumpy environment "
              "******************")
        if config.dataset.dataset.startswith("ModelNet"):
            from lib.render_glumpy.render_py_light_modelnet_multi import (
                Render_Py_Light_ModelNet_Multi, )
            modelnet_root = config.modelnet_root
            texture_path = os.path.join(modelnet_root, "gray_texture.png")
            model_path_list = [
                os.path.join(config.dataset.model_dir,
                             "{}.obj".format(model_name))
                for model_name in config.dataset.class_name
            ]
            render_machine = Render_Py_Light_ModelNet_Multi(
                model_path_list,
                texture_path,
                K,
                width,
                height,
                config.dataset.ZNEAR,
                config.dataset.ZFAR,
                brightness_ratios=[0.7],
            )
        else:
            render_machine = Render_Py(
                config.dataset.model_dir,
                config.dataset.class_name,
                K,
                width,
                height,
                config.dataset.ZNEAR,
                config.dataset.ZFAR,
            )

        def render(render_machine, pose, cls_idx, K=None):
            if config.dataset.dataset.startswith("ModelNet"):
                idx = 2
                # generate random light_position
                if idx % 6 == 0:
                    light_position = [1, 0, 1]
                elif idx % 6 == 1:
                    light_position = [1, 1, 1]
                elif idx % 6 == 2:
                    light_position = [0, 1, 1]
                elif idx % 6 == 3:
                    light_position = [-1, 1, 1]
                elif idx % 6 == 4:
                    light_position = [-1, 0, 1]
                elif idx % 6 == 5:
                    light_position = [0, 0, 1]
                else:
                    raise Exception("???")
                light_position = np.array(light_position) * 0.5
                # inverse yz
                light_position[0] += pose[0, 3]
                light_position[1] -= pose[1, 3]
                light_position[2] -= pose[2, 3]

                colors = np.array([1, 1, 1])  # white light
                intensity = np.random.uniform(0.9, 1.1, size=(3, ))
                colors_randk = 0
                light_intensity = colors[colors_randk] * intensity

                # randomly choose a render machine
                rm_randk = 0  # random.randint(0, len(brightness_ratios) - 1)
                rgb_gl, depth_gl = render_machine.render(
                    cls_idx,
                    pose[:3, :3],
                    pose[:3, 3],
                    light_position,
                    light_intensity,
                    brightness_k=rm_randk,
                    r_type="mat",
                )
                rgb_gl = rgb_gl.astype("uint8")
            else:
                rgb_gl, depth_gl = render_machine.render(
                    cls_idx, pose[:3, :3], pose[:, 3], r_type="mat", K=K)
                rgb_gl = rgb_gl.astype("uint8")
            return rgb_gl, depth_gl

        print("***************setup render_glumpy environment succeed "
              "******************")

    if config.TEST.PRECOMPUTED_ICP:
        print("precomputed_ICP")
        config.TEST.test_iter = 1
        all_rot_err = [[[] for j in range(1)]
                       for batch_idx in range(len(imdb_test.classes))]
        all_trans_err = [[[] for j in range(1)]
                         for batch_idx in range(len(imdb_test.classes))]
        all_poses_est = [[[] for j in range(1)]
                         for batch_idx in range(len(imdb_test.classes))]
        all_poses_gt = [[[] for j in range(1)]
                        for batch_idx in range(len(imdb_test.classes))]
        xy_trans_err = [[[] for j in range(1)]
                        for batch_idx in range(len(imdb_test.classes))]
        z_trans_err = [[[] for j in range(1)]
                       for batch_idx in range(len(imdb_test.classes))]

        for idx in range(len(pairdb)):
            pose_path = pairdb[idx]["depth_rendered"][:-10] + "-pose_icp.txt"
            pose_rendered_update = np.loadtxt(pose_path, skiprows=1)
            pose_observed = pairdb[idx]["pose_observed"]
            r_dist_est, t_dist_est = calc_rt_dist_m(pose_rendered_update,
                                                    pose_observed)
            xy_dist = np.linalg.norm(pose_rendered_update[:2, -1] -
                                     pose_observed[:2, -1])
            z_dist = np.linalg.norm(pose_rendered_update[-1, -1] -
                                    pose_observed[-1, -1])
            print("{}: r_dist_est: {}, t_dist_est: {}, xy_dist: {}, "
                  "z_dist: {}".format(idx, r_dist_est, t_dist_est,
                                      xy_dist, z_dist))
            class_id = imdb_test.classes.index(pairdb[idx]["gt_class"])
            # store poses estimation and gt
            all_poses_est[class_id][0].append(pose_rendered_update)
            all_poses_gt[class_id][0].append(pairdb[idx]["pose_observed"])
            all_rot_err[class_id][0].append(r_dist_est)
            all_trans_err[class_id][0].append(t_dist_est)
            xy_trans_err[class_id][0].append(xy_dist)
            z_trans_err[class_id][0].append(z_dist)

        all_rot_err = np.array(all_rot_err)
        all_trans_err = np.array(all_trans_err)
        print("rot = {} +/- {}".format(np.mean(all_rot_err[class_id][0]),
                                       np.std(all_rot_err[class_id][0])))
        print("trans = {} +/- {}".format(
            np.mean(all_trans_err[class_id][0]),
            np.std(all_trans_err[class_id][0])))
        num_list = all_trans_err[class_id][0]
        print("xyz: {:.2f} +/- {:.2f}".format(
            np.mean(num_list) * 100, np.std(num_list) * 100))
        num_list = xy_trans_err[class_id][0]
        print("xy: {:.2f} +/- {:.2f}".format(
            np.mean(num_list) * 100, np.std(num_list) * 100))
        num_list = z_trans_err[class_id][0]
        print("z: {:.2f} +/- {:.2f}".format(
            np.mean(num_list) * 100, np.std(num_list) * 100))

        imdb_test.evaluate_pose(config, all_poses_est, all_poses_gt, logger)
        pose_add_plots_dir = os.path.join(imdb_test.result_path,
                                          "add_plots_precomputed_ICP")
        mkdir_if_missing(pose_add_plots_dir)
        imdb_test.evaluate_pose_add(
            config, all_poses_est, all_poses_gt,
            output_dir=pose_add_plots_dir, logger=logger)
        pose_arp2d_plots_dir = os.path.join(
            imdb_test.result_path, "arp_2d_plots_precomputed_ICP")
        mkdir_if_missing(pose_arp2d_plots_dir)
        imdb_test.evaluate_pose_arp_2d(
            config, all_poses_est, all_poses_gt,
            output_dir=pose_arp2d_plots_dir, logger=logger)
        return

    if config.TEST.BEFORE_ICP:
        print("before_ICP")
        config.TEST.test_iter = 1
        all_rot_err = [[[] for j in range(1)]
                       for batch_idx in range(len(imdb_test.classes))]
        all_trans_err = [[[] for j in range(1)]
                         for batch_idx in range(len(imdb_test.classes))]
        all_poses_est = [[[] for j in range(1)]
                         for batch_idx in range(len(imdb_test.classes))]
        all_poses_gt = [[[] for j in range(1)]
                        for batch_idx in range(len(imdb_test.classes))]
        xy_trans_err = [[[] for j in range(1)]
                        for batch_idx in range(len(imdb_test.classes))]
        z_trans_err = [[[] for j in range(1)]
                       for batch_idx in range(len(imdb_test.classes))]

        for idx in range(len(pairdb)):
            pose_path = pairdb[idx]["depth_rendered"][:-10] + "-pose.txt"
            pose_rendered_update = np.loadtxt(pose_path, skiprows=1)
            pose_observed = pairdb[idx]["pose_observed"]
            r_dist_est, t_dist_est = calc_rt_dist_m(pose_rendered_update,
                                                    pose_observed)
            xy_dist = np.linalg.norm(pose_rendered_update[:2, -1] -
                                     pose_observed[:2, -1])
            z_dist = np.linalg.norm(pose_rendered_update[-1, -1] -
                                    pose_observed[-1, -1])
            class_id = imdb_test.classes.index(pairdb[idx]["gt_class"])
            # store poses estimation and gt
            all_poses_est[class_id][0].append(pose_rendered_update)
            all_poses_gt[class_id][0].append(pairdb[idx]["pose_observed"])
            all_rot_err[class_id][0].append(r_dist_est)
            all_trans_err[class_id][0].append(t_dist_est)
            xy_trans_err[class_id][0].append(xy_dist)
            z_trans_err[class_id][0].append(z_dist)

        all_trans_err = np.array(all_trans_err)
        imdb_test.evaluate_pose(config, all_poses_est, all_poses_gt, logger)
        pose_add_plots_dir = os.path.join(imdb_test.result_path,
                                          "add_plots_before_ICP")
        mkdir_if_missing(pose_add_plots_dir)
        imdb_test.evaluate_pose_add(
            config, all_poses_est, all_poses_gt,
            output_dir=pose_add_plots_dir, logger=logger)
        pose_arp2d_plots_dir = os.path.join(imdb_test.result_path,
                                            "arp_2d_plots_before_ICP")
        mkdir_if_missing(pose_arp2d_plots_dir)
        imdb_test.evaluate_pose_arp_2d(
            config, all_poses_est, all_poses_gt,
            output_dir=pose_arp2d_plots_dir, logger=logger)
        return

    # --------------------------------------------------------------------
    t_start = time.time()
    t = time.time()
    for idx, data_batch in enumerate(test_data):
        if np.sum(pairdb[idx]["pose_rendered"]) == -12:
            # NO POINT VALID IN INIT POSE
            print(idx)
            class_id = imdb_test.classes.index(pairdb[idx]["gt_class"])
            for pose_iter_idx in range(config.TEST.test_iter):
                all_poses_est[class_id][pose_iter_idx].append(
                    pairdb[idx]["pose_rendered"])
                all_poses_gt[class_id][pose_iter_idx].append(
                    pairdb[idx]["pose_observed"])
                r_dist = 1000
                t_dist = 1000
                all_rot_err[class_id][pose_iter_idx].append(r_dist)
                all_trans_err[class_id][pose_iter_idx].append(t_dist)
                sum_PoseErr[pose_iter_idx][class_id, :] += np.array(
                    [r_dist, t_dist])
                sum_PoseErr[pose_iter_idx][-1, :] += np.array(
                    [r_dist, t_dist])

            # post process
            if idx % 50 == 0:
                print_and_log(
                    "testing {}/{} data {:.4f}s net {:.4f}s calc_gt {:.4f}s"
                    .format(
                        (idx + 1),
                        num_pairs,
                        data_time / ((idx + 1) * test_data.batch_size),
                        net_time / ((idx + 1) * test_data.batch_size),
                        post_time / ((idx + 1) * test_data.batch_size),
                    ),
                    logger,
                )
            print("in test: NO POINT_VALID IN rendered")
            continue

        data_time += time.time() - t
        t = time.time()

        pose_rendered = pairdb[idx]["pose_rendered"]
        if np.sum(pose_rendered) == -12:
            print(idx)
            class_id = imdb_test.classes.index(pairdb[idx]["gt_class"])
            num_inst[class_id] += 1
            num_inst[-1] += 1
            for pose_iter_idx in range(config.TEST.test_iter):
                all_poses_est[class_id][pose_iter_idx].append(pose_rendered)
                all_poses_gt[class_id][pose_iter_idx].append(
                    pairdb[idx]["pose_observed"])

            # post process
            if idx % 50 == 0:
                print_and_log(
                    "testing {}/{} data {:.4f}s net {:.4f}s calc_gt {:.4f}s"
                    .format(
                        (idx + 1),
                        num_pairs,
                        data_time / ((idx + 1) * test_data.batch_size),
                        net_time / ((idx + 1) * test_data.batch_size),
                        post_time / ((idx + 1) * test_data.batch_size),
                    ),
                    logger,
                )
            t = time.time()
            continue

        output_all = predictor.predict(data_batch)
        net_time += time.time() - t
        t = time.time()
        rst_iter = []
        for output in output_all:
            cur_rst = {}
            cur_rst["se3"] = np.squeeze(
                output["se3_output"].asnumpy()).astype("float32")
            if not config.TEST.FAST_TEST and config.network.PRED_FLOW:
                cur_rst["flow"] = np.squeeze(
                    output["flow_est_crop_output"].asnumpy().transpose(
                        (2, 3, 1, 0))).astype("float16")
            else:
                cur_rst["flow"] = None
            if config.network.PRED_MASK and config.TEST.UPDATE_MASK not in [
                    "init", "box_rendered"]:
                mask_pred = np.squeeze(
                    output["mask_observed_pred_output"].asnumpy()).astype(
                        "float32")
                cur_rst["mask_pred"] = mask_pred
            rst_iter.append(cur_rst)
        post_time += time.time() - t

        # sample_ratio = 1  # 0.01
        for batch_idx in range(0, test_data.batch_size):
            # if config.TEST.VISUALIZE and not (r_dist > 15 and
            #         t_dist > 0.05):
            #     continue  # 3388, 5326

            # calculate the flow error ----------------------------------
            t = time.time()
            if config.network.PRED_FLOW and not config.TEST.FAST_TEST:
                # evaluate optical flow
                flow_gt = par_generate_gt(config, pairdb[idx])
                if config.network.PRED_FLOW:
                    all_diff = calc_EPE_one_pair(rst_iter[batch_idx],
                                                 flow_gt, "flow")
                sum_EPE_all += all_diff["epe_all"]
                num_inst_all += all_diff["num_all"]
                sum_EPE_viz += all_diff["epe_viz"]
                num_inst_viz += all_diff["num_viz"]
                sum_EPE_vizbg += all_diff["epe_vizbg"]
                num_inst_vizbg += all_diff["num_vizbg"]

            # calculate the se3 error -----------------------------------
            # evaluate se3 estimation
            pose_rendered = pairdb[idx]["pose_rendered"]
            class_id = imdb_test.classes.index(pairdb[idx]["gt_class"])
            num_inst[class_id] += 1
            num_inst[-1] += 1
            post_time += time.time() - t

            # iterative refine se3 estimation ---------------------------
            for pose_iter_idx in range(config.TEST.test_iter):
                t = time.time()
                pose_rendered_update = RT_transform(
                    pose_rendered,
                    rst_iter[0]["se3"][:-3],
                    rst_iter[0]["se3"][-3:],
                    config.dataset.trans_means,
                    config.dataset.trans_stds,
                    config.network.ROT_COORD,
                )

                # calculate error
                r_dist, t_dist = calc_rt_dist_m(
                    pose_rendered_update, pairdb[idx]["pose_observed"])

                # store poses estimation and gt
                all_poses_est[class_id][pose_iter_idx].append(
                    pose_rendered_update)
                all_poses_gt[class_id][pose_iter_idx].append(
                    pairdb[idx]["pose_observed"])
                all_rot_err[class_id][pose_iter_idx].append(r_dist)
                all_trans_err[class_id][pose_iter_idx].append(t_dist)
                sum_PoseErr[pose_iter_idx][class_id, :] += np.array(
                    [r_dist, t_dist])
                sum_PoseErr[pose_iter_idx][-1, :] += np.array(
                    [r_dist, t_dist])
                if config.TEST.VISUALIZE:
                    print("idx {}, iter {}: rError: {}, tError: {}".format(
                        idx + batch_idx, pose_iter_idx + 1, r_dist, t_dist))
                post_time += time.time() - t

                # if more than one iteration
                if pose_iter_idx < (config.TEST.test_iter - 1) or \
                        config.TEST.VISUALIZE:
                    t = time.time()
                    # get refined image
                    K_path = pairdb[idx]["image_observed"][:-10] + "-K.txt"
                    if os.path.exists(K_path):
                        K = np.loadtxt(K_path)
                    image_refined, depth_refined = render(
                        render_machine,
                        pose_rendered_update,
                        config.dataset.class_name.index(
                            pairdb[idx]["gt_class"]),
                        K=K,
                    )
                    image_refined = image_refined[:, :, :3]

                    # update minibatch
                    update_package = [{
                        "image_rendered": image_refined,
                        "src_pose": pose_rendered_update,
                    }]
                    if config.network.INPUT_DEPTH:
                        update_package[0]["depth_rendered"] = depth_refined
                    if config.network.INPUT_MASK:
                        mask_rendered_refined = np.zeros(
                            depth_refined.shape)
                        mask_rendered_refined[depth_refined > 0.2] = 1
                        update_package[0]["mask_rendered"] = \
                            mask_rendered_refined
                        if config.network.PRED_MASK:
                            # init, box_rendered, mask_rendered,
                            # box_observed, mask_observed
                            if config.TEST.UPDATE_MASK == "box_rendered":
                                input_names = [
                                    blob_name[0] for blob_name in
                                    data_batch.provide_data[0]
                                ]
                                update_package[0]["mask_observed"] = \
                                    np.squeeze(data_batch.data[0][
                                        input_names.index("mask_rendered")]
                                        .asnumpy()[batch_idx])  # noqa
                            elif config.TEST.UPDATE_MASK == "init":
                                pass
                            else:
                                raise Exception(
                                    "Unknown UPDATE_MASK type: {}".format(
                                        config.network.UPDATE_MASK))

                    pose_rendered = pose_rendered_update
                    data_batch = update_data_batch(config, data_batch,
                                                   update_package)
                    data_time += time.time() - t

                    # forward and get rst
                    if pose_iter_idx < config.TEST.test_iter - 1:
                        t = time.time()
                        output_all = predictor.predict(data_batch)
                        net_time += time.time() - t

                        t = time.time()
                        rst_iter = []
                        for output in output_all:
                            cur_rst = {}
                            if config.network.REGRESSOR_NUM == 1:
                                cur_rst["se3"] = np.squeeze(
                                    output["se3_output"].asnumpy()).astype(
                                        "float32")
                            if not config.TEST.FAST_TEST and \
                                    config.network.PRED_FLOW:
                                cur_rst["flow"] = np.squeeze(
                                    output["flow_est_crop_output"]
                                    .asnumpy().transpose(
                                        (2, 3, 1, 0))).astype("float16")
                            else:
                                cur_rst["flow"] = None
                            if (config.network.PRED_MASK and
                                    config.TEST.UPDATE_MASK not in
                                    ["init", "box_rendered"]):
                                mask_pred = np.squeeze(
                                    output["mask_observed_pred_output"]
                                    .asnumpy()).astype("float32")
                                cur_rst["mask_pred"] = mask_pred
                            rst_iter.append(cur_rst)
                        post_time += time.time() - t

        # post process
        if idx % 50 == 0:
            print_and_log(
                "testing {}/{} data {:.4f}s net {:.4f}s calc_gt {:.4f}s"
                .format(
                    (idx + 1),
                    num_pairs,
                    data_time / ((idx + 1) * test_data.batch_size),
                    net_time / ((idx + 1) * test_data.batch_size),
                    post_time / ((idx + 1) * test_data.batch_size),
                ),
                logger,
            )
        t = time.time()

    all_rot_err = np.array(all_rot_err)
    all_trans_err = np.array(all_trans_err)

    # save inference results
    if not config.TEST.VISUALIZE:
        with open(pose_err_file, "wb") as f:
            print("saving result cache to {}".format(pose_err_file))
            cPickle.dump(
                [all_rot_err, all_trans_err, all_poses_est, all_poses_gt],
                f, protocol=2)
            print("done")

    if config.network.PRED_FLOW:
        print_and_log("evaluate flow:", logger)
        print_and_log(
            "EPE all: {}".format(sum_EPE_all / max(num_inst_all, 1.0)),
            logger)
        print_and_log(
            "EPE ignore unvisible: {}".format(
                sum_EPE_vizbg / max(num_inst_vizbg, 1.0)), logger)
        print_and_log(
            "EPE visible: {}".format(sum_EPE_viz / max(num_inst_viz, 1.0)),
            logger)

    print_and_log("evaluate pose:", logger)
    imdb_test.evaluate_pose(config, all_poses_est, all_poses_gt, logger)
    # evaluate pose add
    pose_add_plots_dir = os.path.join(imdb_test.result_path, "add_plots")
    mkdir_if_missing(pose_add_plots_dir)
    imdb_test.evaluate_pose_add(config, all_poses_est, all_poses_gt,
                                output_dir=pose_add_plots_dir, logger=logger)
    pose_arp2d_plots_dir = os.path.join(imdb_test.result_path,
                                        "arp_2d_plots")
    mkdir_if_missing(pose_arp2d_plots_dir)
    imdb_test.evaluate_pose_arp_2d(config, all_poses_est, all_poses_gt,
                                   output_dir=pose_arp2d_plots_dir,
                                   logger=logger)

    print_and_log("using {} seconds in total".format(time.time() - t_start),
                  logger)
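The result cache dumped with protocol=2 at the end stays readable from both Python 2 and 3; below is a hypothetical standalone reader that mirrors the loading branch at the top of pred_eval (the path in the usage line is made up):

import six
from six.moves import cPickle

def load_pose_cache(pose_err_file):
    # protocol=2 pickles load on both interpreters; 'latin1' lets
    # Python 3 decode NumPy arrays pickled under Python 2.
    with open(pose_err_file, "rb") as fid:
        if six.PY3:
            return cPickle.load(fid, encoding="latin1")
        return cPickle.load(fid)

all_rot_err, all_trans_err, all_poses_est, all_poses_gt = \
    load_pose_cache("results/mydataset_pose_iter4.pkl")  # hypothetical path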
"title": "BJ’s Restaurant & Brewhouse", "location": "Jacksonville", "employees": "Employees are " + employee_sentiment + "." + "The company then bought 26.", "customers": "Customers are happy. The company then bought 26.", "shareholders": "Shareholders are happy. The company then bought 26.", "management": "Management is performing well. The company then bought 26." } # colors = {'background': '#111111', 'text': '#7FDBFF'} my_path = os.path.abspath(os.path.dirname('__file__')) path = os.path.join(my_path, "data/cpickle/") first_dict = pickle.load(open(path + "first_page.p", "rb")) figure = pf.figs_polar(first_dict["code_start"], "bench", first_dict["code_start"]) comp_plot_output = figure fig = figs(first_dict["code_start"], first_dict["the_benchmark"]) stock_plot_output = fig df_perf_summary = first_dict["df_perf_summary_output"] def make_dash_table(df): ''' Return a dash definitio of an HTML table for a Pandas dataframe ''' table = []
def load_dataset(filename):
    """
    Load the dataset from the filename. We assume it is a pickled Bunch.
    """
    with open(filename, "rb") as fid:
        data = cPickle.load(fid)
    return data
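A usage sketch for the loader above (the filename is hypothetical; a scikit-learn Bunch behaves like a dict with attribute access):

data = load_dataset("corpus_bunch.pkl")  # hypothetical filename
print(data.keys())  # e.g. dict_keys(['data', 'target']) for a Bunch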