def crop_pickle(self, pickle_name, func=1, bias=None, nm=5):
    """Crop every image stored in a pickle and write the crops to a new pickle.

    The input pickle must hold a dict whose ``'image'`` entry maps image
    names to image data.  Every crop is stored under the key
    ``'<crop index>-<image name>'`` and the result is written, in the same
    ``{'image': ...}`` layout, to ``'new_' + pickle_name``.

    :param pickle_name: path of the pickle file to read.
    :param func: cropping strategy. ``1`` calls ``self.set_boxes(bias)``
        once and then ``self.arraycrop`` per image; ``2`` calls
        ``self.randomcrop(image, nm)`` per image.  Any other value writes
        an output pickle that contains no crops.
    :param bias: value handed to ``self.set_boxes``; defaults to ``[[0, 0]]``.
    :param nm: second argument of ``self.randomcrop`` (used when ``func == 2``).
    """
    if bias is None:
        # Build the default per call so it can never be shared (and
        # mutated) across calls.
        bias = [[0, 0]]

    with open(pickle_name, 'rb') as f:
        data = pickle.load(f)['image']

    if func == 1:
        self.set_boxes(bias)
        crop = self.arraycrop
    elif func == 2:
        def crop(value):
            return self.randomcrop(value, nm)
    else:
        crop = None

    images = dict()
    if crop is not None:
        # ``items()`` works on both Python 2 and 3 dicts.
        for name, value in data.items():
            for key, cropped_image in enumerate(crop(value)):
                images.setdefault(str(key) + '-' + name, cropped_image)

    # NOTE(review): the prefix is applied to the whole string, so a
    # pickle_name containing a directory becomes 'new_<dir>/<file>'.
    with open('new_' + pickle_name, 'wb') as f:
        pickle.dump({'image': images}, f, pickle.HIGHEST_PROTOCOL)
def save_model(self, modelfile):
    """Persist the network layers and, separately, each layer's parameters.

    ``self.layers`` is pickled as a whole into ``modelfile``.  A second
    file, ``"params_" + modelfile``, receives one pickle record per layer
    holding that layer's ``params``, in the iteration order of
    ``self.layers``.

    :param modelfile: destination file name for the layers pickle.
    """
    highest = cPickle.HIGHEST_PROTOCOL

    with open(modelfile, "wb") as layers_out:
        cPickle.dump(self.layers, layers_out, protocol=highest)

    with open("params_" + modelfile, "wb") as params_out:
        for layer in self.layers.values():
            cPickle.dump(layer.params, params_out, protocol=highest)
def test_simple():
    """Check that a handful of simple figures and axes can be pickled.

    Each object is dumped into a throw-away in-memory buffer; the test
    passes when no dump raises.
    """
    def dump(obj):
        pickle.dump(obj, BytesIO(), pickle.HIGHEST_PROTOCOL)

    def ramp():
        return list(xrange(10))

    fig = plt.figure()
    # To hunt down an unpicklable member, call recursive_pickle(fig) here.
    dump(fig)

    ax = plt.subplot(121)
    dump(ax)

    # Polar axes carrying a labelled line and a legend.
    ax = plt.axes(projection='polar')
    plt.plot(ramp(), label='foobar')
    plt.legend()
    dump(ax)

    # Bar chart, pickled through the current axes.
    plt.figure()
    plt.bar(left=ramp(), height=ramp())
    dump(plt.gca())

    # Whole figure whose axes use a log y-scale.
    fig = plt.figure()
    ax = plt.axes()
    plt.plot(ramp())
    ax.set_yscale('log')
    dump(fig)
def save(self, fn, compress=True):
    """Pickle this object to ``fn``, gzip-compressed by default.

    :param fn: destination path.  When ``compress`` is true and the name
        (ignoring surrounding whitespace and letter case) does not end in
        ``.gz``, that suffix is appended.
    :param compress: write through ``gzip`` when true, to a plain file
        otherwise.
    """
    if compress and not fn.strip().lower().endswith('.gz'):
        fn = fn + '.gz'

    opener = gzip.open if compress else open
    # Close the handle deterministically: an unclosed GzipFile may never
    # write its trailer, leaving a truncated archive on disk.
    with opener(fn, 'wb') as fh:
        pickle.dump(self, fh)
def train_model(args):
    """Train the RNN model described by ``args`` and snapshot it periodically.

    The run configuration and the (words, vocabulary) pair of the input
    handler are pickled into ``args.snapshots_dir`` before training so a
    later sampling run can rebuild the model.  A checkpoint is written
    every ``args.snapshot`` global steps and after the very last batch.

    :param args: namespace with the training settings; its
        ``vocabulary_size`` attribute is set here from the input handler.
    """
    data_loader = InputHandler(args.data_dir, args.batch_size, args.result_length)
    args.vocabulary_size = data_loader.vocabulary_size

    # Keep the configuration and vocabulary next to the snapshots so the
    # model can be reloaded when sampling.
    config_path = os.path.join(args.snapshots_dir, CONFIGURATION_FILE)
    with open(config_path, 'wb') as config_out:
        cPickle.dump(args, config_out)
    vocab_path = os.path.join(args.snapshots_dir, WORDS_VOCABULARY_FILE)
    with open(vocab_path, 'wb') as vocab_out:
        cPickle.dump((data_loader.words, data_loader.vocabulary), vocab_out)

    model = RNNModel(args.rnn_size, args.network_depth, args.batch_size,
                     args.result_length, args.vocabulary_size, args.gradient)

    with tf.Session() as session:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        for e in range(args.num_epochs):
            # Exponentially decayed learning rate for this epoch.
            session.run(tf.assign(model.lr, args.training_rate * (args.decay_rate ** e)))
            data_loader.set_batch_pointer_to_zero()
            state = model.initial_state.eval()
            for b in range(data_loader.num_batches):
                x, y = data_loader.get_next_batch()
                feed = {model.input_data: x,
                        model.targets: y,
                        model.initial_state: state}
                train_loss, state, _ = session.run(
                    [model.cost, model.final_state, model.train_op], feed)

                step = e * data_loader.num_batches + b
                on_schedule = step % args.snapshot == 0
                # Always keep the result of the final batch.
                is_last = (e == args.num_epochs - 1
                           and b == data_loader.num_batches - 1)
                if on_schedule or is_last:
                    snapshot_path = os.path.join(args.snapshots_dir, 'model.ckpt')
                    saver.save(session, snapshot_path, global_step=step)
                    print("Model snapshot was taken to {}".format(snapshot_path))
def save(self, file_name="results"):
    """Persist the results.

    The object is pickled with protocol 2 into ``file_name + ".p"``.

    :param file_name: str, The name for the save file (without the
        ``.p`` extension, which is always appended).
    """
    file_name = file_name + ".p"
    # Use a context manager so the handle is flushed and closed even if
    # pickling raises.
    with open(file_name, 'wb') as fh:
        pickle.dump(self, fh, protocol=2)
def test_complete():
    """Round-trip a densely populated 3x3 figure through pickle.

    The figure is dumped, every open figure is closed, and the pickle is
    loaded again; the test then checks that a figure manager exists and
    that the figure label survived.
    """
    fig = plt.figure('Figure with a label?', figsize=(10, 6))
    plt.suptitle('Can you fit any more in a figure?')

    # Arbitrary sample data shared by the panels.
    x, y = np.arange(8), np.arange(10)
    data = np.linspace(0, 10, 80).reshape(10, 8)
    u = data
    v = np.sin(data * -0.6)

    def panel(index):
        plt.subplot(3, 3, index)

    panel(1)
    plt.plot(list(xrange(10)))

    panel(2)
    plt.contourf(data, hatches=['//', 'ooo'])
    plt.colorbar()

    panel(3)
    plt.pcolormesh(data)

    panel(4)
    plt.imshow(data)

    panel(5)
    plt.pcolor(data)

    panel(6)
    plt.streamplot(x, y, u, v)

    panel(7)
    plt.quiver(x, y, u, v)

    panel(8)
    plt.scatter(x, x**2, label='$x^2$')
    plt.legend(loc='upper left')

    panel(9)
    plt.errorbar(x, x * -0.5, xerr=0.2, yerr=0.4)

    # Plotting is finished; from here on only pickle-ability is tested.
    # For debugging unpicklable objects call recursive_pickle(fig) here
    # (slow, about 200 seconds).
    result_fh = BytesIO()
    pickle.dump(fig, result_fh, pickle.HIGHEST_PROTOCOL)

    plt.close('all')

    # No figure may be left registered after closing everything.
    assert_equal(plt._pylab_helpers.Gcf.figs, {})

    # Rewind the buffer and restore the figure from it.
    result_fh.seek(0)
    fig = pickle.load(result_fh)

    # Loading must have registered a figure manager again.
    assert_not_equal(plt._pylab_helpers.Gcf.figs, {})

    assert_equal(fig.get_label(), 'Figure with a label?')
def setUp(self):
    """Create a mock ``cifar-10-python.tar.gz`` archive in a temp directory.

    Five random training batches and one random test batch are generated
    (seeded, so they are reproducible), pickled under
    ``cifar-10-batches-py/`` and packed into the archive inside
    ``self.tempdir``.  The generated arrays are kept on ``self`` as
    ``train_features_mock``, ``train_targets_mock``,
    ``test_features_mock`` and ``test_targets_mock``.
    """
    numpy.random.seed(9 + 5 + 2015)
    self.train_features_mock = [
        numpy.random.randint(0, 256, (10, 3, 32, 32)).astype('uint8')
        for i in range(5)]
    self.train_targets_mock = [
        numpy.random.randint(0, 10, (10,)).astype('uint8')
        for i in range(5)]
    self.test_features_mock = numpy.random.randint(
        0, 256, (10, 3, 32, 32)).astype('uint8')
    self.test_targets_mock = numpy.random.randint(
        0, 10, (10,)).astype('uint8')
    self.tempdir = tempfile.mkdtemp()
    cwd = os.getcwd()
    os.chdir(self.tempdir)
    try:
        os.mkdir('cifar-10-batches-py')
        for i, (x, y) in enumerate(zip(self.train_features_mock,
                                       self.train_targets_mock)):
            filename = os.path.join(
                'cifar-10-batches-py', 'data_batch_{}'.format(i + 1))
            with open(filename, 'wb') as f:
                cPickle.dump({'data': x, 'labels': y}, f)
        filename = os.path.join('cifar-10-batches-py', 'test_batch')
        with open(filename, 'wb') as f:
            cPickle.dump({'data': self.test_features_mock,
                          'labels': self.test_targets_mock},
                         f)
        with tarfile.open('cifar-10-python.tar.gz', 'w:gz') as tar_file:
            tar_file.add('cifar-10-batches-py')
    finally:
        # Restore the working directory even when fixture creation fails,
        # so one broken setUp cannot derail every following test.
        os.chdir(cwd)
def setUp(self):
    """Create a mock ``cifar-100-python.tar.gz`` archive in a temp directory.

    Random (seeded) train and test features with fine and coarse labels
    are generated, pickled as ``cifar-100-python/train`` and
    ``cifar-100-python/test`` with the features flattened to
    ``(10, -1)``, and packed into the archive inside ``self.tempdir``.
    The generated arrays are kept on ``self`` as ``*_mock`` attributes.
    """
    numpy.random.seed(9 + 5 + 2015)
    self.train_features_mock = numpy.random.randint(
        0, 256, (10, 3, 32, 32)).astype('uint8')
    self.train_fine_labels_mock = numpy.random.randint(
        0, 100, (10,)).astype('uint8')
    self.train_coarse_labels_mock = numpy.random.randint(
        0, 20, (10,)).astype('uint8')
    self.test_features_mock = numpy.random.randint(
        0, 256, (10, 3, 32, 32)).astype('uint8')
    self.test_fine_labels_mock = numpy.random.randint(
        0, 100, (10,)).astype('uint8')
    self.test_coarse_labels_mock = numpy.random.randint(
        0, 20, (10,)).astype('uint8')
    self.tempdir = tempfile.mkdtemp()
    cwd = os.getcwd()
    os.chdir(self.tempdir)
    try:
        os.mkdir('cifar-100-python')
        filename = os.path.join('cifar-100-python', 'train')
        with open(filename, 'wb') as f:
            cPickle.dump({'data': self.train_features_mock.reshape((10, -1)),
                          'fine_labels': self.train_fine_labels_mock,
                          'coarse_labels': self.train_coarse_labels_mock},
                         f)
        filename = os.path.join('cifar-100-python', 'test')
        with open(filename, 'wb') as f:
            cPickle.dump({'data': self.test_features_mock.reshape((10, -1)),
                          'fine_labels': self.test_fine_labels_mock,
                          'coarse_labels': self.test_coarse_labels_mock},
                         f)
        with tarfile.open('cifar-100-python.tar.gz', 'w:gz') as tar_file:
            tar_file.add('cifar-100-python')
    finally:
        # Restore the working directory even when fixture creation fails.
        os.chdir(cwd)
def store_and_or_load_data(outputdir, dataset, data_dir):
    """Return the data manager for ``dataset``, building and caching it once.

    The manager is cached as ``<outputdir>/<dataset>_Manager.pkl``.  When
    the cache file is missing, a lock on that path is taken so that only
    one process builds it; a process that had to wait loads the file the
    lock holder wrote.

    :param outputdir: directory that holds the cache file.
    :param dataset: dataset name; also the prefix of the cache file name.
    :param data_dir: directory passed on to ``SimpleDataManager``.
    :returns: the newly created or unpickled data manager.
    """
    save_path = os.path.join(outputdir, dataset + '_Manager.pkl')
    if not os.path.exists(save_path):
        lock = lockfile.LockFile(save_path)
        while not lock.i_am_locking():
            try:
                lock.acquire(timeout=60)  # wait up to 60 seconds
            except lockfile.LockTimeout:
                # Assume the holder died: take the lock over.
                lock.break_lock()
                lock.acquire()
        print('I locked', lock.path)
        try:
            # Another process may have written the file while we were
            # waiting for the lock, so check again.
            if not os.path.exists(save_path):
                D = SimpleDataManager(dataset, data_dir, verbose=True)
                # Protocol -1 is a binary format; the file must be opened
                # in binary mode (text mode corrupts it on Windows and
                # fails outright on Python 3).
                with open(save_path, 'wb') as fh:
                    pickle.dump(D, fh, -1)
            else:
                with open(save_path, 'rb') as fh:
                    D = pickle.load(fh)
        finally:
            lock.release()
    else:
        with open(save_path, 'rb') as fh:
            D = pickle.load(fh)
        print('Loaded data')
    return D
def _run_tmva_training(self, info):
    """
    Run subprocess to train tmva factory

    The estimator and ``info`` are pickled to the child's stdin.  After
    the child exits, the produced ROOT file is checked for a
    ``TrainTree`` and the weights XML is read into ``self.formula_xml``.

    :param info: class with additional information
    :raises AssertionError: if the child exits with a non-zero code or
        the result file contains no ``TrainTree``.
    """
    # The directory is quoted so that paths containing spaces survive the
    # shell, matching the other TMVA runner commands.
    tmva_process = subprocess.Popen(
        'cd "{directory}"; {executable} -c "from rep.estimators import _tmvaFactory; _tmvaFactory.main()"'.format(
            directory=info.directory,
            executable=sys.executable),
        stdin=PIPE, stdout=PIPE, stderr=subprocess.STDOUT,
        shell=True)

    try:
        cPickle.dump(self, tmva_process.stdin)
        cPickle.dump(info, tmva_process.stdin)
    except (IOError, OSError):
        # The child closed its stdin early (e.g. it crashed on start-up).
        # Carry on: the return-code check below reports the failure
        # together with the child's log.
        pass
    # stderr is merged into stdout above, so ``stderr`` is always None.
    stdout, stderr = tmva_process.communicate()
    assert tmva_process.returncode == 0, \
        'ERROR: TMVA process is incorrect finished \n LOG: %s \n %s' % (stderr, stdout)

    assert 'TrainTree' in root_numpy.list_trees(os.path.join(info.directory, info.tmva_root)), \
        'ERROR: Result file has not TrainTree'

    xml_filename = os.path.join(info.directory, 'weights',
                                '{job}_{name}.weights.xml'.format(job=info.tmva_job, name=self._method_name))
    with open(xml_filename, 'r') as xml_file:
        self.formula_xml = xml_file.read()
def create_content_dir():
    """
    Make empty files for colnames.pkl, colnames_all.pkl and archfiles.db3
    for the current content type ft['content'].

    This only works within the development (git) directory in conjunction
    with the --create option.
    """
    dirname = msid_files['contentdir'].abs
    if not os.path.exists(dirname):
        logger.info('Making directory {}'.format(dirname))
        os.makedirs(dirname)

    # Both column-name files start out as a pickled empty set.
    empty = set()
    for key in ('colnames', 'colnames_all'):
        path = msid_files[key].abs
        if not os.path.exists(path):
            with open(path, 'wb') as f:
                pickle.dump(empty, f, protocol=0)

    if not os.path.exists(msid_files['archfiles'].abs):
        # The schema file is looked up relative to the current working
        # directory; read it with a context manager so the handle is closed.
        with open('archfiles_def.sql') as sql_file:
            archfiles_def = sql_file.read()
        filename = msid_files['archfiles'].abs
        logger.info('Creating db {}'.format(filename))
        db = Ska.DBI.DBI(dbi='sqlite', server=filename, autocommit=False)
        db.execute(archfiles_def)
        db.commit()
def cached_yaml_load(path):
    """
    Load a YAML file, using a pickled copy from the cache when possible.

    The cache entry lives in ``CACHE_DIR`` under the SHA-256 hex digest of
    the absolute path.  It is used when it is at least as new as the YAML
    file; otherwise (or when it cannot be unpickled) the YAML file is
    parsed and the cache entry rewritten.

    :param str path: The path to load.
    :returns: The loaded YAML file, possibly from cache.
    :rtype: dict
    """
    path = os.path.abspath(path)
    digest = hashlib.sha256(path.encode('UTF-8')).hexdigest()
    if not os.path.exists(CACHE_DIR):
        os.makedirs(CACHE_DIR)
    cache_path = os.path.join(CACHE_DIR, digest)

    # Use the cache entry only when it is not older than the source.
    if os.path.exists(cache_path) and \
            os.path.getmtime(cache_path) >= os.path.getmtime(path):
        try:
            with open(cache_path, 'rb') as cache_file:
                return pickle.load(cache_file)
        except (EOFError, pickle.UnpicklingError):
            os.remove(cache_path)  # cache file corrupted, recreate it

    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary Python objects; prefer yaml.safe_load if these files can
    # come from an untrusted source and need no custom tags.
    with codecs.open(path, "r", encoding="utf-8") as source:
        y = yaml.load(source)
    with open(cache_path, 'wb') as cache_file:
        pickle.dump(y, cache_file)
    return y
def train(epoch_num, output_dir, *args):
    """Train (or continue training) a character LSTM and pickle the result.

    ``args[0]`` must be a ``(model_name, file)`` pair.  An existing pickle
    at ``output_dir + "training/" + model_name`` is loaded and trained
    further; otherwise a new ``SimpleLSTM`` is created.  The trained model
    is written back to that path and the average loss is appended to
    ``logs/<model_name>.log``.

    :param epoch_num: currently unused; the number of epochs comes from
        the module-level ``_NEPOCH``.
    :param output_dir: prefix under which the ``training/`` pickle lives.
    :param args: positional extras; only ``args[0]`` is read.
    """
    model_name = args[0][0]
    data_file = args[0][1]
    log_name = "logs/" + model_name + ".log"
    model_name = output_dir + "training/" + model_name

    # Open the log up front (as before) but let the context manager close
    # it even when training fails.
    with open(log_name, 'a+') as log_file:
        # TODO: gram_num here is a magic number!
        train_chars = LargeCharFeatureGenerator(data_file, 10)

        if os.path.isfile(model_name):
            with open(model_name, 'rb') as f:
                model = cPickle.load(f)
        else:
            model = SimpleLSTM(train_chars.vocab_size)

        avg_loss = train_with_sgd(model, train_chars, nepoch=_NEPOCH,
                                  learning_rate=_LEARNING_RATE,
                                  mini_batch_size=_BATCH_SIZE)

        with open(model_name, 'wb') as f:
            cPickle.dump(model, f, protocol=cPickle.HIGHEST_PROTOCOL)

        # write() only accepts strings; convert in case the loss is numeric.
        log_file.write(str(avg_loss))
def append_flipped_rois(self):
    """
    Extend the ROI database with horizontally flipped entries.

    This does not depend on the concrete database, so it lives here.
    Only the boxes are mirrored; the images themselves are left untouched.
    The flipped entries are cached in a pickle inside ``self.cache_path``
    and re-used on later calls.  Finally ``self.roidb`` is extended and
    ``self._image_index`` doubled.
    """
    cache_name = self.name + '_' + cfg.TRAIN.PROPOSAL_METHOD + '_roidb_flip.pkl'
    cache_file = os.path.join(self.cache_path, cache_name)

    if not os.path.exists(cache_file):
        num_images = self.num_images
        widths = [PIL.Image.open(self.image_path_at(i)).size[0]
                  for i in range(num_images)]
        flip_roidb = []
        for i, width in enumerate(widths):
            source = self.roidb[i]
            boxes = source['boxes'].copy()
            # Mirror x1/x2 around the image width; copies are needed
            # because both columns are overwritten.
            oldx1 = boxes[:, 0].copy()
            oldx2 = boxes[:, 2].copy()
            boxes[:, 0] = width - oldx2 - 1
            boxes[:, 2] = width - oldx1 - 1
            assert (boxes[:, 2] >= boxes[:, 0]).all()
            flip_roidb.append({'boxes': boxes,
                               'gt_overlaps': source['gt_overlaps'],
                               'gt_classes': source['gt_classes'],
                               'flipped': True})
        with open(cache_file, 'wb') as fid:
            cPickle.dump(flip_roidb, fid, cPickle.HIGHEST_PROTOCOL)
        print('wrote gt flipped roidb to {}'.format(cache_file))
    else:
        with open(cache_file, 'rb') as fid:
            flip_roidb = cPickle.load(fid)
        print('{} gt flipped roidb loaded from {}'.format(self.name, cache_file))

    self.roidb.extend(flip_roidb)
    self._image_index *= 2
def pickle_dump(data, filename):
    """
    Pickle ``data`` into ``filename`` and close the file afterwards.

    Same effect as ``pickle.dump(data, open(filename, 'wb'))`` without
    leaving the file handle open.
    """
    with open(filename, 'wb') as sink:
        pickle.dump(data, sink)
def train_loop():
    """Consume batches and control messages from ``data_q`` until told to stop.

    Control messages are the strings ``'end'`` (echo it on ``res_q`` and
    return), ``'train'`` (switch to training mode) and ``'val'`` (pickle
    the model to ``LOGPATH + 'model'`` and switch to validation mode).
    Every other item is treated as an ``(x, y)`` batch: it is run through
    the model, with a parameter update in training mode, and the loss is
    put on ``res_q`` as a float.
    """
    # NOTE(review): ``train`` is only bound once a 'train' or 'val'
    # message has been seen; a batch arriving earlier raises
    # UnboundLocalError - confirm the producer always sends a mode first.
    while True:
        while data_q.empty():
            time.sleep(0.1)
        inp = data_q.get()
        if inp == 'end':  # quit
            res_q.put('end')
            break
        elif inp == 'train':  # restart training
            res_q.put('train')
            train = True
            continue
        elif inp == 'val':  # start validation
            # Snapshot the model; close the file so the pickle is fully
            # flushed before validation starts.
            with open(LOGPATH + 'model', 'wb') as model_file:
                pickle.dump(model, model_file, -1)
            res_q.put('val')
            train = False
            continue

        x = xp.asarray(inp[0])
        y = xp.asarray(inp[1])

        if train:
            optimizer.zero_grads()
            loss = model.forward(x, y, train=True)
            loss.backward()
            optimizer.update()
        else:
            loss = model.forward(x, y, train=False)

        res_q.put(float(cuda.to_cpu(loss.data)))
        # Drop the references before waiting for the next batch.
        del loss, x, y
def getAllDataPickle(p_bForce=False):
    """Build the combined train/validation/test pickle and return its path.

    Per-genre pickles are obtained for the ``train_small`` and
    ``test_small`` libraries, merged into whole datasets, shuffled, and
    saved as one dict in ``LIBRARY_PATH + 'allData.pickle'``.

    :param p_bForce: passed on to ``getIndividualGenrePickles``.
    :returns: path of the combined pickle file.
    :raises Exception: whatever saving the pickle raises, re-raised after
        a message has been printed.
    """
    # get relevant paths
    trainGenreNames, trainGenrePaths = getAllGenrePaths(LIBRARY_PATH + 'train_small/')
    testGenreNames, testGenrePaths = getAllGenrePaths(LIBRARY_PATH + 'test_small/')
    pickle_file = LIBRARY_PATH + 'allData.pickle'

    # obtain data for each genre in their individual pickle file
    allPickledTrainFilenames = getIndividualGenrePickles(trainGenrePaths, p_bForce)
    allPickledTestFilenames = getIndividualGenrePickles(testGenrePaths, p_bForce)

    # merge and randomize data from all genres into whole datasets for
    # training, validation, and test
    wholeValidDataset, wholeValidLabels, wholeTrainDataset, wholeTrainLabels = \
        getWholeDataFromIndividualGenrePickles(allPickledTrainFilenames, s_iTrainSize, s_iValid_size)
    _, _, wholeTestDataset, wholeTestLabels = \
        getWholeDataFromIndividualGenrePickles(allPickledTestFilenames, s_iTestSize)

    wholeTrainDataset, wholeTrainLabels = randomize(wholeTrainDataset, wholeTrainLabels)
    wholeTestDataset, wholeTestLabels = randomize(wholeTestDataset, wholeTestLabels)
    wholeValidDataset, wholeValidLabels = randomize(wholeValidDataset, wholeValidLabels)

    # save the data for later reuse:
    save = {'wholeTrainDataset': wholeTrainDataset,
            'wholeTrainLabels': wholeTrainLabels,
            'wholeValidDataset': wholeValidDataset,
            'wholeValidLabels': wholeValidLabels,
            'wholeTestDataset': wholeTestDataset,
            'wholeTestLabels': wholeTestLabels}
    try:
        # The context manager closes the file even when dumping fails.
        with open(pickle_file, 'wb') as f:
            pickle.dump(save, f, pickle.HIGHEST_PROTOCOL)
    except Exception as e:
        print('Unable to save data to', pickle_file, ':', e)
        raise

    print ('\n================== DATASETS BUILT ================')
    return pickle_file
def mutate_value(self): """ Allows mutation of the value safely. """ # Get the semaphore with an emergency timeout to detect deadlock conditions try: self.semaphore.acquire(self.TIMEOUT) except posix_ipc.BusyError: raise self.deadlock_error try: # Load the value from the shared memory segment (if populated) self.mmap.seek(0) # Memory can be empty but have a length. Pickle opcodes # starts at 0x80. If we read zero, memory was not # initiated yet. if not self.mmap.read_byte(): value = self.DEFAULT_FACTORY() else: self.mmap.seek(0) try: value = pickle.load(self.mmap) except EOFError: value = self.DEFAULT_FACTORY() # Let the inside run yield value # Dump the value back into the shared memory segment self.mmap.seek(0) pickle.dump(value, self.mmap, protocol=2) finally: # Release semaphore self.semaphore.release()
def write_nl(model, nl_filename, **kwds):
    """
    Write a Pyomo model as an NL file and pickle its symbol map next to it.

    The symbol map is stored as (NL label, ComponentUID) pairs so that it
    can be recovered later for a Pyomo model with matching component
    names.  Extra keyword arguments are forwarded to the writer as
    ``io_options``.  Returns the name of the symbol-map pickle, which is
    ``nl_filename + ".symbol_map.pickle"``.
    """
    # Write the model first; the writer hands back the id of the symbol
    # map it created.
    _, smap_id = model.write(nl_filename,
                             format=ProblemFormat.nl,
                             io_options=kwds)
    symbol_map = model.solutions.symbol_map[smap_id]

    # ComponentUID (by John Siirola) is an efficient lookup code for model
    # components; a shared buffer speeds up building many of them.
    cuid_cache = {}
    pairs = []
    for symbol, var_weakref in symbol_map.bySymbol.items():
        cuid = ComponentUID(var_weakref(), cuid_buffer=cuid_cache)
        pairs.append((symbol, cuid))
    symbol_cuid_pairs = tuple(pairs)

    symbol_map_filename = nl_filename + ".symbol_map.pickle"
    with open(symbol_map_filename, "wb") as out:
        pickle.dump(symbol_cuid_pairs, out)

    return symbol_map_filename
def save_model(self, model, idx, seed):
    """Pickle ``model`` as ``<seed>.<idx>.model`` inside the model directory.

    The directory comes from ``self.get_model_dir()`` and is expected to
    exist already; opening the file fails otherwise.
    """
    file_name = '%s.%s.model' % (seed, idx)
    target = os.path.join(self.get_model_dir(), file_name)
    with open(target, 'wb') as out:
        pickle.dump(model, out, -1)
def parse_ctgs(bestedges, frgtoctg):
    """Return the read-to-contig mapping, rebuilding the cache when stale.

    When ``need_update(frgtoctg, cache)`` is true, the ``.iidtouid`` file
    is read to translate fragment UIDs to integer IIDs, the ``.frgctg``
    and ``.frgdeg`` posmap files are parsed into ``{iid: "ctg<id>"}`` /
    ``{iid: "deg<id>"}`` entries, and the mapping is dumped to
    ``frgtoctg.cache`` in the current directory.  The mapping is then
    always loaded back from that cache file.

    :param bestedges: not used by this function.
    :param frgtoctg: path of the ``.posmap.frgctg`` file; sibling file
        names are derived from it.
    :returns: the mapping loaded from the cache.
    """
    cache = "frgtoctg.cache"
    if need_update(frgtoctg, cache):
        reads_to_ctgs = {}
        frgtodeg = frgtoctg.replace(".frgctg", ".frgdeg")
        iidtouid = frgtoctg.replace(".posmap.frgctg", ".iidtouid")

        # All handles are opened through ``with`` so they are closed
        # promptly instead of lingering until garbage collection.
        frgstore = {}
        with open(iidtouid) as fp:
            for row in fp:
                tag, iid, uid = row.split()
                if tag == "FRG":
                    frgstore[uid] = int(iid)

        for pf, f in zip(("ctg", "deg"), (frgtoctg, frgtodeg)):
            with open(f) as fp:
                logging.debug("Parse posmap file `{0}`".format(f))
                for row in fp:
                    frg, ctg = row.split()[:2]
                    frg = frgstore[frg]
                    reads_to_ctgs[frg] = pf + ctg
            logging.debug("Loaded mapping: {0}".format(len(reads_to_ctgs)))

        # NOTE(review): text mode is kept as in the original; if
        # ``dump``/``load`` are pickle's, Python 3 needs 'wb'/'rb' - confirm.
        with open(cache, "w") as fw:
            dump(reads_to_ctgs, fw)
        logging.debug("Contig mapping written to `{0}`".format(cache))

    with open(cache) as fp:
        reads_to_ctgs = load(fp)
    logging.debug("Contig mapping loaded from `{0}`".format(cache))

    return reads_to_ctgs
def fetch_train_thoughts(m, pcs, batches, name="trainthoughts"):
    """Collect the model's "thoughts" for several training batches and pickle them.

    For each batch, an (input, output) pair is drawn with
    ``multi_training.getPieceBatch(pcs)`` and passed to
    ``m.update_thought_fun``.  The list of ``(input, output, thoughts)``
    tuples is written to ``output/<name>.p``.

    :param m: model object providing ``update_thought_fun``.
    :param pcs: pieces handed to ``multi_training.getPieceBatch``.
    :param batches: number of batches to draw.
    :param name: base name of the pickle inside ``output/``.
    """
    all_thoughts = []
    for _ in range(batches):
        ipt, opt = multi_training.getPieceBatch(pcs)
        thoughts = m.update_thought_fun(ipt, opt)
        all_thoughts.append((ipt, opt, thoughts))

    # Close the file explicitly so the pickle is completely on disk when
    # the function returns.
    with open('output/' + name + '.p', 'wb') as out:
        pickle.dump(all_thoughts, out)
def get_abinit_variables():
    """Returns the database with the description of the ABINIT variables.

    The database is built once per process and memoised in the module
    global ``__VARS_DATABASE``.  It is read from
    ``~/.abinit/abipy/abinit_vars.pickle`` when that file exists;
    otherwise it is generated from the ``abinit_vars.yml`` shipped with
    abipy and the pickle is written for later runs.
    """
    global __VARS_DATABASE
    if __VARS_DATABASE is None:
        # $HOME may be unset (e.g. on Windows or in stripped-down
        # environments); fall back to the platform's notion of the home
        # directory instead of crashing in os.path.join(None, ...).
        home = os.getenv("HOME") or os.path.expanduser("~")
        pickle_file = os.path.join(home, ".abinit", "abipy", "abinit_vars.pickle")

        if os.path.exists(pickle_file):
            with open(pickle_file, "rb") as fh:
                __VARS_DATABASE = pickle.load(fh)
        else:
            # Make dir and file if not present.
            pickle_dir = os.path.dirname(pickle_file)
            if not os.path.exists(pickle_dir):
                os.makedirs(pickle_dir)

            # Reading the YAML database is slow, hence the pickle cache.
            from abipy import data as abidata
            yaml_file = abidata.var_file('abinit_vars.yml')
            # NOTE(review): yaml.load without an explicit Loader can build
            # arbitrary objects; the entries are accessed via ``.varname``
            # so custom tags are presumably required - confirm before
            # switching loaders.
            with open(yaml_file, "rt") as fh:
                var_list = yaml.load(fh)

            # Build ordered dict with variables in alphabetical order.
            var_list = sorted(var_list, key=lambda v: v.varname)
            __VARS_DATABASE = VariableDatabase([(v.varname, v) for v in var_list])

            # Save object to pickle file so that we can reload it from
            # pickle instead of yaml (slower).
            with open(pickle_file, "wb") as fh:
                pickle.dump(__VARS_DATABASE, fh)

    return __VARS_DATABASE
def load_additional_args(self, config):
    """
    Read the power limits and calibration coefficients from ``config``.

    Sets ``request_powermin`` / ``request_powermax`` from the General
    section, builds ``self.power_meter_calibration`` when PowerMeter
    coefficients are present, and pickles a PowerOutput calibration into
    the hidden directory when those coefficients are present.  Returns
    the result of the parent class' ``load_additional_args``.
    """
    for attr, option in (('request_powermin', 'power min'),
                         ('request_powermax', 'power max')):
        self.set_attribute(config, attr, 'General', option, cast='float')

    # read in the coefficients from file
    meter_coeffs = self.config_get(config, 'PowerMeter', 'coefficients')
    if meter_coeffs is not None:
        self.power_meter_calibration = MeterCalibration(meter_coeffs)

    output_coeffs = self.config_get(config, 'PowerOutput', 'coefficients')
    if output_coeffs is not None:
        base_name = self.name.split('.')[0]
        target = os.path.join(paths.hidden_dir,
                              '{}_power_calibration'.format(base_name))
        calibration = MeterCalibration(output_coeffs)
        # dump to the hidden dir
        # the manager will use it directly
        try:
            self.info('loading power calibration from config file')
            with open(target, 'wb') as out:
                pickle.dump(calibration, out)
        except (OSError, pickle.PickleError):
            self.warning('failed loading power output calibration')

    return super(FusionsCO2LogicBoard, self).load_additional_args(config)
def _run_tmva_predict(info, data):
    """
    Run subprocess to compute predictions with the tmva reader

    ``info`` and ``data`` are pickled to the child's stdin; the
    predictions are then unpickled from ``info.result_filename``.

    :param info: class with additional information
    :param data: data to predict on, pickled as-is to the child
    :returns: the object unpickled from ``info.result_filename``
    :raises AssertionError: if the child exits with a non-zero code.
    """
    tmva_process = subprocess.Popen(
        'cd "{directory}"; {executable} -c "from rep.estimators import _tmvaReader; _tmvaReader.main()"'.format(
            directory=info.directory,
            executable=sys.executable),
        stdin=PIPE, stdout=PIPE, stderr=subprocess.STDOUT,
        shell=True)

    try:
        cPickle.dump(info, tmva_process.stdin)
        cPickle.dump(data, tmva_process.stdin)
    except Exception:
        # Deliberately best-effort: a failure here shows up in the
        # return-code check below together with the child's log.
        # ``Exception`` (not a bare except) keeps Ctrl-C and SystemExit
        # working.
        pass
    # stderr is merged into stdout above, so ``stderr`` is always None.
    stdout, stderr = tmva_process.communicate()
    assert tmva_process.returncode == 0, \
        'ERROR: TMVA process is incorrect finished \n LOG: %s \n %s' % (stderr, stdout)

    with open(info.result_filename, 'rb') as predictions_file:
        predictions = cPickle.load(predictions_file)
    return predictions
def save_classifier(cl, fn, use_joblib=True, **kwargs):
    """Write a classifier to disk.

    Parameters
    ----------
    cl : classifier object
        Either a classify.VigraRandomForest or any pickleable object.
    fn : string
        Path/filename to write to.
    use_joblib : bool, optional
        Prefer joblib persistence over pickle when joblib is usable.
    kwargs : keyword arguments
        Passed on to `joblib.dump`; for the pickle path only
        ``protocol`` is read (default 2).

    Returns
    -------
    None

    Notes
    -----
    `compress=3` is the default for joblib persistence.
    """
    # Vigra forests bring their own persistence.
    if isinstance(cl, VigraRandomForest):
        cl.save_to_disk(fn)
        return

    if use_joblib and sklearn_available:
        kwargs.setdefault("compress", 3)
        joblib.dump(cl, fn, **kwargs)
        return

    protocol = kwargs.get("protocol", 2)
    with open(fn, "wb") as out:
        pck.dump(cl, out, protocol=protocol)
def create_pickle(data_folders, force=False):
    """Pickle the data of every class folder into its own file.

    For each folder ``<folder>.pickle`` is written from
    ``load_emotion(folder)``.  An existing pickle is kept unless
    ``force`` is true.  A failure to write is reported on stdout and does
    not stop the loop.

    data_folders -- folder names, one per class.
    force -- overwrite pickle files that already exist.

    Returns the list of pickle file names, one per folder, in input order.
    """
    dataset_names = []
    for folder in data_folders:
        target = folder + '.pickle'
        dataset_names.append(target)

        if os.path.exists(target) and not force:
            print('%s already present - Skipping pickling.' % target)
            continue

        print('Pickling %s.' % target)
        dataset = load_emotion(folder)
        try:
            with open(target, 'wb') as out:
                pickle.dump(dataset, out, pickle.HIGHEST_PROTOCOL)
        except Exception as e:
            print('Unable to save data to', target, ':', e)
    return dataset_names
def _run_tmva_training(self, info, X, y, sample_weight):
    """
    Run subprocess to train tmva factory

    The estimator, ``info`` and the training data are pickled, in that
    order, to the child's stdin.  After the child exits, the weights XML
    it produced is read into ``self.formula_xml``.

    :param info: class with additional information
    :param X: training features, pickled as-is to the child
    :param y: training labels, pickled as-is to the child
    :param sample_weight: sample weights, pickled as-is to the child
    :raises AssertionError: if the child exits with a non-zero code.
    """
    tmva_process = subprocess.Popen(
        'cd "{directory}"; {executable} -c "from rep.estimators import _tmvaFactory; _tmvaFactory.main()"'.format(
            directory=info.directory,
            executable=sys.executable),
        stdin=PIPE, stdout=PIPE, stderr=subprocess.STDOUT,
        shell=True)
    try:
        for payload in (self, info, X, y, sample_weight):
            cPickle.dump(payload, tmva_process.stdin)
    except Exception:
        # Deliberately best-effort: a failure here shows up in the
        # return-code check below together with the child's log.
        # ``Exception`` (not a bare except) keeps Ctrl-C and SystemExit
        # working.
        pass
    # stderr is merged into stdout above, so ``stderr`` is always None.
    stdout, stderr = tmva_process.communicate()
    assert tmva_process.returncode == 0, \
        'ERROR: TMVA process is incorrect finished \n LOG: %s \n %s' % (stderr, stdout)

    xml_filename = os.path.join(info.directory, 'weights',
                                '{job}_{name}.weights.xml'.format(job=info.tmva_job, name=self._method_name))
    with open(xml_filename, 'r') as xml_file:
        self.formula_xml = xml_file.read()
def train(args):
    """Train the character model described by ``args``.

    The configuration and the (chars, vocab) pair of the text loader are
    pickled into ``args.save_dir`` first.  During training, the loss and
    time of every batch are printed and a checkpoint is saved every
    ``args.save_every`` global steps.

    :param args: namespace with the training settings; its
        ``vocab_size`` attribute is set here from the text loader.
    """
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    config_path = os.path.join(args.save_dir, 'config.pkl')
    with open(config_path, 'wb') as config_out:
        cPickle.dump(args, config_out)
    vocab_path = os.path.join(args.save_dir, 'chars_vocab.pkl')
    with open(vocab_path, 'wb') as vocab_out:
        cPickle.dump((data_loader.chars, data_loader.vocab), vocab_out)

    model = Model(args)

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        for e in range(args.num_epochs):
            # Exponentially decayed learning rate for this epoch.
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            state = model.initial_state.eval()
            for b in range(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x,
                        model.targets: y,
                        model.initial_state: state}
                train_loss, state, _ = sess.run(
                    [model.cost, model.final_state, model.train_op], feed)
                end = time.time()

                step = e * data_loader.num_batches + b
                total_steps = args.num_epochs * data_loader.num_batches
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}"
                      .format(step, total_steps, e, train_loss, end - start))

                if step % args.save_every == 0:
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)
                    print("model saved to {}".format(checkpoint_path))
# NOTE(review): this ``return`` is the tail of a function whose header is
# outside this view (a ``return`` cannot live at module level); presumably
# it ends ``load_data`` - confirm against the full file.
return dataset

# Load the three PTB splits.  With --test each split is cut down to its
# first 100 items so the whole script runs quickly.
train_data = load_data('ptb.train.txt')
if args.test:
    train_data = train_data[:100]
valid_data = load_data('ptb.valid.txt')
if args.test:
    valid_data = valid_data[:100]
test_data = load_data('ptb.test.txt')
if args.test:
    test_data = test_data[:100]

print('#vocab =', len(vocab))

# Persist the vocabulary next to the script as 'vocab.bin'.
with open('vocab.bin', 'wb') as f:
    pickle.dump(vocab, f)

# Prepare RNNLM model, defined in net.py
lm = net.RNNLM(len(vocab), n_units)
model = L.Classifier(lm)
model.compute_accuracy = False  # we only want the perplexity
# Re-initialise every parameter in place with uniform noise in [-0.1, 0.1).
for param in model.params():
    data = param.data
    data[:] = np.random.uniform(-0.1, 0.1, data.shape)
# A non-negative --gpu selects that device and moves the model onto it.
if args.gpu >= 0:
    cuda.get_device(args.gpu).use()
    model.to_gpu()

# Setup optimizer
optimizer = optimizers.SGD(lr=1.)
optimizer.setup(model)
def save_pickle(fname, tweets):
    """Pickle ``tweets`` into ``fname + '.pickle'`` with the highest protocol."""
    target = fname + '.pickle'
    with open(target, 'wb') as out:
        pickle.dump(tweets, out, pickle.HIGHEST_PROTOCOL)
def train_lstm(
    dim_proj=128,  # word embeding dimension and LSTM number of hidden units.
    patience=10,  # Number of epoch to wait before early stop if no progress
    max_epochs=5000,  # The maximum number of epoch to run
    dispFreq=10,  # Display to stdout the training progress every N updates
    decay_c=0.,  # Weight decay for the classifier applied to the U weights.
    lrate=0.0001,  # Learning rate for sgd (not used for adadelta and rmsprop)
    n_words=10000,  # Vocabulary size
    optimizer=adadelta,  # sgd, adadelta and rmsprop available, sgd very hard to use, not recommanded (probably need momentum and decaying learning rate).
    encoder='lstm',  # TODO: can be removed must be lstm.
    saveto='lstm_model.npz',  # The best model will be saved there
    validFreq=370,  # Compute the validation error after this number of update.
    saveFreq=1110,  # Save the parameters after every saveFreq updates
    maxlen=100,  # Sequence longer then this get ignored
    batch_size=16,  # The batch size during training.
    valid_batch_size=64,  # The batch size used for validation/test set.
    dataset='authors2',

    # Parameter for extra option
    noise_std=0.,
    use_dropout=True,  # if False slightly faster, but worst test error
                       # This frequently need a bigger model.
    reload_model=None,  # Path to a saved model we want to start from.
    test_size=-1,  # If >0, we keep only this number of test example.
):
    """Train an LSTM classifier with early stopping and periodic saving.

    Every ``validFreq`` updates the train/valid/test errors are computed;
    the parameters with the best validation error so far are remembered.
    Training stops after ``max_epochs`` epochs, on early stop (no
    improvement for more than ``patience`` validations), or on Ctrl-C.
    Every ``saveFreq`` updates the current best parameters are written to
    ``saveto`` and the model options to ``saveto + '.pkl'``.

    :returns: ``(train_err, valid_err, test_err)`` of the best parameters,
        or ``(1., 1., 1.)`` as soon as a NaN/inf cost is encountered.
    """
    # Model options.  This must stay the first statement: it snapshots
    # exactly the keyword arguments, before any other local exists.
    model_options = locals().copy()
    print("model options", model_options)

    load_data, prepare_data = get_dataset(dataset)

    print('Loading data')
    train, valid, test = load_data(n_words=n_words, valid_portion=0.05,
                                   maxlen=maxlen)
    if test_size > 0:
        # The test set is sorted by size, but we want to keep random
        # size example.  So we must select a random selection of the
        # examples.
        idx = numpy.arange(len(test[0]))
        numpy.random.shuffle(idx)
        idx = idx[:test_size]
        test = ([test[0][n] for n in idx], [test[1][n] for n in idx])

    ydim = numpy.max(train[1]) + 1

    model_options['ydim'] = ydim

    print('Building model')
    # This create the initial parameters as numpy ndarrays.
    # Dict name (string) -> numpy ndarray
    params = init_params(model_options)

    if reload_model:
        # Honour a caller-supplied path (the parameter is documented as
        # one); any other truthy value keeps the historical default file.
        if isinstance(reload_model, str):
            reload_path = reload_model
        else:
            reload_path = 'lstm_model.npz'
        load_params(reload_path, params)

    # This create Theano Shared Variable from the parameters.
    # Dict name (string) -> Theano Tensor Shared Variable
    # params and tparams have different copy of the weights.
    tparams = init_tparams(params)

    # use_noise is for dropout
    (use_noise, x, mask,
     y, f_pred_prob, f_pred, cost) = build_model(tparams, model_options)

    if decay_c > 0.:
        decay_c = theano.shared(numpy_floatX(decay_c), name='decay_c')
        weight_decay = 0.
        weight_decay += (tparams['U']**2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    f_cost = theano.function([x, mask, y], cost, name='f_cost')

    grads = tensor.grad(cost, wrt=list(tparams.values()))
    f_grad = theano.function([x, mask, y], grads, name='f_grad')

    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = optimizer(lr, tparams, grads,
                                        x, mask, y, cost)

    print('Optimization')

    kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size)
    kf_test = get_minibatches_idx(len(test[0]), valid_batch_size)

    print("%d train examples" % len(train[0]))
    print("%d valid examples" % len(valid[0]))
    print("%d test examples" % len(test[0]))

    history_errs = []
    best_p = None
    # Number of consecutive validations without improvement.  (This was
    # initialised under the misspelt name ``bad_count`` before.)
    bad_counter = 0

    if validFreq == -1:
        validFreq = len(train[0]) // batch_size
    if saveFreq == -1:
        saveFreq = len(train[0]) // batch_size

    uidx = 0  # the number of update done
    estop = False  # early stop
    start_time = time.time()
    try:
        for eidx in range(max_epochs):
            n_samples = 0

            # Get new shuffled index for the training set.
            kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True)

            for _, train_index in kf:
                uidx += 1
                use_noise.set_value(1.)

                # Select the random examples for this minibatch
                y = [train[1][t] for t in train_index]
                x = [train[0][t] for t in train_index]

                # Get the data in numpy.ndarray format
                # This swap the axis!
                # Return something of shape (minibatch maxlen, n samples)
                x, mask, y = prepare_data(x, y)
                n_samples += x.shape[1]

                cost = f_grad_shared(x, mask, y)
                f_update(lrate)

                if numpy.isnan(cost) or numpy.isinf(cost):
                    print('bad cost detected: ', cost)
                    return 1., 1., 1.

                if numpy.mod(uidx, dispFreq) == 0:
                    print('Epoch ', eidx, 'Update ', uidx, 'Cost ', cost)

                if saveto and numpy.mod(uidx, saveFreq) == 0:
                    print('Saving...')

                    if best_p is not None:
                        params = best_p
                    else:
                        params = unzip(tparams)
                    numpy.savez(saveto, history_errs=history_errs, **params)
                    # Close the options file so it is fully written
                    # before training continues.
                    with open('%s.pkl' % saveto, 'wb') as options_file:
                        pickle.dump(model_options, options_file, -1)
                    print('Done')

                if numpy.mod(uidx, validFreq) == 0:
                    use_noise.set_value(0.)
                    train_err = pred_error(f_pred, prepare_data, train, kf)
                    valid_err = pred_error(f_pred, prepare_data, valid,
                                           kf_valid)
                    test_err = pred_error(f_pred, prepare_data, test, kf_test)

                    history_errs.append([valid_err, test_err])

                    if (best_p is None or
                            valid_err <= numpy.array(history_errs)[:, 0].min()):
                        best_p = unzip(tparams)
                        bad_counter = 0

                    print(('Train ', train_err, 'Valid ', valid_err,
                           'Test ', test_err))

                    if (len(history_errs) > patience and
                            valid_err >= numpy.array(history_errs)[:-patience, 0].min()):
                        bad_counter += 1
                        if bad_counter > patience:
                            print('Early Stop!')
                            estop = True
                            break

            print('Seen %d samples' % n_samples)

            if estop:
                break

    except KeyboardInterrupt:
        print("Training interupted")

    end_time = time.time()
    # Evaluate with the best parameters seen (or the current ones when no
    # validation has happened yet).
    if best_p is not None:
        zipp(best_p, tparams)
    else:
        best_p = unzip(tparams)

    use_noise.set_value(0.)
    kf_train_sorted = get_minibatches_idx(len(train[0]), batch_size)
    train_err = pred_error(f_pred, prepare_data, train, kf_train_sorted)
    valid_err = pred_error(f_pred, prepare_data, valid, kf_valid)
    test_err = pred_error(f_pred, prepare_data, test, kf_test)

    print('Train ', train_err, 'Valid ', valid_err, 'Test ', test_err)
    if saveto:
        numpy.savez(saveto, train_err=train_err,
                    valid_err=valid_err, test_err=test_err,
                    history_errs=history_errs, **best_p)
    print('The code run for %d epochs, with %f sec/epochs' % (
        (eidx + 1), (end_time - start_time) / (1. * (eidx + 1))))
    print(('Training took %.1fs' %
           (end_time - start_time)), file=sys.stderr)
    return train_err, valid_err, test_err
def trainHMM_fromDir(dirPath, hmmModelName, mtWin, mtStep):
    '''
    Train a HMM model for segmentation-classification from a directory in
    which WAV files and their .segments (ground-truth) files are stored.

    ARGUMENTS:
     - dirPath:        the path of the data directory
     - hmmModelName:   the name of the file the HMM model is stored in
     - mtWin:          mid-term window size
     - mtStep:         mid-term window step
    RETURNS:
     - hmm:            an object to the resulting HMM
     - classesAll:     the list of class names found across all annotations
    RAISES:
     - ValueError:     if dirPath holds no WAV file with a matching
                       .segments annotation (there is nothing to train on)

    After training, hmm, classesAll, mtWin and mtStep are written, in that
    order, as four consecutive pickles to the hmmModelName file.
    '''

    flagsAll = numpy.array([])
    Fall = None          # feature matrix accumulated over all annotated files
    classesAll = []

    # for each WAV file
    for wavFile in glob.glob(dirPath + os.sep + '*.wav'):
        # name of the annotation file that belongs to this WAV file
        gtFile = wavFile.replace('.wav', '.segments')
        # if current WAV file does not have annotation -> skip
        if not os.path.isfile(gtFile):
            continue

        # read GT data and convert it to per-window flags
        [segStart, segEnd, segLabels] = readSegmentGT(gtFile)
        flags, classNames = segs2flags(segStart, segEnd, segLabels, mtStep)

        # update the global list of class names
        for c in classNames:
            if c not in classesAll:
                classesAll.append(c)

        # read audio data and extract the mid-term features
        [Fs, x] = audioBasicIO.readAudioFile(wavFile)
        [F, _] = aF.mtFeatureExtraction(x, Fs, mtWin * Fs, mtStep * Fs,
                                        round(Fs * 0.050), round(Fs * 0.050))

        # features and flags may differ in length: keep the common prefix
        lenF = F.shape[1]
        lenL = len(flags)
        MIN = min(lenF, lenL)
        F = F[:, 0:MIN]
        flags = flags[0:MIN]

        # re-map the per-file class indices onto indices into classesAll
        flagsNew = [classesAll.index(classNames[fl]) for fl in flags]
        flagsAll = numpy.append(flagsAll, numpy.array(flagsNew))

        # append the features of this file
        if Fall is None:
            Fall = F
        else:
            Fall = numpy.concatenate((Fall, F), axis=1)

    if Fall is None:
        raise ValueError(
            "No annotated WAV files (.wav with a matching .segments file) "
            "found in '%s'" % dirPath)

    # compute HMM statistics
    startprob, transmat, means, cov = trainHMM_computeStatistics(Fall,
                                                                 flagsAll)

    # train HMM
    hmm = hmmlearn.hmm.GaussianHMM(startprob.shape[0], "diag")
    hmm.startprob_ = startprob
    hmm.transmat_ = transmat
    hmm.means_ = means
    hmm.covars_ = cov

    # save HMM model; the context manager closes the file even if a dump fails
    with open(hmmModelName, "wb") as fo:
        cPickle.dump(hmm, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(classesAll, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(mtWin, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(mtStep, fo, protocol=cPickle.HIGHEST_PROTOCOL)

    return hmm, classesAll
def finish(self):
    # type: () -> None
    """Write the undocumented-object results to ``undoc.pickle``.

    The file is created inside ``self.outdir`` and holds the tuple
    ``(self.py_undoc, self.c_undoc)``.
    """
    # dump the coverage data to a pickle file too
    target = path.join(self.outdir, 'undoc.pickle')
    with open(target, 'wb') as stream:
        payload = (self.py_undoc, self.c_undoc)
        pickle.dump(payload, stream)
def sync_session(self):
    """Pickle ``self.scan_session`` into the resume file.

    Does nothing when ``self.resume_file`` is empty or unset.
    """
    if not self.resume_file:
        return
    with open(self.resume_file, 'wb') as handle:
        # pickle protocol 2
        cPickle.dump(self.scan_session, handle, 2)
def test_nest():
    """Build a nested image collection by hand and check its ancestry.

    The hand-built nest is compared against the one produced by
    ``gen_nest()`` and against a pickle round-trip of itself.
    """
    crs = cimgt.GoogleTiles().crs

    # one image collection per zoom level of the test data
    z0 = cimg_nest.ImageCollection('aerial z0 test', crs)
    z0.scan_dir_for_imgs(os.path.join(_TEST_DATA_DIR, 'z_0'),
                         glob_pattern='*.png', img_class=RoundedImg)

    z1 = cimg_nest.ImageCollection('aerial z1 test', crs)
    z1.scan_dir_for_imgs(os.path.join(_TEST_DATA_DIR, 'z_1'),
                         glob_pattern='*.png', img_class=RoundedImg)

    z2 = cimg_nest.ImageCollection('aerial z2 test', crs)
    z2.scan_dir_for_imgs(os.path.join(_TEST_DATA_DIR, 'z_2'),
                         glob_pattern='*.png', img_class=RoundedImg)

    # make sure all the images from z1 are contained by the z0 image. The
    # only reason this might occur is if the tfw files are handling
    # floating point values badly
    for img in z1.images:
        if z0.images[0].bbox().contains(img.bbox()):
            continue
        raise IOError('The test images aren\'t all "contained" by the '
                      'z0 images, the nest cannot possibly work.\n '
                      'img {!s} not contained by {!s}\nExtents: {!s}; '
                      '{!s}'.format(img, z0.images[0], img.extent,
                                    z0.images[0].extent))

    nest_z0_z1 = cimg_nest.NestedImageCollection('aerial test', crs,
                                                 [z0, z1])
    nest = cimg_nest.NestedImageCollection('aerial test', crs,
                                           [z0, z1, z2])

    z0_key = ('aerial z0 test', z0.images[0])

    assert_true(z0_key in nest_z0_z1._ancestry)
    assert_equal(len(nest_z0_z1._ancestry), 1)

    # check that it has figured out that all the z1 images are children of
    # the only z0 image
    for img in z1.images:
        assert_in(('aerial z1 test', img), nest_z0_z1._ancestry[z0_key])

    # exactly one z1 image is expected to match this file name
    matching = [img for img in z1.images
                if img.filename.endswith('z_1/x_1_y_0.png')]
    x1_y0_z1, = matching

    assert_equal((1, 0, 1), _tile_from_img(x1_y0_z1))

    subtiles = nest.subtiles(('aerial z1 test', x1_y0_z1))
    assert_equal([(2, 0, 2), (2, 1, 2), (3, 0, 2), (3, 1, 2)],
                 sorted([_tile_from_img(img) for z, img in subtiles]))

    nest_from_config = gen_nest()

    # check that the images in the nest from configuration are the
    # same as those created by hand.
    for name in nest_z0_z1._collections_by_name:
        for img in nest_z0_z1._collections_by_name[name].images:
            configured = nest_from_config._collections_by_name[name]
            assert_in(img, configured.images)

    assert_equal(nest_z0_z1._ancestry, nest_from_config._ancestry)

    # check that a nest can be pickled and unpickled easily.
    buf = io.BytesIO()
    pickle.dump(nest_z0_z1, buf)
    buf.seek(0)
    restored = pickle.load(buf)

    assert_equal(nest_z0_z1._ancestry, restored._ancestry)
def update_msid_files(filetype, archfiles):
    """Ingest archive files into the h5 column files and the archfiles table.

    Each entry of ``archfiles`` is read (via ``read_derived`` for DERIVED
    content, ``read_archfile`` otherwise), checked for time gaps against the
    previously ingested file, recorded in the ``archfiles`` database table
    and finally appended to the per-column h5 files. The pickled
    ``colnames`` / ``colnames_all`` sets are rewritten if they changed.
    Nothing is inserted, committed or pickled when ``opt.dry_run`` is set.

    :param filetype: mapping describing the content type; the keys
        ``'instrum'`` and ``'content'`` are read here
    :param archfiles: sequence of archive files to ingest
    :returns: list of the entries of ``archfiles`` that were processed
    :raises ValueError: if the appended h5 columns do not all end up with
        the same length
    """
    # Pickles are binary data: read them in binary mode and make sure the
    # file handles are closed again.
    with open(msid_files['colnames'].abs, 'rb') as fh:
        colnames = pickle.load(fh)
    with open(msid_files['colnames_all'].abs, 'rb') as fh:
        colnames_all = pickle.load(fh)
    old_colnames = colnames.copy()
    old_colnames_all = colnames_all.copy()

    # Setup db handle with autocommit=False so that error along the way aborts insert transactions
    db = Ska.DBI.DBI(dbi='sqlite', server=msid_files['archfiles'].abs,
                     autocommit=False)

    # Get the last row number from the archfiles table
    out = db.fetchone('SELECT max(rowstop) FROM archfiles')
    row = out['max(rowstop)'] or 0
    last_archfile = db.fetchone('SELECT * FROM archfiles where rowstop=?',
                                (row, ))

    archfiles_overlaps = []
    dats = []
    archfiles_processed = []

    content_is_derived = (filetype['instrum'] == 'DERIVED')

    for i, f in enumerate(archfiles):
        get_data = (read_derived if content_is_derived else read_archfile)
        dat, archfiles_row = get_data(i, f, filetype, row, colnames,
                                      archfiles, db)
        if dat is None:
            continue

        # If creating new content type and there are no existing colnames, then
        # define the column names now. Filter out any multidimensional
        # columns, including (typically) QUALITY.
        if opt.create and not colnames:
            colnames = set(dat.dtype.names)
            for colname in dat.dtype.names:
                if len(dat[colname].shape) > 1:
                    logger.info(
                        'Removing column {} from colnames because shape = {}'.
                        format(colname, dat[colname].shape))
                    colnames.remove(colname)

        # Ensure that the time gap between the end of the last ingested archive
        # file and the start of this one is less than opt.max_gap (or
        # filetype-based defaults). If this fails then break out of the
        # archfiles processing but continue on to ingest any previously
        # successful archfiles
        if last_archfile is None:
            time_gap = 0
        else:
            time_gap = archfiles_row['tstart'] - last_archfile['tstop']
        max_gap = opt.max_gap
        if max_gap is None:
            if filetype['instrum'] in ['EPHEM', 'DERIVED']:
                max_gap = 601
            elif filetype['content'] == 'ACISDEAHK':
                max_gap = 10000
                # From P.Plucinsky 2011-09-23
                # If ACIS is executing an Event Histogram run while in FMT1,
                # the telemetry stream will saturate. The amount of time for
                # an opening in the telemetry to appear such that DEA HKP
                # packets can get out is a bit indeterminate. The histograms
                # integrate for 5400s and then they are telemetered. I would
                # suggest 6000s, but perhaps you would want to double that to
                # 12000s.
            elif filetype['content'] in ['CPE1ENG', 'CCDM15ENG']:
                # 100 years => no max gap for safe mode telemetry or dwell mode telemetry
                max_gap = 100 * 3.1e7
            else:
                max_gap = 32.9
        if time_gap > max_gap:
            logger.warning(
                'WARNING: found gap of %.2f secs between archfiles %s and %s',
                time_gap, last_archfile['filename'],
                archfiles_row['filename'])
            if opt.create:
                logger.warning(
                    ' Allowing gap because of opt.create=True')
            elif DateTime() - DateTime(
                    archfiles_row['tstart']) > opt.allow_gap_after_days:
                # After 4 days (by default) just let it go through because this is
                # likely a real gap and will not be fixed by subsequent processing.
                # This can happen after normal sun mode to SIM products.
                logger.warning(' Allowing gap because arch file '
                               'start is more than {} days old'.format(
                                   opt.allow_gap_after_days))
            else:
                break
        elif time_gap < 0:
            # Overlapping archfiles - deal with this in append_h5_col
            archfiles_overlaps.append((last_archfile, archfiles_row))

        # Update the last_archfile values.
        last_archfile = archfiles_row

        # A very small number of archive files (a few) have a problem where the
        # quality column tform is specified as 3B instead of 17X (for example).
        # This breaks things, so in this case just skip the file. However
        # since last_archfile is set above the gap check considers this file to
        # have been ingested.
        if not content_is_derived and dat['QUALITY'].shape[1] != len(
                dat.dtype.names):
            logger.warning(
                'WARNING: skipping because of quality size mismatch: %d %d' %
                (dat['QUALITY'].shape[1], len(dat.dtype.names)))
            continue

        # Mark the archfile as ingested in the database and add to list for
        # subsequent relocation into arch_files archive. In the case of a gap
        # where ingest is stopped before all archfiles are processed, this will
        # leave files either in a tmp dir (HEAD) or in the stage dir (OCC).
        # In the latter case this allows for successful processing later when the
        # gap gets filled.
        archfiles_processed.append(f)
        if not opt.dry_run:
            db.insert(archfiles_row, 'archfiles')

        # Capture the data for subsequent storage in the hdf5 files
        dats.append(dat)

        # Update the running list of column names. Colnames_all is the maximal (union)
        # set giving all column names seen in any file for this content type. Colnames
        # was historically the minimal (intersection) set giving the list of column names
        # seen in every file, but as of 0.39 it is allowed to grow as well to accommodate
        # adding MSIDs in the TDB. Include only 1-d columns, not things like AEPERR
        # in PCAD8ENG which is a 40-element binary vector.
        colnames_all.update(dat.dtype.names)
        colnames.update(name for name in dat.dtype.names
                        if dat[name].ndim == 1)

        row += len(dat)

    if dats:
        logger.verbose('Writing accumulated column data to h5 file at ' +
                       time.ctime())
        data_lens = set()
        processed_cols = set()
        for colname in colnames:
            # msid_files['msid'] is looked up right after ft['msid'] is set
            ft['msid'] = colname
            if not os.path.exists(msid_files['msid'].abs):
                make_h5_col_file(dats, colname)
                if not opt.create:
                    # New MSID was found for this content type. This must be associated with
                    # an update to the TDB. Skip for the moment to ensure that other MSIDs
                    # are fully processed.
                    continue
            data_len = append_h5_col(dats, colname, archfiles_overlaps)
            data_lens.add(data_len)
            processed_cols.add(colname)

        if len(data_lens) != 1:
            raise ValueError(
                'h5 data length inconsistency {}, investigate NOW!'.format(
                    data_lens))

        # Process any new MSIDs (this is extremely rare)
        data_len = data_lens.pop()
        for colname in colnames - processed_cols:
            ft['msid'] = colname
            append_filled_h5_col(dats, colname, data_len)

    # Assuming everything worked now commit the db inserts that signify the
    # new archive files have been processed
    if not opt.dry_run:
        db.commit()

    # If colnames or colnames_all changed then give warning and update files.
    if colnames != old_colnames:
        logger.warning('WARNING: updating %s because colnames changed: %s' %
                       (msid_files['colnames'].abs, old_colnames ^ colnames))
        if not opt.dry_run:
            with open(msid_files['colnames'].abs, 'wb') as fh:
                pickle.dump(colnames, fh)
    if colnames_all != old_colnames_all:
        logger.warning(
            'WARNING: updating %s because colnames_all changed: %s' %
            (msid_files['colnames_all'].abs, colnames_all ^ old_colnames_all))
        if not opt.dry_run:
            with open(msid_files['colnames_all'].abs, 'wb') as fh:
                pickle.dump(colnames_all, fh)

    return archfiles_processed
def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
                           dataset='mnist.pkl.gz',
                           batch_size=600):
    """
    Train a log-linear (logistic regression) model with minibatch
    stochastic gradient descent and early stopping. This is demonstrated
    on MNIST.

    Whenever the validation error improves, the classifier is scored on the
    test set and pickled to ``best_model.pkl``.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz

    :type batch_size: int
    :param batch_size: number of examples per minibatch
    """
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    def count_batches(shared_x):
        # number of complete minibatches contained in a shared dataset
        return shared_x.get_value(borrow=True).shape[0] // batch_size

    # compute number of minibatches for training, validation and testing
    n_train_batches = count_batches(train_set_x)
    n_valid_batches = count_batches(valid_set_x)
    n_test_batches = count_batches(test_set_x)

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # generate symbolic variables for input (x and y represent a
    # minibatch)
    x = T.matrix('x')  # data, presented as rasterized images
    y = T.ivector('y')  # labels, presented as 1D vector of [int] labels

    # construct the logistic regression class
    # Each MNIST image has size 28*28
    classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)

    # the cost we minimize during training is the negative log likelihood of
    # the model in symbolic format
    cost = classifier.negative_log_likelihood(y)

    # symbolic slice selecting the rows of minibatch number `index`
    batch = slice(index * batch_size, (index + 1) * batch_size)

    # compiling a Theano function that computes the mistakes that are made by
    # the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={x: test_set_x[batch], y: test_set_y[batch]}
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={x: valid_set_x[batch], y: valid_set_y[batch]}
    )

    # compute the gradient of cost with respect to theta = (W,b)
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    # start-snippet-3
    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs.
    updates = [(classifier.W, classifier.W - learning_rate * g_W),
               (classifier.b, classifier.b - learning_rate * g_b)]

    # compiling a Theano function `train_model` that returns the cost, but in
    # the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={x: train_set_x[batch], y: train_set_y[batch]}
    )
    # end-snippet-3

    ###############
    # TRAIN MODEL #
    ###############
    print('... training the model')
    # early-stopping parameters
    patience = 5000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch += 1
        for minibatch_index in range(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            # number of minibatch updates performed before this one
            iter_num = (epoch - 1) * n_train_batches + minibatch_index

            if (iter_num + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                this_validation_loss = numpy.mean(
                    [validate_model(i) for i in range(n_valid_batches)])

                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    significant = best_validation_loss * improvement_threshold
                    if this_validation_loss < significant:
                        patience = max(patience, iter_num * patience_increase)

                    best_validation_loss = this_validation_loss

                    # test it on the test set
                    test_score = numpy.mean(
                        [test_model(i) for i in range(n_test_batches)])

                    print(
                        (
                            ' epoch %i, minibatch %i/%i, test error of'
                            ' best model %f %%'
                        ) %
                        (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            test_score * 100.
                        )
                    )

                    # save the best model
                    with open('best_model.pkl', 'wb') as f:
                        pickle.dump(classifier, f)

            if patience <= iter_num:
                done_looping = True
                break

    end_time = timeit.default_timer()
    elapsed = end_time - start_time
    print(
        (
            'Optimization complete with best validation score of %f %%,'
            'with test performance %f %%'
        )
        % (best_validation_loss * 100., test_score * 100.)
    )
    print('The code run for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / elapsed))
    print(('The code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.1fs' % (elapsed)), file=sys.stderr)
def run(args):
    """Command-line entry point of ``dials.reindex``.

    Reads experiments and/or reflections, determines the change-of-basis
    operator and writes the reindexed experiments and reflections to the
    paths given by ``params.output``. The operator comes from one of three
    sources:

    * a comparison against a reference dataset, when
      ``params.reference.reflections`` is set;
    * automatically, when ``change_of_basis_op`` is ``libtbx.Auto`` - from a
      reference crystal if one was given, otherwise by re-indexing the
      reflections with the input experiments;
    * the explicit ``params.change_of_basis_op`` value otherwise.

    :param args: not read by this function; options are obtained from
        ``parser.parse_args``
    :raises Sorry: on missing or inconsistent user input
    """
    import libtbx.load_env
    from dials.util import Sorry

    usage = "dials.reindex [options] indexed.expt indexed.refl"

    parser = OptionParser(
        usage=usage,
        phil=phil_scope,
        read_reflections=True,
        read_experiments=True,
        check_format=False,
        epilog=help_message,
    )

    params, options = parser.parse_args(show_diff_phil=True)

    reflections, experiments = reflections_and_experiments_from_files(
        params.input.reflections, params.input.experiments)

    if len(experiments) == 0 and len(reflections) == 0:
        parser.print_help()
        return
    if params.change_of_basis_op is None:
        raise Sorry("Please provide a change_of_basis_op.")

    reference_crystal = None
    if params.reference.experiments is not None:
        from dxtbx.serialize import load

        reference_experiments = load.experiment_list(
            params.reference.experiments, check_format=False)
        assert len(reference_experiments.crystals()) == 1
        reference_crystal = reference_experiments.crystals()[0]

    if params.reference.reflections is not None:
        # First check that we have everything as expected for the reference reindexing
        # Currently only supports reindexing one dataset at a time
        if params.reference.experiments is None:
            raise Sorry(
                """For reindexing against a reference dataset, a reference experiments file must also be specified with the option: reference= """)
        if not os.path.exists(params.reference.reflections):
            raise Sorry("Could not locate reference dataset reflection file")
        if len(experiments) != 1 or len(reflections) != 1:
            raise Sorry(
                "Only one dataset can be reindexed to a reference at a time")

        reference_reflections = flex.reflection_table().from_file(
            params.reference.reflections)

        test_reflections = reflections[0]

        if (reference_crystal.get_space_group().type().number() !=
                experiments.crystals()[0].get_space_group().type().number()):
            raise Sorry("Space group of input does not match reference")

        # Set some flags to allow filtering, if wanting to reindex against
        # reference with data that has not yet been through integration
        if (test_reflections.get_flags(
                test_reflections.flags.integrated_sum).count(True) == 0):
            assert (
                "intensity.sum.value"
                in test_reflections), "No 'intensity.sum.value' in reflections"
            test_reflections.set_flags(
                flex.bool(test_reflections.size(), True),
                test_reflections.flags.integrated_sum,
            )
        if (reference_reflections.get_flags(
                reference_reflections.flags.integrated_sum).count(True) == 0):
            # The column must be present in the *reference* table, since that
            # is the table whose flags are set below.
            assert ("intensity.sum.value" in reference_reflections
                    ), "No 'intensity.sum.value in reference reflections"
            reference_reflections.set_flags(
                flex.bool(reference_reflections.size(), True),
                reference_reflections.flags.integrated_sum,
            )

        # Make miller array of the two datasets
        try:
            test_miller_set = filtered_arrays_from_experiments_reflections(
                experiments, [test_reflections])[0]
        except ValueError:
            raise Sorry(
                "No reflections remain after filtering the test dataset")
        try:
            reference_miller_set = filtered_arrays_from_experiments_reflections(
                reference_experiments, [reference_reflections])[0]
        except ValueError:
            raise Sorry(
                "No reflections remain after filtering the reference dataset")

        from dials.algorithms.symmetry.reindex_to_reference import (
            determine_reindex_operator_against_reference,
        )

        change_of_basis_op = determine_reindex_operator_against_reference(
            test_miller_set, reference_miller_set)

    elif len(experiments) and params.change_of_basis_op is libtbx.Auto:
        if reference_crystal is not None:
            if len(experiments.crystals()) > 1:
                raise Sorry("Only one crystal can be processed at a time")
            from dials.algorithms.indexing.compare_orientation_matrices import (
                difference_rotation_matrix_axis_angle,
            )

            cryst = experiments.crystals()[0]
            R, axis, angle, change_of_basis_op = difference_rotation_matrix_axis_angle(
                cryst, reference_crystal)
            print("Change of basis op: %s" % change_of_basis_op)
            print("Rotation matrix to transform input crystal to reference::")
            print(R.mathematica_form(format="%.3f", one_row_per_line=True))
            print(
                "Rotation of %.3f degrees" % angle,
                "about axis (%.3f, %.3f, %.3f)" % axis,
            )

        elif len(reflections):
            assert len(reflections) == 1

            # always re-map reflections to reciprocal space
            # NOTE(review): `reflections` is indexed like a list everywhere
            # else in this function (reflections[0]); confirm that it really
            # provides deep_copy(), otherwise this presumably should be
            # reflections[0].deep_copy().
            refl = reflections.deep_copy()
            refl.centroid_px_to_mm(experiments)
            refl.map_centroids_to_reciprocal_space(experiments)

            # index the reflection list using the input experiments list
            refl["id"] = flex.int(len(refl), -1)
            index = AssignIndicesGlobal(tolerance=0.2)
            index(refl, experiments)
            hkl_expt = refl["miller_index"]
            hkl_input = reflections[0]["miller_index"]

            change_of_basis_op = derive_change_of_basis_op(hkl_input, hkl_expt)

            # reset experiments list since we don't want to reindex this
            experiments = []

    else:
        change_of_basis_op = sgtbx.change_of_basis_op(
            params.change_of_basis_op)

    if len(experiments):
        space_group = params.space_group
        if space_group is not None:
            space_group = space_group.group()
        experiments = reindex_experiments(experiments,
                                          change_of_basis_op,
                                          space_group=space_group)
        print("Saving reindexed experimental models to %s" %
              params.output.experiments)
        experiments.as_file(params.output.experiments)

    if len(reflections):
        assert len(reflections) == 1
        reflections = reflections[0]

        miller_indices = reflections["miller_index"]

        if params.hkl_offset is not None:
            h, k, l = miller_indices.as_vec3_double().parts()
            h += params.hkl_offset[0]
            k += params.hkl_offset[1]
            l += params.hkl_offset[2]
            miller_indices = flex.miller_index(h.iround(), k.iround(),
                                               l.iround())
        non_integral_indices = change_of_basis_op.apply_results_in_non_integral_indices(
            miller_indices)
        if non_integral_indices.size() > 0:
            print(
                "Removing %i/%i reflections (change of basis results in non-integral indices)"
                % (non_integral_indices.size(), miller_indices.size()))
        # reindex the reflections that stay integral; the others get (0, 0, 0)
        sel = flex.bool(miller_indices.size(), True)
        sel.set_selected(non_integral_indices, False)
        miller_indices_reindexed = change_of_basis_op.apply(
            miller_indices.select(sel))
        reflections["miller_index"].set_selected(sel, miller_indices_reindexed)
        reflections["miller_index"].set_selected(~sel, (0, 0, 0))

        print("Saving reindexed reflections to %s" % params.output.reflections)
        with open(params.output.reflections, "wb") as fh:
            pickle.dump(reflections, fh, protocol=pickle.HIGHEST_PROTOCOL)
def train(opt):
    """Train a captioning model on synthetic data against a "true" model.

    Random normal feature vectors are generated with a fixed seed. A second
    model built by ``models.setup`` (the "true" model) produces the target
    sequences and is passed to ``reward_fun`` and the loss helpers. The
    actor is trained with cross-entropy until self-critical training starts
    (``opt.self_critical_after``), and afterwards with the estimator chosen
    by ``opt.rl_type``. Models, optimizer state, ``infos`` and ``histories``
    are written to ``opt.checkpoint_path`` every
    ``opt.save_checkpoint_every`` iterations.

    Several fields of ``opt`` are overwritten here (``use_att``,
    ``vocab_size``, ``seq_length``, ``fc_feat_size``, ``train_true``,
    ``train_true_step``, ``current_lr``, ...), and every model and tensor is
    moved to CUDA.

    :param opt: option namespace; when ``opt.start_from`` is set, training
        resumes from the files found in that directory
    """
    # opt.use_att = utils.if_use_att(opt.caption_model)
    opt.use_att = True
    if opt.use_box:
        opt.att_feat_size = opt.att_feat_size + 5

    opt.vocab_size = 50
    opt.seq_length = 10
    opt.fc_feat_size = 100
    opt.train_true = True
    opt.train_true_step = 100
    np.random.seed(0)
    data_num = 5000
    data_features = np.random.normal(size=[data_num, opt.fc_feat_size])
    test_data_num = 1000
    test_data_features = np.random.normal(
        size=[test_data_num, opt.fc_feat_size])
    print(opt.checkpoint_path)
    tb_summary_writer = tb and tb.SummaryWriter(opt.checkpoint_path)

    infos = {}
    histories = {}
    if opt.start_from is not None:
        # open old infos and check if models are compatible
        # (pickles are binary data, so the files are opened in 'rb' mode)
        with open(os.path.join(opt.start_from, 'infos_' + opt.id + '.pkl'),
                  'rb') as f:
            infos = cPickle.load(f)
            saved_model_opt = infos['opt']
            need_be_same = [
                "caption_model", "rnn_type", "rnn_size", "num_layers"
            ]
            for checkme in need_be_same:
                assert vars(saved_model_opt)[checkme] == vars(
                    opt
                )[checkme], "Command line argument and saved model disagree on '%s' " % checkme

        if os.path.isfile(
                os.path.join(opt.start_from, 'histories_' + opt.id + '.pkl')):
            with open(
                    os.path.join(opt.start_from,
                                 'histories_' + opt.id + '.pkl'), 'rb') as f:
                histories = cPickle.load(f)

    iteration = infos.get('iter', 0)
    epoch = infos.get('epoch', 0)

    val_result_history = histories.get('val_result_history', {})
    loss_history = histories.get('loss_history', {})
    critic_loss_history = histories.get('critic_loss_history', {})
    lr_history = histories.get('lr_history', {})
    ss_prob_history = histories.get('ss_prob_history', {})
    variance_history = histories.get('variance_history', {})
    # Checkpoints written below store this history under the key 'time', so
    # fall back to that key when 'time_history' is absent.
    time_history = histories.get('time_history', histories.get('time', {}))

    # Always bind best_val_score: it is written into `infos` at every
    # checkpoint, also when opt.load_best_score != 1.
    best_val_score = None
    if opt.load_best_score == 1:
        best_val_score = infos.get('best_val_score', None)

    model = models.setup(opt).cuda()
    dp_model = model

    #TODO: save true model
    true_model = models.setup(opt).cuda()
    if vars(opt).get('start_from', None) is not None:
        # check if all necessary files exist
        assert os.path.isdir(
            opt.start_from), " %s must be a a path" % opt.start_from
        assert os.path.isfile(
            os.path.join(opt.start_from, "infos_" + opt.id + ".pkl")
        ), "infos.pkl file does not exist in path %s" % opt.start_from
        true_model.load_state_dict(
            torch.load(os.path.join(opt.start_from, 'truemodel.pth')))
    # NOTE(review): eval() is applied whether or not a checkpoint was loaded;
    # confirm that this matches the intended nesting.
    true_model.eval()

    ######################### Actor-critic Training #####################################################################

    update_lr_flag = True
    # Assure in training mode
    dp_model.train()

    crit = utils.LanguageModelCriterion()
    rl_crit = utils.RewardCriterion()

    optimizer = utils.build_optimizer(model.parameters(), opt)
    tm_optimizer = utils.build_optimizer(true_model.parameters(), opt)
    # Load the optimizer
    if vars(opt).get('start_from', None) is not None and os.path.isfile(
            os.path.join(opt.start_from, "optimizer.pth")):
        optimizer.load_state_dict(
            torch.load(os.path.join(opt.start_from, 'optimizer.pth')))

    # exponential moving averages of the gradient and of its square, used
    # for the variance estimate that is logged below
    first_order = 0
    second_order = 0
    while True:
        if update_lr_flag:
            # Assign the learning rate
            if epoch > opt.learning_rate_decay_start and opt.learning_rate_decay_start >= 0:
                frac = (epoch - opt.learning_rate_decay_start
                        ) // opt.learning_rate_decay_every
                decay_factor = opt.learning_rate_decay_rate**frac
                opt.current_lr = opt.learning_rate * decay_factor
            else:
                opt.current_lr = opt.learning_rate
            utils.set_lr(optimizer, opt.current_lr)
            # Assign the scheduled sampling prob
            if epoch > opt.scheduled_sampling_start and opt.scheduled_sampling_start >= 0:
                frac = (epoch - opt.scheduled_sampling_start
                        ) // opt.scheduled_sampling_increase_every
                opt.ss_prob = min(opt.scheduled_sampling_increase_prob * frac,
                                  opt.scheduled_sampling_max_prob)
                model.ss_prob = opt.ss_prob

            # If start self critical training
            if opt.self_critical_after != -1 and epoch >= opt.self_critical_after:
                sc_flag = True
                init_scorer(opt.cached_tokens)
            else:
                sc_flag = False

            update_lr_flag = False

        dp_model.train()

        torch.cuda.synchronize()
        start = time.time()
        gen_result = None
        # current minibatch of the synthetic features (wraps around)
        start_index = (iteration * opt.batch_size) % data_num
        end_index = start_index + opt.batch_size
        fc_feats = torch.from_numpy(
            data_features[start_index:end_index, :]).cuda().float()
        att_feats = None
        att_masks = None
        # the true model produces the target sequences for this batch
        labels, total_logits = true_model(fc_feats,
                                          att_feats,
                                          att_masks,
                                          opt={'sample_max': 1},
                                          total_probs=True,
                                          mode='sample')
        # prepend a column of zeros to the sampled labels
        labels = torch.cat(
            [torch.zeros(labels.size(0), 1).cuda().long(), labels], 1)
        masks = (labels > 0).float()

        # train true model:
        if iteration < opt.train_true_step and opt.train_true:
            tm_optimizer.zero_grad()
            loss = -((total_logits * F.softmax(total_logits, 2)).sum(2)).mean()
            loss.backward()
            tm_optimizer.step()

        optimizer.zero_grad()
        if not sc_flag:
            loss = crit(dp_model(fc_feats, att_feats, labels, att_masks),
                        labels[:, 1:], masks[:, 1:])
        else:
            if opt.rl_type == 'sc':
                gen_result, sample_logprobs = dp_model(fc_feats,
                                                       att_feats,
                                                       att_masks,
                                                       opt={'sample_max': 0},
                                                       mode='sample')
                gen_result_sc, _ = dp_model(fc_feats,
                                            att_feats,
                                            att_masks,
                                            opt={'sample_max': 1},
                                            mode='sample')
                reward = reward_fun(gen_result, fc_feats,
                                    true_model).unsqueeze(1).repeat(
                                        1, sample_logprobs.size(1))
                reward_sc = reward_fun(gen_result_sc, fc_feats,
                                       true_model).unsqueeze(1).repeat(
                                           1, sample_logprobs.size(1))
                reward = reward - reward_sc
                loss = rl_crit(sample_logprobs, gen_result.data, reward)
                reward = np.zeros([2, 2])
            elif opt.rl_type == 'reinforce':
                gen_result, sample_logprobs = dp_model(fc_feats,
                                                       att_feats,
                                                       att_masks,
                                                       opt={'sample_max': 0},
                                                       mode='sample')
                reward = reward_fun(gen_result, fc_feats,
                                    true_model).unsqueeze(1).repeat(
                                        1, sample_logprobs.size(1))
                loss = rl_crit(sample_logprobs, gen_result.data, reward)
                reward = np.zeros([2, 2])
            elif opt.rl_type == 'reinforce_demean':
                gen_result, sample_logprobs = dp_model(fc_feats,
                                                       att_feats,
                                                       att_masks,
                                                       opt={'sample_max': 0},
                                                       mode='sample')
                reward = reward_fun(gen_result, fc_feats,
                                    true_model).unsqueeze(1).repeat(
                                        1, sample_logprobs.size(1))
                loss = rl_crit(sample_logprobs, gen_result.data,
                               reward - reward.mean())
                reward = np.zeros([2, 2])
            elif opt.rl_type == 'arsm':
                loss = get_arm_loss(dp_model, fc_feats, att_feats, att_masks,
                                    true_model, opt)
                #print(loss)
                reward = np.zeros([2, 2])
            elif opt.rl_type == 'ars':
                loss = get_arm_loss(dp_model,
                                    fc_feats,
                                    att_feats,
                                    att_masks,
                                    true_model,
                                    opt,
                                    type='ars')
                #print(loss)
                reward = np.zeros([2, 2])
            elif opt.rl_type == 'ar':
                loss = get_ar_loss(dp_model, fc_feats, att_feats, att_masks,
                                   true_model, opt)
                # print(loss)
                reward = np.zeros([2, 2])
            elif opt.rl_type == 'mct_baseline':
                opt.rf_demean = 0
                gen_result, sample_logprobs, probs, mct_baseline = get_mct_loss(
                    dp_model, fc_feats, att_feats, att_masks, opt, true_model)
                reward = reward_fun(gen_result, fc_feats,
                                    true_model).unsqueeze(1).repeat(
                                        1, sample_logprobs.size(1))
                reward_cuda = reward
                #mct_baseline[mct_baseline < 0] = reward_cuda[mct_baseline < 0]
                loss = rl_crit(sample_logprobs, gen_result.data,
                               reward - mct_baseline)
            # NOTE(review): the optional MLE term is applied to every RL
            # estimator above (and not to the plain cross-entropy phase);
            # confirm that this is the intended nesting.
            if opt.mle_weights != 0:
                loss += opt.mle_weights * crit(
                    dp_model(fc_feats, att_feats, labels, att_masks),
                    labels[:, 1:], masks[:, 1:])
        #TODO make sure all sampling replaced by greedy for critic
        #### update the actor
        loss.backward()
        # with open(os.path.join(opt.checkpoint_path, 'best_embed.pkl'), 'wb') as f:
        #     cPickle.dump(list(dp_model.embed.parameters())[0].data.cpu().numpy(), f)
        # with open(os.path.join(opt.checkpoint_path, 'best_logit.pkl'), 'wb') as f:
        #     cPickle.dump(list(dp_model.logit.parameters())[0].data.cpu().numpy(), f)
        ## compute variance
        # flatten all parameter gradients into a single vector
        gradient = torch.zeros([0]).cuda()
        for i in model.parameters():
            gradient = torch.cat((gradient, i.grad.view(-1)), 0)
        first_order = 0.9999 * first_order + 0.0001 * gradient
        second_order = 0.9999 * second_order + 0.0001 * gradient.pow(2)
        # print(torch.max(torch.abs(gradient)))
        variance = torch.mean(torch.abs(second_order -
                                        first_order.pow(2))).item()
        if opt.rl_type != 'arsm' or not sc_flag:
            utils.clip_gradient(optimizer, opt.grad_clip)
        optimizer.step()
        train_loss = loss.item()
        torch.cuda.synchronize()
        end = time.time()
        if (iteration % opt.losses_log_every == 0):
            if not sc_flag:
                print("iter {} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                    .format(iteration, epoch, train_loss, end - start))
                print(opt.checkpoint_path)
            else:
                print("iter {} (epoch {}), avg_reward = {:.3f}, variance = {:g}, time/batch = {:.3f}" \
                    .format(iteration, epoch, reward.mean(), variance, end - start))

        # Update the iteration and epoch
        iteration += 1
        if (iteration * opt.batch_size) % data_num == 0:
            epoch += 1
            update_lr_flag = True

        # Write the training loss summary
        if (iteration % opt.losses_log_every == 0):
            add_summary_value(tb_summary_writer, 'train_loss', train_loss,
                              iteration)
            add_summary_value(tb_summary_writer, 'learning_rate',
                              opt.current_lr, iteration)
            add_summary_value(tb_summary_writer, 'scheduled_sampling_prob',
                              model.ss_prob, iteration)
            # NOTE(review): the variance summary is written only during
            # self-critical training here; confirm the intended nesting.
            if sc_flag:
                add_summary_value(tb_summary_writer, 'avg_reward',
                                  reward.mean(), iteration)
                add_summary_value(tb_summary_writer, 'variance', variance,
                                  iteration)

            #loss_history[iteration] = train_loss if not sc_flag else reward.mean()
            lr_history[iteration] = opt.current_lr
            ss_prob_history[iteration] = model.ss_prob
            variance_history[iteration] = variance
            time_history[iteration] = end - start

        # make evaluation on validation set, and save model
        if (iteration % opt.save_checkpoint_every == 0):
            # eval model
            val_loss, lang_stats = eval_utils_syn(dp_model, true_model,
                                                  test_data_features,
                                                  opt.batch_size, crit)

            lang_stats = lang_stats.item()
            val_loss = val_loss.item()
            # Write validation result into summary
            add_summary_value(tb_summary_writer, 'validation loss', val_loss,
                              iteration)
            val_result_history[iteration] = {
                'loss': val_loss,
                'lang_stats': lang_stats
            }

            # Save model if is improving on validation result
            print('loss', val_loss, 'lang_stats', lang_stats)
            if True:  # if true
                checkpoint_path = os.path.join(opt.checkpoint_path,
                                               'model.pth')
                if not os.path.isdir(opt.checkpoint_path):
                    os.mkdir(opt.checkpoint_path)
                torch.save(model.state_dict(), checkpoint_path)
                checkpoint_path = os.path.join(opt.checkpoint_path,
                                               'truemodel.pth')
                torch.save(true_model.state_dict(), checkpoint_path)
                print("model saved to {}".format(checkpoint_path))
                optimizer_path = os.path.join(opt.checkpoint_path,
                                              'optimizer.pth')
                torch.save(optimizer.state_dict(), optimizer_path)

                # Dump miscellaneous information
                infos['iter'] = iteration
                infos['epoch'] = epoch
                infos['best_val_score'] = best_val_score
                infos['opt'] = opt
                infos['vocab'] = opt.vocab_size

                histories['val_result_history'] = val_result_history
                histories['loss_history'] = loss_history
                histories['critic_loss_history'] = critic_loss_history
                histories['lr_history'] = lr_history
                histories['ss_prob_history'] = ss_prob_history
                histories['variance_history'] = variance_history
                # key kept as 'time' so the written file format is unchanged
                histories['time'] = time_history
                # histories['variance'] = 0
                with open(
                        os.path.join(opt.checkpoint_path,
                                     'infos_' + opt.id + '.pkl'), 'wb') as f:
                    cPickle.dump(infos, f)
                with open(
                        os.path.join(opt.checkpoint_path,
                                     'histories_' + opt.id + '.pkl'),
                        'wb') as f:
                    cPickle.dump(histories, f)

        # Stop if reaching max epochs
        if epoch >= opt.max_epochs and opt.max_epochs != -1:
            break
def pickle_environment(path, environment=None):
    """Pickle an environment dictionary to a file.

    Args:
        path: Destination file path; the file is created or truncated.
        environment: Mapping to serialise. When it is omitted (or falsy,
            e.g. an empty dict) a snapshot of ``os.environ`` is written
            instead.
    """
    snapshot = dict(environment if environment else os.environ)
    # A context manager guarantees the handle is flushed and closed even if
    # pickling raises; previously the file object was simply abandoned.
    with open(path, 'wb') as handle:
        cPickle.dump(snapshot, handle, protocol=2)
def read_doc(self, docname, app=None):
    """Parse a file and add/update inventory entries for the doctree.

    The document is published through docutils, passed to the
    environment's post-processing hooks, stripped of attributes that would
    prevent pickling, and finally written to the doctree directory.

    :param docname: name of the document to read; it is mapped to a source
        path with ``self.doc2path``.
    :param app: optional application object.  It is handed to the file
        input and, when truthy, used to emit the ``doctree-read`` event.
    """
    # per-document scratch state; cleared again at the end of this method
    self.temp_data['docname'] = docname
    # defaults to the global default, but can be re-set in a document
    self.temp_data['default_domain'] = \
        self.domains.get(self.config.primary_domain)

    # copy the relevant configuration values into the docutils settings
    self.settings['input_encoding'] = self.config.source_encoding
    self.settings['trim_footnote_reference_space'] = \
        self.config.trim_footnote_reference_space
    self.settings['gettext_compact'] = self.config.gettext_compact

    docutilsconf = path.join(self.srcdir, 'docutils.conf')
    # read docutils.conf from source dir, not from current dir
    OptionParser.standard_config_files[1] = docutilsconf
    if path.isfile(docutilsconf):
        self.note_dependency(docutilsconf)

    with sphinx_domains(self):
        if self.config.default_role:
            role_fn, messages = roles.role(self.config.default_role, english,
                                           0, dummy_reporter)
            if role_fn:
                # install the configured role as the anonymous default role
                roles._roles[''] = role_fn
            else:
                self.warn(docname, 'default role %s not found' %
                          self.config.default_role)

        codecs.register_error('sphinx', self.warn_and_replace)

        # publish manually
        reader = SphinxStandaloneReader(self.app,
                                        parsers=self.config.source_parsers)
        pub = Publisher(reader=reader,
                        writer=SphinxDummyWriter(),
                        destination_class=NullOutput)
        pub.set_components(None, 'restructuredtext', None)
        pub.process_programmatic_settings(None, self.settings, None)
        src_path = self.doc2path(docname)
        # NOTE(review): the reader above receives self.app while the input
        # below receives the ``app`` argument (possibly None) -- confirm
        # this asymmetry is intended.
        source = SphinxFileInput(app, self, source=None, source_path=src_path,
                                 encoding=self.config.source_encoding)
        pub.source = source
        pub.settings._source = src_path
        pub.set_destination(None, None)
        pub.publish()
        doctree = pub.document

    # post-processing
    self.process_dependencies(docname, doctree)
    self.process_images(docname, doctree)
    self.process_downloads(docname, doctree)
    self.process_metadata(docname, doctree)
    self.create_title_from(docname, doctree)
    for manager in itervalues(self.managers):
        manager.process_doc(docname, doctree)
    for domain in itervalues(self.domains):
        domain.process_doc(self, docname, doctree)

    # allow extension-specific post-processing
    if app:
        app.emit('doctree-read', doctree)

    # store time of reading, for outdated files detection
    # (Some filesystems have coarse timestamp resolution;
    # therefore time.time() can be older than filesystem's timestamp.
    # For example, FAT32 has 2sec timestamp resolution.)
    self.all_docs[docname] = max(
        time.time(), path.getmtime(self.doc2path(docname)))

    if self.versioning_condition:
        old_doctree = None
        if self.versioning_compare:
            # get old doctree
            try:
                with open(self.doc2path(docname,
                                        self.doctreedir, '.doctree'), 'rb') as f:
                    old_doctree = pickle.load(f)
            except EnvironmentError:
                # no readable previous doctree: fall through to fresh uids
                pass

        # add uids for versioning
        # (list() drains the iterables returned by add_uids/merge_doctrees;
        # the resulting lists themselves are discarded)
        if not self.versioning_compare or old_doctree is None:
            list(add_uids(doctree, self.versioning_condition))
        else:
            list(merge_doctrees(
                old_doctree, doctree, self.versioning_condition))

    # make it picklable
    doctree.reporter = None
    doctree.transformer = None
    doctree.settings.warning_stream = None
    doctree.settings.env = None
    doctree.settings.record_dependencies = None

    # cleanup
    self.temp_data.clear()
    self.ref_context.clear()
    roles._roles.pop('', None)  # if a document has set a local default role

    # save the parsed doctree
    doctree_filename = self.doc2path(docname, self.doctreedir,
                                     '.doctree')
    ensuredir(path.dirname(doctree_filename))
    with open(doctree_filename, 'wb') as f:
        pickle.dump(doctree, f, pickle.HIGHEST_PROTOCOL)
def save(self, file_name):
    """Pickle ``self.weights`` to ``file_name`` with the highest protocol.

    Args:
        file_name: Destination path; the file is created or truncated.
    """
    # The context manager closes the handle even if pickling raises, which
    # the previous explicit open()/close() pair did not.
    with open(file_name, 'wb') as f:
        pickle.dump(self.weights, f, pickle.HIGHEST_PROTOCOL)
def main(_):
    """Train and/or evaluate an ALBERT model on SQuAD v2.0.

    Driven entirely by ``FLAGS``.  With ``FLAGS.do_train`` the training
    examples are converted to features (unless the feature file already
    exists) and the estimator is trained.  With ``FLAGS.do_predict`` every
    checkpoint found in ``FLAGS.output_dir`` is evaluated, the one with the
    best ``f1`` is copied to ``model.ckpt-best``, surplus checkpoints are
    removed, and the results are appended to ``eval_results.txt``.

    Args:
        _: unused positional argument.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    albert_config = modeling.AlbertConfig.from_json_file(
        FLAGS.albert_config_file)

    validate_flags_or_throw(albert_config)

    tf.gfile.MakeDirs(FLAGS.output_dir)

    tokenizer = fine_tuning_utils.create_vocab(
        vocab_file=FLAGS.vocab_file,
        do_lower_case=FLAGS.do_lower_case,
        spm_model_file=FLAGS.spm_model_file,
        hub_module=FLAGS.albert_hub_module_handle)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = contrib_tpu.InputPipelineConfig.PER_HOST_V2
    if FLAGS.do_train:
        iterations_per_loop = int(
            min(FLAGS.iterations_per_loop, FLAGS.save_checkpoints_steps))
    else:
        iterations_per_loop = FLAGS.iterations_per_loop
    run_config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        keep_checkpoint_max=0,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=contrib_tpu.TPUConfig(
            iterations_per_loop=iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    train_examples = None
    num_train_steps = None
    num_warmup_steps = None
    # The training examples are read even for predict-only runs because the
    # evaluation loop below stops once global_step reaches num_train_steps.
    train_examples = squad_utils.read_squad_examples(
        input_file=FLAGS.train_file, is_training=True)
    num_train_steps = int(
        len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
    if FLAGS.do_train:
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

        # Pre-shuffle the input to avoid having to make a very large shuffle
        # buffer in in the `input_fn`.
        rng = random.Random(12345)
        rng.shuffle(train_examples)

    model_fn = squad_utils.v2_model_fn_builder(
        albert_config=albert_config,
        init_checkpoint=FLAGS.init_checkpoint,
        learning_rate=FLAGS.learning_rate,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps,
        use_tpu=FLAGS.use_tpu,
        use_one_hot_embeddings=FLAGS.use_tpu,
        max_seq_length=FLAGS.max_seq_length,
        start_n_top=FLAGS.start_n_top,
        end_n_top=FLAGS.end_n_top,
        dropout_prob=FLAGS.dropout_prob,
        hub_module=FLAGS.albert_hub_module_handle)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = contrib_tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        # We write to a temporary file to avoid storing very large constant
        # tensors in memory.
        if not tf.gfile.Exists(FLAGS.train_feature_file):
            train_writer = squad_utils.FeatureWriter(
                filename=os.path.join(FLAGS.train_feature_file),
                is_training=True)
            squad_utils.convert_examples_to_features(
                examples=train_examples,
                tokenizer=tokenizer,
                max_seq_length=FLAGS.max_seq_length,
                doc_stride=FLAGS.doc_stride,
                max_query_length=FLAGS.max_query_length,
                is_training=True,
                output_fn=train_writer.process_feature,
                do_lower_case=FLAGS.do_lower_case)
            train_writer.close()

        tf.logging.info("***** Running training *****")
        tf.logging.info(" Num orig examples = %d", len(train_examples))
        # tf.logging.info(" Num split examples = %d", train_writer.num_features)
        tf.logging.info(" Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info(" Num steps = %d", num_train_steps)
        del train_examples

        train_input_fn = squad_utils.input_fn_builder(
            input_file=FLAGS.train_feature_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True,
            use_tpu=FLAGS.use_tpu,
            bsz=FLAGS.train_batch_size,
            is_v2=True)
        estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

    if FLAGS.do_predict:
        with tf.gfile.Open(FLAGS.predict_file) as predict_file:
            prediction_json = json.load(predict_file)["data"]
        eval_examples = squad_utils.read_squad_examples(
            input_file=FLAGS.predict_file, is_training=False)

        if (tf.gfile.Exists(FLAGS.predict_feature_file) and
                tf.gfile.Exists(FLAGS.predict_feature_left_file)):
            tf.logging.info("Loading eval features from {}".format(
                FLAGS.predict_feature_left_file))
            # NOTE(review): pickle.load executes arbitrary code from the
            # file; only point this flag at feature caches you produced.
            with tf.gfile.Open(FLAGS.predict_feature_left_file, "rb") as fin:
                eval_features = pickle.load(fin)
        else:
            eval_writer = squad_utils.FeatureWriter(
                filename=FLAGS.predict_feature_file, is_training=False)
            eval_features = []

            def append_feature(feature):
                """Keep the feature in memory and stream it to the writer."""
                eval_features.append(feature)
                eval_writer.process_feature(feature)

            squad_utils.convert_examples_to_features(
                examples=eval_examples,
                tokenizer=tokenizer,
                max_seq_length=FLAGS.max_seq_length,
                doc_stride=FLAGS.doc_stride,
                max_query_length=FLAGS.max_query_length,
                is_training=False,
                output_fn=append_feature,
                do_lower_case=FLAGS.do_lower_case)
            eval_writer.close()

            with tf.gfile.Open(FLAGS.predict_feature_left_file, "wb") as fout:
                pickle.dump(eval_features, fout)

        tf.logging.info("***** Running predictions *****")
        tf.logging.info(" Num orig examples = %d", len(eval_examples))
        tf.logging.info(" Num split examples = %d", len(eval_features))
        tf.logging.info(" Batch size = %d", FLAGS.predict_batch_size)

        predict_input_fn = squad_utils.input_fn_builder(
            input_file=FLAGS.predict_feature_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=False,
            use_tpu=FLAGS.use_tpu,
            bsz=FLAGS.predict_batch_size,
            is_v2=True)

        def get_result(checkpoint):
            """Evaluate the checkpoint on SQuAD v2.0.

            Returns a ``(metrics, global_step)`` tuple.
            """
            # If running eval on the TPU, you will need to specify the
            # number of steps.
            reader = tf.train.NewCheckpointReader(checkpoint)
            global_step = reader.get_tensor(tf.GraphKeys.GLOBAL_STEP)
            all_results = []
            for result in estimator.predict(
                    predict_input_fn, yield_single_examples=True,
                    checkpoint_path=checkpoint):
                if len(all_results) % 1000 == 0:
                    tf.logging.info(
                        "Processing example: %d" % (len(all_results)))
                unique_id = int(result["unique_ids"])
                start_top_log_probs = (
                    [float(x) for x in result["start_top_log_probs"].flat])
                start_top_index = [
                    int(x) for x in result["start_top_index"].flat]
                end_top_log_probs = (
                    [float(x) for x in result["end_top_log_probs"].flat])
                end_top_index = [int(x) for x in result["end_top_index"].flat]

                cls_logits = float(result["cls_logits"].flat[0])
                all_results.append(
                    squad_utils.RawResultV2(
                        unique_id=unique_id,
                        start_top_log_probs=start_top_log_probs,
                        start_top_index=start_top_index,
                        end_top_log_probs=end_top_log_probs,
                        end_top_index=end_top_index,
                        cls_logits=cls_logits))

            output_prediction_file = os.path.join(
                FLAGS.output_dir, "predictions.json")
            output_nbest_file = os.path.join(
                FLAGS.output_dir, "nbest_predictions.json")
            output_null_log_odds_file = os.path.join(
                FLAGS.output_dir, "null_odds.json")

            result_dict = {}
            cls_dict = {}
            squad_utils.accumulate_predictions_v2(
                result_dict, cls_dict, eval_examples, eval_features,
                all_results, FLAGS.n_best_size, FLAGS.max_answer_length,
                FLAGS.start_n_top, FLAGS.end_n_top)

            return squad_utils.evaluate_v2(
                result_dict, cls_dict, prediction_json, eval_examples,
                eval_features, all_results, FLAGS.n_best_size,
                FLAGS.max_answer_length, output_prediction_file,
                output_nbest_file,
                output_null_log_odds_file), int(global_step)

        def _find_valid_cands(curr_step):
            """List ``.index`` files of non-best checkpoints newer than
            ``curr_step``."""
            filenames = tf.gfile.ListDirectory(FLAGS.output_dir)
            candidates = []
            for filename in filenames:
                if filename.endswith(".index"):
                    ckpt_name = filename[:-6]
                    idx = ckpt_name.split("-")[-1]
                    if idx != "best" and int(idx) > curr_step:
                        candidates.append(filename)
            return candidates

        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        checkpoint_path = os.path.join(FLAGS.output_dir, "model.ckpt-best")
        key_name = "f1"
        # The results file is managed by a context manager so that it is
        # flushed and closed on every exit path; it used to be left open.
        with tf.gfile.GFile(output_eval_file, "w") as writer:
            if tf.gfile.Exists(checkpoint_path + ".index"):
                # resume from a previously selected best checkpoint
                result = get_result(checkpoint_path)
                best_perf = result[0][key_name]
                global_step = result[1]
            else:
                global_step = -1
                best_perf = -1
                checkpoint_path = None
            while global_step < num_train_steps:
                steps_and_files = {}
                filenames = tf.gfile.ListDirectory(FLAGS.output_dir)
                for filename in filenames:
                    if filename.endswith(".index"):
                        ckpt_name = filename[:-6]
                        cur_filename = os.path.join(
                            FLAGS.output_dir, ckpt_name)
                        if cur_filename.split("-")[-1] == "best":
                            continue
                        gstep = int(cur_filename.split("-")[-1])
                        if gstep not in steps_and_files:
                            tf.logging.info(
                                "Add {} to eval list.".format(cur_filename))
                            steps_and_files[gstep] = cur_filename
                tf.logging.info(
                    "found {} files.".format(len(steps_and_files)))
                if not steps_and_files:
                    tf.logging.info(
                        "found 0 file, global step: {}. Sleeping.".format(
                            global_step))
                    time.sleep(60)
                else:
                    for ele in sorted(steps_and_files.items()):
                        step, checkpoint_path = ele
                        if global_step >= step:
                            # already evaluated; drop it once newer
                            # checkpoints exist
                            if len(_find_valid_cands(step)) > 1:
                                for ext in [
                                        "meta", "data-00000-of-00001",
                                        "index"]:
                                    src_ckpt = (
                                        checkpoint_path + ".{}".format(ext))
                                    tf.logging.info(
                                        "removing {}".format(src_ckpt))
                                    tf.gfile.Remove(src_ckpt)
                            continue
                        result, global_step = get_result(checkpoint_path)
                        tf.logging.info("***** Eval results *****")
                        for key in sorted(result.keys()):
                            tf.logging.info(" %s = %s", key, str(result[key]))
                            writer.write(
                                "%s = %s\n" % (key, str(result[key])))
                        if result[key_name] > best_perf:
                            best_perf = result[key_name]
                            for ext in [
                                    "meta", "data-00000-of-00001", "index"]:
                                src_ckpt = checkpoint_path + ".{}".format(ext)
                                tgt_ckpt = checkpoint_path.rsplit(
                                    "-", 1)[0] + "-best.{}".format(ext)
                                tf.logging.info("saving {} to {}".format(
                                    src_ckpt, tgt_ckpt))
                                tf.gfile.Copy(
                                    src_ckpt, tgt_ckpt, overwrite=True)
                                writer.write("saved {} to {}\n".format(
                                    src_ckpt, tgt_ckpt))
                        writer.write(
                            "best {} = {}\n".format(key_name, best_perf))
                        tf.logging.info(" best {} = {}\n".format(
                            key_name, best_perf))

                        if len(_find_valid_cands(global_step)) > 2:
                            for ext in [
                                    "meta", "data-00000-of-00001", "index"]:
                                src_ckpt = checkpoint_path + ".{}".format(ext)
                                tf.logging.info(
                                    "removing {}".format(src_ckpt))
                                tf.gfile.Remove(src_ckpt)
                        writer.write("=" * 50 + "\n")

            checkpoint_path = os.path.join(
                FLAGS.output_dir, "model.ckpt-best")
            result, global_step = get_result(checkpoint_path)
            tf.logging.info("***** Final Eval results *****")
            for key in sorted(result.keys()):
                tf.logging.info(" %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))
            writer.write(
                "best perf happened at step: {}".format(global_step))
def train(opt):
    """Run the captioning training loop described by ``opt``.

    At the start of every epoch the model is evaluated on the validation
    split, checkpointed (model, optimizer, ``infos`` and ``histories``
    pickles, plus ``*-best`` copies when the score improves) and the
    learning rate, scheduled-sampling probability and self-critical flag
    are refreshed.  Gradients are accumulated over ``opt.iter_times``
    batches before each optimizer step.  The loop ends once
    ``opt.max_epochs`` is reached (``-1`` disables the limit).

    Args:
        opt: option namespace; it is also mutated (``use_att``,
            ``vocab_size``, ``seq_length``, ``current_lr``, ``ss_prob``).
    """
    opt.use_att = utils.if_use_att(opt.caption_model)
    loader = DataLoader(opt)
    opt.vocab_size = loader.vocab_size
    opt.seq_length = loader.seq_length

    tf_summary_writer = tf and tf.summary.FileWriter(opt.checkpoint_path)

    infos = {}
    histories = {}
    if opt.start_from is not None:
        # open old infos and check if models are compatible
        # (pickles are binary data, so the files must be opened in 'rb')
        infos_path = os.path.join(
            opt.start_from, 'infos_'+opt.old_id+'.pkl')
        with open(infos_path, 'rb') as f:
            infos = cPickle.load(f)
        saved_model_opt = infos['opt']
        need_be_same = ["rnn_type", "rnn_size", "num_layers"]
        for checkme in need_be_same:
            assert vars(saved_model_opt)[checkme] == vars(opt)[checkme], "Command line argument and saved model disagree on '%s' " % checkme

        histories_path = os.path.join(
            opt.start_from, 'histories_'+opt.old_id+'.pkl')
        if os.path.isfile(histories_path):
            with open(histories_path, 'rb') as f:
                histories = cPickle.load(f)

    iteration = infos.get('iter', 0)
    epoch = infos.get('epoch', 0)

    val_result_history = histories.get('val_result_history', {})
    loss_history = histories.get('loss_history', {})
    lr_history = histories.get('lr_history', {})
    ss_prob_history = histories.get('ss_prob_history', {})

    loader.iterators = infos.get('iterators', loader.iterators)
    loader.split_ix = infos.get('split_ix', loader.split_ix)
    loader.syn_iterator_all()
    # Always bind best_val_score: it used to be assigned only when
    # load_best_score == 1, which made the first checkpoint comparison
    # raise UnboundLocalError otherwise.
    best_val_score = None
    if opt.load_best_score == 1:
        best_val_score = infos.get('best_val_score', None)

    model = models.setup(opt)
    model.cuda()
    if opt.gpu_num > 1:
        model_ = torch.nn.DataParallel(model, device_ids=range(opt.gpu_num))
    else:
        model_ = model

    update_lr_flag = True
    # Assure in training mode
    model.train()

    crit = utils.LanguageModelCriterion()
    rl_crit = utils.RewardCriterion()

    optimizer = optim.Adam(model.parameters(), lr=opt.learning_rate,
                           weight_decay=opt.weight_decay)
    optimizer.zero_grad()

    # Load the optimizer
    if vars(opt).get('start_from', None) is not None and os.path.isfile(os.path.join(opt.start_from, "optimizer.pth")):
        optimizer.load_state_dict(
            torch.load(os.path.join(opt.start_from, 'optimizer.pth')))

    while True:
        # make evaluation on validation set, and save model
        if update_lr_flag:
            # eval model
            eval_kwargs = {'split': 'val', 'dataset': opt.input_json}
            eval_kwargs.update(vars(opt))
            val_loss, predictions, lang_stats = eval_utils_t.eval_split(
                None, model, crit, loader, eval_kwargs)

            # Write validation result into summary
            if tf is not None:
                add_summary_value(tf_summary_writer, 'validation loss',
                                  val_loss, iteration)
                for k, v in lang_stats.items():
                    add_summary_value(tf_summary_writer, k, v, iteration)
                tf_summary_writer.flush()
            val_result_history[iteration] = {
                'loss': val_loss, 'lang_stats': lang_stats,
                'predictions': predictions}

            # Save model if is improving on validation result
            if opt.language_eval == 1:
                current_score = lang_stats['CIDEr']
            else:
                current_score = - val_loss

            best_flag = False
            if best_val_score is None or current_score > best_val_score:
                best_val_score = current_score
                best_flag = True
            # the latest weights are written unconditionally
            checkpoint_path = os.path.join(opt.checkpoint_path, 'model.pth')
            torch.save(model.state_dict(), checkpoint_path)
            print("model saved to {}".format(checkpoint_path))
            optimizer_path = os.path.join(opt.checkpoint_path,
                                          'optimizer.pth')
            torch.save(optimizer.state_dict(), optimizer_path)

            # Dump miscalleous informations
            infos['iter'] = iteration
            infos['epoch'] = epoch
            infos['iterators'] = loader.iterators
            infos['split_ix'] = loader.split_ix
            infos['best_val_score'] = best_val_score
            infos['opt'] = opt
            infos['vocab'] = loader.get_vocab()

            histories['val_result_history'] = val_result_history
            histories['loss_history'] = loss_history
            histories['lr_history'] = lr_history
            histories['ss_prob_history'] = ss_prob_history
            with open(os.path.join(opt.checkpoint_path, 'infos_'+opt.id+'.pkl'), 'wb') as f:
                cPickle.dump(infos, f)
            with open(os.path.join(opt.checkpoint_path, 'histories_'+opt.id+'.pkl'), 'wb') as f:
                cPickle.dump(histories, f)

            if best_flag:
                checkpoint_path = os.path.join(opt.checkpoint_path,
                                               'model-best.pth')
                torch.save(model.state_dict(), checkpoint_path)
                print("model saved to {}".format(checkpoint_path))
                with open(os.path.join(opt.checkpoint_path, 'infos_'+opt.id+'-best.pkl'), 'wb') as f:
                    cPickle.dump(infos, f)

        if update_lr_flag:
            # Assign the learning rate
            if epoch > opt.learning_rate_decay_start and opt.learning_rate_decay_start >= 0:
                frac = (epoch - opt.learning_rate_decay_start) // opt.learning_rate_decay_every
                decay_factor = opt.learning_rate_decay_rate ** frac
                opt.current_lr = opt.learning_rate * decay_factor
                utils.set_lr(optimizer, opt.current_lr)  # set the decayed rate
            else:
                opt.current_lr = opt.learning_rate
            # Assign the scheduled sampling prob
            if epoch > opt.scheduled_sampling_start and opt.scheduled_sampling_start >= 0:
                frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every
                opt.ss_prob = min(opt.scheduled_sampling_increase_prob * frac,
                                  opt.scheduled_sampling_max_prob)
                model.ss_prob = opt.ss_prob
            # If start self critical training
            if opt.self_critical_after != -1 and epoch >= opt.self_critical_after:
                sc_flag = True
                init_cider_scorer(opt.cached_tokens)
            else:
                sc_flag = False

            update_lr_flag = False

        # Stop if reaching max epochs
        if epoch >= opt.max_epochs and opt.max_epochs != -1:
            break

        start = time.time()
        # Load data from train split (0)
        data = loader.get_batch('train')
        print('Read data:', time.time() - start)

        torch.cuda.synchronize()
        start = time.time()

        tmp = [data['fc_feats'], data['att_feats'], data['labels'],
               data['masks']]
        tmp = [Variable(torch.from_numpy(_), requires_grad=False).cuda()
               for _ in tmp]
        fc_feats, att_feats, labels, masks = tmp

        if opt.use_topic:
            topics = Variable(torch.from_numpy(data['topics']),
                              requires_grad=False).cuda()
            if not sc_flag:
                loss = crit(model_(fc_feats, att_feats, topics, labels),
                            labels[:, 1:], masks[:, 1:])
            else:
                gen_result, sample_logprobs = model.sample(
                    fc_feats, att_feats, topics, {'sample_max': 0})
                reward, base_cider, explore_cider = get_self_critical_reward_t(
                    model, fc_feats, att_feats, topics, data, gen_result)
                loss = rl_crit(
                    sample_logprobs, gen_result,
                    Variable(torch.from_numpy(reward).float().cuda(),
                             requires_grad=False))
        else:
            if not sc_flag:
                loss = crit(model_(fc_feats, att_feats, labels),
                            labels[:, 1:], masks[:, 1:])
            else:
                gen_result, sample_logprobs = model.sample(
                    fc_feats, att_feats, {'sample_max': 0})
                reward, base_cider, explore_cider = get_self_critical_reward(
                    model, fc_feats, att_feats, data, gen_result)
                loss = rl_crit(
                    sample_logprobs, gen_result,
                    Variable(torch.from_numpy(reward).float().cuda(),
                             requires_grad=False))

        # scale the loss so that gradients accumulated over iter_times
        # batches average rather than sum
        loss_ = loss / opt.iter_times
        loss_.backward()
        if (iteration + 1) % opt.iter_times == 0:
            utils.clip_gradient(optimizer, opt.grad_clip)
            optimizer.step()
            optimizer.zero_grad()
        train_loss = loss.data[0]
        torch.cuda.synchronize()
        end = time.time()
        if iteration % 25 == 0:
            if not sc_flag:
                print("iter {} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}"
                      .format(iteration, epoch, train_loss, end - start))
            else:
                print("iter {} (epoch {}), avg_reward = {:.3f}, base_cider = {:.3f}, explore_cider = {:.3f}, time/batch = {:.3f}"
                      .format(iteration, epoch, np.mean(reward[:, 0]),
                              base_cider, explore_cider, end - start))

        # Update the iteration and epoch
        iteration += 1
        if data['bounds']['wrapped']:
            epoch += 1
            update_lr_flag = True
            loader.reset_iterator('train')

        # Write the training loss summary
        if iteration % opt.losses_log_every == 0:
            if tf is not None:
                add_summary_value(tf_summary_writer, 'train_loss',
                                  train_loss, iteration)
                add_summary_value(tf_summary_writer, 'learning_rate',
                                  opt.current_lr, iteration)
                add_summary_value(tf_summary_writer,
                                  'scheduled_sampling_prob', model.ss_prob,
                                  iteration)
                if sc_flag:
                    add_summary_value(tf_summary_writer, 'avg_reward',
                                      np.mean(reward[:, 0]), iteration)
                tf_summary_writer.flush()

            loss_history[iteration] = train_loss if not sc_flag else np.mean(reward[:, 0])
            lr_history[iteration] = opt.current_lr
            ss_prob_history[iteration] = model.ss_prob
########################################## pickle_file = 'notMNIST.pickle' try: f = open(pickle_file, 'wb') save = { 'train_dataset': train_dataset, 'train_labels': train_labels, 'valid_dataset': valid_dataset, 'valid_labels': valid_labels, 'test_dataset': test_dataset, 'test_labels': test_labels, } pickle.dump(save, f, pickle.HIGHEST_PROTOCOL) f.close() except Exception as e: print('Unable to save data to', pickle_file, ':', e) raise statinfo = os.stat(pickle_file) print('Compressed pickle size:', statinfo.st_size) ###############################################3 #prune data, delete repeated samples in training, test and validation sets models = np.unique(train_labels) idx_tr = np.arange(train_labels.size); c_tr = np.zeros(train_labels.size); idx_va = np.arange(valid_labels.size); c_va = np.zeros(valid_labels.size); idx_te = np.arange(test_labels.size); c_te = np.zeros(test_labels.size); for mm in models:
# Fragment of a Python 2 script (print statement, raw_input): reports the
# effective rank of `svals`, derives a figure filename from `batch_size`,
# optionally pickles `predict` and plots the training records, or reads a
# pickle from a user-supplied path, and finally offers a failure analysis.
# NOTE(review): the `else:` branch appears to pair with an `if` that opens
# before this excerpt, and the object returned by cPickle.load is discarded
# -- confirm both against the complete script.
print "Rank: {}, Hidden Layer Size: {}".format( compute_effective_rank(svals), svals.shape[0]) if isinstance(batch_size, int): if batch_size == 1: fig_outfile = 'perf_mlp_seque.png' else: fig_outfile = 'perf_mlp_minibatchsize_%d.png' % batch_size else: fig_outfile = 'perf_mlp_batch.png' if raw_input("Shall we save this model? (y/n)\n") == 'y': model_outfile = fig_outfile.split('.')[0] + ".pkl" fobj = open(model_outfile, 'wb') cPickle.dump(predict, fobj, protocol=cPickle.HIGHEST_PROTOCOL) fobj.close() if raw_input('Save training figure? (y/n): \n') == 'y': performanceplot(cost_record, tr_err_record, te_err_record, "contrast_" + fig_outfile) else: model_outfile = raw_input("Provide path to model_outfile: \n") fobj = open(model_outfile, 'rb') cPickle.load(fobj) fobj.close() if raw_input("Perform failure analysis? (y/n):\n") == 'y': failure_analysis.investigate_mlp(teX, teY, predict(teX) > 0.5)
# Words ordered by decreasing frequency (computed once and reused below).
words = [x[0] for x in word_counts.most_common()]

# Mapping from index to word : that's the vocabulary
vocabulary_inv = sorted(words)

# Mapping from word to index
vocab = {x: i for i, x in enumerate(vocabulary_inv)}

#size of the vocabulary
vocab_size = len(words)
print("vocab size: ", vocab_size)

#save the words and vocabulary
with open(os.path.join(vocab_file), 'wb') as f:
    cPickle.dump((words, vocab, vocabulary_inv), f)

#create sequences
# Each window of seq_length words is paired with the word that follows it.
sequences = []
next_words = []
for i in range(0, len(wordlist) - seq_length, sequences_step):
    sequences.append(wordlist[i:i + seq_length])
    next_words.append(wordlist[i + seq_length])

print('nb sequences:', len(sequences))

# One-hot encode the input windows.  The builtin ``bool`` replaces the
# ``np.bool`` alias, which no longer exists in current NumPy releases.
X = np.zeros((len(sequences), seq_length, vocab_size), dtype=bool)
y = np.zeros((len(sequences), vocab_size), dtype=bool)
for i, sentence in enumerate(sequences):
    for t, word in enumerate(sentence):
        X[i, t, vocab[word]] = 1
def train(opt):
    """Run the captioning training loop described by ``opt``.

    Each iteration trains on one batch (cross-entropy, or the
    self-critical reward once ``opt.self_critical_after`` is reached).
    Every ``opt.losses_log_every`` iterations the training statistics are
    logged, and every ``opt.save_checkpoint_every`` iterations the model is
    evaluated on the validation split and checkpointed (model, optimizer,
    ``infos`` and ``histories`` pickles, plus ``*-best`` copies when the
    score improves).  The loop ends once ``opt.max_epochs`` is reached
    (``-1`` disables the limit).

    Args:
        opt: option namespace; it is also mutated (``use_att``,
            ``att_feat_size``, ``vocab_size``, ``seq_length``,
            ``current_lr``, ``ss_prob``).
    """
    # Deal with feature things before anything
    opt.use_att = utils.if_use_att(opt.caption_model)
    if opt.use_box:
        opt.att_feat_size = opt.att_feat_size + 5

    loader = DataLoader(opt)
    opt.vocab_size = loader.vocab_size
    opt.seq_length = loader.seq_length

    tb_summary_writer = tb and tb.SummaryWriter(opt.checkpoint_path)

    infos = {}
    histories = {}
    if opt.start_from is not None:
        # open old infos and check if models are compatible
        # (pickles are binary data, so the files must be opened in 'rb')
        infos_path = os.path.join(opt.start_from, 'infos_'+opt.id+'.pkl')
        with open(infos_path, 'rb') as f:
            infos = cPickle.load(f)
        saved_model_opt = infos['opt']
        need_be_same = ["caption_model", "rnn_type", "rnn_size",
                        "num_layers"]
        for checkme in need_be_same:
            assert vars(saved_model_opt)[checkme] == vars(opt)[checkme], "Command line argument and saved model disagree on '%s' " % checkme

        histories_path = os.path.join(
            opt.start_from, 'histories_'+opt.id+'.pkl')
        if os.path.isfile(histories_path):
            with open(histories_path, 'rb') as f:
                histories = cPickle.load(f)

    iteration = infos.get('iter', 0)
    epoch = infos.get('epoch', 0)

    val_result_history = histories.get('val_result_history', {})
    loss_history = histories.get('loss_history', {})
    lr_history = histories.get('lr_history', {})
    ss_prob_history = histories.get('ss_prob_history', {})

    loader.iterators = infos.get('iterators', loader.iterators)
    loader.split_ix = infos.get('split_ix', loader.split_ix)
    # Always bind best_val_score: it used to be assigned only when
    # load_best_score == 1, which made the first checkpoint comparison
    # raise UnboundLocalError otherwise.
    best_val_score = None
    if opt.load_best_score == 1:
        best_val_score = infos.get('best_val_score', None)

    model = models.setup(opt).cuda()
    dp_model = torch.nn.DataParallel(model)

    epoch_done = True
    # Assure in training mode
    dp_model.train()

    if opt.label_smoothing > 0:
        crit = utils.LabelSmoothing(smoothing=opt.label_smoothing)
    else:
        crit = utils.LanguageModelCriterion()
    rl_crit = utils.RewardCriterion()

    if opt.noamopt:
        assert opt.caption_model == 'transformer', 'noamopt can only work with transformer'
        optimizer = utils.get_std_opt(model, factor=opt.noamopt_factor,
                                      warmup=opt.noamopt_warmup)
        # resume the schedule from the restored iteration count
        optimizer._step = iteration
    elif opt.reduce_on_plateau:
        optimizer = utils.build_optimizer(model.parameters(), opt)
        optimizer = utils.ReduceLROnPlateau(optimizer, factor=0.5,
                                            patience=3)
    else:
        optimizer = utils.build_optimizer(model.parameters(), opt)
    # Load the optimizer
    if vars(opt).get('start_from', None) is not None and os.path.isfile(os.path.join(opt.start_from, "optimizer.pth")):
        optimizer.load_state_dict(
            torch.load(os.path.join(opt.start_from, 'optimizer.pth')))

    while True:
        if epoch_done:
            if not opt.noamopt and not opt.reduce_on_plateau:
                # Assign the learning rate
                if epoch > opt.learning_rate_decay_start and opt.learning_rate_decay_start >= 0:
                    frac = (epoch - opt.learning_rate_decay_start) // opt.learning_rate_decay_every
                    decay_factor = opt.learning_rate_decay_rate ** frac
                    opt.current_lr = opt.learning_rate * decay_factor
                else:
                    opt.current_lr = opt.learning_rate
                utils.set_lr(optimizer, opt.current_lr)  # set the decayed rate
            # Assign the scheduled sampling prob
            if epoch > opt.scheduled_sampling_start and opt.scheduled_sampling_start >= 0:
                frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every
                opt.ss_prob = min(opt.scheduled_sampling_increase_prob * frac,
                                  opt.scheduled_sampling_max_prob)
                model.ss_prob = opt.ss_prob

            # If start self critical training
            if opt.self_critical_after != -1 and epoch >= opt.self_critical_after:
                sc_flag = True
                init_scorer(opt.cached_tokens)
            else:
                sc_flag = False

            epoch_done = False

        start = time.time()
        # Load data from train split (0)
        data = loader.get_batch('train')
        print('Read data:', time.time() - start)

        torch.cuda.synchronize()
        start = time.time()

        tmp = [data['fc_feats'], data['att_feats'], data['labels'],
               data['masks'], data['att_masks']]
        # entries that are None are passed through untouched
        tmp = [_ if _ is None else torch.from_numpy(_).cuda() for _ in tmp]
        fc_feats, att_feats, labels, masks, att_masks = tmp

        optimizer.zero_grad()
        if not sc_flag:
            loss = crit(dp_model(fc_feats, att_feats, labels, att_masks),
                        labels[:, 1:], masks[:, 1:])
        else:
            gen_result, sample_logprobs = dp_model(
                fc_feats, att_feats, att_masks, opt={'sample_max': 0},
                mode='sample')
            reward = get_self_critical_reward(
                dp_model, fc_feats, att_feats, att_masks, data, gen_result,
                opt)
            loss = rl_crit(sample_logprobs, gen_result.data,
                           torch.from_numpy(reward).float().cuda())

        loss.backward()
        utils.clip_gradient(optimizer, opt.grad_clip)
        optimizer.step()
        train_loss = loss.item()
        torch.cuda.synchronize()
        end = time.time()
        if not sc_flag:
            print("iter {} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}"
                  .format(iteration, epoch, train_loss, end - start))
        else:
            print("iter {} (epoch {}), avg_reward = {:.3f}, time/batch = {:.3f}"
                  .format(iteration, epoch, np.mean(reward[:, 0]),
                          end - start))

        # Update the iteration and epoch
        iteration += 1
        if data['bounds']['wrapped']:
            epoch += 1
            epoch_done = True

        # Write the training loss summary
        if iteration % opt.losses_log_every == 0:
            add_summary_value(tb_summary_writer, 'train_loss', train_loss,
                              iteration)
            if opt.noamopt:
                opt.current_lr = optimizer.rate()
            elif opt.reduce_on_plateau:
                opt.current_lr = optimizer.current_lr
            add_summary_value(tb_summary_writer, 'learning_rate',
                              opt.current_lr, iteration)
            add_summary_value(tb_summary_writer, 'scheduled_sampling_prob',
                              model.ss_prob, iteration)
            if sc_flag:
                add_summary_value(tb_summary_writer, 'avg_reward',
                                  np.mean(reward[:, 0]), iteration)

            loss_history[iteration] = train_loss if not sc_flag else np.mean(reward[:, 0])
            lr_history[iteration] = opt.current_lr
            ss_prob_history[iteration] = model.ss_prob

        # make evaluation on validation set, and save model
        if iteration % opt.save_checkpoint_every == 0:
            # eval model
            eval_kwargs = {'split': 'val', 'dataset': opt.input_json}
            eval_kwargs.update(vars(opt))
            val_loss, predictions, lang_stats = eval_utils.eval_split(
                dp_model, crit, loader, eval_kwargs)

            if opt.reduce_on_plateau:
                # the scheduler is fed negative CIDEr when available,
                # otherwise the validation loss
                if 'CIDEr' in lang_stats:
                    optimizer.scheduler_step(-lang_stats['CIDEr'])
                else:
                    optimizer.scheduler_step(val_loss)

            # Write validation result into summary
            add_summary_value(tb_summary_writer, 'validation loss', val_loss,
                              iteration)
            for k, v in lang_stats.items():
                add_summary_value(tb_summary_writer, k, v, iteration)
            val_result_history[iteration] = {
                'loss': val_loss, 'lang_stats': lang_stats,
                'predictions': predictions}

            # Save model if is improving on validation result
            if opt.language_eval == 1:
                current_score = lang_stats['CIDEr']
            else:
                current_score = - val_loss

            best_flag = False
            if best_val_score is None or current_score > best_val_score:
                best_val_score = current_score
                best_flag = True
            # the latest weights are written unconditionally
            checkpoint_path = os.path.join(opt.checkpoint_path, 'model.pth')
            torch.save(model.state_dict(), checkpoint_path)
            print("model saved to {}".format(checkpoint_path))
            optimizer_path = os.path.join(opt.checkpoint_path,
                                          'optimizer.pth')
            torch.save(optimizer.state_dict(), optimizer_path)

            # Dump miscalleous informations
            infos['iter'] = iteration
            infos['epoch'] = epoch
            infos['iterators'] = loader.iterators
            infos['split_ix'] = loader.split_ix
            infos['best_val_score'] = best_val_score
            infos['opt'] = opt
            infos['vocab'] = loader.get_vocab()

            histories['val_result_history'] = val_result_history
            histories['loss_history'] = loss_history
            histories['lr_history'] = lr_history
            histories['ss_prob_history'] = ss_prob_history
            with open(os.path.join(opt.checkpoint_path, 'infos_'+opt.id+'.pkl'), 'wb') as f:
                cPickle.dump(infos, f)
            with open(os.path.join(opt.checkpoint_path, 'histories_'+opt.id+'.pkl'), 'wb') as f:
                cPickle.dump(histories, f)

            if best_flag:
                checkpoint_path = os.path.join(opt.checkpoint_path,
                                               'model-best.pth')
                torch.save(model.state_dict(), checkpoint_path)
                print("model saved to {}".format(checkpoint_path))
                with open(os.path.join(opt.checkpoint_path, 'infos_'+opt.id+'-best.pkl'), 'wb') as f:
                    cPickle.dump(infos, f)

        # Stop if reaching max epochs
        if epoch >= opt.max_epochs and opt.max_epochs != -1:
            break
# Train for the requested number of epochs, visiting the `skip` offsets in
# a fresh random order each epoch.
for epoch in range(args.epoch):
    accum_loss = 0
    print('epoch: {0}'.format(epoch))

    indexes = np.random.permutation(skip)
    for i in indexes:
        # Progress report each time another 100000 words have been seen.
        if not word_count < next_count:
            now = time.time()
            duration = now - cur_at
            throuput = 100000. / duration
            print('{} words, {:.2f} sec, {:.2f} words/sec'.format(
                word_count, duration, throuput))
            next_count = next_count + 100000
            cur_at = now

        # One position per batch element, spaced `skip` apart.
        batch_offsets = np.array(range(args.batchsize)) * skip
        position = batch_offsets + (args.window + i)
        loss = train_model(dataset, position)
        accum_loss += loss.data
        word_count += args.batchsize

        # Standard update: clear gradients, backpropagate, apply the step.
        optimizer.zero_grads()
        loss.backward()
        optimizer.update()

    print(accum_loss)

# Move the model to the CPU, then pickle it with both lookup tables.
model.to_cpu()
obj = (model, index2word, word2index)
with open('model.pickle', 'wb') as f:
    pickle.dump(obj, f)
def log_checkpoint(self, epoch, val_loss, metrics, predictions, opt, model,
                   dataset, optimizer=None):
    """Record one validation result and write checkpoints to ``self.log_dir``.

    The result is stored in ``self.val_result_history`` under the current
    ``self.iteration``.  The model weights are saved under an
    iteration-specific name and as ``model.pth``; ``infos.pkl`` and
    ``histories.pkl`` are rewritten.  When the score is the best seen so far,
    ``model-best.pth`` and ``infos-best.pkl`` are written as well.

    :param epoch: value stored as ``infos['epoch']``.
    :param val_loss: validation loss; its negation is the score when
        ``opt.metric == 'XE'``.
    :param metrics: mapping of metric name to value; ``metrics[opt.metric]``
        is the score for any other ``opt.metric``.
    :param predictions: stored alongside the result in the history.
    :param opt: options object; ``opt.metric`` is read and ``opt`` itself is
        pickled as part of ``infos``.
    :param model: object whose ``state_dict()`` is saved with ``torch.save``.
    :param dataset: object providing ``get_vocab()``.
    :param optimizer: optional; when given, its ``state_dict()`` is saved to
        ``optimizer.pth``.
    """
    step = self.iteration
    board = self.tensorboard

    # Write validation result into summary
    if board.tf is not None:
        board.add_summary_value('validation loss', val_loss, step)
        for name, value in metrics.items():
            board.add_summary_value(name, value, step)
        board.writer.flush()

    self.val_result_history[step] = {
        'loss': val_loss,
        'metrics': metrics,
        'predictions': predictions,
    }

    # Higher score is better, hence the negated loss for 'XE'.
    current_score = -val_loss if opt.metric == 'XE' else metrics[opt.metric]
    best_flag = (self.best_val_score is None
                 or current_score > self.best_val_score)
    if best_flag:
        self.best_val_score = current_score

    # Save the model at the current iteration ...
    iter_path = os.path.join(self.log_dir, 'model_iter_{}.pth'.format(step))
    torch.save(model.state_dict(), iter_path)

    # ... and again as the latest model.
    checkpoint_path = os.path.join(self.log_dir, 'model.pth')
    torch.save(model.state_dict(), checkpoint_path)
    logging.info("model saved to {}".format(checkpoint_path))

    # Save the optimizer state when one was supplied.
    if optimizer is not None:
        torch.save(optimizer.state_dict(),
                   os.path.join(self.log_dir, 'optimizer.pth'))

    # Dump miscellaneous information
    infos = self.infos
    infos['iter'] = step
    infos['epoch'] = epoch
    infos['best_val_score'] = self.best_val_score
    infos['opt'] = opt
    infos['vocab'] = dataset.get_vocab()

    histories = self.histories
    histories['val_result_history'] = self.val_result_history
    histories['loss_history'] = self.loss_history
    histories['lr_history'] = self.lr_history
    histories['ss_prob_history'] = self.ss_prob_history

    for basename, payload in (('infos.pkl', infos),
                              ('histories.pkl', histories)):
        with open(os.path.join(self.log_dir, basename), 'wb') as f:
            cPickle.dump(payload, f)

    if not best_flag:
        return

    checkpoint_path = os.path.join(self.log_dir, 'model-best.pth')
    torch.save(model.state_dict(), checkpoint_path)
    logging.info("model saved to {}".format(checkpoint_path))
    with open(os.path.join(self.log_dir, 'infos-best.pkl'), 'wb') as f:
        cPickle.dump(infos, f)
def setup_servers(the_object_server=object_server, extra_conf=None):
    """
    Setup proxy, account, container and object servers using a set of fake
    rings and policies.

    Side effects visible in this function: ``utils.HASH_PATH_SUFFIX``,
    ``utils.SysLogHandler`` and ``storage_policy._POLICIES`` are replaced,
    a temporary directory tree is created, eleven listening sockets are
    opened and eleven greenthreads are spawned.  Two accounts (``a`` and
    ``a1``) and four containers (``a/c``, ``a1/c1``, ``a/c1``, ``a/c2``) are
    created before returning.

    :param the_object_server: The object server module to use (optional,
                              defaults to swift.obj.server)
    :param extra_conf: A dict of config options that will update the basic
                       config passed to all server instances.
    :returns: A dict containing the following entries:
                  orig_POLICIES: the value of storage_policy._POLICIES prior
                                 to it being patched with fake policies
                  orig_SysLogHandler: the value of utils.SysLogHandler prior
                                      to it being patched
                  testdir: root directory used for test files
                  test_POLICIES: a StoragePolicyCollection of fake policies
                  test_servers: a tuple of test server instances
                  test_sockets: a tuple of sockets used by test servers
                  test_coros: a tuple of greenthreads in which test servers
                              are running
    :raises AssertionError: if creating an account or container does not
                            answer with status 201
    """
    # Capture the module-level values that are patched just below.
    context = {
        "orig_POLICIES": storage_policy._POLICIES,
        "orig_SysLogHandler": utils.SysLogHandler
    }

    utils.HASH_PATH_SUFFIX = b'endcap'
    utils.SysLogHandler = mock.MagicMock()
    # Since we're starting up a lot here, we're going to test more than
    # just chunked puts; we're also going to test parts of
    # proxy_server.Application we couldn't get to easily otherwise.
    context["testdir"] = _testdir = \
        os.path.join(mkdtemp(), 'tmp_test_proxy_server_chunked')
    # NOTE(review): the directory is created and immediately removed,
    # presumably to guarantee it starts out empty -- confirm.
    mkdirs(_testdir)
    rmtree(_testdir)
    # One 'tmp' directory per fake drive (twelve drives, sda1 .. sdl1).
    for drive in ('sda1', 'sdb1', 'sdc1', 'sdd1', 'sde1',
                  'sdf1', 'sdg1', 'sdh1', 'sdi1', 'sdj1',
                  'sdk1', 'sdl1'):
        mkdirs(os.path.join(_testdir, drive, 'tmp'))
    # Base configuration shared by every server instance created below.
    conf = {
        'devices': _testdir,
        'swift_dir': _testdir,
        'mount_check': 'false',
        'allowed_headers': 'content-encoding, x-object-manifest, content-disposition, foo',
        'allow_versions': 't',
        'node_timeout': 20
    }
    if extra_conf:
        conf.update(extra_conf)

    # One listening socket per server: 1 proxy, 2 account, 2 container and
    # 6 object servers.  Their port numbers are read back further down with
    # getsockname()[1] when the rings are written.
    prolis = listen_zero()
    acc1lis = listen_zero()
    acc2lis = listen_zero()
    con1lis = listen_zero()
    con2lis = listen_zero()
    obj1lis = listen_zero()
    obj2lis = listen_zero()
    obj3lis = listen_zero()
    obj4lis = listen_zero()
    obj5lis = listen_zero()
    obj6lis = listen_zero()
    objsocks = [obj1lis, obj2lis, obj3lis, obj4lis, obj5lis, obj6lis]
    context["test_sockets"] = \
        (prolis, acc1lis, acc2lis, con1lis, con2lis, obj1lis, obj2lis,
         obj3lis, obj4lis, obj5lis, obj6lis)

    # Account and container rings: two devices each, identified by port only.
    account_ring_path = os.path.join(_testdir, 'account.ring.gz')
    account_devs = [
        { 'port': acc1lis.getsockname()[1] },
        { 'port': acc2lis.getsockname()[1] },
    ]
    write_fake_ring(account_ring_path, *account_devs)
    container_ring_path = os.path.join(_testdir, 'container.ring.gz')
    container_devs = [
        { 'port': con1lis.getsockname()[1] },
        { 'port': con2lis.getsockname()[1] },
    ]
    write_fake_ring(container_ring_path, *container_devs)

    # Replace the global policy collection with five fake policies:
    # three StoragePolicy entries (0-2) and two EC policies (3-4).
    storage_policy._POLICIES = storage_policy.StoragePolicyCollection([
        StoragePolicy(0, 'zero', True),
        StoragePolicy(1, 'one', False),
        StoragePolicy(2, 'two', False),
        ECStoragePolicy(3, 'ec', ec_type=DEFAULT_TEST_EC_TYPE,
                        ec_ndata=2, ec_nparity=1, ec_segment_size=4096),
        ECStoragePolicy(4, 'ec-dup', ec_type=DEFAULT_TEST_EC_TYPE,
                        ec_ndata=2, ec_nparity=1, ec_segment_size=4096,
                        ec_duplication_factor=2)
    ])
    # Policy index -> the two drives backing that policy's object ring.
    obj_rings = {
        0: ('sda1', 'sdb1'),
        1: ('sdc1', 'sdd1'),
        2: ('sde1', 'sdf1'),
        # sdg1, sdh1, sdi1 taken by policy 3 (see below)
    }
    for policy_index, devices in obj_rings.items():
        policy = storage_policy.POLICIES[policy_index]
        obj_ring_path = os.path.join(_testdir, policy.ring_name + '.ring.gz')
        # zip() stops at the shorter input, so each two-drive policy is
        # paired with the first two object sockets.
        obj_devs = [{ 'port': objsock.getsockname()[1], 'device': dev }
                    for objsock, dev in zip(objsocks, devices)]
        write_fake_ring(obj_ring_path, *obj_devs)

    # write_fake_ring can't handle a 3-element ring, and the EC policy needs
    # at least 6 devs to work with (ec_k=2, ec_m=1, duplication_factor=2),
    # so we do it manually
    # The same six-device list is used for both EC rings (policies 3 and 4).
    devs = [{ 'id': 0, 'zone': 0, 'device': 'sdg1', 'ip': '127.0.0.1',
              'port': obj1lis.getsockname()[1] },
            { 'id': 1, 'zone': 0, 'device': 'sdh1', 'ip': '127.0.0.1',
              'port': obj2lis.getsockname()[1] },
            { 'id': 2, 'zone': 0, 'device': 'sdi1', 'ip': '127.0.0.1',
              'port': obj3lis.getsockname()[1] },
            { 'id': 3, 'zone': 0, 'device': 'sdj1', 'ip': '127.0.0.1',
              'port': obj4lis.getsockname()[1] },
            { 'id': 4, 'zone': 0, 'device': 'sdk1', 'ip': '127.0.0.1',
              'port': obj5lis.getsockname()[1] },
            { 'id': 5, 'zone': 0, 'device': 'sdl1', 'ip': '127.0.0.1',
              'port': obj6lis.getsockname()[1] }]
    # One row per replica, one column per partition (4 partitions); policy 3
    # only references device ids 0-2, policy 4 references all six.
    pol3_replica2part2dev_id = [[0, 1, 2, 0],
                                [1, 2, 0, 1],
                                [2, 0, 1, 2]]
    pol4_replica2part2dev_id = [[0, 1, 2, 3],
                                [1, 2, 3, 4],
                                [2, 3, 4, 5],
                                [3, 4, 5, 0],
                                [4, 5, 0, 1],
                                [5, 0, 1, 2]]
    # Each EC ring is written as a gzip-compressed pickle of a RingData.
    obj3_ring_path = os.path.join(
        _testdir, storage_policy.POLICIES[3].ring_name + '.ring.gz')
    part_shift = 30
    with closing(GzipFile(obj3_ring_path, 'wb')) as fh:
        pickle.dump(RingData(pol3_replica2part2dev_id, devs, part_shift), fh)

    obj4_ring_path = os.path.join(
        _testdir, storage_policy.POLICIES[4].ring_name + '.ring.gz')
    part_shift = 30
    with closing(GzipFile(obj4_ring_path, 'wb')) as fh:
        pickle.dump(RingData(pol4_replica2part2dev_id, devs, part_shift), fh)

    prosrv = proxy_server.Application(conf, logger=debug_logger('proxy'))
    for policy in storage_policy.POLICIES:
        # make sure all the rings are loaded
        prosrv.get_object_ring(policy.idx)
    # don't lose this one!
    context["test_POLICIES"] = storage_policy._POLICIES

    # Server applications, each with its own debug logger.
    acc1srv = account_server.AccountController(conf,
                                               logger=debug_logger('acct1'))
    acc2srv = account_server.AccountController(conf,
                                               logger=debug_logger('acct2'))
    con1srv = container_server.ContainerController(
        conf, logger=debug_logger('cont1'))
    con2srv = container_server.ContainerController(
        conf, logger=debug_logger('cont2'))
    obj1srv = the_object_server.ObjectController(conf,
                                                 logger=debug_logger('obj1'))
    obj2srv = the_object_server.ObjectController(conf,
                                                 logger=debug_logger('obj2'))
    obj3srv = the_object_server.ObjectController(conf,
                                                 logger=debug_logger('obj3'))
    obj4srv = the_object_server.ObjectController(conf,
                                                 logger=debug_logger('obj4'))
    obj5srv = the_object_server.ObjectController(conf,
                                                 logger=debug_logger('obj5'))
    obj6srv = the_object_server.ObjectController(conf,
                                                 logger=debug_logger('obj6'))
    context["test_servers"] = \
        (prosrv, acc1srv, acc2srv, con1srv, con2srv, obj1srv, obj2srv,
         obj3srv, obj4srv, obj5srv, obj6srv)
    nl = NullLogger()
    # The proxy application is wrapped in a listing filter and then in the
    # proxy-logging middleware before being served.
    logging_prosv = proxy_logging.ProxyLoggingMiddleware(
        listing_formats.ListingFilter(prosrv, {}, logger=prosrv.logger),
        conf, logger=prosrv.logger)
    # Serve every application on its socket in its own spawned greenthread.
    prospa = spawn(wsgi.server, prolis, logging_prosv, nl,
                   protocol=SwiftHttpProtocol,
                   capitalize_response_headers=False)
    acc1spa = spawn(wsgi.server, acc1lis, acc1srv, nl,
                    protocol=SwiftHttpProtocol,
                    capitalize_response_headers=False)
    acc2spa = spawn(wsgi.server, acc2lis, acc2srv, nl,
                    protocol=SwiftHttpProtocol,
                    capitalize_response_headers=False)
    con1spa = spawn(wsgi.server, con1lis, con1srv, nl,
                    protocol=SwiftHttpProtocol,
                    capitalize_response_headers=False)
    con2spa = spawn(wsgi.server, con2lis, con2srv, nl,
                    protocol=SwiftHttpProtocol,
                    capitalize_response_headers=False)
    obj1spa = spawn(wsgi.server, obj1lis, obj1srv, nl,
                    protocol=SwiftHttpProtocol,
                    capitalize_response_headers=False)
    obj2spa = spawn(wsgi.server, obj2lis, obj2srv, nl,
                    protocol=SwiftHttpProtocol,
                    capitalize_response_headers=False)
    obj3spa = spawn(wsgi.server, obj3lis, obj3srv, nl,
                    protocol=SwiftHttpProtocol,
                    capitalize_response_headers=False)
    obj4spa = spawn(wsgi.server, obj4lis, obj4srv, nl,
                    protocol=SwiftHttpProtocol,
                    capitalize_response_headers=False)
    obj5spa = spawn(wsgi.server, obj5lis, obj5srv, nl,
                    protocol=SwiftHttpProtocol,
                    capitalize_response_headers=False)
    obj6spa = spawn(wsgi.server, obj6lis, obj6srv, nl,
                    protocol=SwiftHttpProtocol,
                    capitalize_response_headers=False)
    context["test_coros"] = \
        (prospa, acc1spa, acc2spa, con1spa, con2spa, obj1spa, obj2spa,
         obj3spa, obj4spa, obj5spa, obj6spa)
    # Create account
    # The PUT goes directly to every node returned by the account ring.
    ts = normalize_timestamp(time.time())
    partition, nodes = prosrv.account_ring.get_nodes('a')
    for node in nodes:
        conn = swift.proxy.controllers.obj.http_connect(
            node['ip'], node['port'], node['device'], partition, 'PUT', '/a',
            { 'X-Timestamp': ts, 'x-trans-id': 'test' })
        resp = conn.getresponse()
        assert (resp.status == 201)
    # Create another account
    # used for account-to-account tests
    ts = normalize_timestamp(time.time())
    partition, nodes = prosrv.account_ring.get_nodes('a1')
    for node in nodes:
        conn = swift.proxy.controllers.obj.http_connect(
            node['ip'], node['port'], node['device'], partition, 'PUT',
            '/a1', { 'X-Timestamp': ts, 'x-trans-id': 'test' })
        resp = conn.getresponse()
        assert (resp.status == 201)
    # Create containers, 1 per test policy
    # Each container is created with a raw HTTP PUT sent to the proxy's
    # listening socket; the response must start with 'HTTP/1.1 201'.
    sock = connect_tcp(('localhost', prolis.getsockname()[1]))
    fd = sock.makefile('rwb')
    fd.write(b'PUT /v1/a/c HTTP/1.1\r\nHost: localhost\r\n'
             b'Connection: close\r\nX-Auth-Token: t\r\n'
             b'Content-Length: 0\r\n\r\n')
    fd.flush()
    headers = readuntil2crlfs(fd)
    exp = b'HTTP/1.1 201'
    assert headers[:len(exp)] == exp, "Expected '%s', encountered '%s'" % (
        exp, headers[:len(exp)])
    # Create container in other account
    # used for account-to-account tests
    sock = connect_tcp(('localhost', prolis.getsockname()[1]))
    fd = sock.makefile('rwb')
    fd.write(b'PUT /v1/a1/c1 HTTP/1.1\r\nHost: localhost\r\n'
             b'Connection: close\r\nX-Auth-Token: t\r\n'
             b'Content-Length: 0\r\n\r\n')
    fd.flush()
    headers = readuntil2crlfs(fd)
    exp = b'HTTP/1.1 201'
    assert headers[:len(exp)] == exp, "Expected '%s', encountered '%s'" % (
        exp, headers[:len(exp)])

    # Container a/c1 is created with an explicit X-Storage-Policy of 'one'.
    sock = connect_tcp(('localhost', prolis.getsockname()[1]))
    fd = sock.makefile('rwb')
    fd.write(
        b'PUT /v1/a/c1 HTTP/1.1\r\nHost: localhost\r\n'
        b'Connection: close\r\nX-Auth-Token: t\r\nX-Storage-Policy: one\r\n'
        b'Content-Length: 0\r\n\r\n')
    fd.flush()
    headers = readuntil2crlfs(fd)
    exp = b'HTTP/1.1 201'
    assert headers[:len(exp)] == exp, \
        "Expected %r, encountered %r" % (exp, headers[:len(exp)])

    # Container a/c2 is created with an explicit X-Storage-Policy of 'two'.
    sock = connect_tcp(('localhost', prolis.getsockname()[1]))
    fd = sock.makefile('rwb')
    fd.write(
        b'PUT /v1/a/c2 HTTP/1.1\r\nHost: localhost\r\n'
        b'Connection: close\r\nX-Auth-Token: t\r\nX-Storage-Policy: two\r\n'
        b'Content-Length: 0\r\n\r\n')
    fd.flush()
    headers = readuntil2crlfs(fd)
    exp = b'HTTP/1.1 201'
    assert headers[:len(exp)] == exp, \
        "Expected '%s', encountered '%s'" % (exp, headers[:len(exp)])
    return context
def persist(self, filename=None):
    """Pickle ``self.cache`` to disk.

    :param filename: destination path; when ``None`` (the default),
        ``self.filename`` is used instead.
    """
    if filename is None:
        filename = self.filename
    # pickle produces bytes, so the file has to be opened in binary mode:
    # text mode ('w') raises TypeError on Python 3 and can corrupt the
    # stream through newline translation on Windows.
    with open(filename, 'wb') as f:
        pickle.dump(self.cache, f)
for cps in list(cCP.values()): if not cps == cp: b[cp].add_others(t, cps, CoPoMap[t, x, y].nTrCP[cp], CoPoMap[t, x, y].nTrCP[cps]) ############################################################### # SAVE DATA ############################################################ btime = datetime.datetime.now() print 'took ', (btime - atime) #if not os.path.exists(odir+EXPID+'/tempdata/'): # os.makedirs(odir+EXPID+'/tempdata/') f = open(odir + EXPID + '/output/cp/Tracer.save', 'wb') cPickle.dump(data, f, protocol=cPickle.HIGHEST_PROTOCOL) f.close() if lmap or lmap2: f = open(odir + EXPID + '/output/cp/TracerMap.save', 'wb') cPickle.dump(CoPoMap, f, protocol=cPickle.HIGHEST_PROTOCOL) f.close() print 'lmap took:', sumlmaptime if lcpstart: f = open(odir + EXPID + '/output/cp/CPstart.save', 'wb') cPickle.dump(CPinit, f, protocol=cPickle.HIGHEST_PROTOCOL) f.close() print 'lcpstart took:', sumlcpstarttime f = open(odir + EXPID + '/output/cp/CPlife.save', 'wb')
# Build a trainer that points at the saved parameters for this
# rbp_name / cell_name pair.
model_file_stem = rbp_name + '_' + cell_name
model_save_path = os.path.join(sstype_path, model_file_stem)
nntrainer = nn.NeuralTrainer(nnmodel, save='best', file_path=model_save_path)

# Open a session and load the best parameters into it.
sess = utils.initialize_session(nnmodel.placeholders)
nntrainer.set_best_parameters(sess)

# Evaluate on the `test` data.
loss, mean_vals, std_vals = nntrainer.test_model(
    sess, test, batch_size=128, name='test', verbose=1)

# Keep only the mean values, then release the session.
results.append(mean_vals)
sess.close()

# Write everything collected so far as a single array.
scores_path = os.path.join(sstype_path, 'test_scores.pickle')
with open(scores_path, 'wb') as f:
    cPickle.dump(np.array(results), f, protocol=cPickle.HIGHEST_PROTOCOL)
def train(args):
    """Train the model described by ``args``, checkpointing as it goes.

    When ``args.init_from`` is set, the directory is checked for a saved
    ``config.pkl``, ``chars_vocab.pkl`` and a TensorFlow checkpoint, and the
    saved settings are compared with ``args`` and the loaded data before
    training resumes from that checkpoint.

    ``config.pkl`` and ``chars_vocab.pkl`` are (re)written into
    ``args.save_dir``, summaries go to a time-stamped directory under
    ``args.log_dir`` and checkpoints are saved every ``args.save_every``
    steps as well as after the very last batch.

    :param args: namespace of options; ``args.vocab_size`` is set here from
        the data loader.
    :raises AssertionError: if ``args.init_from`` is missing files or is
        incompatible with the current arguments or data.
    """
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(args.init_from), \
            " %s must be a path" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from, "config.pkl")), \
            "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "chars_vocab.pkl")), \
            "chars_vocab.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

        # NOTE(security): unpickling can execute arbitrary code; init_from
        # must only ever point at a trusted directory.
        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same = ["model", "rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(args)[checkme], \
                "Command line argument and saved model disagree on '%s' " \
                % checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'chars_vocab.pkl'), 'rb') as f:
            saved_chars, saved_vocab = cPickle.load(f)
        assert saved_chars == data_loader.chars, \
            "Data and loaded model disagree on character set!"
        assert saved_vocab == data_loader.vocab, \
            "Data and loaded model disagree on dictionary mappings!"

    if not os.path.isdir(args.save_dir):
        os.makedirs(args.save_dir)
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    model = Model(args)

    with tf.Session() as sess:
        # instrument for tensorboard
        summaries = tf.summary.merge_all()
        writer = tf.summary.FileWriter(
            os.path.join(args.log_dir, time.strftime("%Y-%m-%d-%H-%M-%S")))
        # The writer is closed in the finally-clause so that it is released
        # even when training is interrupted.
        try:
            writer.add_graph(sess.graph)

            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver(tf.global_variables())
            # restore model
            if args.init_from is not None:
                saver.restore(sess, ckpt.model_checkpoint_path)

            num_batches = data_loader.num_batches
            total_steps = args.num_epochs * num_batches
            for e in range(args.num_epochs):
                # learning rate is learning_rate * decay_rate ** epoch
                sess.run(tf.assign(
                    model.lr, args.learning_rate * (args.decay_rate ** e)))
                data_loader.reset_batch_pointer()
                state = sess.run(model.initial_state)
                for b in range(num_batches):
                    step = e * num_batches + b
                    start = time.time()
                    x, y = data_loader.next_batch()
                    feed = {model.input_data: x, model.targets: y}
                    # feed the previous final state back in as initial state
                    for i, (c, h) in enumerate(model.initial_state):
                        feed[c] = state[i].c
                        feed[h] = state[i].h

                    # instrument for tensorboard
                    summ, train_loss, state, _ = sess.run(
                        [summaries, model.cost, model.final_state,
                         model.train_op],
                        feed)
                    writer.add_summary(summ, step)

                    end = time.time()
                    print("{}/{} (epoch {}), train_loss = {:.3f}, "
                          "time/batch = {:.3f}".format(
                              step, total_steps, e, train_loss, end - start))

                    is_last_batch = (e == args.num_epochs - 1
                                     and b == num_batches - 1)
                    if step % args.save_every == 0 or is_last_batch:
                        # save for the last result
                        checkpoint_path = os.path.join(args.save_dir,
                                                       'model.ckpt')
                        saver.save(sess, checkpoint_path, global_step=step)
                        print("model saved to {}".format(checkpoint_path))
        finally:
            writer.close()
def train_lstm(
    dim_proj=128,  # word embedding dimension and number of LSTM hidden units
    patience=10,  # Number of epoch to wait before early stop if no progress
    max_epochs=5000,  # The maximum number of epoch to run
    dispFreq=10,  # Display to stdout the training progress every N updates
    decay_c=0.,  # Weight decay for the classifier applied to the U weights
    lrate=0.0001,  # Learning rate for sgd (not used for adadelta and rmsprop)
    n_words=10000,  # Vocabulary size
    optimizer=adadelta,  # sgd, adadelta and rmsprop available; sgd is very hard to use
    encoder='lstm',  # Use the lstm network
    saveto='lstm_model.npz',  # The best model will be saved there
    validFreq=370,  # Compute the validation error after this number of updates
    saveFreq=1110,  # Save the parameters after every saveFreq updates
    maxlen=100,  # Maximum sequence length
    batch_size=16,  # The batch size during training
    valid_batch_size=64,  # The batch size used for validation/test set
    dataset='lmdb',  # Name of the dataset to use

    # Some other parameters
    noise_std=0,  # Noise
    use_dropout=True,  # if False slightly faster, but worst test error
                       # This frequently need a bigger model.
    reload_model=None,  # Path of the saved model data
    test_size=-1,  # If >0, the number of test examples to keep
):
    """Build the model, train it with early stopping and report the errors.

    Parameters are saved to ``saveto`` every ``saveFreq`` updates (together
    with the pickled options in ``<saveto>.pkl``) and once more at the end.
    Training stops early when the validation error has not improved for more
    than ``patience`` validations, or on ``KeyboardInterrupt``.

    :returns: ``(train_err, valid_err, test_err)`` computed with the best
        parameters found, or ``(1., 1., 1.)`` if a NaN/inf cost is detected.
    """
    # Model options.  This must stay the first statement: locals() is
    # expected to contain only the keyword arguments at this point.
    model_options = locals().copy()
    print("model options", model_options)

    # Load the data
    load_data, prepare_data = get_dataset(dataset)

    print('Loading data')
    # Load the train, validation and test sets; 5% is used for validation
    train, valid, test = load_data(n_words=n_words, valid_portion=0.05,
                                   maxlen=maxlen)
    if test_size > 0:
        # The test set is sorted by size, but we want to keep random
        # size example.  So we must select a random selection of the
        # examples.
        idx = np.arange(len(test[0]))
        np.random.shuffle(idx)
        idx = idx[:test_size]
        # Both halves index with the same shuffled positions (the inputs
        # were previously built with range(idx), which raises TypeError).
        test = ([test[0][n] for n in idx], [test[1][n] for n in idx])

    ydim = np.max(train[1]) + 1

    model_options['ydim'] = ydim

    print('Building model')
    # This create the initial parameters as numpy ndarrays.
    # Dict name (string) -> numpy ndarray
    params = init_params(model_options)

    if reload_model:
        # NOTE(review): the path is hard-coded; the value of reload_model is
        # only used as a flag here -- confirm whether it should be the path.
        load_params('lstm_model.npz', params)

    # This create Theano Shared Variable from the parameters.
    # Dict name (string) -> Theano Tensor Shared Variable
    # params and tparams have different copy of the weights.
    tparams = init_tparams(params)

    # use_noise is for dropout
    (use_noise, x, mask,
     y, f_pred_prob, f_pred, cost) = build_model(tparams, model_options)

    if decay_c > 0.:
        decay_c = theano.shared(numpy_floatX(decay_c), name='decay_c')
        weight_decay = 0.
        weight_decay += (tparams['U'] ** 2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    f_cost = theano.function([x, mask, y], cost, name='f_cost')

    grads = tensor.grad(cost, wrt=list(tparams.values()))
    f_grad = theano.function([x, mask, y], grads, name='f_grad')

    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = optimizer(lr, tparams, grads,
                                        x, mask, y, cost)

    print('Optimization')

    kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size)
    kf_test = get_minibatches_idx(len(test[0]), valid_batch_size)

    print("%d train examples" % len(train[0]))
    print("%d valid examples" % len(valid[0]))
    print("%d test examples" % len(test[0]))

    history_errs = []
    best_p = None
    # Number of consecutive validations without improvement (was initialised
    # under the unused name `bad_count`).
    bad_counter = 0

    if validFreq == -1:
        validFreq = len(train[0]) // batch_size
    if saveFreq == -1:
        saveFreq = len(train[0]) // batch_size

    uidx = 0  # the number of update done
    estop = False  # early stop
    start_time = time.time()
    try:
        for eidx in range(max_epochs):
            n_samples = 0

            # Get new shuffled index for the training set.
            kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True)

            for _, train_index in kf:
                uidx += 1
                use_noise.set_value(1.)

                # Select the random examples for this minibatch
                # (from here on x, mask, y and cost hold concrete values and
                # shadow the symbolic variables used to build the functions)
                y = [train[1][t] for t in train_index]
                x = [train[0][t] for t in train_index]

                # Get the data in numpy.ndarray format
                # This swap the axis!
                # Return something of shape (minibatch maxlen, n samples)
                x, mask, y = prepare_data(x, y)
                n_samples += x.shape[1]

                cost = f_grad_shared(x, mask, y)
                f_update(lrate)

                if np.isnan(cost) or np.isinf(cost):
                    print('bad cost detected: ', cost)
                    return 1., 1., 1.

                if np.mod(uidx, dispFreq) == 0:
                    print('Epoch ', eidx, 'Update ', uidx, 'Cost ', cost)

                if saveto and np.mod(uidx, saveFreq) == 0:
                    print('Saving...')

                    if best_p is not None:
                        params = best_p
                    else:
                        params = unzip(tparams)
                    np.savez(saveto, history_errs=history_errs, **params)
                    # Close the options file deterministically instead of
                    # leaving the handle to the garbage collector.
                    with open('%s.pkl' % saveto, 'wb') as f:
                        pickle.dump(model_options, f, -1)
                    print('Done')

                if np.mod(uidx, validFreq) == 0:
                    use_noise.set_value(0.)
                    train_err = pred_error(f_pred, prepare_data, train, kf)
                    valid_err = pred_error(f_pred, prepare_data, valid,
                                           kf_valid)
                    test_err = pred_error(f_pred, prepare_data, test, kf_test)

                    history_errs.append([valid_err, test_err])

                    # New best validation error: remember the parameters and
                    # reset the patience counter.
                    if (best_p is None or
                            valid_err <= np.array(history_errs)[:, 0].min()):
                        best_p = unzip(tparams)
                        bad_counter = 0

                    print('Train ', train_err, 'Valid ', valid_err,
                          'Test ', test_err)

                    if (len(history_errs) > patience and
                            valid_err >= np.array(
                                history_errs)[:-patience, 0].min()):
                        bad_counter += 1
                        if bad_counter > patience:
                            print('Early Stop!')
                            estop = True
                            break

            print('Seen %d samples' % n_samples)

            if estop:
                break

    except KeyboardInterrupt:
        print("Training interupted")

    end_time = time.time()
    # Evaluate with the best parameters seen (or the current ones if no
    # validation ever ran).
    if best_p is not None:
        zipp(best_p, tparams)
    else:
        best_p = unzip(tparams)

    use_noise.set_value(0.)
    kf_train_sorted = get_minibatches_idx(len(train[0]), batch_size)
    train_err = pred_error(f_pred, prepare_data, train, kf_train_sorted)
    valid_err = pred_error(f_pred, prepare_data, valid, kf_valid)
    test_err = pred_error(f_pred, prepare_data, test, kf_test)

    print('Train ', train_err, 'Valid ', valid_err, 'Test ', test_err)
    if saveto:
        np.savez(saveto, train_err=train_err,
                 valid_err=valid_err, test_err=test_err,
                 history_errs=history_errs, **best_p)
    print('The code run for %d epochs, with %f sec/epochs' % (
        (eidx + 1), (end_time - start_time) / (1. * (eidx + 1))))
    print(('Training took %.1fs' %
           (end_time - start_time)), file=sys.stderr)
    return train_err, valid_err, test_err
def save_dict(di_, filename):
    """Serialize ``di_`` with pickle and write it to ``filename``.

    The file is opened in binary write mode, so an existing file at that
    path is overwritten.
    """
    with open(filename, mode="wb") as sink:
        pickle.dump(di_, sink)
# Assume you have a data file named 'example.pickle'.
# Retrieve the data from the saved pickle file.
read_file_name = 'example.pickle'
try:
    # The with-block closes the file even when loading fails.
    with open(read_file_name, 'rb') as f_read:  # open the file for reading
        mydata = pickle.load(f_read)['evoked_EMGs']
except Exception as e:  # capture the exception object for the message
    # Report the file that was actually being read (the message previously
    # referenced `all_files`, which this snippet never defines).
    print('Unable to read data from', read_file_name, ':', e)
    raise

#%% pseudo code, save data into a pickle file
# assume the file is named 'myfile.pickle'
pickle_file_name = 'myfile.pickle'
# directory is where you want to save your data file
pickle_file = os.path.join(directory, pickle_file_name)

try:
    # collect the data in a dictionary before touching the file, so a
    # failure here does not leave an empty file behind
    save = {
        'key1': data1,  # key and value pair
        'key2': data2,
    }
    with open(pickle_file, 'wb') as f:  # open the file for writing
        # save all the data into the file named in pickle_file,
        # using the highest protocol version available
        pickle.dump(save, f, pickle.HIGHEST_PROTOCOL)
except Exception as e:
    print('Unable to save data to', pickle_file, ':', e)
    raise