import os
import pickle
from os import path


def do_pickle(sol_id, all_traces, all_outputs, testcases, dest_dir):
    """
    Pickle the traces, outputs, and testcases. Cleans up after errors.

    Not sure why this is a separate function instead of just part of
    execute_and_pickle.
    """
    to_pickle = {
        'traces': all_traces,
        'outputs': all_outputs,
        'testcases': testcases
    }

    # Dump out
    pickle_path = path.join(dest_dir, sol_id + '.pickle')
    try:
        with open(pickle_path, 'w') as f:
            pickle.dump(to_pickle, f)
    except (pickle.PicklingError, TypeError):
        # If something goes wrong, clean up, then pass the exception back up
        # the stack
        print 'failed to pickle sol', sol_id
        os.remove(pickle_path)
        raise

import pickle
import shutil


def savefile(path, tagdata):
    """Saves tagdata to file at path.

    Writes to path + ".new" first and then moves the result into place,
    so a crash mid-write cannot leave a truncated file at path.
    """
    fp = open(path + ".new", "w")
    pickle.dump(tagdata, fp)
    fp.close()
    shutil.move(path + ".new", path)

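A minimal counterpart sketch for the entry above (the name loadfile is hypothetical, not part of the original): it reads back the text-mode, default-protocol pickle that savefile writes.

def loadfile(path):
    # Hypothetical counterpart to savefile(): read back the pickled tagdata.
    fp = open(path, "r")
    tagdata = pickle.load(fp)
    fp.close()
    return tagdata
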
def process_file(self, cvs_file_items):
    marks = {}
    for lod_items in cvs_file_items.iter_lods():
        for cvs_rev in lod_items.cvs_revisions:
            if not isinstance(cvs_rev, CVSRevisionDelete):
                mark = self._mark_generator.gen_id()
                cvs_rev.revision_reader_token = mark
                marks[cvs_rev.rev] = mark

    if marks:
        # A separate pickler is used for each dump(), so that its memo
        # doesn't grow very large.  The default ASCII protocol is used so
        # that this works without changes on systems that distinguish
        # between text and binary files.
        pickle.dump((cvs_file_items.cvs_file.rcs_path, marks),
                    self._pipe.stdin)
        self._pipe.stdin.flush()

    # Now that all CVSRevisions' revision_reader_tokens are set,
    # iterate through symbols and set their tokens to those of their
    # original source revisions:
    for lod_items in cvs_file_items.iter_lods():
        if lod_items.cvs_branch is not None:
            self._process_symbol(lod_items.cvs_branch, cvs_file_items)
        for cvs_tag in lod_items.cvs_tags:
            self._process_symbol(cvs_tag, cvs_file_items)

def rolf(self):
    print "running rolf..."
    for e in self.instanceList:
        self.dup_dict[e.id] = []
        self.dup_dict.get(e.id).append(e.id)
    try:
        # iterate over [0:-1] so the final pair (second-to-last, last)
        # is compared as well
        for indx, i in enumerate(self.instanceList[0:-1]):
            for j in self.instanceList[indx + 1:]:
                if self.getScore(i, j) >= 0.75:
                    self.dup_dict.get(i.id).append(j.id)
                    self.dup_dict.get(j.id).append(i.id)
    except:
        print "unexpected error occurred : ", sys.exc_info()[0]
    finally:
        # binary mode: HIGHEST_PROTOCOL is a binary pickle format
        with open("dup-dict.obj", 'wb') as dumpfile:
            cPickle.dump(self.dup_dict, dumpfile,
                         protocol=cPickle.HIGHEST_PROTOCOL)
        f = open("submission-file.txt", 'w')
        # items(), not values(): both the key and its duplicate list are needed
        for a, values in self.dup_dict.items():
            f.write(a + " ")
            for v in values:
                f.write(v + " ")
            f.write("\n")
            f.flush()
        f.close()

def save_data(self, filename, data):
    '''
    Saves the data structure using pickle. If the addon data path does
    not exist it will be automatically created. This save function has
    the same restrictions as the pickle module.

    Args:
        filename (string): name of the file you want to save data to.
            This file will be saved in your addon's profile directory.

        data (data object/string): you want to save.

    Returns:
        True on success

        False on failure
    '''
    profile_path = self.get_profile()
    try:
        os.makedirs(profile_path)
    except OSError:
        # profile directory already exists
        pass
    save_path = os.path.join(profile_path, filename)
    try:
        # a with-block so the file handle is closed and flushed
        with open(save_path, 'wb') as f:
            pickle.dump(data, f)
        return True
    except pickle.PickleError:
        return False

def saveJob(job, workflow, sandbox, wmTask=None, jobNumber=0,
            owner=None, ownerDN=None, ownerGroup='', ownerRole='',
            scramArch=None, swVersion=None, agentNumber=0):
    """
    _saveJob_

    Actually do the mechanics of saving the job to a pickle file
    """
    if wmTask:
        # If we managed to load the task, the url should be valid
        job['spec'] = workflow.spec
        job['task'] = wmTask
        if job.get('sandbox', None) == None:
            job['sandbox'] = sandbox

    job['counter'] = jobNumber
    job['agentNumber'] = agentNumber
    cacheDir = job.getCache()
    job['cache_dir'] = cacheDir
    job['owner'] = owner
    job['ownerDN'] = ownerDN
    job['ownerGroup'] = ownerGroup
    job['ownerRole'] = ownerRole
    job['scramArch'] = scramArch
    job['swVersion'] = swVersion

    # binary mode: HIGHEST_PROTOCOL is a binary pickle format
    output = open(os.path.join(cacheDir, 'job.pkl'), 'wb')
    cPickle.dump(job, output, cPickle.HIGHEST_PROTOCOL)
    output.close()

    return

def save(x, filename, bzip2=False, gzip=False):
    """
    save(x, filename):

    Saves x to a file.  Pretty much the only constraint on x is that
    it be picklable by the pickle module.

    This uses the pickle module, so data you save should remain
    readable by future versions of Python.

    INPUT:
        x -- almost arbitrary object
        filename -- a string

    OUTPUT:
        Creates a file named filename, from which the object x
        can be reconstructed.
    """
    o = open(filename, "w")
    # Note: don't use protocol 2 here (use 1), since loading doesn't work
    # on my extension types.
    cPickle.dump(x, o, 1)
    o.close()
    if bzip2:
        os.system("bzip2 -f %s" % filename)
    if gzip:
        os.system("gzip -f %s" % filename)

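A hedged inverse sketch (the name load and the decompress-in-place behaviour are assumptions, not part of the original): undo the optional shell compression that save() applied, then unpickle.

def load(filename, bzip2=False, gzip=False):
    # Hypothetical inverse of save(): bzip2/gzip above rename the file
    # to filename.bz2 / filename.gz, so decompress in place first.
    if bzip2:
        os.system("bunzip2 -f %s.bz2" % filename)
    if gzip:
        os.system("gunzip -f %s.gz" % filename)
    o = open(filename, "r")
    x = cPickle.load(o)
    o.close()
    return x
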
def __init__(self, descrs, aggFunc='mean', caching=True):
    self.reportMissing = True
    self.caching = caching
    self.cached_file_name = None

    if isinstance(descrs, str):
        self.descrs_file = descrs
        self.descrs = pickle.load(open(self.descrs_file, 'rb'))
        self.cached_file_name = '%s-%s.pkl' % (self.descrs_file, aggFunc)
    elif isinstance(descrs, dict):
        self.descrs = descrs

    if self.caching and self.cached_file_name is not None \
            and os.path.exists(self.cached_file_name):
        self.space = pickle.load(open(self.cached_file_name, 'rb'))
    elif aggFunc in ['mean', 'max']:
        if aggFunc == 'mean':
            f = self.aggMean
        elif aggFunc == 'max':
            f = self.aggMax
        self.space = {}
        for k in self.descrs.keys():
            vecs = self.descrs[k].values()
            if len(vecs) < 2:
                if self.reportMissing:
                    print('Warning: Not enough vectors for key %s - skipping' % k)
                continue
            self.space[k] = f(vecs)
        if self.caching and self.cached_file_name is not None:
            pickle.dump(self.space, open(self.cached_file_name, 'wb'))

def getDispersions(self, rescale=True):
    self.cached_dispersions_file = None
    if self.caching and hasattr(self, 'descrs_file'):
        self.cached_dispersions_file = '%s-dispersions.pkl' % (self.descrs_file)
        if os.path.exists(self.cached_dispersions_file):
            self.dispersions = pickle.load(open(self.cached_dispersions_file, 'rb'))
            return

    def disp(M):
        l = len(M)
        d, cnt = 0, 0
        for i in range(l):
            for j in range(i) + range(i + 1, l):
                d += (1 - cosine(M[i], M[j]))
                cnt += 1
        return d / cnt if cnt != 0 else 0

    self.dispersions = {}
    min_disp, max_disp = 1, 0
    for k in self.descrs:
        imgdisp = disp(self.descrs[k].values())
        self.dispersions[k] = imgdisp
        if imgdisp > max_disp:
            max_disp, max_key = imgdisp, k
        if imgdisp < min_disp:
            min_disp, min_key = imgdisp, k

    # rescale
    if rescale:
        for k in self.dispersions:
            self.dispersions[k] = max(0, min(1,
                (self.dispersions[k] - min_disp) / (max_disp - min_disp)))

    if self.caching and self.cached_dispersions_file is not None:
        pickle.dump(self.dispersions, open(self.cached_dispersions_file, 'wb'))

def create_workload(generator, filename):
    import cPickle
    workload = [sample for sample in generator]
    # binary mode: HIGHEST_PROTOCOL is a binary pickle format
    f = open(filename, 'wb')
    cPickle.dump(workload, f, cPickle.HIGHEST_PROTOCOL)
    f.close()

def test_pickle(self):
    """
    test that the class can be pickled. This is required!
    """
    X, Y, Z = self.generate_data(nrows=200)
    task = mmDIFF()
    task.fit(X, Y, Z)
    with tempfile.TemporaryFile(mode='w+b') as tf:
        cPickle.dump(task, tf)

def writeBinModels(self, binIdToModels, filename):
    """Save HMM model info for each bin to file."""
    self.logger.info("  Saving HMM info to file.")
    with gzip.open(filename, "wb") as output:
        pickle.dump(binIdToModels, output, pickle.HIGHEST_PROTOCOL)

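A hedged read-side sketch (readBinModels is a hypothetical name, not from the original): it unpickles the gzip-compressed file that writeBinModels produces.

import gzip
import pickle

def readBinModels(filename):
    # Hypothetical counterpart: load the gzip-compressed pickle
    # written by writeBinModels above.
    with gzip.open(filename, "rb") as f:
        return pickle.load(f)
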
def get_img2gist():
    try:
        img2gist = None
        with open(name2gist_file, 'rb') as f:
            print 'loading existing img2gist...'
            sys.stdout.flush()
            img2gist = pickle.load(f)
            return img2gist
    except Exception:
        img2gist = {}
        total_num = 0
        with open(train_file_map, 'r') as f:
            for line in f:
                if line.strip():
                    total_num += 1
        count = 0
        with open(train_file_map, 'r') as f:
            for line in f:
                if line.strip():
                    count += 1
                    arr = line.strip().split()
                    name = arr[0].strip()
                    rpath = arr[1].strip()
                    im = Image.open(pjoin(train_images_dir, rpath))
                    im = crop_resize(im, normal_size, True)
                    desc = leargist.color_gist(im)
                    img2gist[name] = desc
                    sys.stdout.write('%d/%d\r size:(%d, %d)  ' %
                                     (count, total_num, im.size[0], im.size[1]))
                    sys.stdout.flush()
        with open(name2gist_file, 'wb') as f:
            pickle.dump(img2gist, f)
        return img2gist

def split(self, dump_sub_results=None, make_sub_outputs=None,
          output_dir=None, output_file_list=None):
    if dump_sub_results is None:
        dump_sub_results = (self.result_dump_file is not None)
    if make_sub_outputs is None:
        make_sub_outputs = self.make_outputs
    if output_dir is None:
        output_dir = self.output_dir

    sub_treatments = [FMRITreatment(d, deepcopy(self.analyser),
                                    make_outputs=make_sub_outputs,
                                    output_dir=output_dir)
                      for d in self.analyser.split_data(self.data)]

    if output_dir is not None:
        pyhrf.verbose(1, 'Dump sub treatments in: %s ...' % output_dir)
        cmp_size = lambda t1, t2: cmp(t1.data.get_nb_vox_in_mask(),
                                      t2.data.get_nb_vox_in_mask())
        for it, sub_t in enumerate(sorted(sub_treatments, cmp=cmp_size,
                                          reverse=True)):
            if dump_sub_results:
                sub_t.result_dump_file = op.join(output_dir,
                                                 'result_%04d.pck' % it)
            fn = op.join(output_dir, 'treatment_%04d.pck' % it)
            fout = open(fn, 'w')
            cPickle.dump(sub_t, fout)
            fout.close()
            if output_file_list is not None:
                output_file_list.append(fn)

    return sub_treatments

def export_skin(file_path=None, shapes=None):
    """Exports the skinClusters of the given shapes to disk in a pickled list
    of skinCluster data.

    :param file_path: Path to export the data.
    :param shapes: Optional list of dag nodes to export skins from.  All
        descendent nodes will be searched for skinClusters also.
    """
    if shapes is None:
        shapes = cmds.ls(sl=True) or []

    # If no shapes were selected, export all skins
    skins = get_skin_clusters(shapes) if shapes else cmds.ls(type='skinCluster')
    if not skins:
        raise RuntimeError('No skins to export.')

    if file_path is None:
        file_path = cmds.fileDialog2(dialogStyle=2, fileMode=0,
                                     fileFilter='Skin Files (*{0})'.format(EXTENSION))
        if file_path:
            file_path = file_path[0]
    if not file_path:
        return
    if not file_path.endswith(EXTENSION):
        file_path += EXTENSION

    all_data = []
    for skin in skins:
        skin = SkinCluster(skin)
        data = skin.gather_data()
        all_data.append(data)
        logging.info('Exporting skinCluster %s (%d influences, %d vertices)',
                     skin.node, len(data['weights'].keys()),
                     len(data['blendWeights']))

    fh = open(file_path, 'wb')
    pickle.dump(all_data, fh, pickle.HIGHEST_PROTOCOL)
    fh.close()

def process_and_save(filename):
    """
    Little script to do reading, selecting of the right data, getting it in
    the right structure and then pickling it. This is supposed to make
    reading a lot faster when the light cones are to be made.
    """
    picklename = filename + '.pickled'

    # First check if the pickled version doesn't already exist
    if '/' in filename:
        dir = os.listdir(filename.rsplit('/', 1)[0])
    else:
        dir = os.listdir('.')
    # [-1] instead of [1]: rsplit returns a single-element list when
    # picklename contains no '/'
    if picklename.rsplit('/', 1)[-1] in dir:
        print "Pickled version already exists for", filename
        return False

    data = read_bolshoi(filename, nopickle=True)
    if not data:
        return None

    with open(picklename, 'w') as picklefile:
        cPickle.dump(data, picklefile)

    return True

def prep_test_data():
    test_file = './test/input_test_data.txt'
    output_test_file = './test/input_test.pkl'
    max_l = 100
    test_data = read_data_file(test_file, max_l)
    cPickle.dump(test_data, open(output_test_file, "wb"))

def save(model, timings, post_fix=""):
    print "Saving the model..."

    # ignore keyboard interrupt while saving
    start = time.time()
    s = signal.signal(signal.SIGINT, signal.SIG_IGN)

    model.save(model.state["save_dir"] + "/" + model.state["run_id"] + "_" +
               model.state["prefix"] + post_fix + "model.npz")
    cPickle.dump(model.state,
                 open(model.state["save_dir"] + "/" + model.state["run_id"] +
                      "_" + model.state["prefix"] + post_fix + "state.pkl",
                      "w"))
    numpy.savez(model.state["save_dir"] + "/" + model.state["run_id"] + "_" +
                model.state["prefix"] + post_fix + "timing.npz",
                **timings)

    signal.signal(signal.SIGINT, s)
    print "Model saved, took {}".format(time.time() - start)

def set(self, key, value, timeout=None, version=None):
    key = self.make_key(key, version=version)
    self.validate_key(key)

    fname = self._key_to_file(key)
    dirname = os.path.dirname(fname)

    if timeout is None:
        timeout = self.default_timeout

    self._cull()

    try:
        if not os.path.exists(dirname):
            os.makedirs(dirname)

        f = open(fname, 'wb')
        try:
            now = time.time()
            pickle.dump(now + timeout, f, pickle.HIGHEST_PROTOCOL)
            pickle.dump(value, f, pickle.HIGHEST_PROTOCOL)
        finally:
            f.close()
    except (IOError, OSError):
        pass

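A hedged sketch of the matching get() (make_key, validate_key, and _key_to_file are taken from the set() above; everything else is an assumption): the expiry timestamp is unpickled first, then the value, mirroring the two dump() calls.

def get(self, key, default=None, version=None):
    key = self.make_key(key, version=version)
    self.validate_key(key)
    fname = self._key_to_file(key)
    try:
        with open(fname, 'rb') as f:
            exp = pickle.load(f)          # expiry timestamp, written first
            if exp < time.time():
                return default            # entry expired
            return pickle.load(f)         # the cached value, written second
    except (IOError, OSError, EOFError, pickle.PickleError):
        return default
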
def extract_point_cloud(tracks, loc, R):
    locations = []
    directions = []
    track_ids = []
    for track_idx in range(0, len(tracks)):
        track = tracks[track_idx]
        for pt_idx in range(0, len(track.utm)):
            pt = track.utm[pt_idx]
            if pt[0] >= loc[0] - R and pt[0] <= loc[0] + R and \
               pt[1] >= loc[1] - R and pt[1] <= loc[1] + R:
                locations.append((pt[0], pt[1]))
                dir1 = np.array((0.0, 0.0))
                if pt_idx > 0:
                    dir1 = np.array((track.utm[pt_idx][0] - track.utm[pt_idx - 1][0],
                                     track.utm[pt_idx][1] - track.utm[pt_idx - 1][1]))
                dir2 = np.array((0.0, 0.0))
                if pt_idx < len(track.utm) - 1:
                    dir2 = np.array((track.utm[pt_idx + 1][0] - track.utm[pt_idx][0],
                                     track.utm[pt_idx + 1][1] - track.utm[pt_idx][1]))
                direction = dir1 + dir2
                direction_norm = np.linalg.norm(direction)
                if direction_norm > 1.0:
                    direction /= direction_norm
                else:
                    direction *= 0.0
                directions.append(direction)
                track_ids.append(track_idx)
    point_cloud = PointCloud(locations, directions, track_ids)
    with open("test_point_cloud.dat", "wb") as fout:
        cPickle.dump(point_cloud, fout, protocol=2)

def load(self, filename):
    """Optimized load and return the parsed version of filename.

    Uses the on-disk parse cache if the file is located in it.
    """
    # Compute sha1 hash (key)
    with open(filename) as fp:
        key = sha1(fp.read()).hexdigest()
    path = self.key_to_path(key)

    # Return the cached file if available
    if key in self.hashes:
        try:
            with open(path) as fp:
                return cPickle.load(fp)
        except EOFError:
            os.unlink(path)
            self.hashes.remove(key)
        except IOError:
            self.hashes.remove(key)

    # Create the nested cache directory
    try:
        os.makedirs(os.path.dirname(path))
    except OSError as exc:
        if exc.errno != errno.EEXIST:
            raise

    # Process the file and save in the cache
    scratch = kurt.Project.load(filename)  # can fail
    # open file for writing but make it immediately read-only
    with os.fdopen(os.open(path, os.O_WRONLY | os.O_CREAT, 0400), 'w') as fp:
        cPickle.dump(scratch, fp, cPickle.HIGHEST_PROTOCOL)
    return scratch

def serialize(self, data, id):
    if self.__pid != 0:
        return
    self.__currentID = id

    # In-memory case
    if self.__fileName is None:
        self.__inMemorySerializedData = zlib.compress(cPickle.dumps(data, -1))
        self.__pid = -1
        return

    # File case
    pid = os.fork()
    if pid != 0:
        self.__pid = pid
        return

    try:
        tmpFile = self.__fileName + '.tmp'
        with open(tmpFile, 'wb') as f:
            with gzip.GzipFile(fileobj=f) as g:
                cPickle.dump(data, g, -1)
        os.rename(tmpFile, self.__fileName)
        os._exit(0)
    except:
        os._exit(-1)

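A hedged counterpart sketch (unserialize is a hypothetical method assumed to live on the same class, so the name-mangled attributes resolve): it reverses both branches of serialize(), the zlib-compressed in-memory blob and the gzip file written by the forked child.

def unserialize(self):
    # Hypothetical counterpart to serialize() above.
    if self.__fileName is None:
        # In-memory case: decompress, then unpickle
        return cPickle.loads(zlib.decompress(self.__inMemorySerializedData))
    # File case: the child wrote a gzip-wrapped pickle
    with open(self.__fileName, 'rb') as f:
        with gzip.GzipFile(fileobj=f) as g:
            return cPickle.load(g)
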
def all_training_examples_cached():
    global _all_examples
    if _all_examples is None:
        try:
            _all_examples, cnt = cPickle.load(myopen(training_examples_cache_filename()))
            assert len(_all_examples) == cnt
            logging.info("Successfully read %d training examples from %s" %
                         (cnt, training_examples_cache_filename()))
            logging.info(stats())
        except:
            logging.info("(Couldn't read training examples from %s, sorry)" %
                         (training_examples_cache_filename()))
            logging.info("Caching all training examples...")
            logging.info(stats())
            _all_examples = []
            for l1, l2, f1, f2, falign in bicorpora_filenames():
                for e in get_training_biexample(l1, l2, f1, f2, falign):
                    _all_examples.append(e)
                    if len(_all_examples) % 10000 == 0:
                        logging.info("\tcurrently have read %d training examples" %
                                     len(_all_examples))
                        logging.info(stats())
            random.shuffle(_all_examples)
            logging.info("...done caching all %d training examples" %
                         len(_all_examples))
            logging.info(stats())

            cnt = len(_all_examples)
            cPickle.dump((_all_examples, cnt),
                         myopen(training_examples_cache_filename(), "wb"),
                         protocol=-1)
            assert len(_all_examples) == cnt
            logging.info("Wrote %d training examples to %s" %
                         (cnt, training_examples_cache_filename()))
            logging.info(stats())
    assert _all_examples is not None
    return _all_examples

def pickle_dump(data, filename):
    """
    Equivalent to pickle.dump(data, open(filename, 'wb')) but closes the
    file to prevent filehandle leakage.
    """
    with open(filename, 'wb') as fh:
        pickle.dump(data, fh)

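The docstring explains the point of the wrapper (closing the handle); a matching loader under the same convention might look like this (pickle_load is a hypothetical name, not from the original):

def pickle_load(filename):
    """
    Equivalent to pickle.load(open(filename, 'rb')) but closes the file
    to prevent filehandle leakage.
    """
    with open(filename, 'rb') as fh:
        return pickle.load(fh)
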
def fit(self, X, y, valid_X=None, valid_y=None):
    input_size = X.shape[1]
    output_size = len(np.unique(y))
    X_sym = T.matrix('x')
    y_sym = T.ivector('y')
    self.layers_ = []
    self.layer_sizes_ = [input_size]
    self.layer_sizes_.extend(self.hidden_layer_sizes)
    self.layer_sizes_.append(output_size)
    self.dropout_layers_ = []
    self.training_scores_ = []
    self.validation_scores_ = []
    self.training_loss_ = []
    self.validation_loss_ = []

    if not hasattr(self, 'fit_function'):
        self._setup_functions(X_sym, y_sym, self.layer_sizes_)

    batch_indices = list(range(0, X.shape[0], self.batch_size))
    if X.shape[0] != batch_indices[-1]:
        batch_indices.append(X.shape[0])

    start_time = time.clock()
    itr = 0
    best_validation_score = np.inf
    while (itr < self.max_iter):
        print("Starting pass %d through the dataset" % itr)
        itr += 1
        batch_bounds = list(zip(batch_indices[:-1], batch_indices[1:]))
        # Random minibatches
        self.random_state.shuffle(batch_bounds)
        for start, end in batch_bounds:
            self.partial_fit(X[start:end], y[start:end])
        current_training_score = (self.predict(X) != y).mean()
        self.training_scores_.append(current_training_score)
        current_training_loss = self.loss_function(X, y)
        self.training_loss_.append(current_training_loss)

        # Serialize each save_frequency iteration
        if (itr % self.save_frequency) == 0 or (itr == self.max_iter):
            f = open(self.model_save_name + "_snapshot.pkl", 'wb')
            cPickle.dump(self, f, protocol=2)
            f.close()

        if valid_X is not None:
            current_validation_score = (self.predict(valid_X) != valid_y).mean()
            self.validation_scores_.append(current_validation_score)
            current_validation_loss = self.loss_function(valid_X, valid_y)
            self.validation_loss_.append(current_validation_loss)
            print("Validation score %f" % current_validation_score)
            # if we got the best validation score until now, save
            if current_validation_score < best_validation_score:
                best_validation_score = current_validation_score
                f = open(self.model_save_name + "_best.pkl", 'wb')
                cPickle.dump(self, f, protocol=2)
                f.close()

    end_time = time.clock()
    print("Total training time ran for %.2fm" % ((end_time - start_time) / 60.))
    return self

def selective_search_IJCV_roidb(self):
    """
    Return the database of selective search regions of interest.
    Ground-truth ROIs are also included.

    This function loads/saves from/to a cache file to speed up future calls.
    """
    cache_file = os.path.join(self.cache_path,
                              '{:s}_selective_search_IJCV_top_{:d}_roidb.pkl'.
                              format(self.name, self.config['top_k']))
    if os.path.exists(cache_file):
        with open(cache_file, 'rb') as fid:
            roidb = cPickle.load(fid)
        print '{} ss roidb loaded from {}'.format(self.name, cache_file)
        return roidb

    gt_roidb = self.gt_roidb()
    ss_roidb = self._load_selective_search_IJCV_roidb(gt_roidb)
    roidb = datasets.imdb.merge_roidbs(gt_roidb, ss_roidb)
    with open(cache_file, 'wb') as fid:
        cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL)
    print 'wrote ss roidb to {}'.format(cache_file)
    return roidb

def unigram_selection(i, path_to_training_set, path_to_pickle):
    (X_train, y_train, X_test, y_test,
     number_training, number_testing) = extract_data.extract_training_and_testing_set(
        path_to_training_set + 'metrics_training_set_%d.data' % i,
        path_to_training_set + 'metrics_testing_set_%d.data' % i)
    print(X_train[0].__len__())
    clf = svm.SVC(C=1, cache_size=2000, class_weight=None, coef0=0.0,
                  degree=3, gamma=0.1, kernel='linear', max_iter=-1,
                  probability=False, shrinking=True, tol=0.001, verbose=False)
    clf.fit(X_train, y_train)
    print()
    print("Detailed classification report:")
    print()
    print("The model is trained on the full development set: %d" % number_training)
    print("The scores are computed on the full evaluation set: %d" % number_testing)
    print()
    y_true = y_test
    y_prediction = clf.predict(X_test)
    print(metrics.classification_report(y_true, y_prediction))
    clf_metrics = np.vstack((y_true, y_prediction))
    with open(path_to_pickle + '60000_all_features_%d.pkl' % i, 'wb') as fid:
        cPickle.dump(clf_metrics, fid)
    print()

def create_cache_file(recid, uid, record="", cache_dirty=False,
                      pending_changes=[], disabled_hp_changes={},
                      undo_list=[], redo_list=[]):
    """Create a BibEdit cache file, and return revision and record.

    This will overwrite any existing cache the user has for this record.
    """
    if not record:
        record = get_bibrecord(recid)
        if not record:
            return
        # Order subfields alphabetically after loading the record
        record_order_subfields(record)

    file_path = "%s.tmp" % _get_file_path(recid, uid)
    record_revision = get_record_last_modification_date(recid)
    if record_revision == None:
        record_revision = datetime.now().timetuple()

    cache_file = open(file_path, "w")
    assert_undo_redo_lists_correctness(undo_list, redo_list)
    cPickle.dump([cache_dirty, record_revision, record, pending_changes,
                  disabled_hp_changes, undo_list, redo_list], cache_file)
    cache_file.close()
    return record_revision, record

def get_qual_stats_dict(quals_dict, output_file_path=None, verbose=True):
    """This function takes quals dict (which can be obtained by calling the
    utils.utils.get_quals_dict function) and returns a dictionary that
    simply contains the summary of quality scores per location in the
    alignment"""

    # FIXME: get_quals_dict and get_qual_stats_dict functions are only for
    #        454 technology at this moment.

    progress = Progress()
    progress.verbose = verbose
    progress.new('Summary of quality scores per column is being computed')

    qual_stats_dict = {}
    alignment_length = len(quals_dict[quals_dict.keys()[0]])
    for pos in range(0, alignment_length):
        progress.update('Position: %d of %d' % (pos + 1, alignment_length))
        qual_stats_dict[pos] = {}
        quals_for_pos = [q[pos] for q in quals_dict.values() if q[pos]]
        if not quals_for_pos:
            qual_stats_dict[pos] = None
            continue
        qual_stats_dict[pos]['mean'] = np.mean(quals_for_pos)
        qual_stats_dict[pos]['std'] = np.std(quals_for_pos)
        qual_stats_dict[pos]['max'] = np.max(quals_for_pos)
        qual_stats_dict[pos]['min'] = np.min(quals_for_pos)
        qual_stats_dict[pos]['count'] = len(quals_for_pos)

    if output_file_path:
        cPickle.dump(quals_dict, open(output_file_path, 'w'))

    progress.end()
    return qual_stats_dict

def saveSettings(self, *args):
    # this function will save out the user's preferences they have set in
    # the UI to disk
    settingsLocation = self.mayaToolsDir + "/General/Scripts/projectSettings.txt"

    try:
        f = open(settingsLocation, 'w')

        # create a dictionary with values
        settings = {}
        settings["UseSourceControl"] = cmds.checkBox(
            self.widgets["useSourceControl"], q=True, v=True)
        settings["FavoriteProject"] = cmds.optionMenu(
            self.widgets["favoriteProject_OM"], q=True, v=True)

        # write our dictionary to file
        cPickle.dump(settings, f)
        f.close()
    except:
        cmds.confirmDialog(title="Error", icon="critical",
                           message=settingsLocation + " is not writeable. "
                           "Please make sure this file is not set to read only.")

    # close the UI
    cmds.deleteUI("AnimationRiggingTool_SettingsUI")

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_validation import cross_val_score
from Functions import print_progress
import cPickle

datafile = raw_input("Pandas dataframe to open: ")
dataname = raw_input("Data savename: ")

X = pd.read_hdf("%s.h5" % datafile)
y = X.pop('Target 0')
y = y.astype(int)

mean_scores_l = list()
errors = list()
progress = 0
n_tree_range = range(1, 25)
print_progress(progress, len(n_tree_range), prefix='Progress',
               suffix='Complete', bar_length=50)
for i in n_tree_range:
    dtree = RandomForestClassifier(n_estimators=i)
    scores = cross_val_score(dtree, X, y, cv=10, scoring='accuracy')
    mean_scores_l.append(np.mean(scores))
    errors.append(np.std(scores))
    progress += 1
    print_progress(progress, len(n_tree_range), prefix='Progress',
                   suffix='Complete', bar_length=50)

print mean_scores_l
print errors

data = zip(mean_scores_l, errors)
with open('%s.p' % dataname, 'wb') as f:
    cPickle.dump(data, f)

def dump_vocabulary():
    """
    Write the word ID map (the module-level words dict) to disk.
    """
    logger.info("Writing vocabulary to %s..." % config.VOCABULARY_FILE)
    with open(config.VOCABULARY_FILE, 'wb') as f:
        cPickle.dump(words, f)

def test_DBN(finetune_lr=0.1, pretraining_epochs=10,  # TODO 100+
             pretrain_lr=0.01, k=1, training_epochs=42,  # TODO 100+
             dataset=DATASET, batch_size=12):
    """
    :type learning_rate: float
    :param learning_rate: learning rate used in the finetune stage

    :type pretraining_epochs: int
    :param pretraining_epochs: number of epoch to do pretraining

    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training

    :type k: int
    :param k: number of Gibbs steps in CD/PCD

    :type training_epochs: int
    :param training_epochs: maximal number of iterations to run the optimizer

    :type dataset: string
    :param dataset: path to the pickled dataset

    :type batch_size: int
    :param batch_size: the size of a minibatch
    """
    print "loading dataset from", dataset
    ###datasets = load_data(dataset, nframes=N_FRAMES, unit=False, normalize=True, pca_whiten=True, cv_frac=0.0)
    datasets = load_data(dataset, nframes=N_FRAMES, unit=False,
                         normalize=True, pca_whiten=False, cv_frac=0.1)
    # unit=False because we don't want the [0-1] binary RBM projection
    # normalize=True because we want the data to be 0 centered with 1 variance.
    # pca_whiten=True because we want the data to be decorrelated

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    print "dataset loaded!"
    print "train set size", train_set_x.shape[0]
    print "validation set size", valid_set_x.shape[0]
    print "test set size", test_set_x.shape[0]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    print '... building the model'
    # construct the Deep Belief Network
    print "train_set_x.shape.eval()", train_set_x.shape.eval()
    assert (train_set_x.shape[1].eval() == N_FRAMES * 39)  # check
    dbn = DBN(numpy_rng=numpy_rng, n_ins=train_set_x.shape[1].eval(),
              hidden_layers_sizes=[300, 300, 300],
              n_outs=62 * 3)

    #########################
    # PRETRAINING THE MODEL #
    #########################
    print '... getting the pretraining functions'
    pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size,
                                                k=k)

    print '... pre-training the model'
    start_time = time.clock()
    ## Pre-train layer-wise
    for i in xrange(dbn.n_layers):
        # go through pretraining epochs
        for epoch in xrange(pretraining_epochs):
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                tmp_lr = pretrain_lr / (1. + 0.05 * batch_index)  # TODO
                if i == 0:
                    tmp_lr /= LEARNING_RATE_DENOMINATOR_FOR_GAUSSIAN
                c.append(pretraining_fns[i](index=batch_index, lr=tmp_lr))
            print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
            print numpy.mean(c)
        with open(output_file_name + '_layer_' + str(i) + '.pickle', 'w') as f:
            cPickle.dump(dbn, f)
            print "dumped a partially pre-trained model"

    end_time = time.clock()
    print >> sys.stderr, ('The pretraining code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

    ########################
    # FINETUNING THE MODEL #
    ########################
    #with open('dbn_Gaussian_gpu_layer_2.pickle') as f:
    #    dbn = cPickle.load(f)
    ###datasets = load_data(dataset, nframes=N_FRAMES, unit=False, normalize=True, cv_frac=0.2)
    ### # unit=False because we don't want the [0-1] binary RBM projection
    ### # normalize=True because we want the data to be 0 centered with 1 variance.
    ###train_set_x, train_set_y, valid_set_x, valid_set_y, test_set_x, test_set_y = None, None, None, None, None, None
    ###train_set_x, train_set_y = datasets[0]
    ###valid_set_x, valid_set_y = datasets[1]
    ###test_set_x, test_set_y = datasets[2]
    ###n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    # get the training, validation and testing function for the model
    print '... getting the finetuning functions'
    train_fn, validate_model, test_model = dbn.build_finetune_functions(
        datasets=datasets, batch_size=batch_size,
        learning_rate=finetune_lr)

    print '... finetuning the model'
    # early-stopping parameters
    patience = 4 * n_train_batches  # look as this many examples regardless
    patience_increase = 2.          # wait this much longer when a new best
                                    # is found
    improvement_threshold = 0.995   # a relative improvement of this much is
                                    # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                    # go through this many minibatches before
                                    # checking the network on the validation
                                    # set; in this case we check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0

    print "number of training (fine-tuning) batches", n_train_batches
    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_fn(minibatch_index)
            iter = epoch * n_train_batches + minibatch_index
            if (iter + 1) % validation_frequency == 0:
                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    with open(output_file_name + '.pickle', 'w') as f:
                        cPickle.dump(dbn, f)

                    # improve patience if loss improvement is good enough
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = test_model()
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%, '
           'with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))
    print >> sys.stderr, ('The fine tuning code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

def train(data_opt, train_opt):
    print 'Initializing data provider...'
    dp_tr = MusicDataProvider(data_opt['dir_path'], 'train',
                              data_opt['pitch_range'], data_opt['dt'],
                              data_opt['batch_size'])
    dp_vl = MusicDataProvider(data_opt['dir_path'], 'valid',
                              data_opt['pitch_range'], data_opt['dt'],
                              data_opt['batch_size'], shuffle=False)
    print 'Done. Total training batches:', dp_tr.get_batch_num()

    print 'initializing parameters...'
    input_dim = dp_tr.get_data_dims()
    print 'Input feature dimension:', input_dim
    params = param_init('train', train_opt, input_dim, isAdp=train_opt['isAdp'])
    tparams = OrderedDict()
    for kk, pp in params.items():
        tparams[kk] = theano.shared(params[kk].astype(np.float64), name=kk)

    print 'Building model...'
    f_grad, f_update = build_model_train(tparams, train_opt)
    print [(k, tparams[k].get_value().shape) for k in tparams.keys()]

    print 'Begin training...'
    uidx = 0
    prev_log_like = -np.inf
    omega = np.array([i * 2 * np.pi / train_opt['dim_feq']
                      for i in range(train_opt['dim_feq'])])
    for eidx in range(train_opt['max_epoch']):
        for bidx in range(dp_tr.get_batch_num()):
            [epoch, batch, [data, nframe]] = dp_tr.get_next_batch()
            online_cost = f_grad(data, nframe, omega)
            f_update(train_opt['lrate'])
            if bidx % 5 == 0:
                print 'batch {}-{}: {}'.format(eidx, bidx, online_cost)
                sys.stdout.flush()

            if uidx > 0 and uidx % train_opt['test_freq'] == 0:
                print '{} minibatch trained. Begin testing...'.format(uidx)
                test_log = 0
                test_cnt = 0
                for test_bidx in range(dp_vl.get_batch_num()):
                    [epoch, batch, [data, nframe]] = dp_vl.get_next_batch()
                    log_like = eval_batch(data, nframe, tparams, omega, train_opt)
                    test_log += np.sum(log_like)
                    test_cnt += nframe.shape[0]
                    if test_bidx % 10 == 0:
                        print 'batch-{} tested: {}'.format(test_bidx,
                                                           test_log / test_cnt)
                        sys.stdout.flush()
                test_log_like = test_log / test_cnt
                print 'Batch {}-{}, test {} samples, accuracy: {}'.format(
                    eidx, bidx, test_cnt, test_log_like)
                if test_log_like > prev_log_like:
                    print 'Best parameter so far found. Saving...'
                    param = unzip(tparams)
                    fo = open(train_opt['save_dir'], 'wb')
                    pickle.dump({'param': param, 'log': test_log_like}, fo,
                                protocol=pickle.HIGHEST_PROTOCOL)
                    fo.close()
                    prev_log_like = test_log_like
            uidx += 1

def __dump(self, terms):
    from cPickle import dump
    print terms
    fp = open("plugins/extras/testData/glossary.glossary-terms", "w")
    dump(terms, fp)
    fp.close()

validationLabels = loader.loadData('myDataset/dev_label.txt')
testSentences = loader.loadData('myDataset/test.txt')
testLabels = loader.loadData('myDataset/test_label.txt')

# TRAIN THE MODEL
print '...training the DCNN'
for epoch in range(NUMOFEPOCHS):
    for i in xrange(len(trainingSentences)):
        trainDCNN(np.asarray(trainingSentences[i:i + 1], dtype=np.int32),
                  np.asarray(trainingLabels[i], dtype=np.int32))
        print 'Sentence ', i, ' complete.'

# SAVE THE TRAINED MODEL
parameters = lasagne.layers.get_all_param_values(output)
with open('DCNNParameters.pkl', 'wb') as file:
    cPickle.dump(parameters, file, protocol=2)

# VALIDATE THE MODEL
print '...running the DCNN on Validation Set'
accuracy = 0
for i in xrange(len(validationSentences)):
    score = validateDCNN(np.asarray(validationSentences[i:i + 1], dtype=np.int32),
                         np.asarray(validationLabels[i], dtype=np.int32))
    accuracy += score
    print 'Sentence ', i, ' complete.'
accuracy /= float(len(validationSentences))
print "Accuracy in Validation =", accuracy

def saveState():
    try:
        cPickle.dump(state, file(_stateFilename, "wb"))
    except:
        log.debugWarning("Error saving state", exc_info=True)

def GetBias(config, filename_results_direct, filename_results_reconv):
    """
    @brief load results, calculate the slope and offset for bias
           (g1_direct-g1_reconv) vs g1_reconv
    @param config dict used to create the simulations
    @param filename_results_direct results file for the direct case
    @param filename_results_reconv results file for the reconv case
    @return dict with fields c1,m1,c2,m2,c1_std,c2_std,m1_std,m2_std
    Errors on m and c are empty for now.
    """
    name1 = os.path.basename(filename_results_reconv).replace(
        'results', '').replace('yaml', '').replace('cat', '').replace('..', '.')
    name1 = name1.strip('.')
    name2 = os.path.basename(filename_results_direct).replace(
        'results', '').replace('yaml', '').replace('cat', '').replace('..', '.')
    name2 = name2.strip('.')
    filename_pickle = 'results.%s.%s.pickle' % (name1, name2)
    import cPickle as pickle

    if os.path.isfile(filename_pickle):
        logging.info('using existing results file %s' % filename_pickle)
        pickle_dict = pickle.load(open(filename_pickle))
        bias_moments_list = pickle_dict['moments']
        bias_hsmcorr_list = pickle_dict['hsmcorr']
    else:
        logging.info('file %s not found, analysing results' % filename_pickle)

        # get number of shears, angles and galaxies, useful later
        n_shears = config['reconvolution_validation_settings']['n_shears']
        n_angles = config['reconvolution_validation_settings']['n_angles']
        n_gals = config['reconvolution_validation_settings']['n_gals']

        # initialise lists for results
        bias_moments_list = []
        bias_hsmcorr_list = []

        # load results
        results_direct = numpy.loadtxt(filename_results_direct)
        results_reconv = numpy.loadtxt(filename_results_reconv)

        # check if ring test is complete, we should have n_angles results
        # for each galaxy and each shear
        for gi in range(n_gals):

            # initialise lists for results and truth
            moments_reconv_G1 = []
            moments_reconv_G2 = []
            hsmcorr_reconv_G1 = []
            hsmcorr_reconv_G2 = []
            moments_direct_G1 = []
            moments_direct_G2 = []
            hsmcorr_direct_G1 = []
            hsmcorr_direct_G2 = []
            true_G1 = []
            true_G2 = []

            # this will count how many shear we are using
            n_used_shears = 0

            # loop over shears
            for si in range(n_shears):

                # calculate indices of galaxies which belong to this ring test
                start_id = gi * si
                end_id = gi * si + n_angles

                # select galaxies from this ring
                select_reconv = numpy.logical_and(
                    results_reconv[:, 0] >= start_id,
                    results_reconv[:, 0] < end_id)
                select_direct = numpy.logical_and(
                    results_direct[:, 0] >= start_id,
                    results_direct[:, 0] < end_id)

                # count how many galaxies we got
                n_found_angles_reconv = sum(select_reconv)
                n_found_angles_direct = sum(select_direct)

                # initialise the variable which will tell us if to skip
                # this shear
                skip_shear = False

                # do not include shear which has missing data
                if (n_found_angles_reconv != n_angles) or \
                        (n_found_angles_direct != n_angles):
                    skip_shear = True

                # do not include the shear which has an error in one of
                # the angles
                for col in range(1, 7):
                    if any(results_reconv[select_reconv, col].astype(int) ==
                           HSM_ERROR_VALUE) or \
                       any(results_direct[select_direct, col].astype(int) ==
                           HSM_ERROR_VALUE):
                        skip_shear = True

                # continue with loop if bad ring
                if skip_shear:
                    logging.warning(
                        'gal %d shear %d has HSM errors or missing data'
                        ' - skipping' % (gi, si))
                    continue

                # increment the number of used shears
                n_used_shears += 1

                # get the shear from the ring
                moments_reconv_G1.append(
                    numpy.mean(results_reconv[select_reconv, 1]))
                moments_reconv_G2.append(
                    numpy.mean(results_reconv[select_reconv, 2]))
                hsmcorr_reconv_G1.append(
                    numpy.mean(results_reconv[select_reconv, 3]))
                hsmcorr_reconv_G2.append(
                    numpy.mean(results_reconv[select_reconv, 4]))
                moments_direct_G1.append(
                    numpy.mean(results_direct[select_direct, 1]))
                moments_direct_G2.append(
                    numpy.mean(results_direct[select_direct, 2]))
                hsmcorr_direct_G1.append(
                    numpy.mean(results_direct[select_direct, 3]))
                hsmcorr_direct_G2.append(
                    numpy.mean(results_direct[select_direct, 4]))
                true_G1.append(config['reconvolved_images']['gal']['shear']
                               ['items'][si]['g1'])
                true_G2.append(config['reconvolved_images']['gal']['shear']
                               ['items'][si]['g2'])

            # convert to numpy
            moments_reconv_G1 = numpy.asarray(moments_reconv_G1)
            moments_reconv_G2 = numpy.asarray(moments_reconv_G2)
            hsmcorr_reconv_G1 = numpy.asarray(hsmcorr_reconv_G1)
            hsmcorr_reconv_G2 = numpy.asarray(hsmcorr_reconv_G2)
            moments_direct_G1 = numpy.asarray(moments_direct_G1)
            moments_direct_G2 = numpy.asarray(moments_direct_G2)
            hsmcorr_direct_G1 = numpy.asarray(hsmcorr_direct_G1)
            hsmcorr_direct_G2 = numpy.asarray(hsmcorr_direct_G2)
            true_G1 = numpy.asarray(true_G1)
            true_G2 = numpy.asarray(true_G2)

            # get the shear bias for moments
            c1, m1, cov1 = _getLineFit(true_G1,
                                       moments_direct_G1 - moments_reconv_G1,
                                       numpy.ones(moments_direct_G1.shape))
            c2, m2, cov2 = _getLineFit(true_G2,
                                       moments_direct_G2 - moments_reconv_G2,
                                       numpy.ones(moments_direct_G2.shape))

            # create result dict
            bias_moments = {'c1': c1, 'm1': m1, 'c2': c2, 'm2': m2,
                            'c1_std': 0., 'c2_std': 0.,
                            'm1_std': 0., 'm2_std': 0.}

            # get the shear bias for hsmcorr
            c1, m1, cov1 = _getLineFit(true_G1,
                                       hsmcorr_direct_G1 - hsmcorr_reconv_G1,
                                       numpy.ones(hsmcorr_direct_G1.shape))
            c2, m2, cov2 = _getLineFit(true_G2,
                                       hsmcorr_direct_G2 - hsmcorr_reconv_G2,
                                       numpy.ones(hsmcorr_direct_G2.shape))

            # create result dict
            bias_hsmcorr = {'c1': c1, 'm1': m1, 'c2': c2, 'm2': m2,
                            'c1_std': 0., 'c2_std': 0.,
                            'm1_std': 0., 'm2_std': 0.}

            if config['debug']:
                name1 = os.path.basename(filename_results_reconv).replace(
                    'results', '').replace('yaml', '').replace('cat', '').replace(
                    'reconvolution_validation', '')
                name1 = name1.strip('.')
                name2 = os.path.basename(filename_results_direct).replace(
                    'results', '').replace('yaml', '').replace('cat', '').replace(
                    'reconvolution_validation', '').replace('..', '.')
                name2 = name2.strip('.')
                filename_fig = 'fig.linefit.%s.%s.%03d.png' % (name1, name2, gi)
                import pylab
                pylab.figure(figsize=(10, 5))
                pylab.plot(true_G1, moments_direct_G1 - moments_reconv_G1, 'bx')
                pylab.plot(true_G2, moments_direct_G2 - moments_reconv_G2, 'rx')
                pylab.plot(true_G1,
                           true_G1 * bias_moments['m1'] + bias_moments['c1'], 'b-')
                pylab.plot(true_G2,
                           true_G2 * bias_moments['m2'] + bias_moments['c2'], 'r-')
                x1, x2, y1, y2 = pylab.axis()
                pylab.axis((min(true_G1) * 1.1, max(true_G1) * 1.1, y1, y2))
                pylab.xlabel('true_Gi')
                pylab.ylabel('moments_direct_G1-moments_reconv_G1')
                pylab.legend(['G1', 'G2'])
                pylab.savefig(filename_fig)
                pylab.close()
                logging.info('saved figure %s' % filename_fig)

            logging.info('gal %3d used %3d shears, m1 = % 2.3e, m2=% 2.3e ' %
                         (gi, n_used_shears, bias_moments['m1'],
                          bias_moments['m2']))

            # append the results list
            bias_moments_list.append(bias_moments)
            bias_hsmcorr_list.append(bias_hsmcorr)

        # may want to scatter plot the m1,m2 of all galaxies in the
        # results file
        if config['debug']:
            name1 = os.path.basename(filename_results_reconv).replace(
                'results', '').replace('yaml', '').replace('cat', '').replace(
                'reconvolution_validation', '')
            name1 = name1.strip('.')
            name2 = os.path.basename(filename_results_direct).replace(
                'results', '').replace('yaml', '').replace('cat', '').replace(
                'reconvolution_validation', '').replace('..', '.')
            name2 = name2.strip('.')
            filename_fig = 'fig.mscatter.%s.%s.png' % (name1, name2)
            m1_list = numpy.asarray([b['m1'] for b in bias_moments_list])
            m2_list = numpy.asarray([b['m2'] for b in bias_moments_list])
            pylab.figure()
            # scatter the per-galaxy slopes, not the leftover scalars m1, m2
            pylab.scatter(m1_list, m2_list)
            pylab.savefig(filename_fig)
            pylab.close()

        pickle_dict = {'moments': bias_moments_list,
                       'hsmcorr': bias_hsmcorr_list}
        pickle.dump(pickle_dict, open(filename_pickle, 'w'), protocol=2)
        logging.info('saved %s' % filename_pickle)

    return bias_moments_list, bias_hsmcorr_list

def plot(self):
    """Plot chart"""
    Preferences = ConfigParser()
    Preferences.read("psyrc")

    self.diagrama2D.axes2D.clear()
    self.diagrama2D.config()
    filename = "%i.pkl" % P
    if os.path.isfile(filename):
        with open(filename, "r") as archivo:
            data = cPickle.load(archivo)
            self.status.setText("Loading cached data...")
            QApplication.processEvents()
    else:
        self.progressBar.setVisible(True)
        self.status.setText("Calculating data, be patient...")
        QApplication.processEvents()
        data = PsyCoolprop.calculatePlot(self)
        cPickle.dump(data, open(filename, "w"))
        self.progressBar.setVisible(False)

    self.status.setText("Plotting...")
    QApplication.processEvents()

    tmax = Preferences.getfloat("Psychr", "isotdbEnd") - 273.15
    t = [ti - 273.15 for ti in data["t"]]
    Hs = data["Hs"]
    format = {}
    format["ls"] = Preferences.get("Psychr", "saturationlineStyle")
    format["lw"] = Preferences.getfloat("Psychr", "saturationlineWidth")
    format["color"] = Preferences.get("Psychr", "saturationColor")
    format["marker"] = Preferences.get("Psychr", "saturationmarker")
    format["markersize"] = 3
    self.diagrama2D.plot(t, Hs, **format)

    format = {}
    format["ls"] = Preferences.get("Psychr", "isotdblineStyle")
    format["lw"] = Preferences.getfloat("Psychr", "isotdblineWidth")
    format["color"] = Preferences.get("Psychr", "isotdbColor")
    format["marker"] = Preferences.get("Psychr", "isotdbmarker")
    format["markersize"] = 3
    for i, T in enumerate(t):
        self.diagrama2D.plot([T, T], [0, Hs[i]], **format)

    H = data["H"]
    th = data["th"]
    format = {}
    format["ls"] = Preferences.get("Psychr", "isowlineStyle")
    format["lw"] = Preferences.getfloat("Psychr", "isowlineWidth")
    format["color"] = Preferences.get("Psychr", "isowColor")
    format["marker"] = Preferences.get("Psychr", "isowmarker")
    format["markersize"] = 3
    for i, H in enumerate(H):
        self.diagrama2D.plot([th[i], tmax], [H, H], **format)

    format = {}
    format["ls"] = Preferences.get("Psychr", "isohrlineStyle")
    format["lw"] = Preferences.getfloat("Psychr", "isohrlineWidth")
    format["color"] = Preferences.get("Psychr", "isohrColor")
    format["marker"] = Preferences.get("Psychr", "isohrmarker")
    format["markersize"] = 3
    for Hr, H0 in data["Hr"].iteritems():
        self.diagrama2D.plot(t, H0, **format)
        self.drawlabel("isohr", Preferences, t, H0, Hr, "%")

    format = {}
    format["ls"] = Preferences.get("Psychr", "isotwblineStyle")
    format["lw"] = Preferences.getfloat("Psychr", "isotwblineWidth")
    format["color"] = Preferences.get("Psychr", "isotwbColor")
    format["marker"] = Preferences.get("Psychr", "isotwbmarker")
    format["markersize"] = 3
    for T, (H, Tw) in data["Twb"].iteritems():
        self.diagrama2D.plot(Tw, H, **format)
        value = T - 273.15
        txt = u"ºC"
        self.drawlabel("isotwb", Preferences, Tw, H, value, txt)

    format = {}
    format["ls"] = Preferences.get("Psychr", "isochorlineStyle")
    format["lw"] = Preferences.getfloat("Psychr", "isochorlineWidth")
    format["color"] = Preferences.get("Psychr", "isochorColor")
    format["marker"] = Preferences.get("Psychr", "isochormarker")
    format["markersize"] = 3
    for v, (Td, H) in data["v"].iteritems():
        self.diagrama2D.plot(Td, H, **format)
        value = v
        txt = u"m³/kg"
        self.drawlabel("isochor", Preferences, Td, H, value, txt)

    self.diagrama2D.draw()
    self.status.setText("P = %i Pa" % P)

def DumpPickleRunsEventTimes(self):
    if len(self.runs_evets_times) > 0:
        picklepath = '{d}/{f}.pkl'.format(d=self.currents_logs_dir,
                                          f=self.testbeam_log_file_name)
        pickle.dump(self.runs_evets_times, open(picklepath, 'wb'))
        print 'Created pickle file with the event times for all the runs in the testbeam'

if recompute_index:
    print 'building an index for faster search...'
    for pid in db:
        p = db[pid]
        dict_title = makedict(p['title'], forceidf=5, scale=3)
        dict_authors = makedict(' '.join(x['name'] for x in p['authors']),
                                forceidf=5)
        if 'and' in dict_authors:
            # special case for "and" handling in authors list
            del dict_authors['and']
        dict_summary = makedict(p['summary'])
        SEARCH_DICT[pid] = merge_dicts([dict_title, dict_authors, dict_summary])
    # and cache it in file
    print 'writing search_dict.p as cache'
    pickle.dump(SEARCH_DICT, open('search_dict.p', 'wb'))
else:
    print 'loading cached index for faster search...'
    SEARCH_DICT = pickle.load(open('search_dict.p', 'rb'))

# start
if args.prod:
    # run on Tornado instead, since running raw Flask in prod is not recommended
    print 'starting tornado!'
    from tornado.wsgi import WSGIContainer
    from tornado.httpserver import HTTPServer
    from tornado.ioloop import IOLoop
    from tornado.log import enable_pretty_logging
    enable_pretty_logging()
    http_server = HTTPServer(WSGIContainer(app))
    http_server.listen(args.port)

def save_pickle(data, data_path):
    with open(data_path, "w") as f:
        pickle.dump(data, f)

if __name__ == '__main__':
    try:
        which = sys.argv[1]
        writer = {'megam': feat_writer.megam_writer,
                  'crfsuite': feat_writer.crfsuite_writer}[which]
        out_dir = sys.argv[2]
        train_in, dev_in, test_in = sys.argv[3:6]
        colloc = sys.argv[6]
    except:
        print 'Usage: {} which(=megam|crfsuite) out_dir train dev test colloc'.format(
            sys.argv[0])
        exit(1)

    with open(colloc) as f:
        COLLOCS = set(f.read().split())
        MAX_COLLOCS_LEN = max([len(i.split('_')) for i in COLLOCS])

    for (purpose, path) in zip(["train", "dev", "test"],
                               [train_in, dev_in, test_in]):
        with open(path) as fi:
            with open(out_dir + '/' + purpose + '.' + which, 'w') as fo:
                writer(iter_features(common.lazy_load_dyads(fi)), fo)

    with open(out_dir + '/' + 'map.' + which, 'w') as f:
        cPickle.dump(LABEL_ID, f)

def test_net(net, imdb):
    """Test a Fast R-CNN network on an image database."""
    num_images = len(imdb.image_index)
    # heuristic: keep an average of 40 detections per class per image prior
    # to NMS
    max_per_set = 40 * num_images
    # heuristic: keep at most 100 detections per class per image prior to NMS
    max_per_image = 100
    # detection threshold for each class (this is adaptively set based on the
    # max_per_set constraint)
    thresh = -np.inf * np.ones(imdb.num_classes)
    # top_scores will hold one minheap of scores per class (used to enforce
    # the max_per_set constraint)
    top_scores = [[] for _ in xrange(imdb.num_classes)]
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 6 array of detections in
    #    (x1, y1, x2, y2, score, depth); the score lands in column 4,
    #    which is what the thresholding below indexes
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, net)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    roidb = imdb.roidb
    for i in xrange(num_images):
        im = cv2.imread(imdb.image_path_at(i))
        _t['im_detect'].tic()
        scores, boxes = im_detect(net, im, roidb[i]['boxes'])
        depths = roidb[i]['depths']
        _t['im_detect'].toc()

        _t['misc'].tic()
        for j in xrange(1, imdb.num_classes):
            inds = np.where((scores[:, j] > thresh[j]) &
                            (roidb[i]['gt_classes'] == 0))[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            cls_depths = depths[inds]
            top_inds = np.argsort(-cls_scores)[:max_per_image]
            cls_scores = cls_scores[top_inds]
            cls_boxes = cls_boxes[top_inds, :]
            cls_depths = cls_depths[top_inds]
            # push new scores onto the minheap
            for val in cls_scores:
                heapq.heappush(top_scores[j], val)
            # if we've collected more than the max number of detections,
            # then pop items off the minheap and update the class threshold
            if len(top_scores[j]) > max_per_set:
                while len(top_scores[j]) > max_per_set:
                    heapq.heappop(top_scores[j])
                thresh[j] = top_scores[j][0]

            all_boxes[j][i] = \
                np.hstack((cls_boxes, cls_scores[:, np.newaxis],
                           cls_depths[:])) \
                .astype(np.float32, copy=False)

            if 0:
                keep = nms(all_boxes[j][i], 0.3)
                vis_detections(im, imdb.classes[j], all_boxes[j][i][keep, :])
        _t['misc'].toc()

        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(i + 1, num_images, _t['im_detect'].average_time,
                    _t['misc'].average_time)

    for j in xrange(1, imdb.num_classes):
        for i in xrange(num_images):
            inds = np.where(all_boxes[j][i][:, 4] > thresh[j])[0]
            all_boxes[j][i] = all_boxes[j][i][inds, :]

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print 'Applying NMS to all detections'
    nms_dets = apply_nms(all_boxes, cfg.TEST.NMS)

    print 'Evaluating detections'
    imdb.evaluate_detections(nms_dets, output_dir)

def save_model(self):
    # save the network
    with open(self.save_file, "wb") as f:
        cPickle.dump(self.G, f)
    return

    -1, config["test"]["number_of_episodes"], is_train=False
)
test_episodes = eval_result[0]
test_successful_episodes = eval_result[1]
test_collision_episodes = eval_result[2]
test_max_len_episodes = eval_result[3]
print_state(
    "validation episodes",
    test_episodes,
    test_successful_episodes,
    test_collision_episodes,
    test_max_len_episodes,
)
with open(
    os.path.join(test_completed_trajectories_dir, "final_status.txt"), "w"
) as final_message_file:
    validation_rate = test_successful_episodes / float(test_episodes)
    final_message_file.write("final validation rate is {}".format(validation_rate))
    final_message_file.flush()
test_results.append((-1, test_episodes, test_successful_episodes))
rollout_manager.end()
test_results_file = os.path.join(
    test_completed_trajectories_dir, "test_results.test_results_pkl"
)
with bz2.BZ2File(test_results_file, "w") as compressed_file:
    pickle.dump(test_results, compressed_file)

def save(self, path):
    with open(path, 'w') as outfile:
        pickle.dump(self, outfile)

def get_scores(model_name):
    scores = []
    base_path = "/home/ml/mgrena/mod-target/pgn/pretrained_model_tf1.2.1/mod-target1375"
    decoded_path = os.path.join(base_path, "decoded")
    reference_path = os.path.join(base_path, "reference")

    if len(os.listdir(decoded_path)) != len(os.listdir(reference_path)):
        raise ValueError(
            "Number of reference summaries and decoded summaries do not match")

    num_articles = len(os.listdir(reference_path))
    print(num_articles)

    if not os.path.exists("decoded_tmp"):
        os.mkdir("decoded_tmp")
    if not os.path.exists("reference_tmp"):
        os.mkdir("reference_tmp")
    if not os.path.exists("temp-files"):
        os.mkdir("temp-files")

    # Hacky housecleaning. Pyrouge stores these massive tmp files and it
    # can't be turned off. So we store them here
    tempfile.tempdir = os.path.join(os.getcwd(), "temp-files")

    for i in range(0, num_articles):
        # File names
        decoded_filename = str(i).rjust(6, '0') + "_decoded.txt"
        reference_filename = str(i).rjust(6, '0') + "_reference.txt"

        # Copy files over to temp folder
        copyfile(os.path.join(decoded_path, decoded_filename),
                 os.path.join("decoded_tmp/", decoded_filename))
        copyfile(os.path.join(reference_path, reference_filename),
                 os.path.join("reference_tmp/", reference_filename))

        # ROUGE object
        r = Rouge155()
        r._system_dir = 'decoded_tmp/'
        r._model_dir = 'reference_tmp/'
        r.system_filename_pattern = '(\d+)_decoded.txt'
        r.model_filename_pattern = '#ID#_reference.txt'

        output = r.convert_and_evaluate()
        output_dict = r.output_to_dict(output)
        essential_keys = ['rouge_1_f_score', 'rouge_2_f_score',
                          'rouge_3_f_score', 'rouge_l_f_score']
        essential_dict = {key: output_dict[key] for key in essential_keys}
        scores.append(essential_dict)

        # Remove temp files
        os.unlink("decoded_tmp/" + decoded_filename)
        os.unlink("reference_tmp/" + reference_filename)

    # Pickle final results
    pickle_out = open(model_name + ".pic", "wb")
    pickle.dump(scores, pickle_out)
    pickle_out.close()

    # Delete excessive log files
    print("Removing temp files")
    rmtree(path="temp-files")
    rmtree(path="decoded_tmp/")
    rmtree(path="reference_tmp/")

def pickle_save(contact, filepath):
    # Use the pickle module to save the data object to the file at filepath
    f = open(filepath, 'w')
    pickle.dump(contact, f)
    f.close()

def main(): print 'Start...' input_file = raw_input('Input file: ') # input_file = 'input/me_at_the_zoo.in' file_name = input_file.split('/')[-1].split('.')[0] pickle_files_path = { 'infos': pickles_files_path("infos", file_path=file_name), 'videos_sizes': pickles_files_path("videos_sizes", file_path=file_name), 'endpoints_objects': pickles_files_path("endpoints_objects", file_path=file_name), 'calculation_objects': pickles_files_path("calculation_objects", file_path=file_name), } f = open(input_file, 'r') f_list = f.readlines() first_line = f_list.pop(0)[:-1].split(' ') # ask if load data from caches files or from input files use_cached_data = (raw_input('Use cached data? (y/n): ') == 'y') if exists('tmp/' + file_name) and not use_cached_data: shutil.rmtree('tmp/' + file_name) if not exists('tmp/' + file_name): mkdir('tmp/' + file_name) if isfile(pickle_files_path['infos']): infos = pickle.load(open(pickle_files_path['infos'], "rb")) else: infos = { 'n_videos': int(first_line[0]), 'n_endpoints': int(first_line[1]), 'n_request_descr': int(first_line[2]), 'n_caches': int(first_line[3]), 'caches_size': int(first_line[4]) } pickle.dump(infos, open(pickle_files_path['infos'], "wb")) data = None table_ep_requests = None endpoints_latency_data_center = None table_ep_cchs = None # try to load data from caches files, if not caches files doesn't exists read data from input file try: videos_sizes = pickle.load( open(pickle_files_path['videos_sizes'], "rb")) data = np.load(pickle_files_path['endpoints_objects']) if not ('table_endpoints_requests' and 'endpoints_latency_data_center' and 'table_endpoints_caches') in data.keys(): data.close() raise IOError endpoints_latency_data_center = data['endpoints_latency_data_center'] table_ep_cchs = data['table_endpoints_caches'] table_ep_requests = data['table_endpoints_requests'] data.close() print 'Load from cache!' except IOError: print 'Data not in cache, prepare to read file...' 
# array with videos sizes # size -> (1D) #videos videos_sizes = map(int, f_list.pop(0)[:-1].split(' ')) pickle.dump(videos_sizes, open(pickle_files_path['videos_sizes'], "wb")) # array with latencies from endpoints to data center # access with enpoint id # size -> (1D) #enpoints endpoints_latency_data_center = np.zeros(shape=infos['n_endpoints']) # table to relation endpoints latency to a specific cache # size -> (2D) lines=#endpoints | columns=#caches table_ep_cchs = np.zeros(shape=(infos['n_endpoints'], infos['n_caches'])) # go to over all endpoints informations # read latency from endpoint to datacenter and save on endpoints_latency_data_center # read latency from that endpoint to cache and save on table_ep_cchs for i in range(0, infos['n_endpoints']): endpoint_info = f_list.pop(0)[:-1].split(' ') endpoints_latency_data_center[i] = int(endpoint_info[0]) for j in range(0, int(endpoint_info[1])): cache = f_list.pop(0)[:-1].split(' ') table_ep_cchs[i, int(cache[0])] = int(cache[1]) print "Reading...\n" # table to relation videos requests with enpoint from they come # size -> (2D) lines=#endpoints | columns=#videos table_ep_requests = np.zeros(shape=(infos['n_endpoints'], infos['n_videos'])) # go over all request descriptions # read request information # verify if video from that request has a size greater than cache size # if so -> save the #requests of that video from the specific endpoint on table_ep_requests for i in range(0, int(infos['n_request_descr'])): videos_info = f_list.pop(0)[:-1].split(' ') video_id = int(videos_info[0]) if videos_sizes[video_id] > infos['caches_size']: continue table_ep_requests[int(videos_info[1]), int(videos_info[0])] = int(videos_info[2]) # caches data np.savez(pickle_files_path['endpoints_objects'], endpoints_latency_data_center=endpoints_latency_data_center, table_endpoints_caches=table_ep_cchs, table_endpoints_requests=table_ep_requests) print 'Data loaded!' if use_cached_data: data = np.load(pickle_files_path['calculation_objects']) matrix_caches_requests = data['matrix_caches_requests'] else: matrix_caches_requests = np.zeros(shape=(infos['n_videos'], infos['n_caches']), dtype='int') total_latency_dataCenter_matrix = table_ep_requests * np.transpose( endpoints_latency_data_center)[:, None] print 'Start calculations...' for i in range(0, table_ep_requests.shape[1]): x = table_ep_requests[:, i] latency_dataCenter = total_latency_dataCenter_matrix[:, i] tmp_matrix = latency_dataCenter[:, None] - table_ep_cchs * x[:, None] matrix_caches_requests[i, :] = np.sum(tmp_matrix, axis=0) print 'Almost done...' np.savez(pickle_files_path['calculation_objects'], matrix_caches_requests=matrix_caches_requests) tmp_matrix = (-matrix_caches_requests).argsort(axis=None, kind='mergesort') tmp_matrix = np.unravel_index(tmp_matrix, matrix_caches_requests.shape) index_matrix_caches_requests_sorted = np.vstack(tmp_matrix).T del tmp_matrix del matrix_caches_requests import gc gc.collect() caches_ocup_size = np.zeros(infos['n_caches']) caches_videos_id = [[] for i in range(infos['n_caches'])] print 'Just some more calculations...' 
    # Greedy pass: take the best remaining (video, cache) pair and place the
    # video in that cache whenever it still fits and still serves requests
    while True:
        request_cache = index_matrix_caches_requests_sorted[0]
        if index_matrix_caches_requests_sorted.shape[0] <= 1:
            break
        index_matrix_caches_requests_sorted = \
            index_matrix_caches_requests_sorted[1:]

        # Endpoints connected to this cache...
        ep_of_cch = np.nonzero(table_ep_cchs[:, request_cache[1]])[0]
        # ...endpoints still requesting this video...
        v_reqs = np.nonzero(table_ep_requests[:, request_cache[0]])[0]
        # ...and their intersection: the requests this placement would serve
        v_reqs_to_rm = np.intersect1d(ep_of_cch, v_reqs, assume_unique=True)
        if v_reqs_to_rm.size <= 0:
            continue

        if caches_ocup_size[request_cache[1]] + videos_sizes[
                request_cache[0]] <= infos['caches_size']:
            caches_ocup_size[request_cache[1]] += videos_sizes[
                request_cache[0]]
            caches_videos_id[request_cache[1]].append(request_cache[0])
            table_ep_requests[:, request_cache[0]][v_reqs_to_rm] = 0

    print 'Writing output...'
    f_out = open('output/' + file_name + '.out', 'w')
    f_out.write(str(len(caches_ocup_size)) + '\n')
    for i in range(0, len(caches_videos_id)):
        f_out.write(str(i))
        for videos_id in caches_videos_id[i]:
            f_out.write(' ' + str(videos_id))
        f_out.write('\n')
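# main() relies on a pickles_files_path helper that is not shown in this
# snippet. A minimal sketch of what it could look like, assuming it simply
# builds a cache-file path under tmp/<file_name>/ (the '_objects' entries are
# written with np.savez, so they presumably carry the '.npz' suffix):
def pickles_files_path(name, file_path=''):
    suffix = '.npz' if name.endswith('_objects') else '.pickle'
    return 'tmp/' + file_path + '/' + name + suffix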
def write(self):
    log_fid = open(self.log_file, 'w')
    cPickle.dump(self, log_fid)
    log_fid.close()
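# Loading such a log back requires the pickled class to be importable under
# the same module path it was written from, and every attribute on self to be
# picklable (an open file handle stored on self would make the dump above
# fail). A hypothetical reader, not part of the original source:
def read_log(log_file):
    log_fid = open(log_file, 'r')
    obj = cPickle.load(log_fid)
    log_fid.close()
    return obj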
def save_binary_pickle(data, filepath):
    with open(filepath, 'wb') as f:
        pickle.dump(data, f)
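# A matching loader for save_binary_pickle; a hypothetical counterpart, not
# from the original source:
def load_binary_pickle(filepath):
    with open(filepath, 'rb') as f:
        return pickle.load(f)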
def _dump(att, dat):
    # Pickle the named attributes of the enclosing self as a single tuple,
    # using the highest available protocol (-1)
    pickle.dump(tuple(getattr(self, a) for a in att), dat, -1)
def serialise(obj, f):
    pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
def saveParams(para, fname):
    f = file(fname, 'wb')
    cPickle.dump(para, f, protocol=cPickle.HIGHEST_PROTOCOL)
    f.close()
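# A matching loader for saveParams; a hypothetical counterpart, not from the
# original source:
def loadParams(fname):
    f = file(fname, 'rb')
    para = cPickle.load(f)
    f.close()
    return para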
def kitti_eval(detpath,
               annopath,
               imagesetfile,
               classname,
               cachedir,
               ovthresh=0.5,
               use_07_metric=False,
               imagepath=None):
    """rec, prec, ap = kitti_eval(detpath, annopath, imagesetfile,
                                  classname, [ovthresh], [use_07_metric])

    Top level function that does the PASCAL VOC evaluation.

    detpath: Path to detections
        detpath.format(classname) should produce the detection results file.
    annopath: Path to annotations
        annopath.format(imagename) should be the xml annotations file.
    imagesetfile: Text file containing the list of images, one image per line.
    classname: Category name
    cachedir: Directory for caching the annotations
    [ovthresh]: Overlap threshold (default = 0.5)
    [use_07_metric]: Whether to use VOC07's 11-point AP computation
        (default False)
    """
    # assumes detections are in detpath.format(classname)
    # assumes annotations are in annopath.format(imagename)
    # assumes imagesetfile is a text file with each line an image name
    # cachedir caches the annotations in a pickle file

    # first load gt
    if not os.path.isdir(cachedir):
        os.mkdir(cachedir)
    cachefile = os.path.join(cachedir, 'annots.pkl')
    # read list of images
    with open(imagesetfile, 'r') as f:
        lines = f.readlines()
    imagenames = [x.strip() for x in lines]

    if not os.path.isfile(cachefile):
        # load annots
        recs = {}
        for i, imagename in enumerate(imagenames):
            recs[imagename] = parse_rec(annopath.format(imagename))
            if i % 100 == 0:
                print 'Reading annotation for {:d}/{:d}'.format(
                    i + 1, len(imagenames))
        # save
        print 'Saving cached annotations to {:s}'.format(cachefile)
        with open(cachefile, 'w') as f:
            cPickle.dump(recs, f)
    else:
        # load
        with open(cachefile, 'r') as f:
            recs = cPickle.load(f)

    # extract gt objects for this class
    class_recs = {}
    npos = 0
    n = 0
    vis_gt_ex = False
    for imagename in imagenames:
        R = [obj for obj in recs[imagename] if obj['name'] == classname]
        bbox = np.array([x['bbox'] for x in R])
        npos = npos + len(bbox)
        det = [False] * len(R)
        class_recs[imagename] = {'bbox': bbox, 'det': det}
        if n < 10 and len(bbox) > 0 and vis_gt_ex == True:
            im = cv2.imread(imagepath.format(imagename))
            vis_detections(im, classname, class_recs[imagename]['bbox'],
                           thresh=0.1)
            n += 1

    # read dets
    detfile = detpath.format(classname)
    with open(detfile, 'r') as f:
        lines = f.readlines()

    splitlines = [x.strip().split(' ') for x in lines]
    image_ids = [x[0] for x in splitlines]
    confidence = np.array([float(x[1]) for x in splitlines])
    BB = np.array([[float(z) for z in x[2:]] for x in splitlines])

    # sort by confidence
    sorted_ind = np.argsort(-confidence)
    sorted_scores = np.sort(-confidence)
    BB = BB[sorted_ind, :]
    image_ids = [image_ids[x] for x in sorted_ind]

    # optionally visualize a few random detections
    vis_det_ex = True
    if vis_det_ex:
        image_ids_to_idx = {}
        for idx, img_id in enumerate(image_ids):
            if img_id in image_ids_to_idx.keys():
                image_ids_to_idx[img_id] += [idx]
            else:
                image_ids_to_idx[img_id] = [idx]
        n = 10
        ids = np.random.permutation(len(BB))[:n]
        for i, idx in enumerate(ids):
            # we want "n" unique images
            # we need to grab "class_recs[image_ids[d]]"
            print(i, idx)
            image_id = image_ids[idx]
            # now find all the indices with the given image_id
            image_idx = image_ids_to_idx[image_id]
            bbox = BB[image_idx, :]
            conf = -1 * sorted_scores[image_idx]
            print(conf)
            bboxes = np.concatenate((bbox, conf[:, np.newaxis]), axis=1)
            if len(bboxes) > 0:
                im = cv2.imread(imagepath.format(image_id))
                vis_detections(im, classname, bboxes, thresh=0.20,
                               name="vis_det_{}.png")
                n += 1
            else:
                print("ohno!")
                sys.exit(1)

    # go down dets and mark TPs and FPs
    # NOTE: this overrides the ovthresh argument and evaluates at several
    # fixed overlap thresholds instead
    ovthresh = [0.5, 0.75, 0.95]
    nd = len(image_ids)
    tp = np.zeros((nd, len(ovthresh)))
    fp = np.zeros((nd, len(ovthresh)))
    for d in range(nd):
        R = class_recs[image_ids[d]]
        bb = BB[d, :].astype(float)
        ovmax = -np.inf
        BBGT = R['bbox'].astype(float)

        if BBGT.size > 0:
            # compute overlaps
            # intersection
            ixmin = np.maximum(BBGT[:, 0], bb[0])
            iymin = np.maximum(BBGT[:, 1], bb[1])
            ixmax = np.minimum(BBGT[:, 2], bb[2])
            iymax = np.minimum(BBGT[:, 3], bb[3])
            iw = np.maximum(ixmax - ixmin + 1., 0.)
            ih = np.maximum(iymax - iymin + 1., 0.)
            inters = iw * ih

            # union
            uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
                   (BBGT[:, 2] - BBGT[:, 0] + 1.) *
                   (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)

            overlaps = inters / uni
            ovmax = np.max(overlaps)
            jmax = np.argmax(overlaps)

        # mark the detection TP/FP at each overlap threshold
        inside_any = False
        for idx in range(len(ovthresh)):
            if ovmax > ovthresh[idx]:
                if not R['det'][jmax]:
                    inside_any = True
                    tp[d, idx] = 1.
                else:
                    fp[d, idx] = 1.
            else:
                fp[d, idx] = 1.
        if inside_any is True:
            R['det'][jmax] = 1

    rec = np.zeros((len(fp), len(ovthresh)))
    prec = np.zeros((len(fp), len(ovthresh)))
    ap = np.zeros(len(ovthresh))
    for idx in range(len(ovthresh)):
        # compute precision and recall
        _fp = np.cumsum(fp[:, idx])
        _tp = np.cumsum(tp[:, idx])
        rec[:, idx] = _tp / float(npos)
        # avoid divide by zero in case the first detection matches a
        # difficult ground truth
        prec[:, idx] = _tp / np.maximum(_tp + _fp, np.finfo(np.float64).eps)
        ap[idx] = voc_ap(rec[:, idx], prec[:, idx], classname, False)

    return rec, prec, ap, ovthresh
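# A hedged usage sketch for kitti_eval; every path template below is a
# placeholder, not taken from the source:
rec, prec, ap, thresholds = kitti_eval(
    'results/det_{:s}.txt',      # detpath: one detection file per class
    'annotations/{:s}.xml',      # annopath: one xml file per image
    'imagesets/val.txt',         # imagesetfile
    'car',                       # classname
    'cache',                     # cachedir
    imagepath='images/{:s}.png')
for t, a in zip(thresholds, ap):
    print 'AP@{:.2f}: {:.4f}'.format(t, a)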
#!/usr/bin/python2
# filename: pickling.py
import cPickle as p

shoplistfile = 'shoplist.data'
shoplist = ['apple', 'mango', 'carrot']

# Dump the list to a file...
f = file(shoplistfile, 'w')
p.dump(shoplist, f)
f.close()

del shoplist

# ...and load it back
f = file(shoplistfile)
storelist = p.load(f)
print storelist
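# The same round trip in binary mode with the highest pickle protocol; a
# variant of the script above, not part of the original:
groceries = ['apple', 'mango', 'carrot']
f = file(shoplistfile, 'wb')
p.dump(groceries, f, p.HIGHEST_PROTOCOL)
f.close()
f = file(shoplistfile, 'rb')
print p.load(f)
f.close()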
def get_data(text_only):
    # text_only = False
    if text_only:
        print("Text only")
        image_list = []
    else:
        print("Text and image")
        image_list = read_image()

    train_data = write_data("train", image_list, text_only)
    validate_data = write_data("validate", image_list, text_only)
    test_data = write_data("test", image_list, text_only)

    print("loading data...")
    # w2v_file = '../Data/GoogleNews-vectors-negative300.bin'
    vocab, all_text = load_data(train_data, validate_data, test_data)
    print("number of sentences: " + str(len(all_text)))
    print("vocab size: " + str(len(vocab)))
    max_l = len(max(all_text, key=len))
    print("max sentence length: " + str(max_l))

    word_embedding_path = "../Data/weibo/w2v.pickle"
    w2v = pickle.load(open(word_embedding_path, 'rb'))
    print("word2vec loaded!")
    print("num words already in word2vec: " + str(len(w2v)))

    # file_path = "../Data/weibo/event_clustering.pickle"
    # if not os.path.exists(file_path):
    #     train = []
    #     for l in train_data["post_text"]:
    #         line_data = []
    #         for word in l:
    #             line_data.append(w2v[word])
    #         line_data = np.matrix(line_data)
    #         line_data = np.array(np.mean(line_data, 0))[0]
    #         train.append(line_data)
    #     train = np.array(train)
    #     cluster = AgglomerativeClustering(n_clusters=15, affinity='cosine',
    #                                       linkage='complete')
    #     cluster.fit(train)
    #     y = np.array(cluster.labels_)
    #     pickle.dump(y, open(file_path, 'wb+'))
    # else:
    #     y = pickle.load(open(file_path, 'rb'))
    # print("Event length is " + str(len(y)))
    # center_count = {}
    # for k, i in enumerate(y):
    #     if i not in center_count:
    #         center_count[i] = 1
    #     else:
    #         center_count[i] += 1
    # print(center_count)
    # train_data['event_label'] = y

    add_unknown_words(w2v, vocab)
    W, word_idx_map = get_W(w2v)
    # rand_vecs = {}
    # add_unknown_words(rand_vecs, vocab)
    W2 = rand_vecs = {}
    w_file = open("../Data/weibo/word_embedding.pickle", "wb")
    pickle.dump([W, W2, word_idx_map, vocab, max_l], w_file)
    w_file.close()
    return train_data, validate_data, test_data
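# Reading the embedding bundle written by get_data back in; a minimal sketch,
# assuming the same five-element list layout:
with open("../Data/weibo/word_embedding.pickle", "rb") as emb_file:
    W, W2, word_idx_map, vocab, max_l = pickle.load(emb_file)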
def save(self, filename):
    with open(filename, "wb") as fout:
        cPickle.dump(self, fout, protocol=2)
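# A hypothetical classmethod counterpart to save(), meant to sit in the same
# class; not part of the original source:
@classmethod
def load(cls, filename):
    with open(filename, "rb") as fin:
        return cPickle.load(fin)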
for row in reader:
    if ind == 0:
        dataheader.append(row)
        ind += 1
    else:
        if row[2] == 'Training':
            yTrain.append(int(row[0]))
            XTrain.append([int(j) for j in row[1].split()])
        elif row[2] == 'PublicTest':
            yValid.append(int(row[0]))
            XValid.append([int(j) for j in row[1].split()])
        else:
            yTest.append(int(row[0]))
            XTest.append([int(j) for j in row[1].split()])

XTrain = np.array(XTrain)
XTest = np.array(XTest)
XValid = np.array(XValid)

pickle.dump(XTrain, open('XTrain', 'wb'))
pickle.dump(yTrain, open('yTrain', 'wb'))
pickle.dump(XValid, open('XValid', 'wb'))
pickle.dump(yValid, open('yValid', 'wb'))
pickle.dump(XTest, open('XTest', 'wb'))
pickle.dump(yTest, open('yTest', 'wb'))
pickle.dump(Emotion, open('emotionList', 'wb'))
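# Loading the dumped splits back; a minimal sketch, assuming the same working
# directory as the dumps above:
XTrain = pickle.load(open('XTrain', 'rb'))
yTrain = pickle.load(open('yTrain', 'rb'))
emotion_list = pickle.load(open('emotionList', 'rb'))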