def save_dictionary(worddict, wordcount, loc='./data/book_dictionary_large.pkl'):
    """
    Save a dictionary to the specified location
    """
    with open(loc, 'wb') as f:
        pkl.dump(worddict, f)
        pkl.dump(wordcount, f)
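# Hedged sketch of the matching load routine: save_dictionary writes two objects
# to one file with consecutive pkl.dump calls, so they must be read back with two
# pkl.load calls in the same order. The function name load_dictionary is an
# assumption, not part of the original code.
import pickle as pkl

def load_dictionary(loc='./data/book_dictionary_large.pkl'):
    """Load a (worddict, wordcount) pair written by save_dictionary."""
    with open(loc, 'rb') as f:
        worddict = pkl.load(f)   # first object dumped
        wordcount = pkl.load(f)  # second object dumped
    return worddict, wordcount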
def main():
    """Generate initial word embedding for headlines and description."""
    headlines, desc = load_text()  # load headlines and descriptions
    vocab, vocab_count = build_vocab(headlines + desc)  # build vocabulary
    summarize_vocab(vocab, vocab_count)  # summarize vocabulary
    word2idx, idx2word = get_idx(vocab)  # add special tokens and get reverse vocab lookup
    glove_embedding_weights, glove_index_dict = get_glove()  # load GloVe data

    # initialize embedding
    embedding = initialize_embedding(vocab_size, embedding_dim, glove_embedding_weights)
    embedding = copy_glove_weights(embedding, idx2word, glove_embedding_weights, glove_index_dict)

    # map vocab to GloVe using cosine similarity
    glove_idx2idx = build_word_to_glove(embedding, word2idx, idx2word,
                                        glove_index_dict, glove_embedding_weights)

    # create a dense vector representation of headlines and descriptions
    description_vector = to_dense_vector(word2idx, desc, 'description')
    headline_vector = to_dense_vector(word2idx, headlines, 'headline')

    # write vocabulary to disk
    with open(path.join(config.path_data, '{}.pkl'.format(FN)), 'wb') as fp:
        pickle.dump((embedding, idx2word, word2idx, glove_idx2idx), fp, 2)

    # write data to disk
    with open(path.join(config.path_data, '{}.data.pkl'.format(FN)), 'wb') as fp:
        pickle.dump((description_vector, headline_vector), fp, 2)
def save_results(self, output_name, keep_data=False):
    '''
    Save the results of the SCF to avoid re-computing.
    The pickled file will not include the data cube by default.

    Parameters
    ----------
    output_name : str
        Name of the outputted pickle file.
    keep_data : bool, optional
        Save the data cube in the pickle file when enabled.
    '''
    if not output_name.endswith(".pkl"):
        output_name += ".pkl"

    self_copy = deepcopy(self)

    # Don't keep the whole cube unless keep_data enabled.
    if not keep_data:
        self_copy._centroid = None
        self_copy._moment0 = None
        self_copy._linewidth = None

    with open(output_name, 'wb') as output:
        pickle.dump(self_copy, output, -1)
def importAndSave(foldername, saveName):
    print('Importing, processing, and saving data for analysis...')
    data = TC.AllExperimentData(foldername)
    print(saveName)
    with open(TCG.DATA_SAVE_PATH + saveName, 'wb') as output:
        pickle.dump(data, output, -1)
    print('Done.')
    return data
def save_to_numpy(seriesuid, img, meta):
    file = '{}/{}'.format(PREPROCESS_PATH, seriesuid)

    with h5py.File(file + '.h5', 'w') as hf:
        hf.create_dataset('img', data=img)

    with open(file + '.meta', 'wb') as f:
        pickle.dump(meta, f)
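# Hedged sketch of a matching loader, assuming the same on-disk layout as above:
# the image lives in an HDF5 dataset named 'img' and the metadata in a pickled
# '.meta' file. The function name load_from_numpy and the explicit
# preprocess_path parameter are assumptions for illustration.
import pickle
import h5py

def load_from_numpy(seriesuid, preprocess_path):
    file = '{}/{}'.format(preprocess_path, seriesuid)
    with h5py.File(file + '.h5', 'r') as hf:
        img = hf['img'][:]          # read the dataset back into memory
    with open(file + '.meta', 'rb') as f:
        meta = pickle.load(f)
    return img, meta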
def save_binary(self, configuration):
    YLogger.info(self, "Saving binary brain to [%s]", configuration.binaries.binary_filename)
    start = datetime.datetime.now()
    bin_file = open(configuration.binaries.binary_filename, "wb")
    pickle.dump(self._aiml_parser, bin_file)
    bin_file.close()
    stop = datetime.datetime.now()
    diff = stop - start
    YLogger.info(self, "Brain save took a total of %.2f sec", diff.total_seconds())
def savePickle(data):
    # open file for editing
    pickl = open('data.pkl', 'wb')
    # write object to file
    pickle.dump(data, pickl)
    # close at the end
    pickl.close()
def sigmoid(x):
    with np.errstate(all='raise'):
        try:
            rv = 1.0 / (1.0 + np.exp(-x))
        except FloatingPointError:
            pickle.dump(model, open('save_err.p', 'wb'))
            logger.error('sigmoid(x): Floating point error because of %f', x)
            sys.exit(1)
    return rv
def saveWTL(config, p1, p2, w, t, l):
    if w > 0 or t > 0 or l > 0:
        data = {
            "player1": p1,
            "player2": p2,
            "wins": w,
            "ties": t,
            "losses": l}
        pickle.dump(data, open(config.data.performance_location + "staged_" + str(time()) + ".pickle", "wb"))
        sleep(0.05)
def dsigmoid(x):
    with np.errstate(over='raise'):
        try:
            ex = np.exp(x)
            rv = ex / ((ex + 1.0)**2.0)
        except FloatingPointError:
            pickle.dump(model, open('save_err.p', 'wb'))
            logger.error('dsigmoid(x): Floating point error because of %f', x)
            sys.exit(1)
    return rv
def cpdump(data, filename, outDir=None):
    # import cPickle
    import _pickle as cPickle
    if outDir is not None:
        filename = outDir + filename
    fp = open(filename, "wb")  # fp=file(filename,'wb')
    cPickle.dump(data, fp)
    fp.close()
def save(self, config):
    # Saving
    print("==> Saving models")
    os.system('mkdir -p ' + config.get('save'))
    filename = os.path.join(config.get('save'), config.get('name'))
    _pickle.dump({'config': self.config, 'record': self.record}, open(filename + '_main', 'wb'))
    _pickle.dump(self.model.getSequense(), open(filename + '_sequence', 'wb'), protocol=4)
    print("==> Saving done.")
def save(self, filename):
    """
    Pickle the TransferFunction instance

    :param filename: name of the file to save the instance to
    :type filename: str.
    """
    with open(filename, "wb") as fp:
        pkl.dump(self, fp, protocol=2)
def save_strategy_as(self):
    """
    Save the strategy to a pickle file so it can be imported in
    another copy of the GSF Parser.
    """
    file_name = filedialog.asksaveasfilename(
        filetypes=[("GSF Strategy", ".str")],
        defaultextension=".str",
        title="GSF Strategy Manager: Save a strategy")
    if file_name == "" or file_name is None:
        return
    strategy = self.list.db[self.list.selected_strategy]
    with open(file_name, "wb") as fo:
        pickle.dump(strategy, fo)
def play_games(self, env=None):
    if env is None:
        env = gym.make(self.gamename)
    game_results = []
    for _ in range(self.worker_replays):
        game_results.append(self.play_game(env))
    filename = 'tmp/' + str(np.random.rand()) + '.pickle'
    f = open(filename, 'wb')
    cPickle.dump(game_results, f)
    f.close()
    return filename
def train_model(input_to_softmax, pickle_path, save_model_path, train_json='train_corpus.json', valid_json='valid_corpus.json', minibatch_size=20, spectrogram=True, mfcc_dim=13, optimizer=SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5), epochs=1, verbose=1, sort_by_duration=False, max_duration=10.0): # create a class instance for obtaining batches of data audio_gen = AudioGenerator(minibatch_size=minibatch_size, spectrogram=spectrogram, mfcc_dim=mfcc_dim, max_duration=max_duration, sort_by_duration=sort_by_duration) # add the training data to the generator audio_gen.load_train_data(train_json) audio_gen.load_validation_data(valid_json) # calculate steps_per_epoch num_train_examples=len(audio_gen.train_audio_paths) steps_per_epoch = num_train_examples//minibatch_size # calculate validation_steps num_valid_samples = len(audio_gen.valid_audio_paths) validation_steps = num_valid_samples//minibatch_size # add CTC loss to the NN specified in input_to_softmax model = add_ctc_loss(input_to_softmax) # CTC loss is implemented elsewhere, so use a dummy lambda function for the loss model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=optimizer) # make results/ directory, if necessary if not os.path.exists('results'): os.makedirs('results') # add checkpointer checkpointer = ModelCheckpoint(filepath='results/'+save_model_path, verbose=0) # train the model hist = model.fit_generator(generator=audio_gen.next_train(), steps_per_epoch=steps_per_epoch, epochs=epochs, validation_data=audio_gen.next_valid(), validation_steps=validation_steps, callbacks=[checkpointer], verbose=verbose) # save model loss with open('results/'+pickle_path, 'wb') as f: pickle.dump(hist.history, f)
def pickleIt(obj, save_path):
    '''
    function to pickle the given object.
    @param
        obj => the python object to be pickled
        save_path => the path where the pickled file is to be saved
    @return => nothing (the pickle file gets saved at the given location)
    '''
    if(not os.path.isfile(save_path)):
        with open(save_path, 'wb') as dumping:
            pickle.dump(obj, dumping)
        print("The file has been pickled at:", save_path)
    else:
        print("The pickle file already exists: ", save_path)
def get_tasks(task_names, max_seq_len, load):
    ''' Load tasks '''
    tasks = []
    for name in task_names:
        assert name in NAME2INFO, 'Task not found!'
        pkl_path = NAME2INFO[name][1] + "%s_task.pkl" % name
        if os.path.isfile(pkl_path) and load:
            task = pkl.load(open(pkl_path, 'rb'))
            log.info('\tLoaded existing task %s', name)
        else:
            task = NAME2INFO[name][0](NAME2INFO[name][1], max_seq_len, name)
            pkl.dump(task, open(pkl_path, 'wb'))
        tasks.append(task)
    log.info("\tFinished loading tasks: %s.", ' '.join([task.name for task in tasks]))
    return tasks
def do_python_eval(devkit_path, year, image_set, classes, output_dir = 'results'): annopath = os.path.join( devkit_path, 'VOC' + year, 'Annotations', '{}.xml') imagesetfile = os.path.join( devkit_path, 'VOC' + year, 'ImageSets', 'Main', image_set + '.txt') cachedir = os.path.join(devkit_path, 'annotations_cache') aps = [] # The PASCAL VOC metric changed in 2010 use_07_metric = True if int(year) < 2010 else False print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No')) print('devkit_path=',devkit_path,', year = ',year) if not os.path.isdir(output_dir): os.mkdir(output_dir) for i, cls in enumerate(classes): if cls == '__background__': continue filename = get_voc_results_file_template(image_set).format(cls) rec, prec, ap = voc_eval( filename, annopath, imagesetfile, cls, cachedir, ovthresh=0.5, use_07_metric=use_07_metric) aps += [ap] print('AP for {} = {:.4f}'.format(cls, ap)) with open(os.path.join(output_dir, cls + '_pr.pkl'), 'wb') as f: cPickle.dump({'rec': rec, 'prec': prec, 'ap': ap}, f) print('Mean AP = {:.4f}'.format(np.mean(aps))) print('~~~~~~~~') print('Results:') for ap in aps: print('{:.3f}'.format(ap)) print('{:.3f}'.format(np.mean(aps))) print('~~~~~~~~') print('') print('--------------------------------------------------------------') print('Results computed with the **unofficial** Python eval code.') print('Results should be very close to the official MATLAB eval code.') print('-- Thanks, The Management') print('--------------------------------------------------------------')
def _resize32x32(self, full_filepath):
    def _resize(data_in):
        num_samples = data_in.shape[0]
        tmp_data_out = np.zeros((num_samples, 1, 32, 32))
        for i in range(0, num_samples):
            tmp_img = data_in[i, :].reshape(28, 28)
            new_img = cv2.resize(tmp_img, dsize=(32, 32), interpolation=cv2.INTER_NEAREST)
            tmp_data_out[i, 0, :, :] = new_img
        return tmp_data_out

    f = gzip.open(full_filepath, 'rb')
    train_set, valid_set, test_set = cPickle.load(f)
    f.close()

    with gzip.open(full_filepath, 'wb') as handle:
        cPickle.dump(([_resize(train_set[0]), train_set[1]],
                      [_resize(valid_set[0]), valid_set[1]],
                      [_resize(test_set[0]), test_set[1]]), handle)
def pickle_save(filename, model, y_pred, y_valid):
    ### SAVE Processed Data
    pickle_file = '%s/%s' % (Dir, filename)
    try:
        f = open(pickle_file, 'wb')
        save = {
            'model': model,
            'y_pred': y_pred,
            'y_valid': y_valid,
        }
        # pickle.dump(save, f, pickle.HIGHEST_PROTOCOL)
        pickle.dump(save, f)
        f.close()
    except Exception as e:
        print('Unable to save data to', pickle_file, ':', e)
        raise
    ### SAVED
    return
def __init__(self, config_file_path, session_file_path, repo=None):
    self._gerrit_url = None
    self._jira = None
    self._repo = repo

    if os.path.isfile(session_file_path):
        with open(session_file_path, 'rb') as session_file:
            session = pickle.load(session_file)
    else:
        session = None

    if config_file_path is not None:
        self._read_config(config_file_path, session)

        if session is not None:
            session.max_retries = 3
            self._jira._session = session
        else:
            with open(session_file_path, 'wb') as session_file:
                pickle.dump(self._jira._session, session_file)
def transform_text_2_sentences(train, test, save_train='../output/train_text.p', save_test='../output/test_text.p'):
    '''
    Transforming raw text into sentences; if @save_train or @save_test is not None,
    saves pickles for further use
    '''
    train_text = []
    test_text = []

    for each in train['Combined']:
        train_text.append(text_process(each))
    for each in test['Combined']:
        test_text.append(text_process(each))

    if save_train is not None:
        cPickle.dump(train_text, open(save_train, 'wb'))
    if save_test is not None:
        cPickle.dump(test_text, open(save_test, 'wb'))

    return train_text, test_text
def write_to_file(self, save_file):
    "Write all the times to file."
    try:
        with TestTimes.LockedFile(save_file, 'a+b') as fd:
            times = TestTimes.__read_test_times_file(fd)
            if times is None:
                times = self.__times
            else:
                times.update(self.__times)
            # We erase data from file while still holding a lock to it. This
            # way reading old test times and appending new ones are atomic
            # for external viewer.
            fd.seek(0)
            fd.truncate()
            with gzip.GzipFile(fileobj=fd, mode='wb') as gzf:
                cPickle.dump(times, gzf, PICKLE_HIGHEST_PROTOCOL)
    except IOError:
        pass  # ignore errors---saving the times isn't that important
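# Hedged sketch of how a gzip-wrapped pickle like the one written above can be
# read back, mirroring the write path; the standalone function name
# read_times_file is an assumption, not part of the original class.
import gzip
import pickle

def read_times_file(path):
    """Return the times dict stored as a gzip-compressed pickle."""
    with open(path, 'rb') as fd:
        with gzip.GzipFile(fileobj=fd, mode='rb') as gzf:
            return pickle.load(gzf)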
def preprocess(self, input_file, vocab_file, tensor_file):
    with codecs.open(input_file, "r", encoding=self.encoding) as f:
        train_data = f.read()
        train_data = normalize_unicodes(train_data)

    counter = collections.Counter(train_data)
    count_pairs = sorted(counter.items(), key=lambda x: -x[1])
    threshold = 10
    self.chars, counts = zip(*count_pairs)
    self.chars = START_VOCAB + [c for i, c in enumerate(self.chars)
                                if c not in START_VOCAB and counts[i] > threshold]
    self.vocab_size = len(self.chars)
    self.vocab = dict(zip(self.chars, range(len(self.chars))))
    with open(vocab_file, 'wb') as f:
        cPickle.dump(self.chars, f)

    unk_index = START_VOCAB.index(UNK)
    self.tensor = np.array([self.vocab.get(c, unk_index) for c in train_data], dtype=np.int64)
    train_size = int(self.tensor.shape[0] * 0.9)
    self.valid = self.tensor[train_size:]
    self.train = self.tensor[:train_size]
    np.save(tensor_file, self.tensor)
def save(self, fpath='.', fname=None):
    """
    Save a pickled representation of Model state.
    """
    fpathstart, fpathext = os.path.splitext(fpath)
    if fpathext == '.pkl':
        # User supplied an absolute path to a pickle file
        fpath, fname = os.path.split(fpath)
    elif fname is None:
        # Generate filename based on date
        date_obj = datetime.datetime.now()
        date_str = date_obj.strftime('%Y-%m-%d-%H:%M:%S')
        class_name = self.__class__.__name__
        fname = '%s.%s.pkl' % (class_name, date_str)

    fabspath = os.path.join(fpath, fname)

    logger.info("Saving to %s ..." % fabspath)
    file = open(fabspath, 'wb')
    state = self.__getstate__()
    pickle.dump(state, file, protocol=pickle.HIGHEST_PROTOCOL)
    file.close()
def get_meta_dict():
    cache_file = '{}/all_meta_cache.meta'.format(PREPROCESS_PATH)
    if os.path.exists(cache_file):
        print('get meta_dict from cache')
        with open(cache_file, 'rb') as f:
            return pickle.load(f)

    meta_dict = {}
    for f in glob('{}/*.meta'.format(PREPROCESS_PATH)):
        seriesuid = f[-15:-5]
        if not os.path.exists('{}/{}.h5'.format(PREPROCESS_PATH, seriesuid)):
            continue

        with open(f, 'rb') as f:
            meta = pickle.load(f)
            meta_dict[meta['seriesuid']] = meta

    # cache it
    with open(cache_file, 'wb') as f:
        pickle.dump(meta_dict, f)

    return meta_dict
def train(ai, config): loaded_files = [] x = config.iterations i = len(glob.glob(config.data.model_location+"*.h5")) loaded_files, _ = load_games(ai, loaded_files, config) while(x != 0): if i > config.iter3: ai.update_lr(config.learning_rate3) elif i > config.iter2: ai.update_lr(config.learning_rate2) else: ai.update_lr(config.learning_rate1) loaded_files, diff = load_games(ai, loaded_files, config) total_diff = diff start = time() print("Iteration %04d"%i) end = config.min_new_game_files if i> 0 else config.min_game_file util.print_progress_bar(0, end, start=start) while(total_diff < end): if diff > 0: total_diff += diff util.print_progress_bar(total_diff, end, start=start) sleep(5) loaded_files, diff = load_games(ai, loaded_files, config) util.print_progress_bar(end, end, start=start) print("Training for %d batches on %d samples" % (config.batches_per_iter, len(ai.buffer.buffer))) start = time() history = ai.train_batches(config.batch_size, config.batches_per_iter, config.verbose) for val in history.history.keys(): print("%s: %0.4f" % (val, history.history[val][-1])) if i % config.save_model_cycles == 0: ai.save("%smodel_%04d.h5" % (config.data.model_location, i)) file = open("%shist_%04d.pickle" % (config.data.history_location, i), 'wb') pickle.dump(pickle.dumps(history.history), file) file.close() print("Iteration Time: %0.2f" % (time()-start)) x -= 1 i += 1
def save(self, filename, format=None, **kwargs):
    """
    Save ensemble data in an external file (in arbitrary format)

    :param filename: file name of the external file
    :type filename: str.

    :format: format in which to save the ensemble; if None the format is auto detected from the filename
    :type format: str. or callable

    :param kwargs: the keyword arguments are passed to the saver (or to format if callable)
    :type kwargs: dict.
    """

    # Auto detect format
    if format is None:
        if filename.endswith(".npy"):
            format = "numpy"
        elif filename.endswith(".mat"):
            format = "matlab"
        elif filename.endswith(".pkl"):
            format = "pickle"
        else:
            raise ValueError("Format not recognized!")

    # Proceed to the saving procedure
    if format == "numpy":
        np.save(filename, self.values)
    elif format == "matlab":
        sio.savemat(filename, {"values": self.values}, **kwargs)
    elif format == "pickle":
        with open(filename, "wb") as fp:
            pickle.dump(self, fp)
    else:
        format(self, filename, **kwargs)
def write_uncorrected_flux(self, uncorrected_flux): """writes each power slice to the file output.csv. 3d->file""" # There might be an error here in the cube writing to file, y instead of z split. # screw it, 4d array to 3d file doesn't work well. st="" u_flux=uncorrected_flux # voxel dimensions zlen=len(u_flux) ylen=len(u_flux[0]) xlen=len(u_flux[0][0]) for k in range(0, zlen): s="" power_temp=cell2(xlen, ylen) for i in range(0, ylen): for j in range(0, xlen): # flipdim unnecesary power_temp[i][j]=u_flux[k][i][j] power_temp_s=[list(map(str, i)) for i in power_temp] s+="\n".join([",".join(i) for i in power_temp_s]) if k==0: st+=s else: st+="\n\n"+s f=open("raw_flux.csv", "w") f.write(st) f.close() f2=open("pickle\\raw_flux.pkl", "wb") pickle.dump(u_flux, f2) f2.close()
output_ = output[-1].data targets_ = targets[-1].data targets_ = targets_.unsqueeze(dim=1) if len(output_.shape) < 3: output_.unsqueeze(dim=0) CELoss = torch.gather(output_, dim=1, index=targets_).squeeze() CELoss = -1 * CELoss save_all_losses += CELoss.tolist() loss = torch.sum(CELoss) total_loss += loss n += targets_.shape[0] if (n % 20000) == 0: print('Processed %d examples' % n) del output, targets, hidden print('Last word: %s' % (corpus.dictionary.idx2word[data.data[-1, -1]])) print('Total examples processed:', n) return total_loss / n, save_all_losses loss, all_losses = evaluate(data_, args) res = [args.seq_len, loss, math.exp(loss)] print(res) with open(os.path.join(args.logdir, 'per_token_scores_' + str(args.seq_len)), 'wb') as f: pickle.dump(args.seq_len, f) pickle.dump(all_losses, f)
def main(): train_pos, train_neg, test_pos, test_neg = load_data(path_to_data) # Using saved models and vectors for method == 'nlp'. (Orginal runtime = 5 mins; Current runtime = 10 seconds) if method == "nlp": train_pos_vec, train_neg_vec, test_pos_vec, test_neg_vec = feature_vecs_NLP( train_pos, train_neg, test_pos, test_neg) filename = './' + path_to_data + 'train_pos_vec_nlp.txt' pickle.dump(train_pos_vec, open(filename, 'wb')) train_pos_vec = pickle.load(open(filename, 'rb')) filename = './' + path_to_data + 'train_neg_vec_nlp.txt' pickle.dump(train_neg_vec, open(filename, 'wb')) train_neg_vec = pickle.load(open(filename, 'rb')) filename = './' + path_to_data + 'test_pos_vec_nlp.txt' pickle.dump(test_pos_vec, open(filename, 'wb')) test_pos_vec = pickle.load(open(filename, 'rb')) filename = './' + path_to_data + 'test_neg_vec_nlp.txt' pickle.dump(test_neg_vec, open(filename, 'wb')) test_neg_vec = pickle.load(open(filename, 'rb')) nb_model, lr_model = build_models_NLP(train_pos_vec, train_neg_vec) filename = './' + path_to_data + 'nb_model_nlp.sav' pickle.dump(nb_model, open(filename, 'wb')) nb_model = pickle.load(open(filename, 'rb')) filename = './' + path_to_data + 'lr_model_nlp.sav' pickle.dump(lr_model, open(filename, 'wb')) lr_model = pickle.load(open(filename, 'rb')) # Using saved models and vectors for method == 'd2v'. (Orginal runtime = 10 mins; Current runtime = 10 seconds) if method == "d2v": train_pos_vec, train_neg_vec, test_pos_vec, test_neg_vec = feature_vecs_DOC( train_pos, train_neg, test_pos, test_neg) filename = './' + path_to_data + 'train_pos_vec_d2v.txt' pickle.dump(train_pos_vec, open(filename, 'wb')) train_pos_vec = pickle.load(open(filename, 'rb')) filename = './' + path_to_data + 'train_neg_vec_d2v.txt' pickle.dump(train_neg_vec, open(filename, 'wb')) train_neg_vec = pickle.load(open(filename, 'rb')) filename = './' + path_to_data + 'test_pos_vec_d2v.txt' pickle.dump(test_pos_vec, open(filename, 'wb')) test_pos_vec = pickle.load(open(filename, 'rb')) filename = './' + path_to_data + 'test_neg_vec_d2v.txt' pickle.dump(test_neg_vec, open(filename, 'wb')) test_neg_vec = pickle.load(open(filename, 'rb')) nb_model, lr_model = build_models_DOC(train_pos_vec, train_neg_vec) filename = './' + path_to_data + 'nb_model_d2v.sav' pickle.dump(nb_model, open(filename, 'wb')) nb_model = pickle.load(open(filename, 'rb')) filename = './' + path_to_data + 'lr_model_d2v.sav' pickle.dump(lr_model, open(filename, 'wb')) lr_model = pickle.load(open(filename, 'rb')) if method == "w2v": train_pos_vec, train_neg_vec, test_pos_vec, test_neg_vec = feature_vecs_DOC_W2V( train_pos, train_neg, test_pos, test_neg) filename = './' + path_to_data + 'train_pos_vec_w2v.txt' pickle.dump(train_pos_vec, open(filename, 'wb')) #train_pos_vec = pickle.load(open(filename, 'rb')) filename = './' + path_to_data + 'train_neg_vec_w2v.txt' pickle.dump(train_neg_vec, open(filename, 'wb')) #train_neg_vec = pickle.load(open(filename, 'rb')) filename = './' + path_to_data + 'test_pos_vec_w2v.txt' pickle.dump(test_pos_vec, open(filename, 'wb')) #test_pos_vec = pickle.load(open(filename, 'rb')) filename = './' + path_to_data + 'test_neg_vec_w2v.txt' pickle.dump(test_neg_vec, open(filename, 'wb')) #test_neg_vec = pickle.load(open(filename, 'rb')) nb_model, lr_model = build_models_DOC_W2V(train_pos_vec, train_neg_vec) filename = './' + path_to_data + 'nb_model_w2v.sav' pickle.dump(nb_model, open(filename, 'wb')) filename = './' + path_to_data + 'lr_model_w2v.sav' pickle.dump(lr_model, open(filename, 'wb')) 
print("Naive Bayes")
print("-----------")
evaluate_model(nb_model, test_pos_vec, test_neg_vec, True)
print("")
print("Logistic Regression")
print("-------------------")
evaluate_model(lr_model, test_pos_vec, test_neg_vec, True)
def generate_dic(self): current_dict = {} doc_length = [] # c2 = 0 ''' parse the documents''' for j in range(1, 21579): filename_txt = 'tokenization\\%d.txt' % j f = open(filename_txt, 'r') ''' strip document into token list''' data = f.read().split() token_stream = data m_length = len(token_stream) doc_length.append([j, m_length]) i = 0 print('read doc %d' % j) while (i < len(token_stream)): while (SPIMI.MemorySize * 1024 * 1024 - self.current_used_memory_size ) > 0 and i < len(token_stream): self.current_used_memory_size = 0 if not token_stream[i] in current_dict: current_dict[token_stream[i]] = [] # c1 = c1 + 1 # print ('current dic size is %d'%(sys.getsizeof(current_dict))) # print ('current dic size of mb is %f'%(sys.getsizeof(current_dict)/1024.0/1024.0)) # # 'check if the current term is already in the same doc in order to compute the term frequency' 'current posting list is empty' if current_dict[token_stream[i]] == []: current_dict[token_stream[i]].append([j, 1]) self.c = self.c + 1 elif current_dict[token_stream[i]][-1][0] == j: current_dict[token_stream[i]][-1][1] += 1 self.c = self.c + 1 else: current_dict[token_stream[i]].append([j, 1]) self.c = self.c + 1 self.current_used_memory_size = sys.getsizeof(current_dict) i = i + 1 ''' memory is over ''' if SPIMI.MemorySize * 1024 * 1024 - self.current_used_memory_size <= 0: self.num = self.num + 1 print(' memory done') print(current_dict) '''sort ''' key_list = sorted(current_dict.keys()) print('new dic %d' % (self.num)) print(key_list) '''write dictionary into disk''' filename = 'dic\\dic%d.txt' % (self.num) f = open(filename, "wb") cPickle.dump(current_dict, f) f.close() '''write key list into disk''' filename = 'dic\\keylist%d.txt' % (self.num) f = open(filename, "wb") cPickle.dump(key_list, f) f.close() self.current_used_memory_size = 0 current_dict = {} elif j == 21578: self.num = self.num + 1 print(' last one ') print(current_dict) '''sort ''' key_list = sorted(current_dict.keys()) print('new dic %d' % (self.num)) print(key_list) '''write dictionary into disk''' filename = 'dic\\dic%d.txt' % (self.num) f = open(filename, "wb") cPickle.dump(current_dict, f) f.close() '''write key list into disk''' filename = 'dic\\keylist%d.txt' % (self.num) f = open(filename, "wb") cPickle.dump(key_list, f) f.close() break '''write doc length into disk''' filename = 'dic\\doc.txt' f = open(filename, "wb") cPickle.dump(doc_length, f) f.close() print('out of loop')
def compressPickle(fName, data):
    with bz2.BZ2File(fName + '.pbz2', 'w') as f:
        cPickle.dump(data, f)
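# Hedged sketch of the reverse operation: a BZ2File opened in binary read mode can
# be passed straight to the pickle loader to recover the object written by
# compressPickle. The function name decompressPickle is an assumption.
import bz2
import _pickle as cPickle

def decompressPickle(fName):
    with bz2.BZ2File(fName + '.pbz2', 'rb') as f:
        return cPickle.load(f)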
xs,hs,dlogps,drs = [],[],[],[] # reset array memory # compute the discounted reward backwards through time discounted_epr = discount_rewards(epr) # standardize the rewards to be unit normal (helps control the gradient estimator variance) discounted_epr -= np.mean(discounted_epr) discounted_epr /= np.std(discounted_epr) epdlogp *= discounted_epr # modulate the gradient with advantage (PG magic happens right here.) grad = policy_backward(eph, epdlogp) for k in model: grad_buffer[k] += grad[k] # accumulate grad over batch # perform rmsprop parameter update every batch_size episodes if episode_number % batch_size == 0: for k,v in model.items(): g = grad_buffer[k] # gradient rmsprop_cache[k] = decay_rate * rmsprop_cache[k] + (1 - decay_rate) * g**2 model[k] += learning_rate * g / (np.sqrt(rmsprop_cache[k]) + 1e-5) grad_buffer[k] = np.zeros_like(v) # reset batch gradient buffer # boring book-keeping running_reward = reward_sum if running_reward is None else running_reward * 0.99 + reward_sum * 0.01 print('resetting env. episode reward total was %f. running mean: %f' % (reward_sum, running_reward)) if episode_number % 10 == 0: cPickle.dump(model, open('save_space.p', 'wb')) reward_sum = 0 observation = env.reset() # reset env prev_x = None if reward != 0: # Pong has either +1 or -1 reward exactly when game ends. print('ep %d: game finished, reward: %f' % (episode_number, reward) + ('' if reward == -1 else ' !!!!!!!!'))
# gather the information into a list of tuple
RNA_SS_data = collections.namedtuple('RNA_SS_data', 'seq ss_label length name pairs')
RNA_SS_data_list = list()
for i in range(len(data_list)):
    RNA_SS_data_list.append(
        RNA_SS_data(seq=seq_encoding_list_padded[i],
                    ss_label=stru_encoding_list_padded[i],
                    length=seq_len_list[i],
                    name=file_list[i],
                    pairs=pairs_list[i]))

## training test split
RNA_SS_train, RNA_SS_test = train_test_split(RNA_SS_data_list, test_size=0.2, random_state=seed)
RNA_SS_test, RNA_SS_val = train_test_split(RNA_SS_test, test_size=0.5, random_state=seed)

# savepath = dataset+"_"+"_".join(rna_types)
savepath = dataset + '_all_{}'.format(length_limit)
# savepath = dataset+'_all'
os.mkdir(savepath)

for i in ['train', 'test', 'val']:
    with open(savepath + '/%s.pickle' % i, 'wb') as f:
        cPickle.dump(eval('RNA_SS_' + i), f)
def train_model(input_to_softmax, pickle_path, save_model_path, train_json='train_corpus.json', valid_json='valid_corpus.json', minibatch_size=100, spectrogram=True, mfcc_dim=13, optimizer=SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5), epochs=20, verbose=1, sort_by_duration=False, max_duration=10.0): # create a class instance for obtaining batches of data audio_gen = AudioGenerator(minibatch_size=minibatch_size, spectrogram=spectrogram, mfcc_dim=mfcc_dim, max_duration=max_duration, sort_by_duration=sort_by_duration) # add the training data to the generator audio_gen.load_train_data(train_json) audio_gen.load_validation_data(valid_json) # calculate steps_per_epoch num_train_examples = len(audio_gen.train_audio_paths) steps_per_epoch = num_train_examples // minibatch_size # calculate validation_steps num_valid_samples = len(audio_gen.valid_audio_paths) validation_steps = num_valid_samples // minibatch_size # add CTC loss to the NN specified in input_to_softmax model = add_ctc_loss(input_to_softmax) # CTC loss is implemented elsewhere, so use a dummy lambda function for the loss model.compile(loss={ 'ctc': lambda y_true, y_pred: y_pred }, optimizer=optimizer) # make results/ directory, if necessary if not os.path.exists('results'): os.makedirs('results') # add checkpointer checkpointer = ModelCheckpoint(filepath='results/' + save_model_path, verbose=0) # train the model hist = model.fit_generator(generator=audio_gen.next_train(), steps_per_epoch=steps_per_epoch, epochs=epochs, validation_data=audio_gen.next_valid(), validation_steps=validation_steps, callbacks=[checkpointer], verbose=verbose) # save model loss with open('results/' + pickle_path, 'wb') as f: pickle.dump(hist.history, f)
sys.path.append("./data/")
from process_documets import load_dataset
from utills import *

LOAD_DATA_FROM_SCRATCH = True

print("Making model")
model = get_model()
print("Done Making model")

if LOAD_DATA_FROM_SCRATCH:
    X, Y = load_dataset()
    X, Y = prepare_loaded_dataset_for_training(X, Y, ONE_SIDE_CONTEXT_SIZE)
    # pickle files are binary, so open them in binary mode without a text encoding
    with open("X.pkl", "wb") as f:
        cPickle.dump(X, f)
    with open("Y.pkl", "wb") as f:
        cPickle.dump(Y, f)
else:
    with open("X.pkl", "rb") as f:
        X = cPickle.load(f)
    with open("Y.pkl", "rb") as f:
        Y = cPickle.load(f)

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy', 'sparse_categorical_accuracy'])
checkpoints = ModelCheckpoint('trained_model.{epoch:02d}-{val_loss:.3f}.hdf5',
                              monitor='acc',
                              verbose=1,
def save_model(self, sess, path):
    save_target = path + '_iter%d' % self.epochs_trained
    dirname = os.path.dirname(save_target)
    if not os.path.exists(dirname):
        os.makedirs(dirname)
    self.saver.save(sess, save_target)

    dictionaries_path = save_target + '.dict'
    with open(dictionaries_path, 'wb') as file:
        pickle.dump(self.subtoken_to_index, file)
        pickle.dump(self.index_to_subtoken, file)
        pickle.dump(self.subtoken_vocab_size, file)
        pickle.dump(self.target_to_index, file)
        pickle.dump(self.index_to_target, file)
        pickle.dump(self.target_vocab_size, file)
        pickle.dump(self.node_to_index, file)
        pickle.dump(self.index_to_node, file)
        pickle.dump(self.nodes_vocab_size, file)
        pickle.dump(self.num_training_examples, file)
        pickle.dump(self.epochs_trained, file)
        pickle.dump(self.config, file)
    print('Saved after %d epochs in: %s' % (self.epochs_trained, save_target))
def test_from_rgb_detection(output_filename, result_dir=None): ''' Test frustum pointents with 2D boxes from a RGB detector. Write test results to KITTI format label files. todo (rqi): support variable number of points. ''' ps_list = [] segp_list = [] center_list = [] heading_cls_list = [] heading_res_list = [] size_cls_list = [] size_res_list = [] rot_angle_list = [] score_list = [] onehot_list = [] test_idxs = np.arange(0, len(TEST_DATASET)) print(len(TEST_DATASET)) batch_size = BATCH_SIZE num_batches = int((len(TEST_DATASET) + batch_size - 1) / batch_size) batch_data_to_feed = np.zeros((batch_size, NUM_POINT, NUM_CHANNEL)) batch_one_hot_to_feed = np.zeros((batch_size, 3)) sess, ops = get_session_and_ops(batch_size=batch_size, num_point=NUM_POINT) for batch_idx in range(num_batches): print('batch idx: %d' % (batch_idx)) start_idx = batch_idx * batch_size end_idx = min(len(TEST_DATASET), (batch_idx + 1) * batch_size) cur_batch_size = end_idx - start_idx batch_data, batch_rot_angle, batch_rgb_prob, batch_one_hot_vec = \ get_batch(TEST_DATASET, test_idxs, start_idx, end_idx, NUM_POINT, NUM_CHANNEL, from_rgb_detection=True) batch_data_to_feed[0:cur_batch_size, ...] = batch_data batch_one_hot_to_feed[0:cur_batch_size, :] = batch_one_hot_vec # Run one batch inference batch_output, batch_center_pred, \ batch_hclass_pred, batch_hres_pred, \ batch_sclass_pred, batch_sres_pred, batch_scores = \ inference(sess, ops, batch_data_to_feed, batch_one_hot_to_feed, batch_size=batch_size) for i in range(cur_batch_size): ps_list.append(batch_data[i, ...]) segp_list.append(batch_output[i, ...]) center_list.append(batch_center_pred[i, :]) heading_cls_list.append(batch_hclass_pred[i]) heading_res_list.append(batch_hres_pred[i]) size_cls_list.append(batch_sclass_pred[i]) size_res_list.append(batch_sres_pred[i, :]) rot_angle_list.append(batch_rot_angle[i]) #score_list.append(batch_scores[i]) score_list.append(batch_rgb_prob[i]) # 2D RGB detection score onehot_list.append(batch_one_hot_vec[i]) if FLAGS.dump_result: with open(output_filename, 'wp') as fp: pickle.dump(ps_list, fp) pickle.dump(segp_list, fp) pickle.dump(center_list, fp) pickle.dump(heading_cls_list, fp) pickle.dump(heading_res_list, fp) pickle.dump(size_cls_list, fp) pickle.dump(size_res_list, fp) pickle.dump(rot_angle_list, fp) pickle.dump(score_list, fp) pickle.dump(onehot_list, fp) # Write detection results for KITTI evaluation print('Number of point clouds: %d' % (len(ps_list))) write_detection_results(result_dir, TEST_DATASET.id_list, TEST_DATASET.type_list, TEST_DATASET.box2d_list, center_list, heading_cls_list, heading_res_list, size_cls_list, size_res_list, rot_angle_list, score_list) # Make sure for each frame (no matter if we have measurment for that frame), # there is a TXT file output_dir = os.path.join(result_dir, 'data') if FLAGS.idx_path is not None: to_fill_filename_list = [line.rstrip()+'.txt' \ for line in open(FLAGS.idx_path)] fill_files(output_dir, to_fill_filename_list)
def test(output_filename, result_dir=None): ''' Test frustum pointnets with GT 2D boxes. Write test results to KITTI format label files. todo (rqi): support variable number of points. ''' ps_list = [] seg_list = [] segp_list = [] center_list = [] heading_cls_list = [] heading_res_list = [] size_cls_list = [] size_res_list = [] rot_angle_list = [] score_list = [] test_idxs = np.arange(0, len(TEST_DATASET)) batch_size = BATCH_SIZE num_batches = len(TEST_DATASET) / batch_size sess, ops = get_session_and_ops(batch_size=batch_size, num_point=NUM_POINT) correct_cnt = 0 for batch_idx in range(num_batches): print('batch idx: %d' % (batch_idx)) start_idx = batch_idx * batch_size end_idx = (batch_idx + 1) * batch_size batch_data, batch_label, batch_center, \ batch_hclass, batch_hres, batch_sclass, batch_sres, \ batch_rot_angle, batch_one_hot_vec = \ get_batch(TEST_DATASET, test_idxs, start_idx, end_idx, NUM_POINT, NUM_CHANNEL) batch_output, batch_center_pred, \ batch_hclass_pred, batch_hres_pred, \ batch_sclass_pred, batch_sres_pred, batch_scores = \ inference(sess, ops, batch_data, batch_one_hot_vec, batch_size=batch_size) correct_cnt += np.sum(batch_output == batch_label) for i in range(batch_output.shape[0]): ps_list.append(batch_data[i, ...]) seg_list.append(batch_label[i, ...]) segp_list.append(batch_output[i, ...]) center_list.append(batch_center_pred[i, :]) heading_cls_list.append(batch_hclass_pred[i]) heading_res_list.append(batch_hres_pred[i]) size_cls_list.append(batch_sclass_pred[i]) size_res_list.append(batch_sres_pred[i, :]) rot_angle_list.append(batch_rot_angle[i]) score_list.append(batch_scores[i]) print("Segmentation accuracy: %f" % \ (correct_cnt / float(batch_size*num_batches*NUM_POINT))) if FLAGS.dump_result: with open(output_filename, 'wp') as fp: pickle.dump(ps_list, fp) pickle.dump(seg_list, fp) pickle.dump(segp_list, fp) pickle.dump(center_list, fp) pickle.dump(heading_cls_list, fp) pickle.dump(heading_res_list, fp) pickle.dump(size_cls_list, fp) pickle.dump(size_res_list, fp) pickle.dump(rot_angle_list, fp) pickle.dump(score_list, fp) # Write detection results for KITTI evaluation write_detection_results(result_dir, TEST_DATASET.id_list, TEST_DATASET.type_list, TEST_DATASET.box2d_list, center_list, heading_cls_list, heading_res_list, size_cls_list, size_res_list, rot_angle_list, score_list)
def save_testData(test_sets, path=MODEL_PATH):
    obj = {"data": test_sets}
    with bz2.BZ2File(path + "test_data.pbz2", "w") as f:
        pickle.dump(obj, f)
d = [2]
path = "./drive/My Drive/HW-1/images"
images = os.listdir(path)
dict_features = {}
c = 1
for im in images:
    print(c)
    print(im)
    im = path + "/" + im
    img = Image.open(im)
    basewidth = 300
    wpercent = (basewidth / float(img.size[0]))
    hsize = int((float(img.size[1]) * float(wpercent)))
    image = img.resize((basewidth, hsize), Image.ANTIALIAS)
    dist = get_probdist(image, bins, d)
    dict_features[im] = dist
    c = c + 1

file = open('result', 'wb')
pickle.dump(dict_features, file)
file.close()

file = open('result', 'rb')
print(pickle.load(file))
class Message:
    def __init__(self, dest, msgData):
        self.destUser = dest
        self.msgData = msgData

    def setMessageData(self, text):
        self.msgData = text

    def getMessageData(self):
        return self.msgData

    def converByteToString(self, byteData):
        return byteData.decode("utf-8")

    def converStringToByte(self, strData):
        return strData.encode(encoding='utf_8', errors='strict')

    def isGetMessage(self):
        if self.msgData:
            return "true"

    def getUserInfo(self):
        userInfo = (self.sendUser, self.destUser)
        return userInfo


msg = Message("khy4701", "1234")
# dumps returns the pickled bytes instead of writing to a file object
data_string = cPickle.dumps(msg.getMessageData())
print('pickle :', data_string)
def __init__( self, task: str, dataroot: str, annotations_jsonpath: str, split: str, image_features_reader: ImageFeaturesH5Reader, gt_image_features_reader: ImageFeaturesH5Reader, tokenizer: BertTokenizer, bert_model, padding_index: int = 0, max_seq_length: int = 20, max_region_num: int = 37, hard_neg: bool = False, ): # All the keys in `self._entries` would be present in `self._image_features_reader` self._entries, self.imgid2entry = _load_annotations( annotations_jsonpath) self.image_id_list = [*self.imgid2entry] self._image_features_reader = image_features_reader self._tokenizer = tokenizer self.num_labels = 1 self._split = split self._padding_index = padding_index self._max_region_num = max_region_num self._max_seq_length = max_seq_length self._hard_neg = hard_neg if self._split == "train" and self._hard_neg: image_info = cPickle.load( open(os.path.join(dataroot, "hard_negative" + ".pkl"), "rb")) for key, value in image_info.items(): setattr(self, key, value) self.train_imgId2pool = { imageId: i for i, imageId in enumerate(self.train_image_list) } if "roberta" in bert_model: cache_path = os.path.join( dataroot, "cache", task + "_" + split + "_" + "roberta" + "_" + str(max_seq_length) + ".pkl", ) else: cache_path = os.path.join( dataroot, "cache", task + "_" + split + "_" + str(max_seq_length) + ".pkl", ) if not os.path.exists(cache_path): self.tokenize() self.tensorize() cPickle.dump(self._entries, open(cache_path, "wb")) else: print("loading entries from %s" % (cache_path)) self._entries = cPickle.load(open(cache_path, "rb"))
def save_params(iter, params):
    # with open("saved_params_%d.npy" % iter, "w") as f:
    with open("saved_params_%d.npy" % iter, "wb") as f:
        pickle.dump(params, f)
        pickle.dump(random.getstate(), f)
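# Hedged sketch of the matching loader: save_params dumps the parameters and then
# the random-generator state, so they are read back in the same order and the
# state is restored. The name load_saved_params is an assumption.
import pickle
import random

def load_saved_params(iter):
    with open("saved_params_%d.npy" % iter, "rb") as f:
        params = pickle.load(f)
        state = pickle.load(f)
    random.setstate(state)
    return params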
def test(testID): print('\nTesting:') total_test_loss = 0 total_test_loss_length = 0 total_test_loss_number = 0 confusion_matrix = np.zeros((num_classes, num_classes)) bar = progressbar.ProgressBar(maxval=test_set_size, redirect_stdout=False) for i, test_song in enumerate(X_test): X = I_test[i] X = np.expand_dims(X, 0) num_samples = X.shape[0] c = C_test[i] Y = np.asarray(to_categorical(c, num_classes=num_classes)) scores = model.evaluate(X, Y, batch_size=batch_size, verbose=verbose) if reset_states: model.reset_states() total_test_loss += scores[0] Y_predicted = model.predict(X, batch_size=batch_size, verbose=verbose) for y_val, y_predicted in zip(Y, Y_predicted): y_class_test = np.argmax(y_val) y_class_predicted = np.argmax(y_predicted) confusion_matrix[y_class_predicted, y_class_test] += 1 bar.update(i + 1) accuracy = np.sum(np.diagonal(confusion_matrix)) / np.sum(confusion_matrix) total_test_loss_array.append(total_test_loss / test_set_size) total_test_accuracy_array.append(accuracy) print('\nTotal test loss: ', total_test_loss / test_set_size) print('Total accuracy: ' + str(accuracy * 100) + "%") print('-' * 50) plt.figure() plt.title('Style classification on instrument information') plt.plot(total_test_loss_array, label='Total test loss') plt.plot(total_train_loss_array, label='Total train loss') plt.plot(total_test_accuracy_array, label='Total test accuracy') plt.plot(total_train_accuracy_array, label='Total train accuracy') plt.legend(loc='lower left', prop={'size': 8}) if show_plot: plt.show() if save_plot: plt.savefig(model_path + t + 'instrument_train.png') tikz_save(model_path + t + 'instrument_train.tex', encoding='utf-8', show_info=False) pickle.dump(total_test_loss_array, open(model_path + 'total_test_loss_array.pickle', 'wb')) pickle.dump(total_test_accuracy_array, open(model_path + 'total_test_accuracy_array.pickle', 'wb')) pickle.dump(total_train_accuracy_array, open(model_path + 'total_train_accuracy_array.pickle', 'wb')) pickle.dump(total_train_loss_array, open(model_path + 'total_train_loss_array.pickle', 'wb')) if testID % save_step is 0: confusion_matrix = confusion_matrix / confusion_matrix.sum( axis=1, keepdims=True) plt.figure() plt.imshow(confusion_matrix, interpolation='nearest') plt.title('Total accuracy: ' + str(accuracy) + '%') plt.ylabel('True label') plt.xlabel('Predicted label') plt.xticks(np.arange(0, num_classes), classes) plt.yticks(np.arange(0, num_classes), classes) plt.colorbar() if show_plot: plt.show() if save_plot: plt.savefig(model_path + 'confusion_matrix' + str(testID) + '.png') tikz_save(model_path + 'confusion_matrix' + str(testID) + '.tex', encoding='utf-8', show_info=False)
def test_create_profile_data():
    df = gathering_profile_data()
    with open("../files/pickles/profiles.pkl", 'wb') as wb:
        df.to_csv(r"../files/csv/profiles.csv", encoding='utf-8')
        pickle.dump(df, wb)
def serialize_testdata(testdata, path='testdata.pkl'):
    with open(path, 'wb') as file:
        pkl.dump(testdata, file)
file = sys.argv[1]
# read the raw bytes, then decode to text
data = open(file, 'rb').read()
data = data.decode('utf-8')
chars = list(set(data))  # char vocabulary

data_size, _vocab_size = len(data), len(chars)
print('data has %d characters, %d unique.' % (data_size, _vocab_size))
char_to_idx = {ch: i for i, ch in enumerate(chars)}
idx_to_char = {i: ch for i, ch in enumerate(chars)}

config = Config.Config()
config.vocab_size = _vocab_size
# pickle files must be opened in binary mode
cPickle.dump((char_to_idx, idx_to_char),
             open(config.model_path + '.voc', 'wb'),
             protocol=cPickle.HIGHEST_PROTOCOL)

context_of_idx = [char_to_idx[ch] for ch in data]


def data_iterator(raw_data, batch_size, num_steps):
    raw_data = np.array(raw_data, dtype=np.int32)

    data_len = len(raw_data)
    batch_len = data_len // batch_size
    data = np.zeros([batch_size, batch_len], dtype=np.int32)
    for i in range(batch_size):
        data[i] = raw_data[batch_len * i:batch_len * (i + 1)]
        # data has shape (batch_size, batch_len); each row is a contiguous
        # chunk of the corpus, so several segments can be fed per batch

    epoch_size = (batch_len - 1) // num_steps
import os
import numpy as np
import _pickle as cPickle

states_num = 880

fold_path = os.path.join(os.getcwd(), os.path.pardir, os.path.pardir, 'Weights')
file_path = os.path.join(fold_path, "q.pkl")

if not os.path.exists(file_path):
    print('[ERROR] The q file %s does not exist' % (fold_path))
else:
    with open(file_path, 'rb') as readfile:
        q = cPickle.load(readfile)
        print('[INFO] Successfully load q file from %s' % (file_path))

    q[880:891, :] = q[states_num, :]
    #q_ = q[states_num, :]
    #q_max = q_.max()
    #q_index = np.where(q_ == q_max)
    #print("weight for state %d is:" % (states_num), q_)
    #print("length:", len(q_))
    #print("max:", q_max)
    #print("index:", q_index)
    #print(q.shape)
    #print(q_[252])

    with open(file_path, 'wb') as writefile:
        cPickle.dump(q, writefile)
        print('[INFO] Successfully save q file to %s' % (file_path))
# The gradient boosted model by itself y_pred_grd = grd.predict_proba(X_test)[:, 1] fpr_grd, tpr_grd, _ = roc_curve(y_test, y_pred_grd) ''' # The random forest model by itself rf.fit(X_train, y_train) print(rf.predict(X_test)) print(rf.score(X_test, y_test)) #cnf_matrix = confusion_matrix(y_test, y_pred) with open( 'C:/aditya/Thermal_images/ai_2.0/hog_detectors/hog_rf_F_NF_100pad5_in127_py3', 'wb') as f: cPickle.dump(rf, f) y_pred_rf = rf.predict_proba(X_test)[:, 1] fpr_rf, tpr_rf, _ = roc_curve(y_test, y_pred_rf) roc_auc = auc(fpr_rf, tpr_rf) plt.figure(1) lw = 2 plt.plot([0, 1], [0, 1], 'k--') #plt.plot(fpr_rt_lm, tpr_rt_lm, label='RT + LR') plt.plot(fpr_rf, tpr_rf, lw=lw, label='RF (area = %0.2f)' % roc_auc) #plt.plot(fpr_rf_lm, tpr_rf_lm, label='RF + LR') #plt.plot(fpr_grd, tpr_grd, label='GBT') #plt.plot(fpr_grd_lm, tpr_grd_lm, label='GBT + LR') plt.xlabel('False positive rate', fontsize=22) plt.ylabel('True positive rate', fontsize=22) plt.title('ROC curve foot classifier', fontsize=22)
question_id = 0
for idx, question in enumerate(train_questions):
    target.append({
        'question_id': question_id,
        'image_id': train_images[idx],
        'labels': ans2label[train_answers[idx]],
        'types': train_types[idx]
    })
    train_question_json.append({
        'question': question,
        'question_id': question_id,
        'image_id': train_images[idx]
    })
    question_id += 1

cache_file = os.path.join(cache_root, 'coco_train_target.pkl')
cPickle.dump(target, open(cache_file, 'wb'))
with open("coco_train_question.json", 'w') as file_object:
    json.dump(train_question_json, file_object)

target = []
for idx, question in enumerate(test_questions):
    target.append({
        'question_id': question_id,
        'image_id': test_images[idx],
        'labels': ans2label[test_answers[idx]],
        'types': test_types[idx]
    })
    val_question_json.append({
        'question': question,
        'question_id': question_id,
        'image_id': test_images[idx]
    })
def save_dir(self):
    f = open('label_dict', 'wb')
    cPickle.dump(self.label_dict, f)
def merge_dic(self): list_merge = [] dic_merge = [] print('num is %d' % self.num) '''compare and merge dictionaries''' for m in range(1, self.num + 1): if m == 1: print('enter list1, dic1') f1 = open('dic\\keylist1.txt', 'rb') f2 = open('dic\\dic1.txt', 'rb') k1 = cPickle.load(f1) d1 = cPickle.load(f2) list_merge = k1 dic_merge = d1 f1.close() f2.close() # print('list:%d dic:%d'%(len(list_merge), len(dic_merge))) # print(list_merge) # print(dic_merge) else: print('enter list%d dic%d' % (m, m)) f1 = open('dic\\keylist%d.txt' % m, 'rb') f2 = open('dic\\dic%d.txt' % m, 'rb') k1 = cPickle.load(f1) d1 = cPickle.load(f2) f1.close() f2.close() k2 = list_merge d2 = dic_merge n1 = len(k1) n2 = len(k2) list_merge = [] dic_merge = {} i = 0 j = 0 while i < n1 and j < n2: if k1[i] < k2[j]: list_merge.append(k1[i]) dic_merge[k1[i]] = d1[k1[i]] i += 1 elif k1[i] > k2[j]: list_merge.append(k2[j]) dic_merge[k2[j]] = d2[k2[j]] j += 1 else: list_merge.append(k1[i]) dic_merge[k1[i]] = d2[k2[j]] + d1[k1[i]] i += 1 j += 1 if i >= n1: for k in range(j, n2): list_merge.append(k2[k]) dic_merge[k2[k]] = d2[k2[k]] if j >= n2: for k in range(i, n1): list_merge.append(k1[k]) dic_merge[k1[k]] = d1[k1[k]] # print('i:%d j:%d n1:%d n2:%d k:%d'%(i,j,n1,n2,k)) # print(list_merge) # print(dic_merge) print('merge all') # print('list:%d'%(len(list_merge))) # print(list_merge) # print(dic_merge) '''write the dictionary into a disk''' filename = 'dic\\dictionary.txt' f = open(filename, "wb") cPickle.dump(dic_merge, f) f.close() '''write key list into disk''' filename = 'dic\\keys.txt' f = open(filename, "wb") cPickle.dump(list_merge, f) f.close()
def main(): args = parse_args() # Devices if args.local_rank == -1: device = torch.device("cuda" if torch.cuda.is_available() else "cpu") n_gpu = torch.cuda.device_count() else: torch.cuda.set_device(args.local_rank) device = torch.device("cuda", args.local_rank) n_gpu = 1 torch.distributed.init_process_group(backend="nccl") default_gpu = False if dist.is_available() and args.local_rank != -1: rank = dist.get_rank() if rank == 0: default_gpu = True else: default_gpu = True logger.info( f"device: {device} n_gpu: {n_gpu}, distributed training: {bool(args.local_rank != -1)}" ) # Load config config = BertConfig.from_json_file(args.config_file) # Load task config with open(args.tasks_config_file, "r") as f: task_cfg = edict(yaml.safe_load(f)) task_id = args.task.strip() task = "TASK" + task_id task_name = task_cfg[task]["name"] if task_cfg[task].get("fusion_method", None): # VL-BERT pooling for VQA config.fusion_method = task_cfg[task]["fusion_method"] # Output dirs savePath = args.output_dir if default_gpu and not os.path.exists(savePath): os.makedirs(savePath) # Seed random.seed(args.seed) np.random.seed(args.seed) torch.manual_seed(args.seed) # Dataset feats_h5path = task_cfg[task]["features_h5path1"] features_reader = ImageFeaturesH5Reader(feats_h5path, config, args.in_memory) batch_size = task_cfg[task]["batch_size"] num_workers = args.num_workers if args.local_rank != -1: batch_size = int(batch_size / dist.get_world_size()) num_workers = int(num_workers / dist.get_world_size()) logger.info("Loading %s Dataset with batch size %d" % (task_name, batch_size)) eval_split = args.split or task_cfg[task]["val_split"] tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=args.do_lower_case) dset = FlickrVis4LangDataset( task, task_cfg[task]["dataroot"], args.masking, eval_split, features_reader, None, tokenizer, args.bert_model, max_seq_length=task_cfg[task]["max_seq_length"], max_region_num=task_cfg[task]["max_region_num"], num_locs=config.num_locs, threshold=args.overlap_threshold, add_global_imgfeat=config.add_global_imgfeat) dl = DataLoader(dset, shuffle=False, batch_size=batch_size, num_workers=num_workers, pin_memory=True) # Model config.visual_target_weights = {} model = BertForVLPreTraining.from_pretrained(args.from_pretrained, config=config) # Move to GPU(s) model.to(device) if args.local_rank != -1: try: from apex.parallel import DistributedDataParallel as DDP except ImportError: raise ImportError( "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training." 
) model = DDP(model, delay_allreduce=True) elif n_gpu > 1: model = nn.DataParallel(model) # Print summary if default_gpu: print("***** Running evaluation *****") print(" Num Iters: ", len(dl)) print(" Batch size: ", batch_size) # Evaluate model.eval() loss_fct = nn.CrossEntropyLoss(ignore_index=-1) phrase_ids, image_ids, pred_tokens, true_tokens, pred_scores, lm_losses = [], [], [], [], [], [] for batch in tqdm(dl, total=len(dl)): image_id = batch[-1] batch = batch[:-1] if device.type != 'cpu': batch = tuple( t.cuda(device=device, non_blocking=True) for t in batch) phrase_id, caption, input_mask, segment_ids, lm_label_ids, features, spatials, image_cls, \ obj_labels, obj_confs, attr_labels, attr_confs, image_attrs, image_mask, image_labels = batch with torch.no_grad(): predictions_t, _, _, _, _ = model(caption, features, spatials, token_type_ids=segment_ids, attention_mask=input_mask, image_attention_mask=image_mask, masked_lm_labels=None, image_label=None, image_cls=image_cls, obj_labels=obj_labels, obj_confs=obj_confs, attr_labels=attr_labels, attr_confs=attr_confs, image_attrs=image_attrs) # loss = masked_loss_t + masked_loss_v + pair_match_loss target_ixs = [[] for _ in range(predictions_t.size(0))] xs, ys = torch.where(lm_label_ids != -1) for x, y in zip(xs, ys): target_ixs[x].append(y.item()) for bix in range(predictions_t.size(0)): pred_bix_tokens, true_bix_tokens, bix_predictions = [], [], [] for masked_ix in target_ixs[bix]: predicted_index = torch.argmax( predictions_t[bix, masked_ix]).item() predicted_token = tokenizer.convert_ids_to_tokens( [predicted_index])[0] label_token = tokenizer.convert_ids_to_tokens( [lm_label_ids[bix, masked_ix].item()])[0] pred_bix_tokens.append(predicted_token) true_bix_tokens.append(label_token) bix_predictions.append(predictions_t[bix, masked_ix].numpy()) masked_lm_loss = loss_fct( predictions_t[bix].view(-1, config.vocab_size), lm_label_ids[bix].view(-1), ).unsqueeze(0).item() if args.dump_results: # pred_tokens.append(pred_bix_tokens) # true_tokens.append(true_bix_tokens) # pred_scores.append(bix_predictions) # image_ids.append(image_id[bix].item()) # phrase_ids.append(phrase_id[bix].item()) lm_losses.append(masked_lm_loss) if default_gpu: print("MLM:", np.mean(np.array(lm_losses))) if args.dump_results: eval_path = os.path.join(savePath, eval_split) masking_str = args.masking if args.masking != "ref" else args.masking + str( args.overlap_threshold) # cPickle.dump(pred_tokens, open(eval_path + "_%s_preds.pkl" % masking_str, "wb")) # cPickle.dump(true_tokens, open(eval_path + "_%s_truth.pkl" % masking_str, "wb")) # cPickle.dump(pred_scores, open(eval_path + "_%s_score.pkl" % masking_str, "wb")) # cPickle.dump(image_ids, open(eval_path + "_%s_imgids.pkl" % masking_str, "wb")) # cPickle.dump(phrase_ids, open(eval_path + "_%s_phrids.pkl" % masking_str, "wb")) cPickle.dump(lm_losses, open(eval_path + "_%s_mlm.pkl" % masking_str, "wb"))
def TrainModel(): global vocab_index vocab_index={} global vocab_invindex vocab_invindex={} threadlist=[] #LearnVocabFromTrainFile() InitNet() print("initialize finished") if(negative>0): table=pickle.load( open( "table.p", "rb" ) ) #InitUnigramTable() print("Table finished") print("train starts") try: """ thread1=threading.Thread(target=TrainModelThread, args=(0,)) thread2=threading.Thread(target=TrainModelThread, args=(1,)) thread3=threading.Thread(target=TrainModelThread, args=(2,)) thread1.start() thread2.start() thread3.start() thread1.join() thread2.join() thread3.join() """ for i in range(worker): threadlist.append(threading.Thread(target=TrainModelThread, args=(i,))) for i in range(worker): threadlist[i].start() for i in range(worker): threadlist[i].join() except: print("Error: unable to start thread") while 1: pass print("train finished") #save word index and weight matrix for i in range(vocab_size): vocab_index[vocab[i].word]=i #save word index and weight matrix for i in range(vocab_size): vocab_invindex[i]=vocab[i].word #save with pickle pickle.dump( syn0, open( "WeightMatrix.p", "wb" ) ) pickle.dump( vocab_index, open( "vocab_index.p", "wb" ) ) pickle.dump( vocab_invindex, open( "vocab_invindex.p", "wb" ) ) return vocab_index,vocab_invindex
config = neat.config.Config(
    neat.genome.DefaultGenome,
    neat.reproduction.DefaultReproduction,
    neat.species.DefaultSpeciesSet,
    neat.stagnation.DefaultStagnation,
    'config_neat_pole_balancing')


# Use the gym_runner to run this experiment using NEAT.
def run(gens, env):
    winner, stats = run_neat(gens, env, 500, config)
    print("neat_pole_balancing done")
    return winner, stats


# If run as script.
if __name__ == '__main__':
    # Setup logger and environment.
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    env = gym.make("CartPole-v1")

    # Run!
    winner = run(500, env)[0]

    # Save net if wished reused and draw it + winner to file.
    winner_net = neat.nn.FeedForwardNetwork.create(winner, config)
    draw_net(winner_net, filename="neat_pole_balancing_winner")
    with open('neat_pole_balancing_winner.pkl', 'wb') as output:
        pickle.dump(winner_net, output)
def xo_compressed_pickle(title, data):
    with bz2.BZ2File(title + '.pbz2', 'w') as f:
        cPickle.dump(data, f)
digits_test_images_flat = digits_test[:, :(-1)]
digits_test_images = digits_test_images_flat.view()
digits_test_images.shape = ((-1), 8, 8)
digits_test_target = digits_test[:, (-1)].astype(np.int)

images_and_labels = list(zip(digits_images, digits_target))
n_samples = len(digits_images)

classifier = svm.SVC(gamma=0.001, kernel='linear')
classifier.fit(digits_images_flat, digits_target)

expected = digits_test_target
predicted = classifier.predict(digits_test_images_flat)

print('Classification report for classifier %s:\n%s\n' %
      (classifier, metrics.classification_report(expected, predicted)))
print('Confusion matrix:\n%s' % metrics.confusion_matrix(expected, predicted))
print("accuracy:", metrics.accuracy_score(expected, predicted))

images_and_predictions = list(zip(digits_test_images, predicted))
np.savetxt('output.txt', classifier.decision_function(digits_test_images_flat))

outputData = {'data_array': metrics.confusion_matrix(expected, predicted)}
with open('output.pkl', 'wb') as outputFile:
    cPickle.dump(outputData, outputFile)
with open('model.pkl', 'wb') as modelFile:
    cPickle.dump(classifier, modelFile)