def __call__(self, *args, save=None, **kwargs):
    o = self.process(*args, **kwargs)
    self.assert_return_keys(o)
    if save is not None:
        # Optionally persist the result; pickle has no `save` function, the writer is `dump`.
        with open(save, 'wb') as f:
            pickle.dump(o, f)
    return o
def zapis_grup(grupy, par=None):
    """
    Writes the groups to a binary file at the name and path given by par.
    :param grupy: dictionary of groups
    :param par: path
    :return:
    """
    if par is None:
        par = ["grupy"]
    with open(par[0] + ".bin", 'wb') as plik:
        # Serialize the dictionary; the bare `save` call is assumed to mean pickle.dump.
        pickle.dump(grupy, plik)
def trainHMM(self, trainingFiles):
    ''' Train the HMM '''
    self.hmm = HMM(self.labels, self.featureNames, self.contOrDisc, self.numFVals)
    allStrokes = []
    allLabels = []
    for f in trainingFiles:
        # print "Loading file", f, "for training"
        strokes, labels = self.loadLabeledFile(f)
        allStrokes.append(strokes)
        allLabels.append(labels)
    allObservations = [self.featurefy(s) for s in allStrokes]
    self.hmm.train(allObservations, allLabels)
    # Persist the trained model with the standard pickle module
    # (the original shadowed `pickle` with an undefined `Picklefy()` helper).
    with open('hmmbasic.pickle', 'wb') as f:
        pickle.dump(self.hmm, f)
def get_tid_to_filelength_dict():
    try:
        tid_to_filelength_dict = pickle.load(gzip.open(PATH_TOKENIZED + 'tid_to_filelength_dict.pickle', 'rb'))
    except IOError:
        print("Preprocessed tid_to_filelength_dict not available. Creating a new one.")
        # tid_to_filelength_dict = {}
        tid_to_filelength_arr = np.zeros(DOC_COUNT, dtype=np.int64)
        db = Database('TOB_FULL')
        con, cur = db.connect()
        cur.execute('SELECT tid from docs')
        count = 0
        while True:
            count += 1
            if count % 10000 == 0:
                print(count)
            row = cur.fetchone()
            if not row:
                break
            tid = row['tid']
            filepath = '{}{}/{}/{}/{}/{}'.format(PATH_OCR_FILES, tid[0], tid[1], tid[2], tid[3], tid + ".txt")
            # Grow the read buffer until the null byte that terminates the OCR text is found.
            array_len = 10000
            end = None
            while True:
                b = bytearray(array_len)
                with io.open(filepath, 'rb') as f:
                    f.readinto(b)
                text = b.decode('cp1252', errors='ignore')
                end = text.find('\x00')
                if end > -1:
                    break
                else:
                    array_len *= 10
            # tid_to_filelength_dict[tid] = end
            tid_to_filelength_arr[tid] = end
        # pickle.dump(tid_to_filelength_dict, gzip.open(PATH_TOKENIZED + 'tid_to_filelength_dict.pickle', 'wb'))
        # The array is written as .npy, so np.save (not pickle) is the matching call.
        np.save(PATH_TOKENIZED + 'tid_to_filelength_arr.npy', tid_to_filelength_arr)
        print("Longest file is {} bytes long.".format(max(tid_to_filelength_dict.values())))
        # if the number of tids in the dict != DOC_COUNT, something is wrong
        assert len(tid_to_filelength_dict) == DOC_COUNT, \
            "Length of tid_to_filelength_dict ({}) does not equal DOC_COUNT ({})".format(len(tid_to_filelength_dict), DOC_COUNT)
    return tid_to_filelength_dict
doOpt = False
doMCUncertainty = False
doLikelihoodDist = False
recalc = True
rewrite = True

if not recalc:
    # The .p files are assumed to be pickles, so the bare load/save calls map onto pickle.load/pickle.dump.
    closures = pickle.load(open(submitDir + '/closures_ABCD.p', 'rb'))
    if not doSysts:
        twosigmas = pickle.load(open(submitDir + '/twosigmas_brazil_ABCD.p', 'rb'))
    else:
        twosigmas = pickle.load(open(submitDir + '/twosigmas_brazil_systs_ABCD.p', 'rb'))
else:
    closures, twosigmas = doMLE(doSysts, doMCUncertainty, doLikelihoodDist)
    if rewrite:
        if not doSysts:
            pickle.dump(twosigmas, open(submitDir + '/twosigmas_brazil_ABCD.p', 'wb'))
        else:
            pickle.dump(twosigmas, open(submitDir + '/twosigmas_brazil_systs_ABCD.p', 'wb'))
        # pickle.dump(closures, open(submitDir + '/closures_ABCD.p', 'wb'))
        # pickle.dump(twosigmas, open(submitDir + '/twosigmas_ABCD.p', 'wb'))
        # pickle.dump(fivesigmas, open(submitDir + '/fivesigmas_ABCD.p', 'wb'))

doBrazil = True
if doBrazil:
    for v in VBFmasscuts:
        plt.errorbar(amasscuts, [twosigmas[a, v][0] for a in amasscuts],
                     label='VBF $M_{jj}$ cut = ' + str(v) + ' GeV', color='black', ls='--')
        plt.fill_between(amasscuts, [twosigmas[a, v][4] for a in amasscuts],
def save_data(self, filename):
    # Pickle this object's state to `filename`; the original passed only the filename,
    # so dumping `self` here is an assumption about what should be saved.
    with open(filename, 'wb') as f:
        pkl.dump(self, f)
def save_pickle(f, d):
    if f is not None:
        with open(f, 'wb') as file:
            pickle.dump(d, file)
    else:
        raise ValueError("save_pickle requires a target file path")
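# A minimal companion loader, sketched here for symmetry with save_pickle above; it is not
# part of the original source and assumes the same module-level `import pickle`.
def load_pickle(f):
    if f is None:
        raise ValueError("load_pickle requires a source file path")
    with open(f, 'rb') as file:
        return pickle.load(file)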
from tbselenium.tbdriver import TorBrowserDriver
import pickle
from bs4 import BeautifulSoup

tbpath = "tor-browser_en-US"

with open("oniontree-source.html", 'r') as f:
    data = f.read().replace('\n', '')

driver = TorBrowserDriver(tbpath)
# driver.load_url(website)

soup = BeautifulSoup(data, 'html.parser')
anchors = soup.find_all('a')
l = map(lambda x: x.get("href"), anchors)

potential_onions = []
for url in l:
    driver.load_url(url)
    e = driver.find_element_by_class_name("urls")
    onions = e.find_elements_by_tag_name("a")
    if len(onions) > 5:
        print("too many onions for {}. skipping".format(url))
        continue
    for o in onions:
        print(o.get_attribute('href'))
        potential_onions.append(o.get_attribute('href'))

# pickle.dump writes to an open binary file object, not to a bare filename.
with open('onions.sav', 'wb') as f:
    pickle.dump(potential_onions, f)
def save_coords(diction):
    # pickle.dump takes the object first and an open binary file second, and returns None.
    with open("save.p", 'wb') as f:
        pickle.dump(diction, f)
    return
print('The image width is', img_width)

trn_gen, val_gen, tst_gen = ImageGenerator.get_generators(img_width)
model, callbacks = UNetModel.get_unet_model(img_width)
num_epochs = 10

print('Training initialized\n')
start = time.time()
history = model.fit_generator(trn_gen,
                              steps_per_epoch=2035,
                              epochs=num_epochs,
                              validation_data=val_gen,
                              validation_steps=252,
                              callbacks=callbacks)
stop = time.time()

print('Training complete\nSaving model')
model.save('model.h5')
# Persist the training history alongside the saved model weights.
pickle.dump(history.history, open('history.p', 'wb'))

trn_acc = history.history.get('dice_coef')
val_acc = history.history.get('val_dice_coef')
tst_acc = [model.evaluate_generator(tst_gen, steps=252)[1] for _ in range(num_epochs)]

print('Training Time:', stop - start, 'seconds')
print('Average Training Accuracy: ', np.mean(trn_acc))
print('Average Validation Accuracy: ', np.mean(val_acc))
print('Average Testing Accuracy: ', np.mean(tst_acc))
""" Libraries imports External only """ import sys import socket import pickle import getopt """ Function descriptions """ def ParseCMDArgs(args): pass #Main func parameters = sys.argv[1:] parsedData = ParseCMDArgs(parameters) #execPath,noReplica,debug sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) msg = pickle.save(parsedData) recv_addr = ("127.0.0.1", 8000) sock.send_to(msg, recv_addr) _, data = sock.recvfrom(1024) if data is -1: print("System has exhauted its quota of remote processes") exit(0)
def save(self, filepath):
    with open(filepath, 'wb') as f:
        pickle.dump(self.rects, f)
class_name = [
    'Society Culture', 'Science Mathematics', 'Health', 'Education Reference',
    'Computers Internet', 'Sports', 'Business Finance', 'Entertainment Music',
    'Family Relationships', 'Politics Government'
]

filename = './data/glove.840B.300d.w2vformat.txt'
model = gensim.models.KeyedVectors.load_word2vec_format(filename)
vector_size = model.vector_size
embedding_vectors = np.random.uniform(-0.001, 0.001, (len(wordtoix), vector_size))
glove_vocab = list(model.vocab.keys())

count = 0
mis_count = 0
for word in wordtoix.keys():
    idx = wordtoix.get(word)
    if word in glove_vocab:
        embedding_vectors[idx] = model.wv[word]
        count += 1
    else:
        mis_count += 1
print("num of vocab in glove: {}".format(count))
print("num of vocab not in glove: {}".format(mis_count))

# print("load class embedding")
# name_list = [ k.lower().split(' ') for k in class_name]
# id_list = [ [ wordtoidx[i] for i in l] for l in name_list]
# value_list = [ [ opt.W_emb[i] for i in l] for l in id_list]
# value_mean = [ np.mean(l) for l in id_list]

# pickle.dump takes the object first and the open file second; note that value_mean is
# only defined in the commented-out block above.
pickle.dump([embedding_vectors, value_mean], open('./data/yahoo_emb.p', 'wb'))
sys.path.append("src") from fetch import * from sparsify import * from correlate import * #Read training data from files and parse into sparse vectors x,y = getData("small_tfidfvector_byhost_onlybody.csv.txt","webspam-uk2006-set1-labels.txt") #Convert to gensim compatible corpus corpus = gensim.matutils.Sparse2Corpus(x,False) #derive topic model lda = gensim.models.ldamodel.LdaModel(corpus,num_topics=100) #save model to load while testing lda.save('lda_model') #derive topic proportions for training data corpus_lda = lda[corpus] #convert topic proportions to sparse representation topDistSparse = sparsify(corpus_lda) #find correlation between topics corrTopics = correlate(topDistSparse) #train and save classifier clf = RandomForestClassifier(n_estimators=50) clf.fit(corrTopics.toarray(),y) pickle.save(clf,open("classifier","wb"))