def decrypt(key, ct_path="ciphertext.enc", savePT="plaintext.dec"):
    # Load u and the ciphertext
    with open(ct_path, 'rb') as f:
        u = dill.load(f)
        ciphertext = dill.load(f)
    # Recover the shared secret v = u^x mod p
    v = modexp(u, key.x, key.p)
    uv = str(u) + str(v)
    # Symmetric key for the Blowfish cipher, derived as SHA224(u || v)
    k = SHA224.new(uv.encode('utf-8')).hexdigest().encode('utf-8')
    print("K: " + str(k))
    bs = Blowfish.block_size
    # Retrieve and strip the IV
    iv = ciphertext[:bs]
    ciphertext = ciphertext[bs:]
    print("CT-LEN:" + str(len(ciphertext)))
    # Initialize Blowfish cipher and decrypt
    cipher = Blowfish.new(k, Blowfish.MODE_CBC, iv)
    plaintext = cipher.decrypt(ciphertext)
    # Remove padding (the last byte stores the padding length)
    last_byte = plaintext[-1]
    plaintext = plaintext[:-(last_byte if type(last_byte) is int else ord(last_byte))]
    # Write the decrypted plaintext to file
    io.open(savePT, "wb").write(plaintext)
    return plaintext
def decrypt(sk, ct_path="ciphertext.enc", savePT="plaintext.dec"):
    # Load y and the ciphertext
    with open(ct_path, 'rb') as f:
        y = dill.load(f)
        ciphertext = dill.load(f)
    # Compute x = RSA^(-1)(y) as y^d mod N
    x = pow(y, sk.d, sk.n)
    # Symmetric key for the Blowfish cipher, derived with a hash function (SHA224)
    k = SHA224.new(repr(x).encode('utf-8')).hexdigest().encode('utf-8')
    print("K: " + str(k))
    bs = Blowfish.block_size
    # Retrieve and strip the IV
    iv = ciphertext[:bs]
    ciphertext = ciphertext[bs:]
    print("CT-LEN:" + str(len(ciphertext)))
    # Initialize Blowfish cipher and decrypt
    cipher = Blowfish.new(k, Blowfish.MODE_CBC, iv)
    plaintext = cipher.decrypt(ciphertext)
    # Remove padding (the last byte stores the padding length)
    last_byte = plaintext[-1]
    plaintext = plaintext[:-(last_byte if type(last_byte) is int else ord(last_byte))]
    # Write the decrypted plaintext to file
    io.open(savePT, "wb").write(plaintext)
    return plaintext
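# A minimal sketch of the matching encryption side, reconstructed from the
# decrypt() routine above. The function name, the padding scheme, the use of
# PyCryptodome, and the pk.e / pk.n attributes on the public key are
# assumptions for illustration, not taken from the original source.
import io
import os
import random

import dill
from Crypto.Cipher import Blowfish
from Crypto.Hash import SHA224


def encrypt(pk, pt_path="plaintext.txt", saveCT="ciphertext.enc"):
    # Pick a random x and wrap it with RSA: y = x^e mod N
    x = random.SystemRandom().randrange(2, pk.n)
    y = pow(x, pk.e, pk.n)
    # Derive the symmetric key exactly as decrypt() does: SHA224(repr(x))
    k = SHA224.new(repr(x).encode('utf-8')).hexdigest().encode('utf-8')
    plaintext = io.open(pt_path, "rb").read()
    # Pad to the Blowfish block size; the last byte stores the padding length,
    # which is what decrypt() strips off
    bs = Blowfish.block_size
    pad_len = bs - len(plaintext) % bs
    plaintext += bytes([pad_len]) * pad_len
    # Encrypt with a fresh IV and prepend the IV to the ciphertext
    iv = os.urandom(bs)
    cipher = Blowfish.new(k, Blowfish.MODE_CBC, iv)
    ciphertext = iv + cipher.encrypt(plaintext)
    # Store y followed by the ciphertext, matching the two dill.load calls above
    with open(saveCT, 'wb') as f:
        dill.dump(y, f)
        dill.dump(ciphertext, f)
    return ciphertext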
def test_run_advanced(self, tmpdir): temp_path = os.path.join(str(tmpdir), 'temp') log_path = os.path.join(str(tmpdir), 'log') paths = { 'temp': temp_path, 'log': log_path } pipe = Pipeline(paths=paths, create_paths=True) pipe.add_step(test_func1, ['func1'], var1=3, var2=4) pipe.add_step(test_func2, ['func2'], var1=25, var2=10) pipe.add_step(test_func2, ['func2'], var1=1, var2=0) pipe.add_step(test_func3, var1=1, var2=0) with pytest.raises(PipelineError): pipe.run() pipe = dill.load(open(os.path.join(paths['log'], 'pipeline.p'), 'rb')) assert pipe.run_step_idx==2 assert pipe.steps[0].results=={'next_id': 4, 'status': 'success', 'step_id': 0, 'sum': 7} with pytest.raises(ZeroDivisionError): for step in pipe.steps: step.results = None pipe.run(resume=True, ignore_errors=True) new_pipe = dill.load(open(os.path.join(paths['log'], 'pipeline.p'), 'rb')) result = new_pipe.run(start_idx=1, ignore_errors=True, ignore_exceptions=True) assert result['status']=='success' assert new_pipe.steps[0].results==None assert new_pipe.steps[1].results=={'diff': 2.5, 'status': 'success'} assert new_pipe.steps[2].results=={'error': 'Division by 0', 'status': 'error'} assert new_pipe.steps[3].results['status']=='error'
def classify_patch_group(fn_rf, fn_kern, fn_patch_info):
    t0 = time()
    RF = dill.load(open(fn_rf, "rb"))  # unpack the RF classifier
    kernels = dill.load(open(fn_kern, "rb"))  # unpack the kernels
    patch_info = dill.load(open(fn_patch_info, "rb"))  # unpack the patch info
    a, b, c = patch_info[0]  # get the bounds of the set
    patches_a = patch_info[1]  # grab the set to classify
    patches_r = patch_info[2]  # grab the set to compare with (atlas mask)
    results = []
    for i, patch in enumerate(patches_a):  # go through each patch
        if np.all(patches_r[i]):  # if the patch is entirely masked
            feat = compute_feats(patch, kernels).flatten().reshape(1, -1)
            intens = np.array(compute_intens(patch)).flatten().reshape(1, -1)
            feat = np.concatenate((feat, intens), axis=1)
            prediction = RF.predict(feat)
            # print("Classifying patch {}/{}: {}".format(i, len(patches), prediction))
            results.append(np.full(patch.shape, prediction))
        else:  # the associated ROI patch is totally zero
            results.append(np.zeros(patch.shape))
    dt = time() - t0
    print("Classified group {}-{}/{} in {:.2f} time".format(a, b, c, dt))
    return results
def get_SALICON_train(location=None): """ Loads or downloads and caches the SALICON training dataset. For memory reasons no fixation trains are provided. @type location: string, defaults to `None` @param location: If and where to cache the dataset. The dataset will be stored in the subdirectory `SALICON_train` of location and read from there, if already present. @return: Training stimuli, validation stimuli, testing stimuli, training fixation trains, validation fixation trains .. seealso:: Ming Jiang, Shengsheng Huang, Juanyong Duan*, Qi Zhao: SALICON: Saliency in Context, CVPR 2015 http://salicon.net/ """ if location: location = os.path.join(location, 'SALICON_train') if os.path.exists(location): stimuli = dill.load(open(os.path.join(location, 'stimuli.pydat'), 'rb')) fixations = dill.load(open(os.path.join(location, 'fixations.pydat'), 'rb')) return stimuli, fixations os.makedirs(location) stimuli, fixations = _get_SALICON('train', 'https://s3.amazonaws.com/salicon-dataset/2015r1/train.zip', 'd549761c16e59b80cd5981373ada5e98', 'https://s3.amazonaws.com/salicon-dataset/2015r1/fixations_train2014.json', 'ab60a090ee31642fbb4aa41f4953b8bd', location) return stimuli, fixations
def get_SALICON_val(location=None): """ Loads or downloads and caches the SALICON validation dataset. For memory reasons no fixation trains are provided. @type location: string, defaults to `None` @param location: If and where to cache the dataset. The dataset will be stored in the subdirectory `SALICON_train` of location and read from there, if already present. @return: Training stimuli, validation stimuli, testing stimuli, training fixation trains, validation fixation trains .. seealso:: Ming Jiang, Shengsheng Huang, Juanyong Duan*, Qi Zhao: SALICON: Saliency in Context, CVPR 2015 http://salicon.net/ """ if location: location = os.path.join(location, 'SALICON_val') if os.path.exists(location): stimuli = dill.load(open(os.path.join(location, 'stimuli.pydat'), 'rb')) fixations = dill.load(open(os.path.join(location, 'fixations.pydat'), 'rb')) return stimuli, fixations os.makedirs(location) stimuli, fixations = _get_SALICON('val', 'https://s3.amazonaws.com/salicon-dataset/2015r1/val.zip', '62cd6641a5354d3099a693ff90cb6dab', 'https://s3.amazonaws.com/salicon-dataset/2015r1/fixations_val2014.json', '3224f8cf86ea8d248d93583866b60c5f', location) return stimuli, fixations
def main():
    with open('./word2index.dict', 'rb') as f:
        word2index = dill.load(f)
    with open('./index2word.dict', 'rb') as f:
        index2word = dill.load(f)
    model = Seq2Seq(
        vocab_size=len(word2index),
        embed_size=300,
        hidden_size=300,
    )
    serializers.load_npz('seq2seq.npz', model)
    while True:
        s = input()
        test_input = Variable(
            np.array([word2index.get(word, word2index['UNK'])
                      for word in mecab_wakati(s)], dtype='int32')
        )
        print('input  -> {}'.format(s))
        print('output -> ', end="")
        for index in model.predict(test_input):
            print(index2word[index], end='')
        print()
def _remove_nresults(self, traj, nresults, continue_folder): result_tuple_list = [] n = 0 for filename in os.listdir(continue_folder): _, ext = os.path.splitext(filename) if ext != '.rcnt': continue n += 1 cnt_file = open(os.path.join(continue_folder, filename), 'rb') try: result = dill.load(cnt_file) cnt_file.close() result_tuple_list.append((result)) except Exception: # delete broken files logging.getLogger().exception('Could not open continue snapshot ' 'file `%s`.' % filename) cnt_file.close() os.remove(filename) self.assertGreaterEqual(n, nresults) result_tuple_list = sorted(result_tuple_list, key=lambda x: x[0]) timestamp_list = [x[1]['finish_timestamp'] for x in result_tuple_list] timestamp_list = timestamp_list[-nresults:] for timestamp in timestamp_list: filename = os.path.join(continue_folder, 'result_%s.rcnt' % repr(timestamp).replace('.','_')) os.remove(filename) result_tuple_list = [] for filename in os.listdir(continue_folder): _, ext = os.path.splitext(filename) if ext != '.rcnt': continue cnt_file = open(os.path.join(continue_folder, filename), 'rb') result = dill.load(cnt_file) cnt_file.close() result_tuple_list.append((result)) name_set = set([x[1]['name'] for x in result_tuple_list]) removed = 0 for run_name in traj.f_iter_runs(): if run_name not in name_set: run_dict = traj.f_get_run_information(run_name, copy=False) run_dict['completed'] = 0 idx = run_dict['idx'] traj._updated_run_information.add(idx) removed += 1 self.assertGreaterEqual(removed, nresults) logging.getLogger().error('Removed %s runs for continuing' % removed) traj.f_store(only_init=True)
def _generate_image_segments_and_label_batch(images,img_dir,seg_dir,mean_img): #choosing 5 images at random test_batch = [] batch_labels = [] for i in range(0,images.__len__()): img = io.imread(img_dir+images[i]) segf = open(seg_dir+images[i][0:-3]+'slic','rb') segmap = dill.load(segf) segments_l = dill.load(segf) sal_l = dill.load(segf) segf.close() data = im2mdfin(img,mean_img,segmap,segments_l) for j in range(0,segments_l.__len__()): x = data.segments[j] dat = np.zeros([227,227,9],dtype = np.uint8) dat[:,:,0:3]= x.SP_Region dat[:,:,3:6]=x.SP_Neighbor dat[:,:,6:9]=x.Pic test_batch.append(sal_l[j]) test_batch.append(x.SP_Region) test_batch.append(x.SP_Neighbor) test_batch.append(x.Pic) return test_batch
def experimentLMKLIEP_per_feat(which_d): with open('feat_vec_out.'+which_d+'.en.pickle', 'rb') as handle: out_d = pickle.load(handle) with open('feat_vec_in.'+which_d+'.en.pickle', 'rb') as handle: in_d = pickle.load(handle) labels = - np.ones(out_d.shape[0]) predictions = - np.ones(out_d.shape[0]) labels[-50000:] = 1 W = np.zeros(out_d.shape) for i in xrange(W.shape[1]): kliep = KLIEP(init_b=100, seed=0) lm_out = out_d[:, i].reshape((out_d.shape[0], 1)) lm_in = in_d[:, i].reshape((in_d.shape[0], 1)) kliep.fit_CV(lm_out, lm_in) W[:, i] = kliep.predict(lm_out).ravel() w = np.mean(W, axis=1) predictions[np.where(w > 1.4)[0]] = 1 print 'total positive:', np.where(predictions == 1)[0].shape, ', out of:', out_d.shape[0] # sorted_ind = np.argsort(w, axis=None)[::-1] # predictions[sorted_ind[0:50000]] = 1 p, r, f, s = precision_recall_fscore_support(labels.astype(int), predictions.astype(int), pos_label=1, average='micro') print 'Precision:', p, print 'Recall:', r, print 'F1:', f, print 'Support:', s,
def experiment_LMSVM(which_d):
    with open('feat_vec_out.' + which_d + '.en.pickle', 'rb') as handle:
        out_d = pickle.load(handle)
    with open('feat_vec_in.' + which_d + '.en.pickle', 'rb') as handle:
        in_d = pickle.load(handle)
    labels = -np.ones(out_d.shape[0])
    labels[-50000:] = 1
    print 'Fitting one class SVM'
    clf = svm.OneClassSVM(kernel='linear')
    clf.fit(in_d)
    print 'Predicting for out domain'
    predictions = clf.predict(out_d)
    print 'total positive:', np.where(predictions == 1)[0].shape, ', out of:', out_d.shape[0]
    # sorted_ind = np.argsort(w, axis=None)[::-1]
    # predictions[sorted_ind[0:50000]] = 1
    p, r, f, s = precision_recall_fscore_support(labels.astype(int), predictions.astype(int),
                                                 pos_label=1, average='micro')
    print 'Precision:', p,
    print 'Recall:', r,
    print 'F1:', f,
    print 'Support:', s,
def test_run_advanced(self, tmpdir): temp_path = os.path.join(str(tmpdir), "temp") log_path = os.path.join(str(tmpdir), "log") paths = {"temp": temp_path, "log": log_path} pipe = pipeline.Pipeline(paths=paths, create_paths=True) pipe.add_step(test_func1, ["func1"], var1=3, var2=4) pipe.add_step(test_func2, ["func2"], var1=25, var2=10) pipe.add_step(test_func2, ["func2"], var1=1, var2=0) pipe.add_step(test_func3, var1=1, var2=0) with pytest.raises(pipeline.PipelineError): pipe.run() pipe = dill.load(open(os.path.join(paths["log"], "pipeline.p"), "rb")) assert pipe.run_step_idx == 2 assert pipe.steps[0].results == {"next_id": 4, "status": "success", "step_id": 0, "sum": 7} with pytest.raises(ZeroDivisionError): for step in pipe.steps: step.results = None pipe.run(resume=True, ignore_errors=True) new_pipe = dill.load(open(os.path.join(paths["log"], "pipeline.p"), "rb")) result = new_pipe.run(start_idx=1, ignore_errors=True, ignore_exceptions=True) assert result["status"] == "success" assert new_pipe.steps[0].results == None assert new_pipe.steps[1].results == {"diff": 2.5, "status": "success"} assert new_pipe.steps[2].results == {"error": "Division by 0", "status": "error"} assert new_pipe.steps[3].results["status"] == "error"
def put(self): hour = int(request.form['hour']) date = request.form['date'] prcp = float(request.form['prcp'])*100 snow = float(request.form['snow']) * 10 tmax = float(request.form['tmax']) * 10 tmin = float(request.form['tmin']) * 10 date = pd.to_datetime(date) with open(os.path.join(APP_STATIC, 'uniquegeohash.pkl'), 'rb') as f: uniquegeohash = dill.load(f) with open(os.path.join(APP_STATIC, 'predict_pickup_density.pkl'), 'rb') as f: model = dill.load(f) x_dict = [{"pickup_geohash": geostr, "hour": hour, "dayofweek": date.dayofweek, 'month': date.month,'PRCP':prcp,'SNOW':snow,'TMAX':tmax,'TMIN':tmin} for geostr in uniquegeohash] x_df = pd.DataFrame(x_dict) y = model.predict(x_df) geodecode = [Geohash.decode(geocode) for geocode in uniquegeohash] yzipgeo = zip(y, geodecode) sortedlist = sorted(yzipgeo, key=lambda x: -x[0]) top10address = [] top10dict = {} for y, geodecode in sortedlist[0:50]: key = ",".join(geodecode) top10dict[key] = top10dict.get(key,0) + y top10res = [] for key in top10dict: temptuple = (float(key.split(",")[0]),float(key.split(",")[1])) top10res.append([top10dict[key],temptuple]) top10res = sorted(top10res,key=lambda x:-x[0]) top10res = top10res[0:10] if len(top10res) > 10 else top10res for u,geodecode in top10res: g = geocoder.google([geodecode[0], geodecode[1]], method='reverse').address top10address.append(g) return {"top10": top10res,"top10address":top10address}
def create_cifar10():
    # Load training data (CIFAR-10 ships five training batches: data_batch_1..5)
    X, y = [], []
    for num in range(1, 6):
        f = open('data_batch_' + str(num), 'rb')
        batch = pickle.load(f)
        X.append(batch['data'])
        y.append(batch['labels'])
    X = np.concatenate(X).reshape(-1, 3, 32, 32).astype(np.float32)
    y = np.concatenate(y).astype(np.int32)

    # Load test data
    f = open('test_batch', 'rb')
    batch = pickle.load(f)
    X_test = batch['data'].reshape(-1, 3, 32, 32).astype(np.float32)
    y_test = np.array(batch['labels'], dtype=np.int32)

    # Split arrays into training and validation sets
    ii = np.random.permutation(len(X))
    X_train = X[ii[1000:]]
    y_train = y[ii[1000:]]
    X_val = X[ii[:1000]]
    y_val = y[ii[:1000]]

    # Normalize with the training-set mean and standard deviation
    offset = np.mean(X_train, 0)
    scale = np.std(X_train, 0).clip(min=1)
    X_train = (X_train - offset) / scale
    X_val = (X_val - offset) / scale
    X_test = (X_test - offset) / scale

    # Save data
    pickle.dump(X_train, open('cifar10_X_train', "wb"))
    pickle.dump(y_train, open('cifar10_y_train', "wb"))
    pickle.dump(X_val, open('cifar10_X_val', "wb"))
    pickle.dump(y_val, open('cifar10_y_val', "wb"))
    pickle.dump(X_test, open('cifar10_X_test', "wb"))
    pickle.dump(y_test, open('cifar10_y_test', "wb"))
    return True
def get_data():
    '''get the predictors and target and return them'''
    nids = dill.load(open('movie_ids.pkl', 'rb'))
    model = dill.load(open('all2.pkl', 'rb'))
    W = model.W
    df = pd.DataFrame(W, index=nids)
    c = MongoClient()
    db = c['movies']
    boxoffice2 = db.boxoffice2
    movie_info = db.movie_info
    r = list(boxoffice2.find({}, {'_id': 1, 'BoxOffice2': 1}))
    df_bf = pd.DataFrame(r).set_index('_id')
    r2 = list(movie_info.find({}, {'_id': 1, 'year': 1, 'title': 1}))
    df_year = pd.DataFrame(r2)
    df_year = df_year.set_index('_id')
    df = df.join(df_bf).join(df_year)
    cond1 = (df['year'] >= 2010)
    cond2 = ~np.isnan(df['BoxOffice2'])
    cond = cond1 & cond2
    df_subset = df[cond]
    y = df_subset['BoxOffice2'].values
    X = df_subset.iloc[:, :-3].values
    return X, y
def main(): argc = len(sys.argv) if argc < 2: print "Error: No input files" exit() data = pickle.load(open(sys.argv[1], "rb")) bwords = None vocabulary = None if argc > 2: vocabulary, bwords, _ = pickle.load(open(sys.argv[2], "rb")) corpus, topics, titles, time, grpid = data BV = BVsmVectorizer(vocabulary) # N = len(corpus) i = 0 time = np.array(time) # corpus = np.array(corpus) topics = np.array(topics) titles = np.array(titles) grpid = np.array(grpid) allDates = sorted(list(set(time))) for d in allDates: try: dcorpus = list() indeces = list(np.nonzero(time == d)[0]) for j in indeces: dcorpus.append(corpus[j]) c, t, b, sab, smb = BV.vectorize(dcorpus, vocab=vocabulary) vocabulary = BV.vocab except KeyboardInterrupt: print "Could Not Vectorise", d exit() try: pickle.dump( ((c, t, b, sab, smb), topics[time == d], titles[time == d], time[time == d], grpid[time == d]) , open("vectors/"+str(d), "wb") ) # pickle.dump( (BV.vocab, BV.bwords, BV.wordWise / BV.N, BV.N, d) , open("vectors/vectorizer."+str(i%2), "wb")) except KeyboardInterrupt: print "Could Not Pickle", d exit() i += 1 print d, (d%2), "\n" if not d % 30: pickle.dump( (BV.vocab, BV.bwords, BV.bvocab, BV.wordWise / BV.N, BV.N, d) , open("vectors/vectorizer."+str(d), "wb")) print len(set(BV.bwords)) print "Finishing..." pickle.dump( (BV.vocab, BV.bwords, BV.bvocab, BV.wordWise / BV.N, BV.N, d) , open("vectors/vectorizer."+str(d), "wb"))
def predict_checkins(id, n_docks, total_docks, features):
    if not (os.path.exists('estimators/checkin_estimator_high_' + str(id) + '.dill.bz2') and
            os.path.exists('estimators/checkout_estimator_mid_' + str(id) + '.dill.bz2')):
        return None, None, None
    checkin_est = dill.load(bz2.BZ2File('estimators/checkin_estimator_high_' + str(id) + '.dill.bz2', 'r'))
    checkout_est = dill.load(bz2.BZ2File('estimators/checkout_estimator_mid_' + str(id) + '.dill.bz2', 'r'))
    # Net flux: predicted check-outs minus predicted check-ins, scaled by 1/15
    flux = checkout_est.predict(features) / 15 - checkin_est.predict(features) / 15
    # Cumulative dock availability, clipped to the station capacity
    docks = np.clip(n_docks + flux.cumsum(), 0, total_docks)
    # Return the availability at steps 30, 45 and 60
    return int(docks[29]), int(docks[44]), int(docks[59])
def test_2(): max_len = 4 print 'Loading heldout phrase pairs...' with open('/home/christos/SSLP/Project2/heldout/phrase_pairs_.pickle') as handle: phrase_pairs_held = pickle.load(handle) print 'Loading regular phrase pairs...' with open('/home/said/git/SSLP/Project2/training/phrase_pairs_.pickle') as handle: reg_phrase_pairs = pickle.load(handle) print 'Loading combined phrase pairs...' with open('/home/said/git/SSLP/Project2/training/combined_phrase_pairs_4_10.pickle') as handle: comb_phrase_pairs = pickle.load(handle) print 'Measuring sparsity...' ex_sparsity = measure_exact(reg_phrase_pairs, phrase_pairs_held, max_len=max_len) print 'Regular phrase pairs:' for i in xrange(len(ex_sparsity)): print 'For phrases with n =', i+1 print 'In train and heldout:', ex_sparsity[i][0] print 'In train and not in heldout:', ex_sparsity[i][1] print 'In heldout and not in train:', ex_sparsity[i][2] print ex_sparsity = measure_exact(comb_phrase_pairs, phrase_pairs_held, max_len=max_len) print 'Combined phrase pairs:' for i in xrange(len(ex_sparsity)): print 'For phrases with n =', i+1 print 'In train and heldout:', ex_sparsity[i][0] print 'In train and not in heldout:', ex_sparsity[i][1] print 'In heldout and not in train:', ex_sparsity[i][2] print print 'Calcuating precision and recall against Moses...' print 'PR for regular:' precision, recall = pr_vs_moses('phrase-table', reg_phrase_pairs) print 'Precision:', precision, 'Recall:', recall print print 'PR for combined:' precision, recall = pr_vs_moses('phrase-table', comb_phrase_pairs) print 'Precision:', precision, 'Recall:', recall print print 'Calcuating precision and recall against Moses per length...' print 'PR for regular:' precision, recall = pr_vs_moses_per_len('phrase-table', reg_phrase_pairs) for i in xrange(len(precision)): print 'n=', i+1, ':', 'Precision:', precision[i], 'Recall:', recall[i] print print 'PR for combined:' precision, recall = pr_vs_moses_per_len('phrase-table', comb_phrase_pairs) for i in xrange(len(precision)): print 'n=', i+1, ':', 'Precision:', precision[i], 'Recall:', recall[i] print
def test_dynamic_classes(self): test_classes = { algorithms.GradientDescent: {}, algorithms.MinibatchGradientDescent: {'batch_size': 10}, algorithms.Momentum: {'momentum': 0.5}, } for algorithm_class, algorithm_params in test_classes.items(): optimization_classes = [algorithms.WeightDecay, algorithms.SearchThenConverge] bpnet = algorithm_class( (3, 5, 1), addons=optimization_classes, verbose=False, **algorithm_params ) data, target = datasets.make_regression(n_features=3, n_targets=1) data = preprocessing.MinMaxScaler().fit_transform(data) target_scaler = preprocessing.MinMaxScaler() target = target_scaler.fit_transform(target.reshape(-1, 1)) with tempfile.NamedTemporaryFile() as temp: valid_class_name = bpnet.__class__.__name__ dill.dump(bpnet, temp) temp.file.seek(0) restored_bpnet = dill.load(temp) restored_class_name = restored_bpnet.__class__.__name__ temp.file.seek(0) self.assertEqual(valid_class_name, restored_class_name) self.assertEqual(optimization_classes, restored_bpnet.addons) bpnet.train(data, target, epochs=10) real_bpnet_error = bpnet.prediction_error(data, target) updated_input_weight = ( bpnet.input_layer.weight.get_value().copy() ) dill.dump(bpnet, temp) temp.file.seek(0) restored_bpnet2 = dill.load(temp) temp.file.seek(0) restored_bpnet_error = restored_bpnet2.prediction_error( data, target ) np.testing.assert_array_equal( updated_input_weight, restored_bpnet2.input_layer.weight.get_value() ) # Error must be big, because we didn't normalize data self.assertEqual(real_bpnet_error, restored_bpnet_error)
def compute_predictions(self, model_name, inputs): ''' computes prediction for given sample and specified model ''' if self.approx_technique.lower() in ['rsm', 'moa', 'hda', 'gp']: model = gtapprox.Model(self.models_save_path+model_name+self.ext) prediction = model.calc(inputs) elif self.approx_technique.lower() == 'xgboost': model = Xgboost() model.load(self.models_save_path+model_name+self.ext) prediction = model.predict(inputs)[:, np.newaxis] elif self.approx_technique.lower() == 'quadxgboost': model = Xgboost() model.load(self.models_save_path+model_name+'_1_'+self.ext) prediction = model.predict(inputs)[:, np.newaxis] model0 = gtapprox.Model(self.models_save_path+model_name+'_0_'+self.ext) prediction = prediction + model0.calc(inputs) elif self.approx_technique.lower() == 'mars': with open(self.models_save_path+model_name+self.ext, 'rb') as file_object: model = pickle.load(file_object) prediction = model.predict(inputs)[:, np.newaxis] elif self.approx_technique.lower() == 'skgboost': with open(self.models_save_path+model_name+self.ext, 'rb') as file_object: model = pickle.load(file_object) prediction = model.predict(inputs)[:, np.newaxis] elif self.approx_technique.lower() == 'adaboost': with open(self.models_save_path+model_name+self.ext, 'rb') as file_object: model = pickle.load(file_object) prediction = model.predict(inputs)[:, np.newaxis] elif self.approx_technique.lower() == 'zeros': prediction = np.zeros((inputs.shape[0], 1)) elif self.approx_technique.lower() == 'means': with open(self.models_save_path+model_name+self.ext, 'rb') as file_object: model = pickle.load(file_object) prediction = model*np.ones((inputs.shape[0], 1)) else: raise Exception('Wrong approx type specified!') return prediction
def load_actions_from_file():
    """
    Loads Qtrons from a previous run; please check that the file
    ./data/load_qtrons.pk1 exists in the data directory.
    """
    with open('./data/load_qtrons.pk1', 'rb') as f:
        parameters = dill.load(f)
        actions = dill.load(f)
        config = dill.load(f)
    return parameters, actions, config
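# A possible save-side counterpart, sketched here to document the file layout
# the loader above expects (three dill objects written back to back). The
# function name and argument order are assumptions, not from the original code.
import dill


def save_actions_to_file(parameters, actions, config, path='./data/load_qtrons.pk1'):
    # Write the three objects in the same order load_actions_from_file() reads them
    with open(path, 'wb') as f:
        dill.dump(parameters, f)
        dill.dump(actions, f)
        dill.dump(config, f)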
def _main(fd):
    with os.fdopen(fd, 'rb', closefd=True) as from_parent:
        process.current_process()._inheriting = True
        try:
            preparation_data = pickle.load(from_parent)
            prepare(preparation_data)
            self = pickle.load(from_parent)
        finally:
            del process.current_process()._inheriting
    return self._bootstrap()
def __init__(self, words_file, ep_cache_file, occ_cache_file, accuracy):
    with open(ep_cache_file, 'rb') as f:
        self.error_probability = dill.load(f)
    with open(occ_cache_file, 'rb') as f:
        self.occurrneces = dill.load(f)
    self.words = TextCorpusStatisticsCalculator([]).read_words(words_file)
    self.accuracy = accuracy
    self.n = sum([v for k, v in self.occurrneces.items()])
    self.m = len(self.words)
    self.calc = ErrorProbabilityCalculator()
def update_session(fname=None):
    import dill as pickle
    if fname is None:
        fname = conf.session
    try:
        s = pickle.load(gzip.open(fname, "rb"))
    except IOError:
        s = pickle.load(open(fname, "rb"))
    kamene_session = builtins.__dict__["kamene_session"]
    kamene_session.update(s)
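# A possible counterpart that would produce the session files update_session()
# reads (gzip-compressed by default, with a plain pickle as the fallback). The
# function name and the compressed flag are assumptions for illustration only.
import gzip
import dill as pickle


def save_session(session, fname, compressed=True):
    # update_session() first tries gzip.open(), then a plain open(), so either
    # form written here can be loaded back
    if compressed:
        with gzip.open(fname, "wb") as f:
            pickle.dump(session, f)
    else:
        with open(fname, "wb") as f:
            pickle.dump(session, f)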
def load_phrases(folder):
    print 'Loading...'
    with open(folder + 'phrase_pairs_.pickle', 'rb') as handle:
        phrase_pairs = pickle.load(handle)
    with open(folder + 'en_given_nl_.pickle', 'rb') as handle:
        en_given_nl = pickle.load(handle)
    with open(folder + 'nl_given_en_.pickle', 'rb') as handle:
        nl_given_en = pickle.load(handle)
    with open(folder + 'joint_ennl_.pickle', 'rb') as handle:
        joint_ennl = pickle.load(handle)
    print 'Loaded.'
    return phrase_pairs, en_given_nl, nl_given_en, joint_ennl
def main():
    classifier_file = open('saved_classifiers/spam_classifier.pickle', 'rb')
    classifier_object = dill.load(classifier_file)
    classifier_file.close()

    trainer_file = open('saved_classifiers/trainer.pickle', 'rb')
    trainer_object = dill.load(trainer_file)
    trainer_file.close()

    # Testing the accuracy
    test(trainer_object, classifier_object, 'spam')
    test(trainer_object, classifier_object, 'ham')
def category_model(record):
    test_tf = {x: 1 for x in record['categories']}
    # dill pickles are binary, so the model files must be opened in 'rb' mode
    with open("./ml/dict_category_model", "rb") as f:
        a = dill.load(f)
    test_tf1 = a.transform(test_tf)
    with open("./ml/category_model", "rb") as f:
        b = dill.load(f)
    try:
        return b.predict(test_tf1)
    except Exception:
        return 0
def test_dynamic_classes(self): test_classes = { algorithms.Backpropagation: {}, algorithms.MinibatchGradientDescent: {"batch_size": 10}, algorithms.Momentum: {"momentum": 0.5}, algorithms.RPROP: {"maximum_step": 1}, algorithms.IRPROPPlus: {"maximum_step": 1}, algorithms.ConjugateGradient: {"update_function": "fletcher_reeves"}, algorithms.QuasiNewton: {"update_function": "bfgs"}, algorithms.HessianDiagonal: {"min_eigenvalue": 1e-5}, algorithms.LevenbergMarquardt: {"mu": 0.01}, } for algorithm_class, algorithm_params in test_classes.items(): optimization_classes = [algorithms.WeightDecay, algorithms.SearchThenConverge] bpnet = algorithm_class((3, 5, 1), optimizations=optimization_classes, verbose=False, **algorithm_params) data, target = datasets.make_regression(n_features=3, n_targets=1) data = preprocessing.MinMaxScaler().fit_transform(data) target_scaler = preprocessing.MinMaxScaler() target = target_scaler.fit_transform(target.reshape(-1, 1)) with tempfile.NamedTemporaryFile() as temp: valid_class_name = bpnet.__class__.__name__ dill.dump(bpnet, temp) temp.file.seek(0) restored_bpnet = dill.load(temp) restored_class_name = restored_bpnet.__class__.__name__ temp.file.seek(0) self.assertEqual(valid_class_name, restored_class_name) self.assertEqual(optimization_classes, restored_bpnet.optimizations) bpnet.train(data, target, epochs=10) real_bpnet_error = bpnet.error(bpnet.predict(data), target) updated_input_weight = bpnet.input_layer.weight.copy() dill.dump(bpnet, temp) temp.file.seek(0) restored_bpnet2 = dill.load(temp) temp.file.seek(0) actual = restored_bpnet2.predict(data) restored_bpnet_error = restored_bpnet2.error(actual, target) np.testing.assert_array_equal(updated_input_weight, restored_bpnet2.input_layer.weight) # Error must be big, because we didn't normalize data self.assertEqual(real_bpnet_error, restored_bpnet_error)
def sentiment(text):
    """
    Thai sentiment analysis, using data from
    https://github.com/wannaphongcom/lexicon-thai/tree/master/ข้อความ/
    Takes a string (str) and returns 'pos' or 'neg'.
    """
    with open(os.path.join(templates_dir, 'vocabulary.data'), 'rb') as in_strm:
        vocabulary = dill.load(in_strm)
    with open(os.path.join(templates_dir, 'sentiment.data'), 'rb') as in_strm:
        classifier = dill.load(in_strm)
    # Tokenize, drop Thai stopwords, and featurize against the vocabulary
    text = set(word_tokenize(text)) - set(stopwords.words('thai'))
    featurized_test_sentence = {i: (i in text) for i in vocabulary}
    return classifier.classify(featurized_test_sentence)
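# A minimal usage sketch for the helper above, assuming the module-level setup
# (templates_dir, word_tokenize, stopwords) is already in place; the example
# sentence is illustrative only.
if __name__ == '__main__':
    # Prints 'pos' or 'neg' for a short Thai sentence ("this product is very good")
    print(sentiment('สินค้านี้ดีมาก'))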
#!/home/chenyang/Documents/twilio_webserver/env/bin/python2
#
# Author: Mike McKerns (mmckerns @caltech and @uqfoundation)
# Copyright (c) 2008-2016 California Institute of Technology.
# Copyright (c) 2016-2017 The Uncertainty Quantification Foundation.
# License: 3-clause BSD.  The full license text is available at:
#  - http://trac.mystic.cacr.caltech.edu/project/pathos/browser/dill/LICENSE

if __name__ == '__main__':
    import sys
    import dill
    for file in sys.argv[1:]:
        print(dill.load(open(file, 'rb')))
    res = MyEval.F14Exp(pred, test_label)
    print(res)

    with open('../../stat/res_exp_for_paper.csv', 'a') as fout:
        fout.write('{0},{1},{2},{3},{4},{5},{6},{7}\n'.format(
            method, i_iter, i_fold, res[0], res[1], res[2], res[3], res[4]))

    i_fold += 1


if __name__ == "__main__":
    with open('../data/features_all_v2.5.pkl', 'rb') as my_input:
        all_pid = dill.load(my_input)
        all_feature = np.array(dill.load(my_input))
        all_label = np.array(dill.load(my_input))
    print('features_all shape: ', all_feature.shape)

    with open('../data/feat_deep_centerwave_v0.1.pkl', 'rb') as my_input:
        feat_deep_centerwave = np.array(dill.load(my_input))
    print('feat_deep_centerwave shape: ', feat_deep_centerwave.shape)

    with open('../data/feat_resnet.pkl', 'rb') as my_input:
        feat_resnet = np.array(dill.load(my_input))
    print('feat_resnet shape: ', feat_resnet.shape)

    all_feature = np.c_[all_feature, feat_deep_centerwave, feat_resnet]
    all_label = np.array(all_label)
    all_pid = np.array(all_pid)
filename = "51_rubidium87_relevant_2021-08-08T13_42_.pkl"
filename = "51_rubidium87_relevant_2021-08-08T14_56_.pkl"
filename = "51_rubidium87_relevant_2021-08-08T18_54_.pkl"
filename = "51_rubidium87_relevant_2021-08-09T02_34_.pkl"
filename = "51_rubidium87_relevant_2021-08-09T13_23_.pkl"
filename = "51_rubidium87_relevant_2021-08-09T13_47_.pkl"
filename = "51_rubidium87_relevant_2021-08-09T14_36_.pkl"
filename = "51_rubidium87_relevant_2021-08-09T15_28_.pkl"
filename = "51_rubidium87_relevant_2021-08-09T16_27_.pkl"
filename = "51_rubidium87_relevant_2021-08-09T17_12_.pkl"
filename = "51_rubidium87_relevant_2021-08-09T17_37.pkl"
filename = "51_rubidium87_relevant_2021-08-09T18_12.pkl"
filename = "60_rubidium87_relevant_2021-08-19T16_58.pkl"

with open(f"system/simulation/saved_simulations/{filename}", "rb") as f:
    simulation: Simulation = pickle.load(f)

# _raw_dc_calculator = simulation.get_calculator((270, 230))
# simulation.dc_field_calculator = lambda _t: _raw_dc_calculator(_t).round(1)
# _raw_dc_calculator = simulation.get_calculator((270, 210))
# simulation.dc_field_calculator = lambda _t: _raw_dc_calculator(_t).round(1)
#
# simulation.rf_freq_calculator = simulation.get_calculator(230e6 / 1e9)
# simulation.rf_field_calculator = lambda t: 3 * np.sin(np.pi * t / 1000 / simulation.t)

print(simulation.dc_field)
print(simulation.rf_field)
print(simulation.rf_freq)
print(simulation.t)

systems: List[qutip.Qobj] = simulation.results.states
            lenDBuser = len(DBuser)
            lenUserVec = len(userVec)
            jaccard = matches / (lenDBuser + lenUserVec - matches)
            tempSim = jaccard * (1 - MSD)
            if tempSim > bestSim:
                bestSim = tempSim
                bestSimUser = u
    bestSimUser = icamf_recommender.rating_object.ids_user[bestSimUser]
    return bestSimUser, bestSim


with open("dummy_model.pkl", "rb") as f:
    icamf_recommender = dill.load(f)

if __name__ == '__main__':
    app.run(host="0.0.0.0", port=8080)

# To run without clipping, set to False or delete the argument
# train_recommender_kfold(kfold=5, regularizer=0.001, learning_rate=0.001, num_factors=20, iterations=50, clipping=5)
# train_recommender_kfold(kfold=5, regularizer=0.001, learning_rate=0.002, num_factors=20, iterations=50, clipping=5)
# train_recommender_kfold(kfold=5, regularizer=0.001, learning_rate=0.005, num_factors=20, iterations=50, clipping=5)
# train_and_save_model(regularizer=0.001, learning_rate=0.002, num_factors=20, iterations=1, clipping=5)
# with open("dummy_model.pkl", "rb") as f:
#     icamf_recommender = dill.load(f)
def sample(save_dir): path_to_config = save_dir + "/config" if not os.path.isfile(path_to_config): raise IOError("Could not find " + path_to_config) with open(path_to_config, "rb") as f: gen_config = pickle.load(f) # # Load vocabulary encoder # glove_dir = '/Users/danfriedman/Box Sync/My Box Files/9 senior spring/gen/glove/glove.6B/glove.6B.50d.txt' # #glove_dir = '/data/corpora/word_embeddings/glove/glove.6B.50d.txt' if gen_config.use_glove: _, _, _, L = data_reader.glove_encoder(gen_config.glove_dir) else: L = None # Rebuild the model with tf.variable_scope("LSTM"): gen_model = lstm_ops.seq2seq_model( encoder_seq_length=gen_config.d_len, decoder_seq_length=1, num_layers=gen_config.num_layers, embed_size=gen_config.embed_size, batch_size=gen_config.batch_size, hidden_size=gen_config.hidden_size, vocab_size=gen_config.vocab_size, dropout=gen_config.dropout, max_grad_norm=gen_config.max_grad_norm, use_attention=gen_config.use_attention, embeddings=L, is_training=False, is_gen_model=True, token_type=gen_config.token_type, reuse=False) with tf.Session() as session: saver = tf.train.Saver() saver.restore(session, tf.train.latest_checkpoint('./' + args.save_dir)) def generate(description, temperature): return lstm_ops.generate_text_beam_search( session=session, model=gen_model, encode=gen_config.encode, decode=gen_config.decode, description=description, d_len=gen_config.d_len, beam=5, stop_length=gen_config.c_len, temperature=temperature) seed = "Three huge birds wait outside of the window of a man's room. The man is talking on the phone." temp = 1.0 print(generate(seed, temp)) while raw_input("Sample again? ([y]/n): ") != "n": new_seed = raw_input("seed: ") if len(gen_config.encode(seed)) > gen_config.d_len: print("Description must be < {} tokens".format( gen_config.d_len)) continue new_temp = raw_input("temp: ") if new_seed != "": seed = new_seed if new_temp != "": temp = float(new_temp) print(generate(seed, temp))
def resume(save_file):
    simulation = dill.load(save_file)
    simulation.run(resume=True)
def tokenizer(text):
    # Tokenizer function: returns a list of the token texts produced by spaCy
    return [tok.text for tok in spacy_en.tokenizer(text)]


from torchtext import data
import numpy as np
from data import text_utils

args = argument_parser()

with open("seq2seq/bak/TEXT.Field", "rb") as f:
    TEXT = dill.load(f)

LENGTH = data.Field(sequential=False, use_vocab=False)
embeddings = np.random.random((len(TEXT.vocab.itos), args.embed_size))
args.TEXT = TEXT

encoder = SN_MODELS["encoder"](embeddings, args)
# atten = SN_MODELS["attention"](args.hidden_size * 4, 300)
# decoder = SN_MODELS["decoder"](embeddings, args)
atten = SN_MODELS["attention"](args.hidden_size, "general")
decoder = SN_MODELS["decoder"](embeddings, args, atten)
model_class = SN_MODELS[args.model_name]
# model = model_class(encoder, decoder, args)
NAME = '*test13' pth = Path('~/Simulations/coop_extension') flist = list(pth.expanduser().glob(NAME)) def get_params(fname): par = str(fname).split('/')[5].split('-') return par data = [] for fl in flist: with open(fl, 'rb') as fh: d = dill.load(fh) p = get_params(fl) data.append((p, d)) D = len(data) N = len([ x for x in data[0][1][-1].keys() if isinstance(x, tuple) ]) - 1 dataset = np.zeros((D, 10)) NN = tuple(range(N)) for i, (par, dt) in enumerate(data): dataset[i][0] = dt[-1]['roi_coop_theo'] # Theo roi coop dataset[i][1] = dt[-1]['roi_coop_days'].mean() # Mean roi coop dataset[i][2] = 100 * ((dataset[i, 0] / dataset[i , 1]) - 1)
default=10000, help='number of points used for plotting') args = parser.parse_args() args.twofold = bool(args.twofold) if args.target_dims == 'all': args.mds_dims = args.latent_dims if not args.conditioning is None: args.name += "_%s" % args.conditioning #%% Data import import dill with open(args.data, 'rb') as f: audioSet = dill.load(f) #testSet = np.load('data/testSet.npy') if not args.filter is None: wrong_ids = np.where(audioSet.metadata['octave'] > args.filter) audioSet.files = np.delete(np.array(audioSet.files), wrong_ids).tolist() audioSet.data = np.delete(np.array(audioSet.data), wrong_ids).tolist() for k, v in audioSet.metadata.items(): audioSet.metadata[k] = np.delete(v, wrong_ids) if len(args.frames) == 0: print('taking the whole dataset...') audioSet.flattenData(lambda x: x[:]) elif len(args.frames) == 2: print('taking between %d and %d...' % (args.frames[0], args.frames[1])) audioSet.flattenData(lambda x: x[args.frames[0]:args.frames[1]])
def load_model(self, path_to_save):
    with open(path_to_save, 'rb') as f:
        hmm_tagger = dill.load(f)
    self.model = hmm_tagger
    return hmm_tagger
def load_model():
    with open(MODEL_FILE_PATH, 'rb') as file:
        return dill.load(file)
def _load_auxiliary_files(self):
    super(TextTrainer, self)._load_auxiliary_files()
    data_indexer_file = open("%s_data_indexer.pkl" % self.model_prefix, "rb")
    self.data_indexer = pickle.load(data_indexer_file)
    data_indexer_file.close()
def unpickle(path):
    with open(path, 'rb') as fp:
        return dill.load(fp)
def load(identifier):
    file_name = f'{se.local_storage}model_nav-mode_v{DILL_VERSION}_{identifier}.dill'
    with open(file_name, 'rb') as handle:
        bm = dill.load(handle)
    return bm
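# A matching save helper, sketched to show where the versioned file name read
# by load() would come from; the function name and the protocol choice are
# assumptions, not part of the original module (se.local_storage and
# DILL_VERSION are taken from the loader above).
import dill


def save(bm, identifier):
    file_name = f'{se.local_storage}model_nav-mode_v{DILL_VERSION}_{identifier}.dill'
    with open(file_name, 'wb') as handle:
        dill.dump(bm, handle, protocol=dill.HIGHEST_PROTOCOL)
    return file_name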
import os
import tempfile

import dill

try:
    import pathos.multiprocessing as mp
except ImportError:
    pass

from spatialist.ancillary import HiddenPrints

if __name__ == '__main__':
    # de-serialize the arguments written by function ancillary.multicore
    tmpfile = os.path.join(tempfile.gettempdir(), 'spatialist_dump')
    with open(tmpfile, 'rb') as tmp:
        func, cores, processlist = dill.load(tmp)

    # serialize the job arguments to be able to pass them to the processes
    processlist = [dill.dumps([func, x]) for x in processlist]

    # a simple wrapper to execute the jobs in the sub-processes
    # re-import of modules and passing pickled variables is necessary since on
    # Windows the environment is not shared between parent and child processes
    def wrapper(job):
        import dill
        function, proc = dill.loads(job)
        return function(**proc)

    # hide print messages in the sub-processes
    with HiddenPrints():
        # start pool of processes and do the work
def __init__( self, Nlayers=1, # number of layers Ndirs=1, # unidirectional or bidirectional Nx=100, # input size Nh=100, # hidden layer size Ny=100, # output size Ah="relu", # hidden unit activation (e.g. relu, tanh, lstm) Ay="linear", # output unit activation (e.g. linear, sigmoid, softmax) predictPer="frame", # frame or sequence loss=None, # loss function (e.g. mse, ce, ce_group, hinge, squared_hinge) L1reg=0.0, # L1 regularization L2reg=0.0, # L2 regularization seed=15213, # random seed for initializing the weights frontEnd=None, # a lambda function for transforming the input filename=None, # initialize from file initParams=None, # initialize from given dict ): if filename is not None: # load parameters from file with smart_open(filename, "rb") as f: initParams = dill.load(f) if initParams is not None: # load parameters from given dict self.paramNames = [] self.params = [] for k, v in initParams.iteritems(): if type(v) is numpy.ndarray: self.addParam(k, v) else: setattr(self, k, v) self.paramNames.append(k) # F*ck, locals()[k] = v doesn't work; I have to do this statically Nlayers, Ndirs, Nx, Nh, Ny, Ah, Ay, predictPer, loss, L1reg, L2reg, frontEnd \ = self.Nlayers, self.Ndirs, self.Nx, self.Nh, self.Ny, self.Ah, self.Ay, self.predictPer, self.loss, self.L1reg, self.L2reg, self.frontEnd else: # Initialize parameters randomly # Names of parameters to save to file self.paramNames = [ "Nlayers", "Ndirs", "Nx", "Nh", "Ny", "Ah", "Ay", "predictPer", "loss", "L1reg", "L2reg", "frontEnd" ] for name in self.paramNames: value = locals()[name] setattr(self, name, value) # Values of parameters for building the computational graph self.params = [] # Initialize random number generators global rng rng = numpy.random.RandomState(seed) # Construct parameter matrices Nlstm = 4 if Ah == 'lstm' else 1 self.addParam("Win", rand_init((Nx, Nh * Ndirs * Nlstm), Ah)) self.addParam("Wrec", rand_init((Nlayers, Ndirs, Nh, Nh * Nlstm), Ah)) self.addParam( "Wup", rand_init((Nlayers - 1, Nh * Ndirs, Nh * Ndirs * Nlstm), Ah)) self.addParam("Wout", rand_init((Nh * Ndirs, Ny), Ay)) if Ah != "lstm": self.addParam("Bhid", zeros((Nlayers, Nh * Ndirs))) else: self.addParam( "Bhid", numpy.tile( numpy.hstack([ full((Nlayers, Nh), 1.0), zeros((Nlayers, Nh * 3)) ]), (1, Ndirs))) self.addParam("Bout", zeros(Ny)) self.addParam("h0", zeros((Nlayers, Ndirs, Nh))) if Ah == "lstm": self.addParam("c0", zeros((Nlayers, Ndirs, Nh))) # Compute total number of parameters self.nParams = sum(x.get_value().size for x in self.params) # Initialize accumulators for gradients self.aparams = [ theano.shared(zeros(x.get_value().shape)) for x in self.params ] # Build computation graph input = T.ftensor3() mask = T.imatrix() mask_int = [(mask % 2).nonzero(), (mask >= 2).nonzero()] mask_float = [ T.cast((mask % 2).dimshuffle((1, 0)).reshape( (mask.shape[1], mask.shape[0], 1)), theano.config.floatX), T.cast((mask >= 2).dimshuffle((1, 0)).reshape( (mask.shape[1], mask.shape[0], 1)), theano.config.floatX) ] # mask_int = [(mask & 1).nonzero(), (mask & 2).nonzero()] # mask_float = [T.cast((mask & 1).dimshuffle((1, 0)).reshape((mask.shape[1], mask.shape[0], 1)), theano.config.floatX), # T.cast(((mask & 2) / 2).dimshuffle((1, 0)).reshape((mask.shape[1], mask.shape[0], 1)), theano.config.floatX)] def step_rnn(x_t, mask, h_tm1, W, h0): h_tm1 = T.switch(mask, h0, h_tm1) return [ACTIVATION[Ah](x_t + h_tm1.dot(W))] def step_lstm(x_t, mask, c_tm1, h_tm1, W, c0, h0): c_tm1 = T.switch(mask, c0, c_tm1) h_tm1 = T.switch(mask, h0, h_tm1) a = x_t + h_tm1.dot(W) f_t = 
T.nnet.sigmoid(a[:, :Nh]) i_t = T.nnet.sigmoid(a[:, Nh:Nh * 2]) o_t = T.nnet.sigmoid(a[:, Nh * 2:Nh * 3]) c_t = T.tanh(a[:, Nh * 3:]) * i_t + c_tm1 * f_t h_t = T.tanh(c_t) * o_t return [c_t, h_t] x = input if frontEnd is None else frontEnd(input) for i in range(Nlayers): h = (x.dimshuffle((1, 0, 2)).dot(self.Win) if i == 0 else h.dot(self.Wup[i - 1])) + self.Bhid[i] rep = lambda x: T.extra_ops.repeat( x.reshape((1, -1)), h.shape[1], axis=0) if Ah != "lstm": h = T.concatenate([ theano.scan( fn=step_rnn, sequences=[ h[:, :, Nh * d:Nh * (d + 1)], mask_float[d] ], outputs_info=[rep(self.h0[i, d])], non_sequences=[self.Wrec[i, d], rep(self.h0[i, d])], go_backwards=(d == 1), )[0][::(1 if d == 0 else -1)] for d in range(Ndirs) ], axis=2) else: h = T.concatenate([ theano.scan( fn=step_lstm, sequences=[ h[:, :, Nh * 4 * d:Nh * 4 * (d + 1)], mask_float[d] ], outputs_info=[rep(self.c0[i, d]), rep(self.h0[i, d])], non_sequences=[ self.Wrec[i, d], rep(self.c0[i, d]), rep(self.h0[i, d]) ], go_backwards=(d == 1), )[0][1][::(1 if d == 0 else -1)] for d in range(Ndirs) ], axis=2) h = h.dimshuffle((1, 0, 2)) if predictPer == "sequence": h = T.concatenate([ h[mask_int[1 - d]][:, Nh * d:Nh * (d + 1)] for d in range(Ndirs) ], axis=1) output = ACTIVATION[Ay](h.dot(self.Wout) + self.Bout) # Compute loss function if loss is None: loss = { "linear": "mse", "sigmoid": "ce", "softmax": "ce_group" }[self.Ay] if loss == "ctc": label = T.imatrix() label_time = T.imatrix() tol = T.iscalar() cost = ctc_cost(output, mask, label, label_time, tol) else: if predictPer == "sequence": label = T.fmatrix() y = output t = label elif predictPer == "frame": label = T.ftensor3() indices = (mask >= 0).nonzero() y = output[indices] t = label[indices] cost = T.mean({ "ce": -T.mean(T.log(y) * t + T.log(1 - y) * (1 - t), axis=1), "ce_group": -T.log((y * t).sum(axis=1)), "mse": T.mean((y - t)**2, axis=1), "hinge": T.mean(relu(1 - y * (t * 2 - 1)), axis=1), "squared_hinge": T.mean(relu(1 - y * (t * 2 - 1))**2, axis=1), }[loss]) # Add regularization cost += sum(abs(x).sum() for x in self.params) / self.nParams * L1reg cost += sum(T.sqr(x).sum() for x in self.params) / self.nParams * L2reg # Compute updates for network parameters updates = [] lrate = T.fscalar() clip = T.fscalar() grad = T.grad(cost, self.params) grad_clipped = [T.maximum(T.minimum(g, clip), -clip) for g in grad] for w, a, g in zip(self.params, self.aparams, grad_clipped): updates.append((a, 0.9 * a + 0.1 * g**2)) updates.append((w, w - lrate * g / (a + 1e-8)**0.5)) # Create functions to be called from outside if loss == "ctc": inputs = [input, mask, label, label_time, tol, lrate, clip] else: inputs = [input, mask, label, lrate, clip] self.train = theano.function( inputs=inputs, outputs=cost, updates=updates, ) self.predict = theano.function(inputs=[input, mask], outputs=output)
def generate_mu_timelist( record_max=10**4, no_arms=5, mu_type='biggap', is_timevar=False, timevar_type='General', reward_type='Bernoulli', mu_list=None, direc=None, plot=True, ): ''' generate the value of each arm mean as a function of time ''' # if not provided the list of mean of arms to start with, then generate it if mu_list is None: mu_list = generate_mu_list(no_arms, mu_type, timevar_type, reward_type) # make sure the senatity no_arms = len(mu_list) if not is_timevar: # no changes mu_time_list = lambda t, i: mu_list[i] else: if timevar_type == 'Abrupt': # continuous changes if mu_type == "biggap": f_t = lambda t: 0.1 * pow(-1, np.floor(t / 50000)) * ( t >= 50000 * np.floor(t / 50000) and t < 50000 * (np.floor(t / 50000) + 1)) elif mu_type == "smallgap": f_t = lambda t: 0.01 * pow(-1, np.floor(t / 50000)) * ( t >= 50000 * np.floor(t / 50000) and t < 50000 * (np.floor(t / 50000) + 1)) else: f_t = lambda t: rand() * pow(-1, np.floor(t / 50000)) * ( t >= 50000 * np.floor(t / 50000) and t < 50000 * (np.floor(t / 50000) + 1)) mu_time_list = lambda t, i: mu_list[mod( i + np.int(np.divide(t, 50000)), no_arms)] + f_t(t) elif timevar_type == 'General': # continuous changes f_t = lambda t: sin(np.pi * t / 50000) + 1 mu_time_list = lambda t, i: mu_list[i] * f_t(t + 50000 * i) elif timevar_type == 'RealAbrupt': # Example from the Yahoo! dataset, from article "Nearly Optimal Adaptive Procedure with Change Detection for Piecewise-Stationary Bandit" (M-UCB) https://arxiv.org/abs/1802.03692 # 6 arms, 9 discrete change mu_list = [[0.071, 0.041, 0.032, 0.030, 0.020, 0.011], [0.055, 0.053, 0.032, 0.030, 0.008, 0.011], [0.040, 0.063, 0.032, 0.030, 0.008, 0.011], [0.040, 0.042, 0.043, 0.030, 0.008, 0.011], [0.030, 0.032, 0.055, 0.030, 0.008, 0.011], [0.030, 0.032, 0.020, 0.030, 0.008, 0.021], [0.020, 0.022, 0.020, 0.045, 0.008, 0.021], [0.020, 0.022, 0.020, 0.057, 0.008, 0.011], [0.020, 0.022, 0.034, 0.057, 0.022, 0.011]] mu_time_list = lambda t, i: mu_list[int(np.floor(t / 50000))][i] # save the data to direc if direc is not None: filename = 'noarms%d_mu_type%s_timevar_type%s_reward_type%s' % ( no_arms, mu_type, timevar_type, reward_type) if not os.path.exists("%s/%s.pkl" % (direc, filename)): savelambda(mu_time_list, direc, filename) else: with open('%s/%s.pkl' % (direc, filename), 'rb') as input: mu_dat = dill.load(input) mu_time_list = mu_dat plotfilename = 'mu_time_list%s' % (filename) plotdirec = direc + '/plots' if not os.path.exists("%s/%s.pdf" % (plotdirec, plotfilename)) and plot: plot_mu(plotdirec, plotfilename, no_arms, record_max, mu_time_list, mu_type) # return the list, which is a list of funtion, each represent the mean of arms as a function of time return mu_time_list
from flask import Flask
from flask import request
from flask import render_template

app = Flask(__name__)

import pickle
import numpy as np
import dill

# dill pickles are binary, so open the model file in 'rb' mode
model = dill.load(open('./data/model.pkl', 'rb'))
c_feat_mat = model.feat_mat[:len(model.course_list), :]


# Form page to submit text
# ============================================
# create page with a form on it
@app.route('/index.html')
@app.route('/index')
@app.route('/')
def submission_page():
    return render_template('index.html')


# Recommendation page
# ============================================
# create page with a form on it
# Recommending
def LoadNetwork(network2Load):
    with open(network2Load, "rb") as networkPickled:
        return pickle.load(networkPickled)
# coding:utf-8
import dill as pickle

tmp = pickle.load(open('bpe_deen/bpe_vocab.pkl', 'rb'))  # open(opt.data_pkl, 'rb')
print(tmp)
def main(_): tf.logging.set_verbosity(tf.logging.INFO) albert_config = modeling.AlbertConfig.from_json_file( FLAGS.albert_config_file) validate_flags_or_throw(albert_config) tf.gfile.MakeDirs(FLAGS.output_dir) # # tokenizer = fine_tuning_utils.create_vocab( # vocab_file=FLAGS.vocab_file, # do_lower_case=FLAGS.do_lower_case, # spm_model_file=FLAGS.spm_model_file, # hub_module=FLAGS.albert_hub_module_handle) tpu_cluster_resolver = None if FLAGS.use_tpu and FLAGS.tpu_name: tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver( FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project) is_per_host = contrib_tpu.InputPipelineConfig.PER_HOST_V2 if FLAGS.do_train: iterations_per_loop = int( min(FLAGS.iterations_per_loop, FLAGS.save_checkpoints_steps)) else: iterations_per_loop = FLAGS.iterations_per_loop run_config = contrib_tpu.RunConfig( cluster=tpu_cluster_resolver, master=FLAGS.master, model_dir=FLAGS.output_dir, keep_checkpoint_max=0, save_checkpoints_steps=FLAGS.save_checkpoints_steps, tpu_config=contrib_tpu.TPUConfig( iterations_per_loop=iterations_per_loop, num_shards=FLAGS.num_tpu_cores, per_host_input_for_training=is_per_host)) train_examples = None num_train_steps = None num_warmup_steps = None if not tf.gfile.Exists(FLAGS.train_feature_file): raise Exception("Train tf-record missed...") cnt = 0 records = tf.python_io.tf_record_iterator(FLAGS.train_feature_file) for _ in records: cnt += 1 print(cnt) num_train_steps = int(cnt / FLAGS.train_batch_size * FLAGS.num_train_epochs) # train_examples = squad_utils.read_squad_examples( # input_file=FLAGS.train_file, is_training=True) # num_train_steps = int( # len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs) if FLAGS.do_train: num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion) # # # Pre-shuffle the input to avoid having to make a very large shuffle # # buffer in in the `input_fn`. # rng = random.Random(12345) # rng.shuffle(train_examples) tag_info = squad_utils.TagInfo.load(FLAGS.tag_info_file) print(tag_info.__dict__) model_fn = squad_utils.v2_model_fn_builder( albert_config=albert_config, init_checkpoint=FLAGS.init_checkpoint, learning_rate=FLAGS.learning_rate, num_train_steps=num_train_steps, num_warmup_steps=num_warmup_steps, use_tpu=FLAGS.use_tpu, use_one_hot_embeddings=FLAGS.use_tpu, max_seq_length=FLAGS.max_seq_length, start_n_top=FLAGS.start_n_top, end_n_top=FLAGS.end_n_top, dropout_prob=FLAGS.dropout_prob, hub_module=FLAGS.albert_hub_module_handle, tag_info=tag_info) # If TPU is not available, this will fall back to normal Estimator on CPU # or GPU. estimator = contrib_tpu.TPUEstimator( use_tpu=FLAGS.use_tpu, model_fn=model_fn, config=run_config, train_batch_size=FLAGS.train_batch_size, predict_batch_size=FLAGS.predict_batch_size) if FLAGS.do_train: # We write to a temporary file to avoid storing very large constant tensors # in memory. 
# if not tf.gfile.Exists(FLAGS.train_feature_file): # train_writer = squad_utils.FeatureWriter( # filename=os.path.join(FLAGS.train_feature_file), is_training=True) # squad_utils.convert_examples_to_features( # examples=train_examples, # tokenizer=tokenizer, # max_seq_length=FLAGS.max_seq_length, # doc_stride=FLAGS.doc_stride, # max_query_length=FLAGS.max_query_length, # is_training=True, # output_fn=train_writer.process_feature, # do_lower_case=FLAGS.do_lower_case) # train_writer.close() tf.logging.info("***** Running training *****") # tf.logging.info(" Num orig examples = %d", len(train_examples)) # tf.logging.info(" Num split examples = %d", train_writer.num_features) tf.logging.info(" Batch size = %d", FLAGS.train_batch_size) tf.logging.info(" Num steps = %d", num_train_steps) # del train_examples train_input_fn = squad_utils.input_fn_builder( input_file=FLAGS.train_feature_file, seq_length=FLAGS.max_seq_length, is_training=True, drop_remainder=True, use_tpu=FLAGS.use_tpu, bsz=FLAGS.train_batch_size, is_v2=True) estimator.train(input_fn=train_input_fn, max_steps=num_train_steps) if FLAGS.do_predict: import dill # with tf.gfile.Open(FLAGS.predict_file) as predict_file: # prediction_json = json.load(predict_file)["data"] # eval_examples = squad_utils.read_squad_examples( # input_file=FLAGS.predict_file, is_training=False) if (tf.gfile.Exists(FLAGS.predict_feature_file) and tf.gfile.Exists(FLAGS.predict_feature_left_file) and tf.gfile.Exists(FLAGS.predict_example_file)): tf.logging.info("Loading eval features from {}".format( FLAGS.predict_feature_left_file)) with tf.gfile.Open(FLAGS.predict_feature_left_file, "rb") as fin: eval_features = dill.load(fin) with tf.gfile.Open(FLAGS.predict_example_file, "rb") as fin: eval_examples = dill.load(fin) # else: # eval_writer = squad_utils.FeatureWriter( # filename=FLAGS.predict_feature_file, is_training=False) # eval_features = [] # # def append_feature(feature): # eval_features.append(feature) # eval_writer.process_feature(feature) # # squad_utils.convert_examples_to_features( # examples=eval_examples, # tokenizer=tokenizer, # max_seq_length=FLAGS.max_seq_length, # doc_stride=FLAGS.doc_stride, # max_query_length=FLAGS.max_query_length, # is_training=False, # output_fn=append_feature, # do_lower_case=FLAGS.do_lower_case) # eval_writer.close() # # with tf.gfile.Open(FLAGS.predict_feature_left_file, "wb") as fout: # pickle.dump(eval_features, fout) tf.logging.info("***** Running predictions *****") tf.logging.info(" Num orig examples = %d", len(eval_examples)) tf.logging.info(" Num split examples = %d", len(eval_features)) tf.logging.info(" Batch size = %d", FLAGS.predict_batch_size) predict_input_fn = squad_utils.input_fn_builder( input_file=FLAGS.predict_feature_file, seq_length=FLAGS.max_seq_length, is_training=False, drop_remainder=False, use_tpu=FLAGS.use_tpu, bsz=FLAGS.predict_batch_size, is_v2=True) def get_result(checkpoint): """Evaluate the checkpoint on SQuAD v2.0.""" # If running eval on the TPU, you will need to specify the number of # steps. 
all_results = [] for result in estimator.predict(predict_input_fn, yield_single_examples=True, checkpoint_path=checkpoint): if len(all_results) % 1000 == 0: tf.logging.info("Processing example: %d" % (len(all_results))) unique_id = int(result["unique_ids"]) crf_logits = result["crf_logits"] transition_params = result["transition_params"] softmax = result["softmax"] all_results.append( squad_utils.RawResultV2( unique_id=unique_id, crf_logits=crf_logits, softmax=softmax, transition_params=transition_params, )) output_prediction_file = os.path.join(FLAGS.output_dir, "predictions.json") predictions = squad_utils.write_predictions_et( eval_examples, eval_features, all_results, FLAGS.max_answer_length, tag_info) import numpy class MyEncoder(json.JSONEncoder): def default(self, o): if isinstance(o, numpy.integer): return int(o) if isinstance(o, numpy.floating): return float(o) if isinstance(o, numpy.ndarray): return o.tolist() else: return super(MyEncoder, self).default(o) with tf.gfile.Open(output_prediction_file, 'w') as f: json.dump(predictions, f, ensure_ascii=False, cls=MyEncoder) latest_checkpoint = tf.train.latest_checkpoint(FLAGS.output_dir) get_result(latest_checkpoint)
def load_obj(path):
    """Loads object"""
    return dill.load(open(path, 'rb'))
def load(location, do_unzip_and_model_type_check=True): """ Method used to load a container from the file system. Args: location: The location on the file system where to load the model. do_unzip_and_model_type_check: Whether to unzip the model and check the type. Returns: The loaded model. """ assert tvm_installed(), "TVM Container requires TVM installed." _load_param_dict = tvm._ffi.get_global_func("tvm.relay._load_param_dict") # We borrow this function directly from Relay. # Relay when imported tryies to download schedules data, # but at inference time access to disk or network could be blocked. def load_param_dict(param_bytes): if isinstance(param_bytes, (bytes, str)): param_bytes = bytearray(param_bytes) load_arr = _load_param_dict(param_bytes) return {v.name: v.array for v in load_arr} container = None if do_unzip_and_model_type_check: # Unzip the dir. zip_location = location if not location.endswith("zip"): zip_location = location + ".zip" else: location = zip_location[:-4] assert os.path.exists(zip_location), "Zip file {} does not exist.".format(zip_location) shutil.unpack_archive(zip_location, location, format="zip") assert os.path.exists(location), "Model location {} does not exist.".format(location) # Load the model type. with open(os.path.join(location, constants.SAVE_LOAD_MODEL_TYPE_PATH), "r") as file: model_type = file.readline() if model_type != "tvm": shutil.rmtree(location) raise RuntimeError("Expected TVM model type, got {}".format(model_type)) # Check the versions of the modules used when saving the model. if os.path.exists(os.path.join(location, constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH)): with open(os.path.join(location, constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH), "r") as file: configuration = file.readlines() check_dumped_versions(configuration, hummingbird, torch) else: warnings.warn( "Cannot find the configuration file with versions. You are likely trying to load a model saved with an old version of Hummingbird." ) # Load the actual model. path_lib = os.path.join(location, constants.SAVE_LOAD_TVM_LIB_PATH) graph = open(os.path.join(location, constants.SAVE_LOAD_TVM_GRAPH_PATH)).read() lib = tvm.runtime.module.load_module(path_lib) params = load_param_dict(open(os.path.join(location, constants.SAVE_LOAD_TVM_PARAMS_PATH), "rb").read()) # Load the container. with open(os.path.join(location, constants.SAVE_LOAD_CONTAINER_PATH), "rb") as file: container = dill.load(file) if container is None: shutil.rmtree(location) raise RuntimeError("Failed to load the model container.") # Setup the container. ctx = tvm.cpu() if container._ctx == "cpu" else tvm.gpu container._model = graph_runtime.create(graph, lib, ctx) container._model.set_input(**params) container._extra_config[constants.TVM_GRAPH] = graph container._extra_config[constants.TVM_LIB] = lib container._extra_config[constants.TVM_PARAMS] = params container._extra_config[constants.TVM_CONTEXT] = ctx container._ctx = ctx container._tvm_tensors = {name: container._to_tvm_array(np.array([])) for name in container._input_names} # Need to set the number of threads to use as set in the original container. os.environ["TVM_NUM_THREADS"] = str(container._n_threads) shutil.rmtree(location) return container
import os

import dill as pickle
import numpy as np
import lda
from rpy2 import robjects
from rpy2.robjects.packages import importr
from rpy2.robjects.vectors import IntVector, FloatVector

# set working directory
os.chdir("/Users/annekespeijers/Desktop/BGSE/Term3/TextMining/Homework/Project/")

# load in corpus
with open('./data/corpus.pkl', 'rb') as input:
    corpus = pickle.load(input)

# doc term matrix
X = corpus.document_term_matrix(corpus.token_set)
X = X.astype(int)

# get vocab, article titles and article comments
vocab = tuple(corpus.token_set)
titles = tuple([t.title for t in corpus.docs])
comments = tuple([com.comments for com in corpus.docs])

## fit the model: k = 2
k = 2
model = lda.LDA(n_topics=k, n_iter=500, random_state=1,
                eta=200 / float(len(vocab)),
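The model call above is truncated in the source. A hedged continuation, assuming the call is completed and the standard lda package API (fit, topic_word_); the top-words loop is illustrative only:

# Sketch only: fit the model and print the top words per topic.
model.fit(X)
topic_word = model.topic_word_            # shape: (k, len(vocab))
n_top_words = 10
for topic_idx, dist in enumerate(topic_word):
    top_words = np.array(vocab)[np.argsort(dist)][:-(n_top_words + 1):-1]
    print("Topic {}: {}".format(topic_idx, " ".join(top_words)))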
def main():
    if not os.path.exists(os.path.join("saved", model_name)):
        os.makedirs(os.path.join("saved", model_name))

    data_path = '../../data/records_final.pkl'
    voc_path = '../../data/voc_final.pkl'
    device = torch.device('cuda:0')

    # Load the patient records and the diagnosis/procedure/medication vocabularies.
    data = dill.load(open(data_path, 'rb'))
    voc = dill.load(open(voc_path, 'rb'))
    diag_voc, pro_voc, med_voc = voc['diag_voc'], voc['pro_voc'], voc['med_voc']

    # 2/3 of the records for training, the remainder split evenly into test and eval.
    split_point = int(len(data) * 2 / 3)
    data_train = data[:split_point]
    eval_len = int(len(data[split_point:]) / 2)
    data_test = data[split_point:split_point + eval_len]
    data_eval = data[split_point + eval_len:]
    voc_size = (len(diag_voc.idx2word), len(pro_voc.idx2word), len(med_voc.idx2word))

    EPOCH = 30
    LR = 0.0002
    TEST = False
    END_TOKEN = voc_size[2] + 1

    model = Leap(voc_size, device=device)
    if TEST:
        model.load_state_dict(
            torch.load(open(os.path.join("saved", model_name, resume_name), 'rb')))
        # pass
    model.to(device=device)
    print('parameters', get_n_params(model))
    optimizer = Adam(model.parameters(), lr=LR)

    if TEST:
        eval(model, data_test, voc_size, 0)
    else:
        history = defaultdict(list)
        for epoch in range(EPOCH):
            loss_record = []
            start_time = time.time()
            model.train()
            for step, input in enumerate(data_train):
                for adm in input:
                    loss_target = adm[2] + [END_TOKEN]
                    output_logits = model(adm)
                    loss = F.cross_entropy(
                        output_logits, torch.LongTensor(loss_target).to(device))
                    loss_record.append(loss.item())

                    optimizer.zero_grad()
                    loss.backward(retain_graph=True)
                    optimizer.step()

                llprint('\rTrain--Epoch: %d, Step: %d/%d' %
                        (epoch, step, len(data_train)))

            # Evaluate on the eval split and record the metrics for this epoch.
            ddi_rate, ja, prauc, avg_p, avg_r, avg_f1 = eval(
                model, data_eval, voc_size, epoch)
            history['ja'].append(ja)
            history['ddi_rate'].append(ddi_rate)
            history['avg_p'].append(avg_p)
            history['avg_r'].append(avg_r)
            history['avg_f1'].append(avg_f1)
            history['prauc'].append(prauc)

            end_time = time.time()
            elapsed_time = (end_time - start_time) / 60
            llprint(
                '\tEpoch: %d, Loss1: %.4f, One Epoch Time: %.2fm, Appro Left Time: %.2fh\n'
                % (epoch, np.mean(loss_record), elapsed_time,
                   elapsed_time * (EPOCH - epoch - 1) / 60))

            torch.save(
                model.state_dict(),
                open(
                    os.path.join(
                        'saved', model_name,
                        'Epoch_%d_JA_%.4f_DDI_%.4f.model' % (epoch, ja, ddi_rate)),
                    'wb'))
            print('')

        dill.dump(history,
                  open(os.path.join('saved', model_name, 'history.pkl'), 'wb'))

        # test
        torch.save(model.state_dict(),
                   open(os.path.join('saved', model_name, 'final.model'), 'wb'))
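A hedged sketch of inspecting the training history dumped above (assumes matplotlib is available and that model_name is the same directory name used in main()):

import os
import dill
import matplotlib.pyplot as plt

# Load the metrics history written at the end of training.
history = dill.load(open(os.path.join('saved', model_name, 'history.pkl'), 'rb'))
plt.plot(history['ja'], label='Jaccard')
plt.plot(history['ddi_rate'], label='DDI rate')
plt.xlabel('epoch')
plt.legend()
plt.show()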
def fine_tune(fine_tune_name=''):
    data_path = '../../data/records_final.pkl'
    voc_path = '../../data/voc_final.pkl'
    device = torch.device('cuda:0')

    data = dill.load(open(data_path, 'rb'))
    voc = dill.load(open(voc_path, 'rb'))
    diag_voc, pro_voc, med_voc = voc['diag_voc'], voc['pro_voc'], voc['med_voc']
    ddi_A = dill.load(open('../../data/ddi_A_final.pkl', 'rb'))

    split_point = int(len(data) * 2 / 3)
    data_train = data[:split_point]
    eval_len = int(len(data[split_point:]) / 2)
    data_test = data[split_point:split_point + eval_len]
    # data_eval = data[split_point+eval_len:]
    voc_size = (len(diag_voc.idx2word), len(pro_voc.idx2word), len(med_voc.idx2word))

    model = Leap(voc_size, device=device)
    model.load_state_dict(
        torch.load(open(os.path.join("saved", model_name, fine_tune_name), 'rb')))
    model.to(device)

    EPOCH = 30
    LR = 0.0001
    END_TOKEN = voc_size[2] + 1
    optimizer = Adam(model.parameters(), lr=LR)
    ddi_rate_record = []

    for epoch in range(1):
        loss_record = []
        start_time = time.time()
        random_train_set = [
            random.choice(data_train) for i in range(len(data_train))
        ]
        for step, input in enumerate(random_train_set):
            model.train()
            K_flag = False
            for adm in input:
                target = adm[2]
                output_logits = model(adm)
                out_list, sorted_predict = sequence_output_process(
                    output_logits.detach().cpu().numpy(),
                    [voc_size[2], voc_size[2] + 1])

                # Jaccard similarity between predicted and target drug sets (0 when both are empty).
                inter = set(out_list) & set(target)
                union = set(out_list) | set(target)
                jaccard = 0 if len(union) == 0 else len(inter) / len(union)

                # K flags whether the predicted set contains at least one DDI pair.
                K = 0
                for i in out_list:
                    if K == 1:
                        K_flag = True
                        break
                    for j in out_list:
                        if ddi_A[i][j] == 1:
                            K = 1
                            break

                loss = -jaccard * K * torch.mean(
                    F.log_softmax(output_logits, dim=-1))
                loss_record.append(loss.item())

                optimizer.zero_grad()
                loss.backward(retain_graph=True)
                optimizer.step()

            llprint('\rTrain--Epoch: %d, Step: %d/%d' %
                    (epoch, step, len(data_train)))

            if K_flag:
                ddi_rate, ja, prauc, avg_p, avg_r, avg_f1 = eval(
                    model, data_test, voc_size, epoch)

                end_time = time.time()
                elapsed_time = (end_time - start_time) / 60
                llprint(
                    '\tEpoch: %d, Loss1: %.4f, One Epoch Time: %.2fm, Appro Left Time: %.2fh\n'
                    % (epoch, np.mean(loss_record), elapsed_time,
                       elapsed_time * (EPOCH - epoch - 1) / 60))

                torch.save(
                    model.state_dict(),
                    open(
                        os.path.join(
                            'saved', model_name,
                            'fine_Epoch_%d_JA_%.4f_DDI_%.4f.model' % (epoch, ja, ddi_rate)),
                        'wb'))

        print('')

    # test
    torch.save(model.state_dict(),
               open(os.path.join('saved', model_name, 'final.model'), 'wb'))
    with open(filename, 'wb') as fh:
        dill.dump(
            dict(seed=seed, dhids=dhids, epochs=epochs, n_per_batch=n_per_batch,
                 eta=eta, alpha=alpha, weights=weights, tau_rc=tau_rc, amp=amp,
                 learners=learners), fh)
else:
    with open(filename, 'rb') as fh:
        filedata = dill.load(fh)
        globals().update(filedata)

# --- plot results (cols=[train, test], traces=learners)
rows = 1
cols = 2
plt.figure(figsize=(7, 4))

# - train subplot
ax = plt.subplot(rows, cols, 1)
# filt = Alpha(3000, default_dt=n_per_batch)
filt = Alpha(10000, default_dt=n_per_batch)
for learner in learners:
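An alternative to globals().update that keeps the reloaded experiment data namespaced; this is a sketch only, not the original author's approach:

with open(filename, 'rb') as fh:
    filedata = dill.load(fh)

# Pull out only the fields that are needed, instead of injecting everything into globals().
weights = filedata['weights']
learners = filedata['learners']
n_per_batch = filedata['n_per_batch']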
srep.viz.plotting_style()

#%%
fig, ax = plt.subplots(2, 2, figsize=(9, 9))

# ###########################################################################
# 95% HPD for identifiable expts promoters
# ###########################################################################
# loop thru df, not all_samples keys, so we get deterministic order!
expt_labels = ("O2_0p5ngmL", "O2_1ngmL", "Oid_1ngmL", "O3_10ngmL")
var_labels = ["k_burst", "b", "kR_on", "kR_off"]
color_keys = ["green", "blue", "red", "purple"]
for i, expt in enumerate(expt_labels):
    # unpickle sampler, then convert to arviz InfDat obj
    pklfile = open(f"{repo_rootdir}/data/mcmc_samples/{expt}_sampler.pkl", 'rb')
    sampler = dill.load(pklfile)
    pklfile.close()
    inf_dat = az.convert_to_inference_data(sampler, var_names=var_labels)

    kR_on_samples = inf_dat.posterior.kR_on.values.flatten()
    kR_off_samples = inf_dat.posterior.kR_off.values.flatten()
    x_contour, y_contour = bebi103.viz.contour_lines_from_samples(
        kR_off_samples, kR_on_samples, levels=0.95, smooth=0.025)
    ax[0, 1].plot(x_contour[0], y_contour[0],
                  label=expt, linewidth=0.6,
                  color=colors[color_keys[i]])
# ax[0,1].set_xlim(-2,2)
# ax[0,1].set_ylim(-2,2)
ax[0, 1].set_ylabel(r'$\log_{10}(k_R^+/\gamma)$')
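For reference, the pickled sampler read in this loop could have been produced by a dill dump of this (hypothetical) form:

# Hypothetical save step matching the load above.
with open(f"{repo_rootdir}/data/mcmc_samples/{expt}_sampler.pkl", 'wb') as pklfile:
    dill.dump(sampler, pklfile)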
import time

import numpy as np
import dill as pickle  # assumption: the .pkl files here are loaded with a dill-compatible pickle
from sklearn.preprocessing import MinMaxScaler
from operator import add


def testindexing():
    graph_raw_data = open("graph.pkl", "rb")
    graph_data = pickle.load(graph_raw_data)
    print(graph_data)


if __name__ == "__main__":
    testindexing()

    # need to update revised in gce
    raw_data = open("revised_total_data.pkl", "rb")
    X_total, X_tr, y_tr, X_te = pickle.load(raw_data)
    test = np.array([0])
    print(X_total.shape)

    # remove null island
    graph_raw_data = open("graph.pkl", "rb")
    graph_data = pickle.load(graph_raw_data)
    first_ids = graph_data[:, 0]
    connected_ids = graph_data[:, 1]
    training_ids = X_total[:, 0]

    added_features = []
    count = 0
    length_graph = first_ids.shape[0]
    length_training = training_ids.shape[0]
    for row in X_total:
        id_first = row[0]
        index = np.searchsorted(first_ids, id_first)
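np.searchsorted only yields correct matches when first_ids is sorted and the queried id is actually present; a small self-contained sketch of the lookup step on synthetic arrays (not the project's data):

import numpy as np

# Synthetic, sorted graph ids and one training id to look up.
first_ids = np.array([3, 7, 7, 12, 20])
id_first = 12
index = np.searchsorted(first_ids, id_first)

# Guard against ids that are absent from the graph.
if index < len(first_ids) and first_ids[index] == id_first:
    print("found at index", index)
else:
    print("id not in graph")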
def experiment_LMKLIEP(which_d):
    with open('feat_vec_out.' + which_d + '.en.pickle', 'rb') as handle:
        out_d = pickle.load(handle)
    with open('feat_vec_in.' + which_d + '.en.pickle', 'rb') as handle:
        in_d = pickle.load(handle)

    labels = -np.ones(out_d.shape[0])
    predictions = -np.ones(out_d.shape[0])
    labels[-50000:] = 1

    kliep = KLIEP(init_b=100, seed=0)
    kliep.fit_CV(out_d, in_d)

    w = kliep.predict(out_d).ravel()  # w = p_te/p_tr
    predictions[np.where(w > 1)[0]] = 1

    print('total positive:', np.where(predictions == 1)[0].shape,
          ', out of:', out_d.shape[0])

    # sorted_ind = np.argsort(w, axis=None)[::-1]
    # predictions[sorted_ind[0:50000]] = 1

    p, r, f, s = precision_recall_fscore_support(labels.astype(int),
                                                 predictions.astype(int),
                                                 pos_label=1, average='micro')
    print('Precision:', p, end=' ')
    print('Recall:', r, end=' ')
    print('F1:', f, end=' ')
    print('Support:', s, end=' ')
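A self-contained illustration of the w > 1 decision rule and the metric call, using synthetic density-ratio weights in place of KLIEP output (purely illustrative, no KLIEP involved):

import numpy as np
from sklearn.metrics import precision_recall_fscore_support

rng = np.random.default_rng(0)
n = 1000
labels = -np.ones(n, dtype=int)
labels[-300:] = 1                                  # last 300 samples play the "in-domain" role

w = rng.lognormal(mean=0.0, sigma=0.5, size=n)     # stand-in for p_te/p_tr ratios
predictions = np.where(w > 1, 1, -1)               # same thresholding rule as above

p, r, f, s = precision_recall_fscore_support(labels, predictions, average='micro')
print('Precision:', p, 'Recall:', r, 'F1:', f, 'Support:', s)  # support is None for averaged scores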