def load_models():
    """
    Aim : To load the encoder, the pipeline and the model of our AI
    ====================================================
    Input : None
    ====================================================
    Return : the encoder, the pipeline, the model
    """
    try:
        with open('my_models/Label_Encoder.pickle', 'rb') as file:  # open the file ...
            Label_encod = pickle.load(file)  # ... and load it into a variable
        with open('my_models/PipeLine.pickle', 'rb') as file:
            pipe = pickle.load(file)
        with open('my_models/My_Model_test.pickle', 'rb') as file:
            model = pickle.load(file)
    except (OSError, pickle.UnpicklingError):
        print("problem while loading the machine learning tools")
        raise  # nothing usable to return if loading failed
    # returning the 3 variables that contain our sklearn objects
    return Label_encod, pipe, model
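# Hedged usage sketch (not part of the original source): assumes the pickles above
# hold a fitted scikit-learn LabelEncoder, a preprocessing Pipeline and a classifier,
# and that `raw_features` is a feature table the caller has already prepared.
def example_predict(raw_features):
    Label_encod, pipe, model = load_models()
    # transform the features, predict, then map class indices back to label names
    return Label_encod.inverse_transform(model.predict(pipe.transform(raw_features)))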
def get_datasets_from_path(testing_flag, hchs_or_mesa):
    working_directory = get_working_directory(testing_flag, hchs_or_mesa)
    dataset_save_path = os.path.join(os.path.dirname(os.getcwd()), "PickledData", hchs_or_mesa)
    path_to_embeddings = os.path.join(os.path.dirname(os.getcwd()), "embeddings", hchs_or_mesa)

    if testing_flag:
        path_to_test_train_split_dict = os.path.join(dataset_save_path, 'reduced_test_train_split_dict.pickle')
        # path_to_test_train_split_dict = os.path.join(dataset_save_path, '100_users_reduced_test_train_users.pickle')
    else:
        path_to_test_train_split_dict = os.path.join(dataset_save_path, 'test_train_split_dict.pickle')

    with open(path_to_test_train_split_dict, 'rb') as f:
        test_train_split_dict = pickle.load(f)

    disease_user_datasets = {}
    diseases = dataset_diseases[hchs_or_mesa]
    for disease in diseases:
        disease_user_dataset_path = os.path.join(dataset_save_path, f'{disease}_user_datasets.pickle')
        with open(disease_user_dataset_path, 'rb') as f:
            user_dataset = pickle.load(f)
        if testing_flag:
            # keep only users that appear in the train or test split
            reduced_user_dataset = {}
            for user, data in user_dataset.items():
                if user in test_train_split_dict['train'] or user in test_train_split_dict['test']:
                    reduced_user_dataset[user] = data
            user_dataset = reduced_user_dataset
        disease_user_datasets[disease] = user_dataset

    return disease_user_datasets, test_train_split_dict, working_directory, path_to_embeddings
def main1(test_point):
    df = feature_engineering_on_app_train_test(test_point)
    df_past = get_past_data(int(test_point['SK_ID_CURR'].values))
    df = df.join(df_past, how='left', on='SK_ID_CURR')
    del df_past
    gc.collect()

    with open("lgbm_clf_list_7.pkl", "rb") as f:
        lgbm_list = pkl.load(f)
    with open("train_column7.pkl", "rb") as f:
        train_column = pkl.load(f)
    gc.collect()

    df = fill_the_missing_values(df)
    df = calculate_cibil_score(df)

    with open('scaler_7.sav', 'rb') as f:
        scaler = pkl.load(f)

    X = df[train_column]
    top5_feat = X[[
        'EXT_SOURCE_1', 'PAYMENT_RATE', 'DAYS_BIRTH', 'EXT_SOURCE_3',
        'AMT_ANNUITY'
    ]].copy()
    top5_feat['DAYS_BIRTH'] = -1 * top5_feat['DAYS_BIRTH'] / 365
    top5_feat.rename(columns={"DAYS_BIRTH": "AGE"}, inplace=True)
    st.write('These are the values of the top 5 features used in prediction')
    st.dataframe(top5_feat)

    X = scaler.transform(X)
    test_pred_proba = 0
    # average the fold predictions; the divisor assumes 10 cross-validation models
    for j in range(0, len(lgbm_list)):
        test_pred_proba += lgbm_list[j].predict_proba(
            X, num_iteration=lgbm_list[j].best_iteration_)[:, 1] / 10

    st.write('Probability of being a Defaulter: ',
             str(round(test_pred_proba[0], 6)))
    st.write('Percentage of being a Defaulter: ',
             str(round(test_pred_proba[0] * 100, 2)) + '%')
def __init__(self):
    with open('binaries/lr_model', 'rb') as input_file:
        self.lr_model = pickle.load(input_file)
    with open('binaries/count_vectorizer', 'rb') as input_file:
        self.count_vectorizer = pickle.load(input_file)
    with open('binaries/tfidf_transformer', 'rb') as input_file:
        self.tfidf_transformer = pickle.load(input_file)
def load_analize_policies(N, N_sim, verbose, starts):
    path = f'examples/data/policies_lambdas{N}.pickle'
    with open(path, 'rb') as file:
        pols = pickle.load(file)
    path = f'examples/data/values_lambdas{N}.pickle'
    with open(path, 'rb') as file:
        vals = pickle.load(file)
    p_space, policies, values = read_pol(N)

    distances = {s: {} for s in starts}
    choques = {s: {} for s in starts}
    fig_d, ax_d = plt.subplots()
    fig_c, ax_c = plt.subplots()
    for s in starts:
        for i in pols.keys():
            pol = DMSPolicy(p_space, pols[i], from_matrix=True)
            # simulate from the current start point (the original passed a fixed
            # (0, 0) start, which made the per-start curves identical)
            ship = simulate_and_plot(p_space, pol, start=s, n=N_sim,
                                     verbose=verbose, plot=False)
            ship.name = i
            distances[s][i] = ship.average_obj_fun(mean=True)
            choques[s][i] = ship.average_crashes(mean=True)
        ax_d.plot(list(distances[s].keys()), list(distances[s].values()),
                  label=f'start:{s}')
        ax_c.plot(list(choques[s].keys()), list(choques[s].values()),
                  label=f'start:{s}')
    ax_d.set_xlabel(r'$\lambda$')
    ax_d.legend()
    ax_c.set_xlabel(r'$\lambda$')
    ax_c.legend()
    plt.show()
def update_model(self, paths):
    with open(paths['lr_model'], 'rb') as input_file:
        self.lr_model = pickle.load(input_file)
    with open(paths['count_vectorizer'], 'rb') as input_file:
        self.count_vectorizer = pickle.load(input_file)
    with open(paths['tfidf_transformer'], 'rb') as input_file:
        self.tfidf_transformer = pickle.load(input_file)
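# Hedged usage sketch (not from the original source): shows the typical inference
# chain for the three objects loaded above, assuming `count_vectorizer` is a
# scikit-learn CountVectorizer, `tfidf_transformer` a TfidfTransformer and
# `lr_model` a fitted classifier; `texts` is a hypothetical list of raw strings.
def example_classify(self, texts):
    counts = self.count_vectorizer.transform(texts)
    tfidf = self.tfidf_transformer.transform(counts)
    return self.lr_model.predict(tfidf)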
def get_datasets_from_paths(testing_flag):
    if testing_flag:
        working_directory = 'chapman_testing/'
    else:
        working_directory = 'chapman/'

    if not os.path.exists(working_directory):
        os.makedirs(working_directory)

    dataset_save_path = os.path.join(os.getcwd(), "PickledData", "chapman")
    path_to_patient_to_rhythm_dict = os.path.join(dataset_save_path, 'patient_to_rhythm_dict.pickle')

    # paths to user datasets with no nan values
    if testing_flag:
        path_to_user_datasets = os.path.join(dataset_save_path, 'reduced_four_lead_user_datasets_no_nan.pickle')
        path_to_test_train_split_dict = os.path.join(dataset_save_path, 'reduced_test_train_split_dict_no_nan.pickle')
    else:
        path_to_user_datasets = os.path.join(dataset_save_path, 'four_lead_user_datasets_no_nan.pickle')
        path_to_test_train_split_dict = os.path.join(dataset_save_path, "test_train_split_dict_no_nan.pickle")

    with open(path_to_user_datasets, 'rb') as f:
        user_datasets = pickle.load(f)
    print(f'number of patients: {len(user_datasets)}')

    with open(path_to_patient_to_rhythm_dict, 'rb') as f:
        patient_to_rhythm_dict = pickle.load(f)

    with open(path_to_test_train_split_dict, 'rb') as f:
        test_train_split_dict = pickle.load(f)

    return user_datasets, patient_to_rhythm_dict, test_train_split_dict, working_directory
def load_struct_authors(name_authors_collaborators, name_authors_info):
    with open(name_authors_collaborators + '.pickle', 'rb') as fp:
        authors_collaborators = pickle.load(fp)
    with open(name_authors_info + '.pickle', 'rb') as fp:
        authors_info = pickle.load(fp)
    return authors_collaborators, authors_info
def get_data(train=True):
    # feats = cPickle.load(open(coco_inception_features_path, "rb"), encoding="latin1")
    feats = cPickle.load(open('../data/coco_train_v3.pik', "rb"), encoding="latin1")
    feats.update(
        cPickle.load(open('../data/coco_val_ins.pik', "rb"), encoding="latin1"))
    sents = []
    final_feats = []
    filenames = []
    js = json.load(open(coco_dataset_path, "r"))
    for i, img in enumerate(js["images"]):
        if train and img["extrasplit"] == "val":
            continue
        if (not train) and img["extrasplit"] != "val":
            continue
        if img["filename"] not in feats:
            continue
        if train:
            # one entry per caption, each paired with its image features
            for sen in img["sentences"]:
                sents.append(sen["rm_style_tokens"])
                final_feats.append(feats[img["filename"]])
                filenames.append(img["filename"])
        else:
            sents.append(img["sentences"][0]["rm_style_tokens"])
            final_feats.append(feats[img["filename"]])
            filenames.append(img["filename"])
    final_feats = np.array(final_feats)

    data_file = 'cleaned_sents_train.pkl' if train is True else 'cleaned_test_train.pkl'
    if os.path.exists(data_file):
        with open(data_file, 'rb') as f:
            sents = cPickle.load(f)
    else:
        m = []
        sym_spell = SymSpell(max_dictionary_edit_distance=2, prefix_length=3)
        dictionary_path = pkg_resources.resource_filename(
            "symspellpy", "frequency_dictionary_en_82_765.txt")
        # term_index is the column of the term and count_index is the
        # column of the term frequency
        sym_spell.load_dictionary(dictionary_path, term_index=0, count_index=1)
        for i in tqdm(sents, position=0):
            l = []
            for j in range(len(i)):
                t = correct_spell(
                    i[j].replace('NOUNNOUNNOUN', '').replace(
                        "PARTPARTPART", "").replace("FRAMENET", "").replace(
                            "ADJADJADJ", "").replace('INTJINTJINTJ', '').lower(),
                    sym_spell)
                l.append(t)
            m.append(l)
        sents = m
        with open(data_file, 'wb') as f:
            cPickle.dump(sents, f)
    return final_feats, filenames, sents
def unpickle_dataset():
    with open(STATIC_QUALITY_SCORE_PATH, 'rb') as f:
        Static_Quality_Score = pickle.load(f)
    with open(LEADERS_PATH, 'rb') as f:
        Leaders_List = pickle.load(f)
    return Static_Quality_Score, Leaders_List
def import_data():
    global pentaData
    global data
    with open('data.p', 'rb') as fp:
        data = pickle.load(fp)
    with open('pentagram81.p', 'rb') as fp:
        pentaData = pickle.load(fp)
    # avoid shadowing the json module with the response payload
    payload = {
        "name": "imported the predictions",
        "error": "No error",
    }
    return jsonify(payload)
def load(self, word2index_dic='word2index_dic', index2word_dic='index2word_dic',
         word2count_dic='word2count_dic'):
    with open('Save/' + word2index_dic + '.p', 'rb') as fp:
        self.word2index = pickle.load(fp)
    with open('Save/' + index2word_dic + '.p', 'rb') as fp:
        self.index2word = pickle.load(fp)
    with open('Save/' + word2count_dic + '.p', 'rb') as fp:
        self.word2count = pickle.load(fp)
    self.num_words = len(self.word2index)
def load_dataset_file(filename):
    print(f"Loading {filename}")
    with open(filename, "rb") as f:
        try:
            return pickle.load(f)
        except ValueError:
            # protocol-5 pickles raise ValueError on older Python versions;
            # rewind the handle and retry with the pickle5 backport
            f.seek(0)
            return pickle5.load(f)
def assemble_scores_no_order(hyperparameter_set):
    """
    Assumes the order of the model vs age loop doesn't matter.
    """
    # hyperparameter_set: wfst or levdist
    this_load_args = load_models.gen_all_model_args()

    score_store = []
    for split, dataset, tags, context, model_type in this_load_args:
        this_hyperparameter_folder = hyperparameter_utils.load_hyperparameter_folder(
            split, dataset, tags, context, model_type)

        search_string = join(
            this_hyperparameter_folder,
            hyperparameter_set + '_run_models_across_time_*.pkl')
        age_paths = glob.glob(search_string)

        for this_data_path in age_paths:
            # data_df = pd.read_pickle(this_data_path)
            with open(this_data_path, "rb") as fh:
                data_df = pickle.load(fh)
            score_store.append(data_df)

    return score_store
def portfolioLabel(self) -> DataInfo:
    func_name = sys._getframe().f_code.co_name
    result_path = os.path.join(self.local_path, func_name + '.pkl')

    if os.path.exists(result_path):
        with open(result_path, 'rb') as f:
            category_label = pickle.load(f)
    else:
        # read data
        print(f"{dt.datetime.now().strftime('%X')}: Constructing the label pool")
        data_dict = self.read_data()
        price_data = data_dict['AStockData.pkl'][self.Mapping["price"]['columns']]
        ind_exp = data_dict['AStockData.pkl'][self.Mapping["industry"]['columns']]
        stock_mv_data = data_dict['AStockData.pkl'][self.Mapping["mv"]['columns']] * 10000  # 10k yuan -> yuan
        stock_w_data = data_dict['StockPool.pkl'][self.Mapping['stock_w1']['columns']]

        price_data = price_data.rename(
            columns={
                PVN.CLOSE_ADJ.value: PVN.CLOSE.value,
                PVN.OPEN_ADJ.value: PVN.OPEN.value,
                PVN.HIGH_ADJ.value: PVN.HIGH.value,
                PVN.LOW_ADJ.value: PVN.LOW.value
            })

        print(f"{dt.datetime.now().strftime('%X')}: Calculate stock daily return label")
        stock_ret_o = self.api.stock_ret(price_data, return_type=PVN.OPEN.value)

        print(f"{dt.datetime.now().strftime('%X')}: Calculate industry daily weight label")
        ind_w = self.api.industry_w(stock_w_data, industry_exposure=ind_exp)

        # merge labels
        print(f"{dt.datetime.now().strftime('%X')}: Merge labels")
        category_label = self.merge_labels(
            data_ret_open=stock_ret_o,
            ind_exp=ind_exp,
            mv=stock_mv_data[PVN.LIQ_MV.value],
            ind_w=ind_w,
        )

        # sort and cache the result for subsequent calls
        category_label = category_label.sort_index()
        category_label.to_pickle(result_path)

    dataClass = DataInfo(data=category_label,
                         data_category=self.__class__.__name__,
                         data_name=func_name)
    return dataClass
def load_data(self, split, bsz):
    with open(os.path.join(self.data_dir, f"{split}.bin"), "rb") as fin:
        data = pickle.load(fin)['data']
    nstep = data.size(0) // bsz
    return data[:nstep * bsz].view(bsz, -1)
def from_bytes_gz(bytes_graph: bytes) -> BELGraph:
    """Read a graph from gzipped bytes (the result of pickling the graph).

    :param bytes_graph: The gzipped bytes of the pickled graph
    """
    with gzip.GzipFile(fileobj=BytesIO(bytes_graph), mode='rb') as file:
        return pickle.load(file)
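# Hedged usage sketch (illustrative only): reads gzipped pickle bytes from a
# hypothetical file on disk and rebuilds the BELGraph with from_bytes_gz.
def example_read_gzipped_graph(path: str) -> BELGraph:
    with open(path, 'rb') as f:
        return from_bytes_gz(f.read())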
def calculate_cibil_score(df):
    # cibil_train = train_data[['3365_LATE_PAYMENT_FLAG_MEAN', 'CRED_FLAG_LESS_30_MEAN', 'ABS_YEAR_CREDIT_MAX', 'UNSEC_LOAN_COUNT_SUM', 'SEC_LOAN_COUNT_SUM', 'AMT_REQ_CREDIT_BUREAU_WEEK']].copy()
    cibil_test = df[[
        '3365_LATE_PAYMENT_FLAG_MEAN', 'CRED_FLAG_LESS_30_MEAN',
        'ABS_YEAR_CREDIT_MAX', 'UNSEC_LOAN_COUNT_SUM', 'SEC_LOAN_COUNT_SUM',
        'AMT_REQ_CREDIT_BUREAU_WEEK'
    ]].copy()

    with open('scaler_cibil_7.sav', 'rb') as f:
        scaler_cibil = pkl.load(f)
    cibil_test_std = scaler_cibil.transform(cibil_test)
    cibil_test = pd.DataFrame(data=cibil_test_std,
                              columns=[
                                  '3365_LATE_PAYMENT_FLAG_MEAN',
                                  'CRED_FLAG_LESS_30_MEAN',
                                  'ABS_YEAR_CREDIT_MAX',
                                  'UNSEC_LOAN_COUNT_SUM',
                                  'SEC_LOAN_COUNT_SUM',
                                  'AMT_REQ_CREDIT_BUREAU_WEEK'
                              ])

    num_test = (0.1 * cibil_test['UNSEC_LOAN_COUNT_SUM'].copy() +
                0.1 * cibil_test['SEC_LOAN_COUNT_SUM'].copy() +
                0.05 * cibil_test['ABS_YEAR_CREDIT_MAX'].copy() +
                0.25 * cibil_test['CRED_FLAG_LESS_30_MEAN'].copy())
    den_test = (0.30 * cibil_test['3365_LATE_PAYMENT_FLAG_MEAN'].copy() +
                0.20 * cibil_test['AMT_REQ_CREDIT_BUREAU_WEEK'].copy()) + 1

    df.loc[:, 'CIBIL_SCORE'] = (num_test.copy() / den_test.copy())
    df.loc[:, 'CIBIL_SCORE'] = df['CIBIL_SCORE'].fillna(0)
    return df
def load_pickle(self, name):
    """
    Loads the data collection from a pickle file

    Parameters
    ----------
    name : string
        name of the pickle file that is used to load the data collection

    Returns
    ----------
    DataCollection object
        data collection that was saved in the pickle file
    """
    out_file_dir = os.path.join(DIR_PATH, '..', 'out',
                                os.path.basename(self.data_folder), 'pickle')
    os.makedirs(out_file_dir, exist_ok=True)
    out_file_name = os.path.join(out_file_dir, name + '.pkl')
    with open(out_file_name, 'rb') as output:
        try:
            if self.verbose:
                print("loading from pickle file ", out_file_name)
            # rebind self to the unpickled collection and return it below
            self = pickle.load(output)
            self.populated = True
        except EOFError:
            print("not found")
    return self
def test_bspline_pickle_file(self):
    """Test the custom pickling and un-pickling interface"""
    import copy

    M = [0, 1, 1, 0]

    img1 = sitk.Image([10, 10], sitk.sitkFloat64)
    img1.SetOrigin((.01, 5.2))
    img1.SetDirection(M)
    img1 = sitk.AdditiveGaussianNoise(img1)

    img2 = sitk.Image([10, 10], sitk.sitkFloat64)
    img2.SetOrigin((.01, 5.2))
    img2.SetDirection(M)
    img2 = sitk.AdditiveGaussianNoise(img2)

    tx = sitk.BSplineTransform([img1, img2], 3)

    fname = os.path.join(self.test_dir, "bspline_protocol_default.pickle")
    with open(fname, 'wb') as fp:
        pickle.dump(copy.deepcopy(tx), fp)

    with open(fname, 'rb') as fp:
        ret = pickle.load(fp)

    self.assertEqual(ret, ret, msg="pickle file with default protocol")
def gcs_load_obj(uri):
    uri = urlparse(uri)
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(uri.netloc)
    b = bucket.blob(uri.path[1:], chunk_size=None)
    obj = pickle.load(io.BytesIO(b.download_as_string()))
    return obj
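# Hedged usage sketch (illustrative only): the bucket and object name are
# hypothetical; gcs_load_obj expects a gs:// URI that points at a pickled object.
def example_load_model_from_gcs():
    return gcs_load_obj("gs://my-bucket/models/model.pkl")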
def loadModel(is_display=False, number_words=1):
    print("Loading...")
    dictionary = corpora.Dictionary.load('dictorionary.gensim')
    corpus = pickle.load(open('corpus.pkl', 'rb'))
    lda = models.LdaModel.load('model5.gensim')
    topics = lda.print_topics(num_words=number_words)

    dic_topics = {}
    for topic in topics:
        split_topic = topic[1].split("*\"")
        # split_topic[1] = split_topic.replace("\"", "")
        print("-" + str(split_topic))
        dic_topics[str(topic[0])] = {
            "topic": split_topic[1][:-1],  # drop the trailing quote
            "frecuency": split_topic[0]
        }
        print(topic)
    print(str(dic_topics))

    if is_display is True:
        # pyLDAvis.enable_notebook()
        lda_display = pyLDAvis.gensim.prepare(lda, corpus, dictionary, sort=False)
        print("Loaded")
        pyLDAvis.save_html(lda_display, 'display.html')
    return dic_topics
def load_mnist(normalize=True, flatten=True, one_hot_label=False):
    """Load the MNIST dataset.

    Parameters
    ----------
    normalize : normalize the pixel values of the images to the 0.0-1.0 range
    one_hot_label : if True, labels are returned as one-hot arrays,
        e.g. [0,0,1,0,0,0,0,0,0,0]
    flatten : whether to flatten each image into a one-dimensional array

    Returns
    -------
    (training images, training labels), (test images, test labels)
    """
    if not os.path.exists(save_file):
        init_mnist()

    with open(save_file, 'rb') as f:
        dataset = pickle.load(f)

    if normalize:
        for key in ('train_img', 'test_img'):
            dataset[key] = dataset[key].astype(np.float32)
            dataset[key] /= 255.0

    if one_hot_label:
        dataset['train_label'] = _change_one_hot_label(dataset['train_label'])
        dataset['test_label'] = _change_one_hot_label(dataset['test_label'])

    if not flatten:
        for key in ('train_img', 'test_img'):
            dataset[key] = dataset[key].reshape(-1, 1, 28, 28)

    return (dataset['train_img'], dataset['train_label']), (dataset['test_img'], dataset['test_label'])
def get_cross_augmented_scores(data_child, prior_child):
    '''
    Load individual scores from using a specific child's fine-tuned prior
    on the data associated with another child.

    Args:
        data_child: name of the child whose data will be tested
        prior_child: name of the child whose prior will be used

    Return: A pandas dataframe of scores
    '''
    score_path = utils_child.get_cross_path(data_child, prior_child)

    try:
        raw_scores = pd.read_pickle(score_path)
    except Exception:
        # fall back to plain pickle and re-save with a protocol pandas can read
        with open(score_path, "rb") as fh:
            data = pickle.load(fh)
        path_to_protocol4 = score_path.replace('.pkl', '.pkl4')
        data.to_pickle(path_to_protocol4)
        raw_scores = pd.read_pickle(path_to_protocol4)

    raw_scores['cross_type'] = get_cross_type(data_child, prior_child)
    raw_scores['data_child'] = data_child
    raw_scores['prior_child'] = prior_child

    return raw_scores
def train_classifier(features, y, clf):
    with open('vectorizer.pkl', 'rb') as f:
        vectorizer = cPickle.load(f)
    vectors = vectorizer.transform(features)
    clf.partial_fit(vectors.toarray(), y, classes=[0, 1])
def load_past():
    # with open("df_test.pkl", "rb") as f:
    #     df_past = pkl.load(f)
    zf = zipfile.ZipFile('df_test.zip', 'r')
    df_past = pkl.load(zf.open('df_test.pkl'))
    df_past['SK_ID_CURR'] = df_past.index
    return df_past
def __init__(self, standard=False, feature_subset="all"):
    # use if already converted to cartesian
    # with open('data/pi0_cartesian_train.pkl', 'rb') as f:
    #     x = np.array(pickle.load(f), dtype=np.float32)

    # use if not already converted
    with open('data/pi0.pkl', 'rb') as f:
        xz = np.array(pickle.load(f), dtype=np.float32)
    x = cartesian_converter(xz, type='x')
    z = cartesian_converter(xz, type='z')

    if feature_subset != "all":
        x = x[:, feature_subset]
        z = z[:, feature_subset]

    xwithoutPid = x
    self.qt = self.quant_tran(x)

    # commented out because we are currently not using the quantile transform
    # df_x = pd.DataFrame(self.qt.transform(x))  # don't know how to do this without first making it a DF
    # x_np = df_x.to_numpy()                     # and then converting back to numpy
    # self.x = torch.from_numpy(np.array(x_np))

    self.xz = xz
    self.x = torch.from_numpy(np.array(x))
    self.xwithoutPid = torch.from_numpy(np.array(xwithoutPid))
    self.z = torch.from_numpy(np.array(z))

    if standard:
        self.standardize()
def get_credentials(logger: lg.Logger = None):
    """Get the proper credentials needed to write to the Google spreadsheet."""
    creds = None
    if osp.exists(GGL_SHEETS_TOKEN):
        if logger:
            logger.info(F"osp.exists({GGL_SHEETS_TOKEN})")
        with open(GGL_SHEETS_TOKEN, "rb") as token:
            creds = pickle.load(token)

    # if there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if logger:
            logger.info("creds is None or not creds.valid")
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
            if logger:
                logger.debug("creds.refresh(Request())")
        else:
            flow = InstalledAppFlow.from_client_secrets_file(CREDENTIALS_FILE, SHEETS_RW_SCOPE)
            creds = flow.run_local_server()
            if logger:
                logger.debug("creds = flow.run_local_server()")
        # save the credentials for the next run
        with open(GGL_SHEETS_TOKEN, "wb") as token:
            if logger:
                logger.debug("pickle.dump()")
            pickle.dump(creds, token, pickle.HIGHEST_PROTOCOL)

    return creds
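# Hedged usage sketch (not from the original source): builds a Sheets API client
# from the credentials above; assumes google-api-python-client is installed.
def example_sheets_service(logger: lg.Logger = None):
    from googleapiclient.discovery import build
    creds = get_credentials(logger)
    return build("sheets", "v4", credentials=creds)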
def launch_jobs(temp_dir: str) -> None:
    runs = []
    with open(os.path.join(temp_dir, JOB_SPEC_PICKLE), "rb") as f:
        job_spec = pickle.load(f)  # nosec
        singleton_state = job_spec["singleton_state"]
        sweep_configs = job_spec["sweep_configs"]
        task_function = job_spec["task_function"]

        instance_id = _get_instance_id()

        sweep_dir = None

        for sweep_config in sweep_configs:
            with open_dict(sweep_config):
                sweep_config.hydra.job.id = (
                    f"{instance_id}_{sweep_config.hydra.job.num}"
                )
            setup_globals()
            Singleton.set_state(singleton_state)
            HydraConfig.instance().set_config(sweep_config)
            ray_init_cfg = sweep_config.hydra.launcher.ray_init_cfg
            ray_remote_cfg = sweep_config.hydra.launcher.ray_remote_cfg

            if not sweep_dir:
                sweep_dir = Path(str(HydraConfig.get().sweep.dir))
                sweep_dir.mkdir(parents=True, exist_ok=True)

            start_ray(ray_init_cfg)
            ray_obj = launch_job_on_ray(
                ray_remote_cfg, sweep_config, task_function, singleton_state
            )
            runs.append(ray_obj)

    result = [ray.get(run) for run in runs]
    _dump_job_return(result, temp_dir)
def load_steam_cache_from_disk():
    global from_word_to_steam_cache
    try:
        with open('data.pkl', 'rb') as handle:
            from_word_to_steam_cache = pickle.load(handle)
    except (FileNotFoundError, EOFError):
        # fall back to an empty cache if the file is missing or empty
        from_word_to_steam_cache = {}
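# Hedged counterpart sketch (not in the original source): writes the same global
# cache back to 'data.pkl' so load_steam_cache_from_disk() can restore it later.
def save_steam_cache_to_disk():
    with open('data.pkl', 'wb') as handle:
        pickle.dump(from_word_to_steam_cache, handle, protocol=pickle.HIGHEST_PROTOCOL)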