def main():
    # construct the argument parser and parse the arguments
    ap = argparse.ArgumentParser()
    ap.add_argument("-d", "--data_dir", required=True,
                    help="Path to the images directory")
    ap.add_argument("-m", "--model_path", required=True,
                    help="Path to the model")
    ap.add_argument("-i", "--input", type=int, default=299,
                    help="The input size")
    ap.add_argument("-o", "--output", required=True,
                    help="Path to the output file")
    args = vars(ap.parse_args())
    size = args['input']

    # model
    print("Loading model...")
    subdir = args["model_path"]
    model_path = glob.glob(subdir + '*.h5')[-1]
    model = load_model(model_path)

    # data
    print("Reading data...")
    filenames, _, _ = read_data(args["data_dir"])
    n_files = len(filenames)

    # encoding
    print("Encoding images...")
    index_to_filename = {}
    filename_to_path = {}
    features = np.zeros((n_files, model.output.shape[1]))
    for i in tqdm.tqdm(range(n_files)):
        image_id = extract_image_id(filenames[i])
        index_to_filename[i] = image_id
        filename_to_path[image_id] = filenames[i]
        # print("->", image_id)
        image = load_image(filenames[i], (size, size))
        image = image.reshape((1,) + image.shape)
        features[i] = np.squeeze(model(image))

    # save transfer values
    np.save(args["output"], features)
    with open("index_to_filename.json", "w") as f:
        json.dump(index_to_filename, f, indent=4, ensure_ascii=False)
    with open("filename_to_path.json", "w") as f:
        json.dump(filename_to_path, f, indent=4, ensure_ascii=False)
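# A minimal sketch of the load_image helper assumed above: it is not shown in
# this file, and the real implementation may apply model-specific preprocessing
# (e.g. Inception-style scaling for a 299x299 input) instead of the plain
# [0, 1] scaling used here.
from tensorflow.keras.preprocessing import image as keras_image

def load_image(path, target_size):
    """Read an image from disk, resize it, and scale pixels to [0, 1]."""
    img = keras_image.load_img(path, target_size=target_size)
    return keras_image.img_to_array(img) / 255.0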
def cross_validate(args):
    assert len(args['bw_key']) == len(args['bw'])
    if not os.path.exists(args['outfolder']):
        os.makedirs(args['outfolder'])
    args['phi0'] *= 1e-18  # correct units
    kf = KFold(n_splits=args['kfold'], random_state=args['rs'], shuffle=True)
    config = read_config()
    print('Load MC: {}'.format(config['IC_MC']['path']))
    mc = np.load(str(config['IC_MC']['path']))[:]
    mc = mc_cut(mc)
    if args['weights'] == 'pl':
        weights = mc['orig_OW'] * plaw(mc['trueE'], phi0=args['phi0'],
                                       gamma=args['gamma'])
    elif args['weights'] == 'conv':
        weights = mc['conv']
    elif args['weights'] == 'conv+pl':
        diff_weight = mc['orig_OW'] * plaw(mc['trueE'], phi0=args['phi0'],
                                           gamma=args['gamma'])
        weights = mc['conv'] + diff_weight
        print('Rates [1/yr]:')
        print(np.sum(mc['conv']) * np.pi * 1e7)
        print(np.sum(diff_weight) * np.pi * 1e7)
    else:
        print('{} is not a valid weights argument'.format(args['weights']))
        sys.exit(0)
    mc = append_fields(mc, 'cur_weight', weights)
    args['weights'] = 'default'
    model, mname = load_model(args['model'])
    bw_dict = dict()
    for i, key in enumerate(args['bw_key']):
        bw_dict[key] = args['bw'][i]
    lh_arr, zero_arr = [], []
    for train_index, val_index in kf.split(mc):
        args['no_save'] = True
        res_dict = create_KDE(args, mc=mc[train_index], bws=bw_dict)
        mc_val = mc[val_index]
        val_settings, grid = model.setup_KDE(mc_val)
        lh, zeros = do_validation(res_dict, val_settings, mc_val['cur_weight'])
        print('Number of zeros {}'.format(zeros))
        print('Likelihood Value {}'.format(lh))
        zero_arr.append(zeros)
        lh_arr.append(lh)
    fname = ''
    for i in range(len(args['bw'])):
        fname += '{}_{}_'.format(args['bw_key'][i], args['bw'][i])
    fname = fname[:-1] + '.npy'
    odict = {'zeros': zero_arr, 'lh': lh_arr}
    np.save(os.path.join(args['outfolder'], fname), odict)
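# A minimal sketch of the plaw power-law weighting used above (an assumption:
# the pivot energy of 100 TeV and the normalization convention may differ in
# the repo's own powerlaw implementation).
def plaw(trueE, phi0, gamma):
    # differential flux phi0 * (E / 100 TeV)^(-gamma)
    return phi0 * (trueE / 1e5) ** (-gamma)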
def main():
    # Args
    args1 = arg_parser_test()

    # Load the model
    model = load_model(args1.checkpoint)

    # Labels
    with open(args1.cat_name_dir, 'r') as json_file:
        cat_to_name = json.load(json_file)

    # Prediction
    probabilities = predict(args1.image_path, model, args1.top_k)
    labels = [cat_to_name[str(index + 1)]
              for index in np.array(probabilities[1][0])]
    probability = np.array(probabilities[0][0])
    for i in range(args1.top_k):
        print("{} with a probability of {}".format(labels[i], probability[i]))
    print("Prediction is done!")
def main():
    # print("hello world") for luck
    in_arg = predict_input_args()
    print(" image =", in_arg.image_dir,
          "\n model checkpoint =", in_arg.load_dir,
          "\n top k =", in_arg.top_k,
          "\n device =", in_arg.device,
          "\n json =", in_arg.json)
    model, optimizer = load_model(in_arg.load_dir, in_arg.device)
    probs, classes, labels = predict(in_arg.image_dir, model, in_arg.json,
                                     in_arg.top_k, in_arg.device)
    results = dict(zip(labels, probs))
    print("-" * 40)
    for x in results:
        print(" {:20s} {:.2f}%".format(x.title(), results[x] * 100))
def main():
    args = parser_fun_test()
    with open(args.cat_to_name, 'r') as f:
        cat_to_name = json.load(f)

    # load the model
    loaded = torch.load("trained_model.pth")
    model = load_model(loaded)

    # prediction
    probabilities = predict(args.image_path, model, args.top_k, 'gpu')
    labels = [cat_to_name[str(index + 1)]
              for index in np.array(probabilities[1][0])]
    probability = np.array(probabilities[0][0])
    for i in range(args.top_k):
        print("{} with a probability of {:.5f}".format(labels[i],
                                                       probability[i]))
    print("Prediction is done!")
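# A minimal sketch of the predict function the scripts above rely on, inferred
# from how its return value is indexed (probabilities[0] holds the top-k
# probabilities, probabilities[1] the class indices). The process_image helper
# and the log-softmax output are assumptions.
import torch

def predict(image_path, model, top_k):
    model.eval()
    image = process_image(image_path)  # hypothetical helper: path -> tensor
    image = image.unsqueeze(0)         # add a batch dimension
    with torch.no_grad():
        log_ps = model(image)
        ps = torch.exp(log_ps)         # assumes the model outputs log-softmax
    return ps.topk(top_k, dim=1)       # (probs, indices), each of shape [1, top_k]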
def predict(id_curr):
    """returns the elements of the prediction page"""
    sns.reset_orig()
    X_train = load_training_data()
    X_test = load_test_data()
    lgbm = load_model()
    print("id data", hex(id(X_train)))
    print("id lgbm", hex(id(lgbm)))
    explainer = compute_tree_explainer(lgbm, X_train)
    print("id explainer", hex(id(explainer)))
    ids_avail = X_test["SK_ID_CURR"]
    if (ids_avail == id_curr).sum() > 0:
        to_analyse = X_test.loc[X_test["SK_ID_CURR"] == id_curr, :].drop(
            columns=["SK_ID_CURR"])
        st.write("Default loan probability for client id", id_curr, "is",
                 predict_api(to_analyse.iloc[0, :]), "%")
        st.subheader("Score interpretation")
        st.write("The following plot should be interpreted as follows:")
        st.write(
            "- **Arrows are the contributions** of each client attribute "
            "(family status, income, ...) to the **final score**; the bigger "
            "the arrow, the greater its contribution"
        )
        st.write(
            "- **Blue** arrows are **good contributions**: they tend to "
            "reduce the client's default risk"
        )
        st.write(
            "- **Red** arrows are **bad contributions**: they tend to "
            "increase the client's default risk"
        )
        st.write(
            "- The intersection of the blue and red arrows is the predicted "
            "level of risk"
        )
        st.write(
            "- This intersection is surrounded by the feature contributions, "
            "from big to small as you move away from the predicted value"
        )
        shap.initjs()
        shap_values = explainer.shap_values(to_analyse, check_additivity=True)
        shap.force_plot(explainer.expected_value, shap_values[0],
                        to_analyse.round(2), matplotlib=True, link="logit")
        st.pyplot(bbox_inches='tight', dpi=500, pad_inches=0)
        shap_named = pd.Series(
            np.copy(shap_values[0]),
            index=X_test.drop(columns=["SK_ID_CURR"]).columns)
        most_imp_feat = abs(shap_named).sort_values(
            ascending=False).head(10).index
        displ_feat = shap_named[most_imp_feat].sort_values()
        variables = load_variable_description()
        info_feat = st.selectbox(
            "Select the variable you want to know more about",
            displ_feat.index)
        st.write(info_feat)
        st.write(
            to_analyse.loc[:, info_feat].values[0].round(2),
            variables.loc[variables["Row"] == info_feat,
                          "Description"].values[0])
    else:
        st.error("Solve the error in the sidebar before accessing this module")
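# A minimal sketch of the compute_tree_explainer helper used above (an
# assumption: the real helper may add caching or different options). It wraps
# shap.TreeExplainer around the fitted LightGBM model, with the training set
# as background data for the expected value.
import shap

def compute_tree_explainer(model, background_data):
    return shap.TreeExplainer(model, data=background_data)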
if not os.path.exists('logs'):
    os.mkdir('logs')
    # set logging to save the log file to the logs folder
    logging.basicConfig(
        filename=f"logs/{sys.argv[0].replace('.py', '')}-"
                 f"{datetime.now().strftime('%Y%d%m%H%M%S')}.log",
        filemode='w',
        level=logging.INFO)
else:
    logging.basicConfig(level=logging.INFO)

if v:
    logging.info(f"using {m}.{e} model to calculate submitid {i}")

# load the word embedding model
start = datetime.now()
vectors = load_model(m, e)
if v:
    logging.info(f"model loaded in {datetime.now() - start}")

# get the source code and problem text from the database that correspond
# with the input submit ID
code, problem = get(i)

# preprocessing includes normalization and tokenization
if v:
    logging.info("preprocessing code and problem text...")
problem_processed, comments_processed, code_only = preprocess(problem, code)

# count words in code comments
comment_word_count_raw = 0
for line in comments_processed:
    comment_word_count_raw += len(line)
if v:
    logging.info("preprocessing finished")
""" Another useful tool is the T-distributed Stochastic Neighbor Embedding (TSNE) a nonlinear dimensionality reduction technique well-suited for embedding high-dimensional data for visualization in a low-dimensional space of two or three dimensions. """ from sklearn.manifold import TSNE import pandas as pd from functions import load_model # load data generated in 3_Clustering and 4_PCA pca_result = load_model('models/PCA.sav') clustered = load_model('models/clustered.pkl') # instantiate TSNE object with two main clusters tsne = TSNE(n_components=2) # we fit (train) and transform into TSNE tsne_result = tsne.fit_transform(pca_result) # DataFrame creation with information generated TSNE_df = pd.DataFrame(tsne_result) # columns renamed TSNE_df.columns = ['x1', 'x2'] # create cluster column TSNE_df['cluster'] = clustered # saving our DataFrame TSNE_df.to_csv('data/TSNE.csv') # displaying a 20 line preview of the DataFrame print(TSNE_df.head(20))
and it is a tool commonly used for dimensionality reduction. The way it works
is by projecting each data point onto only the first few principal components
to obtain lower-dimensional data while preserving as much of the data's
variation as possible.
"""
from sklearn.decomposition import PCA
import numpy as np
import pandas as pd
from functions import load_model
import pickle

# we load data generated in 2_Word2Vec_SentenceVectors and 3_Clustering
X = np.load('data/X.npy')
clustered = load_model('models/clustered.pkl')

# instantiate a PCA object with two principal components
pca = PCA(n_components=2)

# we fit (train) and transform into the PCA space
pca_result = pca.fit_transform(X)

# DataFrame creation with the information generated
PCA_df = pd.DataFrame(pca_result)

# columns renamed
PCA_df.columns = ['x1', 'x2']

# create the cluster column
PCA_df['cluster'] = clustered

# saving our DataFrame
PCA_df.to_csv('data/PCA.csv')
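# A small sanity check that is not in the original script: see how much of the
# data's variance the two retained components actually explain.
print("explained variance ratio:", pca.explained_variance_ratio_)
print("total variance explained:", pca.explained_variance_ratio_.sum())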
""" It's time to see the accuracy of our model by evaluating our own news, this news were manually downloaded from internet and saved in a txt file. This script only pre process the news, same as did before """ import numpy as np import pickle from glob import glob from functions import prepare_news, load_model # load data generated in 1_DataAnalysis_Cleanup, 2_Word2Vec_SentenceVectors and 3_Clustering processed_data = np.load('data/processed_data.npy', allow_pickle=True) model = load_model('models/model.pkl') kmeans = load_model('models/kmeans.pkl') # getting a list of all news paths to analyze glob_list = glob('news/*.txt') # getting a list of all news in string format news_list = [open(new, 'r', encoding='utf8').read() for new in glob_list] # processing news and saving result pickle.dump(prepare_news(news_list), open('news/news_list.pkl', 'wb'))
def logger(metadata):
    with open('online.log', 'a') as f:
        f.write(metadata)


if logging:
    logger('timestamp(s) : dns bucket : predictions : diverged')

if __name__ == '__main__':
    gravity = fn.load_gravity()['domain'].values
    tokenizer = fn.load_yttm()
    max_timestamp = 1600397395  # arbitrary recent timestamp
    model = fn.load_model()  # model that learns online
    ref_model = tf.keras.models.clone_model(model)  # reference model
    ref_model.set_weights(model.get_weights())
    model.optimizer.lr.assign(1e-4)
    df = fn.load_gravity(table='domainlist')
    epsilon = 0.1
    i = 0
    bad_domains_all = []
    while True:
        (queries, pos_samples, neg_samples, anchor_pos_samples,
         anchor_neg_samples, parsed_df, max_timestamp) = fn.run_all(
            tokenizer=tokenizer, timestamp=max_timestamp)
        if i == 0:
            i += 1
            continue
        print('{0} pieces of matter entering the photon sphere..'.format(
            len(queries)))  # format argument assumed; the snippet is truncated here
def create_KDE(args, inds=None, bws={}, mc=None):
    if 'mc' not in args.keys():
        args['mc'] = None
    if 'phi0' not in args.keys():
        args['phi0'] = 1
    if args['outfolder'] is None:
        args['outfolder'] = os.path.join(os.path.dirname(args['model']), 'out')
    args['phi0'] *= 1e-18  # correct units
    t0 = time.time()
    model, mname = load_model(args['model'])
    print('---- Run KDE with args:')
    print(args)
    if not os.path.exists(args['outfolder']):
        os.makedirs(args['outfolder'])
    print('Load and Update the Monte Carlo')
    config = read_config()
    cfg_keys = config['keys']
    if mc is None:
        if args['mc'] is not None:
            mc_path = args['mc']
        else:
            mc_path = str(config['IC_MC']['path'])
        mc = np.load(str(mc_path))
        mc = mc_cut(mc, config)
    if inds is not None:
        print('Cut on given indices..')
        mc = mc[inds]
    settings, grid = model.setup_KDE(mc, cfg_keys)
    mc_conv = len(mc)
    print('Use {} mc events'.format(mc_conv))
    for key in settings.keys():
        settings[key]['name'] = key
    for key in bws.keys():
        settings[key]['bandwidth'] = bws[key]
    plaw = np.vectorize(powerlaw)

    # create binned pdf
    if args['weights'] == 'default':
        print('Use pre-calculated input weights')
        weights = mc['cur_weight']
    elif args['weights'] == 'pl':
        weights = mc[cfg_keys['ow']] * plaw(
            mc[cfg_keys['trueE']], phi0=args['phi0'], gamma=args['gamma'])
    elif args['weights'] == 'conv':
        weights = mc[cfg_keys['conv']]
    elif args['weights'] == 'conv+pl':
        # diff_weight = mc['orig_OW'] * plaw(mc['trueE'], phi0=args['phi0'],
        #                                    gamma=args['gamma'])
        weights = mc[cfg_keys['conv']] + mc[cfg_keys['astro']]
        print('Rates [1/yr]:')
        print(np.sum(mc[cfg_keys['conv']]) * np.pi * 1e7)
        print(np.sum(mc[cfg_keys['astro']]) * np.pi * 1e7)
    else:
        print('{} is not a valid weights argument'.format(args['weights']))
        sys.exit(0)

    inp_arr = [settings[key] for key in settings.keys()]
    if args['adaptive']:
        m_input = meerkat_input(inp_arr, weights, mc_conv=mc_conv)
        m_kde4d_fb = meerkat_kde(m_input)
        adtv_input = meerkat_input(inp_arr, weights,
                                   pdf_seed=m_kde4d_fb.kde,
                                   adaptive=True,
                                   mc_conv=mc_conv)
        m_kde4d = meerkat_kde(adtv_input)
    else:
        m_input = meerkat_input(inp_arr, weights, mc_conv=mc_conv)
        m_kde4d = meerkat_kde(m_input)

    nbins = args['eval_bins']
    eval_grid = OrderedDict()
    if grid is None:
        grid = {}
    for key in settings.keys():
        if key in grid.keys():
            if isinstance(grid[key], list):
                eval_grid[key] = np.linspace(grid[key][0], grid[key][1], nbins)
            elif isinstance(grid[key], np.ndarray):
                eval_grid[key] = grid[key]
        else:
            eval_grid[key] = np.linspace(settings[key]['range'][0],
                                         settings[key]['range'][1], nbins)
    print(eval_grid.keys())
    out_bins = [eval_grid[key] for key in settings.keys()]
    coords = np.array(list(itertools.product(*out_bins)))
    bws = [settings[key]['bandwidth'] for key in settings.keys()]
    print('Evaluate KDEs:')
    pdf_vals = np.asarray([m_kde4d.eval_point(coord) for coord in coords])
    shpe = np.ones(len(settings.keys()), dtype=int) * nbins
    pdf_vals = pdf_vals.reshape(*shpe)

    if args['weights'] != 'pl':
        add_str = '_' + args['weights']
    else:
        add_str = '_' + 'g_{}'.format(args['gamma'])
    if args['save_str'] != '':
        add_str = add_str + '_' + args['save_str']
    odict = {
        'vars': list(eval_grid.keys()),
        'bins': out_bins,
        'coords': coords,
        'pdf_vals': pdf_vals,
        'bw': bws
    }
    if not args['no_save']:
        with open(os.path.join(args['outfolder'], mname + add_str + '.pkl'),
                  'wb') as fp:
            pickle.dump(odict, fp)
    t1 = time.time()
    print('Finished after {} minutes'.format((t1 - t0) / 60))
    return odict
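# A minimal sketch of how create_KDE might be invoked, using only the keys the
# function actually reads; every value below is an illustrative placeholder,
# and the model path in particular is hypothetical.
example_args = {
    'model': 'models/my_kde_model',  # hypothetical path, resolved by load_model
    'outfolder': None,               # defaults to <model dir>/out
    'weights': 'pl',                 # power-law weighting
    'phi0': 1.0,                     # flux normalization (scaled by 1e-18 inside)
    'gamma': 2.0,                    # spectral index
    'adaptive': False,               # fixed-bandwidth KDE
    'eval_bins': 50,                 # evaluation grid resolution per dimension
    'save_str': '',
    'no_save': False,
}
odict = create_KDE(example_args)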
ap = argparse.ArgumentParser()
ap.add_argument('--modeldir', required=True, help="Path to the model")
ap.add_argument('--datasetdir', required=False, help="Path to the dataset")
ap.add_argument('--nepochs', required=False, help="Number of training epochs")
ap.add_argument('--lr', required=False, help="Learning rate")
args = vars(ap.parse_args())

model_directory = args['modeldir']
dataset_directory = value_or_default(args['datasetdir'],
                                     os.path.join('..', 'dataset'))
nepochs = int(value_or_default(args['nepochs'], 100))
lr = float(value_or_default(args['lr'], 1e-3))

architecture_file = os.path.join(model_directory, 'architecture.json')
parameters_file = os.path.join(model_directory, 'parameters.json')
assert (os.path.isfile(architecture_file) and os.path.isfile(parameters_file)), \
    'No architecture or parameters found in the specified directory.'

model = load_model(model_directory)
with open(parameters_file) as f:
    parameters = json.load(f)

date = datetime.datetime.now().strftime("%Y%m%dT_%H%M%S")
session_directory = os.path.join(model_directory,
                                 f"session_{date}_epochs_{nepochs}")
if not os.path.isdir(session_directory):
    os.makedirs(session_directory)

training_generator = generator(
    filenames=load_filenames(os.path.join(dataset_directory, 'training')),
    batch_size=parameters['batchsize'],
    dim=[*parameters['shape']])
validation_generator = generator(
    filenames=load_filenames(os.path.join(dataset_directory, 'validation')),
    batch_size=parameters['batchsize'],
    dim=[*parameters['shape']])

# monitor validation accuracy; mode must be 'max' so the best (not the worst)
# weights are kept
checkpoint = ModelCheckpoint(os.path.join(session_directory, "weights.h5"),
                             monitor='val_acc', verbose=1,
                             save_best_only=True, mode='max')
earlystop = EarlyStopping(monitor='val_loss', patience=20, verbose=1)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10,
                              verbose=1, mode='auto', min_delta=0.01,
                              cooldown=0, min_lr=1e-9)
callbacks_list = [checkpoint, earlystop, reduce_lr]
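# A minimal sketch of the value_or_default helper assumed above: return the
# parsed argument when it was provided on the command line, else the default.
def value_or_default(value, default):
    return value if value is not None else default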
image = results.image_path
top_k = results.topk
gpu = results.gpu
cat_names = results.cat_name_dir

with open(cat_names, 'r') as f:
    cat_to_name = json.load(f)

model = getattr(models, pt_model)(pretrained=True)

# Load model
loaded_model = load_model(model, save_dir, gpu)

# Preprocess image
processed_image = process_image(image)

# Get the top K most likely classes with probabilities
probs, classes = predict(processed_image, loaded_model, top_k, gpu)

# Map classes to names
names = [cat_to_name[i] for i in classes]

# Print out the top K classes and probabilities
print(f"Top {top_k} classes are: {classes}, with associated probabilities: {probs}")

# Print out the most likely output
print(f"The most likely outcome is a: '{names[0]} ({round(probs[0]*100, 2)}%)'")
# actual code starts from here
# logging setup (tensorboard, log.txt)
logging_handlers = [logging.StreamHandler()]
if args.save:
    logging_handlers += [logging.FileHandler(f'{args.save_path}/log.txt')]
if args.tensorboard:
    writer = SummaryWriter('runs/' + args.suffix + "_" + args.timestamp[-5:])
logging.basicConfig(level=logging.INFO, format='%(message)s',
                    handlers=logging_handlers)

# load model & tokenizer
model, tokenizer = load_model(args, BERT_DIR, device)
if args.verbose:
    if args.bert_path is not None:
        logging.info(
            f"* loaded a BERT model from {args.bert_path} and its tokenizer")
        if args.load_from:
            logging.info(f"* loaded BERT params from {args.load_from}")
    else:
        logging.info(f"* loaded {args.bert_model} model and its tokenizer")

# load data
if args.verbose:
    logging.info(f"loading data from {args.data_path}")
emo_mapping_e2i = {e: i for i, e in enumerate(emotions)}
target_emotion_idx = emo_mapping_e2i[args.target_emotion]
data, src_data, tgt_data = import_data(args.data_path, args.source,
# Img uploader
img = st.file_uploader(label="Load X-Ray Chest image",
                       type=['jpeg', 'jpg', 'png'],
                       key="xray")

if img is not None:
    # Preprocessing Image
    p_img = functions.preprocess_image(img)

    if st.checkbox('Zoom image'):
        image = np.array(Image.open(img))
        st.image(image, use_column_width=True)
    else:
        st.image(p_img)

    # Loading model
    loading_msg = st.empty()
    loading_msg.text("Predicting...")
    model = functions.load_model()

    # Predicting result
    prob, prediction = functions.predict(model, p_img)
    loading_msg.text('')

    if prediction:
        st.markdown(unsafe_allow_html=True,
                    body="<span style='color:red; font-size: 50px'><strong><h4>Pneumonia! :slightly_frowning_face:</h4></strong></span>")
    else:
        st.markdown(unsafe_allow_html=True,
                    body="<span style='color:green; font-size: 50px'><strong><h3>Healthy! :smile: </h3></strong></span>")

    st.text(f"*Probability of pneumonia is {round(prob[0][0] * 100, 2)}%")
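# A possible refinement, not in the original app: cache the model so Streamlit
# loads it once instead of on every rerun. Assumes Streamlit >= 1.18 for
# st.cache_resource; older versions used @st.cache instead.
@st.cache_resource
def get_model():
    return functions.load_model()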
""" Here we predict the now processed news and see results in a DataFrame """ import pandas as pd from functions import load_model # load data generated in 3_Clustering and 7_TestNewsProcessing kmeans = load_model('models/kmeans.pkl') news_list = load_model('news/news_list.pkl') # printing predictions prediction = kmeans.predict(news_list) print(prediction) # printing a DataFrame with results obtained df = pd.DataFrame({ 'Sentence': ['news' + str(num + 1).zfill(2) for num, item in enumerate(news_list)], 'Prediction': prediction }) print(df)