def main():
    # get hyperparameters from input args
    opt = ArgParser().get_options()

    # deterministic seed across numpy, torch and cuda
    # store as variable due to garbage collecting
    seeder = Seeder(opt.seed)
    seeder.activate()

    # data from ./zoo.py
    opt.model_class = models[opt.model_name]
    opt.initializer = initializers[opt.initializer]
    opt.optimizer = optimizers[opt.optimizer]

    # data from ./config.yml
    opt.dataset_file = config['datasets'][opt.dataset]
    opt.model_inputs = config['model_inputs'][opt.model_name]

    # run on gpu if available
    device = opt.device
    if device is None:
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
    opt.device = torch.device(device)

    runner = ModelRunner(opt)
    runner.run()
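# A minimal sketch of what a Seeder like the one above might do, assuming it only needs
# to seed Python's `random`, NumPy, and PyTorch (CPU and CUDA). The class name, the
# `activate()` method, and the cuDNN flags are assumptions for illustration, not the
# actual implementation behind the `Seeder` used in the snippet above.
import random

import numpy as np
import torch


class Seeder:
    def __init__(self, seed: int):
        self.seed = seed

    def activate(self) -> None:
        # seed every RNG the training loop might touch
        random.seed(self.seed)
        np.random.seed(self.seed)
        torch.manual_seed(self.seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed_all(self.seed)
        # make cuDNN deterministic at the cost of some speed
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False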
def main():
    parser = argparse.ArgumentParser(description="-----[IMDB-classifier]-----")
    parser.add_argument("--sample", default=False, action='store_true',
                        help="flag whether to use the sample dataset")
    parser.add_argument("--mode", default="train",
                        help="train: train (with test) a model / test: test saved models")
    parser.add_argument("--model", default="simple-gru",
                        help="available models: simple-gru, ...")
    parser.add_argument("--epoch", default=10, type=int, help="number of max epochs")
    parser.add_argument("--learning_rate", default=0.001, type=float, help="learning rate")
    parser.add_argument("--batch_size", default=32, type=int, help="batch size")
    options = parser.parse_args()

    params = {
        'sample': options.sample,
        'model': options.model,
        'mode': options.mode,
        'batch_size': options.batch_size,
        'epoch': options.epoch,
        'learning_rate': options.learning_rate
    }

    modelRunner = ModelRunner(params)
    if options.mode == 'train':
        print("=" * 20 + "TRAINING STARTED" + "=" * 20)
        modelRunner.train()
    elif options.mode == 'test':
        print("=" * 20 + "TESTING STARTED" + "=" * 20)
        modelRunner.load_model()
        modelRunner.test()
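# Hypothetical invocation of the CLI above (the script name run_imdb.py is an assumption;
# the flags and defaults come from the argparse definitions in the snippet):
#
#   python run_imdb.py --mode train --model simple-gru --epoch 10 --learning_rate 0.001 --batch_size 32
#   python run_imdb.py --mode test --model simple-gru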
def download_model_from_storage(self):
    if len(self.model_url) == 0:
        return
    try:
        self.logger.info("request for downloading model file: %s", self.model_url)
        r = requests.get(self.model_url, stream=True)
        self.logger.info("request status for downloading model file: %d", r.status_code)
        filepath = 'ml_model'
        with open(filepath, 'wb') as fd:
            for chunk in r.iter_content(2000):
                fd.write(chunk)
        self.modelrunner = ModelRunner(filepath)
    except Exception as e:
        self.logger.info("model file cannot be downloaded: %s", str(e))
def main(args):
    # create dataloaders
    data_loader = DataLoaderFactory()
    train_data_loader = data_loader.get_input_for_ccf(file_path, batch_size, max_seq_length, shuffle, drop_last)
    test_data_loader = data_loader.get_input_for_ccf(file_path, batch_size, max_seq_length, shuffle, drop_last)
    dev_data_loader = data_loader.get_input_for_ccf(file_path, batch_size, max_seq_length, shuffle, drop_last)

    # set up the task model
    task_model = TaskModel(num_labels=len(args.label_to_id),
                           dropout_prob=args.dropout_prob,
                           bret_pretrainded_path=args.bert_pretrain_path)

    # reload model parameters
    # print("loading parameters from test_model.bin")
    # task_model.load_state_dict(torch.load(os.path.join(os.path.dirname(curr_path), "model_save", "model_89.15.bin"),
    #                                       "cuda" if torch.cuda.is_available() else None))

    # set up the optimizer
    optimizer = get_optim(task_model.parameters(), args)

    # print config
    print("args", args)

    # start model training
    cls_app = ModelRunner(task_type="cls", is_bert=False, label_to_id=args.label_to_id)
    cls_app.train(total_step=args.total_step,
                  eval_per_step=args.eval_per_step,
                  task_model=task_model,
                  model_save_path=args.model_save_path,
                  optimizer=optimizer,
                  train_data_loader=train_data_loader,
                  dev_data_loader=dev_data_loader,
                  eval_label_list=list(args.label_to_id.values()),
                  compare_param="f1",
                  eval_file_save_name=args.eval_file_save_name)
    cls_app.predict_for_ccf(dataiter=dev_data_loader,
                            model=task_model,
                            save_file_path="test_predict_out.txt",
                            model_path=args.model_save_path,
                            load_from_onnx=False)
    print("model run finished")
def main(argv):
    data_loader = BaseLoader.get_loader_from_flags(FLAGS.data_set)
    train_set, valid_set, test_set = data_loader.load_dataset(FLAGS.num_steps, FLAGS.shuffle_train)

    model = DualStageRNN(encoder_dim=FLAGS.encoder_dim,
                         decoder_dim=FLAGS.decoder_dim,
                         num_steps=FLAGS.num_steps,
                         num_series=data_loader.num_series,
                         use_cur_exg=FLAGS.use_cur_exg)
    save_path = 'data/data_nasdaq'

    model_runner = ModelRunner(model, data_loader.label_scaler, FLAGS, save_path)
    model_runner.train(train_set, valid_set, test_set, FLAGS.max_epoch)
    # model_runner.restore('logs/ModelWrapper/lr-0.001_encoder-32_decoder-32/20190922-125838/saved_model')
    model_runner.evaluate(test_set, plot=FLAGS.plot_prediction)
    return
def prep_trial(self, input_params, grid_logger, grid_logger_avg):
    runners = []
    for it in range(input_params['iterations']):
        # train and test split
        if Train_test_split == 'bipartite':
            person_data = np.delete(np.arange(len(self._labels[0])), np.arange(Bipartite_products))
            rand_test_indices = np.random.choice(person_data, round(len(person_data) * 0.9), replace=False)
            rand_train_indices = np.delete(np.arange(len(self._labels[0])), rand_test_indices)
        else:
            rand_test_indices = np.random.choice(len(self._labels[0]),
                                                 round(len(self._labels[0]) * 0.9), replace=False)
            rand_train_indices = np.delete(np.arange(len(self._labels[0])), rand_test_indices)

        train = [[k for k in rand_train_indices if self._labels[j][k] != -1] for j in range(len(self._labels))]
        test = [[k for k in rand_test_indices if self._labels[j][k] != -1] for j in range(len(self._labels))]
        test_labels = [torch.tensor([self._labels[j][k] for k in rand_test_indices if self._labels[j][k] != -1],
                                    dtype=torch.double).to(self._device)
                       for j in range(input_params['time_inds'])]
        train_labels = [torch.tensor([self._labels[j][k] for k in rand_train_indices if self._labels[j][k] != -1],
                                     dtype=torch.double).to(self._device)
                        for j in range(input_params['time_inds'])]

        input_params['it_num'] = it
        input_params['activation'] = torch.nn.functional.relu
        input_params['train_person'] = rand_train_indices
        input_params['test_person'] = rand_test_indices
        input_params['training_inds'] = train
        input_params['test_inds'] = test
        input_params['training_labels'] = train_labels
        input_params['test_labels'] = test_labels
        input_params['adj_matrices'] = self._adjacency_matrices
        input_params['feature_matrices'] = self._feature_matrices

        dumping_name = ""
        logger = self.fix_logger(dumping_name)
        runner = ModelRunner(input_params, logger=logger)
        runners.append(runner)
    execute_runners(runners, grid_logger, grid_logger_avg, is_nni=self._nni)
import logging

# pylint: disable-msg=F0401
# pylint: disable=no-name-in-module
from app_config import mlflow_config, service_config
from model_runner import ModelRunner

model_handler = ModelRunner(mlflow_config, service_config)

if __name__ == '__main__':
    logging_level = service_config['log_level']
    log = logging.getLogger('werkzeug')
    log.setLevel(logging_level)
    model_handler.run_flask_server(port=service_config['port'], host='0.0.0.0')
def main() -> None:
    device = f"cuda:{gpu_number}" if torch.cuda.is_available() else torch.device('cpu')
    print(device)

    train_boxes_df = pd.read_csv(META_TRAIN)
    train_boxes_df = preprocess_boxes(train_boxes_df)
    train_images_df = pd.read_csv('folds/orig_alex_folds.csv')
    print(f'\nTotal images: {len(train_images_df.image_id.unique())}')

    # Leave only images with bboxes
    image_id_column = 'image_id'
    print('Leave only train images with boxes')
    with_boxes_filter = train_images_df[image_id_column].isin(train_boxes_df[image_id_column].unique())

    # train/val images
    images_val = train_images_df.loc[
        (train_images_df['fold'] == fold) & with_boxes_filter, image_id_column].values
    images_train = train_images_df.loc[
        (train_images_df['fold'] != fold) & with_boxes_filter, image_id_column].values
    print(f'\nTrain images: {len(images_train)}, validation images: {len(images_val)}')

    # get datasets
    train_dataset = WheatDataset(
        image_ids=images_train[:16],
        image_dir=TRAIN_DIR,  # train_box_callback,
        labels_df=train_boxes_df,
        transforms=get_train_transforms(image_size),
        is_test=False
    )
    valid_dataset = WheatDataset(
        image_ids=images_val[:16],
        image_dir=TRAIN_DIR,
        labels_df=train_boxes_df,  # train_box_callback,
        transforms=get_valid_transforms(image_size),
        is_test=True
    )

    # get dataloaders
    train_data_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        collate_fn=collate_fn
    )
    valid_data_loader = DataLoader(
        valid_dataset,
        batch_size=inf_batch_size,
        shuffle=False,
        num_workers=num_workers,
        collate_fn=collate_fn
    )

    # efficientdet config
    config = get_efficientdet_config(f'tf_efficientdet_d{model_name[-1]}')
    config.num_classes = 1
    config.image_size = image_size
    net = EfficientDet(config, pretrained_backbone=False)
    net.class_net = HeadNet(config, num_outputs=config.num_classes,
                            norm_kwargs=dict(eps=.001, momentum=.01))

    weights_file = f'{experiment_name}.pth'
    # If resuming training
    if os.path.exists(weights_file):
        print(f'Continue training, loading weights from: {weights_file}')
        load_weights(net, weights_file)
    else:
        print('Use coco pretrain')
        pretrain = get_effdet_pretrain_names(model_name)
        load_weights(net, f'../../timm-efficientdet-pytorch/{pretrain}')
    model = DetBenchTrain(net, config)

    runner = ModelRunner(model, device)
    weights_file = f'{experiment_name}.pth'
    # add tags
    neptune.log_text('save checkpoints as', weights_file[:-4])

    # run training
    runner.run_train(train_data_loader, valid_data_loader, n_epoches=n_epochs,
                     weights_file=weights_file,
                     factor=factor, start_lr=start_lr, min_lr=min_lr,
                     lr_patience=lr_patience, overall_patience=overall_patience,
                     loss_delta=loss_delta)
    neptune.stop()
    n_items, iter=iter)
X_neg_sppmi = convert_to_SPPMI_matrix(X_neg, max_row=n_items, shifted_K=SHIFTED_K_VALUE)
Y_neg_sppmi = None
t2 = time.time()
print('Time : %d seconds' % (t2 - t1))

# build the model
print('build the model...')
t1 = time.time()
runner = ModelRunner(train_data, vad_data, None, X_sppmi, X_neg_sppmi, Y_sppmi, None,
                     save_dir=save_dir)
U, V, ndcg100 = runner.run("rme", n_jobs=1, lam=lam, lam_emb=lam_emb,
                           n_components=n_components, ret_params_only=1)
t2 = time.time()
print('Time : %d seconds' % (t2 - t1))
print('*************************************ITER %d ******************************************' % iter)
print('NDCG@100 at this iter:', ndcg100)
# if best_ndcg100 < ndcg100:
def main() -> None:
    device = f"cuda:{gpu_number}" if torch.cuda.is_available() else torch.device('cpu')
    print(device)

    train_boxes_df = pd.read_csv(META_TRAIN)
    train_boxes_df = preprocess_boxes(train_boxes_df)
    print(train_boxes_df.head())

    # train_images_df = pd.read_csv('orig_alex_folds.csv')
    image_id_column = 'image_id'
    train_images_df = pd.read_csv('folds/train_alex_folds.csv')
    print(f'\nTotal images: {len(train_images_df[image_id_column].unique())}')

    # Leave only images with bboxes
    # print('Leave only train images with boxes (all)')
    img_list = train_boxes_df[image_id_column].unique()
    print(len(img_list))
    with_boxes_filter = train_images_df[image_id_column].isin(img_list)
    fold = 0

    # val images
    images_val = img_list
    # images_val = train_images_df.loc[
    #     (train_images_df['fold'] == fold) & with_boxes_filter, image_id_column].values
    print(f'\nValidation images {len(images_val)}')

    # get dataset
    valid_dataset = WheatDataset(image_ids=images_val[:16],
                                 image_dir=TRAIN_DIR,
                                 labels_df=train_boxes_df,  # train_box_callback,
                                 transforms=get_valid_transforms(image_size),
                                 is_test=True)
    valid_data_loader = DataLoader(valid_dataset,
                                   batch_size=inf_batch_size,
                                   shuffle=False,
                                   num_workers=num_workers,
                                   collate_fn=collate_fn)

    # efficientdet config
    config = get_efficientdet_config(f'tf_efficientdet_d{model_name[-1]}')
    config.num_classes = 1
    config.image_size = image_size
    net = EfficientDet(config, pretrained_backbone=False)
    net.class_net = HeadNet(config, num_outputs=config.num_classes,
                            norm_kwargs=dict(eps=.001, momentum=.01))

    weights_file = f'{experiment_name}.pth'
    if os.path.exists(weights_file):
        print(f'Loading weights from: {weights_file}')
        load_weights(net, weights_file)
    else:
        print(f'No {weights_file} checkpoint')
    model = DetBenchTrain(net, config)

    # get predictions
    manager = ModelRunner(model, device)
    true_boxes, pred_boxes, pred_scores = manager.predict(valid_data_loader)

    nms_thresholds = np.linspace(min_thres, max_thres, num=points, endpoint=False)
    best_metric = 0
    for thr in nms_thresholds:
        print('thr', thr)
        cur_metric = competition_metric(true_boxes, pred_boxes, pred_scores, thr)
        if cur_metric > best_metric:
            best_thr = thr
            best_metric = cur_metric
    print(f'best_metric: {best_metric}, best thr: {best_thr}')
print(f"lang: {LANG}", f"epoch: {epoch}", f"train_loss: {train_loss}", f"test_loss: {test_loss}", sep='\t') logs.append({"train": train_loss, "test": test_loss, "epoch": epoch}) # if epoch % 1 == 0: # liveloss.send() if epoch % 10 == 0: subprocess.call(["bash", "-c", "rm *{prediction,balanced.tsv}"]) if epoch % 20 == 0: model_runner = ModelRunner(model, char2index, device) # model_runner.run_model_on_word("bare") datasets = build_datasets(model_runner, model_filename="turmodel_size_6_epoch_" + str(epoch)) for task_fn in datasets: for hash in map(str, range(1)): with_agg_v_vals = (True, False) with_agg_v_vals = (False, ) for with_agg_v in with_agg_v_vals: probing_metainfo = dt_probe_dataset(task_fn, hash + 'TREE' + str(with_agg_v), with_agg=with_agg_v, tree=True)
def start_processing(cls, path_to_base_folder):
    start = time()
    Utilities.prepare_properties_dictionary()

    if not os.path.exists(
            os.path.join(path_to_base_folder,
                         Utilities.get_prop_value(Utilities.BOOK_DESCRIPTOR_KEY))):
        print("Please provide the book descriptor file")
        return
    if not os.path.exists(
            os.path.join(path_to_base_folder,
                         Utilities.get_prop_value(Utilities.BOOK_REPO_KEY))):
        print("Please provide the book folder")
        return

    if not os.path.exists(
            os.path.join(os.getcwd(),
                         Utilities.get_prop_value(Utilities.DATA_POINT_KEY))):
        data_start = time()
        DataPointSelector.select_datapoints(path_to_base_folder)
        data_end = time()
        print("Data Selection took : {} minutes".format((data_end - data_start) / 60))
    else:
        print("Data Point CSV found in directory, continuing to Feature Extraction")

    if not os.path.exists(
            os.path.join(os.getcwd(),
                         Utilities.get_prop_value(Utilities.PYTHON_FEATURE_CSV))):
        py_start = time()
        extractor = FeatureExtractor(base_folder_address=path_to_base_folder)
        extractor.extract_features()
        py_end = time()
        print("Python Extractor took : {} minutes".format((py_end - py_start) / 60))
    else:
        print("Python Feature Vector CSV found in directory, continuing to run Java project")

    if not os.path.exists(
            os.path.join(os.getcwd(),
                         Utilities.get_prop_value(Utilities.JAVA_FEATURE_CSV))):
        bat_file_name = r'command.bat'
        folder_path = os.path.join(path_to_base_folder,
                                   Utilities.get_prop_value(Utilities.BOOK_REPO_KEY))
        output_file_name = ".\\" + Utilities.get_prop_value(Utilities.JAVA_FEATURE_CSV)
        book_descriptor_file_name = ".\\" + Utilities.get_prop_value(Utilities.BOOK_DESCRIPTOR_KEY)
        data_points_file_name = ".\\" + Utilities.get_prop_value(Utilities.DATA_POINT_KEY)
        java_start = time()
        x = subprocess.call([
            bat_file_name, folder_path, output_file_name,
            book_descriptor_file_name, data_points_file_name
        ])
        java_end = time()
        print("Java Project took : {} minutes".format((java_end - java_start) / 60))
    else:
        print("Java output Feature Vector CSV found in directory, continuing to Model Runner")

    runner = ModelRunner(path_to_base_folder)
    runner.drive_model_runner()

    end = time()
    total = end - start
    print("Total Time for the whole process : {} minutes".format(total / 60))
class API:
    def __init__(self, port=80, threaded=True, debug=True):
        self.port = port
        self.threaded = threaded
        self.debug = debug
        self.app = Flask(__name__.split('.')[0])
        self.app.config['SECRET_KEY'] = str(UUID(int=random.randint(0, 10000000000)))
        self.app.add_url_rule('/', 'index', self.index, methods=['GET', 'POST'])
        self.app.add_url_rule('/players/<p1>/<p2>', 'players', self.players, methods=['GET', 'POST'])
        self.app.add_url_rule('/bets', 'bets', self.bets)
        self.app.add_url_rule('/rankings', 'rankings', self.rankings)
        self.app.add_url_rule('/recent', 'recent', self.recentMatches)

        helper.fillRegionDict()
        model = MLP(max_epochs=10000, batch_size=128, learning_rate=5e-3, width=50, layers=1)
        print('Model Created')
        self.runner = ModelRunner(model,
                                  "data/matchResults_aligulac.csv",
                                  trainRatio=0.8,
                                  testRatio=0.2,
                                  lastGameId="302054",
                                  keepPercent=1.0,
                                  decay=False)
        print(datetime.now(), 'Model Runner Created')
        print(self.runner.getLastId())
        self.runner.loadProfiles()
        self.runner.model.loadBackup()
        self.runner.clearMemory()
        print("Memory Cleared")

        rankingsList = self.runner.generateRanking(20)
        rank = 1
        print("Rank, Name, Race, Country, Project W/R, Elo, Glicko, MatchExpAvg")
        for [rate, profile] in rankingsList:
            print(rank, profile.name, profile.race, profile.country, rate,
                  profile.elo, profile.glickoRating, profile.expOverall)
            rank += 1

        self.liveThread = threading.Thread(target=self.runner.getLive)
        self.liveThread.start()

    def start(self):
        http_server = WSGIServer(('', self.port), self.app)
        http_server.serve_forever()
        # self.app.run(debug=self.debug, port=self.port, threaded=self.threaded)

    def index(self):
        form = PlayersForm()
        if form.validate_on_submit():
            return redirect(url_for('players', p1=form.player1.data, p2=form.player2.data))
        elif request.method == 'GET':
            return render_template('index.html', form=form)

    def players(self, p1, p2):
        p1List = self.runner.profiles[p1.lower()]
        p2List = self.runner.profiles[p2.lower()]
        firstKeyPairs = [(i, str(p1List[i])) for i in range(len(p1List))]
        secondKeyPairs = [(i, str(p2List[i])) for i in range(len(p2List))]
        cform = ChooseForm()
        cform.choice1.choices = firstKeyPairs
        cform.choice2.choices = secondKeyPairs
        if request.method == 'POST':
            bestOf = cform.bo.data
            profile1 = p1List[int(cform.choice1.data)]
            profile2 = p2List[int(cform.choice2.data)]
            seriesPred = self.runner.predictSeries(player1=profile1.name,
                                                   p1Race=profile1.race,
                                                   p1Country=profile1.country,
                                                   player2=profile2.name,
                                                   p2Race=profile2.race,
                                                   p2Country=profile2.country,
                                                   bestOf=bestOf)
            tableList = []
            tableList.append(
                "<table><tr><th>Probability</th><th>Score</th><th>Score</th><th>Probability</th></tr>")
            keySize = len(seriesPred[0].keys())
            numKeys = 0
            t1 = 0
            t2 = 0
            for key in sorted(seriesPred[0].keys(), reverse=True):
                numKeys += 2
                seriesPred[0][key] = seriesPred[0][key].item()
                t1 += seriesPred[0][key]
                backwardsKey = key[::-1]
                seriesPred[0][backwardsKey] = seriesPred[0][backwardsKey].item()
                t2 += seriesPred[0][backwardsKey]
                tableList.append("<tr><td>")
                tableList.append("%.4f" % round(seriesPred[0][key], 4))
                tableList.append("</td>")
                tableList.append("<td>")
                tableList.append(key)
                tableList.append("</td>")
                tableList.append("<td>")
                tableList.append(backwardsKey)
                tableList.append("</td>")
                tableList.append("<td>")
                tableList.append("%.4f" % round(seriesPred[0][backwardsKey], 4))
                tableList.append("</td>")
                tableList.append("</tr>")
                if numKeys >= keySize:
                    break
            tableList.append("<tr><td>")
            tableList.append("%.4f" % round(t1, 4))
            tableList.append("</td>")
            tableList.append("<td>")
            tableList.append("Winner (")
            tableList.append(profile1.name)
            tableList.append(")</td><td>")
            tableList.append("Winner (")
            tableList.append(profile2.name)
            tableList.append(")</td>")
            tableList.append("<td>")
            tableList.append("%.4f" % round(t2, 4))
            tableList.append("</td></tr></table>")
            tableString = "".join(tableList)
            style = ("<style>* {margin: 0;font-family: Arial, Helvetica, sans-serif;}"
                     "table {font-family: arial, sans-serif;border-collapse: collapse;width: 100%;}"
                     "td, th {border: 1px solid #dddddd;text-align: left;padding: 8px;}"
                     "tr:nth-child(even) {background-color: #dddddd;}</style>")
            return "{6}<h2>Best of {4}: {0} {1} vs {3} {2}</h2><br><h3>{5}</h3>".format(
                profile1.name, "%.4f" % round(seriesPred[1].item(), 4),
                profile2.name, "%.4f" % round(seriesPred[2].item(), 4),
                bestOf, tableString, style)
        return render_template('players.html', form=cform)

    def bets(self):
        graph_url = ""
        try:
            graph_url = self.runner.graphBalance()
        except NoBetsYetExceptions:
            print("Not enough data points in balance history")
        return render_template("bets.html", graph=graph_url)

    def rankings(self):
        rankingsList = self.runner.generateRanking(20)
        ranks = []
        rates = []
        names = []
        races = []
        countries = []
        elos = []
        glickos = []
        rank = 1
        for [rate, profile] in rankingsList:
            ranks.append(rank)
            rates.append("%.4f" % round(rate, 4))
            names.append(profile.name)
            races.append(profile.race)
            countries.append(profile.country)
            elos.append("%.2f" % round(profile.elo, 2))
            glickos.append("%.2f" % round(profile.glickoRating, 2))
            rank += 1
        return render_template("rankings.html",
                               ranks=ranks,
                               rates=rates,
                               names=names,
                               races=races,
                               countries=countries,
                               elos=elos,
                               glickos=glickos)

    def recentMatches(self):
        matchList = reversed(self.runner.recentMatches)
        return render_template("recent.html", matches=matchList)
from model_runner import ModelRunner

runner = ModelRunner()
runner.train_and_save()
runner.load_model()
runner.test()