def main(args): print("Loading dataset...") vocab = read_vocab(args.vocab) X_train, y_train = read_data_set(args.training_set, vocab) # Split training further into train and valid X_train, X_valid, y_train, y_valid = train_test_split_tensors( X_train, y_train, test_size=VAL_PERC) train_set = EpisodesDataset(X_train, y_train, k=args.k) valid_set = EpisodesDataset(X_valid, y_valid, k=args.k) print("Initialising model...") model_name = get_model_name( distance=args.distance_metric, embeddings='vanilla', N=args.N, k=args.k) model = MatchingNetwork( model_name, fce=True, processing_steps=args.processing_steps, distance_metric=args.distance_metric) print("Starting to train...") train_loader = _get_loader(train_set, args.N) valid_loader = _get_loader(valid_set, args.N, episodes_multiplier=30) train( model, learning_rate=LEARNING_RATE, train_loader=train_loader, valid_loader=valid_loader)
def example_train_capture():
    # we will capture 480x480 video with a new frame every 3 epochs
    vidmaker = VideoMaker(dims=(480, 480), capture_rate=3)
    model = models.Simple()
    dataset = MandelbrotDataSet(100000)
    train(model, dataset, 10, batch_size=8000, vm=vidmaker)
def main(actions):
    vocab, data = build_dataset(src_path="data/toy-ende/src-train.txt",
                                tgt_path="data/toy-ende/tgt-train.txt")
    src_vocab, tgt_vocab = vocab
    model = Transformer(src_vocab=src_vocab, tgt_vocab=tgt_vocab)
    if "train" in actions:
        train(model, datasets=data, save=True)
    return
def example_train(): print("Initializing model...") model = models.Simple(150, 10).cuda() # see src.models for more models # show the space before we've learned anything plt.imshow(renderModel(model, 600, 600), vmin=0, vmax=1, cmap='inferno') plt.show() dataset = MandelbrotDataSet( 200000) # generate a dataset with 200000 random training points train(model, dataset, 10, batch_size=10000, use_scheduler=True) # train for 20 epochs # show the space again plt.imshow(renderModel(model, 600, 600), cmap='inferno') plt.show()
def main(args):
    nlp = spacy.load('en', disable=['parser', 'tagger', 'ner'])
    if args.command == 'train':
        if args.comment is not None:
            model_path = args.model_path + '_' + args.comment
        else:
            model_path = args.model_path
        if not os.path.exists(model_path):
            os.makedirs(model_path)
        config = read_config(args.model_config)
        hparams = read_hparams(args.train_specs)

        print('Loading data...', flush=True)
        dataset = Dataset(args.data_root, nlp=nlp, image_size=(224, 224),
                          size=args.ds, split='train', random_seed=RANDOM_SEED)

        print('Creating new model...', flush=True)
        model = create_model(config,
                             args={'image_shape': dataset[0][0].shape,
                                   'vocab_size': dataset.vocab_size},
                             cuda=args.cuda)
        print('Model initialized!', flush=True)

        print('Training model...', flush=True)
        train(model, hparams, dataset, model_path, log_interval=6)
    elif args.command == 'test':
        if not os.path.exists(args.model_path):
            print("Model doesn't exist!")
            exit(0)

        print('Loading data...', flush=True)
        dataset = Dataset(args.data_root, nlp=nlp, image_size=(224, 224),
                          split='val')

        print('Loading model...', flush=True)
        model = load_model(args.model_path,
                           args={'image_shape': dataset[0][0].shape,
                                 'vocab_size': dataset.vocab_size},
                           cuda=args.cuda,
                           weights=not args.test_init)
        print('Model loaded!', flush=True)

        print('Testing model...', flush=True)
        test(model, dataset, args.model_path)
def test_train_with_order_turn_on_me():
    field = [
        [" ", " ", " ", " ", " ", " ", " "],
        [" ", " ", " ", " ", " ", " ", " "],
        [" ", " ", "↓", "↓", "↓", " ", " "],
        [" ", " ", "↓", "↓", "↓", " ", " "],
        [" ", " ", "↓", "↓", "↓", " ", " "],
        [" ", " ", " ", " ", " ", " ", " "],
        [" ", " ", " ", " ", " ", " ", " "],
    ]
    result = train("Turn on me!", field)
    expected_result = [
        [" ", " ", " ", " ", " ", " ", " "],
        [" ", " ", " ", " ", " ", " ", " "],
        [" ", " ", "↑", "↑", "↑", " ", " "],
        [" ", " ", "↑", "↑", "↑", " ", " "],
        [" ", " ", "↑", "↑", "↑", " ", " "],
        [" ", " ", " ", " ", " ", " ", " "],
        [" ", " ", " ", " ", " ", " ", " "],
    ]
    assert result == expected_result
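# A hypothetical minimal implementation of train() that would satisfy the test
# above: the "Turn on me!" order flips every down arrow to an up arrow. This is
# a sketch inferred from the expected output only, not the project's actual code.
def train(order, field):
    if order == "Turn on me!":
        return [["↑" if cell == "↓" else cell for cell in row] for row in field]
    return field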
for cb in callback_list:
    cb.on_train_init(registration, starting_epoch=parameter_dict['STARTING_EPOCH'])

for epoch in range(parameter_dict['STARTING_EPOCH'], parameter_dict['N_EPOCHS']):
    epoch_start_time = time.time()
    logs_dict = {}
    for cb in callback_list:
        cb.on_epoch_init(registration, epoch)

    registration.train()
    if model_type == 'standard':
        train(registration, optimizer, device, generator_train, epoch,
              loss_function_dict, loss_weights_dict, callback_list, da_model,
              **kwargs_training)
    elif model_type == 'bidir':
        train_bidirectional(registration, optimizer, device, generator_train,
                            epoch, loss_function_dict, loss_weights_dict,
                            callback_list, da_model, **kwargs_training)
    else:
        raise ValueError("Please specify a valid model_type")

    epoch_end_time = time.time()
    logs_dict['time_duration (s)'] = epoch_end_time - epoch_start_time

    for cb in callback_list:
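# A hypothetical minimal callback interface consistent with the hooks invoked
# in the (truncated) loop above; the project's real base class likely defines
# additional hooks such as an end-of-epoch counterpart.
class Callback(object):
    def on_train_init(self, model, starting_epoch=0):
        pass

    def on_epoch_init(self, model, epoch):
        pass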
paths, masks_paths = get_files(path_to_data=config["path_to_data"])
images, masks = load_data(paths=paths,
                          masks_paths=masks_paths,
                          train_modalities=config["train_modalities"],
                          image_shape=config["image_shape"],
                          train_validate_rate=0)

model = HyperDenseModel(kernelshapes2d=config["kernelshapes"],
                        numkernelsperlayer2d=config["numkernelsperlayer"],
                        input_shape2d=config["input_shape"],
                        n_labels=config["n_labels"],
                        activation_name=config["activation_name"],
                        dropout_rate=config["dropout_rate"],
                        initial_learning_rate=config["initial_learning_rate"],
                        loss_function=config["loss_function"],
                        optimizer=config["optimizer"])

model = train(model2d=model,
              images=images,
              masks=masks,
              image_shape=config["image_shape"],
              input_shape=config["input_shape"],
              train_validate_rate=config["train_validate_rate"],
              patience=config["patience"],
              steps_per_epoch=config["steps_per_epoch"],
              validation_steps=config["validation_steps"],
              epochs=config["epochs"])

model.save(config["path_to_model"])
# To create the directory
model_path = os.path.join('./', args.modelName)
if not os.path.exists(model_path):
    os.makedirs(model_path)

# Create model
model, model_config = createModel(args.modelSpec)

# Create hparams
hparams = readHparams(args.trainSpec)
print('Model initialized!')

# Model created, start loading training data
print('Loading data...')
if args.train_size is None:
    images = torch.Tensor(torchfile.load(args.data))
    labels = torch.Tensor(torchfile.load(args.target))
else:
    images = torch.Tensor(torchfile.load(args.data))[:args.train_size]
    labels = torch.Tensor(torchfile.load(args.target))[:args.train_size]

if args.downsample:
    downsample_idx = range(0, 108, 2)
    images = images[:, downsample_idx, :][:, :, downsample_idx]

# Reshape to (#instances, -1) and scale to [0, 1]
images = images.view(images.size(0), -1) / 255.0

print('Training model...')
train(model, hparams, images, labels, model_path, model_config, log_interval=1)
1), "Time Steps", "linear", statistics.equality_of_opportunity().name, statistics.equality_of_opportunity())) plots.append( Plot( range(training_parameters["parameter_optimization"]["time_steps"] + 1), "Time Steps", "linear", "Lagrangian Multipliers", *model_parameters.get_lagrangians())) return plots save_path = '../res/TEST/FICO'.format(fairness_lr) Path(save_path).mkdir(parents=True, exist_ok=True) # training_parameters["save_path"] = "../res/local_experiments/TEST" overall_statistic, overall_model_parameters, _ = train(training_parameters, fairness_rates=[0.0]) plot_median(performance_plots=get_plots(overall_statistic, overall_model_parameters), fairness_plots=[], file_path="{}/run_0.png".format(save_path), figsize=(20, 10)) for r in range(9): statistics, model_parameters, _ = train(training_parameters, fairness_rates=[0.0]) plot_median(performance_plots=get_plots(statistics, model_parameters), fairness_plots=[], file_path="{}/run_{}.png".format(save_path, r + 1), figsize=(20, 10)) overall_statistic.merge(statistics)
source = csv.reader(y, delimiter=',')
for row in source:
    data_y.append(row[0])

data = [data_x, data_y]
encoded_data = encodeData(DIGITS, data, CHARS)  # avoid shadowing the encodeData function

diff_training_size = [10000, 20000, 30000, 40000]

# Iterate over different training sizes
with open('./log/test_acc.csv', 'w') as output:
    output.write('model,test_acc\n')
    for training_size in diff_training_size:
        DATA_SIZE['TRAINING_SIZE'] = training_size
        TRAINING_SIZE = DATA_SIZE['TRAINING_SIZE']

        # Training data minus validation data, in thousands
        # (e.g. 10000 -> (10000 - 1000) / 1000 = 9)
        REAL_TRAINING_SIZE = int((TRAINING_SIZE - TRAINING_SIZE / 10) / 1000)

        # Set training & testing data
        trainingOutputPath = './log/d' + str(DIGITS) + '/s' + str(REAL_TRAINING_SIZE) + '.csv'
        dataSet = splitData(DATA_SIZE, encoded_data)

        # Build model & train
        model = buildModel(DIGITS, CHARS)
        training_model = train(dataSet, BATCH_SIZE, trainingOutputPath, model)
        test_acc = test(dataSet, model, CHARS)
        output.write(trainingOutputPath + ',')
        output.write(str(test_acc) + '\n')
dataloader_train = DataLoader(dataset_train, batch_size=batch_size, shuffle=True)
dataloader_test = DataLoader(dataset_test, batch_size=batch_size, shuffle=True)

# Initialize optimizer and criterion
criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model for the total number of epochs
train(model, criterion, dataloader_train, dataloader_test, optimizer,
      num_epochs=total_iterations)

# Save the trained model
torch.save({'model_state_dict': model.state_dict()}, pretrained_model_path)

""" ================ Prediction on testing images ================ """
test_dir = root_data_path + "test_set_images/"
submission_dataloader = DataLoader(FullSubmissionImageDataset(test_dir), batch_size=1)
model.eval()
toPIL = transforms.ToPILImage()
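# A hypothetical continuation of the prediction phase set up above: run the
# model over the submission images and save each predicted mask as a PNG. The
# output shape, the 0.5 threshold, and the file naming are assumptions, not
# the project's actual code.
with torch.no_grad():
    for i, image in enumerate(submission_dataloader):
        pred = model(image).squeeze(0)      # assumed output: (1, H, W) probabilities
        mask = ((pred > 0.5) * 255).byte()  # binarize at an assumed 0.5 threshold
        toPIL(mask.cpu()).save('prediction_{:03d}.png'.format(i + 1))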
def main(args=None):
    global mode, categories_type, train_data, categories, train_type

    file_train_bank = FILE_TRAIN_BANK
    file_test_bank = FILE_TEST_BANK
    file_train_ttk = FILE_TRAIN_TTK
    file_test_ttk = FILE_TEST_TTK
    file_test_ttk_etalon = FILE_TEST_TTK_ETALON
    file_test_bank_etalon = FILE_TEST_BANK_ETALON

    # Start
    print "Menu:"
    print "1. Convert data from xml to tsv/csv"
    print "2. Train model"
    print "3. Test model"
    print "4. Exit"
    while True:
        try:
            mode = int(raw_input("Select action: "))
            if 0 < mode < 5:
                break
        except ValueError:
            print "ERROR: select number from menu"

    if mode != 4:
        # Select categories for sentiment analysis
        print "Select categories:"
        print "1. Positive, negative"
        print "2. Positive, neutral, negative"
        while True:
            try:
                categories_type = int(raw_input("Select categories for training and predictions: "))
                if 0 < categories_type < 3:
                    break
            except ValueError:
                print "ERROR: select number from menu"
        if categories_type == 1:
            categories = ['positive', 'negative']
        elif categories_type == 2:
            categories = ['positive', 'negative', 'neutral']

    if mode == 1:
        #
        # Convert xml
        #
        convert_xml2tsv(".\\data\\raw\\")
    elif mode == 2:
        #
        # Load train data and train model
        #
        print "Run:"
        print "1. Preprocessing and training"
        print "2. Training"
        while True:
            try:
                train_type = int(raw_input("Select train type: "))
                if 0 < train_type < 3:
                    break
            except ValueError:
                print "ERROR: select number from menu"

        print "Input data:"
        print "1. Telecommunication companies"
        print "2. Banks"
        while True:
            try:
                train_data = int(raw_input("Select data type: "))
                if 0 < train_data < 3:
                    break
            except ValueError:
                print "ERROR: select number from menu"

        # TODO change
        if train_data == 1:
            train_file = ".\\data\\parsed\\ttk_train.tsv"
            test_file = ".\\data\\parsed\\ttk_test_etalon.tsv"
        elif train_data == 2:
            train_file = ".\\data\\parsed\\bank_train.tsv"
            test_file = ".\\data\\parsed\\bank_test_etalon.tsv"

        with open(train_file) as train_in, \
                open(test_file) as test_in:
            train_in = csv.reader(train_in, delimiter='\t')
            test_in = csv.reader(test_in, delimiter='\t')
            # train_in = pandas.read_csv(train_in, sep='\t', skiprows=[0], header=None)
            # test_in = pandas.read_csv(test_in, sep='\t', skiprows=[0], header=None)
            # start = time.time()
            if train_type == 1:
                print("pre")
            elif train_type == 2:
                train(train_in, test_in, categories)
            # end = time.time()
            # print "INFO: Training during " + str(end - start) + " sec"
    elif mode == 3:
        #
        # Load model, test data and perform prediction
        #
        test(categories)
    elif mode == 4:
        #
        # Exit
        #
        print "Press any key for exit"
        exit(0)
def train_validation(file, prediction=False):
    info = '[ info ] '
    err = '[ error ] '
    yield info + "File Validation in Process, Please wait<br/><br/>\n"
    with open('Logs/PreprocessingLogs.txt', 'a') as f:
        f.write("-------------------------------------------------------------------------------------------------------------\n")
        if file.endswith('.asc'):
            f.write(str(datetime.datetime.now()) + ' File name is correct\n')
            try:
                f.write(str(datetime.datetime.now()) + ' Reading File\n')
                df = pd.read_csv('data/' + file, sep=' ')
                f.write(str(datetime.datetime.now()) + ' Reading Columns\n')
                if df.shape[1] == 21 and df.shape[0] != 0:
                    yield info + " File Validation Successful<br/><br/>\n"
                    yield info + " Data Preprocessing Started, Please wait....<br/><br/>\n"
                    try:
                        df.columns = [
                            'status', 'duration', 'credit_history', 'purpose',
                            'amount', 'savings', 'employment_duration',
                            'installment_rate', 'personal_status_sex',
                            'other_debtors', 'present_residence', 'property',
                            'age', 'other_installment_plans', 'housing',
                            'number_credits', 'job', 'people_liable',
                            'telephone', 'foreign_worker', 'credit_risk'
                        ]
                        f.write(str(datetime.datetime.now()) + ' Columns Renaming Successful\n')
                        yield info + "Column Name Changed successfully<br/><br/>\n"
                    except Exception as e:
                        f.write(str(datetime.datetime.now()) + ' {}\n'.format(e))
                        shutil.move("data/" + file, "BadDataFile/" + file)
                        yield err + "Problem in renaming Columns, Please Check log file and retry uploading<br/><br/>\n"
                        return
                    try:
                        # assign the appropriate category labels to the data of each feature
                        df['status'].replace(
                            to_replace=[1, 2, 3, 4],
                            value=["no checking account", "..<0 DM", "0<=..<200 DM",
                                   "..>= 200 DM/salary for at least 1 year"],
                            inplace=True)
                        df['credit_history'].replace(
                            to_replace=[0, 1, 2, 3, 4],
                            value=["delay in paying off in the past",
                                   "critical account/other credits elsewhere",
                                   "no credits taken/all credits paid back duly",
                                   "existing credits paid back duly till now",
                                   "all credits at this bank paid back duly"],
                            inplace=True)
                        df['purpose'].replace(
                            to_replace=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
                            value=["others", "car (new)", "car (used)",
                                   "furniture/equipment", "radio/television",
                                   "domestic appliances", "repairs", "education",
                                   "vacation", "retraining", "business"],
                            inplace=True)
                        df['savings'].replace(
                            to_replace=[1, 2, 3, 4, 5],
                            value=["unknown/no savings account", "..<100 DM",
                                   "100<=..<500 DM", "500<=..<1000 DM", "..>=1000 DM"],
                            inplace=True)
                        df['other_debtors'].replace(
                            to_replace=[1, 2, 3],
                            value=["None", "co-applicant", "guarantor"],
                            inplace=True)
                        df['personal_status_sex'].replace(
                            to_replace=[1, 2, 3, 4],
                            value=["male : divorced/separated",
                                   "female : non-single or male : single",
                                   "male : married/widowed", "female : single"],
                            inplace=True)
                        df['installment_rate'].replace(
                            to_replace=[1, 2, 3, 4],
                            value=[">=35", "25<=..<35", "20<=..<25", "<20"],
                            inplace=True)
                        df['present_residence'].replace(
                            to_replace=[1, 2, 3, 4],
                            value=["<1 yr", "1<=..<4 yrs", "4<=..<7 yrs", ">=7 yrs"],
                            inplace=True)
                        df['property'].replace(
                            to_replace=[1, 2, 3, 4],
                            value=["unknown / no property", "car or other",
                                   "building soc. savings agr./life insurance",
                                   "real estate"],
                            inplace=True)
                        df['other_installment_plans'].replace(
                            to_replace=[1, 2, 3],
                            value=["bank", "stores", "none"],
                            inplace=True)
                        df['housing'].replace(
                            to_replace=[1, 2, 3],
                            value=["for free", "rent", "own"],
                            inplace=True)
                        df['number_credits'].replace(
                            to_replace=[1, 2, 3, 4],
                            value=["1", "2-3", "4-5", ">= 6"],
                            inplace=True)
                        df['job'].replace(
                            to_replace=[1, 2, 3, 4],
                            value=["unemployed/unskilled - non-resident",
                                   "unskilled - resident",
                                   "skilled employee/official",
                                   "manager/self-empl./highly qualif. employee"],
                            inplace=True)
                        df['employment_duration'].replace(
                            to_replace=[1, 2, 3, 4, 5],
                            value=["unemployed", "<1 yr", "1<=..<4 yrs",
                                   "4<=..<7 yrs", ">=7 yrs"],
                            inplace=True)
                        df['people_liable'].replace(
                            to_replace=[1, 2],
                            value=["3 or more", "0 to 2"],
                            inplace=True)
                        df['telephone'].replace(
                            to_replace=[1, 2], value=["No", "Yes"], inplace=True)
                        df['foreign_worker'].replace(
                            to_replace=[1, 2], value=["yes", "no"], inplace=True)
                        f.write(str(datetime.datetime.now()) + ' Data labeling Successful\n')
                        yield info + "Data Labeling Successful<br/><br/>\n"
                    except Exception as e:
                        f.write(str(datetime.datetime.now()) + ' {}\n'.format(e))
                        yield err + "Problem in labeling features, Please Check log file and retry uploading<br/><br/>\n"
                        return
                    try:
                        # define the categorical columns
                        categorical_col = [
                            'status', 'credit_history', 'purpose', 'savings',
                            'installment_rate', 'employment_duration',
                            'personal_status_sex', 'other_debtors',
                            'present_residence', 'property',
                            'other_installment_plans', 'housing',
                            'number_credits', 'job', 'people_liable',
                            'telephone', 'foreign_worker'
                        ]
                        # define the numeric columns
                        num_col = ['duration', 'amount', 'age']

                        # reduce skewness and the impact of outliers with a log transform
                        for col in num_col:
                            df[col] = np.log1p(df[col])
                        f.write(str(datetime.datetime.now()) + ' Skewness removed Successfully\n')
                        yield info + "Skewness removed Successfully<br/><br/>\n"

                        # dropping the duration feature
                        # df.drop(columns='duration', inplace=True)

                        # arrange the columns
                        data = df[num_col[:]]

                        # one-hot encode the categorical features
                        for i in categorical_col:
                            dum_col = pd.get_dummies(df[i], drop_first=True)
                            data = pd.concat([data, dum_col], axis=1)
                        data = pd.concat([data, df['credit_risk']], axis=1)
                        f.write(str(datetime.datetime.now()) + ' One Hot Encoding Successful\n')
                        yield info + "One Hot Encoding Successful<br/><br/>\n"
                    except Exception as e:
                        f.write(str(datetime.datetime.now()) + ' {}\n'.format(e))
                        yield err + "Problem in feature Selection, Please Check log file and retry uploading<br/><br/>\n"
                        return
                    try:
                        yield info + "Saving data into database<br/><br/>\n"
                        if sql_connection(data):
                            yield info + "Saved into Database<br/><br/>\n"
                        else:
                            yield err + "Error occurred while inserting into Database. Please check log file<br/><br/>\n"
                    except Exception as e:
                        f.write(str(datetime.datetime.now()) + ' {}\n'.format(e))
                        yield err + "Problem in saving Preprocessed Data, Please Check log file and retry uploading<br/><br/>\n"
                        return
                else:
                    f.write(str(datetime.datetime.now()) + ' Invalid Columns/Row Length {}\n'.format(df.shape[1]))
                    yield err + "Problem in Columns/Row Length, Please retry uploading correct file<br/><br/>\n"
                    return
            except Exception as e:
                f.write(str(datetime.datetime.now()) + ' {}\n'.format(e))
                yield err + "Problem in reading File, Please retry uploading correct file or check logs<br/><br/>\n"
                return
            try:
                f.write(str(datetime.datetime.now()) + ' Training the model\n')
                yield info + " Training started, Please wait!<br/><br/>\n"
                x_train, x_test, y_train, y_test = training.train()

                f.write(str(datetime.datetime.now()) + ' Training Logistic Regression model\n')
                yield info + " Training on Logistic Regression Model<br/><br/>\n"
                log_model, acc1 = training.log_reg(x_train, x_test, y_train, y_test)
                yield info + "accuracy = {}<br/><br/>\n".format(acc1)

                f.write(str(datetime.datetime.now()) + ' Training Decision Tree model\n')
                yield info + " Training on Decision Tree Model<br/><br/>\n"
                dec_model, acc2 = training.dec_tree(x_train, x_test, y_train, y_test)
                yield info + "accuracy = {}<br/><br/>\n".format(acc2)

                f.write(str(datetime.datetime.now()) + ' Training Random Forest model\n')
                yield info + " Training on Random Forest Model<br/><br/>\n"
                ran_model, acc3 = training.ran_for(x_train, x_test, y_train, y_test)
                yield info + "accuracy = {}<br/><br/>\n".format(acc3)

                f.write(str(datetime.datetime.now()) + ' Training XGBoost Classifier model\n')
                yield info + " Training on XGBoost Classifier Model<br/><br/>\n"
                xgb_model, acc4 = training.xbg_class(x_train, x_test, y_train, y_test)
                yield info + "accuracy = {}<br/><br/>\n".format(acc4)

                yield info + 'Training Completed<br/><br/>\n'
                f.write(str(datetime.datetime.now()) + ' Training Completed\n')

                # keep the model with the highest accuracy
                dict_ = {
                    acc1: log_model,
                    acc2: dec_model,
                    acc3: ran_model,
                    acc4: xgb_model
                }
                joblib.dump(dict_[max(dict_)], 'model/model.sav')
                yield info + "Saved best performing model"
                f.write(str(datetime.datetime.now()) + ' Saved best performing model\n')
            except Exception as e:
                f.write(str(datetime.datetime.now()) + ' {}\n'.format(e))
                yield err + "Problem in Training, Please Check log file and retry<br/><br/>"
                return
        else:
            f.write(str(datetime.datetime.now()) + ' File name is incorrect\n')
            shutil.move("data/" + file, "BadDataFile/" + file)
            yield err + " File name is incorrect, Please upload correct file<br/><br/>\n"
            return
def main():
    args = parse_args()
    configs = get_config(args.config)
    paths = get_config(args.paths)
    print(f'Configs\n{configs}\n')
    print(f'Paths\n{paths}\n')

    ####### DATA #######
    train_loader, val_loader = make_ct_datasets(configs, paths)

    ####### MODEL #######
    model = pydoc.locate(configs['train_params']['model'])()
    model_name = configs['train_params']['model_name']
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(device)
    print(f'Current device: {device}')
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
        print(f'Number of CUDA devices: {torch.cuda.device_count()}')

    try:
        pretrained = configs['train_params']['pretrained']
        if pretrained:
            model_dumps = torch.load(configs['train_params']['path_weights'],
                                     map_location=device)
            model.load_state_dict(model_dumps['model_state_dict'])
            print(f'Weights loaded from model {configs["train_params"]["path_weights"]}')
    except KeyError:
        print("A parameter wasn't found in the config file")

    ####### OPTIMIZER #######
    optimizer_name = configs['train_params']['optimizer']
    optimizer = pydoc.locate('torch.optim.' + optimizer_name)(
        model.parameters(), **configs['train_params']['optimizer_params'])

    ####### SCHEDULER #######
    scheduler_name = configs['train_params']['scheduler']
    scheduler = pydoc.locate('torch.optim.lr_scheduler.' + scheduler_name)(
        optimizer, **configs['train_params']['scheduler_params'])

    ####### CRITERION #######
    loss = pydoc.locate(configs['train_params']['loss'])()

    ####### TRAINING #######
    max_epoch = int(configs['train_params']['max_epoch'])
    train(model, optimizer, loss, train_loader, max_epoch, device, val_loader,
          scheduler=scheduler,
          weights_path=paths['dumps']['weights'],
          model_name=model_name)
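# For reference: pydoc.locate (standard library) resolves a dotted path to a
# Python object, which is how the model, optimizer, and scheduler classes above
# are instantiated from config strings. It returns None if the path is not found.
import pydoc

adam_cls = pydoc.locate('torch.optim.Adam')  # -> the torch.optim.Adam class
sgd_cls = pydoc.locate('torch.optim.SGD')    # -> the torch.optim.SGD class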
def main():
    # Argparse custom actions
    class SetModes(argparse.Action):
        """Set the modes of operation."""
        def __call__(self, parser, args, values, option_string=None):
            for value in values:
                setattr(args, value, True)

    # yapf: disable
    parser = argparse.ArgumentParser(description='Fake News Classifier')
    # Initialization
    parser.add_argument('--init', action='store_true', default=False, help='perform initialization')
    # Modes
    parser.add_argument('-m', '--mode', action=SetModes, nargs='+', choices=['train', 'test', 'demo', 'plot'], help='specify the mode of operation: train, test, demo, plot')
    parser.add_argument('--train', action='store_true', default=False, help='train the model')
    parser.add_argument('--test', action='store_true', default=False, help='test the model (must either train or load a model)')
    parser.add_argument('--demo', action='store_true', default=False, help='demo the model on linewise samples from a file (must either train or load a model)')
    parser.add_argument('--plot', action='store_true', default=False, help='plot training data (must either train or have existing training data)')
    # Options
    parser.add_argument('-b', '--batch-size', type=int, default=64, metavar='N', help='input batch size for training (default: 64)')
    parser.add_argument('-c', '--config', type=str, help='path to configuration json file (overrides args)')
    parser.add_argument('--data-loader', type=str, default='BatchLoader', help='data loader to use (default: "BatchLoader")')
    parser.add_argument('--dataset', type=str, default='FakeRealNews', help='dataset to use (default: "FakeRealNews")')
    parser.add_argument('-e', '--epochs', type=int, default=10, help='number of epochs to train (default: 10)')
    parser.add_argument('-f', '--file', type=str, help='specify a file for another argument')
    parser.add_argument('--lr', '--learning-rate', dest='learning_rate', type=float, default=1e-4, help='learning rate (default: 1e-4)')
    parser.add_argument('-l', '--load', type=int, metavar='EPOCH', help='load a model and its training data')
    parser.add_argument('--loss', type=str, default='BCEWithLogitsLoss', help='loss function (default: "BCEWithLogitsLoss")')
    parser.add_argument('--model', type=str, default='FakeNewsNet', help='model architecture to use (default: "FakeNewsNet")')
    parser.add_argument('-s', '--sample-size', type=int, metavar='N', help='limit sample size for training')
    parser.add_argument('--seed', type=int, default=0, help='random seed (default: 0)')
    parser.add_argument('--save', action='store_true', default=True, help='save model checkpoints and training data (default: True)')
    parser.add_argument('--no-save', dest='save', action='store_false')
    args = parser.parse_args()
    # yapf: enable

    # Print help if no args
    if len(sys.argv) == 1:
        parser.print_help()
        parser.exit()

    # Configure logger
    logging.basicConfig(level=logging.DEBUG)
    logging.getLogger('matplotlib').setLevel(logging.WARNING)

    # Load configuration file if specified
    if args.config is not None:
        utils.load_config(args)

    # Exit if no mode is specified
    if not args.init and not args.train and not args.test and not args.demo and not args.plot:
        logging.error('No mode specified. Please specify with: --mode {init,train,test,demo,plot}')
        exit(1)
    # Exit on `--load` if run directory not found
    if (args.load is not None or (args.plot and not args.train)) and not os.path.isdir(utils.get_path(args)):
        logging.error('Could not find directory for current configuration {}'.format(utils.get_path(args)))
        exit(1)
    # Exit on `test` or `demo` without `train` or `--load EPOCH`
    if (args.test or args.demo) and not (args.train or args.load is not None):
        logging.error('Cannot run `test` or `demo` without a model. Try again with either `train` or `--load EPOCH`.')
        exit(1)
    # Exit on `demo` without a string file
    if args.demo and not args.file:
        logging.error('Cannot run `demo` without a file. Try again with `--file FILE`.')
        exit(1)

    # Setup run directory
    if args.save and not args.init and (args.train or args.test or args.demo or args.plot):
        utils.save_config(args)
        path = utils.get_path(args) + '/output.log'
        os.makedirs(os.path.dirname(path), exist_ok=True)
        logging.getLogger().addHandler(logging.FileHandler(path))

    # Set random seeds
    random.seed(args.seed)
    torch.manual_seed(args.seed)

    # Variable declarations
    training_data = None

    # Load GloVe vocabulary
    if args.init or args.train or args.test or args.demo:
        glove = torchtext.vocab.GloVe(name='6B', dim=50)

    # Perform initialization
    if args.init or args.train or args.test:
        # Determine which dataset to use
        dataset = utils.get_dataset(args)
        # Preload the dataset
        dataset.load()
        # Get preprocessed samples
        samples = preprocessing.get_samples(dataset, glove, args.init)
        random.shuffle(samples)

    # DataLoader setup for `train`, `test`
    if args.train or args.test:
        # Select data loader to use
        DataLoader = utils.get_data_loader(args)
        # Split samples
        split_ratio = [.6, .2, .2]
        trainset, validset, testset = list(DataLoader.splits(samples, split_ratio))
        if args.sample_size is not None:  # limit samples used in training
            trainset = trainset[:args.sample_size]
            validset = validset[:int(args.sample_size * split_ratio[1] / split_ratio[0])]
        # Get data loaders
        train_loader, valid_loader, test_loader = [
            DataLoader(split, batch_size=args.batch_size)
            for split in [trainset, validset, testset]
        ]

    # Load samples for demo
    if args.demo:
        if os.path.isfile(args.file):
            # Read samples from the input file
            with open(args.file, 'r') as f:
                samples = [line for line in f if line.strip()]
            data = pd.DataFrame({'text': samples, 'label': [0.5] * len(samples)})
            # Preprocess samples
            preprocessing.clean(data)
            samples = preprocessing.encode(data, glove)
            samples = [(torch.tensor(text).long(), label) for text, label in samples]
            # Select data loader to use
            DataLoader = utils.get_data_loader(args)
            # Get data loader
            data_loader = DataLoader(samples, batch_size=1, shuffle=False)
        else:
            logging.error('Could not find file for demo at {}'.format(args.file))
            exit(1)

    # Model setup for `train`, `test`, `demo`
    if args.train or args.test or args.demo:
        # Create the model
        model = utils.get_model(glove, args)
        # Load a model
        if args.load is not None:
            utils.load_model(args.load, model, args)

    # Run `train`
    if args.train:
        training_data = training.train(model, train_loader, valid_loader, args)

    # Run `test`
    if args.test:
        if args.train or args.load is not None:
            criterion = utils.get_criterion(args.loss)
            acc, loss = training.evaluate(model, test_loader, criterion)
            logging.info('Testing accuracy: {:.4%}, loss: {:.6f}'.format(acc, loss))
        else:
            logging.error('No model loaded for testing')
            exit(1)

    # Run `demo`
    if args.demo:
        if args.train or args.load is not None:
            model.eval()  # set model to evaluate mode
            logging.info('-- Results --')
            for i, (text, _) in enumerate(data_loader):
                preview = data['text'][i][:32] + '...'
                out = model(text).flatten()
                prob = torch.sigmoid(out)  # apply sigmoid to get probability
                pred = (prob > 0.5).long()  # predict `true` if greater than 0.5
                label = ['fake', 'true'][pred.item()]
                label = '{}{}{}'.format('\033[92m' if pred.item() else '\033[93m',
                                        label, '\033[0m')
                confidence = (prob if pred.item() else 1 - prob).item()
                logging.info('Report {}: {} with {:.2%} confidence - "{}"'.format(
                    i, label, confidence, preview))
        else:
            logging.error('No model loaded for demo')
            exit(1)

    # Run `plot`
    if args.plot:
        if training_data is None:
            training_data = utils.load_training_data(args, allow_missing=False)
        if args.load is not None and not args.train:
            for k, v in training_data.items():
                training_data[k] = v[:args.load + 1]
        logging.info('Plotting training data')
        training.plot(training_data)
def single_run(args):
    if args.data == "FICO":
        distribution = FICODistribution(bias=True, fraction_protected=0.5)
    elif args.data == "COMPAS":
        distribution = COMPASDistribution(bias=True, test_percentage=0.2)
    elif args.data == "ADULT":
        distribution = AdultCreditDistribution(bias=True, test_percentage=0.2)
    elif args.data == "GERMAN":
        distribution = GermanCreditDistribution(bias=True, test_percentage=0.3)

    if args.policy_type == "LOG":
        model = LogisticPolicy(IdentityFeatureMap(distribution.feature_dimension), False)
    elif args.policy_type == "NN":
        model = NeuralNetworkPolicy(distribution.feature_dimension, False)

    if args.policy_algorithm == "ADAM":
        if args.policy_type == "LOG":
            policy_alg = ADAM
        elif args.policy_type == "NN":
            policy_alg = torch.optim.Adam
    elif args.policy_algorithm == "SGD":
        if args.policy_type == "LOG":
            policy_alg = SGD
        elif args.policy_type == "NN":
            policy_alg = torch.optim.SGD

    optimization_target, initial_lambda = _build_optimization_target(args)

    training_parameters = {
        "model": model,
        "distribution": distribution,
        "optimization_target": optimization_target,
        "parameter_optimization": {
            "batch_size": args.batch_size,
            "epochs": args.epochs,
            "learning_rate": args.learning_rate,
            "learn_on_entire_history": args.history_learning,
            "time_steps": args.time_steps,
            "clip_weights": args.ip_weight_clipping,
            "change_percentage": args.change_percentage,
            "change_iterations": args.change_iterations,
            "training_algorithm": policy_alg
        },
        "data": {
            "num_train_samples": args.num_samples,
            "num_test_samples": args.num_samples_test,
            "fix_seeds": True
        },
        "evaluation": {
            UTILITY: {
                "measure_function": utility,
                "detailed": False
            },
            COVARIANCE_OF_DECISION_DP: {
                "measure_function": covariance_of_decision,
                "detailed": False
            }
        }
    }

    if args.fairness_type is not None and args.fairness_learning_rate is not None:
        if args.fairness_algorithm == "ADAM":
            fairness_alg = ADAM
        elif args.fairness_algorithm == "SGD":
            fairness_alg = SGD
        training_parameters["lagrangian_optimization"] = {
            "epochs": args.fairness_epochs,
            "batch_size": args.fairness_batch_size,
            "learning_rate": args.fairness_learning_rate,
            "training_algorithm": fairness_alg
        }

    if args.path:
        if args.fairness_type is not None:
            training_parameters["save_path"] = "{}{}/c{}/lr{}/ts{}-ep{}-bs{}".format(
                args.path,
                "/history" if args.history_learning else "/no_history",
                args.cost, args.learning_rate, args.time_steps, args.epochs,
                args.batch_size)
            if args.fairness_learning_rate is not None:
                subfolder = "flr{}/fe{}-fbs{}-fd{}".format(
                    args.fairness_learning_rate, args.fairness_epochs,
                    args.fairness_batch_size,
                    args.fairness_delta if args.fairness_delta else 0.0)
            else:
                subfolder = args.fairness_value
            if args.process_id is not None:
                training_parameters["save_path_subfolder"] = "{}/{}".format(
                    subfolder, args.process_id)
            else:
                training_parameters["save_path_subfolder"] = subfolder
        else:
            training_parameters["save_path"] = "{}{}/no_fairness/c{}/lr{}/ts{}-ep{}-bs{}".format(
                args.path,
                "/history" if args.history_learning else "/no_history",
                args.cost, args.learning_rate, args.time_steps, args.epochs,
                args.batch_size)
            if args.process_id is not None:
                training_parameters["save_path_subfolder"] = args.process_id

    statistics, model_parameters, run_path = train(training_parameters,
                                                   fairness_rates=[initial_lambda])
    drop_last=False,
)
val_dataloader = data_utils.DataLoader(
    val_dataset,
    batch_size=curriculum['ScenesPerBatch'],
    shuffle=True,
    num_workers=16,
    drop_last=False,
)

start_epoch = 1
if args.load:
    checkpoint = torch.load(curriculum['output_dir'] + '/latest.pth',
                            map_location=device)
    model = checkpoint['model']
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
    start_epoch = checkpoint['epoch'] + 1

train(model, optimizer, scheduler, dataloader, start_epoch, device,
      curriculum['num_epochs'], curriculum['training_mode'],
      curriculum['context_mode'],
      output_dir=curriculum['output_dir'],
      val_dataloader=val_dataloader)
prepare_cifar(ds_train, 50, True)
prepare_cifar(ds_val, 50, False)

# prepare embedding model
resnet18 = models.resnet18(pretrained=False)
resnet18.fc = nn.Identity()
backbone = Backbone(resnet18)
embedding_model = EmbeddingModel(backbone)
opt = SGD(embedding_model.parameters(), lr=0.0005)

# The paper suggests M hard examples and 2M easier ones; the batch size from
# the experiments section is 128, so we use 42 + 84 = 126 to satisfy the first
# assumption.
dataloader_train = CustomBatchSampler(ds_train,
                                      mOHNM=True,
                                      batch_size=500,
                                      hard_batchs_size=42,
                                      norm_batchs_size=84,
                                      embedding_network=embedding_model)

accuracies_train, accuracies_val = train(logger=logger,
                                         ds_train=ds_train,
                                         ds_val=ds_val,
                                         epochs=50,
                                         opt=opt,
                                         model=embedding_model,
                                         dataloader=dataloader_train,
                                         device=device)
def test_train():
    env = gym.envs.make('CartPole-v0')
    net = get_net(env)
    approximator = Approximator(net, alpha=1e-3, loss=nn.MSELoss)
    train(approximator, env, n_episodes=1)
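# A minimal sketch of the get_net helper assumed by the test above: a small
# MLP from the environment's observation size to one value per action. The
# real project's architecture may differ; names follow the call site only.
import torch.nn as nn

def get_net(env):
    n_obs = env.observation_space.shape[0]  # 4 for CartPole-v0
    n_actions = env.action_space.n          # 2 for CartPole-v0
    return nn.Sequential(
        nn.Linear(n_obs, 64),
        nn.ReLU(),
        nn.Linear(64, n_actions),
    )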
tokenizer = tokenizer_class.from_pretrained(MODEL_NAME_OR_PATH,
                                            do_lower_case=DO_LOWER_CASE,
                                            cache_dir=OUTPUT_DIR)
model = model_class.from_pretrained(MODEL_NAME_OR_PATH).to(device)

train_dataset = load_examples(file_path=TRAIN_FILE,
                              tokenizer=tokenizer,
                              output_examples=False,
                              run_config=run_config)

train(train_dataset=train_dataset,
      model=model,
      tokenizer=tokenizer,
      model_type=MODEL_TYPE,
      output_dir=OUTPUT_DIR,
      predict_file=EVAL_FILE,
      device=device,
      log_file=log_file,
      run_config=run_config)

if not OUTPUT_DIR.is_dir():
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

if run_config.save_model:
    model_to_save = model.module if hasattr(model, "module") else model
    model_to_save.save_pretrained(OUTPUT_DIR)
    tokenizer.save_pretrained(OUTPUT_DIR)
    logger.info("Saving final model to %s", OUTPUT_DIR)

logger.info("Saving log file to %s", OUTPUT_DIR)
with open(os.path.join(OUTPUT_DIR, "logs.json"), 'w') as f:
    json.dump(log_file, f, indent=4)
if __name__ == '__main__':
    torch.manual_seed(RANDOM_SEED)
    np.random.seed(RANDOM_SEED)
    args = parse_args()

    # To create the directory
    model_path = os.path.join('./', args.modelName)
    if not os.path.exists(model_path):
        os.makedirs(model_path)

    # Create model
    model, model_config = createModel(args.modelSpec)

    # Create hparams
    hparams = readHparams(args.trainSpec)
    print('Model initialized!')

    # Model created, start loading training data
    print('Loading data...')
    data, lengths = get_data(args.data, limit=args.train_size)
    labels = get_labels(args.target, limit=args.train_size)

    print('Training model...')
    train(model, hparams, data, lengths, labels, model_path, model_config,
          log_interval=1)