Example #1
def main(args):
    print("Loading dataset...")
    vocab = read_vocab(args.vocab)
    X_train, y_train = read_data_set(args.training_set, vocab)

    # Split training further into train and valid
    X_train, X_valid, y_train, y_valid = train_test_split_tensors(
        X_train, y_train, test_size=VAL_PERC)
    train_set = EpisodesDataset(X_train, y_train, k=args.k)
    valid_set = EpisodesDataset(X_valid, y_valid, k=args.k)

    print("Initialising model...")
    model_name = get_model_name(
        distance=args.distance_metric,
        embeddings='vanilla',
        N=args.N,
        k=args.k)
    model = MatchingNetwork(
        model_name,
        fce=True,
        processing_steps=args.processing_steps,
        distance_metric=args.distance_metric)

    print("Starting to train...")
    train_loader = _get_loader(train_set, args.N)
    valid_loader = _get_loader(valid_set, args.N, episodes_multiplier=30)
    train(
        model,
        learning_rate=LEARNING_RATE,
        train_loader=train_loader,
        valid_loader=valid_loader)
Example #2
def example_train_capture():
    # we will capture 480x480 video with a new frame every 3 epochs
    vidmaker = VideoMaker(dims=(480, 480), capture_rate=3)

    model = models.Simple()
    dataset = MandelbrotDataSet(100000)
    train(model, dataset, 10, batch_size=8000, vm=vidmaker)
Example #3
def main(actions):

    vocab, data = build_dataset(src_path="data/toy-ende/src-train.txt",
                                tgt_path="data/toy-ende/tgt-train.txt")

    src_vocab, tgt_vocab = vocab

    Model = Transformer(src_vocab=src_vocab, tgt_vocab=tgt_vocab)
    if "train" in actions:
        train(Model, datasets=data, save=True)

    return
Example #4
def example_train():
    print("Initializing model...")

    model = models.Simple(150, 10).cuda()  # see src.models for more models

    # show the space before we've learned anything
    plt.imshow(renderModel(model, 600, 600), vmin=0, vmax=1, cmap='inferno')
    plt.show()

    dataset = MandelbrotDataSet(
        200000)  # generate a dataset with 200000 random training points

    train(model, dataset, 10, batch_size=10000,
          use_scheduler=True)  # train for 10 epochs

    # show the space again
    plt.imshow(renderModel(model, 600, 600), cmap='inferno')
    plt.show()
Example #5
def main(args):
	nlp = spacy.load('en',disable=['parser', 'tagger', 'ner'])
	if args.command == 'train':
		if args.comment is not None:
			model_path = args.model_path + '_' + args.comment
		else:
			model_path = args.model_path
		if not os.path.exists(model_path):
			os.makedirs(model_path)

		config = read_config(args.model_config)
		hparams = read_hparams(args.train_specs)

		print('Loading data...', flush=True)
		dataset = Dataset(args.data_root, nlp=nlp, image_size=(224,224), size=args.ds, split='train', random_seed=RANDOM_SEED)

		print('Creating new model...', flush=True)
		model = create_model(config, args={'image_shape':dataset[0][0].shape, 'vocab_size':dataset.vocab_size}, cuda=args.cuda)
		print('Model initialized!', flush=True)

		print('Training model...', flush=True)
		train(model, hparams, dataset, model_path, log_interval=6)
	elif args.command == 'test':
		if not os.path.exists(args.model_path):
			print("Model doesn't exist!")
			exit(0)

		print('Loading data...', flush=True)
		dataset = Dataset(args.data_root, nlp=nlp, image_size=(224,224), split='val')

		print('Loading model...', flush=True)
		model = load_model(args.model_path, args={'image_shape':dataset[0][0].shape, 'vocab_size':dataset.vocab_size}, cuda=args.cuda, weights=not args.test_init)
		print('Model loaded!', flush=True)

		print('Testing model...', flush=True)
		test(model, dataset, args.model_path)
Example #6
def test_train_with_order_turn_on_me():
    field = [
        [" ", " ", " ", " ", " ", " ", " "],
        [" ", " ", " ", " ", " ", " ", " "],
        [" ", " ", "↓", "↓", "↓", " ", " "],
        [" ", " ", "↓", "↓", "↓", " ", " "],
        [" ", " ", "↓", "↓", "↓", " ", " "],
        [" ", " ", " ", " ", " ", " ", " "],
        [" ", " ", " ", " ", " ", " ", " "],
    ]
    result = train("Turn on me!", field)

    expected_result = [
        [" ", " ", " ", " ", " ", " ", " "],
        [" ", " ", " ", " ", " ", " ", " "],
        [" ", " ", "↑", "↑", "↑", " ", " "],
        [" ", " ", "↑", "↑", "↑", " ", " "],
        [" ", " ", "↑", "↑", "↑", " ", " "],
        [" ", " ", " ", " ", " ", " ", " "],
        [" ", " ", " ", " ", " ", " ", " "],
    ]
    assert result == expected_result
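
The test above pins down a single behavior: when the order is "Turn on me!", every "↓" cell in the field is flipped to "↑" and all other cells are left untouched. A minimal sketch of a train() that would satisfy exactly this test (an illustrative assumption, not the project's actual implementation) could look like:

def train(order, field):
    # flip every "↓" cell to "↑" when ordered to turn on; leave all other cells as-is
    if order == "Turn on me!":
        return [["↑" if cell == "↓" else cell for cell in row] for row in field]
    return field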
Example #7
    for cb in callback_list:
        cb.on_train_init(registration,
                         starting_epoch=parameter_dict['STARTING_EPOCH'])

    for epoch in range(parameter_dict['STARTING_EPOCH'],
                       parameter_dict['N_EPOCHS']):
        epoch_start_time = time.time()

        logs_dict = {}
        for cb in callback_list:
            cb.on_epoch_init(registration, epoch)

        registration.train()
        if model_type == 'standard':
            train(registration, optimizer, device, generator_train, epoch,
                  loss_function_dict, loss_weights_dict, callback_list,
                  da_model, **kwargs_training)

        elif model_type == 'bidir':
            train_bidirectional(registration, optimizer, device,
                                generator_train, epoch, loss_function_dict,
                                loss_weights_dict, callback_list, da_model,
                                **kwargs_training)

        else:
            raise ValueError("Please, specify a valid model_type")

        epoch_end_time = time.time()
        logs_dict['time_duration (s)'] = epoch_end_time - epoch_start_time

        for cb in callback_list:
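
The fragment above relies on a callback protocol without showing it: on_train_init and on_epoch_init are visible, and the truncated loop at the end presumably dispatches an epoch-end hook. Below is a self-contained sketch of such a protocol; the hook name on_epoch_end and the run_epochs driver are assumptions made for illustration, not the original API.

import time


class Callback:
    # hypothetical base class: on_train_init and on_epoch_init appear in the fragment,
    # on_epoch_end is assumed for the truncated loop at its end
    def on_train_init(self, model, starting_epoch=0):
        pass

    def on_epoch_init(self, model, epoch):
        pass

    def on_epoch_end(self, model, epoch, logs_dict):
        pass


class EpochTimer(Callback):
    def on_epoch_end(self, model, epoch, logs_dict):
        print("epoch {}: {:.2f}s".format(epoch, logs_dict.get('time_duration (s)', 0.0)))


def run_epochs(model, callback_list, starting_epoch, n_epochs):
    for cb in callback_list:
        cb.on_train_init(model, starting_epoch=starting_epoch)
    for epoch in range(starting_epoch, n_epochs):
        start = time.time()
        for cb in callback_list:
            cb.on_epoch_init(model, epoch)
        # ... one training epoch would run here ...
        logs_dict = {'time_duration (s)': time.time() - start}
        for cb in callback_list:
            cb.on_epoch_end(model, epoch, logs_dict)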
Example #8
paths, masks_paths = get_files(path_to_data=config["path_to_data"])

images, masks = load_data(paths=paths,
                          masks_paths=masks_paths,
                          train_modalities=config["train_modalities"],
                          image_shape=config["image_shape"],
                          train_validate_rate=0)

model = HyperDenseModel(kernelshapes2d=config["kernelshapes"],
                        numkernelsperlayer2d=config["numkernelsperlayer"],
                        input_shape2d=config["input_shape"],
                        n_labels=config["n_labels"],
                        activation_name=config["activation_name"],
                        dropout_rate=config["dropout_rate"],
                        initial_learning_rate=config["initial_learning_rate"],
                        loss_function=config["loss_function"],
                        optimizer=config["optimizer"])

model = train(model2d=model,
              images=images,
              masks=masks,
              image_shape=config["image_shape"],
              input_shape=config["input_shape"],
              train_validate_rate=config["train_validate_rate"],
              patience=config["patience"],
              steps_per_epoch=config["steps_per_epoch"],
              validation_steps=config["validation_steps"],
              epochs=config["epochs"])

model.save(config["path_to_model"])
Example #9
    # To create the directory
    model_path = os.path.join('./', args.modelName)
    if not os.path.exists(model_path):
        os.makedirs(model_path)

    # Create Model
    model, model_config = createModel(args.modelSpec)
    # Create Hparams
    hparams = readHparams(args.trainSpec)
    print('Model initialized!')

    # Model created, Start loading training data
    print('Loading data...')
    if args.train_size is None:
        images = torch.Tensor(torchfile.load(args.data))
        labels = torch.Tensor(torchfile.load(args.target))
    else:
        images = torch.Tensor(torchfile.load(args.data))[:args.train_size]
        labels = torch.Tensor(torchfile.load(args.target))[:args.train_size]

    if args.downsample:
        downsample_idx = list(range(0, 108, 2))
        images = images[:,downsample_idx,:][:,:,downsample_idx]

    # Reshape to (#instances, -1) and Scale to [0,1]
    images = images.view(images.size(0), -1)/255.0

    print('Training model...')
    train(model, hparams, images, labels, model_path, model_config, log_interval=1)
Example #10
                  1), "Time Steps", "linear",
            statistics.equality_of_opportunity().name,
            statistics.equality_of_opportunity()))
    plots.append(
        Plot(
            range(training_parameters["parameter_optimization"]["time_steps"] +
                  1), "Time Steps", "linear", "Lagrangian Multipliers",
            *model_parameters.get_lagrangians()))
    return plots


save_path = '../res/TEST/FICO'
Path(save_path).mkdir(parents=True, exist_ok=True)

# training_parameters["save_path"] = "../res/local_experiments/TEST"
overall_statistic, overall_model_parameters, _ = train(training_parameters,
                                                       fairness_rates=[0.0])
plot_median(performance_plots=get_plots(overall_statistic,
                                        overall_model_parameters),
            fairness_plots=[],
            file_path="{}/run_0.png".format(save_path),
            figsize=(20, 10))

for r in range(9):
    statistics, model_parameters, _ = train(training_parameters,
                                            fairness_rates=[0.0])
    plot_median(performance_plots=get_plots(statistics, model_parameters),
                fairness_plots=[],
                file_path="{}/run_{}.png".format(save_path, r + 1),
                figsize=(20, 10))

    overall_statistic.merge(statistics)
Example #11
    source = csv.reader(y, delimiter=',')
    for row in source:
        data_y.append(row[0])

data = [data_x, data_y]
encoded_data = encodeData(DIGITS, data, CHARS)

diff_training_size = [10000, 20000, 30000, 40000]

# Iterate Different Training Size
with open('./log/test_acc.csv', 'w') as output:
    output.write('model,test_acc\n')
    for training_size in diff_training_size:
        DATA_SIZE['TRAINING_SIZE'] = training_size
        TRAINING_SIZE = DATA_SIZE['TRAINING_SIZE']
        # Training data - validating data
        REAL_TRAINING_SIZE = int((TRAINING_SIZE - TRAINING_SIZE / 10) / 1000)

        # set training & testing data
        trainingOutputPath = './log/d' + str(DIGITS) + '/s' + str(
            REAL_TRAINING_SIZE) + '.csv'
        dataSet = splitData(DATA_SIZE, encoded_data)

        # build model & training
        model = buildModel(DIGITS, CHARS)
        training_model = train(dataSet, BATCH_SIZE, trainingOutputPath, model)
        test_acc = test(dataSet, model, CHARS)

        output.write(trainingOutputPath + ',')
        output.write(str(test_acc) + '\n')
Example #12
    dataloader_train = DataLoader(dataset_train,
                                  batch_size=batch_size,
                                  shuffle=True)

    dataloader_test = DataLoader(dataset_test,
                                 batch_size=batch_size,
                                 shuffle=True)

    # Initialize optimizer and criterion
    criterion = torch.nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Train the model for the total number of epochs
    train(model,
          criterion,
          dataloader_train,
          dataloader_test,
          optimizer,
          num_epochs=total_iterations)

    # Save the trained model
    torch.save({'model_state_dict': model.state_dict()}, pretrained_model_path)
""" ================ Prediction on testing images ================ """

test_dir = root_data_path + "test_set_images/"

submission_dataloader = DataLoader(FullSubmissionImageDataset(test_dir),
                                   batch_size=1)

model.eval()
toPIL = transforms.ToPILImage()
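
Example #12 calls train(model, criterion, dataloader_train, dataloader_test, optimizer, num_epochs=...) without showing its body. Below is a minimal, self-contained sketch of a loop matching that signature; the loop body, the toy model, and the random data are assumptions for illustration, not the original code.

import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset


def train(model, criterion, dataloader_train, dataloader_test, optimizer, num_epochs=10):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for inputs, targets in dataloader_train:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        # simple validation pass on the held-out loader
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for inputs, targets in dataloader_test:
                val_loss += criterion(model(inputs), targets).item()

        print("epoch {}: train loss {:.4f}, val loss {:.4f}".format(
            epoch + 1,
            running_loss / len(dataloader_train),
            val_loss / len(dataloader_test)))


if __name__ == '__main__':
    # toy binary-classification data matching the BCELoss/Adam setup above
    X = torch.randn(256, 16)
    y = (X.sum(dim=1, keepdim=True) > 0).float()
    loader = DataLoader(TensorDataset(X, y), batch_size=32, shuffle=True)
    toy_model = nn.Sequential(nn.Linear(16, 1), nn.Sigmoid())
    train(toy_model, nn.BCELoss(), loader, loader,
          torch.optim.Adam(toy_model.parameters(), lr=1e-3), num_epochs=2)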
Example #13
def main(args=None):
    global mode, categories_type, train_data, categories, train_type
    file_train_bank = FILE_TRAIN_BANK
    file_test_bank = FILE_TEST_BANK
    file_train_ttk = FILE_TRAIN_TTK
    file_test_ttk = FILE_TEST_TTK
    file_test_ttk_etalon = FILE_TEST_TTK_ETALON
    file_test_bank_etalon = FILE_TEST_BANK_ETALON

    # Start
    print "Menu:"
    print "1. Convert data from xml to tsv/csv"
    print "2. Train model"
    print "3. Test model"
    print "4. Exit"

    while True:
        try:
            mode = int(raw_input("Select action: "))
            if 0 < mode < 5:
                break
        except ValueError:
            print "ERROR: select number from menu"

    if mode != 4:
        # Select categories for sentiment analysis
        print "Select categories:"
        print "1. Positive, negative"
        print "2. Positive, neutral, negative"

        while True:
            try:
                categories_type = int(raw_input("Select categories for training and predictions: "))
                if 0 < categories_type < 3:
                    break
            except ValueError:
                print "ERROR: select number from menu"

        if categories_type == 1:
            categories = ['positive', 'negative']
        elif categories_type == 2:
            categories = ['positive', 'negative', 'neutral']

    if mode == 1:
        #
        # Convert xml
        #
        convert_xml2tsv(".\\data\\raw\\")
    elif mode == 2:
        #
        # Load train data and train model
        #
        print "Run:"
        print "1. Preprocessing and training"
        print "2. Training"
        while True:
            try:
                train_type = int(raw_input("Select train type: "))
                if 0 < train_type < 3:
                    break
            except ValueError:
                print "ERROR: select number from menu"

        print "Input data:"
        print "1. Telecommunication companies"
        print "2. Banks"
        while True:
            try:
                train_data = int(raw_input("Select data type: "))
                if 0 < train_data < 3:
                    break
            except ValueError:
                print "ERROR: select number from menu"

        # TODO change
        if train_data == 1:
            train_file = ".\\data\\parsed\\ttk_train.tsv"
            test_file = ".\\data\\parsed\\ttk_test_etalon.tsv"
        elif train_data == 2:
            train_file = ".\\data\\parsed\\bank_train.tsv"
            test_file = ".\\data\\parsed\\bank_test_etalon.tsv"

        with open(train_file) as train_in, \
                open(test_file) as test_in:
            train_in = csv.reader(train_in, delimiter='\t')
            test_in = csv.reader(test_in, delimiter='\t')
            # train_in = pandas.read_csv(train_in, sep='\t', skiprows=[0], header=None)
            # test_in  = pandas.read_csv(test_in, sep='\t', skiprows=[0], header=None)

            # start = time.time()
            if train_type == 1:
                print("pre")
            elif train_type == 2:
                train(train_in, test_in, categories)
            # end = time.time()
            # print "INFO: Training during " + str(end - start) + " sec"

    elif mode == 3:
        #
        # Load model, test data and perform prediction
        #
        test(categories)

    elif mode == 4:
        #
        # Exit
        #
        print "Press any key for exit"
        exit(0)
Example #14
def train_validation(file, prediction=False):
    info = '[ info ] '
    err = '[ error ] '
    yield info + "File Validation in Process, Please wait<br/><br/>\n"
    with open('Logs/PreprocessingLogs.txt', 'a') as f:
        f.write(
            "-------------------------------------------------------------------------------------------------------------\n"
        )
        if file.endswith('.asc'):
            f.write(str(datetime.datetime.now()) + ' File name is correct\n')
            try:
                f.write(str(datetime.datetime.now()) + ' Reading File\n')
                df = pd.read_csv('data/' + file, sep=' ')
                f.write(str(datetime.datetime.now()) + ' Reading Columns\n')
                if (df.shape[1] == 21 and df.shape[0] != 0):
                    yield info + " File Validation Successfull<br/><br/>\n"
                    yield info + " Data Preprocessing Started, Please wait....<br/><br/>\n"
                    try:
                        df.columns = [
                            'status', 'duration', 'credit_history', 'purpose',
                            'amount', 'savings', 'employment_duration',
                            'installment_rate', 'personal_status_sex',
                            'other_debtors', 'present_residence', 'property',
                            'age', 'other_installment_plans', 'housing',
                            'number_credits', 'job', 'people_liable',
                            'telephone', 'foreign_worker', 'credit_risk'
                        ]
                        f.write(
                            str(datetime.datetime.now()) +
                            ' Columns Renaming Successful\n')
                        yield info + "Column Name Changed successful<br/><br/>\n"
                    except Exception as e:
                        f.write(
                            str(datetime.datetime.now()) + ' {}\n'.format(e))
                        shutil.move("data/" + file, "BadDataFile/" + file)
                        yield err + "Problem in renaming Columns, Please Check log file and retry uploading<br/><br/>\n"
                        return

                    try:
                        # assigning the appropriate categories labels to the data of each feature
                        df['status'].replace(
                            to_replace=[1, 2, 3, 4],
                            value=[
                                "no checking account", "..<0 DM",
                                "0<=..<200 DM",
                                "..>= 200 DM/salary for at least 1 year"
                            ],
                            inplace=True)

                        df['credit_history'].replace(
                            to_replace=[0, 1, 2, 3, 4],
                            value=[
                                "delay in paying off in the past",
                                "critical account/other credits elsewhere",
                                "no credits taken/all credits paid back duly",
                                "existing credits paid back duly till now",
                                "all credits at this bank paid back duly"
                            ],
                            inplace=True)

                        df['purpose'].replace(
                            to_replace=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
                            value=[
                                "others", "car (new)", "car (used)",
                                "furniture/equipment", "radio/television",
                                "domestic appliances", "repairs", "education",
                                "vacation", "retraining", "business"
                            ],
                            inplace=True)

                        df['savings'].replace(to_replace=[1, 2, 3, 4, 5],
                                              value=[
                                                  "unknown/no savings account",
                                                  "..<100 DM",
                                                  "100<=..<500 DM",
                                                  "500<=..<1000 DM",
                                                  "..>=1000 DM"
                                              ],
                                              inplace=True)

                        df['other_debtors'].replace(
                            to_replace=[1, 2, 3],
                            value=["None", "co-applicant", "guarantor"],
                            inplace=True)

                        df['personal_status_sex'].replace(
                            to_replace=[1, 2, 3, 4],
                            value=[
                                "male : divorced/separated",
                                "female : non-single or male : single",
                                "male : married/widowed", "female : single"
                            ],
                            inplace=True)

                        df['installment_rate'].replace(
                            to_replace=[1, 2, 3, 4],
                            value=[">=35", "25<=..<35", "20<=..<25", "<20"],
                            inplace=True)

                        df['present_residence'].replace(
                            to_replace=[1, 2, 3, 4],
                            value=[
                                "<1 yr", "1<=..<4 yrs", "4<=..<7 yrs",
                                ">=7 yrs"
                            ],
                            inplace=True)

                        df['property'].replace(
                            to_replace=[1, 2, 3, 4],
                            value=[
                                "unknown / no property", "car or other",
                                "building soc. savings agr./life insurance",
                                "real estate"
                            ],
                            inplace=True)

                        df['other_installment_plans'].replace(
                            to_replace=[1, 2, 3],
                            value=["bank", "stores", "none"],
                            inplace=True)

                        df['housing'].replace(
                            to_replace=[1, 2, 3],
                            value=["for free", "rent", "own"],
                            inplace=True)

                        df['number_credits'].replace(
                            to_replace=[1, 2, 3, 4],
                            value=["1", "2-3", "4-5", ">= 6"],
                            inplace=True)

                        df['job'].replace(
                            to_replace=[1, 2, 3, 4],
                            value=[
                                "unemployed/unskilled - non-resident",
                                "unskilled - resident",
                                "skilled employee/official",
                                "manager/self-empl./highly qualif. employee"
                            ],
                            inplace=True)

                        df['employment_duration'].replace(
                            to_replace=[1, 2, 3, 4, 5],
                            value=[
                                "unemployed", "<1 yr", "1<=..<4 yrs",
                                "4<=..<7 yrs", ">=7 yrs"
                            ],
                            inplace=True)

                        df['people_liable'].replace(
                            to_replace=[1, 2],
                            value=["3 or more", "0 to 2"],
                            inplace=True)

                        df['telephone'].replace(to_replace=[1, 2],
                                                value=["No", "Yes"],
                                                inplace=True)

                        df['foreign_worker'].replace(to_replace=[1, 2],
                                                     value=["yes", "no"],
                                                     inplace=True)

                        f.write(
                            str(datetime.datetime.now()) +
                            ' data labeling Successful\n')
                        yield info + "data Labeling Successful<br/><br/>\n"
                    except Exception as e:
                        f.write(
                            str(datetime.datetime.now()) + ' {}\n'.format(e))
                        yield err + "Problem in labeling features, Please Check log file and retry uploading<br/><br/>\n"
                        return

                    try:
                        # defining the categorial columns
                        categorical_col = [
                            'status', 'credit_history', 'purpose', 'savings',
                            'installment_rate', 'employment_duration',
                            'personal_status_sex', 'other_debtors',
                            'present_residence', 'property',
                            'other_installment_plans', 'housing',
                            'number_credits', 'job', 'people_liable',
                            'telephone', 'foreign_worker'
                        ]
                        # defining the numerical columns
                        num_col = ['duration', 'amount', 'age']
                        # fixing the Skewness and outliers using log function
                        for col in num_col:
                            df[col] = np.log1p(df[col])
                        f.write(
                            str(datetime.datetime.now()) +
                            ' Skewness removed Successfully\n')
                        yield info + "Skewness removed Successful<br/><br/>\n"
                        # dropping the duration feature
                        #df.drop(columns='duration', inplace=True)
                        # Arranging the Columns
                        data = df[num_col[:]]
                        # one-hot-encoding on categorical features
                        for i in categorical_col:
                            dum_col = pd.get_dummies(df[i], drop_first=True)
                            data = pd.concat([data, dum_col], axis=1)
                        data = pd.concat([data, df['credit_risk']], axis=1)
                        f.write(
                            str(datetime.datetime.now()) +
                            ' One Hot Encoding Successfully\n')
                        yield info + "One Hot Encoding Successfully<br/><br/>\n"
                    except Exception as e:
                        f.write(
                            str(datetime.datetime.now()) + ' {}\n'.format(e))
                        yield err + "Problem in feature Selection, Please Check log file and retry uploading<br/><br/>\n"
                        return

                    try:
                        yield info + "Saving data into database<br/><br/>\n"
                        if sql_connection(data):
                            yield info + "Saved into Database<br/><br/>\n"
                        else:
                            yield err + "Error occured while inserting into Database. Please check log file<br/><br/>\n"
                    except Exception as e:
                        f.write(
                            str(datetime.datetime.now()) + ' {}\n'.format(e))
                        yield err + "Problem in saving Preprocessed Data, Please Check log file and retry uploading<br/><br/>\n"
                        return
                else:
                    f.write(
                        str(datetime.datetime.now()) +
                        ' Invalid Columns/Row Length {}\n'.format(df.shape[1]))
                    yield err + "Problem in Columns/Row Length, Please retry uploading correct file<br/><br/>\n"
                    return
            except Exception as e:
                f.write(str(datetime.datetime.now()) + ' {}\n'.format(e))
                yield err + "Problem in reading File, Please retry uploading correct file or check logs<br/><br/>\n"
                return

            try:
                f.write(str(datetime.datetime.now()) + ' Training the model\n')
                yield info + " Training started Please wait!<br/><br/>\n"
                x_train, x_test, y_train, y_test = training.train()

                f.write(
                    str(datetime.datetime.now()) +
                    ' Training Logistic Regression model\n')
                yield info + " Training on Logistic Regression Model<br/><br/>\n"
                log_model, acc1 = training.log_reg(x_train, x_test, y_train,
                                                   y_test)
                yield info + "accuracy = {}<br/><br/>\n".format(acc1)

                f.write(
                    str(datetime.datetime.now()) +
                    ' Training Decision Tree model\n')
                yield info + " Training on Decision Tree Model<br/><br/>\n"
                dec_model, acc2 = training.dec_tree(x_train, x_test, y_train,
                                                    y_test)
                yield info + "accuracy = {}<br/><br/>\n".format(acc2)

                f.write(
                    str(datetime.datetime.now()) +
                    ' Training Random Forest model\n')
                yield info + " Training on Random Forest Model<br/><br/>\n"
                ran_model, acc3 = training.ran_for(x_train, x_test, y_train,
                                                   y_test)
                yield info + "accuracy = {}<br/><br/>\n".format(acc3)

                f.write(
                    str(datetime.datetime.now()) +
                    ' Training XGboost Classifier model\n')
                yield info + " Training on XGboost Classifier Model<br/><br/>\n"
                xgb_model, acc4 = training.xbg_class(x_train, x_test, y_train,
                                                     y_test)
                yield info + "accuracy = {}<br/><br/>\n".format(acc4)

                yield info + 'Training Completed<br/><br/>\n'
                f.write(str(datetime.datetime.now()) + ' Training Completed\n')
                dict_ = {
                    acc1: log_model,
                    acc2: dec_model,
                    acc3: ran_model,
                    acc4: xgb_model
                }
                joblib.dump(dict_[max(dict_)], 'model/model.sav')
                yield info + "Saved best performing model"
                f.write(
                    str(datetime.datetime.now()) +
                    ' Saved best performing model\n')

            except Exception as e:
                f.write(str(datetime.datetime.now()) + ' {}\n'.format(e))
                yield err + "Problem in Training, Please Check log file and retry<br/><br/>"
                return

        else:
            f.write(str(datetime.datetime.now()) + ' File name is incorrect\n')
            shutil.move("data/" + file, "BadDataFile/" + file)
            yield err + " File name is incorrect, Please upload correct file<br/><br/>\n"
            return
Example #15
def main():
    args = parse_args()
    configs = get_config(args.config)
    paths = get_config(args.paths)

    print(f'Configs\n{configs}\n')
    print(f'Paths\n{paths}\n')

    ####### DATA ######
    train_loader, val_loader = make_ct_datasets(configs, paths)

    ####### MODEL ######
    model = pydoc.locate(configs['train_params']['model'])()
    model_name = configs['train_params']['model_name']

    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'
    model.to(device)
    print(f'Current device: {device}')

    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
        print(f'Number of CUDA devices: {torch.cuda.device_count()}')

    try:
        pretrained = configs['train_params']['pretrained']
        if pretrained:
            model_dumps = torch.load(configs['train_params']['path_weights'],
                                     map_location=device)
            model.load_state_dict(model_dumps['model_state_dict'])
            print(
                f'Weights loaded from model {configs["train_params"]["path_weights"]}'
            )
    except KeyError:
        print("A parameter wasn't found in the config file")

    ####### OPTIMIZER ######
    optimizer_name = configs['train_params']['optimizer']
    optimizer = pydoc.locate('torch.optim.' + optimizer_name)(
        model.parameters(), **configs['train_params']['optimizer_params'])
    ####### SCHEDULER ######
    scheduler_name = configs['train_params']['scheduler']
    scheduler = pydoc.locate('torch.optim.lr_scheduler.' + scheduler_name)(
        optimizer, **configs['train_params']['scheduler_params'])
    ####### CRITERION ######
    loss = pydoc.locate(configs['train_params']['loss'])()

    ####### TRAINING ######
    max_epoch = int(configs['train_params']['max_epoch'])

    train(model,
          optimizer,
          loss,
          train_loader,
          max_epoch,
          device,
          val_loader,
          scheduler=scheduler,
          weights_path=paths['dumps']['weights'],
          model_name=model_name)
Example #16
def main():
    # Argparse custom actions
    class SetModes(argparse.Action):
        """Set the modes of operations."""
        def __call__(self, parser, args, values, option_string=None):
            for value in values:
                setattr(args, value, True)

    # yapf: disable
    parser = argparse.ArgumentParser(description='Fake News Classifier')
    # Initialization
    parser.add_argument('--init', action='store_true', default=False,
                        help='perform initialization')
    # Modes
    parser.add_argument('-m', '--mode', action=SetModes, nargs='+', choices=['train', 'test', 'demo', 'plot'],
                        help='specify the mode of operation: train, test, demo, plot')
    parser.add_argument('--train', action='store_true', default=False,
                        help='train the model')
    parser.add_argument('--test', action='store_true', default=False,
                        help='test the model (must either train or load a model)')
    parser.add_argument('--demo', action='store_true', default=False,
                        help='demo the model on linewise samples from a file (must either train or load a model)')
    parser.add_argument('--plot', action='store_true', default=False,
                        help='plot training data (must either train or have existing training data)')
    # Options
    parser.add_argument('-b', '--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('-c', '--config', type=str,
                        help='path to configuration json file (overrides args)')
    parser.add_argument('--data-loader', type=str, default='BatchLoader',
                        help='data loader to use (default: "BatchLoader")')
    parser.add_argument('--dataset', type=str, default='FakeRealNews',
                        help='dataset to use (default: "FakeRealNews")')
    parser.add_argument('-e', '--epochs', type=int, default=10,
                        help='number of epochs to train (default: 10)')
    parser.add_argument('-f', '--file', type=str,
                        help='specify a file for another argument')
    parser.add_argument('--lr', '--learning-rate', dest='learning_rate', type=float, default=1e-4,
                        help='learning rate (default: 1e-4)')
    parser.add_argument('-l', '--load', type=int, metavar='EPOCH',
                        help='load a model and its training data')
    parser.add_argument('--loss', type=str, default='BCEWithLogitsLoss',
                        help='loss function (default: "BCEWithLogitsLoss")')
    parser.add_argument('--model', type=str, default='FakeNewsNet',
                        help='model architecture to use (default: "FakeNewsNet")')
    parser.add_argument('-s', '--sample-size', type=int, metavar='N',
                        help='limit sample size for training')
    parser.add_argument('--seed', type=int, default=0,
                        help='random seed (default: 0)')
    parser.add_argument('--save', action='store_true', default=True,
                        help='save model checkpoints and training data (default: True)')
    parser.add_argument('--no-save', dest='save', action='store_false')
    args = parser.parse_args()
    # yapf: enable

    # Print help if no args
    if len(sys.argv) == 1:
        parser.print_help()
        parser.exit()

    # Configure logger
    logging.basicConfig(level=logging.DEBUG)
    logging.getLogger('matplotlib').setLevel(logging.WARNING)

    # Load configuration file if specified
    if args.config is not None:
        utils.load_config(args)

    # Exit if no mode is specified
    if not args.init and not args.train and not args.test and not args.demo and not args.plot:
        logging.error(
            'No mode specified. Please specify with: --mode {init,train,test,demo,plot}'
        )
        exit(1)
    # Exit on `--load` if run directory not found
    if (args.load is not None or
        (args.plot
         and not args.train)) and not os.path.isdir(utils.get_path(args)):
        logging.error(
            'Could not find directory for current configuration {}'.format(
                utils.get_path(args)))
        exit(1)
    # Exit on `test` or `demo` without `train` or `--load EPOCH`
    if (args.test or args.demo) and not (args.train or args.load is not None):
        logging.error(
            'Cannot run `test` or `demo` without a model. Try again with either `train` or `--load EPOCH`.'
        )
        exit(1)
    # Exit on `demo` without a string file
    if args.demo and not args.file:
        logging.error(
            'Cannot run `demo` without a file. Try again with `--file FILE`.')
        exit(1)

    # Setup run directory
    if args.save and not args.init and not (args.train or args.test
                                            or args.demo or args.plot):
        utils.save_config(args)
        path = utils.get_path(args) + '/output.log'
        os.makedirs(os.path.dirname(path), exist_ok=True)
        logging.getLogger().addHandler(logging.FileHandler(path))

    # Set random seeds
    random.seed(args.seed)
    torch.manual_seed(args.seed)

    # Variable declarations
    training_data = None

    # Load GloVe vocabulary
    if args.init or args.train or args.test or args.demo:
        glove = torchtext.vocab.GloVe(name='6B', dim=50)

    # Perform initialization
    if args.init or args.train or args.test:
        # Determine which dataset to use
        dataset = utils.get_dataset(args)
        # Preload the dataset
        dataset.load()
        # Get preprocessed samples
        samples = preprocessing.get_samples(dataset, glove, args.init)
        random.shuffle(samples)

    # DataLoader setup for `train`, `test`
    if args.train or args.test:
        # Select data loader to use
        DataLoader = utils.get_data_loader(args)

        # Split samples
        split_ratio = [.6, .2, .2]
        trainset, validset, testset = list(
            DataLoader.splits(samples, split_ratio))
        if args.sample_size is not None:  # limit samples used in training
            trainset = trainset[:args.sample_size]
            validset = validset[:int(args.sample_size * split_ratio[1] /
                                     split_ratio[0])]

        # Get data loaders
        train_loader, valid_loader, test_loader = [
            DataLoader(split, batch_size=args.batch_size)
            for split in [trainset, validset, testset]
        ]

    # Load samples for demo
    if args.demo:
        if os.path.isfile(args.file):
            # Read samples from the input file
            with open(args.file, 'r') as f:
                samples = [line for line in f if line.strip()]
            data = pd.DataFrame({
                'text': samples,
                'label': [0.5] * len(samples)
            })
            # Preprocess samples
            preprocessing.clean(data)
            samples = preprocessing.encode(data, glove)
            samples = [(torch.tensor(text).long(), label)
                       for text, label in samples]

            # Select data loader to use
            DataLoader = utils.get_data_loader(args)

            # Get data loader
            data_loader = DataLoader(samples, batch_size=1, shuffle=False)
        else:
            logging.error('Could not find file for demo at {}'.format(
                args.file))
            exit(1)

    # Model setup for `train`, `test`, `demo`
    if args.train or args.test or args.demo:
        # Create the model
        model = utils.get_model(glove, args)

        # Load a model
        if args.load is not None:
            utils.load_model(args.load, model, args)

    # Run `train`
    if args.train:
        training_data = training.train(model, train_loader, valid_loader, args)

    # Run `test`
    if args.test:
        if args.train or args.load is not None:
            criterion = utils.get_criterion(args.loss)
            acc, loss = training.evaluate(model, test_loader, criterion)
            logging.info('Testing accuracy: {:.4%}, loss: {:.6f}'.format(
                acc, loss))
        else:
            logging.error('No model loaded for testing')
            exit(1)

    # Run `demo`
    if args.demo:
        if args.train or args.load is not None:
            model.eval()  # set model to evaluate mode
            logging.info('-- Results --')
            for i, (text, _) in enumerate(data_loader):
                preview = data['text'][i][:32] + '...'
                out = model(text).flatten()
                prob = torch.sigmoid(out)  # apply sigmoid to get probability
                pred = (prob >
                        0.5).long()  # predict `true` if greater than 0.5
                label = ['fake', 'true'][pred.item()]
                label = '{}{}{}'.format(
                    '\033[92m' if pred.item() else '\033[93m', label,
                    '\033[0m')
                confidence = (prob if pred.item() else 1 - prob).item()
                logging.info(
                    'Report {}: {} with {:.2%} confidence - "{}"'.format(
                        i, label, confidence, preview))
        else:
            logging.error('No model loaded for demo')
            exit(1)

    # Run `plot`
    if args.plot:
        if training_data is None:
            training_data = utils.load_training_data(args, allow_missing=False)
        if args.load is not None and not args.train:
            for k, v in training_data.items():
                training_data[k] = v[:args.load + 1]

        logging.info('Plotting training data')
        training.plot(training_data)
Example #17
def single_run(args):
    if args.data == "FICO":
        distibution = FICODistribution(bias=True, fraction_protected=0.5)
    elif args.data == "COMPAS":
        distibution = COMPASDistribution(bias=True, test_percentage=0.2)
    elif args.data == "ADULT":
        distibution = AdultCreditDistribution(bias=True, test_percentage=0.2)
    elif args.data == "GERMAN":
        distibution = GermanCreditDistribution(bias=True, test_percentage=0.3)

    if args.policy_type == "LOG":
        model = LogisticPolicy(
            IdentityFeatureMap(distribution.feature_dimension), False)
    elif args.policy_type == "NN":
        model = NeuralNetworkPolicy(distribution.feature_dimension, False)

    if args.policy_algorithm == "ADAM":
        if args.policy_type == "LOG":
            policy_alg = ADAM
        elif args.policy_type == "NN":
            policy_alg = torch.optim.Adam
    elif args.policy_algorithm == "SGD":
        if args.policy_type == "LOG":
            policy_alg = SGD
        elif args.policy_type == "NN":
            policy_alg = torch.optim.SGD

    optimization_target, initial_lambda = _build_optimization_target(args)

    training_parameters = {
        "model": model,
        "distribution": distibution,
        "optimization_target": optimization_target,
        "parameter_optimization": {
            "batch_size": args.batch_size,
            "epochs": args.epochs,
            "learning_rate": args.learning_rate,
            "learn_on_entire_history": args.history_learning,
            "time_steps": args.time_steps,
            "clip_weights": args.ip_weight_clipping,
            "change_percentage": args.change_percentage,
            "change_iterations": args.change_iterations,
            "training_algorithm": policy_alg
        },
        "data": {
            "num_train_samples": args.num_samples,
            "num_test_samples": args.num_samples_test,
            "fix_seeds": True
        },
        "evaluation": {
            UTILITY: {
                "measure_function": utility,
                "detailed": False
            },
            COVARIANCE_OF_DECISION_DP: {
                "measure_function": covariance_of_decision,
                "detailed": False
            }
        }
    }

    if args.fairness_type is not None and args.fairness_learning_rate is not None:
        if args.fairness_algorithm == "ADAM":
            fairness_alg = ADAM
        elif args.fairness_algorithm == "SGD":
            fairness_alg = SGD

        training_parameters["lagrangian_optimization"] = {
            "epochs": args.fairness_epochs,
            "batch_size": args.fairness_batch_size,
            "learning_rate": args.fairness_learning_rate,
            "training_algorithm": fairness_alg
        }

    if args.path:
        if args.fairness_type is not None:
            training_parameters[
                "save_path"] = "{}{}/c{}/lr{}/ts{}-ep{}-bs{}".format(
                    args.path,
                    "/history" if args.history_learning else "/no_history",
                    args.cost, args.learning_rate, args.time_steps,
                    args.epochs, args.batch_size)

            if args.fairness_learning_rate is not None:
                subfolder = "flr{}/fe{}-fbs{}-fd{}".format(
                    args.fairness_learning_rate, args.fairness_epochs,
                    args.fairness_batch_size,
                    args.fairness_delta if args.fairness_delta else 0.0)
            else:
                subfolder = args.fairness_value

            if args.process_id is not None:
                training_parameters["save_path_subfolder"] = "{}/{}".format(
                    subfolder, args.process_id)
            else:
                training_parameters["save_path_subfolder"] = subfolder
        else:
            training_parameters[
                "save_path"] = "{}{}/no_fairness/c{}/lr{}/ts{}-ep{}-bs{}".format(
                    args.path,
                    "/history" if args.history_learning else "/no_history",
                    args.cost, args.learning_rate, args.time_steps,
                    args.epochs, args.batch_size)
            if args.process_id is not None:
                training_parameters["save_path_subfolder"] = args.process_id

    statistics, model_parameters, run_path = train(
        training_parameters, fairness_rates=[initial_lambda])
Example #18
        drop_last=False,
    )
    val_dataloader = data_utils.DataLoader(
        val_dataset,
        batch_size=curriculum['ScenesPerBatch'],
        shuffle=True,
        num_workers=16,
        drop_last=False,
    )

    start_epoch = 1

    if args.load:
        checkpoint = torch.load(curriculum['output_dir'] + '/latest.pth',
                                map_location=device)
        model = checkpoint['model']
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        start_epoch = checkpoint['epoch'] + 1

    train(model,
          optimizer,
          scheduler,
          dataloader,
          start_epoch,
          device,
          curriculum['num_epochs'],
          curriculum['training_mode'],
          curriculum['context_mode'],
          output_dir=curriculum['output_dir'],
          val_dataloader=val_dataloader)
Example #19
prepare_cifar(ds_train, 50, True)
prepare_cifar(ds_val, 50, False)

# prepare embedding model
resnet18 = models.resnet18(pretrained=False)
resnet18.fc = nn.Identity()

backbone = Backbone(resnet18)
embedding_model = EmbeddingModel(backbone)

opt = SGD(embedding_model.parameters(), lr=0.0005)

# The paper suggests M hard examples and 2M easier ones;
# the batch size in the experiments section is 128; we use 42+84=126 to satisfy the first assumption
dataloader_train = CustomBatchSampler(ds_train,
                                      mOHNM=True,
                                      batch_size=500,
                                      hard_batchs_size=42,
                                      norm_batchs_size=84,
                                      embedding_network=embedding_model)

accuracies_train, accuracies_val = train(logger=logger,
                                         ds_train=ds_train,
                                         ds_val=ds_val,
                                         epochs=50,
                                         opt=opt,
                                         model=embedding_model,
                                         dataloader=dataloader_train,
                                         device=device)
Example #20
def test_train():
    env = gym.envs.make('CartPole-v0')
    net = get_net(env)

    approximator = Approximator(net, alpha=1e-3, loss=nn.MSELoss)
    train(approximator, env, n_episodes=1)
Example #21
        tokenizer = tokenizer_class.from_pretrained(MODEL_NAME_OR_PATH,
                                                    do_lower_case=DO_LOWER_CASE,
                                                    cache_dir=OUTPUT_DIR)
        model = model_class.from_pretrained(MODEL_NAME_OR_PATH).to(device)

        train_dataset = load_examples(file_path=TRAIN_FILE,
                                      tokenizer=tokenizer,
                                      output_examples=False,
                                      run_config=run_config)

        train(train_dataset=train_dataset,
              model=model,
              tokenizer=tokenizer,
              model_type=MODEL_TYPE,
              output_dir=OUTPUT_DIR,
              predict_file=EVAL_FILE,
              device=device,
              log_file=log_file,
              run_config=run_config
              )
        if not OUTPUT_DIR.is_dir():
            OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
        if run_config.save_model:
            model_to_save = model.module if hasattr(model, "module") else model
            model_to_save.save_pretrained(OUTPUT_DIR)
            tokenizer.save_pretrained(OUTPUT_DIR)
            logger.info("Saving final model to %s", OUTPUT_DIR)
        logger.info("Saving log file to %s", OUTPUT_DIR)
        with open(os.path.join(OUTPUT_DIR, "logs.json"), 'w') as f:
            json.dump(log_file, f, indent=4)
Example #22
if __name__ == '__main__':
    torch.manual_seed(RANDOM_SEED)
    np.random.seed(RANDOM_SEED)

    args = parse_args()

    # To create the directory
    model_path = os.path.join('./', args.modelName)
    if not os.path.exists(model_path):
        os.makedirs(model_path)

    # Create Model
    model, model_config = createModel(args.modelSpec)
    # Create Hparams
    hparams = readHparams(args.trainSpec)
    print('Model initialized!')

    # Model created, Start loading training data
    print('Loading data...')
    data, lengths = get_data(args.data, limit=args.train_size)
    labels = get_labels(args.target, limit=args.train_size)

    print('Training model...')
    train(model,
          hparams,
          data,
          lengths,
          labels,
          model_path,
          model_config,
          log_interval=1)