Example #1
    def train_function(config):
        model = model_class_(
            dataset_cls=dataset_class_,
            #network_fn=network_fn_,
            dataset_args=dataset_args,
            model_args=config,
            network_args=config['network_args'],
            optimizer_args=config['optimizer_args']
        )
        train_model(
            model,
            dataset,
            epochs=train_args['epochs'],
            batch_size=train_args['batch_size'],
            flags=train_args.get('flags', {}),
            save_dir=save_dir_function(config)
        )
        # fit the validation-loss curve; the tuning objective is the extrapolated val_loss at epoch = 5 * epochs
        val_loss = model.val_loss
        num_epochs = train_args['epochs']

        popt, pcov = curve_fit(val_loss_fit_func, np.arange(num_epochs), val_loss)
        estimated_loss = val_loss_fit_func(5*num_epochs, *popt)
        print("Estimated val_loss at epoch", num_epochs*5, ": ", estimated_loss)

        return estimated_loss
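The snippet above fits a curve to the recorded validation losses and reports the extrapolated loss at five times the trained epochs, but val_loss_fit_func itself is not shown. A minimal sketch of one plausible choice, a decaying exponential fitted with scipy.optimize.curve_fit (the functional form and the sample loss values are assumptions, not taken from the original project):

import numpy as np
from scipy.optimize import curve_fit

def val_loss_fit_func(epoch, a, b, c):
    # loss decays from a + c toward the asymptote c as training progresses
    return a * np.exp(-b * epoch) + c

# illustrative usage with a made-up validation-loss history
val_loss = np.array([1.00, 0.72, 0.55, 0.48, 0.45])
popt, _ = curve_fit(val_loss_fit_func, np.arange(len(val_loss)), val_loss, maxfev=10000)
print("Estimated val_loss at epoch", 5 * len(val_loss), ":",
      val_loss_fit_func(5 * len(val_loss), *popt))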
Example #2
def main():
    args = parse_args()

    nlayers = 7
    cell_type = 'GRU'

    net_input = Input(shape=(args.lookback, 2))

    x_0 = Dense(args.ncells, activation='relu')(net_input)
    x_1 = Dense(args.ncells, activation='relu')(x_0)
    x_2 = GRU(args.ncells,
              return_sequences=True,
              dropout=args.dropout,
              recurrent_dropout=args.recurrent_dropout)(add([x_0, x_1]))
    x_3 = Dense(args.ncells, activation='relu')(add([x_0, x_1, x_2]))
    x_4 = GRU(args.ncells,
              return_sequences=True,
              dropout=args.dropout,
              recurrent_dropout=args.recurrent_dropout)(add(
                  [x_0, x_1, x_2, x_3]))
    x_5 = Dense(args.ncells, activation='relu')(add([x_0, x_1, x_2, x_3, x_4]))
    x_6 = Flatten()(GRU(args.ncells,
                        return_sequences=True,
                        dropout=args.dropout,
                        recurrent_dropout=args.recurrent_dropout)(add(
                            [x_0, x_1, x_2, x_3, x_4, x_5])))

    out_x = Dense(args.delay, activation='linear')(x_6)
    out_y = Dense(args.delay, activation='linear')(x_6)

    model = Model(inputs=net_input, outputs=[out_x, out_y])

    train_model(model, args, nlayers=nlayers, cell_type=cell_type)
Example #3
def do_training():
    confidence = 0.5
    if 'confidence' in request.args:
        confidence = float(request.args.get('confidence'))
    extract_data(confidence)
    train_model()
    return Response(json.dumps({'result': 'ok'}))
Example #4
    def train(self):
        '''
        Load all training utilities and train the model, fine-tuning only the
        replaced output layers.
        '''
        # freeze the pretrained backbone; the new 1x1 conv layers below remain trainable
        for param in self.model.parameters():
            param.requires_grad = False

        self.model.Conv2d_1out3_1x1 = nn.Conv2d(256,
                                                self.outFeatures,
                                                kernel_size=(1, 1),
                                                stride=(1, 1),
                                                bias=False)
        self.model.Conv2d_1out2_1x1 = nn.Conv2d(512,
                                                self.outFeatures,
                                                kernel_size=(1, 1),
                                                stride=(1, 1),
                                                bias=False)
        self.model.Conv2d_1out1_1x1 = nn.Conv2d(1024,
                                                self.outFeatures,
                                                kernel_size=(1, 1),
                                                stride=(1, 1),
                                                bias=False)

        optimizer, lr_schedular, criterion = self.load_utils()

        # pass to the training script
        train_model(self.model, self.trainLoader, self.testLoader, criterion,
                    optimizer, lr_schedular, self.numEpoch, self.batchSize,
                    self.trainLen, self.testLen)
Example #5
 def test_y_input_dtype(self):
     """
     Tests that train_model will not run with wrong data types for y_var.
     :params self:
     :returns boolean:
     """
     # use a separate context for each call so both cases are actually exercised
     with self.assertRaises(TypeError):
         tm.train_model(X_VAR, 1, 'Hawaii')
     with self.assertRaises(TypeError):
         tm.train_model(X_VAR, 'check', 'Hawaii')
Example #6
 def test_x_input_dtype(self):
     """
     Test that train_model will not run with wrong data types for x_var
     :params self:
     :returns boolean
     """
     with self.assertRaises(AttributeError):
         tm.train_model(1, Y_VAR, 'Hawaii')
     with self.assertRaises(AttributeError):
         tm.train_model('check', Y_VAR, 'Hawaii')
Example #7
 def test_input_size(self):
     """
     Tests that train_model will not run if sizes of x_var and y_var
     do not match.
     :params self:
     :returns boolean:
     """
     with self.assertRaises(IndexError):
         tm.train_model(X_VAR[:500], Y_VAR, 'Hawaii')
     with self.assertRaises(IndexError):
         tm.train_model(X_VAR, Y_VAR[:500], 'Hawaii')
Example #8
def train_xz_init_model_chorda(num_classes, epochs):
    model_name = "XZ-Unet-Init"
    # create a new model folder based on name and date
    now = datetime.datetime.now()
    now_str = now.strftime('%Y-%m-%d_%H-%M-%S')

    rootDir = "C:/users/jfauser/IPCAI2019/ModelData/" + model_name + "/"

    print("Started at {}".format(now_str))

    input_shape = (128, 512)
    model = unet(input_shape + (1, ), num_classes)
    batch_size = 8

    print("loading data set 1")
    file_dataset1 = rootDir + "dataset1.h5"
    f1 = h5py.File(file_dataset1.strip(), "r")
    images_1 = f1["images"][()]
    all_labels = f1["labels"][()]  # already in to_categorical
    slices = []
    for label_volume in all_labels:
        slices.append(label_volume[:, :, 5])  # chorda label
    slices = np.stack(slices)
    labels_1 = utils.to_categorical(slices, num_classes)

    print("loading data set 2")
    file_dataset2 = rootDir + "dataset2.h5"
    f2 = h5py.File(file_dataset2.strip(), "r")
    images_2 = f2["images"][()]
    all_labels = f2["labels"][()]
    slices = []
    for label_volume in all_labels:
        slices.append(label_volume[:, :, 5])  # chorda label
    slices = np.stack(slices)
    labels_2 = utils.to_categorical(slices, num_classes)

    out_dir = rootDir + now_str + "/"
    filename_weigths1 = out_dir + "weights1.hdf5"
    filename_weigths2 = out_dir + "weights2.hdf5"

    if not os.path.isdir(out_dir):
        os.mkdir(out_dir)

    print("train model 1")
    train_model.train_model([images_1, labels_1], filename_weigths1, model,
                            batch_size,
                            epochs)  # commences training on the set
    del model

    print("train model 2")
    model = unet(input_shape + (1, ), num_classes)
    train_model.train_model([images_2, labels_2], filename_weigths2, model,
                            batch_size,
                            epochs)  # commences training on the set
    del model
Example #9
def main(args, logger):

    # Instantiate an optimizer.
    optimizer = keras.optimizers.Adam(learning_rate=args['LR'])
    # Instantiate a loss function.
    criterion = keras.losses.BinaryCrossentropy(from_logits=True)

    #===============================================
    # loader = Loader()
    #
    # print('start data load..')
    #
    # uids, iids, df_train, df_test, \
    # df_neg, users, items, item_lookup = loader.load_dataset()
    # user_input, item_input, labels = loader.get_train_instances(uids, iids, args['NUM_NEG'], len(items))
    #
    # print('end data load..')
    #
    # # prepare the input data
    # user_data_shuff, item_data_shuff, label_data_shuff = shuffle(user_input, item_input, labels)
    # user_data_shuff = np.array(user_data_shuff).reshape(-1, 1)
    # item_data_shuff = np.array(item_data_shuff).reshape(-1, 1)
    # label_data_shuff = np.array(label_data_shuff).reshape(-1, 1)
    # ===============================================

    # collab_mapped.csv must exist
    loader = Data(args)
    df_order = loader.load_df()
    uids, iids, df_train, df_test, df_test_neg, users, items, \
    user_input, item_input, year_input, month_input, weekday_input, daytime_input, labels = \
        loader.prepare_df(df_order)
    """
    # Prepare the training dataset.
    (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

    # Reserve 10,000 samples for validation.
    x_val = x_train[-10000:]
    y_val = y_train[-10000:]
    x_train = x_train[:-10000]
    y_train = y_train[:-10000]

    # Prepare the training dataset.
    train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    train_dataset = train_dataset.shuffle(buffer_size=1024).batch(args['BATCH_SIZE'])

    # Prepare the validation dataset.
    val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))
    val_dataset = val_dataset.batch(args['BATCH_SIZE'])
    """
    # Model initialize
    model = NeuMF(len(user_input), len(item_input))
    train_model(model, train_dataset, val_dataset, criterion, optimizer, args,
                logger)
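Note that train_dataset and val_dataset are passed to train_model here even though they are only built in the commented-out MNIST block. A minimal sketch of constructing them with tf.data from parallel id arrays (the placeholder arrays, split size, and batch size below are assumptions):

import numpy as np
import tensorflow as tf

# placeholder id/label arrays standing in for the loader outputs
user_input = np.random.randint(0, 1000, size=10000)
item_input = np.random.randint(0, 500, size=10000)
labels = np.random.randint(0, 2, size=10000).astype(np.float32)

dataset = tf.data.Dataset.from_tensor_slices(((user_input, item_input), labels))
dataset = dataset.shuffle(buffer_size=1024)
val_dataset = dataset.take(1000).batch(256)
train_dataset = dataset.skip(1000).batch(256)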
Example #10
def main():
    test_df, train_df = load_data()

    train_x, train_y, indices_for_masking, pca_instance, scaler_instance = preprocess_data(
        train_df)
    test_x = preprocess_data(test_df, indices_for_masking,
                             pca_instance, scaler_instance)

    model = create_model(train_x.shape[1], MODEL_TYPE)
    train_model(model, MODEL_TYPE, train_x, train_y)
    predictions = predict_data(model, MODEL_TYPE, test_x)

    submission_file_directory = write_output(predictions)
Example #11
def main(args):

    preprocessor = Preprocessor(FLAGS)

    word_dict = preprocessor.build_dict()
    embeddings = preprocessor.get_init_embedding()

    model = Model(embeddings, FLAGS, len(word_dict))
    model.build()

    if FLAGS.mode == "train":
        train_model(model, preprocessor, word_dict, FLAGS)
    elif FLAGS.mode == "test":
        test_model(model, preprocessor, word_dict, FLAGS)
Example #12
def create_database(videoPaths, dataset_path, embeddings_path, detector_path,
                    embemodel_path, confidence_path, data_path,
                    recognizer_path, le_path):
    # Process videos
    process_videos(videoPaths, dataset_path)

    # Extract embeddings and update pickle files
    extract_embeddings(embeddings_path, detector_path, embemodel_path,
                       confidence_path, data_path)

    # train model and update le.pickle file
    train_model(recognizer_path, embeddings_path, le_path)

    print("All Done.")
Example #13
def train_xy_init_model(num_classes, epochs):
    # first get the time
    now = datetime.datetime.now()
    now_str = now.strftime('%Y-%m-%d_%H-%M-%S')
    print("Started at {}".format(now_str))
    # then create a new model folder based on name and date
    model_name = "XY-Unet-Init"
    rootDir = "C:/users/jfauser/IPCAI2019/ModelData/" + model_name + "/"
    out_dir = rootDir + now_str + "/"
    if not os.path.isdir(out_dir):
        os.mkdir(out_dir)

    # compile the net architecture
    batch_size = 16
    input_shape = (512, 512)
    model = unet(input_shape + (1, ), num_classes)

    # load the two training sets
    print("loading data set 1")
    if num_classes == 2:
        images_1, labels_1 = tools.get_data_set_for_chorda(rootDir +
                                                           "dataset1.h5")
    else:
        images_1, labels_1 = tools.get_data_set(rootDir + "dataset1.h5")

    print("loading data set 2")
    if num_classes == 2:
        images_2, labels_2 = tools.get_data_set_for_chorda(rootDir +
                                                           "dataset2.h5")
    else:
        images_2, labels_2 = tools.get_data_set(rootDir + "dataset2.h5")

    # and train
    filename_weigths1 = out_dir + "weights1.hdf5"
    filename_weigths2 = out_dir + "weights2.hdf5"

    print("train model 1")
    train_model.train_model([images_1, labels_1], filename_weigths1, model,
                            batch_size,
                            epochs)  # commences training on the set
    del model

    print("train model 2")
    model = unet(input_shape + (1, ), num_classes)
    train_model.train_model([images_2, labels_2], filename_weigths2, model,
                            batch_size,
                            epochs)  # commences training on the set
    del model

    return now_str
Example #14
    def update_model(self):

        reply2 = QMessageBox.question(self.focusWidget(), 'The model needs to be trained',
                                      ' Do you want to train the model? It may take a while', QMessageBox.Yes,
                                      QMessageBox.No)

        if reply2 == QMessageBox.No:
            return

        else:
            ExEm.extract_embeddings()
            TM.train_model()
            self.read_files()

        print("[INFO] Model updated")
Example #15
def train_and_verify(parameters,
                     directory,
                     birds,
                     debug=False,
                     skip_clean=True):
    try:
        os.chdir(directory)
        classifierType = parameters[0]
        mtStep = parameters[1]
        mtWin = parameters[2]
        stStep = parameters[3]
        stWin = parameters[4]
        train_rootdir = os.path.join(directory, 'Training')
        test_rootdir = os.path.join(directory, 'Validation')
        train_dirs = test_params(train_rootdir, birds)
        model = 'x'.join(
            [classifierType,
             str(mtStep),
             str(mtWin),
             str(stStep),
             str(stWin)])
        model_path = os.path.join(os.getcwd(), model)
        if not os.path.exists(model):
            train_model(list_of_dirs=train_dirs,
                        mtWin=mtWin,
                        mtStep=mtStep,
                        stWin=stWin,
                        stStep=stStep,
                        classifierType=classifierType,
                        modelName=model)
        png_path = '.'.join([model_path, 'png'])
        stats_path = '.'.join([model_path, 'stats'])
        if not os.path.exists(png_path) and not os.path.exists(stats_path):
            stats = clean_and_test(directory=test_rootdir,
                                   classifierType=classifierType,
                                   no_sanitize=True,
                                   skip_clean=skip_clean,
                                   show_graphs=False,
                                   model_file=model_path,
                                   birds=birds,
                                   verbose=False)
            with open(stats_path, 'w') as stats_file:
                cPickle.dump(stats, stats_file)
    except Exception:
        if debug:
            raise
        else:
            return
Example #16
def test(path_excel_dir, para_name, para, data, iter_num):
    for i in range(iter_num):
        print_params(para_name, para)
        path_excel = path_excel_dir + str(int(time.time())) + str(int(rd.uniform(100, 900))) + '.xlsx'
        save_params(para_name, para, path_excel)
        _ = train_model(para, data, path_excel)
        if para[2] not in ['GCMC', 'NGCF', 'SCF', 'CGMC', 'LightGCN']:
            tf.reset_default_graph()
Example #17
    def train(self, plot_stats=False, show_qsummary=False):
        

        while any(m.activated for m in self.modelList):
            while self.current_model < len(self.modelList):
                self.save(f'lesson_{self.current_lesson:03d}.model_{self.current_model:03d}')
                m = self.modelList[self.current_model]
                if m.activated:
                    #train for one lesson and output somewhere
                    m.build_model()
                    m.total_episodes += train_model(m.model, m.curriculum, m.cfg,
                        stats_filename  = m.stats_filename,
                        max_lesson      = self.current_lesson,
                        initial_episode = m.total_episodes,
                        plot_stats      = plot_stats,
                        show_qsummary   = show_qsummary)
                    K.clear_session()
                    m.destroy_model()
                    gc.collect()
                self.current_model += 1

            for m in self.modelList:
                if m.activated and (m.curriculum.lesson_num() <= self.current_lesson or m.curriculum.is_completed()):
                    m.activated = False
                    m.completed = m.curriculum.is_completed()
                    self.summary.append(f'Model {m.name} deactivated during round {self.current_lesson}\nCompleted: {m.completed}')

            # analyze aggregate population stats and deactivate poor performers
            self.current_lesson+=1
            self.current_model = 0
            self.save(f'lesson_{self.current_lesson:03d}.model_{self.current_model:03d}')

        for s in self.summary:
            print(s)
Example #18
 def train_model(
     self,
     train_loader,
     test_loader,
     validation_loader,
     num_inference_samples,
     max_epochs,
     early_stopping_patience,
     desc,
     log_interval,
     device,
     epoch_results_store=None,
 ):
     model = self.create_bayesian_model(device)
     optimizer = self.create_optimizer(model)
     num_epochs, test_metrics = train_model(
         model,
         optimizer,
         max_epochs,
         early_stopping_patience,
         num_inference_samples,
         test_loader,
         train_loader,
         validation_loader,
         log_interval,
         desc,
         device,
         epoch_results_store=epoch_results_store,
         **self.create_train_model_extra_args(optimizer),
     )
     return model, num_epochs, test_metrics
Example #19
def main(args):
    rePrint('-------------------------------------------------------------------------------------')
    vadDict = pickle.load(open(args.vad_file, 'rb'))
    validFile = os.path.join(args.valid_path, args.valid_file.format(args.split_num-1))
    rePrint('  [LoadModelData {:s}]'.format(validFile))
    #validData = LoadModelData(validFile, 'valid')
    validData = LoadModelDataVAD(validFile, vadDict, args.alpha, 'valid')
    validLoad = torch.utils.data.DataLoader(validData,
                        batch_size=args.batch_size, shuffle=False, num_workers=4)

    rePrint('  [Create model: {:s} num_classes: {:d}]'.format(args.model, args.num_classes))
    net = eval(args.model)(num_classes=args.num_classes)
    net = net.cuda()

    optimizer = torch.optim.SGD(net.parameters(), lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    scheduler = ReduceLROnPlateau(optimizer, mode='min',
                                  factor=args.factor,
                                  patience=args.patience,
                                  verbose=False, threshold=1e-6,
                                  threshold_mode='rel', cooldown=0, min_lr=0, eps=1e-8)

    rePrint('')
    rePrint('-------------------------------------------------------------------------------------')
    rePrint('  [Train {:s}]'.format(args.model))
    rePrint('-------------------------------------------------------------------------------------')
    modelPath = os.path.join(args.model_path, args.model+args.fea_type)
    make_path(modelPath)
    for epoch in range(args.epochs):
        #print type(epoch), epoch
        save_file = os.path.join(modelPath, args.model_file).format(epoch)

        for splitID in range(args.split_num-1):
            train_file = os.path.join(args.train_path, args.train_file.format(splitID))
            rePrint('  [LoadModelData {:s}]'.format(train_file))
            #trainData = LoadModelData(train_file, 'train')
            trainData = LoadModelDataVAD(train_file, vadDict, args.alpha, 'train')
            trainLoad = torch.utils.data.DataLoader(trainData,
                                batch_size=args.batch_size, shuffle=True, num_workers=4)

            net = train_model(net=net,
                        trainLoad=trainLoad,
                        optimizer=optimizer,
                        log_interval=args.log_interval,
                        epoch=epoch,
                        batch_size=args.batch_size,
                        lr=optimizer.param_groups[0]['lr'],
                        save_file=None)

        torch.save(net.state_dict(), save_file)
        rePrint(save_file)    

        eval_loss = valid_model(net=net, validLoad=validLoad)
        scheduler.step(eval_loss)

        rePrint('-------------------------------------------------------------------------------------')
    rePrint('  [Done]')
    rePrint('-------------------------------------------------------------------------------------')
Example #20
def plot_cost_function(data, max_iterations, ratio):
    utils.print_colored("[cost_function][Plotting cost function...] start!",
                        utils.bcolors.YELLOW)
    cost = []

    utils.print_colored("[cost_function][Calculating cost function...] start!",
                        utils.bcolors.YELLOW)
    for _ in range(1, max_iterations + 1):
        data.norm_theta0, data.norm_theta1 = train_model.train_model(
            ratio, data.norm_theta0, data.norm_theta1, data.norm_tab_x,
            data.norm_tab_y)
        cost.append(cost_function(data))
    utils.print_colored(
        "[cost_function][Calculating cost function...] finished!",
        utils.bcolors.GREEN)

    utils.print_colored("[cost_function][Generating figure...] start!",
                        utils.bcolors.YELLOW)
    max_iter_table = np.arange(max_iterations)
    fig = go.Figure(data=go.Scatter(x=max_iter_table, y=cost))
    fig.show()
    utils.print_colored("[cost_function][Generating figure...] finished!",
                        utils.bcolors.GREEN)
    utils.print_colored(
        "[cost_function][Plotting cost function...] finished!\n",
        utils.bcolors.GREEN)
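cost_function is not shown in this snippet. Under the usual least-squares formulation for single-feature linear regression, a minimal sketch (assuming the data object exposes norm_theta0, norm_theta1, norm_tab_x and norm_tab_y as used above) could be:

def cost_function(data):
    # halved mean squared error of the normalized predictions
    m = len(data.norm_tab_x)
    total = 0.0
    for x, y in zip(data.norm_tab_x, data.norm_tab_y):
        estimate = data.norm_theta0 + data.norm_theta1 * x
        total += (estimate - y) ** 2
    return total / (2 * m)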
Example #21
def test_train_model():
    """Test if train_model returns a xgboost classification model """
    path = os.getcwd()
    df_input = pd.read_csv(path + '/test/data/test_data.csv')
    kwarg_dic = {
        'method': 'xgboost',
        'get_target': {
            'target': 'TARGET'
        },
        'choose_features_all': {
            'features_to_use': [
                'DAYS_BIRTH', 'DAYS_EMPLOYED', 'DAYS_EMPLOYED_PERC',
                'BURO_DAYS_CREDIT_MEAN', 'DAYS_ID_PUBLISH',
                'ANNUITY_INCOME_PERC', 'INSTAL_DAYS_ENTRY_PAYMENT_MEAN',
                'INSTAL_DBD_MEAN', 'PAYMENT_RATE', 'INCOME_CREDIT_PERC',
                'INSTAL_AMT_PAYMENT_MEAN', 'APPROVED_DAYS_DECISION_MEAN',
                'DAYS_LAST_PHONE_CHANGE', 'BURO_DAYS_CREDIT_ENDDATE_MEAN'
            ]
        },
        'split_data': {
            'train_size': 0.9,
            'test_size': 0.1
        },
        'parameter': {
            'objective': 'binary:logistic',
            'n_estimators': 300,
            'learning_rate': 0.2,
            'max_depth': 3
        }
    }
    model = train_model.train_model(df_input, **kwarg_dic)

    assert (str(type(model))) == "<class 'xgboost.sklearn.XGBClassifier'>"
Example #22
def main(args):
    print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
    base_output_directory = dir_utils.clean_directory_string(args.o[0])

    dir_utils.mkdir(base_output_directory)

    ref_file = args.fa[0]
    read_file = args.fq[0]

    batch_size = args.bs[0]
    rnn_dim = args.hu[0]
    embedding_dim = args.ed[0]
    epochs = args.e[0]
    min_seed_length = args.sl[0]
    replicates = args.r[0]
    prediction_length = args.pl[0]
    patience = args.es[0]
    seed_range_upper = args.sr[0]
    replicate_offset = args.os[0]
    beam_length = args.bl[0]

    terminal_directory_character = dir_utils.get_terminal_directory_character()
    gap_id = ref_file.split(terminal_directory_character)[-1].split(".")[0]

    base_output_directory += gap_id

    for i in range(replicates):
        replicate_num = i + replicate_offset
        output_directory = dir_utils.clean_directory_string(base_output_directory + "_R_" + str(replicate_num))

        dir_utils.mkdir(output_directory)

        print("Training model")
        train.train_model(output_directory, min_seed_length, ref_file, read_file, epochs, [batch_size], [rnn_dim], [embedding_dim], 1, patience, seed_range_upper)

        print("Greedy prediction")
        greedy.predict_reference(output_directory, gap_id, ref_file, embedding_dim, rnn_dim, min_seed_length, base_path=output_directory)
        greedy.predict_arbitrary_length(output_directory, gap_id, ref_file, embedding_dim, rnn_dim, prediction_length, base_path=output_directory)

        print("Beam search prediction")
        beam_search.predict_reference(output_directory, gap_id, ref_file, embedding_dim, rnn_dim, min_seed_length,
                          beam_length, base_path=output_directory)
        beam_search.predict_arbitrary_length(output_directory, gap_id, ref_file, embedding_dim, rnn_dim, prediction_length,
                                 beam_length, base_path=output_directory)

        reset_states()
Example #23
def plot_linear_regression(data, slope, max_iterations, ratio):
    utils.print_colored(
        "[linear_regression][Plotting linear regression...] start!",
        utils.bcolors.YELLOW)
    graph_data = {}
    graph_data["figure"] = generate_fig(max_iterations, "Kilometers", "Price")
    graph_data["sliders"] = generate_sliders()

    # Build initial figure data, and frame_data for i == 0
    utils.print_colored(
        "[linear_regression][Building initial figure and frame datas...] start!",
        utils.bcolors.YELLOW)
    estimations = []
    for x in data.tab_x:
        estimations.append(est.estimate_price(x, data.theta0, data.theta1))
    build_fig_data_linear_regression(data, estimations, graph_data)
    build_frame_data_linear_regression(data, estimations, 0, graph_data)
    utils.print_colored(
        "[linear_regression][Building initial figure and frame datas...] finished!",
        utils.bcolors.GREEN)

    # Iterate max_iterations times through the dataset to train the model
    utils.print_colored("[linear_regression][Building frame data...] start!",
                        utils.bcolors.YELLOW)
    for i in range(1, max_iterations + 1):
        data.norm_theta0, data.norm_theta1 = train_model.train_model(
            ratio, data.norm_theta0, data.norm_theta1, data.norm_tab_x,
            data.norm_tab_y)
        # Every `slope` iterations, take a snapshot of the estimations with the current thetas (slope can be 1)
        if i % slope == 0:
            # De-normalize our thetas to use them for the estimations
            data.theta0 = utils.de_normalize(data.norm_theta0, data.max_y,
                                             data.min_y)
            data.theta1 = utils.de_normalize(
                data.norm_theta1, data.max_y,
                data.min_y) / (data.max_x - data.min_x)
            estimations = []
            # Iterate through the dataset to estimate for each entry
            for x in data.tab_x:
                estimations.append(
                    est.estimate_price(x, data.theta0, data.theta1))
            build_frame_data_linear_regression(data, estimations, i,
                                               graph_data)
    utils.print_colored(
        "[linear_regression][Building frame data...] finished!",
        utils.bcolors.GREEN)

    utils.print_colored("[linear_regression][Generating figure...] start!",
                        utils.bcolors.YELLOW)
    graph_data["figure"]["layout"]["sliders"] = [graph_data["sliders"]]
    fig = go.Figure(graph_data["figure"])
    fig.show()
    utils.print_colored("[linear_regression][Generating figure...] finished!",
                        utils.bcolors.GREEN)
    utils.print_colored(
        "[linear_regression][Plotting linear regression...] finished!\n",
        utils.bcolors.GREEN)
Example #24
def new_data():
    words, labels, docs_x, docs_y = extract_data()
    training, output = onehot_encoding(words, labels, docs_x, docs_y)
    model = train_model(training, output)


# new_data()
# chat()
# print (ask("Tell me about your courses"))
# new_data()
Example #25
 def refresh_model(self):
     """
     Model will be trained on this button press.
     All the pending cases will be downloaded from db and
     a KNN Classifier will be trained on it.
     """
     if train_model() is True:
         QMessageBox.about(self, "Success", "Model is trained")
     else:
         QMessageBox.about(self, "Error", "Something went wrong")
Example #26
def main(opt):
    start_time = time.time()
    train_bow_loader, valid_bow_loader, word2idx, idx2word, vocab, bow_dictionary \
                                        = load_data_and_vocab(opt, load_train=True)
    opt.bow_vocab_size = len(bow_dictionary)
    load_data_time = time_since(start_time)
    logging.info('Time for loading the data: %.1f' % load_data_time)

    start_time = time.time()
    ntm_model = NTM(opt).to(opt.device)
    optimizer_ntm = init_optimizers(ntm_model, opt)

    train_model.train_model(ntm_model, optimizer_ntm, bow_dictionary,
                            train_bow_loader, valid_bow_loader, opt)

    training_time = time_since(start_time)

    logging.info('Time for training: %.1f' % training_time)

    return
Example #27
def train():
    feature_extraction_config = {
        'text_to_vector_uni_vocabulary':
        'vocabularies/text_to_vector_uni_vocabulary_10.txt',
        'text_to_vector_bi_vocabulary':
        'vocabularies/text_to_vector_bi_vocabulary.txt',
        'tf_idf_vector': False,
        'counter_vector': True,
        'binary_vector': False,
        'best_representing_words_list':
        'vocabularies/fast_rank_best_words_custom.txt',
        'surrounding_words': True,
        'polarity_vocabulary': 'vocabularies/polarity_words.txt',
        'positive_words_count': True,
        'negative_words_count': True,
        'polarity_count': True,
        'parts_of_speech': True,
        'uni_gram': True,
        'bi_gram': False,
        'not_count': False,
        'remove_stop_words': False
    }

    class_map = {
        0: 0,
        1: 1,
        # 2: 2,
        3: 3
    }

    clf = SVC(kernel='linear')

    train_model.train_model(clf,
                            src_path=src_path,
                            data_field=data_field,
                            target_field=target_field,
                            export_path='models/fast_rank_model.pkl',
                            class_map=class_map,
                            balance_classes=False,
                            randomize=False,
                            feature_config=feature_extraction_config)
Example #28
def train_xz_init_model(num_classes, epochs):
    # This needs to be set according to your directory.
    # It must contain /rawdata and /groundtruth with identically named mhd files.
    model_name = "XZ-Unet-Init"
    # create a new model folder based on name and date
    now = datetime.datetime.now()
    now_str = now.strftime('%Y-%m-%d_%H-%M-%S')

    rootDir = "C:/users/jfauser/IPCAI2019/ModelData/" + model_name + "/"

    print("Started at {}".format(now_str))

    input_shape = (128, 512)
    model = unet(input_shape + (1, ), num_classes)
    batch_size = 8

    print("loading data set 1")
    images_1, labels_1 = get_data_set(rootDir + "dataset1.h5")
    print("loading data set 2")
    images_2, labels_2 = get_data_set(rootDir + "dataset2.h5")

    out_dir = rootDir + now_str + "/"
    filename_weigths1 = out_dir + "weights1.hdf5"
    filename_weigths2 = out_dir + "weights2.hdf5"

    if not os.path.isdir(out_dir):
        os.mkdir(out_dir)

    print("train model 1")
    train_model.train_model([images_1, labels_1], filename_weigths1, model,
                            batch_size,
                            epochs)  # commences training on the set
    del model

    print("train model 2")
    model = unet(input_shape + (1, ), num_classes)
    train_model.train_model([images_2, labels_2], filename_weigths2, model,
                            batch_size,
                            epochs)  # commences training on the set
    del model
Example #29
File: main.py Project: busesese/MKR
def main(args):
    n_user, n_item, train_rec, eval_rec, test_rec = load_rating()
    n_entity, n_relation, kg = load_kg()

    kg_data = (kg[:, 0], kg[:, 1], kg[:, 2])
    rec_data = (train_rec[:, 0], train_rec[:, 1], train_rec[:, 2])
    rec_val = (eval_rec[:, 0], eval_rec[:, 1], eval_rec[:, 2])

    train_data_kg = TrainSet(kg_data)
    train_loader_kg = DataLoader(train_data_kg,
                                 batch_size=args.batch_size,
                                 shuffle=args.shuffle_train)

    train_data_rec = TrainSet(rec_data)
    eval_data_rec = TrainSet(rec_val)

    train_loader_rec = DataLoader(train_data_rec,
                                  batch_size=args.batch_size,
                                  shuffle=args.shuffle_train)
    eval_loader_rec = DataLoader(eval_data_rec,
                                 batch_size=args.batch_size,
                                 shuffle=args.shuffle_test)

    model = MultiKR(n_user + 1,
                    n_item + 1,
                    n_entity + 1,
                    n_relation + 1,
                    n_layer=args.n_layer,
                    embed_dim=args.batch_size,
                    hidden_layers=args.hidden_layers,
                    dropouts=args.dropouts,
                    output_rec=args.output_rec)
    optimizer = torch.optim.Adam(model.parameters(),
                                 weight_decay=args.weight_decay,
                                 lr=args.lr)
    loss_function = nn.BCEWithLogitsLoss()
    epochs = args.epochs
    train_model(model, train_loader_rec, train_loader_kg, eval_loader_rec,
                optimizer, loss_function, epochs)
Example #30
def do_predict(need_train=True, need_save=True):
    # data preprocessor
    data_path = base_path + market_info_path
    if os.path.exists(data_path):
        market_info = pd.read_pickle(data_path)
    else:
        market_info = query_and_clean()
        market_info.to_pickle(data_path)

    training_set, test_set, lstm_training_inputs, lstm_training_outputs, lstm_test_inputs, lstm_test_outputs \
        = data_preprocessor(market_info, split_date, window_len, currency_type=currency_type)

    # train or load model
    if need_train:
        model = train_model(lstm_training_inputs,
                            lstm_training_outputs,
                            need_save_model=need_save)
    else:
        model = load_model(persistence_path)

    training_x_ticks = [
        datetime.date(i, j, 1) for i in range(2013, 2019) for j in [1, 5, 9]
    ]
    training_x_ticks_labels = [
        datetime.date(i, j, 1).strftime('%b %Y') for i in range(2013, 2019)
        for j in [1, 5, 9]
    ]

    test_x_ticks = [datetime.date(2017, i + 1, 1) for i in range(12)]
    test_x_ticks_labels = [
        datetime.date(2017, i + 1, 1).strftime('%b %d %Y') for i in range(12)
    ]

    # Eth predict plot
    predict_and_plot(lstm_training_inputs,
                     training_set['eth_Close'],
                     training_set,
                     training_x_ticks,
                     training_x_ticks_labels,
                     model,
                     "[ETH] Training Set: Single Timepoint Prediction",
                     need_zoom=True)
    predict_and_plot(lstm_test_inputs,
                     test_set['eth_Close'],
                     test_set,
                     test_x_ticks,
                     test_x_ticks_labels,
                     model,
                     "[ETH] Test Set: Single Timepoint Prediction",
                     need_zoom=False)
Example #31
#!/usr/bin/env python

import sys
import os.path
from sklearn.ensemble import RandomForestClassifier

sys.path.insert(0, '.')
from train_model import train_model

submission_file = 'submission_rf_f4imp_all.csv'

features_files = ['data/Dog_1/features_02.txt',
                  'data/Dog_2/features_01.txt',
                  'data/Dog_3/features_01.txt',
                  'data/Dog_4/features_01.txt',
                  'data/Dog_5/features_01.txt',
                  'data/Patient_1/features_01.txt',
                  'data/Patient_2/features_01.txt']

model, train_prob, test_prob = train_model(features_files, [11, 5, 4, 6],
                                           RandomForestClassifier,
                                           {'n_estimators': 50,
                                            'criterion': 'entropy',
                                            'min_samples_leaf': 10},
                                           outlier_sigma=2, n_cv=20,
                                           plot=True, save_settings=True,
                                           submission_file=submission_file,
                                           verbose=True)

raw_input('Press Enter to end.')
Example #32
import sys
import os.path
from sklearn.linear_model import LogisticRegression

sys.path.insert(0, '.')
from train_model import train_model

submission_file = 'submission_log_reg_f1to3_rocslope10.csv'

features_files = [['data/Dog_1/features_02.txt'],
                  ['data/Dog_2/features_01.txt'],
                  ['data/Dog_3/features_01.txt'],
                  ['data/Dog_4/features_01.txt'],
                  ['data/Dog_5/features_01.txt'],
                  ['data/Patient_1/features_01.txt'],
                  ['data/Patient_2/features_01.txt']]
features_cols = [[8, 21], [12, 13], [3, 17], [11], [7, 10, 11], [10, 15], [5]]
C_vals = [0.01, 0.1, 1, 0.01, 10, 0.01, 10]

for ff, fc, C in zip(features_files, features_cols, C_vals):
    model, train_prob, test_prob = train_model(ff, fc, LogisticRegression,
                                            {'C': C,
                                            'class_weight': 'auto'},
                                            outlier_sigma=2, n_cv=100,
                                            normalize_probs='ROCSlope',
                                            plot=True, save_settings=True,
                                            submission_file=submission_file,
                                            verbose=True)

raw_input('Press Enter to end.')
Example #33
def optimize_model(features_files, submission_file,
                   classifier=linear_model.LogisticRegression,
                   feature_columns=range(2, 27),
                   min_features=1, max_features=1,
                   parameters={'C': np.logspace(-3, 1, 5),
                               'class_weight': ['auto']},
                   **kwargs):
    """
    Find the combination of features (selected from columns listed
    in feature_columns in the files listed in features_files,
    including 1 to max_features different features) and model
    parameters that optimizes the AUC for the chosen classifier,
    then update predicted test probabilities in submission_file.
    In the parameters dictionary, the keys are the keywords to be used
    when initializing the classifier, and each value is an array of
    arguments to loop over when searching for the best model.
    Additional settings are passed to train_model in kwargs.
    """
    best_model = {'AUC': 0.}
    
    # vary number of features
    if min_features > max_features:
        sys.exit('min_features must be <= max_features')
    for n_features in range(min_features, max_features+1):
        # vary features used in model training
        if n_features == min_features:
            if min_features == 1:
                feature_columns_grid = [[i] for i in feature_columns]
            else:
                feature_columns_grid = list(itertools.combinations( \
                                           feature_columns, min_features))
        elif len(best_model['columns']) < n_features-1:
            # exit loop since last iteration didn't add new features
            break
        else:
            remaining_features = list(feature_columns)
            for i in best_model['columns']:
                remaining_features.remove(i)
            feature_columns_grid = [list(best_model['columns']) + [i] \
                                    for i in remaining_features]
        for f_cols in feature_columns_grid:
            # vary model parameters
            for model_args in list(itertools.product( \
                                  *[[(k, v) for v in parameters[k]] \
                                  for k in parameters.keys()])):
                # train the classifier and compute AUC
                model, auc_mean, auc_std = train_model(features_files,
                                                       f_cols, classifier,
                                                       dict(model_args),
                                                       **kwargs)
                print '\r' + ', '.join([str(fc) for fc in f_cols]) + \
                        ' AUC = {0:.2f}+/-{1:.2f}'.format(auc_mean, auc_std),
                sys.stdout.flush()
                if auc_mean > best_model['AUC']:
                    print '\n    ', model_args
                    best_model = {'AUC': auc_mean,
                                  'columns': f_cols,
                                  'parameters': model_args}
    print '\r' + 70*' ' + '\n'

    # compute predictions for best model, update submission CSV,
    # and plot learning curves and ROC curve
    model, auc_mean, auc_std = train_model(features_files, 
                                           best_model['columns'], classifier,
                                           dict(best_model['parameters']),
                                           submission_file=submission_file,
                                           save_settings=True, plot=True,
                                           **kwargs)
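An illustrative call to optimize_model follows; the file path, column range, and the extra keyword arguments forwarded to train_model (outlier_sigma, n_cv) are assumptions modeled on the neighbouring examples, not taken from the original script:

import numpy as np
from sklearn import linear_model

optimize_model(['data/Dog_1/features_02.txt'],
               'submission_log_reg_optimized.csv',
               classifier=linear_model.LogisticRegression,
               feature_columns=range(2, 27),
               min_features=1, max_features=3,
               parameters={'C': np.logspace(-3, 1, 5),
                           'class_weight': ['auto']},
               outlier_sigma=2, n_cv=20)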
Example #34
sys.path.insert(0, '.')
from train_model import train_model

submission_file = 'submission_svm_f1to2_rocslope5.csv'

features_files = [['data/Dog_1/features_02.txt'],
                  ['data/Dog_2/features_01.txt'],
                  ['data/Dog_3/features_01.txt'],
                  ['data/Dog_4/features_01.txt'],
                  ['data/Dog_5/features_01.txt'],
                  ['data/Patient_1/features_01.txt'],
                  ['data/Patient_2/features_01.txt']]
features_cols = [[5, 11], [13], [3, 7], [5, 11], [14], [4, 11], [17, 18]]
C_vals = [10, 10, 0.1, 0.1, 0.1, 10, 10]
gamma_vals = [1, 0.5, 0.25, 0.25, 1, 1, 1]

for ff, fc, C, gamma in zip(features_files, features_cols, C_vals, gamma_vals):
    model, train_prob, test_prob = train_model(ff, fc, SVC,
                                            {'C': C, 'kernel': 'rbf',
                                            'gamma': gamma,
                                            'probability': True,
                                            'class_weight': 'auto'},
                                            outlier_sigma=2, n_cv=10,
                                            normalize_probs='ROCSlope',
                                            plot=True, save_settings=True,
                                            submission_file=submission_file,
                                            verbose=True)

raw_input('Press Enter to end.')
Example #35
    ssplit90 = json.load(f)

srefdf_t = filter_by_filelist(srefdf, ssplit90['train'])

### III. Preprocess: remove relational expressions from training

srefdf_tr = filter_relational_expr(srefdf_t)

### IV. Set list of words to train

wordlist = wordlist_by_criterion(srefdf_tr, model['wrdl'], model['wprm'])

### V. Get the region features

X = np.load('../../ExtractFeats/ExtrFeatsOut/saiapr.npz')
X = X['arr_0']

### VI. And... train away!

clsf = train_model(srefdf_tr, X, wordlist,
                   (linear_model.LogisticRegression, {'penalty':'l1'}),
                   nneg=model['nneg'], nsrc=model['nsrc'])

with gzip.open('../TrainedModels/' + basename + '.pklz', 'w') as f:
    pickle.dump(clsf, f)

with open('../TrainedModels/' + basename + '.json', 'w') as f:
    json.dump(model, f)

print strftime("%Y-%m-%d %H:%M:%S")