Example #1
    def seed(self):
        # Migrate every MLflow run into ClearML: create a task, copy the
        # run's metadata, metrics and artifacts, then mark it finished.
        for run_id in self.get_ids():
            task = self.call_func(
                "Task.create",
                run_id,
                lambda run_id_: Task.create(project_name="MLFlow Migration",
                                            task_name=run_id_),
                self.get_run_name_by_id(run_id),
            )

            self.call_func(
                "transmit_information",
                run_id,
                lambda run_id_: self.transmit_information(run_id_),
                run_id,
            )

            self.call_func("transmit_metrics", run_id,
                           lambda run_id_: self.transmit_metrics(run_id_),
                           run_id)

            self.call_func("transmit_artifacts", run_id,
                           lambda run_id_: self.transmit_artifacts(run_id_),
                           run_id)

            task.mark_started()
            task.completed()

            # Rebuild the project URL from the task's web log page so the
            # migrated project can be linked from the summary.
            output_log_web_page = task.get_output_log_web_page()
            url_parts = output_log_web_page.split("projects")
            project_id = url_parts[1].split("/")[1]
            self.project_link = url_parts[0].rstrip("/") + "/projects/" + project_id
            self.migration_count += 1
            self.pbar.update(1)
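
The call_func helper used above is not part of the snippet. A minimal sketch of what such a wrapper could look like, assuming it exists only to report which migration step failed for which run (the name and signature are taken from the calls above; the body is a guess, not the original implementation):

    def call_func(self, step_name, run_id, func, arg):
        # Hypothetical wrapper: run one migration step and report failures
        # with enough context (step name + MLflow run id) to resume later.
        try:
            return func(arg)
        except Exception:
            print(f"{step_name} failed for run {run_id}")
            raise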
Example #2

    def remote_run_experiment(self):
        # Create one ClearML task per parameter setup, parent it to the
        # current (controller) task, and enqueue it for remote execution.
        for parameter_setup in self._parameter_setups:
            print(parameter_setup)
            task = Task.create(
                project_name=self._project_name,
                task_name=self.make_task_name(parameter_setup),
                repo=self._repo,
                branch=self._branch,
                script=self._script,
                requirements_file="../requirements.txt",
            )
            task.set_parent(Task.current_task().id)
            task.connect(parameter_setup)
            Task.enqueue(task, self._queue)
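
How _parameter_setups is built is not shown. One plausible way, assuming a simple grid search over dictionaries whose keys match the parameters the remote script expects (the parameter names below are illustrative only):

from itertools import product

# Hypothetical grid: every combination becomes one remote ClearML task.
learning_rates = [1e-4, 3e-4]
batch_sizes = [16, 32]
parameter_setups = [
    {"lr": lr, "batch_size": bs}
    for lr, bs in product(learning_rates, batch_sizes)
]

Each dictionary is then connected to its task via task.connect(parameter_setup), so the remote run sees the values as ClearML hyperparameters.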
Example #3
import torch
from torch import nn
from torch.utils.data import DataLoader

from clearml import Task
from sentence_transformers import models

# CustomSentenceTransformer, BBClusterLossModel, ClusterEvaluator and the
# train_cluster_data / val_cluster_data sets are assumed to be defined
# elsewhere in the project.


def _run_fixed_lambda_bbcluster(train_batch_size,
                                num_epochs,
                                lambda_val,
                                reg,
                                use_model_device,
                                eval_steps,
                                out_path,
                                warmup_frac=0.1,
                                model_name='distilbert-base-uncased',
                                out_features=256):
    # Register the trial with ClearML and let it track the hyperparameters.
    exp_task = Task.create(project_name='Optuna Hyperparam optim',
                           task_name='trial')
    config_dict = {'lambda_val': lambda_val, 'reg': reg}
    config_dict = exp_task.connect(config_dict)
    if torch.cuda.is_available():
        device = torch.device('cuda')
        print('CUDA is available and using device: ' + str(device))
    else:
        device = torch.device('cpu')
        print('CUDA not available, using device: ' + str(device))
    word_embedding_model = models.Transformer(model_name)

    pooling_model = models.Pooling(
        word_embedding_model.get_word_embedding_dimension(),
        pooling_mode_mean_tokens=True,
        pooling_mode_cls_token=False,
        pooling_mode_max_tokens=False)

    doc_dense_model = models.Dense(
        in_features=pooling_model.get_sentence_embedding_dimension(),
        out_features=out_features,
        activation_function=nn.Tanh())

    model = CustomSentenceTransformer(
        modules=[word_embedding_model, pooling_model, doc_dense_model])
    loss_model = BBClusterLossModel(model=model,
                                    device=device,
                                    lambda_val=config_dict.get(
                                        'lambda_val', lambda_val),
                                    reg_const=config_dict.get('reg', reg))

    train_dataloader = DataLoader(train_cluster_data,
                                  shuffle=True,
                                  batch_size=train_batch_size)
    evaluator = ClusterEvaluator.from_input_examples(val_cluster_data,
                                                     use_model_device)

    # Warm up for warmup_frac of the total training steps (10% by default).
    warmup_steps = int(len(train_dataloader) * num_epochs * warmup_frac)

    model.to(device)

    # Train the model
    model.fit(train_objectives=[(train_dataloader, loss_model)],
              epochs=num_epochs,
              warmup_steps=warmup_steps,
              evaluator=evaluator,
              evaluation_steps=eval_steps,
              output_path=out_path)
    # Reload the best checkpoint saved during fit() and return its score.
    best_model = CustomSentenceTransformer(out_path)
    return evaluator(best_model)
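
Given the project name 'Optuna Hyperparam optim', this function is presumably called from an Optuna objective. A sketch of that wiring, assuming the evaluator score should be maximized and using illustrative search ranges and fixed arguments (none of these values are taken from the original code):

import optuna


def objective(trial):
    # Hypothetical ranges; only lambda_val and reg are tuned, matching
    # the config_dict connected to the ClearML task above.
    lambda_val = trial.suggest_float('lambda_val', 0.01, 1.0, log=True)
    reg = trial.suggest_float('reg', 1e-4, 1e-1, log=True)
    return _run_fixed_lambda_bbcluster(train_batch_size=32,
                                       num_epochs=3,
                                       lambda_val=lambda_val,
                                       reg=reg,
                                       use_model_device=True,
                                       eval_steps=100,
                                       out_path='./optuna_trial')


study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)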