Example #1
def __prepare_tensor_dataset(self):
    tensor_dataset_path = os.path.join(
        self.metaconf["ws_path"], "tensor_datasets",
        self.dataset_params.tensor_dataset_name)
    # Compare configs; if the snapshot differs, rebuild the dataset.
    current_config_snapshot_exists = H.config_snapshot(
        "dataset_params",
        self.dataset_params.params,
        "src/data/aux/.dataset_config_snapshot.json",
    )
    if not current_config_snapshot_exists:
        H.makedirs(tensor_dataset_path)
        _tqdm_kwargs = {
            "desc": "Preparing TensorDataset",
            "total": len(self.generic_dataset)
        }
        for i, sample in tqdm(enumerate(self.generic_dataset),
                              **_tqdm_kwargs):
            # One subfolder per texture label, one .pt file per nodule.
            f_folder_path = os.path.join(tensor_dataset_path,
                                         f"{sample['texture']}")
            H.makedirs(f_folder_path)
            f_path = os.path.join(f_folder_path, f"nodule_{i}.pt")
            save_nodules = {
                "nodule": sample["nodule"],
                "texture": sample["texture"]
            }
            torch.save(save_nodules, f_path)
    return tensor_dataset_path
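
All of the examples on this page lean on a small makedirs helper (H.makedirs here, helpers.makedirs below) whose implementation is not shown. A minimal sketch consistent with how it is called (Example #1 discards the return value, Examples #4 and #5 assign it) would be an idempotent wrapper around os.makedirs that hands the path back; the exact behaviour is an assumption:

import os

def makedirs(path):
    # Assumed behaviour: create the directory tree if it is missing
    # and return the path so call sites can assign it directly.
    os.makedirs(path, exist_ok=True)
    return path
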
Example #2
def patient_workflow(config, patient_id, patient_input, output_file):
    workflow = pypeliner.workflow.Workflow()

    patient_bam_dir = config["bam_directory"] + patient_id
    patient_result_dir = config["results_dir"] + patient_id

    helpers.makedirs(patient_bam_dir)
    helpers.makedirs(patient_result_dir)

    input_args = helpers.create_input_args(patient_input, patient_bam_dir)

    workflow.setobj(obj=mgd.OutputChunks('sample_id', ),
                    value=input_args['all_samples'])

    workflow.subworkflow(name='align_samples',
                         func=alignment.align_sample,
                         axes=('sample_id', ),
                         args=(
                             config,
                             mgd.InputFile('fastq_1',
                                           'sample_id',
                                           fnames=input_args['fastqs_r1']),
                             mgd.InputFile('fastq_2',
                                           'sample_id',
                                           fnames=input_args['fastqs_r2']),
                             mgd.InputInstance('sample_id'),
                             mgd.OutputFile('sample.bam',
                                            'sample_id',
                                            fnames=input_args['all_bams']),
                             mgd.OutputFile('sample.bam.bai',
                                            'sample_id',
                                            fnames=input_args['all_bais']),
                         ))

    workflow.subworkflow(name='run_analyses',
                         func=analysis.partition_tumour,
                         args=(
                             config,
                             input_args,
                             patient_id,
                             patient_result_dir,
                             mgd.InputFile('sample.bam',
                                           'sample_id',
                                           fnames=input_args['all_bams'],
                                           axes_origin=[]),
                             mgd.InputFile('sample.bam.bai',
                                           'sample_id',
                                           fnames=input_args['all_bais'],
                                           axes_origin=[]),
                             mgd.OutputFile(output_file),
                         ))

    return workflow
Example #3
def ctDNA_workflow(args):
    pyp = pypeliner.app.Pypeline(config=args)
    workflow = pypeliner.workflow.Workflow()

    config = helpers.load_yaml(args['config'])
    for arg, value in args.items():
        config[arg] = value

    helpers.makedirs(config["bam_directory"])

    helpers.makedirs(config["results_dir"])

    inputs = helpers.load_yaml(args['input_yaml'])
    patients = list(inputs.keys())

    workflow.setobj(obj=mgd.OutputChunks('patient_id', ), value=patients)

    workflow.transform(name='get_input_by_patient',
                       func=helpers.get_input_by_patient,
                       ret=mgd.TempOutputObj('patient_input', 'patient_id'),
                       axes=('patient_id', ),
                       args=(
                           inputs,
                           mgd.InputInstance('patient_id'),
                       ))

    workflow.subworkflow(name='patient_workflow',
                         func=patient_workflow,
                         axes=('patient_id', ),
                         args=(
                             config,
                             mgd.InputInstance('patient_id'),
                             mgd.TempInputObj('patient_input', 'patient_id'),
                             mgd.OutputFile(
                                 os.path.join(config['results_dir'],
                                              '{patient_id}.log'),
                                 'patient_id'),
                         ))

    pyp.run(workflow)
Example #4
def optimizer_step(self,
                   current_epoch,
                   batch_idx,
                   optimizer,
                   optimizer_idx,
                   second_order_closure=None):
    # Log a gradient-flow plot to TensorBoard on every optimizer step.
    gradplot_savepath = H.makedirs(
        os.path.join(self.metaconf["ws_path"], "artifacts",
                     "gradflow_plots"))
    fig = H.plot_grad_flow(self.named_parameters(), "VAE",
                           self.global_step, gradplot_savepath)
    self.logger.experiment.add_figure("gradflow_plots", fig,
                                      self.global_step)
    optimizer.step()
    optimizer.zero_grad()
Example #5
def optimizer_step(
    self, current_epoch, batch_idx, optimizer, optimizer_idx, second_order_closure=None
):
    g_norm = H.get_gradient_norm(self.parameters())
    gradplot_savepath = H.makedirs(
        os.path.join(self.metaconf["ws_path"], "artifacts", "gradflow_plots")
    )
    fig = H.plot_grad_flow(self.named_parameters(), "RLS", 0, gradplot_savepath)
    self.logger.experiment.add_figure("gradflow_plots", fig, self.global_step)
    if np.isnan(g_norm):
        # A NaN gradient norm would corrupt the weights: drop the
        # gradients and skip this optimizer step entirely.
        log.warning("  gradient norm is NaN -> skip")
        optimizer.zero_grad()
        return
    elif g_norm > self.hparams.optimizer_max_grad_norm:
        log.warning(
            f"  gradient norm is too high: {g_norm:.5f} -> clip to OPTIMIZER_MAX_GRAD_NORM"
        )
        torch.nn.utils.clip_grad_norm_(
            self.parameters(), self.hparams.optimizer_max_grad_norm
        )
    else:
        log.info(f"  gradient norm: {g_norm:.5f}")
    optimizer.step()
    optimizer.zero_grad()
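
Example #5 additionally gates the update on H.get_gradient_norm, which is not shown either. A hedged sketch, assuming the helper reports the global L2 norm over all parameters that currently hold a gradient (so a NaN in any gradient propagates to the returned value and triggers the skip branch above):

import torch

def get_gradient_norm(parameters, norm_type=2.0):
    # Hypothetical stand-in for H.get_gradient_norm: stack the per-tensor
    # gradient norms and reduce them to a single global norm.
    grads = [p.grad.detach() for p in parameters if p.grad is not None]
    if not grads:
        return float("nan")
    per_tensor = torch.stack([torch.norm(g, norm_type) for g in grads])
    return torch.norm(per_tensor, norm_type).item()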