def run(model_state_path="/home/ryan/code/nanopore_assembly/output/training_2018-8-30-14-31-58-3-242/model_checkpoint_4",
        directory="/home/ryan/code/nanopore_assembly/output/chr1_800k_2500_windows/test"):
    """Evaluate a trained Decoder checkpoint on a directory of pileup windows.

    :param model_state_path: path to a saved model state_dict (output of torch.save)
    :param directory: directory containing .npz pileup windows to test on;
        default is the spoa 2-pass arbitrary-region 2500-window test set
    """
    # Alternate test set (spoa 2 pass, variants excluded):
    # "/home/ryan/code/nanopore_assembly/output/spoa_pileup_generation_chr1_full/test"

    file_paths = FileManager.get_all_file_paths_by_type(parent_directory_path=directory,
                                                        file_extension=".npz",
                                                        sort=False)

    # Architecture parameters -- must match the checkpoint being loaded
    hidden_size = 16
    input_channels = 1  # 1-dimensional signal
    output_size = 5     # '-','A','C','T','G' one hot vector
    n_layers = 3

    # Hyperparameters
    dropout_rate = 0    # no dropout at evaluation time

    # Evaluation parameters
    batch_size_train = 1

    data_loader = DataLoader(file_paths=file_paths, batch_size=batch_size_train)

    model = Decoder(hidden_size=hidden_size,
                    input_size=input_channels,
                    output_size=output_size,
                    n_layers=n_layers,
                    dropout_rate=dropout_rate)
    model.load_state_dict(torch.load(model_state_path))

    test(model=model, data_loader=data_loader)
def run(model_state_path="/home/ryan/code/nanopore_assembly/output/training_2018-9-14-14-17-3-4-257/model_checkpoint_5",
        directory="/home/ryan/code/nanopore_assembly/output/chr1_800k-1200k_standard_20width/chr1/test"):
    """Evaluate a trained EncoderDecoder checkpoint and a baseline consensus caller.

    Runs the neural model on a test window set, then runs the frequency-based
    ConsensusCaller on the same data for comparison.

    :param model_state_path: path to a saved model state_dict (output of torch.save)
    :param directory: directory containing .npz pileup windows
        (default: no variants, chr1:1200k 200-window test set)
    """
    file_paths = FileManager.get_all_file_paths_by_type(parent_directory_path=directory,
                                                        file_extension=".npz",
                                                        sort=False)

    # Architecture parameters -- must match the checkpoint being loaded
    hidden_size = 128
    input_channels = 1  # 1-dimensional signal
    output_size = 5     # '-','A','C','T','G' one hot vector
    n_layers = 1

    # Hyperparameters
    dropout_rate = 0.1

    # Evaluation parameters
    batch_size_train = 1

    data_loader = DataLoader(file_paths=file_paths, batch_size=batch_size_train)

    model = EncoderDecoder(hidden_size=hidden_size,
                           input_size=input_channels,
                           output_size=output_size,
                           n_layers=n_layers,
                           dropout_rate=dropout_rate)
    model.load_state_dict(torch.load(model_state_path))

    test(model=model, data_loader=data_loader)

    # Baseline: non-neural consensus calling on the same windows
    consensus_caller = ConsensusCaller(sequence_to_float=sequence_to_float,
                                       sequence_to_index=sequence_to_index)

    test_consensus(consensus_caller=consensus_caller, data_loader=data_loader)
def run(directory="/home/ryan/code/nanopore_assembly/output/spoa_pileup_generation_celegans_chr1_1mbp_NONRUNLENGTH_2018-9-19",
        n_batches=5000):
    """Evaluate the frequency-based ConsensusCaller (no neural model) on pileup windows.

    :param directory: directory containing .npz pileup windows
        (default: C. elegans chr1 1 Mbp, non-run-length encoded)
    :param n_batches: maximum number of batches to evaluate
    """
    # Alternate test sets:
    # "/home/ryan/code/nanopore_assembly/output/spoa_pileup_generation_chr1_full/test"  # spoa 2 pass variants excluded
    # "/home/ryan/code/nanopore_assembly/output/spoa_pileup_generation_2018-9-4-17-30-38-1-247"  # arbitrary 2500 window test region
    # "/home/ryan/code/nanopore_assembly/output/spoa_pileup_generation_human_chr1_1mbp_NONRUNLENGTH_2018-9-18"  # human

    file_paths = FileManager.get_all_file_paths_by_type(parent_directory_path=directory,
                                                        file_extension=".npz",
                                                        sort=False)

    # Evaluation parameters
    batch_size_train = 1

    data_loader = DataLoader(file_paths=file_paths, batch_size=batch_size_train)

    consensus_caller = ConsensusCaller(sequence_to_index, sequence_to_float)

    # Show requested batch budget vs. available data before evaluating
    print(n_batches, len(data_loader))

    test_consensus(consensus_caller=consensus_caller,
                   data_loader=data_loader,
                   n_batches=n_batches)
def run(model_state_path="/home/ryan/code/nanopore_assembly/output/simple_rnn_2_layer_32_hidden_celegans/model_checkpoint_10",
        directory="/home/ryan/code/nanopore_assembly/output/celegans_250_window_test_1m"):
    """Evaluate a trained Decoder (simple RNN) checkpoint plus the baseline consensus caller.

    :param model_state_path: path to a saved model state_dict (output of torch.save)
    :param directory: directory containing .npz pileup windows
        (default: C. elegans test windows)
    """
    # Alternate test set (no variants, chr1:1200k 200 windows):
    # "/home/ryan/code/nanopore_assembly/output/chr1_800k-1200k_standard_20width/chr1/test"

    file_paths = FileManager.get_all_file_paths_by_type(parent_directory_path=directory,
                                                        file_extension=".npz",
                                                        sort=False)

    # Architecture parameters -- must match the checkpoint being loaded
    hidden_size = 32
    input_channels = 5  # 5 channels: loader converts pileups to per-base frequencies (convert_to_frequency=True)
    output_size = 5     # '-','A','C','T','G' one hot vector
    n_layers = 2

    # Hyperparameters
    dropout_rate = 0.1

    # Evaluation parameters
    batch_size_train = 1

    data_loader = DataLoader(file_paths=file_paths,
                             batch_size=batch_size_train,
                             convert_to_frequency=True)

    model = Decoder(hidden_size=hidden_size,
                    input_size=input_channels,
                    output_size=output_size,
                    n_layers=n_layers,
                    dropout_rate=dropout_rate)
    model.load_state_dict(torch.load(model_state_path))

    test(model=model, data_loader=data_loader)

    # Baseline: non-neural consensus calling on the same windows
    consensus_caller = ConsensusCaller(sequence_to_float=sequence_to_float,
                                       sequence_to_index=sequence_to_index)

    test_consensus(consensus_caller=consensus_caller, data_loader=data_loader)
def run(load_model=False, model_state_path=None):
    """Train an EncoderDecoder on chr1 800k-1200k pileup windows and save the results.

    :param load_model: when True, resume from the weights at model_state_path
    :param model_state_path: path to a saved state_dict; required if load_model is True
    """
    # Training set: spoa 2 pass, arbitrary region, 2500 windows
    directory = "/home/ryan/code/nanopore_assembly/output/chr1_800k-1200k_standard_20width/chr1/train"

    file_paths = FileManager.get_all_file_paths_by_type(parent_directory_path=directory,
                                                        file_extension=".npz",
                                                        sort=False)

    results_handler = ResultsHandler()

    # Architecture parameters
    hidden_size = 128
    input_channels = 1  # 1-dimensional signal
    output_size = 5     # one-hot over '-','A','C','T','G'
    n_layers = 1

    # Optimization hyperparameters
    learning_rate = 1e-3
    weight_decay = 1e-5
    dropout_rate = 0.1

    # Training schedule
    batch_size_train = 1
    n_batches = None        # no explicit batch cap; train() decides how long to run
    checkpoint_interval = 1000

    data_loader = DataLoader(file_paths=file_paths, batch_size=batch_size_train)

    model = EncoderDecoder(hidden_size=hidden_size,
                           input_size=input_channels,
                           output_size=output_size,
                           n_layers=n_layers,
                           dropout_rate=dropout_rate)

    # Adam with L2 regularization via weight decay
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # Classification loss over the 5 output symbols
    loss_fn = nn.CrossEntropyLoss()

    if load_model:
        # Resume training from previously saved weights
        model.load_state_dict(torch.load(model_state_path))

    # Run training; returns the per-iteration loss history
    losses = train(model=model,
                   data_loader=data_loader,
                   optimizer=optimizer,
                   loss_fn=loss_fn,
                   n_batches=n_batches,
                   results_handler=results_handler,
                   checkpoint_interval=checkpoint_interval)

    results_handler.save_model(model)
    results_handler.save_plot(losses)

    print(model)
def run(load_model=False, model_state_path=None):
    """Train an EncoderDecoder on the chr1 full pileup set (variants excluded) and save results.

    :param load_model: when True, resume from the weights at model_state_path
    :param model_state_path: path to a saved state_dict; required if load_model is True
    """
    # Earlier pileup generations tried here:
    # poapy:                "/home/ryan/code/nanopore_assembly/output/pileup_generation_2018-8-24-12-54-20-4-236"
    # spoa:                 "/home/ryan/code/nanopore_assembly/output/spoa_pileup_generation_2018-8-27-13-51-41-0-239"
    # spoa 2-pass:          "/home/ryan/code/nanopore_assembly/output/spoa_pileup_generation_2018-8-27-16-13-23-0-239"
    # Current training set: spoa 2 pass, variants excluded
    directory = "/home/ryan/code/nanopore_assembly/output/spoa_pileup_generation_chr1_full/train"

    file_paths = FileManager.get_all_file_paths_by_type(parent_directory_path=directory,
                                                        file_extension=".npz",
                                                        sort=False)

    results_handler = ResultsHandler()

    # Architecture parameters
    hidden_size = 16
    input_channels = 1  # 1-dimensional signal
    output_size = 5     # one-hot over '-','A','C','T','G'
    n_layers = 3

    # Optimization hyperparameters
    learning_rate = 1e-3
    weight_decay = 0
    dropout_rate = 0

    # Training schedule
    batch_size_train = 1
    n_batches = 8000
    checkpoint_interval = 1000

    data_loader = DataLoader(file_paths=file_paths, batch_size=batch_size_train)

    model = EncoderDecoder(hidden_size=hidden_size,
                           input_size=input_channels,
                           output_size=output_size,
                           n_layers=n_layers,
                           dropout_rate=dropout_rate)

    # Adam optimizer (weight decay disabled for this run)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # Classification loss over the 5 output symbols
    loss_fn = nn.CrossEntropyLoss()

    if load_model:
        # Resume training from previously saved weights
        model.load_state_dict(torch.load(model_state_path))

    # Run training; returns the per-iteration loss history
    losses = train(model=model,
                   data_loader=data_loader,
                   optimizer=optimizer,
                   loss_fn=loss_fn,
                   n_batches=n_batches,
                   results_handler=results_handler,
                   checkpoint_interval=checkpoint_interval)

    results_handler.save_model(model)
    results_handler.save_plot(losses)

    print(model)