type=str, default=None, help= "Input file could either be a directory with multiple audio files or just one single audio file" ) ARGS = parser.parse_args() log = Logger("PREDICT", ARGS.debug, ARGS.log_dir) """ Main function to compute prediction by using a trained model together with the given input """ if __name__ == "__main__": if ARGS.checkpoint_path is not None: log.info("Restoring checkpoint from {} instead of using a model file.". format(ARGS.checkpoint_path)) checkpoint = torch.load(ARGS.checkpoint_path) model = UNet(1, 1, bilinear=False) model.load_state_dict(checkpoint["modelState"]) log.warning( "Using default preprocessing options. Provide Model file if they are changed" ) dataOpts = DefaultSpecDatasetOps else: if ARGS.jit_load: extra_files = {} extra_files['dataOpts'] = '' model = torch.jit.load(ARGS.model_path, _extra_files=extra_files) unetState = model.state_dict() dataOpts = eval(extra_files['dataOpts']) log.debug("Model successfully load via torch jit: " +
# Scale the configured learning rate by the batch size.
ARGS.lr *= ARGS.batch_size

# Number of evaluation rounds that cover `lr_patience_epochs`, rounded up
# and never below one.
patience_lr = int(
    max(1, math.ceil(ARGS.lr_patience_epochs / ARGS.epochs_per_eval)))

log.debug("dataOpts: " + json.dumps(dataOpts, indent=4))

# Sequence length expressed in hops: (sequence_len / 1000) * sr / hop_length.
# NOTE(review): presumably ARGS.sequence_len is given in milliseconds -- confirm.
sequence_len_in_hops = (
    float(ARGS.sequence_len) / 1000 * dataOpts["sr"] / dataOpts["hop_length"])
sequence_len = int(sequence_len_in_hops)
log.debug("Training with sequence length: {}".format(sequence_len))

input_shape = (
    ARGS.batch_size,
    1,
    dataOpts["n_freq_bins"],
    sequence_len,
)

log.info("Setting up model")

# Build the encoder first; the classifier's input width is derived from it.
encoder = Encoder(encoderOpts)
log.debug("Encoder: " + str(encoder))

encoder_out_ch = 512 * encoder.block_type.expansion

classifierOpts.update({
    "num_classes": 2,
    "input_channels": encoder_out_ch,
})
classifier = Classifier(classifierOpts)
log.debug("Classifier: " + str(classifier))

# Fractions used to split the data into train / val / test partitions.
split_fracs = {
    "train": .7,
    "val": .15,
    "test": .15,
}
input_data = DatabaseCsvSplit(
    split_fracs,
    working_dir=ARGS.data_dir,
    split_per_dir=True,
)
parser.add_argument(
    "audio_files",
    nargs="+",
    type=str,
    help="Audio file to predict the call locations",
)

ARGS = parser.parse_args()

log = Logger("PREDICT", ARGS.debug, ARGS.log_dir)

models = {
    "encoder": 1,
    "classifier": 2,
}

""" Main function to compute prediction (segmentation) by using a trained model together with a given audio tape by processing a sliding window approach """
# NOTE(review): the nesting below was reconstructed from a flattened source
# line -- confirm against the original layout.
if __name__ == "__main__":

    if ARGS.checkpoint_path is not None:
        # Rebuild encoder + classifier with default options and restore
        # their weights from the training checkpoint (loaded onto the CPU).
        log.info(
            "Restoring checkpoint from {} instead of using a model file.".format(
                ARGS.checkpoint_path))
        checkpoint = torch.load(ARGS.checkpoint_path, map_location="cpu")

        encoder = Encoder(DefaultEncoderOpts)
        classifier = Classifier(DefaultClassifierOpts)

        # Insertion order fixes the order of the sequential stages.
        stages = OrderedDict()
        stages["encoder"] = encoder
        stages["classifier"] = classifier
        model = nn.Sequential(stages)
        model.load_state_dict(checkpoint["modelState"])

        log.warning(
            "Using default preprocessing options. Provide Model file if they are changed"
        )
        dataOpts = DefaultSpecDatasetOps
    else:
        if ARGS.jit_load:
            # Keys registered here with empty placeholder values.
            extra_files = {'dataOpts': '', 'encoderOpts': ''}
# update the respective parameters if given in terminal for arg, value in vars(ARGS).items(): if arg in encoderOpts and value is not None: encoderOpts[arg] = value if arg in decoderOpts and value is not None: decoderOpts[arg] = value else: raise ValueError( "Expected plain_ae or conv_ae as model but received: {}". format(ARGS.model)) else: raise ValueError( "--model could not be None. Pls choose one model: plain_ae or conv_ae" ) log.info("Setting up model") # create a model and load it to the specified device, either gpu or cpu if ARGS.model is not None: if ARGS.model == "plain_ae": model = autoencoder(ARGS.n_bottleneck).to(ARGS.device) elif ARGS.model == "conv_ae": encoder = Encoder(encoderOpts).to(ARGS.device) decoder = Decoder(decoderOpts).to(ARGS.device) model = nn.Sequential( OrderedDict([("encoder", encoder), ("decoder", decoder)])) #log.debug("Encoder: " + str(encoder)) #log.debug("Decoder: " + str(decoder)) log.debug(encoderOpts) log.debug(decoderOpts)
if ARGS.decod_dir is not None and os.path.isdir(ARGS.decod_dir): os.makedirs(ARGS.decod_dir, exist_ok=True) # load the trained model if ARGS.model_path is not None: model_dict = torch.load(ARGS.model_path) encoder = Encoder(model_dict["encoderOpts"]).to(ARGS.device) encoder.load_state_dict(model_dict["encoderState"]) decoder = Decoder(model_dict["decoderOpts"]).to(ARGS.device) decoder.load_state_dict(model_dict["decoderState"]) #model = encoder model = nn.Sequential( OrderedDict([("encoder", encoder), ("decoder", decoder)])) dataOpts = model_dict["dataOpts"] log.info(model) if torch.cuda.is_available() and ARGS.cuda: model = model.cuda() model.eval() sr = dataOpts["sr"] # modified, s.t. not hard-coded hop_length = dataOpts["hop_length"] n_fft = dataOpts["n_fft"] try: n_freq_bins = dataOpts["num_mels"] except KeyError: n_freq_bins = dataOpts["n_freq_bins"] freq_compression = dataOpts[