def __init__(self, filepath):
    """Load a WAV file and prepare two playback objects from it.

    Args:
        filepath: Path of the WAV file to read.

    Attributes set:
        wave_obj_acc: WaveObject built from the unmodified frames.
        wave_obj: WaveObject built from the same frames with every sample
            integer-divided by two (half amplitude).
    """
    # The context manager closes the file handle once the frames are read.
    with wave.open(filepath, 'rb') as wave_read:
        num_channels = wave_read.getnchannels()
        bytes_per_sample = wave_read.getsampwidth()
        sample_rate = wave_read.getframerate()
        wave_buffer = wave_read.readframes(wave_read.getnframes())

    # numpy.fromstring's binary mode is deprecated; frombuffer is the
    # supported way to view raw bytes as samples.  The floor division
    # allocates a new array, so the read-only view is never written to.
    # NOTE(review): interpreting the frames as int16 assumes a 16-bit file.
    wave_buffer_softer = numpy.frombuffer(wave_buffer, numpy.int16) // 2  # half amplitude

    self.wave_obj_acc = sa.WaveObject(wave_buffer, num_channels,
                                      bytes_per_sample, sample_rate)
    self.wave_obj = sa.WaveObject(wave_buffer_softer, num_channels,
                                  bytes_per_sample, sample_rate)
def build(name: str,
          song: pydub.AudioSegment,
          applied_length: Optional[int] = None,
          volume: int = 255,
          volume_floor: int = -50,
          bytes_per_sample: int = 2,
          sample_rate: int = 44100):
    """Create an ``AudioInfo`` for ``song`` at a given length and volume.

    Args:
        name: Name passed through to ``AudioInfo``.
        song: Source audio segment.
        applied_length: Target length; ``None`` keeps the song unchanged.
            When set, the song is repeated and then sliced to this length.
        volume: Value in 0-255; 0 yields a short silent segment.
        volume_floor: Gain offset used when mapping ``volume`` to a gain.
        bytes_per_sample: Sample width handed to simpleaudio.
        sample_rate: Sample rate handed to simpleaudio.

    Raises:
        BadAudioLengthException: If ``applied_length`` is negative.
        BadVolumeException: If ``volume`` is outside 0-255.
    """
    length_given = applied_length is not None
    if length_given and applied_length < 0:
        raise BadAudioLengthException(
            "The length is {} but cannot be negative".format(applied_length))
    if volume > 255 or volume < 0:
        raise BadVolumeException(
            "The volume is {} but must be 0–255".format(volume))

    # Loop the song often enough to cover the requested length, then trim.
    if length_given:
        n_repeats = math.ceil(applied_length / len(song))
        resized = (song * n_repeats)[0:applied_length]
    else:
        resized = song

    wants_silence = volume == 0 or applied_length == 0
    if wants_silence:
        final = pydub.AudioSegment.silent(duration=0.5)
    else:
        gain = volume * (volume_floor / 255) - volume_floor
        final = resized + gain

    if length_given:
        assert len(resized) << approxeq >> applied_length or applied_length == 1,\
            "The actual audio stimulus length is {}, but the length in stimulus_frames is {}".format(len(resized), applied_length)

    play_obj = sa.WaveObject(final.raw_data, 1, bytes_per_sample, sample_rate)
    return AudioInfo(name, play_obj, applied_length, volume)
def fun_party(self):
    """Play the party intro sound, show progress output, then start the party.

    Starts playback of ``data/PartyStart.wav``, sends the ``color_mode`` and
    ``hue`` commands, shows one progress bar per entry of the module-level
    ``messages``/``message_times``, logs every line of
    ``data/dancetypes.txt`` spread over about 11.2 seconds and finally calls
    ``self.party()``.
    """
    os.environ['WRAP_STDERR'] = 'true'

    path_to_file = 'data/PartyStart.wav'
    # Context manager so the wave file handle is released after reading.
    with wave.open(path_to_file, 'rb') as wave_read:
        audio_data = wave_read.readframes(wave_read.getnframes())
        num_channels = wave_read.getnchannels()
        bytes_per_sample = wave_read.getsampwidth()
        sample_rate = wave_read.getframerate()
    wave_obj = sa.WaveObject(audio_data, num_channels, bytes_per_sample,
                             sample_rate)
    play_obj = wave_obj.play()

    self.send_command('color_mode')
    for m, t in zip(messages, message_times):
        progress_bar(m, t)
    self.send_command('hue', 240)

    party_list = 'data/dancetypes.txt'
    with open(party_list, 'r') as partyfile:
        lines = [line.rstrip() for line in partyfile]

    ntypes = len(lines)
    total_time = 11.2
    # An empty list previously raised ZeroDivisionError; now the loop is
    # simply skipped.
    if ntypes:
        delay = total_time / ntypes
        for line in progressbar.progressbar(lines):
            logging.error(f'{line}')
            time.sleep(delay)
    self.party()
def play_signal(audio_signal):
    """Play ``audio_signal`` as mono audio at 16 kHz and block until it ends.

    The buffer is handed to simpleaudio with 4 bytes per sample.
    NOTE(review): the original comment says the samples are float32 --
    confirm against the caller.
    """
    mono_channels = 1
    sample_width = 4  # 4 bytes per one float32 sample
    rate_hz = 16000
    playback = sa.WaveObject(audio_signal, mono_channels, sample_width,
                             rate_hz).play()
    playback.wait_done()
def play_sound(self, wave):
    """Play ``wave`` after any sound that is still playing has finished.

    The buffer is handed to simpleaudio as mono with one byte per sample at
    ``self.sample_rate``.  Playback is best-effort: failures are logged and
    not propagated.

    Args:
        wave: Audio buffer to play.
    """
    import logging

    try:
        wave_obj = sa.WaveObject(wave, 1, 1, self.sample_rate)
        # wait_done() blocks until the previous sound ends, without the CPU
        # spin of polling is_playing() in a tight loop.
        if self.play_obj:
            self.play_obj.wait_done()
        self.play_obj = wave_obj.play()
    except Exception:
        # Still best-effort, but no longer swallows KeyboardInterrupt or
        # SystemExit, and the failure is visible in the log.
        logging.getLogger(__name__).warning('Could not play sound',
                                            exc_info=True)
def sorting_mode():
    """Pick a random team name, synthesize it to speech and play it.

    Blocks until playback has finished.
    """
    chosen_team = random.choice(TEAM_NAMES)
    tts_request = texttospeech.types.SynthesisInput(text=chosen_team)
    tts_reply = clientT2S.synthesize_speech(tts_request, voice, audio_config)
    # The audio is handed to simpleaudio as mono, 2 bytes/sample, 22050 Hz.
    spoken = sa.WaveObject(tts_reply.audio_content, 1, 2, 22050)
    spoken.play().wait_done()
def addWhiteNoise(self, key, duration=5.0, volume=1.0, sample_rate=44100):
    """Generate normally distributed noise and store it under ``key``.

    Args:
        key: Name under which the sound is stored in ``self.sound``.
        duration: Length in seconds.
        volume: Scale factor applied to the normalized 16-bit peak.
        sample_rate: Samples per second.
    """
    logger.debug(
        'Sound %s : White noise random generated, duration = %s s, sample_rate = %s, volume = %s',
        key, duration, sample_rate, volume)

    sample_count = round(duration * sample_rate)
    samples = np.random.normal(0, 1, sample_count)

    # Normalize so that the loudest sample hits the 16-bit peak times volume.
    peak = np.max(np.abs(samples))
    samples *= 32767 * volume / peak

    pcm = samples.astype(np.int16)
    self.sound[key] = sa.WaveObject(pcm, 1, 2, sample_rate)
def test_getFileByLocation(self):
    """Fetch a sample by location and wrap it in a WaveObject.

    The test makes no assertions; it only checks that the calls complete.
    """
    sample_location = '2/heavy/KICK.wav'
    payload = SampleService().getFileByLocation(sample_location)
    # Mono, 2 bytes per sample, 16 kHz.
    wave_obj = sa.WaveObject(payload, 1, 2, 16000)
    name = ''
def step(self):
    """Read the wave-pattern bytes from memory and start playing them.

    Every address from ``IO_Registers.WAVE_PATTERN_START`` to
    ``IO_Registers.WAVE_PATTERN_END`` (inclusive) is read through the MMU;
    the bytes are played as 2-channel, 2-bytes-per-sample audio at 44100 Hz.
    """
    print('Called Wave')
    first = IO_Registers.WAVE_PATTERN_START
    last = IO_Registers.WAVE_PATTERN_END
    pattern = [self.mmu.read_byte(addr) for addr in range(first, last + 1)]
    sa.WaveObject(bytes(pattern), 2, 2, 44100).play()
def play(self):
    """Start playing ``self.audio_segment`` from the beginning.

    Marks the player as playing, records the start time, stores the play
    object and resets the played duration to zero.
    """
    self.playing = True
    segment = self.audio_segment
    wave_object = simpleaudio.WaveObject(
        segment.raw_data,
        segment.channels,
        segment.sample_width,
        segment.frame_rate,
    )
    self.last_start_time = time.time()
    self.play_object = wave_object.play()
    self.played_duration = 0
def resume(self):
    """Continue playback from ``self.played_duration`` if not playing.

    Does nothing when playback is already in progress.
    """
    if self.playing:
        return

    self.playing = True
    # played_duration is in seconds; the segment is sliced in milliseconds.
    resume_at_ms = int(self.played_duration * 1000)
    remaining = self.audio_segment[resume_at_ms:].raw_data
    wave_object = simpleaudio.WaveObject(
        remaining,
        self.audio_segment.channels,
        self.audio_segment.sample_width,
        self.audio_segment.frame_rate,
    )
    self.play_object = wave_object.play()
    self.last_start_time = time.time()
def __init__(self, audio_bytes):
    """Build a WaveObject from in-memory WAV file contents.

    Args:
        audio_bytes: Bytes of a complete WAV file (header included).
    """
    # Context manager so the wave reader is closed after the frames are read.
    with wave.open(BytesIO(audio_bytes)) as wave_read:
        audio_data = wave_read.readframes(wave_read.getnframes())
        num_channels = wave_read.getnchannels()
        bytes_per_sample = wave_read.getsampwidth()
        sample_rate = wave_read.getframerate()
    self._wave_obj = sa.WaveObject(audio_data, num_channels,
                                   bytes_per_sample, sample_rate)
def play(self):
    """Play the SpectroGraphic sound and block until it has finished."""
    # Hand the sound array to simpleaudio as mono, 2 bytes per sample.
    playback = sa.WaveObject(self.sound_array, 1, 2, self.SAMPLE_RATE).play()
    playback.wait_done()
def p(f):
    """Load ``f``, scale its buffer to the 16-bit range and play it.

    Blocks until playback has finished.
    """
    f.load()
    samples = f.getBuffer().copy()
    samples *= 32767  # scale in place to the int16 peak
    pcm = samples.astype(np.int16)
    playback = sa.WaveObject(pcm, f.channels, 2, f.samplerate).play()
    playback.wait_done()
def addBufferSound(self, key, buff, sample_rate=44100):
    '''
    Store a caller-supplied sample buffer under ``key``.

    buff should be a numpy array of np.int16 with one or two dimensions
    (mono or stereo).  A two-dimensional buffer may be frame-major
    ``(n, channels)`` or channel-major ``(2, n)``; the latter is transposed
    so the samples reach simpleaudio interleaved.
    '''
    # convert to 16-bit data
    buff = np.asarray(buff).astype(np.int16)

    # The channel count comes from the array's dimensionality, not from
    # len(buff): len() is the number of rows, so a (n, 2) stereo buffer used
    # to be treated as mono and a 2-sample mono buffer as stereo.
    if buff.ndim == 2:
        if buff.shape[0] == 2 and buff.shape[1] != 2:
            buff = buff.T
        chan = buff.shape[1]
    else:
        chan = 1
    # A transposed view is not C-contiguous; simpleaudio reads raw memory.
    buff = np.ascontiguousarray(buff)

    logger.debug('Custom sound channels %s', chan)
    self.sound[key] = sa.WaveObject(buff, chan, 2, sample_rate)
    logger.debug(str(buff))
def play(self):
    """Compute the frame buffer and play it in a loop until stopped.

    The buffer is played repeatedly while ``self.stop_playing`` is false.
    If ``self.write_to_disk`` is set, the buffer is written once to
    ``self.filename`` as a WAV file and the flag is cleared.
    """
    t1 = time.time()
    self.buffer = self._calculate_frames()
    WO = simpleaudio.WaveObject(self.buffer, sample_rate=bw.FRAMERATE)
    t2 = time.time()
    # The logging API takes a %-format string plus args; the previous call
    # passed args without placeholders, which made logging report a
    # formatting error instead of the timing.
    logging.debug('_calculate_frames took %s seconds', t2 - t1)

    while not self.stop_playing:
        play_buffer = WO.play()
        if self.write_to_disk:
            self.write_to_disk = False
            with wave.open(self.filename, 'wb') as wav:
                wav.setparams(
                    (bw.CHANNELS, bw.SAMPLEWIDTH, bw.FRAMERATE,
                     self.chunk_length, 'NONE', 'not compressed'))
                wav.writeframes(self.buffer)
        play_buffer.wait_done()
def play(key, mode):
    """Play a random 16-note phrase drawn from the given mode of ``key``.

    Three four-note groups (a, b, d) are drawn at random from the scale and
    played in the order a, b, a, d.  Blocks until playback has finished.

    Args:
        key: Key passed to the scale builder.
        mode: One of ``"ionian"``, ``"dorian"``, ``"mixolydian"``,
            ``"locrian"``.

    Raises:
        KeyError: If ``mode`` is not one of the supported names.
    """
    # Dispatch lazily: only the requested scale is built, instead of
    # computing all four scales on every call.
    scale_builders = {
        "ionian": get_ionian_l,
        "dorian": get_dorian_pa,
        "mixolydian": get_mixolydian,
        "locrian": get_locrian,
    }
    c = scale_builders[mode](key)

    def random_group():
        # Four notes picked independently and uniformly from the scale.
        return np.array(
            tuple(c[random.randint(0, len(c) - 1)] for _ in range(4)))

    a = random_group()
    b = random_group()
    d = random_group()
    audio = np.vstack((a, b, a, d))

    # normalize to 16-bit range
    audio *= 32767 / np.max(np.abs(audio))
    # convert to 16-bit data
    audio = audio.astype(np.int16)

    wave_obj = sa.WaveObject(audio, 1, 2, sample_rate)
    # start playback
    play_obj = wave_obj.play()
    # wait for playback to finish before exiting
    play_obj.wait_done()
def _prepare_audio(path_to_file: str) -> WaveObject:
    """Prepare an audio file from a specific path to be played by simpleaudio

    Parameters
    ----------
    path_to_file : str
        path to the file relative to the working directory or absolute

    Returns
    -------
    WaveObject
        the audio file to be played by simpleaudio's WaveObject's `play()`
    """
    # Context manager so the file handle is closed once the frames are read.
    with wave.open(str(PurePath(path_to_file)), "rb") as wave_read:
        audio_data = wave_read.readframes(wave_read.getnframes())
        num_channels = wave_read.getnchannels()
        bytes_per_sample = wave_read.getsampwidth()
        sample_rate = wave_read.getframerate()
    return sa.WaveObject(audio_data, num_channels, bytes_per_sample,
                         sample_rate)
def play(self, left, right, ticks):
    """Accumulate one stereo sample and flush the buffer when it is full.

    A sample pair is stored only once the accumulated ``ticks`` exceed
    ``self.div``.  When the write index reaches half of
    ``SoundDriver.BUFFER_SIZE`` the buffer is played as 2-channel,
    1-byte-per-sample audio, replacing whatever was playing before.

    Args:
        left: Sample value for the left channel.
        right: Sample value for the right channel.
        ticks: Ticks elapsed since the previous call.
    """
    self.ticks += ticks
    if self.ticks <= self.div:
        return
    self.ticks = 0

    self.buffer[self.i] = left
    self.buffer[self.i + 1] = right
    self.i += 2

    if self.i >= SoundDriver.BUFFER_SIZE / 2:
        wave = bytes(self.buffer)
        wave_obj = sa.WaveObject(wave, 2, 1, self.sample_rate)

        # Stop the previous playback, if there is one.  Stopping stays
        # best-effort, but the handler no longer catches KeyboardInterrupt
        # or SystemExit as the bare ``except:`` did.
        previous = getattr(self, 'play_obj', None)
        if previous is not None:
            try:
                previous.stop()
            except Exception:
                pass

        self.play_obj = wave_obj.play()
        self.i = 0
def handle_audio(obj, wait=False):
    """Handle an audio event.

    This function plays an audio file.
    Currently only `.wav` format is supported.

    :param obj: An :py:class:`~turberfield.dialogue.model.Model.Audio`
        object.
    :param bool wait: Force a blocking wait until playback is complete.
    :return: The supplied object.

    """
    if not simpleaudio:
        return obj

    fp = pkg_resources.resource_filename(obj.package, obj.resource)
    # Context manager so the wave file is closed once the frames are read.
    with wave.open(fp, "rb") as data:
        nChannels = data.getnchannels()
        bytesPerSample = data.getsampwidth()
        sampleRate = data.getframerate()
        nFrames = data.getnframes()

        # A frame holds one sample for every channel, so a millisecond spans
        # sampleRate / 1000 frames regardless of the channel count.  The
        # previous code multiplied by nChannels, doubling offset and
        # duration for stereo files.
        # Clamp the offset to the file length so the duration is never
        # negative.
        offset = min(int(sampleRate * obj.offset // 1000), nFrames)
        duration = nFrames - offset
        if obj.duration is not None:
            duration = min(duration, int(sampleRate * obj.duration // 1000))

        data.readframes(offset)  # skip the leading frames
        frames = data.readframes(duration)

    # The same buffer is replayed on every loop; build the object once.
    waveObj = simpleaudio.WaveObject(frames, nChannels, bytesPerSample,
                                     sampleRate)
    for _ in range(obj.loop):
        playObj = waveObj.play()
        if obj.loop > 1 or wait:
            playObj.wait_done()
    return obj
def measure(settings, SW):
    """Run one detection trial and report when the key was pressed.

    Generates a waveform, waits for the user to press enter, starts ``SW``
    and the playback, and stops playback when ``DETECT_KEY`` is pressed.

    Args:
        settings: Mapping with at least ``'channels'`` and ``'bit_depth'``;
            ``'sample_rate'`` defaults to 44100.  It is also forwarded as
            keyword arguments to ``generate_waveform``.
        SW: Object providing ``start()``, ``join(forceStop=...)`` and
            ``getValue()``.

    Returns:
        Tuple ``(wf, raw, t)`` where ``t`` is ``SW.getValue() / 1e9`` or
        ``None`` when no value was recorded.
    """
    print('\nGenerating the sound sample', end='...')
    wf, raw = generate_waveform(BASE_WAVEFORM, NOISE_PROFILE, **settings)
    # NOTE(review): 'bit_depth' is passed as bytes_per_sample -- confirm the
    # setting is expressed in bytes rather than bits.
    wave_obj = sa.WaveObject(
        wf,
        num_channels=settings['channels'],
        bytes_per_sample=settings['bit_depth'],
        sample_rate=settings.get('sample_rate', 44100),
    )
    print('Done!')

    print('\nPress \'%s\' when you hear the white noise.' % DETECT_KEY)
    input('Press enter to start the experiment!')

    SW.start()
    play_obj = wave_obj.play()
    # Pressing the detect key stops playback, which releases wait_done().
    hotkey_handle = add_hotkey(DETECT_KEY, play_obj.stop)
    play_obj.wait_done()
    remove_hotkey(hotkey_handle)
    SW.join(forceStop=True)

    if SW.getValue() is not None:
        t = SW.getValue() / 1e9
        print('You detected noise at %.3f s.' % t)
    else:
        t = None
        print('You didn\'t detect any white noise!')
    return wf, raw, t
def _build_audio(audio_file_id: int,
                 applied_length: Optional[int] = None,
                 volume: int = 255):
    """Load an audio file row and build an ``AudioInfo`` for playback.

    Args:
        audio_file_id: ID of the ``AudioFiles`` row to load.
        applied_length: Target length; ``None`` keeps the file unchanged.
            When set, the audio is repeated and sliced to this length.
        volume: Value in 0-255; 0 yields a short silent segment.

    Raises:
        BadWriteError: If ``applied_length`` is negative or ``volume`` is
            outside 0-255.
        UnrecognizedKeyError: If no audio file has the given ID.
        AssertionError: If the decoded length disagrees with the stored
            ``n_seconds``.
    """
    if applied_length is not None and applied_length < 0:
        raise BadWriteError(f"File {audio_file_id}: length {applied_length} < 0")
    if volume < 0 or volume > 255:
        raise BadWriteError(f"The volume is {volume} but must be 0–255")

    import valarpy.model as model
    valar_obj = model.AudioFiles.select().where(
        model.AudioFiles.id == audio_file_id).first()
    if valar_obj is None:
        # A stray "." after the string literal made this line a syntax error.
        raise UnrecognizedKeyError(f"No audio file with ID {audio_file_id}")

    song = pydub.AudioSegment(data=valar_obj.data, sample_width=2,
                              frame_rate=44100, channels=1)
    # len(song) is compared against n_seconds * 1000.
    n_sec_valar = valar_obj.n_seconds * 1000
    length_delta = abs(len(song) - n_sec_valar)
    if length_delta > 0.00001:
        raise AssertionError(f"File {audio_file_id} is {len(song)}, but Valar says it’s {n_sec_valar}")

    if applied_length is None:
        resized = song
    else:
        n_repeats = math.ceil(applied_length / len(song))
        resized = (song * n_repeats)[0:applied_length]

    if volume == 0 or applied_length == 0:
        final = pydub.AudioSegment.silent(duration=0.5)
    else:
        # noinspection PyTypeChecker
        volume_floor = config.get_float("sauron.hardware.stimuli.audio.audio_floor")
        volume_ceil = config.get_float("sauron.hardware.stimuli.audio.audio_ceil")
        # Map 0-255 linearly onto [volume_floor, volume_ceil].
        final = resized + volume * (volume_ceil - volume_floor) / 255 + volume_floor

    play_obj = sa.WaveObject(final.raw_data, 1, 2, 44100)
    return AudioInfo(play_obj, applied_length, volume)
def addTone(self, key, duration=1.0, freq=1000.0, volume=1.0, sample_rate=44100):
    """Generate a sine tone and store it under ``key``.

    Args:
        key: Name under which the sound is stored in ``self.sound``.
        duration: Length in seconds.
        freq: Tone frequency in Hz.
        volume: Scale factor applied to the normalized 16-bit peak.
        sample_rate: Samples per second.
    """
    logger.debug(
        'Sound %s : Tone freq = %s Hz, duration = %s s, sample_rate = %s, volume = %s',
        key, freq, duration, sample_rate, volume)

    # One timestamp per sample; the endpoint is excluded.
    T = duration
    n_samples = int(T * sample_rate)
    t = np.linspace(0, T, n_samples, False)

    # Sine wave at the requested frequency.
    tone = np.sin(freq * t * 2 * np.pi)
    logger.debug(str(tone))

    # Scale so that the peak hits the 16-bit maximum times volume.
    peak = np.max(np.abs(tone))
    tone *= 32767 * volume / peak

    tone = tone.astype(np.int16)
    self.sound[key] = sa.WaveObject(tone, 1, 2, sample_rate)
    logger.debug(str(tone))
def play(self):
    """Start playing the module-level ``output`` buffer without blocking.

    The buffer is converted to int16 and played as 2-channel audio at
    44100 Hz; the wave and play objects are kept on the instance.
    """
    global output
    pcm = output.astype("int16")
    self.wave_obj = sa.WaveObject(pcm, 2, 2, 44100)
    self.play_obj = self.wave_obj.play()
def getPhraseWaveObj(self, key, phrase):
    """Return the cached WaveObject for ``key``, creating it on first use.

    Args:
        key: Cache key in ``self.pollyPhrases``.
        phrase: Text passed to ``self.getPollyPhrase`` on a cache miss.
    """
    cache = self.pollyPhrases
    if key in cache:
        return cache[key]

    # Cache miss: fetch the audio and wrap it as mono, 2 bytes/sample, 16 kHz.
    audio = self.getPollyPhrase(phrase)
    cache[key] = sa.WaveObject(audio, 1, 2, 16000)
    return cache[key]
# Render every part of the composition, mix the parts sample by sample,
# play the mix and write it to the WAV file named by sys.argv[3].
# NOTE(review): `sound1`, `start`, `instruments`, `reader`, `composition`,
# `BPM` and `FPS` are defined outside this fragment.
volume = reader.read_dynamic(composition['dynamic'])
musics = []
for part in composition['parts']:
    instrument_name = composition['parts'][part]
    instructions = reader.parse_music(composition['score'][part])
    # One sample list per part.
    musics.append([])
    count = 0
    for cmd in instructions:
        # TODO: handle more than notes
        # Note length passed to play_note: seconds per beat * FPS * beats.
        musics[-1].extend(instruments[instrument_name].play_note(
            cmd['frequency'], (60 / BPM) * FPS * cmd['beats']))
        count += 1
        # Legato is switched on once more than 12 commands were rendered.
        instruments[instrument_name].set_legato(count > 12)
# Mix only up to the shortest part so every index exists in all parts.
score_len = min([len(x) for x in musics])
for i in range(score_len):
    # Average the parts to get one mixed sample.
    sound1.append(int(sum([x[i] for x in musics]) / len(musics)))
print(time.time() - start)
# Play as mono, 2 bytes per sample, at FPS samples per second.
# NOTE(review): both WaveObject and writeframes need a bytes-like buffer;
# presumably `sound1` is an array.array or similar -- confirm.
a = sa.WaveObject(sound1, 1, 2, FPS)
b = a.play()
# Write the same samples to disk while playback is running.
with wave.open(sys.argv[3], mode='wb') as f:
    f.setframerate(FPS)
    f.setnchannels(1)
    f.setsampwidth(2)
    f.writeframes(sound1)
b.wait_done()
def play_from_buffer(buffer, bytes_per_sample=2, sample_rate=44100):
    """Start playing a mono audio buffer without waiting for it to finish.

    Args:
        buffer: Audio data to play.
        bytes_per_sample: Width of one sample in bytes.
        sample_rate: Samples per second.
    """
    mono = simpleaudio.WaveObject(
        buffer,
        num_channels=1,
        bytes_per_sample=bytes_per_sample,
        sample_rate=sample_rate,
    )
    mono.play()
def __init__(self, wave_file=None, label_file=None, start_time=0., volume=100., mqtt_client=None):
    """Set up a track from a wave file, a label file, or both.

    Args:
        wave_file: Path of the audio file, or ``None``.
        label_file: Path of a tab-separated label file with the columns
            start, end and text, where text is ``comment,speed,trigger``.
            Or ``None``.
        start_time: Position in seconds at which the track starts; earlier
            labels and audio are dropped.
        volume: Volume in percent; 100 leaves the audio untouched.
        mqtt_client: Client handed to the ``TimedMessageDispatcher``; labels
            are only loaded when both this and ``label_file`` are given.

    Raises:
        ValueError: If neither ``wave_file`` nor ``label_file`` is given.
    """
    self.name = ','.join(
        [fn for fn in [wave_file, label_file] if fn is not None])
    self.logger = logging.getLogger(f'putzini_track:{self.name}')
    if (wave_file is None) and (label_file is None):
        raise ValueError(
            'You must either specify a wave file or a label file!')

    self.wave_file = wave_file
    self.label_file = label_file
    self.mqtt_client = mqtt_client
    self.wave = None
    self.timing = None
    self.start_time = start_time
    self._loop = False
    self.playback = sa.PlayObject(0)

    if (label_file is not None) and (self.mqtt_client is not None):
        self.timing = TimedMessageDispatcher(self.mqtt_client)
        with open(self.label_file, newline='') as fh:
            reader = csv.DictReader(fh, delimiter='\t',
                                    fieldnames=['start', 'end', 'text'])
            lbls = []
            for row in reader:
                stp = {}
                stp['time'] = float(row['start'])
                txt = row['text'].split(',')
                stp['comment'] = txt[0]
                stp['speed'] = int(txt[1]) if txt[1] else None
                stp['trigger'] = txt[2].strip() == 'T'
                lbls.append(stp)
        # Drop labels that lie before the requested start position.
        lbls = [lbl for lbl in lbls if lbl['time'] >= self.start_time]
        self.timing.set_label_list(lbls)

    if wave_file is not None:
        t0 = time()
        if (volume == 100.) and (self.start_time == 0.):
            # Nothing to modify: let simpleaudio read the file directly.
            self.wave = sa.WaveObject.from_wave_file(wave_file)
        else:
            self.logger.info(
                'Loading wave file %s explicitly into array...', wave_file)
            sr, waveform = wavfile.read(wave_file)
            self.logger.info(
                'Wave file has %.1f seconds (%.2f minutes) at %s Hz sample rate',
                waveform.shape[0] / sr, waveform.shape[0] / sr / 60, sr)
            # Slice along the sample axis only, so that mono (1-D) files
            # work as well as multi-channel (2-D) ones; the previous
            # ``[start:, :]`` indexing failed on 1-D arrays.
            first_sample = int(sr * start_time)
            waveform = (volume / 100 * waveform[first_sample:]).astype(
                np.int16)
            # Derive the channel count from the data instead of relying on
            # simpleaudio's default of two channels.
            num_channels = 1 if waveform.ndim == 1 else waveform.shape[1]
            self.wave = sa.WaveObject(waveform, num_channels=num_channels,
                                      sample_rate=sr)
        self.logger.info('Loading wave file %s took %.1f seconds.',
                         wave_file, time() - t0)
def PlayVideo(summary_frame_path, summary_audio_path):
    """Show a folder of numbered JPEG frames as a video with its audio track.

    All frames are loaded into memory, then an OpenCV window loop displays
    them and reacts to the keys F (play), P (pause), R (previous frame),
    N (next frame) and Esc (exit).

    :param summary_frame_path: Directory with frames named ``<number>.jpg``.
        The frame file name is built by plain string concatenation, so the
        path presumably has to end with a path separator -- TODO confirm.
    :param summary_audio_path: Path of the WAV file played with the frames.
    """
    # video = sys.argv[1]
    videobuffer = []
    # Frame numbers are taken from the file names (extension removed).
    files = [
        int(os.path.splitext(f)[0]) for f in os.listdir(summary_frame_path)
        if isfile(join(summary_frame_path, f))
    ]
    # sort the files
    # see python reference https://docs.python.org/3/howto/sorting.html
    files.sort()
    for i in range(len(files)):
        filename = summary_frame_path + str(files[i]) + ".jpg"
        img = cv2.imread(filename)
        videobuffer.append(img)
    audiocap = AudioSegment.from_file(summary_audio_path, "wav")
    # audiocap = AudioSegment.from_wav(debug_audio)
    cv2.namedWindow('image')
    cv2.moveWindow('image', 320, 180)
    cv2.namedWindow('controls')
    cv2.moveWindow('controls', 250, 50)
    # Static help banner shown in the 'controls' window.
    controls = np.zeros((50, 750), np.uint8)
    cv2.putText(
        controls,
        "F: Resume/Play, P: Pause, R: Rewind, N: Fast Forward, Esc: Exit",
        (120, 30), cv2.FONT_HERSHEY_PLAIN, 1, 200)
    framerate = audiocap.frame_rate
    wave_obj = sa.WaveObject(audiocap.raw_data,
                             num_channels=audiocap.channels,
                             bytes_per_sample=audiocap.sample_width,
                             sample_rate=audiocap.frame_rate)
    play_obj = None
    FPS = 1.0 / 30.0 * 1000.0
    tots = len(videobuffer)
    i = 0  # index of the frame currently shown
    frame_rate = 30

    def process(im):
        return cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)

    status = 'stay'
    # Counts frames shown since the audio was last restarted at the frame
    # position.
    last_audio_sync = 0
    while True:
        new_time = time.time()
        cv2.imshow("controls", controls)
        try:
            # Wrap around to the first frame at the end of the buffer.
            if i == tots - 1:
                i = 0
            # cap.set(cv2.CAP_PROP_POS_FRAMES, i)
            im = videobuffer[i]
            # r = 750.0 / im.shape[1]
            # dim = (750, int(im.shape[0] * r))
            # im = cv2.resize(im, dim, interpolation = cv2.INTER_AREA)
            # if im.shape[0]>600:
            #     im = cv2.resize(im, (500,500))
            #     controls = cv2.resize(controls, (im.shape[1],25))
            #cv2.putText(im, status, )
            cv2.imshow('image', im)
            # Map the pressed key to a status; -1 (no key) keeps the current
            # one.  Any other key raises KeyError, handled below.
            status = {
                ord('p'): 'stay',
                ord('P'): 'stay',
                ord('f'): 'play',
                ord('F'): 'play',
                ord('r'): 'prev_frame',
                ord('R'): 'prev_frame',
                ord('n'): 'next_frame',
                ord('N'): 'next_frame',
                -1: status,
                27: 'exit'
            }[cv2.waitKey(10)]
            if status == 'play':
                # NOTE(review): no trackbar named 'F' or 'S' is created in
                # this function -- confirm they exist elsewhere.
                frame_rate = cv2.getTrackbarPos('F', 'image')
                if play_obj is None:
                    play_obj = wave_obj.play()
                # Restart the audio from the current frame position when it
                # has stopped or more than 30 frames passed since last sync.
                if not play_obj.is_playing() or last_audio_sync > 30:
                    if play_obj is not None:
                        play_obj.stop()
                    # must have changed position
                    # Frame index converted to milliseconds at 30 fps.
                    audio_frame_index = (i * 1000.0) // 30
                    newaudiocap = audiocap[audio_frame_index:]
                    wave_obj = sa.WaveObject(
                        newaudiocap.raw_data,
                        num_channels=audiocap.channels,
                        bytes_per_sample=audiocap.sample_width,
                        sample_rate=audiocap.frame_rate)
                    play_obj = wave_obj.play()
                    last_audio_sync = 0
                # audio_frame_index = i / 30.0 * 1000.0
                #print(str(i) + ", " + str(audio_frame_index))
                # asa = audiocap[audio_frame_index:audio_frame_index+msbetweenframes]
                # play_buffer(asa.raw_data, 2, 2, 48000)
                last_audio_sync += 1
                i += 1
                # Busy-wait until 1/30 s has passed since the loop started.
                while time.time() - new_time < 1.0 / 30.0:
                    pass
                cv2.setTrackbarPos('S', 'image', i)
                continue
            if status == 'stay':
                # i = cv2.getTrackbarPos('S','image')
                if play_obj is not None:
                    play_obj.stop()
            if status == 'exit':
                break
            # Single-frame steps fall back to the paused state afterwards.
            if status == 'prev_frame':
                i -= 1
                status = 'stay'
            if status == 'next_frame':
                i += 1
                status = 'stay'
            while time.time() - new_time < 1.0 / 30.0:
                pass
        except KeyError:
            print("Invalid Key was pressed")
    cv2.destroyWindow('image')
def main():
    """Train a model as configured on the command line.

    Parses the options, prepares the data feeds, model, optimizer and
    learning rate scheduler, optionally resumes from ``<modelfile>.resume``,
    runs the training/validation loop and saves the final model.

    Two configuration keys divert from training: ``benchmark_datafeed``
    times the creation of minibatches and returns, ``play_datafeed`` plays
    back training data as audio.

    Returns:
        ``1`` if training was stopped after five non-finite losses in a row
        within one epoch, otherwise ``None``.

    Raises:
        RuntimeError: In debug mode, on the first non-finite training loss.
    """
    # parse command line
    parser = opts_parser()
    options = parser.parse_args()
    modelfile = options.modelfile
    cfg = config.from_parsed_arguments(options)
    if not options.cuda_device:
        device = torch.device('cpu')
    else:
        device = torch.device('cuda:%d' % options.cuda_device[0])
        torch.cuda.set_device(options.cuda_device[0])
        if options.cuda_sync_mode != 'auto':
            set_cuda_sync_mode(options.cuda_sync_mode)

    # prepare training data generator
    print("Preparing training data feed...")
    train_data = get_dataset(cfg, 'train')
    print_data_info(train_data)
    train_loader = get_dataloader(cfg, train_data, 'train')

    # start training data generation in background
    train_batches = iterate_infinitely(train_loader)
    train_batches = iterate_data(train_batches, device, cfg)

    # if told so, benchmark the creation of a given number of minibatches
    if cfg.get('benchmark_datafeed'):
        print("Benchmark: %d minibatches of %d items..." %
              (cfg['benchmark_datafeed'], cfg['batchsize']))
        import itertools
        t0 = time.time()
        # consume exactly `benchmark_datafeed` batches without storing them
        next(
            itertools.islice(train_batches, cfg['benchmark_datafeed'],
                             cfg['benchmark_datafeed']), None)
        t1 = time.time()
        print("%.3gs per minibatch." %
              ((t1 - t0) / cfg['benchmark_datafeed']))
        return

    # if told so, play back a given key of the training data as audio
    if cfg.get('play_datafeed'):
        import simpleaudio as sa
        for batch in train_batches:
            for wav in batch[cfg['play_datafeed']]:
                if wav.dtype.is_floating_point:
                    wav = (wav * np.iinfo(np.int16).max).short()
                sa.WaveObject(
                    wav.cpu().numpy().T.data,
                    num_channels=wav.shape[0],
                    bytes_per_sample=2,
                    sample_rate=cfg['data.sample_rate']).play().wait_done()

    # prepare validation data generator
    print("Preparing validation data feed...")
    val_data = get_dataset(cfg, 'valid')
    print_data_info(val_data)
    val_loader = get_dataloader(cfg, val_data, 'valid')

    # enable cuDNN auto-tuning if on GPU and all data sizes are constant
    if options.cuda_device and not any(s is None
                                       for data in (train_data, val_data)
                                       for shape in data.shapes.values()
                                       for s in shape):
        torch.backends.cudnn.benchmark = True

    # prepare model
    print("Preparing network...")
    # instantiate neural network
    model = get_model(cfg, train_data.shapes, train_data.dtypes,
                      train_data.num_classes, options.cuda_device)
    print(model)
    print_model_info(model)

    if cfg['train.teacher_model']:
        print("Preparing teacher network...")
        teacher_modelfile = cfg['train.teacher_model']
        teacher_device = torch.device(cfg['train.teacher_model.device']
                                      or device)
        teacher_cfg = dict(cfg)
        teacher_cfg.update(
            config.parse_config_file(
                teacher_modelfile.rsplit('.', 1)[0] + '.vars'))
        teacher_model = get_model(teacher_cfg, train_data.shapes,
                                  train_data.dtypes, train_data.num_classes,
                                  teacher_device.index)
        teacher_model.load_state_dict(
            torch.load(teacher_modelfile, map_location=teacher_device))
        teacher_model.train(False)

    # obtain cost functions
    train_metrics = get_metrics(cfg, 'train')
    val_metrics = get_metrics(cfg, 'valid')
    extract_loss = get_loss_from_metrics(cfg)

    # initialize optimizer
    params = model.parameters()
    if cfg['train.first_params']:
        first_params_count = cfg['train.first_params']
        # if a string, treat as a submodule name, figure out its param count
        if isinstance(first_params_count, str):
            first_params_count = sum(
                len(list(reduce(getattr, name.split('.'),
                                model).parameters()))
                for name in first_params_count.split('+'))
        # advance the `params` iterator, keep the first parameters separately
        params = iter(params)
        first_params = [next(params) for _ in range(first_params_count)]
    optimizer = get_optimizer(cfg, params)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        factor=cfg['train.eta_decay'],
        patience=cfg['train.patience'],
        cooldown=cfg['train.cooldown'],
        verbose=True)

    # initialize mixed-precision training
    if cfg['float16']:
        from apex import amp
        model, optimizer = amp.initialize(model, optimizer,
                                          opt_level=cfg['float16.opt_level'])
        if cfg['train.teacher_model']:
            teacher_model = amp.initialize(
                teacher_model, opt_level=cfg['float16.opt_level'])

    # initialize tensorboard logger, if requested
    if options.logdir:
        from tensorboardize import TensorboardLogger
        logger = TensorboardLogger(options.logdir, cfg=cfg,
                                   dataloader=val_loader, model=model,
                                   optimizer=optimizer)
    else:
        logger = None

    # resume training state if possible
    if options.resume and os.path.exists(options.modelfile + '.resume'):
        state = torch.load(options.modelfile + '.resume',
                           map_location=device)
        model.load_state_dict(state.pop('model'))
        optimizer.load_state_dict(state.pop('optimizer'))
        scheduler.load_state_dict(state.pop('scheduler'))
        history = state.pop('history')
        epoch = state.pop('epoch')
        if cfg['float16']:
            amp.load_state_dict(state.pop('amp'))
        if (cfg['train.first_params']
                and epoch > cfg['train.first_params.delay']):
            add_optimizer_params(optimizer, scheduler, first_params,
                                 cfg['train.first_params.eta_scale'])
    else:
        history = {}
        epoch = 0
        # load pretrained weights if requested
        if cfg['model.init_from']:
            # `map_location` is an argument of torch.load(); it was
            # previously passed to load_state_dict(), which does not accept
            # it and raised a TypeError.
            model.load_state_dict(
                torch.load(os.path.join(os.path.dirname(__file__),
                                        cfg['model.init_from']),
                           map_location=device))
        else:
            # run custom initializations
            init_model(model, cfg)

    # log initial state
    if logger is not None:
        logger.log_start()

    # warn about unused configuration keys
    config.warn_unused_variables(
        cfg, ('train.epochs', 'train.epochsize', 'train.min_eta',
              'train.patience_reference', 'loss'))

    # run training loop
    print("Training:")
    for epoch in range(epoch, cfg['train.epochs']):
        # add first_params to optimizer when the delay has passed
        if (cfg['train.first_params']
                and cfg['train.first_params.delay'] == epoch):
            add_optimizer_params(optimizer, scheduler, first_params,
                                 cfg['train.first_params.eta_scale'])
            if cfg['debug']:
                print(
                    'Training first %d parameters with learning rate '
                    'scaled by %f.' %
                    (first_params_count, cfg['train.first_params.eta_scale']))

        # training pass
        model.train(True)
        if cfg['debug']:
            torch.autograd.set_detect_anomaly(True)
        train_errors = AverageMetrics()
        nans_in_a_row = 0
        for _ in tqdm.trange(cfg['train.epochsize'],
                             desc='Epoch %d/%d' %
                             (epoch + 1, cfg['train.epochs']),
                             ascii=bool(cfg['tqdm.ascii'])):
            # grab the next minibatch
            batch = next(train_batches)
            # reset gradients
            optimizer.zero_grad()
            # compute output
            preds = model(batch)
            # compute born-again output, if needed
            if cfg['train.teacher_model']:
                teacher_batch = copy_to_device(batch, teacher_device)
                with torch.no_grad():
                    teacher_preds = teacher_model(teacher_batch)
                teacher_preds = copy_to_device(teacher_preds, device)
                batch.update(
                    ('teacher.' + k, v) for k, v in teacher_preds.items())
            # compute training metrics and loss
            metrics = OrderedDict(
                (k, fn(preds, batch)) for k, fn in train_metrics.items())
            loss = extract_loss(metrics)
            # bail out if Not a Number
            if not np.isfinite(loss.item()):
                if cfg['debug']:
                    raise RuntimeError('Training error is NaN!')
                nans_in_a_row += 1
                if nans_in_a_row < 5:
                    print('Training error is NaN! Skipping step.')
                    continue
                else:
                    print('Training error is NaN! Stopping training.')
                    return 1
            else:
                nans_in_a_row = 0
            train_errors += metrics
            train_errors += {'loss': loss.item()}
            # backprop and update
            if cfg['float16']:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()
            optimizer.step()
        print_metrics('Train', train_errors.aggregate())
        del batch, preds, loss

        # validation pass
        model.train(False)
        val_errors = AverageMetrics()
        for batch in iterate_data(iter(val_loader), device, cfg):
            with torch.no_grad():
                preds = model(batch)
                metrics = {
                    k: fn(preds, batch)
                    for k, fn in val_metrics.items()
                }
            val_loss = float(extract_loss(metrics).item())
            val_errors += metrics
            val_errors += {'loss': val_loss}
        print_metrics('Valid', val_errors.aggregate())
        del batch, preds, val_loss

        log_metrics(train_errors.aggregate(), val_errors.aggregate(),
                    history, modelfile)
        if logger is not None:
            logger.log_epoch(epoch, {k: v[-1] for k, v in history.items()})

        # learning rate update
        # a leading '-' in the reference name negates the value
        reference = history[cfg['train.patience_reference'].lstrip('-')][-1]
        if hasattr(reference, 'mean'):
            reference = reference.mean()
        if cfg['train.patience_reference'].startswith('-'):
            reference *= -1
        scheduler.step(reference)
        if optimizer.param_groups[0]['lr'] < cfg['train.min_eta']:
            print('Learning rate fell below threshold. Stopping training.')
            break

        # save training state to resume file
        resume_state = dict(model=model.state_dict(),
                            optimizer=optimizer.state_dict(),
                            scheduler=scheduler.state_dict(),
                            epoch=epoch + 1,
                            history=history)
        if cfg['float16']:
            resume_state['amp'] = amp.state_dict()
        torch.save(resume_state, options.modelfile + '.resume')
        del resume_state

    # save final network and the configuration used
    print("Saving final model")
    save_model(modelfile, model, cfg)

    # delete resume file if any
    if os.path.exists(options.modelfile + '.resume'):
        os.remove(options.modelfile + '.resume')

    # log the final state
    if logger is not None:
        logger.log_end(history)