def scrape(self):
    """Extract loops from one sound and register them in the pool.

    A sound is fetched with ``get_sound()``. If it is longer than twice
    ``self.sample_length``, windows of ``self.sample_length`` are read at
    randomly advancing offsets and ``extract_loops()`` is run on each window.
    Every loop found is written to a path obtained from
    ``self._get_free_path()`` and recorded in ``self.pool`` (under
    ``self.pool_lock``) with the key ``'<track_id>_<n>'``.

    Sounds that are not long enough are skipped. Returns ``None``.
    """
    track_id, filename, sound_length = get_sound()

    # Only sounds longer than two sample windows are processed.
    # TODO: make this less restrictive to waste a bit less
    if sound_length <= 2 * self.sample_length:
        return

    id_counter = 0
    offset = 0
    upper_limit = sound_length - 2 * self.sample_length
    while offset + 2 * self.sample_length < upper_limit:
        # Pick the start of the next window at random: somewhere between the
        # current offset and 20% of the sound's length further on, capped at
        # upper_limit.
        offset = random.randint(
            offset, int(min(offset + sound_length * 0.2, upper_limit)))
        sample = Sound.from_file(
            filename, start=offset, end=offset + self.sample_length)
        loops = sample.extract_loops()

        # Pre-bind the loop variable so the cleanup below is valid even when
        # no loop was extracted from this window.
        loop = None
        for loop in loops:
            # NOTE(review): the check runs before the increment, so up to 11
            # loops are stored per sound -- confirm this is intended.
            if id_counter > 10:
                # Enough loops: push the offset to the limit so that the
                # enclosing while loop terminates as well.
                offset = upper_limit
                break
            id_counter += 1
            loop_id = '%s_%s' % (track_id, id_counter)
            loop_path = self._get_free_path()
            logger.info('loop extracted to %s', loop_path)
            loop.to_file(loop_path)

            # The pool entry is a copy of loop.loop_infos extended with the
            # bookkeeping fields below. 'timbre_start' / 'timbre_end' are
            # re-read explicitly from loop_infos, as in the original code.
            # (Echonest key / key_confidence are currently not stored.)
            with self.pool_lock:
                self.pool[loop_id] = dict(
                    loop.loop_infos,
                    path=loop_path,
                    length=loop.length,
                    loop_id=loop_id,
                    track_id=track_id,
                    timbre_start=loop.loop_infos['timbre_start'],
                    timbre_end=loop.loop_infos['timbre_end'],
                )

        # Move past the window that was just processed.
        offset += self.sample_length

        # Drop every reference to the sounds to save memory.
        # We also have to collect manually because of a "bug" in pandas:
        # https://github.com/pydata/pandas/issues/2659
        del loop, loops, sample
        gc.collect()
def scrape(self):
    """Extract a pad from one sound and register it in the pool.

    A sound is fetched with ``get_sound()`` and loaded with
    ``end=min(30, pad_length)``. The result of ``remove_beats()`` on it is
    written to a path obtained from ``self._get_free_path()``, and an entry
    holding that path and the pad id is stored in ``self.pool`` (under
    ``self.pool_lock``), keyed by the track id formatted as a string.

    Returns ``None``.
    """
    track_id, pad_path, pad_length = get_sound()
    new_pad_path = self._get_free_path()

    # Wrap in a 1-tuple: with a bare value, a tuple-valued track_id would be
    # unpacked as the format arguments instead of being formatted as a whole.
    pad_id = '%s' % (track_id,)

    # NOTE(review): 30 is presumably a cap expressed in the same unit as
    # pad_length -- confirm.
    sound = Sound.from_file(pad_path, end=min(30, pad_length))
    sound = sound.remove_beats()
    sound.to_file(new_pad_path)
    logger.info('pad extracted to %s', new_pad_path)

    # No explicit del / gc.collect() is done here: that cleanup was already
    # disabled in the original code.
    with self.pool_lock:
        self.pool[pad_id] = {
            'path': new_pad_path,
            'pad_id': pad_id,
        }