def load_from_folder(self, resample, use_default_labels):
    """Train ``self.train_set`` from the regular files in ``self.folder_path``.

    The process working directory is changed to ``self.folder_path`` and
    every regular file found there becomes one training sample.

    Args:
        resample: Only consulted when ``use_default_labels`` is false. When
            true, each file is re-encoded with ``/usr/local/bin/lame``
            (``--resample 16``); on success the source file is deleted and
            the re-encoded file is renamed after its label.
        use_default_labels: When true, labels are derived from the file
            names (underscores become spaces, ``.mp3`` is dropped, the
            result is title-cased). When false, a label is read
            interactively for every file; an empty answer falls back to
            the file name.

    Returns:
        ``self.train_set._id`` after the train set has been saved.

    Note:
        ``logger``, ``audio`` and ``conn_str`` are module-level names that
        are not visible from this block.
    """
    os.chdir(self.folder_path)
    # Take the listing once so files created or renamed below are not revisited.
    filenames = [f for f in os.listdir('.') if os.path.isfile(f)]
    training_files = []

    if use_default_labels:
        for filename in filenames:
            label = filename.replace('_', ' ').replace('.mp3', '').title()
            training_files.append((label, filename))
            logger.log("Using %s with label %s" % (filename, label))
    else:
        resample_prefix = 'resample_'
        # lame's stdout is discarded through os.devnull: an unread
        # subprocess.PIPE can fill up and block the child forever.
        with open(os.devnull, 'w') as devnull:
            for filename in filenames:
                print(" enter label for '" + filename + "' >")
                label = raw_input()
                if not label:
                    label = filename

                if not resample:
                    training_files.append((label, filename))
                    continue

                target = resample_prefix + filename
                generated_name = label.strip().replace(" ", "_").lower() + '.mp3'
                try:
                    # check_call raises on a non-zero exit status, so the
                    # original file is only removed after lame succeeded.
                    subprocess.check_call(
                        ['/usr/local/bin/lame', '-V5', '--vbr-new',
                         '--resample', '16', filename, target],
                        stdout=devnull)
                    os.remove(filename)
                    os.rename(target, generated_name)
                except (OSError, subprocess.CalledProcessError):
                    # Best effort: report the file and leave it out of training.
                    logger.warn(" could not resample file '%s'!" % filename)
                    continue
                training_files.append((label, generated_name))

    print('finished resampling!')
    self.train_set.data = audio.train(training_files, self.folder_path,
                                      self.train_set.config,
                                      self.train_set.name, conn_str)
    self.train_set.s3_links = []
    self.train_set.status = "trained"
    self.train_set.save()
    return self.train_set._id
def load_from_api(self, resample):
    """Download the tracks in ``self.train_set.s3_links`` and train on them.

    The process working directory is changed to ``self.folder_path``. Each
    entry of ``self.train_set.s3_links`` is read through its ``'label'`` and
    ``'url'`` keys; the URL is fetched with ``curl -O`` and the downloaded
    file gets an ``.mp3`` suffix appended to its name.

    Args:
        resample: When true, every downloaded file is re-encoded with
            ``/usr/local/bin/lame`` (``--resample 16``) into
            ``resample_<name>`` and that file is used for training.

    Tracks that cannot be fetched or resampled are reported and skipped.
    Any other error stops the download loop; training then runs on the
    tracks collected so far.

    Note:
        ``audio`` and ``conn_str`` are module-level names that are not
        visible from this block.
    """
    os.chdir(self.folder_path)
    training_files = []
    try:
        # Child stdout is discarded through os.devnull: an unread
        # subprocess.PIPE can fill up and block the child forever.
        with open(os.devnull, 'w') as devnull:
            for track in self.train_set.s3_links:
                label = track['label']
                url = track['url']
                # Two spaces on purpose: same output as `print "fetching: ", url`.
                print("fetching:  %s" % (url,))
                if subprocess.call(['curl', '-O', url], stdout=devnull) != 0:
                    print(" could not fetch '%s'!" % (url,))
                    continue

                downloaded = url.split('/')[-1]
                filename = downloaded + '.mp3'
                os.rename(downloaded, filename)

                target = filename
                if resample:
                    print("resampling in 16000...")
                    target = 'resample_' + filename
                    try:
                        status = subprocess.call(
                            ['/usr/local/bin/lame', '-V5', '--vbr-new',
                             '--resample', '16', filename, target],
                            stdout=devnull)
                    except OSError:
                        # lame could not be started at all.
                        status = -1
                    if status != 0:
                        # Skip the track: `target` was not produced, so it
                        # must not be handed to the trainer.
                        print(" could not resample file '%s'!" % filename)
                        continue
                training_files.append((label, target))
    except Exception:
        # Best effort: keep whatever was collected before the failure.
        print(" could not do training.")

    print('finished resampling!')
    self.train_set.data = audio.train(training_files, self.folder_path,
                                      self.train_set.config,
                                      self.train_set.name, conn_str)
    self.train_set.status = "trained"
    self.train_set.save()