def getSpecs(path):

    specs = []
    noise = []

    # Get mel-specs for file
    for spec in audio.specsFromFile(path,
                                    rate=cfg.SAMPLE_RATE,
                                    seconds=cfg.SPEC_LENGTH,
                                    overlap=cfg.SPEC_OVERLAP,
                                    minlen=cfg.SPEC_MINLEN,
                                    fmin=cfg.SPEC_FMIN,
                                    fmax=cfg.SPEC_FMAX,
                                    spec_type=cfg.SPEC_TYPE,
                                    shape=(cfg.IM_SIZE[1], cfg.IM_SIZE[0])):

        # Determine signal-to-noise ratio
        s2n = audio.signal2noise(spec)
        specs.append(spec)
        noise.append(s2n)

    # Shuffle arrays (we want to select randomly later)
    specs, noise = shuffle(specs, noise, random_state=RANDOM)

    return specs, noise
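# --- Example (not part of the original code) --------------------------------
# A minimal sketch of how the (specs, noise) pair returned by getSpecs() could
# be filtered so that only the strongest spectrograms are kept. The helper
# name, the default threshold and the cap parameter are assumptions made for
# illustration only.
def selectSpecsBySignal(specs, noise, threshold=0.001, max_specs=None):

    # Pair every spectrogram with its signal-to-noise score and sort,
    # strongest first
    ranked = sorted(zip(specs, noise), key=lambda sn: sn[1], reverse=True)

    # Keep only spectrograms above the threshold
    selected = [s for s, n in ranked if n >= threshold]

    # Optionally cap the number of spectrograms per file
    if max_specs is not None:
        selected = selected[:max_specs]

    return selected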
def analyzeFile(soundscape, test_function):

    ncnt = 0

    # Store analysis here
    analysis = {}

    # Keep track of timestamps
    pred_start = 0

    # Set species list accordingly
    setSpeciesList(cfg.DEPLOYMENT_LOCATION[0], cfg.DEPLOYMENT_LOCATION[1], cfg.DEPLOYMENT_WEEK)

    # Get specs for file
    spec_batch = []
    for spec in audio.specsFromFile(soundscape,
                                    rate=cfg.SAMPLE_RATE,
                                    seconds=cfg.SPEC_LENGTH,
                                    overlap=cfg.SPEC_OVERLAP,
                                    minlen=cfg.SPEC_MINLEN,
                                    fmin=cfg.SPEC_FMIN,
                                    fmax=cfg.SPEC_FMAX,
                                    win_len=cfg.WIN_LEN,
                                    spec_type=cfg.SPEC_TYPE,
                                    magnitude_scale=cfg.MAGNITUDE_SCALE,
                                    bandpass=True,
                                    shape=(cfg.IM_SIZE[1], cfg.IM_SIZE[0]),
                                    offset=0,
                                    duration=None):

        # Prepare as input
        spec = model.prepareInput(spec)

        # Add to batch
        if len(spec_batch) > 0:
            spec_batch = np.vstack((spec_batch, spec))
        else:
            spec_batch = spec

        # Do we have enough specs for a prediction?
        if len(spec_batch) >= cfg.SPECS_PER_PREDICTION:

            # Make prediction
            p, _ = model.predict(spec_batch, test_function)

            # Calculate next timestamp
            pred_end = pred_start + cfg.SPEC_LENGTH + ((len(spec_batch) - 1) * (cfg.SPEC_LENGTH - cfg.SPEC_OVERLAP))

            # Store prediction
            analysis[getTimestamp(pred_start, pred_end)] = p

            # Advance to next timestamp
            pred_start = pred_end - cfg.SPEC_OVERLAP
            spec_batch = []

    return analysis
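# --- Example (not part of the original code) --------------------------------
# analyzeFile() keys its predictions by getTimestamp(pred_start, pred_end),
# which is defined elsewhere. The sketch below shows one plausible
# 'mm:ss-mm:ss' formatting, assuming start and end are given in seconds; the
# real helper may differ.
def timestampSketch(start_seconds, end_seconds):

    def fmt(seconds):
        m, s = divmod(int(seconds), 60)
        return '{:02d}:{:02d}'.format(m, s)

    return fmt(start_seconds) + '-' + fmt(end_seconds)

# timestampSketch(0, 5) -> '00:00-00:05'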
def getSpecBatches(split):

    # Random Seed
    random = cfg.getRandomState()

    # Make predictions for every testfile
    for t in split:

        # Spec batch
        spec_batch = []

        # Get specs for file
        for spec in audio.specsFromFile(t[0],
                                        cfg.SAMPLE_RATE,
                                        cfg.SPEC_LENGTH,
                                        cfg.SPEC_OVERLAP,
                                        cfg.SPEC_MINLEN,
                                        shape=(cfg.IM_SIZE[1], cfg.IM_SIZE[0]),
                                        fmin=cfg.SPEC_FMIN,
                                        fmax=cfg.SPEC_FMAX,
                                        spec_type=cfg.SPEC_TYPE):

            # Resize spec
            spec = image.resize(spec, cfg.IM_SIZE[0], cfg.IM_SIZE[1], mode=cfg.RESIZE_MODE)

            # Normalize spec
            spec = image.normalize(spec, cfg.ZERO_CENTERED_NORMALIZATION)

            # Prepare as input
            spec = image.prepare(spec)

            # Add to batch
            if len(spec_batch) > 0:
                spec_batch = np.vstack((spec_batch, spec))
            else:
                spec_batch = spec

            # Batch too large?
            if spec_batch.shape[0] >= cfg.MAX_SPECS_PER_FILE:
                break

        # No specs?
        if len(spec_batch) == 0:
            spec = random.normal(0.0, 1.0, (cfg.IM_SIZE[1], cfg.IM_SIZE[0]))
            spec_batch = image.prepare(spec)

        # Shuffle spec batch
        spec_batch = shuffle(spec_batch, random_state=random)

        # yield batch, labels and filename
        yield spec_batch[:cfg.MAX_SPECS_PER_FILE], t[1], t[0].split(os.sep)[-1]
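# --- Example (not part of the original code) --------------------------------
# Sketch of how the getSpecBatches() generator above could be consumed: one
# batch per test file, with the per-spectrogram predictions averaged into a
# single file-level prediction. `predict_fn` is a placeholder for whatever
# model call the surrounding code uses; its signature is an assumption.
import numpy as np

def predictPerFile(split, predict_fn):

    results = {}
    for spec_batch, labels, filename in getSpecBatches(split):

        # predict_fn is assumed to return one probability vector per spec
        p = predict_fn(spec_batch)

        # Average pooling over all spectrograms of the file
        results[filename] = (np.mean(p, axis=0), labels)

    return results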
def getSpecBatches(split):

    # Random Seed
    random = cfg.getRandomState()

    # Make predictions for every testfile
    for t in split:

        # Spec batch
        spec_batch = []

        # Keep track of timestamps
        pred_start = 0

        # Get specs for file
        for spec in audio.specsFromFile(t[0],
                                        cfg.SAMPLE_RATE,
                                        cfg.SPEC_LENGTH,
                                        cfg.SPEC_OVERLAP,
                                        cfg.SPEC_MINLEN,
                                        shape=(cfg.IM_SIZE[1], cfg.IM_SIZE[0]),
                                        fmin=cfg.SPEC_FMIN,
                                        fmax=cfg.SPEC_FMAX):

            # Resize spec
            spec = image.resize(spec, cfg.IM_SIZE[0], cfg.IM_SIZE[1], mode=cfg.RESIZE_MODE)

            # Normalize spec
            spec = image.normalize(spec, cfg.ZERO_CENTERED_NORMALIZATION)

            # Prepare as input
            spec = image.prepare(spec)

            # Add to batch
            if len(spec_batch) > 0:
                spec_batch = np.vstack((spec_batch, spec))
            else:
                spec_batch = spec

            # Batch too large?
            if spec_batch.shape[0] >= cfg.MAX_SPECS_PER_FILE:
                break

            # Do we have enough specs for a prediction?
            if len(spec_batch) >= cfg.SPECS_PER_PREDICTION:

                # Calculate next timestamp
                pred_end = pred_start + cfg.SPEC_LENGTH + ((len(spec_batch) - 1) * (cfg.SPEC_LENGTH - cfg.SPEC_OVERLAP))

                # Store prediction
                ts = getTimestamp(int(pred_start), int(pred_end))

                # Advance to next timestamp
                pred_start = pred_end - cfg.SPEC_OVERLAP

                yield spec_batch, t[1], ts, t[0].split(os.sep)[-1]

                # Spec batch
                spec_batch = []
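# --- Example (not part of the original code) --------------------------------
# The pred_start/pred_end arithmetic above stitches overlapping spectrograms
# back into wall-clock intervals. A quick worked example with assumed values
# SPEC_LENGTH = 3.0 s, SPEC_OVERLAP = 2.0 s and 5 specs per prediction:
def timestampWalkthrough(spec_length=3.0, overlap=2.0, specs_per_batch=5, n_batches=3):

    intervals = []
    pred_start = 0.0
    for _ in range(n_batches):

        # Same formula as in getSpecBatches()
        pred_end = pred_start + spec_length + ((specs_per_batch - 1) * (spec_length - overlap))
        intervals.append((pred_start, pred_end))

        # Consecutive intervals overlap by SPEC_OVERLAP seconds
        pred_start = pred_end - overlap

    return intervals

# timestampWalkthrough() -> [(0.0, 7.0), (5.0, 12.0), (10.0, 17.0)]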
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('--dataroot', default="", help='path to dataset')
    #parser.add_argument('--model_path', default="/media/labhdd/ASAS/V3/110modelv3.pt", help='path to model')
    parser.add_argument('--model_path', default="/media/labhdd/ASAS/Resnet34_2/", help='path to model')
    parser.add_argument('--csv_path', default="./train_v3.csv", help='path to label csv')
    parser.add_argument('--cuda', action='store_true', help='enables cuda')
    parser.add_argument('--ignore_prob', default=1e-4, type=float)
    opt = parser.parse_args()
    print(opt)

    # Read the files here and compute the power spectrum to
    # calculate the values and make a decision.
    wav_files = [f for f in sorted(os.listdir(cfg.TESTSET_PATH)) if f[0] != '.']

    dirpath = 'resnet_34_2'
    if not os.path.exists(dirpath):
        os.makedirs(dirpath)

    for inx in range(5, 120, 5):  # using different epochs

        model = model_prediction(opt, inx)
        label = model.construct_label_dict()

        filepath = os.path.join(dirpath, 'valset_s2n001_resnet34_%3d.csv' % (inx))
        f = open(filepath, 'w+')
        median_dict = load_median()
        #result = []
        #predict_class = []
        #predict_prob = []
        submission = []
        SPEC_SIGNAL_THRESHOLD = 0.001
        num = 0
        predict_prob_threshold = 0.0

        for file in wav_files:

            if num == 5:
                break

            #spec_flux = select_frame_criterion(file)
            #print(spec_flux, file=f)

            # Get specs for file
            cnt2 = 1
            predict_class = []
            predict_prob = []
            predict_s2n = []
            accept_spec_num = 0
            for spec in audio.specsFromFile(os.path.join(cfg.TESTSET_PATH, file),
                                            cfg.SAMPLE_RATE,
                                            cfg.SPEC_LENGTH,
                                            cfg.SPEC_OVERLAP,
                                            cfg.SPEC_MINLEN,
                                            shape=(cfg.IM_SIZE[1], cfg.IM_SIZE[0]),
                                            fmin=cfg.SPEC_FMIN,
                                            fmax=cfg.SPEC_FMAX,
                                            spec_type=cfg.SPEC_TYPE):

                s2n = audio.signal2noise(spec)
                #print(s2n, file=f)

                # Above SIGNAL_THRESHOLD?
                if s2n >= SPEC_SIGNAL_THRESHOLD:

                    # Resize spec
                    #spec = image.resize(spec, cfg.IM_SIZE[0], cfg.IM_SIZE[1], mode=cfg.RESIZE_MODE)

                    # Normalize spec
                    #spec = image.normalize(spec, cfg.ZERO_CENTERED_NORMALIZATION)

                    # Prepare as input
                    spec = image.prepare(spec)

                    k = np.random.rand(1, 1, 256, 128)  # unused
                    confidence, prob = model.get_probability(spec, opt)
                    # print(label)

                    # Keep only classes whose probability is above the ignore threshold
                    probdict = {}
                    for i in range(659):
                        if prob[i] >= opt.ignore_prob:
                            probdict[label[i]] = prob[i]

                    order = sorted(probdict.items(), key=lambda x: x[1], reverse=True)
                    #print(file + '_' + str(c) + ':' + 'confidence = ' + str(confidence) + '\n', file=f1)
                    #print(file + '_' + str(cnt2) + ':' + 'probability = ' + str(order[0]) + '\n', file=f2)
                    #print(order[0][1])

                    if len(order) > 0 and order[0][1] > predict_prob_threshold:  # select all

                        start = cnt2 - cnt2 % 5
                        end = start + 5
                        timestamp = getTimestamp(start, end)  # type: string
                        print(median_dict[file] + ';' + timestamp + ';' + order[0][0] + ';' + str(1), file=f)
                        submission.append([median_dict[file] + ';' + timestamp + ';' + order[0][0] + ';' + str(1)])
                        accept_spec_num += 1

                cnt2 = cnt2 + 1

            print('audio file ' + str(num + 1) + ': ' + str(accept_spec_num) + ' specs accepted')
            num += 1

        f.close()
        del model
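# --- Example (not part of the original script) -------------------------------
# main() writes one semicolon-separated row per accepted spectrogram. A minimal
# sketch of that row format; the field names are assumptions inferred from how
# the row is assembled above (media id, timestamp, predicted class, score 1).
def formatSubmissionRow(media_id, timestamp, class_name, score=1):

    # e.g. 'someMediaId;00:05-00:10;somespecies;1'
    return ';'.join([str(media_id), str(timestamp), str(class_name), str(score)])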