Code Example #1
def plot_midi(args):

    audio_path = args.audio_path
    midi_path = args.midi_path
    fig_path = 'results/{}.png'.format(get_filename(audio_path))

    (audio, _) = librosa.core.load(audio_path,
                                   sr=config.sample_rate,
                                   mono=True)
    audio_seconds = audio.shape[0] / config.sample_rate

    midi_dict = read_midi(midi_path)

    target_processor = TargetProcessor(
        segment_seconds=audio_seconds,
        frames_per_second=config.frames_per_second,
        begin_note=config.begin_note,
        classes_num=config.classes_num)

    (target_dict, note_events, pedal_events) = target_processor.process(
        start_time=0,
        midi_events_time=midi_dict['midi_event_time'],
        midi_events=midi_dict['midi_event'])

    fig, axs = plt.subplots(3, 1, figsize=(10, 4), sharex=True)
    logmel = np.log(
        librosa.feature.melspectrogram(y=audio,
                                       sr=16000,
                                       n_fft=2048,
                                       hop_length=160,
                                       n_mels=229,
                                       fmin=30,
                                       fmax=8000)).T
    axs[0].matshow(logmel.T, origin='lower', aspect='auto', cmap='jet')
    axs[1].matshow(target_dict['frame_roll'].T,
                   origin='lower',
                   aspect='auto',
                   cmap='jet',
                   vmin=-1,
                   vmax=1)
    axs[2].plot(target_dict['pedal_frame_roll'])
    axs[2].set_ylim(-0.02, 1.02)
    axs[0].set_title('Log mel spectrogram')
    axs[1].set_title('Transcribed notes')
    axs[2].set_title('Transcribed pedals')
    axs[0].yaxis.set_ticks(np.arange(0, 229, 228))
    axs[0].yaxis.set_ticklabels([0, 228])
    axs[1].yaxis.set_ticks(np.arange(0, 88, 87))
    axs[1].yaxis.set_ticklabels([0, 87])
    axs[0].set_ylabel('Mel bins')
    axs[1].set_ylabel('Notes')
    axs[2].set_ylabel('Probability')
    fps = config.frames_per_second
    axs[2].xaxis.set_ticks(np.arange(0, audio_seconds * fps + 1, 5 * fps))
    axs[2].xaxis.set_ticklabels(np.arange(0, audio_seconds + 1e-6, 5))
    axs[2].set_xlabel('Seconds')
    plt.tight_layout(pad=0, h_pad=0, w_pad=0)
    plt.savefig(fig_path)
    print('Save out to {}'.format(fig_path))
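
Every example on this page relies on a get_filename helper from the respective project's utilities module. Its exact implementation is not shown here and differs between repositories; a minimal sketch of the common behaviour (file name without directory or extension) might look like this:

import os

def get_filename(path):
    # Sketch only: the name of `path` without its directory or extension.
    # The real helpers may differ (e.g. the EEG_Project version in Code
    # Example #20 accepts a `start` offset).
    return os.path.splitext(os.path.basename(path))[0]

# get_filename('results/song.wav') -> 'song'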
Code Example #2
def calculate_piano_solo_prob(args):
    """Calculate the piano solo probability of all downloaded mp3s, and append
    the probability to the meta csv file.
    """
    # Arguments & parameters
    workspace = args.workspace 
    mp3s_dir = args.mp3s_dir
    mini_data = args.mini_data

    sample_rate = piano_detection_model.SR

    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    # Paths
    similarity_csv_path = os.path.join(workspace, 
        '{}full_music_pieces_youtube_similarity.csv'.format(prefix))

    piano_prediction_path = os.path.join(workspace, 
        '{}full_music_pieces_youtube_similarity_pianosoloprob.csv'.format(prefix))

    # Meta info
    meta_dict = read_csv_to_meta_dict(similarity_csv_path)

    meta_dict['piano_solo_prob'] = []
    meta_dict['audio_name'] = []
    meta_dict['audio_duration'] = []
    count = 0

    piano_solo_detector = piano_detection_model.PianoSoloDetector()

    for n in range(len(meta_dict['surname'])):
        mp3_path = os.path.join(mp3s_dir, '{}, {}, {}, {}.mp3'.format(
            meta_dict['surname'][n], meta_dict['firstname'][n], 
            meta_dict['music'][n], meta_dict['youtube_id'][n]).replace('/', '_'))

        if os.path.exists(mp3_path):
            (audio, _) = librosa.core.load(mp3_path, sr=sample_rate, mono=True)
            
            try:
                probs = piano_solo_detector.predict(audio)
                prob = np.mean(probs)
            except Exception:
                # Fall back to a probability of 0 if the prediction fails
                prob = 0
    
            print(n, mp3_path, prob)
            meta_dict['audio_name'].append(get_filename(mp3_path))
            meta_dict['piano_solo_prob'].append(prob)
            meta_dict['audio_duration'].append(len(audio) / sample_rate)
        else:
            meta_dict['piano_solo_prob'].append('')
            meta_dict['audio_name'].append('')
            meta_dict['audio_duration'].append('')

    write_meta_dict_to_csv(meta_dict, piano_prediction_path)
    print('Write out to {}'.format(piano_prediction_path))
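
The read_csv_to_meta_dict and write_meta_dict_to_csv helpers used above are not part of this snippet. A rough sketch of their assumed behaviour (one list per CSV column, keyed by header) could be:

import csv

def read_csv_to_meta_dict(csv_path):
    # Assumed behaviour: build {column_name: [values, ...]} from the CSV header.
    with open(csv_path, 'r') as f:
        rows = list(csv.DictReader(f))
    return {key: [row[key] for row in rows] for key in rows[0].keys()}

def write_meta_dict_to_csv(meta_dict, csv_path):
    keys = list(meta_dict.keys())
    with open(csv_path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(keys)
        for n in range(len(meta_dict[keys[0]])):
            writer.writerow([meta_dict[key][n] for key in keys])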
Code Example #3
File: main.py  Project: tekrei/EpubMetadataEditor
    def load_table(self, widget):
        self.folder = dialogs.get_folder()
        if not self.folder:
            return
        self.clear(None)

        for epub_file in utilities.get_epub_files(self.folder):
            try:
                values = utilities.get_metadata(
                    epub_file, self.columns[1:])
                values.insert(0, utilities.get_filename(epub_file))
                self.book_store.append(values)
            except BaseException as e:
                print(e)
                dialogs.show_error_message(
                    "No metadata is found for %s (%s)" % (epub_file, e))
Code Example #4
def inference(args):
    """Inference template.

    Args:
      model_type: str
      checkpoint_path: str
      post_processor_type: 'regression' | 'onsets_frames'. High-resolution 
        system should use 'regression'. 'onsets_frames' is only used to compare
        with Google's onsets and frames system.
      audio_path: str
      cuda: bool
    """

    # Arguments & parameters
    model_type = args.model_type
    checkpoint_path = args.checkpoint_path
    post_processor_type = args.post_processor_type
    device = 'cuda' if args.cuda and torch.cuda.is_available() else 'cpu'
    audio_path = args.audio_path

    sample_rate = config.sample_rate
    segment_samples = sample_rate * 10
    """Split audio to multiple 10-second segments for inference"""

    # Paths
    midi_path = 'results/{}.mid'.format(get_filename(audio_path))
    create_folder(os.path.dirname(midi_path))

    # Load audio
    (audio, _) = load_audio(audio_path, sr=sample_rate, mono=True)

    # Transcriptor
    transcriptor = PianoTranscription(model_type,
                                      device=device,
                                      checkpoint_path=checkpoint_path,
                                      segment_samples=segment_samples,
                                      post_processor_type=post_processor_type)

    # Transcribe and write out to MIDI file
    transcribe_time = time.time()
    transcribed_dict = transcriptor.transcribe(audio, midi_path)
    print('Transcribe time: {:.3f} s'.format(time.time() - transcribe_time))

    # Visualize for debug
    plot = False
    if plot:
        output_dict = transcribed_dict['output_dict']
        fig, axs = plt.subplots(5, 1, figsize=(15, 8), sharex=True)
        mel = librosa.feature.melspectrogram(y=audio,
                                             sr=16000,
                                             n_fft=2048,
                                             hop_length=160,
                                             n_mels=229,
                                             fmin=30,
                                             fmax=8000)
        axs[0].matshow(np.log(mel), origin='lower', aspect='auto', cmap='jet')
        axs[1].matshow(output_dict['frame_output'].T,
                       origin='lower',
                       aspect='auto',
                       cmap='jet')
        axs[2].matshow(output_dict['reg_onset_output'].T,
                       origin='lower',
                       aspect='auto',
                       cmap='jet')
        axs[3].matshow(output_dict['reg_offset_output'].T,
                       origin='lower',
                       aspect='auto',
                       cmap='jet')
        axs[4].plot(output_dict['pedal_frame_output'])
        axs[0].set_xlim(0, len(output_dict['frame_output']))
        axs[4].set_xlabel('Frames')
        axs[0].set_title('Log mel spectrogram')
        axs[1].set_title('frame_output')
        axs[2].set_title('reg_onset_output')
        axs[3].set_title('reg_offset_output')
        axs[4].set_title('pedal_frame_output')
        plt.tight_layout(pad=0, h_pad=0.05, w_pad=0)
        fig_path = '_zz_{}.pdf'.format(get_filename(audio_path))
        plt.savefig(fig_path)
        print('Plot to {}'.format(fig_path))
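
The inference template above is meant to be driven from the command line. A hypothetical argparse wiring (argument names taken from the function body; the defaults are assumptions) might look like:

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Piano transcription inference')
    parser.add_argument('--model_type', type=str, required=True)
    parser.add_argument('--checkpoint_path', type=str, required=True)
    parser.add_argument('--post_processor_type', type=str, default='regression',
                        choices=['regression', 'onsets_frames'])
    parser.add_argument('--audio_path', type=str, required=True)
    parser.add_argument('--cuda', action='store_true', default=False)
    inference(parser.parse_args())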
Code Example #5
def inference(args):

    logging.info('config=%s', json.dumps(vars(args)))
    # Arguments & parameters
    workspace = args.workspace
    cuda = args.cuda

    # Paths
    hdf5_path = os.path.join(workspace, 'data.h5')
    model_path = os.path.join(workspace, 'logs', get_filename(__file__),
                              args.inference_model)

    # Load model
    model_class, model_params = MODELS[args.model]
    model = model_class(**{k: args.model_params[k] for k in model_params if k in args.model_params})
    checkpoint = torch.load(model_path)
    model.load_state_dict(checkpoint['state_dict'])

    if cuda:
        model.cuda()

    # Data generator
    generator = TestDataGenerator(hdf5_path=hdf5_path,
                                  target_device=args.target_device,
                                  train_house_list=args.train_house_list,
                                  seq_len=model.seq_len,
                                  steps=args.width * args.batch_size,
                                  binary_threshold=args.binary_threshold)

    generate_func = generator.generate_inference(house=args.inference_house)

    # Forward
    inference_time = time.time()

    outputs = forward(model=model, generate_func=generate_func, cuda=cuda, has_target=False)
    outputs = np.concatenate([output[0] for output in outputs])
    if args.binary_threshold is not None:
        logging.info('----binary threshold is not none and binary metrics are returned----')
        targets = generator.get_target()
        logging.info('Inference time: {} s'.format(time.time() - inference_time))
        metric_dict = binary_metrics(outputs, targets)
        logging.info('Metrics: {}'.format(metric_dict))
    else:
        logging.info('----binary threshold is none and mae and sae metrics are returned----')
        outputs = generator.inverse_transform(outputs)

        logging.info('Inference time: {} s'.format(time.time() - inference_time))

        # Calculate metrics
        source = generator.get_source()
        targets = generator.get_target()

        valid_data = np.ones_like(source)
        for i in range(len(source)):
            if (source[i]==0) or (source[i] < targets[i]):
                valid_data[i] = 0

        mae = mean_absolute_error(outputs * valid_data, targets * valid_data)
        sae = signal_aggregate_error(outputs * valid_data, targets * valid_data)
        mae_allzero = mean_absolute_error(outputs*0, targets * valid_data)
        sae_allmean = signal_aggregate_error(outputs*0+generator.mean_y, targets * valid_data)

        metric_dict = dict(
            {'MAE': mae, 'MAE_zero': mae_allzero, 'SAE': sae, 'SAE_mean': sae_allmean},
            **binary_metrics(
                ((outputs - args.eval_binary_threshold) > 0).astype('float'),
                ((targets - args.eval_binary_threshold) > 0).astype('float')))
        logging.info('Metrics: {}'.format(metric_dict))
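
mean_absolute_error and signal_aggregate_error are not defined in this snippet. The sketch below shows one common NILM-style definition of these metrics; it is an assumption, not necessarily the project's exact code:

import numpy as np

def mean_absolute_error(outputs, targets):
    # Average absolute deviation per time step.
    return np.mean(np.abs(outputs - targets))

def signal_aggregate_error(outputs, targets):
    # Relative error of the total predicted consumption over the sequence.
    return np.abs(np.sum(outputs) - np.sum(targets)) / np.sum(targets)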
Code Example #6
def train(args):

    logging.info('config=%s', json.dumps(vars(args)))

    # Arguments & parameters
    workspace = args.workspace
    cuda = args.cuda

    # Load model
    model_class, model_params = MODELS[args.model]
    model = model_class(**{k: args.model_params[k] for k in model_params if k in args.model_params})

    if args.train_model is not None:
        logging.info("continue training ...")
        model_path = os.path.join(workspace, 'logs', get_filename(__file__),
                                  args.train_model)
        checkpoint = torch.load(model_path)
        model.load_state_dict(checkpoint['state_dict'])

    logging.info("sequence length: {}".format(model.seq_len))

    if cuda:
        model.cuda()

    # Paths
    hdf5_path = os.path.join(workspace, 'data.h5')

    models_dir = os.path.join(workspace, 'models', get_filename(__file__))

    create_folder(models_dir)

    # Data generator
    generator = DataGenerator(hdf5_path=hdf5_path,
                              target_device=args.target_device,
                              train_house_list=args.train_house_list,
                              validate_house_list=args.validate_house_list,
                              batch_size=args.batch_size,
                              seq_len=model.seq_len,
                              width=args.width,
                              binary_threshold=args.binary_threshold,
                              balance_threshold=args.balance_threshold,
                              balance_positive=args.balance_positive)

    # Optimizer
    learning_rate = 1e-3
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.999),
                           eps=1e-08, weight_decay=0.)

    iteration = 0
    train_bgn_time = time.time()

    for (batch_x, batch_y) in generator.generate():

        if iteration > 1000*300:
            break

        # Evaluate
        if iteration % 1000 == 0:

            train_fin_time = time.time()

            tr_result_dict = evaluate(model=model,
                                      generator=generator,
                                      data_type='train',
                                      max_iteration=args.validate_max_iteration,
                                      cuda=cuda,
                                      binary=args.binary_threshold is not None)

            va_result_dict = evaluate(model=model,
                                      generator=generator,
                                      data_type='validate',
                                      max_iteration=args.validate_max_iteration,
                                      cuda=cuda,
                                      binary=args.binary_threshold is not None)

            logging.info('train: {}'.format(tr_result_dict))
            logging.info('validate: {}'.format(va_result_dict))

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info(
                'iteration: {}, train time: {:.3f} s, validate time: {:.3f} s, learning rate: {}'.format(
                    iteration, train_time, validate_time, learning_rate))

            logging.info('------------------------------------')

            train_bgn_time = time.time()

        # Reduce learning rate
        if iteration % 1000 == 0 and iteration > 0 and learning_rate > 5e-5:
            for param_group in optimizer.param_groups:
                learning_rate *= 0.9
                param_group['lr'] = learning_rate

        batch_x = move_data_to_gpu(batch_x, cuda)
        batch_y = move_data_to_gpu(batch_y, cuda)

        # Forward
        forward_time = time.time()
        model.train()
        output = model(batch_x)

        # Loss
        if args.binary_threshold is not None:
            loss = loss_func_binary(output, batch_y)
        else:
            loss = loss_func(output, batch_y)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        if args.max_norm is not None:
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=args.max_norm)
        optimizer.step()

        # Save model. The condition below keeps only the last five
        # 1000-iteration checkpoints up to each multiple of 100k iterations
        # (96k-100k, 196k-200k, ...).
        if (iteration>1) and (iteration % 1000 == 0) and ((iteration//1000+4) // (((iteration//1000-1)//100+1)*100) == 1):
            save_out_dict = {'iteration': iteration,
                             'state_dict': model.state_dict(),
                             'optimizer': optimizer.state_dict()}

            save_out_path = args.basename + '_{}_{}_iter_{}_wd_{}_sl_{}.tar'.format(
                args.target_device,
                args.model,
                iteration,
                args.width,
                model.seq_len
            )

            create_folder(os.path.dirname(save_out_path))
            torch.save(save_out_dict, save_out_path)

            logging.info('Save model to {}'.format(save_out_path))

        iteration += 1
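
A side note on the learning-rate schedule above: starting from 1e-3 and multiplying by 0.9 every 1000 iterations while the rate stays above 5e-5, the decay stops after 29 steps. A quick check, separate from the training script:

lr, it = 1e-3, 0
while lr > 5e-5:
    lr *= 0.9
    it += 1000
print(it, lr)  # 29000, roughly 4.7e-05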
Code Example #7
def pack_maestro_dataset_to_hdf5(args):
    """Load & resample MAESTRO audio files, then write to hdf5 files.

    Args:
      dataset_dir: str, directory of dataset
      workspace: str, directory of your workspace
    """

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace

    sample_rate = config.sample_rate

    # Paths
    csv_path = os.path.join(dataset_dir, 'maestro-v2.0.0.csv')
    waveform_hdf5s_dir = os.path.join(workspace, 'hdf5s', 'maestro')

    logs_dir = os.path.join(workspace, 'logs', get_filename(__file__))
    create_logging(logs_dir, filemode='w')
    logging.info(args)

    # Read meta dict
    meta_dict = read_metadata(csv_path)

    audios_num = len(meta_dict['canonical_composer'])
    logging.info('Total audios number: {}'.format(audios_num))

    feature_time = time.time()

    # Load & resample each audio file to a hdf5 file
    for n in range(audios_num):
        logging.info('{} {}'.format(n, meta_dict['midi_filename'][n]))

        # Read midi
        midi_path = os.path.join(dataset_dir, meta_dict['midi_filename'][n])
        midi_dict = read_midi(midi_path)

        # Load audio
        audio_path = os.path.join(dataset_dir, meta_dict['audio_filename'][n])
        (audio, _) = librosa.core.load(audio_path, sr=sample_rate, mono=True)

        packed_hdf5_path = os.path.join(waveform_hdf5s_dir, '{}.h5'.format(
            os.path.splitext(meta_dict['audio_filename'][n])[0]))

        create_folder(os.path.dirname(packed_hdf5_path))

        with h5py.File(packed_hdf5_path, 'w') as hf:
            hf.attrs.create('canonical_composer', data=meta_dict['canonical_composer'][n].encode(), dtype='S100')
            hf.attrs.create('canonical_title', data=meta_dict['canonical_title'][n].encode(), dtype='S100')
            hf.attrs.create('split', data=meta_dict['split'][n].encode(), dtype='S20')
            hf.attrs.create('year', data=meta_dict['year'][n].encode(), dtype='S10')
            hf.attrs.create('midi_filename', data=meta_dict['midi_filename'][n].encode(), dtype='S100')
            hf.attrs.create('audio_filename', data=meta_dict['audio_filename'][n].encode(), dtype='S100')
            hf.attrs.create('duration', data=meta_dict['duration'][n], dtype=np.float32)

            hf.create_dataset(name='midi_event', data=[e.encode() for e in midi_dict['midi_event']], dtype='S100')
            hf.create_dataset(name='midi_event_time', data=midi_dict['midi_event_time'], dtype=np.float32)
            hf.create_dataset(name='waveform', data=float32_to_int16(audio), dtype=np.int16)
        
    logging.info('Write hdf5 to {}'.format(packed_hdf5_path))
    logging.info('Time: {:.3f} s'.format(time.time() - feature_time))
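
float32_to_int16 (and its inverse int16_to_float32, used in Code Example #19) are small quantization helpers that are not included in this snippet. A plausible sketch, assuming waveforms normalized to [-1, 1]:

import numpy as np

def float32_to_int16(x):
    # Assumption: x lies in [-1, 1]; store the waveform compactly as int16.
    return (np.clip(x, -1., 1.) * 32767.).astype(np.int16)

def int16_to_float32(x):
    return (x / 32767.).astype(np.float32)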
Code Example #8
    hf.create_dataset(name='fold', data=folds, dtype=np.int32)

    hf.close()

    logging.info('Write out hdf5 file to {}'.format(hdf5_path))
    logging.info('Time spent: {} s'.format(time.time() - write_hdf5_time))


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='')
    subparsers = parser.add_subparsers(dest='mode')

    parser_logmel = subparsers.add_parser('logmel')
    parser_logmel.add_argument('--workspace', type=str, required=True)
    parser_logmel.add_argument('--scene_type', type=str, required=True)
    parser_logmel.add_argument('--snr', type=int, required=True)

    args = parser.parse_args()

    logs_dir = os.path.join(args.workspace, 'logs', get_filename(__file__))
    create_folder(logs_dir)
    logging = create_logging(logs_dir, filemode='w')

    logging.info(args)

    if args.mode == 'logmel':
        calculate_logmel_features(args)

    else:
        raise Exception('Incorrect arguments!')
Code Example #9
def sound_event_detection(args):
    """Inference sound event detection result of an audio clip.
    """

    # Arguments & parameters
    sample_rate = args.sample_rate
    window_size = args.window_size
    hop_size = args.hop_size
    mel_bins = args.mel_bins
    fmin = args.fmin
    fmax = args.fmax
    model_type = args.model_type
    checkpoint_path = args.checkpoint_path
    audio_path = args.audio_path
    device = torch.device('cuda') if args.cuda and torch.cuda.is_available() else torch.device('cpu')

    classes_num = config.classes_num
    labels = config.labels
    frames_per_second = sample_rate // hop_size

    # Paths
    fig_path = os.path.join('results', '{}.png'.format(get_filename(audio_path)))
    create_folder(os.path.dirname(fig_path))

    # Model
    Model = eval(model_type)
    model = Model(sample_rate=sample_rate, window_size=window_size, 
        hop_size=hop_size, mel_bins=mel_bins, fmin=fmin, fmax=fmax, 
        classes_num=classes_num)
    
    checkpoint = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(checkpoint['model'])

    # Parallel
    print('GPU number: {}'.format(torch.cuda.device_count()))
    model = torch.nn.DataParallel(model)

    if 'cuda' in str(device):
        model.to(device)
    
    # Load audio
    (waveform, _) = librosa.core.load(audio_path, sr=sample_rate, mono=True)

    waveform = waveform[None, :]    # (1, audio_length)
    waveform = move_data_to_device(waveform, device)

    # Forward
    with torch.no_grad():
        model.eval()
        batch_output_dict = model(waveform, None)

    framewise_output = batch_output_dict['framewise_output'].data.cpu().numpy()[0]
    """(time_steps, classes_num)"""

    print('Sound event detection result (time_steps x classes_num): {}'.format(
        framewise_output.shape))

    sorted_indexes = np.argsort(np.max(framewise_output, axis=0))[::-1]

    top_k = 10  # Show top results
    top_result_mat = framewise_output[:, sorted_indexes[0 : top_k]]    
    """(time_steps, top_k)"""

    # Plot result    
    stft = librosa.core.stft(y=waveform[0].data.cpu().numpy(), n_fft=window_size, 
        hop_length=hop_size, window='hann', center=True)
    frames_num = stft.shape[-1]

    fig, axs = plt.subplots(2, 1, sharex=True, figsize=(10, 4))
    axs[0].matshow(np.log(np.abs(stft)), origin='lower', aspect='auto', cmap='jet')
    axs[0].set_ylabel('Frequency bins')
    axs[0].set_title('Log spectrogram')
    axs[1].matshow(top_result_mat.T, origin='upper', aspect='auto', cmap='jet', vmin=0, vmax=1)
    axs[1].xaxis.set_ticks(np.arange(0, frames_num, frames_per_second))
    axs[1].xaxis.set_ticklabels(np.arange(0, frames_num / frames_per_second))
    axs[1].yaxis.set_ticks(np.arange(0, top_k))
    axs[1].yaxis.set_ticklabels(np.array(labels)[sorted_indexes[0 : top_k]])
    axs[1].yaxis.grid(color='k', linestyle='solid', linewidth=0.3, alpha=0.3)
    axs[1].set_xlabel('Seconds')
    axs[1].xaxis.set_ticks_position('bottom')

    plt.tight_layout()
    plt.savefig(fig_path)
    print('Save sound event detection visualization to {}'.format(fig_path))

    return framewise_output, labels
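
move_data_to_device converts the numpy waveform into a torch tensor on the chosen device. Its implementation is not part of this snippet; one plausible version (an assumption) is:

import torch

def move_data_to_device(x, device):
    # Convert numpy arrays to torch tensors and move them to `device`;
    # anything that is neither float nor int is returned unchanged.
    if 'float' in str(x.dtype):
        x = torch.Tensor(x)
    elif 'int' in str(x.dtype):
        x = torch.LongTensor(x)
    else:
        return x
    return x.to(device)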
Code Example #10
    def calculate_score_per_song(self, args):
        """Calculate score per song.

        Args:
          args: [n, hdf5_path, params]
        """
        n = args[0]
        hdf5_path = args[1]
        [onset_threshold, offset_threshold, frame_threshold] = args[2]

        return_dict = {}

        # Load pre-calculated system outputs and ground truths
        prob_path = os.path.join(self.probs_dir,
                                 '{}.pkl'.format(get_filename(hdf5_path)))
        total_dict = pickle.load(open(prob_path, 'rb'))

        ref_on_off_pairs = total_dict['ref_on_off_pairs']
        ref_midi_notes = total_dict['ref_midi_notes']
        output_dict = total_dict

        # Calculate frame metric
        if self.evaluate_frame:
            y_pred = (np.sign(total_dict['frame_output'] - frame_threshold) +
                      1) / 2
            y_pred[np.where(y_pred == 0.5)] = 0
            y_true = total_dict['frame_roll']
            y_pred = y_pred[0:y_true.shape[0], :]
            y_true = y_true[0:y_pred.shape[0], :]

            tmp = metrics.precision_recall_fscore_support(
                y_true.flatten(), y_pred.flatten())
            return_dict['frame_precision'] = tmp[0][1]
            return_dict['frame_recall'] = tmp[1][1]
            return_dict['frame_f1'] = tmp[2][1]

        # Post processor
        if self.post_processor_type == 'regression':
            post_processor = RegressionPostProcessor(
                self.frames_per_second,
                classes_num=self.classes_num,
                onset_threshold=onset_threshold,
                offset_threshold=offset_threshold,
                frame_threshold=frame_threshold,
                pedal_offset_threshold=self.pedal_offset_threshold)

        elif self.post_processor_type == 'onsets_frames':
            post_processor = OnsetsFramesPostProcessor(
                self.frames_per_second, classes_num=self.classes_num)

        # Post process piano note outputs to piano note and pedal events information
        (est_on_off_note_vels, est_pedal_on_offs) = \
            post_processor.output_dict_to_note_pedal_arrays(output_dict)
        """est_on_off_note_vels: (events_num, 4), the four columns are: [onset_time, offset_time, piano_note, velocity], 
        est_pedal_on_offs: (pedal_events_num, 2), the two columns are: [onset_time, offset_time]"""

        # Detect piano notes from output_dict
        est_on_offs = est_on_off_note_vels[:, 0:2]
        est_midi_notes = est_on_off_note_vels[:, 2]
        est_vels = est_on_off_note_vels[:, 3] * self.velocity_scale

        # Calculate note metrics
        if self.velocity:
            (note_precision, note_recall, note_f1,
             _) = (mir_eval.transcription_velocity.precision_recall_f1_overlap(
                 ref_intervals=ref_on_off_pairs,
                 ref_pitches=note_to_freq(ref_midi_notes),
                 ref_velocities=total_dict['ref_velocity'],
                 est_intervals=est_on_offs,
                 est_pitches=note_to_freq(est_midi_notes),
                 est_velocities=est_vels,
                 onset_tolerance=self.onset_tolerance,
                 offset_ratio=self.offset_ratio,
                 offset_min_tolerance=self.offset_min_tolerance))
        else:
            note_precision, note_recall, note_f1, _ = \
                mir_eval.transcription.precision_recall_f1_overlap(
                    ref_intervals=ref_on_off_pairs,
                    ref_pitches=note_to_freq(ref_midi_notes),
                    est_intervals=est_on_offs,
                    est_pitches=note_to_freq(est_midi_notes),
                    onset_tolerance=self.onset_tolerance,
                    offset_ratio=self.offset_ratio,
                    offset_min_tolerance=self.offset_min_tolerance)

        if self.pedal:
            # Detect piano notes from output_dict
            ref_pedal_on_off_pairs = output_dict['ref_pedal_on_off_pairs']

            # Calculate pedal metrics
            if len(ref_pedal_on_off_pairs) > 0:
                pedal_precision, pedal_recall, pedal_f1, _ = \
                    mir_eval.transcription.precision_recall_f1_overlap(
                        ref_intervals=ref_pedal_on_off_pairs,
                        ref_pitches=np.ones(ref_pedal_on_off_pairs.shape[0]),
                        est_intervals=est_pedal_on_offs,
                        est_pitches=np.ones(est_pedal_on_offs.shape[0]),
                        onset_tolerance=0.2,
                        offset_ratio=self.pedal_offset_ratio,
                        offset_min_tolerance=self.pedal_offset_min_tolerance)

                return_dict['pedal_precision'] = pedal_precision
                return_dict['pedal_recall'] = pedal_recall
                return_dict['pedal_f1'] = pedal_f1

                y_pred = (np.sign(total_dict['pedal_frame_output'] - 0.5) +
                          1) / 2
                y_pred[np.where(y_pred == 0.5)] = 0
                y_true = total_dict['pedal_frame_roll']
                y_pred = y_pred[0:y_true.shape[0]]
                y_true = y_true[0:y_pred.shape[0]]

                tmp = metrics.precision_recall_fscore_support(
                    y_true.flatten(), y_pred.flatten())
                return_dict['pedal_frame_precision'] = tmp[0][1]
                return_dict['pedal_frame_recall'] = tmp[1][1]
                return_dict['pedal_frame_f1'] = tmp[2][1]

                print('pedal f1: {:.3f}, frame f1: {:.3f}'.format(
                    pedal_f1, return_dict['pedal_frame_f1']))

        print('note f1: {:.3f}'.format(note_f1))

        return_dict['note_precision'] = note_precision
        return_dict['note_recall'] = note_recall
        return_dict['note_f1'] = note_f1
        return return_dict
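
calculate_score_per_song packs its inputs into a single list so that it can be mapped over many songs in parallel. A hypothetical driver is sketched below; the evaluator instance, hdf5_paths list and threshold triple are illustrative assumptions, not part of the original code:

from concurrent.futures import ProcessPoolExecutor

thresholds = [0.3, 0.3, 0.3]  # [onset_threshold, offset_threshold, frame_threshold]
arg_list = [[n, hdf5_path, thresholds] for n, hdf5_path in enumerate(hdf5_paths)]

with ProcessPoolExecutor() as pool:
    stats_per_song = list(pool.map(evaluator.calculate_score_per_song, arg_list))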
Code Example #11
def pack_waveforms_to_hdf5(args):
    """Pack waveform and target of several audio clips to a single hdf5 file. 
    This can speed up loading and training.
    """

    # Arguments & parameters
    audios_dir = args.audios_dir
    csv_path = args.csv_path
    waveforms_hdf5_path = args.waveforms_hdf5_path
    mini_data = args.mini_data

    clip_samples = config.clip_samples
    classes_num = config.classes_num
    sample_rate = config.sample_rate
    id_to_ix = config.id_to_ix

    # Paths
    if mini_data:
        prefix = 'mini_'
        waveforms_hdf5_path += '.mini'
    else:
        prefix = ''

    create_folder(os.path.dirname(waveforms_hdf5_path))

    logs_dir = '_logs/pack_waveforms_to_hdf5/{}{}'.format(
        prefix, get_filename(csv_path))
    create_folder(logs_dir)
    create_logging(logs_dir, filemode='w')
    logging.info('Write logs to {}'.format(logs_dir))

    # Read csv file
    meta_dict = read_metadata(csv_path, classes_num, id_to_ix)

    if mini_data:
        mini_num = 10
        for key in meta_dict.keys():
            meta_dict[key] = meta_dict[key][0:mini_num]

    audios_num = len(meta_dict['audio_name'])

    # Pack waveform to hdf5
    total_time = time.time()

    with h5py.File(waveforms_hdf5_path, 'w') as hf:
        hf.create_dataset('audio_name', shape=((audios_num, )), dtype='S20')
        hf.create_dataset('waveform',
                          shape=((audios_num, clip_samples)),
                          dtype=np.int16)
        hf.create_dataset('target',
                          shape=((audios_num, classes_num)),
                          dtype=bool)
        hf.attrs.create('sample_rate', data=sample_rate, dtype=np.int32)

        # Pack waveform & target of several audio clips to a single hdf5 file
        for n in range(audios_num):
            audio_path = os.path.join(audios_dir, meta_dict['audio_name'][n])

            if os.path.isfile(audio_path):
                logging.info('{} {}'.format(n, audio_path))
                (audio, _) = librosa.core.load(audio_path,
                                               sr=sample_rate,
                                               mono=True)
                audio = pad_or_truncate(audio, clip_samples)

                hf['audio_name'][n] = meta_dict['audio_name'][n].encode()
                hf['waveform'][n] = float32_to_int16(audio)
                hf['target'][n] = meta_dict['target'][n]
            else:
                logging.info('{} File does not exist! {}'.format(
                    n, audio_path))

    logging.info('Write to {}'.format(waveforms_hdf5_path))
    logging.info('Pack hdf5 time: {:.3f}'.format(time.time() - total_time))
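
pad_or_truncate forces every clip to exactly clip_samples samples so that all waveforms fit one fixed-size hdf5 dataset. Its code is not included here; a minimal sketch of the assumed behaviour:

import numpy as np

def pad_or_truncate(x, audio_length):
    # Zero-pad or cut a 1-D waveform to exactly `audio_length` samples.
    if len(x) <= audio_length:
        return np.concatenate((x, np.zeros(audio_length - len(x), dtype=x.dtype)))
    return x[:audio_length]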
Code Example #12
        'ss_precision': ss_precision,
        'ss_recall': ss_recall,
        'ss_f1_score': ss_f1_score,
        'ss_auc': ss_auc,
        'ss_ap': ss_ap
    }

    pickle.dump(stat, open(out_stat_path, 'wb'))
    logging.info('Saved stat to {}'.format(out_stat_path))


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Example of parser')
    parser.add_argument('--run_name', type=str, required=True)
    args = parser.parse_args()

    config.filename = get_filename(__file__)
    # Create log
    logs_dir = os.path.join(config.workspace, 'logs', config.filename)
    create_logging(logs_dir, filemode='w')
    logging.info(config)
    train(config, args)

    # if args.mode == 'train':
    #     train(config)

    # elif args.mode == 'inference':
    #     inference(config)

    # else:
    #     raise Exception('Error argument!')
Code Example #13
File: StepControl.py  Project: fjafjan/Tracker
import ctypes
from time import sleep, clock, localtime
from time import time as time_now
from numpy import array
from math import sqrt, copysign, floor
import sys, os

from utilities import to_str, get_filename
## Load the DLL HERE

## Step4 = ctypes.WinDLL('C:/Documents and Settings/Fjafjan/My Documents/Downloads/LStepAPI_1_2_0_46/LStep4.dll')

Step4 = ctypes.WinDLL('C:/Data/dllfolder/LStep4.dll')
filename = get_filename("MetaData/position_tracking.txt")
outfile = open(filename, 'w')

##########		Here are some functions from the L_STEP API implemented as Python functions		##########


def ConnectSimple(ControllerID,
                  ComName,
                  Baudrate,
                  ShowProtocoll,
                  print_error=False):
    connect_simple = Step4['LS_ConnectSimple']
    ctrl_id = ctypes.c_int(ControllerID)
    com_name = ctypes.c_char_p(ComName)
    baud_rate = ctypes.c_int(Baudrate)
    show_prot = ctypes.c_int(ShowProtocoll)
    error_code = ctypes.c_int32(-1)
    error_code = connect_simple(ctrl_id, com_name, baud_rate, show_prot)
Code Example #14
            # Write out target
            target = get_target_from_events(data['events'], lb_to_ix,
                                            start / config.sr, end / config.sr)
            hf['target'].resize((item_counts + 1, classes_num))
            hf['target'][item_counts] = target

            item_counts += 1

    hf.create_dataset(name='audio_name',
                      data=[s.encode() for s in audio_names],
                      dtype='S40')

    hf.create_dataset(name='fold', data=folds, dtype=np.int32)

    hf.close()

    logging.info('Write out hdf5 file to {}'.format(hdf5_path))
    logging.info('Time spent: {} s'.format(time.time() - write_hdf5_time))


if __name__ == '__main__':

    logs_dir = os.path.join(config.workspace, 'logs', get_filename(__file__))
    create_folder(logs_dir)
    logging = create_logging(logs_dir, filemode='w')

    logging.info(config)

    calculate_logmel_features(config)
Code Example #15
def pack_maps_dataset_to_hdf5(args):
    """MAPS is a piano dataset only used for evaluating our piano transcription
    system (optional). Ref:

    [1] Emiya, Valentin. "MAPS Database: A piano database for multipitch
    estimation and automatic transcription of music." 2016.

    Load & resample MAPS audio files, then write to hdf5 files.

    Args:
      dataset_dir: str, directory of dataset
      workspace: str, directory of your workspace
    """

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace

    sample_rate = config.sample_rate
    pianos = ['ENSTDkCl', 'ENSTDkAm']

    # Paths
    waveform_hdf5s_dir = os.path.join(workspace, 'hdf5s', 'maps')

    logs_dir = os.path.join(workspace, 'logs', get_filename(__file__))
    create_logging(logs_dir, filemode='w')
    logging.info(args)

    feature_time = time.time()
    count = 0

    # Load & resample each audio file to a hdf5 file
    for piano in pianos:
        sub_dir = os.path.join(dataset_dir, piano, 'MUS')

        audio_names = [os.path.splitext(name)[0] for name in os.listdir(sub_dir) 
            if os.path.splitext(name)[-1] == '.mid']
        
        for audio_name in audio_names:
            print('{} {}'.format(count, audio_name))
            audio_path = '{}.wav'.format(os.path.join(sub_dir, audio_name))
            midi_path = '{}.mid'.format(os.path.join(sub_dir, audio_name))

            (audio, _) = librosa.core.load(audio_path, sr=sample_rate, mono=True)
            midi_dict = read_maps_midi(midi_path)
            
            packed_hdf5_path = os.path.join(waveform_hdf5s_dir, '{}.h5'.format(audio_name))
            create_folder(os.path.dirname(packed_hdf5_path))

            with h5py.File(packed_hdf5_path, 'w') as hf:
                hf.attrs.create('split', data='test'.encode(), dtype='S20')
                hf.attrs.create('midi_filename', data='{}.mid'.format(audio_name).encode(), dtype='S100')
                hf.attrs.create('audio_filename', data='{}.wav'.format(audio_name).encode(), dtype='S100')
                hf.create_dataset(name='midi_event', data=[e.encode() for e in midi_dict['midi_event']], dtype='S100')
                hf.create_dataset(name='midi_event_time', data=midi_dict['midi_event_time'], dtype=np.float32)
                hf.create_dataset(name='waveform', data=float32_to_int16(audio), dtype=np.int16)
            
            count += 1

    logging.info('Write hdf5 to {}'.format(packed_hdf5_path))
    logging.info('Time: {:.3f} s'.format(time.time() - feature_time))
Code Example #16
def pack_waveforms_to_hdf5(args):
    """Pack waveforms to a single hdf5 file.
    """

    # Arguments & parameters
    audios_dir = args.audios_dir
    csv_path = args.csv_path
    waveform_hdf5_path = args.waveform_hdf5_path
    target_hdf5_path = args.target_hdf5_path
    mini_data = args.mini_data

    audio_length = config.audio_length
    classes_num = config.classes_num
    sample_rate = config.sample_rate

    # Paths
    if mini_data:
        prefix = 'mini_'
        waveform_hdf5_path += '.mini'
        target_hdf5_path += '.mini'
    else:
        prefix = ''

    create_folder(os.path.dirname(waveform_hdf5_path))
    create_folder(os.path.dirname(target_hdf5_path))

    logs_dir = '_logs/pack_waveforms_to_hdf5/{}{}'.format(
        prefix, get_filename(csv_path))
    create_folder(logs_dir)
    create_logging(logs_dir, filemode='w')
    logging.info('Write logs to {}'.format(logs_dir))

    # Read csv file
    meta_dict = read_metadata(csv_path)

    if mini_data:
        mini_num = 10
        for key in meta_dict.keys():
            meta_dict[key] = meta_dict[key][0:mini_num]

    audios_num = len(meta_dict['audio_name'])

    # Pack waveform to hdf5
    total_time = time.time()

    with h5py.File(waveform_hdf5_path, 'w') as hf:
        hf.create_dataset('audio_name', shape=((audios_num, )), dtype='S20')
        hf.create_dataset('waveform',
                          shape=((audios_num, audio_length)),
                          dtype=np.int16)
        hf.create_dataset('target',
                          shape=((audios_num, classes_num)),
                          dtype=bool)
        hf.attrs.create('sample_rate', data=sample_rate, dtype=np.int32)

        # Read audio
        for n in range(audios_num):
            audio_path = os.path.join(audios_dir, meta_dict['audio_name'][n])

            if os.path.isfile(audio_path):
                logging.info('{} {}'.format(n, audio_path))
                (audio, _) = librosa.core.load(audio_path,
                                               sr=sample_rate,
                                               mono=True)
                audio = pad_or_truncate(audio, audio_length)

                hf['audio_name'][n] = meta_dict['audio_name'][n].encode()
                hf['waveform'][n] = float32_to_int16(audio)
                hf['target'][n] = meta_dict['target'][n]
            else:
                logging.info('{} File does not exist! {}'.format(
                    n, audio_path))

        # Pack target to hdf5
        hdf5_name = target_hdf5_path.split('/')[-1]

        with h5py.File(target_hdf5_path, 'w') as target_hf:
            target_hf.create_dataset('audio_name',
                                     data=hf['audio_name'][:],
                                     dtype='S20')
            target_hf.create_dataset('hdf5_name',
                                     data=[hdf5_name.encode()] * audios_num,
                                     dtype='S40')
            target_hf.create_dataset('index_in_hdf5',
                                     data=np.arange(audios_num),
                                     dtype=np.int32)
            target_hf.create_dataset('target',
                                     data=hf['target'][:],
                                     dtype=bool)

    logging.info('Write to {}'.format(waveform_hdf5_path))
    logging.info('Write to {}'.format(target_hdf5_path))
    logging.info('Pack hdf5 time: {:.3f}'.format(time.time() - total_time))
Code Example #17
def download_wavs(args):
    """Download videos and extract audio in wav format.
    """

    # Paths
    csv_path = args.csv_path
    audios_dir = args.audios_dir
    mini_data = args.mini_data

    if mini_data:
        logs_dir = '_logs/download_dataset_minidata/{}'.format(
            get_filename(csv_path))
    else:
        logs_dir = '_logs/download_dataset/{}'.format(get_filename(csv_path))

    create_folder(audios_dir)
    create_folder(logs_dir)
    create_logging(logs_dir, filemode='w')
    logging.info('Download log is saved to {}'.format(logs_dir))

    # Read csv
    with open(csv_path, 'r') as f:
        lines = f.readlines()

    lines = lines[3:]  # Remove csv head info

    if mini_data:
        lines = lines[0:10]  # Download small data for debug

    download_time = time.time()

    # Download
    for (n, line) in enumerate(lines):

        items = line.split(', ')
        audio_id = items[0]
        start_time = float(items[1])
        end_time = float(items[2])
        duration = end_time - start_time

        logging.info('{} {} start_time: {:.1f}, end_time: {:.1f}'.format(
            n, audio_id, start_time, end_time))

        # Download full video of whatever format
        video_name = os.path.join(audios_dir, '_Y{}.%(ext)s'.format(audio_id))
        os.system("youtube-dl --quiet -o '{}' -x https://www.youtube.com/watch?v={}"\
            .format(video_name, audio_id))

        video_paths = glob.glob(
            os.path.join(audios_dir, '_Y' + audio_id + '.*'))

        # If download successful
        if len(video_paths) > 0:
            video_path = video_paths[0]  # Choose one video

            # Prepend 'Y' to the file name because some video ids start with
            # '-', which would cause problems
            audio_path = os.path.join(audios_dir, 'Y' + audio_id + '.wav')

            # Extract audio in wav format
            os.system("ffmpeg -loglevel panic -i {} -ac 1 -ar 32000 -ss {} -t 00:00:{} {} "\
                .format(video_path,
                str(datetime.timedelta(seconds=start_time)), duration,
                audio_path))

            # Remove downloaded video
            os.system("rm {}".format(video_path))

            logging.info("Download and convert to {}".format(audio_path))

    logging.info(
        'Download finished! Time spent: {:.3f} s'.format(time.time() -
                                                         download_time))

    logging.info('Logs can be viewed in {}'.format(logs_dir))
Code Example #18
File: main.py  Project: node9909/EEG_Project
import os
import utilities
from shutil import copy
import csv
import numpy as np

# Get current answer files
ans_fol = "..\\..\\data\\answers"
ans_paths = utilities.get_files(ans_fol)
subs = [utilities.get_filename(i) for i in ans_paths]

# Get export files (iMotion file)
exp_fol = "..\\..\\data\\export"
exp_paths = utilities.get_files(exp_fol)

# Create subject folders if they do not exist
sub_fol = "..\\..\\data\\subjects"
for ans_path, sub in zip(ans_paths, subs):
    folder = os.path.join(sub_fol, sub)

    # Check if folder already exists
    if not os.path.isdir(folder):
        # Create folder
        os.mkdir(folder)

        # Copy answer file to subject folder
        copy(ans_path, os.path.join(folder, 'answers.txt'))

        # Find and copy the correct export file to subject folder
        exp_ind = utilities.search_string_in_list(exp_paths, sub)
        copy(exp_paths[exp_ind], os.path.join(folder, 'export.txt'))
Code Example #19
def infer_prob(args):
    """Inference the output probabilites on MAESTRO dataset, and write out to
    disk. This will reduce duplicate computation for later evaluation.

    Args:
      workspace: str, directory of your workspace
      model_type: str
      augmentation: str, e.g. 'none'
      checkpoint_path: str
      dataset: 'maestro'
      split: 'test'
      post_processor_type: 'regression' | 'onsets_frames'. High-resolution 
        system should use 'regression'. 'onsets_frames' is only used to compare
        with Google's onsets and frames system.
      cuda: bool
    """

    # Arguments & parameters
    workspace = args.workspace
    model_type = args.model_type
    checkpoint_path = args.checkpoint_path
    augmentation = args.augmentation
    dataset = args.dataset
    split = args.split
    post_processor_type = args.post_processor_type
    device = torch.device('cuda') if args.cuda and torch.cuda.is_available(
    ) else torch.device('cpu')

    sample_rate = config.sample_rate
    segment_seconds = config.segment_seconds
    segment_samples = int(segment_seconds * sample_rate)
    frames_per_second = config.frames_per_second
    classes_num = config.classes_num
    begin_note = config.begin_note

    # Paths
    hdf5s_dir = os.path.join(workspace, 'hdf5s', dataset)
    probs_dir = os.path.join(workspace, 'probs',
                             'model_type={}'.format(model_type),
                             'augmentation={}'.format(augmentation),
                             'dataset={}'.format(dataset),
                             'split={}'.format(split))
    create_folder(probs_dir)

    # Transcriptor
    transcriptor = PianoTranscription(model_type,
                                      device=device,
                                      checkpoint_path=checkpoint_path,
                                      segment_samples=segment_samples,
                                      post_processor_type=post_processor_type)

    (hdf5_names, hdf5_paths) = traverse_folder(hdf5s_dir)

    count = 0
    for hdf5_path in hdf5_paths:
        with h5py.File(hdf5_path, 'r') as hf:
            if hf.attrs['split'].decode() == split:
                print(count, hdf5_path)
                count += 1

                # Load audio
                audio = int16_to_float32(hf['waveform'][:])
                midi_events = [e.decode() for e in hf['midi_event'][:]]
                midi_events_time = hf['midi_event_time'][:]

                # Ground truths processor
                target_processor = TargetProcessor(
                    segment_seconds=len(audio) / sample_rate,
                    frames_per_second=frames_per_second,
                    begin_note=begin_note,
                    classes_num=classes_num)

                # Get ground truths
                (target_dict, note_events, pedal_events) = \
                    target_processor.process(start_time=0,
                        midi_events_time=midi_events_time,
                        midi_events=midi_events, extend_pedal=True)

                ref_on_off_pairs = np.array(
                    [[event['onset_time'], event['offset_time']]
                     for event in note_events])
                ref_midi_notes = np.array(
                    [event['midi_note'] for event in note_events])
                ref_velocity = np.array(
                    [event['velocity'] for event in note_events])

                # Transcribe
                transcribed_dict = transcriptor.transcribe(audio,
                                                           midi_path=None)
                output_dict = transcribed_dict['output_dict']

                # Pack probabilities to dump
                total_dict = {
                    key: output_dict[key]
                    for key in output_dict.keys()
                }
                total_dict['frame_roll'] = target_dict['frame_roll']
                total_dict['ref_on_off_pairs'] = ref_on_off_pairs
                total_dict['ref_midi_notes'] = ref_midi_notes
                total_dict['ref_velocity'] = ref_velocity

                if 'pedal_frame_output' in output_dict.keys():
                    total_dict['ref_pedal_on_off_pairs'] = \
                        np.array([[event['onset_time'], event['offset_time']] for event in pedal_events])
                    total_dict['pedal_frame_roll'] = target_dict[
                        'pedal_frame_roll']

                prob_path = os.path.join(
                    probs_dir, '{}.pkl'.format(get_filename(hdf5_path)))
                create_folder(os.path.dirname(prob_path))
                pickle.dump(total_dict, open(prob_path, 'wb'))
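
traverse_folder is assumed to walk a directory tree and return parallel lists of file names and full paths. A sketch of that assumed behaviour:

import os

def traverse_folder(folder):
    # Recursively collect (names, paths) of every file below `folder`.
    names, paths = [], []
    for root, _, files in os.walk(folder):
        for name in files:
            names.append(name)
            paths.append(os.path.join(root, name))
    return names, paths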
Code Example #20
import utilities
import numpy as np
import os
import re

export_fol = "D:\\Apostolis\\Programming\\Python\\Athena_project\\EEG_Project\\data\\export"
answer_fol = "D:\\Apostolis\\Programming\\Python\\Athena_project\\EEG_Project\\data\\answers"
save_fol = "D:\\Apostolis\\Programming\\Python\\Athena_project\\EEG_Project\\data\\subjects"

export_files = utilities.get_files(export_fol)
answer_files = utilities.get_files(answer_fol)

for file in export_files:
    # Get subject names from file name
    sub_name = utilities.get_filename(file, start=4)

    # Get time zero (t0) of the subject from the info.csv file.
    # t0 represents the exact ms that the eeg recording started according to iMotions export file
    with open(os.path.join(save_fol, sub_name, 'info.csv')) as csv:
        lines = csv.readlines()
        line = lines[1].split(',')
        t0 = int(re.sub("[^0-9]", "", line[2]))

    f = open(file)
    text = f.readlines()
    f.close()
    time_zero = 0
    for ind, line in enumerate(text):
        # Search in text for this particular line which marks the start of the web application
        if "NavigateComplete\thttp://localhost/exp/main.php" in line:
            line = text[ind - 2].split('\t')
Code Example #21
def main(training_folder, classify_folder, norm, save_subsets):
    # Read the manually created ratings
    with open('./' + training_folder + '/ratings.txt', 'r') as f:
        ratings = json.load(f)

    # Read rated images
    rated_images = []
    for filename in ratings:
        if ratings[filename] is None:
            continue
        img = ImageInfo(filename, ratings[filename])
        rated_images.append(img)

    # Create histograms and all_tags (vocabulary)
    (bad_hist, good_hist, all_tags) = create_hists_and_vocab(rated_images)

    # HISTOGRAM PLOTTING
    all_tags.sort(key=lambda tag: good_hist[tag] + bad_hist[tag], reverse=True)
    all_training_tags = all_tags
    all_tags = all_tags[:20]
    show_tags = True
    fig = pl.figure()
    good_data = [good_hist[tag] for tag in all_tags]
    bad_data = [-bad_hist[tag] for tag in all_tags]

    # Normalization
    good_data = normalize_data(good_data, norm)
    if norm == 'L1':
        bad_data = [-d for d in bad_data]
    bad_data = normalize_data(bad_data, norm)
    if norm == 'L1':
        bad_data = [-d for d in bad_data]

    if norm == '0':
        pl.title('Avainsanojen frekvenssi')
    else:
        pl.title('Avainsanojen {}-normalisoitu frekvenssi'.format(norm))
    ax = pl.subplot(111)
    ax.bar(range(len(all_tags)), good_data, width=1, color='b')
    ax.bar(range(len(all_tags)), bad_data, width=1, color='r')
    pl.ylabel('frekvenssi')
    pl.xlabel('avainsanan indeksi')
    if show_tags:
        pl.xticks(range(len(all_tags)), all_tags)
        pl.xticks(rotation=90)
        pl.xlabel('avainsana')
        pl.gcf().subplots_adjust(bottom=0.45)

    ax.legend((u'Hyödylliset kuvat', u'Hyödyttömät kuvat'), loc='lower right')

    #pl.show()

    # Divide images to training and test images
    import numpy as np
    import random

    def occurrance_matrix(images, vocabulary):
        X = np.zeros((len(images), len(vocabulary)))
        for i in range(len(images)):
            for j in range(len(vocabulary)):
                if vocabulary[j] in images[i].tags:
                    X[(i, j)] = 1
        return X

    def leave_one_out(training_images, test_image, vocab):
        X_training = occurrance_matrix(training_images, vocab)
        Y_training = np.array([img.rating for img in training_images])
        X_testing = occurrance_matrix([test_image], vocab)
        from sklearn.naive_bayes import BernoulliNB
        classifier = BernoulliNB()
        classifier.fit(X_training, Y_training)
        classifier.classes_ = np.array([-1, 1])
        estimates = classifier.predict(X_testing)
        if estimates[0] == test_image.rating:
            return 1.0
        else:
            return 0.0

    repetitions_per_n = 100
    clf_rates = []
    training_ns = range(10, len(rated_images), 2)
    for n_training in training_ns:
        correct = 0.0
        print "training: {}".format(n_training)
        for repetition in range(repetitions_per_n):
            clf_images = [image for image in rated_images]
            random.shuffle(clf_images)
            correct += leave_one_out(clf_images[:n_training],
                                     clf_images[n_training], all_training_tags)
        clf_rates.append(correct / repetitions_per_n)
    print clf_rates
    good_imgs = 0
    for image in rated_images:
        if image.rating == 1:
            good_imgs += 1
    a_priori = float(good_imgs) / len(rated_images)
    pl.figure()
    pl.clf()
    pl.plot(training_ns, clf_rates, 'b-', training_ns,
            [0.5] * len(training_ns), 'r-', training_ns,
            [a_priori] * len(training_ns), 'g-')
    pl.legend(
        ['Naiivi Bayes-luokitin', 'Satunnaisluokitin', 'A priori -luokitin'],
        loc='lower right')
    pl.xlabel(u'Opetuskuvien määrä')
    pl.ylabel(u'Luokittelun onnistumistodennäköisyys')
    pl.ylim(0.0, 1.0)
    pl.xlim(training_ns[0], training_ns[-1])
    pl.show()

    rand_subset = range(len(rated_images))
    random.shuffle(rand_subset)
    #n_training = len(rated_images) / 4 #len(rated_images) - 40
    print "Training data size: {}, Testing data size: {}".format(
        n_training,
        len(rated_images) - n_training)
    training_images = [rated_images[i] for i in rand_subset[:n_training]]
    test_images = [rated_images[i] for i in rand_subset[n_training:]]
    X_training = occurrance_matrix(training_images, all_training_tags)
    Y_training = np.array([img.rating for img in training_images])
    X_testing = occurrance_matrix(test_images, all_training_tags)
    Y_testing = np.array([img.rating for img in test_images])
    # Train the Bernoulli Naive Bayes
    from sklearn.naive_bayes import BernoulliNB
    classifier = BernoulliNB()
    classifier.fit(X_training, Y_training)

    mean_accuracy = classifier.score(X_testing, Y_testing)
    print "Mean accuracy:", mean_accuracy

    estimates = classifier.predict(X_testing)
    good_to_bad_errors = 0
    bad_to_good_errors = 0
    for i in range(estimates.shape[0]):
        if estimates[i] == -1 and Y_testing[i] == 1:
            good_to_bad_errors += 1
        if estimates[i] == 1 and Y_testing[i] == -1:
            bad_to_good_errors += 1
    print "Bad images classified as good:", float(
        bad_to_good_errors) / estimates.shape[0]
    print "Good images classified as bad:", float(
        good_to_bad_errors) / estimates.shape[0]

    # TODO: try a tf-idf classifier or similar

    # Classify ALL images, using ALL rated images for training (todo, use ratings of subset or whole set?)
    if classify_folder is None:
        return
    all_image_files = get_images_in_folder('./' + classify_folder + '/')
    all_images = [
    ]  # all images in the classify folder except ones also in subset
    for img in all_image_files:
        in_subset = False
        for rated_img in rated_images:
            if img == rated_img.filename:
                print img
                in_subset = True
                break
        if not in_subset:
            all_images.append(ImageInfo(img, None))
    (_, _, total_vocab) = create_hists_and_vocab(all_images)
    X_training = occurrance_matrix(rated_images, total_vocab)
    Y_training = np.array([img.rating for img in rated_images])
    X_classify = occurrance_matrix(all_images, total_vocab)
    classifier = BernoulliNB()
    classifier.fit(X_training, Y_training)
    all_estimates = classifier.predict(X_classify)
    if save_subsets:
        copy_from_folder = utilities.get_folder(all_images[0].filename)
        copy_to_base_folder = './' + args.training_images + '+' + args.classify_images + '/'
        print copy_from_folder, copy_to_base_folder
        good_img_paths = []
        good_md_paths = []
        bad_img_paths = []
        bad_md_paths = []
        for i in range(len(all_estimates)):
            filename = utilities.get_filename(all_images[i].filename)
            if all_estimates[i] == 1:  # image classified as "good"
                good_img_paths.append(filename + '.jpg')
                good_md_paths.append(filename + '.txt')
            else:  # image classified as "bad"
                bad_img_paths.append(filename + '.jpg')
                bad_md_paths.append(filename + '.txt')
        utilities.copy_images(copy_from_folder,
                              copy_to_base_folder + 'good_images/',
                              good_img_paths, good_md_paths)
        utilities.copy_images(copy_from_folder,
                              copy_to_base_folder + 'bad_images/',
                              bad_img_paths, bad_md_paths)
Code Example #22
    parser_inference.add_argument('--config', type=str, required=True)
    parser_inference.add_argument('--inference-model', type=str)
    parser_inference.add_argument('--inference-house', type=str)
    parser_inference.add_argument('--binary-threshold', type=float, default=None)
    parser_inference.add_argument('--eval-binary-threshold', type=float, default=None)
    parser_inference.add_argument('--model-threshold', type=float, default=None)
    parser_inference.add_argument('--cuda', action='store_true', default=False)
    for p in model_params:
        parser_inference.add_argument('--pm-' + p.replace('_', '-'), type=str, metavar='<{}>'.format(p))

    args = parser.parse_args()
    args = consolidate_args(args)

    # Write out log
    if args.mode == 'inference':
        logs_dir = os.path.join(args.workspace, 'logs', get_filename(__file__), 'inference_logs')
    else:
        logs_dir = os.path.join(args.workspace, 'logs', get_filename(__file__))
    logging = create_logging(logs_dir, filemode='w')
    logging.info(args)

    if args.mode == 'train':
        args.__dict__['basename'] = logging.getLoggerClass().root.handlers[0].baseFilename[:-4]
        config_to_save = args.basename + '.config.json'
        logging.info('Saving config to ' + config_to_save)
        ignores = set(['workspace', 'config', 'cuda', 'mode'])
        with open(config_to_save, 'w') as fout:
            json.dump({k: v for k, v in args.__dict__.items()
                       if k not in ignores}, fout)
        train(args)
Code Example #23
                              type=str,
                              required=True,
                              help='Directory of your workspace.')
    parser_train.add_argument('--taxonomy_level',
                              type=str,
                              choices=['fine', 'coarse'],
                              required=True)
    parser_train.add_argument('--model_type',
                              type=str,
                              required=True,
                              help='E.g., TFSANN.')
    parser_train.add_argument('--holdout_fold',
                              type=str,
                              choices=['1', 'none'],
                              required=True)
    parser_train.add_argument('--batch_size', type=int, required=True)
    parser_train.add_argument('--cuda', action='store_true', default=True)
    parser_train.add_argument(
        '--mini_data',
        action='store_true',
        default=False,
        help='Set True for debugging on a small part of data.')

    args = parser.parse_args()
    args.filename = get_filename(__file__)

    if args.mode == 'train':
        train(args)

    else:
        raise Exception('Error argument!')
Code Example #24
File: StepControl.py  Project: fjafjan/Tracker
import ctypes
from time import sleep, clock, localtime
from time import time as time_now
from numpy import array
from math import sqrt, copysign, floor
import sys, os

from utilities import to_str, get_filename
## Load the DLL HERE

## Step4 = ctypes.WinDLL('C:/Documents and Settings/Fjafjan/My Documents/Downloads/LStepAPI_1_2_0_46/LStep4.dll')

Step4 = ctypes.WinDLL('C:/Data/dllfolder/LStep4.dll')
filename = get_filename("MetaData/position_tracking.txt")
outfile = open(filename, 'w')


##########		Here are some functions from the L_STEP API implemented as Python functions		##########

def ConnectSimple(ControllerID, ComName, Baudrate, ShowProtocoll, print_error=False):
	connect_simple = Step4['LS_ConnectSimple']
	ctrl_id = ctypes.c_int(ControllerID)
	com_name  = ctypes.c_char_p(ComName)
	baud_rate = ctypes.c_int(Baudrate)
	show_prot = ctypes.c_int(ShowProtocoll)
	error_code= ctypes.c_int32(-1)
	error_code = connect_simple(ctrl_id, com_name, baud_rate, show_prot)
	if print_error or error_code != 0:
		print "we get the error code ", error_code

def SetVelocity(x_vel, y_vel, z_vel, a_vel, max_vel=2.0, print_error=False):