# Must remove .mp3 from the path to get generic path
            'path': row[4].replace('.mp3', '')
        } for n, row in enumerate(uspop2002_list)
    ]
    whoosh_search.create_index(
        os.path.join(BASE_DATA_PATH, 'uspop2002', 'index'), uspop2002_list)

# Quick test
artist = 'bon jovi'
title = 'livin on a prayer'

index = whoosh_search.get_whoosh_index(
    os.path.join(BASE_DATA_PATH, 'cal500', 'index'))
with index.searcher() as searcher:
    print 'cal500:\t{}'.format(
        whoosh_search.search(searcher, index.schema, artist, title))

index = whoosh_search.get_whoosh_index(
    os.path.join(BASE_DATA_PATH, 'cal10k', 'index'))
with index.searcher() as searcher:
    print 'cal10k:\t{}'.format(
        whoosh_search.search(searcher, index.schema, artist, title))

index = whoosh_search.get_whoosh_index(
    os.path.join(BASE_DATA_PATH, 'msd', 'index'))
with index.searcher() as searcher:
    print 'msd:\t{}'.format(
        whoosh_search.search(searcher, index.schema, artist, title))

index = whoosh_search.get_whoosh_index(
    os.path.join(BASE_DATA_PATH, 'uspop2002', 'index'))
        os.path.join(FILE_LIST_PATH, 'uspop2002.txt'))
    # Rebuild the list as one dict per row: the enumeration index (as a
    # unicode string) becomes the id; columns 1, 3 and 4 supply the artist,
    # title and path.
    uspop2002_list = [
        {
            'id': unicode(n),
            'artist': row[1],
            'title': row[3],
            # Must remove .mp3 from the path to get generic path
            'path': row[4].replace('.mp3', ''),
        }
        for n, row in enumerate(uspop2002_list)
    ]
    # Build the search index from the rebuilt entries
    uspop2002_index_path = os.path.join(BASE_DATA_PATH, 'uspop2002', 'index')
    whoosh_search.create_index(uspop2002_index_path, uspop2002_list)

# Quick test
artist = 'bon jovi'
title = 'livin on a prayer'

index = whoosh_search.get_whoosh_index(
    os.path.join(BASE_DATA_PATH, 'cal500', 'index'))
with index.searcher() as searcher:
    print 'cal500:\t{}'.format(whoosh_search.search(searcher, index.schema,
                                        artist, title))

index = whoosh_search.get_whoosh_index(
    os.path.join(BASE_DATA_PATH, 'cal10k', 'index'))
with index.searcher() as searcher:
    print 'cal10k:\t{}'.format(whoosh_search.search(searcher, index.schema,
                                        artist, title))

index = whoosh_search.get_whoosh_index(
    os.path.join(BASE_DATA_PATH, 'msd', 'index'))
with index.searcher() as searcher:
    print 'msd:\t{}'.format(whoosh_search.search(searcher, index.schema,
                                                 artist, title))

index = whoosh_search.get_whoosh_index(
    os.path.join(BASE_DATA_PATH, 'uspop2002', 'index'))
 # Collect every MIDI entry whose artist and title equal midi_entry's
 midi_matches = []
 for candidate in midi_list:
     same_artist = candidate['artist'] == midi_entry['artist']
     if same_artist and candidate['title'] == midi_entry['title']:
         midi_matches.append(candidate)
 # Drop the collected entries from midi_list so each is only used once
 for used in midi_matches:
     midi_list.remove(used)
 # This should never happen
 if not midi_matches:
     print "Error: No matches found for {}".format(midi_entry)
 # Query every dataset's index for this artist/title
 dataset_matches = []
 for dataset in DATASETS:
     matches = whoosh_search.search(
         searchers[dataset], indices[dataset].schema,
         midi_entry['artist'], midi_entry['title'])
     # Record each hit as [dataset, first element of the hit], skipping
     # any that have already been recorded
     for hit in matches:
         found = [dataset, hit[0]]
         if found not in dataset_matches:
             dataset_matches.append(found)
 # Only record a pair when at least one dataset entry matched
 if dataset_matches:
     pairs.append([[m['id'] for m in midi_matches], dataset_matches])
     # Indices of all pairs that include at least one of these dataset
     # entries (the pair appended just above is always among them)
     merge_indices = []
     for n, pair in enumerate(pairs):
         if any(shared in pair[1] for shared in dataset_matches):
             merge_indices.append(n)
 # Work on the last remaining MIDI entry
 midi_entry = midi_list[-1]
 # Gather all entries (including this one) with the same artist and title
 midi_matches = []
 for entry in midi_list:
     artist_equal = entry['artist'] == midi_entry['artist']
     if artist_equal and entry['title'] == midi_entry['title']:
         midi_matches.append(entry)
 # Take the gathered entries out of midi_list so none is used twice
 for taken in midi_matches:
     midi_list.remove(taken)
 # This should never happen
 if not midi_matches:
     print "Error: No matches found for {}".format(midi_entry)
 # Search each dataset's index for this artist/title
 dataset_matches = []
 for dataset in DATASETS:
     matches = whoosh_search.search(
         searchers[dataset],
         indices[dataset].schema,
         midi_entry['artist'],
         midi_entry['title'])
     # Add each matched dataset entry unless it is already present
     for result in matches:
         dataset_entry = [dataset, result[0]]
         if dataset_entry in dataset_matches:
             continue
         dataset_matches.append(dataset_entry)
 # If anything matched, store the MIDI ids together with the dataset hits
 if dataset_matches:
     pairs.append([[m['id'] for m in midi_matches], dataset_matches])
     # Find the pairs which include at least one of these dataset entries
     # (the pair appended just above always qualifies)
     merge_indices = [
         n for n, pair in enumerate(pairs)
         if any(shared in pair[1] for shared in dataset_matches)
     ]