Example #1
0
def main(argv):
  """Build a SpeechProblem from a subset of the Brugnara utterances and write it.

  FLAGS.data_set selects how much of the list returned by
  load_brugnara_files is used: "brugnara" keeps everything, while "hundred",
  "ten" and "one" keep the first 100, 10 and 1 entries.  Any other value
  leaves the utterance list empty.

  Args:
    argv: command-line arguments, parsed by FLAGS.
  """
  argv = FLAGS(argv)
  timit = load_timit(FLAGS.corpus_path)
  extractor = FeatureExtractor()

  # Prefix length for each recognised data set name; None means "no cut".
  subset_sizes = {"brugnara": None, "hundred": 100, "ten": 10, "one": 1}
  data_set = FLAGS.data_set
  utterance_names = []
  if data_set in subset_sizes:
    utterance_names = load_brugnara_files(timit)
    limit = subset_sizes[data_set]
    if limit is not None:
      utterance_names = utterance_names[:limit]
  print(utterance_names)

  problem = SpeechProblem(timit, FLAGS.output_name)
  problem.extract_phonemes()
  for name in utterance_names:
    wav_path = timit.abspath(name + ".wav")
    problem.add_utterance(name, extractor.extract_features(wav_path))
  problem.extract_centers()
  problem.write()
Example #2
0
def _load_data_sets():
  """Load the dev, train and test corpora together with their utterance lists.

  Returns:
    A list of (corpus, utterance_names, suffix) tuples in dev, train, test
    order.  When FLAGS.shrink_data is set, every utterance list is cut down
    to its first 10 entries.
  """
  # (directory appended to FLAGS.corpus_path, utterance lister, file suffix).
  # Both the dev and the test split are read from the TIMITNLTKTEST directory.
  specs = [
    ("TIMITNLTKTEST", load_core_dev_files, "dev"),
    ("TIMITNLTK39", load_training_files, "train"),
    ("TIMITNLTKTEST", load_core_test_files, "test"),
  ]
  data_sets = []
  for directory, list_files, suffix in specs:
    corpus = load_timit(FLAGS.corpus_path + directory)
    files = list_files(corpus)
    if FLAGS.shrink_data:
      files = files[:10]
    data_sets.append((corpus, files, suffix))
  return data_sets


def main(argv):
  """Write one "state/code" file per data split (dev, train, test).

  Features are extracted for every utterance of every split and paired with
  their gold states by construct_gold.  The frames of all three splits are
  pooled and handed to a single VQ(FLAGS.vq_size).make_code_book call.  Each
  split is then written to
  FLAGS.output_prefix + "_" + vq_size + "_" + suffix (plus "_shrink" when
  FLAGS.shrink_data is set), one utterance per line, as space-separated
  "state/code" tokens.

  Args:
    argv: command-line arguments, parsed by FLAGS.
  """
  argv = FLAGS(argv)
  data_sets = _load_data_sets()

  extractor = FeatureExtractor()
  all_features = []
  all_states = []
  # For each utterance, in processing order, the positions of its frames
  # inside all_features / all_states.
  utterance_features = []
  for timit, utterance_names, _ in data_sets:
    for utterance_file in utterance_names:
      features = extractor.extract_features(timit.abspath(utterance_file + ".wav"))
      first_index = len(all_features)
      for state, feature in construct_gold(timit, utterance_file, features):
        all_features.append(feature)
        all_states.append(state)
      utterance_features.append(range(first_index, len(all_features)))

  vq = VQ(FLAGS.vq_size)
  # vqs is indexed by the same global frame positions as all_states.
  vqs = vq.make_code_book(all_features, all_states)

  # The splits are walked in the same order as above, so a single running
  # counter lines each utterance up with its entry in utterance_features.
  utterance_ind = 0
  for _, utterance_names, suffix in data_sets:
    file_name = FLAGS.output_prefix + "_" + str(FLAGS.vq_size) + "_" + suffix
    if FLAGS.shrink_data:
      file_name += "_shrink"
    # The with-block closes the file even if writing fails part-way.
    with open(file_name, 'w') as out_file:
      for _ in utterance_names:
        feature_inds = utterance_features[utterance_ind]
        utterance_ind += 1
        tokens = ["%s/%s" % (all_states[ind], vqs[ind]) for ind in feature_inds]
        out_file.write(" ".join(tokens) + "\n")
Example #3
0
  print i, p 

# Select the utterances to process: those whose speaker id is listed in the
# "corpus" file (one id per line), skipping any utterance whose id contains
# "sa1" or "sa2".
with open("corpus") as speaker_file:
  # The with-block closes the file as soon as the ids have been read.
  brugnara = set(line.strip() for line in speaker_file)
# The speaker id is the text between the first "-" and the following "/"
# (presumably ids look like "dr8-mbcg0/sx57" -- confirm against timit.utterances()).
utterance_names = [
  name for name in timit.utterances()
  if name.split("-")[1].split("/")[0] in brugnara
  and "sa1" not in name
  and "sa2" not in name
]
print(len(utterance_names))
utterance_set = speech.UtteranceSet()
all_features = []
#for utterance_file in utterance_names: 
for utterance_file in utterance_names:
#for utterance_file in [ u for u in utterance_names if u == "dr8-mbcg0/sx57"]: 

  # extract features from an audio file using AudioFileProcessor
  afp = AudioFileProcessor()
  afp.processFile(engine, timit.abspath(utterance_file + ".wav"))
  phone_times = timit.phone_times(utterance_file)
  print phone_times
  last = float(phone_times[-1][2])
  features = engine.readAllOutputs()
  
  #print timit.sents(utterance_file)

  utterance = utterance_set.utterances.add()
  for phone in timit.phones(utterance_file):
    if phone == "q": continue
    utterance.phones.append(phoneme_map[phone])
  #print features["mfcc"]

  final = len(features["mfcc"])
  #print " ".join([  str(p)  for (p, s, e) in phone_times])