/
trainRecorded.py
148 lines (135 loc) · 5.45 KB
/
trainRecorded.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
"""Audio Trainer v1.0"""
# Imports of python libs
import wave
import time
import os.path
import numpy as np
import multiprocessing
import copy
# import of own scripts
import functions as f
import qualitycheck as q
import interactions
import model as modelImport
import config as conf
def preprocess(in_data):
    """Convert one opened mono wave file into a list of processed frames.

    NOTE(review): the indentation of this function was reconstructed from a
    flattened copy of the source.  The nesting chosen here joins two
    consecutive 512-frame reads and processes them as one frame; the other
    plausible reading processes every single read.  Confirm against the
    original layout before relying on the frame size.

    Args:
        in_data: indexable pair ``(wf, wavenumber)``.  ``wf`` is an opened
            wave reader offering ``getnchannels()``, ``getnframes()`` and
            ``readframes()``; ``wavenumber`` is only used in the progress
            message.

    Returns:
        A list with one entry per processed frame (the value returned by the
        selected ``f.process*`` function), or ``None`` when the file is not
        mono.
    """
    wf = in_data[0]
    wavenumber = in_data[1]

    # check whether the wave file is mono or stereo
    if wf.getnchannels() != 1:
        print("Stereo wave files are not supported yet")
        return None

    # The configuration does not change while a file is read, so the feature
    # extractor is selected once instead of once per frame.
    if conf.CEPSTRUM:
        if conf.LIFTERING:
            extract = f.processCepsLiftering
        else:
            extract = f.processCepstrum
    else:
        extract = f.processSpectrum

    loops = int(wf.getnframes() / conf.CHUNK)
    number = []
    first_half = None
    for _ in range(loops):
        # np.frombuffer replaces the deprecated binary mode of np.fromstring;
        # the samples are interpreted as 16 bit integers as before.
        samples = np.frombuffer(wf.readframes(512), dtype=np.int16)
        if first_half is None:
            # remember the first read and wait for the second one
            first_half = samples
            continue
        # np.append returns a new, writable array holding both reads
        frame = np.append(first_half, samples)
        first_half = None
        number.append(extract(frame))
    print("Processed file " + str(wavenumber) + ".wav")
    return number
def _preprocess_recordings(fileName):
    """Open and preprocess <fileName>/1.wav, 2.wav, ... up to the first gap.

    All files are opened first and preprocessed afterwards.  Every opened
    reader is closed again, also when opening or preprocessing fails.

    Returns:
        A list with one preprocess() result per file found (``None`` for
        files preprocess() rejected).
    """
    opened = []
    try:
        wavenumber = 1
        path = str(fileName) + "/" + str(wavenumber) + ".wav"
        # do it while there are wave files
        while os.path.isfile(path):
            opened.append((wave.open(path), wavenumber))
            print("File " + path + " found.")
            wavenumber += 1
            path = str(fileName) + "/" + str(wavenumber) + ".wav"
        return [preprocess(entry) for entry in opened]
    finally:
        for reader, _ in opened:
            reader.close()


def _build_models(records, optimalFrames, modelName, scriptpath):
    """Create one candidate model per record by merging similar frames."""
    with multiprocessing.Pool(processes=4) as pool:
        result = pool.map(
            f.minimalizeAndCalcThreshold,
            [(record, optimalFrames) for record in records])
    minimalizedRecords = [entry[0] for entry in result]
    calculatedThresholds = [entry[1] for entry in result]

    built = []
    for record, thresholds in zip(minimalizedRecords, calculatedThresholds):
        features = copy.deepcopy(record)
        tmpFeatures = [np.zeros(conf.FEATURES_PER_FRAME, dtype=np.float64)
                       for _ in range(optimalFrames)]
        tmpCounter = [0] * optimalFrames
        # for every frame in this record try if we find mergeable frames
        for h in range(optimalFrames):
            # we try all recordings
            for other in minimalizedRecords:
                if f.compare(other[h], features[h]) < thresholds[h]:
                    tmpFeatures[h] += other[h]
                    tmpCounter[h] += 1
        # turn the per-frame sums into averages over the merged frames
        for h in range(optimalFrames):
            tmpFeatures[h] = np.divide(tmpFeatures[h], tmpCounter[h])
        built.append(modelImport.Model(
            tmpFeatures, thresholds, modelName, tmpCounter, scriptpath))
    return built


def _print_model_table(candidates):
    """Print one summary line per model, numbered starting at 1."""
    for i, candidate in enumerate(candidates):
        print("Model Nr:\t" + str(i + 1) +
              " | Frames:\t" + str(len(candidate.features)) +
              " | Matches:\t" + str(candidate.matches) +
              " | Influenced by:\t" + str(candidate.influencedBy) +
              " | Threshold:\t" + str(candidate.threshold) +
              " | Score:\t" + str(candidate.score))


def main():
    """Train models from numbered wave files and store the one the user picks.

    Steps: ask for the training parameters, preprocess every
    ``<fileName>/<n>.wav``, build one candidate model per usable recording,
    run the quality check on all candidates, print them and store the
    selected one.  The candidate list is also published in the module level
    name ``models``.
    """
    global models
    print("")
    print("This script can only process conf.CHUNK (currently: " + str(conf.CHUNK) +
          ") frames per loop so if the file contains a number of frames which is not divisible by conf.CHUNK the last few frames are dropped")
    fileName, modelName, optimalFrames, scriptpath = interactions.getTrainParameters()

    beginning = time.time()
    model = _preprocess_recordings(fileName)
    print("Processed " + str(len(model)) + " files in " + str(time.time() - beginning) + " seconds, minimalize them.")

    # preprocess() returns None for files it cannot handle (stereo); those
    # must not reach the minimalization step.
    records = [record for record in model if record is not None]
    if not records:
        print("No usable wave files found, nothing to train.")
        return

    beginning = time.time()
    f.clearTmpFolder()
    models = _build_models(records, optimalFrames, modelName, scriptpath)
    print()
    print("Computed the models in " + str(time.time() - beginning) + " seconds. Compute their score.")
    print()

    beginning = time.time()
    with multiprocessing.Pool(processes=4) as pool:
        pool.map(q.qualityCheck,
                 [(candidate, fileName, i) for i, candidate in enumerate(models)])
    # The return value of the quality check is not used; the models are read
    # back through f.loadModels(tmp=True) -- presumably the check stores its
    # results in the tmp folder (TODO confirm in qualitycheck.py).
    models = f.loadModels(tmp=True)
    print("Computed the scores in " + str(time.time() - beginning) + " seconds.")
    print()
    _print_model_table(models)

    # get the model number and subtract 1 because list indexing starts
    # with 0
    modelNumber = interactions.getModelNumber(len(models)+1) - 1
    # report the same 1-based number that is shown in the table above
    print("You selected Model " + str(modelNumber + 1) + " with " + str(models[modelNumber].matches) + " Matches and a Score of: " + str(models[modelNumber].score))
    f.storeModel(models[modelNumber])