def main(fileName, fntSize=16, numLags=60):
    """Plot KDE-smoothed PDFs and ACFs of scaled fixed-means model samples.

    Loads the observed series from ``{fileName}.txt``, splits it into
    training/validation halves, then overlays — for each transition-matrix
    scale factor — the sample's KDE estimate (fig 1) and its ACF (fig 2)
    against the test data.  Writes two PDFs and a JSON of ACF MAE scores.

    Parameters
    ----------
    fileName : str
        Base name (no extension) of the data file and of the model-sample
        files ``{fileName}_3_{scale}_fixedMeans.txt``.
    fntSize : int
        Matplotlib global font size.
    numLags : int
        Number of ACF lags (was a hard-coded local; default preserves
        the original behavior).
    """
    ds = np.loadtxt(f"{fileName}.txt").transpose()
    trDs, valDs = auxFs.divide_data(ds)
    data = valDs.flatten(order="C")
    plt.rcParams.update({'font.size': fntSize})
    cols = ['C0', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9']
    lnstyle = ['-', ':', '-.', '--']
    fig1 = plt.figure(figsize=(8, 8))
    fig2 = plt.figure(figsize=(8, 8))
    ax1 = fig1.add_subplot(111, position=[0.175, 0.1, 0.65, 0.65])
    ax2 = fig2.add_subplot(111, position=[0.175, 0.1, 0.65, 0.65])
    xKde = np.linspace(0.0, 1.5, num=150)
    ax1.plot(xKde, kde_fit_estimate(data, xKde), 'k')
    acfTst = acf(data, nlags=numLags)
    maeDict = {}
    # order="C" (was lowercase "c"): NumPy treats them identically, but
    # keep the spelling consistent with the rest of the file.
    ax2.plot(acf(trDs.flatten(order="C"), nlags=numLags),
             color='k', linestyle='-')
    ax2.plot(acfTst, color='k', linestyle=':')
    scales = [20, 30, 40, 50, 60, 80, 100]
    filenames = [f"{fileName}_3_{i}_fixedMeans.txt" for i in scales]
    for i, v in enumerate(filenames):
        X = np.loadtxt(v)
        ax1.plot(xKde, kde_fit_estimate(X.flatten(), xKde),
                 linestyle=lnstyle[i % 3], color=cols[i % 10])
        acfModel = auxFs.ACF(X.flatten(), numLags)
        ax2.plot(acfModel, linestyle=lnstyle[i % 3], color=cols[i % 10])
        # Key is the scale factor phi as a string, e.g. "0.2".
        maeDict[str(scales[i] / 100)] = mean_abs_error(acfTst, acfModel)
    legendTxt = ["Test data"] + [r"$\phi$" + f" = {i/100:0.2f}"
                                 for i in scales]
    ax1.legend(legendTxt, loc='lower center',
               bbox_to_anchor=(0.5, 1.01), ncol=4)
    ax1.set_xlim(0, 1.75)
    ax1.set_ylim(0, 4.0)
    ax1.set_ylabel("PDF")
    ax1.set_xlabel("CSI")
    auxFs.clean_axes(ax1)
    ax2.legend(["Training data"] + legendTxt, loc='lower center',
               bbox_to_anchor=(0.5, 1.01), ncol=3)
    ax2.set_xlim(0, 60)
    ax2.set_ylabel("ACF")
    ax2.set_xlabel("lags (minute)")
    auxFs.clean_axes(ax2)
    fig1.savefig(f"{fileName}_scaled_histograms.pdf")
    fig2.savefig(f"{fileName}_scaled_acf.pdf")
    with open(f"{fileName}_acf_mae_scaled.json", 'w') as f:
        json.dump(maeDict, f, indent=4)
def main(fileName, ymax, fntSize=16, numLags=60, fgSz=(6, 6)):
    """Compare ACFs and histograms of data against the fixed-means sample.

    Loads ``{fileName}.txt`` and the 3-state fixed-means model sample
    ``{fileName}_3_100_fixedMeans.txt``, plots training/test/sample ACFs
    to one figure and test/sample histograms to two more, and prints the
    ACF mean absolute error of the sample vs. the test data.

    Parameters
    ----------
    fileName : str
        Base name (no extension) of the data and sample files.
    ymax : float
        Upper y-limit shared by both histogram figures.
    fntSize : int
        Matplotlib global font size.
    numLags : int
        Number of ACF lags (also used as the ACF x-axis limit).
    fgSz : tuple
        Figure size for the ACF figure.
    """
    ds = np.loadtxt(f"{fileName}.txt").transpose()
    trDs, valDs = auxFs.divide_data(ds)
    # NOTE(review): the original also computed per-sequence lengths and a
    # dataSets list here, but never used them; dropped as dead code.
    trDs = trDs.reshape(-1, 1)
    valDs = valDs.reshape(-1, 1)
    sample = np.loadtxt(f"{fileName}_3_100_fixedMeans.txt")
    fig = plt.figure(figsize=fgSz)
    plt.rcParams.update({'font.size': fntSize})
    ax = fig.add_subplot(111)
    acfTrnData = acf(trDs, nlags=numLags)
    acfTstData = acf(valDs, nlags=numLags)
    acfSample = auxFs.ACF(sample, numLags)
    ax.plot(acfTrnData, color='k', linestyle='-')
    ax.plot(acfTstData, color='k', linestyle=':')
    ax.plot(acfSample)
    ax.set_xlabel("lags (minute)")
    ax.set_ylabel("ACF")
    # Use numLags instead of the former hard-coded 60 so the axis always
    # covers exactly the lags that were computed.
    ax.set_xlim(0, numLags)
    auxFs.clean_axes(ax)
    ax.legend(["Training data", "Test data", "model sample"])
    fig.savefig(f"{fileName}_acf_fixedMeans.pdf")
    print(mean_abs_error(acfTstData, acfSample))

    histFigure = plt.figure(figsize=(6, 6))
    ax2 = histFigure.add_subplot(111)
    ax2.hist(valDs, 100, density=True)

    def do_on_axes(ax):
        # Shared cosmetics for both histogram panels.
        ax.set_xlabel("CSI")
        ax.set_ylabel("PDF")
        auxFs.clean_axes(ax)
        ax.set_xlim(0, 1.75)
        ax.set_ylim(top=ymax)

    do_on_axes(ax2)
    histFigure.savefig(f"{fileName}_histogram_fixedMeans_test.pdf")
    hist2 = plt.figure(figsize=(6, 6))
    ax2 = hist2.add_subplot(111)
    ax2.hist(sample, 100, density=True)
    do_on_axes(ax2)
    hist2.savefig(f"{fileName}_histogram_fixedMeans_sample.pdf")
def main(filename, seed, sampleLength=1000000):
    """Train HMMs with 2..12 states and score scaled-transition variants.

    For each state count a model is trained, sampled, and scored
    (likelihood, K-S, AIC, BIC); for the 3-state model, additional
    variants with scaled transition matrices are evaluated.  Models,
    samples, and a per-state-count JSON of scores are written to disk.

    NOTE(review): `filename` is never referenced — every path uses the
    literal "(unknown)" prefix; confirm this is intentional.
    """
    raw = np.loadtxt(f"(unknown).txt").transpose()
    trainSet, testSet = auxFs.divide_data(raw)
    trainLens = auxFs.calculate_length_sequence(trainSet)
    testLens = auxFs.calculate_length_sequence(testSet)
    trainSet = trainSet.reshape(-1, 1)
    testSet = testSet.reshape(-1, 1)
    bothSets = [trainSet, testSet]
    bothLens = [trainLens, testLens]
    scores = {'seed': seed}
    for nStates in range(2, 13):
        # Time the fit so training cost can be reported alongside scores.
        t0 = time.time()
        fitted = auxFs.train_hmm_model(trainSet, trainLens, nStates,
                                       1, 400, 1e-3, seed)
        scores['train_time_100'] = math.ceil(time.time() - t0)
        joblib.dump(fitted, f"(unknown)_{nStates}_100.pkl")
        scores['likelihood_100'] = auxFs.score_model_on_datasets(
            fitted, bothSets, bothLens)
        drawn, _ = auxFs.sample_hmm_model(fitted, sampleLength, 0, True)
        np.savetxt(f"(unknown)_{nStates}_100.txt", drawn)
        scores['K-S_100'] = auxFs.ks_test(drawn, bothSets)
        #scores['KLD_100'] = auxFs.kld_test(drawn, bothSets)
        scores['aic_100'] = auxFs.estimate_aic_score(
            scores['likelihood_100'][0], nStates, 2)
        scores['bic_100'] = auxFs.estimate_bic_score(
            scores['likelihood_100'][0], nStates, 2, sum(trainLens))
        baseMatrix = fitted.transmat_
        if nStates == 3:
            # Only the 3-state model gets scaled-transition variants.
            for i in [0.8, 0.6, 0.5, 0.4, 0.3, 0.2]:
                locStr = f'_{i*100:0.0f}'
                scaled = auxFs.change_transition_matrix_of_model(
                    fitted, baseMatrix, i)
                scores[f'likelihood{locStr}'] = auxFs.score_model_on_datasets(
                    scaled, bothSets, bothLens)
                drawn, _ = auxFs.sample_hmm_model(
                    scaled, sampleLength, 0, True)
                np.savetxt(f"(unknown)_{nStates}{locStr}.txt", drawn)
                scores[f'K-S{locStr}'] = auxFs.ks_test(drawn, bothSets)
                #scores[f'KLD{locStr}'] = auxFs.kld_test(drawn, bothSets)
                scores[f'aic{locStr}'] = auxFs.estimate_aic_score(
                    scores[f'likelihood{locStr}'][0], nStates, 2)
                scores[f'bic{locStr}'] = auxFs.estimate_bic_score(
                    scores[f'likelihood{locStr}'][0], nStates, 2,
                    sum(trainLens))
                joblib.dump(scaled, f"(unknown)_{nStates}{locStr}.pkl")
        # One JSON per state count; keys from earlier counts persist in
        # `scores`, matching the original's accumulate-then-dump behavior.
        with open(f"(unknown)_{nStates}_{seed}.json", 'w') as f:
            json.dump(scores, f, indent=4)
def main(filename, seed, sampleLength=1000000):
    """Train a 3-state fixed-means HMM and score scaled-transition variants.

    The third state's mean is a linear function of the estimated clear-sky
    index bar (``estimate_CSI_bar``); the other two means are fixed.  The
    fitted model plus each scaled-transition variant is sampled, scored
    (likelihood, K-S, AIC, BIC), and persisted; scores go to one JSON.

    NOTE(review): `filename` is never referenced — every path uses the
    literal "(unknown)" prefix; confirm this is intentional.
    """
    raw = np.loadtxt(f"(unknown).txt").transpose()
    trainSet, testSet = auxFs.divide_data(raw)
    trainLens = auxFs.calculate_length_sequence(trainSet)
    testLens = auxFs.calculate_length_sequence(testSet)
    trainSet = trainSet.reshape(-1, 1)
    testSet = testSet.reshape(-1, 1)
    bothSets = [trainSet, testSet]
    bothLens = [trainLens, testLens]
    scores = {'seed': seed}
    nStates = 3
    # Third state mean from the CSI-bar regression; first two are fixed.
    kBar = estimate_CSI_bar(raw, 0.95)
    brokenMean = 0.1205 + (0.3341 * kBar)
    print("Mean of broken: ", brokenMean)
    fixedMeans = np.array([1.04, 1.00, brokenMean]).reshape(3, 1)
    t0 = time.time()
    fitted = auxFs.train_hmm_model_fixed_means(
        trainSet, trainLens, nStates, 1, 400, 1e-3, seed, fixedMeans)
    scores['train_time_100'] = math.ceil(time.time() - t0)
    joblib.dump(fitted, f"(unknown)_{nStates}_100_fixedMeans.pkl")
    scores['likelihood_100'] = auxFs.score_model_on_datasets(
        fitted, bothSets, bothLens)
    drawn, _ = auxFs.sample_hmm_model(fitted, sampleLength, 0, True)
    np.savetxt(f"(unknown)_{nStates}_100_fixedMeans.txt", drawn)
    scores['K-S_100'] = auxFs.ks_test(drawn, bothSets)
    scores['aic_100'] = auxFs.estimate_aic_score(
        scores['likelihood_100'][0], nStates, 2)
    scores['bic_100'] = auxFs.estimate_bic_score(
        scores['likelihood_100'][0], nStates, 2, sum(trainLens))
    baseMatrix = fitted.transmat_
    if nStates == 3:
        # Evaluate variants whose transition matrix is scaled by phi.
        for i in [0.8, 0.6, 0.5, 0.4, 0.3, 0.2]:
            locStr = f'_{i*100:0.0f}'
            scaled = auxFs.change_transition_matrix_of_model(
                fitted, baseMatrix, i)
            scores[f'likelihood{locStr}'] = auxFs.score_model_on_datasets(
                scaled, bothSets, bothLens)
            drawn, _ = auxFs.sample_hmm_model(scaled, sampleLength, 0, True)
            np.savetxt(f"(unknown)_{nStates}{locStr}_fixedMeans.txt", drawn)
            scores[f'K-S{locStr}'] = auxFs.ks_test(drawn, bothSets)
            scores[f'aic{locStr}'] = auxFs.estimate_aic_score(
                scores[f'likelihood{locStr}'][0], nStates, 2)
            scores[f'bic{locStr}'] = auxFs.estimate_bic_score(
                scores[f'likelihood{locStr}'][0], nStates, 2,
                sum(trainLens))
            joblib.dump(scaled, f"(unknown)_{nStates}{locStr}_fixedMeans.pkl")
    with open(f"(unknown)_{nStates}_{seed}_fixedMeans.json", 'w') as f:
        json.dump(scores, f, indent=4)
def main(fileName, ymax=4.0, fntSize=14):
    """Plot a 2x4 grid of histograms: test data plus seven scaled samples.

    Panel 1 is the test-data histogram; panels 2-8 are the fixed-means
    model samples at scale factors 20..100 (% of the transition matrix).
    The figure is saved to ``{fileName}_scaled_process_histogram.pdf``.

    Parameters
    ----------
    fileName : str
        Base name (no extension) of the data file and of the sample
        files ``{fileName}_3_{scale}_fixedMeans.txt``.
    ymax : float
        Upper y-limit shared by every panel.
    fntSize : int
        Matplotlib global font size.
    """
    nbins = 100
    ds = np.loadtxt(f"{fileName}.txt").transpose()
    # Only the validation half is plotted; the training half is unused here.
    _, valDs = auxFs.divide_data(ds)
    data = valDs.flatten(order="C")
    plt.rcParams.update({'font.size': fntSize})
    fig = plt.figure(figsize=(6, 8.5))
    ax = plt.subplot(2, 4, 1)
    ax.hist(data, nbins, density=True)
    ax.set_ylabel("PDF")
    ax.set_title("Test data")
    ax.set_xlim(0, 1.75)
    ax.set_ylim(0, ymax)
    ax.set_yticklabels([])
    ax.set_xticklabels([])
    auxFs.clean_axes(ax)
    scales = [20, 30, 40, 50, 60, 80, 100]
    filenames = [f"{fileName}_3_{i}_fixedMeans.txt" for i in scales]
    for i, v in enumerate(filenames):
        X = np.loadtxt(v)
        ax = plt.subplot(2, 4, i + 2)
        ax.hist(X, nbins, density=True)
        # Label only the leftmost panel of the bottom row with "PDF" and
        # the whole bottom row (i >= 3) with "CSI".
        if i == 3:
            ax.set_ylabel("PDF")
        if i >= 3:
            ax.set_xlabel("CSI")
        ax.set_title(r"$\phi$ = " + f"{scales[i]/100:.2f}")
        auxFs.clean_axes(ax)
        ax.set_xlim(0, 1.75)
        ax.set_ylim(0, ymax)
        ax.set_yticklabels([])
        ax.set_xticklabels([])
    fig.savefig(f"{fileName}_scaled_process_histogram.pdf")
def main(fileName, ymax=3.5, fntSize=14):
    """Plot a 4x3 grid of histograms: test data plus n=2..12 state models.

    Panel 1 is the test-data histogram; panels 2-12 are the samples from
    models with 2..12 hidden states (``{fileName}_{n}_100.txt``).  The
    figure is saved to ``{fileName}_test_histogram.pdf``.

    Parameters
    ----------
    fileName : str
        Base name (no extension) of the data file and sample files.
    ymax : float
        Upper y-limit shared by every panel.
    fntSize : int
        Matplotlib global font size.
    """
    nbins = 100
    ds = np.loadtxt(f"{fileName}.txt").transpose()
    # Only the validation half is plotted; the training half is unused here.
    _, valDs = auxFs.divide_data(ds)
    data = valDs.flatten(order="C")
    plt.rcParams.update({'font.size': fntSize})
    fig = plt.figure(figsize=(6, 8.5))
    ax = plt.subplot(4, 3, 1)
    ax.hist(data, nbins, density=True)
    ax.set_ylabel("PDF")
    ax.set_title("Test data")
    ax.set_xlim(0, 1.75)
    ax.set_ylim(0, ymax)
    ax.set_yticklabels([])
    ax.set_xticklabels([])
    auxFs.clean_axes(ax)
    for i in range(2, 13):
        X = np.loadtxt(f"{fileName}_{i}_100.txt")
        ax = plt.subplot(4, 3, i)
        ax.hist(X, nbins, density=True)
        # "PDF" on the left column, "CSI" on the bottom row only.
        if i in (4, 7, 10):
            ax.set_ylabel("PDF")
        if i in (10, 11, 12):
            ax.set_xlabel("CSI")
        ax.set_title(f"n = {i}")
        auxFs.clean_axes(ax)
        ax.set_xlim(0, 1.75)
        ax.set_ylim(0, ymax)
        ax.set_yticklabels([])
        ax.set_xticklabels([])
    fig.savefig(f"{fileName}_test_histogram.pdf")