''' 
regression tree for each response
'''

# y, sr = load_files('audio/101.mp3')
# mfcc_v = mfcc(y, sr)
# Load the existing valence and arousal data from the survey CSV, both as
# per-id sequences (mean_va) and as dictionaries (csv_2_dict_va).
all_ids, all_val, all_aro = mean_va('csv/survery2dataMin1.csv')
valence, arousal = csv_2_dict_va('csv/survery2dataMin1.csv')

print(len(all_ids))

# Calculate features for the songs in the train set, then build the feature
# matrix for the ids returned by mean_va.
ids, feat = calc_mfcc_features_dict('audio/full')
X = feature_matrix_by_id(all_ids, feat)

# Targets: valence and arousal.
Yv, Ya = all_val, all_aro

# Sentinels initialised to the largest plain int.
# NOTE(review): none of these is read in the visible part of the script.
best_avg = best_near = best_std = sys.maxint
best_val = best_aro = sys.maxint

# Debug output: marker, feature-matrix shape and target lengths.
print("check 3")
print(X.shape)
print(len(Yv))
print(len(Ya))
    # Column layout of the survey CSV (column indices):
    # 17/18 - mood in VA
    # 19/20 - mood color
    # 21:40 - perception of 10 labels
    # 41:57 - presence of mood
    # 58:77 - color perception for 10 labels
    # 81 - song id
    # 82:101 - induced labels positions
    # 102:129 - perceived labels positions
    # 130/131 - color for song
    # ni 134:136 - HSV for song

# Features for the songs in the train set are read from a JSON file here;
# the direct computation is kept only as a reference:
#   ids, feat = calc_mfcc_features_dict('audio/full')
feat = read_feature_from_json('features/mfcc_our_dataset_20.json')
X = feature_matrix_by_id(all_ids, feat)

# Targets: valence and arousal.
Yv, Ya = all_val, all_aro

_SURVEY_CSV = 'csv/survery2dataMin1.csv'


def _survey_columns(first, last):
    """Return read_csv_col(<survey csv>, first, last) wrapped in np.array."""
    return np.array(read_csv_col(_SURVEY_CSV, first, last))


# Extra survey data read from the CSV.
# The HSV columns (134, 136) are intentionally not loaded:
#   hsv = np.array(read_csv_col('csv/survery2dataMin1.csv', 134, 136))
color = _survey_columns(130, 131)
vamood = _survey_columns(17, 18)
musicschool = _survey_columns(6, 6)
sex = _survey_columns(4, 4)
listening = _survey_columns(12, 12)
moodcolor = _survey_columns(19, 20)
moodperception = _survey_columns(21, 40)
presencemood = _survey_columns(41, 57)
colorperception = _survey_columns(58, 77)

# Add data to X: only the mood-perception columns are appended.
X = np.hstack([X, moodperception])
from utils.cross_validation import cross_valid

''' 
regression tree for each response
'''


# Load the existing valence and arousal data, one entry per survey row.
ids, va, aro, rows = seperate_va('csv/survery2dataMin1.csv')

# Drop every entry whose id is 101, deleting the same position from the
# parallel va and aro lists (in place). `rows` is left untouched.
while 101 in ids:
    id101 = ids.index(101)
    del ids[id101:id101 + 1]
    del va[id101:id101 + 1]
    del aro[id101:id101 + 1]

va_dict, aro_dict = csv_2_dict_va('csv/survery2dataMin1.csv')

# Calculate features for the songs in the train set.
no, feat = read_fake_chroma('features/fakechroma')
X = feature_matrix_by_id(ids, feat)
Yva, Yaro = va, aro

# A single shuffle is performed; a repeated run was once sketched as
#   for i in range(100):
X, Yva, Yaro, ids = shufle_same(X, Yva, Yaro, ids)

# cross_valid is called with 10 as its first argument; valence is evaluated
# and printed before arousal.
print('v: ' + str(cross_valid(10, X, Yva, ids, va_dict)))
print('A: ' + str(cross_valid(10, X, Yaro, ids, aro_dict)))