# Validation run: for every glomerulus, repeat the configured method several
# times for each number of cross-validation folds and collect the resulting
# generalization scores, then write them to <out_path>/res.json.
reload(run_lib)

plt.close('all')
n_folds_list = [5, 10, 20, 50]
n_repetitions = 5
method = 'svr'
out_path = '/Users/dedan/projects/master/results/validation/gen_score_svr'

base_path = os.path.join(os.path.dirname(__file__), '..')
config_file_name = os.path.join(base_path, 'config', 'validate_genscore_svr.json')
with open(config_file_name) as config_file:
    config = json.load(config_file)
config['data_path'] = os.path.join(base_path, 'data')

# load the features
features = run_lib.prepare_features(config)

with open(os.path.join(config['data_path'], 'used_glomeruli.json')) as glom_file:
    used_glomeruli = json.load(glom_file)

# res[glomerulus][n_folds] -> list with one gen_score per repetition
res = {g: {nf: [] for nf in n_folds_list} for g in used_glomeruli}
for glom in used_glomeruli:

    print(glom)
    config['glomerulus'] = glom
    data, targets, molids = run_lib.load_data_targets(config, features)
    # set k_best to the number of data columns
    config['feature_selection']['k_best'] = data.shape[1]

    for n_folds in n_folds_list:
        print(n_folds)
        config['methods'][method]['n_folds'] = n_folds
        for _ in range(n_repetitions):
            run_res = run_lib.run_runner(config, data, targets)
            res[glom][n_folds].append(run_res[method]['gen_score'])

# Write the collected scores. The original call passed only a read-mode file
# handle to json.dump and therefore never wrote anything. Note that the
# integer fold counts are stored as string keys in the JSON output.
with open(os.path.join(out_path, 'res.json'), 'w') as res_file:
    json.dump(res, res_file)
    # Load data, targets and molecule ids once per feature configuration.
    # NOTE(review): the loop variables `config` and (further down) `data`
    # rebind names at this scope -- confirm that no later code relies on
    # their previous values.
    dtm = {}
    for name, config in feat_config.items():
        base_config['features'].update(config)
        data, targets, molids = rl.load_data_targets(base_config, cache[name]['features'])
        dtm[name] = {
            'data': data,
            'targets': targets,
            'molids': molids
        }

    # select molecules that none of the models will be trained on
    # (union over all models; set().union(...) also copes with an empty dtm
    # and does not depend on dict.values() being indexable)
    all_trained = set().union(*[m['molids'] for m in dtm.values()])
    to_predict_molids = mol_intersection - all_trained

    for name, data in dtm.items():

        # fit model
        print('working on model: {}'.format(name))
        # set k_best to the number of data columns
        base_config['feature_selection']['k_best'] = data['data'].shape[1]
        print("use {} molecules for training".format(data['data'].shape[0]))
        tmp_res = rl.run_runner(base_config, data['data'], data['targets'], get_models=True)

        # predict the held-out molecules; 'predictions' and 'cases' are built
        # by iterating the same unmodified collection, so their entries line up
        to_predict = np.array([cache[name]['features'][molid] for molid in to_predict_molids])
        res[name][glom]['predictions'] = tmp_res[method]['model'].predict(to_predict)
        res[name][glom]['cases'] = [id2door[molid] for molid in to_predict_molids]
        res[name][glom]['targets'] = data['targets']
        res[name][glom]['score'] = tmp_res[method]['gen_score']
        print('model genscore: {:.2f}\n'.format(tmp_res[method]['gen_score']))

# Persist the collected results as a plain-dict copy of `res`. The file is
# opened in binary mode (pickle data is bytes) and closed deterministically.
with open(os.path.join(outpath, 'predictions.pkl'), 'wb') as predictions_file:
    pickle.dump(dict(res), predictions_file)

    },
    "randomization_test": False
}

# For every glomerulus, run the model once on the features described by
# `config` and once on spectral (vib100) features.
with open(os.path.join(config['data_path'], 'used_glomeruli.json')) as glom_file:
    used_gloms = json.load(glom_file)

# NOTE(review): in the part of the loop visible here only `alone_haddad` is
# appended to -- confirm `alone_vib` and `together` are filled further down.
alone_haddad, alone_vib, together = [], [], []
for glom in used_gloms:

    config['glomerulus'] = glom

    # prepare haddad features
    features_h = run_lib.prepare_features(config)
    data_h, targets_h, molids_h = run_lib.load_data_targets(config, features_h)
    # set k_best to the number of data columns
    config['feature_selection']['k_best'] = data_h.shape[1]
    tmp = run_lib.run_runner(config, data_h, targets_h)
    # same output as the former `print glom, tmp` statement, written as a
    # call to match the print(...) style used elsewhere
    print('%s %s' % (glom, tmp))
    alone_haddad.append(tmp['svr']['gen_score'])

    # prepare vib100
    config_spec = copy.deepcopy(config)
    config_spec['features']['type'] = 'spectral'
    config_spec['features']['kernel_width'] = 100
    config_spec['features']['bin_width'] = 150
    config_spec['features']['use_intensity'] = False
    config_spec['features']['spec_type'] = 'ir'

    features_v = run_lib.prepare_features(config_spec)
    data_v, targets_v, molids_v = run_lib.load_data_targets(config_spec, features_v)
    # NOTE(review): k_best is set on `config` and the runner is called with
    # `config`, not `config_spec`, although the data comes from the spectral
    # features -- confirm this is intended.
    config['feature_selection']['k_best'] = data_v.shape[1]
    tmp = run_lib.run_runner(config, data_v, targets_v)
# overwrite optimal (param search results) parameters:
# keep only the selected method, fix C, drop its 'regularization' entry and
# take k_best from the last entry of k_best_dict[descriptor]
# (iterate over a snapshot of the keys because entries are deleted in the loop)
for m in list(config['methods']):
    if m != method:
        del config['methods'][m]
config['methods'][method]['C'] = 1.0
del config['methods'][method]['regularization']
config['feature_selection']['k_best'] = k_best_dict[descriptor][-1]

# load features
print('preparing features..')
features = run_lib.prepare_features(config)
data, targets, molids = run_lib.load_data_targets(config, features)

# fit model
print("use {} molecules for training".format(data.shape[0]))
tmp_res = run_lib.run_runner(config, data, targets, get_models=True)
model = tmp_res[method]['model']


# # structure plot for active targets and predictions
# active_targets = np.where(targets > active_thresh)[0]
# act_molids = [molids[i] for i in active_targets]
# active_predictions = np.where(predictions > active_thresh)[0]
# act_predict_molids = [molids_to_predict[i] for i in active_predictions]
# fig = plt.figure(figsize=(5,5))
# plib.structure_plot(fig, (act_molids, act_predict_molids),
#                          (targets[active_targets], predictions[active_predictions]))
# fig.suptitle(glom)
# fig.savefig(os.path.join(outpath, glom + '_structures.png'))

fig = plt.figure()
import os
import json
from master.libs import run_lib
import numpy as np
import pylab as plt

# For every glomerulus, run an SVR on the data loaded with the "all"
# descriptor and print its generalization score.
config = {
    "data_path": os.path.join(os.path.dirname(__file__), "..", "data"),
    "features": {"type": "conventional", "descriptor": "all", "normalize": True, "properties_to_add": []},
    "feature_selection": {"method": "linear"},
    "methods": {"svr": {"C": 1.0, "n_folds": 50}},
    "randomization_test": False,
}

with open(os.path.join(config["data_path"], "used_glomeruli.json")) as glom_file:
    used_gloms = json.load(glom_file)

for glom in used_gloms:

    config["glomerulus"] = glom
    # Reset the descriptor at the start of each iteration: it is switched to
    # "saito_desc" below, and without the reset every glomerulus after the
    # first would be prepared and loaded with "saito_desc" instead of "all".
    config["features"]["descriptor"] = "all"
    features = run_lib.prepare_features(config)
    data_all, targets, _ = run_lib.load_data_targets(config, features)
    config["features"]["descriptor"] = "saito_desc"
    data_saito, _, _ = run_lib.load_data_targets(config, features)
    np.random.seed()
    # map(np.random.shuffle, data_all.T)
    # NOTE(review): `new_data` is built but the runner below is called with
    # `data_all` -- confirm which matrix is meant to be evaluated.
    new_data = np.hstack((data_saito, data_all))
    # set k_best to the number of columns of data_all
    config["feature_selection"]["k_best"] = data_all.shape[1]
    tmp = run_lib.run_runner(config, data_all, targets, False, False)
    # same output as the former `print glom, score` statement
    print("%s %s" % (glom, tmp["svr"]["gen_score"]))