def get_BD_confs():
    # Reward probabilities of the two actions in each of the 12 blocks
    # (the last 6 blocks mirror the first 6 in reverse order).
    props = [[0.25, 0.05], [0.05, 0.25], [0.05, 0.125],
             [0.125, 0.05], [0.08, 0.05], [0.05, 0.08]]
    props += reversed(props)
    confs = []
    by_id = {}
    for e in DataReader.read_BD().groupby(['id', 'block']):
        trial = e[1]['block'].unique()
        block = {
            'block': trial[0],
            'id': e[1]['id'].unique()[0],
            'prop0': props[trial[0] - 1][0],
            'prop1': props[trial[0] - 1][1],
            'choices': e[1]['id'].size,
            'group': e[1]['diag'].unique()[0]
        }
        confs.append(block)
        if not block['id'] in by_id:
            by_id[block['id']] = [block]
        else:
            by_id[block['id']].append(block)
    # Index each subject's list of block configurations by diagnostic group.
    by_group = {}
    subj_list = by_id.values()
    for s in subj_list:
        if not s[0]['group'] in by_group:
            by_group[s[0]['group']] = [s]
        else:
            by_group[s[0]['group']].append(s)
    return confs, by_id, by_group
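# Illustrative usage (not part of the original file): the structure of the
# entries returned above. The prop0/prop1 values come from the `props` table;
# the id, choices and group fields depend on the data.
#
#   confs, by_id, by_group = get_BD_confs()
#   confs[0]
#   # {'block': 1, 'id': '<subject id>', 'prop0': 0.25, 'prop1': 0.05,
#   #  'choices': <number of trials in the block>, 'group': '<diagnosis>'}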
def run_BD(i):
    tf.reset_default_graph()
    data = DataReader.read_BD()
    ncells = configs[i]['cells']
    learning_rate = configs[i]['lr']
    group = configs[i]['g']
    iters = configs[i]['iters']
    model_path = configs[i]['model_path']
    output_path = Paths.local_path + 'BD/rnn-opt-rand-init/' + \
                  'run_' + str(configs[i]['s']) + '/' + \
                  str(ncells) + 'cells/' + group + '/'
    with LogFile(output_path, 'run.log'):
        DLogger.logger().debug("group: " + str(group))
        gdata = data.loc[data.diag == group]
        ids = gdata['id'].unique().tolist()
        dftr = pd.DataFrame({'id': ids, 'train': 'train'})
        tdftr = pd.DataFrame({'id': ids, 'train': 'test'})
        train, test = DataProcess.train_test_between_subject(
            gdata, pd.concat((dftr, tdftr)),
            [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
        train = DataProcess.merge_data(train)
        DLogger.logger().debug("total points: " + str(get_total_pionts(train)))
        worker = LSTMBeh(2, 0, n_cells=ncells)
        OptBEH.optimise(worker, output_path, train, None,
                        learning_rate=learning_rate, global_iters=iters,
                        load_model_path=model_path)
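# A minimal sketch (hypothetical values; `configs` is built elsewhere in this
# file) of the entry layout run_BD reads above. Only the keys 'cells', 'lr',
# 'g', 'iters', 'model_path' and 's' (run index) are accessed:
#
#   configs = [{'cells': 10, 'lr': 1e-2, 'g': 'Healthy', 'iters': 3000,
#               'model_path': None, 's': 0}]
#   run_BD(0)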
def run_BD(i):
    tf.reset_default_graph()
    data = DataReader.read_BD()
    learning_rate = configs[i]['lr']
    group = configs[i]['g']
    output_path = Paths.local_path + 'BD/gql-ml-opt/' + group + '/'
    with LogFile(output_path, 'run.log'):
        DLogger.logger().debug("group: " + str(group))
        gdata = data.loc[data.diag == group]
        ids = gdata['id'].unique().tolist()
        dftr = pd.DataFrame({'id': ids, 'train': 'train'})
        tdftr = pd.DataFrame({'id': ids, 'train': 'test'})
        train, test = DataProcess.train_test_between_subject(
            gdata, pd.concat((dftr, tdftr)),
            [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
        DLogger.logger().debug("total points: " + str(get_total_pionts(train)))
        worker = GQL.get_instance(2, 2, {})
        train = DataProcess.merge_data(train)
        OptML.optimise(worker, output_path, train, test,
                       global_iters=1000, learning_rate=learning_rate)
def evaluate_BD_CV():
    data = DataReader.read_BD()
    base_input_folder = Paths.rest_path + 'archive/beh/rnn-cv/'
    base_output_folder = Paths.local_path + 'BD/evals/rnn-cv-evals/'
    # Checkpoints were saved every 100 iterations, plus the final model.
    model_iters = ['model-' + str(x) for x in range(0, 3000, 100)] + ['model-final']
    cells = [5, 10, 20]
    folds = {
        'Healthy': ['fold' + str(x) for x in range(0, 34)],
        'Depression': ['fold' + str(x) for x in range(0, 34)],
        'Bipolar': ['fold' + str(x) for x in range(0, 33)]
    }

    def worker_gen(n_actions, n_states, n_cells):
        return LSTMBeh(2, 0, n_cells)

    Simulator.evaluate_CV(worker_gen, base_input_folder, base_output_folder,
                          cells, 2, 0, data, folds, model_iters,
                          [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
def evaluate_BD_CV():
    data = DataReader.read_BD()
    base_input_folder = Paths.rest_path + 'archive/beh/gql-ml-cv/'
    base_output_folder = Paths.local_path + 'BD/evals/gql-ml-cv-evals/'
    model_iters = ['model-final']
    folds = {
        'Healthy': ['fold' + str(x) for x in range(0, 34)],
        'Bipolar': ['fold' + str(x) for x in range(0, 33)],
        'Depression': ['fold' + str(x) for x in range(0, 34)]
    }
    tf.reset_default_graph()
    worker = GQL.get_instance(2, 2, {})
    worker.set_params(OptML.get_variables(worker.get_params()))

    def test_and_save(sess, test, output_folder):
        return OptML.test_and_save("", output_folder, None, sess, test, worker)

    Simulator.evaluate_CV(base_input_folder, base_output_folder, test_and_save,
                          data, folds, model_iters,
                          [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
                          random_tie=True)
def GQL_classify_subjects():
    tf.reset_default_graph()
    data = DataReader.read_BD()
    ids = data['id'].unique().tolist()
    dftr = pd.DataFrame({'id': ids, 'train': 'train'})
    train, test = DataProcess.train_test_between_subject(
        data, dftr, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
    model_iter = 'model-final'
    df = pd.DataFrame(columns=('model', 'id', 'loss'))
    config = tf.ConfigProto(device_count={'GPU': 0})
    subj_paths = finding_CV(Paths.rest_path + 'archive/beh/gql-ml-pred-diag/')
    worker = GQL.get_instance(2, 2, {})
    worker.set_params(OptML.get_variables(worker.get_params()))
    for k, tr in train.iteritems():
        for g, p in subj_paths[k].iteritems():
            DLogger.logger().debug('subject ' + k + ' group ' + g + ' path ' + p)
            model_path = p + model_iter + '/'
            with tf.Session(config=config) as sess:
                load_model(sess, model_path)
                total_loss = 0
                for v in tr:
                    ell, _, _ = worker.simulate(sess, v['reward'], v['action'])
                    total_loss += -ell
                df.loc[len(df)] = [g, k, total_loss]
    df.to_csv(Paths.local_path + 'BD/gql_diag.csv')
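# Hypothetical follow-up (not in the original file): since every subject is
# evaluated under each group's fitted model, a diagnosis prediction can be
# read off from gql_diag.csv as the group whose model gives the lowest loss
# per subject. A minimal sketch, assuming the CSV written above:
#
#   diag = pd.read_csv(Paths.local_path + 'BD/gql_diag.csv')
#   best = diag.loc[diag.groupby('id')['loss'].idxmin()]  # best-fitting model per subject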
def analysis_BD():
    output_file = Paths.local_path + 'BD/to_graph_data/subj_stats.csv'
    data = DataReader.read_BD()
    data['best_action'] = 1 * (data['choice'] == 'R1')
    data['choices'] = data['choice']
    # Use map(str) so each row gets its own block label; str(data['block'])
    # would stringify the whole column at once (cf. analysis_action_reward).
    data['sim_ID'] = data['id'] + data['block'].map(str)
    data['group'] = data['diag']
    choice_stats = choice_statistics(data)
    choice_stats.to_csv(output_file)
def run_BD(i):
    data = DataReader.read_BD()
    ncells = configs[i]['cells']
    group = configs[i]['g']
    input_path = Paths.rest_path + 'archive/beh/rnn-opt-rand-init/' + 'run_' + \
                 str(configs[i]['s']) + '/' + str(ncells) + 'cells/' + group + '/model-final/'
    output_path = Paths.local_path + 'BD/rnn-opt-rand-init-evals/' + 'run_' + \
                  str(configs[i]['s']) + '/' + str(ncells) + 'cells/' + group + '/'
    gdata = data.loc[data.diag == group]
    ids = gdata['id'].unique().tolist()
    dftr = pd.DataFrame({'id': ids, 'train': 'train'})
    tdftr = pd.DataFrame({'id': ids, 'train': 'test'})
    train, test = DataProcess.train_test_between_subject(
        gdata, pd.concat((dftr, tdftr)),
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
    test = DataProcess.merge_data(test)
    tf.reset_default_graph()
    worker = LSTMBeh(2, 0, ncells)
    saver = tf.train.Saver(max_to_keep=None)
    with tf.Session() as sess:
        DLogger.logger().debug("loading model from: " + str(input_path))
        ckpt = tf.train.get_checkpoint_state(input_path)
        tf.train.import_meta_graph(input_path + 'model.cptk.meta')
        saver.restore(sess, ckpt.model_checkpoint_path)
        for k, tr in test.iteritems():
            for v in tr:
                _, _, _, ell = worker.simulate(sess, v['reward'], v['action'], v['state'])
        DLogger.logger().debug("input path: " + input_path)
        DLogger.logger().debug("output path: " + output_path)
        DLogger.logger().debug("total nlp: {} ".format(str(ell)))
        return pd.DataFrame({'total nlp': [ell],
                             'group': group,
                             'cell': ncells,
                             'fold': None,
                             'model_iter': 'model-final',
                             's': configs[i]['s']})
def analysis_action_reward():
    data = DataReader.read_BD()
    data['best_action'] = 1 * (data['choice'] == 'R1')
    data['sim_ID'] = (data['id'] + data['block'].map(str))
    data['group'] = data['diag']
    all_trials = extract_run_rew(data)

    # reward_trials = pd.concat(total_data)
    # reward_trials = reward_trials.loc[reward_trials.reward == 1]
    # reward_trials.loc[reward_trials['prev_rewards'] == 0, 'pre_rew_group'] = "0"
    # reward_trials.loc[reward_trials['prev_rewards'] == 1, 'pre_rew_group'] = "1"
    # reward_trials.loc[reward_trials['prev_rewards'] == 2, 'pre_rew_group'] = "2"
    # reward_trials.loc[reward_trials['prev_rewards'] > 2, 'pre_rew_group'] = ">2"
    #
    # reward_trials.loc[reward_trials['prev_key'] < 5, 'prev_key_group'] = "<10"
    # reward_trials.loc[(reward_trials['prev_key'] >= 5) & (reward_trials['prev_key'] < 10), 'prev_key_group'] = "10-20"
    # reward_trials.loc[(reward_trials['prev_key'] >= 10) & (reward_trials['prev_key'] < 15), 'prev_key_group'] = ">=10"
    #
    # grouped = reward_trials.groupby(['prev_key_group', 'pre_rew_group', 'group'])['same'].mean()
    # output_file = Paths.local_path + 'to_graph_data/subj_stats_pre_rew_key.csv'
    # grouped.to_csv(output_file, header=True)
    #
    # grouped = reward_trials.groupby(['prev_key_group', 'pre_rew_group', 'ID', 'group'])['same'].mean()
    # output_file = Paths.local_path + 'to_graph_data/subj_stats_pre_rew_key_ID.csv'
    # grouped.to_csv(output_file, header=True)
    #
    # grouped = reward_trials.groupby(['prev_key_group', 'ID', 'group'])['same'].mean()
    # output_file = Paths.local_path + 'to_graph_data/subj_stats_pre_key_ID.csv'
    # grouped.to_csv(output_file, header=True)
    #
    # grouped = reward_trials.groupby(['pre_rew_group', 'ID', 'group'])['same'].mean()
    # output_file = Paths.local_path + 'to_graph_data/subj_stats_pre_rew_ID.csv'
    # grouped.to_csv(output_file, header=True)
    #
    # grouped = reward_trials.groupby(['pre_rew_group', 'prev_key_group', 'ID', 'group'])['prev_key_group'].count()
    # output_file = Paths.local_path + 'to_graph_data/subj_run_length_ID.csv'
    # grouped.to_csv(output_file, header=True)

    output_file = Paths.local_path + 'BD/to_graph_data/subj_all_data.csv'
    all_trials.to_csv(output_file, header=True)
def RNN_classify_subjects():
    data = DataReader.read_BD()
    ids = data['id'].unique().tolist()
    dftr = pd.DataFrame({'id': ids, 'train': 'train'})
    train, test = DataProcess.train_test_between_subject(
        data, dftr, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
    n_cells = {'Healthy': 10, 'Depression': 10, 'Bipolar': 20}
    model_iter = 'model-final'
    df = pd.DataFrame(columns=('model', 'id', 'loss'))
    config = tf.ConfigProto(device_count={'GPU': 0})
    subj_paths = finding_CV(Paths.rest_path + 'archive/beh/rnn-pred-diag/')
    for k, tr in train.iteritems():
        for g, p in subj_paths[k].iteritems():
            tf.reset_default_graph()
            worker = LSTMBeh(2, 0, n_cells[g])
            saver = tf.train.Saver(max_to_keep=5)
            DLogger.logger().debug('subject ' + k + ' group ' + g + ' path ' + p)
            model_path = p + model_iter + '/'
            ckpt = tf.train.get_checkpoint_state(model_path)
            tf.train.import_meta_graph(model_path + 'model.cptk.meta')
            with tf.Session(config=config) as sess:
                saver.restore(sess, ckpt.model_checkpoint_path)
                total_loss = 0
                for v in tr:
                    policies, c_track, h_track, loss = worker.simulate(
                        sess, v['reward'], v['action'], v['state'])
                    total_loss += loss
                df.loc[len(df)] = [g, k, total_loss]
    df.to_csv(Paths.local_path + 'BD/rnn_diag.csv')
# This file generates several initialisations for RNNs.
import sys
from multiprocessing.pool import Pool

import pandas as pd
import tensorflow as tf

import actionflow.rnn.opt_beh as lrh
from actionflow.data.data_process import DataProcess
from actionflow.rnn.lstm_beh import LSTMBeh
from actionflow.util.helper import get_total_pionts
from actionflow.util.logger import LogFile, DLogger
from BD.data.data_reader import DataReader
from BD.util.paths import Paths

data = DataReader.read_BD()

# Hyper-parameter grid: one configuration per (learning rate, cell count) pair.
configs = []
for lr in [1e-2]:
    for cells in [5, 10, 20, 30]:
        configs.append({'lr': lr, 'cells': cells})


def run_BD_RNN(i):
    tf.reset_default_graph()
    ncells = configs[i]['cells']
    lr = configs[i]['lr']
    output_path = Paths.local_path + 'BD/rnn-init/' + str(ncells) + 'cells/'
    with LogFile(output_path, 'run.log'):
        ids = data['id'].unique().tolist()