def run_BD_Q(i):
    tf.reset_default_graph()
    learning_rate = configs[i]['lr']
    group = configs[i]['g']
    cv_index = configs[i]['cv_index']
    # `data` and `cv_lists_group` are assumed to be module-level globals
    # (other runners in this file load `data` via DataReader.read_BD()).
    indx_data = cv_lists_group[group][cv_index]
    gdata = data.loc[data.diag == group]
    train, test = DataProcess.train_test_between_subject(
        gdata, indx_data,
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
    output_path = (Paths.local_path + 'BD/gql10d-ml-cv/' + group + '/' +
                   'fold' + str(cv_index) + '/')
    with LogFile(output_path, 'run.log'):
        indx_data.to_csv(output_path + 'train_test.csv')
        worker = GQL.get_instance(2, 10, {})
        train = DataProcess.merge_data(train)
        OptML.optimise(worker, output_path, train, test,
                       learning_rate=learning_rate, global_iters=1000)
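# The runners in this file index into a module-level `configs` list of
# dicts. A minimal sketch of how such a grid could be built for run_BD_Q;
# the learning-rate values and fold count are hypothetical, only the keys
# ('lr', 'g', 'cv_index') and the group names come from this file.
def _example_build_gql_cv_configs():
    from itertools import product
    groups = ['Healthy', 'Depression', 'Bipolar']
    lrs = [1e-2, 1e-3]  # hypothetical values
    n_folds = 5         # hypothetical fold count
    return [{'lr': lr, 'g': g, 'cv_index': cv}
            for lr, g, cv in product(lrs, groups, range(n_folds))]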
def run_BD(i):
    tf.reset_default_graph()
    data = DataReader.read_BD()
    ncells = configs[i]['cells']
    learning_rate = configs[i]['lr']
    group = configs[i]['g']
    iters = configs[i]['iters']
    model_path = configs[i]['model_path']
    output_path = (Paths.local_path + 'BD/rnn-opt-rand-init/' +
                   'run_' + str(configs[i]['s']) + '/' +
                   str(ncells) + 'cells/' + group + '/')
    with LogFile(output_path, 'run.log'):
        DLogger.logger().debug("group: " + str(group))
        gdata = data.loc[data.diag == group]
        ids = gdata['id'].unique().tolist()
        dftr = pd.DataFrame({'id': ids, 'train': 'train'})
        tdftr = pd.DataFrame({'id': ids, 'train': 'test'})
        train, test = DataProcess.train_test_between_subject(
            gdata, pd.concat((dftr, tdftr)),
            [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
        train = DataProcess.merge_data(train)
        DLogger.logger().debug("total points: " + str(get_total_pionts(train)))
        worker = LSTMBeh(2, 0, n_cells=ncells)
        OptBEH.optimise(worker, output_path, train, None,
                        learning_rate=learning_rate, global_iters=iters,
                        load_model_path=model_path)
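# run_BD above reads several keys from each config dict. A hypothetical
# single entry for illustration (all values are placeholders; only the
# key names are taken from the function above):
_example_rnn_opt_config = {
    'cells': 10,        # LSTM cell count
    'lr': 1e-3,         # learning rate
    'g': 'Healthy',     # diagnostic group
    'iters': 3000,      # optimisation iterations
    'model_path': None, # optional checkpoint to warm-start from
    's': 0,             # run / seed index used in the output path
}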
def run_BD_RNN(i):
    tf.reset_default_graph()
    ncells = configs[i]['cells']
    learning_rate = configs[i]['lr']
    group = configs[i]['g']
    cv_index = configs[i]['cv_index']
    output_path = (Paths.local_path + 'BD/rnn-cv/' + str(ncells) + 'cells/' +
                   group + '/' + 'fold' + str(cv_index) + '/')
    with LogFile(output_path, 'run.log'):
        indx_data = cv_lists_group[group][cv_index]
        gdata = data.loc[data.diag == group]
        train, test = DataProcess.train_test_between_subject(
            gdata, indx_data,
            [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
        indx_data.to_csv(output_path + 'train_test.csv')
        train_merged = DataProcess.merge_data(train)
        DLogger.logger().debug("total points: " + str(get_total_pionts(train_merged)))
        del train
        worker = LSTMBeh(2, 0, n_cells=ncells)
        # Every fold is warm-started from the shared random initialisation
        # written by the rnn-init runner further down in this file.
        OptBEH.optimise(worker, output_path, train_merged, None,
                        learning_rate=learning_rate, global_iters=3000,
                        load_model_path='../inits/rnn-init/' + str(ncells) +
                                        'cells/model-final/')
def run_BD(i):
    tf.reset_default_graph()
    data = DataReader.read_BD()
    learning_rate = configs[i]['lr']
    group = configs[i]['g']
    output_path = Paths.local_path + 'BD/gql-ml-opt/' + group + '/'
    with LogFile(output_path, 'run.log'):
        DLogger.logger().debug("group: " + str(group))
        gdata = data.loc[data.diag == group]
        ids = gdata['id'].unique().tolist()
        dftr = pd.DataFrame({'id': ids, 'train': 'train'})
        tdftr = pd.DataFrame({'id': ids, 'train': 'test'})
        train, test = DataProcess.train_test_between_subject(
            gdata, pd.concat((dftr, tdftr)),
            [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
        DLogger.logger().debug("total points: " + str(get_total_pionts(train)))
        worker = GQL.get_instance(2, 2, {})
        train = DataProcess.merge_data(train)
        OptML.optimise(worker, output_path, train, test,
                       global_iters=1000, learning_rate=learning_rate)
def run_BD_GQL(i):
    tf.reset_default_graph()
    learning_rate = configs[i]['lr']
    group = configs[i]['g']
    cv_index = configs[i]['cv_index']
    iters = configs[i]['iters']
    output_path = (Paths.local_path + 'BD/gql-ml-rand-opt/' + group + '/' +
                   'fold' + str(cv_index) + '/')
    with LogFile(output_path, 'run.log'):
        indx_data = cv_lists_group[group][cv_index]
        gdata = data.loc[data.diag == group]
        train, test = DataProcess.train_test_between_subject(
            gdata, indx_data,
            [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
        indx_data.to_csv(output_path + 'train_test.csv')
        DLogger.logger().debug("total points: " + str(get_total_pionts(train)))
        worker = GQL.get_instance(2, 2, {})
        train = DataProcess.merge_data(train)
        OptML.optimise(worker, output_path, train, None,
                       learning_rate=learning_rate, global_iters=iters)
def GQL_classify_subjects():
    tf.reset_default_graph()
    data = DataReader.read_BD()
    ids = data['id'].unique().tolist()
    dftr = pd.DataFrame({'id': ids, 'train': 'train'})
    train, test = DataProcess.train_test_between_subject(
        data, dftr, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
    model_iter = 'model-final'
    df = pd.DataFrame(columns=('model', 'id', 'loss'))
    config = tf.ConfigProto(device_count={'GPU': 0})  # force CPU
    subj_paths = finding_CV(Paths.rest_path + 'archive/beh/gql-ml-pred-diag/')
    worker = GQL.get_instance(2, 2, {})
    worker.set_params(OptML.get_variables(worker.get_params()))
    for k, tr in train.items():
        for g, p in subj_paths[k].items():
            DLogger.logger().debug('subject ' + k + ' group ' + g + ' path ' + p)
            model_path = p + model_iter + '/'
            with tf.Session(config=config) as sess:
                load_model(sess, model_path)
                total_loss = 0
                for v in tr:
                    ell, _, _ = worker.simulate(sess, v['reward'], v['action'])
                    total_loss += -ell  # accumulate negative log-likelihood
                df.loc[len(df)] = [g, k, total_loss]
    df.to_csv(Paths.local_path + 'BD/gql_diag.csv')
def simulate_model(input_folder, output_folder, data, n_cells):
    dftr = pd.DataFrame({'id': data['id'].unique().tolist(), 'train': 'train'})
    train, _ = DataProcess.train_test_between_subject(
        data, dftr, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
    tf.reset_default_graph()
    worker = LSTMBeh(2, 0, n_cells)
    DLogger.logger().debug('started simulations')
    Simulator.simulate_worker(worker, input_folder, train, output_folder)
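# A hedged usage sketch for simulate_model. The input/output folder paths
# are hypothetical; the cell count mirrors the per-group settings used in
# RNN_classify_subjects further down ('Healthy': 10).
def _example_simulate_model_call():
    data = DataReader.read_BD()
    simulate_model(
        Paths.rest_path + 'archive/beh/rnn-opt-rand-init/run_0/10cells/Healthy/model-final/',
        Paths.local_path + 'BD/sims/Healthy/',
        data.loc[data.diag == 'Healthy'],
        n_cells=10)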
def run_BD(i):
    data = DataReader.read_BD()
    ncells = configs[i]['cells']
    group = configs[i]['g']
    input_path = (Paths.rest_path + 'archive/beh/rnn-opt-rand-init/' +
                  'run_' + str(configs[i]['s']) + '/' +
                  str(ncells) + 'cells/' + group + '/model-final/')
    output_path = (Paths.local_path + 'BD/rnn-opt-rand-init-evals/' +
                   'run_' + str(configs[i]['s']) + '/' +
                   str(ncells) + 'cells/' + group + '/')
    gdata = data.loc[data.diag == group]
    ids = gdata['id'].unique().tolist()
    dftr = pd.DataFrame({'id': ids, 'train': 'train'})
    tdftr = pd.DataFrame({'id': ids, 'train': 'test'})
    train, test = DataProcess.train_test_between_subject(
        gdata, pd.concat((dftr, tdftr)),
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
    test = DataProcess.merge_data(test)
    tf.reset_default_graph()
    worker = LSTMBeh(2, 0, ncells)
    saver = tf.train.Saver(max_to_keep=None)
    with tf.Session() as sess:
        DLogger.logger().debug("loading model from: " + str(input_path))
        ckpt = tf.train.get_checkpoint_state(input_path)
        tf.train.import_meta_graph(input_path + 'model.cptk.meta')
        saver.restore(sess, ckpt.model_checkpoint_path)
        for k, tr in test.items():
            for v in tr:
                _, _, _, ell = worker.simulate(sess, v['reward'],
                                               v['action'], v['state'])
        DLogger.logger().debug("input path: " + input_path)
        DLogger.logger().debug("output path: " + output_path)
        DLogger.logger().debug("total nlp: {} ".format(str(ell)))
    return pd.DataFrame({'total nlp': [ell],
                         'group': group,
                         'cell': ncells,
                         'fold': None,
                         'model_iter': 'model-final',
                         's': configs[i]['s']})
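# The evaluation variant of run_BD directly above returns a one-row
# DataFrame per configuration. A hypothetical driver collecting the rows
# into a single CSV (function name and output filename are illustrative):
def _example_collect_evals(n_configs):
    rows = [run_BD(i) for i in range(n_configs)]
    pd.concat(rows).to_csv(
        Paths.local_path + 'BD/rnn-opt-rand-init-evals/evals.csv',
        index=False)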
def run_BD_RNN(i):
    tf.reset_default_graph()
    ncells = configs[i]['cells']
    lr = configs[i]['lr']
    output_path = Paths.local_path + 'BD/rnn-init/' + str(ncells) + 'cells/'
    with LogFile(output_path, 'run.log'):
        ids = data['id'].unique().tolist()
        dftr = pd.DataFrame({'id': ids, 'train': 'train'})
        train, test = DataProcess.train_test_between_subject(
            data, dftr, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
        train = DataProcess.merge_data(train)
        DLogger.logger().debug("total points: " + str(get_total_pionts(train)))
        worker = LSTMBeh(2, 0, n_cells=ncells)
        # global_iters=0: no optimisation steps are taken, so this run only
        # saves the random initial weights that the CV runners warm-start from.
        lrh.OptBEH.optimise(worker, output_path, train, None,
                            learning_rate=lr, global_iters=0)
def RNN_classify_subjects():
    data = DataReader.read_BD()
    ids = data['id'].unique().tolist()
    dftr = pd.DataFrame({'id': ids, 'train': 'train'})
    train, test = DataProcess.train_test_between_subject(
        data, dftr, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
    n_cells = {'Healthy': 10, 'Depression': 10, 'Bipolar': 20}
    model_iter = 'model-final'
    df = pd.DataFrame(columns=('model', 'id', 'loss'))
    config = tf.ConfigProto(device_count={'GPU': 0})  # force CPU
    subj_paths = finding_CV(Paths.rest_path + 'archive/beh/rnn-pred-diag/')
    for k, tr in train.items():
        for g, p in subj_paths[k].items():
            # Rebuild the graph for each candidate group, since the cell
            # count differs between groups.
            tf.reset_default_graph()
            worker = LSTMBeh(2, 0, n_cells[g])
            saver = tf.train.Saver(max_to_keep=5)
            DLogger.logger().debug('subject ' + k + ' group ' + g + ' path ' + p)
            model_path = p + model_iter + '/'
            ckpt = tf.train.get_checkpoint_state(model_path)
            tf.train.import_meta_graph(model_path + 'model.cptk.meta')
            with tf.Session(config=config) as sess:
                saver.restore(sess, ckpt.model_checkpoint_path)
                total_loss = 0
                for v in tr:
                    policies, c_track, h_track, loss = worker.simulate(
                        sess, v['reward'], v['action'], v['state'])
                    total_loss += loss
                df.loc[len(df)] = [g, k, total_loss]
    df.to_csv(Paths.local_path + 'BD/rnn_diag.csv')
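# Both classifiers above write a long-format CSV with one loss per
# (candidate group, subject) pair; the 'model' column holds the candidate
# group. A minimal sketch (helper name hypothetical) of turning either
# gql_diag.csv or rnn_diag.csv into per-subject labels by picking the
# group with the lowest accumulated negative log-likelihood:
def _example_read_diag(csv_path):
    diag = pd.read_csv(csv_path, index_col=0)
    best = diag.loc[diag.groupby('id')['loss'].idxmin()]
    return best[['id', 'model']]  # 'model' holds the winning group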
def evaluate_CV(base_input_folder, base_output_folder, test_and_save, data,
                folds, model_iters, trials, random_tie=True):
    df = pd.DataFrame()
    saver = tf.train.Saver(max_to_keep=5)
    config = tf.ConfigProto(device_count={'GPU': 0})  # force CPU
    with tf.Session(config=config) as sess:
        for group in sorted(folds.keys()):
            for fold in folds[group]:
                for model_iter in model_iters:
                    input_folder = base_input_folder + '/' + group + '/' + fold + '/'
                    if base_output_folder is not None:
                        output_folder = (base_output_folder + group + '/' +
                                         fold + '/' + model_iter + '/')
                    else:
                        output_folder = None
                    DLogger.logger().debug("input folder: {}".format(input_folder))
                    DLogger.logger().debug("output folder: {}".format(output_folder))
                    tr_tst = pd.read_csv(input_folder + 'train_test.csv')
                    if 'ID' in tr_tst:
                        DLogger.logger().debug(
                            "'id' column not found; using the 'ID' column instead.")
                        tr_tst['id'] = tr_tst['ID']
                    # Evaluate on the held-out subjects: load each fold's
                    # test ids as the working ('train') set.
                    tst_ids = tr_tst.loc[tr_tst.train == 'test']['id']
                    dftr = pd.DataFrame({'id': tst_ids, 'train': 'train'})
                    train, _ = DataProcess.train_test_between_subject(data, dftr, trials)
                    model_path = input_folder + model_iter + '/'
                    ckpt = tf.train.get_checkpoint_state(model_path)
                    # tf.train.import_meta_graph(model_path + 'model.cptk.meta')
                    saver.restore(sess, ckpt.model_checkpoint_path)
                    policies = test_and_save(sess, train, output_folder)
                    if output_folder is not None:
                        Export.export_train(train, output_folder, 'train.csv')
                    train_merged = Export.merge_train(train)
                    # add a dummy column at the beginning
                    train_merged.insert(loc=0, column='tmp', value='')
                    policies_merged = Export.merge_policies(policies)
                    # add a dummy column at the beginning
                    policies_merged.insert(loc=0, column='tmp', value='')
                    # train_merged = pd.read_csv(output_folder + 'train.csv')
                    # policies_merged = pd.read_csv(output_folder + 'policies.csv')
                    acc, nlp, total_nlp = Assessor.evaluate_fit_multi(
                        policies_merged, train_merged,
                        pol_in_log=True, random_tie=random_tie)
                    df = df.append(pd.DataFrame({
                        'acc': [acc],
                        'nlp': [nlp],
                        'total nlp': [total_nlp],
                        'group': group,
                        'option': Helper.dicstr({}),
                        'fold': fold,
                        'model_iter': model_iter,
                    }))
    if base_output_folder is not None:
        df.to_csv(base_output_folder + 'accu.csv')
    return df
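# A hedged sketch of calling evaluate_CV. The folder names, fold labels and
# the callback body are assumptions; only the callback signature
# (sess, train, output_folder) -> policies matches the call inside
# evaluate_CV, and worker.simulate's 4-tuple return follows the LSTMBeh
# usage elsewhere in this file.
def _example_evaluate_cv_call(worker, data):
    def test_and_save(sess, train, output_folder):
        policies = {}
        for k, tr in train.items():
            policies[k] = [worker.simulate(sess, v['reward'], v['action'],
                                           v['state'])[0]
                           for v in tr]
        return policies

    folds = {'Healthy': ['fold0', 'fold1'],
             'Depression': ['fold0', 'fold1'],
             'Bipolar': ['fold0', 'fold1']}
    return evaluate_CV(Paths.rest_path + 'archive/beh/rnn-cv',
                       Paths.local_path + 'BD/rnn-cv-evals/',
                       test_and_save, data, folds,
                       model_iters=['model-final'],
                       trials=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])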