def train(argv):
    # Retrieve command line options.
    opts, _ = getopt.getopt(argv, "i:")
    found = False
    for opt, arg in opts:
        if opt == '-i':
            params = utils.read_params(arg)
            found = True
    if not found:
        params = utils.read_params()
    train_algo(params)
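# The snippet above assumes a `utils.read_params` helper that falls back to a
# default config location when called with no argument. A minimal sketch of
# such a helper; the JSON format and the "params.json" default are assumptions,
# not the original implementation:
import json

def read_params(path="params.json"):
    # Load a parameter dictionary from a JSON file, using a default
    # location when the caller does not pass an explicit path.
    with open(path) as f:
        return json.load(f)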
def main(data_path="data/split/", feature_path="data/features/", out_path="data/pca/"): X_train, X_test, y_train, y_test = read_data(data_path) params = read_params("params.yaml", "pca") pca = PCA(**params).fit(X_train) train_feature = pd.DataFrame(pca.transform(X_train)) test_feature = pd.DataFrame(pca.transform(X_test)) train_feature["class"] = y_train test_feature["class"] = y_test if not os.path.isdir(feature_path): os.mkdir(feature_path) train_feature.to_csv(f"{feature_path}train.csv", index=False) test_feature.to_csv(f"{feature_path}test.csv", index=False) save_results(out_path, pca, None) print(f"Finished Feature Engineering:\nStats:") print(f"\tExplained Variance: {pca.explained_variance_}") print(f"\tExplained Variance Ratio: {pca.explained_variance_ratio_}") log_experiment( out_path, metrics=dict( explained_variance_=pca.explained_variance_, explained_variance_ratio_=pca.explained_variance_ratio_, ), )
def train_model(epochs, labels):
    '''
    :param epochs: EEG epochs of shape trials x channels x samples
    :param labels: class labels, one per trial
    :return: trained model and mean validation AUCs
    '''
    params = read_params('config.json')
    model = create_model(params=params,
                         num_channels=epochs.shape[1],
                         num_samples=epochs.shape[2])
    path_to_models_dir = params['path_to_models_dir']
    path_to_model = os.path.join(os.path.dirname(__file__),
                                 path_to_models_dir,
                                 str(int(time.time())))
    max_epochs = params['max_epochs']
    labels = to_categorical(labels, 2)
    epochs = epochs[:, np.newaxis, :, :]
    model, mean_val_aucs = cv_per_subj_test(x_tr_val=epochs,
                                            y_tr_val=labels,
                                            model=model,
                                            model_path=path_to_model,
                                            max_epochs=max_epochs,
                                            block_mode=False,
                                            plot_fold_history=True)
    return model, mean_val_aucs
def Setup(self):
    utils.say("Oooo 'ello, I'm Mrs. Premise!")
    self.Params = utils.read_params()
    try:
        self.Semaphore = sysv_ipc.Semaphore(self.Params["KEY"], sysv_ipc.IPC_CREX)
    except sysv_ipc.ExistentialError as err:
        self.Logger.debug(err)
        # One of my peers created the semaphore already.
        self.Semaphore = sysv_ipc.Semaphore(self.Params["KEY"])
        # Wait for that peer to do the first acquire or release.
        while not self.Semaphore.o_time:
            time.sleep(.1)
    else:
        # Initializing sem.o_time to a nonzero value.
        self.Semaphore.release()
    # Now the semaphore is safe to use.
    try:
        self.Memory = sysv_ipc.SharedMemory(self.Params["KEY"], sysv_ipc.IPC_CREX)
    except sysv_ipc.ExistentialError as err:
        self.Logger.debug(err)
        self.Memory = sysv_ipc.SharedMemory(self.Params["KEY"])
    self.Logger.debug("Setup done")
    return True
def main(data_path='data/split/', feature_path='data/features/', out_path='data/pca/'):
    X_train, X_test, y_train, y_test = read_data(data_path)
    params = read_params('params.yaml', 'pca')
    pca = PCA(**params).fit(X_train)
    train_feature = pd.DataFrame(pca.transform(X_train))
    test_feature = pd.DataFrame(pca.transform(X_test))
    train_feature['class'] = y_train
    test_feature['class'] = y_test
    if not os.path.isdir(feature_path):
        os.mkdir(feature_path)
    train_feature.to_csv(f'{feature_path}train.csv', index=False)
    test_feature.to_csv(f'{feature_path}test.csv', index=False)
    save_results(out_path, pca, None)
    print('Finished Feature Engineering:\nStats:')
    print(f'\tExplained Variance: {pca.explained_variance_}')
    print(f'\tExplained Variance Ratio: {pca.explained_variance_ratio_}')
    log_experiment(
        out_path,
        params=params,
        metrics=dict(explained_variance_=pca.explained_variance_,
                     explained_variance_ratio_=pca.explained_variance_ratio_))
def main(name, argv):
    if len(argv) != 1:
        print_usage(name)
        return
    params = utils.read_params(argv[0])
    cluster = cl.getCluster(params['ClusterName'])
    cluster.runSingle("python " + utils.SCRIPTS_FOL + 'auto.py ' + argv[0])
def __init__(self):
    Process.__init__(self)
    self.Logger = logging.getLogger(__class__.__name__)
    self.State = "IDLE"
    self.Cntr = 0
    self.Running = True
    self.Params = utils.read_params()
    self.CreateFiles()
@classmethod
def load(cls, model_id: str, envs_to_set=None, transfer=False,
         total_training_updates=None, total_timesteps=None):
    # TODO: This function does not update trainer/optimizer variables (e.g.
    # momentum). As such, training after using this function may lead to
    # less-than-optimal results.
    if transfer:
        if total_training_updates is None or total_timesteps is None:
            raise ValueError(
                "If transfer learning is active, total_training_updates and "
                "total_timesteps must be provided!")
        if not (total_timesteps == int(total_timesteps)
                and total_training_updates == int(total_training_updates)):
            raise TypeError(
                "total_training_updates and total_timesteps must be integers")
    load_path = os.path.join(config.model_path, model_id)
    weights, params = utils._load_model_from_file(load_path, "multitask")
    model = cls(policy=params['policy_name'], env_dict=None, _init_setup_model=False)
    model.__dict__.update(params)
    model.num_timesteps = total_timesteps
    model.total_train_steps = total_training_updates
    tasks = params["tasks"]
    params = utils.read_params(model_id, "multitask")
    env_kwargs = params['env_kwargs']
    if transfer:
        tasks_to_set = list(envs_to_set.keys())
        if tasks == tasks_to_set:
            model.set_envs(envs_to_set, tasks)
            model.setup_train_model(transfer=True)
        else:
            print("The envs passed as argument do not correspond to the envs "
                  "that the model was trained on.\n Trained on: {} \n Passed: {}"
                  .format(tasks, tasks_to_set))
            sys.exit()
    else:
        model.setup_step_model()
        env_kwargs['episode_life'] = False
        env_kwargs['clip_rewards'] = False
        model.set_envs_by_name(tasks, env_kwargs=env_kwargs)
    restores = []
    for param, loaded_weight in zip(model.trainable_variables, weights):
        restores.append(param.assign(loaded_weight))
    model.sess.run(restores)
    model.sess.graph.finalize()
    return model, tasks
def test_all(self):
    # Select the transposed-layer and dense-layer variables. Note the explicit
    # membership tests: `'_t' or 'dense' in v.name` would always be truthy and
    # select every variable.
    var_list = [v for v in tf.global_variables()
                if '_t' in v.name or 'dense' in v.name]
    saver = tf.train.Saver(var_list=var_list)
    if (self.large):
        S_train = utils.read_large_data(self.train_mat)
    else:
        S_train = utils.read_data(self.train_mat)
    idxs = np.random.permutation(S_train.shape[0])
    S_train = S_train[idxs]
    S_max, S_min = utils.max_min(S_train, self.n_train)
    del S_train
    print('Loading testing snapshot matrix...')
    if (self.large):
        self.S_test = utils.read_large_data(self.test_mat)
    else:
        self.S_test = utils.read_data(self.test_mat)
    utils.scaling(self.S_test, S_max, S_min)
    if (self.zero_padding):
        self.S_test = utils.zero_pad(self.S_test, self.p)
    print('Loading testing parameters...')
    self.params_test = utils.read_params(self.test_params)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        ckpt = tf.train.get_checkpoint_state(
            os.path.dirname(self.checkpoints_folder + '/checkpoint'))
        if ckpt and ckpt.model_checkpoint_path:
            print(ckpt.model_checkpoint_path)
            saver.restore(sess, ckpt.model_checkpoint_path)
        self.test_once(sess, self.init)
        utils.inverse_scaling(self.U_h, S_max, S_min)
        utils.inverse_scaling(self.S_test, S_max, S_min)
        n_test = self.S_test.shape[0] // self.N_t
        err = np.zeros((n_test, 1))
        for i in range(n_test):
            num = np.sqrt(np.mean(np.linalg.norm(
                self.S_test[i * self.N_t:(i + 1) * self.N_t]
                - self.U_h[i * self.N_t:(i + 1) * self.N_t], 2, axis=1) ** 2))
            den = np.sqrt(np.mean(np.linalg.norm(
                self.S_test[i * self.N_t:(i + 1) * self.N_t], 2, axis=1) ** 2))
            err[i] = num / den
        print('Error indicator epsilon_rel: {0}'.format(np.mean(err)))
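# The error indicator above is the mean, over test trajectories, of the
# relative reconstruction error between U_h and the test snapshots. A
# standalone sketch of the same computation (the function name and packaging
# are mine; the formula mirrors the loop above):
import numpy as np

def epsilon_rel(S_test, U_h, N_t):
    # Each trajectory is a contiguous block of N_t rows; the per-trajectory
    # error is ||S - U_h|| / ||S|| with an RMS norm over time steps.
    n_test = S_test.shape[0] // N_t
    err = np.zeros(n_test)
    for i in range(n_test):
        block = slice(i * N_t, (i + 1) * N_t)
        num = np.sqrt(np.mean(np.linalg.norm(S_test[block] - U_h[block], 2, axis=1) ** 2))
        den = np.sqrt(np.mean(np.linalg.norm(S_test[block], 2, axis=1) ** 2))
        err[i] = num / den
    return np.mean(err)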
def main():
    params = read_params(cfg_file_path)
    trainfiles, testfiles = filesplit(DATADIR)
    savefilenames(SAVEPATH + 'filenames/', trainfiles, testfiles)
    trainfiles = read_names('../../scratch/bd_lstm/filenames/trainfiles.txt')
    trainset = DataSet(root_dir=DATADIR,
                       files=trainfiles,
                       normalize=False,
                       seq_len=params['slice_size'],
                       stride=params['stride'])
    train(SAVEPATH + 'trainstats/', trainset, params)
def plot():
    params_file = 'params_txtfiles/params_peer_critical_lpub.txt'
    all_params = read_params(params_file, int_params=['n'])
    pi_A, pi_B = all_params['pi_A'][0], all_params['pi_B'][0]
    params_order = ['pi_A', 'pi_B']
    params_to_use = [all_params[lab] for lab in params_order]
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for params_set in product(*params_to_use):
        params = {lab: params_set[i] for i, lab in enumerate(params_order)}
        all_lpub_values = []
        colors = ['#ffb347', '#b347ff', '#47ffb3']
        linestyles = ['--', '-', '-.']
        for i, pb in enumerate([0.3, 0.5, 0.7]):
            pa_values = np.arange(0.01, 1.01, 0.01)
            # Find the critical lpub for each pa value.
            lpub_values = [find_critical(pa, pb, pi_A, pi_B)
                           for pa in pa_values]
            ax.plot(pa_values, lpub_values, color=colors[i],
                    label=r'$\pi_b=%.1f$' % pb, linestyle=linestyles[i])
            ax.grid(True, linestyle=':')
            all_lpub_values += lpub_values
        # ax.legend(bbox_to_anchor=(0.5, 0.17), loc='center')
        ax.legend()
        title = gen_str_params(params, params_order=params_order,
                               symbols=params2latex).replace('\n', '')
        ax.set_xlabel(r'$\pi_a$')
        ax.set_ylabel(r'critical $\lambda_{pub}$')
        ax.set_xlim([-0.05, 1.05])
        ax.set_ylim([-0.05, np.max(all_lpub_values) + 0.05])
        ax.set_xticks([0., 0.2, 0.4, 0.6, 0.8, 1.])
        plt.tight_layout()
        str_params = gen_str_params(params, params_order=params_order)
        uniq_param = '_'.join(
            str_params.replace('\n', '').replace('$', '').split('; '))
        plt.savefig('figs/peer/critical_lpub_%s.pdf' % uniq_param)
        ax.set_title(title)
        plt.savefig('figs/peer/critical_lpub_%s.png' % uniq_param)
def main():
    utils.say("Oooo 'ello, I'm Mrs. Conclusion!")
    params = utils.read_params()
    semaphore = sysv_ipc.Semaphore(params["KEY"])
    memory = sysv_ipc.SharedMemory(params["KEY"])
    utils.say("memory attached at %d" % memory.address)
    what_i_wrote = ""
    s = ""
    for i in range(0, params["ITERATIONS"]):
        utils.say("i = %d" % i)
        if not params["LIVE_DANGEROUSLY"]:
            # Wait for Mrs. Premise to free up the semaphore.
            utils.say("acquiring the semaphore...")
            semaphore.acquire()
        s = utils.read_from_memory(memory)
        while s == what_i_wrote:
            if not params["LIVE_DANGEROUSLY"]:
                # Release the semaphore...
                utils.say("releasing the semaphore")
                semaphore.release()
                # ...and wait for it to become available again.
                utils.say("waiting to acquire the semaphore...")
                semaphore.acquire()
            s = utils.read_from_memory(memory)
        if what_i_wrote:
            if PY_MAJOR_VERSION > 2:
                what_i_wrote = what_i_wrote.encode()
            try:
                assert s == hashlib.md5(what_i_wrote).hexdigest()
            except AssertionError:
                raise AssertionError(
                    "Shared memory corruption after %d iterations." % i)
        if PY_MAJOR_VERSION > 2:
            s = s.encode()
        what_i_wrote = hashlib.md5(s).hexdigest()
        utils.write_to_memory(memory, what_i_wrote)
        if not params["LIVE_DANGEROUSLY"]:
            utils.say("releasing the semaphore")
            semaphore.release()
        # TODO: remove
        time.sleep(1)
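# Both demo processes rely on utils.read_from_memory / utils.write_to_memory.
# A plausible sketch of those helpers, assuming strings are stored
# NUL-terminated at the start of the segment (an assumption; the demo's real
# utils module may differ):
def write_to_memory(memory, s):
    # Write a NUL-terminated string at the start of the shared memory segment.
    s += '\0'
    memory.write(s.encode())

def read_from_memory(memory):
    # Read the whole segment and decode everything up to the first NUL byte.
    s = memory.read().decode()
    return s[:s.index('\0')]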
def destroy_shm_semaphore(semaphore, mapfile):
    params = utils.read_params()
    utils.say("Destroying semaphore and shared memory.")
    mapfile.close()
    # I could call memory.unlink() here but in order to demonstrate
    # unlinking at the module level I'll do it that way.
    posix_ipc.unlink_shared_memory(params["SHARED_MEMORY_NAME"])
    semaphore.release()
    # I could also unlink the semaphore by calling
    # posix_ipc.unlink_semaphore() but I'll do it this way instead.
    semaphore.unlink()
def main(data_path="data/features/", out_path="data/models/svc/"): X_train, X_test, y_train, y_test = read_data(data_path) name = "LinearSVC" params = read_params("params.yaml", "svc") model = LinearSVC(**params) model.fit(X_train, y_train) accuracy, c_matrix, fig = evaluate_model(model, X_test, y_test) print_results(accuracy, c_matrix, name) save_results(out_path, model, fig) log_experiment(out_path, metrics=dict(accuracy=accuracy, confusion_matrics=c_matrix))
def main():
    df = read_data()
    params = read_params('s1')
    df = create_bins(df, params['q1'], params['q2'])
    print(df.bin.value_counts())
    df.reset_index(inplace=True)
    # Save the new bins.
    df[['parcelid', 'bin']].to_csv('data/s1_intermediate/train_bins.csv',
                                   index=False)
    # Calculate median output per bin and save it.
    logerr_per_bin = calculate_logerror_per_bin(df)
    logerr_per_bin.to_csv('data/s1_intermediate/train_logerror_per_bin.csv',
                          index=False)
def main(data_path='data/features/', out_path='data/models/logistic/'):
    X_train, X_test, y_train, y_test = read_data(data_path)
    name = 'LogisticRegression'
    params = read_params('params.yaml', 'logistic')
    model = LogisticRegression(**params)
    model.fit(X_train, y_train)
    accuracy, c_matrix, fig = evaluate_model(model, X_test, y_test)
    print_results(accuracy, c_matrix, name)
    save_results(out_path, model, fig)
    log_experiment(out_path,
                   params=params,
                   metrics=dict(accuracy=accuracy, confusion_matrix=c_matrix))
def loadModelThread():
    global Trained_model, ClassifierPrepared
    params = read_params('config.json')
    path_to_models_dir = params['path_to_models_dir']
    path_to_model = os.path.join(os.path.dirname(__file__),
                                 path_to_models_dir, '**', '*.hdf5')
    # recursive=True is required for the '**' pattern to descend into the
    # timestamped model subdirectories; sorting makes [-1] pick the most
    # recent (largest timestamp) model rather than an arbitrary one.
    filename_list = sorted(glob.glob(path_to_model, recursive=True))
    if len(filename_list) > 0:
        filename_ = filename_list[-1]
        Trained_model = load_model(filename_)
    ClassifierPrepared = Trained_model is not None
def control_daemon(log_path, heat_pin, timeout):
    param_path = '%s.params' % log_path
    while True:
        df = utils.read_log(log_path, timeout=timeout)
        params = utils.read_params(param_path, timeout=timeout)
        heat_on = control_params(320, df['time'].values, df['heat_on'].values,
                                 df['temperature'].values, params)
        if heat_on:
            GPIO.output(heat_pin, GPIO.HIGH)
        else:
            GPIO.output(heat_pin, GPIO.LOW)
def main(data_path='data/features/', out_path='data/models/r_forrest/',
         n_estimators=10, max_samples=30):
    X_train, X_test, y_train, y_test = read_data(data_path)
    name = 'RandomForrest'
    params = read_params('params.yaml', 'forrest')
    model = RandomForestClassifier(**params)
    model.fit(X_train, y_train)
    accuracy, c_matrix, fig = evaluate_model(model, X_test, y_test)
    print_results(accuracy, c_matrix, name)
    save_results(out_path, model, fig)
    log_experiment(out_path,
                   params=params,
                   metrics=dict(accuracy=accuracy, confusion_matrix=c_matrix))
def main():
    '''Perform species classification.'''
    # Get dictionary of parameters (query_dict in GUI).
    cwd = os.getcwd()
    param_dict = read_params(cwd)
    # Get paths.
    output_dir, aml_path = os.path.split(param_dict['filepath'])
    aml_clean_path = aml_path.split('.')[0] + '-clean.csv'
    file_dir = os.path.dirname(__file__)
    class_dir = os.path.join(file_dir, os.path.pardir, 'data')
    class_path = os.path.join(class_dir, param_dict['classifier'])
    # Write scan clean file.
    write_aml_clean(aml_path, aml_clean_path)
    # Run classifier and generate output.
    classify_calls(aml_clean_path, class_path, param_dict)
def init_shm_semaphore():
    params = utils.read_params()
    # Create the shared memory and the semaphore.
    memory = posix_ipc.SharedMemory(params["SHARED_MEMORY_NAME"],
                                    posix_ipc.O_CREAT,
                                    size=params["SHM_SIZE"])
    semaphore = posix_ipc.Semaphore(params["SEMAPHORE_NAME"], posix_ipc.O_CREAT)
    # MMap the shared memory.
    mapfile = mmap.mmap(memory.fd, memory.size)
    # Once I've mmapped the file descriptor, I can close it without
    # interfering with the mmap.
    memory.close_fd()
    # I seed the shared memory with a random string (the current time).
    what_i_wrote = time.asctime()
    utils.write_to_memory(mapfile, what_i_wrote)
    return semaphore, mapfile, what_i_wrote
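# A sketch of how init_shm_semaphore and destroy_shm_semaphore are meant to
# bracket the demo's main loop (the loop body here is illustrative only):
def demo():
    semaphore, mapfile, what_i_wrote = init_shm_semaphore()
    try:
        # ... exchange messages through the mapped segment here ...
        pass
    finally:
        # Always clean up the kernel-persistent IPC objects, even on error.
        destroy_shm_semaphore(semaphore, mapfile)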
def main(data_path='data/features/', model_path='data/models/',
         out_path='data/models/ensemble/'):
    X_train, X_test, y_train, y_test = read_data(data_path)
    name = 'Ensemble'
    params = read_params('params.yaml', 'ensemble')
    cl1 = load_model(f'{model_path}/logistic/')
    cl2 = load_model(f'{model_path}/svc/')
    cl3 = load_model(f'{model_path}/r_forrest/')
    estimators = [('l_regression', cl1), ('l_svc', cl2), ('r_forrest', cl3)]
    model = VotingClassifier(estimators, **params)
    model.fit(X_train, y_train)
    accuracy, c_matrix, fig = evaluate_model(model, X_test, y_test)
    print_results(accuracy, c_matrix, name)
    save_results(out_path, model, fig)
    log_experiment(out_path,
                   metrics=dict(accuracy=accuracy, confusion_matrix=c_matrix))
def __init__(self, model_id: str, n_steps: int, input_len: int, output_len: int):
    self.model_id = model_id
    self.transfer = bool(model_id) and os.path.exists(
        os.path.join(config.model_path, model_id))
    if self.transfer:
        params = read_params(model_id, "meta")
        self.input_len = params['input_length']
        self.output_len = params['output_length']
        self.n_steps = params['n_steps']
        self.window_size = (self.n_steps // 2 + 1 if self.n_steps < 15
                            else self.n_steps // 3)
        self.gamma = params['gamma']
        self.meta_learner = MetaA2CModel.load(self.model_id, self.input_len,
                                              self.output_len)
    else:
        self.input_len = input_len
        self.output_len = output_len
        self.n_steps = n_steps
        self.window_size = (self.n_steps // 2 + 1 if self.n_steps < 15
                            else self.n_steps // 3)
        self.gamma = 0.8
        self.meta_learner = MetaA2CModel(self.input_len, self.output_len,
                                         self.n_steps,
                                         window_size=self.window_size,
                                         gamma=self.gamma)
    self.input = deque([np.zeros(self.input_len)] * self.window_size,
                       maxlen=self.window_size)
    self.input_batch = deque(
        [np.zeros([self.window_size, self.input_len])] * self.n_steps,
        maxlen=self.n_steps)
    self.reward_batch = deque([0.0] * self.n_steps, maxlen=self.n_steps)
    self.action_batch = deque([0] * self.n_steps, maxlen=self.n_steps)
    self.value_batch = deque([0.0] * self.n_steps, maxlen=self.n_steps)
    self.train_step = 0
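# The batches above use collections.deque with maxlen as fixed-size ring
# buffers: appending to a full deque silently drops the oldest entry, so each
# batch always holds the most recent n_steps items. A tiny illustration:
from collections import deque

buf = deque([0.0] * 3, maxlen=3)
buf.append(1.0)   # the oldest 0.0 is dropped automatically
assert list(buf) == [0.0, 0.0, 1.0]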
def main():
    if not os.path.isdir(options.base_dir):
        os.mkdir(options.base_dir)  # create working directory
    print(options.action)
    if options.action == "write_orthos":
        cds_dic = utils.read_params(options.param_file)
        ortho_dic = utils.read_orthofile(options.orthofile_format, options.ortho_file)
        index_file = "%s/%s_ortho.index" % (options.base_dir, options.prefix)
        seq_dic = utils.get_cds_files(cds_dic)
        if options.no_paralogs:
            utils.write_orthos(options.ortho_file, seq_dic,
                               "%s/%s_orthos" % (options.base_dir, options.prefix),
                               index_file)
        else:
            utils.write_orthoparagroups(
                ortho_dic, seq_dic,
                "%s/%s_orthos" % (options.base_dir, options.prefix),
                index_file, options.min_taxa)
        print("Orthogroups written to %s/%s_orthos" % (options.base_dir, options.prefix))
        print("Exiting")
        sys.exit()
    if options.action == "write_cnees":
        ncar_dic = utils.read_params(options.param_file)
        # This needs to be "filtered_loci.index" from the NCAR pipeline.
        ortho_dic = utils.ncar_ortho_dic(options.ncar_ortho_file, options.min_taxa)
        seq_dic = utils.get_cds_files(ncar_dic)
        index_file = "%s/%s_ncar_ortho.index" % (options.base_dir, options.prefix)
        utils.write_ncar_cnees(ortho_dic, seq_dic,
                               "%s/%s_ncars" % (options.base_dir, options.prefix),
                               options.min_taxa, index_file)
        sys.exit()
    # Align coding sequences and concatenate all protein sequences into
    # an aligned matrix that can be input into RAxML to make a phylogeny.
    if options.action == "align_coding":
        cds_dic = utils.read_params(options.param_file)
        index_file = "%s/%s_ortho.index" % (options.base_dir, options.prefix)
        paras_allowed = True
        # Gets the list of OGs that meet the minimum taxa requirement. If
        # paras_allowed is False, no OGs containing paralogs are returned.
        og_list = utils.read_ortho_index(index_file, options.min_taxa, paras_allowed)
        iscoding = True
        utils.fsa_coding_align(og_list,
                               "%s/%s_orthos/" % (options.base_dir, options.prefix),
                               "%s/%s_fsa_coding" % (options.base_dir, options.prefix),
                               options.num_threads, iscoding)
        print("Orthogroups aligned using FSA and output written to "
              "%s/%s_fsa_coding" % (options.base_dir, options.prefix))
        paras_allowed = False
        # Gets only those OGs that have a single sequence for every species in
        # the study, for making a sequence matrix usable for phylogenetics.
        og_list = utils.read_ortho_index(index_file, len(cds_dic.keys()), paras_allowed)
        utils.concatenate_for_raxml(
            "%s/%s_fsa_coding" % (options.base_dir, options.prefix),
            "%s/%s.afa" % (options.base_dir, options.prefix),
            og_list, cds_dic.keys())
        print("If you would like to run a phylogenetic analysis, a concatenated "
              "amino acid sequence matrix of all orthogroups including all %s of "
              "the species in your study has been written to %s/%s.afa"
              % (len(cds_dic.keys()), options.base_dir, options.prefix))
        print("Exiting")
        sys.exit()
    if options.action == "fourfold_matrix":
        cds_dic = utils.read_params(options.param_file)
        index_file = "%s/%s_ortho.index" % (options.base_dir, options.prefix)
        paras_allowed = False
        # Gets only those OGs that have a single sequence for every species in
        # the study, for making a sequence matrix usable for phylogenetics.
        og_list = utils.read_ortho_index(index_file, len(cds_dic.keys()), paras_allowed)
        utils.concatenate_fourf_for_raxml(
            "%s/%s_gene_ancestral" % (options.base_dir, options.prefix),
            "%s/%s_fourfold.afa" % (options.base_dir, options.prefix),
            og_list, cds_dic.keys())
        sys.exit()
    if options.action == "align_ncars":
        iscoding = False
        # This needs to be "filtered_loci.index" from the NCAR pipeline.
        ortho_dic = utils.ncar_ortho_dic(options.ncar_ortho_file, options.min_taxa)
        ncar_list = ortho_dic.keys()
        utils.fsa_ncar_align(ncar_list,
                             "%s/%s_ncars" % (options.base_dir, options.prefix),
                             "%s/%s_fsa_ncar" % (options.base_dir, options.prefix),
                             options.num_threads, iscoding)
        sys.exit()
    if options.action == "pairs_coding_div":
        coding_ortho_dic = utils.read_orthofile("orthofinder", options.ortho_file)
        exclude_paras = True
        og_list = utils.min_taxa_membership(
            {(options.inspecies, options.outspecies): 2}, {}, [],
            "%s/%s_filtered.index" % (options.base_dir, options.prefix),
            options.min_taxa, exclude_paras)
        good_coding_ortho_dic = {}
        # og_list = [10644, 11419, 12394, 11141, 11231, 11334, 11341]
        for og in og_list:
            good_coding_ortho_dic[og] = coding_ortho_dic[og]
        pickle_dir = options.pickle_dir
        utils.pairs_coding_div(
            options.inspecies, options.outspecies, good_coding_ortho_dic,
            "%s/%s_fsa_coding_jarvis_columnfilt_seqfilt_noparas"
            % (options.base_dir, options.prefix),
            options.base_dir, options.num_threads, options.min_taxa)
        sys.exit()
    if options.action == "alignment_filter":
        index_file = "%s/%s_ortho.index" % (options.base_dir, options.prefix)
        paras_allowed = True
        og_list = utils.read_ortho_index(index_file, options.min_taxa, paras_allowed)
        utils.alignment_column_filtering(
            "%s/%s_fsa_coding" % (options.base_dir, options.prefix),
            "%s/%s_fsa_coding_columnfilt" % (options.base_dir, options.prefix),
            og_list, options.nogap_min_count, options.nogap_min_prop,
            options.nogap_min_species, {}, options.num_threads)
        print("First iteration of column filtering done. Results written to "
              "%s/%s_fsa_coding_columnfilt" % (options.base_dir, options.prefix))
        print("Starting Jarvis filter.")
        utils.jarvis_filtering(
            og_list,
            "%s/%s_fsa_coding_columnfilt" % (options.base_dir, options.prefix),
            "%s/%s_fsa_coding_jarvis" % (options.base_dir, options.prefix),
            options.min_cds_len, options.num_threads)
        print("Jarvis filtering done. Results written to %s/%s_fsa_coding_jarvis"
              % (options.base_dir, options.prefix))
        utils.alignment_column_filtering(
            "%s/%s_fsa_coding_jarvis" % (options.base_dir, options.prefix),
            "%s/%s_fsa_coding_jarvis_columnfilt" % (options.base_dir, options.prefix),
            og_list, options.nogap_min_count, options.nogap_min_prop,
            options.nogap_min_species, {}, options.num_threads)
        print("Second iteration of column filtering done. Results written to "
              "%s/%s_fsa_coding_jarvis_columnfilt" % (options.base_dir, options.prefix))
        utils.sequence_gap_filtering(
            "%s/%s_fsa_coding_jarvis_columnfilt" % (options.base_dir, options.prefix),
            "%s/%s_fsa_coding_jarvis_columnfilt_seqfilt" % (options.base_dir, options.prefix),
            "%s/%s_fsa_coding_jarvis_columnfilt_seqfilt_noparas"
            % (options.base_dir, options.prefix),
            "%s/%s_orthos" % (options.base_dir, options.prefix),
            og_list, options.min_seq_prop_kept, options.max_seq_prop_gap,
            options.min_cds_len,
            "%s/%s_filtered.index" % (options.base_dir, options.prefix))
        print("Filtering of whole sequences based on gap content done. Results "
              "written to %s/%s_fsa_coding_jarvis_columnfilt_seqfilt"
              % (options.base_dir, options.prefix))
        print("Exiting")
        sys.exit()
    if options.action == "rer_converge":
        test_type = "aaml_blengths"
        foreground = "aaml_blengths"
        exclude_paras = True
        manda_taxa, multi_taxa, remove_list = utils.make_taxa_dic(options.taxa_inclusion)
        og_list = utils.min_taxa_membership(
            manda_taxa, multi_taxa, remove_list,
            "%s/%s_filtered.index" % (options.base_dir, options.prefix),
            options.min_taxa, exclude_paras)
        print(len(og_list))
        print(og_list)
        utils.paml_test(
            og_list, foreground, test_type,
            "%s/%s_fsa_coding_jarvis_columnfilt_seqfilt_noparas"
            % (options.base_dir, options.prefix),
            "%s/%s_%s_%s" % (options.base_dir, options.prefix, foreground,
                             options.outputfile.split(".")[0]),
            options.tree_file, options.num_threads, options.use_gblocks,
            options.min_taxa, remove_list)
        utils.read_aaml_phylos(
            og_list,
            "%s/%s_%s_%s" % (options.base_dir, options.prefix, foreground,
                             options.outputfile.split(".")[0]),
            "%s/aaml_compiled" % (options.base_dir),
            options.outputfile, options.min_taxa)
        sys.exit()
    if options.action == "nopara_gene_trees":
        constrained = False
        paras_allowed = True
        include_paras = False
        og_list = utils.read_ortho_index(
            "%s/%s_filtered.index" % (options.base_dir, options.prefix),
            options.min_taxa, paras_allowed)
        cur_og_list = og_list
        utils.gene_trees(
            cur_og_list,
            "%s/%s_fsa_coding_jarvis_columnfilt_seqfilt_noparas"
            % (options.base_dir, options.prefix),
            "%s/%s_nopara_nucl_gene_trees" % (options.base_dir, options.prefix),
            constrained, options.tree_file, options.num_threads, "nucs")
        sys.exit()
    if options.action == "check_discordance":
        paras_allowed = True
        og_list = utils.read_ortho_index(
            "%s/%s_filtered.index" % (options.base_dir, options.prefix),
            options.min_taxa, paras_allowed)
        utils.discordance(
            og_list,
            "%s/%s_fsa_coding_jarvis_columnfilt_seqfilt_noparas"
            % (options.base_dir, options.prefix),
            "%s/%s_nopara_nucl_gene_trees" % (options.base_dir, options.prefix),
            "%s/%s_discordance" % (options.base_dir, options.prefix),
            options.tree_file, options.num_threads)
        utils.read_discordance(
            "%s/%s_discordance" % (options.base_dir, options.prefix),
            og_list, options.base_dir)
        sys.exit()
    if options.action == "rer_goatools":
        if not os.path.exists("%s/RER_goatools" % options.base_dir):
            os.mkdir("%s/RER_goatools" % options.base_dir)
        rerconverge_output = options.rerconverge_output
        short_outputname = rerconverge_output.split("/")[-1][0:-4]
        utils.rer_goatools(
            rerconverge_output, rerconverge_output, options.go_database,
            "%s/RER_goatools/rer_0.05_slower_go_%s" % (options.base_dir, short_outputname),
            3, 0.05, "slow")
        utils.rer_goatools(
            rerconverge_output, rerconverge_output, options.go_database,
            "%s/RER_goatools/rer_0.05_faster_go_%s" % (options.base_dir, short_outputname),
            3, 0.05, "fast")
        sys.exit()
    if options.action == "goatools":
        outbase = options.goa_forefile.split("/")[-1].rsplit(".", 1)[0]
        if not os.path.exists("%s/%s_goatools/" % (options.base_dir, options.prefix)):
            os.mkdir("%s/%s_goatools/" % (options.base_dir, options.prefix))
        outdir = "%s/%s_goatools/%s" % (options.base_dir, options.prefix, outbase)
        if not os.path.exists(outdir):
            os.mkdir(outdir)
        utils.og_list_goatools(options.goa_forefile, options.goa_backfile,
                               options.go_database, outdir)
        sys.exit()
    if options.action == "hyphy_relax":
        test_type = "RELAX"
        exclude_paras = True
        og_list = []
        manda_taxa, multi_taxa, remove_list = utils.make_taxa_dic(options.taxa_inclusion)
        if options.og_list_file:
            reader = open(options.og_list_file, 'r')
            for line in reader:
                cur_og = int(line.strip())
                og_list.append(cur_og)
        else:
            og_list = utils.min_taxa_membership(
                manda_taxa, multi_taxa, remove_list,
                "%s/%s_filtered.index" % (options.base_dir, options.prefix),
                options.min_taxa, exclude_paras)
        print(len(og_list))
        if options.foreground == "INTREE":
            fore_list = "INTREE"
            # cur_og_list = og_list
        elif options.foreground.startswith("DAUGHTERS"):
            fore_list = options.foreground.split(",")
        else:
            fore_list = options.foreground.split(",")
        # og_list = [10724, 11488, 12704, 13036, 13879, 15282]
        print(options.foreground)
        utils.paml_test(
            og_list, fore_list, test_type,
            "%s/%s_fsa_coding_jarvis_columnfilt_seqfilt_noparas"
            % (options.base_dir, options.prefix),
            "%s/%s_%s_%s" % (options.base_dir, options.prefix,
                             options.foreground, test_type),
            options.tree_file, options.num_threads, options.use_gblocks,
            options.min_taxa, remove_list)
        utils.read_hyphy_relax(
            og_list,
            "%s/%s_%s_%s" % (options.base_dir, options.prefix,
                             options.foreground, test_type),
            options.base_dir, options.foreground)
        sys.exit()
    if options.action == "hyphy_relax_permutation":
        if not os.path.exists("%s/relax_permutations" % options.base_dir):
            os.mkdir("%s/relax_permutations" % options.base_dir)
        test_type = "RELAX"
        exclude_paras = True
        og_list = []
        manda_taxa, multi_taxa, remove_list = utils.make_taxa_dic(options.taxa_inclusion)
        if options.og_list_file:
            reader = open(options.og_list_file, 'r')
            for line in reader:
                cur_og = int(line.strip())
                og_list.append(cur_og)
        else:
            og_list = utils.min_taxa_membership(
                manda_taxa, multi_taxa, remove_list,
                "%s/%s_filtered.index" % (options.base_dir, options.prefix),
                options.min_taxa, exclude_paras)
        print(len(og_list))
        fore_list = options.foreground.split(",")
        random.shuffle(fore_list)
        fore_list = fore_list[0:6]
        # og_list = [10724, 11488, 12704, 13036, 13879, 15282]
        print(options.foreground)
        print(fore_list)
        perm_fores = open("%s/relax_permutations/foreground_perm_%s.txt"
                          % (options.base_dir, options.hyphy_perm_num), 'w')
        perm_fores.write(",".join(fore_list))
        perm_fores.close()
        utils.paml_test(
            og_list, fore_list, test_type,
            "%s/%s_fsa_coding_jarvis_columnfilt_seqfilt_noparas"
            % (options.base_dir, options.prefix),
            "%s/relax_permutations/%s_%s_%s_%s"
            % (options.base_dir, options.prefix, ",".join(fore_list),
               test_type, options.hyphy_perm_num),
            options.tree_file, options.num_threads, options.use_gblocks,
            options.min_taxa, remove_list)
        utils.read_hyphy_relax(
            og_list,
            "%s/relax_permutations/%s_%s_%s_%s"
            % (options.base_dir, options.prefix, ",".join(fore_list),
               test_type, options.hyphy_perm_num),
            "%s/relax_permutations/" % options.base_dir,
            options.hyphy_perm_num)
        sys.exit()
    if options.action == "hyphy_absrel":
        test_type = "aBSREL"
        exclude_paras = True
        manda_taxa, multi_taxa, remove_list = utils.make_taxa_dic(options.taxa_inclusion)
        og_list = []
        if options.og_list_file:
            reader = open(options.og_list_file, 'r')
            for line in reader:
                cur_og = int(line.strip())
                og_list.append(cur_og)
        else:
            og_list = utils.min_taxa_membership(
                manda_taxa, multi_taxa, remove_list,
                "%s/%s_filtered.index" % (options.base_dir, options.prefix),
                options.min_taxa, exclude_paras)
        print(len(og_list))
        # print(og_list[0:10])
        og_list = utils.limit_list(og_list, options.min_og_group, options.max_og_group)
        # og_list = og_list[0:10]
        utils.paml_test(
            og_list, [], test_type,
            "%s/%s_fsa_coding_jarvis_columnfilt_seqfilt_noparas"
            % (options.base_dir, options.prefix),
            "%s/%s_%s_%s" % (options.base_dir, options.prefix, "all", test_type),
            options.tree_file, options.num_threads, options.use_gblocks,
            options.min_taxa, remove_list)
        utils.read_hyphy_absrel(
            og_list,
            "%s/%s_%s_%s" % (options.base_dir, options.prefix, "all", test_type),
            options.base_dir)
        sys.exit()
    if options.action == "godatabase":
        gaf_file = "/Genomics/kocherlab/berubin/annotation/trinotate/AMEL/AMEL.gaf"
        gaf_file = "/Genomics/kocherlab/berubin/annotation/trinotate/PGRA/PGRA.gaf"
        gaf_file = "/Genomics/kocherlab/berubin/annotation/trinotate/ACEP/ACEP.gaf"
        gaf_file = "/Genomics/kocherlab/berubin/annotation/hic/trinotate/LALB/LALB.gaf"
        utils.make_go_database(ortho_dic, ipr_taxa_list,
                               "%s/%s" % (options.base_dir, options.prefix), gaf_file)
        sys.exit()
    if options.action == "yn_dnds":
        # paras_allowed = True
        exclude_paras = True
        manda_taxa, multi_taxa, remove_list = utils.make_taxa_dic(options.taxa_inclusion)
        og_list = utils.min_taxa_membership(
            manda_taxa, multi_taxa, remove_list,
            "%s/%s_filtered.index" % (options.base_dir, options.prefix),
            options.min_taxa, exclude_paras)
        print(len(og_list))
        # og_list = utils.read_ortho_index(index_file, options.min_taxa, paras_allowed)
        utils.yn_estimates(
            og_list,
            "%s/%s_fsa_coding_jarvis_columnfilt_seqfilt_noparas"
            % (options.base_dir, options.prefix),
            "%s/%s_yn" % (options.base_dir, options.prefix),
            options.tree_file, options.min_taxa, options.use_gblocks, remove_list)
        sys.exit()
    if options.action == "gc_content":
        if not os.path.isdir("%s/%s_gc_content" % (options.base_dir, options.prefix)):
            os.mkdir("%s/%s_gc_content" % (options.base_dir, options.prefix))
        paras_allowed = True
        # Defined locally: each action branch exits before later branches run.
        index_file = "%s/%s_ortho.index" % (options.base_dir, options.prefix)
        og_list = utils.read_ortho_index(index_file, options.min_taxa, paras_allowed)
        utils.gc_content(og_list,
                         "%s/%s_fsa_coding" % (options.base_dir, options.prefix),
                         "%s/%s_gc_content" % (options.base_dir, options.prefix))
        sys.exit()
    if options.action == "free_ratios":
        test_type = "free"
        foreground = "free"
        get_dn_ds = True
        exclude_paras = True
        manda_taxa, multi_taxa, remove_list = utils.make_taxa_dic(options.taxa_inclusion)
        og_list = []
        if options.og_list_file:
            reader = open(options.og_list_file, 'r')
            for line in reader:
                cur_og = int(line.strip())
                og_list.append(cur_og)
        else:
            og_list = utils.min_taxa_membership(
                manda_taxa, multi_taxa, remove_list,
                "%s/%s_filtered.index" % (options.base_dir, options.prefix),
                options.min_taxa, exclude_paras)
        print(len(og_list))
        # og_list = utils.read_ortho_index(index_file, options.min_taxa, paras_allowed)
        utils.paml_test(
            og_list, foreground, test_type,
            "%s/%s_fsa_coding_jarvis_columnfilt_seqfilt_noparas"
            % (options.base_dir, options.prefix),
            "%s/%s_%s_%s" % (options.base_dir, options.prefix, foreground, test_type),
            options.tree_file, options.num_threads, options.use_gblocks,
            options.min_taxa, remove_list)
        cur_og_list = og_list
        utils.read_frees(
            "%s/%s_%s_%s" % (options.base_dir, options.prefix, foreground, test_type),
            "%s/%s_%s_%s_results" % (options.base_dir, options.prefix, foreground, test_type),
            "%s/%s.gaf" % (options.base_dir, options.prefix),
            "%s/%s_%s_%s_go" % (options.base_dir, options.prefix, foreground, test_type),
            get_dn_ds, options.tree_file, cur_og_list)
        sys.exit()
    if options.action == "time_aamls":
        paras_allowed = True
        foreground = "aaml_blengths"
        test_type = "aaml_blengths"
        index_file = "%s/%s_ortho.index" % (options.base_dir, options.prefix)
        fore_list = options.foreground.split(",")
        og_list = utils.read_ortho_index(index_file, options.min_taxa, paras_allowed)
        # og_list = [3576]
        utils.aaml_time_phylos(
            og_list,
            "%s/%s_%s_%s" % (options.base_dir, options.prefix, foreground, test_type),
            "%s/%s_aaml_time_calibrated" % (options.base_dir, options.prefix),
            options.timetree, fore_list)
        sys.exit()
    if options.action == "ds_correlations":
        paras_allowed = True
        index_file = "%s/%s_ortho.index" % (options.base_dir, options.prefix)
        og_list = utils.read_ortho_index(index_file, options.min_taxa, paras_allowed)
        # og_list = og_list[0:100]
        bootstrap_taxa = True
        categorical = False
        utils.bootstrapping_ds_time_correlations(
            og_list,
            "%s/%s_free_free" % (options.base_dir, options.prefix),
            "%s/%s_ds_corrs" % (options.base_dir, options.prefix),
            options.timetree, options.traittree, bootstrap_taxa, categorical)
        sys.exit()
    if options.action == "branch_test":
        test_type = "branch"
        foreground = options.foreground
        paras_allowed = True
        index_file = "%s/%s_ortho.index" % (options.base_dir, options.prefix)
        og_list = utils.read_ortho_index(index_file, options.min_taxa, paras_allowed)
        cur_og_list = og_list
        # cur_og_list = utils.target_taxa_in_og(ortho_dic, target_taxa, og_list)
        utils.paml_test(
            cur_og_list, foreground, test_type,
            "%s/%s_fsa_coding" % (options.base_dir, options.prefix),
            "%s/%s_%s_%s" % (options.base_dir, options.prefix, foreground, test_type),
            options.tree_file, options.num_threads)
        utils.test_lrt_branch(
            "%s/%s_%s_%s" % (options.base_dir, options.prefix, foreground, test_type),
            "%s/%s_%s_%s.lrt" % (options.base_dir, options.prefix, foreground, test_type),
            "%s/%s.gaf" % (options.base_dir, options.prefix),
            ortho_dic,
            "%s/%s_%s_%s_go" % (options.base_dir, options.prefix, foreground, test_type))
        sys.exit()
    if options.action == "bs_test":
        test_type = "bs"
        foreground = "solitary"
        paras_allowed = True
        index_file = "%s/%s_ortho.index" % (options.base_dir, options.prefix)
        og_list = utils.read_ortho_index(index_file, options.min_taxa, paras_allowed)
        cur_og_list = og_list
        foreground = options.foreground
        utils.paml_test(
            cur_og_list, foreground, test_type,
            "%s/%s_fsa_coding" % (options.base_dir, options.prefix),
            "%s/%s_%s_%s" % (options.base_dir, options.prefix, foreground, test_type),
            options.tree_file, options.num_threads, options.use_gblocks,
            options.min_taxa)
        utils.test_lrt(
            "%s/%s_%s_%s" % (options.base_dir, options.prefix, foreground, test_type),
            "%s/%s_%s_%s.lrt" % (options.base_dir, options.prefix, foreground, test_type),
            "%s/%s.gaf" % (options.base_dir, options.prefix),
            ortho_dic,
            "%s/%s_%s_%s_go" % (options.base_dir, options.prefix, foreground, test_type))
        sys.exit()
    if options.action == "hypergeom":
        utils.hypergeom_test(options.hyper_pop, options.hyper_pop_cond,
                             options.hyper_targets, options.hyper_targets_back)
        sys.exit()
    if options.action == "rer_hypergeom":
        utils.rer_hypergeom_test(options.hyper_pop, options.hyper_pop_cond,
                                 options.rerconverge_output, 0, 0.05, "fast")
        utils.rer_hypergeom_test(options.hyper_pop, options.hyper_pop_cond,
                                 options.rerconverge_output, 0, 0.05, "slow")
        utils.rer_hypergeom_test(options.hyper_pop, options.hyper_pop_cond,
                                 options.rerconverge_output, 0, 0.01, "fast")
        utils.rer_hypergeom_test(options.hyper_pop, options.hyper_pop_cond,
                                 options.rerconverge_output, 0, 0.01, "slow")
        sys.exit()
import sysv_ipc

import utils

params = utils.read_params()

try:
    mq = sysv_ipc.MessageQueue(params["KEY"])
    mq.remove()
    s = "message queue %d removed" % params["KEY"]
    print(s)
except sysv_ipc.ExistentialError:
    print("message queue doesn't exist")

print("\nAll clean!")
import shutil

import click
import pandas as pd
from deepsense import neptune
from sklearn.metrics import roc_auc_score

import pipeline_config as cfg
from pipelines import PIPELINES
from utils import init_logger, read_params, create_submission, set_seed, save_evaluation_predictions, \
    read_csv_time_chunks, cut_data_in_time_chunks, data_hash_channel_send, get_submission_hours_index

set_seed(1234)
logger = init_logger()
ctx = neptune.Context()
params = read_params(ctx)


@click.group()
def action():
    pass


@action.command()
def prepare_data():
    logger.info('chunking train')
    train = pd.read_csv(params.raw_train_filepath)
    cut_data_in_time_chunks(train,
                            timestamp_column='click_time',
                            chunks_dir=params.train_chunks_dir,
                            prefix='train',
def evaluate_any_file():
    # os.system(scp )
    filepath = '../../original/processed_data/'
    weightpath = '../../scratch/bd_lstm/trainstats/weights_middle.pth'
    demoweights = '../../scratch/bd_lstm/trainstats/demoweights.pth'
    weightpath = demoweights
    parampath = '../../code/bdrnn/conf_model.cfg'
    filenamepath = '../../scratch/bd_lstm/filenames/testfiles.txt'
    minmaxdatapath = '../../original/minmaxdata/'
    # Get best file.
    filenames = read_names(filenamepath)
    print(len(filenames))
    filenamedict = make_dict(filenames)
    velocity = float(input(
        'Give rotational velocity between 4Hz and 18Hz and the closest one '
        'is used at evaluation.\n'))
    filename, velocity = find_closest(filenamedict, velocity)
    files = [filename]
    # Read parameters.
    params = read_params(parampath)
    # Init dataset with the file we selected, and the model.
    dataset = DataSet(root_dir=filepath,
                      files=files,
                      normalize=False,
                      seq_len=params['slice_size'],
                      stride=1000)
    loader = DataLoader(dataset, batch_size=int(params['batch_size']), shuffle=True)
    model = LSTM_layers(input_size=int(params['input_size']),
                        hidden_size=int(params['hidden_size']),
                        num_layers=int(params['n_layers']),
                        dropout=float(params['dropout']),
                        output_size=int(params['output_size']),
                        batch_first=True,
                        bidirectional=True)
    # torch.load raises "Attempting to deserialize object on a CUDA device but
    # torch.cuda.is_available() is False" on CPU-only machines, so map the
    # storages to the CPU explicitly with map_location='cpu'.
    model.load_state_dict(torch.load(weightpath, map_location='cpu'))
    model.to(device)
    model.eval()
    losses = []
    for idx, sample in enumerate(loader):
        y = sample[:, :, :2].clone().detach().requires_grad_(True).to(device)
        x = sample[:, :, 2:].clone().detach().requires_grad_(True).to(device)
        h0 = model.init_hidden(int(params['batch_size']), None).to(device)
        c0 = model.init_cell(int(params['batch_size'])).to(device)
        # Compute.
        output = model.forward(x, (h0, c0))
        loss = F.mse_loss(output, y)
        losses.append(loss.item())
        output, y = scale_seqs(output, y, filename, minmaxdatapath)
        if (idx % 3) == 0:
            save_this_plot(0, 2763, output[0], y[0], loss.item(), velocity)
    print("Avg loss:", np.mean(losses))
def main():
    integrate_ode = integrate_unanimity_ode
    params_file = 'params_txtfiles/params_unanimity_zA_vs_xi.txt'
    all_params = read_params(params_file, int_params=['n'])
    n = all_params['n'][0]
    # print(all_params)
    params_order = ['pi_A', 'pi_B', 'xi', 'lpub', 'lpriv', 'pa', 'pb']
    types_names = [''.join(['z'] + ['a'] * (n - i) + ['b'] * i)
                   for i in range(n + 1)]
    tipo2latex = {elem: r'$z_{%s}$' % (elem[1:].upper()) for elem in types_names}
    my_colors = ['#e41a1c', '#377eb8', '#4daf4a', '#984ea3']
    more_than_one = ['xi']
    init_conditions = gen_init_conditions(n)[::2]
    str_vec_in = [str(vec_in) for vec_in in init_conditions]
    df_zA = pd.DataFrame()
    linestyles = ['-', '--', '-.', ':']
    handles = []
    labels = []
    fig = plt.figure(figsize=(16, 14))
    ax = fig.add_subplot(111)
    out_count = 0
    for xi in all_params['xi']:
        params = {lab: all_params[lab][0]
                  for i, lab in enumerate(params_order) if lab != 'xi'}
        params['xi'] = xi
        print(params)
        lpriv = 1 - params['lpub']
        params['lpriv'] = lpriv
        params_nums = tuple([params[lab] for lab in params_order] + [n])
        for i, vec_in in enumerate(init_conditions):
            df_temp = pd.DataFrame()
            curve_points, time_points = integrate_ode(vec_in, params_nums)
            df_temp = dynamics_to_dataframe(n, curve_points, time_points,
                                            initial_vec_str=str_vec_in[i])
            df_temp['xi'] = xi
            df_zA = pd.concat([df_zA, df_temp])
            df_aux = df_temp.sort_values(by='t')
            tipo = 'zA'
            freq_tipo = df_aux[tipo].tolist()
            time_points = df_aux['t'].tolist()
            linha, = ax.plot(time_points, freq_tipo, color=my_colors[i], lw=2,
                             linestyle=linestyles[out_count])
            out_count += 1
            handles.append(linha)
            condicao = []
            # Use a separate index (j) so the outer loop index i is not shadowed.
            for j, elem in enumerate(vec_in[:1]):
                pedaco_str = (tipo2latex[types_names[j]]
                              + (r'$^{init}=%.0f' % round(elem, 0)) + '$')
                condicao.append(pedaco_str)
            pedaco_str = (r'$z_{BBB}$'
                          + (r'$^{init}=%.0f' % round(1 - sum(vec_in), 0)) + '$')
            condicao.append(pedaco_str)
            pedaco_str = (r'$ \xi=%.7f' % xi).rstrip('0').rstrip('.') + '$'
            condicao.append(pedaco_str)
            labels.append(';\t'.join(condicao))
    fig.legend(handles, labels, bbox_to_anchor=(0.5, 0.17), loc='center')
    default_order = ['lpub', 'lpriv', 'pi_A', 'pi_B', 'pa', 'pb', 'xi']
    default_order = [col for col in default_order if col not in more_than_one]
    titulo = gen_str_params(params, params_order=default_order,
                            symbols=params2latex,
                            exclude_keys=more_than_one).replace('\n', '')
    max_za = np.max(df_zA['zA'].tolist())
    min_za = np.min(df_zA['zA'].tolist())
    dist = 0.05 * (max_za - min_za)
    ax.set_xlim([0., 160.1])
    # ax.set_ylim([0., 0.81])
    ax.set_ylim([min_za - dist, max_za + dist])
    ax.set_xlabel(r'$t$')
    ax.set_ylabel(r'$z_A$')
    ax.grid(True)
    plt.subplots_adjust(left=0., right=0.95, top=0.75, bottom=0.4)
    str_params = gen_str_params(params, params_order=params_order,
                                exclude_keys=more_than_one)
    uniq_param = '_'.join(
        str_params.replace('\n', '').replace('$', '').split('; '))
    uniq_param += '_n=%d' % n
    plt.savefig('figs/same_plot_zA_dynamics_%s.pdf' % (uniq_param),
                bbox_inches='tight')
    ax.set_title(titulo)
    # plt.tight_layout()
    plt.savefig('figs/same_plot_zA_dynamics_%s.png' % (uniq_param),
                bbox_inches='tight')
    plt.close()
def train_all(self, n_epochs):
    if (not self.restart):
        utils.safe_mkdir(self.checkpoints_folder)
    saver = tf.train.Saver()
    train_writer = tf.summary.FileWriter('./' + self.graph_folder + '/train',
                                         tf.get_default_graph())
    test_writer = tf.summary.FileWriter('./' + self.graph_folder + '/test',
                                        tf.get_default_graph())
    print('Loading snapshot matrix...')
    if (self.large):
        S = utils.read_large_data(self.train_mat)
    else:
        S = utils.read_data(self.train_mat)
    idxs = np.random.permutation(S.shape[0])
    S = S[idxs]
    S_max, S_min = utils.max_min(S, self.n_train)
    utils.scaling(S, S_max, S_min)
    if (self.zero_padding):
        S = utils.zero_pad(S, self.p)
    self.S_train, self.S_val = S[:self.n_train, :], S[self.n_train:, :]
    del S
    print('Loading parameters...')
    params = utils.read_params(self.train_params)
    params = params[idxs]
    self.params_train, self.params_val = params[:self.n_train], params[self.n_train:]
    del params
    self.loss_best = 1
    count = 0
    with tf.Session(config=tf.ConfigProto(
            gpu_options=tf.GPUOptions(allow_growth=True))) as sess:
        sess.run(tf.global_variables_initializer())
        if (self.restart):
            ckpt = tf.train.get_checkpoint_state(
                os.path.dirname(self.checkpoints_folder + '/checkpoint'))
            if ckpt and ckpt.model_checkpoint_path:
                print(ckpt.model_checkpoint_path)
                saver.restore(sess, ckpt.model_checkpoint_path)
        step = self.g_step.eval()
        for epoch in range(n_epochs):
            step = self.train_one_epoch(sess, self.init, train_writer, epoch, step)
            total_loss_mean = self.eval_once(sess, saver, self.init,
                                             test_writer, epoch, step)
            if total_loss_mean < self.loss_best:
                self.loss_best = total_loss_mean
                count = 0
            else:
                count += 1
            # Early stopping.
            if count == 500:
                print('Stopped training due to early-stopping cross-validation')
                break
        print('Best loss on validation set: {0}'.format(self.loss_best))
    train_writer.close()
    test_writer.close()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        ckpt = tf.train.get_checkpoint_state(
            os.path.dirname(self.checkpoints_folder + '/checkpoint'))
        if ckpt and ckpt.model_checkpoint_path:
            print(ckpt.model_checkpoint_path)
            saver.restore(sess, ckpt.model_checkpoint_path)
        print('Loading testing snapshot matrix...')
        if (self.large):
            self.S_test = utils.read_large_data(self.test_mat)
        else:
            self.S_test = utils.read_data(self.test_mat)
        utils.scaling(self.S_test, S_max, S_min)
        if (self.zero_padding):
            self.S_test = utils.zero_pad(self.S_test, self.n)
        print('Loading testing parameters...')
        self.params_test = utils.read_params(self.test_params)
        self.test_once(sess, self.init)
        utils.inverse_scaling(self.U_h, S_max, S_min)
        utils.inverse_scaling(self.S_test, S_max, S_min)
        n_test = self.S_test.shape[0] // self.N_t
        err = np.zeros((n_test, 1))
        for i in range(n_test):
            num = np.sqrt(np.mean(np.linalg.norm(
                self.S_test[i * self.N_t:(i + 1) * self.N_t]
                - self.U_h[i * self.N_t:(i + 1) * self.N_t], 2, axis=1) ** 2))
            den = np.sqrt(np.mean(np.linalg.norm(
                self.S_test[i * self.N_t:(i + 1) * self.N_t], 2, axis=1) ** 2))
            err[i] = num / den
        print('Error indicator epsilon_rel: {0}'.format(np.mean(err)))
def __init__(self, cfg):
    self.cfg = cfg
    self.db = dutils.init_db(self.cfg.db_path)
    self.init_post()
    self.device = torch.device(self.cfg.device)
    # dataset parameters
    if self.cfg.dataset.lower() == 'mnist':
        self.dataset = MNIST
        self.data_path = self.cfg.data_dir + 'mnist'
        self.img_size = [1, 28, 28]
        self.normalize = [(0.1307,), (0.3081,)]
    elif self.cfg.dataset.lower() == 'cifar10':
        self.dataset = CIFAR10
        self.data_path = self.cfg.data_dir + 'cifar10'
        self.img_size = [3, 32, 32]
        self.normalize = [(0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)]
    else:
        raise NotImplementedError()
    # datasets and dataloaders
    # base transforms
    self.train_transforms = [transforms.ToTensor()]
    if self.cfg.normalize_input:
        self.train_transforms.append(
            transforms.Normalize(self.normalize[0], self.normalize[1]))
    self.val_transforms = copy.deepcopy(self.train_transforms)
    # # (if applicable) additional training set transforms defined here
    # train_transforms.extend([
    # ])
    self.dataset_train = self.dataset(root=self.data_path,
                                      train=True,
                                      download=True,
                                      transform=transforms.Compose(self.train_transforms),
                                      target_transform=None)
    self.dataloader_train = DataLoader(dataset=self.dataset_train,
                                       batch_size=self.cfg.batch_size,
                                       shuffle=self.cfg.shuffle,
                                       num_workers=self.cfg.num_workers,
                                       pin_memory=True,
                                       drop_last=False)
    # number of output classes (based only on training data)
    self.c_dim = len(torch.unique(self.dataset_train.targets))
    self.dataset_val = self.dataset(root=self.data_path,
                                    train=False,
                                    download=True,
                                    transform=transforms.Compose(self.val_transforms),
                                    target_transform=None)
    self.dataloader_val = DataLoader(dataset=self.dataset_val,
                                     batch_size=self.cfg.batch_size,
                                     shuffle=False,
                                     num_workers=self.cfg.num_workers,
                                     pin_memory=True,
                                     drop_last=False)
    # maximum entropy threshold for training with random inputs
    self.max_entropy = metrics.max_entropy(self.c_dim)
    self.thresh_entropy = self.cfg.train_random * self.max_entropy
    # define model
    # parameters for each hidden layer are passed in as an argument
    self.params = utils.read_params(self.cfg.model_params[self.cfg.model_type])
    self.activation = getattr(activations, self.cfg.activation.lower())
    if self.cfg.model_type.lower() == 'fc':
        if self.cfg.norm.lower() == 'batch':
            self.norm = nn.BatchNorm1d
        elif self.cfg.norm.lower() == 'layer':
            self.norm = layers.LayerNorm1d
        else:
            self.norm = None
        net = FCNet
    elif self.cfg.model_type.lower() == 'conv':
        if self.cfg.norm.lower() == 'batch':
            self.norm = nn.BatchNorm2d
        elif self.cfg.norm.lower() == 'layer':
            self.norm = layers.LayerNorm2d
        else:
            self.norm = None
        net = ConvNet
    else:
        raise NotImplementedError()
    self.net = net(self.img_size, self.c_dim, self.params,
                   self.activation, self.norm).to(self.device)
    self.post['params'] = self.params
    # TODO: add custom weight initialization scheme
    # (weights are initialized using Kaiming uniform (He) initialization by default)
    # The loss function kl_y_to_p generalizes the cross entropy loss to
    # continuous label distributions, i.e. kl_y_to_p is equivalent to
    # cross_entropy_loss for one-hot labels but is also a sensible loss
    # function for continuous label distributions.
    self.criterion = loss_fns.kl_y_to_p
    if self.cfg.optim.lower() == 'sgd':
        self.optimizer = optim.SGD(
            params=self.net.parameters(),
            lr=self.cfg.lr,
            momentum=self.cfg.optim_params['sgd']['momentum'],
            nesterov=self.cfg.optim_params['sgd']['nesterov'])
        self.post['momentum'] = self.cfg.optim_params['sgd']['momentum']
        self.post['nesterov'] = self.cfg.optim_params['sgd']['nesterov']
    else:
        self.optimizer = optim.Adam(
            params=self.net.parameters(),
            lr=self.cfg.lr,
            betas=(self.cfg.optim_params['adam']['beta1'],
                   self.cfg.optim_params['adam']['beta2']))
        self.post['beta1'] = self.cfg.optim_params['adam']['beta1']
        self.post['beta2'] = self.cfg.optim_params['adam']['beta2']
# older Pythons so I import md5 if hashlib is not available. Fortunately
# md5 can masquerade as hashlib for my purposes.
try:
    import hashlib
except ImportError:
    import md5 as hashlib

# 3rd party modules
import posix_ipc

# Utils for this demo
import utils

utils.say("Oooo 'ello, I'm Mrs. Conclusion!")

params = utils.read_params()

# Mrs. Premise has already created the message queue. I just need a handle
# to it.
mq = posix_ipc.MessageQueue(params["MESSAGE_QUEUE_NAME"])

what_i_sent = ""

for i in range(0, params["ITERATIONS"]):
    utils.say("iteration %d" % i)

    s, _ = mq.receive()
    s = s.decode()
    utils.say("Received %s" % s)

    while s == what_i_sent:
import os

from attrdict import AttrDict
from deepsense import neptune

from utils import read_params

ctx = neptune.Context()
params = read_params(ctx)

CATEGORICAL_COLUMNS = [
    'CODE_GENDER', 'EMERGENCYSTATE_MODE', 'FLAG_MOBIL', 'FLAG_OWN_CAR',
    'FLAG_OWN_REALTY', 'FONDKAPREMONT_MODE', 'HOUSETYPE_MODE',
    'NAME_CONTRACT_TYPE', 'NAME_TYPE_SUITE', 'NAME_INCOME_TYPE',
    'NAME_EDUCATION_TYPE', 'NAME_FAMILY_STATUS', 'NAME_HOUSING_TYPE',
    'OCCUPATION_TYPE', 'ORGANIZATION_TYPE', 'WALLSMATERIAL_MODE',
    'WEEKDAY_APPR_PROCESS_START'
]

NUMERICAL_COLUMNS = [
    'AMT_ANNUITY', 'AMT_CREDIT', 'AMT_GOODS_PRICE', 'AMT_INCOME_TOTAL',
    'CNT_CHILDREN', 'DAYS_BIRTH', 'DAYS_EMPLOYED', 'DAYS_ID_PUBLISH',
    'DAYS_REGISTRATION', 'EXT_SOURCE_1', 'EXT_SOURCE_2', 'EXT_SOURCE_3',
    'OWN_CAR_AGE', 'REGION_POPULATION_RELATIVE', 'REGION_RATING_CLIENT',
    'REGION_RATING_CLIENT_W_CITY'
]

TIMESTAMP_COLUMNS = []
ID_COLUMNS = ['SK_ID_CURR']
TARGET_COLUMNS = ['TARGET']

DEV_SAMPLE_SIZE = int(20e4)
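# Here read_params takes a neptune context rather than a file path; together
# with the AttrDict import, this suggests the helper exposes ctx.params with
# attribute access (e.g. params.raw_train_filepath above). A guess at its
# shape, not the verified implementation:
def read_params(ctx):
    # Wrap the neptune experiment parameters so they can be read as
    # attributes instead of dictionary keys.
    return AttrDict(ctx.params)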
def main(name, argv):
    if not len(argv) == 1:
        print_usage(name)
        return
    log = open('log.txt', 'w', buffering=1)
    log.write('INFO: PRosettaC run has started\n')
    log.write('INFO: Processing inputs\n')
    params = utils.read_params(argv[0])
    PDB = params['PDB'].split()
    LIG = params['LIG'].split()
    Linkers = params['PROTAC'].split()[0]
    Full = params['Full'].split()[0] == 'True'
    if '.smi' in Linkers:
        with open(Linkers, 'r') as f:
            protac = f.readline().split()[0]
    else:
        protac = Linkers
    Structs = ['StructA.pdb', 'StructB.pdb']
    Heads = ['HeadA.sdf', 'HeadB.sdf']
    Subs = ['SubA.sdf', 'SubB.sdf']
    Chains = ['A', 'B']
    Anchors = []
    # Get a handle to the cluster specified in the config file.
    # Defaults to a PBS cluster.
    cluster = cl.getCluster(params['ClusterName'])
    for i in [0, 1]:
        if not '.pdb' in PDB[i] and '.sdf' in LIG[i]:
            log.write('ERROR: An .sdf file can only be chosen if a '
                      'corresponding .pdb file is chosen\n')
            sys.exit()
        if not pymol_utils.get_rec_plus_lig(PDB[i], LIG[i], Structs[i],
                                            Heads[i], Chains[i]):
            log.write('ERROR: There is a problem with the PDB chains. If using '
                      'an .sdf file, it should be close to exactly one protein '
                      'chain in its appropriate .pdb file. If using a LIG name, '
                      'make sure that the ligand has a chain assigned to it '
                      'within the .pdb file. Also, make sure the ligand has at '
                      'least 10 heavy atoms.\n')
            sys.exit()
        Anchors.append(pl.get_mcs_sdf(Heads[i], Subs[i], protac))
        if Anchors[i] is None:
            log.write('ERROR: There is some problem with the PDB ligand ' + LIG[i] +
                      '. It could be either one of the following options: the '
                      'ligand is not readable by RDKit, the MCS (maximal common '
                      'substructure) between the PROTAC smiles and ' + LIG[i] +
                      ' ligand does not have an anchor atom which is uniquely '
                      'defined in regard to smiles, or there is a different '
                      'problem regarding substructure match. Try to choose a '
                      'different PDB template, or use the manual option, '
                      'supplying your own .sdf files.\n')
            log.close()
            sys.exit()
    Heads = Subs
    log.write('INFO: Cleaning structures, adding hydrogens to binders and '
              'running relax\n')
    PT_params = []
    for i in [0, 1]:
        # Adding hydrogens to the heads (binders).
        new_head = Heads[i].split('.')[0] + "_H.sdf"
        utils.addH_sdf(Heads[i], new_head)
        Anchors[i] = pl.translate_anchors(Heads[i], new_head, Anchors[i])
        if Anchors[i] == -1:
            log.write('ERROR: There is a problem with the maximal common '
                      'substructure between the PROTAC and PDB ligand ' +
                      LIG[i] + '.\n')
            log.close()
            sys.exit()
        Heads[i] = new_head
        # Cleaning the structures.
        rs.clean(Structs[i], Chains[i])
        Structs[i] = Structs[i].split('.')[0] + '_' + Chains[i] + '.pdb'
        # Relaxing the initial structures.
        PT_pdb, PT_param = rs.mol_to_params(Heads[i], 'PT' + str(i), 'PT' + str(i))
        PT_params.append(PT_param)
        os.system('cat ' + PT_pdb + ' ' + Structs[i] + ' > Side' + str(i) + '.pdb')
        Structs[i] = 'Side' + str(i) + '.pdb'
        rs.relax(Structs[i], PT_param)
        Structs[i] = 'Side' + str(i) + '_0001.pdb'
        # Fix the atom order by adding the original ligands to the relaxed structures.
        rs.clean(Structs[i], Chains[i])
        Structs[i] = Structs[i].split('.')[0] + '_' + Chains[i] + '.pdb'
        os.system('cat ' + PT_pdb + ' ' + Structs[i] + ' > Init' + str(i) + '.pdb')
        Structs[i] = 'Init' + str(i) + '.pdb'
    # Generate up to 200 conformations of the PROTAC for each anchor distance
    # within bins.
    log.write('INFO: Sampling the distance between the two anchor points\n')
    (min_value, max_value) = pl.SampleDist(Heads, Anchors, Linkers)
    if (min_value, max_value) == (None, None):
        log.write('ERROR: There is a problem with finding substructure between '
                  'the .sdf file and the SMILES of the full protac. Please '
                  'check that your .sdf files have the right conformations.\n')
        log.close()
        sys.exit()
    if (min_value, max_value) == (0, 0):
        log.write('ERROR: There is a problem with generating protac '
                  'conformations to sample the anchor distance. Please check '
                  'that both .sdf files are in a bound conformation to their '
                  'appropriate structures and that this conformation is valid.\n')
        log.close()
        sys.exit()
    # PatchDock
    log.write('INFO: Running PatchDock with the constraints\n')
    if Full:
        Global = 1000
    else:
        Global = 500
    Num_Results = utils.patchdock(Structs, [a + 1 for a in Anchors],
                                  min_value, max_value, Global, 2.0)
    if Num_Results is None:
        log.write('INFO: PatchDock did not find any global docking solution '
                  'within the geometrical constraints\n')
        log.write('INFO: PRosettaC run has finished\n')
        log.close()
        sys.exit()
    # Rosetta local docking.
    log.write('INFO: Run Rosetta local docking on the top 1000 PatchDock results\n')
    curr_dir = os.getcwd()
    os.chdir('Patchdock_Results/')
    if Full:
        Local = 50
    else:
        Local = 10
    commands = [rs.local_docking('pd.' + str(i + 1) + '.pdb',
                                 Chains[0] + 'X', Chains[1] + 'Y',
                                 curr_dir + '/' + PT_params[0],
                                 curr_dir + '/' + PT_params[1],
                                 Local)
                for i in range(Num_Results)]
    jobs = cluster.runBatchCommands(commands, mem=params['RosettaDockMemory'])
    log.write('INFO: Local docking jobs: ' + str(jobs) + '\n')
    cluster.wait(jobs)
    # Generating 100 constrained conformations for the entire linker based on
    # the PatchDock results.
    log.write('INFO: Generating up to 100 constrained conformations for each '
              'local docking result\n')
    docking_solutions = glob.glob('*_docking_????.pdb')
    suffix = []
    for s in docking_solutions:
        suffix.append([s, s.split('.')[1].split('_')])
        suffix[-1][1] = suffix[-1][1][0] + '_' + str(int(suffix[-1][1][2]))
    commands = ['python ' + utils.SCRIPTS_FOL + '/constraint_generation.py ../' +
                Heads[0] + ' ../' + Heads[1] + ' ../' + Linkers + ' ' +
                s[1] + " " + s[0] + " " + ''.join(Chains)
                for s in suffix]
    jobs = cluster.runBatchCommands(commands, batch_size=12,
                                    mem=params['ProtacModelMemory'])
    log.write('INFO: Constrained conformation generation jobs: ' + str(jobs) + '\n')
    cluster.wait(jobs)
    # Clustering the top 200 local docking models (according to interface RMSD),
    # out of 1000 final scoring models.
    log.write('INFO: Clustering the top results\n')
    os.system('cat ../Init0.pdb ../Init1.pdb > ../Init.pdb')
    os.chdir('../')
    os.system('python ' + utils.SCRIPTS_FOL + '/clustering.py 1000 200 4 ' + Chains[1])
    if os.path.isdir('Results/'):
        log.write('INFO: Clustering is done\n')
    else:
        log.write('INFO: No models have been created\n')
    log.write('INFO: PRosettaC run has finished\n')
    log.close()