def main():
    """Entry point for the program."""
    vrb_main = GLOBAL_VERBOSITY_FLAG
    all_tests_successful = True
    is_solution_list = []
    final_chains_list = []
    atom_positions_list = []

    for INPUT_FILE_NAME in TEST_FILE_NAMES:
        # The loaded data are NOT numpy arrays (change later?)
        (atom_types, atom_positions) = read_xyz_file(str(INPUT_FILE_NAME))
        (region_list, interaction_distances) = read_transport_file(str(INPUT_FILE_NAME))

        if LOAD_CACHE_DATA:
            data = load_data(INPUT_FILE_NAME)
        else:
            data = prep_data(atom_types, atom_positions, region_list, interaction_distances)
            chache_data(INPUT_FILE_NAME, data)

        atom_positions_list.append(atom_positions)
        dist_mtrx = data["dist_mtrx"]
        interact_mtrx = data["interact_mtrx"]
        num_atoms = np.size(dist_mtrx, axis=0)

        # Didn't convert to np array in file_io because this complicates caching
        # (since numpy arrays are not JSON-serializable).
        numpy_region_list = []
        for region in region_list:
            numpy_region_list.append(np.array(region))

        device = numpy_region_list[0]
        contacts = numpy_region_list[1:]
        print_var(device, vrb=vrb_main)
        print_var(contacts, vrb=vrb_main)

        contact_bins = get_contact_bins(device, contacts, interact_mtrx)
        num_unlisted_contact_atoms = count_atoms([contacts]) - count_atoms([contact_bins])
        prev_bins = list.copy(contacts)

        # Each element in "chains" is a list of bins. Each of these lists
        # contains the bins of a specific generation. The bins are sorted in
        # the order of ascending contact indices.
        # All bins that are the same number of steps away from the contacts are
        # assigned to the same "generation"; the atoms in "contact_bins" are
        # in generation zero.
        # "contact_bins": all contact atoms that are interacting with the
        # device are assigned to this bin.
        chains = []
        chains.append(contact_bins)
        # print("bin_generations" + str(bin_generations))

        num_chains = len(contacts)
        curr_gen_idx = 1
        final_collision_found = False
        final_chain_idxs = []
        gen_idx_of_last_collision = -1

        # This condition is a failsafe, to avoid infinite loops.
        while curr_gen_idx < MAX_GENERATIONS:
            collisions_found = []
            if vrb_main:
                print(curr_gen_idx)
            curr_gen = get_next_bins(chains[-1], prev_bins, interact_mtrx)
            chains.append(curr_gen)
            prev_bins = prev_bins + curr_gen

            if vrb_main:
                print("\n Chains before merge step ")
                print_generations(chains)

            if not final_collision_found:
                for chain1_idx, bn1 in enumerate(curr_gen):
                    for chain2_idx, bn2 in enumerate(curr_gen):
                        if chain2_idx > chain1_idx:
                            if bins_are_neighbours(bn1, bn2, interact_mtrx):
                                if num_chains > 2:
                                    collisions_found.append((chain1_idx, chain2_idx))
                                    num_chains -= 1
                                    if vrb_main:
                                        print("collisions_found: " + str(collisions_found))
                                        print("num_chains = " + str(num_chains))
                                else:
                                    if num_chains < 2:
                                        sys.exit("FATAL ERROR: num_chains < 2")
                                    final_collision_found = True
                                    final_chain_idxs = [chain1_idx, chain2_idx]
                                    gen_idx_of_last_collision = curr_gen_idx
                                    remove_duplicates_from_all_tips(chains)
                                    if vrb_main:
                                        print("\n ---- final_collision_found! ---- \n")
                                        print("gen_idx_of_last_collision = "
                                              + str(gen_idx_of_last_collision))
                                        print("final_chain_idxs: " + str(final_chain_idxs))
                        if final_collision_found:
                            break
                    if final_collision_found:
                        break

            for col_tuple in collisions_found:
                # Merge from src_chain_idx into target_chain_idx.
                src_chain_idx = col_tuple[0]
                target_chain_idx = col_tuple[1]
                if col_tuple[0] in final_chain_idxs:
                    if col_tuple[1] in final_chain_idxs:
                        sys.exit("FATAL ERROR: Should never merge the two "
                                 "final chains into each other.")
                # Make sure we are merging into the final chain.
                # If not, swap src_chain_idx with target_chain_idx.
                if col_tuple[0] in final_chain_idxs:
                    src_chain_idx = col_tuple[1]
                    target_chain_idx = col_tuple[0]

                # Merge chains
                if vrb_main:
                    print("Merge chain_idxs:" + str(col_tuple))
                    print("src_chain bin: " + str([x + 1 for x in curr_gen[src_chain_idx]]))
                    print("target_chain bin: " + str([x + 1 for x in curr_gen[target_chain_idx]]))

                ########################################################################
                # CONSIDER: FOR MULTIPLE COLLISIONS, TRY TO MERGE SMALLER CHAINS
                # TOGETHER FIRST
                ########################################################################

                # Duplicates have to be removed AFTER collision recognition,
                # since otherwise this could prevent finding collisions.
                remove_duplicates_from_all_tips(chains)
                (chains, contacts) = merge(chains, contacts, curr_gen_idx,
                                           target_chain_idx, src_chain_idx)
                if vrb_main:
                    print("\n Chains after merge step: ")
                    print_generations(chains)

            remove_duplicates_from_all_tips(chains)
            num_sorted_atoms = count_atoms(chains) + num_unlisted_contact_atoms
            if num_sorted_atoms >= num_atoms:
                if num_sorted_atoms > num_atoms:
                    sys.exit("FATAL ERROR: num_sorted_atoms > num_atoms")
                if vrb_main:
                    print("All atoms sorted.")
                break
            curr_gen_idx += 1

        if curr_gen_idx >= MAX_GENERATIONS:
            sys.exit("FATAL ERROR: MAX_GENERATIONS exceeded. (Increase MAX_GENERATIONS?)")
        if not final_collision_found:
            sys.exit("FATAL ERROR: No final collision found, don't know which chains to keep")

        if vrb_main:
            print("\n Chain before culling dead ends: ")
            print_generations(chains)

        # Find dead ends in the two final chains.
        dead_ends = get_dead_ends(chains, final_chain_idxs, gen_idx_of_last_collision)

        # Before merging dead ends, we have to make sure the dead end isn't longer
        # than the final chain we are attempting to merge it into.
        chain_length_until_last_collision = gen_idx_of_last_collision + 1
        shortened_dead_ends = shorten_dead_ends(dead_ends, chain_length_until_last_collision)
        merge_dead_ends_into_final_chains(chains, shortened_dead_ends,
                                          final_chain_idxs, gen_idx_of_last_collision)
        remove_duplicates_from_all_tips(chains)

        if vrb_main:
            print("\n Chain after removing duplicates from tips, and before gluing: ")
            print_generations(chains)

        final_chain = build_final_chain(chains, contacts, final_chain_idxs, interact_mtrx)
        final_chains_list.append(final_chain)

        if vrb_main:
            print("\nfinal_chain: ")
            print_final_chain(final_chain)

        is_solution = test_solution(final_chain, interact_mtrx)
        if not is_solution:
            all_tests_successful = False
        is_solution_list.append(is_solution)

    # --------------------------------------------------------------------------
    print("- INPUT_FILE_NAME ---------------- solution found:")
    for idx, INPUT_FILE_NAME in enumerate(TEST_FILE_NAMES):
        # print(INPUT_FILE_NAME + ": " + str(is_solution_list[idx]))
        print("%-*s success: %s" % (35, INPUT_FILE_NAME, str(is_solution_list[idx])))

    if all_tests_successful:
        print("\n --> All test cases completed SUCCESSFULLY. <--\n")
    if not all_tests_successful:
        print("\n --> BAD SOLUTION in test cases! <--\n")

    OPEN_JMOL = []
    for INPUT_FILE_NAME in TEST_FILE_NAMES:
        if INPUT_FILE_NAME == DISPLAY_FILE_NAME:
            OPEN_JMOL.append(True)
        else:
            OPEN_JMOL.append(False)

    for idx, INPUT_FILE_NAME in enumerate(TEST_FILE_NAMES):
        write_bins(final_chains_list[idx], atom_positions_list[idx],
                   INPUT_FILE_NAME, OPEN_JMOL[idx])
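# --- Illustrative sketch (not part of the original program) -------------------
# main() describes bins, generations and chains in terms of an interaction
# matrix. Assuming interact_mtrx[i, j] is True when atoms i and j interact, the
# minimal, self-contained example below shows how one "generation" of bins
# could be grown from the current chain tips and how a collision between two
# bins could be detected. The helper names next_generation and bins_collide are
# hypothetical; they only mirror the roles of get_next_bins and
# bins_are_neighbours above.
import numpy as np


def next_generation(tip_bins, visited_atoms, interact_mtrx):
    """Return one new bin per chain: all unvisited atoms interacting with that chain's tip."""
    new_bins = []
    for tip in tip_bins:
        neighbours = set()
        for atom in tip:
            neighbours.update(np.nonzero(interact_mtrx[atom])[0])
        new_bins.append(np.array(sorted(neighbours - set(visited_atoms))))
    return new_bins


def bins_collide(bin_a, bin_b, interact_mtrx):
    """Two bins collide if any atom of one interacts with any atom of the other."""
    return bool(interact_mtrx[np.ix_(bin_a, bin_b)].any())


if __name__ == '__main__':
    # Six atoms in a row (0-1-2-3-4-5); the two contacts are atoms 0 and 5.
    interact = np.eye(6, k=1, dtype=bool) | np.eye(6, k=-1, dtype=bool)
    contact_bins = [np.array([0]), np.array([5])]
    gen_1 = next_generation(contact_bins, [0, 5], interact)       # [[1], [4]]
    gen_2 = next_generation(gen_1, [0, 5, 1, 4], interact)        # [[2], [3]]
    print(gen_1, gen_2)
    print(bins_collide(gen_2[0], gen_2[1], interact))             # True -> the chains meet here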
nltk_preprocessor = NLTKPreprocessor()


def get_key(dataset_type):
    if dataset_type == 'binary':
        return 'Abstract'
    elif dataset_type == 'multi-class':
        return 'Text'


def preprocess(dataset, key):
    log.info("Preprocessing data")
    tokens = nltk_preprocessor.transform(dataset[key])
    joined = [';'.join(t) for t in tokens]
    series = pd.Series(joined, index=dataset.index)
    dataset['Tokens'] = series
    return dataset


if __name__ == '__main__':
    # Display progress logs on stdout
    log.basicConfig(level=log.DEBUG, format='%(asctime)s %(levelname)s %(message)s')
    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    args = get_args(parser)

    dataset_type = io.get_data_set(args.data)
    x, y = io.load_data(args.data)
    new_data = preprocess(x, get_key(dataset_type))
    new_data = pd.concat([new_data, y], axis=1)
    io.save_data(new_data, args.filename)
    log.info("Saved to file: {}".format(args.filename))
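# --- Illustrative sketch (not part of the original script) --------------------
# preprocess() stores each document's tokens as one ';'-joined string in a new
# 'Tokens' column. The self-contained example below reproduces that output
# format with a trivial whitespace tokenizer standing in for
# NLTKPreprocessor.transform(), which is assumed to return one token list per
# document.
import pandas as pd

docs = pd.DataFrame({'Abstract': ['Gene expression in mice.',
                                  'Protein folding dynamics.']})
tokens = [text.lower().split() for text in docs['Abstract']]   # stand-in tokenizer
docs['Tokens'] = pd.Series([';'.join(t) for t in tokens], index=docs.index)
print(docs['Tokens'].tolist())
# ['gene;expression;in;mice.', 'protein;folding;dynamics.']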
log.basicConfig(level=log.INFO, format='%(asctime)s %(levelname)s %(message)s')

if __name__ == '__main__':
    # Does the hyper-parameter search and stores the parameters for each
    # dataset and classifier.
    config = io.load_config(sys.argv, None)
    experiment_dir = "{}_hpsearch".format(config['experiment'])
    if not os.path.exists(experiment_dir):
        os.makedirs(experiment_dir)

    for dataset_i, dataset_filename in enumerate(config['datasets']):
        log.debug("DATASET_{}: {}".format(dataset_i, dataset_filename))
        # load preprocessed dataset
        X, y, arff_data = io.load_data(dataset_filename, config)
        dataset_name = os.path.splitext(dataset_filename)[0]
        log.info("DATASET_{}_NAME: {}".format(dataset_i, dataset_name))

        for estimator_i, estimator in enumerate(config['estimators']):
            log.debug("ESTIMATOR_{}: {}".format(estimator_i, estimator['estimator']))
            # load algorithm
            estimator = ut.get_estimator(estimator)
            estimator_name = estimator.__class__.__name__
            log.info("ESTIMATOR_{}_NAME: {}".format(estimator_i, estimator_name))

            search_space, n_iter = ut.get_search_space(estimator)
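# --- Illustrative sketch (not part of the original script) --------------------
# The hyper-parameter loop above is truncated right after get_search_space().
# A plausible continuation, assuming search_space is a parameter-distribution
# dict and n_iter the number of candidates to sample, is a randomized search
# whose best parameters are stored per dataset/estimator pair. The estimator,
# data, scoring and file layout below are assumptions for illustration only.
import json
import os

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RandomizedSearchCV

X, y = make_classification(n_samples=200, n_features=10, random_state=0)
estimator = LogisticRegression(solver='liblinear')
search_space = {'C': [0.01, 0.1, 1.0, 10.0], 'penalty': ['l1', 'l2']}
n_iter = 5

search = RandomizedSearchCV(estimator, search_space, n_iter=n_iter,
                            cv=3, n_jobs=-1, random_state=0)
search.fit(X, y)

experiment_dir = 'demo_hpsearch'
os.makedirs(experiment_dir, exist_ok=True)
with open(os.path.join(experiment_dir, 'demo_LogisticRegression.json'), 'w') as fh:
    json.dump(search.best_params_, fh, indent=2)
print(search.best_score_, search.best_params_)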
log.warning("Article not available.") return keywords, mesh_terms if __name__ == '__main__': parser = argparse.ArgumentParser( description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) args = get_args(parser) log.debug("Commandline arguments: {}".format(args)) check_mode_of_operation(args) data, _ = io.load_data(args.data) if io.get_data_set(args.data) == 'binary': keywords = [] terms = [] log.info("Fetching keywords an terms...") kw_cnt = 0 t_cnt = 0 for index, row in data.iterrows(): #if index > 4: # keywords.append(" ") # terms.append(" ")
dir_images = './dataset/'
dir_model = './checkpoints/epoch44/'
image_height = 32
batch_size = 1

# Specify output file with predictions of test samples
file_predictions = './predictions/test_predictions.txt'
if not os.path.isdir('./predictions/'):
    os.makedirs('./predictions/')

# Get the filenames and corresponding slants for the dataset
test_gt = load_gt(file_test_ids, file_gt)
test_ids = test_gt.keys()

# Load test images
batched_test_data = load_data(dir_images, test_gt, batch_size=batch_size,
                              image_height=image_height)

# Create the model object
model = Model(image_height=image_height)

if not os.path.isdir(dir_model):
    print('Selected model for testing (' + dir_model + ') does not exist')
else:
    file_model = dir_model + 'model.ckpt'
    num_test_samples = len(batched_test_data)

    with tf.Session() as session:
        # Load the trained network from file
        model.saver.restore(session, file_model)

        # Get predictions and error for the test samples
        predictions = []
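# --- Illustrative sketch (not part of the original script) --------------------
# The test script is truncated after "predictions = []". The stand-alone
# example below mimics the remaining steps with a dummy predictor in place of
# session.run on the restored model: loop over the batches, collect one slant
# prediction per sample, compare against the ground truth and write the result
# to the predictions file. The batch layout (sample_id, image, slant) and all
# names are assumptions for illustration only.
import numpy as np

demo_gt = {'sample_01': 4.0, 'sample_02': -2.5}                 # id -> true slant
demo_batches = [[('sample_01', np.zeros((32, 100)), 4.0)],      # one sample per batch
                [('sample_02', np.zeros((32, 80)), -2.5)]]

def dummy_predict(image):
    """Stand-in for session.run(model.prediction, feed_dict={...})."""
    return 0.0

demo_predictions, demo_errors = [], []
for batch in demo_batches:
    for sample_id, image, true_slant in batch:
        pred = dummy_predict(image)
        demo_predictions.append((sample_id, pred))
        demo_errors.append(abs(pred - true_slant))

with open('demo_test_predictions.txt', 'w') as fh:
    for sample_id, pred in demo_predictions:
        fh.write('{} {}\n'.format(sample_id, pred))
print('Mean absolute error: {:.3f}'.format(sum(demo_errors) / len(demo_errors)))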
num_epochs = 60
batch_size = 8
checkpoint_steps = 1
image_height = 32
learning_rate = 0.0005
dropout_rate = 0.3
width_stretch = 1.8

# Get the filenames and corresponding slants for the datasets
train_gt = load_gt(file_train_ids, file_gt)
valid_gt = load_gt(file_valid_ids, file_gt)

# Load training and validation images
batched_train_data = load_data(dir_images, train_gt, batch_size=batch_size,
                               image_height=image_height, width_stretch=width_stretch)
batched_valid_data = load_data(dir_images, valid_gt, batch_size=batch_size,
                               image_height=image_height, width_stretch=width_stretch)

# Create the model object
model = Model(learning_rate=learning_rate, dropout_rate=dropout_rate,
              image_height=image_height)

# Start training
train_costs = []
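# --- Illustrative sketch (not part of the original script) --------------------
# Training is truncated after "train_costs = []". The skeleton below shows the
# epoch/checkpoint structure suggested by the hyper-parameters above: one pass
# over the shuffled training batches per epoch, a validation pass, and a
# checkpoint every checkpoint_steps epochs. The dummy step functions stand in
# for session.run calls on the real model and are assumptions for illustration.
import random

demo_epochs, demo_checkpoint_steps = 3, 1
demo_train_batches = [[0.9, 0.8], [0.7, 0.9], [0.6, 0.8]]   # dummy per-batch costs
demo_valid_batches = [[0.85, 0.8]]

def train_step(batch):   # stand-in for session.run([model.optimizer, model.cost], ...)
    return sum(batch) / len(batch)

def valid_step(batch):   # stand-in for session.run(model.cost, ...)
    return sum(batch) / len(batch)

demo_train_costs, demo_valid_costs = [], []
for epoch in range(1, demo_epochs + 1):
    random.shuffle(demo_train_batches)
    train_cost = sum(train_step(b) for b in demo_train_batches) / len(demo_train_batches)
    valid_cost = sum(valid_step(b) for b in demo_valid_batches) / len(demo_valid_batches)
    demo_train_costs.append(train_cost)
    demo_valid_costs.append(valid_cost)
    if epoch % demo_checkpoint_steps == 0:
        # in the real script a checkpoint would be written here, e.g. via model.saver
        print('epoch {}: train {:.3f}, valid {:.3f}'.format(epoch, train_cost, valid_cost))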
    io.save_prediction(combined_data.loc[:, ['Id', 'Category']], prediction_filename)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    args = get_args(parser)
    log.debug("Commandline arguments: {}".format(args))
    check_mode_of_operation(args)

    if args.score and args.train and args.predict:
        log.info("Mode score->train->predict")
        x, y = io.load_data(args.train)

        # Create a training/validation and a test set for model selection
        # (hyper-parameter search) and evaluation
        x_train, x_test, y_train, y_test = train_test_split(
            x, y, test_size=args.test_size,
            random_state=cfg.split_random_state, stratify=y)
        log.info("Created training set ({}) and test set ({})".format(len(y_train), len(y_test)))

        data_set = io.get_data_set(args.predict)
        fu_pl, clf_pl = select_model(args, data_set, x_train, y_train)

        # Score part
        fu_pl, clf_pl = mode_score(args, fu_pl, clf_pl, x_train, y_train,
                                   x_test, y_test, data_set)