def __init__(self, path, save_opp):
    self.path = path
    self.save_opp = save_opp
    self.mcc = Measure(create_path(path, 'mcc.csv'))
    self.acc = Measure(create_path(path, 'accuracy.csv'))
    if save_opp:
        self.opp_mcc = Measure(create_path(path, 'opp_mcc.csv'))

def save_data(self, final=False):
    """
    Saves the JSON file generated during data extraction, or updates the
    current file.
    :param final: bool (optional); if True, the file is saved under a new
        name and moved to the final folder
    :return: None
    """
    city = self.city.replace(' ', '_').lower()
    company = self.company.replace(' ', '_').lower()
    old_filepath = self.last_saved_file
    filename = f'{unidecode(city)}_{unidecode(company)}_page_{self.page}.json'
    new_filepath = create_path(filename=filename, folder='../data_in_progress')
    data = self.all_results

    if not len(old_filepath):
        write_file(data, new_filepath)
        self.logger.info(f"First file created - page {self.page}")
    elif final:
        final_pathname = create_path(filename=filename, folder='../data_raw', final=True)
        rename_file(data, old_filepath, final_pathname)
    else:
        rename_file(data, old_filepath, new_filepath)
        self.logger.info(f"File updated - {self.page}")

    self.last_saved_file = new_filepath

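# The I/O helpers that save_data relies on (write_file, rename_file, and the
# keyword-argument form of create_path) are not shown in this file. Below is a
# minimal, purely illustrative sketch of what write_file and rename_file are
# assumed to do; the real helpers may differ.
import json
import os


def write_file(data, filepath):
    # Dump the scraped results as JSON.
    with open(filepath, 'w', encoding='utf-8') as fh:
        json.dump(data, fh, ensure_ascii=False)


def rename_file(data, old_filepath, new_filepath):
    # Write the updated data to the new location and drop the stale file.
    write_file(data, new_filepath)
    if old_filepath and old_filepath != new_filepath:
        os.remove(old_filepath)
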
def load_params():
    print(os.path.dirname(__file__))
    params = parse_params("params.cfg", os.path.join(os.path.dirname(__file__), "paramspec_pop.cfg"))
    create_path(params["prefix"])
    create_path(os.path.join(params["prefix"], "thumbnails"))
    print(params)
    return params

def all_normalised_maru_histogram(path, activation_data):
    normalised_maru_data_list = []
    for seed in activation_data:
        data = activation_data[seed]
        mean_data = np.mean(data, axis=0)
        std_data = np.std(data, axis=0)
        maru_data = np.divide(mean_data, std_data, out=np.zeros_like(mean_data), where=std_data != 0)
        normalised_maru = minmax_scale(maru_data, feature_range=(0, 1))
        normalised_maru_data_list.append(normalised_maru)
    n, bins, patches = plt.hist(normalised_maru_data_list, 20, range=(0.0, 1.0),
                                label=activation_data.keys(), rwidth=1.0, linewidth=0)
    plt.ylabel('number of neurons')
    plt.xlabel('maru value')
    plt.title('normalised maru value for all seeds')
    # plt.legend(loc='upper right')
    # plt.ylim(0, 60)
    # plt.show()
    path = create_path(path, "activations")
    plt.savefig(create_path(path, "all_normal_maru.png"), dpi=__PNG_DPI__)
    plt.clf()

def output_single(sim):
    print("Processing data and generating output plots...")
    fig_prefix = sim.params['prefix']
    create_path(fig_prefix)
    output_hh_life_cycle(sim, os.path.join(fig_prefix, 'hh_life_cycle.%s' % sim.params['ext']))
    output_comp_by_hh_size(sim, os.path.join(fig_prefix, 'age_by_hh_size.%s' % sim.params['ext']))
    output_comp_by_hh_age(sim, os.path.join(fig_prefix, 'age_by_hh_age.%s' % sim.params['ext']))
    output_hh_size_distribution(sim, 10, os.path.join(fig_prefix, 'hh_size_dist.%s' % sim.params['ext']))
    output_age_distribution(sim, os.path.join(fig_prefix, 'age_dist.%s' % sim.params['ext']))
    output_household_type(sim, os.path.join(fig_prefix, 'hh_type.%s' % sim.params['ext']))
    output_household_composition(sim, os.path.join(fig_prefix, 'hh_comp.%s' % sim.params['ext']))
    output_hh_size_time(sim, os.path.join(fig_prefix, 'hh_size_time.%s' % sim.params['ext']), comp=False)
    output_fam_type_time(sim, os.path.join(fig_prefix, 'fam_type_time.%s' % sim.params['ext']), comp=False)
    print('Output written to', sim.params['prefix'])

def __init__(self, crawler_worker_loop, path, start=0, max_fetch_cnt=50):
    super().__init__()
    self._max_fetch_cnt = max_fetch_cnt
    self._start = start
    self.crawler_worker_loop = crawler_worker_loop
    self.base_url = 'https://api-prod.wallstreetcn.com/apiv1/content/articles?category=global&limit=100'
    self.summary_urls = [self.base_url]
    self._path = path
    self.summary_saved_file = self._path + '/summary_{}.txt'
    self.detail_saved_file = self._path + '/detail_{}.txt'
    self._source = 'WallStreet'
    create_path(self._path)

def main(train):
    struct = np.array([[10, 10, 10, 10]])
    eql = EQL(2, struct, 1)
    eql = eql.to(device)
    dataset = TestDatasetV1()
    model_path = "../saved_model/test_eql_v1"
    create_path(model_path)
    model_path = os.path.join(model_path, "state_dict_model.pt")
    if train:
        eql.train_model(DataLoader(dataset, batch_size=20, shuffle=True),
                        model_path=model_path, max_epochs=100, lrate=0.001)
    else:
        test_dataset = TestEQLDataset()
        eql.test_model(test_dataset, model_path)

def activation_mean_histogram(path, activation_data):
    path = create_path(path, "activations")
    for seed in activation_data:
        data = activation_data[seed]
        mean_data = np.mean(data, axis=0)
        n, bins, patches = plt.hist(mean_data, 20, range=(0.0, 1.0))
        plt.ylabel('number of neurons')
        plt.xlabel('mean activation')
        plt.title('mean activation for seed {}'.format(seed))
        plt.ylim(0, 60)
        # plt.show()
        plt.savefig(create_path(path, "seed_{}_mean_act.png".format(seed)), dpi=__PNG_DPI__)
        plt.clf()

def __init__(self, experiment_name, run_num, hyperparameters=None, save_opp=False):
    self.hyperparameters = hyperparameters
    self.path = create_path('results_uni', experiment_name, "run_{}".format(str(run_num)))
    self.single_data = None
    self.apoz_data = None
    self.activation_data = {}
    self.target = NetworkType(create_path(self.path, 'target'), save_opp)
    self.naive = NetworkType(create_path(self.path, 'naive'), save_opp=False)
    self.source = NetworkType(create_path(self.path, 'source'), save_opp)

def _alt_sal_map(model, layer_idx, img, run, model_type, seed):
    import keras.backend as K

    # Select the class of interest.
    class_idx = 0
    # Define the derivative d loss / d layer_input.
    layer_input = model.input
    # This model must already use a linear activation for the final layer.
    loss = model.layers[layer_idx].output[..., class_idx]
    grad_tensor = K.gradients(loss, layer_input)[0]

    # Create a function that evaluates the gradient for a given input.
    # This function accepts numpy arrays.
    derivative_fn = K.function([layer_input], [grad_tensor])

    # Evaluate derivative_fn on the image.
    grad_eval_by_hand = derivative_fn([img[np.newaxis, ...]])[0]
    print(grad_eval_by_hand.shape)

    grad_eval_by_hand = np.abs(grad_eval_by_hand).max(axis=(0, 3))

    # Normalise to the range [0, 1].
    arr_min, arr_max = np.min(grad_eval_by_hand), np.max(grad_eval_by_hand)
    grad_eval_by_hand = (grad_eval_by_hand - arr_min) / (arr_max - arr_min + K.epsilon())

    plt.imsave(create_path(run.path, model_type, "images", "sal_image_{}_alt.png".format(seed)),
               grad_eval_by_hand)

def create_sales(name):
    with open("./users.db") as us:
        users = json.load(us)
    while True:
        venta = input("\nName your sale: ")
        if venta == "":
            print("Invalid name, try a different one")
            continue
        elif name not in users:
            users[name] = {venta: 1}
        elif venta in users[name]:
            print("Name already exists, try a different one")
            continue
        else:
            users[name][venta] = 1
        path = create_path(name, venta)
        with open(f"{path}.db", "w") as db:
            json.dump({}, db)
        with open(f"{path}_names.db", "w") as names:
            json.dump({}, names)
        with open("./users.db", "w") as user:
            json.dump(users, user)
        print("Sale created successfully.\n")
        break

def to_csv(self, path):
    d = {
        "source_max_epochs": [self.source_max_epochs],
        "target_max_epochs": [self.target_max_epochs],
        "num_starting_units": [self.num_starting_units],
        "upper_threshold": [self.upper_threshold],
        "lower_threshold": [self.lower_threshold],
        "source_lr": [self.source_lr],
        "target_lr": [self.target_lr],
        "batch_size": [self.batch_size],
        "conv_activation": [self.conv_activation],
        "loss_function": [self.loss_function],
        "pruning_method": [self.pruning_method],
        "source_animal": [self.source_animal],
        "target_animal": [self.target_animal],
        "pruning_dataset": [self.pruning_dataset],
        "save_opp": [self.save_opp],
        "labels_per_category": [self.labels_per_category],
        "reinit_weights": [self.reinit_weights],
    }
    hyperparams = pd.DataFrame(data=d)
    file_path = create_path(path, 'params.csv')
    hyperparams.to_csv(file_path, index=None)

def training(self):
    vocab_to_id = get_vocab_to_id(self.train_data_path, self.vocab_file, False)
    logdir = os.path.join(self.summary_path,
                          datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + "/")
    self.vocab_size = len(vocab_to_id)
    create_path(self.log_path)
    logger = get_logger(self.logfile_path)

    with tf.Session() as sess:
        summary_writer = tf.summary.FileWriter(logdir, graph=sess.graph)
        summary_writer.flush()
        rnn_model = RNNModel(self.rnn_size, self.embedding_size, self.class_num,
                             self.vocab_size, self.learning_rate, self.model_path)

        test_data_generator = SentenceGenerator(self.test_data_path)
        testBatchManage = BatchManager(test_data_generator, 0, vocab_to_id)
        test_data = testBatchManage.get_all_data_to_batch()

        sess.run(tf.global_variables_initializer())
        current_step = 0
        for e in range(self.epoch_num):
            logger.info("Epoch num: " + str(e + 1) + "\n")
            print("Epoch num: " + str(e + 1) + "\n")
            train_data_generator = SentenceGenerator(self.train_data_path)
            trainBatchManage = BatchManager(train_data_generator, self.batch_size, vocab_to_id)
            for batchs in trainBatchManage.getBatches():
                current_step += 1
                loss, accuracy, summary_op = rnn_model.train(sess, batchs, self.dropout)
                if current_step % self.epoch_step == 0:
                    loss_test, accuracy_test, _ = rnn_model.train_test(sess, test_data, 1.0)
                    logger.info("loss:" + str(loss_test) + " accuracy:" + str(accuracy_test) + "\n")
                    print("loss:" + str(loss_test) + " accuracy:" + str(accuracy_test) + "\n")
                    summary_writer.add_summary(summary_op, current_step)
                    rnn_model.saver.save(sess, self.model_path, global_step=current_step)

def scatter_std_mean_activation(path, activation_data):
    path = create_path(path, "activations")
    for seed in activation_data:
        data = activation_data[seed]
        mean_data = np.mean(data, axis=0)
        std_data = np.std(data, axis=0)
        plt.scatter(mean_data, std_data)
        plt.xlabel('mean activation')
        plt.ylabel('std of activation')
        plt.title('mean vs std for seed {}'.format(seed))
        # plt.ylim(0.0, 4.0)
        # plt.xlim(0.0, 3.0)
        # plt.show()
        plt.savefig(create_path(path, "seed_{}_mean_std_act.png".format(seed)), dpi=__PNG_DPI__)
        plt.clf()

def single_network_performance(title, measure, path, data):
    plt.xlabel('epochs')
    plt.ylabel(measure)
    plt.title(title)
    plt.ylim(0.0, 1.0)
    plt.plot(data)
    # plt.show()
    plt.savefig(create_path(path, "{}.png".format(measure)), dpi=__PNG_DPI__)
    plt.clf()

def all_scatter_std_mean_activation(path, activation_data):
    for seed in activation_data:
        data = activation_data[seed]
        mean_data = np.mean(data, axis=0)
        std_data = np.std(data, axis=0)
        plt.scatter(mean_data, std_data, label=seed)
    plt.xlabel('mean activation')
    plt.ylabel('std of activation')
    plt.title('mean vs std for all seeds')
    # plt.ylim(0.0, 4.0)
    # plt.xlim(0.0, 8.0)
    # plt.legend(loc='upper right')
    # plt.show()
    path = create_path(path, "activations")
    plt.savefig(create_path(path, "all_seed_mean_std_act.png"), dpi=__PNG_DPI__)
    plt.clf()

def network(seed, run, hp, num_seeded_units):
    dog_train_labels, dog_train_images, dog_test_labels, dog_test_images = data.get_training_and_val_data(
        hp.target_animal, labels_per_category=hp.labels_per_category)
    dog_val_labels, dog_val_images = data.get_test_data(hp.target_animal)

    if hp.reinit_weights:
        num_starting_units = hp.num_starting_units
    else:
        num_starting_units = num_seeded_units

    # Model
    model = Sequential()
    weight_init = glorot_uniform(seed)

    model.add(Conv2D(32, (3, 3), padding='same', input_shape=dog_train_images.shape[1:],
                     kernel_initializer=weight_init))
    model.add(Activation(hp.conv_activation))
    model.add(Conv2D(32, (3, 3), kernel_initializer=weight_init))
    model.add(Activation(hp.conv_activation))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Conv2D(64, (3, 3), padding='same', kernel_initializer=weight_init))
    model.add(Activation(hp.conv_activation))
    model.add(Conv2D(64, (3, 3), kernel_initializer=weight_init))
    model.add(Activation(hp.conv_activation))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Flatten())
    model.add(Dense(num_starting_units, kernel_initializer=weight_init))
    model.add(Activation('relu'))
    model.add(Dense(1, kernel_initializer=weight_init))
    model.add(Activation('sigmoid'))

    # Adam learning optimizer
    opt = keras.optimizers.adam(lr=hp.target_lr)

    # Train the model using Adam
    model.compile(loss=hp.loss_function, optimizer=opt, metrics=[binary_accuracy])

    # Callbacks:
    all_predictions = library_extensions.PredictionHistory(
        model, dog_train_images, dog_train_labels, dog_val_images, dog_val_labels,
        dog_test_images, dog_test_labels)

    # Training naive network
    model.fit(dog_train_images, dog_train_labels, batch_size=hp.batch_size,
              epochs=hp.target_max_epochs, validation_data=(dog_val_images, dog_val_labels),
              shuffle=True, callbacks=[all_predictions])

    # Save trained model
    model.save(utils.create_path(run.path, "naive", "saved_models", "naive_model_{}.h5".format(seed)))
    print("naive model saved")

    # Generate results history
    run.naive.update(seed, all_predictions)

def __init__(self, crawler_worker_loop, path, start=0, max_fetch_cnt=50):
    super().__init__()
    self._max_fetch_cnt = max_fetch_cnt
    self._start = start
    self.crawler_worker_loop = crawler_worker_loop
    self.channels = {"economy": 129, "finance": 125, "companies": 130, "china": 131,
                     "science": 179, "international": 132, "culture": 134}
    self.base_url = 'http://tag.caixin.com/news/homeInterface.jsp?' \
                    'channel={}&start={}&count={}&picdim=_145_97' \
                    '&callback=jQuery17209677058548357216_1530938601322&_=1530938933631'
    self.comment_url = 'http://file.c.caixin.com/comment-sync/js/100/{}/{}.js'
    self.summary_urls = [self.base_url.format(str(channel), self._start, self._max_fetch_cnt)
                         for _, channel in self.channels.items()]
    self._path = path
    self.summary_saved_file = self._path + '/summary_{}.txt'
    self.detail_saved_file = self._path + '/detail_{}.txt'
    self._source = 'Caixin'
    create_path(self._path)

def simulate_background(input_yaml, jobs_yaml, count):
    config_in = yaml.safe_load(open(input_yaml))
    jobs_config = yaml.safe_load(open(jobs_yaml))

    try:
        ctools_pipe_path = create_path(jobs_config['exe']['software_path'])
    except KeyError:
        ctools_pipe_path = "."

    # find proper IRF name
    irf = IRFPicker(config_in, ctools_pipe_path)
    name_irf = irf.irf_pick()

    if irf.prod_number == "3b" and irf.prod_version == 0:
        caldb = "prod3b"
    else:
        caldb = f'prod{irf.prod_number}-v{irf.prod_version}'

    out_path = create_path(
        f"{jobs_config['exe']['path']}/back_sim/{irf.prod_number}_{irf.prod_version}_{name_irf}")

    # simulation details
    sim_details = config_in['sim']
    seed = int(count) * 10

    # do the simulation
    sim = ctools.ctobssim()
    sim['inmodel'] = f"{ctools_pipe_path}/models/bkg_only_model.xml"
    sim['caldb'] = caldb
    sim['irf'] = name_irf
    sim['ra'] = 0
    sim['dec'] = 0
    sim['rad'] = sim_details['radius']
    sim['tmin'] = u.Quantity(sim_details['time']['t_min']).to_value(u.s)
    sim['tmax'] = u.Quantity(sim_details['time']['t_max']).to_value(u.s)
    sim['emin'] = u.Quantity(sim_details['energy']['e_min']).to_value(u.TeV)
    sim['emax'] = u.Quantity(sim_details['energy']['e_max']).to_value(u.TeV)
    sim['outevents'] = f"{out_path}/background_z-{irf.zenith}_site-{irf.irf_site}_{str(count).zfill(2)}_seed{seed}.fits"
    sim['seed'] = seed
    sim.execute()

def random_explain(dataset_path: Path, output_path: Path):
    create_path(output_path)
    nx_graphs, labels = read_graphs(dataset_path)

    def explain(graph_num):
        g = nx_graphs[graph_num]
        random_importance = list(range(len(g.edges())))
        random.shuffle(random_importance)
        N = g.number_of_nodes()
        masked_adj = np.zeros((N, N))
        for (u, v), importance in zip(g.edges(), random_importance):
            u = int(u)
            v = int(v)
            masked_adj[u, v] = masked_adj[v, u] = importance
        return masked_adj

    for gid in tq(nx_graphs):
        masked_adj = explain(gid)
        np.save(output_path / ('%s.npy' % gid), masked_adj)

def all_activation_mean_histogram(path, activation_data):
    activation_data_list = []
    for seed in activation_data:
        data = activation_data[seed]
        mean_data = np.mean(data, axis=0)
        activation_data_list.append(mean_data)
    n, bins, patches = plt.hist(activation_data_list, 20, range=(0.0, 1.0),
                                label=activation_data.keys(), rwidth=1.0, linewidth=0)
    plt.ylabel('number of neurons')
    plt.xlabel('mean activation')
    plt.title('mean activation for all seeds')
    # plt.legend(loc='upper right')
    # plt.ylim(0, 40)
    # plt.show()
    path = create_path(path, "activations")
    plt.savefig(create_path(path, "all_seeds_mean_act.png"), dpi=__PNG_DPI__)
    plt.clf()

def all_averaged_dataset_performance(network_name, measure, path, data, source_animal, target_animal):
    plt.plot(data["train"][measure], label='train')
    plt.plot(data["val"][measure], label='val')
    plt.plot(data["test"][measure], label='test')
    plt.xlabel('epochs')
    plt.ylabel(measure)
    plt.title(network_name + " averaged results " + source_animal + " to " + target_animal)
    plt.legend(loc='lower right')
    plt.ylim(0.0, 1.0)
    # plt.show()
    plt.savefig(create_path(path, "all_datasets_{}.png".format(measure)), dpi=__PNG_DPI__)
    plt.clf()

def _create_directory_structure(self):
    if not self.created_dirs:
        logger.info("creating output directory structure")
        now = datetime.datetime.now().time().isoformat().replace(":", "").replace(".", "")
        output_dir_name = "pylicense_{}".format(now)
        license_files = os.path.join(output_dir_name, "license_files")
        output_path = os.path.join(self.output_path, license_files)
        create_dirs = utils.create_path(output_path)
        if create_dirs:
            logger.info("Successfully created output directory structure")
            self.created_dirs = output_path
            return output_path
        else:
            return False
    else:
        return self.created_dirs

def main(args):
    """
    Function for handling the arguments and starting the experiment.
    Inputs:
        args - Namespace object from the argument parser
    """
    # set the seed
    torch.manual_seed(args.seed)

    # check if GPU is available
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # print the model parameters
    print('-----TRAINING PARAMETERS-----')
    print('Model version: {}'.format(args.model_version))
    print('Labels: {}'.format(args.labels))
    print('Setting: {}'.format(args.setting))
    print('Test scenario: {}'.format(args.test_scenario))
    print('Auxiliary tasks: {}'.format(args.aux_tasks))
    print('Auxiliary task probing: {}'.format(args.aux_probing))
    print('PyTorch device: {}'.format(device))
    print('Max epochs: {}'.format(args.max_epochs))
    print('Patience: {}'.format(args.patience))
    print('Learning rates: {}'.format(args.lrs))
    print('Batch size: {}'.format(args.batch_size))
    print('Results directory: {}'.format(args.results_dir))
    print('Progress bar: {}'.format(args.progress_bar))
    print('Advanced metrics: {}'.format(args.advanced_metrics))
    print('Pretrain: {}'.format(args.pretrain))
    print('-----------------------------')

    # generate the path to use for the results
    path = create_path(args)
    if not os.path.exists(path):
        os.makedirs(path)

    # check which setting is selected
    if args.setting == 'matched':
        handle_matched(args, device, path)
    else:
        handle_unmatched(args, device, path)

def compare_network_performance(dataset, measure, path, first_data, second_data, source_animal, target_animal):
    plt.figure(1)

    plt.subplot(211)
    plt.plot(first_data, label='target')
    plt.xlabel('epochs')
    plt.ylabel(measure)
    plt.title("target " + dataset + " " + source_animal + " to " + target_animal)
    plt.ylim(0.0, 1.0)

    plt.subplot(212)
    plt.plot(second_data, label='naive')
    plt.xlabel('epochs')
    plt.ylabel(measure)
    plt.title("naive " + dataset + " " + target_animal)
    plt.ylim(0.0, 1.0)

    # plt.show()
    plt.savefig(create_path(path, "compare_{}_{}.png".format(dataset, measure)), dpi=__PNG_DPI__)
    plt.clf()

def compare_average_network_performance(dataset, measure, path, first_data, second_data,
                                        first_label, second_label, source_animal, target_animal):
    if first_label == "target":
        plt.plot(first_data, label=first_label, color='C0')
        plt.plot(second_data, label=second_label, color='C1')
    else:
        plt.plot(first_data, label=first_label, color='C1')
        plt.plot(second_data, label=second_label, color='C0')
    plt.xlabel('epochs')
    plt.ylabel(measure)
    plt.title(dataset + ' ' + measure + " averaged" + " " + source_animal + " to " + target_animal)
    plt.legend(loc='lower right')
    plt.ylim(0.0, 1.0)
    # plt.show()
    plt.savefig(create_path(path, "compare_average_{}_{}_{}.png".format(dataset, measure, second_label)),
                dpi=__PNG_DPI__)
    plt.clf()

def saliency_map(run, model_type, seed, dataset_type, attempt, category=None, positive=True):
    if not category:
        category = run.hyperparameters.target_animal

    img = _get_image(category, dataset_type, run, positive=positive)
    plt.imsave(create_path(run.path, model_type, "images", category,
                           "{}_original_image.png".format(attempt)), img)
    img = img[np.newaxis, ...]

    model = load_model(create_path(run.path, model_type, "saved_models",
                                   "{}_model_{}.h5".format(model_type, seed)))
    print(model_type + " network predicts:")
    print(model.predict(img))

    layer_idx = 15
    model.layers[layer_idx].activation = activations.linear
    model = utils.apply_modifications(model)

    grads = visualize_saliency(model, layer_idx, filter_indices=0, seed_input=img)
    plt.imsave(create_path(run.path, model_type, "images", category, "sal",
                           "{}_sal_image_seed{}.png".format(attempt, seed)), grads)

    grads_guided = visualize_saliency(model, layer_idx, filter_indices=0, seed_input=img,
                                      backprop_modifier="guided")
    plt.imsave(create_path(run.path, model_type, "images", category, "sal",
                           "{}_sal_guided_image_seed{}.png".format(attempt, seed)), grads_guided)

    acti = visualize_activation(model, layer_idx, filter_indices=0, seed_input=img)
    plt.imsave(create_path(run.path, model_type, "images", category, "acti",
                           "{}_acti_image_seed{}.png".format(attempt, seed)), acti)

    cam = visualize_cam(model, layer_idx, filter_indices=0, seed_input=img)
    plt.imsave(create_path(run.path, model_type, "images", category, "cam",
                           "{}_cam_image_seed{}.png".format(attempt, seed)), cam)

    cam_guided = visualize_cam(model, layer_idx, filter_indices=0, seed_input=img,
                               backprop_modifier="guided")
    plt.imsave(create_path(run.path, model_type, "images", category, "cam",
                           "{}_cam_guided_image_seed{}.png".format(attempt, seed)), cam_guided)

def from_csv(file_path):
    m = Hyperparameters()
    hp_data = pd.read_csv(create_path(file_path, 'params.csv'))
    # 'records' is the documented orient value; the old 'rows' spelling is no
    # longer accepted by recent pandas versions.
    dict_hp_data = hp_data.to_dict(orient='records')[0]
    m.source_max_epochs = dict_hp_data["source_max_epochs"]
    m.target_max_epochs = dict_hp_data["target_max_epochs"]
    m.num_starting_units = dict_hp_data["num_starting_units"]
    m.upper_threshold = dict_hp_data["upper_threshold"]
    m.lower_threshold = dict_hp_data["lower_threshold"]
    m.source_lr = dict_hp_data["source_lr"]
    m.target_lr = dict_hp_data["target_lr"]
    m.batch_size = dict_hp_data["batch_size"]
    m.conv_activation = dict_hp_data["conv_activation"]
    m.loss_function = dict_hp_data["loss_function"]
    m.pruning_method = dict_hp_data["pruning_method"]
    m.source_animal = dict_hp_data["source_animal"]
    m.target_animal = dict_hp_data["target_animal"]
    m.pruning_dataset = dict_hp_data["pruning_dataset"]
    m.save_opp = dict_hp_data["save_opp"]
    m.labels_per_category = dict_hp_data["labels_per_category"]
    m.reinit_weights = dict_hp_data["reinit_weights"]
    return m

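# Hypothetical round-trip sketch for the to_csv / from_csv pair above. It
# assumes `hp` is an already populated Hyperparameters instance (built by the
# experiment setup) and that from_csv is exposed on the class, e.g. as a
# @staticmethod; names such as 'experiment_1' are placeholders.
run_path = create_path('results_uni', 'experiment_1', 'run_0')
hp.to_csv(run_path)                                   # writes <run_path>/params.csv
restored = Hyperparameters.from_csv(run_path)
assert restored.batch_size == hp.batch_size
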
print(f"process {counter} started") p = subprocess.Popen([ 'python', 'background_sim.py', infile, in_jobs, str(counter + 1) ], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # if everything goes well, the output is None # check this just for the first job if counter == 0: (result, error) = p.communicate() print(result, error) elif execution['mode'] == "bsub": details = execution['details'] env_path = create_path(execution['env_path']) # create string for jobs submission exec_string = f"{execution['mode']} " if details['queue']['name'] != "N/A": exec_string += f"-q {details['queue']['name']} " if details['queue']['flags'] != "N/A": exec_string += f"{details['queue']['flags']} " if details['mail'] != "N/A": exec_string += f"-u {details['mail']} " if execution['others'] != "N/A": exec_string += f"{execution['others']} " print(exec_string) for counter in range(realizations):
def train(self, num_of_iters=1, data=None, hidden=None):
    self.loss = 0.0
    s = time.time()
    # zero the parameter gradients
    #self.optimizer.zero_grad()
    for i in range(num_of_iters):
        self.adjust_learning_rate(self.train_epoch, self.optimizer)
        if self.train_iter % self.num_batches_per_epoch == 0 and self.train_iter > 0:
            self.train_epoch += 1
            logger.info('train iter: %d, num_batches_per_epoch: %d', self.train_iter, self.num_batches_per_epoch)
            logger.info('Epoch %d, avg train acc: %f, lr: %f, avg loss: %f' % (
                self.train_iter // self.num_batches_per_epoch,
                np.mean(self.train_acc_top1), self.lr,
                self.avg_loss_per_epoch / self.num_batches_per_epoch))
            if self.rank == 0 and self.writer is not None:
                self.writer.add_scalar('cross_entropy', self.avg_loss_per_epoch / self.num_batches_per_epoch, self.train_epoch)
                self.writer.add_scalar('top-1_acc', np.mean(self.train_acc_top1), self.train_epoch)
            if self.rank == 0:
                self.test(self.train_epoch)
            self.sparsities = []
            self.compression_ratios = []
            self.communication_sizes = []
            self.train_acc_top1 = []
            self.epochs_info.append(self.avg_loss_per_epoch / self.num_batches_per_epoch)
            self.avg_loss_per_epoch = 0.0

            # Save checkpoint
            if self.train_iter > 0 and self.rank == 0:
                state = {'iter': self.train_iter, 'epoch': self.train_epoch, 'state': self.get_model_state()}
                if self.prefix:
                    relative_path = './weights/%s/%s-n%d-bs%d-lr%.4f' % (self.prefix, self.dnn, self.nworkers, self.batch_size, self.base_lr)
                else:
                    relative_path = './weights/%s-n%d-bs%d-lr%.4f' % (self.dnn, self.nworkers, self.batch_size, self.base_lr)
                utils.create_path(relative_path)
                filename = '%s-rank%d-epoch%d.pth' % (self.dnn, self.rank, self.train_epoch)
                fn = os.path.join(relative_path, filename)
                if self.train_epoch % 2 == 0:
                    self.save_checkpoint(state, fn)
                self.remove_dict(state)

            if self.train_sampler and (self.nworkers > 1):
                self.train_sampler.set_epoch(self.train_epoch)

        ss = time.time()
        if data is None:
            data = self.data_iter()

        if self.dataset == 'an4':
            inputs, labels_cpu, input_percentages, target_sizes = data
            input_sizes = input_percentages.mul_(int(inputs.size(3))).int()
        else:
            inputs, labels_cpu = data

        if self.is_cuda:
            if self.dnn == 'lstm':
                inputs = Variable(inputs.transpose(0, 1).contiguous()).cuda()
                labels = Variable(labels_cpu.transpose(0, 1).contiguous()).cuda()
            else:
                inputs, labels = inputs.cuda(non_blocking=True), labels_cpu.cuda(non_blocking=True)
        else:
            labels = labels_cpu
        self.iotime += (time.time() - ss)

        sforward = time.time()
        if self.dnn == 'lstman4':
            out, output_sizes = self.net(inputs, input_sizes)
            out = out.transpose(0, 1)  # TxNxH
            loss = self.criterion(out, labels_cpu, output_sizes, target_sizes)
            #torch.cuda.synchronize()
            self.forwardtime += (time.time() - sforward)
            loss = loss / inputs.size(0)  # average the loss by minibatch
        elif self.dnn == 'lstm':
            hidden = lstmpy.repackage_hidden(hidden)
            outputs, hidden = self.net(inputs, hidden)
            tt = torch.squeeze(labels.view(-1, self.net.batch_size * self.net.num_steps))
            loss = self.criterion(outputs.view(-1, self.net.vocab_size), tt)
            #torch.cuda.synchronize()
            self.forwardtime += (time.time() - sforward)
        else:
            # forward + backward + optimize
            outputs = self.net(inputs)
            loss = self.criterion(outputs, labels)
            #torch.cuda.synchronize()
            self.forwardtime += (time.time() - sforward)

        sbackward = time.time()
        if self.amp_handle is not None:
            with apex.amp.scale_loss(loss, self.optimizer) as scaled_loss:
                scaled_loss.backward()
                loss = scaled_loss
        else:
            loss.backward()
        loss_value = loss.item()
        #torch.cuda.synchronize()
        self.backwardtime += (time.time() - sbackward)

        self.loss += loss_value
        self.avg_loss_per_epoch += loss_value

        if self.dnn not in ['lstm', 'lstman4']:
            acc1, = self.cal_accuracy(outputs, labels, topk=(1,))
            self.train_acc_top1.append(float(acc1))

        self.train_iter += 1
        self.num_of_updates_during_comm += 1

    self.loss /= num_of_iters
    self.timer += time.time() - s
    display = 40
    if self.train_iter % display == 0:
        logger.warn('[%3d][%5d/%5d][rank:%d] loss: %.3f, average forward (%f) and backward (%f) time: %f, iotime: %f ' % (
            self.train_epoch, self.train_iter, self.num_batches_per_epoch, self.rank,
            self.loss, self.forwardtime / display, self.backwardtime / display,
            self.timer / display, self.iotime / display))
        self.timer = 0.0
        self.iotime = 0.0
        self.forwardtime = 0.0
        self.backwardtime = 0.0

    if self.dnn == 'lstm':
        return num_of_iters, hidden
    return num_of_iters

def create_module(module):
    src_path = get_src_file_path(module)
    inc_path = get_inc_file_path(module)
    utils.create_path(src_path)
    utils.create_path(inc_path)
    add_to_building_list(module)

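# Every snippet in this file leans on a `create_path` helper whose definition
# is not included here. Most call sites treat it as "join the segments, make
# sure the directory exists, and return the joined path" (the keyword-based
# call in save_data clearly uses a different, project-specific variant). A
# minimal sketch under that assumption, for illustration only:
import os


def create_path(*segments):
    """Join path segments, create the directory part if missing, return the path."""
    path = os.path.join(*(str(s) for s in segments))
    # If the last segment looks like a file name, only create its parent directory.
    directory = os.path.dirname(path) if os.path.splitext(path)[1] else path
    if directory:
        os.makedirs(directory, exist_ok=True)
    return path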