def write_output(name: str, algorithm_name: str, matches: dict, metrics: dict, run_times: dict):
    """
    Serialise the result of a schema matching job to a JSON file.

    The file is written to ``<project root>/output/<algorithm_name>/<name>.json``,
    where every run of non-word characters in ``name`` is replaced by ``_``.
    Both folders are created through ``create_folder`` before writing.

    Parameters
    ----------
    name : str
        The experiment unique name
    algorithm_name : str
        The name of the algorithm
    matches : dict
        Dictionary containing the ranked list of matches based on their similarity sorted in descending order.
        Keys are converted to ``str`` before being dumped.
    metrics : dict
        Dictionary containing the metrics calculated in the schema matching job
    run_times : dict
        Dictionary containing the run times measured in the schema matching job
    """
    output_dir = get_project_root() + "/output"
    create_folder(output_dir)

    algorithm_dir = output_dir + "/" + algorithm_name
    create_folder(algorithm_dir)

    safe_name = re.sub('\\W+', '_', str(name))
    target_file = algorithm_dir + "/" + safe_name + ".json"

    with open(target_file, 'w') as fp:
        # JSON object keys must be strings, so stringify whatever the matcher used as key.
        payload = {
            "name": name,
            "matches": {str(key): value for key, value in matches.items()},
            "metrics": metrics,
            "run_times": run_times,
        }
        json.dump(payload, fp, indent=2)
def post(self, request):
    """
    Store the uploaded ``file`` under ``media/upload/file`` and optionally attach it to a liaison.

    The file is saved as ``media/upload/file/<c0>/<c1>/<uuid><ext>`` where ``c0``/``c1``
    are the first two characters of the generated name. When the request data
    contains ``liaison``, the relative path ``<c0>/<c1>/<uuid><ext>`` is stored in
    that liaison's ``freleaserpt`` field.

    Returns an empty 200 response on success and an empty 400 response when
    no ``file`` part was uploaded.
    """
    orig_file = request.FILES.get('file')
    if orig_file is None:
        # Previously this fell through to an AttributeError on ``orig_file.name``.
        return Response(status=status.HTTP_400_BAD_REQUEST)

    # splitext keeps only the LAST suffix ("report.v2.pdf" -> ".pdf") and yields ""
    # for names without a dot; ``name.split('.')[1]`` returned "v2" and raised
    # IndexError respectively.
    extension = os.path.splitext(orig_file.name)[1].lower()
    file_name = str(uuid.uuid4()) + extension

    upload_path = os.path.join("media/upload/file", file_name[0], file_name[1])
    create_folder(upload_path)
    file_path = os.path.join(upload_path, file_name)

    with open(file_path, 'wb') as f:
        for chunk in orig_file.chunks():
            f.write(chunk)

    if 'liaison' in request.data:
        # The path stored on the model is relative to the upload root.
        file_path = os.path.join(file_name[0], file_name[1], file_name)
        liaison_id = request.data['liaison']
        liaison = Liaisons.objects.get(pk=liaison_id)
        liaison.freleaserpt = file_path
        liaison.save()

    return Response(status=status.HTTP_200_OK)
def __init__(self, local_path="", base_feature_dir="features", recompute_features=False, save_log_feature=True):
    """
    Set up the feature and metadata folders for the current audio configuration.

    The folder name encodes ``cfg.sample_rate``, ``cfg.n_window``, ``cfg.hop_length``
    and ``cfg.n_mels``; ``_nolog`` is appended when ``save_log_feature`` is false.
    Inside it, a ``features`` and a ``metadata`` sub-folder are created.

    Args:
        local_path: str, stored as ``self.local_path``.
        base_feature_dir: str, parent folder of the configuration-specific folder.
        recompute_features: bool, stored as ``self.recompute_features``.
        save_log_feature: bool, selects the ``_nolog`` folder variant when False.
    """
    self.local_path = local_path
    self.base_feature_dir = base_feature_dir
    self.recompute_features = recompute_features
    self.save_log_feature = save_log_feature

    config_tag = f"sr{cfg.sample_rate}_win{cfg.n_window}_hop{cfg.hop_length}_mels{cfg.n_mels}"
    if not self.save_log_feature:
        config_tag += "_nolog"
    config_dir = os.path.join(self.base_feature_dir, config_tag)

    self.feature_dir = os.path.join(config_dir, "features")
    self.metadata_dir = os.path.join(config_dir, "metadata")

    # Create both folders if they do not exist yet (metadata first, as before).
    for folder in (self.metadata_dir, self.feature_dir):
        create_folder(folder)

    self.classes = []
def post(self, request):
    """
    Save the uploaded ``avatar`` under ``media/avatar`` and record it on the current user.

    The file is stored as ``media/avatar/A<uuid><ext>`` and ``user.avatar`` is set
    to ``avatar/A<uuid><ext>``.

    Returns ``{"code": 200}`` on success, or ``{"code": 400}`` with HTTP status
    400 when no ``avatar`` part was uploaded.
    """
    user = request.user
    avatar = request.FILES.get('avatar')
    if avatar is None:
        # Previously this fell through to an AttributeError on ``avatar.name``.
        return Response({"code": 400}, status=400)

    # splitext keeps only the LAST suffix ("me.profile.png" -> ".png") and yields ""
    # for names without a dot; ``name.split('.')[1]`` returned "profile" and raised
    # IndexError respectively.
    extension = os.path.splitext(avatar.name)[1].lower()
    file_name = "A" + str(uuid.uuid4()) + extension

    upload_path = "media/avatar"
    create_folder(upload_path)
    file_path = os.path.join(upload_path, file_name)

    with open(file_path, 'wb') as f:
        for chunk in avatar.chunks():
            f.write(chunk)

    user.avatar = "avatar/" + file_name
    user.save()

    return Response({"code": 200})
def _handle(self, extension, path):
    """
    Create the image folder under ``path`` when ``extension`` is an image extension.

    Returns True when ``is_image(extension)`` accepted the extension (and the
    folder creation was requested), False otherwise.
    """
    if not is_image(extension):
        return False
    create_folder(path, IMG_FOLDER)
    return True
def calculate_embedding(embedding_dl, model, savedir=None, concatenate=None, squeeze=True):
    """
    Compute the embedding of every item yielded by ``embedding_dl``.

    Args:
        embedding_dl: iterable of ``(data, label)`` pairs exposing a ``df`` DataFrame
            with a ``filename`` column; row ``i`` of ``df`` is used for the i-th item.
        model: model passed to ``get_embeddings_numpy``; switched to eval mode while
            embedding and put back into train mode afterwards (also on error).
        savedir: str or None. When given, the folder is created, the ``filename``
            column is rewritten to point into it, the DataFrame is saved there as the
            tab-separated file ``df`` and each embedding is saved with ``np.save``.
            When None, nothing is written and filenames are left untouched.
        concatenate: None, ``"append"`` (one entry per item) or ``"extend"``
            (item embeddings are unrolled along their first axis).
        squeeze: bool, apply ``np.squeeze`` to each embedding.

    Returns:
        ``df`` when ``concatenate`` is None, otherwise ``(df, np.ndarray)``.

    Raises:
        NotImplementedError: ``concatenate`` is not one of the supported values.
    """
    # Reject an unsupported mode before doing any work or writing any file.
    if concatenate not in (None, "append", "extend"):
        raise NotImplementedError("Impossible to aggregate with this value")

    # If frames, assume the savedir name or the filename is different than when it is not defined
    df = embedding_dl.df.copy()
    if savedir is not None:
        create_folder(savedir)
        # Only re-root the filenames when there is a target folder;
        # os.path.join(None, ...) raises TypeError.
        df.filename = df.filename.apply(lambda x: os.path.join(savedir, os.path.basename(x)))
        df.to_csv(os.path.join(savedir, "df"), sep="\t", index=False)

    concat_embed = [] if concatenate is not None else None

    model.eval()
    try:
        for cnt, (data_in, _) in enumerate(embedding_dl):
            data_in = to_cuda_if_available(data_in)
            emb = get_embeddings_numpy(data_in, model, flatten=False)
            if cnt == 0:
                LOG.debug(f"shapes: input: {data_in.shape}, embed: {emb.shape}, dir: {savedir}")
            if squeeze:
                emb = np.squeeze(emb)
            if savedir is not None:
                np.save(df.iloc[cnt].filename, emb)

            if concatenate == "append":
                concat_embed.append(emb)
            elif concatenate == "extend":
                concat_embed.extend(emb)
    finally:
        # Always leave the model in train mode, even if embedding failed half-way.
        model.train()

    if concatenate is not None:
        return df, np.array(concat_embed)
    return df
def get_dirs(common_dir, create=True):
    """
    Return the model and log folders of an experiment.

    Args:
        common_dir: str, sub-folder shared by both paths.
        create: bool, when True both folders are created through ``create_folder``.

    Returns:
        tuple of str, ``("stored_data/model/<common_dir>", "stored_data/logs/<common_dir>")``
        (joined with the platform separator).
    """
    model_dir, log_dir = (
        osp.join("stored_data", kind, common_dir) for kind in ("model", "logs")
    )
    if create:
        for directory in (model_dir, log_dir):
            create_folder(directory)
    return model_dir, log_dir
def write_log(log_info):
    """
    Append ``log_info`` as one line to today's log file.

    Log files live in ``LOG_PATH/<YYYY>/<YYYY-MM-DD>.log`` (local time); the year
    folder is created through ``create_folder`` before writing.
    """
    now = time.localtime(time.time())

    year_dir = os.path.join(LOG_PATH, time.strftime('%Y', now))
    create_folder(year_dir)

    daily_file = os.path.join(year_dir, time.strftime('%Y-%m-%d', now) + '.log')
    with open(daily_file, 'a', encoding='utf-8') as f:
        f.write(log_info + '\n')
def save_chunks(chunk_id, subset_type, track_name, sources, energy_profile):
    """
    Write chunk ``chunk_id`` of every source (and of the mix) to disk as WAV files.

    Files go to ``CHUNKS_PATH/<subset_type>/<track_name>/<chunk_id>/`` and are named
    ``<source>_<rounded energy>.wav``.

    Args:
        chunk_id: index of the chunk, used both to index ``sources`` and as folder name.
        subset_type: str, path component below ``CHUNKS_PATH``.
        track_name: str, path component below ``subset_type``.
        sources: object indexed as ``sources[source_id, chunk_id]``
            (presumably an array with sources first and chunks second, TODO confirm).
        energy_profile: sequence of dicts, one per entry of ``[*SOURCES, 'MIX']``;
            updated in place with ``{chunk folder: energy}``.

    Returns:
        ``(energy_profile, true_label)`` where ``true_label`` holds one 0/1 int per
        entry of ``SOURCES`` (the trailing 'MIX' slot is dropped).
    """
    save_folder_path = os.path.join(CHUNKS_PATH, subset_type, track_name, str(chunk_id))
    create_folder(save_folder_path)
    # One slot per entry of SOURCES plus a trailing slot for 'MIX'.
    true_label = np.zeros(len(SOURCES) + 1, dtype='int')
    for source_id, source in enumerate([*SOURCES, 'MIX']):
        signal = sources[source_id, chunk_id]
        signal_energy = get_signal_energy(signal)
        # NOTE(review): the source was whitespace-mangled; this layout assumes only the
        # label assignment depends on the threshold and every chunk is saved. Confirm.
        if int(signal_energy) > ENERGY_THRESHOLD:
            true_label[source_id] = 1
        save_path = os.path.join(
            save_folder_path, source + '_' + str(int(round(signal_energy))) + '.wav')
        # dirname(save_path) is the chunk folder, so each source keeps one entry per chunk.
        energy_profile[source_id][os.path.dirname(save_path)] = signal_energy
        # NOTE(review): librosa.output was removed in librosa 0.8; confirm the pinned version.
        librosa.output.write_wav(save_path, signal, TARGET_SAMPLING_RATE)
    return energy_profile, true_label[:-1]
def test_create_valid_folder_delete_folder():
    """Create ``Shared/test`` (expects 201), then delete it again (expects 200)."""
    target = 'Shared/test'

    created = create_folder(target)
    assert created.status_code == 201, f'Error: {created.json()["errorMessage"]}'
    print(f'Folder created, ID: {find_json_key(created, "folder_id")}')

    deleted = delete_folder(target)
    assert deleted.status_code == 200, f'Error: {deleted.json()["errorMessage"]}'
    print('Folder deleted!')
def folder_to_txt(folder_path: str) -> str:
    """
    Convert every document found by ``folder_to_dict`` into a text file.

    The text files are written by ``utils.to_txt`` into a new sub-folder
    ``<folder_path>/out_<unix timestamp>/``.

    Returns:
        The path of that output folder (with trailing slash).
    """
    extracted = folder_to_dict(folder_path)

    timestamp = calendar.timegm(time.gmtime())
    out_path = f'{folder_path}/out_{timestamp}/'
    utils.create_folder(out_path)

    names = extracted['file_name']
    texts = extracted['text_raw']
    for name, text in zip(names, texts):
        utils.to_txt(text_raw=text, file_name=name, out_path=out_path)

    print(f'The results are in {out_path}')
    return out_path
def get_df_feat_dir(self, csv_path, subpart_data=None, frames_in_sec=None, segment=False, fixed_segment=None):
    """ Initialize the dataset, extract the features dataframes

    Features are stored in a sub-folder of ``self.feature_dir`` named after
    ``name_only(csv_path)``; the folder is created if needed. The metadata file is
    looked up at ``self.local_path/csv_path``.

    Args:
        csv_path: str, csv path in the initial dataset
        subpart_data: int, the number of file to take in the dataframe if taking a small part of the dataset.
        frames_in_sec: int, allow to divide full segments into smaller segments of this number of frames.
            Cannot be combined with ``segment=True``.
        segment: bool, whether or not to segment event when having strong labels.
        fixed_segment: float, in seconds, the size of the kept segment. If >audio length, the audio length is kept.
            If segment is True, and >label, it takes the surrounding (allow creating weak labels).

    Returns:
        pd.DataFrame
        The dataframe containing the right features and labels
    """
    feature_dir = os.path.join(self.feature_dir, name_only(csv_path))
    create_folder(feature_dir)
    meta_name = os.path.join(self.local_path, csv_path)
    # NOTE(review): assert is stripped under ``python -O``; this check then disappears.
    assert (not segment or frames_in_sec is None ), "if you want to segment, you can't give frames"
    if segment:
        df = self.extract_features_from_meta_segment(
            meta_name, feature_dir, subpart_data=subpart_data, fixed_segment=fixed_segment)
    elif frames_in_sec is not None:
        df = self.extract_features_from_meta_frames(
            meta_name, feature_dir, frames_in_sec, subpart_data=subpart_data)
    # get_classes is done inside the method because of get_labels
    else:
        df = self.extract_features_from_meta(meta_name, feature_dir, subpart_data=subpart_data)
        # NOTE(review): the source was whitespace-mangled; this layout assumes get_classes is
        # only needed on this branch, as the comment above the ``else`` suggests. Confirm.
        self.get_classes(df)
    return df
def unix_sort_ranks(corpus: set, file_name: str):
    """
    Function that takes a corpus sorts it with the unix sort -n command and generates the global ranks
    for each value in the corpus.

    The values are written one per line to a scratch folder ``./cache/sorts/<file_name>``,
    sorted with ``sort -n`` under ``LC_ALL=C``, and read back; the rank of a value is its
    1-based line number in the sorted file. The scratch folder is emptied afterwards
    (removed and re-created), also when an error occurs.

    Parameters
    ----------
    corpus: set
        The corpus (all the unique values from every column)
    file_name : str
        The name of the file to store these "global" ranks

    Returns
    -------
    dict
        The ranks in the form of k: value, v: the rank of the value

    Raises
    ------
    subprocess.CalledProcessError
        If ``sort`` exits with a non-zero status (previously ignored, which produced
        incomplete ranks without any error).
    """
    work_dir = "./cache/sorts/" + file_name
    unsorted_path = work_dir + "/unsorted_file.txt"
    sorted_path = work_dir + "/sorted_file.txt"

    create_folder(work_dir)
    try:
        with open(unsorted_path, 'w') as out:
            for var in corpus:
                print(str(var), file=out)

        # Force the C locale so the ordering does not depend on the host configuration.
        sort_env = os.environ.copy()
        sort_env['LC_ALL'] = 'C'
        with open(sorted_path, 'w') as f:
            subprocess.run(['sort', '-n', unsorted_path], stdout=f, env=sort_env, check=True)

        with open(sorted_path, 'r') as f:
            sorted_values = f.read().splitlines()
        ranks = {
            convert_data_type(value): rank
            for rank, value in enumerate(sorted_values, start=1)
        }
    finally:
        # Leave an empty scratch folder behind, exactly as before.
        shutil.rmtree(work_dir)
        os.mkdir(work_dir)
    return ranks
def __init__(self, feature_dir, local_path="", recompute_features=False, exp_tag='default', save_log_feature=True):
    """
    Store the dataset settings and make sure the feature folder exists.

    Args:
        feature_dir: str, folder holding the features; created if missing.
        local_path: str, stored as ``self.local_path``.
        recompute_features: bool, stored as ``self.recompute_features``.
        exp_tag: str, accepted but not used by this constructor.
        save_log_feature: bool, stored as ``self.save_log_feature``.
    """
    self.feature_dir = feature_dir
    self.local_path = local_path
    self.recompute_features = recompute_features
    self.save_log_feature = save_log_feature

    # create folder if not exist
    create_folder(self.feature_dir)
def post(self, request):
    """
    Store the uploaded image under ``media/upload/image`` and return its URL.

    The image is saved as ``media/upload/image/<c0>/<c1>/<uuid><ext>`` where
    ``c0``/``c1`` are the first two characters of the generated name.

    Returns ``{"code": 0, "msg": "success", "data": {"url": "files/<uuid><ext>"}}``
    on success, or a 400 response when no ``file`` part was uploaded.
    """
    image = request.FILES.get('file')
    if image is None:
        # Previously this fell through to an AttributeError on ``image.name``.
        return Response(data={"code": 400, "msg": "no file uploaded"},
                        status=status.HTTP_400_BAD_REQUEST)

    # splitext keeps only the LAST suffix ("photo.final.jpg" -> ".jpg") and yields ""
    # for names without a dot; ``name.split('.')[1]`` returned "final" and raised
    # IndexError respectively.
    extension = os.path.splitext(image.name)[1].lower()
    image_name = str(uuid.uuid4()) + extension

    upload_path = os.path.join("media/upload/image", image_name[0], image_name[1])
    create_folder(upload_path)
    image_save_path = os.path.join(upload_path, image_name)

    # Save the single uploaded file
    with open(image_save_path, 'wb') as f:
        for chunk in image.chunks():
            f.write(chunk)

    # This is a URL, not a filesystem path: always use "/" regardless of the host OS.
    ret_url = "files/" + image_name
    ret = {"code": 0, "msg": "success", "data": {"url": ret_url}}
    return Response(data=ret, status=status.HTTP_200_OK)
def __new__(cls,
            metadata: data.Metadata,
            images: data.Images,
            seq_len: int,
            batch_size: int,
            timesteps: typing.Union[list, timedelta] = timedelta(minutes=15),
            cache: bool = False):
    """
    Build the batched, prefetched ``tf.data.Dataset`` fed by ``DataGenerator``.

    Each example is a dict with a scalar string ``station_name``, float32 ``images``
    of shape ``(None, image_size, image_size, 5)`` and float32 ``csky_ghi`` / ``ghi``
    vectors of length 4.

    Args:
        metadata: passed to ``DataGenerator``; also queried for the number of
            examples when naming the cache file.
        images: passed to ``DataGenerator``; its ``image_size`` fixes the image shape.
        seq_len: passed to ``DataGenerator`` and used in the cache file name.
        batch_size: size of the batches.
        timesteps: passed to ``DataGenerator``.
        cache: when True, the dataset is cached to a file under ``DATASET_CACHE_PATH``.

    Returns:
        The dataset (cached variant when ``cache`` is True).
    """
    generator = DataGenerator(metadata, images, seq_len, timesteps)

    output_types = {
        'station_name': tf.string,
        'images': tf.float32,
        'csky_ghi': tf.float32,
        'ghi': tf.float32
    }
    output_shapes = {
        'station_name': tf.TensorShape([]),
        'images': tf.TensorShape([None, images.image_size, images.image_size, 5]),
        'csky_ghi': tf.TensorShape([4]),
        'ghi': tf.TensorShape([4])
    }

    dataset = tf.data.Dataset.from_generator(
        generator.get_next_example,
        output_types=output_types,
        output_shapes=output_shapes)
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)

    if not cache:
        return dataset

    utils.create_folder(DATASET_CACHE_PATH)
    # The file name encodes everything that changes the dataset content.
    cache_name = f'cache_seqlen_{seq_len}_imagesize_{images.image_size}_batchsize_{batch_size}_nbexamples_{metadata.get_number_of_examples()}'
    return dataset.cache(filename=os.path.join(DATASET_CACHE_PATH, cache_name))
def post(self, request):
    """
    Store the uploaded ``file`` under ``media/upload/image`` and return an HTML link to it.

    The file is saved as ``media/upload/image/T/<c1>/T<uuid><ext>`` where ``c1`` is the
    second character of the generated name.

    Returns ``{"path": '<p><a href="files/T<uuid><ext>">original name</a></p>'}`` on
    success (both the URL and the original name are HTML-escaped), or an empty
    400 response when no ``file`` part was uploaded.
    """
    # Local import so this method does not depend on the module's import block.
    from html import escape

    orig_file = request.FILES.get('file')
    if orig_file is None:
        # Previously this fell through to an AttributeError on ``orig_file.name``.
        return Response(status=status.HTTP_400_BAD_REQUEST)

    # splitext keeps only the LAST suffix ("notes.v2.docx" -> ".docx") and yields ""
    # for names without a dot; ``name.split('.')[1]`` returned "v2" and raised
    # IndexError respectively.
    extension = os.path.splitext(orig_file.name)[1].lower()
    file_name = "T" + str(uuid.uuid4()) + extension

    upload_path = os.path.join("media/upload/image", file_name[0], file_name[1])
    create_folder(upload_path)
    file_path = os.path.join(upload_path, file_name)

    with open(file_path, 'wb') as f:
        for chunk in orig_file.chunks():
            f.write(chunk)

    # This is a URL, not a filesystem path: always use "/" regardless of the host OS.
    ret_url = "files/" + file_name
    # The original name and the extension come from the client; escape them so they
    # cannot inject markup into the returned HTML snippet.
    ret_path = f'<p><a href="{escape(ret_url)}">{escape(str(orig_file))}</a></p>'
    data = {"path": ret_path}

    # if 'liaison' in request.data:
    #     file_path = os.path.join("upload/file", file_name[0], file_name[1], file_name)
    #     liaison_id = request.data['liaison']
    #     liaison = Liaisons.objects.get(pk=liaison_id)
    #     liaison.freleaserpt = file_path
    #     liaison.save()
    #
    #     ret_url = os.path.join(request.stream._current_scheme_host, "media", file_path)
    #     data['liaison'] = ret_url

    return Response(data=data, status=status.HTTP_200_OK)
def combine_data_algorithms(config_data: dict, config_algo: dict, completed_jobs: dict):
    """
    Write one JSON configuration file per (dataset, algorithm) job that still has to run.

    A job is generated when the algorithm type is listed in the module-level
    ``algorithms`` (SemProp additionally requires ``"assays"`` in the dataset key) and
    its name ``<dataset key>__<algorithm key>`` is not in
    ``completed_jobs[<algorithm type>]``. Files are written to
    ``<project root>/configuration_files/<algorithm type>/<dataset key>/<algorithm key>.json``
    with non-word characters of the algorithm key replaced by ``_``.

    For SemProp, the project root prefix of every schema/data/golden-standard path is
    replaced by ``/code``. This rewrite is applied to copies, so ``config_data`` is
    left untouched and other algorithms keep seeing the original paths.

    Parameters
    ----------
    config_data : dict
        Dataset key -> dict with ``source``/``target`` (each holding ``args``) and
        ``golden_standard``.
    config_algo : dict
        Algorithm key -> dict with ``algorithm`` (holding ``type``) and ``data_loader``.
    completed_jobs : dict
        Algorithm type -> collection of job names that are already done.
    """
    root = str(get_project_root())
    config_root = root + "/configuration_files"
    create_folder(config_root)

    for cfd_key, cfd_value in config_data.items():
        for cfa_key, cfa_value in config_algo.items():
            algorithm_type = cfa_value["algorithm"]["type"]
            is_semprop = algorithm_type == "SemProp"

            if is_semprop:
                selected = "assays" in cfd_key and "SemProp" in algorithms
            else:
                selected = algorithm_type in algorithms
            if not selected:
                continue

            name = cfd_key + '__' + cfa_key
            if name in completed_jobs[algorithm_type]:
                continue

            algorithm_dir = config_root + "/" + algorithm_type
            create_folder(algorithm_dir)
            dataset_dir = algorithm_dir + '/' + cfd_key
            create_folder(dataset_dir)
            file_name = dataset_dir + '/' + re.sub('\\W+', '_', cfa_key) + ".json"

            configuration = {
                "name": name,
                "dataset_name": cfd_key,
                "source": {
                    "type": cfa_value["data_loader"],
                    # Copy: the SemProp rewrite below must not leak into config_data.
                    "args": dict(cfd_value["source"]["args"])
                },
                "target": {
                    "type": cfa_value["data_loader"],
                    "args": dict(cfd_value["target"]["args"])
                },
                "algorithm": cfa_value["algorithm"],
                "metrics": metrics,
                "golden_standard": cfd_value["golden_standard"]
            }

            if is_semprop:
                for side in ("source", "target"):
                    side_args = configuration[side]["args"]
                    for field in ("schema", "data"):
                        side_args[field] = side_args[field].replace(root, "/code")
                configuration["golden_standard"] = configuration["golden_standard"].replace(root, "/code")

            # Open the file only once the configuration is complete, so a failure
            # above cannot leave an empty or truncated JSON file behind.
            with open(file_name, 'w') as fp:
                json.dump(configuration, fp, indent=2)
# Read the command-line options that control the amount of data and the use of weak labels.
f_args = parser.parse_args()
reduced_number_of_data = f_args.subpart_data
no_weak = f_args.no_weak
LOG.info("subpart_data = {}".format(reduced_number_of_data))
LOG.info("Using_weak labels : {}".format(not no_weak))

# The storage folder name records whether weak labels were used.
if not no_weak:
    add_dir_path = "_with_weak"
else:
    add_dir_path = "_synthetic_only"

# stored_data/simple_CRNN<suffix>/{model,predictions}
store_dir = os.path.join("stored_data", "simple_CRNN" + add_dir_path)
saved_model_dir = os.path.join(store_dir, "model")
saved_pred_dir = os.path.join(store_dir, "predictions")
create_folder(store_dir)
create_folder(saved_model_dir)
create_folder(saved_pred_dir)

# ##############
# Model
# ##############
# Build the CRNN from the configured keyword arguments and initialise its weights.
crnn_kwargs = cfg.crnn_kwargs
crnn = CRNN(**crnn_kwargs)
crnn.apply(weights_init)
pooling_time_ratio = cfg.pooling_time_ratio

LOG.info(crnn)
# ##############
VALIDATION_PATH = os.path.join(MUSDB_SPLITS_PATH, 'val') train_paths = [] energy_profile = [{} for _ in range(len(SOURCES) + 1)] sample_dict = {} subset = ['train', 'test'] cuda = 0 for subset_type in subset: DATA_PATH = os.path.join(MUSDB_WAVS_FOLDER_PATH, subset_type) tracks = sorted(os.listdir(DATA_PATH)) for track_id, track_name in enumerate(tracks): track_path = os.path.join(DATA_PATH, track_name) dump_path = os.path.join(MUSDB_SPLITS_PATH, subset_type, track_name) create_folder(dump_path) sources_downsampled = get_sources(track_path) sources_split = split_sources(sources_downsampled, subset_type) stft_output = _stft(sources_split) for chunk_id in range(stft_output.shape[1]): matrix = stft_output[:, chunk_id, ...] energy_profile, true_label = save_chunks(chunk_id, subset_type, track_name, sources_split, energy_profile) if subset_type == 'train': train_paths.append( os.path.join(dump_path, str(chunk_id) + '.npy')) sample_dict['spec'] = matrix sample_dict['true_label'] = true_label full_path = os.path.join(dump_path, str(chunk_id))
def __init__(self,
             bs=84,
             use_fp16=True,
             net_teacher=None,
             path="./gen_images/",
             final_data_path="/gen_images_final/",
             parameters=None,
             setting_id=0,
             jitter=30,
             criterion=None,
             coefficients=None,
             network_output_function=lambda x: x,
             hook_for_display=None):
    '''
    :param bs: batch size per GPU for image generation
    :param use_fp16: use FP16 (or APEX AMP) for model inversion, uses less memory and is faster for GPUs with Tensor Cores
    :parameter net_teacher: Pytorch model to be inverted
    :param path: path where to write temporal images and data
    :param final_data_path: path to write final images into
    :param parameters: a dictionary of control parameters (None or a dictionary without
        "resolution" selects the built-in defaults); when "resolution" is present all
        of the following keys are required:
        "resolution": input image resolution, single value, assumed to be a square, 224
        "random_label" : for classification initialize target to be random values
        "start_noise" : start from noise, def True, other options are not supported at this time
        "detach_student": if computing Adaptive DI, should we detach student?
        "do_flip": stored as self.do_flip, def True
        "store_best_images": stored as self.store_best_images, def False
    :param setting_id: predefined settings for optimization:
        0 - will run low resolution optimization for 1k and then full resolution for 1k;
        1 - will run optimization on high resolution for 2k
        2 - will run optimization on high resolution for 20k
    :param jitter: amount of random shift applied to image at every iteration
    :param criterion: stored as self.criterion
    :param coefficients: dictionary with parameters and coefficients for optimization.
        When "r_feature" is present all of the following keys are required; otherwise
        a message is printed and the corresponding attributes are NOT set.
        keys:
        "r_feature" - coefficient for feature distribution regularization
        "tv_l1" - coefficient for total variation L1 loss
        "tv_l2" - coefficient for total variation L2 loss
        "l2" - l2 penalization weight
        "lr" - learning rate for optimization
        "main_loss_multiplier" - coefficient for the main loss optimization
        "adi_scale" - coefficient for Adaptive DeepInversion, competition, def =0 means no competition
    network_output_function: function to be applied to the output of the network to get the output
    hook_for_display: function to be executed at every print/save call, useful to check accuracy of verifier
    '''
    # None stands for "no dictionary given"; this avoids sharing one default dict
    # instance between all calls.
    parameters = {} if parameters is None else parameters
    coefficients = {} if coefficients is None else coefficients

    print("Deep inversion class generation")
    # for reproducibility
    torch.manual_seed(torch.cuda.current_device())

    self.net_teacher = net_teacher

    if "resolution" in parameters:
        self.image_resolution = parameters["resolution"]
        self.random_label = parameters["random_label"]
        self.start_noise = parameters["start_noise"]
        self.detach_student = parameters["detach_student"]
        self.do_flip = parameters["do_flip"]
        self.store_best_images = parameters["store_best_images"]
    else:
        self.image_resolution = 224
        self.random_label = False
        self.start_noise = True
        self.detach_student = False
        self.do_flip = True
        self.store_best_images = False

    self.setting_id = setting_id
    self.bs = bs  # batch size
    self.use_fp16 = use_fp16
    self.save_every = 100
    self.jitter = jitter
    self.criterion = criterion
    self.network_output_function = network_output_function

    if "r_feature" in coefficients:
        self.bn_reg_scale = coefficients["r_feature"]
        self.var_scale_l1 = coefficients["tv_l1"]
        self.var_scale_l2 = coefficients["tv_l2"]
        self.l2_scale = coefficients["l2"]
        self.lr = coefficients["lr"]
        self.main_loss_multiplier = coefficients["main_loss_multiplier"]
        self.adi_scale = coefficients["adi_scale"]
    else:
        # Construction still succeeds (as before); the message now names what is missing.
        print('Provide a dictionary with "r_feature", "tv_l1", "tv_l2", "l2", "lr", '
              '"main_loss_multiplier" and "adi_scale" coefficients')

    self.num_generations = 0
    self.final_data_path = final_data_path

    ## Create folders for images and logs
    prefix = path
    self.prefix = prefix

    # Only the process on CUDA device 0 creates the output folders.
    local_rank = torch.cuda.current_device()
    if local_rank == 0:
        create_folder(prefix)
        create_folder(prefix + "/best_images/")
        create_folder(self.final_data_path)
        # save images to folders
        # for m in range(1000):
        #     create_folder(self.final_data_path + "/s{:03d}".format(m))

    ## Create hooks for feature statistics
    self.loss_r_feature_layers = []

    for module in self.net_teacher.modules():
        if isinstance(module, nn.BatchNorm2d):
            self.loss_r_feature_layers.append(
                DeepInversionFeatureHook(module))

    self.hook_for_display = hook_for_display
if cfg.early_stopping is not None: early_stopping_call = EarlyStopping(patience=cfg.early_stopping, val_comp="sup") # ########## # # Training # ########## save_results = pd.DataFrame() model_name_triplet = base_model_name + "triplet" if cfg.save_best: model_path_pretrain = os.path.join(model_directory, model_name_triplet, "best_model") else: model_path_pretrain = os.path.join(model_directory, model_name_triplet, "epoch_" + str(f_args.epochs)) print("path of model : " + model_path_pretrain) create_folder(os.path.join(model_directory, model_name_triplet)) batch_size_classif = cfg.batch_size_classif # Hard coded because no semi_hard in this version semi_hard_embed = None semi_hard_input = None if not os.path.exists(model_path_pretrain) or cfg.recompute_embedding: margin = triplet_margin for epoch in range(f_args.epochs): t_start_epoch = time.time() if cfg.rampup_margin_length is not None: margin = sigmoid_rampup(epoch, cfg.rampup_margin_length) * triplet_margin model_triplet.train() model_triplet, loss_mean_triplet, ratio_used = train_triplet_epoch(triplet_loader, # triplet_loader,
def test_create_multiple_folders():
    """Create ``Shared/test1`` ... ``Shared/test5`` and expect a 201 for each."""
    for suffix in map(str, range(1, 6)):
        created = create_folder('Shared/test' + suffix)
        assert created.status_code == 201, f'Error: {created.json()["errorMessage"]}'
        print(f'Folder created, ID: {find_json_key(created, "folder_id")}')
def train_classifier(train_loader, classif_model, optimizer_classif, many_hot_encoder=None,
                     valid_loader=None, state=None,
                     dir_model="model", result_path="res", recompute=True):
    """
    Train (or reload) the classifier with a binary cross-entropy loss.

    Training runs for ``cfg.n_epoch_classifier`` epochs unless a model already exists at
    the target path and ``recompute`` is False, in which case that model is loaded.
    Per-epoch results are written as a tab-separated file to ``result_path`` (columns
    appear in the order the results are produced).

    Args:
        train_loader: iterable of ``(inputs, labels)`` batches.
        classif_model: model to train.
        optimizer_classif: optimizer stepping ``classif_model``.
        many_hot_encoder: required; its ``state_dict()`` is stored in ``state`` and
            ``len(many_hot_encoder.labels)`` gives the number of classes.
        valid_loader: optional validation batches; without it the validation macro
            F-measure is reported as -1 and the per-class string is empty.
        state: dict updated in place with the training state; a fresh dict is used
            when None. Missing ``"model"`` / ``"optimizer"`` entries are created.
        dir_model: folder where the model is saved (created if needed).
        result_path: path of the results file.
        recompute: bool, train even if a saved model exists.

    Returns:
        ``(classif_model, state)``.

    Raises:
        ValueError: ``many_hot_encoder`` is None.
    """
    if many_hot_encoder is None:
        raise ValueError("many_hot_encoder is required to train the classifier")
    # A fresh dict per call: a ``{}`` default would be shared (and mutated) across calls.
    if state is None:
        state = {}

    criterion_bce = nn.BCELoss()
    classif_model, criterion_bce = to_cuda_if_available(classif_model, criterion_bce)
    print(classif_model)

    early_stopping_call = EarlyStopping(patience=cfg.early_stopping, val_comp="sup",
                                        init_patience=cfg.first_early_wait)
    save_best_call = SaveBest(val_comp="sup")

    # scheduler = ReduceLROnPlateau(optimizer_classif, 'max', factor=0.1, patience=cfg.reduce_lr,
    #                               verbose=True)
    print(optimizer_classif)

    # Rows are collected in a list: DataFrame.append was removed in pandas 2.0.
    epoch_results = []

    create_folder(dir_model)
    if cfg.save_best:
        model_path_sup1 = os.path.join(dir_model, "best_model")
    else:
        model_path_sup1 = os.path.join(dir_model, "epoch_" + str(cfg.n_epoch_classifier))
    print("path of model : " + model_path_sup1)

    state['many_hot_encoder'] = many_hot_encoder.state_dict()

    if not os.path.exists(model_path_sup1) or recompute:
        for epoch_ in range(cfg.n_epoch_classifier):
            print(classif_model.training)
            start = time.time()
            loss_mean_bce = []
            n_batches = 0  # stays 0 for an empty loader instead of leaving ``i`` undefined
            for i, samples in enumerate(train_loader):
                inputs, pred_labels = samples
                if i == 0:
                    LOG.debug("classif input shape: {}".format(inputs.shape))

                # zero the parameter gradients
                optimizer_classif.zero_grad()
                inputs = to_cuda_if_available(inputs)

                # forward + backward + optimize
                weak_out = classif_model(inputs)
                weak_out = to_cpu(weak_out)
                loss_bce = criterion_bce(weak_out, pred_labels)
                loss_mean_bce.append(loss_bce.item())
                loss_bce.backward()
                optimizer_classif.step()
                n_batches = i + 1

            loss_mean_bce = np.mean(loss_mean_bce)
            classif_model.eval()
            n_class = len(many_hot_encoder.labels)
            macro_f_measure_train = get_f_measure_by_class(classif_model, n_class,
                                                           train_loader)
            if valid_loader is not None:
                macro_f_measure = get_f_measure_by_class(classif_model, n_class,
                                                         valid_loader)
                mean_macro_f_measure = np.mean(macro_f_measure)
                class_macro_valid = np.array_str(macro_f_measure, precision=2)
            else:
                # No validation data: previously this path hit a NameError below.
                mean_macro_f_measure = -1
                class_macro_valid = ""
            classif_model.train()
            print("Time to train an epoch: {}".format(time.time() - start))
            # print statistics
            print('[%d / %d, %5d] loss: %.3f' %
                  (epoch_ + 1, cfg.n_epoch_classifier, n_batches, loss_mean_bce))

            results = {"train_loss": loss_mean_bce,
                       "macro_measure_train": np.mean(macro_f_measure_train),
                       "class_macro_train": np.array_str(macro_f_measure_train, precision=2),
                       "macro_measure_valid": mean_macro_f_measure,
                       "class_macro_valid": class_macro_valid,
                       }
            for key in results:
                LOG.info("\t\t ---->  {} : {}".format(key, results[key]))

            epoch_results.append(results)
            # scheduler.step(mean_macro_f_measure)

            # ##########
            # # Callbacks
            # ##########
            state['epoch'] = epoch_ + 1
            # setdefault keeps caller-provided sub-dicts and creates them when absent.
            state.setdefault("model", {})["state_dict"] = classif_model.state_dict()
            state.setdefault("optimizer", {})["state_dict"] = optimizer_classif.state_dict()
            state["loss"] = loss_mean_bce
            state.update(results)
            if cfg.early_stopping is not None:
                if early_stopping_call.apply(mean_macro_f_measure):
                    print("EARLY STOPPING")
                    break

            if cfg.save_best and save_best_call.apply(mean_macro_f_measure):
                save_model(state, model_path_sup1)

        if cfg.save_best:
            LOG.info(
                "best model at epoch : {} with macro {}".format(save_best_call.best_epoch, save_best_call.best_val))
            LOG.info("loading model from: {}".format(model_path_sup1))
            classif_model, state = load_model(model_path_sup1, return_optimizer=False, return_state=True)
        else:
            save_model(state, model_path_sup1)
        LOG.debug("model path: {}".format(model_path_sup1))
        LOG.debug('Finished Training')
    else:
        classif_model, state = load_model(model_path_sup1, return_optimizer=False, return_state=True)
    LOG.info("#### End classif")
    save_results = pd.DataFrame(epoch_results)
    save_results.to_csv(result_path, sep="\t", header=True, index=False)

    return classif_model, state
pytorch_total_params = sum(p.numel() for p in model.parameters() if p.requires_grad) LOG.info( "number of parameters in the model: {}".format(pytorch_total_params)) early_stopping_call = EarlyStopping(patience=cfg.early_stopping, val_comp="sup", init_patience=cfg.first_early_wait) save_best_call = SaveBest(val_comp="sup") print(optimizer) save_results = pd.DataFrame() model_name_sup = osp.join(model_directory, "classif") create_folder(model_name_sup) if cfg.save_best: model_path_sup1 = os.path.join(model_name_sup, "best_model") else: model_path_sup1 = os.path.join(model_name_sup, "epoch_" + str(n_epochs)) print("path of model : " + model_path_sup1) state['many_hot_encoder'] = many_hot_encoder.state_dict() def train_loop(train_load, model): loss_bce = [] if args.segment: for cnt, indexes in enumerate(train_load.batch_sampler): optimizer.zero_grad() for j, ind in enumerate(indexes):
def _handle(self, extension, path):
    """
    Create a folder under ``path`` named after the extension.

    The folder name is ``extension`` without its first character, or
    ``OTHER_FOLDER`` when ``extension`` is empty. Always returns True.
    """
    if extension:
        folder_name = extension[1:]
    else:
        folder_name = OTHER_FOLDER
    create_folder(path, folder_name)
    return True
def test_create_folder_forbidden_character():
    """Creating ``Shared/*`` must be rejected with status 409."""
    rejected = create_folder('Shared/*')
    assert rejected.status_code == 409
    print(f'Error: {find_json_key(rejected, "errorMessage")}')
# Label encoder and scaler restored from the saved training state.
encode_function_label = many_hot_encoder.encode_weak
scaler = ScalerSum.load_state_dict(state['scaler'])

frames_in_sec = cfg.frames_in_sec

# Transform chain applied to each item, in list order.
transf = Compose([ApplyLog(), PadOrTrunc(nb_frames=cfg.frames), ToTensor(), Unsqueeze(0),
                  Normalize(scaler), Unsqueeze(1)])
test_fr = dataset.get_df_feat_dir(cfg.test2018, frames_in_sec=frames_in_sec, subpart_data=subpart_data)
print(len(test_fr))

test_dataset = DataLoadDf(test_fr, many_hot_encoder.encode_weak, transform=transf)

# Embeddings go to stored_data/embeddings/<embed_name>/embeddings/embedding,
# figures to the sibling "figures" folder.
embed_set = "embedding"
embed_dir = "stored_data/embeddings"
embed_dir = os.path.join(embed_dir, embed_name, "embeddings")
create_folder(embed_dir)
fig_dir = os.path.join(embed_dir, "figures")
create_folder(fig_dir)

df_emb, embeddings = calculate_embedding(test_dataset, emb_model,
                                         savedir=os.path.join(embed_dir, embed_set), concatenate="append")
print(embeddings.mean())
print(embeddings.var())
# Flatten each embedding to one row, then standardise every feature.
embeddings = sklearn.preprocessing.StandardScaler().fit_transform(embeddings.reshape(embeddings.shape[0], -1))
print("normalized")
print(embeddings.mean())
print(embeddings.var())
df_emb = df_emb.fillna("")
# Project the standardised embeddings with t-SNE and plot them.
tsne = TSNE()
tsne_emb = tsne.fit_transform(X=embeddings.reshape(embeddings.shape[0], -1))
tsne_plots(tsne_emb, df_emb, savefig=os.path.join(fig_dir, embed_set))
from utils.stock_utils import get_stock_names, get_stock_indices
from config import PREFIX_OFFSET, POSTFIX_OFFSET
import technical_indicator_features as indicators
import price_based_features as price
from data_preparation.normalization import normalize_features
from utils.utils import create_folder
import pandas as pd
import numpy as np
import os

stocks = get_stock_names()

# Output locations for the extracted features and the fitted scalers.
write_path = os.path.join("..", "data", "extracted")
scaler_write_path = os.path.join("..", "data", "scalers")
create_folder(write_path)
create_folder(scaler_write_path)


def apply_price_based_features(stock_label):
    """
    Extract the price-based features of one stock.

    Returns:
        ``(features, columns)``: the result of
        ``price.extract_price_based_feature_for_stock`` and its ``columns``.
    """
    features = price.extract_price_based_feature_for_stock(stock_label)
    return features, features.columns


def apply_technical_analysis_features(stock_label):
    """
    Extract the technical-analysis features of one stock.

    Returns:
        ``(features, columns)``: the result of
        ``TechnicalAnalysisFeatures(stock_label).run_all()`` and its ``columns``.
    """
    features = indicators.TechnicalAnalysisFeatures(stock_label).run_all()
    return features, features.columns
def __init__(self,
             bs=84,
             use_fp16=True,
             net_teacher=None,
             path="./gen_images/",
             final_data_path="/gen_images_final/",
             parameters=None,
             setting_id=0,
             jitter=30,
             criterion=None,
             coefficients=None,
             network_output_function=lambda x: x,
             hook_for_display=None,
             opt=None):
    '''
    :param bs: batch size per GPU for image generation
    :param use_fp16: use FP16 (or APEX AMP) for model inversion, uses less memory and is faster for GPUs with Tensor Cores
    :parameter net_teacher: Pytorch model to be inverted
    :param path: path where to write temporal images and data
    :param final_data_path: path to write final images into
    :param parameters: a dictionary of control parameters (None or a dictionary without
        "resolution" selects the built-in defaults); when "resolution" is present all
        of the following keys are required:
        "resolution": input image resolution, single value, assumed to be a square, 224
        "random_label" : for classification initialize target to be random values
        "start_noise" : start from noise, def True, other options are not supported at this time
        "detach_student": if computing Adaptive DI, should we detach student?
        "do_flip": stored as self.do_flip, def True
        "store_best_images": stored as self.store_best_images, def False
    :param setting_id: predefined settings for optimization:
        0 - will run low resolution optimization for 1k and then full resolution for 1k;
        1 - will run optimization on high resolution for 2k
        2 - will run optimization on high resolution for 20k
    :param jitter: amount of random shift applied to image at every iteration
    :param criterion: stored as self.criterion
    :param coefficients: dictionary with parameters and coefficients for optimization.
        When "r_feature" is present all of the following keys are required; otherwise
        a message is printed and the corresponding attributes are NOT set.
        keys:
        "r_feature" - coefficient for feature distribution regularization
        "first_bn_multiplier" - stored as self.first_bn_multiplier
        "tv_l1" - coefficient for total variation L1 loss
        "tv_l2" - coefficient for total variation L2 loss
        "l2" - l2 penalization weight
        "lr" - learning rate for optimization
        "main_loss_multiplier" - coefficient for the main loss optimization
        "adi_scale" - coefficient for Adaptive DeepInversion, competition, def =0 means no competition
    network_output_function: function to be applied to the output of the network to get the output
    hook_for_display: function to be executed at every print/save call, useful to check accuracy of verifier
    :param opt: options object; when it is given and its ``biggan_d_prior`` attribute is
        truthy, the BigGAN discriminator is built and its weights are loaded into
        self.biggan_D. With opt=None this step is skipped.
    '''
    # None stands for "no dictionary given"; this avoids sharing one default dict
    # instance between all calls.
    parameters = {} if parameters is None else parameters
    coefficients = {} if coefficients is None else coefficients

    self.opt = opt
    print("Deep inversion class generation")
    # for reproducibility
    torch.manual_seed(torch.cuda.current_device())

    self.net_teacher = net_teacher

    if "resolution" in parameters:
        self.image_resolution = parameters["resolution"]
        self.random_label = parameters["random_label"]
        self.start_noise = parameters["start_noise"]
        self.detach_student = parameters["detach_student"]
        self.do_flip = parameters["do_flip"]
        self.store_best_images = parameters["store_best_images"]
    else:
        self.image_resolution = 224
        self.random_label = False
        self.start_noise = True
        self.detach_student = False
        self.do_flip = True
        self.store_best_images = False

    self.setting_id = setting_id
    self.bs = bs  # batch size
    self.use_fp16 = use_fp16
    self.save_every = 100
    self.jitter = jitter
    self.criterion = criterion
    self.network_output_function = network_output_function

    if "r_feature" in coefficients:
        self.bn_reg_scale = coefficients["r_feature"]
        self.first_bn_multiplier = coefficients["first_bn_multiplier"]
        self.var_scale_l1 = coefficients["tv_l1"]
        self.var_scale_l2 = coefficients["tv_l2"]
        self.l2_scale = coefficients["l2"]
        self.lr = coefficients["lr"]
        self.main_loss_multiplier = coefficients["main_loss_multiplier"]
        self.adi_scale = coefficients["adi_scale"]
    else:
        # Construction still succeeds (as before); the message now names what is missing.
        print('Provide a dictionary with "r_feature", "first_bn_multiplier", "tv_l1", "tv_l2", '
              '"l2", "lr", "main_loss_multiplier" and "adi_scale" coefficients')

    self.num_generations = 0
    self.final_data_path = final_data_path

    ## Create folders for images and logs
    prefix = path
    self.prefix = prefix

    # Only the process on CUDA device 0 creates the output folders.
    local_rank = torch.cuda.current_device()
    if local_rank == 0:
        create_folder(prefix)
        create_folder(prefix + "/best_images/")
        create_folder(self.final_data_path)
        # save images to folders
        # for m in range(1000):
        #     create_folder(self.final_data_path + "/s{:03d}".format(m))

    ## Create hooks for feature statistics
    self.loss_r_feature_layers = []

    ## bn layer hook
    for module in self.net_teacher.modules():
        if isinstance(module, nn.BatchNorm2d):
            self.loss_r_feature_layers.append(
                DeepInversionFeatureHook(module))
        if isinstance(module, nn.InstanceNorm2d):
            self.loss_r_feature_layers.append(
                DeepInversionFeatureHook_InstanceNormalization(module))

    self.hook_for_display = hook_for_display

    ## biggan discriminator model
    # ``opt`` defaults to None, so guard the attribute access.
    if self.opt is not None and self.opt.biggan_d_prior:
        config_fixed = {
            'dataset': 'I128_hdf5', 'augment': False, 'num_workers': 0, 'pin_memory': True,
            'shuffle': True, 'load_in_mem': False, 'use_multiepoch_sampler': True,
            'model': 'BigGAN', 'G_param': 'SN', 'D_param': 'SN',
            'G_ch': 96, 'D_ch': 96, 'G_depth': 1, 'D_depth': 1,
            'D_wide': True, 'G_shared': True, 'shared_dim': 128, 'dim_z': 120, 'z_var': 1.0,
            'hier': True, 'cross_replica': False, 'mybn': False,
            'G_nl': 'inplace_relu', 'D_nl': 'inplace_relu', 'G_attn': '64', 'D_attn': '64',
            'norm_style': 'bn', 'seed': 0, 'G_init': 'ortho', 'D_init': 'ortho', 'skip_init': True,
            'G_lr': 0.0001, 'D_lr': 0.0004, 'G_B1': 0.0, 'D_B1': 0.0, 'G_B2': 0.999, 'D_B2': 0.999,
            'batch_size': 256, 'G_batch_size': 64,
            'num_G_accumulations': 8, 'num_D_steps': 1, 'num_D_accumulations': 8,
            'split_D': False, 'num_epochs': 100, 'parallel': True,
            'G_fp16': False, 'D_fp16': False, 'D_mixed_precision': False, 'G_mixed_precision': False,
            'accumulate_stats': False, 'num_standing_accumulations': 16, 'G_eval_mode': True,
            'save_every': 1000, 'num_save_copies': 2, 'num_best_copies': 5, 'which_best': 'IS',
            'no_fid': False, 'test_every': 2000, 'num_inception_images': 50000, 'hashname': False,
            'base_root': '', 'data_root': 'data', 'weights_root': 'weights', 'logs_root': 'logs',
            'samples_root': 'samples', 'pbar': 'mine', 'name_suffix': '', 'experiment_name': '',
            'config_from_name': False,
            'ema': True, 'ema_decay': 0.9999, 'use_ema': True, 'ema_start': 20000,
            'adam_eps': 1e-06, 'BN_eps': 1e-05, 'SN_eps': 1e-06,
            'num_G_SVs': 1, 'num_D_SVs': 1, 'num_G_SV_itrs': 1, 'num_D_SV_itrs': 1,
            'G_ortho': 0.0, 'D_ortho': 0.0, 'toggle_grads': True, 'which_train_fn': 'GAN',
            'load_weights': '', 'resume': False, 'logstyle': '%3.3e',
            'log_G_spectra': False, 'log_D_spectra': False, 'sv_log_interval': 10,
            'sample_npz': True, 'sample_num_npz': 50000, 'sample_sheets': True,
            'sample_interps': True, 'sample_sheet_folder_num': -1, 'sample_random': True,
            'sample_trunc_curves': '0.05_0.05_1.0', 'sample_inception_metrics': True,
            'resolution': 128, 'n_classes': 1000,
            'G_activation': ReLU(inplace=True), 'D_activation': ReLU(inplace=True),
            'no_optim': True
        }
        # state_dict = {'itr': 0, 'epoch': 0, 'save_num': 0, 'save_best_num': 0, 'best_IS': 0, 'best_FID': 999999, 'config': config}
        state_dict = {
            'itr': 0,
            'epoch': 0,
            'save_num': 0,
            'save_best_num': 0,
            'best_IS': 0,
            'best_FID': 999999
        }
        weights_root = './external/BigGANPyTorch/weights'
        experiment_name = 'BigGAN_ch96_bs256x8_138k'
        print('laod biggan D...')
        self.biggan_D = D_biggan(**config_fixed).cuda()
        # Only the discriminator is loaded: no generator (first argument) and no
        # EMA generator (seventh argument) are passed.
        utils_biggan.load_weights(None,
                                  self.biggan_D,
                                  state_dict,
                                  weights_root,
                                  experiment_name,
                                  '',
                                  None,
                                  strict=False,
                                  load_optim=False)