Example #1
def write_output(name: str, algorithm_name: str, matches: dict, metrics: dict,
                 run_times: dict):
    """
    Function that writes the output of a schema matching job

    Parameters
    ----------
    name : str
        The experiment unique name
    algorithm_name : str
        The name of the algorithm
    matches : dict
        Dictionary containing the ranked list of matches, sorted by similarity in descending order
    metrics : dict
        Dictionary containing the metrics calculated in the schema matching job
    run_times : dict
        Dictionary containing the run times measured in the schema matching job
    """
    create_folder(get_project_root() + "/output")
    create_folder(get_project_root() + "/output/" + algorithm_name)
    with open(
            get_project_root() + "/output/" + algorithm_name + "/" +
            re.sub('\\W+', '_', str(name)) + ".json", 'w') as fp:
        matches = {str(k): v for k, v in matches.items()}
        output = {
            "name": name,
            "matches": matches,
            "metrics": metrics,
            "run_times": run_times
        }
        json.dump(output, fp, indent=2)
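Note: the create_folder and get_project_root helpers used throughout these examples are never shown. A minimal sketch of what they are assumed to do (directory creation via os.makedirs with exist_ok, an optional sub-folder argument matching the two-argument calls in some examples, and a repository-root lookup) could look like this:

import os
from pathlib import Path


def create_folder(folder_path, sub_folder=None):
    """Create folder_path (and an optional sub_folder inside it) if it does not exist yet."""
    if sub_folder is not None:
        folder_path = os.path.join(folder_path, sub_folder)
    os.makedirs(folder_path, exist_ok=True)
    return folder_path


def get_project_root():
    """Assumed helper: return the repository root, here taken as the parent of this file's directory."""
    return str(Path(__file__).resolve().parents[1])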
Example #2
    def post(self, request):

        orig_file = request.FILES.get('file')

        extension = orig_file.name.split('.')[1].lower()

        file_name = str(uuid.uuid4()) + '.' + extension

        upload_path = os.path.join("media/upload/file", file_name[0], file_name[1])

        create_folder(upload_path)

        file_path = os.path.join(upload_path, file_name)

        # write the uploaded file to disk in chunks
        with open(file_path, 'wb') as f:
            for i in orig_file.chunks():
                f.write(i)

        if 'liaison' in request.data:
            file_path = os.path.join(file_name[0], file_name[1], file_name)
            liaison_id = request.data['liaison']
            liaison = Liaisons.objects.get(pk=liaison_id)
            liaison.freleaserpt = file_path
            liaison.save()

        return Response(status=status.HTTP_200_OK)
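The extension handling above (orig_file.name.split('.')[1]) breaks for file names that contain several dots or none at all. A more defensive variant, shown here only as a sketch and not part of the original view, uses os.path.splitext:

import os
import uuid


def build_upload_name(original_name: str) -> str:
    """Hypothetical helper: keep only the final suffix and tolerate names without an extension."""
    extension = os.path.splitext(original_name)[1].lower().lstrip('.')
    new_name = str(uuid.uuid4())
    return new_name + '.' + extension if extension else new_name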
Example #3
    def __init__(self,
                 local_path="",
                 base_feature_dir="features",
                 recompute_features=False,
                 save_log_feature=True):

        self.local_path = local_path
        self.recompute_features = recompute_features
        self.save_log_feature = save_log_feature

        self.base_feature_dir = base_feature_dir

        feature_dir = os.path.join(
            self.base_feature_dir,
            "sr" + str(cfg.sample_rate) + "_win" + str(cfg.n_window) + "_hop" +
            str(cfg.hop_length) + "_mels" + str(cfg.n_mels))
        if not self.save_log_feature:
            feature_dir += "_nolog"

        self.feature_dir = os.path.join(feature_dir, "features")
        self.metadata_dir = os.path.join(feature_dir, "metadata")
        # create folder if not exist
        create_folder(self.metadata_dir)
        create_folder(self.feature_dir)
        self.classes = []
Example #4
    def post(self, request):
        user = request.user

        avatar = request.FILES.get('avatar')

        extension = avatar.name.split('.')[1].lower()

        file_name = "A" + str(uuid.uuid4()) + '.' + extension

        upload_path = "media/avatar"

        create_folder(upload_path)

        file_path = os.path.join(upload_path, file_name)

        with open(file_path, 'wb') as f:
            for i in avatar.chunks():
                f.write(i)

        avatar_str = "avatar/" + file_name
        user.avatar = avatar_str
        user.save()

        data = {
            "code": 200,
        }
        return Response(data)
Example #5
    def _handle(self, extension, path):

        if is_image(extension):
            create_folder(path, IMG_FOLDER)

            return True
        return False
Example #6
def calculate_embedding(embedding_dl, model, savedir=None, concatenate=None, squeeze=True):
    # When working with frames, the savedir or the filenames are assumed to differ
    # from the case where frames are not defined.
    model.eval()
    if savedir is not None:
        create_folder(savedir)
    df = embedding_dl.df.copy()
    if savedir is not None:
        # only rewrite filenames and dump the dataframe when a save directory is given
        df.filename = df.filename.apply(lambda x: os.path.join(savedir, os.path.basename(x)))
        df.to_csv(os.path.join(savedir, "df"), sep="\t", index=False)
    if concatenate is not None:
        concat_embed = []
    for cnt, (data_in, y) in enumerate(embedding_dl):
        data_in = to_cuda_if_available(data_in)

        emb = get_embeddings_numpy(data_in, model, flatten=False)
        if cnt == 0:
            LOG.debug(f"shapes: input: {data_in.shape}, embed: {emb.shape}, dir: {savedir}")
        if squeeze:
            emb = np.squeeze(emb)
        if savedir is not None:
            np.save(df.iloc[cnt].filename, emb)

        if concatenate == "append":
            concat_embed.append(emb)
        elif concatenate == "extend":
            concat_embed.extend(emb)
        else:
            if concatenate is not None:
                raise NotImplementedError("Impossible to aggregate with this value")

    model.train()
    if concatenate is not None:
        concat_embed = np.array(concat_embed)
        return df, concat_embed
    return df
Example #7
def get_dirs(common_dir, create=True):
    """Return the model and log directories for common_dir under stored_data, creating them if requested."""
    model_dir = osp.join("stored_data", "model", common_dir)
    log_dir = osp.join("stored_data", "logs", common_dir)
    if create:
        create_folder(model_dir)
        create_folder(log_dir)
    return model_dir, log_dir
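A usage sketch (the experiment name is made up): both directories are created on the first call and simply returned on later calls.

model_dir, log_dir = get_dirs("crnn_run_01")
# model_dir -> stored_data/model/crnn_run_01
# log_dir   -> stored_data/logs/crnn_run_01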
Example #8
def write_log(log_info):
    timestamp = time.localtime(time.time())
    log_path = os.path.join(LOG_PATH, time.strftime('%Y', timestamp))
    create_folder(log_path)
    log_name = time.strftime('%Y-%m-%d', timestamp) + '.log'

    full_log_path = os.path.join(log_path, log_name)

    with open(full_log_path, 'a', encoding='utf-8') as f:
        f.write(log_info + '\n')
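Assuming LOG_PATH points at a writable directory, each call appends one line to a per-year folder and per-day file, e.g. LOG_PATH/2021/2021-03-05.log (the date is illustrative):

write_log("feature extraction finished")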
Example #9
def save_chunks(chunk_id, subset_type, track_name, sources, energy_profile):
    save_folder_path = os.path.join(CHUNKS_PATH, subset_type, track_name,
                                    str(chunk_id))
    create_folder(save_folder_path)
    true_label = np.zeros(len(SOURCES) + 1, dtype='int')
    for source_id, source in enumerate([*SOURCES, 'MIX']):
        signal = sources[source_id, chunk_id]
        signal_energy = get_signal_energy(signal)
        if int(signal_energy) > ENERGY_THRESHOLD:
            true_label[source_id] = 1
        save_path = os.path.join(
            save_folder_path,
            source + '_' + str(int(round(signal_energy))) + '.wav')
        energy_profile[source_id][os.path.dirname(save_path)] = signal_energy
        librosa.output.write_wav(save_path, signal, TARGET_SAMPLING_RATE)
    return energy_profile, true_label[:-1]
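Note that librosa.output.write_wav was removed in librosa 0.8. On newer librosa versions the same write can be done with the soundfile package (a drop-in sketch for the call above, assuming signal is a mono float array):

import soundfile as sf

# equivalent of librosa.output.write_wav(save_path, signal, TARGET_SAMPLING_RATE)
sf.write(save_path, signal, TARGET_SAMPLING_RATE)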
Example #10
def test_create_valid_folder_delete_folder():
    folder = 'Shared/test'
    response = create_folder(folder)
    assert response.status_code == 201, f'Error: {response.json()["errorMessage"]}'
    print(f'Folder created, ID: {find_json_key(response, "folder_id")}')
    response = delete_folder(folder)
    assert response.status_code == 200, f'Error: {response.json()["errorMessage"]}'
    print('Folder deleted!')
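The create_folder, delete_folder and find_json_key helpers in these API tests are not shown and clearly differ from the filesystem helper used elsewhere. A hypothetical sketch of what the tests assume (names, URL and auth are placeholders):

import requests

BASE_URL = "https://example.invalid/api"   # placeholder
HEADERS = {"Authorization": "Bearer <token>"}  # placeholder


def create_folder(path: str) -> requests.Response:
    """Assumed wrapper: ask the service to create a folder and return the raw response."""
    return requests.post(f"{BASE_URL}/folders", json={"path": path}, headers=HEADERS)


def delete_folder(path: str) -> requests.Response:
    """Assumed wrapper: ask the service to delete a folder."""
    return requests.delete(f"{BASE_URL}/folders", json={"path": path}, headers=HEADERS)


def find_json_key(response: requests.Response, key: str):
    """Assumed helper: pull a single key out of the JSON body, None if absent."""
    return response.json().get(key)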
Example #11
def folder_to_txt(folder_path: str) -> str:

    dict_raw = folder_to_dict(folder_path)

    now = calendar.timegm(time.gmtime())

    out_path = f'{folder_path}/out_{now}/'

    utils.create_folder(out_path)

    for file, text_raw in zip(dict_raw['file_name'], dict_raw['text_raw']):

        utils.to_txt(text_raw=text_raw, file_name=file, out_path=out_path)

    print(f'The results are in {out_path}')

    return out_path
Example #12
    def get_df_feat_dir(self,
                        csv_path,
                        subpart_data=None,
                        frames_in_sec=None,
                        segment=False,
                        fixed_segment=None):
        """ Initialize the dataset, extract the features dataframes
        Args:
            csv_path: str, csv path in the initial dataset
            subpart_data: int, the number of files to take from the dataframe when using only a small part of the dataset.
            frames_in_sec: int, allows dividing full segments into smaller segments of this number of frames.
            segment: bool, whether or not to segment events when strong labels are available.
            fixed_segment: float, in seconds, the size of the kept segment. If longer than the audio, the audio length is kept.
                If segment is True and longer than the label, the surrounding context is taken (allows creating weak labels).
        Returns:
            pd.DataFrame
            The dataframe containing the right features and labels
        """
        feature_dir = os.path.join(self.feature_dir, name_only(csv_path))
        create_folder(feature_dir)
        meta_name = os.path.join(self.local_path, csv_path)

        assert (not segment or frames_in_sec is None
                ), "if you want to segment, you can't give frames"
        if segment:
            df = self.extract_features_from_meta_segment(
                meta_name,
                feature_dir,
                subpart_data=subpart_data,
                fixed_segment=fixed_segment)
        elif frames_in_sec is not None:
            df = self.extract_features_from_meta_frames(
                meta_name,
                feature_dir,
                frames_in_sec,
                subpart_data=subpart_data)
            # get_classes is done inside the method because of get_labels
        else:
            df = self.extract_features_from_meta(meta_name,
                                                 feature_dir,
                                                 subpart_data=subpart_data)
            self.get_classes(df)

        return df
Example #13
def unix_sort_ranks(corpus: set, file_name: str):
    """
    Function that takes a corpus, sorts it with the unix sort -n command and generates the global ranks
    for each value in the corpus.

    Parameters
    ----------
    corpus: set
        The corpus (all the unique values from every column)
    file_name : str
        The name of the file to store these "global" ranks

    Returns
    -------
    dict
        The ranks in the form of k: value, v: the rank of the value
    """
    create_folder("./cache/sorts/" + file_name)
    with open("./cache/sorts/" + file_name + "/unsorted_file.txt", 'w') as out:
        for var in corpus:
            print(str(var), file=out)

    sort_env = os.environ.copy()
    sort_env['LC_ALL'] = 'C'

    with open('cache/sorts/' + file_name + '/sorted_file.txt', 'w') as f:
        subprocess.call(['sort', '-n', 'cache/sorts/' + file_name + '/unsorted_file.txt'], stdout=f, env=sort_env)

    rank = 1
    ranks = []

    with open('./cache/sorts/' + file_name + '/sorted_file.txt', 'r') as f:
        txt = f.read()
        for var in txt.splitlines():
            ranks.append((convert_data_type(var.replace('\n', '')), rank))
            rank = rank + 1

    shutil.rmtree('./cache/sorts/' + file_name)
    os.mkdir('./cache/sorts/' + file_name)

    return dict(ranks)
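For smaller corpora the subprocess round-trip can be avoided. A hypothetical in-memory variant that approximates sort -n (non-numeric values compare as 0, the raw text breaks ties) and skips the convert_data_type step:

def in_memory_sort_ranks(corpus: set) -> dict:
    def sort_key(value):
        text = str(value)
        try:
            return (float(text), text)
        except ValueError:
            return (0.0, text)

    return {value: rank
            for rank, value in enumerate(sorted(corpus, key=sort_key), start=1)}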
Example #14
    def __init__(self,
                 feature_dir,
                 local_path="",
                 recompute_features=False,
                 exp_tag='default',
                 save_log_feature=True):

        self.local_path = local_path
        self.recompute_features = recompute_features
        self.save_log_feature = save_log_feature

        self.feature_dir = feature_dir
        # if feature_dir is None:
        #     feature_dir = os.path.join(base_feature_dir, '_' + exp_tag)
        # feature_dir = os.path.join(base_feature_dir, "sr" + str(cfg.sample_rate) + "_win" + str(cfg.n_window)
        #                            + "_hop" + str(cfg.hop_length) + "_mels" + str(cfg.n_mels))
        # if not self.save_log_feature:
        #     feature_dir += "_nolog"
        # self.feature_dir = os.path.join(feature_dir, "features")

        # create folder if not exist
        create_folder(self.feature_dir)
Example #15
    def post(self, request):
        image = request.FILES.get('file')

        extension = image.name.split('.')[1].lower()

        image_name = str(uuid.uuid4()) + '.' + extension

        upload_path = os.path.join("media/upload/image", image_name[0],
                                   image_name[1])

        create_folder(upload_path)

        image_save_path = os.path.join(upload_path, image_name)

        # save the single uploaded file
        with open(image_save_path, 'wb') as f:
            for i in image.chunks():
                f.write(i)

        ret_url = os.path.join("files", image_name)

        ret = {"code": 0, "msg": "success", "data": {"url": ret_url}}
        return Response(data=ret, status=status.HTTP_200_OK)
Example #16
    def __new__(cls,
                metadata: data.Metadata,
                images: data.Images,
                seq_len: int,
                batch_size: int,
                timesteps: typing.Union[list,
                                        timedelta] = timedelta(minutes=15),
                cache: bool = False):
        dataset = tf.data.Dataset.from_generator(
            DataGenerator(metadata, images, seq_len,
                          timesteps).get_next_example,
            output_types={
                'station_name': tf.string,
                'images': tf.float32,
                'csky_ghi': tf.float32,
                'ghi': tf.float32
            },
            output_shapes={
                'station_name':
                tf.TensorShape([]),
                'images':
                tf.TensorShape([None, images.image_size, images.image_size,
                                5]),
                'csky_ghi':
                tf.TensorShape([4]),
                'ghi':
                tf.TensorShape([4])
            }).batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)
        if cache:
            utils.create_folder(DATASET_CACHE_PATH)
            return dataset.cache(filename=os.path.join(
                DATASET_CACHE_PATH,
                f'cache_seqlen_{seq_len}_imagesize_{images.image_size}_batchsize_{batch_size}_nbexamples_{metadata.get_number_of_examples()}'
            ))
        else:
            return dataset
Example #17
    def post(self, request):
        orig_file = request.FILES.get('file')

        extension = orig_file.name.split('.')[1].lower()

        file_name = "T" + str(uuid.uuid4()) + '.' + extension

        upload_path = os.path.join("media/upload/image", file_name[0],
                                   file_name[1])

        create_folder(upload_path)

        file_path = os.path.join(upload_path, file_name)

        # write the uploaded file to disk in chunks
        with open(file_path, 'wb') as f:
            for i in orig_file.chunks():
                f.write(i)

        ret_url = os.path.join('files', file_name)

        ret_path = f'<p><a href="{ret_url}">{orig_file}</a></p>'

        data = {"path": ret_path}

        # if 'liaison' in request.data:
        #     file_path = os.path.join("upload/file", file_name[0], file_name[1], file_name)
        #     liaison_id = request.data['liaison']
        #     liaison = Liaisons.objects.get(pk=liaison_id)
        #     liaison.freleaserpt = file_path
        #     liaison.save()
        #
        #     ret_url = os.path.join(request.stream._current_scheme_host, "media", file_path)
        #     data['liaison'] = ret_url

        return Response(data=data, status=status.HTTP_200_OK)
Example #18
def combine_data_algorithms(config_data: dict, config_algo: dict,
                            completed_jobs: dict):
    create_folder(get_project_root() + "/configuration_files")
    for cfd_key, cfd_value in config_data.items():
        for cfa_key, cfa_value in config_algo.items():
            if (cfa_value["algorithm"]["type"] == "SemProp" and "assays" in cfd_key and "SemProp" in algorithms) \
                    or (cfa_value["algorithm"]["type"] != "SemProp" and cfa_value["algorithm"]["type"] in algorithms):
                name = cfd_key + '__' + cfa_key
                if name not in completed_jobs[cfa_value["algorithm"]["type"]]:
                    create_folder(
                        str(get_project_root()) + "/configuration_files/" +
                        cfa_value["algorithm"]["type"])
                    create_folder(
                        str(get_project_root()) + "/configuration_files/" +
                        cfa_value["algorithm"]["type"] + '/' + cfd_key)
                    cfa_key = re.sub('\\W+', '_', cfa_key)
                    file_name = str(get_project_root())+"/configuration_files/" + cfa_value["algorithm"]["type"] + \
                                                        '/' + cfd_key + '/' + cfa_key + ".json"
                    with open(file_name, 'w') as fp:
                        configuration = {
                            "name": name,
                            "dataset_name": cfd_key,
                            "source": {
                                "type": cfa_value["data_loader"],
                                "args": cfd_value["source"]["args"]
                            },
                            "target": {
                                "type": cfa_value["data_loader"],
                                "args": cfd_value["target"]["args"]
                            },
                            "algorithm": cfa_value["algorithm"],
                            "metrics": metrics,
                            "golden_standard": cfd_value["golden_standard"]
                        }
                        if cfa_value["algorithm"]["type"] == "SemProp":
                            configuration["source"]["args"]["schema"] = configuration["source"]["args"]["schema"]\
                                .replace(get_project_root(), "/code")
                            configuration["source"]["args"]["data"] = configuration["source"]["args"]["data"]\
                                .replace(get_project_root(), "/code")
                            configuration["target"]["args"]["schema"] = configuration["target"]["args"]["schema"] \
                                .replace(get_project_root(), "/code")
                            configuration["target"]["args"]["data"] = configuration["target"]["args"]["data"] \
                                .replace(get_project_root(), "/code")
                            configuration["golden_standard"] = configuration["golden_standard"] \
                                .replace(get_project_root(), "/code")
                        json.dump(configuration, fp, indent=2)
Example #19
    f_args = parser.parse_args()

    reduced_number_of_data = f_args.subpart_data
    no_weak = f_args.no_weak
    LOG.info("subpart_data = {}".format(reduced_number_of_data))
    LOG.info("Using weak labels: {}".format(not no_weak))

    if not no_weak:
        add_dir_path = "_with_weak"
    else:
        add_dir_path = "_synthetic_only"

    store_dir = os.path.join("stored_data", "simple_CRNN" + add_dir_path)
    saved_model_dir = os.path.join(store_dir, "model")
    saved_pred_dir = os.path.join(store_dir, "predictions")
    create_folder(store_dir)
    create_folder(saved_model_dir)
    create_folder(saved_pred_dir)

    # ##############
    # Model
    # ##############

    crnn_kwargs = cfg.crnn_kwargs
    crnn = CRNN(**crnn_kwargs)
    crnn.apply(weights_init)
    pooling_time_ratio = cfg.pooling_time_ratio

    LOG.info(crnn)

    # ##############
Example #20
VALIDATION_PATH = os.path.join(MUSDB_SPLITS_PATH, 'val')
train_paths = []
energy_profile = [{} for _ in range(len(SOURCES) + 1)]
sample_dict = {}
subset = ['train', 'test']
cuda = 0

for subset_type in subset:
    DATA_PATH = os.path.join(MUSDB_WAVS_FOLDER_PATH, subset_type)
    tracks = sorted(os.listdir(DATA_PATH))

    for track_id, track_name in enumerate(tracks):
        track_path = os.path.join(DATA_PATH, track_name)
        dump_path = os.path.join(MUSDB_SPLITS_PATH, subset_type, track_name)
        create_folder(dump_path)
        sources_downsampled = get_sources(track_path)
        sources_split = split_sources(sources_downsampled, subset_type)
        stft_output = _stft(sources_split)
        for chunk_id in range(stft_output.shape[1]):
            matrix = stft_output[:, chunk_id, ...]
            energy_profile, true_label = save_chunks(chunk_id, subset_type,
                                                     track_name, sources_split,
                                                     energy_profile)
            if subset_type == 'train':
                train_paths.append(
                    os.path.join(dump_path,
                                 str(chunk_id) + '.npy'))
            sample_dict['spec'] = matrix
            sample_dict['true_label'] = true_label
            full_path = os.path.join(dump_path, str(chunk_id))
Example #21
    def __init__(self,
                 bs=84,
                 use_fp16=True,
                 net_teacher=None,
                 path="./gen_images/",
                 final_data_path="/gen_images_final/",
                 parameters=dict(),
                 setting_id=0,
                 jitter=30,
                 criterion=None,
                 coefficients=dict(),
                 network_output_function=lambda x: x,
                 hook_for_display=None):
        '''
        :param bs: batch size per GPU for image generation
        :param use_fp16: use FP16 (or APEX AMP) for model inversion, uses less memory and is faster for GPUs with Tensor Cores
        :param net_teacher: PyTorch model to be inverted
        :param path: path where temporary (intermediate) images and data are written
        :param final_data_path: path to write final images into
        :param parameters: a dictionary of control parameters:
            "resolution": input image resolution, single value, assumed to be a square, 224
            "random_label" : for classification initialize target to be random values
            "start_noise" : start from noise, default True, other options are not supported at this time
            "detach_student": if computing Adaptive DI, should we detach student?
        :param setting_id: predefined settings for optimization:
            0 - will run low resolution optimization for 1k and then full resolution for 1k;
            1 - will run optimization on high resolution for 2k
            2 - will run optimization on high resolution for 20k

        :param jitter: amount of random shift applied to image at every iteration
        :param coefficients: dictionary with parameters and coefficients for optimization.
            keys:
            "r_feature" - coefficient for feature distribution regularization
            "tv_l1" - coefficient for total variation L1 loss
            "tv_l2" - coefficient for total variation L2 loss
            "l2" - l2 penalization weight
            "lr" - learning rate for optimization
            "main_loss_multiplier" - coefficient for the main loss optimization
            "adi_scale" - coefficient for Adaptive DeepInversion competition, default 0 means no competition

        network_output_function: function applied to the network output before it is used

        hook_for_display: function to be executed at every print/save call, useful to check accuracy of verifier

        '''

        print("Deep inversion class generation")
        # for reproducibility
        torch.manual_seed(torch.cuda.current_device())

        self.net_teacher = net_teacher

        if "resolution" in parameters.keys():
            self.image_resolution = parameters["resolution"]
            self.random_label = parameters["random_label"]
            self.start_noise = parameters["start_noise"]
            self.detach_student = parameters["detach_student"]
            self.do_flip = parameters["do_flip"]
            self.store_best_images = parameters["store_best_images"]
        else:
            self.image_resolution = 224
            self.random_label = False
            self.start_noise = True
            self.detach_student = False
            self.do_flip = True
            self.store_best_images = False

        self.setting_id = setting_id

        self.bs = bs  # batch size
        self.use_fp16 = use_fp16

        self.save_every = 100

        self.jitter = jitter
        self.criterion = criterion

        self.network_output_function = network_output_function

        do_clip = True

        if "r_feature" in coefficients:
            self.bn_reg_scale = coefficients["r_feature"]
            self.var_scale_l1 = coefficients["tv_l1"]
            self.var_scale_l2 = coefficients["tv_l2"]
            self.l2_scale = coefficients["l2"]
            self.lr = coefficients["lr"]
            self.main_loss_multiplier = coefficients["main_loss_multiplier"]
            self.adi_scale = coefficients["adi_scale"]
        else:
            print("Provide a coefficients dictionary with the keys described in the docstring")

        self.num_generations = 0

        self.final_data_path = final_data_path

        ## Create folders for images and logs
        prefix = path
        self.prefix = prefix

        local_rank = torch.cuda.current_device()
        if local_rank == 0:
            create_folder(prefix)
            create_folder(prefix + "/best_images/")
            create_folder(self.final_data_path)
            # save images to folders
            # for m in range(1000):
            #     create_folder(self.final_data_path + "/s{:03d}".format(m))

        ## Create hooks for feature statistics
        self.loss_r_feature_layers = []

        for module in self.net_teacher.modules():
            if isinstance(module, nn.BatchNorm2d):
                self.loss_r_feature_layers.append(
                    DeepInversionFeatureHook(module))

        self.hook_for_display = None
        if hook_for_display is not None:
            self.hook_for_display = hook_for_display
Example #22
    if cfg.early_stopping is not None:
        early_stopping_call = EarlyStopping(patience=cfg.early_stopping, val_comp="sup")

    # ##########
    # # Training
    # ##########
    save_results = pd.DataFrame()

    model_name_triplet = base_model_name + "triplet"

    if cfg.save_best:
        model_path_pretrain = os.path.join(model_directory, model_name_triplet, "best_model")
    else:
        model_path_pretrain = os.path.join(model_directory, model_name_triplet, "epoch_" + str(f_args.epochs))
    print("path of model : " + model_path_pretrain)
    create_folder(os.path.join(model_directory, model_name_triplet))

    batch_size_classif = cfg.batch_size_classif
    # Hard coded because no semi_hard in this version
    semi_hard_embed = None
    semi_hard_input = None
    if not os.path.exists(model_path_pretrain) or cfg.recompute_embedding:
        margin = triplet_margin
        for epoch in range(f_args.epochs):
            t_start_epoch = time.time()

            if cfg.rampup_margin_length is not None:
                margin = sigmoid_rampup(epoch, cfg.rampup_margin_length) * triplet_margin
            model_triplet.train()
            model_triplet, loss_mean_triplet, ratio_used = train_triplet_epoch(triplet_loader,
                                                                               # triplet_loader,
Example #23
def test_create_multiple_folders():
    for number in range(1, 6):
        folder = 'Shared/test' + str(number)
        response = create_folder(folder)
        assert response.status_code == 201, f'Error: {response.json()["errorMessage"]}'
        print(f'Folder created, ID: {find_json_key(response, "folder_id")}')
Example #24
def train_classifier(train_loader, classif_model, optimizer_classif, many_hot_encoder=None,
                     valid_loader=None, state={},
                     dir_model="model", result_path="res", recompute=True):
    criterion_bce = nn.BCELoss()
    classif_model, criterion_bce = to_cuda_if_available(classif_model, criterion_bce)
    print(classif_model)

    early_stopping_call = EarlyStopping(patience=cfg.early_stopping, val_comp="sup",
                                        init_patience=cfg.first_early_wait)
    save_best_call = SaveBest(val_comp="sup")

    # scheduler = ReduceLROnPlateau(optimizer_classif, 'max', factor=0.1, patience=cfg.reduce_lr,
    #                               verbose=True)
    print(optimizer_classif)

    save_results = pd.DataFrame()

    create_folder(dir_model)
    if cfg.save_best:
        model_path_sup1 = os.path.join(dir_model, "best_model")
    else:
        model_path_sup1 = os.path.join(dir_model, "epoch_" + str(cfg.n_epoch_classifier))
    print("path of model : " + model_path_sup1)

    state['many_hot_encoder'] = many_hot_encoder.state_dict()

    if not os.path.exists(model_path_sup1) or recompute:
        for epoch_ in range(cfg.n_epoch_classifier):
            print(classif_model.training)
            start = time.time()
            loss_mean_bce = []
            for i, samples in enumerate(train_loader):
                inputs, pred_labels = samples
                if i == 0:
                    LOG.debug("classif input shape: {}".format(inputs.shape))

                # zero the parameter gradients
                optimizer_classif.zero_grad()
                inputs = to_cuda_if_available(inputs)

                # forward + backward + optimize
                weak_out = classif_model(inputs)
                weak_out = to_cpu(weak_out)
                # print(output)
                loss_bce = criterion_bce(weak_out, pred_labels)
                loss_mean_bce.append(loss_bce.item())
                loss_bce.backward()
                optimizer_classif.step()

            loss_mean_bce = np.mean(loss_mean_bce)
            classif_model.eval()
            n_class = len(many_hot_encoder.labels)
            macro_f_measure_train = get_f_measure_by_class(classif_model, n_class,
                                                           train_loader)
            if valid_loader is not None:
                macro_f_measure = get_f_measure_by_class(classif_model, n_class,
                                                         valid_loader)
                mean_macro_f_measure = np.mean(macro_f_measure)
            else:
                # no validation set: report placeholder values
                macro_f_measure = np.array([-1.])
                mean_macro_f_measure = -1
            classif_model.train()
            print("Time to train an epoch: {}".format(time.time() - start))
            # print statistics
            print('[%d / %d, %5d] loss: %.3f' %
                  (epoch_ + 1, cfg.n_epoch_classifier, i + 1, loss_mean_bce))

            results = {"train_loss": loss_mean_bce,
                       "macro_measure_train": np.mean(macro_f_measure_train),
                       "class_macro_train": np.array_str(macro_f_measure_train, precision=2),
                       "macro_measure_valid": mean_macro_f_measure,
                       "class_macro_valid": np.array_str(macro_f_measure, precision=2),
                       }
            for key in results:
                LOG.info("\t\t ---->  {} : {}".format(key, results[key]))

            save_results = save_results.append(results, ignore_index=True)
            # scheduler.step(mean_macro_f_measure)

            # ##########
            # # Callbacks
            # ##########
            state['epoch'] = epoch_ + 1
            state["model"]["state_dict"] = classif_model.state_dict()
            state["optimizer"]["state_dict"] = optimizer_classif.state_dict()
            state["loss"] = loss_mean_bce
            state.update(results)

            if cfg.early_stopping is not None:
                if early_stopping_call.apply(mean_macro_f_measure):
                    print("EARLY STOPPING")
                    break

            if cfg.save_best and save_best_call.apply(mean_macro_f_measure):
                save_model(state, model_path_sup1)

        if cfg.save_best:
            LOG.info(
                "best model at epoch : {} with macro {}".format(save_best_call.best_epoch, save_best_call.best_val))
            LOG.info("loading model from: {}".format(model_path_sup1))
            classif_model, state = load_model(model_path_sup1, return_optimizer=False, return_state=True)
        else:
            model_path_sup1 = os.path.join(dir_model, "epoch_" + str(cfg.n_epoch_classifier))
            save_model(state, model_path_sup1)
        LOG.debug("model path: {}".format(model_path_sup1))
        LOG.debug('Finished Training')
    else:
        classif_model, state = load_model(model_path_sup1, return_optimizer=False, return_state=True)
    LOG.info("#### End classif")
    save_results.to_csv(result_path, sep="\t", header=True, index=False)

    return classif_model, state
Example #25
    pytorch_total_params = sum(p.numel() for p in model.parameters()
                               if p.requires_grad)
    LOG.info(
        "number of parameters in the model: {}".format(pytorch_total_params))

    early_stopping_call = EarlyStopping(patience=cfg.early_stopping,
                                        val_comp="sup",
                                        init_patience=cfg.first_early_wait)
    save_best_call = SaveBest(val_comp="sup")

    print(optimizer)

    save_results = pd.DataFrame()

    model_name_sup = osp.join(model_directory, "classif")
    create_folder(model_name_sup)
    if cfg.save_best:
        model_path_sup1 = os.path.join(model_name_sup, "best_model")
    else:
        model_path_sup1 = os.path.join(model_name_sup,
                                       "epoch_" + str(n_epochs))
    print("path of model : " + model_path_sup1)

    state['many_hot_encoder'] = many_hot_encoder.state_dict()

    def train_loop(train_load, model):
        loss_bce = []
        if args.segment:
            for cnt, indexes in enumerate(train_load.batch_sampler):
                optimizer.zero_grad()
                for j, ind in enumerate(indexes):
Example #26
    def _handle(self, extension, path):
        folder_name = extension[1:] if extension else OTHER_FOLDER

        create_folder(path, folder_name)

        return True
Example #27
def test_create_folder_forbidden_character():
    folder = 'Shared/*'
    response = create_folder(folder)
    assert response.status_code == 409
    print(f'Error: {find_json_key(response, "errorMessage")}')
Example #28
    encode_function_label = many_hot_encoder.encode_weak
    scaler = ScalerSum.load_state_dict(state['scaler'])

    frames_in_sec = cfg.frames_in_sec

    transf = Compose([ApplyLog(), PadOrTrunc(nb_frames=cfg.frames), ToTensor(), Unsqueeze(0),
                      Normalize(scaler), Unsqueeze(1)])
    test_fr = dataset.get_df_feat_dir(cfg.test2018, frames_in_sec=frames_in_sec, subpart_data=subpart_data)
    print(len(test_fr))

    test_dataset = DataLoadDf(test_fr, many_hot_encoder.encode_weak, transform=transf)

    embed_set = "embedding"
    embed_dir = "stored_data/embeddings"
    embed_dir = os.path.join(embed_dir, embed_name, "embeddings")
    create_folder(embed_dir)
    fig_dir = os.path.join(embed_dir, "figures")
    create_folder(fig_dir)

    df_emb, embeddings = calculate_embedding(test_dataset, emb_model,
                                             savedir=os.path.join(embed_dir, embed_set), concatenate="append")
    print(embeddings.mean())
    print(embeddings.var())
    embeddings = sklearn.preprocessing.StandardScaler().fit_transform(embeddings.reshape(embeddings.shape[0], -1))
    print("normalized")
    print(embeddings.mean())
    print(embeddings.var())
    df_emb = df_emb.fillna("")
    tsne = TSNE()
    tsne_emb = tsne.fit_transform(X=embeddings.reshape(embeddings.shape[0], -1))
    tsne_plots(tsne_emb, df_emb, savefig=os.path.join(fig_dir, embed_set))
Example #29
from utils.stock_utils import get_stock_names, get_stock_indices
from config import PREFIX_OFFSET, POSTFIX_OFFSET
import technical_indicator_features as indicators
import price_based_features as price
from data_preparation.normalization import normalize_features
from utils.utils import create_folder

import pandas as pd
import numpy as np
import os

stocks = get_stock_names()
write_path = os.path.join("..", "data", "extracted")
scaler_write_path = os.path.join("..", "data", "scalers")

create_folder(write_path)
create_folder(scaler_write_path)


def apply_price_based_features(stock_label):
    price_based_features = price.extract_price_based_feature_for_stock(
        stock_label)
    columns = price_based_features.columns
    return price_based_features, columns


def apply_technical_analysis_features(stock_label):
    ta_feature_extractor = indicators.TechnicalAnalysisFeatures(stock_label)
    technical_analysis_features = ta_feature_extractor.run_all()
    columns = technical_analysis_features.columns
    return technical_analysis_features, columns
Example #30
    def __init__(self,
                 bs=84,
                 use_fp16=True,
                 net_teacher=None,
                 path="./gen_images/",
                 final_data_path="/gen_images_final/",
                 parameters=dict(),
                 setting_id=0,
                 jitter=30,
                 criterion=None,
                 coefficients=dict(),
                 network_output_function=lambda x: x,
                 hook_for_display=None,
                 opt=None):
        '''
        :param bs: batch size per GPU for image generation
        :param use_fp16: use FP16 (or APEX AMP) for model inversion, uses less memory and is faster for GPUs with Tensor Cores
        :param net_teacher: PyTorch model to be inverted
        :param path: path where temporary (intermediate) images and data are written
        :param final_data_path: path to write final images into
        :param parameters: a dictionary of control parameters:
            "resolution": input image resolution, single value, assumed to be a square, 224
            "random_label" : for classification initialize target to be random values
            "start_noise" : start from noise, default True, other options are not supported at this time
            "detach_student": if computing Adaptive DI, should we detach student?
        :param setting_id: predefined settings for optimization:
            0 - will run low resolution optimization for 1k and then full resolution for 1k;
            1 - will run optimization on high resolution for 2k
            2 - will run optimization on high resolution for 20k

        :param jitter: amount of random shift applied to image at every iteration
        :param coefficients: dictionary with parameters and coefficients for optimization.
            keys:
            "r_feature" - coefficient for feature distribution regularization
            "tv_l1" - coefficient for total variation L1 loss
            "tv_l2" - coefficient for total variation L2 loss
            "l2" - l2 penalization weight
            "lr" - learning rate for optimization
            "main_loss_multiplier" - coefficient for the main loss optimization
            "adi_scale" - coefficient for Adaptive DeepInversion competition, default 0 means no competition
        network_output_function: function applied to the network output before it is used
        hook_for_display: function to be executed at every print/save call, useful to check accuracy of verifier
        '''

        self.opt = opt

        print("Deep inversion class generation")
        # for reproducibility
        torch.manual_seed(torch.cuda.current_device())

        self.net_teacher = net_teacher

        if "resolution" in parameters.keys():
            self.image_resolution = parameters["resolution"]
            self.random_label = parameters["random_label"]
            self.start_noise = parameters["start_noise"]
            self.detach_student = parameters["detach_student"]
            self.do_flip = parameters["do_flip"]
            self.store_best_images = parameters["store_best_images"]
        else:
            self.image_resolution = 224
            self.random_label = False
            self.start_noise = True
            self.detach_student = False
            self.do_flip = True
            self.store_best_images = False

        self.setting_id = setting_id
        self.bs = bs  # batch size
        self.use_fp16 = use_fp16
        self.save_every = 100
        self.jitter = jitter
        self.criterion = criterion
        self.network_output_function = network_output_function
        do_clip = True

        if "r_feature" in coefficients:
            self.bn_reg_scale = coefficients["r_feature"]
            self.first_bn_multiplier = coefficients["first_bn_multiplier"]
            self.var_scale_l1 = coefficients["tv_l1"]
            self.var_scale_l2 = coefficients["tv_l2"]
            self.l2_scale = coefficients["l2"]
            self.lr = coefficients["lr"]
            self.main_loss_multiplier = coefficients["main_loss_multiplier"]
            self.adi_scale = coefficients["adi_scale"]
        else:
            print("Provide a coefficients dictionary with the keys described in the docstring")

        self.num_generations = 0
        self.final_data_path = final_data_path

        ## Create folders for images and logs
        prefix = path
        self.prefix = prefix

        local_rank = torch.cuda.current_device()
        if local_rank == 0:
            create_folder(prefix)
            create_folder(prefix + "/best_images/")
            create_folder(self.final_data_path)
            # save images to folders
            # for m in range(1000):
            #     create_folder(self.final_data_path + "/s{:03d}".format(m))

        ## Create hooks for feature statistics
        self.loss_r_feature_layers = []

        ## bn layer hook
        for module in self.net_teacher.modules():
            if isinstance(module, nn.BatchNorm2d):
                self.loss_r_feature_layers.append(
                    DeepInversionFeatureHook(module))
            if isinstance(module, nn.InstanceNorm2d):
                self.loss_r_feature_layers.append(
                    DeepInversionFeatureHook_InstanceNormalization(module))

        self.hook_for_display = None
        if hook_for_display is not None:
            self.hook_for_display = hook_for_display

        ## biggan discriminator model
        if self.opt.biggan_d_prior:
            config_fixed = {
                'dataset': 'I128_hdf5',
                'augment': False,
                'num_workers': 0,
                'pin_memory': True,
                'shuffle': True,
                'load_in_mem': False,
                'use_multiepoch_sampler': True,
                'model': 'BigGAN',
                'G_param': 'SN',
                'D_param': 'SN',
                'G_ch': 96,
                'D_ch': 96,
                'G_depth': 1,
                'D_depth': 1,
                'D_wide': True,
                'G_shared': True,
                'shared_dim': 128,
                'dim_z': 120,
                'z_var': 1.0,
                'hier': True,
                'cross_replica': False,
                'mybn': False,
                'G_nl': 'inplace_relu',
                'D_nl': 'inplace_relu',
                'G_attn': '64',
                'D_attn': '64',
                'norm_style': 'bn',
                'seed': 0,
                'G_init': 'ortho',
                'D_init': 'ortho',
                'skip_init': True,
                'G_lr': 0.0001,
                'D_lr': 0.0004,
                'G_B1': 0.0,
                'D_B1': 0.0,
                'G_B2': 0.999,
                'D_B2': 0.999,
                'batch_size': 256,
                'G_batch_size': 64,
                'num_G_accumulations': 8,
                'num_D_steps': 1,
                'num_D_accumulations': 8,
                'split_D': False,
                'num_epochs': 100,
                'parallel': True,
                'G_fp16': False,
                'D_fp16': False,
                'D_mixed_precision': False,
                'G_mixed_precision': False,
                'accumulate_stats': False,
                'num_standing_accumulations': 16,
                'G_eval_mode': True,
                'save_every': 1000,
                'num_save_copies': 2,
                'num_best_copies': 5,
                'which_best': 'IS',
                'no_fid': False,
                'test_every': 2000,
                'num_inception_images': 50000,
                'hashname': False,
                'base_root': '',
                'data_root': 'data',
                'weights_root': 'weights',
                'logs_root': 'logs',
                'samples_root': 'samples',
                'pbar': 'mine',
                'name_suffix': '',
                'experiment_name': '',
                'config_from_name': False,
                'ema': True,
                'ema_decay': 0.9999,
                'use_ema': True,
                'ema_start': 20000,
                'adam_eps': 1e-06,
                'BN_eps': 1e-05,
                'SN_eps': 1e-06,
                'num_G_SVs': 1,
                'num_D_SVs': 1,
                'num_G_SV_itrs': 1,
                'num_D_SV_itrs': 1,
                'G_ortho': 0.0,
                'D_ortho': 0.0,
                'toggle_grads': True,
                'which_train_fn': 'GAN',
                'load_weights': '',
                'resume': False,
                'logstyle': '%3.3e',
                'log_G_spectra': False,
                'log_D_spectra': False,
                'sv_log_interval': 10,
                'sample_npz': True,
                'sample_num_npz': 50000,
                'sample_sheets': True,
                'sample_interps': True,
                'sample_sheet_folder_num': -1,
                'sample_random': True,
                'sample_trunc_curves': '0.05_0.05_1.0',
                'sample_inception_metrics': True,
                'resolution': 128,
                'n_classes': 1000,
                'G_activation': nn.ReLU(inplace=True),
                'D_activation': nn.ReLU(inplace=True),
                'no_optim': True
            }
            # state_dict = {'itr': 0, 'epoch': 0, 'save_num': 0, 'save_best_num': 0, 'best_IS': 0, 'best_FID': 999999, 'config': config}
            state_dict = {
                'itr': 0,
                'epoch': 0,
                'save_num': 0,
                'save_best_num': 0,
                'best_IS': 0,
                'best_FID': 999999
            }
            weights_root = './external/BigGANPyTorch/weights'
            experiment_name = 'BigGAN_ch96_bs256x8_138k'
            load_weights = ''
            print('load biggan D...')
            self.biggan_D = D_biggan(**config_fixed).cuda()
            utils_biggan.load_weights(None,
                                      self.biggan_D,
                                      state_dict,
                                      weights_root,
                                      experiment_name,
                                      '',
                                      None,
                                      strict=False,
                                      load_optim=False)