Example 1
    def convert(self, data_dir):
        self.reset_idx_counter()
        if not os.path.exists(data_dir):
            raise ValueError('data dir {} does not exist'.format(data_dir))
        # load raw data
        train_data = load_json(os.path.join(data_dir, 'train.json'))
        val_data = load_json(os.path.join(data_dir, 'val_2.json'))
        test_data = load_json(os.path.join(data_dir, 'val_1.json'))
        # process data
        train_set = self.process_data(train_data, scope='train')
        val_set = self.process_data(val_data, scope='val')
        test_set = self.process_data(test_data, scope='test')
        return train_set, val_set, test_set
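
Every snippet on this page calls a small load_json helper that is not shown. A minimal sketch, assuming it just parses a UTF-8 JSON file into a Python object (the behavior is inferred, not taken from the source project):

import json


def load_json(filename):
    # Assumed helper: read a JSON file and return the parsed object.
    with open(filename, mode='r', encoding='utf-8') as f:
        return json.load(f)
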
Example 2
    def convert(self, data_dir):
        self.reset_idx_counter()
        if not os.path.exists(data_dir):
            raise ValueError('data dir {} does not exist'.format(data_dir))
        # load raw data
        charades = load_json(os.path.join(data_dir, 'charades.json'))
        train_data = load_lines(
            os.path.join(data_dir, 'charades_sta_train.txt'))
        test_data = load_lines(os.path.join(data_dir, 'charades_sta_test.txt'))
        # process data
        train_set = self.process_data(train_data, charades, scope='train')
        test_set = self.process_data(test_data, charades, scope='test')
        return train_set, None, test_set  # train/val/test
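
This example also uses a load_lines helper. A plausible sketch, assuming it returns the stripped, non-empty lines of a text file (again an assumption, not the project's actual code):

def load_lines(filename):
    # Assumed helper: read a text file and return its stripped, non-empty lines.
    with open(filename, mode='r', encoding='utf-8') as f:
        return [line.strip() for line in f if line.strip()]
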
Example 3
                    torch.save(
                        model.state_dict(),
                        os.path.join(
                            model_dir,
                            '{}_{}.t7'.format(configs.model_name,
                                              global_step)))
                    # only keep the top-3 model checkpoints
                    filter_checkpoints(model_dir, suffix='t7', max_to_keep=3)
                model.train()
    score_writer.close()

elif configs.mode.lower() == 'test':
    if not os.path.exists(model_dir):
        raise ValueError('No pre-trained weights exist')
    # load previous configs
    pre_configs = load_json(os.path.join(model_dir, "configs.json"))
    parser.set_defaults(**pre_configs)
    configs = parser.parse_args()
    # build model
    model = VSLNet(configs=configs,
                   word_vectors=dataset['word_vector']).to(device)
    # get last checkpoint file
    filename = get_last_checkpoint(model_dir, suffix='t7')
    model.load_state_dict(torch.load(filename))
    model.eval()
    r1i3, r1i5, r1i7, mi, _ = eval_test(model=model,
                                        data_loader=test_loader,
                                        device=device,
                                        mode='test')
    print("\n" + "\x1b[1;31m" + "Rank@1, IoU=0.3:\t{:.2f}".format(r1i3) +
          "\x1b[0m",
Example 4
    def validate(self, previous_loss):
        '''
        Run validation on the MUSDB18 'valid' split.

        Args:
            previous_loss: tuple, the best (accompaniment, vocal) loss so far

        Returns:
            tuple, the updated best (accompaniment, vocal) validation loss
        '''
        self.mus = musdb.DB(self.MUSDB_PATH,
                            is_wav=True,
                            subsets='train',
                            split='valid')
        loss = torch.nn.L1Loss()
        conf = load_json(self.project_root + "config/json/" + self.model_name +
                         ".json")
        decrease_ratio = conf['decrease_ratio']
        bac_loss = []
        voc_loss = []
        t_start = time.time()
        with torch.no_grad():
            for track in self.mus:
                print(track.name)
                # if("Alexander Ross - Goodbye Bolero" in track.name): # todo this song is broken on my server
                #     continue
                bac = track.targets['accompaniment'].audio
                voc = track.targets['vocals'].audio
                for i in range(len(self.start)):
                    portion_start, portion_end, real_end = self.start[
                        i], self.end[i], self.realend[i]
                    reference_bac = self.seg(bac, portion_start, real_end)
                    reference_voc = self.seg(voc, portion_start, real_end)
                    input_bac = self.pre_pro(torch.Tensor(reference_bac))
                    input_voc = self.pre_pro(torch.Tensor(reference_voc))
                    input_f_background, input_f_vocals = before_forward_f(
                        input_bac,
                        input_voc,
                        subband_num=self.split_band,
                        device=self.device,
                        sample_rate=self.sample_rate,
                        normalize=False)
                    input_f = (input_f_vocals + input_f_background)
                    self.model.eval()
                    out_bac = input_f * self.model(0, input_f)
                    out_voc = input_f * self.model(1, input_f)
                    self.model.train()
                    bac_loss.append(float(loss(input_f_background, out_bac)))
                    voc_loss.append(float(loss(input_f_vocals, out_voc)))
        t_end = time.time()
        ret = (np.average(bac_loss), np.average(voc_loss))
        print("decrease-rate-threshold:", decrease_ratio)
        print("Validation time usage:", t_end - t_start, "s")
        print("Result:   ", "bac-", ret[0], "voc-", ret[1])
        print("Previous: ", "bac-", previous_loss[0], "voc-", previous_loss[1])
        if (previous_loss[0] is None):
            return ret
        if (ret[0] / previous_loss[0] < decrease_ratio
                or ret[1] / previous_loss[1] < decrease_ratio):
            try:
                print("Save model")
                torch.save(
                    self.model.state_dict(),
                    self.project_root + "saved_models/" + self.model_name +
                    "/model" + str(self.model.cnt) + ".pth")
                self.evaluate(save_wav=True, save_json=True)
                self.split_listener()
                return ret
            except Exception as e:
                logging.exception(e)
                return ret
        else:
            return previous_loss
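
The acceptance test at the end of validate() keeps the new loss tuple only if either component dropped below decrease_ratio times the previous value. A small self-contained illustration of that rule (the 0.98 threshold is just an example value; the real one is read from the config json):

# Illustrative values only; decrease_ratio normally comes from the model's config file.
previous_loss = (0.30, 0.40)   # previous best (accompaniment, vocal) loss
ret = (0.28, 0.41)             # new validation result
decrease_ratio = 0.98
accept = (ret[0] / previous_loss[0] < decrease_ratio
          or ret[1] / previous_loss[1] < decrease_ratio)
print(accept)  # True: 0.28 / 0.30 ≈ 0.93 < 0.98, so the model is saved and ret is returned
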
Example 5
    def evaluate(self, save_wav=True, save_json=True):
        '''
        Run evaluation on the MUSDB18 test set.

        Args:
            save_wav: boolean, save the separated audio of each track
            save_json: boolean, save the result json
        '''
        def __fm(num):
            return format(num, ".2f")

        def __get_appropriate_keys():
            keys = []
            for each in list(res.keys()):
                if ("ALL" not in each):
                    keys.append(each)
            return keys

        def __get_key_average(key, keys):
            util_list = [res[each][key] for each in keys]
            return np.mean(util_list)  # sum(util_list) / (len(util_list) - 1)

        def __get_key_median(key, keys):
            util_list = [res[each][key] for each in keys]
            return np.median(util_list)

        def __get_key_std(key, keys):
            util_list = [res[each][key] for each in keys]
            return np.std(util_list)

        def __roc_val(item, key: list, value: list):
            for each in zip(key, value):
                res[item][each[0]] = each[1]

        def __cal_avg_val(keys: list):
            proper_keys = __get_appropriate_keys()
            for each in keys:
                res["ALL_median"][each] = 0
                res["ALL_mean"][each] = 0
                res["ALL_std"][each] = 0
                res["ALL_median"][each] = __get_key_median(each, proper_keys)
                res["ALL_mean"][each] = __get_key_average(each, proper_keys)
                res["ALL_std"][each] = __get_key_std(each, proper_keys)
                print(each, ":")
                print(__fm(res["ALL_median"][each]), ",",
                      __fm(res["ALL_mean"][each]), ",",
                      __fm(res["ALL_std"][each]))

        self.mus = musdb.DB(self.MUSDB_PATH, is_wav=True, subsets='test')
        json_file_alias = self.project_root + "outputs/musdb_test/" + self.model_name + str(
            self.start_point) + "/result_" + self.model_name + str(
                self.start_point) + ".json"
        bac_keys = ["mus_sdr_bac", "mus_isr_bac", "mus_sir_bac", "mus_sar_bac"]
        voc_keys = ["mus_sdr_voc", "mus_isr_voc", "mus_sir_voc", "mus_sar_voc"]
        save_pth = self.project_root + "outputs/musdb_test/" + self.model_name + str(
            self.start_point)
        # if(os.path.exists(save_pth)):
        #     print("Already exist: ", save_pth)
        #     return
        # todo: the "@" suffix deliberately breaks this check so the cached result json is never loaded
        if os.path.exists(json_file_alias + "@"):
            res = load_json(json_file_alias)
            # print("Find:",res)
            res["ALL_median"] = {}
            res["ALL_mean"] = {}
            res["ALL_std"] = {}
        else:
            res = {}
            res["ALL_median"] = {}
            res["ALL_mean"] = {}
            res["ALL_std"] = {}
            dir_pth = self.test_pth
            pth = os.listdir(dir_pth)
            pth.sort()
            for cnt, track in enumerate(self.mus):
                # print("evaluating: ", track.name)
                res[track.name] = {}
                try:
                    print("......................")
                    background, vocal, origin_background, origin_vocal = self.split(
                        track,
                        save=save_wav,
                        save_path=save_pth + "/",
                        fname=track.name,
                    )
                    eval_targets = ['vocals', 'accompaniment']
                    origin, estimate = {}, {}
                    origin[eval_targets[0]], origin[
                        eval_targets[1]] = origin_vocal, origin_background
                    estimate[eval_targets[0]], estimate[
                        eval_targets[1]] = vocal, background
                    data = eval_mus_track(origin,
                                          estimate,
                                          output_dir=save_pth,
                                          track_name=track.name)
                    print(data)
                    museval_res = data.get_result()
                    bac_values = [
                        museval_res['accompaniment']['SDR'],
                        museval_res['accompaniment']['ISR'],
                        museval_res['accompaniment']['SIR'],
                        museval_res['accompaniment']['SAR']
                    ]
                    voc_values = [
                        museval_res['vocals']['SDR'],
                        museval_res['vocals']['ISR'],
                        museval_res['vocals']['SIR'],
                        museval_res['vocals']['SAR']
                    ]
                    __roc_val(track.name, bac_keys, bac_values)
                    __roc_val(track.name, voc_keys, voc_values)

                except Exception as e:
                    print("ERROR: splitting error...")
                    logging.exception(e)

        print("Result:")
        print("Median,", "Mean,", "Std")
        __cal_avg_val(bac_keys)
        __cal_avg_val(voc_keys)

        if save_json:
            if (not os.path.exists(self.project_root + "outputs/musdb_test/" +
                                   self.model_name + str(self.start_point))):
                os.mkdir(self.project_root + "outputs/musdb_test/" +
                         self.model_name + str(self.start_point))
            write_json(
                res, self.project_root + "outputs/musdb_test/" +
                self.model_name + str(self.start_point) + "/result_" +
                self.model_name + str(self.start_point) + ".json")
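
write_json is the counterpart of load_json and is likewise not shown. A minimal sketch, assuming it simply serializes a dict to a JSON file (an assumption, not the project's definition):

import json


def write_json(obj, filename):
    # Assumed helper: serialize `obj` to a JSON file, overwriting any existing file.
    with open(filename, mode='w', encoding='utf-8') as f:
        json.dump(obj, f, indent=2)
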
Example 6
    def load_stored_result(self, json_path):
        self.scores = load_json(json_path)
Example 7
    def refresh_configuration(cls, path_to_config_json):
        conf_json = load_json(path_to_config_json)

        # Data path
        Config.MUSDB18_PATH = conf_json['PATH']['MUSDB18_PATH']

        # Model configurations
        Config.sources = conf_json['MODEL']['sources']
        Config.model_name = conf_json['MODEL'][
            'model_name']  # ["Unet-6" "MMDenseNet" "MDenseNet"]

        # Split four bands
        Config.subband = conf_json['SUBBAND']['number']

        # Validation loss decrease threshold
        Config.decrease_ratio = conf_json["VALIDATION"]['decrease_ratio']

        # Reload pre-trained model
        Config.load_model_path = conf_json['MODEL']['PRE-TRAINED'][
            'load_model_path']
        Config.start_point = conf_json['MODEL']['PRE-TRAINED']['start_point']

        # Hyper-params
        Config.epoches = conf_json["TRAIN"]['epoches']
        Config.learning_rate = conf_json["TRAIN"]['learning_rate']['initial']
        Config.batch_size = conf_json["TRAIN"]['batchsize']
        Config.accumulation_step = conf_json["TRAIN"]['accumulation_step']
        Config.gamma = conf_json["TRAIN"]['learning_rate']['gamma_decrease']

        Config.frame_length = conf_json["TRAIN"]['frame_length']
        Config.drop_rate = conf_json["TRAIN"]['dropout']

        # Training
        Config.device_str = conf_json["TRAIN"]['device_str']

        # loss components
        Config.loss_component = conf_json["TRAIN"]['loss']

        # Additional data
        ## vocal data
        Config.additional_vocal_data = conf_json["PATH"]['additional_data'][
            "additional_vocal_path"]

        ### background data
        Config.additional_accompaniment_data = conf_json["PATH"][
            'additional_data']["additional_accompaniments_path"]

        # TRAIN
        Config.every_n = conf_json["LOG"]["every_n"]
        Config.show_model_structure = conf_json["LOG"]["show_model_structure"] == 1
        ##############################################################################
        # Auto generated parameters

        Config.conf = {}
        Config.project_root = os.getcwd() + "/"
        Config.sample_rate = 44100
        if (Config.model_name == "Unet-5"):
            Config.model_name_alias = "_unet_5_"
        if (Config.model_name == "Unet-6"):
            Config.model_name_alias = "_unet_6_"
        elif (Config.model_name == "MMDenseNet"):
            Config.model_name_alias = "MMDenseNet"
        elif (Config.model_name == "MDenseNet"):
            Config.model_name_alias = "MDenseNet"

        Config.num_workers = Config.batch_size
        if (len(Config.additional_vocal_data) != 0
                or len(Config.additional_accompaniment_data) != 0):
            Config.BIG_DATA = True
        else:
            Config.BIG_DATA = False

        if (Config.BIG_DATA):
            Config.step_size = int(180000 / Config.batch_size)  # Every 45 h
        else:
            Config.step_size = int(72000 / Config.batch_size)  # Every 30 h

        if (not Config.BIG_DATA):
            Config.validation_interval = int(3600 /
                                             Config.batch_size)  # Every 1.5 h
        else:
            Config.validation_interval = int(18000 /
                                             Config.batch_size)  # Every 4.5h

        Config.split_band = Config.subband != 1

        if "cuda" in str(Config.device_str):
            Config.use_gpu = True
        else:
            Config.use_gpu = False
        Config.device = torch.device(
            Config.device_str if Config.use_gpu else "cpu")

        # Build trail name
        cur = datetime.datetime.now()
        Config.trail_name = str(cur.year) + "_" + str(cur.month) + "_" + str(
            cur.day) + "_" + Config.model_name_alias + "sf" + str(
                Config.start_point) + "_"
        Config.counter = 1
        for each in os.listdir(Config.project_root + "saved_models"):
            t = str(cur.year) + "_" + str(cur.month) + "_" + str(cur.day)
            if (t in each):
                for dirName in os.listdir(Config.project_root +
                                          "saved_models/" + each):
                    if ("model" in dirName):
                        Config.counter += 1
                        break

        Config.trail_name = str(Config.counter) + "_" + Config.trail_name
        for each in Config.loss_component:
            Config.trail_name += each + "_"
        Config.trail_name.strip("_")
        Config.trail_name += "_BD_" + str(Config.BIG_DATA) + "_lr" + str(Config.learning_rate).split(".")[-1] + "_" \
                      + "bs" + str(Config.batch_size) + "-" + str(Config.accumulation_step) + "_" \
                      + "fl" + str(Config.frame_length) + "_" \
                      + "ss" + str(Config.step_size) + "_" + str(Config.gamma).split(".")[-1] \
                      + "drop" + str(Config.drop_rate) \
                      + "split_band" + str(Config.split_band) + "_" + str(Config.subband)
        # +"emptyN"+str(empty_every_n)\
        Config.conf['model_name'] = Config.model_name
        Config.conf['split_band'] = Config.split_band
        Config.conf['decrease_ratio'] = Config.decrease_ratio
        Config.conf['start_point'] = Config.start_point
        Config.conf['learning_rate'] = Config.learning_rate
        Config.conf['batch_size'] = Config.batch_size
        Config.conf['accumulation_step'] = Config.accumulation_step
        Config.conf['step_size'] = Config.step_size
        Config.conf['gamma'] = Config.gamma
        Config.conf['sample_rate'] = Config.sample_rate
        Config.conf['frame_length'] = Config.frame_length
        Config.conf['drop_rate'] = Config.drop_rate

        # write the config only after it has been populated, otherwise an empty dict is saved
        print(
            "Write config file at: ",
            Config.project_root + "config/json/" + Config.trail_name + ".json")
        write_json(
            Config.conf,
            Config.project_root + "config/json/" + Config.trail_name + ".json")

        find_and_build(Config.project_root, "outputs")
        find_and_build(Config.project_root, "outputs/listener")
        find_and_build(Config.project_root, "outputs/musdb_test")
        find_and_build(Config.project_root, "saved_model")
        find_and_build(Config.project_root, "config/json")
        find_and_build(Config.project_root, "evaluate/listener_todo")
Example 8
def gen_or_load_dataset(configs):
    if not os.path.exists(configs.save_dir):
        os.makedirs(configs.save_dir)
    data_dir = os.path.join('data', 'dataset', configs.task)
    feature_dir = os.path.join('data', 'features', configs.task, configs.fv)
    if configs.suffix is None:
        save_path = os.path.join(
            configs.save_dir,
            '_'.join([configs.task, configs.fv,
                      str(configs.max_pos_len)]) + '.pkl')
    else:
        save_path = os.path.join(
            configs.save_dir, '_'.join([
                configs.task, configs.fv,
                str(configs.max_pos_len), configs.suffix
            ]) + '.pkl')
    if os.path.exists(save_path):
        dataset = load_pickle(save_path)
        return dataset
    feat_len_path = os.path.join(feature_dir, 'feature_shapes.json')
    emb_path = os.path.join('data', 'features', 'glove.840B.300d.txt')
    # load video feature length
    vfeat_lens = load_json(feat_len_path)
    for vid, vfeat_len in vfeat_lens.items():
        vfeat_lens[vid] = min(configs.max_pos_len, vfeat_len)
    # load data
    if configs.task == 'charades':
        processor = CharadesProcessor()
    elif configs.task == 'activitynet':
        processor = ActivityNetProcessor()
    elif configs.task == 'tacos':
        processor = TACoSProcessor()
    else:
        raise ValueError('Unknown task {}!!!'.format(configs.task))
    train_data, val_data, test_data = processor.convert(data_dir)
    # generate dataset
    data_list = [train_data, test_data
                 ] if val_data is None else [train_data, val_data, test_data]
    word_dict, char_dict, vectors = vocab_emb_gen(data_list, emb_path)
    train_set = dataset_gen(train_data, vfeat_lens, word_dict, char_dict,
                            configs.max_pos_len, 'train')
    val_set = None if val_data is None else dataset_gen(
        val_data, vfeat_lens, word_dict, char_dict, configs.max_pos_len, 'val')
    test_set = dataset_gen(test_data, vfeat_lens, word_dict, char_dict,
                           configs.max_pos_len, 'test')
    # save dataset
    n_val = 0 if val_set is None else len(val_set)
    dataset = {
        'train_set': train_set,
        'val_set': val_set,
        'test_set': test_set,
        'word_dict': word_dict,
        'char_dict': char_dict,
        'word_vector': vectors,
        'n_train': len(train_set),
        'n_val': n_val,
        'n_test': len(test_set),
        'n_words': len(word_dict),
        'n_chars': len(char_dict)
    }
    save_pickle(dataset, save_path)
    return dataset
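
load_pickle and save_pickle are simple (de)serialization helpers that are not shown. A plausible sketch (assumed, not the project's actual code):

import pickle


def load_pickle(filename):
    # Assumed helper: deserialize a pickled object from disk.
    with open(filename, mode='rb') as f:
        return pickle.load(f)


def save_pickle(obj, filename):
    # Assumed helper: pickle `obj` to disk with the highest available protocol.
    with open(filename, mode='wb') as f:
        pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)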