def __getitem__(self, idx):
    # select the speaker for this index
    selected_spk = self.spk_list[idx]
    spk_id = int(re.findall(r"(\d+)", selected_spk)[0])

    # list the utterance feature files of the selected speaker
    utters = os.listdir(os.path.join(self.path, selected_spk))
    if self.shuffle:
        # sample M random utterances for this speaker
        utters_sample = sample(utters, self.utter_num)
    else:
        # take a fixed window of M utterances
        utters_sample = utters[self.utter_start:self.utter_start +
                               self.utter_num]

    # load spectrograms: [batch(M), n_mels, frames]
    utterance = []
    for utt in utters_sample:
        utterance.append(
            load_feat(os.path.join(self.path, selected_spk, utt)))

    utterance = np.array(utterance)
    # utterance = utterance[:, :, :160]  # TODO: implement variable-length batches
    # transpose to [batch, frames, n_mels]
    utterance = torch.tensor(np.transpose(utterance, axes=(0, 2, 1)))
    return utterance, spk_id
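A minimal sketch of how a dataset built around this method is typically consumed, assuming fixed-length utterances (see the TODO above); the class name SpeakerDataset, the path, and the N/M batch dimensions are assumptions, not taken from the example:

# Hypothetical usage; SpeakerDataset is assumed to wrap the __getitem__ above.
from torch.utils.data import DataLoader

dataset = SpeakerDataset(path='./train_tisv', utter_num=M, shuffle=True)  # M utterances per speaker
loader = DataLoader(dataset, batch_size=N, shuffle=True, drop_last=True)  # N speakers per batch

for mel_db_batch, spk_ids in loader:
    # each item is [M, frames, n_mels], so a batch is [N, M, frames, n_mels]
    n, m, frames, n_mels = mel_db_batch.shape
    mel_db_batch = mel_db_batch.reshape(n * m, frames, n_mels)  # flatten for the encoder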
    def __init__(self, root, train=True, verbose=True):
        self.root_dir = root
        self.train = train
        self.verbose = verbose

        if self.train:
            train_feat_path = os.path.join(self.root_dir, cfg.TRAIN_TEXT_FEAT_FILE_NAME)
            self.train_data, self.train_labels, _ = load_feat(train_feat_path)
            self.train_ids = np.array(range(len(self.train_labels)))
            if self.verbose:
                print('Loading {} training items'.format(len(self.train_labels)))
        else:
            test_feat_path = os.path.join(self.root_dir, cfg.TEST_TEXT_FEAT_FILE_NAME)
            self.test_data, self.test_labels, _ = load_feat(test_feat_path)
            self.test_ids = np.array(range(len(self.test_labels)))
            if self.verbose:
                print('Loading {} testing items'.format(len(self.test_labels)))
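Note the convention shared by this and the later text-clustering examples: load_feat returns a (features, labels, ids) triple. A hedged sketch of a matching reader, assuming an HDF5 layout (Example #8 loads '.h5' files); the dataset key names are guesses, not taken from the examples:

# Hypothetical reader matching the (features, labels, ids) call sites.
import h5py
import numpy as np

def load_feat(feat_path):
    with h5py.File(feat_path, 'r') as f:
        feat = np.asarray(f['feature'])
        labels = np.asarray(f['label'])
        ids = np.asarray(f['id'])
    return feat, labels, ids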
Example #4
    def load_ins_feat(self, train_file, eval_file, feat_file):
        # load training and evaluation instances through the shared context self.sc
        [self.train_ins,
         self.train_ins_count] = utils.load_ins(self.sc, train_file)

        [self.eval_ins,
         self.eval_ins_count] = utils.load_ins(self.sc, eval_file)

        # feature dictionary plus one zero-initialized weight per feature
        self.feat_dict = utils.load_feat(self.sc, feat_file)
        self.feat_weight = [0.0] * len(self.feat_dict)
Example #5
def __getitem__(self, idx):
    selected_utt = self.utt_list[idx]
    # the parent directory name carries the numeric speaker id
    spk_id = int(
        re.findall(r"(\d+)",
                   os.path.basename(os.path.dirname(selected_utt)))[0])
    # load a single utterance and add a batch axis: [1, frames, n_mels]
    utterance = load_feat(selected_utt)
    utterance = np.array(utterance)
    utterance = np.expand_dims(utterance, axis=0)
    utterance = torch.tensor(np.transpose(utterance, axes=(0, 2, 1)))
    return utterance, spk_id
Example #6
def load_feat_batch(paths):
    """Load variable-length feature files and zero-pad them into a
    single [batch, max_length, dim] tensor, returning the true lengths."""
    features = []
    lengths = []
    for path in paths:
        feature = utils.load_feat(path)
        feature = torch.from_numpy(feature)
        features.append(feature)
        lengths.append(feature.shape[0])
    max_length = max(lengths)
    dim = features[0].shape[1]  # feature dimension
    padded_features = torch.zeros(len(lengths), max_length, dim)
    for i in range(len(lengths)):
        padded_features[i, :lengths[i], :] = features[i]
    return padded_features, torch.tensor(lengths).long()
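The lengths tensor returned here pairs naturally with PyTorch's packed-sequence utilities; a brief usage sketch (paths and the GRU dimensions are placeholders, not from the original):

# Hypothetical usage of load_feat_batch with an RNN.
import torch
from torch.nn.utils.rnn import pack_padded_sequence

padded, lengths = load_feat_batch(paths)
packed = pack_padded_sequence(padded, lengths, batch_first=True,
                              enforce_sorted=False)
rnn = torch.nn.GRU(input_size=padded.shape[-1], hidden_size=256, batch_first=True)
output, hidden = rnn(packed)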
Example #7
    def __init__(self, root_dir, verbose=True, layer_norm=True, split_sents=False,
                 semi_supervised=False, use_cuda=torch.cuda.is_available()):
        self.root_dir = root_dir
        self.verbose = verbose
        self.use_cuda = use_cuda
        self.semi_supervised = semi_supervised

        if self.semi_supervised:
            constraint_file_path = os.path.join(self.root_dir, cfg.CONSTRAINTS_NAME + '.csv')
            self.constraints = load_constraint_file(constraint_file_path)

        train_feat_path = os.path.join(self.root_dir, cfg.TRAIN_TEXT_FEAT_FILE_NAME)
        self.train_fixed_features, self.train_labels, _ = load_feat(train_feat_path)
        self.train_ids = np.array(range(len(self.train_labels)))
        if self.verbose:
            print('Loading {} training items'.format(len(self.train_labels)))
Example #8
def get_args():
    parser = argparse.ArgumentParser()
    # (the original defines further arguments, e.g. --corpora_id, before this one)
    parser.add_argument('--batch_size', type=int, default=16, help='batch_size')
    args = parser.parse_args()
    return args

args = get_args()
assert 0 <= args.corpora_id <= 2

from collections import namedtuple
ARGS = namedtuple('ARGS', ['corpora_id', 'batch_size'])
for corpora_id in range(3, 7):
    args = ARGS(corpora_id=corpora_id, batch_size=32)
    corpora_name = data_dict[args.corpora_id]
    n_clusters = n_cluster_dict[args.corpora_id]

    root_dir = os.path.join('data', corpora_name)
    for feat_id, feat_name in feat_dict.items():
        train_feat_path = os.path.join(root_dir, feat_name + '.h5')
        raw_train_feat, labels, _ = load_feat(train_feat_path)
        for feat_func_name, feat_func in feat_func_dict.items():
            t_raw_train_feat = feat_func(raw_train_feat)
            for n_components in [50, 100, 200, 300]:
                try:
                    feat = pca_feat_transform(t_raw_train_feat, hidden_dim=n_components)
                except Exception:
                    continue
                trial_num = 10
                best_acc = 0.0
                best_pred = None
                all_pred = []
                all_acc = []
                all_nmi = []
                all_ari = []
                for i in range(trial_num):
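The example is truncated at the trial loop. A minimal sketch of what each trial plausibly computes, assuming scikit-learn's KMeans; cluster_acc is a hypothetical Hungarian-matching accuracy helper not shown in the original:

# Hypothetical completion of the trial loop; metric functions are scikit-learn's.
from sklearn.cluster import KMeans
from sklearn.metrics import normalized_mutual_info_score, adjusted_rand_score

for i in range(trial_num):
    pred = KMeans(n_clusters=n_clusters, n_init=10).fit_predict(feat)
    acc = cluster_acc(labels, pred)  # hypothetical Hungarian-matching accuracy
    all_pred.append(pred)
    all_acc.append(acc)
    all_nmi.append(normalized_mutual_info_score(labels, pred))
    all_ari.append(adjusted_rand_score(labels, pred))
    if acc > best_acc:
        best_acc = acc
        best_pred = pred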
Example #9
    # n_clusters = 4
    # data_dir = 'data/ag_news/'
    data_dir = args.data_dir
    n_clusters = args.n_clusters
    use_cuda = torch.cuda.is_available()
    random_seed = args.seed
    recons_lam = args.recons_lam
    cluster_lam = args.cluster_lam
    batch_size = args.batch_size
    tol = args.tol
    lr = args.lr

    initialize_environment(random_seed=random_seed, use_cuda=use_cuda)

    feat_path = os.path.join(data_dir, cfg.TRAIN_TEXT_FEAT_FILE_NAME)
    feat, labels, ids = load_feat(feat_path)
    outputdir = get_output_dir(data_dir)
    net_filename = os.path.join(outputdir, cfg.PRETRAINED_FAE_FILENAME)
    checkpoint = torch.load(net_filename)
    net = extract_sdae_model(input_dim=cfg.INPUT_DIM,
                             hidden_dims=cfg.HIDDEN_DIMS)
    net.load_state_dict(checkpoint['state_dict'])
    if use_cuda:
        net.cuda()

    dcn = DCN(n_clusters,
              net,
              cfg.HIDDEN_DIMS[-1],
              lr=lr,
              tol=tol,
              batch_size=batch_size,
Example #10
    # there are some problems when loading CUDA-pretrained models
    fd_ae.load_state_dict(checkpoint['state_dict'])
    if use_cuda:
        fd_ae.cuda()
    return fd_ae


def dump_fd_autoencoder(net_filename, net):
    torch.save({'state_dict': net.state_dict()}, net_filename)
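
A quick round-trip sketch for these two helpers; only the {'state_dict': ...} checkpoint format comes from the code above, while the file name and the model construction via extract_sdae_model mirror Example #9 and are otherwise assumptions:

# Hypothetical round trip: save a network, then restore it.
net = extract_sdae_model(input_dim=cfg.INPUT_DIM, hidden_dims=cfg.HIDDEN_DIMS)
dump_fd_autoencoder('fd_ae.pth', net)
fd_ae = load_pretrained_fd_autoencoder('fd_ae.pth', use_cuda=False)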


args = parse_args()
outputdir = get_output_dir(args.db_dir)
net_filename = os.path.join(outputdir, cfg.PRETRAINED_FAE_FILENAME)
feat_filename = os.path.join(args.db_dir, cfg.TRAIN_TEXT_FEAT_FILE_NAME)
feat, _, _ = load_feat(feat_filename)
data_size = feat.shape[0]
batch_size = args.batch_size
nepochs = args.nepochs
n_clusters = args.n_clusters
use_cuda = torch.cuda.is_available()
lr = args.lr
gamma = args.gamma
print_every = args.print_every

print('Get pseudo labels by KMeans')
pseu_labels = feat_cluster(feat, n_clusters=n_clusters)

print('Get pretrained SDAE')
ae_net = load_pretrained_fd_autoencoder(net_filename, use_cuda=use_cuda)
classifier = torch.nn.Linear(cfg.HIDDEN_DIMS[-1], n_clusters)
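feat_cluster itself is not shown in any of these examples; a plausible minimal implementation, assuming scikit-learn and inferring the signature from the call site above:

# Hypothetical implementation of feat_cluster, inferred from its call site.
from sklearn.cluster import KMeans

def feat_cluster(feat, n_clusters):
    # fit KMeans on the raw features and return one pseudo label per sample
    return KMeans(n_clusters=n_clusters, n_init=10).fit_predict(feat)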