# Example 1
def _framewise_rsa_cor(features, labels, size, splitseed):
    """Split off a validation set and return the point-biserial correlation
    between label identity and frame-activation cosine similarity.

    :param features: array-like of per-frame activation vectors.
    :param labels: array-like of per-frame labels (e.g. phoneme identities).
    :param size: validation-set size passed to ``train_test_split``.
    :param splitseed: random seed so every call uses the same split.
    :return: scalar correlation (Python float).
    """
    from sklearn.model_selection import train_test_split
    # Only the held-out validation half is used for the correlation.
    _, X_val, _, y_val = train_test_split(features,
                                          labels,
                                          test_size=size,
                                          random_state=splitseed)
    logging.info(
        "Computing label identity matrix for {} datapoints".format(
            len(y_val)))
    # Binary similarity: 1.0 where two frames share a label, else 0.0.
    y_val_sim = torch.tensor(y_val.reshape((-1, 1)) == y_val).float()
    logging.info(
        "Computing activation similarities for {} datapoints".format(
            len(X_val)))
    X_val = torch.tensor(X_val).float()
    X_val_sim = S.cosine_matrix(X_val, X_val)
    # Correlate the upper triangles (pairwise similarities without diagonal).
    return S.pearson_r(S.triu(y_val_sim), S.triu(X_val_sim)).item()


def framewise_RSA_matrix(directory, layers, size=70000):
    """Compute framewise RSA correlations for MFCC input features and for
    each model layer, for both the trained and the randomly-initialized model.

    :param directory: directory containing ``local_input.pkl`` and
        ``local_{mode}_{layer}.pkl`` activation files.
    :param layers: iterable of layer names to evaluate.
    :param size: validation-set size for the train/test split.
    :return: list of dicts with keys ``model``, ``layer``, ``cor``.
    """
    splitseed = random.randint(0, 1024)
    result = []
    data = pickle.load(open("{}/local_input.pkl".format(directory), "rb"))
    for mode in ["trained", "random"]:
        # MFCC input features are model-independent, so the correlation
        # computed on the first pass is reused for the second mode.
        mfcc_cor = [item['cor'] for item in result if item['layer'] == 'mfcc']
        if mfcc_cor:
            logging.info("Result for MFCC computed previously")
            result.append(dict(model=mode, layer='mfcc', cor=mfcc_cor[0]))
        else:
            cor = _framewise_rsa_cor(data['features'], data['labels'],
                                     size, splitseed)
            logging.info("Point biserial correlation for {}, mfcc: {}".format(
                mode, cor))
            result.append(dict(model=mode, layer='mfcc', cor=cor))
        for layer in layers:
            logging.info("Loading phoneme data for {} {}".format(mode, layer))
            # Use a distinct name so the MFCC input `data` is not clobbered.
            layer_data = pickle.load(
                open("{}/local_{}_{}.pkl".format(directory, mode, layer),
                     "rb"))
            cor = _framewise_rsa_cor(layer_data[layer]['features'],
                                     layer_data[layer]['labels'],
                                     size, splitseed)
            logging.info("Point biserial correlation for {}, {}: {}".format(
                mode, layer, cor))
            result.append(dict(model=mode, layer=layer, cor=cor))
    return result
# Example 2
def train_wa(edit_sim,
             edit_sim_val,
             stack,
             stack_val,
             attention='scalar',
             attention_hidden_size=None,
             epochs=1,
             device='cpu'):
    """Train an attention-based pooling layer to maximize the Pearson
    correlation between pooled-activation cosine similarities and string
    edit similarities.

    :param edit_sim: precomputed edit-similarity matrix (training items).
    :param edit_sim_val: edit-similarity matrix (validation items).
    :param stack: list of activation tensors (training items).
    :param stack_val: list of activation tensors (validation items).
    :param attention: pooling type: 'scalar', 'linear', 'mean', or anything
        else for the generic ``Attention`` module.
    :param attention_hidden_size: hidden size for scalar/generic attention.
    :param epochs: number of optimization epochs.
    :param device: torch device for the pooling module.
    :return: dict with the best validation ``epoch`` and its ``cor``
        (correlation); for 'mean' pooling, ``epoch`` is None since no
        training is needed.
    """
    if attention == 'scalar':
        # BUGFIX: the original referenced the undefined name `hidden_size`,
        # raising NameError on the default path; the intended argument is
        # `attention_hidden_size`.
        wa = platalea.attention.ScalarAttention(stack[0].size(2),
                                                attention_hidden_size).to(device)
    elif attention == 'linear':
        wa = platalea.attention.LinearAttention(stack[0].size(2)).to(device)
    elif attention == 'mean':
        # Mean pooling has no parameters: evaluate on validation data and
        # return immediately.
        wa = platalea.attention.MeanPool().to(device)
        avg_pool_val = torch.cat([wa(item) for item in stack_val])
        avg_pool_sim_val = S.cosine_matrix(avg_pool_val, avg_pool_val)
        cor_val = S.pearson_r(S.triu(avg_pool_sim_val), S.triu(edit_sim_val))
        return {'epoch': None, 'cor': cor_val.item()}
    else:
        wa = platalea.attention.Attention(stack[0].size(2),
                                          attention_hidden_size).to(device)

    optim = torch.optim.Adam(wa.parameters())
    # NOTE(review): minloss starts at 0, so epochs are only recorded when
    # the validation correlation is non-negative; with all-negative
    # correlations this returns epoch=None, cor=0 — confirm intended.
    minloss = 0
    minepoch = None
    logging.info("Optimizing for {} epochs".format(epochs))
    for epoch in range(1, 1 + epochs):
        # Loss is the negated Pearson r, so minimizing maximizes correlation.
        avg_pool = torch.cat([wa(item) for item in stack])
        avg_pool_sim = S.cosine_matrix(avg_pool, avg_pool)
        loss = -S.pearson_r(S.triu(avg_pool_sim), S.triu(edit_sim))
        with torch.no_grad():
            avg_pool_val = torch.cat([wa(item) for item in stack_val])
            avg_pool_sim_val = S.cosine_matrix(avg_pool_val, avg_pool_val)
            loss_val = -S.pearson_r(S.triu(avg_pool_sim_val),
                                    S.triu(edit_sim_val))
        logging.info("{} {} {}".format(epoch, -loss.item(), -loss_val.item()))
        if loss_val.item() <= minloss:
            minloss = loss_val.item()
            minepoch = epoch
        optim.zero_grad()
        loss.backward()
        optim.step()
        # Release CUDA-allocated tensors
        del loss, loss_val, avg_pool, avg_pool_sim, avg_pool_val, avg_pool_sim_val
    del wa, optim
    return {'epoch': minepoch, 'cor': -minloss}
# Example 3
def _partial_r2_summary(Edit, Act, Image, Edit_val, Act_val, Image_val):
    """Run ``partial_r2`` and package the errors and partial R² into a dict.

    :param Edit, Act, Image: flattened training similarity triangles.
    :param Edit_val, Act_val, Image_val: validation counterparts.
    :return: dict with ``epoch`` (always None — no training here), ``error``,
        ``baseline``, ``error_mean``, ``r2``.
    """
    e_full, e_base, e_mean = partial_r2(Edit, Act, Image, Edit_val, Act_val,
                                        Image_val)
    # BUGFIX: the original format string had only two placeholders for three
    # arguments, silently dropping the mean error from the log.
    logging.info("Full, base, mean error: {} {} {}".format(
        e_full, e_base, e_mean))
    # Partial R²: fraction of the baseline error explained by adding Act.
    r2 = (e_base - e_full) / e_base
    return {
        'epoch': None,
        'error': e_full,
        'baseline': e_base,
        'error_mean': e_mean,
        'r2': r2
    }


def weighted_average_RSA_partial(directory='.',
                                 layers=[],
                                 test_size=1 / 2,
                                 standardize=False,
                                 epochs=1,
                                 device='cpu',
                                 flickr8k_root='/roaming/gchrupal/datasets/flickr8k/'):
    """Compute partial RSA correlations (edit similarity ~ activations,
    controlling for image similarity) for mean-pooled MFCC input features
    and for each model layer, for trained and random models.

    :param directory: directory with ``global_input.pkl`` and
        ``global_{mode}_{layer}.pkl`` files.
    :param layers: iterable of layer names to evaluate.
    :param test_size: fraction (or count) held out for validation.
    :param standardize: whether to standardize activations via ``normalize``.
    :param epochs: unused here (kept for interface compatibility).
    :param device: torch device for activation tensors.
    :param flickr8k_root: root of the Flickr8K dataset (was hard-coded).
    :return: list of result dicts, one per (model, layer) combination.
    """
    from sklearn.model_selection import train_test_split
    from platalea.dataset import Flickr8KData
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    splitseed = random.randint(0, 1024)
    result = []
    logging.info("Loading transcription data")
    data = pickle.load(open("{}/global_input.pkl".format(directory), "rb"))
    trans = data['ipa']
    act = [
        torch.tensor([item[:, :]]).float().to(device) for item in data['audio']
    ]
    val = Flickr8KData(root=flickr8k_root, split='val')
    # Map each audio id to its image embedding, aligned with `data`.
    image_map = {item['audio_id']: item['image'] for item in val}
    image = np.stack([image_map[item] for item in data['audio_id']])

    trans, trans_val, act, act_val, image, image_val = train_test_split(
        trans, act, image, test_size=test_size, random_state=splitseed)
    if standardize:
        logging.info("Standardizing data")
        act, act_val = normalize(act, act_val)
    logging.info("Computing edit distances")
    edit_sim = torch.tensor(U.pairwise(S.stringsim, trans)).float().to(device)
    edit_sim_val = torch.tensor(U.pairwise(S.stringsim,
                                           trans_val)).float().to(device)
    logging.info("Computing image similarities")
    image = torch.tensor(image).float()
    image_val = torch.tensor(image_val).float()
    sim_image = S.cosine_matrix(image, image)
    sim_image_val = S.cosine_matrix(image_val, image_val)

    logging.info(
        "Computing partial correlation for input features (mean pooling)")
    wa = platalea.attention.MeanPool().to(device)
    avg_pool = torch.cat([wa(item) for item in act])
    avg_pool_sim = S.cosine_matrix(avg_pool, avg_pool)
    avg_pool_val = torch.cat([wa(item) for item in act_val])
    avg_pool_sim_val = S.cosine_matrix(avg_pool_val, avg_pool_val)
    # Flattened upper triangles of the similarity matrices:
    # regression is Edit ~ Act + Image.
    Edit = S.triu(edit_sim).cpu().numpy()
    Image = S.triu(sim_image).cpu().numpy()
    Act = S.triu(avg_pool_sim).cpu().numpy()
    Edit_val = S.triu(edit_sim_val).cpu().numpy()
    Image_val = S.triu(sim_image_val).cpu().numpy()
    Act_val = S.triu(avg_pool_sim_val).cpu().numpy()
    this = _partial_r2_summary(Edit, Act, Image, Edit_val, Act_val, Image_val)

    # Input features are model-independent: record the same result for both.
    result.append({**this, 'model': 'random', 'layer': 'mfcc'})
    result.append({**this, 'model': 'trained', 'layer': 'mfcc'})
    del act, act_val
    logging.info("Partial r2 on val: {} at epoch {}".format(
        result[-1]['r2'], result[-1]['epoch']))
    for mode in ["trained", "random"]:
        for layer in layers:
            logging.info("Loading activations for {} {}".format(mode, layer))
            data = pickle.load(
                open("{}/global_{}_{}.pkl".format(directory, mode, layer),
                     "rb"))
            logging.info("Training for {} {}".format(mode, layer))
            act = [
                torch.tensor([item[:, :]]).float().to(device)
                for item in data[layer]
            ]
            act, act_val = train_test_split(act,
                                            test_size=test_size,
                                            random_state=splitseed)
            if standardize:
                logging.info("Standardizing data")
                act, act_val = normalize(act, act_val)
            avg_pool = torch.cat([wa(item) for item in act])
            avg_pool_sim = S.cosine_matrix(avg_pool, avg_pool)
            avg_pool_val = torch.cat([wa(item) for item in act_val])
            avg_pool_sim_val = S.cosine_matrix(avg_pool_val, avg_pool_val)
            Act = S.triu(avg_pool_sim).cpu().numpy()
            Act_val = S.triu(avg_pool_sim_val).cpu().numpy()
            this = _partial_r2_summary(Edit, Act, Image, Edit_val, Act_val,
                                       Image_val)
            # Dump the raw similarity vectors for offline analysis.
            pickle.dump(dict(Edit=Edit,
                             Act=Act,
                             Image=Image,
                             Edit_val=Edit_val,
                             Act_val=Act_val,
                             Image_val=Image_val),
                        open("fufi_{}_{}.pkl".format(mode, layer), "wb"),
                        protocol=4)
            result.append({**this, 'model': mode, 'layer': layer})
            del act, act_val
            logging.info("Partial R2 on val: {} at epoch {}".format(
                result[-1]['r2'], result[-1]['epoch']))
    return result