Example #1
    def __init__(self, batch_size, data_dir, model_name='DistMulti'):
        #self.dataset = dataloader.AmazonDataset('./data')
        #self.dataset = AmazonDataset('./data', model_name=model_name)
        self.data_dir = data_dir
        self.dataset = AmazonDataset(self.data_dir, model_name=model_name)
        self.batch_size = batch_size
        self.model_name = model_name
def train_embed(data_dir, params, model_name):
    # load hyperparameters
    embedding_dim = params['embedding_dim']
    batch_size = params['batch_size']
    lr = params['lr']
    weight_decay = params['weight_decay']
    #warmup = params['warmup']
    warmup = 350
    #lr_decay_every = params['lr_decay_every']
    lr_decay_every = 2
    lr_decay_rate = params['lr_decay_rate']
    if model_name == 'SparseTransE':
        alpha = params['alpha']
    
    # dataload
    dataset = AmazonDataset(data_dir, model_name='TransE')
    relation_size = len(set(list(dataset.triplet_df['relation'].values)))
    entity_size = len(dataset.entity_list)
    if model_name == 'TransE':
        model = TransE(int(embedding_dim), relation_size, entity_size).to(device)
    elif model_name == 'SparseTransE':
        model = SparseTransE(int(embedding_dim), relation_size, entity_size, alpha=alpha).to(device)
    iterater = TrainIterater(batch_size=int(batch_size), data_dir=data_dir, model_name=model_name)
    #iterater.iterate_epoch(model, lr=lr, epoch=3000, weight_decay=weight_decay, warmup=warmup,
    #                       lr_decay_rate=lr_decay_rate, lr_decay_every=lr_decay_every, eval_every=1e+5)
    iterater.iterate_epoch(model, lr=lr, epoch=3000, weight_decay=weight_decay, warmup=warmup,
                           lr_decay_rate=lr_decay_rate, lr_decay_every=lr_decay_every, eval_every=1e+5, 
                           early_stop=True)
    return model
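A minimal usage sketch for train_embed; the params values and the data directory below are illustrative only, not taken from the original configuration:

params = {'embedding_dim': 64, 'batch_size': 256, 'lr': 1e-3,
          'weight_decay': 1e-5, 'lr_decay_rate': 0.9}
model = train_embed('../data_luxury_5core/valid1', params, model_name='TransE')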
Example #3
    def __init__(self,
                 embedding_dim,
                 relation_size,
                 entity_size,
                 data_dir,
                 alpha,
                 mu,
                 kappa,
                 gamma=1):
        super(PPR_TransE, self).__init__(embedding_dim, relation_size,
                                         entity_size, gamma)

        # dataloader
        self.dataset = AmazonDataset(data_dir)
        self.item_idx = torch.tensor([
            self.dataset.entity_list.index(i) for i in self.dataset.item_list
        ],
                                     dtype=torch.long,
                                     device=device)

        self.user_idx = torch.tensor([
            self.dataset.entity_list.index(u) for u in self.dataset.user_list
        ],
                                     dtype=torch.long,
                                     device=device)

        self.brand_idx = torch.tensor([
            self.dataset.entity_list.index(b) for b in self.dataset.brand_list
        ],
                                      dtype=torch.long,
                                      device=device)

        # load network
        edges = [[r[0], r[1]] for r in self.dataset.triplet_df.values]
        # add edges in both directions (user→item and item→user)
        for r in self.dataset.triplet_df.values:
            if r[2] == 0:
                edges.append([r[1], r[0]])

        self.G = nx.DiGraph()
        self.G.add_nodes_from(
            [i for i in range(len(self.dataset.entity_list))])
        self.G.add_edges_from(edges)
        self.H = nx.to_scipy_sparse_matrix(self.G)
        #self.H = scipy.sparse.coo_matrix(H)
        #coo = torch.tensor([H.row, H.col], dtype=torch.long)
        #v = torch.tensor(H.data, dtype=torch.float)
        #self.H = torch.sparse.FloatTensor(coo, v, torch.Size(H.shape), device=device)

        # coefficient for mk_sim_mat
        self.kappa = kappa

        # balance between the embedding loss and the PageRank loss
        # self.lambda_ = lambda_

        # balance between the adjacency matrix and the similarity matrix
        self.alpha = alpha

        # strength of the personalization bias in PPR
        self.mu = mu
def objective(trial):
    start = time.time()
    # pagerank para
    mu = trial.suggest_uniform('mu', 0, 1)
    alpha = trial.suggest_uniform('alpha', 0, 0.5)
    kappa1 = trial.suggest_uniform('kappa1', 0, 1)
    kappa2 = trial.suggest_uniform('kappa2', 0, 1)
    kappa3 = trial.suggest_uniform('kappa3', 0, 1)
    kappa = [kappa1, kappa2, kappa3]

    # model para
    embedding_dim = int(
        trial.suggest_discrete_uniform('embedding_dim', 16, 128, 16))
    #alpha = trial.suggest_loguniform('alpha', 1e-6, 1e-2)  # only for SparseTransE

    # training para
    lambda_ = trial.suggest_uniform('lambda_', 0, 1)
    batch_size = trial.suggest_int('batch_size', 256, 512, 128)
    lr = trial.suggest_loguniform('lr', 1e-4, 1e-2)
    weight_decay = trial.suggest_loguniform('weight_decay', 1e-6, 1e-2)
    warmup = trial.suggest_int('warmup', 10, 100)
    #lr_decay_every = trial.suggest_int('lr_decay_every', 1, 10)
    lr_decay_every = 2
    lr_decay_rate = trial.suggest_uniform('lr_decay_rate', 0.5, 1)

    data_dir = ['../data_luxury_5core/valid1', '../data_luxury_5core/valid2']
    score_sum = 0
    for i in range(len(data_dir)):

        dataset = AmazonDataset(data_dir[i], model_name='TransE')
        relation_size = len(set(list(dataset.triplet_df['relation'].values)))
        entity_size = len(dataset.entity_list)

        ppr_transe = PPR_TransE(embedding_dim, relation_size, entity_size,
                                data_dir[i], alpha, mu, kappa).to(device)

        iterater = TrainIterater(batch_size=int(batch_size),
                                 data_dir=data_dir[i],
                                 model_name=model_name)

        iterater.iterate_epoch(ppr_transe,
                               lr=lr,
                               epoch=2000,
                               weight_decay=weight_decay,
                               lambda_=lambda_,
                               warmup=warmup,
                               lr_decay_rate=lr_decay_rate,
                               lr_decay_every=lr_decay_every,
                               eval_every=1e+5)

        # inference
        inf = Inference(data_dir[i])
        score = inf.get_score(ppr_transe, kappa, mu, alpha)
        score_sum += score

    mi, sec = time_since(time.time() - start)
    print('{}m{}sec'.format(mi, sec))

    return -1 * score_sum / 2
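For context, an objective of this form is normally handed to an Optuna study; a minimal sketch (the trial count is arbitrary, and the default minimization direction is appropriate because the objective returns the negated mean score):

import optuna

study = optuna.create_study()
study.optimize(objective, n_trials=100)
print(study.best_params)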
Example #5
def objective(trial):
    start = time.time()
    import gc
    gc.collect()

    data_dir = [data_path + '/valid1', data_path + '/valid2']
    score_sum = 0

    embed_model = {'TransE': TransE, 'SparseTransE': SparseTransE}
    # hyper para
    embedding_dim = trial.suggest_discrete_uniform('embedding_dim', 16, 128,
                                                   16)

    if model_name == 'SparseTransE':
        alpha = trial.suggest_loguniform('alpha', 1e-6,
                                         1e-2)  # only for SparseTransE

    batch_size = trial.suggest_int('batch_size', 128, 512, 128)
    lr = trial.suggest_loguniform('lr', 1e-4, 1e-2)
    weight_decay = trial.suggest_loguniform('weight_decay', 1e-6, 1e-2)
    #warmup = trial.suggest_int('warmup', 100, 500)
    #warmup = trial.suggest_int('warmup', 10, 100)
    warmup = 350
    #lr_decay_every = trial.suggest_int('lr_decay_every', 1, 10)
    lr_decay_every = 2
    lr_decay_rate = trial.suggest_uniform('lr_decay_rate', 0.5, 1)

    for dir_path in data_dir:
        # load data
        dataset = AmazonDataset(dir_path, model_name=model_name)
        relation_size = len(set(list(dataset.triplet_df['relation'].values)))
        entity_size = len(dataset.entity_list)
        #model = TransE(int(embedding_dim), relation_size, entity_size).to(device)
        model = embed_model[model_name](int(embedding_dim), relation_size,
                                        entity_size).to(device)
        iterater = TrainIterater(batch_size=int(batch_size),
                                 data_dir=dir_path,
                                 model_name=model_name)
        score = iterater.iterate_epoch(model,
                                       lr=lr,
                                       epoch=3000,
                                       weight_decay=weight_decay,
                                       warmup=warmup,
                                       lr_decay_rate=lr_decay_rate,
                                       lr_decay_every=lr_decay_every,
                                       eval_every=1e+5,
                                       early_stop=True)

        score_sum += score

    torch.cuda.empty_cache()

    mi, sec = time_since(time.time() - start)
    print('{}m{}sec'.format(mi, sec))

    return -1 * score_sum / 2
Example #6
    def __init__(self, data_dir, model_name, patience):
        self.dataset = AmazonDataset(data_dir, model_name)
        self.patience = patience
        self.model_name = model_name

        self.user_item_nega_df = self.negative_sampling()

        y_test = [1 for i in range(len(self.dataset.user_item_test_df))] \
                   + [0 for i in range(len(self.user_item_nega_df))]
        self.y_test = np.array(y_test)

        self.loss_list = []
        self.model_list = []
Example #7
    def __init__(self, data_dir):

        # it would actually be faster to pass in an AmazonDataset instance, but one is constructed here
        self.evaluater = Evaluater(data_dir)
        self.dataset = AmazonDataset(data_dir, model_name='TransE')
        edges = [[r[0], r[1]] for r in self.dataset.triplet_df.values]
        # add edges in both directions (user→item and item→user)
        for r in self.dataset.triplet_df.values:
            if r[2] == 0:
                edges.append([r[1], r[0]])

        # load network
        self.G = nx.DiGraph()
        self.G.add_nodes_from(
            [i for i in range(len(self.dataset.entity_list))])
        self.G.add_edges_from(edges)
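The directed graph assembled here is what personalized PageRank runs on; a minimal sketch with networkx, assuming G is a graph built as above and using entity index 0 as the restart node purely for illustration:

import networkx as nx

personalization = {0: 1.0}  # restart distribution concentrated on one entity node
ppr_scores = nx.pagerank(G, alpha=0.85, personalization=personalization)
# ppr_scores maps every entity index to its personalized PageRank score,
# which can serve as one row of a ranking matrix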
def objective(trial):
    start = time.time()
    # load hyperparameters
    # gamma = trial.suggest_loguniform('gamma', 1e-6, 1e-3)
    # lin_model = trial.suggest_categorical('lin_model', ['lasso', 'elastic'])
    alpha = trial.suggest_uniform('alpha', 0, 1)
    beta = trial.suggest_uniform('beta', 0, 0.5)

    data_dirs = [
        '../' + data_path + '/valid1/', '../' + data_path + '/valid2/'
    ]

    score_sum = 0
    for data_dir in data_dirs:
        # dataload
        dataset = AmazonDataset(data_dir)

        # load model
        #slim = train_SLIM(data_dir, load=True)
        sim_mat = load_sim_mat('sim_mat' + data_dir[-2] + '.csr',
                               len(dataset.user_list), len(dataset.item_list))

        edges = [[r[0], r[1]] for r in dataset.triplet_df.values]
        # add edges in both directions (user→item and item→user)
        for r in dataset.triplet_df.values:
            if r[2] == 0:
                edges.append([r[1], r[0]])

        # load network
        G = nx.DiGraph()
        G.add_nodes_from([i for i in range(len(dataset.entity_list))])
        G.add_edges_from(edges)

        evaluater = Evaluater(data_dir)
        #ranking_mat = get_ranking_mat(G, slim, alpha, beta, dataset)
        ranking_mat = get_ranking_mat(G, sim_mat, alpha, beta, dataset)
        #score = evaluater.topn_map(ranking_mat)
        score = evaluater.topn_precision(ranking_mat)

        score_sum += score

    mi, sec = time_since(time.time() - start)
    print('{}m{}s'.format(mi, sec))

    return -1 * score_sum / 2
Example #9
def objective(trial):
    start = time.time()
    # hyper parameter
    #gamma = trial.suggest_loguniform('gamma', 1e-6, 1e-3)
    #lin_model = trial.suggest_categorical('lin_model', ['lasso', 'elastic'])
    #slim = train_SLIM(lin_model, gamma)
    alpha = trial.suggest_uniform('alpha', 0, 0.5)
    beta = trial.suggest_uniform('beta', 0, 0.5)
    gamma1 = trial.suggest_uniform('gamma1', 0, 1)
    gamma2 = trial.suggest_uniform('gamma2', 0, 1)
    gamma3 = trial.suggest_uniform('gamma3', 0, 1)
    gamma = [gamma1, gamma2, gamma3]

    data_dir = ['../data_luxury_5core/valid1', '../data_luxury_5core/valid2']
    score_sum = 0
    for i in range(len(data_dir)):
        # dataload
        dataset = AmazonDataset(data_dir[i], model_name='TransE')
        edges = [[r[0], r[1]] for r in dataset.triplet_df.values]
        # add edges in both directions (user→item and item→user)
        for r in dataset.triplet_df.values:
            if r[2] == 0:
                edges.append([r[1], r[0]])
        #user_items_test_dict = pickle.load(open('./data/user_items_test_dict.pickle', 'rb'))

        # load network
        G = nx.DiGraph()
        G.add_nodes_from([i for i in range(len(dataset.entity_list))])
        G.add_edges_from(edges)

        ranking_mat = get_ranking_mat(G, dataset, model[i], gamma, alpha, beta)
        #score = topn_precision(ranking_mat, user_items_test_dict)
        evaluater = Evaluater(data_dir[i])
        score = evaluater.topn_map(ranking_mat)
        score_sum += score

    mi, sec = time_since(time.time() - start)
    print('{}m{}sec'.format(mi, sec))

    return -1 * score_sum / 2
def objective(trial):
    start = time.time()
    # hyper parameter
    alpha = trial.suggest_uniform('alpha', 0, 0.5)
    beta = trial.suggest_uniform('beta', 0, 0.5)
    gamma1 = trial.suggest_uniform('gamma1', 0, 1)
    gamma2 = trial.suggest_uniform('gamma2', 0, 1)
    gamma3 = trial.suggest_uniform('gamma3', 0, 1)
    gamma = [gamma1, gamma2, gamma3]
    
    data_dir = ['../' + data_path + '/valid1', '../' + data_path + '/valid2']
    score_sum = 0
    for i in range(len(data_dir)):
        # dataload
        dataset = AmazonDataset(data_dir[i], model_name='SparseTransE')

        # load network
        edges = [[r[0], r[1]] for r in dataset.triplet_df.values]
        # add edges in both directions (user→item and item→user)
        for r in dataset.triplet_df.values:
            if r[2] == 0:
                edges.append([r[1], r[0]])

        G = nx.DiGraph()
        G.add_nodes_from([i for i in range(len(dataset.entity_list))])
        G.add_edges_from(edges)

        ranking_mat = get_ranking_mat(G, dataset, model[i], gamma, alpha, beta)
        #score = topn_precision(ranking_mat, user_items_test_dict)
        evaluater = Evaluater(data_dir[i])
        score = evaluater.topn_map(ranking_mat)
        score_sum += score

    mi, sec = time_since(time.time() - start)
    print('{}m{}sec'.format(mi, sec))
    
    return -1 * score_sum / 2
Example #11
def objective(trial):
    start = time.time()

    import gc
    gc.collect()

    dataset = AmazonDataset('./data')

    embedding_dim = trial.suggest_discrete_uniform('embedding_dim', 16, 64, 16)
    bpr = BPR(int(embedding_dim), len(dataset.user_list),
              len(dataset.item_list)).to(device)

    batch_size = trial.suggest_discrete_uniform('batch_size', 64, 256, 64)
    iterater = TrainIterater(batch_size=int(batch_size))

    lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
    weight_decay = trial.suggest_loguniform('weight_decay', 1e-6, 1e-2)
    warmup = trial.suggest_int('warmup', 100, 500)
    #warmup = trial.suggest_int('warmup', 1, 5)
    lr_decay_every = trial.suggest_int('lr_decay_every', 1, 5)
    lr_decay_rate = trial.suggest_uniform('lr_decay_rate', 0.5, 1)

    score = iterater.iterate_epoch(bpr,
                                   lr=lr,
                                   epoch=3000,
                                   weight_decay=weight_decay,
                                   warmup=warmup,
                                   lr_decay_rate=lr_decay_rate,
                                   lr_decay_every=lr_decay_every,
                                   eval_every=1e+5)

    torch.cuda.empty_cache()

    mi, sec = time_since(time.time() - start)
    print('{}m{}sec'.format(mi, sec))

    return -1 * score
Example #12
class TrainIterater():
    def __init__(self, batch_size, data_dir, model_name='DistMulti'):
        #self.dataset = dataloader.AmazonDataset('./data')
        #self.dataset = AmazonDataset('./data', model_name=model_name)
        self.data_dir = data_dir
        self.dataset = AmazonDataset(self.data_dir, model_name=model_name)
        self.batch_size = batch_size
        self.model_name = model_name

    def train(self, batch, loss_func, optimizer, model):
        optimizer.zero_grad()

        if self.model_name == 'DistMulti' or self.model_name == 'Complex':
            triplet, y_train = batch
            h_entity_tensor = torch.tensor(triplet[:, 0],
                                           dtype=torch.long,
                                           device=device)
            t_entity_tensor = torch.tensor(triplet[:, 1],
                                           dtype=torch.long,
                                           device=device)
            relation_tensor = torch.tensor(triplet[:, 2],
                                           dtype=torch.long,
                                           device=device)
            y_train = torch.tensor(y_train, dtype=torch.float, device=device)

            pred = model(h_entity_tensor, t_entity_tensor, relation_tensor)
            loss = loss_func(pred, y_train)

        elif self.model_name == 'TransE':
            posi_batch, nega_batch = batch
            h = torch.tensor(posi_batch[:, 0], dtype=torch.long, device=device)
            t = torch.tensor(posi_batch[:, 1], dtype=torch.long, device=device)
            r = torch.tensor(posi_batch[:, 2], dtype=torch.long, device=device)

            n_h = torch.tensor(nega_batch[:, 0],
                               dtype=torch.long,
                               device=device)
            n_t = torch.tensor(nega_batch[:, 1],
                               dtype=torch.long,
                               device=device)
            n_r = torch.tensor(nega_batch[:, 2],
                               dtype=torch.long,
                               device=device)

            pred = model(h, t, r, n_h, n_t, n_r)
            loss = torch.sum(pred)

        elif self.model_name == 'SparseTransE':
            posi_batch, nega_batch, batch_user, batch_item, batch_brand = batch
            h = torch.tensor(posi_batch[:, 0], dtype=torch.long, device=device)
            t = torch.tensor(posi_batch[:, 1], dtype=torch.long, device=device)
            r = torch.tensor(posi_batch[:, 2], dtype=torch.long, device=device)

            n_h = torch.tensor(nega_batch[:, 0],
                               dtype=torch.long,
                               device=device)
            n_t = torch.tensor(nega_batch[:, 1],
                               dtype=torch.long,
                               device=device)
            n_r = torch.tensor(nega_batch[:, 2],
                               dtype=torch.long,
                               device=device)

            reg_user = torch.tensor(batch_user,
                                    dtype=torch.long,
                                    device=device)
            reg_item = torch.tensor(batch_item,
                                    dtype=torch.long,
                                    device=device)
            reg_brand = torch.tensor(batch_brand,
                                     dtype=torch.long,
                                     device=device)

            pred = model(h, t, r, n_h, n_t, n_r, reg_user, reg_item, reg_brand)

            loss = torch.sum(pred)

        elif self.model_name == 'RegComplex':
            triplet, y_train, batch_user, batch_item, batch_brand = batch
            h_entity_tensor = torch.tensor(triplet[:, 0],
                                           dtype=torch.long,
                                           device=device)
            t_entity_tensor = torch.tensor(triplet[:, 1],
                                           dtype=torch.long,
                                           device=device)
            relation_tensor = torch.tensor(triplet[:, 2],
                                           dtype=torch.long,
                                           device=device)
            y_train = torch.tensor(y_train, dtype=torch.float, device=device)

            reg_user = torch.tensor(batch_user,
                                    dtype=torch.long,
                                    device=device)
            reg_item = torch.tensor(batch_item,
                                    dtype=torch.long,
                                    device=device)
            reg_brand = torch.tensor(batch_brand,
                                     dtype=torch.long,
                                     device=device)

            pred, reg = model(h_entity_tensor, t_entity_tensor,
                              relation_tensor, reg_user, reg_item, reg_brand)

            loss = loss_func(pred, y_train) + reg

        loss.backward()
        optimizer.step()

        return loss

    def iterate_train(self,
                      model,
                      lr=0.001,
                      weight_decay=0,
                      print_every=2000,
                      plot_every=50):

        optimizer = optim.Adam(model.parameters(),
                               lr=lr,
                               weight_decay=weight_decay)
        # optimizer = optim.SGD(model.parameters(), lr=lr)

        loss_func = nn.BCELoss()

        print_loss_total = 0
        plot_loss_list = []
        plot_loss_total = 0

        if self.model_name == 'DistMulti' or self.model_name == 'Complex' or self.model_name == 'RegComplex':
            train_num = len(self.dataset.triplet_df) + len(
                self.dataset.nega_triplet_df)
        elif self.model_name == 'TransE' or self.model_name == 'SparseTransE':
            train_num = len(self.dataset.triplet_df)

        start_time = time.time()

        for i in range(int(train_num / self.batch_size) + 1):

            batch = self.dataset.get_batch(batch_size=self.batch_size)
            loss = self.train(batch, loss_func, optimizer, model)
            print_loss_total += loss.detach()
            plot_loss_total += loss.detach()

            # every print_every iterations, print the current average loss, elapsed time, and progress (%) over the whole dataset
            if (i + 1) % print_every == 0:
                runtime = time.time() - start_time
                mi, sec = self.time_since(runtime)
                avg_loss = print_loss_total / print_every
                data_percent = int(i * self.batch_size / train_num * 100)
                print('train loss: {:e}    processed: {}({}%)    {}m{}sec'.
                      format(avg_loss, i * self.batch_size, data_percent, mi,
                             sec))
                print_loss_total = 0

            # every plot_every iterations, record the average loss in a list for plotting
            if (i + 1) % plot_every == 0:
                avg_loss = plot_loss_total / plot_every
                plot_loss_list.append(avg_loss)
                plot_loss_total = 0

        return plot_loss_list

    def time_since(self, runtime):
        mi = int(runtime / 60)
        sec = int(runtime - mi * 60)
        return (mi, sec)

    def iterate_epoch(self,
                      model,
                      lr,
                      epoch,
                      weight_decay=0,
                      warmup=0,
                      lr_decay_rate=1,
                      lr_decay_every=10,
                      eval_every=5,
                      early_stop=False):
        eval_model = Evaluater(self.data_dir, model_name=self.model_name)
        #es = EarlyStop(self.data_dir[0:-6] + 'early_stopping/', self.model_name, patience=6)
        es = EarlyStop('../data_beauty_2core_es/early_stopping/',
                       self.model_name,
                       patience=6)
        plot_loss_list = []
        plot_score_list = []

        for i in range(epoch):
            plot_loss_list.extend(
                self.iterate_train(model,
                                   lr=lr,
                                   weight_decay=weight_decay,
                                   print_every=10000))

            # early stop
            if early_stop:
                pre_model = es.early_stop(model)
                if pre_model:
                    print('Early stop epoch: {}'.format(i + 1))
                    return eval_model.topn_map(pre_model)

            # learning-rate scheduling
            if i > warmup:
                if (i - warmup) % lr_decay_every == 0:
                    lr = lr * lr_decay_rate

            if (i + 1) % eval_every == 0:
                #score = eval_model.topn_precision(model)
                #print('epoch: {}  precision: {}'.format(i, score))
                score = eval_model.topn_map(model)
                print('epoch: {}  map: {}'.format(i, score))
                plot_score_list.append(score)

        #self._plot(plot_loss_list)
        #self._plot(plot_score_list)

        #return eval_model.topn_precision(model)
        return eval_model.topn_map(model)

    def _plot(self, loss_list):
        # TODO: clean this up properly
        plt.plot(loss_list)
        plt.show()
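A minimal end-to-end sketch using the class above, assuming AmazonDataset, TransE, and device are defined as in the other examples; the data directory and hyperparameter values are illustrative only:

dataset = AmazonDataset('../data_luxury_5core/valid1', model_name='TransE')
relation_size = len(set(dataset.triplet_df['relation'].values))
entity_size = len(dataset.entity_list)

model = TransE(64, relation_size, entity_size).to(device)
iterater = TrainIterater(batch_size=256,
                         data_dir='../data_luxury_5core/valid1',
                         model_name='TransE')
score = iterater.iterate_epoch(model, lr=1e-3, epoch=10, eval_every=5)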
# Data augmentation and normalization for training
# Just normalization for validation
"""
train_transforms = transforms.Compose([
                        transforms.RandomHorizontalFlip(),
                        utils.RandomRotation(),
                        utils.RandomTranslation(),
                        utils.RandomVerticalFlip(),
                        transforms.ToTensor()])
"""
train_transforms = transforms.Compose([transforms.ToTensor()])
print("Initializing Datasets and Dataloaders...")
data_path = '/home/jlcastillo/Database_real/train-tif-v2'

# Create training, validation and test datasets
train_dataset = AmazonDataset('csv/train_v2.csv', data_path, 'csv/labels.txt',
                              train_transforms)
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=args.batch_size,
                                           shuffle=True,
                                           num_workers=4)

# check the size of your dataset
dataset_sizes = {}
dataset_sizes['train'] = len(train_dataset)
print('Training dataset size:', dataset_sizes['train'])

# -------------------------- MODEL --------------------------
## URLs to the weights
RESNET_18 = 'https://download.pytorch.org/models/resnet18-5c106cde.pth'
RESNET_101 = 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth'
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)
kwargs = {'pin_memory': True} if args.cuda else {}

# -------------------------- LOADING THE DATA --------------------------
# Data augmentation and normalization for training
# Just normalization for validation

print("Initializing Datasets and Dataloaders...")
data_path = '/home/jlcastillo/Database_real/train-jpg'
# Create training, validation and test datasets
train_dataset = AmazonDataset('csv/train_v2.csv',
                              data_path,
                              'csv/labels.txt',
                              transform=transforms.Compose([
                                  Rescale((args.input_size, args.input_size)),
                                  transforms.ToTensor()
                              ]))
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=args.batch_size,
                                           shuffle=True,
                                           num_workers=4)

# check the size of your dataset
dataset_sizes = {}
dataset_sizes['train'] = len(train_dataset)
print('Training dataset size:', dataset_sizes['train'])

# -------------------------- MODEL --------------------------
## URLs to the weights
Example #15
from models import DistMulti, TransE
from training import TrainIterater
from evaluate import Evaluater

import optuna
import time

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

import warnings

warnings.filterwarnings('ignore')

# dataload
model_name = 'TransE'
dataset = AmazonDataset('./data', model_name='TransE')
edges = [[r[0], r[1]] for r in dataset.triplet_df.values]

# load network
G = nx.DiGraph()
G.add_nodes_from([i for i in range(len(dataset.entity_list))])
G.add_edges_from(edges)


def reconstruct_kg(model):
    with torch.no_grad():
        batch_size = int(len(dataset.item_list) / 2)
        item_index = [
            dataset.entity_list.index(item) for item in dataset.item_list
        ]
        user_index = [
Example #16
if __name__ == '__main__':

    args = sys.argv

    model_name = args[1]

    params = load_params()
    print(params)

    import gc
    gc.collect()

    # dataload
    data_dir = '../' + data_path + '/test/'
    dataset = AmazonDataset(data_dir, model_name=model_name)
    
    relation_size = len(set(list(dataset.triplet_df['relation'].values)))
    entity_size = len(dataset.entity_list)
    embedding_dim = params['embedding_dim']
    alpha = params['alpha']
    model = SparseTransE(int(embedding_dim), relation_size, entity_size, alpha=alpha).to(device)
    
    batch_size = params['batch_size']
    iterater = TrainIterater(batch_size=int(batch_size), data_dir=data_dir, model_name=model_name)
    
    lr = params['lr']
    weight_decay = params['weight_decay']
    
    warmup = 350
    lr_decay_every = 2
Example #17
    return -1 * score


if __name__ == '__main__':
    params = load_param('./result_beauty')
    embedding_dim = params['embedding_dim']
    batch_size = params['batch_size']
    lr = params['lr']
    weight_decay = params['weight_decay']
    warmup = params['warmup']
    lr_decay_every = params['lr_decay_every']
    lr_decay_rate = params['lr_decay_rate']

    #data_dir = '../data_beauty_2core_es/test/bpr'
    data_dir = '../data_beauty_2core_es/test/bpr'
    dataset = AmazonDataset(data_dir)
    bpr = BPR(int(embedding_dim), len(dataset.user_list),
              len(dataset.item_list)).to(device)
    iterater = TrainIterater(batch_size=int(batch_size), data_dir=data_dir)
    score = iterater.iterate_epoch(bpr,
                                   lr=lr,
                                   epoch=3000,
                                   weight_decay=weight_decay,
                                   warmup=warmup,
                                   lr_decay_rate=lr_decay_rate,
                                   lr_decay_every=lr_decay_every,
                                   eval_every=1e+5,
                                   early_stop=True)

    # record the test results
    np.savetxt('./result_beauty/score.txt', np.array([score]))
# Define transformations
# If using pretrained models normalization should also be added.
# normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
#  std=[0.229, 0.224, 0.225])
test_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
#val_transforms = transforms.Compose([transforms.Scale(args.scale),
#                        transforms.ToTensor()])

# Create dataloaders
kwargs = {'pin_memory': True} if cuda else {}
testset = AmazonDataset('csv/sample_submission_v2.csv',
                        '/home/jlcastillo/Database_real/test_full',
                        'csv/labels.txt', args.nir_channel, test_transforms)
test_loader = DataLoader(testset,
                         batch_size=args.batch_size,
                         shuffle=False,
                         num_workers=args.nworkers,
                         **kwargs)


def fscore(prediction):
    """ Get the fscore of the validation set. Gives a good indication
    of score on public leaderboard"""
    target = torch.FloatTensor(0, 17)
    for i, (_, y) in enumerate(val_loader):
        target = torch.cat((target, y), 0)
    fscore = fbeta_score(target.numpy(),
Example #19
# -------------------------- LOADING THE DATA --------------------------
# Data augmentation and normalization for training
# Just normalization for validation

train_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

print("Initializing Datasets and Dataloaders...")
data_path = '/home/jlcastillo/Database_real/train-jpg'

# Create training, validation and test datasets
train_dataset = AmazonDataset('csv/train.csv',
                              data_path,
                              'csv/labels.txt',
                              args.nir_channel,
                              transform=train_transforms)
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=args.batch_size,
                                           shuffle=True,
                                           num_workers=4)

#Val
val_dataset = AmazonDataset('csv/val.csv',
                            data_path,
                            'csv/labels.txt',
                            args.nir_channel,
                            transform=train_transforms)
val_loader = torch.utils.data.DataLoader(val_dataset,
                                         batch_size=args.batch_size,
Example #20
    def __init__(self, data_dir):
        #self.user_num = user_num
        self.dataset = AmazonDataset(data_dir=data_dir)
Example #21
                reg_user = torch.tensor(batch_user,
                                        dtype=torch.long,
                                        device=device)
                reg_item = torch.tensor(batch_item,
                                        dtype=torch.long,
                                        device=device)
                reg_brand = torch.tensor(batch_brand,
                                         dtype=torch.long,
                                         device=device)

                pred = model(h, t, r, n_h, n_t, n_r, reg_user, reg_item,
                             reg_brand)

                loss = torch.sum(pred)

        return loss

    def valid_metric(self, model):
        return 0


if __name__ == '__main__':
    import models

    dataset = AmazonDataset('../data_beauty_2core_es/valid1/', 'TransE')
    relation_size = len(set(list(dataset.triplet_df['relation'].values)))
    entity_size = len(dataset.entity_list)
    model = models.TransE(10, relation_size, entity_size).to(device)

    es = EarlyStop('../data_beauty_2core_es/early_stopping/', 'TransE', 10)
    es.early_stop(model)
Example #22
    def __init__(self, data_dir, model_name='DistMulti'):
        self.dataset = AmazonDataset(data_dir, model_name=model_name)
        self.model_name = model_name
Example #23
    if amazon_data[0] == 'b':
        data_path = 'data_' + amazon_data + '_2core'
    elif amazon_data[0] == 'l':
        data_path = 'data_' + amazon_data + '_5core'

    model_name = args[2]

    params = load_params()
    print(params)

    import gc
    gc.collect()

    # dataload
    data_dir = '../' + data_path + '/test/'
    dataset = AmazonDataset(data_dir, model_name='SparseTransE')

    relation_size = len(set(list(dataset.triplet_df['relation'].values)))
    entity_size = len(dataset.entity_list)
    embedding_dim = params['embedding_dim']
    alpha = params['alpha']
    model = SparseTransE(int(embedding_dim),
                         relation_size,
                         entity_size,
                         alpha=alpha).to(device)

    batch_size = params['batch_size']
    iterater = TrainIterater(batch_size=int(batch_size),
                             data_dir=data_dir,
                             model_name=model_name)
Example #24
            loss_total += loss.detach()
        
        return loss_total / len(self.user_item_train_df)


    def valid_loss(self, batch, y_train, loss_func, model):
        with torch.no_grad(): 
            posi_batch, nega_batch = batch
            user_tensor = torch.tensor(posi_batch[:, 0], dtype=torch.long, device=device)
            item_tensor = torch.tensor(posi_batch[:, 1], dtype=torch.long, device=device)
            nega_item_tensor = torch.tensor(nega_batch[:, 1], dtype=torch.long, device=device)

            pred = model(user_tensor, item_tensor, nega_item_tensor)
            loss = loss_func(pred, y_train)

        return loss


    def valid_metric(self, model):
        return 0

if __name__ == '__main__':
    import bpr_model
    dataset = AmazonDataset('../data_beauty_2core_es/valid1/bpr/')
    user_size = len(dataset.user_list)
    item_size = len(dataset.item_list)

    model = bpr_model.BPR(32, user_size, item_size)

    es = EarlyStop('../data_beauty_2core_es/early_stopping/bpr/', 10)
    es.early_stop(model)