def __init__(self, config):

        self.params = config
        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        self.word2index, self.index2word, self.embeddings = pickle.load(open(config.data_pickle, 'rb'))
        train_dataset = Loader(config, config.p_train_data)
        val_dataset = Loader(config, config.p_val_data)
        test_dataset = Loader(config, config.p_test_data)

        self.model = Model(config, self.embeddings).to(self.device)

        self.train_loader = DataLoader(dataset=train_dataset, batch_size=config.batch_size, shuffle=True)
        self.val_loader = DataLoader(dataset=val_dataset, batch_size=config.batch_size, shuffle=False)
        self.test_loader = DataLoader(dataset=test_dataset, batch_size=config.batch_size, shuffle=False)

        params = filter(lambda param: param.requires_grad, self.model.parameters())
        self.optimizer = torch.optim.Adam(lr=config.learning_rate, betas=(config.beta1, config.beta2), eps=1e-7, weight_decay=3e-7, params=params)

        # lr = config.learning_rate
        # base_lr = 1.0
        # params = filter(lambda param: param.requires_grad, self.model.parameters())
        # optimizer = torch.optim.Adam(lr=base_lr, betas=(config.beta1, config.beta2), eps=1e-7, weight_decay=3e-7, params=params)
        # cr = lr / math.log2(config.lr_warm_up_num)
        # scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer,
        #                                         lr_lambda=lambda ee: cr * math.log2(ee + 1) if ee < config.lr_warm_up_num else lr)


        self.model_path = os.path.join(self.params.cache_dir)
        if not os.path.exists(self.model_path):
            print('create path: ', self.model_path)
            os.makedirs(self.model_path)

        self.best_model = None
        self.lr_epoch = 0
def main(_):
    if config.mode == "train":
        trainer = Trainer(config)
        trainer.train()
    elif config.mode == "preprocess":
        dataloader.preprocess(_)
    elif config.mode == "debug":
        trainer = Trainer(config)
        train_dataset = Loader(config, config.p_train_data)
        train_loader = DataLoader(dataset=train_dataset, batch_size=2, shuffle=False)
        data_iter = iter(train_loader)
        frame_vecs, frame_n, ques, ques_n, start_frame, end_frame = next(data_iter)
        frame_vecs = frame_vecs.to(trainer.device)
        ques = ques.to(trainer.device)

        # Forward pass
        p1, p2 = trainer.model(frame_vecs, ques)
        y1, y2 = start_frame.to(trainer.device), end_frame.to(trainer.device)
        print(p1.shape, p2.shape, y1.shape, y2.shape)
        loss1 = F.nll_loss(p1, y1, reduction='mean')
        loss2 = F.nll_loss(p2, y2, reduction='mean')
        loss = (loss1 + loss2) / 2
        # Backward and optimize
        trainer.optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_value_(trainer.model.parameters(), 1)
        trainer.optimizer.step()
        print(loss.item())

    elif config.mode == "test":
        trainer = Trainer(config)
        trainer.evaluate()
    else:
        print("Unknown mode")
        exit(0)
Example #3
def makecsv(file, model, loadfile):
    cuda = False

    kwargs = {'num_workers': 1, 'pin_memory': True} if cuda else {}

    data_loader = Loader(c.FILE_TRAIN_LABELED_AUG, c.FILE_TRAIN_UNLABELED, c.FILE_TEST, 'data/test-labeled.p', kwargs)
    test_loader = data_loader.getTest()
    test_actual = data_loader.getValidation()
    label_predict = np.array([])

    mnist_model = model
    if loadfile:
        mnist_model = torch.load(model)
    correct = 0

    for data, target in test_loader:
        mnist_model.eval()
        data, target = Variable(data, volatile=True), Variable(target)
        output = mnist_model(data)
        temp = output.data.max(1)[1]
        pred = output.data.max(1)[1]
        correct += pred.eq(target.data).cpu().sum()
        label_predict = np.concatenate((label_predict, temp.numpy().reshape(-1)))

    print('\nTest set: Accuracy: {}/{} ({:.0f}%)\n'.format(correct, len(test_loader.dataset),
                                                           100. * correct / len(test_loader.dataset)))

    predict_label = pd.DataFrame(label_predict, columns=['label'], dtype=int)
    predict_label.reset_index(inplace=True)
    predict_label.rename(columns={'index': 'ID'}, inplace=True)
    filename = 'predictions/' + file + "-labeled.csv"
    predict_label.to_csv(filename, index=False)

    label_predict = np.array([])
    correct = 0

    for data, target in test_actual:
        mnist_model.eval()
        data, target = Variable(data, volatile=True), Variable(target)
        output = mnist_model(data)
        temp = output.data.max(1)[1]
        pred = output.data.max(1)[1]  # get the index of the max log-probability
        correct += pred.eq(target.data).cpu().sum()
        label_predict = np.concatenate((label_predict, temp.numpy().reshape(-1)))

    print('\nValidation set: Accuracy: {}/{} ({:.0f}%)\n'.format(correct, len(test_actual.dataset),
                                                                 100. * correct / len(test_actual.dataset)))

    predict_label = pd.DataFrame(label_predict, columns=['label'], dtype=int)
    predict_label.reset_index(inplace=True)
    predict_label.rename(columns={'index': 'ID'}, inplace=True)
    filename = 'predictions/' + file + "-unlabeled.csv"
    predict_label.to_csv(filename, index=False)
Example #4
def getDataLoaderDict(idxModel): # idxModel is tuple
    print("\nStarted Preprocessing")
    trainDataLoaderDict = {}
    testDataLoaderDict = {}

    for idx in idxModel:
        print("-Preprocessing model{} dataloader".format(idx))
        loader = Loader(idx)
        trainLoader, testLoader = loader.getLoader()

        trainDataLoaderDict[idx] = trainLoader
        testDataLoaderDict[idx] = testLoader
        print("-Completed Preprocessing model{} dataloader\n".format(idx))
    print("All Completed Preprocessing\n")
    return trainDataLoaderDict, testDataLoaderDict
Example #5
def main(_):
    print('begin loading data...')
    loader = Loader(FLAGS.mincount, FLAGS.maxlen, full_train=False)
    print('finished loading data\n')
    print('begin building model...')
    # dataloader, embedding_size, max_epoch, learning_rate, keep_prob
    model = Model(loader,
                  embedding_size=FLAGS.embedding_size,
                  max_epoch=FLAGS.max_epoch,
                  learning_rate=FLAGS.learning_rate,
                  keep_prob=FLAGS.keep_prob)
    print('finished building model\n')
    # model.train(full_train=True)
    # model.save_doc_vector()
    model.many_test()
Example #6
# read word2vec
with open('/home/student/glove/glove.6B.100d.txt', 'r') as f:
    lines = f.readlines()

dictionary = {}
weight_matrix = []
for i, line in enumerate(lines):
    word = line.split()
    dictionary[word[0]] = i
    weight_matrix.append([float(x) for x in word[1:]])

weight_matrix = np.array(weight_matrix)

# ------------------------------------------------------------------------------- #
# read data
train_data = Loader(0, dictionary)
valid_data = Loader(1, dictionary)
test_data = Loader(2, dictionary)
train_data = DataLoader(train_data, batch_size=128, shuffle=True)
valid_data = DataLoader(valid_data, batch_size=128, shuffle=False)
test_data = DataLoader(test_data, batch_size=128, shuffle=False)

# ------------------------------------------------------------------------------ #
model = SentenceCompression(100, 200, 50, 2, weight_matrix,
                            len(dictionary)).cuda()
lr = 0.0005
num_epoch = 20

criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                       lr=lr,
Example #7
import scipy.sparse as sp
import numpy as np
from dataloader import Loader
from scipy.sparse import csr_matrix

loader = Loader()
R = loader.UserItemNet.tolil()
twohop = 0.004

adj_user = R.T.todok()
print("generating 2")
rowsum_user = np.array(adj_user.sum(axis=0))
D_user = np.power(rowsum_user, -0.5).flatten()
D_user[np.isinf(D_user)] = 0
Dmat_user = sp.diags(D_user)
print("generating 3")

adj_item = R.todok()
print("generating 4")
rowsum_item = np.array(adj_item.sum(axis=0))
D_item = np.power(rowsum_item, -0.5).flatten()
D_item[np.isinf(D_item)] = 0
Dmat_item = sp.diags(D_item)
print("generating 5")

norm_user = Dmat_item.dot(adj_user).dot(Dmat_user)
norm_item = Dmat_user.dot(adj_item).dot(Dmat_item)


def sparsify_propagation(adj, hop_thres):
    adj_valid = (adj > hop_thres)
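    # Assumed completion (the original snippet is cut off here): keep only the
    # entries above the two-hop threshold and return them as a sparse matrix.
    adj_kept = adj.multiply(adj_valid)
    return csr_matrix(adj_kept)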
Example #8
        "word2vec": "data/word2vec.bin",
        "is_origin_dataset": True,
        "train_json": "data/activity-net/train.json",
        "val_json": "data/activity-net/val_1.json",
        "test_json": "data/activity-net/val_2.json",
        "train_data": "data/activity-net/train_data.json",
        "val_data": "data/activity-net/val_data.json",
        "test_data": "data/activity-net/test_data.json",
        "feature_path": "data/activity-c3d",
        "feature_path_tsn": "data/tsn_score"
    }

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    word2vec = KeyedVectors.load_word2vec_format(config["word2vec"],
                                                 binary=True)
    train_dataset = Loader(config, config['train_data'], word2vec, flag=True)
    val_dataset = Loader(config, config['val_data'], word2vec)
    test_dataset = Loader(config, config['test_data'], word2vec)

    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=64,
                              shuffle=True)
    val_loader = DataLoader(dataset=val_dataset, batch_size=64, shuffle=False)
    test_loader = DataLoader(dataset=test_dataset,
                             batch_size=64,
                             shuffle=False)

    d_model = config['d_model']
    d_ff = config['d_ff']
    num_heads = config['num_heads']
Example #9
batch_size = 2
device = '2'
os.environ['CUDA_VISIBLE_DEVICES'] = device

#main
if __name__ == '__main__':

    #create model
    model = Model()
    model.cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr)
    loss = torch.nn.MSELoss()
    visdom_server = Visdom(port=8097)

    #create dataloader
    train_set = Loader(mode='train')
    val_set = Loader(mode='test')
    data_loader = torch.utils.data.DataLoader(train_set,
                                              batch_size=batch_size,
                                              shuffle=True,
                                              num_workers=1)
    val_data_loader = torch.utils.data.DataLoader(val_set,
                                                  batch_size=batch_size,
                                                  shuffle=True,
                                                  num_workers=1)

    #training
    print(f'Number of training batches: {len(data_loader)}')
    print(f'Number of testing batches: {len(val_data_loader)}')

    for epoch in range(epochs):
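        # Assumed minimal epoch body (the original snippet is cut off here); the
        # names model, data_loader, loss, and optimizer come from the lines above.
        model.train()
        for x, y in data_loader:
            x, y = x.cuda(), y.cuda()
            optimizer.zero_grad()
            batch_loss = loss(model(x), y)
            batch_loss.backward()
            optimizer.step()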
Example #10
                    action='store',
                    default='model.p',
                    help='modelname')
args = parser.parse_args()

args.cuda = not args.no_cuda and torch.cuda.is_available()

torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

print(args)

kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}

data_loader = Loader(c.FILE_TRAIN_LABELED, c.FILE_TRAIN_UNLABELED,
                     c.FILE_VALIDATION, c.FILE_TEST, kwargs)
train_loader = data_loader.getLabeledtrain()
unlabeled_train_loader = data_loader.getUnlabeledtrain()
valid_loader = data_loader.getValidation()


class AEMnist(nn.Module):
    def __init__(self):
        super(AEMnist, self).__init__()
        self.supervised = False
        # ENCODER
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 200)
        self.fc2 = nn.Linear(200, 50)
Example #11
from train import *
from eval import *
from predict import predict
import paddle.distributed as dist
from paddle.distributed import fleet


def cprint(words: str):
    print(f"\033[0;30;43m{words}\033[0m")


if __name__ == '__main__':

    print(config.config)
    train_dataset = Loader(path=config.dataset)
    name_datasets = config.dataset.split('/')[-1]
    Recmodel = NGCF(config.config, train_dataset)
    if config.config['multigpu']:
        print('using fleet multigpu training', Recmodel)
        dist.init_parallel_env()
        Recmodel = paddle.DataParallel(Recmodel)
    if config.config['multicpu']:
        fleet.init(is_collective=True)
        optimizer = fleet.distributed_optimizer(optimizer)
        Recmodel = fleet.distributed_model(Recmodel)
        print('using fleet multicpu training', Recmodel)
    Neg_k = 1
    bpr = BPRLoss(Recmodel, config.config)
    f = open(f'logger/train_logger_{name_datasets}.txt', 'w')
    f_test = open(f'logger/test_logger_{name_datasets}.txt', 'w')
Example #12
        for result in pre_results:
            results['recall'] += result['recall']
            results['precision'] += result['precision']
            results['ndcg'] += result['ndcg']
        results['recall'] /= float(len(users))
        results['precision'] /= float(len(users))
        results['ndcg'] /= float(len(users))
        # results['auc'] = np.mean(auc_record)
        if multicore == 1:
            pool.close()
        print(results)
        return results
def cprint(words: str):
    print(f"\033[0;30;43m{words}\033[0m")


if __name__ == '__main__':
    train_dataset = Loader(path=world.dataset)
    Recmodel = NGCF(world.config, train_dataset)
    Neg_k = 1
    bpr = BPRLoss(Recmodel, world.config)
    f = open(f'train_logger_{world.dataset}.txt', 'w')
    f_test = open(f'test_logger_{world.dataset}.txt', 'w')
    for epoch in range(world.TRAIN_epochs):
        if epoch % 10 == 0:
            cprint("[TEST]")
            result = Test(train_dataset, Recmodel, epoch, world.config['multicore'])
            print(epoch, result, file=f_test, flush=True)
        output_information = BPR_train_original(train_dataset, Recmodel, bpr, epoch, neg_k=Neg_k,w=None)
        log_output = f'EPOCH[{epoch+1}/{world.TRAIN_epochs}] {output_information}'
        print(log_output)
        print(log_output, file=f, flush=True)
    f.close()
Example #13
        help="available datasets: [lastfm, gowalla, yelp2018, amazon-book]")
    parser.add_argument(
        '--path',
        type=str,
        default="./checkpoints",
        help="path to save weights")
    parser.add_argument(
        '--topks', nargs='?', default="[20]", help="@k test list")
    parser.add_argument('--epochs', type=int, default=1000)
    parser.add_argument('--seed', type=int, default=2020, help='random seed')
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()

    dataset = Loader(args, path=args.dataset)
    model = NGCF(args, dataset)
    neg = 1

    optim = paddle.optimizer.Adam(
        parameters=model.parameters(), learning_rate=args.lr)

    for epoch in range(args.epochs):
        if epoch % 10 == 0:
            results = test(dataset, model, epoch, args)
            print(results)

        log = train(dataset, model, epoch, optim, args, neg_k=neg, w=None)
        print(f'EPOCH[{epoch+1}/{args.epochs}] {log}')
Example #14
from yamlparams.utils import Hparam
import sys

from metrics import mean_average_presision_k, hitrate_k, novelty, coverage
from dataloader import Loader
from preprocessor import Preprocessor
from model import TopPopular

# Read config
if len(sys.argv) < 2:
    raise AttributeError('Use config name to define model config')
cfg_path = sys.argv[1]  #'books_big_setting.yml'
print('Using config ' + cfg_path)
config = Hparam(cfg_path)

loader = Loader(config.path)
preprocessor = Preprocessor()

print('Reading data')
df = loader.get_views()

print('Preprocessing')
# train_df = preprocessor.filter_zeros(train_df)
df = preprocessor.filter_lazy_users(df, 0)
train_df, test_df = loader.split_train_test(df, config.min_user_views,
                                            config.testing.samples)

train_df, test_df = preprocessor.filter_not_in_test_items(train_df, test_df)
preprocessor.build_mappers(train_df.append(test_df))

train_df.user_id = train_df.user_id.apply(preprocessor.get_user_ix)
Example #15
        'num_workers': args.num_workers,
        'pin_memory': True
    } if args.cuda else {}
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    captioning_dataset = json.load(open(args.captioning_dataset_path, "rb"))
    all_arts = pickle.load(open(args.articles_metadata, 'rb'))
    art2id = {}
    # ipdb.set_trace()
    for i, art in enumerate(all_arts):
        art2id[art] = i
    p = open(args.fake_articles, 'r')
    fake_articles = [json.loads(l) for i, l in enumerate(p)]

    train_loader = DataLoader(Loader(args, 'train', captioning_dataset, art2id,
                                     fake_articles),
                              batch_size=args.batch_size,
                              shuffle=True,
                              collate_fn=collate)
    val_loader = DataLoader(Loader(args, 'valid', captioning_dataset, art2id,
                                   fake_articles),
                            batch_size=args.batch_size,
                            shuffle=False,
                            collate_fn=collate,
                            **kwargs)
    test_loader = DataLoader(Loader(args, 'test', captioning_dataset, art2id,
                                    fake_articles),
                             batch_size=args.batch_size,
                             shuffle=False,
                             collate_fn=collate,
                             **kwargs)
Example #16
def main():
    parser = Parser()
    config = parser.config

    for param, value in config.__dict__.items():
        print(param + '.' * (50 - len(param) - len(str(value))) + str(value))
    print()

    # Load previous checkpoint if it exists
    checkpoint = load_latest(config)

    # Create model
    model = load_model(config, checkpoint)

    # print number of parameters in the model
    n_params = sum([param.view(-1).size()[0] for param in model.parameters()])
    print('Total number of parameters: \33[91m{}\033[0m'.format(n_params))

    # Load train and test data
    train_loader, valid_loader, test_loader = Loader(config)

    n_batches = int(len(train_loader.dataset.train_data) / config.batch_size)

    # save the configuration
    with open(os.path.join(config.save, 'log.txt'), 'w') as file:
        json.dump('json_stats: ' + str(config.__dict__), file)

    # Instantiate the criterion, optimizer and learning rate scheduler
    criterion = torch.nn.CrossEntropyLoss(size_average=True)

    optimizer = torch.optim.SGD(model.parameters(),
                                lr=config.LR,
                                momentum=config.momentum,
                                weight_decay=config.weight_decay,
                                nesterov=config.nesterov)

    start_time = 0
    if checkpoint is not None:
        start_time = checkpoint['time'] + 1
        optimizer.load_state_dict(checkpoint['optimizer'])

    if config.lr_shape == 'multistep':
        scheduler = MultiStepLR(optimizer, milestones=[81, 122], gamma=0.1)
    elif config.lr_shape == 'cosine':
        if checkpoint is not None:
            scheduler = checkpoint['scheduler']
        else:
            scheduler = CosineAnnealingRestartsLR(optimizer,
                                                  1,
                                                  config.T_e,
                                                  T_mul=config.T_mul)

    # The trainer handles the training loop and evaluation on validation set
    trainer = Trainer(model, criterion, config, optimizer, scheduler)

    epoch = 1

    while True:
        # Train for a single epoch
        train_top1, train_loss, stop_training = trainer.train(
            epoch, train_loader)

        # Run model on the validation and test set
        valid_top1 = trainer.evaluate(epoch, valid_loader, 'valid')
        test_top1 = trainer.evaluate(epoch, test_loader, 'test')

        current_time = time.time()

        results = {
            'epoch': epoch,
            'time': current_time,
            'train_top1': train_top1,
            'valid_top1': valid_top1,
            'test_top1': test_top1,
            'train_loss': float(train_loss.data),
        }

        with open(os.path.join(config.save, 'results.txt'), 'w') as file:
            json.dump(str(results), file)
            file.write('\n')

        print(
            '==> Finished epoch %d (budget %.3f): %7.3f (train) %7.3f (validation) %7.3f (test)'
            % (epoch, config.budget, train_top1, valid_top1, test_top1))

        if stop_training:
            break

        epoch += 1

    if start_time >= config.budget:
        trainer.evaluate(epoch, test_loader, 'test')
    else:
        save_checkpoint(int(config.budget), trainer.model, trainer.optimizer,
                        trainer.scheduler, config)
Example #17
    def setUp(self):
        # Wipe DB
        apiproxy_stub_map.apiproxy.GetStub('datastore_v3').Clear()
        # load default data
        l = Loader()
        l.loadDatabase()
Example #18
    default=100,
    metavar='N',
    help='how many batches to wait before logging training status')
args = parser.parse_args()

args.cuda = not args.no_cuda and torch.cuda.is_available()

torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

print(args)

kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}

data_loader = Loader('data/train_labeled_aug.p', c.FILE_TRAIN_UNLABELED,
                     c.FILE_VALIDATION, c.FILE_TEST, kwargs)
train_loader = data_loader.getLabeledtrain()
unlabeled_train_loader = data_loader.getUnlabeledtrain()
valid_loader = data_loader.getValidation()

model = Ladder()

if args.cuda:
    model.cuda()

l2loss = torch.nn.BCELoss()  # torch.nn.L1Loss() # BCELoss: pass through sigmoid first
#l2_2 = torch.nn.L1Loss()
nllloss = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
Example #19
import torch
import pickle
import numpy as np
import pandas as pd
import torch.nn as nn
import constants as c
from dataloader import Loader
from torch.autograd import Variable


cuda = torch.cuda.is_available()

kwargs = {'num_workers': 1, 'pin_memory': True} if cuda else {}

data_loader = Loader(c.FILE_TRAIN_LABELED_AUG, c.FILE_TRAIN_UNLABELED, c.FILE_TEST, 'data/test-labeled.p', kwargs)
test_loader = data_loader.getTest()
test_actual = data_loader.getValidation()
label_predict = np.array([])



def callval(mnist_model, test_loader, test_actual, model, file):
    label_predict = np.array([])
    loadfile = True
    if loadfile:
        mnist = torch.load(model)

    mnist_model.load_state_dict(mnist)
    correct = 0
    if torch.cuda.is_available():
        mnist_model.cuda()
    def read_data(self):
        self.loader = Loader()
        self.loader.read_data()
                 histogram_freq=1,
                 write_graph=True,
                 write_images=False)

RP = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=3,
    verbose=0,
    mode='auto',
)

callbacks = [ES, TB, RP]

#create data
train_set = Loader(mode='train')
data_loader = torch.utils.data.DataLoader(train_set,
                                          batch_size=300,
                                          shuffle=False,
                                          num_workers=1)
for i, (x, y) in enumerate(data_loader):
    x = x.data.numpy()
    y = y.data.numpy()

#main
if __name__ == '__main__':
    model = simple_s2s()
    model.fit(x,
              y,
              epochs=2000,
              batch_size=1,