Example #1
    def benchmarkClicked(self, wrapper):
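        """Load the clicked benchmark and open a PlacementWindow for each placement."""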
        if wrapper._benchmark.get('isOther', False):
            path = QFileDialog.getOpenFileName(
                None,
                "Open benchmark",
                join(dirname(__file__), "benchmarks"),
                "Benchmark files (*.hgr)"
            )[0]
            if path:
                outputPath = QFileDialog.getExistingDirectory(
                    None,
                    "Select placement engine output directory",
                    dirname(__file__),
                )
            else:
                outputPath = None
        else:
            path = wrapper._benchmark['path']
            outputPath = dirname(path)

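        # Skip if the user cancelled either dialog.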
        if path and outputPath:
            app.setOverrideCursor(Qt.WaitCursor)
            results = load_data(path, outputPath)
            app.restoreOverrideCursor()

            if results[0] == -1:
                QMessageBox.critical(
                    None,
                    f"{basename(path)}",
                    f"Could not load benchmark.\n\nReason: {results[1]}",
                )

            else:
                placements = results[1]

                progress = QProgressDialog(
                    labelText="Generating visualizations...",
                    cancelButtonText=None,
                    minimum=0,
                    maximum=len(placements),
                    flags=Qt.WindowStaysOnTopHint,
                )
                progress.setWindowTitle(f"{basename(path)}")
                progress.setMinimumDuration(0)
                progress.setValue(0)
                progress.forceShow()
                app.processEvents()

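                # Keep references to the windows so they are not garbage-collected.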
                self.pws = []
                for placement in placements:
                    app.setOverrideCursor(Qt.WaitCursor)

                    pw = PlacementWindow(placement, app)
                    pw.show()
                    pw.plot()
                    self.pws.append(pw)

                    progress.setValue(progress.value() + 1)
                    app.processEvents()
                app.restoreOverrideCursor()
Example #2
def main():
    args = load_args()
    print(args)
    torch.manual_seed(args.seed)
    if args.use_cuda:
        torch.cuda.manual_seed(args.seed)
    np.random.seed(args.seed)

    X_train, y_train, X_val, y_val, _ = load_data(dataset=args.dataset)
    mask_train, mask_val, _ = load_masks(args.dataset)

    X_train *= mask_train
    X_val *= mask_val

    y_train = torch.from_numpy(y_train)
    y_val = torch.from_numpy(y_val)

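    # Use the first 80% of the training tensors for fitting, the rest for validation.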
    nb_train = int(0.8 * X_train.shape[0])

    train_dset = TensorDataset(X_train[:nb_train].permute(0, 2, 1),
                               y_train[:nb_train])
    val_dset = TensorDataset(X_train[nb_train:].permute(0, 2, 1),
                             y_train[nb_train:])

    loader_args = {}
    if args.use_cuda:
        loader_args = {'num_workers': 1, 'pin_memory': True}

    train_loader = DataLoader(
        train_dset, batch_size=args.batch_size, shuffle=False, **loader_args)
    val_loader = DataLoader(
        val_dset, batch_size=args.batch_size, shuffle=False, **loader_args)

    model = SeqAttention(
        768, 2, args.n_filters, args.len_motifs, args.subsamplings,
        kernel_args=args.kernel_params, alpha=args.weight_decay,
        eps=args.eps, heads=args.heads, out_size=args.out_size,
        max_iter=args.max_iter, fit_bias=False)
    print(model)
    print(len(train_dset))

    print("Initializing...")
    tic = timer()
    if args.use_cuda:
        model.cuda()
    n_samples = 3000
    if args.n_filters[-1] > 256:
        n_samples //= args.n_filters[-1] // 256
    model.unsup_train(train_loader, args.sampling_patches, n_samples=n_samples,
                      wb=args.wb, use_cuda=args.use_cuda)
    toc = timer()
    print("Finished feature learning, elapsed time: {:.2f}s".format(toc - tic))

    print("Encoding...")
    Xtr, ytr = model.predict(train_loader, only_repr=True,
                             use_cuda=args.use_cuda)
    preprocess(Xtr)
    print(Xtr.shape)

    Xval = []
    yval = []

    X, y = model.predict(val_loader, only_repr=True, use_cuda=args.use_cuda)
    preprocess(X)
    Xval.append(X)
    yval.append(y)

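    # Regularization grid: 1/2, 1/4, ..., 1/2**14.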
    search_grid = 2. ** np.arange(1, 15)
    search_grid = 1. / search_grid
    best_score = -np.inf
    clf = model.classifier
    criterion = torch.nn.CrossEntropyLoss(reduction='sum')
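    # L-BFGS is memory-hungry for very wide representations; fall back to Adam above 20k dimensions.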
    if Xtr.shape[-1] > 20000:
        optimizer = torch.optim.Adam(clf.parameters(), lr=0.01)
        epochs = 800
    else:
        optimizer = torch.optim.LBFGS(
                clf.parameters(), lr=1.0, max_eval=10, history_size=10,
                tolerance_grad=1e-05, tolerance_change=1e-05)
        epochs = 100
    torch.cuda.empty_cache()
    print("Start crossing validation")
    for alpha in search_grid:
        tic = timer()
        clf.fit(Xtr, ytr, criterion, reg=alpha, epochs=epochs,
                optimizer=optimizer, use_cuda=args.use_cuda)
        toc = timer()
        scores = []
        for X, y in zip(Xval, yval):
            if args.use_cuda:
                X = X.cuda()
            score = clf.score(X, y)
            scores.append(score)
        score = np.mean(scores)
        print("CV alpha={}, acc={:.2f}, ts={:.2f}s".format(alpha, score * 100.,
              toc - tic))
        if score > best_score:
            best_score = score
            best_alpha = alpha
            best_weight = copy.deepcopy(clf.state_dict())

    clf.load_state_dict(best_weight)

    print("Finished, elapsed time: {:.2f}s".format(toc - tic))

    test_dset = TensorDataset(X_val.permute(0, 2, 1), y_val)
    test_loader = DataLoader(
        test_dset, batch_size=args.batch_size, shuffle=False)
    Xte, y_true = model.predict(test_loader, only_repr=True,
                                use_cuda=args.use_cuda)
    preprocess(Xte)
    if args.use_cuda:
        Xte = Xte.cuda()
    with torch.no_grad():
        y_pred = clf(Xte).cpu()

    scores = accuracy(y_pred, y_true)
    print(scores)

    if args.save_logs:
        print('Saving logs...')
        data = {
            # 'title': title,
            'score': scores,
            'best_param': best_alpha,
            'val_score': best_score,
            'args': args
            }
        np.save(os.path.join(args.outdir, f"seed_{args.seed}_results.npy"),
                data)
        # torch.save(
        #     {'args': args,
        #      'state_dict': model.state_dict()},
        #     args.outdir + '/model.pkl')
    return
Example #3
if _name_ == "_main_":
    print("YOOO")
    #get arguments usage is python main.py --arg-1 arg1value , .. --arg-n argnvalue
    args = get_args()
    print(args)
    # Check whether a GPU is available and, if so, use it as the device
    cuda = torch.cuda.is_available()
    device = torch.device("cuda" if cuda else "cpu")
    #device = torch.device('cpu')
    # Load the (possibly already sharded) data structures:

#     if not os.path.exists(os.path.join(args.data_dir,'ISDdata.pt')):
#         sampled_data = load_raw_data(data_folder=args.data_dir)
#     else:
#         sampled_data = None
    ISD, SD, S, I, D = load_data(args.data_dir)
    # Generate the dataset objects from the data structures; cv=None means the last two seq_len steps form the test set
    dataset_train = ReplenishmentDataset(ISD, D, SD, I, S, args.seq_len, cv=None, train=True)
    #dataset_train.debug()
    train_loader = torch.utils.data.DataLoader(dataset_train, batch_size=args.batch_size, shuffle=True, num_workers=48)
    dataset_test  = ReplenishmentDataset(ISD, D, SD, I, S, args.seq_len, cv=None, train=False)
    test_loader = torch.utils.data.DataLoader(dataset_test, batch_size=args.batch_size, shuffle=False, num_workers=16)

    print('Final train sampled %:', len(dataset_train)/len(dataset_train.ISD))
    #print(len(dataset_test.ISD) - len(dataset_train) - len(dataset_test))
#     for ii, data in enumerate(train_loader):
#         pass

    #print(dataset_train.emb_dict)

    # TODO: Rewrite gathering of seq/stat embed keys so that we don't need to force get item to set dataset's seq/stat_int_dict
Example #4
import json
import pandas as pd
import time

from modules.clustering import Executor
from loaders import load_data, dump_clusters

if __name__ == '__main__':

    data = list(
        zip(load_data('entities_title.json'),
            load_data('keyphrases_title.json'),
            load_data('keyphrases_text.json')))

    try:
        labels_true = pd.factorize(load_data('labels_true.json'))
    except Exception:
        print('No labels')

    with open('best_params.json', mode='r') as fp:
        best_params = json.load(fp)

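    # Unpacking dict values assumes the JSON keys are ordered a, b, c, thr (dicts preserve insertion order on Python 3.7+).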
    a, b, c, thr = best_params[0].values()

    config = dict(a=a, b=b, c=c, thr=thr)

    e = Executor(config)

    st = time.time()

    for article_id, article_dict in enumerate(data):
        pass  # per-article clustering body not shown in the original listing
Example #5
def main():
    args = load_args()
    print(args)
    torch.manual_seed(args.seed)
    if args.use_cuda:
        torch.cuda.manual_seed(args.seed)
    np.random.seed(args.seed)

    X_train, y_train, X_val, y_val, _ = load_data(dataset=args.dataset)
    mask_train, mask_val, _ = load_masks(args.dataset)

    X_train *= mask_train
    X_val *= mask_val

    y_train = torch.from_numpy(y_train)
    y_val = torch.from_numpy(y_val)

    nb_train = int(0.8 * X_train.shape[0])

    train_dset = TensorDataset(X_train[:nb_train].permute(0, 2, 1),
                               y_train[:nb_train])
    val_dset = TensorDataset(X_train[nb_train:].permute(0, 2, 1),
                             y_train[nb_train:])

    loader_args = {}
    if args.use_cuda:
        loader_args = {'num_workers': 1, 'pin_memory': True}

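    # Two loaders over the same data: unshuffled for initialization, shuffled for training.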
    init_loader = DataLoader(
        train_dset, batch_size=args.batch_size, shuffle=False, **loader_args)
    train_loader = DataLoader(
        train_dset, batch_size=args.batch_size, shuffle=True, **loader_args)
    val_loader = DataLoader(
        val_dset, batch_size=args.batch_size, shuffle=False, **loader_args)

    model = SeqAttention(
        768, 2, args.n_filters, args.len_motifs, args.subsamplings,
        kernel_args=args.kernel_params, alpha=args.weight_decay,
        eps=args.eps, heads=args.heads, out_size=args.out_size,
        max_iter=args.max_iter)
    print(model)
    print(len(train_dset))

    print("Initializing...")
    tic = timer()
    if args.use_cuda:
        model.cuda()
    n_samples = 3000
    if args.n_filters[-1] > 256:
        n_samples //= args.n_filters[-1] // 256
    model.unsup_train(init_loader, args.sampling_patches, n_samples=n_samples,
                      use_cuda=args.use_cuda)
    criterion_clf = nn.CrossEntropyLoss(reduction='sum')
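    # Small classifier head: use the default solver; otherwise switch to Adam to reduce memory use.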
    if args.n_filters[-1] * args.out_size * args.heads < 30000:
        optimizer_clf = None
        epochs_clf = 20
    else:
        print("low ram optimizer clf")
        optimizer_clf = optim.Adam(model.classifier.parameters(), lr=0.01)
        epochs_clf = 100
    model.train_classifier(init_loader, criterion_clf, epochs=epochs_clf * 5,
                           optimizer=optimizer_clf, use_cuda=args.use_cuda)
    toc = timer()
    print("Finished, elapsed time: {:.2f}s".format(toc - tic))
    criterion = nn.CrossEntropyLoss()
    # epoch_loss, epoch_acc = eval_epoch_list(
    #         model, val_loader, criterion, use_cuda=args.use_cuda)

    # criterion = nn.CrossEntropyLoss()
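    # Alternating mode optimizes only the feature parameters here; the classifier is refit each epoch below.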
    if args.alternating:
        optimizer = optim.Adam(model.feature_parameters(), lr=args.lr)
        lr_scheduler = ReduceLROnPlateau(
            optimizer, factor=0.5, patience=5, min_lr=1e-4)
    else:
        weight_decay = args.weight_decay / args.batch_size
        optimizer = optim.Adam([
            {'params': model.feature_parameters()},
            {'params': model.classifier.parameters(),
             'weight_decay': weight_decay}
            ], lr=args.lr)
        lr_scheduler = StepLR(optimizer, 30, gamma=0.5)

    print("Start training...")
    tic = timer()

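    # Keep the weights with the lowest validation loss.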
    epoch_loss = None
    best_loss = float('inf')
    for epoch in range(args.epochs):
        print('Epoch {}/{}'.format(epoch + 1, args.epochs))
        print('-' * 10)
        if args.alternating:
            model.eval()
            tic_c = timer()
            model.train_classifier(train_loader, criterion_clf,
                                   epochs=epochs_clf, optimizer=optimizer_clf,
                                   use_cuda=args.use_cuda)
            toc_c = timer()
            print("Classifier trained. Time: {:.2f}s".format(toc_c - tic_c))
        print("current LR: {}".format(
              optimizer.param_groups[0]['lr']))
        train_loss, train_acc = train_epoch(
            model, train_loader, criterion, optimizer, use_cuda=args.use_cuda)
        val_loss, val_acc = eval_epoch_list(
            model, [val_loader], criterion, use_cuda=args.use_cuda)
        if isinstance(lr_scheduler, ReduceLROnPlateau):
            lr_scheduler.step(val_loss)
        else:
            lr_scheduler.step()
        if val_loss < best_loss:
            best_loss = val_loss
            best_acc = val_acc
            best_epoch = epoch + 1
            best_weights = copy.deepcopy(model.state_dict())

    toc = timer()
    training_time = (toc - tic) / 60
    print("Traning finished, elapsed time: {:.2f}s".format(toc - tic))
    model.load_state_dict(best_weights)
    print("Testing...")

    test_dset = TensorDataset(X_val.permute(0, 2, 1), y_val)
    test_loader = DataLoader(
        test_dset, batch_size=args.batch_size, shuffle=False)
    y_pred, y_true = model.predict(
        test_loader, use_cuda=args.use_cuda)

    scores = accuracy(y_pred, y_true)
    print(scores)

    if args.save_logs:
        print('Saving logs...')
        data = {
            # 'title': title,
            'score': scores,
            'best_epoch': best_epoch,
            'best_loss': best_loss,
            'val_score': best_acc,
            'args': args
            }
        np.save(os.path.join(args.outdir, f"seed_{args.seed}_results.npy"),
                data)
        # torch.save(
        #     {'args': args,
        #      'state_dict': model.state_dict()},
        #     args.outdir + '/model.pkl')
    return
Example #6
import json
import numpy as np
from tqdm import tqdm

from modules.preprocessing import Preprocessor
from config import DATASET, MAX_NUM_ARTICLES
from loaders import make_data_path, load_data

if __name__ == '__main__':

    articles = load_data('articles.json')

    if DATASET == 'lsir':
        np.random.seed(42)
        articles = np.random.choice(articles, MAX_NUM_ARTICLES,
                                    replace=False).tolist()
        np.random.seed()

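    # Write one JSON object per line (JSON-lines).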
    with open(make_data_path('processed_articles'), mode='w') as fp:
        for article in tqdm(articles):
            processed_article = {
                'entities_title':
                Preprocessor.get_entities_spacy(article['title']),
                'keyphrases_title':
                Preprocessor.get_keyphrases_pke(article['title']),
                'keyphrases_text':
                Preprocessor.get_keyphrases_pke(article['text']),
            }
            json.dump(processed_article, fp)
            fp.write('\n')