Example #1
def main(args=sys.argv[1:]):
    args = parse_args(args)
    logging.basicConfig(level=args.logging)

    device = torch.device(
        "cpu" if args.no_cuda or not torch.cuda.is_available() else "cuda")

    vocab = Vocab()
    # Load data now to know the whole vocabulary when training model.
    train_data = data_loader.load(data_loader.path("train"), vocab)
    valid_data = data_loader.load(data_loader.path("valid"), vocab)
    test_data = data_loader.load(data_loader.path("test"), vocab)

    model = RnnLm(len(vocab), args.embedding_dim, args.gru_hidden,
                  args.gru_layers, not args.untied,
                  args.gru_dropout).to(device)
    optimizer = optim.RMSprop(model.parameters(), lr=args.lr)

    for epoch_ind in range(args.epochs):
        logging.info("Training epoch %d", epoch_ind)
        train_epoch(train_data, model, optimizer, args, device)
        logging.info("Validation perplexity: %.1f",
                     evaluate(valid_data, model, args.batch_size, device))
    logging.info("Test perplexity: %.1f",
                 evaluate(test_data, model, args.batch_size, device))
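Example #1 depends on `train_epoch` and `evaluate` helpers that are not shown. Below is a minimal sketch of the perplexity computation `evaluate` could perform; the `data.batches()` iterator and the `(logits, hidden)` return value of the model are assumptions, not the project's actual API.

import math
import torch
import torch.nn.functional as F

def evaluate(data, model, batch_size, device):
    # Hypothetical perplexity helper matching the evaluate(...) calls in Example #1.
    model.eval()
    total_loss, total_tokens = 0.0, 0
    with torch.no_grad():
        for inputs, targets in data.batches(batch_size):  # assumed batching API
            inputs, targets = inputs.to(device), targets.to(device)
            logits, _ = model(inputs)  # assumed (logits, hidden) return
            loss = F.cross_entropy(logits.view(-1, logits.size(-1)),
                                   targets.view(-1), reduction="sum")
            total_loss += loss.item()
            total_tokens += targets.numel()
    model.train()
    return math.exp(total_loss / total_tokens)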
Example #2
def run(training_data, test_data, problog_file):
    queries = load(training_data)
    test_queries = load(test_data)

    network = MultiLabelNet()

    with open(problog_file, 'r') as f:
        problog_string = f.read()

    net = Network(network, 'multilabel_net', neural_predicate)
    net.optimizer = torch.optim.Adam(network.parameters(), lr=0.001)
    model = Model(problog_string, [net], caching=False)
    optimizer = Optimizer(model, 2)

    train_model(
        model,
        queries,
        nr_epochs=50,
        optimizer=optimizer,
        test_iter=len(queries) * 10,
        # test=multilabel_test,
        log_iter=500,
        snapshot_iter=len(queries))

    for query in test_queries:
        print(query)

        for k, v in model.solve(query).items():
            print('\t{}: {:.4f}\t{}'.format(
                k.args[1], v[0],
                CLASSES_BY_LABEL[int(query.args[0])][str(k.args[1])]))
Example #3
def main(config):
    prepare_dirs_and_logger(config)

    rng = np.random.RandomState(config.random_seed)
    tf.set_random_seed(config.random_seed)

    load()
    train_data_loader, train_label_loader, train_loc_loader, train_mask_loader = get_loader(
        config.data_path, config.batch_size, 0, 'train', True)
    test_data_loader, test_label_loader, test_loc_loader, test_mask_loader = get_loader(
        config.data_path, config.batch_size_test, 5, 'train', True)

    trainer = Trainer(config, train_data_loader, train_label_loader,
                      train_loc_loader, train_mask_loader, test_data_loader,
                      test_label_loader, test_loc_loader, test_mask_loader)
    print("loaded trainer")
    if config.is_train:
        save_config(config)
        trainer.train()
        print("finished train")
    else:
        if not config.load_path:
            raise Exception(
                "[!] You should specify `load_path` to load a pretrained model"
            )
        trainer.test()
Example #4
def run_linear(training_data,
               test_data,
               problog_files,
               problog_train_files=(),
               problog_test_files=()):
    queries = load(training_data)
    test_queries = load(test_data)

    # network = SoundLinearNet()
    # network = SoundCNNet()
    network = SoundVGGish()

    problog_string = add_files_to(problog_files, '')

    problog_train_string = add_files_to(problog_train_files, problog_string)
    problog_test_string = add_files_to(problog_test_files, problog_string)

    net = Network(network, 'sound_net', neural_predicate_vggish)
    net.optimizer = torch.optim.Adam(network.parameters(), lr=0.001)
    model_to_train = Model(problog_train_string, [net], caching=False)
    optimizer = Optimizer(model_to_train, 2)

    model_to_test = Model(problog_test_string, [net], caching=False)

    train_model(model_to_train,
                queries,
                nr_epochs=10,
                optimizer=optimizer,
                test_iter=len(queries),
                test=lambda _: my_test(model_to_test, test_queries),
                log_iter=500,
                snapshot_iter=len(queries),
                snapshot_name='SequenceDetectionSnapshots/model')
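Examples #4, #12, and #17 build their ProbLog programs with an `add_files_to` helper, which Example #16 below effectively writes out inline. A minimal sketch consistent with that usage (the real helper's exact behavior is an assumption):

def add_files_to(problog_files, problog_string):
    # Append the contents of each ProbLog file to the given program string.
    for problog_file in problog_files:
        with open(problog_file) as f:
            problog_string += f.read() + '\n\n'
    return problog_string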
Example #5
def predict():
    """
    An example of how to load a trained model and use it
    to predict labels.
    """

    # load the saved model
    classifier = cPickle.load(open('best_model.pkl'))

    # compile a predictor function
    predict_model = theano.function(
        inputs=[classifier.input],
        outputs=classifier.y_pred)

    # We can test it on some examples from the test set
    dataset = 'data/mnist.pkl.gz'
    training_set, validation_set, testing_set, = data_loader.load(dataset)
    testing_set_x   , testing_set_y    = testing_set
    testing_set_x = testing_set_x.get_value()
    testing_set_y = testing_set_y.eval()[:30]

    predicted_values = predict_model(testing_set_x[:30])
    print ("Predicted values for the first 10 examples in test set:")
    print predicted_values
    print ("answers:")
    print testing_set_y
Example #6
def main():
    tf.set_random_seed(2018)
    
    dtrain, dtest, z, y_std = data_loader.load('airplane.csv', n_clusters=config.clusters, n_induce=config.num_inducing, sgp=config.sgp)
    N, _ = dtrain.shape
    model = VBSGPR(N, config.log_beta, config.log_sf2, config.log_theta, z, whiten=True)

    clusters = [i for i in range(config.clusters)]

    lb = model.lower_bound()
    fmu, fcov = model.predict_f()
    gp_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'vbsgpr')
    gp_opt = tf.train.AdamOptimizer(0.01, beta1=0.9, name='gp_opt') # Best: 40.459
    # gp_opt = tf.train.MomentumOptimizer(0.01, momentum=0.9, use_nesterov=False)
    gp_train_op = gp_opt.minimize(-lb, var_list=gp_vars)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(config.epochs):
            random.shuffle(clusters)
            for i, cluster in enumerate(clusters):
                data_batch = dtrain[np.where(dtrain[:, -1] == cluster)]
                X, y = data_batch[:, :-2], data_batch[:, -2:-1]
                _, lb_ = sess.run([gp_train_op, lb], {model.x: X, model.y: y, model.batch: y.shape[0]})
                if i % 100 == 0: 
                    print ('Epoch: [{}], The {}-th Cluster: [{}], Lower Bound: [{}]'.format(
                            epoch, i, cluster, lb_))
            X_test, y_test = dtest[:, :-2], dtest[:, -2:-1]
            f_test, _ = sess.run([fmu, fcov], {model.x: X_test})
            rmse = np.sqrt(np.mean(y_std**2 * ((y_test - f_test))**2))
            print ('Epoch {} test RMSE: {}'.format(epoch, rmse))
Example #7
def main():
    # Select chain and info file with a GUI.
    # datafile = open_file_gui(add_pattern="*.txt")
    # infofile = open_file_gui(add_pattern="*.txt")

    parser = arg_parser(description='Superplot summary tool', conflict_handler='resolve')

    parser.add_argument('--data_file',
                        '-d',
                        help='Chain file to summarise',
                        type=str,
                        required=True)
    parser.add_argument('--info_file',
                        '-i',
                        help='Info file to summarise',
                        type=str,
                        default=None,
                        required=False)

    args = vars(parser.parse_args())

    datafile = os.path.abspath(args['data_file'])

    infofile = args['info_file']
    if infofile:
        infofile = os.path.abspath(infofile)

    # Load and label data
    labels, data = data_loader.load(infofile, datafile)

    summary_table = _summary_table(labels, data, datafile=datafile, infofile=infofile)
    return summary_table
Example #8
def train(train_data, ep, bz):
    x, y = data_loader.load(train_data, dim, t)
    print('Training model ...')
    model = create_model()
    model.fit(x, y, epochs=ep, batch_size=bz, verbose=2)
    # save(directory + 'model/', m_name, model)
    evaluate(create_model, seed, x, y, t, ep, bz)
Example #9
def run_coauthor(fold_i):
    def neural_predicate(network, i, dataset='train'):
        i = int(i)
        dataset = str(dataset)
        if dataset == 'train':
            d, l = mnist_train_data[i]
        elif dataset == 'test':
            d, l = mnist_test_data[i]
        d = Variable(d.unsqueeze(0))
        output = network.net(d)
        return output.squeeze(0)

    queries = load('train_data.txt')

    with open('coauthor_rules.pl') as f:
        problog_string = f.read()

    network = coauthor_net()
    net = Network(network, f'coauthor {fold_i+1}', neural_predicate)
    net.optimizer = torch.optim.Adam(network.parameters(), lr=0.001)
    model = Model(problog_string, [net], caching=False)
    optimizer = Optimizer(model, 2)

    train_model(model,
                queries,
                1,
                optimizer,
                test_iter=1000,
                test=test_coauthor,
                snapshot_iter=10000)
Example #10
def run(data_dir: str,
        save_estimator=False,
        segment_size=query_name_learner.SEGMENT_SIZE,
        overlap: float = query_name_learner.OVERLAP_FRACTION,
        is_segment_size_in_seconds: bool = False,
        ipython_when_done=False):
    data_loader.load(data_dir,
                     ['frame_time_relative', 'dns_qry_name', 'dns_qry_type'])

    query_name_learner.SEGMENT_SIZE = segment_size
    query_name_learner.OVERLAP_FRACTION = overlap
    query_name_learner.IS_SEGMENT_SIZE_IN_SECONDS = is_segment_size_in_seconds

    per_user_states = query_name_learner.run_one_v_all(save_estimator)
    if ipython_when_done:
        from IPython import embed
        embed()
Example #11
def main():
    df = data_loader.make_df("../input_data/data_h27v07")

    df["mean_lai"] = df.path.map(
        lambda x: np.mean(data_loader.load(x)["Lai_500m"]))

    df.sort_values("date", ascending=True).plot(x="date", y="mean_lai")
    plt.show()
Example #12
def run(training_data,
        test_data,
        problog_files,
        problog_train_files=(),
        problog_test_files=(),
        config_file=None,
        net_mode='init',
        cfg=None):
    config = json.load(open(config_file))
    config['net_mode'] = net_mode
    config['cfg'] = cfg

    queries = load(training_data)
    test_queries = load(test_data)

    sounds = SoundsUtils(config)

    problog_string = add_files_to(problog_files, '')

    problog_train_string = add_files_to(problog_train_files, problog_string)
    problog_test_string = add_files_to(problog_test_files, problog_string)

    network = sounds.network
    net = Network(network, 'sound_net', sounds.neural_predicate)
    net.optimizer = sounds.optimizer
    model_to_train = Model(problog_train_string, [net], caching=False)
    optimizer = Optimizer(model_to_train, 2)

    model_to_test = Model(problog_test_string, [net], caching=False)

    train_model(model_to_train,
                queries,
                5,
                optimizer,
                test_iter=len(queries),
                test=lambda _: my_test(
                    model_to_test,
                    test_queries,
                    test_functions={
                        'sound_net':
                        lambda *args, **kwargs: sounds.neural_predicate(
                            *args, **kwargs, in_training=False)
                    },
                ),
                snapshot_iter=len(queries))
Example #13
def main():
    # Select chain and info file with a GUI.
    datafile = open_file_gui()
    infofile = open_file_gui()

    # Load and label data
    labels, data = data_loader.load(infofile, datafile)

    summary_table = _summary_table(labels, data, datafile=datafile, infofile=infofile)
    return summary_table
Example #14
def main():
    # Select chain and info file with a GUI.
    datafile = open_file_gui(add_pattern="*.txt")
    infofile = open_file_gui(add_pattern="*.txt")

    # Load and label data
    labels, data = data_loader.load(infofile, datafile)

    summary_table = _summary_table(labels, data, datafile=datafile, infofile=infofile)
    return summary_table
Example #15
def load_data(args):
    data = data_loader.load(args.dataset,
                            n_train=args.n_train,
                            n_test=args.n_test,
                            train_noise=args.train_noise,
                            test_noise=args.test_noise)
    stratify = args.dataset not in ["abalone", "segment"]
    if args.dataset not in [
            'arcene', 'moon', 'toy_Story', 'toy_Story_ood', 'segment'
    ]:
        print(args.dataset)
        x = data_loader.prepare_inputs(data['features'])
        y = data['labels']
        x_train, x_test, y_train, y_test = train_test_split(
            x,
            y,
            train_size=args.train_test_ratio,
            stratify=y if stratify else None)
    else:
        if args.dataset == 'moon' or args.dataset=='toy_Story' or \
           args.dataset=='toy_Story_ood':
            x_train, x_test = data['x_train'], data['x_val']
        else:
            x_train, x_test = data_loader.prepare_inputs(
                data['x_train'], data['x_val'])
        y_train, y_test = data['y_train'], data['y_val']

    # Generate validation split
    x_train, x_val, y_train, y_val = train_test_split(
        x_train,
        y_train,
        train_size=args.train_test_ratio,
        stratify=y_train if stratify else None)
    x_train = x_train.astype(np.float32)
    x_val = x_val.astype(np.float32)
    x_test = x_test.astype(np.float32)

    n_mean = np.mean(x_train, axis=0)
    n_std = np.var(x_train, axis=0)**.5

    x_train = (x_train - n_mean) / n_std
    x_val = (x_val - n_mean) / n_std
    x_test = (x_test - n_mean) / n_std

    try:
        if args.n_ood > 0 and y_val.shape[1] > args.n_ood:
            n_ood = y_val.shape[1] - args.n_ood - 1
            return utils.prepare_ood(x_train, x_val, x_test, y_train, y_val,
                                     y_test, n_ood, args.norm)
    except AttributeError:
        #print(x_train, x_val, x_test, y_train, y_val, y_test)
        return x_train, x_val, x_test, y_train, y_val, y_test, 0, 0
    return x_train, x_val, x_test, y_train, y_val, y_test, 0, 0
Example #16
def run(training_data, test_data, problog_files):
    queries = load(training_data)
    test_queries = load(test_data)

    problog_string = ''
    for problog_file in problog_files:
        with open(problog_file) as f:
            problog_string += f.read()
            problog_string += '\n\n'

    network = MNIST_Net()
    net = Network(network, 'mnist_net', neural_predicate)
    net.optimizer = torch.optim.Adam(network.parameters(), lr=0.001)
    model = Model(problog_string, [net], caching=False)
    optimizer = Optimizer(model, 2)

    train_model(model,
                queries,
                1,
                optimizer,
                test_iter=1000,
                test=test_MNIST,
                snapshot_iter=10000)
Example #17
def run(training_data,
        test_data,
        problog_files,
        problog_train_files=(),
        problog_test_files=()):
    queries = load(training_data)
    test_queries = load(test_data)

    problog_string = add_files_to(problog_files, '')

    problog_train_string = add_files_to(problog_train_files, problog_string)
    problog_test_string = add_files_to(problog_test_files, problog_string)

    network = MNIST_Net()
    net = Network(network, 'mnist_net', neural_predicate)
    net.optimizer = torch.optim.Adam(network.parameters(), lr=0.001)
    model_to_train = Model(problog_train_string, [net], caching=False)
    optimizer = Optimizer(model_to_train, 2)

    model_to_test = Model(problog_test_string, [net], caching=False)

    train_model(model_to_train,
                queries,
                1,
                optimizer,
                test_iter=len(queries),
                test=lambda _: my_test(
                    model_to_test,
                    test_queries,
                    test_functions={
                        'mnist_net':
                        lambda *args, **kwargs: neural_predicate(
                            *args, **kwargs, dataset='test')
                    },
                ),
                log_iter=1000,
                snapshot_iter=len(queries))
Example #18
def indexATIS():
    train_set, valid_set, dicts = load(
        'atis.pkl')  # load() from data_loader.py
    w2idx, la2idx = dicts['words2idx'], dicts['labels2idx']

    idx2w = {w2idx[k]: k for k in w2idx}
    idx2la = {la2idx[k]: k for k in la2idx}

    indexes = {
        "idx2w": idx2w,
        "idx2la": idx2la,
        "w2idx": w2idx,
        "la2idx": la2idx
    }

    with open('embeddings/word_indexes.json', 'w') as f:
        json.dump(indexes, f)

    log("Word Indexes saved at (embeddings/word_indexes.json)...")

    train_x, _, train_label = train_set
    valid_x, _, valid_label = valid_set

    MAX_LEN = max(max([len(s) for s in train_x]),
                  max([len(s) for s in valid_x]))

    # Add padding
    train_x = pad_sequences(train_x,
                            maxlen=MAX_LEN,
                            padding='post',
                            value=w2idx["<UNK>"])
    train_label = pad_sequences(train_label,
                                maxlen=MAX_LEN,
                                padding='post',
                                value=la2idx["O"])

    valid_x = pad_sequences(valid_x,
                            maxlen=MAX_LEN,
                            padding='post',
                            value=w2idx["<UNK>"])
    valid_label = pad_sequences(valid_label,
                                maxlen=MAX_LEN,
                                padding='post',
                                value=la2idx["O"])

    train_set = (train_x, train_label)  # packing only train_x and train_label
    valid_set = (valid_x, valid_label)

    return (train_set, valid_set, indexes)
Example #19
def main():
    input_path = os.path.abspath(os.path.join('./data', args.dataset))
    dataset = os.path.splitext(args.dataset)[0]
    logger.info('Load {}'.format(input_path))
    params = {'test_size': 0.2, 'random_state': 1, 'cluster': 'kmeans'}
    X_train, X_test, y_train, y_test = data_loader.load(input_path, **params)
    logger.info('Split into train and test subsets: {}'.format(params))

    params_path = os.path.abspath(os.path.join('./params', args.params))
    with open(params_path) as file_:
        params = yaml.load(file_, Loader=yaml.SafeLoader)
    logger.info('Load {}'.format(params_path))
    logger.info('Hyperparameters: {}'.format(params))
    models = {
        'MLP': nn.MLPClassifier,
        'CNN': nn.CNNClassifier,
        'RNN': nn.RNNClassifier
    }
    clf = models[args.model](**params)
    estimator = clf.__class__.__name__
    logger.info('Train {} on {}'.format(estimator, dataset))
    clf.fit(X_train, y_train)

    output_dir = os.path.abspath(args.output)
    os.makedirs(output_dir, exist_ok=True)
    csv_log = pd.DataFrame({
        'loss': clf.loss_curve_,
        'train_score': clf.training_scores_,
        'val_score': clf.validation_scores_
    })
    csv_log_path = os.path.join(output_dir, time.strftime('%Y%m%d-%H%M%S.csv'))
    csv_log.to_csv(csv_log_path)
    logger.info('Save learning log to {}'.format(csv_log_path))

    if args.plot:
        plot_path = os.path.join(output_dir,
                                 time.strftime('%Y%m%d-%H%M%S.png'))
        plotting.plot_learning_curve(csv_log_path,
                                     '{} on {}'.format(estimator,
                                                       dataset), plot_path)
        logger.info('Save learning curves to {}'.format(plot_path))

    logger.info('Training score: {}'.format(clf.score(X_train, y_train)))
    logger.info('Testing score: {}'.format(clf.score(X_test, y_test)))
    logger.info('Done')
Example #20
def predict(path, output):
    print('Prediction ...')
    x, y = data_loader.load(path, dim, t)
    pipe = load_model(directory + 'model/', m_name)
    pred = pipe.predict_proba(x)[:, 0]
    new_p = pred.round(2).astype(int)
    count = 0
    for i in range(len(new_p)):
        if new_p[i] == y[i]:
            count = count + 1
    print('Test result: ',
          count,
          '/',
          len(y),
          ' (',
          round(count / len(y) * 100, 2),
          '%)',
          sep='')
    df = pd.DataFrame({'Expected': y, 'Predicted': new_p})
    df.to_csv(output)
Example #21
def train(model_name, category_type, dump=False):
    clf = tfidf_pipeline.make(model_name)

    categories = names.categories[category_type]

    print 'Loading data...'
    data = data_loader.load('full', categories)
    train_X, train_y, test_X, test_y = data_loader.split(data, 0.1)
    print 'Done.'

    print 'Training...'
    clf.fit(train_X, train_y)
    print 'Done.'

    print 'Testing...'
    predicted = clf.predict(test_X)

    if model_name in ['svr', 'linreg']:
        predicted = np.clip(np.round(predicted), 0, 7)
        accuracy = scorers.err1(test_y, predicted)
        print 'Off-by-one accuracy: ' +  str(accuracy)
    else:
        accuracy = scorers.err0(test_y, predicted)
        print 'Exact accuracy: ' +  str(accuracy)
        print classification_report(test_y, predicted, target_names=categories)
    cm = confusion_matrix(test_y, predicted)
    print cm
    plot.plot_confusion_matrix(cm, category_type)

    if dump:
        print 'Saving classifier...'
        if not exists('dumps'):
            makedirs('dumps')
        joblib.dump(clf, join('dumps', category_type + '_' + model_name + '_classifier.pkl'))
        print 'Done.'

    return clf
Example #22
def main():
    df = data_loader.make_df("../input_data/data_h27v07")
    bin = 10
    rang = np.arange(0, int(1000 / bin) * int(700 / bin))
    print(rang)
    col_areas = []
    for i in rang:
        df[f"mean_lai_{i}"] = 0
        col_areas.append(f"mean_lai_{i}")
    #print(df)
    for idx in df.index:
        val = data_loader.load(df.loc[idx, "path"])["Lai_500m"]
        for i in rang:
            j = i // int(1000 / bin)
            k = i % int(700 / bin)

            df.loc[idx, f"mean_lai_{i}"] = np.mean(
                val[j * bin:(j + 1) * bin,
                    1000 + k * bin:1000 + (k + 1) * bin])
        #print(df.loc[idx, :])
    print(df.shape)
    #    df.sort_values("date", ascending=True).plot(x="date", y="mean_lai")
    #   plt.show()
    df.to_csv("../input_data/area_lai_5km_mean.csv")
Example #23
import matplotlib.pyplot as plt
import numpy as np
import data_loader
from lda import lda_experiment
from logistic import logistic_regression_experiment
from linear import linear_regression_experiment
from qda import qda_experiment

if __name__ == "__main__":
    # Train datasets
    X_trainA, Y_trainA = data_loader.load("data/trainA")
    X_trainB, Y_trainB = data_loader.load("data/trainB")
    X_trainC, Y_trainC = data_loader.load("data/trainC")

    # Test datasets
    X_testA, Y_testA = data_loader.load("data/testA")
    X_testB, Y_testB = data_loader.load("data/testB")
    X_testC, Y_testC = data_loader.load("data/testC")

    # 2.1 : LDA
    print("{:=^30}".format("LDA"))
    lda_experiment(X_trainA, Y_trainA, X_testA, Y_testA, "A")
    lda_experiment(X_trainB, Y_trainB, X_testB, Y_testB, "B")
    lda_experiment(X_trainC, Y_trainC, X_testC, Y_testC, "C")

    # 2.2 : logistic regression
    print("{:=^30}".format("Logistic Regression"))
    logistic_regression_experiment(X_trainA, Y_trainA, X_testA, Y_testA, "A")
    logistic_regression_experiment(X_trainB, Y_trainB, X_testB, Y_testB, "B")
    logistic_regression_experiment(X_trainC, Y_trainC, X_testC, Y_testC, "C")
Example #24
def dump_results(results, n_clusters):
    out_file = f"./output/algs_{n_clusters}clusters.json"
    with open(out_file, 'w') as f:
        json.dump(results, f, indent=1)


if __name__ == "__main__":
    args = parse_arguments()
    nr_clusters = args.n_clusters
    REPEATS = args.repeats
    cluster_metrics = args.metrics

    path = os.getcwd()
    os.chdir('..')
    X = data_loader.load(
        "0_data_generators/data_{}_shuffled.csv".format(nr_clusters))
    X = np.array(X)
    print("Done loading, shape:", X.shape)
    os.chdir(path)

    raster = Raster(precision=4, threshold=5, min_size=5)

    clustering_algorithms = [] if args.no_raster else [('RASTER', raster)]

    # 20 for 10 clusters, 300-500 for 100 clusters.
    # Don't even try 1000 clusters (a run takes days).
    tau = 5 / (X.size)  # Clique equivalent of RASTER's threshold
    for xsi in args.xsi:
        clique = clique_fit.Clique(xsi=xsi, tau=tau)
        name = "CLIQUE_xsi" + str(xsi)
        clustering_algorithms.append((name, clique))
Example #25
import json

from train import train_model
from data_loader import load
from examples.NIPS.MNIST.mnist import test_MNIST, MNIST_Net, MNIST_Net2, neural_predicate
from model import Model
from optimizer import Optimizer
from network import Network
import torch

queries = load('train_data.txt')

with open('abs.pl') as f:
    problog_string = f.read()

network = MNIST_Net()
network.load_state_dict(torch.load('sd_rec_cnn.pt'))
network.eval()
net = Network(network, 'mnist_net', neural_predicate)
net.optimizer = torch.optim.Adam(network.parameters(), lr=0.001)
model = Model(problog_string, [net], caching=False)
optimizer = Optimizer(model, 2)

log = {}

logs = test_MNIST(model)
for e in logs:
    log[e[0]] = e[1]

with open('notl_rec2abs_dpl.json', 'w') as outfile:
    json.dump(log, outfile)
Example #26
    def __init__(self,
                 data_file,
                 info_file,
                 xindex=2,
                 yindex=3,
                 zindex=4,
                 default_plot_type=0
                 ):

        self.data_file = data_file
        self.info_file = info_file
        self.xindex = xindex
        self.yindex = yindex
        self.zindex = zindex

        self.plot_limits = default("plot_limits")
        self.bin_limits = default("bin_limits")

        self.fig = None
        self.plot = None
        self.options = None

        # Load data from files
        self.labels, self.data = data_loader.load(info_file, data_file)

        # Enumerate available plot types and keep an ordered
        # dict mapping descriptions to classes.
        # Using an ordered dict means the order in which classes
        # are listed in plot_types will be preserved in the GUI.
        self.plots = OrderedDict()
        for plot_class in plots.plot_types:
            self.plots[plot_class.description] = plot_class

        #######################################################################

        # Combo-box for various plot types

        typetitle = gtk.Button("Plot type:")
        self.typebox = gtk.combo_box_new_text()
        for description in self.plots.keys():
            self.typebox.append_text(description)
        self.typebox.set_active(default_plot_type)  # Set to default plot type

        #######################################################################

        # Combo box for selecting x-axis variable

        xtitle = gtk.Button("x-axis variable:")
        self.xbox = gtk.combo_box_new_text()
        for label in self.labels.itervalues():
            self.xbox.append_text(label)
        self.xbox.set_wrap_width(5)
        self.xbox.connect('changed', self._cx)
        self.xtext = gtk.Entry()
        self.xtext.set_text(self.labels[self.xindex])
        self.xtext.connect("changed", self._cxtext)
        self.xbox.set_active(self.xindex)

        #######################################################################

        # Combo box for selecting y-axis variable

        ytitle = gtk.Button("y-axis variable:")
        self.ybox = gtk.combo_box_new_text()
        for label in self.labels.itervalues():
            self.ybox.append_text(label)
        self.ybox.set_wrap_width(5)
        self.ybox.connect('changed', self._cy)
        self.ytext = gtk.Entry()
        self.ytext.set_text(self.labels[self.yindex])
        self.ytext.connect("changed", self._cytext)
        self.ybox.set_active(self.yindex)

        #######################################################################

        # Combo box for selecting z-axis variable

        ztitle = gtk.Button("z-axis variable:")
        self.zbox = gtk.combo_box_new_text()
        for label in self.labels.itervalues():
            self.zbox.append_text(label)
        self.zbox.set_wrap_width(5)
        self.zbox.connect('changed', self._cz)
        self.ztext = gtk.Entry()
        self.ztext.set_text(self.labels[self.zindex])
        self.ztext.connect("changed", self._cztext)
        self.zbox.set_active(self.zindex)

        #######################################################################

        # Check buttons for log Scaling

        self.logx = gtk.CheckButton('Log x-data.')
        self.logy = gtk.CheckButton('Log y-data.')
        self.logz = gtk.CheckButton('Log z-data.')

        #######################################################################

        # Text box for plot title

        tplottitle = gtk.Button("Plot title:")
        self.plottitle = gtk.Entry()
        self.plottitle.set_text(default("plot_title"))

        #######################################################################

        # Legend properties

        # Text box for legend title
        tlegtitle = gtk.Button("Legend title:")
        self.legtitle = gtk.Entry()
        self.legtitle.set_text("")

        # Combo box for legend position
        tlegpos = gtk.Button("Legend position:")
        self.legpos = gtk.combo_box_new_text()
        for loc in ["best", "upper right", "lower left", "lower right",
                    "right", "center left", "center right", "lower center",
                    "upper center", "center", "no legend"]:
            self.legpos.append_text(loc)
        self.legpos.set_active(0)  # Default is first in above list - "best"

        #######################################################################

        # Spin button for number of bins per dimension

        tbins = gtk.Button("Bins per dimension:")
        self.bins = gtk.SpinButton()
        self.bins.set_increments(10, 10)
        self.bins.set_range(5, 10000)
        self.bins.set_value(default("nbins"))

        #######################################################################

        # Axes limits

        alimits = gtk.Button("Comma separated plot limits\n"
                             "x_min, x_max, y_min, y_max:")
        self.alimits = gtk.Entry()
        self.alimits.connect("changed", self._calimits)
        self.alimits.append_text("")

        #######################################################################

        # Bin limits

        blimits = gtk.Button("Comma separated bin limits\n"
                             "x_min, x_max, y_min, y_max:")
        self.blimits = gtk.Entry()
        self.blimits.connect("changed", self._cblimits)
        self.blimits.append_text("")

        #######################################################################

        # Check buttons for optional plot elements

        self.show_best_fit = gtk.CheckButton("Best-fit")
        self.show_posterior_mean = gtk.CheckButton("Posterior mean")
        self.show_credible_regions = gtk.CheckButton("Credible regions")
        self.show_conf_intervals = gtk.CheckButton("Confidence intervals")
        self.show_posterior_pdf = gtk.CheckButton("Posterior PDF")
        self.show_prof_like = gtk.CheckButton("Profile Likelihood")
        self.show_best_fit.set_active(True)
        self.show_posterior_mean.set_active(True)
        self.show_credible_regions.set_active(True)
        self.show_conf_intervals.set_active(True)
        self.show_posterior_pdf.set_active(True)
        self.show_prof_like.set_active(True)

        #######################################################################

        # Make plot button

        makeplot = gtk.Button('Make plot.')
        makeplot.connect("clicked", self._pmakeplot)

        #######################################################################

        # Check boxes to control what is saved (note we only attach them to the
        # window after showing a plot)

        self.save_image = gtk.CheckButton('Save image')
        self.save_image.set_active(True)
        self.save_summary = gtk.CheckButton('Save statistics in plot')
        self.save_summary.set_active(True)
        self.save_pickle = gtk.CheckButton('Save pickle of plot')
        self.save_pickle.set_active(True)

        #######################################################################

        # Layout - GTK Table

        self.gridbox = gtk.Table(17, 5, False)

        self.gridbox.attach(typetitle, 0, 1, 0, 1, xoptions=gtk.FILL)
        self.gridbox.attach(self.typebox, 1, 2, 0, 1, xoptions=gtk.FILL)

        self.gridbox.attach(xtitle, 0, 1, 1, 2, xoptions=gtk.FILL)
        self.gridbox.attach(self.xbox, 1, 2, 1, 2, xoptions=gtk.FILL)
        self.gridbox.attach(self.xtext, 1, 2, 2, 3, xoptions=gtk.FILL)

        self.gridbox.attach(ytitle, 0, 1, 3, 4, xoptions=gtk.FILL)
        self.gridbox.attach(self.ybox, 1, 2, 3, 4, xoptions=gtk.FILL)
        self.gridbox.attach(self.ytext, 1, 2, 4, 5, xoptions=gtk.FILL)

        self.gridbox.attach(ztitle, 0, 1, 5, 6, xoptions=gtk.FILL)
        self.gridbox.attach(self.zbox, 1, 2, 5, 6, xoptions=gtk.FILL)
        self.gridbox.attach(self.ztext, 1, 2, 6, 7, xoptions=gtk.FILL)

        self.gridbox.attach(self.logx, 0, 1, 2, 3, xoptions=gtk.FILL)
        self.gridbox.attach(self.logy, 0, 1, 4, 5, xoptions=gtk.FILL)
        self.gridbox.attach(self.logz, 0, 1, 6, 7, xoptions=gtk.FILL)

        self.gridbox.attach(tplottitle, 0, 1, 9, 10, xoptions=gtk.FILL)
        self.gridbox.attach(self.plottitle, 1, 2, 9, 10, xoptions=gtk.FILL)

        self.gridbox.attach(tlegtitle, 0, 1, 10, 11, xoptions=gtk.FILL)
        self.gridbox.attach(self.legtitle, 1, 2, 10, 11, xoptions=gtk.FILL)

        self.gridbox.attach(tlegpos, 0, 1, 11, 12, xoptions=gtk.FILL)
        self.gridbox.attach(self.legpos, 1, 2, 11, 12, xoptions=gtk.FILL)

        self.gridbox.attach(tbins, 0, 1, 12, 13, xoptions=gtk.FILL)
        self.gridbox.attach(self.bins, 1, 2, 12, 13, xoptions=gtk.FILL)

        self.gridbox.attach(alimits, 0, 1, 13, 14, xoptions=gtk.FILL)
        self.gridbox.attach(self.alimits, 1, 2, 13, 14, xoptions=gtk.FILL)

        self.gridbox.attach(blimits, 0, 1, 14, 15, xoptions=gtk.FILL)
        self.gridbox.attach(self.blimits, 1, 2, 14, 15, xoptions=gtk.FILL)

        point_plot_container = gtk.VBox()
        point_plot_box_upper = gtk.HBox(homogeneous=True)
        point_plot_box_lower = gtk.HBox(homogeneous=True)

        for check_box in [self.show_conf_intervals,
                          self.show_credible_regions,
                          self.show_best_fit]:
            point_plot_box_upper.pack_start_defaults(check_box)

        for check_box in [self.show_posterior_mean,
                          self.show_posterior_pdf,
                          self.show_prof_like]:
            point_plot_box_lower.pack_start_defaults(check_box)

        point_plot_container.pack_start_defaults(point_plot_box_upper)
        point_plot_container.pack_start_defaults(point_plot_box_lower)

        self.gridbox.attach(point_plot_container,
                            0, 2, 15, 16,
                            xoptions=gtk.FILL)

        self.gridbox.attach(makeplot, 0, 2, 16, 17, xoptions=gtk.FILL)

        #######################################################################

        # Make main GUI window

        self.window = gtk.Window()
        self.window.maximize()
        self.window.set_title("SuperPlot")
        # Quit if cross is pressed
        self.window.connect('destroy', lambda w: gtk.main_quit())

        # Add the table to the window and show
        self.window.add(self.gridbox)
        self.gridbox.show()
        self.window.show_all()

        return
Example #27
import sys

from sklearn.externals.joblib import dump

import data_loader
import names
import tfidf_pipeline
import model_presets


if __name__ == '__main__':
    for (category_name, model_name) in [('stars', 'linreg'), ('binary', 'svc')]:

        print 'Loading ' + category_name + ' data'
        train,_ = data_loader.load('split', names.categories[category_name])

        print 'Training ' + model_name
        clf = tfidf_pipeline.make(model_name)
        clf.fit(train.data, train.target)

        print 'Dumping ' + model_name
        dump(clf, 'web_clf_' + category_name + '.pkl')
        
Example #28
def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
                           dataset='data/mnist.pkl.gz',
                           batch_size=600):

    training_set, validation_set, testing_set, = data_loader.load(dataset)
    training_set_x  , training_set_y   = training_set
    validation_set_x, validation_set_y = validation_set
    testing_set_x   , testing_set_y    = testing_set

    # compute number of minibatches for training, validation and testing
    n_train_batches = training_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = validation_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches  = testing_set_x.get_value(borrow=True).shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = tensor.lscalar()

    # generate symbolic variables for input (x and y represent a
    # minibatch)
    x = tensor.matrix('x')
    y = tensor.ivector('y')

    classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)
    cost = classifier.negative_log_likelihood(y)

    # compiling a Theano function that computes the mistakes that are made by
    # the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: testing_set_x[index * batch_size: (index + 1) * batch_size],
            y: testing_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: validation_set_x[index * batch_size: (index + 1) * batch_size],
            y: validation_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # compute the gradient of cost with respect to theta = (W,b)
    g_W = tensor.grad(cost=cost, wrt=classifier.W)
    g_b = tensor.grad(cost=cost, wrt=classifier.b)

    # update the parameters of the model
    updates = [(classifier.W, classifier.W - learning_rate * g_W),
               (classifier.b, classifier.b - learning_rate * g_b)]

    # compiling a Theano function `train_model` that returns the cost and, at
    # the same time, updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: training_set_x[index * batch_size: (index + 1) * batch_size],
            y: training_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    ###############
    # TRAIN MODEL #
    ###############
    print '... training the model'
    # early-stopping parameters
    patience = 5000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is considered significant
    validation_frequency = 5 * n_train_batches  # frequency of validation checks (in minibatches)

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_model(minibatch_index)
            # iter: number of minibatches used
            iter = epoch * n_train_batches + minibatch_index
            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)
                    # update best_validation_loss
                    best_validation_loss = this_validation_loss
                    # test it on the test set
                    test_losses = [test_model(i) for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print(
                        (
                            '     epoch %i, minibatch %i/%i, test error of'
                            ' best model %f %%'
                        ) %
                        (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            test_score * 100.
                        )
                    )

                    # save the best model
                    with open('best_model.pkl', 'w') as f:
                        cPickle.dump(classifier, f)

            if patience <= iter:
                done_looping = True
                break
        epoch = epoch + 1

    end_time = timeit.default_timer()
    print(
        (
            'Optimization complete with best validation score of %f %%,'
            'with test performance %f %%'
        )
        % (best_validation_loss * 100., test_score * 100.)
    )
    print 'The code ran for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
Example #29
from train import train_model
from model import Model
from network import Network
from standard_networks import FC
from data_loader import load
from optimizer import Optimizer
import torch

train = 2
test = 8

train_queries = load('data/train{}_test{}_train.txt'.format(train, test))
test_queries = load('data/train{}_test{}_test.txt'.format(train, test))


def neural_pred(network, i1, i2):
    d = torch.zeros(20)
    d[int(i1)] = 1.0
    d[int(i2) + 10] = 1.0
    d = torch.autograd.Variable(d.unsqueeze(0))
    output = network.net(d)
    return output.squeeze(0)


fc1 = FC(20, 2)
adam = torch.optim.Adam(fc1.parameters(), lr=1.0)
swap_net = Network(fc1, 'swap_net', neural_pred, optimizer=adam)

#with open('compare.pl') as f:
with open('quicksort.pl') as f:
    problog_string = f.read()
Example #30
import numpy as np
import sklearn.tree
import sklearn.cross_validation
import data_loader
from sklearn.metrics import make_scorer


# Load train data
(X_train, Y_train) = data_loader.load("Dataset/churn.data.txt", standardize=False)
(X_test, Y_test) = data_loader.load("Dataset/churn.test.txt", standardize=False)


def custom_scorer(ground_truth, predictions):
    ground_truth = ground_truth
    predictions = predictions
    prec = sklearn.metrics.precision_score(ground_truth, predictions)
    rec = sklearn.metrics.recall_score(ground_truth, predictions)
    f1 = sklearn.metrics.f1_score(ground_truth, predictions)

    print "prec: " + str(prec)
    print "rec: " + str(rec)
    print "f1: " + str(f1)

    return f1


model = sklearn.tree.DecisionTreeClassifier()
model.fit(X_train, Y_train)
Y_pred = model.predict(X_test)
score = custom_scorer(Y_test, Y_pred)
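Examples #30 and #31 import `make_scorer` but never call it; presumably `custom_scorer` was meant to be wrapped for cross-validation. A minimal sketch of that usage, under that assumption and reusing the already-imported (legacy) `sklearn.cross_validation` module:

# Wrap the plain scoring function so scikit-learn can call it during cross-validation.
f1_scorer = make_scorer(custom_scorer)
cv_scores = sklearn.cross_validation.cross_val_score(
    sklearn.tree.DecisionTreeClassifier(), X_train, Y_train,
    scoring=f1_scorer, cv=5)
print "cross-validated f1: " + str(cv_scores.mean())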
Example #31

import numpy as np
from sklearn.svm import *
import sklearn.cross_validation
import data_loader
from sklearn.metrics import make_scorer
from sklearn.feature_selection import SelectFromModel


# Load train data
(X_train, Y_train) = data_loader.load("Dataset/churn.data.txt")
(X_test, Y_test) = data_loader.load("Dataset/churn.test.txt")

def custom_scorer(ground_truth, predictions):
    ground_truth = ground_truth
    predictions = predictions
    prec = sklearn.metrics.precision_score(ground_truth, predictions)
    rec = sklearn.metrics.recall_score(ground_truth, predictions)
    f1 = sklearn.metrics.f1_score(ground_truth, predictions)

    print "prec: " + str(prec)
    print "rec: " + str(rec)
    print "f1: " + str(f1)
    
    return f1

# Build linear SVM classifier, l1 regularization to perform implicit feature selection
lsvc = LinearSVC(C=0.01, penalty="l1", dual=False).fit(X_train, Y_train)
model = SelectFromModel(lsvc, prefit=True)

features_selected = [elem for selected, elem in zip(model.get_support(), data_loader.get_feature_names()) if selected]
Example #32
def main():
    print("data loading...")
    loading = load(data_path)
    train_size, test_size, val_size = loading.return_len()

    x_train_torch = torch.empty(train_size, 127, 200)
    x_val_torch = torch.empty(test_size, 127, 200)
    x_test_torch = torch.empty(val_size, 127, 200)
    y_train_torch = torch.empty(train_size)
    y_val_torch = torch.empty(test_size)
    y_test_torch = torch.empty(val_size)

    print(x_train_torch.shape)
    print(x_test_torch.shape)
    print(x_val_torch.shape)

    x_train_torch, x_val_torch, x_test_torch, y_train_torch, y_val_torch, y_test_torch = loading.main_processing()

    print(x_train_torch.shape, x_val_torch.shape, x_test_torch.shape, y_train_torch.shape, y_val_torch.shape, y_test_torch.shape)

    print("data loading success")

    x_train_loader = torch.utils.data.DataLoader(x_train_torch, batch_size=batch_size
                                                 , shuffle=False, num_workers=0, drop_last=True)
    y_train_loader = torch.utils.data.DataLoader(y_train_torch, batch_size=batch_size
                                                 , shuffle=False, num_workers=0, drop_last=True)

    x_val_loader = torch.utils.data.DataLoader(x_val_torch, batch_size=batch_size
                                               , shuffle=False, num_workers=0, drop_last=True)
    y_val_loader = torch.utils.data.DataLoader(y_val_torch, batch_size=batch_size
                                               , shuffle=False, num_workers=0, drop_last=True)

    x_test_loader = torch.utils.data.DataLoader(x_test_torch, batch_size=batch_size
                                                , shuffle=False, num_workers=0, drop_last=True)
    y_test_loader = torch.utils.data.DataLoader(y_test_torch, batch_size=batch_size
                                                , shuffle=False, num_workers=0, drop_last=True)
    model = Text_CNN().to(device)  # move the model to the GPU
    loss_func = nn.BCELoss()  # define the loss function
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  # define the optimizer

    loss_arr = [[0 for i in range(len(x_train_loader))] for j in range(num_epoch)]
    train_acc = []
    val_acc = []
    for i in range(num_epoch):
        start = time.time()  # measure the epoch's running time
        for j, (data, label) in enumerate(zip(x_train_loader, y_train_loader)):

            x = data.to(device)  # move the batch to the GPU
            y_ = label.to(device)  # move the labels to the GPU

            optimizer.zero_grad()  # reset the gradients
            output = model.forward(x)  # run the forward pass
            loss = loss_func(output, y_)  # compute the loss
            loss.backward()  # backpropagate the loss gradients w.r.t. the weights
            optimizer.step()  # update the weights in the direction that reduces the loss
            # loss_arr[i].append(loss.cpu().detach().numpy())
            loss_arr[i][j] = loss.item()

            if j == len(x_train_loader) - 1:  # at the end of each epoch, run the block below
                print("Epoch :", i + 1, " Loss :", sum(loss_arr[i], 0.0) / len(loss_arr[i]))  # print the average loss

                train_acc.append(eval(x_train_loader, y_train_loader, model))
                print("Accuracy of Train Data : {}".format(train_acc[i]))

                val_acc.append(eval(x_val_loader, y_val_loader, model))
                print("Accuracy of Validation Data : {}".format(val_acc[i]))
                loss_arr.append(loss.cpu().detach().numpy())

        print("running time :", time.time() - start)
        print('---------------------------------------------------------')

        # learning rate decay
        lr = learning_rate * (0.1 ** (i // learing_rate_decay))
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

        #print(param_group['lr'])

    f1_score(x_val_loader, y_val_loader, model, device)

    # Accuracy on the test data
    print("Accuracy of Test Data : {}".format(eval(x_test_loader, y_test_loader, model)))

    # save the model
    torch.save(model.state_dict(), './test1.pth')
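Example #32 calls `eval` and `f1_score` helpers (taking data loaders) that are not defined here. A minimal sketch of what the accuracy helper might look like, assuming the model outputs sigmoid probabilities for binary labels and reusing the global `device` from the example (both assumptions):

def eval(x_loader, y_loader, model):  # shadows the built-in eval, as in the calls above
    # Hypothetical accuracy helper: threshold the sigmoid outputs at 0.5 and
    # compare against the labels, batch by batch.
    correct, total = 0, 0
    with torch.no_grad():
        for data, label in zip(x_loader, y_loader):
            x = data.to(device)
            y_ = label.to(device)
            pred = (model(x) > 0.5).float()
            correct += (pred == y_).sum().item()
            total += y_.numel()
    return correct / total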
Example #33
def run():
    data = data_loader.load(DATASET,
                            n_train=N_TRAIN,
                            n_test=N_TEST,
                            train_noise=TRAIN_NOISE,
                            test_noise=TEST_NOISE,
                            ood=OOD)

    stratify = DATASET not in ["abalone", "segment"]

    if DATASET not in [
            'arcene', 'moon', 'toy_Story', 'toy_Story_ood', 'segment'
    ]:
        print(DATASET)
        x = data_loader.prepare_inputs(data['features'])
        y = data['labels']
        '''
        # check whether the choice of N_OOD is reasonable
        classes = np.argmax(y, axis=1)
        number_of_each_class = [(classes == ic).sum() for ic in range(int(classes.max()))]
        number_of_each_class.reverse()
        percentage_of_each_class = np.cumsum(np.array(number_of_each_class)) / np.array(number_of_each_class).sum()
        n_ood = np.where(percentage_of_each_class>=0.1)[0][0] + 1

        #n_in = y.shape[1] - n_ood
        #stratify = classes < n_in
        '''
        x_train, x_test, y_train, y_test = train_test_split(
            x,
            y,
            train_size=TRAIN_TEST_RATIO,
            stratify=y if stratify else None)

    else:
        #n_ood = int(N_OOD)
        if DATASET == 'moon' or DATASET == 'toy_Story' or DATASET == 'toy_Story_ood':
            x_train, x_test = data['x_train'], data['x_val']
        else:
            x_train, x_test = data_loader.prepare_inputs(
                data['x_train'], data['x_val'])
        y_train, y_test = data['y_train'], data['y_val']

    if 'N_OOD' in globals() and N_OOD >= 1:
        n_ood = prepare_ood_from_args(data, DATASET, N_OOD)
        n_in = y_train.shape[1] - n_ood

        # training
        train_in_idxs = np.argmax(y_train, axis=1) < n_in
        train_ood_idxs = np.argmax(y_train, axis=1) >= n_in
        #val_in_idxs = np.argmax(y_val, axis=1) < n_in
        #val_ood_idxs = np.argmax(y_val, axis=1) >= n_in
        x_train_in = x_train[train_in_idxs]
        y_train_in = y_train[train_in_idxs][:, 0:n_in]
        x_train_out = x_train[train_ood_idxs]
        y_train_out = y_train[train_ood_idxs][:, 0:n_in]

        # Generate validation split
        x_train_in, x_val_in, y_train_in, y_val_in = train_test_split(
            x_train_in,
            y_train_in,
            train_size=TRAIN_TEST_RATIO,
            stratify=y_train_in if stratify else None)

        x_val = np.concatenate((x_train_out, x_val_in), axis=0)
        y_val = np.concatenate((y_train_out, y_val_in), axis=0)
        y_test = y_test[:, 0:n_in]
        y_val = y_val[:, 0:n_in]

        x_train = x_train_in.astype(np.float32)
        x_val = x_val.astype(np.float32)
    else:
        x_train, x_val, y_train, y_val = train_test_split(
            x_train,
            y_train,
            train_size=TRAIN_TEST_RATIO,
            stratify=y_train if stratify else None)

    #####################

    print('Finish loading data')
    gdrive_rpath = './experiments_ood'

    t = int(time.time())
    log_dir = os.path.join(gdrive_rpath, MODEL_NAME, '{}/logs'.format(t))
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir)
    file_writer_cm = tf.summary.create_file_writer(log_dir + '/cm')

    checkpoint_filepath = os.path.join(gdrive_rpath, MODEL_NAME,
                                       '{}/ckpt/'.format(t))
    if not os.path.exists(checkpoint_filepath):
        os.makedirs(checkpoint_filepath)

    model_path = os.path.join(gdrive_rpath, MODEL_NAME,
                              '{}/model'.format(t))
    if not os.path.exists(model_path):
        os.makedirs(model_path)

    model_cp_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_filepath,
        save_weights_only=True,
        monitor='val_auc_of_ood',
        mode='max',
        save_best_only=True)

    model = build_model(x_train.shape[1], y_train.shape[1], MODEL, args)

    def plot_boundary(epoch, logs):
        # Use the model to predict the values from the validation dataset.
        xy = np.mgrid[-10:10:0.1, -10:10:0.1].reshape(2, -1).T
        hat_z = tf.nn.softmax(model(xy, training=False), axis=1)
        # scipy.special.softmax(hat_z, axis=1)
        c = np.sum(np.arange(hat_z.shape[1] + 1)[1:] * hat_z, axis=1)
        # c = np.argmax(np.arange(6)[1:]*scipy.special.softmax(hat_z, axis=1), axis=1
        # xy = np.mgrid[-1:1.1:0.01, -2:2.1:0.01].reshape(2,-1).T
        figure = plt.figure(figsize=(8, 8))
        plt.scatter(xy[:, 0], xy[:, 1], c=c, cmap="brg")
        image = plot_to_image(figure)
        # Log the confusion matrix as an image summary.
        with file_writer_cm.as_default():
            tf.summary.image("Boundaries", image, step=epoch)

    def plot_boundary_pretrain(epoch, logs):
        # Use the model to predict the values from the validation dataset.
        xy = np.mgrid[-1:1.1:0.01, -2:2.1:0.01].reshape(2, -1).T
        hat_z = tf.nn.softmax(model(xy, training=False), axis=1)
        # scipy.special.softmax(hat_z, axis=1)
        c = np.sum(np.arange(6)[1:] * hat_z, axis=1)
        # c = np.argmax(np.arange(6)[1:]*scipy.special.softmax(hat_z, axis=1), axis=1
        # xy = np.mgrid[-1:1.1:0.01, -2:2.1:0.01].reshape(2,-1).T
        figure = plt.figure(figsize=(8, 8))
        plt.scatter(xy[:, 0], xy[:, 1], c=c, cmap="brg")
        image = plot_to_image(figure)
        # Log the confusion matrix as an image summary.
        with file_writer_cm.as_default():
            tf.summary.image("Boundaries_pretrain", image, step=epoch)

    border_callback_pretrain = tf.keras.callbacks.LambdaCallback(
        on_epoch_end=plot_boundary_pretrain)
    border_callback = tf.keras.callbacks.LambdaCallback(
        on_epoch_end=plot_boundary)

    training_generator = mixup.data_generator(x_train_in,
                                              y_train_in,
                                              batch_size=BATCH_SIZE,
                                              n_channels=N_CHANNELS,
                                              shuffle=SHUFFLE,
                                              mixup_scheme=MIXUP_SCHEME,
                                              k=N_NEIGHBORS,
                                              alpha=ALPHA,
                                              local=LOCAL_RANDOM,
                                              out_of_class=OUT_OF_CLASS,
                                              manifold_mixup=MANIFOLD_MIXUP)

    validation_generator = mixup.data_generator(x_val,
                                                y_val,
                                                batch_size=x_val.shape[0],
                                                n_channels=N_CHANNELS,
                                                shuffle=False,
                                                mixup_scheme='none',
                                                alpha=0,
                                                manifold_mixup=MANIFOLD_MIXUP)

    test_generator = mixup.data_generator(x_test,
                                          y_test,
                                          batch_size=x_test.shape[0],
                                          n_channels=N_CHANNELS,
                                          shuffle=False,
                                          mixup_scheme='none',
                                          alpha=0,
                                          manifold_mixup=MANIFOLD_MIXUP)

    # Pretraining
    # if DATASET=='toy_Story':
    #   pre_x = np.mgrid[-1:1.1:0.01, -2:2.1:0.01].reshape(2,-1).T
    #   pre_y = .2*np.ones(shape=[pre_x.shape[0], 5])
    #   model.fit(x=pre_x, y=pre_y, epochs=1, callbacks=[border_callback_pretrain])

    training_history = model.fit(
        x=training_generator,
        validation_data=validation_generator,
        epochs=EPOCHS,
        callbacks=[
            tensorboard_callback,
            model_cp_callback,
            # border_callback
        ],
    )

    model.summary()
    model.load_weights(checkpoint_filepath)
    model.save(model_path)
    print('Tensorboard callback directory: {}'.format(log_dir))

    metric_file = os.path.join(gdrive_rpath, MODEL_NAME,
                               '{}/results.txt'.format(t))
    loss = model.evaluate(test_generator, return_dict=True)
    test_outputs = model.predict(test_generator)
    with open(metric_file, "w") as f:
        f.write(str(loss))
Example #34
0
def run_multiclass(data_dir: str):
    data_loader.load(data_dir,
                     ['frame_time_relative', 'dns_qry_name', 'dns_qry_type'])
    query_name_learner.run_multiclass()
Example #35
0
visualize = 0

torch.manual_seed(2)

#############################################################################
#############################################################################

#############################################################################
############################## Load Data ####################################
#############################################################################

if load_train_data:

    from data_loader import load

    DATA = load(horizon=21, num_nodes=125, num_layers=3, num_rsc=7)

    X_train, Y_train = DATA.read_train(num_sample=10000)

    X_train = 1 - X_train

    # "X" and "Y" are Nxn matrices where "N" is the number of
    # scenarios and "n" is the number of nodes.   Each row of
    # "X" is a binary vector which has a "0" when the node is
    # damaged and "1" when the node is repaired. Each element
    # of "Y" gives the time-step at which the node is repair-
    # ed and "0" if the node is not damaged.

    print("\nTraining data was successfully loaded!\n")

if load_test_data:
Example #36
0
                     num_processes):
    ## Step 1: Projection (Parallel)
    (tiles, scalar) = c_prime.par_map_to_tiles(all_points, precision,
                                               threshold, num_processes)

    ## Step 2: Agglomeration (Sequential)
    clusters = c_prime.raster_clustering_tiles(tiles, min_size)

    return (clusters, scalar)


if __name__ == "__main__":

    # load input data
    data_path = "../0_data_generators/output/data_1000_shuffled.csv"
    all_points = dl.load(data_path)
    """
    1) RASTER clusters

    RASTER projects points to tiles and discards the points once the
    projection has been performed. It therefore requires only constant
    space, assuming bounded integers or a bounded coordinate system such
    as the GPS coordinate system for our planet.

    The input is projected to points that represent tiles.

    """
    precision = 3
    threshold = 5
    min_size = 4
Example #37
0
import data_loader
_, test_data = data_loader.load()

import mnist_loader
training_data, validation_data, _ = mnist_loader.load_data_wrapper()

# import network
# net = network.Network([784, 30, 10])
# net.learn(training_data, 30, 10, 3.0, test_data)

import network2
net = network2.Network([784, 30, 10], cost=network2.CrossEntropyCost)
net.large_weight_initializer()
net.SGD(training_data, 30, 10, 0.5, evaluation_data=test_data, monitor_evaluation_accuracy=True)
Example #38
0
def run():
    data = data_loader.load(DATASET,
                            n_train=N_TRAIN,
                            n_test=N_TEST,
                            train_noise=TRAIN_NOISE,
                            test_noise=TEST_NOISE)

    stratify = DATASET not in ["abalone", "segment"]

    if DATASET not in [
            'arcene', 'moon', 'toy_Story', 'toy_Story_ood', 'segment'
    ]:
        print(DATASET)
        x = data_loader.prepare_inputs(data['features'])
        y = data['labels']
        x_train, x_test, y_train, y_test = train_test_split(
            x,
            y,
            train_size=TRAIN_TEST_RATIO,
            stratify=y if stratify else None,
            random_state=0)

    else:
        if DATASET == 'moon' or DATASET == 'toy_Story' or DATASET == 'toy_Story_ood':
            x_train, x_test = data['x_train'], data['x_val']
        else:
            x_train, x_test = data_loader.prepare_inputs(
                data['x_train'], data['x_val'])
        y_train, y_test = data['y_train'], data['y_val']

    # Generate validation split
    x_train, x_val, y_train, y_val = train_test_split(
        x_train,
        y_train,
        train_size=TRAIN_TEST_RATIO,
        stratify=y_train if stratify else None,
        random_state=0)

    x_train = x_train.astype(np.float32)
    x_val = x_val.astype(np.float32)
    x_test = x_test.astype(np.float32)

    if 'N_OOD' in globals() and N_OOD >= 1:
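        # prepare_ood (defined elsewhere) returns the in-distribution splits
        # plus the held-out OOD samples; the OOD samples are then split 50/50
        # into validation and test halves and appended to the corresponding
        # in-distribution sets for the *_with_ood evaluations below.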
        n_ood = update_n_ood(data, DATASET, N_OOD)
        n_ood = y_val.shape[1] - n_ood - 1
        print("Number of ood classes: {n_ood}")
        x_train, x_val, x_test, y_train, y_val, y_test, x_ood, y_ood = prepare_ood(
            x_train, x_val, x_test, y_train, y_val, y_test, n_ood, NORM)
        # x_test_with_ood = np.concatenate([x_test, x_ood], axis=0)
        # y_test_with_ood = np.concatenate([y_test, y_ood], axis=0)
        x_ood_val, x_ood_test, y_ood_val, y_ood_test = train_test_split(
            x_ood, y_ood, test_size=0.5, random_state=0)
        x_test_with_ood = np.concatenate([x_test, x_ood_test], axis=0)
        y_test_with_ood = np.concatenate([y_test, y_ood_test], axis=0)
        x_val_with_ood = np.concatenate([x_val, x_ood_val], axis=0)
        y_val_with_ood = np.concatenate([y_val, y_ood_val], axis=0)
    else:
        n_ood = 0
    print('Finished loading data')
    gdrive_rpath = './experiments_all'

    t = int(time.time())
    log_dir = os.path.join(gdrive_rpath, MODEL_NAME, '{}'.format(t))
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir)
    file_writer_cm = tf.summary.create_file_writer(log_dir + '/cm')

    checkpoint_filepath = os.path.join(log_dir, 'ckpt')
    if not os.path.exists(checkpoint_filepath):
        os.makedirs(checkpoint_filepath)

    model_path = os.path.join(log_dir, 'model')
    if not os.path.exists(model_path):
        os.makedirs(model_path)

    model_cp_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_filepath,
        save_weights_only=True,
        monitor=MONITOR,
        mode='max',
        save_best_only=True,
        verbose=1)

    model = build_model(x_train.shape[1], y_train.shape[1], MODEL, args)

    def plot_boundary(epoch, logs):
        # Predict class probabilities on a dense grid covering the input plane.
        xy = np.mgrid[-10:10:0.1, -10:10:0.1].reshape(2, -1).T
        hat_z = tf.nn.softmax(model(xy, training=False), axis=1)
        # Probability-weighted mean of the class indices 1..K, used as a
        # continuous colour value for the boundary plot.
        c = np.sum(np.arange(hat_z.shape[1] + 1)[1:] * hat_z, axis=1)
        figure = plt.figure(figsize=(8, 8))
        plt.scatter(xy[:, 0], xy[:, 1], c=c, cmap="brg")
        image = plot_to_image(figure)
        # Log the decision-boundary plot as an image summary.
        with file_writer_cm.as_default():
            tf.summary.image("Boundaries", image, step=epoch)

    border_callback = tf.keras.callbacks.LambdaCallback(
        on_epoch_end=plot_boundary)
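
    # Training batches are mixup-augmented according to MIXUP_SCHEME; the
    # validation and test generators below feed each split as a single
    # un-mixed batch (mixup_scheme='none', full-split batch size).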

    training_generator = mixup.data_generator(x_train,
                                              y_train,
                                              batch_size=BATCH_SIZE,
                                              n_channels=N_CHANNELS,
                                              shuffle=SHUFFLE,
                                              mixup_scheme=MIXUP_SCHEME,
                                              k=N_NEIGHBORS,
                                              alpha=ALPHA,
                                              local=LOCAL_RANDOM,
                                              out_of_class=OUT_OF_CLASS,
                                              manifold_mixup=MANIFOLD_MIXUP)

    validation_generator = mixup.data_generator(x_val,
                                                y_val,
                                                batch_size=x_val.shape[0],
                                                n_channels=N_CHANNELS,
                                                shuffle=False,
                                                mixup_scheme='none',
                                                alpha=0,
                                                manifold_mixup=MANIFOLD_MIXUP)

    test_generator = mixup.data_generator(x_test,
                                          y_test,
                                          batch_size=x_test.shape[0],
                                          n_channels=N_CHANNELS,
                                          shuffle=True,
                                          mixup_scheme='none',
                                          alpha=0,
                                          manifold_mixup=MANIFOLD_MIXUP)

    if 'N_OOD' in globals() and N_OOD > 0:
        in_out_test_generator = mixup.data_generator(
            x_test_with_ood,
            y_test_with_ood,
            batch_size=x_test_with_ood.shape[0],
            n_channels=N_CHANNELS,
            shuffle=True,
            mixup_scheme='none',
            alpha=0,
            manifold_mixup=MANIFOLD_MIXUP)

    callbacks = [tensorboard_callback, model_cp_callback]
    if DATASET == 'toy_Story' or DATASET == 'toy_Story_ood':
        border_callback = tf.keras.callbacks.LambdaCallback(
            on_epoch_end=cb.plot_boundary)
        callbacks += [border_callback]
    if MODEL in ['jem', 'jemo', 'jehm', 'jehmo', 'jehmo_mix']:
        callbacks += [cb.jem_n_epochs()]

    ## buffer ##
    '''
    if MODEL in ['jehmo', 'jehmo_mix']:
        if model.with_buffer_out:
            model.replay_buffer_out = get_buffer(model.buffer_size,
                                                 training_generator.x.shape[1],
                                                 x=training_generator.x)
    '''
    ## training ##
    t_train_start = int(time.time())
    training_history = model.fit(x=training_generator,
                                 validation_data=validation_generator,
                                 epochs=EPOCHS,
                                 callbacks=callbacks)
    t_train_end = int(time.time())
    used_time = t_train_end - t_train_start
    model.load_weights(checkpoint_filepath)
    # model.save(model_path)
    print('Tensorboard callback directory: {}'.format(log_dir))

    ood_loss = 0
    metric_file = os.path.join(gdrive_rpath, 'results.txt')
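    # Each run appends one comma-separated line to results.txt:
    # model, mixup scheme, dataset, timestamp, test accuracy / ECE / OE,
    # OOD-test accuracy / ECE / OE (or "None"), number of OOD classes,
    # OOD AUC (or "None"), and training time in seconds.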
    loss = model.evaluate(test_generator, return_dict=True)
    # if N_OOD>0:
    #    ood_loss = model.evaluate(in_out_test_generator, return_dict=True)
    # with open(metric_file, "a+") as f:
    #    f.write(f"{MODEL}, {DATASET}, {t}, {loss['acc_with_ood']:.3f}," \
    #            f"{loss['ece_metrics']:.3f}, {loss['oe_metrics']:.3f}," \
    #            f"{loss['loss']:.3f}, {n_ood}, {ood_loss['auc_of_ood']}\n")
    if 'N_OOD' in globals() and N_OOD > 0:
        ood_loss = model.evaluate(in_out_test_generator, return_dict=True)
        with open(metric_file, "a+") as f:
            f.write(f"{MODEL}, {MIXUP_SCHEME}, {DATASET}, {t}, {loss['accuracy']:.3f}," \
                    f"{loss['ece_metrics']:.3f}, {loss['oe_metrics']:.3f}," \
                    f"{ood_loss['accuracy']:.3f}," \
                    f"{ood_loss['ece_metrics']:.3f}, {ood_loss['oe_metrics']:.3f},"
                    f"{n_ood}, {ood_loss['auc_of_ood']}, {used_time}\n")
    else:
        with open(metric_file, "a+") as f:
            f.write(f"{MODEL}, {MIXUP_SCHEME}, {DATASET}, {t}, {loss['accuracy']:.3f}," \
                    f"{loss['ece_metrics']:.3f}, {loss['oe_metrics']:.3f}," \
                    f"None, " \
                    f"None, None,"
                    f"{n_ood}, None, {used_time}\n")

    arg_file = os.path.join(log_dir, 'args.txt')
    with open(arg_file, "w+") as f:
        f.write(str(args))
    def __init__(self, grams_file):
        self.data = data_loader.load(grams_file)

        # Infer the n-gram size from any key other than the special '*' entry.
        keys = list(self.data.keys())
        tk = keys[1] if keys[0] == '*' else keys[0]
        self.gram_size = len(tk)