Example #1
    print('window_size = ', window_size)
    print('num_walks_per_node = ', num_walks_per_node)

    # Load the config file
    cfg = yaml.safe_load(open('config.yml', 'r'))

    #########################################
    #   Set up random seeds                 #
    #########################################
    random.seed(252)
    np.random.seed(252)

    #########################################
    #  Load graph data                      #
    #########################################
    num_nodes, num_edges, node_list, edges, node_type, type_idx = data_helpers.load_dataset(
        cfg['path_data'], dataset)

    mp_idx = gen_mp_candidates(edges, node_type, 10, window_size)
    print('{} different metapaths.'.format(len(mp_idx)), file=sys.stderr)
    # print(mp_idx)
    num_metapaths = len(mp_idx) + 1
    types = ['v', 'a', 'i', 'f']

    #########################################
    #  MARU walks                           #
    #########################################
    out_file = cfg['path_walks'] + 'maru_walks.{}.L{}.W{}.S{}'.format(
        dataset, walk_len, window_size, num_walks_per_node)
    with open(out_file, 'w') as wp:
        for walk_itr in range(num_walks_per_node):
            print('Iteration #{}'.format(walk_itr), file=sys.stderr)
Example #2
import numpy as np
import data_helpers as dh

import matplotlib.pyplot as plt
import os

import tensorflow as tf
import datetime as dt


X, Y, files = dh.load_dataset('shared/Digits_1f1', (32, 32), 1)

# Parameters
learning_rate = 0.001
batch_size = 64
training_iters = 500
display_step = 50

# Network Parameters
n_input = 32 * 32  # flattened 32x32 input image
n_classes = 10  # number of digit classes (0-9)
dropout = 0.75 # Dropout, probability to keep units

# tf Graph input
x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])
keep_prob = tf.placeholder(tf.float32) #dropout (keep probability)

# Create model
def conv2d(img, w, b):
    conv = tf.nn.conv2d(img, w, strides=[1, 1, 1, 1], padding='SAME')
    return tf.nn.relu(tf.nn.bias_add(conv, b))
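
# Illustrative continuation (not part of the original snippet): a matching
# max-pooling helper and a sketch of how conv2d could feed a first layer.
# The names wc1, bc1 and the 5x5/32-filter shapes are assumptions for illustration.
def max_pool(img, k):
    return tf.nn.max_pool(img, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME')

_x = tf.reshape(x, shape=[-1, 32, 32, 1])            # flat pixels -> NHWC image batch
wc1 = tf.Variable(tf.random_normal([5, 5, 1, 32]))   # 5x5 conv kernel, 1 -> 32 channels
bc1 = tf.Variable(tf.random_normal([32]))
conv1 = max_pool(conv2d(_x, wc1, bc1), k=2)          # conv + bias + relu, then 2x2 pool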
Example #3
    #########################################
    #   Load arguments and configure file   #
    #########################################
    # Process CML arguments
    FLAGS = handle_flags()

    random.seed(FLAGS.random_seed)
    np.random.seed(FLAGS.random_seed)
    tf.set_random_seed(FLAGS.random_seed)

    file_path = "../../data/"
    train_index_list = list(map(int, FLAGS.train_data_set.split(',')))
    test_index_list = list(map(int, FLAGS.test_data_set.split(',')))
    fea_train, lbl_train, fea_valid, lbl_valid, fea_test, lbl_test = data_helpers.load_dataset(
        file_path, train_index_list, test_index_list, FLAGS.start_time,
        FLAGS.sequence_length)
    print("finish loading data set...")
    print("fea_train " + str(fea_train.shape))
    print("lbl_train " + str(lbl_train.shape))
    print("fea_valid " + str(fea_valid.shape))
    print("lbl_valid " + str(lbl_valid.shape))
    print("fea_test " + str(fea_test.shape))
    print("lbl_test " + str(lbl_test.shape))

    with tf.Graph().as_default():
        # set up session configuration
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
Example #4
def main():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    seed = 1

    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)

    epochs = 20
    em_types = ['glove', 'word2vec', 'fasttext']
    databases = ["MR", "SST-1", "SST-2", "SUBJ", "TREC", "ProcCons", "IMDB"]
    optimizers = ['adam', 'adagrad']
    schedules = ['ReduceLROnPlateau', 'StepLR']

    save_dir = './multi'
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # Train
    for em in em_types:
        print('EM {}'.format(em))
        print('*' * 50)

        for d in databases:
            print(d)

            dir = save_dir + '/' + d
            if not os.path.exists(dir):
                os.makedirs(dir)

            # Load data
            train_loader, dev_loader, test_loader, num_class = load_dataset(
                d, 64, em)

            A, B, C, D = 64, 8, 16, 16
            model = capsules(A=A, B=B, C=C, D=D, E=num_class,
                             iters=2).to(device)

            for o in optimizers:
                for s in schedules:

                    folder = dir + "/em=" + em + ",o=" + o + ",s=" + s
                    if not os.path.exists(folder):
                        os.makedirs(folder)

                    criterion = SpreadLoss(num_class=num_class,
                                           m_min=0.2,
                                           m_max=0.9)

                    if o == 'adam':
                        optimizer = optim.Adam(model.parameters(),
                                               lr=0.01,
                                               weight_decay=0)
                    elif o == 'adagrad':
                        optimizer = optim.Adagrad(model.parameters(), lr=0.01)

                    if s == 'ReduceLROnPlateau':
                        scheduler = optim.lr_scheduler.ReduceLROnPlateau(
                            optimizer, 'max', patience=1)
                    elif s == 'StepLR':
                        scheduler = optim.lr_scheduler.StepLR(optimizer,
                                                              step_size=3,
                                                              gamma=0.1)

                    out_acc = open(folder + "/acc.csv", "w")
                    out_loss = open(folder + "/loss.csv", "w")

                    out_acc.write('epoch,phase,acc\n')
                    out_loss.write('epoch,phase,loss\n')

                    for epoch in range(1, epochs + 1):
                        if s == 'StepLR':
                            scheduler.step()

                        torch.cuda.empty_cache()

                        print('Epoch {}/{}'.format(epoch, epochs))
                        print('-' * 30)

                        train_acc, train_loss = train(train_loader, model,
                                                      criterion, optimizer,
                                                      epoch, device)

                        out_acc.write('{},{},{:.4f}\n'.format(
                            epoch, 'train', train_acc))
                        out_loss.write('{},{},{:.4f}\n'.format(
                            epoch, 'train', train_loss))

                        dev_acc, dev_loss = test(dev_loader, model, criterion,
                                                 'dev', device)

                        out_acc.write('{},{},{:.4f}\n'.format(
                            epoch, 'dev', dev_acc))
                        out_loss.write('{},{},{:.4f}\n'.format(
                            epoch, 'dev', dev_loss))

                        if s == 'ReduceLROnPlateau':
                            scheduler.step(train_acc)

                    out_acc.close()
                    out_loss.close()
                    save_plot_to_file(folder + "/acc.csv", 'acc',
                                      folder + "/acc.png", epochs)
                    save_plot_to_file(folder + "/loss.csv", 'loss',
                                      folder + "/loss.png", epochs)

                    test_acc, test_loss = test(test_loader, model, criterion,
                                               'TEST', device)

                    out_test = open(folder + "/test.txt", "w")
                    out_test.write('Accuracy: {:.6f}, Loss: {:.6f} \n'.format(
                        test_acc, test_loss))
                    out_test.close()

                    snapshot(model, folder, epochs)
Example #5
    resize_height = 256
    resize_width = 256
    cropped_height = 224
    cropped_width = 224
    no_classes = 200
    INITIAL_LR = 1e-3
    weight_decay_constant = 5e-4
    train_count = 4794
    val_count = 1199
    DNN_BEST_MODEL = 'ft_all_layer.hdf5'
    init_weights_path = 'ft_last_layer.hdf5'
    EPOCHS_PATIENCE_BEFORE_STOPPING = 5
    EPOCHS_PATIENCE_BEFORE_DECAY = 2

    train_generator = load_dataset(train_file_paths, train_labels, batch_size,
                                   no_classes, resize_height, resize_width,
                                   cropped_height, cropped_width)
    val_generator = load_dataset(val_file_paths, val_labels, batch_size,
                                 no_classes, resize_height, resize_width,
                                 cropped_height, cropped_width)

    # Set batches of training and validation required
    train_batches = int(np.ceil(train_count / batch_size))
    val_batches = int(np.ceil(val_count / batch_size))

    cbcnn_model = vgg_16_cbcnn(input_shape=(cropped_height, cropped_width, 3),
                               no_classes=no_classes,
                               bilinear_output_dim=8192,
                               sum_pool=True,
                               weight_decay_constant=weight_decay_constant,
                               multi_label=False,
Example #6
                        "Log placement of ops on devices")

FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()
print("\nParameters:")
for attr, value in sorted(FLAGS.__flags.items()):
    print("{}={}".format(attr.upper(), value))
print("")

word2id = data_helpers.load_vocab(FLAGS.vocab_file)
print('vocabulary size: {}'.format(len(word2id)))

response_data = data_helpers.load_responses(FLAGS.response_file, word2id,
                                            FLAGS.max_response_len)
test_dataset = data_helpers.load_dataset(FLAGS.test_file, word2id,
                                         FLAGS.max_utter_len,
                                         FLAGS.max_utter_num, response_data)
print('test_pairs: {}'.format(len(test_dataset)))

target_loss_weight = [1.0, 1.0]

print("\nEvaluating...\n")
checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
print(checkpoint_file)

graph = tf.Graph()
with graph.as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
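    # Illustrative continuation (not in the original snippet): restore the
    # checkpoint located above into the new session before evaluation.
    saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
    saver.restore(sess, checkpoint_file)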
Example #7
for attr, value in sorted(FLAGS.__flags.items()):
    print("{}={}".format(attr.upper(), value))
print("")

# Load data
print("Loading data...")

vocab = data_helpers.load_vocab(FLAGS.vocab_file)
print('vocabulary size: {}'.format(len(vocab)))
charVocab = data_helpers.load_char_vocab(FLAGS.char_vocab_file)
print('charVocab size: {}'.format(len(charVocab)))

response_data = data_helpers.load_responses(FLAGS.response_file, vocab, FLAGS.max_response_len)
print('response_data size: {}'.format(len(response_data)))

train_dataset = data_helpers.load_dataset(FLAGS.train_file, vocab, FLAGS.max_utter_len, FLAGS.max_utter_num, response_data)
print('train_pairs: {}'.format(len(train_dataset)))
valid_dataset = data_helpers.load_dataset(FLAGS.valid_file, vocab, FLAGS.max_utter_len, FLAGS.max_utter_num, response_data)
print('valid_pairs: {}'.format(len(valid_dataset)))

target_loss_weight = [1.0, 1.0]

with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
      allow_soft_placement=FLAGS.allow_soft_placement,
      log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        u2u_imn = U2U_IMN(
            max_utter_len=FLAGS.max_utter_len,
            max_utter_num=FLAGS.max_utter_num,
Example #8
import numpy as np
import data_helpers as dh

import matplotlib.pyplot as plt
import os

import tensorflow as tf
import datetime as dt


img_w = 56
img_h = 32
digits = 2
Xdata, Y, files = dh.load_dataset('shared/Digits_2', (img_w, img_h), digits)

img_w = 104
img_h = 32
digits = 4
Xdata, Y, files = dh.load_dataset('shared/Digits_4', (img_w, img_h), digits)

img_w = 160
img_h = 32
digits = 6
Xdata, Y, files = dh.load_dataset('shared/Digits_6f3', (img_w, img_h), digits)

# invert and normalize to [0, 1]
# X = (255 - Xdata) / 255.0


# standardization
# compute the per-column mean (sum the elements of each column and divide by the number of rows)
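# A minimal sketch of the two preprocessing steps described above (assumes Xdata is a
# NumPy array of shape [num_samples, num_pixels]; the epsilon guard is an addition):
X = (255 - Xdata) / 255.0           # invert and scale pixel values to [0, 1]
mean = X.mean(axis=0)               # per-column mean across all samples
std = X.std(axis=0)                 # per-column standard deviation
X_std = (X - mean) / (std + 1e-8)   # standardize; epsilon avoids division by zero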
Example #9
def main():
    em_type = 'glove'
    database = 'IMDB'
    folder = database

    if not os.path.exists(database):
        os.makedirs(database)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    seed = 1

    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)

    # Load data
    train_loader, dev_loader, test_loader, num_class = load_dataset(database, 64, em_type)

    A, B, C, D = 64, 8, 16, 16
    #A, B, C, D = 32, 32, 32, 32
    model = capsules(A=A, B=B, C=C, D=D, E=num_class, iters=2).to(device)

    # Save the model to the file
    model_file = open(folder + "/model.txt", "w")
    model_file.write('Model:\n{}\n'.format(model))
    model_file.write('Total number of parameters:{}\n'.format(sum(p.numel() for p in model.parameters())))
    model_file.write('Total number of trainable parameters:{}\n'.format(sum(p.numel() for p in model.parameters() if p.requires_grad)))
    model_file.close()

    criterion = SpreadLoss(num_class=num_class, m_min=0.2, m_max=0.9)
    optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=0)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=1)

    out_acc = open(folder + "/acc.csv", "w")
    out_loss = open(folder + "/loss.csv", "w")

    out_acc.write('epoch,phase,acc\n')
    out_loss.write('epoch,phase,loss\n')

    epochs = 2
    for epoch in range(1, epochs + 1):
        torch.cuda.empty_cache()

        print('Epoch {}/{}'.format(epoch, epochs))
        print('-' * 30)

        train_acc, train_loss = train(train_loader, model, criterion, optimizer, epoch, device)

        out_acc.write('{},{},{:.4f}\n'.format(epoch, 'train', train_acc))
        out_loss.write('{},{},{:.4f}\n'.format(epoch, 'train', train_loss))

        dev_acc, dev_loss = test(dev_loader, model, criterion, 'dev', device)

        out_acc.write('{},{},{:.4f}\n'.format(epoch, 'dev', dev_acc))
        out_loss.write('{},{},{:.4f}\n'.format(epoch, 'dev', dev_loss))

        scheduler.step(train_acc)

    out_acc.close()
    out_loss.close()
    save_plot_to_file(folder + "/acc.csv", 'acc', folder + "/acc.png", epochs)
    save_plot_to_file(folder + "/loss.csv", 'loss', folder + "/loss.png", epochs)

    test_acc, test_loss = test(test_loader, model, criterion, 'TEST', device)

    out_test = open(folder + "/test.txt", "w")
    out_test.write('Accuracy: {:.6f}, Loss: {:.6f} \n'.format(test_acc, test_loss))
    out_test.close()

    snapshot(model, database, epochs)
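
# The save_plot_to_file helper is not shown in these examples. Below is a minimal
# sketch of one possible implementation, assuming the 'epoch,phase,<metric>' CSV
# layout written above; the pandas/matplotlib approach is an assumption, not the
# original code.
def save_plot_to_file(csv_path, metric, png_path, epochs):
    import pandas as pd
    import matplotlib.pyplot as plt

    df = pd.read_csv(csv_path)
    fig, ax = plt.subplots()
    for phase, group in df.groupby('phase'):      # one curve per phase (train/dev)
        ax.plot(group['epoch'], group[metric], label=phase)
    ax.set_xlabel('epoch')
    ax.set_ylabel(metric)
    ax.set_xlim(1, epochs)
    ax.legend()
    fig.savefig(png_path)
    plt.close(fig)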
Example #10
FLAGS._parse_flags()
print("\nParameters:")
for attr, value in sorted(FLAGS.__flags.items()):
    print("{}={}".format(attr.upper(), value))
print("")

# Load data
print("Loading data...")
word2id = data_helpers.load_vocab(FLAGS.vocab_file)
print('vocabulary size: {}'.format(len(word2id)))

response_data = data_helpers.load_responses(FLAGS.response_file, word2id,
                                            FLAGS.max_response_len)
# One line in the dataset = multi-turn dialogue context + label (0: not relevant, 1: relevant) + response_context
train_dataset = data_helpers.load_dataset(FLAGS.train_file, word2id,
                                          FLAGS.max_utter_len,
                                          FLAGS.max_utter_num, response_data)
print('train_pairs: {}'.format(len(train_dataset)))
valid_dataset = data_helpers.load_dataset(FLAGS.valid_file, word2id,
                                          FLAGS.max_utter_len,
                                          FLAGS.max_utter_num,
                                          response_data)  # *varied-length*
print('valid_pairs: {}'.format(len(valid_dataset)))
test_dataset = data_helpers.load_dataset(FLAGS.test_file, word2id,
                                         FLAGS.max_utter_len,
                                         FLAGS.max_utter_num, response_data)
print('test_pairs: {}'.format(len(test_dataset)))

target_loss_weight = [1.0, 1.0]

with tf.Graph().as_default():
    with h5py.File(model_file_path, mode='r') as f:
        topology.load_weights_from_hdf5_group(f['model_weights'],
                                              cbcnn_model.layers)

    # Get necessary data from raw csvs
    test_mat = pd.read_csv(test_file_name, sep=' ').values
    test_filenames = test_mat[:, 0]
    test_labels = test_mat[:, 1]
    test_count = len(test_filenames)

    # Get test data set generator
    test_file_paths = [
        os.path.join(base_dir, test_file) for test_file in test_filenames
    ]
    test_generator = load_dataset(test_file_paths, test_labels, batch_size,
                                  no_classes, resize_height, resize_width,
                                  cropped_height, cropped_width)

    # Get test predictions
    cnt = 0
    test_batches = int(np.ceil(test_count / batch_size))

    actual_labels = []
    predicted_labels = []
    for test_batch_tuple in test_generator:
        if cnt == test_batches:
            break

        test_data = test_batch_tuple[0]
        test_labels = test_batch_tuple[1]