Example #1
def train(model, Parse_parameters, opts, dictionaries):
    train_data = load_dataset(Parse_parameters, opts.train, dictionaries)
    dev_data = load_dataset(Parse_parameters, opts.dev, dictionaries)
    optimizer = optim.SGD(model.parameters(), lr=0.01, weight_decay=1e-4)
    
    # Number of epochs (about 10 epochs are needed to converge)
    n_epochs = 1
    #eval_epoch(model, dev_data, dictionaries, opts)
    for epoch in range(n_epochs):
        print("Train epoch: %d" % epoch)
        train_epoch(model, train_data, opts, optimizer)
Example #2
def load_dataset():
    """
	 Loads the data using the DataLoader implementation in loader.py.
	 Hard Coding: For this project the data is Mindboggle only, so the values are hard coded.
	"""
    data_dir = '../data/Mindboggle'
    data = loader.load_dataset(data_dir, dataset='Mindboggle', goal='segment')
    return data
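A minimal usage sketch, assuming the hard-coded '../data/Mindboggle' directory exists relative to the working directory:

data = load_dataset()
print("Loaded the Mindboggle segmentation data:", type(data))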
Example #3
def run(**kwargs):
    # load graph
    graph_nx, dump_folder = loader.load_dataset(kwargs['dataset'])

    # initialize tNodeEmbed
    task = kwargs['task']
    test_size = kwargs['test_size']
    tnodeembed = models.tNodeEmbed(graph_nx,
                                   task=task,
                                   dump_folder=dump_folder,
                                   test_size=test_size,
                                   **kwargs['n2vargs'])

    # load dataset
    X, y = tnodeembed.get_dataset(train_skip=kwargs['train_skip'])

    # fit
    keras_args = kwargs['keras_args']
    batch_size = keras_args.pop('batch_size', 32)
    steps_per_epoch = ceil(len(X['train']) / batch_size)
    # tNodeEmbed
    generator = loader.dataset_generator(X['train'],
                                         y['train'],
                                         tnodeembed.graph_nx,
                                         tnodeembed.train_time_steps,
                                         batch_size=batch_size)
    tnodeembed.fit_generator(generator, steps_per_epoch, **keras_args)
    # node2vec
    static_model = models.StaticModel(task=task)
    generator = loader.dataset_generator(X['train'],
                                         y['train'],
                                         tnodeembed.graph_nx,
                                         [max(tnodeembed.train_time_steps)],
                                         batch_size=batch_size)
    static_model.fit_generator(generator, steps_per_epoch, **keras_args)

    # predict
    steps = ceil(len(X['test']) / batch_size)
    generator = loader.dataset_generator(X['test'],
                                         y['test'],
                                         tnodeembed.graph_nx,
                                         tnodeembed.train_time_steps,
                                         batch_size=batch_size,
                                         shuffle=False)
    tnodeembed_metrics = get_metrics(
        y['test'], tnodeembed.predict_generator(generator, steps))
    generator = loader.dataset_generator(X['test'],
                                         y['test'],
                                         tnodeembed.graph_nx,
                                         [max(tnodeembed.train_time_steps)],
                                         batch_size=batch_size,
                                         shuffle=False)
    node2vec_metrics = get_metrics(
        y['test'], static_model.predict_generator(generator, steps))

    print(f'tnodeembed: {tnodeembed_metrics}')
    print(f'node2vec: {node2vec_metrics}')
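A hypothetical invocation of run(); every value below (dataset name, task label, split sizes, Keras arguments) is an illustrative assumption rather than something taken from this snippet:

run(dataset='example_graph',
    task='link_prediction',
    test_size=0.2,
    train_skip=1,
    n2vargs={},
    keras_args={'batch_size': 64, 'epochs': 10})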
Example #4
def training(modelName, dataAugmentationFlag, pretrainedFlag, epochsNumber):

    print("Modello: ", modelName)
    # Caricamento del dataset
    train_loader, valid_loader, test_loader = load_dataset(
        dataAugmentationFlag)

    # Define the model
    num_class = 4

    if modelName == 'SqueezeNet':
        model = squeezenet1_0(pretrained=True)
        model.classifier[1] = nn.Conv2d(512,
                                        num_class,
                                        kernel_size=(1, 1),
                                        stride=(1, 1))
        model.num_classes = num_class
    else:
        model = vgg16(pretrained=True)
        model.classifier[6] = nn.Linear(4096, num_class)

    model = trainval_classifier(model,
                                pretrainedFlag,
                                modelName,
                                train_loader,
                                valid_loader,
                                lr=0.001,
                                exp_name=modelName,
                                momentum=0.99,
                                epochs=epochsNumber)

    # Test phase
    predictions_test, labels_test = test_classifier(model, test_loader)

    # Test predictions from the model at the last epoch
    accuracy = accuracy_score(labels_test, predictions_test) * 100
    print("Accuracy di ", modelName, ": ", accuracy)

    return accuracy, model
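A hedged comparison run built only from the signature above; the name string in the second call is arbitrary, since any value other than 'SqueezeNet' selects the VGG16 branch:

# Fine-tune both architectures for 10 epochs with data augmentation and pretrained weights.
squeeze_acc, squeeze_model = training('SqueezeNet', True, True, 10)
vgg_acc, vgg_model = training('VGG16', True, True, 10)
print('Best model:', 'SqueezeNet' if squeeze_acc > vgg_acc else 'VGG16')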
Example #5
Parse_parameters['train'] = opts.train
Parse_parameters['development'] = opts.dev
Parse_parameters['vocab_size'] = opts.vocab_size

# Check parameter validity
assert os.path.isfile(opts.train)
assert os.path.isfile(opts.dev)
if opts.pre_emb:
    assert opts.embedding_dim in [50, 100, 200, 300]
    assert opts.lower == 1

# load datasets
dictionaries = prepare_dictionaries(Parse_parameters)
tagset_size = len(dictionaries['tag_to_id'])

train_data = load_dataset(Parse_parameters, opts.train, dictionaries)
dev_data = load_dataset(Parse_parameters, opts.dev, dictionaries)

# Model parameters
Model_parameters = OrderedDict()
Model_parameters['vocab_size'] = opts.vocab_size
Model_parameters['embedding_dim'] = opts.embedding_dim
Model_parameters['hidden_dim'] = opts.hidden_dim
Model_parameters['tagset_size'] = tagset_size
Model_parameters['lower'] = opts.lower == 1
Model_parameters['decode_method'] = opts.decode_method
Model_parameters['loss_function'] = opts.loss_function

model = LstmModel.LSTMTagger(Model_parameters)
#model = LstmCrfModel.BiLSTM_CRF(Model_parameters)
optimizer = optim.Adam(model.parameters(), lr=0.01)
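The snippet stops after the optimizer is created. A minimal sketch of how training might continue, reusing the hypothetical train_epoch helper from Example #1 (not defined in this excerpt):

n_epochs = 10
for epoch in range(n_epochs):
    print("Train epoch: %d" % epoch)
    train_epoch(model, train_data, opts, optimizer)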
Example #6
from sklearn.metrics import average_precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import accuracy_score

device_name = tf.test.gpu_device_name()
device = torch.device("cuda")

batch_size = 1

print('Loading BERT tokenizer...')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased',
                                          do_lower_case=True)

data_dir = "/home1/kchandu/research1/persona_movies/data/categorical_data"

test_inputs, test_masks, test_labels, test_categories = load_dataset(
    os.path.join(data_dir, "test.all"), tokenizer)
test_data = TensorDataset(test_inputs, test_masks, test_labels,
                          test_categories)
test_sampler = SequentialSampler(test_data)
test_dataloader = DataLoader(test_data,
                             sampler=test_sampler,
                             batch_size=batch_size)
#model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)
model = BertForSequenceClassification.from_pretrained("./model_save/")
# Tell pytorch to run this model on the GPU.
model.cuda()

model.eval()

# Tracking variables
predictions, true_labels, pred_labels = [], [], []
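The excerpt ends just before the prediction loop. A minimal sketch of how the tracking variables could be filled, assuming numpy is imported as np elsewhere, that each batch unpacks as (input_ids, attention_mask, labels, categories), and that the model returns logits as the first element of its output:

for batch in test_dataloader:
    b_input_ids, b_input_mask, b_labels, _ = tuple(t.to(device) for t in batch)
    with torch.no_grad():
        outputs = model(b_input_ids, attention_mask=b_input_mask)
    logits = outputs[0].detach().cpu().numpy()
    predictions.append(logits)
    true_labels.append(b_labels.cpu().numpy())
    pred_labels.append(np.argmax(logits, axis=1))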
Example #7
import numpy as np
from neural_network import NeuralNetwork
from layers import Layer
import loader
import csv
import multiprocessing
import time
import matplotlib.pyplot as plt

np.random.seed()
# This is the configuration of the present experiment.

X, y, X_test, y_test = loader.load_dataset("CasosArtrite2.csv",
                                           input_dimension=17,
                                           test_ratio=0.3)

epochs = 10000


def single_run(hidden_neurons, learning_rate, hidden_activation,
               output_activation):
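    # model_data layout: row 0 holds the epoch indices, rows 1-4 are zero-initialized metric rows.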
    model_data = np.vstack([np.arange(0, epochs), np.zeros([4, epochs])])

    nn = NeuralNetwork(learning_rate=learning_rate)
    nn.add_layer(Layer(hidden_neurons, activation=hidden_activation))

    nn.initialize(X.shape[1], y.shape[1], output_activation=output_activation)

    epoch_values = model_data[0, :]
    start = time.time()
Example #8
    def assign(self, point):
        # Incremental mean update of the centroid position; the last column of `point` is the label.
        self.centroid_buffer = (self.centroid_buffer * self.pt_count +
                                point[:-1]) / (self.pt_count + 1)
        self.pt_count += 1
        self.labels[int(point[-1])] += 1
        self.ss_loss += self.dist(point)  # add squared distance to loss

    def update_centroid(self):
        self.centroid = self.centroid_buffer


if __name__ == "__main__":
    assert len(sys.argv) == 3  # usage: <script> <num_clusters> <num_iterations>
    # load iris dataset
    iris_set = load_dataset()

    # initialize clusters
    seed_examples = random.sample(iris_set, int(sys.argv[1]))
    clusters = [Cluster(seed) for seed in seed_examples]

    # iterate
    for i in range(int(sys.argv[2])):
        for cluster in clusters:
            cluster.clear_points()  # reset points in cluster, keep centroid
        for point in iris_set:
            nearest_cluster = np.argmin(
                [cluster.dist(point) for cluster in clusters])
            clusters[nearest_cluster].assign(
                point)  # reassign points to clusters
        for cluster in clusters:
            cluster.update_centroid()  # move each centroid to its buffered mean
Example #9
import compute
import loader
import plotter

dataset = loader.load_dataset('patient_1')
data = dataset["data"]
mask = dataset["mask"]
threshold = 95.0

scaled = compute.to_hounsfield_units(data)
masked = list(map(lambda i: compute.apply_mask(i, mask, threshold), scaled))
gradients = compute.compute_gradients(masked)
masked_gradients = list(
    map(lambda i: compute.apply_mask(i, mask, threshold), gradients))


def slicer(arr):
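    # Return the slice at index 30 along the first axis of the 3-D volume.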
    return arr[30, :, :]


scaled_slices = list(map(slicer, scaled))
masked_slices = list(map(slicer, masked))
gradient_slices = list(map(slicer, masked_gradients))

plotter.plot(scaled_slices, masked_slices, gradient_slices)
plotter.show()
Example #10
  01:       576         580         700 iterations (595 in 100 iterations)
  02:       473         491         (200 iterations)
  03:       641         673         (300 iterations)
  04:       1001        1092        (1100 after 100 iterations)
  05:       749         801         400 iterations
  06:       876         113         200 iterations
  07:       885
  08:       4437        5362

  801 with 5 10 20 population profile on problem 05. 500 iterations.
  839 with 5 population profile on problem 05. 500 iterations.
  002 with 5 population profile on problem 05. 500 iterations. local optimum..

'''

depots, customers, durations, n_paths_per_depot = loader.load_dataset(filename)
conf = configparser.parse_config('configs/default.conf')

if len(plt.get_fignums()) > 0:
    ax0, ax1 = plt.gcf().get_axes()
else:
    _, (ax0, ax1) = plt.subplots(1, 2)

model = MDVRPModel(customers, depots, n_paths_per_depot, conf)
optimal_solution = utils.visualize_solution(model, solution_file)

model.evolve(3)
one = model.population[0]  # debug

# Rank the population by fitness and keep the lowest-scoring (best) individual.
L = [each.fitness_score() for each in model.population]
best = model.population[np.argmin(L)]
Example #11
        'C': [0.001, 1, 1000],
        'penalty': ['l2'],
        'solver': ['newton-cg', 'lbfgs']
    }]

    param_object = ParameterGrid(tuned_parameters)
    skf = StratifiedKFold(
        n_splits=NUMBER_OF_FOLDS,
        shuffle=True,  # shuffle must be enabled for random_state to take effect
        random_state=SEED)  # Splits the data into NUMBER_OF_FOLDS stratified folds

    # Word level: Unigrams, Bigrams, Trigrams, UniBiTri_combined | Character level: Trigrams, 4grams, 5grams, Tri45_combined
    map_analyzer_ngram_options = {
        'word': [(1, 1), (2, 2), (3, 3), (1, 3)],
        'char': [(3, 3), (4, 4), (5, 5), (3, 5)]
    }

    data_path = '/data/annotations_UQ.csv'

    results_path = '/path/unpalatable-questions/results/model-results/LogReg_results.tsv'
    results_file = open(results_path, "w")
    results_file.write(
        "Agreement\tContext\tModel\tF1-score\tAUROC\tWeighted F1\tPrecision\tRecall\tAccuracy\tAUPRC\n"
    )

    for conf in [0.6, 1.0]:  # 3/5 and 5/5 agreement
        question_sentences, reply_sentences, comment_sentences, y = loader.load_dataset(
            data_path, conf=conf)
        random_baseline(y)
        for analyzer in ['word', 'char']:
            run_experiments(analyzer)
Example #12
import numpy as np  # needed for np.random.seed below
import loader
import csv
import multiprocessing
import time
import matplotlib.pyplot as plt


def adjust_output(data):
    return data[:, :]


np.random.seed()
# This is the configuration of the present experiment.

X, y, r, X_test, y_test, r_test = loader.load_dataset("~/anti_stationary.csv",
                                                      input_dimension=9,
                                                      test_ratio=0.3)

y = adjust_output(y)
y_test = adjust_output(y_test)

print(X_test)
print(y_test)

print(r_test)

epochs = 5000


def single_run(hidden_neurons, learning_rate, hidden_activation,
               output_activation):
Example #13
from helper import flat_accuracy, format_time

device = torch.device("cuda")

epochs = 3
batch_size = 32

#Tokenization
print('Loading BERT tokenizer...')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased',
                                          do_lower_case=True)

data_dir = "/home1/kchandu/research1/persona_movies/data/categorical_data"

#Data loading
train_inputs, train_masks, train_labels, train_categories = load_dataset(
    os.path.join(data_dir, "train.all"), tokenizer)
train_data = TensorDataset(train_inputs, train_masks, train_labels,
                           train_categories)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data,
                              sampler=train_sampler,
                              batch_size=batch_size)

validation_inputs, validation_masks, validation_labels, validation_categories = load_dataset(
    os.path.join(data_dir, "dev.all"), tokenizer)
validation_data = TensorDataset(validation_inputs, validation_masks,
                                validation_labels, validation_categories)
validation_sampler = RandomSampler(validation_data)
validation_dataloader = DataLoader(validation_data,
                                   sampler=validation_sampler,
                                   batch_size=batch_size)
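The excerpt ends once the data loaders are built. A sketch of a common next step with this library, assuming a BertForSequenceClassification model is created further down (as in Example #6):

from transformers import AdamW, get_linear_schedule_with_warmup

optimizer = AdamW(model.parameters(), lr=2e-5, eps=1e-8)
total_steps = len(train_dataloader) * epochs
scheduler = get_linear_schedule_with_warmup(optimizer,
                                            num_warmup_steps=0,
                                            num_training_steps=total_steps)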
Example #14
    assert opts.embedding_dim in [50, 100, 200, 300]

# load datasets
if not opts.load:
    dictionaries = prepare_dictionaries(Parse_parameters)
else:
    # load dictionaries
    with open(opts.load+'/dictionaries.dic', 'rb') as f:
        dictionaries = cPickle.load(f)
    # load parameters
    opts = load_parameters(opts.load, opts)


tagset_size = len(dictionaries['tag_to_id'])

train_data = load_dataset(Parse_parameters, opts.train, dictionaries)
dev_data = load_dataset(Parse_parameters, opts.dev, dictionaries)


# Model parameters
Model_parameters = OrderedDict()
Model_parameters['vocab_size'] = opts.vocab_size
Model_parameters['embedding_dim'] = opts.embedding_dim
Model_parameters['hidden_dim'] = opts.hidden_dim
Model_parameters['tagset_size'] = tagset_size
Model_parameters['decode_method'] = opts.decode_method
Model_parameters['loss_function'] = opts.loss_function
Model_parameters['freeze'] = opts.freeze


#model = LstmModel.LSTMTagger(Model_parameters)
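The excerpt stops at the commented-out model line. Judging from Example #5, the next lines presumably instantiate a model and an optimizer; a hedged sketch using the CRF variant named there (since the plain tagger is commented out here):

model = LstmCrfModel.BiLSTM_CRF(Model_parameters)
optimizer = optim.Adam(model.parameters(), lr=0.01)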