def main(args):
    # args_string = str(args)

    argp = _argparse().parse_args(args[1:])

    sae = SparseAutoEncoder(28 * 28, argp.hu)
    data_loader = DataLoader()

    datasets = data_loader.load_data()

    trainer = sgd_trainer(argp.batch_size, argp.learning_rate, argp.sl, argp.t, argp.r, argp.sc)
    trainer.trainAutoEncoder(sae)
    W = sae.W1.get_value(borrow=True)

    out_dir = check_create_observations_dir("AutoEncoder")
    target_file = os.path.join(out_dir, "autoencoderfilter.png")
    display_sparse_encoder(W, target_file)

    test_set_x, test_set_y = datasets[2]
    test_inpt = test_set_x[:10, :]

    mnist_vis_file = os.path.join(out_dir, "autoencoderrec.png")
    display_reconstructions(test_inpt, sae.encode(test_inpt), mnist_vis_file)

    cmd_file_path = os.path.join(out_dir, "command.txt")
    f = open(cmd_file_path, "w")
    f.write("python ")
    for a in args:
        f.write(str(a))
        f.write(" ")
    f.close()
    print "THE END"

Example #2
    def train(self, x, y):
        data_loader = DataLoader()

        for classifier in self.classifiers:
            sampled_data = data_loader.underSample(x, y)
            x_sampled = sampled_data[0]
            y_sampled = sampled_data[1]
            classifier.train(x_sampled, y_sampled)
 def test_empty_directory(self):
     mock_glob = self.mocker.replace('glob.glob')
     mock_glob("some_directory/*")
     self.mocker.result(['.'])
     self.mocker.replay()
     with self.assertRaises(ValueError) as cm:
         DataLoader.load_check_ins_from_directory("some_directory")
     self.assertEqual(cm.exception.message, 'Error: directory some_directory is empty')
def start(p = 12,num_clusters =100,max_iter=50, batch_size = 500,init = 'random' ):
    
    data_loader = DataLoader()
    cifar_data = data_loader.load_cifar_data()     
    images = cifar_data['data'].reshape((-1,3,32,32)).astype('float32')
#     img_test = images[2,:,:,:]
#     img_test = np.rollaxis(img_test,0,3)
#     img_test = img_test[:,:,::-1]
#     plt.imshow(img_test)
#     plt.show()

    images = np.rollaxis(images,1,4)
    images = images[:,:,:,::-1]
    
    num_patches = images.shape[0]
    patch_size = [p,p]
#   
    kmeans = MiniBatchKMeans(num_clusters,max_iter,batch_size,init)  
    
    patches_img = kmeans.generate_patches(images, patch_size)
    
#     plt.imshow(patches_img[0,:,:,:])
#     plt.show();
    # Convert to matrix form rows X cols
    patches=patches_img.reshape(patches_img.shape[0],-1)
#     i=display(patches[0,:], patch_size)
#     plt.imshow(i)
#     plt.show()
    
    
    #pre-processing
    
    centers,counts = kmeans.fit(patches)
    
    fig = plt.figure()
    disp_row_size = np.ceil(np.sqrt(kmeans.num_clusters))
    
    for i in xrange(kmeans.num_clusters):
        subplot = fig.add_subplot(disp_row_size, disp_row_size, i+1)    
        subplot.get_xaxis().set_visible(False)
        subplot.get_yaxis().set_visible(False)
        img = display(centers[:,i], patch_size)
        subplot.imshow(img, interpolation='none')
    
    #plt.show()
    
    directory=check_create_observations_dir()
    
    plt.savefig(directory+'/repFields.png')
#     patch_test=patches[0,:,:,:]
#     plt.imshow(patch_test)
#     plt.show()
    
    display_bar(counts,directory+'/clusterCount.png')
    
    
    print "THE END" 
def fit_mnist():
    print "Computing for MNIST"
    data_loader = DataLoader()
    train_set,valid_set,test_set=data_loader.load_data()        
    
    train_set_x,train_set_y=train_set  
    
    plt=get_pairwise_plot(train_set_x, train_set_y)
    
    obs_dir=check_create_observations_dir("PCA")
    target_path = os.path.join(obs_dir,"scatterplotMNIST.png")
    plt.savefig(target_path)
    print "THE END" 
def fit_cifar():
    print "Computing for CIFAR 10"
    data_loader = DataLoader()
    cifar_data=data_loader.load_cifar_data()        
    
    train_set_x=cifar_data['data']
    train_set_y=cifar_data['labels']  
    
    plt=get_pairwise_plot(train_set_x, train_set_y)
    
    obs_dir=check_create_observations_dir("PCA")
    target_path = os.path.join(obs_dir,"scatterplotCIFAR.png")
    plt.savefig(target_path)
    print "THE END" 
    def loadData(self, stopCount=0):
        self.X = []
        self.y = []


        dl = DataLoader()
        matchGenerator = dl.getMatch()

        count = 0
        if stopCount != 0:
            print "Loading up to {} matches...".format(stopCount)
        else:
            print "Loading ALL matches..."

        try:
            while True:
                if stopCount > 0 and count == stopCount:
                    raise StopIteration
                current_list = []
                raw_data = matchGenerator.next()
                count += 1
                current_data = dl.filterMatchFields(raw_data)

                # data fields
                current_list.append(ModelGlobals.TIERS[current_data["matchTier"]])
                current_list.append(ModelGlobals.PATCHES[current_data["patch"]])

                offset = len(current_list)
                empty_fields = [0] * self.empty_array_width

                current_list.extend(empty_fields)

                for champion in current_data["teamA"]:
                    current_list[self._findChampIndex("teamA", champion, offset)] = 1
                for champion in current_data["teamB"]:
                    current_list[self._findChampIndex("teamB", champion, offset)] = 1

                self.X.append(current_list)

                # target value
                if current_data["winnerTeamA"]:
                    self.y.append(0)
                else:
                    self.y.append(1)


        except StopIteration as e:
            print "Done reading match data, {} matches".format(count)
def main():
    model = Doc2Vec.load('400_pvdm_doc2vec.d2v')
    model_dbow = Doc2Vec.load('400_pvdbow_doc2vec.d2v')
    # note: despite its file name, the 'pvdm' model loaded above is actually PV-DBOW
    path = 'datasets/'

    files = [f for f in listdir(path) if isfile(join(path,f))]
    files.pop(0)

    data_loader = DataLoader(path)

    domains = data_loader.csv_files


    names = {1: 'title', 4: 'abstract', 5: 'mesh', 'y': 6}

    domain_features = data_loader.get_feature_matrix(names)

    #get size
    n_total_documents = 0

    for domain in domain_features:
        n_total_documents+=len(domain[0])

    all_features = numpy.zeros(shape=(n_total_documents, 800))
    all_labels = numpy.asarray([])
    i = 0

    for domain in domain_features:
        features, labels = domain
        all_labels = numpy.hstack((all_labels, labels))
        for feature_vector in features:
            preprocessed_line = list(preprocess(feature_vector))
            all_features[i, 0:400] = numpy.float_(model.infer_vector(preprocessed_line))
            all_features[i, 400:] = numpy.float_(model_dbow.infer_vector(preprocessed_line))
            i+=1
    all_labels = numpy.asarray(all_labels)
    all_labels[all_labels == -1] = 0
    all_labels = numpy.intc(all_labels)
    train, test = data_loader.create_random_samples(all_features, all_labels)
    train_x, train_y = train
    test_x, test_y = test

    classifier = NeuralNet(n_hidden_units=[200], output_size=2, batch_size=20, n_epochs=200, dropout=True,
                                   activation_function='relu', learning_rate=.3, momentum=True, momentum_term=.5)

    classifier.train(train_x, train_y)
    classifier.test(test_x, test_y)
    def test_same_check_in_ids_in_same_file(self):
        self.file.write("418|12|2012-07-18 14:43:38|37.6164|-122.386|41059b00f964a520850b1fe3|empty_message\n418|12|2012-07-18 12:34:45|45.54|45.6|41059b00f964a520850b1fe3|empty_message")
        self.file.seek(0)

        mock_glob = self.mocker.replace('glob.glob')
        mock_glob("some_directory/*")
        self.mocker.result(['.', 'file1'])

        mock_open = self.mocker.replace('__builtin__.open')
        mock_open("file1", 'rU')
        self.mocker.result(self.file)

        self.mocker.replay()
        with self.assertRaises(ValueError) as cm:
            DataLoader.load_check_ins_from_directory("some_directory")
        self.assertEqual(cm.exception.message, 'Error processing file file1: check-in with ID 12 has already been encountered for user 418')
 def trainAutoEncoder(self,ae):    
     
     data_loader = DataLoader()
     datasets=data_loader.load_shared_data()
     train_set_x, train_set_y = datasets[0]
     
     n_train_batches = train_set_x.get_value(borrow=True).shape[0] / self.batch_size
     
     index = T.lscalar()   
     x = ae.input
     
     
     main_cost = ae.get_cross_enropy_cost()
     
     if self.reconst_cost == "sqr":
         main_cost =ae.get_squared_error_cost()
     
     sparsity_cost= ae.get_KL_divergence_cost()
     
     if self.sparsity_cost =="l1":
         sparsity_cost = ae.get_L1_cost()
     
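     # total cost: reconstruction term plus the weighted sparsity penalty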
     cost = main_cost + self.sparsity_lambda * sparsity_cost
     
     gparams = T.grad(cost, ae.params)
     
     updates = [
         (param, param - self.learning_rate * gparam)
         for param, gparam in zip(ae.params, gparams)
     ]
     
     train_model = theano.function(
     [index],
     cost,
     updates=updates,
     givens={
         x: train_set_x[index * self.batch_size: (index + 1) * self.batch_size]
     }
     )
     
     for epoch_no in xrange(self.num_epochs):
         c=[]
         for batch_index in xrange(n_train_batches):
             c.append(train_model(batch_index))
         
         print 'Training epoch no: %d, cost ' % epoch_no, np.mean(c)
 def __init__(self, url = 'http://localhost:8080/workspace0', filename='Citation_Stream', on_gui = True):
     self.url = url;
     self.filename = filename;
     self.loader = DataLoader();     # create an instance of DataLoader
     self.loader.loadData(self.filename);    # load data from file
     self.g = GephiJsonClient(url=self.url);     # create an instance of GephiJsonClient
     self.g.cleanAll();
     self.degree_dict = {};
     self.cited_dict = {};
     
     # elements for the GUI
     self.is_run = True;
     self.is_gui = on_gui;
     if self.is_gui == True:
         self.initializeUI();
Example #12
    def connect(self):
        if self.csv_loaded == False:
            self.plotWidget.clear()
            line = 1
            rfile = ReadCSV.read("test.csv")
            for row in rfile:
                if line == 1:   # skip the CSV header row
                    line = line + 1
                    continue
                self.upload_data.append(row)

            Fixer.fixTimeAndAlt(self.upload_data)
            buff = Buffer()
            if os.path.exists('data.txt'):
                os.remove('data.txt')
            for i in self.upload_data:
                buff.sendToBuffer(i)
                buff.sendData()
            self.data = DataLoader.read('data.txt')
            self.csv_loaded = True
            self.infocsv.setText(_fromUtf8("Zaladowano"))  # "Zaladowano" = "Loaded" (Polish)
            self.info.setText("")
            self.wys = self.kat = self.dyst = self.pred = self.odchyl = 0
    def test_single_directory_happy_path(self):
        self.file.write("418|12|2012-07-18 14:43:38|37.6164|-122.386|41059b00f964a520850b1fe3|empty_message\n418|13|2012-07-18 12:34:45|45.54|45.6|41059b00f964a520850b1fe3|empty_message")
        self.file.seek(0)
        self.file2.write("418|14|2012-07-18 14:43:38|37.6164|-122.386|41059b00f964a520850b1fe3|empty_message\n418|15|2012-07-18 12:34:45|45.54|45.6|41059b00f964a520850b1fe3|empty_message")
        self.file2.seek(0)

        mock_glob = self.mocker.replace('glob.glob')
        mock_glob("some_directory/*")
        self.mocker.result(['.', 'file1', 'file2'])

        mock_open = self.mocker.replace('__builtin__.open')
        mock_open("file1", 'rU')
        self.mocker.result(self.file)
        mock_open("file2", 'rU')
        self.mocker.result(self.file2)

        self.mocker.replay()
        expected_dict = {
            '418': [{'venue_id': '41059b00f964a520850b1fe3', 'latitude': 37.6164, 'check_in_message': 'empty_message', 'check_in_id': '12', 'longitude': -122.386, 'date': datetime.datetime(2012, 7, 18, 14, 43, 38)},
                    {'venue_id': '41059b00f964a520850b1fe3', 'latitude': 45.54, 'check_in_message': 'empty_message', 'check_in_id': '13', 'longitude': 45.6, 'date': datetime.datetime(2012, 7, 18, 12, 34, 45)},
                    {'venue_id': '41059b00f964a520850b1fe3', 'latitude': 37.6164, 'check_in_message': 'empty_message', 'check_in_id': '14', 'longitude': -122.386, 'date': datetime.datetime(2012, 7, 18, 14, 43, 38)},
                    {'venue_id': '41059b00f964a520850b1fe3', 'latitude': 45.54, 'check_in_message': 'empty_message', 'check_in_id': '15', 'longitude': 45.6, 'date': datetime.datetime(2012, 7, 18, 12, 34, 45)}]}
        actual_dict = DataLoader.load_check_ins_from_directory("some_directory")
        self.assertDictEqual(expected_dict, actual_dict)
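Taken together, the directory tests above pin down the observable behaviour of DataLoader.load_check_ins_from_directory. The sketch below is only a reconstruction consistent with those tests, not the original implementation; the duplicate-ID message is assumed to be raised by load_check_ins_from_file and re-wrapped with the file name here.

import glob

def load_check_ins_from_directory(directory):
    # sketch reconstructed from the tests above, not the original implementation
    paths = [p for p in glob.glob(directory + "/*") if p != '.']
    if not paths:
        raise ValueError("Error: directory %s is empty" % directory)
    check_ins = {}
    for path in paths:
        f = open(path, 'rU')
        try:
            for user, entries in DataLoader.load_check_ins_from_file(f).items():
                check_ins.setdefault(user, []).extend(entries)
        except ValueError as e:
            # assumed: the per-file loader raises "check-in with ID ... has
            # already been encountered for user ...", wrapped here with the file name
            raise ValueError("Error processing file %s: %s" % (path, e))
        finally:
            f.close()
    return check_ins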
Example #14
    def training_data_scibert(self):
        print("Creating SciBERT training files.\n")

        # Load training and validation data
        d_train = DataLoader()
        df_train = d_train.training_data_with_abstracts_citations().data

        d_val = DataLoader()
        df_validation = d_val.validation_data_with_abstracts_citations().data

        train_val_data = pd.concat((df_train, df_validation),
                                   axis=0).reset_index(drop=True)
        scibert_embeddings = self._scibert_embeddings(train_val_data)
        print("Saving SciBERT embeddings to disk...")
        scibert_embeddings_file = os.path.join(self.path_persistent,
                                               "scibert_embeddings.pkl")
        with open(scibert_embeddings_file, "wb") as f:
            pickle.dump(scibert_embeddings, f)
        print("Saved.\n")
        print("SciBERT training files created.")
Example #15
def upload(data_file):
	table = "Straws"
	url = "http://dbweb6.fnal.gov:8080/mu2edev/hdb/loader"
	#url = "http://rexdb02.fnal.gov:8500/swhite/HdbHandler.py/loader"
	queryUrl = "http://dbweb6.fnal.gov:8088/QE/mu2e_hw_dev/app/SQ/query"
	group = "Straw Tables"
	password = "******"

	with open(data_file) as file_input:
		reader = csv.reader(file_input)
		for row in reader:
			dataLoader = DataLoader(password,url,group,table)
			dataLoader.addRow(createRow(row))
			retVal,code,text =  dataLoader.send()
	
			if retVal:
				print(str(row[0])+" successful upload")
				print(text)
			else:
				print(str(row[0]) + " FAILED upload")
				print(code)
				print(text)

			dataLoader.clearRows()
Example #16
def example_many_to_one_hot_nn_optimization():
    #data = load_iris()
    data = load_mnist()
    train, val, test = create_train_val_test_data(data, 0.25, 0.10)
    print(data[0][0].shape[0])
    print(data[0][1].shape[0])

    # VERIFICATION PLOT OF MNIST
    pixels = data[0][0].reshape((28, 28))

    # Plot
    plt.imshow(pixels, cmap='gray')
    plt.show()
    #

    data_gen = DataLoader(train)
    neural_predictor = lambda nn: predictor_fitness(nn.predict,
                                                    data_gen.generator())

    simple_nn = NeuralNetwork(data[0][0].shape[0], [3, 3, 3],
                              data[0][1].shape[0])
    x0w, x0b = simple_nn.get_weights_and_biases()
    print("Number of weights:", len(x0w))
    print("Number of biases:", len(x0b))
    x0 = x0w
    x0.extend(x0b)
    fitness = lambda ind: network_opt(ind, simple_nn, neural_predictor)

    # OPTIMIZE HERE
    best_ind = x0
    print("Best fitness is:", fitness(best_ind))
    print(simple_nn.get_weights_and_biases())
    #

    simple_nn.set_all(best_ind)
    data_gen = DataLoader(test)
    print(
        "Fitness on test data:",
        predictor_fitness(simple_nn.predict,
                          data_gen.generator(),
                          batch_size=test.shape[0]))
Example #17
def resistanceupload():
	def createRow():
			return{'straw_barcode': str(row[0]),
			'create_time' : str(row[1]), #Website gets real time somehow.
			'worker_barcode' : str(row[2]),
			'workstation_barcode' : str(row[3]),
			'resistance' : str(row[4]),
			'temperature' : str(row[5]),
			'humidity' : str(row[6]),
			'resistance_measurement_type' : str(row[7]),
			'comments' : str(row[8]),}
	for row in upload_file:
		table = "straw_resistance_measurements"
		dataLoader = DataLoader(password,url,group,table)
		dataLoader.addRow(createRow())
		retVal,code,text =  dataLoader.send()
		if retVal:
			print "upload resistance success!\n"
			print text
		else:
			print "upload resistance failed!\n"
			print code
			print text
		dataLoader.clearRows()     
Example #18
def train_network(sess, clear=True, continue_training=False):
    if not os.path.exists(cfg.DIR.tensorboard_log_dir):
        os.mkdir(cfg.DIR.tensorboard_log_dir)
    else:
        if clear:
            shutil.rmtree(cfg.DIR.tensorboard_log_dir)

    if not os.path.exists(cfg.DIR.model_save_dir):
        os.mkdir(cfg.DIR.model_save_dir)

    writer = tf.summary.FileWriter(cfg.DIR.tensorboard_log_dir)
    writer.add_graph(sess.graph)

    # placeholders: the batch dimension must be None (not -1) for tf.placeholder
    X = tf.placeholder(shape=(None, 28, 28, 1), dtype=tf.float32)
    Y = tf.placeholder(shape=(None, 10), dtype=tf.float32)

    logits, layer = build_network(X)

    # reduce to a scalar so it can be logged with tf.summary.scalar below
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=logits))

    optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)

    # initialize the global parameters (after the network and optimizer have
    # created their variables) and optionally restore a previous checkpoint
    sess.run(tf.global_variables_initializer())

    if continue_training:
        value_list = []
        value_list.extend(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='global/network_scope'))
        restore = tf.train.Saver(value_list)
        restore.restore(sess, tf.train.latest_checkpoint(cfg.DIR.model_save_dir))

    train_data, train_labels, validate_data, validate_labels = split_train_validate_set(cfg.DIR.training_data)

    train_data_loader = DataLoader(train_data, train_labels)

    val_data_loader = DataLoader(validate_data, validate_labels)

    correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    for key, value in layer.items():
        tf.summary.histogram(key, value)

    tf.summary.scalar("train_loss", loss)
    tf.summary.scalar("train_accuracy", accuracy)

    summary_op = tf.summary.merge_all()

    val_loss_holder = tf.placeholder(tf.float32)
    val_loss_tensor = tf.summary.scalar("val_loss", val_loss_holder)

    val_accuracy_holder = tf.placeholder(tf.float32)
    val_accuracy_tensor = tf.summary.scalar("val_accuracy", val_accuracy_holder)

    saver = tf.train.Saver(max_to_keep=10)

    if not os.path.exists(cfg.DIR.model_save_dir):
        os.makedirs(cfg.DIR.model_save_dir)

    start_time = time.time()

    tf.get_default_graph().finalize()

    train_step = 0
    val_step = 0
    for epoch in range(1, cfg.TRAIN.EPOCHS + 1):
        while(train_data_loader.hasNextBatch()):
            train_step += 1
            batch_data, batch_label = train_data_loader.getNextBatch(cfg.TRAIN.BATCH_SIZE)
            _,_,_,summary = sess.run([loss, accuracy, optimizer, summary_op], feed_dict={X:batch_data, Y:batch_label})
            writer.add_summary(summary, train_step)
        print("Epoch %d finished." % epoch)
        train_data_loader.reset()

        if epoch % cfg.TRAIN.SAVE_STEPS == 0:
            filename = 'digit_recognizer_{:d}'.format(epoch)
            filename = os.path.join(cfg.DIR.model_save_dir , filename)
            saver.save(sess, filename, global_step=epoch)

        if (epoch % cfg.TRAIN.VALIDATE_EPOCHES == 0):
            while val_data_loader.hasNextBatch():
                val_step += 1
                batch_val_data, batch_val_label = val_data_loader.getNextBatch(cfg.TRAIN.BATCH_SIZE)
                val_loss, val_accuracy = sess.run([loss, accuracy],
                                                  feed_dict={X: batch_val_data, Y: batch_val_label})

                feed = {val_loss_holder: val_loss}
                val_loss_str = sess.run(val_loss_tensor, feed_dict=feed)
                writer.add_summary(val_loss_str, val_step)

                feed1 = {val_accuracy_holder: val_accuracy}
                val_accuracy_str = sess.run(val_accuracy_tensor, feed_dict=feed1)
                writer.add_summary(val_accuracy_str, val_step)

            val_data_loader.reset()

    filename = os.path.join(cfg.DIR.model_save_dir, 'digit_recognizer_final')
    saver.save(sess, filename)
    end_time = time.time()

    print("The total time used in training: {}".format(end_time - start_time))
Example #19
 def __init__(self):
     self.dataLoader = DataLoader()
     self.model = None
    parser.add_argument('--delta', default="", dest="delta")
    args = vars(parser.parse_args())

    if args['delta'] == "":
        delta = 200
    else:
        delta = int(args['delta'])

    if args['end'] == "":
        end_date = datetime.datetime.today()
    else:
        end_date = datetime.datetime.strptime(args['end'], DATE_FORMAT_STRING)

    if args['start'] == "":
        start_date = end_date - datetime.timedelta(days=delta)
    else:
        start_date = datetime.datetime.strptime(args['start'],
                                                DATE_FORMAT_STRING)

    stock_id = args['stock_id']
    end_date = end_date.strftime(DATE_FORMAT_STRING)
    start_date = start_date.strftime(DATE_FORMAT_STRING)

    data_loader = DataLoader(from_date=start_date,
                             to_date=end_date,
                             stock_list=stock_id)
    data = data_loader.load()

    bb = BollingerBand(stock_id, data)
    bb.graph()
Example #21
from DataLoader import DataLoader

import Hyperparameter as param
from Batch import Batch
from Util import numpy_to_var, toData, decoder_initial
from Vocab import Vocab
import numpy as np
import math
import sys
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from torch.autograd import Variable

targetfile1 = "target.txt"
inputfile1 = "ie_out.txt"
device = "cpu"
dataloader1 = DataLoader(inputfile1, targetfile1, device)
x, y = dataloader1.caseload()

vocab_size = 5000
vocab = Vocab(vocab_size)
vocab.w2i = dataloader1.word2idx

vocab.i2w = dataloader1.idx2word
print("vocab.i2w")
print(vocab.i2w)
vocab.count = len(vocab.w2i)
for w in ['<PAD>', '<UNK>', '<SOS>', '<EOS>']:
    vocab.w2i[w] = vocab.count
    vocab.i2w[vocab.count] = w
    vocab.count += 1
print("<unk>")
Example #22
def main():
    '''Main Function'''

    parser = argparse.ArgumentParser(description='translate.py')

    parser.add_argument('-model', required=True, help='Path to model .pt file')
    '''
    parser.add_argument('-src', required=True,
                        help='Source sequence to decode (one line per sequence)')
    parser.add_argument('-vocab', required=True,
                        help='Source sequence to decode (one line per sequence)')
    '''
    parser.add_argument('-output',
                        default='pred.txt',
                        help="""Path to output the predictions (each line will
                        be the decoded sequence)""")
    parser.add_argument('-beam_size', type=int, default=100, help='Beam size')
    parser.add_argument('-batch_size', type=int, default=1, help='Batch size')
    parser.add_argument('-n_best',
                        type=int,
                        default=1,
                        help="""If verbose is set, will output the n_best
                        decoded sentences""")
    parser.add_argument('-no_cuda', action='store_true')

    opt = parser.parse_args()
    opt.cuda = not opt.no_cuda

    # Prepare DataLoader

    test_data = DataLoader(use_valid=True,
                           batch_size=opt.batch_size,
                           cuda=opt.cuda)

    translator = Translator(opt)
    translator.model.eval()

    numuser = test_data.user_size

    num_right = 0
    num_total = 0

    avgF1 = 0
    avgPre = 0
    avgRec = 0

    avgF1_long = 0
    avgPre_long = 0
    avgRec_long = 0

    avgF1_short = 0
    avgPre_short = 0
    avgRec_short = 0
    numseq = 0  # number of test seqs

    # for micro pre rec f1
    right = 0.
    pred = 0.
    total = 0.
    right_long = 0.
    pred_long = 0.
    total_long = 0.
    right_short = 0.
    pred_short = 0.
    total_short = 0.

    with open(opt.output, 'w') as f:
        for batch in tqdm(test_data,
                          mininterval=2,
                          desc='  - (Test)',
                          leave=False):
            all_samples = translator.translate_batch(batch).data

            for bid in range(batch.size(0)):
                numseq += 1.0

                ground_truth = np.zeros([numuser])
                num_ground_truth = 0
                for user in batch.data[bid][1:-1]:
                    if user == Constants.EOS or user == Constants.PAD:
                        break
                    ground_truth[user] = 1.0
                    num_ground_truth += 1

                pred_cnt = np.zeros([numuser])
                for beid in range(opt.beam_size):
                    for pred_uid in all_samples[bid, beid,
                                                1:num_ground_truth + 1]:
                        if pred_uid == Constants.EOS:
                            break
                        else:
                            pred_cnt[pred_uid] += 1.0 / opt.beam_size

                F1, pre, rec = getF1(ground_truth, pred_cnt)
                avgF1 += F1
                avgPre += pre
                avgRec += rec
                right += np.dot(ground_truth, pred_cnt)
                pred += np.sum(pred_cnt)
                total += np.sum(ground_truth)

                # for short user
                ground_truth = np.zeros([numuser])
                num_ground_truth = 0
                for user in batch.data[bid][1:-1]:
                    if user == Constants.EOS or user == Constants.PAD:
                        break
                    ground_truth[user] = 1.0
                    num_ground_truth += 1
                    if num_ground_truth >= 5:
                        break

                pred_cnt = np.zeros([numuser])
                for beid in range(opt.beam_size):
                    #total += len(ground_truth)
                    for pred_uid in all_samples[bid, beid,
                                                1:num_ground_truth + 1]:
                        if pred_uid == Constants.EOS:
                            break
                            #continue
                        else:
                            pred_cnt[pred_uid] += 1.0 / opt.beam_size

                F1, pre, rec = getF1(ground_truth, pred_cnt)
                avgF1_short += F1
                avgPre_short += pre
                avgRec_short += rec
                right_short += np.dot(ground_truth, pred_cnt)
                pred_short += np.sum(pred_cnt)
                total_short += np.sum(ground_truth)

    print('[Info] Finished.')
    print('Macro')
    print(avgF1 / numseq)
    print(avgPre / numseq)
    print(avgRec / numseq)
    print('Results for the first no more than 5 predictions')
    print(avgF1_short / numseq)
    print(avgPre_short / numseq)
    print(avgRec_short / numseq)

    print('Micro')
    pmi = right / pred
    rmi = right / total
    print(2 * pmi * rmi / (pmi + rmi))
    print(pmi)
    print(rmi)

    print('Results for the first no more than 5 predictions')
    pmi_short = right_short / pred_short
    rmi_short = right_short / total_short
    print(2 * pmi_short * rmi_short / (pmi_short + rmi_short))
    print(pmi_short)
    print(rmi_short)
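getF1 is used above but not defined in this snippet. A minimal sketch consistent with how the micro-averaged scores are accumulated there (the dot product as the hit count, the sums of the prediction and ground-truth vectors as the denominators); the zero guards are an assumption:

import numpy as np

def getF1(ground_truth, pred_cnt):
    # per-sequence precision/recall/F1 over the (possibly fractional) beam counts
    right = np.dot(ground_truth, pred_cnt)
    pred = np.sum(pred_cnt)
    total = np.sum(ground_truth)
    precision = right / pred if pred > 0 else 0.0
    recall = right / total if total > 0 else 0.0
    if precision + recall == 0:
        return 0.0, precision, recall
    return 2 * precision * recall / (precision + recall), precision, recall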
import pandas as pd

from DataLoader import DataLoader
from StateForecaster import StateForecaster
from preprocessing import preprocess_dataframe_for_forecasting_training

if __name__ == "__main__":
    dataframe = DataLoader().load_dataframe_from_datapath()

    forecaster_dataframe = preprocess_dataframe_for_forecasting_training(
        dataframe)
    print(forecaster_dataframe)
    forecaster_model = StateForecaster(forecaster_dataframe)
    result = forecaster_model.fit(5)
    print(result.summary())

    lag_order = result.k_ar
    print(lag_order)
    forecast = result.forecast(forecaster_dataframe.values[-lag_order:],
                               steps=300)
    df_forecast = pd.DataFrame(forecast,
                               index=forecaster_dataframe.index[-300:],
                               columns=forecaster_dataframe.columns)
    print(df_forecast)
Example #24
 def __init__(self):
     self.antigens = DataLoader().load_test_genes()
Example #25
def transfer_learning(print_output=True):
    path = 'datasets/'
    data_loader = DataLoader(path)
    names = {1: 'title', 4: 'abstract', 5: 'mesh', 'y': 6}
    transformed_data_sets = []

    path = 'datasets/'

    files = [f for f in listdir(path) if isfile(join(path,f))]
    files.pop(0)
    data_loader = DataLoader(path)
    domains = data_loader.csv_files
    all_domains = copy.deepcopy(domains)
    training_domains = data_loader.csv_files
    all_domains_svm_wda_metrics_list = []
    all_domains_svm_metrics_list = []
    all_domains_svm_bow_mlp_list = []
    all_domains_mlp_fold_scores = []

    for i, held_out_domain in enumerate(domains):
        training_domains.pop(i)
        names = {1: 'title', 4: 'abstract', 5: 'mesh', 'y': 6}
        svm_wda_metrics_list = []
        svm_metrics_list = []
        svm_bow_mlp_list = []

        folder_name = '/' + files[i]
        domain_name = files[i].__str__()
        domain_name = domain_name.split('.')[0]
        folder_name = 'output' + '/' + domain_name

        output = "Dataset: {}".format(files[i])
        if print_output:
            print(output)

        #shuffle(data_loader.csv_files)
        data_loader.csv_files = training_domains
        data_sets = data_loader.csv_files
        domains = data_loader.get_feature_matrix(names)

        #Get one file out of the csv files in the dataloader use this as the held out domain

        #Get the feature representation of the held out data
        held_out_x, held_out_y = data_loader.get_feature_matrix(names, held_out_domain)
        #Create the folds for the held out data in this case the default 5
        folds = data_loader.cross_fold_valdation(held_out_x, held_out_y)
        #Get the total number of domains i.e., the number of files with documents
        n_source_domains = len(data_sets)
        os.makedirs(folder_name)

        #Must convert the data type of the matrix for theano
        feature_engineer = Feature_Engineer()

        #Start the 5 fold cross validation
        for n_fold, fold in enumerate(folds):
            output = "Fold {}: \n".format(n_fold)
            if print_output:
                print(output)
            output = '{}/{}/fold_{}.csv'.format(os.getcwd(), folder_name, (n_fold + 1))
            file = open(output, 'w')
            csv_writer = csv.writer(file)

            #Each sample is a list that contains the x and y for the classifier
            #Typically fold[0] would be the train sample but because it is switched for
            #testing the effectiveness of the domain adaptation
            train_sample = fold[1]
            test_sample = fold[0]

            #These are the original copies to be copied over the augmented feature matrix
            #Each sample contains the text and y labels from the data before it is put into the sklearn count vectorizer
            train_x, train_y = train_sample
            test_x, test_y = test_sample

            train_y[train_y == 0] = 2
            train_y[train_y == 1] = 3
            test_y[test_y == 0] = 2
            test_y[test_y == 1] = 3


            #Get the bag of words representation of the small 20% target source data and transform the other 80%
            #of the data.
            train_x = data_loader.get_transformed_features(train_x, True, False, True)
            test_x = data_loader.transform(test_x, True, True)

            transformed_domains = []

            #Transform the domains with respect to the training data
            for domain in domains:
                domain_x, domain_y = domain
                transformed_domain_x = data_loader.transform(domain_x, True, True)
                transformed_domain_x, domain_y = data_loader.underSample(transformed_domain_x, domain_y)
                transformed_domains.append([transformed_domain_x, domain_y])

            augmented_feature_matrix_train, augmented_y_train = feature_engineer.augmented_feature_matrix(transformed_domains,
                                                                                              [train_x, train_y])
            augmented_feature_matrix_test, augmented_y_test = feature_engineer.augmented_feature_matrix(held_out_domain=[test_x, test_y],
                                                                                                        train_or_test=False,
                                                                                                        n_source_domains=len(transformed_domains))
            augmented_y_test[augmented_y_test == 2] = 0
            augmented_y_test[augmented_y_test == 3] = 1
            #SVM with the augmented feature matrix for domain adaptation
            svm_wda = SVM()
            svm_wda.train(augmented_feature_matrix_train, augmented_y_train)
            svm_wda.test(augmented_feature_matrix_test, augmented_y_test)
            output = "\nSVM with domain adaptation metrics:"
            csv_writer.writerow([output])
            if print_output:
                print(output)
                print(svm_wda)
                print("\n")
            svm_wda_metrics_list.append(svm_wda.metrics)

            classifier = NeuralNet(n_hidden_units=[250], output_size=4, batch_size=20, n_epochs=200, dropout=True,
                                   activation_function='relu', learning_rate=.3, momentum=True, momentum_term=.5)
            write_to_csv(svm_wda.metrics, csv_writer)


            y_for_mlp = []
            #Set up the x and y data for the MLP
            for p, domain in enumerate(transformed_domains):
                domain_x, domain_y = domain
                domain_x = domain_x.todense()
                y_for_mlp.append(domain_y)

                if p == 0:
                    neural_net_x_train = domain_x
                    neural_net_y_train = domain_y
                else:
                    neural_net_x_train = numpy.vstack((neural_net_x_train, domain_x))
                    neural_net_y_train = numpy.hstack((neural_net_y_train, domain_y))

            neural_net_x_train = numpy.float_(neural_net_x_train)


            classifier.train(neural_net_x_train, neural_net_y_train)

            test_y[test_y == 2] = 0
            test_y[test_y == 3] = 1
            svm_y_train = neural_net_y_train
            svm_y_train[svm_y_train == 2] = 0
            svm_y_train[svm_y_train == 3] = 1

            #SVM without the domain adaptation
            svm = SVM()
            svm.train(sparse.coo_matrix(neural_net_x_train), svm_y_train)
            svm.test(test_x, test_y)
            output = "\nSVM without domain adaptation"
            if print_output:
                print(output)
                print(svm)
                print("\n")
            csv_writer.writerow([output])
            svm_metrics_list.append(svm.metrics)
            write_to_csv(svm.metrics, csv_writer)


            #Transform the feature vectors of the held out data to the learned hidden layer features of the previous
            #MLP trained with all n-1 datasets

            perceptron_train_x = theano.shared(neural_net_x_train)
            perceptron_test_x = theano.shared(test_x.todense())

            transformed_perceptron_train_x = classifier.transfer_learned_weights(perceptron_train_x)
            transformed_perceptron_test_x = classifier.transfer_learned_weights(perceptron_test_x)

            modified_transformed_perceptron_train_x = numpy.hstack((transformed_perceptron_train_x,
                                                                    neural_net_x_train))
            modified_transformed_perceptron_test_x = numpy.hstack((transformed_perceptron_test_x,
                                                                   test_x.todense()))

            output = "\nSVM with BoW and transformed features"
            csv_writer.writerow([output])
            if print_output:
                print(output)
            svm_mlp_bow = SVM()
            svm_mlp_bow.train(sparse.coo_matrix(modified_transformed_perceptron_train_x), svm_y_train)
            svm_mlp_bow.test(sparse.coo_matrix(modified_transformed_perceptron_test_x), test_y)
            write_to_csv(svm_mlp_bow.metrics, csv_writer)
            if print_output:
                print(svm_mlp_bow)
            svm_bow_mlp_list.append(svm_mlp_bow.metrics)


            output = "*********** End of fold {} ***********".format(n_fold)

            if print_output:
                print(output)


        training_domains = copy.deepcopy(all_domains)
        file_name = '{}/{}/fold_averages.csv'.format(os.getcwd(), folder_name)
        file = open(file_name, 'w+')
        csv_writer = csv.writer(file)

        if print_output:
            output = "----------------------------------------------------------------------------------------" \
                     "\nFold Scores\n " \
                     "SVM with domain adaptation"
            print_write_output(output, svm_wda_metrics_list, all_domains_svm_wda_metrics_list, csv_writer)

            output = "\nSVM without domain adaptation"
            print_write_output(output, svm_metrics_list, all_domains_svm_metrics_list, csv_writer)

            output = "SVM with BoW and transformed features"
            print_write_output(output, svm_bow_mlp_list, all_domains_svm_bow_mlp_list, csv_writer)



    file_name = '{}/output/all_fold_averages.csv'.format(os.getcwd())
    file = open(file_name, 'w+')
    csv_writer = csv.writer(file)
    if print_output:
        output = "*******************************************************************************************" \
                 "\nAll domain macro metric scores\n " \
                 "SVM with domain adaptation"
        print_macro_scores("SVM with domain adaptation", all_domains_svm_wda_metrics_list, csv_writer)

        output = "\nSVM without domain adaptation"
        print_macro_scores(output, all_domains_svm_metrics_list, csv_writer)

        output = "SVM with BoW and transformed features"
        print_macro_scores(output, all_domains_svm_bow_mlp_list, csv_writer)
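The Feature_Engineer.augmented_feature_matrix helper used above is not shown. One common way to build such a matrix for this style of domain adaptation is feature augmentation: every example keeps a shared copy of its features plus a copy in its own domain-specific block, with zeros everywhere else. The sketch below only illustrates that construction under assumed names and shapes; it is not the original helper.

import numpy
from scipy import sparse

def augmented_feature_matrix_sketch(source_domains, target):
    # source_domains: list of (x, y) pairs; target: one (x, y) pair; all x share
    # the same vectorizer vocabulary, so every block has the same column count
    blocks_per_row = len(source_domains) + 1
    rows, labels = [], []
    for block_id, (x, y) in enumerate(source_domains + [target]):
        x = sparse.csr_matrix(x)
        row = [x]  # shared (general) copy of the features
        for b in range(blocks_per_row):
            row.append(x if b == block_id else sparse.csr_matrix(x.shape))
        rows.append(sparse.hstack(row))
        labels.append(numpy.asarray(y))
    return sparse.vstack(rows), numpy.hstack(labels)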
    def train_NN(self, nn):
        
        trainer.train_NN(self, nn) 
                  
        data_loader = DataLoader()
        datasets = data_loader.load_shared_data()
    
        train_set_x, train_set_y = datasets[0]
        valid_set_x, valid_set_y = datasets[1]
        test_set_x, test_set_y = datasets[2]
    
        # compute number of minibatches for training, validation and testing
        batch_size = self.batch_size;
        
        n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
        n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size
        
        index = T.lscalar()  # index to a [mini]batch
        x = nn.input  # the data is presented as rasterized images
        y = T.ivector('y') 
        
        cost = (nn.negative_log_likelihood_dropout(y)
        + self.L1_lambda * nn.L1
        + self.L2_lambda * nn.L2
        )
        
        train_err_model = theano.function(
        inputs=[index],
        outputs=nn.errors(y),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        }
        )
        
        test_err_model = theano.function(
        inputs=[index],
        outputs=nn.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
        )

        validate_err_model = theano.function(
        inputs=[index],
        outputs=nn.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })
                                         
        gparams = [T.grad(cost, param) for param in nn.params]
        
        updates = [
            (param, param - self.learning_rate * gparam)
            for param, gparam in zip(nn.params, gparams)
        ]
                                         
        train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
        )
       

        
        
        def validate():
        
            validation_losses = [validate_err_model(i)
                                         for i in xrange(n_valid_batches)]
            this_validation_loss = numpy.mean(validation_losses)
           
            return this_validation_loss;
        
        def test():
            test_losses = [test_err_model(i)
                                         for i in xrange(n_test_batches)]
            this_test_loss = numpy.mean(test_losses)
           
            return this_test_loss;    
        
        def train():
            train_losses = [train_err_model(i)
                                         for i in xrange(n_train_batches)]
            this_train_loss = numpy.mean(train_losses)
           
            return this_train_loss; 
        print '... training'
            #Train in mini batches
        minEpochs = 4
        validationFrequency =  n_train_batches;
        iteration = 0;
        bestValidationLoss = numpy.Inf;
        
        directory=check_create_observations_dir()
        self.output_directory = directory
        max_epoch_reached = False
        
        while not max_epoch_reached : 
            
            iteration = iteration + 1;
            epochNo = (iteration / n_train_batches) + 1
            batchId= iteration % n_train_batches;
            currentCost=train_model(batchId)
            
            #print "Cost = %f" %(currentCost)
            if iteration % validationFrequency == 0:
                validation_err = validate()
                train_err = train()
                test_err = test()
                
                self.add_train_data(epochNo, train_err, validation_err, test_err)
                print "Epoch no: %d Validation Loss = %f" %(epochNo,validation_err*100)
                if validation_err < bestValidationLoss:
                    bestValidationLoss = validation_err
                    
                if epochNo > minEpochs and validation_err *self.early_stopping_threshold > bestValidationLoss:
                    #print "------------------------Validation Loss = %f" %(validationLoss*100)
                    break;
             
            if epochNo >= self.n_epochs:
                max_epoch_reached = True
            
        testLoss=test()
        trainer.save_errors(self, directory)
        repfields_final_path=os.path.join(directory,"repFields.png")
        W_vals=nn.W1.get_value(borrow=True)
        
        display(W_vals,repfields_final_path)
        print  "iteration  %d complete. Cost = %f Best Validation Loss = %f Test Loss = %f" %(iteration,currentCost,bestValidationLoss *100,testLoss *100)   
 def test_latitude_not_a_number(self):
     self.file.write("418|12|2012-07-18 14:43:38|37.6164|-122.386|41059b00f964a520850b1fe3|empty_message\n418|12|2012-07-18 12:34:45|abcd|-122.386|41059b00f964a520850b1fe3|empty_message")
     self.file.seek(0)
     with self.assertRaises(ValueError) as cm:
         DataLoader.load_check_ins_from_file(self.file)
     self.assertEqual(cm.exception.message, 'Error in line 2: latitude should be a float number')
import math
import numpy as np
import random

from collections import Counter

from DataLoader import DataLoader
from Models import StanfordModel, NCGModel
from Utils import Utils


global_results_stanford = []
global_results_radiation = []

datasets = Utils.separate_dataset_by_days(DataLoader.load_check_ins_from_file(open("104665558.csv", "U")))
users = datasets.keys()
users = ["104665558"]

for user in users:

    for i in range(0, 1):

        days = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
        # days = ['Wednesday']

        results_stanford = {}
        results_radiation = {}
        for day in days:
            results_stanford[day] = []
            results_radiation[day] = []
 def test_latitude_out_of_bounds(self):
     self.file.write("418|12|2012-07-18 14:43:38|37.6164|-122.386|41059b00f964a520850b1fe3|empty_message\n418|12|2012-07-18 12:34:45|100|-122.386|41059b00f964a520850b1fe3|empty_message")
     self.file.seek(0)
     with self.assertRaises(ValueError) as cm:
         DataLoader.load_check_ins_from_file(self.file)
     self.assertEqual(cm.exception.message, 'Error in line 2: latitude should be between -90 and 90')
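The latitude tests above, together with the expected dictionary in the happy-path directory test, fix the record layout that DataLoader.load_check_ins_from_file expects: pipe-delimited user|check_in_id|timestamp|latitude|longitude|venue_id|message. A sketch of the per-line parsing and validation they imply (names and error handling are assumptions, not the original code):

import datetime

def parse_check_in_line(line, line_number):
    user, check_in_id, date_str, lat_str, lon_str, venue_id, message = line.rstrip("\n").split("|")
    try:
        latitude = float(lat_str)
    except ValueError:
        raise ValueError("Error in line %d: latitude should be a float number" % line_number)
    if not -90 <= latitude <= 90:
        raise ValueError("Error in line %d: latitude should be between -90 and 90" % line_number)
    return user, {'check_in_id': check_in_id,
                  'date': datetime.datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S"),
                  'latitude': latitude,
                  'longitude': float(lon_str),
                  'venue_id': venue_id,
                  'check_in_message': message}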
Example #30
import os

# os, BootProcessor and FsOps are used below; the module paths for BootProcessor
# and FsOps are assumed to follow the same module-per-class layout as the rest.
from BootProcessor import BootProcessor
from FsOps import FsOps
from SchemaProcessor import SchemaProcessor
from Conversion import Conversion
from MigrationStateManager import MigrationStateManager
from StructureLoader import StructureLoader
from ReportGenerator import ReportGenerator
from DataLoader import DataLoader
from ConstraintsProcessor import ConstraintsProcessor
from DBAccess import DBAccess
from BinaryDataDecoder import BinaryDataDecoder


if __name__ == '__main__':
    print(BootProcessor.get_introduction_message())
    base_dir = os.getcwd()
    config = FsOps.read_config(base_dir)
    config = FsOps.read_extra_config(config, base_dir)
    conversion = Conversion(config)
    FsOps.create_logs_directory(conversion)
    BootProcessor.boot(conversion)
    FsOps.read_data_types_map(conversion)
    SchemaProcessor.create_schema(conversion)
    MigrationStateManager.create_state_logs_table(conversion)
    MigrationStateManager.create_data_pool_table(conversion)
    StructureLoader.load_structure(conversion)
    MigrationStateManager.read_data_pool(conversion)
    DataLoader.send_data(conversion)
    BinaryDataDecoder.decode(conversion)
    ConstraintsProcessor.process_constraints(conversion)
    DBAccess.close_connection_pools(conversion)
    ReportGenerator.generate_report(conversion, 'Migration is accomplished.')
Example #31
    batchSizeForOneThread = Config.getint("RBM", "batchSizeForOneThread")
    M = Config.getint("RBM", "artistsNumber")
    K = Config.getint("RBM", "ranksNumber")
    F = Config.getint("RBM", "hiddenLayerSize")
    learningRate = Config.getfloat("RBM", "learningRate")
    wDecay = Config.getfloat("RBM", "wDecay")
    updateFrequencyMAX = Config.getint("RBM", "updateFrequencyMin")
    numberOfEpoch = Config.getint("RBM", "numberOfEpoch")
    Verbose = Config.getboolean("RBM", "Verbose")

    TrainingSetFile = Config.get("RBM", "trainingSetFile")
    ValidationSetFile = Config.get("RBM", "validationSetFile")
    ValidationFromTrainingSetFile = Config.get("RBM", "validationFromTrainingSetFile")
    TestSetFile = Config.get("RBM", "testSetFile")

    dataLoader = DataLoader(trainingSetFile = TrainingSetFile, validationSetFile = ValidationSetFile, validationFromTrainingSetFile = ValidationFromTrainingSetFile, testSetFile = TestSetFile, K = K, M = M, batchSizeForOneThread = batchSizeForOneThread, threadsNumber = threadsNumber, verbose = Verbose)

    whereUpdateMax = np.where(dataLoader.updateFrequency > updateFrequencyMAX)
    dataLoader.updateFrequency[whereUpdateMax] = updateFrequencyMAX

    dataLoader.vBiasesInitialization[np.where(dataLoader.vBiasesInitialization < np.float64(0.1e-100))] = np.float64(0.1e-100)

    momentum = 0.5

    rbm = RBM(M, K, F, learningRate, momentum, wDecay, dataLoader.vBiasesInitialization, dataLoader.updateFrequency)
    numberOfMiniSets = np.int(np.ma.floor(dataLoader.trainingSetSize / (threadsNumber * batchSizeForOneThread)))


    with open("Outs/"+sys.argv[1]+"_validation_RMSE.txt", "a") as rmsesFile:
        dataLoader.StartNewValidationSet()
        GetVisiableLayer = dataLoader.GiveVisibleLayerForValidation
class CitationStream(Tkinter.Tk):
    '''
    This class is responsible for sending citation edge streams (loaded by using DataLoader)
    to Gephi's Master server by using GephiJsonClient.
    '''
    
    def __init__(self, url = 'http://localhost:8080/workspace0', filename='Citation_Stream', on_gui = True):
        self.url = url;
        self.filename = filename;
        self.loader = DataLoader();     # create an instance of DataLoader
        self.loader.loadData(self.filename);    # load data from file
        self.g = GephiJsonClient(url=self.url);     # create an instance of GephiJsonClient
        self.g.cleanAll();
        self.degree_dict = {};
        self.cited_dict = {};
        
        # elements for the GUI
        self.is_run = True;
        self.is_gui = on_gui;
        if self.is_gui == True:
            self.initializeUI();
    
    
    def run(self):
        self.loader.flush();
        self.g.cleanAll();
        self.degree_dict.clear();
        self.cited_dict.clear();
        self.streamIn(IN_THRESHOLD , OUT_THRESHOLD, BATCH_SPEED);
        
    
    def runForever(self):
        i = 1;
        while self.is_run:
            print "ROUND #", i;
            self.run();
            print "Waiting 10 seconds for the next round...";
            if self.is_gui == True:
                self.date_txt.set("Waiting 10 seconds for the next round...");
                
            time.sleep(10);  
            i += 1;
  
              
    def streamIn(self, in_threshold = 10, out_threshold = 30, timeout=1):
        '''
        Feed fetch batch into Gephi's pool.
        The maximum number of nodes displayed in the pool is set to be 1000.
        The nodes with in-degree >= in_threshold or out-degree >= out_threshold 
        will be highlighted using different colors and sizes. 
        timeout will be the time for sleeping between two batches.
        '''
        displayed_nodes = deque(maxlen=1000);    # create a queue and set its size 1000. There are 1000 nodes will be displayed in Gephi's pool
        displayed_dict = {};    # store nodes which are currently existing in Gephi's pool
        
        while True:
            tm,edgeset = self.loader.sendData();
            if tm == -1 or edgeset == None:
                break;
            
            print "Batch: ", tm;
            if self.is_gui == True:
                self.date_txt.set(tm);
                
            for fromnode,tonode in edgeset:
                # update the cited list of the tonode
                self.cited_dict[tonode] = self.cited_dict.get(tonode,[]);
                self.cited_dict[tonode].append(fromnode);
                
                # update degrees
                if self.degree_dict.get(fromnode) == None:
                    self.degree_dict[fromnode] = [0,1, tm];  # [in-degree, out-degree, date]
                else:
                    self.degree_dict[fromnode][1] += 1;
                if self.degree_dict.get(tonode) == None:
                    self.degree_dict[tonode] = [1,0, tm];    # [in-degree, out-degree, date]
                else:
                    self.degree_dict[tonode][0] += 1;
                
                # add the fromnode to Gephi's pool
                node_attributes = NODE_ATTRIBUTE.copy();
                if displayed_dict.get(fromnode) == None:
                    displayed_dict[fromnode] = fromnode;
                    # check the size
                    if len(displayed_nodes) >= displayed_nodes.maxlen:
                        deletenode = displayed_nodes.popleft();
                        del displayed_dict[deletenode];
                        self.g.deleteNode(deletenode);  # delete the node from Gephi's pool
                    self.g.addNode(fromnode, **node_attributes);
                    displayed_nodes.append(fromnode);
                    
                # check fromnode's in-degree, and update it in Gephis' pool
                if self.degree_dict[fromnode][1] >= out_threshold:
                    sz = setSize(self.degree_dict[fromnode][1]);
                    node_attributes['size'] = sz;
                    node_attributes['r'] = 0.0/255;
                    node_attributes['g'] = 200.0/255;
                    node_attributes['b'] = 0.0/255;
                    self.g.changeNode(fromnode, **node_attributes);
                    
                # check tonode's out-degree, and update it in Gephi's pool
                node_attributes = NODE_ATTRIBUTE.copy();
                if displayed_dict.get(tonode) == None:
                    if str(int(self.degree_dict[tonode][2][0:4])+5)+self.degree_dict[tonode][2][4:7] >= tm[0:7]:
                        displayed_dict[tonode] = tonode;
                        # check the size
                        if len(displayed_nodes) >= displayed_nodes.maxlen:
                            deletenode = displayed_nodes.popleft();
                            del displayed_dict[deletenode];
                            self.g.deleteNode(deletenode);
                        if self.degree_dict[tonode][0] >= in_threshold and self.degree_dict[tonode][1] >= out_threshold:
                            sz = setSize(self.degree_dict[tonode][0]);
                            node_attributes['size'] = sz;
                            node_attributes['r'] = 0.0/255;
                            node_attributes['g'] = 0.0/255;
                            node_attributes['b'] = 100.0/255;
                        elif self.degree_dict[tonode][0] >= in_threshold:
                            sz = setSize(self.degree_dict[tonode][0]);
                            node_attributes['size'] = sz;
                            node_attributes['r'] = 200.0/255;
                            node_attributes['g'] = 0.0/255;
                            node_attributes['b'] = 0.0/255;
                        self.g.addNode(tonode, **node_attributes);
                        displayed_nodes.append(tonode);
                        # connect the tonode to those nodes that cite it and already in Gephi's pool
                        for eachcit in self.cited_dict[tonode]:
                            if displayed_dict.get(eachcit) != None:
                                self.g.addEdge(str(eachcit+"->"+tonode), eachcit, tonode, directed=True);
                else:
                    if self.degree_dict[tonode][0] >= in_threshold and self.degree_dict[tonode][1] >= out_threshold:
                        sz = setSize(self.degree_dict[tonode][0]);
                        node_attributes['size'] = sz;
                        node_attributes['r'] = 0.0/255;
                        node_attributes['g'] = 0.0/255;
                        node_attributes['b'] = 100.0/255;
                    elif self.degree_dict[tonode][0] >= in_threshold:
                        sz = setSize(self.degree_dict[tonode][0]);
                        node_attributes['size'] = sz;
                        node_attributes['r'] = 200.0/255;
                        node_attributes['g'] = 0.0/255;
                        node_attributes['b'] = 0.0/255;
                    self.g.changeNode(tonode, **node_attributes);         
                self.g.addEdge(str(fromnode+"->"+tonode), fromnode, tonode, directed=True);
            
            # sleep for seconds if one timestamp is done
            time.sleep(timeout);  
        
        # clear
        displayed_nodes.clear();
        displayed_dict.clear();
         
                       
    def clearData(self):
        '''
        Clear up.
        '''
        self.loader.clearData();
        self.degree_dict.clear();
        self.cited_dict.clear();
        
    
    #----------------------
    # Functions for the UI
    #----------------------
    def initializeUI(self):
        '''
        Initialize the components needed in the UI.
        '''
        Tkinter.Tk.__init__(self);  
        self.title("Dynamic Citation Network");                
        self.date_txt = Tkinter.StringVar();
            
        # part 1
        self.intro_lf = Tkinter.LabelFrame(self, text="INTRODUCTION", height=200, width=150);
        self.intro_lf.pack(fill=Tkinter.BOTH, expand=1);
        intro_lbl = Tkinter.Label(self.intro_lf, text=INTRO_INFO, wraplength=400, justify=Tkinter.LEFT, padx=10, pady=10);
        intro_lbl.pack(side=Tkinter.LEFT, expand=1);
        space_lbl_1 = Tkinter.Label(self, text="");
        space_lbl_1.pack(fill=Tkinter.BOTH, expand=1);
        #part 2
        self.legnd_lf = Tkinter.LabelFrame(self, text="LEGEND", height=200, width=150);
        self.legnd_lf.pack(fill=Tkinter.BOTH, expand=1);
        legnd_cvs = Tkinter.Canvas(self.legnd_lf, height=200, width=150);
        legnd_cvs.pack(fill=Tkinter.BOTH, expand=1);
        legnd_cvs.create_text(10,2,anchor=Tkinter.NW, text="Node Color:");
        legnd_cvs.create_oval(15,22,30,37,fill="gray"); legnd_cvs.create_text(40,22,text="an ordinary paper",anchor=Tkinter.NW);
        legnd_cvs.create_oval(15,42,30,57,fill="green"); legnd_cvs.create_text(40,42,text="a paper that cites >= "+str(OUT_THRESHOLD)+" papers",anchor=Tkinter.NW);
        legnd_cvs.create_oval(15,62,30,77,fill="blue"); legnd_cvs.create_text(40,62,text="a paper that cites >= "+str(OUT_THRESHOLD)+" papers and is cited by >= "+str(IN_THRESHOLD)+" papers",anchor=Tkinter.NW);
        legnd_cvs.create_oval(15,82,30,97,fill="red"); legnd_cvs.create_text(40,82,text="a paper that is cited by >= "+str(IN_THRESHOLD)+" papers",anchor=Tkinter.NW);
        legnd_cvs.create_text(10,112,anchor=Tkinter.NW, text="Node Size:");
        legnd_cvs.create_text(15, 132, text="large:", anchor=Tkinter.NW); legnd_cvs.create_text(75,132,text="in-degree(out-degree) >= 50", anchor=Tkinter.NW);
        legnd_cvs.create_text(15, 152, text="medium:", anchor=Tkinter.NW); legnd_cvs.create_text(75,152,text="in-degree(out-degree) >= 30", anchor=Tkinter.NW);
        legnd_cvs.create_text(15, 172, text="small:", anchor=Tkinter.NW); legnd_cvs.create_text(75,172,text="in-degree(out-degree) >= 10", anchor=Tkinter.NW);
        space_lbl_1 = Tkinter.Label(self, text="");
        space_lbl_1.pack(fill=Tkinter.BOTH, expand=1);
        
        #part 3
        self.date_lf = Tkinter.LabelFrame(self, text="CURRENT DATE", height=200, width=150);
        self.date_lf.pack(fill=Tkinter.BOTH, expand=1);
        self.date_txt = Tkinter.StringVar();
        date_lbl = Tkinter.Label(self.date_lf, textvariable=self.date_txt, padx=10);
        date_lbl.pack(side=Tkinter.LEFT);
        space_lbl_1 = Tkinter.Label(self, text="");
        space_lbl_1.pack(fill=Tkinter.BOTH, expand=1);
        #part 4
        self.btn_lf = Tkinter.LabelFrame(self, text="", height=100, width=150);
        self.btn_lf.pack(fill=Tkinter.BOTH);
        self.is_run = True;
        self.start_btn = Tkinter.Button(self.btn_lf, text="START", command = self.pressStart);
        self.start_btn.pack(side=Tkinter.LEFT);
        self.quit_btn = Tkinter.Button(self.btn_lf, text="QUIT", command = self.pressQuit);
        self.quit_btn.pack(side=Tkinter.RIGHT);
        
        
    def pressStart(self):
        '''
        Function triggered by clicking 'START' button.
        It will start a new thread to simulate the streaming fashion.
        '''
        thread.start_new(self.runForever, ());
        self.start_btn['state'] = Tkinter.DISABLED;
    
    
    def pressQuit(self):
        '''
        Function triggered by clicking 'QUIT' button.
        It will stop running application and quit it.
        '''
        self.is_run = False;
        self.quit();
Example #33
0
def train():
    """
    train model
    :return:
    """
    model, base_model, seq_step_len = build_model()
    print('input lengths ', seq_step_len, 'label length', config.max_seq_len)
    train_dataset = DataLoader(DataMode.Train).load_batch_from_tfrecords()
    val_dataset = DataLoader(DataMode.Val).load_batch_from_tfrecords()

    train_summary_writer = tf.summary.create_file_writer(
        os.path.join(TENSORBOARD_DIR, 'trainLogs'))
    val_summary_writer = tf.summary.create_file_writer(
        os.path.join(TENSORBOARD_DIR, 'valLogs'))

    latest_ckpt = tf.train.latest_checkpoint(CHECKPOINT_DIR)
    start_epoch = 0
    if latest_ckpt:
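        # checkpoints are saved as '{cnn}&{rnn}-{epoch}', so the epoch index is parsed from the file name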
        start_epoch = int(latest_ckpt.split('-')[1].split('.')[0])
        model.load_weights(latest_ckpt)
        event_logger.info('model resumed from: {}, start at epoch: {}'.format(
            latest_ckpt, start_epoch))
    else:
        event_logger.info(
            'passing resume since weights not there. training from scratch')

    def _validation():
        """
        validate the model's acc
        :return: loss and acc
        """
        _val_losses = []
        _val_accuracy = []
        for _batch, _data in enumerate(val_dataset):
            _images, _labels = _data
            _input_length = np.array(np.ones(len(_images)) * int(seq_step_len))
            _label_length = np.array(
                np.ones(len(_images)) * config.max_seq_len)
            _loss = model.evaluate(
                [_images, _labels, _input_length, _label_length],
                _labels,
                verbose=0)
            _acc = _compute_acc(_images, _labels, _input_length)
            _val_losses.append(_loss)
            _val_accuracy.append(_acc)
        return np.mean(_val_losses), np.mean(_val_accuracy)

    def _compute_acc(_images, _labels, _input_length):
        """
        :param _images: a batch of images, [samples, w, h, c]
        :param _labels:
        :param _input_length:
        :return: acc
        """
        _y_pred = base_model.predict_on_batch(x=_images)
        # print(_y_pred)  # (64, 9, 37)
        _decoded_dense, _ = ctc_decode(
            _y_pred,
            _input_length,
            greedy=config.ctc_greedy,
            beam_width=config.beam_width,
            top_paths=config.top_paths,
            merge_repeated=config.decode_merge_repeated)
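        # _decoded_dense[0] holds the best decoded label sequences; -1 entries are padding and are skipped below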
        _error_count = 0
        for pred, real in zip(_decoded_dense[0], _labels):
            str_real = ''.join([config.characters[x] for x in real if x != -1])
            str_pred = ''.join([config.characters[x] for x in pred if x != -1])
            # print(str_real, str_pred)
            if str_pred != str_real:
                _error_count += 1
        _acc = (len(_labels) - _error_count) / len(_labels)
        return _acc

    # start the training loop
    for epoch in range(start_epoch, config.epochs):
        train_acc_avg = []
        train_loss_avg = []
        start = time.time()
        for batch, data in enumerate(train_dataset):
            images, labels = data
            input_length = np.array(np.ones(len(images)) * int(seq_step_len))
            label_length = np.array(np.ones(len(images)) * config.max_seq_len)
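            # CTC setup: each sample's prediction spans seq_step_len time steps; labels are padded to config.max_seq_len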
            train_loss = model.train_on_batch(
                x=[images, labels, input_length, label_length], y=labels)
            train_acc = _compute_acc(images, labels, input_length)
            train_acc_avg.append(train_acc)
            train_loss_avg.append(train_loss)
        train_loss = np.mean(train_loss_avg)
        train_acc = np.mean(train_acc_avg)
        val_loss, val_acc = _validation()
        # write train and val logs
        with train_summary_writer.as_default():
            tf.summary.scalar('loss', train_loss, step=epoch)
            tf.summary.scalar('acc', train_acc, step=epoch)
        with val_summary_writer.as_default():
            tf.summary.scalar('loss', val_loss, step=epoch)
            tf.summary.scalar('acc', val_acc, step=epoch)
        print(
            'Epoch: [{epoch}/{epochs}], train_loss: {train_loss}, train_acc: {train_acc}, '
            'val_loss: {val_loss}, val_acc: {val_acc}, '
            'one epoch costs time: {time} s, learning rate: {lr}'.format(
                epoch=epoch + 1,
                epochs=config.epochs,
                train_loss=train_loss,
                train_acc=train_acc,
                val_loss=val_loss,
                val_acc=val_acc,
                time=time.time() - start,
                lr=config.lr))
        ckpt_path = os.path.join(
            CHECKPOINT_DIR, '{cnn}&{rnn}-{epoch}'.format(cnn=config.cnn_type,
                                                         rnn=config.rnn_type,
                                                         epoch=epoch + 1))
        model.save_weights(ckpt_path)
        if val_acc >= config.end_acc or val_loss <= config.end_cost:
            # tf.saved_model.save(base_model, os.path.join(SVAED_MODEL_DIR, '{name}_model.h5'.format(name=config.dataset)))
            base_model.save(
                os.path.join(SVAED_MODEL_DIR,
                             '{name}_model.h5'.format(name=config.dataset)))
            break
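Example #34
0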
 def test_invalid_date(self):
     self.file.write("418|12|2012-07-18 14:43:38|37.6164|-122.386|41059b00f964a520850b1fe3|empty_message\n418|12|123asd|37.6164|-122.386|41059b00f964a520850b1fe3|empty_message")
     self.file.seek(0)
     with self.assertRaises(ValueError) as cm:
         DataLoader.load_check_ins_from_file(self.file)
     self.assertEqual(cm.exception.message, 'Error in line 2: invalid format of date, should be YYYY-MM-DD HH:MM:SS')
Example #35
0
        for s in size:
            num_features *= s
        return num_features


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("enter new or continue or testonly")
        exit(0)

    MODE = sys.argv[1]
    if MODE != "new" and MODE != "continue" and MODE != "testonly":
        print("enter new or continue or testonly")
        exit(0)

    data_loader = DataLoader()
    trainloader = data_loader.get_trainloader()
    testloader = data_loader.get_testloader()

    net = Net()
    if MODE == "continue" or MODE == "testonly":
        net.load_state_dict(torch.load(PATH))

    criterion = nn.CrossEntropyLoss()
    #criterion = nn.MSELoss()
    optimizer = optim.SGD(net.parameters(), lr=0.0005, momentum=0.6)

    if MODE != "testonly":
        for epoch in range(TOTAL_EPOCH):
            running_loss = 0.0
            for i, data in enumerate(trainloader, 0):
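Example #36
0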
 def test_empty_strings_in_end(self):
     self.file.write("418|23|2012-07-18 14:43:38|37.6164|-122.386|41059b00f964a520850b1fe3|empty_message\n ")
     self.file.seek(0)
     with self.assertRaises(ValueError) as cm:
         DataLoader.load_check_ins_from_file(self.file)
     self.assertEqual(cm.exception.message, "Error in line 2: the line should contain user_id, check-in_id, date, latitude, longitude, venue_id and check-in_message, separated by |")
Example #37
0
__author__ = 'ezequiel'

from MainController import MainController
from DataLoader import DataLoader


if __name__ == "__main__":
    # load sample data
    sample_data = DataLoader.load_sampledata()
    controller = MainController(sample_data)
    controller.menu_redirect()
Example #38
0
from __future__ import print_function

import tensorflow.python.platform

import math
import random
import numpy as np
from six.moves import urllib
from six.moves import xrange  # pylint: disable=redefined-builtin
import tensorflow as tf

from DataLoader import DataLoader


# Step 1: Download the data.
dataset = DataLoader()
filename = dataset.maybe_download('text8.zip', 31344016)
words = dataset.read_data(filename)
print('==> Data size', len(words))

# Step 2: Build the dictionary and 
# replace rare words with UNK token.
vocabulary_size = 50000
data, count, dictionary, reverse_dictionary = dataset.build_dataset(
  words, vocabulary_size
)
# Hint to reduce memory.
del words 
print('Most common words (+UNK)', count[:5])
print('Sample data', data[:10])
Example #39
0
def main():
    '''Main Function'''

    parser = argparse.ArgumentParser(description='translate.py')

    parser.add_argument('-model', required=True, help='Path to model .pt file')
    parser.add_argument(
        '-src',
        required=True,
        help='Source sequence to decode (one line per sequence)')
    parser.add_argument('-vocab',
                        required=True,
                        help='preprocess file to provide vocabulary')
    parser.add_argument('-output',
                        default='pred.txt',
                        help="""Path to output the predictions (each line will
                        be the decoded sequence)""")
    parser.add_argument('-beam_size', type=int, default=5, help='Beam size')
    parser.add_argument('-batch_size', type=int, default=30, help='Batch size')
    parser.add_argument('-lambda_1',
                        type=float,
                        default=2 / 3,
                        help='diversity factor for hamming diversity')
    parser.add_argument('-lambda_2',
                        type=float,
                        default=2 / 3,
                        help='diversity factor for bi-gram diversity')
    parser.add_argument('-lambda_3',
                        type=float,
                        default=2 / 3,
                        help='diversity factor for tri-gram diversity')
    parser.add_argument('-no_cuda', action='store_true')

    opt = parser.parse_args()
    opt.cuda = not opt.no_cuda

    # Prepare DataLoader
    preprocess_data = torch.load(opt.vocab)
    preprocess_settings = preprocess_data['settings']

    test_src_word_insts = read_instances_from_file(
        opt.src, preprocess_settings.max_word_seq_len,
        preprocess_settings.keep_case)

    test_src_insts = convert_instance_to_idx_seq(
        test_src_word_insts, preprocess_data['dict']['src'])

    test_data = DataLoader(preprocess_data['dict']['src'],
                           preprocess_data['dict']['tgt'],
                           src_insts=test_src_insts,
                           cuda=opt.cuda,
                           shuffle=False,
                           batch_size=opt.batch_size)

    translator = Translator_idbs(opt)
    translator.model.eval()

    print('[Info] Start translating...')
    f = open(opt.output, 'w')
    for batch in tqdm(test_data, mininterval=2, desc='  - (Test)',
                      leave=False):
        all_hyp = translator.translate_batch(batch)
        for idx_seq in all_hyp:
            pred_line = ' '.join(
                [test_data.tgt_idx2word[idx] for idx in idx_seq])  # convert indices back to words and join them
            f.write(pred_line + '\n')
            f.flush()
    f.close()
    print('[Info] Finished.')
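Example #40
0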
        return patches


def display(data_row, patch_size):

    data_row = data_row - data_row.min()
    data_row = data_row / data_row.max()
    img = data_row.reshape(3, patch_size[0], patch_size[1]).astype("float32")
    img = np.rollaxis(img, 0, 3)
    return img
    # plt.imshow(img)
    # plt.show()


if __name__ == "__main__":
    data_loader = DataLoader()
    cifar_data = data_loader.load_cifar_data()
    images = cifar_data["data"].reshape((-1, 3, 32, 32)).astype("float32")
    #     img_test = images[2,:,:,:]
    #     img_test = np.rollaxis(img_test,0,3)
    #     img_test = img_test[:,:,::-1]
    #     plt.imshow(img_test)
    #     plt.show()

    images = np.rollaxis(images, 1, 4)
    images = images[:, :, :, ::-1]

    num_patches = images.shape[0]
    patch_size = [12, 12]
    #
    kmeans = KMeans()
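Example #41
0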
import detectron2
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_test_loader
from detectron2.config import get_cfg
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.engine import DefaultTrainer

from CustomVisualizer import CustomVisualizer
from DataLoader import DataLoader
from MetricsVisualizer import MetricsVisualizer
from ImageDisplayer import ImageDisplayer

# %%
root_dir = "./../Data"  # change this to download to a specific location on your pc
DataLoader().download_datasets(root_dir)
DataLoader().download_trained_models(root_dir)
DataLoader().generateAllJsonDataAnnotations(root_dir)

# %%
annotation_type = "system_measures"
json_path = os.path.join(root_dir, "CVC_muscima_" + annotation_type + ".json")
muscima_data = DataLoader().load_from_json(json_path)

json_path = os.path.join(root_dir, "AudioLabs_" + annotation_type + ".json")
audioLabs_data = DataLoader().load_from_json(json_path)


# %%
def registerDataset(data_name, d, data, classes):
    DatasetCatalog.register(data_name, lambda d=d: data)
Example #42
0
    read_hdf_lock.acquire()
    model.save("%s_final.hdf5" % train_name)
    read_hdf_lock.release()
    return fit_hist


# In[13]:

TauLosses.SetSFs(1, 2.5, 5, 1.5)
print(TauLosses.Le_sf, TauLosses.Lmu_sf, TauLosses.Ltau_sf, TauLosses.Ljet_sf)
compile_model(model, 1e-3)

# In[14]:

loader = DataLoader('N:/tau-ml/tuples-v2-training-v2-t1/training/part_*.h5',
                    netConf_full,
                    100,
                    2000,
                    validation_size=10000000,
                    max_queue_size=40,
                    n_passes=-1,
                    return_grid=True)
print(loader.file_entries)
print(loader.total_size, loader.data_size, loader.validation_size)

# In[ ]:

fit_hist = run_training('step{}'.format(1), model_name, loader, 0, 10)

# In[ ]:
Example #43
0
import torch
import time
import torch.nn as nn


trainlist = './cfg/trainlist_7.txt'
label = r'E:\Person_detection\Dataset\Yolov3_labels\labels'  # raw string keeps the Windows backslashes literal
epoch = 1000
batch_size = 12


models = MnasNet().to('cuda')
models.train()
models.load_state_dict(torch.load('./checkpoints/pretrain/yolo3_32.pt'))
dataset = dataset(trainlist=trainlist, label=label, batch_size=batch_size)
dataloader = DataLoader(shuffle=True, dataset=dataset, batch_size=batch_size, num_workers=2)
optimizer = optim.Adam(models.parameters(), lr=0.0001)



def main():
    for i in range(epoch):
        t1 = time.time()
        for step, (image, tcoord) in enumerate(dataloader):
           outputs = models(image.to('cuda'))
           print('epoch:', i, 'step:', step, 'loss:')
        t2 = time.time()
        print('epoch time:', (t2 - t1))
        print('*************save models**********************')
        torch.save(models.state_dict(), './checkpoints/yolo3_{}.pt'.format(i))
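Example #44
0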
 def test_single_file_happy_path(self):
     self.file.write("418|12|2012-07-18 14:43:38|37.6164|-122.386|41059b00f964a520850b1fe3|empty_message\n418|12|2012-07-18 14:43:38|37.6164|-122.386|41059b00f964a520850b1fe3|empty_message")
     self.file.seek(0)
     expected = {'418': [{'venue_id': '41059b00f964a520850b1fe3', 'latitude': 37.6164, 'check_in_message': 'empty_message', 'check_in_id': '12', 'longitude': -122.386, 'date': datetime.datetime(2012, 7, 18, 14, 43, 38)}, {'venue_id': '41059b00f964a520850b1fe3', 'latitude': 37.6164, 'check_in_message': 'empty_message', 'check_in_id': '12', 'longitude': -122.386, 'date': datetime.datetime(2012, 7, 18, 14, 43, 38)}]}
     actual = DataLoader.load_check_ins_from_file(self.file)
     self.assertDictEqual(expected, actual)
Example #45
0
File: train.py Project: zyksir/kbqa
word_vocab = torch.load(args.vocab_file)
logging.info('load word vocab, size: %s' % len(word_vocab))
rel_vocab = torch.load(args.rel_vocab_file)
logging.info('load relation vocab, size: %s' % len(rel_vocab))
ent_vocab = torch.load(args.ent_vocab_file)
logging.info('load entity vocab, size: %s' % len(ent_vocab))

if args.atten_mode == "arsmcnn":
    train_loader = ArsmcnnLoader("./data/arsmcnn_train.pt", device)
    logging.info('load train data, batch_num: %d\tbatch_size: %d' %
                 (train_loader.batch_num, train_loader.batch_size))
    valid_loader = ArsmcnnLoader("./data/arsmcnn_valid.pt", device)
    logging.info('load valid data, batch_num: %d\tbatch_size: %d' %
                 (valid_loader.batch_num, valid_loader.batch_size))
else:
    train_loader = DataLoader(args.train_file, device)
    logging.info('load train data, batch_num: %d\tbatch_size: %d' %
                 (train_loader.batch_num, train_loader.batch_size))
    valid_loader = DataLoader(args.valid_file, device)
    logging.info('load valid data, batch_num: %d\tbatch_size: %d' %
                 (valid_loader.batch_num, valid_loader.batch_size))

os.makedirs(args.save_path, exist_ok=True)


#############################################
#               bulid model                 #
#############################################
def get_models(args):
    if args.atten_mode in ["seq", "both"]:
        encoder_output_size = args.d_hidden * 2 + args.d_rel_embed
Example #46
0
from sklearn.cross_validation import cross_val_score
from sklearn.cross_validation import train_test_split
from pylab import rcParams
from DataLoader import DataLoader
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn.decomposition import PCA, FastICA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.random_projection import SparseRandomProjection
from sklearn.neural_network import MLPClassifier

rcParams['figure.figsize'] = 10, 7

# load data
output_path = 'outputs\\Marketing'
dl_1 = DataLoader('data\\UCI-bank-marketing.csv', output_path, 'Marketing')
dl_1.load_data()
dl_1.scaled_data()
X, y = dl_1.get_data()


# k-means clustering
# Expectation Maximization
# PCA
# ICA
# Randomized Projections
# LDA
def clustering_algo(X, y, cluster, n_c=2, n_i=10):
    if cluster == 'KM':
        clf = KMeans(n_clusters=n_c, n_init=n_i).fit(X)
    elif cluster == 'EM':
Example #47
0
def main(opts):
    # Set number of actions
    opts.A = opts.delta_M * opts.delta_N
    # Set random seeds
    set_random_seeds(opts.seed)

    if opts.dataset == 0:
        if opts.mean_subtract:
            opts.mean = [119.16, 107.68, 95.12]
            opts.std = [61.88, 61.72, 67.24]
        else:
            opts.mean = [0, 0, 0]
            opts.std = [0, 0, 0]
        opts.num_channels = 3
    elif opts.dataset == 1:
        if opts.mean_subtract:
            opts.mean = [193.0162338615919]
            opts.std = [37.716024486312811]
        else:
            opts.mean = [0]
            opts.std = [0]
        opts.num_channels = 1
    else:
        raise ValueError('Dataset %d does not exist!' % (opts.dataset))

    # Create tensorboard writer
    writer = SummaryWriter(log_dir=opts.save_path_vis)

    loader = DataLoader(opts)
    agent = Agent(opts, mode='eval')
    loaded_state = torch.load(opts.load_model)
    agent.policy.load_state_dict(loaded_state['state_dict'])

    h5file = h5py.File(opts.save_path_h5, 'w')

    all_splits = ['train', 'val', 'test']
    if opts.dataset == 1:
        all_splits.append('test_unseen')

    for split in all_splits:
        true_images, utility_images, utility_matrices = get_utility_maps(
            loader, agent, split, opts)
        reward_matrices = []
        for i in range(len(true_images)):
            shape = true_images[i].shape
            reward_matrix = np.zeros((shape[0], opts.N, opts.M))
            for j in range(shape[0]):
                optimal_views, utility_value = get_submodular_views(
                    utility_matrices[i][j], 4)
                for k in optimal_views:
                    for itera in [
                            a_val % opts.N
                            for a_val in range(k[0] - opts.nms_nbd, k[0] +
                                               opts.nms_nbd + 1)
                    ]:
                        for iterb in [
                                b_val % opts.M
                                for b_val in range(k[1] - opts.nms_nbd, k[1] +
                                                   opts.nms_nbd + 1)
                        ]:
                            reward_matrix[j, itera, iterb] += 255.0 / 4.0**(
                                max(abs(k[0] - itera), abs(k[1] - iterb)))
            reward_matrix = np.minimum(reward_matrix, 255.0)
            reward_matrices.append(reward_matrix)

        if opts.debug:
            num_batches = len(true_images)
            assert (len(utility_images) == num_batches)
            assert (len(utility_matrices) == num_batches)
            for i in range(num_batches):
                batch_size = true_images[i].shape[0]
                assert (utility_images[i].shape == (batch_size, opts.N, opts.M,
                                                    opts.N, opts.M,
                                                    opts.num_channels, 8, 8))
                assert (utility_matrices[i].shape == (batch_size, opts.N,
                                                      opts.M, opts.N, opts.M))

        if split == 'val':
            images_count = 0
            # Iterate through the different batches
            for i in range(len(true_images)):
                shape = true_images[i].shape
                true_images[i] = np.reshape(
                    true_images[i].transpose(0, 3, 1, 4, 2, 5),
                    (shape[0], 1, shape[3], shape[1] * shape[4],
                     shape[2] * shape[5])) / 255.0
                utility_images_normal = np.reshape(
                    utility_images[i].transpose(0, 1, 2, 5, 3, 6, 4, 7),
                    (shape[0], opts.N * opts.M, opts.num_channels, opts.N * 8,
                     opts.M * 8))
                for j in range(shape[0]):
                    x = vutils.make_grid(torch.Tensor(
                        utility_images_normal[j]),
                                         padding=3,
                                         normalize=False,
                                         scale_each=False,
                                         nrow=opts.M)
                    images_count += 1
                    writer.add_image(
                        'Panorama #%5.3d utility' % (images_count), x, 0)
                    # ---- Apply submodularity based greedy algorithm to get near-optimal views ----
                    optimal_views, utility_value = get_submodular_views(
                        utility_matrices[i][j], 4)
                    optimal_views_images = np.zeros(
                        (opts.N, opts.M, opts.num_channels, 32, 32))
                    # Convert the scores into images for visualization
                    for k in optimal_views:
                        optimal_views_images[k[0], k[1]] = 1.0
                    optimal_views_images = np.reshape(
                        optimal_views_images.transpose(2, 0, 3, 1, 4),
                        (1, opts.num_channels, opts.N * 32, opts.M * 32))
                    # Get the reward image computed based on optimal_views
                    reward_image = np.repeat(np.repeat(np.repeat(
                        reward_matrices[i][j][:, :, np.newaxis, np.newaxis,
                                              np.newaxis],
                        repeats=opts.num_channels,
                        axis=2),
                                                       repeats=32,
                                                       axis=3),
                                             repeats=32,
                                             axis=4)
                    reward_image = np.reshape(
                        reward_image.transpose(2, 0, 3, 1, 4),
                        (1, opts.num_channels, opts.N * 32,
                         opts.M * 32)) / 255.0

                    # Concatenate the true image, optimal view image and reward image for display
                    concatenated_images = np.concatenate([
                        true_images[i][j], optimal_views_images, reward_image
                    ],
                                                         axis=0)
                    x = vutils.make_grid(torch.Tensor(concatenated_images),
                                         padding=3,
                                         normalize=False,
                                         scale_each=False,
                                         nrow=1)
                    writer.add_image('Panorama #%5.3d image' % (images_count),
                                     x, 0)

        utility_matrices = np.concatenate(utility_matrices, axis=0)
        reward_matrices = np.concatenate(reward_matrices, axis=0)
        h5file.create_dataset('%s/utility_maps' % split, data=utility_matrices)
        h5file.create_dataset('%s/nms' % (split), data=reward_matrices)

    json.dump(vars(opts), open(opts.save_path_json, 'w'))
    writer.close()
    h5file.close()
Example #48
0
        running_loss += loss.item()


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("enter new or continue or testonly")
        exit(0)

    MODE = sys.argv[1]
    if MODE != "new" and MODE != "continue" and MODE != "testonly":
        print("enter new or continue or testonly")
        exit(0)

    ## input format + loss function

    data_loader = DataLoader(INPUT_PATH, labels_type="float")
    trainloader = data_loader.get_trainloader()
    testloader = data_loader.get_testloader()

    net = Net()
    net = net.cuda()
    if MODE == "continue" or MODE == "testonly":
        net.load_state_dict(torch.load(PATH))

    criterion = nn.SmoothL1Loss()
    #criterion = nn.MSELoss()
    optimizer = optim.SGD(net.parameters(), lr=0.0004, momentum=0.7)

    if MODE != "testonly":
        for epoch in range(TOTAL_EPOCH):
            running_loss = 0.0
Example #49
0
def main():
    ''' Main function '''
    parser = argparse.ArgumentParser()

    parser.add_argument('-data', required=True)

    parser.add_argument('-epoch', type=int, default=10)
    parser.add_argument('-batch_size', type=int, default=64)

    #parser.add_argument('-d_word_vec', type=int, default=512)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-d_inner_hid', type=int, default=1024)
    parser.add_argument('-d_k', type=int, default=64)
    parser.add_argument('-d_v', type=int, default=64)

    parser.add_argument('-n_head', type=int, default=8)
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-n_warmup_steps', type=int, default=4000)

    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-embs_share_weight', action='store_true')
    parser.add_argument('-proj_share_weight', action='store_true')

    parser.add_argument('-log', default=None)
    parser.add_argument('-save_model', default=None)
    parser.add_argument('-save_mode',
                        type=str,
                        choices=['all', 'best'],
                        default='best')

    parser.add_argument('-no_cuda', action='store_true')

    opt = parser.parse_args()
    opt.cuda = not opt.no_cuda
    opt.d_word_vec = opt.d_model

    #========= Loading Dataset =========#
    data = torch.load(opt.data)
    opt.max_token_seq_len = data['settings'].max_token_seq_len

    #========= Preparing DataLoader =========#
    training_data = DataLoader(data['dict']['src'],
                               data['dict']['tgt'],
                               src_insts=data['train']['src'],
                               tgt_insts=data['train']['tgt'],
                               batch_size=opt.batch_size,
                               cuda=opt.cuda)

    validation_data = DataLoader(data['dict']['src'],
                                 data['dict']['tgt'],
                                 src_insts=data['valid']['src'],
                                 tgt_insts=data['valid']['tgt'],
                                 batch_size=opt.batch_size,
                                 shuffle=False,
                                 test=True,
                                 cuda=opt.cuda)

    opt.src_vocab_size = training_data.src_vocab_size
    opt.tgt_vocab_size = training_data.tgt_vocab_size

    #========= Preparing Model =========#
    if opt.embs_share_weight and training_data.src_word2idx != training_data.tgt_word2idx:
        print(
            '[Warning]',
            'The src/tgt word2idx table are different but asked to share word embedding.'
        )

    print(opt)

    transformer = Transformer(opt.src_vocab_size,
                              opt.tgt_vocab_size,
                              opt.max_token_seq_len,
                              proj_share_weight=opt.proj_share_weight,
                              embs_share_weight=opt.embs_share_weight,
                              d_k=opt.d_k,
                              d_v=opt.d_v,
                              d_model=opt.d_model,
                              d_word_vec=opt.d_word_vec,
                              d_inner_hid=opt.d_inner_hid,
                              n_layers=opt.n_layers,
                              n_head=opt.n_head,
                              dropout=opt.dropout)

    #print(transformer)

    optimizer = ScheduledOptim(
        optim.Adam(transformer.get_trainable_parameters(),
                   betas=(0.9, 0.98),
                   eps=1e-09), opt.d_model, opt.n_warmup_steps)

    def get_criterion(vocab_size):
        ''' With PAD token zero weight '''
        weight = torch.ones(vocab_size)
        weight[Constants.PAD] = 0
        return nn.CrossEntropyLoss(weight, size_average=False)

    crit = get_criterion(training_data.tgt_vocab_size)

    if opt.cuda:
        transformer = transformer.cuda()
        crit = crit.cuda()

    print("===>TRAIN\n")
    train(transformer, training_data, validation_data, crit, optimizer, opt)
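Example #50
0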
result = pd.merge(ratings,
                  items[[itemID_column, itemName_column]],
                  how='left',
                  on=[itemID_column])
merged_data = result[[
    userID_column, itemID_column, itemName_column, ratings_column
]]
# -

testUser = 78
k = 10

merged_data[merged_data['user_id'] == testUser].sort_values(
    by=['rating'], ascending=False)[:40]

ml = DataLoader(items_path, ratings_path, userID_column, itemID_column,
                ratings_column, itemName_column, size_of_data)
data = ml.loadData(rating_scale_min, rating_scale_max)

trainSet = data.build_full_trainset()

sim_options = {'name': 'cosine', 'user_based': False}

model = KNNBasic(sim_options=sim_options)
model.fit(trainSet)
simsMatrix = model.compute_similarities()

simsMatrix.shape

testUserInnerID = trainSet.to_inner_uid(testUser)

# Get the top K items we rated
Example #51
0
import pandas as pd
import numpy as np
from math import log
from DataLoader import DataLoader

[X, y, df] = DataLoader.getDataSet()


def buildTree(X):
    print(X)


buildTree(X)
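Example #52
0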
    def train_LR(self, lr):
        
        trainer.train_LR(self, lr) 
                  
        dataloader = DataLoader()
        datasets =dataloader.load_shared_data()
    
        train_set_x, train_set_y = datasets[0]
        valid_set_x, valid_set_y = datasets[1]
        test_set_x, test_set_y = datasets[2]    
        
        params=np.empty((28*28)*10+10);
    
        climin.initialize.randomize_normal(params,0,1) 
        params = params/(28*28)
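        # scale the random initialisation down by the input dimensionality so the starting weights are small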
    
        lr.setParams(params);
        
        x=lr.x
        y=lr.y
        cost = (
                lr.negative_log_likelihood(y)
                + self.L1_lambda * lr.L1
                + self.L2_lambda * lr.L2_sqr
            )
                 
        g_W = T.grad(cost=cost, wrt=lr.W)
        g_b = T.grad(cost=cost, wrt=lr.b)   
                
        g_W_model = theano.function(
                                    inputs=[x,y],
                                    outputs=g_W         
                                    )
        g_b_model = theano.function(
                                    inputs=[x,y],
                                    outputs=g_b         
                                    )    
        
        
        batch_size = self.batch_size
        index = T.lscalar()
        
        test_err_model = theano.function(
        inputs=[index],
        outputs=lr.zeroOneLoss(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
        )

        train_err_model = theano.function(
        inputs=[index],
        outputs=lr.zeroOneLoss(y),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        }
        )
        
        validate_err_model = theano.function(
        inputs=[index],
        outputs=lr.zeroOneLoss(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

        # compute number of minibatches for training, validation and testing
        batch_size = self.batch_size;
        
        n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
        n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size
        
        def train_error():
            train_losses = [train_err_model(i)
                                         for i in xrange(n_train_batches)]
            this_train_losses = np.mean(train_losses)
           
            return this_train_losses;
        
        def validate_error():
        
            validation_losses = [validate_err_model(i)
                                         for i in xrange(n_valid_batches)]
            this_validation_loss = np.mean(validation_losses)
           
            return this_validation_loss;
        
        def test_error():
            test_losses = [test_err_model(i)
                                         for i in xrange(n_test_batches)]
            this_test_loss = np.mean(test_losses)
           
            return this_test_loss;           
                
        def d_loss_wrt_pars(parameters, inpt, targets):
                lr.setParams(parameters)
               
               
                gwValue = g_W_model(inpt,targets)
                gbValue = g_b_model(inpt,targets)
               
                return np.concatenate([gwValue.flatten(),gbValue])   
        
        args = ((i, {}) for i in climin.util.iter_minibatches([train_set_x.eval(), train_set_y.eval()], self.batch_size, [0, 0]))
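        # climin optimizers consume 'args' as an iterator of (positional_args, kwargs) pairs passed to d_loss_wrt_pars on each step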
   
        opt = climin.rmsprop.RmsProp(params, d_loss_wrt_pars, step_rate=self.learning_rate,decay=self.decay, momentum=self.momentum, args=args)
        
        validation_frequency = n_train_batches
        directory=check_create_observations_dir()  
        self.output_directory = directory
        bestValidationLoss = np.Inf;
        for info in opt:
            if info['n_iter'] % validation_frequency ==0:
                epoch_no = info['n_iter']/n_train_batches
                
                train_err=train_error()
                validation_err = validate_error()
                test_err = test_error()
                self.add_train_data(epoch_no, train_err, validation_err, test_err)
                if epoch_no % 10 ==0:
                    repfields_path=os.path.join(directory,"repFields"+str(epoch_no).zfill(3)+'.png')
                    W_vals=lr.W.get_value(borrow=True)
                    display(W_vals,repfields_path)
                
                if epoch_no >= self.n_epochs:
                    break
                
                if validation_err < bestValidationLoss:
                    bestValidationLoss = validation_err
    #                     
#                 if  validation_err *0.95 > bestValidationLoss:
#                       
#                         print "Best Validation Error : %f Validation err:%f  " %(bestValidationLoss,validation_err)
#                         break;
                
                if epoch_no > 15 and train_err < 0.9* validation_err:
                    break
                
                print "Iteration no: %d Validation error = %f" %(epoch_no,validation_err*100)
            
         
        trainer.save_errors(self, directory)
        repfields_final_path=os.path.join(directory,"repFields.png")
        W_vals=lr.W.get_value(borrow=True)
        display(W_vals,repfields_final_path)
Example #53
0
        """
        mask = np.zeros(
            (input_image.shape[0], input_image.shape[1], self.num_classes))
        image = color.convert_colorspace(input_image, "RGB", color_space)[:, :,
                                                                          1]
        image = (image - np.min(image)) / (np.max(image) - np.min(image))
        image = exposure.adjust_gamma(image, gamma=0.5)
        for i in range(self.window_size // 2,
                       mask.shape[0] - (self.window_size // 2)):
            for j in range(self.window_size // 2,
                           mask.shape[1] - (self.window_size // 2)):
                window = image[i - (self.window_size // 2):i +
                               (self.window_size // 2),
                               j - (self.window_size // 2):j +
                               (self.window_size // 2)]
                mask[i, j, :] = self.sigmoid(window.reshape(1, -1), self.W,
                                             self.b)
        return mask


if __name__ == "__main__":
    DM = DataLoader("ECE276A_HW1/trainset/", 0.9,
                    ["barrel_blue", "non_barrel_blue", "rest"])
    window_size = 10
    gen = DM.data_generator("labeled_data/Stored_Values.pickle",
                            window_size=window_size,
                            step_size=2)
    model = LogisticRegression(window_size, num_classes=3)
    model.train(gen, epochs=1000, learning_rate=0.01)
    pickle_file = "trained_models/model2.pickle"
Example #54
0
from Experiments import *
from DataLoader import DataLoader

if __name__ == '__main__':

    # load data
    dl_1 = DataLoader('data\\UCI-bank-marketing.csv', 'outputs\\Marketing',
                      'Marketing')
    dl_1.load_data()
    # run classifier
    ANN(dl_1)
    BOOST(dl_1)
    SVM_RBF(dl_1)
    SVM_linear(dl_1)
    KNN(dl_1)
    DT(dl_1)

    # load data
    dl_2 = DataLoader('data\\Heart.csv', 'outputs\\Heart', 'Heart')
    dl_2.load_data()
    # run classifier
    ANN(dl_2)
    BOOST(dl_2)
    SVM_linear(dl_2)
    KNN(dl_2)
    SVM_RBF(dl_2)
    DT(dl_2)

    # # load data
    # dl_3 = DataLoader('data\\Cancer.csv', 'outputs\\Cancer', 'Cancer')
    # dl_3.load_data()
Example #55
0
import argparse
from Align2D import Align2D

parser = argparse.ArgumentParser(
    description='visualizer for raw measurements from the intel dataset')
parser.add_argument('--laser_file',
                    type=str,
                    default='../data/intel_LASER_.txt',
                    help='name of the laser scanner log file')
parser.add_argument('--odometry_file',
                    type=str,
                    default='../data/intel_ODO.txt',
                    help='name of the odometry log file')
args = parser.parse_args()

loader = DataLoader(args.laser_file, args.odometry_file)
measurements = loader.measurements
disc = 0.25
matcher = SDFScanMatcher(discretization=disc)

matcher.AddScan(measurements[0].points)
matcher.AddScan(measurements[1].points)
matcher.AddScan(measurements[2].points)
matcher.AddScan(measurements[34].points)
matcher.AddScan(measurements[35].points)
res, J, grads = matcher.GetResidualAndJacobian(measurements[35].points,
                                               np.identity(3))
#sdf = SDFMap([10,10])
print("residual on next scan: {:f}".format(np.linalg.norm(res**2)))

fig = plt.figure()
Example #56
0
    def _predict_regression(self, x):
        return self.w.dot(x)

def evaluate_accuracy(Y_pred, Y_true):
    return np.mean(Y_pred == Y_true, axis=0)

def evaluate_squared_error(Y_pred, Y_true):
    return np.sqrt(np.mean((Y_pred-Y_true)**2, axis=0))

if __name__ == '__main__':

    from DataLoader import DataLoader

    try:
        if sys.argv[1] == 'mnist':
            data = DataLoader('mnist')
            training_size = 1000
            eta0 = 1
            gamma = 0.1
        if sys.argv[1] == 'cs':
            data = DataLoader('cs')
            training_size = 15
            eta0 = 0.05
            gamma = 5e-1
    except:
        print 'usage: python %s [mnist|cs]' % sys.argv[0]
        sys.exit(1)

    if data.type == 'classification':
        evaluate = evaluate_accuracy
    if data.type == 'regression':
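Example #57
0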
import math
import numpy as np
import random

from collections import Counter

from DataLoader import DataLoader
from Exceptions import TooSmallSingularValueError
from Models import StanfordModel, NCGModel, SocialModelStanford, CorrectSocialModelStanford, AdvancedSocialModel, SimpleSocialModel
from Utils import Utils

datasets = DataLoader.load_check_ins_from_directory("top_felix_users")

users = datasets.keys()

network = DataLoader.load_social_network(open("top_felix_users_connections.csv"))

"""friends = []
for user in datasets:
	if user == '104665558':
		continue
	friends.append(len(network[user]))
print np.min(friends)
print np.mean(friends)
print np.max(friends)
exit()"""

#print users
#exit()
#users = ["10221"]
#users = ['45474206', '276391406', '21913365', '27818171', '40557413', '19836108', '488667514', '94173972', '28668373', '33660680', '292750714', '104665558', '23209554', '549041707', '18488759', '82666753', '133067027', '30235429', '41234692', '29109326', '169585114', '14665537', '54670715', '258576072', '16332709', '83111133', '75911133', '573461782', '563315196', '111258523', '2365991', '24441491', '240102387'] 
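Example #58
0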
class ASGCNModelEvaluation:

    def __init__(self, embedding_type, dataset, model_name, max_degree=696,
                 learning_rate=0.001, weight_decay=5e-4, dropout=0.0,
                 epochs=300, early_stopping=30, hidden1=16, rank=128, skip=0,
                 var=0.5, sampler_device="cpu", gpu=None, recs=10):

        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

        self.d = DataLoader()
        self.model = ASGCNModel(embedding_type, dataset, model_name,
                                max_degree, learning_rate, weight_decay,
                                dropout, epochs, early_stopping, hidden1,
                                rank, skip, var, sampler_device, gpu, recs)

    def evaluate(self):
        # Load test data
        query_test, truth = self.d.evaluation_data_with_abstracts_citations()

        # Retrieve predictions
        recommendation = self.model.query_batch(query_test)

        # Evaluate
        print("Evaluating...")
        evaluation = EvaluationContainer()
        evaluation.evaluate(recommendation, truth)

def main():
        parser = argparse.ArgumentParser(
                description='Arguments for ASGCN model evaluation.')
        parser.add_argument('embedding_type',
                            choices=["AVG_L", "AVG_2L", "AVG_SUM_L4",
                                     "AVG_SUM_ALL", "MAX_2L",
                                     "CONC_AVG_MAX_2L", "CONC_AVG_MAX_SUM_L4",
                                     "SUM_L", "SUM_2L"
                                     ],
                            help="Type of embedding.")
        parser.add_argument('dataset',
                            help='Name of the object file that stores the '
                            + 'training data.')
        parser.add_argument("model_name",
                            choices=["gcn_adapt", "gcn_adapt_mix"],
                            help="Model names.")
        parser.add_argument('--max_degree',
                            type=int,
                            default=696,
                            help='Maximum degree for constructing the ' +
                                 'adjacent matrix.')
        parser.add_argument('--learning_rate',
                            type=float,
                            default=0.001,
                            help='Learning rate.')
        parser.add_argument('--weight_decay',
                            type=float,
                            default=5e-4,
                            help='Weight decay.')
        parser.add_argument('--dropout',
                            type=float,
                            default=0.0,
                            help='Dropout rate (1 - keep probability).')
        parser.add_argument('--epochs',
                            type=int,
                            default=300,
                            help='Number of epochs to train.')
        parser.add_argument('--early_stopping',
                            type=int,
                            default=30,
                            help='Tolerance for early stopping (# of epochs).')
        parser.add_argument("--hidden1",
                            type=int,
                            default=16,
                            help="Number of units in hidden layer 1.")
        parser.add_argument("--rank",
                            type=int,
                            default=128,
                            help="The number of nodes per layer.")
        parser.add_argument('--skip',
                            type=float,
                            default=0,
                            help='If use skip connection.')
        parser.add_argument('--var',
                            type=float,
                            default=0.5,
                            help='If use variance reduction.')
        parser.add_argument("--sampler_device",
                            choices=["gpu", "cpu"],
                            default="cpu",
                            help="The device for sampling: cpu or gpu.")
        parser.add_argument('--gpu',
                            type=int,
                            help='Which gpu to use.')
        parser.add_argument('--recs',
                            type=int,
                            default=10,
                            help='Number of recommendations.')
        args = parser.parse_args()

        from ASGCNModelEvaluation import ASGCNModelEvaluation
        print("Starting...")
        model = ASGCNModelEvaluation(
                args.embedding_type, args.dataset, args.model_name,
                args.max_degree, args.learning_rate, args.weight_decay,
                args.dropout, args.epochs, args.early_stopping, args.hidden1,
                args.rank, args.skip, args.var, args.sampler_device, args.gpu,
                args.recs)
        model.evaluate()
        print("Finished.")

if __name__ == "__main__":
    main()
Example #59
0
__email__ = "*****@*****.**"
__maintainer__ = "Mathew Sam"

from LogisticRegression import LogisticRegression
from DataLoader import DataLoader

import numpy as np
from scipy import ndimage
import matplotlib.pyplot as plt
import pickle

from skimage.morphology import disk, square, erosion, dilation
from skimage import exposure, color
import cv2

DM = DataLoader("ECE276A_HW1/trainset/", 0.8,
                ["barrel_blue", "non_barrel_blue", "rest"])
pickle_file = "trained_models/model1.pickle"

with open(pickle_file, 'rb') as handle:
    model = pickle.load(handle)

figure_num = 0
for file_name in DM.train_files:
    figure_num = figure_num + 1
    plt.figure(figure_num)
    file_name = DM.root_location + file_name
    plt.subplot(2, 1, 1)
    image = plt.imread(file_name)
    plt.imshow(image), plt.xticks([]), plt.yticks(
        []), plt.title("Original image")
    mask = model.test_image(image)
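Example #60
0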
 def test_invalid_venue(self):
     self.file.write("418|12|2012-07-18 14:43:38|37.6164|-122.386|41059b00f964a520850b1fe3|empty_message\n418|12|2012-07-18 12:34:45|34|-122.386||empty_message")
     self.file.seek(0)
     with self.assertRaises(ValueError) as cm:
         DataLoader.load_check_ins_from_file(self.file)
     self.assertEqual(cm.exception.message, 'Error in line 2: venue_id can not be an empty string')