def main():
    train_data = loader.load_data('data/train.csv')
    valid_data = loader.load_data('data/dev.csv')
    print('Data fetched')

    vocab = make_vocab(train_data)

    word2index, index2word = make_indices(vocab)

    with open("dict.pickle", "rb") as pickle_in:
        wordEmbeddings = pickle.load(pickle_in)

    with open("mean.pickle", "rb") as pickle_in:
        meanVec = pickle.load(pickle_in)

    train_id, train, train_labels = convert_to_vector_representation(
        train_data, word2index, wordEmbeddings, meanVec)
    valid_id, valid, valid_labels = convert_to_vector_representation(
        valid_data, word2index, wordEmbeddings, meanVec)

    classifier = svm.SVC()
    print('Training')
    classifier.fit(train, train_labels)

    print('Evaluating')
    print(classifier.predict(valid))
    print(classifier.score(valid, valid_labels))
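The snippet above calls several helpers that are not shown (make_vocab, make_indices, convert_to_vector_representation). Purely as an illustration of what the last of these might do, here is a minimal sketch that averages pre-trained word embeddings per document and falls back to the mean vector for out-of-vocabulary tokens; the (id, tokens, label) record layout and the behaviour are assumptions, not the project's actual code.

import numpy as np

def convert_to_vector_representation(data, word2index, wordEmbeddings, meanVec):
    # Hypothetical sketch: one averaged embedding vector per document.
    ids, vectors, labels = [], [], []
    for doc_id, tokens, label in data:
        vecs = [wordEmbeddings[w] for w in tokens
                if w in word2index and w in wordEmbeddings]
        # fall back to the corpus mean vector for fully out-of-vocabulary documents
        doc_vec = np.mean(vecs, axis=0) if vecs else meanVec
        ids.append(doc_id)
        vectors.append(doc_vec)
        labels.append(label)
    return ids, np.vstack(vectors), labels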
Example #2
    def __init__(self, train_file, test_file, addnoise, noise_scale=0.0):
        if not os.path.isfile(train_file):
            raise Exception('No Such Training File')
        if not os.path.isfile(test_file):
            raise Exception('No Such Test File')
        self.train_x, self.train_t, self.train_y = load_data(train_file)
        if addnoise:
            self.train_y += np.random.normal(0, noise_scale, size=self.train_y.shape)
        self.test_x, self.test_t, self.test_y = load_data(test_file)
        self.x = self.train_x
        self.t = self.train_t
        self.y = self.train_y
Example #3
def import_aq_database(opts, force=False):

    # try:

    exec_sql = ExecuteSQL.ExecuteSQL(opts.db_type, opts.db_host, opts.db_user, opts.db_pass, opts.db_name)

    create_db_directories(opts.aq_db_path, opts.aq_db_name, force)
    db_ini_filename = generate_ini(opts.aq_db_path, opts.aq_db_name, opts.aq_engine, opts.aq_loader)

    generate_base_desc(exec_sql, opts.aq_db_name, opts.aq_db_path + "/" + opts.aq_db_name + "/base_struct/base.aqb")
    export_data(exec_sql, opts.aq_db_path + "/" + opts.aq_db_name + "/data_orga/tables/")

    loader.load_data(opts.aq_tools, opts.aq_db_name)  # FIXME
Example #4
def predict(args):
    "perdict on a given dataset"
    if not args.private_file:
        data = load_data(DATA_NAME, samp_size=100000, all_=False)
    else:
        data = load_data(args.private_file)

    prep = ChrunPrep()
    X, index = prep.transform(data)

    classifier = Modelling(args.model_type)
    preds = classifier.predict(X)
    maybe_mkdir(args.out_path)
    out_path = os.path.join(args.out_path, "preds.csv")
    pd.Series(preds, index=index).to_csv(out_path, sep=";")
Example #5
def create_pickle_dataset():
    loader.maybe_download_and_extract()

    test_images, test_cls, test_labels = loader.load_data("test")
    dataset = split_test_dataset(test_images, test_cls, test_labels)

    train_images, train_cls, train_labels = loader.load_data("train")
    dataset.setdefault('train_images', train_images)
    dataset.setdefault('train_labels', train_labels)
    dataset.setdefault('train_cls', train_cls)

    dataset.setdefault('class_names', loader.labels)

    to_pickle(dataset)
    return dataset
Example #6
def main():

	train = loader.load_data() 
	train = clean_data(lm_cook_processer(train))

	test = loader.load_data(test=True)
	test = clean_data(lm_cook_processer(test,test=True))

	# tuning parameter a for language model
	# a = [0.05,0.1,0.15,0.2,0.25,0.3]
	# for e in a:
	# 	print "a: " + str(e)
	# 	print "score: " + str(cross_validation(train,'ingredients','cuisine', e))
	model = lm(train,'ingredients','cuisine')
	pred = model.predict(test['ingredients'])
	pred_to_out(pred,test.index)
Example #7
def train(rundir, diagnosis, epochs, learning_rate, use_gpu):
    train_loader, valid_loader, test_loader = load_data(diagnosis, use_gpu)
    
    model = MRNet()
    
    if use_gpu:
        model = model.cuda()

    optimizer = torch.optim.Adam(model.parameters(), learning_rate, weight_decay=.01)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5, factor=.3, threshold=1e-4)

    best_val_loss = float('inf')

    start_time = datetime.now()

    for epoch in range(epochs):
        change = datetime.now() - start_time
        print('starting epoch {}. time passed: {}'.format(epoch+1, str(change)))
        
        train_loss, train_auc, _, _ = run_model(model, train_loader, train=True, optimizer=optimizer)
        print(f'train loss: {train_loss:0.4f}')
        print(f'train AUC: {train_auc:0.4f}')

        val_loss, val_auc, _, _ = run_model(model, valid_loader)
        print(f'valid loss: {val_loss:0.4f}')
        print(f'valid AUC: {val_auc:0.4f}')

        scheduler.step(val_loss)

        if val_loss < best_val_loss:
            best_val_loss = val_loss

            file_name = f'val{val_loss:0.4f}_train{train_loss:0.4f}_epoch{epoch+1}'
            save_path = Path(rundir) / file_name
            torch.save(model.state_dict(), save_path)
Example #8
    def build_dataset(self, msg_queue):
        save_func = save.get_save_func(self.final["DATASET_FILE_EXTENSION"])

        msg_queue.put("Loading datasets. This may take a while. \n")
        # Load individual datasets
        datasets = loader.load_data(self.final["CONFIGS"], msg_queue)
        msg_queue.put("Finished loading datasets! \n" + "-" * 50 + "\n")
        msg_queue.put("Constructing new dataset \n")
        # Construct new dataset
        training_set, test_set = construct.get_subset(datasets, self.final)
        msg_queue.put("Finished Constructing datasets! \n" + "-" * 50 + "\n")

        msg_queue.put("Saving dataset \n")
        save_path = self.get_save_path(self.final["DATASET_NAME"] +
                                       "_training_set")
        save_func(save_path, training_set)

        save_path = self.get_save_path(self.final["DATASET_NAME"] +
                                       "_test_set")
        save.save_multiple(test_set, save_path, save_func)
        msg_queue.put("Finished saving dataset \n" + "-" * 50 + "\n")

        msg = "Do you want to save dataset statistics ?"
        if messagebox.askyesno("SAVE", msg):
            msg_queue.put("Saving dataset statistics \n")
            # Save statistics of each individual dataset
            datasets_stats = stats.generate_stats(test_set)
            save_path = self.get_save_path("test_set_stats")
            save_func(save_path,
                      datasets_stats,
                      fieldnames=datasets_stats[0].keys())
            msg_queue.put("Finished saving statistics \n" + "-" * 50 + "\n")

        self.manager.update()
Example #9
def svm_baseline():
    training_data, validation_data, test_data = loader.load_data()
    # train
    
    clf = svm.SVC()
    #print training_data[1]
    #print training_data[0]
    #t=np.reshape(training_data,(-1,1))
    t_x = np.asmatrix(training_data[0])
    t_y = np.asmatrix(training_data[1])
    te_x = np.asmatrix(test_data[0])
    te_y = np.asmatrix(test_data[1])
    clf.fit(training_data[0], training_data[1])
    # test
    predictions = [int(a) for a in clf.predict(test_data[0])]
    num_correct = sum(int(a == y) for a, y in zip(predictions, test_data[1]))
    print "Baseline classifier using an SVM."
    print "%s of %s values correct." % (num_correct, len(test_data[1]))
    a=num_correct
    b=len(test_data[1])
    c=a/(b*1.0)*100
    print "accuracy - %f"%c
Example #10
def evaluate(path, split, model_path, use_gpu):

    train_loader, valid_loader, test_loader = load_data(path, use_gpu)

    model = TripleMRNet()
    state_dict = torch.load(model_path,
                            map_location=(None if use_gpu else 'cpu'))
    model.load_state_dict(state_dict)

    if use_gpu:
        model = model.cuda()

    if split == 'train':
        loader = train_loader
    elif split == 'valid':
        loader = valid_loader
    elif split == 'test':
        loader = test_loader
    else:
        raise ValueError("split must be 'train', 'valid', or 'test'")

    loss, auc, accuracy, preds, labels = run_model(model, loader)

    print(f'{split} loss: {loss:0.4f}')
    print(f'{split} AUC_abnormal: {auc[0]:0.4f}')
    print(f'{split} AUC_acl: {auc[1]:0.4f}')
    print(f'{split} AUC_meniscus: {auc[2]:0.4f}')

    return preds, labels
Example #11
def main():
    """
    Perform n-fold cross-validation to evaluate knn and perceptron algorithms
    for classification of a dataset of Iris species.
    """

    # Load the data
    x_labels = [
        "SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm"
    ]
    x, y, type2id = loader.load_data('Iris.csv',
                                     y_label="Species",
                                     x_labels=x_labels)

    # Split into 75% train & 25% test
    train_x, train_y, test_x, test_y = loader.split_data(x, y, ratio=.25)

    print("RUNNING Perceptron: ")
    run_classifier(train_x,
                   train_y,
                   test_x,
                   test_y,
                   Classifier=Perceptron,
                   Param='N')
    print("RUNNING KNN: ")
    run_classifier(train_x, train_y, test_x, test_y, Classifier=KNN, Param='K')
Example #12
    def __init__(self, epoch=1000, lr=0.0001):
        super(TrainLeNet, self).__init__()

        print("训练准备.......")  # 开始
        # 二进制模型文件
        self.model_file = "lenet.pth"
        # self.CUDA true false
        self.CUDA = torch.cuda.is_available()

        self.train_loader, self.test_loader = loader.load_data()

        self.net = LeNet5()
        params = self.net.parameters()
        if self.CUDA:
            self.net.cuda()
        if os.path.exists(self.model_file):
            print("加载本地模型")
            # 加载本地模型
            state = torch.load(self.model_file)
            self.net.load_state_dict(state)

        # 3. parameters
        self.epoch = epoch
        self.lr = lr
        # loss function
        self.loss_function = torch.nn.CrossEntropyLoss()

        # optimizer and learning rate
        self.optimizer = torch.optim.Adam(self.net.parameters(), self.lr)

        if self.CUDA:
            self.loss_function = self.loss_function.cuda()
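The excerpt above only shows the constructor. Below is a hedged sketch of a training loop consistent with the attributes it sets up; the method name and the per-epoch checkpointing are assumptions, not the project's code.

    def train(self):
        # assumed training loop: iterate over the DataLoader built in __init__
        for e in range(self.epoch):
            for images, labels in self.train_loader:
                if self.CUDA:
                    images, labels = images.cuda(), labels.cuda()
                self.optimizer.zero_grad()
                loss = self.loss_function(self.net(images), labels)
                loss.backward()
                self.optimizer.step()
            # persist the weights after every epoch
            torch.save(self.net.state_dict(), self.model_file)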
Example #13
def evaluate(split, model_name, model_path, augment, use_gpu):
    train_loader, valid_loader, test_loader = load_data(augment, use_gpu)

    writer = SummaryWriter()
    model = NetFactory.createNet(model_name)
    state_dict = torch.load(model_path,
                            map_location=(None if use_gpu else 'cpu'))
    model.load_state_dict(state_dict)

    if use_gpu:
        model = model.cuda()

    if split == 'train':
        loader = train_loader
    elif split == 'valid':
        loader = valid_loader
    elif split == 'test':
        loader = test_loader
    else:
        raise ValueError("split must be 'train', 'valid', or 'test'")

    loss, auc, preds, labels = run_model(writer, 1, model, loader)

    print(f'{split} loss: {loss:0.4f}')
    print(f'{split} AUC: {auc:0.4f}')

    return preds, labels
Example #14
def evaluate(path, split, angle, face, model_path, use_gpu, filename):
    
    data_train, data_valid, data_test, data_A, data_B, data_D = load_data(path)
        
    model = CNNNet()
    state_dict = torch.load(model_path, map_location=(None if use_gpu else 'cpu'))
    model.load_state_dict(state_dict)

    if use_gpu:
        model = model.cuda()

    if split == 'tileA':        
        loader = DataLoader(data_A, batch_size=32, num_workers=12, shuffle=False)

    elif split == 'tileB':        
        loader = DataLoader(data_B, batch_size=32, num_workers=12, shuffle=False)
        
    elif split == 'tileD':
        loader = DataLoader(data_D, batch_size=32, num_workers=12, shuffle=False)
        
    else:
        raise ValueError("split must be 'train', 'valid', or 'test'")
        
    _, _, preds, labels = run_model(model, loader)
   
    # figure
    acc = plot_class(split, angle, face, preds, labels, filename)
      
    return preds, labels, acc
Example #15
def main():

	train = loader.load_data() 
	train = clean_data(lm_cook_processer(train))

	test = loader.load_data(test=True)
	test = clean_data(lm_cook_processer(test,test=True))

	# tuning parameter a for language model
	# a = [0.05,0.1,0.15,0.2,0.25,0.3]
	# for e in a:
	# 	print "a: " + str(e)
	# 	print "score: " + str(cross_validation(train,'ingredients','cuisine', e))
	model = lm.fit(train,'ingredients','cuisine')
	pred = lm.predict(test['ingredients'],model)
	pred_to_out(pred,test.index)
Example #16
def makepredictions(modelpath,outputfile,training_dir):

    train_dir = training_dir
    image_num = 16000
    val_split = 0
    
    X_test_filenames, y_test = load_data(train_dir, image_num, val_split)
    
    batch_size = 11
    
    print(np.argmax(y_test,axis=1))

    test_generator = Generator(X_test_filenames, y_test, batch_size)
    
    reconstructed_model = keras.models.load_model(modelpath)
    
    reconstructed_model.summary()

    prediction = reconstructed_model.predict_generator(test_generator)

    ## three biggest errors for dogs in wildlife
    #    label = np.argmax(y_test,axis=1)
    #    wildlife = prediction[:,1]
    #    wilddogs = wildlife[label==2]
    #    dogfiles = X_test_filenames[label==2]
    #    indices = wilddogs.argsort()[-3:][::-1]
    #    print(indices)
    #    print(wilddogs[indices])
    #    print(dogfiles[indices])
    
    np.savetxt(outputfile,
               np.vstack((np.arange(len(y_test)),
                          np.argmax(y_test, axis=1),
                          np.array(prediction[:, 0]),
                          np.array(prediction[:, 1]),
                          np.array(prediction[:, 2]))).T)
Example #17
    def __init__(self, args):
        self.args = args
        self.timeline = deque()
        self.data = load_data(args)
        self.bucket = {}
        self.topo_graph = {}  # is dominated, can find global using dfs
        self.reverse_topo_graph = {}  # dominate, used to find fathers
Example #18
def evaluate(split, model_path, diagnosis, use_gpu):
    train_loader, valid_loader, test_loader = load_data(diagnosis, use_gpu)

    model = MRNet()
    state_dict = torch.load(model_path,
                            map_location=(None if use_gpu else 'cpu'))
    model.load_state_dict(state_dict)

    if use_gpu:
        model = model.cuda()

    if split == 'train':
        loader = train_loader
    elif split == 'valid':
        loader = valid_loader
    elif split == 'test':
        loader = test_loader
    else:
        raise ValueError("split must be 'train', 'valid', or 'test'")

    loss, auc, preds, labels = run_model(model, loader)

    print(f'{split} loss: {loss:0.4f}')
    print(f'{split} AUC: {auc:0.4f}')

    return preds, labels
Example #19
def evaluate(split, model_dir, use_gpu=True):
    model = Combine()
    if use_gpu:
        model = model.cuda()
    state_dict = torch.load(
        '/home/Mara/run_baseline_acl_meniscus_gap/val0.3271_train0.2068_epoch22',
        map_location=(None if use_gpu else 'cpu'))
    model.load_state_dict(state_dict)

    train_loader, valid_loader, test_loader = load_data(model_dir, use_gpu)
    if split == 'train':
        loader = train_loader
    elif split == 'valid':
        loader = valid_loader
    elif split == 'test':
        loader = test_loader
    else:
        raise ValueError("split must be 'train', 'valid', or 'test'")

    loss, auc, accuracy, preds, labels = run_model(model, loader)

    print(f'{split} loss: {loss:0.4f}')
    print(f'{split} AUC_acl: {auc[0]:0.4f}')
    print(f'{split} AUC_meniscus: {auc[1]:0.4f}')

    #     print(f'{split} AUC_abnormal: {auc[0]:0.4f}')

    return preds, labels, model, loader
Example #20
def raw_value_comparison(coh, plot=False):
    """Return the average differences in raw copy number values between the
    gene-level calls in hg19 and hg38 for each gene for a given tumor type
    'coh.' If plot=True, plot the genes' differences in a histogram."""

    # load in the data
    df_38, df_19 = loader.load_data(hg38_dir, hg19_dir, coh, thresh=False)

    # compute average sample-by-sample differences for each gene
    df_s = df_38 - df_19
    avg_diff = {
        g: np.average(df_s[g])
        for g in df_s.columns.get_level_values('Gene Symbol')
    }

    # take note of which genes are altered more than our threshold of 4*std
    results = []
    std = np.std([avg_diff[x] for x in avg_diff])
    for g in avg_diff:
        if avg_diff[g] > 4 * std:
            results.append([coh, 'Pos', g, avg_diff[g]])
        elif avg_diff[g] < -4 * std:
            results.append([coh, 'Neg', g, avg_diff[g]])

    if plot:
        plt.hist([avg_diff[x] for x in avg_diff], bins=1000)
        plt.title(coh, fontsize=16)
        plt.xlabel('Average CN Difference Between Alignments', fontsize=14)
        plt.ylabel('Genes', fontsize=14)
        sns.despine()
        plt.savefig(coh + '_genehist.pdf')
        plt.savefig(coh + '_genehist.png')
        plt.clf()

    return results
Example #21
def run_test():
	print(' loading data ...')
	documents = load_data()

	# results = [find_full_line_names(d) for d in documents]
	# results = [find_dates(d) for d in documents]
	# results = [find_locations(d) for d in documents]
	print(len(documents))
Example #22
def main():
    train_data, train_label, test_data, test_label = load_data(5000, 1000)
    knn = KNN(train_data, train_label)
    prediction = knn.predict(test_data, 9)
    num_correct = np.sum(prediction == test_label)
    print("KNN")
    print(f"Correct:\t{num_correct} / {len(test_label)}")
    print(f"Accuracy:\t{num_correct / len(test_label)}")
Example #23
def svm_baseline():
    training_data, validation_data, test_data = loader.load_data()
    clf = svm.SVC()
    clf.fit(training_data[0], training_data[1])
    predictions = [int(a) for a in clf.predict(test_data[0])]
    num_correct = sum(int(a == y) for a, y in zip(predictions, test_data[1]))
    print "Baseline classifier using an SVM."
    print "%s of %s values correct." (num_correct, len(test_data[1]))
Example #24
File: main.py Project: QFSW/JiTBoy
def main():
    data = {
        'JIT'           : loader.load_data('output/results_jit.csv', jit=True),
        'JIT -L'        : loader.load_data('output/results_jit(-L).csv'),
        'Interpreter'   : loader.load_data('output/results_interpreter.csv'),
        'Hybrid'        : loader.load_data('output/results_hybrid.csv'),
        'Hybrid -L'     : loader.load_data('output/results_hybrid(-L).csv'),
        'Hybrid -LS'    : loader.load_data('output/results_hybrid(-LS).csv'),
    }

    t_vals = [1, 10, 100, 1000, 10000]
    data_hybrid_t = {
        'Hybrid -L -T%d' % t: loader.load_data('output/results_hybrid(-L-T%d).csv' % t) for t in t_vals
    }

    col_map = plot.make_col_map(data)
    data_sets = {
        'all'        : data,
        'emulators'  : data_proc.select(data, ['JIT -L', 'Interpreter', 'Hybrid -L']),
        'jit'        : data_proc.select(data, ['JIT', 'JIT -L']),
        'hybrid'     : data_proc.select(data, ['Hybrid', 'Hybrid -L', 'Hybrid -LS']),
        'single/jit' : data_proc.select(data, ['JIT']),
    }

    draw_vs_scatters(data)
    draw_histograms(data)

    for (name, data) in data_sets.items():
        draw_testbatches(name, data, col_map)
        draw_scatters(name, data)

    draw_testbatches('hybrid_t', data_hybrid_t)
Example #25
    def test_plot_image(self):
        x_train, y_train, x_test, y_test = loader.load_data(
            param_config.PATH,
            param_config.X_PKL,
            param_config.Y_PKL,
            param_config.INCEPT_WIDTH,
            param_config.INCEPT_HEIGHT
        )
        loader.plot_image(x_train[random.randint(0, len(x_train) - 1)])
Example #26
def get_symbol_classifier():
    train_data_dir = "./data/"
    tr_X, tr_y = load_data(train_data_dir)
    tr_feat = get_features(tr_X)
    knn_classifier = KNeighborsClassifier(n_neighbors=10,
                                          weights='uniform',
                                          n_jobs=4)
    knn_classifier.fit(tr_feat, tr_y)
    return knn_classifier
Example #27
def main():
    train_data, train_label, test_data, test_label = load_data(5000, 1000)
    svm = SVM(train_data.shape[1], 10)
    svm.train(train_data, train_label, 1000)
    prediction = svm.predict(test_data)
    num_correct = np.sum(prediction == test_label)
    print("SVM")
    print(f"Correct:\t{num_correct} / {len(test_label)}")
    print(f"Accuracy:\t{num_correct / len(test_label)}")
Example #28
def preprocess_data(size):
    training_size = int(size * 0.9)
    all_data = loader.load_data(size)
    random.shuffle(all_data)
    training_data = all_data[:training_size]
    validation_data = all_data[training_size:]

    return (np.array([x[1] for x in training_data]), np.array([x[-1][0] for x in training_data])), \
            (np.array([x[1] for x in validation_data]), np.array([x[-1][0] for x in validation_data]))
Example #29
def import_aq_database(opts, force=False):

    # try:

    exec_sql = ExecuteSQL.ExecuteSQL(opts.db_type, opts.db_host, opts.db_user,
                                     opts.db_pass, opts.db_name)

    create_db_directories(opts.aq_db_path, opts.aq_db_name, force)
    db_ini_filename = generate_ini(opts.aq_db_path, opts.aq_db_name,
                                   opts.aq_engine, opts.aq_loader)

    generate_base_desc(
        exec_sql, opts.aq_db_name,
        opts.aq_db_path + '/' + opts.aq_db_name + '/base_struct/base.aqb')
    export_data(exec_sql,
                opts.aq_db_path + '/' + opts.aq_db_name + '/data_orga/tables/')

    loader.load_data(opts.aq_tools, opts.aq_db_name)  # FIXME
Example #30
def data_to_hdf5(_):
    from loader import load_data
    print('Loading data...')
    calendar, prices, sales = load_data()

    print('Saving as hdf5...')
    calendar.to_hdf('data/data.h5', key='calendar')
    prices.to_hdf('data/data.h5', key='prices')
    sales.to_hdf('data/data.h5', key='sales')
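The frames written by Example #30 can be read back with pandas.read_hdf; the HDF5 path and keys below come from the example itself, and the PyTables package must be installed.

import pandas as pd

calendar = pd.read_hdf('data/data.h5', key='calendar')
prices = pd.read_hdf('data/data.h5', key='prices')
sales = pd.read_hdf('data/data.h5', key='sales')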
Example #31
def train(rundir, path, epochs, learning_rate, use_gpu):

    rundir = rundir + '/'

    train_loader, valid_loader, test_loader = load_data(path, use_gpu)

    model = TripleMRNet()  #1, 32, 64

    if use_gpu:
        model = model.cuda()

    optimizer = torch.optim.Adam(model.parameters(),
                                 learning_rate,
                                 weight_decay=0.01)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=5,
                                                           factor=.3,
                                                           threshold=1e-5)

    best_val_loss = float('inf')

    start_time = datetime.now()

    for epoch in range(epochs):

        change = datetime.now() - start_time
        print('starting epoch {}. time passed: {}\n'.format(
            epoch + 1, str(change)))

        train_loss, train_auc, train_accuracy, _, _ = run_model(
            model, train_loader, train=True, optimizer=optimizer)
        print(f'train loss: {train_loss:0.4f}')
        print(f'train AUC_abnormal: {train_auc[0]:0.4f}')
        print(f'train AUC_acl: {train_auc[1]:0.4f}')
        print(f'train AUC_meniscus: {train_auc[2]:0.4f}\n')
        #print(f'train accuracy_abnormal: {train_accuracy[0]:0.4f}')
        #print(f'train accuracy_acl: {train_accuracy[1]:0.4f}')
        #print(f'train accuracy_meniscus: {train_accuracy[2]:0.4f}\n')

        val_loss, val_auc, val_accuracy, _, _ = run_model(model, valid_loader)
        print(f'valid loss: {val_loss:0.4f}')
        print(f'valid AUC_abnormal: {val_auc[0]:0.4f}')
        print(f'valid AUC_acl: {val_auc[1]:0.4f}')
        print(f'valid AUC_meniscus: {val_auc[2]:0.4f}\n')
        #print(f'valid accuracy_abnormal: {val_accuracy[0]:0.4f}')
        #print(f'valid accuracy_acl: {val_accuracy[1]:0.4f}')
        #print(f'valid accuracy_meniscus: {val_accuracy[2]:0.4f}\n')

        scheduler.step(val_loss)

        if val_loss < best_val_loss:
            best_val_loss = val_loss

            file_name = f'val{val_loss:0.4f}_train{train_loss:0.4f}_epoch{epoch+1}'
            save_path = Path(rundir) / file_name
            torch.save(model.state_dict(), save_path)
Example #32
def fit(args):
    "fit preprocessor and model"
    data = load_data(DATA_NAME, samp_size=10000, all_=False)

    prep = ChrunPrep()
    X = prep.fit_transform(data)
    y = prep.create_labels(data)

    classifier = Modelling(model=args.model_type)
    classifier.fit(X, y)
Example #33
def main():
    img = loader.load_data(sys.argv[FIRST_ARG])
    for power in range(1, 5):
        centroids = init_centroids.init_centroids(np.power(2, power))
        model = k_means.KMeans(centroids, img)
        new_img = model.algorithm(EPOCH)
        new_img = np.reshape(
            new_img,
            (int(sys.argv[SEC_ARG]), int(sys.argv[THIRD_ARG]), RGB_SIZE))
        plot.plot(new_img)
Example #34
def main():

	train = loader.load_data() 
	ref, (train_x, train_y) = clean_data(tfidf_cook_processer(train))

	test = loader.load_data(test=True)
	test_x = clean_data(tfidf_cook_processer(test,test=True),test=True)

	# get from cross validation
	gamma = 1	
	C = 3.1622776601683795

	clf = SVC(gamma=gamma, C=C, probability=True)
	# print cross_validation.cvScore(clf, train_x, train_y).mean()

	# random forest
	# clf = RandomForestClassifier(n_estimators=100) #rank 1078

	clf.fit(train_x,train_y)
	pred_to_out(clf.predict(test_x),ref,test)
Example #35
def augment(n=4):
    for i in range(n):
        for image_dir in image_dirs:
            # Dummy model
            model = Sequential()
            model.add(Convolution2D(
                1, 1, 1, input_shape=(1, img_rows, img_cols)))
            model.add(Flatten())
            model.add(Dense(4))

            model.compile(loss='mse', optimizer='SGD')

            datagen = ImageDataGenerator(
                featurewise_center=True,
                featurewise_std_normalization=False,  # Seems like not working at all!
                rotation_range=180,
                zca_whitening=False,
                # shear_range=0.3,
                # zoom_range=0.1,
                # width_shift_range=0.1,
                # height_shift_range=0.1,
                horizontal_flip=True,
                vertical_flip=True)

            (X_train, Y_train), (X_test, Y_test) = load_data(
                False, [image_dir])

            save_path = image_dir + data_dir[:-1] + 'A'

            if not path.exists(save_path):
                makedirs(save_path)

            datagen.fit(X_train)
            model.fit_generator(datagen.flow(X_train, Y_train,
                                             save_to_dir=save_path,
                                             save_prefix='_' + str(i), save_format='png'),
                                samples_per_epoch=X_train.shape[0], nb_epoch=1)
Example #36
hyperParameters = Params()

def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    assert len(inputs) == len(targets)
    if shuffle:
        indices = np.arange(len(inputs))
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt]

print("Loading data...")
data = loader.load_data()
x_train = data['X_train']
y_train = data['Y_train']
x_test = data['X_test']
y_test = data['Y_test']

input_var = T.tensor4('inputs')
target_var = T.ivector('targets')
print("Building model and compiling functions...")
network = networks.build_cnn(input_var)

prediction = lasagne.layers.get_output(network)
loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
loss = loss.mean()
# We could add some weight decay as well here, see lasagne.regularization.
Example #37
def test_mlp(
        initial_learning_rate,
        learning_rate_decay,
        squared_filter_length_limit,
        n_epochs,
        batch_size,
        mom_params,
        activations,
        dropout,
        dropout_rates,
        results_file_name,
        layer_sizes,
        dataset,
        use_bias,
        random_seed=1234):
    """
    The dataset is the one from the mlp demo on deeplearning.net.  This training
    function is lifted from there almost exactly.

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz


    """
    assert len(layer_sizes) - 1 == len(dropout_rates)

    # extract the params for momentum
    mom_start = mom_params["start"]
    mom_end = mom_params["end"]
    mom_epoch_interval = mom_params["interval"]


    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################

    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
    epoch = T.scalar()
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels
    learning_rate = theano.shared(np.asarray(initial_learning_rate,
        dtype=theano.config.floatX))

    rng = np.random.RandomState(random_seed)

    # construct the MLP class
    classifier = MLP_Dropout(rng=rng, input=x,
                     layer_sizes=layer_sizes,
                     dropout_rates=dropout_rates,
                     activations=activations,
                     use_bias=use_bias)

    # Build the expression for the cost function.
    cost = classifier.negative_log_likelihood(y)
    dropout_cost = classifier.dropout_negative_log_likelihood(y)

    # Compile theano function for testing.
    test_model = theano.function(inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: test_set_x[index * batch_size:(index + 1) * batch_size],
                y: test_set_y[index * batch_size:(index + 1) * batch_size]})
    #theano.printing.pydotprint(test_model, outfile="test_file.png",
    #        var_with_name_simple=True)

    # Compile theano function for validation.
    validate_model = theano.function(inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                y: valid_set_y[index * batch_size:(index + 1) * batch_size]})
    #theano.printing.pydotprint(validate_model, outfile="validate_file.png",
    #        var_with_name_simple=True)

    # Compute gradients of the model wrt parameters
    gparams = []
    for param in classifier.params:
        # Use the right cost function here to train with or without dropout.
        gparam = T.grad(dropout_cost if dropout else cost, param)
        gparams.append(gparam)

    # ... and allocate memory for momentum'd versions of the gradient
    gparams_mom = []
    for param in classifier.params:
        gparam_mom = theano.shared(np.zeros(param.get_value(borrow=True).shape,
            dtype=theano.config.floatX))
        gparams_mom.append(gparam_mom)

    # Compute momentum for the current epoch
    mom = ifelse(epoch < mom_epoch_interval,
            mom_start*(1.0 - epoch/mom_epoch_interval) + mom_end*(epoch/mom_epoch_interval),
            mom_end)

    # Update the step direction using momentum
    updates = OrderedDict()
    for gparam_mom, gparam in zip(gparams_mom, gparams):
        # Misha Denil's original version
        #updates[gparam_mom] = mom * gparam_mom + (1. - mom) * gparam

        # change the update rule to match Hinton's dropout paper
        updates[gparam_mom] = mom * gparam_mom - (1. - mom) * learning_rate * gparam

    # ... and take a step along that direction
    for param, gparam_mom in zip(classifier.params, gparams_mom):
        # Misha Denil's original version
        #stepped_param = param - learning_rate * updates[gparam_mom]

        # since we have included learning_rate in gparam_mom, we don't need it
        # here
        stepped_param = param + updates[gparam_mom]

        # This is a silly hack to constrain the norms of the rows of the weight
        # matrices.  This just checks if there are two dimensions to the
        # parameter and constrains it if so... maybe this is a bit silly but it
        # should work for now.
        if param.get_value(borrow=True).ndim == 2:
            #squared_norms = T.sum(stepped_param**2, axis=1).reshape((stepped_param.shape[0],1))
            #scale = T.clip(T.sqrt(squared_filter_length_limit / squared_norms), 0., 1.)
            #updates[param] = stepped_param * scale

            # constrain the norms of the COLUMNs of the weight, according to
            # https://github.com/BVLC/caffe/issues/109
            col_norms = T.sqrt(T.sum(T.sqr(stepped_param), axis=0))
            desired_norms = T.clip(col_norms, 0, T.sqrt(squared_filter_length_limit))
            scale = desired_norms / (1e-7 + col_norms)
            updates[param] = stepped_param * scale
        else:
            updates[param] = stepped_param


    # Compile theano function for training.  This returns the training cost and
    # updates the model parameters.
    output = dropout_cost if dropout else cost
    train_model = theano.function(inputs=[epoch, index], outputs=output,
            updates=updates,
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size]})
    #theano.printing.pydotprint(train_model, outfile="train_file.png",
    #        var_with_name_simple=True)

    # Theano function to decay the learning rate, this is separate from the
    # training function because we only want to do this once each epoch instead
    # of after each minibatch.
    decay_learning_rate = theano.function(inputs=[], outputs=learning_rate,
            updates={learning_rate: learning_rate * learning_rate_decay})

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'

    best_params = None
    best_validation_errors = np.inf
    best_iter = 0
    test_score = 0.
    epoch_counter = 0
    start_time = time.clock()

    results_file = open(results_file_name, 'wb')

    while epoch_counter < n_epochs:
        # Train this epoch
        epoch_counter = epoch_counter + 1
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_model(epoch_counter, minibatch_index)

        # Compute loss on validation set
        validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
        this_validation_errors = np.sum(validation_losses)

        # Report and save progress.
        print "epoch {}, test error {}, learning_rate={}{}".format(
                epoch_counter, this_validation_errors,
                learning_rate.get_value(borrow=True),
                " **" if this_validation_errors < best_validation_errors else "")

        best_validation_errors = min(best_validation_errors,
                this_validation_errors)
        results_file.write("{0}\n".format(this_validation_errors))
        results_file.flush()

        new_learning_rate = decay_learning_rate()

    end_time = time.clock()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_errors * 100., best_iter, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
Example #38
# start
logger = get_logger()
logger.info('--------------- Encoder - Decoder ---------------')

# load data
src_vocab_size = 5000
dest_vocab_size = 5000
logger.info('loading data...')

(train_x, test_x, valid_x,
 train_y, test_y, valid_y,
 mask_train_x, mask_test_x, mask_valid_x,
 mask_train_y, mask_test_y, mask_valid_y,
 src_index2word, src_word2index,
 dest_index2word, dest_word2index) = load_data('data/train50000.ja', 'data/train50000.en', train_size=0.8, valid_size=0.2,
                                               src_word_limit=src_vocab_size, dest_word_limit=dest_vocab_size)

logger.info('loaded training source sentences: {0}, max length: {1}, vocabulary size: {2}'.format(
    len(train_x.get_value(borrow=True)), len(mask_train_x.get_value(borrow=True)), len(list(src_word2index))))
logger.info('loaded training target sentences: {0}, max length: {1}, vocabulary size: {2}'.format(
    len(train_y.get_value(borrow=True)), len(mask_train_y.get_value(borrow=True)), len(list(dest_word2index))))

# encoder
encoder_vocab_size = len(src_word2index)
encoder_embedding_size = 100
encoder_hidden_size = 50
encoder = Encoder(encoder_vocab_size, encoder_embedding_size, encoder_hidden_size)

# decoder
decoder_vocab_size = len(dest_word2index)
decoder_embedding_size = 100
Example #39
import numpy as np
from loader import load_data, standardize
from classification.mlp1 import MLP

n_in = 10
n_hidden = 25
n_out = 2
dropout_prob = 0.5
learning_rate = 0.1
n_epochs = 500
batch_size = 10
weight_decay = 0.0001
K = 10

x, y = load_data('../data1.txt')
roc_file = open('roc.txt', 'w')

step = len(x) / K + 1
confusion_matrix = np.array([0, 0, 0, 0])
for i in np.arange(0, len(x) + 1, step):
    valid_set = x[i:i + step, :], y[i:i + step]
    train_set = np.delete(x, range(i, i + step), axis=0), np.delete(y, range(i, i + step))
    datasets = standardize(train_set, valid_set)

    ml = MLP(n_in, n_hidden, n_out, dropout_prob, learning_rate, weight_decay)
    confusion_matrix += ml.test_mlp(datasets, n_epochs, batch_size, roc_file)

print 'tp tn fp fn : ', confusion_matrix
roc_file.close()
Example #40
# --------------------------- PREPARE TRAINING & TEST DATA ----------------------------

import librosa
import json
from numpyEncoder import *

import loader

trainingData = loader.load_data()

# this is used for testing with the same people
testData = loader.alternative_testData()

print "length of training set: ",len(trainingData)
print "length of test data: ", len(testData)

# ------------------------------------ NETWORK ----------------------------------------

import network

eta = 1.5
NUM_EPOCHS = 30
# for CQT
# INPUT_NEURONS = 84 * 44
# for mel spectrogram
INPUT_NEURONS = 128 * 44

HIDDEN_LAYER1 = 50

HIDDEN_LAYER2 = 50
Example #41
'''
@author: SeylomA
'''

import numpy as np
import loader
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import scale
from sklearn.grid_search import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.svm import SVC
from sklearn.cross_validation import StratifiedKFold

if __name__ == "__main__":
    # added for multiprocessor support in windows.
    
    train_X, train_Y, test = loader.load_data()
    
    n_samples = len(train_Y)
    
    ###############################################################################
#    # Set the parameters by cross-validation
#    tuned_parameters = [{'kernel': ['poly'], 'gamma': [2 ** -9, 2 ** -8.25, 2 ** -8.5, 2 ** -8.25, 2 ** -7],
#                         'C': [2 ** 1.8, 2 ** 2.2, 2 ** 2.4, 2 ** 2 ** 2.26, 2 ** 2 ** 2.8, 2 ** 3], 'degree':[3, 4]}]

#    # Set the parameters by cross-validation
    param_grid = [{'kernel': ['rbf'], 'gamma': [2 ** -15, 2 ** -13],
                         'C': [2 ** -4, 2 ** -2], 'degree':[3]}]

    
    X_train, X_test, y_train, y_test = train_test_split(
        train_X[:n_samples], train_Y[:n_samples], test_fraction=0.3, random_state=1)
Example #42
def plot(limit_type, filename, logy=True, smooth_data=True):
    """
    Application entry point
    """

    supported_limit_types = ("narrow", "wide", "kk")
    if limit_type not in supported_limit_types:
        raise RuntimeError("supported limit types: {0!r}".format(supported_limit_types))

    data = load_data(filename, 0.1)

    if smooth_data:
        smooth.data(data, n=40, log=logy)

    # print(sorted(data.keys()))
    # for m in sorted(data.keys()):
    #    print(m, data[m][0])

    legend = ROOT.TLegend(0.5, 0.50, 0.80, 0.88)

    line_width = 3
    combo = ROOT.TMultiGraph()

    # 2 sigma band
    cur = get_limits(data)
    graph = ROOT.TGraphAsymmErrors(
        len(cur["x"]), cur["x"], cur["expected"], cur["xerr"], cur["xerr"], cur["two_sigma_down"], cur["two_sigma_up"]
    )
    graph.SetFillColor(ROOT.kGray + 1)
    graph.SetLineWidth(0)
    combo.Add(graph)
    g_2s = graph

    # 1 sigma band
    graph = ROOT.TGraphAsymmErrors(
        len(cur["x"]), cur["x"], cur["expected"], cur["xerr"], cur["xerr"], cur["one_sigma_down"], cur["one_sigma_up"]
    )
    graph.SetFillColor(ROOT.kGray)
    graph.SetLineWidth(0)
    combo.Add(graph)
    g_1s = graph

    # expected
    graph = ROOT.TGraph(len(cur["x"]), cur["x"], cur["expected"])
    graph.SetLineColor(ROOT.kBlack)
    graph.SetLineWidth(line_width)
    combo.Add(graph)
    legend.AddEntry(graph, "Expected (95% Bayesian)", "l")

    # observed
    graph = ROOT.TGraph(len(cur["observed_x"]), cur["observed_x"], cur["observed"])
    graph.SetLineColor(ROOT.kRed + 1)
    graph.SetLineWidth(line_width)
    combo.Add(graph)
    legend.AddEntry(graph, "Observed (95% Bayesian)", "l")

    # Theory
    theories = {
        "narrow": ([theory.zprime, 1.2, False],),
        "wide": ([theory.zprime, 10.0, False],),
        "kk": ([theory.kkgluon, None, None],),
    }.get(limit_type)

    for index, (theory_function, theory_width, use_old_theory) in enumerate(theories, 0):

        class Theory(object):
            pass

        x, y, label = theory_function(theory_width, use_old_theory) if theory_width else theory_function()

        # remove all the points below 750 GeV
        x, y = zip(*[(xi, yi) for xi, yi in zip(x, y) if not 750 > xi])

        graph = ROOT.TGraph(len(x), array("d", x), array("d", y))
        graph.SetLineColor([ROOT.kBlue + 1, ROOT.kMagenta + 1, ROOT.kGreen + 1][index] if index < 3 else ROOT.kBlue + 1)
        graph.SetLineWidth(3)
        graph.SetLineStyle(2)
        combo.Add(graph)
        legend.AddEntry(graph, label, "l")

        theory_data = Theory()
        theory_data.x = x
        theory_data.y = y

        expected_exclusion, observed_exclusion = exclude(cur, theory_data)
        print("Expected exclusion:", expected_exclusion)
        print("Observed exclusion:", observed_exclusion)

    legend.AddEntry(g_1s, "#pm 1 s.d. Expected", "f")
    legend.AddEntry(g_2s, "#pm 2 s.d. Expected", "f")

    # Draw
    cv = ROOT.TCanvas()
    style.canvas(cv)

    combo.Draw("3al")
    legend.Draw()

    if logy:
        cv.SetLogy(True)

    if limit_type == "kk":
        style.combo(
            combo,
            ytitle="Upper Limit #sigma_{g_{KK}} x B [pb]",
            maximum=1e2 if logy else None,
            minimum=1e-2 if logy else None,
        )
    else:
        style.combo(combo, maximum=1e2 if logy else None)

    style.legend(legend)
    legend.SetTextSize(0.04)

    plot_labels = labels.create(
        {"narrow": "Narrow Width Assumption", "wide": "10% Width Assumption", "kk": "KK Gluon Assumption"}.get(
            limit_type, None
        )
    )

    map(ROOT.TObject.Draw, plot_labels)

    cv.Update()
    cv.SaveAs("limits-{0}.pdf".format(limit_type))
Example #43
def main():
    if 4 != len(sys.argv):
        raise RuntimeError("insufficient arguments")

    data = load_data(*sys.argv[1:3])

    # container for low and high limits
    class Limits(object): pass

    limits = Limits()
    limits.low = {
            "visible": None,
            "invisible": None
            }

    limits.high = copy.deepcopy(limits.low)

    # convert YAML to dictionary
    (limits.low["visible"],
     limits.low["invisible"]) = get_limits(data.low, is_low_mass=True)

    (limits.high["visible"],
     limits.high["invisible"]) = get_limits(data.high, is_low_mass=False)

    legend = ROOT.TLegend(0.5, 0.50, 0.80, 0.80)
    legend.SetHeader("Expected limits")

    line_width = 3
    combo = ROOT.TMultiGraph()

    # expected
    cur = limits.low["visible"]
    graph = ROOT.TGraph(len(cur["x"]), cur["x"], cur["expected"])
    graph.SetLineColor(ROOT.kAzure - 7)
    graph.SetLineWidth(line_width)
    combo.Add(graph)
    legend.AddEntry(graph, "threshold", "l")

    cur = limits.low["invisible"]
    graph = ROOT.TGraph(len(cur["x"]), cur["x"], cur["expected"])
    graph.SetLineColor(ROOT.kAzure + 2)
    graph.SetLineWidth(line_width)
    graph.SetLineStyle(7)
    combo.Add(graph)

    cur = limits.high["visible"]
    graph = ROOT.TGraph(len(cur["x"]), cur["x"], cur["expected"])
    graph.SetLineColor(ROOT.kOrange + 2)
    graph.SetLineWidth(line_width)
    combo.Add(graph)
    legend.AddEntry(graph, "boosted", "l")

    # Draw
    cv = ROOT.TCanvas()
    style.canvas(cv)

    combo.Draw("3al")
    legend.Draw()

    cv.SetLogy(True)
    style.combo(combo)
    style.legend(legend)

    plot_labels = labels.create(sys.argv[3])
    map(ROOT.TObject.Draw, plot_labels)

    cv.Update()
    cv.SaveAs("expected.png")

    raw_input("enter")
Example #44
# -*- coding: UTF-8 -*-

import numpy
from sklearn import preprocessing

def reform(datasets):
    new_datasets = []
    scaler = None
    for dataset in datasets:
        new_dataset_x = []
        new_dataset_y = []
        for x, y in zip(dataset[0],dataset[1]):
            for i in range(0, len(x)/10*10, 10):
                new_dataset_x.append(x[i:i+10,:].flatten())
                new_dataset_y.append(y)
        new_dataset_x = numpy.asarray(new_dataset_x)
        new_dataset_y = numpy.asarray(new_dataset_y)

        new_datasets.append((new_dataset_x, new_dataset_y))

    return tuple(new_datasets)

if __name__ == '__main__':
    from loader import load_data
    from feature_extractor import extract_features

    datasets = extract_features(load_data()[0])

    new_datasets = reform(datasets)

    print new_datasets[0][0][0].shape
Example #45
def evaluate_lenet5(learning_rate=0.1, n_epochs=200,
                    dataset='mnist.pkl.gz',
                    nkerns=[20, 50], batch_size=500):
    """ Demonstrates lenet on MNIST dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training /testing (MNIST here)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 28 * 28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    # (28, 28) is the size of MNIST images.
    layer0_input = x.reshape((batch_size, 1, 28, 28))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, 28, 28),
        filter_shape=(nkerns[0], 1, 5, 5),
        poolsize=(2, 2)
    )

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 12, 12),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
    # or (500, 50 * 4 * 4) = (500, 800) with the default values.
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * 4 * 4,
        n_out=500,
        activation=T.tanh
    )

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegressionLayer(input=layer2.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params



    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = sgdVanilla(params, cost, learning_rate)

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )


    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print('training @ iter = ', iter)
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i)
                        for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(('The code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)))
Example #46
#---------------------------------------------#
def get_batch(in_data, batch_size):
    batch_imgs = np.empty([batch_size, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS], np.float32)
    batch_data = np.empty([batch_size, N_IN_DATA], np.float32)
    batch_desi = np.empty([batch_size, N_OUT_DATA], np.float32)
    for i in range(batch_size):
        x = randint(1, len(in_data)) - 1
        batch_imgs[i], batch_data[i], batch_desi[i] = in_data[x].load_data()
    return batch_imgs, batch_data, batch_desi

#---------------------------------------------#
# Running the Network
#---------------------------------------------#

# Load the network and the data
data = ld.load_data()
nn = Network()
if LOAD_NETWORK:
    nn.load_network(LOAD_LOCATION)

# Main loop
for i in tqdm(range(4000, 10000000)):
    # Generate the batch and train
    img_batch, data_batch, desired_batch = get_batch(data, BATCH_SIZE)
    loss = nn.train(img_batch, data_batch, desired_batch, USE_WEIGHTED_LOSS)

    # Print the loss
    if i % 20 == 0:
        print i, loss, CHECKPOINT_END

    # Save the network
Example #47
def plot(limit_type, low_mass, high_mass, logy=True, smooth_data=True):
    '''
    Application entry point
    '''

    supported_limit_types = ("narrow", "wide", "kk")
    if limit_type not in supported_limit_types:
        raise RuntimeError("supported limit types: {0!r}".format(
                            supported_limit_types))

    data = load_data(low=low_mass, high=high_mass, scale_high=0.1)

    if smooth_data:
        smooth.data(data.low, n=40, log=logy)
        smooth.data(data.high, n=40, log=logy)

    # Computing splitting point based on expected values
    split_point = 0
    mass_high_min = min(data.high.keys())
    for mass_low in sorted(data.low.keys()):
        if mass_low < mass_high_min:
            continue
        mass_high = 0
        for mass in sorted(data.high.keys()):
            if mass >= mass_low:
                mass_high = mass
                break
        if data.low[mass_low][0] > data.high[mass_high][0]:
            print(mass_low, mass_high)
            split_point = mass_high
            break

    class Limits(object): pass

    # container for low and high limits
    limits = Limits()
    limits.low = {
            "visible": None,
            "invisible": None
            }

    limits.high = copy.deepcopy(limits.low)
    limits.low_fix_observed = copy.deepcopy(limits.low)

    (limits.low["visible"],
     limits.low["invisible"]) = get_limits(data.low, is_low_mass=True,
                                           split_point=split_point,
                                           transform_x=gev_to_tev)

    (limits.high["visible"],
     limits.high["invisible"]) = get_limits(data.high, is_low_mass=False,
                                            split_point=split_point,
                                            transform_x=gev_to_tev)

    (limits.low_fix_observed["visible"],
     limits.low_fix_observed["invisible"]) = get_limits(data.low, is_low_mass=True,
                                                        split_point=split_point,
                                                        low_mass_x=True,
                                                        transform_x=gev_to_tev)

    legend = ROOT.TLegend(0.5, 0.50, 0.80, 0.88)

    line_width = 3
    combo = ROOT.TMultiGraph()

    # 2 sigma band
    cur = limits.low["visible"]
    graph = ROOT.TGraphAsymmErrors(len(cur["x"]), cur["x"], cur["expected"],
                                   cur["xerr"], cur["xerr"],
                                   cur["two_sigma_down"], cur["two_sigma_up"])
    graph.SetFillColor(ROOT.kGray + 1)
    graph.SetLineWidth(0)
    combo.Add(graph)
    g_2s_lm = graph

    cur = limits.high["visible"]
    graph = ROOT.TGraphAsymmErrors(len(cur["x"]), cur["x"], cur["expected"],
                                   cur["xerr"], cur["xerr"],
                                   cur["two_sigma_down"], cur["two_sigma_up"])
    graph.SetFillColor(ROOT.kGray + 1)
    graph.SetLineWidth(0)
    combo.Add(graph)
    g_2s_hm = graph

    # 1 sigma band
    cur = limits.low["visible"]
    graph = ROOT.TGraphAsymmErrors(len(cur["x"]), cur["x"], cur["expected"],
                                   cur["xerr"], cur["xerr"],
                                   cur["one_sigma_down"], cur["one_sigma_up"])
    graph.SetFillColor(ROOT.kGray)
    graph.SetLineWidth(0)
    combo.Add(graph)
    g_1s_lm = graph

    cur = limits.high["visible"]
    graph = ROOT.TGraphAsymmErrors(len(cur["x"]), cur["x"], cur["expected"],
                                   cur["xerr"], cur["xerr"],
                                   cur["one_sigma_down"], cur["one_sigma_up"])
    graph.SetFillColor(ROOT.kGray)
    graph.SetLineWidth(0)
    combo.Add(graph)
    g_1s_hm = graph

    # expected
    cur = limits.low["visible"]
    graph = ROOT.TGraph(len(cur["x"]), cur["x"], cur["expected"])
    graph.SetLineColor(ROOT.kBlack)
    graph.SetLineWidth(line_width)
    combo.Add(graph)
    legend.AddEntry(graph, "Expected (95% CL)", "l")

    cur = limits.high["visible"]
    graph = ROOT.TGraph(len(cur["x"]), cur["x"], cur["expected"])
    graph.SetLineColor(ROOT.kBlack)
    graph.SetLineWidth(line_width)
    combo.Add(graph)


    # observed
    cur = limits.low_fix_observed["visible"]
    graph = ROOT.TGraph(len(cur["observed_x"]), cur["observed_x"], cur["observed"])
    graph.SetLineColor(ROOT.kRed + 1)
    graph.SetLineWidth(line_width)
    graph.SetLineStyle(2)
    combo.Add(graph)
    legend.AddEntry(graph, "Observed (95% CL)", "l")

    cur = limits.high["visible"]
    graph = ROOT.TGraph(len(cur["observed_x"]), cur["observed_x"], cur["observed"])
    graph.SetLineColor(ROOT.kRed + 1)
    graph.SetLineWidth(line_width)
    graph.SetLineStyle(2)
    combo.Add(graph)

    # Theory
    theories = {
            "narrow": ([theory.zprime, 1.2, False], ),
            "wide": ([theory.zprime, 10.0, False], ),
            "kk": ([theory.kkgluon, None, None], )
            }.get(limit_type)

    for index, (theory_function, theory_width, use_old_theory) in enumerate(theories, 0):
        class Theory(object): pass

        x, y, label = (theory_function(theory_width, use_old_theory)
                        if theory_width
                        else theory_function())

        # remove all the points below 500 GeV
        x, y = zip(*[(xi, yi) for xi, yi in zip(x, y) if xi >= 500])

        x = gev_to_tev(x)
        graph = ROOT.TGraph(len(x), array('d', x), array('d', y))
        graph.SetLineColor([ROOT.kBlue + 1, ROOT.kMagenta + 1, ROOT.kGreen + 1][index] if index < 3 else ROOT.kBlue + 1)
        graph.SetLineWidth(3)
        graph.SetLineStyle(9)
        combo.Add(graph)
        legend.AddEntry(graph, label, "l")

        theory_data = Theory()
        theory_data.x = x
        theory_data.y = y

        '''
        print("low mass".capitalize())
        expected_exclusion, observed_exclusion = exclude(limits.low["visible"], theory_data)

        print("Expected exclusion:", expected_exclusion)
        print("Observed exclusion:", observed_exclusion)

        print()
        print("high mass".capitalize())
        expected_exclusion, observed_exclusion = exclude(limits.high["visible"], theory_data)

        print("Expected exclusion:", expected_exclusion)
        print("Observed exclusion:", observed_exclusion)
        '''

    legend.AddEntry(g_1s_lm, "Expected #pm 1 s.d.", "f")
    legend.AddEntry(g_2s_lm, "Expected #pm 2 s.d.", "f")

    # Draw
    cv = ROOT.TCanvas()
    style.canvas(cv)

    combo.Draw("3al")

    # Split point
    line = ROOT.TGraph(2)
    line.SetPoint(0, split_point * 1e-3, 1e1)
    line.SetPoint(1, split_point * 1e-3, 5e-3)
    line.SetLineColor(ROOT.kGray + 2)
    line.SetLineStyle(9)
    line.SetLineWidth(3)
    line.Draw("L")

    legend.Draw()

    if logy:
        cv.SetLogy(True)

    if limit_type == 'kk':
        style.combo(combo, maximum=4e2 if logy else None,
                    minimum=1e-2 if logy else None,
                    ytitle="Upper Limit #sigma_{g_{KK}} x B [pb]")
    else:
        style.combo(combo, maximum=4e2 if logy else None)

    style.legend(legend)
    legend.SetTextSize(0.04)

    plot_labels = labels.create({
        #"narrow": "{0:.1f}% Width Assumption".format(theory_width if theory_width else 0),
        "narrow": "Z' with 1.2% Decay Width",
 
        "wide": "Z' with 10% Decay Width",
        "kk": "KK Gluon"}.get(limit_type, None))
    map(ROOT.TObject.Draw, plot_labels)

    cv.Update()
    cv.SaveAs("limits-{0}.pdf".format(limit_type))
        mfcc_feats = mfcc(sig, rate)

        def diff(feats):
            feats_diff = numpy.zeros(feats.shape)
            # delta computed over a +/-2 frame window
            for i in range(2, feats.shape[0]-2):
                feats_diff[i,:] = -2*feats[i-2,:] - feats[i-1,:] + feats[i+1,:] + 2*feats[i+2,:]
            return feats_diff

        mfcc_diff_feats = diff(mfcc_feats)
        mfcc_diff2_feats = diff(mfcc_diff_feats)

        _, energy_feat = fbank(sig, rate)
        log_energy_feat = numpy.log(energy_feat).reshape(energy_feat.shape[0],1)

        return numpy.concatenate((mfcc_feats, mfcc_diff_feats, mfcc_diff2_feats, log_energy_feat), axis=1)[2:-2]

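    # recompute the feature matrix for every sample in every dataset, keeping each dataset's labels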
    new_datasets = []
    for dataset in datasets:
        new_datasets.append(([get_features(sample) for sample in dataset[0]], dataset[1]))

    return tuple(new_datasets)

if __name__ == '__main__':
    from loader import load_data
    datasets, n_classes = load_data()

    new_datasets = extract_features(datasets)
    for data in new_datasets[0][0]:
        print data.shape

Example #49
0
import matplotlib.pyplot as plt
from keras.layers import Dense, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.models import Sequential
from keras.optimizers import SGD
from loader import load_data
from params import img_rows, img_cols, nb_classes

(X_train, Y_train), (X_test, Y_test) = load_data()


def get_model():
    model = Sequential()

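    # two convolutional layers (7x7 then 5x5), 2x2 max pooling, then two fully connected layers with a softmax output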
    model.add(Convolution2D(32, 7, 7, input_shape=(1, img_rows, img_cols),
                            activation='relu', init='he_normal'))

    model.add(Convolution2D(48, 5, 5, activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    model.add(Dense(1024, activation='relu'))
    model.add(Dense(nb_classes))
    model.add(Activation('softmax'))
    sgd = SGD(momentum=0.9, nesterov=True)
    # categorical_crossentropy matches the multi-class softmax output
    model.compile(loss='categorical_crossentropy',
                  optimizer=sgd, metrics=['accuracy'])

    return model

Example #50
0
def plot(limit_type, low_mass, high_mass, logy=True):
    '''
    Application entry point
    '''

    supported_limit_types = ("narrow", "wide", "kk")
    if limit_type not in supported_limit_types:
        raise RuntimeError("supported limit types: {0!r}".format(
                            supported_limit_types))

    data = load_data(low_mass, high_mass)

    class Limits(object): pass

    # container for low and high limits
    limits = Limits
    limits.low = {
            "visible": None,
            "invisible": None
            }

    limits.high = copy.deepcopy(limits.low)
    limits.low_fix_observed = copy.deepcopy(limits.low)

    # pick the low-/high-mass splitting point for the requested limit type
    split_point = {
            "narrow": 1000,
            "wide": 1100,
            "kk": 1000
            }.get(limit_type)

    (limits.low["visible"],
     limits.low["invisible"]) = get_limits(data.low, is_low_mass=True,
                                           split_point=split_point,
                                           transform_x=gev_to_tev)

    (limits.high["visible"],
     limits.high["invisible"]) = get_limits(data.high, is_low_mass=False,
                                            split_point=split_point,
                                            transform_x=gev_to_tev)

    (limits.low_fix_observed["visible"],
     limits.low_fix_observed["invisible"]) = get_limits(data.low, is_low_mass=True,
                                                        split_point=split_point,
                                                        low_mass_x=True,
                                                        transform_x=gev_to_tev)

    smooth.data(limits.high["visible"], n=2)

    legend = ROOT.TLegend(0.5, 0.50, 0.80, 0.88)

    line_width = 3
    combo = ROOT.TMultiGraph()

    # 2 sigma band
    cur = limits.low["visible"]
    graph = ROOT.TGraphAsymmErrors(len(cur["x"]), cur["x"], cur["expected"],
                                   cur["xerr"], cur["xerr"],
                                   cur["two_sigma_down"], cur["two_sigma_up"])
    graph.SetFillColor(ROOT.kGray + 1)
    graph.SetLineWidth(0)
    combo.Add(graph)
    g_2s_lm = graph

    cur = limits.high["visible"]
    graph = ROOT.TGraphAsymmErrors(len(cur["x"]), cur["x"], cur["expected"],
                                   cur["xerr"], cur["xerr"],
                                   cur["two_sigma_down"], cur["two_sigma_up"])
    graph.SetFillColor(ROOT.kGray + 1)
    graph.SetLineWidth(0)
    combo.Add(graph)
    g_2s_hm = graph

    # 1 sigma band
    cur = limits.low["visible"]
    graph = ROOT.TGraphAsymmErrors(len(cur["x"]), cur["x"], cur["expected"],
                                   cur["xerr"], cur["xerr"],
                                   cur["one_sigma_down"], cur["one_sigma_up"])
    graph.SetFillColor(ROOT.kGray)
    graph.SetLineWidth(0)
    combo.Add(graph)
    g_1s_lm = graph

    cur = limits.high["visible"]
    graph = ROOT.TGraphAsymmErrors(len(cur["x"]), cur["x"], cur["expected"],
                                   cur["xerr"], cur["xerr"],
                                   cur["one_sigma_down"], cur["one_sigma_up"])
    graph.SetFillColor(ROOT.kGray)
    graph.SetLineWidth(0)
    combo.Add(graph)
    g_1s_hm = graph

    # expected
    cur = limits.low["visible"]
    graph = ROOT.TGraph(len(cur["x"]), cur["x"], cur["expected"])
    graph.SetLineColor(ROOT.kBlack)
    graph.SetLineWidth(line_width)
    combo.Add(graph)
    legend.AddEntry(graph, "Expected (95% CL)", "l")

    cur = limits.high["visible"]
    graph = ROOT.TGraph(len(cur["x"]), cur["x"], cur["expected"])
    graph.SetLineColor(ROOT.kBlack)
    graph.SetLineWidth(line_width)
    combo.Add(graph)

    # observed
    cur = limits.low_fix_observed["visible"]
    graph = ROOT.TGraph(len(cur["observed_x"]), cur["observed_x"], cur["observed"])
    graph.SetLineColor(ROOT.kRed + 1)
    graph.SetLineWidth(line_width)
    combo.Add(graph)
    legend.AddEntry(graph, "Observed (95% CL)", "l")

    cur = limits.high["visible"]
    graph = ROOT.TGraph(len(cur["observed_x"]), cur["observed_x"], cur["observed"])
    graph.SetLineColor(ROOT.kRed + 1)
    graph.SetLineWidth(line_width)
    combo.Add(graph)

    # Theory
    theories = {
            "narrow": ([theory.zprime, 1.2, False], ),
            "wide": ([theory.zprime, 10.0, False], ),
            "kk": ([theory.kk, None, None], )
            }.get(limit_type)

    for index, (theory_function, theory_width, use_old_theory) in enumerate(theories, 0):
        class Theory(object): pass

        x, y, label = (theory_function(theory_width, use_old_theory)
                        if theory_width
                        else theory_function())

        # remove all the points below 500 GeV
        x, y = zip(*[(xi, yi) for xi, yi in zip(x, y) if xi >= 500])

        x = gev_to_tev(x)
        graph = ROOT.TGraph(len(x), array('d', x), array('d', y))
        graph.SetLineColor([ROOT.kBlue + 1, ROOT.kMagenta + 1, ROOT.kGreen + 1][index] if index < 3 else ROOT.kBlue + 1)
        graph.SetLineWidth(3)
        graph.SetLineStyle(2)
        combo.Add(graph)
        legend.AddEntry(graph, label, "l")

        theory_data = Theory()
        theory_data.x = x
        theory_data.y = y

        if limit_type != 'kk':
            print("low mass".capitalize())
            expected_exclusion, observed_exclusion = exclude(limits.low["visible"], theory_data)

            print("Expected exclusion:", expected_exclusion)
            print("Observed exclusion:", observed_exclusion)

            print()

        print("high mass".capitalize())
        expected_exclusion, observed_exclusion = exclude(limits.high["visible"], theory_data)

        print("Expected exclusion:", expected_exclusion)
        print("Observed exclusion:", observed_exclusion)

    legend.AddEntry(g_1s_lm, "#pm 1 #sigma Expected", "f")
    legend.AddEntry(g_2s_lm, "#pm 2 #sigma Expected", "f")

    # Draw
    cv = ROOT.TCanvas()
    style.canvas(cv)

    combo.Draw("3al")

    # Split point
    if limit_type != 'kk':
        line = ROOT.TGraph(2)
        line.SetPoint(0, split_point * 1e-3, 1e1)
        line.SetPoint(1, split_point * 1e-3, 3e-2)
        line.SetLineColor(ROOT.kGray + 2)
        line.SetLineStyle(2)
        line.SetLineWidth(3)
        line.Draw("L")

    legend.Draw()

    if logy:
        cv.SetLogy(True)

    style.combo(combo, maximum=1e2 if logy else None)
    combo.GetXaxis().SetRangeUser(0, 4)
    style.legend(legend)
    legend.SetTextSize(0.04)

    plot_labels = labels.create({
        #"narrow": "{0:.1f}% Width Assumption".format(theory_width if theory_width else 0),
        "narrow": "Z' with 1% Decay Width",
 
        "wide": "Z' with 10% Decay Width",
        "kk": "KK Gluon Assumption"}.get(limit_type, None))
    map(ROOT.TObject.Draw, plot_labels)

    cv.Update()
    cv.SaveAs("limits-{0}.pdf".format(limit_type))
Example #51
0
File: learn.py Project: i3149/lc
def load_data(filename):
    return ld.load_data(filename)
Example #52
0
#!/usr/bin/env python

'''
Created by Samvel Khalatyan, Jun 03, 2012
Copyright 2012, All rights reserved
'''

from __future__ import division

import copy

import ROOT

from loader import load_data,get_limits

data = load_data()

class Limits(object): pass

limits = Limits
limits.low = {
        "visible": None,
        "invisible": None
        }

limits.high = copy.deepcopy(limits.low)

(limits.low["visible"],
 limits.low["invisible"]) = get_limits(data.low, is_low_mass=True)

(limits.high["visible"],
import falcon
import default_handler

from loader import load_data

import logging
import sys
from wsgiref import simple_server

root = logging.getLogger()
root.setLevel(logging.DEBUG)
#
ch = logging.StreamHandler(sys.stdout)
ch.setLevel(logging.DEBUG)
ch.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
root.addHandler(ch)


MAPPING = load_data()

api = application = falcon.API()

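# register a route for every URL in the loaded mapping, each served by a DefaultHandler built from its expectations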
for url, expectations in MAPPING.items():
    print(url)
    logging.info('Route added -->> %s' % url)
    api.add_route('%s' % url, default_handler.DefaultHandler(expectations))


if __name__ == '__main__':
    httpd = simple_server.make_server('127.0.0.1', 8000, application)
    httpd.serve_forever()
Example #54
0
features_listB = ['to_messages', 'from_poi_to_this_person', 'from_messages', 'from_this_person_to_poi', 'shared_receipt_with_poi'] 


features_listC = ['maildata', 'to_ratio', 'from_ratio',
                  'comm', 'comm_sum', 'comm_max', 'comm_min',
                  'comm_ratio', 'comm2', 'from_ratio_log']

features_label = ['poi']

features_list_full = features_label + features_listA + features_listB + features_listC

features_list = features_list_full


### Load modified dataset as my_dataset
my_dataset = load_data()

print 'initial features count: ', len(features_listA + features_listB)
print 'total datapoints count: ', len(my_dataset)

### Extract the labels and selected features from my dataset
data = featureFormat(my_dataset, features_list, sort_keys = True)
labels, features = targetFeatureSplit(data)




### Task 3.2:  Evaluate features
print '\nInitial features:'
print features_list
Example #55
0
	def run_simulation(self, save_to_dot=True, save_to_json=True):
		#import Seed and lifetable data
		this_generation_population = Population()
		next_generation_population = None

		seed_group = seed.load_group(this_generation_population)
		table_data = loader.load_data()
		lifetable = table_data.life_table
		dispersal_table =\
		 table_data.dispersal_table
		random_module = RandomModule()

		#create analytics lists
		age_record_list = []
		population_record_list = []
		male_population_record_list = []
		female_population_record_list = []

		real_birth_rate_list = []
		real_death_rate_list = []

		edges_per_agent_list = []
		adult_males_list = []
		adult_females_list = []
		adult_females_per_males_list = []

		total_agent_relationships_list = []

		group_composition_list = []

		death_counter = Counter() #used to make sure the correct number
		#of deaths occur
		birth_counter = Counter() #used to make sure the correct number 
		#of births take place

		#assign all_groups by creating several copies of the 
		#seed generation
		for i in range(0, self.NUMBER_OF_SEED_GROUPS + 1):
			this_generation_population.add_group(copy.deepcopy(seed_group))
		
		"""
		I was having a strange error where the 0th group 
		was loaded incorrectly. This is a temporary fix

		"""
		del this_generation_population.groups[0]

		for i in range (0, self.NUMBER_OF_GENERATIONS):
			self.per_generation_printout(i)
			#analytics
			this_age_record = []
			this_population_record = 0
			this_male_population_record = 0
			this_female_population_record = 0
			this_edges_per_agent = 0
			this_generation_adult_males = 0
			this_generation_adult_females = 0
			this_generation_group_composition_list = []

			#reset counters
			death_counter.reset()
			birth_counter.reset()

			#make the next gen population a copy of this gen's pop
			this_generation_population.generation = i

			next_generation_population =\
			 copy.deepcopy(this_generation_population)

			#run the simulation for each sub_group.
			for j in range(0, len(this_generation_population.groups)):	
				this_generation = this_generation_population.groups[j]
				new_generation = next_generation_population.groups[j]

				females_to_male =\
				 this_generation.get_females_to_male()

				for agent_index in this_generation.whole_set:
					#print str(agent_index) + ", " + str(len(this_generation.agent_array))

					this_agent =\
					 this_generation.agent_dict[agent_index]
					new_agent =\
					 new_generation.agent_dict[agent_index]

					#increment age
					new_generation.promote_agent(new_agent)

					#check birth_rate
					if this_agent.index in this_generation.female_set:
						chance_of_birth =\
						 lifetable.chance_of_birth(females_to_male, 
						 	this_agent.age)

					#check for birth
					self.check_for_birth(this_generation, new_generation,
						this_agent, new_agent, females_to_male,
						agent_index, lifetable, random_module,
						birth_counter, male_population_record_list)

					#check for death
					self.check_for_death(lifetable, females_to_male, 
						this_agent, new_agent, new_generation,
						random_module, death_counter)

					#check for dispersal
					self.check_for_dispersal(dispersal_table, females_to_male,
						this_agent, new_agent, this_generation,
						new_generation,
						this_generation_population, 
						next_generation_population, random_module)

					#check for friendships
					friendships.check_for_friendships(this_agent,
						new_agent, this_generation, new_generation,
						random_module)

					#unique changes
					self.conduct_changes_unique_to_experiment_at_agent(
						this_generation_population, 
						next_generation_population,
						this_generation, new_generation, this_agent, 
						new_agent, females_to_male, lifetable, 
						random_module, table_data
						)

					#analytics
					this_edges_per_agent += this_agent.edges()

					this_age_record.append(this_agent.age)
					this_population_record += 1

					if (this_agent.index in this_generation.male_set):
						this_male_population_record += 1
					elif (this_agent.index in this_generation.female_set):
						this_female_population_record += 1

				this_generation_adult_males +=\
				 len(this_generation.male_set)
				this_generation_adult_females +=\
				 len(this_generation.female_set)

				this_generation_group_composition_list.append(
					len(this_generation.whole_set)
					)

			self.conduct_changes_unique_to_experiment_at_gen(
				this_generation_population, next_generation_population,
				i, self.NUMBER_OF_GENERATIONS, table_data)

			#set the old gen to the new one
			this_generation_population = next_generation_population

			group_composition_list.append(this_generation_group_composition_list)

			number_of_groups = len(this_generation_population.groups)

			adult_males_per_group =\
			 float(this_generation_adult_males)/number_of_groups
			adult_females_per_group =\
			 float(this_generation_adult_females)/number_of_groups
			adult_males_list.append(adult_males_per_group)
			adult_females_list.append(adult_females_per_group)

			#handle div by 0 errors in calculating 
			#females per male
			if (adult_males_per_group == 0):
				adult_females_per_males_list.append(
					adult_females_per_group/1
					)
			elif (adult_females_per_group == 0):
				adult_females_per_males_list.append(0)
			else:
				adult_females_per_males_list.append(
					float(adult_females_per_group)/float(adult_males_per_group)
					)

			if (save_to_dot):
				self.save_data_to_dot(this_generation_population.get_dot_string(), i)
			if (save_to_json):
				self.save_data_to_json(this_generation_population.get_json_string(), i)

			average_edges_per_agent =\
			 float(this_edges_per_agent)/this_population_record
			edges_per_agent_list.append(average_edges_per_agent)

			real_death_rate_list.append(
				float(death_counter.getCount())/this_population_record)
			real_birth_rate_list.append(
				float(birth_counter.getCount())/this_population_record)
			age_record_list.append(this_age_record)
			male_population_record_list.append(this_male_population_record)
			female_population_record_list.append(
				this_female_population_record)
			population_record_list.append(this_population_record)
			
			total_agent_relationships_list = (
				this_generation_population.\
				get_population_relationship_stats())

		self.save_data(population_record_list, male_population_record_list,
		 female_population_record_list, age_record_list, 
		 real_birth_rate_list, real_death_rate_list,
		 edges_per_agent_list,
		 adult_females_per_males_list,
		 group_composition_list,
		 total_agent_relationships_list)

		print (birth_counter.getCount())
		print (death_counter.getCount())
Example #56
0
    Run on GPU: THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python mnist_cnn.py
    Get to 99.25% test accuracy after 12 epochs (there is still a lot of margin for parameter tuning).
    16 seconds per epoch on a GRID K520 GPU.
'''

batch_size = 128
nb_classes = 10
nb_epoch = 12

img_rows, img_cols = 28, 28             # input image dimensions
nb_filters = 32                         # number of convolutional filters to use
nb_pool = 2                             # size of pooling area for max pooling
nb_conv = 3   #3                        # convolution kernel size

# the data, shuffled and split between train and test sets
(X_train, y_train), (X_test, y_test) = loader.load_data("mnist.pkl")
checkpoint = tm.time()

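# reshape to (samples, channels, rows, cols) for the Theano backend and scale pixel values to [0, 1]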
X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)
X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols)
X_train = X_train.astype("float32")
X_test = X_test.astype("float32")
X_train /= 255
X_test /= 255
print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

X_real_test = pd.read_csv('test.csv').values
X_real_test = X_real_test.reshape(X_real_test.shape[0], 1, img_rows, img_cols)
X_real_test = X_real_test.astype("float32")
Example #57
0
	def setUp(self):
		self.data = loader.load_data()
Example #58
0
import numpy as np
from loader import load_data
from regression.mlp2 import MLP

n_in = 22
n_hidden = 40
n_out = 1
dropout_prob = 0.5
learning_rate = 0.1
n_epochs = 500
batch_size = 10
weight_decay = 0.00001
K = 10

x, y = load_data('../data2.txt')

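# manual K-fold cross-validation: each contiguous block of ~len(x)/K samples serves once as the validation set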
step = len(x) / K + 1
mse = 0.0
for i in np.arange(0, len(x) + 1, step):
    valid_set = x[i:i + step, :], y[i:i + step]
    train_set = np.delete(x, range(i, i + step), axis=0), np.delete(y, range(i, i + step))
    datasets = {'train': train_set, 'valid': valid_set}

    ml = MLP(n_in, n_hidden, n_out, dropout_prob, learning_rate, weight_decay)
    mse += ml.test_mlp(datasets,n_epochs, batch_size)

print 'mse:', mse/K