def create_truth_bb(dbfile, outfiles):
    conn = sqlite3.connect(dbfile)
    for THOLD_i, outfile in zip(THOLDS, outfiles):
        cells, conn_mat, dist_mats = preprocess.create_data(conn, process.THOLDS[THOLD_i])

        # initialize a BetaBernoulli IRM on the connectivity graph and pin the
        # domain assignment to the cells' type_id labels
        irm_latent, irm_data = irm.irmio.default_graph_init(conn_mat, 'BetaBernoulliNonConj')
        irm_latent['relations']['R1']['hps'] = {'alpha': 1.0, 'beta': 1.0}
        irm_latent['domains']['d1']['assignment'] = irm.util.canonicalize_assignment(cells['type_id'])

        irm_model = irm.irmio.create_model_from_data(irm_data)
        rng = irm.RNG()
        irm.irmio.set_model_latent(irm_model, irm_latent, rng)
        irm.irmio.estimate_suffstats(irm_model, rng, ITERS=40)

        learned_latent = irm.irmio.get_latent(irm_model)
        pred = compute_prob_matrix(learned_latent, irm_data, model_name="BetaBernoulliNonConj")

        pickle.dump({'pred_mat': pred,
                     'truth_mat': irm_data['relations']['R1']['data'],
                     'thold_i': THOLD_i},
                    open(outfile, 'w'))
def create_truth(dbfile, outfiles):
    conn = sqlite3.connect(dbfile)
    for THOLD_i, outfile in zip(THOLDS, outfiles):
        cells, conn_mat, dist_mats = preprocess.create_data(conn, process.THOLDS[THOLD_i])

        irm_latent, irm_data = models.create_conn_dist_lowlevel(conn_mat, dist_mats, 'xyz',
                                                                model_name="LogisticDistance")
        irm_latent['relations']['R1']['hps'] = {'lambda_hp': 50.0,
                                                'mu_hp': 50.0,
                                                'p_max': 0.9,
                                                'p_min': 0.01}
        irm_latent['domains']['d1']['assignment'] = irm.util.canonicalize_assignment(cells['type_id'])

        irm_model = irm.irmio.create_model_from_data(irm_data)
        rng = irm.RNG()
        irm.irmio.set_model_latent(irm_model, irm_latent, rng)
        irm.irmio.estimate_suffstats(irm_model, rng, ITERS=40)

        learned_latent = irm.irmio.get_latent(irm_model)
        pred = compute_prob_matrix(learned_latent, irm_data)

        pickle.dump({'pred_mat': pred,
                     'truth_mat': irm_data['relations']['R1']['data']['link'],
                     'thold_i': THOLD_i},
                    open(outfile, 'w'))
def main():
    #-- Load data --#
    imdb_pos = 'imdb/imdb.pos'
    imdb_neg = 'imdb/imdb.neg'
    rt_pos = 'imdb/rt_critics.pos'
    rt_neg = 'imdb/rt_critics.neg'
    data_imdb, etq_imdb = create_data(imdb_pos, imdb_neg)
    data_rt, etq_rt = create_data(rt_pos, rt_neg)

    # merge the training data and load the test set
    data_tr, etq_train = fusion_data(data_imdb, data_rt, etq_imdb, etq_rt)
    test_rt, label_rt = loadTest('imdb/rt_critics.test')

    # build the vocabulary
    list_vocab = vocab(data_tr + test_rt)
    print(len(list_vocab))
    size_vocab = len(list(list_vocab.keys()))

    # convert words to integer indices for the neural network
    data_train = convert_data(data_tr, list_vocab)
    data_test = convert_data(test_rt, list_vocab)

    # split the training set into train and validation data
    X_train, X_val, y_train, y_val = train_test_split(data_train, etq_train, test_size=0.2)

    # determine the maximum sequence length
    sequence_max = max_len(X_train, X_val, data_test)
    print('sequence max :', sequence_max)

    label_rt = np.asarray(label_rt)
    data_test = pad_sequences(np.asarray(data_test), maxlen=sequence_max, padding='post')

    #model = train_model(X_train, y_train, X_val, y_val, sequence_max, size_vocab, output_dim=300, batch_size=50)
    model = load_model('CBOW_keras_model.hdf5')
    print(model.evaluate(data_test, label_rt))
def make_predictions(path, datadir, model):
    """
    Process the images in the DATA_TO_PREDICT folder to extract the segments,
    then pass them to the model to get the predictions.

    :param path: path to the main directory that contains the model
    :type path: str
    :param datadir: path to the image folder
    :type datadir: str
    :param model: model to ask for predictions to be made
    :type model: tensorflow.python.keras.engine.sequential.Sequential
    :return: array of predictions
    :rtype: Tuple[numpy.ndarray, List[str]]
    """
    titles = []
    for file in os.listdir(os.path.join(path, datadir)):
        title = file.title().lower()
        if title.split('.')[-1] == config.UTILS.get('IMAGE_EXTENTION'):
            titles.append(title)

    try:
        print('Image processing ...')
        cropped_images, titles = create_data(os.path.join(path, datadir), showImages=False)
    except (CreateDataError, CannotLoadImagesError, ValueError):
        raise

    # resize every segment to the model's expected input size and add a channel axis
    shape = model.input_shape
    resized_segments = resize_segments(cropped_images, shape[1:3])
    resized_segments = np.array(resized_segments)
    resized_segments = resized_segments.reshape(-1, resized_segments.shape[1],
                                                resized_segments.shape[2], 1)
    try:
        predictions = model.predict(resized_segments)
    except Exception:
        raise

    if predictions is not None and len(predictions) != 0:
        return predictions, titles
    raise Exception("Couldn't make any predictions")
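# --- Usage sketch (not part of the original source) ---
# A minimal, hedged example of calling make_predictions(); the project root,
# the data folder name and the saved-model file 'model.h5' are assumptions,
# and tf.keras.models.load_model is only one possible way to obtain `model`.
import tensorflow as tf

if __name__ == '__main__':
    project_root = '.'                   # hypothetical project root
    data_folder = 'DATA_TO_PREDICT'      # folder name taken from the docstring above
    cnn = tf.keras.models.load_model('model.h5')  # hypothetical model file
    predictions, titles = make_predictions(project_root, data_folder, cnn)
    for title, pred in zip(titles, predictions):
        print(title, pred.argmax())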
def create_data(infile, outfile):
    conn = sqlite3.connect(infile)  # infile is the sqlite connectivity database
    cells, conn_mat, dist_mats = preprocess.create_data(conn, AREA_THOLD_MIN)

    dist_xyz = np.sqrt(dist_mats['x']**2 + dist_mats['y']**2 + dist_mats['z']**2)
    dist_yz = np.sqrt(dist_mats['y']**2 + dist_mats['z']**2)

    conn_mat = conn_mat.astype(np.float)

    # restrict to cells that have at least one outgoing edge
    have_edges_i = (conn_mat.sum(axis=1) > 0)
    conn_mat_have_edges = conn_mat[have_edges_i]
    conn_mat_have_edges = conn_mat_have_edges[:, have_edges_i]

    pickle.dump({'conn_mat': conn_mat,
                 'cells': cells,
                 'dist_mats': dist_mats,
                 'dist_yz': dist_yz,
                 'dist_xyz': dist_xyz,
                 'have_edges_i': have_edges_i,
                 'conn_mat_have_edges': conn_mat_have_edges},
                open(outfile, 'w'))
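# --- Read-back sketch (not part of the original source) ---
# A minimal example of loading the dictionary pickled above; the filename
# 'retina.data.pickle' is a hypothetical placeholder.
import pickle

with open('retina.data.pickle', 'rb') as fh:
    d = pickle.load(fh)
print(d['conn_mat'].shape, d['conn_mat_have_edges'].shape)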
def create_tholds():
    """
    Systematically vary the threshold for "synapse" and whether or not
    we use the z-axis
    """
    for tholdi, thold in enumerate(THOLDS):
        outfile = td("retina.%d.data.pickle" % tholdi)
        yield RETINA_DB, [outfile], thold

@files(create_tholds)
def data_create_thold(dbname, (retina_outfile, ), AREA_THOLD_MIN):
    """
    """
    np.random.seed(0)
    conn = sqlite3.connect(dbname)
    cells, conn_mat, dist_mats = preprocess.create_data(conn, AREA_THOLD_MIN)

    pickle.dump({'cells': cells,
                 'conn_mat': conn_mat,
                 'dist_mats': dist_mats},
                open(retina_outfile, 'w'))

def create_latents_clist_params():
    for a in create_tholds():
        inf = a[1][0]
        for vsi, vs in enumerate(VAR_SCALES):
            for cki, comp_k in enumerate(COMP_KS):
                outf_base = inf[:-len('.data.pickle')]
    (train_pos1, val_pos1) = (pos1_data[bound:], pos1_data[:bound])
    (train_pos2, val_pos2) = (pos2_data[bound:], pos2_data[:bound])
    (train_y, val_y) = (y_data[bound:], y_data[:bound])
    return (train_token, val_token), (train_pos1, val_pos1), (train_pos2, val_pos2), (train_lexical, val_lexical), (train_y, val_y)


with open('word2IDx.pickle', 'rb') as f:
    word2IDx = pickle.load(f)

train_token, train_pos1, train_pos2, train_lexical, train_y = create_data(
    word2IDx, 'train', pos_pos1_list, pos_pos2_list)

class_weights = class_weight.compute_class_weight('balanced', np.unique(train_y), train_y)
#print(class_weights)
class_weight_dict = dict(enumerate(class_weights))
#print(class_weight_dict)

train_y = np_utils.to_categorical(train_y, n_out)

test_token, test_pos1, test_pos2, test_lexical = create_data(
    word2IDx, 'test', pos_pos1_list, pos_pos2_list)

(train_token, val_token), (train_pos1, val_pos1), (train_pos2, val_pos2), (train_lexical,
import sys

from preprocess import np, pkl, create_data

# For Vivi:
sys.path.insert(1, "C:/Users/viniv/OneDrive/Bureau/MOPSI2/")
from network import *

# Number of points on which the value of the function is known.
N = 2000

# Build a neural network with 2 layers of width 3
nn = Net2(3)


def n(x: float):
    """
    Function evaluated by the neural network.

    Arguments:
        x {float}

    Returns:
        float
    """
    x_tensor = torch.FloatTensor([x])
    y_tensor = nn(x_tensor)
    y = y_tensor.item()
    return y


create_data(N, n, "nnW3L2")

path = "C:/Users/viniv/OneDrive/Bureau/MOPSI2/preprocessing/nnw3L2.py"
torch.save(nn.state_dict(), path)
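# --- Reload sketch (not part of the original source) ---
# Assumes Net2(3) rebuilds the same architecture whose weights were saved above;
# `path` reuses the variable defined above.
nn_reloaded = Net2(3)
nn_reloaded.load_state_dict(torch.load(path))
nn_reloaded.eval()
print(nn_reloaded(torch.FloatTensor([0.5])).item())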
"V13317.h5", "V13346.h5", "V16531.h5","V16552.h5", "V16578.h5"] x_train, y_train, x_test, y_test = preprocess.create_data(dir, path_list) y_predict = image_train(x_train, y_train, x_test) print len(y_test), len(y_predict) errors = caculate_erros(x_test, y_test, y_predict) for i in range(0, len(errors)): print errors[i] ''' if __name__ == '__main__': dir = "/home/dmp/ct/data/refine/" #dir = "/home/dmp/ct/data/inptv/" path_list = ["V13244.h5", "V13285.h5", "V13317.h5", "V13346.h5", "V16531.h5","V16552.h5", "V19799_outptv.h5","V19868_outptv.h5","V20101_outptv.h5","V20243_outptv.h5"] x_train, y_train, x_test, y_test = preprocess.create_data(dir, path_list) for i in range(0,20): print "i=",i y_predict = image_train(x_train, y_train, x_test, "model"+str(i)+".json", "weights"+str(i)+".h5") errors = caculate_erros(x_test, y_test, y_predict) for i in range(0, len(errors)): print errors[i] ''' if __name__ == '__main__': test("model.json", "weights/weights-best.h5", "/home/dmp/ct/data/test_outPTV/") #choose("model.json", "weights/weights-best.h5", "/home/dmp/ct/data/outptv/") '''
import os

import numpy as np
from preprocess import create_data
from sklearn.model_selection import train_test_split
from keras.applications.vgg16 import preprocess_input
from vgg_network import VGG_Network
import keras

if __name__ == '__main__':
    print('create test and dev data')
    #os.chdir('./Chess-Board-Recognition/src')
    train_path = '../data/raw/Chess ID Public Data/output_train/'
    test_path = '../data/raw/Chess ID Public Data/output_test/'

    train_data, train_lbl = create_data(train_path, train=True)
    test_data, test_lbl = create_data(test_path, train=False)

    x = np.concatenate([train_data, test_data], axis=0)
    y = np.concatenate([train_lbl, test_lbl], axis=0)
    train_data, test_data, train_lbl, test_lbl = train_test_split(
        x, y, test_size=0.22, random_state=42)

    train_data = preprocess_input(train_data, mode='tf')
    test_data = preprocess_input(test_data, mode='tf')
    input_shape = (227, 227, 3)

    print('Define model')
    model = VGG_Network(input_dim=input_shape,
                        output_classes=13,
                        last_freez_layers=None,
                        dropout=0.15)
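    # --- Training sketch (not part of the original source) ---
    # Assumes VGG_Network exposes the underlying keras.Model through a
    # hypothetical `.model` attribute; optimizer, loss and epoch count are
    # placeholders, and labels are assumed to be integer class ids.
    keras_model = model.model
    keras_model.compile(optimizer='adam',
                        loss='categorical_crossentropy',
                        metrics=['accuracy'])
    keras_model.fit(train_data,
                    keras.utils.to_categorical(train_lbl, 13),
                    validation_data=(test_data, keras.utils.to_categorical(test_lbl, 13)),
                    epochs=10,
                    batch_size=32)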
# -*- coding: utf-8 -*-
"""
Initialization of the training sets for a piecewise continuous function
with less discontinuity than the first one.
"""
# For Louise and Vivi:
# from preprocessing.preprocess import np, pkl, create_data
# For Jean:
from preprocess import np, pkl, create_data

# Number of points on which the value of the function is known.
N = 2000


def g2(x: float):
    """
    The piecewise continuous function that will be used to check the quality
    of the approximation.

    Arguments:
        x {float} -- Will be taken in [0, 1]

    Returns:
        int -- 0 or 1
    """
    return int(x * 2)


create_data(N, g2, "piecewise2")
""" Initialization of the training sets of the hat function. """ # Pour Louise et Vivi from preprocess import np, pkl, create_data # Pour Jean : # from preprocess import np, pkl, create_data # Number of points on which the value of the function is known. N = 2000 def g(x: float): """ The hat function that will be used to check the quality of the approximation. Arguments: x {float} -- Will be taken in [0, 1] Returns: float """ res = 2 * x if x > 0.5: res = 2 * (1 - x) return res create_data(N, g, "hat")