Example #1
def main(config, process, modeltype='cnsi', model2test=None):

    print('model name..')
    print(modeltype)

    if process == 'pretrain':
        print('pretraining config..:')
        print(json.dumps(config['pretrain'], indent=2))
        pretrain(config, 'coco')

    print('loading data..')
    starttime = time.time()
    num_words, embedding_matrix, train_data, valid_data, test_data = loadData(
        config)
    print('data loaded in {} seconds'.format(time.time() - starttime))

    if process == 'trainstage1':
        print('stage 1 model parameters..:')
        print(json.dumps(config['stage1'], indent=2))
        trainstage1(config, train_data, valid_data, num_words,
                    embedding_matrix)

    if process == 'trainstage2':
        print('stage 2 model parameters..:')
        print(json.dumps(config['stage2'], indent=2))
        trainstage2(config, train_data, valid_data, num_words,
                    embedding_matrix, modeltype)

    if process == 'test':
        test(config, model2test, test_data, num_words, embedding_matrix,
             modeltype)

    return True
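
The main() above expects an already-parsed JSON config dict and a process name ('pretrain', 'trainstage1', 'trainstage2' or 'test'). A minimal, hypothetical driver for it might look like the following sketch (file layout and CLI flag names are assumptions, not part of the original project):

import argparse
import json

def cli():
    # hypothetical wrapper around main(); argument names are assumptions
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', required=True, help='path to a JSON config file')
    parser.add_argument('--process', required=True,
                        choices=['pretrain', 'trainstage1', 'trainstage2', 'test'])
    parser.add_argument('--modeltype', default='cnsi')
    parser.add_argument('--model2test', default=None)
    args = parser.parse_args()

    with open(args.config) as f:
        config = json.load(f)

    main(config, args.process, modeltype=args.modeltype, model2test=args.model2test)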
Example #2
def main():
    directory = 'common'
    windowSize = 256
    windowSlide = 32
    removePart = 0.15

    testing = False  # if True use a smaller set of data and don't save features to csv files
    classify = True  # make classification and store results using ALL features if True
    #classWeight = None # balanced or None (for decision tree classifier)
    classWeight = 'balanced'

    parameters = str(windowSize) + '_' + str(windowSlide)

    activities, data, totalDataPoints, labels = loadData(
        r"data\ADL_Dataset", windowSize, windowSlide, removePart)

    print("\nActivities:\n", activities)
    print("\nNumber of activities read: ", len(activities))
    print("Number of data points read: ", totalDataPoints)
    print("Number of readings read: ", len(data))
    print("Number of window labels read: ", len(labels), "\n")

    if testing:
        N = 30
    else:
        N = len(labels)

    # convert data into a Pandas dataframe with the specified column names
    # the id column contains one number for each different window and is used to group datapoints for each window
    df_data = pd.DataFrame(data, columns=['x', 'y', 'z', 'id'])
    print("\ndf_data shape:")
    print(df_data.shape)

    # take the first N windows: a small subset when testing, otherwise the entire data set
    df_test = df_data.iloc[:windowSize * (N), :]
    print("\ndf_test shape:")
    print(df_test.shape)
    print("\ndf_test first 5 rows:")
    print(df_test.head())

    # convert label data to Pandas series
    y = pd.Series(labels[:N])
    print("\nShape of Y: ")
    print(y.shape)
    print(y.head())

    # Extract features on the windows
    X = extractFeatures(df_test)
    #X = loadCsv('common', '288_32_features_all_190409_1511.csv')

    # save all features to csv file
    if not testing:
        writeCsv(directory, parameters, 'features_all', X)
        writeCsv(directory, parameters, 'labels_all', y)

    ##### train classifier on all features #####

    if classify:
        classification(parameters, classWeight, X, y, directory)
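
extractFeatures() itself is not shown in this snippet; as a rough illustration of the per-window grouping described in the comments above (the 'id' column identifies each window), a hypothetical minimal feature extractor could look like this:

import pandas as pd

def extract_basic_features(df):
    # aggregate each window (grouped by the 'id' column) into simple per-axis statistics;
    # this is only a sketch, not the project's actual extractFeatures()
    return df.groupby('id')[['x', 'y', 'z']].agg(['mean', 'std', 'min', 'max'])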
Example #3
def baseLineError(datasetName, baseLineRefDict, baselineName):
    ''' Compute a baseline method and evaluate it by comparing it to a reference

    Args:
        datasetName (str): the name of the dataset to compute baselines on
        baseLineRefDict (dict): a dictionary containing one reference vector for each type of baseline
        baselineName (str): the type of baseline desired (can be 'mos', 'sr_mos' or 'zs_sr_mos')
    Returns:
        the error made by the chosen baseline method on the chosen dataset, relative to the reference
    '''

    dataset, _ = load_data.loadData(datasetName)
    baseline, _ = computeBaselines(dataset, baselineName)

    error = np.sqrt(
        np.power(baseline - baseLineRefDict[baselineName], 2).sum() /
        len(baseLineRefDict[baselineName]))

    return error
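
The error computed above is a plain RMSE between the baseline vector and its reference; written out on its own with made-up toy vectors it is:

import numpy as np

baseline = np.array([3.1, 4.2, 2.8])   # toy values, for illustration only
reference = np.array([3.0, 4.0, 3.0])
rmse = np.sqrt(np.power(baseline - reference, 2).sum() / len(reference))
# equivalently: np.sqrt(np.mean((baseline - reference) ** 2))
print(rmse)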
Example #4
def main(config, process):

    # load all data
    print('Loading data')
    num_words, embedding_matrix, train_data, valid_data, test_data = loadData(
        config)

    assert config['model'] == 'baseline'
    print('config:')
    print(json.dumps(config, indent=2))

    init(config['stage1'])
    if process == 'train':
        train(config, num_words, embedding_matrix, train_data, valid_data)
    if process == 'test':
        test(config, num_words, embedding_matrix, test_data)
    if process == 'eval':
        evaluate(config)

    return True
Example #5
def main():
    directory = 'common'
    windowSize = 256
    windowSlide = 32
    removePart = 0.15
    dataInfo = 'windows_all'

    parameters = str(windowSize) + '_' + str(windowSlide)

    activities, data, totalDataPoints, labels = loadData(
        r"data\ADL_Dataset", windowSize, windowSlide, removePart)

    print("\nActivities:\n", activities)
    print("\nNumber of activities read: ", len(activities))
    print("Number of data points read: ", totalDataPoints)
    print("Number of readings read: ", len(data))
    print("Number of window labels read: ", len(labels), "\n")

    df_data = pd.DataFrame(data, columns=['x', 'y', 'z', 'id'])
    print("\ndf_data shape:")
    print(df_data.shape)

    writeCsv(directory, parameters, dataInfo, df_data)
Example #6
    #get temperatures and filenames
    Tlist = list(np.genfromtxt('temperatures'))
    rep1 = 0  #don't use the first rep1 replicas.  There is a better way to do this.
    Tlist = Tlist[(rep1):]
    nrep = len(Tlist)
    filenames = [inprefix + '.' + str(n + rep1 + 1) for n in range(nrep)]

    #OK, now we have a list of temperatures and filenames for each replica
    print "replica list:"
    for n in range(nrep):
        print Tlist[n], filenames[n]

    print "USING nfree = ", nfree

    #load data
    datalist = load_data.loadData(filenames, [ecolumn], fskip=rskip)

    #determine bin edges
    binenergy1 = load_data.determineBinEdge(nebins,
                                            datalist,
                                            column=0,
                                            exponential_bins=True)

    #histogram the data
    visits1d = load_data.binData1d(binenergy1, datalist)
    #visits1d = np.transpose(visits1d)

    wham = WHAM.Wham1d(Tlist, binenergy1[:-1], visits1d)

    wham.minimize()
    #wham.globalMinimization()
Example #7
    rep1=0 #don't use the first rep1 replicas.  There is a better way to do this.
    Tlist=Tlist[(rep1):]
    nrep = len(Tlist)
    filenames=[inprefix+'.'+str(n+rep1+1) for n in range(nrep)]

    #OK, now we have a list of temperatures and filenames for each replica
    print "replica list:"
    for n in range(nrep):
        print Tlist[n], filenames[n]


    print "USING nfree = ", nfree


    #load data
    datalist = load_data.loadData(filenames, [ecolumn], fskip=rskip )
    
    #determine bin edges
    binenergy1 = load_data.determineBinEdge(nebins, datalist, column=0, exponential_bins=True)
    
    #histogram the data
    visits1d = load_data.binData1d(binenergy1, datalist)
    #visits1d = np.transpose(visits1d)

    wham = WHAM.wham1d(Tlist, binenergy1[:-1], visits1d)

    wham.minimize()
    #wham.globalMinimization()

    print "dumping WHAM1d to pickle file: ", pklname
    pickle.dump(wham,open(pklname,"wb"))
Example #8
import sys
from load_data import loadData
from nightowl.app import app

if __name__ == '__main__':
    if len(sys.argv) == 2:
        if sys.argv[1] == 'insert_test_data':
            loadData('instance')
    else:
        app.run()
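
For reference, this entry point is driven by a single positional argument: running the script (its file name is not shown here) with 'insert_test_data' seeds the 'instance' database via loadData, while running it with no extra argument starts the Flask app.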
Example #9
def main(argv=None):

    #Getting arguments from config file and command line
    #Building the arg reader
    argreader = ArgReader(argv)

    argreader.parser.add_argument(
        '--only_init',
        action='store_true',
        help='To initialise a model without training it.\
                                                                        This still computes the confidence intervals'
    )

    argreader.parser.add_argument(
        '--init_id',
        type=str,
        metavar="N",
        help='The index of the model to use as initialisation. \
                                                                            The weight of the last epoch will be used.'
    )

    #Reading the command line args
    argreader.getRemainingArgs()

    args = argreader.args

    torch.manual_seed(args.seed)
    random.seed(args.seed)
    np.random.seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    #The folders where the experiment files will be written
    if not (os.path.exists("../vis/{}".format(args.exp_id))):
        os.makedirs("../vis/{}".format(args.exp_id))
    if not (os.path.exists("../results/{}".format(args.exp_id))):
        os.makedirs("../results/{}".format(args.exp_id))
    if not (os.path.exists("../models/{}".format(args.exp_id))):
        os.makedirs("../models/{}".format(args.exp_id))

    #Loading data
    trainSet, distorNbList = load_data.loadData(args.dataset)

    #Write the arguments in a config file so the experiment can be re-run
    argreader.writeConfigFile("../models/{}/model{}.ini".format(
        args.exp_id, args.ind_id))

    if args.cuda:
        trainSet = trainSet.cuda()

    #Building the model
    model = modelBuilder.modelMaker(trainSet.size(1), len(trainSet),
                                    distorNbList, args)

    if args.cuda:
        model = model.cuda()

    #Initialise the model
    if args.start_mode == "base_init":
        model.init(trainSet,args.dataset,args.score_dis,args.param_not_gt,\
                    args.true_scores_init,args.bias_init,args.diffs_init,args.incons_init,truescores_tanh=args.truescores_tanh,bias_tanh=args.bias_tanh,bias_ampl=args.bias_ampl)
        startEpoch = 1
    elif args.start_mode == "iter_init":
        model.init(trainSet,args.dataset,args.score_dis,args.param_not_gt,\
                    args.true_scores_init,args.bias_init,args.diffs_init,args.incons_init,truescores_tanh=args.truescores_tanh,bias_tanh=args.bias_tanh,bias_ampl=args.bias_ampl,iterInit=True)
        startEpoch = 1
    elif args.start_mode == "fine_tune":
        init_path = sorted(glob.glob("../models/{}/model{}_epoch*".format(
            args.exp_id, args.init_id)),
                           key=processResults.findNumbers)[-1]
        model.load_state_dict(torch.load(init_path))
        startEpoch = processResults.findNumbers(
            os.path.basename(init_path).replace("model{}".format(args.init_id),
                                                ""))

    else:
        raise ValueError("Unknown init method : {}".format(args.start_mode))

    #Adding normal noise to the gradients
    gradNoise = GradNoise(ampl=args.noise)
    for p in model.parameters():
        p.register_hook(gradNoise)

    torch.save(model.state_dict(),
               "../models/{}/model{}_epoch0".format(args.exp_id, args.ind_id))

    #Write the parameters of the model and its confidence interval in a csv file
    loss = model(trainSet)

    paramsToCsv(loss,
                model,
                args.exp_id,
                args.ind_id,
                epoch=0,
                scoresDis=args.score_dis,
                score_min=args.score_min,
                score_max=args.score_max,
                truescores_tanh=args.truescores_tanh,
                bias_tanh=args.bias_tanh,
                bias_ampl=args.bias_ampl)

    if not args.only_init:
        #Getting the constructor and the kwargs for the chosen optimizer
        optimConst, kwargs = get_OptimConstructor(args.optim, args.momentum)

        #If no learning rate schedule is indicated (i.e. there's only one learning rate),
        #the args.lr argument will be a float and not a float list.
        #Converting it to a list with one element makes the rest of processing easier
        if type(args.lr) is float:
            args.lr = [args.lr]

        model.setPrior(args.prior, args.dataset)

        loss, epoch = train(model,
                            optimConst,
                            kwargs,
                            trainSet,
                            args,
                            startEpoch=startEpoch,
                            truescores_tanh=args.truescores_tanh,
                            bias_tanh=args.bias_tanh,
                            bias_ampl=args.bias_ampl)
        torch.save(
            model.state_dict(),
            "../models/{}/model{}_epoch{}".format(args.exp_id, args.ind_id,
                                                  epoch))
        paramsToCsv(loss,
                    model,
                    args.exp_id,
                    args.ind_id,
                    epoch,
                    args.score_dis,
                    args.score_min,
                    args.score_max,
                    truescores_tanh=args.truescores_tanh,
                    bias_tanh=args.bias_tanh,
                    bias_ampl=args.bias_ampl)
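
GradNoise above is registered as a backward hook on every parameter, so it is called with each parameter's gradient during backpropagation. Its definition is not shown; a minimal sketch consistent with that usage (an assumption, not the project's actual class) would be:

import torch

class GradNoise:
    def __init__(self, ampl=0.1):
        self.ampl = ampl

    def __call__(self, grad):
        # a hook registered with p.register_hook() receives the gradient and may
        # return a modified tensor; here we add zero-mean Gaussian noise to it
        return grad + self.ampl * torch.randn_like(grad)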
Example #10
# -*- coding: utf-8 -*-

from sklearn.neural_network import MLPClassifier
import numpy as np

# Load the cats and dogs
import load_data

X_train, y_train = load_data.loadData(_set='train',
                                      batch_iter=0,
                                      batch_size=100)

clf = MLPClassifier(solver='lbfgs',
                    activation='logistic',
                    alpha=1e-2,
                    hidden_layer_sizes=(1000, 1000),
                    random_state=1)

clf.fit(X_train.T, np.ravel(y_train.T))

out = clf.predict(X_train.T)

print(np.mean(out == np.ravel(y_train.T)) * 100)

X_test, y_test = load_data.loadData(32, _set='test')

out2 = clf.predict(X_test.T)

# compare test predictions against the test labels (not the training labels)
print(np.mean(out2 == np.ravel(y_test.T)) * 100)
Example #11
def handle_request():
    import keras.backend.tensorflow_backend as tb
    tb._SYMBOLIC_SCOPE.value = True

    imagefile = flask.request.files['image']
    filename = werkzeug.utils.secure_filename(imagefile.filename)
    print("\nReceived image File name : " + imagefile.filename)
    imagefile.save("result" + filename)
    print('\n')

    parser = argparse.ArgumentParser()
    parser.add_argument("-f",
                        "--fname",
                        type=str,
                        required=False,
                        default='models/trained_model.hdf5',
                        help="full path to model")
    # parser.add_argument( "-i", "--integer", type=int, default=50)
    parser.add_argument(
        '-d',
        '--disp_test_images',
        type=str2bool,
        default=False,
        help="whether to disp test image names. default: False")
    parser.add_argument('-s',
                        '--save_out_images',
                        type=str2bool,
                        default=True,
                        help="whether to ave out images. default: True")

    args = parser.parse_args()

    # Path to csv-file. File should contain X-ray filenames as first column,
    # mask filenames as second column.

    csv_path = dataset_bow_legs_dir + '/' + 'idx_test.csv'

    # Path to the folder with images. Images will be read from path + path_from_csv
    path = csv_path[:csv_path.rfind('/')] + '/'

    df = pd.read_csv(csv_path)
    if args.disp_test_images == True:
        print('\n')
        print(df)
        print('\n')

    # Load test data
    im_shape = (512, 256)
    X, y = loadData(df, path, im_shape, imagefile)

    print('\n[*]loadData() finished\n')

    n_test = X.shape[0]
    inp_shape = X[0].shape

    # Load model
    if ".hdf5" not in args.fname:
        list_of_files = glob.glob(args.fname + '/' + '*.hdf5')  #
        # print(list_of_files)
        model_weights = list_of_files[0]
    else:
        model_weights = args.fname

    # load json and create model
    json_file = open('models/model_bk.json', 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    loaded_model = model_from_json(loaded_model_json)
    print("model_from_json() finished ...")

    # load weights into new model
    loaded_model.load_weights(model_weights)
    print("Loaded model from disk")

    # evaluate loaded model on test data
    UNet = loaded_model
    model = loaded_model
    model.compile(optimizer=RMSprop(lr=0.0001),
                  loss=bce_dice_loss,
                  metrics=[dice_coeff])
    print("model compiled ")

    # For inference standard keras ImageGenerator is used.
    test_gen = ImageDataGenerator(rescale=1.)

    ious = np.zeros(n_test)
    dices = np.zeros(n_test)
    mean_IoU = np.zeros(n_test)

    i = 0
    # for xx, yy in test_gen.flow(X, y, batch_size=1):
    num_imgs = X.shape[0]
    for ii in range(num_imgs):
        xx_ = X[ii, :, :, :]
        yy_ = y[ii, :, :, :]
        xx = xx_[None, ...]
        yy = yy_[None, ...]
        img = exposure.rescale_intensity(np.squeeze(xx), out_range=(0, 1))
        pred = UNet.predict(xx)[..., 0].reshape(inp_shape[:2])
        mask = yy[..., 0].reshape(inp_shape[:2])

        # Binarize masks
        gt = mask > 0.5
        pr = pred > 0.5

        pr_bin = img_as_ubyte(pr)
        pr_openned = morphology.opening(pr_bin)

        # Remove regions smaller than 0.5% of the image
        pr = remove_small_regions(
            pr, 0.005 * np.prod(im_shape)
        )  # originally: pr = remove_small_regions(pr, 0.02 * np.prod(im_shape))
        pr_out = img_as_ubyte(pr)

        sub_dir_file_name = df.iloc[i][0]
        file_name = sub_dir_file_name[9:]
        sub_dir_name = sub_dir_file_name[:8]
        if args.disp_test_images == True:
            print('\n')
            print('sub_dir_name={}  file_name={}\n\n'.format(
                sub_dir_name, file_name))

        if args.save_out_images == True:
            dir_img_mask = 'results/bow-legs_test/{}'.format(sub_dir_name)
            if not os.path.exists(dir_img_mask):
                os.makedirs(dir_img_mask)
            img_name = '{}/{}'.format(dir_img_mask, file_name)
            if args.disp_test_images == True:
                print('img_name={}\n'.format(img_name))

            cv2.imwrite(img_name, pr_openned)

        file_name_no_ext = os.path.splitext(
            file_name
        )[0]  # os.path.splitext('file.ext') returns ('file', '.ext'); [0] keeps the stem
        file_name_in = dataset_bow_legs_dir + '/' + sub_dir_name + '/' + file_name_no_ext + '_mask' + '.png'  # dataset_bow-legs/mask_001/img_0001_mask.png
        if args.disp_test_images == True:
            print('file_name_in={}\n'.format(file_name_in))
        if args.save_out_images == True:

            file_name_out = 'results/bow-legs_test' + '/' + sub_dir_name + '/' + file_name_no_ext + '_mask_manual' + '.png'  # results/bow-legs_test/mask_006/img_0006_mask_manual.png

            img_exists = os.path.isfile(file_name_in)
            if img_exists == False:
                print('{} does not exists\n'.format(file_name_in))
                sys.exit("exiting ...")

            shutil.copy2(file_name_in, file_name_out)

        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        im_name_x_ray_original_size = data_bow_legs_dir + '/' + 'x-ray/' + file_name  # data_bow-legs/x-ray/img_0001.png
        im_name_x_ray_original_size_test = data_bow_legs_dir + '/' + 'x-ray_test/' + file_name  # data_bow-legs/x-ray/img_0001.png
        if args.disp_test_images == True:
            print('im_name_x_ray_original_size = {}\n'.format(
                im_name_x_ray_original_size_test))

        im_x_ray_original_size = cv2.imread(im_name_x_ray_original_size_test,
                                            cv2.IMREAD_GRAYSCALE)
        if im_x_ray_original_size is None:  ## Check for invalid input
            print("Could not open or find the image: {}. ".format(
                im_name_x_ray_original_size_test))
            shutil.copy2(im_name_x_ray_original_size,
                         im_name_x_ray_original_size_test)
            print('Made a copy from {}\n'.format(im_name_x_ray_original_size))
            im_x_ray_original_size = cv2.imread(
                im_name_x_ray_original_size_test, cv2.IMREAD_GRAYSCALE)

        height, width = im_x_ray_original_size.shape[:
                                                     2]  # height, width  -- original image size

        ratio = float(height) / width

        new_shape = (4 * 256, int(4 * 256 * ratio))

        im_x_ray_4x = cv2.resize(im_x_ray_original_size, new_shape)

        dir_img_x_ray_4x = 'results/bow-legs_test_4x/{}'.format(sub_dir_name)
        if not os.path.exists(dir_img_x_ray_4x):
            os.makedirs(dir_img_x_ray_4x)
        im_name_x_ray_4x = '{}/{}'.format(dir_img_x_ray_4x, file_name)
        cv2.imwrite(im_name_x_ray_4x, im_x_ray_4x)

        # mask
        im_mask_original_size = cv2.imread(file_name_in, cv2.IMREAD_GRAYSCALE)
        im_mask_4x = cv2.resize(im_mask_original_size, new_shape)
        im_name_mask_4x = '{}/{}'.format(
            dir_img_x_ray_4x, file_name_no_ext + '_mask_manual' + '.png')
        cv2.imwrite(im_name_mask_4x, im_mask_4x)

        # Unet output
        pr_openned_4x = cv2.resize(pr_openned, new_shape)
        im_name_pr_openned_4x = '{}/{}'.format(
            dir_img_x_ray_4x, file_name_no_ext + '_mask_Unet' + '.png')
        if args.disp_test_images == True:
            print('im_name_pr_openned_4x={}\n'.format(im_name_pr_openned_4x))
        cv2.imwrite(im_name_pr_openned_4x, pr_openned_4x)

        gt_4x = cv2.resize(img_as_ubyte(gt), new_shape)

        gt_4x = gt_4x > 0.5
        pr_openned_4x = pr_openned_4x > 0.5
        im_x_ray_4x_ = im_x_ray_4x / 255.0
        if args.disp_test_images == True:
            print('img.max()={} gt.max()={} pr.max()={}\n'.format(
                im_x_ray_4x_.max(), gt_4x.max(), pr_openned_4x.max()))
        im_masked_4x = masked(im_x_ray_4x, gt_4x, pr_openned_4x,
                              0.5)  # img.max()=1.0 gt.max()=True pr.max()=True

        if args.save_out_images == True:
            dir_im_masked_4x = 'results/bow-legs_masked_4x'
            if not os.path.exists(dir_im_masked_4x):
                os.makedirs(dir_im_masked_4x)
            im_name_masked_4x = '{}/{}'.format(dir_im_masked_4x, file_name)

            im_masked_4x = img_as_ubyte(im_masked_4x)
            io.imsave(im_name_masked_4x, im_masked_4x)

            # convert numpy array to PIL Image
            img = Image.fromarray(im_masked_4x.astype('uint8'))
            # create file-object in memory
            file_object = in_out.BytesIO()
            # write PNG in file-object
            img.save(file_object, 'PNG')
            # move to the beginning of the file so send_file() will read from the start
            file_object.seek(0)

            return send_file(file_object, mimetype='image/PNG')

        ious[i] = IoU(gt, pr)
        dices[i] = Dice(gt, pr)
        print('{}  {:.4f} {:.4f}'.format(df.iloc[i][0], ious[i], dices[i]))

        with open("results/bow-legs_results.txt", "a", newline="\r\n") as f:
            print('{}  {:.4f} {:.4f}'.format(df.iloc[i][0], ious[i], dices[i]),
                  file=f)

        i += 1
        if i == n_test:
            break

    print('Mean IoU:{:.4f} Mean Dice:{:.4f}'.format(ious.mean(), dices.mean()))
    with open("results/bow-legs_results.txt", "a", newline="\r\n") as f:
        print('Mean IoU:{:.4f} Mean Dice:{:.4f}'.format(
            ious.mean(), dices.mean()),
              file=f)
        print('\n', file=f)

    with open("results/bow-legs_IoU_Dice.txt", "a", newline="\r\n") as f:
        print('Mean IoU:{:.4f} Mean Dice:{:.4f}'.format(
            ious.mean(), dices.mean()),
              file=f)

    return "DONE"
Example #12
    rep1 = 0  # don't use the first rep1 replicas.  There is a better way to do this.
    Tlist = Tlist[(rep1):]
    nrep = len(Tlist)
    filenames = [inprefix + "." + str(n + rep1 + 1) for n in range(nrep)]

    # OK, now we have a list of temperatures and filenames for each replica
    print "replica list:"
    for n in range(nrep):
        print Tlist[n], filenames[n]

    # data = load_data.loadData2dExp(filenames, ecolumn, qcolumn, nqbins, fskip=rskip, qcombine=qcombine, nebins=nebins, dEmin=dEmin)
    # load data
    columns = [ecolumn]
    if len(qcombine) != 3:
        columns.append(qcolumn)
    datalist = load_data.loadData(filenames, columns, fskip=rskip, qcombine=qcombine)

    # determine bin edges
    binenergy1 = load_data.determineBinEdge(nebins, datalist, column=0, exponential_bins=use_exponential_bins)
    binq1 = load_data.determineBinEdge(nqbins, datalist, column=1, exponential_bins=False)

    # create histogram
    visits2d = load_data.binData2d(binenergy1, binq1, datalist)
    # visits2dnew = np.zeros( [nebins, nqbins, nrep] )
    # for k in range(nrep): visits2dnew[:,:,k] = visits2d[k,:,:]
    # visits2d = visits2dnew

    wham = WHAM.wham2d(Tlist, binenergy1[:-1], binq1[:-1], visits2d)

    wham.minimize()
Example #13
 def __init__(self):
     self.sejongUnitDic = loadData("sejongUnitDic")
Example #14
def convSpeed(exp_id, refModelIdList, refModelSeedList, varParamList):
    ''' Plot the distance between the vector found by models on sub-datasets and the vector found by those same models on the full datasets

    The distance is plotted as a function of the number of annotators.
    Some parameters can vary among models (e.g. the score distribution, the learning rate). One curve is drawn for each combination of those parameters.

    Args:
        exp_id (str): the experiment name
        refModelIdList (list): the list of ids of models which are trained on a full dataset
        refModelSeedList (list): the list of seeds of models which are trained on a full dataset
        varParamList (list): the list of parameters which vary among all the models in the experiment
    '''

    modelConfigPaths = sorted(glob.glob(
        "../models/{}/model*.ini".format(exp_id)),
                              key=findNumbers)
    modelIds = list(
        map(lambda x: findNumbers(os.path.basename(x)), modelConfigPaths))

    #Collect the scores of each reference model
    refTrueScoresDict = {}
    allBaseDict = {}
    for j, refModelId in enumerate(refModelIdList):
        refTrueScoresPath = sorted(glob.glob(
            "../results/{}/model{}_epoch*_trueScores.csv".format(
                exp_id, refModelId)),
                                   key=findNumbers)[-1]
        refTrueScores = np.genfromtxt(refTrueScoresPath, delimiter="\t")[:, 0]

        datasetName = readConfFile(
            "../models/{}/model{}.ini".format(exp_id, refModelId),
            ["dataset"])[0]

        paramValue = ''
        for varParam in varParamList:
            paramValue += " " + lookInModelAndData(
                "../models/{}/model{}.ini".format(exp_id, refModelId),
                varParam,
                typeVal=str)
        #print(paramValue)
        dataset, _ = load_data.loadData(datasetName)
        #print(datasetName,paramValue)
        baseLineRefDict = {}
        for baselineType in baselinesTypes:
            baseLineRefDict[baselineType], _ = computeBaselines(
                dataset, baselineType)

        #Get the color for each baseline
        baseColMaps = cm.Blues(
            np.linspace(0, 1, int(1.5 * len(baseLineRefDict.keys()))))
        baseColMapsDict = {}
        for i, key in enumerate(baseLineRefDict):
            baseColMapsDict[key] = baseColMaps[-i - 1]

        #Collect the true scores of this reference model
        if not paramValue in refTrueScoresDict.keys():
            refTrueScoresDict[paramValue] = {}
        refTrueScoresDict[paramValue][refModelSeedList[j]] = refTrueScores

        if not refModelSeedList[j] in allBaseDict.keys():
            allBaseDict[refModelSeedList[j]] = baseLineRefDict

    errorArray = np.zeros(len(modelConfigPaths))
    nbAnnotArray = np.zeros(len(modelConfigPaths))

    #Store the error of each baseline method
    allErrorBaseDict = {}
    #for key in baseLineRefDict:
    #    errorArrayDict[key] = np.zeros(len(modelConfigPaths))

    paramValueList = []
    colorInds = []

    #Will contain a list of error for each value of the varying parameters
    valuesDict = {}
    baseDict = {}
    for i, modelPath in enumerate(modelConfigPaths):

        datasetName, modelId = readConfFile(modelPath, ["dataset", "ind_id"])

        paramValue = ''
        for varParam in varParamList:
            paramValue += " " + lookInModelAndData(
                modelPath, varParam, typeVal=str)

        if not paramValue in paramValueList:
            paramValueList.append(paramValue)

        colorInds.append(paramValueList.index(paramValue))

        nbAnnot, seed = readConfFile("../data/{}.ini".format(datasetName),
                                     ["nb_annot", "seed"])

        trueScoresPath = sorted(glob.glob(
            "../results/{}/model{}_epoch*_trueScores.csv".format(
                exp_id, modelId)),
                                key=findNumbers)[-1]
        trueScores = np.genfromtxt(trueScoresPath, delimiter="\t")[:, 0]

        error = np.sqrt(
            np.power(trueScores - refTrueScoresDict[paramValue][int(seed)],
                     2).sum() / len(refTrueScoresDict[paramValue][int(seed)]))

        if not paramValue in valuesDict.keys():
            valuesDict[paramValue] = [(error, nbAnnot)]
        else:
            valuesDict[paramValue].append((error, nbAnnot))

        for baselineType in baselinesTypes:

            if not baselineType in allErrorBaseDict.keys():
                allErrorBaseDict[baselineType] = {}

            if not nbAnnot in allErrorBaseDict[baselineType].keys():
                allErrorBaseDict[baselineType][nbAnnot] = {}

            if not int(seed) in allErrorBaseDict[baselineType][nbAnnot].keys():
                #Computing the baseline error relative to the right baseline
                error = baseLineError(datasetName, allBaseDict[int(seed)],
                                      baselineType)
                #print(baselineType,nbAnnot,int(seed))
                allErrorBaseDict[baselineType][nbAnnot][int(seed)] = error

    colors = cm.autumn(np.linspace(0, 1, len(paramValueList)))
    markers = [
        m for m, func in Line2D.markers.items()
        if func != 'nothing' and m not in Line2D.filled_markers
    ]
    paramValueList = list(map(lambda x: paramValueList[x], colorInds))

    fig = plt.figure(figsize=(7, 5))
    ax = fig.add_subplot(111)
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])

    plt.xlabel("Nb of annotators")
    plt.ylabel("RMSE")

    #Plot the models
    for i, paramValue in enumerate(valuesDict.keys()):
        nbAnnotAgreg, ymeans, yerr = agregate(valuesDict[paramValue])
        plt.errorbar(np.array(nbAnnotAgreg, dtype=str).astype(int) + 0.1 * i,
                     ymeans,
                     yerr=yerr,
                     label=paramValue,
                     marker=markers[i])

    #Plot the baselines
    for baseline in allErrorBaseDict.keys():

        means = np.zeros(len(allErrorBaseDict[baseline].keys()))
        stds = np.zeros(len(allErrorBaseDict[baseline].keys()))
        annotNbs = np.zeros(len(allErrorBaseDict[baseline].keys()))
        for i, annotNb in enumerate(allErrorBaseDict[baseline].keys()):
            valToAgr = np.array([
                allErrorBaseDict[baseline][annotNb][seed]
                for seed in allErrorBaseDict[baseline][annotNb].keys()
            ])
            means[i], stds[i] = valToAgr.mean(), valToAgr.std()
            annotNbs[i] = annotNb

        #annotNbs = annotNbs.astype(int).astype(str)
        means, stds, annotNbs = zip(
            *sorted(zip(means, stds, annotNbs), key=lambda x: x[2]))

        plt.errorbar(annotNbs,
                     means,
                     yerr=stds,
                     color=baseColMapsDict[baseline],
                     label=baseline)

    fig.legend(loc='right')
    plt.savefig("../vis/{}/convSpeed_{}.png".format(exp_id, exp_id))
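
agregate() is not defined in this snippet; from the way it is called it groups the (error, nbAnnot) pairs by annotator count and returns the counts together with the mean and standard deviation of the errors. A sketch consistent with that usage (an assumption) is:

import numpy as np
from collections import defaultdict

def agregate(pairs):
    # pairs is a list of (error, nbAnnot) tuples; group the errors by annotator count
    groups = defaultdict(list)
    for error, nbAnnot in pairs:
        groups[int(nbAnnot)].append(error)
    counts = sorted(groups)
    means = np.array([np.mean(groups[c]) for c in counts])
    stds = np.array([np.std(groups[c]) for c in counts])
    return counts, means, stds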
Example #15
def main(argv=None):

    #Getting arguments from config file and command line
    #Building the arg reader
    argreader = ArgReader(argv)

    argreader.parser.add_argument(
        '--comp_gt',
        type=str,
        nargs="*",
        metavar='PARAM',
        help=
        'To compare the parameters found with the ground truth parameters. Require a fake dataset. The argument should\
                                    be the list of parameters varying across the different models in the experiment.'
    )
    argreader.parser.add_argument(
        '--comp_gt_agr',
        type=str,
        nargs="*",
        metavar='PARAM',
        help=
        'To compare the parameters found with the ground truth parameters. Require a fake dataset. The argument should\
                                    be the list of parameters varying across the different models in the experiment. The accuracies of models having the same value for those parameters will be agregated.'
    )

    argreader.parser.add_argument(
        '--comp_gt_evol',
        type=str,
        nargs="*",
        metavar='PARAM',
        help=
        'To plot the evolution of the error across epochs. The argument should\
                                    be the list of parameters varying across the different models in the experiment.'
    )

    argreader.parser.add_argument(
        '--error_metric',
        type=str,
        metavar='ERROR',
        default="rmse",
        help=
        'The error metric used in \'--comp_gt\' and \'--comp_gt_agr\'. Can be \'rmse\' or \'relative\'. Default is \'RMSE\'.'
    )

    argreader.parser.add_argument(
        '--artif_data',
        action='store_true',
        help=
        'To plot the real and empirical distribution of the parameters of a fake dataset. \
                                    The fake dataset to plot is set by the --dataset argument'
    )

    argreader.parser.add_argument(
        '--plot_param',
        type=str,
        nargs="*",
        help=
        'To plot the error of every parameter at each epoch for each model. The argument values are the indices of the models to plot.'
    )
    argreader.parser.add_argument(
        '--plot_dist',
        type=int,
        nargs="*",
        help=
        'To plot the distance travelled by each parameter and the negative log-likelihood at each epoch. \
                                    The argument values are the indices of the models to plot. The two last arguments are the epochs at which to start and finish the plot.'
    )

    argreader.parser.add_argument(
        '--two_dim_repr',
        type=str,
        nargs="*",
        help=
        'To plot the t-sne visualisation of the values taken by the parameters during training. \
                                    The first argument value is the id of the model to plot and the second is the start epoch. The following argument are the parameters to plot.'
    )

    argreader.parser.add_argument(
        '--conv_speed',
        type=str,
        nargs='*',
        metavar='ID',
        help=
        'To plot the error as a function of the number of annotator. The value is a list of parameters varying between \
                                    the reference models.')

    argreader.parser.add_argument(
        '--plot_video_raw_scores',
        type=str,
        nargs='*',
        metavar='ID',
        help=
        'To plot histograms of scores for some videos of a dataset. The value of this argument is the list of videos \
                                    line index to plot. The dataset should also be indicated with the dataset argument'
    )

    argreader.parser.add_argument(
        '--plot_range_pca',
        type=float,
        nargs=4,
        metavar="RANGE",
        help=
        'The range to use when plotting the PCA. The values should be indicated in this order : xmin,xmax,ymin,ymax.'
    )
    argreader.parser.add_argument(
        '--plot_range_dist',
        type=float,
        nargs=2,
        metavar="RANGE",
        help=
        'The range to use when plotting the distance. The values should be indicated in this order : ymin,ymax.'
    )
    argreader.parser.add_argument(
        '--labels',
        type=str,
        nargs='*',
        metavar="RANGE",
        help=
        'The label names for the models, in the order in which they will appear in the plot.'
    )

    #Reading the command line args
    argreader.getRemainingArgs()

    #Getting the args from command line and config file
    args = argreader.args

    torch.manual_seed(args.seed)
    random.seed(args.seed)
    np.random.seed(args.seed)

    #The folders where the experiment files will be written
    if not (os.path.exists("../vis/{}".format(args.exp_id))):
        os.makedirs("../vis/{}".format(args.exp_id))
    if not (os.path.exists("../results/{}".format(args.exp_id))):
        os.makedirs("../results/{}".format(args.exp_id))
    if not (os.path.exists("../models/{}".format(args.exp_id))):
        os.makedirs("../models/{}".format(args.exp_id))

    scoreMat, distorNbList = load_data.loadData(args.dataset)

    if args.comp_gt:
        compareWithGroundTruth(args.exp_id, args.comp_gt, args.error_metric)
    if args.comp_gt_agr:
        compareWithGroundTruth(args.exp_id, args.comp_gt_agr,
                               args.error_metric)
        agregateCpWGroundTruth(
            args.exp_id, "../results/{}/err_epoch-1.csv".format(args.exp_id))
        agregateCpWGroundTruth(
            args.exp_id,
            "../results/{}/inclPerc_epoch-1.csv".format(args.exp_id))

    if args.comp_gt_evol:

        #Find an id of one model in the experiment
        modelInd = findNumbers(
            os.path.basename(
                sorted(glob.glob("../models/{}/model*.ini".format(
                    args.exp_id)))[0]))
        #The list of epoch number that have been logged
        epochs = sorted(
            list(
                map(
                    lambda x: findNumbers(os.path.basename(x).split("_")[1]),
                    glob.glob("../results/{}/model{}_epoch*_{}.csv".format(
                        args.exp_id, modelInd, paramKeys[0])))))
        epochs = np.array(epochs)

        csvDict = {}
        for epoch in epochs:
            if not os.path.exists("../results/{}/err_epoch{}_agreg.csv".format(
                    args.exp_id, epoch)):
                compareWithGroundTruth(args.exp_id,
                                       args.comp_gt_evol,
                                       args.error_metric,
                                       epoch=epoch)
                agregateCpWGroundTruth(
                    args.exp_id,
                    "../results/{}/err_epoch{}.csv".format(args.exp_id, epoch))
            csvDict[epoch] = csvToDict(
                "../results/{}/err_epoch{}_agreg.csv".format(
                    args.exp_id, epoch))

        #Collect the values of the varying hyper-parameters:
        #these values distinguish the models from each other
        varHyperParamValues = csvDict[list(csvDict.keys())[0]].keys()

        for param in paramKeys:
            print("Ploting ", param)
            plt.figure()

            for i, hyperParam in enumerate(varHyperParamValues):

                points = list(
                    map(lambda x: csvDict[x][hyperParam][param]['mean'],
                        epochs))
                stds = list(
                    map(lambda x: csvDict[x][hyperParam][param]['std'],
                        epochs))

                plt.errorbar(epochs + 100 * i,
                             points,
                             yerr=stds,
                             label="{}={}".format(args.comp_gt_evol,
                                                  hyperParam))

            plt.legend()
            plt.savefig("../vis/{}/err_evol_{}.png".format(args.exp_id, param))

    if args.artif_data:
        fakeDataDIstr(args)

    if args.plot_param:
        plotParam(args.dataset, args.exp_id, args.plot_param, args.labels)

    if args.plot_dist:
        plotDist(args.exp_id, args.plot_dist[0], args.plot_dist[1],
                 args.plot_dist[2], args.plot_range_dist)

    if args.two_dim_repr:
        twoDimRepr(args.exp_id, int(args.two_dim_repr[0]),
                   int(args.two_dim_repr[1]), args.two_dim_repr[2:],
                   args.plot_range_pca)

    if args.conv_speed:

        #Collect the configuration files
        configFiles = glob.glob("../models/{}/model*.ini".format(args.exp_id))

        def get_Seed_NbAnnot(x):
            datasetName = readConfFile(x, ["dataset"])[0]
            seed, nb_annot = readConfFile("../data/{}.ini".format(datasetName),
                                          ["seed", "nb_annot"])
            return int(seed), int(nb_annot)

        #Get the ids, the seeds and the number of annotators for every model in the experiment
        ids = list(map(lambda x: findNumbers(os.path.basename(x)),
                       configFiles))
        seeds, nb_annots = zip(*list(map(get_Seed_NbAnnot, configFiles)))

        ids_seeds_nbAnnots = zip(ids, seeds, nb_annots)

        #Find the ids and seeds of models which are trained on the full dataset
        argmaxs = np.argwhere(nb_annots == np.amax(nb_annots)).flatten()
        ids = np.array(ids)[argmaxs]
        seeds = np.array(seeds)[argmaxs]

        #Sort by id to make debugging easier
        ids, seeds = zip(*sorted(zip(ids, seeds), key=lambda x: x[0]))

        convSpeed(args.exp_id, ids, seeds, args.conv_speed)

    if args.plot_video_raw_scores:
        plotVideoRawScores(args.dataset, args.plot_video_raw_scores,
                           args.score_min, args.score_max)
Example #16
 def __init__(self):
     self.junction_info = loadData("junction_info")
Example #17
def predicateImage(image, image_path, mask, mask_path, filename_result,
                   dir_result):
    # Load test data
    im_shape = (512, 256)
    csv_path = dataset_bow_legs_dir + '/' + 'idx_test.csv'
    df = pd.read_csv(csv_path)

    # Images sent by client
    X, y = loadData(image, mask, im_shape)
    # Images saved on the server
    # X, y = loadDataOrig(df, path, im_shape)

    print('\n[*]loadData() finished\n')

    n_test = X.shape[0]
    inp_shape = X[0].shape

    # Load model
    model_weights = "models/trained_model.hdf5"
    print('model_weights ' + model_weights)
    # load json and create model
    json_file = open('models/model_bk.json', 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    loaded_model = model_from_json(loaded_model_json)
    print("model_from_json() finished ...")

    # load weights into new model
    loaded_model.load_weights(model_weights)
    print("Loaded model from disk")

    # evaluate loaded model on test data
    UNet = loaded_model
    model = loaded_model
    model.compile(optimizer=RMSprop(lr=0.0001),
                  loss=bce_dice_loss,
                  metrics=[dice_coeff])
    print("model compiled ")

    ious = np.zeros(n_test)
    dices = np.zeros(n_test)

    i = 0
    # for xx, yy in test_gen.flow(X, y, batch_size=1):
    num_imgs = X.shape[0]
    for ii in range(num_imgs):
        xx_ = X[ii, :, :, :]
        yy_ = y[ii, :, :, :]
        xx = xx_[None, ...]
        yy = yy_[None, ...]
        pred = UNet.predict(xx)[..., 0].reshape(inp_shape[:2])
        mask = yy[..., 0].reshape(inp_shape[:2])

        # Binarize masks
        gt = mask > 0.5
        pr = pred > 0.5

        pr_bin = img_as_ubyte(pr)
        pr_openned = morphology.opening(pr_bin)

        # Remove regions smaller than 0.5% of the image
        pr = remove_small_regions(pr, 0.005 * np.prod(im_shape))

        sub_dir_file_name = df.iloc[i][0]
        file_name = sub_dir_file_name[9:]
        sub_dir_name = sub_dir_file_name[:8]

        file_name_no_ext = os.path.splitext(file_name)[0]
        file_name_in = mask_path

        im_name_x_ray_original_size = image_path
        im_name_x_ray_original_size_test = image_path

        im_x_ray_original_size = cv2.imread(im_name_x_ray_original_size_test,
                                            cv2.IMREAD_GRAYSCALE)
        if im_x_ray_original_size is None:  ## Check for invalid input
            print("Could not open or find the image: {}. ".format(
                im_name_x_ray_original_size_test))
            shutil.copy2(im_name_x_ray_original_size,
                         im_name_x_ray_original_size_test)
            print('Made a copy from {}\n'.format(im_name_x_ray_original_size))
            im_x_ray_original_size = cv2.imread(
                im_name_x_ray_original_size_test, cv2.IMREAD_GRAYSCALE)

        height, width = im_x_ray_original_size.shape[:
                                                     2]  # height, width  -- original image size

        ratio = float(height) / width

        new_shape = (4 * 256, int(4 * 256 * ratio))

        im_x_ray_4x = cv2.resize(im_x_ray_original_size, new_shape)

        dir_img_x_ray_4x = 'results/bow-legs_test_4x/{}'.format(sub_dir_name)
        if not os.path.exists(dir_img_x_ray_4x):
            os.makedirs(dir_img_x_ray_4x)
        im_name_x_ray_4x = '{}/{}'.format(dir_img_x_ray_4x, file_name)
        cv2.imwrite(im_name_x_ray_4x, im_x_ray_4x)

        # mask
        im_mask_original_size = cv2.imread(file_name_in, cv2.IMREAD_GRAYSCALE)
        im_mask_4x = cv2.resize(im_mask_original_size, new_shape)
        im_name_mask_4x = '{}/{}'.format(
            dir_img_x_ray_4x, file_name_no_ext + '_mask_manual' + '.png')
        cv2.imwrite(im_name_mask_4x, im_mask_4x)

        # Unet output
        pr_openned_4x = cv2.resize(pr_openned, new_shape)
        cv2.imwrite(filename_result, pr_openned_4x)

        ious[i] = IoU(gt, pr)
        dices[i] = Dice(gt, pr)
        print('{}  {:.4f} {:.4f}'.format(df.iloc[i][0], ious[i], dices[i]))

        with open("results/bow-legs_results.txt", "a", newline="\r\n") as f:
            print('{}  {:.4f} {:.4f}'.format(df.iloc[i][0], ious[i], dices[i]),
                  file=f)

        i += 1
        if i == n_test:
            break

    print('Mean IoU:{:.4f} Mean Dice:{:.4f}'.format(ious.mean(), dices.mean()))
    with open("results/bow-legs_results.txt", "a", newline="\r\n") as f:
        print('Mean IoU:{:.4f} Mean Dice:{:.4f}'.format(
            ious.mean(), dices.mean()),
              file=f)
        print('\n', file=f)

    with open("results/bow-legs_IoU_Dice.txt", "a", newline="\r\n") as f:
        print('Mean IoU:{:.4f} Mean Dice:{:.4f}'.format(
            ious.mean(), dices.mean()),
              file=f)
Example #18
        sortTime = self.dataset[self.timeFeature].sort_values()
        timestamps = sortTime.unique()  # deduplicate the timestamps so that several interleaved series are not mixed together

        minTime = str(timestamps[0])
        maxTime = str(timestamps[-1])
        interval = int(timestamps[1] - timestamps[0]) / 1e9  # datetime64[ns] difference in nanoseconds, converted to seconds

        basicInfo = {
            "row_num": rowNum,
            "col_num": colNum,
            "time_feature": self.timeFeature,
            "digital_feature_num": len(self.digitalFeatures),
            "category_feature_num": len(self.categroyFeatures),
            "max_datetime": maxTime,
            "min_datetime": minTime,
            "interval": interval
        }

        return basicInfo


if __name__ == '__main__':

    dataset = loadData('data/deepAD_data_summary_test_data1.csv')
    summary = TableSummary(dataset)
    basicInfo = summary.getBasicInformation()
    print(basicInfo)
    recommend = summary.getColumnRecommend()
    print(recommend)
Example #19
    nrep = len(Tlist)
    filenames=[inprefix+'.'+str(n+rep1+1) for n in range(nrep)]
  
    #OK, now we have a list of temperatures and filenames for each replica
    print "replica list:"
    for n in range(nrep):
        print Tlist[n], filenames[n]
  
  
  
    
    #data = load_data.loadData2dExp(filenames, ecolumn, qcolumn, nqbins, fskip=rskip, qcombine=qcombine, nebins=nebins, dEmin=dEmin)
    #load data
    columns = [ecolumn]
    if len(qcombine) != 3: columns.append(qcolumn)
    datalist = load_data.loadData(filenames, columns, fskip=rskip, qcombine=qcombine )
    
    #determine bin edges
    binenergy1 = load_data.determineBinEdge(nebins, datalist, column=0, exponential_bins=use_exponential_bins)
    binq1 = load_data.determineBinEdge(nqbins, datalist, column=1, exponential_bins=False)

    #create histogram
    visits2d = load_data.binData2d(binenergy1, binq1, datalist)
    #visits2dnew = np.zeros( [nebins, nqbins, nrep] )
    #for k in range(nrep): visits2dnew[:,:,k] = visits2d[k,:,:]
    #visits2d = visits2dnew
  
    wham = WHAM.wham2d(Tlist, binenergy1[:-1], binq1[:-1], visits2d)
  
    wham.minimize()
    
Example #20
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf
from tensorflow import keras

import numpy as np
import matplotlib.pyplot as plt

import load_data

train_labels, train_images = load_data.loadData("bin/labels.csv", "bin/images")

print("DATA")

# Define the model layers
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(64, 64)),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(10, activation='softmax')
])


# Configuration before compiling
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

print("GO COMPILE!")
# Convert one-hot labels to class indices and train the model
train_labels = np.argmax(train_labels, axis=1)
model.fit(