Example #1
def val(net, dataset, criterion, max_iter=100):
    print('Start val')

    for p in model.parameters():
        p.requires_grad = False

    net.eval()
    data_loader = torch.utils.data.DataLoader(dataset,
                                              shuffle=True,
                                              batch_size=opt.batchSize,
                                              num_workers=int(opt.workers))
    val_iter = iter(data_loader)

    i = 0
    n_correct = 0
    loss_avg = utils.averager()

    max_iter = min(max_iter, len(data_loader))
    for i in range(max_iter):
        data = val_iter.next()
        i += 1
        cpu_images, cpu_masks = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        utils.loadData(mask, cpu_masks)

        mask1d = mask[:, 2, :, :]
        '''t, l = converter.encode(cpu_texts)
        utils.loadData(text, t)
        utils.loadData(length, l)'''

        # print('val_image:',image.shape)
        # print('val_mask:',mask1d.shape)
        preds = model(image)
        # print('val_preds:',preds.shape)
        preds = preds.view(batch_size, 32, 100)

        # preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        # cost = criterion(preds, text, preds_size, length) / batch_size

        cost = criterion(preds, mask1d)
        loss_avg.add(cost)
        '''_, preds = preds.max(2)
        preds = preds.squeeze()
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, cpu_masks):
            if pred == target.lower():
                n_correct += 1'''

        # In this setting there is no real notion of a "correct" prediction, so arbitrarily count it as correct when the loss < 0.007; no particular reason for the threshold.
        if cost < 0.007:
            n_correct += 1
    '''raw_preds = converter.decode(preds.data, preds_size.data, raw=True)[:opt.n_test_disp]
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, cpu_texts):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))'''

    # The computation below is slightly off: the last batch is not always full, so this denominator over-counts.
    accuracy = n_correct / float(max_iter * opt.batchSize)
    print('Test loss: %f, accuray: %f' % (loss_avg.val(), accuracy))
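
As the comment above notes, dividing by max_iter * opt.batchSize over-counts whenever the last batch is short. A minimal sketch of the fix, using a hypothetical helper (not part of the example) that divides by the number of samples actually evaluated:

def accuracy_over_loader(val_iter, max_iter, is_correct):
    # Hypothetical helper: accumulate the true number of samples seen rather
    # than assuming every batch is full.
    n_correct, n_total = 0, 0
    for _ in range(max_iter):
        cpu_images, cpu_masks = next(val_iter)
        n_total += cpu_images.size(0)                    # true size of this batch
        n_correct += is_correct(cpu_images, cpu_masks)   # caller-supplied check
    return n_correct / float(n_total)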
Example #2
def trainBatch(net, criterion, optimizer):
    data = train_iter.next()
    cpu_images, cpu_masks = data
    # print('cpu_images.shape ',cpu_images.shape)
    # print('cpu_masks.shape  ',cpu_masks.shape)
    batch_size = cpu_images.size(0)
    utils.loadData(image, cpu_images)
    utils.loadData(mask, cpu_masks)
    # print('mask.shape:',mask.shape)
    # print('batch_size ',batch_size)
    mask1d = mask[:, 2, :, :]
    '''t, l = converter.encode(cpu_texts)
    utils.loadData(text, t)
    utils.loadData(length, l)'''

    # print('image.shape:',image.shape)
    # print('mask1d.shape):',mask1d.shape)
    preds = model(image)
    # print('preds.shape',preds.shape)
    preds = preds.view(batch_size, 32, 100)
    #preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    #cost = criterion(preds, text, preds_size, length) / batch_size

    # MSELoss is already averaged over every element, so there is no need to divide by the batch size here.
    cost = criterion(preds, mask1d)
    # print('single_cost:',cost)
    model.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
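
The comment above relies on torch.nn.MSELoss averaging over every element by default (reduction='mean'), which is why the cost is not divided by the batch size. A small standalone check of that behaviour, assuming standard PyTorch:

import torch
import torch.nn as nn

preds = torch.rand(4, 32, 100)    # same (batch, 32, 100) shape as the example's preds
target = torch.rand(4, 32, 100)
mse_mean = nn.MSELoss()                 # default reduction='mean'
mse_sum = nn.MSELoss(reduction='sum')   # summed loss; would need manual normalization
assert torch.isclose(mse_mean(preds, target), mse_sum(preds, target) / preds.numel())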
Example #3
def trainBatch(net, criterion, optimizer):
    data = train_iter.next()
    cpu_images, cpu_bbs = data
    #print('img: ',cpu_images.shape)
    #print('mask: ',cpu_masks.shape)
    batch_size = cpu_images.size(0)
    utils.loadData(image_rgb, cpu_images)
    utils.loadData(bb, cpu_bbs)
    '''t, l = converter.encode(cpu_texts)
    utils.loadData(text, t)
    utils.loadData(length, l)'''

    # print('val_image:',image.shape)
    # print('val_mask:',mask.shape)
    mask = model_mask(image_rgb)
    # print('val_preds:',preds.shape)
    mask = mask.view(batch_size, 1, 32, 100)
    #preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    #cost = criterion(preds, text, preds_size, length) / batch_size
    image_2channel = torch.cat((mask, image_rgb), dim=1)

    preds_bb = model_bb(image_2channel)

    #print('preds_bb shape:',preds_bb.shape)
    #print('bb shape:',bb.shape)
    cost = criterion(preds_bb.permute(1, 0, 2), bb)
    model_bb.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
Example #4
def trainBatch(net, criterionAttention,criterionCTC, optimizer):
    data = train_iter.next()
    cpu_images, cpu_texts = data
    batch_size = cpu_images.size(0)
    utils.loadData(image, cpu_images)
    tAttention, lAttention = converterAttention.encode(cpu_texts)
    tCTC, lCTC = converterCTC.encode(cpu_texts)
    #print (image)
    #print (t)
    #print (l)
    utils.loadData(textAttention, tAttention)
    utils.loadData(lengthAttention, lAttention)
    utils.loadData(textCTC, tCTC)
    utils.loadData(lengthCTC, lCTC)

    if opt.lang:
        predsCTC, predsAttention = crnn(image, lengthAttention, textAttention)
    else:
        predsCTC, predsAttention = crnn(imageAttention, lengthAttention)
    costAttention = criterionAttention(predsAttention, textAttention)
    preds_size = Variable(torch.IntTensor([predsCTC.size(0)] * batch_size))
    #print (predsCTC,textCTC,preds_size,lengthCTC)
    costCTC = criterionCTC(predsCTC, textCTC, preds_size, lengthCTC) / batch_size
    crnn.zero_grad()
    #cost = torch.sum(costCTC + costAttention)
   # print(costCTC,costAttention)
    cost = costCTC.cuda() + costAttention
    #cost = costCTC
   # print(cost)
    cost.backward()
    optimizer.step()
    return costCTC,costAttention,cost
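
Here the CTC cost is divided by batch_size because the criterion used in this code base apparently returns a sum over the batch, while the attention loss is already averaged; the division keeps the two terms on a comparable scale before they are summed. With the built-in torch.nn.CTCLoss the same normalization can be expressed through its reduction argument; a hedged sketch with dummy shapes, not the code used above:

import torch
import torch.nn as nn

T, N, C = 26, 4, 37                       # time steps, batch, classes (index 0 = CTC blank)
log_probs = torch.randn(T, N, C).log_softmax(2)
targets = torch.randint(1, C, (N, 10), dtype=torch.long)
input_lengths = torch.full((N,), T, dtype=torch.long)
target_lengths = torch.full((N,), 10, dtype=torch.long)

ctc_sum = nn.CTCLoss(reduction='sum')     # summed over the batch: divide by N yourself
loss = ctc_sum(log_probs, targets, input_lengths, target_lengths) / N
# nn.CTCLoss() with the default reduction='mean' normalizes per target length instead.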
Example #5
def test_by_xzy(net, test_dataset):
    print('Start test')

    for p in net.parameters():
        p.requires_grad = False

    net.eval()
    data_loader = torch.utils.data.DataLoader(test_dataset,
                                              shuffle=True,
                                              batch_size=64,
                                              num_workers=int(2))
    val_iter = iter(data_loader)

    img_name_List = []
    img_pred_List = []

    for i in range(len(data_loader)):
        data = val_iter.next(
        )  #batch must contain tensors, numbers, dicts or lists; found <class 'PIL.Image.Image'>
        i += 1
        cpu_images, cpu_img_name = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        # t, l = converter.encode(cpu_img_name)
        # utils.loadData(text, t)
        # utils.loadData(length, l)

        preds = net(image)

        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))

        _, preds = preds.max(2)
        # preds = preds.squeeze(2)    # xzy: with newer PyTorch, the result of max(2) no longer has a dimension 2, so the squeeze is unnecessary.
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(
            preds.data, preds_size.data,
            raw=False)  # sim_preds is the decoded string, e.g. "XG78233838"

        for pred, name in zip(sim_preds, cpu_img_name):
            img_name_List.append(str(name))
            img_pred_List.append(str(pred))

    img_pred_List = np.array(img_pred_List)
    img_name_List = np.array(img_name_List)
    print(img_pred_List[0])
    print(img_name_List[0])

    df = pd.DataFrame({
        'name': img_name_List,
        'label': img_pred_List
    })  #ValueError: arrays must all be same length
    column_order = ['name', 'label']
    df = df[column_order]
    # predictionFile = '../../../../dataset_formal/classify_data/crnnData/result/result_crnn_with_ctpn.csv'
    # predictionFile = '../../../../dataset_formal/classify_data/crnnData/result/result_crnn_with_1800ctpn.csv'
    # predictionFile = '../../../../dataset_formal/classify_data/crnnData/result/result_crnn_with_1800ctpn_continue.csv'
    predictionFile = '../../../../dataset_formal/classify_data/crnnData/result/result_crnn_tight_ctpn.csv'
    df.to_csv(predictionFile, index=False)

    print("\nover")
Example #6
def train_models(resnet, bayesian_resnet, num_epochs=10):
    '''Train models until num_epochs reached.'''
    #load training and validation datasets
    train, train_labels = loadData('data/ros_data.npz')
    val, val_labels = loadData('data/val.npz')

    opt = SGD(learning_rate=1e-3)  #setup optimizer

    resnet.fit(np.expand_dims(train, axis=-1),
               to_categorical(train_labels),
               validation_data=[
                   np.expand_dims(val, axis=-1),
                   to_categorical(val_labels)
               ],
               epochs=num_epochs,
               batch_size=5,
               optimizer=opt,
               save=True)  #train model

    bayesian_resnet.fit(np.expand_dims(train, axis=-1),
                        to_categorical(train_labels),
                        validation_data=[
                            np.expand_dims(val, axis=-1),
                            to_categorical(val_labels)
                        ],
                        epochs=num_epochs,
                        batch_size=5,
                        optimizer=opt,
                        save=True)  #train model
Example #7
def test(test_loader, max_iter=10):
    test_size = 0
    total_cer_loss = 0
    total_ctc_loss = 0

    test_iter = iter(test_loader)
    max_iter = min(max_iter, len(test_loader))

    crnn.eval()
    with torch.no_grad():
        for i in range(max_iter):
            data = test_iter.next()
            cpu_images, cpu_texts = data

            utils.loadData(image, cpu_images)
            batch_size = cpu_images.size(0)
            test_size += batch_size
            preds = crnn(image)
            preds_size = Variable(torch.IntTensor([preds.size(0)] *
                                                  batch_size))
            total_ctc_loss += criterion(preds, text, preds_size, length)

            _, preds = preds.max(2)
            preds = preds.transpose(1, 0).contiguous().view(-1)
            sim_preds = converter.decode(preds.data,
                                         preds_size.data,
                                         raw=False)

            # sim_preds = converter.beam_decode(preds.data)

            total_cer_loss += utils.cer_loss(sim_preds,
                                             cpu_texts,
                                             ignore_case=False)

    return total_ctc_loss * 1.0 / test_size, total_cer_loss * 1.0 / test_size
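
utils.cer_loss is assumed here to compute a character error rate between predicted and reference strings. One common definition is the Levenshtein edit distance divided by the reference length; a minimal sketch of that metric, not necessarily the exact implementation used above:

def cer(pred, target):
    # Character error rate = edit distance / len(target), via a rolling-row Levenshtein DP.
    m, n = len(pred), len(target)
    dp = list(range(n + 1))
    for i in range(1, m + 1):
        prev, dp[0] = dp[0], i
        for j in range(1, n + 1):
            cur = min(dp[j] + 1,                                  # deletion
                      dp[j - 1] + 1,                              # insertion
                      prev + (pred[i - 1] != target[j - 1]))      # substitution / match
            prev, dp[j] = dp[j], cur
    return dp[n] / max(n, 1)

# cer("XG78233838", "XG78233833") == 0.1   (one substitution over ten characters)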
Example #8
def main():
    resnet_crnn = ResNetCRNN(rc_params.imgH,
                             1,
                             len(rc_params.alphabet) + 1,
                             rc_params.nh,
                             resnet_type=rc_params.resnet_type,
                             feat_size=rc_params.feat_size)
    resnet_crnn = torch.nn.DataParallel(resnet_crnn)
    state_dict = torch.load(
        './work_dirs/resnet18_rcnn_sgd_imgh128_rgb_512x1x16_lr_0.00100_batchSize_8_time_0319110013_/crnn_Rec_done_epoch_7.pth'
    )
    resnet_crnn.load_state_dict(state_dict)
    test_dataset = dataset.lmdbDataset(root='to_lmdb/test_index', rgb=True)
    converter = utils.strLabelConverter(rc_params.alphabet)

    resnet_crnn.eval()
    resnet_crnn.cuda()
    data_loader = torch.utils.data.DataLoader(
        test_dataset,
        shuffle=False,
        batch_size=1,
        num_workers=int(rc_params.workers),
        collate_fn=alignCollate(imgH=rc_params.imgH,
                                imgW=rc_params.imgW,
                                keep_ratio=rc_params.keep_ratio,
                                rgb=True))
    val_iter = iter(data_loader)
    max_iter = len(data_loader)
    record_dir = 'test_out/test_out.txt'
    r = 1
    f = open(record_dir, "a")

    image = torch.FloatTensor(rc_params.batchSize, 3, rc_params.imgH,
                              rc_params.imgH)
    prog_bar = mmcv.ProgressBar(max_iter)
    for i in range(max_iter):
        data = val_iter.next()
        i += 1
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        # image = cpu_images.cuda()

        with torch.no_grad():
            preds = resnet_crnn(image)
            preds_size = torch.IntTensor([preds.size(0)] * batch_size)

        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        if not isinstance(sim_preds, list):
            sim_preds = [sim_preds]
        for pred in sim_preds:
            f.write(str(r).zfill(6) + ".jpg " + pred + "\n")
            r += 1

        prog_bar.update()
    print("")
    f.close()
Example #9
def main(args):
    xr, log_igfr_r, labels_r = loadData('NEW_GFR_TRAIN')
    xe, log_igfr_e, labels_e = loadData('NEW_GFR_TEST')

    train_ds = tf.data.Dataset.from_tensor_slices((xr, log_igfr_r, labels_r))
    test_ds = tf.data.Dataset.from_tensor_slices((xe, log_igfr_e, labels_e))

    train_ds = train_ds.shuffle(xr.shape[0]).batch(batch_size)
    # test_ds = test_ds.batch(batch_size)
    test_ds = test_ds.batch(1)

    model = KidneyModel(n_cat)
    init_lr, momentum = args.learning_rate, 0.9
    lr = tfe.Variable(init_lr, name="learning_rate")
    optimizer = tf.train.AdamOptimizer(lr)

    with tf.device('/cpu:0'):
        lr = tfe.Variable(init_lr, name="learning_rate")
        optimizer = tf.train.AdamOptimizer(lr)
        for epoch in range(args.epochs):
            print('epoch', epoch)
            train_acc = tfe.metrics.Accuracy('train_accuracy')
            total_loss, total_batch = 0.0, 0.0
            for (batch, (x, log_igfr,
                         labels)) in enumerate(tfe.Iterator(train_ds)):
                with tf.GradientTape() as tape:
                    mean, var, logits, igfr = model(x)
                    loss_value = loss(mean, var, logits, igfr, labels,
                                      log_igfr, args.enlarge, args.w_div,
                                      args.w_l2)
                total_loss += loss_value.cpu().numpy()
                total_batch += 1
                train_acc(tf.argmax(logits, axis=1, output_type=tf.int32),
                          tf.argmax(labels, axis=1, output_type=tf.int32))
                grads = tape.gradient(loss_value, model.variables)
                optimizer.apply_gradients(
                    zip(grads, model.variables),
                    global_step=tf.train.get_or_create_global_step())
            print('Learning Rate', lr.numpy())
            if (epoch + 1) % 50 == 0:
                lr.assign(lr.numpy() / 2)

            print('Training acc {}'.format(100 * train_acc.result()))
            print('train_acc', 100 * train_acc.result().cpu().numpy())
            test_acc = test(model, test_ds)
            test2_acc, reses, test3_acc, reses3 = test23(model, test_ds)
            print('test_acc1', test_acc)
            print('avg_loss ', total_loss / total_batch)
            print('test_acc2', test2_acc)
            print('test_acc3', test3_acc)
            for i in range(reses.shape[0]):
                print('Cate %d ' % i, reses[i])
    checkpoint_dir = './saved_models/'
    checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
    root = tfe.Checkpoint(optimizer=optimizer,
                          model=model,
                          optimizer_step=tf.train.get_or_create_global_step())

    root.save(file_prefix=checkpoint_dir)
Example #10
def test(net, dataset, criterion, n_aug=1):
    print('Start test set predictions')

    for p in crnn.parameters():
        p.requires_grad = False

    net.eval()

    
    all_file_names = []
    all_preds = []
    image_count = 0
    pred_dict = {}
    
    for epoch in range(n_aug):
        test_iter = iter(dataset)
        for i in range(len(dataset)):
            data = test_iter.next()
            #i += 1
            cpu_images, __, file_names = data
            batch_size = cpu_images.size(0)
            image_count = image_count + batch_size
            utils.loadData(image, cpu_images)

            preds = crnn(image)
            #print(preds.size())
            preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))


            # RA: While I am not sure yet, it looks like a greedy decoder and not beam search is being used here
            # Case is ignored in the accuracy, which is not ideal for an actual working system

            _, preds = preds.max(2)     
            if torch.__version__ < '0.2':
              preds = preds.squeeze(2) # https://github.com/meijieru/crnn.pytorch/issues/31
            preds = preds.transpose(1, 0).contiguous().view(-1)
            sim_preds = converter.decode(preds.data, preds_size.data, raw=False)

            for pred, f in zip(sim_preds, file_names):
                if f not in pred_dict:
                    pred_dict[f] = [pred]
                else:
                    pred_dict[f].append(pred)

    for f, final_preds in pred_dict.items():
        all_preds.append(Counter(final_preds).most_common(1)[0][0])
        all_file_names.append(f.partition(".jpg")[0])

    
    print("Total number of images in test set: %8d" % image_count)
    
    return (all_file_names, all_preds)
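
The comment in this example points out that a greedy (best-path) decoder is used rather than beam search: take the argmax class at every time step, merge consecutive repeats, and drop the CTC blank. converter.decode with raw=False presumably performs roughly this collapse; a minimal sketch under that assumption:

import torch

def greedy_ctc_decode(logits, alphabet, blank=0):
    # logits: (T, num_classes) for one sequence; index 0 is assumed to be the blank.
    best_path = logits.argmax(dim=1).tolist()
    chars, prev = [], blank
    for idx in best_path:
        if idx != prev and idx != blank:
            chars.append(alphabet[idx - 1])   # shift by one because of the blank at index 0
        prev = idx
    return ''.join(chars)

# greedy_ctc_decode(torch.randn(26, 37), "0123456789abcdefghijklmnopqrstuvwxyz")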
Example #11
def eval(model, test_dirs):
    count, error, correct, eval_batchSize = 0, 0, 0, 1
    dst = labelTestDataLoader(test_dirs, imgSize)
    testloader = DataLoader(dst, batch_size=1, shuffle=True, num_workers=8)
    start = time()
    eval_text = Variable(torch.IntTensor(eval_batchSize * 5))
    eval_length = Variable(torch.IntTensor(eval_batchSize))
    correct = 0
    for i, (XI, labels, ims) in enumerate(testloader):
        count += 1
        #Changes for the encoder [1234df ] -> [0,1,2,3]
        YI = [
        ]  #List of all the licence plate (string of actual licence plates)
        for label in labels:
            indexs = [int(x) for x in label.split('_')[:7]]
            l = [provinces[indexs[0]], alphabets[indexs[1]]]
            for index in range(2, 7):
                l.append(ads[indexs[index]])
            YI.append(''.join(l))
        t, l = converter.encode(YI)
        utils.loadData(eval_text, t)
        utils.loadData(eval_length, l)

        #YI = [[int(ee) for ee in el.split('_')[:7]] for el in labels]
        if use_gpu:
            x = Variable(XI.cuda())
        else:
            x = Variable(XI)
        # Forward pass: Compute predicted y by passing x to the model
        #print('X: {}'.format(x))
        fps_pred, preds = model(x)

        _, preds = preds.max(2, keepdim=True)
        preds = preds.squeeze(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        preds_size = Variable(torch.IntTensor([preds.size(0)] *
                                              eval_batchSize))
        rsim_preds = converter.decode(preds.data, preds_size.data, raw=True)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        #print('rsim_preds: {}, sim_preds {} and YI is {}'.format(str(rsim_preds.encode('utf-8')), sim_preds.encode('utf-8'), YI))
        try:
            print('rsim_preds: {}, sim_preds {} and YI is {}'.format(
                rsim_preds, sim_preds, YI))
        except Exception as e:
            print('Exception in printing the decoded value: {}'.format(error))
        try:
            for pred, target in zip(sim_preds, YI):
                if pred == target.lower():
                    correct += 1
        except Exception as e:
            print('Exception while calculating correct in Eval')
Example #12
def main():

    # Load data
    x, y, columns = loadData(DATASET, SAVED)

    # Train/Test split 80/20
    trnidx = int(x.shape[0] * .8)
    xtrain, ytrain = x[:trnidx], y[:trnidx][:, None]
    xtest, ytest = x[trnidx:], y[trnidx:][:, None]

    # Train
    optimizer = RMSPropOptimizer()
    alpha, batch, epochs = 1e-3, 4, 100

    dnn = Model([
        Dense(inputdim=xtrain.shape[1], units=32, activation='relu'),
        Dense(inputdim=32, units=1, activation='linear')
    ],
                loss='mean_squared_error',
                optimizer=optimizer)

    # Test
    dnn.fit(x=xtrain, y=ytrain, batch=batch, alpha=alpha, epochs=epochs)
    ypred = dnn.predict(xtest)
    print('Test MSE: %.4f' % MeanSquaredError(ytest, ypred))
Example #13
def computeModelMetrics():
    """ Function to do a manual cross validation and check precision, recall and f1 """

    from sklearn.model_selection import cross_validate

    # Load data
    features, labels, unScaledFeatures = ut.loadData(True)
    features = features.astype(np.float32)

    mapping = {
        key: value
        for key, value in zip(list(set(labels)), range(len(set(labels))))
    }
    labels = np.array([mapping[x] for x in labels], dtype=np.int64)

    y_pred = cross_validate(net,
                            features,
                            labels,
                            scoring=('recall_micro', 'precision_micro',
                                     'f1_micro', 'accuracy'),
                            cv=10)

    print(y_pred)
    print(
        f"Precision = {np.mean(y_pred['test_precision_micro'])} (+/- {np.std(y_pred['test_precision_micro'])})"
    )
    print(
        f"Recall = {np.mean(y_pred['test_recall_micro'])} (+/- {np.std(y_pred['test_recall_micro'])})"
    )
    print(
        f"F1 = {np.mean(y_pred['test_f1_micro'])} (+/- {np.std(y_pred['test_f1_micro'])})"
    )
Example #14
def main():

    # Load data
    xtrain, xtest, _ = loadData(DATASET, SAVED)
    xtrain = xtrain

    if os.path.isfile(EMBEDDINGS):

        # Load embeddings
        embeddings = np.loadtxt('w2v.csv',
                                dtype=str,
                                delimiter=',',
                                comments=None)
        vocab = embeddings[:, 0]
        vdict = {vocab[idx]: idx for idx in range(vocab.shape[0])}
        embeddings = embeddings[:, 1:].astype(np.float)

    else:

        # Creat embeddings
        vocab, probs = getUnigramProbs(xtrain)
        w2v = Word2Vec(vocab, probs, 50)
        w2v.train(xtrain[:100], 1, 4, 4, 1e-2)

        # Save embeddings
        embeddings = (w2v.w + w2v.c).astype(str)
        vocab = np.asarray(vocab)[:, None]
        embeddings = np.concatenate((vocab, embeddings), axis=1)
        np.savetxt('w2v.csv', embeddings, fmt='%s', delimiter=',')
Example #15
def test_for_one_region(filename='100307.tsv',
                        path='../Data',
                        region_idx=0,
                        history_lengths=range(1, 6),
                        delays=range(1, 2),
                        calc_type='ksg',
                        compute_p=False):
    calc = startCalc(calc_type)
    df = utils.loadData(filename, path)
    data = utils.preprocess(df,
                            sampling_rate=1.3,
                            mean_processing_type='removal',
                            trim_start=50,
                            trim_end=25)
    result, ais_values, parameters, p_values = getLocalsForRegion(
        data, calc, region_idx, history_lengths, delays, compute_p=compute_p)
    if p_values is not None:
        print("p value:", p_values)
    plotAISAcrossParams(ais_values, history_lengths, delays, show_plot=False)
    plt.figure()
    plt.plot(result)
    plt.xlabel('Time')
    plt.ylabel('AIS')
    plt.title("AIS Kraskov: {}[{}]\nHistory = {}, Delay = {}".format(
        filename, region_idx, parameters[0], parameters[1]))
    plt.show()
Example #16
def main():

    # Load data
    x, y, columns = loadData(DATASET, SAVED)

    # Randomly permute data
    rargs = torch.randperm(x.shape[0])
    x, y = x[rargs], y[rargs]
    x = torch.cat([x, torch.ones((x.shape[0], 1))], dim=1)
    y = torch.Tensor(createOneHotColumn(y.numpy())[0])

    # Train/Test split 80/20
    trnidx = int(x.shape[0] * .8)
    xtrain, ytrain = x[:trnidx], y[:trnidx]
    xtest, ytest = x[trnidx:], y[trnidx:]

    # Train
    alpha, batch, epochs = 1e-1, 32, 100
    optimizer = RMSPropOptimizer()

    dnn = Model([
        Dense(inputdim=xtrain.shape[1], units=8, activation='relu'),
        Dense(inputdim=8, units=ytrain.shape[1], activation='softmax')
    ], loss='categorical_cross_entropy', optimizer=optimizer)

    # Test
    dnn.fit(x=xtrain, y=ytrain, batch=batch, alpha=alpha, epochs=epochs)
    ypred = dnn.predict(xtest)
    print('Test Acc: %.4f' % Accuracy(ytest, ypred))
Example #17
    def loadData(self):
        rawInputs, rawOutputs = loadData(self.__filename, list(self.numericCols.keys()) + list(self.stringCols.keys()), list(self.outputCol.keys())[0])

        priceAverage = 0
        for i in range(len(rawInputs)):
        # for i in range(1000):
            containsNA = False
            for j in range(len(rawInputs[0])):
                if rawInputs[i][j] == "NA":
                    containsNA = True
                    break
            if len(rawInputs[i][self.stringCols['description']]) < 5:
                containsNA = True
            if containsNA:
                continue
            rawInputs[i][0] = rawInputs[i][0].replace('$', "")
            if rawInputs[i][0] != "":
                priceAverage += float(rawInputs[i][0])
            if rawInputs[i][1] == "Studio":
                rawInputs[i][1] = 1
            self.__inputs.append(rawInputs[i])
            self.__outputs.append(rawOutputs[i])
        priceAverage = priceAverage / len(self.__inputs)
        for i in range(len(self.__inputs)):
            if self.__inputs[i][0] == "":
                self.__inputs[i][0] = priceAverage
Example #18
def main():

    # Load data
    x, y, columns = loadData(DATASET, SAVED)

    # Randomly permute data
    rargs = torch.randperm(x.shape[0])
    x, y = x[rargs], y[rargs]

    # Train/Test split 80/20
    trnidx = int(x.shape[0] * .8)
    xtrain, ytrain = x[:trnidx], y[:trnidx]
    xtest, ytest = x[trnidx:], y[trnidx:]
    classes = [c.item() for c in torch.unique(ytrain)]

    # Train
    forest = RandomForestClassifier(numTrees=10,
                                    maxDepth=None,
                                    leafSize=1,
                                    bootstrapRatio=0.3)
    forest.fit(xtrain, ytrain, classes)

    # Test
    ypred = forest.predict(xtest)
    acc = torch.sum((ytest == ypred).float()) / ytest.shape[0]
    print('Test Accuracy: %.4f' % acc)
Example #19
def loaded(ev, server, plugin):
    global cfg
    if ev["name"] == name:
        cfg = utils.loadData("uuid", cfg)
        cfg["offline"] = server.offline_login
        if "usercache" not in cfg or cfg["usercache"] == "":
            cfg["usercache"] = server.cfg["asd"] + "/usercache.json"
Example #20
def main():

    # Load data
    x, y, columns = loadData(DATASET, SAVED)

    # Train/Test split 80/20
    trnidx = int(x.shape[0] * .8)
    xtrain, ytrain = x[:trnidx], y[:trnidx]
    xtest, ytest = x[trnidx:], y[trnidx:]

    # Train
    alpha, batch, epochs = 1e-3, 128, 1000
    optimizer = RMSPropOptimizer()

    dnn = Model([
        Dense(inputdim=xtrain.shape[1], units=16, activation='relu'),
        Dense(inputdim=16, units=1, activation='sigmoid')
    ],
                loss='binary_cross_entropy',
                optimizer=optimizer)

    # Test
    dnn.fit(x=xtrain, y=ytrain, batch=batch, alpha=alpha, epochs=epochs)
    ypred = dnn.predict(xtest)
    print('Test Acc: %.4f' % Accuracy(ytest, ypred))
Example #21
def doit(X, k):
    x, y = loadData("train", 225)
    x = x.toarray()

    train_x = x[0:10000]
    train_y = y[0:10000]

    test_x = x[9000:10000]
    test_y = y[9000:10000]

    model = lwp()
    model.fit(train_x, train_y)
    prediction = model.predict(test_x)
    cent = model.centroids_
    clas = model.classes_
    #  print(cent.shape)
    #  print(clas)
    neigh = knn(n_neighbors=k)
    neigh.fit(cent, clas)
    kn = neigh.kneighbors(X.toarray())[:][1]
    #  correct = 0
    #  wrong = 0
    #  for i in range(1000):
    #  print(test_y[i],clas[kn[i]])
    #  if test_y[i] in clas[kn[i]]:
    #  correct = correct+1
    #  else:
    #  wrong = wrong+1
    #  print(correct,wrong)
    return clas[kn]
Example #22
def test_for_one_pair(filename='100307.tsv',
                      path='../Data',
                      source_region=1,
                      target_region=0,
                      param_file='Results/HCP/AIS/params/100307_params.csv',
                      calc_type='ksg',
                      compute_p=False):
    calc = startCalc(calc_type)
    df, param_df = utils.loadData(filename,
                                  path,
                                  get_params=True,
                                  param_file=param_file)
    data = utils.preprocess(df,
                            sampling_rate=1.3,
                            mean_processing_type='removal',
                            trim_start=50,
                            trim_end=25)
    result, p_values, dce = getLocalsForRegionPair(data,
                                                   source_region,
                                                   target_region,
                                                   param_df,
                                                   calc,
                                                   compute_p=compute_p)
    if p_values is not None:
        print("p value:", p_values)
    print('Dynamic correlation exclusion value:', dce)
    utils.plotTimeseries(result)
Example #23
def __init__(self):
	try:
		self.lastseenDict=utils.loadData('lastseen', dict)
		self.functions[".ls"]=("lastseen", 2, [0,1,2])
		self.helpDict[".ls"]="Shows when a user was last seen (by the bot). Usage: .ls <Username>"
	except:
		self.writeLog("Error initializing plugin 'lastseen':", 2)
		self.noteError()
Example #24
    def __init__(self):
        self.graph = tf.Graph()
        with self.graph.as_default():
            self.batch_size = cfg.batch_size
            self.data_feed = loadData(batch_size=self.batch_size,
                                      train_shuffle=True)  # False

            # Construct Template Model (G_enc) to encoder input face
            with tf.variable_scope('face_model'):
                self.face_model = Resnet50()  # Vgg16()
                self.face_model.build()
                print('VGG model built successfully.')

            # Construct G_dec and D
            self.is_train = tf.placeholder(tf.bool, name='is_train')
            self.profile, self.front = self.data_feed.get_train()

            # Construct Model
            self.build_arch()
            print('Model built successfully.')

            all_vars = tf.trainable_variables()
            self.vars_gen = [
                var for var in all_vars if var.name.startswith('decoder')
            ]
            self.vars_dis = [
                var for var in all_vars if var.name.startswith('discriminator')
            ]
            self.loss()

            #################DEBUG#######################
            with tf.name_scope('Debug'):
                grad1 = tf.gradients([self.feature_loss], [self.gen_p])[0]
                self.grad1 = tf.reduce_mean(
                    tf.sqrt(tf.reduce_sum(tf.square(grad1), [1, 2, 3])))
                grad2 = tf.gradients([self.g_loss], [self.gen_p])[0]
                self.grad2 = tf.reduce_mean(
                    tf.sqrt(tf.reduce_sum(tf.square(grad2), [1, 2, 3])))
                grad3 = tf.gradients([self.front_loss], [self.gen_f])[0]
                self.grad3 = tf.reduce_mean(
                    tf.sqrt(tf.reduce_sum(tf.square(grad3), [1, 2, 3])))
            # Summary
            self._summary()

            # Trainer
            self.global_step = tf.Variable(0,
                                           name='global_step',
                                           trainable=False)
            self.train_gen = tf.train.AdamOptimizer(
                cfg.lr, beta1=cfg.beta1,
                beta2=cfg.beta2).minimize(self.gen_loss,
                                          global_step=self.global_step,
                                          var_list=self.vars_gen)
            self.train_dis = tf.train.AdamOptimizer(
                cfg.lr, beta1=cfg.beta1,
                beta2=cfg.beta2).minimize(self.dis_loss,
                                          global_step=self.global_step,
                                          var_list=self.vars_dis)
Example #25
    def runAll(self):
        """
        This method will be used by the bayesian parameter search
        It returns input parameters enriched with validation scores
        """
        df, truth = utils.loadData(self.dataset)
        y = truth.clusters
        self.fitPredict(df)
        return self.evaluate(y)
Example #26
def add_ID(train_df, opt):
    path = os.path.join(opt['data_dir'], opt['train_file'])
    train_data = loadData(path)
    news2id = {}
    for i, news in enumerate(train_data):
        news2id[news['newsId']] = i
    df_ = pd.DataFrame({'newsId': list(news2id.keys()), 'ID': list(news2id.values())})
    train_df = train_df.merge(df_, on='newsId', how='left')
    return train_df
Example #27
    def run_individual_parameters(i,
                                  data_path,
                                  extension,
                                  save_folder,
                                  GRP=False,
                                  compute_p=True,
                                  **preprocessing_params):
        """
        Arguments:
            GRP -- True if processing the GRP data
        """
        files = utils.getAllFiles(data_path, extension)
        if GRP:
            file = files[0]
            filename = '{:02}'.format(
                i)  # Save the results by the subjects number
            subject_id = i
        else:
            file = files[i]
            filename = utils.basename(file)
            subject_id = None
        os.makedirs("Results/{}/AM/idx".format(save_folder), exist_ok=True)
        os.makedirs("Results/{}/AM/p_values".format(save_folder),
                    exist_ok=True)

        print("Processing", i, ":", filename)
        if os.path.exists('Results/{}/AM/p_values/{}.csv'.format(
                save_folder, filename)):
            exit()

        param_file = 'Results/{}/AIS/idx/{}.csv'.format(save_folder, filename)
        df, param_df = utils.loadData(file,
                                      get_params=True,
                                      param_file=param_file,
                                      subject_id=subject_id)
        data = utils.preprocess(df, **preprocessing_params)
        results, p_values = getLocalsForAllRegions(data,
                                                   param_df,
                                                   compute_p=compute_p)
        # Add back the trimmed sections
        padding = ((0, 0), (preprocessing_params.get('trim_start', 0),
                            preprocessing_params.get('trim_end', 0)))
        results = np.pad(results, padding, mode='constant', constant_values=0)

        pd.DataFrame(results).to_csv('Results/{}/AM/{}_AM.csv'.format(
            save_folder, filename),
                                     index=None,
                                     header=None)
        pd.DataFrame(p_values).to_csv('Results/{}/AM/p_values/{}.csv'.format(
            save_folder, filename),
                                      index=None,
                                      header=None)
        try:
            utils.plotHeatmap(results, divergent=True)
        except:
            pass
Example #28
def main():
    train = utils.loadData('./data/letters.train.data')
    test = utils.loadData('./data/letters.test.data')

    trainset = filterData(train)
    testset = filterData(test)

    model = Model2(trainset)
    model.train(3)

    correct = incorrect = 0
    for x, y in testset:
        y_tag = model.inference(x)
        correct += (y == y_tag)
        incorrect += (y != y_tag)

    acc = 100.0 * correct / (correct + incorrect)

    print("test accuracy: {}".format(acc))
Example #29
def val(net, criterion, max_iter=100):
    print('Start val')

    for p in crnn.parameters():
        p.requires_grad = False

    net.eval()
    val_iter = iter(val_loader)

    i = 0
    n_correct = 0
    loss_avg = utils.averager()

    max_iter = len(val_loader)
    for i in range(max_iter):
        data = val_iter.next()
        i += 1
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        # t, l = converter.encode(cpu_texts)
        # utils.loadData(text, t)
        # utils.loadData(length, l)

        preds = crnn(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)

        _, preds = preds.max(2)
        preds = preds.squeeze(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, cpu_texts):
            if pred == target.lower():
                n_correct += 1

    raw_preds = converter.decode(preds.data, preds_size.data, raw=True)[:opt.n_test_disp]
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, cpu_texts):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))

    accuracy = n_correct / float(max_iter * opt.batchSize)
    print('Test loss: %f, accuray: %f' % (loss_avg.val(), accuracy))
Example #30
def set_data(tt):
    #load data of txt to numpy
    x_input, y_input = utils.loadData()
    #
    x = np.reshape(x_input, (-1, 1))
    y = np.reshape(y_input, (-1, 1))
    #Defined a tensor data
    x_data = tt.FloatTensor(x)
    y_data = tt.FloatTensor(y)
    return x_input, y_input, x_data, y_data
Example #31
def main():
    #load the JPEG images or the npz file

    #data, labels = load_images() #un-comment if want to load JPEG images into numpy
    #saveData('data/data.npz', data, labels) #un-comment if want to save loaded JPEG images as npz for faster loading later
    data, labels = loadData(
        'data/data.npz'
    )  #load images from npz, much faster then loading from JPEGS each time

    #split dataset into train, val, and test ... split into 70/30 and then 70/15/15
    print('Splitting data into 70/15/15 train, val, and test sets.')
    train, testval, train_labels, testval_labels = train_test_split(
        data,
        labels,
        test_size=0.30,
        random_state=42,
        shuffle=True,
        stratify=labels)
    test, val, test_labels, val_labels = train_test_split(
        testval,
        testval_labels,
        test_size=0.50,
        random_state=42,
        shuffle=True,
        stratify=testval_labels)
    del data, labels, testval, testval_labels  #free up memory

    #check imbalance
    print('Train Shape:', train.shape, 'Train Labels Shape:',
          train_labels.shape)
    print('Validation Shape:', val.shape, 'Validation Labels Shape:',
          val_labels.shape)
    print('Test Shape:', test.shape, 'Test Labels Shape:', test_labels.shape)
    plotClassDist(train_labels, 'Train Class Distribution')
    plotClassDist(val_labels, 'Validation Class Distribution')
    plotClassDist(test_labels, 'Test Class Distribution')

    #random oversample train set and recheck balance
    ovs_data, ovs_labels = randomOversample(train, train_labels)
    print('OVS Data Shape:', ovs_data.shape, 'OVS Labels Shape:',
          ovs_labels.shape)
    plotClassDist(ovs_labels, 'Train Class Distribution (ROS)')

    #oversample using smote on train set and recheck balance
    smt_data, smt_labels = smoteOversample(train, train_labels)
    print('SMT Data Shape:', smt_data.shape, 'SMT Labels Shape:',
          smt_labels.shape)
    plotClassDist(smt_labels, 'Train Class Distribution (SMOTE)')

    #save untouched and oversampled data as npz along with val and test
    saveData('data/train.npz', train, train_labels)
    saveData('data/ros_data.npz', ovs_data, ovs_labels)
    saveData('data/smt_data.npz', smt_data, smt_labels)
    saveData('data/val.npz', val, val_labels)
    saveData('data/test.npz', test, test_labels)
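
The rebalancing here is applied only to the training split, leaving validation and test untouched. A hedged sketch of what randomOversample / smoteOversample might wrap, using imbalanced-learn (which expects 2-D feature arrays, so images are flattened and reshaped back); the helper name is hypothetical:

from imblearn.over_sampling import RandomOverSampler, SMOTE

def oversample_images(data, labels, method='random', random_state=42):
    # Hypothetical helper: balance classes on (N, H, W) image data.
    n, h, w = data.shape
    flat = data.reshape(n, h * w)
    sampler = (RandomOverSampler(random_state=random_state) if method == 'random'
               else SMOTE(random_state=random_state))
    flat_res, labels_res = sampler.fit_resample(flat, labels)
    return flat_res.reshape(-1, h, w), labels_res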
Example #32
	def __init__(self, code, lines, path, tglobals, config):
		try:
			self.configref = config
			sys.path.append(path)
			self.code = __import__(code, tglobals)
			print "\tTesting code execution for personality."
			if callable(self.code.test): 
				self.code.test()
			else: raise ImportError
			self.lines = utils.loadData(lines, dict, path)
			
		except Exception as error:
			traceback.print_exc()
			
		except ImportError:
			print ("Cannot call self.code.test. Either the .py does not have it or the import went wrong.")
Example #33
def trainBatch(net, criterion, optimizer):
    data = train_iter.next()
    cpu_images, cpu_texts = data
    batch_size = cpu_images.size(0)
    utils.loadData(image, cpu_images)
    t, l = converter.encode(cpu_texts)
    utils.loadData(text, t)
    utils.loadData(length, l)

    preds = crnn(image)
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    cost = criterion(preds, text, preds_size, length) / batch_size
    crnn.zero_grad()
    cost.backward()
    optimizer.step()
    return cost
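
Throughout these examples, utils.loadData(image, cpu_images) copies a freshly loaded CPU batch into a pre-allocated tensor (often already on the GPU) instead of rebinding the name. A minimal sketch of that presumed helper, in the style of crnn.pytorch:

import torch

def loadData(v, data):
    # Presumed behaviour: resize the pre-allocated tensor in place and copy the new
    # batch into it, so the same image/text tensors can be reused every iteration.
    with torch.no_grad():
        v.resize_(data.size()).copy_(data)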
Example #34
def val(net, dataset, criterion, max_iter=100):
    print('Start val')

    for p in crnn.parameters():
        p.requires_grad = False

    net.eval()
    data_loader = torch.utils.data.DataLoader(
        dataset, shuffle=True, batch_size=opt.batchSize, num_workers=int(opt.workers))
    val_iter = iter(data_loader)

    i = 0
    n_correct = 0
    loss_avg = utils.averager()

    for i in range(min(max_iter, len(data_loader))):
        data = val_iter.next()
        i += 1
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        utils.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts)
        utils.loadData(text, t)
        utils.loadData(length, l)

        preds = crnn(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)

        _, preds = preds.max(2)
        preds = preds.squeeze(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, cpu_texts):
            if pred == target.lower():
                n_correct += 1

    raw_preds = converter.decode(preds.data, preds_size.data, raw=True)
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, cpu_texts):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))

    accuracy = n_correct / float(max_iter * opt.batchSize)
    print('Test loss: %f, accuray: %f' % (loss_avg.val(), accuracy))
Example #35
__author__ = 'HyNguyen'

from utils import loadData, save_data_4_nn_k_words, time
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-indir', required=True, type=str)
    parser.add_argument('-outdir', required=True, type=str)
    parser.add_argument('-kwords', required=True, type=int)
    parser.add_argument('-nsample', type=str, default=-1)
    args = parser.parse_args()

    path2InDir = args.indir
    path2OutDir = args.outdir
    kwords = args.kwords
    nsample = args.nsample

    dataset = loadData(path2InDir,nsample)
    start = time.time()
    save_data_4_nn_k_words(dataset,path2OutDir,k_words=kwords ,data_name="cnn")
    end = time.time()
    print("Time for ", len(dataset), ": ", end-start)
Example #36
  def computeRatios(self,true_dist=False, vars_g=None,
      data_file='test',use_log=False):
    '''
      Use the computed score densities to compute 
      the decomposed ratio test.
      set true_dist to True if workspace have the true distributions to 
      make plots, in that case vars_g also must be provided
      Final result is histogram for ratios and signal - bkf rejection curves
    '''

    f = ROOT.TFile('{0}/{1}'.format(self.dir,self.workspace))
    w = f.Get('w')
    f.Close()

    
    #TODO: This are Harcoded for now
    c1 = self.c1
    c0 = self.c0
    #c1 = np.multiply(c1, self.cross_section)
    c1 = c1/c1.sum()
    c0 = c0/c0.sum()

    print 'Calculating ratios'

    npoints = 50

    if true_dist == True:
      vars = ROOT.TList()
      for var in vars_g:
        vars.Add(w.var(var))
      x = ROOT.RooArgSet(vars)

    if use_log == True:
      evaluateRatio = self.evaluateLogDecomposedRatio
      post = 'log'
    else:
      evaluateRatio = self.evaluateDecomposedRatio
      post = ''

    score = ROOT.RooArgSet(w.var('score'))
    scoref = ROOT.RooArgSet(w.var('scoref'))

    if use_log == True:
      getRatio = self.singleLogRatio
    else:
      getRatio = self.singleRatio
   
    if self.preprocessing == True:
      if self.scaler == None:
        self.scaler = {}
        for k in range(self.nsamples):
         for j in range(self.nsamples):
           if k < j:
            self.scaler[(k,j)] = joblib.load('{0}/model/{1}/{2}/{3}_{4}_{5}.dat'.format(self.dir,'mlp',self.c1_g,'scaler',self.dataset_names[k],self.dataset_names[j]))
            

    # NN trained on complete model
    F0pdf = w.function('bkghistpdf_F0_F1')
    F1pdf = w.function('sighistpdf_F0_F1')

    # TODO Here assuming that signal is first dataset  
    testdata, testtarget = loadData(data_file,self.F0_dist,0,dir=self.dir,c1_g=self.c1_g,preprocessing=False) 
    if len(vars_g) == 1:
      xarray = np.linspace(0,5,npoints)
      fullRatios,_ = evaluateRatio(w,xarray,x=x,plotting=True,roc=False,true_dist=True)

      F1dist = np.array([self.evalDist(x,w.pdf('F1'),[xs]) for xs in xarray])
      F0dist = np.array([self.evalDist(x,w.pdf('F0'),[xs]) for xs in xarray])
      y2 = getRatio(F1dist, F0dist)

      # NN trained on complete model
      outputs = predict('{0}/model/{1}/{2}/adaptive_F0_F1.pkl'.format(self.dir,self.model_g,self.c1_g),xarray.reshape(xarray.shape[0],1),model_g=self.model_g,clf=self.clf)
      F1fulldist = np.array([self.evalDist(scoref,F1pdf,[xs]) for xs in outputs])
      F0fulldist = np.array([self.evalDist(scoref,F0pdf,[xs]) for xs in outputs])

      pdfratios = getRatio(F1fulldist, F0fulldist)

      saveFig(xarray, [fullRatios, y2, pdfratios], makePlotName('all','train',type='ratio'+post),title='Likelihood Ratios',labels=['Composed trained', 'True', 'Full trained'],print_pdf=True,dir=self.dir)
      
    if true_dist == True:
      decomposedRatio,_ = evaluateRatio(w,testdata,x=x,plotting=False,roc=self.verbose_printing,true_dist=True)
    else:
      decomposedRatio,_ = evaluateRatio(w,testdata,c0arr=c0,c1arr=c1,plotting=True,
      roc=True,data_type=data_file)
    if len(testdata.shape) > 1:
      outputs = predict('{0}/model/{1}/{2}/{3}_F0_F1.pkl'.format(self.dir,self.model_g,self.c1_g,self.model_file),testdata,model_g=self.model_g,clf=self.clf)
      #outputs = predict('/afs/cern.ch/work/j/jpavezse/private/{0}_F0_F1.pkl'.format(self.model_file),testdata,model_g=self.model_g)

    else:
      outputs = predict('{0}/model/{1}/{2}/{3}_F0_F1.pkl'.format(self.dir,self.model_g,self.c1_g,self.model_file),testdata.reshape(testdata.shape[0],1),model_g=self.model_g,clf=self.clf)

    F1fulldist = np.array([self.evalDist(scoref,F1pdf,[xs]) for xs in outputs])
    F0fulldist = np.array([self.evalDist(scoref,F0pdf,[xs]) for xs in outputs])

    completeRatio = getRatio(F1fulldist,F0fulldist)
    if true_dist == True:
      if len(testdata.shape) > 1:
        F1dist = np.array([self.evalDist(x,w.pdf('F1'),xs) for xs in testdata])
        F0dist = np.array([self.evalDist(x,w.pdf('F0'),xs) for xs in testdata])
      else:
        F1dist = np.array([self.evalDist(x,w.pdf('F1'),[xs]) for xs in testdata])
        F0dist = np.array([self.evalDist(x,w.pdf('F0'),[xs]) for xs in testdata])

      realRatio = getRatio(F1dist,F0dist)

    decomposed_target = testtarget
    complete_target = testtarget
    real_target = testtarget
    #Histogram F0-f0 for composed, full and true

    # Removing outliers
    numtest = decomposedRatio.shape[0] 
    #decomposedRatio[decomposedRatio < 0.] = completeRatio[decomposedRatio < 0.]

    #decomposed_outliers = np.zeros(numtest,dtype=bool)
    #complete_outliers = np.zeros(numtest,dtype=bool)
    #decomposed_outliers = self.findOutliers(decomposedRatio)
    #complete_outliers = self.findOutliers(completeRatio)
    #decomposed_target = testtarget[decomposed_outliers] 
    #complete_target = testtarget[complete_outliers] 
    #decomposedRatio = decomposedRatio[decomposed_outliers]
    #completeRatio = completeRatio[complete_outliers]
    if true_dist == True:
      real_outliers = np.zeros(numtest,dtype=bool)
      real_outliers = self.findOutliers(realRatio)
      #real_target = testtarget[real_outliers] 
      #realRatio = realRatio[real_outliers]

    all_ratios_plots = []
    all_names_plots = []
    bins = 70
    low = 0.6
    high = 1.2
    if use_log == True:
      low = -1.0
      high = 1.0
    low = []
    high = []
    low = []
    high = []
    ratios_vars = []
    for l,name in enumerate(['sig','bkg']):
      if true_dist == True:
        ratios_names = ['truth','full','composed']
        ratios_vec = [realRatio, completeRatio, decomposedRatio]
        target_vec = [real_target, complete_target, decomposed_target] 

        minimum = min([realRatio[real_target == 1-l].min(), 
              completeRatio[complete_target == 1-l].min(), 
              decomposedRatio[decomposed_target == 1-l].min()])
        maximum = max([realRatio[real_target == 1-l].max(), 
              completeRatio[complete_target == 1-l].max(), 
              decomposedRatio[decomposed_target == 1-l].max()])

      else:
        ratios_names = ['full','composed']
        ratios_vec = [completeRatio, decomposedRatio]
        target_vec = [complete_target, decomposed_target] 
        minimum = min([completeRatio[complete_target == 1-l].min(), 
              decomposedRatio[decomposed_target == 1-l].min()])
        maximum = max([completeRatio[complete_target == 1-l].max(), 
              decomposedRatio[decomposed_target == 1-l].max()])

      low.append(minimum - ((maximum - minimum) / bins)*10)
      high.append(maximum + ((maximum - minimum) / bins)*10)
      w.factory('ratio{0}[{1},{2}]'.format(name, low[l], high[l]))
      ratios_vars.append(w.var('ratio{0}'.format(name)))
    for curr, curr_ratios, curr_targets in zip(ratios_names,ratios_vec,target_vec):
      numtest = curr_ratios.shape[0] 
      for l,name in enumerate(['sig','bkg']):
        hist = ROOT.TH1F('{0}_{1}hist_F0_f0'.format(curr,name),'hist',bins,low[l],high[l])
        for val in curr_ratios[curr_targets == 1-l]:
          hist.Fill(val)
        datahist = ROOT.RooDataHist('{0}_{1}datahist_F0_f0'.format(curr,name),'hist',
              ROOT.RooArgList(ratios_vars[l]),hist)
        ratios_vars[l].setBins(bins)
        histpdf = ROOT.RooHistFunc('{0}_{1}histpdf_F0_f0'.format(curr,name),'hist',
              ROOT.RooArgSet(ratios_vars[l]), datahist, 0)

        histpdf.specialIntegratorConfig(ROOT.kTRUE).method1D().setLabel('RooBinIntegrator')
        getattr(w,'import')(hist)
        getattr(w,'import')(datahist) # work around for morph = w.import(morph)
        getattr(w,'import')(histpdf) # work around for morph = w.import(morph)
        #print '{0} {1} {2}'.format(curr,name,hist.Integral())
        if name == 'bkg':
          all_ratios_plots.append([w.function('{0}_sighistpdf_F0_f0'.format(curr)),
                w.function('{0}_bkghistpdf_F0_f0'.format(curr))])
          all_names_plots.append(['sig_{0}'.format(curr),'bkg_{0}'.format(curr)])
        
    all_ratios_plots = [[all_ratios_plots[j][i] for j,_ in enumerate(all_ratios_plots)] 
                for i,_ in enumerate(all_ratios_plots[0])]
    all_names_plots = [[all_names_plots[j][i] for j,_ in enumerate(all_names_plots)] 
                for i,_ in enumerate(all_names_plots[0])]

    printMultiFrame(w,['ratiosig','ratiobkg'],all_ratios_plots, makePlotName('ratio','comparison',type='hist'+post,dir=self.dir,model_g=self.model_g,c1_g=self.c1_g),all_names_plots,setLog=True,dir=self.dir,model_g=self.model_g,y_text='Count',title='Histograms for ratios',x_text='ratio value',print_pdf=True)

    # scatter plot true ratio - composed - full ratio

    #if self.verbose_printing == True and true_dist == True:
    #  saveFig(completeRatio,[realRatio], makePlotName('full','train',type='scat'+post,dir=self.dir,model_g=self.model_g,c1_g=self.c1_g),scatter=True,axis=['full trained ratio','true ratio'],dir=self.dir,model_g=self.model_g)
    #  saveFig(decomposedRatio,[realRatio], makePlotName('comp','train',type='scat'+post,dir=self.dir, model_g=self.model_g, c1_g=self.c1_g),scatter=True, axis=['composed trained ratio','true ratio'],dir=self.dir, model_g=self.model_g)
    # signal - bkg rejection plots
    if use_log == True:
      decomposedRatio = np.exp(decomposedRatio)
      completeRatio = np.exp(completeRatio)
      if true_dist == True:
        realRatio = np.exp(realRatio)
    if true_dist == True:

      ratios_list = [decomposedRatio/decomposedRatio.max(), 
                    completeRatio/completeRatio.max(),
                    realRatio/realRatio.max()]
      targets_list = [decomposed_target, complete_target, real_target]
      legends_list = ['composed', 'full', 'true']
    else:

      indices = (decomposedRatio > 0.)
      decomposedRatio = decomposedRatio[indices] 
      decomposed_target = decomposed_target[indices]
      indices = (completeRatio > 0.)
      completeRatio = completeRatio[indices]
      complete_target = complete_target[indices]

      completeRatio = np.log(completeRatio)
      decomposedRatio = np.log(decomposedRatio)
      decomposedRatio = decomposedRatio + np.abs(decomposedRatio.min())
      completeRatio = completeRatio + np.abs(completeRatio.min())
      ratios_list = [decomposedRatio/decomposedRatio.max(), 
                    completeRatio/completeRatio.max()]
      targets_list = [decomposed_target, complete_target]
      legends_list = ['composed','full']
    makeSigBkg(ratios_list,targets_list,makePlotName('comp','all',type='sigbkg'+post,dir=self.dir,
          model_g=self.model_g,c1_g=self.c1_g),dir=self.dir,model_g=self.model_g,print_pdf=True,legends=legends_list,title='Signal-Background rejection curves')

    # Scatter plot to compare regression function and classifier score
    if self.verbose_printing == True and true_dist == True:
      testdata, testtarget = loadData('test',self.F0_dist,self.F1_dist,dir=self.dir,c1_g=self.c1_g) 
      if len(testdata.shape) > 1:
        reg = np.array([self.__regFunc(x,w.pdf('F0'),w.pdf('F1'),xs) for xs in testdata])
      else:
        reg = np.array([self.__regFunc(x,w.pdf('F0'),w.pdf('F1'),[xs]) for xs in testdata])
      if len(testdata.shape) > 1:
        outputs = predict('{0}/model/{1}/{2}/adaptive_F0_F1.pkl'.format(self.dir,self.model_g,self.c1_g),testdata.reshape(testdata.shape[0],testdata.shape[1]),model_g=self.model_g, clf=self.clf)
      else:
        outputs = predict('{0}/model/{1}/{2}/adaptive_F0_F1.pkl'.format(self.dir,self.model_g,self.c1_g),testdata.reshape(testdata.shape[0],1),model_g=self.model_g, clf=self.clf)
Example #37
  def evaluateDecomposedRatio(self,w,evalData,x=None,plotting=True, roc=False,gridsize=None,c0arr=None, c1arr=None,true_dist=False,pre_evaluation=None,pre_dist=None,data_type='test',debug=False,cross_section=None,indexes=None):
    '''
      Compute composed ratio for dataset 'evalData'.
      Single ratios can be precomputed in pre_evaluation
    '''

    # pair-wise ratios
    # and decomposition computation
    #f = ROOT.TFile('{0}/{1}'.format(self.dir,self.workspace))
    #w = f.Get('w')
    #f.Close()

    if indexes == None:
      indexes = self.basis_indexes

    score = ROOT.RooArgSet(w.var('score'))
    npoints = evalData.shape[0]
    fullRatios = np.zeros(npoints)
    fullRatiosReal = np.zeros(npoints)
    c0arr = self.c0 if c0arr == None else c0arr
    c1arr = self.c1 if c1arr == None else c1arr

    true_score = []
    train_score = []
    all_targets = []
    all_positions = []
    all_ratios = []
    for k,c in enumerate(c0arr):
      innerRatios = np.zeros(npoints)
      innerTrueRatios = np.zeros(npoints)
      if c == 0:
        continue
      for j,c_ in enumerate(c1arr):
        index_k, index_j = (indexes[k],indexes[j])
        f0pdf = w.function('bkghistpdf_{0}_{1}'.format(index_k,index_j))
        f1pdf = w.function('sighistpdf_{0}_{1}'.format(index_k,index_j))
        if index_k<>index_j:
          if pre_evaluation == None:
            traindata = evalData
            if self.preprocessing == True:
              traindata = preProcessing(evalData,self.dataset_names[min(index_k,index_j)],
              self.dataset_names[max(index_k,index_j)],self.scaler) 
            outputs = predict('{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format(self.dir,self.model_g,self.c1_g,self.model_file,k,j),traindata,model_g=self.model_g,clf=self.clf)
            #outputs = predict('/afs/cern.ch/work/j/jpavezse/private/{0}_{1}_{2}.pkl'.format(self.model_file,index_k,
            #index_j),traindata,model_g=self.model_g)
            f0pdfdist = np.array([self.evalDist(score,f0pdf,[xs]) for xs in outputs])
            f1pdfdist = np.array([self.evalDist(score,f1pdf,[xs]) for xs in outputs])
          else:
            f0pdfdist = pre_evaluation[0][index_k][index_j]
            f1pdfdist = pre_evaluation[1][index_k][index_j]
            if f0pdfdist is None or f1pdfdist is None:
              pdb.set_trace()
          pdfratios = self.singleRatio(f0pdfdist,f1pdfdist)
        else:
          pdfratios = np.ones(npoints) 
        all_ratios.append(pdfratios)
        innerRatios += (c_/c) * pdfratios
        if true_dist == True:
          if pre_dist == None:
            f0 = w.pdf('f{0}'.format(index_k))
            f1 = w.pdf('f{0}'.format(index_j))
            if len(evalData.shape) > 1:
              f0dist = np.array([self.evalDist(x,f0,xs) for xs in evalData])
              f1dist = np.array([self.evalDist(x,f1,xs) for xs in evalData])
            else:
              f0dist = np.array([self.evalDist(x,f0,[xs]) for xs in evalData])
              f1dist = np.array([self.evalDist(x,f1,[xs]) for xs in evalData])
          else:
            f0dist = pre_dist[0][index_k][index_j]
            f1dist = pre_dist[1][index_k][index_j]
          ratios = self.singleRatio(f0dist, f1dist)
          innerTrueRatios += (c_/c) * ratios
        # ROC curves for pair-wise ratios
        if (roc == True or plotting==True) and k < j:
          all_positions.append((k,j))
          if roc == True:
            if self.dataset_names is not None:
              name_k, name_j = (self.dataset_names[index_k], self.dataset_names[index_j])
            else:
              name_k, name_j = (index_k,index_j)
            testdata, testtarget = loadData(data_type,name_k,name_j,dir=self.dir,c1_g=self.c1_g,
                  preprocessing=self.preprocessing, scaler=self.scaler) 
          else:
            testdata = evalData
          size2 = testdata.shape[1] if len(testdata.shape) > 1 else 1
          outputs = predict('{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format(self.dir,self.model_g,self.c1_g,self.model_file,k,j),testdata,model_g=self.model_g,clf=self.clf)
          #outputs = predict('/afs/cern.ch/work/j/jpavezse/private/{0}_{1}_{2}.pkl'.format(self.model_file,index_k,
          #          index_j),testdata.reshape(testdata.shape[0],size2),model_g=self.model_g)
          f0pdfdist = np.array([self.evalDist(score,f0pdf,[xs]) for xs in outputs])
          f1pdfdist = np.array([self.evalDist(score,f1pdf,[xs]) for xs in outputs])
          clfRatios = self.singleRatio(f0pdfdist,f1pdfdist)
          train_score.append(clfRatios)
          if roc == True:
            all_targets.append(testtarget)
          #individual ROC
          #makeROC(clfRatios, testtarget,makePlotName('dec','train',k,j,type='roc',dir=self.dir,
          #model_g=self.model_g,c1_g=self.c1_g),dir=self.dir,model_g=self.model_g)
          if true_dist == True:
            if len(evalData.shape) > 1:
              f0dist = np.array([self.evalDist(x,f0,xs) for xs in testdata])
              f1dist = np.array([self.evalDist(x,f1,xs) for xs in testdata])
            else:
              f0dist = np.array([self.evalDist(x,f0,[xs]) for xs in testdata])
              f1dist = np.array([self.evalDist(x,f1,[xs]) for xs in testdata])

            trRatios = self.singleRatio(f0dist,f1dist)

            true_score.append(trRatios)
 
          #  makeROC(trRatios, testtarget, makePlotName('dec','truth',k,j,type='roc',
          #  dir=self.dir,model_g=self.model_g,c1_g=self.c1_g),dir=self.dir,model_g=self.model_g)
          

      innerRatios = 1./innerRatios
      innerRatios[np.abs(innerRatios) == np.inf] = 0.
      fullRatios += innerRatios
      if true_dist == True:
        innerTrueRatios = 1./innerTrueRatios
        innerTrueRatios[np.abs(innerTrueRatios) == np.inf] = 0.
        fullRatiosReal += innerTrueRatios
    if roc == True:
      for ind in range(1,(len(train_score)/3+1)):
        print_scores = train_score[(ind-1)*3:(ind-1)*3+3]
        print_targets = all_targets[(ind-1)*3:(ind-1)*3+3]
        print_positions = all_positions[(ind-1)*3:(ind-1)*3+3]
        if true_dist == True:
          makeMultiROC(print_scores, print_targets,makePlotName('all{0}'.format(ind-1),'comparison',type='roc',
          dir=self.dir,model_g=self.model_g,c1_g=self.c1_g),dir=self.dir,model_g=self.model_g,
          true_score = true_score,print_pdf=True,title='ROC for pairwise trained classifier',pos=print_positions)
        else:
          makeMultiROC(print_scores, print_targets,makePlotName('all{0}'.format(ind-1),'comparison',type='roc',
          dir=self.dir,model_g=self.model_g,c1_g=self.c1_g),dir=self.dir,model_g=self.model_g,
          print_pdf=True,title='ROC for pairwise trained classifier',pos=print_positions)

    if plotting == True:
      saveMultiFig(evalData,[x for x in zip(train_score,true_score)],
      makePlotName('all_dec','train',type='ratio'),labels=[['f0-f1(trained)','f0-f1(truth)'],['f0-f2(trained)','f0-f2(truth)'],['f1-f2(trained)','f1-f2(truth)']],title='Pairwise Ratios',print_pdf=True,dir=self.dir)

    return fullRatios,fullRatiosReal
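
# Minimal standalone sketch (added for illustration, not part of the original
# example) of the pairwise-ratio combination performed above.  `pair_ratios[k][j]`
# is assumed to hold r_kj(x) for every event (with the diagonal k == j filled
# with ones), and c0/c1 are the mixture coefficients.
def combine_pairwise_ratios(pair_ratios, c0, c1):
    import numpy as np
    npoints = pair_ratios[0][0].shape[0]
    full = np.zeros(npoints)
    for k, ck in enumerate(c0):
        if ck == 0.:
            continue
        inner = np.zeros(npoints)
        for j, cj in enumerate(c1):
            inner += (cj / ck) * pair_ratios[k][j]
        inner = 1. / inner
        inner[np.abs(inner) == np.inf] = 0.
        full += inner
    return full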
Example #38
0
#!/usr/bin/env python
# coding:utf-8

import numpy as np
import conf
import NN
from activation_function import Sigmoid
import utils


if __name__ == '__main__':
    print "Part 1: Loading Data\n"

    X, y = utils.loadData(conf.FILE_X, conf.FILE_Y)

    print "Part 2: Loading Parameters\n"

    W1, W2 = utils.loadParams(conf.FILE_W1, conf.FILE_W2)
    # Unroll parameters
    W = np.hstack((W1.flatten(0), W2.flatten(0)))
    W = W.reshape((len(W), 1))

    print "Part 3: Compute Cost(Feedforward)\n"

    LEARN_RATE = 0
    J, _ = NN.nnCostFunction(W, conf.INPUT_LAYER_SIZE, conf.HIDDEN_LAYER_SIZE,
                             conf.NUM_LABELS, X, y, LEARN_RATE)
    print ("Cost at parameters (loaded from w1.txt and w2.txt): %f"
           "\n(this value should be about 0.287629)\n") % J

    print "Part 4: Implement Regularization\n"
Example #39
0
  def fit(self, data_file='test',importance_sampling=False, true_dist=True,vars_g=None):
    '''
      Create pdfs of the classifier score, to be used later in the ratio test.
      The input workspace is only needed when true pdfs for the distributions exist.
      The models used are ./model/{model_g}/{c1_g}/{model_file}_i_j.pkl
      and the data files are ./data/{model_g}/{c1_g}/{data_file}_i_j.dat.
      (A minimal standalone sketch of the score-histogram construction follows this method.)
    '''

    bins = 40
    low = 0.
    high = 1.  
    
    if self.input_workspace is not None:
      #f = ROOT.TFile('{0}/{1}'.format('/afs/cern.ch/work/j/jpavezse/private',self.workspace))
      f = ROOT.TFile('{0}/{1}'.format(self.dir,self.workspace))
     
      w = f.Get('w')
      # TODO test this when workspace is present
      w = ROOT.RooWorkspace('w') if w == None else w
      f.Close()
    else: 
      w = ROOT.RooWorkspace('w')
    w.Print()

    print 'Generating Score Histograms'

    w.factory('score[{0},{1}]'.format(low,high))
    s = w.var('score')
    
    if importance_sampling == True:
      if true_dist == True:
        vars = ROOT.TList()
        for var in vars_g:
          vars.Add(w.var(var))
        x = ROOT.RooArgSet(vars)
      else:
        x = None

    #This is because most of the data of the full model concentrate around 0 
    bins_full = 40
    low_full = 0.
    high_full = 1.
    w.factory('scoref[{0},{1}]'.format(low_full, high_full))
    s_full = w.var('scoref')
    histos = []
    histos_names = []
    inv_histos = []
    inv_histos_names = []
    sums_histos = []
    def saveHistos(w,outputs,s,bins,low,high,pos=None,importance_sampling=False,importance_data=None,
          importance_outputs=None):
      if pos is not None:
        k,j = pos
      else:
        k,j = ('F0','F1')
      print 'Estimating {0} {1}'.format(k,j)
      for l,name in enumerate(['sig','bkg']):
        data = ROOT.RooDataSet('{0}data_{1}_{2}'.format(name,k,j),"data",
            ROOT.RooArgSet(s))
        hist = ROOT.TH1F('{0}hist_{1}_{2}'.format(name,k,j),'hist',bins,low,high)
        values = outputs[l]
        #values = values[self.findOutliers(values)]
        for val in values:
          hist.Fill(val)
          s.setVal(val)
          data.add(ROOT.RooArgSet(s))
        norm = 1./hist.Integral()
        hist.Scale(norm) 
          
        s.setBins(bins)
        datahist = ROOT.RooDataHist('{0}datahist_{1}_{2}'.format(name,k,j),'hist',
              ROOT.RooArgList(s),hist)
        #histpdf = ROOT.RooHistPdf('{0}histpdf_{1}_{2}'.format(name,k,j),'hist',
        #      ROOT.RooArgSet(s), datahist, 1)
        histpdf = ROOT.RooHistFunc('{0}histpdf_{1}_{2}'.format(name,k,j),'hist',
              ROOT.RooArgSet(s), datahist, 1)
        #histpdf.setUnitNorm(True)
        #testvalues = np.array([self.evalDist(ROOT.RooArgSet(s), histpdf, [xs]) for xs in values])

        #histpdf.specialIntegratorConfig(ROOT.kTRUE).method1D().setLabel('RooBinIntegrator')

        #print 'INTEGRAL'
        #print histpdf.createIntegral(ROOT.RooArgSet(s)).getVal()
        #print histpdf.Integral()
      
        #histpdf.specialIntegratorConfig(ROOT.kTRUE).method1D().setLabel('RooAdaptiveGaussKronrodIntegrator1D')

        getattr(w,'import')(hist)
        getattr(w,'import')(data)
        getattr(w,'import')(datahist) # work around for morph = w.import(morph)
        getattr(w,'import')(histpdf) # work around for morph = w.import(morph)
        score_str = 'scoref' if pos == None else 'score'
        # Calculate the density of the classifier output using kernel density 
        #w.factory('KeysPdf::{0}dist_{1}_{2}({3},{0}data_{1}_{2},RooKeysPdf::NoMirror,2)'.format(name,k,j,score_str))

        # Print histogram pdfs and estimated densities
        if self.verbose_printing == True and name == 'bkg' and k != j:
          full = 'full' if pos == None else 'dec'
          if k < j and k != 'F0':
            histos.append([w.function('sighistpdf_{0}_{1}'.format(k,j)), w.function('bkghistpdf_{0}_{1}'.format(k,j))])
            histos_names.append(['f{0}-f{1}_f{1}(signal)'.format(k,j), 'f{0}-f{1}_f{0}(background)'.format(k,j)])
          if j < k and k != 'F0':
            inv_histos.append([w.function('sighistpdf_{0}_{1}'.format(k,j)), w.function('bkghistpdf_{0}_{1}'.format(k,j))])
            inv_histos_names.append(['f{0}-f{1}_f{1}(signal)'.format(k,j), 'f{0}-f{1}_f{0}(background)'.format(k,j)])

    if self.scaler == None:
      self.scaler = {}

    # change this
    for k in range(self.nsamples):
      for j in range(self.nsamples):
        if k == j:
          continue
        #if k <> 2 and j <> 2:
        #  continue
        if self.dataset_names is not None:
          name_k, name_j = (self.dataset_names[k], self.dataset_names[j])
        else:
          name_k, name_j = (k,j)
        print 'Loading {0}:{1} {2}:{3}'.format(k,name_k, j,name_j)
        traindata, targetdata = loadData(data_file,name_k,name_j,dir=self.dir,c1_g=self.c1_g,
            preprocessing=self.preprocessing,scaler=self.scaler,persist=True)
       
        numtrain = traindata.shape[0]       
        size2 = traindata.shape[1] if len(traindata.shape) > 1 else 1
        #output = [predict('/afs/cern.ch/work/j/jpavezse/private/{0}_{1}_{2}.pkl'.format(self.model_file,k,j),traindata[targetdata == 1],model_g=self.model_g),
        #  predict('/afs/cern.ch/work/j/jpavezse/private/{0}_{1}_{2}.pkl'.format(self.model_file,k,j),traindata[targetdata == 0],model_g=self.model_g)]
        output = [predict('{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format(self.dir,self.model_g,self.c1_g,self.model_file,k,j),traindata[targetdata==1],model_g=self.model_g,clf=self.clf),
              predict('{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format(self.dir,self.model_g,self.c1_g,self.model_file,k,j),traindata[targetdata==0],model_g=self.model_g,clf=self.clf)]
        saveHistos(w,output,s,bins,low,high,(k,j))
        #w.writeToFile('{0}/{1}'.format('/afs/cern.ch/work/j/jpavezse/private',self.workspace))
        w.writeToFile('{0}/{1}'.format(self.dir,self.workspace))

    if self.verbose_printing==True:
      for ind in range(1,(len(histos)/3+1)):
        print_histos = histos[(ind-1)*3:(ind-1)*3+3]
        print_histos_names = histos_names[(ind-1)*3:(ind-1)*3+3]
        printMultiFrame(w,['score']*len(print_histos),print_histos, makePlotName('dec{0}'.format(ind-1),'all',type='hist',dir=self.dir,c1_g=self.c1_g,model_g=self.model_g),print_histos_names,
          dir=self.dir,model_g=self.model_g,y_text='score(x)',print_pdf=True,title='Pairwise score distributions')
    # Full model
    traindata, targetdata = loadData(data_file,self.F0_dist,self.F1_dist,dir=self.dir,c1_g=self.c1_g,
      preprocessing=self.preprocessing, scaler=self.scaler)
    numtrain = traindata.shape[0]       
    size2 = traindata.shape[1] if len(traindata.shape) > 1 else 1
    outputs = [predict('{0}/model/{1}/{2}/{3}_F0_F1.pkl'.format(self.dir,self.model_g,self.c1_g,self.model_file),traindata[targetdata==1],model_g=self.model_g,clf=self.clf),
              predict('{0}/model/{1}/{2}/{3}_F0_F1.pkl'.format(self.dir,self.model_g,self.c1_g,self.model_file),traindata[targetdata==0],model_g=self.model_g,clf=self.clf)]
    #outputs = [predict('/afs/cern.ch/work/j/jpavezse/private/{0}_F0_F1.pkl'.format(self.model_file),traindata[targetdata==1],model_g=self.model_g),
    #          predict('/afs/cern.ch/work/j/jpavezse/private/{0}_F0_F1.pkl'.format(self.model_file),traindata[targetdata==0],model_g=self.model_g)]

    saveHistos(w,outputs,s_full, bins_full, low_full, high_full,importance_sampling=False)
    if self.verbose_printing == True:
      printFrame(w,['scoref'],[w.function('sighistpdf_F0_F1'),w.function('bkghistpdf_F0_F1')], makePlotName('full','all',type='hist',dir=self.dir,c1_g=self.c1_g,model_g=self.model_g),['signal','bkg'],
    dir=self.dir,model_g=self.model_g,y_text='score(x)',print_pdf=True,title='Pairwise score distributions')
   
    #w.writeToFile('{0}/{1}'.format('/afs/cern.ch/work/j/jpavezse/private',self.workspace))
    w.writeToFile('{0}/{1}'.format(self.dir,self.workspace))
    
    w.Print()
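
# Minimal sketch (added for illustration; assumes a working PyROOT/RooFit setup
# and a workspace that already defines the 'score' variable, as fit() does) of
# the score-histogram construction performed by saveHistos above: classifier
# outputs are filled into a normalized TH1F, wrapped in a RooDataHist and
# imported into the workspace as an interpolated RooHistFunc.
def make_score_histfunc(w, scores, name='sig', k=0, j=1, bins=40, low=0., high=1.):
    import ROOT
    s = w.var('score')
    hist = ROOT.TH1F('{0}hist_{1}_{2}'.format(name, k, j), 'hist', bins, low, high)
    for val in scores:
        hist.Fill(val)
    if hist.Integral() > 0.:
        hist.Scale(1. / hist.Integral())
    s.setBins(bins)
    datahist = ROOT.RooDataHist('{0}datahist_{1}_{2}'.format(name, k, j), 'hist',
                                ROOT.RooArgList(s), hist)
    histfunc = ROOT.RooHistFunc('{0}histpdf_{1}_{2}'.format(name, k, j), 'hist',
                                ROOT.RooArgSet(s), datahist, 1)
    getattr(w, 'import')(histfunc)  # same workaround as in saveHistos
    return histfunc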
Example #40
0
                for i in xrange(100):
                    plt.subplot(10, 10, i + 1)  # subplot indices are 1-based
                    plt.axis("off")
                    plt.imshow(self.W1.T[i, :].reshape(28, 28), cmap=cmap)
                plt.show()
            if i == 0:
                stop_time = time.clock()
                print "one single epoch runs %i minutes!" % ((stop_time - start_time) / 60.0)

        plt.plot(learning_curve_list)
        plt.show()


if __name__ == "__main__":
    dataset = "mnist.pkl.gz"
    train_set, valid_set, test_set = loadData(dataset)
    train_x, train_y = train_set
    valid_x, valid_y = valid_set
    test_x, test_y = test_set
    print "the size of training set is:(%d,%d)" % train_x.shape

    n_sample, feature_size = train_x.shape
    n_hidden = 500
    epochs = 100

    '''lr = 0.1
    batch_size = 20
    corruption_level = 0.3
    regularization = 0
    print "initializing AutoEncoder......"
    dA = DenoisingAutoEncoder(feature_size,n_hidden)
Example #41
0
import personality

# from cogito import config?
# how to access config.character?
# eval all lines in cogito itself?
# return s, sArgs instance
# sArgs is a dummy class containing the needed things to complete
# s has its insertions converted to sArgs.<item>, sArgs instance delivered along - .format via eval in cogito core?
import FListAPI
import random
import utils

lines = utils.loadData("EDI", dict, "./personalities/EDI/")


def __init__(datapipe):
    print("\tEDI initialized. ")
    # datapipe.personality.lines = utils.loadData('bartender', '\personalities\bartender\\')


class Functions:
    def JCH(FListProtocol, msgobj):
        if msgobj.source.character.name == "Jalon Renk":
            FListProtocol.reply("Space Husband Unit 'Jalon Renk' recognized. Welcome.", msgobj)


def test():
    print("\tEDI.py successfully called test()")


def handle(FListProtocol, msg):
Example #42
0
def trainClassifiers(clf,nsamples,
      model_g='mlp',c1_g='',
      dir='/afs/cern.ch/user/j/jpavezse/systematics',
      model_file='adaptive',
      dataset_names = None,
      full_names = None, 
      data_file='train',
      preprocessing=False,
      seed=1234,
      index = None,
      vars_names = None
    ):
  '''
    Train classifiers pair-wise on every combination of datasets.
    (A minimal standalone sketch of the same scheme follows this function.)
  '''
  print 'Training classifier'
  scaler=None
  if preprocessing == True:
    scaler = {}
  for k in range(nsamples):
    for j in range(nsamples):
      if k==j or k > j:
        continue
      if dataset_names is not None:
        name_k, name_j = (dataset_names[k], dataset_names[j])
      else:
        name_k, name_j = (k,j)
      print " Training Classifier on {0}/{1}".format(name_k,name_j)
      traindata,targetdata = loadData(data_file,name_k,name_j,dir=dir,c1_g=c1_g,
            preprocessing=preprocessing, scaler=scaler) 
      if model_g == 'mlp':
        clf.fit(traindata, targetdata, save_file='{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format(dir,model_g,c1_g,model_file,k,j))
      else:
        rng = np.random.RandomState(seed)
        indices = rng.permutation(traindata.shape[0])
        traindata = traindata[indices]
        targetdata = targetdata[indices]
        #scores = cross_validation.cross_val_score(clf, traindata.reshape(traindata.shape[0],
        #traindata.shape[1]), targetdata)
        #print "Accuracy: {0} (+/- {1})".format(scores.mean(), scores.std() * 2)
        clf.fit(traindata.reshape(traindata.shape[0],traindata.shape[1]), targetdata)
        #joblib.dump(clf, '/afs/cern.ch/work/j/jpavezse/private/{0}_{1}_{2}.pkl'.format(model_file,k,j))
        joblib.dump(clf, '{0}/model/{1}/{2}/{3}_{4}_{5}.pkl'.format(dir,model_g,c1_g,model_file,k,j))
  print " Training Classifier on F0/F1"
  traindata,targetdata = loadData(data_file,'F0' if full_names == None else full_names[0],
          'F1' if full_names == None else full_names[1],dir=dir,c1_g=c1_g) 
  if model_g == 'mlp':
    clf.fit(traindata, targetdata, save_file='{0}/model/{1}/{2}/{3}_F0_F1.pkl'.format(dir,model_g,c1_g,model_file))
  else:
    rng = np.random.RandomState(seed)
    indices = rng.permutation(traindata.shape[0])
    traindata = traindata[indices]
    targetdata = targetdata[indices]
    #clf = svm.NuSVC(probability=True) #Why use a SVR??
    scores = cross_validation.cross_val_score(clf, traindata, targetdata)
    print "Accuracy: {0} (+/- {1})".format(scores.mean(), scores.std() * 2)
    clf.fit(traindata,targetdata)
    #clf.plot_importance_matrix(vars_names)
    #joblib.dump(clf, '/afs/cern.ch/work/j/jpavezse/private/{0}_F0_F1.pkl'.format(model_file))
    joblib.dump(clf, '{0}/model/{1}/{2}/{3}_F0_F1.pkl'.format(dir,model_g,c1_g,model_file))


  return scaler
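
# Minimal standalone sketch (added; not taken from the original source) of the
# same pair-wise training scheme with a plain scikit-learn classifier: one model
# is fit per pair (k, j) of samples and persisted with joblib.  The file layout
# used here is an assumption for illustration only.
def train_pairwise_sketch(datasets, out_dir='model'):
    import numpy as np
    import joblib
    from sklearn.linear_model import LogisticRegression
    for k in range(len(datasets)):
        for j in range(k + 1, len(datasets)):
            X = np.vstack([datasets[k], datasets[j]])
            y = np.concatenate([np.zeros(len(datasets[k])),
                                np.ones(len(datasets[j]))])
            clf = LogisticRegression()
            clf.fit(X, y)
            joblib.dump(clf, '{0}/adaptive_{1}_{2}.pkl'.format(out_dir, k, j))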
Example #43
0
import personality
#from cogito import config?
#how to access config.character?
#eval all lines in cogito itself?
#return s, sArgs instance
#sArgs is a dummy class containing the needed things to complete
#s has its insertions converted to sArgs.<item>, sArgs instance delivered along - .format via eval in cogito core?
import FListAPI
import random
import utils

lines = utils.loadData('bartender', dict, './personalities/bartender/')

class Functions():
	def JCH(FListProtocol, msg):
		line = eval(random.choice(lines['join']))
		if random.random() < 0.6:
			FList.say("Bartender Personality JCH! Welcome, new user.")
	
	def telling(FListProtocol, msg):
		pass
		# messages = FListProtocol._telling(msg.source.character.name)
		
def __init__(datapipe):
	print("\tBartender personality successfully loaded. HERE WE GO!")
	#datapipe.personality.lines = utils.loadData('bartender', '\personalities\bartender\\')

def test():
	print("\tBartender.py successfully called test()\n")
	
def handle(FListProtocol, msg):