Example #1
def figure_2_1():
    """Replicate figure 2.1 of Sutton and Barto's book."""
    print('Running figure 2.1 simulation ...')
    np.random.seed(1234)
    epsilons = (0.1, 0.01, 0)
    ars, pos = [], []
    for epsilon in epsilons:
        ar, po = run_experiment(2000, 1000, epsilon)
        ars.append(np.mean(ar, 0))
        pos.append(np.mean(po, 0))
        
    # plot the results
    plt.close('all')
    f, (ax1, ax2) = plt.subplots(2)
    for i,epsilon in enumerate(epsilons):
        ax1.plot(ars[i].T, label=r'$\epsilon$=%.2f' % epsilon)
        ax2.plot(pos[i].T, label=r'$\epsilon$=%.2f' % epsilon)
    ax1.legend(loc='lower right')
    ax1.set_ylabel('Average reward')
    ax1.set_xlim(xmin=-10)
    ax2.legend(loc='lower right')
    ax2.set_xlabel('Plays')
    ax2.set_ylabel('% Optimal action')
    ax2.set_xlim(xmin=-20)
    plt.savefig('fig_2_1.pdf')
    plt.show()
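run_experiment is not defined in these examples; a minimal sketch of a compatible 10-armed bandit testbed (epsilon-greedy with optional softmax selection; every name and default below is an assumption) might look like:

import numpy as np

def run_experiment(n_tasks, n_plays, epsilon=0.1, k=10, Q_init=0.0, alpha=None, tau=None):
    # Hypothetical testbed: n_tasks independent k-armed bandits, n_plays pulls each.
    rewards = np.zeros((n_tasks, n_plays))
    optimal = np.zeros((n_tasks, n_plays))
    for t in range(n_tasks):
        q_true = np.random.randn(k)          # true action values for this task
        Q = np.full(k, float(Q_init))        # value estimates
        N = np.zeros(k)                      # pull counts
        best = np.argmax(q_true)
        for p in range(n_plays):
            if tau is not None:              # softmax (Gibbs) action selection
                prefs = np.exp(Q / tau)
                a = np.random.choice(k, p=prefs / prefs.sum())
            elif np.random.rand() < epsilon: # explore
                a = np.random.randint(k)
            else:                            # exploit
                a = np.argmax(Q)
            r = q_true[a] + np.random.randn()
            N[a] += 1
            step = alpha if alpha is not None else 1.0 / N[a]
            Q[a] += step * (r - Q[a])        # incremental value update
            rewards[t, p] = r
            optimal[t, p] = 100.0 * (a == best)
    return rewards, optimal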
Example #2
    def get_tracedata(self, format = 'AmpPha', single=False):
        '''
        Get the data of the current trace.

        Input:
            format (string) : 'AmpPha' (amplitude and phase) or 'RealImag' (real and imaginary part)

        Output:
            'AmpPha'  : amplitude and phase
            'RealImag': real and imaginary part
        '''
        #data = self._visainstrument.ask_for_values(':FORMAT REAL,32;*CLS;CALC1:DATA:NSW? SDAT,1;*OPC',format=1)      
        data = self._visainstrument.ask_for_values('FORM:DATA REAL; FORM:BORD SWAPPED; CALC%i:SEL:DATA:SDAT?'%(self._ci), format = visa.double)      
        data_size = numpy.size(data)
        datareal = numpy.array(data[0:data_size:2])
        dataimag = numpy.array(data[1:data_size:2])
          
        if format.upper() == 'REALIMAG':
          if self._zerospan:
            return numpy.mean(datareal), numpy.mean(dataimag)
          else:
            return datareal, dataimag
        elif format.upper() == 'AMPPHA':
          if self._zerospan:
            datareal = numpy.mean(datareal)
            dataimag = numpy.mean(dataimag)
            dataamp = numpy.sqrt(datareal*datareal+dataimag*dataimag)
            datapha = numpy.arctan2(dataimag, datareal)  # arctan2 preserves the quadrant
            return dataamp, datapha
          else:
            dataamp = numpy.sqrt(datareal*datareal+dataimag*dataimag)
            datapha = numpy.arctan2(dataimag,datareal)
            return dataamp, datapha
        else:
          raise ValueError('get_tracedata(): Format must be AmpPha or RealImag') 
Example #3
    def testNormalizeLike(self):
        a = np.empty((10, 3))
        a[:, 0] = np.random.random(10)
        a[:, 1] = np.random.random(10)
        a[:, 2] = np.random.random(10)

        b = np.empty((10, 3))
        b[:, 0] = np.random.random(10)
        b[:, 1] = np.random.random(10)
        b[:, 2] = np.random.random(10)
        b = b * 2

        c = normalizeArrayLike(b, a)

        # Should be normalized like a
        mean = []
        std = []
        mean.append(np.mean(a[:, 0]))
        mean.append(np.mean(a[:, 1]))
        mean.append(np.mean(a[:, 2]))
        std.append(np.std(a[:, 0]))
        std.append(np.std(a[:, 1]))
        std.append(np.std(a[:, 2]))

        # Check all values
        for col in range(b.shape[1]):
            for bval, cval in zip(b[:, col].flat, c[:, col].flat):
                print(cval, (bval - mean[col]) / std[col])
                print(cval, bval)
                assert cval == (bval - mean[col]) / std[col]
        print("TestNormalizeLike success")
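normalizeArrayLike is likewise not shown; a minimal sketch of what this test assumes (column-wise z-scoring of b with a's statistics; the helper is hypothetical) could be:

import numpy as np

def normalizeArrayLike(b, a):
    # Hypothetical helper: normalize each column of b using the column
    # means and standard deviations of a ("normalize like a").
    return (b - np.mean(a, axis=0)) / np.std(a, axis=0)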
Example #4
def figure_2_4():
    """Replicate figure 2.4 of Sutton and Barto's book."""
    print('Running figure 2.4 simulation ...')
    np.random.seed(1234)
    epsilons = (0.1, 0)
    q_inits = (0, 5)
    ars, pos = [], []
    for epsilon, q_init in zip(epsilons, q_inits):
        ar, po = run_experiment(2000, 1000, epsilon=epsilon, Q_init=q_init,
                                alpha=0.1)
        ars.append(np.mean(ar, 0))
        pos.append(np.mean(po, 0))
        
    # plot the results
    plt.close('all')
    f, (ax1, ax2) = plt.subplots(2)
    labels = (r'$\epsilon$-greedy', 'optimistic')
    for i,label in enumerate(labels):
        ax1.plot(ars[i].T, label=label)
        ax2.plot(pos[i].T, label=label)
    ax1.legend(loc='lower right')
    ax1.set_ylabel('Average reward')
    ax1.set_xlim(xmin=-10)
    ax2.legend(loc='lower right')
    ax2.set_xlabel('Plays')
    ax2.set_ylabel('% Optimal action')
    ax2.set_xlim(xmin=-20)
    plt.savefig('fig_2_4.pdf')
    plt.show()
Example #5
def add_noise_evoked(evoked, noise, snr, tmin=None, tmax=None):
    """Adds noise to evoked object with specified SNR.

    SNR is computed in the interval from tmin to tmax.

    Parameters
    ----------
    evoked : Evoked object
        An instance of evoked with signal
    noise : Evoked object
        An instance of evoked with noise
    snr : float
        signal to noise ratio in dB. It corresponds to
        10 * log10( var(signal) / var(noise) )
    tmin : float
        start time before event
    tmax : float
        end time after event

    Returns
    -------
    evoked_noise : Evoked object
        An instance of evoked corrupted by noise
    """
    evoked = copy.deepcopy(evoked)
    tmask = _time_mask(evoked.times, tmin, tmax)
    tmp = 10 * np.log10(np.mean((evoked.data[:, tmask] ** 2).ravel()) /
                        np.mean((noise.data ** 2).ravel()))
    noise.data = 10 ** ((tmp - float(snr)) / 20) * noise.data
    evoked.data += noise.data
    return evoked
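Because amplitude scales as the square root of power, the factor 10 ** ((tmp - snr) / 20) rescales the noise so the corrupted signal ends up at snr dB; a small self-contained check with made-up arrays illustrates the idea:

import numpy as np

rng = np.random.RandomState(0)
signal = rng.randn(10, 1000)
noise = rng.randn(10, 1000)
target_snr = 5.0  # desired SNR in dB

current_snr = 10 * np.log10(np.mean(signal ** 2) / np.mean(noise ** 2))
scaled_noise = 10 ** ((current_snr - target_snr) / 20) * noise

achieved = 10 * np.log10(np.mean(signal ** 2) / np.mean(scaled_noise ** 2))
print(achieved)  # ~= target_snr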
Example #6
def mean_quadratic_weighted_kappa(kappas, weights=None):
    """
    Calculates the mean of the quadratic
    weighted kappas after applying Fisher's r-to-z transform, which is
    approximately a variance-stabilizing transformation.  This
    transformation is undefined if one of the kappas is 1.0, so all kappa
    values are capped in the range (-0.999, 0.999).  The reverse
    transformation is then applied before returning the result.

    mean_quadratic_weighted_kappa(kappas), where kappas is a vector of
    kappa values

    mean_quadratic_weighted_kappa(kappas, weights), where weights is a vector
    of weights that is the same size as kappas.  Weights are applied in the
    z-space
    """
    kappas = np.array(kappas, dtype=float)
    if weights is None:
        weights = np.ones(np.shape(kappas))
    else:
        weights = weights / np.mean(weights)

    # ensure that kappas are in the range [-.999, .999]
    kappas = np.array([min(x, .999) for x in kappas])
    kappas = np.array([max(x, -.999) for x in kappas])

    z = 0.5 * np.log((1 + kappas) / (1 - kappas)) * weights
    z = np.mean(z)
    return (np.exp(2 * z) - 1) / (np.exp(2 * z) + 1)
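A small usage example with made-up kappa values; note that the weighted case passes a NumPy array, since the weights are rescaled with weights / np.mean(weights) inside the function:

import numpy as np

kappas = [0.6, 0.8, 0.95]
print(np.mean(kappas))                        # plain arithmetic mean
print(mean_quadratic_weighted_kappa(kappas))  # z-space mean, slightly higher
print(mean_quadratic_weighted_kappa(kappas, weights=np.array([1, 1, 2])))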
Example #7
def getIdealWins(errors, testErrors, p=0.01): 
    """
    Figure out whether the ideal error obtained using the test set is an improvement 
    over model selection using CV. 
    """
    winsShape = list(errors.shape[1:-1]) 
    winsShape.append(3)
    stdWins = numpy.zeros(winsShape, int)
       
    for i in range(len(sampleSizes)):
        for j in range(foldsSet.shape[0]): 
            s1 = errors[:, i, j, 0]
            s2 = testErrors[:, i]
            
            s1Mean = numpy.mean(s1)
            s2Mean = numpy.mean(s2)                
            
            t, prob = scipy.stats.wilcoxon(s1, s2)
            if prob < p: 
                if s1Mean > s2Mean: 
                    stdWins[i, j, 2] = 1 
                elif s1Mean < s2Mean:
                    stdWins[i, j, 0] = 1
            else: 
                print("Test draw samplesize:" + str(sampleSizes[i]) + " folds " + str(foldsSet[j]))
                stdWins[i, j, 1] = 1 
                    
    return stdWins
Example #8
 def test_mat_output(self):
     samples = GMM1([.9999, .0001], [0.0, 1.0], [0.000001, 0.000001],
             rng=self.rng,
             size=[40, 20])
     assert samples.shape == (40, 20)
     assert -.001 < np.mean(samples) < .001, np.mean(samples)
     assert np.var(samples) < .0001, np.var(samples)
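GMM1 (and GMM1_lpdf used further below) belong to the surrounding test suite and are not shown; a minimal sketch of a GMM1-style sampler consistent with this call might be the following (truncation via low/high and quantization via q, used in later examples, are omitted):

import numpy as np

def GMM1(weights, mus, sigmas, rng=None, size=()):
    # Hypothetical sketch: sample a 1-D Gaussian mixture by picking a
    # component per draw, then sampling from that component's Gaussian.
    rng = np.random.RandomState() if rng is None else rng
    weights = np.asarray(weights, dtype=float)
    weights = weights / weights.sum()
    n = int(np.prod(size)) if size else 1
    comps = rng.choice(len(weights), size=n, p=weights)
    draws = rng.normal(np.asarray(mus)[comps], np.asarray(sigmas)[comps])
    return draws.reshape(size) if size else float(draws[0])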
Example #9
def Haffine_from_points(fp, tp):
    '''Compute the homography matrix H of an affine transform, such that tp is obtained from fp by that transform.'''
    if fp.shape != tp.shape:
        raise RuntimeError('number of points do not match')

    # Normalize (condition) the points
    # Source points of the mapping
    m = numpy.mean(fp[:2], axis=1)
    maxstd = numpy.max(numpy.std(fp[:2], axis=1)) + 1e-9
    C1 = numpy.diag([1/maxstd, 1/maxstd, 1])
    C1[0, 2] = -m[0] / maxstd
    C1[1, 2] = -m[1] / maxstd
    fp_cond = numpy.dot(C1, fp)

    # Corresponding target points of the mapping
    m = numpy.mean(tp[:2], axis=1)
    maxstd = numpy.max(numpy.std(tp[:2], axis=1)) + 1e-9
    C2 = numpy.diag([1/maxstd, 1/maxstd, 1])
    C2[0, 2] = -m[0] / maxstd
    C2[1, 2] = -m[1] / maxstd
    tp_cond = numpy.dot(C2, tp)

    # After conditioning the points have zero mean, so the translation is zero
    A = numpy.concatenate((fp_cond[:2], tp_cond[:2]), axis=0)
    U, S, V = numpy.linalg.svd(A.T)
    # Build matrices B and C
    tmp = V[:2].T
    B = tmp[:2]
    C = tmp[2:4]

    tmp2 = numpy.concatenate((numpy.dot(C, numpy.linalg.pinv(B)), numpy.zeros((2, 1))), axis=1)
    H = numpy.vstack((tmp2, [0, 0, 1]))

    H = numpy.dot(numpy.linalg.inv(C2), numpy.dot(H, C1))  # undo the conditioning
    return H / H[2, 2]  # normalize and return
Example #10
    def work(self):
        self.worked = True
        kwargs = dict(
                weights=self.weights,
                mus=self.mus,
                sigmas=self.sigmas,
                low=self.low,
                high=self.high,
                q=self.q,
                )
        samples = GMM1(rng=self.rng,
                size=(self.n_samples,),
                **kwargs)
        samples = np.sort(samples)
        edges = samples[::self.samples_per_bin]
        #print samples

        pdf = np.exp(GMM1_lpdf(edges[:-1], **kwargs))
        dx = edges[1:] - edges[:-1]
        y = 1 / dx / len(dx)

        if self.show:
            plt.scatter(edges[:-1], y)
            plt.plot(edges[:-1], pdf)
            plt.show()
        err = (pdf - y) ** 2
        print(np.max(err))
        print(np.mean(err))
        print(np.median(err))
        if not self.show:
            assert np.max(err) < .1
            assert np.mean(err) < .01
            assert np.median(err) < .01
Example #11
    def work(self, **kwargs):
        self.__dict__.update(kwargs)
        self.worked = True
        samples = LGMM1(rng=self.rng,
                size=(self.n_samples,),
                **self.LGMM1_kwargs)
        samples = np.sort(samples)
        edges = samples[::self.samples_per_bin]
        centers = .5 * edges[:-1] + .5 * edges[1:]
        print(edges)

        pdf = np.exp(LGMM1_lpdf(centers, **self.LGMM1_kwargs))
        dx = edges[1:] - edges[:-1]
        y = 1 / dx / len(dx)

        if self.show:
            plt.scatter(centers, y)
            plt.plot(centers, pdf)
            plt.show()
        err = (pdf - y) ** 2
        print(np.max(err))
        print(np.mean(err))
        print(np.median(err))
        if not self.show:
            assert np.max(err) < .1
            assert np.mean(err) < .01
            assert np.median(err) < .01
Example #12
	def run_svm_evaluation(self, svmtype, inputdata, outputdata, k):
		""" Run SVM on training data to evaluate classifier. Return f1scores, gamma and C"""

		if svmtype == 'rbf':
			# Parameter grid
			param_grid = [
			 {'C': np.logspace(1,5,5), 'gamma': np.logspace(-3,0,5), 'kernel': ['rbf']}
			]
		if svmtype == 'ln':
			param_grid =[ {'C': np.logspace(1,5,5)}]
		
		score_func = metrics.f1_score

		# Cross validation
		cv = cross_validation.KFold(inputdata.shape[0], n_folds=k, indices=True,shuffle=True)
		f1_scores = []

		for traincv, testcv in cv:

			# TODO: multithreading of cross validation.
			(f1_score, gamma1, c) = self.do_cross_validation(param_grid, svmtype, score_func, inputdata[traincv], outputdata[traincv], inputdata[testcv], outputdata[testcv])
			f1_scores.append(f1_score)
			
		print("score average: " + str(np.mean(f1_scores)))
		print(f1_scores)

		average_score =np.mean(f1_scores)
		tuples = (average_score, f1_scores)

		return (tuples, gamma1, c)
Example #13
def sample_every_two_correlation_times(energy_data, magnetization_data, correlation_time, no_of_sites):
    """Sample the given data every 2 correlation times and determine value and error."""
    magnet_samples = []
    energy_samples = []

    for t in np.arange(0, len(energy_data), 2 * int(np.ceil(correlation_time))):
        magnet_samples.append(magnetization_data[t])
        energy_samples.append(energy_data[t])

    magnet_samples = np.asarray(magnet_samples)
    energy_samples = np.asarray(energy_samples)

    abs_magnetization = np.mean(np.absolute(magnet_samples))
    abs_magnetization_error = calculate_error(magnet_samples)
    print("<m> (<|M|/N>) = {0} +/- {1}".format(abs_magnetization, abs_magnetization_error))

    magnetization = np.mean(magnet_samples)
    magnetization_error = calculate_error(magnet_samples)
    print("<M/N> = {0} +/- {1}".format(magnetization, magnetization_error))

    energy = np.mean(energy_samples)
    energy_error = calculate_error(energy_samples)
    print("<E/N> = {0} +/- {1}".format(energy, energy_error))

    magnetization_squared = np.mean((magnet_samples * no_of_sites)**2)
    magnetization_squared_error = calculate_error((magnet_samples * no_of_sites)**2)
    print("<M^2> = {0} +/- {1}".format(magnetization_squared, magnetization_squared_error))
Example #14
def summarize_features_mfcc(mfccs, v=False):
    """
    Given mfcc matrix, return summary for a window
    :param mfccs: NxM matrix
        mfcc matrix
    :param i_start: int
        index for beginning of window
    :param i_end: int
        index for end of window
    :return: 1xL array
        feature vector
    """

    # Summarize features
    features = np.max(mfccs, axis=1)
    features = np.append(features, np.mean(mfccs, axis=1))
    features = np.append(features, np.std(mfccs, axis=1))
    d_mfccs = np.diff(mfccs, axis=1)
    features = np.append(features, np.mean(d_mfccs, axis=1))
    features = np.append(features, np.std(d_mfccs, axis=1))
    d_d_mfccs = np.diff(d_mfccs, axis=1)
    features = np.append(features, np.mean(d_d_mfccs, axis=1))
    features = np.append(features, np.std(d_d_mfccs, axis=1))

    # print np.shape(d_d_mfccs)
    # print np.shape(features)
    return np.reshape(features, (1, len(features)))
Example #15
def trainer(model, data, epochs, validate_period, model_path, prob_lm=0.1, runid=''):
    def valid_loss():
        result = dict(lm=[], visual=[])
        for item in data.iter_valid_batches():
            result['lm'].append(model.lm.loss_test(*model.lm.args(item)))
            result['visual'].append(model.visual.loss_test(*model.visual.args(item)))
        return result
    costs = Counter(dict(cost_v=0.0, N_v=0.0, cost_t=0.0, N_t=0.0))
    print("LM: {} parameters".format(count_params(model.lm.params())))
    print("Vi: {} parameters".format(count_params(model.visual.params())))
    for epoch in range(1,epochs+1):
        for _j, item in enumerate(data.iter_train_batches()):
            j = _j +1
            if random.random() <= prob_lm:
                cost_t = model.lm.train(*model.lm.args(item))
                costs += Counter(dict(cost_t=cost_t, N_t=1))
            else:
                cost_v = model.visual.train(*model.visual.args(item))
                costs += Counter(dict(cost_v=cost_v, N_v=1))
            print(epoch, j, j*data.batch_size, "train",
                  numpy.divide(costs['cost_v'], costs['N_v']),
                  numpy.divide(costs['cost_t'], costs['N_t']))
            if j % validate_period == 0:
                result = valid_loss()
                print(epoch, j, 0, "valid",
                      numpy.mean(result['visual']),
                      numpy.mean(result['lm']))
                sys.stdout.flush()
        model.save(path='model.r{}.e{}.zip'.format(runid, epoch))
    model.save(path='model.zip')
Example #16
def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds=10):
    assert(embeddings1.shape[0] == embeddings2.shape[0])
    assert(embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    nrof_thresholds = len(thresholds)
    k_fold = KFold(n_splits=nrof_folds, shuffle=False)
    
    val = np.zeros(nrof_folds)
    far = np.zeros(nrof_folds)
    
    diff = np.subtract(embeddings1, embeddings2)
    dist = np.sum(np.square(diff),1)
    indices = np.arange(nrof_pairs)
    
    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
      
        # Find the threshold that gives FAR = far_target
        far_train = np.zeros(nrof_thresholds)
        for threshold_idx, threshold in enumerate(thresholds):
            _, far_train[threshold_idx] = calculate_val_far(threshold, dist[train_set], actual_issame[train_set])
        if np.max(far_train)>=far_target:
            f = interpolate.interp1d(far_train, thresholds, kind='slinear')
            threshold = f(far_target)
        else:
            threshold = 0.0
    
        val[fold_idx], far[fold_idx] = calculate_val_far(threshold, dist[test_set], actual_issame[test_set])
  
    val_mean = np.mean(val)
    far_mean = np.mean(far)
    val_std = np.std(val)
    return val_mean, val_std, far_mean
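calculate_val_far is referenced but not defined here; a minimal sketch of the assumed helper (validation rate and false-accept rate for a given distance threshold) could be:

import numpy as np

def calculate_val_far(threshold, dist, actual_issame):
    # Hypothetical helper: pairs with distance below the threshold are
    # predicted "same"; VAL is the fraction of genuine pairs accepted,
    # FAR the fraction of impostor pairs accepted.
    predict_issame = np.less(dist, threshold)
    true_accept = np.sum(np.logical_and(predict_issame, actual_issame))
    false_accept = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame)))
    n_same = np.sum(actual_issame)
    n_diff = np.sum(np.logical_not(actual_issame))
    val = float(true_accept) / max(float(n_same), 1.0)
    far = float(false_accept) / max(float(n_diff), 1.0)
    return val, far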
Example #17
def getClass(imageWindow, models,z):
	hasLabel=False
	label=999
	for k in models.keys():
		m=models[k]
		l1=m[0]
		l2=m[1]
		l3=m[2]
		
		h1=m[3]
		h2=m[4]
		h3=m[5]

		ch1=numpy.mean(imageWindow[:,:,0])
		ch2=numpy.mean(imageWindow[:,:,1])
		ch3=numpy.mean(imageWindow[:,:,2])
		#print "checking if ", ch1, ch2, ch3, " is between ", h1, l1, h2, l2, h3, l3
		if(l1<ch1<h1 and l2<ch2<h2 and l3<ch3<h3):
			if(not hasLabel):
				label=k
				print("got label ", z[k])
				hasLabel=True
			else:
				print("error, relabeling as :", z[k])
				return 999
	if(not hasLabel):
		return 999
	else:
		return label
Example #18
def updateBackgroundCutoff(fit_data):
    residual_bg = estimateBackground(fit_data.residual)
    mean_residual_bg = numpy.mean(residual_bg)
    fit_data.residual -= residual_bg
    fit_data.residual += mean_residual_bg
    fit_data.background = numpy.mean(fit_data.residual)
    fit_data.cutoff = fit_data.background + fit_data.cur_threshold
Example #19
def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds=10):
    assert(embeddings1.shape[0] == embeddings2.shape[0])
    assert(embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    nrof_thresholds = len(thresholds)
    k_fold = KFold(n_splits=nrof_folds, shuffle=False)
    
    tprs = np.zeros((nrof_folds,nrof_thresholds))
    fprs = np.zeros((nrof_folds,nrof_thresholds))
    accuracy = np.zeros((nrof_folds))
    
    diff = np.subtract(embeddings1, embeddings2)
    dist = np.sum(np.square(diff),1)
    indices = np.arange(nrof_pairs)
    
    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
        
        # Find the best threshold for the fold
        acc_train = np.zeros((nrof_thresholds))
        for threshold_idx, threshold in enumerate(thresholds):
            _, _, acc_train[threshold_idx] = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set])
        best_threshold_index = np.argmax(acc_train)
        for threshold_idx, threshold in enumerate(thresholds):
            tprs[fold_idx,threshold_idx], fprs[fold_idx,threshold_idx], _ = calculate_accuracy(threshold, dist[test_set], actual_issame[test_set])
        _, _, accuracy[fold_idx] = calculate_accuracy(thresholds[best_threshold_index], dist[test_set], actual_issame[test_set])
          
    tpr = np.mean(tprs,0)
    fpr = np.mean(fprs,0)
    return tpr, fpr, accuracy
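calculate_accuracy is likewise assumed; a sketch returning the true-positive rate, false-positive rate and accuracy at a given threshold:

import numpy as np

def calculate_accuracy(threshold, dist, actual_issame):
    # Hypothetical helper: threshold the pair distances and compare the
    # prediction against the ground-truth same/different labels.
    predict_issame = np.less(dist, threshold)
    tp = np.sum(np.logical_and(predict_issame, actual_issame))
    fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame)))
    tn = np.sum(np.logical_and(np.logical_not(predict_issame), np.logical_not(actual_issame)))
    fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame))
    tpr = 0.0 if (tp + fn) == 0 else float(tp) / float(tp + fn)
    fpr = 0.0 if (fp + tn) == 0 else float(fp) / float(fp + tn)
    acc = float(tp + tn) / dist.size
    return tpr, fpr, acc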
Example #20
 def run_epoch(self, session, input_data, input_labels,
               shuffle=True, verbose=True):
   orig_X, orig_y = input_data, input_labels
   dp = self.config.dropout
   # We're interested in keeping track of the loss and accuracy during training
   total_loss = []
   total_correct_examples = 0
   total_processed_examples = 0
   total_steps = len(orig_X) // self.config.batch_size
   for step, (x, y) in enumerate(
     data_iterator(orig_X, orig_y, batch_size=self.config.batch_size,
                  label_size=self.config.label_size, shuffle=shuffle)):
     feed = self.create_feed_dict(input_batch=x, dropout=dp, label_batch=y)
     loss, total_correct, _ = session.run(
         [self.loss, self.correct_predictions, self.train_op],
         feed_dict=feed)
     total_processed_examples += len(x)
     total_correct_examples += total_correct
     total_loss.append(loss)
     ##
     if verbose and step % verbose == 0:
       sys.stdout.write('\r{} / {} : loss = {}'.format(
           step, total_steps, np.mean(total_loss)))
       sys.stdout.flush()
   if verbose:
       sys.stdout.write('\r')
       sys.stdout.flush()
   return np.mean(total_loss), total_correct_examples / float(total_processed_examples)
Example #21
def main():
    road = Road(number_of_cars=30)
    number_of_runs = 100
    seconds_in_run = 60

    road.place_cars()
    speed_limit_list = []
    positions_list = []
    speeds_list = []
    mean_speeds = []
    st_devs = []

    for _ in range(number_of_runs):
        speeds, positions = road.simulate_n_seconds(seconds_in_run)

        mean = np.mean(speeds)
        stdv = np.std(speeds)
        speed_limit_list.append(mean + stdv)
        mean_speeds.append(mean)
        st_devs.append(stdv)

        if _ in {0, 9, 34, 74, 99}:
            positions_list.append(positions[:])
            speeds_list.append(speeds)

    return (int(np.mean(speed_limit_list)), positions_list, speeds_list,
            mean_speeds, st_devs)
Example #22
def test_decimate():
    """Test decimation of digitizer headshapes with too many points."""
    # load headshape and convert to meters
    hsp_mm = _get_ico_surface(5)['rr'] * 100
    hsp_m = hsp_mm / 1000.

    # save headshape to a file in mm in temporary directory
    tempdir = _TempDir()
    sphere_hsp_path = op.join(tempdir, 'test_sphere.txt')
    np.savetxt(sphere_hsp_path, hsp_mm)

    # read in raw data using spherical hsp, and extract new hsp
    with warnings.catch_warnings(record=True) as w:
        raw = read_raw_kit(sqd_path, mrk_path, elp_txt_path, sphere_hsp_path)
    assert_true(any('more than' in str(ww.message) for ww in w))
    # collect headshape from raw (should now be in m)
    hsp_dec = np.array([dig['r'] for dig in raw.info['dig']])[8:]

    # with 10242 points and _decimate_points set to resolution of 5 mm, hsp_dec
    # should be a bit over 5000 points. If not, something is wrong or
    # decimation resolution has been purposefully changed
    assert_true(len(hsp_dec) > 5000)

    # should have similar size, distance from center
    dist = np.sqrt(np.sum((hsp_m - np.mean(hsp_m, axis=0))**2, axis=1))
    dist_dec = np.sqrt(np.sum((hsp_dec - np.mean(hsp_dec, axis=0))**2, axis=1))
    hsp_rad = np.mean(dist)
    hsp_dec_rad = np.mean(dist_dec)
    assert_almost_equal(hsp_rad, hsp_dec_rad, places=3)
Example #23
def modulate_image(gabor_def,
                    visuals,
                    spacials,
                    position,
                    min_contrast=0.0,
                    frequency_data=None,
                    use_local_rms=False):
    
    (pixels_per_degree, gabor_diameter, xf, yf, gaussian, ramp, grating, g) = frequency_data if isinstance(frequency_data, FREQ_DATA) else load_spacial_data(visuals, spacials)
    import time
    st = time.time()
    top_left_pos = (position[0] - (gabor_diameter / 2.0), position[1] - (gabor_diameter / 2.0))
    
    patch = gabor_def.rms_matrix[top_left_pos[0] : top_left_pos[0] + gabor_diameter, top_left_pos[1] : top_left_pos[1] + gabor_diameter, :]
    
    if use_local_rms:
        patch_avg = gabor_def.avg_matrix[top_left_pos[0] : top_left_pos[0] + gabor_diameter, top_left_pos[1] : top_left_pos[1] + gabor_diameter]
        R = (patch_avg / 127.0) - 1
        R = R / (numpy.max(numpy.abs(R))) / 2.0
        rms_measure = numpy.std(R + 0.5) / numpy.mean(R + 0.5)
        print(rms_measure)
        if min_contrast > 0:
            rms_measure = max(rms_measure, min_contrast)
        g = g * (255.0 * rms_measure)
    else:
        g = g * (255.0 * gabor_def.rms_measure)
    
    g = g - numpy.mean(g)
    
    gabor = numpy.transpose(numpy.tile(g, (3,1,1)), (1,2,0))
    print("took {0}".format((time.time() - st) * 1000.0))
    return GABOR_DATA._make([top_left_pos, gabor_diameter, gabor_diameter / 2.0, patch, numpy.clip(patch + gabor, 0, 255).astype('uint8')])
Example #24
 def testPdfOfSampleMultiDims(self):
   student = student_t.StudentT(df=[7., 11.], loc=[[5.], [6.]], scale=3.)
   self.assertAllEqual([], student.event_shape)
   self.assertAllEqual([], self.evaluate(student.event_shape_tensor()))
   self.assertAllEqual([2, 2], student.batch_shape)
   self.assertAllEqual([2, 2], self.evaluate(student.batch_shape_tensor()))
   num = 50000
   samples = student.sample(num, seed=123456)
   pdfs = student.prob(samples)
   sample_vals, pdf_vals = self.evaluate([samples, pdfs])
   self.assertEqual(samples.get_shape(), (num, 2, 2))
   self.assertEqual(pdfs.get_shape(), (num, 2, 2))
   self.assertNear(5., np.mean(sample_vals[:, 0, :]), err=.03)
   self.assertNear(6., np.mean(sample_vals[:, 1, :]), err=.03)
   self._assertIntegral(sample_vals[:, 0, 0], pdf_vals[:, 0, 0], err=0.02)
   self._assertIntegral(sample_vals[:, 0, 1], pdf_vals[:, 0, 1], err=0.02)
   self._assertIntegral(sample_vals[:, 1, 0], pdf_vals[:, 1, 0], err=0.02)
   self._assertIntegral(sample_vals[:, 1, 1], pdf_vals[:, 1, 1], err=0.02)
   if not stats:
     return
   self.assertNear(
       stats.t.var(7., loc=0., scale=3.),  # loc d.n. effect var
       np.var(sample_vals[:, :, 0]),
       err=.4)
   self.assertNear(
       stats.t.var(11., loc=0., scale=3.),  # loc d.n. effect var
       np.var(sample_vals[:, :, 1]),
       err=.4)
Example #25
 def testEpsilon_MOEA_NegativeDTLZ2(self):
     random = pyotl.utility.Random(1)
     problemGen = lambda: pyotl.problem.real.NegativeDTLZ2(3)
     problem = problemGen()
     pathProblem = os.path.join(self.pathData, type(problem).__name__.replace('Negative', ''), str(problem.GetNumberOfObjectives()))
     crossover = pyotl.crossover.real.SimulatedBinaryCrossover(random, 1, problem.GetBoundary(), 20)
     mutation = pyotl.mutation.real.PolynomialMutation(random, 1 / float(len(problem.GetBoundary())), problem.GetBoundary(), 20)
     epsilon = pyotl.utility.PyList2Vector_Real([0.06] * problem.GetNumberOfObjectives())
     pfList = []
     for _ in range(self.repeat):
         problem = problemGen()
         initial = pyotl.initial.real.BatchUniform(random, problem.GetBoundary(), 100)
         optimizer = pyotl.optimizer.couple_couple.real.Epsilon_MOEA(random, problem, initial, crossover, mutation, epsilon)
         while optimizer.GetProblem().GetNumberOfEvaluations() < 30000:
             optimizer()
         pf = pyotl.utility.PyListList2VectorVector_Real(
             [list(solution.objective_) for solution in optimizer.GetSolutionSet()])
         for objective in pf:
             problem.Fix(objective)
         pfList.append(pf)
     pathCrossover = os.path.join(pathProblem, type(crossover).__name__)
     pathOptimizer = os.path.join(pathCrossover, type(optimizer).__name__)
     pfTrue = pyotl.utility.PyListList2VectorVector_Real(numpy.loadtxt(os.path.join(pathProblem, 'PF.csv')).tolist())
     # GD
     indicator = pyotl.indicator.real.DTLZ2GD()
     metricList = [indicator(pf) for pf in pfList]
     rightList = numpy.loadtxt(os.path.join(pathOptimizer, 'GD.csv')).tolist()
     self.assertGreater(scipy.stats.ttest_ind(rightList, metricList)[1], 0.05, [numpy.mean(rightList), numpy.mean(metricList), metricList])
     # IGD
     indicator = pyotl.indicator.real.InvertedGenerationalDistance(pfTrue)
     metricList = [indicator(pf) for pf in pfList]
     rightList = numpy.loadtxt(os.path.join(pathOptimizer, 'IGD.csv')).tolist()
     self.assertGreater(scipy.stats.ttest_ind(rightList, metricList)[1], 0.05, [numpy.mean(rightList), numpy.mean(metricList), metricList])
Example #26
def EN_CID(y):
    """
    CID measure from Batista, G. E. A. P. A., Keogh, E. J., Tataw, O. M. & de
    Souza, V. M. A. CID: an efficient complexity-invariant distance for time
    series. Data Min Knowl. Disc. 28, 634-669 (2014).
    
    Arguments
    ---------

    y: a nitime time-series object, or numpy vector

    """

    # Make the input a row vector of numbers:
    y = makeRowVector(vectorize(y))

    # Prepare the output dictionary
    out = {}
    
     # Original definition (in Table 2 of paper cited above)
    out['CE1'] = np.sqrt(np.mean(np.power(np.diff(y),2))); # sum -> mean to deal with non-equal time-series lengths

    # Definition corresponding to the line segment example in Fig. 9 of the paper
    # cited above (using Pythagoras's theorum):
    out['CE2'] = np.mean(np.sqrt(1 + np.power(np.diff(y),2)));

    return out
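A short usage example; makeRowVector and vectorize come from the surrounding package and are assumed to pass a plain NumPy vector through unchanged:

import numpy as np

y = np.sin(np.linspace(0, 10 * np.pi, 500)) + 0.1 * np.random.randn(500)
out = EN_CID(y)
print(out['CE1'], out['CE2'])  # rougher series yield larger complexity estimates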
Example #27
def SB_MotifTwo(y,binarizeHow='diff'):
    """
    Looks at local motifs in a binary symbolization of the time series, which is performed by a
    given binarization method
    
    Arguments
    ---------

    y: a nitime time-series object, or numpy vector

    """
    
    # Make the input a row vector of numbers:
    y = makeRowVector(vectorize(y))

    # Make binarization on incremental differences:
    if binarizeHow == 'diff':
        yBin = ((np.sign(np.diff(y)))+1.)/2.
    else:
        raise ValueError(binarizeHow)
        
    # Initialize output dictionary
    out = {}
    
    # Where the difference is 0, 1
    r0 = yBin==0
    r1 = yBin==1
    

    out['u'] = np.mean(r1)
    out['d'] = np.mean(r0)
    out['h'] = -(out['u']*np.log2(out['u']) + out['d']*np.log2(out['d']))
    
    return out
Example #28
    def update(self, y):
    
        L = Loss().MSE(self.output, y)
        
        # stopping criteria
        self.errors[self.epoch%5] =  numpy.mean(L.E**2)**0.5
        score = numpy.mean(self.errors)    
            
        # stop when error starts to diverge too much
        print(" ", self.bestScore)
        self.stop = score/self.bestScore > 1e60
        
        # save the best weights
        if score < self.bestScore:
            self.bestW = self.W
            self.bestScore = score
            self.bestEpoch = self.epoch
        norm_W = numpy.linalg.norm(self.W)
        sys.stdout.write( "\rEpoch %d: RMSE: %2.3f, Norm(W): %2.2f"%(self.epoch, numpy.mean((y-self.output)**2)**0.5, norm_W) )
        sys.stdout.flush()
        
        # gradients
        grad_outputs = L.dE_dY*(1 - self.output**2)
        dE_dK = numpy.dot(self.hidden.reshape(self.n_hidden, 1), grad_outputs.reshape(1, self.n_output))
        
        transfer = numpy.dot(grad_outputs, self.K.T)        
               
        # hidden layer
        grad_hidden =  transfer * (1 - self.hidden**2) 
        dE_dW = numpy.dot(self.X.T , grad_hidden)

        # updating weights
        self.K -= 1.2*self.alpha*dE_dK
        
        self.W -= self.alpha*dE_dW
Example #29
def softmax_experiment():
    """Run softmax experiment."""
    print('Running softmax experiment.')
    taus = [0.01, 0.1, 1]
    ars, pos = [], []
    for tau in taus:
        ar, po = run_experiment(2000, 1000, tau=tau, alpha=0.1)
        ars.append(np.mean(ar, 0))
        pos.append(np.mean(po, 0))
        
    # plot the results
    plt.close('all')
    f, (ax1, ax2) = plt.subplots(2)
    for i,tau in enumerate(taus):
        ax1.plot(ars[i].T, label='$\\tau$ = %.2f' % tau)
        ax2.plot(pos[i].T, label='$\\tau$ = %.2f' % tau)
    ax1.legend(loc='lower right')
    ax1.set_ylabel('Average reward')
    ax1.set_xlim(xmin=-10)
    ax2.legend(loc='lower right')
    ax2.set_xlabel('Plays')
    ax2.set_ylabel('% Optimal action')
    ax2.set_xlim(xmin=-20)
    plt.savefig('softmax_experiment.pdf')
    plt.show()
Example #30
def svm_SVR_C( xM, yV, c_l, graph = True):
	"""
	SVR is performed iteratively with different C values
	until all C in the list are used.
	"""

	r2_l, sd_l = [], []
	for C in c_l:
		print('sklearn.svm.SVR(C={})'.format( C))
		clf = svm.SVR( C = C)
		clf.fit( xM, yV.A1)
		yV_pred = clf.predict(xM)		
		
		r2, sd = regress_show( yV, np.mat( yV_pred).T, graph = graph)
		for X, x in [[r2_l, r2], [sd_l, sd]]:
			X.append( x)

	print('average r2, sd are', np.mean( r2_l), np.mean( sd_l))


	if graph:
		pdw = pd.DataFrame( { 'log10(C)': np.log10(c_l), 'r2': r2_l, 'sd': sd_l})
		pdw.plot( x = 'log10(C)')

	return r2_l, sd_l
Example #31
        inputs = Variable(inputs).cuda()
        labels = Variable(labels).cuda()
        #print('input_shape',inputs.shape)
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        prediction = outputs.data.max(1)[1]
        accuracy = ( float( prediction.eq(labels.data).sum() ) /float(batch_size))*100.0
        train_accu.append(accuracy)

    accuracy_epoch = np.mean(train_accu)
    print(epoch, accuracy_epoch)

    if (epoch%5==4):
        correct = 0
        total = 0
        for data in testloader:
            inputs, labels = data
            inputs, labels = Variable(inputs).cuda(), Variable(labels).cuda()
            outputs = net(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            #correct += (predicted == labels).sum().item()
            correct+=predicted.eq(labels.data).sum()

        print('Accuracy of the network on the 10000 test images: %d %%' % (100.0 * float(correct) / float(total)))
Example #32
 with tf.device('/gpu:0'):
     n_epochs = 50
     N_train = len(q_train)
     n_batches = N_train // batch_size + 1
     for epoch in range(n_epochs):
         epoch_loss = []
         times = 0.
         indexes = np.arange(N_train)
         np.random.shuffle(indexes)
         q_train = q_train[indexes]
         a_train = a_train[indexes]
         for idx in range(n_batches):
             tic = time()
             if idx%(n_batches//10)==0:
                 print("Epoch %d - %d/%d : loss = %1.4f - time = %1.3fs"%(epoch,idx,
                                                                          n_batches,np.mean(epoch_loss),
                                                                          times/((n_batches//10)*batch_size)))
                 times = 0.
             begin = idx*batch_size
             end = min((idx+1)*batch_size, N_train)
             Q, mask, A = get_batch(begin,end,q_train,a_train,batch_size,max_q,Na)
             _,l,l_s = sess.run([model_outputs['train_op'],
                                 model_outputs['loss'],
                                 model_outputs['loss_summary']], 
                                feed_dict={model_outputs['question']:Q,
                                           model_outputs['mask']:mask,
                                           model_outputs['answer']:A})
             epoch_loss.append(l)
             writer.add_summary(l_s,idx+epoch*n_batches)
             times += time() - tic
         with tf.device('/cpu:0'):
Example #33
def _SSIMForMultiScale(img1,
                       img2,
                       max_val=255,
                       filter_size=11,
                       filter_sigma=1.5,
                       k1=0.01,
                       k2=0.03):
    """Return the Structural Similarity Map between `img1` and `img2`.

    This function attempts to match the functionality of ssim_index_new.m by
    Zhou Wang: http://www.cns.nyu.edu/~lcv/ssim/msssim.zip

    Arguments:
      img1: Numpy array holding the first RGB image batch.
      img2: Numpy array holding the second RGB image batch.
      max_val: the dynamic range of the images (i.e., the difference between the
        maximum the and minimum allowed values).
      filter_size: Size of blur kernel to use (will be reduced for small images).
      filter_sigma: Standard deviation for Gaussian blur kernel (will be reduced
        for small images).
      k1: Constant used to maintain stability in the SSIM calculation (0.01 in
        the original paper).
      k2: Constant used to maintain stability in the SSIM calculation (0.03 in
        the original paper).

    Returns:
      Pair containing the mean SSIM and contrast sensitivity between `img1` and
      `img2`.

    Raises:
      RuntimeError: If input images don't have the same shape or don't have four
        dimensions: [batch_size, height, width, depth].
    """
    if img1.shape != img2.shape:
        raise RuntimeError(
            'Input images must have the same shape (%s vs. %s).', img1.shape,
            img2.shape)
    if img1.ndim != 4:
        raise RuntimeError('Input images must have four dimensions, not %d',
                           img1.ndim)

    img1 = img1.astype(np.float64)
    img2 = img2.astype(np.float64)
    _, height, width, _ = img1.shape

    # Filter size can't be larger than height or width of images.
    size = min(filter_size, height, width)

    # Scale down sigma if a smaller filter size is used.
    sigma = size * filter_sigma / filter_size if filter_size else 0

    if filter_size:
        window = np.reshape(_FSpecialGauss(size, sigma), (1, size, size, 1))
        mu1 = signal.fftconvolve(img1, window, mode='valid')
        mu2 = signal.fftconvolve(img2, window, mode='valid')
        sigma11 = signal.fftconvolve(img1 * img1, window, mode='valid')
        sigma22 = signal.fftconvolve(img2 * img2, window, mode='valid')
        sigma12 = signal.fftconvolve(img1 * img2, window, mode='valid')
    else:
        # Empty blur kernel so no need to convolve.
        mu1, mu2 = img1, img2
        sigma11 = img1 * img1
        sigma22 = img2 * img2
        sigma12 = img1 * img2

    mu11 = mu1 * mu1
    mu22 = mu2 * mu2
    mu12 = mu1 * mu2
    sigma11 -= mu11
    sigma22 -= mu22
    sigma12 -= mu12

    # Calculate intermediate values used by both ssim and cs_map.
    c1 = (k1 * max_val)**2
    c2 = (k2 * max_val)**2
    v1 = 2.0 * sigma12 + c2
    v2 = sigma11 + sigma22 + c2
    ssim = np.mean((((2.0 * mu12 + c1) * v1) / ((mu11 + mu22 + c1) * v2)))
    cs = np.mean(v1 / v2)
    return ssim, cs
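A minimal usage sketch with random image batches of shape [batch, height, width, depth]; _FSpecialGauss and signal (scipy.signal) come from the surrounding module:

import numpy as np

rng = np.random.RandomState(0)
img = rng.randint(0, 256, size=(1, 64, 64, 3)).astype(np.float64)
noisy = np.clip(img + rng.normal(0, 10, size=img.shape), 0, 255)

ssim_same, _ = _SSIMForMultiScale(img, img)      # identical images -> SSIM ~ 1.0
ssim_noisy, cs = _SSIMForMultiScale(img, noisy)  # noise lowers SSIM and CS
print(ssim_same, ssim_noisy, cs)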
Example #34
def plot_collapse(flat_list,
                  gamma_shape,
                  min_rep=10,
                  min_d=4,
                  ax=None,
                  str_leg=None,
                  extrapolate=False,
                  color='r',
                  show_subplots=True):

    #Definitions
    interp_points = 1000

    if ax is None:
        plt.figure()
        ax = plt.gca()  # ensure ax refers to the axes used for plotting below
    else:
        plt.sca(ax)

    #Flattens list of reps
    #flat_list = np.array([item for sublist in shape_list for item in sublist])
    flat_list = np.array(flat_list)

    #List of avalanche sizes
    shape_size = np.zeros(len(flat_list))
    for i in range(len(flat_list)):
        shape_size[i] = flat_list[i].size

    max_size = shape_size.max()

    #Avalanche size count
    shape_count, _ = np.histogram(shape_size, bins=np.arange(0, max_size + 2))

    #Censors data by size
    censor_d_keep = np.arange(0, max_size + 1) >= min_d
    censor_rep_keep = shape_count >= min_rep
    censor_index = np.where(
        [a and b for a, b in zip(censor_d_keep, censor_rep_keep)])[0]

    #Defines average size matrix
    average_shape = np.zeros((censor_index.size, interp_points))

    #Defines bottom interpolation range from data, to prevent extrapolation bias
    #x_min = 1/censor_index[0]
    if extrapolate is True:
        x_min = 0
    elif extrapolate is False:
        x_min = 1 / censor_index[0]
    else:
        raise ValueError('extrapolate is not binary.')
    x_range = np.linspace(x_min, 1, num=interp_points)

    #Averages shape for each duration and interpolates results
    y_min = 100
    for i in range(len(censor_index)):

        #Calculates average shape
        size_i = censor_index[i]
        avg_shape_i_y = np.mean(flat_list[shape_size == size_i]) / np.power(
            size_i, gamma_shape - 1)
        avg_shape_i_x = np.arange(1, size_i + 1) / size_i

        if np.min(avg_shape_i_y) < y_min:
            y_min = np.min(avg_shape_i_y)

        #Interpolates results
        fx = InterpolatedUnivariateSpline(avg_shape_i_x, avg_shape_i_y)
        average_shape[i, :] = fx(x_range)

        #Plots transparent subplots
        if show_subplots:
            ax.plot(avg_shape_i_x, avg_shape_i_y, alpha=0.2, color=color)

    #Plots interpolated average curve
    if show_subplots:
        color_collapse = 'k'
    else:
        color_collapse = color
    plot_line, = ax.plot(x_range,
                         np.mean(average_shape, axis=0),
                         color=color_collapse,
                         linewidth=2,
                         label=str_leg)
    ax.legend([plot_line], [str_leg])
    plt.legend()

    #Beautifies plot
    ax.set_xlabel('Scaled time')
    ax.set_ylabel('Scaled activity')
    plt.xlim([0, 1])
Example #35
def run_analysis(data, newFig=True, label='Data', color='k'):

    #Sets up figure
    if newFig is True:
        fig = plt.figure(figsize=(18, 12))
        gs = fig.add_gridspec(2, 2)
        ax_pS = fig.add_subplot(gs[0, 0])
        ax_pD = fig.add_subplot(gs[0, 1])
        ax_avgS = fig.add_subplot(gs[1, 0])
        ax_shape = fig.add_subplot(gs[1, 1])
    else:
        fig = plt.gcf()
        if len(fig.get_axes()) != 4:
            raise ValueError('Current figure does not have a 2x2 layout.')
        ax_pS, ax_pD, ax_avgS, ax_shape = fig.get_axes()

    #Analyzes avalanches

    avalanches = get_avalanches(data)
    S_list = [avalanches[i]['S'] for i in avalanches.keys()]
    D_list = [avalanches[i]['D'] for i in avalanches.keys()]
    shape_list = [avalanches[i]['shape'] for i in avalanches.keys()]

    #Calculates S_avg
    S_avg = np.zeros((np.max(D_list), 3))
    for i in range(np.max(D_list)):
        S_avg[i, 0] = i + 1
        S_D = [
            avalanches[j]['S'] for j in avalanches.keys()
            if avalanches[j]['D'] == i + 1
        ]
        S_avg[i, 1] = np.mean(S_D)
        S_avg[i, 2] = np.std(S_D)

    #Plots p(S)
    fit_pS = powerlaw.Fit(S_list, xmin=1)
    str_label = label + r': $\alpha$ = {:0.3f}'.format(fit_pS.power_law.alpha)
    fit_pS.plot_pdf(ax=ax_pS, color=color, **{'label': str_label})
    fit_pS.power_law.plot_pdf(ax=ax_pS, color='k', linestyle='--')

    #Plots p(D)
    fit_pD = powerlaw.Fit(D_list, xmin=1)
    str_label = label + r': $\beta$ = {:0.3f}'.format(fit_pD.power_law.alpha)
    fit_pD.plot_pdf(ax=ax_pD, color=color, **{'label': str_label})
    fit_pD.power_law.plot_pdf(ax=ax_pD, color='k', linestyle='--')

    #Plots <S>(D)
    fit_gamma, _, _ = fit_powerlaw(S_avg[:, 0],
                                   S_avg[:, 1],
                                   S_avg[:, 2],
                                   loglog=True)
    str_label = label + r': $\gamma$ = {:0.3f}'.format(fit_gamma)
    ax_avgS.plot(S_avg[:, 0], S_avg[:, 1], label=str_label, color=color)
    ax_avgS.plot(S_avg[:, 0],
                 np.power(S_avg[:, 0], fit_gamma),
                 color='k',
                 linestyle='--')

    #Fits and plots the average avalanche shape
    fit_gamma_shape = fit_collapse(shape_list, 4, 20, extrapolate=True)
    str_leg = label + r': $\gamma_s$ = {:0.2f}'.format(fit_gamma_shape)
    plot_collapse(shape_list,
                  fit_gamma_shape,
                  4,
                  20,
                  ax_shape,
                  str_leg,
                  True,
                  color,
                  show_subplots=False)

    print('== Exponents for {:s} =='.format(label))
    print('alpha = {:0.3f}'.format(fit_pS.power_law.alpha))
    print('beta = {:0.3f}'.format(fit_pD.power_law.alpha))
    print('gamma_scaling = {:0.3f}'.format(
        (fit_pD.power_law.alpha - 1) / (fit_pS.power_law.alpha - 1)))
    print('gamma = {:0.3f}'.format(fit_gamma))
    print('gamma_shape = {:0.3f}'.format(fit_gamma_shape))

    #Beautifies plots
    plt.sca(ax_pS)
    plt.legend(loc='upper right')
    plt.xlabel('S')
    plt.ylabel('p(S)')
    plt.sca(ax_pD)
    plt.legend(loc='upper right')
    plt.xlabel('D')
    plt.ylabel('p(D)')
    plt.sca(ax_avgS)
    plt.legend(loc='upper left')
    plt.xlabel('D')
    plt.ylabel(r'$\langle S \rangle$ (D)')
    plt.xlim([1, 1e3])
    plt.ylim([1, 1e5])
    ax_avgS.set_xscale('log')
    ax_avgS.set_yscale('log')
Example #36
def fit_collapse(flat_list, min_d, min_rep, extrapolate=False):

    #Definitions
    interp_points = 1000
    gamma_x0 = 0.5
    opt_bounds = (-1, 5)

    #Flattens list of reps
    #flat_list = np.array([item for sublist in shape_list for item in sublist])
    flat_list = np.array(flat_list)

    #List of avalanche sizes
    shape_size = np.zeros(len(flat_list))
    for i in range(len(flat_list)):
        shape_size[i] = flat_list[i].size

    max_size = shape_size.max()

    #Avalanche size count
    shape_count, _ = np.histogram(shape_size, bins=np.arange(0, max_size + 2))

    #Censors data by size
    censor_d_keep = np.arange(0, max_size + 1) >= min_d
    censor_rep_keep = shape_count >= min_rep
    censor_index = np.where(
        [a and b for a, b in zip(censor_d_keep, censor_rep_keep)])[0]

    #Defines average size matrix
    average_shape = np.zeros((censor_index.size, interp_points))

    #Defines bottom interpolation range from data, to prevent extrapolation bias
    if extrapolate is True:
        x_min = 0
    elif extrapolate is False:
        x_min = 1 / censor_index[0]
    else:
        raise ValueError('extrapolate is not binary.')
    x_range = np.linspace(x_min, 1, num=interp_points)

    #Averages shape for each duration and interpolates results
    for i in range(len(censor_index)):

        #Calculates average shape
        size_i = censor_index[i]
        avg_shape_i_y = np.mean(flat_list[shape_size == size_i])
        avg_shape_i_x = np.arange(1, size_i + 1) / size_i

        #Interpolates results
        fx = InterpolatedUnivariateSpline(avg_shape_i_x, avg_shape_i_y)
        average_shape[i, :] = fx(x_range)

    #Error function for optimization
    def _error(gamma_shape, *params):
        average_shape, censor_index = params
        shape_scaled = np.zeros((censor_index.size, interp_points))
        for i in range(censor_index.size):
            shape_scaled[i, :] = average_shape[i, :] / np.power(
                censor_index[i], gamma_shape)

        err = np.mean(np.var(shape_scaled, axis=0)) / np.power(
            (np.max(np.max(shape_scaled)) - np.min(np.min(shape_scaled))), 2)
        return err

    #Minimizes error
    minimize_obj = minimize(_error,
                            x0=[gamma_x0],
                            args=(average_shape, censor_index),
                            bounds=[opt_bounds])

    return minimize_obj.x[0] + 1
Example #37
    session.run(tf.global_variables_initializer())

    for iteration in range(ITERS):
      start_time = time.time()
      _input_noise = np.random.normal(size=(BATCH_SIZE, NOISE_DIM))

      _dis_cost = []
      for i in range(CRITIC_ITERS):
        _data = next(inf_train_gen())
        _dis_cost_, _ = session.run([dis_cost, dis_train_op],
                                    feed_dict={real_data: _data,
                                               input_noise: _input_noise})
        _dis_cost.append(_dis_cost_)
        if clip_dis_weights:
          _ = session.run(clip_dis_weights)
      _dis_cost = np.mean(_dis_cost)

      _ = session.run(gen_train_op, feed_dict={input_noise: _input_noise})
      _inv_cost, _ = session.run([inv_cost, inv_train_op],
                                 feed_dict={input_noise: _input_noise})

      lib.plot.plot('train discriminator cost', _dis_cost)
      lib.plot.plot('train invertor cost', _inv_cost)
      lib.plot.plot('time', time.time() - start_time)

      if iteration % 1000 == 999:
        test_dis_costs = []
        for test_instances, _ in gen(X_test, y_test):
          _test_dis_cost = session.run(dis_cost,
                                       feed_dict={real_data: test_instances,
                                                  input_noise: _input_noise})
Example #38
def isMaxWhite(plate):
	avg = np.mean(plate)
	if avg >= 115:
		return True
	else:
		return False
Example #39
results = cross_validate(neural_network,
                         clinicalInput,
                         clinicalOutput,
                         cv=10,
                         scoring=("accuracy", "f1", "recall", "precision"))

import matplotlib.pyplot as plt
plt.plot(results["test_accuracy"], color="c")
plt.plot(results["test_f1"], color="m")
plt.plot(results["test_recall"], color="y")
plt.plot(results["test_precision"], color="k")
plt.title("Model Information (RNN)")
plt.ylabel("Model Performance")
plt.xlabel("Number of Folds")
plt.legend(["Accuracy", "F1-Score", "Recall", "Precision"], loc="lower right")
plt.show()

#Determine the prediction
y_pred = cross_val_predict(neural_network,
                           clinicalInput,
                           clinicalOutput,
                           cv=10)

#Provide AUC score
from sklearn.metrics import roc_auc_score
print("Accuracy result: ", np.mean(results["test_accuracy"]))
print("Recall result: ", np.mean(results["test_recall"]))
print("Precision result: ", np.mean(results["test_precision"]))
print("F1 result: ", np.mean(results["test_f1"]))
print("ROC: ", roc_auc_score(clinicalOutput, y_pred))
Example #40
def run_test(title, test, setup, repeats=10):
    print('{:>60}'.format(title + ':'), end=' ')
    x = timeit.Timer(test, setup=setup).repeat(repeats, 1)
    print('{:>9.3f} {:>9.3f} {:>9.3f} {:9d}'.format(min(x), np.mean(x), max(x), repeats))
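A usage sketch timing a NumPy reduction; the statement and setup strings follow timeit's conventions, and the array name is made up:

import timeit
import numpy as np

run_test('sum of 1e6 floats', 'np.sum(a)',
         'import numpy as np; a = np.random.rand(1000000)', repeats=5)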
Example #41
# For computing reasons I'm limiting the dataframe length to 15,000 users
merged_df=merged_df[['user_id', 'name', 'user_rating']]
merged_subdf= merged_df[merged_df.user_id <= 15000]
merged_subdf.head()

#Create a matrix of Users vs Animes wih User Ratings as the values
piv_table = merged_subdf.pivot_table(index=['user_id'], columns=['name'], values='user_rating')
print(piv_table.shape)
piv_table.head()

# Standardization is being done here.
# All users with only one rating or who had rated everything the same will be dropped

# Normalize the values
norm_piv_table = piv_table.apply(lambda x: (x-np.mean(x))/(np.max(x)-np.min(x)), axis=1)

# Drop all columns containing only zeros. These represent users who did not rate
norm_piv_table.fillna(0, inplace=True)
norm_piv_table = norm_piv_table.T
norm_piv_table = norm_piv_table.loc[:, (norm_piv_table != 0).any(axis=0)]

# Our data needs to be in a sparse matrix format to be read by the following functions
sparse_matrix = sp.sparse.csr_matrix(norm_piv_table.values)

#Calculate item-item similarity and user-user similarity
item_similarity = cosine_similarity(sparse_matrix)
user_similarity = cosine_similarity(sparse_matrix.T)

# Convert the similarity matrices into dataframes
item_similarity_df = pd.DataFrame(item_similarity, index = norm_piv_table.index, columns = norm_piv_table.index)
Example #42
def rms_flat(a):
    # Return the root mean square of all the elements of *a*, flattened out.
    return numpy.sqrt(numpy.mean(numpy.absolute(a)**2))
Example #43
    def download_one_rib_before_unix(self, my_date,
                                     unix):  # my_date for deciding month
        tmp_month = my_date[0:4] + '.' + my_date[4:6]
        if self.co.startswith('rrc'):
            web_location = rrc_root + self.co + '/' + tmp_month + '/'
        else:
            web_location = rv_root + self.co + '/bgpdata/' + tmp_month + '/RIBS/'
            web_location = web_location.replace('//', '/')

        try:
            webraw = cmlib.get_weblist('http://' + web_location)
            print('Getting list from ' + 'http://' + web_location)
        except:
            return -1

        cmlib.make_dir(datadir + web_location)

        #----------------------------------------------------------------
        # select a RIB file right before the unix and with reasonable (not strange) file size
        rib_list = webraw.split('\n')
        rib_list = [item for item in rib_list if item not in ('', '\n')]
        rib_list = [
            item for item in rib_list if 'rib' in item or 'bview' in item
        ]

        sizelist = list()
        for line in rib_list:
            size = line.split()[-1]
            fsize = cmlib.parse_size(size)
            sizelist.append(fsize)

        avg = np.mean(sizelist)

        ok_rib_list = list()  # RIBs whose size is OK
        for line in rib_list:
            fsize = cmlib.parse_size(line.split()[-1])
            if fsize > 0.9 * avg:
                ok_rib_list.append(line)

        target_line = None  # the RIB closest to unix
        min = 9999999999
        for line in ok_rib_list:
            fdate = line.split()[0].split('.')[-3]
            ftime = line.split()[0].split('.')[-2]
            dtstr = fdate + ftime
            objdt = datetime.datetime.strptime(dtstr, '%Y%m%d%H%M')
            runix = time_lib.mktime(
                objdt.timetuple()) + 8 * 60 * 60  # adjust for the +8 hour time zone offset
            print(objdt, runix, unix)
            if runix <= unix and unix - runix < min:
                min = unix - runix
                print('min changed to ', min)
                target_line = line

        print('Selected RIB:', target_line)
        if target_line is None:
            return -1
        size = target_line.split()[-1]  # claimed RIB file size
        fsize = cmlib.parse_size(size)

        filename = target_line.split()[0]
        full_loc = datadir + web_location + filename  # .bz2/.gz

        if os.path.exists(full_loc + '.txt'):  # only for clearer logic
            os.remove(full_loc + '.txt')

        #------------------------------------------------------------------
        # Download the RIB
        if os.path.exists(full_loc + '.txt.gz'):
            print('existed!!!!!!!!!!!!')
            return full_loc + '.txt.gz'  # Do not download

        if os.path.exists(full_loc):
            cmlib.parse_mrt(full_loc, full_loc + '.txt', fsize)
            cmlib.pack_gz(full_loc + '.txt')
            return full_loc + '.txt.gz'

        cmlib.force_download_file('http://' + web_location,
                                  datadir + web_location, filename)
        cmlib.parse_mrt(full_loc, full_loc + '.txt', fsize)
        cmlib.pack_gz(full_loc + '.txt')
        os.remove(full_loc)  # remove the original file

        return full_loc + '.txt.gz'
Example #44
def test_objective(x):
    """
    An ackley defined on a low D space.
    """
    xs = x.reshape([1, x.shape[0]])
    z = model(tf.cast(xs, tf.float32)).numpy().reshape(R)
    # Reshape according to the extent of the low D points.
    return (myackley(z))


# Entropy max initial design
N = N_init
design = neural_maxent(N, P, L, H, R,
                       net_weights=model.get_weights())['design']
response_us = np.apply_along_axis(test_objective, 1, design)
y_mu = np.mean(response_us)
y_sig = np.std(response_us)
response = (response_us - y_mu) / y_sig

design, response, explored = seq_design(design=design,
                                        response=response,
                                        model=model,
                                        objective=test_objective,
                                        seq_steps=seq_steps,
                                        explore_starts=explore_starts,
                                        verbose=True)
design_tf = tf.Variable(design)

## Contour plot
delta = 0.025
x = np.arange(extent[0], extent[1], delta)
Example #45
# 	# print("")
# 	# print(negatives_CD_costs[i])
# 	# exit()

# 	##Append positives
# 	idx_selected = np.where(np.isin(neg_candidate_idxs[i], np.array(positives)))[0]
# 	[positives_CD_costs[i].append(positive_costs[idx_selected[x]]) for x in range(idx_selected.shape[0])]	
# ########################################################

num_pos = np.array(num_pos)
num_neg = np.array(num_neg)
log_string(str(len(positives_idx)))
log_string("Num models no positives: "+str(num_models_no_positives))
log_string("Num models no negatives: "+str(num_models_no_negatives))
log_string("Average number of positives: "+str(np.mean(num_pos)))
log_string("Average number of negatives: "+str(np.mean(num_neg)))

# dict_value = {"positives_cost": positives_CD_costs, "negatives_cost":negatives_CD_costs}
# filename = 'arap_tripletv4_' + DATA_SPLIT + '_'+OBJ_CAT+'_cost.pickle'

dict_value = {"positives": positives_idx, "negatives":negatives_idx}
filename = 'arap_triplet_' + DATA_SPLIT + '_'+OBJ_CAT+'.pickle'

log_string("Filename: "+filename)

with open(filename, 'w') as handle:
    pickle.dump(dict_value, handle, protocol=pickle.HIGHEST_PROTOCOL)

LOG_FOUT.close()
Example #46
    def download_one_rib(self, my_date):
        tmp_month = my_date[0:4] + '.' + my_date[4:6]
        if self.co.startswith('rrc'):
            web_location = rrc_root + self.co + '/' + tmp_month + '/'
        else:
            web_location = rv_root + self.co + '/bgpdata/' + tmp_month + '/RIBS/'
            web_location = web_location.replace('//', '/')
        webraw = cmlib.get_weblist('http://' + web_location)

        cmlib.make_dir(datadir + web_location)

        #----------------------------------------------------------------
        # select a RIB file with reasonable (not strange) file size
        rib_list = webraw.split('\n')
        # filter() returns a new sequence, so the result must be reassigned
        rib_list = [item for item in rib_list if item not in ('', '\n')]
        rib_list = [
            item for item in rib_list if 'rib' in item or 'bview' in item
        ]

        sizelist = list()
        for line in rib_list:
            size = line.split()[-1]
            fsize = cmlib.parse_size(size)
            sizelist.append(fsize)

        avg = np.mean(sizelist)

        target_line = None  # stores the RIB file selected for downloading
        largest_line = None
        max_size = -1  # avoid shadowing the built-in max()
        closest = 99999
        for line in rib_list:
            fdate = line.split()[0].split('.')[-3]
            size = line.split()[-1]
            fsize = cmlib.parse_size(size)
            if fsize > max_size:
                max_size = fsize
                largest_line = line

            diff = abs(int(fdate) - int(my_date))  # >= 0
            # Prefer the RIB whose date is closest to my_date, but only among
            # files whose size is within 10% of the average; this filters out
            # truncated or otherwise abnormal dumps.
            if diff <= closest and 0.9 * avg < fsize < 1.1 * avg:
                target_line = line
                closest = diff

        if target_line is None:
            assert largest_line is not None
            print 'Failed. Resort to downloading the largest RIB...'
            target_line = largest_line  # work-around for a special case

        print 'Selected RIB:', target_line
        size = target_line.split()[-1]  # claimed RIB file size
        fsize = cmlib.parse_size(size)

        filename = target_line.split()[0]
        full_loc = datadir + web_location + filename  # .bz2/.gz

        if os.path.exists(full_loc + '.txt'):  # only for clearer logic
            os.remove(full_loc + '.txt')

        #------------------------------------------------------------------
        # Download the RIB
        if os.path.exists(full_loc + '.txt.gz'):
            print 'existed size & original size:', os.path.getsize(
                full_loc + '.txt.gz'), fsize
            if os.path.getsize(full_loc +
                               '.txt.gz') > 0.6 * fsize:  # 0.6 is good enough
                return full_loc + '.txt.gz'  # Do not download
            else:
                os.remove(full_loc + '.txt.gz')  # too small to be complete

        if os.path.exists(full_loc):
            if os.path.getsize(full_loc) <= 0.95 * fsize:
                os.remove(full_loc)
            else:  # Good!
                cmlib.parse_mrt(full_loc, full_loc + '.txt', fsize)
                cmlib.pack_gz(full_loc + '.txt')
                return full_loc + '.txt.gz'

        cmlib.force_download_file('http://' + web_location,
                                  datadir + web_location, filename)
        cmlib.parse_mrt(full_loc, full_loc + '.txt', fsize)
        cmlib.pack_gz(full_loc + '.txt')
        os.remove(full_loc)  # remove the original file

        return full_loc + '.txt.gz'
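# A minimal standalone sketch (synthetic entries; names and sizes are
# illustrative assumptions) of the selection heuristic above: prefer the file
# whose timestamp is closest to the requested date, but only among files whose
# size is within 10% of the average, and fall back to the largest file when
# nothing qualifies.
def pick_rib(entries, my_date):
    """entries: list of (filename, yyyymmdd string, size) tuples."""
    avg = sum(size for _, _, size in entries) / float(len(entries))
    largest = max(entries, key=lambda e: e[2])
    target, closest = None, 99999
    for name, fdate, size in entries:
        diff = abs(int(fdate) - int(my_date))
        if diff <= closest and 0.9 * avg < size < 1.1 * avg:
            target, closest = (name, fdate, size), diff
    return target if target is not None else largest

entries = [('rib.20190301.0000.bz2', '20190301', 100),
           ('rib.20190302.0000.bz2', '20190302', 98),
           ('rib.20190303.0000.bz2', '20190303', 80),   # undersized dump
           ('rib.20190304.0000.bz2', '20190304', 102)]
print(pick_rib(entries, '20190303'))  # skips the undersized dump for that day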
Example #47
0
    def getPvals(self, geneNames, num_cores=1):
        """Get p-Values for the the list of genes, one for each column in the ratios matrix

         Keyword arguments:
         geneNames  -- A list of genes in the cluster
         singleCore -- Set to True to use a single core.  False may not work.
        """
        pVals = {}

        relGenes = list(set(geneNames) & set(self.ratios.row_names))
        curGeneMatrix = self.ratios.submatrix_by_rows(
            self.ratios.row_indexes_for(relGenes))

        noVarNs = []    # These three lists must stay in matched order;
        noVarRats = []  # a single list of 3-element records (n, ratios, column)
        noVarCns = []   # would be cleaner.

        for cn in self.ratios.column_names:
            colIdx = curGeneMatrix.column_indexes_for(column_names=[cn])
            geneVect = curGeneMatrix.column_values(column=colIdx)
            geneVect = [x for x in geneVect if not math.isnan(x)]
            n = len(geneVect)

            if not self.allVars.get(cn, False):
                self.allVars[cn] = {}

            # When multiple cores are available, also precompute distributions
            # for nearby gene counts so the worker pool is used efficiently.
            i_s = [n]
            if num_cores > 1:
                i_s = [n - 3, n - 2, n - 1, n, n + 1, n + 2, n + 3]
            for i in i_s:
                if not self.allVars[cn].get(str(i), False) and i >= 0:
                    ratioVect = self.ratios.column_values(
                        column=self.ratios.column_indexes_for(
                            column_names=[cn]))
                    noVarNs.append(i)
                    noVarRats.append(ratioVect.tolist())
                    noVarCns.append(cn)

        #  2) Use a pool of workers to calculate a distribution for each of the tuples
        if len(noVarNs) > 0:
            logging.info("Calculating some backgrounds for about %d genes",
                         len(geneNames))
            if self.useChi2:
                logging.info("\tFitting variance samples to Chi2 distribution")

            if num_cores > 1:
                newargs = []
                for i in range(0, len(noVarNs)):
                    newargs.append([
                        noVarRats[i], noVarNs[i], self.tolerance, self.maxTime,
                        self.chunkSize, self.verbose, noVarCns[i]
                    ])
                pool = mp.Pool(num_cores)
                newVars = pool.map(getVarianceMeanSDvect_mp_wrapper, newargs)
                pool.close()
                pool.join()
            else:
                tolerance = np.repeat(self.tolerance, len(noVarNs)).tolist()
                maxTime = np.repeat(self.maxTime, len(noVarNs)).tolist()
                chunkSize = np.repeat(self.chunkSize, len(noVarNs)).tolist()
                verbose = np.repeat(self.verbose, len(noVarNs)).tolist()
                newVars = list(
                    map(getVarianceMeanSDvect, noVarRats, noVarNs, tolerance,
                        maxTime, chunkSize, verbose, noVarCns))

        #  3) Assign the new values into the empty slots
        for idx in range(0, len(noVarCns)):
            cn = noVarCns[idx]
            curN = str(noVarNs[idx])
            if self.useChi2:
                curVars = newVars[idx]
                self.allVars[cn][curN] = sp.stats.chi2.fit(curVars,
                                                           df=int(curN))
            else:
                self.allVars[cn][curN] = newVars[idx]

        #  4) Calculate the p-Values
        pVals = {}
        for cn in self.ratios.column_names:
            colIdx = curGeneMatrix.column_indexes_for(column_names=[cn])
            geneVect = curGeneMatrix.column_values(column=colIdx)
            geneVect = [x for x in geneVect if not math.isnan(x)]
            n = str(len(geneVect))

            if len(geneVect) <= 1 or np.any(np.isnan(
                    self.allVars[cn][str(n)])):
                pVals[cn] = 1
            else:
                curVar = np.var(geneVect)
                if self.useChi2:
                    [df, loc, scale] = self.allVars[cn][str(n)]
                    pVals[cn] = 1 - sp.stats.chi2.sf(
                        curVar, df=df, loc=loc, scale=scale)
                else:
                    pVals[cn] = np.mean(self.allVars[cn][str(n)] < curVar)

        return pVals
Example #48
0
    def make_side_view(self, axis_name):
        scene = getattr(self, 'scene_%s' % axis_name)
        scene.scene.parallel_projection = True
        ipw_3d = getattr(self, 'ipw_3d_%s' % axis_name)

        # We create the image_plane_widgets in the side view using a
        # VTK dataset pointing to the data on the corresponding
        # image_plane_widget in the 3D view (it is returned by
        # ipw_3d._get_reslice_output())
        side_src = ipw_3d.ipw._get_reslice_output()
        ipw = mlab.pipeline.image_plane_widget(
            side_src,
            plane_orientation='z_axes',
            vmin=self.data.min(),
            vmax=self.data.max(),
            figure=scene.mayavi_scene,
            name='Cut view %s' % axis_name,
        )
        setattr(self, 'ipw_%s' % axis_name, ipw)

        # Extract the spacing of the side_src to convert coordinates
        # into indices
        spacing = side_src.spacing

        # Make left-clicking create a crosshair
        ipw.ipw.left_button_action = 0

        x, y, z = self.position
        cursor = mlab.points3d(
            x,
            y,
            z,
            mode='axes',
            color=(0, 0, 0),
            scale_factor=2 * max(self.data.shape),
            figure=scene.mayavi_scene,
            name='Cursor view %s' % axis_name,
        )
        self.cursors[axis_name] = cursor

        # Add a callback on the image plane widget interaction to
        # move the others
        this_axis_number = self._axis_names[axis_name]

        def move_view(obj, evt):
            # Convert the cursor position to data coordinates and update
            # self.position, which in turn moves the other views.
            position = list(obj.GetCurrentCursorPosition() * spacing)[:2]
            position.insert(this_axis_number, self.position[this_axis_number])
            # We need to special-case y, as the view has been rotated.
            if axis_name == 'y':  # use ==, not 'is', for string comparison
                position = position[::-1]
            self.position = position

        ipw.ipw.add_observer('InteractionEvent', move_view)
        ipw.ipw.add_observer('StartInteractionEvent', move_view)

        # Center the image plane widget
        ipw.ipw.slice_position = 0.5 * self.data.shape[
            self._axis_names[axis_name]]

        # 2D interaction: only pan and zoom
        scene.scene.interactor.interactor_style = \
                                 tvtk.InteractorStyleImage()
        scene.scene.background = (0, 0, 0)

        # Some text:
        mlab.text(0.01, 0.8, axis_name, width=0.08)

        # Choose a view that makes sense
        views = dict(x=(0, 0), y=(90, 180), z=(0, 0))
        mlab.view(views[axis_name][0],
                  views[axis_name][1],
                  focalpoint=0.5 * np.array(self.data.shape),
                  figure=scene.mayavi_scene)
        scene.scene.camera.parallel_scale = 0.52 * np.mean(self.data.shape)
Example #49
0
    # Compute output
    test_hidden, test_output = net(test_secret, test_cover)
    
    # Calculate loss
    test_loss, loss_cover, loss_secret = customized_loss(test_output, test_hidden, test_secret, test_cover, beta)
    
#     diff_S, diff_C = np.abs(np.array(test_output.data[0]) - np.array(test_secret.data[0])), np.abs(np.array(test_hidden.data[0]) - np.array(test_cover.data[0]))
    
#     print (diff_S, diff_C)
    
    if idx in [1,2,3,4]:
        print ('Total loss: {:.2f} \nLoss on secret: {:.2f} \nLoss on cover: {:.2f}'.format(test_loss.data, loss_secret.data, loss_cover.data))

        # Create an image tensor from the secret/cover inputs and their reconstructions
        imgs = [test_secret.data, test_output.data, test_cover.data, test_hidden.data]
        imgs_tsor = torch.cat(imgs, 0)

        # Display the images
        imshow(utils.make_grid(imgs_tsor), idx+1, learning_rate=learning_rate, beta=beta)
        
    test_losses.append(test_loss.data)
        
mean_test_loss = np.mean(test_losses)

print ('Average loss on test set: {:.2f}'.format(mean_test_loss))

Example #50
0
def getVarianceMeanSDvect(ratioVect,
                          n,
                          tolerance=0.01,
                          maxTime=600,
                          chunkSize=200,
                          verbose=False,
                          expName=None):
    """Given a ratios matrix and a number of genes, figure out the expected distribution of variances
       Will sample background until the mean and sd converge or the operation times out
       Will return a list of variances to be used for statistical tests,
       or return nan if only nan values in ratioVect

     Keyword arguments:
     ratioVect  -- A a vector of ratios
     n          -- The number of genes to sample
     tolerance  -- The fraction tolance to use as a stopping condition (DEFAULT: 0.01)
     maxTime    -- The approximate maximum time to run in seconds (DEFAULT: 600)
     chunkSize  -- The number of samples to add between test (DEFAULT: 200)
     verbose    -- Set to false to suppress output (DEFAULT: False)
     expName    -- Set to echo this name if verbose = True (DEFAULT: None)

     Useage:
     varDist = getVarianceMeanSD(ratioVect, n)
    """
    ratioVect = [x for x in ratioVect if not math.isnan(x)]

    if verbose:
        logging.info("Calculating background for %d sampled from %d in %s", n,
                     len(ratioVect), expName)

    if n <= 1 or n > len(ratioVect):
        return [np.nan]

    varList = []
    repeat = True
    startTime = dt.datetime.now()

    while repeat:
        newVars = []
        for i in range(0, chunkSize):
            curSample = random.sample(ratioVect, n)
            try:
                newVar = np.var(curSample)
            except Exception:  # avoid a bare except
                newVar = 0
            newVars.append(newVar)

        if len(varList) > 0:  #True if past the first sample
            oldMean = np.mean(varList)
            oldVar = np.var(varList)
            varList = varList + newVars
            newMean = np.mean(varList)
            newVar = np.var(varList)
            meanWinTol = abs(newMean - oldMean) < tolerance * abs(oldMean)
            varWinTol = abs(oldVar - newVar) < tolerance * abs(oldVar)
            if meanWinTol and varWinTol:
                repeat = False
        else:
            varList = varList + newVars

        curTime = dt.datetime.now()
        if (curTime - startTime).seconds > maxTime:
            repeat = False

    return varList
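# A hedged usage sketch (synthetic data; the numbers are illustrative, not from
# the source): sample a background distribution of variances for clusters of
# n = 10 genes, then score an observed cluster variance with the same empirical
# p-value used in getPvals above.
import numpy as np

ratios_example = np.random.normal(0.0, 1.0, 500).tolist()
var_dist = getVarianceMeanSDvect(ratios_example, n=10, tolerance=0.05,
                                 maxTime=30, chunkSize=200, verbose=False)
observed_var = 0.4
p_val = np.mean(np.array(var_dist) < observed_var)  # fraction of background below the observation
print(np.mean(var_dist), np.std(var_dist), p_val)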
Example #51
0
fname = sic_dir + "sic_1992_2019.nc"
f6 = Nio.open_file(fname)
sic = f6.variables["sic"][:, :, :]
del (fname)

fname = v_ocn_dir + "v200_ocn_1992_2019.nc"
f3 = Nio.open_file(fname)
v_ocn = f3.variables["v_ocn"][:, :, :]
del (fname)

fname = u_atm_dir + "u_atm_1992_2019.nc"
f4 = Nio.open_file(fname)
u_atm = f4.variables["u_atm"][:, :, :]
del (fname)
u_atm_clim = np.mean(u_atm, axis=0)

fname = v_atm_dir + "v_atm_1992_2019.nc"
f5 = Nio.open_file(fname)
v_atm = f5.variables["v_atm"][:, :, :]
del (fname)
v_atm_clim = np.mean(v_atm, axis=0)

# Pick the desired time period
u_ocn = u_ocn[0:3286, :, :]
v_ocn = v_ocn[0:3286, :, :]
u_atm = u_atm[0:3286, :, :]
v_atm = v_atm[0:3286, :, :]
sst = sst[0:3286, :, :]
sic = sic[0:3286, :, :]
time = time[0:3286]
Example #52
0
def create_output_images(Rover):

      # Create a scaled map for plotting and clean up obs/nav pixels a bit
      if np.max(Rover.worldmap[:,:,2]) > 0:
            nav_pix = Rover.worldmap[:,:,2] > 0
            navigable = Rover.worldmap[:,:,2] * (255 / np.mean(Rover.worldmap[nav_pix, 2]))
      else: 
            navigable = Rover.worldmap[:,:,2]
      if np.max(Rover.worldmap[:,:,0]) > 0:
            obs_pix = Rover.worldmap[:,:,0] > 0
            obstacle = Rover.worldmap[:,:,0] * (255 / np.mean(Rover.worldmap[obs_pix, 0]))
      else:
            obstacle = Rover.worldmap[:,:,0]

      likely_nav = navigable >= obstacle
      obstacle[likely_nav] = 0
      plotmap = np.zeros_like(Rover.worldmap)
      plotmap[:, :, 0] = obstacle
      plotmap[:, :, 2] = navigable
      plotmap = plotmap.clip(0, 255)
      # Overlay obstacle and navigable terrain map with ground truth map
      map_add = cv2.addWeighted(plotmap, 1, Rover.ground_truth, 0.5, 0)

      # Check whether any rock detections are present in worldmap
      rock_world_pos = Rover.worldmap[:,:,1].nonzero()
      # If there are, we'll step through the known sample positions
      # to confirm whether detections are real
      samples_located = 0
      if rock_world_pos[0].any():
            
            rock_size = 2
            for idx in range(len(Rover.samples_pos[0])):
                  test_rock_x = Rover.samples_pos[0][idx]
                  test_rock_y = Rover.samples_pos[1][idx]
                  rock_sample_dists = np.sqrt((test_rock_x - rock_world_pos[1])**2 + \
                                        (test_rock_y - rock_world_pos[0])**2)
                  # If rocks were detected within 3 meters of known sample positions
                  # consider it a success and plot the location of the known
                  # sample on the map
                  if np.min(rock_sample_dists) < 3:
                        samples_located += 1
                        map_add[test_rock_y-rock_size:test_rock_y+rock_size, 
                        test_rock_x-rock_size:test_rock_x+rock_size, :] = 255

      # Calculate some statistics on the map results
      # (np.float was removed in NumPy 1.24; use the builtin float instead)
      # First get the total number of pixels in the navigable terrain map
      tot_nav_pix = float(len(plotmap[:,:,2].nonzero()[0]))
      # Next figure out how many of those correspond to ground truth pixels
      good_nav_pix = float(len(((plotmap[:,:,2] > 0) & (Rover.ground_truth[:,:,1] > 0)).nonzero()[0]))
      # Next find how many do not correspond to ground truth pixels
      bad_nav_pix = float(len(((plotmap[:,:,2] > 0) & (Rover.ground_truth[:,:,1] == 0)).nonzero()[0]))
      # Grab the total number of map pixels
      tot_map_pix = float(len(Rover.ground_truth[:,:,1].nonzero()[0]))
      # Calculate the percentage of ground truth map that has been successfully found
      perc_mapped = round(100*good_nav_pix/tot_map_pix, 1)
      # Calculate the number of good map pixel detections divided by total pixels 
      # found to be navigable terrain
      if tot_nav_pix > 0:
            fidelity = round(100*good_nav_pix/(tot_nav_pix), 1)
      else:
            fidelity = 0
      # Flip the map for plotting so that the y-axis points upward in the display
      map_add = np.flipud(map_add).astype(np.float32)
      # Add some text about map and rock sample detection results
      cv2.putText(map_add,"Time: "+str(np.round(Rover.total_time, 1))+' s', (0, 10), 
                  cv2.FONT_HERSHEY_COMPLEX, 0.4, (255, 255, 255), 1)
      cv2.putText(map_add,"Mapped: "+str(perc_mapped)+'%', (0, 25), 
                  cv2.FONT_HERSHEY_COMPLEX, 0.4, (255, 255, 255), 1)
      cv2.putText(map_add,"Mode: " + Rover.mode, (0,150),cv2.FONT_HERSHEY_COMPLEX, 0.4, (255, 255, 255), 1)
      cv2.putText(map_add,"Fidelity: "+str(fidelity)+'%', (0, 40), 
                  cv2.FONT_HERSHEY_COMPLEX, 0.4, (255, 255, 255), 1)
      cv2.putText(map_add,"Rocks", (0, 55), 
                  cv2.FONT_HERSHEY_COMPLEX, 0.4, (255, 255, 255), 1)
      cv2.putText(map_add,"  Located: "+str(samples_located), (0, 70), 
                  cv2.FONT_HERSHEY_COMPLEX, 0.4, (255, 255, 255), 1)
      cv2.putText(map_add,"  Collected: "+str(Rover.samples_collected), (0, 85), 
                  cv2.FONT_HERSHEY_COMPLEX, 0.4, (255, 255, 255), 1)
      # Convert map and vision image to base64 strings for sending to server
      pil_img = Image.fromarray(map_add.astype(np.uint8))
      buff = BytesIO()
      pil_img.save(buff, format="JPEG")
      encoded_string1 = base64.b64encode(buff.getvalue()).decode("utf-8")
      
      pil_img = Image.fromarray(Rover.vision_image.astype(np.uint8))
      buff = BytesIO()
      pil_img.save(buff, format="JPEG")
      encoded_string2 = base64.b64encode(buff.getvalue()).decode("utf-8")

      return encoded_string1, encoded_string2
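# A minimal sketch (synthetic binary maps; the values are illustrative) of the
# two map statistics computed above: perc_mapped is the share of ground-truth
# pixels that were recovered, while fidelity is the share of claimed-navigable
# pixels that agree with the ground truth.
import numpy as np

rng = np.random.RandomState(0)
ground_truth = rng.rand(200, 200) > 0.5
claimed_nav = ground_truth & (rng.rand(200, 200) > 0.2)    # misses some truth
claimed_nav = claimed_nav | (rng.rand(200, 200) > 0.97)    # plus a few false positives

good_nav_pix = float(np.sum(claimed_nav & ground_truth))
perc_mapped = round(100 * good_nav_pix / np.sum(ground_truth), 1)
fidelity = round(100 * good_nav_pix / np.sum(claimed_nav), 1)
print(perc_mapped, fidelity)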
Example #53
0
from sklearn.metrics import log_loss
import lightgbm as lgb
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np

train = pd.read_csv('../titanic/train.csv')
test = pd.read_csv('../titanic/test.csv')
sub = pd.read_csv('../titanic/gender_submission.csv')

data = pd.concat([train, test], sort=False)

data['Sex'].replace(['male', 'female'], [0, 1], inplace=True)
data['Embarked'].fillna('S', inplace=True)
data['Embarked'] = data['Embarked'].map({'S': 0, 'C': 1, 'Q': 2}).astype(int)
data['Fare'].fillna(np.mean(data['Fare']), inplace=True)

age_avg = data['Age'].mean()
age_std = data['Age'].std()
data['Age'].fillna(np.random.randint(age_avg - age_std, age_avg + age_std), inplace=True)

delete_columns = ['Name', 'PassengerId', 'SibSp', 'Parch', 'Ticket', 'Cabin']
data.drop(delete_columns, axis=1, inplace=True)

print(data.head())

train = data[:len(train)]
test = data[len(train):]
X_train = train.drop('Survived', axis=1)
X_test = test.drop('Survived', axis=1)
y_train = train['Survived']
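# The snippet is truncated before a model is actually trained; a hedged
# continuation (the validation split and hyperparameters are illustrative
# assumptions, not taken from the source) might look like this:
X_tr, X_va, y_tr, y_va = train_test_split(X_train, y_train, test_size=0.3,
                                          random_state=0, stratify=y_train)
clf = lgb.LGBMClassifier(objective='binary', n_estimators=300, random_state=0)
clf.fit(X_tr, y_tr)
print('validation log_loss:', log_loss(y_va, clf.predict_proba(X_va)[:, 1]))
y_sub = (clf.predict_proba(X_test)[:, 1] > 0.5).astype(int)  # candidate submission labels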
Example #54
0
    def MSRCR(self, image):
        """
            MSRCR (Multi-Scale Retinex with Color Restoration) is a retinex based algorithm
            that uses logarithmic compression and spatial convolution.
            MSRCR combines the dynamic range compression and color constancy of the MSR with
            a color 'restoration' filter that provides excellent color rendition.
                where:
                    - image: the input image array (BGR)

                output:
                    - image_out: the processed image array (BGR)
        """

        self.message.toprint('IMAGE_APPLY_MSRCR')

        image_original = np.float32(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

        # Distribute the Gaussian scales used by the multi-scale retinex
        max_scale = self.settings.RETINEX_MAX_SCALE
        nr_scale = self.settings.RETINEX_NR_SCALE
        scales = ScalesDistribution.apply(max_scale, nr_scale)

        # zero-initialised stack holding one blurred image per scale
        image_blur = np.zeros(shape=[
            len(scales), image_original.shape[0], image_original.shape[1],
            image_original.shape[2]
        ])

        # zero-initialised stack holding one log-ratio (retinex) layer per scale
        image_mlog = np.zeros(shape=[
            len(scales), image_original.shape[0], image_original.shape[1],
            image_original.shape[2]
        ])

        # Do for each channel
        for channel in range(3):
            # Do for each scale distributed
            for scale_count, scale in enumerate(scales):

                # ksize=(0, 0) lets OpenCV derive the kernel size from the sigma (the scale)
                image_blur[scale_count, :, :, channel] = cv2.GaussianBlur(
                    image_original[:, :, channel], (0, 0), scale)
                image_mlog[scale_count, :, :, channel] = np.log(
                    image_original[:, :, channel] +
                    1.) - np.log(image_blur[scale_count, :, :, channel] + 1.)

        image_retinex = np.mean(image_mlog, 0)

        alpha = self.settings.RETINEX_ALPHA
        gain = self.settings.RETINEX_GAIN
        offset = self.settings.RETINEX_OFFSET

        image_retinex = ColorRestoration.apply(image_original=image_original,
                                               image_retinex=image_retinex,
                                               alpha=alpha,
                                               gain=gain,
                                               offset=offset)

        # Mean of the color-restored retinex image
        image_mean = np.mean(image_retinex)

        # Standard deviation of the color-restored retinex image
        image_std = np.std(image_retinex)

        # Transmission map
        #   Using the retinex image's mean and standard deviation, apply a
        #   transformation of the form:
        #       * newT = (oldT - mini) / (maxi - mini)
        #           * with mini = mean - k * standard deviation
        #           * with maxi = mean + k * standard deviation
        #   where k = RETINEX_DYNAMIC controls the contrast and is decisive for the rendering:
        #       * low values increase the apparent contrast,
        #       * high values give a more natural image with fewer artefacts and haze.
        k = self.settings.RETINEX_DYNAMIC

        image_mini = image_mean - k * image_std

        image_maxi = image_mean + k * image_std

        image_maxi_mini = image_maxi - image_mini

        image_oldT_mini = image_retinex - image_mini
        image_out = np.uint8(
            np.clip(image_oldT_mini / image_maxi_mini * 255, 0, 255))

        image_out = cv2.cvtColor(image_out, cv2.COLOR_RGB2BGR)

        self.message.toprint('IMAGE_APPLIED_MSRCR')

        return image_out
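# A minimal standalone sketch of the multi-scale retinex core used above (the
# log of the image minus the log of its Gaussian blur, averaged over scales).
# The color restoration and transmission-map steps of the class are omitted,
# and the scale values below are illustrative defaults, not the class settings.
import cv2
import numpy as np

def msr_core(bgr_image, scales=(15, 80, 250)):
    img = np.float32(cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB))
    layers = [np.log(img + 1.0) - np.log(cv2.GaussianBlur(img, (0, 0), s) + 1.0)
              for s in scales]
    return np.mean(layers, axis=0)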
Example #55
0
import numpy as np
import sys
##MG = ['F5','F6']
MG = []
box = str(sys.argv[1])

dir_1 = str(sys.argv[2])  #; dir_2 = '/home/jarmijo/HOD_galaxies/'
dir_2 = str(sys.argv[3])
#
print '\ndirectory: ' + dir_1 + ' and ' + dir_2 + '\n'
mark = ['0.1']
#chi_M = np.zeros((len(mark),len(MG)),dtype=float)

for p in range(len(mark)):
    m = np.loadtxt(dir_1 + 'marks/GR_mark_p' + mark[p] + '_' + box + '.txt')
    mean_m_GR = np.mean(m)
    print "mean mark (p = " + mark[p] + ")  in GR is:" + str(mean_m_GR) + "\n"
    mean_m_GR2 = np.mean(
        np.loadtxt(dir_2 + 'marks/GR_mark_p' + mark[p] + '_' + box + '.txt'))
    DD_GR = np.loadtxt(dir_1 + 'pairs/D1D2_JK64_GR_' + box + '.txt')
    mm_GR = np.loadtxt(dir_1 + 'pairs/m1m2_JK64_GR_mark_p' + mark[p] + '_' +
                       box + '.txt')
    dGR = mm_GR / (DD_GR * mean_m_GR * mean_m_GR2)
    GR_mean = np.mean(dGR, axis=0)
    n = len(dGR)
    N = len(GR_mean)
    C = np.zeros((N, N))
    for i in range(N):
        for j in range(N):
            for k in range(n):
                C[i][j] += (n - 1) / float(n) * (dGR[k][i] - GR_mean[i]) * (dGR[k][j] - GR_mean[j])
Example #56
0
    def compute_mean_d(self):
        return np.mean([self.compute_d(i) for i in self.x_list])
Example #57
0
def train_model_regression(X,
                           X_test,
                           y,
                           params,
                           folds,
                           model_type='lgb',
                           eval_metric='mae',
                           columns=None,
                           plot_feature_importance=False,
                           model=None,
                           verbose=10000,
                           early_stopping_rounds=200,
                           n_estimators=50000,
                           mol_type=-1,
                           fold_group=None,
                           skip_folds=None,
                           phase_mark="",
                           skipped_mark=[]):
    """
    A function to train a variety of regression models.
    Returns dictionary with oof predictions, test predictions, scores and, if necessary, feature importances.

    :params: X - training data, can be pd.DataFrame or np.ndarray (after normalizing)
    :params: X_test - test data, can be pd.DataFrame or np.ndarray (after normalizing)
    :params: y - target
    :params: folds - folds to split data
    :params: model_type - type of model to use
    :params: eval_metric - metric to use
    :params: columns - columns to use. If None - use all columns
    :params: plot_feature_importance - whether to plot feature importance of LGB
    :params: model - sklearn model, works only for "sklearn" model type

    """
    assert isinstance(skip_folds, list) or skip_folds is None
    print(f"skip_folds :{skip_folds}")

    columns = X.columns if columns is None else columns
    X_test = X_test[columns]

    # to set up scoring parameters
    metrics_dict = {
        'mae': {
            'lgb_metric_name': 'mae',
            'catboost_metric_name': 'MAE',
            'sklearn_scoring_function': metrics.mean_absolute_error
        },
        'group_mae': {
            'lgb_metric_name': 'mae',
            'catboost_metric_name': 'MAE',
            'scoring_function': group_mean_log_mae
        },
        'mse': {
            'lgb_metric_name': 'mse',
            'catboost_metric_name': 'MSE',
            'sklearn_scoring_function': metrics.mean_squared_error
        }
    }

    result_dict = {}

    # out-of-fold predictions on train data
    oof = np.zeros(len(X))

    # averaged predictions on train data
    prediction = np.zeros(len(X_test))

    # list of scores on folds
    scores = []
    feature_importance = pd.DataFrame()
    model_list = []

    # split and train on folds
    for fold_n, (train_index,
                 valid_index) in enumerate(folds.split(X, groups=fold_group)):

        if skip_folds is not None and fold_n in skip_folds and phase_mark in skipped_mark:
            print(f'Fold {fold_n + 1} is skipped!!! at {time.ctime()}')
            oof = unpickle(mid_path / f"oof_cv{phase_mark}_{fold_n}.pkl", )
            y_pred = unpickle(
                mid_path / f"prediction_cv{phase_mark}_{fold_n}.pkl", )
            model = unpickle(mid_path / f"model_cv{phase_mark}_{fold_n}.pkl", )
            fold_importance = unpickle(
                mid_path / f"importance_cv{phase_mark}_{fold_n}.pkl", )

            feature_importance = pd.concat(
                [feature_importance, fold_importance], axis=0)
            prediction += y_pred
            model_list += [model]
            continue

        print(f'Fold {fold_n + 1} started at {time.ctime()}')
        if type(X) == np.ndarray:
            X_train, X_valid = X[columns][train_index], X[columns][valid_index]
            y_train, y_valid = y[train_index], y[valid_index]
        else:
            X_train, X_valid = X[columns].iloc[train_index], X[columns].iloc[
                valid_index]
            y_train, y_valid = y.iloc[train_index], y.iloc[valid_index]

        if model_type == 'lgb':
            model = lgb.LGBMRegressor(**params,
                                      n_estimators=n_estimators,
                                      n_jobs=-1,
                                      importance_type='gain')
            print(model)
            model.fit(X_train,
                      y_train,
                      eval_set=[(X_train, y_train), (X_valid, y_valid)],
                      eval_metric=metrics_dict[eval_metric]['lgb_metric_name'],
                      verbose=verbose,
                      early_stopping_rounds=early_stopping_rounds)

            y_pred_valid = model.predict(X_valid)
            y_pred = model.predict(X_test, num_iteration=model.best_iteration_)

        if model_type == 'xgb':
            train_data = xgb.DMatrix(data=X_train,
                                     label=y_train,
                                     feature_names=X.columns)
            valid_data = xgb.DMatrix(data=X_valid,
                                     label=y_valid,
                                     feature_names=X.columns)

            watchlist = [(train_data, 'train'), (valid_data, 'valid_data')]
            params["objective"] = "reg:linear"
            params["eval_metric"] = metrics_dict[eval_metric][
                'lgb_metric_name']
            model = xgb.train(dtrain=train_data,
                              num_boost_round=20000,
                              evals=watchlist,
                              early_stopping_rounds=200,
                              verbose_eval=verbose,
                              params=params)
            y_pred_valid = model.predict(xgb.DMatrix(X_valid,
                                                     feature_names=X.columns),
                                         ntree_limit=model.best_ntree_limit)
            y_pred = model.predict(xgb.DMatrix(X_test,
                                               feature_names=X.columns),
                                   ntree_limit=model.best_ntree_limit)

        if model_type == 'sklearn':
            model = model
            model.fit(X_train, y_train)

            y_pred_valid = model.predict(X_valid).reshape(-1, )
            score = metrics_dict[eval_metric]['sklearn_scoring_function'](
                y_valid, y_pred_valid)
            print(f'Fold {fold_n}. {eval_metric}: {score:.4f}.')
            print('')

            y_pred = model.predict(X_test).reshape(-1, )

        if model_type == 'cat':
            model = CatBoostRegressor(
                iterations=20000,
                eval_metric=metrics_dict[eval_metric]['catboost_metric_name'],
                **params,
                loss_function=metrics_dict[eval_metric]
                ['catboost_metric_name'])
            model.fit(X_train,
                      y_train,
                      eval_set=(X_valid, y_valid),
                      cat_features=[],
                      use_best_model=True,
                      verbose=False)

            y_pred_valid = model.predict(X_valid)
            y_pred = model.predict(X_test)

        oof[valid_index] = y_pred_valid.reshape(-1, )

        if eval_metric != 'group_mae':
            scores.append(
                metrics_dict[eval_metric]['sklearn_scoring_function'](
                    y_valid, y_pred_valid))
        else:
            scores.append(metrics_dict[eval_metric]['scoring_function'](
                y_valid, y_pred_valid, X_valid['type']))

        prediction += y_pred

        if model_type == 'lgb' and plot_feature_importance:
            # feature importance
            fold_importance = pd.DataFrame()
            fold_importance["feature"] = columns
            fold_importance["importance"] = model.feature_importances_
            fold_importance["fold"] = fold_n + 1

            try:
                fold_importance.to_csv(mid_path /
                                       f"importance_cv_{fold_n}.csv")
            except Exception as e:
                print("failed to save importance...")
                print(e)

            feature_importance = pd.concat(
                [feature_importance, fold_importance], axis=0)
        model_list += [model]

        try:
            to_pickle(mid_path / f"oof_cv{phase_mark}_{fold_n}.pkl", oof)
            to_pickle(mid_path / f"prediction_cv{phase_mark}_{fold_n}.pkl",
                      y_pred)
            to_pickle(mid_path / f"model_cv{phase_mark}_{fold_n}.pkl", model)
            to_pickle(mid_path / f"importance_cv{phase_mark}_{fold_n}.pkl",
                      fold_importance)
        except Exception as e:
            print("failed to save intermediate data...")
            print(e)

    if model_type == 'lgb' and plot_feature_importance:
        result_dict['importance'] = feature_importance

    prediction /= folds.n_splits
    try:
        cv_score_msg = f'{DATA_VERSION}_{TRIAL_NO}' + ' CV mean score: {0:.4f}, std: {1:.4f}.'.format(
            np.mean(scores), np.std(scores))
        print(cv_score_msg)
        send_message(cv_score_msg)
    except Exception as e:
        print(e)
        pass

    result_dict["models"] = model_list
    result_dict['oof'] = oof
    result_dict['prediction'] = prediction
    result_dict['scores'] = scores

    return result_dict
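# A minimal, self-contained sketch (synthetic data; plain Ridge regression in
# place of LightGBM/XGBoost/CatBoost) of the cross-validation pattern that
# train_model_regression implements: out-of-fold predictions on the training
# data plus fold-averaged predictions on the test data.
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import KFold

rng = np.random.RandomState(0)
X_demo, X_demo_test = rng.rand(200, 5), rng.rand(50, 5)
y_demo = X_demo @ rng.rand(5) + 0.1 * rng.randn(200)

folds_demo = KFold(n_splits=5, shuffle=True, random_state=0)
oof_demo = np.zeros(len(X_demo))
prediction_demo = np.zeros(len(X_demo_test))
scores_demo = []
for train_index, valid_index in folds_demo.split(X_demo):
    reg = Ridge().fit(X_demo[train_index], y_demo[train_index])
    oof_demo[valid_index] = reg.predict(X_demo[valid_index])
    prediction_demo += reg.predict(X_demo_test)
    scores_demo.append(mean_absolute_error(y_demo[valid_index], oof_demo[valid_index]))
prediction_demo /= folds_demo.n_splits
print('CV mean score: {0:.4f}, std: {1:.4f}.'.format(np.mean(scores_demo), np.std(scores_demo)))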
Example #58
0
    
    # Determine the misclassified samples and the error rate for this fold
    e = y_test_est != y_test
    error_rate = (sum(e).type(torch.float)/len(y_test)).data.numpy()
    errors.append(error_rate) # store error rate for current CV fold 
    
    # Display the learning curve for the best net in the current fold
    h, = summaries_axes[0].plot(learning_curve, color=color_list[k])
    h.set_label('CV fold {0}'.format(k+1))
    summaries_axes[0].set_xlabel('Iterations')
    summaries_axes[0].set_xlim((0, max_iter))
    summaries_axes[0].set_ylabel('Loss')
    summaries_axes[0].set_title('Learning curves')
    
# Display the error rate across folds
summaries_axes[1].bar(np.arange(1, K+1), np.squeeze(np.asarray(errors)), color=color_list)
summaries_axes[1].set_xlabel('Fold');
summaries_axes[1].set_xticks(np.arange(1, K+1))
summaries_axes[1].set_ylabel('Error rate');
summaries_axes[1].set_title('Test misclassification rates')

print('Diagram of best neural net in last fold:')
weights = [net[i].weight.data.numpy().T for i in [0,2]]
biases = [net[i].bias.data.numpy() for i in [0,2]]
tf =  [str(net[i]) for i in [1,3]]
draw_neural_net(weights, biases, tf, attribute_names=attributeNames)

# Print the average classification error rate
print('\nGeneralization error/average error rate: {0}%'.format(round(100*np.mean(errors),4)))

#print('Ran Exercise 8.2.5')
Example #59
0
        ss_values=ss_values)

else:
    all_sampling_dates = [45, 190, 300, 360]
    all_LHs = [0.6, 0.7, 0.8, 1.0]

sampling_date_names = ['Feb. 14', 'Jul. 9', 'Oct. 27', 'Dec. 26']
# calculate the average prevalence (equally weighted across all days of the year) for each LH scenario
print('calculating average prevalences...')
ave_prevs = [None] * len(all_LHs)
for lh in range(len(all_LHs)):
    with open(
            "simOutputs_DTK/prevalenceData_xLH%i_%s.p" %
        (round(all_LHs[lh] * 100), filename_suffix), "rb") as f:
        prev_list = pickle.load(f)
    ave_prevs[lh] = np.mean(prev_list[0])

# Plots
if need_to_generate_plots:
    print('creating plot panels...')

    # To greatly reduce the difficulty of this problem, we assume the population size is constant at the mean value across
    #     all DTK simulations. This assumption may possibly give rise to some biases and will ideally be explored through
    #     a sensitivity analysis in the future (something as easy as using the max and min values and seeing whether
    #     results change substantially).
    # load the population sizes across all simulations and take average
    with open("simOutputs_DTK/pop_size_sim_all_%s.p" % filename_suffix,
              "rb") as f:
        pop_size_sim = pickle.load(f)
    pop_size = int(round(np.mean(pop_size_sim)))
Example #60
0
def pearSim(A, B):
    meanA = np.mean(A)
    meanB = np.mean(B)
    A_B = sum((A[i] - meanA) * (B[i] - meanB) for i in range(len(A)))
    pear = A_B / (np.linalg.norm(A - meanA) * np.linalg.norm(B - meanB))
    return 0.5 + 0.5 * pear
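# A brief usage sketch (illustrative vectors): pearSim maps the Pearson
# correlation from [-1, 1] onto a similarity score in [0, 1].
import numpy as np

a = np.array([1.0, 2.0, 3.0, 4.0])
b = np.array([2.0, 4.0, 6.0, 8.0])   # perfectly correlated with a
print(pearSim(a, b))    # -> 1.0
print(pearSim(a, -b))   # -> 0.0 (perfectly anti-correlated)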