Example #1
def _get_effect_interval(counts, effect, power_alpha=0.05, ci_alpha=None):
    """Calculate the confidence interal around the effect"""

    if isinstance(effect, (int, float)):
        l_bar = effect
        ci = np.nan

    elif ci_alpha is None:
        ci = np.nanstd(effect)
        l_bar = np.nanmean(effect)
    else:
        ci = confidence_bound(effect, alpha=ci_alpha)
        l_bar = np.nanmean(effect)

    l_low = l_bar - ci
    l_hi = l_bar + ci

    power_bar = z_power(counts, l_bar, power_alpha)
    if not np.isnan(ci):
        power_low = z_power(counts, l_low, power_alpha)
        power_hi = z_power(counts, l_hi, power_alpha)
    else:
        power_low = np.nan * power_bar
        power_hi = np.nan * power_bar

    return power_bar, power_low, power_hi
Example #2
def summary():
    # read sonde data
    for sites in [[0],[1],[2]]:
        slist,snames=read_diff_events(sites=sites)
        ecount = [len(s.einds) for s in slist]
        mintp = [np.nanmin(s.tp) for s in slist]
        meantp = [np.nanmean(s.tp) for s in slist]
        maxtp = [np.nanmax(s.tp) for s in slist]
        
        head="%9s"%slist[0].name
        ecount = "events   "
        meantp = "mean tph "
        minmax = "tph bound"
        for sonde, sname in zip(slist,snames):
            
            head=head+'| %16s'%sname
            ecount=ecount+'| %16d'%len(sonde.einds)
            meantp=meantp+'| %16.2f'%np.nanmean(sonde.tp)
            minmax=minmax+'| %7.2f,%7.2f '%(np.nanmin(sonde.tp),np.nanmax(sonde.tp))
            
        print("")
        print(head)
        print(ecount)
        print(meantp)
        print(minmax)
Example #3
def filted_diff(curve, period, threshold):
    nframe = len(curve)
    ncycle = nframe // period      # integer number of full cycles
    half = period // 2
    diff1 = np.ones(ncycle)
    diff1[:] = np.nan
    diff2 = np.ones(ncycle)
    diff2[:] = np.nan
    #Von=np.ones(ncycle)
    #Von[:]=np.nan
    #Voff=np.ones(ncycle)
    #Voff[:]=np.nan
    k = 0
    l = 0
    F = []
    for i in range(ncycle):
        if threshold <= min(curve[i*period : (i+1)*period]):
            diff1[k] = sum(curve[i*period : i*period+half]) - sum(curve[i*period+half : (i+1)*period])
            F.append(curve[i*period : (i+1)*period])
            #Von[k]=sum(curve[i*period : i*period+half])
            #Voff[k]=sum(curve[i*period+half : (i+1)*period])
            k = k + 1
    for i in range(ncycle-1):
        if threshold <= min(curve[i*period+half : (i+1)*period+half]):
            diff2[l] = sum(curve[i*period+half : (i+1)*period]) - sum(curve[(i+1)*period : (i+1)*period+half])
            l = l + 1
    F = np.array(F)
    Favg = np.mean(F)
    dff1 = diff1 / half / Favg
    dff2 = diff2 / half / Favg
    dff_avg = (np.nanmean(dff1) - np.nanmean(dff2)) / 2
    return diff1, diff2, dff_avg #Von/(period/2)/np.nanmax(curve), Voff/(period/2)/np.nanmax(curve)
Example #4
def plot_hist(ind, binwidth=5, incl_daystr=True, ax=None, pos=(0.05, 0.7),
              kw={'alpha' : 0.3, 'color' : 'k'}):
    """Plot histogram of onset days.
    """
    if ax is None:
        ax = plt.gca()

    def daystr(day):
        day = round(day)
        mm, dd = atm.jday_to_mmdd(day)
        mon = atm.month_str(mm)
        return '%.0f (%s-%.0f)' % (day, mon, dd)

    if isinstance(ind, pd.Series) or isinstance(ind, xray.DataArray):
        ind = ind.values

    b1 = np.floor(np.nanmin(ind) / binwidth) * binwidth
    b2 = np.ceil(np.nanmax(ind) / binwidth) * binwidth
    bin_edges = np.arange(b1, b2 + 1, binwidth)
    n, bins, _ = ax.hist(ind, bin_edges, **kw)
    ax.set_xlabel('Day of Year')
    ax.set_ylabel('Num of Occurrences')
    if incl_daystr:
        dmean = daystr(np.nanmean(ind))
        dmin = daystr(np.nanmin(ind))
        dmax = daystr(np.nanmax(ind))
    else:
        dmean = '%.0f' % np.nanmean(ind)
        dmin = '%.0f' % np.nanmin(ind)
        dmax = '%.0f' % np.nanmax(ind)
    s = 'Mean %s\n' % dmean + 'Std %.0f\n' % np.nanstd(ind)
    s = s + 'Min %s\n' % dmin + 'Max %s' % dmax
    x0, y0 = pos
    atm.text(s, (x0, y0), ax=ax, horizontalalignment='left')
Example #5
def calc_norm_summary_tables(accuracy_tbl, time_tbl):
    """
    Calculate normalized performance/ranking summary, as numpy
    matrices as usual for convenience, and matrices of additional
    statistics (min, max, percentiles, etc.)

    Here normalized means relative to the best which gets a 1, all
    others get the ratio resulting from dividing by the performance of
    the best.
    """
    # Min across all minimizers, i.e. for each fit problem what is the lowest chi-squared and the lowest time
    min_sum_err_sq = np.nanmin(accuracy_tbl, 1)
    min_runtime = np.nanmin(time_tbl, 1)

    # create normalised tables
    norm_acc_rankings = accuracy_tbl / min_sum_err_sq[:, None]
    norm_runtimes = time_tbl / min_runtime[:, None]

    summary_cells_acc = np.array([np.nanmin(norm_acc_rankings, 0),
                                  np.nanmax(norm_acc_rankings, 0),
                                  nanmean(norm_acc_rankings, 0),
                                  nanmedian(norm_acc_rankings, 0)
                                  ])

    summary_cells_runtime = np.array([np.nanmin(norm_runtimes, 0),
                                      np.nanmax(norm_runtimes, 0),
                                      nanmean(norm_runtimes, 0),
                                      nanmedian(norm_runtimes, 0)
                                      ])

    return norm_acc_rankings, norm_runtimes, summary_cells_acc, summary_cells_runtime
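
A quick worked illustration of the normalization step above, on a made-up 2-problem x 3-minimizer table (not from the original source): the best entry in each row becomes 1.0 and the rest become ratios to that best.

import numpy as np

# Hypothetical chi-squared table: 2 fit problems (rows) x 3 minimizers (columns).
accuracy_tbl = np.array([[2.0, 1.0, 4.0],
                         [3.0, 6.0, 3.0]])

# Row-wise best, as in the function above.
min_sum_err_sq = np.nanmin(accuracy_tbl, 1)

# Normalised ranking: best per row -> 1.0, others -> ratio to the best.
print(accuracy_tbl / min_sum_err_sq[:, None])
# [[2. 1. 4.]
#  [1. 2. 1.]]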
Example #6
    def error(self, nCells=15):
        '''
        calculate the standard deviation of all fitted images, 
        averaged to a grid
        '''
        s0, s1 = self.fits[0].shape
        aR = s0 / s1
        if aR > 1:
            ss0 = int(nCells)
            ss1 = int(ss0 / aR)
        else:
            ss1 = int(nCells)
            ss0 = int(ss1 * aR)
        L = len(self.fits)

        arr = np.array(self.fits)
        arr[np.array(self._fit_masks)] = np.nan
        avg = np.tile(np.nanmean(arr, axis=0), (L, 1, 1))
        arr = (arr - avg) / avg

        out = np.empty(shape=(L, ss0, ss1))

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=RuntimeWarning)

            for n, f in enumerate(arr):
                out[n] = subCell2DFnArray(f, np.nanmean, (ss0, ss1))

        return np.nanmean(out**2)**0.5
Example #7
  def evaluate(self):
    """Compute evaluation result.

    Returns:
      A named tuple with the following fields -
        average_precision: float numpy array of average precision for
            each class.
        mean_ap: mean average precision of all classes, float scalar
        precisions: List of precisions, each precision is a float numpy
            array
        recalls: List of recalls, each recall is a float numpy array
        corloc: numpy float array
        mean_corloc: Mean CorLoc score for each class, float scalar
    """
    if (self.num_gt_instances_per_class == 0).any():
      logging.warn(
          'The following classes have no ground truth examples: %s',
          np.squeeze(np.argwhere(self.num_gt_instances_per_class == 0)) +
          self.label_id_offset)

    if self.use_weighted_mean_ap:
      all_scores = np.array([], dtype=float)
      all_tp_fp_labels = np.array([], dtype=bool)
    for class_index in range(self.num_class):
      if self.num_gt_instances_per_class[class_index] == 0:
        continue
      if not self.scores_per_class[class_index]:
        scores = np.array([], dtype=float)
        tp_fp_labels = np.array([], dtype=float)
      else:
        scores = np.concatenate(self.scores_per_class[class_index])
        tp_fp_labels = np.concatenate(self.tp_fp_labels_per_class[class_index])
      if self.use_weighted_mean_ap:
        all_scores = np.append(all_scores, scores)
        all_tp_fp_labels = np.append(all_tp_fp_labels, tp_fp_labels)
      logging.info('Scores and tpfp per class label: %d', class_index)
      logging.info(tp_fp_labels)
      logging.info(scores)
      precision, recall = metrics.compute_precision_recall(
          scores, tp_fp_labels, self.num_gt_instances_per_class[class_index])
      self.precisions_per_class[class_index] = precision
      self.recalls_per_class[class_index] = recall
      average_precision = metrics.compute_average_precision(precision, recall)
      self.average_precision_per_class[class_index] = average_precision

    self.corloc_per_class = metrics.compute_cor_loc(
        self.num_gt_imgs_per_class,
        self.num_images_correctly_detected_per_class)

    if self.use_weighted_mean_ap:
      num_gt_instances = np.sum(self.num_gt_instances_per_class)
      precision, recall = metrics.compute_precision_recall(
          all_scores, all_tp_fp_labels, num_gt_instances)
      mean_ap = metrics.compute_average_precision(precision, recall)
    else:
      mean_ap = np.nanmean(self.average_precision_per_class)
    mean_corloc = np.nanmean(self.corloc_per_class)
    return ObjectDetectionEvalMetrics(
        self.average_precision_per_class, mean_ap, self.precisions_per_class,
        self.recalls_per_class, self.corloc_per_class, mean_corloc)
Example #8
    def _get_x_0_stats(self):
        x_diff = np.diff(self.x_arr_0, axis=1)
        mu_mm = np.nanmean(x_diff)
        std_mm = np.nanstd(x_diff)
        mu_px_mm = np.nanmean(x_diff / self.aramis_info.n_px_facet_step_x)
        std_px_mm = np.nanstd(x_diff / self.aramis_info.n_px_facet_step_x)
        return mu_mm, std_mm, mu_px_mm, std_px_mm
Example #9
    def go(x, y, x_denominators=1, y_denominators=1):

        # these next two lines are wrong, but they are bug-compatible with v0.6.13 !
        x = x / np.nanmean(x_denominators)
        y = y / np.nanmean(y_denominators)

        return group_sequential(x, y, spending_function, estimated_sample_size, alpha, cap)
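
The comment above flags the scaling as deliberately bug-compatible: it divides by the mean of the denominators rather than elementwise. A small illustration of the difference, with made-up numbers:

import numpy as np

x = np.array([10.0, 20.0])
x_denominators = np.array([1.0, 4.0])
print(x / np.nanmean(x_denominators))  # [4. 8.]   -- what the code above does
print(x / x_denominators)              # [10. 5.]  -- the elementwise alternative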
Example #10
def _do_scale_fit(freqs, signal, model, w=None):
   """
   Perform a round of fitting to deal with over or under-estimation.
   Scales curve on y-axis but preserves shape.

   Parameters
   ----------
   freqs : array
   signal : array
      The signal that the model is being fit to
   model : array
      The model being scaled
   w : array
      weighting function

   Returns
   -------
   scalefac : array of len(signal)
      the scaling factor for each transient
   scalemodel : array of model.shape
      the scaled model
   """
   scalefac = np.empty(model.shape[0])
   scalemodel = np.empty((model.shape[0], np.real(model).shape[1]))
   scalesignal = np.empty((signal.shape[0], np.real(signal).shape[1]))
   for ii, xx in enumerate(signal): # per transient
      scalesignal[ii] = np.real(xx)
#      ratio = np.empty(scalesignal[ii].shape[0])
#      for ppm, trans in enumerate(scalesignal[ii]):
#          ratio[ppm] = trans/model[ii][ppm]
#      scalefac[ii] = np.mean(ratio,0)
      scalefac[ii] = np.nanmean(scalesignal[ii],0)/np.nanmean(model[ii],0)
      scalemodel[ii] = scalefac[ii] * model[ii]
   return scalefac, scalemodel
Example #11
def __entrofy(X, k, w=None, q=None, pre_selects=None):
    '''See entrofy() for documentation'''

    n_participants, n_attributes = X.shape

    if w is None:
        w = np.ones(n_attributes)

    if q is None:
        q = 0.5 * np.ones(n_attributes)

    assert 0 < k <= n_participants
    assert not np.any(w < 0)
    assert np.all(q >= 0.0) and np.all(q <= 1.0)
    assert len(w) == n_attributes
    assert len(q) == n_attributes

    if k == n_participants:
        return np.arange(n_participants)

    # Initialization
    y = np.zeros(n_participants, dtype=bool)

    if pre_selects is None:
        # Select one at random
        pre_selects = np.random.choice(n_participants, size=1)

    y[pre_selects] = True

    # Where do we have missing data?
    Xn = np.isnan(X)

    while True:
        i = y.sum()
        if i >= k:
            break

        # Initialize the distribution vector
        p = np.nanmean(X[y], axis=0)
        p[np.isnan(p)] = 0.0

        # Compute the candidate distributions
        p_new = (p * i + X) / (i + 1.0)

        # Wherever X is nan, propagate the old p since we have no new information
        p_new[Xn] = (Xn * p)[Xn]

        # Compute marginal gain for each candidate
        delta = obj(p_new, w, q) - obj(p, w, q)

        # Knock out the points we've already taken
        delta[y] = -np.inf

        # Select the top score.  Break near-ties randomly.
        target_score = delta.max()
        target_score = target_score - 1e-3 * np.abs(target_score)
        new_idx = np.random.choice(np.flatnonzero(delta >= target_score))
        y[new_idx] = True

    return obj(np.nanmean(X[y], axis=0), w, q), np.flatnonzero(y)
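
A minimal sketch of the candidate-distribution update inside the loop above, with made-up numbers: adding one candidate shifts the attribute means, and NaN attributes keep the old value (equivalent to the p_new[Xn] = (Xn * p)[Xn] line).

import numpy as np

p = np.array([0.5, 0.25])              # current attribute frequencies
i = 4                                  # participants selected so far
X_j = np.array([1.0, np.nan])          # one candidate row, second attribute missing

p_new = (p * i + X_j) / (i + 1.0)      # shifted mean if this candidate is added
p_new[np.isnan(X_j)] = p[np.isnan(X_j)]  # NaN attributes propagate the old p
print(p_new)                           # [0.6  0.25]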
Example #12
def get_loss_bb(gt,est):
    sf="/home/coskun/PycharmProjects/RNNPose21/daya/blanket.txt"
    batch_size=gt.shape[0]
    seq_length=gt.shape[1]
    loss=0
    loss_list=[]
    seq_list=[]
    b_seq_list=[]
    with open(sf,"a") as f_handle:
        for b in range(batch_size):
            seq_los=[0]*seq_length
            for s in range(seq_length):
                diff_vec=np.abs(gt[b][s].reshape(14,3) - est[b][s].reshape(14,3))*2 #14,3
                val=np.sqrt(np.sum(diff_vec**2,axis=1))
                for i in range(14):
                    f=val[i]
                    f_handle.write("%f"%(f))
                    if(i<13):
                        f_handle.write(";")
                f_handle.write('\n')
                b_l=np.nanmean(np.sqrt(np.sum(diff_vec**2,axis=1)))
                loss_list.append(b_l)
                seq_los[s]=b_l
                loss +=np.nanmean(np.sqrt(np.sum(diff_vec**2,axis=1)))
            b_seq_list.append(seq_los)
        seq_list=np.mean(b_seq_list,axis=0)
        loss/=(seq_length*batch_size)
    return (loss,loss_list,seq_list)
Example #13
	def run(self, x1, x2):
		if isinstance(x1, np.ndarray):
			x1 = np.nanmean(x1)
		if isinstance(x2, np.ndarray):
			x2 = np.nanmean(x2)

		return x1/(x1+x2)
Example #14
def pair_angle_op(angles, nmask=None, m=4, globl=False, locl=False):
    """calculate the pair-angle (bond angle) order parameter

    the parameter for particle i is defined as:
        psi_m_i = < exp(i m theta_ij) >
    averaged over neighbors j of particle i
    the global parameter is the mean over all particles i:
        Psi_m = < psi_m_i >

    Parameters
    angles: angles between neighboring pairs (from pair_angles)
    nmask:  neighbor mask if invalid angles are not np.nan (None)
    m:      symmetry; angles will be considered modulo tau/m

    Returns
    mag:    the absolute value |psi|
    ang:    the phase of psi mod tau/m
    psims:  the local values of psi for each particle
    """
    if not (globl or locl):
        globl = locl = True
    if nmask is not None:
        angles[nmask] = np.nan
    psims = np.nanmean(np.exp(m*angles*1j), 1)
    if not globl:
        return np.abs(psims)
    psim = np.nanmean(psims)
    mag = abs(psim)
    ang = phase(psim)/m
    if locl:
        return mag, ang, psims
    return mag, ang
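
A minimal sketch of the psi_4 computation at the core of pair_angle_op, using a made-up neighbor-angle array in which every bond lies on a square-lattice axis, so the local and global order parameters come out near 1; one NaN entry stands in for a missing neighbor and is simply ignored.

import numpy as np

angles = np.tile([0.0, np.pi/2, np.pi, 3*np.pi/2], (5, 1))  # 5 particles x 4 neighbors
angles[0, 3] = np.nan

m = 4
psims = np.nanmean(np.exp(m * angles * 1j), 1)   # local psi_4 per particle
psim = np.nanmean(psims)                         # global Psi_4
print(np.abs(psims), abs(psim))                  # all ~1.0 for this lattice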
Example #15
def get_loss_pred(params,gt,est):
    fest="/home/coskun/PycharmProjects/RNNPoseV2/pred/3.6m/estimation.txt"
    fgt="/home/coskun/PycharmProjects/RNNPoseV2/pred/3.6m/ground_truth.txt"
    loss=0
    loss_list=[]
    with open(fest,"a") as f_handle_est,  open(fgt,"a") as f_handle_gt:
        for b in range(len(gt)):
            diff_vec=np.abs(gt[b].reshape(params['n_output']//3,3) - est[b].reshape(params['n_output']//3,3)) #14,3
            for val in est[b]:
                f_handle_est.write("%f "%(val*1000))
            for val in gt[b]:
                f_handle_gt.write("%f "%(val*1000))
            # val=np.sqrt(np.sum(diff_vec**2,axis=1))
            #
            # for i in range(14):
            #     f=val[i]
            #     f_handle.write("%f"%(f))
            #     if(i<13):
            #         f_handle.write(";")
            f_handle_est.write('\n')
            f_handle_gt.write('\n')
            b_l=np.sqrt(np.sum(diff_vec**2,axis=1))
            loss_list.append(b_l)
            loss +=np.nanmean(np.sqrt(np.sum(diff_vec**2,axis=1)))
        loss=np.nanmean(loss)
    return (loss,loss_list)
Example #16
def sigmaclip(data, factor, replacement=None, median=False, maxiter = 100):
    std = np.std(data)
    iteration=0
    if median: center = np.nanmedian(data)
    else: center = np.nanmean(data)
    if not replacement: replacement = np.nan
    elif replacement == 'mean': replacement = center
    indx = (data>(center+std*factor))+(data<(center-std*factor))
    while np.sum(indx) > 0 and iteration < maxiter:
        #print indx, np.sum(indx)
        #pl.plot(data)
        #pl.plot([0,len(data)],[center+std*factor,center+std*factor])
        #pl.plot([0,len(data)],[center-std*factor,center-std*factor])        
        data[indx] = replacement
        std = np.std(data)
        if median: center = np.nanmedian(data)
        else: center = np.nanmean(data)
        if not replacement: replacement = np.nan
        elif replacement == 'mean': replacement = center
        indx = (data>(center+std*factor))+(data<(center-std*factor))
        #print indx, np.sum(indx)
        #pl.plot(data,'ko')
        
        #pl.show()
        iteration+=1
    return data
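
A hypothetical usage sketch for sigmaclip (made-up data): with the default replacement, a gross outlier is replaced by np.nan and the nan-aware statistics then ignore it.

import numpy as np

data = np.array([1.0, 2.0, 1.5, 100.0, 2.5, 1.0, 2.0, 1.5])
cleaned = sigmaclip(data.copy(), factor=2)
print(cleaned)              # the 100.0 has been replaced by nan
print(np.nanmean(cleaned))  # ~1.6, instead of ~13.9 with the outlier included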
Example #17
def orient_op(orientations, m=4, positions=None, margin=0,
              ret_complex=True, do_err=False, globl=False, locl=False):
    """orient_op(orientations, m=4, positions=None, margin=0,
                 ret_complex=True, do_err=False, globl=False, locl=False)

       calculate the global m-fold particle orientational order parameter

                1   N    i m theta
        Phi  = --- SUM e          j
           m    N  j=1
    """
    if not (globl or locl):
        globl = True
        locl = orientations.ndim == 2
    np.mod(orientations, tau/m, orientations)
    if margin:
        if margin < ss:
            margin *= ss
        center = 0.5*(positions.max(0) + positions.min(0))
        d = helpy.dist(positions, center)   # distances to center
        orientations = orientations[d < d.max() - margin]
    phis = np.exp(m*orientations*1j)
    if locl:
        phis = np.nanmean(phis, 1)
    if do_err:
        err = np.nanstd(phis, ddof=1)/sqrt(np.count_nonzero(~np.isnan(phis)))
    if not globl:
        return (np.abs(phis), err) if do_err else np.abs(phis)
    phi = np.nanmean(phis) if ret_complex else np.abs(np.nanmean(phis))
    if locl:
        return (np.abs(phis), phi, err) if do_err else (np.abs(phis), phi)
    return (phi, err) if do_err else phi
Example #18
def Mplot(ax, x, ys, lss, xlabel, xunit, ylabel, yunit, labels, rescaleX=True, rescaleY=True):
    colors = getcolors(len(ys))
    xprefix, mx = SId(nanmean(x))
    yprefix, my = SId(nanmean(ys))
    if rescaleX:
        if xunit != '':
            xunit = ' / ('+xprefix+xunit+')'
    elif xunit != '':
            xunit = ' / ('+xunit+')'
    if rescaleY:
        if yunit != '':
            yunit = ' / ('+yprefix+yunit+')'
    elif yunit != '':
            yunit = ' / ('+yunit+')'
    if labels == '':
        labels = ['' for i in range(len(ys))]
    if type(x) != type([]):
        xs = [x for i in range(len(ys))]
    else:
        xs = x
    for x, y, ls, lab, col in zip(xs, ys, lss, labels, colors):
        if rescaleY:
            y = y/my
        if rescaleX:
            x = x/mx  # careful! numpy.ndarrays are mutable!!!
        ax.plot(x, y, ls, color=col, label=lab)
    if xlabel != '':
        ax.set_xlabel(xlabel+xunit)
    if ylabel != '':
        ax.set_ylabel(ylabel+yunit)
    ax.set_xlim([min(x), max(x)])
    return mx, my
Example #19
    def h__computeAvgAngles(self, x, y):
        """
        Take average difference between successive x and y skeleton points, 
        then compute the arc tangent from those averages.

        Parameters
        ---------------------------------------    
        x : m x n float numpy array
          m is the number of skeleton points
          n is the number of frames
        y : m x n float numpy array
          (Same as x)

        Returns
        ---------------------------------------    
        1-d float numpy array of length n
          The angles

        Notes
        ---------------------------------------    
        Simple helper for h__computeNoseBends

        """
        # Suppress RuntimeWarning: Mean of empty slice
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', category=RuntimeWarning)
            avg_diff_x = np.nanmean(np.diff(x, n=1, axis=0), axis=0)
            avg_diff_y = np.nanmean(np.diff(y, n=1, axis=0), axis=0)

        angles = np.arctan2(avg_diff_y, avg_diff_x)

        return angles
Example #20
def _msd_iter(pos, lagtimes):
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=RuntimeWarning)
        for lt in lagtimes:
            diff = pos[lt:] - pos[:-lt]
            yield np.concatenate((np.nanmean(diff, axis=0),
                                  np.nanmean(diff**2, axis=0)))
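
A hypothetical usage sketch for _msd_iter: for a 1-d trajectory that advances one unit per frame, the mean displacement at lag lt is lt and the mean squared displacement is lt**2 (assumes warnings is imported at module level, as in the snippet above).

import numpy as np

pos = np.arange(10.0).reshape(-1, 1)      # shape (n_frames, n_dims)
for lt, row in zip([1, 2, 3], _msd_iter(pos, lagtimes=[1, 2, 3])):
    print(lt, row)    # 1 [1. 1.],  2 [2. 4.],  3 [3. 9.]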
Example #21
def plot(ax, x, y, ls, xlabel, xunit, ylabel, yunit, label, col=False,
         setlim=True, rescaleX=True, rescaleY=True, xprefix=None, mx=None,
         yprefix=None, my=None):
    if xprefix is None:
        xprefix, mx = SId(nanmean(abs(x)))
    if yprefix is None:
        yprefix, my = SId(nanmean(abs(y)))
    if rescaleX:
        x = x/mx  # careful! numpy.ndarrays are mutable!!!
        if xunit != '':
            xunit = ' / ('+xprefix+xunit+')'
    elif xunit != '':
            xunit = ' / ('+xunit+')'
    if rescaleY:
        y = y/my
        if yunit != '':
            yunit = ' / ('+yprefix+yunit+')'
    elif yunit != '':
            yunit = ' / ('+yunit+')'
    if col is False:
        ax.plot(x, y, ls, label=label)
    else:
        ax.plot(x, y, ls, color=col, label=label)
    if xlabel != '':
        ax.set_xlabel(xlabel+xunit)
    if ylabel != '':
        ax.set_ylabel(ylabel+yunit)
    if setlim:
        epsy = (max(y)-min(y))*0.15
        ax.set_xlim([min(x), max(x)])
        ax.set_ylim([min(y)-epsy, max(y)+epsy])
    return x, y, yprefix, my
Example #22
def Avg_WS_Picks(draft_db, plot=False):
    x=draft_db[['Pk','WS/48']]
    #Find the range of picks (#1-?)
    pick_list=sorted(set(x['Pk'].tolist())) #set function gets unique elements of picks
    pick_means=[]
    for pick in pick_list:
        x1=x['WS/48'][x['Pk']==pick].tolist() #Get win shares for each pick
        pick_mean=np.nanmean(x1)
        pick_means.append(pick_mean)

    #Graph
    if plot:
        plt.scatter(pick_list,pick_means)
        #Plot with average WS for an NBA Player
        avg_WS=np.nanmean(x['WS/48'].tolist())
        plt.plot(pick_list,np.ones(len(pick_list))*avg_WS)
        plt.xlabel('Pick')
        plt.ylabel('Average WS/48')
        plt.ylim([-.5,.5])
        plt.xlim([0,200])
        plt.xticks([0,10,25,40,50,100])
        plt.savefig('Avg_WS_vs_pick')
        plt.show()
    else:
        return pd.DataFrame({
            'Pk': pick_list,
            'Avg_WS/48' : pick_means
        })
Example #23
    def compute(self, today, assets, out, close):

        # get returns dataset
        returns = ((close - np.roll(close, 1, axis=0)) / np.roll(close, 1, axis=0))[1:]

        # get index of benchmark
        benchmark_index = np.where((assets == 8554) == True)[0][0]

        # get returns of benchmark
        benchmark_returns = returns[:, benchmark_index]

        # prepare X matrix (x_is - x_bar)
        X = benchmark_returns
        X_bar = np.nanmean(X)
        X_vector = X - X_bar
        X_matrix = np.tile(X_vector, (len(returns.T), 1)).T

        # prepare Y matrix (y_is - y_bar)
        Y_bar = np.nanmean(close, axis=0)
        Y_bars = np.tile(Y_bar, (len(returns), 1))
        Y_matrix = returns - Y_bars

        # prepare variance of X
        X_var = np.nanvar(X)

        # multiply X matrix and Y matrix and sum (dot product)
        # then divide by variance of X
        # this gives the MLE of Beta
        out[:] = (np.sum((X_matrix * Y_matrix), axis=0) / X_var) / (len(returns))
Example #24
def autocorr(datain,endlag):
    '''
    autocorr(datain,endlag)

    Input:
         datain[0:N] is a data time series of size N
         endlag is the number of time steps to find autocorrelation
    Output:
         aut[0:endlag] is the autocorrelation of datain from lag 0 to time step endlag

    Steven Cavallo
    University of Oklahoma
    July 2016
    '''

    N = np.size(datain)
    aut = []
    for lag in range(0,endlag):
        data1 = datain[0:N-lag]
        data1m = data1 - np.nanmean(data1)
        data2 = datain[lag:]
        data2m = data2 - np.nanmean(data2)
        aut.append(np.sum(data1m*data2m)/np.sqrt(np.sum(data1m**2.0)*np.sum(data2m**2.0)))

    return aut
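
A hypothetical usage sketch for autocorr: the lag-0 value is ~1 by construction, and a strongly autocorrelated series (a random walk here) decays only slowly over the first few lags.

import numpy as np

np.random.seed(0)
x = np.cumsum(np.random.randn(500))   # random walk, strongly autocorrelated
aut = autocorr(x, 5)
print(aut[0])                         # ~1.0 at lag 0
print(aut)                            # slowly decaying values over lags 0..4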
Example #25
    def process_chunk(self, data):

        moment_data = numpy.log(data)
        
        moments = numpy.zeros(self.mmax - self.mmin, dtype=numpy.float32)

        mean = numpy.nanmean(moment_data)

        moment_data = moment_data - mean

        if self.mmin == 1:
            temp = numpy.ones(len(moment_data), dtype=numpy.float32)
        elif self.mmin == 2:
            temp = moment_data
        else:
            temp = numpy.power(moment_data, self.mmin-1)

        for i in range(0, self.mmax-self.mmin):
            temp = temp * moment_data
            moments[i] = numpy.nanmean(temp)

        if self.mmin == 1:
            moments[0] = mean

        return moments
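
A minimal standalone sketch of the moment loop above for mmin=1 and mmax=4, on made-up data: the first entry is the mean of log(data) and the remaining entries are central moments of the mean-subtracted values.

import numpy as np

data = np.array([1.0, 2.0, 4.0, 8.0])
x = np.log(data)
mean = np.nanmean(x)
centered = x - mean
moments = [mean] + [np.nanmean(centered**k) for k in (2, 3)]
print(moments)   # [mean of log(data), 2nd central moment, 3rd central moment]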
Example #26
    def trim_bad_edges(self, r, window_width=128, min_snr=5.):
        """
        Find edge regions that contain no information and trim them.

        Parameters
        ----------
        r : `int`
            order index
        window_width : `int`
            number of pixels to average over for local SNR
        min_snr : `float`
            SNR threshold below which we discard the data
        """
        for n in range(self.N):
            n_pix = len(self.xs[0][n])
            for window_start in range(n_pix - window_width):
                mean_snr = np.sqrt(np.nanmean(self.ivars[r][n,window_start:window_start+window_width]))
                if mean_snr > min_snr:
                    self.ivars[r][n,:window_start] = 0. # trim everything to left of window
                    break
            for window_start in reversed(range(n_pix - window_width)):
                mean_snr = np.sqrt(np.nanmean(self.ivars[r][n,window_start:window_start+window_width]))
                if mean_snr > min_snr:
                    self.ivars[r][n,window_start+window_width:] = 0. # trim everything to right of window
                    break
Example #27
def imputedata(data, strategy='mean', missing=False):
	'''
	two impute strategies
	'''
	with warnings.catch_warnings():
		warnings.simplefilter("ignore", category=RuntimeWarning)
		mean = np.nanmean(data, axis=0)
		sd = np.sqrt(np.nanmean((data - mean)**2, axis=0))
	sign = np.sign(data - mean)
	is_out = is_outliers(data, m=2.5)
	data[is_out] = np.nan
	
	if strategy == '2sd':
		# impute as +-2sd m
		# reduce the change in distribution. 
		for i in range(data.shape[1]):
			if missing:
				sign[np.isnan(sign)] = 0 #missing data will be imputed as mean
			ind_nan = np.where(np.isnan(data[:,i]))
			data[ind_nan,i] = mean[i] + (sd[i] * 2 * sign[ind_nan,i])

	if strategy == 'mean':
		#impute as mean
		for i in range(data.shape[1]):
			ind_nan = np.where(np.isnan(data[:,i]))
			if missing: #missing data will be imputed as mean
				data[ind_nan,i] = mean[i]
			else: #missing data will be left as nan
				data[ind_nan,i] = mean[i] * abs(sign[ind_nan,i])
	return data
Example #28
def nanmean(array):
    """Return the mean of an array ignoring nans.

    Args:
        array: array of values

    Returns:
        result: np.nanmean(array)

    """
    try:
        i = 0
        unc = 0
        if np.isnan(array.v).all() or len(array.v) == 0:
            return Measurement(np.nan, np.nan)
        val = np.nanmean(array.v)
        for u in np.nditer(array.u):
            if np.isfinite(u):
                unc += u ** 2
                i += 1
        return Measurement(val, np.sqrt(unc) / i)
    except AttributeError:
        if np.isnan(array).all() or len(array) == 0:
            return np.nan
        return np.nanmean(array)
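
A hypothetical usage sketch exercising the plain-ndarray fallback of this nanmean wrapper (the other branch needs the project's own Measurement type, so it is not shown): an all-NaN input returns nan directly instead of triggering numpy's "Mean of empty slice" warning.

import numpy as np

print(nanmean(np.array([1.0, np.nan, 3.0])))   # 2.0
print(nanmean(np.array([np.nan, np.nan])))     # nan, without the usual warning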
Example #29
def main():
    os.system('modprobe w1-gpio')
    os.system('modprobe w1-therm')
    print len(sys.argv)
    if len(sys.argv) == 1:
        number_of_meas = 7
    else:
        print sys.argv[1]
        number_of_meas = int(sys.argv[1])
    print "number_of_measurements = " + str(number_of_meas)
    
    print "getting device files and serials..."
    THEDICT = _get_w1_tree_and_serials()
    
    print "reading sensors " + str(number_of_meas) + " times ..."
    for step in range(int(number_of_meas)):
        for sensor_id in THEDICT:
            if sensor_id[0:2] == '28' or sensor_id[0:2] == '10':
                temp = read_sensor_ds18b20(sensor_id,THEDICT[sensor_id]["path"])
                volt = "n.a."
                THEDICT[sensor_id]["temp"].append(temp)
                THEDICT[sensor_id]["volt"].append(0.)
            if sensor_id[0:2] == '26':
                temp,volt = read_sensor_ds2438(sensor_id,THEDICT[sensor_id]["path"])
                THEDICT[sensor_id]["temp"].append(temp)
                THEDICT[sensor_id]["volt"].append(volt)
            print "step " + str(step) + " " + sensor_id + " " + str(temp) + " " + str(volt)
    
    print "calculating individual and total means:"
    MEAN_IND = {}
    for sensor_id in THEDICT:
        MEAN_IND[sensor_id] = [
                                np.nanmean(np.array(THEDICT[sensor_id]["temp"])), 
                                np.nanmean(np.array(THEDICT[sensor_id]["volt"]))
                              ]
    total_temp = []
    total_volt = []
    for sensor_id in MEAN_IND:
        if sensor_id[0:2] == '28' or sensor_id[0:2] == '10':
            total_temp.append(MEAN_IND[sensor_id][0])
        if sensor_id[0:2] == '26':
            total_volt.append(MEAN_IND[sensor_id][1])
    mean_temp = np.nanmean(np.array(total_temp))
    mean_volt = np.nanmean(np.array(total_volt))
    
    print "temp mean: " + str(mean_temp) + " +/- " + str(np.nanstd(np.array(total_temp)))
    print "volt mean: " + str(mean_volt) + " +/- " + str(np.nanstd(np.array(total_temp)))
        
    
    print "calculating offsets..."
    OFFSETS = {}
    for sensor_id in MEAN_IND:
        OFFSETS[sensor_id] = [
                               MEAN_IND[sensor_id][0] - mean_temp, 
                               MEAN_IND[sensor_id][1] - mean_volt
                             ]
    print OFFSETS
            
    print "writing offsets..."
    write_offset(OFFSETS)
Example #30
def average_values(result):
	"""average the results of bootstrapping"""
	result_avg = result[0]
	
	if isinstance(result_avg,dict):
		for key in result_avg:
			val = result_avg[key]
			
			if isinstance(val,dict):
				result_avg[key] = average_values([r[key] for r in result])
			elif isinstance(val, np.float64):
				result_avg[key] = np.nanmean([r[key] for r in result], axis=0)
			elif key == 'twoway':
				for (x,y), res in np.ndenumerate(val):
					result_avg[key][x,y] = average_values([r[key][x,y] for r in result])
			elif isinstance(val, np.ndarray) and not val.dtype == object:
				result_avg[key] = np.nanmean([r[key] for r in result], axis=0)
			elif key != 'target':
				result_avg[key] = [r[key] for r in result]
				
				#make sure we get a cell-array back in MATLAB
				if isinstance(val, str):
					result_avg[key] = np.array(result_avg[key],dtype=np.object)
	
	return result_avg
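
A hypothetical usage sketch for average_values, with two made-up bootstrap replicates containing a nested dict:

import numpy as np

result = [{'acc': np.float64(0.8), 'nested': {'rmse': np.float64(1.0)}},
          {'acc': np.float64(0.6), 'nested': {'rmse': np.float64(3.0)}}]
print(average_values(result))   # ~{'acc': 0.7, 'nested': {'rmse': 2.0}}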
Example #31
def calculate_agg_middle_features(df, velocity_median, velocity_mean,
                                  velocity_05, velocity_10, velocity_15,
                                  velocity_25, velocity_75, velocity_85,
                                  velocity_90, velocity_95, velocity_99):
    if df.shape[0] > 0:
        # This method calculates the aggregated feature and
        # saves them in the original df as well as an metadata df.
        v_ave = np.nanmean(df['velocity'].values)
        v_min = np.nanmin(df['velocity'].values)

        v_max = np.nanmax(df['velocity'].values)
        a_ave = np.nanmean(df['acceleration'].values)

        a_min = np.nanmin(df['acceleration'].values)
        a_max = np.nanmax(df['acceleration'].values)

        d_ave = np.nanmean(df['distance'].values)

        d_min = np.nanmin(df['distance'].values)
        d_max = np.nanmax(df['distance'].values)

        e_ave = np.nanmean(df['elevation'].values)

        e_min = np.nanmin(df['elevation'].values)
        e_max = np.nanmax(df['elevation'].values)

        lon_ave = np.nanmean(df['longitude'].values)

        lon_min = np.nanmin(df['longitude'].values)
        lon_max = np.nanmax(df['longitude'].values)

        lat_ave = np.nanmean(df['latitude'].values)

        lat_min = np.nanmin(df['latitude'].values)
        lat_max = np.nanmax(df['latitude'].values)

        az_ave = np.nanmean(df['azimus'].values)

        az_min = np.nanmin(df['azimus'].values)
        az_max = np.nanmax(df['azimus'].values)

        long_delta_ave = np.nanmean(df['long_delta'].values)

        long_delta_min = np.nanmin(df['long_delta'].values)
        long_delta_max = np.nanmax(df['long_delta'].values)

        latitude_delta_ave = np.nanmean(df['latitude_delta'].values)

        latitude_delta_min = np.nanmin(df['latitude_delta'].values)
        latitude_delta_max = np.nanmax(df['latitude_delta'].values)

        velocity_delta_ave = np.nanmean(df['velocity_delta'].values)

        velocity_delta_min = np.nanmin(df['velocity_delta'].values)
        velocity_delta_max = np.nanmax(df['velocity_delta'].values)

        azimus_delta_ave = np.nanmean(df['azimus_delta'].values)

        azimus_delta_min = np.nanmin(df['azimus_delta'].values)
        azimus_delta_max = np.nanmax(df['azimus_delta'].values)

        elevation_delta_ave = np.nanmean(df['elevation_delta'].values)

        elevation_delta_min = np.nanmin(df['elevation_delta'].values)
        elevation_delta_max = np.nanmax(df['elevation_delta'].values)

        velocity_median_count = np.sum(df['velocity'] > velocity_median)
        velocity_mean_count = np.sum(df['velocity'] > velocity_mean)

        velocity_05_count = np.sum(df['velocity'] > velocity_05)
        velocity_10_count = np.sum(df['velocity'] > velocity_10)
        velocity_15_count = np.sum(df['velocity'] > velocity_15)

        velocity_25_count = np.sum(df['velocity'] > velocity_25)
        velocity_75_count = np.sum(df['velocity'] > velocity_75)
        velocity_85_count = np.sum(df['velocity'] > velocity_85)

        velocity_90_count = np.sum(df['velocity'] > velocity_90)
        velocity_95_count = np.sum(df['velocity'] > velocity_95)
        velocity_99_count = np.sum(df['velocity'] > velocity_99)



        middle_list = list(df['distance'].quantile([0, .05, .1, .2, .25, .3, .4, .5, .6, .7, .75, .8, .9, .95, 1])) + \
        list(df['velocity'].quantile([0, .05, .1, .2, .25, .3, .4, .5, .6, .7, .75, .8, .9, .95, 1])) + \
        list(df['acceleration'].quantile([0, .05, .1, .2, .25, .3, .4, .5, .6, .7, .75, .8, .9, .95, 1])) + \
        list(df['elevation'].quantile([0, .05, .1, .2, .25, .3, .4, .5, .6, .7, .75, .8, .9, .95, 1])) + \
        list(df['longitude'].quantile([0, .05, .1, .2, .25, .3, .4, .5, .6, .7, .75, .8, .9, .95, 1])) + \
        list(df['latitude'].quantile([0, .05, .1, .2, .25, .3, .4, .5, .6, .7, .75, .8, .9, .95, 1])) + \
        list(df['azimus'].quantile([0, .05, .1, .2, .25, .3, .4, .5, .6, .7, .75, .8, .9, .95, 1])) + \
        list(df['long_delta'].quantile([0, .05, .1, .2, .25, .3, .4, .5, .6, .7, .75, .8, .9, .95, 1])) + \
        list(df['latitude_delta'].quantile([0, .05, .1, .2, .25, .3, .4, .5, .6, .7, .75, .8, .9, .95, 1])) + \
        list(df['velocity_delta'].quantile([0, .05, .1, .2, .25, .3, .4, .5, .6, .7, .75, .8, .9, .95, 1])) + \
        list(df['azimus_delta'].quantile([0, .05, .1, .2, .25, .3, .4, .5, .6, .7, .75, .8, .9, .95, 1])) + \
        list(df['elevation_delta'].quantile([0, .05, .1, .2, .25, .3, .4, .5, .6, .7, .75, .8, .9, .95, 1])) + \
        [d_ave, d_min, d_max] + \
        [v_ave, v_min, v_max] + \
        [a_ave, a_min, a_max] + \
        [e_ave, e_min, e_max] + \
        [lon_ave, lon_min, lon_max] + \
        [lat_ave, lat_min, lat_max] + \
        [az_ave, az_min, az_max] + \
        [long_delta_ave, long_delta_min, long_delta_max] + \
        [latitude_delta_ave, latitude_delta_min, latitude_delta_max] + \
        [velocity_delta_ave, velocity_delta_min, velocity_delta_max] + \
        [azimus_delta_ave, azimus_delta_min, azimus_delta_max] + \
        [elevation_delta_ave, elevation_delta_min, elevation_delta_max] + \
        [velocity_median_count, velocity_mean_count, velocity_05_count, velocity_10_count, velocity_15_count, velocity_25_count, velocity_75_count, velocity_85_count, velocity_90_count, velocity_95_count, velocity_99_count]
    else:
        middle_list = [-1.0] * 227

    return middle_list
Example #32
    def _fitgfunc_set(self, obj, g_func, p0=None,method_dqdt = None, method_fit = None, regression_function = None, uncertainty = True):
        if (method_fit == 'ols') or (method_fit == 'quantile'):
            if len(obj.dq_dt[method_dqdt])>0:
        
                x = np.log(np.array(obj.qh[method_dqdt])) 
                y = np.log(obj.dq_dt[method_dqdt] * obj.corh[method_dqdt])
    
                #Remove data when discharge <= 0
                y = y[~np.isinf(x)]
                x = x[~np.isinf(x)]

                x = x[~np.isinf(y)]
                y = y[~np.isinf(y)]
                                
                if len(x)>0:
                    d = {'x': x, 'y': y}
                    df = pd.DataFrame(data=d)
                    
                    if regression_function == 'ln(-dqdt) = a + b * ln(Q)':                
                        if method_fit == 'ols':                    
                            res = smf.ols(formula='y ~ x',   data=df).fit()
                        elif method_fit == 'quantile':
                            res = smf.quantreg('y ~ x', data=df).fit(q=.10)
                        else:
                            logger.error('Wrong fitting method')
                            
                        popt = [np.exp(res.params[0]), res.params[1] - 1] #modifying popt due to log and g(Q)*Q        
    
                        if uncertainty:
                            ci = res.conf_int(alpha=0.05, cols=None)
                            tmp = ufloat(np.log(popt[0]), ci[1][0] - np.log(popt[0]))
                            tmp  = uexp(tmp)
                            std = tmp.std_dev
                            popt_low = [popt[0] - std, ci[0][1] - 1]
                            popt_high = [popt[0] + std,ci[1][1] - 1]
                        else:
                            popt_low, popt_high = None, None 
                            
                    elif regression_function == 'ln(-dqdt) ~ a + b * ln(Q) + c * ln(Q)^2':
                        if method_fit == 'ols':                    
                            res = smf.ols(formula='y ~ x + np.power(x,2)',   data=df).fit()
                        elif method_fit == 'quantile':
                            res = smf.quantreg('y ~ x + np.power(x,2)', data=df).fit(q=.10)
                        else:
                            logger.error('Wrong fitting method')
                            logger.error(method_fit)
                                                
                        popt = [res.params[0], res.params[1]-1 , res.params[2]] #modifying popt due to log and g(Q)*Q        
                        
                        if uncertainty:
                            ci = res.conf_int(alpha=0.05, cols=None)
                            popt_low = [ci[0][0], ci[0][1] -1 , ci[0][2]]
                            popt_high = [ci[1][0], ci[1][1] -1, ci[1][2]]
                        else:
                            popt_low, popt_high = None, None                        
                                            
                    else:
                        logger.error('Wrong regression function')
                else:

                    if regression_function == 'ln(-dqdt) = a + b * ln(Q)':                
                        popt = [0.,0.0]
                        popt_high = [0,0]
                        popt_low = [0,0] 
                    elif regression_function == 'ln(-dqdt) ~ a + b * ln(Q) + c * ln(Q)^2':
                        popt = [0.,0.0,0.]
                        popt_high = [0,0,0.]
                        popt_low = [0,0,0.] 
                        
                    logger.debug('No data - fitgfunc')

                                  
            else: #
                if regression_function == 'ln(-dqdt) = a + b * ln(Q)':                
                    popt = [0.,0.0]
                    popt_high = [0,0]
                    popt_low = [0,0] 
                elif regression_function == 'ln(-dqdt) ~ a + b * ln(Q) + c * ln(Q)^2':
                    popt = [0.,0.0,0.]
                    popt_high = [0,0,0.]
                    popt_low = [0,0,0.] 
                    
                logger.debug('No data - fitgfunc')
            
            return lambda x: g_func(x, popt), popt, popt_low, popt_high, None, None, None,None, None
        
        elif method_fit == 'wls':

            x = np.array(obj.qh[method_dqdt])
            y = obj.dq_dt[method_dqdt] * obj.corh[method_dqdt] 

            y = y[x>0]  #Remove data when discharge <=0
            x = x[x>0]  
    
            #sort y based on x and sort x as well.
            temp = x.argsort()
            temp = np.flipud(temp)
            
            y = y[temp]
            x = x[temp]
    
            xlog = np.log(x)
            
            binx = []
            biny = []
            binvar = []
            binvarlog = []
            bin_stderr_divQ = []
            binnobs = []
            
            bin_x_range = [np.nanmax(xlog)]
            xlog_min = np.nanmin(xlog)
            onepercent_range = (np.nanmax(xlog) - np.nanmin(xlog)) / 100.
    
            flag_cont = True
        
            idx_here = 0
            while flag_cont:
                
                #Check if there is enough data
                std_err = 0.0
                
                #First guess on the bin
                bin_upper = bin_x_range[idx_here]
                bin_lower = bin_x_range[idx_here] - onepercent_range
                
                if bin_lower > xlog_min:
                    #adjust the range based on standard error               
                    flag_criteria = True
                    bin_upper_idx = next(xx[0] for xx in enumerate(xlog) if xx[1] <= bin_upper) 
                    if idx_here>0:
                        bin_upper_idx = bin_upper_idx + 1
                        
                    bin_lower_idx = next(xx[0] for xx in enumerate(xlog) if xx[1] <= bin_lower)
                        
                    bin_lower = xlog[bin_lower_idx]
                    while flag_criteria:
                        if len(y[bin_upper_idx:bin_lower_idx]) > 1:
                            std_err_y = sem(y[bin_upper_idx:bin_lower_idx])
                            half_mean = np.nanmean(y[bin_upper_idx:bin_lower_idx]) * 0.5
                            x_mean = np.nanmean(x[bin_upper_idx:bin_lower_idx])
                        else:
                            std_err_y = np.inf
                            half_mean = 0.0
                            
                        if std_err_y <= half_mean:
                            flag_criteria = False
                        else:
                            bin_lower_idx = bin_lower_idx + 1
                            
                        if bin_lower_idx >= len(x):
                            flag_criteria = False 
                            flag_cont = False
                            x_mean = np.nan
                            
                    #add stats to the arrays
                    if ~np.isnan([np.float64(x_mean), np.float64(half_mean * 2.0),np.power(np.float64(std_err_y),2.0)]).any(): #how is this possible? happen when bin_low_idx>=len(x) above?
                        binx.append(np.float64(x_mean))
                        biny.append(np.float64(half_mean * 2.0))
                        binvar.append(np.power(np.float64(std_err_y),2.0))
                        binvarlog.append(np.power(np.float64(sem(np.log(y[bin_upper_idx:bin_lower_idx]))),2.0))
                        bin_stderr_divQ.append(np.float64(sem(np.array(y[bin_upper_idx:bin_lower_idx])/np.array(x[bin_upper_idx:bin_lower_idx]))))
                        bin_x_range.append(bin_lower)
                        binnobs.append(bin_lower_idx-bin_upper_idx)
                        idx_here = idx_here + 1
                        
                else: # didn't include the last bin for now
                    flag_cont = False 
                
                if idx_here >= len(x):
                    flag_cont = False
                
            d = {'x': np.log(binx), 'y': np.log(biny)}
            df = pd.DataFrame(data=d)
    
            if regression_function == 'ln(-dqdt) = a + b * ln(Q)':                
                wls_res = smf.wls('y ~ x', data =df, weights = 1./np.array(binvarlog)).fit()  #maybe I need the variance in the log space...
                popt = [np.exp(wls_res.params[0]), wls_res.params[1] - 1] #modifying popt due to log and g(Q)*Q
        
                ci = wls_res.conf_int(alpha=0.05, cols=None)  
                tmp = ufloat(np.log(popt[0]), ci[1][0] - np.log(popt[0]))
                tmp  = uexp(tmp)
                std = tmp.std_dev
                
                popt_low = [popt[0] - std, ci[0][1] - 1] 
                popt_high = [popt[0] + std,ci[1][1] - 1]
        
            elif regression_function == 'ln(-dqdt) ~ a + b * ln(Q) + c * ln(Q)^2':

                wls_res = smf.wls('y ~ x + np.power(x,2)', data =df, weights = 1./np.array(binvarlog)).fit()  #maybe I need the variance in the log space...
                popt = [wls_res.params[0], wls_res.params[1] - 1, wls_res.params[2]] #modifying popt due to log and g(Q)*Q
        
                ci = wls_res.conf_int(alpha=0.05, cols=None)  
                popt_low = [ci[0][0], ci[0][1] - 1, ci[0][2]] 
                popt_high = [ci[1][0], ci[1][1] - 1, ci[1][2]]
            
            else:
                logger.error('Wrong regression function')
                            
            return lambda x: g_func(x, popt), popt, popt_low, popt_high, np.array(binx), np.array(biny) / np.array(binx), binnobs, np.sqrt(np.array(binvar)), bin_stderr_divQ
        
        else:
            logger.error('Wrong fitting method')
Example #33
def MLD_temp_and_dens_criteria(dt,drho,time,depth,temp,salt,dens):

    MLD_temp_crit = np.empty(len(time))
    MLD_temp_crit[:] = np.nan
    Tmean_temp_crit = np.empty(len(time))
    Tmean_temp_crit[:] = np.nan
    Smean_temp_crit = np.empty(len(time))
    Smean_temp_crit[:] = np.nan
    #Td_temp_crit = np.empty(len(time))
    #Td_temp_crit[:] = np.nan
    MLD_dens_crit = np.empty(len(time))
    MLD_dens_crit[:] = np.nan
    Tmean_dens_crit = np.empty(len(time))
    Tmean_dens_crit[:] = np.nan
    Smean_dens_crit = np.empty(len(time))
    Smean_dens_crit[:] = np.nan
    #Td_dens_crit = np.empty(len(time))
    #Td_dens_crit[:] = np.nan
    for t,tt in enumerate(time):
        if depth.ndim == 1:
            d10 = np.where(depth >= 10)[0][0]
        if depth.ndim == 2:
            d10 = np.where(depth[:,t] >= -10)[0][-1]
        T10 = temp[d10,t]
        delta_T = T10 - temp[:,t]
        ok_mld_temp = np.where(delta_T <= dt)[0]
        rho10 = dens[d10,t]
        delta_rho = -(rho10 - dens[:,t])
        ok_mld_rho = np.where(delta_rho <= drho)[0]

        if ok_mld_temp.size == 0:
            MLD_temp_crit[t] = np.nan
            #Td_temp_crit[t] = np.nan
            Tmean_temp_crit[t] = np.nan
            Smean_temp_crit[t] = np.nan
        else:
            if depth.ndim == 1:
                MLD_temp_crit[t] = depth[ok_mld_temp[-1]]
                #ok_mld_plus1m = np.where(depth >= depth[ok_mld_temp[-1]] + 1)[0][0]
            if depth.ndim == 2:
                MLD_temp_crit[t] = depth[ok_mld_temp[-1],t]
                #ok_mld_plus1m = np.where(depth >= depth[ok_mld_temp[-1],t] + 1)[0][0]
            #Td_temp_crit[t] = temp[ok_mld_plus1m,t]
            Tmean_temp_crit[t] = np.nanmean(temp[ok_mld_temp,t])
            Smean_temp_crit[t] = np.nanmean(salt[ok_mld_temp,t])

        if ok_mld_rho.size == 0:
            MLD_dens_crit[t] = np.nan
            #Td_dens_crit[t] = np.nan
            Tmean_dens_crit[t] = np.nan
            Smean_dens_crit[t] = np.nan
        else:
            if depth.ndim == 1:
                MLD_dens_crit[t] = depth[ok_mld_rho[-1]]
                #ok_mld_plus1m = np.where(depth >= depth[ok_mld_rho[-1]] + 1)[0][0]
            if depth.ndim == 2:
                MLD_dens_crit[t] = depth[ok_mld_rho[-1],t]
                #ok_mld_plus1m = np.where(depth >= depth[ok_mld_rho[-1],t] + 1)[0][0]
            #Td_dens_crit[t] = temp[ok_mld_plus1m,t]
            Tmean_dens_crit[t] = np.nanmean(temp[ok_mld_rho,t])
            Smean_dens_crit[t] = np.nanmean(salt[ok_mld_rho,t])

    return MLD_temp_crit,Tmean_temp_crit,Smean_temp_crit,\
           MLD_dens_crit,Tmean_dens_crit,Smean_dens_crit
Example #34
def get_offset(nBurst, Df_DC, coh_treshold=0.3):

    burst1 = 'burst_' + str(nBurst) + '/'
    burst2 = 'burst_' + str(nBurst + 1) + '/'

    # cpxint16 and cpxfloat32
    dataFormat_s = 'complex64'

    line_start, line_length, first_pixel_this, first_pixel_next, pixel_length, this_nr_oflines, this_nr_ofpixels, next_nr_oflines, next_nr_ofpixels, PRF = get_coordinates(
        nBurst)

    ifgs_1 = freadbk(burst1 + 'cint.raw.old', line_start, first_pixel_this,
                     line_length, pixel_length, dataFormat_s, this_nr_oflines,
                     this_nr_ofpixels)
    ESD_coh_1 = freadbk(burst1 + 'coherence.raw', line_start, first_pixel_this,
                        line_length, pixel_length, 'float32', this_nr_oflines,
                        this_nr_ofpixels)
    ifgs_2 = freadbk(burst2 + 'cint.raw.old', 1, first_pixel_next, line_length,
                     pixel_length, dataFormat_s, next_nr_oflines,
                     next_nr_ofpixels)
    ESD_coh_2 = freadbk(burst2 + 'coherence.raw', 1, first_pixel_next,
                        line_length, pixel_length, 'float32', next_nr_oflines,
                        next_nr_ofpixels)
    ESD_coh = (ESD_coh_1 + ESD_coh_2) / 2

    #ifgs_1_total = freadbk(burst1 + 'cint.raw.old', 1, 1, this_nr_oflines, this_nr_ofpixels, dataFormat_s,  this_nr_oflines, this_nr_ofpixels)
    #ifgs_2_total = freadbk(burst2 + 'cint.raw.old', 1, 1, next_nr_oflines, next_nr_ofpixels, dataFormat_s,  next_nr_oflines, next_nr_ofpixels)

    # Remove invalid data both in range and azimuth
    valid_range = []
    valid_azimuth = []
    for i in range(0, len(ifgs_1[0, :])):
        if np.nanmean(abs(ifgs_1[:, i])) != 0 and np.nanmean(abs(
                ifgs_2[:, i])) != 0:
            valid_range.append(i)

    for i in range(0, len(ifgs_1[:, 0])):
        if np.nanmean(abs(ifgs_1[i, :])) != 0 and np.nanmean(abs(
                ifgs_2[i, :])) != 0:
            valid_azimuth.append(i)

    if valid_range and valid_azimuth:
        ifgs_1 = ifgs_1[:, valid_range[:]]
        ifgs_2 = ifgs_2[:, valid_range[:]]
        ESD_coh = ESD_coh[:, valid_range[:]]

        ifgs_1 = ifgs_1[valid_azimuth[:], :]
        ifgs_2 = ifgs_2[valid_azimuth[:], :]
        ESD_coh = ESD_coh[valid_azimuth[:], :]

        Df_DC = Df_DC[:, valid_range[:]]
        Df_DC = Df_DC[valid_azimuth[:], :]

    # First downsample 2 * 10
    Nra = 10
    Naz = 2
    new_ra = ESD_coh.shape[1] // 10
    new_az = ESD_coh.shape[0] // 2

    ESD_coh = ESD_coh[0:new_az * Naz - 1:Naz, 0:new_ra * Nra - 1:Nra]
    ifgs_1_multilook = ifgs_1[:new_az * 2, :new_ra * 10].reshape(
        [new_az, Naz, new_ra, Nra]).mean(3).mean(1)
    ifgs_2_multilook = ifgs_2[:new_az * 2, :new_ra * 10].reshape(
        [new_az, Naz, new_ra, Nra]).mean(3).mean(1)
    Df_DC_multilook = Df_DC[:new_az * 2, :new_ra * 10].reshape(
        [new_az, Naz, new_ra, Nra]).mean(3).mean(1)

    # Double difference and calculate weights according to Cramer Rao bound
    diffBursts = ifgs_1_multilook * ifgs_2_multilook.conj()
    weights = 2 * ESD_coh * ESD_coh / (1 - ESD_coh * ESD_coh)

    W = np.sum(weights[ESD_coh > coh_treshold])
    angle = (
        PRF /
        (2 * np.pi * np.nanmean(Df_DC_multilook[ESD_coh > coh_treshold] *
                                weights[ESD_coh > coh_treshold] /
                                np.mean(weights[ESD_coh > coh_treshold]))))
    offset = np.angle(
        np.sum(diffBursts[ESD_coh > coh_treshold] *
               weights[ESD_coh > coh_treshold]) / W) * angle

    angle_pixel = angle * (line_start - 1)

    return offset, W, angle_pixel
Example #35
time_ax = da.DimArray(axes=[np.array(gmt.time)], dims=['time'])
time_ax[:] = gmt.time

# read HadCRUT4
dat = open('data/HadCRUT4_gmt.txt', 'r').read()
had4 = []
year = []
for line in dat.split('\n')[::2]:
    year.append(line.split(' ')[1])
    had4.append(float(line.split(' ')[-1]))
# get HadCRUT4 for 1850-2016
had4_gmt_ = np.array(had4[:-1])
had4_gmt = da.DimArray(axes=[np.array(gmt.time)], dims=['time'])
had4_gmt[1850:2016] = had4_gmt_
ref_ar5 = gmt.time[(gmt.time >= 1986) & (gmt.time < 2006)]
had4_gmt[:] = had4_gmt[:] - np.nanmean(had4_gmt[ref_ar5]) + 0.61
#print np.nanmean(np.array(had4_gmt_-np.nanmean(had4_gmt_[0:240]))[136*12:145*12])

print 'hadcrut4', np.nanmean(had4_gmt[2010:2020])
print 'blend-mask', np.nanmean(gmt['rcp85', :, 'gmt_bm', 2010:2020]) - 0.93
print 'millar', np.nanmean(gmt['rcp85', :, 'gmt_millar', 2010:2020])
print 'blend-mask', np.nanmean(gmt['rcp85', :, 'gmt_bm', 2015:2016]) - 0.93

# FIG SI 1
plot_dict = {
    'gmt_sat': {
        'l_color': 'orange',
        'color': 'darkorange',
        'longname': '$\mathregular{GMT_{SAT}}$',
        'pos': 0.65,
        'lsty': '-'
Example #36
    gv = mask.get_data() == cl_values[c]  # voxels showing the result

    # get voxels showing the result and also inside the network mask
    res_net = np.multiply(gv, nets.get_data()[:, :, :] > 0)
    nvoxels = np.sum(res_net) / float(np.sum(gv))
    if not quiet:
        print 'Cluster %d overlap: %.2f' % (c, nvoxels)

    for sidx, s in enumerate(subjs):
        fname = '%s/dr_stage2_%s_Z.nii.gz' % (data_dir, s)
        img = nb.load(fname)
        subj_data = img.get_data()[gv, int(ic)]
        # # if we want to only plot results inside the network
        # subj_data = img.get_data()[res_net == 1, int(ic)]
        gidx = my_groups.index(groups[sidx])
        cl_data[gidx].append(float(np.nanmean(subj_data)))
        sx[gidx].append(float(all_sx[sidx]))
    data.append(cl_data)

nrows = nclusters
ncols = 2
cnt = 1
fig = pl.figure(figsize=[10.25, nclusters * 5.9])

# for each cluster, make a scatterplot and a barplot
for cl in range(nclusters):
    if res_fname.find('NV') > 0:
        x = [i for g in sx for i in g]
        y = [i for g in data[cl] for i in g]
    else:
        x = [
Example #37
def make_gridded_dataset(data, res=0.25):
    """
    Big ugly function to make a lat/lon gridded netcdf out of L2 AMSR precip retrievals.
    In lieu of proper docstrings, because if you're reading this I forgot to polish this before sharing,
    I'll explain the gist of what's happening. 
    
    Real simple, we take our data, smoosh it so that each obs falls at the nearest lat/lon point on our regular grid,
    group the data by which grid box it falls in, and calculate the relevant stats of the distribution of obs in 
    each grid box. Stats are then returned as an xarray dataset. 
    """ 
    def round_nearest(arr, res):
        nans = np.isnan(arr)
        ret = (((arr+res/2)/res)//1)*res
        ret[nans] = np.nan
        return ret
    
    def reshape_incomplete_array(complete_idx, incomplete_idx, vals, shape):
        new_vals = np.full_like(complete_idx, fill_value=np.nan)
        for idx, val in zip(incomplete_idx, vals):
            new_vals[idx] = val
        return new_vals.reshape(shape)
    
    rain_stats_dict = {0: {'name': 'rain_prob',
                       'long_name': 'Probability of Rain',
                       'standard_name': 'rain_probability',
                       'units': '0-1'},
                   1: {'name': 'rain_rate',
                       'long_name': 'Rain Rate',
                       'standard_name': 'rain_rate',
                       'units': 'mm hr^-1'},
                   2: {'name': 'rain_rwr',
                       'long_name': 'Rain Rate While Raining',
                       'standard_name': 'conditional_rain_rate',
                       'units': 'mm hr^-1'},
                   3: {'name': 'rain_max',
                       'long_name': 'Max Rain Rate',
                       'standard_name': 'max_rain_rate',
                       'units': 'mm hr^-1'}}
    
    func_dict = {'mean': np.nanmean,
             'median': np.nanmedian,
             '25_pctile': lambda x: np.nanpercentile(x, 25),
             '75_pctile': lambda x: np.nanpercentile(x, 75),
             'min': np.nanmin,
             'max': np.nanmax}
    
    
    
    if not 1/res == int(1/res):
        raise ValueError("I haven't gone through to test whether this will work for any resolution that's not a unit fraction.")
    
    #setting up new grid and gridbox index
    grid_lats = np.arange(-90, 90, res)
    grid_lons = np.arange(0, 360, res)
    grid_coords = np.array(list(product(grid_lats, grid_lons)))
    full_grid_lats = grid_coords[:,0]
    full_grid_lons = grid_coords[:,1]
    grid_coords_lats_idx = (full_grid_lats+90)/res
    grid_coords_lons_idx = full_grid_lons/res
    grid_combined_idx = (360/res)*grid_coords_lats_idx + grid_coords_lons_idx
    assert(len(np.unique(grid_combined_idx)) == len(grid_combined_idx))

    #setting up old data unique index
    old_lats = data.latitude.values.flatten()
    old_lons = data.longitude.values.flatten()
    good_filt = np.logical_and(~np.isnan(old_lats), ~np.isnan(old_lons))
    old_lats, old_lons = old_lats[good_filt], old_lons[good_filt]
    lats_regrid = round_nearest(old_lats, res)
    lons_regrid = round_nearest(old_lons, res)%360
    lats_regrid_idx = (lats_regrid+90)/res
    lons_regrid_idx = lons_regrid/res
    unique_combined_idx = (360/res)*lats_regrid_idx + lons_regrid_idx
    assert(set(unique_combined_idx).issubset(grid_combined_idx))
    
    #grouping old data by box
    grouped = Groupby(unique_combined_idx.astype(int))
    
    def new_reshape(vals):
        """Reshapes value from groupby operation to an unfilled lat/lon grid"""
        return reshape_incomplete_array(grid_combined_idx, grouped.keys, vals, shape=(len(grid_lats), len(grid_lons)))
    
    ds = xr.Dataset()
    ds['latitude'] = grid_lats
    ds['longitude'] = grid_lons
    
    ds.attrs['comments'] = "gridded netcdf created by [email protected], adapted from R Eastman AMSR 89 GHz retrievals. " +\
                           "https://doi.org/10.1175/JTECH-D-18-0185.1"
    ds.attrs['creation date'] = str(dt.datetime.utcnow())
    ds.attrs['resolution'] = f'{str(res)} deg'
    
    ds['obs_count'] = (('latitude', 'longitude'), new_reshape(grouped.apply(len, np.empty_like(unique_combined_idx))))
    ds['not_nan_count'] = (('latitude', 'longitude'), new_reshape(grouped.apply(
        lambda x: sum(~np.isnan(x)), np.empty_like(unique_combined_idx))))
    ds['time'] = (('latitude', 'longitude'), new_reshape(grouped.apply(
        lambda x: np.nanmean(x.astype('int64')).astype('datetime64[ns]'), data['datetime'].values.flatten()[good_filt])))
    
    for k, v in rain_stats_dict.items():
        print('working on '+v['name'])
        sys.stdout.flush()
        old_data = data.rain_stats.isel(prob_rate_rwr_max=k).values.flatten()[good_filt]
        for func_name, func in func_dict.items():
            new_vals = new_reshape(grouped.apply(func, old_data))
            new_dict = {'long_name': f"{v['long_name']}_{func_name}",
                        'standard_name': f"{v['standard_name']}_{func_name}",
                        'units': v['units']}
            ds[f"{v['name']}_{func_name}"] = (('latitude', 'longitude'), new_vals, new_dict)
#             print(f"{v['name']}_{func_name}")
            sys.stdout.flush()
    
    print('finishing one')
    sys.stdout.flush()
    return ds
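The function above relies on a Groupby helper that is not shown in this snippet. Below is a minimal sketch of the interface it appears to assume (integer keys in, unique keys exposed as .keys, and apply(func, vals) returning one reduced value per key); the real implementation may differ.

import numpy as np

class Groupby:
    """Hypothetical minimal Groupby matching the interface assumed above."""
    def __init__(self, keys):
        # unique group labels, plus the group index of every input element
        self.keys, self._inverse = np.unique(keys, return_inverse=True)

    def apply(self, func, vals):
        # reduce vals group by group, returning results in .keys order
        vals = np.asarray(vals)
        return np.array([func(vals[self._inverse == i])
                         for i in range(len(self.keys))])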
Example #38
0
def train_nn(model,
             name,
             optimizer,
             scheduler,
             train_generator,
             test_generator,
             classification=False,
             n_epochs=10,
             outputs=[],
             use_wandb=False,
             plot_gradients=False,
             seed=0):

    np.random.seed(seed)
    torch.manual_seed(seed)
    if use_wandb:
        import wandb
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)
    print(classification)
    model = model.to(device)

    # for p in model.parameters():
    #     param_norm = p.grad.data.norm(2)
    #     print(param_norm.item())
    #     print(p.grad.data)
    #     total_norm += param_norm.item() ** 2
    # total_norm = total_norm ** (1. / 2)
    # print('norm', total_norm)

    if use_wandb and plot_gradients:
        wandb.watch(model, log='all')
    # by default, reduction = mean when multiple outputs
    #criterion = nn.MSELoss()
    if classification:
        criterion = nn.BCELoss()
    else:
        criterion = nn.MSELoss(reduction="none")
    step = 0
    best_loss_ts = None
    best_loss_tr = None
    losses_tr = []
    losses_ts = []
    dtype = torch.cuda.FloatTensor if torch.cuda.is_available(
    ) else torch.FloatTensor
    for epoch in range(n_epochs):
        print(epoch)
        train_aux = []
        for x, y, lengths in train_generator:
            # print(x.shape)
            x, y, lengths = x.type(dtype).to(device), y.type(dtype).to(
                device), lengths.to(device)
            preds = model(x, lengths)
            preds = preds.reshape(x.shape[0], len(outputs))
            assert preds.shape == y.shape, "{} {}".format(preds.shape, y.shape)
            loss_elements = criterion(preds, y)
            loss = loss_elements.mean()
            if np.isnan(loss.detach().cpu().numpy()):
                raise ValueError("Train loss is nan: ", loss)
            train_aux.append(loss.detach().cpu().numpy())
            # TODO: maybe we don't want to log at every step
            if use_wandb:
                wandb.log({f"{name} train loss per step": loss}, step=step)
            if len(outputs) > 1:
                outputs_loss = loss_elements.mean(dim=0)
                # print(outputs)
                # print(outputs_loss)
                assert len(outputs) == len(outputs_loss)
                per_output_loss = outputs_loss
                if use_wandb:
                    for i in range(len(outputs)):
                        wandb.log({outputs[i]: per_output_loss[i]}, step=step)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            preds = model(x, lengths)
            step += 1
            if step % 20 == 0:
                # losses_tr.append(per_output_loss.detach().cpu().numpy())

                aux = []
                accuracy = []
                for x, y, lengths in test_generator:
                    x, y, lengths = x.type(dtype).to(device), y.type(dtype).to(
                        device), lengths.to(device)
                    loss_elements = criterion(model(x, lengths), y)
                    loss = loss_elements.mean()
                    if np.isnan(loss.detach().cpu().numpy()):
                        raise ValueError("Test loss is nan: ", loss)
                    if classification:
                        # threshold predicted probabilities at 0.5 before scoring
                        pred_labels = (model(x, lengths).detach().cpu().numpy()
                                       > 0.5).astype(np.int8)
                        accuracy.append(
                            accuracy_score(
                                y.detach().cpu().numpy().astype(np.int8),
                                pred_labels))
                    aux.append(loss.detach().cpu().numpy())
                test_loss = np.nanmean(aux)
                if use_wandb:
                    wandb.log({f"{name} test loss per step": test_loss},
                              step=step)
                if len(outputs) > 1:
                    outputs_loss = loss_elements.mean(dim=0)
                    assert len(outputs) == len(outputs_loss)
                    per_output_loss = outputs_loss
                    if use_wandb:
                        for i in range(len(outputs)):
                            wandb.log({outputs[i]: per_output_loss[i]},
                                      step=step)
                train_loss = train_aux[-1]
                #                 train_loss = np.nanmean(train_aux)
                #                 print(train_aux)
                train_aux = []
                losses_tr.append(train_loss)
                #                 print(train_loss)
                if not np.isnan(train_loss) and (best_loss_tr is None
                                                 or train_loss < best_loss_tr):
                    if use_wandb:
                        wandb.run.summary["best_tr_loss"] = train_loss
                    best_loss_tr = train_loss
                scheduler.step()
                if classification:
                    print('Train loss: ' + str(train_loss) + ", test loss: " +
                          str(test_loss) + ", test accuracy: " +
                          str(np.nanmean(accuracy)))
                else:
                    print('Train loss: ' + str(train_loss) + ", test loss: " +
                          str(test_loss))
                # losses_ts.append(per_output_loss.detach().cpu().numpy())
                losses_ts.append(test_loss)
                if not np.isnan(test_loss) and (best_loss_ts is None
                                                or test_loss < best_loss_ts):
                    if use_wandb:
                        wandb.run.summary["best_loss"] = test_loss
                    best_loss_ts = test_loss
            #print(list(model.parameters())[4])
    return model, best_loss_tr, best_loss_ts, losses_tr, losses_ts
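A hedged usage sketch for train_nn: the model class, optimizer, scheduler, and output names below are placeholders chosen for illustration, not part of the original code.

import torch
import torch.nn as nn

class LSTMRegressor(nn.Module):
    """Placeholder sequence model compatible with the (x, lengths) call above."""
    def __init__(self, n_features=8, n_outputs=2, hidden=32):
        super().__init__()
        self.lstm = nn.LSTM(n_features, hidden, batch_first=True)
        self.head = nn.Linear(hidden, n_outputs)

    def forward(self, x, lengths):
        out, _ = self.lstm(x)          # lengths are ignored in this sketch
        return self.head(out[:, -1])   # predict from the last time step

model = LSTMRegressor()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10)

# train_generator / test_generator are assumed to yield (x, y, lengths) batches:
# model, best_tr, best_ts, losses_tr, losses_ts = train_nn(
#     model, 'demo', optimizer, scheduler, train_generator, test_generator,
#     outputs=['y1', 'y2'], n_epochs=5)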
def parent_function():
    # TODO: set patients
    patients = ["TS057"]  # set which patients you want to test
    # show_fig = 0  # if 1, figures show; if 0, figures save to current working directory

    # create paths to the data folder
    to_data = os.path.dirname(os.path.dirname(os.path.dirname(os.getcwd())))
    data_path = os.path.join(to_data, 'data')
    save_path = os.path.join(to_data,  'DCEpy', 'Features', 'BurnsStudy')
    features_path = os.path.join(save_path, 'StoredFeatures', 'features')

    # setting model parameters
    # TODO: some patients also have 5 min windowing available. If you want to play with it, change chunk_len to 300 and chunk_overlap to 270
    chunk_len = 180
    chunk_overlap = 150

    # MI parameters
    mi_win_len = 0.25  # seconds
    mi_win_overlap = 0  # seconds
    f_s = float(1e3)  # Hz

    # TODO: set frequency bands here. Mapping see function get_freq_bands(). Delta band is not available now!
    freq_bands = ["theta", "beta", "gamma"]

    persistence_time = 300/(chunk_len - chunk_overlap) + 1
    # persistence_time = 3.33 * 60  # minutes times seconds, the amount of time after a seizure prediction for which no alarm is raised
    preictal_time = 5 * 60  # minutes times seconds, the amount of time prior to seizure onset defined as preictal
    postictal_time = 5 * 60  # minutes times seconds, the amount of time after seizure end defined as postictal
    include_awake = True
    include_asleep = True

    # TODO: set rbf kernel here.
    svm_kernel = 'linear'

    # evaluate each patient
    for patient_index, patient_id in enumerate(patients):

        print "\n---------------------------Analyzing patient ", patient_id, "----------------------------\n"

        # update paths to be specific to each patient
        # my_data_path = "/Volumes/Brain_cleaner/Seizure Data/data"
        my_data_path = "/Users/TianyiZhang/Desktop/PatientData"
        p_data_path = os.path.join(my_data_path, patient_id)

        print 'Retrieving stored raw data'
        all_files, data_filenames, file_type, seizure_times = analyze_patient_raw(p_data_path, f_s, include_awake,
                                                                                  include_asleep, patient_id, chunk_len,
                                                                                  chunk_overlap, calc_train_local=True)
        number_files = len(all_files)

        # initializing performance stats
        prediction_sensitivity = np.zeros(len(all_files))
        detection_sensitivity = np.zeros(len(all_files))
        latency = np.zeros(len(all_files))
        fp = np.zeros(len(all_files))
        times = np.zeros(len(all_files))

        # beginning leave one out cross-validation
        for i in xrange(number_files):

            print '\nCross validations, k-fold %d of %d ...' % (i + 1, number_files)
            # defining which files are training files vs testing files for this fold
            testing_file = all_files[i]
            testing_file_idx = i
            cv_file_names = data_filenames[:i] + data_filenames[i + 1:]
            cv_file_idxs = range(i) + range(i+1,number_files)

            cv_train_files = all_files[:i] + all_files[i + 1:]
            cv_file_type = file_type[:i] + file_type[i + 1:]
            cv_seizure_times = seizure_times[:i] + seizure_times[i + 1:]

            print '\tEntering offline training'
            my_svm, mean_MI_matrix, sd_MI_matrix = offline_training(cv_file_type, cv_file_names,  cv_file_idxs, cv_seizure_times, cv_train_files, chunk_len, chunk_overlap, mi_win_len, mi_win_overlap, f_s, i, patient_id,
                                      persistence_time, preictal_time, postictal_time, freq_bands, svm_kernel)


            print '\tEntering online testing'
            # computing prediction on testing file for this fold

            print '\tCalculating testing features locally'
            # determine how many samples, windows, and channels we have in this test file
            total_test_samples = testing_file.shape[0]
            chunk_len_samples = chunk_len * f_s
            chunk_ovlap_samples = chunk_overlap * f_s
            num_chunks = int(math.floor(float(total_test_samples) / float(chunk_len_samples - chunk_ovlap_samples)))
            num_channels = testing_file.shape[1]


            # load test file
            test_key = str(testing_file_idx) + "_" + data_filenames[testing_file_idx].split("/")[-1]
            test_MI = get_MI_features(patient_id, test_key, freq_bands = freq_bands, chunk_len = chunk_len)

            # transform (normalize) MI matrix
            transformed_MI_test = transform_coherency([test_MI], mean_MI_matrix,
                                                          sd_MI_matrix)

            test_features = find_centrality_multibands(transformed_MI_test)[0]  # should be list of (n_samples, 2, 6, 6)  # for loop to process each window in the test file
            # initializing for computing performance metrics
            # full_file_decision = np.zeros(num_chunks)
            t_samples = test_features.shape[0]
            full_file_decision = np.zeros(t_samples)

            alarm_timer = 0

            for index in np.arange(t_samples):
                # getting the single window of data for this iteration of the for loop
                feature = test_features[index].reshape(1, -1)
                decision, label_index, alarm_timer = online_testing(feature, f_s, testing_file_idx, persistence_time, index,
                                                                    alarm_timer, my_svm)

                # storing the outlier fraction and decision for calculating performance metrics and visualization
                full_file_decision[index] = decision

            # using outputs from test file to compute performance metrics
            print '\tCalculating performance stats'

            print "\tFile Type: ", file_type[i]

            print "\t Full File Decision: ", full_file_decision

            # convert from units of windows to units of samples
            test_decision_sample = window_to_samples(full_file_decision, chunk_len, chunk_overlap, f_s)

            # find performance metrics for this fold of cross validation
            prediction_sensitivity[i], detection_sensitivity[i], latency[i], fp[i], times[i] = performance_stats(
                test_decision_sample, seizure_times[i], f_s, preictal_time, chunk_len, chunk_overlap)


            # print the performance metrics and visualize the algorithm output on a graph
            print '\tPrediction sensitivity = ', prediction_sensitivity[i], 'Detection sensitivity = ', \
            detection_sensitivity[i], 'Latency = ', latency[i], 'FP = ', fp[i], 'Time = ', times[i]
            # viz_single_outcome(test_decision_sample, test_outlier_frac_sample, testing_file[:,0], seizure_times[i], threshold, i, patient_id, f_s)

        # compute false positive rate
        fpr = float(np.nansum(fp)) / float(np.nansum(times))

        # print mean and median performance metrics
        print '\nMean prediction sensitivity = ', np.nanmean(
            prediction_sensitivity), 'Mean detection sensitivity = ', np.nanmean(
            detection_sensitivity), 'Mean latency = ', np.nanmean(latency), 'Mean FPR = ', fpr
        print 'Median prediction sensitivity = ', np.nanmedian(
            prediction_sensitivity), 'Median detection sensitivity = ', np.nanmedian(
            detection_sensitivity), 'Median latency = ', np.nanmedian(latency)
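parent_function calls helpers such as window_to_samples that are defined elsewhere; the sketch below is only a guess at that window-to-sample expansion, consistent with how it is called, and is not the project's implementation.

import numpy as np

def window_to_samples_sketch(window_decisions, chunk_len, chunk_overlap, f_s):
    """Spread one decision per sliding window onto a per-sample vector.

    Windows are chunk_len seconds long and advance by (chunk_len - chunk_overlap)
    seconds; later windows overwrite earlier ones where they overlap.
    """
    stride = int((chunk_len - chunk_overlap) * f_s)
    win_len = int(chunk_len * f_s)
    n_samples = stride * (len(window_decisions) - 1) + win_len
    out = np.zeros(n_samples)
    for i, decision in enumerate(window_decisions):
        out[i * stride:i * stride + win_len] = decision
    return out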
Example #40
0
def plot_conditions(epochs,
                    conditions=OrderedDict(),
                    ci=97.5,
                    n_boot=1000,
                    title='',
                    palette=None,
                    ylim=(-6, 6),
                    diff_waveform=(1, 2)):
    """Plot ERP conditions.
    Args:
        epochs (mne.epochs): EEG epochs
    Keyword Args:
        conditions (OrderedDict): dictionary that contains the names of the
            conditions to plot as keys, and the list of corresponding marker
            numbers as value. E.g.,
                conditions = {'Non-target': [0, 1],
                               'Target': [2, 3, 4]}
        ci (float): confidence interval in range [0, 100]
        n_boot (int): number of bootstrap samples
        title (str): title of the figure
        palette (list): color palette to use for conditions
        ylim (tuple): (ymin, ymax)
        diff_waveform (tuple or None): tuple of ints indicating which
            conditions to subtract for producing the difference waveform.
            If None, do not plot a difference waveform
    Returns:
        (matplotlib.figure.Figure): figure object
        (list of matplotlib.axes._subplots.AxesSubplot): list of axes
    """
    if isinstance(conditions, dict):
        conditions = OrderedDict(conditions)

    if palette is None:
        palette = sns.color_palette("hls", len(conditions) + 1)

    X = epochs.get_data() * 1e6
    times = epochs.times
    y = pd.Series(epochs.events[:, -1])

    fig, axes = plt.subplots(2, 2, figsize=[12, 6], sharex=True, sharey=True)
    axes = [axes[1, 0], axes[0, 0], axes[0, 1], axes[1, 1]]

    for ch in range(4):
        for cond, color in zip(conditions.values(), palette):
            sns.tsplot(X[y.isin(cond), ch],
                       time=times,
                       color=color,
                       n_boot=n_boot,
                       ci=ci,
                       ax=axes[ch])

        if diff_waveform:
            diff = (np.nanmean(X[y == diff_waveform[1], ch], axis=0) -
                    np.nanmean(X[y == diff_waveform[0], ch], axis=0))
            axes[ch].plot(times, diff, color='k', lw=1)

        axes[ch].set_title(epochs.ch_names[ch])
        axes[ch].set_ylim(ylim)
        axes[ch].axvline(x=0,
                         ymin=ylim[0],
                         ymax=ylim[1],
                         color='k',
                         lw=1,
                         label='_nolegend_')

    axes[0].set_xlabel('Time (s)')
    axes[0].set_ylabel('Amplitude (uV)')
    axes[-1].set_xlabel('Time (s)')
    axes[1].set_ylabel('Amplitude (uV)')

    if diff_waveform:
        legend = (['{} - {}'.format(diff_waveform[1], diff_waveform[0])] +
                  list(conditions.keys()))
    else:
        legend = conditions.keys()
    axes[-1].legend(legend)
    sns.despine()
    plt.tight_layout()

    if title:
        fig.suptitle(title, fontsize=20)

    return fig, axes
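A hedged usage sketch for plot_conditions; the epochs object would be built elsewhere with MNE, and the marker ids below are placeholders.

from collections import OrderedDict

conditions = OrderedDict([('Non-target', [1]), ('Target', [2])])
# epochs is assumed to be an existing mne.Epochs object with 4 channels:
# fig, axes = plot_conditions(epochs, conditions=conditions, ci=97.5,
#                             n_boot=1000, title='ERP', diff_waveform=(1, 2))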
Example #41
0
    au['Classification'].str.replace(
        ' ', '').values + au['Subclassification'].str.replace(' ', '').values
qu['fname'] = pd.Series(np.arange(qu.shape[0])).astype(str).values + '-' + qu['Member'].values + '-Q-' + \
    qu['Classification'].str.replace(
        ' ', '').values + qu['Subclassification'].str.replace(' ', '').values
mu['fname'] = pd.Series(np.arange(mu.shape[0])).astype(str).values + '-' + mu['Member'].values + '-M-' + \
    mu['Classification'].str.replace(
        ' ', '').values + mu['Subclassification'].str.replace(' ', '').values


# Normalize feature values and we store the mean and standard deviation
qu_stat = {}
for fname in qu['fname'].values:
    if 'RealGDPGrowth' not in fname:
        data = qu.loc[(qu['fname'] == fname), '2000-Q1':'2020-Q4'].values
        mean, std = np.nanmean(data), np.nanstd(data)
        qu.loc[(qu['fname'] == fname),
               '2000-Q1':'2020-Q4'] = (qu.loc[(qu['fname'] == fname),
                                              '2000-Q1':'2020-Q4'] - mean) / std
        qu_stat[fname] = mean, std
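The stored (mean, std) pairs in qu_stat would presumably be used to undo the normalization later; a hedged sketch of such a helper (not in the original):

def denormalize(norm_values, fname, stats=qu_stat):
    # invert the (x - mean) / std transform applied above
    mean, std = stats[fname]
    return norm_values * std + mean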


def deseasonalize(qu=None):
    """
    Deseasonalize quarterly data. We will decompose it first and get trend, seasonality, and random error.
    We then remove seasonality and save it to a dictionary to be used later

    Args:
        qu - quarterly data
    return:
        deseasonalized qu
Example #42
0
for i in range(numtrans):
    x = model.forward_model(x, p, dt)

# Integrate the truth simulation
# The result is stored in a numpy array "state" with dimension (numstep, 3)

yo = forward_operator(x) + np.random.multivariate_normal(np.array([0]), R)

# Initialize the cycle from the "climatological" mean of the system, i.e. we have no
# precise information about where the system is at the initial time.

statefens = np.zeros((nvars, EnsSize))

for iens in range(EnsSize):
    statefens[:,
              iens] = np.nanmean(x, 0) + dx0 + np.random.multivariate_normal(
                  np.zeros(nvars), P0)

# Compute the optimal transport matrix.
#from emd import emd
from scipy.spatial.distance import cdist
import ot

# Compute the inverse of the covariance matrix
Rinv = np.linalg.inv(R)

# Compute the weights from the likelihood of the observations
# given each of the particles.
w = np.zeros(EnsSize)
for iens in range(EnsSize):
    yf = forward_operator(statefens[:, iens])
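The snippet above is cut off right after the forecast observation is computed. Below is a hedged, self-contained sketch of how Gaussian observation-likelihood weights are typically computed and normalized in this kind of particle filter; it is not the original code.

import numpy as np

def likelihood_weights(yo, yf_ens, Rinv):
    """yo: (n_obs,) observation, yf_ens: (n_obs, ens_size) forecast obs,
    Rinv: (n_obs, n_obs) inverse observation-error covariance."""
    ens_size = yf_ens.shape[1]
    w = np.zeros(ens_size)
    for iens in range(ens_size):
        innov = yo - yf_ens[:, iens]
        w[iens] = np.exp(-0.5 * innov @ Rinv @ innov)
    return w / np.sum(w)   # normalize so the weights sum to one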
rhs = []
ssts = []
binned = [[] for i in range(4)]
values = []
regions = []

for region in sorted(world_dict.keys()):
    xai = world_dict[region]['feature_imp'].tolist()
    #xai.append(np.nanmean(world_dict[region]['values']))

    X.append(xai)
    omegas.append(world_dict[region]['feature_imp'][0])
    eiss.append(world_dict[region]['feature_imp'][1])
    rhs.append(world_dict[region]['feature_imp'][2])
    ssts.append(world_dict[region]['feature_imp'][3])
    values.append(np.nanmean(world_dict[region]['values']))
    regions.append(region)

ssts = np.array(ssts)
omegas = np.array(omegas)
eiss = np.array(eiss)
rhs = np.array(rhs)
values = np.array(values)

X = np.array(X)

kmeans = KMeans(n_clusters=n_bins,
                n_init=5,
                algorithm='full',
                tol=.000001,
                max_iter=60,
Example #44
0
index_y1f = np.min(np.where(time>simu.y1f))
'''
array_to_plotH = np.nanmean(moc[:,:,0,index_y1h:index_y1h+10],axis=2)
array_to_plotH[array_to_plotH==0] = np.nan
array_to_plotH = np.ma.masked_invalid(array_to_plotH)

array_to_plotF = np.nanmean(moc[:,:,0,index_y1f:index_y1f+10],axis=2)
array_to_plotF[array_to_plotF==0] = np.nan
array_to_plotF = np.ma.masked_invalid(array_to_plotF)

print(np.nanmin(array_to_plotH),np.nanmax(array_to_plotF))
print(array_to_plotH)

make_plot.plot_map_s(xr,yr,array_to_plotH,var,simu.y1h,'',simu.output_fileH)
make_plot.plot_map_s(xr,yr,array_to_plotF,var,simu.y1f,'',simu.output_fileF)
'''
colorbar='undefined'
array_to_plotH = np.nanmean(moc[0,:,:,index_y1h:index_y1h+10],axis=2)
array_to_plotF = np.nanmean(moc[0,:,:,index_y1f:index_y1f+10],axis=2)
X = yr[0,:]
minline=-0.1
maxline=0.1

array_to_plotF = np.ma.masked_invalid(array_to_plotF)
array_to_plotH = np.ma.masked_invalid(array_to_plotH)

#make_plot.one_sec(array_to_plotH.transpose(),var,X,-depth,simu.max_depth,simu.y1h,simu.output_fileH,minline,maxline,str(simu.y1h)+'-'+str(simu.y2h),colorbar)
#make_plot.one_sec(array_to_plotF.transpose(),var,X,-depth,simu.max_depth,simu.y1f,simu.output_fileF,minline,maxline,str(simu.y1f)+'-'+str(simu.y2f),colorbar)
make_plot.one_sec(array_to_plotH.transpose(),array_to_plotF.transpose(),var,X,-depth,simu.max_depth,simu.y1h,simu.output_fileC,minline,maxline,'current and future',colorbar)

Example #45
0
                                        gt_coords,
                                        lm_cnt=valid_data.lm_cnt,
                                        pck_threshold=params['pck_threshold'],
                                        scale=1)
        # Write the validation result to csv
        write_pred_dataframe(valid_data,
                             pred_coords,
                             folder=params['valid_result_dir'] + "grid_temp/",
                             file_name=str(date.today()) + col_name,
                             patches_coord=None,
                             write_index=False)

        result_dict = params
        result_dict = build_result_dict(result_dict=params,
                                        pck=np.round(pck, 4),
                                        mean_pck=round(np.nanmean(pck), 4),
                                        pck_threshold=params['pck_threshold'],
                                        diff_per_pt=np.round(diff_per_pt, 4),
                                        mean_diff_per_pt=round(
                                            np.nanmean(diff_per_pt), 4))

        final_grid_df = final_grid_df.append(
            pd.DataFrame(result_dict, index=[id_grid]))

    final_grid_df.to_csv(params['valid_result_dir'] +
                         "{}grid_search.csv".format(str(date.today())),
                         index=False)

# lr_list = np.round(lr_list,4)
# N=5
# print(lr_list)
def classification_algorithm(file,
                             out_directory,
                             diagnostic=False,
                             xr_data=False):
    Path(out_directory).mkdir(parents=True, exist_ok=True)
    df = xr.open_dataset(file)
    df = df.where(df.range > 90, drop=True)
    df = preprocess.bleed_through(df)

    df['beta_raw'] = df['beta_raw'].where(
        df['co_signal'] > (1 + df.attrs['background_snr_sd']))

    classifier = np.zeros(df['beta_raw'].shape, dtype=int)

    log_beta = np.log10(df['beta_raw'])

    if xr_data is True:
        with open('ref_XR2.npy', 'rb') as f:
            ref_XR = np.load(f)
        log_beta[:, :50] = log_beta[:, :50] - ref_XR

    # Aerosol
    aerosol = log_beta < -5.5

    # Small size median filter to remove noise
    aerosol_smoothed = median_filter(aerosol, size=11)
    # Remove thin bridges, better for the clustering
    aerosol_smoothed = median_filter(aerosol_smoothed, size=(15, 1))

    classifier[aerosol_smoothed] = 10

    for var in ['beta_raw', 'v_raw', 'depo_bleed']:
        df[var] = df[var].where(
            df['co_signal'] > (1 + 3 * df.attrs['background_snr_sd']))
    log_beta = np.log10(df['beta_raw'])

    if xr_data is True:
        log_beta[:, :50] = log_beta[:, :50] - ref_XR

    range_flat = np.tile(df['range'], df['beta_raw'].shape[0])
    # Liquid
    liquid = log_beta > -5.5

    # maximum filter to increase the size of liquid region
    liquid_max = maximum_filter(liquid, size=5)
    # Median filter to remove background noise
    liquid_smoothed = median_filter(liquid_max, size=13)

    classifier[liquid_smoothed] = 30

    # updraft - indication of aerosol zone
    updraft = df['v_raw'] > 1
    updraft_smooth = median_filter(updraft, size=3)
    updraft_max = maximum_filter(updraft_smooth, size=91)

    # Fill the gap in aerosol zone
    updraft_median = median_filter(updraft_max, size=31)

    # precipitation < -1 (center of precipitation)
    precipitation_1 = (log_beta > -7) & (df['v_raw'] < -1)

    precipitation_1_median = median_filter(precipitation_1, size=9)

    # Only select precipitation outside of aerosol zone
    precipitation_1_ne = precipitation_1_median * ~updraft_median
    precipitation_1_median_smooth = median_filter(precipitation_1_ne, size=3)
    precipitation = precipitation_1_median_smooth

    # precipitation < -0.5 (include all precipitation)
    precipitation_1_low = (log_beta > -7) & (df['v_raw'] < -0.5)

    # Avoid ebola infection surrounding updraft
    # Useful to contain error during ebola precipitation
    updraft_ebola = df['v_raw'] > 0.2
    updraft_ebola_max = maximum_filter(updraft_ebola, size=3)

    # Ebola precipitation
    for _ in range(1500):
        prep_1_max = maximum_filter(precipitation, size=3)
        prep_1_max *= ~updraft_ebola_max  # Avoid updraft area
        precipitation_ = precipitation_1_low * prep_1_max
        if np.sum(precipitation) == np.sum(precipitation_):
            break
        precipitation = precipitation_

    classifier[precipitation] = 20

    # Remove all aerosol above cloud or precipitation
    mask_aerosol0 = classifier == 10
    for i in np.array([20, 30]):
        if i == 20:
            mask = classifier == i
        else:
            mask = log_beta > -5
            mask = maximum_filter(mask, size=5)
            mask = median_filter(mask, size=13)
        mask_row = np.argwhere(mask.any(axis=1)).reshape(-1)
        mask_col = np.nanargmax(mask[mask_row, :], axis=1)
        for row, col in zip(mask_row, mask_col):
            mask[row, col:] = True
        mask_undefined = mask * mask_aerosol0
        classifier[mask_undefined] = i

    if (classifier == 10).any():
        classifier_ = classifier.ravel()
        time_dbscan = np.repeat(np.arange(df['time'].size),
                                df['beta_raw'].shape[1])
        height_dbscan = np.tile(np.arange(df['range'].size),
                                df['beta_raw'].shape[0])

        time_dbscan = time_dbscan[classifier_ == 10].reshape(-1, 1)
        height_dbscan = height_dbscan[classifier_ == 10].reshape(-1, 1)
        X = np.hstack([time_dbscan, height_dbscan])
        db = DBSCAN(eps=3, min_samples=25, n_jobs=-1).fit(X)

        v_dbscan = df['v_raw'].values.ravel()[classifier_ == 10]
        range_dbscan = range_flat[classifier_ == 10]

        v_dict = {}
        r_dict = {}
        for i in np.unique(db.labels_):
            v_dict[i] = np.nanmean(v_dbscan[db.labels_ == i])
            r_dict[i] = np.nanmin(range_dbscan[db.labels_ == i])

        lab = db.labels_.copy()
        for key, val in v_dict.items():
            if key == -1:
                lab[db.labels_ == key] = 40
            elif (val < -0.5):
                lab[db.labels_ == key] = 20
            elif r_dict[key] == min(df['range']):
                lab[db.labels_ == key] = 10
            elif (val > -0.2):
                lab[db.labels_ == key] = 10
            else:
                lab[db.labels_ == key] = 40

        classifier[classifier == 10] = lab

    # Separate ground rain
    if (classifier == 20).any():
        classifier_ = classifier.ravel()
        time_dbscan = np.repeat(np.arange(df['time'].size),
                                df['beta_raw'].shape[1])
        height_dbscan = np.tile(np.arange(df['range'].size),
                                df['beta_raw'].shape[0])

        time_dbscan = time_dbscan[classifier_ == 20].reshape(-1, 1)
        height_dbscan = height_dbscan[classifier_ == 20].reshape(-1, 1)
        X = np.hstack([time_dbscan, height_dbscan])
        db = DBSCAN(eps=3, min_samples=1, n_jobs=-1).fit(X)

        range_dbscan = range_flat[classifier_ == 20]

        r_dict = {}
        for i in np.unique(db.labels_):
            r_dict[i] = np.nanmin(range_dbscan[db.labels_ == i])

        lab = db.labels_.copy()
        for key, val in r_dict.items():
            if r_dict[key] == min(df['range']):
                lab[db.labels_ == key] = 20
            else:
                lab[db.labels_ == key] = 30

        classifier[classifier == 20] = lab

    cmap = mpl.colors.ListedColormap(
        ['white', '#2ca02c', 'blue', 'red', 'gray'])
    boundaries = [0, 10, 20, 30, 40, 50]
    norm = mpl.colors.BoundaryNorm(boundaries, cmap.N, clip=True)
    # decimal_time = df['time'].dt.hour + \
    #     df['time'].dt.minute / 60 + df['time'].dt.second/3600

    if diagnostic is True:
        fig, axes = plt.subplots(6,
                                 2,
                                 sharex=True,
                                 sharey=True,
                                 figsize=(16, 9))
        for val, ax, cmap_ in zip(
            [
                aerosol, aerosol_smoothed, liquid_smoothed,
                precipitation_1_median, updraft_median,
                precipitation_1_median_smooth, precipitation_1_low,
                updraft_ebola_max, precipitation
            ],
                axes.flatten()[2:-1],
            [['white', '#2ca02c'], ['white', '#2ca02c'], ['white', 'red'],
             ['white', 'blue'], ['white', '#D2691E'], ['white', 'blue'],
             ['white', 'blue'], ['white', '#D2691E'], ['white', 'blue']]):
            ax.pcolormesh(df['time'],
                          df['range'],
                          val.T,
                          cmap=mpl.colors.ListedColormap(cmap_))
        axes.flatten()[-1].pcolormesh(df['time'],
                                      df['range'],
                                      classifier.T,
                                      cmap=cmap,
                                      norm=norm)
        axes[0, 0].pcolormesh(df['time'],
                              df['range'],
                              np.log10(df['beta_raw']).T,
                              cmap='jet',
                              vmin=-8,
                              vmax=-4)
        axes[0, 1].pcolormesh(df['time'],
                              df['range'],
                              df['v_raw'].T,
                              cmap='jet',
                              vmin=-2,
                              vmax=2)
        fig.tight_layout()
        fig.savefig(out_directory + '/' + df.attrs['file_name'] +
                    '_diagnostic_plot.png',
                    dpi=150,
                    bbox_inches='tight')

    fig, ax = plt.subplots(4, 1, figsize=(6, 8), sharex=True)
    ax1, ax3, ax5, ax7 = ax.ravel()
    p1 = ax1.pcolormesh(df['time'],
                        df['range'],
                        np.log10(df['beta_raw']).T,
                        cmap='jet',
                        vmin=-8,
                        vmax=-4)
    p2 = ax3.pcolormesh(df['time'],
                        df['range'],
                        df['v_raw'].T,
                        cmap='jet',
                        vmin=-2,
                        vmax=2)
    p3 = ax5.pcolormesh(df['time'],
                        df['range'],
                        df['depo_bleed'].T,
                        cmap='jet',
                        vmin=0,
                        vmax=0.5)
    p4 = ax7.pcolormesh(df['time'],
                        df['range'],
                        classifier.T,
                        cmap=cmap,
                        norm=norm)

    myFmt = DateFormatter("%H")
    for ax in [ax1, ax3, ax5, ax7]:
        ax.yaxis.set_major_formatter(preprocess.m_km_ticks())
        ax.set_ylabel('Height [km, a.g.l]')

    cbar = fig.colorbar(p1, ax=ax1)
    cbar.ax.set_ylabel(r'$\beta\quad[Mm^{-1}]$', rotation=90)
    # cbar.ax.yaxis.set_label_position('left')
    cbar = fig.colorbar(p2, ax=ax3)
    cbar.ax.set_ylabel('w [' + units.get('v_raw', None) + ']', rotation=90)
    # cbar.ax.yaxis.set_label_position('left')
    cbar = fig.colorbar(p3, ax=ax5)
    cbar.ax.set_ylabel(r'$\delta$')
    # cbar.ax.yaxis.set_label_position('left')
    cbar = fig.colorbar(p4, ax=ax7, ticks=[5, 15, 25, 35, 45])
    cbar.ax.set_yticklabels(
        ['Background', 'Aerosol', 'Precipitation', 'Clouds', 'Undefined'])

    ax7.set_xlim(left=pd.to_datetime(df.time[0].values).floor('D'))
    myFmt = DateFormatter("%H")
    ax7.xaxis.set_major_locator(mdates.HourLocator(byhour=[0, 6, 12, 18]))
    ax7.xaxis.set_major_formatter(myFmt)
    ax7.set_xlim(left=pd.to_datetime(df.time[0].values).floor('D'))
    ax7.set_xlabel('Time UTC [hour]')

    fig.tight_layout()
    fig.savefig(out_directory + '/' + df.attrs['file_name'] +
                '_classified.png',
                dpi=150,
                bbox_inches='tight')
    plt.close('all')
    df['classified'] = (['time', 'range'], classifier)

    df.attrs['classified'] = 'Classification algorithm by Vietle \
                                at github.com/vietle94/halo-lidar'

    df.attrs['bleed_corrected'] = 'Bleed through corrected for \
                                depolarization ratio, see Vietle thesis'

    df['depo_bleed'].attrs = {
        'units': ' ',
        'long_name': 'Depolarization ratio \
                              (bleed through corrected)',
        'comments': 'Bleed through corrected'
    }

    df['depo_bleed_sd'].attrs = {
        'units': ' ',
        'long_name': 'Standard deviation of depolarization \
                              ratio (bleed through corrected)',
        'comments': 'Bleed through corrected'
    }
    df['classified'].attrs = {
        'units':
        ' ',
        'long_name':
        'Classified mask',
        'comments':
        '0: Background, 10: Aerosol, \
                           20: Precipitation, 30: Clouds, 40: Undefined'
    }

    df.to_netcdf(out_directory + '/' + df.attrs['file_name'] +
                 '_classified.nc',
                 format='NETCDF3_CLASSIC')
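A hedged usage sketch for classification_algorithm; the glob pattern and output directory below are placeholders, not from the original code.

import glob

# Hypothetical batch run over preprocessed netCDF files; paths are placeholders.
for nc_file in sorted(glob.glob('./preprocessed/*.nc')):
    classification_algorithm(nc_file, out_directory='./classified',
                             diagnostic=False, xr_data=False)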
Example #47
0
def msob_fp_array_to_results(title: str, arrival_enum: ArrivalEnum,
                             perform_param: PerformParameter,
                             opt_method: OptMethod, mc_dist: MonteCarloDist,
                             param_array: np.array, res_array: np.array,
                             number_flows: int, number_servers: int,
                             compare_metric: ChangeEnum) -> dict:
    """Writes the array values into a dictionary"""
    if res_array.shape[1] != 3:
        raise IllegalArgumentError(f"Array must have 3 columns,"
                                   f"not {res_array.shape[1]}")

    np.seterr(all='warn')

    res_array_no_full_nan = remove_full_nan_rows(full_array=res_array)
    valid_iterations = res_array_no_full_nan.shape[0]

    if compare_metric == ChangeEnum.RATIO_REF_NEW:
        change_vec_server_bound = np.divide(res_array[:, 0], res_array[:, 1])
        change_vec_pmoo_fp = np.divide(res_array[:, 0], res_array[:, 2])

    elif compare_metric == ChangeEnum.RATIO_NEW_REF:
        change_vec_server_bound = np.divide(res_array[:, 1], res_array[:, 0])
        change_vec_pmoo_fp = np.divide(res_array[:, 2], res_array[:, 0])

    elif compare_metric == ChangeEnum.RELATIVE_CHANGE:
        abs_vec_server_bound = np.subtract(res_array[:, 0], res_array[:, 1])
        change_vec_server_bound = np.divide(abs_vec_server_bound, res_array[:,
                                                                            0])

        abs_vec_pmoo_fp = np.subtract(res_array[:, 0], res_array[:, 2])
        change_vec_pmoo_fp = np.divide(abs_vec_pmoo_fp, res_array[:, 0])

    else:
        raise NotImplementedError(
            f"Metric={compare_metric.name} is not implemented")

    only_improved_server_bound = change_vec_server_bound[
        res_array[:, 0] > res_array[:, 1]]
    only_improved_pmoo_fp = change_vec_pmoo_fp[res_array[:, 0] > res_array[:,
                                                                           2]]

    row_max_msob = np.nanargmax(change_vec_server_bound)
    opt_msob = change_vec_server_bound[row_max_msob]
    mean_msob = np.nanmean(change_vec_server_bound)
    median_improved_server_bound = np.nanmedian(only_improved_server_bound)

    row_max_pmoo_fp = np.nanargmax(change_vec_pmoo_fp)
    opt_pmoo_fp = change_vec_pmoo_fp[row_max_pmoo_fp]
    mean_pmoo_fp = np.nanmean(change_vec_pmoo_fp)
    median_improved_pmoo_fp = np.nanmedian(only_improved_pmoo_fp)

    if (perform_param.perform_metric == PerformEnum.DELAY_PROB
            or perform_param.perform_metric == PerformEnum.BACKLOG_PROB):
        number_standard_bound_valid = np.nansum(
            res_array_no_full_nan[:, 0] < 1)
        number_server_bound_valid = np.nansum(res_array_no_full_nan[:, 1] < 1)
        number_pmoo_fp_valid = np.nansum(res_array_no_full_nan[:, 2] < 1)
    else:
        number_standard_bound_valid = np.nansum(
            res_array_no_full_nan[:, 0] < inf)
        number_server_bound_valid = np.nansum(
            res_array_no_full_nan[:, 1] < inf)
        number_pmoo_fp_valid = np.nansum(res_array_no_full_nan[:, 2] < inf)

    number_improved_server_bound = np.sum(
        res_array_no_full_nan[:, 0] > res_array_no_full_nan[:, 1])
    number_improved_pmoo_fp = np.sum(
        res_array_no_full_nan[:, 0] > res_array_no_full_nan[:, 2])

    best_approach = np.nanargmin(res_array_no_full_nan, axis=1)
    standard_best = np.count_nonzero(best_approach == 0)
    msob_best = np.count_nonzero(best_approach == 1)
    fp_best = np.count_nonzero(best_approach == 2)

    res_dict = {
        "Name": "Value",
        "topology": title,
        "arrival_distribution": arrival_enum.name
    }

    opt_dict = {
        "Name": "Value",
        "topology": title,
        "arrival_distribution": arrival_enum.name
    }

    for j in range(number_flows):
        if arrival_enum == ArrivalEnum.DM1:
            opt_dict[f"pmoo_fp_lamb{j + 1}"] = format(
                param_array[row_max_pmoo_fp, j], '.3f')
            opt_dict[f"server_bound_lamb{j + 1}"] = format(
                param_array[row_max_msob, j], '.3f')

        elif arrival_enum == ArrivalEnum.MD1:
            opt_dict[f"pmoo_fp_lamb{j + 1}"] = format(
                param_array[row_max_pmoo_fp, j], '.3f')
            opt_dict[f"ser_bound_lamb{j + 1}"] = format(
                param_array[row_max_msob, j], '.3f')

        elif arrival_enum == ArrivalEnum.MMOODisc:
            opt_dict[f"pmoo_fp_stay_on{j + 1}"] = format(
                param_array[row_max_pmoo_fp, j], '.3f')
            opt_dict[f"pmoo_fp_stay_off{j + 1}"] = format(
                param_array[row_max_pmoo_fp, number_flows + j], '.3f')
            opt_dict[f"pmoo_fp_burst{j + 1}"] = format(
                param_array[row_max_pmoo_fp, 2 * number_flows + j], '.3f')

            opt_dict[f"ser_bound_stay_on{j + 1}"] = format(
                param_array[row_max_msob, j], '.3f')
            opt_dict[f"ser_bound_stay_off{j + 1}"] = format(
                param_array[row_max_msob, number_flows + j], '.3f')
            opt_dict[f"ser_bound_burst{j + 1}"] = format(
                param_array[row_max_msob, 2 * number_flows + j], '.3f')

        elif arrival_enum == ArrivalEnum.MMOOFluid:
            opt_dict[f"pmoo_fp_mu{j + 1}"] = format(
                param_array[row_max_pmoo_fp, j], '.3f')
            opt_dict[f"pmoo_fp_lamb{j + 1}"] = format(
                param_array[row_max_pmoo_fp, number_flows + j], '.3f')
            opt_dict[f"pmoo_fp_burst{j + 1}"] = format(
                param_array[row_max_pmoo_fp, 2 * number_flows + j], '.3f')

            opt_dict[f"ser_bound_mu{j + 1}"] = format(
                param_array[row_max_msob, j], '.3f')
            opt_dict[f"ser_bound_lamb{j + 1}"] = format(
                param_array[row_max_msob, number_flows + j], '.3f')
            opt_dict[f"ser_bound_burst{j + 1}"] = format(
                param_array[row_max_msob, 2 * number_flows + j], '.3f')

        else:
            raise NotImplementedError(
                f"Arrival parameter={arrival_enum.name} is not implemented")

    for j in range(number_servers):
        opt_dict[f"pmoo_fp_rate{j + 1}"] = format(
            param_array[row_max_pmoo_fp,
                        arrival_enum.number_parameters() * number_flows + j],
            '.3f')
        opt_dict[f"server_bound_rate{j + 1}"] = format(
            param_array[row_max_msob,
                        arrival_enum.number_parameters() * number_flows + j],
            '.3f')

    opt_dict.update({
        "opt_pmoo_fp": format(opt_pmoo_fp, '.3f'),
        "opt_msob": format(opt_msob, '.3f'),
        "valid iterations": res_array.shape[0],
        "PerformParamValue": perform_param.value,
        "optimization": opt_method.name,
        "compare_metric": compare_metric.name,
        "MCDistribution": mc_dist.to_name(),
        "MCParam": mc_dist.param_to_string()
    })

    res_dict.update({
        "mean_pmoo_fp": mean_pmoo_fp,
        "mean_msob": mean_msob,
        "median_improved_pmoo_fp": median_improved_pmoo_fp,
        "median_improved_server_bound": median_improved_server_bound,
        "number standard bound is valid": number_standard_bound_valid,
        "number server bound is valid": number_server_bound_valid,
        "number PMOO_FP bound is valid": number_pmoo_fp_valid,
        "number server bound is improvement": number_improved_server_bound,
        "number PMOO_FP is improvement": number_improved_pmoo_fp,
        "valid iterations": valid_iterations,
        "number standard bound is best": standard_best,
        "number server bound is best": msob_best,
        "number PMOO_FP bound is best": fp_best,
    })

    filename = title
    filename += f"_optimal_{perform_param.to_name()}_{arrival_enum.name}_" \
                f"MC{mc_dist.to_name()}_{opt_method.name}_" \
                f"{compare_metric.name}"

    with open(filename + ".csv", 'w') as csv_file:
        writer = csv.writer(csv_file)
        for key, value in opt_dict.items():
            writer.writerow([key, value])

    return res_dict
Example #48
0
def generateAccCurveVote(functions):

	for time in times:

		# Do a run to test the accuracies when 'voters' vote
		# A vote will simply just be the average MAE over the number of voters
		for voters in range(1, NUM_VOTERS + 1):

			# A list to hold the details for the accuracy curve
			# This will be saved and printed out later. 
			accCurveState = []

			# How many data points are used for classification?
			# start at 5, count by 5s to 121 (121 is overkill)
			for GROUP_SIZE in range(5, MAX_GROUP_SIZE + 1, 5):
	
				# This is where we will store all the data to be output
				accMat = []	
	
				for task in tasks:
					for subject in subjects:
						for take in takes:

							# reporting
							print time, voters, GROUP_SIZE, ':\t', task, subject, take

							# Load the data
							# We're always applying it to the full data (all --- '')
							fileName = getFileName(task, subject, take, '')
							data = loadData(fileName)

							# Store a row (corresponding to a set of data) of accuracies
							# This will be added to the accMat
							accRow = np.zeros(NUM_FUNCTIONS)

							# We'll repeat the test 100 times (to give us a percent)
							# Basically, apply some subset of data (GROUP_SIZE) to models 100 times.
							# This is for the stats. This makes sense... right?
							for i in range(100):
							
								# This list will store lists of errors
								# the length of this list depends on GROUP_SIZE
								# The length of the lists inside this list will be equal to NUM_FUNCTIONS
								# Eventually we will get the column mean, to know how each model did.
								absErr_forAllData = []

								# For each data point in a randomly selected set of data of size GROUP_SIZE
								# Apply this data point to each model, and keep track of the errors in a list
								# After applying this data point to every function, put that list inside absErr_forAllData
								for d in data[np.random.permutation(len(data))[:GROUP_SIZE]]:
									absErr_forDatum = []
							
									# For each model... 
									# I really really wish I wrote this in a similar way to script 3
									# had to rename variables here to not mess with outer loops
									for tsk in tasks:
										for sub in subjects:
											for tke in takes:
											
												# NEVER APPLY THE DATA TO THE MODEL IT WAS FIT TO!!!
												# If we have data from the model we're looking at, skip it (append the max value so it can never be the 'BEST')
												if task == tsk and subject == sub and tke == take:
													absErr_forDatum.append(sys.maxint)
												# Otherwise, just apply the data to the model and record the error value
												else:
													# Give each voter a shot
													voterScores = []

													# This is the only line different from the TOP script
													# We just randomly pick some number of models
													for j in np.random.permutation(len(functions[time][tsk][sub][tke]))[:voters]:
														try:
															# Get the error by finding the difference between what we expect (l[-1] --- the last element in the row)
															# and what we got (applying all other data points to the model).
															err = d[-1] - functions[time][tsk][sub][tke][j](*d)
														except (ValueError, OverflowError, ZeroDivisionError):
															print 'Busted'
															# What's better... nan or maxint?
															# nan might be cheating because: np.nanmean([nan, nan, nan, 1]) = 1
															# max int might be unfair as it could make things slightly off seem way worse?
															# I'll go with nan as maxvalue might throw off votes
															err = float('nan')
															#err = sys.maxint

														# Keep track of each voter's error	
														voterScores.append(abs(err))

													# The voters will just average out their error on the data point
													# Add the average error to the list keeping track of the data points error on all models												
													absErr_forDatum.append(np.nanmean(voterScores))

									# After applying the single row of data to all models
									# Add the error for each model to the list of errors
									# After doing this for all data points (GROUP_SIZE), we will get the column mean. 
									absErr_forAllData.append(absErr_forDatum)

								# This line should not be necessary....
								#if np.argmin(np.mean(abEs,axis=0)) > 0.000:	
							
								# Find the index of the model with the smallest error
								# This will be the *winner* model
								# And mark it as the winner in the row's accuracy
								# Note that this is not an accuracy really, but just a record that it was selected
								# Accuracy is determined if the min model belonged to the same subject/task combo (take doesn't matter)		
								accRow[np.argmin(np.mean(absErr_forAllData,axis=0))] += (1.)

							# Divide by 100 so we get a percent
							# Add the row's (data set's) values to the matrix
							# Remember, accuracy is really only measured after it's verified
							# Really, the matrix is just seeing what % of the time what model was the best.  
							accRow = accRow/float(100)
							accMat.append(accRow)

				accMat = np.array(accMat)

				# Save the accuracy matrix here
				np.savetxt('./accuracyMatrices/2-AccMat-RAND_' + str(voters) + '_' + str(GROUP_SIZE) + '_' + time + '.csv', accMat, delimiter=',') 

				# This part will now count the actual accuracies
			
				# Where we will store the compressed version of the mat
				# This will smush it over the takes
				accMatSmall = []

				# For each row, we'll sum up the accuracies over all takes of the same subject/task
				# Will result in 150 rows, 30 cols. 
				# Note, each row will add up to 1.00 (100%)
				#	Or at least it should... it's possible two models tie, but this would be very very unusual considering floats
				for i in range(0, accMat.shape[0], 1):
					accMatRow = []

					# For each group of 5 (take)
					# Add up the accuracies over the 5
					for j in range(0, accMat.shape[0], 5):
						accMatRow.append(np.sum(accMat[i, j:j+5]))
				
					accMatSmall.append(accMatRow)

				accMatSmall = np.array(accMatSmall)
			
				# Save the small accuracy matrix here
				np.savetxt('./accuracyMatrices/2-AccMat-Small-RAND_' + str(voters) + '_' + str(GROUP_SIZE) + '_' + time + '.csv', accMatSmall, delimiter=',') 

			
				# Go through the diagonal (kinda, it's not really a diag, more like a staircase)					
				# And record how often it was right
				diagValues = []
			
				# This is ugly, but works...
				# 150 rows, 30 cols remember
				# So we need to look at 5 rows for each col. 
				for i in range(30):
					for j in range(5):
						diagValues.append(accMatSmall[i*5+j,i])		
			
				# Store the mean, standard deviation, median, min, max, and number of functions (for CI calculations)
				accCurveState.append([np.mean(diagValues), np.std(diagValues), np.median(diagValues), np.min(diagValues), np.max(diagValues), NUM_FUNCTIONS])

			# Save the output for each time 
			np.savetxt('2-accCurveNoSameTake-RAND-' + str(voters) + '_' + time + '.csv', accCurveState, delimiter=',')
Example #49
0
    def plot_diagnostics(self, variable=0, lags=10, fig=None, figsize=None):
        """Plot an ARIMA's diagnostics.

        Diagnostic plots for standardized residuals of one endogenous variable

        Parameters
        ----------
        variable : integer, optional
            Index of the endogenous variable for which the diagnostic plots
            should be created. Default is 0.

        lags : integer, optional
            Number of lags to include in the correlogram. Default is 10.

        fig : Matplotlib Figure instance, optional
            If given, subplots are created in this figure instead of in a new
            figure. Note that the 2x2 grid will be created in the provided
            figure using `fig.add_subplot()`.

        figsize : tuple, optional
            If a figure is created, this argument allows specifying a size.
            The tuple is (width, height).

        Notes
        -----
        Produces a 2x2 plot grid with the following plots (ordered clockwise
        from top left):

        1. Standardized residuals over time
        2. Histogram plus estimated density of standardized residuals, along
           with a Normal(0,1) density plotted for reference.
        3. Normal Q-Q plot, with Normal reference line.
        4. Correlogram

        See Also
        --------
        statsmodels.graphics.gofplots.qqplot
        pmdarima.utils.visualization.plot_acf

        References
        ----------
        .. [1] https://www.statsmodels.org/dev/_modules/statsmodels/tsa/statespace/mlemodel.html#MLEResults.plot_diagnostics  # noqa: E501
        """
        # implicitly checks whether installed, and does our backend magic:
        _get_plt()

        # We originally delegated down to SARIMAX model wrapper, but
        # statsmodels makes it difficult to trust their API, so we just re-
        # implemented a common method for all results wrappers.
        from statsmodels.graphics import utils as sm_graphics
        fig = sm_graphics.create_mpl_fig(fig, figsize)

        res_wpr = self.arima_res_
        data = res_wpr.data

        # Eliminate residuals associated with burned or diffuse likelihoods.
        # The statsmodels code for the Kalman Filter takes the loglik_burn
        # as a parameter:

        # loglikelihood_burn : int, optional
        #     The number of initial periods during which the loglikelihood is
        #     not recorded. Default is 0.

        # If the class has it, it's a SARIMAX and we'll use it. Otherwise we
        # will just access the residuals as we normally would...
        if hasattr(res_wpr, 'loglikelihood_burn'):
            # This is introduced in the bleeding edge version, but is not
            # backwards compatible with 0.9.0 and less:
            d = res_wpr.loglikelihood_burn
            if hasattr(res_wpr, 'nobs_diffuse'):
                d = np.maximum(d, res_wpr.nobs_diffuse)

            resid = res_wpr.filter_results\
                           .standardized_forecasts_error[variable, d:]
        else:
            # This gets the residuals, but they need to be standardized
            d = 0
            r = res_wpr.resid
            resid = (r - np.nanmean(r)) / np.nanstd(r)

        # Top-left: residuals vs time
        ax = fig.add_subplot(221)
        if hasattr(data, 'dates') and data.dates is not None:
            x = data.dates[d:]._mpl_repr()
        else:
            x = np.arange(len(resid))
        ax.plot(x, resid)
        ax.hlines(0, x[0], x[-1], alpha=0.5)
        ax.set_xlim(x[0], x[-1])
        ax.set_title('Standardized residual')

        # Top-right: histogram, Gaussian kernel density, Normal density
        # Can only do histogram and Gaussian kernel density on the non-null
        # elements
        resid_nonmissing = resid[~(np.isnan(resid))]
        ax = fig.add_subplot(222)
        # Temporarily silence the deprecation warning for the old `normed`
        # argument: matplotlib removed `normed` in favor of `density` in
        # version 3.2.0, and mpl_compat.mpl_hist_arg() supplies whichever
        # keyword the installed matplotlib supports.
        with warnings.catch_warnings(record=True):
            ax.hist(resid_nonmissing,
                    label='Hist',
                    **mpl_compat.mpl_hist_arg())

        kde = gaussian_kde(resid_nonmissing)
        xlim = (-1.96 * 2, 1.96 * 2)
        x = np.linspace(xlim[0], xlim[1])
        ax.plot(x, kde(x), label='KDE')
        ax.plot(x, norm.pdf(x), label='N(0,1)')
        ax.set_xlim(xlim)
        ax.legend()
        ax.set_title('Histogram plus estimated density')

        # Bottom-left: QQ plot
        ax = fig.add_subplot(223)
        from statsmodels.graphics import gofplots
        gofplots.qqplot(resid_nonmissing, line='s', ax=ax)
        ax.set_title('Normal Q-Q')

        # Bottom-right: Correlogram
        ax = fig.add_subplot(224)
        from statsmodels.graphics import tsaplots
        tsaplots.plot_acf(resid, ax=ax, lags=lags)
        ax.set_title('Correlogram')

        ax.set_ylim(-1, 1)

        return fig
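A brief usage sketch for the method above, assuming it is exposed as `plot_diagnostics` on a fitted pmdarima `ARIMA` estimator; the dataset and model order are illustrative only.

import matplotlib.pyplot as plt
import pmdarima as pm

y = pm.datasets.load_wineind()            # small example series bundled with pmdarima
model = pm.ARIMA(order=(1, 1, 1)).fit(y)  # illustrative order, not a tuned model
fig = model.plot_diagnostics(variable=0, lags=10, figsize=(10, 8))
plt.show()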
Example #50
0
def calc_mse(y, y_hat):
    return np.nanmean(((y - y_hat) ** 2))
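A quick check of calc_mse: because it uses np.nanmean, pairs with a NaN error are skipped rather than propagated.

import numpy as np

y     = np.array([1.0, 2.0, 3.0, np.nan])
y_hat = np.array([1.5, 2.0, 2.0, 4.0])
print(calc_mse(y, y_hat))   # nanmean of [0.25, 0.0, 1.0, nan] -> 0.4166...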
Example #51
0
def estimate_phase_elevation_ratio(dem, ts_data, inps):
    """Estimate phase/elevation ratio for each acquisition of timeseries
    Parameters: dem     : 2D array in size of (          length, width)
                ts_data : 3D array in size of (num_date, length, width)
                inps    : Namespace
    Returns:    X       : 2D array in size of (poly_num+1, num_date)
    """
    num_date = ts_data.shape[0]

    # prepare phase and elevation data
    print('reading mask from file: '+inps.mask_file)
    mask = readfile.read(inps.mask_file, datasetName='mask')[0]
    dem = mask_matrix(np.array(dem), mask)
    ts_data = mask_matrix(np.array(ts_data), mask)

    # display
    # 1. effect of multilooking --> narrow phase range --> better ratio estimation
    debug_mode = False
    if debug_mode:
        import matplotlib.pyplot as plt
        #d_index = np.argmax(topo_trop_corr)
        d_index = 47
        data = ts_data[d_index, :, :]
        title = inps.date_list[d_index]
        fig = plt.figure()
        plt.plot(dem[~np.isnan(dem)],
                 data[~np.isnan(dem)],
                 '.', label='Number of Looks = 1')
        mli_dem = multilook_data(dem, 8, 8)
        mli_data = multilook_data(data, 8, 8)
        plt.plot(mli_dem[~np.isnan(mli_dem)],
                 mli_data[~np.isnan(mli_dem)],
                 '.', label='Number of Looks = 8')
        plt.legend()
        plt.xlabel('Elevation (m)')
        plt.ylabel('Range Change (m)')
        plt.title(title)
        out_file = 'phase_elevation_ratio_{}.png'.format(title)
        plt.savefig(out_file, bbox_inches='tight', transparent=True, dpi=300)
        print('save to {}'.format(out_file))
        #plt.show()

    print('----------------------------------------------------------')
    print('Empirical tropospheric delay correction based on phase/elevation ratio (Doin et al., 2009)')
    print('polynomial order: {}'.format(inps.poly_order))

    if inps.num_multilook > 1:
        print('number of multilook: {} (multilook data for estimation only)'.format(inps.num_multilook))
        mask = multilook_data(mask, inps.num_multilook, inps.num_multilook)
        dem = multilook_data(dem, inps.num_multilook, inps.num_multilook)
        ts_data = multilook_data(ts_data, inps.num_multilook, inps.num_multilook)

    mask_nan = ~np.isnan(dem)
    dem = dem[mask_nan]
    ts_data = ts_data[:, mask_nan]

    # calculate correlation coefficient
    print('----------------------------------------------------------')
    print('calculate correlation of DEM with each acquisition')
    topo_trop_corr = np.zeros(num_date, np.float32)
    for i in range(num_date):
        phase = ts_data[i, :]
        cc = 0.
        if np.count_nonzero(phase) > 0:
            comp_data = np.vstack((dem, phase))
            cc = np.corrcoef(comp_data)[0, 1]
            topo_trop_corr[i] = cc
        print('{}: {:>5.2f}'.format(inps.date_list[i], cc))
    topo_trop_corr = np.abs(topo_trop_corr)
    print('average correlation magnitude: {:>5.2f}'.format(np.nanmean(topo_trop_corr)))

    # estimate ratio parameter
    print('----------------------------------------------------------')
    print('estimate phase/elevation ratio')
    A = design_matrix(dem=dem, poly_order=inps.poly_order)
    X = np.dot(np.linalg.pinv(A), ts_data.T)
    X = np.array(X, dtype=np.float32)
    X[:, topo_trop_corr < inps.threshold] = 0.
    return X
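The core of the routine above is an ordinary least-squares fit of each acquisition's phase against a polynomial in elevation, solved with a pseudo-inverse. A standalone sketch of that step on synthetic data; the poly_design_matrix helper below is a stand-in for the module's design_matrix, whose exact column layout is an assumption.

import numpy as np

def poly_design_matrix(dem, poly_order=1):
    """Columns: dem**poly_order, ..., dem**1, plus a constant term (hypothetical stand-in)."""
    cols = [dem ** p for p in range(poly_order, 0, -1)] + [np.ones_like(dem)]
    return np.column_stack(cols)

# synthetic example: phase = 2e-4 * elevation + 0.3 + noise
dem = np.linspace(0.0, 3000.0, 500)
phase = 2e-4 * dem + 0.3 + np.random.normal(0.0, 0.01, dem.size)

A = poly_design_matrix(dem, poly_order=1)
X = np.dot(np.linalg.pinv(A), phase)   # same pinv-based solve as in the function above
print(X)                               # approximately [2e-4, 0.3]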
	elif subjectNum > 100:
		style = 1
	plt.plot(dmn_connectivity_change[s],madrs_change,marker='.',ms=20,color=colors[style],alpha=0.5)
plt.xlabel('DMN Connectivity Change 3 - 1')
plt.ylabel('MADRS Change 3 - 1')
plt.show()


fig = plt.figure(figsize=(10,7))
# plot for each subject
for s in np.arange(nSub):
	if subjects[s] < 100:
		style = 0
		plt.plot(np.arange(nVisits),average_within_mat[row,col,s,:],marker='.', ms=20,color=colors[style],alpha=0.5)
	else:
		style = 1
		plt.plot(np.arange(nVisits),average_within_mat[row,col,s,:], marker='.',ms=20,color=colors[style],alpha=0.5)
plt.errorbar(np.arange(nVisits),np.nanmean(average_within_mat[row,col,HC_ind,:],axis=0),lw = 5,color=colors[0],yerr=scipy.stats.sem(average_within_mat[row,col,HC_ind,:],axis=0,nan_policy='omit'), label='HC')
plt.errorbar(np.arange(nVisits),np.nanmean(average_within_mat[row,col,MDD_ind,:],axis=0),lw = 5,color=colors[1],yerr=scipy.stats.sem(average_within_mat[row,col,MDD_ind,:],axis=0,nan_policy='omit'), label='MDD')
plt.xticks(np.arange(nVisits),('Pre NF', 'Post NF'))
plt.xlabel('Visit')
plt.title('Row %i Col %i' % (row,col))
plt.title('%s Within-Network Connectivity'% systems_to_keep_abbrv[system])
plt.legend()
plt.show()
# now test significance
print('FIRST DAY')
print(scipy.stats.ttest_ind(average_within_mat[row,col,HC_ind,0],average_within_mat[row,col,MDD_ind,0]))
print('LAST DAY')
print(scipy.stats.ttest_ind(average_within_mat[row,col,HC_ind,1],average_within_mat[row,col,MDD_ind,1]))
Example #53
0
arr_dem = rb_dem.ReadAsArray(difx_dem, dify_dem, cols_all,
                             rows_all).astype(float)
arr_thp = rb_thp.ReadAsArray(difx_thp, dify_thp, cols_all,
                             rows_all).astype(float)

## check which values are uncommon (or NA)
np.unique(arr_sl)
np.unique(arr_dem)
np.unique(arr_thp)

# replace NA placeholder values with np.nan
arr_sl[arr_sl < 0] = np.nan  # negative values mark NAs here
arr_dem[arr_dem == 65536] = np.nan  # 65536 marks NAs here
arr_thp[arr_thp == 65535] = np.nan  # 65535 marks NAs here

print("mean slope", round(np.nanmean(arr_sl), 2))
print("max slope", round(np.nanmax(arr_sl), 2))
print("min slope", round(np.nanmin(arr_sl), 2))
print("mean dem", round(np.nanmean(arr_dem), 2))
print("max dem", round(np.nanmax(arr_dem), 2))
print("min dem", round(np.nanmin(arr_dem), 2))

# task 2
# binary raster where elevation < 1000m and slope < 30deg
# slope < 30 deg

arr_sl_bin = np.nan_to_num(arr_sl)
arr_sl_bin[arr_sl_bin < float(30.00)] = 1
arr_sl_bin[arr_sl_bin >= float(30.00)] = 0

np.unique(arr_sl_bin)
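One caveat in the step above: np.nan_to_num maps NaN cells to 0, so they land in the slope < 30 class. A NaN-aware sketch, with a small illustrative array standing in for arr_sl:

import numpy as np

# NaN cells stay NaN instead of being counted as slope < 30 deg
arr_sl_demo = np.array([[10.0, np.nan], [35.0, 25.0]])
arr_sl_bin_demo = np.where(np.isnan(arr_sl_demo), np.nan,
                           (arr_sl_demo < 30.0).astype(float))
print(arr_sl_bin_demo)   # [[ 1. nan] [ 0.  1.]]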
def main(args=None):

    start = time.time()
    a = time.asctime()
    b = a.replace(':', '')
    start_at = b.replace(' ', '_')
    mode = "train"
    loop_num_ = None
    test_batch_num = None
    max_to_keep = 2
    TEST_THRESHHOLD = 0.75
    SAVE_THRESHHOLD = 0
    dropout_1 = 1.00
    dropout_2 = 0.80
    dropout_3 = 0.50
    queue_len = 5000
    #max_train=20000

    if args is not None:
        mode = args.mode
        loop_num_ = args.loop_number
        test_batch_num = args.test_batch_number
        max_to_keep = args.max_to_keep
        input_dir = args.in_directory
        model_name = args.model
        pretrained_dir = args.ckpt_file
        output_dir = args.out_directory
    else:
        try:
            options, args = getopt.getopt(sys.argv[1:], 'm:i:n:b:o:c:p:', [
                'mode=', 'in_dir=', 'loop_num=', 'test_batch_num=', 'out_dir=',
                'network_constructor=', 'pretrained_model='
            ])
        except getopt.GetoptError as err:
            print(str(err))
            sys.exit(2)
        if len(options) < 3:
            print('too few arguments')
            sys.exit(0)
        for opt, arg in options:
            if opt in ('-m', '--mode'):
                mode = arg
            elif opt in ('-i', '--in_dir'):
                input_dir = arg

            elif opt in ('-n', '--loop_num'):
                loop_num_ = int(arg)
            elif opt in ('-b', '--test_batch_num'):
                test_batch_num = int(arg)
            elif opt in ('-o', '--out_dir'):
                output_dir = arg
            elif opt in ('-c', '--network_constructor'):
                model_name = arg
            elif opt in ('-p', '--pretrained_model'):
                pretrained_dir = arg

    if input_dir.endswith("/"):
        input_dir = str(input_dir) + "*.npz"
    elif input_dir.endswith("*") or input_dir.endswith(".npz"):
        pass
    else:
        input_dir = str(input_dir) + "/*.npz"
    f = glob.glob(input_dir)
    if len(f) == 0:
        print("can't open input files, no such a directory")
        sys.exit(0)

    f_srt = natsorted(f)

    if loop_num_ is None:
        loop_num_ = len(f_srt) - 5

    if test_batch_num is None:
        test_batch_num = loop_num_ + 1

    with np.load(str(f_srt[0])) as f:
        labels = f['labels']
        _data = f['data_array']
        batch_size, label_dim = labels.shape
        _, data_length, _2 = _data.shape
        print(batch_size, label_dim)

    config = tf.ConfigProto(device_count={'GPU': 2})
    config.gpu_options.allow_growth = True
    #config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
    sess = tf.Session(config=config)
    x_image = tf.placeholder(tf.float32, shape=[None, data_length, 4, 1])
    y_ = tf.placeholder(tf.float32, shape=[None, label_dim])
    phase = tf.placeholder(tf.bool)
    keep_prob = tf.placeholder(tf.float32)
    keep_prob2 = tf.placeholder(tf.float32)
    keep_prob3 = tf.placeholder(tf.float32)
    nc = il.import_module("deepgmap.network_constructors." + str(model_name))
    print("running " + str(model_name))

    model = nc.Model(image=x_image,
                     label=y_,
                     output_dir=output_dir,
                     phase=phase,
                     start_at=start_at,
                     keep_prob=keep_prob,
                     keep_prob2=keep_prob2,
                     keep_prob3=keep_prob3,
                     data_length=data_length,
                     max_to_keep=max_to_keep)

    sess.run(tf.global_variables_initializer())
    saver = model.saver

    if mode == 'retrain':
        saver.restore(sess, pretrained_dir)

    train_accuracy_record = []
    loss_val_record = []
    total_learing = []
    loop_num = div_roundup(queue_len, len(f_srt))
    BREAK = False
    prev_ac = None
    test_step = []
    CHECK_TEST_FR = False
    for i in range(loop_num):
        if BREAK:
            print("breaking the train loop")
            break
        input_files = f_srt[i * queue_len:(i + 1) * queue_len]
        image_list, label_list = batch_queuing(input_files, batch_size,
                                               data_length)

        for k in range(len(image_list)):
            start_tmp = time.time()
            a = np.shape(image_list[k])

            #print a
            if len(a) == 4:
                train_accuracy_, loss_val = sess.run(
                    [model.error, model.cost],
                    feed_dict={
                        x_image: image_list[k],
                        y_: label_list[k],
                        keep_prob: 1.0,
                        keep_prob2: 1.0,
                        keep_prob3: 1.0,
                        phase: False
                    })
            else:
                batch = image_list[k][0], label_list[k][0], image_list[k][
                    1], label_list[k][1]
                #print(len(batch))
                #batch = next_batch(i,input_files, batch_size, data_length)
                train_accuracy_, loss_val = sess.run(
                    [model.error, model.cost],
                    feed_dict={
                        x_image: np.concatenate((batch[2], batch[0])),
                        y_: np.concatenate((batch[3], batch[1])),
                        keep_prob: 1.0,
                        keep_prob2: 1.0,
                        keep_prob3: 1.0,
                        phase: False
                    })
                """train_accuracy_,loss_val= sess.run([model.error, model.cost], feed_dict={x_image:batch[2], 
                                                                                         y_: batch[3], 
                                                                                        keep_prob: 1.0, keep_prob2: 1.0, keep_prob3: 1.0, 
                                                                                        phase: False})"""
            FPR_list, TPR_list, PPV_list = train_accuracy_
            #print np.nansum(PPV_list)
            curr_accu = float(
                np.round(
                    np.nanmean(
                        2 * np.array(TPR_list) * np.array(PPV_list) /
                        (0.0000001 + np.array(PPV_list) + np.array(TPR_list))),
                    4))
            sys.stdout.write("\r" + "step " + str(i * queue_len + k) +
                             ", cost: " + str(loss_val) +
                             ", train_accuracy: " + str(list([curr_accu])) +
                             ", " + str(time.time() - start_tmp))
            sys.stdout.flush()

            #train_accuracy_record.append(TPR_list[0]-FPR_list[0])
            train_accuracy_record.append(curr_accu)
            loss_val_record.append(loss_val)
            total_learing.append((i * queue_len + k) * batch_size / 1000.0)
            if i * queue_len + k >= 2:
                #temporal_accuracy=train_accuracy_record[i*queue_len+k]+train_accuracy_record[i*queue_len+k-1]+train_accuracy_record[i*queue_len+k-2]
                temporal_accuracy = np.round(
                    (train_accuracy_record[i * queue_len + k] +
                     train_accuracy_record[i * queue_len + k - 1] +
                     train_accuracy_record[i * queue_len + k - 2]) / 3.0, 4)
                if len(test_step) > 1:
                    CHECK_TEST_FR = ((i * queue_len + k - test_step[-1]) >
                                     1000)
                CHECK_ACCU = (temporal_accuracy >= TEST_THRESHHOLD)
                if CHECK_ACCU or CHECK_TEST_FR:

                    test_step.append(i * queue_len + k)
                    if len(test_step) > 10:
                        e, f = test_step[-1], test_step[-10]
                        if e - f <= 40:
                            TEST_THRESHHOLD += 0.10
                            print("\n" + str(TEST_THRESHHOLD))
                            if TEST_THRESHHOLD > 0.9800:
                                TEST_THRESHHOLD = 0.9800

                    if CHECK_TEST_FR:
                        TEST_THRESHHOLD -= 0.02
                    #TEST_THRESHHOLD=temporal_accuracy-0.005
                    t_batch = test_batch(input_dir, output_dir, test_batch_num,
                                         batch_size, data_length)

                    f1_list = []
                    for o in range(3):
                        ta = sess.run(model.error,
                                      feed_dict={
                                          x_image: t_batch[o * 2],
                                          y_: t_batch[o * 2 + 1],
                                          keep_prob: 1.0,
                                          keep_prob2: 1.0,
                                          keep_prob3: 1.0,
                                          phase: False
                                      })
                        FPR_list, TPR_list, PPV_list = ta

                        f1 = float(
                            np.round(
                                np.nanmean(2 * np.array(TPR_list) *
                                           np.array(PPV_list) /
                                           (0.0000001 + np.array(PPV_list) +
                                            np.array(TPR_list))), 4))
                        f1_list.append(f1)

                    mean_ac = np.round(np.nanmean(f1_list), 4)
                    to_print = (
                        "\nThis is tests for the model at the train step: " +
                        str(i * queue_len + k) + "\n" + "mean accuracy : " +
                        str(mean_ac) + "\n Total time " +
                        str(time.time() - start))
                    print(to_print)
                    if (prev_ac is None and mean_ac >= SAVE_THRESHHOLD) or (
                            prev_ac is not None and mean_ac >= prev_ac):

                        flog = open(
                            str(output_dir) + str(start_at) + '.log', 'a')
                        flog.write(
                            "This is tests for the model at the train step: " +
                            str(i * queue_len + k) +
                            "\nThe average of TPR+PPV: " + str(mean_ac) + '\n')
                        flog.close()
                        saver.save(sess,
                                   str(output_dir) + str(model_name) + "_" +
                                   str(start_at) + '_step' +
                                   str(i * queue_len + k) + '.ckpt',
                                   global_step=i * queue_len + k)
                        prev_ac = mean_ac

                    if mean_ac >= 0.999:
                        BREAK = True
                        break
            #sess.run(model.optimize, feed_dict={x_image: np.concatenate((batch[2],batch[0])),y_: np.concatenate((batch[3],batch[1])), keep_prob: dropout_1, keep_prob2: dropout_2, keep_prob3: dropout_3,phase:True})
            if len(a) == 4:
                sess.run(model.optimize,
                         feed_dict={
                             x_image: image_list[k],
                             y_: label_list[k],
                             keep_prob: dropout_1,
                             keep_prob2: dropout_2,
                             keep_prob3: dropout_3,
                             phase: True
                         })
            else:
                sess.run(model.optimize,
                         feed_dict={
                             x_image: batch[2],
                             y_: batch[3],
                             keep_prob: dropout_1,
                             keep_prob2: dropout_2,
                             keep_prob3: dropout_3,
                             phase: True
                         })
                sess.run(model.optimize,
                         feed_dict={
                             x_image: batch[0],
                             y_: batch[1],
                             keep_prob: dropout_1,
                             keep_prob2: dropout_2,
                             keep_prob3: dropout_3,
                             phase: True
                         })
                sess.run(model.optimize,
                         feed_dict={
                             x_image: batch[2],
                             y_: batch[3],
                             keep_prob: dropout_1,
                             keep_prob2: dropout_2,
                             keep_prob3: dropout_3,
                             phase: True
                         })
                sess.run(model.optimize,
                         feed_dict={
                             x_image: batch[0],
                             y_: batch[1],
                             keep_prob: dropout_1,
                             keep_prob2: dropout_2,
                             keep_prob3: dropout_3,
                             phase: True
                         })

            if (i * queue_len +
                    k) == loop_num_:  # or (i*queue_len+k) >= max_train:
                BREAK = True
                break

    saver.save(sess,
               str(output_dir) + str(model_name) + "_" + str(start_at) +
               ".ckpt",
               global_step=i * queue_len + k)

    t_batch = test_batch(input_dir, output_dir, test_batch_num, batch_size,
                         data_length)
    f1_list = []
    for o in range(3):
        ta = sess.run(model.error,
                      feed_dict={
                          x_image: t_batch[o * 2],
                          y_: t_batch[o * 2 + 1],
                          keep_prob: 1.0,
                          keep_prob2: 1.0,
                          keep_prob3: 1.0,
                          phase: False
                      })
        FPR_list, TPR_list, PPV_list = ta

        f1 = float(
            np.round(
                np.nanmean(
                    2 * np.array(TPR_list) * np.array(PPV_list) /
                    (0.0000001 + np.array(PPV_list) + np.array(TPR_list))), 4))
        print(f1)
        f1_list.append(f1)

    current_variable = {}
    all_tv = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    for v in all_tv:
        value = sess.run(v)
        scope = v.name
        current_variable[scope] = value
    all_lv = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES)
    local_variable = {}
    for v in all_lv:
        value = sess.run(v)
        scope = v.name
        print(scope)
        local_variable[scope] = value
    all_ = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    np.savez(
        str(output_dir) + str(model_name) + '_trained_variables_' +
        str(start_at) + '.npz', **current_variable)
    np.savez(
        str(output_dir) + str(model_name) + '_local_variables_' +
        str(start_at) + '.npz', **local_variable)
    mean_ac = np.round(np.nanmean(f1_list), 4)
    running_time = time.time() - start
    import datetime
    if args is not None:
        _args = args
    else:
        _args = sys.argv
    to_print = ("dropout parameters: " + str(dropout_1) + ", " +
                str(dropout_2) + ", " + str(dropout_3) + "\n" +
                "input directory: " + str(input_dir) + "\n" +
                "The average of TPR+PPV: " + str(np.round(mean_ac, 2)) +
                "\nTotal time " +
                str(datetime.timedelta(seconds=running_time)) +
                "\nThe model is " + str(model_name) + "\nArguments are " +
                str(sys.argv[1:]) + "\nGlobal variables: " + str(all_))

    sess.close()
    print(to_print)
    flog = open(str(output_dir) + str(start_at) + '.log', 'a')
    flog.write(to_print + '\n')
    flog.close()

    fit = np.polyfit(total_learing, train_accuracy_record, 1)
    fit_fn = np.poly1d(fit)

    plt.figure(1)
    ax1 = plt.subplot(211)
    plt.title('Train accuracy')
    plt.plot(total_learing, train_accuracy_record, 'c.', total_learing,
             fit_fn(total_learing), 'm-')
    ax1.grid(True)

    x1, x2, y1, y2 = plt.axis()
    plt.axis((x1, x2, y1, 1.0))

    plt.figure(1)
    plt.subplot(212)
    plt.title('Cost')
    plt.plot(total_learing, loss_val_record, '-')

    x1, x2, y1, y2 = plt.axis()
    plt.axis((x1, x2, 0, 1.0))
    plt.savefig(str(output_dir) + 'plot_' + str(start_at) + '.pdf',
                format='pdf')
    np.savez_compressed(str(output_dir) + str(model_name) + "_" +
                        str(start_at) + '_train_rec',
                        total_learing=total_learing,
                        train_accuracy_record=train_accuracy_record,
                        loss_val_record=loss_val_record)

    plt.show()
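The training and test loops above compute the same F1-style score from per-class TPR and PPV in three places; a small helper capturing that expression (a sketch, not part of the original module) could replace the inline formula.

import numpy as np

def f1_from_rates(tpr_list, ppv_list, eps=1e-7):
    """Mean per-class F1 from recall (TPR) and precision (PPV); NaN classes are skipped."""
    tpr = np.asarray(tpr_list, dtype=float)
    ppv = np.asarray(ppv_list, dtype=float)
    return float(np.round(np.nanmean(2.0 * tpr * ppv / (eps + tpr + ppv)), 4))

# e.g. curr_accu = f1_from_rates(TPR_list, PPV_list)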
Example #55
0
def main():
    """Create the network and start the training."""
    model_urls = {'CoarseSN': 'models/DR_CoarseSN/CoarseSN.pth', 'MaskCN': 'models/MaskCN/MaskCN.pth'}

    writer = SummaryWriter('models/' + NAME)

    cudnn.enabled = True

    ############# Create mask-guided classification network.
    MaskCN = Xception_dilation(num_classes=NUM_CLASSES_CLS, input_channel=INPUT_CHANNEL)
    MaskCN.cuda()
    if FP16 is True:
        MaskCN = amp.initialize(MaskCN, opt_level="O1")

    ############# Load pretrained weights
    pretrained_dict = torch.load(model_urls['MaskCN'])
    MaskCN.load_state_dict(pretrained_dict)
    MaskCN.eval()

    ############# Create enhanced segmentation network.
    EnhanceSN = deeplabv3plus_en(num_classes=NUM_CLASSES_SEG)
    optimizer = torch.optim.Adam(EnhanceSN.parameters(), lr=LEARNING_RATE)
    EnhanceSN.cuda()
    if FP16 is True:
        EnhanceSN, optimizer = amp.initialize(EnhanceSN, optimizer, opt_level="O1")
    EnhanceSN = torch.nn.DataParallel(EnhanceSN)

    ############# Load pretrained weights
    pretrained_dict = torch.load(model_urls['CoarseSN'])
    net_dict = EnhanceSN.state_dict()
    pretrained_dict = {k: v for k, v in pretrained_dict.items() if (k in net_dict) and (v.shape == net_dict[k].shape)}
    net_dict.update(pretrained_dict)
    EnhanceSN.load_state_dict(net_dict)
    EnhanceSN.train()
    EnhanceSN.float()

    print(len(net_dict))
    print(len(pretrained_dict))

    DR_loss = loss.Fusin_Dice_rank()

    cudnn.benchmark = True

    ############# Load training and validation data
    data_train_root = 'dataset/seg_data/Training_resize_seg/'
    data_train_root_mask = 'Coarse_masks/Training_EnhancedSN/'
    data_train_list = 'dataset/ISIC/Training_seg.txt'
    trainloader = data.DataLoader(MyDataSet_seg(data_train_root, data_train_list, root_path_coarsemask=data_train_root_mask, crop_size=(w, h)),
                                  batch_size=BATCH_SIZE, shuffle=True, num_workers=8, pin_memory=True)

    data_val_root = 'dataset/seg_data/ISIC-2017_Validation_Data/'
    data_val_root_mask = 'Coarse_masks/Validation_EnhancedSN/'
    data_val_list = 'dataset/ISIC/Validation_seg.txt'
    valloader = data.DataLoader(MyValDataSet_seg(data_val_root, data_val_list, root_path_coarsemask=data_val_root_mask), batch_size=1, shuffle=False,
                                num_workers=8,
                                pin_memory=True)

    ############# Generate CAM for validation data
    val_cams = val_mode_cam(valloader, MaskCN)

    path = 'models/' + NAME
    if not os.path.isdir(path):
        os.mkdir(path)
    f_path = os.path.join(path, 'outputxx.txt')

    val_jac = []

    ############# Start the training
    for epoch in range(EPOCH):

        train_loss_D = []
        train_loss_R = []
        train_loss_total = []
        train_jac = []

        for i_iter, batch in tqdm(enumerate(trainloader)):

            # if i_iter > 50:
            #     continue

            step = (TRAIN_NUM / BATCH_SIZE) * epoch + i_iter

            images, coarsemask, labels, name = batch
            images = images.cuda()
            coarsemask = coarsemask.unsqueeze(1).cuda()
            labels = labels.cuda().squeeze(1)

            with torch.no_grad():
                input_cla = torch.cat((images, coarsemask), dim=1)
                cla_cam = cam(MaskCN, input_cla)

            cla_cam = torch.from_numpy(np.stack(cla_cam)).unsqueeze(1).cuda()

            optimizer.zero_grad()
            lr = adjust_learning_rate(optimizer, step)

            EnhanceSN.train()
            preds = EnhanceSN(images, cla_cam)

            loss_D, loss_R = DR_loss(preds, labels)
            term = loss_D + 0.05 * loss_R

            if FP16 is True:
                with amp.scale_loss(term, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                term.backward()
            optimizer.step()

            writer.add_scalar('learning_rate', lr, step)
            writer.add_scalar('loss', term.cpu().data.numpy(), step)

            train_loss_D.append(loss_D.cpu().data.numpy())
            train_loss_R.append(loss_R.cpu().data.numpy())
            train_loss_total.append(term.cpu().data.numpy())
            train_jac.append(Jaccard(preds, labels))


        print("train_epoch%d: lossTotal=%f, lossDice=%f, lossRank=%f, Jaccard=%f \n" % (
        epoch, np.nanmean(train_loss_total), np.nanmean(train_loss_D), np.nanmean(train_loss_R), np.nanmean(train_jac)))


        ############# Start the validation
        [vacc, vdice, vsen, vspe, vjac_score] = val_mode_seg(valloader, val_cams, EnhanceSN, path, epoch)
        line_val = "val%d: vacc=%f, vdice=%f, vsensitivity=%f, vspecifity=%f, vjac=%f \n" % \
                   (epoch, np.nanmean(vacc), np.nanmean(vdice), np.nanmean(vsen), np.nanmean(vspe),
                    np.nanmean(vjac_score))

        print(line_val)
        f = open(f_path, "a")
        f.write(line_val)
        f.close()

        val_jac.append(np.nanmean(vjac_score))

        ############# Plot val curve
        plt.figure()
        plt.plot(val_jac, label='val jaccard', color='blue', linestyle='--')
        plt.legend(loc='best')

        plt.savefig(os.path.join(path, 'jaccard.png'))
        plt.clf()
        plt.close()
        plt.show()

        plt.close('all')

        writer.add_scalar('val_Jaccard', np.nanmean(vjac_score), epoch)

        ############# Save network
        torch.save(EnhanceSN.state_dict(), os.path.join(path, 'CoarseSN_e' + str(epoch) + '.pth'))
import numpy as np
counter = 0
arraynonnan = np.array([[np.nan, 4, np.nan, 6],  [4, 5, 6, 7], [1, 2, 3, 4]])
nrows = len(arraynonnan)
ncols = len(arraynonnan[0])
# count the NaN entries with a single pass over the array
for r in range(nrows):
    for c in range(ncols):
        if np.isnan(arraynonnan[r][c]):
            counter += 1
if counter != nrows*ncols:
    nrows = len(arraynonnan)
    ncols = len(arraynonnan[0])
    for r in range(nrows):
        for c in range(ncols):
            if np.isnan(arraynonnan[r][c]):
                arraynonnan[r][c] = np.nanmean(arraynonnan)
else:
    nrows = len(arraynonnan)
    ncols = len(arraynonnan[0])
    for r in range(nrows):
        for c in range(ncols):
            if np.isnan(arraynonnan[r][c]):
                arraynonnan[r][c] = 0
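For reference, the same imputation can be written with a boolean mask instead of nested index loops; a minimal sketch, assuming the same fill rules (overall non-NaN mean when any finite value exists, 0 otherwise).

import numpy as np

arr = np.array([[np.nan, 4, np.nan, 6], [4, 5, 6, 7], [1, 2, 3, 4]], dtype=float)
nan_mask = np.isnan(arr)

if nan_mask.all():
    arr[nan_mask] = 0.0              # no finite values at all: fall back to 0
else:
    arr[nan_mask] = np.nanmean(arr)  # fill NaNs with the mean of the finite entries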
Example #57
0
def ppg_simulate(duration=120,
                 sampling_rate=1000,
                 heart_rate=70,
                 frequency_modulation=0.3,
                 ibi_randomness=0.1,
                 drift=0,
                 motion_amplitude=0.1,
                 powerline_amplitude=0.01,
                 burst_number=0,
                 burst_amplitude=1,
                 random_state=None,
                 show=False):
    """Simulate a photoplethysmogram (PPG) signal.

    Phenomenological approximation of PPG. The PPG wave is described with four
    landmarks: wave onset, location of the systolic peak, location of the
    dicrotic notch and location of the diastolic peaks. These landmarks are
    defined as x and y coordinates (in a  time series). These coordinates are
    then interpolated at the desired sampling rate to obtain the PPG signal.

    Parameters
    ----------
    duration : int
        Desired recording length in seconds. The default is 120.
    sampling_rate : int
        The desired sampling rate (in Hz, i.e., samples/second). The default is
        1000.
    heart_rate : int
        Desired simulated heart rate (in beats per minute). The default is 70.
    frequency_modulation : float
        Float between 0 and 1. Determines how pronounced respiratory sinus
        arrythmia (RSA) is (0 corresponds to absence of RSA). The default is
        0.3.
    ibi_randomness : float
        Float between 0 and 1. Determines how much random noise there is in the
        duration of each PPG wave (0 corresponds to absence of variation). The
        default is 0.1.
    drift : float
        Float between 0 and 1. Determines how pronounced the baseline drift
        (.05 Hz) is (0 corresponds to absence of baseline drift). The default
        is 0.
    motion_amplitude : float
        Float between 0 and 1. Determines how pronounced the motion artifact
        (0.5 Hz) is (0 corresponds to absence of motion artifact). The default
        is 0.1.
    powerline_amplitude : float
        Float between 0 and 1. Determines how pronounced the powerline artifact
        (50 Hz) is (0 corresponds to absence of powerline artifact). Note that
        powerline_amplitude > 0 is only possible if 'sampling_rate' is >= 500.
        The default is 0.01.
    burst_amplitude : float
        Float between 0 and 1. Determines how pronounced high frequency burst
        artifacts are (0 corresponds to absence of bursts). The default is 1.
    burst_number : int
        Determines how many high frequency burst artifacts occur. The default
        is 0.
    show : bool
        If true, returns a plot of the landmarks and interpolated PPG. Useful
        for debugging.
    random_state : int
        Seed for the random number generator. Keep it fixed for reproducible
        results.

    Returns
    -------
    ppg : array
        A vector containing the PPG.

    Examples
    --------
    >>> import neurokit2 as nk
    >>>
    >>> ppg = nk.ppg_simulate(duration=40, sampling_rate=500,
    ...                       heart_rate=75, random_state=42, show=True)

    See Also
    --------
    ecg_simulate, rsp_simulate, eda_simulate, emg_simulate
    """
    # At the requested sampling rate, how long is a period at the requested
    # heart-rate and how often does that period fit into the requested
    # duration?
    period = 60 / heart_rate  # in seconds
    n_period = int(np.floor(duration / period))
    periods = np.ones(n_period) * period

    # Seconds at which waves begin.
    x_onset = np.cumsum(periods)
    # Add respiratory sinus arrythmia (frequency modulation).
    periods, x_onset = _frequency_modulation(
        x_onset,
        periods,
        modulation_frequency=.05,
        modulation_strength=frequency_modulation)
    # Randomly modulate duration of waves by subtracting a random value between
    # 0 and 20% of the wave duration (see function definition).
    x_onset = _random_x_offset(x_onset, np.diff(x_onset), ibi_randomness)
    # Corresponding signal amplitudes.
    y_onset = np.random.normal(0, .1, n_period)

    # Seconds at which the systolic peaks occur within the waves.
    x_sys = x_onset + 0.175 * periods
    # Corresponding signal amplitudes.
    y_sys = y_onset + np.random.normal(1.5, .25, n_period)

    # Seconds at which the dicrotic notches occur within the waves.
    x_notch = x_onset + 0.4 * periods
    # Corresponding signal amplitudes (percentage of systolic peak height).
    y_notch = y_sys * np.random.normal(.49, .01, n_period)

    # Seconds at which the diastolic peaks occur within the waves.
    x_dia = x_onset + 0.45 * periods
    # Corresponding signal amplitudes (percentage of systolic peak height).
    y_dia = y_sys * np.random.normal(.51, .01, n_period)

    x_all = np.concatenate((x_onset, x_sys, x_notch, x_dia))
    x_all.sort(kind="mergesort")
    x_all = np.rint(x_all * sampling_rate).astype(
        int)  # convert seconds to samples

    y_all = np.zeros(n_period * 4)
    y_all[0::4] = y_onset
    y_all[1::4] = y_sys
    y_all[2::4] = y_notch
    y_all[3::4] = y_dia

    if show:
        fig, (ax0, ax1) = plt.subplots(nrows=2, ncols=1, sharex=True)
        ax0.scatter(x_all, y_all, c="r")

    # Interpolate a continuous signal between the landmarks (i.e., Cartesian
    # coordinates).
    f = Akima1DInterpolator(x_all, y_all)
    samples = np.arange(0, int(np.ceil(duration * sampling_rate)))
    ppg = f(samples)
    # Remove NAN (values outside interpolation range, i.e., after last sample).
    ppg[np.isnan(ppg)] = np.nanmean(ppg)

    if show:
        ax0.plot(ppg)

    # Add baseline drift.
    if drift > 0:
        drift_freq = .05
        if drift_freq < (1 / duration) * 2:
            drift_freq = (1 / duration) * 2
        ppg = signal_distort(ppg,
                             sampling_rate=sampling_rate,
                             noise_amplitude=drift,
                             noise_frequency=drift_freq,
                             random_state=random_state,
                             silent=True)
    # Add motion artifacts.
    if motion_amplitude > 0:
        motion_freq = .5
        ppg = signal_distort(ppg,
                             sampling_rate=sampling_rate,
                             noise_amplitude=motion_amplitude,
                             noise_frequency=motion_freq,
                             random_state=random_state,
                             silent=True)
    # Add high frequency bursts.
    if burst_amplitude > 0:
        ppg = signal_distort(ppg,
                             sampling_rate=sampling_rate,
                             artifacts_amplitude=burst_amplitude,
                             artifacts_frequency=100,
                             n_artifacts=burst_number,
                             random_state=random_state,
                             silent=True)
    # Add powerline noise.
    if powerline_amplitude > 0:
        ppg = signal_distort(ppg,
                             sampling_rate=sampling_rate,
                             powerline_amplitude=powerline_amplitude,
                             powerline_frequency=50,
                             random_state=random_state,
                             silent=True)

    if show:
        ax1.plot(ppg)

    return ppg
Example #58
0
def calc_event_data(etdata, evt,
                    w = {255:1,
                         0: 1,
                         1: 50,
                         2: 1,
                         3: 1,
                         4: 1,
                         5: 1,
                         6: 1,
                         'vel': 18,
                         'etdq': 200}, ):
    """Calculates event parameters.
    Parameters:
        etdata  --  an instance of ETData
        evt     --  compact event vector
        w       --  dictionary of context to take into account
                    for each event type; in ms
    Returns:
        posx_s      --  onset position, horizontal
        posx_e      --  offset position, horizontal
        posy_s      --  onset position, vertical
        posy_e      --  offset position, vertical
        posx_mean   --  mean position, horizontal
        posy_mean   --  mean position, vertical
        posx_med    --  median position, horizontal
        posy_med    --  median position, vertical
        pv          --  peak velocity
        pv_index    --  index for peak velocity
        rms         --  precision, 2D rms
        std         --  precision, 2D std
    """

    #init params
    data = etdata.data
    fs = etdata.fs
    e = {k:v for k, v in zip(['s', 'e', 'evt'], evt)}
    ws = w[e['evt']]
    ws = 1 if ws <= 1 else round_up_to_odd(ws / 1000.0 * fs, min_val=3)
    ws_vel = round_up_to_odd(w['vel']/1000.0*fs, min_val=3)
    w_etdq = int(w['etdq']/1000.*fs)

    #calculate velocity using Savitzky-Golay filter
    vel = np.hypot(sg.savgol_filter(data['x'], ws_vel, 2, 1),
                   sg.savgol_filter(data['y'], ws_vel, 2, 1))*fs

    ind_s = e['s']+ws
    ind_s = ind_s if ind_s < e['e'] else e['e']
    ind_e = e['e']-ws
    ind_e = ind_e if ind_e > e['s'] else e['s']

    posx_s = np.nanmean(data[e['s']:ind_s]['x'])
    posy_s = np.nanmean(data[e['s']:ind_s]['y'])
    posx_e = np.nanmean(data[ind_e:e['e']]['x'])
    posy_e = np.nanmean(data[ind_e:e['e']]['y'])

    posx_mean = np.nanmean(data[e['s']:e['e']]['x'])
    posy_mean = np.nanmean(data[e['s']:e['e']]['y'])
    posx_med = np.nanmedian(data[e['s']:e['e']]['x'])
    posy_med = np.nanmedian(data[e['s']:e['e']]['y'])

    pv = np.max(vel[e['s']:e['e']])
    pv_index = e['s']+ np.argmax(vel[e['s']:e['e']])

    if e['e']-e['s']>w_etdq:
        x_ = rolling_window(data[e['s']:e['e']]['x'], w_etdq)
        y_ = rolling_window(data[e['s']:e['e']]['y'], w_etdq)

        std = np.median(np.hypot(np.std(x_, axis=1), np.std(y_, axis=1)))
        rms = np.median(np.hypot(np.sqrt(np.mean(np.diff(x_)**2, axis=1)),
                                 np.sqrt(np.mean(np.diff(y_)**2, axis=1))))
    else:
        std = 0
        rms = 0

    return posx_s, posx_e, posy_s, posy_e, posx_mean, posy_mean, posx_med, posy_med, pv, pv_index, rms, std
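The function above relies on two helpers that are not shown in this snippet, round_up_to_odd and rolling_window; plausible minimal versions (assumptions, not the original implementations) would be:

import numpy as np

def round_up_to_odd(f, min_val=3):
    """Round up to the nearest odd integer, but never below min_val (assumed behavior)."""
    w = int(np.ceil(f)) // 2 * 2 + 1
    return max(w, min_val)

def rolling_window(a, window):
    """Stride-trick view of overlapping windows over a 1D array (assumed behavior)."""
    shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)
    strides = a.strides + (a.strides[-1],)
    return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)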
Example #59
0
        # save data in case the program crashes -- remove this if it's causing any hold-ups
        if nBlock in [1,2,6]:
            trial_type = 1
        elif nBlock in [2,4,5,7]:
            trial_type = 2
        # occasionally no key-response RT is recorded
        if not key_response.rt:
            key_response.rt = float('nan')
        #add data to file
        data_out.loc[len(data_out)+1]=[nBlock, key_response.corr, key_response.rt, trial_type]
        #'data/%s_%s_%s' %(expInfo['participant'], expName, expInfo['date'])
    #build adaptive rt design.

    n_corr = np.sum(acc_last_block)
    acc_last_block = n_corr/len(acc_last_block)
    mean_rt = np.nanmean(block_rts)
    std_rt = np.nanstd(block_rts)
    adapt_rt = mean_rt+std_rt

    if (adapt_rt <.200 or acc_last_block < 0.75) or (nBlock == 6 or nBlock == 7) :
        max_rt = 1.0
    else:
        max_rt = adapt_rt

    # completed 1 repeats of 'trials'

    #feedback text component after block completion.
    text_4 = visual.TextStim(win=win, ori=0, name='text_2',
        text='End of Block. You got %i  trials correct out of 256. Your mean response time was : %.2f. \n Press any key to continue' %(n_corr,mean_rt),    font=u'Arial',
        pos=[0, 0], height=0.1, wrapWidth=None,
        color=u'white', colorSpace='rgb', opacity=1,
Example #60
0
def evaluate(args, model, criterions, dataloader):
    model.eval()
    epoch_loss = 0
    n_class = 12
    example_images = []
    with torch.no_grad():
        hist = np.zeros((n_class, n_class))
        miou_images = []
        for images, masks, _ in dataloader:

            images = torch.stack(images)  # (batch, channel, height, width)
            masks = torch.stack(
                masks).long()  # (batch, channel, height, width)

            images, masks = images.to(args.device), masks.to(args.device)

            outputs = model(images)
            flag = criterions[0]
            if flag == "+":
                loss = criterions[1](outputs, masks) + criterions[2](outputs,
                                                                     masks)
            elif flag == "-":
                loss = criterions[1](outputs, masks) - criterions[2](outputs,
                                                                     masks)
            else:
                loss = criterions[1](outputs, masks)
            epoch_loss += loss

            inputs_np = torch.clone(images).detach().cpu().permute(0, 2, 3,
                                                                   1).numpy()
            inputs_np = denormalize_image(inputs_np,
                                          mean=(0.4611, 0.4403, 0.4193),
                                          std=(0.2107, 0.2074, 0.2157))

            example_images.append(
                wb_mask(
                    inputs_np[0],
                    pred_mask=outputs.argmax(1)[0].detach().cpu().numpy(),
                    true_mask=masks[0].detach().cpu().numpy(),
                ))

            outputs = torch.argmax(outputs.squeeze(),
                                   dim=1).detach().cpu().numpy()

            hist = add_hist(hist,
                            masks.detach().cpu().numpy(),
                            outputs,
                            n_class=n_class)

            # store the per-image mIoU
            miou_list = get_miou(masks.detach().cpu().numpy(),
                                 outputs,
                                 n_class=n_class)
            miou_images.extend(miou_list)

        # metrics
        acc, acc_cls, miou, fwavacc = label_accuracy_score(hist)

        # leaderboard mIoU
        lb_miou = np.nanmean(miou_images)

        print(f"acc:{acc:.4f}, acc_cls:{acc_cls:.4f}, fwavacc:{fwavacc:.4f}")

        # log the confusion-matrix heatmap to wandb
        summa = hist.sum(1).reshape(-1, 1)
        percent = hist / summa
        plt.figure(figsize=(10, 10))
        sns.heatmap(percent, annot=True, fmt=".2%", annot_kws={"size": 8})
        wandb.log({"percent_hist": wandb.Image(plt)}, commit=False)

    return (epoch_loss / len(dataloader)), lb_miou, miou, example_images
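For reference, the mIoU that label_accuracy_score returns is conventionally derived from the accumulated confusion matrix; a minimal sketch of that computation (an assumption about the helper, not its actual code):

import numpy as np

def miou_from_hist(hist):
    """Mean IoU from an (n_class, n_class) confusion matrix: IoU_c = TP_c / (TP_c + FP_c + FN_c)."""
    tp = np.diag(hist)
    denom = hist.sum(axis=1) + hist.sum(axis=0) - tp
    with np.errstate(divide='ignore', invalid='ignore'):
        iou = tp / denom        # classes absent from both prediction and target give NaN
    return float(np.nanmean(iou))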