Example #1
def visualize_depth_image(data):

    # work on a copy so the caller's array is not modified
    data = data.copy()
    data[data == 0.0] = np.nan

    maxdepth = np.nanmax(data)
    mindepth = np.nanmin(data)
    data -= mindepth
    data /= (maxdepth - mindepth)

    gray = np.zeros(list(data.shape) + [3], dtype=data.dtype)
    data = (1.0 - data)
    gray[..., :3] = np.dstack((data, data, data))

    # use a greenish color to visualize missing depth
    gray[np.isnan(data), :] = (97, 160, 123)
    gray[np.isnan(data), :] /= 255

    gray = exposure.equalize_hist(gray)

    # set alpha channel
    gray = np.dstack((gray, np.ones(data.shape[:2])))
    gray[np.isnan(data), -1] = 0.5

    return gray * 255
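A minimal usage sketch for the example above; it assumes numpy and scikit-image are installed (the function body relies on np and exposure), and the depth map here is synthetic.

# Hypothetical usage of visualize_depth_image (not part of the original example)
import numpy as np
from skimage import exposure  # required by the function body above

depth = np.random.uniform(0.5, 4.0, size=(120, 160))  # synthetic depth map
depth[40:60, 50:80] = 0.0                              # simulate missing readings
rgba = visualize_depth_image(depth)                    # (120, 160, 4) array in 0..255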
Example #2
    def transform(self, data):

        assert np.isfinite(data).all()

        ntest = len(data)

        data = data.copy()

        data.shape = ntest, -1

        assert np.isfinite(data).all()

        print ">>> Computing traintest linear kernel"
        start = time.time()
        kernel_traintest = np.dot(data,
                                  self._train_data.T)

        assert not np.isnan(kernel_traintest).any()
        assert not np.isinf(kernel_traintest).any()

        kernel_traintest /= self._ktrace

        assert not np.isnan(kernel_traintest).any()
        assert not np.isinf(kernel_traintest).any()

        end = time.time()
        print "Time: %s" % (end-start)

        return self._clf.decision_function(kernel_traintest).ravel()
Example #3
def ll(actual, predicted):
    """
    Computes the log likelihood.

    This function computes the log likelihood between two numbers,
    or element-wise between a pair of lists or numpy arrays.

    Parameters
    ----------
    actual : int, float, list of numbers, numpy array
             The ground truth value
    predicted : same type as actual
                The predicted value

    Returns
    -------
    score : double or list of doubles
            The log likelihood error between actual and predicted

    """
    actual = np.array(actual)
    predicted = np.array(predicted)
    err = np.seterr(all='ignore')
    score = -(actual * np.log(predicted) + (1 - actual) * np.log(1 - predicted))
    np.seterr(divide=err['divide'], over=err['over'],
              under=err['under'], invalid=err['invalid'])
    if type(score) == np.ndarray:
        score[np.isnan(score)] = 0
    else:
        if np.isnan(score):
            score = 0
    return score
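A short, hedged usage sketch for ll above (it assumes numpy is imported as np, which the function requires).

# Hypothetical usage of ll (not part of the original example)
import numpy as np

actual = [1, 0, 1, 1]
predicted = [0.9, 0.2, 0.8, 0.95]
scores = ll(actual, predicted)  # element-wise log likelihood error, NaNs replaced by 0
print(scores.mean())            # average log loss over the four samples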
Example #4
    def _crop_out_special_values(self, ws):

        if ws.getNumberHistograms() != 1:
            # Stripping special (NaN/inf) values is only possible on 1D workspaces
            return

        y_vals = ws.readY(0)
        length = len(y_vals)
        # Find the first finite (non-NaN, non-inf) value
        start = 0
        for i in range(0, length):
            if not np.isnan(y_vals[i]) and not np.isinf(y_vals[i]):
                start = i
                break
        # Now find the last finite value
        stop = 0
        length -= 1
        for j in range(length, 0, -1):
            if not np.isnan(y_vals[j]) and not np.isinf(y_vals[j]):
                stop = j
                break
        # Find the appropriate X values and call CropWorkspace
        x_vals = ws.readX(0)
        start_x = x_vals[start]
        # Make sure we're inside the bin that we want to crop
        end_x = x_vals[stop + 1]
        return self._crop_to_x_range(ws=ws, x_min=start_x, x_max=end_x)
Example #5
def responsetime(conn):
    """ Determine the average response time for tasks in bins """
    c = conn.cursor()
    results = c.execute("""
    select finished.time, event.time, finished.time - event.time as responsetime
    from event
    left join (select time, task_id from event where type_id=""" + taskid("run_task") + """) as finished
          on event.task_id = finished.task_id
    where event.type_id=""" + taskid("add_task")).fetchall()

    results = np.matrix(results, dtype=float)
    runtimes = results[:,2]

    nones = runtimes == np.array(None)
    (finished, nofinish) = (runtimes[~np.isnan(runtimes).all(axis=1)], runtimes[np.isnan(runtimes).any(axis=1)])

    return {
        "completion":{
            "finished":finished.size,
            "dnf":nofinish.size,
        },
        "response_times":{
            "min":np.min(finished),
            "mean":np.mean(finished),
            "max":np.max(finished),
            "std":np.std(finished)
        }
    }
Example #6
 def __init__(self, data, classes, tree_features, n_trees=100):
     self.n_features = np.shape(data)[1]
     n_rows = np.shape(data)[0]
     n_nans = np.sum(np.isnan(data), 0)
     data = data[:, n_nans < n_rows]
     self.n_features = np.shape(data)[1]
     
     n_nans = np.sum(np.isnan(data), 1)
     data = data[n_nans < self.n_features, :]
     self.n_rows = np.shape(data)[0]
     
     if (tree_features > self.n_features):
         tree_features = self.n_features
     
     self.col_list = np.zeros((n_trees, tree_features), dtype='int')
     self.n_trees = n_trees
     self.bags = []
     for i in range(n_trees):
         cols = sample(range(self.n_features), tree_features)
         cols.sort()
         self.col_list[i, :] = cols
         data_temp = data[:, cols]
         n_nans = np.sum(np.isnan(data_temp), 1)
         data_temp = data_temp[n_nans == 0, :]
         classes_temp = classes[n_nans == 0]
         #bag = BaggingClassifier(n_estimators=1, max_features=tree_features)
         bag = RandomForestClassifier(n_estimators=1, max_features=tree_features)
         bag.fit(data_temp, classes_temp)
         self.bags.append(bag)
         print(np.shape(data_temp))
Example #7
    def reconstruct_coincidence(self, coincidence_events, station_numbers=None,
                                offsets=None, initial=None):
        """Reconstruct a single coincidence

        :param coincidence_events: a coincidence list consisting of one
                                   or more (station_number, event) tuples.
        :param station_numbers: list of station numbers, to only use
                                events from those stations.
        :param offsets: dictionary with detector offsets for each station.
                        These detector offsets should be relative to one
                        detector from a specific station.
        :param initial: dictionary with already fitted shower parameters.
        :return: list of theta, phi, and station numbers.

        """
        if len(coincidence_events) < 1:
            return nan, nan, []
        if offsets is None:
            offsets = {}
        if initial is None:
            initial = {}

        # Subtract base timestamp to prevent loss of precision
        ts0 = int(coincidence_events[0][1]['timestamp'])
        ets0 = ts0 * int(1e9)
        self.cluster.set_timestamp(ts0)
        t, x, y, z, nums = ([], [], [], [], [])

        offsets = self.get_station_offsets(coincidence_events, station_numbers,
                                           offsets, ts0)

        for station_number, event in coincidence_events:
            if station_numbers is not None:
                if station_number not in station_numbers:
                    continue
            t_off = offsets.get(station_number, NO_OFFSET)
            station = self.cluster.get_station(station_number)
            t_detectors = relative_detector_arrival_times(event, ets0,
                                                          offsets=t_off,
                                                          station=station)
            for t_detector, detector in zip(t_detectors, station.detectors):
                if not isnan(t_detector):
                    dx, dy, dz = detector.get_coordinates()
                    t.append(t_detector)
                    x.append(dx)
                    y.append(dy)
                    z.append(dz)
            if not all(isnan(t_detectors)):
                nums.append(station_number)

        if len(t) >= 3 and 'core_x' in initial and 'core_y' in initial:
            theta, phi = self.curved.reconstruct_common(t, x, y, z, initial)
        elif len(t) == 3:
            theta, phi = self.direct.reconstruct_common(t, x, y, z, initial)
        elif len(t) > 3:
            theta, phi = self.fit.reconstruct_common(t, x, y, z, initial)
        else:
            theta, phi = (nan, nan)

        return theta, phi, nums
Example #8
def nanallclose(x, y, rtol=1.0e-5, atol=1.0e-8):
    """Numpy allclose function which allows NaN

    Input
        x, y: Either scalars or numpy arrays

    Output
        True or False

    Returns True if all non-nan elements pass.
    """

    xn = numpy.isnan(x)
    yn = numpy.isnan(y)
    if numpy.any(xn != yn):
        # Presence of NaNs is not the same in x and y
        return False

    if numpy.all(xn):
        # Everything is NaN.
        # This will also take care of x and y being NaN scalars
        return True

    # Filter NaN's out
    if numpy.any(xn):
        x = x[~xn]  # '~' (logical not) keeps only the non-NaN entries
        y = y[~yn]

    # Compare non NaN's and return
    return numpy.allclose(x, y, rtol=rtol, atol=atol)
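A brief usage sketch (assuming numpy is imported, matching the function above).

# Hypothetical usage of nanallclose (not part of the original example)
import numpy

a = numpy.array([1.0, numpy.nan, 3.0])
b = numpy.array([1.0, numpy.nan, 3.0 + 1e-9])
print(nanallclose(a, b))                              # True: NaN patterns match, rest is close
print(nanallclose(a, numpy.array([1.0, 2.0, 3.0])))   # False: NaN patterns differ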
Example #9
 def __set_static_gaus_pmfs(self):
     if np.logical_not(self.off_buff.is_full()):
         print "The long term buffer is not yet full.  This may give undesirable results"
     
     # median RSS of off-state buffer
     cal_med = self.off_buff.get_no_nan_median()
     
     if (np.sum(cal_med == 127) > 0) | (np.sum(np.isnan(cal_med)) > 0):
         sys.stderr.write('At least one link has a median of 127 or is nan\n\n')
         quit()
          
     if (np.sum(np.isnan(self.off_buff.get_nanvar())) > 0):
         sys.stderr.write('the long term buffer has a nan')
         quit()
     
     cal_med_mat = np.tile(cal_med,(self.V_mat.shape[1],1)).T
     
     # variance of RSS during calibration
     cal_var = np.maximum(self.off_buff.get_nanvar(),self.omega) #3.0 
     cal_var_mat = np.tile(cal_var,(self.V_mat.shape[1],1)).T
     
     # Compute the off_link emission probabilities for each link
     x = np.exp(- (self.V_mat - cal_med_mat)**2/(2*cal_var_mat/1.0)) # 1.0
     self.off_links = self.__normalize_pmf(x)
     
     # Compute the on_link emission probabilities for each link
     x = np.exp(- (self.V_mat - (cal_med_mat-self.Delta))**2/(self.eta*2*cal_var_mat)) # 3
     self.on_links = self.__normalize_pmf(x) 
Example #10
 def _get_sum(self):
     """Compute sum of non NaN / Inf values in the array."""
     try:
         return self._sum
     except AttributeError:
         self._sum = self.no_nan.sum()
          # The following 2 lines are needed as in Python 3.3 with NumPy
         # 1.7.1, numpy.ndarray and numpy.memmap aren't hashable.
         if type(self._sum) is numpy.memmap:
             self._sum = numpy.asarray(self._sum).item()
         if self.has_nan and self.no_nan.mask.all():
             # In this case the sum is not properly computed by numpy.
             self._sum = 0
         if numpy.isinf(self._sum) or numpy.isnan(self._sum):
             # NaN may happen when there are both -inf and +inf values.
             if self.has_nan:
                 # Filter both NaN and Inf values.
                 mask = self.no_nan.mask + numpy.isinf(self[1])
             else:
                 # Filter only Inf values.
                 mask = numpy.isinf(self[1])
             if mask.all():
                 self._sum = 0
             else:
                 self._sum = numpy.ma.masked_array(self[1], mask).sum()
             # At this point there should be no more NaN.
             assert not numpy.isnan(self._sum)
     return self._sum
Example #11
    def test_align(self):
        left = create_test_data()
        right = left.copy(deep=True)
        right['dim3'] = ('dim3', list('cdefghijkl'))
        right['var3'][:-2] = right['var3'][2:]
        right['var3'][-2:] = np.random.randn(*right['var3'][-2:].shape)

        intersection = list('cdefghij')
        union = list('abcdefghijkl')

        left2, right2 = align(left, right, join='inner')
        self.assertArrayEqual(left2['dim3'], intersection)
        self.assertDatasetIdentical(left2, right2)

        left2, right2 = align(left, right, join='outer')
        self.assertVariableEqual(left2['dim3'], right2['dim3'])
        self.assertArrayEqual(left2['dim3'], union)
        self.assertDatasetIdentical(left2.labeled(dim3=intersection),
                                    right2.labeled(dim3=intersection))
        self.assertTrue(np.isnan(left2['var3'][-2:]).all())
        self.assertTrue(np.isnan(right2['var3'][:2]).all())

        left2, right2 = align(left, right, join='left')
        self.assertVariableEqual(left2['dim3'], right2['dim3'])
        self.assertVariableEqual(left2['dim3'], left['dim3'])
        self.assertDatasetIdentical(left2.labeled(dim3=intersection),
                                    right2.labeled(dim3=intersection))
        self.assertTrue(np.isnan(right2['var3'][:2]).all())

        left2, right2 = align(left, right, join='right')
        self.assertVariableEqual(left2['dim3'], right2['dim3'])
        self.assertVariableEqual(left2['dim3'], right['dim3'])
        self.assertDatasetIdentical(left2.labeled(dim3=intersection),
                                    right2.labeled(dim3=intersection))
        self.assertTrue(np.isnan(left2['var3'][-2:]).all())
Example #12
def analyze_symbols(symbols):
    number = 0
    total_bull_correct = np.zeros(len(patterns))
    total_bull_wrong = np.zeros(len(patterns))
    total_bear_correct = np.zeros(len(patterns))
    total_bear_wrong = np.zeros(len(patterns))

    for symbol in symbols:
        print symbol
        bc, bw, bco, bwr = evaluate_pattern(symbol)
        if bc is None:
            continue

        for i in range(len(bc)):
            if not np.isnan(bc[i]):
                total_bull_correct[i] += bc[i]
                total_bull_wrong[i] += bw[i]
            if not np.isnan(bco[i]):
                total_bear_correct[i] += bco[i]
                total_bear_wrong[i] += bwr[i]
        number += 1

    sum_bull = total_bull_correct + total_bull_wrong
    sum_bear = total_bear_correct + total_bear_wrong
    pgain = total_bull_correct*1.0/sum_bull
    plose = total_bear_correct*1.0/sum_bear

    keys = patterns
    for i in range(len(keys)):
        print keys[i], ": ", pgain[i], " ", sum_bull[i], " ", plose[i], " ", sum_bear[i]
Example #13
def estimateBIsochrone(R,z,pot=None):
    """
    NAME:
       estimateBIsochrone
    PURPOSE:
       Estimate a good value for the scale of the isochrone potential by matching the slope of the rotation curve
    INPUT:
       R,z = coordinates (if these are arrays, the median estimated delta is returned, i.e., if this is an orbit)
       pot= Potential instance or list thereof
    OUTPUT:
       b if 1 R,Z given
       bmin,bmedian,bmax if multiple R given       
    HISTORY:
       2013-09-12 - Written - Bovy (IAS)
    """
    if pot is None: #pragma: no cover
        raise IOError("pot= needs to be set to a Potential instance or list thereof")
    if isinstance(R,nu.ndarray):
        bs= nu.array([estimateBIsochrone(R[ii],z[ii],pot=pot) for ii in range(len(R))])
        return (nu.amin(bs[~nu.isnan(bs)]),
                nu.median(bs[~nu.isnan(bs)]),
                nu.amax(bs[~nu.isnan(bs)]))
    else:
        r2= R**2.+z**2
        r= math.sqrt(r2)
        dlvcdlr= dvcircdR(pot,r)/vcirc(pot,r)*r
        try:
            b= optimize.brentq(lambda x: dlvcdlr-(x/math.sqrt(r2+x**2.)-0.5*r2/(r2+x**2.)),
                               0.01,100.)
        except: #pragma: no cover
            b= nu.nan
        return b
Example #14
def Column8(df,Nlen,Tlen):
    mA = np.zeros((Nlen*Tlen,Nlen*2+9),float)
    vb = np.zeros(Nlen*Tlen)
    i = 0
    for firmid,firmgroup in df.groupby('Firmid'):
        if not firmgroup['Dprice'].isnull().values.any():
            mA[i*Tlen:(i+1)*Tlen,i] = np.ones(Tlen)
            mA[i*Tlen:(i+1)*Tlen,i+Nlen] = firmgroup['Dmarket'].values
            mA[i*Tlen:(i+1)*Tlen,2*Nlen] = firmgroup['Event'].values
            eu = firmgroup['Conc'].values
            where_are_NaNs = np.isnan(eu)
            eu[where_are_NaNs] = 0
            mis = firmgroup['Dumconc'].values
            where_are_NaNs = np.isnan(mis)
            mis[where_are_NaNs] = 0
            mA[i*Tlen:(i+1)*Tlen,1+2*Nlen] = np.multiply(firmgroup['Do'].values,firmgroup['Event'].values)
            mA[i*Tlen:(i+1)*Tlen,2+2*Nlen] = np.multiply(firmgroup['Di'].values,firmgroup['Event'].values)
            mA[i*Tlen:(i+1)*Tlen,3+2*Nlen] = np.multiply(eu,firmgroup['Event'].values)
            mA[i*Tlen:(i+1)*Tlen,4+2*Nlen] = np.multiply(mis,firmgroup['Event'].values)
            mA[i*Tlen:(i+1)*Tlen,5+2*Nlen] = np.multiply(np.multiply(eu,firmgroup['Event'].values),firmgroup['Do'].values)
            mA[i*Tlen:(i+1)*Tlen,6+2*Nlen] = np.multiply(np.multiply(mis,firmgroup['Event'].values),firmgroup['Do'].values)
            mA[i*Tlen:(i+1)*Tlen,7+2*Nlen] = np.multiply(np.multiply(eu,firmgroup['Event'].values),firmgroup['Di'].values)
            mA[i*Tlen:(i+1)*Tlen,8+2*Nlen] = np.multiply(np.multiply(mis,firmgroup['Event'].values),firmgroup['Di'].values)
            vb[i*Tlen:(i+1)*Tlen] = [p2f(x) for x in firmgroup['Dprice'].values]
            i += 1
    tmpp = inv(mA.T.dot(mA)).dot(mA.T)
    Xhat = tmpp.dot(vb)
    gamma = Xhat[-9:]
    print gamma
    return gamma
Example #15
def calcForces_and_potentialE(F_x, F_y, old_or_new, x_positions, y_positions, V_atoms):
    """calculates x and y forces and potential energy per atom as summed over
    all contributions due to all neighbors, as functions of position and the
    parameters of the LJ potential"""

    for atom in xrange(Natoms):
        for i in xrange(Natoms):
            if i != atom:                    
                    delx = x_positions[atom,old_or_new]-x_positions[i,old_or_new]
                    dely = y_positions[atom,old_or_new]-y_positions[i,old_or_new]
                    r_ij = np.sqrt( (x_positions[atom,old_or_new]-x_positions[i,old_or_new])**2\
                                    + (y_positions[atom,old_or_new]-y_positions[i,old_or_new])**2 )
                    F_x[atom,old_or_new] =  F_x[atom,old_or_new] - 24.0 *epsilon * sigma**6 \
                                           * delx * ( 1 - 2.0*(sigma/r_ij)**6 ) / r_ij**8
                    F_y[atom,old_or_new] =  F_y[atom,old_or_new] - 24.0 *epsilon * sigma**6 * \
                                           dely * ( 1 - 2.0*(sigma/r_ij)**6 ) / r_ij**8    
                    V_atoms[atom] = V_atoms[atom] + 4.0 * epsilon \
                                    * ( (sigma/r_ij)**12-(sigma/r_ij)**6 )
                    if np.isnan(F_x[atom,old_or_new]) or np.isinf(F_x[atom,old_or_new]):
                        F_x[atom,old_or_new]=0
                    if np.isnan(F_y[atom,old_or_new]) or np.isinf(F_y[atom,old_or_new]):
                        F_y[atom,old_or_new]=0
                    if np.isnan(V_atoms[atom]) or np.isinf(V_atoms[atom]):
                        V_atoms[atom]=0                   
    return F_x, F_y, V_atoms
Example #16
    def get_depth_color(self, value):

        vmin = -0.02
        vmax = 0.02

        if value < vmin:
            value = vmin
        elif value > vmax:
            value = vmax

        dv = vmax - vmin
        r = g = b = 1

        if value < (vmin + 0.25 * dv):
            r = 0
            g = 4 * (value - vmin) / dv
        elif value < (vmin + 0.5 * dv):
            r = 0
            b = 1 + 4 * (vmin + 0.25 * dv - value) / dv
        elif value < (vmin + 0.75 * dv):
            r = 4 * (value - vmin - 0.5 * dv) / dv
            b = 0
        else:
            g = 1 + 4 * (vmin + 0.75 * dv - value) / dv
            b = 0

        if np.isnan(r) or np.isnan(g) or np.isnan(b):
            r = b = g = 0

        return (np.array([b, g, r]) * 255).astype(int)
Example #17
    def _evaluate_projection(self, x, y):
        """
        kNNEvaluate - evaluate class separation in the given projection using a k-NN method
        Parameters
        ----------
        x - variables to evaluate
        y - class

        Returns
        -------
        scores
        """
        if self.percent_data_used != 100:
            rand = np.random.choice(len(x), int(len(x) * self.percent_data_used / 100),
                                    replace=False)
            x = x[rand]
            y = y[rand]
        neigh = KNeighborsClassifier(n_neighbors=3) if self.attr_color.is_discrete else \
            KNeighborsRegressor(n_neighbors=3)
        assert not (np.isnan(x).any() or np.isnan(y).any())
        neigh.fit(x, y)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UserWarning)
            scores = cross_val_score(neigh, x, y, cv=3)
        return scores.mean()
Example #18
def bootstrap(func, arglist, N, kwargs={}):
    '''Computes error via bootstrapping on an arbitrary function. The
    major restriction is that func is assumed to return a single, 1D,
    Numpy array. Bootstrap will also resample ALL of the elements of
    arglist. If you want to keep some inputs unchanged pass them as
    keywords. The func can have an arbitrary number of arguments and
    keyword arguments. If the output of func is an ndarray of length M,
    then bootstrap returns two arrays of length M. The first is the
    mean value over all bootstraps and the second is the stddev of the
    same.
    '''
    
    if type(arglist) != list:
        arglist = [arglist]
    size = len(arglist[0])
    resultarr = None
    for i in range(N):

        idx = np.random.randint(0,size,size)
        bootargs = [i[idx] for i in arglist]
        result = func(*bootargs,**kwargs)
        try:
            resultarr = np.vstack((resultarr,result))
        except ValueError:
            resultarr = result

    print np.isnan(resultarr).sum()
    return bn.nanmean(resultarr,axis=0),bn.nanstd(resultarr,axis=0)
Example #19
def moments(data, circle, rotate, vheight, estimator=median, **kwargs):
    """Returns (height, amplitude, x, y, width_x, width_y, rotation angle)
    the gaussian parameters of a 2D distribution by calculating its
    moments.  Depending on the input parameters, will only output
    a subset of the above.
    """
    total = np.abs(data).sum()
    Y, X = np.indices(data.shape)  # python convention: reverse x,y np.indices
    y = np.argmax((X*np.abs(data)).sum(axis=1)/total)
    x = np.argmax((Y*np.abs(data)).sum(axis=0)/total)
    col = data[int(y), :]
    # FIRST moment, not second!
    width_x = np.sqrt(np.abs((np.arange(col.size)-y)*col).sum() / np.abs(col).sum())
    row = data[:, int(x)]
    width_y = np.sqrt(np.abs((np.arange(row.size)-x)*row).sum() / np.abs(row).sum())
    width = (width_x + width_y) / 2.
    height = estimator(data.ravel())
    amplitude = data.max()-height
    mylist = [amplitude, x, y]
    if (np.isnan(width_y) or np.isnan(width_x) or np.isnan(height) or np.isnan(amplitude)):
        raise ValueError("something is nan")
    if vheight:
        mylist = [height] + mylist
    if not circle:
        mylist = mylist + [width_x, width_y]
        if rotate:
            mylist = mylist + [0.]  # rotation "moment" is just zero...
            # also, circles don't rotate.
    else:
        mylist = mylist + [width]
    return mylist
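A hedged usage sketch for moments above; it assumes numpy is imported as np and that a module-level median (e.g. from numpy import median) was in scope when the function was defined, since it is the default estimator.

# Hypothetical usage of moments (not part of the original example)
import numpy as np

img = np.random.rand(32, 32)  # synthetic 2D distribution
height, amp, x, y, wx, wy, rot = moments(img, circle=False, rotate=True,
                                         vheight=True, estimator=np.median)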
Example #20
    def test_nan_inf(self):
        # Not-a-number
        q = u.Quantity('nan', unit='cm')
        assert np.isnan(q.value)

        q = u.Quantity('NaN', unit='cm')
        assert np.isnan(q.value)

        q = u.Quantity('-nan', unit='cm')  # float() allows this
        assert np.isnan(q.value)

        q = u.Quantity('nan cm')
        assert np.isnan(q.value)
        assert q.unit == u.cm

        # Infinity
        q = u.Quantity('inf', unit='cm')
        assert np.isinf(q.value)

        q = u.Quantity('-inf', unit='cm')
        assert np.isinf(q.value)

        q = u.Quantity('inf cm')
        assert np.isinf(q.value)
        assert q.unit == u.cm

        q = u.Quantity('Infinity', unit='cm')  # float() allows this
        assert np.isinf(q.value)

        # make sure these strings don't parse...
        with pytest.raises(TypeError):
            q = u.Quantity('', unit='cm')

        with pytest.raises(TypeError):
            q = u.Quantity('spam', unit='cm')
Example #21
    def reportPowerDeviationsDifference(self, book, sheetName, deviationsA, deviationsB, gradient):
        
        sh = book.add_sheet(sheetName, cell_overwrite_ok=True)

        for i in range(self.windSpeedBins.numberOfBins):
            sh.col(i + 1).width = 256 * 5

        for j in range(self.turbulenceBins.numberOfBins):        

            turbulence = self.turbulenceBins.binCenterByIndex(j)
            row = self.turbulenceBins.numberOfBins - j - 1
            
            sh.write(row, 0, turbulence, self.percent_no_dp_style)
            
            for i in range(self.windSpeedBins.numberOfBins):

                windSpeed = self.windSpeedBins.binCenterByIndex(i)
                col = i + 1
                
                if j == 0: sh.write(self.turbulenceBins.numberOfBins, col, windSpeed, self.one_dp_style)
                
                if windSpeed in deviationsA.matrix:
                    if turbulence in deviationsA.matrix[windSpeed]:
                        deviationA = deviationsA.matrix[windSpeed][turbulence]
                        deviationB = deviationsB.matrix[windSpeed][turbulence]
                        if not np.isnan(deviationA) and not np.isnan(deviationB):
                            diff = abs(deviationA) - abs(deviationB)
                            sh.write(row, col, diff, gradient.getStyle(diff))
Example #22
 def test_update_player(self):
   player_dict = io.create_player_dict({'jamesle01': ''})
   player_dict['jamesle01']['gamelog_url_list'] = ['http://www.basketball-reference.com/players/j/jamesle01/gamelog/2013/',
                                                   'http://www.basketball-reference.com/players/j/jamesle01/gamelog/2015/',
                                                   'http://www.basketball-reference.com/players/j/jamesle01/gamelog/2014/']
   loaded_dict = scraper.load_player(player_dict, 'jamesle01')
   assert loaded_dict['jamesle01']['gamelog_data'] is not None
   gd = loaded_dict['jamesle01']['gamelog_data']
   assert len(gd) == 285
   # Okay now pretend this URL was there all along as well
   player_dict['jamesle01']['gamelog_url_list'].append('http://www.basketball-reference.com/players/j/jamesle01/gamelog/2016/')
   scraper.update_player(player_dict, 'jamesle01', 2016)
   gd = loaded_dict['jamesle01']['gamelog_data']
   assert len(gd) > 285  # but I mean, I don't know exactly what it'll be since more games are still being played this year
   import datetime       # so explicitly make sure this test is updated for the 2016-17 season
   assert datetime.datetime.today() <= datetime.datetime(year=2016, month=7, day=1)
   # Spot check a game to make sure the stats are what we expect
   test_game_dict = dict(gd.loc['2015-10-30'])
   reference_dict = {u'+/-': 7.0,
                     u'3P': 0.0,
                     u'3P%': 0.0,
                     u'3PA': 2.0,
                     u'AST': 4.0,
                     u'Age': u'30-304',
                     u'BLK': 0.0,
                     u'DFS': 41.3,
                     u'DRB': 3.0,
                     u'Date': nan,
                     u'FG': 13.0,
                     u'FG%': 0.684,
                     u'FGA': 19.0,
                     u'FT%': 0.6,
                     u'FT': 3.0,
                     u'FTA': 5.0,
                     u'G': 3.0,
                     u'GS': 1.0,
                     u'GmSc': 21.0,
                     u'HomeAway': nan,
                     u'MP': u'33:56',
                     u'ORB': 2.0,
                     u'Opp': u'MIA',
                     u'PF': 3.0,
                     u'PTS': 29.0,
                     u'Rk': 3.0,
                     u'STL': 1.0,
                     u'TOV': 4.0,
                     u'TRB': 5.0,
                     u'Tm': u'CLE',
                     u'WinLoss': u'W (+10)'}
   self.assertItemsEqual(reference_dict.keys(), test_game_dict.keys())
   for k in reference_dict:
     # fortunately almost equal works fine if the items == each other so we can just pass in strings w/o worrying
     # unfortunately nan doesn't match :( :( so we might as well case it out anyway; nevermind
     if isinstance(reference_dict[k], float):
       if isnan(reference_dict[k]):
         assert isnan(test_game_dict[k])
       else:
         self.assertAlmostEqual(reference_dict[k], test_game_dict[k], places=3)
     else:
       self.assertEqual(reference_dict[k], test_game_dict[k])
Example #23
    def test_float_modulus_corner_cases(self):
        # Check remainder magnitude.
        for dt in np.typecodes['Float']:
            b = np.array(1.0, dtype=dt)
            a = np.nextafter(np.array(0.0, dtype=dt), -b)
            rem = self.mod(a, b)
            assert_(rem <= b, 'dt: %s' % dt)
            rem = self.mod(-a, -b)
            assert_(rem >= -b, 'dt: %s' % dt)

        # Check nans, inf
        with suppress_warnings() as sup:
            sup.filter(RuntimeWarning, "invalid value encountered in remainder")
            for dt in np.typecodes['Float']:
                fone = np.array(1.0, dtype=dt)
                fzer = np.array(0.0, dtype=dt)
                finf = np.array(np.inf, dtype=dt)
                fnan = np.array(np.nan, dtype=dt)
                rem = self.mod(fone, fzer)
                assert_(np.isnan(rem), 'dt: %s' % dt)
                # MSVC 2008 returns NaN here, so disable the check.
                #rem = self.mod(fone, finf)
                #assert_(rem == fone, 'dt: %s' % dt)
                rem = self.mod(fone, fnan)
                assert_(np.isnan(rem), 'dt: %s' % dt)
                rem = self.mod(finf, fone)
                assert_(np.isnan(rem), 'dt: %s' % dt)
Example #24
def exact_roc(actuals, controls):
    """
    computes the area under the ROC curve for separating two sets. Uses all
    possible thresholds and trapezoidal interpolation. Also returns arrays of
    the true positive rate and the false positive rate.
    """

    actuals = np.ravel(actuals)
    controls = np.ravel(controls)
    if np.isnan(actuals).any():
        raise RuntimeError('NaN found in actuals')
    if np.isnan(controls).any():
        raise RuntimeError('NaN found in controls')

    thresholds = np.hstack([-np.inf,
        np.unique(np.concatenate((actuals,controls))), np.inf])[::-1]
    true_pos_rate = np.empty(thresholds.size)
    false_pos_rate = np.empty(thresholds.size)
    num_act = float(len(actuals))
    num_ctr = float(len(controls))

    for i, value in enumerate(thresholds):
        true_pos_rate[i] = (actuals >= value).sum() / num_act
        false_pos_rate[i] = (controls >= value).sum() / num_ctr
    auc = np.dot(np.diff(false_pos_rate),
            (true_pos_rate[0:-1]+true_pos_rate[1:])/2)
    return(auc, true_pos_rate, false_pos_rate)
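A short, hedged usage sketch (assuming numpy as np; the two inputs are scores for the positive and the control samples).

# Hypothetical usage of exact_roc (not part of the original example)
import numpy as np

actuals = np.random.normal(1.0, 1.0, size=200)   # scores of the positive class
controls = np.random.normal(0.0, 1.0, size=300)  # scores of the control class
auc, tpr, fpr = exact_roc(actuals, controls)
print(auc)  # roughly 0.76 for unit-variance classes one standard deviation apart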
Example #25
 def __init__(self, x, y):
             
     assert np.ndim(x)==2 and np.ndim(y)==2 and np.shape(x)==np.shape(y), \
         'x and y must be 2D arrays of the same size.'
     
     if np.any(np.isnan(x)) or np.any(np.isnan(y)):
          x = np.ma.masked_where( (np.isnan(x)) | (np.isnan(y)) , x)
          y = np.ma.masked_where( (np.isnan(x)) | (np.isnan(y)) , y)
         
     self.x_vert = x
     self.y_vert = y
     
     mask_shape = tuple([n-1 for n in self.x_vert.shape])
     self.mask_rho = np.ones(mask_shape, dtype='d')
     
      # If a masked array is given for the vertices, modify the mask such that
      # non-existent grid points are masked.  A cell requires all four
      # vertices to be defined as a water point.
     if isinstance(self.x_vert, np.ma.MaskedArray):
         mask = (self.x_vert.mask[:-1,:-1] | self.x_vert.mask[1:,:-1] | \
                 self.x_vert.mask[:-1,1:] | self.x_vert.mask[1:,1:])
         self.mask_rho = np.asarray(~(~np.bool_(self.mask_rho) | mask), dtype='d')
     
     if isinstance(self.y_vert, np.ma.MaskedArray):
         mask = (self.y_vert.mask[:-1,:-1] | self.y_vert.mask[1:,:-1] | \
                 self.y_vert.mask[:-1,1:] | self.y_vert.mask[1:,1:])
         self.mask_rho = np.asarray(~(~np.bool_(self.mask_rho) | mask), dtype='d')
     
     self._calculate_subgrids()
     self._calculate_metrics()        
Example #26
    def update(self, tick):
        security = tick['security']
        quote_time = datetime.datetime.fromtimestamp(int(tick['data']['timestamp']))
        last_price = tick['data']['last']
        log.debug("tick update security %s with tick %s, price %s" % (security.symbol, quote_time, last_price))
        # update sma

        # appending new row to df is not efficient
        data = tick['data']
        row = [quote_time, float(data['volume']), float(data['bid']), float(data['ask']), float(data['last']), float(data['high']), float(data['low'])]
        new_serie = pd.Series(row, index=['datetime', 'volume', 'bid', 'ask', 'last', 'high', 'low'])
        self.quotes = self.quotes.append(new_serie, ignore_index=True)

        self.sma_short = SMA(self.quotes, timeperiod=10, key='last')
        self.sma_mid = SMA(self.quotes, timeperiod=60, key='last')
        self.sma_long = SMA(self.quotes, timeperiod=200, key='last')

        if np.isnan(self.sma_long.iloc[-1]) or np.isnan(self.sma_mid.iloc[-1]) or np.isnan(self.sma_short.iloc[-1]):
            log.info('not enough data, skip to reduce risk')
            return None

        action = None
        if security.symbol not in self.account.holdings:
            action = self.check_buy(security)

        # already have some holdings
        else:
            action = self.check_sell(security)

        log.info('strategy action {0}'.format(action))
        return action
Example #27
def test_nan_arithmetic(ctx_getter):
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    def make_nan_contaminated_vector(size):
        shape = (size,)
        a = numpy.random.randn(*shape).astype(numpy.float32)
        #for i in range(0, shape[0], 3):
            #a[i] = float('nan')
        from random import randrange
        for i in range(size//10):
            a[randrange(0, size)] = float('nan')
        return a

    size = 1 << 20

    a = make_nan_contaminated_vector(size)
    a_gpu = cl_array.to_device(context, queue, a)
    b = make_nan_contaminated_vector(size)
    b_gpu = cl_array.to_device(context, queue, b)

    ab = a*b
    ab_gpu = (a_gpu*b_gpu).get()

    for i in range(size):
        assert numpy.isnan(ab[i]) == numpy.isnan(ab_gpu[i])
Example #28
    def test_autocorr(self):
        # Just run the function
        corr1 = self.ts.autocorr()

        # Now run it with the lag parameter
        corr2 = self.ts.autocorr(lag=1)

        # corr() with lag needs Series of at least length 2
        if len(self.ts) <= 2:
            self.assertTrue(np.isnan(corr1))
            self.assertTrue(np.isnan(corr2))
        else:
            self.assertEqual(corr1, corr2)

        # Choose a random lag between 1 and length of Series - 2
        # and compare the result with the Series corr() function
        n = 1 + np.random.randint(max(1, len(self.ts) - 2))
        corr1 = self.ts.corr(self.ts.shift(n))
        corr2 = self.ts.autocorr(lag=n)

        # corr() with lag needs Series of at least length 2
        if len(self.ts) <= 2:
            self.assertTrue(np.isnan(corr1))
            self.assertTrue(np.isnan(corr2))
        else:
            self.assertEqual(corr1, corr2)
Example #29
def norm_range(data, mins, maxs, lowbound, highbound):
  """ Normalizing the data with range normalization between lowbound and highbound
  
  Keyword parameters:
  
  data
    the data to be normalized, numpy.ndarray, each row is a sample

  mins, maxs
    arrays of minimum and maximum values that each feature can take

  lowbound, highbound
    the bounds of the normalization
"""
  denom = maxs - mins
  diff = highbound - lowbound
  addit = numpy.ndarray([data.shape[0],1])
  addit.fill(lowbound)
  for i in range(data.shape[0]): # for each sample (row)
    data[i] = diff * (data[i] - mins) / denom + lowbound
    nanCounter = numpy.isnan(data[i])
    # If the whole row was NaN, keep it NaN; otherwise fill NaNs with the mid-point
    if(sum(nanCounter)!=data.shape[1]):
      data[i][numpy.isnan(data[i])] = (lowbound + highbound) / 2
  return data
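A brief, hedged usage sketch (assuming numpy is imported, which the function expects; mins and maxs are per-feature bounds).

# Hypothetical usage of norm_range (not part of the original example)
import numpy

data = numpy.array([[0.0, 5.0], [10.0, numpy.nan]])
mins = numpy.array([0.0, 0.0])
maxs = numpy.array([10.0, 10.0])
normed = norm_range(data, mins, maxs, lowbound=-1.0, highbound=1.0)
# rows are scaled into [-1, 1]; the isolated NaN is replaced by the mid-point 0.0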
Example #30
def get_coded_data(cases_df, case_ids, coded_feature_names):
    """
    Retrieves the valences corresponding to case_ids,
    along with coded features, if any.
    Recodes unknown valences to neutral.
    args:
      cases_df: A dataframe containing the case variables.
      case_ids: list of sorted case_ids
      coded_feature_names: list of column names to pull from cases_df (ie 'geniss' or ['geniss','casetyp1'])
    returns:
      valences: np array of valences
      coded_feature_array: np array of coded features
      filtered_cases_df: Dataframe containing the sorted, filtered case variables
    """
    UNKNOWN_VALENCE = 0
    NEUTRAL_VALENCE = 2

    if isinstance(coded_feature_names, str):
        coded_feature_names = [coded_feature_names]

    print "coded_feature_names: ",coded_feature_names

    valences = []
    coded_feature_list = []
    for case_id in case_ids:
        valence = cases_df[cases_df['caseid'] == case_id]['direct1'].values[0]
        if np.isnan(valence)==False:
            valence = int(valence)
        else: valence = 2

        if coded_feature_names is not None:
            coded_feature_row = cases_df[cases_df['caseid'] == case_id][coded_feature_names].values[0]
            clean_row = []

            #clean row
            for val in coded_feature_row:
                if val and np.isnan(val) == False:
                    clean_row.append(int(val))
                else:
                    clean_row.append(0)
            assert clean_row[0]>=0, ""
            coded_feature_list.append(clean_row)
            
        # Replacing unknown valence variables with neutral scores.
        if valence == UNKNOWN_VALENCE:
            valence = NEUTRAL_VALENCE
        valences.append(valence)

    #one-hot encoding
    if coded_feature_names is not None:
        enc = OneHotEncoder()
        coded_feature_array = enc.fit_transform(np.array(coded_feature_list))
        print "Coded Feature Array shape: ", coded_feature_array.shape
    else: 
        coded_feature_array = np.array([])

    #Filter case df
    filtered_case_df = filter_cases_df(cases_df,case_ids)

    return np.array(valences),coded_feature_array,filtered_case_df
Example #31
 def plotting(self, filename, data, xstart, logx=False, yfull=True, rainbow=None, markers=None, pngdpi=300):
     """ data is expected to be a dictionary of n profiles,
     each profile is again a dictionary {'x': [], 'y': []}.
     Each profile will be plotted in a curve.
     """
     if not self.legends:
         for i in xrange(len(data)):
             self.legends.append("legends[%d]" % (i))
     if rainbow is None:
         rainbow = ['b','g','r','c','m','orange','y','k','silver','coral','lime','brown','violet','navy','greenyellow']
         # rainbow = brewer2mpl.get_map('Set3', 'qualitative', 12).mpl_colors  # see: Documentation @ https://github.com/jiffyclub/brewer2mpl/wiki
     if markers is None:
         markers = ['o','s','*','v','p','D','^','+','<','>','d','H','x']
     zorder0 = 5 # the lowest zorder for major curves
     
     # Sort the keys list as dictionary is non-ordered
     # and taking care of the ordering in legends as well
     data_keys = data.keys()
     data_keys.sort()
     order_legends = []
     for key in data_keys:
         idx = data.keys().index(key)
         order_legends.append(self.legends[idx])
     
     #====================================#
     #    Plotting commands start here    #
     #====================================#
     #----------------------------------------------
     import matplotlib
     # Use a non-interactive backend such as Agg (for PNGs), PDF, SVG, or PS.
     matplotlib.use('Agg') # make sure to call this before pyplot
     #----------------------------------------------
     import matplotlib.pyplot as plt
     # plt.rc('text', usetex = True)
     fig = plt.figure(1, figsize=self.figsize)
     
     # ax = fig.add_subplot(111)
     ax = fig.add_axes(self.position) # self.position = [left, bottom, width, height]
     
     # Plot the main data:
     #=====================
     for ialg, alg in enumerate(data_keys):
         if not logx:
             ax.plot(data[alg]['x'], data[alg]['y'],
                     drawstyle='steps-post', clip_on=False,
                     color=rainbow[ialg % len(rainbow)],
                     lw=self.lineWidth, alpha=self.alpha, zorder=zorder0+ialg)
         else:
             ax.semilogx(data[alg]['x'], data[alg]['y'],
                         drawstyle='steps-post', clip_on=False,
                         color=rainbow[ialg % len(rainbow)],
                         lw=self.lineWidth, alpha=self.alpha, zorder=zorder0+ialg)
     
     xLim = [xstart, max([max(data[alg]['x']) for alg in data_keys])]
     if logx and xLim[0]==0: xLim[0] = 1
     
     # Plot the horizontal 'extended' line from the end of each curve to xLim[1]
     # and also put a cross to mark the end of the curve
     #==========================================================================
     for ialg, alg in enumerate(data_keys):
         if data[alg]['x'][-1] < xLim[1]:
             if not logx:
                 # Extended horizontal line
                 ax.plot([data[alg]['x'][-1], xLim[1]], [data[alg]['y'][-2], data[alg]['y'][-2]],
                         clip_on=False,
                         color=rainbow[ialg % len(rainbow)],
                         lw=0.9*self.lineWidth, alpha=self.alpha, zorder=zorder0+ialg)
                 # Add a cross to mark the end of the curve
                 ax.plot([data[alg]['x'][-1]], [data[alg]['y'][-2]],
                         marker='x', markeredgecolor=rainbow[ialg % len(rainbow)],
                         markersize=2.7*self.markerSize,
                         markeredgewidth=0.8*self.lineWidth,
                         clip_on=False,
                         color=rainbow[ialg % len(rainbow)],
                         lw=self.lineWidth, alpha=1, zorder=zorder0+ialg)
             else:
                 # Extended horizontal line
                 ax.semilogx([data[alg]['x'][-1], xLim[1]], [data[alg]['y'][-2], data[alg]['y'][-2]],
                             clip_on=False,
                             color=rainbow[ialg % len(rainbow)],
                             lw=0.9*self.lineWidth, alpha=self.alpha, zorder=zorder0+ialg)
                 # Add a cross to mark the end of the curve
                 ax.semilogx([data[alg]['x'][-1]], [data[alg]['y'][-2]],
                             marker='x', markeredgecolor=rainbow[ialg % len(rainbow)],
                             markersize=2.7*self.markerSize,
                             markeredgewidth=0.8*self.lineWidth,
                             clip_on=False,
                             color=rainbow[ialg % len(rainbow)],
                             lw=self.lineWidth, alpha=1, zorder=zorder0+ialg)
     
     # Plot markers for the curves:
     #==============================
     #xarr = np.arange(xstart, data[data_keys[0]]['x'][-1] + 1) # generate data for the x-axis
     #print xarr[-1]
     #print data[data_keys[0]]['x'][-1] # TODO: debug ecdf.py: why 40000.2 ???
     nMarkers = self.nMarkers  # 5 markers on each line
     for ialg, alg in enumerate(data_keys):
         if not logx:
             lenx   = xLim[1] - xLim[0]
             alt    = int(lenx / (nMarkers*len(data))) # alternate among the first markers over lines (in idx unit)
             offset = int(lenx / nMarkers) # offset between 2 consecutive markers of a line (in idx unit)
             # Generate estimated x's for markers
             estxMarkers = int(alt/2) + np.arange(start=ialg*alt, stop=lenx-0.5*offset, step=offset, dtype=int)
         else:
             lenx   = np.log10(xLim[1]) - np.log10(xLim[0])
             alt    = lenx / (nMarkers*len(data)) # alternate among the first markers over lines (in idx unit)
             offset = lenx / nMarkers # offset between 2 consecutive markers of a line (in idx unit)
             # Generate estimated x's for markers
             estxMarkers = alt/2 + np.log10(xLim[0]) + np.arange(start=ialg*alt, stop=lenx-0.5*offset, step=offset)
             estxMarkers = 10 ** estxMarkers
             if ialg==0:
                 estxMarkers = np.delete(estxMarkers, 0) # skip the very first (not so visible) marker on the semilogx scale
         # Sample the real x and y of the markers from the curve
         markerCoord = {'x': [], 'y': []}
         icur = 0
         for estx in estxMarkers:
             for idx, x in enumerate(data[alg]['x']):
                 if idx < icur: continue
                 
                 # TODO: this is added to avoid a list index error with DIFF maxfevals
                 try: data[alg]['x'][idx+1]
                 except: continue
                 
                 if estx == x or (estx > x and estx < data[alg]['x'][idx+1]):
                     markerCoord['x'].append(x)
                     markerCoord['y'].append(data[alg]['y'][idx])
                     icur = idx + 1
                     break
         # Plot the sampled markers. TODO: alpha doesn't work for marker!
         for x, y in zip(markerCoord['x'], markerCoord['y']):
             if not logx:
                 ax.plot(x, y, ls='', clip_on=False, markerfacecolor='none',
                         marker=markers[ialg % len(markers)], markeredgecolor=rainbow[ialg % len(rainbow)],
                         markersize=self.markerSize*(1 if markers[ialg % len(markers)]!='*' else 1.45),
                         markeredgewidth=0.7*self.lineWidth, alpha=self.alpha, zorder=zorder0+ialg)
             else:
                 ax.semilogx(x, y, ls='', clip_on=False, markerfacecolor='none',
                             marker=markers[ialg % len(markers)], markeredgecolor=rainbow[ialg % len(rainbow)],
                             markersize=self.markerSize*(1 if markers[ialg % len(markers)]!='*' else 1.45),
                             markeredgewidth=0.7*self.lineWidth, alpha=self.alpha, zorder=zorder0+ialg)
     
     
     ax.set_xlim(xLim)
     if yfull:
         ax.set_ylim([0, 1])
     
     ax.grid(True, which='both', color="gray", alpha=0.6, ls=self.gridLineStyle, lw=self.gridLineWeight) # print visibility?
     #ax.grid(True, which='both', color="gray", alpha=0.1, ls='-', lw=0.3)
     #ax.grid(True, which='both', axis='x', color="gray", alpha=0.15, ls='-', lw=0.2)
     
     for axis in ['top','bottom','left','right']:
         ax.spines[axis].set_linewidth(self.axesLineWidth)
     
     
     #=============================#
     #  Make legends on the right  #
     #=============================#
     if self.rightLegend:
         xExtRatio      = self.xExtRatio
         xSegLenRatio   = self.xSegLenRatio
         yShrinkRatio   = self.yShrinkRatio
         labelBottomTop = self.labelBottomTop
         
         yLim = ax.get_ylim()
         xTicks = ax.get_xticks()  # backup xticks for use after making legends on the right
         #if xTicks[0] < xLim[0]:   # the 1st element usually doesn't appear on the axis
         #    xTicks = np.delete(xTicks, 0)  # thus remove it of the backed up xTicks
         while xTicks[-1] > xLim[1]:
             xTicks = np.delete(xTicks, -1)
         if logx:
             while xTicks[0] < xLim[0]:
                 xTicks = np.delete(xTicks, 0)
         
         endData = [data[key]['y'][-1] for key in data_keys]
         for idx, key in enumerate(data_keys):
             j = -1
             while np.isnan(endData[idx]):
                 endData[idx] = data[key]['y'][j]
                 j = j - 1
         idx = np.argsort(endData)
         xLength = xLim[1] - xLim[0]
         yLength = yLim[1] - yLim[0]
         if not logx:
             normFactor = (xLim[1] - xLim[0]) / xLim[1]
             xExt = xLim[1] + xExtRatio*xSegLenRatio * normFactor*xLength
         else:
             normFactor = (np.log10(xLim[1]) - np.log10(xLim[0])) / np.log10(xLim[1]) # helps adjust the extension parts when xstart is large
             xExt = 10 ** (np.log10(xLim[1]) + xExtRatio*xSegLenRatio * normFactor*np.log10(xLength))
         yExt = np.linspace(yLim[0] + labelBottomTop[0]*yShrinkRatio*yLength,
                            yLim[1] - labelBottomTop[1]*yShrinkRatio*yLength,
                            num=len(data), endpoint=True)
         
         # Plot all extension segments:
         #==============================
         for k, alg in enumerate(data_keys):
             if not logx:
                 ax.plot(np.array([xLim[1], xExt]),  np.array([endData[idx[k]], yExt[k]]), clip_on=False,
                         ls='-', lw=self.lineWidth, solid_capstyle="round",
                         color=rainbow[idx[k] % len(rainbow)], alpha=self.alpha,
                         marker=markers[idx[k] % len(markers)], markeredgecolor=rainbow[idx[k] % len(rainbow)],
                         markersize=self.markerSize*(1 if markers[idx[k] % len(markers)]!='*' else 1.45),
                         markerfacecolor='none', markeredgewidth=0.7*self.lineWidth, zorder=zorder0+idx[k])
                 ax.text(xExt*1.015*((self.markerSize/7.5)**1)*normFactor, yExt[k],
                         r'%s' % (order_legends[idx[k]]), verticalalignment='bottom', fontsize=self.rightFontSize)  # verticalalignment='center'
             else:
                 ax.semilogx(np.array([xLim[1], xExt]),  np.array([endData[idx[k]], yExt[k]]), clip_on=False,
                             ls='-', lw=self.lineWidth, solid_capstyle="round",
                             color=rainbow[idx[k] % len(rainbow)], alpha=self.alpha,
                             marker=markers[idx[k] % len(markers)], markeredgecolor=rainbow[idx[k] % len(rainbow)],
                             markersize=self.markerSize*(1 if markers[idx[k] % len(markers)]!='*' else 1.45),
                             markerfacecolor='none', markeredgewidth=0.7*self.lineWidth, zorder=zorder0+idx[k])
                 ax.text(xExt * 10**(0.015*((self.markerSize/7.5)**1)*normFactor*np.log10(xExt)), yExt[k],
                         r'%s' % (order_legends[idx[k]]), verticalalignment='bottom', fontsize=self.rightFontSize)  # verticalalignment='center'
         
         # Plot the vertical separation line:
         #===================================
         if self.keepBox:
             #if not logx:
             #    ax.plot(np.array([xLim[1], xLim[1]]), np.array([yLim[0],yLim[1]]), 'k-', lw=0.7, clip_on=False, zorder=1)
             #else:
             #    ax.semilogx(np.array([xLim[1], xLim[1]]), np.array([yLim[0],yLim[1]]), 'k-', lw=0.7, clip_on=False, zorder=1)
             #ax.spines['right'].set_visible(False)
             ax.spines['right'].set_linewidth(0.7)
             
             # Plot extension part of bottom and top bars:
             if not logx:
                 ax.plot(np.array([xLim[1], xLim[1] + xExtRatio * normFactor*xLength]), np.array([yLim[0], yLim[0]]), 'k-', lw=self.axesLineWidth, clip_on=False)
                 ax.plot(np.array([xLim[1], xLim[1] + xExtRatio * normFactor*xLength]), np.array([yLim[1], yLim[1]]), 'k-', lw=self.axesLineWidth, clip_on=False)
             else:
                 ax.semilogx(np.array([xLim[1], 10 ** (np.log10(xLim[1]) + xExtRatio * normFactor*np.log10(xLength))]), np.array([yLim[0], yLim[0]]), 'k-', lw=self.axesLineWidth, clip_on=False)
                 ax.semilogx(np.array([xLim[1], 10 ** (np.log10(xLim[1]) + xExtRatio * normFactor*np.log10(xLength))]), np.array([yLim[1], yLim[1]]), 'k-', lw=self.axesLineWidth, clip_on=False)
         else:
             if not logx:
                 ax.plot(np.array([xLim[1], xLim[1]]), np.array([yLim[0], yExt[-1]+(yExt[0]-yLim[0])]), clip_on=False, ls='-', c='k', lw=0.6)
             else:
                 ax.semilogx(np.array([xLim[1], xLim[1]]), np.array([yLim[0], yExt[-1]+(yExt[0]-yLim[0])]), clip_on=False, ls='-', c='k', lw=0.6)
             ax.spines['right'].set_visible(False)
             ax.spines['top'].set_visible(False)
         
      ax.yaxis.set_ticks_position('left') # turn off ticks on the right side
         # The following is not necessary anymore, thanks to: clip_on=False, self.position, and extension of bottom and top bars
         # ax.set_xlim(xLim[0], xLim[1] + xLength*xExtRatio)
         ax.set_xticks(xTicks)
         ax.set_ylim(yLim)
     else:
         print "TODO: Make traditional legends"
         pass
     
     
     # Write text annotations:
     try:
         if type(self.note) != type(list()):
             annotations = [self.note]
         else:
             annotations = self.note
         yrange = yLim[1] - yLim[0]
         for i, string in enumerate(annotations):
             if not logx:
                 ax.text(xLim[0] + 0.025*normFactor*xExt, yLim[1] - 0.035*yrange - i*(0.06*((self.annotationFontSize/15)**1)*yrange),
                         r"%s" % string, verticalalignment='top', fontsize=self.annotationFontSize,
                         bbox=dict(facecolor='white', edgecolor='none')) # verticalalignment='bottom'
             else:
                 ax.text(xLim[0] * 10**(0.025*normFactor*np.log10(xExt)), yLim[1] - 0.035*yrange - i*(0.06*((self.annotationFontSize/15)**1)*yrange),
                         r"%s" % string, verticalalignment='top', fontsize=self.annotationFontSize,
                         bbox=dict(facecolor='white', edgecolor='none')) # verticalalignment='bottom'
     except:
         pass
     
     
     #ax.xaxis.tick_bottom()
     #ax.yaxis.tick_left()
     #ax.tick_params(direction='inout')  # for both axes
     # ax.xaxis.set_tick_params(direction='inout')
     
     #[line.set_zorder(3) for line in ax.lines]
     import matplotlib as mpl
     mpl.rcParams['axes.unicode_minus'] = False
     #from matplotlib.ticker import ScalarFormatter
     #majorFormatter = ScalarFormatter(useMathText=True, useOffset=False)
     #majorFormatter.set_scientific(True)
     #majorFormatter.set_powerlimits((-4,4))
     #ax.xaxis.set_major_formatter(majorFormatter)
     #ax.yaxis.set_major_formatter(majorFormatter)
     
     # Change fontsize for x and y ticks
     ax.tick_params(axis='x', labelsize=self.xTickLabelSize)
     ax.tick_params(axis='y', labelsize=self.yTickLabelSize)
     
     ax.set_xlabel(self.xlabel, fontsize=self.bottomFontSize)
     ax.set_ylabel(self.ylabel, fontsize=self.leftFontSize)
     ax.set_title(self.title, fontsize=self.topFontSize)
     
     # Save the plot to file:
     ext = filename[-4:]
     if not ext in ['.pdf', '.eps', '.png']:
         filename = filename + '.pdf'
     fig.savefig(filename)
     #fig.savefig(filename[:-4] + '.png', dpi=(pngdpi))
     
     # plt.show()
     plt.close(fig)
Example #32
def convertNansToZeros(ma):
    nan_elements = np.flatnonzero(np.isnan(ma.data))
    if len(nan_elements) > 0:
        ma.data[nan_elements] = 0.0
    return ma
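A hedged usage sketch: the function indexes a flat .data attribute, so the example assumes ma is a scipy.sparse matrix (a numpy masked array would behave similarly); this is an assumption, since the snippet does not show the caller.

# Hypothetical usage of convertNansToZeros (not part of the original example)
import numpy as np
from scipy.sparse import csr_matrix

m = csr_matrix(np.array([[1.0, np.nan], [0.0, 2.0]]))
m = convertNansToZeros(m)
print(m.toarray())  # the stored NaN is replaced by 0.0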
Example #33
def nan_fill(a):
    a = a.copy()
    nan_idx = np.where(np.isnan(a))[0]
    a[nan_idx] = a[nan_idx - 1]
    return a
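A short usage sketch plus a caveat (assuming numpy as np): the fill only looks one position back, so a NaN in position 0 or a run of consecutive NaNs is not fully repaired.

# Hypothetical usage of nan_fill (not part of the original example)
import numpy as np

a = np.array([1.0, np.nan, 3.0, np.nan, 5.0])
print(nan_fill(a))  # [1. 1. 3. 3. 5.]
# Caveat: each NaN is filled from its original predecessor, so consecutive NaNs
# stay NaN and a leading NaN would be filled from the last element (index -1).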
Example #34
 def check_null_(self, X):
     nans = np.isnan(X)
     infs = np.isinf(X)
     nan_summary = np.sum(np.logical_or(nans, infs))
     if nan_summary > 0:
         raise ValueError("nans/inf in frame = %s" % (nan_summary))
Example #35
def convertNansToOnes(pArray):
    nan_elements = np.flatnonzero(np.isnan(pArray))
    if len(nan_elements) > 0:
        pArray[nan_elements] = 1.0
    return pArray
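A one-line usage sketch (assuming numpy as np; the flat indexing suggests 1-D arrays).

# Hypothetical usage of convertNansToOnes (not part of the original example)
import numpy as np

p = np.array([0.2, np.nan, 0.7])
print(convertNansToOnes(p))  # [0.2 1.  0.7]; the array is modified in place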
Example #36
        # init
        if iteration == 0:
            print('initializing the model...')
            sess.run(initializer)
            init_loss = sess.run(init_pass, {x_init: x_batch, y_init: y_batch})
            sess.graph.finalize()
        else:
            xfs = np.split(x_batch, args.nr_gpu)
            yfs = np.split(y_batch, args.nr_gpu)
            feed_dict = {tf_lr: lr, tf_student_grad_scale: student_grad_scale}
            feed_dict.update({xs[i]: xfs[i] for i in range(args.nr_gpu)})
            feed_dict.update({ys[i]: yfs[i] for i in range(args.nr_gpu)})
            l, _ = sess.run([train_loss, train_step], feed_dict)
            train_iter_losses.append(l)
            if np.isnan(l):
                print('Loss is NaN')
                sys.exit(0)

            if (iteration + 1) % print_every == 0:
                avg_train_loss = np.mean(train_iter_losses)
                losses_avg_train.append(avg_train_loss)
                train_iter_losses = []
                print('%d/%d train_loss=%6.8f bits/value=%.3f' %
                      (iteration + 1, config.max_iter, avg_train_loss,
                       avg_train_loss / config.ndim / np.log(2.)))
                corr = config.student_layer.corr.eval().flatten()

            if (iteration + 1) % config.save_every == 0:
                current_time = time.time()
                eta_time = (config.max_iter - iteration
Example #37
0
def check_invalid_values(x):
    return np.isnan(x).sum() + np.isinf(x).sum()
Example #38
0
    def run(self, counts, background=None, exposure=None):
        """
        Run image smoothing.

        Parameters
        ----------
        counts : `~gammapy.maps.WcsNDMap`
            Counts map
        background : `~gammapy.maps.WcsNDMap`
            Background map
        exposure : `~gammapy.maps.WcsNDMap`
            Exposure map

        Returns
        -------
        images : dict of `~gammapy.maps.WcsNDMap`
            Smoothed images; keys are:
                * 'counts'
                * 'background'
                * 'flux' (optional)
                * 'scales'
                * 'significance'.
        """
        pixel_scale = counts.geom.pixel_scales.mean()
        kernels = self.kernels(pixel_scale)

        cubes = {}
        cubes["counts"] = scale_cube(counts.data, kernels)

        if background is not None:
            cubes["background"] = scale_cube(background.data, kernels)
        else:
            # TODO: Estimate background with asmooth method
            raise ValueError("Background estimation required.")

        if exposure is not None:
            flux = (counts.data - background.data) / exposure.data
            cubes["flux"] = scale_cube(flux, kernels)

        cubes["significance"] = self._significance_cube(
            cubes, method=self.parameters["method"]
        )

        smoothed = self._reduce_cubes(cubes, kernels)

        result = {}

        for key in ["counts", "background", "scale", "significance"]:
            data = smoothed[key]

            # set remaining pixels with significance < threshold to mean value
            if key in ["counts", "background"]:
                mask = np.isnan(data)
                data[mask] = np.mean(locals()[key].data[mask])
            result[key] = WcsNDMap(counts.geom, data)

        if exposure is not None:
            data = smoothed["flux"]
            mask = np.isnan(data)
            data[mask] = np.mean(flux[mask])
            result["flux"] = WcsNDMap(counts.geom, data)

        return result
Example #39
0
#######################################################################################################
# submit without pseudo
input_fn = 'ann_3tta_th4_test.csv'
#######################################################################################################
input_df = pd.read_csv(sub_dir + input_fn)
propagation_step = 100

test_ids = input_df.RunID
source_ids = input_df.SourceID
coarse_time = input_df.SourceTime
#=======================================================================================================================

x_trn = df_train.iloc[:, 1:100]
# scale train
X = x_trn.values
where_are_NaNs = np.isnan(X)
where_are_infs = np.isinf(X)
X[where_are_NaNs] = 0
X[where_are_infs] = 0

scaler = RobustScaler()
scaler.fit(X)
scaled_train_X = scaler.transform(X)
X = scaled_train_X

#scaler = joblib.load("scaler.save")
# bins for test segment
bins = np.arange(0, 3000, 30)

#=======================================================================================================================
# Parallel code
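Side note on the cleaning step above: replacing NaN and +/-inf with zero before scaling can also be done in a single call with np.nan_to_num (the keyword form needs NumPy >= 1.17). A small self-contained sketch, with toy data standing in for x_trn.values:

import numpy as np
from sklearn.preprocessing import RobustScaler

# toy feature matrix with invalid values (stand-in for x_trn.values above)
X = np.array([[1.0, np.nan, 3.0],
              [np.inf, 5.0, -np.inf],
              [7.0, 8.0, 9.0]])

# nan -> 0.0, +/-inf -> 0.0 in a single call, equivalent to the two boolean masks above
X_clean = np.nan_to_num(X, nan=0.0, posinf=0.0, neginf=0.0)

scaler = RobustScaler().fit(X_clean)
scaled_train_X = scaler.transform(X_clean)
print(scaled_train_X.shape)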
Example #40
0
 def assertEqualWithNan(self, actual, expected):
     """Like assertEqual, but NaN==NaN."""
     self.assertTrue(((actual == expected) |
                      (np.isnan(actual) & np.isnan(expected))).all())
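A brief aside on the helper above: plain elementwise equality reports NaN != NaN, so the OR with np.isnan on both operands is what lets NaN positions count as matches. Outside of a TestCase, np.testing.assert_array_equal applies the same NaN-aware rule; a small demonstration:

import numpy as np

actual = np.array([1.0, np.nan, 3.0])
expected = np.array([1.0, np.nan, 3.0])

# same idea as assertEqualWithNan: equal OR both-NaN
match = (actual == expected) | (np.isnan(actual) & np.isnan(expected))
assert match.all()

# numpy's built-in NaN-aware comparison behaves the same way
np.testing.assert_array_equal(actual, expected)  # NaNs in the same positions pass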
Example #41
0
def calc_slope_vars(rn_sect, gain_sect, gdq_sect, group_time, max_seg):
    """
    Calculate the segment-specific variance arrays for the given
    integration.

    Parameters
    ----------
    rn_sect : ndarray
        read noise values for all pixels in data section, 2-D float

    gain_sect : ndarray
        gain values for all pixels in data section, 2-D float

    gdq_sect : ndarray
        data quality flags for pixels in section, 3-D int

    group_time : float
        Time increment between groups, in seconds.

    max_seg : int
        maximum number of segments fit

    Returns
    -------
    den_r3 : ndarray
        for a given integration, the reciprocal of the denominator of the
        segment-specific variance of the segment's slope due to read noise, 3-D float

    den_p3 : ndarray
        for a given integration, the reciprocal of the denominator of the
        segment-specific variance of the segment's slope due to Poisson noise, 3-D float

    num_r3 : ndarray
        numerator of the segment-specific variance of the segment's slope
        due to read noise, 3-D float

    segs_beg_3 : ndarray
        lengths of segments for all pixels in the given data section and
        integration, 3-D int
    """
    (nreads, asize2, asize1) = gdq_sect.shape
    npix = asize1 * asize2
    imshape = (asize2, asize1)

    # Create integration-specific sections of input arrays for determination
    #   of the variances.
    gdq_2d = gdq_sect[:, :, :].reshape((nreads, npix))
    gain_1d = gain_sect.reshape(npix)
    gdq_2d_nan = gdq_2d.copy()  # group dq with SATS will be replaced by nans
    gdq_2d_nan = gdq_2d_nan.astype(np.float32)

    wh_sat = np.where(np.bitwise_and(gdq_2d, constants.dqflags["SATURATED"]))
    if len(wh_sat[0]) > 0:
        gdq_2d_nan[wh_sat] = np.nan  # set all SAT groups to nan

    del wh_sat

    # Get lengths of semiramps for all pix [number_of_semiramps, number_of_pix]
    segs = np.zeros_like(gdq_2d)

    # Counter of semiramp for each pixel
    sr_index = np.zeros(npix, dtype=np.uint8)
    pix_not_done = np.ones(npix, dtype=bool)  # initialize to True

    i_read = 0
    # Loop over reads for all pixels to get segments (segments per pixel)
    while (i_read < nreads and np.any(pix_not_done)):
        gdq_1d = gdq_2d_nan[i_read, :]
        wh_good = np.where(gdq_1d == 0)  # good groups

        # if this group is good, increment those pixels' segments' lengths
        if len(wh_good[0]) > 0:
            segs[sr_index[wh_good], wh_good] += 1
        del wh_good

        # Locate any CRs that appear before the first SAT group...
        wh_cr = np.where(gdq_2d_nan[i_read, :].astype(np.int32)
                         & constants.dqflags["JUMP_DET"] > 0)

        # ... but not on final read:
        if (len(wh_cr[0]) > 0 and (i_read < nreads - 1)):
            sr_index[wh_cr[0]] += 1
            segs[sr_index[wh_cr], wh_cr] += 1

        del wh_cr

        # If current group is a NaN, this pixel is done (pix_not_done is False)
        wh_nan = np.where(np.isnan(gdq_2d_nan[i_read, :]))
        if len(wh_nan[0]) > 0:
            pix_not_done[wh_nan[0]] = False

        del wh_nan

        i_read += 1

    segs = segs.astype(np.uint8)
    segs_beg = segs[:max_seg, :]  # the leading nonzero lengths

    # Create reshaped version [ segs, y, x ] to simplify computation
    segs_beg_3 = segs_beg.reshape(max_seg, imshape[0], imshape[1])
    segs_beg_3 = remove_bad_singles(segs_beg_3)

    # Create a version 1 less for later calculations for the variance due to
    #   Poisson, with a floor=1 to handle single-group segments
    wh_pos_3 = np.where(segs_beg_3 > 1)
    segs_beg_3_m1 = segs_beg_3.copy()
    segs_beg_3_m1[wh_pos_3] -= 1
    segs_beg_3_m1[segs_beg_3_m1 < 1] = 1

    # For a segment, the variance due to Poisson noise
    #   = slope/(tgroup * gain * (ngroups-1)),
    #   where slope is the estimated median slope, tgroup is the group time,
    #   and ngroups is the number of groups in the segment.
    #   Here the denominator of this quantity will be computed, which will be
    #   later multiplied by the estimated median slope.

    # Suppress, then re-enable, harmless arithmetic warnings, as NaN will be
    #   checked for and handled later
    warnings.filterwarnings("ignore", ".*invalid value.*", RuntimeWarning)
    warnings.filterwarnings("ignore", ".*divide by zero.*", RuntimeWarning)
    den_p3 = 1. / (group_time * gain_1d.reshape(imshape) * segs_beg_3_m1)
    warnings.resetwarnings()

    # For a segment, the variance due to readnoise noise
    # = 12 * readnoise**2 /(ngroups_seg**3. - ngroups_seg)/( tgroup **2.)
    num_r3 = 12. * (rn_sect / group_time)**2.  # always >0

    # Reshape for every group, every pixel in section
    num_r3 = np.dstack([num_r3] * max_seg)
    num_r3 = np.transpose(num_r3, (2, 0, 1))

    # Denominator den_r3 = 1./(segs_beg_3 **3.-segs_beg_3). The minimum number
    #   of allowed groups is 2, which will apply if there is actually only 1
    #   group; in this case den_r3 = 1/6. This covers the case in which there is
    #   only one good group at the beginning of the integration, so it will be
    #   be compared to the plane of (near) zeros resulting from the reset. For
    #   longer segments, this value is overwritten below.
    den_r3 = num_r3.copy() * 0. + 1. / 6
    wh_seg_pos = np.where(segs_beg_3 > 1)

    # Suppress, then, re-enable harmless arithmetic warnings, as NaN will be
    #   checked for and handled later
    warnings.filterwarnings("ignore", ".*invalid value.*", RuntimeWarning)
    warnings.filterwarnings("ignore", ".*divide by zero.*", RuntimeWarning)
    den_r3[wh_seg_pos] = 1. / (
        segs_beg_3[wh_seg_pos]**3. - segs_beg_3[wh_seg_pos]
    )  # overwrite where segs>1
    warnings.resetwarnings()

    return (den_r3, den_p3, num_r3, segs_beg_3)
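To make the variance formulas in the comments above concrete, here is a toy single-segment calculation following the same expressions: the read-noise contribution 12 * RN^2 / ((n^3 - n) * tgroup^2), and the Poisson denominator tgroup * gain * (n - 1). The numbers are illustrative only and are not taken from any instrument.

import numpy as np

# illustrative inputs (not from any real detector)
read_noise = 10.0   # electrons, per-pixel read noise
gain = 2.0          # e-/DN
group_time = 10.0   # seconds between groups
ngroups = 5         # groups in the segment

# read-noise contribution to the slope variance, as in num_r3 * den_r3 above
var_readnoise = 12.0 * read_noise**2 / ((ngroups**3 - ngroups) * group_time**2)

# Poisson denominator; the estimated median slope is divided by this later
poisson_denominator = group_time * gain * (ngroups - 1)

print(var_readnoise)        # 0.1 (e-/s)^2 for these toy numbers
print(poisson_denominator)  # 80.0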
Example #42
0
    def multichannel_correction_Dam(self):
        from scipy import optimize
        import ctypes  # standard library; used for the Windows message box below
        ctypes.windll.user32.MessageBoxW(
            0, "Select area for multichannel correction", "", 0)
        rs = self.SelectRectangle()
        x = [int(rs.corners[0][0]), int(rs.corners[0][2])]
        y = [int(rs.corners[1][0]), int(rs.corners[1][2])]
        #x[0] = 650
        #x[1]=7104
        #y[0] = 225
        #y[1] = 293
        nod_roi = self.OD[y[0]:(y[1] + 1), x[0]:(x[1] + 1), 0:3]
        nod_roi = nod_roi - self.OD0
        self.alpha = np.zeros([self.OD.shape[0], self.OD.shape[1]])
        self.beta = np.zeros([self.OD.shape[0], self.OD.shape[1]])
        lim = 0.2
        cont = 0.0
        cont2 = 0.0
        cont_lim = nod_roi.shape[0] * nod_roi.shape[1] * nod_roi.shape[2]
        F = np.zeros([3, 1])
        J = np.zeros([3, 3])

        coef1 = self.Calibration.DevicParam_A
        coef2 = self.Calibration.DevicParam_B
        coef3 = self.Calibration.DevicParam_n
        alpha_media = self.Calibration.AlphaCal
        beta_media = self.Calibration.BetaCal
        lambda_alpha = 1.0 / np.power(self.Calibration.SigmaAlphaCal, 2)
        lambda_beta = 1.0 / np.power(self.Calibration.SigmaBetaCal, 2)
        sigma_coef1 = self.Calibration.Sigma_A
        sigma_coef2 = self.Calibration.Sigma_B
        sigma_coef3 = self.Calibration.Sigma_n

        od0 = self.OD0

        indice_medio = 0
        alpha_average = 0
        beta_average = 0

        for h in range(nod_roi.shape[0]):
            if cont2 / cont_lim > 0.05:
                cont2 = 0.0
                print(
                    f'Multichannel correction process: {np.trunc(100*cont/cont_lim)}%'
                )
            for w in range(nod_roi.shape[1]):
                netOD = np.array([nod_roi[h, w, c] for c in [0, 1, 2]])
                D_pixel = coef1 * netOD + np.sign(netOD) * coef2 * np.power(
                    np.abs(netOD), coef3)
                sigma_D = np.sqrt(
                    np.power(netOD * sigma_coef1, 2) +
                    np.power(np.abs(netOD), 2 * coef3) *
                    np.power(sigma_coef2, 2))
                nod = np.array([
                    0.0 + (netOD[c] > 0) * netOD[c] + (netOD[c] < 0) * 0.00001
                    for c in [0, 1, 2]
                ])
                dose = coef1 * nod + coef2 * np.power(nod, coef3)
                d_dose = coef1 + coef3 * coef2 * np.power(nod, coef3 - 1)
                d2_dose = (coef3 - 1) * coef3 * coef2 * np.power(
                    nod, coef3 - 2)
                d3_dose = (coef3 - 2) * (coef3 - 1) * coef3 * coef2 * np.power(
                    nod, coef3 - 3)

                Ca = np.sum(np.power(d_dose * nod / sigma_D, 2)) + lambda_alpha
                Cb = np.sum(np.power(d_dose * od0 / sigma_D, 2)) + lambda_beta
                Cab = np.sum(np.power(d_dose / sigma_D, 2) * nod * od0)
                Cia = np.sum(d_dose * nod * (D_pixel - dose) /
                             np.power(sigma_D, 2)) + lambda_alpha * alpha_media
                Cib = np.sum(d_dose * od0 * (D_pixel - dose) /
                             np.power(sigma_D, 2)) + lambda_beta * beta_media

                d_Ca = 2 * d_dose * nod * (nod * d2_dose + d_dose) / np.power(
                    sigma_D, 2)
                d_Cb = 2 * d_dose * d2_dose * np.power(od0 / sigma_D, 2)
                d_Cab = d_dose * od0 * (2 * nod * d2_dose + d_dose) / np.power(
                    sigma_D, 2)
                d_Cia = (d2_dose * nod * (D_pixel - dose) + d_dose *
                         (D_pixel - dose) -
                         np.power(d_dose, 2) * nod) / np.power(sigma_D, 2)
                d_Cib = (d2_dose * od0 * (D_pixel - dose) -
                         np.power(d_dose, 2) * od0) / np.power(sigma_D, 2)

                alpha = (Cia * Cb - Cib * Cab) / (Ca * Cb - Cab * Cab)
                beta = (Cia * Cab - Cib * Ca) / (Cab * Cab - Ca * Cb)

                if np.isnan(alpha) or np.isnan(beta):
                    xx = 1

                d_alpha = (d_Cia * Cb + Cia * d_Cb - d_Cib * Cab -
                           Cib * d_Cab - alpha *
                           (d_Ca * Cb + Ca * d_Cb - 2. * Cab * d_Cab)) / (
                               Ca * Cb - Cab * Cab)
                d_beta = (d_Cia * Cab + Cia * d_Cab - d_Cib * Ca - Cib * d_Ca -
                          beta *
                          (2. * Cab * d_Cab - d_Ca * Cb - Ca * d_Cb)) / (
                              Cab * Cab - Ca * Cb)

                var_NOD = nod * alpha + od0 * beta
                d_var_NOD = alpha + nod * d_alpha + od0 * d_beta

                mu = dose + d_dose * var_NOD
                der_mu = d_dose * (alpha + 1) + d2_dose * var_NOD

                d_mu = d_dose * (d_var_NOD + 1) + d2_dose * var_NOD
                d_der_mu = d2_dose * (alpha + 1 + d_var_NOD
                                      ) + d_dose * d_alpha + d3_dose * var_NOD

                dif = (mu - D_pixel) / np.power(sigma_D, 2)

                F[0, 0] = np.sum(dif * der_mu / d_dose)
                F[1, 0] = dose[0] - dose[1]
                F[2, 0] = dose[0] - dose[2]

                for c in np.arange(3):
                    J[0, c] = d_mu[c] * der_mu[c] / (d_dose[c] * np.power(sigma_D[c], 2))+dif[c] * d_der_mu[c] / \
                            d_dose[c] - dif[c] * der_mu[c] * d2_dose[c] / np.power(d_dose[c], 2)
                J[1, 0] = d_dose[0]
                J[1, 1] = -d_dose[1]
                J[2, 0] = d_dose[0]
                J[2, 2] = -d_dose[2]

                A = np.linalg.inv(J)

                indice_max = 0
                dif_dosis = 1
                F_new = np.zeros([3, 1])
                NOD_pix = np.array([0.0, 0.0, 0.0]) + nod
                NOD_new = NOD_pix - np.matrix.transpose(np.linalg.solve(J, F))
                NOD_new[NOD_new < 0] = 1e-10

                while (indice_max <= 30) and (dif_dosis > 1.0e-10):
                    nod = np.array([0.0, 0.0, 0.0]) + NOD_new
                    dose = coef1 * nod + coef2 * np.power(nod, coef3)
                    d_dose = coef1 + coef3 * coef2 * np.power(nod, coef3 - 1)
                    d2_dose = (coef3 - 1) * coef3 * coef2 * np.power(
                        nod, coef3 - 2)

                    Ca = np.sum(np.power(d_dose * nod / sigma_D, 2),
                                axis=1)[0] + lambda_alpha
                    Cb = np.sum(np.power(d_dose * od0 / sigma_D, 2),
                                axis=1)[0] + lambda_beta
                    Cab = np.sum(np.power(d_dose / sigma_D, 2) * nod * od0,
                                 axis=1)[0]
                    Cia = np.sum(d_dose * nod * (D_pixel - dose) / np.power(sigma_D, 2),axis=1)[0] + \
                          lambda_alpha * alpha_media
                    Cib = np.sum(d_dose * od0 * (D_pixel - dose) / np.power(sigma_D, 2),axis=1)[0] + \
                          lambda_beta * beta_media

                    alpha = (Cia * Cb - Cib * Cab) / (Ca * Cb - Cab * Cab)
                    beta = (Cia * Cab - Cib * Ca) / (Cab * Cab - Ca * Cb)

                    var_NOD = nod * alpha + od0 * beta

                    mu = dose + d_dose * var_NOD
                    der_mu = d_dose * (alpha + 1) + d2_dose * var_NOD

                    dif = (mu - D_pixel) / np.power(sigma_D, 2)

                    F_new[0, 0] = np.sum(dif * der_mu / d_dose, axis=1)[0]
                    F_new[1, 0] = dose[0, 0] - dose[0, 1]
                    F_new[2, 0] = dose[0, 0] - dose[0, 2]

                    dif_NOD = NOD_new - NOD_pix
                    dif_F = F_new - F

                    denom = np.matmul(dif_NOD, np.matmul(A, dif_F))[0]
                    if denom == 0: denom = 1

                    #dif_NODt = np.zeros([1,3])
                    #dif_NODt[0,:] = dif_NOD[:]
                    A = A + np.matmul(
                        np.matmul(
                            np.transpose(dif_NOD) - np.matmul(A, dif_F),
                            dif_NOD), A) / denom

                    NOD_pix = np.array([0.0, 0.0, 0.0]) + NOD_new
                    NOD_new = NOD_pix - np.matrix.transpose(np.matmul(
                        A, F_new))
                    NOD_new[NOD_new < 0] = 1e-10

                    F = np.zeros([3, 1]) + F_new
                    dosis_new = coef1 * NOD_new + coef2 * np.power(
                        NOD_new, coef3)

                    dif_dosis = np.max(
                        np.abs(
                            np.array([
                                dosis_new[0, 0] - dosis_new[0, 1],
                                dosis_new[0, 0] - dosis_new[0, 2],
                                dosis_new[0, 1] - dosis_new[0, 2]
                            ]))) / np.min(dosis_new)
                    indice_max = indice_max + 1

                indice_medio = indice_medio + indice_max
                alpha_average = alpha_average + alpha
                beta_average = beta_average + beta
                self.alpha[h + y[0], w + x[0]] = alpha
                self.beta[h + y[0], w + x[0]] = beta

                for c in [0, 1, 2]:
                    #aux = NOD_new[0, c] + od0[c]
                    self.OD[h + y[0], w + x[0], c] = NOD_new[0, c] + od0[c]
                    #self.OD[h + y[0], w + x[0], c] = (self.OD[h + y[0], w + x[0], c]-
                    #                                 od0[c]*(1+beta))/(1.0+alpha) + od0[c]
                cont2 = cont2 + 3.0
                cont = cont + 3.0

        indice_medio = indice_medio / (nod_roi.shape[0] * nod_roi.shape[1])
        alpha_average = alpha_average / (nod_roi.shape[0] * nod_roi.shape[1])
        beta_average = beta_average / (nod_roi.shape[0] * nod_roi.shape[1])
        print(f'alpha average = {alpha_average}')
        print(f'beta average = {beta_average}')
        print(f'indice medio = {indice_medio}')
        alpha_image = np.array((65535 * 0.5 + self.alpha * 1000))
        alpha_image = np.clip(alpha_image, 0, 65535)  # np.clip returns a new array; keep the result
        imname = 'AlphaMap_' + self.imagefilename
        tifffile.imwrite(self.workingdir + imname,
                         alpha_image.astype(np.uint16),
                         resolution=(self.dpi[0], self.dpi[1]))

        beta_image = np.array((65535 * 0.5 + self.beta * 1000))
        beta_image = np.clip(beta_image, 0, 65535)  # keep the clipped result
        imname = 'BetaMap_' + self.imagefilename
        tifffile.imwrite(self.workingdir + imname,
                         beta_image.astype(np.uint16),
                         resolution=(self.dpi[0], self.dpi[1]))
Example #43
0
    def execute(self):

        init_process_logger('log.txt')
        self.output_log.setValue('log.txt')

        from os.path import basename
        from flyingpigeon import sdm
        from flyingpigeon.utils import archive, archiveextract, download
        self.status.set('Start process', 0)

        try:
            logger.info('reading the arguments')
            resources_raw = self.getInputValues(identifier='resources')
            csv_url = self.getInputValues(identifier='gbif')[0]
            period = self.getInputValues(identifier='period')
            period = period[0]
            indices = self.getInputValues(identifier='input_indices')
            archive_format = self.archive_format.getValue()
            logger.info('indices %s ' % indices)
            logger.debug('csv_url %s' % csv_url)
        except Exception as e:
            logger.error('failed to read in the arguments %s ' % e)
            raise

        try:
            logger.info('set up the environment')
            csv_file = download(csv_url)
            resources = archiveextract(resources_raw)
        except Exception as e:
            logger.error('failed to set up the environment %s ' % e)
            raise

        try:
            self.status.set('read in latlon coordinates', 10)
            latlon = sdm.latlon_gbifcsv(csv_file)
            logger.info('got occurence coordinates %s ' % csv_file)
        except Exception as e:
            logger.exception(
                'failed to extract the latlon points from file: %s: %s' %
                (csv_file, e))

        try:
            self.status.set('plot map', 20)
            from flyingpigeon.visualisation import map_gbifoccurrences

            # latlon = sdm.latlon_gbifdic(gbifdic)
            occurence_map = map_gbifoccurrences(latlon)
        except Exception as e:
            logger.exception('failed to plot occurence map %s' % e)

        #################################
        # calculate the climate indices
        #################################

        # get the indices
        ncs_indices = None
        try:
            self.status.set(
                'start calculation of climate indices for %s' % indices, 30)
            ncs_indices = sdm.get_indices(resources=resources, indices=indices)
            logger.info('indice calculation done')
        except:
            msg = 'failed to calculate indices'
            logger.exception(msg)
            raise Exception(msg)

        try:
            self.status.set('get domain', 30)
            domains = set()
            for resource in ncs_indices:
                # get_domain works only if metadata are set in a correct way
                domains = domains.union([basename(resource).split('_')[1]])
            if len(domains) == 1:
                domain = list(domains)[0]
                logger.debug('Domain %s found in indices files' % domain)
            else:
                logger.error('Not a single domain in indices files %s' %
                             domains)
        except Exception as e:
            logger.exception('failed to get domains %s' % e)

        try:
            self.status.set('generating the PA mask', 20)
            PAmask = sdm.get_PAmask(coordinates=latlon, domain=domain)
            logger.info('PA mask successfully generated')
        except Exception as e:
            logger.exception('failed to generate the PA mask: %s' % e)

        try:
            self.status.set('Plotting PA mask', 25)
            from flyingpigeon.visualisation import map_PAmask
            PAmask_png = map_PAmask(PAmask)
        except Exception as e:
            logger.exception('failed to plot the PA mask: %s' % e)

        try:
            # sort indices
            indices_dic = None
            indices_dic = sdm.sort_indices(ncs_indices)
            logger.info('indice files sorted for %s Datasets' %
                        len(indices_dic.keys()))
        except:
            msg = 'failed to sort indices'
            logger.exception(msg)
            raise Exception(msg)

        ncs_references = []
        species_files = []
        stat_infos = []

        for count, key in enumerate(indices_dic.keys()):
            try:
                staus_nr = 40 + count * 10
                self.status.set('Start processing of %s' % key, staus_nr)
                ncs = indices_dic[key]
                logger.info('with %s files' % len(ncs))
                try:
                    ncs_reference = sdm.get_reference(ncs_indices=ncs,
                                                      period=period)
                    ncs_references.extend(ncs_reference)
                    logger.info('reference indice calculated %s ' %
                                ncs_references)
                except:
                    msg = 'failed to calculate the reference'
                    logger.exception(msg)
                    raise Exception(msg)

                try:
                    gam_model, predict_gam, gam_info = sdm.get_gam(
                        ncs_reference, PAmask)
                    stat_infos.append(gam_info)
                    self.status.set('GAM successfully trained', staus_nr + 5)
                except Exception as e:
                    msg = 'failed to train GAM for %s : %s' % (key, e)
                    logger.debug(msg)

                try:
                    prediction = sdm.get_prediction(gam_model, ncs)
                    self.status.set('prediction done', staus_nr + 7)
                except Exception as e:
                    msg = 'failed to predict tree occurence %s' % e
                    logger.exception(msg)
                    # raise Exception(msg)

                try:
                    self.status.set('land sea mask for predicted data',
                                    staus_nr + 8)
                    from numpy import invert, isnan, nan, broadcast_arrays  # , array, zeros, linspace, meshgrid
                    mask = invert(isnan(PAmask))
                    mask = broadcast_arrays(prediction, mask)[1]
                    prediction[~mask] = nan  # boolean-array negation; "mask is False" never selects anything
                except Exception as e:
                    logger.debug('failed to mask predicted data: %s' % e)

                try:
                    species_files.append(sdm.write_to_file(ncs[0], prediction))
                    logger.info('Favourability written to file')
                except Exception as e:
                    msg = 'failed to write species file %s' % e
                    logger.debug(msg)
                    # raise Exception(msg)

            except Exception as e:
                msg = 'failed to calculate reference indices. %s ' % e
                logger.exception(msg)
                raise Exception(msg)

        try:
            archive_indices = None
            archive_indices = archive(ncs_indices, format=archive_format)
            logger.info('indices added to archive')
        except:
            msg = 'failed adding indices to archive'
            logger.exception(msg)
            raise Exception(msg)

        archive_references = None
        try:
            archive_references = archive(ncs_references, format=archive_format)
            logger.info('indices reference added to archive')
        except:
            msg = 'failed adding reference indices to archive'
            logger.exception(msg)
            raise Exception(msg)

        archive_predicion = None
        try:
            archive_predicion = archive(species_files, format=archive_format)
            logger.info('species_files added to archive')
        except:
            msg = 'failed adding species_files indices to archive'
            logger.exception(msg)
            raise Exception(msg)

        try:
            from flyingpigeon.visualisation import pdfmerge
            stat_infosconcat = pdfmerge(stat_infos)
            logger.info('stat infos pdfs merged')
        except:
            logger.exception('failed to concat images')
            _, stat_infosconcat = tempfile.mkstemp(suffix='.pdf',
                                                   prefix='foobar-',
                                                   dir='.')

        # self.output_csv.setValue(csv_file)
        self.output_gbif.setValue(occurence_map)
        self.output_PA.setValue(PAmask_png)
        self.output_indices.setValue(archive_indices)
        self.output_reference.setValue(archive_references)
        self.output_prediction.setValue(archive_predicion)
        self.output_info.setValue(stat_infosconcat)

        self.status.set('done', 100)
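One detail worth isolating from the workflow above is the prediction-masking step: cells where the PA mask is NaN should be blanked out in the prediction, and with a boolean array that selection has to use the array (or its negation) directly. A minimal sketch with toy arrays standing in for PAmask and prediction:

import numpy as np

# toy stand-ins for the PA mask and the GAM prediction above
PAmask = np.array([1.0, np.nan, 0.0, np.nan])
prediction = np.array([0.2, 0.8, 0.5, 0.9])

mask = ~np.isnan(PAmask)                       # True where the mask is defined
mask = np.broadcast_arrays(prediction, mask)[1]
prediction[~mask] = np.nan                     # blank out undefined cells

print(prediction)  # [0.2 nan 0.5 nan]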
Example #44
0

# In[31]:

import numpy as np
np.nan == None


# In[32]:

np.nan == np.nan


# In[33]:

np.isnan(np.nan)


# In[34]:

sports = {'Archery': 'Bhutan',
          'Golf': 'Scotland',
          'Sumo': 'Japan',
          'Taekwondo': 'South Korea'}
s = pd.Series(sports)
s


# In[35]:

s.index
Example #45
0
 def testMeanNonInfNaN(self):
   prob = tf.random.uniform([int(1e4)], seed=test_util.test_seed())
   dist = tfd.ContinuousBernoulli(probs=prob, validate_args=True)
   mean_ = self.evaluate(dist.mean())
   self.assertFalse(np.any(np.isinf(mean_)))
   self.assertFalse(np.any(np.isnan(mean_)))
Example #46
0
print(similarity)
def cross_entropy(predictions, targets):
    N = predictions.shape[0]
    ce = -np.sum(targets * np.log(predictions)) / N
    return ce
for factor in factors:
    probs = probs_true * factor
    probs[:2, 1] = 1 - factor
    probs[2:, 0] = 1 - factor
    predictions = torch.mm(probs, torch.transpose(probs, 0, 1))
    print(predictions)
    print(cross_entropy(predictions.numpy(), similarity.numpy()))
    _loss = losses["MOESimVAELoss"].similarity(probs, similarity)
    _loss = torch.mean(torch.sum(_loss, dim=1), dim=0)
    _ce_loss = log_loss(probs_true[:, 0], probs[:, 0])
    if np.isnan(_ce_loss):
        _ce_loss = 0.
    print(probs)
    print(_loss, _ce_loss)
    sim_losses.append(_loss.cpu().numpy())
    ce_losses.append(_ce_loss)
fig, ax = plt.subplots()
ax.plot(factors, sim_losses, color="blue", label="SIM")
ax.plot(factors, ce_losses, color="green", label="CE")
ax.set_title("SIMILARITY losses", fontsize=10)
ax.set_xlabel("factors")
ax.grid(True, which="both")
ax.legend()


#############################################################################
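The cross_entropy helper above produces NaN or infinite contributions whenever a predicted probability hits exactly 0 for a positive target, which is why the loop falls back to _ce_loss = 0. when log_loss returns NaN. A common guard, shown here as an assumed variant rather than the code above, is to clip probabilities away from 0 and 1:

import numpy as np

def safe_cross_entropy(predictions, targets, eps=1e-12):
    """Mean cross-entropy with probabilities clipped away from 0 and 1."""
    predictions = np.clip(predictions, eps, 1.0 - eps)
    return -np.sum(targets * np.log(predictions)) / predictions.shape[0]

targets = np.array([[1.0, 0.0], [0.0, 1.0]])
predictions = np.array([[1.0, 0.0], [0.0, 1.0]])  # would produce log(0) without clipping
print(safe_cross_entropy(predictions, targets))   # finite, close to 0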
Example #47
0
def test_rpc_trade_status(default_conf, ticker, fee, mocker) -> None:
    mocker.patch('freqtrade.rpc.telegram.Telegram', MagicMock())
    mocker.patch.multiple(
        'freqtrade.exchange.Exchange',
        fetch_ticker=ticker,
        get_fee=fee,
    )

    freqtradebot = get_patched_freqtradebot(mocker, default_conf)
    patch_get_signal(freqtradebot, (True, False))
    rpc = RPC(freqtradebot)

    freqtradebot.state = State.RUNNING
    with pytest.raises(RPCException, match=r'.*no active trade*'):
        rpc._rpc_trade_status()

    freqtradebot.enter_positions()
    trades = Trade.get_open_trades()
    trades[0].open_order_id = None
    freqtradebot.exit_positions(trades)

    results = rpc._rpc_trade_status()
    assert results[0] == {
        'trade_id': 1,
        'pair': 'ETH/BTC',
        'base_currency': 'BTC',
        'open_date': ANY,
        'open_date_hum': ANY,
        'open_timestamp': ANY,
        'is_open': ANY,
        'fee_open': ANY,
        'fee_open_cost': ANY,
        'fee_open_currency': ANY,
        'fee_close': fee.return_value,
        'fee_close_cost': ANY,
        'fee_close_currency': ANY,
        'open_rate_requested': ANY,
        'open_trade_value': 0.0010025,
        'close_rate_requested': ANY,
        'sell_reason': ANY,
        'sell_order_status': ANY,
        'min_rate': ANY,
        'max_rate': ANY,
        'strategy': ANY,
        'timeframe': 5,
        'open_order_id': ANY,
        'close_date': None,
        'close_date_hum': None,
        'close_timestamp': None,
        'open_rate': 1.098e-05,
        'close_rate': None,
        'current_rate': 1.099e-05,
        'amount': 91.07468123,
        'amount_requested': 91.07468123,
        'stake_amount': 0.001,
        'trade_duration': None,
        'trade_duration_s': None,
        'close_profit': None,
        'close_profit_pct': None,
        'close_profit_abs': None,
        'current_profit': -0.00408133,
        'current_profit_pct': -0.41,
        'current_profit_abs': -4.09e-06,
        'profit_ratio': -0.00408133,
        'profit_pct': -0.41,
        'profit_abs': -4.09e-06,
        'profit_fiat': ANY,
        'stop_loss_abs': 9.882e-06,
        'stop_loss_pct': -10.0,
        'stop_loss_ratio': -0.1,
        'stoploss_order_id': None,
        'stoploss_last_update': ANY,
        'stoploss_last_update_timestamp': ANY,
        'initial_stop_loss_abs': 9.882e-06,
        'initial_stop_loss_pct': -10.0,
        'initial_stop_loss_ratio': -0.1,
        'stoploss_current_dist': -1.1080000000000002e-06,
        'stoploss_current_dist_ratio': -0.10081893,
        'stoploss_current_dist_pct': -10.08,
        'stoploss_entry_dist': -0.00010475,
        'stoploss_entry_dist_ratio': -0.10448878,
        'open_order': None,
        'exchange': 'bittrex',
    }

    mocker.patch('freqtrade.freqtradebot.FreqtradeBot.get_sell_rate',
                 MagicMock(side_effect=ExchangeError("Pair 'ETH/BTC' not available")))
    results = rpc._rpc_trade_status()
    assert isnan(results[0]['current_profit'])
    assert isnan(results[0]['current_rate'])
    assert results[0] == {
        'trade_id': 1,
        'pair': 'ETH/BTC',
        'base_currency': 'BTC',
        'open_date': ANY,
        'open_date_hum': ANY,
        'open_timestamp': ANY,
        'is_open': ANY,
        'fee_open': ANY,
        'fee_open_cost': ANY,
        'fee_open_currency': ANY,
        'fee_close': fee.return_value,
        'fee_close_cost': ANY,
        'fee_close_currency': ANY,
        'open_rate_requested': ANY,
        'open_trade_value': ANY,
        'close_rate_requested': ANY,
        'sell_reason': ANY,
        'sell_order_status': ANY,
        'min_rate': ANY,
        'max_rate': ANY,
        'strategy': ANY,
        'timeframe': ANY,
        'open_order_id': ANY,
        'close_date': None,
        'close_date_hum': None,
        'close_timestamp': None,
        'open_rate': 1.098e-05,
        'close_rate': None,
        'current_rate': ANY,
        'amount': 91.07468123,
        'amount_requested': 91.07468123,
        'trade_duration': ANY,
        'trade_duration_s': ANY,
        'stake_amount': 0.001,
        'close_profit': None,
        'close_profit_pct': None,
        'close_profit_abs': None,
        'current_profit': ANY,
        'current_profit_pct': ANY,
        'current_profit_abs': ANY,
        'profit_ratio': ANY,
        'profit_pct': ANY,
        'profit_abs': ANY,
        'profit_fiat': ANY,
        'stop_loss_abs': 9.882e-06,
        'stop_loss_pct': -10.0,
        'stop_loss_ratio': -0.1,
        'stoploss_order_id': None,
        'stoploss_last_update': ANY,
        'stoploss_last_update_timestamp': ANY,
        'initial_stop_loss_abs': 9.882e-06,
        'initial_stop_loss_pct': -10.0,
        'initial_stop_loss_ratio': -0.1,
        'stoploss_current_dist': ANY,
        'stoploss_current_dist_ratio': ANY,
        'stoploss_current_dist_pct': ANY,
        'stoploss_entry_dist': -0.00010475,
        'stoploss_entry_dist_ratio': -0.10448878,
        'open_order': None,
        'exchange': 'bittrex',
    }
Example #48
0
    def __call__(self, test_rows, chunk_size):
        import sqlalchemy as sa
        from sqlalchemy.sql import elements

        with create_sa_connection(self._con, **(self._engine_kwargs or dict())) as con:
            self._con = str(con.engine.url)
            selectable = self._get_selectable(con)

            # process index_col
            index_col = self._index_col
            if index_col is not None:
                if not isinstance(index_col, (list, tuple)):
                    index_col = (index_col,)
                new_index_col = []
                for col in index_col:
                    if isinstance(col, (sa.Column, elements.Label)):
                        new_index_col.append(col.name)
                    elif isinstance(col, str):
                        new_index_col.append(col)
                    elif col is not None:
                        raise TypeError(f'unknown index_col type: {type(col)}')
                self._index_col = new_index_col

            # process columns
            columns = self._columns or []
            new_columns = []
            for col in columns:
                if isinstance(col, str):
                    new_columns.append(col)
                else:
                    new_columns.append(col.name)
            self._columns = new_columns

            if self._columns:
                collect_cols = self._columns + (self._index_col or [])
            else:
                collect_cols = []
            test_df, shape = self._collect_info(con, selectable, collect_cols, test_rows)

            # reconstruct selectable using known column names
            if not collect_cols:
                self._columns = list(test_df.columns)
                if self._selectable is not None:
                    self._selectable = None
                    self._get_selectable(con, columns=self._columns + (self._index_col or []))

            if self.method == 'partition':
                if not self.index_col or self.partition_col not in self.index_col:
                    part_frame = test_df
                else:
                    part_frame = test_df.index.to_frame()

                if not issubclass(part_frame[self.partition_col].dtype.type, (np.number, np.datetime64)):
                    raise TypeError('Type of partition column should be numeric or datetime, '
                                    f'now it is {test_df[self.partition_col].dtype}')

            if isinstance(test_df.index, pd.RangeIndex):
                index_value = parse_index(pd.RangeIndex(shape[0] if not np.isnan(shape[0]) else -1),
                                          str(selectable), self._con)
            else:
                index_value = parse_index(test_df.index)

            columns_value = parse_index(test_df.columns, store_data=True)

            dtypes = test_df.dtypes
            use_arrow_dtype = self._use_arrow_dtype
            if use_arrow_dtype is None:
                use_arrow_dtype = options.dataframe.use_arrow_dtype
            if use_arrow_dtype:
                dtypes = to_arrow_dtypes(dtypes, test_df=test_df)

            return self.new_dataframe(None, shape=shape, dtypes=dtypes,
                                      index_value=index_value,
                                      columns_value=columns_value,
                                      raw_chunk_size=chunk_size)
Example #49
0
 def sum_nan(y):
     if np.isnan(y).all():
         return float("nan")
     return np.nansum(y)
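The point of sum_nan above is that np.nansum alone returns 0.0 for an all-NaN input, which can silently look like a real total; the extra isnan().all() check keeps an all-missing slice marked as missing. A short demonstration (the helper is repeated so the snippet runs on its own):

import numpy as np

def sum_nan(y):
    if np.isnan(y).all():
        return float("nan")
    return np.nansum(y)

print(np.nansum(np.array([np.nan, np.nan])))   # 0.0 -- looks like a real sum
print(sum_nan(np.array([np.nan, np.nan])))     # nan -- all-missing stays missing
print(sum_nan(np.array([1.0, np.nan, 2.0])))   # 3.0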
Example #50
0
def test_rpc_trade_statistics(default_conf, ticker, ticker_sell_up, fee,
                              limit_buy_order, limit_sell_order, mocker) -> None:
    mocker.patch.multiple(
        'freqtrade.rpc.fiat_convert.CoinGeckoAPI',
        get_price=MagicMock(return_value={'bitcoin': {'usd': 15000.0}}),
    )
    mocker.patch('freqtrade.rpc.rpc.CryptoToFiatConverter._find_price', return_value=15000.0)
    mocker.patch('freqtrade.rpc.telegram.Telegram', MagicMock())
    mocker.patch.multiple(
        'freqtrade.exchange.Exchange',
        fetch_ticker=ticker,
        get_fee=fee,
    )

    freqtradebot = get_patched_freqtradebot(mocker, default_conf)
    patch_get_signal(freqtradebot, (True, False))
    stake_currency = default_conf['stake_currency']
    fiat_display_currency = default_conf['fiat_display_currency']

    rpc = RPC(freqtradebot)
    rpc._fiat_converter = CryptoToFiatConverter()

    res = rpc._rpc_trade_statistics(stake_currency, fiat_display_currency)
    assert res['trade_count'] == 0
    assert res['first_trade_date'] == ''
    assert res['first_trade_timestamp'] == 0
    assert res['latest_trade_date'] == ''
    assert res['latest_trade_timestamp'] == 0

    # Create some test data
    freqtradebot.enter_positions()
    trade = Trade.query.first()
    # Simulate fulfilled LIMIT_BUY order for trade
    trade.update(limit_buy_order)

    # Update the ticker with a market going up
    mocker.patch.multiple(
        'freqtrade.exchange.Exchange',
        fetch_ticker=ticker_sell_up
    )
    trade.update(limit_sell_order)
    trade.close_date = datetime.utcnow()
    trade.is_open = False

    freqtradebot.enter_positions()
    trade = Trade.query.first()
    # Simulate fulfilled LIMIT_BUY order for trade
    trade.update(limit_buy_order)

    # Update the ticker with a market going up
    mocker.patch.multiple(
        'freqtrade.exchange.Exchange',
        fetch_ticker=ticker_sell_up
    )
    trade.update(limit_sell_order)
    trade.close_date = datetime.utcnow()
    trade.is_open = False

    stats = rpc._rpc_trade_statistics(stake_currency, fiat_display_currency)
    assert prec_satoshi(stats['profit_closed_coin'], 6.217e-05)
    assert prec_satoshi(stats['profit_closed_percent_mean'], 6.2)
    assert prec_satoshi(stats['profit_closed_fiat'], 0.93255)
    assert prec_satoshi(stats['profit_all_coin'], 5.802e-05)
    assert prec_satoshi(stats['profit_all_percent_mean'], 2.89)
    assert prec_satoshi(stats['profit_all_fiat'], 0.8703)
    assert stats['trade_count'] == 2
    assert stats['first_trade_date'] == 'just now'
    assert stats['latest_trade_date'] == 'just now'
    assert stats['avg_duration'] == '0:00:00'
    assert stats['best_pair'] == 'ETH/BTC'
    assert prec_satoshi(stats['best_rate'], 6.2)

    # Test non-available pair
    mocker.patch('freqtrade.freqtradebot.FreqtradeBot.get_sell_rate',
                 MagicMock(side_effect=ExchangeError("Pair 'ETH/BTC' not available")))
    stats = rpc._rpc_trade_statistics(stake_currency, fiat_display_currency)
    assert stats['trade_count'] == 2
    assert stats['first_trade_date'] == 'just now'
    assert stats['latest_trade_date'] == 'just now'
    assert stats['avg_duration'] == '0:00:00'
    assert stats['best_pair'] == 'ETH/BTC'
    assert prec_satoshi(stats['best_rate'], 6.2)
    assert isnan(stats['profit_all_coin'])
Example #51
0
    def transform(self, patient_df, pid=None):
        ''' Transformer method, taking as input a data-frame with irregularly sampled input data. The method 
            assumes that the data-frame contains a time-stamp column, and the data-frame is sorted along the first 
            axis in non-decreasing order with respect to the timestamp column. Pass the <pid> of the patient stay
            as additional information'''
        self._check_state()
        static_table = self.df_static[self.df_static["PatientID"] == pid]

        # No static data, patient is not valid, exclude on-the-fly
        if static_table.shape[0] == 0:
            print("WARNING: No static data in patient table...")
            return None

        # More than one row, select one of the rows arbitrarily
        if static_table.shape[0] > 1:
            print("WARNING: More than one row in static table...")
            static_table = static_table.take([0], axis=0)

        static_height = float(static_table["Height"])
        static_gender = str(static_table["Sex"].values[0]).strip()
        assert (static_gender in ["F", "M", "U"])

        if static_gender in ["F", "M"]:
            typical_weight = self.typical_weight_dict[static_gender]
        else:
            typical_weight = (self.typical_weight_dict["M"] +
                              self.typical_weight_dict["F"]) / 2.0

        personal_bmi = self.median_bmi_dict[self.key_dict[static_gender]]

        ## If either the endpoints or the features don't exist, log the failure but do nothing; the missing patients can
        #  later be added as a new group to the output H5
        if patient_df.shape[0] == 0:
            print(
                "WARNING: p{} has missing features, skipping output generation..."
                .format(pid))
            return None

        all_keys = list(
            set(patient_df.columns.values.tolist()).difference(
                set(["Datetime", "PatientID", "a_temp", "m_pm_1", "m_pm_2"])))

        ts = patient_df["Datetime"]
        ts_arr = np.array(ts)
        n_ts = ts_arr.size

        if self.is_dim_reduced:
            hr = np.array(patient_df["vm1"])
        else:
            hr = np.array(patient_df["v200"])

        finite_hr = ts_arr[np.isfinite(hr)]

        if finite_hr.size == 0:
            print("WARNING: Patient {} has no HR, ignoring patient...".format(
                pid))
            return None

        ts_min = ts_arr[np.isfinite(hr)][0]
        ts_max = ts_arr[np.isfinite(hr)][-1]
        max_ts_diff = (ts_max - ts_min) / np.timedelta64(1, 's')

        time_grid = np.arange(
            0.0, min(max_ts_diff + 1.0, self.max_grid_length_secs),
            self.grid_period)
        time_grid_abs = [
            ts_min + pdts.Timedelta(seconds=time_grid[idx])
            for idx in range(time_grid.size)
        ]
        imputed_df_dict = {}
        imputed_df_dict[self.patient_id_key] = [int(pid)] * time_grid.size
        imputed_df_dict[self.rel_datetime_key] = time_grid
        imputed_df_dict[self.abs_datetime_key] = time_grid_abs

        ## There is nothing to do if the patient has no records, just return...
        if n_ts == 0:
            print(
                "WARNING: p{} has an empty record, skipping output generation..."
                .format(pid))
            return None

        ## Initialize the storage for the imputed time grid, NANs for the non-pharma, 0 for pharma.
        for col in all_keys:
            if col[0] == "p":
                imputed_df_dict[col] = np.zeros(time_grid.size)
            elif col[0] == "v":
                imputed_df_dict[col] = mlhc_array.empty_nan(time_grid.size)
            else:
                print("ERROR: Invalid variable type")
                assert (False)

        imputed_df = pd.DataFrame(imputed_df_dict)
        norm_ts = np.array(ts - ts_min) / np.timedelta64(1, 's')

        # Schedule for order of variable imputation
        if self.is_dim_reduced:
            all_keys.remove("vm131")
            all_keys = ["vm131"] + all_keys
        else:
            all_keys.remove("v10000400")
            all_keys = ["v10000400"] + all_keys

        ## Impute all variables independently, with the two relevant cases pharma variable and other variable,
        #  distinguishable from the variable prefix. We enforce that weight is the first variable to be imputed, so that
        #  its time-gridded information can later be used by other custom formulae imputations that depend on it.
        for var_idx, variable in enumerate(all_keys):
            df_var = patient_df[variable]
            assert (n_ts == df_var.shape[0] == norm_ts.size)

            ## Non-pharma variable case
            if variable[0] == "v":
                valid_normal = False
                var_encoding = self.var_encoding_map[variable]

                # Saved a value in the dict of normal values
                if variable in self.normal_dict:
                    saved_normal_var = self.normal_dict[variable]

                    # Saved normal value is already numeric, no need to encode it here...
                    if mlhc_math.is_numeric(saved_normal_var) and np.isfinite(
                            saved_normal_var):
                        global_impute_val = saved_normal_var
                        valid_normal = True

                # Could not determine a valid normal value, have to fall back to pre-computed global statistic
                if not valid_normal:

                    # Fill in the weight using BMI calculations
                    if variable in ["v10000400", "vm131"]:

                        # If we have an observed height can use BMI
                        if np.isfinite(static_height):
                            global_impute_val = personal_bmi * (static_height /
                                                                100)**2
                        else:
                            global_impute_val = typical_weight

                    # Fill in with the global statistic
                    elif variable in self.global_impute_dict:
                        global_impute_val = self.global_impute_dict[variable]

                    # Rare case, no observation in the imputation data-set
                    else:
                        global_impute_val = np.nan

                # Default values where median/IQR interval not saved
                if variable not in self.interval_median_dict:
                    fill_interval_secs = self.default_fill_interval_secs
                    rolling_mean_secs = self.default_rolling_mean_secs
                    # return_mean_secs is needed below; the original repeated the fill-interval
                    # assignment here, so default it to the rolling-mean window (assumption)
                    return_mean_secs = self.default_rolling_mean_secs

                # We have to impose minimum period to have boundary conditions where the backward window for
                # slope estimation is empty or an observation is not even filled to the next grid point to the right.
                else:
                    med_interval = self.interval_median_dict[variable]
                    iqr_interval = self.interval_iqr_dict[variable]
                    base_val = med_interval + 2 * iqr_interval
                    fill_interval_secs = max(self.grid_period, base_val)
                    rolling_mean_secs = max(2 * self.grid_period, 2 * base_val)
                    return_mean_secs = max(2 * self.grid_period, base_val)

                raw_col = np.array(df_var)
                assert (raw_col.size == norm_ts.size)
                observ_idx = np.isfinite(raw_col)
                observ_ts = norm_ts[observ_idx]
                observ_val = raw_col[observ_idx]

                ## No values have been observed for this variable, it has to be imputed using the global mean
                if observ_val.size == 0:
                    est_vals = mlhc_array.value_empty(time_grid.size,
                                                      global_impute_val)
                    imputed_df[variable] = est_vals
                    imputed_df["{}_IMPUTED_STATUS_CUM_COUNT".format(
                        variable)] = np.zeros(time_grid.size)
                    imputed_df["{}_IMPUTED_STATUS_TIME_TO".format(
                        variable)] = mlhc_array.value_empty(
                            time_grid.size, -1.0)
                    continue

                assert (np.isfinite(observ_val).all())
                assert (np.isfinite(observ_ts).all())

                if self.use_adaptive_impute:

                    # Formulae imputation
                    if variable in [
                            "v1000", "v1010", "v10020000", "v30005010",
                            "v30005110", "vm13", "vm24", "vm31", "vm32"
                    ]:
                        existing_weight_col = np.array(
                            imputed_df["vm131"]
                        ) if self.is_dim_reduced else np.array(
                            imputed_df["v10000400"])
                        est_vals, cum_count_ts, time_to_last_ms = bern_forward_fill.impute_forward_fill_new_only_ffill(
                            observ_ts,
                            observ_val,
                            time_grid,
                            global_impute_val,
                            self.grid_period,
                            fill_interval_secs=fill_interval_secs,
                            rolling_mean_secs=rolling_mean_secs,
                            return_mean_secs=return_mean_secs,
                            var_type="non_pharma",
                            var_encoding=var_encoding,
                            variable_id=variable,
                            weight_imputed_col=existing_weight_col,
                            static_height=static_height,
                            personal_bmi=personal_bmi)
                    elif variable in ["v10000400", "vm131"]:
                        est_vals, cum_count_ts, time_to_last_ms = bern_forward_fill.impute_forward_fill_new_only_ffill(
                            observ_ts,
                            observ_val,
                            time_grid,
                            global_impute_val,
                            self.grid_period,
                            var_type="weight")
                    else:
                        est_vals, cum_count_ts, time_to_last_ms = bern_forward_fill.impute_forward_fill_new_only_ffill(
                            observ_ts,
                            observ_val,
                            time_grid,
                            global_impute_val,
                            self.grid_period,
                            fill_interval_secs=fill_interval_secs,
                            rolling_mean_secs=rolling_mean_secs,
                            return_mean_secs=return_mean_secs,
                            var_type="non_pharma",
                            var_encoding=var_encoding,
                            variable_id=variable)

                else:
                    assert (False)
                    est_vals = bern_forward_fill.impute_forward_fill(
                        observ_ts, observ_val, time_grid, global_mean_var)

                assert (np.isnan(global_impute_val)
                        or np.isfinite(est_vals).all())
                imputed_df[variable] = est_vals
                imputed_df["{}_IMPUTED_STATUS_CUM_COUNT".format(
                    variable)] = cum_count_ts
                imputed_df["{}_IMPUTED_STATUS_TIME_TO".format(
                    variable)] = time_to_last_ms

            ## Pharma variable case, the doses have to be recomputed to the time-grid. The global imputation value is 0, because the rate assumed w/o observation
            #  is 0 (no medication flow)
            elif variable[0] == "p":
                global_impute_val = 0.0
                raw_col = np.array(df_var)
                assert (raw_col.size == norm_ts.size)
                observ_idx = np.isfinite(raw_col)
                observ_ts = norm_ts[observ_idx]
                observ_val = raw_col[observ_idx]

                ## No values have been observed for this pharma-variable, leave Zero in this series
                if observ_val.size == 0:
                    continue

                assert (np.isfinite(observ_val).all())
                assert (np.isfinite(observ_ts).all())
                est_vals, cum_count_ts, time_to_last_ms = bern_forward_fill.impute_forward_fill_new_only_ffill(
                    observ_ts,
                    observ_val,
                    time_grid,
                    global_impute_val,
                    self.grid_period,
                    var_type="pharma")
                assert (np.isfinite(est_vals).all())
                imputed_df[variable] = est_vals
            else:
                print("ERROR: Invalid variable, exiting...")
                assert (False)

        return imputed_df
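The core of the transform above is imputation onto a regular time grid: irregular observations are forward-filled to grid points, with a global fallback value where nothing has been observed yet, plus per-variable windows and counters handled by the bern_forward_fill helpers. Those helpers are not reproduced here; the sketch below only illustrates the basic grid forward-fill idea, with made-up names and values:

import numpy as np

def grid_forward_fill(observ_ts, observ_val, time_grid, global_impute_val):
    """Forward-fill irregular observations onto a regular grid.

    Grid points before the first observation get the global fallback value.
    """
    est_vals = np.full(time_grid.size, global_impute_val, dtype=float)
    # index of the last observation at or before each grid point (-1 if none)
    last_obs = np.searchsorted(observ_ts, time_grid, side="right") - 1
    has_obs = last_obs >= 0
    est_vals[has_obs] = observ_val[last_obs[has_obs]]
    return est_vals

# observations at 12 s and 95 s, imputed onto a 30-second grid
obs_ts = np.array([12.0, 95.0])
obs_val = np.array([80.0, 90.0])
grid = np.arange(0.0, 180.0, 30.0)
print(grid_forward_fill(obs_ts, obs_val, grid, global_impute_val=75.0))
# -> [75. 80. 80. 80. 90. 90.]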
Example #52
0
    def generateGroupAdditivityValues(self,
                                      trainingSet,
                                      kunits,
                                      method='Arrhenius'):
        """
        Generate the group additivity values using the given `trainingSet`,
        a list of 2-tuples of the form ``(template, kinetics)``. You must also
        specify the `kunits` for the family and the `method` to use when
        generating the group values. Returns ``True`` if the group values have
        changed significantly since the last time they were fitted, or ``False``
        otherwise.
        """
        warnings.warn(
            "Group additivity is no longer supported and may be"
            " removed in version 2.3.", DeprecationWarning)
        # keep track of previous values so we can detect if they change
        old_entries = dict()
        for label, entry in self.entries.items():
            if entry.data is not None:
                old_entries[label] = entry.data

        # Determine a complete list of the entries in the database, sorted as in the tree
        groupEntries = self.top[:]
        for entry in self.top:
            groupEntries.extend(self.descendants(entry))

        # Determine a unique list of the groups we will be able to fit parameters for
        groupList = []
        for template, kinetics in trainingSet:
            for group in template:
                if group not in self.top:
                    groupList.append(group)
                    groupList.extend(self.ancestors(group)[:-1])
        groupList = list(set(groupList))
        groupList.sort(key=lambda x: x.index)

        if method == 'KineticsData':
            # Fit a discrete set of k(T) data points by training against k(T) data

            Tdata = numpy.array([300, 400, 500, 600, 800, 1000, 1500, 2000])

            # Initialize dictionaries of fitted group values and uncertainties
            groupValues = {}
            groupUncertainties = {}
            groupCounts = {}
            groupComments = {}
            for entry in groupEntries:
                groupValues[entry] = []
                groupUncertainties[entry] = []
                groupCounts[entry] = []
                groupComments[entry] = set()

            # Generate least-squares matrix and vector
            A = []
            b = []

            kdata = []
            for template, kinetics in trainingSet:

                if isinstance(kinetics, (Arrhenius, KineticsData)):
                    kd = [kinetics.getRateCoefficient(T) for T in Tdata]
                elif isinstance(kinetics, ArrheniusEP):
                    kd = [kinetics.getRateCoefficient(T, 0) for T in Tdata]
                else:
                    raise Exception(
                        'Unexpected kinetics model of type {0} for template {1}.'
                        .format(kinetics.__class__, template))
                kdata.append(kd)

                # Create every combination of each group and its ancestors with each other
                combinations = []
                for group in template:
                    groups = [group]
                    groups.extend(self.ancestors(group))
                    combinations.append(groups)
                combinations = getAllCombinations(combinations)
                # Add a row to the matrix for each combination
                for groups in combinations:
                    Arow = [1 if group in groups else 0 for group in groupList]
                    Arow.append(1)
                    brow = [math.log10(k) for k in kd]
                    A.append(Arow)
                    b.append(brow)

                    for group in groups:
                        groupComments[group].add("{0!s}".format(template))

            if len(A) == 0:
                logging.warning(
                    'Unable to fit kinetics groups for family "{0}"; no valid data found.'
                    .format(self.label))
                return
            A = numpy.array(A)
            b = numpy.array(b)
            kdata = numpy.array(kdata)

            x, residues, rank, s = numpy.linalg.lstsq(A, b)

            for t, T in enumerate(Tdata):

                # Determine error in each group (on log scale)
                stdev = numpy.zeros(len(groupList) + 1, numpy.float64)
                count = numpy.zeros(len(groupList) + 1, numpy.int64)

                for index in range(len(trainingSet)):
                    template, kinetics = trainingSet[index]
                    kd = math.log10(kdata[index, t])
                    km = x[-1, t] + sum([
                        x[groupList.index(group), t]
                        for group in template if group in groupList
                    ])
                    variance = (km - kd)**2
                    for group in template:
                        groups = [group]
                        groups.extend(self.ancestors(group))
                        for g in groups:
                            if g not in self.top:
                                ind = groupList.index(g)
                                stdev[ind] += variance
                                count[ind] += 1
                    stdev[-1] += variance
                    count[-1] += 1
                stdev = numpy.sqrt(stdev / (count - 1))
                import scipy.stats
                ci = scipy.stats.t.ppf(0.975, count - 1) * stdev

                # Update dictionaries of fitted group values and uncertainties
                for entry in groupEntries:
                    if entry == self.top[0]:
                        groupValues[entry].append(10**x[-1, t])
                        groupUncertainties[entry].append(10**ci[-1])
                        groupCounts[entry].append(count[-1])
                    elif entry in groupList:
                        index = groupList.index(entry)
                        groupValues[entry].append(10**x[index, t])
                        groupUncertainties[entry].append(10**ci[index])
                        groupCounts[entry].append(count[index])
                    else:
                        groupValues[entry] = None
                        groupUncertainties[entry] = None
                        groupCounts[entry] = None

            # Store the fitted group values and uncertainties on the associated entries
            for entry in groupEntries:
                if groupValues[entry] is not None:
                    entry.data = KineticsData(Tdata=(Tdata, "K"),
                                              kdata=(groupValues[entry],
                                                     kunits))
                    if not any(
                            numpy.isnan(numpy.array(
                                groupUncertainties[entry]))):
                        entry.data.kdata.uncertainties = numpy.array(
                            groupUncertainties[entry])
                        entry.data.kdata.uncertaintyType = '*|/'
                    entry.shortDesc = "Group additive kinetics."
                    entry.longDesc = "Fitted to {0} rates.\n".format(
                        groupCounts[entry])
                    entry.longDesc += "\n".join(groupComments[entry])
                else:
                    entry.data = None

        elif method == 'Arrhenius':
            # Fit Arrhenius parameters (A, n, Ea) by training against k(T) data

            Tdata = numpy.array([300, 400, 500, 600, 800, 1000, 1500, 2000])
            logTdata = numpy.log(Tdata)
            Tinvdata = 1000. / (constants.R * Tdata)

            A = []
            b = []

            kdata = []
            for template, kinetics in trainingSet:

                if isinstance(kinetics, (Arrhenius, KineticsData)):
                    kd = [kinetics.getRateCoefficient(T) for T in Tdata]
                elif isinstance(kinetics, ArrheniusEP):
                    kd = [kinetics.getRateCoefficient(T, 0) for T in Tdata]
                else:
                    raise Exception(
                        'Unexpected kinetics model of type {0} for template {1}.'
                        .format(kinetics.__class__, template))
                kdata.append(kd)

                # Create every combination of each group and its ancestors with each other
                combinations = []
                for group in template:
                    groups = [group]
                    groups.extend(self.ancestors(group))
                    combinations.append(groups)
                combinations = getAllCombinations(combinations)

                # Add a row to the matrix for each combination at each temperature
                for t, T in enumerate(Tdata):
                    logT = logTdata[t]
                    Tinv = Tinvdata[t]
                    for groups in combinations:
                        Arow = []
                        for group in groupList:
                            if group in groups:
                                Arow.extend([1, logT, -Tinv])
                            else:
                                Arow.extend([0, 0, 0])
                        Arow.extend([1, logT, -Tinv])
                        brow = math.log(kd[t])
                        A.append(Arow)
                        b.append(brow)

            if len(A) == 0:
                logging.warning(
                    'Unable to fit kinetics groups for family "{0}"; no valid data found.'
                    .format(self.label))
                return
            A = numpy.array(A)
            b = numpy.array(b)
            kdata = numpy.array(kdata)

            x, residues, rank, s = numpy.linalg.lstsq(A, b)

            # Store the results
            self.top[0].data = Arrhenius(
                A=(math.exp(x[-3]), kunits),
                n=x[-2],
                Ea=(x[-1], "kJ/mol"),
                T0=(1, "K"),
            )
            for i, group in enumerate(groupList):
                group.data = Arrhenius(
                    A=(math.exp(x[3 * i]), kunits),
                    n=x[3 * i + 1],
                    Ea=(x[3 * i + 2], "kJ/mol"),
                    T0=(1, "K"),
                )

        elif method == 'Arrhenius2':
            # Fit Arrhenius parameters (A, n, Ea) by training against (A, n, Ea) values

            A = []
            b = []

            for template, kinetics in trainingSet:

                # Create every combination of each group and its ancestors with each other
                combinations = []
                for group in template:
                    groups = [group]
                    groups.extend(self.ancestors(group))
                    combinations.append(groups)
                combinations = getAllCombinations(combinations)

                # Add a row to the matrix for each parameter
                if isinstance(kinetics,
                              Arrhenius) or (isinstance(kinetics, ArrheniusEP)
                                             and kinetics.alpha.value_si == 0):
                    for groups in combinations:
                        Arow = []
                        for group in groupList:
                            if group in groups:
                                Arow.append(1)
                            else:
                                Arow.append(0)
                        Arow.append(1)
                        Ea = kinetics.E0.value_si if isinstance(
                            kinetics, ArrheniusEP) else kinetics.Ea.value_si
                        brow = [
                            math.log(kinetics.A.value_si), kinetics.n.value_si,
                            Ea / 1000.
                        ]
                        A.append(Arow)
                        b.append(brow)

            if len(A) == 0:
                logging.warning(
                    'Unable to fit kinetics groups for family "{0}"; no valid data found.'
                    .format(self.label))
                return
            A = numpy.array(A)
            b = numpy.array(b)

            x, residues, rank, s = numpy.linalg.lstsq(A, b)

            # Store the results
            self.top[0].data = Arrhenius(
                A=(math.exp(x[-1, 0]), kunits),
                n=x[-1, 1],
                Ea=(x[-1, 2], "kJ/mol"),
                T0=(1, "K"),
            )
            for i, group in enumerate(groupList):
                group.data = Arrhenius(
                    A=(math.exp(x[i, 0]), kunits),
                    n=x[i, 1],
                    Ea=(x[i, 2], "kJ/mol"),
                    T0=(1, "K"),
                )

        # Detect whether any fitted group values changed significantly since the last fit
        changed = False
        for label, entry in self.entries.items():
            if entry.data is not None and label in old_entries:
                if (isinstance(entry.data, KineticsData)
                        and isinstance(old_entries[label], KineticsData)
                        and len(entry.data.kdata.value_si) == len(
                            old_entries[label].kdata.value_si) and all(
                                abs(entry.data.kdata.value_si /
                                    old_entries[label].kdata.value_si -
                                    1) < 0.01)):
                    #print "New group values within 1% of old."
                    pass
                elif (isinstance(entry.data, Arrhenius)
                      and isinstance(old_entries[label], Arrhenius)
                      and abs(entry.data.A.value_si /
                              old_entries[label].A.value_si - 1) < 0.01
                      and abs(entry.data.n.value_si /
                              old_entries[label].n.value_si - 1) < 0.01
                      and abs(entry.data.Ea.value_si /
                              old_entries[label].Ea.value_si - 1) < 0.01
                      and abs(entry.data.T0.value_si /
                              old_entries[label].T0.value_si - 1) < 0.01):
                    #print "New group values within 1% of old."
                    pass
                else:
                    changed = True
                    break
            else:
                changed = True
                break

        return changed
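
# A minimal sketch of the least-squares fit behind the 'KineticsData' branch
# above: each training rate contributes a row of group-membership indicators
# (plus an intercept column for the top node), and log10(k) is regressed onto
# it. Purely illustrative; the group names and rates below are made up.
import numpy as np

groups = ["G1", "G2", "G3"]
# (groups present in the template, rate coefficient k at one temperature)
training = [(["G1"], 1.0e3), (["G2"], 1.0e5), (["G1", "G2"], 1.0e8), (["G3"], 1.0e2)]

A = np.array([[1 if g in tmpl else 0 for g in groups] + [1]
              for tmpl, _ in training], dtype=float)
b = np.array([np.log10(k) for _, k in training])

x, residues, rank, s = np.linalg.lstsq(A, b, rcond=None)
for g, xi in zip(groups + ["<top>"], x):
    print("{0:6s} contributes 10**{1:+.2f} to k".format(g, xi))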
Example #53
0
def local_thresholding_prop(conn_matrix, thr):
    '''
    Threshold the adjacency matrix by building from the minimum spanning tree (MST) and adding
    successive N-nearest-neighbour degree graphs until the target proportional threshold is reached.
    '''
    import numpy as np
    import networkx as nx
    from pynets import thresholding
    from pynets.stats import netstats

    fail_tol = 10
    conn_matrix = np.nan_to_num(conn_matrix)
    G = nx.from_numpy_array(conn_matrix)
    if not nx.is_connected(G):
        [G, _] = netstats.prune_disconnected(G)

    maximum_edges = G.number_of_edges()
    G = thresholding.weight_to_distance(G)
    min_t = nx.minimum_spanning_tree(G, weight="distance")
    len_edges = min_t.number_of_edges()
    upper_values = np.triu_indices(np.shape(conn_matrix)[0], k=1)
    weights = np.array(conn_matrix[upper_values])
    weights = weights[~np.isnan(weights)]
    edgenum = int(float(thr) * float(len(weights)))
    if len_edges > edgenum:
        print("%s%s%s" % ('Warning: The minimum spanning tree already has: ', len_edges,
                          ' edges, select more edges. Local Threshold will be applied by just retaining the Minimum '
                          'Spanning Tree'))
        conn_matrix_thr = nx.to_numpy_array(G)
        return conn_matrix_thr

    k = 1
    len_edge_list = []
    while len_edges < edgenum and k <= np.shape(conn_matrix)[0] and (len(len_edge_list[-fail_tol:]) -
                                                                     len(set(len_edge_list[-fail_tol:]))) < (fail_tol-1):
        print(k)
        print(len_edges)
        len_edge_list.append(len_edges)
        # Create nearest neighbour graph
        nng = thresholding.knn(conn_matrix, k)
        number_before = nng.number_of_edges()
        # Remove edges from the NNG that exist already in the new graph/MST
        nng.remove_edges_from(min_t.edges())
        if nng.number_of_edges() == 0 and number_before >= maximum_edges:
            break

        # Add weights to NNG
        for e in nng.edges():
            nng.edges[e[0], e[1]]['weight'] = float(conn_matrix[e[0], e[1]])

        # Obtain list of edges from the NNG in order of weight
        edge_list = sorted(nng.edges(data=True), key=lambda t: t[2]['weight'], reverse=True)
        # Add edges in order of connectivity strength
        for edge in edge_list:
            #print("%s%s" % ('Adding edge to mst: ', edge))
            min_t.add_edges_from([edge])
            min_t_mx = nx.to_numpy_array(min_t)
            len_edges = nx.from_numpy_array(min_t_mx).number_of_edges()
            if len_edges >= edgenum:
                #print(len_edges)
                break

        if (len(len_edge_list[-fail_tol:]) - len(set(len_edge_list[-fail_tol:]))) >= (fail_tol-1):
            print("%s%s%s" % ('Cannot apply local thresholding to achieve threshold of: ', thr,
                              '. Using maximally saturated connected matrix instead...'))

        k += 1

    conn_matrix_thr = nx.to_numpy_array(min_t, nodelist=sorted(min_t.nodes()), dtype=np.float64)
    if len(min_t.nodes()) < conn_matrix.shape[0]:
        raise RuntimeWarning("%s%s%s" % ('Cannot apply local thresholding to achieve threshold of: ', thr,
                                         '. Try a higher -thr or -min_thr'))

    return conn_matrix_thr
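
# A minimal, self-contained sketch of the MST-based local thresholding idea
# above, using plain networkx only (no pynets): keep the minimum spanning tree
# of the distance graph, then add the strongest remaining edges until the
# target edge count is reached. Illustrative only, not the routine above.
import numpy as np
import networkx as nx

def mst_local_threshold(conn_matrix, thr):
    conn_matrix = np.nan_to_num(conn_matrix)
    n = conn_matrix.shape[0]
    G = nx.Graph()
    G.add_nodes_from(range(n))
    for i, j in zip(*np.triu_indices(n, k=1)):
        w = float(conn_matrix[i, j])
        if w > 0:
            # Strong connections correspond to short distances.
            G.add_edge(i, j, weight=w, distance=1.0 / w)
    mst = nx.minimum_spanning_tree(G, weight="distance")
    target = int(thr * G.number_of_edges())
    # Add the remaining edges in order of decreasing connectivity strength.
    rest = sorted((e for e in G.edges(data=True) if not mst.has_edge(e[0], e[1])),
                  key=lambda e: e[2]["weight"], reverse=True)
    for u, v, d in rest:
        if mst.number_of_edges() >= target:
            break
        mst.add_edge(u, v, **d)
    return nx.to_numpy_array(mst, nodelist=range(n))

rng = np.random.default_rng(0)
W = rng.random((10, 10))
W = (W + W.T) / 2
np.fill_diagonal(W, 0)
print(mst_local_threshold(W, 0.3).shape)  # (10, 10)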
            print(Ei)
            run.remove_file("observables")

            for j, gamma in enumerate(gammas[i:]):

                # write the remaining values of observables as those corresponding to the delta = 0
                # case, as non-zero d-band produces a higher energy fibril.
                scan['\\gamma_s'] = str(gamma)
                rp = ReadParams(scan=scan, loadsuf=loadsuf, savesuf=savesuf)
                run = SingleRun(rp, scan_dir=scan_dir)
                run.write_observables(E0, R0, eta0, delta0, surftwist0,
                                      "\\gamma_s")

            break

        if (np.isnan(Ri) or Ri <= 0) and gamma > 0.15:

            # If Ri is NaN or non-positive, the calculation failed.
            # Retry it with a different initial guess.

            print("Ri is NAN, trying again with Rguess = 1.0")

            # remove the current observables file, so that a new one can be written.
            run.remove_file("observables")
            if abs(float(scan['Rguess']) - 1.0) > 1e-10:
                Ri = 1.0
            else:
                break

        else:
            # calculation ran smoothly.
def get_spike_rate_map(map_matrix, map_function, start_frames, end_frames, camera_positions = None, show_colorbar = True, title = None, ax = None, use_log = False, dummy = False, text_size = 48):
    delta_x = 187 # distance to leftmost x-coordinate from origin on field
    delta_y = 297 # distance to topmost y-coordinate from origin on field
    
    if dummy:
        img_map = map_matrix
    elif map_function == MAP_FUNCTION.threshold:
        img_map = np.full((355, 258), np.nan)
        
        for i in range(len(map_matrix)):
            for y in range(len(map_matrix[i])):
                for x in range(len(map_matrix[i, y])):
                    if not np.isnan(map_matrix[i, y, x]):
                        img_map[y, x] = map_matrix[i, y, x]
    
    elif map_function == MAP_FUNCTION.mean:
        img_map = np.nanmean(map_matrix, axis = 0)        
    elif map_function == MAP_FUNCTION.min:
        img_map = np.nanmin(map_matrix, axis = 0)
    elif map_function == MAP_FUNCTION.max:
        img_map = np.nanmax(map_matrix, axis = 0)
    elif map_function == MAP_FUNCTION.std:
        img_map = np.nanstd(map_matrix, axis = 0)
    elif map_function == MAP_FUNCTION.var:
        img_map = np.nanvar(map_matrix, axis = 0)
    elif map_function == MAP_FUNCTION.percentile5:
        img_map = np.nanpercentile(map_matrix, 5, axis = 0)
    elif map_function == MAP_FUNCTION.percentile95:
        img_map = np.nanpercentile(map_matrix, 95, axis = 0)
    elif map_function == MAP_FUNCTION.median:
        img_map = np.nanpercentile(map_matrix, 50, axis = 0)
    elif map_function == MAP_FUNCTION.count:
        # count non-nan elements
        img_map = np.count_nonzero(~np.isnan(map_matrix), axis = 0)
    else:
        raise NotImplementedError('Map function not implemented!')
    
    if use_log:
        img_map = np.log1p(img_map)
    
    u = np.nanmean(img_map)
    std = np.nanstd(img_map)
    
    fig = figure(figsize = (15,15)) if ax is None else None
    
    im = None
    if ax:
        im = ax.imshow(img_map, vmin = max(0, u - 2 * std), vmax = u + 2 * std)
    else:
        im = plt.imshow(img_map, vmin = max(0, u - 2 * std), vmax = u + 2 * std)
    
    # camera positions
    if camera_positions is not None:
        cam_overlay = np.full((img_map.shape[0], img_map.shape[1], 4), [0,0,0,0], dtype = np.uint8)
        
        current_cmap = matplotlib.cm.get_cmap()
        current_cmap.set_bad(alpha=0)  
        
        for i in range(len(start_frames)):
            start_frame = start_frames[i]
            end_frame = end_frames[i]
            
            for frame_num in range(start_frame, end_frame):
                y = delta_y - int(camera_positions[frame_num][2])
                x = delta_x + int(camera_positions[frame_num][0])

                if y < 0 or x < 0 or y >= img_map.shape[0] or x >= img_map.shape[1]:
                    continue

                cam_overlay[y, x] = np.array([255, 0, 0, 255])

            if ax:
                ax.imshow(cam_overlay)
            else:
                plt.imshow(cam_overlay)
    
    if title:
        if ax:
            ax.set_title(title, fontsize = text_size, y = 1.03)
        else:
            plt.suptitle(title, fontsize = text_size, y = 1.03)
    
    if ax:
        ax.axis('off')
    
    if show_colorbar:
        if ax:
            divider = make_axes_locatable(ax)
            cax = divider.append_axes('right', size='5%', pad=0.1)
            cbar = plt.colorbar(im, cax=cax, orientation='vertical')
        else:        
            cbar = colorbar(im)
            
        cbar.ax.tick_params(labelsize = text_size)
        
    return fig, img_map, im
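
# A small illustration of the per-pixel aggregations that get_spike_rate_map
# selects via map_function: stack several NaN-padded maps along axis 0 and
# reduce them. The 3x2x2 array below is synthetic; MAP_FUNCTION itself is
# defined elsewhere in this module.
import numpy as np

stack = np.full((3, 2, 2), np.nan)
stack[0] = [[1.0, np.nan], [2.0, 4.0]]
stack[1] = [[3.0, 5.0], [np.nan, 6.0]]

print(np.nanmean(stack, axis=0))                   # MAP_FUNCTION.mean
print(np.nanpercentile(stack, 50, axis=0))         # MAP_FUNCTION.median
print(np.count_nonzero(~np.isnan(stack), axis=0))  # MAP_FUNCTION.count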
Example #56
0
def stderror(v):
    non_nan = np.count_nonzero(
        ~np.isnan(v))  # number of valid (non NaN) elements in the vector
    return np.nanstd(v, ddof=1) / np.sqrt(non_nan)
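
# Quick check of stderror on a vector containing NaNs (assumes numpy is
# imported as np, as in the function above): NaN entries are excluded both
# from the spread and from the sample count.
v = np.array([1.0, 2.0, np.nan, 4.0])
print(stderror(v))  # == np.nanstd(v, ddof=1) / np.sqrt(3)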
Example #57
0
                     columns=['phase', 'pvalue', 'kappa'],
                     data=theta_mod['rem'])

# filtering swr_mod
swr = pd.DataFrame(index=swr.index,
                   columns=swr.columns,
                   data=gaussFilt(swr.values, (10, )))

# Cut swr_mod from -500 to 500
nbins = 200
binsize = 5
times = np.arange(0, binsize * (nbins + 1), binsize) - (nbins * binsize) / 2
swr = swr.loc[:, times]

# CHECK FOR NAN
tmp1 = swr.index[np.unique(np.where(np.isnan(swr))[0])]
tmp2 = theta.index[theta.isnull().any(axis=1)]
# CHECK P-VALUE
tmp3 = theta.index[(theta['pvalue'] > 0.01).values]
tmp = np.unique(np.concatenate([tmp1, tmp2, tmp3]))
# copy and delete
if len(tmp):
    swr_modth = swr.drop(tmp)
    theta_modth = theta.drop(tmp)
else:
    swr_modth = swr.copy()
    theta_modth = theta.copy()

swr_modth_copy = swr_modth.copy()
neuron_index = swr_modth.index
swr_modth = swr_modth.values

###############################################################################################################
# PCA
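
# The PCA step itself is cut off in this snippet. A minimal sketch with
# scikit-learn on the cleaned swr_modth matrix might look like the following;
# the number of components is an assumption, not taken from the source.
from sklearn.decomposition import PCA

pca = PCA(n_components=10)
scores = pca.fit_transform(swr_modth)     # (n_neurons, 10) component scores
print(pca.explained_variance_ratio_[:3])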
Example #58
0
if len(name_new) > 1:
    fname_new = cm.save_memmap_join(
        name_new, base_name='Yr', n_chunks=12, dview=dview)
else:
    print('One file only, not saving!')
    fname_new = name_new[0]
#%%
# fname_new='Yr_d1_501_d2_398_d3_1_order_F_frames_369_.mmap'
Yr, dims, T = cm.load_memmap(fname_new)
d1, d2 = dims
images = np.reshape(Yr.T, [T] + list(dims), order='F')
Y = np.reshape(Yr, dims + (T,), order='F')
#%%
if np.min(images) < 0:
    raise Exception('Movie too negative, add_to_movie should be larger')
if np.sum(np.isnan(images)) > 0:
    raise Exception('Movie contains nan! You did not remove enough borders')
#%%
Cn = cm.local_correlations(Y[:, :, :1000])
pl.imshow(Cn, cmap='gray')

#%%
if not is_patches:
    #%%
    K = 35  # number of neurons expected per patch
    gSig = [7, 7]  # expected half size of neurons
    merge_thresh = 0.8  # merging threshold, max correlation allowed
    p = 2  # order of the autoregressive system
    cnm = cnmf.CNMF(n_processes, method_init=init_method, k=K, gSig=gSig, merge_thresh=merge_thresh,
                    p=p, dview=dview, Ain=None, method_deconvolution='oasis', skip_refinement=False)
    cnm = cnm.fit(images)
Example #59
0
    def __init__(self, field, params, interpolation='trilinear',
                 integration='simple', h_min=2e-3, h_max=2e4, len_max=500,
                 tol=1e-2, iter_max=1e3, xx=np.array([0, 0, 0])):
        """
        Creates the traced streamline for a specified vector field field.

        call signature:

          Stream(field, p, interpolation='trilinear',
                 integration='simple', h_min=2e-3, h_max=2e4, len_max=500,
                 tol=1e-2, iter_max=1e3, xx=np.array([0,0,0])):

        Keyword arguments:

         *field*:
            Vector field which is integrated over.

         *params*:
           Simulation and tracer parameters.

         *interpolation*:
            Interpolation of the vector field.
            'mean': Take the mean of the adjacent grid point.
            'trilinear': Weigh the adjacent grid points according to their
                         distance.

         *integration*:
            Integration method.
            'simple': low order method.
            'RK6': Runge-Kutta 6th order.

         *h_min*:
            Minimum step length for an underflow to occur.

         *h_max*:
            Parameter for the initial step length.

         *len_max*:
            Maximum length of the streamline. Integration will stop if
            l >= len_max.

         *tol*:
            Tolerance for each integration step. Reduces the step length if
            error >= tol.

         *iter_max*:
            Maximum number of iterations.

         *xx*:
            Initial seed.
        """

        # Tentative streamline length.
        self.tracers = np.zeros([int(iter_max), 3], dtype='float32')

        tol2 = tol**2
        dh = np.sqrt(h_max*h_min) # Initial step size.

        # Declare some vectors.
        xMid = np.zeros(3)
        xSingle = np.zeros(3)
        xHalf = np.zeros(3)
        xDouble = np.zeros(3)

        # Initialize the coefficient for the 6th order adaptive time step RK.
        a = np.zeros(6); b = np.zeros((6, 5)); c = np.zeros(6); cs = np.zeros(6)
        k = np.zeros((6, 3))
        a[1] = 0.2; a[2] = 0.3; a[3] = 0.6; a[4] = 1; a[5] = 0.875
        b[1, 0] = 0.2
        b[2, 0] = 3/40.; b[2, 1] = 9/40.
        b[3, 0] = 0.3; b[3, 1] = -0.9; b[3, 2] = 1.2
        b[4, 0] = -11/54.; b[4, 1] = 2.5; b[4, 2] = -70/27.; b[4, 3] = 35/27.
        b[5, 0] = 1631/55296.; b[5, 1] = 175/512.; b[5, 2] = 575/13824.
        b[5, 3] = 44275/110592.; b[5, 4] = 253/4096.
        c[0] = 37/378.; c[2] = 250/621.; c[3] = 125/594.; c[5] = 512/1771.
        cs[0] = 2825/27648.; cs[2] = 18575/48384.; cs[3] = 13525/55296.
        cs[4] = 277/14336.; cs[5] = 0.25

        # Do the streamline tracing.
        self.tracers[0, :] = xx
        outside = False
        stream_len = 0
        len = 0

        if integration == 'simple':
            while ((len < len_max) and (stream_len < iter_max-1) and
            (not np.isnan(xx[0])) and (outside == False)):
                # (a) single step (midpoint method)
                xMid = xx + 0.5*dh*vec_int_no_var(xx, field, params, interpolation)
                xSingle = xx + dh*vec_int_no_var(xMid, field, params, interpolation)

                # (b) two steps with half stepsize
                xMid = xx + 0.25*dh*vec_int_no_var(xx, field, params, interpolation)
                xHalf = xx + 0.5*dh*vec_int_no_var(xMid, field, params, interpolation)
                xMid = xHalf + 0.25*dh*vec_int_no_var(xHalf, field, params,
                                                      interpolation)
                xDouble = xHalf + 0.5*dh*vec_int_no_var(xMid, field, params,
                                                        interpolation)

                # (c) Check error (difference between methods).
                dist2 = np.sum((xSingle-xDouble)**2)
                if dist2 > tol2:
                    dh = 0.5*dh
                    if abs(dh) < h_min:
                        print "Error: stepsize underflow"
                        break
                else:
                    len += np.sqrt(np.sum((xx-xDouble)**2))
                    xx = xDouble.copy()
                    if abs(dh) < h_min:
                        dh = 2*dh
                    stream_len += 1
                    self.tracers[stream_len, :] = xx.copy()
                    if (dh > h_max) or (np.isnan(dh)):
                        dh = h_max
                    # Check if this point lies outside the domain.
                    if ((xx[0] < params.Ox-params.dx) or
                    (xx[0] > params.Ox+params.Lx+params.dx) or
                    (xx[1] < params.Oy-params.dy) or
                    (xx[1] > params.Oy+params.Ly+params.dy) or
                    (xx[2] < params.Oz) or (xx[2] > params.Oz+params.Lz)):
                        outside = True

        if integration == 'RK6':
            while ((len < len_max) and (stream_len < iter_max-1) and
            (not np.isnan(xx[0])) and (outside == False)):
                k[0, :] = dh*vec_int_no_var(xx, field, params, interpolation)
                k[1, :] = dh*vec_int_no_var(xx + b[1, 0]*k[0, :], field, params,
                                            interpolation)
                k[2, :] = dh*vec_int_no_var(xx + b[2, 0]*k[0, :] + b[2, 1]*k[1, :],
                                            field, params, interpolation)
                k[3, :] = dh*vec_int_no_var(xx + b[3, 0]*k[0, :] + b[3, 1]*k[1, :] +
                                            b[3, 2]*k[2, :], field, params,
                                            interpolation)
                k[4, :] = dh*vec_int_no_var(xx + b[4, 0]*k[0, :] + b[4, 1]*k[1, :] +
                                            b[4, 2]*k[2, :] + b[4, 3]*k[3, :],
                                            field, params, interpolation)
                k[5, :] = dh*vec_int_no_var(xx + b[5, 0]*k[0, :] + b[5, 1]*k[1, :] +
                                            b[5, 2]*k[2, :] + b[5, 3]*k[3, :] +
                                            b[5, 4]*k[4, :], field, params,
                                            interpolation)

                xNew = xx + c[0]*k[0, :]  + c[1]*k[1, :]  + c[2]*k[2, :]  + \
                       c[3]*k[3, :]  + c[4]*k[4, :]  + c[5]*k[5, :]
                xNewS = xx + cs[0]*k[0, :] + cs[1]*k[1, :] + cs[2]*k[2, :] + \
                        cs[3]*k[3, :] + cs[4]*k[4, :] + cs[5]*k[5, :]

                delta2 = np.dot((xNew-xNewS), (xNew-xNewS))
                delta = np.sqrt(delta2)

                if delta2 > tol2:
                    dh = dh*(0.9*abs(tol/delta))**0.2
                    if abs(dh) < h_min:
                        print "Error: step size underflow"
                        break
                else:
                    len += np.sqrt(np.sum((xx-xNew)**2))
                    xx = xNew
                    if abs(dh) < h_min:
                        dh = 2*dh
                    stream_len += 1
                    self.tracers[stream_len, :] = xx
                    if (dh > h_max) or (np.isnan(dh)):
                        dh = h_max
                    # Check if this point lies outside the domain.
                    if ((xx[0] < params.Ox-params.dx) or
                    (xx[0] > params.Ox+params.Lx+params.dx) or
                    (xx[1] < params.Oy-params.dy) or
                    (xx[1] > params.Oy+params.Ly+params.dy) or
                    (xx[2] < params.Oz) or (xx[2] > params.Oz+params.Lz)):
                        outside = True
                if (dh > h_max) or (delta == 0) or (np.isnan(dh)):
                    dh = h_max

        self.tracers = np.resize(self.tracers, (stream_len, 3))
        self.len = len
        self.stream_len = stream_len
        self.params = params
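
# A self-contained illustration of the step-doubling error control used by the
# 'simple' integrator above, applied to a toy 2-D rotation field. This is not
# the Stream class itself; vec() below is an assumption made for demonstration.
import numpy as np

def vec(x):
    # Unit-speed rotation field.
    v = np.array([-x[1], x[0]])
    n = np.linalg.norm(v)
    return v / n if n > 0 else v

def adaptive_midpoint_step(x, dh, tol=1e-2, h_min=1e-6):
    while True:
        # (a) one full midpoint step
        x_single = x + dh * vec(x + 0.5 * dh * vec(x))
        # (b) two half steps
        x_half = x + 0.5 * dh * vec(x + 0.25 * dh * vec(x))
        x_double = x_half + 0.5 * dh * vec(x_half + 0.25 * dh * vec(x_half))
        # (c) halve the step if the two estimates disagree by more than tol
        if np.sum((x_single - x_double) ** 2) > tol ** 2:
            dh *= 0.5
            if abs(dh) < h_min:
                raise RuntimeError("step size underflow")
        else:
            return x_double, dh

x, dh = np.array([1.0, 0.0]), 0.5
for _ in range(5):
    x, dh = adaptive_midpoint_step(x, dh)
print(x, dh)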
Example #60
0
    def balance(self, tournament, train_only=True, seed=0):
        """
        Copy of data where specified eras have mean y of 0.5.

        Parameters
        ----------
        tournament : int or str
            Which tournament's targets to balance.
        train_only : {True, False}, optional
            By default (True) only train eras are y-balanced. Regardless of
            the setting of `train_only`, any era containing a NaN y is never
            balanced.
        seed : int, optional
            Seed used by random number generator that selects which rows to
            keep. Default is 0.

        Returns
        -------
        data : Data
            A copy of data where specified eras have mean y (for the
            given `tournament`) of 0.5.
        """
        # This function is not written in a straightforward manner.
        # A few speed optimizations have been made.
        data = self
        if train_only:
            f = REGION_STR_TO_FLOAT['train']
            eras = np.unique(data.era_float[data.region_float == f]).tolist()
        else:
            eras = data.unique_era(as_str=False).tolist()
        era = data.era_float
        y = data.y[tournament]
        index = np.arange(y.size)
        remove = []
        rs = np.random.RandomState(seed)
        for e in eras:
            idx = era == e
            yi = y[idx]
            indexi = index[idx]
            n1 = yi.sum()
            if np.isnan(n1):
                continue
            n1 = int(n1)
            n0 = yi.size - n1
            if n0 == n1:
                pass
            elif n0 > n1:
                ix = indexi[yi == 0]
                ix = rs.choice(ix, size=n0 - n1, replace=False)
                remove.append(ix)
            elif n0 < n1:
                ix = indexi[yi == 1]
                ix = rs.choice(ix, size=n1 - n0, replace=False)
                remove.append(ix)
            else:
                msg = "balance should not reach this line"  # pragma: no cover
                raise RuntimeError(msg)  # pragma: no cover
            idx = ~idx
            era = era[idx]
            y = y[idx]
            index = index[idx]
        if len(remove) == 0:
            data = data.copy()
        else:
            keep = set(range(data.shape[0])) - set(np.concatenate(remove))
            keep = list(keep)
            df = data.df.take(keep)
            data = Data(df)
        return data
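
# A tiny stand-alone illustration of the balancing rule implemented above:
# within one era, randomly drop rows from the majority y class until the era's
# mean y is 0.5. Synthetic data only; this is not the Data class itself.
import numpy as np

rs = np.random.RandomState(0)
y = np.array([0, 0, 0, 0, 1, 1])     # one era: four 0s, two 1s
index = np.arange(y.size)
n1 = int(y.sum())
n0 = y.size - n1
if n0 > n1:
    drop = rs.choice(index[y == 0], size=n0 - n1, replace=False)
elif n1 > n0:
    drop = rs.choice(index[y == 1], size=n1 - n0, replace=False)
else:
    drop = np.array([], dtype=int)
keep = np.setdiff1d(index, drop)
print(y[keep].mean())                # 0.5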