Example no. 1
    def _crop_out_special_values(self, ws):

        if ws.getNumberHistograms() != 1:
            # Stripping special (NaN/inf) values is only possible on 1D workspaces
            return

        y_vals = ws.readY(0)
        length = len(y_vals)
        # Find the first finite (non-NaN, non-inf) value
        start = 0
        for i in range(0, length):
            if not np.isnan(y_vals[i]) and not np.isinf(y_vals[i]):
                start = i
                break
        # Now find the last finite value
        stop = 0
        length -= 1
        for j in range(length, 0, -1):
            if not np.isnan(y_vals[j]) and not np.isinf(y_vals[j]):
                stop = j
                break
        # Find the appropriate X values and call CropWorkspace
        x_vals = ws.readX(0)
        start_x = x_vals[start]
        # Make sure we're inside the bin that we want to crop
        end_x = x_vals[stop + 1]
        return self._crop_to_x_range(ws=ws, x_min=start_x, x_max=end_x)
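A minimal standalone sketch of the same finite-range detection with plain numpy (no Mantid workspace assumed; the array below is made up): np.isfinite marks the usable bins, and the first and last True positions give the crop range.

import numpy as np

# Hypothetical 1D spectrum with special values at both ends.
y = np.array([np.nan, np.inf, 1.2, 3.4, 0.0, np.nan])
finite = np.flatnonzero(np.isfinite(y))
if finite.size:
    start, stop = finite[0], finite[-1]   # 2 and 4 here
    y_cropped = y[start:stop + 1]         # array([1.2, 3.4, 0.0])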
Example no. 2
    def reconstruct_coincidence(self, coincidence_events, station_numbers=None,
                                offsets=None, initial=None):
        """Reconstruct a single coincidence

        :param coincidence_events: a coincidence list consisting of one
                                   or more (station_number, event) tuples.
        :param station_numbers: list of station numbers, to only use
                                events from those stations.
        :param offsets: dictionary with detector offsets for each station.
                        These detector offsets should be relative to one
                        detector from a specific station.
        :param initial: dictionary with already fitted shower parameters.
        :return: theta, phi, and the list of station numbers used.

        """
        if len(coincidence_events) < 1:
            return nan, nan, []
        if offsets is None:
            offsets = {}
        if initial is None:
            initial = {}

        # Subtract base timestamp to prevent loss of precision
        ts0 = int(coincidence_events[0][1]['timestamp'])
        ets0 = ts0 * int(1e9)
        self.cluster.set_timestamp(ts0)
        t, x, y, z, nums = ([], [], [], [], [])

        offsets = self.get_station_offsets(coincidence_events, station_numbers,
                                           offsets, ts0)

        for station_number, event in coincidence_events:
            if station_numbers is not None:
                if station_number not in station_numbers:
                    continue
            t_off = offsets.get(station_number, NO_OFFSET)
            station = self.cluster.get_station(station_number)
            t_detectors = relative_detector_arrival_times(event, ets0,
                                                          offsets=t_off,
                                                          station=station)
            for t_detector, detector in zip(t_detectors, station.detectors):
                if not isnan(t_detector):
                    dx, dy, dz = detector.get_coordinates()
                    t.append(t_detector)
                    x.append(dx)
                    y.append(dy)
                    z.append(dz)
            if not all(isnan(t_detectors)):
                nums.append(station_number)

        if len(t) >= 3 and 'core_x' in initial and 'core_y' in initial:
            theta, phi = self.curved.reconstruct_common(t, x, y, z, initial)
        elif len(t) == 3:
            theta, phi = self.direct.reconstruct_common(t, x, y, z, initial)
        elif len(t) > 3:
            theta, phi = self.fit.reconstruct_common(t, x, y, z, initial)
        else:
            theta, phi = (nan, nan)

        return theta, phi, nums
Example no. 3
def responsetime(conn):
    """ Determine the average response time for tasks in bins """
    c = conn.cursor()
    results = c.execute("""
    select finished.time, event.time, finished.time - event.time as responsetime
    from event
    left join (select time, task_id from event where type_id=""" + taskid("run_task") + """) as finished
          on event.task_id = finished.task_id
    where event.type_id=""" + taskid("add_task")).fetchall()

    results = np.matrix(results, dtype=float)
    runtimes = results[:,2]

    nones = runtimes == np.array(None)  # unused: the float cast above already turned NULLs into NaN
    (finished, nofinish) = (runtimes[~np.isnan(runtimes).all(axis=1)], runtimes[np.isnan(runtimes).any(axis=1)])

    return {
        "completion":{
            "finished":finished.size,
            "dnf":nofinish.size,
        },
        "response_times":{
            "min":np.min(finished),
            "mean":np.mean(finished),
            "max":np.max(finished),
            "std":np.std(finished)
        }
    }
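A small illustration of the NaN bookkeeping above, with made-up response times: unfinished tasks come back from the query as NULL, become NaN after the float cast, and can then be split off with np.isnan.

import numpy as np

response = np.array([0.4, None, 1.2, 0.9, None], dtype=float)  # None -> nan
finished = response[~np.isnan(response)]
dnf = response[np.isnan(response)]
stats = {"finished": finished.size, "dnf": dnf.size,
         "mean": np.mean(finished), "max": np.max(finished)}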
Example no. 4
    def test_align(self):
        left = create_test_data()
        right = left.copy(deep=True)
        right['dim3'] = ('dim3', list('cdefghijkl'))
        right['var3'][:-2] = right['var3'][2:]
        right['var3'][-2:] = np.random.randn(*right['var3'][-2:].shape)

        intersection = list('cdefghij')
        union = list('abcdefghijkl')

        left2, right2 = align(left, right, join='inner')
        self.assertArrayEqual(left2['dim3'], intersection)
        self.assertDatasetIdentical(left2, right2)

        left2, right2 = align(left, right, join='outer')
        self.assertVariableEqual(left2['dim3'], right2['dim3'])
        self.assertArrayEqual(left2['dim3'], union)
        self.assertDatasetIdentical(left2.labeled(dim3=intersection),
                                    right2.labeled(dim3=intersection))
        self.assertTrue(np.isnan(left2['var3'][-2:]).all())
        self.assertTrue(np.isnan(right2['var3'][:2]).all())

        left2, right2 = align(left, right, join='left')
        self.assertVariableEqual(left2['dim3'], right2['dim3'])
        self.assertVariableEqual(left2['dim3'], left['dim3'])
        self.assertDatasetIdentical(left2.labeled(dim3=intersection),
                                    right2.labeled(dim3=intersection))
        self.assertTrue(np.isnan(right2['var3'][:2]).all())

        left2, right2 = align(left, right, join='right')
        self.assertVariableEqual(left2['dim3'], right2['dim3'])
        self.assertVariableEqual(left2['dim3'], right['dim3'])
        self.assertDatasetIdentical(left2.labeled(dim3=intersection),
                                    right2.labeled(dim3=intersection))
        self.assertTrue(np.isnan(left2['var3'][-2:]).all())
Example no. 5
 def __set_static_gaus_pmfs(self):
     if np.logical_not(self.off_buff.is_full()):
         print "The long term buffer is not yet full.  This may give undesirable results"
     
     # median RSS of off-state buffer
     cal_med = self.off_buff.get_no_nan_median()
     
     if (np.sum(cal_med == 127) > 0) | (np.sum(np.isnan(cal_med)) > 0):
         sys.stderr.write('At least one link has a median of 127 or is nan\n\n')
         quit()
          
     if (np.sum(np.isnan(self.off_buff.get_nanvar())) > 0):
         sys.stderr.write('the long term buffer has a nan')
         quit()
     
     cal_med_mat = np.tile(cal_med,(self.V_mat.shape[1],1)).T
     
     # variance of RSS during calibration
     cal_var = np.maximum(self.off_buff.get_nanvar(),self.omega) #3.0 
     cal_var_mat = np.tile(cal_var,(self.V_mat.shape[1],1)).T
     
     # Compute the off_link emission probabilities for each link
     x = np.exp(- (self.V_mat - cal_med_mat)**2/(2*cal_var_mat/1.0)) # 1.0
     self.off_links = self.__normalize_pmf(x)
     
     # Compute the on_link emission probabilities for each link
     x = np.exp(- (self.V_mat - (cal_med_mat-self.Delta))**2/(self.eta*2*cal_var_mat)) # 3
     self.on_links = self.__normalize_pmf(x) 
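A reduced sketch of the emission-probability construction above, with illustrative numbers in place of the class attributes: a Gaussian evaluated on a grid of candidate RSS values and normalized to sum to one.

import numpy as np

rss_grid = np.arange(-100.0, -20.0)      # candidate RSS values for one link
cal_med, cal_var = -75.0, 4.0            # assumed calibration median / variance
pmf = np.exp(-(rss_grid - cal_med) ** 2 / (2 * cal_var))
pmf /= pmf.sum()                         # normalized emission pmf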
Example no. 6
def calcForces_and_potentialE(F_x, F_y, old_or_new, x_positions, y_positions, V_atoms):
    """calculates x and y forces and potential energy per atom as summed over
    all contributions due to all neighbors, as functions of position and the
    parameters of the LJ potential"""

    for atom in xrange(Natoms):
        for i in xrange(Natoms):
            if i != atom:                    
                    delx = x_positions[atom,old_or_new]-x_positions[i,old_or_new]
                    dely = y_positions[atom,old_or_new]-y_positions[i,old_or_new]
                    r_ij = np.sqrt( (x_positions[atom,old_or_new]-x_positions[i,old_or_new])**2\
                                    + (y_positions[atom,old_or_new]-y_positions[i,old_or_new])**2 )
                    F_x[atom,old_or_new] =  F_x[atom,old_or_new] - 24.0 *epsilon * sigma**6 \
                                           * delx * ( 1 - 2.0*(sigma/r_ij)**6 ) / r_ij**8
                    F_y[atom,old_or_new] =  F_y[atom,old_or_new] - 24.0 *epsilon * sigma**6 * \
                                           dely * ( 1 - 2.0*(sigma/r_ij)**6 ) / r_ij**8    
                    V_atoms[atom] = V_atoms[atom] + 4.0 * epsilon \
                                    * ( (sigma/r_ij)**12-(sigma/r_ij)**6 )
                    if np.isnan(F_x[atom,old_or_new]) or np.isinf(F_x[atom,old_or_new]):
                        F_x[atom,old_or_new]=0
                    if np.isnan(F_y[atom,old_or_new]) or np.isinf(F_y[atom,old_or_new]):
                        F_y[atom,old_or_new]=0
                    if np.isnan(V_atoms[atom]) or np.isinf(V_atoms[atom]):
                        V_atoms[atom]=0                   
    return F_x, F_y, V_atoms
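For reference, the same Lennard-Jones force and potential terms written out for a single pair, with assumed values of epsilon and sigma; this is the quantity accumulated per neighbour inside the double loop above.

import numpy as np

epsilon, sigma = 1.0, 1.0    # assumed LJ parameters
delx, dely = 1.1, 0.3        # separation components of one pair
r_ij = np.sqrt(delx ** 2 + dely ** 2)
f_x = -24.0 * epsilon * sigma ** 6 * delx * (1 - 2.0 * (sigma / r_ij) ** 6) / r_ij ** 8
f_y = -24.0 * epsilon * sigma ** 6 * dely * (1 - 2.0 * (sigma / r_ij) ** 6) / r_ij ** 8
v = 4.0 * epsilon * ((sigma / r_ij) ** 12 - (sigma / r_ij) ** 6)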
Example no. 7
def estimateBIsochrone(R,z,pot=None):
    """
    NAME:
       estimateBIsochrone
    PURPOSE:
       Estimate a good value for the scale of the isochrone potential by matching the slope of the rotation curve
    INPUT:
       R,z = coordinates (if these are arrays, the min, median, and max estimated b are returned, i.e., if this is an orbit)
       pot= Potential instance or list thereof
    OUTPUT:
       b if 1 R,Z given
       bmin,bmedian,bmax if multiple R given       
    HISTORY:
       2013-09-12 - Written - Bovy (IAS)
    """
    if pot is None: #pragma: no cover
        raise IOError("pot= needs to be set to a Potential instance or list thereof")
    if isinstance(R,nu.ndarray):
        bs= nu.array([estimateBIsochrone(R[ii],z[ii],pot=pot) for ii in range(len(R))])
        return (nu.amin(bs[~nu.isnan(bs)]),
                nu.median(bs[~nu.isnan(bs)]),
                nu.amax(bs[~nu.isnan(bs)]))
    else:
        r2= R**2.+z**2
        r= math.sqrt(r2)
        dlvcdlr= dvcircdR(pot,r)/vcirc(pot,r)*r
        try:
            b= optimize.brentq(lambda x: dlvcdlr-(x/math.sqrt(r2+x**2.)-0.5*r2/(r2+x**2.)),
                               0.01,100.)
        except: #pragma: no cover
            b= nu.nan
        return b
Example no. 8
    def test_float_modulus_corner_cases(self):
        # Check remainder magnitude.
        for dt in np.typecodes['Float']:
            b = np.array(1.0, dtype=dt)
            a = np.nextafter(np.array(0.0, dtype=dt), -b)
            rem = self.mod(a, b)
            assert_(rem <= b, 'dt: %s' % dt)
            rem = self.mod(-a, -b)
            assert_(rem >= -b, 'dt: %s' % dt)

        # Check nans, inf
        with suppress_warnings() as sup:
            sup.filter(RuntimeWarning, "invalid value encountered in remainder")
            for dt in np.typecodes['Float']:
                fone = np.array(1.0, dtype=dt)
                fzer = np.array(0.0, dtype=dt)
                finf = np.array(np.inf, dtype=dt)
                fnan = np.array(np.nan, dtype=dt)
                rem = self.mod(fone, fzer)
                assert_(np.isnan(rem), 'dt: %s' % dt)
                # MSVC 2008 returns NaN here, so disable the check.
                #rem = self.mod(fone, finf)
                #assert_(rem == fone, 'dt: %s' % dt)
                rem = self.mod(fone, fnan)
                assert_(np.isnan(rem), 'dt: %s' % dt)
                rem = self.mod(finf, fone)
                assert_(np.isnan(rem), 'dt: %s' % dt)
Example no. 9
    def test_autocorr(self):
        # Just run the function
        corr1 = self.ts.autocorr()

        # Now run it with the lag parameter
        corr2 = self.ts.autocorr(lag=1)

        # corr() with lag needs Series of at least length 2
        if len(self.ts) <= 2:
            self.assertTrue(np.isnan(corr1))
            self.assertTrue(np.isnan(corr2))
        else:
            self.assertEqual(corr1, corr2)

        # Choose a random lag between 1 and length of Series - 2
        # and compare the result with the Series corr() function
        n = 1 + np.random.randint(max(1, len(self.ts) - 2))
        corr1 = self.ts.corr(self.ts.shift(n))
        corr2 = self.ts.autocorr(lag=n)

        # corr() with lag needs Series of at least length 2
        if len(self.ts) <= 2:
            self.assertTrue(np.isnan(corr1))
            self.assertTrue(np.isnan(corr2))
        else:
            self.assertEqual(corr1, corr2)
Example no. 10
def exact_roc(actuals, controls):
    """
    computes the area under the ROC curve for separating two sets. Uses all
    possible thresholds and trapezoidal interpolation. Also returns arrays of
    the true positive rate and the false positive rate.
    """

    actuals = np.ravel(actuals)
    controls = np.ravel(controls)
    if np.isnan(actuals).any():
        raise RuntimeError('NaN found in actuals')
    if np.isnan(controls).any():
        raise RuntimeError('NaN found in controls')

    thresholds = np.hstack([-np.inf,
        np.unique(np.concatenate((actuals,controls))), np.inf])[::-1]
    true_pos_rate = np.empty(thresholds.size)
    false_pos_rate = np.empty(thresholds.size)
    num_act = float(len(actuals))
    num_ctr = float(len(controls))

    for i, value in enumerate(thresholds):
        true_pos_rate[i] = (actuals >= value).sum() / num_act
        false_pos_rate[i] = (controls >= value).sum() / num_ctr
    auc = np.dot(np.diff(false_pos_rate),
            (true_pos_rate[0:-1]+true_pos_rate[1:])/2)
    return(auc, true_pos_rate, false_pos_rate)
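A usage sketch with made-up scores; two fully separated sets give an AUC of 1.0.

import numpy as np

actuals = np.array([0.9, 0.8, 0.7, 0.6])
controls = np.array([0.4, 0.5, 0.3, 0.2])
auc, tpr, fpr = exact_roc(actuals, controls)   # auc == 1.0 for this toy split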
Example no. 11
 def __init__(self, x, y):
             
     assert np.ndim(x)==2 and np.ndim(y)==2 and np.shape(x)==np.shape(y), \
         'x and y must be 2D arrays of the same size.'
     
     if np.any(np.isnan(x)) or np.any(np.isnan(y)):
         mask = np.isnan(x) | np.isnan(y)
         x = np.ma.masked_where(mask, x)
         y = np.ma.masked_where(mask, y)
         
     self.x_vert = x
     self.y_vert = y
     
     mask_shape = tuple([n-1 for n in self.x_vert.shape])
     self.mask_rho = np.ones(mask_shape, dtype='d')
     
     # If a masked array is given for the vertices, modify the mask such that
     # non-existent grid points are masked.  A cell requires all four
     # vertices to be defined as a water point.
     if isinstance(self.x_vert, np.ma.MaskedArray):
         mask = (self.x_vert.mask[:-1,:-1] | self.x_vert.mask[1:,:-1] | \
                 self.x_vert.mask[:-1,1:] | self.x_vert.mask[1:,1:])
         self.mask_rho = np.asarray(~(~np.bool_(self.mask_rho) | mask), dtype='d')
     
     if isinstance(self.y_vert, np.ma.MaskedArray):
         mask = (self.y_vert.mask[:-1,:-1] | self.y_vert.mask[1:,:-1] | \
                 self.y_vert.mask[:-1,1:] | self.y_vert.mask[1:,1:])
         self.mask_rho = np.asarray(~(~np.bool_(self.mask_rho) | mask), dtype='d')
     
     self._calculate_subgrids()
     self._calculate_metrics()        
Example no. 12
def test_nan_arithmetic(ctx_getter):
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    def make_nan_contaminated_vector(size):
        shape = (size,)
        a = numpy.random.randn(*shape).astype(numpy.float32)
        #for i in range(0, shape[0], 3):
            #a[i] = float('nan')
        from random import randrange
        for i in range(size//10):
            a[randrange(0, size)] = float('nan')
        return a

    size = 1 << 20

    a = make_nan_contaminated_vector(size)
    a_gpu = cl_array.to_device(context, queue, a)
    b = make_nan_contaminated_vector(size)
    b_gpu = cl_array.to_device(context, queue, b)

    ab = a*b
    ab_gpu = (a_gpu*b_gpu).get()

    for i in range(size):
        assert numpy.isnan(ab[i]) == numpy.isnan(ab_gpu[i])
Example no. 13
    def update(self, tick):
        security = tick['security']
        quote_time = datetime.datetime.fromtimestamp(int(tick['data']['timestamp']))
        last_price = tick['data']['last']
        log.debug("tick update security %s with tick %s, price %s" % (security.symbol, quote_time, last_price))
        # update sma

        # appending new row to df is not efficient
        data = tick['data']
        row = [quote_time, float(data['volume']), float(data['bid']), float(data['ask']), float(data['last']), float(data['high']), float(data['low'])]
        new_serie = pd.Series(row, index=['datetime', 'volume', 'bid', 'ask', 'last', 'high', 'low'])
        self.quotes = self.quotes.append(new_serie, ignore_index=True)

        self.sma_short = SMA(self.quotes, timeperiod=10, key='last')
        self.sma_mid = SMA(self.quotes, timeperiod=60, key='last')
        self.sma_long = SMA(self.quotes, timeperiod=200, key='last')

        if np.isnan(self.sma_long.iloc[-1]) or np.isnan(self.sma_mid.iloc[-1]) or np.isnan(self.sma_short.iloc[-1]):
            log.info('not enough data, skip to reduce risk')
            return None

        action = None
        if security.symbol not in self.account.holdings:
            action = self.check_buy(security)

        # already have some holdings
        else:
            action = self.check_sell(security)

        log.info('strategy action {0}'.format(action))
        return action
Example no. 14
def norm_range(data, mins, maxs, lowbound, highbound):
  """ Normalizing the data with range normalization between lowbound and highbound
  
  Keyword parameters:
  
  data
    the data to be normalized, numpy.ndarray, each row is a sample

  mins, maxs
    arrays of minimum and maximum values that each feature can take

  lowbound, highbound
    the bounds of the normalization
"""
  denom = maxs - mins
  diff = highbound - lowbound
  addit = numpy.ndarray([data.shape[0],1])
  addit.fill(lowbound)
  for i in range(data.shape[0]): # for each feature vector
    data[i] = diff * (data[i] - mins) / denom + lowbound
    nanCounter = numpy.isnan(data[i])
    # If the whole row is NaN, keep it as NaN; otherwise replace NaNs with the range midpoint
    if(sum(nanCounter)!=data.shape[1]):
      data[i][numpy.isnan(data[i])] = (lowbound + highbound) / 2
  return data
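A usage sketch with toy data: two 3-feature samples scaled into [-1, 1]; the NaN in the first row is replaced by the midpoint (0.0) because that row is not entirely NaN.

import numpy

data = numpy.array([[1.0, 5.0, numpy.nan],
                    [2.0, 7.5, 30.0]])
mins = numpy.array([0.0, 5.0, 10.0])
maxs = numpy.array([2.0, 10.0, 30.0])
normed = norm_range(data, mins, maxs, -1.0, 1.0)
# normed -> [[0.0, -1.0, 0.0], [1.0, 0.0, 1.0]]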
Example no. 15
    def _evaluate_projection(self, x, y):
        """
        kNNEvaluate - evaluate class separation in the given projection using a k-NN method
        Parameters
        ----------
        x - variables to evaluate
        y - class

        Returns
        -------
        scores
        """
        if self.percent_data_used != 100:
            rand = np.random.choice(len(x), int(len(x) * self.percent_data_used / 100),
                                    replace=False)
            x = x[rand]
            y = y[rand]
        neigh = KNeighborsClassifier(n_neighbors=3) if self.attr_color.is_discrete else \
            KNeighborsRegressor(n_neighbors=3)
        assert ~(np.isnan(x).any(axis=None) | np.isnan(y).any(axis=None))
        neigh.fit(x, y)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UserWarning)
            scores = cross_val_score(neigh, x, y, cv=3)
        return scores.mean()
Example no. 16
 def test_update_player(self):
   player_dict = io.create_player_dict({'jamesle01': ''})
   player_dict['jamesle01']['gamelog_url_list'] = ['http://www.basketball-reference.com/players/j/jamesle01/gamelog/2013/',
                                                   'http://www.basketball-reference.com/players/j/jamesle01/gamelog/2015/',
                                                   'http://www.basketball-reference.com/players/j/jamesle01/gamelog/2014/']
   loaded_dict = scraper.load_player(player_dict, 'jamesle01')
   assert loaded_dict['jamesle01']['gamelog_data'] is not None
   gd = loaded_dict['jamesle01']['gamelog_data']
   assert len(gd) == 285
   # Okay now pretend this URL was there all along as well
   player_dict['jamesle01']['gamelog_url_list'].append('http://www.basketball-reference.com/players/j/jamesle01/gamelog/2016/')
   scraper.update_player(player_dict, 'jamesle01', 2016)
   gd = loaded_dict['jamesle01']['gamelog_data']
   assert len(gd) > 285  # but I mean, I don't know exactly what it'll be since more games are still being played this year
   import datetime       # so explicitly make sure this test is updated for the 2016-17 season
   assert datetime.datetime.today() <= datetime.datetime(year=2016, month=7, day=1)
   # Spot check a game to make sure the stats are what we expect
   test_game_dict = dict(gd.loc['2015-10-30'])
   reference_dict = {u'+/-': 7.0,
                     u'3P': 0.0,
                     u'3P%': 0.0,
                     u'3PA': 2.0,
                     u'AST': 4.0,
                     u'Age': u'30-304',
                     u'BLK': 0.0,
                     u'DFS': 41.3,
                     u'DRB': 3.0,
                     u'Date': nan,
                     u'FG': 13.0,
                     u'FG%': 0.684,
                     u'FGA': 19.0,
                     u'FT%': 0.6,
                     u'FT': 3.0,
                     u'FTA': 5.0,
                     u'G': 3.0,
                     u'GS': 1.0,
                     u'GmSc': 21.0,
                     u'HomeAway': nan,
                     u'MP': u'33:56',
                     u'ORB': 2.0,
                     u'Opp': u'MIA',
                     u'PF': 3.0,
                     u'PTS': 29.0,
                     u'Rk': 3.0,
                     u'STL': 1.0,
                     u'TOV': 4.0,
                     u'TRB': 5.0,
                     u'Tm': u'CLE',
                     u'WinLoss': u'W (+10)'}
   self.assertItemsEqual(reference_dict.keys(), test_game_dict.keys())
   for k in reference_dict:
      # assertAlmostEqual also works when the items simply == each other, so strings can be passed straight through;
      # NaN never compares equal to itself, though, so the float case is handled explicitly below.
     if isinstance(reference_dict[k], float):
       if isnan(reference_dict[k]):
         assert isnan(test_game_dict[k])
       else:
         self.assertAlmostEqual(reference_dict[k], test_game_dict[k], places=3)
     else:
       self.assertEqual(reference_dict[k], test_game_dict[k])
Example no. 17
def Column8(df,Nlen,Tlen):
    """Build the stacked per-firm design matrix (firm dummies, firm-specific
    market terms and the event interaction columns), fit it by OLS and return
    the last nine coefficients (gamma)."""
    mA = np.zeros((Nlen*Tlen,Nlen*2+9),float)
    vb = np.zeros(Nlen*Tlen)
    i = 0
    for firmid,firmgroup in df.groupby('Firmid'):
        if not firmgroup['Dprice'].isnull().values.any():
            mA[i*Tlen:(i+1)*Tlen,i] = np.ones(Tlen)
            mA[i*Tlen:(i+1)*Tlen,i+Nlen] = firmgroup['Dmarket'].values
            mA[i*Tlen:(i+1)*Tlen,2*Nlen] = firmgroup['Event'].values
            eu = firmgroup['Conc'].values
            where_are_NaNs = np.isnan(eu)
            eu[where_are_NaNs] = 0
            mis = firmgroup['Dumconc'].values
            where_are_NaNs = np.isnan(mis)
            mis[where_are_NaNs] = 0
            mA[i*Tlen:(i+1)*Tlen,1+2*Nlen] = np.multiply(firmgroup['Do'].values,firmgroup['Event'].values)
            mA[i*Tlen:(i+1)*Tlen,2+2*Nlen] = np.multiply(firmgroup['Di'].values,firmgroup['Event'].values)
            mA[i*Tlen:(i+1)*Tlen,3+2*Nlen] = np.multiply(eu,firmgroup['Event'].values)
            mA[i*Tlen:(i+1)*Tlen,4+2*Nlen] = np.multiply(mis,firmgroup['Event'].values)
            mA[i*Tlen:(i+1)*Tlen,5+2*Nlen] = np.multiply(np.multiply(eu,firmgroup['Event'].values),firmgroup['Do'].values)
            mA[i*Tlen:(i+1)*Tlen,6+2*Nlen] = np.multiply(np.multiply(mis,firmgroup['Event'].values),firmgroup['Do'].values)
            mA[i*Tlen:(i+1)*Tlen,7+2*Nlen] = np.multiply(np.multiply(eu,firmgroup['Event'].values),firmgroup['Di'].values)
            mA[i*Tlen:(i+1)*Tlen,8+2*Nlen] = np.multiply(np.multiply(mis,firmgroup['Event'].values),firmgroup['Di'].values)
            vb[i*Tlen:(i+1)*Tlen] = [p2f(x) for x in firmgroup['Dprice'].values]
            i += 1
    tmpp = inv(mA.T.dot(mA)).dot(mA.T)
    Xhat = tmpp.dot(vb)
    gamma = Xhat[-9:]
    print gamma
    return gamma
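As an aside (not part of the original routine), the closed-form step inv(mA.T.dot(mA)).dot(mA.T).dot(vb) can also be computed with np.linalg.lstsq, which avoids forming the inverse explicitly; a toy sketch with made-up data:

import numpy as np

mA = np.array([[1.0, 0.0], [1.0, 1.0], [1.0, 2.0]])
vb = np.array([1.0, 2.0, 3.1])
Xhat = np.linalg.lstsq(mA, vb)[0]   # same least-squares solution for full-rank mA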
Example no. 18
def get_coded_data(cases_df, case_ids, coded_feature_names):
    """
    Retrieves the valences corresponding to case_ids, 
    along with coded features, if any
    Recode unknown valences to neutral.
    args:
      cases_df: A dataframe containing the case variables.
      case_ids: list of sorted case_ids
      coded_feature_names: list of column names to pull from cases_df (ie 'geniss' or ['geniss','casetyp1'])
    returns:
      valences: np array of valences
      coded_feature_array: np array of coded features
      filtered_cases_df: Dataframe containing the sorted, filtered case variables
    """
    UNKNOWN_VALENCE = 0
    NEUTRAL_VALENCE = 2

    if isinstance(coded_feature_names, str):
        coded_feature_names = [coded_feature_names]

    print "coded_feature_names: ",coded_feature_names

    valences = []
    coded_feature_list = []
    for case_id in case_ids:
        valence = cases_df[cases_df['caseid'] == case_id]['direct1'].values[0]
        if not np.isnan(valence):
            valence = int(valence)
        else:
            valence = NEUTRAL_VALENCE

        if coded_feature_names is not None:
            coded_feature_row = cases_df[cases_df['caseid'] == case_id][coded_feature_names].values[0]
            clean_row = []

            #clean row
            for val in coded_feature_row:
                if val and not np.isnan(val):
                    clean_row.append(int(val))
                else:
                    clean_row.append(0)
            assert clean_row[0]>=0, ""
            coded_feature_list.append(clean_row)
            
        # Replacing unknown valence variables with neutral scores.
        if valence == UNKNOWN_VALENCE:
            valence = NEUTRAL_VALENCE
        valences.append(valence)

    #one-hot encoding
    if coded_feature_names is not None:
        enc = OneHotEncoder()
        coded_feature_array = enc.fit_transform(np.array(coded_feature_list))
        print "Coded Feature Array shape: ", coded_feature_array.shape
    else: 
        coded_feature_array = np.array([])

    #Filter case df
    filtered_case_df = filter_cases_df(cases_df,case_ids)

    return np.array(valences),coded_feature_array,filtered_case_df
Example no. 19
def analyze_symbols(symbols):
    number = 0
    total_bull_correct = np.zeros(len(patterns))
    total_bull_wrong = np.zeros(len(patterns))
    total_bear_correct = np.zeros(len(patterns))
    total_bear_wrong = np.zeros(len(patterns))

    for symbol in symbols:
        print symbol
        bc, bw, bco, bwr = evaluate_pattern(symbol)
        if bc is None:
            continue

        for i in range(len(bc)):
            if not np.isnan(bc[i]):
                total_bull_correct[i] += bc[i]
                total_bull_wrong[i] += bw[i]
            if not np.isnan(bco[i]):
                total_bear_correct[i] += bco[i]
                total_bear_wrong[i] += bwr[i]
        number += 1

    sum_bull = total_bull_correct + total_bull_wrong
    sum_bear = total_bear_correct + total_bear_wrong
    pgain = total_bull_correct*1.0/sum_bull
    plose = total_bear_correct*1.0/sum_bear

    keys = patterns
    for i in range(len(keys)):
        print keys[i], ": ", pgain[i], " ", sum_bull[i], " ", plose[i], " ", sum_bear[i]
Example no. 20
    def test_nan_inf(self):
        # Not-a-number
        q = u.Quantity('nan', unit='cm')
        assert np.isnan(q.value)

        q = u.Quantity('NaN', unit='cm')
        assert np.isnan(q.value)

        q = u.Quantity('-nan', unit='cm')  # float() allows this
        assert np.isnan(q.value)

        q = u.Quantity('nan cm')
        assert np.isnan(q.value)
        assert q.unit == u.cm

        # Infinity
        q = u.Quantity('inf', unit='cm')
        assert np.isinf(q.value)

        q = u.Quantity('-inf', unit='cm')
        assert np.isinf(q.value)

        q = u.Quantity('inf cm')
        assert np.isinf(q.value)
        assert q.unit == u.cm

        q = u.Quantity('Infinity', unit='cm')  # float() allows this
        assert np.isinf(q.value)

        # make sure these strings don't parse...
        with pytest.raises(TypeError):
            q = u.Quantity('', unit='cm')

        with pytest.raises(TypeError):
            q = u.Quantity('spam', unit='cm')
Example no. 21
 def _get_sum(self):
     """Compute sum of non NaN / Inf values in the array."""
     try:
         return self._sum
     except AttributeError:
         self._sum = self.no_nan.sum()
          # The following 2 lines are needed because in Python 3.3 with NumPy
          # 1.7.1, numpy.ndarray and numpy.memmap aren't hashable.
         if type(self._sum) is numpy.memmap:
             self._sum = numpy.asarray(self._sum).item()
         if self.has_nan and self.no_nan.mask.all():
             # In this case the sum is not properly computed by numpy.
             self._sum = 0
         if numpy.isinf(self._sum) or numpy.isnan(self._sum):
             # NaN may happen when there are both -inf and +inf values.
             if self.has_nan:
                 # Filter both NaN and Inf values.
                 mask = self.no_nan.mask + numpy.isinf(self[1])
             else:
                 # Filter only Inf values.
                 mask = numpy.isinf(self[1])
             if mask.all():
                 self._sum = 0
             else:
                 self._sum = numpy.ma.masked_array(self[1], mask).sum()
             # At this point there should be no more NaN.
             assert not numpy.isnan(self._sum)
     return self._sum
Example no. 22
    def reportPowerDeviationsDifference(self, book, sheetName, deviationsA, deviationsB, gradient):
        
        sh = book.add_sheet(sheetName, cell_overwrite_ok=True)

        for i in range(self.windSpeedBins.numberOfBins):
            sh.col(i + 1).width = 256 * 5

        for j in range(self.turbulenceBins.numberOfBins):        

            turbulence = self.turbulenceBins.binCenterByIndex(j)
            row = self.turbulenceBins.numberOfBins - j - 1
            
            sh.write(row, 0, turbulence, self.percent_no_dp_style)
            
            for i in range(self.windSpeedBins.numberOfBins):

                windSpeed = self.windSpeedBins.binCenterByIndex(i)
                col = i + 1
                
                if j == 0: sh.write(self.turbulenceBins.numberOfBins, col, windSpeed, self.one_dp_style)
                
                if windSpeed in deviationsA.matrix:
                    if turbulence in deviationsA.matrix[windSpeed]:
                        deviationA = deviationsA.matrix[windSpeed][turbulence]
                        deviationB = deviationsB.matrix[windSpeed][turbulence]
                        if not np.isnan(deviationA) and not np.isnan(deviationB):
                            diff = abs(deviationA) - abs(deviationB)
                            sh.write(row, col, diff, gradient.getStyle(diff))
Example no. 23
def nanallclose(x, y, rtol=1.0e-5, atol=1.0e-8):
    """Numpy allclose function which allows NaN

    Input
        x, y: Either scalars or numpy arrays

    Output
        True or False

    Returns True if all non-nan elements pass.
    """

    xn = numpy.isnan(x)
    yn = numpy.isnan(y)
    if numpy.any(xn != yn):
        # Presence of NaNs is not the same in x and y
        return False

    if numpy.all(xn):
        # Everything is NaN.
        # This will also take care of x and y being NaN scalars
        return True

    # Filter NaN's out
    if numpy.any(xn):
        x = x[~xn]
        y = y[~yn]

    # Compare non NaN's and return
    return numpy.allclose(x, y, rtol=rtol, atol=atol)
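Usage sketch: arrays whose NaNs sit in the same positions compare as close; a differing NaN pattern fails immediately.

import numpy

a = numpy.array([1.0, numpy.nan, 3.0])
b = numpy.array([1.0, numpy.nan, 3.0 + 1e-9])
assert nanallclose(a, b)
assert not nanallclose(a, numpy.array([1.0, 2.0, 3.0]))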
Example no. 24
def bootstrap(func, arglist, N, kwargs={}):
    '''Computes error via bootstrapping on an arbitrary function. The
    major restriction is that func is assumed to return a single, 1D,
    Numpy array. Bootstrap will also resample ALL of the elements of
    arglist. If you want to keep some inputs unchanged pass them as
    keywords. The func can have an arbitrary number of arguments and
    keyword arguments. If the output of func is an ndarray of length M,
    then bootstrap returns two arrays of length M: the first is the
    mean value over the N bootstrap resamples and the second is the
    corresponding stddev.
    '''
    
    if type(arglist) != list:
        arglist = [arglist]
    size = len(arglist[0])
    resultarr = None
    for i in range(N):

        idx = np.random.randint(0,size,size)
        bootargs = [i[idx] for i in arglist]
        result = func(*bootargs,**kwargs)
        try:
            resultarr = np.vstack((resultarr,result))
        except ValueError:
            resultarr = result

    print np.isnan(resultarr).sum()
    return bn.nanmean(resultarr,axis=0),bn.nanstd(resultarr,axis=0)
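Usage sketch with random toy data: func must return a 1D array (here the mean and standard deviation of each resample), and bootstrap returns the per-statistic mean and spread over the resamples.

import numpy as np

data = np.random.randn(500)
stat = lambda a: np.array([a.mean(), a.std()])
boot_mean, boot_err = bootstrap(stat, data, 1000)   # each of length 2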
Example no. 25
 def __init__(self, data, classes, tree_features, n_trees=100):
     self.n_features = np.shape(data)[1]
     n_rows = np.shape(data)[0]
     n_nans = np.sum(np.isnan(data), 0)
     data = data[:, n_nans < n_rows]
     self.n_features = np.shape(data)[1]
     
     n_nans = np.sum(np.isnan(data), 1)
     data = data[n_nans < self.n_features, :]
     self.n_rows = np.shape(data)[0]
     
     if (tree_features > self.n_features):
         tree_features = self.n_features
     
     self.col_list = np.zeros((n_trees, tree_features), dtype='int')
     self.n_trees = n_trees
     self.bags = []
     for i in range(n_trees):
         cols = sample(range(self.n_features), tree_features)
         cols.sort()
         self.col_list[i, :] = cols
         data_temp = data[:, cols]
         n_nans = np.sum(np.isnan(data_temp), 1)
         data_temp = data_temp[n_nans == 0, :]
         classes_temp = classes[n_nans == 0]
         #bag = BaggingClassifier(n_estimators=1, max_features=tree_features)
         bag = RandomForestClassifier(n_estimators=1, max_features=tree_features)
         bag.fit(data_temp, classes_temp)
         self.bags.append(bag)
         print(np.shape(data_temp))
Example no. 26
def moments(data, circle, rotate, vheight, estimator=median, **kwargs):
    """Returns (height, amplitude, x, y, width_x, width_y, rotation angle)
    the gaussian parameters of a 2D distribution by calculating its
    moments.  Depending on the input parameters, will only output
    a subset of the above.
    """
    total = np.abs(data).sum()
    Y, X = np.indices(data.shape)  # python convention: reverse x,y np.indices
    y = np.argmax((X*np.abs(data)).sum(axis=1)/total)
    x = np.argmax((Y*np.abs(data)).sum(axis=0)/total)
    col = data[int(y), :]
    # FIRST moment, not second!
    width_x = np.sqrt(np.abs((np.arange(col.size)-y)*col).sum() / np.abs(col).sum())
    row = data[:, int(x)]
    width_y = np.sqrt(np.abs((np.arange(row.size)-x)*row).sum() / np.abs(row).sum())
    width = (width_x + width_y) / 2.
    height = estimator(data.ravel())
    amplitude = data.max()-height
    mylist = [amplitude, x, y]
    if (np.isnan(width_y) or np.isnan(width_x) or np.isnan(height) or np.isnan(amplitude)):
        raise ValueError("something is nan")
    if vheight:
        mylist = [height] + mylist
    if not circle:
        mylist = mylist + [width_x, width_y]
        if rotate:
            mylist = mylist + [0.]  # rotation "moment" is just zero...
            # also, circles don't rotate.
    else:
        mylist = mylist + [width]
    return mylist
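A usage sketch on a synthetic 2D Gaussian blob (assuming the module-level median default used above is importable); with vheight=True, circle=False and rotate=False the returned list is [height, amplitude, x, y, width_x, width_y].

import numpy as np

yy, xx = np.indices((64, 64))
blob = 0.1 + np.exp(-((xx - 40.0) ** 2 + (yy - 20.0) ** 2) / (2 * 3.0 ** 2))
params = moments(blob, circle=False, rotate=False, vheight=True)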
Example no. 27
def ll(actual, predicted):
    """
    Computes the log likelihood.

    This function computes the log likelihood between two numbers,
    or for element between a pair of lists or numpy arrays.

    Parameters
    ----------
    actual : int, float, list of numbers, numpy array
             The ground truth value
    predicted : same type as actual
                The predicted value

    Returns
    -------
    score : double or list of doubles
            The log likelihood error between actual and predicted

    """
    actual = np.array(actual)
    predicted = np.array(predicted)
    err = np.seterr(all='ignore')
    score = -(actual * np.log(predicted) + (1 - actual) * np.log(1 - predicted))
    np.seterr(divide=err['divide'], over=err['over'],
              under=err['under'], invalid=err['invalid'])
    if type(score) == np.ndarray:
        score[np.isnan(score)] = 0
    else:
        if np.isnan(score):
            score = 0
    return score
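Usage sketch with binary labels and predicted probabilities; the error is small for confident correct predictions and grows as the prediction moves away from the true label.

import numpy as np

actual = np.array([1, 0, 1])
predicted = np.array([0.9, 0.2, 0.5])
errors = ll(actual, predicted)   # roughly [0.105, 0.223, 0.693]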
Example no. 28
    def get_depth_color(self, value):

        vmin = -0.02
        vmax = 0.02

        if value < vmin:
            value = vmin
        elif value > vmax:
            value = vmax

        dv = vmax - vmin
        r = g = b = 1

        if value < (vmin + 0.25 * dv):
            r = 0
            g = 4 * (value - vmin) / dv
        elif value < (vmin + 0.5 * dv):
            r = 0
            b = 1 + 4 * (vmin + 0.25 * dv - value) / dv
        elif value < (vmin + 0.75 * dv):
            r = 4 * (value - vmin - 0.5 * dv) / dv
            b = 0
        else:
            g = 1 + 4 * (vmin + 0.75 * dv - value) / dv
            b = 0

        if np.isnan(r) or np.isnan(g) or np.isnan(b):
            r = b = g = 0

        return (np.array([b, g, r]) * 255).astype(int)
Example no. 29
    def transform(self, data):

        assert np.isfinite(data).all()

        ntest = len(data)

        data = data.copy()

        data.shape = ntest, -1

        assert np.isfinite(data).all()

        print ">>> Computing traintest linear kernel"
        start = time.time()
        kernel_traintest = np.dot(data,
                                  self._train_data.T)

        assert not np.isnan(kernel_traintest).any()
        assert not np.isinf(kernel_traintest).any()

        kernel_traintest /= self._ktrace

        assert not np.isnan(kernel_traintest).any()
        assert not np.isinf(kernel_traintest).any()

        end = time.time()
        print "Time: %s" % (end-start)

        return self._clf.decision_function(kernel_traintest).ravel()
Example no. 30
def visualize_depth_image(data):

    data = data.copy()
    # treat missing depth (encoded as 0.0) as NaN without touching the caller's array
    data[data == 0.0] = np.nan

    maxdepth = np.nanmax(data)
    mindepth = np.nanmin(data)
    data -= mindepth
    data /= (maxdepth - mindepth)

    gray = np.zeros(list(data.shape) + [3], dtype=data.dtype)
    data = (1.0 - data)
    gray[..., :3] = np.dstack((data, data, data))

    # use a greenish color to visualize missing depth
    gray[np.isnan(data), :] = (97, 160, 123)
    gray[np.isnan(data), :] /= 255

    gray = exposure.equalize_hist(gray)

    # set alpha channel
    gray = np.dstack((gray, np.ones(data.shape[:2])))
    gray[np.isnan(data), -1] = 0.5

    return gray * 255