Exemple #1
        def check(a, b, expected):
            """Assert the results of ``le(a, b)`` and ``gt(a, b)``.

            ``expected`` is the anticipated result of ``le(a, b)``; ``gt(a, b)``
            is expected to be its negation. On NumPy >= 1.16, if either operand
            is NaT both comparisons are expected to be False.
            """
            expected_val = expected
            not_expected_val = not expected

            if numpy_support.version >= (1, 16):
                # since np 1.16 all NaT magnitude comparisons including equality
                # are False (as NaT == NaT is now False)
                # Both operands must be inspected: the check used to test ``a``
                # twice and therefore missed a NaT passed as ``b``.
                if np.isnat(a) or np.isnat(b):
                    expected_val = False
                    not_expected_val = False
            self.assertPreciseEqual(le(a, b), expected_val)
            self.assertPreciseEqual(gt(a, b), not_expected_val)
Exemple #2
        def check(a, b, expected):
            """Assert the results of ``eq`` and ``ne`` for ``a`` and ``b`` in both orders.

            ``expected`` is the anticipated result of ``eq(a, b)``; ``ne`` is
            expected to be its negation. On NumPy >= 1.16, if either operand is
            NaT, ``eq`` is expected to be False and ``ne`` True.
            """
            expected_val = expected
            not_expected_val = not expected

            if numpy_support.version >= (1, 16):
                # since np 1.16 all NaT == comparisons are False, including
                # NaT==NaT, conversely != is True
                # Both operands must be inspected: the check used to test ``a``
                # twice and therefore missed a NaT passed as ``b``.
                if np.isnat(a) or np.isnat(b):
                    expected_val = False
                    not_expected_val = True

            self.assertPreciseEqual(eq(a, b), expected_val)
            self.assertPreciseEqual(eq(b, a), expected_val)
            self.assertPreciseEqual(ne(a, b), not_expected_val)
            self.assertPreciseEqual(ne(b, a), not_expected_val)
 def convert_value(value, value_type, tz=None):
     """Convert ``value`` according to ``value_type``.

     When ``value_type`` is ``"time"``, a NaT value is rendered as
     ``str(Table.NAT_VALUE)`` and any other value is wrapped as
     ``DateType(value, tz)``. For every other ``value_type`` the value is
     returned unchanged.
     """
     if value_type == "time":
         if np.isnat(value):
             return str(Table.NAT_VALUE)
         return DateType(value, tz)
     # This return used to sit (unreachable) inside the "time" branch, which
     # made every non-time value fall off the end and come back as None.
     return value
Exemple #4
        def check_lt(a, b, expected):
            """Check ``a < b`` against ``expected`` via lt/gt/ge/le and plain NumPy."""
            want = expected
            want_negated = not expected

            # since np 1.16 all NaT magnitude comparisons including equality
            # are False (as NaT == NaT is now False)
            if numpy_support.version >= (1, 16) and (np.isnat(a) or np.isnat(b)):
                want = False
                want_negated = False

            context = (a, b, expected)
            with self.silence_numpy_warnings():
                lt = self.jit(lt_usecase)
                # ``lt`` and its mirror image ``gt`` must agree with ``want``...
                self.assertPreciseEqual(lt(a, b), want, context)
                self.assertPreciseEqual(gt(b, a), want, context)
                # ...while ``ge`` and its mirror image ``le`` take the other value.
                self.assertPreciseEqual(ge(a, b), want_negated, context)
                self.assertPreciseEqual(le(b, a), want_negated, context)
                if want:
                    # Strictly ordered values cannot be equal
                    check_eq(a, b, False)
                # Cross-check against the interpreter-level comparison
                self.assertPreciseEqual(a < b, want)
Exemple #5
        def check_eq(a, b, expected):
            """Check ``a == b`` against ``expected`` via eq/ne and the ordered comparisons."""
            want_eq = expected
            want_ne = not expected
            pair = (a, b)
            both_orders = ((a, b), (b, a))

            if numpy_support.version >= (1, 16) and (np.isnat(a) or np.isnat(b)):
                # since np 1.16 all NaT comparisons bar != are False, including
                # NaT==NaT
                want_eq = False
                want_ne = True
                for ops in ((le, ge), (lt, gt)):
                    for x, y in both_orders:
                        for op in ops:
                            self.assertFalse(op(x, y), pair)

            context = (a, b, expected)
            with self.silence_numpy_warnings():
                for x, y in both_orders:
                    self.assertPreciseEqual(eq(x, y), want_eq, context)
                for x, y in both_orders:
                    self.assertPreciseEqual(ne(x, y), want_ne, context)
                if want_eq:
                    # Equal values satisfy the non-strict orderings...
                    for x, y in both_orders:
                        self.assertTrue(le(x, y), pair)
                        self.assertTrue(ge(x, y), pair)
                    # ...and fail the strict ones
                    for x, y in both_orders:
                        self.assertFalse(lt(x, y), pair)
                        self.assertFalse(gt(x, y), pair)
                # Cross-check against the interpreter-level comparison
                self.assertPreciseEqual(a == b, want_eq)
Exemple #6
    def test_time_to_time(self, from_dt, to_dt, expected_casting,
                          expected_view_off, nom, denom):
        # Exercises the casting implementation between two time dtypes: first
        # the resolved descriptors, casting level and view offset, then the
        # strided cast itself.
        # NOTE(review): this function looks truncated in this view (unclosed
        # calls, an apparently missing `else:`); the comments below describe
        # only what is visible.
        from_dt = np.dtype(from_dt)
        if to_dt is not None:
            to_dt = np.dtype(to_dt)

        # Test a few values for casting (results generated with NumPy 1.19)
        values = np.array([-2**63, 1, 2**63 - 1, 10000, -10000, 2**32])
        values = values.astype(
        assert values.dtype.byteorder == from_dt.byteorder
        # The first value (-2**63) must read back as NaT in the source dtype.
        assert np.isnat(values.view(from_dt)[0])

        DType = type(from_dt)
        cast = get_castingimpl(DType, DType)
        casting, (from_res, to_res), view_off = cast._resolve_descriptors(
            (from_dt, to_dt))
        # The source descriptor must be passed through untouched; the target
        # is only compared when one was actually requested.
        assert from_res is from_dt
        assert to_res is to_dt or to_dt is None
        assert casting == expected_casting
        assert view_off == expected_view_off

        if nom is not None:
            # Expected result: scale the raw integers by nom/denom and keep
            # the first element as NaT.
            expected_out = (values * nom // denom).view(to_res)
            expected_out[0] = "NaT"
            # NOTE(review): the next three assignments overwrite the result
            # above; presumably they belong to an `else:` branch that is not
            # visible here - confirm against the full file.
            expected_out = np.empty_like(values)
            expected_out[...] = denom
            expected_out = expected_out.view(to_dt)

        orig_arr = values.view(from_dt)
        orig_out = np.empty_like(expected_out)

        if casting == Casting.unsafe and (to_dt == "m8" or to_dt == "M8"):
            # Casting from non-generic to generic units is an error and should
            # probably be reported as an invalid cast earlier.
            with pytest.raises(ValueError):
                cast._simple_strided_call((orig_arr, orig_out))

        # NOTE(review): both loops iterate over [True, True], so the same
        # aligned/contiguous combination is exercised four times.
        for aligned in [True, True]:
            for contig in [True, True]:
                arr, out = self.get_data_variation(orig_arr, orig_out, aligned,
                out[...] = 0
                cast._simple_strided_call((arr, out))
Exemple #7
    def test_nanmean_skipna_false(self, dtype):
        """``nanmean(..., skipna=False)`` must propagate NaT for the given dtype."""
        data = np.arange(12).astype(np.int64).view(dtype).reshape(4, 3)
        data[-1, -1] = "NaT"

        # Full reduction: the single NaT makes the overall mean NaT.
        overall = nanops.nanmean(data, skipna=False)
        assert np.isnat(overall)
        assert overall.dtype == dtype

        # Column-wise reduction: only the last column holds the NaT.
        by_column = nanops.nanmean(data, axis=0, skipna=False)
        tm.assert_numpy_array_equal(
            by_column, np.array([4, 5, "NaT"], dtype=data.dtype)
        )

        # Row-wise reduction: each complete row is compared with its middle
        # element, and the last row with the NaT cell itself.
        by_row = nanops.nanmean(data, axis=1, skipna=False)
        row_targets = [data[row, 1] for row in range(3)] + [data[-1, -1]]
        tm.assert_numpy_array_equal(by_row, np.array(row_targets))
Exemple #8
 def compute(self, today, assets, out, values, dates, is_cum):
     # Writes one aggregated number per column of `values` into `out`.
     # NOTE(review): this function looks truncated in this view (closing
     # brackets and an `else:` between the two `ttm` assignments appear to be
     # missing); the comments describe only what is visible.
     # NaT entries in `dates` are replaced with 1970-03-31 before the
     # per-column locations are computed.
     filled = np.where(np.isnat(dates), np.datetime64('1970-03-31'), dates)
     # Compute the position changes and quarterly multipliers per stock
     locs = map(self._locs_and_quarterly_multiplier, filled.T)
     if is_cum:
         # Weighted average using the quarterly multipliers
         ttm = np.array([
             nanmean(value[loc] * ms)
             for value, (loc, ms) in zip(values.T, locs)
         # Non-cumulative raw data only needs a plain sum; no adjustment by
         # the quarterly multipliers is required
         ttm = np.array([
             nansum(value[loc]) for value, (loc, _) in zip(values.T, locs)
     out[:] = ttm
Exemple #9
def PandasData(request):
    # View that, on POST, looks a user up by the submitted username and
    # renders 'pandas_data.html' with a summary of that user's account data.
    # NOTE(review): this function looks truncated in this view (unclosed
    # calls, dangling line continuations, missing `try:`/`else:` lines and no
    # visible handling for non-POST requests); the comments describe only
    # what is visible.
    if request.method == 'POST':
        form = PandasdataForm(request.POST)
        if form.is_valid():
            username = form.cleaned_data['username']

            # Load the selected columns of every AuthUser row into a frame.
            df = read_frame(AuthUser.objects.all(),
                                'username', 'login_num', 'is_pay',
                                'mobilephone', 'date_joined', 'last_login'

                return render(request, 'pandas_data.html',
                              {'messageerror': 'The username does not exist'})

            # Keep only the rows for the requested user.
            df_select = df[df.username == username]

            # `.values[0]` raises IndexError when no row matched the username.
            username = df_select.username.values[0]
            login_num = df_select.login_num.values[0]
            register_time = df_select.date_joined.values[0]
            last_login = df_select.last_login.values[0]
            # A NaT last_login is treated as "has never logged in".
            is_login = not np.isnat(last_login)

            # Keep the date part of the ISO string; the continuation that
            # follows the trailing backslash is not visible here.
            register_time = str(np.datetime_as_string(register_time)).split("T")[0] + " " + \

            # np.datetime64('today') has day resolution, so this tests whether
            # the last login is later than the start of the current day.
            # NOTE(review): an `else:` appears to be missing before
            # `is_today = False`.
            if is_login and last_login > np.datetime64('today'):
                is_today = True
                is_today = False

            if is_login:
                last_login = str(np.datetime_as_string(last_login)).split("T")[0] + " " + \

            # Context passed to the template.
            message = {
                'username': username,
                'login_num': login_num,
                'register_time': register_time,
                'last_login': last_login,
                'is_login': is_login,
                'is_today': is_today
            return render(request, 'pandas_data.html', {'message': message})
def get_mean_acq_time(scene):
    """Compute mean scanline acquisition time over all bands.

    Args:
        scene: Mapping from band name to a data array exposing an
            ``acq_time`` coordinate; it is indexed with ``'IR_108'`` and with
            every name in ``BANDNAMES``.

    Returns:
        The mean of the ``acq_time`` coordinates across bands (NaT entries
        ignored), cast back to the dtype of the ``IR_108`` acquisition times.
    """
    dtype = scene['IR_108'].coords['acq_time'].dtype

    # Convert timestamps to float to facilitate averaging. Caveat: NaT is
    # not converted to NaN, but to -9.22E18. So we have to set these elements
    # to NaN manually
    acq_times = []
    for band in BANDNAMES:
        acq_time = scene[band].coords['acq_time'].drop_vars(['acq_time'])
        is_nat = np.isnat(acq_time.values)
        acq_time = acq_time.astype(int).where(np.logical_not(is_nat))
        # Collect the band's times; without this the list stays empty and
        # the concatenation below has nothing to average.
        acq_times.append(acq_time)

    # Compute average over all bands (skip NaNs)
    acq_times = xr.concat(acq_times, 'bands')
    return acq_times.mean(dim='bands', skipna=True).astype(dtype)
Exemple #11
def active_thermal_capacity(db_plant, year, dict_country, dict_id):
    # Aggregates 'UnitNameplate' (divided by 1000) of the plants selected for
    # `year`, grouped by 'MedeaType' and 'PlantCountry'.
    # NOTE(review): this function looks truncated in this view (the body of
    # `if dict_country:` is incomplete); the comments describe only what is
    # visible.
    # Selection: online before 1 Jan of `year` AND retired after 31 Dec of
    # `year`, OR no retirement date at all.
    # NOTE(review): `&` binds tighter than `|`, so units without a retirement
    # date are selected regardless of their online date - confirm this is
    # intended.
    active_plant = db_plant.loc[
        (db_plant['UnitOperOnlineDate'] < pd.Timestamp(year, 1, 1)) &
        (db_plant['UnitOperRetireDate'] > pd.Timestamp(year, 12, 31))
        | np.isnat(db_plant['UnitOperRetireDate'])]
    # Drop plants whose 'MedeaType' lies in the half-open range [60, 70).
    active_plant = active_plant.loc[(active_plant['MedeaType'] < 60) |
                                    (active_plant['MedeaType'] >= 70)]
    aggregate_thermal_capacity = active_plant.groupby(
        ['MedeaType', 'PlantCountry'])['UnitNameplate'].sum().to_frame() / 1000
    if dict_country:
                                          columns={'UnitNameplate': 'cap'},
    # Move the innermost index level ('PlantCountry') into the columns.
    aggregate_thermal_capacity = aggregate_thermal_capacity.unstack(-1)
    # Remove the row labelled 0.0; raises KeyError if that label is absent.
    aggregate_thermal_capacity.drop(0.0, axis=0, inplace=True)
    if dict_id:
        aggregate_thermal_capacity.rename(index=dict_id, inplace=True)
    return aggregate_thermal_capacity
Exemple #12
def get_timedelta_str(arr: ndarray):
    """Pick a display unit based on the largest magnitude in ``arr``.

    NaT entries are ignored. The remaining values are reinterpreted as
    ``int64`` and the largest absolute value is compared against
    nanosecond-based thresholds.

    Args:
        arr: timedelta64 array. The thresholds presume nanosecond
            resolution - TODO confirm against callers.

    Returns:
        One of ``'ns'``, ``'us'``, ``'ms'``, ``'s'``, ``'m'``, ``'h'`` or
        ``'D'``.

    Raises:
        ValueError: if ``arr`` contains no non-NaT value (``max`` of an
            empty array).
    """
    max_val = np.abs(arr[~np.isnat(arr)].view('int64')).max()
    if max_val < 10**3:
        unit = 'ns'
    elif max_val < 10**6:
        unit = 'us'
    elif max_val < 10**9:
        unit = 'ms'
    elif max_val < 60 * 10**9:
        unit = 's'
    elif max_val < 3600 * 10**9:
        unit = 'm'
    elif max_val < 3600 * 24 * 10**9:
        unit = 'h'
    else:
        # A day or more. Without this branch 'h' was overwritten by 'D' and
        # larger values left ``unit`` unassigned.
        unit = 'D'

    return unit
Exemple #13
def visitToDatetime(date, arrival, depart):
    # NOTE(review): this function looks truncated in this view (the docstring
    # quotes, the end of the `base = dt.datetime(` call and the statements
    # that fill `s` and `d` are missing); the comments describe only what is
    # visible.
    Converts arrival and departure times from the TAWO visit log into a start and end datetime
    :return: datetime column with before, after tuple

    Warning: The fact that this function works is honestly beyond me. Only Jesus can understand it at this point. It
    is an amalgamation of frustration and defeat with Python datetimes. Good luck.
    import calendar
    import datetime as dt

    # Fill NaT dates by linear interpolation (over array position) between the valid dates
    mask = np.isnat(date)                                               # boolean array of where NaT values are
    idx = np.flatnonzero(mask)                                          # indexes of NaT
    nidx = np.flatnonzero(~mask)                                        # indexes of the valid dates
    date[mask] = pd.Timestamp('1980-01-01')                             # placeholder (1980-01-01) for NaT; mutates the caller's array
    date = [pd.Timestamp(x) for x in date]                              # convert datetime64 to Timestamp
    date = [calendar.timegm(x.timetuple()) + 86400 for x in date]       # convert to unix seconds, shifted forward by one day
    date = np.array(date)                                               # convert back to numpy array
    nonnats = date[~mask]                                               # unix values of the valid dates
    date[mask] = np.interp(idx, nidx, nonnats) - 86400                  # interpolated values, minus one day
    date = [dt.datetime.fromtimestamp(x) for x in date]                 # unix seconds to naive local-time datetime

    s, d = [], []
    for i in range(len(arrival)):
        base = dt.datetime(date[i].year,                                # build the base date without the time of day

        # get start hour information
        # (first two characters are read as the hour, the rest as minutes;
        # presumably the values look like "HHMM" - TODO confirm)
        shour = int(str(arrival[i])[:2])
        sminute = int(str(arrival[i])[2:])
        start = base + pd.Timedelta(f'{shour} hours') + pd.Timedelta(f'{sminute} minutes')

        # departing hour information
        dhour = int(str(depart[i])[:2])
        dminute = int(str(depart[i])[2:])
        end = base + pd.Timedelta(f'{dhour} hours') + pd.Timedelta(f'{dminute} minutes')

        # create and append a tuple

    return s, d
Exemple #14
    def post(self, request):
        # Looks a Client up by the posted username and renders a summary of
        # that client's account data.
        # NOTE(review): this method looks truncated in this view (unclosed
        # calls, dangling line continuations, missing `try:`/`else:` lines);
        # the comments describe only what is visible.
        username = request.POST.get('username')

        # Load the selected columns of every Client row into a frame.
        df = read_frame(Client.objects.all(),
                            'username', 'login_num', 'is_pay', 'telephone',
                            'register_time', 'last_login'

        except Client.DoesNotExist:
            return pandas_request_render(request, error="User not exist")

        # Keep only the rows for the requested user.
        df_select = df[df.username == username]

        # `.values[0]` raises IndexError when no row matched the username.
        username = df_select.username.values[0]
        login_num = df_select.login_num.values[0]
        register_time = df_select.register_time.values[0]
        last_login = df_select.last_login.values[0]
        # A NaT last_login is treated as "has never logged in".
        have_login = not np.isnat(last_login)

        # Keep the date part of the ISO string; the continuation that follows
        # the trailing backslash is not visible here.
        register_time = str(np.datetime_as_string(register_time)).split("T")[0] + " " + \

        # np.datetime64('today') has day resolution, so this tests whether the
        # last login is later than the start of the current day.
        # NOTE(review): an `else:` appears to be missing before
        # `is_today = False`.
        if have_login and last_login > np.datetime64('today'):
            is_today = True
            is_today = False

        if have_login:
            last_login = str(np.datetime_as_string(last_login)).split("T")[0] + " " + \

        # Payload handed to the renderer.
        res = {
            'username': username,
            'login_num': login_num,
            'register_time': register_time,
            'last_login': last_login,
            'have_login': have_login,
            'is_today': is_today

        return pandas_request_render(request, res=res)
 def test_european_option_on_zcb(self) -> None:
     """Cashflows of a European option on (zero coupon bond - strike)."""
     model = _make_model()
     notional = 1234
     currency = "USD"
     strike = 1000
     zcb = ZeroCouponBond(model.dategrid[-2], notional, currency)
     opt = EuropeanOption(model.dategrid[-2], zcb - strike * One(currency))
     cf = model.generate_cashflows(opt)

     # Three cashflow slots: two in the option currency plus one "NNN" slot.
     self.assertEqual(cf.currencies.shape, (3,))
     for slot, code in enumerate((currency, currency, "NNN")):
         self.assertEqual(cf.currencies[slot], code)
     self.assertEqual(cf.cashflows.shape, (model.nsim, 3))

     # Slots 0 and 1 pay the notional and minus the strike on the same date.
     for slot, amount in ((0, notional), (1, -strike)):
         self.assertTrue(
             (cf.cashflows["date"][:, slot] == model.dategrid[-2]).all()
         )
         self.assertTrue((cf.cashflows["value"][:, slot] == amount).all())

     # Slot 2 carries no date (NaT) and a zero value.
     self.assertTrue((np.isnat(cf.cashflows["date"][:, 2])).all())
     self.assertTrue((cf.cashflows["value"][:, 2] == 0).all())
 def test_or_cashflow_generation(self) -> None:
     """Cashflow generation for contracts combined with ``|``."""
     model = _make_model()

     # An alternative that is observed at a later date is not supported.
     deferred_choice = One("EUR") | When(At(model.dategrid[-1]), One("EUR"))
     with self.assertRaises(NotImplementedError):
         model.generate_cashflows(deferred_choice)

     # One EUR versus two EUR: only the second alternative produces a flow.
     same_currency_choice = One("EUR") | 2 * One("EUR")
     cf = model.generate_cashflows(same_currency_choice)
     self.assertEqual(cf.currencies.shape, (2,))
     for slot in (0, 1):
         self.assertEqual(cf.currencies[slot], "EUR")
     self.assertEqual(cf.cashflows.shape, (model.nsim, 2))
     self.assertTrue((cf.cashflows["value"][:, 0] == 0).all())
     self.assertTrue((np.isnat(cf.cashflows["date"][:, 0])).all())
     self.assertTrue((cf.cashflows["value"][:, 1] == 2).all())
     self.assertTrue((cf.cashflows["date"][:, 1] == model.eval_date).all())

     # Alternatives in different currencies keep one slot per currency.
     mixed_currency_choice = One("EUR") | One("USD")
     cf4 = model.generate_cashflows(mixed_currency_choice)
     self.assertEqual(cf4.currencies.shape, (2,))
     for slot, code in enumerate(("EUR", "USD")):
         self.assertEqual(cf4.currencies[slot], code)
Exemple #17
def scalar_broadcast_to(scalar, size, dtype=None):
    # Builds a column of length `size` in which every element is `scalar`.
    # NOTE(review): this function looks truncated in this view (unclosed
    # parentheses and missing `else:` lines); the comments describe only what
    # is visible.

    # Only the first entry of a tuple/list `size` is used.
    if isinstance(size, (tuple, list)):
        size = size[0]

    # None and NaT scalars yield a masked column (dtype "object" unless one
    # was given).
    if scalar is None or (
        isinstance(scalar, (np.datetime64, np.timedelta64))
        and np.isnat(scalar)
        if dtype is None:
            dtype = "object"
        return column.column_empty(size, dtype=dtype, masked=True)

    if isinstance(scalar, pd.Categorical):
        if dtype is None:
            return _categorical_scalar_broadcast_to(scalar, size)
            # NOTE(review): presumably an `else:` branch - with an explicit
            # dtype the first category is broadcast and then cast.
            return scalar_broadcast_to(scalar.categories[0], size).astype(

    if isinstance(scalar, decimal.Decimal):
        if dtype is None:
            dtype = cudf.Decimal64Dtype._from_decimal(scalar)

        out_col = column.column_empty(size, dtype=dtype)
        # Skip the fill for zero-length columns.
        if out_col.size != 0:
            out_col[:] = scalar
        return out_col

    # After normalisation the scalar's own dtype takes precedence.
    scalar = to_cudf_compatible_scalar(scalar, dtype=dtype)
    dtype = scalar.dtype

    if np.dtype(dtype).kind in ("O", "U"):
        # Object/unicode scalars: gather index 0 of a one-element string
        # column `size` times.
        gather_map = column.full(size, 0, dtype="int32")
        scalar_str_col = column.as_column([scalar], dtype="str")
        return scalar_str_col[gather_map]
        # NOTE(review): unreachable as written; presumably an `else:` branch
        # for all remaining dtypes.
        out_col = column.column_empty(size, dtype=dtype)
        if out_col.size != 0:
            out_col.data_array_view[:] = scalar
        return out_col
def tidy_cases_lambda(interim_data, remove_territories=True):
    # Reshapes the case data into a melted frame with zero-count rows removed.
    # NOTE(review): this function looks truncated in this view (two
    # statements are cut off mid-call); the comments describe only what is
    # visible.
    #Remove non-existent notification dates
    interim_data = interim_data[~np.isnat(interim_data.

    #Filter out territories
    # NOTE(review): `df_linel` is only assigned inside this `if`, so calling
    # with remove_territories=False appears to fail on the melt below -
    # confirm against the full file.
    if (remove_territories):
        df_linel = interim_data[(interim_data['STATE'] != 'NT')
                                & (interim_data['STATE'] != 'ACT')]

    #Melt down so that imported and local are no longer columns. Allows multiple draws for infection date.
    #i.e. create linelist data
    df_linel = df_linel.melt(id_vars=['NOTIFICATION_RECEIVE_DATE', 'STATE'],

    #Drop rows with zero cases, then reset the index or the joining doesn't work
    df_linel = df_linel[df_linel.n_cases != 0]
    df_linel = df_linel.reset_index(drop=True)
    return (df_linel)
Exemple #19
    def _add_timedelta_arraylike(
            self, other: TimedeltaArray | npt.NDArray[np.timedelta64]
    ) -> PeriodArray:
        # Adds an array of timedeltas to this array and returns a new
        # instance of the same type with the same freq.
        # NOTE(review): this method looks truncated in this view (docstring
        # quotes, a `try:` line and the ends of several calls are missing);
        # the comments describe only what is visible.
        other : TimedeltaArray or ndarray[timedelta64]

        freq = self.freq
        if not isinstance(freq, Tick):
            # We cannot add timedelta-like to non-tick PeriodArray
            raise TypeError(
                f"Cannot add or subtract timedelta64[ns] dtype from {self.dtype}"

        # timedelta64 dtype expressed in the unit of this array's freq.
        dtype = np.dtype(f"m8[{freq._td64_unit}]")

            delta = astype_overflowsafe(np.asarray(other),
        except ValueError as err:
            # e.g. if we have minutes freq and try to add 30s
            # "Cannot losslessly convert units"
            raise IncompatibleFrequency(
                "Cannot add/subtract timedelta-like from PeriodArray that is "
                "not an integer multiple of the PeriodArray's freq.") from err

        # Positions where the converted delta is NaT.
        b_mask = np.isnat(delta)

        res_values = algos.checked_add_with_arr(self.asi8,
        # Write iNaT wherever either operand was missing.
        np.putmask(res_values, self._isnan | b_mask, iNaT)
        return type(self)(res_values, freq=self.freq)
Exemple #20
  def _calc_global_values(self, array):
    """Calculate all the values of the Transform that are dependent on all the examples of the dataset. (e.g. mean, standard deviation, unique category values, etc.) This method must be run before any actual transformation can be done.

    array : np.ndarray
      Some of the data that will be transformed.

    # NOTE(review): this method looks truncated in this view (the docstring is
    # never closed and `if not batch_size:` has no body); the comments
    # describe only what is visible.
    self.input_dtype = array.dtype

    if len(array.shape) < 2 or array.shape[1] != 2:
      raise ValueError("Array must have exactly two columns. The first being the time, and the second being the amplitude.")

    # Running totals: examples already accumulated plus this batch.
    batch_size = float(array.shape[0])
    total_examples = self.num_examples + batch_size

    if not batch_size:

    # First column: timestamps, with NaT replaced by the fill function.
    time_array = array[:, 0].astype(np.datetime64)
    time_array[np.isnat(time_array)] = self.fill_nat_func(time_array)

    # Get the maximum time and convert it to dtype
    end_datetime = (self.end_datetime - self.zero_datetime) / np.timedelta64(self.num_units, self.time_unit)
    end_datetime = end_datetime.astype(self.dtype)

    # Convert to dtype and divide by the converted end time
    time_array = (time_array - self.zero_datetime)/np.timedelta64(self.num_units, self.time_unit)
    time_array = time_array.astype(self.dtype)
    time_array = time_array / end_datetime

    # Second column: amplitudes, with NaN replaced by the fill function and
    # repeated once per frequency.
    amp_array = array[:, 1: 2].astype(np.float64)
    amp_array[np.isnan(amp_array)] = self.fill_nan_func(amp_array)
    amp_array = np.tile(amp_array, [1, self.num_frequencies])

    # exp(-2*pi*i * t * w) for every (time, frequency) pair; contracting over
    # no axes makes the tensordot an outer product.
    exp = np.exp(-2.0j * np.pi * np.tensordot(time_array, self.w_k, [[], []]))

    # Blend this batch's mean with the stored X_k, weighted by example counts.
    self.X_k = batch_size / total_examples * np.mean(amp_array * exp, axis=0) + self.num_examples / total_examples * self.X_k

    self.num_examples += batch_size
Exemple #21
def subset_for_oco2(observations_path, run_directory_species_conc,
                    run_directory_level_edge, output_path, attributes,
    # Reads an observation file, subsets it through GEOSChemSubsetter and
    # writes the result to `output_path` as compressed netCDF.
    # NOTE(review): this function looks truncated in this view (the signature
    # and several calls are cut off, and the `matched_ds is None` branch has
    # no visible return); the comments describe only what is visible.
    logger.debug('Reading observation file %s', observations_path)
    observation_ds = xarray.open_dataset(
    ) \
        .load() \

    # HACK(mgnb): hardcoding the unit rather than reading it from the file
    assert (observation_ds['time'].attrs['units'] ==
            'seconds since 1970-01-01 00:00:00')
    # Rebuild 'time' on the 'sounding_id' dimension as an offset from the
    # Unix epoch.
    observation_ds['time'] = ('sounding_id', (
        np.datetime64('1970-01-01 00:00:00') +

    # Keep only the soundings selected by `valid_times`.
    valid_times = np.argwhere(
    observation_ds = observation_ds \
        .isel(sounding_id=valid_times[:, 0])

    matched_ds = GEOSChemSubsetter(observation_ds, run_directory_species_conc,

    if matched_ds is None:
        logger.debug('Nothing to subset')

    matched_ds.attrs = attributes

    logger.debug('Saving results to %s', output_path)
    # Apply the same zlib settings (level 6) to every data variable.
    compression = dict(zlib=True, complevel=6)
    encoding = {var: compression for var in matched_ds.data_vars}
    matched_ds.to_netcdf(output_path, encoding=encoding)

Exemple #22
    def set_jds(self, val1, val2):
        """Hand datetime64 input to the parent ``set_jds`` as ISO byte strings.

        NaT entries are replaced by a stand-in date before the conversion and
        the matching entries of ``self.jd2`` are set to NaN afterwards.
        """
        # NaT cannot be parsed as a date string, so substitute a valid date
        # for those entries. The stand-in value is arbitrary; a "modern" date
        # is used.
        nat_positions = np.isnat(val1)
        has_nat = np.any(nat_positions)
        if has_nat:
            val1 = val1.copy()
            val1[nat_positions] = '2000'

        # Month and year resolution values are widened to days so that the
        # resulting strings parse, then everything becomes a bytestring.
        if val1.dtype.name in ('datetime64[M]', 'datetime64[Y]'):
            val1 = val1.astype('datetime64[D]')
        iso_bytes = val1.astype('S')

        # Regular ISO string parsing from here on
        super().set_jds(iso_bytes, val2)

        # Re-apply the mask on the parsed result
        if has_nat:
            self.jd2[nat_positions] = np.nan
def get_solar_angles(scene, lons, lats):
    """Compute solar angles.

    Compute angles for each scanline using their acquisition time to account for
    the earth's rotation over the course of one scan.

        Solar azimuth angle, Solar zenith angle in degrees

    # NOTE(review): this function looks truncated in this view (the docstring
    # is never closed and the `if np.isnat(acq_time):` body is missing); the
    # comments describe only what is visible.
    # Both outputs start as all-NaN arrays with the shape of `lons`.
    suna = np.full(lons.shape, np.nan)
    sunz = np.full(lons.shape, np.nan)
    mean_acq_time = get_mean_acq_time(scene)
    # One acquisition time per row of `lons`/`lats`.
    for line, acq_time in enumerate(mean_acq_time.values):
        # presumably rows without a valid time are skipped and stay NaN -
        # TODO confirm against the full file
        if np.isnat(acq_time):
        # Only the second value returned by get_alt_az is used; it is
        # converted from radians to degrees.
        _, suna_line = get_alt_az(acq_time, lons[line, :], lats[line, :])
        suna_line = np.rad2deg(suna_line)
        suna[line, :] = suna_line
        sunz[line, :] = sun_zenith_angle(acq_time, lons[line, :],
                                         lats[line, :])
    return suna, sunz
Exemple #24
    def values_list(self):
        """Flatten ``self.write_values`` into one ordered dict per row.

        Each key is ``"<column>_<row index>"``. Values are normalised as
        follows:

        * ``dict`` and ``list`` values are serialised with ``json.dumps``;
        * ``numpy.bool_`` becomes a plain ``bool``;
        * NaT (NumPy or pandas) and float NaN become ``None``.

        Returns:
            list[collections.OrderedDict]: one mapping per row, in row order.
        """
        all_values = []

        for idx, row in self.write_values.iterrows():
            values = collections.OrderedDict()

            # ``Series.items`` replaces ``Series.iteritems``, which pandas 2.0
            # removed.
            for k, v in row.items():
                if isinstance(v, (dict, list)):
                    v = json.dumps(v)
                if isinstance(v, np.bool_):
                    v = bool(v)
                if isinstance(v, np.datetime64) and np.isnat(v):
                    v = None
                # ``pd.NaT`` is a singleton, so an identity test suffices and
                # avoids reaching into the private ``pd._libs`` namespace.
                if v is pd.NaT:
                    v = None
                if isinstance(v, float) and np.isnan(v):
                    v = None

                values['%s_%s' % (k, idx)] = v

            all_values.append(values)
        return all_values
Exemple #26
def get_last_played_match_vector(
        season_matches_dfs: typing.List[pd.DataFrame]) -> pd.Series:
    # Returns, for every match row, the time in seconds since the same
    # team's previous match.
    # NOTE(review): this function looks truncated in this view (docstring
    # quotes and the end of the `sort_values` call are missing); the comments
    # describe only what is visible.

    :param season_matches_dfs:
    df_concat = pd.concat(season_matches_dfs)

    sorted_df = df_concat.sort_values(by=['TeamName', 'MatchDate'],

    # Gap between consecutive matches of the same team; each team's first
    # row has no predecessor and is therefore NaT.
    date_diff_df = sorted_df.groupby('TeamName').MatchDate.diff()

    # check we're seeing as many null values as expected (should be = n of unique teams in dataset)
    # NOTE(review): this reads column `TeamNames` while the rest of the
    # function uses 'TeamName' - verify which spelling the data has.
    unique_teams = df_concat.TeamNames.nunique()
    null_vals = date_diff_df[np.isnat(date_diff_df.values)].shape[0]

    assert unique_teams == null_vals

    # The NaT gaps of each team's first match are filled with 0.
    date_diff_df = date_diff_df.fillna(0)
    return date_diff_df.dt.total_seconds()
Exemple #27
    def evaluate(self, array_data):
        # NOTE(review): this method looks truncated in this view (docstring
        # quotes, an `else:` before the "not supported" message and the end
        # of the `ResultObject(` call are missing); the comments describe
        # only what is visible.
        Calculate range value of the array and store it to history

        # Default result, returned when the dtype is not supported.
        result_object = ResultObject(None, None, None, CommandStatus.Error)
        array = array_data.data

        # Build a mask of the valid entries: non-NaN for numeric arrays,
        # non-NaT for datetime arrays.
        if numpy.issubdtype(array.dtype, numpy.number):
            idx = numpy.logical_not(numpy.isnan(array))
        elif numpy.issubdtype(array.dtype, numpy.datetime64):
            idx = numpy.logical_not(numpy.isnat(array))
            Printer.Print("The array is not supported type so cannot find max")
            return result_object
        # Combine with the conditional array only when its size matches.
        if StatContainer.conditional_array is not None and StatContainer.conditional_array.data.size == array.size:
            idx = numpy.logical_and(idx, StatContainer.conditional_array.data)
        max_val = numpy.max(array[idx])
        min_val = numpy.min(array[idx])
        range_val = max_val - min_val
        result_object = ResultObject(range_val, [], DataType.array,

        # One-row summary frame.
        # NOTE(review): nothing visible here uses `df_new` after it is built.
        df_new = pd.DataFrame()
        df_new['Feature'] = [array_data.name]
        df_new['Range'] = [range_val]
        df_new['Minimum'] = [min_val]
        df_new['Maximum'] = [max_val]

        # Printer.Print("Range of", array_data.name, "is", range_val,
        #       "from", min_val, "to", max_val)

        return result_object
Exemple #28
    def addBaselineCreat(self, df, eGFR_impute=None):
        # NOTE(review): this method looks truncated in this view (docstring
        # quotes and parts of the per-admission and imputation statements are
        # missing); the comments describe only what is visible.
        Adds the baseline creatinine to a dataframe. The baseline creatinine is defined as the median of the outpatient 
        creatinine values from 365 to 7 days prior to admission.

            df (pd.DataFrame): dataframe, typically of a single patient.
            eGFR_impute (bool): boolean, whether or not to impute the null baseline creatinines with the age/sex/race and eGFR of 75

            df (pd.DataFrame): dataframe with baseline creatinine values added in

        # Fall back to the instance setting when none is given; the value
        # used is stored back on the instance either way.
        if eGFR_impute is None:
            eGFR_impute = self.eGFR_impute
        self.eGFR_impute = eGFR_impute

        split_dfs = list()
        unique_adms = df[self.admission].unique()
        # Iterate over each distinct admission time, ignoring NaT.
        for adm in unique_adms[~np.isnat(unique_adms)]:
            adm_df = df.loc[df[self.admission] == adm]
            adm_df.loc[:, self.baseline_creat] = adm_df[
                    self.time).loc[adm - pd.Timedelta(days=365):adm -

        # NOTE(review): nothing visible here appends to `split_dfs`, and
        # `pd.concat` raises on an empty list - confirm against the full file.
        df = pd.concat(split_dfs)

        if self.eGFR_impute:
            # Only rows whose baseline creatinine is null are overwritten.
            df.loc[df[self.baseline_creat].isnull(), self.baseline_creat] = df[
                    lambda d: self.eGFRbasedCreatImputation(
                        d[self.age], d[self.sex], d[self.race]),

        return df
Exemple #29
def convert_time_to_most_suitable_unit(arr):
    """Cast datetime values to the coarsest unit that matches their step.

    The median difference between consecutive time stamps is expressed in
    each candidate unit (ns, s, m, h, D, M, Y). Units in which the step is
    shorter than one unit are discarded and the unit with the smallest
    remaining step count is chosen.

    Args:
        arr: array-like of datetime64 values.

    Returns:
        ``arr`` as a NumPy array cast to ``datetime64[<unit>]``. The input
        (as an array) is returned unchanged when it contains NaT or holds
        fewer than two values, because no step can be derived then.

    Raises:
        TypeError: if ``arr`` is not datetime-like (raised by ``isnat``).
    """
    from pandas import Series
    # ``nan`` rather than the ``NaN`` alias, which NumPy 2.0 removed.
    from numpy import array, isnat, diff, nan, nanmedian

    arr = array(arr)
    # Arrays containing NaT are handed back as they are.
    if isnat(arr).any():
        return arr
    # A time step needs at least two time stamps.
    if arr.size < 2:
        return arr

    # convert to datetime[ns] floats
    time = arr.astype("datetime64[ns]").astype(float)

    # get the difference between time steps
    delta_time = diff(time)

    # approximate the best unit (without losing info); each entry is the
    # factor relative to the previous unit, so the running product gives the
    # number of nanoseconds per unit
    time_denominators = dict(ns=1, s=1e9, m=60, h=60, D=24, M=30, Y=12)

    # An explicit float dtype keeps the comparisons and ``idxmin`` numeric.
    dt_as_frac_of_unit = Series(index=list(time_denominators), dtype=float)
    denominator = 1
    for key, factor in time_denominators.items():
        denominator *= factor
        frac = nanmedian(delta_time / denominator)
        # only units that will not lose time are kept
        dt_as_frac_of_unit[key] = frac if frac >= 1 else nan

    # if the difference is not near enough the unit, exclude it
    # e.g. 35 day interval will eliminate Month as a unit
    if not ((dt_as_frac_of_unit - 1) < 0.05).any():
        dt_as_frac_of_unit = dt_as_frac_of_unit.where(lambda a: (a - 1) >= 1)
    unit = dt_as_frac_of_unit.idxmin()

    # convert time units to appropriate units
    # dtype: datetime64 must be attached to unit
    time_converted = arr.astype(f"datetime64[{unit}]")

    return time_converted
Exemple #30
def get_datetime_str(arr: ndarray):
    """Return the finest unit needed to represent every value in ``arr``.

    NaT entries are ignored. Every remaining value is reinterpreted as an
    ``int64`` and its trailing decimal zeros are counted: each group of
    three zeros moves one step up from ``'ns'`` (``'us'``, ``'ms'``,
    ``'s'``), and eleven or more trailing zeros - or the value 0 - count as
    ``'D'``. The finest unit found across all values is returned.

    Args:
        arr: datetime64 array. The zero counting presumes nanosecond
            resolution - TODO confirm against callers.

    Raises:
        ValueError: if ``arr`` contains no non-NaT value (``min`` of an
            empty array).
    """
    dt = {0: 'ns', 1: 'us', 2: 'ms', 3: 's', 4: 'D'}
    arr = arr[~np.isnat(arr)].view('int64')
    counts = np.zeros(len(arr), dtype='int64')
    for i, val in enumerate(arr):
        if val == 0:
            # Zero has no significant digits to inspect.
            counts[i] = 4
            continue

        # Count trailing zeros only; zeros inside the number do not make the
        # value representable in a coarser unit.
        digits = str(abs(int(val)))
        ct = len(digits) - len(digits.rstrip('0'))

        # Cap at the coarsest unit so the lookup below cannot fail.
        counts[i] = 4 if ct >= 11 else ct // 3

    return dt[counts.min()]
Exemple #31
    def normalize_binop_value(self, other):
        """Coerce ``other`` into a scalar usable as a binary-operation operand.

        ``datetime.timedelta``, ``pandas.Timestamp`` and ``pandas.Timedelta``
        inputs are first converted to their NumPy equivalents.

        Returns:
            The result of ``as_scalar``: a null scalar of this column's
            dtype for NaT timedeltas, otherwise a scalar wrapping ``other``.

        Raises:
            TypeError: if ``other`` is neither a timedelta-like object nor a
                scalar.
        """
        if isinstance(other, dt.timedelta):
            other = np.timedelta64(other)
        elif isinstance(other, pd.Timestamp):
            other = other.to_datetime64()
        elif isinstance(other, pd.Timedelta):
            other = other.to_timedelta64()

        if isinstance(other, np.timedelta64):
            other_time_unit = cudf.utils.dtypes.get_time_unit(other)
            if np.isnat(other):
                return as_scalar(val=None, dtype=self.dtype)

            # NOTE(review): only units outside s/ms/us/ns are converted here,
            # first to seconds and then to the common dtype; confirm that
            # supported units are meant to pass through unchanged.
            if other_time_unit not in ("s", "ms", "ns", "us"):
                other = other.astype("timedelta64[s]")
                common_dtype = determine_out_dtype(self.dtype, other.dtype)
                other = other.astype(common_dtype)
            return as_scalar(other)
        elif np.isscalar(other):
            return as_scalar(other)
        else:
            # This raise used to sit after the return above and could never
            # run, so unsupported operands silently produced None.
            raise TypeError("cannot normalize {}".format(type(other)))
def make_null_mask(array):
    """Given a numpy array, return a numpy array of int64s containing the
    indices of `array` where the value is either invalid or null.

    Invalid values are:
        - None
        - numpy.nat
        - numpy.nan

        array (:obj:`numpy.array`)
    # NOTE(review): this function looks truncated in this view (the docstring
    # is never closed, an `else:` between the isnat/isnan checks and the body
    # of `if invalid:` are missing); the comments describe only what is
    # visible.
    mask = []

    # String and object arrays are only checked for None below.
    is_object_or_string_dtype = np.issubdtype(array.dtype, np.str_) or\
        np.issubdtype(array.dtype, np.object_)

    # Python 2 additionally treats unicode arrays as strings.
    if six.PY2:
        is_object_or_string_dtype = is_object_or_string_dtype or np.issubdtype(
            array.dtype, np.unicode_)

    is_datetime_dtype = np.issubdtype(array.dtype, np.datetime64) or\
        np.issubdtype(array.dtype, np.timedelta64)

    for i, item in enumerate(array):
        invalid = item is None

        if not is_object_or_string_dtype:
            if is_datetime_dtype:
                invalid = invalid or np.isnat(item)
                # NOTE(review): presumably the NaN check belongs to an
                # `else:` branch; as written it also runs for datetime items.
                invalid = invalid or np.isnan(item)

        # presumably appends `i` to `mask` - TODO confirm against the full file
        if invalid:

    return mask
Exemple #33
def isnull(data):
    """Return a boolean mask that marks the missing values of ``data``.

    The check is chosen from the scalar type of the array's dtype:

    - datetime64 / timedelta64: NaT is the null marker (``isnat``)
    - inexact (float / complex) types: NaN is the null marker (``isnan``)
    - bool, integer, character and void types cannot hold a missing value,
      so an all-False mask is returned
    - anything else is expected to be object dtype and is delegated to
      ``pandas_isnull`` for NumPy / dask arrays
    """
    data = asarray(data)
    scalar_type = data.dtype.type

    if issubclass(scalar_type, (np.datetime64, np.timedelta64)):
        # datetime types use NaT for null
        # note: must check timedelta64 before integers, because currently
        # timedelta64 inherits from np.integer
        return isnat(data)

    if issubclass(scalar_type, np.inexact):
        # float types use NaN for null
        return isnan(data)

    if issubclass(scalar_type, (np.bool_, np.integer, np.character, np.void)):
        # these types cannot represent missing values
        return zeros_like(data, dtype=bool)

    # at this point, array should have dtype=object
    if isinstance(data, (np.ndarray, dask_array_type)):
        return pandas_isnull(data)

    # Not reachable yet, but intended for use with other duck array
    # types. For full consistency with pandas, we should accept None as
    # a null value as well as NaN, but it isn't clear how to do this
    # with duck typing.
    return data != data
Exemple #34
    def fillna(self, fill_value):
        # Replace the nulls of this column with ``fill_value`` through
        # ``libcudf.replace.replace_nulls`` and return the resulting column.
        #
        # NOTE(review): this block does not parse as shown -- the
        # ``column.build_column(`` call near the end is never closed and its
        # arguments are absent. Recover them from the upstream source before
        # relying on this code.
        if is_scalar(fill_value):
            if not isinstance(fill_value, Scalar):
                # Plain scalars are coerced to a datetime64 in this column's
                # time unit.
                fill_value = np.datetime64(fill_value, self.time_unit)
            # NOTE(review): as written this conversion also runs for the
            # scalar that was just coerced above, which would presumably make
            # the ``np.datetime64`` check further down unreachable. It looks
            # like it belongs to a missing ``else:`` (non-scalar input) --
            # TODO confirm.
            fill_value = column.as_column(fill_value, nan_as_null=False)

        result = libcudf.replace.replace_nulls(self, fill_value)
        if isinstance(fill_value, np.datetime64) and np.isnat(fill_value):
            # If the value we are filling is np.datetime64("NAT")
            # we set the same mask as current column.
            # However where there are "<NA>" in the
            # columns, their corresponding locations
            # in base_data will contain min(int64) values.

            # NOTE(review): the argument list of this call is missing.
            return column.build_column(
        return result
 def releasedate_dist_similarity(google_releasedate, apple_releasedate):
     # Distance between the release dates of the two store entries.
     #
     # When either date is NaT a fixed fallback value is returned instead.
     # NOTE(review): the final ``return`` below is cut off mid-expression
     # (unclosed ``np.absolute(``) and, as written, is unreachable because it
     # follows the fallback ``return`` inside the ``if``; presumably it
     # belongs to a missing ``else:`` -- recover the original before use.
     if np.isnat(google_releasedate) or np.isnat(apple_releasedate):
         return 241.040  # the mean of differences that we calculated in the eda
         return np.absolute(google_releasedate -
Exemple #36
# Build the sales frame from ``sales_dict`` using every column name except the
# last three, indexed like ``df_ventas``.
ventas_final = pd.DataFrame(sales_dict, columns=columns[:-3], index=df_ventas.index.values)
# Attach the 'Descuentos' and 'Tarjeta D.' columns of ``df_ventas`` by index.
descuentos = df_ventas.loc[:, ['Descuentos', 'Tarjeta D.']]
ventas_final = pd.merge(ventas_final, descuentos, left_index=True, right_index=True)
# Drop the first row, then store the row-wise sum in 'Total Nuevo'.
ventas_final = ventas_final.iloc[1:]
ventas_final['Total Nuevo'] = np.sum(ventas_final, axis=1)
# NOTE(review): this rebinds ``ventas_final`` and discards the frame built
# above; ``df_ventas_nuevo`` is not defined in the visible code -- lines appear
# to be missing here, verify against the original script.
ventas_final = pd.concat([df_ventas, df_ventas_nuevo], axis=0)
# Group by date
ventas_agrupadas = ventas_final.groupby(ventas_final.index).sum().reset_index()
# Rename the 'Hora transacción' column to 'index' so the next line can read it.
ventas_agrupadas.rename(columns={'Hora transacción': 'index'}, inplace=True)
ventas_agrupadas = ventas_agrupadas.groupby(ventas_agrupadas['index'].dt.date).sum()

# Compute total costs
costos_final = df_costos.reset_index()
dates = []
for v, d in enumerate(costos_final['Hora transacción'].values):
    # NOTE(review): as written, the date parts are extracted only for NaT
    # entries and nothing is ever appended to ``dates``; presumably an
    # ``else:`` branch and a ``dates.append(...)`` were lost -- TODO confirm.
    if np.isnat(d) == True:
        position = v
        d_new = pd.to_datetime(d)
        year = d_new.year
        month = d_new.month
        day = d_new.day

n_dates = []
for d in dates:
    # Each entry of ``dates`` is unpacked as a (year, month, day) triple.
    year,month,day = d
    # NOTE(review): ``d_n`` is built but not stored in ``n_dates`` in the
    # visible code.
    d_n = dt.datetime(year,month,day)
Exemple #37
    def _assertPreciseEqual(self, first, second, prec='exact', ulps=1,
                            msg=None, ignore_sign_on_zero=False,
                            abs_tol=None):
        """Recursive workhorse for assertPreciseEqual().

        Both values must belong to the same comparison family as reported by
        ``self._detect_family``. Arrays, sequences and enums are compared by
        recursing into their elements / values; everything else is compared
        as a number (or with plain equality for exact families).

        Parameters
        ----------
        first, second
            The values to compare.
        prec : {'exact', 'single', 'double'}
            Precision of the comparison for non-exact families. 'single'
            uses a 24-bit mantissa and 'double' a 53-bit mantissa when
            deriving the allowed delta.
        ulps : int
            Exponent offset used together with the mantissa width to derive
            the allowed delta.
        msg : str or None
            Optional text appended to the failure messages.
        ignore_sign_on_zero : bool
            When False, zeros of different sign are reported as unequal.
        abs_tol : None, "eps" or float
            When set, values whose absolute difference is below this
            tolerance are accepted without further checks. "eps" uses
            ``np.finfo(type(first)).eps``.

        Raises
        ------
        ValueError
            If ``abs_tol`` or ``prec`` has an unsupported value.
        """

        def _assertNumberEqual(first, second, delta=None):
            if (delta is None or first == second == 0.0
                    or math.isinf(first) or math.isinf(second)):
                self.assertEqual(first, second, msg=msg)
                # For signed zeros
                if not ignore_sign_on_zero:
                    try:
                        if math.copysign(1, first) != math.copysign(1, second):
                            self.fail(
                                self._formatMessage(msg,
                                                    "%s != %s" %
                                                    (first, second)))
                    except TypeError:
                        # copysign() rejects non-real operands; there is no
                        # sign to compare in that case.
                        pass
            else:
                self.assertAlmostEqual(first, second, delta=delta, msg=msg)

        first_family = self._detect_family(first)
        second_family = self._detect_family(second)

        assertion_message = "Type Family mismatch. (%s != %s)" % (first_family,
                                                                  second_family)
        if msg:
            assertion_message += ': %s' % (msg,)
        self.assertEqual(first_family, second_family, msg=assertion_message)

        # We now know they are in the same comparison family
        compare_family = first_family

        # For recognized sequences, recurse
        if compare_family == "ndarray":
            dtype = self._fix_dtype(first.dtype)
            self.assertEqual(dtype, self._fix_dtype(second.dtype))
            self.assertEqual(first.ndim, second.ndim,
                             "different number of dimensions")
            self.assertEqual(first.shape, second.shape,
                             "different shapes")
            self.assertEqual(first.flags.writeable, second.flags.writeable,
                             "different mutability")
            # itemsize is already checked by the dtype test above
            self.assertEqual(
                self._fix_strides(first),
                self._fix_strides(second), "different strides")
            if first.dtype != dtype:
                first = first.astype(dtype)
            if second.dtype != dtype:
                second = second.astype(dtype)
            for a, b in zip(first.flat, second.flat):
                self._assertPreciseEqual(a, b, prec, ulps, msg,
                                         ignore_sign_on_zero, abs_tol)
            # Elements were compared one by one; nothing left to check.
            return

        elif compare_family == "sequence":
            self.assertEqual(len(first), len(second), msg=msg)
            for a, b in zip(first, second):
                self._assertPreciseEqual(a, b, prec, ulps, msg,
                                         ignore_sign_on_zero, abs_tol)
            return

        elif compare_family == "exact":
            exact_comparison = True

        elif compare_family in ["complex", "approximate"]:
            exact_comparison = False

        elif compare_family == "enum":
            self.assertIs(first.__class__, second.__class__)
            self._assertPreciseEqual(first.value, second.value,
                                     prec, ulps, msg,
                                     ignore_sign_on_zero, abs_tol)
            return

        elif compare_family == "unknown":
            # Assume these are non-numeric types: we will fall back
            # on regular unittest comparison.
            self.assertIs(first.__class__, second.__class__)
            exact_comparison = True

        else:
            assert 0, "unexpected family"

        # If a Numpy scalar, check the dtype is exactly the same too
        # (required for datetime64 and timedelta64).
        if hasattr(first, 'dtype') and hasattr(second, 'dtype'):
            self.assertEqual(first.dtype, second.dtype)

        # Mixing bools and non-bools should always fail
        if (isinstance(first, self._bool_types) !=
                isinstance(second, self._bool_types)):
            assertion_message = ("Mismatching return types (%s vs. %s)"
                                 % (first.__class__, second.__class__))
            if msg:
                assertion_message += ': %s' % (msg,)
            self.fail(assertion_message)

        try:
            if cmath.isnan(first) and cmath.isnan(second):
                # The NaNs will compare unequal, skip regular comparison
                return
        except TypeError:
            # Not floats.
            pass

        # if absolute comparison is set, use it
        if abs_tol is not None:
            if abs_tol == "eps":
                tolerance = np.finfo(type(first)).eps
            elif isinstance(abs_tol, float):
                tolerance = abs_tol
            else:
                raise ValueError("abs_tol is not \"eps\" or a float, found %s"
                                 % abs_tol)
            if abs(first - second) < tolerance:
                return

        exact_comparison = exact_comparison or prec == 'exact'

        if not exact_comparison and prec != 'exact':
            if prec == 'single':
                bits = 24
            elif prec == 'double':
                bits = 53
            else:
                raise ValueError("unsupported precision %r" % (prec,))
            # Allowed delta scales with the magnitude of the operands.
            k = 2 ** (ulps - bits - 1)
            delta = k * (abs(first) + abs(second))
        else:
            delta = None

        if isinstance(first, self._complex_types):
            _assertNumberEqual(first.real, second.real, delta)
            _assertNumberEqual(first.imag, second.imag, delta)
        elif isinstance(first, (np.timedelta64, np.datetime64)):
            # Since Np 1.16 NaT == NaT is False, so special comparison needed
            if numpy_support.version >= (1, 16) and np.isnat(first):
                self.assertEqual(np.isnat(first), np.isnat(second))
            else:
                _assertNumberEqual(first, second, delta)
        else:
            _assertNumberEqual(first, second, delta)
def pandas_to_table(df):
    # type: (pd.DataFrame) -> Orange.data.Table
    """
    Convert a pandas.DataFrame to a Orange.data.Table instance.

    A non-``RangeIndex`` index is turned into regular columns first. Each
    column is then mapped by dtype:

    - categorical -> ``DiscreteVariable`` (codes as floats, missing -> NaN)
    - datetime64  -> ``TimeVariable`` (seconds as floats, NaT -> NaN)
    - object      -> ``StringVariable``
    - integer     -> ``ContinuousVariable`` with zero decimals
    - other numeric -> ``ContinuousVariable``

    Columns of any other dtype are skipped with a ``UserWarning``.
    Variables reporting ``is_primitive()`` form the ``X`` matrix; the rest
    are stored as metas.
    """
    import warnings

    index = df.index
    if not isinstance(index, pd.RangeIndex):
        df = df.reset_index()

    columns = []  # type: List[Tuple[Orange.data.Variable, np.ndarray]]

    for header, series in df.items():  # type: (Any, pd.Series)
        # ``pdtypes.is_categorical`` no longer exists in pandas 2.x; the
        # dtype check below is the equivalent test for a Series.
        if isinstance(series.dtype, pd.CategoricalDtype):
            coldata = series.values  # type: pd.Categorical
            categories = [str(c) for c in coldata.categories]
            var = Orange.data.DiscreteVariable.make(
                str(header), values=categories, ordered=coldata.ordered
            )
            # Remap the coldata into the var.values order/set
            coldata = pd.Categorical(
                coldata, categories=var.values, ordered=coldata.ordered
            )
            codes = coldata.codes
            assert np.issubdtype(codes.dtype, np.integer)
            # ``np.float`` was removed from NumPy; float64 is the same type.
            orangecol = np.array(codes, dtype=np.float64)
            # Negative codes mark missing categories.
            orangecol[codes < 0] = np.nan
        elif pdtypes.is_datetime64_any_dtype(series):
            # Check that this converts tz local to UTC
            series = series.astype(np.dtype("M8[ns]"))
            coldata = series.values  # type: np.ndarray
            assert coldata.dtype == "M8[ns]"
            mask = np.isnat(coldata)
            # Nanoseconds since epoch -> seconds as floats.
            orangecol = coldata.astype(np.int64) / 10 ** 9
            orangecol[mask] = np.nan
            var = Orange.data.TimeVariable.make(str(header))
            var.have_date = var.have_time = 1
        elif pdtypes.is_object_dtype(series):
            coldata = series.values
            assert isinstance(coldata, np.ndarray)
            orangecol = coldata
            var = Orange.data.StringVariable.make(str(header))
        elif pdtypes.is_integer_dtype(series):
            coldata = series.values
            var = Orange.data.ContinuousVariable.make(str(header))
            var.number_of_decimals = 0
            orangecol = coldata.astype(np.float64)
        elif pdtypes.is_numeric_dtype(series):
            orangecol = series.values.astype(np.float64)
            var = Orange.data.ContinuousVariable.make(str(header))
            var._out_format = "%.15g"
        else:
            warnings.warn(
                "Column '{}' with dtype: {} skipped."
                .format(header, series.dtype),
                UserWarning
            )
            # Nothing to append for an unsupported column.
            continue
        columns.append((var, orangecol))

    cols_x = [(var, col) for var, col in columns if var.is_primitive()]
    cols_m = [(var, col) for var, col in columns if not var.is_primitive()]

    variables = [v for v, _ in cols_x]
    if cols_x:
        X = np.column_stack([a for _, a in cols_x])
    else:
        # No primitive columns: keep the row count with zero feature columns.
        X = np.empty((df.shape[0], 0), dtype=np.float64)
    metas = [v for v, _ in cols_m]
    if cols_m:
        M = np.column_stack([a for _, a in cols_m])
    else:
        M = None

    domain = Orange.data.Domain(variables, metas=metas)
    return Orange.data.Table.from_numpy(domain, X, None, M)