Python Series.max Examples, pandas.Series.max Python Examples

Example #1

0

Show file

File: test_reductions.py Project: bashtage/pandas

    def test_sum_overflow(self, use_bottleneck):

        with pd.option_context('use_bottleneck', use_bottleneck):
            # GH#6915
            # overflowing on the smaller int dtypes
            for dtype in ['int32', 'int64']:
                v = np.arange(5000000, dtype=dtype)
                s = Series(v)

                result = s.sum(skipna=False)
                assert int(result) == v.sum(dtype='int64')
                result = s.min(skipna=False)
                assert int(result) == 0
                result = s.max(skipna=False)
                assert int(result) == v[-1]

            for dtype in ['float32', 'float64']:
                v = np.arange(5000000, dtype=dtype)
                s = Series(v)

                result = s.sum(skipna=False)
                assert result == v.sum(dtype=dtype)
                result = s.min(skipna=False)
                assert np.allclose(float(result), 0.0)
                result = s.max(skipna=False)
                assert np.allclose(float(result), v[-1])

Example #2

0

Show file

File: html.py Project: Vistarino/pandas

def _expand_elements(body):
    """Pad every row of *body* that is shorter than the longest row.

    Rows are extended in place with ``np.nan`` until they all have the
    length of the longest row.
    """
    row_lengths = Series(lmap(len, body))
    longest = row_lengths.max()
    short_rows = row_lengths[row_lengths != longest]

    for position, row_length in iteritems(short_rows):
        padding = [np.nan] * (longest - row_length)
        body[position] += padding

Example #3

0

Show file

File: test_datetime.py Project: BobMcFry/pandas

def test_nat_operations():
    """median/min/max of a timedelta Series holding one value and one NaT.

    GH 8617: each reduction must equal the single non-NaT element.
    """
    ser = Series([0, pd.NaT], dtype='m8[ns]')
    only_valid = ser[0]
    for reduction in ('median', 'min', 'max'):
        assert getattr(ser, reduction)() == only_valid

Example #4

0

Show file

File: uscovid.py Project: briangalindoherbert/simplecovid

def getelapsed(dtin: pd.Series):
    """Return the number of days between the smallest and largest date string.

    ``dtin`` holds date strings in ``%Y-%m-%d`` format; its ``max()`` and
    ``min()`` are parsed and the whole-day difference is returned.
    """
    date_format = '%Y-%m-%d'
    latest = datetime.datetime.strptime(dtin.max(), date_format)
    earliest = datetime.datetime.strptime(dtin.min(), date_format)
    return (latest - earliest).days

Example #5

0

Show file

File: html.py Project: bwignall/pandas

def _expand_elements(body):
    """Pad the shorter rows of *body* in place with empty strings.

    After the call every row that was shorter than the longest row has been
    extended with ``''`` entries up to that longest length.
    """
    row_lengths = Series([len(row) for row in body])
    longest = row_lengths.max()
    too_short = row_lengths != longest

    for position, row_length in row_lengths[too_short].items():
        missing = longest - row_length
        body[position] += [''] * missing

Example #6

0

Show file

 def create_interaction_description(interaction_count_series: Series) -> Dict:
     """Summary statistics of the per-user interaction counts for one interaction type.

     Returns a dict keyed by the MIN, MAX, MEAN and MEDIAN constants.
     """
     return {
         MIN: interaction_count_series.min(),
         MAX: interaction_count_series.max(),
         MEAN: interaction_count_series.mean(),
         MEDIAN: interaction_count_series.median(),
     }

Example #7

0

Show file

def NormalizeDatasetMethod1(ds: pd.Series):
    """Min-max normalize *ds* and return the result as a list of floats.

    Each value is mapped to ``(value - min) / (max - min)``. Values are
    visited in positional order, so the Series may carry any index (the
    previous ``ds[i]`` lookup was label-based and only worked when the
    index happened to be 0..n-1).

    A constant Series has a zero range; the division then yields NaN, as
    it did before.
    """
    minimum = ds.min()
    delta = ds.max() - minimum
    return [float((value - minimum) / delta) for value in ds]

Example #8

0

Show file

File: NumericColumn.py Project: THUyansh/LSRL

 def from_series(feature_name: str, series: Series):
     """Build a NumericColumn from a pandas.Series.

     The series must have a numeric dtype; its min, max, mean and standard
     deviation are passed to the NumericColumn constructor.
     """
     assert types.is_numeric_dtype(series), series.dtypes
     lowest = series.min()
     highest = series.max()
     average = series.mean()
     deviation = series.std()
     return NumericColumn(
         feature_name=feature_name,
         min_value=lowest,
         max_value=highest,
         mean_value=average,
         std_value=deviation,
     )

Example #9

0

Show file

File: tabular.py Project: dblueai/dblue-stats

    def get_numerical_distribution(cls,
                                   column: pd.Series,
                                   column_baseline: Dict = None):
        if column_baseline:
            bins = [
                x["lower_bound"]
                for x in column_baseline["numerical_stats"]["distribution"]
            ]
            bins.append(column_baseline["numerical_stats"]["distribution"][-1]
                        ["upper_bound"])

            # Insert a bin if new value is less than the min value
            if column.min() < column_baseline["numerical_stats"]["min"]:
                bins.insert(0, column.min().item())

            # Insert a bin if new value is less than the max value
            if column.max() > column_baseline["numerical_stats"]["max"]:
                bins.append(column.max().item())

            bin_size = len(bins) - 1
            labels = [str(x + 1) for x in range(bin_size)]
            cuts = pd.cut(x=column, bins=bins, precision=2, labels=labels)
        else:
            bin_size = 10
            labels = [str(x + 1) for x in range(bin_size)]
            cuts, bins = pd.cut(x=column,
                                bins=bin_size,
                                precision=2,
                                labels=labels,
                                retbins=True)

        value_counts = cuts.value_counts(normalize=True).to_dict()

        distribution = []

        for index, bin_value in enumerate(bins[:-1]):
            _bin = {
                "lower_bound": bin_value,
                "upper_bound": bins[index + 1],
                "percent":
                value_counts[str(index + 1)] * 100,  # Normalize to 100
            }

            distribution.append(_bin)

        return distribution

Example #10

0

Show file

File: exo_dm_RenaultCaptur.py Project: gitthabet/MS-BGD

def nbr_pages_parrecherche(Region,type):
    """Return the highest page number linked from the first results page.

    The page for ``Region``/``type`` (page 1) is fetched, the text of every
    ``<a>`` tag that is numeric is converted to int, and the maximum is
    returned. When no numeric link exists the result is 1.
    """
    soup = getSoupFromUrl(getURL_Annonces(Region, 1, type))
    page_numbers = Series(
        [int(link.text) for link in soup.find_all("a") if link.text.isnumeric()])
    return 1 if len(page_numbers) == 0 else page_numbers.max()

Example #11

0

Show file

def _expand_elements(body):
    """Right-pad short rows of *body* with ``''`` so all rows are equally long.

    The rows are modified in place; nothing is returned.
    """
    sizes = Series([len(row) for row in body])
    target = sizes.max()
    needs_padding = sizes[sizes != target]

    filler = ['']
    for row_number, size in needs_padding.items():
        shortfall = target - size
        body[row_number] += filler * shortfall

Example #12

0

Show file

File: feat_ext.py Project: MMMMMMMingor/SCUT-OFSV

def __normolization_min_max(a: pd.Series, index: list) -> pd.Series:
    """Min-max scale *a* when its name is listed in *index*.

    Series whose name is not in *index* are returned unchanged; otherwise
    the result is ``(a - min) / (max - min)``.
    """
    if a.name not in index:
        return a

    lowest = a.min()
    span = a.max() - lowest
    return (a - lowest) / span

Example #13

0

Show file

    def __init__(self, col: Series):
        """Store min, max, range, mean and std of *col* computed on its NumPy array."""
        values: ndarray = col.to_numpy()

        lowest: number = values.min(initial=None)
        highest: number = values.max(initial=None)

        self._min = lowest
        self._max = highest
        self._range: number = highest - lowest
        self._mean: number = values.mean()
        self._std: number = values.std()

Example #14

0

Show file

def split_data(date_blocks: pd.Series, X: pd.DataFrame, y: pd.Series):
    """Split X and y into training rows and validation rows by date block.

    The validation rows are those whose ``date_blocks`` value equals the
    maximum block; the training rows are those strictly below it.

    Returns ``(X_train, y_train, X_val, y_val)``.
    """
    val_block = date_blocks.max()
    is_train = date_blocks < val_block
    is_val = date_blocks == val_block

    X_train, y_train = X.loc[is_train], y.loc[is_train]
    X_val, y_val = X.loc[is_val], y.loc[is_val]
    return (X_train, y_train, X_val, y_val)

Example #15

0

Show file

File: html.py Project: xxspurs/pandas

def _expand_elements(body):
    """Extend, in place, each row of *body* shorter than the longest row.

    Missing trailing cells are filled with empty strings.
    """
    row_lengths = Series(lmap(len, body))
    longest = row_lengths.max()
    short_rows = row_lengths[row_lengths != longest]

    for position, row_length in iteritems(short_rows):
        body[position] += [''] * (longest - row_length)

Example #16

0

Show file

def float_formatter(column: pd.Series,
                    value: float,
                    minimize: bool = True) -> str:  # type: ignore
    """
    Formats ``value`` with thousands separators and two decimals for LaTeX.

    The text is wrapped in ``\\textbf{...}`` when ``value`` equals the best
    entry of ``column``: its minimum if ``minimize`` is true, otherwise its
    maximum.
    """
    best = column.min() if minimize else column.max()
    if value != best:
        return f"{value:,.2f}"
    return f"\\textbf{{{value:,.2f}}}"

Example #17

0

Show file

File: rfunctions.py Project: Advestis/ruleskit

 def get_max(s: pd.Series):
     """Return a Series holding the index label where *s* is maximal, NaN elsewhere.

     For string labels the labels are assigned into the flagged positions;
     for other labels the 1/NaN flags are multiplied by the index.
     """
     labels = s.index
     peak = s.max()
     # 1 where the value equals the maximum, NaN everywhere else.
     flags = (s == peak).astype(int).replace(0, np.nan)

     if not isinstance(labels[0], str):
         return flags * labels

     hits = ~flags.isna()
     flags[hits] = labels[hits]
     return flags

Example #18

0

Show file

File: group.py Project: humlab/penelope

def create_category_series(category_series: pd.Series,
                           fill_gaps: bool = True,
                           fill_steps: int = 1):
    """Returns the category values as a sorted list.

    With ``fill_gaps`` the list is the full range from the smallest to the
    largest category (inclusive) in steps of ``fill_steps``; otherwise it is
    the sorted distinct values actually present.
    """
    if not fill_gaps:
        distinct = category_series.unique().tolist()
        return sorted(distinct)

    first = category_series.min()
    last = category_series.max()
    return list(range(first, last + 1, fill_steps))

Example #19

0

Show file

def nbr_pages_parrecherche(Region, type):
    """Return the number of result pages for a search.

    Page 1 of the listing for ``Region``/``type`` is fetched and every
    ``<a>`` tag whose text is numeric is read as a page number. The largest
    one is returned, or 1 when the page has no numeric links.
    """
    first_page = getSoupFromUrl(getURL_Annonces(Region, 1, type))
    numeric_links = [
        int(anchor.text)
        for anchor in first_page.find_all("a")
        if anchor.text.isnumeric()
    ]
    page_numbers = Series(numeric_links)

    if len(page_numbers) == 0:
        return 1
    return page_numbers.max()

Example #20

0

Show file

def normalize_column(column: pd.Series) -> pd.Series:
    """
    Min-max normalizes a column, offsets it by 0.1 and applies VISUAL_SCALE.

    :param column: a column of numeric data
    :return: ``((column - min) / (max - min) + 0.1) * VISUAL_SCALE``
    """
    lowest = column.min()
    spread = column.max() - lowest
    unit_scaled = (column - lowest) / spread
    return (unit_scaled + .1) * VISUAL_SCALE

Example #21

0

Show file

File: core.py Project: keitakurita/torchtable

 def fit(self, x: pd.Series):
     """Record the statistics of *x* required by ``self.method`` and return self.

     "Gaussian" stores mean and std, "MinMax" stores min and max;
     "RankGaussian" currently stores nothing.
     """
     method = self.method
     if method == "Gaussian":
         self.mean = x.mean()
         self.std = x.std()
     elif method == "MinMax":
         self.min = x.min()
         self.max = x.max()
     # TODO: store state for "RankGaussian"
     return self

Example #22

0

Show file

    def trajectory_is_constant(self, trajectory: pandas.Series) -> bool:
        """
        Determines whether a trajectory stays at a relatively constant frequency.

        The trajectory counts as constant when the spread between its largest
        and smallest value does not exceed ``self.filter_consistency``.
        """
        spread = trajectory.max() - trajectory.min()
        return spread <= self.filter_consistency

Example #23

0

Show file

File: feat_ext.py Project: MMMMMMMingor/SCUT-OFSV

def __normolization_centroid(a: pd.Series, index: list) -> pd.Series:
    """Centre *a* on the midpoint of its range and scale by the range.

    Only applied when ``a.name`` is listed in *index*; other Series are
    returned unchanged. The result is ``(a - (max + min) / 2) / (max - min)``,
    which lies in ``[-0.5, 0.5]``.
    """
    if a.name in index:
        minimum = a.min()
        maximum = a.max()
        # The centroid is the midpoint of [minimum, maximum]. The earlier
        # (maximum - minimum) / 2 was half the range, which only coincides
        # with the midpoint when minimum is 0.
        centroid = (maximum + minimum) / 2

        a = (a - centroid) / (maximum - minimum)

    return a

Example #24

0

Show file

    def plot(self,
             forecast: np.ndarray,
             training_data: pd.Series,
             test_data: pd.Series = None,
             show: bool = False) -> matplotlib.figure.Figure:
        """Plot the training history, optional test data and the forecast.

        The forecast values are placed on a date index that continues the
        training index, using the spacing between its last two timestamps
        (so ``training_data`` needs at least two points).

        Every line is given an explicit legend label. Previously the legend
        texts were overwritten by position after ``legend()``, which depended
        on matplotlib implicitly labelling the plotted Series and could raise
        IndexError or attach a caption to the wrong line.

        :param forecast: forecasted values following the training data
        :param training_data: historical series with a datetime-like index
        :param test_data: optional actual values for the forecast period
        :param show: call ``plt.show()`` before returning
        :return: the created figure
        """
        logger.debug('Plotting...')

        history = training_data
        timeframe = history.index[-1] - history.index[-2]
        forecast = pd.Series(forecast,
                             index=pd.date_range(start=history.index[-1] +
                                                 timeframe,
                                                 periods=len(forecast),
                                                 freq=timeframe))

        has_test_data = test_data is not None

        fig, ax1 = plt.subplots()

        history_label = ('Actual Price (training data)'
                         if has_test_data else 'Actual Price')
        ax1.plot(history,
                 color='red',
                 linewidth=config.plot.linewidth,
                 label=history_label)
        if has_test_data:
            ax1.plot(test_data,
                     color='orange',
                     linewidth=config.plot.linewidth,
                     label='Actual Price (test data)')

        ax1.plot(forecast,
                 color='black',
                 linestyle=':',
                 linewidth=config.plot.linewidth + 0.2,
                 label='Forecasted Price')
        ax1.set_title(
            f'{self.currency}/{self.to_currency} Price (Orange) vs {self.currency}/{self.to_currency} Price Forecast (Black)'
        )
        ax1.set_ylabel(f'{self.currency}/{self.to_currency} Price')
        ax1.set_xlabel('Date')

        ax1.legend()

        if show:
            plt.show()

        return fig

Example #25

0

Show file

def normalize_column(df_column: Series) -> Series:
    """Min-max normalize a float64/int64 column; return anything else unchanged.

    A column whose minimum equals its maximum is also returned unchanged,
    because the normalization is undefined for a zero range.
    """
    is_numeric = df_column.dtype == np.float64 or df_column.dtype == np.int64
    if not is_numeric:
        return df_column

    high = df_column.max()
    low = df_column.min()
    if low == high:
        return df_column

    return (df_column - low) / (high - low)

Example #26

0

Show file

 def time_gap(dates: pd.Series, uom='weekly'):
     """
     Returns, per date, its gap from the latest date in the series.

     The gap is ``1 + floor(elapsed / unit)``, so the latest date itself maps
     to 1. The unit is one week when ``uom`` is 'weekly' (the default) and
     one hour for any other value.
     """
     seconds_per_unit = (3600 * 24 * 7) if uom == 'weekly' else 3600  ## can be improved
     latest = dates.max()
     elapsed_seconds = (latest - dates).dt.total_seconds()
     return 1 + np.floor(elapsed_seconds / seconds_per_unit)

Example #27

0

Show file

File: calibration.py Project: prio-data/views_stepshift

    def _check_inputs(
        s_test_pred: pd.Series,
        s_calib_pred: pd.Series,
        s_calib_actual: pd.Series,
    ) -> None:
        """ Check that inputs have valid names and could be probabilities

        Raises RuntimeError when a prediction series leaves the 0..1 range,
        when any series has missing values, or when the calibration
        predictions and actuals differ in length or index. Differing names
        of the two prediction series only produce a warning.
        """

        out_of_range = (
            s_test_pred.min() < 0
            or s_test_pred.max() > 1
            or s_calib_pred.min() < 0
            or s_calib_pred.max() > 1
        )
        if out_of_range:
            raise RuntimeError(
                "Probabilities outside (0,1) range were passed to calibrate"
            )

        if not s_calib_pred.name == s_test_pred.name:
            warnings.warn(f"{s_calib_pred.name} != {s_test_pred.name}")

        # Checked in this order: test predictions, calibration predictions,
        # calibration actuals.
        missing_checks = (
            (s_test_pred, "Missing values in s_test_pred"),
            (s_calib_pred, "Missing values in s_calib_pred"),
            (s_calib_actual, "Missing values in s_calib_actual"),
        )
        for series, message in missing_checks:
            if series.isnull().sum() > 0:
                _log_missing_indices(series)
                raise RuntimeError(message)

        same_length = len(s_calib_pred) == len(s_calib_actual)
        if (
            not same_length
            or len(s_calib_pred.index.difference(s_calib_actual.index)) > 0
        ):
            raise RuntimeError(
                f"len(s_calib_pred): {len(s_calib_pred)} "
                f"len(s_calib_actual): {len(s_calib_actual)} "
                f"index diff: "
                f"{s_calib_pred.index.difference(s_calib_actual.index)}"
                f"s_calib_pred.head() : {s_calib_pred.head()}"
                f"s_calib_pred.tail() : {s_calib_pred.tail()}"
                f"s_calib_actual.head() : {s_calib_actual.head()}"
                f"s_calib_actual.tail() : {s_calib_actual.tail()}"
            )

Example #28

0

Show file

def _assign_bins(values: pd.Series, no_bins, column_names) -> pd.DataFrame:
    """
    Assigns every value to a bin via ``_find_bin``.

    The bin limits are ``no_bins`` evenly spaced points between the smallest
    and largest value.
    :return: DataFrame with one row per value and columns ``column_names``
    """
    limits = np.linspace(values.min(), values.max(), no_bins)
    rows = [_find_bin(limits, val) for val in values.values]
    return pd.DataFrame(np.array(rows), columns=column_names)

Example #29

0

Show file

File: codec.py Project: JCSDA-internal/pyodc

 def from_dataframe(cls, column_name: str, data: pd.Series,
                    data_type: DataType):
     """Create an instance of ``cls`` for *data* and attach the data to it.

     The constructor receives the column name, the data's min and max, the
     data type and whether the data has NaNs. ``data_type`` must be one of
     ``cls.accepted_types``.
     """
     assert data_type in cls.accepted_types
     lowest, highest = data.min(), data.max()
     instance = cls(column_name,
                    lowest,
                    highest,
                    data_type,
                    has_missing=data.hasnans)
     instance._data = data
     return instance

Example #30

0

Show file

File: multi_armed_bandit.py Project: stankolubomir/pythia-tools

def calculate_row_max_arm(row: pd.Series) -> str:
    '''
    Finds the label of the largest value in a row
    :param row: row of values indexed by arm
    :return: index label of the first maximal entry, as a string
    '''
    flattened = row.squeeze()
    is_peak = flattened == flattened.max()
    winning_arms = flattened[is_peak].index

    return str(winning_arms[0])

Example #31

0

Show file

File: scales.py Project: labaran1/seaborn

    def setup(
        self,
        data: Series,
        prop: Property,
        axis: Axis | None = None,
    ) -> Scale:
        """Build a Scale for *data* and *prop* from this object's configuration.

        The returned Scale is assembled from a forward pipeline (unit
        conversion, forward transform, optional normalization, property
        mapping), a spacer function, optional legend entries, the lower-cased
        class name and the matplotlib scale object.
        """

        # Copy that is handed to prop.get_mapping below.
        new = copy(self)
        forward, inverse = self._get_transform()

        mpl_scale = self._get_scale(data.name, forward, inverse)

        # Without a caller-supplied axis, use a PseudoAxis fed with the data.
        if axis is None:
            axis = PseudoAxis(mpl_scale)
            axis.update_units(data)

        mpl_scale.set_default_locators_and_formatters(axis)

        normalize: Optional[Callable[[ArrayLike], ArrayLike]]
        if prop.normed:
            # Limits come from the data unless an explicit norm is set.
            if self.norm is None:
                vmin, vmax = data.min(), data.max()
            else:
                vmin, vmax = self.norm
            vmin, vmax = axis.convert_units((vmin, vmax))
            # Offset and width of the limits in transformed space.
            a = forward(vmin)
            b = forward(vmax) - forward(vmin)

            def normalize(x):
                return (x - a) / b

        else:
            normalize = vmin = vmax = None

        forward_pipe = [
            axis.convert_units, forward, normalize,
            prop.get_mapping(new, data)
        ]

        def spacer(x):
            # Smallest gap between consecutive distinct non-null values.
            return np.min(np.diff(np.sort(x.dropna().unique())))

        # TODO make legend optional on per-plot basis with ScaleSpec parameter?
        if prop.legend:
            # NOTE(review): vmin/vmax are None here when prop.normed is false;
            # confirm that legend and normed are always enabled together.
            axis.set_view_interval(vmin, vmax)
            locs = axis.major.locator()
            # Keep only the tick locations inside [vmin, vmax].
            locs = locs[(vmin <= locs) & (locs <= vmax)]
            labels = axis.major.formatter.format_ticks(locs)
            legend = list(locs), list(labels)

        else:
            legend = None

        scale_type = self.__class__.__name__.lower()
        return Scale(forward_pipe, spacer, legend, scale_type, mpl_scale)

Example #32

0

Show file

File: test_reductions.py Project: zaragomes/pandas

    def test_min_max_numeric_only(self):
        """min/max of an ordered categorical Series containing NaN.

        The categories are ordered ['b', 'a'], so "b" sorts before "a". The
        asserts pin the result for the default call and for
        numeric_only=True / numeric_only=False.
        """
        # TODO deprecate numeric_only argument for Categorical and use
        # skipna as well, see GH25303
        cat = Series(Categorical(
            ["a", "b", np.nan, "a"], categories=['b', 'a'], ordered=True))

        # Default call: min is expected to be NaN.
        _min = cat.min()
        _max = cat.max()
        assert np.isnan(_min)
        assert _max == "a"

        # numeric_only=True: min is expected to be the lowest category.
        _min = cat.min(numeric_only=True)
        _max = cat.max(numeric_only=True)
        assert _min == "b"
        assert _max == "a"

        # numeric_only=False: same expectations as the default call.
        _min = cat.min(numeric_only=False)
        _max = cat.max(numeric_only=False)
        assert np.isnan(_min)
        assert _max == "a"

Example #33

0

Show file

File: FeatureColumnStrategies.py Project: Jos1988/AIBuilder

def bucketize_data(column_data: pd.Series, num_buckets: int) -> List[int]:
    """Return the rounded inner boundaries that split the data range into buckets.

    The range from the column's min to its max is divided into
    ``num_buckets`` equal steps; the ``num_buckets - 1`` boundaries between
    them are accumulated step by step and rounded (after adding 0.000001).
    """
    lowest = column_data.min()
    step = (column_data.max() - lowest) / num_buckets

    edges = []
    running_edge = lowest
    while len(edges) + 1 < num_buckets:
        running_edge += step
        edges.append(round(running_edge + 0.000001))

    return edges

Example #34

0

Show file

 def test_min_max_dt64_api_consistency_empty_df(self):
     # check DataFrame/Series api consistency when calling min/max on an empty
     # DataFrame/Series.
     df = DataFrame(dict(x=[]))
     expected_float_series = Series([], dtype=float)
     # check axis 0
     assert np.isnan(df.min(axis=0).x) == np.isnan(expected_float_series.min())
     assert np.isnan(df.max(axis=0).x) == np.isnan(expected_float_series.max())
     # check axis 1
     tm.assert_series_equal(df.min(axis=1), expected_float_series)
     tm.assert_series_equal(df.min(axis=1), expected_float_series)

Example #35

0

Show file

File: test_reductions.py Project: zhengfeiwang/pandas

    def test_sum_overflow_float(self, use_bottleneck, dtype):
        with pd.option_context("use_bottleneck", use_bottleneck):
            v = np.arange(5000000, dtype=dtype)
            s = Series(v)

            result = s.sum(skipna=False)
            assert result == v.sum(dtype=dtype)
            result = s.min(skipna=False)
            assert np.allclose(float(result), 0.0)
            result = s.max(skipna=False)
            assert np.allclose(float(result), v[-1])

Example #36

0

Show file

File: test_reductions.py Project: bashtage/pandas

    def test_min_max(self):
        """min/max of categorical Series: unordered, ordered, reordered, with NaN."""
        # unordered cats have no min/max
        cat = Series(Categorical(["a", "b", "c", "d"], ordered=False))
        with pytest.raises(TypeError):
            cat.min()
        with pytest.raises(TypeError):
            cat.max()

        # Ordered, with the default category order.
        cat = Series(Categorical(["a", "b", "c", "d"], ordered=True))
        _min = cat.min()
        _max = cat.max()
        assert _min == "a"
        assert _max == "d"

        # Ordered, with the categories explicitly listed as d, c, b, a.
        cat = Series(Categorical(["a", "b", "c", "d"], categories=[
                     'd', 'c', 'b', 'a'], ordered=True))
        _min = cat.min()
        _max = cat.max()
        assert _min == "d"
        assert _max == "a"

        # NaN values present: min is expected to be NaN.
        cat = Series(Categorical(
            [np.nan, "b", "c", np.nan], categories=['d', 'c', 'b', 'a'
                                                    ], ordered=True))
        _min = cat.min()
        _max = cat.max()
        assert np.isnan(_min)
        assert _max == "b"

        # Same check with integer categories listed in descending order.
        cat = Series(Categorical(
            [np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], ordered=True))
        _min = cat.min()
        _max = cat.max()
        assert np.isnan(_min)
        assert _max == 1

Example #37

0

Show file

File: test_utils.py Project: cpcloud/span

def test_name2num():
    """name2num of a random 4-letter name lies between the all-min and all-max codes.

    The bounds are the base-256 combinations of the smallest and largest
    ASCII-letter code point repeated ``str_len`` times.
    """
    num_to_test = 10
    str_len = 4
    letters = string.ascii_letters
    x = Series(dict(zip(letters, map(ord, letters))))
    base = 256 ** np.arange(str_len)
    mn = base.dot(np.repeat(x.min(), str_len))
    mx = base.dot(np.repeat(x.max(), str_len))

    # range instead of xrange: xrange does not exist on Python 3, and for
    # 10 iterations the two behave the same on Python 2.
    for _ in range(num_to_test):
        name = random.sample(letters, str_len)
        num = name2num(name)
        assert mn <= num <= mx

Example #38

0

Show file

File: test_reductions.py Project: bashtage/pandas

    def test_timedelta64_analytics(self):

        # index min/max
        dti = pd.date_range('2012-1-1', periods=3, freq='D')
        td = Series(dti) - pd.Timestamp('20120101')

        result = td.idxmin()
        assert result == 0

        result = td.idxmax()
        assert result == 2

        # GH#2982
        # with NaT
        td[0] = np.nan

        result = td.idxmin()
        assert result == 1

        result = td.idxmax()
        assert result == 2

        # abs
        s1 = Series(pd.date_range('20120101', periods=3))
        s2 = Series(pd.date_range('20120102', periods=3))
        expected = Series(s2 - s1)

        # FIXME: don't leave commented-out code
        # this fails as numpy returns timedelta64[us]
        # result = np.abs(s1-s2)
        # assert_frame_equal(result,expected)

        result = (s1 - s2).abs()
        tm.assert_series_equal(result, expected)

        # max/min
        result = td.max()
        expected = pd.Timedelta('2 days')
        assert result == expected

        result = td.min()
        expected = pd.Timedelta('1 days')
        assert result == expected

Example #39

0

Show file

File: processing.py Project: igncp/encina

  def get_summary_indicators_from_hist(sf, hist, int_index=False):
    """Summarise a histogram given as a mapping of label -> frequency.

    Always returns mean, median, std and max of the frequencies (the max
    together with the label where it occurs). With ``int_index`` the labels
    are additionally read as integers: their mean, median and std are
    added, ``index_total`` becomes the sum of frequency * label, and the
    largest label is reported with its frequency, looked up as
    ``hist[str(label)]``. Without ``int_index``, ``index_total`` is 'NA'.
    """
    seriesHist = Series(hist)
    maxs = {
      'freq': {
        'freq': seriesHist.max(),
        'index': seriesHist.idxmax(),
      }
    }

    means = {'freq': seriesHist.mean()}
    medians = {'freq': seriesHist.median()}
    stds = {'freq': seriesHist.std()}
    index_total = 'NA'

    if int_index:
      index_list = seriesHist.index.astype(int).tolist()
      # .iloc pairs each frequency with its label by position. A plain
      # seriesHist[pos] with an integer on non-integer labels relies on a
      # deprecated positional fallback.
      index_total = sum(
        seriesHist.iloc[pos] * label for pos, label in enumerate(index_list)
      )
      index_series = Series(index_list)

      means['index'] = index_series.mean()
      medians['index'] = index_series.median()
      stds['index'] = index_series.std()

      maxs['freq']['index'] = int(maxs['freq']['index'])

      largest_label = max(index_list)
      maxs['index'] = {
        'index': largest_label,
        'freq': hist[str(largest_label)],
      }

    return {
      'means': means,
      'medians': medians,
      'stds': stds,
      'max': maxs,
      'index_total': index_total
    }

Example #40

0

Show file

File: PnIEstims.py Project: titenko1996/mathstatistics

def count_estims(dist, gamma=0.95):
    '''
    Compute point estimates and confidence intervals for a sample.

    :param dist: distribution (the sample of observations)
    :param gamma: probability of realisation of value (confidence level)
    :return point: point estimates
    :return interval: confidence intervals for point estimates
    '''
    import numpy as np
    from scipy.stats import t, norm

    x = Series(dist)
    N = x.count()

    # Point estimates
    point = {}

    med_ = med_u(x)
    med = np.median(dist)
    # Mean absolute deviation around the mean. Series.mad() was removed in
    # pandas 2.0, so it is computed explicitly here.
    mad = (x - x.mean()).abs().mean()
    mean_c = mean(dist)
    var = np.var(dist)
    std = np.std(dist)
    mod = stats.mode(dist).mode
    kurt = stats.kurtosis(dist)
    skew_my = stats.skew(dist)
    Chi = 1/np.sqrt(np.abs(kurt))
    quantiles = np.round(x.quantile([0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95]), 5)
    W = std/mean_c

    # One HTML row per quantile level, joined once instead of repeated "+=".
    quantiles_str = "".join(
        '<p><pre>{0}\t{1}</pre></p>'.format(level, quantiles[level])
        for level in quantiles.index
    )

    point['MED'] = np.round(med, 5)
    point['MED*'] = np.round(med_, 5)
    point['MAD'] = np.round(mad, 5)
    point['Min'] = np.round(x.min(), 5)
    point['Max'] = np.round(x.max(), 5)
    point['Mean'] = np.round(mean_c, 5)
    point['S^2'] = np.round(var, 5)
    point['S'] = np.round(std, 5)
    point['MOD'] = np.round(mod, 5)
    point['E'] = np.round(kurt, 5)
    point['A'] = np.round(skew_my, 5)
    point['Chi'] = np.round(Chi, 5)
    point['X(alpha)'] = quantiles_str
    point['W'] = np.round(W, 5)

    # Interval estimates
    interval = {}
    # Student's t quantiles for samples of fewer than 61 observations,
    # standard normal quantiles otherwise.
    if N < 61:
        lower_q = t.ppf((1-gamma)/2, N-1)
        upper_q = t.ppf(1-(1-gamma)/2, N-1)
    else:
        lower_q = norm.ppf((1-gamma)/2)
        upper_q = norm.ppf(1-(1-gamma)/2)
    X_cf = (mean_c + lower_q*sigma_X(x), mean_c + upper_q*sigma_X(x))
    A_cf = (skew_my + lower_q*sigma_A(x), skew_my + upper_q*sigma_A(x))
    S_cf = (std + lower_q*sigma_S(x), std + upper_q*sigma_S(x))
    E_cf = (kurt + lower_q*sigma_E(x), kurt + upper_q*sigma_E(x))
    # The interval for W is only computed when W < 1; otherwise it is
    # reported as (None, None).
    if W < 1:
        v = lower_q/np.sqrt(2*(N-1))
        W_cf = np.round((W/(1+v*np.sqrt(1+2*W**2)), W/(1-v*np.sqrt(1+2*W**2))), 5)
    else:
        W_cf = (None, None)

    interval['Mean'] = np.round(X_cf, 5)
    interval['S'] = np.round(S_cf, 5)
    interval['E'] = np.round(E_cf, 5)
    interval['A'] = np.round(A_cf, 5)
    interval['W'] = W_cf

    return point, interval

Example #41

0

Show file

File: MarketingMixedModel_SARIMAX.py Project: amirmeisami/SARIMAX-MARKETING-MIXED-MODEL

    def source_data(self):
        """Load the raw daily series from the databases and return ``(endog, exog)``.

        Reads ``self.stTrain`` (start date string in ``'Y-M-D'`` form),
        ``self.endPred``, ``self.endP``, ``self.target``, ``self.baseline`` and
        the switches ``self.view``, ``self.baseorg``, ``self.paid`` and
        ``self.rank``.

        Returns:
            tuple: ``(endog, exog)``. ``endog`` is the daily summed metric
            series for the target country (not normalised). ``exog`` is a
            DataFrame with one column per enabled switch, named ``'paid'``,
            ``'base'``, ``'view'`` and ``'rank'``.

        Raises:
            RuntimeError: if ``self.view`` is set and the start date is before
                2015-4-1, or if no switch is enabled (the exog frame is empty).
        """
        
        st_date = self.stTrain
#        st_date = '2014-10-1'
        # Parse the 'Y-M-D' start string into a date for the range checks below.
        stD = date(int(st_date.split('-')[0]), int(st_date.split('-')[1]), int(st_date.split('-')[2]))
        if self.view and stD < datetime.datetime.strptime('2015-4-1',"%Y-%m-%d").date():
            raise RuntimeError('I know it sucks but we dont have view-count data for anytime before 2015-4-1!')
        if self.view:
            # Installs and page views per date for the target country.
            # NOTE(review): the query text is assembled with % string
            # formatting; confirm self.target only ever holds trusted values.
            db_red = psycopg2.connect(host="***", database="***", port="***",
                                  user="******", password="******")
            db_red.autocommit = True
            df_red = pd.read_sql('''select date,sum(installs) as install, sum(pageviewcount) as view
                                from appstoredata_itunes_metrics where game='***' 
                                and country='%s' group by date;''' % pycountry.countries.get(alpha2=self.target).name, 
                                con=db_red)  
                            
            df_red['date'] = pd.to_datetime(df_red['date'])
            # Daily sums restricted to [st_date, self.endPred]; missing days become 0.
            # NOTE(review): `how=` is the legacy resample signature; confirm
            # the pandas version this project is pinned to.
            ts_view_target1 = Series(df_red.view.tolist(), 
                                     index=df_red.date.tolist()).resample('D', how='sum')[st_date:self.endPred].fillna(0)
            ts_install_target1 = Series(df_red.install.tolist(), 
                                        index=df_red.date.tolist()).resample('D', how='sum')[st_date:self.endPred].fillna(0)
            # If the series is shorter than the span from the start date to
            # self.endP, add a zero at the start date and resample again so the
            # series begins at st_date.
            if len(ts_view_target1) < (self.endP-stD).days :
                ts_view_target1[pd.to_datetime(st_date)] = 0
                ts_view_target1 = ts_view_target1.resample('D', how='sum')[st_date:self.endPred].fillna(0)
                ts_install_target1[pd.to_datetime(st_date)] = 0
                ts_install_target1 = ts_install_target1.resample('D', how='sum')[st_date:self.endPred].fillna(0)
            # Scale each series by its own total so the values sum to 1.
            ts_view_target = (ts_view_target1)/(ts_view_target1.sum())
            ts_install_target = (ts_install_target1)/(ts_install_target1.sum())
        else:
            ts_view_target = []
            ts_view_target1 = []
            ts_install_target = []  
            ts_install_target1 = []
        
        db = MySQLdb.connect(
        host = '***', 
        user = '******', 
        passwd = '***', 
        db = '***', 
        port = '***')
        
        # Metric values summed per date, channel type and country.
        df_mysql = pd.read_sql('''select metrics_daily.date as date, dim_country.name as country,
                               sum(metrics_daily.value) as value, dim_channel.channel_type as type
                               from metrics_daily left join dim_channel on dim_channel.id = metrics_daily.channel_id 
                               left join dim_country on dim_country.id = metrics_daily.country_id where project_id=195 
                               and metrics_daily.platform_id=2 and metric_id in (5) group by date, type, country;''', con=db)  
                       
        
        df_mysql['date'] = pd.to_datetime(df_mysql['date'])
        # Target-country rows: all channel types, and the ORGANIC subset.
        all_data_target = df_mysql[df_mysql.country==self.target]
        org_data_target = df_mysql[(df_mysql.type=='ORGANIC') & (df_mysql.country==self.target)]
        ts_org_target1 = Series(org_data_target.value.tolist(), 
                               index=org_data_target.date.tolist()).resample('D', how='sum')[st_date:self.endPred].fillna(0)
        ts_all_target1 = Series(all_data_target.value.tolist(), 
                                index=all_data_target.date.tolist()).resample('D', how='sum')[st_date:self.endPred].fillna(0)
        ts_org_target = (ts_org_target1)/(ts_org_target1.sum())
        ts_all_target = (ts_all_target1)/(ts_all_target1.sum())
        
        if self.baseorg:
            # ORGANIC series of the baseline country, min-max scaled.
            org_data_base = df_mysql[(df_mysql.type=='ORGANIC') & (df_mysql.country==self.baseline)]
            ts_org_base1 = Series(org_data_base.value.tolist(), 
                                 index=org_data_base.date.tolist()).resample('D', how='sum')[st_date:self.endPred].fillna(0)   
            ts_org_base = (ts_org_base1-ts_org_base1.min())/(ts_org_base1.max()-ts_org_base1.min())
        else:
            ts_org_base = []
            ts_org_base1 = []
        
        if self.paid:
            # PAID series of the target country, scaled by its total.
            paid_data_target = df_mysql[(df_mysql.type=='PAID') & (df_mysql.country==self.target)]
            ts_paid_target1 = Series(paid_data_target.value.tolist(),
                                    index=paid_data_target.date.tolist()).resample('D', how='sum')[st_date:self.endPred].fillna(0)
            # Same start-date padding as for the view series above.
            if len(ts_paid_target1) < (self.endP-stD).days :
                ts_paid_target1[pd.to_datetime(st_date)] = 0
                ts_paid_target1 = ts_paid_target1.resample('D', how='sum')[st_date:self.endPred].fillna(0)
            ts_paid_target = (ts_paid_target1)/(ts_paid_target1.sum())
        else:
            ts_paid_target = []
            ts_paid_target1 = []
            
        if self.rank:
            # Per-date maximum of 1/sqrt(rank) for the target country.
            # NOTE(review): this query is also built with % string formatting
            # from self.target; confirm the value is trusted.
            df_rank = pd.read_sql('''select date, max(1/sqrt(rank)) as bestRank from kabam_ranks_data_free where 
                                    country='%s' and device!='android'and game='***' 
                                    and category='Overall' group by date;''' % self.target, con=db)  
            
            df_rank['date'] = pd.to_datetime(df_rank['date'])
            ts_rank_target1 = Series(df_rank.bestRank.tolist(), 
                                     index=df_rank.date.tolist()).resample('D', how='sum')[st_date:self.endPred].fillna(0)
            # Same start-date padding as for the view series above.
            if len(ts_rank_target1) < (self.endP-stD).days :
                ts_rank_target1[pd.to_datetime(st_date)] = 0
                ts_rank_target1 = ts_rank_target1.resample('D', how='sum')[st_date:self.endPred].fillna(0)
            ts_rank_target = (ts_rank_target1)/(ts_rank_target1.sum())
        else:
            ts_rank_target = []
            ts_rank_target1 = []
        
#        endog = ts_org_target1
#        endog = ts_install_target
        # The endogenous variable is the un-normalised all-channel series.
        endog = ts_all_target1
        
        # Three parallel lists: the switch, the series it enables, and the
        # exog column name it is stored under.
        Tlist = [self.paid, self.baseorg, self.view, self.rank]
        dff = DataFrame()
        tList = [ts_paid_target, ts_org_base, ts_view_target, ts_rank_target]
        tlist = ['paid', 'base', 'view', 'rank']
        for i in xrange(0,len(Tlist)):
            if Tlist[i]:
                dff[tlist[i]] = tList[i]
        if dff.empty:
            raise RuntimeError('Where is your exog variable? Do you need a coffee or something?!')
                
        exog = dff
        
        return (endog, exog)

Example #42

0

Show file

File: SMekala_hw_09.py Project: msekhar12/MSDA_602_Python_assignments

    
    l = Series((HTTP_DF['origin']))
    l = l.value_counts()

    clear_scr()
    
    print "\n"
    print "Questions"
    print "---------"
    print "Question:1."
    print "-----------"
    print "Which hostname or IP address made the most requests?"
    print "Answer:"
    print "-------"
    print "The MAXIMUM number of requests were made by '%s'.\nFrom this address, a total of %d requests were made" % (l.idxmax(),l.max())
    print "\n"

    l = HTTP_DF.groupby(['origin'])['bytes_transferred'].sum()
    print "Question:2."
    print "-----------"
    print "Which hostname or IP address received the most total bytes from the server?  How many bytes did it receive?"
    print "Answer:"
    print "-------"
    print "The MAXIMUM number of bytes were received by '%s'. This address has received a total of %d bytes." % (l.idxmax(), l.max())
    print "\n"
    

    l = Series((HTTP_DF['hour']))
    l = l.value_counts()

Example #43

0

Show file

File: pluralsight_wheeler.py Project: schaunwheeler/schaunwheeler.github.io

    datad = get_dummies(datas, prefix=col, prefix_sep='__')
    data[datad.columns] = datad

# Drop the non-predictor and original categorical columns, then fill the
# remaining missing values with the per-column mean.
data = data.drop(drop_cols + category_cols, axis=1)
data = data.fillna(data.mean())

# Random forest with a fixed seed; the same estimator object is reused for
# both cross-validation runs and for the importance fit below.
rf = RandomForestClassifier(
    n_estimators=1000,
    oob_score=True,
    random_state=42,
    class_weight='balanced_subsample',
    verbose=False,
    n_jobs=-1
)

# Evaluate the model using all variables.
evals = cv_results(x=data, y=outcome, model=rf, nfolds=10, nparts=20, verbose=True)

# Fit on the full data to get feature importances, then keep only the
# variables whose importance is more than one-tenth of the largest importance
# (the comparison is a strict greater-than).
_ = rf.fit(data, outcome)
importance = Series(rf.feature_importances_, index=data.columns).sort_values(ascending=False)
importance2 = importance / importance.max()
most_important = importance[importance2.gt(0.1)]

# Evaluate the model using only the most important variables.
evals2 = cv_results(x=data.loc[:, most_important.index], y=outcome, model=rf, nfolds=10, nparts=20, verbose=True)

# Compare both models: join the two result frames on 'prob' and take the
# difference of their 'renewed_pct' columns (full model minus reduced model).
eval_df = evals.merge(evals2, left_on='prob', right_on='prob', suffixes=['_full', '_imp'])
eval_df['renewed_pct_diff'] = eval_df['renewed_pct_full'] - eval_df['renewed_pct_imp']

Example #44

0

Show file

File: numpyAndPandas.py Project: glorysongglory/pythonTest

# Contents of `frame`; the string below shows the expected table.
frame
'''

   A  B  C
a  0  1  2
b  3  4  5
c  6  7  8
'''
# Column-wise maximum. `print` and its argument were on separate lines, so
# the result was evaluated but never printed.
print(frame.max())
'''
A    6
B    7
C    8
'''
# Range (max - min) of whatever Series it is given.
f = lambda x: x.max() - x.min()
print(frame.apply(f))  # applied to each column
'''
A    6
B    6
C    6
'''
print(frame.apply(f, axis=1))  # applied to each row
'''
a    2
b    2
c    2
'''

Example #45

0

Show file

File: labeled_array.py Project: huxt2014/python-examples

# give the Series a name
s.name = 'name'

# length: len(), .size and the first entry of .shape all agree
assert len(s) == s.size == s.shape[0]

# number of elements that are not NaN
s.count()

# get an array of the unique values
s.unique()

# like "count(*) ... group by" over the non-NaN values; returns a Series
s.value_counts()

# aggregations and statistics
s.max()
s.mean()
s.var()

# index label of the max element
s.idxmax()

# rank
s = Series([4, 1, 2, 5])
s.rank()                     # returns ranks [3, 1, 2, 4]

# plot
s.plot()
plt.show()

# translate ##################################################