Example #1
    def test_binary_ops_align(self):

        # test aligning binary ops

        # GH 6681
        index = MultiIndex.from_product(
            [list("abc"), ["one", "two", "three"], [1, 2, 3]], names=["first", "second", "third"]
        )

        df = DataFrame(
            np.arange(27 * 3).reshape(27, 3), index=index, columns=["value1", "value2", "value3"]
        ).sort_index()

        idx = pd.IndexSlice
        for op in ["add", "sub", "mul", "div", "truediv"]:
            opa = getattr(operator, op, None)
            if opa is None:
                continue

            x = Series([1.0, 10.0, 100.0], [1, 2, 3])
            result = getattr(df, op)(x, level="third", axis=0)

            expected = pd.concat([opa(df.loc[idx[:, :, i], :], v) for i, v in x.iteritems()]).sort_index()
            assert_frame_equal(result, expected)

            x = Series([1.0, 10.0], ["two", "three"])
            result = getattr(df, op)(x, level="second", axis=0)

            expected = pd.concat([opa(df.loc[idx[:, i], :], v) for i, v in x.iteritems()]).reindex_like(df).sort_index()
            assert_frame_equal(result, expected)

        # GH9463 (alignment level of dataframe with series)

        midx = MultiIndex.from_product([["A", "B"], ["a", "b"]])
        df = DataFrame(np.ones((2, 4), dtype="int64"), columns=midx)
        s = pd.Series({"a": 1, "b": 2})

        df2 = df.copy()
        df2.columns.names = ["lvl0", "lvl1"]
        s2 = s.copy()
        s2.index.name = "lvl1"

        # different cases of integer/string level names:
        res1 = df.mul(s, axis=1, level=1)
        res2 = df.mul(s2, axis=1, level=1)
        res3 = df2.mul(s, axis=1, level=1)
        res4 = df2.mul(s2, axis=1, level=1)
        res5 = df2.mul(s, axis=1, level="lvl1")
        res6 = df2.mul(s2, axis=1, level="lvl1")

        exp = DataFrame(np.array([[1, 2, 1, 2], [1, 2, 1, 2]], dtype="int64"), columns=midx)

        for res in [res1, res2]:
            assert_frame_equal(res, exp)

        exp.columns.names = ["lvl0", "lvl1"]
        for res in [res3, res4, res5, res6]:
            assert_frame_equal(res, exp)
Example #2
    def filter_by_exposure_from_factor_model(self,
                                             factor_model,
                                             lower_bounds: pd.Series,
                                             upper_bounds: pd.Series,
                                             benchmark_returns=None,
                                             regression_period: int = 36):
        """

        :param factor_model:
        :param lower_bounds: pd.Series of floats between 0 and 100, each representing a factor exposure.
            Example: lower_bounds = pd.Series(data=[80], index=['Alpha'])
        :param upper_bounds: pd.Series of floats between 0 and 100, each representing a factor exposure.
            Example: upper_bounds = pd.Series(data=[60, 90], index=['MKT', 'Alpha'])
        :param benchmark_returns:
        :param regression_period:
        :param frequency:
        :return:
        """

        if not isinstance(factor_model, FactorModels):
            raise Exception('Factor model should be of type `FactorModels`')

        self.conditions.append(
            (StockScreener.filter_by_exposure_from_factor_model, factor_model,
             lower_bounds, upper_bounds, benchmark_returns, regression_period))

        regression_dict = {}
        factor_model = factor_model.value(to_date=self.date)

        for stock in self.stocks:
            regression = factor_model.regress_factor_loadings(
                portfolio=TimeDataFrame(stock),
                benchmark_returns=benchmark_returns,
                regression_window=regression_period,
                rolling=False,
                show=False)

            regression_dict[stock] = regression.params
        regression_df = pd.DataFrame(data=regression_dict)
        regression_df.rename(index={'Intercept': 'Alpha'}, inplace=True)
        # min-max normalize and scale
        normalized_df = regression_df.apply(func=lambda x: 100 * (x - min(x)) /
                                            (max(x) - min(x)),
                                            axis=1)
        for idx, factor in lower_bounds.iteritems():
            normalized_df = normalized_df.loc[:,
                                              normalized_df.loc[idx] >= factor]
        for idx, factor in upper_bounds.iteritems():
            normalized_df = normalized_df.loc[:,
                                              normalized_df.loc[idx] <= factor]
        # Return only the stocks whose normalized exposures satisfy the bounds
        return list(normalized_df.columns)
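A hedged usage sketch for the filter above; the `screener` instance and the `FactorModels.CAPM` member are illustrative names from the surrounding project, not verified API:

import pandas as pd

# Hypothetical: keep stocks whose normalized 'Alpha' exposure is at least 80
# and whose 'MKT' exposure is at most 60 (both on the 0-100 scale used above).
lower_bounds = pd.Series(data=[80], index=['Alpha'])
upper_bounds = pd.Series(data=[60], index=['MKT'])
surviving = screener.filter_by_exposure_from_factor_model(
    factor_model=FactorModels.CAPM,  # assumed enum member, for illustration only
    lower_bounds=lower_bounds,
    upper_bounds=upper_bounds,
    regression_period=36)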
Example #3
def ml_get_train_times(samples_info_sets: pd.Series,
                       test_times: pd.Series) -> pd.Series:
    # pylint: disable=invalid-name
    """
    Advances in Financial Machine Learning, Snippet 7.1, page 106.

    Purging observations in the training set

    This function finds the training set indexes, given the information on which each record is based
    and the range for the test set.
    Given test_times, find the times of the training observations.

    :param samples_info_sets: (pd.Series) The information range on which each record is constructed from
        *samples_info_sets.index*: Time when the information extraction started.
        *samples_info_sets.value*: Time when the information extraction ended.
    :param test_times: (pd.Series) Times for the test dataset.
    :return: (pd.Series) Training set
    """
    train = samples_info_sets.copy(deep=True)
    for start_ix, end_ix in test_times.iteritems():
        df0 = train[(start_ix <= train.index) &
                    (train.index <= end_ix)].index  # Train starts within test
        df1 = train[(start_ix <= train)
                    & (train <= end_ix)].index  # Train ends within test
        df2 = train[(train.index <= start_ix)
                    & (end_ix <= train)].index  # Train envelops test
        train = train.drop(df0.union(df1).union(df2))
    return train
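A minimal self-contained sketch of the purging behaviour above, with synthetic dates (requires a pandas version that still provides Series.iteritems(), i.e. earlier than 2.0):

import pandas as pd

# Each sample's information spans [index, value]; the single test window covers
# 2020-01-04 through 2020-01-06, so overlapping samples get purged.
starts = pd.date_range('2020-01-01', periods=8, freq='D')
samples_info_sets = pd.Series(starts + pd.Timedelta(days=1), index=starts)
test_times = pd.Series(pd.Timestamp('2020-01-06'),
                       index=[pd.Timestamp('2020-01-04')])
train = ml_get_train_times(samples_info_sets, test_times)
# Samples starting 2020-01-03 through 2020-01-06 overlap the test window
# and are dropped; the remaining samples stay in `train`.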
Example #4
def convert_to_result(series: pd.Series):
    return {
        "r": [{
            "d": format_date(date),
            "v": round(value, 1) if not np.isnan(value) else None
        } for date, value in series.iteritems()]
    }
Example #5
def run_query(connection_string,
              sql_query,
              index=None,
              offset=None,
              output_format='dbx'):
    if output_format not in ['dbx', 'json']:
        print_error('Invalid output format "{}" - try "dbx" or "json"'.format(
            output_format))
        return
    engine = create_engine(connection_string)
    df = read_sql_query(sql_query,
                        engine,
                        parse_dates=[index] if index is not None else [])
    if index is not None:
        df['__ds_checkpoint'] = to_numeric(df[index])
        df = df.set_index('__ds_checkpoint')
    if index is not None and offset is not None:
        df = df[df.index > float(offset)]
    if output_format == 'json':
        fmt = df.to_json(orient='records', lines=True).split('\n')
        out = Series(fmt, index=df.index)
    else:
        out = df.apply(format_dbx, axis=1)
    for idx, row in out.iteritems():
        print_row(idx, row)
    if index is not None and df.shape[0] > 0:
        print_checkpoint(df.index.max())
Example #6
def cal_trailing_year_series(input_series: pd.Series) -> pd.Series:
    """Calculate the trailing year series based on the original series data

    :param input_series: The original data
    :return: The trailing year series calculated

    """

    result_dict = {}
    for report_date, value in input_series.iteritems():
        ttm_dates = find_trailing_year_dates(report_date)
        if len(ttm_dates) == 1:
            result_dict[report_date] = input_series[report_date]
        else:
            begin_date, annual_report_date, end_date = ttm_dates
            if (begin_date in input_series.index
                    and annual_report_date in input_series.index):

                result_dict[report_date] = (input_series[annual_report_date] -
                                            input_series[begin_date] +
                                            input_series[end_date])
            else:
                result_dict[report_date] = np.nan

    return pd.Series(result_dict)
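A hedged example of a call; the numbers are illustrative and the function depends on the project's find_trailing_year_dates() helper, which is not shown here:

import pandas as pd

# Report-date-indexed, year-to-date style values (made up). Per the logic above,
# a TTM value is input[annual_report_date] - input[begin_date] + input[end_date].
reported = pd.Series({pd.Timestamp('2020-03-31'): 10.0,
                      pd.Timestamp('2020-12-31'): 40.0,
                      pd.Timestamp('2021-03-31'): 12.0})
ttm = cal_trailing_year_series(reported)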
Example #7
def seed_table_recommendations():
    sys.stdout.write('\rLoading Data')
    data = read_csv(movie_data_file + '.csv', delimiter=',')
    sys.stdout.write('\rFormatting Data')
    data['soup'] = data.apply(create_soup, axis=1)
    sys.stdout.write('\rCalculating Counts')
    count_matrix = CountVectorizer().fit_transform(data['soup'])
    sys.stdout.write('\rCalculating Similarities')
    matrix = cosine_similarity(count_matrix, count_matrix)
    sys.stdout.write('\rCalculating Indices     ')
    indices = Series(data.index, index=data['id'])
    sys.stdout.write('\rSaving Similarities     ')
    n = len(indices)
    command = 'INSERT into {0}'.format(recommendation_table) + " (id, similarities) VALUES ({0}, '{1}')"
    con = None
    try:
        con = connect(database = database, user = user, password = password)
        cur = con.cursor()
        for row in indices.iteritems():
            cur.execute(command.format(row[0], '{' + ', '.join(str(item) for item in matrix[row[1]]) + '}'))
            j = row[1] / n
            sys.stdout.write("\rSeeding Movie Table: [%-20s] %d%%" % ('='*int(20*j), 100*j))
        cur.close()
        con.commit()
    except (Exception, DatabaseError) as error:
        print(error)
    finally:
        if con is not None:
            con.close()
    return indices
Example #8
def split_preds_proximal_distant(
        x: pd.Series,
        pos: Tuple[str, int, int],
        window: int = 50000) -> Tuple[List[float], List[float]]:
    """
    Split predictions into those that are proximal and those that are distant
    These are defined by being within <window> of the <pos>
    """
    ref_chrom, ref_start, ref_end = pos
    assert ref_start < ref_end
    if not ref_chrom.startswith("chr"):
        ref_chrom = "chr" + ref_chrom

    proximal, distant = [], []
    for i, val in x.iteritems():
        if not i.startswith("chr"):  # Maybe extend to genes later
            continue
        chrom, span = i.split(":")
        if not chrom.startswith("chr"):
            chrom = "chr" + chrom
        start, end = map(int, span.split("-"))
        assert start < end
        if chrom != ref_chrom:
            distant.append(val)
        elif _interval_distance((start, end), (ref_start, ref_end)) <= window:
            proximal.append(val)
        else:
            distant.append(val)

    return proximal, distant
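A brief usage sketch with made-up positions; it assumes the module's _interval_distance helper referenced above is available:

import pandas as pd

preds = pd.Series({
    'chr1:100000-100500': 0.9,   # ~5 kb from pos -> proximal
    'chr1:900000-900500': 0.2,   # same chromosome, > 50 kb away -> distant
    'chr2:100000-100500': 0.4,   # different chromosome -> distant
})
proximal, distant = split_preds_proximal_distant(preds, pos=('chr1', 90000, 95000))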
Example #9
    def transform(self, epochSeries: pd.Series):
        print("Deleting Nan's...")

        for series_index, df in epochSeries.iteritems():
            df.fillna(self.replaceValue, inplace=True)

        return epochSeries
Example #10
def _16(data: pd.Series) -> DNAFASTAFormat:
    ff = DNAFASTAFormat()
    with ff.open() as f:
        for id_, seq in data.iteritems():
            sequence = skbio.DNA(seq, metadata={'id': id_})
            skbio.io.write(sequence, format='fasta', into=f)
    return ff
Example #11
def allAppFeature():
    colNames = np.array(["userid", "age", "app"])
    rawDf = pd.read_table("D://data//uidAgeAppMerge.dat",
                          sep="\t",
                          names=colNames)
    appSer = Series(dtype="int64")
    appCol = rawDf["app"]
    i = 0
    for apps in appCol:
        i += 1
        if i % 100 == 0: print(i)
        appArr = np.array(apps.split(","))
        for app in appArr:
            if app in appSer:
                appSer[app] = appSer[app] + 1
            else:
                appSer[app] = 1

    print "sort"
    appSer.sort_values(ascending=False, inplace=True)
    indexFile = open("D://data//allAppIndex.dat", "w")
    allAppNumFile = open("D://data//allAppNum.dat", "w")
    print "outputing"
    index = 1
    for key, val in appSer.iteritems():
        indexFile.write(key + "\t" + str(index) + "\n")
        allAppNumFile.write(key + "\t" + str(val) + "\n")
        index += 1
    indexFile.close()
    allAppNumFile.close()
    print "ok"
Example #12
def build_char_to_words(words: pd.Series,
                        word_len: int = None) -> dict[str, set[str]]:
    """
    Build graph of words.

    The output is a dict which keys are words and
    values are words that share exactly one common character

    Parameters
    ----------
    words : list[str]
        List of words
    word_len : int, Optional
        defaults to None
        if not none restricts the length in characters of the words to keep
        a natural setting would be to set it to 2

    Returns
    -------
    dict[str, list[str]]
        Char to list of words ids
    """

    char_to_words = defaultdict(set)

    for idx, word in words.iteritems():

        if word_len is not None and len(word) != word_len:
            continue

        for char in word:
            char_to_words[char].add(idx)

    return char_to_words
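A tiny self-contained check of the mapping (assumes the module-level `from collections import defaultdict` import and a pandas version that still has Series.iteritems()):

import pandas as pd

words = pd.Series(['ab', 'bc', 'cd'])
build_char_to_words(words, word_len=2)
# defaultdict(<class 'set'>, {'a': {0}, 'b': {0, 1}, 'c': {1, 2}, 'd': {2}})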
Example #13
def tabulate(output_dir: str, data: pd.Series) -> None:
    prepped = []
    for _id, taxa in data.iteritems():
        prepped.append({'id': _id, 'taxa': taxa})

    index = os.path.join(TEMPLATES, 'tabulate', 'index.html')
    q2templates.render(index, output_dir, context={'data': prepped})
Example #14
def gini():
    giniSer = Series(dtype="float64")
    # Load the data
    colNames = np.array(
        ["buying", "maint", "doors", "persons", "lug_boot", "safety", "class"])
    carDf = pd.read_csv("dataCar//car_data.txt", sep=",", names=colNames)
    classNames = carDf["class"].unique()
    totalNum = carDf["class"].count() * 1.0
    # Iterate over each feature
    for feature in colNames[:-1]:
        featureItems = carDf[feature].unique()
        featureGini = 0.0
        # Iterate over each value of the feature
        for item in featureItems:
            itemCla = carDf[carDf[feature] == item]["class"]
            itemNum = itemCla.count() * 1.0
            # Compute the Gini impurity of this feature value
            tmpGini = 0.0
            for cla in classNames:
                itemClaNum = itemCla[itemCla.values == cla].count()
                tmpGini = tmpGini + (itemClaNum / itemNum)**2
            featureGini = featureGini + (itemNum / totalNum) * (1 - tmpGini)
            print(feature, item, (itemNum / totalNum) * (1 - tmpGini))
            print("--------------")
        print("=====================")
        giniSer[feature] = featureGini
    print("----------------gini ------------------")
    giniSer = giniSer.sort_values()
    for key, val in giniSer.iteritems():
        print(key + " : " + str(val))
Example #15
def create_triplet_time_series(ts: pd.Series, with_support: bool = False):
    """
    create triplet timely series encoding
    withSupport if return the number of compressed records
    """
    res = []
    start = -1
    prev = -1
    support = 0
    for k, val in ts.iteritems():
        support += 1
        if start == -1 and val > 0:
            start = k
            support = 0
        elif start >= 0 and val == 0:
            x = {'feature': ts.name, 'start': start, 'end': prev}
            if with_support:
                x['support'] = support
            res.append(x)
            start = -1

        prev = k

    if start != -1:
        x = {'feature': ts.name, 'start': start, 'end': prev}
        if with_support:
            x['support'] = support
        res.append(x)

    return res
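A short self-contained illustration of the triplet encoding (pandas earlier than 2.0 for Series.iteritems()):

import pandas as pd

ts = pd.Series([0, 1, 2, 0, 3], index=[10, 20, 30, 40, 50], name='cpu')
create_triplet_time_series(ts, with_support=True)
# [{'feature': 'cpu', 'start': 20, 'end': 30, 'support': 2},
#  {'feature': 'cpu', 'start': 50, 'end': 50, 'support': 0}]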
Example #17
def create_property_values(row: pd.Series, scope: str, domain: str,
                           dtypes: pd.Series) -> dict:
    """
    This function generates the property values for a row in a file

    :param pd.Series row: The current row of the data frame to create property values for
    :param str scope: The scope to create the property values in
    :param str domain: The domain to create the property values in
    :param pd.Series dtypes: The data types of each column to create property values for

    :return: dict {str, models.PerpetualProperty} properties:
    """

    # Ensure that all data types in the file have been mapped
    if not (set([str(data_type) for data_type in dtypes.unique()]) <= set(
            global_constants["data_type_mapping"])):
        raise TypeError(
            """There are data types in the data_frame which have not been mapped to LUSID data types,
            please ensure that all data types have been mapped before retrying"""
        )

    # Initialise the empty properties dictionary
    properties = {}

    # Iterate over each column name and data type
    for column_name, data_type in dtypes.iteritems():

        # Set the data type to be a string so that it is easier to work with
        string_data_type = str(data_type)
        # Convert the numpy data type to a LUSID data type using the global mapping
        lusid_data_type = global_constants["data_type_mapping"][
            string_data_type]
        # Get the value of the column from the row
        row_value = row[column_name]

        # Use the correct LUSID property value based on the data type
        if lusid_data_type == "string":
            if pd.isna(row_value):
                continue
            property_value = lusid.models.PropertyValue(label_value=row_value)

        if lusid_data_type == "number":
            # Handle null values given the input null value override
            if pd.isnull(row_value):
                continue
            property_value = lusid.models.PropertyValue(
                metric_value=lusid.models.MetricValue(value=row_value))

        # Set the property
        property_key = (
            f"{domain}/{scope}/{cocoon.utilities.make_code_lusid_friendly(column_name)}"
        )
        properties[property_key] = lusid.models.PerpetualProperty(
            key=property_key, value=property_value)

    if domain.lower() == "instrument":
        properties = list(properties.values())

    return properties
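A hedged sketch of calling the helper above; it assumes a configured lusid SDK and that the module's global_constants["data_type_mapping"] maps the numpy dtypes 'object' and 'float64' to LUSID types (not verified here):

import pandas as pd

# Hypothetical single-row frame standing in for one record of an input file.
df = pd.DataFrame({"ticker": ["ABC"], "price": [101.5]})
properties = create_property_values(row=df.iloc[0], scope="my-scope",
                                     domain="Transaction", dtypes=df.dtypes)
# -> {"Transaction/my-scope/<column>": lusid.models.PerpetualProperty, ...}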
Example #18
 def create_pandas_dataframe_in_partition(series_chunk: pandas.Series):
     records = []
     index = []
     for i, v in series_chunk.iteritems():
         records.extend(v)
         index.extend([i] * len(v))
     df = pandas.DataFrame.from_records(records, index=index)
     return df
Example #19
def add_target(session, symbol, name, series: pd.Series):
    for idx, value in series.iteritems():
        res = Target(
            symbol=symbol,
            day=idx,  # Ignore the warning, index must be a DateTimeIndex
            name=name,
            value=value)
        session.add(res)
Example #20
 def _create_enum_mapping_multiple(self, field: Field,
                                   data_series: pd.Series) -> None:
     """Creates enum mappings for an enum field that allows multiple values."""
     for _, multiple_raw_values in data_series.iteritems():
         if multiple_raw_values is None:
             continue
         for raw_text in multiple_raw_values:
             self._create_enum_mapping(field, raw_text)
Example #21
def get_moving_average(data_column: pd.Series, minutes: int = 10) -> pd.Series:
    moving_average = pd.Series(np.zeros_like(data_column.values),
                               index=data_column.index)
    for index, value in data_column.iteritems():
        start_time = index - timedelta(minutes=minutes)
        end_time = index + timedelta(minutes=minutes)
        moving_average.loc[index] = np.mean(
            data_column.loc[start_time:end_time])
    return moving_average
Example #22
def average_precision(user_evaluations: pd.Series) -> float:
    ap_score = 0.0
    total_evaluation = 0
    for index, evaluation in user_evaluations.iteritems():
        if evaluation > 0:
            total_evaluation += 1
            ap_score += total_evaluation / index

    return ap_score / total_evaluation
Example #23
 def __evaluate_buy_ability(self, order_series: pd.Series) -> float:
     cash = self.portfolio.cash
     accumulated_amount = 0
     for ticker, amount_delta in order_series.iteritems():
         if amount_delta > 0:
             accumulated_amount += amount_delta
     if accumulated_amount == 0:
         return 1
     return cash / accumulated_amount
Example #24
def add_feature(session, group, symbol, name, series: pd.Series):
    for idx, value in series.iteritems():
        res = Feature(
            group=group,
            symbol=symbol,
            name=name,
            day=idx,  # Ignore the warning, index must be a DateTimeIndex
            value=value)
        session.add(res)
Example #25
def filter_items(data: pd.Series, participants: pd.Series):
    filtered_items = []

    for items in data.iteritems():
        items = items[1].split(" ")
        compatible_items = " ".join([item for item in items if item in participants])
        filtered_items.append(compatible_items)

    return filtered_items
Example #26
    def test_iterable_items(self, dtype, rdtype):
        # gh-13258
        # test items / iteritems yields the correct boxed scalars
        # this only applies to series
        s = Series([1], dtype=dtype)
        _, result = list(s.items())[0]
        assert isinstance(result, rdtype)

        _, result = list(s.iteritems())[0]
        assert isinstance(result, rdtype)
Example #28
def tokenize_data(data: pd.Series, tokenizer):
    out = []
    if data is not None:
        for idx, ele in data.iteritems():
            if ele is None:
                out.append(np.ones((0, ), dtype=np.int32))
            else:
                out.append(np.array(tokenizer.encode(ele, int),
                                    dtype=np.int32))
    return out
Example #29
def load_localities_coord_size_list(cities: pd.Series):
    global dict_localities
    data = []
    for (city, size) in cities.iteritems():
        if city != 'nenhum':
            data.append([
                dict_localities[city]['lat'], dict_localities[city]['lon'],
                size, city
            ])
    return pd.DataFrame(data, columns=['lat', 'lon', 'size', 'city'])
Example #30
def time_since_last_true(s: pd.Series) -> pd.Series:
    s.iloc[0] = prev_val = int(
        round(s.value_counts()[False] / 2 / s.value_counts()[True], 0))
    for i, v in list(s.iteritems())[1:]:
        if v:
            s.at[i] = 0
        else:
            s.at[i] = prev_val + 1
        prev_val = s.at[i]
    return s.astype(int)
Example #31
def throughput(s: pd.Series,
               window_size_ms: float,
               trim: bool = False) -> pd.Series:
    """
    Consider a series of timestamps:

        timestamp
      0 11:00:01 am
      1 11:00:03 am
      2 11:00:54 am
      3 11:01:34 am
      4 11:02:16 am

    Imagine we divide the data into 1 minute rolling windows, with every window
    having its right edge be an entry in the series. We'd get the following
    windows and the timestamps they contain:

                                  timestamps
      10:59:01 am - 11:00:01 am | [0]       |
      10:59:03 am - 11:00:03 am | [0, 1]    |
      10:59:54 am - 11:00:54 am | [0, 1, 2] |
      11:00:34 am - 11:01:34 am | [2, 3]    |
      11:01:16 am - 11:02:16 am | [2, 3, 4] |

    If we count the number of entries in each window and divide by the window
    size, we get the throughput of each window measured in events per second.

                                  throughput
      10:59:01 am - 11:00:01 am | 1 / 60     |
      10:59:03 am - 11:00:03 am | 2 / 60     |
      10:59:54 am - 11:00:54 am | 3 / 60     |
      11:00:34 am - 11:01:34 am | 2 / 60     |
      11:01:16 am - 11:02:16 am | 3 / 60     |

    This is what `throughput` computes. If `trim` is true, the first
    window_size_ms of throughput data is trimmed.
    """
    s = pd.Series(0, index=s.sort_values())
    throughput = (s.rolling(f'{window_size_ms}ms').count() /
                  (window_size_ms / 1000))
    if trim:
        t = (throughput.index[0] +
             pd.DateOffset(microseconds=window_size_ms * 1000))
        return throughput[throughput.index >= t]
    else:
        # TODO(mwhittaker): Fix up. It's a little jank.
        start_time = throughput.index[0]
        offset = pd.DateOffset(microseconds=window_size_ms * 1000)
        for i, (index, row) in enumerate(s.iteritems(), start=1):
            if i < 100:
                continue
            if index > start_time + offset:
                return throughput[100:]
            throughput[index] = i / (index - start_time).total_seconds()
        return throughput[100:]
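A small usage sketch that mirrors the timestamps in the docstring above:

import pandas as pd

ts = pd.Series(pd.to_datetime([
    '11:00:01', '11:00:03', '11:00:54', '11:01:34', '11:02:16']))
tput = throughput(ts, window_size_ms=60 * 1000, trim=True)
# Events per second for each 60 s window whose right edge is a timestamp,
# with the first window_size_ms of warm-up windows trimmed.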
Example #32
def validate_series(data_value: pd.Series):
    global ERROR_ROWS_IDXS
    # print(type(data_value), data_value)
    # print(type(data_value.name))
    if data_value.name == SELECTED_COLUMN:
        for index, row in data_value.iteritems():
            # print(type(index), type(row))
            # print(f"Index : {index}, Row : {row}")
            curr_row = validate_obj.detect_and_validate(str(row))
            if curr_row["is_error"] is True:
                ERROR_ROWS_IDXS.append(int(index))
Example #33
def convert_usd(exchange_rate: pd.DataFrame, usd_vals: pd.Series) -> pd.Series:
    """
    usd_vals -  Series indexed by non-continuous subset of dates from currency_pair index; decimal values
    """
    return pd.Series(
        [
            val * exchange_rate.at[idx, "Close"]
            for idx, val in usd_vals.iteritems()
        ],
        usd_vals.index,
    )
Example #34
    def test_mixed_index_at_iat_loc_iloc_series(self):
        # GH 19860
        s = Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 1, 2])
        for el, item in s.iteritems():
            assert s.at[el] == s.loc[el] == item
        for i in range(len(s)):
            assert s.iat[i] == s.iloc[i] == i + 1

        with pytest.raises(KeyError):
            s.at[4]
        with pytest.raises(KeyError):
            s.loc[4]
Example #35
 def peaks(self, video, num_bins=100, num_std=2, plot=True):
   
    # Take the video's time information and divide it into num_bins bins
   h,edges = np.histogram(video.currentTime, bins=num_bins)
   
   # Calculate the threshold
   t = h.mean() + num_std*h.std()
   
   # Create a series of value counts and edges
   time_counts = Series(h, index=edges[:-1])
   
   # Times the time_counts are above the threshold
   t_indices = time_counts[time_counts > t].index
   
   # Break all of the times above the threshold into neighborhoods
   neighborhoods = []
   in_hood = False
   for edge, count in time_counts.iteritems():
        if in_hood and (edge in t_indices):
            neighborhoods[-1].loc[edge] = count
        elif edge in t_indices:
            in_hood = True
            neighborhoods.append(Series(dtype="float64"))
            neighborhoods[-1].loc[edge] = count
       else:
           in_hood = False
   
   # Find the maximum point in each neighborhood
    peaks = Series(dtype="float64")
    for neighborhood in neighborhoods:
        count = neighborhood.max()
        peak = neighborhood.idxmax()
        peaks.loc[peak] = count
       
   # Plot the time counts, threshold line and peaks
   if plot:
       l = len(h)
       threshold_series = Series([t]*l)
       time_counts.plot()
       plt.plot(edges[:-1], threshold_series)
       peaks.plot(marker='o', color='r', ls='')
   
   return peaks
Example #36
def getError(signal, normedDay, period, phase):
    '''
    Gets the error for a list of points across a normed day given a sklearn 
    model, the period, and the phase of the fitted signal.
    
    Returns the Euclidean error.
    '''
   
    if ndim(normedDay.index[0]) > 0:
        t0= round((array(normedDay.index.get_level_values(0))- phase)%period,3)
    else:
        t0 = round((array(normedDay.index,dtype=float) - phase)%period,3)
    nD = Series(array(normedDay), index=t0)
    tUp = array([arange(-5,period+5.,.1)]).T
    sampled = Series(signal.predict(tUp), index=tUp.flatten())

    diff = [(((sampled - val)/2.)**2 + 
            (((array(sampled.index, dtype=float) - t)/period))**2).min()
            for t,val in nD.iteritems()]

    error = mean(sqrt(diff))
    return error
Example #37
 def test_float_index_at_iat(self):
     s = Series([1, 2, 3], index=[0.1, 0.2, 0.3])
     for el, item in s.iteritems():
         assert s.at[el] == item
     for i in range(len(s)):
         assert s.iat[i] == i + 1
Example #38
    def test_binary_ops_align(self):

        # test aligning binary ops

        # GH 6681
        index = MultiIndex.from_product([list('abc'),
                                         ['one', 'two', 'three'],
                                         [1, 2, 3]],
                                        names=['first', 'second', 'third'])

        df = DataFrame(np.arange(27 * 3).reshape(27, 3),
                       index=index,
                       columns=['value1', 'value2', 'value3']).sort_index()

        idx = pd.IndexSlice
        for op in ['add', 'sub', 'mul', 'div', 'truediv']:
            opa = getattr(operator, op, None)
            if opa is None:
                continue

            x = Series([1.0, 10.0, 100.0], [1, 2, 3])
            result = getattr(df, op)(x, level='third', axis=0)

            expected = pd.concat([opa(df.loc[idx[:, :, i], :], v)
                                  for i, v in x.iteritems()]).sort_index()
            assert_frame_equal(result, expected)

            x = Series([1.0, 10.0], ['two', 'three'])
            result = getattr(df, op)(x, level='second', axis=0)

            expected = (pd.concat([opa(df.loc[idx[:, i], :], v)
                                   for i, v in x.iteritems()])
                        .reindex_like(df).sort_index())
            assert_frame_equal(result, expected)

        # GH9463 (alignment level of dataframe with series)

        midx = MultiIndex.from_product([['A', 'B'], ['a', 'b']])
        df = DataFrame(np.ones((2, 4), dtype='int64'), columns=midx)
        s = pd.Series({'a': 1, 'b': 2})

        df2 = df.copy()
        df2.columns.names = ['lvl0', 'lvl1']
        s2 = s.copy()
        s2.index.name = 'lvl1'

        # different cases of integer/string level names:
        res1 = df.mul(s, axis=1, level=1)
        res2 = df.mul(s2, axis=1, level=1)
        res3 = df2.mul(s, axis=1, level=1)
        res4 = df2.mul(s2, axis=1, level=1)
        res5 = df2.mul(s, axis=1, level='lvl1')
        res6 = df2.mul(s2, axis=1, level='lvl1')

        exp = DataFrame(np.array([[1, 2, 1, 2], [1, 2, 1, 2]], dtype='int64'),
                        columns=midx)

        for res in [res1, res2]:
            assert_frame_equal(res, exp)

        exp.columns.names = ['lvl0', 'lvl1']
        for res in [res3, res4, res5, res6]:
            assert_frame_equal(res, exp)