Example #1
def get_sms_feats(df):
    df['request_datetime'] = pd.to_datetime(df['request_datetime'])
    df["hour"] = df['request_datetime'].dt.hour
    df["day"] = df['request_datetime'].dt.day

    phone_no_m = df[["phone_no_m"]].copy()
    phone_no_m = phone_no_m.drop_duplicates(subset=['phone_no_m'], keep='last')
    # Number of distinct contacts and number of messages
    tmp = df.groupby("phone_no_m")["opposite_no_m"].agg(sms_count="count",
                                                        sms_nunique="nunique")
    tmp["sms_rate"] = tmp["sms_count"] / tmp["sms_nunique"]
    phone_no_m = phone_no_m.merge(tmp, on="phone_no_m", how="left")
    """短信下行比例
    """
    calltype2 = df[df["calltype_id"] == 2].copy()
    calltype2 = calltype2.groupby("phone_no_m")["calltype_id"].agg(
        calltype_2="count")
    phone_no_m = phone_no_m.merge(calltype2, on="phone_no_m", how="left")
    phone_no_m[
        "calltype_rate"] = phone_no_m["calltype_2"] / phone_no_m["sms_count"]
    """短信时间
    """
    tmp = df.groupby("phone_no_m")["hour"].agg(
        hour_mode=lambda x: stats.mode(x)[0][0],
        hour_mode_count=lambda x: stats.mode(x)[1][0],
        hour_nunique="nunique")
    phone_no_m = phone_no_m.merge(tmp, on="phone_no_m", how="left")

    tmp = df.groupby("phone_no_m")["day"].agg(
        day_mode=lambda x: stats.mode(x)[0][0],
        day_mode_count=lambda x: stats.mode(x)[1][0],
        day_nunique="nunique")
    phone_no_m = phone_no_m.merge(tmp, on="phone_no_m", how="left")

    return phone_no_m
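A minimal call sketch for the feature builder above, on hypothetical data. It assumes the usual imports, pandas >= 0.25 (keyword-style named aggregation), and SciPy < 1.11 so that stats.mode(x)[0][0] still indexes into an array, which is the older API every example on this page relies on.

import pandas as pd
from scipy import stats

toy = pd.DataFrame({
    "phone_no_m": ["A", "A", "A", "B"],
    "opposite_no_m": ["x", "x", "y", "z"],
    "calltype_id": [1, 2, 2, 1],
    "request_datetime": ["2020-01-01 08:00", "2020-01-01 09:00",
                         "2020-01-02 08:30", "2020-01-03 21:00"],
})
print(get_sms_feats(toy))  # one feature row per phone_no_m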
Example #2
 def actionUpdate(self):
     x = np.arange(-np.pi, np.pi, np.pi / 10)
     y = eval(str(self.ui.lineEdit.text()))
     self.mpl.axes.plot(x, y, '--rx', linewidth=2)
     self.mpl.axes.set_title('Sine Function')
     self.mpl.draw()
     print stats.mode([1, 2, 3, 3, 4, 5])
Example #3
 def actionUpdate(self):
     x = np.arange(-np.pi, np.pi, np.pi / 10)
     if str(self.ui.lineEdit.text()) != "":
         y = eval(str(self.ui.lineEdit.text()))
         self.mpl.axes.plot(x, y, '--rx', linewidth=2)
         self.mpl.axes.set_title('Sine Function')
         self.mpl.draw()
         print stats.mode([1, 2, 3, 3, 4, 5])
    def stat_mode(df, cate_fea, num_fea):
        aim_ = pd.concat([
            # dict-style renaming in .agg was deprecated and later removed in
            # pandas; keyword (named) aggregation is the supported equivalent
            df.groupby(cate_fea)[num_].agg(**{
                num_ + '_mode': lambda x: stats.mode(x)[0][0],
                num_ + '_mode_count': lambda x: stats.mode(x)[1][0]
            }) for num_ in num_fea
        ], axis=1)
        aim_.reset_index(inplace=True)

        return aim_
def get_data(column, np_values, alpha):

    mvs = bayes_mvs(np_values, alpha)

    #report these metrics
    output = [
        present("Column", column),
        present("Length", len(np_values)),
        present("Unique", len(np.unique(np_values))),
        present("Min", np_values.min()),
        present("Max", np_values.max()),
        present("Mid-Range", (np_values.max() - np_values.min())/2),
        present("Range", np_values.max() - np_values.min()),
        present("Mean", np_values.mean()),
        present("Mean-%s-CI" % alpha, tupleToString(mvs[0][1])),
        present("Variance", mvs[1][0]),
        present("Var-%s-CI" % alpha, tupleToString(mvs[1][1])),
        present("StdDev", mvs[2][0]),
        present("Std-%s-CI" % alpha, tupleToString(mvs[2][1])),
        present("Mode", stats.mode(np_values)[0][0]),
        present("Q1", stats.scoreatpercentile(np_values, 25)),
        present("Q2", stats.scoreatpercentile(np_values, 50)),
        present("Q3", stats.scoreatpercentile(np_values, 75)),
        present("Trimean", trimean(np_values)),
        present("Minhinge", midhinge(np_values)),
        present("Skewness", stats.skew(np_values)),
        present("Kurtosis", stats.kurtosis(np_values)),
        present("StdErr", sem(np_values)),
        present("Normal-P-value", normaltest(np_values)[1])
        ]
    return output
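get_data leans on several helpers that are not shown here (present, tupleToString, trimean, midhinge) plus bayes_mvs, sem and normaltest from scipy.stats. Below is a sketch with plausible stand-ins, purely illustrative; the real project may define them differently, and it assumes SciPy < 1.11 so that stats.mode(np_values)[0][0] works.

import numpy as np
from scipy import stats
from scipy.stats import bayes_mvs, sem, normaltest

def present(name, value):
    # One formatted report line per metric
    return "%s: %s" % (name, value)

def tupleToString(t):
    # Render a (lower, upper) confidence interval
    return "(%.4f, %.4f)" % t

def trimean(values):
    q1 = stats.scoreatpercentile(values, 25)
    q2 = stats.scoreatpercentile(values, 50)
    q3 = stats.scoreatpercentile(values, 75)
    return (q1 + 2 * q2 + q3) / 4.0

def midhinge(values):
    return (stats.scoreatpercentile(values, 25) +
            stats.scoreatpercentile(values, 75)) / 2.0

values = np.array([1., 2, 2, 3, 5, 4, 4, 4, 6, 7, 3, 2, 8, 5, 5, 6, 2, 3, 4, 5])
print("\n".join(get_data("demo", values, 0.9)))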
Example #6
def arrayStatistics(numpy_array, missing_value=N.nan):
    if N.isfinite(missing_value):
        valid_values = numpy_array[N.where(numpy_array!=missing_value)]
        if numpy_array.dtype.kind == 'f':
            valid_values = valid_values[N.where(N.isfinite(valid_values))]
    else:
        valid_values = numpy_array[N.where(N.isfinite(numpy_array))]

    if len(valid_values) > 0:
        statistics =  { 'min' : N.min(valid_values),
                        'max' : N.max(valid_values),
                        'mean' : N.mean(valid_values),
                        'stddev' : N.std(valid_values),
                        'median' : N.median(valid_values),
                        'mode' : scipy_stats.mode(valid_values),
                        'missing' : len(numpy_array) - len(valid_values),
                      }
    else:
        statistics =  { 'min' : missing_value, 'max' : missing_value,
                        'mean' : missing_value, 'stddev' : 0.0,
                        'median' : missing_value,
                        'mode' : ( N.array([missing_value,]),
                                   N.array([len(numpy_array),]) ),
                        'missing' : len(numpy_array),
                      }
    return statistics
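A short usage sketch, assuming the module-level aliases this function relies on (N for numpy and scipy_stats for scipy.stats):

import numpy as N
from scipy import stats as scipy_stats

values = N.array([1.0, 2.0, 2.0, N.nan, 7.0])
print(arrayStatistics(values))                        # NaN counted as missing
print(arrayStatistics(values, missing_value=-999.0))  # explicit sentinel value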
def knn_classifier(X_train, y_train, X_validation, X_test, k):
    # Returns the labels for the validation and test data, predicted by the k-NN classifier trained on X_train and y_train
    # Input:
    # X_train - num_train x num_features matrix with features for the training data
    # y_train - num_train x 1 vector with labels for the training data
    # X_validation - num_val x num_features matrix with features for the validation data
    # X_test - num_test x num_features matrix with features for the test data
    # k - Number of neighbors to take into account
    # Output:
    # y_pred_validation - num_val x 1 predicted vector with labels for the validation data
    # y_pred_test - num_test x 1 predicted vector with labels for the test data

    X_test_val = np.vstack((X_validation, X_test))
    # Compute standardized euclidian distance of validation and test points to the other points
    D = cdist(X_test_val, X_train, metric='seuclidean')
    # Sort distances per row and return array of indices from low to high
    sort_ix = np.argsort(D, axis=1)
    # Get the k smallest distances
    sort_ix_k = sort_ix[:, :k]
    predicted_labels = y_train[sort_ix_k]
    # Predictions for each point is the mode of the K labels closest to the point
    predicted_labels = mode(predicted_labels, axis=1)[0]
    y_pred_validation = predicted_labels[:len(X_validation)]
    y_pred_test = predicted_labels[len(X_validation):]
    
    return y_pred_validation, y_pred_test
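A quick sanity check on hypothetical data; it assumes the names the function uses are available at module level (np, cdist from scipy.spatial.distance, mode from scipy.stats):

import numpy as np
from scipy.spatial.distance import cdist
from scipy.stats import mode

X_train = np.array([[0.0], [0.1], [0.2], [5.0], [5.1]])
y_train = np.array([0, 0, 0, 1, 1])
X_val = np.array([[0.05]])
X_test = np.array([[5.05]])
y_val_pred, y_test_pred = knn_classifier(X_train, y_train, X_val, X_test, k=3)
print(y_val_pred.ravel(), y_test_pred.ravel())  # expected: [0] [1]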
Example #8
def get_diff(set_val,time_slots,num_type,conf_lev):

    time_slots_utc = dtime_to_unix(time_slots)
    TIMELET_INV_seconds = (time_slots[1]-time_slots[0]).seconds
    diff_mean = list()

    for r, utc_t in enumerate(time_slots_utc):
        utc_t_s = utc_t
        utc_t_e = utc_t + TIMELET_INV_seconds
        idx = np.nonzero((set_val[0] >= utc_t_s) & (set_val[0] < utc_t_e))[0]

        if len(idx) < 2:
            diff_val = np.inf
        else:
            temp_val = abs(np.diff(set_val[1][idx]))
            upper_val = np.sort(temp_val)[int(np.floor(len(temp_val)*conf_lev)):]
            if len(upper_val) == 0:
                 diff_val = np.inf
            else:
                if num_type == FLOAT_TYPE:
                    diff_val = np.mean(upper_val)
                elif num_type == INT_TYPE:
                    diff_val = int(stats.mode(upper_val)[0])
                else:
                    log.error('Sample type must be either INT or FLOAT type')
                    raise NameError('Sample type must be either INT or FLOAT type')

            #diff_val=max(abs(diff(set_val[1][idx])))
            #sort(abs(diff(set_val[1][idx])))[::-1]

        diff_mean.append(diff_val)

    #diff_mean=np.array(diff_mean)[:,np.newaxis]
    diff_mean = np.array(diff_mean)
    return diff_mean
def get_data(column, np_values, alpha):

    mvs = bayes_mvs(np_values, alpha)

    #report these metrics
    output = [
        present("Column", column),
        present("Length", len(np_values)),
        present("Unique", len(np.unique(np_values))),
        present("Min", np_values.min()),
        present("Max", np_values.max()),
        present("Mid-Range", (np_values.max() - np_values.min()) / 2),
        present("Range",
                np_values.max() - np_values.min()),
        present("Mean", np_values.mean()),
        present("Mean-%s-CI" % alpha, tupleToString(mvs[0][1])),
        present("Variance", mvs[1][0]),
        present("Var-%s-CI" % alpha, tupleToString(mvs[1][1])),
        present("StdDev", mvs[2][0]),
        present("Std-%s-CI" % alpha, tupleToString(mvs[2][1])),
        present("Mode",
                stats.mode(np_values)[0][0]),
        present("Q1", stats.scoreatpercentile(np_values, 25)),
        present("Q2", stats.scoreatpercentile(np_values, 50)),
        present("Q3", stats.scoreatpercentile(np_values, 75)),
        present("Trimean", trimean(np_values)),
        present("Minhinge", midhinge(np_values)),
        present("Skewness", stats.skew(np_values)),
        present("Kurtosis", stats.kurtosis(np_values)),
        present("StdErr", sem(np_values)),
        present("Normal-P-value",
                normaltest(np_values)[1])
    ]
    return output
def predict(clf2, test_set):
    uid = pd.DataFrame()
    # test_set = processing(trainSpan=(1, 30), label=False)
    uid["user_id"] = test_set["user_id"]
    test_set = test_set.drop(labels=["user_id"], axis=1)
    # if isinstance(selector,RFECV):
    #     test_set_new = selector.transform(test_set.values)
    # elif isinstance(selector,list):
    #     test_set_new = test_set[selector]
    # else:
    #     test_set_new = test_set
    print("begin to make predictions")
    res = clf2.predict(test_set.values)
    uid["y_hat"] = pd.Series(res)
    uid["label"] = uid.groupby(by=["user_id"])["y_hat"].transform(lambda x: stats.mode(x)[0][0])
    str_time = str(datetime.datetime.now().strftime("%Y-%m-%d_%H-%M"))
    uid_file = "result/uid_" + str_time + ".csv"
    uid.to_csv(uid_file,header=True,index=False)
    active_users = (uid.loc[uid["label"] == 1]).user_id.unique().tolist()
    print(len(active_users))
    print(active_users)
    str_time = str(datetime.datetime.now().strftime("%Y-%m-%d_%H-%M"))
    submission_file = "result/submission_" + str_time + ".csv"
    with open(submission_file, "a", newline="") as f:
        writer = csv.writer(f)
        for i in active_users:
            writer.writerow([i])
Example #11
    def fit_predict(self, X, y, **fit_params):
        self.X_ = X
        self.y_ = y

        # To keep the computation vectorized with NumPy, predictions on the training set itself are obtained by repeatedly rolling the training set.
        X_roll = X
        distances = np.zeros((len(X), len(X) - 1))
        for i in range(len(self.X_) - 1):
            X_roll = np.roll(X_roll, -1, axis=0)
            # Roll the whole sample array to get pairwise distances between samples
            distances[:, i] = np.power(
                np.sum(np.power(X - X_roll, self.p), axis=1), 1 / self.p)
        min_k_ind = np.argpartition(distances, self.k, axis=1)[:, :self.k]
        # distances[i, j] is the distance between x[i] and x[(i + j + 1) % len(X)], so the neighbor indices need to be shifted accordingly
        neighbors_ind = (min_k_ind +
                         np.arange(1,
                                   len(X) + 1).reshape(-1, 1)) % len(X)
        neighbors_labels = y[neighbors_ind]

        # Uniformly weighted prediction
        if self.weights == 'uniform':
            return stats.mode(neighbors_labels, axis=1).mode.flatten()

        # Weighted prediction (a list rather than a generator, since NumPy has
        # deprecated generator input to np.vstack)
        neighbors_distances = np.vstack(
            [distances[i][min_k_ind[i]] for i in range(len(X))])
        # Inverse-distance weights
        if self.weights == 'inverse':
            weights_ = 1 / neighbors_distances
        # Gaussian function of the distance
        if self.weights == 'Gaussian':
            weights_ = np.exp(-np.square(neighbors_distances) / 2)
        return self.weighted_predict(neighbors_labels, weights_)
Example #12
 def extractfromfits(self, filename, loc, size, sky=0.):
     """
     Extracts a subregion from a fits file  and converts it according
     to the astro and math convention: pixel (0,0) is at the bottom left
     """
     x, y = loc
     radius = int(size/2)
     r = size-radius * 2
     hdulist = pyfits.open(filename)  # open a FITS file
     if len(hdulist) != 1:
         raise RuntimeError, "extractfromfits : len(hdulist) > 1 not allowed"
     fulldata = hdulist[0].data       # assumes the first extension is an image
     if x< 0 or y< 0 or x>= fulldata.shape[1] or y>= fulldata.shape[0]:
         raise RuntimeError, "extractfromfits : bad extraction parameters"
     if x+radius+r >= fulldata.shape[1] or y+radius+r >= fulldata.shape[0] or x-radius<0 or y-radius<0:
         #TODO: set outside pixels to NaN
         print "outside"
     self.array = self.array + np.zeros(self.array.shape, dtype=np.float64)    # switch to 8 byte   
     self.array = fulldata[y-radius:y+radius+r, x-radius:x+radius+r].transpose()            # get values from the subsection 
     #    This transposition makes the pixel-array coordinates (x, y) equal to those in the ds9 display etc.
     #    In other words, we are in the math and astro convention.
     #    x = horizontal, y = vertical, (0, 0) is bottom left.
     self.array[np.where(np.isnan(self.array))] = sky
     hdulist.close()
     if sky is None:
         #TODO: check...
         self.array -= stats.mode(self.array.ravel())[0][0]
     else:
         self.array -= sky
     self.setzscale()
Example #13
 def create_dataset(X, y, time_steps=1, step=1):
     Xs, ys = [], []
     for i in range(0, len(X) - time_steps, step):
         v = X.iloc[i:(i + time_steps)].values
         labels = y.iloc[i:i + time_steps]
         Xs.append(v)
         ys.append(stats.mode(labels)[0][0])
     return np.array(Xs), np.array(ys).reshape(-1, 1)
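A toy run of the sliding-window helper above, on hypothetical data; it assumes pandas/NumPy/SciPy are imported as in the other examples and SciPy < 1.11 so that stats.mode(labels)[0][0] indexes into an array:

import numpy as np
import pandas as pd
from scipy import stats

X = pd.DataFrame({"acc_x": np.arange(10, dtype=float)})
y = pd.Series([0, 0, 0, 1, 1, 1, 1, 2, 2, 2])
Xs, ys = create_dataset(X, y, time_steps=4, step=2)
print(Xs.shape, ys.ravel())  # (3, 4, 1) and the majority label of each window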
Example #14
 def unsupervised_habitat_class_modes(self):
     hcm = {}
     for hab in self.habitats:
         md, cn = mode( self.unsupervised_habitat_class_dict[hab] )
         if len( md )==1:
             hcm[hab] = md[0]
         else:
             hcm[hab] = None
     return hcm
Example #15
 def unsupervised_habitat_class_modes(self):
     hcm = {}
     for hab in self.habitats:
         md, cn = mode(self.unsupervised_habitat_class_dict[hab])
         if len(md) == 1:
             hcm[hab] = md[0]
         else:
             hcm[hab] = None
     return hcm
def calculate_weighted_loss(data):
    weighted_loss = 0
    if data.shape[0] != 0:
        label_column = data[:, -1]
        label_column = np.array(label_column.tolist())
        y_prediction_mode = stats.mode(label_column, axis=0)[0][0]
        y_prediction = [y_prediction_mode] * label_column.shape[0]
        weighted_loss = ch.custom_weighted_loss(label_column, np.array(y_prediction))
    return weighted_loss
Example #17
    def Aggregate(self,
                  sourceRaster,
                  outFName,
                  method=None,
                  numSourcePerTarget=10):

        tmpOutput = os.path.splitext(outFName)[0] + ".tif"
        tmpOutDataset = SpatialUtilities.SAHMRaster(tmpOutput)
        tmpOutDataset.pullParamsFromRaster(self.templateRaster.source)
        tmpOutDataset.createNewRaster()

        rows = int(sourceRaster.height)
        cols = int(sourceRaster.width)

        # loop of 'blocks' of data maybe.
        bSize = 2048  # source pixels
        # convert this to the nearest whole number of target pixels
        bSize = int(round(bSize / numSourcePerTarget) * numSourcePerTarget)
        if bSize == 0:
            bSize = int(numSourcePerTarget)

        for i in range(0, rows, bSize):
            if i + bSize < rows:
                numRows = bSize
            else:
                numRows = rows - i

            for j in range(0, cols, bSize):
                if j + bSize < cols:
                    numCols = bSize
                else:
                    numCols = cols - j

                data = sourceRaster.getBlock(j, i, numCols, numRows)

                if method is None:
                    method = "Mean"
                if method in ["Mean", "Max", "Min", "STD"]:
                    ans = self.rebin(data, (numRows / numSourcePerTarget,
                                            numCols / numSourcePerTarget),
                                     method)
                else:
                    X, Y = data.shape
                    x = X // numSourcePerTarget
                    y = Y // numSourcePerTarget
                    ndMask = data.reshape(
                        (x, numSourcePerTarget, y, numSourcePerTarget))
                    ndMask = ndMask.transpose([0, 2, 1, 3])
                    ndMask = ndMask.reshape(
                        (x * y, numSourcePerTarget * numSourcePerTarget))
                    ans = np.array(stats.mode(ndMask, 1)[0]).reshape(x, y)

                tmpOutDataset.putBlock(ans, int(j / numSourcePerTarget),
                                       int(i / numSourcePerTarget))

        tmpOutDataset.calcStats()
        tmpOutDataset.close()
def HMM(X, Y, human_bounds):
    """fit hidden markov model
  
       Fit HMM to average data and cross-validate with leftout subject using within song and between song average correlations              

       Parameters
       ----------
       A: voxel by time ndarray (2D)
       B: voxel by time ndarray (2D)
       C: voxel by time ndarray (2D)
       D: voxel by time ndarray (2D)
       K: # of events for HMM (scalar)
 
       Returns
       -------
       z: z-score after performing permuted cross-validation analysis      

    """

    # Fit to all but one subject
    nPerm = 1000
    within_across = np.zeros(nPerm + 1)
    K = len(human_bounds) + 1
    nTR = X.shape[1]

    # create events vector out of human bounds
    add_zero = np.append([0], human_bounds)
    full_human_bounds = np.append(add_zero, [nTR])
    diff_bounds = np.diff(full_human_bounds)
    events = np.zeros((nTR))

    for l in range(len(diff_bounds)):
        events[full_human_bounds[l]:full_human_bounds[l + 1]] = l

    max_event_length = stats.mode(events)[1][0]

    # compute timepoint by timepoint correlation matrix
    cc = np.corrcoef(Y.T)  # Should be a time by time correlation matrix

    # Create a mask to only look at values up to max_event_length
    local_mask = np.zeros(cc.shape, dtype=bool)
    for k in range(1, max_event_length):
        local_mask[np.diag(np.ones(cc.shape[0] - k, dtype=bool), k)] = True

    for p in range(nPerm + 1):
        same_event = events[:, np.newaxis] == events
        within = fisher_mean(cc[same_event * local_mask])
        across = fisher_mean(cc[(~same_event) * local_mask])
        within_across[p] = within - across

        np.random.seed(p)
        events = np.zeros(nTR, dtype=int)  # np.int alias was removed from NumPy
        events[np.random.choice(nTR, K - 1, replace=False)] = 1
        events = np.cumsum(events)

    return within_across
def HMM(X, Y, human_bounds):
    """fit hidden markov model
  
       Fit HMM to average data and cross-validate with leftout subject using within song and between song average correlations              

       Parameters
       ----------
       A: voxel by time ndarray (2D)
       B: voxel by time ndarray (2D)
       C: voxel by time ndarray (2D)
       D: voxel by time ndarray (2D)
       K: # of events for HMM (scalar)
 
       Returns
       -------
       z: z-score after performing permuted cross-validation analysis      

    """

    # Fit to all but one subject
    nPerm = 1000
    within_across = np.zeros(nPerm + 1)
    K = len(human_bounds) + 1
    nTR = X.shape[1]
    ev = brainiak.eventseg.event.EventSegment(K,
                                              split_merge=True,
                                              split_merge_proposals=3)
    ev.fit(X.T)
    events = np.argmax(ev.segments_[0], axis=1)
    bounds = np.where(np.diff(np.argmax(ev.segments_[0], axis=1)))[0]
    _, event_lengths = np.unique(events, return_counts=True)
    max_event_length = stats.mode(events)[1][0]

    # compute timepoint by timepoint correlation matrix
    cc = np.corrcoef(Y.T)  # Should be a time by time correlation matrix

    # Create a mask to only look at values up to max_event_length
    local_mask = np.zeros(cc.shape, dtype=bool)
    for k in range(1, max_event_length):
        local_mask[np.diag(np.ones(cc.shape[0] - k, dtype=bool), k)] = True

    for p in range(nPerm + 1):
        same_event = events[:, np.newaxis] == events
        within = fisher_mean(cc[same_event * local_mask])
        across = fisher_mean(cc[(~same_event) * local_mask])
        within_across[p] = within - across

        np.random.seed(p)
        perm_lengths = np.random.permutation(event_lengths)
        events = np.zeros(nTR, dtype=int)  # np.int alias was removed from NumPy
        events[np.cumsum(perm_lengths[:-1])] = 1
        events = np.cumsum(events)

    return within_across, bounds
Example #20
def test_kpi_level_model(predict_result_file, final_result_file):
    df = pd.read_csv(predict_result_file, sep=',', dtype=str)
    df = df[df['predict_event'] == '1']
    mapping_dict = {'Biz': 0, 'Mon': 1, 'Ora': 2, 'Trd': 3, 'Other': 4}
    knn_model_list = []
    knn_model_list = kpi_level_model.test_KNN_model(cluster_data_dir)
    all_df = pd.DataFrame(columns=[
        'alertgroup', 'classifier', 'hostname', 'predict_event',
        'predict_level'
    ])
    for alertgroup, group in df.groupby('alertgroup'):
        column_list = [
            'cpu_max', 'cpu_min', 'mem_max', 'mem_min', 'cpu_max_1',
            'cpu_min_1', 'mem_max_1', 'mem_min_1', 'cpu_max_2', 'cpu_min_2',
            'mem_max_2', 'mem_min_2'
        ]
        data = group[column_list]
        kpi_predict_result = []
        for i in knn_model_list:
            kpi_predict_result.append(i.predict(data))
        print(kpi_predict_result)
        predict_results = np.zeros(len(group))
        df_res = pd.DataFrame(columns=['predict_level'])
        for idx in range(len(group)):
            sample_predict_vec = np.array([
                np.round(kpi_predict_result[0][idx]),
                np.round(kpi_predict_result[1][idx]),
                np.round(kpi_predict_result[2][idx]),
                np.round(kpi_predict_result[3][idx]),
                np.round(kpi_predict_result[4][idx])
            ])
            # print(sample_predict_vec)
            # Mode of the five models' predictions
            mode_prediction_res = stats.mode(sample_predict_vec)[0][0]
            print(mode_prediction_res)
            # Maximum of the five models' predictions
            max_prediction_res = sample_predict_vec[np.argmax(sample_predict_vec)]
            print(max_prediction_res)
            # group_prediction_res <= max_prediction_res; the prediction of the
            # business model matching this record's alert group
            group_prediction_res = sample_predict_vec[mapping_dict[alertgroup]]
            print(group_prediction_res)
            if (mode_prediction_res <= 2 and max_prediction_res <= 2):
                predict_results[idx] = group_prediction_res
            else:
                predict_results[idx] = max_prediction_res
            df_res.loc[idx] = int(predict_results[idx])

        new_df = group[[
            'alertgroup', 'classifier', 'hostname', 'predict_event'
        ]].reset_index(drop=True).join(df_res, how='outer')
        all_df = pd.concat([all_df, new_df])

    print(all_df)
    all_df.to_csv(final_result_file, sep=',', index=False)
Example #21
def average_predictions(models, test_features, np_value="int", method="mode"):
    predictions = np.column_stack([model.predict(test_features) for model in models])
    print(f"Predictions of first 5 rows: {predictions[:5]}")
    if method == "mode":
        averaged_predictions = stats.mode(predictions, axis=1)[0].astype(np_value)
    elif method == "mean":
        averaged_predictions = np.average(predictions, axis=1).astype(np_value)
    else:
        raise Exception("Method undefined")
    print(f"Averaged predictions of first 5 rows: {averaged_predictions[:5]}")
    return np.hstack(averaged_predictions)
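An illustrative call with two small fitted models on hypothetical data; scikit-learn (already used by other examples on this page) is only needed for the demo, not by the function itself:

import numpy as np
from scipy import stats
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

X = np.array([[0.0], [1.0], [2.0], [3.0]])
y = np.array([0, 0, 1, 1])
models = [DecisionTreeClassifier().fit(X, y), LogisticRegression().fit(X, y)]
print(average_predictions(models, X))  # majority vote per row -> [0 0 1 1]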
Example #22
    def predict(self, examples):
        X = vstack( [reshape(x.X, (1, x.WIDTH * x.HEIGHT)) for x in examples] )

        dist, ind = self.lsh.kneighbors(X)

        rows, columns = ind.shape
        for row in xrange(0, rows):
            for column in xrange(0, columns):
                ind[row, column] = self.Y[ind[row, column]]
                
        vals, counts = mode(ind, axis=1)
        
        return reshape(vals, (1, len(examples))).tolist()[0]
Example #23
def info_univariate(data, features_name):
    df_np = np.array(data)
    df_transposed = np.transpose(df_np)
    for f in range(0, len(df_transposed), 1):
        ds = sorted(df_transposed[f])
        moda = stats.mode(ds)
        print(
            'Feature: {}:\nMAX: --> {}\nMIN:  --> {}\nAVG:  --> {}\nMODE:  --> V:{} --> {}\nMed  --> {}\n'
            .format(features_name[f], np.max(df_transposed[f]),
                    np.min(df_transposed[f]),
                    round(np.mean(df_transposed[f]), 1), moda[0], moda[1],
                    np.median(ds)))
    plot_boxnotch_univariateanalysis(df_transposed, features_name)
    return
def ensemble_voting(predictions, gold, dataset):
    stacked = numpy.stack(predictions, axis=0)
    modals = stats.mode(stacked, axis=0)[0].squeeze().astype(int)

    if dataset != "test":
        accuracy = acc(gold, modals)
        f1 = f1_macro(gold, modals)
        print("acc: ", accuracy)
        print("f1: ", f1)
    else:
        accuracy = 0
        f1 = 0

    return modals, accuracy, f1
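A quick check of the majority-vote ensembling with made-up predictions; passing dataset="test" skips the accuracy/F1 step, so the undefined acc and f1_macro helpers are not needed:

import numpy
from scipy import stats

preds = [numpy.array([0, 1, 1]), numpy.array([0, 1, 0]), numpy.array([1, 1, 0])]
modals, _, _ = ensemble_voting(preds, None, "test")
print(modals)  # per-position majority vote -> [0 1 0]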
def density_categorical_accuracy(labels, predicted_labels, classes):
    assert (len(labels) == len(predicted_labels))
    if len(labels) == 0:
        return 0

    n_cluster = np.max(predicted_labels) + 1
    clusters = [[] for _ in range(n_cluster)]

    for label, predicted_label in zip(labels, predicted_labels):
        clusters[predicted_label].append(label)

    catacc = np.average([stats.mode(d)[1][0] / len(d) for d in clusters],
                        weights=[len(d) for d in clusters])
    corrected_catacc = (catacc - 1.0 / classes) / (1.0 - 1.0 / classes)

    return corrected_catacc
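A worked toy example of this cluster-purity score (hypothetical labels; assumes SciPy < 1.11, where stats.mode(d)[1][0] is the count of the most frequent true label in cluster d):

import numpy as np
from scipy import stats

labels = np.array([0, 0, 1, 1, 1, 2])
predicted = np.array([0, 0, 0, 1, 1, 1])  # two clusters found
# Each cluster is 2/3 pure, so catacc = 2/3 and the chance-corrected
# score is (2/3 - 1/3) / (1 - 1/3) = 0.5
print(density_categorical_accuracy(labels, predicted, classes=3))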
Example #26
def get_qda_oof_prediction(x_train,y_train,x_test):
    oof_train = np.zeros((ntrain,))
    oof_test = np.zeros((ntest,))
    oof_test_skf = np.empty((NFOLDS,ntest))
    
    for i,(train_ind,test_ind) in enumerate(skf.split(x_train,y_train)):
        model = QuadraticDiscriminantAnalysis()
        y_tr = y_train[train_ind]
        x_tr = x_train[train_ind]
        x_ts = x_train[test_ind]
        model.fit(x_tr,y_tr)
        oof_train[test_ind] = model.predict(x_ts)
        oof_test_skf[i,:] = model.predict(x_test)
        print("Test score {} ".format(f1_score(y_train[test_ind],oof_train[test_ind])))        
    oof_test = stats.mode(oof_test_skf,axis=0)[0]
    return oof_train.reshape(-1,1),oof_test.reshape(-1,1)
Example #27
    def predict(self, X):
        distances = np.zeros((len(X), len(self.X_)))
        for i in range(len(X)):
            distances[i, :] = np.power(
                np.sum(np.power(X[i] - self.X_, self.p), axis=1), 1 / self.p)
        neighbors_ind = np.argpartition(distances, self.k, axis=1)[:, :self.k]
        neighbors_labels = self.y_[neighbors_ind]

        if self.weights == 'uniform':
            # Majority vote per row over the k neighbor labels
            return stats.mode(neighbors_labels, axis=1).mode.flatten()

        neighbors_distances = np.vstack(
            [distances[i][neighbors_ind[i]] for i in range(len(X))])
        if self.weights == 'inverse':
            weights_ = 1 / neighbors_distances
        if self.weights == 'Gaussian':
            weights_ = np.exp(-np.square(neighbors_distances) / 2)
        return self.weighted_predict(neighbors_labels, weights_)
Example #28
def get_feature(data_dict_samples,num_type):
    x_temp=[]
    for i,sample in enumerate(data_dict_samples):
        # If sample == [], np.std returns 0. To avoid a zero std, add an infinitesimal number
        if len(sample)==0: # Set to infinity if no sample is available
            x_temp.append(np.inf)                

        else:
            if num_type==INT_TYPE:
                x_temp.append(int(stats.mode(sample)[0]))                
            elif num_type==FLOAT_TYPE:
                x_temp.append(np.mean(sample))                
            else:
                raise NameError('Sample type must be either INT or FLOAT type')

    x_temp=np.array(x_temp)[:,np.newaxis]
    return x_temp
    def fit(self, trainExamples):
        self.expectedValues = {}
        
        for x in trainExamples:
            for (key, value) in x.items():
                if key != "Image":
                    if not key in self.expectedValues:
                        self.expectedValues[key] = []
                    
                    if len(value) > 0:
                        self.expectedValues[key].append(round(float(value)/0.5,0)*0.5)
                        
        for key in self.expectedValues.keys():
            self.expectedValues[key], _ = mode(self.expectedValues[key])
            self.expectedValues[key] = self.expectedValues[key][0]
            

        return self
Example #30
def get_create_feature(row):
    feature = pd.Series()
    feature['user_id'] = list(row['user_id'])[0]
    # feature['create_count'] = len(row)
    diff_day = np.diff(row['day'])
    if len(diff_day) != 0:
        # feature['create_day_diff_mean'] = np.mean(diff_day)
        # feature['create_day_diff_std'] = np.std(diff_day)
        # feature['create_day_diff_min'] = np.min(diff_day)
        # feature['create_day_diff_mode'] = stats.mode(interval_data)[0][0]
        feature['create_day_diff_ske'] = stats.skew(diff_day)
        feature['create_day_diff_kur'] = stats.kurtosis(diff_day)
        # feature['create_day_diff_max'] = np.max(diff_day)
        feature['create_day_last'] = diff_day[-1]
        feature['create_sub_register'] = np.subtract(np.max(row['max_day']),
                                                     np.max(row['day']))
        feature['create_mode'] = stats.mode(row['day'])[0][0]
        return feature
Example #31
def get_sgd_oof_prediction(SEED,x_train,y_train,x_test):
    oof_train = np.zeros((ntrain,))
    oof_test = np.zeros((ntest,))
    oof_test_skf = np.empty((NFOLDS,ntest))
    
    for i,(train_ind,test_ind) in enumerate(skf.split(x_train,y_train)):
        model = SGDClassifier(max_iter=100,random_state=SEED,loss="squared_hinge",alpha=0.009,penalty='l1')
        y_tr = y_train[train_ind]
        scaler = StandardScaler()
        x_tr = scaler.fit_transform(x_train[train_ind])
        x_ts = scaler.transform(x_train[test_ind])
        x_test_s = scaler.transform(x_test)
        model.fit(x_tr,y_tr)
        oof_train[test_ind] = model.predict(x_ts)
        oof_test_skf[i,:] = model.predict(x_test_s)
        print("Test score {} ".format(f1_score(y_train[test_ind],oof_train[test_ind])))
        
    oof_test = stats.mode(oof_test_skf,axis=0)[0]
    return oof_train.reshape(-1,1),oof_test.reshape(-1,1)
Example #32
def get_log_oof_prediction(SEED,x_train,y_train,x_test):
    oof_train = np.zeros((ntrain,))
    oof_test = np.zeros((ntest,))
    oof_test_skf = np.empty((NFOLDS,ntest))
    
    for i,(train_ind,test_ind) in enumerate(skf.split(x_train,y_train)):
        model = LogisticRegression(random_state=SEED,C=0.8252042855888113,penalty='l1',verbose=2)
        y_tr = y_train[train_ind]
        scaler = StandardScaler()
        x_tr = scaler.fit_transform(x_train[train_ind])
        x_ts = scaler.transform(x_train[test_ind])
        x_test_s = scaler.transform(x_test)
        model.fit(x_tr,y_tr)
        oof_train[test_ind] = model.predict(x_ts)
        oof_test_skf[i,:] = model.predict(x_test_s)
        print("Test score {} ".format(f1_score(y_train[test_ind],oof_train[test_ind])))
        
    oof_test = stats.mode(oof_test_skf,axis=0)[0]
    return oof_train.reshape(-1,1),oof_test.reshape(-1,1)
Example #33
    def process(img, head, gray_old):
        if shot_frame:
            cv2.imwrite(folder_depth + head + f"{loop:05d}.jpg", img)
        img = cv2.resize(img, resize_shape, interpolation=cv2.INTER_LINEAR)
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        gray = cv2.medianBlur(gray, 3)
        if mix_image:
            diff = cv2.absdiff(gray, gray_old)
            th = stats.mode(diff, axis=None)[0][0]
            diff_valid = diff > th + gray_diff_th
            if gray_mode:
                diff[np.logical_not(diff_valid)] = 0
                return diff, gray

            if np.sum(diff_valid) > change_th: return None, gray
            return gray, gray
        elif gray_mode:
            return gray, gray
        return img, gray
Example #34
def split_data_into_steps(data, N_TIME_STEPS, N_FEATURES, step, RANDOM_SEED):
    segments = []
    labels = []
    activities = set()
    for i in range(0, len(data) - N_TIME_STEPS, step):
        xs = data['x'].values[i: i + N_TIME_STEPS]
        ys = data['y'].values[i: i + N_TIME_STEPS]
        zs = data['z'].values[i: i + N_TIME_STEPS]
        label = stats.mode(data['activity'][i: i + N_TIME_STEPS])[0][0]
        segments.append([xs, ys, zs])
        labels.append(label)
        activities.add(label)

    reshaped_segments = np.asarray(segments, dtype=np.float32).reshape(-1, N_TIME_STEPS, N_FEATURES)  # pd.np was removed from pandas; assumes numpy is imported as np
    labels = np.asarray(pd.get_dummies(labels), dtype=np.float32)

    validation_split = get_validation_split()
    x_train, x_test, y_train, y_test = train_test_split(reshaped_segments, labels, test_size=validation_split,
                                                        random_state=RANDOM_SEED)
    return x_train, x_test, y_train, y_test, activities
Example #35
def test_voting(model, ipca, tensor, labels, times, interval_len, cnt):
    ok = 0.
    total = 0.
    with tf.Session() as sess:
        for i in range(cnt):
            x, y = prepate_data_for_voting(sess, ipca, tensor, labels, times, interval_len)
            num, width, temp, feat = x.shape
            x = np.reshape(x, (-1, x.shape[2] * x.shape[3]))

            x = (x - ipca.mean_my) / ipca.var_my
            x = ipca.transform(x)
            y_pred = model.predict(x)
            y_pred = np.reshape(y_pred, (-1, width))
            y_pred = stats.mode(y_pred, axis=1)[0]
            y_pred = np.reshape(y_pred, (-1))
            y = np.argmax(y, axis=1)
            ok += np.sum(y == y_pred, axis=0)
            total += y.shape[0]

    return ok / total
Example #36
def get_feature(data_dict_samples,num_type):
    x_temp = []
    for i, sample in enumerate(data_dict_samples):

        # If sample == [], np.std returns 0. To avoid a zero std, add an infinitesimal number

        # Set to infinity if no sample is available
        if len(sample) == 0:
            x_temp.append(np.inf)                

        else:
            if num_type == INT_TYPE:
                x_temp.append(int(stats.mode(sample)[0]))                
            elif num_type == FLOAT_TYPE:
                x_temp.append(np.mean(sample))                
            else:
                raise NameError('Sample type must be either INT or FLOAT type')

    x_temp = np.array(x_temp)[:, np.newaxis]
    return x_temp
Example #37
 def train(self, input_data, target_data):
     (self._most_frequent_value, ), _ = mode(target_data, axis = 0)
     self._target_type = target_data.dtype
Example #38
    #A special column
    #This would be obsolete with:
    #from matplotlib.dates import MONDAY, MonthLocator, WeekdayLocator, DateFormatter
    time = getTime(DateTimeUT)
    
    #Just an empty class
    class Dummy(object): pass
    
    #Lets get some statistics
    tdfst = Dummy()
    tdfStat = []
    
    tdfst.name, tdfst.median, tdfst.max, tdfst.min, tdfst.mean, tdfst.stdev = \
    "Seeing", median(medianFWHM), max(medianFWHM), min(medianFWHM), mean(medianFWHM), std(medianFWHM)
    
    tdfst.mode = mode(medianFWHM)
 
    tdfStat.append(tdfst)
    
    if opts.verbose == True:
        print
        print ("%5s" + "%11s"*6) % ("Name", "Median", "Max", "Min", "Mean", "Stdev", "Mode")
        frmt = "%5s" + "%11.2f"*6
        print frmt % (tdfst.name, tdfst.median, tdfst.max, tdfst.min, tdfst.mean, tdfst.stdev, tdfst.mode[0])
        print
    
    #Calculates some 2D correlations
    WDCorr = spearmanr(medianFWHM, WindDirectionDeg)
    Humidity = spearmanr(medianFWHM, HumidityPercent)
    Pressure = spearmanr(medianFWHM, PressureHPA)
Example #39
def open_behavioural(path, subj, **kwargs):
    ############# BOLOGNA ##################
    
    dropped_trials = []
    behavioural_data = []
    for arg in kwargs:
        if arg == 'dropped_trials':
            dropped_trials = np.int_(kwargs[arg].split(','))
        if arg == 'behavioural_data':
            behavioural_data = kwargs[arg].split(',')
    
    import xlrd
    fn = os.path.join(path, subj)
    
    book = xlrd.open_workbook(fn) #Open workbook
    sh = book.sheet_by_index(0) #Choose sheet
    
    labels = sh.row_values(0)
    labels = [unicode.lower(unicode(l)) for l in labels]
    l_array = np.array(labels, dtype = np.str)
    
    indexes = []
    data_tot = []
    dtype = []
    for field in behavioural_data:
        index = np.nonzero(l_array == str.lower(field))[0][0]
        
        data = sh.col_values(int(index))[1:]
        print field
        type_ = mode([x.__class__ for x in data])[0][0]
        if type_ == unicode or type_ == str:
            data = [x.__class__.lower(x) for x in data]
            t = (field, np.str_, 45)
        else:
            #print data
            data = [(int(x) if (x != 'NULL') and (x != '') else 0) for x in data]
            t = (field, np.int_, 1)
        
        dtype.append(t)
        data_tot.append(data)
    
    data_tot.append(range(1,len(sh.col_values(0)[1:])+1))
    dtype.append(('TrialNo.', np.int_, 1))
    
    '''    
    behavioural = np.array(zip(
                               sh.col_values(6)[1:], #Condition Label
                               sh.col_values(19)[1:],
                               np.float_([(int(x) if x else 0) for x in sh.col_values(18)[1:]]), #Accuracy
                               np.int_([(int(x) if x else 0) for x in sh.col_values(4)[1:]]),
                               np.arange(len(sh.col_values(0)[1:]))+1 #Combination
                            ), 
                           dtype=[('Condition', np.str_,2),
                                  ('SlideImage', np.str_,10),
                                  ('Accuracy', np.int_, 1),
                                  ('Combination', np.int_, 1),
                                  ('TrialNo.', np.int_, 1)]
                           )
    '''
    
    behavioural = np.array(zip(*data_tot), dtype=dtype)
    
    
    if len(dropped_trials) > 0:
        mask = 0
        for trial in dropped_trials:
            mask = mask + np.int_(behavioural['TrialNo.'] == trial)
    
        behavioural = behavioural[~np.bool_(mask)]
    
    return behavioural
Example #40
Southwest, 4.79, 2.71
Wales, 5.27, 3.53
Scotland, 6.08, 4.51
Northern Ireland, 4.02, 4.56'''

data = data.splitlines()
data = [i.split(', ') for i in data]

column_names = data[0]
data_rows = data[1::]
df = pd.DataFrame(data_rows, columns=column_names)

df['Alcohol'] = df['Alcohol'].astype(float)
df['Tobacco'] = df['Tobacco'].astype(float)

print "The mean for the Alcohol dataset is", df['Alcohol'].mean() 
print  "The median for the Alcohol dataset is", df['Alcohol'].median() 
# Not sure how to return only the first array (the mode value itself),
# or how to print "there is no mode" when the returned frequency is 1; see the sketch after this script.
print  "The mode for the Alcohol dataset is", stats.mode(df['Alcohol']) 
print "The range for the Alcohol dataset is", max(df['Alcohol']) - min(df['Alcohol'])
print "The standard deviation for the Alcohol dataset is", df['Alcohol'].std() 
print "The variance for the Alcohol dataset is", df['Alcohol'].var() 

print  "The mean for the Tobacco dataset is", df['Tobacco'].mean() 
print "The median for the Tobacco dataset is", df['Tobacco'].median() 
print "The mode for the Tobacco dataset is", stats.mode(df['Tobacco']) 
print "The range for the Tobacco dataset is", max(df['Tobacco']) - min(df['Tobacco'])
print "The standard deviation for the Tobacco dataset is", df['Tobacco'].std() 
print "The variance for the Tobacco dataset is", df['Tobacco'].var()