Example #1
import logging

import numpy as np

log = logging.getLogger(__name__)


def detect_ground(profile):
    """Automatic detection of ground (end of snowpack).

    :param snowmicropyn.Profile profile: The profile to detect ground in.
    :return: Distance where ground was detected.
    :rtype: float
    """

    force = profile.samples.force
    distance = profile.samples.distance

    ground = distance.iloc[-1]

    if force.max() >= profile.overload:
        i_ol = force.argmax()
        i_threshold = np.where(
            distance.values >= distance.values[i_ol] - 20)[0][0]
        f_mean = np.mean(force.iloc[0:i_threshold])
        f_std = np.std(force.iloc[0:i_threshold])
        threshold = f_mean + 5 * f_std

        while force.iloc[i_ol] > threshold:
            i_ol -= 10

        ground = distance.iloc[i_ol]

    log.info('Detected ground at {:.3f} mm in profile {}'.format(
        ground, profile))
    return ground
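A minimal way to exercise detect_ground without a real measurement: _FakeProfile below is a made-up stand-in exposing only the attributes the function reads; in practice a snowmicropyn.Profile would be passed in.

import numpy as np
import pandas as pd

class _FakeProfile:
    def __init__(self, samples, overload):
        self.samples = samples    # DataFrame with 'distance' and 'force' columns
        self.overload = overload  # force level treated as a sensor overload

dist = np.linspace(0, 1000, 2001)         # penetration depth in mm
force = np.where(dist < 950, 1.0, 50.0)   # soft snow, then hard ground
profile = _FakeProfile(pd.DataFrame({'distance': dist, 'force': force}),
                       overload=40.0)
print(detect_ground(profile))             # ~945 mm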
Example #2
def find_missing_seat(seats_data: List[Tuple[int, int]]) -> Tuple[int, int]:
    """
    Given a list of seat coordinates taken, return the only one with previous and after
    seats occupied

    :param seats_data: Occupied seats data coordinates
    :return:
    """
    # Create seats matrix
    ar = np.array(seats_data)
    res = np.zeros((PLANE_ROW_NUMBER, PLANE_COLUMN_NUMBER), dtype=int)
    res[ar[:, 0], ar[:, 1]] = 1

    # Find all empty seats
    empty_seats_raw = np.where(res == 0)
    empty_seats = list(zip(empty_seats_raw[0], empty_seats_raw[1]))

    # Find the only valid empty seat
    for empty_seat in empty_seats:
        before_seat, after_seat = get_adjacent_seats(*empty_seat)

        if (before_seat is not None and after_seat is not None
                and res[before_seat[0]][before_seat[1]]
                and res[after_seat[0]][after_seat[1]]):
            return empty_seat
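The snippet relies on plane-geometry constants and a get_adjacent_seats helper defined elsewhere. Below is a minimal sketch of what they might look like, treating seats as consecutive row-major IDs; the constants and the helper's exact behavior are assumptions inferred from how they are used above.

from typing import List, Optional, Tuple

import numpy as np

PLANE_ROW_NUMBER = 128
PLANE_COLUMN_NUMBER = 8

def get_adjacent_seats(row: int, col: int):
    """Return the (row, col) pairs immediately before and after a seat
    in row-major order, or None at either end of the plane."""
    seat_id = row * PLANE_COLUMN_NUMBER + col
    last_id = PLANE_ROW_NUMBER * PLANE_COLUMN_NUMBER - 1
    before = divmod(seat_id - 1, PLANE_COLUMN_NUMBER) if seat_id > 0 else None
    after = divmod(seat_id + 1, PLANE_COLUMN_NUMBER) if seat_id < last_id else None
    return before, after

With these definitions, find_missing_seat([(0, 5), (0, 7)]) returns (0, 6), the only empty seat whose immediate neighbours are both occupied.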
Example #3
    def predict(self, X, Y=None):
        results = {}
        print(f'Get data for {X}')
        data = self._data_fetcher(X, last=True)
        print(f'Data:\n {data}')

        # create features
        df = create_features(data)

        # Split test train data
        X, Y = create_X_Y(df, 'buysell')
        X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=0, stratify=Y)

        # Base model to tune
        rf = RandomForestClassifier()
        param_grid = {
            'max_depth': [5, 10, 20, 40],
            'max_features': ['sqrt'],
            'min_samples_leaf': [2],
            'min_samples_split': [6],
            'n_estimators': [50, 100, 200]
        }
        grid_search = GridSearchCV(estimator=rf, param_grid=param_grid,
                                   cv=3, n_jobs=-1, verbose=2)
        grid_search.fit(X_train, y_train)
        best_param = grid_search.best_params_
        clf = RandomForestClassifier(n_estimators=best_param['n_estimators'],
                                     min_samples_split=best_param['min_samples_split'],
                                     min_samples_leaf=best_param['min_samples_leaf'],
                                     max_features=best_param['max_features'],
                                     max_depth=best_param['max_depth'])

        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)

        results["balanced_accuracy"] = balanced_accuracy_score(y_test, y_pred)
        results["important_features"] = {"data": clf.feature_importances_, "index": X.columns}

        # Classify the most recent feature row. (train_test_split shuffles,
        # so the last row of X_train would be an arbitrary sample.)
        prediction = clf.predict(X.iloc[[-1]])
        prediction = np.where(prediction == 1, "BUY", "SELL")
        results["prediction"] = prediction.flatten()[-1]

        return results
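For reference, this method leans on the usual scikit-learn stack; the module-level imports would look roughly like this (create_features, create_X_Y and self._data_fetcher are project-specific helpers not shown in the snippet):

import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import balanced_accuracy_score
from sklearn.model_selection import GridSearchCV, train_test_split

Note that GridSearchCV refits the best parameter combination on the full training data by default (refit=True), so grid_search.best_estimator_ could be used directly instead of constructing a second RandomForestClassifier from best_params_.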
Example #4
def f1(y_hat, y_true, THRESHOLD=0.5):
    '''
    y_hat is the raw model output, not yet passed through a sigmoid activation.
    The returned value is the macro-F1.
    '''
    epsilon = 1e-7
    y_hat = y_hat > THRESHOLD
    y_hat = np.int8(y_hat)
    tp = np.sum(y_hat * y_true, axis=0)
    fp = np.sum(y_hat * (1 - y_true), axis=0)
    fn = np.sum((1 - y_hat) * y_true, axis=0)

    p = tp / (tp + fp + epsilon)  # epsilon keeps the denominator from being 0, which would otherwise raise an error
    r = tp / (tp + fn + epsilon)

    f1 = 2 * p * r / (p + r + epsilon)
    f1 = np.where(np.isnan(f1), np.zeros_like(f1), f1)

    return np.mean(f1)
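A quick sanity check of f1 on a small multi-label batch (two samples, three labels); the scores are made-up raw outputs:

import numpy as np

y_true = np.array([[1, 0, 1],
                   [0, 1, 1]])
scores = np.array([[2.3, -1.0, 0.7],    # raw, pre-sigmoid outputs
                   [-0.5, 1.8, 0.2]])
print(f1(scores, y_true))  # ~0.889: per-label F1 scores of 1, 1 and 2/3, averaged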
Example #5
def dice(img1, img2, labels=None, nargout=1):
    '''
    Dice [1] volume overlap metric

    The default is to *not* return a measure for the background layer (label = 0)

    [1] Dice, Lee R. "Measures of the amount of ecologic association between species."
    Ecology 26.3 (1945): 297-302.

    Parameters
    ----------
    img1 : nd array. The first volume (e.g. predicted volume)
    img2 : nd array. The second volume (e.g. "true" volume)
    labels : optional vector of labels on which to compute Dice.
        If this is not provided, Dice is computed on all non-background (non-0) labels
    nargout : optional control of output arguments. if 1, output Dice measure(s).
        if 2, output tuple of (Dice, labels)

    Output
    ------
    if nargout == 1 : dice : vector of Dice measures, one per label
    if nargout == 2 : (dice, labels) : where labels is a vector of the labels on which
        Dice was computed
    '''
    if labels is None:
        labels = np.unique(np.concatenate((img1, img2)))  # returns a 1-D array
        labels = np.delete(labels, np.where(labels == 0))  # remove background

    dicem = np.zeros(len(labels))
    for idx, lab in enumerate(labels):
        top = 2 * np.sum(np.logical_and(img1 == lab, img2 == lab))
        bottom = np.sum(img1 == lab) + np.sum(img2 == lab)
        bottom = np.maximum(bottom, np.finfo(float).eps)  # add epsilon (machine epsilon) to avoid dividing by zero
        dicem[idx] = top / bottom

    if nargout == 1:
        return dicem
    else:
        return (dicem, labels)
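A small check with two overlapping binary masks (made-up data):

import numpy as np

a = np.zeros((4, 4), dtype=int)
b = np.zeros((4, 4), dtype=int)
a[1:3, 1:3] = 1    # a 2x2 block of label 1
b[1:3, 1:4] = 1    # an overlapping 2x3 block
print(dice(a, b))  # [0.8] = 2 * 4 / (4 + 6)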
Example #6
    def init_groups(self, c, label_size):
        gp_size = math.ceil(c * len(self.clients))
        done = False
        size = len(self.clients)

        wrk_cls = [[False for _ in range(label_size)] for _ in range(size)]
        # One queue of candidate workers per class.
        cls_q = [Queue(maxsize=size) for _ in range(label_size)]
        for i, cls_list in enumerate(self.class_distributions):
            wrk_cls[i] = [freq != 0 for _, freq in cls_list]
        for worker, class_list in enumerate(reversed(wrk_cls)):
            for cls, exist in enumerate(class_list):
                if exist:
                    cls_q[cls].put(size - worker - 1)
        # Per-class sample counts; a NumPy array so the element-wise
        # comparison below works.
        taken_count = np.zeros(label_size, dtype=int)

        print('generating balanced groups for training...')
        while not done:
            visited = [False for _ in range(size)]
            g = []
            for _ in range(gp_size):
                # Pick the class whose samples have been taken least so far.
                cls = np.where(taken_count == np.amin(taken_count))[0][0]
                assert 0 <= cls < len(taken_count)
                done_q = False
                count = 0
                while not done_q:
                    wrkr = cls_q[cls].get()
                    if not visited[wrkr] and wrk_cls[wrkr][cls]:
                        g.append(wrkr)
                        # Add the chosen worker's per-class sample counts.
                        taken_count += np.array(
                            [freq for _, freq in self.class_distributions[wrkr]])
                        visited[wrkr] = True
                        done_q = True
                    cls_q[cls].put(wrkr)
                    count += 1
                    if count == size:
                        done_q = True

            self.groups.append(g)
            if len(self.groups) > 10:
                done = True
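This method assumes module-level imports along these lines; self.clients, self.class_distributions (one list of (label, frequency) pairs per client) and self.groups are attributes of the enclosing class:

import math
from queue import Queue

import numpy as np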
Example #7
tfidf = TfidfVectorizer()
tfidf.fit(result['Concultatory'])

X = tfidf.transform(result['Concultatory'])
result['Concultatory'][1]

#print([X[1, tfidf.vocabulary_['διοίκησης']]])
#print([X[1, tfidf.vocabulary_['βαθμό']]])
#print([X[1, tfidf.vocabulary_['αποσπάσεως']]])

#Sentiment Classification
#Positive: 1,2 , Negative: 3,4
result.dropna(inplace=True)
#result[result['Score'] != 1]
result['Positivity'] = np.where(result['Status'] == 1, 1, -1)
cols = ['Status']
result.drop(cols, axis=1, inplace=True)
result.head()

result.groupby('Positivity').size()

fig = plt.figure(figsize=(8,6))
result.groupby('Positivity').Concultatory.count().plot.bar(ylim=0)
plt.show()

X = result.Concultatory
y = result.Positivity

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state = 0)
print("Το σύνολο Εκπαίδευσης έχει συνολικά {0} Γνωμοδοτήσεις με {1:.2f}% σε εκκρεμότητα, {2:.2f}% αποδεκτές".format(len(X_train),
Example #8
tfidf = TfidfVectorizer()
tfidf.fit(result['Subject'])

X = tfidf.transform(result['Subject'])
result['Subject'][1]

#print([X[1, tfidf.vocabulary_['δημόσιας']]])
#print([X[1, tfidf.vocabulary_['κατάταξη']]])
#print([X[1, tfidf.vocabulary_['βαθμό']]])

#Sentiment Classification

result.dropna(inplace=True)
result = result[result['Score'] != 3]  # filter out rows with Score == 3 (the result must be assigned back)
result['Positivity'] = np.where(result['Score'] >= 2, 1, 0)
cols = ['Score']
result.drop(cols, axis=1, inplace=True)
result.head()

result.groupby('Positivity').size()

X = result.Subject
y = result.Positivity
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
print(
    "Train set has total {0} entries with {1:.2f}% negative, {2:.2f}% positive"
    .format(len(X_train),
            (len(X_train[y_train == 0]) / (len(X_train) * 1.)) * 100,
            (len(X_train[y_train == 1]) / (len(X_train) * 1.)) * 100))
Example #9
def threshold(data):
    data[np.where(0.5 > data)] = 0
    data[np.where((0.5 <= data) & (data < 1.5))] = 1
    data[np.where((1.5 <= data) & (data < 2.5))] = 2
    data[np.where((2.5 <= data) & (data < 3.5))] = 3
    return data
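The binning happens in place (the input array is modified) and values of 3.5 or more pass through unchanged. A quick check:

import numpy as np

x = np.array([0.2, 0.7, 1.6, 2.49, 3.2])
print(threshold(x))  # [0. 1. 2. 2. 3.]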
Example #10
    def add_ranking_flags(self, df: pd.DataFrame) -> pd.DataFrame:
        self.logger.info("Adding potential target flags")
        df['is_oscar_winner'] = np.where(df.index.isin(oscar_winners), 1, 0)
        df['is_top250'] = np.where(df.index.isin(top_250_engl), 1, 0)
        df['is_worst100'] = np.where(df.index.isin(worst_100), 1, 0)
        return df
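The np.where(condition, 1, 0) pattern turns an index-membership test into a 0/1 flag column. A standalone sketch with made-up stand-ins for the module-level collections oscar_winners, top_250_engl and worst_100:

import numpy as np
import pandas as pd

oscar_winners = {'tt0068646'}             # hypothetical IMDb-style IDs
top_250_engl = {'tt0068646', 'tt0111161'}
worst_100 = set()

df = pd.DataFrame({'title': ['The Godfather', 'The Shawshank Redemption']},
                  index=['tt0068646', 'tt0111161'])
df['is_oscar_winner'] = np.where(df.index.isin(oscar_winners), 1, 0)
df['is_top250'] = np.where(df.index.isin(top_250_engl), 1, 0)
print(df)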
Example #11
t0 = cpu_time()
train_features_tfidf = tfidf.fit_transform(train_features).toarray()
del train_features
print("Computing the TFIDF took {} sec of the CPU's time.".format(cpu_time() - t0))

print('Transforming the test data using the trained TFIDF...')
test = tfidf.transform(test).toarray()
print('Finished transforming the test data using the trained TFIDF.')

# encode labels as integers 0-8 (from "Class_1", "Class_2", etc)
classification_encoder = preprocessing.LabelEncoder()
print('Transforming labels from text (class names) into an integer ENUM...')
t0 = cpu_time()
train_targets_encoded = classification_encoder.fit_transform(train_targets)
print("Transforming labels took {} sec of the CPU's time.".format(cpu_time() - t0))
assert(all(sample_label_set[i] == 'Class_{}'.format(i+1) == train_targets[np.where(train_targets_encoded == i)[0][0]]
       for i in range(len(sample_label_set))))
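For context, the names this script relies on would be set up earlier, along these lines; cpu_time is not a standard-library name, so time.process_time is a plausible stand-in, and sample_label_set, train_features, train_targets and test come from the dataset being classified:

from time import process_time as cpu_time   # assumption: a CPU-time helper
from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer

tfidf = TfidfVectorizer()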

# train a random forest classifier
rfc = RandomForestClassifier(n_jobs=-1, n_estimators=300)
print('Training a random forest on the training set...')
t0 = cpu_time()
#  `train_features_tfidf` = 60k x 93 matrix of term frequencies normalized (divided by) document frequencies
#  `train_targets` = array(['Class_1', 'Class_1', 'Class_1', ..., 'Class_9', 'Class_9', 'Class_9'], dtype=object)

rfc.fit(train_features_tfidf, train_targets)
print("Random Forest took {} sec of the CPU's time.".format(cpu_time() - t0))

# predict on training set
print('Rerunning the predictor to predict the labels for the {} training set records...'.format(len(train_features_tfidf)))
t0 = cpu_time()