Python transform_dataの例、utils.transform_data Pythonの例

コード例 #1

0

ファイルを表示

ファイル: main.py プロジェクト: liufei2606/code

def main():
    """
        Main entry point: train, compare and plot the charging-pile models.

        Reads ``charging_pile.csv`` from ``config.dataset_path``, holds out a
        quarter of the rows for testing, evaluates every configured model via
        ``utils.train_test_model`` and writes the comparison table (CSV) and
        a bar-chart figure (PNG) into ``config.output_path``.
    """
    # Load the data, using the 'id' column as the index
    csv_path = os.path.join(config.dataset_path, 'charging_pile.csv')
    raw_data = pd.read_csv(csv_path, index_col='id')

    # Split into train / test sets (25% held out, fixed random seed)
    train_data, test_data = train_test_split(
        raw_data, test_size=1 / 4, random_state=10)

    # Inspect the datasets
    utils.inspect_dataset(train_data, test_data)

    # Feature engineering
    print('\n===================== 特征工程 =====================')
    X_train, y_train = utils.transform_data(train_data)
    X_test, y_test = utils.transform_data(test_data)

    # Modelling and validation
    print('\n===================== 数据建模及验证 =====================')
    model_name_param_dict = {
        'kNN': [5, 11, 15],
        'LR': [0.1, 1, 10],
    }

    # DataFrame that collects the per-model results, one row per model
    accuracy_col = 'Accuracy (%)'
    time_col = 'Time (s)'
    results_df = pd.DataFrame(index=list(model_name_param_dict.keys()),
                              columns=[accuracy_col, time_col])
    results_df.index.name = 'Model'
    for model_name, param_range in model_name_param_dict.items():
        _, best_acc, mean_duration = utils.train_test_model(
            X_train, y_train, X_test, y_test, param_range, model_name)
        # Accuracy is stored as a percentage
        results_df.loc[model_name, accuracy_col] = best_acc * 100
        results_df.loc[model_name, time_col] = mean_duration

    results_df.to_csv(os.path.join(config.output_path, 'model_comparison.csv'))

    # Compare models and results
    print('\n===================== 模型及结果比较 =====================')

    # One bar chart per metric, side by side: (column, title, extra kwargs)
    panels = (
        (accuracy_col, 'Accuracy(%)', {'ylim': [60, 100]}),
        (time_col, 'Time(s)', {}),
    )
    plt.figure(figsize=(10, 4))
    for position, (column, title, extra) in enumerate(panels, start=1):
        axis = plt.subplot(1, 2, position)
        results_df.plot(y=[column],
                        kind='bar',
                        ax=axis,
                        title=title,
                        legend=False,
                        **extra)
    plt.tight_layout()
    plt.savefig(os.path.join(config.output_path, 'pred_results.png'))
    plt.show()

コード例 #2

0

ファイルを表示

ファイル: naive_bayes.py プロジェクト: edisonlee0212/MLProjects

 def predict(self, test_x):
     """Classify every sample in ``test_x`` as 1 or -1.

     For each sample a vector of length ``self.dimension`` is filled from
     the indices yielded by ``transform_data``: an accepted index is set to
     1 when ``self.binary`` is truthy and incremented otherwise.  The vector
     is multiplied element-wise with ``self.x_pos_prob`` and
     ``self.x_neg_prob``; the base-2 logarithms of the strictly positive
     products are summed per class and the class with the larger sum wins
     (ties go to 1).

     Every sample is scored.  The earlier ``range(0, len(test_x) - 1)``
     silently skipped the last one, so the result was one prediction short.

     Args:
         test_x: indexable collection of samples, each accepted by
             ``transform_data``.

     Returns:
         A list with one entry (1 or -1) per sample of ``test_x``.
     """
     ret = []
     for row in range(len(test_x)):
         x = np.zeros(self.dimension)
         for idx in transform_data(test_x[row]):
             if 2 < idx < self.dimension + 3:
                 # NOTE(review): idx is used unshifted although the accepted
                 # range is 3..dimension+2, so idx >= self.dimension raises
                 # IndexError -- confirm whether ``idx - 3`` was intended.
                 if self.binary:
                     x[idx] = 1
                 else:
                     x[idx] += 1

         pos = np.multiply(x, self.x_pos_prob)
         neg = np.multiply(x, self.x_neg_prob)

         # Only strictly positive products contribute; log of 0 is undefined.
         y1 = 0
         for value in np.nditer(pos):
             if value > 0:
                 y1 += math.log(value, 2)
         y2 = 0
         for value in np.nditer(neg):
             if value > 0:
                 y2 += math.log(value, 2)

         ret.append(1 if y1 >= y2 else -1)
     return ret

コード例 #3

0

ファイルを表示

def sentiemnt():
    """Classify the sentiment of the ``sentence`` query parameter.

    The sentence is wrapped in a record, converted with ``transform_data``
    and fed to ``net``.  The JSON response carries '积极' (positive) in its
    ``data`` field when the argmax class is 0 and '消极' (negative)
    otherwise, together with ``status_code`` 200.
    """
    record = {'data': request.args.get('sentence')}
    data, _ = transform_data(record, TEXT, LABEL)
    prediction = net(data).argmax(dim=1).item()
    result = '积极' if prediction == 0 else '消极'
    return jsonify({'data': result, 'status_code': 200})

コード例 #4

0

ファイルを表示

 def cal_new_w(self, tr_xi, label):
     """Run one training step on a single labelled sample.

     Builds the feature vector for ``tr_xi`` from the indices yielded by
     ``transform_data``, shifts the bias by ``self.rate * label`` and, when
     the resulting score disagrees with ``label``, adds the scaled feature
     vector to ``self.w``.

     Args:
         tr_xi: one training sample, accepted by ``transform_data``.
         label: the sample's class; the code compares it against 1 and -1.
     """
     features = np.zeros(self.dimension)
     for idx in transform_data(tr_xi):
         # Accepted indices 3..dimension+2 map onto slots 0..dimension-1.
         if 2 < idx < self.dimension + 3:
             if self.binary:
                 features[idx - 3] = 1
             else:
                 features[idx - 3] += 1

     # NOTE(review): the bias moves on every call, before the score is
     # computed and regardless of whether the sample is misclassified --
     # confirm this is the intended update rule.
     self.b = self.b + self.rate * label
     score = np.inner(self.w, features) + self.b

     misclassified = (label == 1 and score < 0) or (label == -1 and score >= 0)
     if not misclassified:
         return

     step = features * label * self.rate
     self.w = np.sum((self.w, step), axis=0)

コード例 #5

0

ファイルを表示

def evaluate(model, df):
    """Print the classification accuracy of ``model`` over the rows of ``df``.

    Each row is turned into a dict, converted with ``transform_data`` (using
    ``TEXT`` and ``LABEL``) and scored by ``model``; a row counts as correct
    when ``score.argmax(dim=1)`` equals its label.

    Rows are read by position (``iloc``), so frames whose index is not the
    default 0..n-1 range (for example after a split or a filter) no longer
    raise ``KeyError``.  An empty frame reports ``nan`` instead of raising
    ``ZeroDivisionError``.

    Args:
        model: callable returning a score tensor for the transformed data.
        df: pandas DataFrame with one sample per row.
    """
    df_len = df.shape[0]
    correct = 0
    for position in range(df_len):
        record = df.iloc[position].to_dict()
        data, label = transform_data(record, TEXT, LABEL)
        score = model(data)
        if score.argmax(dim=1) == label:
            correct += 1

    # Guard the division so an empty frame does not crash the report.
    accuracy = correct / df_len if df_len else float('nan')
    print(
        f"Classification Accuracy of Model({model.__class__.__name__})is {accuracy} "
    )

コード例 #6

0

ファイルを表示

ファイル: main.py プロジェクト: lschmiddey/API_with_Flask

def predict():
    """Serve a prediction for the passenger described by the query string.

    For GET requests the ``Pclass``, ``Age``, ``Sex`` and ``Parch`` query
    parameters are placed in a one-row DataFrame, converted with
    ``transform_data`` and passed to ``get_prediction``; the response is
    ``{'prediction': <value>}``.  Any other method receives an error
    payload.
    """
    if request.method != 'GET':
        return jsonify({'error': 'Only GET requests possible'})

    # One column per query parameter, in this order; missing ones are None.
    fields = ('Pclass', 'Age', 'Sex', 'Parch')
    raw_data = pd.DataFrame({name: [request.args.get(name)] for name in fields})

    transf_data = transform_data(raw_data)
    prediction = get_prediction(transf_data)
    # Row 0, column 1 of the model output is reported.
    # NOTE(review): confirm which class probability column 1 represents.
    data = {'prediction': prediction[0][1].item()}
    return jsonify(data)

コード例 #7

0

ファイルを表示

 def predict(self, test_x):
     """Classify every sample in ``test_x`` as 1 or -1.

     A feature vector of length ``self.dimension`` is built per sample from
     the indices yielded by ``transform_data`` and scored as
     ``inner(self.w, features) + self.b``; a non-negative score gives 1, a
     negative one gives -1.

     Returns:
         A list with one prediction per sample of ``test_x``.
     """
     ret = []
     for row in range(len(test_x)):
         features = np.zeros(self.dimension)
         for idx in transform_data(test_x[row]):
             # Accepted indices 3..dimension+2 map onto slots 0..dimension-1.
             if 2 < idx < self.dimension + 3:
                 if self.binary:
                     features[idx - 3] = 1
                 else:
                     features[idx - 3] += 1
         score = np.inner(self.w, features) + self.b
         ret.append(1 if score >= 0 else -1)
     return ret

コード例 #8

0

ファイルを表示

 def cal_new_w(self, tr_xi, label):
     """Run one training step with a survival-weighted weight update.

     Builds the feature vector for ``tr_xi`` and scores it with the current
     ``self.w`` and ``self.b``.  A correct prediction only increments
     ``self.survival``.  On a mistake the bias moves by
     ``self.rate * label / (self.survival + 1)``, the weights become the
     average of ``self.survival`` copies of the old weights and one copy of
     the corrected weights, and ``self.survival`` is reset to 1.

     Args:
         tr_xi: one training sample, accepted by ``transform_data``.
         label: the sample's class; the code compares it against 1 and -1.
     """
     features = np.zeros(self.dimension)
     for idx in transform_data(tr_xi):
         # Accepted indices 3..dimension+2 map onto slots 0..dimension-1.
         if 2 < idx < self.dimension + 3:
             if self.binary:
                 features[idx - 3] = 1
             else:
                 features[idx - 3] += 1

     score = np.inner(self.w, features) + self.b
     step = features * float(label) * self.rate

     predicted_correctly = (label == 1 and score >= 0) or (label == -1 and score < 0)
     if predicted_correctly:
         self.survival = self.survival + 1
         return

     # Both the bias and the weight update use the survival count from
     # before the reset at the end.
     self.b = self.b + self.rate * label / (self.survival + 1)
     corrected = np.sum((self.w, step), axis=0)
     weighted_old = self.w * self.survival
     self.w = np.sum((weighted_old, corrected), axis=0) / (self.survival + 1)
     self.survival = 1

コード例 #9

0

ファイルを表示

    def __getitem__(self, i):
        """Return the transformed image and annotations of sample ``i``.

        Opens ``self.images[i]`` as an RGB image, converts the matching
        entry of ``self.objects`` to tensors, optionally drops the objects
        flagged as difficult and passes everything through
        ``transform_data`` for the current ``self.split``.

        Returns:
            The tuple ``(image, boxes, labels, difficulties)`` produced by
            ``transform_data``.
        """
        # Read image
        image = Image.open(self.images[i], mode='r')
        image = image.convert('RGB')

        # Read objects in this image (bounding boxes, labels, difficulties)
        objects = self.objects[i]
        boxes = torch.FloatTensor(objects['boxes'])  # (n_objects, 4)
        labels = torch.LongTensor(objects['labels'])  # (n_objects)
        difficulties = torch.ByteTensor(objects['difficulties'])  # (n_objects)

        # Discard difficult objects, if desired
        if not self.keep_difficult:
            # Use a comparison mask rather than indexing with the uint8
            # tensor ``1 - difficulties``: uint8 mask indexing is deprecated
            # in current PyTorch.  Selects the same rows for 0/1 flags.
            keep = difficulties == 0
            boxes = boxes[keep]
            labels = labels[keep]
            difficulties = difficulties[keep]

        # Apply transformations
        image, boxes, labels, difficulties = transform_data(image, boxes, labels, difficulties, split=self.split)

        return image, boxes, labels, difficulties

コード例 #10

0

ファイルを表示

ファイル: lambda_function.py プロジェクト: janeyx99/test-infra

async def handle_webhook(payload: Dict[str, Any], type: str):
    """Persist the objects contained in a webhook payload.

    Webhook types outside ``ACCEPTABLE_WEBHOOKS`` are acknowledged without
    being processed.  Otherwise each ``(tablename, obj)`` pair returned by
    ``extract_github_objects`` is normalised with ``transform_data`` and
    upserted into a table whose columns are the object's keys.

    Args:
        payload: the decoded webhook body.
        type: the webhook type.

    Returns:
        A dict with ``statusCode`` 200 and a short ``body`` message.
    """
    engine = get_engine(connection_string())

    # Only allowlisted webhook types are processed
    if type not in ACCEPTABLE_WEBHOOKS:
        return {"statusCode": 200, "body": f"not processing {type}"}

    # Marshal the JSON into SQL-able data
    objects = extract_github_objects(payload, type)

    print("Writing", ", ".join(name for name, _ in objects))

    with engine.connect() as conn:
        for tablename, raw_obj in objects:
            # Some values (e.g. dates and lists) are not yet in a storable
            # form; transform_data fixes them up
            obj = transform_data(raw_obj)

            # Table construct: the name followed by one column per key
            columns = [column(key) for key in obj.keys()]
            model = table(tablename, *columns)
            upsert(conn, model, obj)

    return {"statusCode": 200, "body": "ok"}

コード例 #11

0

ファイルを表示

ファイル: lambda_function.py プロジェクト: pytorch/test-infra

async def handle_webhook(payload: Dict[str, Any], type: str):
    """Persist the objects of a webhook payload, limited to the known schema.

    Each ``(tablename, obj)`` pair returned by ``extract_github_objects`` is
    normalised with ``transform_data``.  Tables missing from
    ``existing_schema`` are skipped, keys missing from the table's schema
    entry are dropped (both cases are printed), and the remaining data is
    upserted.

    Args:
        payload: the decoded webhook body.
        type: the webhook type.

    Returns:
        ``{"statusCode": 200, "body": "ok"}``.
    """
    engine = get_engine(connection_string())

    # Marshal the JSON into SQL-able data
    objects = extract_github_objects(payload, type)

    print("Writing", ", ".join(name for name, _ in objects))

    with engine.connect() as conn:
        for tablename, raw_obj in objects:
            # Some values (e.g. dates and lists) are not yet in a storable
            # form; transform_data fixes them up
            obj = transform_data(raw_obj)

            # The table construct is built from all keys, before filtering
            columns = [column(name) for name in obj.keys()]
            model = table(tablename, *columns)

            if tablename not in existing_schema:
                print(
                    f"Skipping write of {tablename} since it doesn't exist in hardcoded schema"
                )
                continue

            # Keep only the fields the table is known to have
            known_fields = existing_schema[tablename]
            newdata = {}
            for key, value in obj.items():
                if key not in known_fields:
                    print(
                        f"Dropping key '{key}' with value '{value}' since it doesn't exist in table {tablename}"
                    )
                    continue
                newdata[key] = value

            upsert(conn, model, newdata)

    return {"statusCode": 200, "body": "ok"}

コード例 #12

0

ファイルを表示

ファイル: main.py プロジェクト: radiumweilei/chinahadoop-python-ai-2

def main():
    """
        Main entry point: train, compare and plot the zoo classifiers.

        Reads ``zoo.csv`` (restricted to ``config.all_cols``), holds out a
        quarter of the rows for testing, evaluates every configured
        classifier and parameter grid via ``utils.train_test_model`` and
        writes the comparison table (CSV) and a bar-chart figure (PNG) into
        ``config.output_path``.
    """
    # Load the data
    csv_path = os.path.join(config.dataset_path, 'zoo.csv')
    raw_data = pd.read_csv(csv_path, usecols=config.all_cols)

    # Split into train / test sets (25% held out, fixed random seed)
    train_data, test_data = train_test_split(
        raw_data, test_size=1/4, random_state=10)

    # Inspect the datasets
    # utils.inspect_dataset(train_data, test_data)

    # Feature engineering
    print('\n===================== 特征工程 =====================')
    X_train, X_test = utils.transform_data(train_data, test_data)

    # Labels
    y_train = train_data[config.label_col].values
    y_test = test_data[config.label_col].values

    # Modelling and validation
    print('\n===================== 数据建模及验证 =====================')

    # Stacked ensemble: three base learners combined by logistic regression
    base_learners = [KNeighborsClassifier(), SVC(), DecisionTreeClassifier()]
    sclf = StackingClassifier(classifiers=base_learners,
                              meta_classifier=LogisticRegression())

    # Model name -> (estimator, parameter grid)
    model_name_param_dict = {
        'kNN': (KNeighborsClassifier(), {'n_neighbors': [5, 25, 55]}),
        'LR': (LogisticRegression(), {'C': [0.01, 1, 100]}),
        'SVM': (SVC(), {'C': [0.01, 1, 100]}),
        'DT': (DecisionTreeClassifier(), {'max_depth': [50, 100, 150]}),
        'Stacking': (sclf, {
            'kneighborsclassifier__n_neighbors': [5, 25, 55],
            'svc__C': [0.01, 1, 100],
            'decisiontreeclassifier__max_depth': [50, 100, 150],
            'meta-logisticregression__C': [0.01, 1, 100],
        }),
        'AdaBoost': (AdaBoostClassifier(),
                     {'n_estimators': [50, 100, 150, 200]}),
        'GBDT': (GradientBoostingClassifier(),
                 {'learning_rate': [0.01, 0.1, 1, 10, 100]}),
        'RF': (RandomForestClassifier(),
               {'n_estimators': [100, 150, 200, 250]}),
    }

    # DataFrame that collects the per-model results, one row per model
    accuracy_col = 'Accuracy (%)'
    time_col = 'Time (s)'
    results_df = pd.DataFrame(index=list(model_name_param_dict.keys()),
                              columns=[accuracy_col, time_col])
    results_df.index.name = 'Model'
    for model_name, (model, param_range) in model_name_param_dict.items():
        _, best_acc, mean_duration = utils.train_test_model(
            X_train, y_train, X_test, y_test,
            model_name, model, param_range)
        # Accuracy is stored as a percentage
        results_df.loc[model_name, accuracy_col] = best_acc * 100
        results_df.loc[model_name, time_col] = mean_duration

    results_df.to_csv(os.path.join(config.output_path, 'model_comparison.csv'))

    # Compare models and results
    print('\n===================== 模型及结果比较 =====================')

    # One bar chart per metric, side by side: (column, title, extra kwargs)
    panels = (
        (accuracy_col, 'Accuracy(%)', {'ylim': [60, 100]}),
        (time_col, 'Time(s)', {}),
    )
    plt.figure(figsize=(10, 4))
    for position, (column, title, extra) in enumerate(panels, start=1):
        axis = plt.subplot(1, 2, position)
        results_df.plot(y=[column], kind='bar', ax=axis, title=title,
                        legend=False, **extra)
    plt.tight_layout()
    plt.savefig(os.path.join(config.output_path, 'pred_results.png'))
    plt.show()

コード例 #13

0

ファイルを表示

ファイル: naive_bayes.py プロジェクト: edisonlee0212/MLProjects

 def add_neg(self, tr_xi):
     """Count the accepted indices of ``tr_xi`` in ``self.x_dict_neg``.

     Every index yielded by ``transform_data`` that lies strictly between 2
     and ``self.dimension + 3`` has its entry in ``self.x_dict_neg``
     incremented by one.
     """
     for idx in transform_data(tr_xi):
         if 2 < idx < self.dimension + 3:
             self.x_dict_neg[idx] += 1