Ejemplos de DataContainer.GetArray en Python

Lenguaje de programación: Python

Namespace/Package Name: BC.DataContainer.DataContainer

Clase / Tipo: DataContainer

Método / Función: GetArray

Ejemplos en hotexamples.com: 6

Python DataContainer.GetArray - 6 ejemplos encontrados. Estos son los ejemplos en Python del mundo real mejor valorados de BC.DataContainer.DataContainer.DataContainer.GetArray extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Métodos usados con frecuencia

Mostrar Ocultar

DataContainer(20)

Load(12)

GetArray(6)

Save(5)

GetFrame(3)

SetFrame(3)

UpdateFrameByData(2)

GetCaseName(2)

GetFeatureName(2)

GetLabel(2)

UpdateDataByFrame(1)

RemoveInvalid(1)

HasInvalidNumber(1)

IsEmpty(1)

IsBinaryLabel(1)

FindInvalidNumber(1)

GetData(1)

FindInvalidNumberIndex(1)

UsualNormalize(1)

Ejemplo n.º 1

Mostrar archivo

Archivo: DataSeparate.py Proyecto: zhangjingcode/FAE

    def VisualizePartsVariance(self,
                               dc: DataContainer,
                               max_k=None,
                               method='SSE',
                               store_folder=None,
                               is_show=True):
        # method must be one of SSE or SC. SSE denotes xxxx, SC denotes Silhouette Coefficient

        # TODO: Normalize the train_data
        data = dc.GetArray().transpose()

        if max_k is None:
            max_k = min(data.shape[0], 50)

        assert (method in ['SSE', 'SC'])

        #TODO: plot
        score = []
        for k in range(2, max_k):
            if method == 'SSE':
                pass
            elif method == 'SC':
                pass

        if store_folder and os.path.isdir(store_folder):
            plt.savefig(os.path.join(store_folder, 'ClusteringPlot.jpg'))

        if is_show:
            plt.show()

Ejemplo n.º 2

Mostrar archivo

    def VisualizePartsVariance(self, dc: DataContainer, max_k=None, method='SSE',
                               store_folder=None, is_show=True):
        # method must be one of SSE or SC. SSE denotes xxxx, SC denotes Silhouette Coefficient

        data = dc.GetArray()  # get train data
        processed_data = self._DataPreProcess(dc)

        if max_k is None:
            max_k = min(data.shape[0], 50)

        assert(method in ['SSE', 'SC'])

        score = []
        for k in range(2, max_k):
            print('make cluster k=', k)
            estimator = KMeans(n_clusters=k) 
            estimator.fit(processed_data)
            if method == 'SSE':
                score.append(estimator.inertia_)
            elif method == 'SC':
                score.append(silhouette_score(processed_data, estimator.labels_, metric='euclidean'))
        X = range(2, max_k)
        plt.xlabel('k')
        plt.ylabel(method)
        plt.plot(X, score, 'o-')

        if store_folder and os.path.isdir(store_folder):
            plt.savefig(os.path.join(store_folder, 'ClusteringParameterPlot.jpg'))

        if is_show:
            plt.show()

Ejemplo n.º 3

Mostrar archivo

Archivo: TestFAEModel.py Proyecto: zhangjingcode/FAE

def TestNewData(NewDataCsv, model_folder, result_save_path=''):
    '''

    :param NewDataCsv: New radiomics feature matrix csv file path
    :param model_folder:The trained model path
    :return:classification result
    '''
    train_info = LoadTrainInfo(model_folder)
    new_data_container = DataContainer()

    #Normlization

    new_data_container.Load(NewDataCsv)

    # feature_selector = FeatureSelector()
    # feature_selector.SelectFeatureByName(new_data_container, train_info['selected_features'], is_replace=True)

    new_data_container = train_info['normalizer'].Transform(new_data_container)

    # data_frame = new_data_container.GetFrame()
    # data_frame = data_frame[train_info['selected_features']]
    # new_data_container.SetFrame(data_frame)
    # new_data_container.UpdateDataByFrame()

    ##Model
    train_info['classifier'].SetDataContainer(new_data_container)
    model = train_info['classifier'].GetModel()
    predict = model.predict_proba(new_data_container.GetArray())[:, 1]

    label = new_data_container.GetLabel()
    case_name = new_data_container.GetCaseName()

    test_result_info = [['CaseName', 'Pred', 'Label']]
    for index in range(len(label)):
        test_result_info.append(
            [case_name[index], predict[index], label[index]])

    metric = EstimateMetirc(predict, label)
    info = {}
    info.update(metric)
    cv = CrossValidation()

    print(metric)
    print('\t')

    if result_save_path:
        cv.SaveResult(info, result_save_path)
        np.save(os.path.join(result_save_path, 'test_predict.npy'), predict)
        np.save(os.path.join(result_save_path, 'test_label.npy'), label)
        with open(os.path.join(result_save_path, 'test_info.csv'),
                  'w',
                  newline='') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerows(test_result_info)

    return metric

Ejemplo n.º 4

Mostrar archivo

            input_data_container = output
        return output

    def SaveInfo(self, store_folder, all_features):
        for fs in self.__selector_list:
            fs.SaveInfo(store_folder, all_features)

    def SaveDataContainer(self, data_container, store_folder, store_key):
        for fs in self.__selector_list:
            fs.SaveDataContainer(data_container, store_folder, store_key)


################################################################

if __name__ == '__main__':
    from BC.DataContainer.DataContainer import DataContainer
    from BC.FeatureAnalysis.Normalizer import NormalizerZeroCenter
    from BC.FeatureAnalysis.DimensionReduction import DimensionReductionByPCC

    dc = DataContainer()
    pcc = DimensionReductionByPCC()
    fs = FeatureSelectByKruskalWallis(selected_feature_number=5)

    dc.Load(r'..\..\Demo\train_numeric_feature.csv')

    dc = NormalizerZeroCenter.Run(dc)
    dc = pcc.Run(dc)
    print(dc.GetArray().shape)
    dc = fs.Run(dc)
    print(dc.GetArray().shape)

Ejemplo n.º 5

Mostrar archivo

            vif_dict[exog] = vif

            # calculate tolerance
            tolerance = 1 - r_squared
            tolerance_dict[exog] = tolerance

        # return VIF DataFrame
        df_vif = pd.DataFrame({'VIF': vif_dict, 'Tolerance': tolerance_dict})

        return df_vif


if __name__ == '__main__':
    data_path = r'..\..\Demo\train_numeric_feature.csv'
    from BC.DataContainer.DataContainer import DataContainer
    from BC.FeatureAnalysis.Normalizer import NormalizerZeroCenter
    pca = DimensionReductionByPCA()

    dc = DataContainer()
    dc.Load(data_path)
    dc = NormalizerZeroCenter.Run(dc)
    # dc = pca.Run(dc)

    df = pd.DataFrame(dc.GetArray(),
                      index=dc.GetCaseName(),
                      columns=dc.GetFeatureName())
    dr = DimensionReductionByVIF()

    new_df = dr.CalculateVIF(df)

    print(dc.GetArray().shape, new_df.shape)

Ejemplo n.º 6

Mostrar archivo

        return "To Remove the unbalance of the training data set, we applied an Tomek link after the " \
               "Synthetic Minority Oversampling TEchnique (SMOTE) to make positive/negative samples balance. "

    def Run(self, data_container, store_path=''):
        data, label, feature_name, label_name = data_container.GetData()
        data_resampled, label_resampled = self._model.fit_sample(data, label)

        new_case_name = [
            'Generate' + str(index) for index in range(data_resampled.shape[0])
        ]
        new_data_container = DataContainer(data_resampled, label_resampled,
                                           data_container.GetFeatureName(),
                                           new_case_name)
        if store_path != '':
            if os.path.isdir(store_path):
                new_data_container.Save(
                    os.path.join(store_path,
                                 '{}_features.csv'.format(self._name)))
            else:
                new_data_container.Save(store_path)
        return new_data_container


if __name__ == '__main__':
    dc = DataContainer()
    dc.Load(r'..\..\Example\numeric_feature.csv')
    print(dc.GetArray().shape, np.sum(dc.GetLabel()))
    b = SmoteTomekSampling()
    new = b.Run(dc)
    print(new.GetArray().shape, np.sum(new.GetLabel()))