Esempio n. 1
0
    def testTurnTriangle(self):
        """Check ``TriangleStripper.turn_triangle`` against a manual transpose.

        Three sheets are added to a ``DataHolder``: the first two use the
        columns ``col1, col2, 1991, 1992`` and the third ends in ``1992-``.
        For each sheet the last three columns are flagged as triangle columns.
        The expected frames are built on a copy taken before the call by
        transposing the flagged 3x3 part of every sheet; the holder passed to
        ``turn_triangle`` must end up equal to that copy.
        """
        def tr_key(ds):
            # Lookup key: the column labels followed by the frame's own shape.
            return tuple(ds.df_data.columns) + ds.df_data.shape

        dh = DataHolder("test")
        data = pd.DataFrame(data={'col1': ["", "", 0], 'col2': [0, 0, 2], '1991': [0, 0, 0], '1992': [1, 0, 0]})
        prof = pd.DataFrame(data={'col1': [1, 1, 6], 'col2': [6, 6, 6], '1991': [6, 6, 6], '1992': [2, 6, 6]})
        dh.add_sheet("test", data, prof)
        data = pd.DataFrame(data={'col1': ["", "", 0], 'col2': [1, 1, 0], '1991': [0, 0, 0], '1992': [1, 0, 0]})
        prof = pd.DataFrame(data={'col1': [1, 1, 6], 'col2': [6, 6, 6], '1991': [6, 6, 6], '1992': [2, 6, 6]})
        dh.add_sheet("test", data, prof)
        data = pd.DataFrame(data={'col1': ["", "", 0], 'col2': [3, 1, 0], '1991': [0, 0, 0], '1992-': [1, 0, 0]})
        prof = pd.DataFrame(data={'col1': [1, 1, 6], 'col2': [6, 6, 6], '1991': [6, 6, 6], '1992-': [2, 6, 6]})
        dh.add_sheet("test", data, prof)

        # One entry per distinct column layout. Each key is derived from the
        # sheet it describes, so it always matches the lookup in the loop
        # below. The sheet at index 1 has the same labels and shape as the
        # sheet at index 0 and therefore resolves to the first entry.
        tr_cols_dict = {
            tr_key(ds): pd.Series([False, True, True, True], index=ds.df_data.columns)
            for ds in (dh.data_struct_list[0], dh.data_struct_list[2])
        }

        # Copy before the call so the expected result starts from unmodified data.
        dh_copy = dh.copy_without_memory()
        TriangleStripper.turn_triangle(dh, tr_cols_dict, alt_min_score=0.6)

        # Build the expectation: transpose the flagged columns of every sheet.
        for ds in dh_copy.data_struct_list:
            tr_cols = tr_cols_dict[tr_key(ds)]
            flagged = tr_cols.index[tr_cols]
            tri_part = ds.df_data[flagged].values
            ds.df_data[flagged] = np.transpose(tri_part)

        for ds, ds_copy in zip(dh.data_struct_list, dh_copy.data_struct_list):
            self.assertTrue(ds.df_data.equals(ds_copy.df_data))
            self.assertTrue(ds.df_profiles.equals(ds_copy.df_profiles))
    def find_triangles(dh, **kwargs):
        """Split the data structs of ``dh`` into triangles and the rest.

        Every data struct is first classified with
        ``TriangleFinder.is_triangle``. The structs rejected in that pass are
        then checked again through
        ``TriangleFinder.find_triangles_by_similarity``, and those it reports
        are moved over to the triangle holder.

        Args:
            dh: DataHolder whose ``data_struct_list`` is scanned.
            **kwargs: Forwarded unchanged to ``TriangleFinder.is_triangle``.
                The key ``return_meta`` (default ``False``) is also read here.

        Returns:
            The DataHolder with the triangles, or the tuple
            ``(triangle_dh, rest_dh)`` when ``return_meta`` is truthy.
        """
        return_meta = kwargs.get('return_meta', False)

        triangle_dh = DataHolder(dh.name)
        rest_dh = DataHolder(dh.name + '_non-triangular')

        # First pass: direct classification of every data struct.
        for ds in dh.data_struct_list:
            if TriangleFinder.is_triangle(ds, **kwargs):
                TriangleFinder.add_triangle_to_dh(ds, triangle_dh)
            else:
                rest_dh.add_sheet(ds.name,
                                  ds.df_data,
                                  ds.df_profiles,
                                  orig_sheet_name=ds.orig_sheet_name)

        # Second pass: pick up rejected structs that are similar to triangles.
        triangle_similar = TriangleFinder.find_triangles_by_similarity(
            triangle_dh, rest_dh)
        # Explicit length check kept on purpose: the container type returned
        # above is not visible here, so plain truthiness may not be equivalent.
        if len(triangle_similar) > 0:
            # Rebuild the rest holder from a copy, leaving out the promoted structs.
            # NOTE(review): relies on the copy keeping the ``ds.id`` values that
            # ``triangle_similar`` refers to - confirm.
            rest_copy = rest_dh.copy_without_memory()
            rest_dh = DataHolder(rest_copy.name)
            for ds in rest_copy:
                if ds.id in triangle_similar:
                    TriangleFinder.add_triangle_to_dh(ds, triangle_dh)
                else:
                    rest_dh.add_sheet(ds.name,
                                      ds.df_data,
                                      ds.df_profiles,
                                      orig_sheet_name=ds.orig_sheet_name)

        if return_meta:
            return triangle_dh, rest_dh
        return triangle_dh