Example 1
    def make_sol_dict():
        """
        Run the current merge pipeline over the raw test workbooks and
        pickle both the merge solutions and the raw (pre-merge) holders.
        :return: None; writes merge_solutions.obj and raw_test.obj.
        """
        test_workbooks = ["FORMAT3_Copy of KommuneMTPLforTriangle.xls",
            "C Triangulations analysis R2017 GC20161109.xls",
            "EVOLUTION 2017 _ M+F - Triangles cat nat brut net.xls",
            "Bsp8 _ Dreiecke aus GCNA für CU1.4.1.xls",
            "Analysis MTPL MOD.xls",
            "Bsp6 _ Dreiecke aus GCNA für CU1.4.1.xls",
            "FORMAT6_sinistres.xls",
            "FORMAT1_LOSSES-MTPL-OVER-500-GROUP-2005_modified.xls"]
        solutions_dict = {}
        raw_dict = {}
        for file_name in test_workbooks:
            # load_excel returns a (possibly normalized) file name along
            # with the sheet records, so rebind file_name to its result.
            sr_list, file_name = ExcelLoader.load_excel(
                pdir.RESOURCES_DIR + "/raw_test_files/" + file_name)
            holder = DataHolder()
            for record in sr_list:
                holder.add_sheet(record.sheet_name,
                                 pd.DataFrame(columns=record.headers, data=record.row_vals),
                                 pd.DataFrame(columns=record.headers, data=record.xls_types),
                                 orig_sheet_name=record.sheet_name)

            holder = SheetPreProcessor.separate_components(holder)
            # Snapshot the holder before merging so tests can diff against it.
            raw_dict[file_name] = holder.encode()
            solutions_dict[file_name] = HorizontalMerger.horizontal_merge(holder)
        solutions_dict = MergePararametersOptimizer.make_ind_col_dict(solutions_dict)
        with open(pdir.RESOURCES_DIR + "/test/merge_solutions.obj", "wb") as temp_file:
            pickle.dump(solutions_dict, temp_file)
        with open(pdir.RESOURCES_DIR + "/test/raw_test.obj", "wb") as temp_file:
            pickle.dump(raw_dict, temp_file)
Example 2
    def post(self, request):
        """Build a DataHolder from the posted sheet records and return it encoded.

        Only sheets whose names appear in ``selected_sheets`` are included.
        """
        payload = request.data
        sheet_records = jsonpickle.decode(payload['sr_list'])
        holder = DataHolder(payload['dhName'])
        selected_sheets = payload['selected_sheets']

        # Keep only the sheets the user selected on the client side.
        for record in sheet_records:
            if record.sheet_name not in selected_sheets:
                continue
            values_frame = pd.DataFrame(columns=record.headers, data=record.row_vals)
            types_frame = pd.DataFrame(columns=record.headers, data=record.xls_types)
            holder.add_sheet(record.sheet_name, values_frame, types_frame,
                             orig_sheet_name=record.sheet_name)

        return Response(holder.encode(), status=200)
Example 3
class DataHolderTest(TestCase):
    """Tests for DataHolder: in-place updates, mementos, and (de)serialization."""

    def setUp(self):
        """Build a holder with three sheets; the first two share the name 'first'."""
        self.names = ["first", "second"]
        self.dh = DataHolder("test")
        # (sheet name, original sheet name, string values, typed values)
        sheet_specs = [
            (self.names[0], "1",
             {'col1': ["1", "2"], 'col2': ["3", "4"]},
             {'col1': [1, 2], 'col2': [3, 4]}),
            (self.names[0], "2",
             {'col1': ["1", "2"], 'col2': ["1", "1"]},
             {'col1': [1, 2], 'col2': [1, 1]}),
            (self.names[1], "2",
             {'col1': ["1", "2"], 'col2': ["15", "16"]},
             {'col1': [1, 2], 'col2': [15, 16]}),
        ]
        for name, orig_name, str_data, typed_data in sheet_specs:
            self.dh.add_sheet(name,
                              pd.DataFrame(data=str_data),
                              pd.DataFrame(data=typed_data),
                              orig_sheet_name=orig_name)

    def test_set_card_ids(self):
        """Card ids are assigned per group across the flat struct list."""
        trngs = [{"group_id": "one"}, {"group_id": "two"}]
        RowParser.set_card_ids(trngs, self.dh)
        expected_ids = [0, 1, 1]
        for ds, expected in zip(self.dh.data_struct_list, expected_ids):
            self.assertEqual(ds.card_id, expected)

    # Per-sheet edits applied by the update tests:
    # index -> (column, row, string value, typed value)
    _EDITS = {
        0: ("col2", 0, "5", 5),
        1: ("col1", 0, "5", 5),
        2: ("col1", 1, "19", 19),
    }

    def _apply_edits(self):
        """Mutate one cell in each sheet and push it back through update_with_ind."""
        for dh_ind, df_data, df_profiles in self.dh.enumerate():
            edit = self._EDITS.get(dh_ind)
            if edit is not None:
                col, row, str_val, typed_val = edit
                df_data.loc[row, col] = str_val
                df_profiles.loc[row, col] = typed_val
            self.dh.update_with_ind(dh_ind, df_data, df_profiles)

    def test_updating(self):
        """Updates keep data_dict, data_struct_list and id_dict consistent."""
        self._apply_edits()

        # (dict key, position within that key, flat index) triples that
        # must refer to the same underlying sheet.
        locations = [(self.names[0], 0, 0),
                     (self.names[0], 1, 1),
                     (self.names[1], 0, 2)]
        for key, pos, flat_ind in locations:
            assert_frame_equal(self.dh.data_dict[key][pos].df_data,
                               self.dh.data_struct_list[flat_ind].df_data)
        for key, pos, flat_ind in locations:
            assert_frame_equal(self.dh.data_dict[key][pos].df_profiles,
                               self.dh.data_struct_list[flat_ind].df_profiles)

        for key, structs in self.dh.data_dict.items():
            for d_struct in structs:
                self.assertEqual(key, d_struct.name)

        for ds in self.dh.data_struct_list:
            self.assertEqual(ds, self.dh.id_dict[ds.id])

    def test_mementos(self):
        """Comparing before/after mementos flags exactly the corrected cell."""
        self.dh.create_memento()
        self._apply_edits()
        self.dh.create_memento()

        diff_dict_list = SheetStateComparer.compare_states(self.dh.mementos[0],
                                                           self.dh.mementos[1])
        for i in range(2):
            for j in range(2):
                cell = diff_dict_list[0]["diff_array"][i][j]
                expected = "Corrected" if (i, j) == (0, 1) else "No change"
                self.assertEqual(cell.change, expected)

    def test_serialization(self):
        """encode/decode round-trips frames, roles, and struct ids."""
        first = self.dh.data_struct_list[0]
        first.roles.append("Claims Paid")
        first.df_data.sort_values("col1", ascending=False, inplace=True)
        round_tripped = DataHolder.decode(self.dh.encode())

        for ind in range(3):
            assert_frame_equal(self.dh.data_struct_list[ind].df_data,
                               round_tripped.data_struct_list[ind].df_data)
        for ind in range(3):
            assert_frame_equal(self.dh.data_struct_list[ind].df_profiles,
                               round_tripped.data_struct_list[ind].df_profiles)
        self.assertEqual(round_tripped.data_struct_list[0].roles[0], "Claims Paid")
        # Ids must survive the round trip unchanged.
        for ind in range(len(self.dh.data_struct_list)):
            self.assertEqual(round_tripped.data_struct_list[ind].id,
                             self.dh.data_struct_list[ind].id)
Example 4
    def post(self, request):
        """Build a DataHolder from the posted sheets, run the triangle
        pipeline, and return parsed triangles plus encoded holders.

        Returns a Response with either {'data': ...} on success or
        {'response_error': ...} when the pipeline raises DataHolderException.
        """
        # Receive the name of the uploaded file. Guard early: a missing
        # name would otherwise fail later in the output-path concatenation.
        filename = request.data.get('fileName')
        if filename is None:
            raise ValueError("No file name provided")

        # Build the data holder from the user-selected sheets.
        sr_list = jsonpickle.decode(request.data['sr_list'])
        selected_sheets = request.data['selected_sheets']

        data_holder = DataHolder(filename)

        for sr in sr_list:
            if sr.sheet_name in selected_sheets:
                data_holder.add_sheet(sr.sheet_name,
                                      pd.DataFrame(columns=sr.headers,
                                                   data=sr.row_vals),
                                      pd.DataFrame(columns=sr.headers,
                                                   data=sr.xls_types),
                                      orig_sheet_name=sr.sheet_name)

        response_data = {}
        # NOTE: the constructor above always yields an instance, so only the
        # empty-holder case needs to be guarded (the old `is None` check was
        # unreachable and has been removed).
        if data_holder.n == 0:
            raise ValueError("No sheets in data holder")

        # Receive the triangle formats chosen on the client.
        triangles = request.data.get('triangles')
        user_defined_triangles = triangles['templates']
        outputFormats = triangles['output_formats']  # NOTE(review): currently unused
        n_outputs = triangles['number_of_outputs']
        input_format = triangles['inputFormat']
        tr_type = user_defined_triangles[0]['type']
        try:
            # Choose the pipeline based on whether the input is already a
            # triangle or a flat table that must first be triangulated.
            if input_format[0] == 'triangle':
                data_holder_dict, data_holder = TrianglePipeline.triangle_pipeline_dh(
                    data_holder, tri_type=tr_type, n_outputs=n_outputs)
            else:
                data_holder_dict, data_holder = TrianglePipeline.table_triangle_pipeline_dh(
                    data_holder)
            # DataHolder manipulation: assign card ids, then match and parse
            # the user-defined triangles against the holder's contents.
            data_holder, group_ids, sheet_names = RowParser.set_card_ids(
                user_defined_triangles, data_holder)
            user_defined_triangles = InputMatcher.match_triangles_to_output(
                user_defined_triangles, data_holder)
            user_defined_triangles = RowParser.parse_output_from_triangle_forms(
                user_defined_triangles, data_holder)
        except DataHolderException as err:
            # Surface pipeline failures to the client with the holder state.
            data = {}
            data['message'] = err.message
            data['dh'] = err.dh
            return Response({'response_error': data})

        SheetWriter.trngs_to_existing_excel(
            user_defined_triangles, pdir.TEMP_DIR + ps.OUTPUT_NAME + filename)

        response_data["group_ids"] = group_ids
        response_data['output_triangles'] = user_defined_triangles
        # Build the list used for initial rendering on the client.
        response_data[
            "unit_triangles"] = ConnectDataAPIView.make_unit_triangle_list(
                data_holder)
        response_data["str_data_holder"] = data_holder.encode()
        if len(data_holder_dict) > 1:
            response_data["str_data_holder_dict"] = {
                key: val.encode()
                for key, val in data_holder_dict.items()
            }
        else:
            # Single-holder case: reuse the already-encoded holder.
            response_data["str_data_holder_dict"] = {
                data_holder.name: response_data["str_data_holder"]
            }

        return Response({'data': response_data})