def test_union_all(self, mock_flat_table):
     """Check that union_all over three one-table collections exposes all three names."""
     table_names = ("MCO", "DCIR", "MCO_CE")
     # One single-entry collection per name, all backed by the same mock table.
     collections = [
         FlatTableCollection({table_name: mock_flat_table})
         for table_name in table_names
     ]
     merged = FlatTableCollection.union_all(collections)
     self.assertEqual(set(table_names), merged.flat_table_names())
def plot_and_save_flattening_stat(
    json, pdf_path, figsize=(8, 5), id_col="NUM_ENQ", date_col="EXE_SOI_DTD", years=None
):
    """
    Visualize flattening stats for the flat tables described by the metadata
    and save the result in a pdf.

    :param json: flat table meta data as a string; it is handed to
    ``FlatTableCollection.load``
    :param pdf_path: the path of pdf to save the stat
    :param figsize: size of the figure, default = (8, 5)
    :param id_col: 'str' identity column, default = 'NUM_ENQ'
    :param date_col: 'str' date column used for 'group by' statement,
    default = 'EXE_SOI_DTD'
    :param years: a list of special years in which the data will be loaded,
    default is None
    :return: None
    :raises AssertionError: if ``json`` is not a string
    """
    # Raise explicitly instead of using ``assert`` so the check is not stripped
    # when Python runs with -O; exception type and message are unchanged.
    if not isinstance(json, str):
        raise AssertionError("expected a string in the json format")
    metadata = FlatTableCollection.load(json)
    flat_tables = list(metadata.flat_tables.values())
    # NOTE(review): ``registry`` is not defined in this function; presumably a
    # module-level name -- confirm it is in scope.
    save_plots(
        registry,
        pdf_path,
        flat_tables,
        figsize=figsize,
        id_col=id_col,
        date_col=date_col,
        years=years,
    )
 def test_difference(self, mock_flat_table):
     """Check that {MCO, DCIR} minus {DCIR} leaves only MCO."""
     left = FlatTableCollection(dict.fromkeys(("MCO", "DCIR"), mock_flat_table))
     right = FlatTableCollection({"DCIR": mock_flat_table})
     remaining = left.difference(right)
     self.assertEqual({"MCO"}, remaining.flat_table_names())
    def testFromJson(
        self, mock_flat_table_read_data_frame, mock_single_table_read_data_frame
    ):
        mock_flat_table_read_data_frame.return_value = self.spark.createDataFrame(
            pd.DataFrame(
                {
                    "NUM_ENQ": ["1", "2", "3"],
                    "EXE_SOI_DTD": ["01/01/2015", "01/02/2016", "01/03/2017"],
                }
            )
        )
        mock_single_table_read_data_frame.return_value = self.spark.createDataFrame(
            pd.DataFrame(
                {
                    "NUM_ENQ": ["1", "2", "3"],
                    "EXE_SOI_DTD": ["01/01/2015", "01/02/2016", "01/03/2017"],
                }
            )
        )
        json = """
{
  "class_name" : "fr.polytechnique.cmap.cnam.flattening.FlatteningMainJoin$",
  "start_timestamp" : "2019-09-26T13:30:24Z",
  "end_timestamp" : "2019-09-26T17:02:25Z",
  "operations" : [{
    "output_table" : "ER_UCD_F",
    "output_path" : "/user/ds/CNAM243/flattening/single_table",
    "output_type" : "single_table",
    "sources" : ["/shared/Observapur/raw_data/DCIR_2010/ER_UCD_F_2010.CSV",
     "/shared/Observapur/raw_data/DCIR_2011/ER_UCD_F_2011.CSV",
     "/shared/Observapur/raw_data/DCIR_2012/ER_UCD_F_2012.CSV",
     "/shared/Observapur/raw_data/DCIR_2013/ER_UCD_F_2013.CSV",
     "/shared/Observapur/raw_data/DCIR_2014/ER_UCD_F_2014.CSV"],
    "join_keys" : []
  }, {
    "output_table" : "ER_ETE_F",
    "output_path" : "/user/ds/CNAM243/flattening/single_table",
    "output_type" : "single_table",
    "sources" : ["/shared/Observapur/raw_data/DCIR_2010/ER_ETE_F_2010.CSV",
    "/shared/Observapur/raw_data/DCIR_2011/ER_ETE_F_2011.CSV",
    "/shared/Observapur/raw_data/DCIR_2012/ER_ETE_F_2012.CSV",
    "/shared/Observapur/raw_data/DCIR_2013/ER_ETE_F_2013.CSV",
    "/shared/Observapur/raw_data/DCIR_2014/ER_ETE_F_2014.CSV"],
    "join_keys" : []
  }, {
    "output_table" : "ER_PHA_F",
    "output_path" : "/user/ds/CNAM243/flattening/single_table",
    "output_type" : "single_table",
    "sources" : ["/shared/Observapur/raw_data/DCIR_2010/ER_PHA_F_2010.CSV",
    "/shared/Observapur/raw_data/DCIR_2011/ER_PHA_F_2011.CSV",
    "/shared/Observapur/raw_data/DCIR_2012/ER_PHA_F_2012.CSV",
    "/shared/Observapur/raw_data/DCIR_2013/ER_PHA_F_2013.CSV",
    "/shared/Observapur/raw_data/DCIR_2014/ER_PHA_F_2014.CSV"],
    "join_keys" : []
  }, {
    "output_table" : "ER_PRS_F",
    "output_path" : "/user/ds/CNAM243/flattening/single_table",
    "output_type" : "single_table",
    "sources" : ["/shared/Observapur/raw_data/DCIR_2010/ER_PRS_F_2010.CSV",
    "/shared/Observapur/raw_data/DCIR_2011/ER_PRS_F_2011.CSV",
    "/shared/Observapur/raw_data/DCIR_2012/ER_PRS_F_2012.CSV",
    "/shared/Observapur/raw_data/DCIR_2013/ER_PRS_F_2013.CSV",
    "/shared/Observapur/raw_data/DCIR_2014/ER_PRS_F_2014.CSV"],
    "join_keys" : []
  }, {
    "output_table" : "ER_CAM_F",
    "output_path" : "/user/ds/CNAM243/flattening/single_table",
    "output_type" : "single_table",
    "sources" : ["/shared/Observapur/raw_data/DCIR_2010/ER_CAM_F_2010.CSV",
    "/shared/Observapur/raw_data/DCIR_2011/ER_CAM_F_2011.CSV",
    "/shared/Observapur/raw_data/DCIR_2012/ER_CAM_F_2012.CSV",
    "/shared/Observapur/raw_data/DCIR_2013/ER_CAM_F_2013.CSV",
    "/shared/Observapur/raw_data/DCIR_2014/ER_CAM_F_2014.CSV"],
    "join_keys" : []
  }, {
    "output_table" : "DCIR",
    "output_path" : "/user/ds/CNAM243/flattening/flat_table",
    "output_type" : "flat_table",
    "sources" : ["ER_PRS_F", "ER_UCD_F", "ER_CAM_F", "ER_ETE_F", "ER_PHA_F"],
    "join_keys" : ["DCT_ORD_NUM", "FLX_DIS_DTD", "FLX_EMT_NUM", "FLX_EMT_ORD",
    "FLX_EMT_TYP", "FLX_TRT_DTD", "ORG_CLE_NUM", "PRS_ORD_NUM", "REM_TYP_AFF"]
  }]
}
        """
        repo = FlatTableCollection.from_json(json)
        expected_flat_names = {"DCIR"}
        expected_single_names = {
            "ER_PRS_F",
            "ER_UCD_F",
            "ER_CAM_F",
            "ER_ETE_F",
            "ER_PHA_F",
        }
        self.assertEqual(expected_flat_names, repo.flat_table_names())
        self.assertEqual(
            expected_single_names, repo.single_table_names_from_flat_table("DCIR")
        )
        self.assertEqual(set(), repo.single_table_names_from_flat_table("MCO"))
        dcir = repo.get("DCIR")
        self.assertEqual("ER_PRS_F", dcir.single_tables.get("ER_PRS_F").name)
        self.assertEqual("ER_PRS_F", dcir.single_tables.get("ER_PRS_F").characteristics)
 def test_add_flat_table(self, mock_flat_table):
     """Check that add_flat_table makes the new name appear next to the initial ones."""
     initial_tables = dict.fromkeys(("MCO", "DCIR"), mock_flat_table)
     collection = FlatTableCollection(initial_tables)
     collection.add_flat_table("MCO_CE", mock_flat_table)
     self.assertEqual({"MCO", "DCIR", "MCO_CE"}, collection.flat_table_names())
 def test_get(self, mock_flat_table):
     """Check that get returns the stored table for a known name and raises KeyError for an unknown one."""
     repo = FlatTableCollection({"MCO": mock_flat_table, "DCIR": mock_flat_table})
     # assertEquals is a deprecated alias that was removed in Python 3.12;
     # assertEqual is the supported spelling (expected value first, as in
     # the sibling tests).
     self.assertEqual(mock_flat_table, repo.get("MCO"))
     with self.assertRaises(KeyError):
         repo.get("MCO_CE")
 def test_exists(self, mock_flat_table):
     repo = FlatTableCollection({"MCO": mock_flat_table, "DCIR": mock_flat_table})
     self.assertTrue(repo.exists("MCO"))