def test_csv_partition_number_0(remove_mindrecord_file):
    """
    Converting csv to mindrecord must fail when 0 is passed as the
    fourth positional argument (the partition number, per the test name).

    Construction and transform() both run inside the raises block, so the
    error may originate from either step.
    """
    expected_message = "Invalid parameter value"
    with pytest.raises(Exception, match=expected_message):
        CsvToMR(CSV_FILE, MINDRECORD_FILE, None, 0).transform()
def test_csv_to_mindrecord_illegal_filename(remove_mindrecord_file):
    """
    Converting csv to mindrecord must fail when the output file name
    contains an illegal character (here "*").

    Construction and transform() both run inside the raises block, so the
    error may originate from either step.
    """
    illegal_name = "not_*ok"
    expected_message = "File name should not contains"
    with pytest.raises(Exception, match=expected_message):
        CsvToMR(CSV_FILE, illegal_name).transform()
def test_csv_to_mindrecord_partition_number_none(remove_mindrecord_file):
    """
    Converting csv to mindrecord must fail when None is passed as the
    fourth positional argument (the partition number, per the test name).

    Construction and transform() both run inside the raises block, so the
    error may originate from either step.
    """
    expected_message = "The parameter partition_number must be int"
    with pytest.raises(Exception, match=expected_message):
        CsvToMR(CSV_FILE, MINDRECORD_FILE, None, None).transform()
def test_csv_partition_number_with_illegal_columns(remove_mindrecord_file):
    """
    Converting csv to mindrecord must fail when the column list holds a
    non-string entry (here ["Sales", 2]).

    Despite its name, this test exercises the columns argument, not the
    partition number. Construction and transform() both run inside the
    raises block, so the error may originate from either step.
    """
    mixed_type_columns = ["Sales", 2]
    expected_message = "The parameter columns_list must be list of str."
    with pytest.raises(Exception, match=expected_message):
        CsvToMR(CSV_FILE, MINDRECORD_FILE, mixed_type_columns).transform()
def test_csv_to_mindrecord_with_columns(remove_mindrecord_file):
    """
    Convert csv to mindrecord using only the 'Age' and 'Sales' columns.

    Checks that a data file and a ".db" companion exist for every index
    below PARTITION_NUMBER, then hands the first output file to read().
    """
    selected_columns = ['Age', 'Sales']
    converter = CsvToMR(CSV_FILE,
                        MINDRECORD_FILE,
                        columns_list=selected_columns,
                        partition_number=PARTITION_NUMBER)
    converter.transform()

    for shard_index in range(PARTITION_NUMBER):
        shard_path = MINDRECORD_FILE + str(shard_index)
        assert os.path.exists(shard_path)
        assert os.path.exists(shard_path + ".db")

    # read() is defined elsewhere in this file; presumably it validates the
    # listed columns and the record count (5) -- confirm against its body.
    first_shard = MINDRECORD_FILE + "0"
    read(first_shard, ["Age", "Sales"], 5)
def test_csv_to_mindrecord(remove_mindrecord_file):
    """
    Convert csv to mindrecord without restricting the columns.

    Checks that a data file and a ".db" companion exist for every index
    below PARTITION_NUMBER, then hands the first output file to read().
    """
    converter = CsvToMR(CSV_FILE,
                        MINDRECORD_FILE,
                        partition_number=PARTITION_NUMBER)
    converter.transform()

    for shard_index in range(PARTITION_NUMBER):
        shard_path = MINDRECORD_FILE + str(shard_index)
        assert os.path.exists(shard_path)
        assert os.path.exists(shard_path + ".db")

    # read() is defined elsewhere in this file; presumably it validates the
    # listed columns and the record count (5) -- confirm against its body.
    all_columns = ["Age", "EmployNumber", "Name", "Sales", "Over18"]
    read(MINDRECORD_FILE + "0", all_columns, 5)
def test_csv_to_mindrecord_default_partition_number(remove_mindrecord_file):
    """
    Convert csv to mindrecord without passing a partition number.

    Checks that the output lands at MINDRECORD_FILE itself (no index
    suffix) together with a ".db" companion, then hands it to read().
    """
    converter = CsvToMR(CSV_FILE, MINDRECORD_FILE)
    converter.transform()

    db_path = MINDRECORD_FILE + ".db"
    assert os.path.exists(MINDRECORD_FILE)
    assert os.path.exists(db_path)

    # read() is defined elsewhere in this file; presumably it validates the
    # listed columns and the record count (5) -- confirm against its body.
    all_columns = ["Age", "EmployNumber", "Name", "Sales", "Over18"]
    read(MINDRECORD_FILE, all_columns, 5)
def test_csv_to_mindrecord_with_no_exist_columns(remove_mindrecord_file):
    """
    Converting csv to mindrecord must fail when the column list names a
    column ('ssales') that the error message reports as not existing.

    Construction and transform() both run inside the raises block, so the
    error may originate from either step.
    """
    requested_columns = ['Age', 'ssales']
    expected_message = (
        "The parameter columns_list is illegal, column ssales does not exist."
    )
    with pytest.raises(Exception, match=expected_message):
        CsvToMR(CSV_FILE,
                MINDRECORD_FILE,
                columns_list=requested_columns,
                partition_number=PARTITION_NUMBER).transform()