Python train_dev_test_split Examples

Programming Language: Python

Namespace/Package Name: complainer.splitter

Method/Function: train_dev_test_split

Examples at hotexamples.com: 7

Python train_dev_test_split - 7 examples found. These are the top-rated real-world Python examples of complainer.splitter.train_dev_test_split, extracted from open source projects. You can rate examples to help us improve their quality.

Example #1

Show file

 def test_combined_fractions_greater_than_one_throw(self, df):
     """Expect ``ValueError`` when the dev and test fractions sum to more than 1."""
     # 0.5 + 0.51 = 1.01, which leaves no room for a training subset.
     oversized_fractions = {"dev_fraction": 0.5, "test_fraction": 0.51}
     with pytest.raises(ValueError):
         train_dev_test_split(df, **oversized_fractions)

Example #2

Show file

File: split_train_dev_test_data.py Project: robertswh/moaney

# Input CSV path and output directory are supplied through the environment;
# a missing variable raises KeyError right here.
INPUT_FILE = os.environ['INPUT_FILE']
TARGET_DIRECTORY = os.environ['TARGET_DIRECTORY']

# ## Read raw data

df = pd.read_csv(INPUT_FILE)

# ## Filter to data containing complaints only
# Rows whose narrative column is null are dropped.

df = df[df['Consumer complaint narrative'].notnull()]

# ## Split data into train, dev and test subsets

train, dev, test = train_dev_test_split(
    df, dev_fraction=0.2, test_fraction=0.1
)

# ## Create target directory
# If necessary. makedirs(exist_ok=True) also creates missing parent
# directories and avoids the race between an exists() check and mkdir().

os.makedirs(TARGET_DIRECTORY, exist_ok=True)

# ## Write subsets to disk
# Quote all fields to avoid weird character shenanigans.

train.to_csv(os.path.join(TARGET_DIRECTORY, 'train.csv'), quoting=csv.QUOTE_ALL)
dev.to_csv(os.path.join(TARGET_DIRECTORY, 'dev.csv'), quoting=csv.QUOTE_ALL)
test.to_csv(os.path.join(TARGET_DIRECTORY, 'test.csv'), quoting=csv.QUOTE_ALL)

Example #3

Show file

 def test_same_random_seed_returns_same_split(self, df):
     """Two calls with the same ``random_state`` must produce equal frames."""
     seed = 42
     first_train, first_dev, first_test = train_dev_test_split(
         df, random_state=seed
     )
     second_train, second_dev, second_test = train_dev_test_split(
         df, random_state=seed
     )
     # Compare the subsets pairwise, in train / dev / test order.
     matched_pairs = (
         (first_train, second_train),
         (first_dev, second_dev),
         (first_test, second_test),
     )
     for left, right in matched_pairs:
         assert_frame_equal(left, right)

Example #4

Show file

 def test_no_duplicates_in_splits(self, df):
     """Stacking the three splits together must not yield any repeated rows."""
     train, dev, test = train_dev_test_split(df)
     combined = pd.concat([train, dev, test])
     unique_rows = combined.drop_duplicates()
     # If dropping duplicates removes nothing, no row appears more than once.
     assert len(unique_rows) == len(combined)

Example #5

Show file

 def test_splits_have_expected_lengths(self, df):
     """A split with default arguments should give 60/20/20 rows.

     NOTE(review): the expected sizes depend on the ``df`` fixture and on
     the function's default fractions, neither of which is visible here
     (presumably 100 rows) -- confirm against the fixture.
     """
     train, dev, test = train_dev_test_split(df)
     expected_sizes = ((train, 60), (dev, 20), (test, 20))
     for subset, size in expected_sizes:
         assert len(subset) == size

Example #6

Show file

 def test_total_length_of_splits_unchanged(self, df):
     """The sizes of the three splits must add up to the size of ``df``."""
     train, dev, test = train_dev_test_split(df)
     total_rows = sum(len(subset) for subset in (train, dev, test))
     assert total_rows == len(df)

Example #7

Show file

 def test_train_dev_test_split_returns_six_objects(self, df):
     """Check that the split returns exactly three objects.

     NOTE(review): the test name says "six", but the assertion checks for
     three. The name is left untouched so existing test selectors still
     match.
     """
     assert len(train_dev_test_split(df)) == 3