Ejemplo n.º 1
0
  def random_test_train_test_split(self):
    """Check the sizes produced by RandomSplitter.train_test_split.

    Featurizes "example.csv" (looked up under ``self.current_dir``) with a
    ``CircularFingerprint`` featurizer of size 1024 for the single
    "log-solubility" task, splits the resulting dataset with a
    ``RandomSplitter`` and asserts the size of the train and test parts.

    Reads ``self.current_dir``, ``self.smiles_field``, ``self.data_dir``,
    ``self.train_dir`` and ``self.test_dir``.

    Raises:
      AssertionError: if the train part does not hold 8 samples or the
        test part does not hold 2 samples.
    """
    # NOTE(review): the method name does not start with ``test_``, so
    # name-based test discovery will presumably skip it -- confirm whether
    # that is intentional before renaming.
    tasks = ["log-solubility"]
    input_file = os.path.join(self.current_dir, "example.csv")
    featurizer = CircularFingerprint(size=1024)
    loader = DataLoader(tasks=tasks,
                        smiles_field=self.smiles_field,
                        featurizer=featurizer,
                        verbosity="low")

    dataset = loader.featurize(input_file, self.data_dir)

    # Split the featurized samples into train/test parts.
    splitter = RandomSplitter()
    train_dataset, test_dataset = splitter.train_test_split(
        dataset, self.train_dir, self.test_dir)

    # Presumably example.csv holds 10 samples and the splitter defaults to
    # an 80/20 train/test ratio -- TODO confirm against the fixture.
    assert len(train_dataset) == 8
    assert len(test_dataset) == 2