from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

if __name__ == "__main__":
    # NOTE(review): `os` is used below but is not imported in the visible
    # part of this file -- confirm it is imported earlier.

    # Directory containing this script; every path below is built from it.
    here = os.path.dirname(os.path.abspath(__file__))
    from d3mds import D3MDataset, D3MProblem, D3MDS

    # Dataset and problem directories live two levels above this script.
    dspath = os.path.join(here, '..', '..', 'LL0_acled_dataset')
    prpath = os.path.join(here, '..', '..', 'LL0_acled_problem')
    # Fail fast (dataset first, then problem) if either directory is missing.
    assert all(os.path.exists(p) for p in (dspath, prpath))

    # this checks that the problem and dataset correspond
    d3mds = D3MDS(dspath, prpath)

    print('\nLoading train and test data')

    # Training split. ravel() is applied to the targets; presumably they come
    # back as a 2-D numpy array and are flattened here -- TODO confirm in d3mds.
    X_train = d3mds.get_train_data()
    y_train = d3mds.get_train_targets().ravel()
    print('X_train shape:', X_train.shape)
    print('y_train shape:', y_train.shape)

    # Test split, loaded and reported the same way as the training split.
    X_test = d3mds.get_test_data()
    y_test = d3mds.get_test_targets().ravel()
    print('X_test shape:', X_test.shape)
    print('y_test shape:', y_test.shape)

    # Keep only the 'notes' column in both splits. Indexing with a list of
    # names is used; assuming a pandas DataFrame, the result stays 2-D.
    text_columns = ['notes']
    X_train = X_train[text_columns]
    X_test = X_test[text_columns]
# Directory containing this script; every path below is built from it.
here = os.path.dirname(os.path.abspath(__file__))

from d3mds import D3MDataset, D3MProblem, D3MDS
from feat import AnnotatedTabularExtractor
from estimation import (
    SGDClassifierEstimator,
    SGDRegressorEstimator,
    RBFSamplerSGDClassifierEstimator,
    RBFSamplerSGDRegressorEstimator,
)

# Dataset and problem directories live two levels above this script;
# solpath is the directory one level above it.
dspath = os.path.join(here, '..', '..', 'LL0_uci_las_vegas_strip_dataset')
prpath = os.path.join(here, '..', '..', 'LL0_uci_las_vegas_strip_problem')
solpath = os.path.join(here, '..')
# Fail fast (dataset first, then problem) if either directory is missing.
assert all(os.path.exists(p) for p in (dspath, prpath))

# Number of parameter settings that are sampled in RandomizedSearchCV.
# n_iter trades off runtime vs quality of the solution.
N_ITER = 100

# create the D3MDS object from the dataset and problem paths
d3mds = D3MDS(dspath, prpath)


class Spinner:
    busy = False
    delay = 0.1

    @staticmethod
    def spinning_cursor():
        # Endless generator cycling through the characters | / - \ in order.
        while 1:
            for cursor in '|/-\\':
                yield cursor

    def __init__(self, delay=None):
        # Each instance gets its own cursor generator.
        self.spinner_generator = self.spinning_cursor()