def __init__(self, Xdata=None, Ydata=None, csv=None, xlsx=None, x_col='X', y_col='Y', models='all', test_frac=0.1, train_frac=0.9):
		if models == 'all':
			self.model_list = [
				'bert-base-uncased',
				'albert-base-v2',
				'roberta-base',
				'linear_SVM',
				'multinomial_naive_bayesian',
			]
		elif models == 'count-vectorizer':
			self.model_list = [
				'linear_SVM',
				'multinomial_naive_bayesian',
			]
		elif models == 'transformers':
			self.model_list = [
				'bert-base-uncased',
				'albert-base-v2',
				'roberta-base',
			]
		else:
			print('Models not recognized; the available options are currently "all", "count-vectorizer", and "transformers"')
			return
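		# Note (assumption, based on the option names): 'count-vectorizer' selects
		# the two scikit-learn-style pipelines, 'transformers' the three Hugging Face
		# checkpoints, and 'all' both families; the per-model training code is
		# presumably implemented elsewhere in the class.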
		if sum(arg is not None for arg in (csv, xlsx, Xdata)) > 1:
			print("You have provided too many data sources; give just X and Y data, or a csv file, or an xlsx file!")
			return
		if csv is not None:
			csv_data = pd.read_csv(csv)
			Xdata = csv_data[x_col]
			Ydata = csv_data[y_col]
		if xlsx is not None:
			xlsx_data = pd.read_excel(xlsx)
			Xdata = xlsx_data[x_col]
			Ydata = xlsx_data[y_col]
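		# The csv/xlsx loaders assume a header row with columns matching x_col and
		# y_col. A minimal example file (hypothetical) using the default names:
		#   X,Y
		#   "great movie",positive
		#   "dull and slow",negative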
		if isinstance(Xdata, pd.Series):
			print('converting pandas Series to list')
			Xdata = list(Xdata)
		if isinstance(Ydata, pd.Series):
			print('converting pandas Series to list')
			Ydata = list(Ydata)

		if Xdata is None or Ydata is None:
			print('Either you have not put in your own data, or you have only put in X or Y data; loading default dataset...')
			self.train_dataset_raw, self.test_dataset_raw = load_dataset('imdb', split=['train', 'test'])
			X = self.train_dataset_raw['text'] + self.test_dataset_raw['text']
			Y = self.train_dataset_raw['label'] + self.test_dataset_raw['label']
			keys = set(Y)
		else:
			X = Xdata
			Y = Ydata
			if all(isinstance(n, int) for n in Y):
				keys = set(Y)
			else:
				Y, keys = string_labels_to_int(Y)
			# shift labels down so that the minimum label is 0
			if min(Y) >= 1:
				Y = [y - min(Y) for y in Y]
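		# string_labels_to_int is assumed (its definition is not shown here) to map
		# each distinct string label to an integer id and return the relabeled list
		# plus the set of ids, e.g.:
		#   string_labels_to_int(['pos', 'neg', 'pos'])  ->  ([0, 1, 0], {0, 1})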
		if len(X) < 20:
			print('dataset is very small, using the default test/train split (0.25)')
			test_frac = None
			train_frac = None
		if len(X) < 8:
			print('dataset is far too small, using a 0.5/0.5 test/train split')
			test_frac = 0.5
			train_frac = 0.5

		if len(X) != len(Y):
			print('ERROR: X data and Y data are not the same length; they need to be!')
			return

		X_train, X_test, Y_train, Y_test = train_test_split(X, Y,
		                                                    stratify=Y,
		                                                    test_size=test_frac,
		                                                    train_size=train_frac)
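		# stratify=Y makes train_test_split preserve class proportions in both
		# splits: e.g. 100 samples labeled 60/40 with test_frac=0.1 yield a test
		# set of 6 and 4 samples per class rather than a random imbalance.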
		self.num_labels=len(keys)
		#self.train_dataset_raw_CNN = TensorDataset(X_train, int_labels_to_list(Y_train,keys))
		#self.test_dataset_raw_CNN = TensorDataset(X_test, int_labels_to_list(Y_test,keys))
		print(f'X_train length: {len(X_train)}')
		print(f'X_test length: {len(X_test)}')
		print(f'Y_train length: {len(Y_train)}')
		print(f'Y_test length: {len(Y_test)}')
		self.train_dataset_raw = Dataset.from_pandas(pd.DataFrame({'text':X_train, 'labels': Y_train}))
		self.test_dataset_raw = Dataset.from_pandas(pd.DataFrame({'text':X_test, 'labels': Y_test}))	
		self.all_metrics = {}
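# Usage sketch. The enclosing class is not shown in this snippet, so
# `TextClassifierBenchmark` below is a hypothetical stand-in for its name:
#
#   bench = TextClassifierBenchmark(csv='reviews.csv', x_col='review',
#                                   y_col='sentiment', models='count-vectorizer',
#                                   test_frac=0.2, train_frac=0.8)
#   print(bench.num_labels, bench.model_list)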
                                   "SentiNews_sentence_test.tsv",
                                   sep="\t")

    df_document_sl_hr_train = pd.read_csv(sentinews_location +
                                          "HRSLSentiNews_document_train.tsv",
                                          sep="\t")
    df_document_sl_hr_valid = pd.read_csv(sentinews_location +
                                          "HRSLSentiNews_document_valid.tsv",
                                          sep="\t")

    # no mixed HR+SL test split: the HR test set will be used as the final test

    # gather everyone if you want to have a single DatasetDict
    document = DatasetDict({
        "train": Dataset.from_pandas(df_document_sl_hr_train),
        "valid": Dataset.from_pandas(df_document_sl_hr_valid),
        "test": Dataset.from_pandas(df_document_croatian_test),
    })
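    # A DatasetDict is indexed by split name; a quick sanity check (sketch):
    #   document["train"][0]   ->  dict of column values for the first row
    #   document["test"].num_rows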
    # document.save_to_disk("sentinews-document")
    # gather everyone if you want to have a single DatasetDict
    paragraph = DatasetDict({
        "train": Dataset.from_pandas(df_paragraph_train),
        "valid": Dataset.from_pandas(df_paragraph_valid),
        "test": Dataset.from_pandas(df_paragraph_test),
    })
    # paragraph.save_to_disk("sentinews-paragraph")
    # gather everyone if you want to have a single DatasetDict
    sentence = DatasetDict({