def run():
    jvm.start()
    load_csv = Loader("weka.core.converters.CSVLoader")
    data_csv = load_csv.load_file(
        "/Users/imeiliasantoso/web_graduate_project4/predict_page/predict_data.csv")
    saver = Saver("weka.core.converters.ArffSaver")
    saver.save_file(
        data_csv,
        "/Users/imeiliasantoso/web_graduate_project4/predict_page/predict_data.arff")

    load_arff = Loader("weka.core.converters.ArffLoader")
    data_arff = load_arff.load_file(
        "/Users/imeiliasantoso/web_graduate_project4/predict_page/predict_data.arff")
    data_arff.class_is_last()

    global j48
    J48_class = Classifier(classname="weka.classifiers.trees.J48", options=["-C", "0.25", "-M", "2"])
    J48_class.build_classifier(data_arff)
    evaluationj48 = Evaluation(data_arff)
    evaluationj48.crossvalidate_model(J48_class, data_arff, 10, Random(100))
    j48 = str(evaluationj48.percent_correct)
    jvm.stop()
    return j48
def generate_folds(dataset_path, output_folder, n_folds=10, random_state=None):
    """
    Given a dataset, generate n_folds for it and store them in <output_folder>/<dataset_name>.

    :type dataset_path: str
    :param dataset_path: Path to dataset with .arff file extension (e.g. my_dataset.arff).
    :type output_folder: str
    :param output_folder: Path to store both the index file with folds and the fold files.
    :type n_folds: int
    :param n_folds: Optional - Number of folds to split the dataset into. Defaults to 10.
    :type random_state: int
    :param random_state: Optional - Seed to use in the splitting process. Defaults to None (no seed).
    """
    import warnings
    warnings.filterwarnings('error')

    dataset_name = dataset_path.split('/')[-1].split('.')[0]

    af = load_arff(dataset_path)
    df = load_dataframe(af)

    skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=random_state)
    fold_iter = skf.split(df[df.columns[:-1]], df[df.columns[-1]])

    fold_index = dict()

    jvm.start()

    csv_loader = Loader(classname="weka.core.converters.CSVLoader")
    arff_saver = Saver(classname='weka.core.converters.ArffSaver')

    for i, (arg_rest, arg_test) in enumerate(fold_iter):
        fold_index[i] = list(arg_test)

        _temp_path = 'temp_%s_%d.csv' % (dataset_name, i)

        fold_data = df.loc[arg_test]  # type: pd.DataFrame
        fold_data.to_csv(_temp_path, sep=',', index=False)

        java_arff_dataset = csv_loader.load_file(_temp_path)
        java_arff_dataset.relationname = af['relation']
        java_arff_dataset.class_is_last()
        arff_saver.save_file(
            java_arff_dataset,
            os.path.join(output_folder, '%s_fold_%d.arff' % (dataset_name, i)))

        os.remove(_temp_path)

    json.dump(
        fold_index,
        open(os.path.join(output_folder, dataset_name + '.json'), 'w'),
        indent=2)

    jvm.stop()
    warnings.filterwarnings('default')
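# Hedged usage sketch for generate_folds: assumes the helpers it relies on
# (load_arff, load_dataframe) and the imports from the snippet above are in
# scope; the paths below are hypothetical.
if __name__ == '__main__':
    generate_folds('datasets/iris.arff', 'folds', n_folds=10, random_state=42)
    # expected output (assumption): folds/iris_fold_0.arff ... folds/iris_fold_9.arff,
    # plus folds/iris.json mapping each fold to its instance indices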
def convertCsvtoArff(indata, outdata):
    """
    Convert a CSV file to ARFF.

    :param indata: input CSV file
    :param outdata: output ARFF file
    :return: None
    """
    loader = Loader(classname="weka.core.converters.CSVLoader")
    data = loader.load_file(indata)
    saver = Saver(classname="weka.core.converters.ArffSaver")
    saver.save_file(data, outdata)
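# Hedged usage sketch for convertCsvtoArff: the function assumes a running JVM
# and the Loader/Saver imports from weka.core.converters; the file names below
# are hypothetical.
import weka.core.jvm as jvm

jvm.start()
convertCsvtoArff('input.csv', 'output.arff')
jvm.stop()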
def save_arff(data, dest):
    """
    Save data into an arff file, using the weka implementation of ArffSaver.

    :param data: weka arff data
    :param dest: output file
    :return: None
    """
    args, _sufix = arff_saver_parser()
    saver = Saver(classname='weka.core.converters.ArffSaver',
                  options=args_to_weka_options(args, _sufix))
    saver.save_file(data, dest)
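# Hedged sketch: arff_saver_parser and args_to_weka_options are project
# helpers that are not shown here. Under default options, the function above
# reduces to a plain ArffSaver, roughly:
from weka.core.converters import Saver

def save_arff_plain(data, dest):
    """Save weka data to an ARFF file with default ArffSaver options."""
    saver = Saver(classname='weka.core.converters.ArffSaver')
    saver.save_file(data, dest)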
def run():
    jvm.start()
    load_csv = Loader("weka.core.converters.CSVLoader")
    data_csv = load_csv.load_file(
        "/Users/imeiliasantoso/web_graduate_project5/register_page/bank-full_input.csv")
    saver = Saver("weka.core.converters.ArffSaver")
    saver.save_file(
        data_csv,
        "/Users/imeiliasantoso/web_graduate_project5/register_page/bank-full_input.arff")

    load_arff = Loader("weka.core.converters.ArffLoader")
    data_arff = load_arff.load_file(
        "/Users/imeiliasantoso/web_graduate_project5/register_page/bank-full_input.arff")
    data_arff.class_is_last()

    cls = Classifier(classname="weka.classifiers.trees.J48", options=["-C", "0.5"])
    cls.build_classifier(data_arff)
    for index, inst in enumerate(data_arff):
        pred = cls.classify_instance(inst)
        dist = cls.distribution_for_instance(inst)

    # save the pruned tree in a txt file
    saveFile = open(
        "/Users/imeiliasantoso/web_graduate_project5/register_page/bank-full_input.txt", "w")
    saveFile.write(str(cls))
    # print(cls)
    saveFile.close()

    global j48
    J48_class = Classifier(classname="weka.classifiers.trees.J48", options=["-C", "0.25", "-M", "2"])
    J48_class.build_classifier(data_arff)
    evaluationj48 = Evaluation(data_arff)
    evaluationj48.crossvalidate_model(J48_class, data_arff, 10, Random(100))
    j48 = str(evaluationj48.percent_correct)
    jvm.stop()
    return j48
def preparation():
    data_file = "csvfiles/data.csv"
    try:
        # Load data
        loader = Loader(classname="weka.core.converters.CSVLoader")
        data = loader.load_file(data_file)
        data.class_is_last()

        miss = mostMissing(data)  # find attributes with significant missing data
        data = unsupFilters(data, "attribute.Remove", ["-R", "1," + miss])  # remove id and most-missing-value attributes
        data = unsupFilters(data, "attribute.RemoveUseless", [])  # remove useless attributes
        nonDistinct = notDistinct(data)  # find attributes that are not distinct and convert them to nominal
        data = unsupFilters(data, "attribute.NumericToNominal", ["-R", "last," + nonDistinct])  # convert class to nominal
        data = unsupFilters(data, "attribute.ReplaceMissingValues", [])  # replace missing values
        data = unsupFilters(data, "attribute.Normalize", [])  # normalize attributes to create less bias

        # split data into test and training set
        test = unsupFilters(data, "instance.RemoveRange", ["-V", "-R", "901-1000"])
        train = unsupFilters(data, "instance.RemoveRange", ["-R", "901-1000"])
        train = supFilters(train, "instance.SMOTE", ["-P", "160.0"])
        # print(data)
        saver = Saver(classname="weka.core.converters.ArffSaver")
        saver.save_file(test, "test.arff")
        saver.save_file(train, "train.arff")

        # Perform cross-validation using NaiveBayes, IBk and a J48 tree;
        # collect the cross-validation accuracies in an array
        accuracyArray = [naiveBayes(train), IBK(train), treeJ48(train)]
        mostAccurate = max(accuracyArray)  # find the most accurate
        print(mostAccurate)
        accuracyArray.remove(mostAccurate)
        secondAcc = max(accuracyArray)  # get the second most accurate
        print(secondAcc)
        trainAndMakePred(train, test)
        # makePrediction(test)
        print("Data loaded successfully")
    except IOError:
        print("Error loading file " + data_file)
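# Hedged sketch of the unsupFilters helper this snippet assumes (not shown in
# the source): a thin wrapper that applies an unsupervised weka filter given
# its short class name and options. supFilters would presumably do the same
# with the "weka.filters.supervised." prefix.
from weka.filters import Filter

def unsupFilters(data, name, options):
    """Apply weka.filters.unsupervised.<name> with the given options and return the result."""
    flt = Filter(classname="weka.filters.unsupervised." + name, options=options)
    flt.inputformat(data)
    return flt.filter(data)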
def main():
    jvm.start()
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file("train_sorted.arff")

    numofStores = 1115
    for storeNum in range(0, numofStores):
        tempData = data
        removeUpper = Filter(
            classname="weka.filters.unsupervised.instance.RemoveWithValues",
            options=["-S", str(storeNum + 2) + ".0", "-C", "first", "-L", "first-last", "-V"])
        removeUpper.inputformat(data)
        tempData = removeUpper.filter(data)

        removeLower = Filter(
            classname="weka.filters.unsupervised.instance.RemoveWithValues",
            options=["-S", str(storeNum + 1) + ".0", "-C", "first", "-L", "first-last"])
        removeLower.inputformat(tempData)
        tempData = removeLower.filter(tempData)

        # remove the storeID attribute
        tempData.delete_first_attribute()

        saver = Saver(classname="weka.core.converters.ArffSaver")
        saver.save_file(tempData, "stores/store" + str(storeNum + 1) + ".arff")
        print('Saved Store' + str(storeNum + 1))

    jvm.stop()
def main():
    """
    Runs a filter from the command-line. Calls JVM start/stop automatically.
    Use -h to see all options.
    """
    parser = argparse.ArgumentParser(
        description='Executes a filter from the command-line. Calls JVM start/stop automatically.')
    parser.add_argument("-j", metavar="classpath", dest="classpath", help="additional classpath, jars/directories")
    parser.add_argument("-X", metavar="heap", dest="heap", help="max heap size for jvm, e.g., 512m")
    parser.add_argument("-i", metavar="input1", dest="input1", required=True, help="input file 1")
    parser.add_argument("-o", metavar="output1", dest="output1", required=True, help="output file 1")
    parser.add_argument("-r", metavar="input2", dest="input2", help="input file 2")
    parser.add_argument("-s", metavar="output2", dest="output2", help="output file 2")
    parser.add_argument("-c", metavar="classindex", default="-1", dest="classindex", help="1-based class attribute index")
    parser.add_argument("filter", help="filter classname, e.g., weka.filters.AllFilter")
    parser.add_argument("option", nargs=argparse.REMAINDER, help="additional filter options")
    parsed = parser.parse_args()
    if parsed.input2 is None and parsed.output2 is not None:
        raise Exception("No second input file provided ('-r ...')!")

    jars = []
    if parsed.classpath is not None:
        jars = parsed.classpath.split(os.pathsep)
    params = []
    if parsed.input1 is not None:
        params.extend(["-i", parsed.input1])
    if parsed.output1 is not None:
        params.extend(["-o", parsed.output1])
    if parsed.input2 is not None:
        params.extend(["-r", parsed.input2])
    if parsed.output2 is not None:
        params.extend(["-s", parsed.output2])
    if parsed.classindex is not None:
        params.extend(["-c", parsed.classindex])

    jvm.start(jars, max_heap_size=parsed.heap, packages=True)

    logger.debug("Commandline: " + join_options(sys.argv[1:]))

    try:
        flter = Filter(parsed.filter)
        if len(parsed.option) > 0:
            flter.options = parsed.option
        loader = Loader(classname="weka.core.converters.ArffLoader")
        in1 = loader.load_file(parsed.input1)
        cls = parsed.classindex
        if str(parsed.classindex) == "first":
            cls = "0"
        if str(parsed.classindex) == "last":
            cls = str(in1.num_attributes - 1)
        in1.class_index = int(cls)
        flter.inputformat(in1)
        out1 = flter.filter(in1)
        saver = Saver(classname="weka.core.converters.ArffSaver")
        saver.save_file(out1, parsed.output1)
        if parsed.input2 is not None:
            in2 = loader.load_file(parsed.input2)
            in2.class_index = int(cls)
            out2 = flter.filter(in2)
            saver.save_file(out2, parsed.output2)
    except Exception as e:
        print(e)
    finally:
        jvm.stop()
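# Hedged invocation sketch (the script name is hypothetical):
#
#   python filter_runner.py -i input.arff -o output.arff -c last \
#       weka.filters.unsupervised.attribute.Remove -R 3
#
# Everything after the filter classname is collected via argparse.REMAINDER
# and passed through as filter options.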
    '-E', 'weka.attributeSelection.CfsSubsetEval -P 1 -E 1',
    '-S', 'weka.attributeSelection.RerankingSearch -method 2 -blockSize 20 -rankingMeasure 0 -search "weka.attributeSelection.GreedyStepwise -T -1.7976931348623157E308 -N 20 -num-slots 1"'])

# FS.inputformat(data)
# data = FS.filter(data)
# FS.inputformat(dataLast)
# dataLast = FS.filter(dataLast)

# ReplaceMV = Filter(classname="weka.filters.unsupervised.attribute.ReplaceMissingValues")
# ReplaceMV.inputformat(data)
# data = ReplaceMV.filter(data)
# ReplaceMV.inputformat(dataLast)
# dataLast = ReplaceMV.filter(dataLast)

data.class_is_last()
# dataLast.class_is_last()

from weka.core.converters import Saver
saver = Saver(classname="weka.core.converters.ArffSaver")
saver.save_file(
    data,
    '/Users/Lino/PycharmProjects/Classification/Snapshots/ARFF_PROG/Fast/Data_'
    + str(window) + 'd_S' + str(seed) + '_' + str(fold) + 'FOLD' + '.arff')

jvm.stop()
print(MV)
def getFunctionFeaturesArff(self, wholeSetArff, functionFeatureArff):
    data = self.load_Arff(wholeSetArff)
    filteredData = self.getSetDataBySetIndex(data, 2)
    saver = Saver()
    saver.save_file(filteredData, functionFeatureArff)
def createpatientinfo(request):
    if request.method == 'POST':
        try:
            form = PatientInfoForm(request.POST, request.FILES)
            newPatientInfo = form.save(commit=False)
            newPatientInfo.user = request.user
            image = request.FILES['Skin_image'].name
            print("*******************", image)
            newPatientInfo.image_name = image
            newPatientInfo.save()

            # ---------------- ENCRYPTION CODE ----------------
            key = Fernet.generate_key()
            key = key.decode('utf-8')
            newPatientInfo.image_key = key
            newPatientInfo.save(update_fields=['image_key'])
            input_file = "media/patient/images/" + image
            encrypted_file = "encryptedImages/" + image
            with open(input_file, 'rb') as f:
                data = f.read()
            fernet = Fernet(key)
            encrypted = fernet.encrypt(data)
            with open(encrypted_file, 'wb') as f:
                f.write(encrypted)

            # ---------------- DECRYPTION CODE ----------------
            image = newPatientInfo.image_name
            input_file = encrypted_file
            decrypted_file = "decryptedImages/" + image
            key = newPatientInfo.image_key
            # print(key)
            with open(input_file, 'rb') as f:
                data = f.read()
            fernet = Fernet(key)
            decrypted = fernet.decrypt(data)
            with open(decrypted_file, 'wb') as f:
                f.write(decrypted)

            # ---------------- WEKA CODE ----------------
            JVM.start(max_heap_size="4000m")
            clsfr, _ = Classifier.deserialize(
                r"patient\static\patient\Melanoma_Best_Performing_Weka3.8.model")

            haarSize = 8
            dctMat = dct(np.eye(64), norm='ortho')
            haarMat = Hybrid.haar(haarSize)
            for i in range(haarSize):
                haarMat[i] = haarMat[i] / math.sqrt(abs(haarMat[i]).sum())
            hybridTransformMat = Hybrid.hybridTransform(haarMat, dctMat.transpose())

            fPath = "decryptedImages/"
            fName = image
            img = cv2.imread(fPath + fName)
            imgResize = cv2.resize(img, (512, 512), interpolation=cv2.INTER_AREA)
            bFeatures64, gFeatures64, rFeatures64, _, _, _, _, _, _ = Hybrid.hybridTransformation(
                imgResize, hybridTransformMat)
            bFeatures64 = bFeatures64.reshape((1, bFeatures64.shape[0]))
            gFeatures64 = gFeatures64.reshape((1, gFeatures64.shape[0]))
            rFeatures64 = rFeatures64.reshape((1, rFeatures64.shape[0]))
            diagnosisMat = np.full((1, 1), "NA")
            features64 = np.concatenate(
                (bFeatures64, gFeatures64, rFeatures64, diagnosisMat), axis=1)

            op_file_name = ("arff_csv_files/HybridTransformFeatures64-Haar" + str(haarSize)
                            + "DCT" + str(dctMat.shape[0]) + fName)
            pd.DataFrame(features64).to_csv(op_file_name + ".csv", header=True, mode='a', index=False)

            # convert the feature CSV to ARFF and reload it for classification
            csvLoader = Loader(classname="weka.core.converters.CSVLoader")
            data = csvLoader.load_file(op_file_name + ".csv")
            arffSaver = Saver(classname="weka.core.converters.ArffSaver")
            arffSaver.save_file(data, op_file_name + ".arff")

            arffLoader = Loader(classname="weka.core.converters.ArffLoader")
            arff_data = arffLoader.load_file(op_file_name + ".arff")
            arff_data.class_is_last()

            diagnosis = ""
            for index, inst in enumerate(arff_data):
                pred = clsfr.classify_instance(inst)
                print(pred)
                dist = clsfr.distribution_for_instance(inst)
                print(dist)
                if pred == 1.0:
                    diagnosis = "Malignant"
                else:
                    diagnosis = "Benign"
            print("Final Diagnosis:", diagnosis)

            JVM.stop()
            # ---------------- WEKA CODE END ----------------

            newPatientInfo.result = diagnosis
            newPatientInfo.save(update_fields=['result'])
            return redirect('currentinfo')
        except ValueError:
            return render(request, 'patient/createpatientinfo.html',
                          {'form': PatientInfoForm(), "error": "Bad data passed in!"})
    else:
        return render(request, 'patient/createpatientinfo.html', {'form': PatientInfoForm()})
import weka.core.jvm as jvm
from weka.core.converters import Loader, Saver
from weka.core.dataset import Instances
from weka.filters import Filter

jvm.start()

# load weather.nominal
fname = data_dir + os.sep + "weather.nominal.arff"
print("\nLoading dataset: " + fname + "\n")
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file(fname)

# output header
print(Instances.template_instances(data))

# remove attribute no 3
print("\nRemove attribute no 3")
fltr = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "3"])
fltr.inputformat(data)
filtered = fltr.filter(data)

# output header
print(Instances.template_instances(filtered))

# save modified dataset
saver = Saver(classname="weka.core.converters.ArffSaver")
saver.save_file(filtered, data_dir + os.sep + "weather.nominal-filtered.arff")

jvm.stop()
# Filter function
from weka.filters import Filter
from weka.core.converters import Saver

print("Filter operation")
remove = Filter(classname="weka.filters.unsupervised.attribute.RemoveUseless",
                options=["-M", "99.0"])
remove.inputformat(data)
filtered = remove.filter(data)
print(filtered)

# EM imputation function
em = Filter(classname="weka.filters.unsupervised.attribute.ReplaceMissingValues")
em.inputformat(filtered)
em_imputed = em.filter(filtered)
print(em_imputed)
# weka.filters.unsupervised.attribute.EMImputation -N -1 -E 1.0E-4 -Q 1.0E-8

# Save arff to csv
saver = Saver(classname="weka.core.converters.CSVSaver")
saver.save_file(em_imputed, "./Dataset/hepatitis.csv")
saver.save_file(filtered, "./Dataset/hepatitis_removed_useless.csv")
# saver.save_file(data, "./Dataset/hepatitis_weka.csv")
# converters.save_any_file("./final_small_em.csv")
# converters.save_any_file("/some/where/else/iris.csv")

# cls = Classifier(classname="weka.classifiers.trees.J48")
# print(cls)

jvm.stop()
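# Hedged alternative to the commented-out converters lines above (it would
# need to run before jvm.stop()): save_any_file picks the converter from the
# file extension, replacing the explicit CSVSaver with a one-liner.
#
#   from weka.core.converters import save_any_file
#   save_any_file(em_imputed, "./Dataset/hepatitis.csv")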
def save_Arff(self, data, outputPath):
    saver = Saver()
    saver.save_file(data, outputPath)
def saveCSV(self, filename, path='/home/sbiastoch/Schreibtisch/csv_files/'):
    saver = Saver(classname="weka.core.converters.CSVSaver")
    saver.save_file(self.data, path + filename)
import weka.core.jvm as jvm
from weka.core.converters import Loader, Saver

jvm.start()

# load iris (gets rid of all the comments)
fname = data_dir + os.sep + "iris.arff"
print("\nLoading dataset: " + fname + "\n")
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file(fname)
data.class_is_last()

# output arff
outfile = tempfile.gettempdir() + os.sep + "test.arff"
saver = Saver(classname="weka.core.converters.ArffSaver")
saver.save_file(data, outfile)
f = open(outfile, 'r')
arff = f.read()
f.close()
print(arff)

# output xrff
outfile = tempfile.gettempdir() + os.sep + "test.xrff"
saver = Saver(classname="weka.core.converters.XRFFSaver")
saver.save_file(data, outfile)
f = open(outfile, 'r')
xrff = f.read()
f.close()
print(xrff)

jvm.stop()
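# Hedged variation on the pattern above (it would need to run before
# jvm.stop()): swapping the Saver classname selects the output format,
# e.g. CSV instead of ARFF/XRFF; the output path is illustrative.
#
#   outfile = tempfile.gettempdir() + os.sep + "test.csv"
#   saver = Saver(classname="weka.core.converters.CSVSaver")
#   saver.save_file(data, outfile)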
            col = col.lower()
            atts.append(Attribute.create_numeric(col))
            if not ref_present and (idx == 0):
                atts.append(Attribute.create_numeric("reference value"))
        data = Instances.create_instances("irdc", atts, 0)
    else:
        values = []
        for idx, col in enumerate(row):
            values.append(float(col))
            if not ref_present and (idx == 0):
                values.append(float('NaN'))
        inst = Instance.create_instance(values)
        data.add_instance(inst)

saver = Saver(classname="weka.core.converters.ArffSaver")
saver.save_file(data, data_dir + os.sep + outfile)

# train/test/predict
print("Train/test/predict...")
groups = ["DataSet1", "DataSet2"]
# groups = ["DataSet2"]
for group in groups:
    print(group)
    train = data_dir + os.sep + group + "_Cal.arff"
    test = data_dir + os.sep + group + "_Test.arff"
    pred = data_dir + os.sep + group + "_Val.arff"
    loader = Loader(classname="weka.core.converters.ArffLoader")
    print(train)
def save_dataset(self, filepath):
    saver = Saver(classname="weka.core.converters.CSVSaver")
    saver.save_file(self.dataset, filepath)
def save(data, dst, saver="weka.core.converters.ArffSaver", **kwargs):
    if not MODULE_SUPPORTED:
        return
    saver = WekaSaver(classname=saver, **kwargs)
    saver.save_file(data, fs.path(dst))
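# Hedged usage sketch: `save` above defaults to ArffSaver, and passing a
# different converter classname switches the output format. MODULE_SUPPORTED
# and fs.path come from the surrounding module and are assumed available.
#
#   save(data, "out.arff")                                        # ARFF (default)
#   save(data, "out.csv", saver="weka.core.converters.CSVSaver")  # CSV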