Code example #1
import weka.core.jvm as jvm
from weka.core.converters import Loader, Saver
from weka.classifiers import Classifier, Evaluation
from weka.core.classes import Random


def run():
    jvm.start()
    load_csv = Loader("weka.core.converters.CSVLoader")
    data_csv = load_csv.load_file(
        "/Users/imeiliasantoso/web_graduate_project4/predict_page/predict_data.csv"
    )

    saver = Saver("weka.core.converters.ArffSaver")
    saver.save_file(
        data_csv,
        "/Users/imeiliasantoso/web_graduate_project4/predict_page/predict_data.arff"
    )

    load_arff = Loader("weka.core.converters.ArffLoader")
    data_arff = load_arff.load_file(
        "/Users/imeiliasantoso/web_graduate_project4/predict_page/predict_data.arff"
    )
    data_arff.class_is_last()

    global j48
    J48_class = Classifier(classname="weka.classifiers.trees.J48",
                           options=["-C", "0.25", "-M", "2"])
    J48_class.build_classifier(data_arff)
    evaluationj48 = Evaluation(data_arff)
    evaluationj48.crossvalidate_model(J48_class, data_arff, 10, Random(100))
    j48 = str(evaluationj48.percent_correct)
    jvm.stop()
    return j48
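
For context, a minimal hedged sketch of how this function might be called; percent_correct is a float property that run() converts to a string, and the CSV path above is specific to the original project.

if __name__ == "__main__":
    accuracy = run()  # 10-fold cross-validated accuracy of J48, e.g. "87.5"
    print("J48 accuracy: " + accuracy + "%")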
Code example #2
File: dataset.py Project: henryzord/ardennes
import json
import os

import pandas as pd
from sklearn.model_selection import StratifiedKFold

import weka.core.jvm as jvm
from weka.core.converters import Loader, Saver

# load_arff and load_dataframe are helper functions defined elsewhere in this project


def generate_folds(dataset_path, output_folder, n_folds=10, random_state=None):
    """
    Given a dataset, generate n_folds stratified folds for it and store them in <output_folder>/<dataset_name>.

    :type dataset_path: str
    :param dataset_path: Path to the dataset, with .arff file extension (e.g. my_dataset.arff)
    :type output_folder: str
    :param output_folder: Path to store both index file with folds and fold files.
    :type n_folds: int
    :param n_folds: Optional - Number of folds to split the dataset into. Defaults to 10.
    :type random_state: int
    :param random_state: Optional - Seed to use in the splitting process. Defaults to None (no seed).
    """

    import warnings
    warnings.filterwarnings('error')

    dataset_name = dataset_path.split('/')[-1].split('.')[0]

    af = load_arff(dataset_path)
    df = load_dataframe(af)

    skf = StratifiedKFold(n_splits=n_folds,
                          shuffle=True,
                          random_state=random_state)
    fold_iter = skf.split(df[df.columns[:-1]], df[df.columns[-1]])

    fold_index = dict()

    jvm.start()

    csv_loader = Loader(classname="weka.core.converters.CSVLoader")
    arff_saver = Saver(classname='weka.core.converters.ArffSaver')

    for i, (arg_rest, arg_test) in enumerate(fold_iter):
        fold_index[i] = list(arg_test)

        _temp_path = 'temp_%s_%d.csv' % (dataset_name, i)

        fold_data = df.loc[arg_test]  # type: pd.DataFrame
        fold_data.to_csv(_temp_path, sep=',', index=False)

        java_arff_dataset = csv_loader.load_file(_temp_path)
        java_arff_dataset.relationname = af['relation']
        java_arff_dataset.class_is_last()
        arff_saver.save_file(
            java_arff_dataset,
            os.path.join(output_folder, '%s_fold_%d.arff' % (dataset_name, i)))

        os.remove(_temp_path)

    json.dump(fold_index,
              open(os.path.join(output_folder, dataset_name + '.json'), 'w'),
              indent=2)

    jvm.stop()
    warnings.filterwarnings('default')
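
A hedged invocation sketch (the paths and seed below are placeholders, not from the original project); each fold i is written to <output_folder>/<dataset_name>_fold_<i>.arff, and the per-fold test indices go to <dataset_name>.json.

# Hypothetical call: writes folds/iris_fold_0.arff ... folds/iris_fold_9.arff
# plus folds/iris.json with the per-fold test-set indices.
generate_folds('datasets/iris.arff', 'folds', n_folds=10, random_state=42)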
Code example #3
File: util.py Project: igabriel85/dmon-adp
from weka.core.converters import Loader, Saver


def convertCsvtoArff(indata, outdata):
    '''
    Convert a CSV file to ARFF using Weka's converters.

    :param indata: input CSV file path
    :param outdata: output ARFF file path
    '''
    loader = Loader(classname="weka.core.converters.CSVLoader")
    data = loader.load_file(indata)
    saver = Saver(classname="weka.core.converters.ArffSaver")
    saver.save_file(data, outdata)
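
A minimal usage sketch, assuming the caller manages the JVM (the file names are placeholders):

import weka.core.jvm as jvm

jvm.start()
try:
    convertCsvtoArff("input.csv", "output.arff")
finally:
    jvm.stop()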
Code example #4
File: dataset.py Project: henryzord/forrestTemp
def generate_folds(dataset_path, output_folder, n_folds=10, random_state=None):
    """
    Given a dataset, generate n_folds stratified folds for it and store them in <output_folder>/<dataset_name>.

    :type dataset_path: str
    :param dataset_path: Path to the dataset, with .arff file extension (e.g. my_dataset.arff)
    :type output_folder: str
    :param output_folder: Path to store both index file with folds and fold files.
    :type n_folds: int
    :param n_folds: Optional - Number of folds to split the dataset into. Defaults to 10.
    :type random_state: int
    :param random_state: Optional - Seed to use in the splitting process. Defaults to None (no seed).
    """

    import warnings
    warnings.filterwarnings('error')

    dataset_name = dataset_path.split('/')[-1].split('.')[0]

    af = load_arff(dataset_path)
    df = load_dataframe(af)

    skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=random_state)
    fold_iter = skf.split(df[df.columns[:-1]], df[df.columns[-1]])

    fold_index = dict()

    jvm.start()

    csv_loader = Loader(classname="weka.core.converters.CSVLoader")
    arff_saver = Saver(classname='weka.core.converters.ArffSaver')

    for i, (arg_rest, arg_test) in enumerate(fold_iter):
        fold_index[i] = list(arg_test)

        _temp_path = 'temp_%s_%d.csv' % (dataset_name, i)

        fold_data = df.loc[arg_test]  # type: pd.DataFrame
        fold_data.to_csv(_temp_path, sep=',', index=False)

        java_arff_dataset = csv_loader.load_file(_temp_path)
        java_arff_dataset.relationname = af['relation']
        java_arff_dataset.class_is_last()
        arff_saver.save_file(java_arff_dataset, os.path.join(output_folder, '%s_fold_%d.arff' % (dataset_name, i)))

        os.remove(_temp_path)

    json.dump(
        fold_index, open(os.path.join(output_folder, dataset_name + '.json'), 'w'), indent=2
    )

    jvm.stop()
    warnings.filterwarnings('default')
Code example #5
def save_arff(data, dest):
    """
    Save data to an ARFF file, using Weka's ArffSaver implementation.

    :param data: Weka dataset (Instances) to save
    :param dest: output file path
    :return: None
    """
    args, _sufix = arff_saver_parser()
    saver = Saver(classname='weka.core.converters.ArffSaver',
                  options=args_to_weka_options(args, _sufix))
    saver.save_file(data, dest)
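
arff_saver_parser and args_to_weka_options are project-specific helpers that translate parsed command-line arguments into Weka option strings. Without them, a direct equivalent might look like the following sketch (the -decimal flag is a standard ArffSaver option, chosen here purely for illustration):

saver = Saver(classname='weka.core.converters.ArffSaver',
              options=['-decimal', '6'])  # keep up to 6 decimal places
saver.save_file(data, dest)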
Code example #6
import weka.core.jvm as jvm
from weka.core.converters import Loader, Saver
from weka.classifiers import Classifier, Evaluation
from weka.core.classes import Random


def run():
    jvm.start()
    load_csv = Loader("weka.core.converters.CSVLoader")
    data_csv = load_csv.load_file(
        "/Users/imeiliasantoso/web_graduate_project5/register_page/bank-full_input.csv"
    )

    saver = Saver("weka.core.converters.ArffSaver")
    saver.save_file(
        data_csv,
        "/Users/imeiliasantoso/web_graduate_project5/register_page/bank-full_input.arff"
    )

    load_arff = Loader("weka.core.converters.ArffLoader")
    data_arff = load_arff.load_file(
        "/Users/imeiliasantoso/web_graduate_project5/register_page/bank-full_input.arff"
    )
    data_arff.class_is_last()

    cls = Classifier(classname="weka.classifiers.trees.J48",
                     options=["-C", "0.5"])
    cls.build_classifier(data_arff)
    # classify each instance (pred/dist are computed here but not used further)
    for index, inst in enumerate(data_arff):
        pred = cls.classify_instance(inst)
        dist = cls.distribution_for_instance(inst)

    # save the pruned tree to a text file
    with open(
            "/Users/imeiliasantoso/web_graduate_project5/register_page/bank-full_input.txt",
            "w") as saveFile:
        saveFile.write(str(cls))
    # print(cls)

    global j48
    J48_class = Classifier(classname="weka.classifiers.trees.J48",
                           options=["-C", "0.25", "-M", "2"])
    J48_class.build_classifier(data_arff)
    evaluationj48 = Evaluation(data_arff)
    evaluationj48.crossvalidate_model(J48_class, data_arff, 10, Random(100))
    j48 = str(evaluationj48.percent_correct)
    jvm.stop()
    return j48
Code example #7
import weka.core.jvm as jvm
from weka.core.converters import Loader, Saver
from weka.filters import Filter


def main():
    jvm.start()
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file("train_sorted.arff")
    numofStores = 1115

    for storeNum in range(0, numofStores):

        # RemoveWithValues with inverted selection (-V) keeps only instances
        # whose first attribute (the store ID) lies below storeNum + 2 ...
        removeUpper = Filter(
            classname="weka.filters.unsupervised.instance.RemoveWithValues",
            options=[
                "-S",
                str(storeNum + 2) + ".0", "-C", "first", "-L", "first-last",
                "-V"
            ])
        removeUpper.inputformat(data)
        tempData = removeUpper.filter(data)

        # ... and the second pass removes instances below storeNum + 1,
        # leaving exactly the rows of store storeNum + 1
        removeLower = Filter(
            classname="weka.filters.unsupervised.instance.RemoveWithValues",
            options=[
                "-S",
                str(storeNum + 1) + ".0", "-C", "first", "-L", "first-last"
            ])
        removeLower.inputformat(tempData)
        tempData = removeLower.filter(tempData)

        # remove the storeID attribute
        tempData.delete_first_attribute()

        saver = Saver(classname="weka.core.converters.ArffSaver")
        saver.save_file(tempData, "stores/store" + str(storeNum + 1) + ".arff")
        print("Saved Store" + str(storeNum + 1))

    jvm.stop()
Code example #8
File: assignment.py Project: Qisen25/Data-Mining
def preparation():
    data_file = "csvfiles/data.csv"

    try:
        # Load data
        loader = Loader(classname="weka.core.converters.CSVLoader")
        data = loader.load_file(data_file)
        data.class_is_last()

        miss = mostMissing(data)  # find attributes with significant missing data
        # remove the ID attribute and the attributes with the most missing values
        data = unsupFilters(data, "attribute.Remove", ["-R", "1," + miss])
        data = unsupFilters(data, "attribute.RemoveUseless", [])  # remove useless attributes

        nonDistinct = notDistinct(data)  # find non-distinct attributes to convert to nominal
        # convert the class and the non-distinct attributes to nominal
        data = unsupFilters(data, "attribute.NumericToNominal", ["-R", "last," + nonDistinct])
        data = unsupFilters(data, "attribute.ReplaceMissingValues", [])  # replace missing values
        data = unsupFilters(data, "attribute.Normalize", [])  # normalize attributes to reduce bias

        # split the data into test and training sets
        test = unsupFilters(data, "instance.RemoveRange", ["-V", "-R", "901-1000"])
        train = unsupFilters(data, "instance.RemoveRange", ["-R", "901-1000"])
        train = supFilters(train, "instance.SMOTE", ["-P", "160.0"])
        # print(data)

        saver = Saver(classname="weka.core.converters.ArffSaver")
        saver.save_file(test, "test.arff")
        saver.save_file(train, "train.arff")

        # Perform cross-validation with NaiveBayes, IBk and a J48 tree,
        # collecting the cross-validation accuracies in an array
        accuracyArray = [naiveBayes(train), IBK(train), treeJ48(train)]
        mostAccurate = max(accuracyArray)  # find the most accurate
        print(mostAccurate)
        accuracyArray.remove(mostAccurate)
        secondAcc = max(accuracyArray)  # get the second most accurate
        print(secondAcc)
        trainAndMakePred(train, test)
        # makePrediction(test)

        print("Data loaded successfully")
    except IOError:
        print("Error loading file " + data_file)
Code example #9
                        '-E',
                        'weka.attributeSelection.CfsSubsetEval -P 1 -E 1',
                        '-S',
                        'weka.attributeSelection.RerankingSearch -method 2 -blockSize 20 -rankingMeasure 0 -search "weka.attributeSelection.GreedyStepwise -T -1.7976931348623157E308 -N 20 -num-slots 1"'
                    ])
                # FS.inputformat(data)
                # data = FS.filter(data)

                # FS.inputformat(dataLast)
                # dataLast = FS.filter(dataLast)

                # ReplaceMV = Filter(classname="weka.filters.unsupervised.attribute.ReplaceMissingValues")
                # ReplaceMV.inputformat(data)
                # data = ReplaceMV.filter(data)

                # ReplaceMV.inputformat(dataLast)
                # dataLast = ReplaceMV.filter(dataLast)

                data.class_is_last()
                # dataLast.class_is_last()
                from weka.core.converters import Saver

                saver = Saver(classname="weka.core.converters.ArffSaver")
                saver.save_file(
                    data,
                    '/Users/Lino/PycharmProjects/Classification/Snapshots/ARFF_PROG/Fast/Data_'
                    + str(window) + 'd_S' + str(seed) + '_' + str(fold) +
                    'FOLD' + '.arff')

jvm.stop()
print(MV)
Code example #10
import argparse
import logging
import os
import sys

import weka.core.jvm as jvm
from weka.core.classes import join_options
from weka.core.converters import Loader, Saver
from weka.filters import Filter

logger = logging.getLogger(__name__)


def main():
    """
    Runs a filter from the command-line. Calls JVM start/stop automatically.
    Use -h to see all options.
    """
    parser = argparse.ArgumentParser(
        description=
        'Executes a filter from the command-line. Calls JVM start/stop automatically.'
    )
    parser.add_argument("-j",
                        metavar="classpath",
                        dest="classpath",
                        help="additional classpath, jars/directories")
    parser.add_argument("-X",
                        metavar="heap",
                        dest="heap",
                        help="max heap size for jvm, e.g., 512m")
    parser.add_argument("-i",
                        metavar="input1",
                        dest="input1",
                        required=True,
                        help="input file 1")
    parser.add_argument("-o",
                        metavar="output1",
                        dest="output1",
                        required=True,
                        help="output file 1")
    parser.add_argument("-r",
                        metavar="input2",
                        dest="input2",
                        help="input file 2")
    parser.add_argument("-s",
                        metavar="output2",
                        dest="output2",
                        help="output file 2")
    parser.add_argument("-c",
                        metavar="classindex",
                        default="-1",
                        dest="classindex",
                        help="1-based class attribute index")
    parser.add_argument("filter",
                        help="filter classname, e.g., weka.filters.AllFilter")
    parser.add_argument("option",
                        nargs=argparse.REMAINDER,
                        help="additional filter options")
    parsed = parser.parse_args()
    if parsed.input2 is None and parsed.output2 is not None:
        raise Exception("No second input file provided ('-r ...')!")

    jars = []
    if parsed.classpath is not None:
        jars = parsed.classpath.split(os.pathsep)
    params = []
    if parsed.input1 is not None:
        params.extend(["-i", parsed.input1])
    if parsed.output1 is not None:
        params.extend(["-o", parsed.output1])
    if parsed.input2 is not None:
        params.extend(["-r", parsed.input2])
    if parsed.output2 is not None:
        params.extend(["-s", parsed.output2])
    if parsed.classindex is not None:
        params.extend(["-c", parsed.classindex])

    jvm.start(jars, max_heap_size=parsed.heap, packages=True)

    logger.debug("Commandline: " + join_options(sys.argv[1:]))

    try:
        flter = Filter(parsed.filter)
        if len(parsed.option) > 0:
            flter.options = parsed.option
        loader = Loader(classname="weka.core.converters.ArffLoader")
        in1 = loader.load_file(parsed.input1)
        cls = parsed.classindex
        if str(parsed.classindex) == "first":
            cls = "0"
        if str(parsed.classindex) == "last":
            cls = str(in1.num_attributes - 1)
        in1.class_index = int(cls)
        flter.inputformat(in1)
        out1 = flter.filter(in1)
        saver = Saver(classname="weka.core.converters.ArffSaver")
        saver.save_file(out1, parsed.output1)
        if parsed.input2 is not None:
            in2 = loader.load_file(parsed.input2)
            in2.class_index = int(cls)
            out2 = flter.filter(in2)
            saver.save_file(out2, parsed.output2)
    except Exception as e:
        print(e)
    finally:
        jvm.stop()
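
For reference, a hedged sketch of invoking this script, assuming it is saved as filter_runner.py (the filter and file names are placeholders):

# python filter_runner.py -c last \
#     -i input.arff -o filtered.arff \
#     weka.filters.unsupervised.attribute.Remove -R 3
#
# This loads input.arff, sets the last attribute as the class, removes
# attribute 3, and writes the result to filtered.arff.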
Code example #11
def main():
    """
    Runs a filter from the command-line. Calls JVM start/stop automatically.
    Use -h to see all options.
    """
    parser = argparse.ArgumentParser(
        description='Executes a filter from the command-line. Calls JVM start/stop automatically.')
    parser.add_argument("-j", metavar="classpath", dest="classpath", help="additional classpath, jars/directories")
    parser.add_argument("-X", metavar="heap", dest="heap", help="max heap size for jvm, e.g., 512m")
    parser.add_argument("-i", metavar="input1", dest="input1", required=True, help="input file 1")
    parser.add_argument("-o", metavar="output1", dest="output1", required=True, help="output file 1")
    parser.add_argument("-r", metavar="input2", dest="input2", help="input file 2")
    parser.add_argument("-s", metavar="output2", dest="output2", help="output file 2")
    parser.add_argument("-c", metavar="classindex", default="-1", dest="classindex",
                        help="1-based class attribute index")
    parser.add_argument("filter", help="filter classname, e.g., weka.filters.AllFilter")
    parser.add_argument("option", nargs=argparse.REMAINDER, help="additional filter options")
    parsed = parser.parse_args()
    if parsed.input2 is None and parsed.output2 is not None:
        raise Exception("No second input file provided ('-r ...')!")

    jars = []
    if parsed.classpath is not None:
        jars = parsed.classpath.split(os.pathsep)
    params = []
    if parsed.input1 is not None:
        params.extend(["-i", parsed.input1])
    if parsed.output1 is not None:
        params.extend(["-o", parsed.output1])
    if parsed.input2 is not None:
        params.extend(["-r", parsed.input2])
    if parsed.output2 is not None:
        params.extend(["-s", parsed.output2])
    if parsed.classindex is not None:
        params.extend(["-c", parsed.classindex])

    jvm.start(jars, max_heap_size=parsed.heap, packages=True)

    logger.debug("Commandline: " + join_options(sys.argv[1:]))

    try:
        flter = Filter(parsed.filter)
        if len(parsed.option) > 0:
            flter.options = parsed.option
        loader = Loader(classname="weka.core.converters.ArffLoader")
        in1 = loader.load_file(parsed.input1)
        cls = parsed.classindex
        if str(parsed.classindex) == "first":
            cls = "0"
        if str(parsed.classindex) == "last":
            cls = str(in1.num_attributes - 1)
        in1.class_index = int(cls)
        flter.inputformat(in1)
        out1 = flter.filter(in1)
        saver = Saver(classname="weka.core.converters.ArffSaver")
        saver.save_file(out1, parsed.output1)
        if parsed.input2 is not None:
            in2 = loader.load_file(parsed.input2)
            in2.class_index = int(cls)
            out2 = flter.filter(in2)
            saver.save_file(out2, parsed.output2)
    except Exception as e:
        print(e)
    finally:
        jvm.stop()
Code example #12
    def getFunctionFeaturesArff(self, wholeSetArff, functionFeatureArff):       
        data = self.load_Arff(wholeSetArff)
        filteredData = self.getSetDataBySetIndex(data, 2)     

        saver = Saver()
        saver.save_file(filteredData, functionFeatureArff)
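
Saver() with no arguments falls back to python-weka-wrapper's default classname, weka.core.converters.ArffSaver, so the call above writes ARFF. A sketch of the explicit equivalent:

saver = Saver(classname="weka.core.converters.ArffSaver")
saver.save_file(filteredData, functionFeatureArff)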
Code example #13
File: class-1.5.py Project: sudhakar2205/wekamooc
import os

import weka.core.jvm as jvm
from weka.core.converters import Loader, Saver
from weka.core.dataset import Instances
from weka.filters import Filter

# data_dir (the dataset directory) is defined earlier in the original script

jvm.start()

# load weather.nominal
fname = data_dir + os.sep + "weather.nominal.arff"
print("\nLoading dataset: " + fname + "\n")
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file(fname)

# output header
print(Instances.template_instances(data))

# remove attribute no 3
print("\nRemove attribute no 3")
fltr = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "3"])
fltr.inputformat(data)
filtered = fltr.filter(data)

# output header
print(Instances.template_instances(filtered))

# save modified dataset
saver = Saver(classname="weka.core.converters.ArffSaver")
saver.save_file(filtered, data_dir + os.sep + "weather.nominal-filtered.arff")

jvm.stop()

Code example #14
File: views.py Project: shubhammandhare10/MedTech
def createpatientinfo(request):
    if request.method == 'POST':
        try:
            form = PatientInfoForm(request.POST, request.FILES)
            newPatientInfo = form.save(commit=False)
            newPatientInfo.user = request.user
            image = request.FILES['Skin_image'].name
            print("*******************", image)

            newPatientInfo.image_name = image

            newPatientInfo.save()

            # ******************************************** ENCRYPTION CODE ************************************************
            key = Fernet.generate_key()
            key = key.decode('utf-8')

            newPatientInfo.image_key = key
            newPatientInfo.save(update_fields=['image_key'])

            input_file = "media/patient/images/" + image
            encrypted_file = "encryptedImages/" + image

            with open(input_file, 'rb') as f:
                data = f.read()

            fernet = Fernet(key)
            encrypted = fernet.encrypt(data)

            with open(encrypted_file, 'wb') as f:
                f.write(encrypted)

    # ***************************************************  DECRYPTION CODE ************************************************
            image = newPatientInfo.image_name
            input_file = encrypted_file
            decrypted_file = "decryptedImages/" + image
            key = newPatientInfo.image_key
            # print("************************************************",key)

            with open(input_file, 'rb') as f:
                data = f.read()

            fernet = Fernet(key)
            decrypted = fernet.decrypt(data)

            with open(decrypted_file, 'wb') as f:
                f.write(decrypted)


# -----------------------------------------------------  WEKA CODE  ---------------------------------------------------
            JVM.start(max_heap_size="4000m")

            clsfr, _ = Classifier.deserialize(
                r"patient\static\patient\Melanoma_Best_Performing_Weka3.8.model"
            )
            haarSize = 8
            dctMat = dct(np.eye(64), norm='ortho')
            haarMat = Hybrid.haar(haarSize)

            for i in range(haarSize):
                haarMat[i] = haarMat[i] / math.sqrt(abs(haarMat[i]).sum())

            hybridTransformMat = Hybrid.hybridTransform(
                haarMat, dctMat.transpose())

            fPath = "decryptedImages/"
            fName = image

            img = cv2.imread(fPath + fName)
            imgResize = cv2.resize(img, (512, 512),
                                   interpolation=cv2.INTER_AREA)

            bFeatures64, gFeatures64, rFeatures64, _, _, _, _, _, _ = Hybrid.hybridTransformation(
                imgResize, hybridTransformMat)

            bFeatures64 = bFeatures64.reshape((1, bFeatures64.shape[0]))
            gFeatures64 = gFeatures64.reshape((1, gFeatures64.shape[0]))
            rFeatures64 = rFeatures64.reshape((1, rFeatures64.shape[0]))
            diagnosisMat = np.full((1, 1), "NA")

            features64 = np.concatenate(
                (bFeatures64, gFeatures64, rFeatures64, diagnosisMat), axis=1)

            op_file_name = "arff_csv_files/HybridTransformFeatures64-Haar" + str(
                haarSize) + "DCT" + str(dctMat.shape[0]) + fName
            pd.DataFrame(features64).to_csv(op_file_name + ".csv",
                                            header=True,
                                            mode='a',
                                            index=False)

            csvLoader = Loader(classname="weka.core.converters.CSVLoader")
            data = csvLoader.load_file(op_file_name + ".csv")

            arffSaver = Saver(classname="weka.core.converters.ArffSaver")
            arffSaver.save_file(data, op_file_name + ".arff")

            arffLoader = Loader(classname="weka.core.converters.ArffLoader")
            arff_data = arffLoader.load_file(op_file_name + ".arff")
            arff_data.class_is_last()

            diagnosis = ""
            for index, inst in enumerate(arff_data):
                pred = clsfr.classify_instance(inst)
                print(pred)
                dist = clsfr.distribution_for_instance(inst)
                print(dist)

                if pred == 1.0:
                    diagnosis = "Malignant"
                else:
                    diagnosis = "Benign"

            print(
                "Final Diagnosis: ***************************************************",
                diagnosis)
            JVM.stop()
            # -----------------------------------------------------  WEKA CODE END ---------------------------------------------------

            newPatientInfo.result = diagnosis
            newPatientInfo.save(update_fields=['result'])

            return redirect('currentinfo')
        except ValueError:
            return render(request, 'patient/createpatientinfo.html', {
                'form': PatientInfoForm(),
                "error": "Bad data passed in!"
            })
    else:
        return render(request, 'patient/createpatientinfo.html',
                      {'form': PatientInfoForm()})
Code example #15
 def save_Arff(self, data, outputPath):
     saver = Saver()
     saver.save_file(data, outputPath)
Code example #16
File: irdc.py Project: fracpete/wekamooc
                for idx, col in enumerate(row):
                    col = col.lower()
                    atts.append(Attribute.create_numeric(col))
                    if not ref_present and (idx == 0):
                        atts.append(Attribute.create_numeric("reference value"))
                data = Instances.create_instances("irdc", atts, 0)
            else:
                values = []
                for idx, col in enumerate(row):
                    values.append(float(col))
                    if not ref_present and (idx == 0):
                        values.append(float('NaN'))
                inst = Instance.create_instance(values)
                data.add_instance(inst)

        saver = Saver(classname="weka.core.converters.ArffSaver")
        saver.save_file(data, data_dir + os.sep + outfile)

# train/test/predict
print("Train/test/predict...")

groups = ["DataSet1", "DataSet2"]
# groups = ["DataSet2"]

for group in groups:
    print(group)
    train = data_dir + os.sep + group + "_Cal.arff"
    test = data_dir + os.sep + group + "_Test.arff"
    pred = data_dir + os.sep + group + "_Val.arff"

    loader = Loader(classname="weka.core.converters.ArffLoader")
Code example #17
File: evaluate.py Project: sbiastoch/thesis
	def saveCSV(self, filename, path='/home/sbiastoch/Schreibtisch/csv_files/'):
		saver = Saver(classname="weka.core.converters.CSVSaver")
		saver.save_file(self.data, path+filename)
Code example #18
File: ml.py Project: ChrisCummins/phd
def save(data, dst, saver="weka.core.converters.ArffSaver", **kwargs):
    if not MODULE_SUPPORTED: return
    saver = WekaSaver(classname=saver, **kwargs)
    saver.save_file(data, fs.path(dst))
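
A hedged usage sketch (fs is this project's filesystem helper module; data is assumed to be a Weka dataset and the JVM already running; extra keyword arguments are forwarded to the underlying Saver):

save(data, "results/train.arff")  # ArffSaver by default
save(data, "results/train.csv",   # any converter classname can be passed
     saver="weka.core.converters.CSVSaver")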
Code example #19
File: class-1.5.py Project: echavarria/wekamooc
from weka.core.converters import Loader, Saver
from weka.core.dataset import Instances
from weka.filters import Filter

jvm.start()

# load weather.nominal
fname = data_dir + os.sep + "weather.nominal.arff"
print("\nLoading dataset: " + fname + "\n")
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file(fname)

# output header
print(Instances.template_instances(data))

# remove attribute no 3
print("\nRemove attribute no 3")
fltr = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "3"])
fltr.inputformat(data)
filtered = fltr.filter(data)

# output header
print(Instances.template_instances(filtered))

# save modified dataset
saver = Saver(classname="weka.core.converters.ArffSaver")
saver.save_file(filtered, data_dir + os.sep + "weather.nominal-filtered.arff")

jvm.stop()

Code example #20
File: class-5.5.py Project: fracpete/wekamooc
import os
import tempfile

import weka.core.jvm as jvm
from weka.core.converters import Loader, Saver

jvm.start()

# load iris (gets rid of all the comments)
fname = data_dir + os.sep + "iris.arff"
print("\nLoading dataset: " + fname + "\n")
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file(fname)
data.class_is_last()

# output arff
outfile = tempfile.gettempdir() + os.sep + "test.arff"
saver = Saver(classname="weka.core.converters.ArffSaver")
saver.save_file(data, outfile)
f = open(outfile, 'r')
arff = f.read()
f.close()
print(arff)

# output xrff
outfile = tempfile.gettempdir() + os.sep + "test.xrff"
saver = Saver(classname="weka.core.converters.XRFFSaver")
saver.save_file(data, outfile)
f = open(outfile, 'r')
xrff = f.read()
f.close()
print(xrff)
Code example #21
File: class-5.5.py Project: sudhakar2205/wekamooc
import os
import tempfile

import weka.core.jvm as jvm
from weka.core.converters import Loader, Saver

jvm.start()

# load iris (gets rid of all the comments)
fname = data_dir + os.sep + "iris.arff"
print("\nLoading dataset: " + fname + "\n")
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file(fname)
data.class_is_last()

# output arff
outfile = tempfile.gettempdir() + os.sep + "test.arff"
saver = Saver(classname="weka.core.converters.ArffSaver")
saver.save_file(data, outfile)
f = open(outfile, 'r')
arff = f.read()
f.close()
print(arff)

# output xrff
outfile = tempfile.gettempdir() + os.sep + "test.xrff"
saver = Saver(classname="weka.core.converters.XRFFSaver")
saver.save_file(data, outfile)
f = open(outfile, 'r')
xrff = f.read()
f.close()
print(xrff)
Code example #22
 def save_dataset(self, filepath):
     saver = Saver(classname="weka.core.converters.CSVSaver")
     saver.save_file(self.dataset, filepath)
Code example #23
import weka.core.jvm as jvm
from weka.core.converters import Loader, Saver
import weka.plot.clusterers as plc

## Downloading the data set
## If you don't have the data set in the current directory uncomment the part below

##path = "http://archive.ics.uci.edu/ml/machine-learning-databases/00290/eb.arff"
##subprocess.call(["curl","-O", path])

#################################################################

## Start the jvm
jvm.start()

## Load the data set and specify the class label column
loader = Loader(classname="weka.core.converters.ArffLoader")
saver = Saver(classname="weka.core.converters.ArffSaver")
data = loader.load_file("eb.arff")

## Data summary: the portion below does a first-pass analysis of the data
print("Number of attributes:", data.num_attributes)
print("Number of instances:", data.num_instances)
print("Attributes:")
for i in range(0, data.num_attributes):
    print(data.attribute(i))
    print(data.attribute_stats(i))



####Uncomment the line below to plot the data points
####pld.scatter_plot(data, 0, 1)
Code example #24
    def getFunctionFeaturesArff(self, wholeSetArff, functionFeatureArff):
        data = self.load_Arff(wholeSetArff)
        filteredData = self.getSetDataBySetIndex(data, 2)

        saver = Saver()
        saver.save_file(filteredData, functionFeatureArff)
Code example #25
# Filter function
from weka.filters import Filter
print("Filter operation")
remove = Filter(classname="weka.filters.unsupervised.attribute.RemoveUseless",
                options=["-M", "99.0"])
remove.inputformat(data)
filtered = remove.filter(data)
print(filtered)

#EM imputation function
em = Filter(
    classname="weka.filters.unsupervised.attribute.ReplaceMissingValues")
em.inputformat(filtered)
em_imputed = em.filter(filtered)
print(em_imputed)
# weka.filters.unsupervised.attribute.EMImputation -N -1 -E 1.0E-4 -Q 1.0E-8
# Save the ARFF data to CSV
saver = Saver(classname="weka.core.converters.CSVSaver")
saver.save_file(em_imputed, "./Dataset/hepatitis.csv")
saver.save_file(filtered, "./Dataset/hepatitis_removed_useless.csv")
#saver.save_file(data, "./Dataset/hepatitis_weka.csv")

#converters.save_any_file("./final_small_em.csv")
#converters.save_any_file("/some/where/else/iris.csv")
#converters.save_any_file("/some/where/else/iris.csv")

#cls =Classifier(classname="weka.classifiers.trees.J48")

#print(cls)
jvm.stop()
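
The commented-out converters.save_any_file lines above point at python-weka-wrapper's convenience helper, which picks a converter from the file extension; note that it takes the dataset as its first argument, which those commented calls omit. A hedged sketch:

from weka.core import converters

# the .csv extension selects the CSVSaver automatically
converters.save_any_file(em_imputed, "./Dataset/final_small_em.csv")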
Code example #26
File: irdc.py Project: sudhakar2205/wekamooc
                    col = col.lower()
                    atts.append(Attribute.create_numeric(col))
                    if not ref_present and (idx == 0):
                        atts.append(
                            Attribute.create_numeric("reference value"))
                data = Instances.create_instances("irdc", atts, 0)
            else:
                values = []
                for idx, col in enumerate(row):
                    values.append(float(col))
                    if not ref_present and (idx == 0):
                        values.append(float('NaN'))
                inst = Instance.create_instance(values)
                data.add_instance(inst)

        saver = Saver(classname="weka.core.converters.ArffSaver")
        saver.save_file(data, data_dir + os.sep + outfile)

# train/test/predict
print("Train/test/predict...")

groups = ["DataSet1", "DataSet2"]
# groups = ["DataSet2"]

for group in groups:
    print(group)
    train = data_dir + os.sep + group + "_Cal.arff"
    test = data_dir + os.sep + group + "_Test.arff"
    pred = data_dir + os.sep + group + "_Val.arff"

    loader = Loader(classname="weka.core.converters.ArffLoader")
Code example #27
 def save_Arff(self, data, outputPath):
     saver = Saver()
     saver.save_file(data, outputPath)