def read_file(file_to_read):
    """Load an ARFF file and split it into train/test feature and label lists.

    Every 5th instance goes into the test split (~20% of the data); the
    rest become training data.  The last attribute of each instance is
    treated as the class label and decoded from bytes to str.

    Parameters:
    file_to_read -- path to the data file; must end with '.arff'

    Returns:
    (train_X, train_Y, test_X, test_Y) where *_X are lists of feature
    lists and *_Y are lists of string labels.

    Raises:
    ValueError -- if the filename does not end with '.arff' (the original
    printed a message and quit(0), which reported *success* to the shell).
    """
    train_X, train_Y = [], []
    test_X, test_Y = [], []
    # The file should be a .arff file.
    if not file_to_read.endswith('.arff'):
        raise ValueError("File should end with .arff, got: %s" % file_to_read)
    data, meta = arff.loadarff(file_to_read)
    # Taking every 5th element yields the 20% test split; the original
    # counter/split state machine is equivalent to `i % 5 == 4`.
    for i, row in enumerate(data):
        features = list(row.tolist())
        label = features.pop().decode('utf-8')  # last column is the label
        if i % 5 == 4:  # indices 4, 9, 14, ... go to the test set
            test_X.append(features)
            test_Y.append(label)
        else:
            train_X.append(features)
            train_Y.append(label)
    return train_X, train_Y, test_X, test_Y
# Example 2
def load_binary_class_data(arff_file):
    """Load training instances and binary labels from an ARFF file.

    The class attribute must be named 'Class' in the ARFF file.  We assume
    that its first and second declared values are the negative and positive
    labels respectively; negative labels are mapped to 0 and positive
    labels to 1.

    Returns:
    X - n_instances-length list of n_feat-length feature lists
    y - n_instances-length list of integers, either 0 or 1
    negative_label - the value declared first for 'Class'
    positive_label - the value declared second for 'Class'

    Raises:
    ValueError - if an instance carries a label other than the two declared
    (the original used a bare assert, which is stripped under `python -O`).
    """

    # Load ARFF file
    data, metadata = arff.loadarff(arff_file)

    # metadata['Class'] is a (type, values) pair; the values tuple holds
    # the declared nominal labels in order: negative first, positive second.
    _nominality, labels = metadata['Class']
    negative_label, positive_label = labels

    # X is a list of instances, each itself a list of features;
    # y is the parallel list of 0/1 labels.
    X, y = [], []
    for instance in data:

        # All fields but the last are features; the last is the class label.
        features = list(instance.tolist()[:-1])
        # scipy returns nominal cell values as bytes while the metadata
        # labels are str, so decode before comparing (the original
        # bytes-vs-str comparison could never succeed on Python 3).
        string_label = instance[-1].decode('utf-8')

        # Check that the label is one of the two declared values.
        if string_label not in (negative_label, positive_label):
            raise ValueError("unexpected label %r in %s" % (string_label, arff_file))

        # Convert label from string to 0 or 1 and push into the matrices.
        y.append(0 if string_label == negative_label else 1)
        X.append(features)

    return X, y, negative_label, positive_label
from flask import (
    Flask,
    render_template,
    redirect,
    request,
)
from mlxtend.frequent_patterns import (
    apriori,
    association_rules,
)
from scipy.io import arff

load_dotenv()
app = Flask(__name__)

# loadarff accepts a filename directly; the original passed an open text
# handle that was never closed (a file-object leak).
data, meta = arff.loadarff('datasets/supermarket.arff')
df = pd.DataFrame(data).drop('total', axis=1)


# PEP 8 (E731): use a def rather than binding a lambda to a name.
def map_f(v):
    """Binarise a cell: missing (b'?') or b'low' -> 0, anything else -> 1."""
    return 0 if v in (b'?', b'low') else 1


df = df.applymap(map_f)

# Mine itemsets appearing in at least 10% of baskets, then keep
# association rules with lift above 1.
frequent_itemsets = apriori(df, min_support=0.1, use_colnames=True)
rules = association_rules(frequent_itemsets, metric='lift', min_threshold=1)

itemset_count = len(frequent_itemsets)
rules_count = len(rules)

# All distinct items seen in any frequent itemset, sorted for stable display.
items = sorted(
    set(itertools.chain.from_iterable(frequent_itemsets.itemsets.values)))
basket = set()
# Parse command-line arguments (no named options; positional only).
parser = optparse.OptionParser()
options, args = parser.parse_args()
# Three positional arguments are required:
#   1. training ARFF filename
#   2. test ARFF filename
#   3. minimum number of instances to allow a node to split
# The original comment described only two of them and validated with a
# bare assert, which is stripped under `python -O`; parser.error prints
# usage and exits with a non-zero status instead.
if len(args) != 3:
    parser.error("expected 3 arguments: TRAIN_ARFF TEST_ARFF MIN_INSTANCES")

train_filename, test_filename, min_instances = args
min_instances = int(min_instances)  # Cast to integer

#################### Declare inputs for learning ####################

# Load ARFF file
data, metadata = arff.loadarff(train_filename)

# Change data to a Python-native list; each element is one instance row.
data_list = [row for row in data]

# Length n+1 list of booleans for whether each feature is nominal
# (a feature is numeric if it is not nominal); the extra entry is the
# class attribute's type.
norminalities = [type_ == "nominal" for type_ in metadata.types()]

# value_enumerations[i] is the tuple of all possible values of feature i
# (empty/None-like for numeric attributes).
value_enumerations = []
for name in metadata.names():
    norminality, value_enumeration = metadata[name]
    value_enumerations.append(value_enumeration)
# Example 5
import matplotlib.pyplot as plt

# Parse command-line arguments (no named options are defined; only
# positional arguments are used).
parser = optparse.OptionParser()
options, args = parser.parse_args()
# NOTE(review): a bare assert vanishes under `python -O`; consider
# parser.error() for argument validation.
assert len(args) == 2

# First positional argument: name of the training ARFF file
# Second positional argument: name of the test ARFF file
# (the original comment wrongly described the second argument as a
# minimum-instances count, copied from a sibling script).
train_filename, test_filename = args

#################### Declare inputs for learning ####################

# Load ARFF file
data, metadata = arff.loadarff(train_filename)

# Change data to Python native list of lists
#data_list = [[x for x in list_] for list_ in data]
data_list = [list_ for list_ in data]

# Length n+1 list of booleans for whether each feature is nominal
# ("norminal" follows the variable spelling used below); a feature is
# numeric if it is not nominal.  The additional 1 is the class feature type.
norminalities = [type_ == 'nominal' for type_ in metadata.types()]

# enumeration i is a tuple of all possible values of feature i
value_enumerations = []
for name in metadata.names():
    norminality, value_enumeration = metadata[name]
    value_enumerations.append(value_enumeration)
# Example 6
        test_F1 = SVC(train_Data, train_Defective, test_Data, test_Defective)
    except:
        func(myset)
    return test_auc, test_F1


'''新建数据表'''
wb = Workbook()
ws = wb.active
'''fileList = ['CM1', 'KC1', 'KC3', 'MC2', 'MW1', 'PC1', 'PC2', 'PC3', 'PC4', 'JM1', 'MC1']#, 'PC5']'''
'''JM1 MC1 PC5'''
'''for filename in fileList:'''
filename = 'CM1'
ws.append([filename])
'''导入数据'''
dataset, mate = arff.loadarff(filename + '.arff')
#dataset = arff.load(open(filename+'.arff'))
#dataset_list = list(dataset)
#originData = np.array(dataset)
dataset = pandas.DataFrame(dataset)
originData = np.array(dataset)
data, instance, attribute, Defective = getdata(originData)
DefectiveNum = getDefectiveNum(Defective)
'''标准化'''
data = data.astype('float64')
scaled = preprocessing.scale(data)
#print(scaled)
'''计算k'''
'''k = np.sum(Defective == "N")/np.sum(Defective == "Y")
if k == k:
    k = int(k)
# Example 7
import numpy as np
import arff
# NOTE(review): this mixes two ARFF libraries' APIs under one name.
# `arff.load(open(...))` is the liac-arff interface, while
# `arff.loadarff(...)` below belongs to scipy.io.arff; a single module
# named `arff` cannot provide both — confirm which package is installed.
# The handle opened here is also never closed, and liac-arff's load()
# expects a text-mode handle rather than 'rb'.
data = arff.load(open('chronic_kidney_disease_full.arff', 'rb'))
print(data)
# Re-load the same file via the scipy-style API and dump data + metadata.
data, meta = arff.loadarff('chronic_kidney_disease_full.arff')
print(data)
print(meta)


class LogReg:

    def __init__(self, loops, a=0.01):
        self.lr = a
        self.loops = loops



    def cost(self, h, y):
        """Mean binary cross-entropy between predictions `h` and targets `y`."""
        losses = -(y * np.log(h) + (1 - y) * np.log(1 - h))
        return losses.mean()

    def sigmoid(z):
        y = 1 / (1 + np.exp(-z))
        return y

    def fit(self, X, y):

        # weights vector
        self.theta = np.zeros(X.shape[1])
        # weights training
        for i in range(self.loops):