Exemple #1
0
    def df_to_instances(self):
        '''
        transform pandas data frame to arff style data
        :param df:              panda data frame
        :param relation:        relation, string
        :param attr_label:      label attribute, string
        :return:                arff style data
        '''

        atts = []
        for col in self.df.columns:
            if col != self.attr_label:
                att = Attribute.create_numeric(col)
            else:
                att = Attribute.create_nominal(col, ['0', '1'])
            atts.append(att)
        nrow = len(self.df)
        result = Instances.create_instances(self.relation, atts, nrow)
        # data
        for i in range(nrow):
            inst = Instance.create_instance(
                self.df.iloc[i].astype('float64').to_numpy().copy(order='C'))
            result.add_instance(inst)

        return result
Exemple #2
0
def ndarray_to_instances(array, relation, att_template="Att-#", att_list=None):
    """
    Converts the numpy matrix into an Instances object and returns it.

    :param array: the numpy ndarray to convert
    :type array: numpy.darray
    :param relation: the name of the dataset
    :type relation: str
    :param att_template: the prefix to use for the attribute names, "#" is the 1-based index,
                         "!" is the 0-based index, "@" the relation name
    :type att_template: str
    :param att_list: the list of attribute names to use
    :type att_list: list
    :return: the generated instances object
    :rtype: Instances
    """
    if len(numpy.shape(array)) != 2:
        raise Exception("Number of array dimensions must be 2!")
    rows, cols = numpy.shape(array)

    # header
    atts = []
    if att_list is not None:
        if len(att_list) != cols:
            raise Exception(
                "Number columns and provided attribute names differ: " +
                str(cols) + " != " + len(att_list))
        for name in att_list:
            att = Attribute.create_numeric(name)
            atts.append(att)
    else:
        for i in range(cols):
            name = att_template.replace("#", str(i + 1)).replace(
                "!", str(i)).replace("@", relation)
            att = Attribute.create_numeric(name)
            atts.append(att)
    result = Instances.create_instances(relation, atts, rows)

    # data
    for i in range(rows):
        inst = Instance.create_instance(array[i])
        result.add_instance(inst)

    return result
def ndarray_to_instances(array, relation, att_template="Att-#", att_list=None):
    """
    Converts the numpy matrix into an Instances object and returns it.

    :param array: the numpy ndarray to convert
    :type array: numpy.darray
    :param relation: the name of the dataset
    :type relation: str
    :param att_template: the prefix to use for the attribute names, "#" is the 1-based index,
                         "!" is the 0-based index, "@" the relation name
    :type att_template: str
    :param att_list: the list of attribute names to use
    :type att_list: list
    :return: the generated instances object
    :rtype: Instances
    """
    if len(numpy.shape(array)) != 2:
        raise Exception("Number of array dimensions must be 2!")
    rows, cols = numpy.shape(array)

    # header
    atts = []
    if att_list is not None:
        if len(att_list) != cols:
            raise Exception(
                "Number columns and provided attribute names differ: " + str(cols) + " != " + len(att_list))
        for name in att_list:
            att = Attribute.create_numeric(name)
            atts.append(att)
    else:
        for i in xrange(cols):
            name = att_template.replace("#", str(i+1)).replace("!", str(i)).replace("@", relation)
            att = Attribute.create_numeric(name)
            atts.append(att)
    result = Instances.create_instances(relation, atts, rows)

    # data
    for i in xrange(rows):
        inst = Instance.create_instance(array[i])
        result.add_instance(inst)

    return result
def classify_level(sent, classifier, stats, params={}, match={}):
    """
    Classifies the CEFR level of 'sent'.
    2016 june - based on check_readability() in sent_match.py
    @ sent:     
    @ stats:    SentStatistics instance
    @ params:   parameters for SentMatch (HitEx)
    @ match:    SentMatch instance
    # TO DO: add argument for choosing bw WEKA and sklearn
             adapt to both sents and texts
             in- vs cross-domain setups 
    """
    sent_feats = SentFeatures(sent, stats, params)
    fs = sent_feats.features
    feature_names = fs.keys()
    # set the order of training attributes for values
    with codecs.open("auxiliaries/feature_names.txt") as f:
        train_fn = [l.strip("\n") for l in f.readlines()]
    f_list = [fs[tfn] for tfn in train_fn]

    # create Instance, attributes and a dummy dataset (required for prediction)
    inst = Instance.create_instance(f_list)
    attributes = []
    for feat_n in train_fn:
        attributes.append(Attribute.create_numeric(feat_n))
    attributes.append(
        Attribute.create_nominal("level", ["A1", "A2", "B1", "B2", "C1"]))
    dataset = Instances.create_instances("readability", attributes, 0)
    dataset.add_instance(inst)
    dataset.class_is_last()

    # make prediction
    cefr_mapping = {"A1": 1.0, "A2": 2.0, "B1": 3.0, "B2": 4.0, "C1": 5.0}
    trg_cefr_fl = cefr_mapping[params["target_cefr"]]
    for instance in dataset:
        pred = classifier.classify_instance(instance)
        pred_cefr = pred + 1
        #if pred_cefr < 1 or pred_cefr > 5:
        level_diff = pred_cefr - trg_cefr_fl  # negative value = easier than target
        nominal_level = [k for k, v in cefr_mapping.items()
                         if v == pred_cefr][0]

    return (level_diff, nominal_level, fs
            )  #return also fs -> for detailed info in webservice
Exemple #5
0
def main():
    """
    Creates a dataset from scratch using random data and outputs it.
    """

    atts = []
    for i in range(5):
        atts.append(Attribute.create_numeric("x" + str(i)))

    data = Instances.create_instances("data", atts, 10)

    for n in range(10):
        values = []
        for i in range(5):
            values.append(n * 100 + i)
        inst = Instance.create_instance(values)
        data.add_instance(inst)

    print(data)
def main():
    """
    Creates a dataset from scratch using random data and outputs it.
    """

    atts = []
    for i in xrange(5):
        atts.append(Attribute.create_numeric("x" + str(i)))

    data = Instances.create_instances("data", atts, 10)

    for n in xrange(10):
        values = []
        for i in xrange(5):
            values.append(n*100 + i)
        inst = Instance.create_instance(values)
        data.add_instance(inst)

    print(data)
Exemple #7
0
def sklearn_input_to_weka(X, y=None, labels=None):
    from weka.core.dataset import Attribute, Instances, Instance
    attribs = []
    for i in range(len(X[0])):
        attribs.append(Attribute.create_numeric('x_{}'.format(i)))
    if labels is None and y is not None:
        labels = [str(label) for label in np.unique(y)]
    attribs.append(Attribute.create_nominal('y', labels))
    n_rows = len(X)
    instances = Instances.create_instances('data', attribs, n_rows)
    for i in range(n_rows):
        if y is None:
            row = [*X[i], '0']
        elif isinstance(y, pd.Series):
            row = [*X[i], y.iloc[i]]
        else:
            row = [*X[i], y[i]]
        instances.add_instance(Instance.create_instance(row))
    instances.class_is_last()
    return instances, labels
Exemple #8
0
def create_attributes(lang, tag):
    attr = []
    n_feature = 0
    tag_list = ""
    tag_feature = ""

    if lang == LANG_ID:
        n_feature = ID_N_FEATURE
        tag_list = ID_TAG
        tag_feature = ID_TAG_FEATURE
    elif lang == LANG_EN:
        n_feature = EN_N_FEATURE
        tag_list = EN_TAG
        tag_feature = EN_TAG_FEATURE
    for i in range(0, n_feature):
        for tag in tag_list:
            for ftr in tag_feature:
                attr.append(
                    Attribute.create_numeric(tag + str(i + 1) + "_" + ftr))
    attr.append(Attribute.create_nominal(tag + "_class", []))
    return attr
Exemple #9
0
for infile in files:
    with open(data_dir + os.sep + infile, "rb") as csvfile:
        print(infile)
        outfile = os.path.splitext(infile)[0] + ".arff"
        reader = csv.reader(csvfile)
        data = None
        ref_present = True
        for index, row in enumerate(reader):
            if index == 0:
                atts = []
                ref_present = ("Reference value" in row) or ("Reference Value"
                                                             in row)
                for idx, col in enumerate(row):
                    col = col.lower()
                    atts.append(Attribute.create_numeric(col))
                    if not ref_present and (idx == 0):
                        atts.append(
                            Attribute.create_numeric("reference value"))
                data = Instances.create_instances("irdc", atts, 0)
            else:
                values = []
                for idx, col in enumerate(row):
                    values.append(float(col))
                    if not ref_present and (idx == 0):
                        values.append(float('NaN'))
                inst = Instance.create_instance(values)
                data.add_instance(inst)

        saver = Saver(classname="weka.core.converters.ArffSaver")
        saver.save_file(data, data_dir + os.sep + outfile)
def main():
    """
    Just runs some example code.
    """

    # load a dataset
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    iris_data = loader.load_file(iris_file)
    iris_data.class_is_last()
    helper.print_title("Iris dataset")
    print(iris_data)
    helper.print_title("Iris dataset (incrementally output)")
    for i in iris_data:
        print(i)
    helper.print_title("Iris summary")
    print(Instances.summary(iris_data))
    helper.print_title("Iris attributes")
    for a in iris_data.attributes():
        print(a)
    helper.print_title("Instance at #0")
    print(iris_data.get_instance(0))
    print(iris_data.get_instance(0).values)
    print("Attribute stats (first):\n" + str(iris_data.attribute_stats(0)))
    print("total count (first attribute):\n" + str(iris_data.attribute_stats(0).total_count))
    print("numeric stats (first attribute):\n" + str(iris_data.attribute_stats(0).numeric_stats))
    print("nominal counts (last attribute):\n"
          + str(iris_data.attribute_stats(iris_data.num_attributes - 1).nominal_counts))
    helper.print_title("Instance values at #0")
    for v in iris_data.get_instance(0):
        print(v)

    # append datasets
    helper.print_title("append datasets")
    data1 = Instances.copy_instances(iris_data, 0, 2)
    data2 = Instances.copy_instances(iris_data, 2, 2)
    print("Dataset #1:\n" + str(data1))
    print("Dataset #2:\n" + str(data2))
    msg = data1.equal_headers(data2)
    print("#1 == #2 ? " + "yes" if msg is None else msg)
    combined = Instances.append_instances(data1, data2)
    print("Combined:\n" + str(combined))

    # merge datasets
    helper.print_title("merge datasets")
    data1 = Instances.copy_instances(iris_data, 0, 2)
    data1.class_index = -1
    data1.delete_attribute(1)
    data1.delete_first_attribute()
    data2 = Instances.copy_instances(iris_data, 0, 2)
    data2.class_index = -1
    data2.delete_attribute(4)
    data2.delete_attribute(3)
    data2.delete_attribute(2)
    print("Dataset #1:\n" + str(data1))
    print("Dataset #2:\n" + str(data2))
    msg = data1.equal_headers(data2)
    print("#1 == #2 ? " + ("yes" if msg is None else msg))
    combined = Instances.merge_instances(data2, data1)
    print("Combined:\n" + str(combined))

    # load dataset incrementally
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset incrementally: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    iris_data = loader.load_file(iris_file, incremental=True)
    iris_data.class_is_last()
    helper.print_title("Iris dataset")
    print(iris_data)
    for inst in loader:
        print(inst)

    # create attributes
    helper.print_title("Creating attributes")
    num_att = Attribute.create_numeric("num")
    print("numeric: " + str(num_att))
    date_att = Attribute.create_date("dat", "yyyy-MM-dd")
    print("date: " + str(date_att))
    nom_att = Attribute.create_nominal("nom", ["label1", "label2"])
    print("nominal: " + str(nom_att))

    # create dataset
    helper.print_title("Create dataset")
    dataset = Instances.create_instances("helloworld", [num_att, date_att, nom_att], 0)
    print(str(dataset))

    # create an instance
    helper.print_title("Create and add instance")
    values = [3.1415926, date_att.parse_date("2014-04-10"), 1.0]
    inst = Instance.create_instance(values)
    print("Instance #1:\n" + str(inst))
    dataset.add_instance(inst)
    values = [2.71828, date_att.parse_date("2014-08-09"), Instance.missing_value()]
    inst = Instance.create_instance(values)
    dataset.add_instance(inst)
    print("Instance #2:\n" + str(inst))
    inst.set_value(0, 4.0)
    print("Instance #2 (updated):\n" + str(inst))
    print("Dataset:\n" + str(dataset))
    dataset.delete_with_missing(2)
    print("Dataset (after delete of missing):\n" + str(dataset))
    values = [(1, date_att.parse_date("2014-07-11"))]
    inst = Instance.create_sparse_instance(values, 3, classname="weka.core.SparseInstance")
    print("sparse Instance:\n" + str(inst))
    dataset.add_instance(inst)
    print("dataset with mixed dense/sparse instance objects:\n" + str(dataset))

    # create dataset (lists)
    helper.print_title("Create dataset from lists")
    x = [[randint(1, 10) for _ in range(5)] for _ in range(10)]
    y = [randint(0, 1) for _ in range(10)]
    dataset2 = ds.create_instances_from_lists(x, y, "generated from lists")
    print(dataset2)
    x = [[randint(1, 10) for _ in range(5)] for _ in range(10)]
    dataset2 = ds.create_instances_from_lists(x, name="generated from lists (no y)")
    print(dataset2)

    # create dataset (matrices)
    helper.print_title("Create dataset from matrices")
    x = np.random.randn(10, 5)
    y = np.random.randn(10)
    dataset3 = ds.create_instances_from_matrices(x, y, "generated from matrices")
    print(dataset3)
    x = np.random.randn(10, 5)
    dataset3 = ds.create_instances_from_matrices(x, name="generated from matrices (no y)")
    print(dataset3)

    # create more sparse instances
    diabetes_file = helper.get_data_dir() + os.sep + "diabetes.arff"
    helper.print_info("Loading dataset: " + diabetes_file)
    loader = Loader("weka.core.converters.ArffLoader")
    diabetes_data = loader.load_file(diabetes_file)
    diabetes_data.class_is_last()
    helper.print_title("Create sparse instances using template dataset")
    sparse_data = Instances.template_instances(diabetes_data)
    for i in range(diabetes_data.num_attributes - 1):
        inst = Instance.create_sparse_instance(
            [(i, float(i+1) / 10.0)], sparse_data.num_attributes, classname="weka.core.SparseInstance")
        sparse_data.add_instance(inst)
    print("sparse dataset:\n" + str(sparse_data))

    # simple scatterplot of iris dataset: petalwidth x petallength
    iris_data = loader.load_file(iris_file)
    iris_data.class_is_last()
    pld.scatter_plot(
        iris_data, iris_data.attribute_by_name("petalwidth").index,
        iris_data.attribute_by_name("petallength").index,
        percent=50,
        wait=False)

    # line plot of iris dataset (without class attribute)
    iris_data = loader.load_file(iris_file)
    iris_data.class_is_last()
    pld.line_plot(iris_data, atts=range(iris_data.num_attributes - 1), percent=50, title="Line plot iris", wait=False)

    # matrix plot of iris dataset
    iris_data = loader.load_file(iris_file)
    iris_data.class_is_last()
    pld.matrix_plot(iris_data, percent=50, title="Matrix plot iris", wait=True)
def main():
    """
    Just runs some example code.
    """

    # load a dataset
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    iris_data = loader.load_file(iris_file)
    iris_data.class_is_last()
    helper.print_title("Iris dataset")
    print(iris_data)
    helper.print_title("Iris dataset (incrementally output)")
    for i in iris_data:
        print(i)
    helper.print_title("Iris summary")
    print(Instances.summary(iris_data))
    helper.print_title("Iris attributes")
    for a in iris_data.attributes():
        print(a)
    helper.print_title("Instance at #0")
    print(iris_data.get_instance(0))
    print(iris_data.get_instance(0).values)
    print("Attribute stats (first):\n" + str(iris_data.attribute_stats(0)))
    print("total count (first attribute):\n" +
          str(iris_data.attribute_stats(0).total_count))
    print("numeric stats (first attribute):\n" +
          str(iris_data.attribute_stats(0).numeric_stats))
    print("nominal counts (last attribute):\n" + str(
        iris_data.attribute_stats(iris_data.num_attributes -
                                  1).nominal_counts))
    helper.print_title("Instance values at #0")
    for v in iris_data.get_instance(0):
        print(v)

    # append datasets
    helper.print_title("append datasets")
    data1 = Instances.copy_instances(iris_data, 0, 2)
    data2 = Instances.copy_instances(iris_data, 2, 2)
    print("Dataset #1:\n" + str(data1))
    print("Dataset #2:\n" + str(data2))
    msg = data1.equal_headers(data2)
    print("#1 == #2 ? " + "yes" if msg is None else msg)
    combined = Instances.append_instances(data1, data2)
    print("Combined:\n" + str(combined))

    # merge datasets
    helper.print_title("merge datasets")
    data1 = Instances.copy_instances(iris_data, 0, 2)
    data1.class_index = -1
    data1.delete_attribute(1)
    data1.delete_first_attribute()
    data2 = Instances.copy_instances(iris_data, 0, 2)
    data2.class_index = -1
    data2.delete_attribute(4)
    data2.delete_attribute(3)
    data2.delete_attribute(2)
    print("Dataset #1:\n" + str(data1))
    print("Dataset #2:\n" + str(data2))
    msg = data1.equal_headers(data2)
    print("#1 == #2 ? " + ("yes" if msg is None else msg))
    combined = Instances.merge_instances(data2, data1)
    print("Combined:\n" + str(combined))

    # load dataset incrementally
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset incrementally: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    iris_data = loader.load_file(iris_file, incremental=True)
    iris_data.class_is_last()
    helper.print_title("Iris dataset")
    print(iris_data)
    for inst in loader:
        print(inst)

    # create attributes
    helper.print_title("Creating attributes")
    num_att = Attribute.create_numeric("num")
    print("numeric: " + str(num_att))
    date_att = Attribute.create_date("dat", "yyyy-MM-dd")
    print("date: " + str(date_att))
    nom_att = Attribute.create_nominal("nom", ["label1", "label2"])
    print("nominal: " + str(nom_att))

    # create dataset
    helper.print_title("Create dataset")
    dataset = Instances.create_instances("helloworld",
                                         [num_att, date_att, nom_att], 0)
    print(str(dataset))

    # create an instance
    helper.print_title("Create and add instance")
    values = [3.1415926, date_att.parse_date("2014-04-10"), 1.0]
    inst = Instance.create_instance(values)
    print("Instance #1:\n" + str(inst))
    dataset.add_instance(inst)
    values = [
        2.71828,
        date_att.parse_date("2014-08-09"),
        Instance.missing_value()
    ]
    inst = Instance.create_instance(values)
    dataset.add_instance(inst)
    print("Instance #2:\n" + str(inst))
    inst.set_value(0, 4.0)
    print("Instance #2 (updated):\n" + str(inst))
    print("Dataset:\n" + str(dataset))
    dataset.delete_with_missing(2)
    print("Dataset (after delete of missing):\n" + str(dataset))
    values = [(1, date_att.parse_date("2014-07-11"))]
    inst = Instance.create_sparse_instance(
        values, 3, classname="weka.core.SparseInstance")
    print("sparse Instance:\n" + str(inst))
    dataset.add_instance(inst)
    print("dataset with mixed dense/sparse instance objects:\n" + str(dataset))

    # create dataset (lists)
    helper.print_title("Create dataset from lists")
    x = [[randint(1, 10) for _ in range(5)] for _ in range(10)]
    y = [randint(0, 1) for _ in range(10)]
    dataset2 = ds.create_instances_from_lists(x, y, "generated from lists")
    print(dataset2)
    x = [[randint(1, 10) for _ in range(5)] for _ in range(10)]
    dataset2 = ds.create_instances_from_lists(
        x, name="generated from lists (no y)")
    print(dataset2)

    # create dataset (matrices)
    helper.print_title("Create dataset from matrices")
    x = np.random.randn(10, 5)
    y = np.random.randn(10)
    dataset3 = ds.create_instances_from_matrices(x, y,
                                                 "generated from matrices")
    print(dataset3)
    x = np.random.randn(10, 5)
    dataset3 = ds.create_instances_from_matrices(
        x, name="generated from matrices (no y)")
    print(dataset3)

    # create more sparse instances
    diabetes_file = helper.get_data_dir() + os.sep + "diabetes.arff"
    helper.print_info("Loading dataset: " + diabetes_file)
    loader = Loader("weka.core.converters.ArffLoader")
    diabetes_data = loader.load_file(diabetes_file)
    diabetes_data.class_is_last()
    helper.print_title("Create sparse instances using template dataset")
    sparse_data = Instances.template_instances(diabetes_data)
    for i in xrange(diabetes_data.num_attributes - 1):
        inst = Instance.create_sparse_instance(
            [(i, float(i + 1) / 10.0)],
            sparse_data.num_attributes,
            classname="weka.core.SparseInstance")
        sparse_data.add_instance(inst)
    print("sparse dataset:\n" + str(sparse_data))

    # simple scatterplot of iris dataset: petalwidth x petallength
    iris_data = loader.load_file(iris_file)
    iris_data.class_is_last()
    pld.scatter_plot(iris_data,
                     iris_data.attribute_by_name("petalwidth").index,
                     iris_data.attribute_by_name("petallength").index,
                     percent=50,
                     wait=False)

    # line plot of iris dataset (without class attribute)
    iris_data = loader.load_file(iris_file)
    iris_data.class_is_last()
    pld.line_plot(iris_data,
                  atts=xrange(iris_data.num_attributes - 1),
                  percent=50,
                  title="Line plot iris",
                  wait=False)

    # matrix plot of iris dataset
    iris_data = loader.load_file(iris_file)
    iris_data.class_is_last()
    pld.matrix_plot(iris_data, percent=50, title="Matrix plot iris", wait=True)
Exemple #12
0
def main():
    global stop_spinning, name, upper_clothing, lower_clothing, outer_clothing, shoes_clothing, upper_indices, lower_indices, outer_indices, shoes_indices
    '''
    Classifies clothing using stored classification models for each user
    '''
    FSM = ClothingFSM()
    #FSM.username_server()

    clothingdb = MySQLdb.connect(host="localhost",
                                 user="******",
                                 passwd="mypassword", # Change to your SQL DB password
                                 db = "userprofiles")
    cursor = clothingdb.cursor()

    cursor.execute("SELECT * FROM clothing")

    name = "Study"

    #Populate clothing dictionaries with user's wardrobe
    for row in cursor.fetchall():
        print str(row[2])
        print str(row[6])
        if str(row[0]) == name:
            if str(row[1]) == "Upper Body":
                try:
                    upper_clothing[row[2]].append(row[6])
                except:
                    print "Problem appending clothing to dictionary"
            if str(row[1]) == "Lower Body":
                try:
                    lower_clothing[row[3]].append(row[6])
                except:
                    print "Problem appending clothing to dictionary"
            if str(row[1]) == "Outerwear":
                try:
                    outer_clothing[row[4]].append(row[6])
                except:
                    print "Problem appending clothing to dictionary"
            if str(row[1]) == "Shoes":
                try:
                    shoes_clothing[row[5]].append(row[6])
                except:
                    print "Problem appending clothing to dictionary"
    
    print upper_clothing, lower_clothing, outer_clothing, shoes_clothing
    # FSM.received_user_info()

    #In final program, we will receive this information from database


    #Set to true or false if receiving features vs testing defaults
    receive_features = True

    if receive_features == False:
        #Wait to Receive input 

        #Example inputs from user/weather API
        features['casual_formal'] = 3
        #5 is very comfortable 1 is not comfortable
        features['comfort'] = 3
        #1 is not snowing 2 is light snow 3 is heavy snow
        features['snow'] = 1
        #1 is not raining 3 is raining(no medium)
        features['rain'] = 3
        #If user is spending their time mostly outside, set warmth to outsidewarmth. If not, set warmth
        features['warmth'] = 1
        features['outside_warmth'] = 4
        #1 is no 0 is yes
        features['athletic'] = 1

        snowstring = ''
        rainstring = ''
        athleticstring = ''
    
    else:
        FSM.features_server()
    
    
    
 
    upper_array = [None] * 14
    lower_array = [None] * 7
    outer_array = [None] * 3
    shoes_array = [None] * 4
    upper_prediction_array = []
    lower_prediction_array = []
    outer_prediction_array = []
    shoes_prediction_array = []

    warmth_att = Attribute.create_numeric("Warmth")
    comfort_att = Attribute.create_numeric("Comfort")
    casual_att = Attribute.create_numeric("Casual")
    rain_att = Attribute.create_numeric("Rain")
    snow_att = Attribute.create_numeric("Snow")
    athletic_att = Attribute.create_numeric("Athletic")

    
    upper_attributes = [warmth_att, casual_att, comfort_att, athletic_att]
    lower_attributes = [warmth_att, casual_att, comfort_att, athletic_att]
    outer_attributes = [warmth_att, casual_att, comfort_att, snow_att, rain_att]
    shoes_attributes = [casual_att, comfort_att, athletic_att]

    Instances.create_instances("upper_instances", upper_attributes, 0)
    Instances.create_instances("lower_instances", lower_attributes, 0)
    Instances.create_instances("outer_instances", outer_attributes, 0)
    Instances.create_instances("shoes_instances", shoes_attributes, 0)

    #Simulate their wardrobe
    #Upper
    # Tank Top
    if len(upper_clothing['Tank Top']) == 0:
        upper_array[0] = 0 
    else:
        upper_array[0] = 1 
    # T-Shirt
    if len(upper_clothing['T-Shirt']) == 0:
        upper_array[1] = 0 
    else:
        upper_array[1] = 1 
    # Long-Sleeved Shirt
    if len(upper_clothing['Long-sleeved Shirt']) == 0:
        upper_array[2] = 0 
    else:
        upper_array[2] = 1 
    # Athletic Top
    if len(upper_clothing['Athletic Top']) == 0:
        upper_array[3] = 0 
    else:
        upper_array[3] = 1     
    # Button-down Shirt
    if len(upper_clothing['Button-down Shirt']) == 0:
        upper_array[4] = 0 
    else:
        upper_array[4] = 1     
    # Polo Shirt
    if len(upper_clothing['Polo Shirt']) == 0:
        upper_array[5] = 0 
    else:
        upper_array[5] = 1  
    # Dress Shirt
    if len(upper_clothing['Dress Shirt']) == 0:
        upper_array[6] = 0 
    else:
        upper_array[6] = 1  
    # Suit Jacket
    if len(upper_clothing['Suit Jacket']) == 0:
        upper_array[7] = 0 
    else:
        upper_array[7] = 1  
    # Blazer
    if len(upper_clothing['Blazer']) == 0:
        upper_array[8] = 0 
    else:
        upper_array[8] = 1  
    # Hoodie
    if len(upper_clothing['Hoodie']) == 0:
        upper_array[9] = 0 
    else:
        upper_array[9] = 1  
    # Sweater
    if len(upper_clothing['Sweater']) == 0:
        upper_array[10] = 0 
    else:
        upper_array[10] = 1  
    # Blouse
    if len(upper_clothing['Blouse']) == 0:
        upper_array[11] = 0 
    else:
        upper_array[11] = 1

    # Day Dress
    if len(upper_clothing['Day Dress']) == 0:
        upper_array[12] = 0 
    else:
        upper_array[12] = 1
    # Evening Dress
    if len(upper_clothing['Evening Dress']) == 0:
        upper_array[13] = 0 
    else:
        upper_array[13] = 1

    #Lower

    # Regular Shorts
    if len(lower_clothing['Shorts']) == 0:
        lower_array[0] = 0 
    else:
        lower_array[0] = 1
    # Athletic Shorts
    if len(lower_clothing['Athletic Shorts']) == 0:
        lower_array[1] = 0 
    else:
        lower_array[1] = 1
    # Athletic Pants
    if len(lower_clothing['Athletic Pants']) == 0:
        lower_array[2] = 0 
    else:
        lower_array[2] = 1
    # Jeans
    if len(lower_clothing['Jeans']) == 0:
        lower_array[3] = 0 
    else:
        lower_array[3] = 1
    # Trousers
    if len(lower_clothing['Trousers']) == 0:
        lower_array[4] = 0 
    else:
        lower_array[4] = 1
    # Skirt
    if len(lower_clothing['Skirt']) == 0:
        lower_array[5] = 0 
    else:
        lower_array[5] = 1
    # Dress Pants
    if len(lower_clothing['Dress Pants']) == 0:
        lower_array[6] = 0 
    else:
        lower_array[6] = 1

    #Outer
    # Light Jacket
    if len(outer_clothing['Light Jacket']) == 0:
        outer_array[0] = 0 
    else:
        outer_array[0] = 1
    # Heavy Jacket
    if len(outer_clothing['Winter Jacket']) == 0:
        outer_array[1] = 0 
    else:
        outer_array[1] = 1
    # Rain Jacket
    if len(outer_clothing['Rain Jacket']) == 0:
        outer_array[2] = 0 
    else:
        outer_array[2] = 1
    
    #Shoes 
    # Casual Shoes
    if len(shoes_clothing['Casual Shoes']) == 0:
        shoes_array[0] = 0 
    else:
        shoes_array[0] = 1
    # Athletic Shoes
    if len(shoes_clothing['Athletic Shoes']) == 0:
        shoes_array[1] = 0 
    else:
        shoes_array[1] = 1
    # Dress Shoes
    if len(shoes_clothing['Dress Shoes']) == 0:
        shoes_array[2] = 0 
    else:
        shoes_array[2] = 1
    # Dressy Casual  Shoes
    if len(shoes_clothing['Business Casual Shoes']) == 0:
        shoes_array[3] = 0 
    else:
        shoes_array[3] = 1
    

    upper_list = [features['outside_warmth'], features['casual_formal'], features['comfort'], features['athletic']]
    lower_list = [features['outside_warmth'], features['casual_formal'], features['comfort'], math.fabs(1-features['athletic'])]
    outer_list = [features['outside_warmth'], features['casual_formal'], features['comfort'], features['rain'], features['snow']]
    shoes_list = [features['casual_formal'], features['comfort'], math.fabs(1-features['athletic'])]
    upper_instance = Instance.create_instance(upper_list, classname='weka.core.DenseInstance', weight= 1.0)
    lower_instance = Instance.create_instance(lower_list, classname='weka.core.DenseInstance', weight= 1.0)
    outer_instance = Instance.create_instance(outer_list, classname='weka.core.DenseInstance', weight= 1.0)
    shoes_instance = Instance.create_instance(shoes_list, classname='weka.core.DenseInstance', weight= 1.0)

    upper_path = '/home/leo/models/uppermodel2.model'
    lower_path = '/home/leo/models/lowermodel2.model'
    outer_path = '/home/leo/models/outermodel2.model'
    shoes_path = '/home/leo/models/shoesmodel7.model'

    upper_classifier = Classifier(jobject=serialization.read(upper_path))
    lower_classifier = Classifier(jobject=serialization.read(lower_path))
    outer_classifier = Classifier(jobject=serialization.read(outer_path))
    shoes_classifier = Classifier(jobject=serialization.read(shoes_path))

    upper_predictions = upper_classifier.distribution_for_instance(upper_instance)
    lower_predictions = lower_classifier.distribution_for_instance(lower_instance)
    outer_predictions = outer_classifier.distribution_for_instance(outer_instance)
    shoes_predictions = shoes_classifier.distribution_for_instance(shoes_instance)


    if features['rain'] == 1:
        rainstring = 'No'
    if features['rain'] == 3:
        rainstring = 'Yes'
    if features['snow'] == 1:
        snowstring = 'No'
    if features['snow'] == 3:
        snowstring = 'Yes'
    if features['athletic'] == 1:
        athleticstring = 'No'
    if features['athletic'] == 0:
        athleticstring = 'Yes'

    print "Features being Classified:"
    print "Outside Warmth:", features['outside_warmth'], "Inside-Outside:", features['inside_outside'], "Casual-Formal:", features['casual_formal'], "Comfort:", features['comfort'], "Athletic:", athleticstring, "Rain:", rainstring, "Snow:", snowstring



    #Remove Clothing Options User Doesn't Own
    for i in range(len(upper_array)):
        if upper_array[i] == 0:
            upper_prediction_array.append(0)
        else:
            upper_prediction_array.append(upper_predictions[i])

    for i in range(len(lower_array)):
        if lower_array[i] == 0:
            lower_prediction_array.append(0)
        else:
            lower_prediction_array.append(lower_predictions[i])

    for i in range(len(outer_array)):
        if outer_array[i] == 0:
            outer_prediction_array.append(0)
        else:
            outer_prediction_array.append(outer_predictions[i])

    for i in range(len(shoes_array)):
        if shoes_array[i] == 0:
            shoes_prediction_array.append(0)
        else:
            shoes_prediction_array.append(shoes_predictions[i])

    #Find the top 3 options for each classifier
    max_index_upper1 = 0
    max_index_upper2 = 0
    max_index_upper3 = 0
    max_index_upper4 = 0
    max_index_upper5 = 0


    for i in range(1,len(upper_prediction_array)):
        n = upper_prediction_array[max_index_upper1]
        if upper_prediction_array[i] > n:
            max_index_upper1 = i

    upper_prediction_array[max_index_upper1] = 0

    for i in range(1, len(upper_prediction_array)):
        n = upper_prediction_array[max_index_upper2]
        if upper_prediction_array[i] > n:
            max_index_upper2 = i

    upper_prediction_array[max_index_upper2] = 0

    for i in range(1, len(upper_prediction_array)):
        n = upper_prediction_array[max_index_upper3]
        if upper_prediction_array[i] > n:
            max_index_upper3 = i

    upper_prediction_array[max_index_upper3] = 0
    
    for i in range(1, len(upper_prediction_array)):
        n = upper_prediction_array[max_index_upper4]
        if upper_prediction_array[i] > n:
            max_index_upper4 = i
    
    upper_prediction_array[max_index_upper4] = 0   

    for i in range(1, len(upper_prediction_array)):
        n = upper_prediction_array[max_index_upper5]
        if upper_prediction_array[i] > n:
            max_index_upper5 = i

    upper_indices = [max_index_upper1, max_index_upper2, max_index_upper3, max_index_upper4, max_index_upper5]

    max_index_lower1 = 0
    max_index_lower2 = 0
    max_index_lower3 = 0
    max_index_lower4 = 0
    max_index_lower5 = 0        

    for i in range(1,len(lower_prediction_array)):
        n = lower_prediction_array[max_index_lower1]
        if lower_prediction_array[i] > n:
            max_index_lower1 = i

    lower_prediction_array[max_index_lower1] = 0

    for i in range(1,len(lower_prediction_array)):
        n = lower_prediction_array[max_index_lower2]
        if lower_prediction_array[i] > n:
            max_index_lower2 = i

    lower_prediction_array[max_index_lower2] = 0

    for i in range(1,len(lower_prediction_array)):
        n = lower_prediction_array[max_index_lower3]
        if lower_prediction_array[i] > n:
            max_index_lower3 = i
    
    lower_prediction_array[max_index_lower3] = 0
    
    for i in range(1, len(lower_prediction_array)):
        n = lower_prediction_array[max_index_lower4]
        if lower_prediction_array[i] > n:
            max_index_upper4 = i
    
    lower_prediction_array[max_index_lower4] = 0   

    for i in range(1, len(lower_prediction_array)):
        n = lower_prediction_array[max_index_lower5]
        if lower_prediction_array[i] > n:
            max_index_lower5 = i
    
    lower_indices = [max_index_lower1, max_index_lower2, max_index_lower3, max_index_lower4, max_index_lower5]

    max_index_outer1 = 0
    max_index_outer2 = 0
    max_index_outer3 = 0

    for i in range(1, len(outer_prediction_array)):
        n = outer_prediction_array[max_index_outer1]
        if outer_prediction_array[i] > n:
            max_index_outer1 = i

    outer_prediction_array[max_index_outer1] = 0

    for i in range(1, len(outer_prediction_array)):
        n = outer_prediction_array[max_index_outer2]
        if outer_prediction_array[i] > n:
            max_index_outer2 = i

    outer_prediction_array[max_index_outer2] = 0

    for i in range(1, len(outer_prediction_array)):
        n = outer_prediction_array[max_index_outer3]
        if outer_prediction_array[i] > n:
            max_index_outer3 = i

    outer_indices = [max_index_outer1, max_index_outer2, max_index_outer3]

    max_index_shoes1 = 0
    max_index_shoes2 = 0
    max_index_shoes3 = 0
    max_index_shoes4 = 0

    for i in range(1, len(shoes_prediction_array)):
        n = shoes_prediction_array[max_index_shoes1]
        if shoes_prediction_array[i] > n:
            max_index_shoes1 = i

    shoes_prediction_array[max_index_shoes1] = 0

    for i in range(1, len(shoes_prediction_array)):
        n = shoes_prediction_array[max_index_shoes2]
        if shoes_prediction_array[i] > n:
            max_index_shoes2 = i

    shoes_prediction_array[max_index_shoes2] = 0

    for i in range(1, len(shoes_prediction_array)):
        n = shoes_prediction_array[max_index_shoes3]
        if shoes_prediction_array[i] > n:
            max_index_shoes3 = i

    shoes_prediction_array[max_index_shoes3] = 0

    for i in range(1, len(shoes_prediction_array)):
        n = shoes_prediction_array[max_index_shoes4]
        if shoes_prediction_array[i] > n:
            max_index_shoes4 = i
    
    shoes_indices = [max_index_shoes1, max_index_shoes2, max_index_shoes3, max_index_shoes4]
    
    print "Outer Indices:", outer_indices
    FSM.received_inputs()
    print "Exiting Program"
Exemple #13
0
         "DataSet2_Cal.csv", "DataSet2_Test.csv", "DataSet2_Val.csv"]

for infile in files:
    with open(data_dir + os.sep + infile, "rb") as csvfile:
        print(infile)
        outfile = os.path.splitext(infile)[0] + ".arff"
        reader = csv.reader(csvfile)
        data = None
        ref_present = True
        for index, row in enumerate(reader):
            if index == 0:
                atts = []
                ref_present = ("Reference value" in row) or ("Reference Value" in row)
                for idx, col in enumerate(row):
                    col = col.lower()
                    atts.append(Attribute.create_numeric(col))
                    if not ref_present and (idx == 0):
                        atts.append(Attribute.create_numeric("reference value"))
                data = Instances.create_instances("irdc", atts, 0)
            else:
                values = []
                for idx, col in enumerate(row):
                    values.append(float(col))
                    if not ref_present and (idx == 0):
                        values.append(float('NaN'))
                inst = Instance.create_instance(values)
                data.add_instance(inst)

        saver = Saver(classname="weka.core.converters.ArffSaver")
        saver.save_file(data, data_dir + os.sep + outfile)
Exemple #14
0
def train(request):

    jvm.start()

    d_att1 = Attribute.create_numeric("bodydearword.feature")
    d_att2 = Attribute.create_numeric("bodyform.feature")
    d_att3 = Attribute.create_numeric("bodyhtml.feature")
    d_att4 = Attribute.create_numeric("bodymultipart.feature")
    d_att5 = Attribute.create_numeric("bodynumchars.feature")
    d_att6 = Attribute.create_numeric("bodynumfunctionwords.feature")
    d_att7 = Attribute.create_numeric("bodynumuniqwords.feature")
    d_att8 = Attribute.create_numeric("bodynumwords.feature")
    d_att9 = Attribute.create_numeric("bodyrichness.feature")
    d_att10 = Attribute.create_numeric("bodysuspensionword.feature")
    d_att11 = Attribute.create_numeric("bodyverifyyouraccountphrase.feature")
    d_att12 = Attribute.create_numeric("externalsabinary.feature")
    d_att13 = Attribute.create_numeric("externalsascore.feature")
    d_att14 = Attribute.create_numeric("scriptjavascript.feature")
    d_att15 = Attribute.create_numeric("scriptonclick.feature")
    d_att16 = Attribute.create_numeric("scriptpopup.feature")
    d_att17 = Attribute.create_numeric("scriptstatuschange.feature")
    d_att18 = Attribute.create_numeric("scriptunmodalload.feature")
    d_att19 = Attribute.create_numeric("senddiffreplyto.feature")
    d_att20 = Attribute.create_numeric("sendnumwords.feature")
    d_att21 = Attribute.create_numeric("sendunmodaldomain.feature")
    d_att22 = Attribute.create_numeric("subjectbankword.feature")
    d_att23 = Attribute.create_numeric("subjectdebitword.feature")
    d_att24 = Attribute.create_numeric("subjectfwdword.feature")
    d_att25 = Attribute.create_numeric("subjectnumchars.feature")
    d_att26 = Attribute.create_numeric("subjectnumwords.feature")
    d_att27 = Attribute.create_numeric("subjectreplyword.feature")
    d_att28 = Attribute.create_numeric("subjectrichness.feature")
    d_att29 = Attribute.create_numeric("subjectverifyword.feature")
    d_att30 = Attribute.create_numeric("urlatchar.feature")
    d_att31 = Attribute.create_numeric("urlbaglink.feature")
    d_att32 = Attribute.create_numeric("urlip.feature")
    d_att33 = Attribute.create_numeric("urlnumdomains.feature")
    d_att34 = Attribute.create_numeric("urlnumexternallink.feature")
    d_att35 = Attribute.create_numeric("urlnumimagelink.feature")
    d_att36 = Attribute.create_numeric("urlnuminternallink.feature")
    d_att37 = Attribute.create_numeric("urlnumip.feature")
    d_att38 = Attribute.create_numeric("urlnumlink.feature")
    d_att39 = Attribute.create_numeric("urlnumperiods.feature")
    d_att40 = Attribute.create_numeric("urlnumport.feature")
    d_att41 = Attribute.create_numeric("urlport.feature")
    d_att42 = Attribute.create_numeric("urltwodoains.feature")
    d_att43 = Attribute.create_numeric("urlunmodalbaglink.feature")
    d_att44 = Attribute.create_numeric("urlwordclicklink.feature")
    d_att45 = Attribute.create_numeric("urlwordherelink.feature")
    d_att46 = Attribute.create_numeric("urlwordloginlink.feature")
    d_att47 = Attribute.create_numeric("urlwordupdatelink.feature")
    d_att48 = Attribute.create_nominal("class", {'phish', 'ham'})
    #
    data_dir = settings.BASE_DIR + "/phishing/public/datasets/"
    #
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(data_dir + "dataset.arff")
    data.class_is_last()
    cls = Classifier(classname="weka.classifiers.trees.J48")
    cls.options = ["-C", "0.3"]
    cls.build_classifier(data)

    serialization.write(data_dir + "out.model", cls)
    classifier = Classifier(jobject=serialization.read(data_dir + "out.model"))

    dataset = Instances.create_instances("test", [
        d_att1, d_att2, d_att3, d_att4, d_att5, d_att6, d_att7, d_att8, d_att9,
        d_att10, d_att11, d_att12, d_att13, d_att14, d_att15, d_att16, d_att17,
        d_att18, d_att19, d_att20, d_att21, d_att22, d_att23, d_att24, d_att25,
        d_att26, d_att27, d_att28, d_att29, d_att30, d_att31, d_att32, d_att33,
        d_att34, d_att35, d_att36, d_att37, d_att38, d_att39, d_att40, d_att41,
        d_att42, d_att43, d_att44, d_att45, d_att46, d_att47, d_att48
    ], 0)
    values = [
        0, 0, 0, 0, 890, 1, 124, 198, 0.22247191011236, 0, 0, 0, 0.0, 0, 0, 0,
        0, 0, 1, 4, 0, 0, 0, 0, 21, 4, 1, 0.19047619047619, 0, 0, 0, 0, 2, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        Instance.missing_value()
    ]
    inst = Instance.create_instance(values)
    dataset.add_instance(inst)
    dataset.class_is_last()
    # print(str(dataset))
    var = ''
    for inst1 in dataset:
        pred = classifier.classify_instance(inst1)
        var = inst1.class_attribute.value(int(pred))
        if var == 'ham':
            print('No es pishing')
            # do somthing
        else:
            print('Es pishing')
            # do somthing

        print(var)

    jvm.stop()

    return HttpResponse(str(var))
def to_instances(X, y=None, att_names=None, att_types=None, class_name=None, class_type=None, relation_name=None,
                 num_nominal_labels=None, num_class_labels=None):
    """
    Turns the 2D matrix and the optional 1D class vector into an Instances object.

    :param X: the input variables, 2D matrix
    :type X: ndarray
    :param y: the optional class value column, 1D vector
    :type y: ndarray
    :param att_names: the list of attribute names
    :type att_names: list
    :param att_types: the list of attribute types (C=categorical, N=numeric), assumes numeric by default if not provided
    :param class_name: the name of the class attribute
    :type class_name: str
    :param class_type: the type of the class attribute (C=categorical, N=numeric)
    :type class_type: str
    :param relation_name: the name for the dataset
    :type relation_name: str
    :param num_nominal_labels: the dictionary with the number of labels (key is 0-based attribute index)
    :type num_nominal_labels: dict
    :param num_class_labels: the number of labels in the class attribute
    :type num_class_labels: int
    :return: the generated Instances object
    :rtype: Instances
    """

    if len(X) == 0:
        raise Exception("No data to convert!")

    # defaults
    if att_types is None:
        att_types = determine_attribute_types(X)
    if att_names is None:
        att_names = []
        for i in range(len(X[0])):
            att_names.append("att-" + str(i+1))
    if relation_name is None:
        relation_name = "scikit-weka @ " + str(datetime.now())
    if class_name is None:
        if "class" not in att_names:
            class_name = "class"
        else:
            class_name = "class-" + str(len(att_names) + 1)
    if y is not None:
        if class_type is None:
            class_type = determine_attribute_type(y)

    # create header
    atts = []

    for i in range(len(X[0])):
        att_name = att_names[i]
        att_type = att_types[i]

        if att_type == "N":
            atts.append(Attribute.create_numeric(att_name))
        elif att_type == "C":
            if (num_nominal_labels is not None) and (i in num_nominal_labels):
                values = []
                for l in range(num_nominal_labels[i]):
                    values.append("_%d" % l)
            else:
                labels = set()
                for n in range(len(X)):
                    r = X[n]
                    v = str(r[i])
                    labels.add(v)
                values = sorted(labels)
            atts.append(Attribute.create_nominal(att_name, values))
        else:
            raise Exception("Unsupported attribute type for column %d: %s" % ((i+1), att_type))

    if y is not None:
        if class_type == "N":
            atts.append(Attribute.create_numeric(class_name))
        elif class_type == "C":
            if num_class_labels is not None:
                values = []
                for l in range(num_class_labels):
                    values.append("_%d" % l)
            else:
                values = sorted(set([str(x) for x in y]))
            atts.append(Attribute.create_nominal(class_name, values))

    result = Instances.create_instances(relation_name, atts, len(X))
    if y is not None:
        result.class_index = result.num_attributes - 1

    # data
    for n in range(len(X)):
        values = []
        r = X[n]
        for i in range(len(r)):
            if att_types[i] == "C":
                values.append(atts[i].index_of(str(r[i])))
            elif att_types[i] == "N":
                values.append(r[i])
            else:
                raise Exception("Unsupported attribute type for column %d: %s" % ((i+1), att_types[i]))
        if y is not None:
            if class_type == "C":
                values.append(atts[-1].index_of(str(y[n])))
            elif class_type == "N":
                values.append(y[n])
            else:
                raise Exception("Unsupported attribute type for class: %s" % class_type)
        inst = Instance.create_instance(values)
        result.add_instance(inst)

    return result