Example #1
    def test_model(self, test_data, empty_solution, evaluate=False):
        model_weka = None
        if os.path.isfile(self.prediction_file):
            print('Model ' + self.name + ' already tested.')
        elif not os.path.isfile(self.model_file):
            print('Impossible to test this model. It must be trained first.')
            return
        else:
            print('Starting to test model ' + self.name + '.')
            model_weka = Classifier(jobject=serialization.read(self.model_file))
            evaluation = Evaluation(data=test_data)
            evaluation.test_model(classifier=model_weka, data=test_data)

            predictions = evaluation.predictions()
            rows        = read_sheet(file_name=empty_solution)
            solutions   = []

            for row in rows:
                solution = [row['userid'], row['tweetid'], predictions.pop(0).predicted()]
                solutions.append(solution)
            write_the_solution_file(solutions, self.prediction_file)
            print('Model ' + self.name + ' tested.')

        if evaluate:
            if os.path.isfile(self.evaluation_file):
                print('Model ' + self.name + ' already evaluated.')
                return
            elif model_weka is None:
                model_weka = Classifier(jobject=serialization.read(self.model_file))
                evaluation = Evaluation(data=test_data)
                evaluation.test_model(classifier=model_weka, data=test_data)
            save_file(file_name=self.evaluation_file, content=evaluation.to_summary())
            print('Model ' + self.name + ' evaluated.')
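Example #2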
def read_file(file_name):
	tile_set_list = []
	characteristic = []
	jvm.start()
	nmrClass = Classifier(jobject=serialization.read("models/lmt_3sd.model"))
	with open(file_name) as f: # opens file

		# reads in the characteristic protein sequence and converts it to expected chemical shift values
		tile_characteristic = f.readline()
		characteristic = re.findall(r'\b[A-Za-z]{3,4}\b', tile_characteristic)
		characteristic = letters_to_numbers(characteristic)

		for line in f:  # reads in NMR data
			# each line has the format "a b c d"; fields are decimal numbers or "na"
			a, b, c, d = re.findall(r'\b\d+\.\d*\b|\bna\b', line)
			# Dealing with missing data: "na" becomes -1
			if a == "na":
				a = -1
			if b == "na":
				b = -1
			if c == "na":
				c = -1
			if d == "na":
				d = -1
			# adds a new Tile to tile_set_list unless all four values are missing
			if not (a == -1 and b == -1 and c == -1 and d == -1):
				tile_set_list.append(Tile(a, b, c, d, nmrClass))
	return tile_set_list, characteristic, nmrClass
Example #3
def assign_cluster(file_location, file_out="clustered.csv", model="kmeans.model", last_filename=False):
    data = read_csv_file(file_location)
    check_jvm()
    # load clusters
    obj = serialization.read(model)
    clusterer = Clusterer(jobject=obj)

    # create file with cluster group
    with open(file_out, 'w') as output:
        for index, attrs in enumerate(data):
            tmp = []
            if last_filename:
                inst = Instance.create_instance(attrs[:-2])
            else:
                inst = Instance.create_instance(attrs[1:])

            pred = clusterer.cluster_instance(inst)
            dist = clusterer.distribution_for_instance(inst)

            if last_filename:
                tmp.append(attrs[-1])
                tmp.append(pred)
                tmp.extend(attrs[:-2])
            else:
                tmp.append(attrs[0])
                tmp.append(pred)
                tmp.extend(attrs[1:])

            print(str(index + 1) + ": label index=" +
                  str(pred) + ", class distribution=" + str(dist))
            output.write('%s\n' % ','.join(map(str, tmp)))
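Example #4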
def main():
    """
    Just runs some example code.
    """

    # load a dataset
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    iris_data = loader.load_file(iris_file)
    iris_data.class_is_last()

    # train classifier
    classifier = Classifier("weka.classifiers.trees.J48")
    classifier.build_classifier(iris_data)

    # save and read object
    helper.print_title("I/O: single object")
    outfile = tempfile.gettempdir() + os.sep + "j48.model"
    serialization.write(outfile, classifier)
    model = Classifier(jobject=serialization.read(outfile))
    print(model)

    # save classifier and dataset header (multiple objects)
    helper.print_title("I/O: single object")
    serialization.write_all(outfile, [classifier, Instances.template_instances(iris_data)])
    objects = serialization.read_all(outfile)
    for i, obj in enumerate(objects):
        helper.print_info("Object #" + str(i+1) + ":")
        if javabridge.get_env().is_instance_of(obj, javabridge.get_env().find_class("weka/core/Instances")):
            obj = Instances(jobject=obj)
        elif javabridge.get_env().is_instance_of(obj, javabridge.get_env().find_class("weka/classifiers/Classifier")):
            obj = Classifier(jobject=obj)
        print(obj)
Example #5
def assign_classify(file_location, output="classified.out", model="naivebayes.model"):
    data = read_csv_file(file_location)
    jvm.start()
    # load the serialized classifier
    obj = serialization.read(model)
    classifier = Classifier(jobject=obj)
    # write one line per instance with its predicted label index
    with open(output, 'w') as out_file:
        for index, attrs in enumerate(data):
            inst = Instance.create_instance(attrs[1:])
            pred = classifier.classify_instance(inst)
            print(str(index + 1) + ": label index=" + str(pred))
            out_file.write(str(index + 1) + ',' + str(pred) + '\n')
    jvm.stop()
Example #6
def query_instance(attributes, model="kmeans.model"):
    """
        get the cluster for defined attributes
        :params attributes: array or list
        :returns: cluster id
    """
    check_jvm()
    # create instance
    inst = Instance.create_instance(attributes)
    # load model
    obj = serialization.read(model)
    # load cluster and get the cluster_id
    cluster = Clusterer(jobject=obj)
    cluster_id = cluster.cluster_instance(inst)

    return cluster_id
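A minimal usage sketch for the helper above, with hypothetical attribute values; the list must match the schema the stored clusterer was trained on, and the JVM must already be running:

# hypothetical values; replace with attributes matching your training data
cluster_id = query_instance([5.1, 3.5, 1.4, 0.2], model="kmeans.model")
print("assigned cluster:", cluster_id)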
Example #7
 def train_model(self, training_data):
     model_weka = None
     if os.path.isfile(self.model_file):
         print('Model ' + self.name + ' already trained.')
     else:
         print('Starting to train model ' + self.name + '.')
         model_weka = Classifier(classname=self.classname, options=self.options)

         model_weka.build_classifier(data=training_data)
         serialization.write(filename=self.model_file, jobject=model_weka)
         print('Model ' + self.name + ' trained and saved.')
     if os.path.isfile(self.parameter_file):
         print('Parameters of the model ' + self.name + ' already saved.')
     else:
         if model_weka is None:
             model_weka = Classifier(jobject=serialization.read(self.model_file))
         save_file(file_name=self.parameter_file, content=str(model_weka))
         print('Parameters of the model ' + self.name + ' saved.')
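Example #8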
    def test_read_write(self):
        """
        Tests methods read and write.
        """
        fname = self.tempfile("readwrite.ser")
        self.delfile(fname)

        lin = ["A", "B", "C", "D"]
        vin = javabridge.make_instance("java/util/Vector", "()V")
        for element in lin:
            javabridge.call(vin, "add", "(Ljava/lang/Object;)Z", element)
        serialization.write(fname, vin)
        self.assertTrue(os.path.exists(fname), msg="Failed to write to " + fname + "?")

        vout = serialization.read(fname)
        self.assertIsNotNone(vout, msg="Failed to read from " + fname + "?")
        # enumerate the deserialized vector (vout), not the original one,
        # so the round-trip is actually tested
        enm = javabridge.call(vout, "elements", "()Ljava/util/Enumeration;")
        lout = typeconv.enumeration_to_list(enm)
        self.delfile(fname)
        self.assertEqual(lin, lout, msg="Input/output differ")
Example #9
def read_model_weka(path):
    return Classifier(jobject=serialization.read(path))
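Example #10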
 def load_prediction_model(self, path_prediction_model):
     return read(path_prediction_model)
Example #11
def load_model(fname, dir_name):
    outfile = realpath(join(dir_name, fname))
    model = Classifier(jobject=serialization.read(outfile))
    return model
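Example #12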
def classify():
    loader = Loader(classname="weka.core.converters.ArffLoader")

    dataset20x20 = loader.load_file("preprocessed/notes/20x20.arff")
    dataset20x20.class_is_last()

    dataset20x50 = loader.load_file("preprocessed/notes/20x50.arff")
    dataset20x50.class_is_last()

    dataset50x20 = loader.load_file("preprocessed/notes/50x20.arff")
    dataset50x20.class_is_last()

    model20x20 = Classifier(jobject=serialization.read("nn20x20.model"))
    model20x50 = Classifier(jobject=serialization.read("nn20x50.model"))
    model50x20 = Classifier(jobject=serialization.read("nn50x20.model"))

    nn1 = model20x20
    nn2 = model20x50
    nn3 = model50x20

    class1 = []
    class2 = []
    class3 = []

    for index, inst in enumerate(dataset20x20):
        pred1 = nn1.classify_instance(inst)
        class1.append(pred1)

    for index, inst in enumerate(dataset20x50):
        pred2 = nn2.classify_instance(inst)
        class2.append(pred2)

    for index, inst in enumerate(dataset50x20):
        pred3 = nn3.classify_instance(inst)
        class3.append(pred3)

    for i in range(len(class1)):
        if os.path.isfile('preprocessed/notes/note%s.png' % i):
            img = cv2.imread('preprocessed/notes/note%s.png' % i, 0)
            #cv2.imshow('Note' + '%s' % (i) , cv2.resize(img,(200,200)))
            if not os.path.isfile("classified/vote_log.csv"):
                header()

            values = []

            print "NOTE", i, ":"
            print "   nn1:", noteName(class1[i])
            print "   nn2:", noteName(class2[i])
            print "   nn3:", noteName(class3[i])
            values.append("note" + str(i) + ".png")
            values.append(" ")
            values.append(noteName(class1[i]))
            values.append(noteName(class2[i]))
            values.append(noteName(class3[i]))

            cv2.imwrite("1_%s" % noteName(class1[i]) + "/note%s" % i + ".png",
                        img)
            cv2.imwrite("2_%s" % noteName(class2[i]) + "/note%s" % i + ".png",
                        img)
            cv2.imwrite("3_%s" % noteName(class2[i]) + "/note%s" % i + ".png",
                        img)

            if class1[i] == class2[i]:
                print("MAJORITY VOTE:", noteName(class1[i]))
                cv2.imwrite("classified/%s/note%s.png" % (noteName(class1[i]), i), img)
                values.append(noteName(class1[i]))
            elif class1[i] == class3[i]:
                print("MAJORITY VOTE:", noteName(class1[i]))
                cv2.imwrite("classified/%s/note%s.png" % (noteName(class1[i]), i), img)
                values.append(noteName(class1[i]))
            elif class2[i] == class3[i]:
                print("MAJORITY VOTE:", noteName(class2[i]))
                cv2.imwrite("classified/%s/note%s.png" % (noteName(class2[i]), i), img)
                values.append(noteName(class2[i]))
            else:
                print("No classification")
                cv2.imwrite("noClassification/note%s.png" % i, img)
                values.append("no classification")
            print("\n")

            writer = csv.writer(open("classified/vote_log.csv", 'a'))
            writer.writerows([values])

            cv2.waitKey(0)
Example #13
def loadModel(modelPath):
    model = Classifier(jobject=sr.read(modelPath))
    return model
Example #14
def main():
    '''
    Classifies clothing using stored classification models for each user
    '''
    global stop_spinning, name, upper_clothing, lower_clothing, outer_clothing, shoes_clothing, upper_indices, lower_indices, outer_indices, shoes_indices
    FSM = ClothingFSM()
    #FSM.username_server()

    clothingdb = MySQLdb.connect(host="localhost",
                                 user="******",
                                 passwd="mypassword", # Change to your SQL DB password
                                 db = "userprofiles")
    cursor = clothingdb.cursor()

    cursor.execute("SELECT * FROM clothing")

    name = "Study"

    # Populate clothing dictionaries with the user's wardrobe
    for row in cursor.fetchall():
        print(str(row[2]))
        print(str(row[6]))
        if str(row[0]) == name:
            if str(row[1]) == "Upper Body":
                try:
                    upper_clothing[row[2]].append(row[6])
                except Exception:
                    print("Problem appending clothing to dictionary")
            if str(row[1]) == "Lower Body":
                try:
                    lower_clothing[row[3]].append(row[6])
                except Exception:
                    print("Problem appending clothing to dictionary")
            if str(row[1]) == "Outerwear":
                try:
                    outer_clothing[row[4]].append(row[6])
                except Exception:
                    print("Problem appending clothing to dictionary")
            if str(row[1]) == "Shoes":
                try:
                    shoes_clothing[row[5]].append(row[6])
                except Exception:
                    print("Problem appending clothing to dictionary")

    print(upper_clothing, lower_clothing, outer_clothing, shoes_clothing)
    # FSM.received_user_info()

    # In the final program, this information will be received from the database

    # Set to False to test with the default feature values below instead of
    # receiving features from the server
    receive_features = True

    if not receive_features:
        #Wait to Receive input 

        #Example inputs from user/weather API
        features['casual_formal'] = 3
        #5 is very comfortable 1 is not comfortable
        features['comfort'] = 3
        #1 is not snowing 2 is light snow 3 is heavy snow
        features['snow'] = 1
        #1 is not raining 3 is raining(no medium)
        features['rain'] = 3
        #If user is spending their time mostly outside, set warmth to outsidewarmth. If not, set warmth
        features['warmth'] = 1
        features['outside_warmth'] = 4
        #1 is no 0 is yes
        features['athletic'] = 1

        snowstring = ''
        rainstring = ''
        athleticstring = ''
    
    else:
        FSM.features_server()
    
    
    
 
    upper_prediction_array = []
    lower_prediction_array = []
    outer_prediction_array = []
    shoes_prediction_array = []

    warmth_att = Attribute.create_numeric("Warmth")
    comfort_att = Attribute.create_numeric("Comfort")
    casual_att = Attribute.create_numeric("Casual")
    rain_att = Attribute.create_numeric("Rain")
    snow_att = Attribute.create_numeric("Snow")
    athletic_att = Attribute.create_numeric("Athletic")

    
    upper_attributes = [warmth_att, casual_att, comfort_att, athletic_att]
    lower_attributes = [warmth_att, casual_att, comfort_att, athletic_att]
    outer_attributes = [warmth_att, casual_att, comfort_att, snow_att, rain_att]
    shoes_attributes = [casual_att, comfort_att, athletic_att]

    Instances.create_instances("upper_instances", upper_attributes, 0)
    Instances.create_instances("lower_instances", lower_attributes, 0)
    Instances.create_instances("outer_instances", outer_attributes, 0)
    Instances.create_instances("shoes_instances", shoes_attributes, 0)

    # Simulate the wardrobe: each slot is 1 if the user owns at least one
    # item of that clothing type, 0 otherwise
    upper_types = ['Tank Top', 'T-Shirt', 'Long-sleeved Shirt', 'Athletic Top',
                   'Button-down Shirt', 'Polo Shirt', 'Dress Shirt', 'Suit Jacket',
                   'Blazer', 'Hoodie', 'Sweater', 'Blouse', 'Day Dress', 'Evening Dress']
    lower_types = ['Shorts', 'Athletic Shorts', 'Athletic Pants', 'Jeans',
                   'Trousers', 'Skirt', 'Dress Pants']
    outer_types = ['Light Jacket', 'Winter Jacket', 'Rain Jacket']
    shoes_types = ['Casual Shoes', 'Athletic Shoes', 'Dress Shoes', 'Business Casual Shoes']

    upper_array = [1 if len(upper_clothing[t]) > 0 else 0 for t in upper_types]
    lower_array = [1 if len(lower_clothing[t]) > 0 else 0 for t in lower_types]
    outer_array = [1 if len(outer_clothing[t]) > 0 else 0 for t in outer_types]
    shoes_array = [1 if len(shoes_clothing[t]) > 0 else 0 for t in shoes_types]
    

    upper_list = [features['outside_warmth'], features['casual_formal'], features['comfort'], features['athletic']]
    lower_list = [features['outside_warmth'], features['casual_formal'], features['comfort'], math.fabs(1-features['athletic'])]
    outer_list = [features['outside_warmth'], features['casual_formal'], features['comfort'], features['rain'], features['snow']]
    shoes_list = [features['casual_formal'], features['comfort'], math.fabs(1-features['athletic'])]
    upper_instance = Instance.create_instance(upper_list, classname='weka.core.DenseInstance', weight=1.0)
    lower_instance = Instance.create_instance(lower_list, classname='weka.core.DenseInstance', weight=1.0)
    outer_instance = Instance.create_instance(outer_list, classname='weka.core.DenseInstance', weight=1.0)
    shoes_instance = Instance.create_instance(shoes_list, classname='weka.core.DenseInstance', weight=1.0)

    upper_path = '/home/leo/models/uppermodel2.model'
    lower_path = '/home/leo/models/lowermodel2.model'
    outer_path = '/home/leo/models/outermodel2.model'
    shoes_path = '/home/leo/models/shoesmodel7.model'

    upper_classifier = Classifier(jobject=serialization.read(upper_path))
    lower_classifier = Classifier(jobject=serialization.read(lower_path))
    outer_classifier = Classifier(jobject=serialization.read(outer_path))
    shoes_classifier = Classifier(jobject=serialization.read(shoes_path))

    upper_predictions = upper_classifier.distribution_for_instance(upper_instance)
    lower_predictions = lower_classifier.distribution_for_instance(lower_instance)
    outer_predictions = outer_classifier.distribution_for_instance(outer_instance)
    shoes_predictions = shoes_classifier.distribution_for_instance(shoes_instance)


    if features['rain'] == 1:
        rainstring = 'No'
    if features['rain'] == 3:
        rainstring = 'Yes'
    if features['snow'] == 1:
        snowstring = 'No'
    if features['snow'] == 3:
        snowstring = 'Yes'
    if features['athletic'] == 1:
        athleticstring = 'No'
    if features['athletic'] == 0:
        athleticstring = 'Yes'

    print "Features being Classified:"
    print "Outside Warmth:", features['outside_warmth'], "Inside-Outside:", features['inside_outside'], "Casual-Formal:", features['casual_formal'], "Comfort:", features['comfort'], "Athletic:", athleticstring, "Rain:", rainstring, "Snow:", snowstring



    #Remove Clothing Options User Doesn't Own
    for i in range(len(upper_array)):
        if upper_array[i] == 0:
            upper_prediction_array.append(0)
        else:
            upper_prediction_array.append(upper_predictions[i])

    for i in range(len(lower_array)):
        if lower_array[i] == 0:
            lower_prediction_array.append(0)
        else:
            lower_prediction_array.append(lower_predictions[i])

    for i in range(len(outer_array)):
        if outer_array[i] == 0:
            outer_prediction_array.append(0)
        else:
            outer_prediction_array.append(outer_predictions[i])

    for i in range(len(shoes_array)):
        if shoes_array[i] == 0:
            shoes_prediction_array.append(0)
        else:
            shoes_prediction_array.append(shoes_predictions[i])

    # Find the indices of the top-ranked options for each classifier by
    # repeatedly taking the argmax and zeroing it out (top 5 for upper and
    # lower, top 3 for outer, top 4 for shoes); this also fixes a bug in the
    # unrolled original, which assigned one of the lower indices to
    # max_index_upper4
    def top_indices(pred_array, k):
        indices = []
        for _ in range(k):
            max_index = max(range(len(pred_array)), key=pred_array.__getitem__)
            indices.append(max_index)
            pred_array[max_index] = 0
        return indices

    upper_indices = top_indices(upper_prediction_array, 5)
    lower_indices = top_indices(lower_prediction_array, 5)
    outer_indices = top_indices(outer_prediction_array, 3)
    shoes_indices = top_indices(shoes_prediction_array, 4)
    
    print "Outer Indices:", outer_indices
    FSM.received_inputs()
    print "Exiting Program"
Example #15
def test(objs, paras, testfile1, pred, real):
  testfile = preprocess(testfile1, True)
  xref = {'x_nT':1,'x_nT_delta':0,'x_nK':1,'x_nK_delta':0,'x_long':1,'x_str':0,'x_strsum':0}
  add_features(xref, 'x')
  zeroref = []
  for k in ['long', 'nK', 'nK_delta', 'nT', 'nT_delta', 'str', 'strsum']:
    zeroref.append(xref['x_%s' % k])
  zeroref.append(0) # should be obj
  for k in addf():
    zeroref.append(xref['x_%s' % k])

  with open(testfile) as fin:
    reader = csv.DictReader(fin)
    linecount = 0
    for line in reader:
      ops = []
      for h in line:
        if h.startswith('op'): ops.append(h[:h.find('_')])
      for op in ops: add_features(line, op)
      stats = {}
      valid = True
      real_line = {}
      for h in line:
        if h.startswith('op'):
          k = h[:h.find('_')]
          v = h[h.find('_')+1:]
          if k not in stats: stats[k] = {}
          stats[k][v] = pfloat(line[h])
          if stats[k][v] is None:
            valid = False
        elif h in objs:
          real_line[h] = pfloat(line[h])
          if real_line[h] is None:
            valid = False
      if not valid: continue
      linecount += 1
      if linecount > 250: continue
      #for k in stats:
      #  assert len(paras) == len(stats[k])
      #  for v in stats[k]:
      #    assert v in paras
      for obj in objs:
        c = Classifier(jobject=serialization.read(model_file('hash', obj)))
        zerovalue = c.classify_instance(Instance.create_instance(zeroref))
        #s = 0
        s = zerovalue
        for op in stats:
          values = []
          for k in ['long', 'nK', 'nK_delta', 'nT', 'nT_delta', 'str', 'strsum']:
            values.append(stats[op][k])
          values.append(0) # should be obj
          for k in addf():
            values.append(stats[op][k])
          ins = Instance.create_instance(values)
          prediction = c.classify_instance(ins)
          #print '   ', obj, op, values, prediction, prediction - zerovalue
          #s += pred
          s = s + max(prediction - zerovalue, 0)
        #print obj, 'real', real_line[obj], 'pred', s
        pred[obj].append(s)
        real[obj].append(real_line[obj])
  print('test', testfile, 'linecount', linecount)
  subprocess.call('rm %s' % testfile, shell=True)
Example #16
 def loadClusterModel(self, method, mname):
     finalname = "%s_%s.model" % (method, mname)
     cluster = Clusterer(jobject=serialization.read(os.path.join(self.modelDir, finalname)))
     logger.info('[%s] : [INFO] Loaded clusterer model %s ',
                 datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), finalname)
     return cluster
Example #17
def loadFilter(filterPath):
    filter = Filter(jobject=sr.read(filterPath))
    return filter
Example #18
def train(request):

    jvm.start()

    d_att1 = Attribute.create_numeric("bodydearword.feature")
    d_att2 = Attribute.create_numeric("bodyform.feature")
    d_att3 = Attribute.create_numeric("bodyhtml.feature")
    d_att4 = Attribute.create_numeric("bodymultipart.feature")
    d_att5 = Attribute.create_numeric("bodynumchars.feature")
    d_att6 = Attribute.create_numeric("bodynumfunctionwords.feature")
    d_att7 = Attribute.create_numeric("bodynumuniqwords.feature")
    d_att8 = Attribute.create_numeric("bodynumwords.feature")
    d_att9 = Attribute.create_numeric("bodyrichness.feature")
    d_att10 = Attribute.create_numeric("bodysuspensionword.feature")
    d_att11 = Attribute.create_numeric("bodyverifyyouraccountphrase.feature")
    d_att12 = Attribute.create_numeric("externalsabinary.feature")
    d_att13 = Attribute.create_numeric("externalsascore.feature")
    d_att14 = Attribute.create_numeric("scriptjavascript.feature")
    d_att15 = Attribute.create_numeric("scriptonclick.feature")
    d_att16 = Attribute.create_numeric("scriptpopup.feature")
    d_att17 = Attribute.create_numeric("scriptstatuschange.feature")
    d_att18 = Attribute.create_numeric("scriptunmodalload.feature")
    d_att19 = Attribute.create_numeric("senddiffreplyto.feature")
    d_att20 = Attribute.create_numeric("sendnumwords.feature")
    d_att21 = Attribute.create_numeric("sendunmodaldomain.feature")
    d_att22 = Attribute.create_numeric("subjectbankword.feature")
    d_att23 = Attribute.create_numeric("subjectdebitword.feature")
    d_att24 = Attribute.create_numeric("subjectfwdword.feature")
    d_att25 = Attribute.create_numeric("subjectnumchars.feature")
    d_att26 = Attribute.create_numeric("subjectnumwords.feature")
    d_att27 = Attribute.create_numeric("subjectreplyword.feature")
    d_att28 = Attribute.create_numeric("subjectrichness.feature")
    d_att29 = Attribute.create_numeric("subjectverifyword.feature")
    d_att30 = Attribute.create_numeric("urlatchar.feature")
    d_att31 = Attribute.create_numeric("urlbaglink.feature")
    d_att32 = Attribute.create_numeric("urlip.feature")
    d_att33 = Attribute.create_numeric("urlnumdomains.feature")
    d_att34 = Attribute.create_numeric("urlnumexternallink.feature")
    d_att35 = Attribute.create_numeric("urlnumimagelink.feature")
    d_att36 = Attribute.create_numeric("urlnuminternallink.feature")
    d_att37 = Attribute.create_numeric("urlnumip.feature")
    d_att38 = Attribute.create_numeric("urlnumlink.feature")
    d_att39 = Attribute.create_numeric("urlnumperiods.feature")
    d_att40 = Attribute.create_numeric("urlnumport.feature")
    d_att41 = Attribute.create_numeric("urlport.feature")
    d_att42 = Attribute.create_numeric("urltwodoains.feature")
    d_att43 = Attribute.create_numeric("urlunmodalbaglink.feature")
    d_att44 = Attribute.create_numeric("urlwordclicklink.feature")
    d_att45 = Attribute.create_numeric("urlwordherelink.feature")
    d_att46 = Attribute.create_numeric("urlwordloginlink.feature")
    d_att47 = Attribute.create_numeric("urlwordupdatelink.feature")
    # use a list (not a set) so the label order is deterministic
    d_att48 = Attribute.create_nominal("class", ["phish", "ham"])
    #
    data_dir = settings.BASE_DIR + "/phishing/public/datasets/"
    #
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(data_dir + "dataset.arff")
    data.class_is_last()
    cls = Classifier(classname="weka.classifiers.trees.J48")
    cls.options = ["-C", "0.3"]
    cls.build_classifier(data)

    serialization.write(data_dir + "out.model", cls)
    classifier = Classifier(jobject=serialization.read(data_dir + "out.model"))

    dataset = Instances.create_instances("test", [
        d_att1, d_att2, d_att3, d_att4, d_att5, d_att6, d_att7, d_att8, d_att9,
        d_att10, d_att11, d_att12, d_att13, d_att14, d_att15, d_att16, d_att17,
        d_att18, d_att19, d_att20, d_att21, d_att22, d_att23, d_att24, d_att25,
        d_att26, d_att27, d_att28, d_att29, d_att30, d_att31, d_att32, d_att33,
        d_att34, d_att35, d_att36, d_att37, d_att38, d_att39, d_att40, d_att41,
        d_att42, d_att43, d_att44, d_att45, d_att46, d_att47, d_att48
    ], 0)
    values = [
        0, 0, 0, 0, 890, 1, 124, 198, 0.22247191011236, 0, 0, 0, 0.0, 0, 0, 0,
        0, 0, 1, 4, 0, 0, 0, 0, 21, 4, 1, 0.19047619047619, 0, 0, 0, 0, 2, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        Instance.missing_value()
    ]
    inst = Instance.create_instance(values)
    dataset.add_instance(inst)
    dataset.class_is_last()
    # print(str(dataset))
    var = ''
    for inst1 in dataset:
        pred = classifier.classify_instance(inst1)
        var = inst1.class_attribute.value(int(pred))
        if var == 'ham':
            print('Not phishing')
            # do something
        else:
            print('Phishing')
            # do something

        print(var)

    jvm.stop()

    return HttpResponse(str(var))
Example #19
 def __init__(self, model_path, senti_path, stop_words, ngrams_path):
     self.loader = Loader(classname="weka.core.converters.ArffLoader")
     self.features_calculator = FeaturesCalculator(ngrams_path)
     self.classifier = Classifier(jobject=serialization.read(model_path))
     self.normalizer = Preprocessor(senti_path)
     self.stop_words = stop_words
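Example #20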
# assumed imports for this standalone snippet
from pathlib import Path

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import weka.core.jvm as jvm
import weka.core.serialization as serialization
from weka.core import converters
from weka.classifiers import Classifier, Evaluation
from weka.core.classes import Random

jvm.start(max_heap_size="4g", packages=True)


Wtrain = converters.load_any_file("train.csv")
Wtest = converters.load_any_file("test.csv")
Wtrain.class_is_last()
Wtest.class_is_last()



if Path('lmt.model').exists():
    lmt = Classifier(jobject=serialization.read("lmt.model"))
else:
    lmt = Classifier(classname="weka.classifiers.trees.LMT")
    lmt.build_classifier(Wtrain)
    serialization.write("lmt.model", lmt)

evlmt = Evaluation(Wtrain)
evlmt.crossvalidate_model(lmt, Wtrain, 5, Random(1))



print("Error is",evlmt.error_rate)
cm2e = evlmt.confusion_matrix
cm2E = pd.DataFrame(cm2e, index = ["neg","pos"],columns = ["neg","pos"])
plt.figure(figsize = (7,7))
axis = sns.heatmap(cm2E, annot=True, cbar=False, cmap="Reds")