def main():
    directory = sys.argv[1]
    labels_file = sys.argv[2]
    w = int(sys.argv[3])
    h = int(sys.argv[4])
    r = int(sys.argv[5])
    
    print("Reading files")
    #0. Get the names of all the files
    list_of_files=getFileList(directory)
        
    #0. Read the files into an OrderedDict datastructure.
    datastructures=list()
    for file_p in list_of_files:
        
        datastructures.append(Collection(file_path=file_p, data_structure=readFile(file_p)))
    
    print("Normalizing datastructures")
    #1. (1a)For each data file, normalize data to values between 0.0 and 1.0
    
    for collection in datastructures:
        
        data_structure=collection.data_structure
        normalize(data_structure)    
    
    
    
    #2. (1b)Determine the lengths and the ranges for the different bands.
    bands=calculateBands(r)
    
    
    #3. (1b) For each data file, change each of the values by the value of the center
    #of the band to which that value corresponds.
    
    for collection in datastructures:
        datastructure = collection.data_structure
        switch_values(datastructure, bands)
    
    print("Creating training and testing sets")
    #4. Read labels
    file_label_l = readLabels(directory, labels_file)
    
    (training_observations, testing_observations) = createObservations(datastructures, file_label_l)
    
    
    
    print("training decision tree")
    decision_tree_root = decisiontree.train(training_observations)
    
    
    
    print("testing decision tree")
    predictions = decisiontree.predict(decision_tree_root, testing_observations)   
    print(predictions)
    
    #DO NOT USE THE WINDOW CREATION CODE
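None of the helpers called above are shown. A minimal sketch of what normalize, calculateBands, and switch_values could look like, assuming the data structure is an OrderedDict mapping keys to lists of numbers and that the bands are r equal-width intervals over [0.0, 1.0] (both assumptions, not the original code):

def normalize(data_structure):
    # Rescale each column to [0.0, 1.0] (min-max), in place.
    for key, values in data_structure.items():
        lo, hi = min(values), max(values)
        span = (hi - lo) or 1.0  # guard against constant columns
        data_structure[key] = [(v - lo) / span for v in values]

def calculateBands(r):
    # r equal-width bands over [0.0, 1.0], each kept as (low, high, center).
    width = 1.0 / r
    return [(i * width, (i + 1) * width, (i + 0.5) * width) for i in range(r)]

def switch_values(data_structure, bands):
    # Replace each value with the center of the band containing it.
    for key, values in data_structure.items():
        for i, v in enumerate(values):
            for low, high, center in bands:
                if low <= v <= high:
                    values[i] = center
                    break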
Example #3
def predict():
    try:
        tree_json = request.json["tree"]
        toy_id = request.json["toy_id"]
        user_truth = request.json["user_truth"]
        targets = request.json["category"]
        tree = utilities.get_tree(tree_json)

        y_pred = decisiontree.predict(tree, toy_id, user_truth, targets[0])
        print('Prediction: ', targets[y_pred[0]])
        return {"prediction": y_pred.tolist()}
    except Exception as err:
        print(err)
        return "Failed", 500
Example #4
def task_list(request):
    
    if request.method == 'GET':
        #serializer = TaskSerializer(data=request.data)
        
        return render(request, 'snippets/project.html', locals())

    elif request.method == 'POST':
        
        postBody = request.body
        json_result = json.loads(postBody)
        
        if json_result['type']=='delConnection':
            
            sourceid=json_result['sourceid']
            targetid=json_result['targetid']
            finalConnections(line, sourceid, targetid)
            resp={
                    'code' : '1'
                    }
            return HttpResponse(json.dumps(resp),content_type="application/json")
        
        if json_result['type']=='saveConnection':
            
            relId = json_result['relId']
            taskid = json_result['task.taskId']
            totaskId = json_result['toTaskId']
            sType = json_result['sourceType']
            tType = json_result['targetType']
    
            listtype = []
            listtype.append(sType)
            listtype.append(tType)   
            listid = []
            listid.append(taskid)
            listid.append(totaskId)    
            connections = {}
            connections['cid'] = relId
            connections['connids'] = listid
            connections['conntype'] = listtype   
            line.append(connections)
            resp={}
            return HttpResponse(json.dumps(resp),content_type="application/json")
        
        if json_result['type']=='delNode':
            
            endid=json_result['endid']          
            finalNode(line, endid)
            resp={}
            return HttpResponse(json.dumps(resp),content_type="application/json")
        
        if json_result['type']=='saveLine':
            
            endid=json_result['endid']
            endtype=json_result['endtype']
            finalline(line,endid,endtype)
            resp={}
            return HttpResponse(json.dumps(resp),content_type="application/json")
            
        if json_result['type']=='data_source':
            global filename
            filename = json_result['data']
            with open('E:/Anaconda/Scripts/CorsApi/snippets/Resource/file/'+json_result['data']+'.csv','rt',encoding="utf-8") as csvfile:
                reader = csv.reader(csvfile)
                rows = [row for row in reader]
            
            global row
            row = rows[0]
            resp={
                    'attribute':row,
                    }
            
            return HttpResponse(json.dumps(resp),content_type="application/json")
        
        if json_result['type']=='field':
            column_data = {}
            for attribute in json_result['data']:
                with open('E:/Anaconda/Scripts/CorsApi/snippets/Resource/file/'+filename+'.csv','rt',encoding="utf-8") as csvfile:
                    inner_list=[]
                    reader = csv.DictReader(csvfile)               
                    column = [row[attribute] for row in reader]
                    if attribute!='result':
                        # Min-max scale feature columns to [0, 1], rounded to 2 decimals.
                        X = np.array(column,dtype=np.float64).reshape(-1,1)
                        min_max_scaler = preprocessing.MinMaxScaler()
                        X_minMax = min_max_scaler.fit_transform(X)
                        for n in X_minMax:
                            inner_list.append(round(n[0],2))
                        column_data[attribute]=inner_list
                    else:
                        # The 'result' column holds labels; parse them as Python literals.
                        rs=map(eval,column)
                        column_data[attribute]=list(rs)

            resp={
                    'normalization':column_data,
                    }
            
            return HttpResponse(json.dumps(resp),content_type="application/json")
        
        if json_result['type']=='normal':
            column_data = {}
            data = json_result['data']
            for attribute in data:
                # Assumption: json_result['data'] maps each attribute to its
                # list of raw values.
                column = data[attribute]
                inner_list = []
                if attribute!='result':
                    X = np.array(column,dtype=np.float64).reshape(-1,1)
                    min_max_scaler = preprocessing.MinMaxScaler()
                    X_minMax = min_max_scaler.fit_transform(X)
                    for n in X_minMax:
                        inner_list.append(round(n[0],2))
                    column_data[attribute]=inner_list
                else:
                    rs=map(eval,column)
                    column_data[attribute]=list(rs)
            resp={
                    'normalization':column_data,
                    
                    }
            
            return HttpResponse(json.dumps(resp),content_type="application/json")
        
        if json_result['type']=='pca':
            data = json_result['data']
            X = np.array(data)
            pca = PCA(n_components=2)
            # explained_variance_ratio_ is only available after fitting.
            pca.fit(X)
            ratio = pca.explained_variance_ratio_
            resp={
                    'ratio':float(ratio[0]),
                    
                    }
            
            return HttpResponse(json.dumps(resp),content_type="application/json")
        
        if json_result['type']=='bayes':
             
            data = json_result['data'] 
            dataset,trainingSet,testSet,accuracy,result = naivebayes.bayes(data)
            attribute = list(data.keys())
            
            resp={
                    'rows':len(dataset),
                    'train':len(trainingSet), 
                    'test':len(testSet),
                    'accuracy':accuracy,
                    'result':result,
                    'attribute':attribute,
                    }
            
            return HttpResponse(json.dumps(resp),content_type="application/json")
        
        if json_result['type']=='decision_tree':         
            max_depth = int(json_result['max_depth'])
            criterion = json_result['criterion']
            splitter = json_result['splitter']
            min_samples_split = int(json_result['min_samples_split'])
            data = json_result['data'] 
            score = decisiontree.dtree(max_depth,criterion,splitter,min_samples_split,data)
            
            resp={
                    'score':score,
                    }
            
            return HttpResponse(json.dumps(resp),content_type="application/json")
            
        if json_result['type']=='dtree_predict':
            a=[]
            dtree_predict= json_result['data']
            #dtree_predict = np.float64(dtree_predict)
            
            a.append(dtree_predict)
            pred = decisiontree.predict(a)
            
            resp={
                    'pred':[str(i) for i in pred],
                    }
            
            return HttpResponse(json.dumps(resp),content_type="application/json")    
    return render(request, 'snippets/project.html', locals())
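As a usage sketch, a client could drive the 'pca' branch of this view like the following; the URL is hypothetical and the payload shape (a list of numeric rows) follows the handler above:

import requests

# Hypothetical URL for the view; the handler dispatches on the 'type' field.
# (CSRF handling is omitted for brevity and assumed exempt here.)
resp = requests.post(
    "http://localhost:8000/snippets/project/",
    json={"type": "pca",
          "data": [[1.0, 2.0], [2.0, 4.1], [3.0, 5.9]]},
)
print(resp.json())  # e.g. {'ratio': 0.99...}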
Example #5
print(output)
assert output != 0

####################################################################################################
# Test predict()
####################################################################################################

print("\n\nTesting predict()")
print("----------------------------------------------------------------------------------------------------\n\n")

idSubtrees = {0: 0, 1: 1}
notSubtrees = {0: 1, 1: 0}

data = [[1], [1], [0], [0]]
tree = 3
output = dt.predict(tree, data)
print(output)
assert output == [3, 3, 3, 3]

data = [[1], [1], [0], [0]]
tree = (0, {0: 10, 1: 20})
output = dt.predict(tree, data)
print(output)
assert output == [20, 20, 10, 10]

data = [[1, 0], [1, 0], [0, 0], [0, 0]]
tree = (0, {0: 10, 1: 20})
output = dt.predict(tree, data)
print(output)
assert output == [20, 20, 10, 10]
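The tests above imply a tree representation in which a leaf is a bare label and an internal node is a (feature_index, {feature_value: subtree}) pair. A minimal recursive predict consistent with those assertions might look like this (a sketch, not the tested implementation):

def predict(tree, data):
    # A leaf is any non-tuple value: return it as the label for every row.
    def predict_one(node, row):
        while isinstance(node, tuple):
            feature_index, subtrees = node
            node = subtrees[row[feature_index]]
        return node
    return [predict_one(tree, row) for row in data]

# Matches the assertions above:
assert predict(3, [[1], [1], [0], [0]]) == [3, 3, 3, 3]
assert predict((0, {0: 10, 1: 20}), [[1], [1], [0], [0]]) == [20, 20, 10, 10]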
Example #6
def random_predict(treesset, testdata, features, label):
    predicts = []
    for tree in treesset:
        predict = dt.predict(tree, features, testdata, label)
        predicts.append(predict)
    return majorvoting(predicts)
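majorvoting is not defined in this snippet; a plausible row-wise majority-vote helper, assuming each element of predicts is one tree's list of per-row labels:

from collections import Counter

def majorvoting(predicts):
    # Transpose to get all trees' votes per row, then take the mode.
    return [Counter(row_votes).most_common(1)[0][0]
            for row_votes in zip(*predicts)]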
Example #7
def predict(forest: RandomForest, row: Row) -> Label:
    predictions = [decisiontree.predict(tree.tree, row) for tree in forest]
    return util.most_frequent_alphabetically(predictions)
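util.most_frequent_alphabetically is not shown; its name suggests ties are broken by taking the alphabetically first label. A sketch under that assumption:

from collections import Counter

def most_frequent_alphabetically(predictions):
    # Prefer the highest count; break ties with the alphabetically smaller label.
    counts = Counter(predictions)
    return min(counts, key=lambda label: (-counts[label], label))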