def main():
    directory = sys.argv[1]
    labels_file = sys.argv[2]
    w = int(sys.argv[3])
    h = int(sys.argv[4])
    r = int(sys.argv[5])

    print("Reading files")
    # 0. Get the names of all the files.
    list_of_files = getFileList(directory)

    # 0. Read each file into an OrderedDict wrapped in a Collection.
    datastructures = list()
    for file_p in list_of_files:
        datastructures.append(Collection(file_path=file_p, data_structure=readFile(file_p)))

    print("Normalizing datastructures")
    # 1. (1a) For each data file, normalize data to values between 0.0 and 1.0.
    for collection in datastructures:
        normalize(collection.data_structure)

    # 2. (1b) Determine the lengths and the ranges of the different bands.
    bands = calculateBands(r)

    # 3. (1b) For each data file, replace each value with the center
    #    of the band that value falls into.
    for collection in datastructures:
        switch_values(collection.data_structure, bands)

    print("Creating training and testing sets")
    # 4. Read labels and split into training/testing observations.
    file_label_l = readLabels(directory, labels_file)
    (training_observations, testing_observations) = createObservations(datastructures, file_label_l)

    print("training decision tree")
    decision_tree_root = decisiontree.train(training_observations)

    print("testing decision tree")
    predictions = decisiontree.predict(decision_tree_root, testing_observations)
    print(predictions)
    # DO NOT USE THE WINDOW CREATION CODE
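# The banding step above is the one non-obvious transform: after normalization,
# every value in [0.0, 1.0] is snapped to the center of one of r equal-width bands.
# calculateBands and switch_values are defined elsewhere in the module; the sketch
# below only illustrates the idea, using hypothetical (lower, upper, center) band
# tuples -- it is not the project's actual implementation.
def calculateBands_sketch(r):
    """Split [0.0, 1.0] into r equal-width (lower, upper, center) bands."""
    width = 1.0 / r
    return [(i * width, (i + 1) * width, (i + 0.5) * width) for i in range(r)]

def band_center_sketch(value, bands):
    """Return the center of the band containing value (1.0 falls into the last band)."""
    for lower, upper, center in bands:
        if lower <= value < upper:
            return center
    return bands[-1][2]

# Example: band_center_sketch(0.3, calculateBands_sketch(4)) -> 0.375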
def predict():
    try:
        tree_json = request.json["tree"]
        toy_id = request.json["toy_id"]
        user_truth = request.json["user_truth"]
        targets = request.json["category"]
        tree = utilities.get_tree(tree_json)
        y_pred = decisiontree.predict(tree, toy_id, user_truth, targets[0])
        print('Prediction: ', targets[y_pred[0]])
        return {"prediction": y_pred.tolist()}
    except Exception as err:
        print(err)
        return "Failed", 500
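# For reference, a request that exercises this handler might look like the call
# below. The route path, port, and field values are assumptions (only the handler
# body is shown above); the field names match what the handler reads from request.json.
import requests

payload = {
    "tree": "<tree JSON as produced by the front end>",  # consumed by utilities.get_tree
    "toy_id": 42,
    "user_truth": 1,
    "category": ["cat", "dog"],
}
resp = requests.post("http://localhost:5000/predict", json=payload)
print(resp.status_code, resp.json())  # {"prediction": [...]} on success, 500 otherwise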
def task_list(request):
    if request.method == 'GET':
        return render(request, 'snippets/project.html', locals())

    elif request.method == 'POST':
        json_result = json.loads(request.body)

        if json_result['type'] == 'delConnection':
            sourceid = json_result['sourceid']
            targetid = json_result['targetid']
            finalConnections(line, sourceid, targetid)
            resp = {'code': '1'}
            return HttpResponse(json.dumps(resp), content_type="application/json")

        if json_result['type'] == 'saveConnection':
            relId = json_result['relId']
            taskid = json_result['task.taskId']
            totaskId = json_result['toTaskId']
            sType = json_result['sourceType']
            tType = json_result['targetType']
            connections = {
                'cid': relId,
                'connids': [taskid, totaskId],
                'conntype': [sType, tType],
            }
            line.append(connections)
            return HttpResponse(json.dumps({}), content_type="application/json")

        if json_result['type'] == 'delNode':
            endid = json_result['endid']
            finalNode(line, endid)
            return HttpResponse(json.dumps({}), content_type="application/json")

        if json_result['type'] == 'saveLine':
            endid = json_result['endid']
            endtype = json_result['endtype']
            finalline(line, endid, endtype)
            return HttpResponse(json.dumps({}), content_type="application/json")

        if json_result['type'] == 'data_source':
            global filename
            filename = json_result['data']
            with open('E:/Anaconda/Scripts/CorsApi/snippets/Resource/file/' + filename + '.csv',
                      'rt', encoding="utf-8") as csvfile:
                rows = [r for r in csv.reader(csvfile)]
            global row
            row = rows[0]  # header row: the attribute names
            resp = {'attribute': row}
            return HttpResponse(json.dumps(resp), content_type="application/json")

        if json_result['type'] == 'field':
            column_data = {}
            for attribute in json_result['data']:
                with open('E:/Anaconda/Scripts/CorsApi/snippets/Resource/file/' + filename + '.csv',
                          'rt', encoding="utf-8") as csvfile:
                    reader = csv.DictReader(csvfile)
                    column = [r[attribute] for r in reader]
                if attribute != 'result':
                    # Min-max scale the feature column to [0, 1], rounded to 2 decimals.
                    X = np.array(column, dtype=np.float64).reshape(-1, 1)
                    X_minMax = preprocessing.MinMaxScaler().fit_transform(X)
                    column_data[attribute] = [round(n[0], 2) for n in X_minMax]
                else:
                    column_data[attribute] = list(map(eval, column))
            resp = {'normalization': column_data}
            return HttpResponse(json.dumps(resp), content_type="application/json")

        if json_result['type'] == 'normal':
            column_data = {}
            data = json_result['data']
            for attribute in data:
                column = data[attribute]  # column values arrive in the request body
                if attribute != 'result':
                    X = np.array(column, dtype=np.float64).reshape(-1, 1)
                    X_minMax = preprocessing.MinMaxScaler().fit_transform(X)
                    column_data[attribute] = [round(n[0], 2) for n in X_minMax]
                else:
                    column_data[attribute] = list(map(eval, column))
            resp = {'normalization': column_data}
            return HttpResponse(json.dumps(resp), content_type="application/json")

        if json_result['type'] == 'pca':
            data = json_result['data']
            X = np.array(data)
            pca = PCA(n_components=2)
            pca.fit(X)  # must fit before reading explained_variance_ratio_
            ratio = pca.explained_variance_ratio_
            resp = {'ratio': float(ratio[0])}
            return HttpResponse(json.dumps(resp), content_type="application/json")

        if json_result['type'] == 'bayes':
            data = json_result['data']
            dataset, trainingSet, testSet, accuracy, result = naivebayes.bayes(data)
            resp = {
                'rows': len(dataset),
                'train': len(trainingSet),
                'test': len(testSet),
                'accuracy': accuracy,
                'result': result,
                'attribute': list(data.keys()),
            }
            return HttpResponse(json.dumps(resp), content_type="application/json")

        if json_result['type'] == 'decision_tree':
            max_depth = int(json_result['max_depth'])
            criterion = json_result['criterion']
            splitter = json_result['splitter']
            min_samples_split = int(json_result['min_samples_split'])
            data = json_result['data']
            score = decisiontree.dtree(max_depth, criterion, splitter, min_samples_split, data)
            resp = {'score': score}
            return HttpResponse(json.dumps(resp), content_type="application/json")

        if json_result['type'] == 'dtree_predict':
            a = [json_result['data']]  # wrap the single sample for predict()
            pred = decisiontree.predict(a)
            resp = {'pred': [str(i) for i in pred]}
            return HttpResponse(json.dumps(resp), content_type="application/json")

    return render(request, 'snippets/project.html', locals())
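# decisiontree.dtree is imported from elsewhere; given the four hyperparameters the
# view forwards, one plausible sketch (assuming data maps column names to value
# lists, with the label stored under 'result') is the following. This is a guess at
# the interface, not the project's actual implementation.
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

def dtree_sketch(max_depth, criterion, splitter, min_samples_split, data):
    """Train a decision tree on the supplied columns and return its test accuracy."""
    y = np.array(data['result'])
    X = np.column_stack([data[k] for k in data if k != 'result']).astype(np.float64)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
    clf = DecisionTreeClassifier(max_depth=max_depth, criterion=criterion,
                                 splitter=splitter, min_samples_split=min_samples_split)
    clf.fit(X_train, y_train)
    return clf.score(X_test, y_test)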
print(output)
assert output != 0

####################################################################################################
# Test predict()
####################################################################################################
print("\n\nTesting predict()")
print("----------------------------------------------------------------------------------------------------\n\n")

idSubtrees = {0: 0, 1: 1}
notSubtrees = {0: 1, 1: 0}

# A bare leaf: every row gets the leaf's label.
data = [[1], [1], [0], [0]]
tree = 3
output = dt.predict(tree, data)
print(output)
assert output == [3, 3, 3, 3]

# A single split on feature 0, with leaf labels 10 and 20.
data = [[1], [1], [0], [0]]
tree = (0, {0: 10, 1: 20})
output = dt.predict(tree, data)
print(output)
assert output == [20, 20, 10, 10]

# Extra columns are ignored: only feature 0 is consulted by this tree.
data = [[1, 0], [1, 0], [0, 0], [0, 0]]
tree = (0, {0: 10, 1: 20})
output = dt.predict(tree, data)
print(output)
assert output == [20, 20, 10, 10]
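# These assertions pin down the tree encoding dt.predict expects: a leaf is a bare
# label, and an internal node is a (feature_index, subtrees) tuple whose dict is
# keyed by feature value. A minimal implementation consistent with the tests above
# (a sketch, not necessarily the module's own code):
def predict_sketch(tree, data):
    """Classify each row; a leaf is a bare label, a node is (feature_index, subtree_dict)."""
    def classify(node, row):
        if not isinstance(node, tuple):
            return node  # leaf: return its label
        feature, subtrees = node
        return classify(subtrees[row[feature]], row)
    return [classify(tree, row) for row in data]

# predict_sketch((0, {0: 10, 1: 20}), [[1], [0]]) -> [20, 10], matching the asserts.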
def random_predict(treesset, testdata, features, label):
    predicts = []
    for tree in treesset:
        predict = dt.predict(tree, features, testdata, label)
        predicts.append(predict)
    return majorvoting(predicts)
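# majorvoting is defined elsewhere; since each element of predicts is one tree's
# per-row prediction list, a plausible sketch takes the per-row majority across
# trees (an assumption about its contract, not the project's actual code):
from collections import Counter

def majorvoting_sketch(predicts):
    """Per-row majority vote across the trees' prediction lists."""
    return [Counter(votes).most_common(1)[0][0] for votes in zip(*predicts)]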
def predict(forest: RandomForest, row: Row) -> Label:
    predictions = [decisiontree.predict(tree.tree, row) for tree in forest]
    return util.most_frequent_alphabetically(predictions)
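# util.most_frequent_alphabetically is this project's tie-breaking vote. Assuming
# labels are strings and ties go to the alphabetically smallest label, an
# equivalent helper would be:
from collections import Counter

def most_frequent_alphabetically_sketch(labels):
    """Most common label; ties broken by alphabetical order."""
    counts = Counter(labels)
    return min(counts, key=lambda label: (-counts[label], label))

# most_frequent_alphabetically_sketch(['b', 'a', 'a', 'b']) -> 'a'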