def run_skin_one_fold(self, number_of_training, number_of_training_instances,
                      fold, percent_of_train, number_of_equal_disjoint_sets):
    # get skin data
    x_train, y_train, x_test, y_test, x, y = self.get_skin_data(
        percent_of_train)

    # get training weights
    weights_random = (
        regression.Regression().gradient_descent_random_general(
            x_train, y_train, number_of_training,
            number_of_training_instances))
    weights_all = regression.Regression().gradient_descent_all(
        x_train, y_train)

    # split the training data into "number_of_equal_disjoint_sets" equal,
    # disjoint subsets and train one model per subset
    # data_set, label = self.my_data.get_disjoint_subset_data(number_of_equal_disjoint_sets, x, y)
    data_set, label = Data().get_disjoint_subset_data(
        number_of_equal_disjoint_sets, x_train, y_train)
    weights_equal = regression.Regression().gradient_descent_equal(
        data_set, label)

    # write trained weights to file
    Data().write_to_csv_file(
        "../resources/skin/result/weights/output_weights_random" +
        str(fold) + ".csv", weights_random)
    Data().write_to_csv_file(
        "../resources/skin/result/weights/output_weights_equal" +
        str(fold) + ".csv", weights_equal)
    Data().write_to_csv_file(
        "../resources/skin/result/weights/output_weights_all" +
        str(fold) + ".csv", weights_all)

    # get center point
    my_center_point = itertver.IteratedTverberg()
    center_point_random, average_point_random = \
        my_center_point.get_center_and_average_point(weights_random)
    center_point_equal, average_point_equal = \
        my_center_point.get_center_and_average_point(weights_equal)

    # testing phase
    test.Test().perform_test(
        x_test, y_test, weights_random, center_point_random,
        average_point_random, weights_all,
        "../resources/skin/result/errors/" + str(fold) + "error_random.txt")
    test.Test().perform_test(
        x_test, y_test, weights_equal, center_point_equal,
        average_point_equal, weights_all,
        "../resources/skin/result/errors/" + str(fold) + "error_equal.txt")
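# A minimal usage sketch for run_skin_one_fold. "SkinExperiment" is a
# hypothetical name for the class hosting this method (the real class is
# not shown here), and the parameter values are illustrative only.
def _example_run_skin_folds():
    experiment = SkinExperiment()  # hypothetical host class
    for fold in range(10):
        experiment.run_skin_one_fold(
            number_of_training=100,             # 100 randomly sampled trainings
            number_of_training_instances=1000,  # each on 1000 sampled rows
            fold=fold,
            percent_of_train=0.7,               # 70% of the data for training
            number_of_equal_disjoint_sets=10)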
def run_haber_man(self):
    my_sklearn = sklearnlib.Sklearnlib()
    weights, scores, mean_point = my_sklearn.train_and_test(
        3200, self.X, self.Y, 0.3)
    Data().write_to_csv_file(
        "../resources/haberman/output_weights_haber_man", weights)
    Data().write_score_to_file("../resources/haberman/scores", scores)

    my_itertver = itertver.IteratedTverberg()
    center_point_with_proof = my_itertver.center_point(weights)

    my_plot = plot.Plot()
    my_plot.plot3dpoints(weights, center_point_with_proof[0][0], mean_point)

    print "Center point with proof: ", center_point_with_proof[0]
    print "Center point: ", center_point_with_proof[0][0]
    print "Proof of center point: ", center_point_with_proof[0][1]
    print "Depth of center point: ", len(center_point_with_proof[0][1])
    return weights, center_point_with_proof[0][0], mean_point
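# Shape of IteratedTverberg.center_point's return value, as implied by the
# prints above (a sketch inferred from usage, not from the library code):
#   center_point_with_proof[0][0] -> the center point itself
#   center_point_with_proof[0][1] -> its proof, a list whose length is the
#                                    depth of the center point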
def post(self, type=None):
    self.set_header('Content-Type', 'application/json')
    jsonM = Data()
    getJson = self.request.body
    jsondata = json.loads(getJson)

    __is_base64 = False
    __image = None
    if 'url' in jsondata['query'].keys():
        __image = jsondata['query']['url']
    else:
        __image = jsondata['query']['base64']
        __is_base64 = True
    if __image is None:
        raise Exception('image data is empty!')

    __name = jsondata['query']['name']
    __id = jsondata['query']['id']
    __data = jsondata['query']['data']
    # fields that should be searchable
    __search = jsondata['query']['search']
    __data['id'] = __id

    # initialize Manage directly with the application's redis connection
    Manage(self.application.r).index_image(
        __id, __search, __data, __image, __name, __is_base64)
    return self.write(
        jsonM.setStatus('status', 'OK')
             .set('msg', 'index success!')
             .get())
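# An illustrative request body for the POST handler above, reconstructed
# from the fields the handler reads; the concrete values are assumptions,
# not taken from the source.
example_index_payload = {
    "query": {
        "url": "http://example.com/cat.jpg",  # or "base64": "<base64 string>"
        "name": "cat",
        "id": 42,
        "data": {"category": "animal", "color": "black"},
        "search": ["category", "color"]  # fields used to build the index key
    }
}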
def __init__(self, Manage=None, result_size=10):
    """Set the manager, feature extractor and data helper."""
    self.manage = Manage
    self.feature = Feature()
    self.data = Data()
    self.result_size = result_size
def get_skin_data(self, percent_of_train):
    """
    Each run gets a different subset of X and Y, as the matrices are
    shuffled before they are split.
    """
    x, y = self.my_data.get_skin_data()
    x_train, y_train, x_test, y_test = Data().split_to_train_and_test(
        x, y, percent_of_train)
    return x_train, y_train, x_test, y_test, x, y
def test_all_point(self, number_of_fold, percent_of_train, path):
    # get skin data
    x_train, y_train, x_test, y_test, x, y = self.get_skin_data(
        percent_of_train)

    weights_all = []
    sgd = linear_model.SGDClassifier()
    my_test = test.Test()
    # train one classifier per split ratio, in half steps
    # (1, 0.5, 2, 1.5, ..., 10, 9.5), collecting every weight vector
    for i in range(1, 11):
        # whole-number split ratio
        x_train_, y_train_, x_test_, y_test_ = Data().split_to_train_and_test(
            x_train, y_train, i)
        sgd.fit(x_train_, y_train_)
        weights_all.append(sgd.coef_)

        # half-step split ratio (0.5 below the whole number)
        half = float(i) - 0.5
        x_train_, y_train_, x_test_, y_test_ = Data().split_to_train_and_test(
            x_train, y_train, half)
        sgd.fit(x_train_, y_train_)
        weights_all.append(sgd.coef_)
    my_test.perform_test_for_all_point(x_test, y_test, weights_all, path)
def index_image(self, id=0, search=[], data=[], image='', name='',
                is_base64=False):
    # TODO: need an algorithm that normalizes the search field data to the
    # same form on every call
    # build the key from the search fields supplied by the client
    source_data = {}
    for index_name in search:
        source_data[index_name] = data[index_name]
    # every object must have a name
    data['name'] = name
    key_name = self.__generate_key(search, source_data)
    # append the id at the end
    key_name += '#' + str(id)

    # stored value layout: 0 -> data, 1 -> image (binary)
    if image:
        if is_base64:
            # handle base64-encoded image data
            if 'base64,' in image:
                image = image.split(',')[1]
            image_str = cStringIO.StringIO(base64.b64decode(image))
            im_instance = im.open(image_str).resize(self.image_size)
        else:
            # handle images referenced by URL
            res = urllib2.urlopen(image)
            if res.code != 200:
                # download failed, nothing to index
                return False
            # read the response into a bytes buffer
            imimage = io.BytesIO(res.read())
            # shrink the image; it is converted to JPEG below
            im_instance = im.open(imimage).resize(self.image_size)

        # whichever input form was used, the result is stored in redis
        output = StringIO.StringIO()
        im_instance \
            .convert('RGB') \
            .save(output, 'JPEG')

        string = Data(data).to_string()
        value = string + self.divided + output.getvalue()
        self.r.set(key_name, value)
        return True
    return False
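# A minimal usage sketch, assuming "r" is an existing redis connection; the
# field names and values are illustrative only.
manager = Manage(r)
manager.index_image(
    id=42,
    search=['category', 'color'],
    data={'category': 'animal', 'color': 'black'},
    image='http://example.com/cat.jpg',
    name='cat',
    is_base64=False)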
def delete(self, type=None):
    try:
        # os.remove(os.environ[config.STORAGE_INDEX_DB])
        Manage(self.application.r).clear_db()
    except:
        # clearing an empty or missing index is not an error
        pass
    self.set_header('Content-Type', 'application/json')
    jsonM = Data()
    self.write(
        jsonM.setStatus('status', 'OK')
             .set('msg', 'delete index success!')
             .get())
def gradient_descent_random_general(self, data_set, label_set,
                                    number_of_training,
                                    number_of_training_instances):
    weights_random = []
    my_data = Data()
    for i in range(0, number_of_training):
        print "%dth training." % i
        reg_opt = optimization.Optimization()
        # each run fits on a fresh random sample of
        # "number_of_training_instances" rows
        weights_random.append(
            reg_opt.gradient_descent_random(
                data_set, label_set,
                my_data.get_random_index_list(
                    number_of_training_instances, data_set)))
    return weights_random
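# A minimal sketch of how the random weights feed into the center-point
# computation, following run_skin_one_fold above; the parameter values are
# illustrative only.
reg = regression.Regression()
weights = reg.gradient_descent_random_general(
    x_train, y_train,
    50,    # number_of_training: 50 independently trained models
    500)   # number_of_training_instances: each on 500 random rows
center, average = itertver.IteratedTverberg().get_center_and_average_point(
    weights)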
def run_protein_one_fold(self, number_of_training,
                         number_of_training_instances,
                         number_of_equal_disjoint_sets, fold,
                         percent_of_training):
    x_, y_ = self.my_protein_data.get_protein_data()
    x_train, x_test, y_train, y_test = sklearnlib.Sklearnlib(
    ).split_train_and_test(x_, y_, percent_of_training)

    # get training weights
    weights_random = (
        regression.Regression().gradient_descent_random_general(
            x_train, y_train, number_of_training,
            number_of_training_instances))
    weights_all = regression.Regression().gradient_descent_all(
        x_train, y_train)

    # split the training data into "number_of_equal_disjoint_sets" equal,
    # disjoint subsets and train one model per subset
    # data_set, label = self.my_data.get_disjoint_subset_data(number_of_equal_disjoint_sets, x, y)
    data_set, label = Data().get_disjoint_subset_data(
        number_of_equal_disjoint_sets, x_train, y_train)
    weights_equal = regression.Regression().gradient_descent_equal(
        data_set, label)

    # get center point
    my_center_point = itertver.IteratedTverberg()
    center_point_random, average_point_random = \
        my_center_point.get_center_and_average_point(weights_random)
    center_point_equal, average_point_equal = \
        my_center_point.get_center_and_average_point(weights_equal)

    # testing phase
    test.Test().perform_test(
        x_test, y_test, weights_random, center_point_random,
        average_point_random, weights_all,
        "../resources/protein/result_01/errors/" + str(fold) +
        "error_random.txt")
    test.Test().perform_test(
        x_test, y_test, weights_equal, center_point_equal,
        average_point_equal, weights_all,
        "../resources/protein/result_01/errors/" + str(fold) +
        "error_equal.txt")
def process_match(origin_io=None, lists=None):
    """
    Match the origin image against every stored entry; meant to be run
    per worker process.
    """
    feature = Feature()
    results = []
    # "lists" holds raw redis row data
    for container in lists:
        # the image we are matching against
        feature.set_byte_base_image(origin_io)
        # split the stored value into its metadata and image halves
        data_str, image_str = container.split(Manage.divided)
        byte = io.BytesIO(image_str)
        feature.set_byte_storage_image(byte)
        bean = json.loads(data_str)
        # compare using the large colour block scheme
        result = feature.process(None)
        # merge the partial results into a single score
        score = mix_hash(result)
        # prepare the response data
        processed = {}
        for i in bean:
            processed[i] = bean[i]
        processed['score'] = score
        results.append(processed)
    return results
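# A minimal usage sketch, assuming "r" is a redis connection holding entries
# written by Manage.index_image; the key pattern and file name are
# assumptions, not taken from the source.
rows = [r.get(k) for k in r.keys('*#*')]  # every index key ends in '#<id>'
with open('query.jpg', 'rb') as f:
    matches = process_match(origin_io=io.BytesIO(f.read()), lists=rows)
for m in sorted(matches, key=lambda b: b['score'], reverse=True)[:10]:
    print m['name'], m['score']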