def test_tree():
    '''Build a DecisionTree from a 15-row loan table and check four predictions.'''
    text = '''
    年龄, 工作, 已婚, 信用, 可以借贷?
    青年, no , no , 一般 , 不给
    青年, no , no , 好 , 不给
    青年, yes, no , 好 , 给
    青年, yes, yes, 一般 , 给
    青年, no , no , 一般 , 不给
    中年, no , no , 一般 , 不给
    中年, no , no , 好 , 不给
    中年, yes, yes, 好 , 给
    中年, no , yes, 非常好 , 给
    中年, no , yes, 非常好 , 给
    老年, no , yes, 非常好 , 给
    老年, no , yes, 好 , 给
    老年, yes, no , 好 , 给
    老年, yes, no , 非常好 , 给
    老年, no , no , 一般 , 不给
    '''
    data, title = datamatrix(text)
    dt = DecisionTree(data, labels=title)
    n = dt.best_split_feature(data)
    tree = dt.decision_tree
    print(n)
    puts(tree)

    # (sample row, expected label) pairs
    cases = [
        (['青年', 'no', 'no', '好'], '不给'),
        (['青年', 'no', 'yes', '非常好'], '给'),
        (['老年', 'yes', 'yes', '一般'], '给'),
        (['老年', 'no', 'no', '好'], '不给'),
    ]
    # Classify every sample first, then assert, in the same order as before.
    predictions = [dt.classify(sample, decision_tree=tree) for sample, _ in cases]
    for predicted, (_, expected) in zip(predictions, cases):
        predicted | should.eq(expected)
def brute_force_params(run_func, logger, **params_range):
    '''Call run_func once for every combination of the candidate parameter values.

    Each keyword argument is an iterable of possible values for that
    parameter, e.g. range(0, 250, 10); itertools.product enumerates every
    combination.

    Returns a list of [params, result] pairs, where params is a tuple of
    (name, value) pairs. Progress is written to logger.debug.
    '''
    from itertools import product
    import time

    total_start = time.time()
    total = []
    # One list of (name, value) pairs per parameter; product picks one from each.
    choices = [[(k, elem) for elem in v] for k, v in params_range.items()]
    params_product = list(product(*choices))
    puts('params prepared, {} variantions'.format(len(params_product)))

    template = 'get <{result:.3%}> by: {params} \n [current {loop_cost:.1f}s / total {total_cost:.1f}s] {percent:.2%}'
    for i, params in enumerate(params_product):
        loop_start = time.time()
        result = run_func(**dict(params))
        total.append([params, result])
        loop_cost = time.time() - loop_start
        total_cost = time.time() - total_start
        percent = (i + 1) / len(params_product)
        info = template.format(
            result=result,
            params=params,
            loop_cost=loop_cost,
            total_cost=total_cost,
            percent=percent,
        )
        logger.debug(info)

    puts('all done!')
    logger.debug(total)
    return total
def test_word_filler_render():
    '''Print the required fields of the Word template (all, then unique) and render it.'''
    t1 = os.getcwd() + '/test/test_templates/test_{{name}}_面积计算表.doc'
    t2 = os.getcwd() + '/test/test_templates/test_no_field_面积计算表.doc'
    from infotext import InfoText
    text = '''
    单位名称: 测试单位
    name: 测试单位name
    项目名称: 测试项目
    项目编号: 2015-项目编号-001
    面积90: 12345.600
    面积80: 12345.300
    地籍号: 1234567890010010000
    四至: 测试路1;测试街2;测试路3;测试街4
    土地坐落: 测试路以东,测试街以南
    area: 1000
    已设定值: value
    '''
    info = InfoText.from_string(text)

    # A fresh Filler is created for every step, as before.
    for unique in (False, True):
        filler = Filler(template_path=t1, output_folder=os.getcwd() + '/test/test_output')
        filler.detect_required_fields(unique=unique) | puts()

    filler = Filler(template_path=t1, output_folder=os.getcwd() + '/test/test_output')
    filler.render(info=info)
def brute_force_params(run_func, logger, **params_range):
    '''Try every possible combination of the given parameters.

    Every keyword argument is a collection of candidate values, for example
    range(0, 250, 10). itertools.product walks through all combinations and
    run_func is called with each one as keyword arguments.

    Returns the list of [params, result] pairs; params is a tuple of
    (name, value) pairs. Each run is reported through logger.debug.
    '''
    from itertools import product
    import time

    total_start = time.time()
    total = []
    per_param_choices = []
    for k, v in params_range.items():
        per_param_choices.append([(k, elem) for elem in v])
    params_product = list(product(*per_param_choices))
    puts('params prepared, {} variantions'.format(len(params_product)))

    for i, params in enumerate(params_product):
        loop_start = time.time()
        result = run_func(**dict(params))
        total.append([params, result])
        loop_cost = time.time() - loop_start
        total_cost = time.time() - total_start
        percent = (i + 1) / len(params_product)
        logger.debug(
            'get <{result:.3%}> by: {params} \n [current {loop_cost:.1f}s / total {total_cost:.1f}s] {percent:.2%}'.format(
                result=result,
                params=params,
                loop_cost=loop_cost,
                total_cost=total_cost,
                percent=percent,
            )
        )

    puts('all done!')
    logger.debug(total)
    return total
def test_intepolate_polyline_dx():
    '''Densify each selected polyline and split it into polyline fragments.'''
    cad = AutoCAD()
    # alternative distance: 0.3
    options = dict(distance=0.2, delete_original=True, break_at_vertexes=True)
    for pl in cad.selecting():
        interpl, report = cad.interpolate_polyline(pl, **options)
        puts('多段线加密 分割为多段线的碎片')
def test_info_nested_by_yaml_load():
    '''Load test/nested.inf via InfoText.from_yaml and print it in three forms.'''
    info = InfoText.from_yaml(os.getcwd() + '/test/nested.inf')
    separator = '----'
    puts(info)
    print(separator)
    puts(info.content)
    print(separator)
    print(yaml.dump(info.content))
def test_trim_result():
    '''Convert the result CSV into a submit file, guessing a digit for failed rows.'''
    path = 'result_by_self_train[28000].csv'
    result = []
    for i, r, b in load_csv(path, sample=0):
        if r == 'classify failed':
            # No prediction available: fall back to a random digit 0-9.
            result.append(random.choice(range(10)))
        else:
            result.append(int(r))
    puts(statistic(result))
    result = [[r] for r in result]
    write_csv('submit_' + path, rows=result, headers=['val'])
def test_check_result():
    '''Mark each result row '+' or '-' and print the statistic of the marks.

    A row scores '+' when str(r) equals the second character of its third column.
    '''
    path_result = 'result_by_self_train[28000].csv'
    path_benchmark = 'rf_benchmark.csv'
    result = load_csv(path_result, sample=0)
    # benchmark = load_csv(path_benchmark, sample=21)
    score = ['+' if str(r) == b[1:2] else '-' for i, r, b in result]
    puts(statistic(score))
def test_intepolate_polyline():
    '''Densify each selected polyline and color the result green.'''
    cad = AutoCAD()
    # alternative distance: 0.3
    options = dict(distance=0.2, delete_original=True, break_at_vertexes=False)
    for pl in cad.selecting():
        interpl, report = cad.interpolate_polyline(pl, **options)
        interpl.color = 'green'
        puts('多段线加密')
def test_trim_result():
    '''Write a submit CSV from the result file; failed rows get a random digit.'''
    path = 'result_by_self_train[28000].csv'
    digits = list(range(0, 10))
    result = [
        int(r) if r != 'classify failed' else random.choice(digits)
        for i, r, b in load_csv(path, sample=0)
    ]
    puts(statistic(result))
    result = [[r] for r in result]
    write_csv('submit_' + path, rows=result, headers=['val'])
def test_info_additional_keys():
    '''Check InfoText.get for plain keys, a missing key and key<default> values.'''
    from pyshould import should
    info = InfoText.from_yaml(os.getcwd() + '/test/nested.inf')
    puts(info.content)

    expectations = [
        ('a', 123),
        ('ErrorKey', None),
        ('foo', 'bar'),           # from key<default>
        ('current_year', 1404),   # key<default> contains this 1404
    ]
    for key, expected in expectations:
        info.get(key) | should.eq(expected)

    # key<default> does not contain this
    print(info.get('current_date'))
def test_redraw_vertex_sequence():
    '''Redraw the vertex order of each selected polyline and color it yellow.'''
    cad = AutoCAD()
    # alternative: first=-1
    options = dict(first=None, hint=False, auto_reverse=True)
    for pl in cad.selecting():
        redrawpl = cad.redraw_vertex_sequence(pl, **options)
        redrawpl.color = 'yellow'
        puts('重绘顶点顺序')
def test_de_intepolate_polyline():
    '''Thin out each selected polyline, keeping the result only if the area barely changes.'''
    cad = AutoCAD()
    for pl in cad.selecting():
        interpl, report = cad.de_interpolate_polyline(pl, threshold=3)
        area_kept = abs(interpl.area - pl.area) < 30
        if area_kept:
            # Accept the thinned polyline and drop the original.
            interpl.color = 'green'
            pl.delete()
        else:
            # Area drifted too far: discard the thinned polyline.
            interpl.delete()
        puts('多段线抽稀 report=')
def parse_packaging_served_detail_page(url, folder):
    '''Download every project image found on one detail page.

    url: address of the detail page to fetch.
    folder: raw folder name; it is passed through to_file_name and the
        resulting directory is created if needed.

    Images are selected with "#project-modules li.image>img" and stored via
    save_image as "<folder>_<index>", the index starting at 1.
    '''
    url | puts()
    r = requests.get(url)
    doc = PyQuery(r.text) | puts()
    folder = to_file_name(folder) | puts()
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(folder, exist_ok=True)
    for i, elem in enumerate(doc("#project-modules li.image>img"), 1):
        # Separate name so the page url parameter is not overwritten.
        image_url = PyQuery(elem).attr("src")
        if not image_url:
            # An <img> without src cannot be downloaded; skip it.
            continue
        save_image(image_url, folder, folder + "_" + str(i))
def train(self):
    '''Build self.n_trees decision trees, each on a random subset of the features.

    For every tree, n_features_per_tree feature indexes are sampled without
    replacement; the indexes are recorded in self.features_index_for_trees
    and the tree built from the filtered data is appended to self.trees.

    When self.verbose is set, a summary and the first (up to three) decision
    trees are printed.
    '''
    for _ in range(self.n_trees):
        features_index = random.sample(range(self.n_features), self.n_features_per_tree)
        self.features_index_for_trees.append(features_index)
        data_filtered = self.filter_traindata(features_index)
        labels_filtered = self.filter_labels(features_index)
        tree = DecisionTree(data_filtered, labels=labels_filtered)
        self.trees.append(tree)

    if self.verbose:
        puts('train done: total {self.n_rows} rows, {self.n_trees} trees, each tree use {self.n_rows_per_tree} rows data')
        # Slicing works with any number of trees; the former
        # `t1, t2, t3, *_, tn = self.trees` raised ValueError below four.
        for tree in self.trees[:3]:
            pprint(tree.decision_tree)
        pprint('------------------------')
def test_documasonry_generate():
    '''Generate documents from four templates with Documasonry and print the result.'''
    template_suffixes = (
        '/test/test_templates/_test_{{项目名称}}-申请表.xls',
        '/test/test_templates/test_{{name}}_面积计算表.doc',
        '/test/test_templates/test_{{测试单位}}-宗地图.dwg',
        '/test/test_templates/test_no_field_面积计算表.doc',
    )
    template_paths = [os.getcwd() + suffix for suffix in template_suffixes]
    output_path = os.getcwd() + '/test/test_output'
    masonry = Documasonry(output_path=output_path, template_paths=template_paths)
    text = '''
    项目名称: test1
    单位名称: test2
    地籍号: 110123122
    name: sjgisdgd
    面积90: 124.1
    面积80: 234.2
    area: 124.2
    测试单位: testconm
    title: testtitle
    project: pro.
    date: 20124002
    ratio: 2000
    landcode: 235
    area80: 94923
    area90: 3257
    '''
    info = InfoText.from_string(text)
    masonry.generate(info=info, save=True, add_index=True) | puts()
def points_list_to_variant(self, coord):
    '''Flatten coord into floats and wrap them in a COM VARIANT array of doubles.

    Raises AttributeError when the flattened sequence has an odd length.
    '''
    import pylon
    data = [float(x) for x in pylon.flatten(coord)] | pylon.puts()
    if len(data) % 2 != 0:
        raise AttributeError("point_to_variant: coord length must be even")
    variant_type = pythoncom.VT_ARRAY | pythoncom.VT_R8
    return win32com.client.VARIANT(variant_type, data)
def save_image(url, folder, image_name=None):
    '''Download url into folder unless the target file already exists.

    url: address of the image.
    folder: directory the file is written to.
    image_name: optional base name; when given, the file is named
        to_file_name(image_name) plus the text after the last "." of url,
        otherwise the last path segment of url is used.

    Nothing is written when the server does not answer with status 200.
    '''
    if not image_name:
        file_name = folder + "/" + url.split("/")[-1]
    else:
        ext = url.split(".")[-1]
        file_name = folder + "/" + to_file_name(image_name) + "." + ext
    if os.path.exists(file_name):
        puts("-- already exist file_name")
        return
    r = requests.get(url, stream=True)
    try:
        if r.status_code != 200:
            return
        with open(file_name, "wb") as f:
            # Let urllib3 undo gzip/deflate so the raw bytes are the image itself.
            r.raw.decode_content = True
            shutil.copyfileobj(r.raw, f)
    finally:
        # A streamed response keeps its connection until it is closed.
        r.close()
    puts("save done! url >> file_name")
def test_documasonry_detect_fields():
    '''Print the fields required by the four test templates.'''
    template_suffixes = (
        '/test/test_templates/_test_{{项目名称}}-申请表.xls',
        '/test/test_templates/test_{{name}}_面积计算表.doc',
        '/test/test_templates/test_{{测试单位}}-宗地图.dwg',
        '/test/test_templates/test_no_field_面积计算表.doc',
    )
    template_paths = [os.getcwd() + suffix for suffix in template_suffixes]
    output_path = os.getcwd() + '/test/test_output'
    masonry = Documasonry(output_path=output_path, template_paths=template_paths)
    masonry.detect_required_fields() | puts()
def save(self, info, close=True, prefix=''):
    '''Save the filled document into self.output_folder.

    info: field values used to evaluate the template file name.
    close: close the document after a successful save.
    prefix: text prepended to the output file name.

    An existing file at the target path is first renamed with a
    ".backup-YYYYmmdd-HHMMSS" marker inserted before its extension.

    Raises SaveDocumentError (chained to the original exception) when the
    document cannot be saved.
    '''
    self.output_name = prefix + evalute_field(os.path.basename(self.template_path), info)
    output_path = os.path.join(self.output_folder, self.output_name)
    # Use backslashes throughout the path handed to SaveAs.
    output_path = output_path.replace('/', '\\')
    if os.path.exists(output_path):
        fix = time.strftime('.backup-%Y%m%d-%H%M%S')
        # join() puts the marker between root and extension: name.backup-...ext
        os.rename(output_path, fix.join(os.path.splitext(output_path)))
    try:
        self.document.SaveAs(output_path)
    except Exception as err:
        # A bare `raise` used to sit here and made this wrapping unreachable;
        # chaining keeps the original traceback available.
        t = 'Word Filler can not save document: <{}>'.format(output_path)
        raise SaveDocumentError(t) from err
    puts('save document done - output_path')
    if close:
        self.document.Close()
def test_scan_entities():
    '''Scan the selected entities and print each report line.

    For polylines the scan additionally totals the area of closed lines and
    the length of open lines.
    hole: treat the polylines as one outer boundary plus inner holes; the
    area is then the largest area minus the other, smaller ones.
    '''
    report_lines = cad.scan_entities(
        selecting_entities,
        hole=True,
        error_color=None,
        error_layer=None,
    )
    for line in report_lines:
        line | puts()
def train(self):
    '''Build self.n_trees decision trees, each on a random subset of the features.

    For every tree, n_features_per_tree feature indexes are sampled without
    replacement; the indexes are recorded in self.features_index_for_trees
    and the tree built from the filtered data is appended to self.trees.

    When self.verbose is set, a summary and the first (up to three) decision
    trees are printed.
    '''
    for _ in range(self.n_trees):
        features_index = random.sample(range(self.n_features), self.n_features_per_tree)
        self.features_index_for_trees.append(features_index)
        data_filtered = self.filter_traindata(features_index)
        labels_filtered = self.filter_labels(features_index)
        tree = DecisionTree(data_filtered, labels=labels_filtered)
        self.trees.append(tree)

    if self.verbose:
        puts(
            'train done: total {self.n_rows} rows, {self.n_trees} trees, each tree use {self.n_rows_per_tree} rows data'
        )
        # Slicing works with any number of trees; the former
        # `t1, t2, t3, *_, tn = self.trees` raised ValueError below four.
        for tree in self.trees[:3]:
            pprint(tree.decision_tree)
        pprint('------------------------')
def test_rebuild_arc_polyline():
    '''Convert densified polylines into arc polylines.

    Works on the selected closed polylines; selected points, if any, are
    passed along as segment separators.
    '''
    cad = AutoCAD()
    polylines = []
    points = []
    for en in cad.selecting():
        if en.entity_type == 'Point':
            points.append(en)
            continue
        if en.entity_type == 'Polyline' and en.closed:
            polylines.append(en)

    dist = 3
    for pl in polylines:
        separators = [(p.x, p.y) for p in points]
        arcpl = cad.rebuild_arc_polyline(pl, threshold=dist, segment_points=separators)
        arcpl.color = 'yellow'
        report = '转为圆弧poly 原面积={:.4f} 新面积={:.4f} 相差={:.4f} ({:.4%})'
        puts(report.format(
            pl.area,
            arcpl.area,
            arcpl.area - pl.area,
            (arcpl.area - pl.area) / pl.area,
        ))
def parse_incredibal(path):
    '''Save the image of every <article> in a stored HTML page into folder "food".

    path: location of the HTML file to parse.
    '''
    # The context manager closes the file; open(path).read() left that to the GC.
    with open(path) as f:
        text = f.read()
    doc = PyQuery(text)
    for elem in doc('article'):
        elem = PyQuery(elem)
        url = elem('figure div img').attr('src') | puts()
        if not url:
            # Article without an image source: nothing to download.
            continue
        save_image(url, folder='food')
def test_word_filler_detect_fields():
    '''Print required fields of both Word templates (all / unique), then render the first.'''
    t1 = os.getcwd() + '/test/test_templates/test_{{name}}_面积计算表.doc'
    t2 = os.getcwd() + '/test/test_templates/test_no_field_面积计算表.doc'
    from infotext import InfoText
    yaml_info = InfoText.from_yaml(os.getcwd() + '/test/测试单位.inf')

    # Same order as before: t1 False, t1 True, t2 False, t2 True.
    for template in (t1, t2):
        for unique in (False, True):
            filler = Filler(template_path=template, output_folder=os.getcwd() + '/test/test_output')
            filler.detect_required_fields(unique=unique) | puts()

    filler = Filler(template_path=t1, output_folder=os.getcwd() + '/test/test_output')
    filler.render(info=yaml_info)
def kmeans(dataSet, k):
    '''Cluster the rows of dataSet into k groups with the k-means iteration.

    dataSet: 2-D data with one sample per row (indexed as dataSet[i, :]).
    k: number of clusters.

    Returns (centroids, clusterAssment). clusterAssment has one row per
    sample: column 0 is the index of the assigned cluster, column 1 the
    squared distance between the sample and that cluster's centroid.
    '''
    numSamples = dataSet.shape[0]
    clusterAssment = mat(zeros((numSamples, 2)))
    clusterChanged = True

    # step 1: init centroids
    centroids = initCentroids(dataSet, k)

    while clusterChanged:
        clusterChanged = False
        for i in range(numSamples):
            # inf instead of a fixed 100000.0 so large-scale data still
            # selects the genuinely closest centroid.
            minDist = float('inf')
            minIndex = 0
            # step 2: find the closest centroid
            for j in range(k):
                distance = euclDistance(centroids[j, :], dataSet[i, :])
                if distance < minDist:
                    minDist = distance
                    minIndex = j

            # step 3: update the sample's cluster
            if clusterAssment[i, 0] != minIndex:
                clusterChanged = True
            # Refresh the error on every pass: centroids move even when the
            # sample stays in the same cluster.
            clusterAssment[i, :] = minIndex, minDist**2

        # step 4: update centroids
        for j in range(k):
            pointsInCluster = dataSet[nonzero(clusterAssment[:, 0].A == j)[0]]
            # The mean of an empty cluster would be NaN; keep the old centroid.
            if pointsInCluster.shape[0] > 0:
                centroids[j, :] = mean(pointsInCluster, axis=0)

    pylon.puts('Congratulations, cluster complete!')
    return centroids, clusterAssment
def test_find_nearest_text():
    '''For every non-black selected text, color the nearest black text cyan.'''
    cad = AutoCAD()
    entities = list(cad.selecting())
    numbers = []
    names = []
    for text in entities:
        # Black texts are the names; everything else counts as a number.
        bucket = names if text.color == 'black' else numbers
        bucket.append(text)

    from Converter import SpaceCoordinate
    dist = SpaceCoordinate().distance2
    for number in numbers:
        def distance_to_number(name):
            return dist(name.mid_point, number.mid_point)

        near = min(names, key=distance_to_number) | puts()
        near.color = 'cyan'
def parse_packaging_served(path, skip=16):
    '''Parse a stored listing page and download the images of each project.

    path: location of the saved listing HTML.
    skip: number of leading entries to pass over; the default 16 keeps the
        previously hard-coded offset.

    For every remaining entry the detail page is handed to
    parse_packaging_served_detail_page, with the cover title as folder name.
    '''
    # The context manager closes the file; open(path).read() left that to the GC.
    with open(path) as f:
        text = f.read()
    doc = PyQuery(text)
    site = "http://www.packagingserved.com"

    result = []
    for elem in doc("li>div"):
        elem = PyQuery(elem)
        cover = elem(".cover-img img.cover-img-standard")
        title = cover.attr("title")
        image = cover.attr("src")
        detail = elem(".cover-name a.cover-name-link").attr("href")
        result.append((title, image, detail))

    for i, (title, image, detail) in enumerate(result):
        if i < skip:
            continue
        if not title or not detail:
            # A "li>div" without cover title or detail link is not a project entry.
            continue
        puts("@ start parse folder i title detail")
        parse_packaging_served_detail_page(url=site + detail, folder=title)
def test_jinja_edge_cases():
    '''Render a template that slices a long numeric code after casting it to string.'''
    from infotext import InfoText
    text = '''
    codes: 1231234000050280000
    borders: 空地;空地;空地;空地
    '''
    template = '''
    {{(codes | string)[12:15]}}-01
    {{(codes | string)[12:15]}}-01
    '''
    info = InfoText.from_string(text)
    Template(template).render(**info.content) | puts()
def showCluster(dataSet, k, centroids, clusterAssment):
    '''Plot 2-D samples by cluster together with the k centroids.

    Returns 1 without drawing when the data is not 2-dimensional or when k
    exceeds the number of available marker styles; otherwise shows the plot
    and returns None.
    '''
    numSamples, dim = dataSet.shape
    if dim != 2:
        pylon.puts("Sorry! I can not draw because the dimension of your data is not 2!")
        return 1

    sampleMarks = ['or', 'ob', 'og', 'ok', '^r', '+r', 'sr', 'dr', '<r', 'pr']
    if k > len(sampleMarks):
        pylon.puts("Sorry! Your k is too large! please contact Zouxy")
        return 1

    # draw all samples, styled by the cluster index stored in column 0
    for row in range(numSamples):
        cluster = int(clusterAssment[row, 0])
        plt.plot(dataSet[row, 0], dataSet[row, 1], sampleMarks[cluster])

    # draw the centroids with their own marker set
    centroidMarks = ['Dr', 'Db', 'Dg', 'Dk', '^b', '+b', 'sb', 'db', '<b', 'pb']
    for c in range(k):
        plt.plot(centroids[c, 0], centroids[c, 1], centroidMarks[c], markersize = 12)

    plt.show()
def render(self, info):
    '''Fill the open CAD template with the values from info.

    info: field source; read through info.get / info.content and handed to
        evalute_field for every text entity.

    Steps: optionally reposition the template according to the
    'target_position' field, replace every text entity by its evaluated
    value (or by an inserted dwg block), then zoom to extents.

    Raises InfoKeyError when a text entity evaluates to None or ''.
    '''
    self.info = info
    self.app.Visible = True
    if self.info.get('target_position'):
        # When a target_position field is present, all objects of the
        # template have to be repositioned before editing.
        target_position = self.info.content['target_position']
        if isinstance(target_position, list) and len(target_position) == 4:
            # target_center and target_size [566371.2180, 4340932.6223, 202.3, 202.3]
            self.fix_position(target_center=target_position[:2], target_size=target_position[2:])
        elif isinstance(target_position, str):
            # A string is treated as a dwg path; a path that is not an
            # existing file is looked up inside the output folder.
            if not os.path.isfile(target_position):
                target_position = os.path.join(self.output_folder, target_position)
            # The block is inserted only to measure its centre and size;
            # it is deleted again below.
            self.insert_block(dwg_path=target_position)
            # NOTE(review): index 0 is taken although the name says "last";
            # confirm the ordering returned by self.entities().
            last_entity = list(self.entities(kinds='BlockReference'))[0] | puts()
            target_center = self.mid_point(last_entity)
            target_size = self.bounding_box_size(last_entity)
            # print(last_entity.)
            last_entity.Delete()
            self.fix_position(target_center=target_center, target_size=target_size)
    for en in self.text_entities():
        val = evalute_field(field=en.TextString, info=info)
        if val in (None, ''):
            raise InfoKeyError('无法找到字段的值 {}'.format(en.TextString))
        if en.TextString.startswith('{{') and en.TextString.endswith('dwg}}'):
            # block field syntax should insert dwg block
            if not os.path.isfile(val):
                val = os.path.join(self.output_folder, val)
            self.insert_block(dwg_path=val)
            # The placeholder text is removed once the block is inserted.
            en.Delete()
        else:
            en.TextString = val
    self.document.SendCommand('zoom e ')
def test_cad_filler_detect_fields():
    '''Print the required fields of the dwg template, all first and then unique.'''
    t1 = os.getcwd() + '/test/test_templates/test_{{测试单位}}-宗地图.dwg'
    # A fresh Filler is created for each detection mode, as before.
    for unique in (False, True):
        filler = Filler(template_path=t1, output_folder=os.getcwd() + '/test/test_output')
        filler.detect_required_fields(unique=unique) | puts()
def test_remove_same_point_polyline():
    '''Remove duplicated vertexes from each selected polyline and print the report.'''
    cad = AutoCAD()
    threshold = 0.0001
    for pl in cad.selecting():
        outcome = cad.remove_same_points_polyline(pl, threshold=threshold)
        plnew, report = outcome
        puts(report)