def exp7():
    """Build the 'exp7' pipeline: resize stage chained into FrameworkFeature.

    Both stages receive ``force_reload=reload``; ``reload`` is a free name
    that is not defined inside this function (presumably a module-level
    flag -- confirm where it is assigned).

    Returns:
        A ChainOperator whose first stage is ``ResizeProcessing(0.3, 0.3)``
        and whose second stage is the FrameworkFeature configured below.
    """
    # NOTE(review): the two 0.3 arguments look like scale factors -- confirm
    # against ResizeProcessing.
    resized = ResizeProcessing(0.3, 0.3, force_reload=reload)
    framework = FrameworkFeature(
        pad=1,
        threshold=0.00015,
        min_width=4,
        force_reload=reload,
    )
    return ChainOperator(resized, framework)
def exp4():
    """Build the 'exp4' pipeline: clustered node features plus statistics.

    Six node-level features are combined pairwise with CombineOperatorND,
    wrapped in a ClusterFeature (``combine=False``) and then chained into
    ``CommonStatistics(hstack=True)``.

    Every feature and the ClusterFeature receive ``force_reload=reload``;
    ``reload`` is a free name that is not defined inside this function
    (presumably a module-level flag -- confirm where it is assigned).

    Returns:
        The ChainOperator of the ClusterFeature and the CommonStatistics.
    """
    # Leaf features, created in the order they appear in the combination.
    leaves = [
        TextLengthFeature(force_reload=reload),
        PictureAmountFeature(force_reload=reload),
        BlockSizeFeature(force_reload=reload),
        NodeAmountFeature(force_reload=reload),
        TextLengthFeature(force_reload=reload, per_depth=True),
        NodeAmountFeature(force_reload=reload, per_depth=True),
    ]

    # Fold from the right so the nesting stays
    # leaves[0] + (leaves[1] + (... + (leaves[4] + leaves[5]))),
    # with the innermost pair combined first.
    combined = leaves[-1]
    for leaf in reversed(leaves[:-1]):
        combined = CombineOperatorND(leaf, combined)

    clustered = ClusterFeature(combined, combine=False, force_reload=reload)
    summary = CommonStatistics(hstack=True)
    return ChainOperator(clustered, summary)
model2 = BlockSizeFeature(force_reload=reload) model3 = NodeAmountFeature(force_reload=reload) model4 = TextLengthFeature(force_reload=reload, per_depth=True) model5 = NodeAmountFeature(force_reload=reload, per_depth=True) node_features = ClusterFeature( CombineOperatorND(model0, CombineOperatorND(model1, CombineOperatorND(model2, CombineOperatorND(model3, CombineOperatorND(model4, model5))))), combine=False, force_reload=reload ) model6 = ChainOperator(node_features, CommonStatistics(hstack=True)) features = ChainOperator(model6, Normalization(0,1)) export({ 'features': features.compute(data.groups, data.labels), 'labels': features.get_labels(data), 'featureNames': features.get_feature_names(), }) d = 0 for i, g in enumerate(data.groups): c = 0 for n in g.nodes: if n['sitename'] == 'wordreference_com' and c in node_features.index[i]: print d+node_features.index[i].index(c), n['nodeName'], n['nodeText'], n['pos']
NodeAmountFeature, ClusterFeature, BlockSizeFeature, ComplexNodeFeature, ) from dataset import DataSet set_version('exp11') data = DataSet('/home/gsj987/experiment/webscorer.new/groups/') data.load_dom(5, (4, 4)) reload = True model0 = ResizeProcessing(0.3, 0.3, force_reload=reload) model1 = ChainOperator(model0, MarginFeature(force_reload=reload)) model2 = ChainOperator(model0, MarginFeature(absolute=False, force_reload=reload)) img_features = CombineOperatorND(model1, model2) model10 = TextLengthFeature(force_reload=reload) model11 = PictureAmountFeature(force_reload=reload) model12 = BlockSizeFeature(force_reload=reload) model13 = NodeAmountFeature(force_reload=reload) model14 = TextLengthFeature(force_reload=reload, per_depth=True) model15 = NodeAmountFeature(force_reload=reload, per_depth=True) node_features = ClusterFeature(CombineOperatorND( model10, CombineOperatorND(
set_version('exp5') data = DataSet('/home/gsj987/experiment/webscorer.new/groups/') data.load_dom(5, (4, 4)) for group in data.groups: for node in group.nodes: t, l, w, h = node['pos'] if (w == 0) or (h == 0): print t, l, w, h reload = True model0 = ResizeProcessing(0.3, 0.3, force_reload=reload) model1 = ChainOperator(model0, SaliencyProcessing(force_reload=reload)) model2 = ChainOperator(model1, PixielStatistics(threshold=0.1, grids=(1, 1))) model3 = ChainOperator( model1, MaxMinStatistics(max_or_min="max", threshold=0.1, grids=(1, 1))) model4 = ChainOperator( model1, MaxMinStatistics(max_or_min="min", threshold=0.1, grids=(1, 1))) model5 = ChainOperator( model1, StandardDeviationStatistics(threshold=0.1, grids=(1, 1))) model8 = ChainOperator(model1, DensityStatistics(axis=0, threshold=0.1, grids=(1, 1))) model9 = ChainOperator(model1, DensityStatistics(axis=1, threshold=0.1, grids=(1, 1))) img_features = CombineOperatorND( CombineOperatorND( CombineOperatorND(
#_*_ coding: utf-8 _*_
# Experiment 'exp7': chain ResizeProcessing into FrameworkFeature, pass the
# result through Normalization(0, 1), and export the computed features
# together with the data set labels and the feature names.
from extractor import (
    set_version,
    export,
    ResizeProcessing,
    FrameworkFeature,
    Normalization,
    ChainOperator,
)
from dataset import DataSet

set_version('exp7')
data = DataSet('/home/gsj987/experiment/webscorer.new/groups/')

# Stage 1: resize; stage 2: framework feature. Both force a reload.
resized = ResizeProcessing(0.3, 0.3, force_reload=True)
framework = FrameworkFeature(
    pad=1, threshold=0.00015, min_width=4, force_reload=True)
raw_features = ChainOperator(resized, framework)

# NOTE(review): Normalization(0, 1) presumably rescales into [0, 1] --
# confirm against the extractor module.
features = ChainOperator(raw_features, Normalization(0, 1))

# Entries are evaluated in this order: compute first, feature names last.
payload = {
    'features': features.compute(data.data, data.labels),
    'labels': data.labels,
    'featureNames': features.get_feature_names(),
}
export(payload)
CommonMaxMinCountStatistics, CommonStatistics, MaxMinStatistics, StandardDeviationStatistics, MeanStatistics, DensityStatistics, ) from dataset import DataSet set_version('exp3') data = DataSet('/home/gsj987/experiment/webscorer.new/groups/') reload = True model0 = ResizeProcessing(0.3, 0.3) model1 = ChainOperator(model0, CornerProcessing()) model2 = ChainOperator(model1, PixielStatistics(threshold=0.1, grids=(3, 4))) model3 = ChainOperator( model1, MaxMinStatistics(max_or_min="max", threshold=0.1, grids=(3, 4))) model4 = ChainOperator( model1, MaxMinStatistics(max_or_min="min", threshold=0.1, grids=(3, 4))) model5 = ChainOperator( model1, StandardDeviationStatistics(threshold=0.1, grids=(3, 4))) model6 = ChainOperator( model1, StandardDeviationStatistics(threshold=0.1, grids=(1, 1))) model7 = ChainOperator(model1, MeanStatistics(threshold=0.1, grids=(1, 1))) model8 = ChainOperator(model1, DensityStatistics(axis=0, threshold=0.1, grids=(1, 1))) model9 = ChainOperator(model1, DensityStatistics(axis=1, threshold=0.1, grids=(1, 1)))
#_*_ coding: utf-8 _*_
# Experiment 'exp10': chain ResizeProcessing into MarginFeature, pass the
# result through Normalization(0, 1), print the feature names, and export
# the computed features together with the data set labels and feature names.
from extractor import (
    set_version,
    export,
    ResizeProcessing,
    MarginFeature,
    Normalization,
    ChainOperator,
)
from dataset import DataSet

set_version('exp10')
data = DataSet('/home/gsj987/experiment/webscorer.new/groups/')

# Stage 1: resize; stage 2: margin feature. Both force a reload.
model0 = ResizeProcessing(0.3, 0.3, force_reload=True)
model1 = ChainOperator(
    model0, MarginFeature(blocks=(33, 10, 4), force_reload=True))
o_features = model1

# NOTE(review): Normalization(0, 1) presumably rescales into [0, 1] --
# confirm against the extractor module.
features = ChainOperator(o_features, Normalization(0, 1))

# Parenthesised single-argument print: identical output under Python 2's
# print statement and valid as a function call under Python 3.
print(features.get_feature_names())

# Entries are evaluated in this order: compute first, feature names last.
export({
    'features': features.compute(data.data, data.labels),
    'labels': data.labels,
    'featureNames': features.get_feature_names(),
})
def exp3():
    """Build the 'exp3' pipeline: statistics over corner-processed images.

    The input is resized (``ResizeProcessing(0.3, 0.3)``) and passed through
    ``CornerProcessing``. Several statistics are then chained onto that
    result, some on a (3, 4) grid and some on a (1, 1) grid, all with
    ``threshold=0.1``. The (3, 4)-grid statistics are further summarised
    with ``CommonStatistics`` and, for the pixel statistics, with
    ``CommonMaxMinCountStatistics('max'/'min')``.

    No ``force_reload`` argument is passed to any stage in this function.

    Returns:
        A left-nested CombineOperatorND over, in order: the max-count and
        min-count summaries, the four CommonStatistics summaries, the
        (1, 1) standard deviation, the (1, 1) mean, and the two density
        statistics (axis 0, then axis 1).
    """
    cutoff = 0.1
    grid = (3, 4)
    whole = (1, 1)

    corners = ChainOperator(ResizeProcessing(0.3, 0.3), CornerProcessing())

    # Statistics computed on the (3, 4) grid.
    pixel_grid = ChainOperator(
        corners, PixielStatistics(threshold=cutoff, grids=grid))
    max_grid = ChainOperator(
        corners,
        MaxMinStatistics(max_or_min="max", threshold=cutoff, grids=grid))
    min_grid = ChainOperator(
        corners,
        MaxMinStatistics(max_or_min="min", threshold=cutoff, grids=grid))
    std_grid = ChainOperator(
        corners, StandardDeviationStatistics(threshold=cutoff, grids=grid))

    # Statistics computed on the (1, 1) grid.
    std_whole = ChainOperator(
        corners, StandardDeviationStatistics(threshold=cutoff, grids=whole))
    mean_whole = ChainOperator(
        corners, MeanStatistics(threshold=cutoff, grids=whole))
    density_axis0 = ChainOperator(
        corners, DensityStatistics(axis=0, threshold=cutoff, grids=whole))
    density_axis1 = ChainOperator(
        corners, DensityStatistics(axis=1, threshold=cutoff, grids=whole))

    # Summaries of the (3, 4)-grid statistics.
    grid_summaries = []
    for source in (pixel_grid, max_grid, min_grid, std_grid):
        grid_summaries.append(ChainOperator(source, CommonStatistics()))
    max_count = ChainOperator(pixel_grid, CommonMaxMinCountStatistics('max'))
    min_count = ChainOperator(pixel_grid, CommonMaxMinCountStatistics('min'))

    # Left fold: ((((max_count + min_count) + summary_0) + ...) + density_1).
    combined = CombineOperatorND(max_count, min_count)
    tail = grid_summaries + [
        std_whole, mean_whole, density_axis0, density_axis1]
    for part in tail:
        combined = CombineOperatorND(combined, part)
    return combined
def exp10():
    """Build the 'exp10' pipeline: resize stage chained into MarginFeature.

    Both stages receive ``force_reload=reload``; ``reload`` is a free name
    that is not defined inside this function (presumably a module-level
    flag -- confirm where it is assigned).

    Returns:
        A ChainOperator whose first stage is ``ResizeProcessing(0.3, 0.3)``
        and whose second stage is ``MarginFeature(blocks=(33, 10, 4))``.
    """
    resized = ResizeProcessing(0.3, 0.3, force_reload=reload)
    margins = MarginFeature(blocks=(33, 10, 4), force_reload=reload)
    return ChainOperator(resized, margins)
CombineOperatorND( model1, CombineOperatorND( model2, CombineOperatorND(model3, CombineOperatorND(model4, model5))))), combine=False, force_reload=reload) return ChainOperator(node_features, CommonStatistics(hstack=True)) f1 = exp1() f2 = CombineOperatorND(f1, exp2()) f3 = CombineOperatorND(f2, exp3()) f7 = CombineOperatorND(f3, exp7()) f10 = CombineOperatorND(f7, exp10()) f4 = exp4() features = ChainOperator(f10, Normalization(0, 1)) #use the code below to export a .mat file #fff = f10.compute(data.data, data.labels) #ffs = features.compute(data.data, data.labels) #export({ # 'features': ffs, # 'labels': data.labels, #})
JpegFeature, TextLengthFeature, PictureAmountFeature, Normalization, ChainOperator, CombineOperatorND, ComplexNodeFeature, ) from dataset import DataSet set_version('exp1') data = DataSet('/home/gsj987/experiment/webscorer.new/groups/') data.load_dom(0) reload = True model0 = TextLengthFeature(force_reload=reload) model1 = CombineOperatorND(model0, PictureAmountFeature(force_reload=reload)) img_model = JpegFeature(force_reload=reload) node_features = ComplexNodeFeature(node_model=model1, img_model=img_model) features = ChainOperator(node_features, Normalization(0, 1)) export({ 'features': features.compute(data.groups, data.labels), 'labels': data.node_labels(), 'featureNames': features.get_feature_names(), })