def exp7():
    """Assemble the exp7 feature pipeline.

    A ``ResizeProcessing(0.3, 0.3)`` stage is chained into a
    ``FrameworkFeature`` configured with ``pad=1``, ``threshold=0.00015``
    and ``min_width=4``.  Both stages receive the global name ``reload``
    as their ``force_reload`` argument.

    Returns:
        The ``ChainOperator`` joining the two stages.
    """
    resize_stage = ResizeProcessing(0.3, 0.3, force_reload=reload)
    framework_stage = FrameworkFeature(
        pad=1,
        threshold=0.00015,
        min_width=4,
        force_reload=reload,
    )
    return ChainOperator(resize_stage, framework_stage)
def exp4():
    """Assemble the exp4 node-feature pipeline.

    Six node-level features are nested pairwise with ``CombineOperatorND``
    (the first feature outermost), wrapped in a ``ClusterFeature`` built
    with ``combine=False``, and finally chained into
    ``CommonStatistics(hstack=True)``.  Every feature and the cluster
    wrapper receive the global name ``reload`` as ``force_reload``.

    Returns:
        The ``ChainOperator`` of the clustered features and the statistics.
    """
    text_len = TextLengthFeature(force_reload=reload)
    pictures = PictureAmountFeature(force_reload=reload)
    block_size = BlockSizeFeature(force_reload=reload)
    node_count = NodeAmountFeature(force_reload=reload)
    depth_text = TextLengthFeature(force_reload=reload, per_depth=True)
    depth_nodes = NodeAmountFeature(force_reload=reload, per_depth=True)

    # Fold from the right so the innermost pair is built first and the
    # nesting reads f0 + (f1 + (f2 + (f3 + (f4 + f5)))).
    combined = depth_nodes
    for feature in (depth_text, node_count, block_size, pictures, text_len):
        combined = CombineOperatorND(feature, combined)

    clustered = ClusterFeature(combined, combine=False, force_reload=reload)
    return ChainOperator(clustered, CommonStatistics(hstack=True))
# Example no. 3
# 0
# NOTE(review): this excerpt begins mid-script; model0, model1, data and
# reload are used below but are assigned outside the visible lines.
model2 = BlockSizeFeature(force_reload=reload)
model3 = NodeAmountFeature(force_reload=reload)
model4 = TextLengthFeature(force_reload=reload, per_depth=True)
model5 = NodeAmountFeature(force_reload=reload, per_depth=True)

# Nest the six node-level models pairwise (model0 outermost) and wrap the
# result in a ClusterFeature built with combine=False.
node_features = ClusterFeature(
  CombineOperatorND(model0, 
    CombineOperatorND(model1, 
      CombineOperatorND(model2, 
        CombineOperatorND(model3, 
          CombineOperatorND(model4, model5))))),
  combine=False,
  force_reload=reload 
  )

# Chain CommonStatistics(hstack=True) and then Normalization(0, 1) onto the
# clustered node features.
model6 = ChainOperator(node_features, CommonStatistics(hstack=True))
features = ChainOperator(model6, Normalization(0,1))


# Export the computed features with their labels and feature names.
export({
  'features': features.compute(data.groups, data.labels),
  'labels': features.get_labels(data),
  'featureNames': features.get_feature_names(),
})

# Debug dump: for nodes whose 'sitename' is 'wordreference_com', print
# d plus the position of c in node_features.index[i], followed by the node's
# 'nodeName', 'nodeText' and 'pos' entries.
# NOTE(review): c and d are never modified in the visible lines, so the
# membership test always checks 0 -- the excerpt looks truncated; confirm
# against the full script.
d = 0
for i, g in enumerate(data.groups):
  c = 0
  for n in g.nodes:
    if n['sitename'] == 'wordreference_com' and c in node_features.index[i]:
        print d+node_features.index[i].index(c), n['nodeName'], n['nodeText'], n['pos']
    NodeAmountFeature,
    ClusterFeature,
    BlockSizeFeature,
    ComplexNodeFeature,
)
from dataset import DataSet

# Select the 'exp11' version label (what set_version does with it is defined
# in the extractor module, not shown here).
set_version('exp11')

# Load the data set from a hard-coded absolute path.
# NOTE(review): the meaning of the load_dom arguments (5, (4, 4)) is not
# visible in this file -- confirm against DataSet.load_dom.
data = DataSet('/home/gsj987/experiment/webscorer.new/groups/')
data.load_dom(5, (4, 4))

# Passed as force_reload to every model below.
# NOTE: under Python 2 this name shadows the builtin reload().
reload = True

# Two MarginFeature variants (default arguments and absolute=False) that
# share the same ResizeProcessing stage.
model0 = ResizeProcessing(0.3, 0.3, force_reload=reload)
model1 = ChainOperator(model0, MarginFeature(force_reload=reload))
model2 = ChainOperator(model0,
                       MarginFeature(absolute=False, force_reload=reload))

img_features = CombineOperatorND(model1, model2)

# Node-level models; model14 and model15 repeat model10 and model13 with
# per_depth=True.
model10 = TextLengthFeature(force_reload=reload)
model11 = PictureAmountFeature(force_reload=reload)
model12 = BlockSizeFeature(force_reload=reload)
model13 = NodeAmountFeature(force_reload=reload)
model14 = TextLengthFeature(force_reload=reload, per_depth=True)
model15 = NodeAmountFeature(force_reload=reload, per_depth=True)

node_features = ClusterFeature(CombineOperatorND(
    model10,
    CombineOperatorND(
# Example no. 5
# 0
# Select the 'exp5' version label (what set_version does with it is defined
# in the extractor module, not shown here).
set_version('exp5')

# Load the data set from a hard-coded absolute path.
# NOTE(review): the meaning of the load_dom arguments (5, (4, 4)) is not
# visible in this file -- confirm against DataSet.load_dom.
data = DataSet('/home/gsj987/experiment/webscorer.new/groups/')
data.load_dom(5, (4, 4))

# Sanity check: print the 'pos' values of every node whose third or fourth
# component is zero.
# NOTE(review): the names suggest pos is (top, left, width, height) -- confirm.
for group in data.groups:
    for node in group.nodes:
        t, l, w, h = node['pos']
        if (w == 0) or (h == 0):
            print t, l, w, h

# Passed as force_reload to the first two stages below.
# NOTE: under Python 2 this name shadows the builtin reload().
reload = True

# Shared front end: ResizeProcessing followed by SaliencyProcessing.
model0 = ResizeProcessing(0.3, 0.3, force_reload=reload)
model1 = ChainOperator(model0, SaliencyProcessing(force_reload=reload))
# Statistics branches hanging off model1, all with threshold=0.1 and
# grids=(1, 1).  The numbering skips model6 and model7.
model2 = ChainOperator(model1, PixielStatistics(threshold=0.1, grids=(1, 1)))
model3 = ChainOperator(
    model1, MaxMinStatistics(max_or_min="max", threshold=0.1, grids=(1, 1)))
model4 = ChainOperator(
    model1, MaxMinStatistics(max_or_min="min", threshold=0.1, grids=(1, 1)))
model5 = ChainOperator(
    model1, StandardDeviationStatistics(threshold=0.1, grids=(1, 1)))
model8 = ChainOperator(model1,
                       DensityStatistics(axis=0, threshold=0.1, grids=(1, 1)))
model9 = ChainOperator(model1,
                       DensityStatistics(axis=1, threshold=0.1, grids=(1, 1)))

img_features = CombineOperatorND(
    CombineOperatorND(
        CombineOperatorND(
# Example no. 6
# 0
#_*_ coding: utf-8 _*_
from extractor import (
  set_version,
  export,
  ResizeProcessing,
  FrameworkFeature,
  Normalization,
  ChainOperator,
  )
from dataset import DataSet

# Select the 'exp7' version label (what set_version does with it is defined
# in the extractor module, not shown here).
set_version('exp7')

# Load the data set from a hard-coded absolute path.
data = DataSet('/home/gsj987/experiment/webscorer.new/groups/')

# Pipeline: ResizeProcessing -> FrameworkFeature, both with force_reload=True.
model0 = ResizeProcessing(0.3, 0.3, force_reload=True)
model1 = ChainOperator(model0, FrameworkFeature(pad=1, threshold=0.00015, min_width=4,force_reload=True))

# Alias for the un-normalised pipeline.
o_features = model1

# Append Normalization(0, 1) to the pipeline.
features = ChainOperator(o_features, Normalization(0,1))


# Export the computed features, the data set's labels and the feature names.
export({
  'features': features.compute(data.data, data.labels),
  'labels': data.labels,
  'featureNames': features.get_feature_names(),
})
# Example no. 7
# 0
    CommonMaxMinCountStatistics,
    CommonStatistics,
    MaxMinStatistics,
    StandardDeviationStatistics,
    MeanStatistics,
    DensityStatistics,
)
from dataset import DataSet

# Select the 'exp3' version label (what set_version does with it is defined
# in the extractor module, not shown here).
set_version('exp3')

# Load the data set from a hard-coded absolute path.
data = DataSet('/home/gsj987/experiment/webscorer.new/groups/')
# NOTE(review): reload is assigned here but none of the models in the visible
# lines passes it as force_reload.
reload = True

# Shared front end: ResizeProcessing followed by CornerProcessing.
model0 = ResizeProcessing(0.3, 0.3)
model1 = ChainOperator(model0, CornerProcessing())
# Statistics branches configured with grids=(3, 4).
model2 = ChainOperator(model1, PixielStatistics(threshold=0.1, grids=(3, 4)))
model3 = ChainOperator(
    model1, MaxMinStatistics(max_or_min="max", threshold=0.1, grids=(3, 4)))
model4 = ChainOperator(
    model1, MaxMinStatistics(max_or_min="min", threshold=0.1, grids=(3, 4)))
model5 = ChainOperator(
    model1, StandardDeviationStatistics(threshold=0.1, grids=(3, 4)))
# Statistics branches configured with grids=(1, 1).
model6 = ChainOperator(
    model1, StandardDeviationStatistics(threshold=0.1, grids=(1, 1)))
model7 = ChainOperator(model1, MeanStatistics(threshold=0.1, grids=(1, 1)))
model8 = ChainOperator(model1,
                       DensityStatistics(axis=0, threshold=0.1, grids=(1, 1)))
model9 = ChainOperator(model1,
                       DensityStatistics(axis=1, threshold=0.1, grids=(1, 1)))
# Example no. 8
# 0
#_*_ coding: utf-8 _*_
from extractor import (
  set_version,
  export,
  ResizeProcessing,
  MarginFeature,
  Normalization,
  ChainOperator,
  )
from dataset import DataSet

# Select the 'exp10' version label (what set_version does with it is defined
# in the extractor module, not shown here).
set_version('exp10')

# Load the data set from a hard-coded absolute path.
data = DataSet('/home/gsj987/experiment/webscorer.new/groups/')

# Pipeline: ResizeProcessing -> MarginFeature, both with force_reload=True.
model0 = ResizeProcessing(0.3, 0.3, force_reload=True)
model1 = ChainOperator(
    model0, MarginFeature(blocks=(33, 10, 4), force_reload=True))

# Alias for the un-normalised pipeline.
o_features = model1

# Append Normalization(0, 1) to the pipeline.
features = ChainOperator(o_features, Normalization(0, 1))

# The parenthesised single-argument form prints the same text under the
# Python 2 print statement and the Python 3 print function, so this line no
# longer ties the script to Python 2.
print(features.get_feature_names())

# Export the computed features, the data set's labels and the feature names.
export({
  'features': features.compute(data.data, data.labels),
  'labels': data.labels,
  'featureNames': features.get_feature_names(),
})
def exp3():
    """Assemble the exp3 feature set.

    The input goes through ``ResizeProcessing(0.3, 0.3)`` and
    ``CornerProcessing``; every statistics branch hangs off that shared
    stage, and ten branches are merged into one left-nested
    ``CombineOperatorND`` tree.

    Returns:
        The outermost ``CombineOperatorND``.  Its operands appear in the
        order: max count, min count, the four ``CommonStatistics``
        summaries, then the ``grids=(1, 1)`` standard deviation, mean and
        the two density branches.
    """
    threshold = 0.1
    grid_3x4 = (3, 4)
    grid_1x1 = (1, 1)

    corners = ChainOperator(ResizeProcessing(0.3, 0.3), CornerProcessing())

    # Branches configured with grids=(3, 4).
    pixel = ChainOperator(
        corners, PixielStatistics(threshold=threshold, grids=grid_3x4))
    peak = ChainOperator(
        corners,
        MaxMinStatistics(max_or_min="max", threshold=threshold,
                         grids=grid_3x4))
    trough = ChainOperator(
        corners,
        MaxMinStatistics(max_or_min="min", threshold=threshold,
                         grids=grid_3x4))
    spread = ChainOperator(
        corners,
        StandardDeviationStatistics(threshold=threshold, grids=grid_3x4))

    # Branches configured with grids=(1, 1).
    spread_1x1 = ChainOperator(
        corners,
        StandardDeviationStatistics(threshold=threshold, grids=grid_1x1))
    mean_1x1 = ChainOperator(
        corners, MeanStatistics(threshold=threshold, grids=grid_1x1))
    density_axis0 = ChainOperator(
        corners,
        DensityStatistics(axis=0, threshold=threshold, grids=grid_1x1))
    density_axis1 = ChainOperator(
        corners,
        DensityStatistics(axis=1, threshold=threshold, grids=grid_1x1))

    # Summaries layered on top of the grids=(3, 4) branches.
    pixel_summary = ChainOperator(pixel, CommonStatistics())
    peak_summary = ChainOperator(peak, CommonStatistics())
    trough_summary = ChainOperator(trough, CommonStatistics())
    spread_summary = ChainOperator(spread, CommonStatistics())
    pixel_max_count = ChainOperator(pixel, CommonMaxMinCountStatistics('max'))
    pixel_min_count = ChainOperator(pixel, CommonMaxMinCountStatistics('min'))

    # Left fold: each step wraps the tree built so far with the next branch.
    combined = CombineOperatorND(pixel_max_count, pixel_min_count)
    remaining = (
        pixel_summary,
        peak_summary,
        trough_summary,
        spread_summary,
        spread_1x1,
        mean_1x1,
        density_axis0,
        density_axis1,
    )
    for branch in remaining:
        combined = CombineOperatorND(combined, branch)
    return combined
def exp10():
    """Assemble the exp10 feature pipeline.

    A ``ResizeProcessing(0.3, 0.3)`` stage is chained into a
    ``MarginFeature`` configured with ``blocks=(33, 10, 4)``.  Both stages
    receive the global name ``reload`` as their ``force_reload`` argument.

    Returns:
        The ``ChainOperator`` joining the two stages.
    """
    resize_stage = ResizeProcessing(0.3, 0.3, force_reload=reload)
    margin_stage = MarginFeature(blocks=(33, 10, 4), force_reload=reload)
    return ChainOperator(resize_stage, margin_stage)
        CombineOperatorND(
            model1,
            CombineOperatorND(
                model2,
                CombineOperatorND(model3, CombineOperatorND(model4,
                                                            model5))))),
                                   combine=False,
                                   force_reload=reload)

    return ChainOperator(node_features, CommonStatistics(hstack=True))


# Fold the exp1, exp2, exp3, exp7 and exp10 feature sets into one combined
# operator, adding one experiment per step.
# NOTE(review): exp1 and exp2 are not defined in the visible part of the file.
f1 = exp1()
f2 = CombineOperatorND(f1, exp2())
f3 = CombineOperatorND(f2, exp3())
f7 = CombineOperatorND(f3, exp7())
f10 = CombineOperatorND(f7, exp10())

# NOTE(review): f4 is built here but is not referenced again in the visible
# lines.
f4 = exp4()

# Append Normalization(0, 1) to the combined feature set.
features = ChainOperator(f10, Normalization(0, 1))

# Use the code below to export a .mat file.

#fff = f10.compute(data.data, data.labels)
#ffs = features.compute(data.data, data.labels)
#export({
#  'features': ffs,
#  'labels': data.labels,
#})
# Example no. 12
# 0
    JpegFeature,
    TextLengthFeature,
    PictureAmountFeature,
    Normalization,
    ChainOperator,
    CombineOperatorND,
    ComplexNodeFeature,
)
from dataset import DataSet

# Select the 'exp1' version label (what set_version does with it is defined
# in the extractor module, not shown here).
set_version('exp1')

# Load the data set from a hard-coded absolute path.
# NOTE(review): the meaning of the load_dom argument 0 is not visible in this
# file -- confirm against DataSet.load_dom.
data = DataSet('/home/gsj987/experiment/webscorer.new/groups/')
data.load_dom(0)

# Passed as force_reload to every feature below.
# NOTE: under Python 2 this name shadows the builtin reload().
reload = True

# Node-level model: text length combined with picture amount.
model0 = TextLengthFeature(force_reload=reload)
model1 = CombineOperatorND(model0, PictureAmountFeature(force_reload=reload))

# Pair the node-level model with a JpegFeature image model.
img_model = JpegFeature(force_reload=reload)
node_features = ComplexNodeFeature(node_model=model1, img_model=img_model)

# Append Normalization(0, 1) to the combined features.
features = ChainOperator(node_features, Normalization(0, 1))

# Export the computed features, the per-node labels returned by
# data.node_labels(), and the feature names.
export({
    'features': features.compute(data.groups, data.labels),
    'labels': data.node_labels(),
    'featureNames': features.get_feature_names(),
})