예제 #1
0
# model0..model4 and `reload` are expected to be defined earlier in the script.
model5 = NodeAmountFeature(force_reload=reload, per_depth=True)

# Fold the six models into a right-nested CombineOperatorND chain:
#   Combine(model0, Combine(model1, ... Combine(model4, model5)))
# Starting from the innermost operand and wrapping outwards constructs the
# operators in the same order as the equivalent hand-nested expression.
_nested = model5
for _outer in (model4, model3, model2, model1, model0):
  _nested = CombineOperatorND(_outer, _nested)

node_features = ClusterFeature(_nested, combine=False, force_reload=reload)

# Chain: node_features -> CommonStatistics(hstack=True) -> Normalization(0, 1).
# NOTE(review): Normalization(0, 1) presumably rescales into [0, 1] -- confirm
# against the extractor package.
_statistics = CommonStatistics(hstack=True)
model6 = ChainOperator(node_features, _statistics)
_scaler = Normalization(0, 1)
features = ChainOperator(model6, _scaler)


# Compute the exported entries one by one (features, then labels, then
# feature names -- the order in which the dict below lists them) and hand
# them to export() as a single dict.
_feature_values = features.compute(data.groups, data.labels)
_label_values = features.get_labels(data)
_feature_names = features.get_feature_names()

export({
  'features': _feature_values,
  'labels': _label_values,
  'featureNames': _feature_names,
})

d = 0
for i, g in enumerate(data.groups):
  c = 0
  for n in g.nodes:
    if n['sitename'] == 'wordreference_com' and c in node_features.index[i]:
        print d+node_features.index[i].index(c), n['nodeName'], n['nodeText'], n['pos']
    c += 1
  d += len(node_features.index[i])
예제 #2
0
#_*_ coding: utf-8 _*_
from extractor import (
  set_version,
  export,
  ResizeProcessing,
  FrameworkFeature,
  Normalization,
  ChainOperator,
  )
from dataset import DataSet

# Label passed verbatim to the extractor package for this run.
set_version('exp7')

# NOTE(review): machine-specific absolute path; the script only runs where
# this directory exists.
data = DataSet('/home/gsj987/experiment/webscorer.new/groups/')

# Pipeline: ResizeProcessing -> FrameworkFeature -> Normalization(0, 1).
# Both feature stages are created with force_reload=True.
model0 = ResizeProcessing(0.3, 0.3, force_reload=True)
_framework = FrameworkFeature(
  pad=1,
  threshold=0.00015,
  min_width=4,
  force_reload=True,
)
model1 = ChainOperator(model0, _framework)

# o_features is an alias for the un-normalized chain.
o_features = model1

_scaler = Normalization(0, 1)
features = ChainOperator(o_features, _scaler)


# Gather the exported entries in the order the dict lists them (features,
# labels, feature names), then pass them to export() as a single dict.
_feature_values = features.compute(data.data, data.labels)
_label_values = data.labels
_feature_names = features.get_feature_names()

export({
  'features': _feature_values,
  'labels': _label_values,
  'featureNames': _feature_names,
})