def _example_from_string(l):
    """Convert one whitespace-separated example line into an (x, y) pair.

    The first token of ``l`` is the label and every following token is a
    feature name.

    Returns a tuple ``(x, y)``:
      * ``x`` -- a 1-by-``featuremap.len`` CSR matrix with 1.0 stored in the
        column ``featuremap.id(name)`` of every feature name on the line.
      * ``y`` -- ``labelmap.id(label)``.

    A string with no tokens raises IndexError (there is no label to read).
    """
    tokens = l.split()
    label = tokens[0]
    feature_names = tokens[1:]

    # Fill a LIL matrix entry by entry, then convert the finished row to CSR.
    row = sparse.lil_matrix((1, featuremap.len))
    for feature_name in feature_names:
        row[0, featuremap.id(feature_name)] = 1.

    return sparse.csr_matrix(row), labelmap.id(label)
# Populate the "labels" and "features" maps from the original examples file,
# mark both maps read-only, report their sizes and dump them.
from common.file import myopen
import string
import common.featuremap

labelmap = common.featuremap.get(name="labels")
featuremap = common.featuremap.get(name="features")

# Each non-blank line reads "<label> <feature> <feature> ..." (whitespace
# separated); blank lines are skipped.
# (A per-feature occurrence count was once tallied in this loop; it is
# currently disabled.)
for line in myopen(HYPERPARAMETERS["original examples file"]):
    tokens = line.split()
    if not tokens:
        continue
    labelmap.id(tokens[0], can_add=True)
    for feature in tokens[1:]:
        featuremap.id(feature, can_add=True)

# No further entries are expected once the whole file has been scanned.
featuremap.readonly = True
labelmap.readonly = True

print("%d features" % featuremap.len)
print("%d labels " % labelmap.len)

# NOTE(review): dump() presumably persists each map -- confirm in
# common.featuremap.
featuremap.dump()
labelmap.dump()
# Re-parse the options against HYPERPARAMETERS, then populate the "labels"
# and "features" maps from the original examples file, mark both maps
# read-only, report their sizes and dump them.
common.options.reparse(HYPERPARAMETERS)

from common.file import myopen
import string
import common.featuremap

labelmap = common.featuremap.get(name="labels")
featuremap = common.featuremap.get(name="features")

# Each non-blank line reads "<label> <feature> <feature> ..." (whitespace
# separated); blank lines are skipped.
# (A per-feature occurrence count was once tallied in this loop; it is
# currently disabled.)
for line in myopen(HYPERPARAMETERS["original examples file"]):
    tokens = line.split()
    if not tokens:
        continue
    labelmap.id(tokens[0], can_add=True)
    for feature in tokens[1:]:
        featuremap.id(feature, can_add=True)

# No further entries are expected once the whole file has been scanned.
featuremap.readonly = True
labelmap.readonly = True

print("%d features" % featuremap.len)
print("%d labels " % labelmap.len)

# NOTE(review): dump() presumably persists each map -- confirm in
# common.featuremap.
featuremap.dump()
labelmap.dump()