class TestRandomDiscretizedProjections(unittest.TestCase):
    """Checks on the output of ``RandomDiscretizedProjections.hash_vector``."""

    # Dimension the hash is reset to, and the length of every sample vector.
    _DIM = 100
    # How many times the same vector is re-hashed in the determinism check.
    _REPEATS = 100

    def setUp(self):
        """Build a fresh hash named 'testHash' and reset it to ``_DIM`` dimensions."""
        self.rbp = RandomDiscretizedProjections('testHash', 10, 0.1)
        self.rbp.reset(self._DIM)

    def test_hash_format(self):
        """Hashing one vector yields exactly one key, and that key is a ``str``."""
        keys = self.rbp.hash_vector(numpy.random.randn(self._DIM))
        self.assertEqual(len(keys), 1)
        self.assertEqual(type(keys[0]), str)

    def test_hash_deterministic(self):
        """Re-hashing the same vector always returns the first key seen."""
        sample = numpy.random.randn(self._DIM)
        expected = self.rbp.hash_vector(sample)[0]
        for _ in range(self._REPEATS):
            self.assertEqual(expected, self.rbp.hash_vector(sample)[0])
class featureLsh:
    """Merge feature columns that hash to the same LSH bucket.

    Column names are underscore-separated integers (e.g. ``"3_17_4"``).
    ``set_hash`` maps every column name to the bucket key produced by a
    ``RandomDiscretizedProjections`` hash; ``update_dict`` then rewrites rows
    so that values of columns sharing a bucket are summed, and appends the
    merged values to ``self.data``.

    The special column ``"program"`` is never hashed: it is carried through
    unchanged under the key ``"program"``.
    """

    def __init__(self, stage, bucket):
        """Create the hash and the empty lookup/result containers.

        Args:
            stage: Second positional argument of ``RandomDiscretizedProjections``
                (projection count in NearPy's signature -- confirm against the
                imported class).
            bucket: Third positional argument of ``RandomDiscretizedProjections``
                (bin width in NearPy's signature -- confirm against the
                imported class).
        """
        # Dimension of the vectors handed to the hash.
        self.parentLevel = 5
        # A fixed seed is passed; presumably to keep bucket keys reproducible.
        self.rdp = RandomDiscretizedProjections('rdp', stage, bucket, rand_seed=98412194)
        self.rdp.reset(self.parentLevel)
        # column name -> bucket key (or "program" for the pass-through column)
        self.hash_dict = {}
        # bucket key -> list with one merged value per processed row
        self.data = defaultdict(list)

    def get_hash(self, vector):
        """Return the first bucket key the hash produces for ``vector``."""
        return self.rdp.hash_vector(vector)[0]

    def set_hash(self, header):
        """Populate ``self.hash_dict`` for every column name in ``header``.

        Args:
            header: Iterable of column names, each made of integers joined by
                ``"_"``. A ``"program"`` entry, if present, is skipped because
                it is registered as a pass-through and cannot be parsed as
                integers.

        Raises:
            ValueError: If a column name other than ``"program"`` contains a
                part that is not an integer.
        """
        self.hash_dict["program"] = "program"
        for column in header:
            if column == "program":
                # Already registered above; int("program") would raise.
                continue
            vec = [int(part) for part in column.split("_")]
            self.hash_dict[column] = self.get_hash(vec)
        print("Setting hash done. Running lsh...")

    @staticmethod
    def _as_amount(value):
        """Convert one cell to the number it contributes to its bucket.

        Strings are parsed with ``float``. Non-string NaN values contribute 0;
        every other value is converted with ``float``.
        """
        if isinstance(value, str):
            return float(value)
        return 0 if isnan(value) else float(value)

    def update_dict(self, dicts):
        """Merge each row by bucket and append the results to ``self.data``.

        Args:
            dicts: Iterable of mappings from column name to value. Every key
                must already be present in ``self.hash_dict``.

        Raises:
            KeyError: If a row contains a column that ``set_hash`` never saw.
        """
        print("updating_dict")
        for row in dicts:
            merged = {}
            for key, value in row.items():
                newkey = self.hash_dict[key]
                if newkey == "program":
                    # Pass-through column: keep the raw value, no summing.
                    merged[newkey] = value
                    continue
                amount = self._as_amount(value)
                if newkey in merged:
                    merged[newkey] += amount
                else:
                    merged[newkey] = amount
            for key, value in merged.items():
                self.data[key].append(value)
class FeatureBuilder(Template2Listener):
    """Listener that counts parse-tree features while a tree is walked.

    Every ``enter*`` callback forwards its context to ``update_vector``, which
    increments one entry of ``self.feature_vector``:

    * when ``parentLevel`` is 0 or 1, the entry is named
      ``"t_<ParentClass>_<NodeClass>"`` (nodes without a parent are ignored);
    * otherwise the entry is the LSH bucket key of the node's ancestor path,
      encoded through the module-level ``fixed_hashes`` table.
    """

    def __init__(self, level):
        """Set up the counters and the hash.

        Args:
            level: Number of nodes (the node itself plus ancestors) used per
                feature; converted with ``int`` and also used as the hash
                dimension.
        """
        self.feature_vector = {}
        self.parentLevel = int(level)
        print(self.parentLevel)
        self.hashes = {}
        self.rdp = RandomDiscretizedProjections('rdp', 5, 6, rand_seed=98412194)
        self.rdp.reset(self.parentLevel)

    def getHash(self, vector):
        """Return the first bucket key for ``vector``, zero-padded to ``parentLevel``."""
        missing = self.parentLevel - len(vector)
        if missing > 0:
            # Paths cut short at the root are right-padded with zeros.
            vector = vector + missing * [0]
        return self.rdp.hash_vector(vector)[0]

    def getParents(self, ctx):
        """Collect ``fixed_hashes`` codes from ``ctx`` upward.

        Walks ``parentCtx`` links starting at ``ctx`` itself and stops at the
        root or after ``parentLevel`` nodes, whichever comes first.
        """
        path = []
        node = ctx
        # len(path) doubles as the depth counter: one entry per visited node.
        while node is not None and len(path) < self.parentLevel:
            path.append(fixed_hashes[node.__class__.__name__])
            node = node.parentCtx
        return path

    def update_vector(self, ctx):
        """Increment the feature counter that corresponds to ``ctx``."""
        if self.parentLevel > 1:
            key = self.getHash(self.getParents(ctx))
        else:
            parent = ctx.parentCtx
            if parent is None:
                # No parent -> no parent/child pair to count.
                return
            key = f"t_{type(parent).__name__}_{type(ctx).__name__}"
        self.feature_vector[key] = self.feature_vector.get(key, 0) + 1

    # ------------------------------------------------------------------
    # Listener callbacks: each rule entry is counted the same way.
    # ------------------------------------------------------------------

    def enterAddop(self, ctx):
        self.update_vector(ctx)

    def enterAnd(self, ctx):
        self.update_vector(ctx)

    def enterArray(self, ctx):
        self.update_vector(ctx)

    def enterArray_access(self, ctx):
        self.update_vector(ctx)

    def enterAssign(self, ctx):
        self.update_vector(ctx)

    def enterBlock(self, ctx):
        self.update_vector(ctx)

    def enterBrackets(self, ctx):
        self.update_vector(ctx)

    def enterData(self, ctx):
        self.update_vector(ctx)

    def enterDecl(self, ctx):
        self.update_vector(ctx)

    def enterPrimitive(self, ctx):
        self.update_vector(ctx)

    def enterNumber(self, ctx):
        self.update_vector(ctx)

    def enterDtype(self, ctx):
        self.update_vector(ctx)

    def enterVector(self, ctx):
        self.update_vector(ctx)

    def enterDims(self, ctx):
        self.update_vector(ctx)

    def enterVectorDIMS(self, ctx):
        self.update_vector(ctx)

    def enterLimits(self, ctx):
        self.update_vector(ctx)

    def enterPrior(self, ctx):
        self.update_vector(ctx)

    def enterParam(self, ctx):
        self.update_vector(ctx)

    def enterParams(self, ctx):
        self.update_vector(ctx)

    def enterDistexpr(self, ctx):
        self.update_vector(ctx)

    def enterLoopcomp(self, ctx):
        self.update_vector(ctx)

    def enterFor_loop(self, ctx):
        self.update_vector(ctx)

    def enterIf_stmt(self, ctx):
        self.update_vector(ctx)

    def enterElse_blk(self, ctx):
        self.update_vector(ctx)

    def enterFunction_call(self, ctx):
        self.update_vector(ctx)

    def enterFparam(self, ctx):
        self.update_vector(ctx)

    def enterFparams(self, ctx):
        self.update_vector(ctx)

    def enterReturn_or_param_type(self, ctx):
        self.update_vector(ctx)

    def enterFunction_decl(self, ctx):
        self.update_vector(ctx)

    def enterTransformedparam(self, ctx):
        self.update_vector(ctx)

    def enterTransformeddata(self, ctx):
        self.update_vector(ctx)

    def enterGeneratedquantities(self, ctx):
        self.update_vector(ctx)

    def enterFunctions(self, ctx):
        self.update_vector(ctx)

    def enterVal(self, ctx):
        self.update_vector(ctx)

    def enterDivop(self, ctx):
        self.update_vector(ctx)

    def enterString(self, ctx):
        self.update_vector(ctx)

    def enterExponop(self, ctx):
        self.update_vector(ctx)

    def enterMinusop(self, ctx):
        self.update_vector(ctx)

    def enterLt(self, ctx):
        self.update_vector(ctx)

    def enterUnary(self, ctx):
        self.update_vector(ctx)

    def enterEq(self, ctx):
        self.update_vector(ctx)

    def enterGt(self, ctx):
        self.update_vector(ctx)

    def enterRef(self, ctx):
        self.update_vector(ctx)

    def enterGeq(self, ctx):
        self.update_vector(ctx)

    def enterMulop(self, ctx):
        self.update_vector(ctx)

    def enterFunction(self, ctx):
        self.update_vector(ctx)

    def enterVecmulop(self, ctx):
        self.update_vector(ctx)

    def enterNe(self, ctx):
        self.update_vector(ctx)

    def enterLeq(self, ctx):
        self.update_vector(ctx)

    def enterTranspose(self, ctx):
        self.update_vector(ctx)

    def enterVecdivop(self, ctx):
        self.update_vector(ctx)

    def enterTernary(self, ctx):
        self.update_vector(ctx)

    def enterSubset(self, ctx):
        self.update_vector(ctx)

    def enterObserve(self, ctx):
        self.update_vector(ctx)

    def enterStatement(self, ctx):
        self.update_vector(ctx)

    def enterQuery(self, ctx):
        self.update_vector(ctx)

    def enterTemplate(self, ctx):
        self.update_vector(ctx)