def nsearch(self, value):
    """
    Negated regular-expression search on the selected attribute.

    This is the opposite of the ``search`` predicate: the predicate is
    built with ``P.nsearch`` for the current attribute name and ``value``.

    :param value: regular expression pattern handed to ``P.nsearch``
    :returns: ``self``, with ``_desc`` and ``_predicate`` replaced
    """
    attribute = self._name
    self._desc = "not re.compile('%s').search(%s)" % (value, attribute)
    self._predicate = P.nsearch(attribute, value)
    return self
def nin(self, value):
    """
    Finds all documents where the attribute is NOT in the given list.

    Counterpart of ``in_`` (see there for an example); the predicate is
    built with ``P.nin`` for the current attribute name.

    :param value: collection of values handed to ``P.nin``
    :returns: ``self``, with ``_desc`` and ``_predicate`` replaced
    """
    field = self._name
    self._desc = "%s not in %s" % (field, value)
    self._predicate = P.nin(field, value)
    return self
def orderby(*args):
    """
    Orders documents by one or more columns.

    .. code-block:: python

      uv.docs.find(D.orderby('name', '-age'))

    Both *ASC* and *DESC* ordering are available. Descending order is
    requested by prepending a **-** (hyphen) to the column name.

    In the example above the *orderby* sits inside the *find* call.
    It can also be used directly:

    .. code-block:: python

      uv.docs.orderby('name', '-age')

    :param args: column names forwarded unchanged to ``P.orderby``
    :returns: the result of ``P.orderby(*args)`` when at least one column
        is given; otherwise a callable that returns ``sorted(x)`` for its
        single argument
    """
    if not args:
        # No columns requested: fall back to the natural ordering.
        return lambda x: sorted(x)
    return P.orderby(*args)
def nin_list(self, value):
    """
    Finds all documents where the attribute is NOT in the given **list**.

    ``list`` is called on the input before the predicate is built, so any
    iterable may be passed. Look at ``in_list`` for an explanation.

    :param value: iterable of values; materialised with ``list(value)``
    :returns: ``self``, with ``_desc`` and ``_predicate`` replaced
    """
    excluded = list(value)
    field = self._name
    self._desc = "%s not in set(%s)" % (field, excluded)
    self._predicate = P.nin(field, excluded)
    return self
def __getattr__(self, attr):
    """
    Extend the selected attribute path by one dotted component.

    If ``attr`` is already stored in the instance ``__dict__`` its value is
    returned. Otherwise ``"." + attr`` is appended to the current ``_name``,
    the predicate is reset to ``P.exists`` for the new dotted name, and the
    description becomes ``"has key: <name>"``.

    :param attr: attribute name being looked up
    :returns: the stored value, or ``self`` (mutated in place)
    :raises KeyError: if ``_name`` is not present in the instance ``__dict__``
    """
    own = self.__dict__
    if attr in own:
        return own[attr]

    dotted = own["_name"] + "." + attr
    self._name = dotted
    self._predicate = P.exists(dotted)
    self._desc = "has key: " + dotted
    return self
class WM:
    """
    Working Memory

    Used to store activated predicates and their variable bindings.
    Supports both predicate and vector based representations.
    """

    def get_vector_representation(self, sltm):
        """
        Returns a vector representation of the contents of the working memory.

        The vector representation consists of two parts:

        semantics - the semantic vectors of the activated predicates;
            the dimension of 'semantics' is wm_capacity x semantic_dimension,
            where semantic_dimension is the dimension of the semantic space
            (``sltm.get_semantic_dimension()``) and wm_capacity is the
            working memory capacity. Rows of empty slots stay all-zero.
        structure - binary vectors (0 and 1) describing the variable
            bindings; the dimension of structure is
            max_arity x wm_capacity x wm_capacity, where max_arity is the
            maximum predicate arity which can be represented.

        :param sltm: object providing ``get_semantic_dimension()``; it is
            also passed to each item's ``get_semantic_vector``
        :returns: tuple ``(semantics, structure)``; ``structure`` is the
            stored array itself, not a copy
        """
        semantics = np.zeros(
            shape=(self.__capacity, sltm.get_semantic_dimension()))
        for wm_i, item in enumerate(self.__contents):
            # Identity test: empty slots are marked with None.
            if item is not None:
                semantics[wm_i] = item.get_semantic_vector(sltm)
        return (semantics, self.__structure)

    def get_predicate_representation(self, vector_representation, sltm):
        """
        Returns predicate representations of the contents of the working
        memory, rebuilt from a vector representation.

        :param vector_representation: tuple ``(semantics, structure)`` as
            returned by ``get_vector_representation``. It is still passed as
            one positional tuple; it is unpacked in the body because tuple
            parameters in the signature are not valid Python 3 syntax.
        :param sltm: object providing ``get_semantic_category(vector)``
        :returns: dict mapping slot index to the ``Predicate`` rebuilt for
            that slot; empty (all-zero) slots are omitted
        """
        semantics, structure = vector_representation
        wm_capacity, _ = semantics.shape
        # Number of non-zero bindings per slot, summed over the arity and
        # target axes.
        arities = np.sum(structure != 0, axis=(0, 2))

        ps = {}
        for sem_t in range(wm_capacity):
            # A slot is occupied when any component is non-zero; testing the
            # sum would wrongly skip a vector whose components cancel out.
            if np.any(semantics[sem_t]):
                p_type_id = sltm.get_semantic_category(semantics[sem_t])
                ps[sem_t] = Predicate(p_type_id, arities[sem_t])

        for sem_t, p in ps.items():
            if not p.is_resolved():
                for a_no in range(p.get_arity()):
                    # The bound argument is the slot with the largest entry
                    # in this predicate's row for argument position a_no.
                    p.bind_argument(
                        a_no, ps[np.argmax(structure[a_no, sem_t])])
        return ps
def limit(value):
    """
    Limits the amount of documents found.

    .. code-block:: python

      uv.docs.find(D.limit(50))

    :param value: maximum number of documents; must be exactly of type
        ``int``
    :returns: the result of ``P.udf(Document.__lim__, count=value)``
    :raises ValueError: if ``value`` is not an ``int``
    """
    # Exact type check kept from the original; it also rejects bool,
    # because type(True) is bool, not int.
    if type(value) is not int:
        # Call form of raise: valid on both Python 2 and Python 3.
        raise ValueError("Limit predicates must be integers: limit(5)")
    return P.udf(Document.__lim__, count=value)
def search(self, value):
    r"""
    Uses the ``re`` module to search data attributes.

    The regex doesn't have to match exactly. The given regex would NOT
    fail if there was an attached port to the IP attribute.

    .. code-block:: python

      # re.compile('10\.2\.1\.\d+').search(doc[ip])
      uv.docs.find(D.ip.search('10\.2\.1\.\d+'))

    :param value: regular expression pattern handed to ``P.search``
    :returns: ``self``, with ``_desc`` and ``_predicate`` replaced
    """
    attribute = self._name
    self._desc = "re.compile('%s').search(%s)" % (value, attribute)
    self._predicate = P.search(attribute, value)
    return self
def skip(value):
    """
    Skips the first few documents found based on the given input.

    .. code-block:: python

      # skips 50 records
      uv.docs.find(D.skip(50))

    :param value: number of documents to skip; must be exactly of type
        ``int``
    :returns: the result of ``P.udf(Document.__skip__, value)``
    :raises ValueError: if ``value`` is not an ``int``
    """
    # Exact type check kept from the original; it also rejects bool,
    # because type(True) is bool, not int.
    if type(value) is not int:
        # Call form of raise: valid on both Python 2 and Python 3.
        raise ValueError("Skip predicates must be integers: K.skip(5)")
    return P.udf(Document.__skip__, value)
def len(self, value):
    """
    Find documents where an attribute is a certain length.

    .. code-block:: python

      from underverse.model import Document as D

      # finds all users whose names are only 3 characters long
      uv.users.find(D.name.len(3))

    :param value: length handed to ``P.len``
    :returns: ``self``, with ``_desc`` and ``_predicate`` replaced
    """
    field = self._name
    self._desc = "len(%s) == %s" % (field, value)
    self._predicate = P.len(field, value)
    return self
def btw(self, left, right):
    """
    Finds documents where an attribute is between the given left and right
    inputs.

    .. code-block:: python

      from underverse.model import Document as D

      # finds all users whose age is between 18 and 25
      uv.users.find(D.age.btw(18, 25))

    :param left: lower bound handed to ``P.btw``
    :param right: upper bound handed to ``P.btw``
    :returns: ``self``, with ``_desc`` and ``_predicate`` replaced
    """
    field = self._name
    self._desc = "%s < %s < %s" % (left, field, right)
    self._predicate = P.btw(field, left, right)
    return self
def in_(self, value):
    """
    Finds documents where an attribute is in the given list.

    .. code-block:: python

      from underverse.model import Document as D

      # finds all users whose name is either 'Max' or 'Tamara'
      uv.users.find(D.name.in_(['Max', 'Tamara']))

    :param value: collection of accepted values handed to ``P.in_``
    :returns: ``self``, with ``_desc`` and ``_predicate`` replaced
    """
    field = self._name
    self._desc = "%s in %s" % (field, value)
    self._predicate = P.in_(field, value)
    return self
def limskip(_skip, _limit):
    """
    Skips the first few documents found and also limits the records found
    based on the given input.

    .. code-block:: python

      # skips 50 records and returns the next 50
      uv.docs.find(D.limskip(50, 50))

    :param _skip: number of documents to skip; must be exactly ``int``
    :param _limit: maximum number of documents; must be exactly ``int``
    :returns: the result of ``P.udf(Document.__limskip__, _skip, _limit)``
    :raises ValueError: if either argument is not an ``int``; ``_skip`` is
        checked first
    """
    # Exact type checks kept from the original (bool is rejected too).
    # Call form of raise: valid on both Python 2 and Python 3.
    if type(_skip) is not int:
        raise ValueError("Skip argument must be an integer: K.limskip(2, 5)")
    if type(_limit) is not int:
        raise ValueError("Limit argument must be an integer: K.limskip(2, 5)")
    return P.udf(Document.__limskip__, _skip, _limit)
def type(self, value):
    """
    Finds documents where an attribute's type matches the input type.

    .. code-block:: python

      uv.users.find(D.age.type(int))

    .. note::

      This may not ever be used...

    :param value: a type object; its ``__name__`` is used in the
        description and the object itself is handed to ``P.type_``
    :returns: ``self``, with ``_desc`` and ``_predicate`` replaced
    """
    field = self._name
    self._desc = "type(%s) == %s" % (field, value.__name__)
    self._predicate = P.type_(field, value)
    return self
def in_list(self, value):
    """
    Finds documents where an attribute is in the given **list**.

    .. note::

      This predicate calls *list* on the input before it is compared.
      This can be useful for passing other generators to the predicate.
      However, if the input value is already a list, just call ``D.in_``
      instead.

    .. code-block:: python

      from underverse.model import Document as D

      # finds all users whose name is either 'Max' or 'Tamara'
      uv.users.find(D.name.in_list(['Max', 'Tamara']))

    :param value: iterable of accepted values; materialised with
        ``list(value)``
    :returns: ``self``, with ``_desc`` and ``_predicate`` replaced
    """
    accepted = list(value)
    field = self._name
    self._desc = "%s in set(%s)" % (field, accepted)
    self._predicate = P.in_(field, accepted)
    return self
def udf(function, *args, **kwargs):
    """
    Passes the entire data stream to the user defined function along with
    any *args* and *kwargs*.

    This can be used to filter documents on multiple attributes of the data
    along with other advanced functionality.

    .. note::

      The UDF takes the entire collection and returns a subset of documents
      matching complex criteria. This differs from the UDP functionality in
      that the UDP only receives a single attribute of one document at a
      time.

    .. code-block:: python

      def complex_filter(array):
        subset = []
        for doc in array:
          if some_ninja_math:
            subset.append(doc)
        return subset

      uv.docs.find(D.udf(complex_filter))

    Or a real example...

    .. code-block:: python

      # finds all docs where x**y > 4
      def sq_filter(array, goal=2):
        subset = []
        for doc in array:
          if doc.x ** doc.y > goal:
            subset.append(doc)
        return subset

      for d in verse.find(Document.udf(sq_filter, 4)):
        print(d)

    :param function: the user defined function
    :param args: extra positional arguments forwarded to ``P.udf``
    :param kwargs: extra keyword arguments forwarded to ``P.udf``
    :returns: whatever ``P.udf(function, *args, **kwargs)`` returns
    """
    stream_predicate = P.udf(function, *args, **kwargs)
    return stream_predicate
def udp(self, function, *args, **kwargs):
    """
    User defined predicates or UDP can be used if the existing comparison
    operators are not enough.

    .. code-block:: python

      from underverse.model import Document as D

      # filters out documents where the sqrt of a selected attribute is
      # between a given range
      def sqrt_filter(doc, lower_bound=2, upper_bound=39):
        if upper_bound >= math.sqrt(doc) >= lower_bound:
          return True
        else:
          return False

      # finds all docs whose 'some_number' attribute's sqrt is between
      # 16 and 64
      uv.docs.find(D.some_number.udp(sqrt_filter, lower_bound=16, upper_bound=64))

    The UDP function only receives the 'some_number' attribute and has to
    return a bool discerning to filter out or include each row.

    Any ``*args`` or ``**kwargs`` are forwarded to the method. This is to
    allow for more flexibility and DRY code.

    .. note::

      If you are astute, you'll quickly see the limitation of this method.
      However, it might be useful for some.

      Because the ``udp`` method only passes one attribute, more complex
      filters will not work easily. If you need a filter which requires
      multiple attributes of the data, use the ``udf`` method instead.

    :param function: the user defined predicate
    :param args: extra positional arguments forwarded to ``P.udp``
    :param kwargs: extra keyword arguments forwarded to ``P.udp``
    :returns: ``self``, with ``_desc`` and ``_predicate`` replaced
    """
    # Callables such as functools.partial objects have no __name__; fall
    # back to repr() instead of raising while building the description.
    func_name = getattr(function, '__name__', repr(function))

    # Build the human-readable call signature. Every positional argument is
    # %-formatted first: str.join only accepts strings, so joining raw
    # numeric arguments (e.g. udp(f, 16, 64)) raised a TypeError.
    call_parts = [self._name]
    call_parts.extend('%s' % (arg,) for arg in args)
    call_parts.extend('%s=%s' % (k, v) for k, v in kwargs.items())
    self._desc = "%s(%s)" % (func_name, ', '.join(call_parts))

    self._predicate = P.udp(self._name, function, *args, **kwargs)
    return self
# Boolean command-line flag; False unless "-csv" is given.
parser.add_argument('-csv', help='output data in csv format', action='store_true', default=False)
args = parser.parse_args()

#from tools import print_v

# NOTE(review): the "rn" option is declared outside this view; presumably the
# number of representational (working-memory) slots -- confirm.
wm_c = args.rn
# NOTE(review): presumably the semantic-vector dimensionality and the maximum
# predicate arity -- confirm against the code that consumes them.
sem_dim = 50
max_ar = 2

#target situation: "loves", arity 2, built from John and Mary
john_t = Atom("John")
mary_t = Atom("Mary")
loves_t = Predicate("loves", 2, [john_t], [mary_t])

#base 1: same predicate name as the target, second atom is "Maria"
john_b1 = Atom("John")
maria_b1 = Atom("Maria")
loves_b1 = Predicate("loves", 2, [john_b1], [maria_b1])

#base 2: same predicate name as the target, atoms are "Peter" and "Maria"
peter_b2 = Atom("Peter")
maria_b2 = Atom("Maria")
loves_b2 = Predicate("loves", 2, [peter_b2], [maria_b2])

#base 3: same atom names as the target, predicate name is "hates"
john_b3 = Atom("John")
mary_b3 = Atom("Mary")
hates_b3 = Predicate("hates", 2, [john_b3], [mary_b3])
from encoding import VectorEncoder
from predicates import Atom, Property, Predicate
import tools

# Atoms for the sample predicates below; "peter" and "maria" are not used
# within this fragment.
john = Atom("john")
marry = Atom("marry")
peter = Atom("peter")
maria = Atom("maria")

driver = Property("driver", [john])
loves1 = Predicate("loves", 2, [john], [marry])
# The second argument list holds the "driver" property, not an atom.
loves2 = Predicate("loves", 2, [marry], [driver])

# NOTE(review): the meaning of the three VectorEncoder arguments is not
# visible here -- confirm against the encoding module.
encoder = VectorEncoder(5, 2, 10)
# Encode one predicate and one property together, then print the result.
vec_repr, predicates = encoder.encode_predicates([loves1, driver])
tools.print_v(vec_repr, predicates=predicates)
#tools.print_v(encoder.encode_predicates([loves2]))
import tensorflow as tf
from predicates import Atom, Predicate
from encoding import VectorEncoder, Glove50Encoder
from tools import print_v
from analogy import VectorAnalogy
from comparison import VectorMapping

# Both constants are passed to Glove50Encoder below.
# NOTE(review): presumably the number of representational slots and the
# maximum predicate arity -- confirm against the encoding module.
N_SLOTS = 6
MAX_ARITY = 2

encoder = Glove50Encoder(N_SLOTS, MAX_ARITY)

# Each situation gets its own Atom instances, even where the names repeat.
john_1 = Atom("John")
mary_1 = Atom("Mary")
john_loves_mary = Predicate("loves", 2, [john_1], [mary_1])

# Same predicate name with the two argument lists swapped.
john_2 = Atom("John")
mary_2 = Atom("Mary")
mary_loves_john = Predicate("loves", 2, [mary_2], [john_2])

# NOTE(review): the variable is named john_hates_mary but the predicate is
# built with the name "talks" -- confirm whether this is intentional.
john_3 = Atom("John")
mary_3 = Atom("Mary")
john_hates_mary = Predicate("talks", 2, [john_3], [mary_3])

john_4 = Atom("John")
mary_4 = Atom("Mary")
mary_hates_john = Predicate("hates", 2, [mary_4], [john_4])

peter = Atom("Peter")
helen = Atom("Helen")
# Boolean command-line flag; False unless "-csv" is given.
parser.add_argument('-csv', help='output data in csv format', action='store_true', default=False)
args = parser.parse_args()

#from tools import print_v

# NOTE(review): the "rn" option is declared outside this view; presumably the
# number of representational (working-memory) slots -- confirm.
wm_c = args.rn
# NOTE(review): presumably the semantic-vector dimensionality and the maximum
# predicate arity -- confirm against the code that consumes them.
sem_dim = 50
max_ar = 2

#target situation: "barks" and "scares" share the same dog and person atoms;
#"causes" takes those two predicates as its arguments
person_t = Atom("person")
dog_t = Atom("dog")
barks_at_t = Predicate("barks", 2, [dog_t], [person_t])
scares_t = Predicate("scares", 2, [dog_t], [person_t])
causes_t = Predicate("causes", 2, [barks_at_t], [scares_t])

#base 1: same predicate names as the target, but "barks" and "scares" use
#separate dog/person atom instances
person_1_b1 = Atom("person")
person_2_b1 = Atom("person")
dog_1_b1 = Atom("dog")
dog_2_b1 = Atom("dog")
barks_at_b1 = Predicate("barks", 2, [dog_1_b1], [person_1_b1])
scares_b1 = Predicate("scares", 2, [dog_2_b1], [person_2_b1])
causes_b1 = Predicate("causes", 2, [barks_at_b1], [scares_b1])

#base 2
person_b2 = Atom("person")
dog_b2 = Atom("dog")
#number of representational slots r_slots_n = 8 #semantics dimensionality (number of units) sem_dim = 50 #maximal arity max_ar = 2 #tha relative weight of semantics and structure sigma = 0.5#semantics and structure have equal weight #target situation bill_t = Atom("Bill") joe_t = Atom("Joe") susan_t = Atom("Susan") pear_t = Atom("pear") peach_t = Atom("peach") loves_t = Predicate("loves", 2, [bill_t], [susan_t]) has1_t = Predicate("has", 2, [bill_t], [peach_t]) has2_t = Predicate("has", 2, [joe_t], [pear_t]) #base john_b1 = Atom("John") mary_b1 = Atom("Mary") apple_b1 = Atom("apple") loves_b1 = Predicate("loves", 2, [john_b1], [mary_b1]) has_b1 = Predicate("has", 2, [john_b1], [apple_b1]) #Vector encoder using Glove 50d word word embeddings encoder = Glove50Encoder(r_slots_n, max_ar)
def __eq__(self, value):
    """
    Turn ``doc == value`` into an equality predicate.

    Note that this returns ``self`` rather than a bool.

    :param value: value handed to ``P.eq``
    :returns: ``self``, with ``_desc`` and ``_predicate`` replaced
    """
    field = self._name
    self._desc = "%s == %s" % (field, value)
    self._predicate = P.eq(field, value)
    return self
sem_dim = 50 #maximal arity max_ar = 1 #tha relative weight of semantics and structure sigma = 0.5#semantics and structure have equal weight #target situation bill_t = Atom("Bill") steve_t = Atom("Steve") tom_t = Atom("Tom") smart_bill_t = Property("smart", [bill_t]) tall_bill_t = Property("tall", [bill_t]) smart_steve_t = Property("smart", [steve_t]) timid_tom_t = Property("timid", [tom_t]) tall_tom_t = Property("tall", [tom_t]) same1_t = Predicate("same", 1, [smart_bill_t, smart_steve_t]) same2_t = Predicate("same", 1, [tall_bill_t, tall_tom_t]) unique1_t = Property("unique", [steve_t]) unique2_t = Property("unique", [timid_tom_t]) #base rover_b1 = Atom("Rover") fido_b1 = Atom("Fido") blackie_b1 = Atom("Blackie") hungry_rover_b1 = Property("hungry", [rover_b1]) friendly_rover_b1 = Property("friendly", [rover_b1]) hungry_steve_b1 = Property("hungry", [fido_b1]) frisky_blackie_b1 = Property("frisky", [blackie_b1]) friendly_blackie_b1 = Property("friendly", [blackie_b1]) same1_b1 = Predicate("same", 1, [friendly_rover_b1, friendly_blackie_b1]) same2_b1 = Predicate("same", 1, [hungry_rover_b1, hungry_steve_b1]) unique1_b1 = Property("unique", [fido_b1])
def __init__(self, name):
    """
    Create a document selector for the attribute ``name``.

    The selector starts out with an "exists" predicate for that attribute
    and the description ``"has key: <name>"``.

    :param name: attribute name; it is concatenated to a ``str``
    """
    super(Document, self).__init__()
    # Attributes are assigned one by one, in the original order.
    self._name = name
    self._predicate = P.exists(name)
    self._desc = "has key: " + name
def __ne__(self, value):
    """
    Turn ``doc != value`` into an inequality predicate.

    Note that this returns ``self`` rather than a bool.

    :param value: value handed to ``P.ne``
    :returns: ``self``, with ``_desc`` and ``_predicate`` replaced
    """
    field = self._name
    self._desc = "%s != %s" % (field, value)
    self._predicate = P.ne(field, value)
    return self
def __exists__(self):
    """
    Build an "exists" predicate for the currently selected attribute.

    :returns: the result of ``P.exists`` for ``self._name``
    """
    exists_predicate = P.exists(self._name)
    return exists_predicate
def benchmark():
    """
    Compare one target situation against two base situations and print the
    resulting mappings.

    Three ``cause*`` predicates are built over ``r*`` relations between two
    objects: one target and two bases. Everything is encoded with
    ``Glove50Encoder``. For each sigma value and each base,
    ``NeuralAnalogy.make`` is run and, for every encoded base predicate,
    the target predicate with the largest mapping entry in that column is
    printed, followed by a summary line with sigma, base index and
    similarity.

    :returns: ``None``; results are written with ``print``
    """
    wm_c = 8
    sem_dim = 50
    max_ar = 2

    # One pair of object atoms per situation.
    o1_1, o2_1 = Atom("o1"), Atom("o2")
    o1_2, o2_2 = Atom("o1"), Atom("o2")
    o1_3, o2_3 = Atom("o1"), Atom("o2")

    # Two relations per situation, with the argument order varied.
    r1_1 = Predicate("r1111", 2, [o1_1], [o2_1])
    r2_1 = Predicate("r2222", 2, [o2_1], [o1_1])
    r1_2 = Predicate("r1000", 2, [o2_2], [o1_2])
    r2_2 = Predicate("r2sss", 2, [o1_2], [o2_2])
    r1_3 = Predicate("r1eeee", 2, [o2_3], [o1_3])
    r2_3 = Predicate("r2rrr", 2, [o1_3], [o2_3])

    target = Predicate("cause0", 2, [r1_1], [r2_1])
    base1 = Predicate("cause1", 2, [r1_2], [r2_2])
    base2 = Predicate("cause2", 2, [r2_3], [r1_3])

    # VectorEncoder(wm_c, max_ar, sem_dim) was the previously tried
    # alternative encoder.
    encoder = Glove50Encoder(wm_c, max_ar)

    # Encode every base; both dicts are keyed by the base index.
    bases = {}
    base_ps = {}
    for idx, base_group in enumerate([[base1], [base2]]):
        bases[idx], base_ps[idx] = encoder.encode_predicates(base_group)

    target_vec, target_ps = encoder.encode_predicates([target])

    sigma_i_step = 10
    for sigma_i in range(50, 51, sigma_i_step):
        sigma = sigma_i / 100.0
        analogy = NeuralAnalogy(sigma, wm_c, max_ar, sem_dim, False)
        for base_i in bases:
            max_sim, _, mapping = analogy.make(target_vec, bases[base_i])
            encoded_base = base_ps[base_i]
            # Index-based access on purpose: the encoded predicates are only
            # assumed to support len() and integer lookup.
            for col in range(len(encoded_base)):
                base_p = encoded_base[col]
                target_p = target_ps[np.argmax(mapping[:, col])]
                print("{}<->{}".format(target_p, base_p))
            print("sigma: {:.3}, base: {}, sim :{:.5}".format(sigma, base_i, max_sim))
def __ge__(self, value):
    """
    Turn ``doc >= value`` into a greater-than-or-equal predicate.

    Note that this returns ``self`` rather than a bool.

    :param value: value handed to ``P.gte``
    :returns: ``self``, with ``_desc`` and ``_predicate`` replaced
    """
    field = self._name
    self._desc = "%s >= %s" % (field, value)
    self._predicate = P.gte(field, value)
    return self
#number of representational slots r_slots_n = 9 #semantics dimensionality (number of units) sem_dim = 50 #maximal arity max_ar = 2 #tha relative weight of semantics and structure sigma = 0.5 #semantics and structure have equal weight #target situation john_t = Atom("John") mary_t = Atom("Mary") apple_t = Atom("apple") core_t = Atom("core") bill_t = Atom("Bill") loves_t = Predicate("loves", 2, [john_t], [mary_t]) has_t = Predicate("has", 2, [apple_t], [core_t]) knows1_t = Predicate("knows", 2, [bill_t], [has_t]) knows2_t = Predicate("knows", 2, [john_t], [loves_t]) #base john_b1 = Atom("John") mary_b1 = Atom("Mary") bill_b1 = Atom("Bill") apple_b1 = Atom("apple") core_b1 = Atom("core") loves_b1 = Predicate("loves", 2, [john_b1], [mary_b1]) has_b1 = Predicate("has", 2, [apple_b1], [core_b1]) knows1_b1 = Predicate("knows", 2, [bill_b1], [loves_b1]) knows2_b1 = Predicate("knows", 2, [john_b1], [has_b1])
def __le__(self, value):
    """
    Turn ``doc <= value`` into a less-than-or-equal predicate.

    Note that this returns ``self`` rather than a bool.

    :param value: value handed to ``P.lte``
    :returns: ``self``, with ``_desc`` and ``_predicate`` replaced
    """
    field = self._name
    self._desc = "%s <= %s" % (field, value)
    self._predicate = P.lte(field, value)
    return self