def stage3(self):
    """Stage 3: run once both sources have sent all their data.

    Performs, in order:
      1. probes the records held in ``self.left_table`` against the
         files referenced by ``self.fileDescriptor_right``;
      2. probes the records held in ``self.right_table`` against the
         files referenced by ``self.fileDescriptor_left``;
      3. probes the records stored in the files of each side against
         the files of the other side;
      4. removes every file referenced by both descriptor tables;
      5. puts ``"EOF"`` on ``self.qresults``.

    Only resources present on both sides of a comparison are probed.
    """
    # RJTs in main (left) memory are probed against RJTs in secondary (right) memory.
    shared = set(self.left_table.keys()) & set(self.fileDescriptor_right.keys())
    for resource in shared:
        for rjt in self.left_table[resource].records:
            self.probeFile(rjt, self.fileDescriptor_right, resource, 3)

    # RJTs in main (right) memory are probed against RJTs in secondary (left) memory.
    shared = set(self.right_table.keys()) & set(self.fileDescriptor_left.keys())
    for resource in shared:
        for rjt in self.right_table[resource].records:
            self.probeFile(rjt, self.fileDescriptor_left, resource, 3)

    # RJTs in secondary memory are probed to produce new results:
    # first every right-side file against the left-side files, then
    # every left-side file against the right-side files.
    shared = set(self.fileDescriptor_left.keys()) & set(self.fileDescriptor_right.keys())
    passes = (
        (self.fileDescriptor_right, self.fileDescriptor_left),
        (self.fileDescriptor_left, self.fileDescriptor_right),
    )
    for source_files, target_files in passes:
        for resource in shared:
            # The context manager guarantees the handle is closed even if
            # parsing a line or probing raises.
            with open(source_files[resource].file.name) as spilled:
                for line in spilled.readlines():
                    # Each line is "tuple|probeTS|insertTS|flushTS".
                    (tuple1, probeTS1, insertTS1, flushTS1) = line.split('|')
                    # NOTE(review): eval() executes whatever text is stored
                    # in the file. Presumably these files are written only by
                    # this operator -- confirm, and consider
                    # ast.literal_eval if the stored tuples are plain literals.
                    self.probeFile(
                        Record(eval(tuple1), float(probeTS1),
                               float(insertTS1), float(flushTS1)),
                        target_files, resource, 3)

    # Delete files from secondary memory.
    for resource in self.fileDescriptor_left:
        remove(self.fileDescriptor_left[resource].file.name)
    for resource in self.fileDescriptor_right:
        remove(self.fileDescriptor_right[resource].file.name)

    # Put EOF in queue and exit.
    self.qresults.put("EOF")
def probe(self, tuple, resource, rjttable, other_rjttable):
    """Probe ``tuple`` against ``rjttable`` and emit the join results.

    If ``resource`` is a key of ``rjttable``, every record stored under it
    is merged with ``tuple`` and put on ``self.qresults``. Otherwise
    ``self.right`` is asked for the matching tuples: each answer is merged
    with ``tuple``, emitted, and stored as a ``Record`` in
    ``other_rjttable``. If the source returns no tuple at all, a single
    result is emitted in which every attribute of ``self.right.atts`` is
    the empty string.

    Parameters:
        tuple: mapping of variable name to value for the incoming tuple.
        resource: key identifying the tuple in the RJT tables.
        rjttable: table the tuple is probed against.
        other_rjttable: table that receives records fetched from the source.

    Returns:
        The timestamp (``time()``) taken when the probe started.
    """
    probeTS = time()

    # If the resource is in table, produce results.
    if resource in rjttable:
        rjttable.get(resource).setRJTProbeTS(probeTS)
        for record in rjttable[resource].records:
            res = record.tuple.copy()
            res.update(tuple)
            self.qresults.put(res)
        return probeTS

    # If not, contact the source.
    instances = [tuple[v] for v in self.vars]
    qright = Queue()
    self.right.execute(self.vars, instances, qright)

    # Get the tuples from right queue.
    rtuple = qright.get(True)

    if rtuple == "EOF":
        # The source produced nothing: build the empty tuple and answer
        # with it so that the incoming tuple is still reported.
        rtuple = {}
        for att in self.right.atts:
            rtuple[att] = ''
        rtuple.update(tuple)
        self.qresults.put(rtuple)
        return probeTS

    while rtuple != "EOF":
        # Build answer and produce it.
        answer = rtuple.copy()
        answer.update(tuple)
        self.qresults.put(answer)

        # Create and insert the record in the RJT table.
        record = Record(rtuple, probeTS, time())
        # The membership test must look at the table that is about to be
        # modified. Testing ``rjttable`` here (as before) is always false
        # in this branch, so every fetched tuple replaced the previous
        # tail and only the last record survived.
        if resource in other_rjttable:
            tail = other_rjttable.get(resource)
            tail.updateRecords(record)
            tail.setRJTProbeTS(probeTS)
        else:
            other_rjttable[resource] = RJTTail(record, float("inf"))

        rtuple = qright.get(True)

    return probeTS
def probe(self, record, i, partition, var, right):
    """Probe ``record`` against ``partition``; fall back to the source.

    Nothing happens when ``partition`` is falsy. Otherwise ``record`` is
    compared with every record of the partition, stopping at the first one
    that ``self.isDuplicated`` reports as a duplicate. Records that agree
    on every variable in ``var`` are merged with ``record`` and appended to
    ``self.results``.

    If no join was produced (which includes an empty partition), ``right``
    is contacted. Every tuple it returns is stored in ``self.right_table``
    at partition ``i`` and its merge with ``record`` is appended to
    ``self.results``.

    Parameters:
        record: object exposing the incoming tuple as ``record.tuple``.
        i: partition index used when inserting into ``self.right_table``.
        partition: object exposing its records as ``partition.records``.
        var: names of the join variables.
        right: source object providing ``execute(vars, instances, queue)``.
    """
    # Probe a tuple if the partition is not empty.
    if not partition:
        return

    anyjoin = False
    for r in partition.records:
        if self.isDuplicated(record, r):
            break
        # The tuples join when no join variable differs. any() also gives
        # a well-defined answer for an empty ``var`` (every pair joins),
        # where the previous flag-based loop left ``join`` unbound.
        if not any(record.tuple[v] != r.tuple[v] for v in var):
            anyjoin = True
            res = record.tuple.copy()
            res.update(r.tuple)
            self.results.append(res)

    # Empty partition or no matches were found. An empty partition never
    # sets ``anyjoin``, so a single test covers both cases.
    if anyjoin:
        return

    instances = [record.tuple[v] for v in var]

    # Contact the source.
    # NOTE(review): the source is queried with ``self.vars`` while the
    # instances are built from ``var``; the visible caller passes
    # ``self.vars`` as ``var`` so both agree -- confirm for other callers.
    qright = Queue()
    right.execute(self.vars, instances, qright)

    # Insert in right table, and produce the results.
    rtuple = qright.get(True)
    while rtuple != "EOF":
        # The stored tuple carries the join values of the probing record.
        stored = rtuple.copy()
        for v in var:
            stored[v] = record.tuple[v]
        self.right_table.insertRecord(i, Record(stored, time(), 0))

        res = rtuple.copy()
        res.update(record.tuple)
        self.results.append(res)

        rtuple = qright.get(True)
def probeAndInsert2(self, resource, tuple, table1, table2, time):
    """Probe ``tuple`` against ``table1`` and store it in ``table2``.

    A ``Record(tuple, time, 0)`` is built for the incoming tuple. Every
    record stored in ``table1`` under ``resource`` whose ``ats`` is not
    greater than the new record's ``ats`` is merged with ``tuple`` and put
    on ``self.qresults``. The new record is then appended to the list kept
    in ``table2`` under ``resource`` (created if missing).
    """
    incoming = Record(tuple, time, 0)

    if resource in table1:
        for stored in table1[resource]:
            # Skip records newer than the incoming one.
            if not (stored.ats > incoming.ats):
                joined = stored.tuple.copy()
                joined.update(tuple)
                self.qresults.put(joined)

    # Read, extend and write back the bucket explicitly.
    bucket = table2.get(resource, [])
    bucket.append(incoming)
    table2[resource] = bucket
def insertAndProbe(self, tuple, table1, table2):
    """Insert ``tuple`` into ``table1`` and probe it against ``table2``.

    The partition index is the hash of the concatenated values of the
    variables in ``self.vars``, modulo ``table1.size``. The tuple is
    wrapped in ``Record(tuple, time(), 0)``, inserted into that partition
    of ``table1`` and probed against the partition with the same index in
    ``table2``.
    """
    # Concatenate the join values to obtain the hash key.
    key = ''
    for name in self.vars:
        key += tuple[name]
    index = hash(key) % table1.size

    # Insert record in partition.
    fresh = Record(tuple, time(), 0)
    table1.insertRecord(index, fresh)

    # Probe the record against its partition in the other table.
    self.probe(fresh, table2.partitions[index], self.vars)
def stage1(self, tuple, input, table1, table2):
    """Stage 1: while both sources are sending data.

    Hashes the concatenated values of the variables in ``self.vars`` to
    pick a partition (modulo ``table1.size``), inserts
    ``Record(tuple, self.timestamp, float("inf"))`` into that partition of
    ``table1`` and probes it against the same partition of ``table2``.

    ``input`` is accepted but not used by this method.
    """
    # Build the hash key from the join values.
    key = ''
    for name in self.vars:
        key += tuple[name]
    index = hash(key) % table1.size

    # Insert record in partition.
    fresh = Record(tuple, self.timestamp, float("inf"))
    table1.insertRecord(index, fresh)

    # Probe the record against its partition in the other table.
    self.probe(fresh, table2.partitions[index], self.vars)
def probeAndInsert1(self, tuple, table1, table2, time):
    """Probe ``tuple`` against ``table1`` and store it in ``table2``.

    The key is obtained from ``self.getResource(tuple)``. Every record
    stored in ``table1`` under that key whose ``ats`` is not greater than
    the ``ats`` of the new ``Record(tuple, time, 0)`` is merged with
    ``tuple`` and put on ``self.qresults``. The new record is then appended
    to the list kept in ``table2`` under the key.

    Returns:
        True when ``table2`` held no records for the key before this call
        (the bucket compared equal to ``[]``), False otherwise.
    """
    incoming = Record(tuple, time, 0)
    key = self.getResource(tuple)

    if key in table1:
        for stored in table1[key]:
            # Skip records newer than the incoming one.
            if not (stored.ats > incoming.ats):
                joined = stored.tuple.copy()
                joined.update(tuple)
                self.qresults.put(joined)

    bucket = table2.get(key, [])
    # Evaluated before appending, so it reports the state on entry.
    was_empty = (bucket == [])
    bucket.append(incoming)
    table2[key] = bucket
    return was_empty
def insertAndProbe(self, tuple):
    """Insert ``tuple`` into the left table and probe the right table.

    The partition index is the hash of the concatenated values of the
    variables in ``self.vars``, modulo ``self.left_table.size``. A
    ``Record(tuple, time(), 0)`` is inserted into that partition of
    ``self.left_table`` and then probed, through ``self.probe``, against
    the partition with the same index in ``self.right_table``, passing
    ``self.right`` as the source.
    """
    # Build the hash key from the join values.
    key = ''
    for name in self.vars:
        key += tuple[name]
    index = hash(key) % self.left_table.size

    # Create record (tuple, ats, dts) and insert it in its partition.
    fresh = Record(tuple, time(), 0)
    self.left_table.insertRecord(index, fresh)

    # Probe the record against its partition in the other table.
    target = self.right_table.partitions[index]
    self.probe(fresh, index, target, self.vars, self.right)
def probeAndInsert2(self, resource, tuple, table1, table2, time):
    """Probe ``tuple`` against ``table1``, update the bag, store in ``table2``.

    Every record stored in ``table1`` under ``resource`` whose ``ats`` is
    not greater than the ``ats`` of the new ``Record(tuple, time, 0)`` is
    merged with ``tuple`` and put on ``self.qresults``; the matched
    record's tuple is then removed from ``self.bag`` if it is present.
    Finally the new record is appended to the list kept in ``table2``
    under ``resource``.
    """
    incoming = Record(tuple, time, 0)

    for stored in (table1[resource] if resource in table1 else ()):
        if stored.ats > incoming.ats:
            # Newer than the incoming record: not joined.
            continue

        joined = stored.tuple.copy()
        joined.update(tuple)
        self.qresults.put(joined)

        # Delete tuple from bag; it may already have been removed.
        try:
            self.bag.remove(stored.tuple)
        except ValueError:
            pass

    bucket = table2.get(resource, [])
    bucket.append(incoming)
    table2[resource] = bucket
def stage1(self, tuple, tuple_rjttable, other_rjttable, vars):
    """Stage 1: while one of the sources is sending data.

    The resource key is the concatenation of the tuple's values for the
    variables in ``self.vars``. The tuple is probed through
    ``self.probe(tuple, resource, tuple_rjttable, vars)`` and then stored
    as ``Record(tuple, probeTS, time())`` in ``other_rjttable``: added to
    the existing entry for the resource (whose probe timestamp is also
    updated), or placed in a new ``RJTTail(record, float("inf"))``.
    """
    # Get the resource associated to the tuples.
    key = ''
    for name in self.vars:
        key += tuple[name]

    # Probe the tuple against its RJT table.
    probed_at = self.probe(tuple, key, tuple_rjttable, vars)

    # Create the record.
    fresh = Record(tuple, probed_at, time())

    # Insert the record in the other RJT table.
    # TODO: use RJTTail. Check ProbeTS
    if key not in other_rjttable:
        other_rjttable[key] = RJTTail(fresh, float("inf"))
        return

    entry = other_rjttable.get(key)
    entry.updateRecords(fresh)
    entry.setRJTProbeTS(probed_at)
def stage1(self, tuple, tuple_rjttable, other_rjttable):
    """Stage 1: while one of the sources is sending data.

    Does nothing when ``tuple`` is the ``"EOF"`` marker. Otherwise the
    resource key is the concatenation of ``str()`` of the tuple's values
    for the variables in ``self.vars``. The tuple is probed through
    ``self.probe(tuple, resource, tuple_rjttable)`` and then stored as
    ``Record(tuple, probeTS, time(), float("inf"))`` in
    ``other_rjttable``: added to the existing entry for the resource
    (whose probe timestamp is also updated), or placed in a new
    ``RJTTail(record, probeTS)``.
    """
    if not (tuple != "EOF"):
        return

    # Get the resource associated to the tuples.
    key = ''
    for name in self.vars:
        key += str(tuple[name])

    # Probe the tuple against its RJT table.
    probed_at = self.probe(tuple, key, tuple_rjttable)

    # Create the record.
    fresh = Record(tuple, probed_at, time(), float("inf"))

    # Insert the record in the other RJT table.
    if key not in other_rjttable:
        other_rjttable[key] = RJTTail(fresh, probed_at)
        return

    entry = other_rjttable.get(key)
    entry.updateRecords(fresh)
    entry.setRJTProbeTS(probed_at)
def probeAndInsert1(self, tuple, table1, table2, time):
    """Probe ``tuple`` against ``table1``, update the bag, store in ``table2``.

    The key is obtained from ``self.getResource(tuple)``. Every record
    stored in ``table1`` under that key whose ``ats`` is not greater than
    the ``ats`` of the new ``Record(tuple, time, 0)`` is merged with
    ``tuple`` and put on ``self.qresults``; the incoming ``tuple`` is then
    removed from ``self.bag`` if it is present. Finally the new record is
    appended to the list kept in ``table2`` under the key.

    Returns:
        True when ``table2`` held no records for the key before this call
        (the bucket compared equal to ``[]``), False otherwise.
    """
    incoming = Record(tuple, time, 0)
    key = self.getResource(tuple)

    for stored in (table1[key] if key in table1 else ()):
        if stored.ats > incoming.ats:
            # Newer than the incoming record: not joined.
            continue

        joined = stored.tuple.copy()
        joined.update(tuple)
        self.qresults.put(joined)

        # Delete tuple from bag; it may already have been removed.
        try:
            self.bag.remove(tuple)
        except ValueError:
            pass

    bucket = table2.get(key, [])
    # Evaluated before appending, so it reports the state on entry.
    was_empty = (bucket == [])
    bucket.append(incoming)
    table2[key] = bucket
    return was_empty