def removeUnmetRelationships(dropSpecList):
    """
    Strip from the given DROP specifications every relationship that points
    to an OID which is not defined in ``dropSpecList`` itself.

    The specifications are modified in place: missing OIDs are taken out of
    the 1-N relationship lists (and a list left empty is dropped from its
    specification altogether), while N-1 relationships pointing to a missing
    OID are deleted.

    :param dropSpecList: the DROP specifications to inspect; each of them
        is indexed by relationship name and has an ``'oid'`` entry
    :return: a list with one ``DROPRel(oid, link, this_oid)`` per removed
        reference, where ``oid`` is the missing OID, ``link`` is the value
        registered for the relationship in ``__TOMANY``/``__TOONE`` and
        ``this_oid`` is the OID of the specification that held the reference
    """
    # Everything that is defined in this set of specifications
    known_oids = {spec['oid'] for spec in dropSpecList}
    unmet = []

    for spec in dropSpecList:
        owner = spec['oid']

        # Keys cannot be deleted while the specification is being iterated
        # over, so they are collected here and removed afterwards
        emptied = []

        for key in spec:

            if key in __TOMANY:
                # 1-N relationship: take the missing OIDs out of the list
                link = __TOMANY[key]
                targets = spec[key]
                for target in [t for t in targets if t not in known_oids]:
                    unmet.append(DROPRel(target, link, owner))
                    targets.remove(target)
                # Nothing left, the relationship itself goes away
                if not targets:
                    emptied.append(key)

            elif key in __TOONE:
                # N-1 relationship: it either stays or goes as a whole
                link = __TOONE[key]
                target = spec[key]
                if target not in known_oids:
                    unmet.append(DROPRel(target, link, owner))
                    emptied.append(key)

        for key in emptied:
            del spec[key]

    return unmet
def sanitize_relations(interDMRelations, graph):
    """
    Rewrite, in place, each relationship in ``interDMRelations`` so that both
    of its sides hold the UID of the corresponding drop in ``graph``.

    :param interDMRelations: list of relationships exposing ``lhs``, ``rel``
        and ``rhs``; its contents are replaced by new ``DROPRel`` objects
    :param graph: container indexed by the ``lhs``/``rhs`` values found in
        the relationships; each entry is handed over to ``uid_for_drop``
    """
    # TODO: Big change required to remove this hack
    #
    # The values in interDMRelations identify drops by OID. That is because
    # users have been told so far that OIDs are required in the physical
    # graph description, while UIDs are optional (and copied over from the
    # OID when not given). However, once the drops are actually created in
    # deploySession() the values in interDMRelations are accessed as if they
    # were UIDs, which causes problems because everything else is indexed
    # by UID.
    #
    # To avoid breaking the current physical graph constraints, and to keep
    # things simple, the OIDs are replaced here by their corresponding UIDs.
    # Since UIDs are globally unique across drop instances it makes sense to
    # always index things by UID rather than by OID. In the future physical
    # graphs should probably be required to always carry a UID (and
    # optionally an OID), and this code changed to use those UIDs directly.
    interDMRelations[:] = [
        DROPRel(uid_for_drop(graph[r.lhs]), r.rel, uid_for_drop(graph[r.rhs]))
        for r in interDMRelations
    ]
def test_many_relationships(self):
    """
    Checks that a single drop can be related to many drops living in a
    different DM.

    Drop A is the input of N applications (B1, B2, .., BN) deployed in a
    separate DM; this is not expected to exhaust resources on DM #1. All
    applications write into C, so waiting on C alone is enough to know
    that the execution has finished.

        DM #1             DM #2
        =======    ====================
        |     |    | |--> B1 --|      |
        |     |    | |--> B2 --|      |
        | A --|----|-|--> B3 --|--> C |
        |     |    | |.........|      |
        |     |    | |--> BN --|      |
        =======    ====================
    """
    dm1, dm2 = [self._start_dm() for _ in range(2)]

    sessionId = 's1'
    N = 100
    app_oids = ["B%d" % (i, ) for i in range(N)]

    g1 = [{"oid": "A", "type": "plain", "storage": "memory"}]
    g2 = [{"oid": "C", "type": "plain", "storage": "memory"}]
    # SleepAndCopyApp effectively opens the input drop
    g2.extend({
        "oid": app_oid,
        "type": "app",
        "app": "test.graphsRepository.SleepAndCopyApp",
        "outputs": ["C"],
        "sleepTime": 0
    } for app_oid in app_oids)
    rels = [DROPRel('A', DROPLinkType.INPUT, app_oid) for app_oid in app_oids]

    quickDeploy(dm1, sessionId, g1, {nm_conninfo(1): rels})
    quickDeploy(dm2, sessionId, g2, {nm_conninfo(0): rels})
    self.assertEqual(1, len(dm1._sessions[sessionId].drops))
    self.assertEqual(1 + N, len(dm2._sessions[sessionId].drops))

    # Run! Getting through this without exceptions is already good proof
    # that things work as expected
    a = dm1._sessions[sessionId].drops['A']
    c = dm2._sessions[sessionId].drops['C']
    with droputils.DROPWaiterCtx(self, c, 10):
        a.write('a')
        a.setCompleted()

    # Every single application must have finished as well
    for app_oid in app_oids:
        app = dm2._sessions[sessionId].drops[app_oid]
        self.assertEqual(DROPStates.COMPLETED, app.status)

    dm1.destroySession(sessionId)
    dm2.destroySession(sessionId)
def test_removeUnmetRelationships(self):
    """
    Checks that relationships pointing to drops that are not part of the
    graph description (D, Z and X here) are reported by
    ``graph_loader.removeUnmetRelationships`` and removed from the
    description itself.
    """
    graphDesc = [
        {'oid': 'A', 'consumers': ['B', 'D'], 'producers': ['Z', 'X']},
        {'oid': 'B', 'outputs': ['C']},
        {'oid': 'C', 'streamingConsumers': ['D']},
    ]

    # These are all the unmet relationships contained in the graph above
    expected = (
        DROPRel('D', DROPLinkType.CONSUMER, 'A'),
        DROPRel('D', DROPLinkType.STREAMING_CONSUMER, 'C'),
        DROPRel('Z', DROPLinkType.PRODUCER, 'A'),
        DROPRel('X', DROPLinkType.PRODUCER, 'A'),
    )

    unmetRelationships = graph_loader.removeUnmetRelationships(graphDesc)
    self.assertEqual(4, len(unmetRelationships))
    for rel in expected:
        self.assertIn(rel, unmetRelationships)

    # The original drop specifications have been modified too
    a, _, c = graphDesc
    self.assertEqual(1, len(a['consumers']))
    self.assertEqual('B', a['consumers'][0])
    self.assertFalse('producers' in a)
    self.assertFalse('streamingConsumers' in c)
def add_node_subscriptions(self, sessionId, relationships, nm):
    """
    Record the event subscriptions and the proxy information that derive
    from the given relationships between local and remote drops.

    For each relationship the local side is the one found in
    ``self._graph``. If the local drop is the one receiving events, it is
    added to the subscriptions of the remote drop in ``self._dropsubs``.
    In all cases an entry is appended to ``self._proxyinfo`` with what is
    needed to create the proxy for the remote drop later on.

    :param sessionId: not used by this method
    :param relationships: a dictionary mapping hosts to the relationships
        held with drops in that host. Keys are either a plain host or a
        3-element tuple whose first item is the host and whose last item is
        the RPC port (the middle item is ignored). Each relationship is
        either a ``DROPRel`` or a sequence accepted by ``DROPRel(*x)``.
    :param nm: stored as-is in every ``self._proxyinfo`` entry
    """
    evt_consumer = (DROPLinkType.CONSUMER,
                    DROPLinkType.STREAMING_CONSUMER,
                    DROPLinkType.OUTPUT)
    evt_producer = (DROPLinkType.INPUT,
                    DROPLinkType.STREAMING_INPUT,
                    DROPLinkType.PRODUCER)

    for host, droprels in relationships.items():

        # Make sure we have DROPRel tuples
        droprels = [DROPRel(*x) for x in droprels]

        # Sanitize the host/rpc_port info if needed. isinstance() is used
        # so subclasses of tuple are unpacked too.
        rpc_port = constants.NODE_DEFAULT_RPC_PORT
        if isinstance(host, tuple):
            host, _, rpc_port = host

        # Store which drops should receive events from which remote drops
        dropsubs = collections.defaultdict(set)
        for rel in droprels:

            # Which side of the relationship is local?
            local_uid = None
            remote_uid = None
            if rel.rhs in self._graph:
                local_uid = rel.rhs
                remote_uid = rel.lhs
            elif rel.lhs in self._graph:
                local_uid = rel.lhs
                remote_uid = rel.rhs

            # We are in the event receiver side. UIDs are compared by value
            # and not by identity: whether two equal UIDs are also the same
            # object is an implementation detail of the interpreter.
            if (rel.rel in evt_consumer and rel.lhs == local_uid) or \
               (rel.rel in evt_producer and rel.rhs == local_uid):
                dropsubs[remote_uid].add(local_uid)

        self._dropsubs.update(dropsubs)

        # Store the information needed to create the proxies later
        for rel in droprels:
            local_uid = rel.rhs
            mname = LINKTYPE_1TON_APPEND_METHOD[rel.rel]
            remote_uid = rel.lhs
            if local_uid not in self._graph:
                # The local drop is on the left-hand side, so the
                # relationship has to be established the other way around
                local_uid = rel.lhs
                remote_uid = rel.rhs
                mname = LINKTYPE_1TON_BACK_APPEND_METHOD[rel.rel]

            self._proxyinfo.append(
                (nm, host, rpc_port, local_uid, mname, remote_uid))
def test_runGraphOneDOPerDOM(self):
    """
    Deploys three DROPs across two different DMs and runs the resulting
    graph.

    The graphs given to each DM must *not* express the inter-DM
    relationships; these are passed down separately instead. The graph
    looks like this:

        DM #1      DM #2
        =======    =============
        | A --|----|-> B --> C |
        =======    =============
    """
    dm1, dm2 = [self._start_dm() for _ in range(2)]
    sessionId = 's1'

    g1 = [{"oid": "A", "type": "plain", "storage": "memory"}]
    g2 = [
        {"oid": "B", "type": "app", "app": "dfms.apps.crc.CRCApp"},
        {"oid": "C", "type": "plain", "storage": "memory", "producers": ["B"]},
    ]

    # The only link between both DMs
    rels = [DROPRel('B', DROPLinkType.CONSUMER, 'A')]
    quickDeploy(dm1, sessionId, g1, {nm_conninfo(1): rels})
    quickDeploy(dm2, sessionId, g2, {nm_conninfo(0): rels})
    self.assertEqual(1, len(dm1._sessions[sessionId].drops))
    self.assertEqual(2, len(dm2._sessions[sessionId].drops))

    # Run! We wait until c is completed
    a = dm1._sessions[sessionId].drops['A']
    b, c = [dm2._sessions[sessionId].drops[x] for x in ('B', 'C')]
    with droputils.DROPWaiterCtx(self, c, 1):
        a.write('a')
        a.setCompleted()

    for drop in (a, b, c):
        self.assertEqual(DROPStates.COMPLETED, drop.status)

    # C holds the checksum that was calculated for A
    self.assertEqual(a.checksum, int(droputils.allDropContents(c)))

    dm1.destroySession(sessionId)
    dm2.destroySession(sessionId)
def addStreamingInput(self, streamingInputDrop, back=True):
    """
    Unconditionally rejects ``streamingInputDrop`` as a streaming input.

    :param streamingInputDrop: the drop being added; only its ``uid`` is used
    :param back: ignored
    :raises InvalidRelationshipException: always, carrying the rejected
        STREAMING_INPUT relationship
    """
    rejected = DROPRel(streamingInputDrop.uid,
                       DROPLinkType.STREAMING_INPUT,
                       self.uid)
    raise InvalidRelationshipException(
        rejected, "SocketListenerApp should have no inputs")
def addInput(self, inputDrop, back=True):
    """
    Unconditionally rejects ``inputDrop`` as an input.

    :param inputDrop: the drop being added; only its ``uid`` is used
    :param back: ignored
    :raises InvalidRelationshipException: always, carrying the rejected
        INPUT relationship
    """
    rejected = DROPRel(inputDrop.uid, DROPLinkType.INPUT, self.uid)
    raise InvalidRelationshipException(
        rejected, "SocketListenerApp should have no inputs")
def test_runWithFourDMs(self):
    """
    Deploys several DROPs across four different DMs and runs the resulting
    graph, which looks like this:

                     DM #2
                     +--------------------------+
                     |        |--> C --|        |
                 +---|--> B --|--> D --|--> F --|--|
                 |   |        |--> E --|        |  |
        DM #1    |   +--------------------------+  |  DM #4
        +-----+  |                                 |  +---------------------+
        |     |  |                                 |--|--> L --|            |
        | A --|--+                                    |        |--> N --> O |
        |     |  |                                 |--|--> M --|            |
        +-----+  |   DM #3                         |  +---------------------+
                 |   +--------------------------+  |
                 |   |        |--> H --|        |  |
                 +---|--> G --|--> I --|--> K --|--|
                     |        |--> J --|        |
                     +--------------------------+

    B, F, G, K and N are AppDOs; the rest are plain in-memory DROPs
    """
    all_dms = [self._start_dm() for _ in range(4)]
    dm1, dm2, dm3, dm4 = all_dms
    sessionId = 's1'

    # One graph per DM, none of them mentioning drops living elsewhere
    g1 = [memory('A', expectedSize=1)]
    g2 = [
        sleepAndCopy('B', outputs=['C', 'D', 'E'], sleepTime=0),
        memory('C'),
        memory('D'),
        memory('E'),
        sleepAndCopy('F', inputs=['C', 'D', 'E'], sleepTime=0),
    ]
    g3 = [
        sleepAndCopy('G', outputs=['H', 'I', 'J'], sleepTime=0),
        memory('H'),
        memory('I'),
        memory('J'),
        sleepAndCopy('K', inputs=['H', 'I', 'J'], sleepTime=0),
    ]
    g4 = [
        memory('L'),
        memory('M'),
        sleepAndCopy('N', inputs=['L', 'M'], outputs=['O'], sleepTime=0),
        memory('O'),
    ]

    # Relationships between pairs of DMs, named after the DMs they link
    rels_12 = [DROPRel('A', DROPLinkType.INPUT, 'B')]
    rels_13 = [DROPRel('A', DROPLinkType.INPUT, 'G')]
    rels_24 = [DROPRel('F', DROPLinkType.PRODUCER, 'L')]
    rels_34 = [DROPRel('K', DROPLinkType.PRODUCER, 'M')]

    quickDeploy(dm1, sessionId, g1,
                {nm_conninfo(1): rels_12, nm_conninfo(2): rels_13})
    quickDeploy(dm2, sessionId, g2,
                {nm_conninfo(0): rels_12, nm_conninfo(3): rels_24})
    quickDeploy(dm3, sessionId, g3,
                {nm_conninfo(0): rels_13, nm_conninfo(3): rels_34})
    quickDeploy(dm4, sessionId, g4,
                {nm_conninfo(1): rels_24, nm_conninfo(2): rels_34})

    for dm, n_drops in zip(all_dms, (1, 5, 5, 4)):
        self.assertEqual(n_drops, len(dm._sessions[sessionId].drops))

    a = dm1._sessions[sessionId].drops['A']
    o = dm4._sessions[sessionId].drops['O']
    drops = [
        drop
        for dm in all_dms
        for drop in dm._sessions[sessionId].drops.values()
    ]

    # Run! This should trigger the full execution of the graph
    with droputils.DROPWaiterCtx(self, o, 5):
        a.write('a')

    for drop in drops:
        self.assertEqual(
            DROPStates.COMPLETED, drop.status,
            "Status of '%s' is not COMPLETED: %d" % (drop.uid, drop.status))

    for dm in all_dms:
        dm.destroySession(sessionId)
def test_runGraphSeveralDropsPerDM(self):
    """
    Deploys several DROPs across two different DMs and runs the resulting
    graph, which looks like this:

        DM #1                  DM #2
        ===================    ================
        | A --> C --> D --|----|-|            |
        |                 |    | |--> E --> F |
        | B --------------|----|-|            |
        ===================    ================

    :see: `self.test_runGraphOneDOPerDOM`
    """
    dm1, dm2 = [self._start_dm() for _ in range(2)]
    sessionId = 's1'

    g1 = [
        {"oid": "A", "type": "plain", "storage": "memory", "consumers": ["C"]},
        {"oid": "B", "type": "plain", "storage": "memory"},
        {"oid": "C", "type": "app", "app": "dfms.apps.crc.CRCApp"},
        {"oid": "D", "type": "plain", "storage": "memory", "producers": ["C"]},
    ]
    g2 = [
        {"oid": "E", "type": "app",
         "app": "test.test_drop.SumupContainerChecksum"},
        {"oid": "F", "type": "plain", "storage": "memory", "producers": ["E"]},
    ]

    # Both D and B, living in DM #1, are inputs of E, which lives in DM #2
    rels = [DROPRel(oid, DROPLinkType.INPUT, 'E') for oid in ('D', 'B')]
    quickDeploy(dm1, sessionId, g1, {nm_conninfo(1): rels})
    quickDeploy(dm2, sessionId, g2, {nm_conninfo(0): rels})

    self.assertEqual(4, len(dm1._sessions[sessionId].drops))
    self.assertEqual(2, len(dm2._sessions[sessionId].drops))

    # Run! Getting through this without exceptions is already good proof
    # that things work as expected
    a, b, c, d = [
        dm1._sessions[sessionId].drops[x] for x in ('A', 'B', 'C', 'D')
    ]
    e, f = [dm2._sessions[sessionId].drops[x] for x in ('E', 'F')]
    with droputils.DROPWaiterCtx(self, f, 5):
        a.write('a')
        a.setCompleted()
        b.write('a')
        b.setCompleted()

    for drop in (a, b, c, d, e, f):
        self.assertEqual(DROPStates.COMPLETED, drop.status,
                         "DROP %s is not COMPLETED" % (drop.uid))

    # D holds the checksum of A, and F the sum of the checksums of B and D
    self.assertEqual(a.checksum, int(droputils.allDropContents(d)))
    self.assertEqual(b.checksum + d.checksum,
                     int(droputils.allDropContents(f)))

    dm1.destroySession(sessionId)
    dm2.destroySession(sessionId)