def exchange_public_keys(connection):
    """Swap public keys with the peer reachable through ``connection``.

    A fresh key pair is generated, our exported public key is sent first,
    and the peer's exported public key is then received and imported.

    Args:
        connection: Handle passed unchanged to ``NetworkUtil.send_message``
            and ``NetworkUtil.receive_message``.

    Returns:
        A ``(keypair, supervisor_public_key)`` tuple: the locally generated
        key pair and the imported public key received from the peer.
    """
    own_keypair = CryptoUtil.generate_keypair()

    # Our public key goes out before we wait for the peer's key.
    exported_public_key = own_keypair.public_key().exportKey()
    NetworkUtil.send_message(connection, exported_public_key)

    # The peer replies with its own exported public key.
    received_key = NetworkUtil.receive_message(connection)
    peer_public_key = RSA.importKey(received_key)

    return own_keypair, peer_public_key
def send_purchase_to_supervisor(supervisor_connection, purchase_message,
                                keypair, supervisor_public_key):
    """Encrypt, sign and send a purchase message, then wait for the reply.

    Args:
        supervisor_connection: Connection handle used for sending/receiving.
        purchase_message: Purchase text (``str``); it is encoded before
            being encrypted and signed.
        keypair: Our key pair, passed to ``CryptoUtil.sign``.
        supervisor_public_key: Key passed to ``CryptoUtil.encrypt``.

    Returns:
        ``(encrypted_purchase_message, signature, confirmation)`` where
        ``confirmation`` is whatever the supervisor sent back.
    """
    payload = purchase_message.encode()

    # Ciphertext is produced with the supervisor's public key; the signature
    # is produced over the plaintext with our own key pair.
    ciphertext = CryptoUtil.encrypt(payload, supervisor_public_key)
    signature = CryptoUtil.sign(payload, keypair)

    # Wire order: encrypted message first, signature second.
    for outgoing in (ciphertext, signature):
        NetworkUtil.send_message(supervisor_connection, outgoing)

    confirmation = NetworkUtil.receive_message(supervisor_connection)
    return ciphertext, signature, confirmation
def extract_data(html_page):
    """Crawl from the landing page down to every room page and collect details.

    Steps, in order:
      1. Find the ``/locations/`` link on the landing page.
      2. Fetch the locations page and queue every element found with
         ``class_='location card'``.
      3. Fetch each queued location page and gather its room links through
         ``extract_rooms_feed``.
      4. Fetch each room page and extract its detail record through
         ``extract_room_detail``.

    Args:
        html_page: Response-like object whose ``content`` attribute is handed
            to ``parse_html_content``.

    Returns:
        A list with one ``extract_room_detail`` result per room link, or
        ``CustomConstants.SOMETHING_WENT_WRONG_WHILE_FETCHING_LOCATIONS`` when
        no usable ``/locations/`` link is found on the landing page.
    """
    locations_href = ''
    landing_page = parse_html_content(html_page.content)
    for link in landing_page.find_all('a'):
        if link.get('href') == '/locations/':
            locations_href = link.get('href')
            break

    # Only a relative '/locations/' link is accepted; anything else is
    # reported to the caller through the error sentinel.
    if locations_href == '' or checkers.is_url(locations_href):
        return CustomConstants.SOMETHING_WENT_WRONG_WHILE_FETCHING_LOCATIONS
    CustomConstants.URL_TO_BE_VISITED.add(
        NetworkUtil.get_absolute_url(locations_href))

    html_page = NetworkUtil.read_from_network(
        CustomConstants.URL_TO_BE_VISITED.pop())
    parsed_html_page = parse_html_content(html_page.content)
    location_cards = parsed_html_page.find_all(class_='location card')
    if len(location_cards) > 0:
        clear_set_data()
        for location_card in location_cards:
            link = location_card.get('href')
            if not checkers.is_url(link):
                link = NetworkUtil.get_absolute_url(link)
            # Fix: the original called the set itself for relative links
            # (``URL_TO_BE_VISITED(link)``), which raises TypeError; the link
            # has to be added to it.
            CustomConstants.URL_TO_BE_VISITED.add(link)

    room_links = set()
    for location in CustomConstants.URL_TO_BE_VISITED:
        html = NetworkUtil.read_from_network(location)
        parsed_html = parse_html_content(html.content)
        room_links.update(extract_rooms_feed(parsed_html))
        # Pause between requests (presumably to avoid hammering the site).
        time.sleep(3.0)
    clear_set_data()

    room_detail_list = []
    for room_link in room_links:
        html_page = NetworkUtil.read_from_network(room_link)
        parsed_html_page = parse_html_content(html_page.content)
        room_detail_list.append(extract_room_detail(parsed_html_page))
        time.sleep(3.0)
    return room_detail_list
def start_scraping(self):
    """Run one full scrape: fetch, extract, persist and print the analysis.

    The start URL (``CustomConstants.URL_TO_BE_CRAWLED``) is fetched, room
    details are extracted from it, written to and re-read from
    ``CustomConstants.JSON_DATA_FILE_NAME``, and a per-city analysis is
    printed. Fetch and extraction failures are reported on stdout and end
    the run early.
    """
    CustomUtil.clear_all_files()
    CustomConstants.URL_TO_BE_VISITED.add(CustomConstants.URL_TO_BE_CRAWLED)
    html_response = NetworkUtil.read_from_network(
        CustomConstants.URL_TO_BE_VISITED.pop())

    # read_from_network reports failures through sentinel return values.
    if html_response == CustomConstants.URL_IS_NOT_VALID:
        print(CustomConstants.URL_IS_NOT_VALID_MSG)
        return
    if html_response == CustomConstants.ERROR_OCCURED_WHILE_SENDING_REQUEST:
        print(CustomConstants.ERROR_OCCURED_WHILE_SENDING_REQUEST_MSG)
        return

    room_detail_list = CustomUtil.extract_data(html_response)

    # Fix: extract_data can return an error sentinel instead of a list.
    # Previously that sentinel flowed into len()/write_data_into_file as if
    # it were room data.
    # NOTE(review): the sentinel itself is printed because no dedicated
    # message constant is visible here; swap in one if it exists.
    if room_detail_list == CustomConstants.SOMETHING_WENT_WRONG_WHILE_FETCHING_LOCATIONS:
        print(room_detail_list)
        return

    if len(room_detail_list) > 0:
        CustomUtil.write_data_into_file(
            CustomConstants.JSON_DATA_FILE_NAME, room_detail_list)
        # The analysis runs on the data as read back from the file.
        room_detail_list = CustomUtil.read_file(
            CustomConstants.JSON_DATA_FILE_NAME)
        city_list = CustomUtil.get_city_list(room_detail_list)
        for city in city_list:
            city_room_detail_list = CustomUtil.get_city_data(
                city, room_detail_list)
            room_capacity_list = CustomUtil.get_room_capacity_list(
                city_room_detail_list)
            CustomUtil.print_analysis(city, room_capacity_list,
                                      city_room_detail_list)
def sendAndEncode(self, msg):
    """Send ``msg`` to the peer over the current transport.

    Websocket connections delegate to ``NU.sendData``; plain connections
    send the UTF-8 encoding of ``msg`` directly on ``self.socket``.

    Args:
        msg: Text to send (``str``).

    Returns:
        ``True`` when the message was handed off successfully, ``False``
        when ``NU.sendData`` reports failure. Errors raised by the plain
        socket propagate to the caller, as before.
    """
    if self.isWebsocket:
        return bool(NU.sendData(self.socket, msg))

    # sendall() keeps writing until the whole buffer is transmitted, whereas
    # send() may stop after a partial write and silently truncate the message.
    # NOTE(review): assumes self.socket is a standard socket object.
    self.socket.sendall(msg.encode("utf-8"))
    # Report success explicitly so both transports share one return contract
    # (this branch used to fall through and return None).
    return True
def handle_supervisor_purchase_message(supervisor_connection, keypair,
                                       customer_public_key,
                                       supervisor_public_key):
    """Receive, decrypt and verify a purchase forwarded by the supervisor.

    Three messages are read from the connection in this order: the
    customer's signature, the encrypted purchase message, and the
    supervisor's signature.

    Args:
        supervisor_connection: Connection handle to read from.
        keypair: Our key pair, used to decrypt the purchase message.
        customer_public_key: Key used to check the customer's signature.
        supervisor_public_key: Key used to check the supervisor's signature.

    Returns:
        ``(encrypted_purchase_message, supervisor_signature,
        purchase_message)``.
    """
    # The receive order mirrors the order the supervisor sends in.
    customer_signature = NetworkUtil.receive_message(supervisor_connection)
    encrypted_purchase_message = NetworkUtil.receive_message(
        supervisor_connection)
    supervisor_signature = NetworkUtil.receive_message(supervisor_connection)

    decrypted = CryptoUtil.decrypt(encrypted_purchase_message, keypair)
    purchase_message = decrypted.decode()
    print("Purchase message is: {}".format(purchase_message))

    # NOTE(review): the results of verify() are not inspected here;
    # presumably it raises on a bad signature -- confirm in CryptoUtil.
    signed_by = (
        (customer_public_key, customer_signature),
        (supervisor_public_key, supervisor_signature),
    )
    for public_key, signature in signed_by:
        CryptoUtil.verify(purchase_message.encode(), public_key, signature)

    return encrypted_purchase_message, supervisor_signature, purchase_message
def handle_customer_purchase_message_and_confirm(customer_connection, keypair,
                                                 customer_public_key):
    """Receive a customer's purchase, verify it and ask the operator to confirm.

    Args:
        customer_connection: Connection handle to read from.
        keypair: Our key pair, used to decrypt the purchase message.
        customer_public_key: Key used to check the customer's signature.

    Returns:
        ``(confirmation, customer_signature, purchase_message,
        encrypted_purchase_message)`` where ``confirmation`` is the raw text
        typed at the prompt.
    """
    # The customer sends the ciphertext first, then its signature.
    encrypted_purchase_message = NetworkUtil.receive_message(
        customer_connection)
    customer_signature = NetworkUtil.receive_message(customer_connection)

    decrypted = CryptoUtil.decrypt(encrypted_purchase_message, keypair)
    purchase_message = decrypted.decode()
    print("Purchase message is: {}".format(purchase_message))

    # NOTE(review): verify()'s result is not inspected; presumably it raises
    # on a bad signature -- confirm in CryptoUtil.
    CryptoUtil.verify(purchase_message.encode(), customer_public_key,
                      customer_signature)

    # The message layout is "<timestamp>|<item>".
    fields = purchase_message.split("|")
    timestamp = fields[0]
    item = fields[1]
    print("Customer would like to purchase {} at {}.".format(item, timestamp))

    confirmation = input("Confirm the above purchase? (y/n) ")
    return (confirmation, customer_signature, purchase_message,
            encrypted_purchase_message)
def DummyNeighbors(allProts, path, stpFile, dnFile, neighborType):
    """Write the dummy node's neighbor list and return potential Steiner nodes.

    Prize nodes are the second field of every ``W`` line of the stp file,
    excluding names that end in ``_MRNA``.

    Args:
        allProts: Set of all protein names.
        path: Directory that holds ``stpFile`` and receives ``dnFile``.
        stpFile: Name of the stp file to read.
        dnFile: Name of the neighbor file to write.
        neighborType: ``"prizes"`` to connect the dummy node to the prize
            nodes, ``"nonprizes"`` to connect it to all other proteins.

    Returns:
        The set of proteins in ``allProts`` that are not prize nodes.

    Raises:
        RuntimeError: If ``neighborType`` is neither ``"prizes"`` nor
            ``"nonprizes"``.
    """
    prizes = set()
    with open(os.path.join(path, stpFile)) as f:
        for line in f:
            parts = line.split()
            # Blank lines and lines without a node name carry no prize;
            # skipping them avoids an IndexError on parts[0] / parts[1].
            if len(parts) < 2:
                continue
            # mRNAs can't be Steiner nodes
            if parts[0] == "W" and not parts[1].endswith("_MRNA"):
                prizes.add(parts[1])

    psNodes = allProts.difference(prizes)

    if neighborType == "prizes":
        NetworkUtil.WriteCollection(os.path.join(path, dnFile), prizes)
    elif neighborType == "nonprizes":
        NetworkUtil.WriteCollection(os.path.join(path, dnFile), psNodes)
    else:
        raise RuntimeError(
            "%s is not a valid type of dummy node neighbor connection"
            % neighborType)
    return psNodes
def listen_for_department_and_customer():
    """Open the supervisor's listening socket and accept both clients.

    The department is accepted first and the customer second, both on the
    same listening socket bound to ``NetworkUtil.server_port``.

    Returns:
        ``(department_connection, customer_connection)``.
    """
    # Connection as Supervisor being Index Server (department and customer
    # as clients)
    print("Supervisor will start on localhost")
    listening_socket = NetworkUtil.create_listening_socket(
        NetworkUtil.server_port)
    for status_line in (
            "Supervisor done binding to host and port successfully",
            "Supervisor is waiting for incoming connections",
            "\nListening for Department..."):
        print(status_line)

    accepted = NetworkUtil.accept_connection(listening_socket)
    department_connection, addr_dept = accepted
    print(addr_dept,
          " Department has connected to the Supervisor and is now online...")

    print("\nListening for Customer...")
    accepted = NetworkUtil.accept_connection(listening_socket)
    customer_connection, addr_cust = accepted
    print(addr_cust,
          "Customer Has connected to the Supervisor and is now online...")

    return department_connection, customer_connection
def receiveAndDecode(self):
    """Read one message from the socket and return it as text.

    Up to ``self.messageSize`` bytes are read. Websocket connections are
    decoded with ``NU.decode``; plain connections are decoded as UTF-8.

    Returns:
        The decoded message; ``None`` when ``NU.decode`` yields ``None``
        (the failure is logged); ``False`` when the socket raises or the
        received bytes cannot be decoded (also logged).
    """
    try:
        if self.isWebsocket:
            decodedMsg = NU.decode(self.socket.recv(self.messageSize))
            if decodedMsg is None:
                self.log.logAndPrintError(
                    "Error decoding message, maybe wrong format?")
                return None
            return decodedMsg
        return self.socket.recv(self.messageSize).decode("utf-8")
    except socket.error:
        self.log.logAndPrintError(
            "Connection reset by peer, if reocurring restart server")
        return False
    except UnicodeDecodeError:
        # Bytes from the network are untrusted; malformed UTF-8 must not
        # escape as an unhandled exception. Report it like any other failed
        # read so callers' "if not msg" checks handle it.
        self.log.logAndPrintError(
            "Error decoding message, maybe wrong format?")
        return False
def Evaluate(network2Pathway, outFileName, fraction, noise, weightedNetworks=False):
    """Score each network against its pathway and write a tab-separated report.

    For every (network file, pathway file) pair, node and edge precision and
    recall of the network with respect to the pathway are written as one row,
    followed by a final row holding the averages over all pairs.

    Args:
        network2Pathway: Sized sequence of ``(forestFile, pathwayFile)``
            pairs. The networks are named "forest" but can be any network.
        outFileName: Path of the report to write.
        fraction: Fraction of pathway nodes counted as true prizes.
        noise: Ratio of noisy prizes to true prizes.
        weightedNetworks: Passed as ``weight`` to ``NetworkUtil.LoadNetwork``.

    Precision for an empty network, recall against an empty pathway, and the
    averages over an empty ``network2Pathway`` are all reported as 0 instead
    of raising ZeroDivisionError.
    """
    def ratio(numerator, denominator):
        # A ratio over nothing is reported as 0, matching the convention the
        # report already used for empty forests.
        if denominator == 0:
            return 0.0
        return float(numerator) / denominator

    header = "\t".join([
        "Steiner forest", "Pathway", "True prizes", "Noisy prizes",
        "Forest nodes", "Pathway nodes", "Intersection nodes",
        "Node precision", "Node recall",
        "Forest edges", "Pathway edges", "Intersection edges",
        "Edge precision", "Edge recall",
    ]) + "\n"
    rowFormat = "%s\t%s\t%d\t%d\t%d\t%d\t%d\t%f\t%f\t%d\t%d\t%d\t%f\t%f\n"

    with open(outFileName, "w") as outFile:
        npSum = 0.0
        nrSum = 0.0
        epSum = 0.0
        erSum = 0.0
        outFile.write(header)

        # The name forestFile assumes the networks to evaluate are Steiner
        # forests, but they can be any network
        for forestFile, pathwayFile in network2Pathway:
            forest = NetworkUtil.LoadNetwork(forestFile, weight=weightedNetworks)
            # Remove the artificial node if the forest is not empty
            if "DUMMY" in forest:
                forest.remove_node("DUMMY")

            # NetworkUtil.LoadNetwork only works for the simple format used
            # when writing synthetic pathways. LoadGraphiteNetwork works for
            # the simple format and the graphite edge list.
            pathway = NetworkUtil.LoadGraphiteNetwork(pathwayFile)
            intersection = NetworkUtil.Intersection(forest, pathway)

            # order() is used for node counts and size() for edge counts.
            nPrecision = ratio(intersection.order(), forest.order())
            nRecall = ratio(intersection.order(), pathway.order())
            ePrecision = ratio(intersection.size(), forest.size())
            eRecall = ratio(intersection.size(), pathway.size())
            npSum += nPrecision
            nrSum += nRecall
            epSum += ePrecision
            erSum += eRecall

            truePrizes = int(math.ceil(fraction * pathway.order()))
            noisyPrizes = int(math.ceil(noise * truePrizes))

            outFile.write(rowFormat % (
                os.path.basename(forestFile), os.path.basename(pathwayFile),
                truePrizes, noisyPrizes,
                forest.order(), pathway.order(), intersection.order(),
                nPrecision, nRecall,
                forest.size(), pathway.size(), intersection.size(),
                ePrecision, eRecall))

        # Write the average node/edge precision/recall
        numPairs = len(network2Pathway)
        outFile.write("Average\t\t\t\t\t\t\t%f\t%f\t\t\t\t%f\t%f\n" % (
            ratio(npSum, numPairs), ratio(nrSum, numPairs),
            ratio(epSum, numPairs), ratio(erSum, numPairs)))
def receiveAndDecode(self):
    """Read one message from the socket and return it as text.

    Up to ``self.messageSize`` bytes are read. Websocket connections are
    decoded with ``NU.decode``; plain connections are decoded as UTF-8.

    Returns:
        The decoded message; ``None`` when ``NU.decode`` yields ``None``
        (the failure is logged); ``False`` when the socket raises or the
        received bytes cannot be decoded (also logged).
    """
    try:
        if self.isWebsocket:
            decodedMsg = NU.decode(self.socket.recv(self.messageSize))
            if decodedMsg is None:
                self.log.logAndPrintError(
                    "Error decoding message, maybe wrong format?")
                return None
            return decodedMsg
        return self.socket.recv(self.messageSize).decode("utf-8")
    except socket.error:
        self.log.logAndPrintError(
            "Connection reset by peer, if reocurring restart server")
        return False
    except UnicodeDecodeError:
        # Bytes from the network are untrusted; malformed UTF-8 must not
        # escape as an unhandled exception. Report it like any other failed
        # read so callers' "if not msg" checks handle it.
        self.log.logAndPrintError(
            "Error decoding message, maybe wrong format?")
        return False
def performHandshake(self):
    """Advance the connection handshake by one step.

    Each call performs the step selected by ``self.handshakeStatus``:

    * 0 -- receive either a websocket upgrade request (text starting with
      ``GET``), which is answered with ``NU.create_handshake``, or a JSON
      ConnREQ message, which advances the status to 1.
    * 1 -- send the ConnACK and advance to 2.
    * 2 -- send ``self.LAO`` and advance to 3.
    * otherwise -- receive the JSON ConnSTT message and mark the connection
      as established.

    When a receive step yields no message, the socket is closed and the step
    ends without touching the handshake state.

    Raises:
        Exception: If the handshake has already been completed.
    """
    if self.established:
        raise Exception("Handshake already performed")

    # receive connreq or perform websocket handshake if client is connecting
    # over websockets
    if self.handshakeStatus == 0:
        inputMSG = self.receiveAndDecode()
        if not inputMSG:
            self.socket.close()
            # Fix: stop here. Execution used to continue and slice the
            # falsy result (False/None), raising TypeError.
            return
        if inputMSG[:3] == "GET":
            handshake = NU.create_handshake(inputMSG)
            self.sendAndEncode(handshake)
            # Set only after the handshake went out, so the handshake itself
            # is sent through the plain-socket path.
            self.isWebsocket = True
        else:
            try:
                msg = json.loads(inputMSG)
                if self.midac.GetMessageType(msg) == MSGType.ConnREQ:
                    self.handshakeStatus = 1
            except ValueError:
                self.log.logAndPrintError("Error while parsing input")
    # send connack to client
    elif self.handshakeStatus == 1:
        self.sendAndEncode(
            self.midac.GenerateConnACK("None", self.conf.SEGMENT_SIZE))
        self.handshakeStatus = 2
    # send connlao to client
    elif self.handshakeStatus == 2:
        self.sendAndEncode(self.LAO)
        self.handshakeStatus = 3
    # receive connstt and set status to established
    else:
        inputMSG = self.receiveAndDecode()
        if not inputMSG:
            self.socket.close()
            # Fix: json.loads(False/None) raises TypeError, which the
            # ValueError handler below does not catch.
            return
        try:
            msg = json.loads(inputMSG)
            if self.midac.GetMessageType(msg) == MSGType.ConnSTT:
                self.established = True
        except ValueError:
            self.established = False
def performHandshake(self):
    """Advance the connection handshake by one step.

    Each call performs the step selected by ``self.handshakeStatus``:

    * 0 -- receive either a websocket upgrade request (text starting with
      ``GET``), which is answered with ``NU.create_handshake``, or a JSON
      ConnREQ message, which advances the status to 1.
    * 1 -- send the ConnACK and advance to 2.
    * 2 -- send ``self.LAO`` and advance to 3.
    * otherwise -- receive the JSON ConnSTT message and mark the connection
      as established.

    When a receive step yields no message, the socket is closed and the step
    ends without touching the handshake state.

    Raises:
        Exception: If the handshake has already been completed.
    """
    if self.established:
        raise Exception("Handshake already performed")

    # receive connreq or perform websocket handshake if client is connecting
    # over websockets
    if self.handshakeStatus == 0:
        inputMSG = self.receiveAndDecode()
        if not inputMSG:
            self.socket.close()
            # Fix: stop here. Execution used to continue and slice the
            # falsy result (False/None), raising TypeError.
            return
        if inputMSG[:3] == "GET":
            handshake = NU.create_handshake(inputMSG)
            self.sendAndEncode(handshake)
            # Set only after the handshake went out, so the handshake itself
            # is sent through the plain-socket path.
            self.isWebsocket = True
        else:
            try:
                msg = json.loads(inputMSG)
                if self.midac.GetMessageType(msg) == MSGType.ConnREQ:
                    self.handshakeStatus = 1
            except ValueError:
                self.log.logAndPrintError("Error while parsing input")
    # send connack to client
    elif self.handshakeStatus == 1:
        self.sendAndEncode(
            self.midac.GenerateConnACK("None", self.conf.SEGMENT_SIZE))
        self.handshakeStatus = 2
    # send connlao to client
    elif self.handshakeStatus == 2:
        self.sendAndEncode(self.LAO)
        self.handshakeStatus = 3
    # receive connstt and set status to established
    else:
        inputMSG = self.receiveAndDecode()
        if not inputMSG:
            self.socket.close()
            # Fix: json.loads(False/None) raises TypeError, which the
            # ValueError handler below does not catch.
            return
        try:
            msg = json.loads(inputMSG)
            if self.midac.GetMessageType(msg) == MSGType.ConnSTT:
                self.established = True
        except ValueError:
            self.established = False
def reencrypt_and_resign_purchase_message_to_department(
        department_connection, customer_signature, purchase_message,
        department_public_key):
    """Forward a confirmed purchase to the department.

    Three messages are sent, in order: the customer's original signature,
    the purchase message encrypted for the department, and our own
    signature over the purchase message.

    Args:
        department_connection: Connection handle to send on.
        customer_signature: Signature received from the customer, forwarded
            unchanged.
        purchase_message: Plaintext purchase message (``str``).
        department_public_key: Key passed to ``CryptoUtil.encrypt``.

    Note:
        The signing key is not a parameter; ``keypair`` is read from module
        scope and must be defined before this function is called.
    """
    # The department first gets the customer's signature as-is.
    NetworkUtil.send_message(department_connection, customer_signature)

    # Same plaintext, new recipient: encrypt for the department and sign it
    # ourselves.
    plaintext = purchase_message.encode()
    encrypted_purchase_message = CryptoUtil.encrypt(plaintext,
                                                    department_public_key)
    supervisor_signature = CryptoUtil.sign(plaintext, keypair)

    for outgoing in (encrypted_purchase_message, supervisor_signature):
        NetworkUtil.send_message(department_connection, outgoing)
def exchange_public_keys(department_connection, customer_connection):
    """Exchange public keys with the department and the customer.

    A fresh key pair is generated and its public key is sent to both peers
    (department first). Each peer's public key is then received and
    imported, and the customer's public key is relayed to the department.

    Args:
        department_connection: Connection handle for the department.
        customer_connection: Connection handle for the customer.

    Returns:
        ``(keypair, department_public_key, customer_public_key)``.
    """
    keypair = CryptoUtil.generate_keypair()

    # Announce our public key: department first, then customer.
    for peer in (department_connection, customer_connection):
        NetworkUtil.send_message(peer, keypair.public_key().exportKey())

    # Collect the peers' public keys in the same order.
    department_key_data = NetworkUtil.receive_message(department_connection)
    department_public_key = RSA.importKey(department_key_data)
    customer_key_data = NetworkUtil.receive_message(customer_connection)
    customer_public_key = RSA.importKey(customer_key_data)

    # The department also receives the customer's public key from us.
    NetworkUtil.send_message(department_connection,
                             customer_public_key.exportKey())

    return keypair, department_public_key, customer_public_key
def LoadPathways(pathwayPath, listFile):
    """Load every pathway named in ``listFile``.

    Each line of ``listFile`` is a path relative to ``pathwayPath``. Every
    loaded pathway gets two graph attributes: ``"filename"`` (the joined
    path) and ``"name"`` (the relative path without a trailing ``.txt``).

    Args:
        pathwayPath: Directory the listed paths are relative to.
        listFile: Text file with one relative pathway path per line.

    Returns:
        The loaded pathways as a list, in file order.
    """
    loaded = []
    with open(listFile) as inFile:
        for rawLine in inFile:
            relativePath = rawLine.strip()
            fullPath = os.path.join(pathwayPath, relativePath)

            pathway = NetworkUtil.LoadGraphiteNetwork(fullPath)
            pathway.graph["filename"] = fullPath

            # Remove ".txt"
            name = relativePath
            if name.endswith(".txt"):
                name = name[:-4]
            pathway.graph["name"] = name

            # Debugging. A single parenthesised expression prints the same
            # text with the Python 2 print statement.
            print("Loaded %s with %d nodes and %d edges" % (
                pathway.graph["name"], pathway.order(), pathway.size()))

            loaded.append(pathway)
    return loaded
def main(argList):
    """Load or generate pathways and draw samples from them.

    Args:
        argList: Argument list handed to the option parser; it comes either
            from the command line or from Python code calling this function.

    Raises:
        RuntimeError: If no network file is given, if pathways are to be
            loaded without both ``pathwayPath`` and ``pathwayListFile``, or
            if the pathway source is not recognized.
    """
    parser = CreateParser()
    opts, _ = parser.parse_args(argList)
    print("Parameters: %s" % opts)

    # Unset options carry the literal string "None".
    if opts.networkFile == "None":
        raise RuntimeError("Must specify an network filename")
    loadingPathways = opts.pathwaySource == "load"
    if loadingPathways and "None" in (opts.pathwayPath, opts.pathwayListFile):
        raise RuntimeError(
            "Must specify pathwayPath and pathwayListFile when loading pathways")

    # Create the output path if needed
    if not os.path.exists(opts.outPath):
        print("Creating output directory %s" % opts.outPath)
        os.makedirs(opts.outPath)

    # Load the interaction network
    network = NetworkUtil.LoadNetwork(opts.networkFile, weight=True)

    # Load or generate the pathways
    if loadingPathways:
        pathways = LoadPathways(opts.pathwayPath, opts.pathwayListFile)
    elif opts.pathwaySource == "generate":
        pathways = GeneratePathways(network, opts.numPathways, opts.branching,
                                    opts.depth, opts.outPath, opts.name)
    else:
        # Shouldn't be able to get to this case
        raise RuntimeError("%s is not a recognized pathway source"
                           % opts.pathwaySource)

    # Sample from the pathways
    networkNodes = set(network.nodes())
    CreateSamples(pathways, opts.samples, opts.fraction, opts.outPath,
                  opts.name, opts.noise, opts.sampleGroups, networkNodes)
def CreateWgtPrizes(allProts, lastForests, lambda1, alpha, negativePrizes):
    """Build frequency-weighted artificial prizes from the previous forests.

    ``freq`` is the value ``NetworkUtil.SetFrequency(lastForests)`` stores
    for a node (0 when the node is absent).

    * Negative prizes: every protein with ``freq < 1`` gets
      ``-lambda1 * (1 - freq) ** alpha``.
    * Positive prizes: every node that has a frequency gets
      ``lambda1 * freq ** alpha``.

    Args:
        allProts: Iterable of all protein names.
        lastForests: Forest node collections handed to
            ``NetworkUtil.SetFrequency``.
        lambda1: Scale of the artificial prizes.
        alpha: Exponent applied to the frequency term.
        negativePrizes: Selects negative (True) or positive (False) prizes.

    Returns:
        A dict mapping node name to artificial prize.
    """
    nodeFrequency = NetworkUtil.SetFrequency(lastForests)
    prizeByNode = {}

    if not negativePrizes:
        # For positive prizes only the nodes that appear in some forest
        # matter; their frequency is > 0 because the keys are only the union
        # of all forest nodes.
        for node in nodeFrequency:
            prizeByNode[node] = lambda1 * (nodeFrequency[node] ** alpha)
        return prizeByNode

    # Negative prizes must consider every protein, including those that
    # never appear in a forest.
    for node in allProts:
        freq = nodeFrequency[node] if node in nodeFrequency else 0
        # Nodes present in all forests would get a zero prize; skip them.
        if freq >= 1:
            continue
        prizeByNode[node] = -lambda1 * ((1 - freq) ** alpha)
    return prizeByNode
def RandSequential(opts, initPath, allProts, sampleMap, potentialSteinerMap,
                   dummyNeighborMap, lastForestMap, countMap, weightedPrizes,
                   negativePrizes, degPenalties):
    """Run iterations 2..N, learning forests one sample at a time in random order.

    Within each group the samples are visited in a freshly shuffled order.
    Each sample's artificial prizes are derived from the current forests of
    all other samples in the group, so later samples in the order already
    see forests learned earlier in the same iteration.

    Args:
        opts: Parsed options; reads ``iterations``, ``resultPath``,
            ``lambda1``, ``lambda2``, ``workers``, ``W`` and ``depth``.
        initPath: Directory holding the initial stp and dummy neighbor files.
        allProts: All protein names.
        sampleMap: group -> list of sample names.
        potentialSteinerMap: group -> list of potential Steiner node sets.
        dummyNeighborMap: group -> list of dummy neighbor file names.
        lastForestMap: group -> list of forest node sets; updated in place.
        countMap: group -> number of samples.
        weightedPrizes: Use ``CreateWgtPrizes`` instead of
            ``CreateUnwgtPrizes``.
        negativePrizes: Passed through to the prize construction.
        degPenalties: Degree penalties passed to ``UpdateStp``.

    Returns:
        The directory of the last iteration (``initPath`` if no iteration
        ran).

    Raises:
        RuntimeError: If a group's per-sample lists differ in length from
            its sample count.
    """
    print("Learning forests in random sequential mode")
    forestNameFormat = "%s/symbol_%s_%s_1.0_%d.txt"

    # Iterate (rounds 2+)
    itrPath = initPath
    for itr in range(2, opts.iterations + 1):
        itrPath = os.path.join(opts.resultPath, "itr%d" % itr)
        if not os.path.exists(itrPath):
            os.makedirs(itrPath)

        # Only constrain the Steiner forests to be similar to other samples
        # in the same group
        for group in sampleMap:
            sampleNames = sampleMap[group]
            numSamples = countMap[group]
            potentialSteiner = potentialSteinerMap[group]
            dummyNeighborFiles = dummyNeighborMap[group]
            lastForests = lastForestMap[group]

            perSampleLists = (sampleNames, potentialSteiner,
                              dummyNeighborFiles, lastForests)
            if any(len(items) != numSamples for items in perSampleLists):
                raise RuntimeError(
                    "Must have the same number of samples in group %s" % group)

            # Randomly choose the order in which to learn forests at this
            # iteration
            order = list(range(numSamples))
            random.shuffle(order)

            # Write the order to a file
            orderFile = os.path.join(itrPath, "sampleOrder_%s.txt" % group)
            with open(orderFile, "w") as f:
                for index in order:
                    f.write("%d\t%s\n" % (index, sampleNames[index]))

            # Iterate over all samples in the random order
            for index in order:
                sampleName = sampleNames[index]
                dummyNeighborFile = dummyNeighborFiles[index]

                # Create artificial prizes for this sample using all N-1
                # lastForests
                otherLastForests = list(lastForests)
                del otherLastForests[index]
                if weightedPrizes:
                    # lambda2 is used as the alpha parameter
                    artificialPrizes = CreateWgtPrizes(
                        allProts, otherLastForests, opts.lambda1,
                        opts.lambda2, negativePrizes)
                else:
                    # Use all N-1 other sets of potential Steiner nodes
                    otherPotentialSteiner = list(potentialSteiner)
                    del otherPotentialSteiner[index]
                    artificialPrizes = CreateUnwgtPrizes(
                        allProts, otherPotentialSteiner, otherLastForests,
                        opts.lambda1, opts.lambda2, negativePrizes)
                NetworkUtil.WriteDict(
                    os.path.join(itrPath,
                                 "%s_artificialPrizes.txt" % sampleName),
                    artificialPrizes)

                # Update the stp file based on the artificial prizes and
                # degree penalties and copy the dummy neighbors
                UpdateStp(artificialPrizes, degPenalties,
                          potentialSteiner[index], initPath, itrPath,
                          sampleName)
                shutil.copyfile(os.path.join(initPath, dummyNeighborFile),
                                os.path.join(itrPath, dummyNeighborFile))

                # Learn a new forest for this sample and update lastForests.
                # All samples (besides the first and last in the random
                # order) will use last forests that are a mix of forests from
                # this iteration and the previous iteration
                LearnSteiner(opts, itrPath, itrPath, sampleName,
                             dummyNeighborFile, opts.workers)
                lastForests[index] = LoadForestNodes(
                    forestNameFormat % (itrPath, sampleName, str(opts.W),
                                        opts.depth))

            # Store all forests learned for this group at this iteration so
            # they can be retrieved at the next iteration
            lastForestMap[group] = lastForests
    return itrPath
def connect_to_supervisor():
    """Prompt for the supervisor's hostname and connect to it.

    The connection is opened on ``NetworkUtil.server_port``.

    Returns:
        The connection object returned by ``NetworkUtil.create_connection``.
    """
    supervisor_host = input("Please enter the hostname of the supervisor: ")
    supervisor_link = NetworkUtil.create_connection(supervisor_host,
                                                    NetworkUtil.server_port)
    print("Connected to Supervisor...")
    return supervisor_link
# Supervisor entry sequence: accept both clients, exchange keys, then process
# purchase requests forever.

# Connect to department and customer
department_connection, customer_connection = (
    listen_for_department_and_customer())

# Generate our key pair and get the public keys of others
keypair, department_public_key, customer_public_key = exchange_public_keys(
    department_connection, customer_connection)

while True:
    (confirmation, customer_signature, purchase_message,
     encrypted_purchase_message) = handle_customer_purchase_message_and_confirm(
        customer_connection, keypair, customer_public_key)

    # The customer always gets an encrypted verdict; only a confirmed order
    # is forwarded to the department afterwards.
    confirmed = confirmation == 'y'
    status_text = "Order was confirmed" if confirmed else "Order was rejected"
    confirmation_msg = CryptoUtil.encrypt(status_text.encode(),
                                          customer_public_key)
    NetworkUtil.send_message(customer_connection, confirmation_msg)
    if confirmed:
        reencrypt_and_resign_purchase_message_to_department(
            department_connection, customer_signature, purchase_message,
            department_public_key)

    # Show all the received messages (encrypted)
    presentation = input("Show the all sending and receiving messages? (y/n)")
    if presentation == 'y':
        print(f"\nCustomer's public key: {customer_public_key}")
CryptoUtil.verify(purchase_message.encode(), customer_public_key, customer_signature) CryptoUtil.verify(purchase_message.encode(), supervisor_public_key, supervisor_signature) return encrypted_purchase_message, supervisor_signature, purchase_message if __name__ == "__main__": # Connect to supervisor supervisor_connection = ClientNetworkUtil.connect_to_supervisor() # Generate our key pair and and get the public keys of others keypair, supervisor_public_key = ClientNetworkUtil.exchange_public_keys( supervisor_connection) customer_public_key = RSA.importKey( NetworkUtil.receive_message(supervisor_connection)) while True: (encrypted_purchase_message, supervisor_signature, purchase_message) = handle_supervisor_purchase_message( supervisor_connection, keypair, customer_public_key, supervisor_public_key) timestamp = purchase_message.split("|")[0] item = purchase_message.split("|")[1] print(f"Customer would like to purchase {item} at {timestamp}.") # Show all the received messages (encrypted) presentation = input( "Show the all sending and receiving messages? (y/n)") if (presentation == 'y'): print(f"\nSuperviosr's public key: {customer_public_key}")
def main(argList):
    """Run the constrained multi-sample Steiner forest workflow end to end.

    Phases, in order:
      1. Parse options and derive the prize/iteration modes.
      2. Load the proteins and degree penalties.
      3. Create the initial stp files and dummy neighbor files per sample.
      4. Learn the iteration-1 forests independently (no artificial prizes).
      5. Run the remaining iterations in batch or random sequential mode.
      6. When positive prizes were used over more than one iteration, prune
         and relearn the final forests.

    Args:
        argList: Argument list handed to the option parser; it comes either
            from the command line or from Python code calling this function.

    Raises:
        RuntimeError: If fewer than one iteration is requested.
    """
    parser = CreateParser()
    opts, _ = parser.parse_args(argList)

    timeFormat = "%a, %d %b %Y %H:%M:%S"
    forestNameFormat = "%s/symbol_%s_%s_1.0_%d.txt"

    print("Starting constrained multi-sample Steiner forest %s"
          % time.strftime(timeFormat, time.localtime()))
    print("Multi-PCSF version %s" % __version__)
    print("Parameters: %s" % opts)

    # TODO Add error checking of inputs
    if opts.iterations < 1:
        raise RuntimeError("Must have at least 1 iteration")
    # TODO Should allow the option to run serially without the pool because a
    # pool with 1 worker is not efficient
    if opts.workers < 1:
        opts.workers = multiprocessing.cpu_count()

    # Negative prizes implement the common set unless positive common set
    # prizes were requested; prizes are unweighted unless "Weighted" is
    # requested; batch mode is used unless the iteration mode is "random".
    negativePrizes = "positive" not in opts.artificialPrizes
    weightedPrizes = "Weighted" in opts.artificialPrizes
    batchMode = opts.iterMode != "random"

    # Load all of the proteins in the interactome, ignoring genes. The
    # artificial prizes will be created for a subset of these nodes.
    allProts = LoadProteins(opts.interactomePath, opts.undirectedFile,
                            opts.directedFile, opts.tfdnaFile)

    # Load the negative prizes for the degree penalties or an empty
    # dictionary if they aren't being used
    directedFile = "None"
    if opts.directedFile != "None":
        directedFile = os.path.join(opts.interactomePath, opts.directedFile)
    undirectedFile = os.path.join(opts.interactomePath, opts.undirectedFile)
    degPenalties = NetworkUtil.DegreePenalties(opts.mu, undirectedFile,
                                               directedFile)

    # New directory to hold the original data before the iterations begin.
    # These stp files will be read and updated at subsequent iterations.
    initPath = os.path.join(opts.resultPath, "initial")
    if not os.path.exists(initPath):
        os.makedirs(initPath)

    # Load the list of terminal files and the sample-to-group mapping
    terminalMap, sampleMap, countMap = LoadTerminalFiles(
        opts.terminalPath, opts.masterTerminalFile)
    # Store the groups in a fixed order
    groups = sorted(terminalMap)
    for group in groups:
        print("%d samples in group %s" % (countMap[group], group))

    # Create a pool for creating .stp files and learning Steiner forests in
    # parallel using the specified number of workers. Even when running the
    # subsequent iterations in random sequential order, the pool is used to
    # learn the initial trees and final pruned trees (if applicable).
    print("Creating a pool with %d workers" % opts.workers)
    pool = multiprocessing.Pool(opts.workers)

    # Create the initial stp files
    initialStpMap = dict()
    for group in groups:
        # opts and initPath are invariant arguments for each sample
        stpArgs = itertools.izip(itertools.repeat(opts),
                                 itertools.repeat(initPath),
                                 terminalMap[group],
                                 sampleMap[group])
        # Blocks until all are finished
        initialStpMap[group] = pool.map(CreateStpHelper, stpArgs)

    # Store which proteins don't have prizes for each patient. These are the
    # nodes that could potentially be Steiner nodes for each sample. This
    # can't be recovered from the stp files at later iterations because both
    # original prizes and artificial prizes will exist. Also track how the
    # dummy node will be connected to the networks, either all prizes or all
    # non-prizes (potential Steiner nodes).
    potentialSteinerMap = dict()
    dummyNeighborMap = dict()
    for group in groups:
        sampleNames = sampleMap[group]
        initialStps = initialStpMap[group]
        potentialSteiner = []  # A list of sets
        dummyNeighborFiles = []  # A list of filenames
        for i in range(countMap[group]):
            dnFile = sampleNames[i] + "_dummyNeighbors.txt"
            dummyNeighborFiles.append(dnFile)
            potentialSteiner.append(
                DummyNeighbors(allProts, initPath, initialStps[i], dnFile,
                               opts.dummyNeighbors))
        potentialSteinerMap[group] = potentialSteiner
        dummyNeighborMap[group] = dummyNeighborFiles

    itrPath = os.path.join(opts.resultPath, "itr1")
    if not os.path.exists(itrPath):
        os.makedirs(itrPath)

    # The artificial prizes start as an empty dictionary so that the initial
    # trees are learned independently
    artificialPrizes = dict()
    # Write the unused itr1 artificial prizes so that the files exist for
    # post-processing
    for group in groups:
        NetworkUtil.WriteDict(
            os.path.join(itrPath, "artificialPrizes_%s.txt" % group),
            artificialPrizes)
    print("%d artificial prizes at iteration 1" % len(artificialPrizes))

    # Add the degree penalties to the initial stp files. The empty artificial
    # prize dictionary is passed in and has no effect.
    for group in groups:
        sampleNames = sampleMap[group]
        potentialSteiner = potentialSteinerMap[group]
        dummyNeighborFiles = dummyNeighborMap[group]
        for i in range(countMap[group]):
            UpdateStp(artificialPrizes, degPenalties, potentialSteiner[i],
                      initPath, itrPath, sampleNames[i])
            # Copy the dummy neighbors, which must be in the same directory
            # as the stp file
            shutil.copyfile(os.path.join(initPath, dummyNeighborFiles[i]),
                            os.path.join(itrPath, dummyNeighborFiles[i]))

    # Learn the first iteration Steiner forests in parallel.
    # Run single-threaded belief propagation when using the worker pool.
    lastForestMap = dict()
    for group in groups:
        numSamples = countMap[group]
        sampleNames = sampleMap[group]
        learnArgs = itertools.izip(itertools.repeat(opts),
                                   itertools.repeat(itrPath),
                                   itertools.repeat(itrPath),
                                   sampleNames,
                                   dummyNeighborMap[group],
                                   itertools.repeat(1))
        pool.map(LearnSteinerHelper, learnArgs)
        # A list of sets, where each set contains the Steiner forest nodes
        lastForestMap[group] = [
            LoadForestNodes(forestNameFormat % (itrPath, sampleNames[i],
                                                str(opts.W), opts.depth))
            for i in range(numSamples)
        ]

    # Learn the forests at all remaining iterations and keep the directory
    # that contains the forests from the last iteration.
    if opts.iterations > 1:
        learnRemaining = Batch if batchMode else RandSequential
        iterationArgs = (initPath, allProts, sampleMap, potentialSteinerMap,
                         dummyNeighborMap, lastForestMap, countMap,
                         weightedPrizes, negativePrizes, degPenalties)
        if batchMode:
            # Only batch mode shares the worker pool.
            itrPath = learnRemaining(opts, pool, *iterationArgs)
        else:
            itrPath = learnRemaining(opts, *iterationArgs)

    # Prune Steiner nodes from the forests that are not used to reach any
    # prizes and are only present because they were in the common set. This
    # is not necessary if only 1 iteration was run because in that case there
    # is no common set. It is also not necessary if negative prizes were used.
    if opts.iterations > 1 and (not negativePrizes):
        print("Learning final forests")
        print("Pruning forests from %s" % itrPath)
        finalPath = os.path.join(opts.resultPath, "final")
        if not os.path.exists(finalPath):
            os.makedirs(finalPath)

        # Nothing is returned by these operations so they can be performed
        # simultaneously independent of the groupings
        sampleNames = FlattenDict(sampleMap, groups)
        dummyNeighborFiles = FlattenDict(dummyNeighborMap, groups)
        potentialSteiner = FlattenDict(potentialSteinerMap, groups)

        for i in range(len(sampleNames)):
            forestFile = forestNameFormat % (itrPath, sampleNames[i],
                                             str(opts.W), opts.depth)
            FilterStpEdges(forestFile, initPath, finalPath, sampleNames[i],
                           degPenalties, potentialSteiner[i])
            shutil.copyfile(os.path.join(initPath, dummyNeighborFiles[i]),
                            os.path.join(finalPath, dummyNeighborFiles[i]))

        finalArgs = itertools.izip(itertools.repeat(opts),
                                   itertools.repeat(finalPath),
                                   itertools.repeat(finalPath),
                                   sampleNames,
                                   dummyNeighborFiles,
                                   itertools.repeat(1))
        pool.map(LearnSteinerHelper, finalArgs)

    print("Finishing constrained multi-sample Steiner forest %s"
          % time.strftime(timeFormat, time.localtime()))
    pool.close()
def Batch(opts, pool, initPath, allProts, sampleMap, potentialSteinerMap,
          dummyNeighborMap, lastForestMap, countMap, weightedPrizes,
          negativePrizes, degPenalties):
    """Run iterations 2..N, learning each group's forests as a parallel batch.

    For every group, one set of artificial prizes is computed from all of the
    group's forests of the previous iteration, every sample's stp file is
    updated with it, and the new forests are learned through the worker pool.

    Args:
        opts: Parsed options; reads ``iterations``, ``resultPath``,
            ``lambda1``, ``lambda2``, ``W`` and ``depth``.
        pool: Worker pool used to learn the forests.
        initPath: Directory holding the initial stp and dummy neighbor files.
        allProts: All protein names.
        sampleMap: group -> list of sample names.
        potentialSteinerMap: group -> list of potential Steiner node sets.
        dummyNeighborMap: group -> list of dummy neighbor file names.
        lastForestMap: group -> list of forest node sets; updated in place.
        countMap: group -> number of samples.
        weightedPrizes: Use ``CreateWgtPrizes`` instead of
            ``CreateUnwgtPrizes``.
        negativePrizes: Passed through to the prize construction.
        degPenalties: Degree penalties passed to ``UpdateStp``.

    Returns:
        The directory of the last iteration (``initPath`` if no iteration
        ran).

    Raises:
        RuntimeError: If a group's per-sample lists differ in length from
            its sample count.
    """
    print("Learning forests in parallel batch mode")
    forestNameFormat = "%s/symbol_%s_%s_1.0_%d.txt"

    # Iterate (rounds 2+)
    itrPath = initPath
    for itr in range(2, opts.iterations + 1):
        itrPath = os.path.join(opts.resultPath, "itr%d" % itr)
        if not os.path.exists(itrPath):
            os.makedirs(itrPath)

        # Only constrain the Steiner forests to be similar to other samples
        # in the same group
        for group in sampleMap:
            sampleNames = sampleMap[group]
            numSamples = countMap[group]
            potentialSteiner = potentialSteinerMap[group]
            dummyNeighborFiles = dummyNeighborMap[group]
            lastForests = lastForestMap[group]

            perSampleLists = (sampleNames, potentialSteiner,
                              dummyNeighborFiles, lastForests)
            if any(len(items) != numSamples for items in perSampleLists):
                raise RuntimeError(
                    "Must have the same number of samples in group %s" % group)

            # Update artificial prizes based on the forests from the
            # previous iteration
            if weightedPrizes:
                # lambda2 is used as the alpha parameter
                artificialPrizes = CreateWgtPrizes(
                    allProts, lastForests, opts.lambda1, opts.lambda2,
                    negativePrizes)
            else:
                artificialPrizes = CreateUnwgtPrizes(
                    allProts, potentialSteiner, lastForests, opts.lambda1,
                    opts.lambda2, negativePrizes)
            NetworkUtil.WriteDict(
                os.path.join(itrPath, "artificialPrizes_%s.txt" % group),
                artificialPrizes)
            print("%d artificial prizes in group %s at iteration %d" % (
                len(artificialPrizes), group, itr))

            # Update the stp files based on the new artificial prizes and
            # degree penalties and copy the potential Steiner node files,
            # which need to be in itrPath
            for i in range(numSamples):
                UpdateStp(artificialPrizes, degPenalties, potentialSteiner[i],
                          initPath, itrPath, sampleNames[i])
                shutil.copyfile(os.path.join(initPath, dummyNeighborFiles[i]),
                                os.path.join(itrPath, dummyNeighborFiles[i]))

            # Learn new Steiner forests in parallel
            learnArgs = itertools.izip(itertools.repeat(opts),
                                       itertools.repeat(itrPath),
                                       itertools.repeat(itrPath),
                                       sampleNames,
                                       dummyNeighborFiles,
                                       itertools.repeat(1))
            pool.map(LearnSteinerHelper, learnArgs)

            # Reload the freshly learned forests for the next iteration.
            lastForestMap[group] = [
                LoadForestNodes(forestNameFormat % (itrPath, sampleNames[i],
                                                    str(opts.W), opts.depth))
                for i in range(numSamples)
            ]
    return itrPath