Example #1
0
def exchange_public_keys(connection):
    """Swap public keys with the peer on ``connection``.

    A fresh key pair is generated, its exported public key is sent first,
    and the peer's reply is then read and imported with ``RSA.importKey``.

    Args:
        connection: Connection handed to ``NetworkUtil.send_message`` and
            ``NetworkUtil.receive_message``.

    Returns:
        tuple: ``(keypair, supervisor_public_key)`` -- the locally generated
        key pair and the imported public key received from the peer.
    """
    own_keypair = CryptoUtil.generate_keypair()

    # Our public key goes out before we wait for the peer's key.
    exported_public_key = own_keypair.public_key().exportKey()
    NetworkUtil.send_message(connection, exported_public_key)

    received_key = NetworkUtil.receive_message(connection)
    peer_public_key = RSA.importKey(received_key)

    return own_keypair, peer_public_key
def send_purchase_to_supervisor(supervisor_connection, purchase_message,
                                keypair, supervisor_public_key):
    """Send an encrypted, signed purchase message and wait for the reply.

    The encoded message is encrypted with ``supervisor_public_key`` and
    signed with ``keypair``.  The ciphertext is sent first, then the
    signature, and finally one message is read back from the connection.

    Args:
        supervisor_connection: Connection used for both sends and the reply.
        purchase_message: Text of the purchase; it is ``.encode()``-d before
            being encrypted and signed.
        keypair: Key pair passed to ``CryptoUtil.sign``.
        supervisor_public_key: Key passed to ``CryptoUtil.encrypt``.

    Returns:
        tuple: ``(encrypted_purchase_message, signature, confirmation)``.
    """
    plaintext = purchase_message.encode()
    ciphertext = CryptoUtil.encrypt(plaintext, supervisor_public_key)
    signed_digest = CryptoUtil.sign(plaintext, keypair)

    # Wire order matters: ciphertext first, signature second.
    for outgoing in (ciphertext, signed_digest):
        NetworkUtil.send_message(supervisor_connection, outgoing)

    reply = NetworkUtil.receive_message(supervisor_connection)
    return ciphertext, signed_digest, reply
Example #3
0
def extract_data(html_page):
    """Crawl from the landing page down to every room page.

    The crawl proceeds in four steps:

    1. Find the anchor on ``html_page`` whose ``href`` is ``'/locations/'``.
    2. Fetch that page and queue the ``href`` of every element with class
       ``'location card'`` in ``CustomConstants.URL_TO_BE_VISITED``
       (relative links are converted with ``NetworkUtil.get_absolute_url``).
    3. Fetch every queued location page and gather room links through
       ``extract_rooms_feed``.
    4. Fetch every room link and pass the parsed page to
       ``extract_room_detail``.

    A three second pause follows every request made in steps 3 and 4.

    Args:
        html_page: Response object whose ``content`` attribute holds the
            landing page markup.

    Returns:
        A list with one ``extract_room_detail`` result per room link, or
        ``CustomConstants.SOMETHING_WENT_WRONG_WHILE_FETCHING_LOCATIONS``
        when no usable ``'/locations/'`` link is found.
    """
    locations_href = ''
    for anchor in parse_html_content(html_page.content).find_all('a'):
        if anchor.get('href') == '/locations/':
            locations_href = anchor.get('href')
            break

    # Bail out when the link is missing, or when it is already an absolute
    # URL (only the relative form is accepted here).
    if locations_href == '' or checkers.is_url(locations_href):
        return CustomConstants.SOMETHING_WENT_WRONG_WHILE_FETCHING_LOCATIONS

    CustomConstants.URL_TO_BE_VISITED.add(
        NetworkUtil.get_absolute_url(locations_href))
    locations_page = NetworkUtil.read_from_network(
        CustomConstants.URL_TO_BE_VISITED.pop())
    location_cards = parse_html_content(locations_page.content).find_all(
        class_='location card')

    if len(location_cards) > 0:
        clear_set_data()

    for location_card in location_cards:
        link = location_card.get('href')
        if not checkers.is_url(link):
            link = NetworkUtil.get_absolute_url(link)
        # Fixed: the original called the set itself for relative links
        # (``URL_TO_BE_VISITED(link)``), which raises TypeError.
        CustomConstants.URL_TO_BE_VISITED.add(link)

    room_links = set()
    for location in CustomConstants.URL_TO_BE_VISITED:
        location_page = NetworkUtil.read_from_network(location)
        room_links.update(
            extract_rooms_feed(parse_html_content(location_page.content)))
        time.sleep(3.0)

    clear_set_data()

    room_detail_list = []
    for room_link in room_links:
        room_page = NetworkUtil.read_from_network(room_link)
        room_detail_list.append(
            extract_room_detail(parse_html_content(room_page.content)))
        time.sleep(3.0)
    return room_detail_list
Example #4
0
    def start_scraping(self):
        """Run one complete scrape and print a per-city analysis.

        Output files are cleared, the start URL is fetched, room details are
        extracted, written to ``CustomConstants.JSON_DATA_FILE_NAME`` and read
        back, and ``CustomUtil.print_analysis`` is then called once per city.

        Nothing is returned.  Failures reported by the network layer or by
        the extraction step are printed and end the run early.
        """
        CustomUtil.clear_all_files()
        CustomConstants.URL_TO_BE_VISITED.add(
            CustomConstants.URL_TO_BE_CRAWLED)
        html_response = NetworkUtil.read_from_network(
            CustomConstants.URL_TO_BE_VISITED.pop())

        if html_response == CustomConstants.URL_IS_NOT_VALID:
            print(CustomConstants.URL_IS_NOT_VALID_MSG)
            return
        if html_response == CustomConstants.ERROR_OCCURED_WHILE_SENDING_REQUEST:
            print(CustomConstants.ERROR_OCCURED_WHILE_SENDING_REQUEST_MSG)
            return

        room_detail_list = CustomUtil.extract_data(html_response)

        # NOTE(review): assumes CustomUtil.extract_data reports a missing
        # locations link with this sentinel instead of a list -- confirm.
        # Without this guard the sentinel would be measured with len() and
        # written to the data file as if it were scraped room data.
        if room_detail_list == CustomConstants.SOMETHING_WENT_WRONG_WHILE_FETCHING_LOCATIONS:
            print(room_detail_list)
            return

        if len(room_detail_list) == 0:
            return

        # Round-trip through the file so the analysis runs on exactly what
        # was persisted.
        CustomUtil.write_data_into_file(
            CustomConstants.JSON_DATA_FILE_NAME, room_detail_list)
        room_detail_list = CustomUtil.read_file(
            CustomConstants.JSON_DATA_FILE_NAME)

        for city in CustomUtil.get_city_list(room_detail_list):
            city_room_detail_list = CustomUtil.get_city_data(
                city, room_detail_list)
            room_capacity_list = CustomUtil.get_room_capacity_list(
                city_room_detail_list)
            CustomUtil.print_analysis(city, room_capacity_list,
                                      city_room_detail_list)
 def sendAndEncode(self, msg):
     """Send ``msg`` to the peer using the transport of this connection.

     Websocket connections hand the message to ``NU.sendData`` and report
     its outcome as a bool.  Plain connections send the UTF-8 encoded
     message on the raw socket and return ``None``.

     Args:
         msg: Text to send.

     Returns:
         ``True``/``False`` for websocket connections, ``None`` otherwise.
     """
     if self.isWebsocket:
         return bool(NU.sendData(self.socket, msg))
     # sendall() keeps writing until the whole buffer is sent; a bare
     # send() may transmit only part of it and silently drop the rest.
     self.socket.sendall(msg.encode("utf-8"))
def handle_supervisor_purchase_message(supervisor_connection, keypair,
                                       customer_public_key,
                                       supervisor_public_key):
    """Receive a forwarded purchase, decrypt it and check both signatures.

    Three messages are read from ``supervisor_connection`` in this order:
    the customer's signature, the encrypted purchase message and the
    supervisor's signature.  The decrypted message is printed and then
    passed to ``CryptoUtil.verify`` once per signature.

    Args:
        supervisor_connection: Connection the three messages are read from.
        keypair: Key pair passed to ``CryptoUtil.decrypt``.
        customer_public_key: Key used to check the customer's signature.
        supervisor_public_key: Key used to check the supervisor's signature.

    Returns:
        tuple: ``(encrypted_purchase_message, supervisor_signature,
        purchase_message)``.
    """
    # The receive order is part of the protocol; the generator preserves it.
    customer_signature, ciphertext, supervisor_signature = (
        NetworkUtil.receive_message(supervisor_connection) for _ in range(3))

    purchase_message = CryptoUtil.decrypt(ciphertext, keypair).decode()
    print(f"Purchase message is: {purchase_message}")

    # NOTE(review): the results of CryptoUtil.verify are discarded --
    # confirm that it raises when a signature does not match.
    signed_bytes = purchase_message.encode()
    signature_checks = (
        (customer_public_key, customer_signature),
        (supervisor_public_key, supervisor_signature),
    )
    for public_key, signature in signature_checks:
        CryptoUtil.verify(signed_bytes, public_key, signature)

    return ciphertext, supervisor_signature, purchase_message
	def sendAndEncode(self, msg):
		"""Send ``msg`` to the peer using the transport of this connection.

		Websocket connections hand the message to ``NU.sendData`` and report
		its outcome as a bool.  Plain connections send the UTF-8 encoded
		message on the raw socket and return ``None``.

		Args:
			msg: Text to send.

		Returns:
			``True``/``False`` for websocket connections, ``None`` otherwise.
		"""
		if self.isWebsocket:
			return bool(NU.sendData(self.socket, msg))
		# sendall() keeps writing until the whole buffer is sent; a bare
		# send() may transmit only part of it and silently drop the rest.
		self.socket.sendall(msg.encode("utf-8"))
def handle_customer_purchase_message_and_confirm(customer_connection, keypair,
                                                 customer_public_key):
    """Receive a customer's purchase and ask the operator to confirm it.

    The encrypted purchase message is read first, then the customer's
    signature.  The message is decrypted, printed, passed to
    ``CryptoUtil.verify``, split on ``"|"`` into timestamp and item, and the
    operator is prompted on standard input.

    Args:
        customer_connection: Connection the two messages are read from.
        keypair: Key pair passed to ``CryptoUtil.decrypt``.
        customer_public_key: Key used to check the customer's signature.

    Returns:
        tuple: ``(confirmation, customer_signature, purchase_message,
        encrypted_purchase_message)`` where ``confirmation`` is the raw
        text typed by the operator.

    Raises:
        IndexError: If the decrypted message contains no ``"|"`` separator.
    """
    ciphertext = NetworkUtil.receive_message(customer_connection)
    customer_signature = NetworkUtil.receive_message(customer_connection)

    purchase_message = CryptoUtil.decrypt(ciphertext, keypair).decode()
    print(f"Purchase message is: {purchase_message}")

    # NOTE(review): the result of CryptoUtil.verify is discarded -- confirm
    # that it raises when the signature does not match.
    CryptoUtil.verify(purchase_message.encode(), customer_public_key,
                      customer_signature)

    # Message layout: "<timestamp>|<item>|..."
    fields = purchase_message.split("|")
    timestamp, item = fields[0], fields[1]
    print(f"Customer would like to purchase {item} at {timestamp}.")

    operator_answer = input("Confirm the above purchase? (y/n) ")
    return operator_answer, customer_signature, purchase_message, ciphertext
Example #9
0
def DummyNeighbors(allProts, path, stpFile, dnFile, neighborType):
    """Write the dummy-node neighbor file for one stp file.

    Every ``W <node> ...`` line of the stp file whose node name does not end
    in ``_MRNA`` contributes a prize node.  Depending on ``neighborType``,
    either the prize nodes or the remaining members of ``allProts`` are
    written to ``dnFile`` with ``NetworkUtil.WriteCollection``.

    Args:
        allProts: Set of all protein names.
        path: Directory holding ``stpFile``; ``dnFile`` is written there too.
        stpFile: Name of the stp file to read.
        dnFile: Name of the neighbor file to write.
        neighborType: ``"prizes"`` or ``"nonprizes"``.

    Returns:
        The members of ``allProts`` that are not prize nodes.

    Raises:
        RuntimeError: If ``neighborType`` is neither ``"prizes"`` nor
            ``"nonprizes"``.
    """
    prizes = set()
    with open(os.path.join(path, stpFile)) as f:
        for line in f:
            parts = line.split()
            # Blank or truncated lines name no node; skip them instead of
            # failing with IndexError.
            if len(parts) < 2:
                continue
            # mRNAs can't be Steiner nodes
            if parts[0] == "W" and not parts[1].endswith("_MRNA"):
                prizes.add(parts[1])

    psNodes = allProts.difference(prizes)

    if neighborType == "prizes":
        NetworkUtil.WriteCollection(os.path.join(path, dnFile), prizes)
    elif neighborType == "nonprizes":
        NetworkUtil.WriteCollection(os.path.join(path, dnFile), psNodes)
    else:
        raise RuntimeError(
            "%s is not a valid type of dummy node neighbor connection" %
            neighborType)

    return psNodes
def listen_for_department_and_customer():
    """Open the supervisor's listening socket and accept both clients.

    A listening socket is created on ``NetworkUtil.server_port``.  Two
    connections are then accepted in a fixed order: the first one is
    treated as the department, the second one as the customer.  Progress
    is reported on standard output.

    Returns:
        tuple: ``(department_connection, customer_connection)``.
    """
    print("Supervisor will start on localhost")
    server_socket = NetworkUtil.create_listening_socket(
        NetworkUtil.server_port)
    print("Supervisor done binding to host and port successfully")
    print("Supervisor is waiting for incoming connections")

    # (banner printed before accepting, text printed after the address).
    # Both texts are reproduced verbatim, including the leading space of
    # the department line.
    expected_peers = (
        ("\nListening for Department...",
         " Department has connected to the Supervisor and is now online..."),
        ("\nListening for Customer...",
         "Customer Has connected to the Supervisor and is now online..."),
    )

    accepted = []
    for banner, online_text in expected_peers:
        print(banner)
        peer_connection, peer_address = NetworkUtil.accept_connection(
            server_socket)
        print(peer_address, online_text)
        accepted.append(peer_connection)

    department_connection, customer_connection = accepted
    return department_connection, customer_connection
	def receiveAndDecode(self):
		"""Read up to ``self.messageSize`` bytes and return them as text.

		Websocket connections pass the received data through ``NU.decode``;
		plain connections decode it as UTF-8.

		Returns:
			The decoded message on success, ``None`` when ``NU.decode`` yields
			``None`` (the failure is logged), or ``False`` when the socket
			raises ``socket.error`` (also logged).  Both failure values are
			falsy, so callers can test the result with ``not``.
		"""
		try:
			if self.isWebsocket:
				decodedMsg = NU.decode(self.socket.recv(self.messageSize))
				if decodedMsg is None:
					self.log.logAndPrintError("Error decoding message, maybe wrong format?")
					return None
				return decodedMsg
			return self.socket.recv(self.messageSize).decode("utf-8")
		except socket.error:
			self.log.logAndPrintError("Connection reset by peer, if reocurring restart server")
			return False
def _SafeRatio(numerator, denominator):
    """Return numerator / denominator as a float, or 0 for a zero denominator."""
    if denominator == 0:
        return 0
    return float(numerator) / denominator


def Evaluate(network2Pathway, outFileName, fraction, noise, weightedNetworks=False):
    """Write node/edge precision and recall of networks against pathways.

    One tab-separated row is written per (network, pathway) pair, followed
    by a final row holding the averages over all pairs.  Any ratio whose
    denominator is zero (an empty network, an empty pathway, or no pairs at
    all) is reported as 0 instead of raising ZeroDivisionError.

    Args:
        network2Pathway: Sized collection of (networkFile, pathwayFile)
            filename pairs.
        outFileName: File to write; it is overwritten.
        fraction: Multiplied by the pathway node count (rounded up) to give
            the "True prizes" column.
        noise: Multiplied by the true prize count (rounded up) to give the
            "Noisy prizes" column.
        weightedNetworks: Passed to ``NetworkUtil.LoadNetwork`` as ``weight``.
    """
    with open(outFileName, "w") as outFile:
        npSum = 0
        nrSum = 0
        epSum = 0
        erSum = 0
        outFile.write("Steiner forest\tPathway\tTrue prizes\tNoisy prizes\tForest nodes\tPathway nodes\tIntersection nodes\tNode precision\tNode recall\tForest edges\tPathway edges\tIntersection edges\tEdge precision\tEdge recall\n")
        # The name forestFile assumes the networks to evaluate are Steiner forests, but they can
        # be any network
        for forestFile, pathwayFile in network2Pathway:
            forest = NetworkUtil.LoadNetwork(forestFile, weight=weightedNetworks)
            # Remove the artificial node if it is present
            if "DUMMY" in forest:
                forest.remove_node("DUMMY")
            # NetworkUtil.LoadNetwork only works for the simple format used when writing synthetic
            # pathways.  LoadGraphiteNetwork works for the simple format and the graphite edge list.
            pathway = NetworkUtil.LoadGraphiteNetwork(pathwayFile)
            intersection = NetworkUtil.Intersection(forest, pathway)

            # order() counts nodes, size() counts edges
            nPrecision = _SafeRatio(intersection.order(), forest.order())
            nRecall = _SafeRatio(intersection.order(), pathway.order())
            ePrecision = _SafeRatio(intersection.size(), forest.size())
            eRecall = _SafeRatio(intersection.size(), pathway.size())
            npSum += nPrecision
            nrSum += nRecall
            epSum += ePrecision
            erSum += eRecall

            truePrizes = int(math.ceil(fraction*pathway.order()))
            noisyPrizes = int(math.ceil(noise*truePrizes))
            outFile.write("%s\t%s\t%d\t%d\t%d\t%d\t%d\t%f\t%f\t%d\t%d\t%d\t%f\t%f\n" % (os.path.basename(forestFile), os.path.basename(pathwayFile), truePrizes, noisyPrizes, forest.order(), pathway.order(), intersection.order(), nPrecision, nRecall, forest.size(), pathway.size(), intersection.size(), ePrecision, eRecall))
        # Write the average node/edge precision/recall
        numPairs = len(network2Pathway)
        outFile.write("Average\t\t\t\t\t\t\t%f\t%f\t\t\t\t%f\t%f\n" % (_SafeRatio(npSum, numPairs), _SafeRatio(nrSum, numPairs), _SafeRatio(epSum, numPairs), _SafeRatio(erSum, numPairs)))
 def receiveAndDecode(self):
     """Read up to ``self.messageSize`` bytes and return them as text.

     Websocket connections pass the received data through ``NU.decode``;
     plain connections decode it as UTF-8.

     Returns:
         The decoded message on success, ``None`` when ``NU.decode`` yields
         ``None`` (the failure is logged), or ``False`` when the socket
         raises ``socket.error`` (also logged).  Both failure values are
         falsy, so callers can test the result with ``not``.
     """
     try:
         if self.isWebsocket:
             decodedMsg = NU.decode(self.socket.recv(self.messageSize))
             if decodedMsg is None:
                 self.log.logAndPrintError(
                     "Error decoding message, maybe wrong format?")
                 return None
             return decodedMsg
         return self.socket.recv(self.messageSize).decode("utf-8")
     except socket.error:
         self.log.logAndPrintError(
             "Connection reset by peer, if reocurring restart server")
         return False
    def performHandshake(self):
        """Advance the connection handshake by one stage.

        The stage is selected by ``self.handshakeStatus``:

        * ``0`` -- receive the first message.  A message starting with
          ``"GET"`` is answered with ``NU.create_handshake`` and the
          connection is marked as a websocket.  Anything else is parsed as
          JSON and, if it is a ``ConnREQ``, the status moves to ``1``.
        * ``1`` -- send the ConnACK and move to ``2``.
        * ``2`` -- send ``self.LAO`` and move to ``3``.
        * any other value -- receive a message and set ``self.established``
          when it is a ``ConnSTT``.

        When a receive yields nothing the socket is closed and the call
        returns without changing the handshake state.

        Raises:
            Exception: If the handshake has already been completed.
        """
        if self.established:
            raise Exception("Handshake already performed")

        #receive connreq or perform websocket handshake if client is connecting over websockets
        if self.handshakeStatus == 0:
            inputMSG = self.receiveAndDecode()
            if not inputMSG:
                self.socket.close()
                # Nothing to inspect; slicing/parsing a falsy result would
                # raise TypeError.
                return

            if inputMSG[:3] == "GET":
                handshake = NU.create_handshake(inputMSG)
                # The reply is sent before isWebsocket is set, so it goes
                # out through the plain-socket path of sendAndEncode.
                self.sendAndEncode(handshake)
                self.isWebsocket = True
            else:
                try:
                    msg = json.loads(inputMSG)

                    if self.midac.GetMessageType(msg) == MSGType.ConnREQ:
                        self.handshakeStatus = 1
                except ValueError:
                    self.log.logAndPrintError("Error while parsing input")

        #send connack to client
        elif self.handshakeStatus == 1:
            self.sendAndEncode(
                self.midac.GenerateConnACK("None", self.conf.SEGMENT_SIZE))
            self.handshakeStatus = 2

        #send connlao to client
        elif self.handshakeStatus == 2:
            self.sendAndEncode(self.LAO)
            self.handshakeStatus = 3

        #receive connstt and set status to established
        else:
            inputMSG = self.receiveAndDecode()
            if not inputMSG:
                self.socket.close()
                return
            try:
                msg = json.loads(inputMSG)
                if self.midac.GetMessageType(msg) == MSGType.ConnSTT:
                    self.established = True
            except ValueError:
                self.established = False
	def performHandshake(self):
		"""Advance the connection handshake by one stage.

		The stage is selected by ``self.handshakeStatus``:

		* ``0`` -- receive the first message.  A message starting with
		  ``"GET"`` is answered with ``NU.create_handshake`` and the
		  connection is marked as a websocket.  Anything else is parsed as
		  JSON and, if it is a ``ConnREQ``, the status moves to ``1``.
		* ``1`` -- send the ConnACK and move to ``2``.
		* ``2`` -- send ``self.LAO`` and move to ``3``.
		* any other value -- receive a message and set ``self.established``
		  when it is a ``ConnSTT``.

		When a receive yields nothing the socket is closed and the call
		returns without changing the handshake state.

		Raises:
			Exception: If the handshake has already been completed.
		"""
		if self.established:
			raise Exception("Handshake already performed")

		#receive connreq or perform websocket handshake if client is connecting over websockets
		if self.handshakeStatus == 0:
			inputMSG = self.receiveAndDecode()
			if not inputMSG:
				self.socket.close()
				# Nothing to inspect; slicing/parsing a falsy result would
				# raise TypeError.
				return

			if inputMSG[:3] == "GET":
				handshake = NU.create_handshake(inputMSG)
				# The reply is sent before isWebsocket is set, so it goes
				# out through the plain-socket path of sendAndEncode.
				self.sendAndEncode(handshake)
				self.isWebsocket = True
			else:
				try:
					msg = json.loads(inputMSG)

					if self.midac.GetMessageType(msg) == MSGType.ConnREQ:
						self.handshakeStatus = 1
				except ValueError:
					self.log.logAndPrintError("Error while parsing input")

		#send connack to client
		elif self.handshakeStatus == 1:
			self.sendAndEncode(self.midac.GenerateConnACK("None", self.conf.SEGMENT_SIZE))
			self.handshakeStatus = 2

		#send connlao to client
		elif self.handshakeStatus == 2:
			self.sendAndEncode(self.LAO)
			self.handshakeStatus = 3

		#receive connstt and set status to established
		else:
			inputMSG = self.receiveAndDecode()
			if not inputMSG:
				self.socket.close()
				return
			try:
				msg = json.loads(inputMSG)
				if self.midac.GetMessageType(msg) == MSGType.ConnSTT:
					self.established = True
			except ValueError:
				self.established = False
def reencrypt_and_resign_purchase_message_to_department(
        department_connection, customer_signature, purchase_message,
        department_public_key):
    """Forward a purchase to the department.

    Three messages are sent on ``department_connection``, in this order:
    the customer's signature unchanged, the purchase message encrypted with
    ``department_public_key``, and a signature over the purchase message.

    Note:
        ``keypair`` is not a parameter.  The name is looked up outside this
        function when it runs, so a module-level ``keypair`` must exist.

    Args:
        department_connection: Connection all three messages are sent on.
        customer_signature: Signature received from the customer.
        purchase_message: Text of the purchase; it is ``.encode()``-d before
            being encrypted and signed.
        department_public_key: Key passed to ``CryptoUtil.encrypt``.
    """
    # The customer's signature is relayed first, before the message is
    # re-encrypted.
    NetworkUtil.send_message(department_connection, customer_signature)

    plaintext = purchase_message.encode()
    outgoing = (
        CryptoUtil.encrypt(plaintext, department_public_key),
        CryptoUtil.sign(plaintext, keypair),
    )
    for part in outgoing:
        NetworkUtil.send_message(department_connection, part)
def exchange_public_keys(department_connection, customer_connection):
    """Exchange public keys between the supervisor and both clients.

    A fresh key pair is generated and its exported public key is sent to
    the department and then to the customer.  One key is then read from
    each connection (department first) and imported with ``RSA.importKey``.
    Finally the customer's public key is exported again and sent to the
    department.

    Args:
        department_connection: Connection to the department.
        customer_connection: Connection to the customer.

    Returns:
        tuple: ``(keypair, department_public_key, customer_public_key)``.
    """
    own_keypair = CryptoUtil.generate_keypair()
    peers = (department_connection, customer_connection)

    # Department first, customer second -- for sending and for receiving.
    for peer in peers:
        NetworkUtil.send_message(peer, own_keypair.public_key().exportKey())

    department_public_key, customer_public_key = [
        RSA.importKey(NetworkUtil.receive_message(peer)) for peer in peers
    ]

    # The department also needs the customer's key.
    NetworkUtil.send_message(department_connection,
                             customer_public_key.exportKey())

    return own_keypair, department_public_key, customer_public_key
def LoadPathways(pathwayPath, listFile):
    """Load every pathway named in a list file.

    Each non-blank line of ``listFile`` is a path relative to
    ``pathwayPath``.  The file is loaded with
    ``NetworkUtil.LoadGraphiteNetwork`` and annotated with two graph
    attributes: ``"filename"`` (the joined path) and ``"name"`` (the list
    entry with a trailing ``".txt"`` removed).

    Args:
        pathwayPath: Directory the list entries are relative to.
        listFile: Text file with one pathway file per line.

    Returns:
        list: The loaded pathways, in list-file order.
    """
    pathways = []
    with open(listFile) as inFile:
        for pathwayLine in inFile:
            pathwayLine = pathwayLine.strip()
            # A blank line would otherwise be joined into the bare
            # directory path and handed to the loader.
            if not pathwayLine:
                continue

            filename = os.path.join(pathwayPath, pathwayLine)
            pathway = NetworkUtil.LoadGraphiteNetwork(filename)

            pathway.graph["filename"] = filename
            if pathwayLine.endswith(".txt"):
                pathwayLine = pathwayLine[0:-4]  # Remove ".txt"
            pathway.graph["name"] = pathwayLine

            # Debugging.  The single parenthesised argument prints the same
            # text under Python 2 and Python 3.
            print("Loaded %s with %d nodes and %d edges" % (
                pathway.graph["name"], pathway.order(), pathway.size()))
            pathways.append(pathway)
    return pathways
def main(argList):
    """Load or generate pathways and create samples from them.

    Args:
        argList: Argument list handed to the parser built by
            ``CreateParser``; it comes either from the command line or from
            Python code calling this function.

    Raises:
        RuntimeError: If no network file is given, if pathways are to be
            loaded but ``pathwayPath`` or ``pathwayListFile`` is missing, or
            if ``pathwaySource`` is neither ``"load"`` nor ``"generate"``.
    """
    parser = CreateParser()
    # Positional arguments are not used.
    opts, _ = parser.parse_args(argList)

    # Single parenthesised arguments print the same text under Python 2
    # and Python 3.
    print("Parameters: %s" % opts)

    # Unset options carry the string "None", not the None object.
    if opts.networkFile == "None":
        raise RuntimeError("Must specify an network filename")

    if opts.pathwaySource == "load" and (opts.pathwayPath == "None"
                                         or opts.pathwayListFile == "None"):
        raise RuntimeError(
            "Must specify pathwayPath and pathwayListFile when loading pathways"
        )

    # Create the output path if needed
    if not os.path.exists(opts.outPath):
        print("Creating output directory %s" % opts.outPath)
        os.makedirs(opts.outPath)

    # Load the interaction network
    network = NetworkUtil.LoadNetwork(opts.networkFile, weight=True)

    # Load or generate the pathways
    if opts.pathwaySource == "load":
        pathways = LoadPathways(opts.pathwayPath, opts.pathwayListFile)
    elif opts.pathwaySource == "generate":
        pathways = GeneratePathways(network, opts.numPathways, opts.branching,
                                    opts.depth, opts.outPath, opts.name)
    else:
        # Shouldn't be able to get to this case
        raise RuntimeError("%s is not a recognized pathway source" %
                           opts.pathwaySource)

    # Sample from the pathways
    CreateSamples(pathways, opts.samples, opts.fraction,
                  opts.outPath, opts.name, opts.noise, opts.sampleGroups,
                  set(network.nodes()))
Example #20
0
def CreateWgtPrizes(allProts, lastForests, lambda1, alpha, negativePrizes):
    """Create artificial prizes weighted by how often a node is in a forest.

    Node frequencies come from ``NetworkUtil.SetFrequency(lastForests)``.

    * Negative prizes: every node of ``allProts`` with frequency below 1
      gets ``-lambda1 * (1 - freq) ** alpha``; nodes missing from the
      frequency map count as frequency 0.
    * Positive prizes: every node in the frequency map gets
      ``lambda1 * freq ** alpha``.

    Args:
        allProts: Iterable of all protein names.
        lastForests: Forest node collections passed to ``SetFrequency``.
        lambda1: Scale of the prize.
        alpha: Exponent applied to the frequency term.
        negativePrizes: Selects the negative (truthy) or positive form.

    Returns:
        dict: Node name -> artificial prize.
    """
    forestFreq = NetworkUtil.SetFrequency(lastForests)
    artificialPrizes = {}
    if negativePrizes:
        # Need to iterate over all proteins when creating negative prizes
        for node in allProts:
            freq = forestFreq[node] if node in forestFreq else 0
            # Only create non-zero prizes, i.e. for nodes that are not in all
            # forests
            if freq < 1:
                artificialPrizes[node] = -lambda1 * ((1 - freq)**alpha)
    else:
        # For positive prizes only need to iterate over the nodes that appear
        # in some forest.  Iterating the mapping directly yields its keys on
        # both Python 2 and Python 3 (iterkeys() exists only on Python 2).
        for node in forestFreq:
            # Frequency is guaranteed to be > 0 because the keys are only
            # the union of all forest nodes
            artificialPrizes[node] = lambda1 * (forestFreq[node]**alpha)

    return artificialPrizes
Example #21
0
def RandSequential(opts, initPath, allProts, sampleMap, potentialSteinerMap,
                   dummyNeighborMap, lastForestMap, countMap, weightedPrizes,
                   negativePrizes, degPenalties):
    """Learn forests one sample at a time, in a random order per iteration.

    For every iteration from 2 to ``opts.iterations`` a directory
    ``itr<N>`` is created under ``opts.resultPath``.  Within each group the
    samples are visited in a shuffled order (written to
    ``sampleOrder_<group>.txt``).  For each sample, artificial prizes are
    built from the forests of all *other* samples, the stp file is updated,
    the dummy neighbor file is copied from ``initPath``, a new forest is
    learned and the sample's entry in ``lastForestMap[group]`` is replaced
    in place.

    Args:
        opts: Parsed options; reads ``iterations``, ``resultPath``,
            ``lambda1``, ``lambda2``, ``workers``, ``W`` and ``depth``.
        initPath: Directory of the initial stp and dummy neighbor files.
        allProts: All protein names, passed on to the prize builders.
        sampleMap: Group -> list of sample names.
        potentialSteinerMap: Group -> per-sample potential Steiner nodes.
        dummyNeighborMap: Group -> per-sample dummy neighbor file names.
        lastForestMap: Group -> per-sample forest nodes; updated in place.
        countMap: Group -> expected number of samples.
        weightedPrizes: Use ``CreateWgtPrizes`` instead of
            ``CreateUnwgtPrizes``.
        negativePrizes: Passed on to the prize builders.
        degPenalties: Passed on to ``UpdateStp``.

    Returns:
        The directory of the last iteration, or ``initPath`` when
        ``opts.iterations`` is below 2.

    Raises:
        RuntimeError: If the per-sample lists of a group do not all have
            ``countMap[group]`` entries.
    """
    # Single parenthesised argument: same output on Python 2 and Python 3.
    print("Learning forests in random sequential mode")

    # Iterate (rounds 2+)
    itrPath = initPath
    for itr in range(2, opts.iterations + 1):
        itrPath = os.path.join(opts.resultPath, "itr%d" % itr)
        if not os.path.exists(itrPath):
            os.makedirs(itrPath)

        # Only constrain the Steiner forests to be similar to other samples in the same group
        for group in sampleMap:
            sampleNames = sampleMap[group]
            numSamples = countMap[group]
            potentialSteiner = potentialSteinerMap[group]
            dummyNeighborFiles = dummyNeighborMap[group]
            lastForests = lastForestMap[group]

            perSampleLists = (sampleNames, potentialSteiner,
                              dummyNeighborFiles, lastForests)
            if any(len(items) != numSamples for items in perSampleLists):
                raise RuntimeError(
                    "Must have the same number of samples in group %s" % group)

            # Randomly choose the order in which to learn forests at this iteration.
            # shuffle() needs a mutable sequence; on Python 3 range() is not one.
            order = list(range(numSamples))
            random.shuffle(order)

            # Write the order to a file
            with open(os.path.join(itrPath, "sampleOrder_%s.txt" % group),
                      "w") as f:
                for index in order:
                    f.write("%d\t%s\n" % (index, sampleNames[index]))

            # Iterate over all samples in the random order
            for index in order:
                # Create artificial prizes for this sample using all N-1 lastForests
                otherLastForests = list(lastForests)
                otherLastForests.pop(index)
                if weightedPrizes:
                    # lambda2 is used as the alpha parameter
                    artificialPrizes = CreateWgtPrizes(allProts,
                                                       otherLastForests,
                                                       opts.lambda1,
                                                       opts.lambda2,
                                                       negativePrizes)
                else:
                    # Use all N-1 other sets of potential Steiner nodes
                    otherPotentialSteiner = list(potentialSteiner)
                    otherPotentialSteiner.pop(index)
                    artificialPrizes = CreateUnwgtPrizes(
                        allProts, otherPotentialSteiner, otherLastForests,
                        opts.lambda1, opts.lambda2, negativePrizes)
                NetworkUtil.WriteDict(
                    os.path.join(
                        itrPath,
                        "%s_artificialPrizes.txt" % sampleNames[index]),
                    artificialPrizes)

                # Update the stp file based on the artificial prizes and degree penalties and copy the dummy neighbors
                UpdateStp(artificialPrizes, degPenalties,
                          potentialSteiner[index], initPath, itrPath,
                          sampleNames[index])
                shutil.copyfile(
                    os.path.join(initPath, dummyNeighborFiles[index]),
                    os.path.join(itrPath, dummyNeighborFiles[index]))

                # Learn a new forest for this sample and update lastForests
                # All samples (besides the first and last in the random order) will use last forests
                # that are a mix of forests from this iteration and the previous iteration
                LearnSteiner(opts, itrPath, itrPath, sampleNames[index],
                             dummyNeighborFiles[index], opts.workers)
                lastForests[index] = LoadForestNodes(
                    "%s/symbol_%s_%s_1.0_%d.txt" %
                    (itrPath, sampleNames[index], str(opts.W), opts.depth))
            # Store all forests learned for this group at this iteration so they can be
            # retrieved at the next iteration
            lastForestMap[group] = lastForests

    return itrPath
Example #22
0
def connect_to_supervisor():
    """Prompt for the supervisor's hostname and connect to it.

    The hostname is read from standard input and a connection is opened to
    that host on ``NetworkUtil.server_port``.

    Returns:
        The connection returned by ``NetworkUtil.create_connection``.
    """
    # Connecting to supervisor
    host = input("Please enter the hostname of the supervisor: ")
    connection = NetworkUtil.create_connection(host, NetworkUtil.server_port)
    print("Connected to Supervisor...")
    return connection
    # NOTE(review): everything below is unreachable -- it follows the
    # unconditional return above.  It looks like the body of a supervisor
    # main loop that ended up inside this function, and its tail uses
    # supervisor_public_key / supervisor_signature, which are never bound
    # here.  Confirm where this code belongs before relying on it.
    # Connect to department and customer
    department_connection, customer_connection = listen_for_department_and_customer(
    )
    # Generate our key pair and and get the public keys of others
    keypair, department_public_key, customer_public_key = exchange_public_keys(
        department_connection, customer_connection)

    while True:
        confirmation, customer_signature, purchase_message, encrypted_purchase_message = handle_customer_purchase_message_and_confirm(
            customer_connection, keypair, customer_public_key)

        if confirmation == 'y':
            # Tell customer that order is confirmed
            confirmation_msg = CryptoUtil.encrypt(
                "Order was confirmed".encode(), customer_public_key)
            NetworkUtil.send_message(customer_connection, confirmation_msg)

            reencrypt_and_resign_purchase_message_to_department(
                department_connection, customer_signature, purchase_message,
                department_public_key)
        else:
            # Rejected order
            confirmation_msg = CryptoUtil.encrypt(
                "Order was rejected".encode(), customer_public_key)
            NetworkUtil.send_message(customer_connection, confirmation_msg)

        # Show all the received messages (encrypted)
        presentation = input(
            "Show the all sending and receiving messages? (y/n)")
        if (presentation == 'y'):
            print(f"\nCustomer's public key: {customer_public_key}")
    # NOTE(review): the loop above has no break, so these lines could not
    # run even if the early return were removed.
    CryptoUtil.verify(purchase_message.encode(), customer_public_key,
                      customer_signature)
    CryptoUtil.verify(purchase_message.encode(), supervisor_public_key,
                      supervisor_signature)
    return encrypted_purchase_message, supervisor_signature, purchase_message


if __name__ == "__main__":
    # Department entry point: connect to the supervisor, exchange keys, then
    # process forwarded purchase messages until the process is stopped.

    # Connect to supervisor
    supervisor_connection = ClientNetworkUtil.connect_to_supervisor()
    # Generate our key pair and get the public keys of others
    keypair, supervisor_public_key = ClientNetworkUtil.exchange_public_keys(
        supervisor_connection)
    # The next message on the supervisor connection is imported as the
    # customer's public key.
    customer_public_key = RSA.importKey(
        NetworkUtil.receive_message(supervisor_connection))

    while True:
        (encrypted_purchase_message, supervisor_signature,
         purchase_message) = handle_supervisor_purchase_message(
             supervisor_connection, keypair, customer_public_key,
             supervisor_public_key)
        # Message layout: "<timestamp>|<item>|..."
        fields = purchase_message.split("|")
        timestamp, item = fields[0], fields[1]
        print(f"Customer would like to purchase {item} at {timestamp}.")

        # Show all the received messages (encrypted)
        presentation = input(
            "Show the all sending and receiving messages? (y/n)")
        if presentation == 'y':
            # Fixed: the line was labelled as the supervisor's key (and
            # misspelled) but printed customer_public_key.
            print(f"\nSupervisor's public key: {supervisor_public_key}")
Example #25
0
def main(argList):
    """Run the constrained multi-sample Steiner forest workflow.

    Parses the options in argList, creates the initial .stp files for every
    sample, learns the first-iteration forests with a worker pool, runs any
    remaining iterations through Batch or RandSequential, and, when more
    than one iteration was run with positive artificial prizes, filters the
    .stp files and learns the final forests in <resultPath>/final.

    Args:
        argList: the argument list passed to the parser's parse_args.

    Raises:
        RuntimeError: if opts.iterations is less than 1.
    """
    # Parse the arguments, which either come from the command line or a list
    # provided by the Python code calling this function
    parser = CreateParser()
    (opts, args) = parser.parse_args(argList)

    print "Starting constrained multi-sample Steiner forest %s" % time.strftime(
        "%a, %d %b %Y %H:%M:%S", time.localtime())
    print "Multi-PCSF version %s" % __version__
    print "Parameters: %s" % opts

    # TODO Add error checking of inputs
    if opts.iterations < 1:
        raise RuntimeError("Must have at least 1 iteration")
    # TODO Should allow the option to run serially without the pool because a
    # pool with 1 worker is not efficient
    # A worker count below 1 is replaced by the number of CPUs
    if opts.workers < 1:
        opts.workers = multiprocessing.cpu_count()

    # Assume negative prizes to implement the common set
    # and change if using positive common set prizes
    negativePrizes = True
    if "positive" in opts.artificialPrizes:
        negativePrizes = False

    # Assume unweighted prizes
    # NOTE(review): presumably artificialPrizes is a string, in which case
    # this test is case-sensitive (capital "W") - confirm against the parser
    weightedPrizes = False
    if "Weighted" in opts.artificialPrizes:
        weightedPrizes = True

    # Assume batch mode
    batchMode = True
    if opts.iterMode == "random":
        batchMode = False

    # Load all of the proteins in the interactome, ignoring
    # genes.  The artificial prizes will be created for a subset of these nodes.
    allProts = LoadProteins(opts.interactomePath, opts.undirectedFile,
                            opts.directedFile, opts.tfdnaFile)

    # Load the negative prizes for the degree penalties or an empty dictionary
    # if they aren't being used
    # The literal string "None" (not the None object) marks a missing
    # directed file
    directedFile = "None"
    if opts.directedFile != "None":
        directedFile = os.path.join(opts.interactomePath, opts.directedFile)
    degPenalties = NetworkUtil.DegreePenalties(
        opts.mu, os.path.join(opts.interactomePath, opts.undirectedFile),
        directedFile)

    # Create the initial stp files
    # New directory to hold the original data before the iterations begin
    # These stp files will be read and updated at subsequent iterations
    initPath = os.path.join(opts.resultPath, "initial")
    if not os.path.exists(initPath):
        os.makedirs(initPath)

    # Load the list of terminal files and the sample-to-group mapping
    terminalMap, sampleMap, countMap = LoadTerminalFiles(
        opts.terminalPath, opts.masterTerminalFile)
    # Store the groups in a fixed order
    groups = sorted(terminalMap.iterkeys())
    for group in groups:
        print "%d samples in group %s" % (countMap[group], group)

    # Create a pool for creating .stp files and learning Steiner forests in parallel
    # using the specified number of workers.  Use it to create the initial
    # .stp files.  Even when running the subsequent iterations in random sequential
    # order, create a pool to learn the initial trees and final pruned trees (if applicable).
    print "Creating a pool with %d workers" % opts.workers
    pool = multiprocessing.Pool(opts.workers)
    # Maps each group to the list returned by pool.map over CreateStpHelper
    initialStpMap = dict()
    for group in groups:
        terminalFiles = terminalMap[group]
        sampleNames = sampleMap[group]
        # opts and initPath are invariant arguments for each sample
        zippedArgs = itertools.izip(itertools.repeat(opts),
                                    itertools.repeat(initPath), terminalFiles,
                                    sampleNames)
        initialStpMap[group] = pool.map(
            CreateStpHelper, zippedArgs)  # Blocks until all are finished

    # Store which proteins don't have prizes for each patient.
    # These are the nodes that could potentially be Steiner nodes for
    # each sample.  This can't be recovered from the stp files at later
    # iterations because both original prizes and artificial prizes will exist.
    # Also track how the dummy node will be connected
    # to the networks, either all prizes or all non-prizes (potential Steiner nodes)
    potentialSteinerMap = dict()
    dummyNeighborMap = dict()
    for group in groups:
        numSamples = countMap[group]
        sampleNames = sampleMap[group]
        initialStps = initialStpMap[group]
        potentialSteiner = []  # A list of sets
        dummyNeighborFiles = []  # A list of filenames
        for i in range(numSamples):
            dnFile = sampleNames[i] + "_dummyNeighbors.txt"
            dummyNeighborFiles.append(dnFile)
            potentialSteiner.append(
                DummyNeighbors(allProts, initPath, initialStps[i], dnFile,
                               opts.dummyNeighbors))
        potentialSteinerMap[group] = potentialSteiner
        dummyNeighborMap[group] = dummyNeighborFiles

    # Directory for the first iteration's files
    itrPath = os.path.join(opts.resultPath, "itr1")
    if not os.path.exists(itrPath):
        os.makedirs(itrPath)

    # Initialize the artificial prizes to be an empty dictionary so that
    # we learn the initial trees independently
    artificialPrizes = dict()
    # Write the unused itr1 artificial prizes so that the files exist for post-processing
    for group in groups:
        NetworkUtil.WriteDict(
            os.path.join(itrPath, "artificialPrizes_%s.txt" % group),
            artificialPrizes)
    print "%d artificial prizes at iteration 1" % len(artificialPrizes)

    # Add the degree penalties to the initial stp files.  Pass in the empty artificial prize
    # dictionary, which won't have an effect.
    for group in groups:
        sampleNames = sampleMap[group]
        numSamples = countMap[group]
        potentialSteiner = potentialSteinerMap[group]
        dummyNeighborFiles = dummyNeighborMap[group]
        for i in range(numSamples):
            # Copy the dummy neighbors, which must be in the same directory as the stp file
            UpdateStp(artificialPrizes, degPenalties, potentialSteiner[i],
                      initPath, itrPath, sampleNames[i])
            shutil.copyfile(os.path.join(initPath, dummyNeighborFiles[i]),
                            os.path.join(itrPath, dummyNeighborFiles[i]))

    # Learn the first iteration Steiner forests in parallel
    # Run single-threaded belief propagation when using the worker pool
    lastForestMap = dict()
    for group in groups:
        numSamples = countMap[group]
        sampleNames = sampleMap[group]
        dummyNeighborFiles = dummyNeighborMap[group]
        # NOTE(review): the trailing repeat(1) is presumably the thread count
        # mentioned above - confirm against LearnSteinerHelper
        zippedArgs = itertools.izip(itertools.repeat(opts),
                                    itertools.repeat(itrPath),
                                    itertools.repeat(itrPath), sampleNames,
                                    dummyNeighborFiles, itertools.repeat(1))
        pool.map(LearnSteinerHelper, zippedArgs)
        lastForests = [
        ]  # A list of sets, where each set contains the Steiner forest nodes
        # Forest nodes are read from
        # <itrPath>/symbol_<sample>_<W>_1.0_<depth>.txt
        for i in range(numSamples):
            lastForests.append(
                LoadForestNodes(
                    "%s/symbol_%s_%s_1.0_%d.txt" %
                    (itrPath, sampleNames[i], str(opts.W), opts.depth)))
        lastForestMap[group] = lastForests

    # Learn the forests at all remaining iterations and return the directory
    # that contains the forests from the last iteration.
    if opts.iterations > 1:
        if batchMode:
            itrPath = Batch(opts, pool, initPath, allProts, sampleMap,
                            potentialSteinerMap, dummyNeighborMap,
                            lastForestMap, countMap, weightedPrizes,
                            negativePrizes, degPenalties)
        else:
            # The random sequential mode is not given the pool
            itrPath = RandSequential(opts, initPath, allProts, sampleMap,
                                     potentialSteinerMap, dummyNeighborMap,
                                     lastForestMap, countMap, weightedPrizes,
                                     negativePrizes, degPenalties)

    # Prune Steiner nodes from the forests that are not used to reach any prizes and
    # are only present because they were in the common set.
    # This is not necessary if only 1 iteration was run because in that case there
    # is no common set.
    # It is also not necessary if negative prizes were used.
    if opts.iterations > 1 and (not negativePrizes):
        print "Learning final forests"
        print "Pruning forests from %s" % itrPath
        finalPath = os.path.join(opts.resultPath, "final")
        if not os.path.exists(finalPath):
            os.makedirs(finalPath)

        # Nothing is returned by these operations so they can be performed
        # simultaneously independent of the groupings
        sampleNames = FlattenDict(sampleMap, groups)
        dummyNeighborFiles = FlattenDict(dummyNeighborMap, groups)
        potentialSteiner = FlattenDict(potentialSteinerMap, groups)

        # Filter each sample's stp edges using its last-iteration forest and
        # copy its dummy neighbor file next to the output in finalPath
        for i in range(len(sampleNames)):
            forestFile = "%s/symbol_%s_%s_1.0_%d.txt" % (
                itrPath, sampleNames[i], str(opts.W), opts.depth)
            FilterStpEdges(forestFile, initPath, finalPath, sampleNames[i],
                           degPenalties, potentialSteiner[i])
            shutil.copyfile(os.path.join(initPath, dummyNeighborFiles[i]),
                            os.path.join(finalPath, dummyNeighborFiles[i]))

        zippedArgs = itertools.izip(itertools.repeat(opts),
                                    itertools.repeat(finalPath),
                                    itertools.repeat(finalPath), sampleNames,
                                    dummyNeighborFiles, itertools.repeat(1))
        pool.map(LearnSteinerHelper, zippedArgs)

    print "Finishing constrained multi-sample Steiner forest %s" % time.strftime(
        "%a, %d %b %Y %H:%M:%S", time.localtime())

    # Stop the pool from accepting further tasks; it is not joined here
    pool.close()
Example #26
0
def Batch(opts, pool, initPath, allProts, sampleMap, potentialSteinerMap,
          dummyNeighborMap, lastForestMap, countMap, weightedPrizes,
          negativePrizes, degPenalties):
    """Learn the forests for iterations 2..opts.iterations in batch mode.

    For every iteration a directory <resultPath>/itr<N> is created if it is
    missing.  Within each group the artificial prizes are rebuilt from that
    group's forests of the previous round and written to disk, the stp files
    are updated, the forests are learned through pool.map, and the resulting
    forest nodes replace the group's entry in lastForestMap (the dictionary
    is modified in place).

    Returns:
        The directory of the last iteration that was run, or initPath when
        opts.iterations is below 2.

    Raises:
        RuntimeError: if the per-sample lists of a group do not all have
            countMap[group] entries.
    """
    print("Learning forests in parallel batch mode")

    # Location of the forest node file of one sample within an iteration dir
    forestPattern = "%s/symbol_%s_%s_1.0_%d.txt"

    # Rounds 2 and later; the first round was learned by the caller
    roundDir = initPath
    for roundNum in range(2, opts.iterations + 1):
        roundDir = os.path.join(opts.resultPath, "itr%d" % roundNum)
        if not os.path.exists(roundDir):
            os.makedirs(roundDir)

        # Forests are only constrained to resemble those of samples that
        # belong to the same group
        for group in sampleMap.iterkeys():
            names = sampleMap[group]
            expected = countMap[group]
            steinerSets = potentialSteinerMap[group]
            neighborFiles = dummyNeighborMap[group]
            prevForests = lastForestMap[group]

            # All per-sample lists must agree with the group's sample count
            perSample = (names, steinerSets, neighborFiles, prevForests)
            if any(len(entries) != expected for entries in perSample):
                raise RuntimeError(
                    "Must have the same number of samples in group %s" % group)

            # Derive the new artificial prizes from the previous forests
            if weightedPrizes:
                # lambda2 is used as the alpha parameter
                prizes = CreateWgtPrizes(allProts, prevForests, opts.lambda1,
                                         opts.lambda2, negativePrizes)
            else:
                prizes = CreateUnwgtPrizes(allProts, steinerSets, prevForests,
                                           opts.lambda1, opts.lambda2,
                                           negativePrizes)
            prizeFile = os.path.join(roundDir,
                                     "artificialPrizes_%s.txt" % group)
            NetworkUtil.WriteDict(prizeFile, prizes)
            print("%d artificial prizes in group %s at iteration %d" % (
                len(prizes), group, roundNum))

            # Refresh each sample's stp file with the new prizes and degree
            # penalties, and place its dummy neighbor file in this round's
            # directory
            for name, steiner, dnFile in zip(names, steinerSets,
                                             neighborFiles):
                UpdateStp(prizes, degPenalties, steiner, initPath, roundDir,
                          name)
                shutil.copyfile(os.path.join(initPath, dnFile),
                                os.path.join(roundDir, dnFile))

            # Learn this group's new Steiner forests in parallel
            jobArgs = itertools.izip(itertools.repeat(opts),
                                     itertools.repeat(roundDir),
                                     itertools.repeat(roundDir), names,
                                     neighborFiles, itertools.repeat(1))
            pool.map(LearnSteinerHelper, jobArgs)

            # Remember the forests just learned for the next round
            lastForestMap[group] = [
                LoadForestNodes(forestPattern %
                                (roundDir, name, str(opts.W), opts.depth))
                for name in names
            ]

    return roundDir