def save_genotype_results(self, genotype_sets=Gc.ALL):
    """
    Write the genotype level results to files. A results file is generated
    for each genotype set.

    Note: This method can be used only after `analyze()` has been called on
    the given genotype sets.

    :param genotype_sets: Name of a single genotype set, or list of names of
                          genotype sets, for which to generate the result
                          files. If a value is not explicitly specified for
                          this parameter, result files are written for all
                          genotype sets.

    :return: No return value.
    """

    if self.VERBOSE:
        sys.stdout.write("\nWriting genotype level results ... ")

    # A single name given as a string is wrapped in a list; any other value
    # is handed to the writer unchanged. `isinstance` is used instead of an
    # exact `type` comparison so that `str` subclasses are wrapped as well.
    if isinstance(genotype_sets, str):
        genotype_sets = [genotype_sets]

    Writer.writeSeqAttribs(self.repToGiantDict,
                           self.cmdArgs.outPath,
                           WriterFilter.seqAttribsToIgnore,
                           WriterFilter.seq_attribute_to_order,
                           genotype_sets)

    if self.VERBOSE:
        sys.stdout.write("Done.\n")
def save_network_results(self, genotype_sets=Gc.ALL):
    """
    Write the genotype set level results to file. A file named
    'Genotype_set_measures.txt' is generated in the output directory
    specified at the time of the `Genonets` object creation.

    Note: This method can be used only after `analyze()` has been called on
    the given genotype sets.

    :param genotype_sets: Name of a single genotype set, or list of names of
                          genotype sets, for which to generate the result
                          files. If a value is not explicitly specified for
                          this parameter, result files are written for all
                          genotype sets.

    :return: No return value.
    """

    if self.VERBOSE:
        sys.stdout.write("\nWriting genotype set level results ... ")

    # A single name given as a string is wrapped in a list; any other value
    # is handed to the writer unchanged. `isinstance` is used instead of an
    # exact `type` comparison so that `str` subclasses are wrapped as well.
    if isinstance(genotype_sets, str):
        genotype_sets = [genotype_sets]

    Writer.writeNetAttribs(self.repToNetDict,
                           self.repToGiantDict,
                           self.netBuilder,
                           self.cmdArgs.outPath,
                           WriterFilter.netAttribsToIgnore,
                           WriterFilter.net_attribute_to_order,
                           WriterFilter.genotype_set_to_order,
                           genotype_sets)

    if self.VERBOSE:
        sys.stdout.write("Done.\n")
def save(self, genotype_sets=Gc.ALL):
    """
    Write the genotype networks corresponding to the given genotype sets to
    file. The networks are saved in GML format. For networks with more than
    one component, separate files are generated for the entire network and
    the dominant network.

    Note: This method can be used only after `analyze()` has been called on
    the given genotype sets.

    :param genotype_sets: Name of a single genotype set, or list of names of
                          genotype sets, for which the genotype networks
                          should be written to file. If a value is not
                          explicitly specified for this parameter, files are
                          written for all genotype sets.

    :return: No return value.
    """

    if self.VERBOSE:
        sys.stdout.write("\nWriting GML files for genotype networks ... ")

    # A single name given as a string is wrapped in a list; any other value
    # is handed to the writer unchanged. `isinstance` is used instead of an
    # exact `type` comparison so that `str` subclasses are wrapped as well.
    if isinstance(genotype_sets, str):
        genotype_sets = [genotype_sets]

    Writer.writeNetsToFile(self.repToNetDict,
                           self.repToGiantDict,
                           self.netBuilder,
                           self.cmdArgs.outPath,
                           WriterFilter.gmlAttribsToIgnore,
                           genotype_sets)

    if self.VERBOSE:
        sys.stdout.write("Done.\n")
def __init__(self, arguments):
    """
    Copy the parsed command line arguments onto this object, print them,
    and write them to a file in the output directory.

    :param arguments: Object carrying the parsed arguments as attributes:
                      `alphabetType`, `use_reverse_complements`,
                      `includeIndels`, `inFilePath`, `tau`, `outPath`,
                      `num_procs` and `verbose`.

    :raises GenonetsError: If reverse complements are requested for an
                           alphabet type other than "DNA".
    """

    # Molecule type: RNA, DNA, Protein, etc.
    self.moleculeType = arguments.alphabetType

    # 'Use reverse complements' flag, normalized to a strict boolean
    self.use_reverse_complements = bool(arguments.use_reverse_complements)

    # Reverse complements only make sense for DNA; report the mismatch on
    # stdout and abort construction.
    if self.use_reverse_complements and self.moleculeType != "DNA":
        mismatch = ErrorCodes.RC_ALPHABET_MISMATCH
        print("Error: " + ErrorCodes.getErrDescription(mismatch))

        raise GenonetsError(ErrorCodes.RC_ALPHABET_MISMATCH)

    # Flag to indicate whether shift mutations should be considered; the
    # argument arrives as text and is compared case-insensitively.
    self.useIndels = arguments.includeIndels.lower() == "true"

    # Path to the input file
    self.inFilePath = arguments.inFilePath

    # Lower bound on fitness values to be used.
    self.tau = arguments.tau

    # Path to the output folder. The file writing routines need the path
    # to end with "/", so append one when it is missing.
    out_path = arguments.outPath
    if not out_path.endswith("/"):
        out_path += "/"
    self.outPath = out_path

    # Maximum number of parallel processes to be used
    self.num_procs = arguments.num_procs

    # Verbose flag, normalized to a strict boolean
    self.verbose = bool(arguments.verbose)

    # Collect the effective parameter values; non-string values are
    # stringified.
    params = {
        "alphabetType": self.moleculeType,
        "includeIndels": str(self.useIndels),
        "inFilePath": self.inFilePath,
        "tau": str(self.tau),
        "outPath": self.outPath,
        "useReverseComplements": str(self.use_reverse_complements),
        "num_procs": str(self.num_procs),
        "verbose": str(self.verbose)
    }

    # Print the parsed parameter values
    self.printInParams(params)

    # Write input parameters to file
    Writer.writeInParamsToFile(params, self.outPath)
def save_phenotype_network(self):
    """
    Save the phenotype network as a GML file in the output directory.

    Note: This method can only be used after the phenotype network has
    been created.

    :return: No return value.
    """

    if self.VERBOSE:
        sys.stdout.write("\nWriting GML file for phenotype network ... ")

    # Gather the writer inputs: the network itself, the destination
    # directory, and the attributes to leave out of the GML output.
    network = self.pheno_net
    out_dir = self.cmdArgs.outPath
    ignored_attribs = WriterFilter.gmlAttribsToIgnore

    Writer.writeNetToFile(network, out_dir, ignored_attribs)

    if self.VERBOSE:
        sys.stdout.write("Done.\n")
def overlap(self, r=None):
    """
    Compute the overlap between genotype sets, annotate every giant with
    the result, and write the overlap matrix to file.

    Nothing is done when `self.parallel` is set, or when
    `self.overlapMatrix` has already been populated by an earlier call.

    For each genotype set returned by `self.caller.genotype_sets()`, the
    following values are stored on the corresponding giant:

    - "Overlaps_with_genotypes_in" (per vertex): the genotype sets that
      also contain the vertex's sequence.
    - "Overlapping_genotype_sets": list of all unique genotype sets that
      overlap with any sequence in this giant.
    - "Ratio_of_overlapping_genotype_sets": number of overlapping genotype
      sets divided by the number of other genotype sets; 0 when there are
      no other genotype sets.

    :param r: Not used by this method.

    :return: No return value.
    """

    # Overlap analysis is not allowed during parallel processing,
    # since it needs to be performed only once
    if self.parallel:
        return

    # If overlap has been computed for any of the repertoires already,
    # it does not need to be computed again.
    if self.overlapMatrix:
        return

    # Create the overlap analyzer
    analyzer = OverlapAnalyzer(self.repToGiantDict,
                               self.caller.genotype_sets(),
                               self.bitManip,
                               self.isDoubleStranded,
                               WriterFilter.genotype_set_to_order)

    # Compute overlap data. Note: The list of genotype sets is returned from
    # the function to make sure the order used inside the function is the
    # one used for further calculations here.
    self.overlapMatrix, repertoires, overlap_dict = analyzer.getOverlapData()

    # If the overlap dict was populated, use it to populate the vertex and
    # giant level attributes.
    if overlap_dict:
        genotype_sets = self.caller.genotype_sets()

        # Denominator of the ratio: every genotype set except the one
        # currently being processed. It is the same for all giants.
        num_other_sets = len(genotype_sets) - 1

        for repertoire in genotype_sets:
            giant = self.repToGiantDict[repertoire]

            # All unique repertoires that overlap with this giant
            overlapping_sets = set()

            # Each entry maps a sequence to the list of repertoires that
            # contain that sequence.
            for sequence, targets in overlap_dict[repertoire].items():
                vertex = self.netBuilder.getVertex(sequence, giant)

                giant.vs[vertex.index]["Overlaps_with_genotypes_in"] = \
                    targets

                overlapping_sets |= set(targets)

            # Stored as a list for easy output file writing
            giant["Overlapping_genotype_sets"] = list(overlapping_sets)

            # With a single genotype set there are no other sets to
            # overlap with, so the ratio is defined as 0.
            if num_other_sets:
                ratio = float(len(overlapping_sets)) / num_other_sets
            else:
                ratio = 0

            giant["Ratio_of_overlapping_genotype_sets"] = ratio

    # If overlap matrix was populated, write it to file
    if self.overlapMatrix:
        Writer.writeOverlapToFile(self.overlapMatrix,
                                  repertoires,
                                  self.caller.cmdArgs.outPath)