# Create a distance matrix file progress.StartTask("Computing dissimilarity matrix") SVMlFile = "DissimilarityMatrix" + conf.SampleSize # Calculate distance and identify redundant structures within the same condition SF.DistanceStruct(os.path.join(OutputSamples, 'Samples.txt'), SVMlFile, int(conf.SampleSize), ProbingConditions) progress.EndTask() ################################# Calculate Conditional Boltzmann probabilities # for each condition, calculate Z over all non redundant structures and return a conditional Boltzmann probability for all structures with null value for redundant ones. progress.StartTask("Computing Boltzmann probabilities") BoltzmannFactor = defaultdict(lambda: defaultdict()) ConditionalBoltzmannProbability = defaultdict(lambda: defaultdict()) Zprobabilities = defaultdict(lambda: defaultdict()) Redondantestructure = FF.UnpickleVariable( "Redondantestructures_Id.pkl") ConditionalBoltzmannProbability = SF.Boltzmann_Calc( ProbingConditions, OutputSamples, int(conf.SampleSize), RNASequence, Redondantestructure) progress.EndTask() ################################# Clustering of structures based on their base pair distance progress.StartTask("Iterative clustering") # Load the pickled dissimilarity matrix DM = FF.UnpickleVariable("dissmatrix.pkl") # Get the list of redundant structures Redundant = [] Redundant = FF.UnpickleVariable("Redondantestructures.pkl") BoltzmannFactor = FF.UnpickleVariable("Boltzman.pkl") method = "MiniBatchKMean" Clusters, CentroidStructure = OC.DefineNumberCluster(
#!!!!!!!!!!!!! Distance Matrix calculation !!!!!!!!!!!
# Compute the pairwise distance matrix over all sampled structures
# (per-input-file samples plus the MFE structures) and time the step.
print("Distance Matrix generation for % d Structures started "
      % (int(conf.numberofsruct) * len(listfiles) + MFESnbrstruct))
# FIX: start the timer here — `startime` is read by the elapsed-time print
# below but was never assigned before use in this chunk.
startime = time.time()
# Calculate distances and flag redundant structures; the helper pickles its
# results (matrix + redundancy bookkeeping) instead of returning them.
SF.DistanceStruct(os.path.join(OutputSamples, 'Samples.txt'), SVMlFile,
                  int(conf.numberofsruct), MFESnbrstruct, listfiles)
endtime = time.time()
print("End of distance calculation between the structures in the sample %53f\t"
      % (endtime - startime))

#!!!!!!!!!!!!!Clustering!!!!!!!!!!!
# Load the pickled dissimilarity matrix and the bookkeeping written by
# SF.DistanceStruct above.  (The original dead-store initializers for
# Redundant and Newnumberofsruct were removed: both were immediately
# overwritten by the unpickled values.)
DM = FF.UnpickleVariable(os.path.join(conf.PickledData, "dissmatrix.pkl"))
Redundant = FF.UnpickleVariable(
    os.path.join(conf.PickledData, "Redondantestructures.pkl"))
Redundant_Id = FF.UnpickleVariable(
    os.path.join(conf.PickledData, "Redondantestructures_Id.pkl"))
Newnumberofsruct = FF.UnpickleVariable(
    os.path.join(conf.PickledData, "Dicnumberofsruct.pkl"))

# Clustering with the DIANA (divisive) algorithm when enabled in the config
# (conf.Diana is compared as the literal string "true").
if conf.Diana == "true":
    startime = time.time()
    Clusters = defaultdict(list)
    # Structure identifiers are 1-based (i + 1).
    structs = [i + 1 for i in range(len(DM))]
    clusters = CL.DIANA.doClustering(DM, structs,
                                     conf.maxDiameterThreshold,
                                     conf.maxAverageDiameterThreshold)
#!!!!!!!!!!!!! Distance Matrix calculation !!!!!!!!!!!
# Compute the pairwise distance matrix over all sampled structures
# (per-constraint samples plus the MFE structures) and time the step.
# FIX: `startime = time.time()` was commented out in the original, yet
# `endtime - startime` is printed below — restore the timer start.
startime = time.time()
print("Distance Matrix generation for % d Structures started "
      % (int(conf.numberofsruct) * len(conf.constraintes) + MFESnbrstruct))
# Calculate distances and flag redundant structures; the helper pickles its
# results instead of returning them.
SF.DistanceStruct(os.path.join(OutputSamples, 'Samples.txt'), SVMlFile,
                  int(conf.numberofsruct), MFESnbrstruct, listfiles)
endtime = time.time()
print("End of distance calculation between the structures in the sample %53f\t"
      % (endtime - startime))

#!!!!!!!!!!!!!Clustering!!!!!!!!!!!
# Load the pickled dissimilarity matrix and redundancy bookkeeping written by
# SF.DistanceStruct above.  (The original dead-store initializers for
# Redundant and Newnumberofsruct were removed: both were immediately
# overwritten by the unpickled values.)
DM = FF.UnpickleVariable(os.path.join(conf.PickledData, "dissmatrix.pkl"))
Redundant = FF.UnpickleVariable(
    os.path.join(conf.PickledData, "Redondantestructures.pkl"))
Redundant_Id = FF.UnpickleVariable(
    os.path.join(conf.PickledData, "Redondantestructures_Id.pkl"))
Newnumberofsruct = FF.UnpickleVariable(
    os.path.join(conf.PickledData, "Dicnumberofsruct.pkl"))

# Clustering with the DIANA (divisive) algorithm when enabled in the config
# (conf.Diana is compared as the literal string "true").
if conf.Diana == "true":
    startime = time.time()
    Clusters = defaultdict(list)
    # Structure identifiers are 1-based (i + 1).
    structs = [i + 1 for i in range(len(DM))]
    clusters = CL.DIANA.doClustering(DM, structs, conf.maxDiameterThreshold,
                                     conf.maxAverageDiameterThreshold)
    # Re-key the cluster list into the defaultdict by cluster index.
    for i in range(len(clusters)):
        Clusters[i] = clusters[i]
    endtime = time.time()
    print("Clusters using Diana algorithm: %s %53f\t" % (Clusters, endtime - startime))