Example #1
0
def drawStructure(Sequence, Shapefile, outfile):
    """Run VARNA to draw a structure coloured by the values in ``Shapefile``.

    The second tab-separated column of every line returned by
    ``FF.Parsefile(Shapefile)`` is converted to a float, formatted with three
    decimals, and the values are joined with ';' to form VARNA's ``-colorMap``
    argument.

    Args:
        Sequence: value passed to VARNA's ``-i`` option.
        Shapefile: tab-separated file whose second column holds the values.
        outfile: value passed to VARNA's ``-o`` option.

    Raises:
        OSError: if the ``java`` executable cannot be started.

    Note:
        ``VARNAv3-93.jar`` is referenced by a relative path, so it must be
        present in the current working directory.
    """
    # Local import keeps this block self-contained; the file's import header
    # is not guaranteed to provide subprocess.
    import subprocess

    color_values = ";".join(
        "%.3f" % float(record.strip().split('\t')[1])
        for record in FF.Parsefile(Shapefile))

    # An argument list (no shell) means paths containing spaces or shell
    # metacharacters reach VARNA unchanged instead of corrupting the command.
    command = [
        'java', '-cp', 'VARNAv3-93.jar',
        'fr.orsay.lri.varna.applications.VARNAcmd',
        '-i', Sequence,
        '-colorMap', color_values,
        '-colorMapStyle',
        '-5.00:#4747B6,0.00:#4747FF,0.40:#1CFF47,0.70:#FF4747,2.41:#FFFF00',
        '-algorithm', 'line',
        '-o', outfile,
    ]
    # Standard output is discarded, as the former "> /dev/null" did.
    with open(os.devnull, 'w') as devnull:
        subprocess.call(command, stdout=devnull)
Example #2
0
def Adjust_structure(File, directory, IndexRna):
    """Write the structures of ``File`` with the '_' columns removed.

    The first line returned by ``FF.Parsefile(File)`` is scanned for '_'
    characters. The characters at those positions are dropped from every line
    in ``lines[2:-2]``, and the results are written to
    ``<directory>/<IndexRna>MSA`` after a ``>``-prefixed header line.

    Args:
        File: path of the file to read.
        directory: folder in which the output file is created.
        IndexRna: name used for both the output file and its header.

    Raises:
        IndexError: if the file is empty, or if a '_' position lies beyond the
            end of one of the processed lines.
    """
    # Parse once; the original read the same file twice.
    lines = FF.Parsefile(File)
    gap_columns = [col for col, char in enumerate(lines[0]) if char == '_']

    fileout = os.path.join(directory, IndexRna + 'MSA')
    # The context manager closes the handle even if a write fails.
    with open(fileout, "w") as out:
        out.write('>' + IndexRna + 'MSA\n')
        for struct in lines[2:-2]:
            chars = list(struct)
            for col in gap_columns:
                chars[col] = ""
            out.write("".join(chars))
Example #3
0
def FromStructFiletoRNAEvalInput(StructFile, InputRNAeval, rna):
    """Generate the intermediate input file consumed by RNAeval.

    For every line of ``StructFile`` except the first, writes the sequence
    immediately followed by that line and a tab character
    (sequence+structure, sequence+structure, ...).

    Args:
        StructFile: path of the structure file, read with ``FF.Parsefile``.
        InputRNAeval: path of the file to create (overwritten if it exists).
        rna: sequence text written in front of each structure.
    """
    lines = FF.Parsefile(StructFile)
    # The first entry is skipped (presumably a header line; confirm against
    # the producer of StructFile).
    # The context manager closes the handle even if a write fails.
    with open(InputRNAeval, "w") as out:
        for struct in lines[1:]:
            out.write("%s%s\t" % (rna, struct))
Example #4
0
def EnergyValuesFromStructure(StructFile, rna):
    """Evaluate every structure of ``StructFile`` with RNAeval.

    Writes the RNAeval input to ``InputRNAeval`` and the RNAeval output to
    ``energyvalues``, both in the current working directory, then extracts
    one energy per structure from the output.

    Args:
        StructFile: path of the structure file.
        rna: sequence the structures belong to.

    Returns:
        List of energy values as strings (not converted to float), taken from
        every second output line starting at index 1.
    """
    # Generate the RNAeval input file.
    FromStructFiletoRNAEvalInput(StructFile, "InputRNAeval", rna)
    # Launch the RNAeval command.
    os.system('RNAeval <InputRNAeval>energyvalues')
    # Parse the RNAeval output to extract the energy values.
    lines = FF.Parsefile("energyvalues")
    # Split on the FIRST space only: RNAeval may pad the number inside the
    # parentheses, e.g. "( -3.40)", and a plain split(" ") would then return
    # just "(". [1:-2] drops the opening parenthesis and the last two
    # characters (presumably ")" plus the line ending kept by Parsefile).
    return [line.split(" ", 1)[1][1:-2] for line in lines[1::2]]
Example #5
0
def GetBasePairsFromStructFile(faPath):
    """Load a structure file and compute the base pairs of each line.

    Only the text before the first space of each stripped line is passed to
    ``ListBasePairsFromStruct``.

    Args:
        faPath: path of the file, read with ``FF.Parsefile``.

    Returns:
        Tuple ``(count, DicStruct)`` where ``count`` is the number of lines and
        ``DicStruct`` maps each line index to the result of
        ``ListBasePairsFromStruct`` for that line.
    """
    lines = FF.Parsefile(faPath)
    # The former unused "SeqLen = len(lines[1]) - 1" was removed: it had no
    # effect except raising IndexError on files with fewer than two lines.
    DicStruct = {
        index: ListBasePairsFromStruct(line.strip().split(' ')[0])
        for index, line in enumerate(lines)
    }
    return len(lines), DicStruct
Example #6
0
def CentroidBycluster(clusters, StructFile, Boltzmann, numberofsruct, constrainte, rna):
    """Compute one MEA centroid per cluster plus inter-cluster distances.

    Steps, in order:
      1. Convert every line of ``StructFile`` with ``SF.BasePairsFromStruct``.
      2. Gather base pairs per cluster with ``BasePairsbyCluster``.
      3. Delete from ``clusters`` the entries flagged by ``ClustersDiameter``.
      4. Compute ``E`` with ``ClustersDistances``.
      5. Call ``MEA`` for each remaining cluster.
      6. Fill two symmetric matrices over pairs of remaining clusters.

    Args:
        clusters: mapping keyed by cluster number. It is MUTATED in place:
            the clusters listed by ``ClustersDiameter`` are deleted from it.
        StructFile: path of the structure file, read with ``FF.Parsefile``.
        Boltzmann: forwarded to ``BasePairsbyCluster`` and ``ClustersDistances``.
        numberofsruct: forwarded to the same two helpers.
        constrainte: forwarded to the same two helpers.
        rna: forwarded to ``MEA``.

    Returns:
        Tuple ``(mycentroid, Boltzmancluster, E, MatriceDistanceCentroids,
        ListDiameters, Intradistance)``. ``MatriceDistanceClustersEucld`` is
        computed but not returned.
    """
    progress.StartTask("Computing centroids")
    # Measured BEFORE clusters are removed below, so both matrices keep the
    # original number of clusters as their dimension.
    dim_clustering = len(clusters)
    # Placeholder only: E is rebound to the result of ClustersDistances below.
    E = defaultdict()
    mycentroid = defaultdict()
    Intradistance = []
    centroids = defaultdict(lambda: defaultdict())
    # Placeholder only: Myproba is rebound by BasePairsbyCluster below.
    Myproba = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 0)))

    # Every line of the file is converted; no header line is skipped here.
    ListStructures = [SF.BasePairsFromStruct(Struct) for Struct in FF.Parsefile(StructFile)]

    progress.StartTask("Gathering base pairs")
    ListBPbystructure, ListBP, Myproba, Boltzmancluster = BasePairsbyCluster(clusters, ListStructures, Boltzmann,
                                                                             numberofsruct,
                                                                             constrainte)
    # Remove the clusters flagged by ClustersDiameter (per the original note:
    # clusters reporting a single structure).
    ListDiameters, Listeliminated_clusers = ClustersDiameter(clusters, ListBPbystructure)
    for elem in Listeliminated_clusers:
        del clusters[elem]
    progress.EndTask()

    progress.StartTask("Computing cluster distance distribution")
    E = ClustersDistances(clusters, Boltzmann, ListBPbystructure, numberofsruct, constrainte)
    progress.EndTask()
    progress.StartTask("Computing MEA centroids")
    for ClusterNumber in clusters:
        mycentroid[ClusterNumber], centroids[ClusterNumber] = MEA(Myproba[ClusterNumber], rna)
    progress.EndTask()

    # NOTE(review): both matrices are indexed directly by cluster key, which
    # assumes the keys are integers in [0, dim_clustering) - confirm.
    MatriceDistanceCentroids = scipy.zeros([dim_clustering, dim_clustering])
    MatriceDistanceClustersEucld = scipy.zeros([dim_clustering, dim_clustering])
    for ClusterNumber in clusters.keys():
        for ClusterNumber2 in clusters.keys():
            # Each unordered pair is handled once; both triangles are filled.
            if ClusterNumber2 > ClusterNumber:
                l = SF.DistanceTwoStructs(centroids[ClusterNumber], centroids[ClusterNumber2])
                #print "BP_centoid_distances", "\t", ClusterNumber, "\t", ClusterNumber2, "\t", l
                Intradistance.append(l)
                # print "distance between clusters comparing the centroide's distances",l
                MatriceDistanceCentroids[ClusterNumber][ClusterNumber2] = l
                MatriceDistanceCentroids[ClusterNumber2][ClusterNumber] = l
                # print "distance between clusters comparing the means distances", ClusterNumber, ClusterNumber2, np.abs(E[ClusterNumber]-E[ClusterNumber2]),np.sqrt(abs(pow(E[ClusterNumber],2)-pow(E[ClusterNumber2],2)))
                # print E
                # Second measure: sqrt(|E_i^2 - E_j^2|) from the per-cluster
                # values in E.
                l = np.sqrt(abs(pow(E[ClusterNumber], 2) - pow(E[ClusterNumber2], 2)))
                MatriceDistanceClustersEucld[ClusterNumber][ClusterNumber2] = l
                MatriceDistanceClustersEucld[ClusterNumber2][ClusterNumber] = l
            # print "distance between clusters compring the centroide's distances", ClusterNumber, ClusterNumber2, DistanceTwoBPlist(ListBPbystrcut[ClusterNumber][listCentroidStructure[ClusterNumber][0]],ListBPbystrcut[ClusterNumber2][listCentroidStructure[ClusterNumber2][0]])
    # VT.plotDistanceClusters(MatriceDistanceCentroids, clusters, "blue", " Base pair distance between centroids")
    # VT.plotDistanceClusters(MatriceDistanceClustersEucld, clusters, "red", "Eucledian distance between structures")
    #print "BZ_distance_btw_clusters", "\t", E
    # Fourth EndTask for four StartTask calls; presumably this one closes the
    # outer "Computing centroids" task - confirm against progress semantics.
    progress.EndTask()
    return mycentroid, Boltzmancluster, E, MatriceDistanceCentroids, ListDiameters, Intradistance
Example #7
0
def ENERGY_VALUES_STRUCTURES(StructFile, rna):
    """Return the RNAeval energy of every structure listed in ``StructFile``.

    Two files are created in the current working directory: ``InputRNAeval``
    (the RNAeval input) and ``energyvalues`` (the RNAeval output).

    Args:
        StructFile: path of the structure file.
        rna: sequence the structures belong to.

    Returns:
        List of energy values as strings (not converted to float), one per
        odd-indexed line of the RNAeval output.
    """
    # Generate the RNAeval input file.
    FromStructFiletoRNAEvalInput(StructFile, "InputRNAeval", rna)
    # Launch the RNAeval command.
    os.system('RNAeval <InputRNAeval>energyvalues')
    # Parse the RNAeval output to extract the energy values.
    output_lines = FF.Parsefile("energyvalues")
    energies = []
    for result_line in output_lines[1::2]:
        # Split on the first space only, so an energy padded inside the
        # parentheses (e.g. "( -3.40)") is kept whole instead of being cut
        # down to "(". [1:-2] drops the opening parenthesis and the last two
        # characters (presumably ")" plus the line ending kept by Parsefile).
        energies.append(result_line.split(" ", 1)[1][1:-2])
    return energies
Example #8
0
def Load_Probabilities(mypath):
    """Read every ``*.proba`` file found directly inside ``mypath``.

    Each line of a file is split on tabs into three fields: two integer
    indices and a float value.

    Args:
        mypath: folder to scan.

    Returns:
        Tuple ``(Dic, B)``. ``Dic`` maps a running file index to the file name
        up to its first ".proba". ``B[index][i][j]`` holds the float read for
        the index pair ``(i, j)``; missing entries default to 0. File indices
        follow the order returned by ``listdir``.
    """
    proba_files = []
    for entry in listdir(mypath):
        if isfile(join(mypath, entry)) and entry.endswith('.proba'):
            proba_files.append(entry)

    Dic = {}
    B = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 0)))
    for index, name in enumerate(proba_files):
        Dic[index] = name.split(".proba")[0]
        for record in FF.Parsefile(mypath + "/" + name):
            fields = record.split("\t")
            row, column = int(fields[0]), int(fields[1])
            B[index][row][column] = float(fields[2])

    return Dic, B
Example #9
0
def RunEval(InputFile):
    """Run RNAeval on ``InputFile`` and return the energies it reports.

    The RNAeval output is written to ``<OutputFolder>/tmp/energyvalues``,
    where ``OutputFolder`` comes from ``loadConfig()``.

    Args:
        InputFile: path of the RNAeval input file.

    Returns:
        List of energy values as strings (not converted to float), one per
        odd-indexed line of the RNAeval output.

    Raises:
        IOError/OSError: if ``InputFile`` cannot be read, the output file
            cannot be created, or the ``RNAeval`` executable cannot be started.
    """
    # Local import keeps this block self-contained; the file's import header
    # is not guaranteed to provide subprocess.
    import subprocess

    conf = loadConfig()
    energiesFile = os.path.join(conf.OutputFolder, "tmp", "energyvalues")
    # Explicit file handles replace the shell redirection, so paths with
    # spaces or shell metacharacters no longer break the command.
    with open(InputFile) as eval_input, open(energiesFile, "w") as eval_output:
        subprocess.call(['RNAeval'], stdin=eval_input, stdout=eval_output)

    # Parse the RNAeval output to extract the energy values.
    lines = FF.Parsefile(energiesFile)
    # Split on the first space only, so an energy padded inside the
    # parentheses (e.g. "( -3.40)") stays in one piece. [1:-2] drops the
    # opening parenthesis and the last two characters.
    return [line.split(" ", 1)[1][1:-2] for line in lines[1::2]]
Example #10
0
def GetBasePairsFromStructFile(faPath):
    """Load a structure file, report its size, and compute base pairs.

    Only the text before the first space of each stripped line is kept as the
    structure. A summary with the total and distinct structure counts is sent
    to ``progress.Print``.

    Args:
        faPath: path of the file, read with ``FF.Parsefile``.

    Returns:
        Tuple ``(count, DicStruct)`` where ``count`` is the number of lines and
        ``DicStruct`` maps each line index to the result of
        ``BasePairsFromStruct`` for that line.
    """
    lines = FF.Parsefile(faPath)
    # The former unused "SeqLen = len(lines[1]) - 1" was removed: it had no
    # effect except raising IndexError on files with fewer than two lines.
    rawStructs = [line.strip().split(' ')[0] for line in lines]
    DicStruct = {
        index: BasePairsFromStruct(sec_str)
        for index, sec_str in enumerate(rawStructs)
    }
    progress.Print("Loaded %s structures (%s distinct)" %
                   (len(rawStructs), len(set(rawStructs))))
    return len(lines), DicStruct
Example #11
0
# Numeric parameters forwarded to SP.StructSampling as its last two arguments.
# NOTE(review): presumably the slope/intercept of the SHAPE pseudo-energy
# term, each halved - confirm against SP.StructSampling.
m = 2.6 / 2
b = -0.8 / 2
# Folder scanned for input files and the extension used to select them.
path_Fasta = 'fasta_files'
Alignementfolder = 'Alignement'
FileExtensionFasta = 'fa'
print("Sampling Process for % s Structures" % (conf.numberofsruct))
# NOTE: this result is overwritten inside the loop below, where OutputSamples
# is rebound to a folder-name string.
OutputSamples = SP.StructSampling(
    [conf.PathConstrainteFile, conf.PathConstrainteFileShape],
    Alignementfolder, conf.numberofsruct, conf.Temperature,
    conf.Fastaextenstion, m, b)

# Process each entry returned by GetListFile; the entry is used as the file
# name without extension (the extension is appended again below).
for filz in GetListFile(path_Fasta, FileExtensionFasta):
    print filz, "Treatement "
    startimebig = time.time()
    # The sequence is the second entry returned by Parsefile (index 1).
    rna = FF.Parsefile(
        os.path.join(path_Fasta, filz + '.' + FileExtensionFasta))[1]
    Indexe = filz
    # conf.numberofsruct is concatenated directly, so it must be a string.
    SVMlFile = "DissimilarityMatrix" + conf.numberofsruct
    # One name per condition: the file name suffixed with each state label.
    listfiles = [filz + state for state in ["NMIA", "1M7", "MSA"]]
    OutputSamples = 'OutputSamples' + conf.numberofsruct
    MFESnbrstruct = len(
        listfiles)  # 1 for the case where no constraint is given
    FF.MergeFiles(OutputSamples, os.path.join(OutputSamples, 'Samples.txt'),
                  listfiles, 1)
    #endtime=time.time()
    #print("Sampling done with success in  %53f\t"%(endtime-startime))

    # ---- Distance matrix calculation ----
    # Start the timer for this step.
    startime = time.time()
Example #12
0
def FromStructFiletoRNAEvalInput(StructFile, InputRNAeval, rna):
    """Build the RNAeval input file from a structure file.

    Reads ``StructFile`` with ``FF.Parsefile``, drops the first
    ``SP.NUM_HEADER_LINES`` entries and hands the rest to
    ``StructsToRNAEvalInput`` together with the output path and the sequence.

    Args:
        StructFile: path of the structure file.
        InputRNAeval: output path forwarded to ``StructsToRNAEvalInput``.
        rna: sequence forwarded to ``StructsToRNAEvalInput``.
    """
    parsed = FF.Parsefile(StructFile)
    structures = parsed[SP.NUM_HEADER_LINES:]
    StructsToRNAEvalInput(structures, InputRNAeval, rna)
Example #13
0
    FF.CreateFold(os.path.join(conf.OutputFolder, "tmp", conf.PickledData))

    # Redirects all the print to the output Log file
    sys.stdout = Logger(os.path.join(conf.OutputFolder, conf.OutputLogfile))

    # ******************************** Generate sample

    try:
        rna = os.path.split(conf.RNA)[-1]
        RNAName = rna[:-(len(FASTA_EXTENSION) + 1)]
        progress.StartTask("Processing RNA %s" % (RNAName))
        if not os.path.isfile(conf.RNA):
            raise FF.IPANEMAPError("Input file '%s' not found" % (conf.RNA))

        # Get the rna sequence
        RNASequence = FF.Parsefile(conf.RNA)[1].strip()

        # Get probing conditions for the treated RNA
        ProbingConditions = [RNAName + state for state in conf.Conditions]

        # Specify  whether to generate new sample or use a previously  generated one
        OutputSamples = os.path.join(conf.OutputFolder, "tmp",
                                     'OutputSamples') + conf.SampleSize
        if str.lower(
                conf.Sampling) == "true" or not os.path.isdir(OutputSamples):
            progress.StartTask("Sampling %s structures for each condition" %
                               (conf.SampleSize))
            OutputSamples = SP.StructSampling(
                [conf.PathConstraintsFile, conf.PathConstraintsFileShape],
                ProbingConditions, int(conf.SampleSize), conf.Temperature,
                conf.m, conf.b, conf.RNA)
Example #14
0
#!/usr/bin/env python2.7
#@author SAADI AFAF, 2016
import conf , FileFunctions as FF, Sampling as SP, StructureFunctions as SF, Clustering as CL, VisualizationTools as VT, ClustersTrait as CT
import time,os,sys,pickle
from collections import defaultdict


# Redirect everything printed from here on to Logfile.txt.
sys.stdout =conf.Logger("Logfile.txt")

# conf.numberofsruct is concatenated directly, so it must be a string.
SVMlFile="DissimilarityMatrix"+conf.numberofsruct
# The sequence is the second entry returned by Parsefile (index 1).
rna = FF.Parsefile(conf.rnafile)[1]
# Configured constraint names plus one extra "MFES" entry.
listfiles=conf.constraintes+["MFES"]

# ---- Global study: compare the dot-plot matrices ----
print '**************Calculation of Eucledian distance between different BP dot plot conditions**********'
SF.DotplotRnaFold(conf.Psdotpath, conf.PathConstrainteFile, conf.PathConstrainteFileShape)
SF.Writeproba(conf.Psdotpath, conf.Matrixproba, conf.constraintes, rna)
VT.plotClusteringDistribution(int(conf.numberofsruct), conf.Matrixproba, len(rna))
print "Eucledian distance calculation done"



# ---- SAMPLING ----
# This step requires as input the fasta_constraints and fasta_Shape folders.
# Per the original note, it uses RNAsubopt to generate a sample of structures
# that is written to "Sample.txt" under OutputSamples_<number of structures>.

print '****************Sampling RNA Secondary Structures***********************'
# Start the timer for the sampling step.
startime=time.time()
print("Sampling Process for % s Structures"%(conf.numberofsruct))
OutputSamples=SP.StructSampling([conf.PathConstrainteFile,conf.PathConstrainteFileShape],conf.numberofsruct,conf.Temperature, conf.Fastaextenstion)