def SuperSiteAnalyzer(SiteFile1, SiteFile2, SiteFile3, RepeatFile1,
                      RepeatFile2, RepeatFile3, RepeatFile4):
    """Run siteAnalyzer for every (repeat file, site file) combination.

    Each of the three site files is parsed once with siteParser(). Each of
    the four repeat files is then parsed with ParserS() and analysed against
    the three parsed site sets in turn; the site file itself is handed to
    siteAnalyzer() as its third argument.

    Args:
        SiteFile1, SiteFile2, SiteFile3: inputs forwarded to siteParser().
        RepeatFile1 .. RepeatFile4: inputs forwarded to ParserS(), used as
            both its first and its second argument.

    Returns:
        None.
    """
    # ---- parse every site file up front so the work is done only once ----
    site_files = (SiteFile1, SiteFile2, SiteFile3)
    parsed_sites = [siteParser(site_file) for site_file in site_files]

    repeat_files = (RepeatFile1, RepeatFile2, RepeatFile3, RepeatFile4)
    for repeat_file in repeat_files:
        # False is ParserS's third argument, exactly as in every other call.
        repeat_data = ParserS(repeat_file, repeat_file, False)
        for site_data, site_file in zip(parsed_sites, site_files):
            siteAnalyzer(repeat_data, site_data, site_file)
        # Drop the parsed repeats before the next file is parsed so two
        # parsed data sets are never alive at the same time.
        del repeat_data
def SuperSiteAnalyzer(SiteFile, RepeatFile1, RepeatFile2, RepeatFile3, RepeatFile4,
                      RepeatFile5, RepeatFile6, RepeatFile7, RepeatFile8, RepeatFile9,
                      RepeatFile10, RepeatFile11, RepeatFile12, RepeatFile13, RepeatFile14,
                      RepeatFile15, RepeatFile16, RepeatFile17, RepeatFile18, RepeatFile19):
    """Analyse one site file against nineteen repeat files.

    The site file is parsed once with siteParser(); every repeat file is
    parsed with ParserS() and passed, together with the parsed sites and the
    fixed suffix 'CpG', to siteAnalyzer().

    Args:
        SiteFile: input forwarded to siteParser().
        RepeatFile1 .. RepeatFile19: inputs forwarded to ParserS(), used as
            both its first and its second argument.

    Returns:
        None. A progress line is printed for every repeat file.
    """
    # ---- constructing the list of site data (done once, reused below) ----
    sites = siteParser(SiteFile)

    repeat_files = [
        RepeatFile1, RepeatFile2, RepeatFile3, RepeatFile4, RepeatFile5,
        RepeatFile6, RepeatFile7, RepeatFile8, RepeatFile9, RepeatFile10,
        RepeatFile11, RepeatFile12, RepeatFile13, RepeatFile14, RepeatFile15,
        RepeatFile16, RepeatFile17, RepeatFile18, RepeatFile19,
    ]

    for repeat_file in repeat_files:
        # Constructing the list of repeat data for this file.
        repeats = ParserS(repeat_file, repeat_file, False)
        # Single-argument print(...) emits the same text on Python 2 and 3;
        # the double space matches what the old print statement produced.
        print(' now processing the  %s for CpG sites' % (repeat_file,))
        # NOTE(review): parsed data (not file paths) is handed over here;
        # confirm the siteAnalyzer in scope accepts parsed input.
        siteAnalyzer(repeats, sites, 'CpG')

        # Release the parsed repeats before the next file is parsed.
        del repeats
# Example no. 3
# 0
def SuperSiteAnalyzer(SiteFile, RepeatFile1, RepeatFile2, RepeatFile3,
                      RepeatFile4, RepeatFile5, RepeatFile6, RepeatFile7,
                      RepeatFile8, RepeatFile9, RepeatFile10, RepeatFile11,
                      RepeatFile12, RepeatFile13, RepeatFile14, RepeatFile15,
                      RepeatFile16, RepeatFile17, RepeatFile18, RepeatFile19):
    """Run the CpG site analysis for nineteen repeat files.

    siteParser() is called once on SiteFile. For each repeat file, in
    argument order, ParserS() builds the repeat data, a progress message is
    printed, and siteAnalyzer() is called with the repeat data, the parsed
    sites and the suffix 'CpG'.

    Args:
        SiteFile: input forwarded to siteParser().
        RepeatFile1 .. RepeatFile19: inputs forwarded to ParserS(), used as
            both its first and its second argument.

    Returns:
        None.
    """
    #==============Constructing Lists of Site data ==========================================
    sites = siteParser(SiteFile)

    # Tuple rather than a variable called `list`, which hid the builtin.
    all_repeat_files = (
        RepeatFile1, RepeatFile2, RepeatFile3, RepeatFile4, RepeatFile5,
        RepeatFile6, RepeatFile7, RepeatFile8, RepeatFile9, RepeatFile10,
        RepeatFile11, RepeatFile12, RepeatFile13, RepeatFile14, RepeatFile15,
        RepeatFile16, RepeatFile17, RepeatFile18, RepeatFile19,
    )

    for current_file in all_repeat_files:
        # Constructing Lists of Repeat data
        repeats = ParserS(current_file, current_file, False)

        # The Python 2 print statement is a SyntaxError on Python 3; one
        # pre-formatted argument prints identically under both versions.
        message = ' now processing the  %s for CpG sites' % (current_file,)
        print(message)

        # NOTE(review): siteAnalyzer is given parsed data here, not paths;
        # confirm that matches the siteAnalyzer actually in scope.
        siteAnalyzer(repeats, sites, 'CpG')

        # Free this file's repeat data before parsing the next one.
        del repeats
# Example no. 4
# 0
def siteAnalyzer(RepeatMaskerData, siteData, suffix):
    """Accumulate site overlaps onto the consensus of one repeat element.

    Steps:
      1. Read the repeat element name (10th whitespace-separated field) from
         the first line of the RepeatMaskerData file.
      2. Parse the repeats with ParserS() and the sites with siteParser().
      3. Derive the consensus length from the first repeat record and
         allocate a zeroed score list twice that long.
      4. For each of the 24 index groups, look every site up in the repeats
         with binaryS() and let overlap() update the score list.
      5. Trim the score list, print the number of unmatched sites, write the
         scores with printScore() and plot them with graph().

    Args:
        RepeatMaskerData: path of the repeat file; it is opened here and
            also forwarded to ParserS().
        siteData: forwarded unchanged to siteParser().
        suffix: forwarded unchanged to printScore().

    Returns:
        None.
    """
    # ============ Picking the Repeat Element name from the RepeatMaskerData file ==========
    # `with` closes the handle even if the first line is short or unreadable.
    # Plain 'r' replaces 'rU', which Python 3.11+ rejects; only whitespace
    # splitting is done on the line, so newline translation is irrelevant.
    with open(RepeatMaskerData, 'r') as handle:
        fields = handle.readline().split()
    repFamily = fields[9]
    print('Analyzing reads for the following repeat element : %s' % (repFamily,))

    #==============Constructing Lists of RepeatMasker data and Read data =====================
    # False specifies that the parser output is not saved to any file;
    # only the per base scores will be saved.
    repeats = ParserS(RepeatMaskerData, repFamily, False)
    sites = siteParser(siteData)

    #================= getting consensus length of the repeat element =====================
    # one line of repeats looks like this :
    # 318  23.0  3.8  0.0  chr1        15265   15355 (249235266) C  MIR3           SINE/MIR             (119)  143     49      5
    # Field 8 is the strand; which two fields are summed depends on it.
    first_repeat = repeats[0][0]
    if first_repeat[8] == '+':
        length = first_repeat[12] + first_repeat[13]
    else:
        length = first_repeat[11] + first_repeat[12]

    # consensus length is kept twice of the original length as some repeat
    # elements extend beyond the range of consensus
    consensus = [0] * (length * 2)
    unmatched = 0

    #=========== now doing binary search=====================================================
    # this calls binaryS() method, which is a recursive implementation of
    # binary search. The method is present in Overlap.py
    # NOTE(review): 24 groups is hard-coded; presumably one per chromosome
    # (1-22, X, Y) -- confirm against ParserS/siteParser output.
    group_count = 24
    for i in range(group_count):
        for read in sites[i]:
            group_repeats = repeats[i]
            mid = binaryS(group_repeats, read, 0, len(group_repeats) - 1)
            if mid == -2:
                # -2 is treated as "no repeat found for this site".
                unmatched += 1
                continue

            hit = group_repeats[mid]
            # The start field depends on the strand; the end field does not.
            if hit[8] == '+':
                start = hit[11] - 1
            else:
                start = hit[13] - 1
            end = hit[12]
            overlap(consensus, start, end, read[1], read[2], hit[5], hit[6])

    # Keep the consensus length plus a 5% margin (not the bare length).
    # Floor division keeps this an int on Python 3 as well, so the slice
    # below stays valid.
    Newlength = length + (length // 20)
    Newconsensus = consensus[:Newlength]

    print('number of unmatched sites is : %d' % unmatched)
    printScore(Newconsensus, repFamily, suffix)  # prints the consensus to an output file
    graph(Newconsensus, repFamily, True)  # plots the data and saves the figure