Beispiel #1
0
def main(parclipA, parclipB, start, stop, width, anno=None, annowidth=100,
         logRatio=False, verbose=False):
    """Score how strongly selected sites of set A colocalize with set B.

    The sites in ``parclipA`` are loaded and sorted with key ``'occ'``.
    Starting at index ``start``, up to ``stop - start`` of them are selected
    (optionally only those accepted by ``anno``).  For every selected site,
    ``dataB.getValues`` is queried with ``width`` on both sides and the
    maximum of the returned values is accumulated.

    Args:
        parclipA: path of the PAR-CLIP table whose sites are selected.
        parclipB: path of the PAR-CLIP table that is queried around each
            selected site.
        start: index of the first candidate site in the sorted set A.
        stop: together with ``start`` defines how many sites are selected
            (``stop - start``); must be smaller than the number of sites.
        width: passed twice to ``dataB.getValues`` (presumably the number
            of nt up- and downstream of the site -- confirm against
            ``getValues``).
        anno: optional annotation object; when given, a site is only
            selected if ``anno.isInside(...)[1]`` is truthy.
        annowidth: passed twice to ``anno.isInside`` (presumably the
            extension of the annotation on both sides -- confirm).
        logRatio: if true, return the log2 ratio of the score over
            ``functions.getQuantile(dataB.occ, 0.5)`` instead of the score.
        verbose: if true, print progress information.

    Returns:
        ``(1 + sum of per-site maxima) / (1 + number of sites with values)``,
        or the log2 ratio described above when ``logRatio`` is set.

    Exits the interpreter via ``sys.exit()`` when the indices are invalid.
    """
    tmpA = ParclipSiteContainer()
    dataB = ParclipSiteContainer()
    tmpA.loadFromFile(parclipA)
    tmpA.sort(key='occ')
    dataB.loadFromFile(parclipB)

    if start < 0 or stop < start or stop >= tmpA.size():
        print('Bullshit start and stop indices. Come on! Concentrate!')
        sys.exit()

    dataA = parclipsites.ParclipSites('')
    total = stop - start
    count = 0
    i = start
    # NOTE(review): the scan stops before index size() - 1, so the very last
    # site of A is never considered, even when annotation filtering has not
    # yet yielded `total` sites -- confirm whether this is intended.
    scan_end = tmpA.size() - 1
    while count < total and i < scan_end:
        if verbose:
            functions.showProgress(count, total - 1,
                                   'Selecting PAR-CLIP sites')
        # Without an annotation every site is taken; otherwise only sites
        # for which the second element of isInside()'s result is truthy.
        if anno is None or anno.isInside(tmpA.chrs[i], tmpA.pos[i],
                                         tmpA.strand[i], annowidth,
                                         annowidth)[1]:
            dataA.addSite(tmpA.chrs[i], tmpA.pos[i], tmpA.m[i], tmpA.r[i],
                          tmpA.result[i], tmpA.strand[i], tmpA.occ[i])
            count += 1
        i += 1

    # Both accumulators start at 1 (presumably a pseudo-count that keeps the
    # division and the later logarithm defined when nothing colocalizes).
    coloc = 1
    count_coloc = 1
    if verbose:
        print('\n')
    for j in range(dataA.size()):
        values = dataB.getValues(dataA.chrs[j], dataA.pos[j],
                                 dataA.strand[j], True, width, width)
        # Identity test: `!= None` would be evaluated element-wise (and make
        # the `if` fail) should getValues return an array-like object.
        if values is not None:
            count_coloc += 1
            coloc += max(values)
        if verbose:
            functions.showProgress(j, (dataA.size() - 1),
                                   'Collecting colocolization data')
    coloc = coloc / count_coloc
    if verbose:
        print('')

    if logRatio:
        return math.log(coloc / functions.getQuantile(dataB.occ, 0.5), 2)
    return coloc
Beispiel #2
0
from mockinbird.utils import ParclipSiteContainer

# Command-line entry point: load PAR-CLIP sites, optionally drop the sites
# overlapping a GFF annotation, sort them by the chosen key and write the
# genomic sequences around the selected sites to a fasta file.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Takes PAR-CLIP sites and a genome and saves genomic sequences '
                    'as fasta file around PAR-CLIP sites according to the given '
                    'parameters.',
        epilog="contact: [email protected]")
    # NOTE: all of the following are required positional arguments.  argparse
    # only applies `default` to a positional declared with nargs='?' or '*',
    # so the defaults below are never used and serve as documentation only.
    parser.add_argument('sites',      help='PAR-CLIP file *.table')
    parser.add_argument('genome',     help='path to genome')
    parser.add_argument('fafile',     help='output filename')
    # Double-quoted so that the '' inside the help text is shown literally;
    # with single quotes the literal was split in two and the help read
    # "[default = ]".
    parser.add_argument('filterGFF',  help="set path to GFF if sites should be removed that overlap with the GFF [default = '']", default='')
    parser.add_argument('start',      help='start index of PAR-CLIP sites [default=0]', type=int, default=0)
    parser.add_argument('stop',       help='stop index of PAR-CLIP sites [default=1500]', type=int, default=1500)
    parser.add_argument('width',      help='number of nt +/- the crosslink site [default=15]', type=int, default=15)
    parser.add_argument('additionalFilterWidth', help='number of nt that are added to the start/stop indices of the GFF annotations', type=int, default=20)
    parser.add_argument('key',  help='set key that is used for PAR-CLIP site ordering [default = \'occ\'], options: [\'occ\', \'m\', \'r\', \'mr\', \'pvalue\']', default='occ')
    parser.add_argument('-v', '--verbose', dest='verbose', action="store_true", default=False, help='verbose output')
    args = parser.parse_args()

    # Second constructor argument is passed through unchanged; its meaning is
    # defined by genome.Genome.
    yeast = genome.Genome(args.genome, False)
    sites = ParclipSiteContainer()
    sites.loadFromFile(args.sites)

    if args.verbose:
        print('#sites              : ' + str(sites.size()))
    if args.filterGFF != '':
        # Replace the container by the one returned from the GFF filter.
        anno = gff.GFF(args.filterGFF)
        sites = sites.removeSitesLocatedInGFF(anno, args.additionalFilterWidth)
        # Printed regardless of --verbose, as in the original script.
        print('#sites after removal: ' + str(sites.size()))

    sites.sort(args.key)
    sites.save2Fasta(yeast, args.fafile, args.start, args.stop, args.width)