Ejemplo n.º 1
0
    accession_pat = re.compile('ACC.*ID="([^"]*)";')

    with DeleteOnError(outputfile, gzip.open) as output:
        for line in urllib.urlopen(Paths.MIRBASE_URL):
            if line.startswith('#'):
                continue
            fields = line[:-1].split('\t')
            name = accession_pat.findall(fields[8])[0]
            print >> output, '\t'.join([
                'chr'+fields[0], str(int(fields[3])-1), fields[4],
                'miRNA|%s|%s' % (name, name), '.', fields[6]
            ])


@files([(None, Paths.repeatmasker_original(chrom), chrom)
        for chrom in Options.UCSC_REPEATMASKER_CHROMOSOMES])
@jobs_limit(Options.MAX_PARALLEL_DOWNLOADING, 'download')
def download_repeatmasker_catalogs(inputfile, outputfile, chrom):
    import urllib

    url = Paths.REPEATMASKER_URL % (Options.GENOME, chrom)
    print 'Downloading %s ...' % url
    urllib.urlretrieve(url, outputfile)


@files([Paths.repeatmasker_original(chrom)
        for chrom in Options.UCSC_REPEATMASKER_CHROMOSOMES],
       Paths.repeatmasker_catalog)
@follows(download_repeatmasker_catalogs)
def merge_repeatmasker_catalogs(inputfiles, outputfile):