def get_chapter(driver, root_url, chapter_num, manga_name, max_retries=5):
    """Fetch every page of one chapter, one page after the other.

    Pages are requested in ascending order, starting at 1, by calling
    ``get_page`` until it reports ``'404'`` (taken here as "no more pages").
    A result of ``'unknow_error'`` makes the same page be requested again.

    The pages are fetched with plain calls: each request has to finish
    before the next page number is known, so a thread pool whose future is
    awaited immediately adds no concurrency.

    Args:
        driver: Passed through unchanged to ``get_page``.
        root_url: Base URL; the chapter URL is ``{root_url}/{chapter_num}``.
        chapter_num: Chapter identifier used in the URL, the target
            directory name and the progress messages.
        manga_name: Used as the directory name under ``Downloads/Manga``.
        max_retries: Number of consecutive ``'unknow_error'`` results
            tolerated for a single page before the chapter is abandoned.
            Without a cap a permanently failing page would loop forever.

    Returns:
        None.

    Note:
        ``home`` is not defined inside this function; it has to be
        available at module level.
    """
    chapter_url = f'{root_url}/{chapter_num}'

    path = f'{home}/Downloads/Manga/{manga_name}/Chapter_{chapter_num}/'
    create_dir(path)

    print(f'getting chapter {chapter_num}...')

    page_num = 1
    failures = 0  # consecutive 'unknow_error' results for the current page
    while True:
        result = get_page(driver, chapter_url, page_num,
                          chapter_num, manga_name, path)

        if result == '404':
            break

        if result == 'unknow_error':
            failures += 1
            if failures > max_retries:
                print(f'giving up on chapter {chapter_num}: page {page_num} '
                      f'failed {failures} times in a row')
                return
            continue  # retry the same page

        # Page fetched: move on and reset the retry budget.
        failures = 0
        page_num += 1

    print(f'chapter {chapter_num} successfully getted!')
Exemple #2
0
def main():
    '''
    Entry point: run the pipeline selected on the command line.

    Reads the docopt options, prepares the working directory and then
    chains the sub-commands for the chosen mode ('parse', 'annotate' or
    'denovo'). Between steps the shared ``options`` dict is rewritten
    ('--output', '--bed', '--tophat', '--abs') so that each sub-command
    reads from and writes to the right place under the working directory.
    '''
    options = docopt(__doc__)
    command_log = 'fast_circ.py parameters: ' + ' '.join(sys.argv)
    work_dir = options['--output']
    # Skip create_dir when the output is the current directory. The second
    # comparison used to test the literal string 'work_dir' and was therefore
    # always true, so './' was still handed to create_dir.
    if work_dir not in ('.', './'):
        create_dir(work_dir)

    if options['parse']:
        # parse fusion reads from <fusion> file
        options['--bed'] = '%s/back_spliced_junction.bed' % work_dir
        parse_command(options, command_log)
        # annotate circular RNAs
        options['--output'] = '%s/circularRNA_known.txt' % options['--output']
        annotate_command(options, command_log)
    elif options['annotate']:
        # align fusion reads
        options['--output'] = '%s/alignment' % work_dir
        options['--bed'] = '%s/back_spliced_junction.bed' % work_dir
        align_command(options, command_log)
        # annotate circular RNAs
        # NOTE(review): '--output' was just set to <work_dir>/alignment, so
        # this resolves to <work_dir>/alignment/circularRNA_known.txt, unlike
        # the 'parse' branch -- confirm this is intended.
        options['--output'] = '%s/circularRNA_known.txt' % options['--output']
        annotate_command(options, command_log)
    elif options['denovo']:
        # align fusion reads
        options['--output'] = '%s/alignment' % work_dir
        options['--bed'] = '%s/back_spliced_junction.bed' % work_dir
        align_command(options, command_log)
        # de novo assemble circular RNAs
        options['--tophat'] = '%s/alignment/tophat' % work_dir
        options['--output'] = '%s/assemble' % work_dir
        assemble_command(options, command_log)
        # fetch AS events of circular RNAs
        options['--output'] = '%s/denovo' % work_dir
        options['--abs'] = '%s/abs' % work_dir
        denovo_command(options, work_dir, command_log)
def download_manga_from_list():
    """Download every chapter described in ``download_list.json``.

    The JSON file is read from the current working directory and must
    contain a ``manga_name`` entry and a ``chapters`` list. Only the first
    key/value pair of each chapter mapping is used: the key is the chapter
    id (its part after the first ``_`` is shown as the chapter number) and
    the value is the sequence of images to download.

    Images are stored under
    ``<home>/Downloads/Manga/<manga_name>/<chapter_id>/`` and numbered from
    1 in list order. The downloads of one chapter are submitted to a thread
    pool; leaving the ``with`` block waits for all of them before the next
    chapter starts.
    """
    with open('download_list.json', 'r') as list_file:
        listing = json.load(list_file)

    manga_name = listing['manga_name']
    chapter_entries = listing['chapters']

    home = get_home_dir()

    for entry in chapter_entries:
        # Each entry is expected to hold a single {chapter_id: images} pair.
        chapter_id, images = list(entry.items())[0]

        path = f'{home}/Downloads/Manga/{manga_name}/{chapter_id}/'
        create_dir(path)

        chapter_number = chapter_id.split('_')[1]
        print(f'Downloading chapter {chapter_number}...')

        with concurrent.futures.ThreadPoolExecutor() as pool:
            for position, image in enumerate(images, start=1):
                pool.submit(download_img, image, path, position)

        print(f'Chapter {chapter_number} Downloaded!\n')
Exemple #4
0
def assemble(options):
    '''
    Run the cufflinks-based assembly step.

    1. Prepare the output directory (options['--output'])
    2. Make sure cufflinks, genePredToGtf and gtfToGenePred can be found;
       exit with an error message otherwise
    3. Filter the ref file, assemble with cufflinks and convert the
       assembly results
    '''
    # Each external program paired with the message shown when it is missing.
    required_tools = (
        ('cufflinks', 'Cufflinks is required for CIRCexplorer2 assemble!'),
        ('genePredToGtf',
         'genePredToGtf is required for CIRCexplorer2 assemble!'),
        ('gtfToGenePred',
         'gtfToGenePred is required for CIRCexplorer2 assemble!'),
    )

    # output directory and tophat results
    out_dir = create_dir(options['--output'])
    tophat_dir = options['--tophat']

    # stop at the first missing program
    for program, missing_msg in required_tools:
        if which(program) is None:
            sys.exit(missing_msg)

    # the cufflinks results go straight into the output directory
    cufflinks_dir = out_dir
    create_dir(cufflinks_dir)

    # filter ref file
    ref_filter(options['--ref'], tophat_dir, cufflinks_dir)
    # assemble with cufflinks
    cufflinks_assemble(
        tophat_dir,
        cufflinks_dir,
        options['--thread'],
        options['--remove-rRNA'],
        options['--max-bundle-frags'],
    )
    # convert assembly results
    convert_assembly_gtf(
        tophat_dir,
        cufflinks_dir,
        options['--ref'],
        options['--bb'],
        options['--chrom-size'],
    )
def annotate(options):
    '''
    Annotate fusion junctions, then fix them.

    Results are placed in an 'annotate' directory below the checked
    options['<circ_dir>'] directory.
    '''
    # check the circ directory and prepare its annotate subdirectory
    circ_dir = check_dir(options['<circ_dir>'])
    annotate_dir = '%s/annotate' % circ_dir
    create_dir(annotate_dir)

    ref_file = options['--ref']
    # annotate fusion junctions
    annotate_fusion(ref_file, annotate_dir)
    # fix fusion junctions
    fix_fusion(
        ref_file,
        options['--genome'],
        annotate_dir,
        options['--no-fix'],
    )
Exemple #6
0
def annotate(options):
    '''
    Annotate fusion junctions, then fix them.

    Results are placed in an 'annotate' directory below the checked
    options['<circ_dir>'] directory.
    '''
    # check the circ directory and prepare its annotate subdirectory
    circ_dir = check_dir(options['<circ_dir>'])
    annotate_dir = '%s/annotate' % circ_dir
    create_dir(annotate_dir)

    ref_file = options['--ref']
    # annotate fusion junctions
    annotate_fusion(ref_file, annotate_dir)
    # fix fusion junctions
    fix_fusion(
        ref_file,
        options['--genome'],
        annotate_dir,
        options['--no-fix'],
    )
Exemple #7
0
def denovo(options):
    '''
    Run the de novo annotation step inside an existing circ directory.

    1. Check the circ directory and locate the tophat results
       (options['--tophat-dir'], or '<circ_dir>/tophat' when not given)
    2. Build a combined ref file from the cufflinks transcripts (lines
       starting with 'CUFF' only) followed by options['--ref']; fall back to
       options['--ref'] alone when no cufflinks ref file exists
    3. Annotate and fix fusion junctions, then extract novel circRNAs
    4. With options['--as'], run the alternative splicing analyses selected
       by options['--as-type'] (all of them when it is unset); this needs
       options['--pAplus'] to be an existing directory
    '''
    # check output directory
    out_dir = check_dir(options['<circ_dir>'])
    # check tophat results
    if options['--tophat-dir']:
        tophat_dir = check_dir(options['--tophat-dir'])
    else:
        tophat_dir = check_dir(out_dir + '/tophat')
    # prepare denovo directory
    denovo_dir = '%s/denovo' % out_dir
    create_dir(denovo_dir)
    # combine ref files
    cufflinks_ref_path = '%s/cufflinks/transcripts_ref.txt' % out_dir
    if os.path.isfile(cufflinks_ref_path):
        print('Combine %s with %s to create a new ref file!' %
              (options['--ref'], cufflinks_ref_path))
        ref_path = '%s/combined_ref.txt' % denovo_dir
        # Context managers close all three files even if a read or write
        # fails part-way through.
        with open(ref_path, 'w') as new_ref_f:
            with open(cufflinks_ref_path, 'r') as cuff_ref:
                for line in cuff_ref:
                    if line.startswith('CUFF'):  # only import novel isoforms
                        new_ref_f.write(line)
            with open(options['--ref'], 'r') as ref_f:
                new_ref_f.write(ref_f.read())
    else:
        print('Warning: no cufflinks directory under %s!' % out_dir)
        print('Please run CIRCexplorer2 assembly before this step!')
        ref_path = options['--ref']
    # annotate fusion junctions
    annotate_fusion(ref_path, denovo_dir, denovo_flag=1)
    # fix fusion junctions
    fix_fusion(ref_path,
               options['--genome'],
               denovo_dir,
               options['--no-fix'],
               denovo_flag=1)
    # extract novel circRNAs
    extract_novel_circ(denovo_dir, options['--ref'])
    if options['--as']:
        if options['--pAplus'] and os.path.isdir(options['--pAplus']):
            pAplus_dir = os.path.abspath(options['--pAplus'])
        else:
            sys.exit('You should offer --pAplus option in --as mode!')
        if not options['--as-type'] or options['--as-type'] == 'CE':
            # extract cassette exons
            extract_cassette_exon(denovo_dir, tophat_dir, pAplus_dir,
                                  options['--rpkm'])
        if not options['--as-type'] or options['--as-type'] == 'RI':
            # extract retained introns
            extract_retained_intron(denovo_dir, tophat_dir, pAplus_dir)
        if not options['--as-type'] or options['--as-type'] == 'ASS':
            # characterize A5SS and A3SS
            parse_splice_site(denovo_dir, tophat_dir, pAplus_dir)
Exemple #8
0
def denovo(options):
    '''
    Run the de novo annotation step inside an existing circ directory.

    1. Check the circ directory and locate the tophat results
       (options['--tophat-dir'], or '<circ_dir>/tophat' when not given)
    2. Build a combined ref file from the cufflinks transcripts (lines
       starting with 'CUFF' only) followed by options['--ref']; fall back to
       options['--ref'] alone when no cufflinks ref file exists
    3. Annotate and fix fusion junctions, then extract novel circRNAs
    4. With options['--as'], run the alternative splicing analyses selected
       by options['--as-type'] (all of them when it is unset); this needs
       options['--pAplus'] to be an existing directory
    '''
    # check output directory
    out_dir = check_dir(options['<circ_dir>'])
    # check tophat results
    if options['--tophat-dir']:
        tophat_dir = check_dir(options['--tophat-dir'])
    else:
        tophat_dir = check_dir(out_dir + '/tophat')
    # prepare denovo directory
    denovo_dir = '%s/denovo' % out_dir
    create_dir(denovo_dir)
    # combine ref files
    cufflinks_ref_path = '%s/cufflinks/transcripts_ref.txt' % out_dir
    if os.path.isfile(cufflinks_ref_path):
        print('Combine %s with %s to create a new ref file!' %
              (options['--ref'], cufflinks_ref_path))
        ref_path = '%s/combined_ref.txt' % denovo_dir
        # Context managers close all three files even if a read or write
        # fails part-way through.
        with open(ref_path, 'w') as new_ref_f:
            with open(cufflinks_ref_path, 'r') as cuff_ref:
                for line in cuff_ref:
                    if line.startswith('CUFF'):  # only import novel isoforms
                        new_ref_f.write(line)
            with open(options['--ref'], 'r') as ref_f:
                new_ref_f.write(ref_f.read())
    else:
        print('Warning: no cufflinks directory under %s!' % out_dir)
        print('Please run CIRCexplorer2 assembly before this step!')
        ref_path = options['--ref']
    # annotate fusion junctions (the trailing 1 is passed positionally as
    # the third argument)
    annotate_fusion(ref_path, denovo_dir, 1)
    # fix fusion junctions (the trailing 1 is passed positionally as the
    # fifth argument)
    fix_fusion(ref_path, options['--genome'], denovo_dir,
               options['--no-fix'], 1)
    # extract novel circRNAs
    extract_novel_circ(denovo_dir, options['--ref'])
    if options['--as']:
        if options['--pAplus'] and os.path.isdir(options['--pAplus']):
            pAplus_dir = os.path.abspath(options['--pAplus'])
        else:
            sys.exit('You should offer --pAplus option in --as mode!')
        if not options['--as-type'] or options['--as-type'] == 'CE':
            # extract cassette exons
            extract_cassette_exon(denovo_dir, tophat_dir, pAplus_dir,
                                  options['--rpkm'])
        if not options['--as-type'] or options['--as-type'] == 'RI':
            # extract retained introns
            extract_retained_intron(denovo_dir, tophat_dir, pAplus_dir)
        if not options['--as-type'] or options['--as-type'] == 'ASS':
            # characterize A5SS and A3SS
            parse_splice_site(denovo_dir, tophat_dir, pAplus_dir)
Exemple #9
0
def check_outdir(out_dir):
    '''
    Prepare the output directory and return its absolute path.

    1. Hand out_dir to create_dir (the step that is meant to clear a
       non-empty output directory)
    2. Create the essential subdirectories: bowtie1_index, bowtie2_index,
       tophat and tophat_fusion
    '''
    print('Check output directory...')
    # clear output directory if not empty
    create_dir(out_dir)
    dir_path = os.path.abspath(out_dir)
    # create essential subdirectories, in this order
    essential_subdirs = ('/bowtie1_index', '/bowtie2_index',
                         '/tophat', '/tophat_fusion')
    for subdir in essential_subdirs:
        os.mkdir(dir_path + subdir)
    return dir_path
Exemple #10
0
def check_outdir(out_dir):
    '''
    Prepare the output directory and return its absolute path.

    1. Hand out_dir to create_dir (the step that is meant to clear a
       non-empty output directory)
    2. Create the essential subdirectories: bowtie1_index, bowtie2_index,
       tophat and tophat_fusion
    '''
    print('Check output directory...')
    # clear output directory if not empty
    create_dir(out_dir)
    dir_path = os.path.abspath(out_dir)
    # create essential subdirectories, in this order
    essential_subdirs = ('/bowtie1_index', '/bowtie2_index',
                         '/tophat', '/tophat_fusion')
    for subdir in essential_subdirs:
        os.mkdir(dir_path + subdir)
    return dir_path
Exemple #11
0
def parse(options):
    '''
    Parse fusion junctions produced by another aligner.

    options['-t'] names the aligner (STAR, MapSplice or segemehl); any
    other value ends the program with an error message. The junctions read
    from options['<fusion>'] are written to 'fusion_junction.bed' inside
    options['--output'].
    '''
    aligner = options['-t']
    if aligner not in {'STAR', 'MapSplice', 'segemehl'}:
        sys.exit('Error: CIRCexplorer2 parse does not support %s!' % aligner)

    # check output directory
    create_dir(options['--output'])
    out = os.path.abspath(options['--output']) + '/fusion_junction.bed'

    # pick the parser that matches the aligner
    parsers = {
        'STAR': star_parse,
        'MapSplice': mapsplice_parse,
        'segemehl': segemehl_parse,
    }
    parsers[aligner](options['<fusion>'], out)
Exemple #12
0
def parse(options):
    '''
    Parse fusion junctions produced by another aligner.

    options['-t'] names the aligner (STAR, MapSplice or segemehl); any
    other value ends the program with an error message. The junctions read
    from options['<fusion>'] are written to 'fusion_junction.bed' inside
    options['--output'].
    '''
    aligner = options['-t']
    if aligner not in {'STAR', 'MapSplice', 'segemehl'}:
        sys.exit('Error: CIRCexplorer2 parse does not support %s!' % aligner)

    # check output directory
    create_dir(options['--output'])
    out = os.path.abspath(options['--output']) + '/fusion_junction.bed'

    # pick the parser that matches the aligner
    parsers = {
        'STAR': star_parse,
        'MapSplice': mapsplice_parse,
        'segemehl': segemehl_parse,
    }
    parsers[aligner](options['<fusion>'], out)
Exemple #13
0
def hisat_to_tophat(bam_f, denovo_dir):
    '''
    Build a tophat-style result directory from a single BAM file.

    1. Create a 'temp<timestamp>' directory under denovo_dir
    2. Symlink bam_f into it as 'accepted_hits.bam' and index it with pysam
    3. Run 'regtools junctions extract' to write 'junctions.bed' next to
       it; regtools' stderr goes to 'regtools.log' in the same directory

    Returns the value create_dir returned for the new directory, which is
    used here as its path. Exits the program when regtools is not
    available or does not finish successfully.
    '''
    # Local import so this function does not depend on the file's import
    # block.
    import subprocess

    if which('regtools') is None:
        # Adjacent literals instead of a backslash continuation, which used
        # to embed the indentation whitespace in the message.
        sys.exit('regtools is required "as" analysis when use hisat2 mapping '
                 'results!')
    o_dir = create_dir('%s/temp%f' % (denovo_dir, time.time()))
    bam_link = '%s/accepted_hits.bam' % o_dir
    os.symlink(os.path.realpath(bam_f), bam_link)
    pysam.index(bam_link)

    # create junctions.bed file
    junctions_bed = '%s/junctions.bed' % o_dir
    log_path = '%s/regtools.log' % o_dir
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact.
    regtools_cmd = ['regtools', 'junctions', 'extract', '-s', '0',
                    '-o', junctions_bed, bam_link]
    print('Creating junctions.bed command:')
    print('%s 2> %s' % (' '.join(regtools_cmd), log_path))
    try:
        with open(log_path, 'w') as log_f:
            # A non-zero value also covers termination by a signal, which
            # `os.system(...) >> 8` reported as 0 (success).
            return_code = subprocess.call(regtools_cmd, stderr=log_f)
    except OSError:
        # regtools could not be started at all
        return_code = 1
    if return_code:
        sys.exit('Error: cannot create junctions.bed file!')

    return o_dir
Exemple #14
0
def denovo(options):
    '''
    Run the de novo annotation step, writing into options['--output'].

    1. Build a combined ref file from '<--cuff>/transcripts_ref.txt' (lines
       starting with 'CUFF' only) followed by options['--ref']; fall back to
       options['--ref'] alone when that cufflinks file does not exist
    2. Annotate the junctions in options['--bed'] into a temporary file,
       fix them into 'circularRNA_full.txt', then extract novel circRNAs
    3. With options['--as'], run the alternative splicing analyses selected
       by options['--as-type'] (all of them when it is unset).
       options['--pAplus'] and options['--tophat'] may each be a directory,
       or a file that is first converted with hisat_to_tophat
    4. With options['--abs'], run analyze_abs into that directory
    '''
    # check tophat results
    # if options['--tophat']:
        # tophat_dir = check_dir(options['--tophat'])

    # prepare denovo directory
    denovo_dir = options['--output']
    create_dir(denovo_dir)

    # combine ref files
    cufflinks_ref_path = '%s/transcripts_ref.txt' % options['--cuff']
    if os.path.isfile(cufflinks_ref_path):
        print('Combine %s with %s to create a new ref file!' %
              (options['--ref'], cufflinks_ref_path))
        ref_path = '%s/combined_ref.txt' % denovo_dir
        # Context managers close all three files even if a read or write
        # fails part-way through.
        with open(ref_path, 'w') as new_ref_f:
            with open(cufflinks_ref_path, 'r') as cuff_ref:
                for line in cuff_ref:
                    if line.startswith('CUFF'):  # only import novel isoforms
                        new_ref_f.write(line)
            with open(options['--ref'], 'r') as ref_f:
                new_ref_f.write(ref_f.read())
    else:
        print('Warning: no cufflinks directory %s!' % options['--cuff'])
        print('Please run CIRCexplorer2 assembly before this step!')
        ref_path = options['--ref']
    # create temporary annotated fusion file; it is only needed by the two
    # calls below, so close (and thereby discard) it right after them
    with tempfile.TemporaryFile(mode='w+') as fusion_tmp:
        # annotate fusion junctions
        annotate_fusion(ref_path, options['--bed'], fusion_tmp,
                        denovo_flag=1)
        # fix fusion junctions
        out_f = '%s/circularRNA_full.txt' % denovo_dir
        fix_fusion(ref_path, options['--genome'], fusion_tmp, out_f,
                   options['--no-fix'], denovo_flag=1)
    # extract novel circRNAs
    extract_novel_circ(denovo_dir, options['--ref'])
    if options['--as']:
        create_dir(options['--as'])

        if options['--pAplus'] and os.path.isdir(options['--pAplus']):
            pAplus_dir = os.path.abspath(options['--pAplus'])
        elif options['--pAplus'] and os.path.isfile(options['--pAplus']):
            pAplus_dir = hisat_to_tophat(options['--pAplus'], denovo_dir)
        else:
            sys.exit('You should offer --pAplus option in --as mode!')

        if options['--tophat'] and os.path.isdir(options['--tophat']):
            tophat_dir = os.path.abspath(options['--tophat'])
        elif options['--tophat'] and os.path.isfile(options['--tophat']):
            tophat_dir = hisat_to_tophat(options['--tophat'], denovo_dir)
        else:
            sys.exit('You should offer p(A)minus dir/file in --as mode!')

        if not options['--as-type'] or options['--as-type'] == 'CE':
            # extract cassette exons
            extract_cassette_exon(denovo_dir, tophat_dir, pAplus_dir,
                                  options['--as'], options['--rpkm'])
        if not options['--as-type'] or options['--as-type'] == 'RI':
            # extract retained introns
            extract_retained_intron(denovo_dir, tophat_dir, pAplus_dir,
                                    options['--as'])
        if not options['--as-type'] or options['--as-type'] == 'ASS':
            # characterize A5SS and A3SS
            parse_splice_site(denovo_dir, tophat_dir, pAplus_dir,
                              options['--as'])

    if options['--abs']:
        create_dir(options['--abs'])

        analyze_abs(denovo_dir, options['--genome'], options['--abs'])