Ejemplo n.º 1
0
def select_closely_related(sketch_path, genus, mash_screen, contig_name,
                           threads, output_dir, mash_threshold,
                           download_contig_nums, contig_id):
    """Select closely-related genomes and download them as a local database.

    When ``sketch_path`` is given, mash (``screen`` or ``dist`` depending on
    ``mash_screen``) is used to pick related genomes. When ``genus`` is given
    it overrides the mash selection entirely.

    :param sketch_path: Path to a mash sketch file; enables mash selection.
    :param genus: Genus name; overrides mash-based selection when provided.
    :param mash_screen: Truthy selects ``mash.screen``, falsy ``mash.dist``.
    :param contig_name: Path of the contig fasta to compare against.
    :param threads: Number of threads passed to mash.
    :param output_dir: Directory for mash output and downloaded genomes.
    :param mash_threshold: Identity threshold for mash.
    :param download_contig_nums: Maximum number of genomes to download.
    :param contig_id: Identifier of the contig (used for file naming).
    :return: Path of the downloaded database, or ``False`` when fewer than
             5 closely-related genomes were found.
    :raises ValueError: when neither ``sketch_path`` nor ``genus`` is given.
    """
    ncbi_id = None
    url_list = None

    if sketch_path:
        # Both mash helpers share the same call signature.
        mash_func = mash.screen if mash_screen else mash.dist
        mash_file = mash_func(contig_name, sketch_path, threads,
                              output_dir, mash_threshold,
                              download_contig_nums, contig_id)
        ncbi_id = mash.get_ncbi_id(mash_file)
        # Wouldn't polish if closely-related genomes are fewer than 5.
        if len(ncbi_id) < 5:
            return False
        url_list = download.parser_url(ncbi_id)

    if genus:
        ncbi_id, url_list = download.parser_genus(genus)

    if ncbi_id is None:
        # Previously this fell through to a NameError on ncbi_id.
        raise ValueError('Either sketch_path or genus must be provided.')

    db_path = download.download(output_dir, ncbi_id, url_list)

    return db_path
Ejemplo n.º 2
0
def mainloop():
    """Check every configured item for a new version and download it.

    For each entry in ``dllist`` the configured link-provider module is
    imported dynamically and asked for a download link. When the remote
    file name differs from the stored ``current_version``, the file is
    downloaded, optionally checksum-verified, and the config is rewritten.
    Exits the process with code 2 when a provider is missing or raises.
    """
    utils.debug(2, '-----mainloop(start)-----', utils.whoami())
    for item in dllist:
        curversion = str(utils.config.get(item, 'current_version'))
        downloadcfg = str(utils.config.get(item, 'download'))
        file_regex = str(utils.config.get(item, 'file_regex'))
        linkprovider = str(utils.config.get(item, 'link_provider'))
        chkresult = ''
        if downloadcfg == '1':
            try:
                # Link providers are plain modules named in the config file.
                provider = importlib.import_module(linkprovider)
                dllink = provider.link(item, file_regex, curversion)
            except ModuleNotFoundError:
                print('No link provider %s found.' % linkprovider)
                print('Please check your config again.')
                sys.exit(2)
            except Exception as e:
                print('There was an exception in the %s module.' %
                      linkprovider)
                print(e)
                sys.exit(2)
            if dllink:
                utils.debug(2, 'Download link is %s' % dllink, utils.whoami())
                file_name = utils.file_name(dllink)
                utils.debug(2, 'Filename is %s' % file_name, utils.whoami())
                if file_name == curversion:
                    utils.debug(
                        1, 'We already have the current version for %s.' %
                        item, utils.whoami())
                else:
                    dlinfo = download.download(dllink, dlpath)
                    # NOTE(review): presence is probed via 'chkurl' but the
                    # call is 'chksum' — confirm the provider attribute name.
                    if hasattr(provider, 'chkurl'):
                        utils.debug(
                            3, 'Starting checksum check for %s' % linkprovider,
                            utils.whoami())
                        chkresult = provider.chksum(file_name, dlpath)
                    # utils.linkinfo may return an HTTP error code (a number).
                    # Don't update current_version then.
                    # BUGFIX: the original used `chkresult is not 'Fail'`,
                    # which compares object identity of string literals
                    # (undefined behavior, SyntaxWarning); use != instead.
                    if not utils.is_number(dlinfo) and chkresult != 'Fail':
                        utils.config.set(item, 'current_version', file_name)
                        utils.debug(
                            2, 'Current_version is: ' +
                            utils.config.get(item, 'current_version'),
                            utils.whoami())
                        with open(utils.configfile, 'w') as configfile:
                            utils.config.write(configfile)
                    elif utils.is_number(dlinfo):
                        print('Download failed with HTTP error code: %s' %
                              dlinfo)
                    else:
                        print('Download failed due to checksum mismatch.')
            else:
                utils.debug(4, 'No download link found for %s' % item,
                            utils.whoami())
        else:
            utils.debug(2, 'Download for %s is set to off.' % item,
                        utils.whoami())
    utils.debug(2, '-----mainloop(stop)-----', utils.whoami())
Ejemplo n.º 3
0
    def _load_data(self, filename, offset):
        """
        Load a data-file as a numpy array, downloading it first when it
        is not already present in the data_dir.

        :param filename: Name of the data-file.
        :param offset: Start offset in bytes when reading the data-file.
        :return: The data as a numpy array.
        """
        # Fetch the file from the internet if no local copy exists yet.
        download(base_url=base_url,
                 filename=filename,
                 download_dir=self.data_dir)

        # Decompress and read the payload, skipping the header bytes.
        file_path = os.path.join(self.data_dir, filename)
        with gzip.open(file_path, 'rb') as gz:
            raw = gz.read()

        return np.frombuffer(raw, np.uint8, offset=offset)
Ejemplo n.º 4
0
def download_action(ncbi_id, homologous_output_dir, contig_name=None):
    """Download the closely-related genomes for *ncbi_id* and report timing.

    :param ncbi_id: NCBI identifiers of the genomes to download.
    :param homologous_output_dir: Directory the genomes are written to.
    :param contig_name: Optional contig fasta path forwarded to download().
    :return: Path of the downloaded genome database.
    """
    start_time = time.time()
    print_system_log('Download closely-related genomes')
    url_list = download.parser_url(ncbi_id)
    info_line = (TextColor.GREEN + " INFO: " + str(len(url_list)) +
                 " homologous sequence need to download: \n" + TextColor.END)
    sys.stderr.write(info_line)
    db_path = download.download(homologous_output_dir, ncbi_id, url_list,
                                contig_name)
    elapsed = get_elapsed_time_string(start_time, time.time())
    print_stage_time('Download closely-related genomes time', elapsed)
    return db_path
Ejemplo n.º 5
0
def start_bot(bot, update):
    """Telegram handler: parse "<cmd>|<url>" messages, download and upload.

    Supported commands: "video" (download and upload to Google Drive),
    "audio" (YouTube-only mp3, sent back through Telegram); any other
    command token is used as the target filename for a direct download.
    Without a command the URL is downloaded directly when it is
    downloadable and within the size limit.

    :param bot: telegram.Bot instance used to send messages and files.
    :param update: telegram.Update carrying the incoming message.
    """
    msg = str(update.message.text)
    if "|" in msg:
        user_cmd, url = msg.split("|")
        user_cmd = user_cmd.strip().lower()
        url = url.strip()
    else:
        user_cmd = None
        url = msg
    sent_message = bot.send_message(chat_id=update.message.chat_id,
                                    text=Text.VERIFYING_URL)
    time.sleep(1)
    if validators.url(url):
        sent_message.edit_text(Text.PROCESSING)
        if user_cmd:
            if user_cmd == "video":
                filename = download_video.download(url)
                # Downloaders signal failure by returning an "ERROR..." string.
                if "ERROR" in filename:
                    sent_message.edit_text(Text.FAILED + filename,
                                           parse_mode=telegram.ParseMode.HTML)
                else:
                    bot.send_chat_action(chat_id=update.message.chat_id,
                                         action=telegram.ChatAction.TYPING)
                    sent_message.edit_text(Text.UPLOADING_GD)
                    dwnld_url = upload.upload(filename)
                    # Size in MiB for the final status message.
                    size = (os.path.getsize(filename)) / 1048576
                    sent_message.edit_text(Text.DONE.format(
                        filename, size, dwnld_url),
                                           parse_mode=telegram.ParseMode.HTML)
                    os.remove(filename)
            elif user_cmd == "audio":
                # Audio extraction is only supported for YouTube links.
                if "youtube" in url or "youtu" in url:
                    filename = download_audio.download(url)
                    if "ERROR" in filename:
                        sent_message.edit_text(
                            Text.FAILED + filename,
                            parse_mode=telegram.ParseMode.HTML)
                    else:
                        bot.send_chat_action(chat_id=update.message.chat_id,
                                             action=telegram.ChatAction.TYPING)
                        sent_message.edit_text(Text.UPLOADING_TG)
                        # Context manager guarantees the handle is closed
                        # even if send_audio raises.
                        with open(filename, 'rb') as audio:
                            bot.send_audio(chat_id=update.message.chat_id,
                                           audio=audio,
                                           caption=filename.replace(
                                               ".mp3", ""))
                        os.remove(filename)
                        sent_message.edit_text(Text.DONE)
                else:
                    sent_message.edit_text(Text.NOT_SUPPORTED,
                                           parse_mode=telegram.ParseMode.HTML)
            else:
                # Any other command token becomes the download filename.
                if download.is_downloadable(url):
                    # BUGFIX: convert to MiB like the no-command branch below;
                    # previously raw bytes were compared against the 10000
                    # (MiB) cap, making the limit effectively ~10 KB.
                    size = download.check_filesize(url) / 1048576
                    if size <= 10000:
                        filename = user_cmd
                        raw_file = download.download(url, filename)
                        if "ERROR" in raw_file:
                            sent_message.edit_text(
                                Text.FAILED + raw_file,
                                parse_mode=telegram.ParseMode.HTML)
                        else:
                            bot.send_chat_action(
                                chat_id=update.message.chat_id,
                                action=telegram.ChatAction.TYPING)
                            sent_message.edit_text(Text.UPLOADING_GD)
                            dwnld_url = upload.upload(raw_file)
                            sent_message.edit_text(
                                Text.DONE.format(raw_file, size, dwnld_url),
                                parse_mode=telegram.ParseMode.HTML)
                    else:
                        sent_message.edit_text(Text.MAXLIMITEXCEEDED)
                else:
                    sent_message.edit_text(Text.ISNOT_DOWNLOADABLE,
                                           parse_mode=telegram.ParseMode.HTML)
        else:
            if download.is_downloadable(url):
                size = download.check_filesize(url) / 1048576
                if size <= 10000:
                    raw_file = download.download(url, None)
                    bot.send_chat_action(chat_id=update.message.chat_id,
                                         action=telegram.ChatAction.TYPING)
                    sent_message.edit_text(Text.UPLOADING_GD)
                    dwnld_url = upload.upload(raw_file)
                    sent_message.edit_text(Text.DONE.format(
                        raw_file, size, dwnld_url),
                                           parse_mode=telegram.ParseMode.HTML)
                else:
                    sent_message.edit_text(Text.MAXLIMITEXCEEDED)
            else:
                sent_message.edit_text(Text.ISNOT_DOWNLOADABLE,
                                       parse_mode=telegram.ParseMode.HTML)
    elif ("help" not in url and "start" not in url and "broadcast" not in url
          and "donate" not in url and "add_user" not in url
          and "revoke_user" not in url):
        # Not a URL and not a known command — tease the user.
        bot.send_chat_action(chat_id=update.message.chat_id,
                             action=telegram.ChatAction.TYPING)
        time.sleep(1)
        sent_message.edit_text(Text.RETARD)
Ejemplo n.º 6
0
def polish_genome(assembly, model_path, sketch_path, genus, threads,
                  output_dir, minimap_args, mash_threshold,
                  download_contig_nums, debug):
    """Polish every contig of *assembly* using closely-related genomes.

    Per contig: select related genomes (mash screen when ``sketch_path`` is
    given, or a fixed ``genus``), download them, align and pile up, predict
    corrections with the model and stitch the polished sequence. Polished
    contigs are concatenated into
    ``<output_dir>/<assembly_name>_homopolished.fasta``.

    :param assembly: Path to the assembly fasta file.
    :param model_path: Path of the trained model used by predict.
    :param sketch_path: Mash sketch path; enables mash-screen selection.
    :param genus: Genus name; overrides mash-based selection when given.
    :param threads: Number of worker threads.
    :param output_dir: Output directory (created when missing).
    :param minimap_args: Preset arguments forwarded to minimap2.
    :param mash_threshold: Identity threshold for mash screen.
    :param download_contig_nums: Max number of genomes to download.
    :param debug: When truthy the intermediate debug directory is removed.
    :raises ValueError: when neither ``sketch_path`` nor ``genus`` is given.
    """
    if not sketch_path and not genus:
        # Previously this fell through to a NameError on ncbi_id below.
        raise ValueError('Either sketch_path or genus must be provided.')

    out = []
    output_dir = FileManager.handle_output_directory(output_dir)
    contig_output_dir_debug = output_dir + '/debug'
    contig_output_dir_debug = FileManager.handle_output_directory(
        contig_output_dir_debug)
    assembly_name = assembly.rsplit('/', 1)[-1]
    assembly_name = assembly_name.split('.')[0]

    total_start_time = time.time()
    for contig in SeqIO.parse(assembly, 'fasta'):
        timestr = time.strftime("[%Y/%m/%d %H:%M]")
        sys.stderr.write(TextColor.GREEN + str(timestr) + " INFO: RUN-ID: " +
                         contig.id + "\n" + TextColor.END)
        contig_output_dir = contig_output_dir_debug + '/' + contig.id
        contig_output_dir = FileManager.handle_output_directory(
            contig_output_dir)
        contig_name = contig_output_dir + '/' + contig.id + '.fasta'
        SeqIO.write(contig, contig_name, "fasta")

        if sketch_path:
            screen_start_time = time.time()
            print_system_log('MASH SCREEN')
            mash_file = mash.screen(contig_name, sketch_path, threads,
                                    contig_output_dir, mash_threshold,
                                    download_contig_nums, contig.id)
            screen_end_time = time.time()

            ncbi_id = mash.get_ncbi_id(mash_file)
            # Wouldn't polish if closely-related genomes are fewer than 5;
            # the raw contig is emitted unchanged instead.
            if len(ncbi_id) < 5:
                out.append(contig_name)
                continue

            url_list = download.parser_url(ncbi_id)

        if genus:
            ncbi_id, url_list = download.parser_genus(genus)

        download_start_time = time.time()
        print_system_log('DOWNLOAD CONTIGS')
        db = download.download(contig_output_dir, ncbi_id, url_list)
        download_end_time = time.time()

        pileup_start_time = time.time()
        print("\n")
        print_system_log('PILE UP')
        db_npz = alignment.align(contig_name, minimap_args, threads, db,
                                 contig_output_dir)
        # align() returns False on failure (BUGFIX: was `== False`, which
        # is a non-idiomatic equality test on a singleton; use `is`).
        if db_npz is False:
            continue
        pileup_end_time = time.time()

        align2df_start_time = time.time()
        print_system_log('TO DATAFRAME')
        df = align2df.todf(contig_name, db_npz, contig_output_dir)
        align2df_end_time = time.time()

        predict_start_time = time.time()
        print_system_log('PREDICT')
        df = contig_output_dir + '/' + contig.id + '.feather'
        result = predict.predict(df, model_path, threads, contig_output_dir)
        predict_end_time = time.time()

        polish_start_time = time.time()
        print_system_log('POLISH')
        finish = polish.stitch(contig_name, result, contig_output_dir)
        polish_end_time = time.time()

        if sketch_path:
            screen_time = get_elapsed_time_string(screen_start_time,
                                                  screen_end_time)
            print_stage_time('SCREEN', screen_time)

        # calculating time
        download_time = get_elapsed_time_string(download_start_time,
                                                download_end_time)
        pileup_time = get_elapsed_time_string(pileup_start_time,
                                              pileup_end_time)
        align2df_time = get_elapsed_time_string(align2df_start_time,
                                                align2df_end_time)
        predict_time = get_elapsed_time_string(predict_start_time,
                                               predict_end_time)
        polish_time = get_elapsed_time_string(polish_start_time,
                                              polish_end_time)

        # print stage time
        print_stage_time('DOWNLOAD', download_time)
        print_stage_time('PILEUP', pileup_time)
        print_stage_time('TO DATAFRAME', align2df_time)
        print_stage_time('PREDICT', predict_time)
        print_stage_time('POLISH', polish_time)
        out.append(finish)

    # BUGFIX: guard against an empty list — `cat > file` with no arguments
    # would block reading stdin. NOTE(review): paths are shell-interpolated;
    # they come from local output dirs, but subprocess.run would be safer.
    if out:
        os.system('cat {} > {}/{}_homopolished.fasta'.format(
            ' '.join(out), output_dir, assembly_name))

    if debug:
        try:
            shutil.rmtree(contig_output_dir_debug)
        except OSError as e:
            print(e)
        else:
            # Keep original behavior: skip the total-time report after a
            # successful debug-directory cleanup.
            return True

    total_end_time = time.time()
    total_time = get_elapsed_time_string(total_start_time, total_end_time)
    print_stage_time('Total', total_time)
Ejemplo n.º 7
0
def make_train_data(mash_screen, assembly, reference, sketch_path,
                    genus_species, threads, output_dir, minimap_args,
                    mash_threshold, download_contig_nums, debug):
    output_dir = FileManager.handle_output_directory(output_dir)
    contig_output_dir_debug = make_output_dir("debug", output_dir)

    assembly_name = assembly.rsplit('/', 1)[-1]
    assembly_name = assembly_name.split('.')[0]

    total_start_time = time.time()
    for contig in SeqIO.parse(assembly, 'fasta'):
        timestr = time.strftime("[%Y/%m/%d %H:%M]")
        sys.stderr.write(TextColor.GREEN + str(timestr) + " INFO: RUN-ID: " +
                         contig.id + "\n" + TextColor.END)
        contig_output_dir = make_output_dir("contig", contig_output_dir_debug,
                                            contig.id)

        contig_name = contig_output_dir + '/' + contig.id + '.fasta'
        SeqIO.write(contig, contig_name, "fasta")

        print_system_log('Select closely-related genomes and download')
        collect_start_time = time.time()
        #db_path = mash_select_closely_related(sketch_path, mash_screen, threads, contig_output_dir, mash_threshold, download_contig_nums, contig_name, contig.id)
        ncbi_id = mash_select_closely_related(sketch_path, mash_screen,
                                              threads, contig_output_dir,
                                              mash_threshold,
                                              download_contig_nums,
                                              contig_name, contig.id)
        '''
        if len(ncbi_id) < 5:
            sys.stderr.write(TextColor.PURPLE + "This contig " + contig.id + " closely-related genome is less than 5, not to polish...\n" + TextColor.END)
            out.append(contig_name)
            continue
        '''

        collect_end_time = time.time()
        collect_time = get_elapsed_time_string(collect_start_time,
                                               collect_end_time)
        #print_stage_time('Select closely-related genomes and download', collect_time)

        print_system_log('Download closely-related genomes')
        url_list = download.parser_url(ncbi_id)
        sys.stderr.write(TextColor.GREEN + " INFO: " + str(len(url_list)) +
                         " homologous sequence need to download: \n" +
                         TextColor.END)
        db_path = download.download(contig_output_dir, ncbi_id, url_list)

        seq_paf = alignment.align(contig_name, minimap_args, threads, db_path,
                                  contig_output_dir)
        ref_paf = alignment.align(contig_name, minimap_args, threads, db_path,
                                  contig_output_dir, reference)

        if os.stat(seq_paf).st_size != 0 and os.stat(ref_paf).st_size != 0:
            record = SeqIO.read(contig_name, "fasta")
            genome_size = len(record)

            dataframe_path = homologous_retrieval(seq_paf, genome_size,
                                                  contig_output_dir, contig.id,
                                                  contig_name, ref_paf)

        else:
            sys.stderr.write(TextColor.PURPLE + contig.id +
                             " minimap2 can't align......\n" + TextColor.END)

        shutil.move(dataframe_path,
                    output_dir + '/' + assembly_name + '.feather')