def fasta_transform(fasta_file, chain_file, locations, output_file, bgzip=False, reverse=False):
    """
    Transform a Fasta file into a new coordinate space described by a chain file.

    :param fasta_file: input Fasta file name, or an already-open ``FastaFile``
    :param chain_file: chain file name, or an already-open ``ChainIter``
    :param locations: list of ``Location`` objects (or parseable location
        strings) to transform; falsy means every reference in the Fasta file
    :param output_file: name of the output Fasta file
    :param bgzip: compress the output in BGZIP format and index it
    :param reverse: reverse the direction of the chain file
    :return: Nothing
    """
    start = time.time()

    if not isinstance(fasta_file, FastaFile):
        fasta_file = g2g_fu.check_file(fasta_file)

    if not isinstance(chain_file, ChainIter):
        chain_file = g2g_fu.check_file(chain_file)

    output_file = g2g_fu.check_file(output_file, 'w')
    g2g_fu.delete_file(output_file)
    g2g_fu.delete_index_files(output_file)

    LOG.info("FASTA FILE: {0}".format(fasta_file))
    LOG.info("CHAIN FILE: {0}".format(chain_file))
    LOG.info("OUTPUT FILE: {0}".format(output_file))
    LOG.info("BGZIP: {0}".format(bgzip))
    LOG.info("REVERSE: {0}".format(reverse))

    if isinstance(fasta_file, FastaFile):
        fasta = fasta_file
    else:
        fasta = FastaFile(fasta_file)

    if not isinstance(chain_file, ChainIter):
        chain_file = ChainIter(chain_file, reverse=reverse)

    # NOTE(review): seq_ids is collected but never read in this function
    seq_ids = []

    if locations:
        LOG.debug("Have locations")
        new_locations = []
        for l in locations:
            if isinstance(l, Location):
                new_locations.append(l)
            else:
                new_locations.append(parse_location(l))
            seq_ids.append(new_locations[-1].seqid)
        locations = new_locations
    else:
        LOG.debug("Calculating locations")
        locations = [parse_location("{0}:1-{1}".format(a, fasta.get_reference_length(a)), 1) for a in fasta.references]
        seq_ids = [a for a in fasta.references]

    # the uncompressed file we actually write; when bgzip is requested the
    # final (".gz") name differs from the temporary plain-text name
    temp_output_file = output_file

    if bgzip:
        if g2g_fu.get_extension(output_file) != 'gz':
            output_file = "{0}.gz".format(output_file)
        else:
            temp_output_file = temp_output_file[:-3]

    fasta_out = open(temp_output_file, "w")

    LOG.info("Transforming...")

    chr_info = {}

    try:
        # will need a better way, but for now...
        LOG.info("Parsing chain file...")
        for line in chain_file:
            if len(line) > 7:
                # chain header line: record this chromosome's coordinate info
                LOG.debug("Adding chromosome {0}".format(chain_file.current_chain_header[1]))
                chr_info[chain_file.current_chain_header[1]] = {
                    'from_size': line[2], 'from_start': line[4], 'from_end': line[5],
                    'to_size': line[7], 'to_start': line[9], 'to_end': line[10],
                    'header_chain': chain_file.current_chain_header, 'lines': []}
            else:
                # data line: belongs to the most recently seen header
                chr_info[chain_file.current_chain_header[1]]['lines'].append(line)

        LOG.info("Chain file parsed")

        insertion_bases = 0
        deletion_bases = 0

        for location in locations:
            LOG.info("Processing chromosome={0}".format(location.seqid))
            LOG.debug("Location: {0}".format(location))

            chrom_size_to = chr_info[location.seqid]['to_size']
            last_pos = chr_info[location.seqid]['from_start']
            new_sequence = StringIO()
            chain_file.reset()

            for chain_line in chr_info[location.seqid]['lines']:
                LOG.debug("\nLINE: {0} : {1}".format(chain_file.line_no, chain_line))

                if len(chain_line) == 1:
                    # last line of the chain: copy the trailing fragment,
                    # then emit the whole transformed sequence
                    fragment = chain_line[0]
                    partial_seq = fasta.fetch(location.seqid, last_pos, last_pos + fragment)
                    new_sequence.write(str(partial_seq))

                    if len(new_sequence.getvalue()) < chrom_size_to:
                        LOG.warn("Length's do not match, chromosome length in chain: {0}, sequence length: {1}".format(chrom_size_to, len(new_sequence.getvalue())))

                    fasta_out.write(">{0} {1}:{2}-{3}\n".format(location.seqid, location.seqid, chr_info[location.seqid]['from_start'] + 1, chrom_size_to))

                    for l in wrap_sequence(new_sequence.getvalue()):
                        fasta_out.write(l.strip())
                        fasta_out.write('\n')
                    break
                else:
                    # fragment_size dt_size dq_size same_bases dt_bases dq_bases
                    # (dt/dq and their bases swap roles when reverse=True)
                    fragment = chain_line[0]
                    dt = chain_line[1 if not reverse else 2]
                    dq = chain_line[2 if not reverse else 1]
                    same = chain_line[3]
                    dt_bases = chain_line[4 if not reverse else 5]
                    dq_bases = chain_line[5 if not reverse else 4]

                    partial_seq = fasta.fetch(location.seqid, last_pos, last_pos + fragment)
                    new_sequence.write(partial_seq)

                    if dq > 0:
                        # insertion: splice the inserted bases into the output
                        LOG.debug("INSERTION")
                        new_sequence.write(dq_bases)
                        LOG.debug("{0}:{1}-{2} (Length: {3})".format(location.seqid, last_pos, last_pos + fragment, len(partial_seq)))
                        if len(partial_seq) > 100:
                            LOG.debug("{0}...{1}".format(partial_seq[:10], partial_seq[-10:]))
                        else:
                            LOG.debug(partial_seq)
                        LOG.debug("Adding {0}".format(dq_bases))
                        LOG.debug("SAME={0}, {1}".format(same, partial_seq[-(len(same)):]))
                        insertion_bases += dq

                    if dt > 0:
                        # deletion: skip dt bases of the source sequence
                        LOG.debug("DELETION")
                        last_pos += dt
                        LOG.debug("skipping ahead {0} bases".format(dt))
                        deletion_bases += dt

                    last_pos += fragment

                    LOG.debug("LAST_POS={0}, INSERTIONS={1}, DELETIONS={2}, DIFF={3}".format(last_pos, insertion_bases, deletion_bases, (insertion_bases - deletion_bases)))

        # flush pending writes before compressing/indexing; the original
        # never closed fasta_out, so bgzip could see a truncated file
        fasta_out.close()

        # bgzip and index
        if bgzip:
            LOG.info("Compressing and indexing...")
            g2g_fu.bgzip_index(temp_output_file, output_file, 'fa')
    except G2GLocationError as le:
        LOG.debug("Unable to parse location, {0}".format(le.message))
        raise le
    finally:
        # make sure the handle is released on the error path too
        if not fasta_out.closed:
            fasta_out.close()
def fasta_patch(filename_fasta, filename_vcf, strain, filename_output, bgzip=False, num_processes=None, pass_only=False, quality=False, diploid=False):
    """
    Patch a Fasta file by replacing the bases where the SNPs are located in the VCF file.

    :param filename_fasta: name of the input Fasta file
    :type filename_fasta: string
    :param filename_vcf: name of the VCF file
    :type filename_vcf: string
    :param strain: name of strain to use in VCF file
    :type strain: string
    :param filename_output: name of the output Fasta file
    :type filename_output: string
    :param bgzip: compress file in BGZIP format
    :type bgzip: boolean
    :param num_processes: the number of processes to spawn
    :type num_processes: int
    :param pass_only: Only process those VCF records with a 'PASS'
    :type pass_only: boolean
    :param quality: filter on quality, FI=PASS
    :type quality: boolean
    :param diploid: don't ignore hets and create 2 files
    :type diploid: boolean
    :return: Nothing
    """
    start = time.time()

    filename_fasta = g2g_fu.check_file(filename_fasta)
    filename_vcf = g2g_fu.check_file(filename_vcf)

    LOG.info("INPUT FASTA FILE: {0}".format(filename_fasta))
    LOG.info("VCF FILE: {0}".format(filename_vcf))
    LOG.info("STRAIN: {0}".format(strain))
    LOG.info("PASS FILTER ON: {0}".format(str(pass_only)))
    LOG.info("QUALITY FILTER ON: {0}".format(str(quality)))
    LOG.info("DIPLOID: {0}".format(str(diploid)))

    if not strain:
        raise G2GValueError("No strain was specified.")

    filename_output_l, filename_output_r = prepare_fasta_patch(filename_fasta, filename_output, bgzip, diploid)

    if not num_processes:
        num_processes = multiprocessing.cpu_count()
    elif num_processes <= 0:
        num_processes = 1

    LOG.info("NUMBER OF PROCESSES: {0}".format(num_processes))
    if bgzip:
        if diploid:
            LOG.info("OUTPUT FASTA FILES: {0}.gz".format(filename_output_l))
            LOG.info("                    {0}.gz".format(filename_output_r))
        else:
            LOG.info("OUTPUT FASTA FILE: {0}.gz".format(filename_output_l))
    else:
        if diploid:
            LOG.info("OUTPUT FASTA FILES: {0}".format(filename_output_l))
            LOG.info("                    {0}".format(filename_output_r))
        else:
            LOG.info("OUTPUT FASTA FILE: {0}".format(filename_output_l))

    LOG.info("Patching...")

    try:
        patch(filename_fasta, filename_vcf, strain, filename_output_l, filename_output_r, num_processes, pass_only, quality, diploid)
        LOG.info("Patching complete")

        # remove the fai; the original performed the deletion *inside* the
        # LOG.debug format call (logging its return value) and then deleted
        # again — log the filename and delete once
        LOG.debug("removing the FAI index for {0}".format(filename_output_l))
        g2g_fu.delete_index_files(filename_output_l)

        # move temp to final destination
        if bgzip:
            LOG.info("Compressing and indexing...")
            g2g_fu.bgzip_index(filename_output_l, "{0}.gz".format(filename_output_l), "fa")
            if diploid:
                g2g_fu.bgzip_index(filename_output_r, "{0}.gz".format(filename_output_r), "fa")

        LOG.info("Execution complete: {0}".format(format_time(start, time.time())))
    except Exception as e:
        LOG.debug(e)
        # propagate with the underlying cause instead of an empty message
        raise G2GError(str(e))
def prepare_fasta_patch(filename_fasta, filename_output, bgzip=False, diploid=False):
    """
    Initialize fasta_patch variables and stage input/output files.

    Copies (and, for '.fa.gz' input, decompresses) the input Fasta into the
    output location(s) and builds a fasta index for the left output.

    :param filename_fasta: input Fasta file name ('.fa' or '.fa.gz')
    :param filename_output: desired output Fasta file name ('.fa' or '.fa.gz')
    :param bgzip: output will later be BGZIP compressed
    :param diploid: create separate left ('l') and right ('r') output files
    :return: tuple (filename_output_l, filename_output_r); the right name is
        ``None`` unless ``diploid`` is True
    """
    filename_output = g2g_fu.check_file(filename_output, "w")
    output_file_dir = os.path.abspath(os.path.dirname(filename_output))

    new_filename_output = filename_output

    # let's figure out what our output names will be
    if filename_output.lower().endswith(".gz"):
        # strip off .gz
        new_filename_output = filename_output[:-3]

    # validate the *uncompressed* name; the original checked filename_output,
    # which still ends in '.gz', so every valid '.fa.gz' output was rejected
    if not new_filename_output.lower().endswith(".fa"):
        raise G2GValueError("Expecting output filename extension to be either '.fa.gz' or '.fa'")

    if diploid:
        filename_output_l = g2g_fu.prepend_before_extension(new_filename_output, "l")
        filename_output_r = g2g_fu.prepend_before_extension(new_filename_output, "r")
        g2g_fu.delete_index_files(filename_output_l)
        g2g_fu.delete_index_files(filename_output_r)
    else:
        filename_output_l = new_filename_output
        filename_output_r = None
        g2g_fu.delete_index_files(filename_output_l)

    # at this point we are hoping for a .fa extension
    # let's figure out our input and process accordingly
    if filename_fasta.lower().endswith(".fa.gz"):
        # decompress the fasta file if it is compressed
        LOG.info("Copying and decompressing fasta file")

        # copy file and preserve gz extension for bgzip -d to work
        tmp_file_name = os.path.basename(filename_fasta)          # something.fa.gz
        LOG.debug("tmp_file_name={0}".format(tmp_file_name))

        tmp_fasta = os.path.join(output_file_dir, tmp_file_name)  # /path/something.fa.gz
        LOG.debug("tmp_fasta={0}".format(tmp_fasta))

        LOG.debug("COPYING {0} to {1}".format(filename_fasta, tmp_fasta))
        shutil.copy(filename_fasta, tmp_fasta)

        LOG.debug("DECOMPRESSING {0}".format(tmp_fasta))
        g2g_fu.bgzip_decompress(tmp_fasta)

        tmp_fasta = tmp_fasta[:-3]                                # /path/something.fa
        LOG.debug("tmp_fasta={0}".format(tmp_fasta))

        LOG.debug("Moving '{0}' to '{1}'...".format(tmp_fasta, filename_output_l))
        shutil.move(tmp_fasta, filename_output_l)
    elif filename_fasta.lower().endswith(".fa"):
        LOG.debug("File is not compressed")
        LOG.debug("COPYING {0} to {1}".format(filename_fasta, filename_output_l))
        shutil.copy(filename_fasta, filename_output_l)
    else:
        raise G2GValueError("Expecting input filename extension to be either '.fa.gz' or '.fa'")

    if diploid:
        LOG.debug("Copying '{0}' to '{1}'...".format(filename_output_l, filename_output_r))
        shutil.copy(filename_output_l, filename_output_r)

    # build a temporary fasta index (opening with pysam writes the .fai
    # as a side effect; the object itself is discarded)
    pysam.FastaFile(filename_output_l)

    return filename_output_l, filename_output_r
def fasta_transform(fasta_file, chain_file, locations, output_file, bgzip=False, reverse=False):
    """
    Transform a Fasta file into a new coordinate space described by a chain file.

    NOTE(review): this is a duplicate definition — an earlier ``fasta_transform``
    in this file is shadowed by this one; the duplicates should be consolidated.

    :param fasta_file: input Fasta file name, or an already-open ``FastaFile``
    :param chain_file: chain file name, or an already-open ``ChainIter``
    :param locations: list of ``Location`` objects (or parseable location
        strings) to transform; falsy means every reference in the Fasta file
    :param output_file: name of the output Fasta file
    :param bgzip: compress the output in BGZIP format and index it
    :param reverse: reverse the direction of the chain file
    :return: Nothing
    """
    start = time.time()

    if not isinstance(fasta_file, FastaFile):
        fasta_file = g2g_fu.check_file(fasta_file)

    if not isinstance(chain_file, ChainIter):
        chain_file = g2g_fu.check_file(chain_file)

    output_file = g2g_fu.check_file(output_file, 'w')
    g2g_fu.delete_file(output_file)
    g2g_fu.delete_index_files(output_file)

    LOG.info("FASTA FILE: {0}".format(fasta_file))
    LOG.info("CHAIN FILE: {0}".format(chain_file))
    LOG.info("OUTPUT FILE: {0}".format(output_file))
    LOG.info("BGZIP: {0}".format(bgzip))
    LOG.info("REVERSE: {0}".format(reverse))

    if isinstance(fasta_file, FastaFile):
        fasta = fasta_file
    else:
        fasta = FastaFile(fasta_file)

    if not isinstance(chain_file, ChainIter):
        chain_file = ChainIter(chain_file, reverse=reverse)

    # NOTE(review): seq_ids is collected but never read in this function
    seq_ids = []

    if locations:
        LOG.debug("Have locations")
        new_locations = []
        for l in locations:
            if isinstance(l, Location):
                new_locations.append(l)
            else:
                new_locations.append(parse_location(l))
            seq_ids.append(new_locations[-1].seqid)
        locations = new_locations
    else:
        LOG.debug("Calculating locations")
        locations = [
            parse_location(
                "{0}:1-{1}".format(a, fasta.get_reference_length(a)), 1)
            for a in fasta.references
        ]
        seq_ids = [a for a in fasta.references]

    # the uncompressed file we actually write; when bgzip is requested the
    # final (".gz") name differs from the temporary plain-text name
    temp_output_file = output_file

    if bgzip:
        if g2g_fu.get_extension(output_file) != 'gz':
            output_file = "{0}.gz".format(output_file)
        else:
            temp_output_file = temp_output_file[:-3]

    fasta_out = open(temp_output_file, "w")

    LOG.info("Transforming...")

    chr_info = {}

    try:
        # will need a better way, but for now...
        LOG.info("Parsing chain file...")
        for line in chain_file:
            if len(line) > 7:
                # chain header line: record this chromosome's coordinate info
                LOG.debug("Adding chromosome {0}".format(
                    chain_file.current_chain_header[1]))
                chr_info[chain_file.current_chain_header[1]] = {
                    'from_size': line[2],
                    'from_start': line[4],
                    'from_end': line[5],
                    'to_size': line[7],
                    'to_start': line[9],
                    'to_end': line[10],
                    'header_chain': chain_file.current_chain_header,
                    'lines': []
                }
            else:
                # data line: belongs to the most recently seen header
                chr_info[chain_file.current_chain_header[1]]['lines'].append(
                    line)

        LOG.info("Chain file parsed")

        insertion_bases = 0
        deletion_bases = 0

        for location in locations:
            LOG.info("Processing chromosome={0}".format(location.seqid))
            LOG.debug("Location: {0}".format(location))

            chrom_size_to = chr_info[location.seqid]['to_size']
            last_pos = chr_info[location.seqid]['from_start']
            new_sequence = StringIO()
            chain_file.reset()

            for chain_line in chr_info[location.seqid]['lines']:
                LOG.debug("\nLINE: {0} : {1}".format(chain_file.line_no,
                                                     chain_line))

                if len(chain_line) == 1:
                    # last line of the chain: copy the trailing fragment,
                    # then emit the whole transformed sequence
                    fragment = chain_line[0]
                    partial_seq = fasta.fetch(location.seqid, last_pos,
                                              last_pos + fragment)
                    new_sequence.write(str(partial_seq))

                    if len(new_sequence.getvalue()) < chrom_size_to:
                        LOG.warn(
                            "Length's do not match, chromosome length in chain: {0}, sequence length: {1}"
                            .format(chrom_size_to,
                                    len(new_sequence.getvalue())))

                    fasta_out.write(">{0} {1}:{2}-{3}\n".format(
                        location.seqid, location.seqid,
                        chr_info[location.seqid]['from_start'] + 1,
                        chrom_size_to))

                    for l in wrap_sequence(new_sequence.getvalue()):
                        fasta_out.write(l.strip())
                        fasta_out.write('\n')
                    break
                else:
                    # fragment_size dt_size dq_size same_bases dt_bases dq_bases
                    # (dt/dq and their bases swap roles when reverse=True)
                    fragment = chain_line[0]
                    dt = chain_line[1 if not reverse else 2]
                    dq = chain_line[2 if not reverse else 1]
                    same = chain_line[3]
                    dt_bases = chain_line[4 if not reverse else 5]
                    dq_bases = chain_line[5 if not reverse else 4]

                    partial_seq = fasta.fetch(location.seqid, last_pos,
                                              last_pos + fragment)
                    new_sequence.write(partial_seq)

                    if dq > 0:
                        # insertion: splice the inserted bases into the output
                        LOG.debug("INSERTION")
                        new_sequence.write(dq_bases)
                        LOG.debug("{0}:{1}-{2} (Length: {3})".format(
                            location.seqid, last_pos, last_pos + fragment,
                            len(partial_seq)))
                        if len(partial_seq) > 100:
                            LOG.debug("{0}...{1}".format(
                                partial_seq[:10], partial_seq[-10:]))
                        else:
                            LOG.debug(partial_seq)
                        LOG.debug("Adding {0}".format(dq_bases))
                        LOG.debug("SAME={0}, {1}".format(
                            same, partial_seq[-(len(same)):]))
                        insertion_bases += dq

                    if dt > 0:
                        # deletion: skip dt bases of the source sequence
                        LOG.debug("DELETION")
                        last_pos += dt
                        LOG.debug("skipping ahead {0} bases".format(dt))
                        deletion_bases += dt

                    last_pos += fragment

                    LOG.debug(
                        "LAST_POS={0}, INSERTIONS={1}, DELETIONS={2}, DIFF={3}"
                        .format(last_pos, insertion_bases, deletion_bases,
                                (insertion_bases - deletion_bases)))

        # flush pending writes before compressing/indexing; the original
        # never closed fasta_out, so bgzip could see a truncated file
        fasta_out.close()

        # bgzip and index
        if bgzip:
            LOG.info("Compressing and indexing...")
            g2g_fu.bgzip_index(temp_output_file, output_file, 'fa')
    except G2GLocationError as le:
        LOG.debug("Unable to parse location, {0}".format(le.message))
        raise le
    finally:
        # make sure the handle is released on the error path too
        if not fasta_out.closed:
            fasta_out.close()
def fasta_patch(filename_fasta, filename_vcf, strain, filename_output, bgzip=False, num_processes=None, pass_only=False, quality=False, diploid=False):
    """
    Patch a Fasta file by replacing the bases where the SNPs are located in the VCF file.

    NOTE(review): this is a duplicate definition — an earlier ``fasta_patch``
    in this file is shadowed by this one; the duplicates should be consolidated.

    :param filename_fasta: name of the input Fasta file
    :type filename_fasta: string
    :param filename_vcf: name of the VCF file
    :type filename_vcf: string
    :param strain: name of strain to use in VCF file
    :type strain: string
    :param filename_output: name of the output Fasta file
    :type filename_output: string
    :param bgzip: compress file in BGZIP format
    :type bgzip: boolean
    :param num_processes: the number of processes to spawn
    :type num_processes: int
    :param pass_only: Only process those VCF records with a 'PASS'
    :type pass_only: boolean
    :param quality: filter on quality, FI=PASS
    :type quality: boolean
    :param diploid: don't ignore hets and create 2 files
    :type diploid: boolean
    :return: Nothing
    """
    start = time.time()

    filename_fasta = g2g_fu.check_file(filename_fasta)
    filename_vcf = g2g_fu.check_file(filename_vcf)

    LOG.info("INPUT FASTA FILE: {0}".format(filename_fasta))
    LOG.info("VCF FILE: {0}".format(filename_vcf))
    LOG.info("STRAIN: {0}".format(strain))
    LOG.info("PASS FILTER ON: {0}".format(str(pass_only)))
    LOG.info("QUALITY FILTER ON: {0}".format(str(quality)))
    LOG.info("DIPLOID: {0}".format(str(diploid)))

    if not strain:
        raise G2GValueError("No strain was specified.")

    filename_output_l, filename_output_r = prepare_fasta_patch(filename_fasta, filename_output, bgzip, diploid)

    if not num_processes:
        num_processes = multiprocessing.cpu_count()
    elif num_processes <= 0:
        num_processes = 1

    LOG.info("NUMBER OF PROCESSES: {0}".format(num_processes))
    if bgzip:
        if diploid:
            LOG.info("OUTPUT FASTA FILES: {0}.gz".format(filename_output_l))
            LOG.info("                    {0}.gz".format(filename_output_r))
        else:
            LOG.info("OUTPUT FASTA FILE: {0}.gz".format(filename_output_l))
    else:
        if diploid:
            LOG.info("OUTPUT FASTA FILES: {0}".format(filename_output_l))
            LOG.info("                    {0}".format(filename_output_r))
        else:
            LOG.info("OUTPUT FASTA FILE: {0}".format(filename_output_l))

    LOG.info("Patching...")

    try:
        patch(filename_fasta, filename_vcf, strain, filename_output_l,
              filename_output_r, num_processes, pass_only, quality, diploid)
        LOG.info("Patching complete")

        # remove the fai; the original performed the deletion *inside* the
        # LOG.debug format call (logging its return value) and then deleted
        # again — log the filename and delete once
        LOG.debug("removing the FAI index for {0}".format(filename_output_l))
        g2g_fu.delete_index_files(filename_output_l)

        # move temp to final destination
        if bgzip:
            LOG.info("Compressing and indexing...")
            g2g_fu.bgzip_index(filename_output_l, "{0}.gz".format(filename_output_l), 'fa')
            if diploid:
                g2g_fu.bgzip_index(filename_output_r, "{0}.gz".format(filename_output_r), 'fa')

        LOG.info("Execution complete: {0}".format(format_time(start, time.time())))
    except Exception as e:
        LOG.debug(e)
        # propagate with the underlying cause instead of an empty message
        raise G2GError(str(e))
def prepare_fasta_patch(filename_fasta, filename_output, bgzip=False, diploid=False):
    """
    Initialize fasta_patch variables and stage input/output files.

    Copies (and, for '.fa.gz' input, decompresses) the input Fasta into the
    output location(s) and builds a fasta index for the left output.

    NOTE(review): this is a duplicate definition — an earlier
    ``prepare_fasta_patch`` in this file is shadowed by this one; the
    duplicates should be consolidated.

    :param filename_fasta: input Fasta file name ('.fa' or '.fa.gz')
    :param filename_output: desired output Fasta file name ('.fa' or '.fa.gz')
    :param bgzip: output will later be BGZIP compressed
    :param diploid: create separate left ('l') and right ('r') output files
    :return: tuple (filename_output_l, filename_output_r); the right name is
        ``None`` unless ``diploid`` is True
    """
    filename_output = g2g_fu.check_file(filename_output, 'w')
    output_file_dir = os.path.abspath(os.path.dirname(filename_output))

    new_filename_output = filename_output

    # let's figure out what our output names will be
    if filename_output.lower().endswith('.gz'):
        # strip off .gz
        new_filename_output = filename_output[:-3]

    # validate the *uncompressed* name; the original checked filename_output,
    # which still ends in '.gz', so every valid '.fa.gz' output was rejected
    if not new_filename_output.lower().endswith('.fa'):
        raise G2GValueError("Expecting output filename extension to be either '.fa.gz' or '.fa'")

    if diploid:
        filename_output_l = g2g_fu.prepend_before_extension(new_filename_output, 'l')
        filename_output_r = g2g_fu.prepend_before_extension(new_filename_output, 'r')
        g2g_fu.delete_index_files(filename_output_l)
        g2g_fu.delete_index_files(filename_output_r)
    else:
        filename_output_l = new_filename_output
        filename_output_r = None
        g2g_fu.delete_index_files(filename_output_l)

    # at this point we are hoping for a .fa extension
    # let's figure out our input and process accordingly
    if filename_fasta.lower().endswith('.fa.gz'):
        # decompress the fasta file if it is compressed
        LOG.info("Copying and decompressing fasta file")

        # copy file and preserve gz extension for bgzip -d to work
        tmp_file_name = os.path.basename(filename_fasta)          # something.fa.gz
        LOG.debug("tmp_file_name={0}".format(tmp_file_name))

        tmp_fasta = os.path.join(output_file_dir, tmp_file_name)  # /path/something.fa.gz
        LOG.debug("tmp_fasta={0}".format(tmp_fasta))

        LOG.debug("COPYING {0} to {1}".format(filename_fasta, tmp_fasta))
        shutil.copy(filename_fasta, tmp_fasta)

        LOG.debug("DECOMPRESSING {0}".format(tmp_fasta))
        g2g_fu.bgzip_decompress(tmp_fasta)

        tmp_fasta = tmp_fasta[:-3]                                # /path/something.fa
        LOG.debug("tmp_fasta={0}".format(tmp_fasta))

        LOG.debug("Moving '{0}' to '{1}'...".format(tmp_fasta, filename_output_l))
        shutil.move(tmp_fasta, filename_output_l)
    elif filename_fasta.lower().endswith('.fa'):
        LOG.debug("File is not compressed")
        LOG.debug("COPYING {0} to {1}".format(filename_fasta, filename_output_l))
        shutil.copy(filename_fasta, filename_output_l)
    else:
        raise G2GValueError("Expecting input filename extension to be either '.fa.gz' or '.fa'")

    if diploid:
        LOG.debug("Copying '{0}' to '{1}'...".format(filename_output_l, filename_output_r))
        shutil.copy(filename_output_l, filename_output_r)

    # build a temporary fasta index (opening with pysam writes the .fai
    # as a side effect; the object itself is discarded)
    pysam.FastaFile(filename_output_l)

    return filename_output_l, filename_output_r