コード例 #1
0
ファイル: kmer_utils.py プロジェクト: 1eesh/rnaseqlib
def output_dinuc_shuffled_fasta(fasta_fname, shuffled_fasta_fname,
                                num_shuffles=1):
    """
    Given a FASTA file, output a dinucleotide shuffled version of it.

    For every entry read from ``fasta_fname``, ``num_shuffles`` shuffled
    records are written to ``shuffled_fasta_fname``. Each shuffled record
    keeps the entry's original name and uses the first sequence returned
    by ``get_dinuc_shuffles`` for the entry's sequence.

    Args:
        fasta_fname: Path of the input FASTA file.
        shuffled_fasta_fname: Path of the output file to write.
        num_shuffles: Number of shuffled records to emit per input entry.
    """
    fasta_out = fastx_utils.write_open_fastx(shuffled_fasta_fname)
    # Guarantee the output handle is closed even if reading, shuffling
    # or writing fails partway through; otherwise the handle leaks.
    try:
        entries = fastx_utils.get_fastx_entries(fasta_fname)
        for fastx_name, fastx_seq in entries:
            # A fresh shuffle is drawn for each requested copy.
            shuffled_recs = [(fastx_name, get_dinuc_shuffles(fastx_seq)[0])
                             for _ in range(num_shuffles)]
            fasta_utils.write_fasta(fasta_out, shuffled_recs)
    finally:
        fasta_out.close()
コード例 #2
0
ファイル: kmer_utils.py プロジェクト: y461650833y/rnaseqlib
def output_dinuc_shuffled_fasta(fasta_fname,
                                shuffled_fasta_fname,
                                num_shuffles=1):
    """
    Given a FASTA file, output a dinucleotide shuffled version of it.

    Each entry of ``fasta_fname`` yields ``num_shuffles`` records in
    ``shuffled_fasta_fname``. Every record carries the entry's original
    name together with the first sequence that ``get_dinuc_shuffles``
    returns for the entry's sequence.

    Args:
        fasta_fname: Path of the input FASTA file.
        shuffled_fasta_fname: Path of the output file to write.
        num_shuffles: Number of shuffled records to emit per input entry.
    """
    fasta_out = fastx_utils.write_open_fastx(shuffled_fasta_fname)
    try:
        for fastx_entry in fastx_utils.get_fastx_entries(fasta_fname):
            fastx_name, fastx_seq = fastx_entry
            # One independent shuffle per requested copy of this entry.
            shuffled_recs = [
                (fastx_name, get_dinuc_shuffles(fastx_seq)[0])
                for _ in range(num_shuffles)
            ]
            fasta_utils.write_fasta(fasta_out, shuffled_recs)
    finally:
        # Always release the output handle, including when an error is
        # raised while reading, shuffling or writing.
        fasta_out.close()
コード例 #3
0
def download_misc_seqs(genome, output_dir):
    """
    Download assorted sequences related to genome.

    The organism is chosen from the prefix of ``genome``: names starting
    with "hg" select "human" and names starting with "mm" select "mouse".
    Any other genome prints an error and exits the process with status 1.

    For each (label, accession) pair in ``NCBI_MISC_SEQS[organism]`` the
    FASTA is fetched with ``download_ncbi_fasta`` into ``<output_dir>/ncbi``
    and its first record is rewritten, relabelled with the sequence label,
    to ``<output_dir>/misc/<label>.fa``. Labels whose accession is None, or
    whose output file already exists, are skipped.

    Parameters:
      - genome: genome name; only the "hg" / "mm" prefix is inspected here.
      - output_dir: directory under which the "ncbi" and "misc"
        subdirectories are created.
    """
    # Resolve the organism key used to look up the mapping from
    # sequence label (e.g. rRNA) to accession numbers
    organism = None
    if genome.startswith("hg"):
        organism = "human"
    elif genome.startswith("mm"):
        organism = "mouse"
    else:
        print "Error: Unsupported genome."
        sys.exit(1)
    # Fetch the accession numbers for the organism's
    # misc sequences and download them
    misc_seqs = NCBI_MISC_SEQS[organism]
    # Raw NCBI downloads go to "ncbi"; relabelled outputs go to "misc"
    ncbi_outdir = os.path.join(output_dir, "ncbi")
    misc_outdir = os.path.join(output_dir, "misc")
    utils.make_dir(ncbi_outdir)
    utils.make_dir(misc_outdir)
    for seq_label, access_id in misc_seqs.iteritems():
        # No accession recorded for this label: nothing to download
        if access_id is None:
            continue
        output_filename = os.path.join(misc_outdir, "%s.fa" %(seq_label))
        # An existing output file is treated as already downloaded
        if os.path.isfile(output_filename):
            print "%s exists. Skipping download.." %(seq_label)
            continue
        print "Downloading: %s (NCBI: %s)" %(seq_label,
                                             access_id)
        # Presumably returns the path of the downloaded FASTA file, since
        # the result is handed to read_fasta -- TODO confirm
        url_filename = download_ncbi_fasta(access_id, ncbi_outdir)
        fasta_in = fasta_utils.read_fasta(url_filename)
        # NOTE(review): fasta_out is not closed anywhere in the visible
        # part of this function -- confirm it is closed/flushed elsewhere
        fasta_out = open(output_filename, "w")
        print "  - Writing to: %s" %(output_filename)
        # Fetch first FASTA record; any further records are not read here
        # NOTE(review): presumably raises StopIteration if the download
        # contains no records -- verify against read_fasta
        rec = fasta_in.next()
        curr_label, fasta_seq = rec
        # Output it with the required label (the original label from
        # the download, curr_label, is discarded)
        new_rec = (">%s" %(seq_label), fasta_seq)
        fasta_utils.write_fasta(fasta_out, [new_rec])