コード例 #1
0
ファイル: gff.py プロジェクト: linlifeng/jcvi
def make_index(gff_file):
    """
    Return a GFFutils.GFFDB handle for the sqlite index of `gff_file`.

    The index lives next to the input as ``<gff_file>.db``.  Whenever
    `need_update` reports it as out of date, any existing database file is
    removed and the index is rebuilt before the handle is opened.
    """
    import GFFutils

    db_file = gff_file + ".db"
    stale = need_update(gff_file, db_file)

    # Drop the old database first so the rebuild starts from scratch.
    if stale and op.exists(db_file):
        os.remove(db_file)
    if stale:
        GFFutils.create_gffdb(gff_file, db_file)

    return GFFutils.GFFDB(db_file)
コード例 #2
0
def make_index(gff_file):
    """
    Open (building it first if required) the sqlite feature index that
    belongs to `gff_file` and return it as a GFFutils.GFFDB object.
    """
    import GFFutils

    db_file = gff_file + ".db"

    # Fast path: the existing index is still current.
    if not need_update(gff_file, db_file):
        return GFFutils.GFFDB(db_file)

    # Rebuild: discard whatever is left of a previous database file.
    if op.exists(db_file):
        os.remove(db_file)
    GFFutils.create_gffdb(gff_file, db_file)

    return GFFutils.GFFDB(db_file)
コード例 #3
0
 def to_sam(self, fasta):
     """
     Return the SAM text of every item in self.items as a single string.

     `fasta` is passed to GFFutils.Genome; the resulting genome object is
     handed to each item's own to_sam().  The pieces are concatenated
     without any separator.
     """
     genome = GFFutils.Genome(fasta)
     return ''.join(item.to_sam(genome) for item in self.items)
コード例 #4
0
 def to_fastq(self, fasta):
     """
     Return the FASTQ text (sequence plus fake quality scores) of every
     item in self.items as a single string.  Sequence names are the same
     as the GFFFeature.id.

     `fasta` is passed to GFFutils.Genome; the resulting genome object is
     handed to each item's own to_fastq().
     """
     genome = GFFutils.Genome(fasta)
     records = [item.to_fastq(genome) for item in self.items]
     return ''.join(records)
コード例 #5
0
def main(gff_file, fasta_file, parents, children):
    """
    Print one FASTA record per parent feature, stitched from its children.

    A GFFutils database ``<gff_file>.db`` is created when that file does
    not exist yet.  For each feature whose type is listed in `parents`, the
    features returned by ``g.children(feat.id, 1)`` are filtered down to
    the types listed in `children`; their sequences are read from
    `fasta_file`, ordered by start coordinate (reversed for parents on the
    '-' strand) and concatenated.

    gff_file   -- path of the GFF file
    fasta_file -- path handed to Fasta()
    parents    -- comma-separated feature types to report
    children   -- comma-separated feature types to stitch together

    Parents without a matching child are reported on stderr and skipped.
    Output uses only constructs valid on both Python 2 and Python 3.
    """
    db_file = gff_file + ".db"

    if not op.exists(db_file):
        GFFutils.create_gffdb(gff_file, db_file)

    f = Fasta(fasta_file)
    g = GFFutils.GFFDB(db_file)

    parent_types = set(parents.split(','))
    parents_iter = [g.features_of_type(x) for x in parent_types]
    child_types = set(children.split(','))

    for feat in itertools.chain.from_iterable(parents_iter):
        # Collected as (sequence, feature) pairs; deliberately not named
        # `children` so the argument of that name is never rebound.
        pieces = []
        for c in g.children(feat.id, 1):
            if c.featuretype not in child_types:
                continue
            seq = f.sequence(dict(chr=c.chrom, start=c.start, stop=c.stop,
                                  strand=c.strand))
            pieces.append((seq, c))

        if not pieces:
            # write() replaces the Python-2-only `print >>` statement.
            sys.stderr.write("[warning] %s has no children with type %s\n"
                             % (feat.id, ','.join(child_types)))
            continue

        # sort children in incremental position
        pieces.sort(key=lambda x: x[1].start)
        # reverse children if negative strand
        if feat.strand == '-':
            pieces.reverse()

        # A parenthesised single argument prints identically on 2 and 3.
        print(">%s" % feat.id)
        print(''.join(x[0] for x in pieces))
コード例 #6
0
ファイル: gff_loader.py プロジェクト: yuzhenpeng/bio-pipeline
def main(gff_file, fasta_file, parents, children):
    """
    Write a FASTA record to stdout for every requested parent feature.

    The record's sequence is the concatenation of the parent's child
    features (as returned by ``g.children(feat.id, 1)``) whose featuretype
    appears in `children`, ordered by start coordinate and reversed when
    the parent lies on the '-' strand.  The GFFutils database
    ``<gff_file>.db`` is built first if that file is missing.

    gff_file   -- path of the GFF file
    fasta_file -- path handed to Fasta()
    parents    -- comma-separated parent feature types
    children   -- comma-separated child feature types

    A parent with no matching child triggers a warning on stderr and is
    skipped.  Only syntax valid on both Python 2 and Python 3 is used.
    """
    db_file = gff_file + ".db"

    if not op.exists(db_file):
        GFFutils.create_gffdb(gff_file, db_file)

    f = Fasta(fasta_file)
    g = GFFutils.GFFDB(db_file)

    wanted_parents = set(parents.split(','))
    parents_iter = [g.features_of_type(x) for x in wanted_parents]
    parents_list = itertools.chain(*parents_iter)
    wanted_children = set(children.split(','))

    for feat in parents_list:
        # (sequence, feature) pairs; a separate name keeps the `children`
        # argument from being overwritten inside the loop.
        selected = []
        for c in g.children(feat.id, 1):
            if c.featuretype not in wanted_children:
                continue
            child = f.sequence(
                dict(chr=c.chrom, start=c.start, stop=c.stop, strand=c.strand))
            selected.append((child, c))

        if not selected:
            # Portable replacement for the Python-2-only `print >>sys.stderr`.
            sys.stderr.write("[warning] %s has no children with type %s\n"
                             % (feat.id, ','.join(wanted_children)))
            continue

        # sort children in incremental position
        selected.sort(key=lambda x: x[1].start)
        # reverse children if negative strand
        if feat.strand == '-':
            selected.reverse()
        feat_seq = ''.join(x[0] for x in selected)

        print(">%s" % feat.id)
        print(feat_seq)
コード例 #7
0
#!/usr/bin/python

import sys
import numpy as np
np.set_printoptions(threshold=np.inf)
#from itertools import count, tee, izip, islice
import GFFutils
import pyBigWig
import matplotlib.pyplot as plt

# Feature database handle, opened at import time from a hard-coded path.
G = GFFutils.GFFDB("/home/user/dm1.db")
# bigWig file opened through pyBigWig; the path is hard-coded as well.
C_bw = pyBigWig.open("/home/user/Symb_treatedVscontrol_50bin.bw")
# Module-level accumulator, starts empty.
Gene_final = []


def separate_exon_intron(EI_list, EI1, EI2, EI3):
    """
    Append the lengths of the first three entries of `EI_list` to
    `EI1`, `EI2` and `EI3` respectively (modifying those lists in place).

    Entries beyond the third are still measured with len() but their
    lengths are not stored anywhere.  Returns None.
    """
    targets = {0: EI1, 1: EI2, 2: EI3}
    for position, element in enumerate(EI_list):
        size = len(element)
        bucket = targets.get(position)
        if bucket is not None:
            bucket.append(size)


# Module-level accumulators.  Every name is bound to its own, initially
# empty, list.
exon_len = []

upstream_3000 = []
exon1 = []
intron1 = []
exon2 = []
intron2 = []
exon3 = []
intron3 = []
after_1000 = []

before_1000 = []
intron_3 = []
exon_3 = []
intron_2 = []
exon_2 = []
intron_1 = []
exon_1 = []
downstream_3000 = []

for mRNA in G.features_of_type('mRNA'):
    exons = list(G.children(mRNA, featuretype='exon'))
コード例 #8
0
#!/usr/bin/python

usage = """
    
    Create a GFF database from a GFF file, e.g., downloaded from FlyBase.
    
    Ryan Dale 2010 ([email protected])
    """
import GFFutils
import optparse
import os
import sys

# Command-line interface: both --gff and --gffdb must be supplied.
op = optparse.OptionParser(usage=usage)
op.add_option('--gff', dest='gff', help='Input GFF file')
op.add_option('--gffdb', dest='gffdb', help='Destination GFF database file')
options, args = op.parse_args()

if not options.gff or not options.gffdb:
    op.print_help()
    # Parenthesised single-argument print works on Python 2 and Python 3.
    print('\nERROR: Please specify an input GFF file and an output GFF database file')
    # Exit with a non-zero status so calling scripts can detect the failure
    # (a bare sys.exit() would report success).
    sys.exit(1)


if not os.path.exists(options.gff):
    print('GFF file %s does not exist!' % options.gff)
    sys.exit(1)

# Build the database file from the GFF input.
GFFutils.create_gffdb(options.gff, options.gffdb)

コード例 #9
0
import GFFutils

# Feature database opened from 'dm3.db' (path relative to the working
# directory).
G = GFFutils.GFFDB('dm3.db')
# Module-level accumulators, all starting empty.
# NOTE(review): the loop that fills them is only partly visible from here;
# presumably exon1..exon3 / intron1..intron3 relate to the first three
# exons/introns of a transcript and the underscore variants to the last
# three -- confirm against the rest of the script.
exon_len = []
exon1 = []
exon2 = []
exon3 = []
exon_3 = []
exon_2 = []
exon_1 = []
intron1 = []
intron2 = []
intron3 = []
intron_3 = []
intron_2 = []
intron_1 = []

# Counter initialised to zero before the mRNA loop below.
gene_count = 0
for mRNA in G.features_of_type('mRNA'):
    #    print(mRNA)
    exons = list(G.children(mRNA, featuretype='exon'))
    introns = list(G.interfeatures(exons))
    if mRNA.strand == "-":
        first_3_exons = exons[-3:]
        last_3_exons = exons[:3]
        first_3_introns = introns[-3:]
        last_3_introns = introns[:3]
    else:
        first_3_exons = exons[:3]
        last_3_exons = exons[-3:]
        first_3_introns = introns[:3]
コード例 #10
0
Optionally adds a "chr" to the beginning of each chromosome.

Writes to stdout.
"""
op = optparse.OptionParser(usage=usage)
op.add_option('--addchr',
              action='store_true',
              help='Prefix each chromosome with "chr" in the output file.')

options, args = op.parse_args()

# The input GFF filename is the first positional argument.  Report a missing
# filename through the parser (usage message, exit status 2) instead of
# failing with a bare IndexError.
if not args:
    op.error('Please specify an input GFF file')
gfffn = args[0]

# this is a list of featuretypes that you want to remove.
culled_features = [
    'orthologous_to', 'pcr_product', 'BAC_cloned_genomic_insert'
]

f = GFFutils.GFFFile(gfffn)
for feature in f:
    # Skip features that lack a coordinate.
    if (feature.start is None) or (feature.stop is None):
        continue
    # Skip features with negative coordinates or with start after stop.
    if (feature.start < 0) or (feature.stop < 0) or (feature.start >
                                                     feature.stop):
        continue
    # Skip the unwanted feature types listed above.
    if feature.featuretype in culled_features:
        continue
    if options.addchr:
        feature.chr = 'chr' + feature.chr
    # Surviving features are written to stdout via their own tostring().
    sys.stdout.write(feature.tostring())
コード例 #11
0
g10.parse(gene_models4)

# Each gene_models* text is split into lines with the line endings kept
# (splitlines(True)) and then handed to GenomeModel.parse().
gene_models5 = gene_models5.splitlines(True)
g11 = GenomeModel(chrom_start=1, scalar=5, read_length=3, debug=False)
g11.parse(gene_models5)

# Same setup, but with an explicit chromosome name ('chr3R') and a
# chrom_start of 101.
gene_models6 = gene_models6.splitlines(True)
g12 = GenomeModel(chrom='chr3R',
                  chrom_start=101,
                  scalar=5,
                  read_length=3,
                  debug=False)
g12.parse(gene_models6)

# The FASTA file is located relative to the directory containing this file.
here = os.path.dirname(__file__)
genome = GFFutils.Genome(os.path.join(here, 'data/dm3.chr2L.oneline.fa'))


def feature_exists(genome_model_obj,
                   start,
                   stop,
                   featuretype,
                   chrom='chr2L',
                   strand='+'):
    # Checks to see if a feature exists.  Does not check names, only genomic
    # coords and featuretype
    for feature in genome_model_obj.features:
        if (feature.start == start) and (feature.stop == stop) \
           and (feature.chrom == chrom) and (feature.strand == strand) \
           and (feature.featuretype == featuretype):
            return True