예제 #1
0
def load_gff(gff):
    """Load the features of a GFF file, grouped by gene name.

    Lines that start with ``#`` or contain the text ``contig`` are skipped.
    Every other line is parsed with ``GFF`` and appended, in file order, to
    the list for its ``genename``. Each feature's ``id`` is set to its "ID"
    attribute, except that a CDS lying entirely inside an already-seen exon
    of the same gene is given that exon's ID with ``_CDS`` appended.

    Args:
        gff: Path of the GFF file to read.

    Returns:
        A ``defaultdict(list)`` mapping gene name to its list of features.

    Raises:
        SystemExit: With exit status 1 if reading the file raises IOError.
    """
    genes = defaultdict(list)
    # gene name -> {exon ID: (start, end)} for the exons seen so far.
    exon_spans = defaultdict(dict)
    try:
        with open(gff) as handle:
            for line in handle:
                if line.startswith('#') or 'contig' in line:
                    continue
                feature = GFF(line)
                gene_id = get_gene_attribute(feature, "ID")
                if feature.featuretype == 'exon':
                    # NOTE(review): an exon without an ID would be stored
                    # under None and break the "_CDS" concatenation below;
                    # confirm get_gene_attribute never returns None here.
                    exon_spans[feature.genename][gene_id] = (
                        feature.start, feature.end)
                elif feature.featuretype == 'CDS':
                    # No early exit: when several exons contain the CDS,
                    # the last one recorded wins.
                    for exon_id, (start, end) in exon_spans[
                            feature.genename].items():
                        if (start <= feature.start <= end
                                and start <= feature.end <= end):
                            gene_id = exon_id + "_CDS"
                if gene_id is None:
                    # Reported for any feature type; the feature is still kept.
                    print("No gene id for CDS found", feature, end="")
                feature.id = gene_id
                genes[feature.genename].append(feature)

    except IOError:
        # Report on stderr and exit non-zero so callers and shell pipelines
        # can tell the load failed (a bare sys.exit() reports success).
        print("Failed to load GFF file {}".format(gff), file=sys.stderr)
        sys.exit(1)

    return genes
def load_gff(gff):
    """Load the features of a GFF file, grouped by gene name.

    Lines that start with ``#`` or contain the text ``contig`` are skipped.
    Every other line is parsed with ``GFF`` and appended, in file order, to
    the list for its ``genename``. Each feature's ``id`` is set to its "ID"
    attribute, except that a CDS lying entirely inside an already-seen exon
    of the same gene is given that exon's ID with ``_CDS`` appended.

    Args:
        gff: Path of the GFF file to read.

    Returns:
        A ``defaultdict(list)`` mapping gene name to its list of features.

    Raises:
        SystemExit: With exit status 1 if reading the file raises IOError.
    """
    genes = defaultdict(list)
    # gene name -> {exon ID: (start, end)} for the exons seen so far.
    exon_spans = defaultdict(dict)
    try:
        with open(gff) as handle:
            for line in handle:
                if line.startswith('#') or 'contig' in line:
                    continue
                feature = GFF(line)
                gene_id = get_gene_attribute(feature, "ID")
                if feature.featuretype == 'exon':
                    # NOTE(review): an exon without an ID would be stored
                    # under None and break the "_CDS" concatenation below;
                    # confirm get_gene_attribute never returns None here.
                    exon_spans[feature.genename][gene_id] = (
                        feature.start, feature.end)
                elif feature.featuretype == 'CDS':
                    # No early exit: when several exons contain the CDS,
                    # the last one recorded wins.
                    for exon_id, (start, end) in exon_spans[
                            feature.genename].items():
                        if (start <= feature.start <= end
                                and start <= feature.end <= end):
                            gene_id = exon_id + "_CDS"
                if gene_id is None:
                    # Reported for any feature type; the feature is still kept.
                    print("No gene id for CDS found", feature, end="")
                feature.id = gene_id
                genes[feature.genename].append(feature)

    except IOError:
        # Report on stderr and exit non-zero so callers and shell pipelines
        # can tell the load failed (a bare sys.exit() reports success).
        print("Failed to load GFF file {}".format(gff), file=sys.stderr)
        sys.exit(1)

    return genes
예제 #3
0
def load_crossmap(crossmapout):
    """Load the features of a CrossMap output file, grouped by gene name.

    Two kinds of line are recognised:

    * a feature, a tab, ``->``, a tab and the mapped feature: both halves
      are parsed with ``GFF``; the mapped one gets ``;Note=CrossMap``
      appended to its attributes and is stored as ``feature.crossmap``;
    * a feature followed by a tab and ``fail``: only the original feature
      is parsed and ``feature.crossmap`` is ``None``.

    Lines matching neither form are skipped. Each feature's ``id`` is set
    to its "ID" attribute, except that a CDS lying entirely inside an
    already-seen exon of the same gene is given that exon's ID with
    ``_CDS`` appended. A CDS whose mapped copy has a different length
    loses its ``crossmap``.

    Args:
        crossmapout: Path of the CrossMap output file to read.

    Returns:
        A ``defaultdict(list)`` mapping gene name to its list of features,
        in file order.

    Raises:
        SystemExit: With exit status 1 if reading the file raises IOError.
    """
    genes = defaultdict(list)
    # gene name -> {exon ID: (start, end)} for the exons seen so far.
    exon_spans = defaultdict(dict)
    try:
        with open(crossmapout) as handle:
            for line in handle:
                cm = None
                if '->' in line:
                    featureline, cmline = line.split('\t->\t')
                    cm = GFF(cmline)
                    cm.attributes += ';Note=CrossMap'
                elif 'fail' in line:
                    featureline = line.split('\tfail')[0]
                else:
                    # Neither a mapped nor a failed record (blank line,
                    # header, ...). Skip it rather than reuse the previous
                    # line's feature text or hit an unbound variable.
                    continue
                feature = GFF(featureline)
                feature.crossmap = cm
                gene_id = get_gene_attribute(feature, "ID")
                if feature.featuretype == 'exon':
                    # NOTE(review): an exon without an ID would be stored
                    # under None and break the "_CDS" concatenation below;
                    # confirm get_gene_attribute never returns None here.
                    exon_spans[feature.genename][gene_id] = (
                        feature.start, feature.end)
                elif feature.featuretype == 'CDS':
                    # No early exit: when several exons contain the CDS,
                    # the last one recorded wins.
                    for exon_id, (start, end) in exon_spans[
                            feature.genename].items():
                        if (start <= feature.start <= end
                                and start <= feature.end <= end):
                            gene_id = exon_id + "_CDS"

                    # Lose CrossMap features where the CDSs aren't the
                    # same length.
                    if feature.crossmap:
                        cds_len = feature.end - feature.start + 1
                        cm_len = (feature.crossmap.end
                                  - feature.crossmap.start + 1)
                        if cds_len != cm_len:
                            feature.crossmap = None
                if gene_id is None:
                    # Reported for any feature type; the feature is still kept.
                    print("No gene id for CDS found", feature, end="")
                feature.id = gene_id

                genes[feature.genename].append(feature)

    except IOError:
        # Report on stderr and exit non-zero so callers and shell pipelines
        # can tell the load failed (a bare sys.exit() reports success).
        print("Failed to load CrossMap output {}".format(crossmapout),
              file=sys.stderr)
        sys.exit(1)

    return genes
def load_crossmap(crossmapout):
    """Load the features of a CrossMap output file, grouped by gene name.

    Two kinds of line are recognised:

    * a feature, a tab, ``->``, a tab and the mapped feature: both halves
      are parsed with ``GFF``; the mapped one gets ``;Note=CrossMap``
      appended to its attributes and is stored as ``feature.crossmap``;
    * a feature followed by a tab and ``fail``: only the original feature
      is parsed and ``feature.crossmap`` is ``None``.

    Lines matching neither form are skipped. Each feature's ``id`` is set
    to its "ID" attribute, except that a CDS lying entirely inside an
    already-seen exon of the same gene is given that exon's ID with
    ``_CDS`` appended. A CDS whose mapped copy has a different length
    loses its ``crossmap``.

    Args:
        crossmapout: Path of the CrossMap output file to read.

    Returns:
        A ``defaultdict(list)`` mapping gene name to its list of features,
        in file order.

    Raises:
        SystemExit: With exit status 1 if reading the file raises IOError.
    """
    genes = defaultdict(list)
    # gene name -> {exon ID: (start, end)} for the exons seen so far.
    exon_spans = defaultdict(dict)
    try:
        with open(crossmapout) as handle:
            for line in handle:
                cm = None
                if '->' in line:
                    featureline, cmline = line.split('\t->\t')
                    cm = GFF(cmline)
                    cm.attributes += ';Note=CrossMap'
                elif 'fail' in line:
                    featureline = line.split('\tfail')[0]
                else:
                    # Neither a mapped nor a failed record (blank line,
                    # header, ...). Skip it rather than reuse the previous
                    # line's feature text or hit an unbound variable.
                    continue
                feature = GFF(featureline)
                feature.crossmap = cm
                gene_id = get_gene_attribute(feature, "ID")
                if feature.featuretype == 'exon':
                    # NOTE(review): an exon without an ID would be stored
                    # under None and break the "_CDS" concatenation below;
                    # confirm get_gene_attribute never returns None here.
                    exon_spans[feature.genename][gene_id] = (
                        feature.start, feature.end)
                elif feature.featuretype == 'CDS':
                    # No early exit: when several exons contain the CDS,
                    # the last one recorded wins.
                    for exon_id, (start, end) in exon_spans[
                            feature.genename].items():
                        if (start <= feature.start <= end
                                and start <= feature.end <= end):
                            gene_id = exon_id + "_CDS"

                    # Lose CrossMap features where the CDSs aren't the
                    # same length.
                    if feature.crossmap:
                        cds_len = feature.end - feature.start + 1
                        cm_len = (feature.crossmap.end
                                  - feature.crossmap.start + 1)
                        if cds_len != cm_len:
                            feature.crossmap = None
                if gene_id is None:
                    # Reported for any feature type; the feature is still kept.
                    print("No gene id for CDS found", feature, end="")
                feature.id = gene_id

                genes[feature.genename].append(feature)

    except IOError:
        # Report on stderr and exit non-zero so callers and shell pipelines
        # can tell the load failed (a bare sys.exit() reports success).
        print("Failed to load CrossMap output {}".format(crossmapout),
              file=sys.stderr)
        sys.exit(1)

    return genes