# Shared imports for the snippets below. feature_lambda and the
# feature_test_* helpers are assumed to come from the local gff3 utility
# module these tools share; individual snippets also rely on stdlib modules
# (os, subprocess, tempfile, json, collections, logging) and Bio.Seq helpers
# imported in their original files.
import sys

from Bio import SeqIO
from Bio.SeqFeature import SeqFeature
from BCBio import GFF


def find_introns(gff3, fasta):
    seq_dict = SeqIO.to_dict(SeqIO.parse(fasta, "fasta"))
    for rec in GFF.parse(gff3, base_dict=seq_dict):
        genes = list(
            feature_lambda(rec.features, feature_test_type, {"type": "gene"}, subfeatures=True)
        )
        for gene in genes:
            cdss = sorted(
                list(
                    feature_lambda(
                        gene.sub_features,
                        feature_test_type,
                        {"type": "CDS"},
                        subfeatures=False,
                    )
                ),
                key=lambda x: x.location.start,
            )
            if len(cdss) > 1:
                intron = ""
                # Find pairs of CDSs with introns in between
                for i in range(len(cdss) - 1):
                    intron_start = cdss[i].location.end
                    intron_end = cdss[i + 1].location.start
                    # Cast to str: slicing the record yields a SeqRecord, and
                    # writing a Seq object to stdout would raise a TypeError.
                    intron += str(rec[intron_start:intron_end].seq)
                sys.stdout.write(">" + rec.id + "\n")
                sys.stdout.write(intron + "\n")
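

# A minimal driver sketch for find_introns (not part of the original
# snippet); the file names are placeholders.
def _demo_find_introns():
    with open("annotations.gff3") as gff3, open("genome.fa") as fasta:
        find_introns(gff3, fasta)  # intron FASTA records go to stdout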


def require_shinefind(gff3, fasta):
    # Despite its name, this variant keeps only genes with at least one CDS
    # beginning with a canonical start codon (ATG/GTG/TTG).
    # Load up sequence(s) for GFF3 data
    seq_dict = SeqIO.to_dict(SeqIO.parse(fasta, "fasta"))
    # Parse GFF3 records
    for record in GFF.parse(gff3, base_dict=seq_dict):
        # Reopen
        genes = list(
            feature_lambda(record.features, feature_test_type, {'type': 'gene'}, subfeatures=True)
        )
        good_genes = []
        for gene in genes:
            cdss = list(
                feature_lambda(gene.sub_features, feature_test_type, {'type': 'CDS'}, subfeatures=False)
            )
            if len(cdss) == 0:
                continue

            one_good_cds = False
            for cds in cdss:
                if str(cds.extract(record).seq[0:3]).upper() in ('GTG', 'ATG', 'TTG'):
                    one_good_cds = True

            if one_good_cds:
                good_genes.append(gene)

        record.features = good_genes
        record.annotations = {}
        yield record


def fix_ncbi(gff3):
    for rec in GFF.parse(gff3):
        for feature in feature_lambda(rec.features, feature_test_type, {'type': 'gene'}, subfeatures=True):
            CDSs = list(
                feature_lambda(feature.sub_features, feature_test_type, {'type': 'CDS'}, subfeatures=False)
            )
            if len(CDSs) == 1:
                feature.qualifiers.update(safe_qualifiers(CDSs[0].qualifiers))
        GFF.write([rec], sys.stdout)


def main(fasta, gff3):
    seq_dict = SeqIO.to_dict(SeqIO.parse(fasta, "fasta"))

    codon_usage = {}
    for rec in GFF.parse(gff3, base_dict=seq_dict):
        for feat in feature_lambda(rec.features, feature_test_type, {'type': 'CDS'}, subfeatures=True):
            # The last three bases of each CDS are its stop codon
            seq = str(feat.extract(rec).seq)[-3:]
            try:
                codon_usage[seq] += 1
            except KeyError:
                codon_usage[seq] = 1

    names = {
        'TAG': 'Amber',
        'TAA': 'Ochre',
        'TGA': 'Opal',
    }
    # TODO: print all actg combinations? Or just ones that are there
    print('# Name\tCodon\tCount')
    for key in sorted(codon_usage):
        print('\t'.join((names.get(key.upper(), 'None'), key, str(codon_usage[key]))))


def _orfCalls(self):
    fnmga = self.base_name + ".mga"
    if not os.path.exists(fnmga):
        log.warn("%s does not exist, calling genes in %s", fnmga, self.rec_file.name)
        # Run MGA
        subprocess.check_call(
            ["mga_linux_x64", "-s", self.rec_file.name], stdout=open(fnmga, "w")
        )

    # Convert to gff3
    fn = self.base_name + ".mga.gff3"
    self.mga_gff3 = fn
    with open(fnmga, "r") as handle, open(fn, "w") as output:
        self.rec_file.seek(0)
        for result in mga_to_gff3(handle, self.rec_file):
            # Store GFF3 data in self in order to access later.
            self.mga_rec = result
            GFF.write([result], output)

    # Process a feature id -> feature table in mem.
    self.featureDict = {}
    for f in feature_lambda(self.mga_rec.features, lambda x: True, {}, subfeatures=True):
        self.featureDict[f.qualifiers["ID"][0]] = f

    # Extract
    fnfa = self.base_name + ".mga.fa"
    self.fnfa = fnfa
    subprocess.check_call(
        [
            "python2",
            os.path.join(SCRIPT_DIR, os.pardir, "gff3", "gff3_extract_sequence.py"),
            "--feature_filter",
            "CDS",
            self.rec_file.name,
            fn,
        ],
        stdout=open(fnfa, "w"),
    )

    # Translate
    fnpfa = self.base_name + ".mga.pfa"
    self.fnpfa = fnpfa
    subprocess.check_call(
        [
            "python2",
            os.path.join(SCRIPT_DIR, os.pardir, "fasta", "fasta_translate.py"),
            "--table", "11",
            "--strip_stops",
            "--target", "protein",
            fnfa,
        ],
        stdout=open(fnpfa, "w"),
    )
    return fnpfa


def fixed_feature(rec):
    for idx, feature in enumerate(
        feature_lambda(rec.features, feature_test_type, {"type": "tRNA"}, subfeatures=True)
    ):
        fid = "tRNA-%03d" % (1 + idx)
        name = ["tRNA-" + feature.qualifiers["Codon"][0]]
        gene = SeqFeature(
            location=feature.location,
            type="gene",
            qualifiers={
                "ID": [fid + ".gene"],
                "source": ["aragorn"],
                "Name": name,
            },
        )
        feature.qualifiers["Name"] = name
        # Below the tRNA we have an exon
        exon = SeqFeature(
            location=feature.location,
            type="exon",
            qualifiers={
                "source": ["aragorn"],
                "ID": ["%s.exon" % fid],
                "Name": name,
            },
        )
        feature.qualifiers["ID"] = [fid]
        # gene -> tRNA -> exon
        feature.sub_features = [exon]
        gene.sub_features = [feature]
        yield gene


def fixed_feature(rec):
    import random  # only used to generate unique feature IDs

    for feature in feature_lambda(
        rec.features, feature_test_type, {'type': 'CDS'}, subfeatures=True
    ):
        fid = feature.qualifiers['ID'][0] + '_' + str(random.random())
        gene = SeqFeature(
            location=feature.location,
            type='gene',
            qualifiers={
                'ID': [fid],
                'source': ['cpt.fixModel'],
            },
        )
        # Below that we have an mRNA
        mRNA = SeqFeature(
            location=feature.location,
            type='mRNA',
            qualifiers={
                'source': ['cpt.fixModel'],
                'ID': ['%s.mRNA' % fid],
            },
        )
        feature.qualifiers['ID'] = [fid + '.CDS']
        # gene -> mRNA -> CDS
        mRNA.sub_features = [feature]
        gene.sub_features = [mRNA]
        yield gene


def gff_reopen(gff3, index=1, fasta=None, fasta_output=None):
    # Convert to zero-based
    index -= 1
    it = None
    if fasta:
        seq_dict = SeqIO.to_dict(SeqIO.parse(fasta, "fasta"))
        it = GFF.parse(gff3, base_dict=seq_dict)
    else:
        it = GFF.parse(gff3)

    for rec in it:
        # Reopen
        if len(list(
            feature_lambda(rec.features, feature_test_contains, {'index': index}, subfeatures=False)
        )) > 0:
            log.warn(
                "WARNING: Index chosen is in the middle of a feature. This feature will disappear from the output"
            )

        # TODO: This call removes metadata!
        rec = rec[index:] + rec[0:index]
        rec.features = sorted(rec.features, key=lambda x: x.location.start)

        if fasta:
            if len(rec.seq) == rec.seq.count("?"):
                log.error(
                    "ERROR: You have provided a fasta file but the sequence ID in the fasta file DID NOT MATCH THE GFF. THIS IS BAD."
                )
        yield rec
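

# Sketch (not from the original tool): reopen a genome at base 100 and
# write the shifted records back out; file names are placeholders.
def _demo_gff_reopen():
    with open("annotations.gff3") as gff3, open("genome.fa") as fasta:
        for rec in gff_reopen(gff3, index=100, fasta=fasta):
            GFF.write([rec], sys.stdout)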


def parse_gff(gff3):
    """
    Extracts strand and start location to be used in cluster filtering
    """
    log.debug("parse_gff3")
    gff_info = {}
    _rec = None
    for rec in GFF.parse(gff3):
        _rec = rec
        _rec.annotations = {}
        for feat in feature_lambda(rec.features, test_true, {}, subfeatures=False):
            if feat.type == 'CDS':
                gff_info[feat.id] = {
                    'strand': feat.strand,
                    'start': feat.location.start,
                    'loc': feat.location,
                    'feat': feat,
                }
    gff_info = OrderedDict(sorted(gff_info.items(), key=lambda k: k[1]['start']))
    for i, feat_id in enumerate(gff_info):
        gff_info[feat_id].update({'index': i})
    return dict(gff_info), _rec


def merge_interpro(gff3, interpro):
    ipr_additions = {}
    # blacklist = ('Name', 'ID', 'Target', 'date', 'status', 'signature_desc', 'source', 'md5', 'score')
    whitelist = ('Dbxref', 'Ontology_term')

    for rec in GFF.parse(interpro):
        ipr_additions[rec.id] = {}
        for feature in rec.features:
            quals = feature.qualifiers
            for key in quals:
                if key not in ipr_additions[rec.id]:
                    ipr_additions[rec.id][key] = set()
                for value in quals[key]:
                    ipr_additions[rec.id][key].add(value)

        # Cast as a list so we aren't iterating over actual keyset. Otherwise,
        # we'll throw an error for modifying keyset during iteration, which we
        # don't really care about here.
        for key in list(ipr_additions[rec.id]):
            if key not in whitelist:
                del ipr_additions[rec.id][key]

    for rec in GFF.parse(gff3):
        for feature in feature_lambda(rec.features, feature_test_true, None, subfeatures=True):
            if feature.id in ipr_additions:
                for key in ipr_additions[feature.id]:
                    if key not in feature.qualifiers:
                        feature.qualifiers[key] = []
                    feature.qualifiers[key] += list(ipr_additions[feature.id][key])
        rec.annotations = {}
        GFF.write([rec], sys.stdout)
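

# Example invocation (hypothetical paths): fold whitelisted InterPro
# qualifiers into the matching GFF3 features, writing GFF3 to stdout.
def _demo_merge_interpro():
    with open("annotations.gff3") as gff3, open("interpro.gff3") as interpro:
        merge_interpro(gff3, interpro)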


def get_cdss(self):
    return list(
        feature_lambda(
            self.feature.sub_features,
            feature_test_type,
            {"type": "CDS"},
            subfeatures=False,
        )
    )


def gff_filter(gff3):
    for rec in GFF.parse(gff3):
        for feature in feature_lambda(rec.features, test_true, {}, subfeatures=True):
            if feature.type == 'exon' and len(feature) < 20:
                feature.type = 'Shine_Dalgarno_sequence'
        rec.annotations = {}
        GFF.write([rec], sys.stdout)


def gff_filter(gff3):
    cs = ColorScheme()
    for rec in GFF.parse(gff3):
        rec.features = feature_lambda(rec.features, apply_color, {"cs": cs}, subfeatures=False)
        rec.annotations = {}
        GFF.write([rec], sys.stdout)


def require_shinefind(gff3, fasta):
    sd_finder = NaiveSDCaller()

    # Load up sequence(s) for GFF3 data
    seq_dict = SeqIO.to_dict(SeqIO.parse(fasta, "fasta"))
    # Parse GFF3 records
    for record in GFF.parse(gff3, base_dict=seq_dict):
        # Reopen
        genes = list(
            feature_lambda(record.features, feature_test_type, {"type": "gene"}, subfeatures=True)
        )
        good_genes = []
        for gene in genes:
            cdss = sorted(
                list(
                    feature_lambda(
                        gene.sub_features,
                        feature_test_type,
                        {"type": "CDS"},
                        subfeatures=False,
                    )
                ),
                key=lambda x: x.location.start,
            )
            if len(cdss) == 0:
                continue

            cds = cdss[0]
            sds, start, end, seq = sd_finder.testFeatureUpstream(cds, record, sd_min=5, sd_max=15)
            if len(sds) >= 1:
                sd_features = sd_finder.to_features(sds, gene.location.strand, start, end, feature_id=gene.id)
                gene.sub_features.append(sd_features[0])
                good_genes.append(gene)

        record.features = good_genes
        yield record
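

# Driver sketch for require_shinefind (placeholder file names): the
# generator yields mutated records, so the caller writes them out.
def _demo_require_shinefind():
    with open("annotations.gff3") as gff3, open("genome.fa") as fasta:
        for record in require_shinefind(gff3, fasta):
            record.annotations = {}
            GFF.write([record], sys.stdout)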


def getGff3Locations(parent, map_by="ID"):
    featureLocations = {}
    recs = GFF.parse(parent)
    # Only parse first.
    rec = next(recs)
    # Get all the feature locations in this genome
    for feature in feature_lambda(rec.features, feature_test_true, {}):
        id = feature.qualifiers.get(map_by, [feature.id])[0]
        featureLocations[id] = feature.location
    return rec, featureLocations


def require_shinefind(gff3, fasta):
    sd_finder = NaiveSDCaller()

    # Load up sequence(s) for GFF3 data
    seq_dict = SeqIO.to_dict(SeqIO.parse(fasta, "fasta"))
    # Parse GFF3 records
    for record in GFF.parse(gff3, base_dict=seq_dict):
        # Reopen
        genes = list(
            feature_lambda(record.features, feature_test_type, {'type': 'gene'}, subfeatures=True)
        )
        good_genes = []
        for gene in genes:
            cdss = list(
                feature_lambda(gene.sub_features, feature_test_type, {'type': 'CDS'}, subfeatures=False)
            )
            if len(cdss) == 0:
                continue

            # Someday this will bite me in the arse.
            cds = cdss[0]
            sds, start, end, seq = sd_finder.testFeatureUpstream(cds, record, sd_min=5, sd_max=15)
            if len(sds) >= 1:
                # TODO
                # Double plus yuck
                sd_features = sd_finder.to_features(sds, gene.location.strand, start, end, feature_id=gene.id)
                gene.sub_features.append(sd_features[0])
                good_genes.append(gene)

        # Yuck!
        record.features = good_genes
        yield record


def fixed_feature(rec):
    # Get all gene features to remove the mRNAs from
    for feature in feature_lambda(rec.features, feature_test_type, {"type": "gene"}, subfeatures=True):
        gene = feature
        sub_features = []
        # Filter out mRNA subfeatures, save other ones to new gene object.
        for sf in feature_lambda(
            feature.sub_features,
            feature_test_type,
            {"type": "mRNA"},
            subfeatures=True,
            invert=True,
        ):
            sf.qualifiers["Parent"] = gene.qualifiers["ID"]
            sub_features.append(sf)
        # Override original subfeatures with our filtered list
        gene.sub_features = sub_features
        yield gene


def gff3_diff(gff3_1, gff3_2):
    feats1 = {}
    feats2 = {}
    # Index gene features by their anchored end: start for + strand genes,
    # end for - strand genes.
    for rec1 in GFF.parse(gff3_1):
        for feat in feature_lambda(rec1.features, feature_test_type, {"type": "gene"}, subfeatures=True):
            if feat.location.strand == 1:
                feats1[feat.location.start] = feat
            else:
                feats1[feat.location.end] = feat
    for rec2 in GFF.parse(gff3_2):
        for feat in feature_lambda(rec2.features, feature_test_type, {"type": "gene"}, subfeatures=True):
            if feat.location.strand == 1:
                feats2[feat.location.start] = feat
            else:
                feats2[feat.location.end] = feat

    no_match = []
    flags_list = {}
    for i in feats1:
        try:
            diffs = find_differences(feats1[i], feats2[i])
            # need to somehow check for subfeatures
            del feats2[i]
            for d in diffs:
                if diffs[d]:
                    # NOTE: the original assigned an undefined name `flags`
                    # here; recording the diff dict is an assumption about
                    # the intent.
                    flags_list[i] = diffs
                    break
        except KeyError:
            no_match.append(feats1[i])

    print(flags_list)
    for nm in no_match:
        print(nm)
    for f in feats2:
        print(feats2[f])


def handle_non_gene_features(features):
    # These are NON-GENE features (maybe terminators? etc?)
    for feature in feature_lambda(
        features,
        feature_test_type,
        {"type": "gene"},
        subfeatures=False,
        invert=True,
        recurse=False,
    ):
        if feature.type in ("terminator", "tRNA"):
            yield feature


def fminmax(feature):
    fmin = None
    fmax = None
    for sf in feature_lambda([feature], feature_test_true, {}, subfeatures=True):
        if fmin is None:
            fmin = sf.location.start
            fmax = sf.location.end
        if sf.location.start < fmin:
            fmin = sf.location.start
        if sf.location.end > fmax:
            fmax = sf.location.end
    return fmin, fmax
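

# Illustrative check of fminmax with a hand-built feature tree (not part of
# the original code; assumes feature_lambda recurses via sub_features).
def _demo_fminmax():
    from Bio.SeqFeature import FeatureLocation, SeqFeature

    gene = SeqFeature(FeatureLocation(5, 80), type="gene")
    cds = SeqFeature(FeatureLocation(10, 95), type="CDS")
    gene.sub_features = [cds]
    # The CDS runs past the gene's own end, so the span covers both.
    assert fminmax(gene) == (5, 95)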


def validate(gff3):
    results = {}
    for rec in GFF.parse(gff3):
        for feature in feature_lambda(rec.features, feature_test_type, {"type": "gene"}, subfeatures=True):
            checks = []
            graded = []

            # dbxrefs
            if "CPT:283675" in feature.qualifiers.get("Dbxref", []):
                checks.append(True)
                graded.append({})
            else:
                checks.append(False)
                graded.append({"q1": "0"})  # ???

            # Notes
            if "Howdy!" in feature.qualifiers.get("Note", []):
                checks.append(True)
                graded.append({})
            else:
                checks.append(False)
                graded.append({"q2": "0"})  # ???

            owner = feature.qualifiers.get("owner", ["unknown"])[0]
            results[owner] = {
                "checks": checks,
                "graded": graded,
                "score": checks.count(True),
            }

    # Process all students at once
    token = auth(open("/galaxy/creds.json", "r"), GUANINE_URL)
    for email, result in results.items():
        sid = student_id(email, GUANINE_URL, token)
        result = post_result(
            sid,
            result["score"],
            2,
            token,
            GUANINE_URL,
            "a59a5001-57e7-4776-8807-63b544735f3f",
            json.dumps({"raw": result, "graded": result["graded"]}),
        )
        if result.status_code in (200, 201):
            print("Success")
        else:
            print("[Error] user=%s msg=%s" % (email, result.text))


def suppress(genome, annotations, suppress=None):
    if suppress is None:
        raise Exception("Must provide a list of stop codons to suppress")

    seq_dict = SeqIO.to_dict(SeqIO.parse(genome, "fasta"))
    suppressed_features = []
    for record in GFF.parse(annotations, base_dict=seq_dict):
        for feature in feature_lambda(
            record.features,
            feature_test,
            {"type": "CDS", "record": record, "stops": suppress},
            subfeatures=True,
        ):
            log.info("Found matching feature %s", feature.id)
            new_end = None
            codon_idx = 0
            # Walk codon by codon past the current stop until we reach a
            # stop codon that is not being suppressed.
            while new_end is None:
                if feature.strand > 0:
                    cs = feature.location.end + (3 * codon_idx)
                    codon = str(record.seq[cs:cs + 3])
                else:
                    cs = feature.location.start - (3 * (1 + codon_idx))
                    # Cast to str so both strands compare consistently
                    codon = str(reverse_complement(record.seq[cs:cs + 3]))

                if codon not in suppress and translate(codon, 11) == "*":
                    new_end = codon_idx
                    break

                codon_idx += 1
                if codon_idx > 40:
                    log.warn("Could not find a new stop codon")
                    break

            if new_end is not None:
                if feature.strand > 0:
                    feature.location._end += codon_idx * 3
                else:
                    feature.location._start -= codon_idx * 3

                suppressed_features.append(feature)

        record.features = suppressed_features
        record.annotations = {}
        GFF.write([record], sys.stdout)
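

# Sketch of calling suppress to extend CDSs through amber (TAG) stops;
# file names are placeholders, and the GFF3 output goes to stdout.
def _demo_suppress():
    with open("genome.fa") as genome, open("annotations.gff3") as annotations:
        suppress(genome, annotations, suppress=["TAG"])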


def gff_filter(gff3, id_list=None, id="", attribute_field="ID", subfeatures=True):
    attribute_field = attribute_field.split("__cn__")
    if id_list:
        filter_strings = [line.strip() for line in id_list]
    else:
        filter_strings = [x.strip() for x in id.split("__cn__")]
    for rec in GFF.parse(gff3):
        rec.features = feature_lambda(
            rec.features,
            feature_test_qual_value,
            {"qualifier": attribute_field, "attribute_list": filter_strings},
            subfeatures=subfeatures,
        )
        rec.annotations = {}
        GFF.write([rec], sys.stdout)
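

# Hypothetical invocation of gff_filter keeping two features by ID;
# "__cn__" appears to be Galaxy's escaped-newline separator.
def _demo_gff_filter():
    with open("annotations.gff3") as gff3:
        gff_filter(gff3, id="gene001__cn__gene002", attribute_field="ID")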


def generate_annotation_file(gff3):
    # TODO: cleanup
    # mode='w' so text writes work on Python 3 (the default mode is binary)
    t = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.coords')
    for rec in GFF.parse(gff3):
        features = feature_lambda(rec.features, feature_test_type, {'type': 'CDS'}, subfeatures=False)
        for feature in sorted(features, key=lambda x: x.location.start):
            t.write('\t'.join(map(str, [
                feature.id,
                feature.location.start + 1,  # convert to one-based coords
                feature.location.end,
                rec.id,
            ])) + '\n')
    name = t.name
    t.close()
    return name


def genes_all(feature_list, feature_type=["gene"], sort=False):
    """
    Simple filter to extract gene features from the feature set.
    """
    if not sort:
        for x in feature_lambda(feature_list, feature_test_type, {"types": feature_type}, subfeatures=True):
            yield x
    else:
        data = list(genes_all(feature_list, feature_type, sort=False))
        data = sorted(data, key=lambda feature: feature.location.start)
        for x in data:
            yield x
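

# Sketch of iterating genes_all over a parsed record in coordinate order
# (placeholder file name; prints the ID and span of each gene).
def _demo_genes_all():
    for rec in GFF.parse(open("annotations.gff3")):
        for gene in genes_all(rec.features, sort=True):
            print(gene.id, gene.location.start, gene.location.end)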