def parse_description(s):
    """
    Turn the description part of a FASTA header into a dictionary
    (JCVI-style headers are assumed).

    The first whitespace-delimited token of `s` (presumably the sequence
    identifier) is discarded. The remaining tokens are glued together with
    no separator, every "/" is rewritten as ";", and the resulting string is
    handed to parse_qs(), whose return value is passed back unchanged.
    """
    tokens = s.split()
    # Everything after the first token, concatenated without whitespace
    description = "".join(tokens[1:])
    query = description.replace("/", ";")
    return parse_qs(query)
def make_attributes(s, gff3=True):
    """
    Parse the attributes (last) column of a GFF line into a dictionary
    mapping each key to a list of values.

    In GFF3, the last column is typically:
    ID=cds00002;Parent=mRNA00002;

    In GFF2, the last column is typically:
    Gene 22240.t000374; Note "Carbonic anhydrase"

    s    -- the raw attributes string
    gff3 -- if True, `s` is handed to parse_qs(); otherwise it is parsed as
            GFF2 "key value" fragments separated by "; "

    In GFF2 mode, double quotes are removed from values, a trailing ";"
    terminator on a fragment is dropped, and empty fragments (e.g. from an
    empty string or a trailing "; ") are ignored. A non-empty fragment with
    no space between key and value still raises ValueError.

    In both modes every value is finally split on "," so that
    comma-separated entries become separate list items.
    """
    if gff3:
        d = parse_qs(s)
    else:
        d = DefaultOrderedDict(list)
        for fragment in s.split("; "):
            # GFF2/GTF columns commonly end with a ';' terminator; without
            # this it would end up glued to the last value.
            fragment = fragment.strip().rstrip(";").rstrip()
            if not fragment:
                # Empty input or trailing separator: nothing to record
                continue
            key, val = fragment.split(" ", 1)
            d[key].append(val.replace('"', ""))

    # Expand comma-separated values; only existing keys are reassigned, so
    # the mapping's size does not change while iterating.
    for key, val in d.items():
        d[key] = list(flatten([v.split(",") for v in val]))

    return d
def test_parse_qs(querystr, expected):
    """
    Check that parse_qs() applied to `querystr` equals `expected`.

    Both arguments are presumably injected by a parametrize decorator or
    fixtures that are not visible here.
    """
    assert parse_qs(querystr) == expected