def getProteinGenbankDescription(self, directory, proteinID):
    """Build a description string for a protein from its GenBank file.

    Reads the first record of ``<directory>/<proteinID>.gbk`` and, for every
    feature of that record, appends the formatted first value of the
    ``note``, ``function`` and ``product`` qualifiers (in that order) to the
    result.

    Args:
        directory: Directory that contains the GenBank file.
        proteinID: Protein identifier; also the file's base name.

    Returns:
        The concatenation of all formatted qualifier values, or ``""`` when
        no feature carries any of the three qualifiers.

    Raises:
        StopIteration: If the file contains no GenBank record.
    """
    fname = "%s/%s.gbk" % (directory, proteinID)
    # Use a context manager so the handle is closed even if parsing fails,
    # and the builtin next() so this works on both Python 2.6+ and 3.
    with open(fname) as handle:
        seq_record = next(SeqIO.parse(handle, "genbank"))

    pieces = []
    for feature in seq_record.features:
        qualifiers = feature.qualifiers
        try:
            for key in ("note", "function", "product"):
                if key in qualifiers:
                    pieces.append(text.formatText(qualifiers[key][0]))
        except KeyError:
            # Best effort: keep whatever was collected so far for this
            # feature and move on to the next one.
            continue
    return "".join(pieces)
def buildProteinTable(self):
    """Fill ``self.proteinDict`` with a description for each protein.

    For every path in ``self.genbank_files`` the first GenBank record is
    read. Each feature that has a ``protein_id`` qualifier is stored in
    ``self.proteinDict`` under that id, mapped to the concatenation of the
    formatted first values of its ``note``, ``function`` and ``product``
    qualifiers (in that order; missing qualifiers contribute nothing).
    Features without a ``protein_id`` are skipped.

    Raises:
        StopIteration: If one of the files contains no GenBank record.
    """
    for genbank_file in self.genbank_files:
        # Close each handle as soon as its record is parsed instead of
        # leaking one open file per GenBank file; builtin next() works on
        # both Python 2.6+ and 3.
        with open(genbank_file) as handle:
            seq_record = next(SeqIO.parse(handle, "genbank"))

        for feature in seq_record.features:
            qualifiers = feature.qualifiers
            try:
                proteinID = qualifiers["protein_id"][0]
                note = "".join(
                    text.formatText(qualifiers[key][0])
                    for key in ("note", "function", "product")
                    if key in qualifiers
                )
            except KeyError:
                # No protein_id on this feature (e.g. it is not a CDS):
                # nothing to record.
                continue
            self.proteinDict[proteinID] = note
def insertGenbankProteins(genbank_files, dbout):
    """Insert protein descriptions from GenBank files into a SQLite table.

    For every path in ``genbank_files`` the first GenBank record is read.
    Each feature that has a ``protein_id`` qualifier produces one row in the
    ``protein_text(protein_id, note)`` table, where ``note`` is the
    concatenation of the formatted first values of the feature's ``note``,
    ``function`` and ``product`` qualifiers (in that order). Features
    without a ``protein_id`` are skipped. The table must already exist.

    Args:
        genbank_files: Iterable of paths to GenBank files.
        dbout: Path of the SQLite database to write to.

    Raises:
        StopIteration: If one of the files contains no GenBank record.
        sqlite3.Error: If an insert or the commit fails.
    """
    db = sqlite3.connect(dbout)
    try:
        cursor = db.cursor()
        for genbank_file in genbank_files:
            # Close each handle as soon as its record is parsed; builtin
            # next() works on both Python 2.6+ and 3.
            with open(genbank_file) as handle:
                seq_record = next(SeqIO.parse(handle, "genbank"))

            for feature in seq_record.features:
                qualifiers = feature.qualifiers
                try:
                    proteinID = qualifiers["protein_id"][0]
                    note = "".join(
                        text.formatText(qualifiers[key][0])
                        for key in ("note", "function", "product")
                        if key in qualifiers
                    )
                except KeyError:
                    # No protein_id on this feature: nothing to insert.
                    continue
                cursor.execute(
                    '''INSERT INTO protein_text(protein_id,note) VALUES(?,?)''',
                    (proteinID, note)
                )
        db.commit()
    finally:
        # Always release the connection, even when parsing or inserting
        # fails part-way through; uncommitted rows are discarded then.
        db.close()
def filterWords(self, words):
    """Return the formatted form of every word longer than one character.

    Words of length 0 or 1 are dropped before formatting; the remaining
    words are passed through ``text.formatText`` and returned in their
    original order.
    """
    kept = []
    for word in words:
        # Remove all single letter words
        if len(word) <= 1:
            continue
        kept.append(text.formatText(word))
    return kept