def classify(self, db, fastain):
    """Label every record of a FASTA file with the trained classifier.

    For each record, the protein ID is taken from the sixth
    ``|``-delimited field of the record id, the matching rows are
    fetched with ``genbank.proteinQuery``, and the second column of
    those rows is concatenated into one text.  The text is tokenised
    with ``word_reg``, turned into a feature set by
    ``self.gene_features`` and classified by ``self.classifier``.

    Records whose query yields no text (no rows at all, or only empty
    text) receive the label ``'na'`` instead of being classified.

    Args:
        db: Handle forwarded unchanged to ``genbank.proteinQuery``.
        fastain: FASTA source forwarded to ``SeqIO.parse``.

    Returns:
        ``zip(proIDs, labels)``: protein IDs paired with their labels,
        in the order the records were read.

    Raises:
        IndexError: If a record id has fewer than six ``|`` fields.
        AssertionError: If two consecutive classified records produce
            identical tokens or identical feature sets (only when
            assertions are enabled).
    """
    proIDs, labels = [], []
    prevFeatureset = ''
    prevText = ''
    for seq_record in SeqIO.parse(fastain, "fasta"):
        # NOTE(review): assumes headers carry the protein ID as the
        # sixth '|'-separated field -- confirm against the input files.
        proteinID = seq_record.id.split("|")[5]
        query_rows = genbank.proteinQuery(proteinID, db)
        # Join the text column row by row.  The previous
        # ``ids, text = zip(*query_rows)`` raised ValueError when the
        # query returned no rows, so the 'na' fallback below could never
        # be reached for proteins missing from the database.
        text = ''.join(str(chunk) for _row_id, chunk in query_rows)
        if text == '':
            label = ['na']
        else:
            text = word_reg.findall(text)
            featureset = self.gene_features(text)
            # Sanity checks kept from the original: consecutive records
            # are expected to differ.  They are skipped under ``-O``.
            assert text != prevText
            assert featureset != prevFeatureset
            prevFeatureset = featureset
            prevText = text
            label = self.classifier.batch_classify([featureset])
        proIDs.append(proteinID)
        labels += label
    return zip(proIDs, labels)