def get_datatype_by_extension(self, ext):
    """Look up the datatype stored for the extension ``ext``.

    Returns the entry found under ``ext`` in
    ``self.datatypes_by_extension``. If that lookup raises ``KeyError``,
    a newly created ``data.Text()`` is returned instead.
    """
    try:
        return self.datatypes_by_extension[ext]
    except KeyError:
        # No entry for this extension: fall back to a fresh data.Text().
        return data.Text()
def get_datatype_by_extension(ext):
    """Return the datatype stored for the extension ``ext``.

    The lookup is done in the ``datatypes_by_extension`` mapping from the
    enclosing scope. If ``ext`` is not present (``KeyError``), a new
    ``data.Text()`` is created, a warning naming the extension is logged,
    and that ``data.Text()`` instance is returned.
    """
    try:
        return datatypes_by_extension[ext]
    except KeyError:
        builder = data.Text()
        # Hand ``ext`` to the logger as an argument instead of formatting it
        # with ``%`` up front: eager formatting raises TypeError when ``ext``
        # is a tuple, and formats the message even if warnings are disabled.
        log.warning('unknown extension in data factory %s', ext)
        return builder
'gff3' : interval.Gff3(), 'genetrack' : tracks.GeneTrack(), 'interval' : interval.Interval(), 'laj' : images.Laj(), 'lav' : sequence.Lav(), 'maf' : sequence.Maf(), 'pileup' : tabular.Pileup(), 'qualsolid' : qualityscore.QualityScoreSOLiD(), 'qualsolexa' : qualityscore.QualityScoreSolexa(), 'qual454' : qualityscore.QualityScore454(), 'sam' : tabular.Sam(), 'scf' : binary.Scf(), 'sff' : binary.Sff(), 'tabular' : tabular.Tabular(), 'taxonomy' : tabular.Taxonomy(), 'txt' : data.Text(), 'wig' : interval.Wiggle(), 'xml' : xml.GenericXml(), } self.mimetypes_by_extension = { 'ab1' : 'application/octet-stream', 'axt' : 'text/plain', 'bam' : 'application/octet-stream', 'bed' : 'text/plain', 'customtrack' : 'text/plain', 'csfasta' : 'text/plain', 'eland' : 'application/octet-stream', 'fasta' : 'text/plain', 'fastq' : 'text/plain', 'fastqsanger' : 'text/plain', 'gtf' : 'text/plain',
import data
import tagger

# Path handed to data.Text to load the training corpus.
TRAIN_DATA = 'data/train.txt'

# Step 1: load the training corpus.
print('Loading data...')
train_corpus = data.Text(TRAIN_DATA)
print('Done')

# Step 2: fit a POS tagger on the corpus and save the model to disk.
pos_tagger = tagger.POSTagger()
print('Training model...')
pos_tagger.fit(train_corpus)
pos_tagger.save_model('model.pickle')
print('Done')

# Step 3: tag a short sample sentence (as a list of words) and print
# the predicted tags together with the probability returned by predict().
example = 'I am God'.split()
prob, tags = pos_tagger.predict(example)
print('Predict POS tag for:', example)
print('Tags:', tags, 'With probability:', prob)