Ejemplo n.º 1
0
 def get_datatype_by_extension(self, ext):
     """Look up the datatype registered under the extension ``ext``.

     The lookup is done in ``self.datatypes_by_extension``. If ``ext`` is
     not a key there, a newly created ``data.Text()`` is returned instead.
     """
     try:
         return self.datatypes_by_extension[ext]
     except KeyError:
         # Unregistered extension: fall back to the plain-text datatype.
         return data.Text()
Ejemplo n.º 2
0
def get_datatype_by_extension(ext):
    """
    Return the datatype registered for an extension.

    ``ext`` is looked up in ``datatypes_by_extension``. When it is not a
    key there, a new ``data.Text()`` is created, a warning naming the
    extension is logged, and that text datatype is returned.
    """
    try:
        builder = datatypes_by_extension[ext]
    except KeyError:
        builder = data.Text()
        # Wrap ext in a 1-tuple so that a tuple-valued ext is formatted as a
        # single value instead of being unpacked as several format arguments.
        log.warning('unknown extension in data factory %s' % (ext,))
    return builder
Ejemplo n.º 3
0
     'gff3'        : interval.Gff3(),
     'genetrack'   : tracks.GeneTrack(),
     'interval'    : interval.Interval(), 
     'laj'         : images.Laj(),
     'lav'         : sequence.Lav(),
     'maf'         : sequence.Maf(),
     'pileup'      : tabular.Pileup(),
     'qualsolid'   : qualityscore.QualityScoreSOLiD(),
     'qualsolexa'  : qualityscore.QualityScoreSolexa(),
     'qual454'     : qualityscore.QualityScore454(),
     'sam'         : tabular.Sam(), 
     'scf'         : binary.Scf(),
     'sff'         : binary.Sff(),
     'tabular'     : tabular.Tabular(),
     'taxonomy'    : tabular.Taxonomy(),
     'txt'         : data.Text(),
     'wig'         : interval.Wiggle(),
     'xml'         : xml.GenericXml(),
 }
 self.mimetypes_by_extension = { 
     'ab1'         : 'application/octet-stream',
     'axt'         : 'text/plain',
     'bam'         : 'application/octet-stream',
     'bed'         : 'text/plain', 
     'customtrack' : 'text/plain',
     'csfasta'     : 'text/plain',
     'eland'       : 'application/octet-stream',
     'fasta'       : 'text/plain',
     'fastq'       : 'text/plain',
     'fastqsanger' : 'text/plain',
     'gtf'         : 'text/plain',
Ejemplo n.º 4
0
import data
import tagger

# Path of the training corpus handed to data.Text below.
TRAIN_DATA = 'data/train.txt'

# Step 1: build the training corpus from TRAIN_DATA.
print('Loading data...')
train_corpus = data.Text(TRAIN_DATA)
print('Done')

# Step 2: create the tagger, fit it on the corpus, then save it under
# 'model.pickle'.
pos_tagger = tagger.POSTagger()
print('Training model...')
pos_tagger.fit(train_corpus)
pos_tagger.save_model('model.pickle')
print('Done')

# Step 3: run the fitted tagger on one whitespace-tokenised sentence.
example = 'I am God'.split()
prob, tags = pos_tagger.predict(example)

# Report the input tokens, then the predicted tags and their probability.
print('Predict POS tag for:', example)
print('Tags:', tags, 'With probability:', prob)