def tfRecordToCaffe(self, datasetName, outputPath, nameAsGroundTruth=False):
    """Convert the TFRecord dataset into the Caffe-style directory layout.

    Creates the dataset folder structure under outputPath/datasetName,
    decodes every image record into the Images folder, and registers each
    image plus its bounding-box annotations with the Tagger file manager.

    :param datasetName: name of the output dataset folder.
    :param outputPath: root directory the dataset is written under.
    :param nameAsGroundTruth: when True, name images by their ground-truth
        filename instead of their source id.
    """
    dataset_root = os.path.join(outputPath, datasetName)
    MkDataSetStructure(dataset_root)
    tagger = Tagger(dataset_root)
    # Invariant across records, so computed once up front.
    images_dir = os.path.join(outputPath, datasetName, "Images")
    for record in self.readTFRecord():
        image_name = record['filename'] if nameAsGroundTruth else record["sourceID"]
        tagger.AppendTrainingImg(image_name)
        self.saveFromRawImageData(record["imgEncoded"], record["height"],
                                  record["width"], images_dir,
                                  image_name + ".jpg")
        # One (box, class) tuple per annotation in the record.
        boxes = zip(record["xMins"], record["yMins"], record["xMaxs"],
                    record["yMaxs"], record["classesText"], record["classesID"])
        for x_min, y_min, x_max, y_max, class_text, class_id in boxes:
            tagger.AppendAnnotation(
                (x_min, y_min), (x_max, y_max), image_name,
                class_text.decode('utf-8') + " " + str(class_id))
def path_input(self, pth):
    """Handle request for new input file. If new tag and display else display"""
    self.clear_results()
    tagged_files = None
    if not isdir(pth):
        # Single image: try to register it; IntegrityError from the DB means
        # the path is already stored, so it is not tagged again.
        try:
            DataBase.add_image(pth)
            is_new = True
        except IntegrityError:
            is_new = False
        if is_new:
            for tag in Tagger.tag_file(pth):
                DataBase.tag_image(tag, pth=pth)
    else:
        # Directory: tag everything, then register whatever is not yet stored.
        tags, tagged_files = Tagger.tag_dir(pth)
        for idx, fname in enumerate(tagged_files):
            full = join(pth, fname)
            # Replace the bare name with the full path for display later.
            tagged_files[idx] = full
            if DataBase.exists(pth=full):
                continue
            DataBase.add_image(full)
            entry = tags[idx]
            if isinstance(entry, str):
                # Single string result.
                DataBase.tag_image(entry, pth=full)
            else:
                # Tuple/iterable of tags.
                for t in entry:
                    DataBase.tag_image(t, pth=full)
    count = 1
    # Display
    if tagged_files is None:
        self.queue_images(pth)
    else:
        count = len(tagged_files)
        # Clamp the results listbox height between 3 and 12 rows.
        self.ui.builder.get_object("ListResults").config(
            height=min(max(3, count), 12))
        self.queue_images(tagged_files)
    self.update_info("Processed " + str(count) + " images")
def cross_validate(count_words):
    '''
    Runs cross validation on the Tagger, count_words = True iff the Tagger
    counts all the words, so P(word | tag) is known, but P(tag | prev_tag) is
    still only 90% known
    '''
    global total_errs, total_matches
    sum_err = 0
    print 'Fold Err Match Frac_Match'
    # files in the corpus range from 0-100, so test and train ranges are slices of this
    # range, such that train_range and test_range together make range(0,100)
    for i in xrange(folds):
        train_range = range(0, chunk * i) + range(chunk * (i + 1), 100)
        test_range = range(chunk * i, chunk * (i + 1))
        # Reset the per-fold error counters (shared with file_validate below).
        total_errs = 0
        total_matches = 0
        tm = time.time()
        # Build counts from the training slice.
        c = TagCounter()
        c.parse_corpus_range(train_range)
        if (count_words):
            # Additionally count only the words of the test slice so that
            # P(word | tag) is known for the test vocabulary.
            c.only_words = count_words
            c.parse_corpus_range(test_range)
        t = Tagger(c)
        tm = time.time() - tm
        if timing:
            print tm,

        # file_validate is mapped across all files in test_range
        def file_validate(f):
            global total_errs, total_matches
            sentences = parse_file(f)
            for sent in sentences[:size]:
                words = []
                for word in sent[1:]:  # sent[0] will always be START
                    words.append(word.true_chars)
                tagged = t.tag_words(words)
                matches = 0
                errs = 0
                # NOTE(review): zip pairs sent[0] (START) with tagged[0];
                # this assumes tag_words returns a START-aligned list — confirm.
                for (actual_w, pred_w) in zip(sent, tagged):
                    if actual_w.tag != pred_w.tag:
                        #print actual_w, pred_w # prints the mistagged pairs
                        errs += 1
                    else:
                        matches += 1
                total_errs += errs
                total_matches += matches

        tm = time.time()
        map_files(file_validate, test_range)
        tm = time.time() - tm
        if timing:
            print tm
        # One result row per fold: fold index, errors, matches, match fraction.
        print '%3d %6d %7d %0.4f' % (i, total_errs, total_matches, percent_match(
            total_errs, total_matches))
        print ''
        sum_err += percent_match(total_errs, total_matches)
    print 'cumulative averaged error:', (sum_err * 1.0) / folds
def extract_entities(pmid):
    """
    Tag genes and species in the PubMed article identified by ``pmid`` and,
    where possible, annotate them with information from the NCBI
    gene/taxonomy database.

    :param pmid: The id of the article that should be tagged.
    :return: The genes (as Gene objects) and the organisms (as Organism
        object) found in the article.
    """
    matches = Tagger().tag([pmid])
    genes, organisms = [], []
    if matches:
        annotation = matches[0].get_annotation()
        # NOTE(review): the {None} default produces a single
        # convert_to_object(None, ...) entry when a key is absent —
        # preserved as-is; confirm this is intended rather than an empty set.
        raw_genes = annotation.get("Gene", {None})
        raw_species = annotation.get("Species", {None})
        genes = [convert_to_object(g, "Gene") for g in raw_genes]
        organisms = [convert_to_object(s, "Species") for s in raw_species]
    return genes, organisms
class TaggerHandler:
    """Drives one Tagger instance over every (train, test) file pair and
    records the resulting accuracies as tab-separated rows in a table file."""

    def __init__(self, dataDir, table):
        sys.stderr.write("TaggerHandler: Constructor\n")
        # Single Tagger reused across all runs.
        self.__TaggerInstance = Tagger()
        # Directory prefixed onto every train/test file name read in run().
        self.__dataDir = dataDir
        # Output table; closed at the end of run().
        self.__tableFile = open(table, 'w')

    def __updateTable(self, setting, accuracies):
        # One tab-separated row: the setting label followed by each accuracy.
        self.__tableFile.write(
            setting + '\t' + '\t'.join(map(str, accuracies)) + '\n')

    def __runTagger(self, trainFile, testFile):
        # Train on trainFile, evaluate on testFile, and record the accuracies
        # under a setting label derived from the train file name.
        self.__TaggerInstance.loadData(trainFile, testFile)
        self.__TaggerInstance.train()
        accuracies = self.__TaggerInstance.test()
        setting = trainFile.split("_")[0]
        self.__updateTable(setting, accuracies)

    def run(self, trainFiles, testFiles):
        """Run the tagger on the cross product of the train and test file
        lists named (one path per line) in the given list files."""
        # Context managers so the list-file handles are closed
        # (the original left them open).
        with open(trainFiles) as f:
            trainFiles = [self.__dataDir + line.strip() for line in f]
        with open(testFiles) as f:
            testFiles = [self.__dataDir + line.strip() for line in f]
        for trainFile in trainFiles:
            for testFile in testFiles:
                self.__runTagger(trainFile, testFile)
        self.__tableFile.close()
def save_results(self):
    # Tag every sentence of the test file and write the tagged output to
    # self.outfile, logging progress every 100 sentences.
    abs_filepath = os.path.abspath(self.outfile)
    # Float total so the percentage division below is non-integer division.
    total = Utils.get_count_of_sentences(self.testfile) * 1.0
    logging.info("{} sentences found".format(total))
    with open(abs_filepath, 'w') as f:
        for i, s in enumerate(Utils.get_sentence(self.testfile)):
            original_sentence = s['o']  # original raw sentence from file
            s = s['c']  # cleaned sentence
            # Strip existing tags from both forms; the cleaned text is what
            # gets tagged, the raw text is what the tags are attached to.
            untagged_sentence = self.remove_tags(s)
            untagged_original_sentence = self.remove_tags(
                original_sentence)
            tags = Tagger(self.model, untagged_sentence).tag()
            tagged_sentence = self.attach_tags(untagged_original_sentence,
                                               tags)
            Utils.write_sentence(f, tagged_sentence)
            if i % 100 == 0:
                logging.info("{}% done. Last tagged: {}".format(
                    round(i / total * 100.0, 2), s.replace("\n", " ")))
            # break
            f.write(Utils.SENTENCE_SEPARATOR)
# -*- coding: cp1254 -*- # Onur Yilmaz # Imports from Tagger import Tagger # Open the file where tagger is saved taggerFileName = 'my_tagger.yaml' myTagger = Tagger.load(taggerFileName) # Keep the original functionality intact def tag(sentence): return myTagger.tag(sentence) # End of code
# Setting up directory paths: the untagged seminar test data and the
# training corpus.
mypath = os.getcwd() + '/data/seminar_testdata/test_untagged/'
trainingPath = 'data/training'
directory = os.fsencode(mypath)

# Runs the ontology classification
print('Running Ontology Classification: \n')
ontology = Ontology()
ontology.run(mypath)

# Begins tagging
print("\nTagging progress beginning. Get a brew, it'll take a while... \n")
extractor = DataExtractor()

# Trains our model
extractor.train(trainingPath)

tagger = Tagger()

# Tags all emails in the directory given
tagger.tag_seminar(mypath, directory, extractor)

# Calculates how long the program took.
# NOTE: start_time is set earlier in this file — TODO confirm.
seconds = time.time() - start_time
m, s = divmod(seconds, 60)
print("The program has been running for {0} minutes and {1} seconds \n".format(
    round(m), round(s)))

# Evaluates results — renamed from `eval`, which shadowed the builtin.
evaluation = Evaluation()
evaluation.run()
def test_load_incorrectly(self):
    """Loading a file that is not a saved tagger must raise TypeError."""
    bad_path = os.path.join(self.tempDir, 'temporary_file.txt')
    with open(bad_path, 'w') as handle:
        handle.write("This is a line that won't be able to be read")
    with self.assertRaises(TypeError):
        Tagger.load(bad_path)
def setUp(self):
    # Path to a previously saved tagger; also read by the tests themselves.
    self.filePath = 'my_tagger.yaml'
    # Load the tagger under test from disk.
    self.tag = Tagger.load(self.filePath)
class TaggerHandler:
    # Runs a Tagger over train/test file pairs whose experiment parameters
    # are encoded in the file names, and writes one tab-separated results
    # row per run to a table file.

    def __init__(self, dataDir, table):
        sys.stderr.write("TaggerHandler: Constructor\n")
        # Single Tagger instance reused for every run.
        self.__TaggerInstance = Tagger()
        self.__dataDir = dataDir
        # buffering=1: line-buffered, so each row is flushed as written.
        self.__tableFile = open(table, 'w', 1)
        # Header row for the results table.
        self.__tableFile.write('TrainCSType\tTrainPureCSSplit\tTrainSize\tExperimentType\tTestCSType\tTestPureCSSplit\tTestSize\tTagset\tOverallAccuracy\tSameContextAccuracy\tDifferentContextAccuracy\tPrevWordDifferentAccuracy\tPrePrevWordDifferentAccuracy\tUnknowns\n')

    def __updateTable(self, setting, accuracies):
        # One tab-separated row: setting columns followed by each accuracy.
        ##print accuracies
        self.__tableFile.write(setting + '\t' + '\t'.join(map(lambda x:str(x), accuracies)) + '\n')

    def __getSetting(self, string):
        # Decode the experiment parameters encoded in the file name, e.g.
        # ...TrainCS<cstype>CS<csSplit>Pure<pureSplit>Total<size>_...
        # Returns them as tab-joined columns: cstype, pure-cs split, size.
        string = string.split("/")[-1].split("TrainCS")[1]
        cstype = string.split("CS")[0]
        csSplit = string.split("CS")[1].split("Pure")[0]
        pureSplit = string.split("Pure")[1].split("Total")[0]
        pureCSSplit = pureSplit + '-' + csSplit
        totalSize = string.split("Total")[1].split("_")[0]
        return '\t'.join([cstype, pureCSSplit, totalSize])

    def __runTagger(self, trainFile, testFile):
        # Train on trainFile, test on testFile, record one results row.
        self.__TaggerInstance.loadData(trainFile, testFile)
        self.__TaggerInstance.train()
        accuracies = self.__TaggerInstance.test()
        trainSetting = self.__getSetting(trainFile)
        testSetting = self.__getSetting(testFile)
        tagset = self.__tagset(trainFile)
        self.__updateTable(trainSetting + '\t' + testSetting + '\t' + tagset, accuracies)

    def __runTagger2(self, trainFile, testFile, expType):
        # Like __runTagger, but prefixes the data directory onto the file
        # names and records an extra ExperimentType column (expType).
        trainFile = self.__dataDir+trainFile
        testFile = self.__dataDir+testFile
        self.__TaggerInstance.loadData(trainFile, testFile)
        self.__TaggerInstance.train()
        accuracies = self.__TaggerInstance.test()
        trainSetting = self.__getSetting(trainFile)
        testSetting = self.__getSetting(testFile)
        tagset = self.__tagset(trainFile)
        self.__updateTable(trainSetting + '\t' + expType + '\t' + testSetting + '\t' + tagset, accuracies)

    def __tagset(self, string):
        # Classify the file's tagset from its extension:
        # "<name>.uni..." -> Universal, otherwise Mixed;
        # ".uniq" anywhere appends a ".uniq" suffix.
        tagset = "Mixed"
        if len(string.split(".")) > 1 and string.split(".")[1] == "uni":
            tagset = 'Universal'
        if string.find(".uniq") >= 0:
            tagset += ".uniq"
        return tagset

    def run(self, trainFiles, testFiles):
        # Run every (train, test) pair whose train tagset is "Mixed" AND
        # matches the test tagset; all other pairs are skipped by the
        # condition below.
        #trainFiles = [self.__dataDir+line.strip() for line in open(trainFiles)]
        #testFiles = [self.__dataDir+line.strip() for line in open(testFiles)]
        trainFiles = [line.strip() for line in open(trainFiles)]
        testFiles = [line.strip() for line in open(testFiles)]
        for trainFile in trainFiles:
            #if trainFile.find("Type1")>=0 or trainFile.find("Type0")>=0:
            #    continue
            for testFile in testFiles:
                # NOTE(review): `or` skips everything except Mixed pairs with
                # equal tagsets; the commented-out variant below suggests the
                # filter was deliberately tightened — confirm intent.
                if self.__tagset(trainFile) != "Mixed" or self.__tagset(trainFile) != self.__tagset(testFile):
                    #if self.__tagset(trainFile)!= self.__tagset(testFile):
                    continue
                ##if testFile.find("CS0Pure100")<0:
                ##    continue
                ##print testFile
                self.__runTagger(trainFile, testFile)
        self.__tableFile.close()

    def run2(self, trainFiles, testFiles):
        # For every tagset-matched (train, test) pair, run both the
        # experiment train file and its "_Control" counterpart.
        trainFiles = [line.strip() for line in open(trainFiles)]
        testFiles = [line.strip() for line in open(testFiles)]
        for trainFile in trainFiles:
            for testFile in testFiles:
                if self.__tagset(trainFile) != self.__tagset(testFile):
                    continue
                controlTrainFile = trainFile + "_Control"
                # Universal files keep their ".uni" extension after the
                # "_Control" suffix.
                if self.__tagset(trainFile) == "Universal":
                    controlTrainFile = trainFile.split(".uni")[0] + "_Control" + ".uni"
                ##if testFile.find("CS0Pure100")<0:
                ##    continue
                ##print testFile
                self.__runTagger2(trainFile, testFile, "Experiment")
                self.__runTagger2(controlTrainFile, testFile, "Control")
        self.__tableFile.close()
# NOTE(review): this first print is likely the body of a loop that begins
# before this chunk; `token`, `gold_tokens` and `test_tokens` are defined
# there — confirm against the full file.
print "("+token+")",

print "\n\nNUMBER OF TOKENS ANSWER,TEST"
print len(gold_tokens),len(test_tokens)

# Tokens produced only by the test tokenizer.
print "\n\nDIFFERENCE\nONLY IN TEST"
difference = set(test_tokens)-set(gold_tokens)
for token in difference:
    print token+" ",

# Tokens present only in the gold-standard answer.
print "\n\nONLY IN ANSWER"
difference = set(gold_tokens)-set(test_tokens)
for token in difference:
    print token+" ",
print "\n"

print "--TAGGER--"
# Train the tagger on the tagged training sentences.
tagger = Tagger(tagged_train_sents)

print "EVALUATE TAGGER"
print "RATE"
tagger.evaluate(tagged_gold_sents)

# Print every gold word/tag pair.
print "\n\nANSWER"
tagged_gold_tokens = sum(tagged_gold_sents,[])
for tup in tagged_gold_tokens:
    print str(tup[0])+"/"+str(tup[1]),

# Re-tag the gold words and print the predicted word/tag pairs.
print "\n\nTEST"
gold_tokens = []
for sent in tagged_gold_sents:
    for tup in sent:
        gold_tokens.append(tup[0])
tagged_test_tokens = tagger.tag(gold_tokens)
for tup in tagged_test_tokens:
    print str(tup[0])+"/"+str(tup[1]),
    # NOTE(review): tail of a function whose definition starts before this
    # chunk; orphan_count and tag_occurrences are defined there — confirm
    # the indentation against the full file.
    return float(orphan_count) / len(tag_occurrences)


def calculate_combined(tagging_user):
    """ returns the combined measure """
    # Average of the orphaniness score and the normalized conditional entropy.
    return (get_orphaniness(tagging_user) + get_cond_entropy_normalized(tagging_user)) / 2


if __name__ == '__main__':
    # Small smoke test: tag two posts for user "hugo".
    TAGGER = Tagger("hugo")
    TAG_SET = set()
    TAG_SET.add("computer")
    TAG_SET.add("reference")
    TAGGER.add_post("1", TAG_SET)
    # The same set object is cleared and reused — presumably add_post copies
    # the tags it receives; TODO confirm.
    TAG_SET.clear()
    TAG_SET.add("reference")
    TAG_SET.add("calculator")
    TAG_SET.add("rate")
    TAGGER.add_post("2", TAG_SET)
    TAG_SET.clear()
######### hmm-tagger.py #########

# Tagging controller and path utilities.
from Tagger import Tagger
import os

# Build a tagging object over the cleaned corpus files in the working
# directory: three training texts and one test text.
tagger = Tagger(os.getcwd()+'/',
                ['text_1.txt', 'text_2.txt', 'text_3.txt'],
                ['text_5.txt'])

# Perform ten-fold cross-validation.
tagger.run_test_cycles()
def test_load_nonexisting(self):
    # Loading a path that does not exist must surface FileNotFoundError.
    with self.assertRaises(FileNotFoundError):
        Tagger.load("this_file_definitely_doesnt_exist.txt")
def test_load_file(self):
    """A saved tagger file loads into a wrapper holding a BrillTagger."""
    loaded = Tagger.load(self.filePath)
    self.assertIsInstance(loaded.myTagger, BrillTagger)
def __init__(self, dataDir, table):
    # Announce construction on stderr.
    sys.stderr.write("TaggerHandler: Constructor\n")
    # Single Tagger instance reused for every train/test run.
    self.__TaggerInstance = Tagger()
    # Directory prefixed onto train/test file names elsewhere in the class.
    self.__dataDir = dataDir
    # buffering=1: line-buffered, so each results row is flushed as written.
    self.__tableFile = open(table, 'w', 1)
    # Header row of the tab-separated results table.
    self.__tableFile.write('TrainCSType\tTrainPureCSSplit\tTrainSize\tExperimentType\tTestCSType\tTestPureCSSplit\tTestSize\tTagset\tOverallAccuracy\tSameContextAccuracy\tDifferentContextAccuracy\tPrevWordDifferentAccuracy\tPrePrevWordDifferentAccuracy\tUnknowns\n')
######### hmm-tagger.py #########

from TreebankCleaner import TreebankCleaner  # cleaning class
from Tagger import Tagger                    # tagging controller
import os                                    # path info
import sys                                   # command line options

# Optionally clean the pre-downloaded treebank file first.
if '--clean' in sys.argv:
    cleaner = TreebankCleaner(os.getcwd() + '/', ['treebank3_sect2.txt'])
    cleaner.clean()

# Build a tagging object over the cleaned corpus file and perform
# ten-fold cross-validation.
tagger = Tagger(os.getcwd() + '/', ['treebank3_sect2.txt_cleaned'])
tagger.run_test_cycles()
def __init__(self, dataDir, table):
    # Announce construction on stderr.
    sys.stderr.write("TaggerHandler: Constructor\n")
    # Single Tagger instance reused for every train/test run.
    self.__TaggerInstance = Tagger()
    # Directory prefixed onto train/test file names in run().
    self.__dataDir = dataDir
    # Output table for accuracy rows; closed elsewhere in the class.
    self.__tableFile = open(table,'w')