Esempio n. 1
0
    def __init__(self):
        """Initialise capture settings, lookup helpers and empty results."""
        # Where the picture comes from; every source is off by default.
        #   use_quicktime - QuickTime on macOS can record the phone (best)
        #   use_webcam    - take the photo through OpenCV (untested)
        self.use_quicktime = self.use_webcam = self.use_input = False

        # Picture file name; a name without an extension means the image
        # is still to be captured.
        self.picture = 'source'

        # Directory in which self.picture is processed (defaults to the
        # current working directory).
        self.location = os.getcwd()

        # Google auth file name - replace with your own.
        self.google_auth_json = 'blissend.json'

        # Wikipedia client, defaulting to English for searches, plus the
        # Vocabulary helper.
        self.wiki = wikipediaapi.Wikipedia('en')
        self.vb = Vocabulary()

        # raw holds the OCR text; question is the part of it we ultimately
        # want analysed. Both start out empty.
        self.raw = self.question = ''

        # Remaining analysis targets. Each gets its own dict object, so
        # these must not be folded into one chained assignment.
        self.answers = {}
        self.definitions = {}

        # Debugging switch.
        self.verbose = False
Esempio n. 2
0
    def __init__(self):
        """Initialise capture settings, lookup helpers and empty results."""
        # Image source flags, both off by default. QuickTime on macOS can
        # record the phone (best option).
        self.use_quicktime = self.use_input = False

        # Image file name (no extension = the image is being captured) and
        # the directory in which it is processed.
        self.picture = 'source'
        self.location = os.getcwd()

        # Google auth file name - replace with your own.
        self.google_auth_json = 'HQproject-a1a4e25e4b45.json'

        # English Wikipedia client and the Vocabulary helper.
        self.wiki = wikipediaapi.Wikipedia('en')
        self.vb = Vocabulary()

        # raw is the OCR text converted directly from the image; question
        # and question_nouns are processed texts. All start out empty.
        self.raw = self.question = self.question_nouns = ''

        # Further processed results. Each gets its own dict object, so
        # these must not be folded into one chained assignment.
        self.answers = {}
        self.lookup_info = {}

        # Debugging aids.
        self.times = {}
        self.verbose = False
Esempio n. 3
0
from options_loader import *

if __name__ == '__main__':
    # Script entry point: build a Vocabulary from the training corpus files,
    # log its sizes, persist it with saveToPKL, and dump an index-to-word
    # listing with per-type frequencies.
    #
    # sys.argv[1] is the name inserted between options['primary_dir'] and
    # the '.Vocab' / '.i2w' suffixes of the two output files.

    #Vocab = loadFromPKL('../../vocab/gigaword.pkl')

    log_vocab = mylog(logFile='log/log_vocab')
    options = optionsLoader(log_vocab, True)

    fileName = options['primary_dir']

    inputCorpus = [fileName + options['trainSet'] + '.Ndocument']
    outputCorpus = [fileName + options['trainSet'] + '.Nsummary']

    Vocab = Vocabulary(options,
                       inputCorpus=inputCorpus,
                       outputCorpus=outputCorpus)

    log_vocab.log(
        str(Vocab.full_size) + ', ' + str(Vocab.n_in) + ', ' +
        str(Vocab.n_out))

    # Shared prefix of both output files, computed once.
    outputPrefix = fileName + sys.argv[1]

    saveToPKL(outputPrefix + '.Vocab', Vocab)

    # The context manager guarantees the listing is flushed and closed even
    # if a lookup in Vocab.typeFreq raises; the original never closed it.
    with codecs.open(outputPrefix + '.i2w', 'w', encoding='utf-8') as f:
        for item in Vocab.i2w:
            # These two special tokens are written with the placeholder
            # 'NAN' instead of a frequency looked up in Vocab.typeFreq.
            if item in ('<unk>', '<s>'):
                freq = 'NAN'
            else:
                freq = Vocab.typeFreq[item]
            # Same "item value\n" line the Python 2 `print >> f` statement
            # produced, but valid on Python 3 as well.
            f.write(u'{} {}\n'.format(item, freq))