Code example #1
    def do(self, search, page):
        """Search a term in the Whoosh index."""
        aborted_search = False
        results = []
        num_results = 0
        total_time = 0
        PER_PAGE = 100

        start_time = time.time()

        if search.has_invalid_search_term:
            aborted_search = True
            pagination = None
            glossary = None
            pages = 0
        else:
            g = Glossary(search.source)
            g.search()
            glossary = g.get_results()

            raw_results = search.get_results()
            num_results = raw_results.scored_length()

            if len(raw_results) > 0:

                url = request.url
                o = urllib.parse.urlparse(url)
                url = '?' + o.query

                pagination = Pagination(PER_PAGE, len(raw_results), url, page)
                start = (pagination.page - 1) * PER_PAGE
                # Clamp the slice end so the last page stops at the final
                # scored hit instead of running past the result list.
                end = min(start + PER_PAGE, num_results)

                for i in range(start, end):
                    results.append(self.get_result(raw_results[i]))

                pages = pagination.pages
            else:
                pagination = None
                pages = 0

        total_time = time.time() - start_time
        ctx = {
            'source': search.source,
            'target': search.target,
            'project': search.project,
            'num_results': num_results,
            'time': "{:.2f}".format(total_time),
            'aborted_search': aborted_search,
            'glossary': glossary,
            'pages': pages,
            'results': results,
        }

        return ctx
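
The slice arithmetic is the one subtle step: the end index must be clamped so a partial last page stops at the number of scored hits. A minimal standalone sketch of the same computation (the helper name is ours, not the project's):

def page_slice(page, num_results, per_page=100):
    """Return (start, end) result indices for a 1-based page number."""
    start = (page - 1) * per_page
    end = min(start + per_page, num_results)
    return start, end

assert page_slice(1, 250) == (0, 100)
assert page_slice(3, 250) == (200, 250)  # last, partial page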
Code example #2
    def do(self, search):
        """Search a term in the Whoosh index."""
        aborted_search = False
        results = []
        num_results = 0
        total_time = 0
        PER_PAGE = 100

        g = Glossary(search.source)
        g.search()
        glossary = g.get_results()

        if search.has_invalid_search_term:
            aborted_search = True
            pagination = None
        else:
            start_time = time.time()
            raw_results = search.get_results()
            total_time = time.time() - start_time
            num_results = raw_results.scored_length()

            if len(raw_results) > 0:

                url = request.url.encode('utf-8')
                o = urlparse(url)
                url = '?' + o.query

                pagination = Pagination(PER_PAGE, len(raw_results), url)
                start = (pagination.page - 1) * PER_PAGE
                # Clamp the slice end to the number of scored results.
                end = min(start + PER_PAGE, num_results)

                for i in xrange(start, end):
                    results.append(self.get_result(raw_results[i]))
            else:
                pagination = None

        ctx = {
            'source': search.source,
            'target': search.target,
            'project': search.project,
            'results': results,
            'num_results': num_results,
            'time': "{:.2f}".format(total_time),
            'aborted_search': aborted_search,
            'glossary': glossary,
            'pagination': pagination,
        }

        env = Environment(loader=FileSystemLoader('./'))
        template = env.get_template('templates/search_results.html')

        r = template.render(ctx).encode('utf-8')
        return r
Code example #3
    def do(self, search):
        """Search a term in the Whoosh index."""
        aborted_search = False
        results = []
        num_results = 0
        total_time = 0
        PER_PAGE = 100

        g = Glossary(search.source)
        g.search()
        glossary = g.get_results()

        if search.has_invalid_search_term:
            aborted_search = True
            pagination = None
        else:
            start_time = time.time()
            raw_results = search.get_results()
            total_time = time.time() - start_time
            num_results = raw_results.scored_length()

            if len(raw_results) > 0:

                url = request.url
                o = urllib.parse.urlparse(url)
                url = '?' + o.query

                pagination = Pagination(PER_PAGE, len(raw_results), url)
                start = (pagination.page - 1) * PER_PAGE
                # Clamp the slice end to the number of scored results.
                end = min(start + PER_PAGE, num_results)

                for i in range(start, end):
                    results.append(self.get_result(raw_results[i]))
            else:
                pagination = None

        ctx = {
            'source': search.source,
            'target': search.target,
            'project': search.project,
            'results': results,
            'num_results': num_results,
            'time': "{:.2f}".format(total_time),
            'aborted_search': aborted_search,
            'glossary': glossary,
            'pagination': pagination,
        }

        env = Environment(loader=FileSystemLoader('./'))
        template = env.get_template('templates/search_results.html')

        r = template.render(ctx).encode('utf-8')
        return r
Code example #4
def process_projects(src_directory, glossary_description, glossary_file):
    corpus = Corpus(src_directory)
    corpus.process()

    reference_sources = ReferenceSources()
    reference_sources.read_sources()

    metrics = Metrics()
    metrics.create(corpus)

    # Select terms
    MAX_TERMS = 5000
    sorted_terms_by_tfxdf = sorted(metrics.tfxdf, key=metrics.tfxdf.get,
                                   reverse=True)

    # Developer report
    glossary_entries = OrderedDict()
    translations = Translations()
    selected_terms = sorted_terms_by_tfxdf[:MAX_TERMS]  # Sorted by frequency

    for term in selected_terms:
        glossary_entries[term] = translations.create_for_word_sorted_by_frequency(
            corpus.documents, term, reference_sources)

    dev_glossary_serializer = DevGlossarySerializer()
    dev_glossary_serializer.create(u"dev-" + glossary_file + ".html",
                                   glossary_description, corpus,
                                   glossary_entries, reference_sources)

    # User report
    glossary_entries = []
    selected_terms = sorted(sorted_terms_by_tfxdf[:MAX_TERMS])  # Sorted by term

    glossary = Glossary(glossary_description)
    for term in selected_terms:
        glossary_entry = GlossaryEntry(
            term,
            translations.create_for_word_sorted_by_frequency(corpus.documents,
                                                             term,
                                                             reference_sources)
        )
        glossary.entries.append(glossary_entry)

    glossary_entries = glossary.get_dict()
    process_template('templates/userglossary-html.mustache',
                     glossary_file + ".html", glossary_entries)
    process_template('templates/userglossary-csv.mustache',
                     glossary_file + ".csv", glossary_entries)

    generate_database(glossary, glossary_file)
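
The term-selection step relies on a compact dictionary idiom: sorted(d, key=d.get, reverse=True) orders the keys of a {term: tf*df score} mapping by descending value. A tiny standalone illustration with made-up scores:

# Sort dictionary keys by their values, highest score first, keep the top N.
tfxdf = {'window': 0.9, 'file': 1.4, 'printer': 0.2}
sorted_terms_by_tfxdf = sorted(tfxdf, key=tfxdf.get, reverse=True)
print(sorted_terms_by_tfxdf[:2])  # ['file', 'window']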
Code example #5
def process_projects(src_directory, glossary_description, glossary_file):
    corpus = Corpus(src_directory)
    corpus.process()

    reference_sources = ReferenceSources()
    reference_sources.read_sources()

    metrics = Metrics()
    metrics.create(corpus)

    # Select terms
    MAX_TERMS = 1000
    sorted_terms_by_tfxdf = sorted(metrics.tfxdf,
                                   key=metrics.tfxdf.get,
                                   reverse=True)

    # Developer report
    glossary_entries = OrderedDict()
    translations = Translations()
    selected_terms = sorted_terms_by_tfxdf[:MAX_TERMS]  # Sorted by frequency

    for term in selected_terms:
        glossary_entries[term] = translations.create_for_word_sorted_by_frequency(
            corpus.documents, term, reference_sources)

    dev_glossary_serializer = DevGlossarySerializer()
    dev_glossary_serializer.create(u"dev-" + glossary_file + ".html",
                                   glossary_description, corpus,
                                   glossary_entries, reference_sources)

    # User report
    glossary_entries = []
    selected_terms = sorted(
        sorted_terms_by_tfxdf[:MAX_TERMS])  # Sorted by term

    glossary = Glossary(glossary_description)
    for term in selected_terms:
        glossary_entry = GlossaryEntry(
            term,
            translations.create_for_word_sorted_by_frequency(
                corpus.documents, term, reference_sources))
        glossary.entries.append(glossary_entry)

    glossary_entries = glossary.get_dict()
    process_template('templates/userglossary-html.mustache',
                     glossary_file + ".html", glossary_entries)
    process_template('templates/userglossary-csv.mustache',
                     glossary_file + ".csv", glossary_entries)
Code example #6
def process_projects():
    global glossary_file
    global glossary_description

    corpus = Corpus(src_directory)
    corpus.process()

    reference_sources = ReferenceSources()
    reference_sources.read_sources()

    metrics = Metrics()
    metrics.create(corpus)

    # Select terms
    MAX_TERMS = 1000
    sorted_terms_by_tfxdf = sorted(metrics.tfxdf, key=metrics.tfxdf.get,
                                   reverse=True)

    # Developer report
    glossary_entries = OrderedDict()
    translations = Translations()
    selected_terms = sorted_terms_by_tfxdf[:MAX_TERMS]  # Sorted by frequency

    for term in selected_terms:
        glossary_entries[term] = translations.create_for_word_sorted_by_frequency(
            corpus.documents, term, reference_sources)

    dev_glossary_serializer = DevGlossarySerializer()
    dev_glossary_serializer.create(u"dev-" + glossary_file + ".html",
                                   glossary_description, corpus,
                                   glossary_entries, reference_sources)

    # User report
    glossary_entries = []
    selected_terms = sorted(sorted_terms_by_tfxdf[:MAX_TERMS])  # Sorted by term

    glossary = Glossary()
    glossary.description = glossary_description
    for term in selected_terms:
        glossary_entry = GlossaryEntry()
        glossary_entry.source_term = term
        glossary_entry.translations = translations.create_for_word_sorted_by_frequency(
            corpus.documents, term, reference_sources)
        glossary.entries.append(glossary_entry)

    user_glossary_serializer = UserGlossarySerializer()
    user_glossary_serializer.create(glossary_file, glossary.get_dict(),
                                    reference_sources)
Code example #7
File: customparser.py  Project: igorgsn/crawlers
    def __init__(self):
        super().__init__()
        self._page_time_sleep = 1
        self._detail_page_time_sleep = 0.500
        self._start_page_number = 0
        self._page_limit = 20
        self._glossary = Glossary()
        self._data_output = []
        self._url = 'https://kinozal-tv.appspot.com/'
        self._request_header = {
            'user-agent': ('Mozilla/5.0 (X11; Linux i586; rv:31.0) '
                           'Gecko/20100101 Firefox/31.0'),
            'Charset': 'utf-8',
        }
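
The two *_time_sleep fields suggest per-page throttling between requests. The project's crawl loop is not shown; a hedged, self-contained sketch of how such fields are typically consumed (class and method names here are ours):

import time

class ThrottledCrawler:
    """Illustration only: sleep between fetches to throttle a crawler."""
    def __init__(self):
        self._page_time_sleep = 1
        self._detail_page_time_sleep = 0.5

    def crawl(self, urls):
        for url in urls:
            print('fetching', url)  # stand-in for the real HTTP request
            time.sleep(self._page_time_sleep)

ThrottledCrawler().crawl(['page-1', 'page-2'])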
Code example #8
File: reverse.py  Project: Armel35/pyglossary
import sys
sys.path.append('/usr/share/pyglossary/src')
from glossary import Glossary
import time
try:
  import psyco
  print 'Using module "psyco" to reduce execution time.'
  psyco.bind(Glossary)
except:
  print 'Warning: module "psyco" not found.'


t0 = time.time()
try:
  dicPath = sys.argv[1]
except:
  dicPath ='quick_eng-persian-e0.3.txt'
try:
  wordsFilePath = sys.argv[2]
except:
  wordsFilePath = dicPath[:-4]+'-words.txt'


g = Glossary()
g.readTabfile(dicPath)
g.checkUnicode()
#g.faEdit()

#words = g.takeOutputWords()
#wordsFile = open(wordsFilePath, "w")
#print len(words),"words found. writing to file..." 
#wordsFile.write( string.join(words,"\n") )
#del wordsFile

wordsFile = open(wordsFilePath, "r")

g2 = g.reverseDic(wordsFile, {'matchWord':True})
g2.writeTabfile()
print 'About', int(time.time() - t0), 'seconds left.'
Code example #9
#!/usr/bin/python
import sys

sys.path.append("/usr/share/pyglossary/src")
from glossary import Glossary
import time

t0 = time.time()

dicPath = sys.argv[1]
g = Glossary()
g.read(dicPath)

words = g.takeOutputWords({"minLen": 4, "noEn": True})

wordsFile = open(dicPath[:-4] + "-words.tab.txt", "w")
print(len(words), "words found. writing to file...")
wordsFile.write("\t#\n".join(words) + "\tNothing\n")
wordsFile.close()

print("%f  seconds left." % (time.time() - t0))
Code example #10
def glossary_search_api():
    source = request.args.get('source')

    glossary = Glossary(source)
    glossary.search()
    return Response(glossary.get_json(), mimetype='application/json')
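
Assuming the handler is registered on a Flask route (the app object and URL path below are placeholders, not part of the original project, and Glossary comes from the project's own code), it can be exercised with Flask's built-in test client:

# Hypothetical sketch: wiring the handler into a Flask app and calling it.
from flask import Flask, Response, request

app = Flask(__name__)

@app.route('/api/glossary')
def glossary_search_api():
    source = request.args.get('source')
    glossary = Glossary(source)  # Glossary class from the project above
    glossary.search()
    return Response(glossary.get_json(), mimetype='application/json')

with app.test_client() as client:
    response = client.get('/api/glossary?source=window')
    print(response.status_code, response.mimetype)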
Code example #11
File: reverse.py  Project: Armel35/pyglossary
import sys
sys.path.append('/usr/share/pyglossary/src')
from glossary import Glossary
import time
try:
    import psyco
    print 'Using module "psyco" to reduce execution time.'
    psyco.bind(Glossary)
except:
    print 'Warning: module "psyco" not found.'

t0 = time.time()
try:
    dicPath = sys.argv[1]
except:
    dicPath = 'quick_eng-persian-e0.3.txt'
try:
    wordsFilePath = sys.argv[2]
except:
    wordsFilePath = dicPath[:-4] + '-words.txt'

g = Glossary()
g.readTabfile(dicPath)
g.checkUnicode()
#g.faEdit()

#words = g.takeOutputWords()
#wordsFile = open(wordsFilePath, "w")
#print len(words),"words found. writing to file..."
#wordsFile.write( string.join(words,"\n") )
#del wordsFile

wordsFile = open(wordsFilePath, "r")

g2 = g.reverseDic(wordsFile, {'matchWord': True})
g2.writeTabfile()
print 'About', int(time.time() - t0), 'seconds left.'
Code example #12
File: merge.py  Project: xunglv/pyglossary
#!/usr/bin/python
import sys
sys.path.append('/usr/share/pyglossary/src')
from glossary import Glossary

g1 = Glossary()
g2 = Glossary()
g1.read(sys.argv[1])
g2.read(sys.argv[2])
gm = g1.merge(g2)
gm.writeTabfile()
Code example #13
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._glossary = Glossary()
        self._delimiter = ';'
Code example #14
        options_vbox.pack_start(hbox, 0, 0)
        ##
        options_exp.add(options_vbox)
        self.vbox.pack_start(options_exp, 0, 0)
        ####
        button_close = self.add_button(gtk.STOCK_CLOSE, 0)
        button_replace_all = self.add_button('Replace All', 0)
        button_replace_all.set_image(
            gtk.image_new_from_stock(gtk.STOCK_FIND_AND_REPLACE,
                                     gtk.ICON_SIZE_BUTTON))
        button_replace = self.add_button('Replace', 0)
        button_replace.set_image(
            gtk.image_new_from_stock(gtk.STOCK_FIND_AND_REPLACE,
                                     gtk.ICON_SIZE_BUTTON))
        button_find = self.add_button(gtk.STOCK_FIND, 0)
        self.action_area.set_homogeneous(False)
        ####
        self.vbox.show_all()

    def onDeleteEvent(self, widget, event):
        self.hide()
        return True


## Warn when replacing in all entries, and show number of occurrences

if __name__ == '__main__':
    from glossary import Glossary
    glos = Glossary()
    DbEditorFindDialog(glos).run()
Code example #15
import sys
sys.path.append('/usr/share/pyglossary/src')
from glossary import Glossary
import time
import string
try:
  import psyco
  print 'Using module "psyco" to reduce execution time.'
  usePsyco = True
except:
  print 'Warning: module "psyco" not found'
  usePsyco = False

t0 = time.time()

dicPath = sys.argv[1]
g = Glossary()
g.read(dicPath)
if usePsyco:
  psyco.bind(Glossary, 100)

words = g.takeOutputWords({'minLen':4, 'noEn':True})

wordsFile = open(dicPath[:-4] + "-words.tab.txt", "w")
print len(words), "words found. writing to file..."
wordsFile.write(string.join(words, '\t#\n') + '\tNothing\n')
wordsFile.close()

print '%f  seconds left.' % (time.time() - t0)
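
The psyco block at the top of this script is the usual optional-import guard: try the accelerator, fall back gracefully when it is missing. psyco itself is Python 2-only and long unmaintained; the same pattern in current Python, with ujson purely as an illustration:

# Optional-import guard: prefer a faster drop-in replacement when installed,
# fall back to the standard library otherwise.
try:
    import ujson as json
    print('Using module "ujson" to reduce execution time.')
except ImportError:
    import json
    print('Warning: module "ujson" not found.')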
Code example #16
File: any_to_txt.py  Project: zhyongwei/pyglossary
#!/usr/bin/python
import sys
sys.path.append('/usr/share/pyglossary/src')
from glossary import Glossary

g = Glossary()
g.read(sys.argv[1])
g.writeTabfile()