def uc_updated_page(page, old_uc): ''' Returns the page with old unique counters changeed to new ones ''' page_parts = uc_pat.split(page) page = '' for part in page_parts: if part.startswith('%23') and part.endswith('">'): # Generate a new unique counter if this is an old counter if int(part[3:-2]) < old_uc: page += '%23' + str(uniq_cntr()) + '">' else: page += part else: page += part return page
def do_GET(self): global page, word, firstClient global uc_to_pn, curr_page_num, viewed_pages #print 'path = ', self.path sp = self.path[1:] if unquote_plus(sp) == 'SHUTDOWN THE SERVER': print 'Server shutting down!' os._exit(0) if sp == 'favicon.ico': type = 'image/x-icon' page = open(sp).read() else: type = 'html' old_uc = uniq_cntr() # Trigger the update of old uc:s if sp == '': # We are starting if firstClient: firstClient = False page = open('index.html').read() else: page = open('index_2.html').read() word = 'green' elif sp.endswith('.html'): # Trying to fetch a HTML file usp = unquote_plus(sp) if usp == 'NLTK Wordnet Browser Database Info.html': word = '* Database Info *' if os.path.isfile(usp): page = open(usp).read() else: page = (html_header % word) + '<p>The database info file:'\ '<p><b>' + usp + '</b>' + \ '<p>was not found. Run this:' + \ '<p><b>python dbinfo_html.py</b>' + \ '<p>to produce it.' + html_trailer else: if os.path.isfile(usp): word = sp page = open(usp).read() else: word = '' page = (html_header % word) + '<p>The file:'\ '<p><b>' + usp + '</b>' + \ '<p>was not found.' + html_trailer #self.send_error(404) else: #print '######################################################' #print 'SP==>', sp, '\n<==SP' #print '######################################################' # Grab the unique counter uc = int(sp[sp.rfind('%23') + 3:]) # Page lookup needs not and cannot be done for the search words if uc: if uc in uc_to_pn and uc_to_pn[uc] in viewed_pages: page = viewed_pages[uc_to_pn[uc]] page,word = page_word(page, word, sp) page = uc_updated_page(page, old_uc) new_uc = uniq_cntr() for uc in range(old_uc, new_uc): uc_to_pn[uc] = curr_page_num viewed_pages[curr_page_num] = page curr_page_num += 1 self.send_head(type) self.wfile.write(page)
def create_db_info(): ''' Create the file: NLTK Wordnet Browser Database Info.html ''' print 'Database information is being gathered!' print print 'Producing this summary may, depending on your computer,' print 'take a couple of minutes. Please be patient!' counts = [[0 for i in range(len(col_heads))] for j in range(len(display_names))] rel_counts = defaultdict(int) rel_words = {} unique_beginners = defaultdict(list) for n_pos,pos in enumerate(all_pos): #['adv']): #all_pos): print '\n\nStarting the summary for POS: %s' % col_heads[n_pos] d = defaultdict(int) # Word counts for ind in open(data_path + 'index.' + pos): if ind.startswith(' '): continue ind_parts = ind.split() syn_count = int(ind_parts[2]) d['w_s_pairs'] += syn_count if syn_count == 1: d['monos'] += 1 else: d['poly_words'] += 1 d['poly_senses'] += syn_count w = ind_parts[0] d['forms'] += 1 if w.find('_') != -1: d['simple'] += 1 else: d['collo'] += 1 d['apimw'] = 1.0 * (d['monos'] + d['poly_senses']) / \ (d['monos'] + d['poly_words']) d['apemw'] = 1.0 * d['poly_senses'] / d['poly_words'] # Synsets and relations for syns in open(data_path + 'data.' + pos): if syns.startswith(' '): continue d['syns'] += 1 synset = getSynset(pos,int(syns[:8])) syn_rel = bu.relations_2(synset) if HYPERNYM not in syn_rel and 'hypernym (instance)' not in syn_rel: unique_beginners[n_pos].append(synset) d['rels'] += len(syn_rel) for sr in syn_rel: rel_counts[(sr,n_pos)] += 1 rel_words[(sr,n_pos)] = synset.words[0] # Prepare counts for displaying nd = {} for n,(x,y) in enumerate(display_names): nd[x] = n if x in d: counts[n][n_pos] = d[x] counts[n][4] += d[x] if x == 'apimw' or x == 'apemw': m_c = counts[nd['monos']][4] m_ps = counts[nd['poly_senses']][4] m_pw = counts[nd['poly_words']][4] if x == 'apimw': counts[n][4] = 1.0 * (m_c + m_ps) / (m_c + m_pw) else: counts[n][4] = 1.0 * m_ps / m_pw # Format the counts print '\n\nStarting the construction of result tables' html = (bu.html_header % '* Database Info *') + \ bu._hlev(2, 'Word, synset and relation counts by POS') html += ''' <table border="1" cellpadding="1" cellspacing="1" summary=""> <col align="left"><col align="right"><col align="right"> <col align="right"><col align="right"><col align="right"> <tr><th></th><th align="center">Noun</th><th align="center">Verb</th> <th align="center">Adjective</th><th align="center">Adverb</th> <th align="center">Total</th></tr> ''' for n,(x,y) in enumerate(display_names): if x == 'rels': html += '<tr><th align="left"> </th>' html += ''.join('<td align="right"> </td>' for c in counts[n]) \ + '</tr>\n' html += '<tr><th align="left">' + '%s' % y + '</th>' if x == 'apimw' or x == 'apemw': html += ''.join('<td align="right">' + '%6.2f ' % c + '</td>' \ for c in counts[n]) + '</tr>\n' else: html += ''.join('<td align="right">' + '%6d ' % c + '</td>' \ for c in counts[n]) + '</tr>\n' # Format the relation counts r_counts = [0 for i in range(len(col_heads))] for rk in groupby(sorted(rel_counts.keys()),key=lambda x:x[0]): for i in range(len(col_heads)): r_counts[i] = 0 dn = bu._dbname_to_dispname(rk[0]).split('/') if dn[0] == '???': dn = rk[0] + '(???)' else: dn = dn[0] html += '<tr><th align="left">' + '%s' % ('--- ' + dn) + '</th>' for y in rk[1]: r_counts[y[1]] = rel_counts[y] r_counts[len(col_heads) - 1] = sum(r_counts) html += ''.join('<td align="right">' + '%6d ' % rc + '</td>' for rc in r_counts) + '</tr>\n' html += '</table>' # Format the example words for relations html += '<br><br>' + bu._hlev(2, 'Example words for relations, 1 per POS') html += ''' <table border="1" cellpadding="1" cellspacing="1" summary=""> <caption></caption> <col align="center"><col align="center"><col align="center"> <col align="center"><col align="center"> <tr><th>Relation</th><th>Noun</th><th>Verb</th><th>Adjective</th><th>Adverb</th></tr> ''' for rk in groupby(sorted(rel_counts.keys()),key=lambda x:x[0]): dn = bu._dbname_to_dispname(rk[0]).split('/') if dn[0] == '???': dn = rk[0] + '(???)' else: dn = dn[0] html += '<tr><th align="center">' + dn + '</th>' rel_word_examples = [''] * 4 for y in rk[1]: rel_word_examples[y[1]] = rel_words[y] hlp = ''.join('<td align="center"><a href="M' + \ quote_plus(x + '#' + str(bu.uniq_cntr())) + '">' + \ x.replace('_', ' ') + '</a></td>' \ for x in rel_word_examples) hlp = hlp.replace('<a href="M"></a>','-') html += hlp + '</tr>\n' html += '</table>' + bu.html_trailer dbinfo_html_file = open('NLTK Wordnet Browser Database Info.html', 'wt') dbinfo_html_file.write(html) dbinfo_html_file.close() print '\n\nCreation complete: NLTK Wordnet Browser Database Info.html' return