def changeOneTable(table_name):
    """Refill the table *table_name* and convert its text columns to UTF-8.

    Steps, all on one cursor:

    1. Truncate *table_name* and refill it from the source table, whose name
       is *table_name* with its first two characters (the 'u_' prefix)
       removed.
    2. For every row (addressed through the key columns listed in
       ``table_keys[table_name]``) and every column listed in
       ``fields_to_convert[table_name]``, rewrite the value: apply
       ``w.reformat``, unescape HTML entities with ``html_parser.unescape``,
       and turn the Django ``templatetag`` placeholders back into
       ``{{`` / ``}}``.

    Key values are formatted with ``%d``, so the key columns must be numeric.
    The cursor and the connection are closed even when a statement fails;
    the exception is then propagated to the caller.
    """
    the_database_connection = None
    the_cursor = None
    try:
        the_database_connection = c.get_database_connection()
        the_cursor = the_database_connection.cursor()

        the_cursor.execute('truncate table %s' % table_name)

        source_table = table_name[2:]  # strip off 'u_' prefix
        the_cursor.execute('insert into %s select * from %s'
                           % (table_name, source_table))

        key_list = table_keys[table_name]
        select_keys = ", ".join(key_list)
        the_cursor.execute("select %s from %s order by %s"
                           % (select_keys, table_name, select_keys))
        results = the_cursor.fetchall()

        for row in results:
            # One "key = value" term per key column, in select order.
            where_clause = " and ".join(
                ["%s = %d" % (keyname, row[position])
                 for position, keyname in enumerate(key_list)])

            field_list = fields_to_convert[table_name]
            for fieldname in field_list:
                the_cursor.execute("select %s from %s where %s"
                                   % (fieldname, table_name, where_clause))
                fieldval = the_cursor.fetchone()[0]
                if not fieldval:
                    continue

                # Reformat in 2 steps:
                # 1. Convert homespun ASCII coding invented by Richard Sharpe
                #    to HTML entities.
                # 2. Convert the HTML entities to UTF-8.
                fieldval = w.reformat(fieldval)
                fieldval = html_parser.unescape(fieldval)

                # Remove some Django template tags
                fieldval = fieldval.replace('{% templatetag openvariable %}', '{{')
                fieldval = fieldval.replace('{% templatetag closevariable %}', '}}')

                fieldval = fieldval.replace("'", "''")  # escape for SQL
                statement = "update %s set %s = '%s' where %s" \
                            % (table_name, fieldname, fieldval, where_clause)
                the_cursor.execute(statement.encode('utf8'))
    finally:
        # Release database resources on success and on failure alike.
        if the_cursor is not None:
            the_cursor.close()
        if the_database_connection is not None:
            the_database_connection.close()
def writeFlagDecodes():
    """Write the HTML page defining the flag codes used in the index.

    Opens ``output_filename``, writes the page header through the ``cat``
    helpers, then one ``<dt>``/``<dd>`` pair per row of
    ``index_entry_flags`` (ordered by ``flag_id``), then the page footer.
    Each database value is passed through ``w.reformat`` before being
    written.

    The output file, the cursor and the connection are always closed, whether
    or not an error occurs; any exception is propagated to the caller.
    """
    outfile_handle = None
    the_database_connection = None
    the_cursor = None
    try:
        outfile_handle = open(output_filename, 'wb')  # 'wb' allows entry of UTF-8

        cat.write_inherit_and_title_block(outfile_handle)
        cat.write_start_main_content(outfile_handle)

        outfile_handle.write('<h3>Definition of codes used in the index</h3>')
        outfile_handle.write('<dl class="catalogue_entry_flags">')

        the_database_connection = c.get_database_connection()
        the_cursor = the_database_connection.cursor()

        statement = "select flag_code, flag_desc, flag_example from index_entry_flags "
        statement += " order by flag_id"
        the_cursor.execute(statement)

        for row in the_cursor.fetchall():
            flag_code = w.reformat(row[0])
            flag_desc = w.reformat(row[1])
            flag_example = w.reformat(row[2])

            outfile_handle.write('<dt>' + newline)
            outfile_handle.write(flag_code)
            if flag_example:
                # The example is optional; only show it when there is one.
                outfile_handle.write(' e.g. %s' % flag_example)
            outfile_handle.write(newline + '</dt>' + newline + '<dd>' + newline)
            outfile_handle.write('%s' % flag_desc)
            outfile_handle.write(newline)
            outfile_handle.write('</dd>' + newline + newline)

        outfile_handle.write('</dl>')
        cat.write_end_main_content(outfile_handle, include_link_to_definitions=False)
    finally:
        # Single cleanup path for both success and failure.
        if outfile_handle is not None:
            outfile_handle.close()
        if the_cursor is not None:
            the_cursor.close()
        if the_database_connection is not None:
            the_database_connection.close()
def writeAllHTMLFiles():
    """Generate every HTML output file by repeated calls to writeOneHTMLFile.

    Writes, in order:

    * the 'listbydate' page and the top-level 'list' page;
    * for each row of ``index_medieval_documents_view`` that has a document
      code (ordered by group type, group name and document code):
      a 'list' page whenever the document group type changes, a 'list' page
      whenever the document group name changes, and one page for the
      document code itself.

    Progress messages are printed to standard output. The cursor and the
    connection are closed even if page generation fails part-way through.
    """
    # Python 2 idiom: make utf8 the interpreter's default string encoding.
    reload(sys)
    sys.setdefaultencoding("utf8")

    writeOneHTMLFile('listbydate')
    writeOneHTMLFile('list')

    the_database_connection = None
    the_cursor = None
    try:
        the_database_connection = c.get_database_connection()
        the_cursor = the_database_connection.cursor()

        statement = "select coalesce( doc_group_type_parent, doc_group_type ) as doc_group_type, "
        statement += " doc_group_id, doc_group_name, document_code "
        statement += " from index_medieval_documents_view where document_code > '' "
        statement += " order by doc_group_type, doc_group_name, document_code_sort, document_code"
        the_cursor.execute(statement)
        documents = the_cursor.fetchall()

        prev_type_code = ''
        prev_loc_name = ''
        # Single-argument print( ... ) behaves the same as the print statement
        # under Python 2.
        for document in documents:
            type_code = document[0]
            loc_id = document[1]
            loc_name = document[2]
            document_code = document[3]

            if type_code != prev_type_code:
                prev_type_code = type_code
                # A new type always starts a new group, even if the name repeats.
                prev_loc_name = ''
                print("")
                print('Producing LIST for document group type %s' % type_code)
                writeOneHTMLFile('list', type_code)

            if loc_name != prev_loc_name:
                prev_loc_name = loc_name
                print("")
                print('Producing LIST for document group %s' % loc_name)
                writeOneHTMLFile('list', type_code, loc_id, loc_name)

            print('Producing output for ONE document code: %s' % document_code)
            writeOneHTMLFile(document_code)
    finally:
        if the_cursor is not None:
            the_cursor.close()
        if the_database_connection is not None:
            the_database_connection.close()

    print('Finished producing output.')
def writeFlagDecodes():
    """Write the HTML page defining the flag codes used in the index.

    Opens ``output_filename``, writes the page header through the ``cat``
    helpers, then one ``<dt>``/``<dd>`` pair per row of
    ``index_entry_flags`` (ordered by ``flag_id``), then the page footer.
    Each database value is passed through ``w.reformat`` before being
    written.

    The output file, the cursor and the connection are always closed, whether
    or not an error occurs; any exception is propagated to the caller.
    """
    outfile_handle = None
    the_database_connection = None
    the_cursor = None
    try:
        outfile_handle = open(output_filename, 'wb')  # 'wb' allows entry of UTF-8

        cat.write_inherit_and_title_block(outfile_handle)
        cat.write_start_main_content(outfile_handle)

        outfile_handle.write('<h3>Definition of codes used in the index</h3>')
        outfile_handle.write('<dl class="catalogue_entry_flags">')

        the_database_connection = c.get_database_connection()
        the_cursor = the_database_connection.cursor()

        statement = "select flag_code, flag_desc, flag_example from index_entry_flags "
        statement += " order by flag_id"
        the_cursor.execute(statement)

        for row in the_cursor.fetchall():
            flag_code = w.reformat(row[0])
            flag_desc = w.reformat(row[1])
            flag_example = w.reformat(row[2])

            outfile_handle.write('<dt>' + newline)
            outfile_handle.write(flag_code)
            if flag_example:
                # The example is optional; only show it when there is one.
                outfile_handle.write(' e.g. %s' % flag_example)
            outfile_handle.write(newline + '</dt>' + newline + '<dd>' + newline)
            outfile_handle.write('%s' % flag_desc)
            outfile_handle.write(newline)
            outfile_handle.write('</dd>' + newline + newline)

        outfile_handle.write('</dl>')
        cat.write_end_main_content(outfile_handle, include_link_to_definitions=False)
    finally:
        # Single cleanup path for both success and failure.
        if outfile_handle is not None:
            outfile_handle.close()
        if the_cursor is not None:
            the_cursor.close()
        if the_database_connection is not None:
            the_database_connection.close()
def addDocumentCodeSort():
    """Fill in the ``document_code_sort`` column for every document code.

    For each distinct non-empty ``document_code`` in the tables
    'index_medieval_documents' and 'index_entry_copies', the sortable form is
    computed with ``pad_with_zeroes`` and stored in ``document_code_sort``.
    Each UPDATE statement is printed before it is executed.

    The cursor and the connection are closed whether or not an error occurs;
    any exception is propagated to the caller.
    """
    the_database_connection = None
    the_cursor = None
    try:
        # Connect to the database and create a cursor
        the_database_connection = c.get_database_connection()
        the_cursor = the_database_connection.cursor()

        # Update both the 'documents' lookup table and the 'copies' table.
        # Although this duplicates the same information in two places, it
        # makes it much simpler to access the data.
        tables_to_update = ['index_medieval_documents', 'index_entry_copies']

        for the_table in tables_to_update:
            select = "select distinct document_code from %s " % the_table
            select += " where document_code > '' order by document_code"
            the_cursor.execute(select)

            for row in the_cursor.fetchall():
                document_code = row[0].strip()
                new_code = pad_with_zeroes(document_code)
                upd = "update %s set document_code_sort = '%s' where document_code = '%s'" \
                      % (the_table, new_code, document_code)
                # Single-argument print( ... ) behaves the same as the print
                # statement under Python 2.
                print(upd)
                the_cursor.execute(upd)
    finally:
        if the_cursor is not None:
            the_cursor.close()
        if the_database_connection is not None:
            the_database_connection.close()
def process_all_documents():
    """Write one block of generated output per medieval document.

    Opens ``output_filename``, writes a header comment, then for every row of
    ``index_medieval_documents`` with a non-empty document code (ordered by
    ``document_code_sort``) writes the UTF-8 encoded result of
    ``process_document(row)`` followed by a newline.

    The output file, the cursor and the connection are always closed, whether
    or not an error occurs; any exception is propagated to the caller.
    """
    outfile_handle = None
    the_database_connection = None
    the_cursor = None
    try:
        outfile_handle = open(output_filename, 'wb')  # 'wb' allows entry of UTF-8
        outfile_handle.write(newline + '-- This script was generated by dates.py '
                             + newline + newline)

        the_database_connection = c.get_database_connection()
        the_cursor = the_database_connection.cursor()

        statement = "select document_id, document_code, document_name "
        statement += " from index_medieval_documents where document_code > ''"
        statement += " order by document_code_sort"
        the_cursor.execute(statement)

        for row in the_cursor.fetchall():
            output = process_document(row)
            outfile_handle.write(output.encode('utf-8'))
            outfile_handle.write(newline)
    finally:
        # Single cleanup path for both success and failure.
        if outfile_handle is not None:
            outfile_handle.close()
        if the_cursor is not None:
            the_cursor.close()
        if the_database_connection is not None:
            the_database_connection.close()
def process_all_documents():
    """Write one block of generated output per medieval document.

    Opens ``output_filename``, writes a header comment, then for every row of
    ``index_medieval_documents`` with a non-empty document code (ordered by
    ``document_code_sort``) writes the UTF-8 encoded result of
    ``process_document(row)`` followed by a newline.

    The output file, the cursor and the connection are always closed, whether
    or not an error occurs; any exception is propagated to the caller.
    """
    outfile_handle = None
    the_database_connection = None
    the_cursor = None
    try:
        outfile_handle = open(output_filename, 'wb')  # 'wb' allows entry of UTF-8
        outfile_handle.write(newline + '-- This script was generated by dates.py '
                             + newline + newline)

        the_database_connection = c.get_database_connection()
        the_cursor = the_database_connection.cursor()

        statement = "select document_id, document_code, document_name "
        statement += " from index_medieval_documents where document_code > ''"
        statement += " order by document_code_sort"
        the_cursor.execute(statement)

        for row in the_cursor.fetchall():
            output = process_document(row)
            outfile_handle.write(output.encode('utf-8'))
            outfile_handle.write(newline)
    finally:
        # Single cleanup path for both success and failure.
        if outfile_handle is not None:
            outfile_handle.close()
        if the_cursor is not None:
            the_cursor.close()
        if the_database_connection is not None:
            the_database_connection.close()
def parseCopies(): #{ # Write the SQL to a file. We can then store this in Subversion and revert to it if necessary. outfile_handle = open( output_filename, 'wb' ) # 'wb' allows entry of UTF-8 outfile_handle.write( newline ) outfile_handle.write( '-- This SQL script is generated by importIndexedCopies.py.' ) outfile_handle.write( newline ) # Connect to the database the_database_connection = c.get_database_connection() # Create a Cursor object the_cursor = the_database_connection.cursor() # Clear out the destination table ('index entry copies') before you begin outfile_handle.write( newline + "TRUNCATE TABLE index_entry_copies;" + newline + newline ) # Get the 'copies' text field from the source table ('index entry books') statement = "SELECT e.entry_id, b.entry_book_count, b.copies, e.entry_name, b.title_of_book " \ + " FROM index_entry_books b, index_entries e" \ + " WHERE b.entry_id = e.entry_id " \ + " order by entry_id, entry_book_count" the_cursor.execute( statement ) results = the_cursor.fetchall() # Extract the copy IDs from the text field, and from each copy ID extract # the document code and sequence number. 
for row in results: #{ entry_id = row[0] entry_book_count = row[1] copies = row[2] entry_name = row[3] title_of_book = row[4] outfile_handle.write( '%s%s/*Entry ID %d, book count %d*/%s' \ % (newline, newline, entry_id, entry_book_count, newline)) author_and_title_comment = "/* %s %s */%s" % (entry_name, title_of_book, newline) outfile_handle.write( author_and_title_comment ) print author_and_title_comment, 'Book count', entry_book_count # Remove a few known typo's copies = correct_typos( copies ) copies_comment = "%s/* %s */%s" % (newline, copies, newline) outfile_handle.write( copies_comment ) # Remove any final full stops, as these can cause the final copy code not to be recognised if copies.strip().endswith( full_stop ): copies = copies.strip()[ 0 : -1 ] # Remove the 'notes' sections between brackets before trying to extract the copy codes full_copies = copies copies = strip_bits_in_brackets( copies ) copy_count = 0 copy_codes = [] words = copies.split() # since no separator is specified, any whitespace is used as the separator for word in words: #{ copy_code = '' document_code = '' seqno_in_document = '' is_valid_document_code = False if not is_copy_code( word ): #{ if is_document_code( word ): #{ # Try to avoid picking up bibliographical references that are NOT document codes checkword = word if checkword.endswith( comma ): checkword = checkword.replace( comma, '', 1 ) checkword = checkword.strip() statement = "select count(*) from index_medieval_documents where document_code = '%s'" \ % checkword #outfile_handle.write( "/* %s */ \n" % statement ) the_cursor.execute( statement ) docrow = the_cursor.fetchone() found = docrow[ 0 ] if found > 0: #{ # it is a real document code, just missing sequence no is_valid_document_code = True #} #} #} if is_copy_code( word ) or is_valid_document_code: #{ copy_code = word.strip() if copy_code.endswith( comma ): copy_code = copy_code[ 0 : -1 ] #} if copy_code: #{ # Check that we haven't already got it already_in_list = False 
if copy_code in copy_codes: already_in_list = True if already_in_list: continue copy_codes.append( copy_code ) copy_count += 1 #} #} # Finished picking out the copy codes from the 'copies' field. # Now get the text in between the copy codes copy_count = 0 num_copies = len( copy_codes ) while copy_count < num_copies: #{ copy_code = copy_codes[ copy_count ] copy_count += 1 copy_notes = '' rest_of_line = full_copies.split( copy_code, 1 )[ 1 ] if copy_count < len( copy_codes ): #{ # still another copy code to come after this one next_copy_code = copy_codes[ copy_count ] copy_notes = rest_of_line.split( next_copy_code, 1 )[ 0 ] copy_notes = copy_notes.strip() last_char = '' if copy_notes: last_char = copy_notes[ -1 : ] if next_copy_code and last_char in copy_code_connectors: #{ copy_notes = '%s %s' % (copy_notes, next_copy_code) #} #} else: copy_notes = rest_of_line copy_notes = copy_notes.strip() if copy_notes == comma or copy_notes == full_stop: #{ copy_notes = '' #} elif copy_notes.endswith( comma ): #{ copy_notes = copy_notes[ 0 : -1 ] #} document_code = get_document_code( copy_code ) seqno_in_document = get_seqno_in_document( copy_code ) if not seqno_in_document.strip(): seqno_in_document = 'null' copy_notes = copy_notes.strip() statement = get_copy_insert_statement( entry_id, entry_book_count, copy_count, copy_code, \ document_code, seqno_in_document, copy_notes ) outfile_handle.write( statement.encode( 'utf-8' )) if is_numeric_range( copy_code ): #{ # need to generate sequence numbers for rest of range rest_of_range = get_rest_of_numeric_range( copy_code ) for int_seqno in rest_of_range: #{ copy_count += 1 num_copies += 1 copy_codes.insert( 0, copy_code ) # add to start so we don't keep coming to the same one again! seqno_in_document = str( int_seqno ) outfile_handle.write( '/* generating sequence no. 
%d for %s */' % (int_seqno, copy_code)) statement = get_copy_insert_statement( entry_id, entry_book_count, copy_count, copy_code, \ document_code, seqno_in_document, copy_notes ) outfile_handle.write( statement.encode( 'utf-8' )) #} #} #} print newline #} the_cursor.close() the_database_connection.close()
def stripUnwantedTags(): #{ the_database_connection = None the_cursor = None #================================================================= # Read each line of the original file, manipulate it as necessary, # and then write it into the new file. #================================================================= try: # Connect to the database and create a cursor the_database_connection = c.get_database_connection() the_cursor = the_database_connection.cursor() # Look at all the text fields that could contain unwanted formatting for table_name, field_names in text_fields.items(): #{ #print newline + table_name + newline for field in field_names: #{ #print newline + table_name + ': ' + field + newline # generate a select statement to pick up rows containing any of the problematic tags first_tag = True for problem_tag_start in problem_tags_start: #{ if first_tag: select = "select id, %s from %s where %s like '%s%s%s'" \ % (field, table_name, field, percent, problem_tag_start, percent) else: select = "%s or %s like '%s%s%s'" % ( select, field, percent, problem_tag_start, percent) first_tag = False #} select += " order by id" #print select the_cursor.execute(select) results = the_cursor.fetchall() # start working through the results for row in results: #{ row_id = row[0] text_value = row[1] print '' print '' print '=======================================' print table_name, field, 'ID', row_id print '=======================================' print '' print '==== RAW VALUE, ID %d ====' % row_id print text_value print '==== end RAW VALUE, ID %d ==== %s' % (row_id, newline) for problem_tag_start in problem_tags_start: #{ print 'Processing', problem_tag_start if problem_tag_start not in text_value: continue # Convert, e.g., '<div style="font-family: Courier New">' to just '<div>' value_parts = text_value.split(problem_tag_start) new_value_parts = [] new_value = '' i = -1 for part in value_parts: #{ i += 1 formatting = '' data = '' if i == 0: #{ # the first section (index 
0) is before the formatting tag data = part #} else: #{ # at start of formatting tag formatting_and_data = part.split( closing_angle_bracket, 1) if len(formatting_and_data) != 2: #{ print 'Mismatched tag start and end in:', formatting_and_data print 'Cancelling change.' continue #} formatting = formatting_and_data[0] data = formatting_and_data[1] #print newline + 'About to remove the following formatting:' #print formatting + newline #} new_value_parts.append(data) #} fixed_tag = problem_tag_start.strip( ) + closing_angle_bracket new_value = fixed_tag.join(new_value_parts) if fixed_tag == '<a>': # no point in keeping these new_value = new_value.replace('<a></a>', '') text_value = new_value new_value = new_value.replace("'", "''") # escape for SQL new_value = new_value.replace("\\", "\\\\") # escape for SQL statement = "update %s set %s = '%s' where id = %d" % ( table_name, field, new_value, row_id) print newline, '/* new value */', statement, newline the_cursor.execute(statement) #} # end processing all problem tags in one field of one row of data #} # end loop through rows containing problem tags in a particular field #} # end loop through one table's fields that may contain problem tags #} # end loop through tables with fields that may contain problem tags the_cursor.close() the_database_connection.close() except: if the_cursor: the_cursor.close() if the_database_connection: the_database_connection.close() raise
def setBookIDs():
    """Rebuild ``index_mlgb_links`` from the books' medieval catalogue text.

    Reads ``id`` and ``medieval_catalogue`` for every row of ``books_book``
    with a non-empty catalogue field, empties ``index_mlgb_links``, and then
    inserts one row (book id, document code, sequence number) for every copy
    code found in the text. A copy code that is a numeric range yields one
    row per sequence number in the range. Words that are not copy codes, or
    whose document code / sequence number look incomplete or garbled, are
    ignored.

    The cursor and the connection are closed whether or not an error occurs;
    any exception is propagated to the caller.
    """
    the_database_connection = None
    the_cursor = None
    try:
        # Connect to the database and create a cursor
        the_database_connection = c.get_database_connection()
        the_cursor = the_database_connection.cursor()

        # Get details of the medieval catalogues in which an MLGB book appeared.
        select = "select id, medieval_catalogue from books_book where medieval_catalogue > ''"
        select += " order by id"
        the_cursor.execute(select)
        results = the_cursor.fetchall()

        # Clear out old data
        the_cursor.execute("TRUNCATE TABLE index_mlgb_links")

        # Insert the new data
        for row in results:
            book_id = row[0]
            medieval_catalogue = row[1].strip()
            # Single-argument print( ... ) behaves the same as the print
            # statement under Python 2.
            print("\n\n%s" % medieval_catalogue)

            # Avoid confusion potentially caused by spaces in wrong place etc.
            medieval_catalogue = medieval_catalogue.replace('. ', '.')
            medieval_catalogue = medieval_catalogue.replace('?', '')
            medieval_catalogue = medieval_catalogue.replace('=', ' ')

            for word in medieval_catalogue.split():
                if not i.is_copy_code(word):
                    continue

                word = word.strip()
                if word.endswith(','):
                    word = word[0:-1]  # take off any commas from the end

                document_code = i.get_document_code(word)
                seqno_in_document = i.get_seqno_in_document(word)

                is_complete = document_code.isalnum() \
                    and seqno_in_document.isdigit() \
                    and int(seqno_in_document) > 0
                if not is_complete:
                    # some kind of incomplete or garbled entry - don't try to save it
                    continue

                catalogue_entries = [seqno_in_document]
                if i.is_numeric_range(word):
                    # need to generate sequence numbers for rest of range
                    for int_seqno in i.get_rest_of_numeric_range(word):
                        catalogue_entries.append(str(int_seqno))

                for seqno_in_document in catalogue_entries:
                    print("%d: '%s' %s" % (book_id, document_code, seqno_in_document))
                    insert_statement = 'insert into index_mlgb_links '
                    insert_statement += '( mlgb_book_id, document_code, seqno_in_document ) values '
                    insert_statement += "( %d, '%s', %s )" \
                                        % (book_id, document_code, seqno_in_document)
                    the_cursor.execute(insert_statement)
    finally:
        if the_cursor is not None:
            the_cursor.close()
        if the_database_connection is not None:
            the_database_connection.close()
def parseCopies(): #{ # Write the SQL to a file. We can then store this in Subversion and revert to it if necessary. outfile_handle = open(output_filename, 'wb') # 'wb' allows entry of UTF-8 outfile_handle.write(newline) outfile_handle.write( '-- This SQL script is generated by importIndexedCopies.py.') outfile_handle.write(newline) # Connect to the database the_database_connection = c.get_database_connection() # Create a Cursor object the_cursor = the_database_connection.cursor() # Clear out the destination table ('index entry copies') before you begin outfile_handle.write(newline + "TRUNCATE TABLE index_entry_copies;" + newline + newline) # Get the 'copies' text field from the source table ('index entry books') statement = "SELECT e.entry_id, b.entry_book_count, b.copies, e.entry_name, b.title_of_book " \ + " FROM index_entry_books b, index_entries e" \ + " WHERE b.entry_id = e.entry_id " \ + " order by entry_id, entry_book_count" the_cursor.execute(statement) results = the_cursor.fetchall() # Extract the copy IDs from the text field, and from each copy ID extract # the document code and sequence number. 
for row in results: #{ entry_id = row[0] entry_book_count = row[1] copies = row[2] entry_name = row[3] title_of_book = row[4] outfile_handle.write( '%s%s/*Entry ID %d, book count %d*/%s' \ % (newline, newline, entry_id, entry_book_count, newline)) author_and_title_comment = "/* %s %s */%s" % (entry_name, title_of_book, newline) outfile_handle.write(author_and_title_comment) print author_and_title_comment, 'Book count', entry_book_count # Remove a few known typo's copies = correct_typos(copies) copies_comment = "%s/* %s */%s" % (newline, copies, newline) outfile_handle.write(copies_comment) # Remove any final full stops, as these can cause the final copy code not to be recognised if copies.strip().endswith(full_stop): copies = copies.strip()[0:-1] # Remove the 'notes' sections between brackets before trying to extract the copy codes full_copies = copies copies = strip_bits_in_brackets(copies) copy_count = 0 copy_codes = [] words = copies.split( ) # since no separator is specified, any whitespace is used as the separator for word in words: #{ copy_code = '' document_code = '' seqno_in_document = '' is_valid_document_code = False if not is_copy_code(word): #{ if is_document_code(word): #{ # Try to avoid picking up bibliographical references that are NOT document codes checkword = word if checkword.endswith(comma): checkword = checkword.replace(comma, '', 1) checkword = checkword.strip() statement = "select count(*) from index_medieval_documents where document_code = '%s'" \ % checkword #outfile_handle.write( "/* %s */ \n" % statement ) the_cursor.execute(statement) docrow = the_cursor.fetchone() found = docrow[0] if found > 0: #{ # it is a real document code, just missing sequence no is_valid_document_code = True #} #} #} if is_copy_code(word) or is_valid_document_code: #{ copy_code = word.strip() if copy_code.endswith(comma): copy_code = copy_code[0:-1] #} if copy_code: #{ # Check that we haven't already got it already_in_list = False if copy_code in copy_codes: 
already_in_list = True if already_in_list: continue copy_codes.append(copy_code) copy_count += 1 #} #} # Finished picking out the copy codes from the 'copies' field. # Now get the text in between the copy codes copy_count = 0 num_copies = len(copy_codes) while copy_count < num_copies: #{ copy_code = copy_codes[copy_count] copy_count += 1 copy_notes = '' rest_of_line = full_copies.split(copy_code, 1)[1] if copy_count < len( copy_codes ): #{ # still another copy code to come after this one next_copy_code = copy_codes[copy_count] copy_notes = rest_of_line.split(next_copy_code, 1)[0] copy_notes = copy_notes.strip() last_char = '' if copy_notes: last_char = copy_notes[-1:] if next_copy_code and last_char in copy_code_connectors: #{ copy_notes = '%s %s' % (copy_notes, next_copy_code) #} #} else: copy_notes = rest_of_line copy_notes = copy_notes.strip() if copy_notes == comma or copy_notes == full_stop: #{ copy_notes = '' #} elif copy_notes.endswith(comma): #{ copy_notes = copy_notes[0:-1] #} document_code = get_document_code(copy_code) seqno_in_document = get_seqno_in_document(copy_code) if not seqno_in_document.strip(): seqno_in_document = 'null' copy_notes = copy_notes.strip() statement = get_copy_insert_statement( entry_id, entry_book_count, copy_count, copy_code, \ document_code, seqno_in_document, copy_notes ) outfile_handle.write(statement.encode('utf-8')) if is_numeric_range( copy_code ): #{ # need to generate sequence numbers for rest of range rest_of_range = get_rest_of_numeric_range(copy_code) for int_seqno in rest_of_range: #{ copy_count += 1 num_copies += 1 copy_codes.insert( 0, copy_code ) # add to start so we don't keep coming to the same one again! seqno_in_document = str(int_seqno) outfile_handle.write( '/* generating sequence no. 
%d for %s */' % (int_seqno, copy_code)) statement = get_copy_insert_statement( entry_id, entry_book_count, copy_count, copy_code, \ document_code, seqno_in_document, copy_notes ) outfile_handle.write(statement.encode('utf-8')) #} #} #} print newline #} the_cursor.close() the_database_connection.close()
def changeOneTextField( handle, table_name, field_name ): #{ global unidentified_chars the_database_connection = c.get_database_connection() the_cursor = the_database_connection.cursor() statement = "select distinct entry_id, %s from %s " % (field_name, table_name) statement += " where %s like '%s%s%s'" % (field_name, percent, greek_start, percent) statement += " order by entry_id, %s" % field_name #print statement the_cursor.execute( statement ) results = the_cursor.fetchall() for row in results: #{ entry_id = row[ 0 ] fieldval = row[ 1 ] field_parts = fieldval.split( greek_start ) index = -1 for section in field_parts: #{ index += 1 if index == 0: continue # before first bit of Greek sub_sections = section.split( greek_end ) encoded_latin = sub_sections[ 0 ].strip() orig_encoded_latin = encoded_latin greek = u'' print entry_id, encoded_latin # We need to change s/sigma at the end of words to 'final sigma' final_char = encoded_latin[ -1 : ] if final_char == 's': #{ encoded_latin = '%s%s' % (encoded_latin[ 0 : -1 ], final_s) #} encoded_latin = encoded_latin.replace( 's ', final_s + ' ' ) print entry_id, encoded_latin # Now look up the name of the Greek character corresponding to this Latin character for one_char in encoded_latin[ : ]: #{ greek_char_name = '' greek_char = '' if one_char.strip() == '': # whitespace character of some kind greek += one_char elif letters.has_key( one_char ): #{ greek_char_name = letters[ one_char ] #print '%s = %s' % (one_char, greek_char_name) #} elif raw_accents.has_key( one_char ): #{ greek_char_name = raw_accents[ one_char ] #print '%s = %s' % (one_char, greek_char_name) #} else: if one_char not in unidentified_chars: unidentified_chars.append( one_char ) if greek_char_name: #{ greek_char = unicodedata.lookup( greek_char_name ) charnum = ord( greek_char ) greek += '&#%d;' % charnum #} #} # Breathings and accents appear BEFORE capital letters, and are written that way in the English, # e.g. "'Aposhmeiw<seis". 
However, combining characters always follow the character to which they # apply. So we need to do some rearrangement. In practice I think we need to add an extra space # before the start of the word, for the breathings and accents to sit on. processed_words = [] words = greek.split() for word in words: #{ for numeric_entity in accent_entities: #{ if word.startswith( numeric_entity ): #{ word = numeric_non_break_space + ' ' + word #} #} processed_words.append( word ) #} greek = ' '.join( processed_words ) orig_encoded_latin = orig_encoded_latin.replace( "'", "''" ) # escape single quotes for SQL handle.write( "update %s set %s = replace( %s, '" % (table_name, field_name, field_name)) handle.write( orig_encoded_latin ) handle.write( "', '" ) handle.write( greek ) handle.write( "' ) where entry_id = %d" % entry_id ) handle.write( " and %s like '%s%s%s';" % (field_name, percent, orig_encoded_latin, percent) ) handle.write( newline + newline ) print ' ' #} #} # remove the marker for 'Greek starts here' handle.write( "update %s set %s = replace( %s, '%s', '' );" \ % (table_name, field_name, field_name, greek_start) ) handle.write( newline ) # remove the marker for 'Greek end here', # but remember that sometimes one or two dollar signs have been missed off the end tmp_greek_end = full_greek_end while len( tmp_greek_end ) >= len( greek_end ): #{ handle.write( "update %s set %s = replace( %s, '%s', '' );" \ % (table_name, field_name, field_name, tmp_greek_end) ) handle.write( newline ) tmp_greek_end = tmp_greek_end[ 0 : -1 ] # trim off the last character #} the_cursor.close() the_database_connection.close() print '----' print 'Finished processing %s %s' % (table_name, field_name) print '----' print ' '
def stripUnwantedTags(): #{ the_database_connection = None the_cursor = None #================================================================= # Read each line of the original file, manipulate it as necessary, # and then write it into the new file. #================================================================= try: # Connect to the database and create a cursor the_database_connection = c.get_database_connection() the_cursor = the_database_connection.cursor() # Look at all the text fields that could contain unwanted formatting for table_name, field_names in text_fields.items(): #{ #print newline + table_name + newline for field in field_names: #{ #print newline + table_name + ': ' + field + newline # generate a select statement to pick up rows containing any of the problematic tags first_tag = True for problem_tag_start in problem_tags_start: #{ if first_tag: select = "select id, %s from %s where %s like '%s%s%s'" \ % (field, table_name, field, percent, problem_tag_start, percent) else: select = "%s or %s like '%s%s%s'" % (select, field, percent, problem_tag_start, percent) first_tag = False #} select += " order by id" #print select the_cursor.execute( select ) results = the_cursor.fetchall() # start working through the results for row in results: #{ row_id = row[ 0 ] text_value = row[ 1 ] print '' print '' print '=======================================' print table_name, field, 'ID', row_id print '=======================================' print '' print '==== RAW VALUE, ID %d ====' % row_id print text_value print '==== end RAW VALUE, ID %d ==== %s' % (row_id, newline) for problem_tag_start in problem_tags_start: #{ print 'Processing', problem_tag_start if problem_tag_start not in text_value: continue # Convert, e.g., '<div style="font-family: Courier New">' to just '<div>' value_parts = text_value.split( problem_tag_start ) new_value_parts = [] new_value = '' i = -1 for part in value_parts: #{ i += 1 formatting = '' data = '' if i == 0: #{ # the first section 
(index 0) is before the formatting tag data = part #} else: #{ # at start of formatting tag formatting_and_data = part.split( closing_angle_bracket, 1 ) if len( formatting_and_data ) != 2: #{ print 'Mismatched tag start and end in:', formatting_and_data print 'Cancelling change.' continue #} formatting = formatting_and_data[ 0 ] data = formatting_and_data[ 1 ] #print newline + 'About to remove the following formatting:' #print formatting + newline #} new_value_parts.append( data ) #} fixed_tag = problem_tag_start.strip() + closing_angle_bracket new_value = fixed_tag.join( new_value_parts ) if fixed_tag == '<a>': # no point in keeping these new_value = new_value.replace( '<a></a>', '' ) text_value = new_value new_value = new_value.replace( "'", "''" ) # escape for SQL new_value = new_value.replace( "\\", "\\\\" ) # escape for SQL statement = "update %s set %s = '%s' where id = %d" % (table_name, field, new_value, row_id) print newline, '/* new value */', statement, newline the_cursor.execute( statement ) #} # end processing all problem tags in one field of one row of data #} # end loop through rows containing problem tags in a particular field #} # end loop through one table's fields that may contain problem tags #} # end loop through tables with fields that may contain problem tags the_cursor.close() the_database_connection.close() except: if the_cursor: the_cursor.close() if the_database_connection: the_database_connection.close() raise
def writeDocumentContents( handle, document_code ): #{
    """Write the page listing the catalogue entries of one medieval document.

    handle        -- output object; only its write() method is used.
    document_code -- code of the document (medieval catalogue) to display.
                     It is interpolated directly into the SQL below.

    Writes the title block, breadcrumbs, headings, the total number of
    catalogue entries and then one <li> per copy found in index_entry_copies
    for this document, each with its author/title and book details.

    The cursor and connection are closed whether or not an error occurs;
    any exception is propagated to the caller.
    """
    write_inherit_and_title_block( handle )
    write_start_main_content( handle )

    the_database_connection = None
    the_cursor = None

    try:
        # Connect to the database and create a cursor
        the_database_connection = c.get_database_connection()
        the_cursor = the_database_connection.cursor()

        statement = "select coalesce( doc_group_type_parent, doc_group_type ) as doc_group_type, "
        statement += " doc_group_type_name, doc_group_id, doc_group_name, document_name "
        statement += " from index_medieval_documents_view where document_code = '%s'" % document_code
        the_cursor.execute( statement )

        # NOTE(review): fetchone() returns None for an unknown document_code, which
        # fails on the subscript below -- confirm callers only pass existing codes.
        document = the_cursor.fetchone()

        type_code = document[ 0 ]
        type_name = w.reformat( document[ 1 ] )
        loc_id = document[ 2 ]
        loc_name = w.reformat( document[ 3 ] )
        document_name = w.reformat( document[ 4 ] )

        statement = "select entry_id, entry_book_count, copy_count, copy_code, copy_notes, seqno_in_document"
        statement += " from index_entry_copies where document_code = '%s'" % document_code
        statement += " order by seqno_in_document, copy_code"
        the_cursor.execute( statement )
        copy_results = the_cursor.fetchall()

        write_breadcrumbs( handle, type_code, type_name, loc_id, loc_name, document_name )

        if type_name == loc_name: # don't have 'HENRY DE KIRKESTEDE: HENRY DE KIRKESTEDE'
            handle.write( '<h2>%s</h2>' % type_name )
        else:
            handle.write( '<h2>%s: %s</h2>' % (type_name, loc_name) )

        handle.write( '<h3 class="medieval_catalogue_desc">%s. %s</h3>' % (document_code, document_name) )
        handle.write( newline )

        write_catalogue_entries_total( handle, len( copy_results ) )
        handle.write( newline )

        handle.write( '<div class="index">' )
        handle.write( newline + newline )

        prev_copy_code = ''

        # Record which authors have already been displayed on this page
        # and don't repeat the bibliography paragraph on 2nd or subsequent appearances.
        authors = set()

        handle.write( '<ul id="catalogue_entry_list">' + newline )

        for copy_row in copy_results: #{

            # Extract copy information (copy_row[ 2 ], copy_count, is not needed here)
            entry_id = copy_row[ 0 ]
            entry_book_count = copy_row[ 1 ]
            copy_code = w.reformat( copy_row[ 3 ] )
            copy_notes = w.reformat( copy_row[ 4 ] )
            seqno_in_document = copy_row[ 5 ]

            if copy_code and copy_code == prev_copy_code: continue # don't repeat e.g. BC1.5--7
            prev_copy_code = copy_code

            # Extract book information
            statement = "select role_in_book, title_of_book, book_biblio_line, xref_title_of_book, problem"
            statement += " from index_entry_books where entry_id = %d" % entry_id
            statement += " and entry_book_count = %d" % entry_book_count
            the_cursor.execute( statement )
            book = the_cursor.fetchone()

            role_in_book = w.reformat( book[ 0 ] )
            title_of_book = w.reformat( book[ 1 ] )
            book_biblio_line = w.reformat( book[ 2 ] )
            xref_title_of_book = w.reformat( book[ 3 ] )
            problem = w.reformat( book[ 4 ] )

            # Extract author information
            statement = "select entry_name, xref_name, entry_biblio_line, entry_biblio_block, letter"
            statement += " from index_entries where entry_id = %d" % entry_id
            the_cursor.execute( statement )
            author = the_cursor.fetchone()

            # Get links to the main MLGB database
            mlgb_links = get_mlgb_links( the_cursor, document_code, seqno_in_document )

            entry_name = w.reformat( author[ 0 ] )
            xref_name = w.reformat( author[ 1 ] )
            entry_biblio_line = w.reformat( author[ 2 ] )
            entry_biblio_block = w.reformat( author[ 3 ] )
            letter = author[ 4 ].replace( '/', '' )

            # Write out the details
            handle.write( '<li>' + newline )

            # The copy code becomes a link when there is a matching MLGB book;
            # only the first linked book is used.
            if mlgb_links: #{
                mlgb_book_id = mlgb_links[ 0 ][ 0 ]
                hover_title_of_book = w.strip_html_for_hover( title_of_book )
                handle.write( w.get_mlgb_book_link( mlgb_book_id, hover_title_of_book ))
            #}
            handle.write( copy_code )
            if mlgb_links: handle.write( '</a>' )

            copy_notes = copy_notes.strip()
            if copy_notes: #{
                # Notes that start with their own punctuation follow the code directly.
                if not copy_notes.startswith( (',', ': ') ):
                    copy_notes = ' ' + copy_notes
                handle.write( copy_notes )
            #}
            handle.write( ': ' + newline )

            # Write out details from 'entry' table
            # linking to the main entry in the author/title index.
            link_to_authortitle = authortitle_url + '/' + letter + '/'
            anchor = '#entry%d_anchor' % entry_id
            link_to_authortitle += anchor
            handle.write( '<a href="%s%s">' % (w.if_editable, link_to_authortitle) )
            handle.write( entry_name + '</a>' + newline )

            if xref_name: handle.write( ' ' + right_arrow + ' ' + xref_name + newline )

            if entry_biblio_line: handle.write( entry_biblio_line + newline )
            handle.write( linebreak + newline )

            if entry_biblio_block and entry_name not in authors: #{
                handle.write( entry_biblio_block + linebreak + newline )
                authors.add( entry_name )
            #}

            # Write out details from 'book' table
            if problem: handle.write( problem + newline )
            if role_in_book: handle.write( role_in_book + newline )
            if title_of_book: handle.write( '<strong>' + title_of_book + '</strong>' + newline )
            if xref_title_of_book: handle.write( ' ' + right_arrow + ' ' + xref_title_of_book + newline )
            if book_biblio_line: handle.write( book_biblio_line + newline )

            handle.write( '</li>' + newline + newline )
        #}

        handle.write( '</ul><!-- end catalogue_entry_list -->' + newline )
        handle.write( '</div><!-- end div class "index" -->' )
        handle.write( newline )

        handle.write( '<p>' + newline )
        write_catalogue_entries_total( handle, len( copy_results ) )
        handle.write( '</p>' + newline )

        write_end_main_content( handle )

    finally:
        # Close the cursor and the connection, on success or failure.
        if the_cursor is not None: the_cursor.close()
        if the_database_connection is not None: the_database_connection.close()
def changeOneTable(table_name): #{
    """Rebuild one converted table from its source table and convert its text.

    table_name -- name of the table to rebuild. The source table name is
                  obtained by dropping the first two characters (the 'u_'
                  prefix).

    The table is truncated and refilled with a copy of the source table.
    Then, for each row (identified by the key columns in
    table_keys[table_name]) and each field in fields_to_convert[table_name],
    the value is passed through w.reformat() and html_parser.unescape(),
    two Django template tags are replaced by literal braces, and the result
    is written back with an UPDATE.

    The cursor and connection are closed whether or not an error occurs;
    any exception is propagated to the caller.
    """
    the_database_connection = None
    the_cursor = None

    try:
        the_database_connection = c.get_database_connection()
        the_cursor = the_database_connection.cursor()

        statement = 'truncate table %s' % table_name
        the_cursor.execute(statement)

        source_table = table_name[2:]  # strip off 'u_' prefix
        statement = 'insert into %s select * from %s' % (table_name, source_table)
        the_cursor.execute(statement)

        key_list = table_keys[table_name]
        select_keys = ", ".join(key_list)
        statement = "select %s from %s order by %s" % (select_keys, table_name, select_keys)
        the_cursor.execute(statement)
        results = the_cursor.fetchall()

        for row in results: #{
            # Key values come back in the same order as key_list (see select_keys),
            # and are formatted with %d, i.e. they must be numeric.
            where_clause = " and ".join("%s = %d" % (keyname, keyval)
                                        for keyname, keyval in zip(key_list, row))

            field_list = fields_to_convert[table_name]
            for fieldname in field_list: #{
                statement = "select %s from %s where %s" % (fieldname, table_name, where_clause)
                the_cursor.execute(statement)
                one_result = the_cursor.fetchone()
                fieldval = one_result[0]
                if not fieldval: continue

                # Reformat in 2 steps:
                # 1. Convert homespun ASCII coding invented by Richard Sharpe to HTML entities.
                # 2. Convert the HTML entities to UTF-8.

                # Turn ASCII coding into HTML
                fieldval = w.reformat(fieldval)

                # Turn HTML entities into UTF-8 characters
                fieldval = html_parser.unescape(fieldval)

                # Remove some Django template tags
                fieldval = fieldval.replace('{% templatetag openvariable %}', '{{')
                fieldval = fieldval.replace('{% templatetag closevariable %}', '}}')

                # NOTE(review): stripUnwantedTags also doubles backslashes when
                # escaping for SQL; confirm whether that is needed here too.
                fieldval = fieldval.replace("'", "''")  # escape for SQL

                statement = "update %s set %s = '%s' where %s" % (
                    table_name, fieldname, fieldval, where_clause)
                #print statement.encode( 'utf8' )
                the_cursor.execute(statement.encode('utf8'))
            #}
        #}

    finally:
        # Always release the database resources, on success or failure.
        if the_cursor is not None: the_cursor.close()
        if the_database_connection is not None: the_database_connection.close()
def writeDocumentListByDate( handle ): #{ # Connect to the database and create a cursor the_database_connection = c.get_database_connection() the_cursor = the_database_connection.cursor() write_inherit_and_title_block( handle ) write_start_main_content( handle ) handle.write( '<h2>List of medieval catalogues</h2>' ) handle.write( newline ) handle.write( '<div class="index">' ) handle.write( newline + newline ) # Write navigation by century centuries = [ '10', '11', '12', '13', '14', '15', '16', '17', 'undated' ] century_nav = '' for century in centuries: #{ century_desc = get_century_desc( century ) anchor_name = get_century_anchor( century ) if century_nav != '': century_nav += ' | ' century_nav += '<a href="#%s">%s</a>' % (anchor_name, century_desc ) #} handle.write( '<h3 class="inline_heading">Overview by date</h3>' ) handle.write( '{% if printing %}<br />{%else%}' ) handle.write( ' | <a href="%s">Overview by provenance</a>' % medieval_catalogues_url ) handle.write( '{% endif %}' ) handle.write( newline + newline ) statement = "select document_code, document_name, " statement += " coalesce( start_date, '2000-01-01') as sort_start_date, " statement += " coalesce( end_date, '2000-01-01') as sort_end_date, " statement += " doc_group_type_name, doc_group_name " statement += " from index_medieval_documents_view where document_code > '' " statement += " order by sort_start_date, sort_end_date, document_code_sort" the_cursor.execute( statement ) documents = the_cursor.fetchall() prev_type_code = '' prev_start_year = '' prev_century = 0 century_desc = '' total_for_century = 0 handle.write( '<div id="catalogues_by_date">' + newline ) for document in documents: #{ document_code = document[ 0 ] #document_code e.g. BC21 document_name = document[ 1 ] #document_name e.g. 'Books read in the refectory 1473' start_date = document[ 2 ] end_date = document[ 3 ] library_type = document[ 4 ] # e.g. Benedictines library_loc = document[ 5 ] # e.g. 
Abbey of St Frideswide document_name = w.reformat( document_name ) library_type = w.reformat( library_type ) library_loc = w.reformat( library_loc ) type_code = document_code[ 0 : 1 ] # this wouldn't work with 2-letter types e.g. BA # but we are only really interested in K and R start_year = start_date[ 0 : 4 ] if start_year.startswith( '0' ): start_year = start_year[ 1 : ] century = int( math.floor( int( start_year ) / 100 ) + 1 ) print century, start_year, document_code, document_name if century != prev_century: #{ if prev_century > 0: #{ handle.write( '</td></tr></table>' + newline ) write_total_for_century( handle, century_desc, total_for_century ) #} century_desc = get_century_desc( century ) anchor_name = get_century_anchor( century ) handle.write( '<p><a name="%s"></a></p>' % anchor_name ) write_century_nav( handle, century_nav ) prev_century = century total_for_century = 0 prev_type_code = '' handle.write( '<h4>%s</h4>' % century_desc ) handle.write( newline ) handle.write( '<table class="century" id="century%dtab">' % century ) handle.write( newline ) #} total_for_century += 1 statement = "select count(*) from index_entry_copies where document_code = '%s'" \ % document_code the_cursor.execute( statement ) count_row = the_cursor.fetchone() num_catalogue_entries = count_row[ 0 ] # no need to keep repeating the same decode hundreds of times for K and R if type_code in inline_lists and type_code == prev_type_code: #{ handle.write( ' • ' ) if num_catalogue_entries > 0: write_link_to_document( handle, document_code ) handle.write( '%s (%d) ' % (document_code, num_catalogue_entries)) if num_catalogue_entries > 0: handle.write( '</a>' + newline ) #} else: #{ # not in middle of K or R, so write out a complete row for each entry if total_for_century > 1: handle.write( '</td></tr>' + newline ) handle.write( '<tr><td>' + newline ) if start_year in document_name: handle.write( '<em>' + start_year + '</em>' + newline ) handle.write( '</td><td>' + newline ) handle.write( 
'%s' % library_type ) if library_loc != library_type: handle.write( ': %s' % library_loc ) handle.write( '</td><td>' + newline ) if num_catalogue_entries > 0: write_link_to_document( handle, document_code ) handle.write('%s. %s (%d)' % (document_code, document_name, num_catalogue_entries)) if num_catalogue_entries > 0: handle.write( '</a>' + newline ) # the final <td> gets finished off when you get to the next entry #} prev_type_code = type_code handle.write( newline ); #} handle.write( '</td></tr></table>' + newline ) write_total_for_century( handle, century_desc, total_for_century ) handle.write( '</div><!-- end list of catalogues by date -->' + newline ) write_century_nav( handle, century_nav ) handle.write( '</div><!-- end div class "index" -->' ) handle.write( newline + newline ) write_end_main_content( handle ) # Close your cursor and your connection the_cursor.close() the_database_connection.close()
def setBookIDs(): #{
    """Rebuild index_mlgb_links from the books' medieval catalogue references.

    Reads every row of books_book that has a non-empty medieval_catalogue
    value, empties index_mlgb_links, and then inserts one row
    (mlgb_book_id, document_code, seqno_in_document) for each catalogue
    entry recognised in that value. A word that is a numeric range produces
    one row per sequence number in the range. Words that are not copy codes,
    or whose document code / sequence number look garbled, are skipped.

    Progress is printed to stdout. The cursor and connection are closed
    whether or not an error occurs; any exception is propagated to the caller.
    """
    the_database_connection = None
    the_cursor = None

    try:
        # Connect to the database and create a cursor
        the_database_connection = c.get_database_connection()
        the_cursor = the_database_connection.cursor()

        # Get details of the medieval catalogues in which an MLGB book appeared.
        select = "select id, medieval_catalogue from books_book where medieval_catalogue > ''"
        select += " order by id"
        the_cursor.execute(select)
        results = the_cursor.fetchall()

        # Clear out old data
        the_cursor.execute("TRUNCATE TABLE index_mlgb_links")

        # Insert the new data
        for row in results: #{
            book_id = row[0]
            medieval_catalogue = row[1].strip()
            print("\n\n%s" % medieval_catalogue)

            # Avoid confusion potentially caused by spaces in wrong place etc.
            medieval_catalogue = medieval_catalogue.replace('. ', '.')
            medieval_catalogue = medieval_catalogue.replace('?', '')
            medieval_catalogue = medieval_catalogue.replace('=', ' ')

            for word in medieval_catalogue.split(): #{
                if not i.is_copy_code(word): continue

                word = word.rstrip(',')  # take off any commas from the end

                document_code = i.get_document_code(word)
                seqno_in_document = i.get_seqno_in_document(word)

                # These checks also guarantee that the values are safe to put
                # straight into the insert statement below.
                if not (document_code.isalnum() and seqno_in_document.isdigit()
                        and int(seqno_in_document) > 0):
                    # some kind of incomplete or garbled entry - don't try to save it
                    continue

                catalogue_entries = [seqno_in_document]

                if i.is_numeric_range(word): #{
                    # need to generate sequence numbers for rest of range
                    rest_of_range = i.get_rest_of_numeric_range(word)
                    catalogue_entries.extend(str(int_seqno) for int_seqno in rest_of_range)
                #}

                for seqno_in_document in catalogue_entries: #{
                    print("%d: '%s' %s" % (book_id, document_code, seqno_in_document))

                    insert_statement = 'insert into index_mlgb_links '
                    insert_statement += '( mlgb_book_id, document_code, seqno_in_document ) values '
                    insert_statement += "( %d, '%s', %s )" % (
                        book_id, document_code, seqno_in_document)
                    the_cursor.execute(insert_statement)
                #}
            #}
        #}

    finally:
        # Always release the database resources, on success or failure.
        if the_cursor is not None: the_cursor.close()
        if the_database_connection is not None: the_database_connection.close()
def produceOutput(letter, handle): #{
    """Write the Django template for one page of the author/title index.

    letter -- the initial letter whose index entries are to be listed. It is
              compared with the values in letters_with_entries (with 'I/J'
              treated as 'IJ') and interpolated into the SQL queries. If it
              is empty, only the letter menu and the advanced search include
              are written and the function returns early.
    handle -- output object; only its write() method is used.

    The page consists of: the template header; a 'treeview' block with the
    expand/collapse scripts; the letter navigation; and a tree with one
    <li class="outerhead"> per row of index_entries for this letter, each
    containing its books (index_entry_books) and, per book, a table of
    catalogue entries (index_entry_copies).

    Returns None.
    """

    # Connect to the database and create a cursor
    # NOTE(review): the cursor and connection are closed only on the two normal
    # exit paths below; if a query or write raises, they are left open.
    the_database_connection = c.get_database_connection()
    the_cursor = the_database_connection.cursor()

    #......................................
    # Start writing the template
    handle.write('{% extends "base.html" %}' + newline)
    handle.write('{% block title %}' + newline)
    handle.write('<title>MLGB3 Author/Title Index</title>' + newline)
    handle.write('{% endblock %}' + newline)

    #......................................
    # Write our own treeview expand/collapse function so that we can handle links
    # how we want rather than being constrained by the behaviour of the jQuery function.
    handle.write('{% block treeview %}' + newline)
    handle.write(get_expand_collapse_script() + newline)

    # Get entry/book IDs for use in expand/collapse script
    # NOTE(review): letter is placed directly into the SQL text here and below --
    # confirm that it never comes from untrusted input.
    statement = 'select i.entry_id, b.entry_book_count from index_entries i, index_entry_books b '
    statement += " where i.entry_id = b.entry_id and i.letter = '%s' " % letter
    statement += " order by i.entry_id, b.entry_book_count"
    the_cursor.execute(statement)
    id_results = the_cursor.fetchall()
    ids_for_expand_collapse = []
    for row in id_results: #{
        e_id_string = str(row[0])
        b_id_string = get_book_id_for_expand_collapse(row[0], row[1])
        # Each entry ID is listed once, followed by the IDs of all its books.
        if e_id_string not in ids_for_expand_collapse:
            ids_for_expand_collapse.append(e_id_string)
        ids_for_expand_collapse.append(b_id_string)
    #}

    # Write "expand/collapse all" scripts
    handle.write(get_expand_and_collapse_all_script(ids_for_expand_collapse))

    handle.write('{% endblock %}' + newline) # end 'treeview' block

    #......................................
    # Start writing the main page content
    handle.write('{% block maincontent %}' + newline)

    handle.write('<div class="index">')
    handle.write(newline + newline)

    # Write page heading
    handle.write('<h2>Browse Author/Title Index: %s</h2>' % letter)
    handle.write(newline)

    # Write navigation by initial letter
    handle.write('{% if not printing %}<div class="letterlinks">')
    for possible_letter in letters_with_entries: #{
        if possible_letter == 'I/J': possible_letter = 'IJ'
        selection_class = ''
        if possible_letter == letter: selection_class = ' class="selected" '
        handle.write( '<a href="%s/authortitle/browse/%s/" %s >%s</a>\n' \
                      % (if_editable, possible_letter, selection_class, possible_letter))
        # No spacer is written after 'Z'.
        if possible_letter != 'Z': handle.write('<span class="spacer"> </span>')
    #}
    handle.write('</div><!-- end div "letterlinks" -->{% endif %}')
    handle.write(newline + newline)

    if not letter: #{ # just a menu of the letters available
        # Add the Advanced Search form here to fill up the blank space.
        handle.write("{% include 'includes/authortitle_adv_search.html' %}" + newline)
        write_link_to_source_file(handle)
        handle.write('</div><!-- end div "index" -->')
        handle.write(newline + indexmenu + newline)
        write_end_of_page(handle)
        the_cursor.close()
        the_database_connection.close()
        return
    #}

    #......................................
    # Start writing author/title treeview
    handle.write('<div id="authortreecontrol">' + newline)
    handle.write('{% if not printing %}')
    handle.write(
        '<span class="like_a_link" onclick="collapseAllEntries()">Collapse All</span> '
    )
    handle.write(' | ')
    handle.write(
        '<span class="like_a_link" onclick="expandAllEntries()">Expand All</span> '
    )
    handle.write('{% endif %}')
    handle.write(newline)
    handle.write('</div>' + newline)

    handle.write('<ul class="authortreeview" id="authortree">' + newline)

    #......................................
    # Get entry details for main display
    statement = "select * from index_entries where letter = '%s' order by entry_id" % letter
    the_cursor.execute(statement)
    entry_results = the_cursor.fetchall()

    # Start writing the main display of results
    for entry_row in entry_results: #{

        # id, letter, name, xref name, biblio line, biblio block
        entry_id = entry_row[0]
        # we already know letter
        primary_name = reformat(entry_row[2])
        xref_name = reformat(entry_row[3])
        entry_bib_line = reformat(entry_row[4])
        entry_bib_block = reformat(entry_row[5])

        # Get a version of the index entry without HTML entities, for use in title displayed on hover
        hover_primary_name = strip_html_for_hover(primary_name)

        # The 'problem' paragraph is written only when it differs from the previous book's.
        prev_problem = ''

        # Get the books belonging to this entry
        statement = "select * from index_entry_books where entry_id = %d order by entry_book_count" \
                    % entry_id
        the_cursor.execute(statement)
        book_results = the_cursor.fetchall()
        num_books = len(book_results)
        num_catalogue_entries = 0

        # Start writing out the entry
        handle.write(newline + newline)
        handle.write('<!-- Start new entry "%s", entry ID %d -->' %
                     (primary_name.strip(), entry_id))
        handle.write(newline + '<a name="entry%d_anchor"></a>' % entry_id)
        handle.write(newline)

        handle.write('<li class="outerhead">' + newline)

        # An entry gets an expand/collapse button if there is anything to show beneath it.
        is_expandable = False
        if num_books > 0 or entry_bib_line or entry_bib_block: is_expandable = True

        if is_expandable: #{
            handle.write(get_expand_collapse_button(entry_id, '+'))
            handle.write(
                get_expand_collapse_span(entry_id, primary_name, 'outerhead'))
        #}
        else:
            handle.write('<span class="outerhead">%s</span>' % primary_name.strip())

        if xref_name: handle.write(' %s %s' % (right_arrow, xref_name))

        if is_expandable: #{
            if num_books == 1: #{ # could be just a dummy book, i.e. this is an entry by title not author
                title_of_book = book_results[0][3].strip()
                # A single book with a blank title is not counted as a book,
                # although the loop over book_results below still processes it.
                if not title_of_book: num_books = 0
            #}

            # This 'bibliography' section should be invisible when tree is fully collapsed.
            biblio_block_id = 'biblio_' + get_outer_div_for_expand_collapse(
                entry_id)
            handle.write('<div id="%s" class="author_biblio_block" ' % biblio_block_id)
            handle.write(
                'style="display:{%if printing%}block{%else%}none{%endif%}">')
            if entry_bib_line: #{
                handle.write(entry_bib_line + newline)
            #}
            # A line break separates the two parts only when both have content.
            if entry_bib_line.strip() and entry_bib_block.strip(): #{
                handle.write(linebreak + newline)
            #}
            if entry_bib_block: #{
                handle.write(entry_bib_block + newline)
            #}
            handle.write('</div>')

            # An entry by title will still have medieval catalogue entries
            statement = "select count(*) from index_entry_copies where entry_id = %d" % entry_id
            the_cursor.execute(statement)
            total_row = the_cursor.fetchone()
            num_catalogue_entries = total_row[0]

            if num_books or num_catalogue_entries: #{
                handle.write(newline + '<div class="totals">' + newline)
                totals_string = ''
                if num_catalogue_entries: #{
                    if num_catalogue_entries == 1: catcount_desc = 'catalogue entry'
                    else: catcount_desc = 'catalogue entries'
                    totals_string = '%d %s' % (num_catalogue_entries, catcount_desc)
                #}
                if num_books: #{
                    if num_books == 1: bookcount_desc = 'book'
                    else: bookcount_desc = 'books'
                    totals_string += ' (%d %s)' % (num_books, bookcount_desc)
                #}
                handle.write(
                    get_expand_collapse_span(entry_id, totals_string,
                                             'outer_subhead totals'))
                handle.write('</div><!-- end "totals" div -->')
            #}

            # Begin the section that expands and collapses
            # i.e. generally the author name with a hidden list of books below.
            # This <div> is closed after the loop over the books, further down.
            handle.write( newline + '<div id="%s" class="expand_entry" ' \
                          % get_outer_div_for_expand_collapse( entry_id ) )
            handle.write(
                ' style="display:{%if printing%}block{%else%}none{%endif%}">')

            # If there are multiple books for one author, provide link to expand/collapse them all at once
            if num_books > 1: #{
                if num_books == 2:
                    expand_collapse_msg = 'both'
                else:
                    expand_collapse_msg = 'all %d' % num_books
                expand_collapse_msg = ' %s books' % expand_collapse_msg

                book_ids = []
                for book in book_results: #{
                    entry_book_count = book[1]
                    book_ids.append(
                        get_book_id_for_expand_collapse(
                            entry_id, entry_book_count))
                #}

                # Write a pair of JavaScript functions, named after the entry ID,
                # which expand or collapse every book of this entry.
                handle.write('<script type="text/javascript">' + newline)
                handle.write(' function expand_books_for_entry_%d() { ' % entry_id)
                handle.write(newline)
                for book_id in book_ids: #{
                    handle.write(" expand_or_collapse( '%s', '+', 2 );" % book_id)
                    handle.write(newline)
                #}
                handle.write(' }' + newline)

                handle.write(' function collapse_books_for_entry_%d() { ' % entry_id)
                handle.write(newline)
                for book_id in book_ids: #{
                    handle.write(" expand_or_collapse( '%s', '-', 2 );" % book_id)
                    handle.write(newline)
                #}
                handle.write(' }' + newline)
                handle.write('</script>' + newline)

                handle.write('{% if not printing %}')
                handle.write(
                    '<span class="like_a_link" onclick="collapse_books_for_entry_%d()">'
                    % entry_id)
                handle.write('Collapse %s</span> ' % expand_collapse_msg)

                handle.write(' | ')

                handle.write(
                    '<span class="like_a_link" onclick="expand_books_for_entry_%d()">'
                    % entry_id)
                handle.write('Expand %s</span> ' % expand_collapse_msg)
                handle.write(linebreak + linebreak + newline)
                handle.write('{% endif %}')
            #}

            # Now start writing out the list of books
            # (this <ul> is closed after the loop over the books, further down)
            handle.write(newline + '<ul><!-- start list of books -->' + newline)
        #}

        for book in book_results: #{
            # 0: entry_id, 1: entry_book_count, 2: role_in_book
            # 3: title_of_book, 4: book_biblio_line, 5: xref_title_of_book
            # 6: copies, 7: problem
            entry_book_count = book[1]
            role_in_book = reformat(book[2])
            title_of_book = reformat(book[3])
            book_biblio_line = reformat(book[4])
            xref_title_of_book = reformat(book[5])
            # NOTE(review): 'copies' is not referenced again in this function.
            copies = reformat(book[6], preserve_linebreaks=True)
            problem = reformat(book[7])

            # Get a version of the book title without HTML entities, for use in title displayed on hover
            hover_title_of_book = strip_html_for_hover(title_of_book)

            if problem != prev_problem: #{
                handle.write(newline)
                handle.write('<p>%s</p>' % problem)
                handle.write(newline)
                prev_problem = problem
            #}

            handle.write(newline + '<li>')
            handle.write('<!-- start entry ID %d, book %d -->' %
                         (entry_id, entry_book_count))
            handle.write(newline)

            statement = "select copy_code, copy_notes, document_name, doc_group_name, doc_group_type_name, "
            statement += " document_code, seqno_in_document, copy_count "
            statement += " from index_entry_copies where entry_id = %d " % entry_id
            statement += " and entry_book_count = %d order by copy_count" % entry_book_count
            the_cursor.execute(statement)
            copy_results = the_cursor.fetchall()
            prev_copy_code = ''

            # Build the heading line for the book from whichever parts are present.
            first_line_of_book_entry = role_in_book
            first_line_of_book_entry += title_of_book
            if book_biblio_line: first_line_of_book_entry += ": %s" % book_biblio_line
            if xref_title_of_book: first_line_of_book_entry += "%s %s" % (right_arrow, xref_title_of_book)

            # Write out the first line for this book, initially with the catalogue entries under it hidden
            book_id_for_expand_collapse = get_book_id_for_expand_collapse(
                entry_id, entry_book_count)

            if first_line_of_book_entry > '' and len(copy_results) > 0: #{
                handle.write(
                    get_expand_collapse_button(book_id_for_expand_collapse, '+'))
                handle.write( get_expand_collapse_span( book_id_for_expand_collapse, \
                                                        first_line_of_book_entry, 'innerhead' ) )
            #}
            else:
                handle.write(first_line_of_book_entry)

            if len(copy_results) > 0: #{
                # With no heading line there is nothing to click on to expand the
                # table, so in that case the table is always displayed.
                if first_line_of_book_entry > '':
                    initial_display_style = '{%if printing%}block{%else%}none{%endif%}'
                else:
                    initial_display_style = 'block'

                handle.write( newline + '<table id="entry%s_tab" style="display:%s" ' \
                              % (book_id_for_expand_collapse, initial_display_style) )
                handle.write(' class="catalogue_entries">')
                handle.write(newline)

                handle.write(
                    '<tr class="catalogue_entry_head"><td>Catalogue entry</td>'
                )
                handle.write('<td>Catalogue</td></tr>')
                handle.write(newline)

                for one_copy in copy_results: #{
                    copy_code = reformat(one_copy[0])
                    copy_notes = reformat(one_copy[1])
                    document_name = reformat(one_copy[2])
                    doc_group_name = reformat(one_copy[3])
                    doc_group_type_name = reformat(one_copy[4])
                    document_code = one_copy[5]
                    seqno_in_document = one_copy[6]
                    copy_count = one_copy[7]

                    # Consecutive rows with the same copy code are written only once.
                    if copy_code == prev_copy_code: continue
                    prev_copy_code = copy_code

                    hover_title = hover_primary_name
                    if hover_title_of_book: hover_title += ' ' + em_dash + ' ' + hover_title_of_book

                    # See if we have got any links to the actual MLGB database
                    mlgb_links = []
                    # A missing sequence number becomes '0', which the query's
                    # 'seqno_in_document > 0' condition can never match.
                    if seqno_in_document == None: seqno_in_document = '0'
                    statement = "select mlgb_book_id from index_mlgb_links "
                    statement += " where document_code = '%s' and seqno_in_document = %s " \
                                 % (document_code, seqno_in_document)
                    statement += " and seqno_in_document > 0 order by mlgb_book_id"
                    the_cursor.execute(statement)
                    mlgb_links = the_cursor.fetchall()

                    handle.write( newline + '<!-- start entry %d, book %d, copy %d -->' \
                                  % (entry_id, entry_book_count, copy_count) )
                    handle.write(newline + '<tr class="catalogue_entry">' + newline)
                    handle.write('<td class="catalogue_entry_code">')
                    handle.write( get_copy_code_and_desc( copy_code, seqno_in_document, copy_notes, \
                                                          hover_title, mlgb_links ) )
                    handle.write('</td>')
                    handle.write(newline)

                    # Second column: link to the page for the medieval catalogue itself.
                    handle.write('<td class="catalogue_name">')
                    handle.write(newline)
                    handle.write(
                        '<a href="%s%s/%s"' %
                        (if_editable, medieval_catalogues_url, document_code))
                    handle.write(' title="Further details of catalogue %s" ' % document_code)
                    handle.write(' class="link_to_catalogue" >')
                    if doc_group_type_name: handle.write(doc_group_type_name)
                    if doc_group_name: #{
                        # Skip the group name when the type name already ends with it.
                        if not doc_group_type_name.endswith(
                                doc_group_name): #{
                            handle.write(': %s' % doc_group_name)
                        #}
                    #}
                    if document_name: handle.write(': %s' % document_name)
                    handle.write('</a>')

                    handle.write('</td>')
                    handle.write(newline)
                    handle.write(newline + '</tr>')
                #}
                handle.write(newline + '</table>' + newline)
            #}
            handle.write('</li><!-- end of one book -->')
            handle.write(
                newline + newline
            ) # make it a bit clearer by having a proper gap between books
        #}

        # Close the <ul> and <div> that were opened above for expandable entries.
        if is_expandable: #{
            handle.write(newline +
                         '</ul><!-- end list of books for one author -->' +
                         newline)
            handle.write(
                newline +
                '</div><!-- end outer expandable/collapsible section -->' +
                newline)
        #}
        handle.write(newline + '</li><!-- end outerhead list item -->')
        handle.write(newline + '<!-- end entry ID %d (%s) -->' %
                     (entry_id, primary_name.strip()))
    #}

    handle.write('</ul><!-- end tree -->' + newline)

    write_link_to_source_file(handle)
    handle.write('</div><!-- end div class "index" -->')
    handle.write(newline)
    handle.write(newline + indexmenu + newline)
    handle.write(
        newline +
        '{% if printing %}<script>window.print();</script>{% endif %}' +
        newline)
    write_end_of_page(handle)

    # Close your cursor and your connection
    the_cursor.close()
    the_database_connection.close()
def produceOutput( letter, handle ): #{
  """Write the Django template for one page of the author/title index.

  letter -- initial letter whose index entries are listed; if empty, only the
            letter menu and the advanced search include are written.
  handle -- open, writable file-like object that receives the template text.

  Returns None.  The database cursor and connection are always closed, even
  if a query or a write raises; the exception itself is propagated.
  """

  # Connect to the database; the cursor is created inside the 'try' so that
  # the connection is released even if cursor creation fails.
  the_database_connection = c.get_database_connection()
  the_cursor = None

  try:
    the_cursor = the_database_connection.cursor()

    #......................................
    # Start writing the template
    handle.write( '{% extends "base.html" %}' + newline )
    handle.write( '{% block title %}' + newline )
    handle.write( '<title>MLGB3 Author/Title Index</title>' + newline )
    handle.write( '{% endblock %}' + newline )

    #......................................
    # Write our own treeview expand/collapse function so that we can handle links
    # how we want rather than being constrained by the behaviour of the jQuery function.
    handle.write( '{% block treeview %}' + newline )
    handle.write( get_expand_collapse_script() + newline )

    # Get entry/book IDs for use in expand/collapse script
    statement = 'select i.entry_id, b.entry_book_count from index_entries i, index_entry_books b '
    statement += " where i.entry_id = b.entry_id and i.letter = '%s' " % letter
    statement += " order by i.entry_id, b.entry_book_count"
    the_cursor.execute( statement )
    id_results = the_cursor.fetchall()

    ids_for_expand_collapse = []
    for row in id_results: #{
      e_id_string = str( row[ 0 ] )
      b_id_string = get_book_id_for_expand_collapse( row[ 0 ], row[ 1 ] )
      # The entry ID is listed once, followed by one ID per book of that entry.
      if e_id_string not in ids_for_expand_collapse:
        ids_for_expand_collapse.append( e_id_string )
      ids_for_expand_collapse.append( b_id_string )
    #}

    # Write "expand/collapse all" scripts
    handle.write( get_expand_and_collapse_all_script( ids_for_expand_collapse ))
    handle.write( '{% endblock %}' + newline ) # end 'treeview' block

    #......................................
    # Start writing the main page content
    handle.write( '{% block maincontent %}' + newline )
    handle.write( '<div class="index">' )
    handle.write( newline + newline )

    # Write page heading
    handle.write( '<h2>Browse Author/Title Index: %s</h2>' % letter )
    handle.write( newline )

    # Write navigation by initial letter
    handle.write( '{% if not printing %}<div class="letterlinks">' )
    for possible_letter in letters_with_entries: #{
      if possible_letter == 'I/J': possible_letter = 'IJ'
      selection_class = ''
      if possible_letter == letter: selection_class = ' class="selected" '
      handle.write( '<a href="%s/authortitle/browse/%s/" %s >%s</a>\n' \
                    % (if_editable, possible_letter, selection_class, possible_letter))
      if possible_letter != 'Z': handle.write( '<span class="spacer"> </span>' )
    #}
    handle.write( '</div><!-- end div "letterlinks" -->{% endif %}' )
    handle.write( newline + newline )

    if not letter: #{ # just a menu of the letters available
      # Add the Advanced Search form here to fill up the blank space.
      handle.write( "{% include 'includes/authortitle_adv_search.html' %}" + newline )
      write_link_to_source_file( handle )
      handle.write( '</div><!-- end div "index" -->' )
      handle.write( newline + indexmenu + newline)
      write_end_of_page( handle )
      return # cursor and connection are closed by the 'finally' clause
    #}

    #......................................
    # Start writing author/title treeview
    handle.write( '<div id="authortreecontrol">' + newline )
    handle.write( '{% if not printing %}' )
    handle.write( '<span class="like_a_link" onclick="collapseAllEntries()">Collapse All</span> ' )
    handle.write( ' | ' )
    handle.write( '<span class="like_a_link" onclick="expandAllEntries()">Expand All</span> ' )
    handle.write( '{% endif %}' )
    handle.write( newline )
    handle.write( '</div>' + newline )
    handle.write( '<ul class="authortreeview" id="authortree">' + newline )

    #......................................
    # Get entry details for main display
    statement = "select * from index_entries where letter = '%s' order by entry_id" % letter
    the_cursor.execute( statement )
    entry_results = the_cursor.fetchall()

    # Start writing the main display of results
    for entry_row in entry_results: #{

      # id, letter, name, xref name, biblio line, biblio block
      entry_id = entry_row[ 0 ]
      # we already know letter
      primary_name = reformat( entry_row[ 2 ] )
      xref_name = reformat( entry_row[ 3 ] )
      entry_bib_line = reformat( entry_row[ 4 ] )
      entry_bib_block = reformat( entry_row[ 5 ] )

      # Get a version of the index entry without HTML entities, for use in title displayed on hover
      hover_primary_name = strip_html_for_hover( primary_name )

      prev_problem = ''

      # Get the books belonging to this entry
      statement = "select * from index_entry_books where entry_id = %d order by entry_book_count" \
                % entry_id
      the_cursor.execute( statement )
      book_results = the_cursor.fetchall()
      num_books = len( book_results )

      # Start writing out the entry
      handle.write( newline + newline )
      handle.write( '<!-- Start new entry "%s", entry ID %d -->' % (primary_name.strip(), entry_id) )
      handle.write( newline + '<a name="entry%d_anchor"></a>' % entry_id )
      handle.write( newline )
      handle.write( '<li class="outerhead">' + newline )

      is_expandable = bool( num_books > 0 or entry_bib_line or entry_bib_block )

      if is_expandable: #{
        handle.write( get_expand_collapse_button( entry_id, '+' ) )
        handle.write( get_expand_collapse_span( entry_id, primary_name, 'outerhead' ) )
      #}
      else:
        handle.write( '<span class="outerhead">%s</span>' % primary_name.strip() )

      if xref_name: handle.write( ' %s %s' % (right_arrow, xref_name) )

      if is_expandable: #{
        if num_books == 1: #{
          # could be just a dummy book, i.e. this is an entry by title not author
          title_of_book = book_results[ 0 ][ 3 ].strip()
          if not title_of_book: num_books = 0
        #}

        # This 'bibliography' section should be invisible when tree is fully collapsed.
        biblio_block_id = 'biblio_' + get_outer_div_for_expand_collapse( entry_id )
        handle.write( '<div id="%s" class="author_biblio_block" ' % biblio_block_id )
        handle.write( 'style="display:{%if printing%}block{%else%}none{%endif%}">' )
        if entry_bib_line: #{
          handle.write( entry_bib_line + newline )
        #}
        if entry_bib_line.strip() and entry_bib_block.strip(): #{
          handle.write( linebreak + newline )
        #}
        if entry_bib_block: #{
          handle.write( entry_bib_block + newline )
        #}
        handle.write( '</div>' )

        # An entry by title will still have medieval catalogue entries
        statement = "select count(*) from index_entry_copies where entry_id = %d" % entry_id
        the_cursor.execute( statement )
        total_row = the_cursor.fetchone()
        num_catalogue_entries = total_row[ 0 ]

        if num_books or num_catalogue_entries: #{
          handle.write( newline + '<div class="totals">' + newline )
          totals_string = ''
          if num_catalogue_entries: #{
            if num_catalogue_entries == 1: catcount_desc = 'catalogue entry'
            else: catcount_desc = 'catalogue entries'
            totals_string = '%d %s' % (num_catalogue_entries, catcount_desc)
          #}
          if num_books: #{
            if num_books == 1: bookcount_desc = 'book'
            else: bookcount_desc = 'books'
            totals_string += ' (%d %s)' % (num_books, bookcount_desc)
          #}
          handle.write( get_expand_collapse_span( entry_id, totals_string, 'outer_subhead totals' ))
          handle.write( '</div><!-- end "totals" div -->' )
        #}

        # Begin the section that expands and collapses
        # i.e. generally the author name with a hidden list of books below.
        handle.write( newline + '<div id="%s" class="expand_entry" ' \
                      % get_outer_div_for_expand_collapse( entry_id ) )
        handle.write( ' style="display:{%if printing%}block{%else%}none{%endif%}">' )

        # If there are multiple books for one author, provide link to expand/collapse them all at once
        if num_books > 1: #{
          if num_books == 2: expand_collapse_msg = 'both'
          else: expand_collapse_msg = 'all %d' % num_books
          expand_collapse_msg = ' %s books' % expand_collapse_msg

          book_ids = []
          for book in book_results: #{
            entry_book_count = book[ 1 ]
            book_ids.append( get_book_id_for_expand_collapse( entry_id, entry_book_count ) )
          #}

          handle.write( '<script type="text/javascript">' + newline )
          handle.write( ' function expand_books_for_entry_%d() { ' % entry_id )
          handle.write( newline )
          for book_id in book_ids: #{
            handle.write( " expand_or_collapse( '%s', '+', 2 );" % book_id )
            handle.write( newline )
          #}
          handle.write( ' }' + newline )
          handle.write( ' function collapse_books_for_entry_%d() { ' % entry_id )
          handle.write( newline )
          for book_id in book_ids: #{
            handle.write( " expand_or_collapse( '%s', '-', 2 );" % book_id )
            handle.write( newline )
          #}
          handle.write( ' }' + newline )
          handle.write( '</script>' + newline )

          handle.write( '{% if not printing %}' )
          handle.write( '<span class="like_a_link" onclick="collapse_books_for_entry_%d()">' % entry_id )
          handle.write( 'Collapse %s</span> ' % expand_collapse_msg )
          handle.write( ' | ' )
          handle.write( '<span class="like_a_link" onclick="expand_books_for_entry_%d()">' % entry_id )
          handle.write( 'Expand %s</span> ' % expand_collapse_msg )
          handle.write( linebreak + linebreak + newline )
          handle.write( '{% endif %}' )
        #}

        # Now start writing out the list of books
        handle.write( newline + '<ul><!-- start list of books -->' + newline )
      #}

      for book in book_results: #{

        # 0: entry_id, 1: entry_book_count, 2: role_in_book
        # 3: title_of_book, 4: book_biblio_line, 5: xref_title_of_book
        # 6: copies, 7: problem
        entry_book_count = book[ 1 ]
        role_in_book = reformat( book[ 2 ] )
        title_of_book = reformat( book[ 3 ] )
        book_biblio_line = reformat( book[ 4 ] )
        xref_title_of_book = reformat( book[ 5 ] )
        problem = reformat( book[ 7 ] )

        # Get a version of the book title without HTML entities, for use in title displayed on hover
        hover_title_of_book = strip_html_for_hover( title_of_book )

        # Only repeat the 'problem' text when it differs from the previous book's
        if problem != prev_problem: #{
          handle.write( newline )
          handle.write( '<p>%s</p>' % problem )
          handle.write( newline )
          prev_problem = problem
        #}

        handle.write( newline + '<li>' )
        handle.write( '<!-- start entry ID %d, book %d -->' % (entry_id, entry_book_count) )
        handle.write( newline )

        statement = "select copy_code, copy_notes, document_name, doc_group_name, doc_group_type_name, "
        statement += " document_code, seqno_in_document, copy_count "
        statement += " from index_entry_copies where entry_id = %d " % entry_id
        statement += " and entry_book_count = %d order by copy_count" % entry_book_count
        the_cursor.execute( statement )
        copy_results = the_cursor.fetchall()

        first_line_of_book_entry = role_in_book
        first_line_of_book_entry += title_of_book
        if book_biblio_line: first_line_of_book_entry += ": %s" % book_biblio_line
        if xref_title_of_book: first_line_of_book_entry += "%s %s" % (right_arrow, xref_title_of_book)

        # Write out the first line for this book, initially with the catalogue entries under it hidden
        book_id_for_expand_collapse = get_book_id_for_expand_collapse( entry_id, entry_book_count )
        if first_line_of_book_entry and copy_results: #{
          handle.write( get_expand_collapse_button( book_id_for_expand_collapse, '+' ) )
          handle.write( get_expand_collapse_span( book_id_for_expand_collapse, \
                                                  first_line_of_book_entry, 'innerhead' ) )
        #}
        else:
          handle.write( first_line_of_book_entry )

        if copy_results: #{
          # With no heading line to click on, the table must start off visible.
          if first_line_of_book_entry:
            initial_display_style = '{%if printing%}block{%else%}none{%endif%}'
          else:
            initial_display_style = 'block'

          handle.write( newline + '<table id="entry%s_tab" style="display:%s" ' \
                        % (book_id_for_expand_collapse, initial_display_style) )
          handle.write( ' class="catalogue_entries">' )
          handle.write( newline )
          handle.write( '<tr class="catalogue_entry_head"><td>Catalogue entry</td>' )
          handle.write( '<td>Catalogue</td></tr>' )
          handle.write( newline )

          # The hover title is the same for every copy of this book.
          hover_title = hover_primary_name
          if hover_title_of_book: hover_title += ' ' + em_dash + ' ' + hover_title_of_book

          prev_copy_code = ''
          for one_copy in copy_results: #{
            copy_code = reformat( one_copy[ 0 ] )
            copy_notes = reformat( one_copy[ 1 ] )
            document_name = reformat( one_copy[ 2 ] )
            doc_group_name = reformat( one_copy[ 3 ] )
            doc_group_type_name = reformat( one_copy[ 4 ] )
            document_code = one_copy[ 5 ]
            seqno_in_document = one_copy[ 6 ]
            copy_count = one_copy[ 7 ]

            # Skip consecutive rows that repeat the same copy code
            if copy_code == prev_copy_code: continue
            prev_copy_code = copy_code

            # See if we have got any links to the actual MLGB database
            if seqno_in_document is None: seqno_in_document = '0'
            statement = "select mlgb_book_id from index_mlgb_links "
            statement += " where document_code = '%s' and seqno_in_document = %s " \
                       % (document_code, seqno_in_document)
            statement += " and seqno_in_document > 0 order by mlgb_book_id"
            the_cursor.execute( statement )
            mlgb_links = the_cursor.fetchall()

            handle.write( newline + '<!-- start entry %d, book %d, copy %d -->' \
                          % (entry_id, entry_book_count, copy_count) )
            handle.write( newline + '<tr class="catalogue_entry">' + newline )

            handle.write( '<td class="catalogue_entry_code">' )
            handle.write( get_copy_code_and_desc( copy_code, seqno_in_document, copy_notes, \
                                                  hover_title, mlgb_links ) )
            handle.write( '</td>' )
            handle.write( newline )

            handle.write( '<td class="catalogue_name">' )
            handle.write( newline )
            handle.write( '<a href="%s%s/%s"' % (if_editable, medieval_catalogues_url, document_code))
            handle.write( ' title="Further details of catalogue %s" ' % document_code )
            handle.write( ' class="link_to_catalogue" >' )
            if doc_group_type_name: handle.write( doc_group_type_name )
            if doc_group_name: #{
              # Don't repeat the group name if the type name already ends with it
              if not doc_group_type_name.endswith( doc_group_name ): #{
                handle.write( ': %s' % doc_group_name )
              #}
            #}
            if document_name: handle.write( ': %s' % document_name )
            handle.write( '</a>' )
            handle.write( '</td>' )
            handle.write( newline )

            handle.write( newline + '</tr>' )
          #}
          handle.write( newline + '</table>' + newline )
        #}

        handle.write( '</li><!-- end of one book -->' )
        handle.write( newline + newline ) # make it a bit clearer by having a proper gap between books
      #}

      if is_expandable: #{
        handle.write( newline + '</ul><!-- end list of books for one author -->' + newline )
        handle.write( newline + '</div><!-- end outer expandable/collapsible section -->' + newline )
      #}
      handle.write( newline + '</li><!-- end outerhead list item -->' )
      handle.write( newline + '<!-- end entry ID %d (%s) -->' % (entry_id, primary_name.strip()) )
    #}

    handle.write( '</ul><!-- end tree -->' + newline )
    write_link_to_source_file( handle )
    handle.write( '</div><!-- end div class "index" -->' )
    handle.write( newline )
    handle.write( newline + indexmenu + newline)
    handle.write( newline + '{% if printing %}<script>window.print();</script>{% endif %}' + newline )
    write_end_of_page( handle )

  finally:
    # Close your cursor and your connection, whether or not anything went wrong
    if the_cursor is not None: the_cursor.close()
    the_database_connection.close()
#}
def get_index_by_modern_location(): #{
  """Return an HTML list of books grouped by their modern location.

  Each distinct "location 1, location 2" combination becomes one <li> holding
  a heading and a table; each book is a table row giving its shelfmark and a
  'see' reference to its provenance and institution.

  Returns the HTML as a single string.  The database cursor and connection
  are closed before returning, including when the query raises.
  """

  statement = "select p.provenance, ml1.modern_location_1, ml2.modern_location_2, "
  statement += " b.shelfmark_1, b.shelfmark_2, p.institution "
  statement += " from books_provenance p, "
  statement += " books_modern_location_1 ml1, "
  statement += " books_modern_location_2 ml2, "
  statement += " books_book b "
  statement += " where b.provenance_id = p.id "
  statement += " and b.modern_location_1_id = ml1.id "
  statement += " and b.modern_location_2_id = ml2.id "
  statement += " order by lower( replace( modern_location_1, 'St ', 'Saint ' ) ), "
  statement += " lower( replace( modern_location_2, 'St ', 'Saint ' ) ), "
  statement += " b.shelfmark_sort, b.id"

  # Connect to the database, fetch everything in one go, then release the
  # cursor and connection straight away (also if the query fails).
  the_database_connection = c.get_database_connection()
  the_cursor = None
  try:
    the_cursor = the_database_connection.cursor()
    the_cursor.execute( statement )
    loc_results = the_cursor.fetchall()
  finally:
    if the_cursor is not None: the_cursor.close()
    the_database_connection.close()

  # Collect the fragments in a list and join once at the end.
  html_parts = [ newline + "<ul><!-- start list of modern locations -->" + newline ]

  # None means "no location section has been opened yet", so that closing
  # tags are never written for a section that was not started.
  prev_location = None

  for loc in loc_results: #{
    provenance = loc[ 0 ].strip()
    location1 = loc[ 1 ].strip()
    location2 = loc[ 2 ].strip()
    shelfmark1 = loc[ 3 ].strip()
    shelfmark2 = loc[ 4 ].strip()
    inst = loc[ 5 ].strip()

    location = location1
    if location2 and not location1.endswith( ',' ): location += ', '
    location += location2

    if location != prev_location: #{
      if prev_location is not None:
        html_parts.append( '</table></li><!-- end modern location -->' + newline )
      prev_location = location

      html_parts.append( '<li><!-- start modern location -->' + newline )
      html_parts.append( '<h3>' + location + '</h3>' + newline )
      html_parts.append( '<table>' + newline )
    #}

    html_parts.append( '<tr>' + newline )

    html_parts.append( '<td>' )
    html_parts.append( "%s %s" % (shelfmark1, shelfmark2) )
    html_parts.append( '</td>' + newline )

    html_parts.append( '<td>' )
    html_parts.append( '<i>see</i> %s, <i>%s</i>' % (provenance.upper(), inst) )
    html_parts.append( '</td>' + newline )

    html_parts.append( '</tr>' + newline )
  #}

  if prev_location is not None:
    html_parts.append( '</table></li><!-- end modern location -->' + newline )
  html_parts.append( newline + "</ul><!-- end list of modern locations -->" + newline )

  return "".join( html_parts )
#}
def get_list_of_surviving_books(): #{
  """Return an HTML list of surviving books, grouped by provenance.

  The output is a three-level nested list: one <li> per provenance (with its
  county, institution, notes and cells), inside that one <li> per modern city
  holding books of that provenance, and inside that one <li> per book giving
  its modern library, shelfmark, evidence code, author/title, date,
  pressmark, medieval catalogue reference and 'unknown' text.

  Returns the HTML as a single string.  The database cursor and connection
  are always closed, even if a query raises.
  """

  # Collect the fragments in a list and join once at the end.
  html_parts = [ "<ul><!-- start list of provenances -->" + newline + newline ]

  # Connect to the database and create a cursor
  the_database_connection = c.get_database_connection()
  the_cursor = None

  try:
    the_cursor = the_database_connection.cursor()

    statement = "select provenance, county, institution, cells, notes, id "
    statement += " from books_provenance "
    statement += " order by lower( replace( provenance, 'St ', 'Saint ' ) )"
    the_cursor.execute( statement )
    prov_results = the_cursor.fetchall()

    for prov in prov_results: #{
      html_parts.append( linebreak + '<li><!-- start provenance -->' + newline )

      provenance = prov[ 0 ].strip().upper()
      county = prov[ 1 ].strip()
      institution = prov[ 2 ].strip()
      cells = prov[ 3 ].strip()
      notes = prov[ 4 ].strip()
      prov_id = prov[ 5 ]

      cells = cells.replace( blank_paragraph, '' ).strip()
      notes = notes.replace( blank_paragraph, '' ).strip()

      # Heading line: PROVENANCE, county, <i>institution</i>
      # (a comma is only added where the text so far does not already end in one)
      heading = provenance
      if county: #{
        if not heading.endswith( ',' ): heading += ','
        heading += ' ' + county
      #}
      if institution: #{
        if not heading.endswith( ',' ): heading += ','
        heading += ' <i>' + institution + '</i>'
      #}
      html_parts.append( heading + newline )

      if notes or cells: #{
        html_parts.append( '<div><small>' + newline )
        if notes: html_parts.append( notes + newline )
        if cells: html_parts.append( cells + newline )
        html_parts.append( '</small></div>' + newline )
      #}

      statement = "select distinct ml1.modern_location_1, ml1.id "
      statement += " from books_book b, books_modern_location_1 ml1 "
      statement += " where b.modern_location_1_id = ml1.id and b.provenance_id = %d " % prov_id
      statement += " order by lower( replace(modern_location_1, 'St ', 'Saint ') )"
      the_cursor.execute( statement )
      modern_city_results = the_cursor.fetchall()

      if modern_city_results: #{
        html_parts.append( '<ul><!-- start list of modern locations -->' + newline )

        for modern_city in modern_city_results: #{
          modern_city_name = modern_city[ 0 ]
          modern_city_id = modern_city[ 1 ]

          html_parts.append( '<li><!-- start modern location 1 (city) -->' + newline )
          html_parts.append( modern_city_name )

          statement = "select ml2.modern_location_2, b.shelfmark_1, b.shelfmark_2, b.evidence_id, "
          statement += " b.author_title, b.date, b.pressmark, b.medieval_catalogue, b.unknown "
          statement += " from books_book b, books_modern_location_2 ml2 "
          statement += " where b.modern_location_2_id = ml2.id "
          statement += " and b.provenance_id = %d " % prov_id
          statement += " and b.modern_location_1_id = %d" % modern_city_id
          statement += " order by lower( replace( modern_location_2, 'St ', 'Saint ' ) ),"
          statement += " shelfmark_sort, b.id"
          the_cursor.execute( statement )
          modern_library_results = the_cursor.fetchall()

          if modern_library_results: #{
            html_parts.append( newline + '<ul><!-- start list of books and their modern libraries -->' \
                               + newline )

            # Every book gets its own list item, so the formatting and the
            # output both happen inside this loop.
            for book in modern_library_results: #{
              modern_library = book[ 0 ].strip()
              shelfmark_1 = book[ 1 ].strip()
              shelfmark_2 = book[ 2 ].strip()
              evidence_code = book[ 3 ].strip()
              author_title = book[ 4 ].strip()
              date = book[ 5 ].strip()
              pressmark = book[ 6 ].strip()
              medieval_catalogue = book[ 7 ].strip()
              unknown = book[ 8 ].strip()

              shelfmark = "%s %s" % (shelfmark_1, shelfmark_2)
              shelfmark = shelfmark.strip()
              if shelfmark and not shelfmark.endswith( '.' ): shelfmark += '.'

              if evidence_code: evidence_code = '<i>%s</i>' % evidence_code

              if date and not date.endswith( '.' ): date += '.'

              # Pressmark is stored wrapped in paragraph tags; show it inline
              pressmark = pressmark.replace( '<p>', '' )
              pressmark = pressmark.replace( '</p>', '' )
              pressmark = pressmark.strip()
              if pressmark and not pressmark.endswith( '.' ): pressmark += '.'

              if medieval_catalogue: medieval_catalogue = "[%s]" % medieval_catalogue

              if unknown and not unknown.endswith( '.' ) and not unknown.endswith( '?' ):
                unknown += '.'

              html_parts.append( '<li><!-- start one book -->' + newline )
              html_parts.append( '<b>%s</b>%s' % (modern_library, two_spaces) )
              html_parts.append( "%s " % shelfmark )
              html_parts.append( "%s" % evidence_code ) # evidence code runs straight into the title
              html_parts.append( "%s " % author_title )
              html_parts.append( "%s " % date )
              html_parts.append( "%s " % pressmark )
              html_parts.append( "%s " % medieval_catalogue )
              html_parts.append( "%s " % unknown )
              html_parts.append( newline )
              html_parts.append( '</li><!-- end one book -->' + newline )
            #}

            html_parts.append( '</ul><!-- end list of books and their modern libraries -->' + newline )
          #}

          html_parts.append( '</li><!-- end modern location 1 (city) -->' + newline )
        #}

        html_parts.append( '</ul><!-- end list of modern locations -->' + newline )
      #}

      html_parts.append( '</li><!-- end provenance -->' + newline + newline )
    #}

    html_parts.append( newline + "</ul><!-- end list of provenances -->" + newline )

  finally:
    # Close your cursor and your connection, whether or not anything went wrong
    if the_cursor is not None: the_cursor.close()
    the_database_connection.close()

  return "".join( html_parts )
#}
def stripComments(): #{ the_database_connection = None the_cursor = None #================================================================= # Read each line of the original file, manipulate it as necessary, # and then write it into the new file. #================================================================= try: # Connect to the database and create a cursor the_database_connection = c.get_database_connection() the_cursor = the_database_connection.cursor() # Look at all the text fields that could contain unwanted XML for table_name, field_names in text_fields.items(): #{ #print newline + table_name + newline for field in field_names: #{ #print newline + table_name + ': ' + field + newline select = "select id, %s from %s where %s like '%s%s%s%s%s'" \ % (field, table_name, field, percent, comment_start, percent, comment_end, percent) select += " order by id" the_cursor.execute( select ) results = the_cursor.fetchall() # Check each value for XML/HTML comments for row in results: #{ row_id = row[ 0 ] text_value = row[ 1 ] print '' print '' print '=======================================' print table_name, field, 'ID', row_id print '=======================================' print '' print '==== RAW VALUE, ID %d ====' % row_id print text_value print '==== end RAW VALUE, ID %d ==== %s' % (row_id, newline) comment_start_count = text_value.count( comment_start ) comment_end_count = text_value.count( comment_end ) if comment_start_count != comment_end_count: #{ print 'Mismatched start/end tags:', comment_start_count, 'starts', comment_end_count, 'ends' continue # don't risk stripping out any real data #} value_parts = text_value.split( comment_start ) new_value_parts = [] new_value = '' i = -1 for part in value_parts: #{ i += 1 comment = '' data = '' if i == 0: #{ # the first section (index 0) is before the comment data = part.strip() #} else: #{ # at start of a comment comment_end_count = part.count( comment_end ) if comment_end_count != 1: #{ print 'Mismatched start/end 
tags in:', part continue #} comment_and_data = part.split( comment_end ) comment = comment_and_data[ 0 ] data = comment_and_data[ 1 ].strip() print newline + 'About to remove the following comment:' print comment + newline #} if data: #{ print newline + 'Retaining the following data:' print data + newline new_value_parts.append( data ) #} #} new_value = "".join( new_value_parts ) new_value = new_value.replace( "'", "''" ) # escape for SQL statement = "update %s set %s = '%s' where id = %d" % (table_name, field, new_value, row_id) print newline, '/* new value */', statement, newline the_cursor.execute( statement ) #} #} #} the_cursor.close() the_database_connection.close() except: if the_cursor: the_cursor.close() if the_database_connection: the_database_connection.close() raise
def writeDocumentList( handle, selected_type_code='', selected_loc_id=0, selected_loc_name=''): #{ # Connect to the database and create a cursor the_database_connection = c.get_database_connection() the_cursor = the_database_connection.cursor() # Work out what to show in your breadcrumbs trail etc. display_all = False selected_type_name = '' if not selected_type_code and not selected_loc_id: #{ display_all = True #} elif selected_type_code: #{ statement = "select doc_group_type_name from index_medieval_doc_group_types" \ + " where doc_group_type_code = '%s'" % selected_type_code the_cursor.execute( statement ) type_row = the_cursor.fetchone() selected_type_name = type_row[ 0 ] #} write_inherit_and_title_block( handle ) # Override the default treeview behaviour, which starts off expanded. # With such long files as the ones generated here, it might be best to start off collapsed. if display_all: set_treeview_collapsed( handle ) write_start_main_content( handle ) handle.write( '<h2>List of medieval catalogues</h2>' ) handle.write( newline ) write_breadcrumbs( handle, selected_type_code, selected_type_name, \ selected_loc_id, selected_loc_name ) handle.write( newline + newline ) if display_all: #{ # add navigation links to individual institution types statement = "select distinct coalesce( doc_group_type_parent, doc_group_type_code ) " \ + " as doc_group_type, doc_group_type_name from index_medieval_doc_group_types " \ + " order by doc_group_type" the_cursor.execute( statement ) institution_types = the_cursor.fetchall() i = 0 handle.write( '<p>' + newline ) for ins_type in institution_types: #{ if i > 0: handle.write( ' | ' ) i += 1 type_code = ins_type[ 0 ] type_name = ins_type[ 1 ] handle.write( '<a href="%s%s/source/%s" ' % (w.if_editable, medieval_catalogues_url, type_code) ) handle.write( ' title="%s" >' % type_name ) handle.write( type_name ) handle.write( '</a> ' ) #} handle.write( '</p>' + newline ) #} if display_all: #{ handle.write( '<h3 
class="inline_heading">Overview by provenance</h3>' ) handle.write( '{% if not printing %}' ) handle.write( ' | <a href="%s">Overview by date</a>' % medieval_catalogues_by_date_url ) handle.write( '{% endif %}' ) handle.write( newline + newline ) handle.write( '{% if not printing %}' ) handle.write( '<div id="sidetreecontrol">' + newline ) handle.write( '<a href="?#">Collapse All</a> | <a href="?#">Expand All</a>' + newline ) handle.write( '</div>' + newline ) handle.write( '{% endif %}' ) #} statement = "select coalesce( doc_group_type_parent, doc_group_type ) as doc_group_type, " \ + " doc_group_type_name, doc_group_id, doc_group_name, document_code, document_name " \ + " from index_medieval_documents_view " if selected_type_code: #{ statement += " where document_code > '' " statement += " and coalesce( doc_group_type_parent, doc_group_type ) = '%s' " % selected_type_code #} if selected_loc_id: statement += " and doc_group_id = %d " % selected_loc_id statement += " order by doc_group_type, doc_group_name, document_code_sort, document_code" the_cursor.execute( statement ) documents = the_cursor.fetchall() prev_type_code = '' prev_loc_name = '' inline_display = False if display_all: handle.write( '<ul class="treeview AAA" id="tree">' + newline ) else: # use a different CSS class and ID so that links behave normally handle.write( '<ul class="AAA" id="catalogue_tree">' + newline ) for document in documents: #{ type_code = document[ 0 ] # doc_group_type e.g. BX for Benedictines type_name = document[ 1 ] # doc_group_type_name e.g. Benedictines loc_id = document[ 2 ] # doc_group_name e.g. numeric ID for Canterbury loc_name = document[ 3 ] # doc_group_name e.g. Canterbury document_code = document[ 4 ] # document_code e.g. BC21 document_name = document[ 5 ] # document_name e.g. 
'Books read in the refectory 1473' type_name = w.reformat( type_name ) loc_name = w.reformat( loc_name ) document_name = w.reformat( document_name ) inline_display = False if type_code in inline_lists: inline_display = True if type_code != prev_type_code: #{ if prev_type_code: #{ handle.write( '</ul><!-- end CCC list -->' + newline ) handle.write( '</li><!-- end BBB list item -->' + newline ) handle.write( '</ul><!-- end BBB list -->' + newline ) handle.write( '</li><!-- end AAA list item -->' + newline ) #} prev_type_code = type_code prev_loc_name = loc_name write_outerhead( handle, type_name, display_all ) if inline_display: heading = document_name # show the sole decode for K and R else: heading = loc_name write_innerhead( handle, heading, display_all ) if display_all: handle.write( '<ul style="display: {% if printing %}block{% else %}none{% endif %}"' ) handle.write( '><!-- start CCC list -->' + newline ) else: handle.write( '<ul><!-- start CCC list -->' + newline ) #} elif loc_name != prev_loc_name: #{ prev_loc_name = loc_name handle.write( '</ul><!-- end CCC list -->' + newline ) handle.write( '</li><!-- end BBB list item -->' + newline ) handle.write( '</ul><!-- end BBB list -->' + newline ) handle.write( newline ) if inline_display: heading = document_name # show the sole decode for K and R else: heading = loc_name write_innerhead( handle, heading, display_all ) # start BBB list if display_all: handle.write( '<ul style="display: {% if printing %}block{% else %}none{% endif %}"' ) handle.write( '><!-- start CCC list -->' + newline ) else: handle.write( '<ul><!-- start CCC list -->' + newline ) #} if document_code: #{ statement = "select count(*) from index_entry_copies where document_code = '%s'" % document_code the_cursor.execute( statement ) count_row = the_cursor.fetchone() num_catalogue_entries = count_row[ 0 ] #} else: num_catalogue_entries = 0 print output_filename, type_name, loc_name, document_code handle.write( '<li' ); if inline_display: 
handle.write( ' style="display: inline-block; width: 80px;" ' ) handle.write( '><!-- start CCC list item -->' + newline ); if num_catalogue_entries > 0: #{ handle.write( '<a href="%s%s/%s" ' \ % (w.if_editable, medieval_catalogues_url, document_code) ) handle.write( ' title="View details of catalogue %s">' % document_code ) #} if inline_display: # no need to keep repeating the same decode hundreds of times for K and R handle.write( '• %s (%d)' % (document_code, num_catalogue_entries)) else: handle.write( '%s %s (%d)' % (document_code, document_name, num_catalogue_entries)) if num_catalogue_entries > 0: handle.write( '</a>' + newline ) handle.write( newline + '</li><!-- end CCC list item -->' + newline ); #} handle.write( '</ul><!-- end CCC list -->' + newline ) handle.write( '</li><!-- end BBB list item -->' + newline ) handle.write( '</ul><!-- end BBB list -->' + newline ) handle.write( newline + '</li><!-- end AAA outerhead list item -->' ) handle.write( '</ul><!-- end tree AAA -->' + newline ) handle.write( newline + linebreak + newline ) if selected_loc_name: write_documents_total( handle, len( documents ) ) write_end_main_content( handle ) # Close your cursor and your connection the_cursor.close() the_database_connection.close()
def _strip_comments_from_value( text_value ): #{
    """Return text_value with its comments removed, or None if it is unsafe to edit.

    The value is split on the module-level comment_start marker. The piece
    before the first marker is kept as data. Every later piece must contain
    exactly one comment_end marker: the text before that marker is the comment
    (discarded) and the text after it is data (kept). Each retained piece is
    stripped of surrounding whitespace, empty pieces are dropped, and the rest
    are concatenated with no separator between them.

    Returns None as soon as a piece does not contain exactly one comment_end
    marker. The caller must then leave the stored value alone: rebuilding the
    value without that piece could throw away real data along with the comment.

    Progress is reported on standard output as each piece is processed.
    """
    retained_parts = []

    for i, part in enumerate( text_value.split( comment_start ) ): #{
        if i == 0: #{
            # The first section (index 0) comes before any comment marker.
            data = part.strip()
        #}
        else: #{
            # This section begins just after a comment_start marker.
            if part.count( comment_end ) != 1: #{
                print( 'Mismatched start/end tags in: %s' % part )
                return None
            #}

            # Exactly one end marker, so the split always yields two items.
            comment, data = part.split( comment_end )
            data = data.strip()

            print( newline + 'About to remove the following comment:' )
            print( comment + newline )
        #}

        if data: #{
            print( newline + 'Retaining the following data:' )
            print( data + newline )
            retained_parts.append( data )
        #}
    #}

    return "".join( retained_parts )
#}


def stripComments(): #{
    """Remove comment markup from every text field listed in text_fields.

    For each table and field in the module-level text_fields mapping, selects
    the rows (by id) whose value contains comment_start followed somewhere by
    comment_end, strips the comments from the value and writes the result back
    with an UPDATE statement. Verbose progress, including the raw value and the
    generated UPDATE statement, is printed to standard output.

    A row is left untouched when:
      - the numbers of start and end markers in the value differ, or
      - any section following a start marker does not contain exactly one end
        marker (previously such a section was silently dropped and the row was
        still updated, which could discard real data).

    The cursor and connection are always closed, whether or not an error
    occurs; any exception is propagated to the caller unchanged.
    """
    the_database_connection = None
    the_cursor = None

    try:
        # Connect to the database and create a cursor
        the_database_connection = c.get_database_connection()
        the_cursor = the_database_connection.cursor()

        # Look at all the text fields that could contain unwanted XML
        for table_name, field_names in text_fields.items(): #{
            for field in field_names: #{

                select = "select id, %s from %s where %s like '%s%s%s%s%s'" \
                       % (field, table_name, field,
                          percent, comment_start, percent, comment_end, percent)
                select += " order by id"
                the_cursor.execute( select )
                results = the_cursor.fetchall()

                # Check each value for XML/HTML comments
                for row in results: #{
                    row_id = row[ 0 ]
                    text_value = row[ 1 ]

                    print( '' )
                    print( '' )
                    print( '=======================================' )
                    print( '%s %s ID %s' % (table_name, field, row_id) )
                    print( '=======================================' )
                    print( '' )
                    print( '==== RAW VALUE, ID %d ====' % row_id )
                    print( text_value )
                    print( '==== end RAW VALUE, ID %d ==== %s' % (row_id, newline) )

                    comment_start_count = text_value.count( comment_start )
                    comment_end_count = text_value.count( comment_end )
                    if comment_start_count != comment_end_count: #{
                        print( 'Mismatched start/end tags: %d starts %d ends'
                               % (comment_start_count, comment_end_count) )
                        continue  # don't risk stripping out any real data
                    #}

                    new_value = _strip_comments_from_value( text_value )
                    if new_value is None:
                        continue  # malformed comment: leave this row exactly as it is

                    # NOTE(review): the value is embedded in the SQL text and only
                    # single quotes are escaped. Binding it as a query parameter
                    # would be more robust, but the driver's parameter style is
                    # not visible from this function.
                    new_value = new_value.replace( "'", "''" )  # escape for SQL
                    statement = "update %s set %s = '%s' where id = %d" \
                              % (table_name, field, new_value, row_id)

                    # Assumes 'newline' ends with a line-break character.
                    print( '%s/* new value */ %s %s' % (newline, statement, newline) )
                    the_cursor.execute( statement )
                #}
            #}
        #}
    finally:
        # Runs on success and on failure alike, so nothing is left open.
        if the_cursor is not None:
            the_cursor.close()
        if the_database_connection is not None:
            the_database_connection.close()
#}
def _pad_year( year_value ): #{
    """Return str( year_value ), left-padded with zeros to at least 4 characters.

    An empty string is returned unchanged.
    """
    # NOTE(review): str( None ) is the 4-character string 'None', so a NULL year
    # is passed through as 'None'. Confirm that the code consuming
    # document_lookup expects that.
    year = str( year_value )
    if year and len( year ) < 4:
        year = year.rjust( 4, '0' )
    return year
#}


def _load_document_lookup( the_cursor ): #{
    """Fill the module-level document_lookup dict from u_index_medieval_documents_view.

    One entry is stored per row returned, keyed on document_code. Existing
    entries with the same key are overwritten; other entries are left in place.
    """
    global document_lookup  # only mutated in place, never rebound

    print( 'Looking up document list...' )

    statement = "select document_code, document_name, doc_group_type_name, doc_group_name, "
    statement += " start_date, end_date, document_type, doc_group_id, "
    statement += " coalesce( doc_group_type_parent, doc_group_type ) as doc_group_type, "
    statement += " start_year, end_year, date_in_words "
    statement += " from u_index_medieval_documents_view"
    the_cursor.execute( statement )

    for row in the_cursor.fetchall(): #{
        document_code = row[ 0 ]
        document_lookup[ document_code ] = {
            's_document_name'         : row[ 1 ],
            's_library_type'          : row[ 2 ],   # doc_group_type_name
            's_library_loc'           : row[ 3 ],   # doc_group_name
            'd_document_start'        : row[ 4 ],
            'd_document_end'          : row[ 5 ],
            's_document_type'         : row[ 6 ],
            's_library_loc_id'        : row[ 7 ],   # doc_group_id
            's_library_type_code'     : row[ 8 ],   # doc_group_type (parent if there is one)
            's_document_start_year'   : _pad_year( row[ 9 ] ),
            's_document_end_year'     : _pad_year( row[ 10 ] ),
            's_document_date_in_words': row[ 11 ],
        }
    #}
#}


def _load_mlgb_links_lookup( the_cursor ): #{
    """Append each copy's MLGB book IDs to the module-level mlgb_links_lookup dict.

    Every distinct (copy_code, mlgb_book_id) pair returned by the query is
    printed and then appended to the list stored under its copy_code.
    """
    # Get links to MLGB book IDs. Don't miss any out if there is a range of numbers.
    print( 'Looking up MLGB book links...' )

    statement = "select distinct copy_code, mlgb_book_id "
    statement += " from index_mlgb_links l, u_index_entry_copies c "
    statement += " where c.document_code = l.document_code "
    statement += " and c.seqno_in_document = l.seqno_in_document "
    statement += " order by copy_code, mlgb_book_id"
    the_cursor.execute( statement )

    for link_row in the_cursor.fetchall(): #{
        copy_code = link_row[ 0 ]
        mlgb_book_id = link_row[ 1 ]
        print( '%s %s' % (copy_code, mlgb_book_id) )
        mlgb_links_lookup.setdefault( copy_code, [] ).append( mlgb_book_id )
    #}
#}


def _author_and_title( entry_name, book_results ): #{
    """Decide whether an index entry names an author or a book title.

    Returns a tuple (author, title_of_book) in which at most one item is
    non-empty. The entry is treated as a book title only when it has exactly
    one book record and that record's own title (column 1) is blank; the title
    is then the record's cross-reference title (column 2) if that is non-blank,
    otherwise the entry name. In every other case the entry name is the author.
    """
    title_of_book = ''

    if len( book_results ) == 1: #{
        sole_book = book_results[ 0 ]
        own_title = sole_book[ 1 ].strip()
        xref_title = sole_book[ 2 ].strip()

        # A non-blank own title is written out with the other book fields, so
        # nothing needs to be returned for it here.
        if not own_title:
            title_of_book = xref_title or entry_name
    #}

    if title_of_book:
        author = ''
    else:
        author = entry_name
    return author, title_of_book
#}


def _write_solr_entries( the_cursor, outfile_handle ): #{
    """Write the index entries to outfile_handle, one record at a time.

    An entry with no book rows gets a single record holding only its entry
    fields. Otherwise a record is written for each copy of each book, or one
    record for a book that has no copies. The record counter starts at 1 and
    increases by one per record written.
    """
    # The highest entry ID is used only for the progress message.
    the_cursor.execute( "select max( entry_id ) from index_entries" )
    max_entry_id = the_cursor.fetchone()[ 0 ]

    solr_id = 0

    the_cursor.execute( get_entry_select_statement() )
    entry_results = the_cursor.fetchall()

    for entry in entry_results: #{
        current_entry_id = entry[ 0 ]
        print( "Getting data for entry %d of %d" % (current_entry_id, max_entry_id) )
        entry_name = entry[ 2 ]

        the_cursor.execute( get_book_select_statement( current_entry_id ) )
        book_results = the_cursor.fetchall()

        if not book_results: #{
            # This is presumably a cross-reference entry, but we cannot tell
            # whether it is referring to an author or a book title - both are possible.
            solr_id += 1
            write_entry_fields( entry, solr_id, outfile_handle )
            write_entry_end( outfile_handle )
            continue
        #}

        author, title_of_book = _author_and_title( entry_name, book_results )

        for book in book_results: #{
            current_book_count = book[ 0 ]
            the_cursor.execute( get_copy_select_statement( current_entry_id, current_book_count ) )
            copy_results = the_cursor.fetchall()

            if not copy_results: #{
                solr_id += 1
                write_entry_fields( entry, solr_id, outfile_handle )
                write_author_or_title( author, title_of_book, outfile_handle )
                write_book_fields( book, outfile_handle )
                write_entry_end( outfile_handle )
                continue
            #}

            for copy_row in copy_results: #{
                solr_id += 1
                write_entry_fields( entry, solr_id, outfile_handle )
                write_author_or_title( author, title_of_book, outfile_handle )
                write_book_fields( book, outfile_handle )

                document_code = write_copy_fields( copy_row, outfile_handle )
                write_document_fields( document_code, outfile_handle )

                copy_code = copy_row[ 1 ].strip()
                write_mlgb_book_link_fields( copy_code, outfile_handle )

                write_entry_end( outfile_handle )
            #}
        #}
    #}
#}


def writeXML(): #{
    """Export the author/title index to an XML file for loading into Solr.

    Steps, in order:
      1. Load document details into the module-level document_lookup dict.
      2. Load MLGB book links into the module-level mlgb_links_lookup dict.
      3. Write <virtualenv_root>/parts/index/authortitle_to_solr.xml: an opening
         '<doc>' line, the index records, and a closing '</doc>' line.

    The output file, the cursor and the connection are always closed, whether
    or not an error occurs; any exception is propagated to the caller unchanged.
    """
    the_database_connection = None
    the_cursor = None
    outfile_handle = None  # stays None until the output file has been opened

    try:
        the_database_connection = c.get_database_connection()
        the_cursor = the_database_connection.cursor()

        _load_document_lookup( the_cursor )
        _load_mlgb_links_lookup( the_cursor )

        output_filename = virtualenv_root + '/parts/index/authortitle_to_solr.xml'
        print( 'About to write %s' % output_filename )

        outfile_handle = open( output_filename, 'wb' )  # 'wb' allows entry of UTF-8
        outfile_handle.write( '<doc>' + newline )

        _write_solr_entries( the_cursor, outfile_handle )

        outfile_handle.write( '</doc>' + newline )
    finally:
        # Runs on success and on failure alike, so nothing is left open.
        if outfile_handle is not None and not outfile_handle.closed:
            outfile_handle.close()
        if the_cursor is not None:
            the_cursor.close()
        if the_database_connection is not None:
            the_database_connection.close()
#}