class TestPopulate_tags(TestCase):
    """Unit tests for PopulateTags.tags_for_story (default-fandom handling)."""

    # Shared fixtures: SQL layer and final-tables writer are mocked out so
    # only the tag-assembly logic under test runs.
    args = testArgs()
    log = logger("test")
    sql = MagicMock()
    tags = Tags(args, sql.db, log)
    final = MagicMock()
    populate_tags = PopulateTags(args, sql, log, tags, final)

    # Minimal tag-wrangling mapping: original archive tags -> AO3 tags.
    basic_tags = {
        'fandoms': [
            {'original_tag': 'Fandom A', 'ao3_tag': 'Fandom A (TV)'},
            {'original_tag': 'Fandom B', 'ao3_tag': 'Fandom B (TV)'},
        ],
        'tags': [
            {'original_tag': 'a tag', 'ao3_tag': 'A Tag'},
        ],
        'rating': [
            {'original_tag': 'PG', 'ao3_tag': 'General Audiences'},
        ],
    }

    def test_default_fandom_ignored_if_fandoms_present(self):
        """When the mapping supplies fandoms, only those appear in the result."""
        result = self.populate_tags.tags_for_story(1, self.basic_tags)
        fandom_list = result['fandoms'].split(', ')
        self.assertCountEqual(
            ['Fandom A (TV)', 'Fandom B (TV)'], fandom_list,
            'Fandoms should be a comma-separated string of specified AO3 tags')

    def test_default_fandom_used_if_no_fandoms_present(self):
        """Without mapped fandoms, the archive-wide default fandom is used."""
        # Shallow copy is enough: we only drop a top-level key.
        fandomless = self.basic_tags.copy()
        fandomless.pop('fandoms')
        result = self.populate_tags.tags_for_story(1, fandomless)
        self.assertEqual(
            'Fandom C (TV)', result['fandoms'],
            'Fandoms should be a comma-separated string of specified AO3 tags')
from shared_python.Tags import Tags


def valid_tags(key, tag_type_list):
    # Collect the values stored under `key` across a list of tag dicts,
    # stripped of surrounding whitespace; entries where the key is missing,
    # None, or the empty string are skipped.
    return [d[key].strip() for d in tag_type_list
            if key in d and d[key] is not None and d[key] != '']


if __name__ == "__main__":
    # Step 06: assemble the per-story AO3 tags for the final tables.
    args_obj = Args()
    args = args_obj.args_for_06()
    log = args_obj.logger_with_filename()
    sql = Sql(args, log)
    tags = Tags(args, sql.db, log)
    final = FinalTables(args, sql.db, log)

    # eFiction archives carry their own table-name mapping; all other archive
    # types use the plain temp-database table names.
    if args.archive_type == 'EF':
        table_names = efiction.table_names()
    else:
        table_names = {
            'authors': 'authors',
            'stories': 'stories',
            'chapters': 'chapters'
        }

    log.info("Getting all tags per story...")
    tags_by_story_id = tags.tags_by_story_id()
    # NOTE(review): the loop variable `tags` rebinds the Tags instance created
    # above — confirm the (truncated) loop body no longer needs that object.
    for (story_id, tags) in tags_by_story_id.items():
        # (chunk truncated here — the loop body continues beyond this view)
def _clean_email(author): email = author['email'] if author['email'] is None or author['email'] == '': email = u'{0}{1}[email protected]'.format(author['name'], args.archive_name)\ .replace(' ', '').replace("'", "") if author['email'].startswith('mailto:'): email = author['email'].replace('mailto:', '') return email if __name__ == "__main__": args = Args.args_for_05() sql = Sql(args) tags = Tags(args, sql.db) final = FinalTables(args, sql.db) chaps = Chapters(args, sql.db) filter = '' coauthors = {} print "Creating destination tables in {0}".format(args.output_database) if args.archive_type == 'EF': table_names = efiction.table_names() has_coauthor_table = raw_input( "\nDoes this archive have a coauthors table? Y/N\n") has_coauthors = True if str.lower(has_coauthor_table) == 'y' else False if has_coauthors: coauthors_dict = sql.execute_dict(
# (chunk begins mid-function: the tail of a CSV-writing helper whose def and
# loop headers are outside this view — indentation below is a best-effort
# reconstruction, TODO confirm against the full file)
            # Empty cell for NULLs; otherwise unescape HTML entities and
            # emit UTF-8 bytes (Python 2 csv writes byte strings).
            r.append('' if s is None else html_parser.unescape(unicode(s)).
                     encode('utf-8'))
        myFile.writerows([r])
    fp.close()


if __name__ == "__main__":
    """
    This step exports the Tag Wrangling and Authors with stories CSV files
    which you then have to import into Google Spreadsheet and share with the
    rest of the Open Doors committee.
    """
    args_obj = Args()
    args = args_obj.args_for_03()
    log = args_obj.logger_with_filename()
    sql = Sql(args, log)
    tags = Tags(args, sql.db, log)

    log.info('Exporting tags from {0} to {1}'.format(args.temp_db_database,
                                                     args.output_folder))
    # Column-name mapping shared with the import step; drives header order.
    cols = tags.tag_export_map
    results = tags.distinct_tags()
    write_csv(
        '{0}/{1} - tags.csv'.format(args.output_folder, args.archive_name), [
            cols['original_tagid'], cols['original_tag'],
            cols['original_table'], cols['original_parent'],
            cols['ao3_tag_fandom'], cols['ao3_tag'], cols['ao3_tag_type'],
            cols['ao3_tag_category'], cols['original_description'], "TW Notes"
        ])

    log.debug('Exporting authors with stories from {0} to {1}'.format(
        args.temp_db_database, args.output_folder))
# encoding: utf-8
import csv
import sys

from shared_python import Args
from shared_python.Sql import Sql
from shared_python.Tags import Tags

if __name__ == "__main__":
    """
    Step 04: read the Tag Wrangling spreadsheet export and write the AO3 tag
    mappings back into the `tags` table (populates the ao3_tag_table column).
    """
    args = Args.args_for_04()
    sql = Sql(args)
    tags = Tags(args, sql.db)

    # Input CSV from TW spreadsheet
    # eg: python 04-Rename-Tags.py -dh localhost -du root -dt dsa -dd temp_python -a EF -i path/to/tw-spreadsheet.csv
    with open(args.tag_input_file, 'r') as csvfile:
        tw_tags = list(csv.DictReader(csvfile))

    # NOTE(review): `tag_headers` is unused in this chunk — kept for parity
    # with the refactored variant of this script; confirm before removing.
    tag_headers = tags.tag_export_map

    total = len(tw_tags)
    # Hoisted out of the loop: the table prefix does not change per row
    # (the original recomputed this conditional on every iteration).
    prefix = 'fanfiction' if args.archive_type == 'EF' else ''
    # enumerate(..., 1) replaces the original's manual `cur + 1`.
    for cur, row in enumerate(tw_tags, 1):
        sys.stdout.write('\r{0}/{1} tags to map'.format(cur, total))
        sys.stdout.flush()
        tags.update_tag_row(row, prefix)
from eFiction import efiction
from shared_python.Args import Args
from shared_python.Sql import Sql
from shared_python.Tags import Tags

if __name__ == "__main__":
    """Step 01 (eFiction): load the archive dump and convert it into the
    temporary working database."""
    args_obj = Args()
    args = args_obj.args_for_01()
    log = args_obj.logger_with_filename()
    sql = Sql(args, log)
    tags = Tags(args, sql.db, log)

    # Renamed local (was `efiction`): the original rebinding shadowed the
    # imported `efiction` module on the line that used it.
    converter = efiction.eFiction(args, sql, log, tags)
    converter.convert_efiction_to_temp()
import re

from shared_python import Args
from shared_python.Sql import Sql
from shared_python.Tags import Tags

if __name__ == "__main__":
    # Step 02 (Python 2): denormalise story tags into a `tags` table.
    args = Args.args_for_02()
    sql = Sql(args)
    tags = Tags(args, sql.db)

    # NOTE(review): other variants of this script report args.temp_db_database
    # here — confirm db_database is the intended target for this message.
    print('--- Processing tags from stories table in {0}'.format(
        args.db_database))
    tags.create_tags_table()

    # eg: python 01-Load-into-Mysql.py -dh localhost -du root -dt dsa -dd temp_python -a AA -f /Users/emma/OneDrive/DSA/ARCHIVE_DB.pl -o .
    tag_col_list = {}
    stories_id_name = ""
    stories_table_name = ""

    # AUTOMATED ARCHIVE
    if args.archive_type == 'AA':
        # Prompt for the story table name; empty input takes the
        # '<prefix>_stories' default.
        table_name = raw_input(
            'Story table name (default: "{0}_stories"): '.format(
                args.db_table_prefix))
        if table_name is None or table_name == '':
            table_name = '{0}_stories'.format(args.db_table_prefix)
        # Prompt for the comma-delimited list of tag-bearing columns.
        tag_columns = raw_input(
            'Column names containing tags \n (delimited by commas - default: "tags, warnings, characters, fandoms, relationships"): '
        )
        if tag_columns is None or tag_columns == '':
            # (chunk truncated here — the default assignment continues beyond this view)
# encoding: utf-8
import csv

from shared_python.Args import Args
from shared_python.Common import print_progress
from shared_python.Sql import Sql
from shared_python.Tags import Tags

if __name__ == "__main__":
    """
    When Tag Wrangling have finished mapping the tags in Google Drive, export
    the spreadsheet as a CSV file. This script then copies the AO3 tags from
    that file into the tags table in the temporary database.
    """
    args_obj = Args()
    args = args_obj.args_for_04()
    log = args_obj.logger_with_filename()
    sql = Sql(args, log)
    tags = Tags(args, sql.db, log)

    # Load the whole spreadsheet export up front so the progress bar knows
    # the total row count.
    with open(args.tag_input_file, 'r') as csvfile:
        tw_tags = list(csv.DictReader(csvfile))

    # NOTE(review): `tag_headers` is unused in this chunk — kept for parity
    # with the other variant of this script; confirm before removing.
    tag_headers = tags.tag_export_map

    total = len(tw_tags)
    # enumerate(..., 1) replaces the original's manual `cur + 1`.
    for cur, row in enumerate(tw_tags, 1):
        print_progress(cur, total, "tags")
        tags.update_tag_row(row)
class TestEFiction(TestCase): args = testArgs() log = logger("test") sql = Sql(args, log) tags = Tags(args, sql.db, log) efiction = eFiction(args, sql, log, tags) efiction_db = "{0}_efiction".format(args.temp_db_database) @classmethod def setUpClass(cls): cls.efiction.load_database() cls.efiction.copy_tags_to_tags_table(None, "y") @classmethod def tearDownClass(cls): cls.sql.execute("DROP DATABASE IF EXISTS {0}".format(cls.efiction_db)) cls.sql.execute("DROP DATABASE IF EXISTS {0}".format( cls.args.temp_db_database)) def test_load_database(self): cursor = self.sql.cursor test_msg = "original efiction database name from the SQL file should not be created" cursor.execute( "SHOW DATABASES LIKE 'test_efiction_original_database_name_we_dont_want'" ) unwanted_database = cursor.fetchone() self.assertEquals(None, unwanted_database, test_msg) test_msg = "fanfiction_authorfields table should contain the same number of records as in the SQL file" cursor.execute( "SELECT COUNT(*) FROM {0}.fanfiction_authorfields".format( self.efiction_db)) (authorfields, ) = cursor.fetchone() self.assertEqual(3L, authorfields, test_msg) def test_copy_tags_to_tags_table(self): cursor = self.sql.db.cursor(MySQLdb.cursors.DictCursor) cursor.execute("SELECT original_tag FROM {0}.tags".format( self.efiction_db)) tags = list(cursor.fetchall()) unique_tags = set(tag_dict['original_tag'] for tag_dict in tags) self.assertEqual(77L, len(tags), "tags table should be a denormalised table") self.assertIn( u'Václav', unique_tags, "tags table should contain the tags referenced in the story files as a denormalised table" ) def test_copy_to_temp_db(self): self.efiction.copy_to_temp_db(has_coauthors=True) cursor = self.sql.cursor cursor.execute("SELECT * FROM {0}.fanfiction_stories".format( self.efiction_db)) original_stories = cursor.fetchall() cursor.execute("SELECT * FROM {0}.stories".format( self.args.temp_db_database)) stories = cursor.fetchall() cursor.execute("SELECT * FROM 
{0}.fanfiction_chapters".format( self.efiction_db)) original_chapters = cursor.fetchall() cursor.execute("SELECT * FROM {0}.chapters".format( self.args.temp_db_database)) chapters = cursor.fetchall() cursor.execute("SELECT * FROM {0}.fanfiction_authors".format( self.efiction_db)) original_authors = cursor.fetchall() cursor.execute("SELECT * FROM {0}.authors".format( self.args.temp_db_database)) authors = cursor.fetchall() self.assertEqual( len(original_stories), len(stories), "temp db stories table should contain all the stories from the original efiction table" ) self.assertEqual( len(original_chapters), len(chapters), "temp db chapters table should contain all the chapters from the original efiction table" ) self.assertEqual( len(original_authors), len(authors), "temp db authors table should contain all the authors from the original efiction table" )
import re

from shared_python import Args
from shared_python.Sql import Sql
from shared_python.Tags import Tags

if __name__ == "__main__":
    """
    This script creates a table called tags in the temporary database and
    denormalises all the tags for each story. This table is the basis for the
    Tag Wrangling sheet and is used to map the tags back to the story when
    the final tables are created.
    """
    args = Args.args_for_02()
    sql = Sql(args)
    tags = Tags(args, sql.db)

    print('---\n Processing tags from stories and bookmarks table in {0}'.format(args.temp_db_database))
    tags.create_tags_table()

    tag_col_list = {}
    stories_id_name = ""
    stories_table_name = ""

    # AUTOMATED ARCHIVE
    if args.archive_type == 'AA':
        # Prompt for the story table; empty input takes '<prefix>_stories'.
        story_table_name = raw_input('Story table name (default: "{0}_stories"): '.format(args.db_table_prefix))
        if story_table_name is None or story_table_name == '':
            story_table_name = '{0}_stories'.format(args.db_table_prefix)
        # Prompt for the bookmark table; empty input takes '<prefix>_bookmarks'.
        bookmark_table_name = raw_input('Bookmark table name (default: "{0}_bookmarks"): '.format(args.db_table_prefix))
        if bookmark_table_name is None or bookmark_table_name == '':
            # (chunk truncated here — the default assignment continues beyond this view)
from shared_python.Args import Args
from shared_python.Sql import Sql
from shared_python.Tags import Tags

if __name__ == "__main__":
    """
    This script creates a table called tags in the temporary database and
    denormalises all the tags for each story. This table is the basis for the
    Tag Wrangling sheet and is used to map the tags back to the story when
    the final tables are created.
    """
    args_obj = Args()
    args = args_obj.args_for_02()
    log = args_obj.logger_with_filename()
    sql = Sql(args, log)
    tags = Tags(args, sql.db, log)

    log.info('Processing tags from stories and bookmarks table in {0}'.format(
        args.temp_db_database))
    tags.create_tags_table()

    tag_col_list = {}
    stories_id_name = ""
    stories_table_name = ""

    # AUTOMATED ARCHIVE
    if args.archive_type == 'AA':
        # Prompt for the story table; empty input takes the 'stories' default.
        story_table_name = raw_input('Story table name (default: "stories"): ')
        if story_table_name is None or story_table_name == '':
            story_table_name = 'stories'
        # (chunk ends here — the AA branch appears to continue beyond this view)