class TestPopulate_tags(TestCase):
    """Tests for PopulateTags.tags_for_story using a minimal tag mapping."""

    args = testArgs()
    log = logger("test")
    sql = MagicMock()
    tags = Tags(args, sql.db, log)
    final = MagicMock()
    populate_tags = PopulateTags(args, sql, log, tags, final)

    # Minimal mapping fixture: two fandoms, one freeform tag, one rating.
    basic_tags = {
        'fandoms': [
            {'original_tag': 'Fandom A', 'ao3_tag': 'Fandom A (TV)'},
            {'original_tag': 'Fandom B', 'ao3_tag': 'Fandom B (TV)'},
        ],
        'tags': [
            {'original_tag': 'a tag', 'ao3_tag': 'A Tag'},
        ],
        'rating': [
            {'original_tag': 'PG', 'ao3_tag': 'General Audiences'},
        ],
    }

    def test_default_fandom_ignored_if_fandoms_present(self):
        """When mapped fandoms exist, only they appear in the output string."""
        story_tags = self.populate_tags.tags_for_story(1, self.basic_tags)
        fandom_list = story_tags['fandoms'].split(', ')
        self.assertCountEqual(
            ['Fandom A (TV)', 'Fandom B (TV)'],
            fandom_list,
            'Fandoms should be a comma-separated string of specified AO3 tags')

    def test_default_fandom_used_if_no_fandoms_present(self):
        """With no mapped fandoms, the configured default fandom is used.

        NOTE(review): 'Fandom C (TV)' presumably comes from testArgs() —
        confirm against the test fixture configuration.
        """
        mapping_without_fandoms = dict(self.basic_tags)
        del mapping_without_fandoms['fandoms']
        story_tags = self.populate_tags.tags_for_story(1, mapping_without_fandoms)
        self.assertEqual(
            'Fandom C (TV)', story_tags['fandoms'],
            'Fandoms should be a comma-separated string of specified AO3 tags')
# --- Beispiel #2 (snippet separator from the original code collection) ---
from shared_python.Tags import Tags


def valid_tags(key, tag_type_list):
  """Return the stripped values of `key` from each dict in `tag_type_list`.

  Entries where the key is absent, None or the empty string are skipped.
  """
  values = []
  for entry in tag_type_list:
    value = entry.get(key)
    if value is not None and value != '':
      values.append(value.strip())
  return values


if __name__ == "__main__":
  # Step 06 entry point: gather the wrangled tags for every story before the
  # final tables are populated.
  # NOTE(review): this snippet is truncated — the body of the for-loop at the
  # end is missing from this view.
  args_obj = Args()
  args = args_obj.args_for_06()
  log = args_obj.logger_with_filename()
  sql = Sql(args, log)
  tags = Tags(args, sql.db, log)
  final = FinalTables(args, sql.db, log)

  # eFiction archives provide their own table names; everything else uses the
  # default temporary-table layout.
  if args.archive_type == 'EF':
    table_names = efiction.table_names()
  else:
    table_names = {
      'authors': 'authors',
      'stories': 'stories',
      'chapters': 'chapters'
    }

  log.info("Getting all tags per story...")
  tags_by_story_id = tags.tags_by_story_id()
  # NOTE(review): the loop variable `tags` shadows the Tags helper bound above.
  for (story_id, tags) in tags_by_story_id.items():

def _clean_email(author):
    email = author['email']
    if author['email'] is None or author['email'] == '':
        email = u'{0}{1}[email protected]'.format(author['name'], args.archive_name)\
          .replace(' ', '').replace("'", "")
    if author['email'].startswith('mailto:'):
        email = author['email'].replace('mailto:', '')
    return email


if __name__ == "__main__":
    # Step 05 entry point: create the destination (final) tables.
    # NOTE(review): this snippet is truncated and garbled — the call to
    # sql.execute_dict() below is cut off and spliced with lines that belong
    # to a different script (CSV writing with `r`, `myFile`, `fp`).
    args = Args.args_for_05()
    sql = Sql(args)
    tags = Tags(args, sql.db)
    final = FinalTables(args, sql.db)
    chaps = Chapters(args, sql.db)

    # NOTE(review): `filter` shadows the builtin of the same name.
    filter = ''
    coauthors = {}

    print "Creating destination tables in {0}".format(args.output_database)

    if args.archive_type == 'EF':
        table_names = efiction.table_names()
        # Ask the operator whether the eFiction dump includes a coauthors table.
        has_coauthor_table = raw_input(
            "\nDoes this archive have a coauthors table? Y/N\n")
        has_coauthors = True if str.lower(has_coauthor_table) == 'y' else False
        if has_coauthors:
            coauthors_dict = sql.execute_dict(
                r.append('' if s is None else html_parser.unescape(unicode(s)).
                         encode('utf-8'))
            myFile.writerows([r])
        fp.close()


if __name__ == "__main__":
    """
  This step exports the Tag Wrangling and Authors with stories CSV files which you then have to import into Google
  Spreadsheet and share with the rest of the Open Doors committee.
  """
    # NOTE(review): this snippet is truncated — the authors-with-stories
    # export follows the final log.debug call in the original script.
    args_obj = Args()
    args = args_obj.args_for_03()
    log = args_obj.logger_with_filename()
    sql = Sql(args, log)
    tags = Tags(args, sql.db, log)

    log.info('Exporting tags from {0} to {1}'.format(args.temp_db_database,
                                                     args.output_folder))
    # Column headers come from the shared tag export map; "TW Notes" is an
    # extra spreadsheet-only column appended at the end.
    cols = tags.tag_export_map
    results = tags.distinct_tags()
    write_csv(
        '{0}/{1} - tags.csv'.format(args.output_folder, args.archive_name), [
            cols['original_tagid'], cols['original_tag'],
            cols['original_table'], cols['original_parent'],
            cols['ao3_tag_fandom'], cols['ao3_tag'], cols['ao3_tag_type'],
            cols['ao3_tag_category'], cols['original_description'], "TW Notes"
        ])

    log.debug('Exporting authors with stories from {0} to {1}'.format(
        args.temp_db_database, args.output_folder))
# encoding: utf-8
import csv

import sys

from shared_python import Args
from shared_python.Sql import Sql
from shared_python.Tags import Tags

if __name__ == "__main__":
    # Step 04: apply the Tag Wrangling spreadsheet (exported as CSV) back
    # onto the `tags` table in the temporary database.
    args = Args.args_for_04()
    sql = Sql(args)
    tags = Tags(args, sql.db)

    # Input CSV from TW spreadsheet
    # Rename tags in `tags` table, populate ao3_tag_table column
    # eg: python 04-Rename-Tags.py -dh localhost -du root -dt dsa -dd temp_python -a EF -i path/to/tw-spreadsheet.csv

    with open(args.tag_input_file, 'r') as csvfile:
        tw_tags = list(csv.DictReader(csvfile))
        tag_headers = tags.tag_export_map
        total = len(tw_tags)

        for position, row in enumerate(tw_tags, start=1):
            # Single-line console progress indicator.
            sys.stdout.write('\r{0}/{1} tags to map'.format(position, total))
            sys.stdout.flush()

            # eFiction temporary tables carry the 'fanfiction' prefix.
            table_prefix = 'fanfiction' if args.archive_type == 'EF' else ''
            tags.update_tag_row(row, table_prefix)
# --- Beispiel #6 (snippet separator from the original code collection) ---
from eFiction import efiction
from shared_python.Args import Args
from shared_python.Sql import Sql
from shared_python.Tags import Tags

if __name__ == "__main__":
  # Step 01 (eFiction): load the original eFiction database dump and convert
  # it into the Open Doors temporary table layout.
  args_obj = Args()
  args = args_obj.args_for_01()
  log = args_obj.logger_with_filename()
  sql = Sql(args, log)
  tags = Tags(args, sql.db, log)
  # NOTE(review): rebinds the imported `efiction` module name to an eFiction
  # helper instance; later module-level access hits the instance instead.
  efiction = efiction.eFiction(args, sql, log, tags)

  efiction.convert_efiction_to_temp()
import re

from shared_python import Args
from shared_python.Sql import Sql
from shared_python.Tags import Tags

if __name__ == "__main__":
    # Step 02 entry point: build the denormalised `tags` table from the
    # stories table.
    # NOTE(review): this snippet is truncated — the branch below ends mid-way.
    args = Args.args_for_02()
    sql = Sql(args)
    tags = Tags(args, sql.db)
    print('--- Processing tags from stories table in {0}'.format(
        args.db_database))
    tags.create_tags_table()

    # eg: python 01-Load-into-Mysql.py -dh localhost -du root -dt dsa -dd temp_python -a AA -f /Users/emma/OneDrive/DSA/ARCHIVE_DB.pl -o .
    tag_col_list = {}
    stories_id_name = ""
    stories_table_name = ""

    # AUTOMATED ARCHIVE
    if args.archive_type == 'AA':

        # Prompt for the story table name, defaulting to "<prefix>_stories".
        table_name = raw_input(
            'Story table name (default: "{0}_stories"): '.format(
                args.db_table_prefix))
        if table_name is None or table_name == '':
            table_name = '{0}_stories'.format(args.db_table_prefix)
        tag_columns = raw_input(
            'Column names containing tags \n   (delimited by commas - default: "tags, warnings, characters, fandoms, relationships"): '
        )
        if tag_columns is None or tag_columns == '':
# encoding: utf-8
import csv

from shared_python.Args import Args
from shared_python.Common import print_progress
from shared_python.Sql import Sql
from shared_python.Tags import Tags

if __name__ == "__main__":
    """
  When Tag Wrangling have finished mapping the tags in Google Drive, export the spreadsheet as a CSV file. This script
  then copies the AO3 tags from that file into the tags table in the temporary database.
  """
    args_obj = Args()
    args = args_obj.args_for_04()
    log = args_obj.logger_with_filename()
    sql = Sql(args, log)
    tags = Tags(args, sql.db, log)

    # Read the whole spreadsheet export up front so the progress indicator
    # knows the total row count.
    with open(args.tag_input_file, 'r') as csvfile:
        wrangled_rows = list(csv.DictReader(csvfile))
        tag_headers = tags.tag_export_map
        total = len(wrangled_rows)

        for position, wrangled_row in enumerate(wrangled_rows, start=1):
            print_progress(position, total, "tags")

            tags.update_tag_row(wrangled_row)
class TestEFiction(TestCase):
    """Integration tests for the eFiction loader against a live MySQL server.

    Fixes: Python-2-only long literals (3L, 77L) are syntax errors under
    Python 3 — replaced with plain ints (equal values in both versions) —
    and the deprecated assertEquals alias is replaced with assertEqual.
    """

    args = testArgs()
    log = logger("test")
    sql = Sql(args, log)
    tags = Tags(args, sql.db, log)
    efiction = eFiction(args, sql, log, tags)
    # Name of the temporary database created for the eFiction dump.
    efiction_db = "{0}_efiction".format(args.temp_db_database)

    @classmethod
    def setUpClass(cls):
        # Load the fixture SQL dump once for the whole class, then denormalise
        # the tags ("y" answers the interactive coauthors prompt).
        cls.efiction.load_database()
        cls.efiction.copy_tags_to_tags_table(None, "y")

    @classmethod
    def tearDownClass(cls):
        # Drop both temporary databases so reruns start clean.
        cls.sql.execute("DROP DATABASE IF EXISTS {0}".format(cls.efiction_db))
        cls.sql.execute("DROP DATABASE IF EXISTS {0}".format(
            cls.args.temp_db_database))

    def test_load_database(self):
        """The loader should rename the dump's database and load all rows."""
        cursor = self.sql.cursor

        test_msg = "original efiction database name from the SQL file should not be created"
        cursor.execute(
            "SHOW DATABASES LIKE 'test_efiction_original_database_name_we_dont_want'"
        )
        unwanted_database = cursor.fetchone()
        self.assertEqual(None, unwanted_database, test_msg)

        test_msg = "fanfiction_authorfields table should contain the same number of records as in the SQL file"
        cursor.execute(
            "SELECT COUNT(*) FROM {0}.fanfiction_authorfields".format(
                self.efiction_db))
        (authorfields, ) = cursor.fetchone()
        self.assertEqual(3, authorfields, test_msg)

    def test_copy_tags_to_tags_table(self):
        """Tags should be copied into a denormalised table, Unicode intact."""
        cursor = self.sql.db.cursor(MySQLdb.cursors.DictCursor)
        cursor.execute("SELECT original_tag FROM {0}.tags".format(
            self.efiction_db))
        tags = list(cursor.fetchall())
        unique_tags = set(tag_dict['original_tag'] for tag_dict in tags)
        self.assertEqual(77, len(tags),
                         "tags table should be a denormalised table")
        self.assertIn(
            u'Václav', unique_tags,
            "tags table should contain the tags referenced in the story files as a denormalised table"
        )

    def test_copy_to_temp_db(self):
        """Copying to the temp db should preserve every row of each table."""
        self.efiction.copy_to_temp_db(has_coauthors=True)
        cursor = self.sql.cursor
        cursor.execute("SELECT * FROM {0}.fanfiction_stories".format(
            self.efiction_db))
        original_stories = cursor.fetchall()
        cursor.execute("SELECT * FROM {0}.stories".format(
            self.args.temp_db_database))
        stories = cursor.fetchall()

        cursor.execute("SELECT * FROM {0}.fanfiction_chapters".format(
            self.efiction_db))
        original_chapters = cursor.fetchall()
        cursor.execute("SELECT * FROM {0}.chapters".format(
            self.args.temp_db_database))
        chapters = cursor.fetchall()

        cursor.execute("SELECT * FROM {0}.fanfiction_authors".format(
            self.efiction_db))
        original_authors = cursor.fetchall()
        cursor.execute("SELECT * FROM {0}.authors".format(
            self.args.temp_db_database))
        authors = cursor.fetchall()

        self.assertEqual(
            len(original_stories), len(stories),
            "temp db stories table should contain all the stories from the original efiction table"
        )
        self.assertEqual(
            len(original_chapters), len(chapters),
            "temp db chapters table should contain all the chapters from the original efiction table"
        )
        self.assertEqual(
            len(original_authors), len(authors),
            "temp db authors table should contain all the authors from the original efiction table"
        )
# --- Beispiel #10 (snippet separator from the original code collection) ---
import re

from shared_python import Args
from shared_python.Sql import Sql
from shared_python.Tags import Tags

if __name__ == "__main__":
  """
  This script creates a table called tags in the temporary database and denormalises all the tags for each story.
  This table is the basis for the Tag Wrangling sheet and is used to map the tags back to the story when the final
  tables are created.
  """
  # NOTE(review): this snippet is truncated — the branch below ends mid-way.
  args = Args.args_for_02()
  sql = Sql(args)
  tags = Tags(args, sql.db)
  print('---\n Processing tags from stories and bookmarks table in {0}'.format(args.temp_db_database))
  tags.create_tags_table()

  tag_col_list = {}
  stories_id_name = ""
  stories_table_name = ""

  # AUTOMATED ARCHIVE
  if args.archive_type == 'AA':

    # Prompt for the story table name, defaulting to "<prefix>_stories".
    story_table_name = raw_input('Story table name (default: "{0}_stories"): '.format(args.db_table_prefix))
    if story_table_name is None or story_table_name == '':
      story_table_name = '{0}_stories'.format(args.db_table_prefix)

    # Prompt for the bookmark table name, defaulting to "<prefix>_bookmarks".
    bookmark_table_name = raw_input('Bookmark table name (default: "{0}_bookmarks"): '.format(args.db_table_prefix))
    if bookmark_table_name is None or bookmark_table_name == '':
# --- Beispiel #11 (snippet separator from the original code collection) ---
from shared_python.Args import Args
from shared_python.Sql import Sql
from shared_python.Tags import Tags

if __name__ == "__main__":
    """
  This script creates a table called tags in the temporary database and denormalises all the tags for each story.
  This table is the basis for the Tag Wrangling sheet and is used to map the tags back to the story when the final
  tables are created.
  """
    # NOTE(review): this snippet appears truncated — the 'AA' branch likely
    # continues past the last line visible here.
    args_obj = Args()
    args = args_obj.args_for_02()
    log = args_obj.logger_with_filename()
    sql = Sql(args, log)
    tags = Tags(args, sql.db, log)
    log.info('Processing tags from stories and bookmarks table in {0}'.format(
        args.temp_db_database))
    tags.create_tags_table()

    tag_col_list = {}
    stories_id_name = ""
    stories_table_name = ""

    # AUTOMATED ARCHIVE
    if args.archive_type == 'AA':

        # Prompt for the story table name, defaulting to "stories".
        story_table_name = raw_input('Story table name (default: "stories"): ')
        if story_table_name is None or story_table_name == '':
            story_table_name = 'stories'