from eFiction import efiction
from shared_python import Args
from shared_python.Chapters import Chapters
from shared_python.FinalTables import FinalTables
from shared_python.Sql import Sql
from shared_python.Tags import Tags


def _clean_email(author):
    email = author['email']
    if email is None or email == '':
        email = u'{0}{1}[email protected]'.format(author['name'], args.archive_name)\
            .replace(' ', '').replace("'", "")
    if email.startswith('mailto:'):
        email = email.replace('mailto:', '')
    return email
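
# Illustration (hypothetical values): an author row of
#   {'name': "Jo Bloggs", 'email': 'mailto:jo@example.com'}
# yields 'jo@example.com', while a missing or empty email yields a
# placeholder address built from the name and the archive name with
# spaces and apostrophes stripped.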


if __name__ == "__main__":
    args = Args.args_for_05()
    sql = Sql(args)
    tags = Tags(args, sql.db)
    final = FinalTables(args, sql.db)
    chaps = Chapters(args, sql.db)

    filter = ''
    coauthors = {}

    print "Creating destination tables in {0}".format(args.output_database)

    if args.archive_type == 'EF':
        table_names = efiction.table_names()
        has_coauthor_table = raw_input(
            "\nDoes this archive have a coauthors table? Y/N\n")
        has_coauthors = has_coauthor_table.lower() == 'y'
        if has_coauthors:
Example no. 2
from eFiction import efiction
from shared_python.Args import Args  # assumed module path, mirroring Sql/Tags
from shared_python.FinalTables import FinalTables
from shared_python.Sql import Sql
from shared_python.Tags import Tags


def valid_tags(key, tag_type_list):
  return [d[key].strip() for d in tag_type_list
          if key in d
          and d[key] is not None
          and d[key] != '']
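
# Illustration (hypothetical data):
#   valid_tags('original_tag', [{'original_tag': ' Angst '}, {'original_tag': None}, {}])
# returns ['Angst']: entries that are missing, None or empty are dropped and
# surrounding whitespace is stripped.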


if __name__ == "__main__":
  args_obj = Args()
  args = args_obj.args_for_06()
  log = args_obj.logger_with_filename()
  sql = Sql(args, log)
  tags = Tags(args, sql.db, log)
  final = FinalTables(args, sql.db, log)

  if args.archive_type == 'EF':
    table_names = efiction.table_names()
  else:
    table_names = {
      'authors': 'authors',
      'stories': 'stories',
      'chapters': 'chapters'
    }

  log.info("Getting all tags per story...")
  tags_by_story_id = tags.tags_by_story_id()
  for (story_id, tags) in tags_by_story_id.items():
Example no. 3
import datetime

from MySQLdb import connect

from shared_python import Common  # assumed home of the print_progress helper
from shared_python.Sql import Sql

# The _extract_* and _is_external helpers used below are defined elsewhere in
# this module and are not shown in this example.


def _create_mysql(args, FILES, log):
    db = connect(args.db_host, args.db_user, args.db_password, "")
    cursor = db.cursor()
    DATABASE_NAME = args.temp_db_database

    # Use the database and empty all the tables
    cursor.execute(u"drop database if exists {0};".format(DATABASE_NAME))
    cursor.execute(u"create database {0};".format(DATABASE_NAME))
    cursor.execute(u"use {0}".format(DATABASE_NAME))

    sql = Sql(args)
    sql.run_script_from_file('shared_python/create-open-doors-tables.sql',
                             DATABASE_NAME)
    db.commit()

    authors = [(FILES[i].get('Author', '').strip(),
                FILES[i].get('Email', FILES[i].get('EmailAuthor',
                                                   '')).lower().strip())
               for i in FILES]
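    # set() below deduplicates (name, email) pairs so each author is inserted only once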
    auth = u"INSERT INTO authors (name, email) VALUES(%s, %s);"
    cursor.executemany(auth, set(authors))
    db.commit()

    # Authors
    auth = u"SELECT * FROM authors;"
    cursor.execute(auth)
    db_authors = cursor.fetchall()

    # Stories and bookmarks
    stories = [(
        i,
        FILES[i].get('Title', '').replace("'", "\\'"),
        FILES[i].get('Summary', '').replace("'", "\\'"),
        _extract_tags(args, FILES[i]),
        _extract_characters(args, FILES[i]),
        datetime.datetime.strptime(
            FILES[i].get(
                'PrintTime', FILES[i].get(
                    'DatePrint', FILES[i].get(
                        'Date',
                        str(datetime.datetime.now().strftime('%m/%d/%y'))))),
            '%m/%d/%y').strftime('%Y-%m-%d'),
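        # Date falls back through PrintTime -> DatePrint -> Date, defaulting
        # to today; parsed as m/d/y and stored as ISO yyyy-mm-dd.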
        FILES[i].get('Location', '').replace("'", "\\'"),
        FILES[i].get('LocationURL', FILES[i].get('StoryURL',
                                                 '')).replace("'", "\\'"),
        FILES[i].get('Notes', '').replace("'", "\\'"),
        _extract_relationships(args, FILES[i]),
        FILES[i].get('Rating', ''),
        FILES[i].get('Warnings', '').replace("'", "\\'"),
        FILES[i].get('Author', '').strip(),
        FILES[i].get('Email', FILES[i].get('EmailAuthor', '')).lower().strip(),
        FILES[i].get('FileType', args.chapters_file_extensions)
        if not _is_external(FILES[i]) else 'bookmark',
        _extract_fandoms(args, FILES[i]),
    ) for i in FILES]

    cur = 0
    total = len(FILES)
    for (original_id, title, summary, tags, characters, date, location, url,
         notes, pairings, rating, warnings, author, email, filetype,
         fandoms) in set(stories):

        cur = Common.print_progress(cur, total)
        try:
            # For AA archives with external links:
            if filetype != 'bookmark':
                if location == '':
                    filename = url
                else:
                    filename = location + '.' + filetype
                table_name = 'stories'
            else:
                filename = url
                table_name = 'bookmarks'

            # Clean up fandoms and add default fandom if it exists
            final_fandoms = fandoms.replace("'", r"\'")
            if args.default_fandom is not None:
                if final_fandoms == '' or final_fandoms == args.default_fandom:
                    final_fandoms = args.default_fandom
                else:
                    final_fandoms = args.default_fandom + ', ' + final_fandoms
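            # e.g. (hypothetical values) default_fandom 'due South' with
            # final_fandoms 'Another Fandom' stores 'due South, Another Fandom';
            # an empty final_fandoms stores just the default.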

            result = [
                element for element in db_authors
                if element[1] == author and element[2] == email
            ]
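            # db_authors rows are (id, name, email) tuples, so match on name
            # and email, then take the id of the first match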
            authorid = result[0][0]

            stor = u"""
        INSERT INTO {0} (id, fandoms, title, summary, tags, characters, date, url, notes, relationships, rating, warnings, author_id)
        VALUES({1}, '{2}', '{3}', '{4}', '{5}', '{6}', '{7}', '{8}', '{9}', '{10}', '{11}', '{12}', '{13}');\n""" \
              .format(table_name,
                      original_id,
                      final_fandoms.replace(r"\\", "\\"),
                      title.replace(r"\\", "\\"),
                      summary,
                      tags,
                      characters,
                      date,
                      filename,
                      notes,
                      pairings,
                      rating,
                      warnings,
                      authorid)
            cursor.execute(stor)
        except Exception:
            log.error("table name: {0}\noriginal id: {1}\nfinal fandoms: '{2}'\ntitle: '{3}'\nsummary: '{4}'\ntags: '{5}'" \
                  "\ncharacters: '{6}'\ndate: '{7}'\nfilename: '{8}'\nnotes: '{9}'\npairings: '{10}'\nrating: '{11}'" \
                  "\nwarnings: '{12}'\nauthor id: '{13}'"\
              .format(table_name,
                  original_id,
                  final_fandoms,
                  title,
                  summary,
                  tags,
                  characters,
                  date,
                  filename,
                  notes,
                  pairings,
                  rating,
                  warnings,
                  authorid))
            raise
    db.commit()
Example no. 4

# encoding: utf-8
import csv

from HTMLParser import HTMLParser

from shared_python.Args import Args  # assumed module path, mirroring Sql/Tags
from shared_python.Sql import Sql
from shared_python.Tags import Tags


# Signature inferred from the call sites below; `results` is the module-level
# query result assigned in the main block.
def write_csv(filename, columns):
    html_parser = HTMLParser()
    with open(filename, 'w') as fp:
        myFile = csv.writer(fp)
        myFile.writerow(columns)
        for row in results:
            r = []
            for s in row:
                r.append('' if s is None else html_parser.unescape(unicode(s)).
                         encode('utf-8'))
            myFile.writerows([r])
        fp.close()


if __name__ == "__main__":
    """
  This step exports the Tag Wrangling and Authors with stories CSV files which you then have to import into Google
  Spreadsheet and share with the rest of the Open Doors committee.
  """
    args_obj = Args()
    args = args_obj.args_for_03()
    log = args_obj.logger_with_filename()
    sql = Sql(args, log)
    tags = Tags(args, sql.db, log)

    log.info('Exporting tags from {0} to {1}'.format(args.temp_db_database,
                                                     args.output_folder))
    cols = tags.tag_export_map
    results = tags.distinct_tags()
    write_csv(
        '{0}/{1} - tags.csv'.format(args.output_folder, args.archive_name), [
            cols['original_tagid'], cols['original_tag'],
            cols['original_table'], cols['original_parent'],
            cols['ao3_tag_fandom'], cols['ao3_tag'], cols['ao3_tag_type'],
            cols['ao3_tag_category'], cols['original_description'], "TW Notes"
        ])

    log.debug('Exporting authors with stories from {0} to {1}'.format(
        args.temp_db_database, args.output_folder))

Example no. 5
import re

from shared_python import Args
from shared_python.Sql import Sql
from shared_python.Tags import Tags

if __name__ == "__main__":
    args = Args.args_for_02()
    sql = Sql(args)
    tags = Tags(args, sql.db)
    print('--- Processing tags from stories table in {0}'.format(
        args.db_database))
    tags.create_tags_table()

    # eg: python 01-Load-into-Mysql.py -dh localhost -du root -dt dsa -dd temp_python -a AA -f /Users/emma/OneDrive/DSA/ARCHIVE_DB.pl -o .
    tag_col_list = {}
    stories_id_name = ""
    stories_table_name = ""

    # AUTOMATED ARCHIVE
    if args.archive_type == 'AA':

        table_name = raw_input(
            'Story table name (default: "{0}_stories"): '.format(
                args.db_table_prefix))
        if table_name is None or table_name == '':
            table_name = '{0}_stories'.format(args.db_table_prefix)
        tag_columns = raw_input(
            'Column names containing tags \n   (delimited by commas - default: "tags, warnings, characters, fandoms, relationships"): '
        )
        if tag_columns is None or tag_columns == '':
Example no. 6
# encoding: utf-8
import csv
import sys

from shared_python import Args
from shared_python.Sql import Sql
from shared_python.Tags import Tags

if __name__ == "__main__":
    args = Args.args_for_04()
    sql = Sql(args)
    tags = Tags(args, sql.db)

    # Input CSV from TW spreadsheet
    # Rename tags in `tags` table, populate ao3_tag_table column
    # eg: python 04-Rename-Tags.py -dh localhost -du root -dt dsa -dd temp_python -a EF -i path/to/tw-spreadsheet.csv
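    # Each DictReader row is a dict keyed by the spreadsheet headers exported
    # in step 03 (the values of tags.tag_export_map); update_tag_row uses it
    # to rename the tag and fill in the ao3_tag_* columns.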

    with open(args.tag_input_file, 'r') as csvfile:
        tw_tags = list(csv.DictReader(csvfile))
        tag_headers = tags.tag_export_map
        total = len(tw_tags)

        for cur, row in enumerate(tw_tags):
            sys.stdout.write('\r{0}/{1} tags to map'.format(cur + 1, total))
            sys.stdout.flush()

            prefix = 'fanfiction' if args.archive_type == 'EF' else ''
            tags.update_tag_row(row, prefix)
Example no. 7

# encoding: utf-8
import csv
import os

from eFiction import efiction
from shared_python import Args
from shared_python.Chapters import Chapters
from shared_python.FinalTables import FinalTables
from shared_python.Sql import Sql
from shared_python.Tags import Tags

if __name__ == "__main__":
    args = Args.args_for_05()
    sql = Sql(args)

    filter = 'WHERE `id` in '

    story_exclusion_filter = ''
    # Filter out do-not-import (DNI) stories - story_ids_to_remove must point
    # to a file containing a comma-separated list of DNI story ids
    if os.path.exists(args.story_ids_to_remove):
        with open(args.story_ids_to_remove, "rt") as f:
            for line in f:
                story_exclusion_filter = filter + '(' + line + ')'
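                # e.g. a file whose single line is "101, 102, 305"
                # (hypothetical ids) produces: WHERE `id` in (101, 102, 305)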

    command = "SET SQLDELETE FROM `{0}`.`{1}_stories` {2}".format(
        args.output_database, args.db_table_prefix, story_exclusion_filter)
    print command
    result = sql.execute(command)
    print result
Example no. 8
# encoding: utf-8
import csv

from HTMLParser import HTMLParser

from shared_python import Args
from shared_python.Sql import Sql
from shared_python.Tags import Tags


# Signature inferred from the call sites below; `results` is the module-level
# query result assigned in the main block.
def write_csv(filename, columns):
    html_parser = HTMLParser()
    with open(filename, 'w') as fp:
        myFile = csv.writer(fp)
        myFile.writerow(columns)
        for row in results:
            r = []
            for s in row:
                r.append('' if s is None else html_parser.unescape(unicode(s)).
                         encode('utf-8'))
            myFile.writerows([r])
        fp.close()


if __name__ == "__main__":
    args = Args.args_for_03()
    sql = Sql(args)
    tags = Tags(args, sql.db)
    print('--- Exporting tags from {0}'.format(args.db_database))
    cols = tags.tag_export_map
    results = tags.distinct_tags()
    write_csv('{0} - tags.csv'.format(args.db_database), [
        cols['original_tagid'], cols['original_tag'], cols['original_table'],
        cols['original_parent'], cols['ao3_tag_fandom'], cols['ao3_tag'],
        cols['ao3_tag_type'], cols['ao3_tag_category'],
        cols['original_description'], "TW Notes"
    ])

    print('--- Exporting authors with stories from {0}'.format(
        args.db_database))
    if args.archive_type == 'AA':
        author_table = '{0}.{1}_authors'.format(args.db_database,
                                                args.db_table_prefix)

Example no. 9

import MySQLdb.cursors
from unittest import TestCase

from shared_python.Sql import Sql
from shared_python.Tags import Tags

# testArgs, logger and the eFiction class are project-local test helpers;
# their import paths are not shown in this example.


class TestEFiction(TestCase):
    args = testArgs()
    log = logger("test")
    sql = Sql(args, log)
    tags = Tags(args, sql.db, log)
    efiction = eFiction(args, sql, log, tags)
    efiction_db = "{0}_efiction".format(args.temp_db_database)

    @classmethod
    def setUpClass(cls):
        cls.efiction.load_database()
        cls.efiction.copy_tags_to_tags_table(None, "y")

    @classmethod
    def tearDownClass(cls):
        cls.sql.execute("DROP DATABASE IF EXISTS {0}".format(cls.efiction_db))
        cls.sql.execute("DROP DATABASE IF EXISTS {0}".format(
            cls.args.temp_db_database))

    def test_load_database(self):
        cursor = self.sql.cursor

        test_msg = "original efiction database name from the SQL file should not be created"
        cursor.execute(
            "SHOW DATABASES LIKE 'test_efiction_original_database_name_we_dont_want'"
        )
        unwanted_database = cursor.fetchone()
        self.assertEqual(None, unwanted_database, test_msg)

        test_msg = "fanfiction_authorfields table should contain the same number of records as in the SQL file"
        cursor.execute(
            "SELECT COUNT(*) FROM {0}.fanfiction_authorfields".format(
                self.efiction_db))
        (authorfields, ) = cursor.fetchone()
        self.assertEqual(3L, authorfields, test_msg)

    def test_copy_tags_to_tags_table(self):
        cursor = self.sql.db.cursor(MySQLdb.cursors.DictCursor)
        cursor.execute("SELECT original_tag FROM {0}.tags".format(
            self.efiction_db))
        tags = list(cursor.fetchall())
        unique_tags = set(tag_dict['original_tag'] for tag_dict in tags)
        self.assertEqual(77L, len(tags),
                         "tags table should be a denormalised table")
        self.assertIn(
            u'Václav', unique_tags,
            "tags table should contain the tags referenced in the story files as a denormalised table"
        )

    def test_copy_to_temp_db(self):
        self.efiction.copy_to_temp_db(has_coauthors=True)
        cursor = self.sql.cursor
        cursor.execute("SELECT * FROM {0}.fanfiction_stories".format(
            self.efiction_db))
        original_stories = cursor.fetchall()
        cursor.execute("SELECT * FROM {0}.stories".format(
            self.args.temp_db_database))
        stories = cursor.fetchall()

        cursor.execute("SELECT * FROM {0}.fanfiction_chapters".format(
            self.efiction_db))
        original_chapters = cursor.fetchall()
        cursor.execute("SELECT * FROM {0}.chapters".format(
            self.args.temp_db_database))
        chapters = cursor.fetchall()

        cursor.execute("SELECT * FROM {0}.fanfiction_authors".format(
            self.efiction_db))
        original_authors = cursor.fetchall()
        cursor.execute("SELECT * FROM {0}.authors".format(
            self.args.temp_db_database))
        authors = cursor.fetchall()

        self.assertEqual(
            len(original_stories), len(stories),
            "temp db stories table should contain all the stories from the original efiction table"
        )
        self.assertEqual(
            len(original_chapters), len(chapters),
            "temp db chapters table should contain all the chapters from the original efiction table"
        )
        self.assertEqual(
            len(original_authors), len(authors),
            "temp db authors table should contain all the authors from the original efiction table"
        )
Example no. 10
import datetime

import MySQLdb

from shared_python.Sql import Sql


def _create_mysql(args, FILES):
    db = MySQLdb.connect(args.db_host, args.db_user, args.db_password, "")
    cursor = db.cursor()
    DATABASE_NAME = args.db_database
    PREFIX = args.db_table_prefix

    # Use the database and empty all the tables
    cursor.execute(u"drop database if exists {0};".format(DATABASE_NAME))
    cursor.execute(u"create database {0};".format(DATABASE_NAME))
    cursor.execute(u"use {0}".format(DATABASE_NAME))

    sql = Sql(args)
    sql.run_script_from_file('miscellaneous/open-doors-table-schema.sql', args)
    db.commit()

    authors = [(FILES[i].get('Author',
                             '').strip(), FILES[i].get('Email',
                                                       '').lower().strip())
               for i in FILES]
    auth = u"INSERT INTO {0}_authors (name, email) VALUES(%s, %s);".format(
        PREFIX)
    cursor.executemany(auth, set(authors))
    db.commit()

    auth = u"SELECT * FROM {0}_authors;".format(PREFIX)
    cursor.execute(auth)
    db_authors = cursor.fetchall()

    stories = [
        (
            FILES[i].get('Title', '').replace("'", "\\'"),
            FILES[i].get('Summary', '').replace("'", "\\'"),
            FILES[i].get('Category', '').replace("'", "\\'"),
            FILES[i].get('Characters', '').replace("'", "\\'"),
            datetime.datetime.strptime(
                FILES[i].get(
                    'PrintTime',
                    str(datetime.datetime.now().strftime('%m/%d/%y'))),
                '%m/%d/%y').strftime('%Y-%m-%d'),
            # Some AA archives have a filetype
            # FILES[i].get('FileType', 'bookmark'),
            FILES[i].get('Location', '').replace("'", "\\'"),
            FILES[i].get('StoryURL', '').replace("'", "\\'"),
            FILES[i].get('Notes', '').replace("'", "\\'"),
            FILES[i].get('Pairing',
                         '').replace("'",
                                     "\\'"),  # might be Pairings in some cases
            FILES[i].get('Rating', ''),
            FILES[i].get('Warnings', '').replace("'", "\\'"),
            FILES[i].get('Author', '').strip(),
            FILES[i].get('Email', '').lower().strip(),
        ) for i in FILES
    ]
    for (title, summary, category, characters, date, location, url, notes,
         pairings, rating, warnings, author, email) in set(stories):
        try:
            table_name = '{0}_stories'.format(PREFIX)
            filename = location + '.html'  # not true for all AA archives!

            # For AA archives with external links:
            # if (filetype != 'bookmark'):
            #   filename = location + '.' + filetype
            #   table_name = '{0}_stories'.format(PREFIX)
            # else:
            #   filename = url
            #   table_name = '{0}_bookmarks'.format(PREFIX)

            result = [
                element for element in db_authors
                if element[1] == author and element[2] == email
            ]
            authorid = result[0][0]

            stor = u"""INSERT INTO {0} (fandoms, title, summary, tags, characters, date, url, notes, relationships, rating, warnings, authorid)
      			 VALUES('due South', '{1}', '{2}', '{3}', '{4}', '{5}', '{6}', '{7}', '{8}', '{9}', '{10}', '{11}');\n""" \
              .format(unicode(table_name),
                      unicode(title, 'utf-8'),
                      unicode(summary, 'utf-8'),
                      category,
                      characters,
                      date,
                      filename,
                      unicode(notes, 'utf-8'),
                      pairings,
                      rating,
                      warnings,
                      authorid)
            cursor.execute(stor)
        except Exception:
            print(title, summary, category, characters, date, location, url)
            raise
    db.commit()
Example no. 11
from shared_python import Args
from shared_python.Sql import Sql
from automated_archive import aa

if __name__ == "__main__":
    args = Args.args_for_01()
    sql = Sql(args)

    # eg: python 01-Load-into-Mysql.py -dh localhost -du root -dt dsa -dd temp_python -a AA -f /path/to/ARCHIVE_DB.pl -o .
    if args.archive_type == 'AA':
        print('--- Loading Automated Archive file "{0}" into database "{1}"'.
              format(args.db_input_file, args.db_database))
        aa.clean_and_load_data(args)

    # eg: python 01-Load-into-Mysql.py -dh localhost -du root -dt sd -dd temp_python -a EF -f /path/to/backup-from-efiction.sql -o .
    elif args.archive_type == 'EF':
        print('Loading eFiction file "{0}" into database "{1}"'.format(
            args.db_input_file, args.db_database))
        sql.run_script_from_file(args.db_input_file,
                                 database=args.db_database,
                                 prefix=args.db_table_prefix)
Example no. 12

from shared_python.Args import Args  # assumed module path, mirroring Sql/Tags
from shared_python.Chapters import Chapters
from shared_python.FinalTables import FinalTables
from shared_python.Sql import Sql
from shared_python.Tags import Tags


def _clean_email(author):
    email = author['email']
    if email is None or email == '':
        email = u'{0}{1}[email protected]'.format(author['name'], args.archive_name)\
          .replace(' ', '').replace("'", "")
    if email.startswith('mailto:'):
        email = email.replace('mailto:', '')
    return email


if __name__ == "__main__":
    args_obj = Args()
    args = args_obj.args_for_05()
    log = args_obj.logger_with_filename()
    sql = Sql(args, log)
    tags = Tags(args, sql.db, log)
    final = FinalTables(args, sql.db, log)
    chaps = Chapters(args, sql.db, log)

    coauthors = {}

    log.info("Creating destination tables in {0}".format(args.output_database))

    table_names = {
        'authors': 'authors',
        'stories': 'stories',
        'chapters': 'chapters',
        'story_links': 'story_links'
    }
    filter = 'WHERE id NOT IN '
Example no. 13
from shared_python import Args
from shared_python.Sql import Sql
from automated_archive import aa

if __name__ == "__main__":
    args = Args.args_for_01()
    sql = Sql(args)

    # eg: python 01-Load-into-Mysql.py -dh localhost -du root -dt dsa -dd temp_python -a AA -f /path/to/ARCHIVE_DB.pl -o .
    if args.archive_type == 'AA':
        print('\nLoading Automated Archive file "{0}" into database "{1}"'.
              format(args.db_input_file, args.temp_db_database))
        aa.clean_and_load_data(args)

    # eg: python 01-Load-into-Mysql.py -dh localhost -du root -dt sd -dd temp_python -a EF -f /path/to/backup-from-efiction.sql -o .
    elif args.archive_type == 'EF':
        print('\nLoading eFiction file "{0}" into database "{1}"'.format(
            args.db_input_file, args.temp_db_database))
        sql.run_script_from_file(args.db_input_file,
                                 database=args.temp_db_database,
                                 prefix=args.db_table_prefix,
                                 initial_load=True)

    print('\n')