Python fix_old_links Beispiele, pepysdiary.common.utilities.fix_old_links Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: import_mt_articles.py Projekt: srinchiera/pepysdiary

    def handle(self, *args, **options):
        """Import the MT entries of one blog as Article objects.

        Selects every ``mt_entry`` row whose ``entry_blog_id`` matches
        ``settings.MT_IN_DEPTH_BLOG_ID``, prints its ID and title, and saves
        an Article that reuses the MT entry ID as its primary key.

        ``args`` and ``options`` are accepted but not used.

        The cursor and the connection are closed even when importing a row
        raises, so a failed run does not leave the connection open.
        """
        db = MySQLdb.connect(host=settings.MT_MYSQL_DB_HOST,
                             user=settings.MT_MYSQL_DB_USER,
                             passwd=settings.MT_MYSQL_DB_PASSWORD,
                             db=settings.MT_MYSQL_DB_NAME,
                             charset='utf8',
                             use_unicode=True)
        cur = None
        try:
            cur = db.cursor(MySQLdb.cursors.DictCursor)

            # FETCH THE ENTRIES.
            # The blog ID is handed to the driver as a query parameter so
            # that it does the quoting, rather than being interpolated into
            # the SQL text by hand.
            cur.execute("SELECT entry_id, entry_title, entry_text, "
                        "entry_text_more, entry_excerpt, entry_basename, "
                        "entry_created_on, entry_authored_on "
                        "FROM mt_entry WHERE entry_blog_id=%s",
                        (settings.MT_IN_DEPTH_BLOG_ID,))

            rows = cur.fetchall()
            for row in rows:
                # Single-argument call form: prints the same text under both
                # Python 2 and Python 3.
                print('%s %s' % (row['entry_id'], row['entry_title']))

                # Fix any old-style links in the two text fields; a NULL
                # column becomes an empty string.
                if row['entry_text'] is None:
                    intro = u''
                else:
                    intro = fix_old_links(row['entry_text'])
                if row['entry_text_more'] is None:
                    text = u''
                else:
                    text = fix_old_links(row['entry_text_more'])

                # MT basenames use underscores; slugs use hyphens.
                slug = row['entry_basename'].replace('_', '-')

                article = Article(
                    id=row['entry_id'],
                    title=row['entry_title'],
                    intro=intro,
                    text=text,
                    excerpt=row['entry_excerpt'],
                    slug=slug,
                    # Attach UTC tzinfo to the datetime read from the DB.
                    date_published=row['entry_authored_on'].replace(
                        tzinfo=pytz.utc),
                    status=Article.STATUS_PUBLISHED,
                )
                article.save()

                # SET ORIGINAL CREATED TIME.
                # Assigned after the first save() and then saved again;
                # presumably the model fills in date_created itself on
                # creation -- TODO confirm against the model definition.
                created_time = row['entry_created_on'].replace(
                    tzinfo=pytz.utc)
                article.date_created = created_time
                article.save()
        finally:
            # Release the cursor and connection even if an import step raised.
            try:
                if cur is not None:
                    cur.close()
            finally:
                db.close()

Beispiel #2

0

Datei anzeigen

Datei: import_mt_articles.py Projekt: philgyford/pepysdiary

    def handle(self, *args, **options):
        """Import the MT entries of one blog as Article objects.

        Selects every ``mt_entry`` row whose ``entry_blog_id`` matches
        ``settings.MT_IN_DEPTH_BLOG_ID``, prints its ID and title, and saves
        an Article that reuses the MT entry ID as its primary key.

        ``args`` and ``options`` are accepted but not used.

        The cursor and the connection are closed even when importing a row
        raises, so a failed run does not leave the connection open.
        """
        db = MySQLdb.connect(host=settings.MT_MYSQL_DB_HOST,
                             user=settings.MT_MYSQL_DB_USER,
                             passwd=settings.MT_MYSQL_DB_PASSWORD,
                             db=settings.MT_MYSQL_DB_NAME,
                             charset='utf8',
                             use_unicode=True)
        cur = None
        try:
            cur = db.cursor(MySQLdb.cursors.DictCursor)

            # FETCH THE ENTRIES.
            # The blog ID is passed as a query parameter so the driver does
            # the quoting, instead of building the SQL text by hand.
            cur.execute("SELECT entry_id, entry_title, entry_text, "
                        "entry_text_more, entry_excerpt, entry_basename, "
                        "entry_created_on, entry_authored_on "
                        "FROM mt_entry WHERE entry_blog_id=%s",
                        (settings.MT_IN_DEPTH_BLOG_ID,))

            for row in cur.fetchall():
                print('%s %s' % (row['entry_id'], row['entry_title']))

                # Fix any old-style links in the two text fields; a NULL
                # column becomes an empty string.
                if row['entry_text'] is None:
                    intro = ''
                else:
                    intro = fix_old_links(row['entry_text'])
                if row['entry_text_more'] is None:
                    text = ''
                else:
                    text = fix_old_links(row['entry_text_more'])

                # MT basenames use underscores; slugs use hyphens.
                slug = row['entry_basename'].replace('_', '-')

                # Attach UTC tzinfo to the datetimes read from the DB.
                published_time = row['entry_authored_on'].replace(
                    tzinfo=pytz.utc)
                created_time = row['entry_created_on'].replace(
                    tzinfo=pytz.utc)

                article = Article(id=row['entry_id'],
                                  title=row['entry_title'],
                                  intro=intro,
                                  text=text,
                                  excerpt=row['entry_excerpt'],
                                  slug=slug,
                                  date_published=published_time,
                                  status=Article.STATUS_PUBLISHED)
                article.save()

                # SET ORIGINAL CREATED TIME.
                # Assigned after the first save() and then saved again;
                # presumably the model fills in date_created itself on
                # creation -- TODO confirm against the model definition.
                article.date_created = created_time
                article.save()
        finally:
            # Release the cursor and connection even if an import step raised.
            try:
                if cur is not None:
                    cur.close()
            finally:
                db.close()

Beispiel #3

0

Datei anzeigen

Datei: import_mt_entries.py Projekt: 31H0B1eV/pepysdiary

    def handle(self, *args, **options):
        """Import the MT entries of the diary blog as Entry objects.

        Selects every ``mt_entry`` row whose ``entry_blog_id`` matches
        ``settings.MT_DIARY_BLOG_ID``, prints its ID and title, and saves an
        Entry that reuses the MT entry ID as its primary key.

        ``args`` and ``options`` are accepted but not used.

        The cursor and the connection are closed even when importing a row
        raises, so a failed run does not leave the connection open.
        """
        db = MySQLdb.connect(host=settings.MT_MYSQL_DB_HOST,
                             user=settings.MT_MYSQL_DB_USER,
                             passwd=settings.MT_MYSQL_DB_PASSWORD,
                             db=settings.MT_MYSQL_DB_NAME,
                             charset='utf8',
                             use_unicode=True)
        cur = None
        try:
            cur = db.cursor(MySQLdb.cursors.DictCursor)

            # FETCH THE DIARY ENTRIES.
            # The blog ID is passed as a query parameter so the driver does
            # the quoting, instead of building the SQL text by hand.
            cur.execute("SELECT entry_id, entry_title, entry_text, "
                        "entry_text_more, entry_created_on, entry_authored_on "
                        "FROM mt_entry WHERE entry_blog_id=%s",
                        (settings.MT_DIARY_BLOG_ID,))

            rows = cur.fetchall()
            for row in rows:
                # Single-argument call form: prints the same text under both
                # Python 2 and Python 3.
                print('%s %s' % (row['entry_id'], row['entry_title']))

                # Fix any old-style links in the two text fields; a NULL
                # column becomes an empty string.
                if row['entry_text'] is None:
                    text = u''
                else:
                    text = fix_old_links(row['entry_text'])
                if row['entry_text_more'] is None:
                    footnotes = u''
                else:
                    footnotes = fix_old_links(row['entry_text_more'])

                # diary_date receives the authored-on value exactly as read
                # from the database (no tzinfo is attached here).
                entry = Entry(id=row['entry_id'],
                              title=row['entry_title'],
                              text=text,
                              footnotes=footnotes,
                              diary_date=row['entry_authored_on'])
                entry.save()

                # SET ORIGINAL CREATED TIME.
                # Assigned after the first save() and then saved again;
                # presumably the model fills in date_created itself on
                # creation -- TODO confirm against the model definition.
                created_time = row['entry_created_on'].replace(
                    tzinfo=pytz.utc)
                entry.date_created = created_time
                entry.save()
        finally:
            # Release the cursor and connection even if an import step raised.
            try:
                if cur is not None:
                    cur.close()
            finally:
                db.close()

Beispiel #4

0

Datei anzeigen

Datei: import_mt_entries.py Projekt: srinchiera/pepysdiary

    def handle(self, *args, **options):
        """Import the MT entries of the diary blog as Entry objects.

        Selects every ``mt_entry`` row whose ``entry_blog_id`` matches
        ``settings.MT_DIARY_BLOG_ID``, prints its ID and title, and saves an
        Entry that reuses the MT entry ID as its primary key.

        ``args`` and ``options`` are accepted but not used.

        The cursor and the connection are closed even when importing a row
        raises, so a failed run does not leave the connection open.
        """
        db = MySQLdb.connect(host=settings.MT_MYSQL_DB_HOST,
                             user=settings.MT_MYSQL_DB_USER,
                             passwd=settings.MT_MYSQL_DB_PASSWORD,
                             db=settings.MT_MYSQL_DB_NAME,
                             charset='utf8',
                             use_unicode=True)
        cur = None
        try:
            cur = db.cursor(MySQLdb.cursors.DictCursor)

            # FETCH THE DIARY ENTRIES.
            # Parameterized: the driver quotes the blog ID for us.
            query = ("SELECT entry_id, entry_title, entry_text, "
                     "entry_text_more, entry_created_on, entry_authored_on "
                     "FROM mt_entry WHERE entry_blog_id=%s")
            cur.execute(query, (settings.MT_DIARY_BLOG_ID,))

            for row in cur.fetchall():
                # Single-argument call form: prints the same text under both
                # Python 2 and Python 3.
                print('%s %s' % (row['entry_id'], row['entry_title']))

                # Fix any old-style links in the two text fields; a NULL
                # column becomes an empty string.
                if row['entry_text'] is None:
                    text = u''
                else:
                    text = fix_old_links(row['entry_text'])
                if row['entry_text_more'] is None:
                    footnotes = u''
                else:
                    footnotes = fix_old_links(row['entry_text_more'])

                # diary_date receives the authored-on value exactly as read
                # from the database (no tzinfo is attached here).
                entry = Entry(id=row['entry_id'],
                              title=row['entry_title'],
                              text=text,
                              footnotes=footnotes,
                              diary_date=row['entry_authored_on'])
                entry.save()

                # SET ORIGINAL CREATED TIME.
                # Assigned after the first save() and then saved again;
                # presumably the model fills in date_created itself on
                # creation -- TODO confirm against the model definition.
                entry.date_created = row['entry_created_on'].replace(
                    tzinfo=pytz.utc)
                entry.save()
        finally:
            # Release the cursor and connection even if an import step raised.
            try:
                if cur is not None:
                    cur.close()
            finally:
                db.close()

Beispiel #5

0

Datei anzeigen

Datei: import_mt_posts.py Projekt: srinchiera/pepysdiary

    def handle(self, *args, **options):
        """Import the MT entries of the news blog as Post objects.

        Selects every ``mt_entry`` row whose ``entry_blog_id`` matches
        ``settings.MT_NEWS_BLOG_ID``, looks up the entry's primary MT
        category, and saves a Post that reuses the MT entry ID as its
        primary key.

        ``args`` and ``options`` are accepted but not used.

        Raises:
            IndexError: an entry has no primary row in ``mt_placement``.
            KeyError: the primary category ID is not in the mapping below.

        The cursor and the connection are closed even when importing a row
        raises, so a failed run does not leave the connection open.
        """
        # Mapping MT Category IDs to new values.
        categories = {
            '225': Post.CATEGORY_EVENTS,
            '222': Post.CATEGORY_HOUSEKEEPING,
            '223': Post.CATEGORY_FEATURES,
            '224': Post.CATEGORY_MEDIA,
            '220': Post.CATEGORY_PRESS,
            '221': Post.CATEGORY_STATISTICS,
        }

        db = MySQLdb.connect(host=settings.MT_MYSQL_DB_HOST,
                             user=settings.MT_MYSQL_DB_USER,
                             passwd=settings.MT_MYSQL_DB_PASSWORD,
                             db=settings.MT_MYSQL_DB_NAME,
                             charset='utf8',
                             use_unicode=True)
        cur = None
        try:
            cur = db.cursor(MySQLdb.cursors.DictCursor)

            # FETCH THE NEWS ENTRIES.
            # Values are passed as query parameters so the driver does the
            # quoting, instead of building the SQL text by hand.
            cur.execute("SELECT entry_id, entry_title, entry_text, "
                        "entry_text_more, "
                        "entry_created_on, entry_authored_on "
                        "FROM mt_entry WHERE entry_blog_id=%s",
                        (settings.MT_NEWS_BLOG_ID,))

            rows = cur.fetchall()
            for row in rows:
                # Single-argument call form: prints the same text under both
                # Python 2 and Python 3.
                print('%s %s' % (row['entry_id'], row['entry_title']))

                # Fix any old-style links in the two text fields; a NULL
                # column becomes an empty string.
                if row['entry_text'] is None:
                    intro = u''
                else:
                    intro = fix_old_links(row['entry_text'])
                if row['entry_text_more'] is None:
                    text = u''
                else:
                    text = fix_old_links(row['entry_text_more'])

                # Get the Post's MT Category.
                # Each entry is probably only in one category, but we only
                # use one per Post now so we only fetch the Primary one.
                # Re-using the cursor is safe: all entry rows were already
                # fetched into `rows` above.
                cur.execute("SELECT placement_category_id FROM mt_placement "
                            "WHERE placement_entry_id=%s AND "
                            "placement_is_primary='1'",
                            (row['entry_id'],))
                cat_row = cur.fetchall()[0]
                # int() then str() normalises the DB value so that it
                # matches the string keys of `categories`.
                category = categories[
                    str(int(cat_row['placement_category_id']))]

                post = Post(
                    id=row['entry_id'],
                    title=row['entry_title'],
                    intro=intro,
                    text=text,
                    # Attach UTC tzinfo to the datetime read from the DB.
                    date_published=row['entry_authored_on'].replace(
                        tzinfo=pytz.utc),
                    status=Post.STATUS_PUBLISHED,
                    category=category,
                )
                post.save()

                # SET ORIGINAL CREATED TIME.
                # Assigned after the first save() and then saved again;
                # presumably the model fills in date_created itself on
                # creation -- TODO confirm against the model definition.
                created_time = row['entry_created_on'].replace(
                    tzinfo=pytz.utc)
                post.date_created = created_time
                post.save()
        finally:
            # Release the cursor and connection even if an import step raised.
            try:
                if cur is not None:
                    cur.close()
            finally:
                db.close()

Beispiel #6

0

Datei anzeigen

Datei: import_mt_posts.py Projekt: 31H0B1eV/pepysdiary

    def handle(self, *args, **options):
        """Import the MT entries of the news blog as Post objects.

        Selects every ``mt_entry`` row whose ``entry_blog_id`` matches
        ``settings.MT_NEWS_BLOG_ID``, looks up the entry's primary MT
        category, and saves a Post that reuses the MT entry ID as its
        primary key.

        ``args`` and ``options`` are accepted but not used.

        Raises:
            IndexError: an entry has no primary row in ``mt_placement``.
            KeyError: the primary category ID is not in the mapping below.

        The cursor and the connection are closed even when importing a row
        raises, so a failed run does not leave the connection open.
        """
        # Mapping MT Category IDs to new values.
        categories = {
            '225': Post.CATEGORY_EVENTS,
            '222': Post.CATEGORY_HOUSEKEEPING,
            '223': Post.CATEGORY_FEATURES,
            '224': Post.CATEGORY_MEDIA,
            '220': Post.CATEGORY_PRESS,
            '221': Post.CATEGORY_STATISTICS,
        }

        db = MySQLdb.connect(host=settings.MT_MYSQL_DB_HOST,
                             user=settings.MT_MYSQL_DB_USER,
                             passwd=settings.MT_MYSQL_DB_PASSWORD,
                             db=settings.MT_MYSQL_DB_NAME,
                             charset='utf8',
                             use_unicode=True)
        cur = None
        try:
            cur = db.cursor(MySQLdb.cursors.DictCursor)

            # FETCH THE NEWS ENTRIES.
            # Parameterized: the driver quotes the blog ID for us.
            entries_sql = ("SELECT entry_id, entry_title, entry_text, "
                           "entry_text_more, "
                           "entry_created_on, entry_authored_on "
                           "FROM mt_entry WHERE entry_blog_id=%s")
            # Only the primary placement is wanted for each entry.
            category_sql = ("SELECT placement_category_id FROM mt_placement "
                            "WHERE placement_entry_id=%s AND "
                            "placement_is_primary='1'")

            cur.execute(entries_sql, (settings.MT_NEWS_BLOG_ID,))

            for row in cur.fetchall():
                # Single-argument call form: prints the same text under both
                # Python 2 and Python 3.
                print('%s %s' % (row['entry_id'], row['entry_title']))

                # Fix any old-style links in the two text fields; a NULL
                # column becomes an empty string.
                if row['entry_text'] is None:
                    intro = u''
                else:
                    intro = fix_old_links(row['entry_text'])
                if row['entry_text_more'] is None:
                    text = u''
                else:
                    text = fix_old_links(row['entry_text_more'])

                # Get the Post's MT Category.
                # Each entry is probably only in one category, but we only
                # use one per Post now so we only fetch the Primary one.
                cur.execute(category_sql, (row['entry_id'],))
                cat_row = cur.fetchall()[0]
                # int() then str() normalises the DB value so that it
                # matches the string keys of `categories`.
                category_key = str(int(cat_row['placement_category_id']))
                category = categories[category_key]

                # Attach UTC tzinfo to the datetime read from the DB.
                published_time = row['entry_authored_on'].replace(
                    tzinfo=pytz.utc)

                post = Post(id=row['entry_id'],
                            title=row['entry_title'],
                            intro=intro,
                            text=text,
                            date_published=published_time,
                            status=Post.STATUS_PUBLISHED,
                            category=category)
                post.save()

                # SET ORIGINAL CREATED TIME.
                # Assigned after the first save() and then saved again;
                # presumably the model fills in date_created itself on
                # creation -- TODO confirm against the model definition.
                created_time = row['entry_created_on'].replace(
                    tzinfo=pytz.utc)
                post.date_created = created_time
                post.save()
        finally:
            # Release the cursor and connection even if an import step raised.
            try:
                if cur is not None:
                    cur.close()
            finally:
                db.close()

Beispiel #7

0

Datei anzeigen

Datei: import_mt_comments.py Projekt: philgyford/pepysdiary

    def handle(self, *args, **options):
        """Import visible MT comments as Annotation objects.

        Selects the visible ``mt_comment`` rows belonging to the five blogs
        named in settings, strips ``<a>`` tags from the comment text, and
        saves one Annotation per comment, reusing the MT comment ID as its
        primary key. A progress count is printed every 100 rows.

        A row whose blog ID matches none of the five settings is reported
        and skipped; previously it fell through and used an undefined (or
        stale, from the previous row) ``content_type_id``.

        ``args`` and ``options`` are accepted but not used.

        The cursor and the connection are closed even when importing a row
        raises, so a failed run does not leave the connection open.
        """
        db = MySQLdb.connect(host=settings.MT_MYSQL_DB_HOST,
                             user=settings.MT_MYSQL_DB_USER,
                             passwd=settings.MT_MYSQL_DB_PASSWORD,
                             db=settings.MT_MYSQL_DB_NAME,
                             charset='utf8',
                             use_unicode=True)
        cur = None
        try:
            cur = db.cursor(MySQLdb.cursors.DictCursor)

            # FETCH THE COMMENTS.
            # The blog IDs are passed as query parameters so the driver does
            # the quoting, instead of building the SQL text by hand.
            cur.execute(
                "SELECT comment_id, comment_blog_id, comment_entry_id, "
                "comment_ip, comment_author, comment_email, comment_url, "
                "comment_text, comment_created_on, comment_modified_on "
                "FROM mt_comment WHERE comment_visible='1' "
                "AND comment_blog_id IN (%s, %s, %s, %s, %s)",
                (settings.MT_DIARY_BLOG_ID,
                 settings.MT_ENCYCLOPEDIA_BLOG_ID,
                 settings.MT_IN_DEPTH_BLOG_ID,
                 settings.MT_NEWS_BLOG_ID,
                 settings.MT_LETTERS_BLOG_ID))
            rows = cur.fetchall()

            # `count` is the number of rows already examined.
            for count, row in enumerate(rows):
                if count % 100 == 0:
                    print(count)

                # Fix any old-style links in the comment text; a NULL column
                # becomes an empty string.
                if row['comment_text'] is None:
                    text = ''
                else:
                    text = fix_old_links(row['comment_text'])

                # Remove any <a href...> and </a> HTML:
                text = re.sub(r'<\/?a[^>]*>', '', text)

                # Pick the content type for the blog the comment belongs to.
                # NOTE(review): the numeric IDs are presumably ContentType
                # primary keys of the target models -- confirm.
                if row['comment_blog_id'] == settings.MT_DIARY_BLOG_ID:
                    content_type_id = 10
                elif row['comment_blog_id'] == \
                        settings.MT_ENCYCLOPEDIA_BLOG_ID:
                    content_type_id = 11
                elif row['comment_blog_id'] == settings.MT_IN_DEPTH_BLOG_ID:
                    content_type_id = 14
                elif row['comment_blog_id'] == settings.MT_NEWS_BLOG_ID:
                    content_type_id = 15
                elif row['comment_blog_id'] == settings.MT_LETTERS_BLOG_ID:
                    content_type_id = 13
                else:
                    print("INVALID BLOG ID (%s) FOR COMMENT ID %s" % (
                        row['comment_blog_id'], row['comment_id']))
                    # There is no valid content type for this comment, so do
                    # not save it under a wrong or undefined one.
                    continue

                annotation = Annotation(
                    id=row['comment_id'],
                    comment=text,
                    site_id=1,
                    content_type_id=content_type_id,
                    object_pk=row['comment_entry_id'],
                    user_name=row['comment_author'],
                    user_email=row['comment_email'],
                    user_url=row['comment_url'],
                    ip_address=row['comment_ip'],
                    is_public=True,
                    # Attach UTC tzinfo to the datetime read from the DB.
                    submit_date=row['comment_created_on'].replace(
                        tzinfo=pytz.utc),
                )
                annotation.save()
        finally:
            # Release the cursor and connection even if an import step raised.
            try:
                if cur is not None:
                    cur.close()
            finally:
                db.close()

Beispiel #8

0

Datei anzeigen

Datei: import_mt_comments.py Projekt: srinchiera/pepysdiary

    def handle(self, *args, **options):
        """Import visible MT comments as Annotation objects.

        Selects the visible ``mt_comment`` rows belonging to the five blogs
        named in settings, strips ``<a>`` tags from the comment text, and
        saves one Annotation per comment, reusing the MT comment ID as its
        primary key. A progress count is printed every 100 rows.

        A row whose blog ID matches none of the five settings is reported
        and skipped; previously it fell through and used an undefined (or
        stale, from the previous row) ``content_type_id``.

        ``args`` and ``options`` are accepted but not used.

        The cursor and the connection are closed even when importing a row
        raises, so a failed run does not leave the connection open.
        """
        db = MySQLdb.connect(host=settings.MT_MYSQL_DB_HOST,
                             user=settings.MT_MYSQL_DB_USER,
                             passwd=settings.MT_MYSQL_DB_PASSWORD,
                             db=settings.MT_MYSQL_DB_NAME,
                             charset='utf8',
                             use_unicode=True)
        cur = None
        try:
            cur = db.cursor(MySQLdb.cursors.DictCursor)

            # FETCH THE COMMENTS.
            # Parameterized: the driver quotes the blog IDs for us.
            blog_ids = (
                settings.MT_DIARY_BLOG_ID,
                settings.MT_ENCYCLOPEDIA_BLOG_ID,
                settings.MT_IN_DEPTH_BLOG_ID,
                settings.MT_NEWS_BLOG_ID,
                settings.MT_LETTERS_BLOG_ID,
            )
            cur.execute(
                "SELECT comment_id, comment_blog_id, comment_entry_id, "
                "comment_ip, comment_author, comment_email, comment_url, "
                "comment_text, comment_created_on, comment_modified_on "
                "FROM mt_comment WHERE comment_visible='1' "
                "AND comment_blog_id IN (%s, %s, %s, %s, %s)",
                blog_ids)
            rows = cur.fetchall()

            # `count` is the number of rows already examined.
            for count, row in enumerate(rows):
                if count % 100 == 0:
                    # Single-argument call form: prints the same text under
                    # both Python 2 and Python 3.
                    print(count)

                # Fix any old-style links in the comment text; a NULL column
                # becomes an empty string.
                if row['comment_text'] is None:
                    text = u''
                else:
                    text = fix_old_links(row['comment_text'])

                # Remove any <a href...> and </a> HTML:
                text = re.sub(r'<\/?a[^>]*>', '', text)

                # Pick the content type for the blog the comment belongs to.
                # NOTE(review): the numeric IDs are presumably ContentType
                # primary keys of the target models -- confirm.
                blog_id = row['comment_blog_id']
                if blog_id == settings.MT_DIARY_BLOG_ID:
                    content_type_id = 10
                elif blog_id == settings.MT_ENCYCLOPEDIA_BLOG_ID:
                    content_type_id = 11
                elif blog_id == settings.MT_IN_DEPTH_BLOG_ID:
                    content_type_id = 14
                elif blog_id == settings.MT_NEWS_BLOG_ID:
                    content_type_id = 15
                elif blog_id == settings.MT_LETTERS_BLOG_ID:
                    content_type_id = 13
                else:
                    print("INVALID BLOG ID (%s) FOR COMMENT ID %s" % (
                        row['comment_blog_id'], row['comment_id']))
                    # There is no valid content type for this comment, so do
                    # not save it under a wrong or undefined one.
                    continue

                annotation = Annotation(
                    id=row['comment_id'],
                    comment=text,
                    site_id=1,
                    content_type_id=content_type_id,
                    object_pk=row['comment_entry_id'],
                    user_name=row['comment_author'],
                    user_email=row['comment_email'],
                    user_url=row['comment_url'],
                    ip_address=row['comment_ip'],
                    is_public=True,
                    # Attach UTC tzinfo to the datetime read from the DB.
                    submit_date=row['comment_created_on'].replace(
                        tzinfo=pytz.utc))
                annotation.save()
        finally:
            # Release the cursor and connection even if an import step raised.
            try:
                if cur is not None:
                    cur.close()
            finally:
                db.close()

Beispiel #9

0

Datei anzeigen

Datei: import_mt_topics.py Projekt: philgyford/pepysdiary

    def handle(self, *args, **options):
        """Import the MT entries of the encyclopedia blog as Topic objects.

        For every ``mt_entry`` row whose ``entry_blog_id`` matches
        ``settings.MT_ENCYCLOPEDIA_BLOG_ID`` this:

        * builds a Topic, moving a trailing ", The" to the front of the title;
        * copies the custom fields stored in ``mt_entry_meta`` onto it;
        * sets the thumbnail path when the entry is flagged as having one;
        * attaches the categories listed in ``mt_placement``;
        * rewrites "Surname, Forename" titles for topics in the people
          category;
        * restores the original creation time.

        ``args`` and ``options`` are accepted but not used.

        Raises:
            Category.DoesNotExist: the people category (pk=2) is missing.

        The cursor and the connection are closed even when importing a row
        raises, so a failed run does not leave the connection open.
        """
        # 1) SET UP INITIAL STUFF.

        valid_map_categories = [k for (k, v) in Topic.MAP_CATEGORY_CHOICES]

        # Used to match things like:
        # 'Alchemist, The (Ben Jonson)' or 'Royal Prince, The'
        # so that we can move 'The' to the start of the title.
        title_pattern = re.compile(r'^(.*?),\sThe(?:\s|$)(.*)$')

        # Used to match things like:
        # 'Bloggs, Fred', so it can change to 'Fred Bloggs'
        # 'Smythe, Sidney (1st Lord Smythe)' to 'Sidney Smythe (1st Lord Smythe)'
        # but "Mary (c, Pepys' chambermaid)" will remain the same.
        name_pattern = re.compile(r'(.*?)(?:,\s(.*?))?(?:\s\((.*?)\))?$')

        # The category that all people have, so we know to fix their names.
        people_category = Category.objects.get(pk=2)

        db = MySQLdb.connect(host=settings.MT_MYSQL_DB_HOST,
                             user=settings.MT_MYSQL_DB_USER,
                             passwd=settings.MT_MYSQL_DB_PASSWORD,
                             db=settings.MT_MYSQL_DB_NAME,
                             charset='utf8',
                             use_unicode=True)
        cur = None
        try:
            cur = db.cursor(MySQLdb.cursors.DictCursor)

            # 2) FETCH THE BASIC ENTRY DATA.

            # Values are passed as query parameters throughout so the driver
            # does the quoting, instead of building the SQL text by hand.
            cur.execute(
                "SELECT entry_id, entry_category_id, entry_title, "
                "entry_excerpt, entry_text, entry_text_more, "
                "entry_created_on "
                "FROM mt_entry WHERE entry_blog_id=%s ",
                (settings.MT_ENCYCLOPEDIA_BLOG_ID,))

            rows = cur.fetchall()
            for row in rows:
                print('%s %s' % (row['entry_id'], row['entry_title']))

                # 3) CREATE BASIC TOPIC.

                # If the MT title has ", The" in it, move "The" to the front:
                title_match = title_pattern.search(row['entry_title'])
                if title_match is None:
                    # Nothing special to do.
                    title = row['entry_title']
                else:
                    # Need to move 'The' to the start.
                    title = 'The ' + title_match.groups()[0]
                    if title_match.groups()[1] != '':
                        title += ' ' + title_match.groups()[1]

                # Fix any old-style links; NULL columns become ''.
                if row['entry_text'] is None:
                    summary = ''
                else:
                    summary = fix_old_links(row['entry_text'])
                if row['entry_text_more'] is None:
                    wheatley = ''
                else:
                    wheatley = fix_old_links(row['entry_text_more'])
                if row['entry_excerpt'] is None:
                    tooltip_text = ''
                else:
                    tooltip_text = row['entry_excerpt']

                # order_title keeps the unmodified MT title.
                topic = Topic(id=row['entry_id'],
                              title=title,
                              order_title=row['entry_title'],
                              summary=summary,
                              wheatley=wheatley,
                              tooltip_text=tooltip_text)

                # 4) FETCH AND ADD CUSTOM FIELDS.

                # latitude, longitude, map_category, wikipedia_title and zoom
                # are stored in entry_meta_vchar_idx.
                # shape is stored in entry_meta_vclob
                # Image is indicated by '1' in entry_meta_vinteger_idx.
                cur.execute("SELECT entry_meta_type, entry_meta_vchar_idx, "
                            "entry_meta_vclob, entry_meta_vinteger_idx "
                            "FROM mt_entry_meta WHERE "
                            "entry_meta_entry_id=%s",
                            (row['entry_id'],))
                meta_rows = cur.fetchall()
                for meta_row in meta_rows:
                    # A meta row has exactly one type, so at most one of the
                    # branches below can apply to it.
                    meta_type = meta_row['entry_meta_type']
                    vchar = meta_row['entry_meta_vchar_idx']

                    if meta_type == 'field.latitude' and vchar != '':
                        topic.latitude = vchar

                    elif meta_type == 'field.longitude' and vchar != '':
                        topic.longitude = vchar

                    elif meta_type == 'field.map_category' and vchar != '':
                        if vchar in valid_map_categories:
                            topic.map_category = vchar
                        elif vchar != 'none':
                            print(
                                "INVALID MAP CATEGORY: '%s' for Entry ID '%s'"
                                % (vchar, row['entry_id']))

                    elif (meta_type == 'field.wikipedia_title' and
                            vchar != ''):
                        topic.wikipedia_fragment = vchar

                    elif meta_type == 'field.zoom' and vchar != '':
                        topic.zoom = vchar

                    elif (meta_type == 'field.shape' and
                            meta_row['entry_meta_vclob'] != ''):
                        topic.shape = meta_row['entry_meta_vclob']

                topic.save()

                # 5) ADD THUMBNAIL IMAGE (must be done after save()).

                for meta_row in meta_rows:
                    if (meta_row['entry_meta_type'] ==
                            'field.thumbnail_image' and
                            meta_row['entry_meta_vinteger_idx'] == 1):
                        topic.thumbnail = (
                            'encyclopedia/thumbnails/%s.jpg' % (topic.pk))
                        topic.save()

                # 6) FETCH AND ADD CATEGORIES.

                cur.execute("SELECT placement_category_id FROM mt_placement "
                            "WHERE placement_entry_id=%s",
                            (row['entry_id'],))
                cat_rows = cur.fetchall()
                for cat_row in cat_rows:
                    try:
                        category = Category.objects.get(
                            pk=cat_row['placement_category_id'])
                        topic.categories.add(category)
                    except Category.DoesNotExist:
                        print("INVALID CATEGORY ID: '%s' for Entry ID '%s'" % (
                            cat_row['placement_category_id'],
                            row['entry_id']))

                # 7) TIDY UP NAMES OF PEOPLE.

                if people_category in topic.categories.all():
                    name_match = name_pattern.search(topic.title)
                    # Test the match object itself: groups() always returns
                    # a tuple, so comparing it with None could never fail.
                    if name_match is not None:
                        matches = name_match.groups()
                        if matches[1] is None:
                            # eg, "Mary (c, Pepys' chambermaid)".
                            # No surname, so leave it alone.
                            pass
                        elif matches[2] is not None:
                            # eg, 'Smythe, Sidney (1st Lord Smythe)'
                            # becomes 'Sidney Smythe (1st Lord Smythe)',
                            topic.title = '%s %s (%s)' % (matches[1],
                                                          matches[0],
                                                          matches[2])
                        else:
                            # eg, 'Bloggs, Fred' becomes 'Fred Bloggs'.
                            topic.title = '%s %s' % (matches[1], matches[0])
                        # We would topic.save() here, but we do it below
                        # after setting date_created, so no need to do it
                        # twice.

                # 8) SET ORIGINAL CREATED TIME.

                # Attach UTC tzinfo to the datetime read from the DB.
                created_time = row['entry_created_on'].replace(
                    tzinfo=pytz.utc)
                topic.date_created = created_time
                topic.save()
        finally:
            # Release the cursor and connection even if an import step raised.
            try:
                if cur is not None:
                    cur.close()
            finally:
                db.close()

Beispiel #10

0

Datei anzeigen

Datei: import_mt_topics.py Projekt: srinchiera/pepysdiary

    def handle(self, *args, **options):

        # 1) SET UP INITIAL STUFF.

        valid_map_categories = [k for (k, v) in Topic.MAP_CATEGORY_CHOICES]

        # Used to match things like:
        # 'Alchemist, The (Ben Jonson)' or 'Royal Prince, The'
        # so that we can move 'The' to the start of the title.
        title_pattern = re.compile(r'^(.*?),\sThe(?:\s|$)(.*)$')

        # Used to match things like:
        # 'Bloggs, Fred', so it can change to 'Fred Bloggs'
        # 'Smythe, Sidney (1st Lord Smythe)' to 'Sidney Smythe (1st Lord Smythe)'
        # but "Mary (c, Pepys' chambermaid)" will remain the same.
        name_pattern = re.compile(r'(.*?)(?:,\s(.*?))?(?:\s\((.*?)\))?$')

        # The category that all people have, so we know to fix their names.
        people_category = Category.objects.get(pk=2)

        db = MySQLdb.connect(host=settings.MT_MYSQL_DB_HOST,
                            user=settings.MT_MYSQL_DB_USER,
                            passwd=settings.MT_MYSQL_DB_PASSWORD,
                            db=settings.MT_MYSQL_DB_NAME,
                            charset='utf8',
                            use_unicode=True)
        cur = db.cursor(MySQLdb.cursors.DictCursor)

        # 2) FETCH THE BASIC ENTRY DATA.

        cur.execute("SELECT entry_id, entry_category_id, entry_title, "
            "entry_excerpt, entry_text, entry_text_more, entry_created_on "
            "FROM mt_entry WHERE entry_blog_id='%s' " % (
                                            settings.MT_ENCYCLOPEDIA_BLOG_ID))

        rows = cur.fetchall()
        for row in rows:
            print '%s %s' % (row['entry_id'], row['entry_title'])

            # 3) CREATE BASIC TOPIC.

            # If the MT title has ", The" in it, move "The" to the front:
            title_match = title_pattern.search(row['entry_title'])
            if title_match is None:
                # Nothing special to do.
                title = row['entry_title']
            else:
                # Need to move 'The' to the start.
                title = 'The ' + title_match.groups()[0]
                if title_match.groups()[1] != '':
                    title += ' ' + title_match.groups()[1]

            # Fix any old-style links.
            if row['entry_text'] is None:
                summary = u''
            else:
                summary = fix_old_links(row['entry_text'])
            if row['entry_text_more'] is None:
                wheatley = u''
            else:
                wheatley = fix_old_links(row['entry_text_more'])
            if row['entry_excerpt'] is None:
                tooltip_text = ''
            else:
                tooltip_text = row['entry_excerpt']

            topic = Topic(id=row['entry_id'],
                        title=title,
                        order_title=row['entry_title'],
                        summary=summary,
                        wheatley=wheatley,
                        tooltip_text=tooltip_text
                        )

            # 4) FETCH AND ADD CUSTOM FIELDS.

            # latitude, longitude, map_category, wikipedia_title and zoom are
            # stored in entry_meta_vchar_idx.
            # shape is stored in entry_meta_vclob
            # Image is indicated by '1' in entry_meta_vinteger_idx.
            cur.execute("SELECT entry_meta_type, entry_meta_vchar_idx, "
                        "entry_meta_vclob, entry_meta_vinteger_idx "
                        "FROM mt_entry_meta WHERE "
                        "entry_meta_entry_id='%s'" % (row['entry_id']))
            meta_rows = cur.fetchall()
            for meta_row in meta_rows:
                if meta_row['entry_meta_type'] == 'field.latitude' and \
                    meta_row['entry_meta_vchar_idx'] != '':
                    topic.latitude = meta_row['entry_meta_vchar_idx']

                if meta_row['entry_meta_type'] == 'field.longitude' and \
                    meta_row['entry_meta_vchar_idx'] != '':
                    topic.longitude = meta_row['entry_meta_vchar_idx']

                if meta_row['entry_meta_type'] == 'field.map_category' and \
                    meta_row['entry_meta_vchar_idx'] != '':
                    if meta_row['entry_meta_vchar_idx'] in valid_map_categories:
                        topic.map_category = meta_row['entry_meta_vchar_idx']
                    elif meta_row['entry_meta_vchar_idx'] != 'none':
                        print "INVALID MAP CATEGORY: '%s' for Entry ID '%s'" % (
                            meta_row['entry_meta_vchar_idx'], row['entry_id'])

                if meta_row['entry_meta_type'] == 'field.wikipedia_title' and \
                    meta_row['entry_meta_vchar_idx'] != '':
                    topic.wikipedia_fragment = meta_row['entry_meta_vchar_idx']

                if meta_row['entry_meta_type'] == 'field.zoom' and \
                    meta_row['entry_meta_vchar_idx'] != '':
                    topic.zoom = meta_row['entry_meta_vchar_idx']

                if meta_row['entry_meta_type'] == 'field.shape' and \
                    meta_row['entry_meta_vclob'] != '':
                    topic.shape = meta_row['entry_meta_vclob']

            topic.save()

            # 5) ADD THUMBNAIL IMAGE (must be done after save()).

            for meta_row in meta_rows:
                if meta_row['entry_meta_type'] == 'field.thumbnail_image' and \
                    meta_row['entry_meta_vinteger_idx'] == 1:
                    topic.thumbnail = 'encyclopedia/thumbnails/%s.jpg' % (
                                                                    topic.pk)
                    topic.save()

            # 6) FETCH AND ADD CATEGORIES.

            cur.execute("SELECT placement_category_id FROM mt_placement "
                        "WHERE placement_entry_id='%s'" % (row['entry_id']))
            cat_rows = cur.fetchall()
            for cat_row in cat_rows:
                try:
                    category = Category.objects.get(
                                        pk=cat_row['placement_category_id'])
                    topic.categories.add(category)
                except Category.DoesNotExist:
                    print "INVALID CATEGORY ID: '%s' for Entry ID '%s'" % (
                        cat_row['placement_category_id'], row['entry_id'])

            # 7) TIDY UP NAMES OF PEOPLE.

            if people_category in topic.categories.all():
                name_match = name_pattern.search(topic.title)
                if name_match.groups() is not None:
                    matches = name_match.groups()
                    if matches[1] is None:
                        # eg, "Mary (c, Pepys' chambermaid)".
                        # No surname, so leave it alone.
                        pass
                    elif matches[2] is not None:
                        # eg, 'Smythe, Sidney (1st Lord Smythe)'
                        # becomes 'Sidney Smythe (1st Lord Smythe)',
                        topic.title = '%s %s (%s)' % (matches[1],
                                                    matches[0],
                                                    matches[2])
                    else:
                        # eg, 'Bloggs, Fred' becomes 'Fred Bloggs'.
                        topic.title = '%s %s' % (matches[1], matches[0])
                    # We would topic.save() here, but we do it below after
                    # setting date_created, so no need to do it twice.

            # 8) SET ORIGINAL CREATED TIME.

            created_time = row['entry_created_on'].replace(tzinfo=pytz.utc)
            topic.date_created = created_time
            topic.save()

        cur.close()
        db.close()

Beispiel #11

0

Datei anzeigen

Datei: import_mt_letters.py Projekt: srinchiera/pepysdiary

    def handle(self, *args, **options):
        """Import every letter from the old Movable Type database.

        Reads all entries of the blog ``settings.MT_LETTERS_BLOG_ID`` from
        the ``mt_entry`` table and saves one ``Letter`` per entry, re-using
        the entry's ID as the Letter's ID.

        Raises:
            ValueError: if an entry title is not of the form "X to Y".
            KeyError: if a sender/recipient name has no entry in the
                name -> Topic ID lookup below.
        """
        # Because there aren't many senders/recipients, a semi-manual lookup
        # of name -> Topic ID.
        people = {
            'Anthony Deane': 5132,
            'Balthazar St Michel': 631,
            'the Brooke House Commissioners': 12029,
            'Capt. Thomas Elliot': 10002,
            'Charles II': 344,
            'Col. Thomas Middleton': 7120,
            'Henry Savile': 9425,
            'James Southerne': 977,
            'John Evelyn': 1033,
            'John Pepys (brother)': 117,
            'Lord Henry Howard': 10586,
            'Matthew Wren': 8019,
            'Samuel Pepys': 29,
            'Sir Richard Browne': 3795,
            'Thomas Hill': 7165,
            'Thomas Povey': 5263,
        }

        db = MySQLdb.connect(host=settings.MT_MYSQL_DB_HOST,
                             user=settings.MT_MYSQL_DB_USER,
                             passwd=settings.MT_MYSQL_DB_PASSWORD,
                             db=settings.MT_MYSQL_DB_NAME,
                             charset='utf8',
                             use_unicode=True)
        # The import steps can raise (bad title, unknown person, missing
        # Topic), so release the cursor and the connection in finally
        # blocks rather than only on the success path.
        try:
            cur = db.cursor(MySQLdb.cursors.DictCursor)
            try:
                self._import_letters(cur, people)
            finally:
                cur.close()
        finally:
            db.close()

    def _import_letters(self, cur, people):
        """Fetch all letter entries with `cur` and import them one by one."""
        # 1) FETCH THE LETTERS.

        cur.execute("SELECT entry_id, entry_title, entry_text, "
            "entry_text_more, entry_excerpt, entry_basename, "
            "entry_created_on, entry_authored_on "
            "FROM mt_entry WHERE entry_blog_id='%s'" % (
                                                settings.MT_LETTERS_BLOG_ID))

        # fetchall() must complete before the loop starts: the same cursor
        # is re-used for the per-letter custom-field query.
        for row in cur.fetchall():
            # Single-argument print(...) outputs the same text under
            # Python 2's print statement and Python 3's print function.
            print('%s %s' % (row['entry_id'], row['entry_title']))
            self._import_letter(cur, row, people)

    def _import_letter(self, cur, row, people):
        """Create and save the Letter for one `mt_entry` row."""
        # 2) Fix any old-style links in the two text fields.
        if row['entry_text'] is None:
            text = u''
        else:
            text = fix_old_links(row['entry_text'])
        if row['entry_text_more'] is None:
            footnotes = u''
        else:
            footnotes = fix_old_links(row['entry_text_more'])

        # 3) Create initial object, but don't save yet.
        letter = Letter(id=row['entry_id'],
                        title=row['entry_title'],
                        text=text,
                        footnotes=footnotes,
                        excerpt=row['entry_excerpt'],
                        slug=row['entry_basename'],
                        letter_date=row['entry_authored_on'],
                        )

        # 4) FETCH AND ADD CUSTOM FIELDS.

        cur.execute("SELECT entry_meta_type, entry_meta_vchar_idx "
                    "FROM mt_entry_meta WHERE "
                    "entry_meta_entry_id='%s'" % (row['entry_id']))

        for meta_row in cur.fetchall():
            meta_type = meta_row['entry_meta_type']
            meta_value = meta_row['entry_meta_vchar_idx']

            if meta_type == 'field.display_date' and meta_value != '':
                letter.display_date = meta_value

            if meta_type == 'field.letter_source':
                # Any other source name leaves letter.source untouched.
                if meta_value == u'Guy de la Bédoyère':
                    letter.source = Letter.GUY_DE_LA_BEDOYERE_SOURCE
                elif meta_value == u'Helen Truesdell Heath':
                    letter.source = Letter.HELEN_TRUESDELL_HEATH_SOURCE

        # 5) Work out sender/recipient from title.
        # (We don't bother with the old MT categories; too complicated, and
        # not many of them.)
        title_match = re.match(r'^(.*?)\sto\s(.*?)$', row['entry_title'])
        if title_match is None:
            # Fail with a message naming the entry, instead of an
            # AttributeError from calling .groups() on None.
            raise ValueError(
                "Title %r of entry ID '%s' is not of the form "
                "'<sender> to <recipient>'" % (
                    row['entry_title'], row['entry_id']))

        sender_name, recipient_name = title_match.groups()
        for person_name in (sender_name, recipient_name):
            if person_name not in people:
                raise KeyError(
                    "No Topic ID known for %r (from title %r of entry "
                    "ID '%s')" % (
                        person_name, row['entry_title'], row['entry_id']))

        letter.sender = Topic.objects.get(pk=people[sender_name])
        letter.recipient = Topic.objects.get(pk=people[recipient_name])

        letter.save()

        # SET ORIGINAL CREATED TIME.
        # Done as a second save after the initial one; presumably the first
        # save() sets date_created automatically - TODO confirm on the model.
        created_time = row['entry_created_on'].replace(tzinfo=pytz.utc)
        letter.date_created = created_time
        letter.save()