/
wordpress_evernote.py
executable file
·920 lines (845 loc) · 41.7 KB
/
wordpress_evernote.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
#!/usr/bin/python2.7
# -*- coding: utf-8 -*-
import re
import argparse
from xml.etree import ElementTree as ET
import cgi
import csv
from datetime import datetime
import settings
import common
from wordpress import WordPressApiWrapper, WordPressPost, WordPressAttribute
from wordpress import WordPressItem, WordPressImageAttachment
from my_evernote import EvernoteApiWrapper
from __builtin__ import super
# Top-level command-line parser. Every sub-command defined later in this
# module registers its own sub-parser on `subparsers` and stores its handler
# through `set_defaults(func=...)`.
wp_en_parser = argparse.ArgumentParser(
    description='WordPress <--> Evernote utilities')
# Optional name of the WordPress account; _get_adaptor() uses it as a key
# into settings.WORDPRESS.
wp_en_parser.add_argument('--wordpress',
                          #default='default',
                          help='WordPress account name to use from settings.')
subparsers = wp_en_parser.add_subparsers()
# Module logger, created as a child of the shared logger from `common`.
logger = common.logger.getChild('wordpress-evernote')
###############################################################################
class NoteParserError(Exception):
    """Raised when an Evernote note does not have the expected structure."""
class WpEnAttribute(WordPressAttribute):
    """WordPress item attribute whose value comes from an Evernote note."""

    @classmethod
    def create(cls, adaptor, attr_name, node, wp_item):
        """Build the attribute object that matches `attr_name`.

        List-like attributes and link attributes get their dedicated
        classes; every other attribute is delegated to the generic
        `WordPressAttribute.create` factory using the node text.

        :param adaptor: Adaptor used to resolve linked notes.
        :type adaptor: EvernoteWordpressAdaptor
        :param attr_name: Internal name of the attribute.
        :param node: Element holding the attribute value.
        :type node: xml.etree.ElementTree.Element
        :param wp_item: Item the attribute belongs to.
        :type wp_item: wordpress.WordPressItem
        """
        list_attrs = ('categories', 'tags', 'seo_keywords')
        link_attrs = ('parent', 'thumbnail', 'project')
        if attr_name in list_attrs:
            return WpEnListAttribute(node.text, wp_item, adaptor)
        if attr_name in link_attrs:
            return WpEnLinkAttribute(node, wp_item, adaptor)
        return WordPressAttribute.create(attr_name, node.text, wp_item)

    def __init__(self, value, wp_item, adaptor, *args, **kwargs):
        """Initialize the attribute and remember the adaptor.

        Extra positional and keyword arguments are forwarded to the base
        class initializer.
        """
        super(WpEnAttribute, self).__init__(value, wp_item, *args, **kwargs)
        self._adaptor = adaptor
class WpEnListAttribute(WpEnAttribute):
    """WordPress item attribute holding a list of values."""

    def __init__(self, value, wp_item, adaptor):
        """Initialize the list attribute from a comma separated string.

        :param value: Comma separated values, as written in the note.
        :type wp_item: wordpress.WordPressItem
        :type adaptor: EvernoteApiWrapper
        """
        super(WpEnListAttribute, self).__init__('', wp_item, adaptor)
        self._value = self._parse_values_from_string(value)

    @staticmethod
    def _parse_values_from_string(valstring):
        """Split `valstring` into a list of values using CSV rules.

        Quoted values may contain commas, and blanks following a comma are
        dropped, e.g.:
          in:  'val1,"val2", val3-hi, "val 4, quoted"'
          out: ['val1', 'val2', 'val3-hi', 'val 4, quoted']  (4 items)
        """
        values = []
        for row in csv.reader([valstring], skipinitialspace=True):
            values.extend(row)
        return values
class WpEnLinkAttribute(WpEnAttribute):
    """WordPress item attribute whose value is a link.

    The link either points at another Evernote note (resolved lazily to a
    WordPress item through the adaptor) or is a plain URL.
    """

    def __init__(self, node, wp_item, adaptor):
        """Initialize the link attribute from a metadata node.

        The node is expected to have empty text and at most one child,
        which must be an `a` element carrying a non-empty `href`. A node
        with no children is accepted and yields an attribute with no link.

        :type node: xml.etree.ElementTree.Element
        :type wp_item: wordpress.WordPressItem
        :type adaptor: EvernoteApiWrapper
        :raise NoteParserError: If the node does not have that structure.
        """
        # Defaults first, so that fget() can rely on these attributes even
        # when the node carries no link at all.
        self._href = None
        self._text = None
        self._ref_item = None
        if '' != node.text:
            raise NoteParserError('Link "%s" should not have text' %
                                  (ET.tostring(node)))
        if node.tail:
            raise NoteParserError('Link "%s" should not have tail' %
                                  (ET.tostring(node)))
        if 0 == len(node):
            logger.warning('No link found for attribute')
            super(WpEnLinkAttribute, self).__init__('', wp_item, adaptor)
            return
        if 1 != len(node):
            raise NoteParserError('Link "%s" should have one child' %
                                  (ET.tostring(node)))
        a_node = node[0]
        if 'a' != a_node.tag:
            raise NoteParserError('Link "%s" should have one <a> child' %
                                  (ET.tostring(node)))
        if a_node.tail:
            raise NoteParserError('Link "%s" should not have tail' %
                                  (ET.tostring(a_node)))
        self._href = a_node.get('href')
        if not self._href:
            raise NoteParserError('Link "%s" has no href' %
                                  (ET.tostring(a_node)))
        self._text = a_node.text
        super(WpEnLinkAttribute, self).__init__(self._href, wp_item, adaptor)

    def fget(self):
        """Return the link target.

        :return: `None` when the attribute has no link, the referenced
                 WordPress item when the link points at an Evernote note
                 (loaded on first access and then reused), or the raw href
                 otherwise.
        """
        if not self._href:
            # No link was found on construction - nothing to resolve.
            return None
        if not EvernoteApiWrapper.is_evernote_url(self._href):
            return self._href
        if self._ref_item is None:
            self._ref_item = self._adaptor.wp_item_from_note(self._href)
        return self._ref_item
class WpEnContent(WpEnAttribute):
    """WordPress content attribute rendered from an Evernote note."""

    def __init__(self, node, wp_item, adaptor):
        """Initialize the content attribute from the content node.

        The content is not rendered here, only on read. The a-tags in the
        content are scanned, and the ref-items mapping of the underlying
        item is updated with a loader for every linked Evernote note.

        :type node: xml.etree.ElementTree.Element
        :type wp_item: wordpress.WordPressItem
        :type adaptor: EvernoteApiWrapper
        """
        super(WpEnContent, self).__init__('', wp_item, adaptor)
        self._cached_rendered_content = None
        self._content_node = node
        self._find_ref_items()

    def _find_ref_items(self):
        """Register a lazy loader for every Evernote link in the content."""
        def lazy_loader(link):
            # Late-loading function, in case the item is never needed.
            return lambda: self._adaptor.wp_item_from_note(link)

        for a_tag in self._content_node.findall('.//a'):
            href = a_tag.get('href', '')
            if EvernoteApiWrapper.is_evernote_url(href):
                self._wp_item._ref_wp_items[href] = lazy_loader(href)

    @staticmethod
    def post_process_content_lines(content_lines):
        """Apply line post-processors to `content_lines`, in place.

        Two post-processors run on every line:

        * ShellBot Easy Image: when a line holds more than one single-image
          `sb_easy_image` shortcode, everything from the first `[` to the
          last `]` on that line is replaced by one multi-column shortcode.
        * Markdown heading anchoring: a line of the form
          `## Heading text #anchor` becomes
          `## <a name="anchor"></a>Heading text`.

        :param content_lines: List of rendered lines, modified in place.
        """
        # ShellBot Easy Image post-processor:
        sbsc_re = re.compile(
            r'\[sb_easy_image ids="(?P<id>\d+)" size="medium" '
            r'columns="1" link="Lightbox"\]')
        bracketed_re = re.compile(r'\[.*\]')
        # Markdown heading anchoring post-processor:
        mdha_re = re.compile(
            r'(?P<hlevel>#+)\s+(?P<htext>[^#]+)\s+#(?P<hanchor>[\w\-]+)')
        for num, line in enumerate(content_lines):
            # ShellBot Easy Image post-processor:
            matches = sbsc_re.findall(line)
            if 1 < len(matches):
                new_shortcode = ('[sb_easy_image ids="%s" size="medium" '
                                 'columns="%d" link="Lightbox" '
                                 'order="custom"]' %
                                 (','.join(matches), len(matches)))
                content_lines[num] = bracketed_re.sub(new_shortcode,
                                                      content_lines[num])
            # Markdown heading anchoring post-processor (matched against
            # the line as it was before any shortcode merge):
            match = mdha_re.match(line)
            if match:
                d = match.groupdict()
                content_lines[num] = '%s <a name="%s"></a>%s' % (d['hlevel'],
                                                                 d['hanchor'],
                                                                 d['htext'])

    def _render_node_as_markdown(self):
        """Return the content node rendered as markdown text.

        Every `p` child of the content node becomes one output line. The
        result is cached, so linked notes are resolved at most once.

        :raise NoteParserError: If a paragraph holds an unsupported tag.
        """
        # Compare against None so that an empty rendering is cached too.
        if self._cached_rendered_content is not None:
            return self._cached_rendered_content

        def render_line_element(e):
            tag = e.tag.lower()
            if 'a' == tag:
                href = e.get('href', '')
                if EvernoteApiWrapper.is_evernote_url(href):
                    ref_item = self._adaptor.wp_item_from_note(href)
                    return ref_item.markdown_ref(e.text)
                # Links that are not Evernote notes render as the bare URL.
                return href
            if 'span' == tag:
                return e.text
            if 'en-todo' == tag:
                return '❑'
            if 'en-media' == tag:
                logger.warning('Unexpected en-media element in content: %s',
                               ET.tostring(e))
                return ''
            raise NoteParserError('Invalid tag "%s" in content paragraph' %
                                  (ET.tostring(e)))

        content_lines = list()
        for p in self._content_node:
            # Content node is expected to contain only p-tags, one per line.
            assert('p' == p.tag.lower())
            assert(p.tail is None)
            line = p.text or ''
            for e in p:
                line += render_line_element(e) or ''
                line += e.tail or ''
            content_lines.append(line)
        self.post_process_content_lines(content_lines)
        self._cached_rendered_content = '\n'.join(content_lines)
        return self._cached_rendered_content

    def fget(self):
        """Return the rendered content."""
        # currently supporting only markdown rendering of content node
        assert('markdown' == self._wp_item.content_format)
        return self._render_node_as_markdown()
class EvernoteWordpressAdaptor(object):
    """Evernote-Wordpress Adaptor class.

    Builds WordPress items out of Evernote notes, publishes them through
    the WordPress API wrapper, and writes WordPress-assigned metadata back
    to the notes.
    """

    @staticmethod
    def norm_enc(in_str):
        """Return `in_str` UTF-8 encoded, with every non-breaking space
        replaced by a regular space.
        """
        return in_str.replace(u'\xa0', u' ').encode('utf-8')

    @staticmethod
    def evernote_encode(unicode_str):
        """Return a UTF-8 encoded string for writing to Evernote.

        Pairs of spaces are replaced with '\\xa0 ' or ' \\xa0' in order to
        have all whitespace displayed as expected in the Evernote editor.
        Single spaces are left untouched, so markup stays intact.
        """
        # The second pass handles the pair that is left over after the
        # first pass consumed the first two blanks of a run of three.
        return (unicode_str.replace(u'  ', u'\xa0 ')
                           .replace(u'  ', u' \xa0')
                           .encode('utf-8'))

    @staticmethod
    def _parse_xml_from_string(xml_string):
        """Return the root element parsed from `xml_string`."""
        parser = ET.XMLParser()
        # Default XMLParser is not full XHTML, so it doesn't know about all
        # valid XHTML entities (such as &nbsp;), so the following code is
        # needed in order to allow these entities.
        # (see: http://stackoverflow.com/questions/7237466 and
        #       http://stackoverflow.com/questions/14744945 )
        # Valid XML entities: quot, amp, apos, lt and gt.
        parser.parser.UseForeignDTD(True)
        parser.entity['nbsp'] = ' '
        if isinstance(xml_string, str):
            xml_string = xml_string.decode('utf-8')
        return ET.fromstring(EvernoteWordpressAdaptor.norm_enc(xml_string),
                             parser=parser)

    @staticmethod
    def _parse_note_xml(note_content):
        """Return a normalized Element tree root from note content XML string.

        A normalized WordPress item note is as follows:
        1. Root `en-note` element.
        1.1. `div` node with id `metadata`
        1.1.1. A `p` node for every metadata attribute, of the form
               `attr_key=attr_value`, where `attr_key` is a string and
               `attr_value` may contain string or `a` node.
        1.2. `div` node with id `content`
        1.2.1. `p` node for every content paragraph, containing text and/or
               `a` nodes.

        :raise NoteParserError: If more than one `hr` separator is found.
        """
        root = EvernoteWordpressAdaptor._parse_xml_from_string(note_content)
        norm_root = ET.Element('en-note')
        norm_meta = ET.SubElement(norm_root, 'div', id='metadata')
        norm_content = ET.SubElement(norm_root, 'div', id='content')
        # Current parsing stage ('meta' until the first <hr/>, then
        # 'content'). Held in a dict so the nested helpers can update it
        # without resorting to a module-level global.
        state = {'stage': 'meta'}

        def fix_text(text):
            return (text or '').strip('\n\r')

        def get_active_node():
            if 'meta' == state['stage']:
                return norm_meta
            elif 'content' == state['stage']:
                return norm_content
            raise NoteParserError('Invalid stage "%s"' % (state['stage']))

        def append_tail(text):
            # Tail text becomes a paragraph of its own in the active div.
            if text:
                p = ET.SubElement(get_active_node(), 'p')
                p.text = text
                return p

        def parse_node(root, target_node=None):
            tag = root.tag.lower()
            text = fix_text(root.text)
            tail = fix_text(root.tail)
            if tag in ('hr', ):
                # End of metadata section
                assert(not root.text and (0 == len(root)))
                if 'meta' == state['stage']:
                    state['stage'] = 'content'
                else:
                    raise NoteParserError('Invalid stage "%s"' %
                                          (state['stage']))
                p = ET.SubElement(get_active_node(), 'p')
                tail_p = append_tail(tail)
                return tail_p if tail_p is not None else p
            elif tag in ('en-note', 'div', 'p', 'br'):
                p = ET.SubElement(get_active_node(), 'p')
                if text:
                    p.text = text
                target_node = p
                for e in root:
                    next_target = parse_node(e, target_node)
                    if next_target is not None:
                        target_node = next_target
                tail_p = append_tail(tail)
                return tail_p if tail_p is not None else target_node
            elif tag in ('a', 'en-todo', 'en-media'):
                # Not expecting deeper levels!
                if 0 < len(root):
                    logger.warning('Skipping element with unexpected nested '
                                   'elements: %s', ET.tostring(root))
                else:
                    child = ET.SubElement(
                        target_node if target_node is not None
                        else ET.SubElement(get_active_node(), 'p'),
                        tag)
                    if root.get('href'):
                        child.set('href', root.get('href'))
                    if text:
                        child.text = text
                    if tail:
                        child.tail = tail
            elif tag in ('span',):
                # Treat span like it simply isn't there...
                if text:
                    if target_node is None:
                        logger.warning('Don\'t know what to do with text in '
                                       'top level span element: %s',
                                       ET.tostring(root))
                    elif target_node.text:
                        target_node.text += text
                    else:
                        target_node.text = text
                for e in root:
                    parse_node(e, target_node)
                if tail:
                    logger.warning('Guessing how to append tail of span '
                                   'element: %s', ET.tostring(root))
                    return append_tail(tail)
            else:
                # Unexpected tag?
                logger.warning('Unexpected tag "%s"', root)

        # Start HERE
        # Cleanup DOM (regression of rogue <br /> in a-element)
        for bad_a in root.findall('.//a/br/..'):
            logger.warning('Removing rogue a-node with br-child (%s), '
                           'and inserting br-node instead',
                           ET.tostring(bad_a))
            tail = bad_a.tail
            bad_a.clear()
            bad_a.tag = 'br'
            bad_a.tail = tail
        # Remove redundant <br/> in <div>s
        for redundant_div_br in root.findall('.//div/br'):
            br_tail = fix_text(redundant_div_br.tail)
            if not br_tail:
                redundant_div_br.clear()
                redundant_div_br.tag = 'span'
        # Parse all sub elements of main en-note
        parse_node(root)
        # Clean up redundant empty p tags in normalized tree
        for top_level_div in norm_root:
            del_list = list()
            trailing_empty_list = list()
            prev_empty = True  # initialized to True to remove prefix empty p's
            for p in top_level_div:
                # sanity - top level divs should contain only p elements
                assert('p' == p.tag)
                assert(not p.tail)
                if (p.text or 0 < len(p)):
                    if 'metadata' != top_level_div.attrib['id']:
                        # in metadata div - don't allow empty p's!
                        # (prev_empty stays True there, so every empty p
                        # ends up in del_list)
                        prev_empty = False
                    trailing_empty_list = list()
                else:
                    # Empty p - only one is allowed in between non-empty p's
                    if prev_empty:
                        del_list.append(p)
                    else:
                        trailing_empty_list.append(p)
                    prev_empty = True
            for p in del_list + trailing_empty_list:
                top_level_div.remove(p)
        return norm_root

    def __init__(self, en_wrapper, wp_wrapper):
        """Initialize Adaptor instance with API wrapper objects.

        :param en_wrapper: Initialized Evernote API wrapper instance.
        :type en_wrapper: my_evernote.EvernoteApiWrapper
        :param wp_wrapper: Initialized Wordpress API wrapper instance.
        :type wp_wrapper: wordpress.WordPressApiWrapper
        """
        self.evernote = en_wrapper
        self.wordpress = wp_wrapper
        # Parsed WordPress items, keyed by note GUID.
        self.cache = dict()

    def wp_item_from_note(self, note_link):
        """Factory builder of WordPressItem from Evernote note.

        Parsed items are cached by note GUID, so a note is parsed at most
        once per adaptor instance.

        :param note_link: Evernote note link string for note to create, or
                          an already fetched note object.
        :return: The WordPress item built from the note.
        :raise NoteParserError: If the note is an image note with no
                                attached resources.
        """
        if isinstance(note_link, basestring):
            note = None
            guid = EvernoteApiWrapper.get_note_guid(note_link)
        else:
            note = note_link
            guid = note.guid
        # return parsed note from cache, if cached
        if guid in self.cache:
            return self.cache[guid]
        # not cached - parse and cache result
        if note is None:
            note = self.evernote.get_note(guid)
        wp_item = WordPressItem()
        wp_item._underlying_en_note = note
        # Cached before parsing, so a note reached again while this one is
        # still being built resolves to the same item.
        self.cache[guid] = wp_item
        item_dom = self._parse_note_xml(note.content)
        # Copy metadata fields to wp_item internal fields
        # Convert from Evernote attribute name to internal name if needed
        name_mappings = {
            'type': 'post_type',
            'hemingwayapp-grade': 'hemingway_grade',
        }
        for metadata in item_dom.findall(".//div[@id='metadata']/p"):
            if metadata.text is None:
                continue
            if metadata.text.startswith('#'):
                continue
            pos = metadata.text.find('=')
            if pos < 0:
                # Not a `key=value` line. Slicing with -1 would produce a
                # bogus attribute name, so the line is skipped instead.
                logger.warning('Skipping metadata line with no "=": %s',
                               metadata.text)
                continue
            attr_name = metadata.text[:pos]
            attr_name = name_mappings.get(attr_name, attr_name)
            metadata.text = metadata.text[pos+1:]
            wp_item.set_wp_attribute(attr_name,
                                     WpEnAttribute.create(self, attr_name,
                                                          metadata, wp_item))
        # Determine post type and continue initialization accordingly
        if wp_item.post_type in ('post', 'page'):
            # Initialize as WordPress post, and set content
            wp_item.__class__ = WordPressPost
            wp_item.set_wp_attribute(
                'content', WpEnContent(item_dom.find(".//div[@id='content']"),
                                       wp_item, self))
        else:
            # Initialize as WordPress image attachment, and fetch image
            wp_item.__class__ = WordPressImageAttachment
            wp_item._filename = note.title
            if not note.resources:
                raise NoteParserError('Note (%s) has no attached resources' %
                                      (note.title))
            resource = note.resources[0]
            if 1 < len(note.resources):
                logger.warning('Note has too many attached resources (%d). '
                               'Choosing the first one, arbitrarily.',
                               len(note.resources))

            def fetch_bits(guid, name):
                # The image data is fetched only when actually requested.
                def fetch():
                    logger.debug('Fetching image %s', name)
                    return self.evernote.get_resource_data(guid)
                return fetch
            wp_item._get_image_data = fetch_bits(resource.guid, note.title)
            wp_item._image_mime = resource.mime
        return wp_item

    def create_wordpress_stub_from_note(self, wp_item, en_note):
        """Create WordPress item stub from item with no ID.

        The purpose is to create an ID without publishing all related
        items. The item is posted as a stub through `wp_item.post_stub`,
        and the created ID (and link, if any) is updated in the Evernote
        note. Items that already have an ID are left untouched.

        :param wp_item: WordPress item built from `en_note`.
        :param en_note: Evernote note to update with the new ID.
        """
        if wp_item.id:
            return
        # New WordPress item
        # Post as stub in order to get ID
        wp_item.post_stub(self.wordpress)
        assert(wp_item.id)
        # Update ID in note
        attrs_to_update = {'id': str(wp_item.id)}
        if wp_item.link:
            attrs_to_update['link'] = str(wp_item.link)
        self.update_note_metdata(en_note, attrs_to_update)

    def post_to_wordpress_from_note(self, note_link, force=False):
        """Create WordPress item from Evernote note,
        and publish it to a WordPress blog.

        A note with ID not set will be posted as a new item, and the assigned
        item ID will be updated in the Evernote note.
        A note with ID set will result an update of the existing item.

        @warning: Avoid posting the same note to different WordPress accounts,
                  as the IDs might be inconsistent!

        :param note_link: Evernote note link string for
                          note with item to publish.
        :param force: Whether to update based on last modified timestamp,
                      or always (if set to True).
        """
        # Get note from Evernote
        #: :type en_note: evernote.edam.type.ttypes.Note
        en_note = self.evernote.get_note(note_link)
        # Convert Evernote timestamp (ms from epoch) to DateTime object
        # (http://dev.evernote.com/doc/reference/Types.html#Typedef_Timestamp)
        note_updated = datetime.utcfromtimestamp(en_note.updated/1000)
        # Create a WordPress item from note
        #: :type wp_item: WordPressItem
        wp_item = self.wp_item_from_note(en_note)
        if force or (wp_item.last_modified is None or
                     (wp_item.last_modified and
                      note_updated > wp_item.last_modified)):
            # Post the item. Stubs are created first, so that every
            # referenced item has an ID before the item itself is updated.
            self.create_wordpress_stub_from_note(wp_item, en_note)
            for ref_wp_item in wp_item.ref_items:
                self.create_wordpress_stub_from_note(
                    ref_wp_item, ref_wp_item._underlying_en_note)
            wp_item.update_item(self.wordpress)
            # Update note metadata from published item (e.g. ID for new item)
            self.update_note_metadata_from_wordpress_post(en_note, wp_item)
        else:
            logger.info('Skipping posting note %s - not updated recently',
                        en_note.title)

    def sync(self, query, force=False, preprocess=False, image_notebook=None):
        """Sync between WordPress site and notes matched by `query`.

        A failure on one note is logged and does not stop the sync of the
        remaining notes.

        :param query: Evernote query used to find notes for sync.
        :param force: Whether to update based on last modified timestamp,
                      or always (if set to True).
        :param preprocess: Whether to perform note preprocess.
        :param image_notebook: Notebook for extracted embedded images.
        """
        for _, note in self.evernote.get_notes_by_query(query):
            logger.info('Posting note "%s" (GUID %s)', note.title, note.guid)
            try:
                if preprocess and note.resources:
                    self.preprocess_embedded_images(note.guid, image_notebook)
                self.post_to_wordpress_from_note(note.guid, force)
            except Exception:
                logger.exception('Failed posting note "%s" (GUID %s)',
                                 note.title, note.guid)

    def detach(self, query):
        """Detach sync between WordPress site and notes matched by `query`.

        Resets the WordPress-assigned metadata of every matched note to
        the `<auto>` placeholder.

        :param query: Evernote query used to find notes to detach.
        """
        attrs_to_update = {'id': '<auto>',
                           'link': '<auto>',
                           'last_modified': '<auto>',
                           'published_date': '<auto>'}
        for _, note_meta in self.evernote.get_notes_by_query(query):
            note = self.evernote.get_note(note_meta.guid,
                                          with_resource_data=False)
            logger.info('Detaching note "%s" (GUID %s)', note.title, note.guid)
            self.update_note_metdata(note, attrs_to_update)

    def update_note_metdata(self, note, attrs_to_update):
        """Updates an Evernote WP-item note metadata based on dictionary.

        For every key in `attrs_to_update`, update the metadata attribute `key`
        with new value `attrs_to_update[key]`. Only the part of the note
        before the first `hr` element is considered, and the note is
        written back to Evernote only if some value actually changed.

        :param note: Evernote post-note to update.
        :type note: evernote.edam.type.ttypes.Note
        :param attrs_to_update: Dictionary of attributes to update.
        :type attrs_to_update: dict
        """
        # Held in a dict so the nested helper can flip it without resorting
        # to a module-level global.
        state = {'modified': False}
        root = self._parse_xml_from_string(note.content)

        def update_node_text(orig_text):
            # Extract attribute name from element
            text = (orig_text or '').strip(' \n\r')
            if not text or text.startswith('#') or '=' not in text:
                return orig_text
            pos = text.find('=')
            attr_name = text[:pos]
            # Update if needed
            if attr_name not in attrs_to_update:
                return orig_text
            current_val = text[pos+1:].strip(' \n\r')
            new_val = attrs_to_update[attr_name]
            if new_val == current_val:
                logger.debug('No change in attribute "%s"', attr_name)
                return orig_text
            logger.debug('Changing note attribute "%s" from "%s" '
                         'to "%s"', attr_name, current_val, new_val)
            state['modified'] = True
            return '%s=%s' % (attr_name, new_val)

        for e in root.iter():
            if e.tag in ('hr', ):
                # <hr /> tag means end of metadata section
                break
            if e.tag in ('div', 'p', 'en-note',):
                e.text = update_node_text(e.text)
                e.tail = update_node_text(e.tail)
        # TODO: if metadata field doesn't exist - create one?
        if state['modified']:
            logger.info('Writing modified content back to note')
            # evernote_encode replaces pairs of spaces with '\xa0 ' or
            # ' \xa0' in order to have all whitespace displayed as expected
            # in Evernote editor.
            note.content = self.evernote_encode('\n'.join([
                '<?xml version="1.0" encoding="UTF-8" standalone="no"?>',
                '<!DOCTYPE en-note SYSTEM '
                '"http://xml.evernote.com/pub/enml2.dtd">',
                ET.tostring(root)]))
            self.evernote.updateNote(note)
        else:
            logger.info('No changes to note content')

    def update_note_metadata_from_wordpress_post(self, note, item):
        """Updates an Evernote WP-item note metadata based on Wordpress item.

        Updates only fields that has WordPress as the authoritative source,
        like ID & link.

        :requires: `item` was originally constructed from `note`.

        :param note: Evernote post-note to update
        :type note: evernote.edam.type.ttypes.Note
        :param item: Wordpress item from which to update
        :type item: wordpress.WordPressItem
        """
        # TODO: get authoritative attributes from WordPress class
        attrs_to_update = {'id': str(item.id)}
        for attr in ['link', 'last_modified', 'published_date']:
            wp_attr = item._wp_attrs.get(attr)
            if (isinstance(wp_attr, WordPressAttribute) and
                    wp_attr.fget() is not None):
                attrs_to_update[attr] = wp_attr.str()
        self.update_note_metdata(note, attrs_to_update)

    def import_images_to_evernote(self, parent_id, notebook_name,
                                  set_id=None, set_parent=None):
        """Save every media item of a WordPress post as an Evernote note.

        :param parent_id: Passed to the WordPress wrapper's
                          `media_item_generator` to select the media items.
        :param notebook_name: Name of the destination Evernote notebook.
        :param set_id: If set, written as the `id` of every image note
                       instead of the image's own value.
        :param set_parent: If set, written as the `parent` of every image
                           note instead of the image's own value.
        """
        overrided_attrs = dict()
        if set_id:
            overrided_attrs['id'] = set_id
        if set_parent:
            overrided_attrs['parent'] = set_parent
        for wp_image in self.wordpress.media_item_generator(parent_id):
            save_wp_image_to_evernote(self.evernote, notebook_name, wp_image,
                                      overrides=overrided_attrs)

    def preprocess_embedded_images(self, note_link, image_notebook,
                                   dryrun=False):
        """Extract embedded images from post note, create image notes for
        them, and replace reference to image notes in post note.

        Media elements that cannot be extracted are left in the note
        exactly as they were.

        :param note_link: Evernote note link (or GUID) of the post note.
        :param image_notebook: Notebook for the created image notes.
        :param dryrun: If `True`, nothing is written to Evernote.
        :raise ValueError: If a description or title contains `<` or `>`
                           (not raised on dry run).
        """
        # Get note from Evernote
        #: :type en_note: evernote.edam.type.ttypes.Note
        en_note = self.evernote.get_note(note_link)
        # `resources` may be None as well as empty (wp_item_from_note
        # guards against both), so test truthiness rather than len().
        if not en_note.resources:
            # No embedded images to preprocess
            logger.info('No embedded images in note "%s"', en_note.title)
            return
        en_link = self.evernote.note_link(en_note, en_note.title)
        extracted_hashes = set()

        def bin_to_hex_str(bin_str):
            return ''.join(['{:02x}'.format(ord(b)) for b in bin_str])

        def get_resource_by_hex_hash(hex_hash):
            """Return the resource object with hash matching the given hex_hash
            string. If no match, nothing is returned.
            """
            for res in en_note.resources:
                if bin_to_hex_str(res.data.bodyHash) == hex_hash:
                    return res

        def cleanup(text):
            """Return a cleaned up version of the text, or raise an error."""
            for bad_chr in ('<', '>'):
                if bad_chr in text:
                    logger.error('Bad character in text: %s', text)
                    if not dryrun:
                        raise ValueError(text)
            return cgi.escape(self.norm_enc(text))

        def extract_image(m):
            """Extracts image resource referenced in media element given by
            match object m to a new note, and replaces media tag with a-href
            tag to new note.

            Returns the matched text unchanged whenever the element is
            skipped: a `None` result would make `re.sub` drop the element
            from the note content.
            """
            d = m.groupdict()
            media_tag = d.get('mediatag')
            description = cleanup(d.get('desc'))
            title = cleanup(d.get('title'))
            if not all([media_tag, description, title]):
                logger.warning('Skipping media element with missing '
                               'attributes (%s)', m.group(0))
                return m.group(0)
            tag = self._parse_xml_from_string(media_tag)
            hex_hash = tag.get('hash')  # hex-encoded string
            if not hex_hash:
                logger.warning('Skipping media tag with no hash attribute: %s',
                               media_tag)
                return m.group(0)
            resource = get_resource_by_hex_hash(hex_hash)
            if not resource:
                logger.warning('Could not find resource matching hex hash %s',
                               hex_hash)
                return m.group(0)
            # Checked before cloning, so no clone is made for a resource
            # that is going to be skipped anyway.
            note_title = resource.attributes.fileName
            if not note_title:
                logger.warning('Image resource %s missing filename', title)
                return m.group(0)
            if dryrun:
                # skip cloning and fetching data in case of dry run
                new_resource = resource
            else:
                new_resource = self.evernote.clone_resource(resource)
            extracted_hashes.add(hex_hash)
            # The content is raw ENML, so the `<auto>` placeholder must be
            # written escaped to keep the markup well formed.
            note_content = ''.join([
                '<div>id=&lt;auto&gt;</div>\r\n',
                '<div>title=%s</div>\r\n' % (title),
                '<div>link=&lt;auto&gt;</div>\r\n',
                '<div>parent=%s</div>\r\n' % (en_link),
                '<div>caption=%s</div>\r\n' % (description),
                '<div>description=%s</div>\r\n' % (description),
                '<br/>\r\n<hr/>\r\n',
                '<div>%s</div>' % (media_tag),
            ])
            if dryrun:
                logger.info('Got image %s (dryrun)', note_title)
                return '<ImageLinkStub:%s>' % (note_title)
            image_note = self.evernote.makeNote(note_title, note_content,
                                                resources=[new_resource])
            image_note = self.evernote.saveNoteToNotebook(image_note,
                                                          image_notebook)
            return self.evernote.note_link(image_note, note_title)

        # Start here
        # Matches markdown image syntax wrapping an en-media tag:
        #   ![desc](<en-media ...> ";title";)
        # NOTE(review): the `";` delimiters together with the `[^&]` title
        # class suggest the intended delimiter is the `&quot;` entity -
        # confirm against real note content before relying on this pattern.
        media_element_re = re.compile(r'\!\[(?P<desc>[^\]]*)\]\((?P<mediatag>'
                                      r'\<en-media\W[^\>]*\>)\W\"\;'
                                      r'(?P<title>[^\&]*)\"\;\)',
                                      re.IGNORECASE)
        # Extract images and replace with image note link
        en_note.content = self.evernote_encode(
            media_element_re.sub(extract_image, en_note.content))
        # Check if note contains resources that were not extracted
        for res in en_note.resources:
            res_hash = bin_to_hex_str(res.data.bodyHash)
            if res_hash not in extracted_hashes:
                logger.warning('Resource %s not extracted',
                               res.attributes.fileName or res_hash)
        # Update note
        if not dryrun:
            logger.info('Writing changes back to Evernote')
            self.evernote.updateNote(en_note)

    def preprocess(self, query, image_notebook, dryrun=False):
        """Perform preprocess pipeline for notes matching query.

        A failure on one note is logged and does not stop the processing
        of the remaining notes.

        :param query: Evernote query used to find notes to preprocess.
        :param image_notebook: Name of Evernote notebook for extracted images.
        :param dryrun: If `True`, no modifying actions will be performed.
        """
        for _, note in self.evernote.get_notes_by_query(query):
            logger.info('Preprocessing note "%s" (GUID %s)',
                        note.title, note.guid)
            try:
                self.preprocess_embedded_images(note.guid, image_notebook,
                                                dryrun)
            except Exception:
                logger.exception('Failed preprocessing note "%s" (GUID %s)',
                                 note.title, note.guid)
def save_wp_image_to_evernote(en_wrapper, notebook_name, wp_image,
                              force=False, overrides=None):
    """Create an Evernote image note out of a WordPress image.

    The note holds one `attr=value` metadata line for each of `id`,
    `title`, `link`, `parent`, `caption` and `description`, then an
    `hr` separator and the image resource tag.

    :param en_wrapper: Evernote API wrapper used to build and save the note.
    :param notebook_name: Name of the notebook the note is saved to.
    :param wp_image: WordPress image providing the data, the filename and
                     the metadata attributes.
    :param force: Currently unused.
    :param overrides: Optional mapping of attribute name to the value to
                      write instead of the value read from `wp_image`.
    """
    # TODO: Do this better - look up an existing note for this image and
    #       update it (when `force` is set) instead of always creating a
    #       new note.
    if overrides is None:
        overrides = {}
    # prepare resource and note
    resource, resource_tag = en_wrapper.makeResource(wp_image.image_data,
                                                     wp_image.filename)
    content_parts = []
    for attr in ('id', 'title', 'link', 'parent', 'caption', 'description'):
        # The image attribute is read only when it is not overridden.
        if attr in overrides:
            value = overrides[attr]
        else:
            value = getattr(wp_image, attr)
        content_parts.append('<div>%s=%s</div>\r\n' % (attr, value))
    content_parts.append('<hr/>\r\n%s' % (resource_tag))
    wp_image_note = en_wrapper.makeNote(title=wp_image.filename,
                                        content=''.join(content_parts),
                                        resources=[resource])
    # create new note
    logger.info('Creating new WP Image note "%s"', wp_image.filename)
    en_wrapper.saveNoteToNotebook(wp_image_note, notebook_name)
###############################################################################
def _get_adaptor(args):
    """Build the Evernote-WordPress adaptor for the parsed arguments.

    When no WordPress account is given, the adaptor is created with no
    WordPress wrapper (`None`).
    """
    wp_wrapper = None
    account_name = args.wordpress
    if account_name:
        account = settings.WORDPRESS[account_name]
        # Each entry can be either a WordPressCredentials object,
        # or a name of another entry - follow names until credentials
        # are reached.
        while not isinstance(account, settings.WordPressCredentials):
            account = settings.WORDPRESS[account]
        logger.debug('Working with WordPress at URL "%s"',
                     account.xmlrpc_url)
        wp_wrapper = WordPressApiWrapper(account.xmlrpc_url,
                                         account.username,
                                         account.password)
    evernote_wrapper = EvernoteApiWrapper(settings.enDevToken_PRODUCTION)
    return EvernoteWordpressAdaptor(evernote_wrapper, wp_wrapper)
def post_note(adaptor, args):
    """Handle the `post-note` command: post the note given by `en_link`."""
    note_link = args.en_link
    adaptor.post_to_wordpress_from_note(note_link)
# Sub-command registration. Each sub-parser stores its handler in `func`;
# main() calls it as func(adaptor, args).

# `post-note`: post a single note, given by link or GUID.
post_parser = subparsers.add_parser('post-note',
                                    help='Create a WordPress post from '
                                         'Evernote note')
post_parser.add_argument('en_link',
                         help='Evernote note to post '
                              '(full link, or just GUID)')
post_parser.set_defaults(func=post_note)

# `sync`: post every note matched by an Evernote query.
sync_parser = subparsers.add_parser('sync',
                                    help='Synchronize Evernote-WordPress')
sync_parser.add_argument('query',
                         help='Evernote query for notes to sync')
sync_parser.add_argument('--force', action='store_true',
                         help='Post post note regardless of last updated time')
sync_parser.add_argument('--preprocess', action='store_true',
                         help='Perform preprocessing too')
sync_parser.add_argument('--image_notebook',
                         help='Notebook for extracted embedded images')
sync_parser.set_defaults(func=lambda adaptor, args:
                         adaptor.sync(args.query, args.force, args.preprocess,
                                      args.image_notebook))

# `detach`: reset the WordPress-assigned metadata of the matched notes.
detach_parser = subparsers.add_parser('detach',
                                      help='Detach Evernote-WordPress '
                                           'synchronization')
detach_parser.add_argument('query',
                           help='Evernote query for notes to detach')
detach_parser.set_defaults(func=lambda adaptor, args:
                           adaptor.detach(args.query))

# `import-images`: save the images of a WordPress post as Evernote notes.
import_images_parser = subparsers.add_parser(
    'import-images',
    help='Import images attached to specified WordPress post into Evernote.')
import_images_parser.add_argument('--parent',
                                  help='Parent post.')
import_images_parser.add_argument('--notebook',
                                  help='Name of dest Evernote notebook.')
import_images_parser.add_argument('--set_id',
                                  help='Override ID value with this.')
import_images_parser.add_argument('--set_parent',
                                  help='Override parent value with this.')
import_images_parser.set_defaults(func=lambda adaptor, args:
                                  adaptor.import_images_to_evernote(
                                      args.parent, args.notebook,
                                      args.set_id, args.set_parent))
# `preprocess`: run the preprocessing pipeline (embedded image extraction)
# on every note matched by an Evernote query.
preprocess_parser = subparsers.add_parser(
    'preprocess',
    help='Run preprocessing blocks on notes that match the query')
preprocess_parser.add_argument('query',
                               help='Evernote query for notes to preprocess')
preprocess_parser.add_argument('--image_notebook',
                               help='Notebook for extracted embedded images')
preprocess_parser.add_argument('--dryrun', action='store_true',
                               help='Don\'t perform the actions')
preprocess_parser.set_defaults(
    func=lambda adaptor, args:
    adaptor.preprocess(args.query,
                       image_notebook=args.image_notebook,
                       dryrun=args.dryrun))
###############################################################################
def _custom_fields(adaptor, unused_args):
    """Print every WordPress post followed by its custom fields.

    Takes the same (adaptor, args) arguments as the sub-command handlers;
    the second argument is ignored. It is not registered with any
    sub-parser in the code visible here.
    """
    for wp_post in adaptor.wordpress.post_generator():
        print wp_post, wp_post.custom_fields
def main():
    """Parse the command line and run the selected sub-command handler."""
    parsed_args = wp_en_parser.parse_args()
    wp_en_adaptor = _get_adaptor(parsed_args)
    # Each sub-parser stored its handler under `func`.
    parsed_args.func(wp_en_adaptor, parsed_args)
# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()