/
metadata.py
113 lines (103 loc) · 3.55 KB
/
metadata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#!/bin/python
# -*- coding: utf-8 -*-
import json
from pandocfilters import Str, Space
import pandocfilters
def get_metadata_field(json_lst, field):
    '''
    Look up a metadata field (field :: str) in a pandoc JSON list
    (json_lst :: list) and return its simplified value.

    The field is read from the 'unMeta' mapping of the first element
    of json_lst and passed through walk_metadata. Any KeyError raised
    along the way (no 'unMeta' entry, or a field value without the
    expected keys, which includes an absent field) results in the
    empty string being returned.
    '''
    try:
        meta = json_lst[0]['unMeta']
        # An absent field becomes {}, which has no 't' key to walk.
        node = meta.get(field, {})
        value = walk_metadata(node)
    except KeyError:
        value = ''
    return value
def walk_metadata(x):
    '''
    Convert a pandoc metadata node (x :: dict with a 't' type tag and
    a 'c' content entry) into a plainer Python value.

    - 'MetaBool': the content is returned unchanged.
    - 'MetaInlines' and 'MetaString': the node is flattened to a str
      with pandocfilters.stringify.
    - 'MetaList': a list with every element converted recursively.
    - any other type tag: None.

    A node without a 't' key raises KeyError.

    FIXME: Maybe formatting for e.g. math should be retained instead of
    converting to a string?
    '''
    kind = x['t']
    if kind == 'MetaBool':
        return x['c']
    if kind in ('MetaInlines', 'MetaString'):
        return str(pandocfilters.stringify(x))
    if kind == 'MetaList':
        return [walk_metadata(item) for item in x['c']]
    # Other node types are not handled.
    return None
# Functions for dealing with tags
def standardize_tags(tags, tag_synonyms):
    '''
    Replace synonymous tags by their canonical names.

    tags :: list is the input tag list; tag_synonyms :: dict maps each
    canonical tag to a collection of its synonyms, e.g.
        "university-of-washington": ["uw", "uwashington"],
    so that "uw" and "uwashington" both become
    "university-of-washington".

    Returns a new list. A tag that appears among the synonyms of
    several canonical tags is replaced by all of them; a tag that is
    nobody's synonym is kept unchanged.
    '''
    standardized = []
    for tag in tags:
        matches = []
        for canonical, synonyms in tag_synonyms.items():
            if tag in synonyms:
                matches.append(canonical)
        # Fall back to the tag itself when no synonym entry mentions it.
        standardized += matches or [tag]
    return standardized
def imply_tags(tags, tag_implications):
    '''
    Take a list of tags (tags :: list) along with an OrderedDict of tag
    implications (tag_implications :: OrderedDict, mapping a tag to the
    tags it implies). Return a new list of tags that includes all the
    implications. Apply this after standardizing tags.

    The implications are applied in a single pass in the iteration
    order of tag_implications, so a tag added by an earlier entry can
    trigger a later entry, but not the other way round.

    The result contains no duplicates and is ordered deterministically:
    the original tags first (in their given order), followed by implied
    tags in the order they were added. The input list is not modified.
    '''
    result = []
    seen = set()
    for tag in tags:
        if tag not in seen:
            seen.add(tag)
            result.append(tag)
    for key in tag_implications:
        if key not in seen:
            continue
        for implied in tag_implications[key]:
            # De-duplicate while appending so first-seen order is kept;
            # list(set(...)) would return an arbitrary order.
            if implied not in seen:
                seen.add(implied)
                result.append(implied)
    return result
def pack_tags(tags):
    '''
    Wrap each tag of a list (tags :: list) in a 'MetaInlines' node
    holding a single Str element, and return the list of these nodes
    (the content of a YAML-JSON tag list).
    '''
    return [{'t': 'MetaInlines', 'c': [Str(tag)]} for tag in tags]
def get_tags(x):
    '''
    Take a pandoc JSON list (x :: list) whose "tags" metadata field is
    either a YAML-JSON list or a string of comma-delimited tags, and
    return a cleaned list of the tags.

    For a comma-delimited string, each tag has its surrounding spaces
    removed and empty entries are dropped, so a missing or empty
    "tags" field gives [] and "a,,b" gives ["a", "b"]. A list value is
    returned as is. Any other kind of value gives [].
    '''
    tags = get_metadata_field(x, "tags")
    if isinstance(tags, str):
        # ''.split(",") is [''], so without the filter a document with
        # no tags would end up with a single empty tag.
        stripped = (tag.strip(" ") for tag in tags.split(","))
        return [tag for tag in stripped if tag]
    if isinstance(tags, list):
        return tags
    # Unsupported metadata value: report no tags rather than None, so
    # that callers can always iterate over the result.
    return []
def organize_tags(json_lst, tag_synonyms, tag_implications):
    '''
    Takes a JSON list, a dict of tag_synonyms, and an OrderedDict of
    tag_implications. The tags of the document are read with get_tags,
    standardized with standardize_tags and expanded with imply_tags.

    Returns a dictionary with two entries:
    under 'json', the same json_lst object that was passed in, whose
    existing 'tags' metadata node has been rewritten in place as a
    'MetaList' of the organized tags;
    under 'tags', a separate list of the organized tags.

    If the metadata has no 'tags' entry, the node that gets filled in
    is a fresh dict that is not stored back into the metadata.
    '''
    organized = imply_tags(
        standardize_tags(get_tags(json_lst), tag_synonyms),
        tag_implications
    )
    # Copy for the caller, independent of the list that gets packed.
    tag_copy = list(organized)
    metadata = json_lst[0]['unMeta']
    tags_node = metadata.get('tags', {})
    tags_node['t'] = 'MetaList'
    tags_node['c'] = pack_tags(organized)
    return {'json': json_lst, 'tags': tag_copy}