forked from CERNDocumentServer/cds-dojson
/
utils.py
159 lines (132 loc) · 5.29 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# -*- coding: utf-8 -*-
#
# This file is part of CERN Document Server.
# Copyright (C) 2015, 2017 CERN.
#
# Invenio is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# Invenio is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Invenio; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""The CDS DoJson Utils."""
import functools
from collections import defaultdict
import arrow
import six
from dojson.utils import GroupableOrderedDict
class MementoDict(GroupableOrderedDict):
    """Dictionary that remembers which keys have been accessed.

    A key is recorded when it is read through ``__getitem__`` or ``get``,
    or when it is yielded by ``iteritems``/``items``. Recording can be
    switched off for one iteration with ``skip_memento=True``.
    """

    def __new__(cls, *args):
        """Build the instance and attach an empty access memory to it."""
        new = GroupableOrderedDict.__new__(cls, *args)
        # Per-instance state: the set of keys read so far and the flag that
        # temporarily disables recording.
        new.__memory = set()
        new.__skip_memento = False
        return new

    @property
    def accessed_keys(self):
        """Return the set of recorded keys, ``'__order__'`` excluded."""
        return {k for k in self.__memory if k != '__order__'}

    @property
    def not_accessed_keys(self):
        """Return the keys never recorded, ``'__order__'`` excluded.

        A new set is built on every access, so callers may mutate it.
        """
        return {
            k for k in self.keys() if k != '__order__'
        }.difference(self.__memory)

    def iteritems(self, skip_memento=False, **kwargs):
        """Iterate over ``(key, value)`` pairs, recording each key.

        :param skip_memento: when true, keys are not recorded while this
            iteration is running.
        :param kwargs: forwarded to the parent class ``iteritems``.
        """
        self.__skip_memento = skip_memento
        try:
            for key, value in super(MementoDict, self).iteritems(**kwargs):
                self._add_to_memory(key)
                yield (key, value)
        finally:
            # Runs also when the consumer stops early or an error occurs,
            # so recording is never left disabled by accident.
            self.__skip_memento = False

    items = iteritems

    def __repr__(self):
        """Output the representation of the GroupableOrderedDict."""
        # Building the representation must not count as accessing keys.
        out = ("({!r}, {!r})".format(k, v)
               for k, v in self.iteritems(skip_memento=True, repeated=True)
               if k != '__order__')
        return 'GroupableOrderedDict(({out}))'.format(out=', '.join(out))

    def _add_to_memory(self, key):
        """Record ``key`` as accessed unless recording is switched off."""
        if not self.__skip_memento:
            self.__memory.add(key)

    def __getitem__(self, key):
        """Record ``key`` as accessed, then return its value."""
        self._add_to_memory(key)
        return super(MementoDict, self).__getitem__(key)

    def get(self, key, default=None):
        """Record ``key`` as accessed, then delegate to the parent ``get``.

        The key is recorded whether or not it exists in the dictionary.
        """
        self._add_to_memory(key)
        return super(MementoDict, self).get(key, default)
def for_each_squash(f):
    """Squash the results of a rule applied to a repeated field.

    When ``values`` is a list, ``f`` is called once per element and the
    mappings it returns are merged key by key: a key produced once keeps
    its single value, a key produced several times gets the list of its
    values in call order. Any other ``values`` is handed to ``f``
    unchanged and its result is returned as is.

    .. example::
        [{'a': 'foo'}, {'b': 'bar'}] -> {'a': 'foo', 'b': 'bar'}
        [{'a': 'foo'}, {'a': 'bar'}] -> {'a': ['foo', 'bar']}
    """
    @functools.wraps(f)
    def wrapper(self, key, values, **kwargs):
        if not isinstance(values, list):
            return f(self, key, values, **kwargs)

        # Run the rule on every element before merging anything.
        partials = [f(self, key, item, **kwargs) for item in values]

        # Group the produced values by the key they were returned under.
        grouped = defaultdict(list)
        for partial in partials:
            for name, element in six.iteritems(partial):
                grouped[name].append(element)

        # A key seen once is unwrapped; repeated keys keep their list.
        squashed = {}
        for name, elements in six.iteritems(grouped):
            squashed[name] = elements[0] if len(elements) <= 1 else elements
        return squashed
    return wrapper
def not_accessed_keys(blob):
    """Collect the keys of ``blob`` that were never accessed.

    It assumes the blob is an instance of MementoDict or a list.

    For a dict, the result starts from its ``not_accessed_keys`` and is
    extended with the nested misses of every value, each prefixed with
    the key it was found under. For a tuple or list, the misses of all
    elements are merged. Anything else yields an empty set.
    """
    if isinstance(blob, dict):
        unread = blob.not_accessed_keys
        # Walking the blob here must not mark its keys as accessed.
        for name, child in blob.iteritems(skip_memento=True):
            nested = not_accessed_keys(child)
            if not nested:
                continue
            unread.update('{0}{1}'.format(name, sub) for sub in nested)
            # The nested entries replace the bare parent key.
            # NOTE(review): removal is tied to nested misses being present
            # - confirm against upstream.
            unread.discard(name)
        return unread

    if isinstance(blob, (tuple, list)):
        unread = set()
        for item in blob:
            unread.update(not_accessed_keys(item))
        return unread

    return set()
def convert_date_to_iso_8601(date, format_='YYYY-MM-DD', **kwargs):
    """Convert a date string to its ISO 8601 representation.

    The full ISO 8601 layout, for reference:

    YYYY-MM-DDThh:mm:ss.sTZD (eg 1997-07-16T19:20:30.45+01:00)
    YYYY = four-digit year
    MM = two-digit month (01=January, etc.)
    DD = two-digit day of month (01 through 31)
    hh = two digits of hour (00 through 23) (am/pm NOT allowed)
    mm = two digits of minute (00 through 59)
    ss = two digits of second (00 through 59)
    s = one or more digits representing a decimal fraction of a second
    TZD = time zone designator (Z or +hh:mm or -hh:mm)

    :param date: the date string to parse; a falsy value is returned
        unchanged.
    :param format_: output format token string, date only by default.
    :param kwargs: accepted and ignored.
    :returns: the date rendered with ``format_``.
    """
    if not date:
        return date

    # The order is important as arrow tries to apply them top to bottom.
    # This has to stay a list for arrow to treat it as alternatives.
    accepted_formats = [
        'YYYY-MM-DD',
        'YYYY/MM/DD',
        'DD/MM/YYYY',
        'YYYY.MM.DD',
        'DD MMMM YYYY',
        'DD MMM YYYY',
        'DD MMM YY',
        'YYYY-MM',
        'YYYY/MM',
        'YYYY.MM',
        'MMM YYYY',
        'YYYY',
        'YY',
    ]
    parsed = arrow.get(date, accepted_formats)
    return parsed.format(format_)