예제 #1
0
def cleanstr(s):
    """Return a string form of `s` with every '/' replaced by '.'.

    The input is first converted with `str()`. If that conversion raises,
    the value is instead passed through `utf()` and encoded as UTF-8
    (`utf` is defined outside this block; presumably it yields a unicode
    string -- confirm against its definition).
    """
    try:
        cleaned = str(s)
    except Exception:
        # Fallback for objects that str() cannot handle directly.
        cleaned = utf(s).encode('utf-8')
    return cleaned.replace('/', '.')
예제 #2
0
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL NEERAJ KUMAR BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""

import csv, json, sys
from nkutils import CSVUnicodeWriter, utf
from collections import Counter

if __name__ == '__main__':
    # Usage: <script> INPUT.json OUTPUT.csv
    # INPUT.json is iterated row by row and each row must provide .keys()
    # and .get(), i.e. it is expected to be a list of JSON objects.

    # Read the whole input up front; the context manager closes the handle
    # instead of leaving it open until interpreter exit.
    with open(sys.argv[1]) as infile:
        data = json.load(infile)

    # 'wb' matches the original (Python 2 csv convention). The `with` block
    # guarantees the CSV is flushed and closed even if a row fails to write.
    with open(sys.argv[2], 'wb') as outfile:
        output = CSVUnicodeWriter(outfile)

        # Count in how many rows each key occurs so that columns can be
        # ordered from most to least common.
        keys = Counter()
        for row in data:
            keys.update(row.keys())

        # Drop keys that start with 'http'; keep most_common() ordering.
        keys = [k for k, _count in keys.most_common() if not k.startswith('http')]

        # Parenthesised single-argument form prints the same text as the
        # Python 2 statement `print keys` and also parses on Python 3.
        print(keys)

        output.writerow(keys)  # header row
        for row in data:
            # Keys missing from a row become empty cells; each value is passed
            # through nkutils.utf before writing.
            cur = [utf(row.get(k, '')) for k in keys]
            output.writerow(cur)
예제 #3
0
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL NEERAJ KUMAR BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""

import csv, json, sys
from nkutils import CSVUnicodeWriter, utf
from collections import Counter

if __name__ == '__main__':
    # Converts a JSON file (argv[1]) into a CSV file (argv[2]).
    # The loaded JSON is iterated and every element is used via .keys() and
    # .get(), so it is expected to be a sequence of objects (dicts).

    # Load the input inside a context manager so the file handle is closed
    # promptly rather than leaked for the lifetime of the process.
    with open(sys.argv[1]) as json_file:
        data = json.load(json_file)

    # Keep the output open only for the duration of the writes; leaving the
    # block flushes and closes it, including on error. Mode 'wb' is kept from
    # the original (Python 2 csv convention).
    with open(sys.argv[2], 'wb') as csv_file:
        output = CSVUnicodeWriter(csv_file)

        # Tally how many rows contain each key.
        key_counts = Counter()
        for row in data:
            key_counts.update(row.keys())

        # Column order: most frequent key first; keys beginning with 'http'
        # are excluded from the output.
        keys = [
            key for key, _n in key_counts.most_common()
            if not key.startswith('http')
        ]

        # Same output as the Python 2 statement `print keys`; the
        # parenthesised form is also valid Python 3 syntax.
        print(keys)

        output.writerow(keys)  # header row
        for row in data:
            # A key absent from this row yields '' so every line has the same
            # number of columns; values go through nkutils.utf first.
            cur = [utf(row.get(key, '')) for key in keys]
            output.writerow(cur)
예제 #4
0
 def norm(s):
     """Return `utf(s)` after calling `.strip()` and then `.lower()` on it.

     `utf` is defined outside this block; presumably it converts `s` to a
     unicode string -- confirm against its definition.
     """
     return utf(s).strip().lower()
예제 #5
0
 def norm(s):
     """Normalize `s`: convert with `utf()`, strip it, then lowercase it.

     `utf` comes from outside this block (presumably a to-unicode helper;
     verify). `.strip()` is called without arguments.
     """
     text = utf(s)
     trimmed = text.strip()
     return trimmed.lower()