import os
import json
import re
import unicodedata
from hashlib import md5

from greplin.scales.meter import MeterStat

# Meter counting documents as they flow through the indexer.
# NOTE(review): `scales` and `ESConnection` are not imported in this chunk;
# they are presumably brought into scope earlier in the file — confirm.
STATS = scales.collection('/index', MeterStat('docs'))

BASE_PATH = '/Users/jisaacso/Documents/projects/bayes-impact/team-thorn/data/escort_all'
FBDUMP = os.path.join(BASE_PATH, 'escort_all.tsv')

# Generous timeouts: bulk indexing requests against this cluster can be slow.
es = ESConnection('localhost', 9200)
es.httprequest_kwargs = {
    'request_timeout': 1500.00,
    'connect_timeout': 1500.00
}

# Collapses runs of spaces down to a single space.
wspaceNuker = re.compile(' +')


def fold_accents(raw):
    """Fold accented characters to their ASCII base form.

    Decomposes *raw* with NFKD and drops any character that has no ASCII
    equivalent (e.g. u'caf\\xe9' -> 'cafe').  Accepts byte strings
    (decoded as UTF-8 first) or text.
    """
    if isinstance(raw, bytes):  # py2 str / py3 bytes: decode before normalizing
        raw = raw.decode('utf-8')
    folded = unicodedata.normalize('NFKD', raw).encode('ascii', 'ignore')
    # On Python 2 ``encode`` already yields ``str``; on Python 3 it yields
    # ``bytes``, which is decoded back to text (safe: content is pure ASCII).
    return folded if isinstance(folded, str) else folded.decode('ascii')


def isspecialchar(char):
    """Return True if *char* is one of the characters treated specially."""
    return char in ('$', '.')


def fb_to_es(line):
    # NOTE(review): this definition continues past the visible chunk; only
    # the visible prefix is reproduced here.  Rows are tab-separated and
    # expected to have exactly 13 fields; malformed rows yield None.
    entry = line.split('\t')
    if len(entry) != 13:
        return None