Esempio n. 1
0
from greplin.scales.meter import MeterStat

import json
from hashlib import md5

import re
import unicodedata

# Stats collection created under the '/index' path with a single meter stat
# named 'docs' (presumably tracks documents indexed -- confirm at the use site).
STATS = scales.collection('/index', MeterStat('docs'))

# NOTE(review): hard-coded, machine-specific absolute path; the input data is
# only found on a machine that has this exact directory layout.
BASE_PATH = '/Users/jisaacso/Documents/projects/bayes-impact/team-thorn/data/escort_all'
# Path of the 'escort_all.tsv' dump file inside BASE_PATH.
FBDUMP = os.path.join(BASE_PATH, 'escort_all.tsv')

# Connection to a server on localhost:9200 (presumably Elasticsearch -- confirm).
es = ESConnection('localhost', 9200)
# Both timeouts are set to 1500.00. Presumably these kwargs are forwarded to
# the underlying HTTP requests and the unit is seconds -- TODO confirm.
es.httprequest_kwargs = {
    'request_timeout': 1500.00,
    'connect_timeout': 1500.00
}
# Matches a run of one or more consecutive space characters.
wspaceNuker = re.compile(' +')


def fold_accents(raw):
    """Return *raw* reduced to plain ASCII, with accents folded away.

    The text is NFKD-normalized so that accented characters decompose into a
    base character plus combining marks, then encoded to ASCII with
    ``errors='ignore'`` so that the marks -- and any other character that has
    no ASCII form -- are dropped.

    Args:
        raw: Text to fold. A byte string is decoded as UTF-8 first; a text
            (unicode) string is used as is.

    Returns:
        A native ``str`` containing only ASCII characters.

    Raises:
        UnicodeDecodeError: If *raw* is a byte string that is not valid UTF-8.
    """
    # ``bytes`` is an alias of ``str`` on Python 2, so this single check covers
    # the byte-string case on both major versions without needing ``unicode``.
    if isinstance(raw, bytes):
        raw = raw.decode('utf-8')
    folded = unicodedata.normalize('NFKD', raw).encode('ascii', 'ignore')
    # On Python 2 the encoded value already is a native str; on Python 3 it is
    # bytes and has to be turned back into text.
    if not isinstance(folded, str):
        folded = folded.decode('ascii')
    return folded


def isspecialchar(char):
    """Return True when *char* is one of the special characters '$' or '.'.

    Args:
        char: The value to test, compared by equality against each special
            character.

    Returns:
        True if *char* equals '$' or '.', otherwise False.
    """
    specialchars = ['$', '.']
    # Without this return the function always yields None, which makes the
    # predicate falsy for every input.
    return char in specialchars
Esempio n. 2
0
from greplin.scales.meter import MeterStat

import json
from hashlib import md5

import re
import unicodedata

# Stats collection created under the '/index' path with a single meter stat
# named 'docs' (presumably tracks documents indexed -- confirm at the use site).
STATS = scales.collection('/index', MeterStat('docs'))

# NOTE(review): hard-coded, machine-specific absolute path; the input data is
# only found on a machine that has this exact directory layout.
BASE_PATH = '/Users/jisaacso/Documents/projects/bayes-impact/team-thorn/data/escort_all'
# Path of the 'escort_all.tsv' dump file inside BASE_PATH.
FBDUMP = os.path.join(BASE_PATH, 'escort_all.tsv')

# Connection to a server on localhost:9200 (presumably Elasticsearch -- confirm).
es = ESConnection('localhost', 9200)
# Both timeouts are set to 1500.00. Presumably these kwargs are forwarded to
# the underlying HTTP requests and the unit is seconds -- TODO confirm.
es.httprequest_kwargs = {
    'request_timeout': 1500.00,
    'connect_timeout': 1500.00
}
# Matches a run of one or more consecutive space characters.
wspaceNuker = re.compile(' +')
def fold_accents(raw):
    """Return *raw* reduced to plain ASCII, with accents folded away.

    The text is NFKD-normalized so that accented characters decompose into a
    base character plus combining marks, then encoded to ASCII with
    ``errors='ignore'`` so that the marks -- and any other character that has
    no ASCII form -- are dropped.

    Args:
        raw: Text to fold. A byte string is decoded as UTF-8 first; a text
            (unicode) string is used as is.

    Returns:
        A native ``str`` containing only ASCII characters.

    Raises:
        UnicodeDecodeError: If *raw* is a byte string that is not valid UTF-8.
    """
    # ``bytes`` is an alias of ``str`` on Python 2, so this single check covers
    # the byte-string case on both major versions without needing ``unicode``.
    if isinstance(raw, bytes):
        raw = raw.decode('utf-8')
    folded = unicodedata.normalize('NFKD', raw).encode('ascii', 'ignore')
    # On Python 2 the encoded value already is a native str; on Python 3 it is
    # bytes and has to be turned back into text.
    if not isinstance(folded, str):
        folded = folded.decode('ascii')
    return folded

def isspecialchar(char):
    """Tell whether ``char`` equals one of the special characters '$' or '.'."""
    return char in ('$', '.')

def fb_to_es(line):
    entry = line.split('\t')
    if not len(entry) == 13:
        return None