def __init__(self, url='', site=''):
    """
    Set up all per-page state; attributes belong here, *not* outside
    of __init__.

    :param url: stored unchanged as ``self.url``.
    :param site: stored unchanged as ``self.site``.
    """
    self.site = site
    self.url = url
    self.session = Manifest.session

    # Text metadata starts out empty.
    self.title = ''
    self.description = ''
    self.keywords = {}
    self.warnings = []

    # Byte translation table mapping each character of ``punctuation``
    # to a single space.
    punct_bytes = punctuation.encode('utf-8')
    blanks = b' ' * len(punctuation)
    self.translation = bytes.maketrans(punct_bytes, blanks)

    # Social counters, all zeroed.
    facebook_counts = dict.fromkeys(
        ('shares', 'comments', 'likes', 'clicks'), 0)
    stumbleupon_counts = {'stumbles': 0}
    self.social = {
        'facebook': facebook_counts,
        'stumbleupon': stumbleupon_counts,
    }

    super(Page, self).__init__()
def __init__(self, url='', base_domain=''):
    """
    Set up all per-page state; attributes belong here, *not* outside
    of __init__.

    :param url: stored unchanged as ``self.url`` and, split with
        ``urlsplit``, as ``self.parsed_url``.
    :param base_domain: split with ``urlsplit`` and stored as
        ``self.base_domain``.
    """
    self.base_domain = urlsplit(base_domain)
    self.parsed_url = urlsplit(url)
    self.url = url

    # Text metadata starts out empty.
    self.title = ''
    self.description = ''
    self.keywords = {}
    self.warnings = []

    # Byte translation table mapping each character of ``punctuation``
    # to a single space.
    punct_bytes = punctuation.encode('utf-8')
    blanks = b' ' * len(punctuation)
    self.translation = bytes.maketrans(punct_bytes, blanks)

    # Social counters, all zeroed.
    facebook_counts = dict.fromkeys(
        ('shares', 'comments', 'likes', 'clicks'), 0)
    self.social = {'facebook': facebook_counts}

    # Link list and word statistics, all empty to begin with.
    self.links = []
    self.total_word_count = 0
    self.wordcount = Counter()
    self.bigrams = Counter()
    self.trigrams = Counter()
    self.stem_to_word = {}
    self.content_hash = None
def freq_dist(data):
    """
    Count how often each word occurs in *data*.

    Every character listed in ``punctuation`` is deleted, then each line
    is split on whitespace. Words are counted case-sensitively.

    :param data: text whose sentences are separated by newlines, given
        either as a ``str``, as UTF-8 encoded ``bytes``, or as an
        iterable of ``str``/``bytes`` lines.
    :returns: a dictionary mapping each word to its number of occurrences.
    :raises UnicodeDecodeError: if bytes input is not valid UTF-8.
    """
    if isinstance(data, (bytes, bytearray)):
        data = data.decode('utf-8')
    # A bare string must be split into lines first; iterating it directly
    # would yield single characters rather than sentences.
    lines = data.splitlines() if isinstance(data, str) else data

    # Deletion table: removes every punctuation character in one pass.
    strip_punctuation = str.maketrans('', '', punctuation)

    counts = {}
    for line in lines:
        if isinstance(line, (bytes, bytearray)):
            line = line.decode('utf-8')
        for word in line.translate(strip_punctuation).split():
            counts[word] = counts.get(word, 0) + 1
    return counts
def __init__(self, url='', site=''):
    """
    Set up all per-page state; attributes belong here, *not* outside
    of __init__.

    :param url: stored unchanged as ``self.url``.
    :param site: stored unchanged as ``self.site``.
    """
    self.site = site
    self.url = url

    # Text metadata starts out empty (keywords is a string in this class).
    self.title = ''
    self.description = ''
    self.keywords = ''
    self.warnings = []
    self.social = {}

    # Byte translation table mapping each character of ``punctuation``
    # to a single space.
    punct_bytes = punctuation.encode('utf-8')
    blanks = b' ' * len(punctuation)
    self.translation = bytes.maketrans(punct_bytes, blanks)

    super(Page, self).__init__()