Ejemplo n.º 1
0
def read_data():
    """Yield one Explanation per leading token of each line in ``_raw_data``.

    Only the first two whitespace-separated tokens of a line are kept. Every
    kept token is passed as the first argument of ``Explanation``; the second
    argument is the kept tokens joined by a single space. A line without any
    token produces nothing.
    """
    with open(_raw_data, mode='r', encoding='utf-8') as source:
        for raw_line in source:
            head_tokens = raw_line.split()[:2]
            head = ' '.join(head_tokens)
            # Tokens produced by str.split() are never empty, so no length
            # filter is required here.
            for token in head_tokens:
                yield Explanation(token, head)
Ejemplo n.º 2
0
def read_data():
    """Yield Explanations for the distinct ``title@text`` lines of ``_raw_data``.

    Each line is split on '@' into exactly two parts (a ``ValueError`` is
    raised otherwise). Duplicate pairs are collapsed and the remaining pairs
    are yielded in sorted order. The text part is passed on as read, i.e.
    including any trailing newline.
    """
    pairs = set()
    with open(_raw_data, mode='r', encoding='utf-8') as source:
        for raw_line in source:
            title, text = raw_line.split('@')
            pairs.add((title, text))
    for title, text in sorted(pairs):
        yield Explanation(title, text)
def read_data():
    """Yield an Explanation for every distinct word of every distinct line.

    The lines of ``_raw_data`` are de-duplicated and processed in sorted
    order. Each line is split on runs of non-word characters; every distinct
    non-empty piece is yielded (in sorted order) as
    ``Explanation(word, line)`` where ``line`` has its newline characters
    stripped from both ends.
    """
    with open(_raw_data, 'r', encoding='utf-8') as source:
        # Iterating the file object yields its lines; a set drops duplicates.
        phrases = set(source)
    for line in sorted(phrases):
        text = line.strip('\n')
        # Raw string: '\W' inside a normal literal is an invalid escape
        # sequence and triggers a warning on recent Python versions.
        for word in sorted(set(re.split(r'\W+', line))):
            if word:  # re.split produces '' at the edges of the line
                yield Explanation(word, text)
def read_articles():
    """
    Generator yielding one Explanation per (title, description) line pair.

    ``_raw_data`` is read two lines at a time: the first line of a pair is
    the title, the second the description. Reading stops at end of file or
    at the first title line that is empty once its newlines are stripped.
    """
    with open(_raw_data, mode='r', encoding='utf-8') as source:
        # iter(readline, '') ends at EOF, where readline() returns ''.
        for raw_title in iter(source.readline, ''):
            title = raw_title.strip('\n')
            if not title:
                return
            description = source.readline().strip('\n')
            yield Explanation(title, description)
def read_data():
    """Yield word-level Explanations built from (title, author) line pairs.

    ``_raw_data`` is read two lines at a time (title, then author) until an
    empty title is met. The pair at 1-based position ``i`` contributes
    ``1.0 / i`` to the weight of its (title, author) key, so repeated pairs
    accumulate weight. For every key, in sorted order, each distinct
    non-empty word of the title (split on non-word characters, sorted) is
    yielded as ``Explanation(title=word, text=title, prior_rating=weight)``
    with its ``author`` attribute set to the author line.
    """
    weights = {}
    with open(_raw_data, 'r', encoding='utf-8') as source:
        position = 0
        while True:
            position += 1
            title = source.readline().strip('\n')
            author = source.readline().strip('\n')
            if not title:
                break
            key = (title, author)
            weights[key] = weights.get(key, 0.0) + 1.0 / position
    # The file is fully consumed at this point, so it is closed before any
    # value is handed to the consumer.
    for (title, author), weight in sorted(weights.items()):
        # Raw string: '\W' inside a normal literal is an invalid escape.
        for word in sorted(set(re.split(r'\W+', title))):
            if word:  # re.split produces '' at the edges of the title
                explanation = Explanation(title=word, text=title, prior_rating=weight)
                explanation.author = author
                yield explanation
Ejemplo n.º 6
0
def read_data():
    """Yield Explanations from tab-separated ``word<TAB>explanation<TAB>rate`` lines.

    Rates of repeated (word, explanation) pairs are summed as integers. The
    pairs are then yielded in sorted order as
    ``Explanation(word, expl, prior_rating=total / 400000)``.

    Raises:
        ValueError: if a line does not hold exactly three tab-separated
            fields, or if its rate field is not an integer literal.
    """
    totals = {}
    with open(_raw_data, 'r', encoding='utf-8') as source:
        for line in source:
            word, expl, rate = line.strip('\n').split('\t')
            key = (word, expl)
            # Convert before accumulating: adding the raw strings would
            # concatenate digits ("12" + "3" -> "123") instead of summing.
            totals[key] = totals.get(key, 0) + int(rate)
    # The file is fully consumed at this point, so it is closed before any
    # value is handed to the consumer.
    for (word, expl), total in sorted(totals.items()):
        # 400000 is the fixed divisor used by the original code.
        yield Explanation(word, expl, prior_rating=total / 400000)
def read_data():
    """Yield word-level Explanations built from (title, author) line pairs.

    Lines of ``_raw_data`` are consumed in pairs: a title line followed by
    an author line; reading stops at the first empty title. The n-th pair
    (counting from 1) adds ``1.0 / n`` to the weight stored for its
    (title, author) key. Keys are then visited in sorted order and, for each
    distinct non-empty word of the title (split on non-word characters,
    sorted), an ``Explanation(title=word, text=title, prior_rating=weight)``
    is yielded after its ``author`` attribute has been set.
    """
    weights = {}
    with open(_raw_data, 'r', encoding='utf-8') as source:
        pair_index = 0
        while True:
            pair_index += 1
            title = source.readline().strip('\n')
            author = source.readline().strip('\n')
            if not title:
                break
            key = (title, author)
            weights[key] = weights.get(key, 0.0) + 1.0 / pair_index
    # Reading is complete, so the file is closed before values are yielded.
    for (title, author), weight in sorted(weights.items()):
        # Raw string: '\W' inside a normal literal is an invalid escape.
        for word in sorted(set(re.split(r'\W+', title))):
            if word:  # skip the '' pieces re.split emits at the edges
                explanation = Explanation(title=word,
                                          text=title,
                                          prior_rating=weight)
                explanation.author = author
                yield explanation
def read_data():
    """Yield word-level Explanations rated by relative title count.

    ``_raw_data`` is read two lines at a time: a title line followed by a
    line holding an integer count; reading stops at the first empty title.
    Counts of repeated titles are summed. For every title, in sorted order,
    each distinct non-empty word of the title (split on non-word characters,
    sorted) is yielded as ``Explanation(title=word, text=title,
    prior_rating=count / max_count)`` where ``max_count`` is the largest
    summed count. A file without any title yields nothing.

    Raises:
        ValueError: if a count line is not an integer literal.
        ZeroDivisionError: if the largest summed count is 0.
    """
    totals = {}
    with open(_raw_data, 'r', encoding='utf-8') as source:
        while True:
            title = source.readline().strip('\n')
            if not title:
                break
            count = int(source.readline().strip('\n'))
            totals[title] = totals.get(title, 0) + count
    if not totals:
        # max() of an empty sequence raises; an empty file has nothing to yield.
        return
    max_count = max(totals.values())
    for title, count in sorted(totals.items()):
        # Raw string: '\W' inside a normal literal is an invalid escape.
        for word in sorted(set(re.split(r'\W+', title))):
            if word:  # skip the '' pieces re.split emits at the edges
                yield Explanation(title=word,
                                  text=title,
                                  prior_rating=count / max_count)
Ejemplo n.º 9
0
def read_data():
    """Yield an Explanation for each '$'-separated line of ``_raw_data``.

    The second and third '$'-separated fields of a line are passed, in that
    order, as the two positional arguments of ``Explanation``. A line with
    fewer than three fields raises ``IndexError``.
    """
    with open(_raw_data, mode='r', encoding='utf-8') as source:
        for raw_line in source:
            fields = raw_line.split('$')
            yield Explanation(fields[1], fields[2])
Ejemplo n.º 10
0
 def apply(e: Explanation):
     """Set ``e.prior_rating`` to a mean frequency score and return ``e``.

     ``get_average_frequency`` is evaluated on every piece obtained by
     splitting ``e.text`` with ``sep_re`` and once more on ``e.title``; the
     arithmetic mean of those values is stored in ``e.prior_rating``.
     """
     scores = [get_average_frequency(piece) for piece in re.split(sep_re, e.text)]
     scores.append(get_average_frequency(e.title))
     e.prior_rating = sum(scores) / len(scores)
     return e
Ejemplo n.º 11
0
 def apply(e: Explanation):
     """Rate ``e`` by the mean frequency of its text pieces and its title.

     The text is split with ``sep_re``; ``get_average_frequency`` is applied
     to each resulting piece and then to ``e.title``. The mean of these
     values is assigned to ``e.prior_rating`` and ``e`` itself is returned.
     """
     scores = [get_average_frequency(part) for part in re.split(sep_re, e.text)]
     scores.append(get_average_frequency(e.title))
     total = sum(scores)
     e.prior_rating = total / len(scores)
     return e