Example #1
from collections import Counter

def process(file_path):
    # ExRef and smart_tokenize are project helpers assumed to be in scope.
    before_count = Counter()
    cur_count = Counter()
    after_count = Counter()
    with open(file_path, 'r') as content:
        # Iterate the file directly instead of readlines() so lines stream lazily.
        line_gen = (line.strip() for line in content)
        refgen = (ExRef(line) for line in line_gen)
        for idx, ref in enumerate(refgen):
            sent_before = ref.get_sentence(-1)  # sentence before the citation
            sent_cur = ref.get_sentence(0)      # sentence containing the citation
            sent_after = ref.get_sentence(1)    # sentence after the citation
            if idx % 100 == 0:
                print(idx)  # progress indicator
            before_count += Counter(smart_tokenize(sent_before))
            cur_count += Counter(smart_tokenize(sent_cur))
            after_count += Counter(smart_tokenize(sent_after))
    return (before_count, cur_count, after_count)
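For context, a minimal usage sketch follows. The file name refs.txt is hypothetical; most_common is the standard collections.Counter API.

# Usage sketch: 'refs.txt' is a hypothetical input file.
before, cur, after = process('refs.txt')
print(before.most_common(10))  # most frequent tokens before the citation
print(cur.most_common(10))     # most frequent tokens in the citing sentence
print(after.most_common(10))   # most frequent tokens after the citation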
Example #3
def find_cits_with_sentiment_words(file_path):
    ret = set()
    for idx, ref in enumerate(ref_gen(file_path)):
        if idx % 100 == 0:
            print(idx)  # progress indicator
        tokens = set(smart_tokenize(ref.get_sentence()))
        # sentiment_words is a module-level set assumed to be in scope.
        intersect = tokens.intersection(sentiment_words)
        if intersect:  # '<>' is Python 2 syntax; an empty set is falsy, so test directly
            # set.add takes a single element, so store a (ref, words) pair;
            # a tuple (unlike a list) is hashable. Assumes ExRef is hashable too.
            ret.add((ref, tuple(sorted(intersect))))
    return ret
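ref_gen is not defined in these examples. A plausible minimal sketch, assuming it simply streams one ExRef per non-empty stripped line, mirroring the generator pipeline in Example #1:

def ref_gen(file_path):
    # Hypothetical helper: yield one ExRef per non-empty line,
    # following the same open/strip/ExRef pipeline as Example #1.
    with open(file_path, 'r') as content:
        for line in content:
            line = line.strip()
            if line:
                yield ExRef(line)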
Example #5
from collections import defaultdict, Counter

def count_words_in_context(file_path, popular_pmids):
    word_count = defaultdict(Counter)
    for idx, ref in enumerate(ref_gen(file_path)):
        if idx % 100 == 0:
            print(idx)  # progress indicator
        if ref.cited_id not in popular_pmids:
            continue  # only track frequently cited papers
        citing_sentence = ref.get_sentence(0)  # sentence containing the citation
        tokens = smart_tokenize(citing_sentence)
        # update() counts the tokens in place, avoiding a throwaway Counter per line.
        word_count[ref.cited_id].update(tokens)
    return word_count
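A short usage sketch; the file name and PMID values below are hypothetical:

# Usage sketch: file name and PMIDs are hypothetical.
popular = {'12345678', '23456789'}
counts = count_words_in_context('refs.txt', popular)
for pmid, counter in counts.items():
    print(pmid, counter.most_common(5))  # top tokens per cited paper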