Example #1
# module-level imports used by this snippet
import codecs
from collections import defaultdict as dd
from multiprocessing import Pool


def extract_from_file(filename, num_process):
    import utils

    global LOGGER
    LOGGER = utils.get_logger()

    dataset_path = u'../datasets/wiki'
    # words that have already been processed are skipped below
    processed_words = get_target_words(dataset_path)

    # one job list per target word, built from the whitespace-separated input file
    jobs = dd(list)
    for line in codecs.open(filename, encoding='utf-8'):
        line = line.split()
        target_word, page_title, offset = line[:3]
        if target_word not in processed_words:
            jobs[target_word].append(dict(word=target_word, page_title=page_title,
                                          offset=offset, fetch_links=True))

    LOGGER.info("Total of {} jobs available. Number of consumers = {}".format(len(jobs), num_process))
    if num_process > 1:
        pool = Pool(num_process)
        pool.map(extract_instances_for_word, jobs.values())
    else:
        # for v in jobs.values():
        for v in [jobs['milk']]:  # single-word run; the general loop is commented out above
            extract_instances_for_word(v)

    LOGGER.info("Done.")
Example #2
# module-level imports used by this snippet
import codecs
from collections import defaultdict as dd
from functools import partial
from multiprocessing import Pool


def extract_from_file(filename, num_process, dataset_path, fetch_links=True):
    import utils

    global LOGGER
    LOGGER = utils.get_logger()

    # words that have already been processed are skipped below
    processed_words = get_target_words(dataset_path)

    # one job list per target word, built from the whitespace-separated input file
    jobs = dd(list)
    for line in codecs.open(filename, encoding='utf-8'):
        line = line.split()
        target_word, page_title, offset = line[:3]
        if target_word not in processed_words:
            jobs[target_word].append(dict(word=target_word, page_title=page_title, offset=offset,
                                          fetch_links=fetch_links))

    LOGGER.info("Total of {} jobs available. Number of consumers = {}".format(len(jobs), num_process))
    if num_process > 1:
        pool = Pool(num_process)
        func = partial(extract_instances_for_word, wiki_dir=dataset_path)
        pool.map(func, jobs.values())
    else:
        # for v in [jobs['milk']]:
        for v in jobs.values():
            extract_instances_for_word(v, dataset_path)

    LOGGER.info("Done.")
Example #3
# module-level imports used by this snippet
import codecs
import os
from collections import Counter, defaultdict as dd


def get_sense_counts(wiki_dir):
    words = get_target_words(wiki_dir)
    word_sense_dict = dd(list)
    for word in words:
        fn = os.path.join(wiki_dir, "%s.clean.txt" % word)
        for line in codecs.open(fn, encoding='utf8'):
            line = line.strip().split('\t')
            try:
                sense = line[2]
                word_sense_dict[word].append(sense)
            except IndexError:
                print("IndexError for %s - %s" % (word, line))

    # count how often each sense occurs per word
    word_sense_count = dict()
    for w, s in word_sense_dict.items():
        word_sense_count[w] = Counter(s)

    return word_sense_count
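
Examples #1–#3 all rely on a get_target_words(wiki_dir) helper whose definition is not shown here. Since Example #3 reads one <word>.clean.txt file per target word from that directory, a minimal sketch could derive the word list from the file names; the naming convention is an assumption inferred from Example #3, not confirmed by the source. (Example #4 below calls a different utils.get_target_words(patient, session, block_type) from another codebase.)

import os


def get_target_words(wiki_dir):
    # assumed helper: one '<word>.clean.txt' file per target word in wiki_dir
    suffix = '.clean.txt'
    return sorted(fn[:-len(suffix)] for fn in os.listdir(wiki_dir) if fn.endswith(suffix))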
Example #4
import numpy as np
import matplotlib.pyplot as plt
import utils

patient = 83
# session = 2
# block_type = 'syntactic' # syntactic/pragmatic
SOA = 600  # in msec

for session in [3]:
    # for block_type in ['pragmatic', 'syntactic']:
    for block_type in ['pragmatic']:
        #############
        # LOAD DATA #
        #############
        target_words = utils.get_target_words(patient, session, block_type)
        path2data = f'../data/patient_{patient}_s{session}'
        TrialInfo, cherries = utils.get_data(path2data, block_type)
        # check that there's unit activity for each word
        assert len(TrialInfo['word_strings']) == cherries[1]['trial_data'].shape[0]

        ####################
        # GENERATE RASTERS #
        ####################
        num_target_words = len(target_words)
        times = np.arange(-1000, 2000)
        linelength = 0.3

        for unit in cherries.keys():
            # PREPARE FIGURE
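
Example #4 breaks off just as the per-unit figure preparation begins. Based on the variables it sets up (times, linelength, SOA), one unit's raster might be drawn roughly as below; the layout of cherries[unit]['trial_data'] and how spike times are extracted from it are assumptions, so the sketch takes the spike trains as an argument instead.

import matplotlib.pyplot as plt


def plot_raster(spike_trains, soa, linelength=0.3):
    # spike_trains: list of 1-D arrays of spike times (msec) relative to word onset
    fig, ax = plt.subplots()
    ax.eventplot(spike_trains, linelengths=linelength, colors='k')
    ax.axvline(0, color='r', lw=0.5)    # word onset
    ax.axvline(soa, color='b', lw=0.5)  # onset of the next word, one SOA later
    ax.set_xlabel('Time relative to word onset (msec)')
    ax.set_ylabel('Trial')
    return fig, ax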