import codecs
import os
from collections import Counter, defaultdict as dd
from functools import partial
from multiprocessing import Pool


def extract_from_file(filename, num_process):
    # get_target_words and extract_instances_for_word are defined elsewhere
    # in this module.
    import utils
    global LOGGER
    LOGGER = utils.get_logger()
    dataset_path = u'../datasets/wiki'
    # get processed words
    processed_words = get_target_words(dataset_path)
    jobs = dd(list)
    for line in codecs.open(filename, encoding='utf-8'):
        line = line.split()
        target_word, page_title, offset = line[:3]
        if target_word not in processed_words:
            jobs[target_word].append(dict(word=target_word, page_title=page_title,
                                          offset=offset, fetch_links=True))
    LOGGER.info("Total of {} jobs available. Num of consumers = {}".format(len(jobs), num_process))
    if num_process > 1:
        pool = Pool(num_process)
        pool.map(extract_instances_for_word, jobs.values())
    else:
        # for v in jobs.values():
        for v in [jobs['milk']]:  # debug leftover: processes only 'milk'
            extract_instances_for_word(v)
    LOGGER.info("Done.")


def extract_from_file(filename, num_process, dataset_path, fetch_links=True):
    # Revised variant: the dataset path and link fetching are parameters
    # instead of being hard-coded.
    import utils
    global LOGGER
    LOGGER = utils.get_logger()
    # get processed words
    processed_words = get_target_words(dataset_path)
    jobs = dd(list)
    for line in codecs.open(filename, encoding='utf-8'):
        line = line.split()
        target_word, page_title, offset = line[:3]
        if target_word not in processed_words:
            jobs[target_word].append(dict(word=target_word, page_title=page_title,
                                          offset=offset, fetch_links=fetch_links))
    LOGGER.info("Total of {} jobs available. Num of consumers = {}".format(len(jobs), num_process))
    if num_process > 1:
        pool = Pool(num_process)
        func = partial(extract_instances_for_word, wiki_dir=dataset_path)
        pool.map(func, jobs.values())
    else:
        for v in jobs.values():
            # for v in [jobs['milk']]:  # debug: process a single word
            extract_instances_for_word(v, dataset_path)
    LOGGER.info("Done.")
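

# Hypothetical usage sketch; the job file name and process count below are
# illustrative, not from the original repo. Each input row is expected to
# carry at least "<target_word> <page_title> <offset>", matching the
# parsing above.
if __name__ == '__main__':
    extract_from_file('wiki_jobs.txt', num_process=4,
                      dataset_path=u'../datasets/wiki', fetch_links=True)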


def get_sense_counts(wiki_dir):
    words = get_target_words(wiki_dir)
    word_sense_dict = dd(list)
    for word in words:
        fn = os.path.join(wiki_dir, "%s.clean.txt" % word)
        for line in codecs.open(fn, encoding='utf8'):
            line = line.strip().split('\t')
            try:
                sense = line[2]  # sense tag is the third tab-separated field
                word_sense_dict[word].append(sense)
            except IndexError:
                print("IndexError for %s - %s" % (word, line))
    word_sense_count = dict()
    for w, s in word_sense_dict.items():
        word_sense_count[w] = Counter(s)
    return word_sense_count
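

# Hypothetical inspection helper: prints the per-word sense distribution
# returned by get_sense_counts. The default wiki directory is an assumption
# carried over from the extractor above.
def print_sense_counts(wiki_dir=u'../datasets/wiki'):
    for word, counts in sorted(get_sense_counts(wiki_dir).items()):
        total = sum(counts.values())
        print("%s: %d instances, %d senses, most common: %s"
              % (word, total, len(counts), counts.most_common(1)))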


import numpy as np
import matplotlib.pyplot as plt

import utils

patient = 83
# session = 2
# block_type = 'syntactic'  # syntactic/pragmatic
SOA = 600  # in msec

for session in [3]:
    # for block_type in ['pragmatic', 'syntactic']:
    for block_type in ['pragmatic']:
        #############
        # LOAD DATA #
        #############
        target_words = utils.get_target_words(patient, session, block_type)
        path2data = f'../data/patient_{patient}_s{session}'
        TrialInfo, cherries = utils.get_data(path2data, block_type)
        # check that there's unit activity for each word
        assert len(TrialInfo['word_strings']) == cherries[1]['trial_data'].shape[0]

        ####################
        # GENERATE RASTERS #
        ####################
        num_target_words = len(target_words)
        times = np.arange(-1000, 2000)
        linelength = 0.3
        for unit in cherries.keys():
            # PREPARE FIGURE
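            # The loop body is truncated in the source. What follows is a
            # minimal, hypothetical sketch of raster generation with
            # plt.eventplot, assuming cherries[unit]['trial_data'] is a
            # (num_trials, num_timepoints) binary spike matrix aligned to
            # `times`; the data layout, onset markers, and output file name
            # are assumptions, not confirmed by the original script.
            fig, ax = plt.subplots(figsize=(10, 6))
            # convert each trial's spike indicators to spike times in msec
            spike_trains = [times[np.flatnonzero(trial)]
                            for trial in cherries[unit]['trial_data']]
            ax.eventplot(spike_trains, linelengths=linelength, colors='k')
            ax.axvline(0, color='r', ls='--')    # assumed stimulus onset
            ax.axvline(SOA, color='b', ls='--')  # assumed next-word onset at SOA
            ax.set_xlabel('Time (msec)')
            ax.set_ylabel('Trial')
            ax.set_title(f'Patient {patient}, session {session}, '
                         f'{block_type}, unit {unit}')
            fig.savefig(f'raster_patient_{patient}_s{session}_'
                        f'{block_type}_unit_{unit}.png')
            plt.close(fig)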