Example #1
def get_ref_pixels(ref_wvl, wvlsol0, x=None):
    """
    Given a list of wavelength tuples, return the expected pixel
    positions from the initial wavelength solution wvlsol0.

    """

    if x is None:
        x = np.arange(len(wvlsol0))
    um2pixel = interp1d(wvlsol0, x, bounds_error=False)

    ref_pixel = [um2pixel(w) for w in ref_wvl]

    # there could be cases when the ref lines fall out of bounds,
    # resulting in NaNs.
    nan_filter = [np.all(np.isfinite(p)) for p in ref_pixel]
    valid_list = [[np.all(np.isfinite(p))]*len(p) for p in ref_pixel]

    group_flags = get_group_flags(ref_wvl)
    df = pd.DataFrame(dict(wavelength=flatten(ref_wvl),
                           valid=flatten(valid_list),
                           group_flag=group_flags,
                           group_id=np.add.accumulate(group_flags)))

    ref_pixel_filtered = [r for r, m in zip(ref_pixel, nan_filter) if m]
    df2 = df.join(pd.DataFrame(dict(pixel=flatten(ref_pixel_filtered)),
                               index=df.index[flatten(valid_list)]))

    return df2
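Every snippet on this page leans on a small flatten helper from its own project. As a point of reference, here is a minimal one-level sketch consistent with how most of the list-based examples use it (illustrative only; some projects flatten recursively, and a few use a tensor variant, noted near the end of the page):

def flatten(list_of_lists):
    # Flatten one level of nesting: [[1, 2], [3]] -> [1, 2, 3]
    return [item for sublist in list_of_lists for item in sublist]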
Example #2
 def p_program(self, p):
     """program : declarations fundefs instructions"""
     p[0] = AST.Program(AST.Declarations.mapTypedDeclarations(p[1]), utils.flatten(p[2]),
                        AST.Instructions(utils.flatten(p[3])))
     p[0].set_parents()
     p[0].set_scope()
     p[0].set_position(p.lexer.lexer.lineno, p.lexer.lexer.lexpos)
Example #3
def mergedict(dicts):
    result = {}
    keys = set(flatten([d.keys() for d in dicts]))
    for k in keys:
        vals = [v for v in [d.get(k) for d in dicts] if v]
        if len(vals) == 0:
            continue
        if k in ("_id", "timestamp"):
            continue
        if isinstance(vals[0], dict):
            result[k] = mergedict(vals)
        elif isinstance(vals[0], (str, unicode, int, bool, long)):
            v = set(flatten(vals))
            if len(v) == 1:
                result[k] = v.pop()
            else:
                result[k] = list(v)
        elif isinstance(vals[0], (list, tuple)) and isinstance(vals[0][0], (str, unicode, int, bool)):
            result[k] = list(set(flatten(vals)))
        elif k == "interfaces":
            result[k] = mergedict_bykeys(vals, "ifindex")
        elif k == "arp":
            result[k] = mergedict_bykeys(vals, "mac", "v4addr")
        elif k == "neighbours":
            result[k] = mergedict_bykeys(vals, "v4addr")
        elif k == "v4routes":
            result[k] = mergedict_bykeys(vals, "network")
        elif k == "bridges":
            result[k] = mergedict_bykeys(vals, "name")
        elif k == "addresses":
            result[k] = mergedict_bykeys(vals, "mac")
        else:
            raise MergeError("unhandled key: %s" % k)
    return result
Example #4
def semantics(doc):
  prep = preprocess(doc)
  return (
    flatten( prep.pos_tags() ),
    prep.noun_phrases(),
    flatten( prep.get_entities() )
  )
Example #5
def count_intervals_in_all_songs(songs):
    intervals_direction = utils.flatten([song.intervals_with_direction for song in songs])
    intervals = utils.flatten([song.intervals for song in songs])
    print "All Songs\n"
    print "  With direction"
    utils.print_dic_perc(Counter(intervals_direction))
    print "  Without direction"
    utils.print_dic_perc(Counter(intervals))
Example #6
 def fit(self, X, y, copy=False):
     IRSystem.fit(self, X, y, copy)
     self._target = list(self._target)
     self._labels = tuple(set(flatten(self._target)))
     self._label_dist = Counter(flatten(self._target))
     self._target = np.array(self._target)
     self.compute_prior()
     self.compute_conditional()
Example #7
	def global_region_weights(self):
		''' returns default weights for all similarities '''
		#return utils.norm_list(utils.flatten(self._feature_compare_helper(lambda fs1, not_used: [1] + utils.flatten(fs1.region_weights()), self, 'all')))
		#return utils.norm_list(utils.flatten(map(lambda fs: [1] + utils.flatten(fs.region_weights()), self.feature_sets))) 
		self._load_check()
		weights = list()
		for fs in self.feature_sets:
			region_weights = fs.region_weights()
			weights.append([1] + utils.flatten(region_weights))
		return utils.norm_list(utils.flatten(weights))
Example #8
def PLS_Models(model_dict, validation_dict, target, **args):
    '''This section of code will create and test the prospective models'''
    
    '''Pick the model building parameters out of args'''
    try: break_flag = args['break_flag']   #Decide whether or not we are going to test models that include a midseason split
    except KeyError: break_flag = 2 

    try: limits=utils.flatten([args['specificity']])
    except KeyError: limits = np.arange(11.)/100 + 0.85     #Default: test specificity limits from 0.85 to 0.95 
    
    try: threshold=utils.flatten([args['threshold']])
    except KeyError: threshold=[0,1]                          #Default: threshold by counts

    if break_flag != 1: model=pls.Model( model_dict, model_target=target.lower() )
    if break_flag != 0: mw=pls.Model_Wrapper( data=model_dict, model_target=target.lower() )

    results = list()

    #Test models w/ midseason split
    for spec_lim in limits:
        for threshold_method in threshold:
            if threshold_method==0: balance_method=1 #
            else: balance_method=0
            
            if break_flag != 0:
                '''mw.Generate_Models(breaks=1, specificity=spec_lim, wedge='julian', threshold_method=threshold_method, balance_method=balance_method)
                imbalance = mw.imbalance
                split_index = mlab.find(mw.imbalance[:,1] == np.min(mw.imbalance[:,1]))'''
                
                imbalance = pls_parallel.Tune_Split(mw, specificity=spec_lim, wedge='julian', threshold_method=threshold_method, balance_method=balance_method)
                split_index = mlab.find(imbalance[:,1] == np.min(imbalance[:,1]))
                
                for split in imbalance[split_index,0]:
                    mw.Split(wedge='julian', breakpoint=split)
                    mw.Assign_Thresholds(threshold_method=threshold_method, specificity=spec_lim)
                    
                    summary = Summarize(mw, validation_dict, **args)
                    summary.insert( 1, balance_method)
                    summary.insert( 1, threshold_method)
                    
                    results.append( summary )
              
    #Test models w/o midseason split
    if break_flag != 1:
        for spec_lim in limits:
            model.Threshold(specificity=spec_lim)
            
            summary = Summarize(model, validation_dict, **args)
            summary.insert(1, np.nan)
            summary.insert(1, np.nan)
                
            results.append( summary )
            
            
    return results
Example #9
	def add_pattern(self, production, options): 
		self.progress(production, 'add_pattern: %s' % options)
		self.progress(production, '[\'pattern\',  %s, %s, %s' % (options.get('subject'), options.get('predicate'), options.get('object')))
		triple = {}
		for r,v in options.items():
			if isinstance(v,list) and len(flatten(v)) == 1:
				v = flatten(v)[0]
			if self.validate and not isinstance(v, Term):
				self.error("add_pattern", "Expected %s to be a resource, but it was %s" % (r, v), {'production' : production})
			triple[r] = v	
		self.add_prod_datum('pattern', Pattern(triple)) 
Example #10
def extract_links(br):
    """Extract FP related links from the current page."""
    links_to_visit_text = list(ut.flatten([br.find_elements_by_partial_link_text(linktext) for linktext in LINK_LABELS]))
    links_to_visit_url = list(ut.flatten([br.find_elements_by_xpath('//a[contains(@href,"%s")]' % linkurl) for linkurl in LINK_URLS]))
    links_to_visit = [link for link in links_to_visit_text + links_to_visit_url if link]
    
    if len(links_to_visit) < NO_OF_LINKS_TO_CLICK: # if we cannot find links by href and link texts
        links_to_visit += extract_onclick_elements(br)  # we search for all elements with onclick event handler
    wl_log.info('%s links were found on %s' % (len(links_to_visit), br.current_url))
    
    return links_to_visit
Example #11
 def train(self, authors):
     self.stopwords = {ln: self.get_stop_words(ln) \
                             for ln in self.db.get_languages()}
     lang = self.db.get_author_language(authors[0])
     self.words = [self.db.get_author(a)["corpus"] for a in authors]
     self.words = utils.flatten(self.words)
     tokenizer = self.get_tokenizer()
     self.words = map(lambda x: tokenizer.tokenize(x), self.words)
     self.words = utils.flatten(self.words)
     self.words = list(set([x.lower() for x in self.words]))
     self.words = filter(lambda x: x in self.stopwords[lang], self.words)
     self.words.sort()
Example #12
def tokenize_insight(insight, twitter=False):
    """
    return subject, property and context tokens
    """
    url = twagUrl if twitter else stagUrl
    insight = dict(insight.items())
    context = tokenize_doc({'content':insight['content']}, twitter=twitter)
    subj = tokenize_doc({'content':insight['subject']}, twitter=twitter)
    prop = tokenize_doc({'content':insight['property']}, twitter=twitter)
    insight['context_toks'] = flatten(context['toks'])
    insight['subj_toks'] = flatten(subj['toks'])
    insight['prop_toks'] = flatten(prop['toks'])
    return insight 
Example #13
def listspecies(reactions):
    # print "listspecies:"
    species=[]
    for r in reactions:
        lhs = list(r.LHS())
        rhs = list(r.RHS())
        mhs = list(r.MHS())
        s=list(set(utils.flatten([lhs,rhs,mhs])))
        # print "s=",s
        species.append(s)
    species=list(set(utils.flatten(species))) 
    if "Nil" in species: species.remove("Nil")  
    # print "species=",species
    return species
Example #14
    def func(*args, **kwargs):
        from scrapper.models import Criterion
        tweets = fn(*args, **kwargs)
        if tweets:
            users, hash_tags = zip(*map(
                lambda tweet:(tweet['mentions'], tweet['hash_tags']), tweets
            ))

            users = flatten(users)
            hash_tags = flatten(hash_tags)
            criteria_obj = [Criterion(type='hash_tag', value=hashtag) for hashtag in hash_tags if is_valid_hashtag(hashtag)]
            criteria_obj += [Criterion(type='user_name', value=username) for username in users if is_valid_username(username)]
            Criterion.objects.bulk_create_or_skip(hash_tags=hash_tags, user_name=users)
        return tweets
Example #15
def cseg_similarity(cseg1, cseg2):
    """Returns Marvin and Laprade (1987) CSIM(A, B) for a single
    cseg. It's a contour similarity function that measures similarity
    between two csegs of the same cardinality. The maximum similarity
    is 1, and minimum is 0.

    >>> cseg_similarity(Contour([0, 2, 3, 1]), Contour([3, 1, 0, 2]))
    0
    """

    cseg1_triangle = utils.flatten(cseg1.comparison_matrix().superior_triangle())
    cseg2_triangle = utils.flatten(cseg2.comparison_matrix().superior_triangle())

    return auxiliary.position_comparison(cseg1_triangle, cseg2_triangle)
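auxiliary.position_comparison is not shown on this page; a sketch of what it appears to compute, judging from the doctest above (the fraction of positions at which two equal-length sequences agree), offered as an assumption rather than the original implementation:

def position_comparison(seq_a, seq_b):
    # Fraction of positions at which the two equal-length sequences agree.
    matches = sum(1 for a, b in zip(seq_a, seq_b) if a == b)
    return matches / len(seq_a)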
Example #16
def possible_cseg(base_3):
    """Returns a cseg from a base 3 sequence, if the cseg is possible
    (Polansky and Bassein 1992).

    >>> possible_cseg([2, 2, 2])
    < 0 1 2 >
    """

    seq = utils.flatten(base_3)
    size = len(seq)
    for x in itertools.product(range(size), repeat=3):
        cseg = Contour(x)
        if utils.flatten(cseg.base_three_representation()) == seq:
            return Contour(x)
    return "Impossible cseg"
Example #17
 def viewfinder_corners(corners, position, extensions):
     # [(1, 2), (10, 20)] -> [-1, 10, -2, 20]
     corners = [-corners[0][0], corners[1][0], -corners[0][1], corners[1][1]]
     # [5, 4] -> [5, 5, 4, 4]
     extensions = flatten([(x, x) for x in extensions])
     # [2, 3] -> [-2, 2, -3, 3]
     position = flatten([(-x, x) for x in position])
     visible = []
     for corner, extension, coordinate in zip(corners, extensions, position):
         extended_coordinate = coordinate + extension
         if extended_coordinate < corner:
             visible.append(abs(extended_coordinate))
         else:
             visible.append(abs(corner))
     return visible
Example #18
def get_files(pattern):
    '''Returns list of files that matches glob pattern provided.

    Pattern can be iterable with multiple patterns.'''
    if not hasattr(pattern, '__iter__'):
        pattern = [pattern]
    return list(flatten([glob.glob(x) for x in pattern]))
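A hypothetical usage sketch for the helper above (the paths are made up). Note that the hasattr(pattern, '__iter__') check only treats a bare string as a single pattern on Python 2; on Python 3 strings define __iter__, so it is safer to always pass a list:

# results from every pattern are merged into one flat list
matched = get_files(['*.py', 'scripts/*.sh', 'data/*.csv'])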
Example #19
	def crawl(self):
		# district (constituency) representatives
		jobs = []
		constant_candidates = self.constant_candidates
		candidate_type = self.candidate_type
		target = self.target
		target_eng = self.target_eng
		target_kor = self.target_kor
		nth = self.nth
		req_url = self.urlPath_result_list

		townCode_JSON = get_json(self.urlPath_city_codes, self.urlParam_city_codes)[0]['results']

		print("\x1b[1;36mWaiting to connect http://info.nec.go.kr server (%s, %d-th)...\x1b[1;m" % (target_eng, nth))
		for city_index, (city_code, city_name) in list(enumerate(self.city_codes(townCode_JSON))):
			req_param = self.XHTML_url_param(city_code)
			job = gevent.spawn(self.parse, req_url, req_param, constant_candidates, target, target_kor, city_name, city_code, city_index, townCode_JSON)
			jobs.append(job)
		gevent.joinall(jobs)
		every_result = [{'election_type':target_eng, 'nth':nth, 'candidate_type':candidate_type, \
						'results':flatten(job.get() for job in jobs)}]

		# proportional representatives
		if hasattr(self, 'next_crawler'):
			prop_result = self.next_crawler.crawl()
			every_result.extend(prop_result)


		return every_result
Example #20
	def crawl(self):

		jobs = []
		target = self.target
		target_eng = self.target_eng
		target_kor = self.target_kor
		nth = self.nth

		city_code_list = self.city_codes()
		req_url = dict(town=self.urlPath_town_list, sgg=self.urlPath_sgg_list)
		param_dict = dict(town=self.urlParam_town_list, sgg=self.urlParam_sgg_list)

		# Basic flow of crawling data from each metropolitan/provincial-level page.
		print("\x1b[1;36mWaiting to connect http://info.nec.go.kr server (%s, %d-th)...\x1b[1;m" % (target_eng, nth))
		for city_code, city_name in city_code_list: # perform the steps below for each metropolitan/provincial government
			req_param = self.JSON_url_param(city_code, copy.deepcopy(param_dict))
			job = gevent.spawn(self.parse_city, req_url, req_param, target, target_kor, nth, city_code, city_name)
			jobs.append(job)
		gevent.joinall(jobs)
		every_result = [{'election_type':target,'nth':nth,'results':flatten(job.get() for job in jobs)}]

		# Use next_crawler to crawl any additional data that may be attached.
		if hasattr(self, 'next_crawler'):
			next_result = self.next_crawler.crawl()
			every_result.extend(next_result)

		return every_result
Example #21
File: ytpy.py Project: mac389/ytpy
def save(video,format='txt'):
	for case in switch(format):
		if case('xls'):
			import xlwt
			if len(video['comments']) > 0:
				wbk = xlwt.Workbook()
				sheet = wbk.add_sheet(tech.validate(video['title']))
				bigs = wbk.add_sheet('Bigrams')
				tris = wbk.add_sheet('Trigrams')
				context = wbk.add_sheet('Context')
				for_nlp = tech.flatten(video['comments'][0])
				for idx,comment in enumerate(video['comments'][0]):
					sheet.write(idx,0,' '.join(comment))
				for idx,bigram in enumerate(tech.bigrams(for_nlp,self.term)):
					bigs.write(idx,0,' '.join(bigram))
				for idx,trigram in enumerate(tech.trigrams(for_nlp,self.term)):
					tris.write(idx,0,' '.join(trigram))
				for idx,con in enumerate(tech.context(for_nlp,self.term)):
					context.write(idx,0,' '.join(con))
				wbk.save(tech.validate(video['title'])+'.xls')
			print 'Videos, trigrams, bigrams, and contexts saved to XLS files.'
					#indexing is zero-based, row then column
			break
		if case('txt'):
			if len(video['comments']) > 0:
				with open(path.join(dir_path, tech.validate(video['title'])),'a') as f:
					f.write(video['comments'])
			print 'Saved %s as text' % video['title']
			break
Example #22
    def from_file(filename):
        try:
            j = json.load(open(filename, 'r'))
        except IOError as e:
            print "Error opening show:", e
            return None

        name = j['name']
        start = numbering_from_str(j['start'])
        airdates = dict((start.from_str(e), datetime.strptime(d, "%Y/%m/%d").date()) for e, d in j.get('airdates', {}).iteritems())
        name_matcher = NameMatcher(j.get('match', { 'name': name }))

        sources = []
        for source_name, source_params in j['sources'].iteritems():
            params = config.source(source_name)
            params.update(source_params)
            source = get_source(source_name, params)
            if source:
                sources.append(source)

        condition = j.get('condition', config.condition)

        rules = list(flatten(config.bind_rules(j.get('rules'))))
        if not rules:
            rules = [config.rule('default')]
        return Show(name, filename, start, airdates, name_matcher, sources, condition, rules)
Example #23
	def expand_commas(self,root):
		''' Transform

				foo, bar, baz
					color: blue

			into

				foo
					color: blue
				bar
					color: blue
				baz
					color: blue

		'''
		children = []
		for child in root.children:
			if not isinstance(child,tree.RuleNode) and not child.parsed_rules.members.size > 1:
				if isinstance(child,tree.DirectiveNode):
					child = self.expand_commas(child)
				
				children.append(child)
				continue

			members = []
			for seq in child.parsed_rules.members:
				node = tree.RuleNode([])
				node.parsed_rules = self.make_cseq(seq)
				node.children = child.children
				members.append(node)
			children.append(members)

		root.children = utils.flatten(children)
		return root
Example #24
	def __init__(self, confirm):
		self.confirm = confirm
		self.clusters = confirm.get_clusters()
		self.preprocess_clusters()

		self.docs = utils.flatten(map(lambda cluster: cluster.members, self.clusters))
		self.all_labels = self.get_all_labels()
		self.num_docs = len(self.docs)

		self.label_pr_mats = self.calc_label_pr_mats()
		self.label_cluster_mat = self.calc_label_cluster_counts()
		self.total_counts = {count: 0 for count in _counts}
		for label in self.label_pr_mats:
			for count in _counts:
				n = self.label_pr_mats[label][count]
				self.total_counts[count] += n
		#print json.dumps(self.label_pr_mats, indent=4)

		labels = list(self.all_labels)
		mapping = {label: labels.index(label) for label in labels}
		self.true_labels = map(lambda _doc: mapping[_doc.label], self.docs)
		self.predicted_labels = list()
		for _doc in self.docs:
			for x, _cluster in enumerate(self.clusters):
				if _doc in _cluster.members:
					self.predicted_labels.append(x)
					break
Example #25
def test_features_syn():
	docs = doc.get_docs_nested(get_data_dir(sys.argv[2]))
	max_size = int(sys.argv[3])
	num_combine = int(sys.argv[4])
	min_size = int(sys.argv[5])

	d = collections.defaultdict(list)
	for _doc in docs:
		d[_doc.label].append(_doc)
	pure_clusters = d.values()
	broken_clusters = list()
	for x in xrange(10):
		for _cluster in pure_clusters:
			broken_clusters += [_cluster[i:i + max_size] for i in range(0, len(_cluster), max_size)]
		combined_clusters = list()
		while broken_clusters:
			if len(broken_clusters) < num_combine:
				clusters = list(broken_clusters)
			else:
				clusters = random.sample(broken_clusters, num_combine)
			for _cluster in clusters:
				broken_clusters.remove(_cluster)
			combined_clusters.append(utils.flatten(clusters))

		clusters = map(lambda combined_cluster: cluster.Cluster(combined_cluster), combined_clusters)
		ncluster.test_features(clusters, min_size)
Example #26
    def generate_model(self):
        print("Gathering and processing tweets...")
        # Shuffle list of username-label tuples
        tuple_list = usermapping.data_tuples.items()

        # Split and grab tweets for users
        results = utils.flatten([ self.fetch_data(t)
                                  for t in tuple_list ])
         
        # TODO: Cross-validation generation
        trn_ratio = int(len(results) * 0.85)
        shuffle(results)
        print(len(results))
        print(trn_ratio)
        train = results[:trn_ratio]
        test = results[trn_ratio:]

        # Instantiate and train classifier
        print("Training...")
        cl = NaiveBayesClassifier(train)
        cl.train()
        
        # Save model
        print("Saving model...")
        utils.save_model(cl)

        # Classify test
        print("Testing...")
        print("Accuracy: {0}".format(cl.accuracy(test)))
        return cl
Example #27
 def p_expr_list_or_empty(self, p):
     """expr_list_or_empty : expr_list
                           | """
     if len(p) > 1:
         p[0] = utils.flatten(p[1])
     else:
         p[0] = []
Example #28
	def process_individual_frames(self, PATH_TO_DATA, annotations, list_of_layers, sampling_rate, LCD):
		i = 0
		label_map = {}
		frm_map = {}
		X = {}
		map_index_data = pickle.load(open(annotations, "rb"))

		for index in map_index_data:
			segments = map_index_data[index]
			print "Processing images for label " + str(index)
			for seg in segments:
				print str(seg)
				frm_num = seg[0]
				while frm_num <= seg[1]:
					print frm_num
					frm_map[i] = frm_num
					label_map[i] = index
					full_image_path = utils.get_full_image_path(PATH_TO_DATA, frm_num)
					im = caffe.io.load_image(full_image_path)
					self.net.blobs['data'].data[...] = self.transformer.preprocess('data', im)
					out = self.net.forward()
					for layer in list_of_layers:
						if layer == 'input':
							data = cv2.imread(full_image_path)
						else:
							data = self.net.blobs[layer].data[0]
						data = utils.flatten(data)
						utils.dict_insert(layer, data, X)
					frm_num += sampling_rate
					i += 1
		return X, label_map, frm_map
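utils.dict_insert is not shown; a minimal sketch consistent with how it is used above (append the flattened layer output under the layer key, creating the list on first use); the project's real helper may differ:

def dict_insert(key, value, d):
    # Append value to the list stored under key, creating the list on first use.
    d.setdefault(key, []).append(value)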
Example #29
def ValidateLogistic(model_dict, validation_dict, target, **args):
    '''Creates and tests prospective models using logistic regression.'''
    
    #Pick the model building parameters out of args
    try: weights = list( args['weights'] )   #Logistic regression, weighted away from the threshold.
    except KeyError: weights = ['discrete']

    try: limits=utils.flatten([args['specificity']])
    except KeyError: limits = np.arange(11.)/100 + 0.85     #Default: test specificity limits from 0.85 to 0.95 

    results = list()

    #Test models w/ midseason split
    for weight in weights:
        for limit in limits:
        
            l=logistic.Model(model_dict, target,  specificity=limit, weights=weight)
                
            summary = Summarize(l, validation_dict, **args)
            summary.insert( 1, weight)
            summary.insert( 1, np.nan)
            
            results.append( summary )

    return results
Example #30
def make_similarity_matrix(matrix, size=MIN_ALIGN):
    singles = matrix.tolist()
    points = [flatten(t) for t in tuples(singles, size)]
    numPoints = len(points)
    # euclidean distance
    distMat = np.sqrt(np.sum((repmat(points, numPoints, 1) - repeat(points, numPoints, axis=0))**2, axis=1, dtype=np.float32))
    return distMat.reshape((numPoints, numPoints))
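For reference, the same pairwise Euclidean distance matrix can be written with plain NumPy broadcasting instead of repmat/repeat; a sketch assuming points can be viewed as an (n, d) array of equal-length vectors:

import numpy as np

def pairwise_euclidean(points):
    # (n, d) array of row vectors -> (n, n) matrix of Euclidean distances.
    pts = np.asarray(points, dtype=np.float32)
    diff = pts[:, None, :] - pts[None, :, :]
    return np.sqrt((diff ** 2).sum(axis=-1))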
Example #31
def greedy_search(abs_list, doc_list, budget=3):
    def _rouge_clean(s):
        return re.sub(r'[^a-zA-Z0-9 ]', '', s)

    max_rouge = 0.0

    abs_tokens = _rouge_clean(' '.join(flatten(abs_list))).split()
    sents = [_rouge_clean(' '.join(sent)).split() for sent in doc_list]
    hyp_unigrams = [ngrams(sent, 1) for sent in sents]
    hyp_bigrams = [ngrams(sent, 2) for sent in sents]
    ref_unigrams = ngrams(abs_tokens, 1)
    ref_bigrams = ngrams(abs_tokens, 2)

    selected_idxs = []

    for _ in range(budget):
        curr_max_rouge = max_rouge
        curr_id = -1

        for (i, sent) in enumerate(sents):
            if i in selected_idxs:
                continue

            candidate_idxs = selected_idxs + [i]
            candidate_unigrams = set.union(
                *[set(hyp_unigrams[idx]) for idx in candidate_idxs])
            candidate_bigrams = set.union(
                *[set(hyp_bigrams[idx]) for idx in candidate_idxs])

            rouge1 = approx_rouge(candidate_unigrams, ref_unigrams)
            rouge2 = approx_rouge(candidate_bigrams, ref_bigrams)
            rouge_score = rouge1 + rouge2
            if rouge_score > curr_max_rouge:
                curr_max_rouge = rouge_score
                curr_id = i

        if curr_id == -1:
            return (list(sorted(selected_idxs)), max_rouge)

        selected_idxs.append(curr_id)
        max_rouge = curr_max_rouge

    return (list(sorted(selected_idxs)), max_rouge)
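The ngrams and approx_rouge helpers are defined elsewhere in that project; a minimal sketch consistent with how they are used above (sets of n-gram tuples and an overlap/recall-style score), offered as an assumption rather than the original implementation:

def ngrams(tokens, n):
    # Set of n-gram tuples contained in a token list.
    return set(zip(*[tokens[i:] for i in range(n)]))

def approx_rouge(hyp_ngrams, ref_ngrams):
    # Overlap of candidate n-grams with the reference, normalized by the reference size.
    if not ref_ngrams:
        return 0.0
    return len(set(hyp_ngrams) & set(ref_ngrams)) / len(ref_ngrams)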
Example #32
def create_clusters(documents):
    for document in documents:
        document.text = []
        for sentence in document.original:
            words = [word.lower() for word in sentence]
            words = [word for word in words if word not in forbidden_symbols]
            document.text.append(words)
    num_docs = len(documents)
    term_doc_frequency = {}
    # calculate term frequency per document
    for document in documents:
        words = flatten(document.text)
        words = words[:DECAY_THRESHOLD]
        doc_corpus = {}
        for word in words:
            if word in doc_corpus:
                doc_corpus[word] += 1
            else:
                doc_corpus[word] = 1
        document.corpus = doc_corpus
    # calculate number of documents where each term appears
    for document in documents:
        for word in document.corpus:
            if word in term_doc_frequency:
                term_doc_frequency[word] += 1
            else:
                term_doc_frequency[word] = 1
    # calculate the idf for each term, and ignore if it is below the threshold
    term_idf = {}
    for term in term_doc_frequency:
        idf = math.log(num_docs / (1 + term_doc_frequency[term]))
        if idf >= IDF_THRESHOLD:
            term_idf[term] = idf
    # generate the tf_idf vector for each document
    for document in documents:
        document.tf_idf_vector = generate_tf_idf_vector(document, term_idf)

    clusters = [Cluster(documents[0], term_idf)]

    for document in documents[1:]:
        clusters = calc_similarity(clusters, document, term_idf)

    return clusters
Example #33
def calculate_word_frequency(topic_headers):
    """Calculates the word frequencies in topic headers"""

    tokens = flatten([word_tokenize(topic_header) for topic_header in topic_headers])

    tokens = [
        word.lower()
        for word in tokens
        if not word.lower() in stopwords.words("english")
    ]

    word_frequencies = FreqDist(word.lower() for word in tokens)

    top_word_frequencies = word_frequencies.most_common(500)

    for x in top_word_frequencies:
        print(x)

    return top_word_frequencies
Example #34
def scan_P__(P__):
    """Detect forks and roots per P."""

    for _P_, P_ in pairwise(P__): # Iterate through pairs of lines.
        _itP_, itP_ = iter(_P_), iter(P_) # Convert to iterators.
        try:
            _P, P = next(_itP_), next(itP_) # First pair to check.
        except StopIteration: # No more fork-root pair.
            continue # To next pair of _P_, P_.
        while True:
            isleft, olp = comp_edge(_P, P) # Check for 4 different cases.
            if olp and _P['sign'] == P['sign']:
                _P['root_'].append(P)
                P['fork_'].append(_P)
            try: # Check for stopping:
                _P, P = (next(_itP_), P) if isleft else (_P, next(itP_))
            except StopIteration: # No more fork-root pair.
                break # To next pair of _P_, P_.

    return [*flatten(P__)] # Flatten P__ before return.
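pairwise here is the standard itertools recipe (available as itertools.pairwise from Python 3.10 onward); a sketch for older interpreters:

from itertools import tee

def pairwise(iterable):
    # s -> (s0, s1), (s1, s2), (s2, s3), ...
    a, b = tee(iterable)
    next(b, None)
    return zip(a, b)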
Example #35
def add_all_imports_idx_and_depth_to_row(row, df, join_all, max_depth):
    idx_set = set([row['ID']])
    depth_list = [0]
    depth = 0
    #     print row
    if join_all or row.loc['is_imported'] == False:
        imports_idx_all, imports_depth = get_all_imports_idx_and_depth(
            df=df,
            imports_idx=row.loc['imports_idx'],
            idx_set=idx_set,
            depth_list=depth_list,
            depth=depth,
            max_depth=max_depth,
        )
        row['imports_idx_all'] = flatten(list(imports_idx_all))
        row['imports_depth'] = imports_depth
    else:
        row['imports_idx_all'] = []
        row['imports_depth'] = -1
    return row
Example #36
def complex_volume(model, ind):
    cdist = gpytorch.kernels.Kernel().covar_dist
    n_vert = len(model.simplicial_complex[ind])
    total_vert = model.n_vert

    mat = torch.ones(n_vert + 1, n_vert + 1) - torch.eye(n_vert + 1)

    ## compute distance between parameters ##
    temp_pars = [p for p in model.net.parameters()][0::total_vert]
    n_par = int(sum([p.numel() for p in temp_pars]))
    par_vecs = torch.zeros(n_vert, n_par).to(temp_pars[0].device)
    for ii, vv in enumerate(model.simplicial_complex[ind]):
        par_vecs[ii, :] = utils.flatten([p for p in model.net.parameters()
                                         ][vv::total_vert])

    dist_mat = cdist(par_vecs, par_vecs).pow(2)
    mat[:n_vert, :n_vert] = dist_mat

    norm = (math.factorial(n_vert - 1)**2) * (2.**(n_vert - 1))
    return torch.abs(torch.det(mat)).div(norm)
Example #37
def make_dictionary(word2index, sentences_list, vocab_size=50000):
    # exclude <eos>
    sentences_list = [sentence[:-1] for sentence in sentences_list]
    # flatten the nested list of sentences into a single word list
    words_list = utils.flatten(sentences_list)
    # sort by frequency and assign ids
    counter = collections.Counter()
    counter.update(words_list)
    cnt = 2
    for word, count in counter.most_common():
        # add only words that occur at least 3 times to the dictionary
        if cnt >= vocab_size:
           break
        if count >= 3:
            word2index[word] = cnt
            cnt += 1
    word2index[u'<sos>'] = 0
    word2index[u'<eos>'] = 1
    word2index[u'<unk>'] = len(word2index)
    return word2index
Example #38
def get_data(filename, syll_mgr, num_symbols):
    num_syllables = syll_mgr.get_size()      
    lines = open(filename, 'r').read().splitlines()
    num_lines = len(lines)
    num_lines = 30000
    text_lines = []
    text_sylls = []
    for i in range(0, num_lines):
      parts = lines[i].split("\t")
      label = utils.flatten(literal_eval(parts[1]))
      if len(label) == num_symbols:
        text_lines.append(str(parts[0]))
        text_sylls.append(label)
    num_lines = len(text_lines)
    label_array = np.zeros((num_symbols, num_lines, num_syllables), dtype=np.int8)
    for i in range(0, num_lines):
      for j in range(num_symbols):
        label_array[j][i][syll_mgr.get_encoding(text_sylls[i][j])] = 1

    return (text_lines, label_array)
Example #39
    def update_position(self):
        Frame.update_position(self)
        w, h = self.width, self.height
        ax, ay = self.abs_coords()
        self._gcp_framevlist.vertices[:] = flatten([
            rectv2f(2, 2, w-4, 54-2, ax, ay),  # equip box
            rrectv2f(2.5, 2.5, 4*36, 52, ax, ay),  # equip box border
            rectv2f(w-2-32, 66, 32, 22, ax, ay),  # cardnum box
            rrectv2f(w-2-32, 66, 32, 22, ax, ay),  # cardnum box border
        ])

        full = rectv2f(0, 0, w, h, ax, ay)
        self._highlight_disabled.vertices[:] = full
        self._highlight.vertices[:] = full

        if self.actor_frame:
            self.actor_frame.set_position(self.x - 6, self.y - 4)

        if self.turn_frame:
            self.turn_frame.set_position(self.x - 6, self.y - 4)
Example #40
 def train(self, authors):
     self.stopwords = {ln: self.get_stop_words(ln) \
                             for ln in self.db.get_languages()}
     lang = self.db.get_author_language(authors[0])
     # transform corpus into a list of preprocessed documents
     documents = [self.db.get_author(a)["corpus"] for a in authors]
     documents = utils.flatten(documents)
     tokenizer = self.get_tokenizer()
     documents = map(lambda x: tokenizer.tokenize(x), documents)
     documents = [map(lambda x: x.lower(), d) for d in documents]
     documents = [filter(lambda x: x not in self.stopwords[lang], d) \
                     for d in documents]
     # build topic model
     self.dictionary = corpora.Dictionary(documents)
     self.dictionary.filter_extremes(no_below=5, no_above=0.5)
     documents = map(lambda x: self.dictionary.doc2bow(x), documents)
     self.model = models.LdaModel(documents,
                                  num_topics=self.k,
                                  id2word=self.dictionary,
                                  iterations=1000)
Example #41
def linear(x,output_size,scope,add_tanh=False,wd=None,bn=False,bias=False,is_train=None,ln=False):
	# bn -> batch norm
	# ln -> layer norm
	with tf.variable_scope(scope):
		# since the input here is not two rank, we flat the input while keeping the last dims
		keep = 1
		#print x.get_shape().as_list()
		flat_x = flatten(x,keep) # keeping the last one dim # [N,M,JX,JQ,2d] => [N*M*JX*JQ,2d]
		#print flat_x.get_shape() # (?, 200) # wd+cwd
		bias_start = 0.0
		if not (type(output_size) == type(1)): # need to be get_shape()[k].value
			output_size = output_size.value

		# add batch_norm
		if bn:
			assert is_train is not None
			flat_x = batch_norm(flat_x,scope="bn",is_train=is_train)

		if ln:
			flat_x = layer_norm(flat_x,scope="ln")


		#print [flat_x.get_shape()[-1],output_size]

		W = tf.get_variable("W",dtype="float",initializer=tf.truncated_normal([flat_x.get_shape()[-1].value,output_size],stddev=0.1))
		flat_out = tf.matmul(flat_x,W)

		if bias:
			bias = tf.get_variable("b",dtype="float",initializer=tf.constant(bias_start,shape=[output_size]))
			flat_out += bias

		if add_tanh:
			flat_out = tf.tanh(flat_out,name="tanh")

		#flat_out = tf.nn.dropout(flat_out,keep_prob)

		if wd is not None:
			add_wd(wd)

		out = reconstruct(flat_out,x,keep)
		return out
Example #42
 def __init__(
     self,
     mdl,
     X,
     feature_names=[],
     feature_types=[],
     feature_categories=[],
     feature_constraints=[],
     max_cancidates=100,
     tol=1e-6,
     target_name='Output',
     target_labels=['Good', 'Bad'],
     interaction_matrix=[],
 ):
     self.mdl_ = mdl
     self.coef_ = mdl.coef_[0]
     self.intercept_ = mdl.intercept_[0]
     self.X_ = X
     self.N_, self.D_ = X.shape
     self.feature_names_ = feature_names if len(
         feature_names) == self.D_ else [
             'x_{}'.format(d) for d in range(self.D_)
         ]
     self.feature_types_ = feature_types if len(
         feature_types) == self.D_ else ['C' for d in range(self.D_)]
     self.feature_categories_ = feature_categories
     self.feature_categories_flatten_ = flatten(feature_categories)
     self.feature_constraints_ = feature_constraints if len(
         feature_constraints) == self.D_ else ['' for d in range(self.D_)]
     self.target_name_ = target_name
     self.target_labels_ = target_labels
     self.AC_ = ActionCandidates(X,
                                 feature_names=feature_names,
                                 feature_types=feature_types,
                                 feature_categories=feature_categories,
                                 feature_constraints=feature_constraints,
                                 max_candidates=max_cancidates,
                                 tol=tol)
     self.tol_ = tol
     self.M_ = interaction_matrix if len(
         interaction_matrix) == self.D_ else np.zeros([self.D_, self.D_])
Example #43
    def to_conll(self, val_corpus, eval_script):
        """ Write to out_file the predictions, return CoNLL metrics results """

        # Make predictions directory if there isn't one already
        golds_file, preds_file = '../preds/golds.txt', '../preds/predictions.txt'
        if not os.path.exists('../preds/'):
            os.makedirs('../preds/')

        # Combine all gold files into a single file (Perl script requires this)
        golds_file_content = flatten([doc.raw_text for doc in val_corpus])
        with io.open(golds_file, 'w', encoding='utf-8', errors='strict') as f:
            for line in golds_file_content:
                f.write(line)

        # Dump predictions
        with io.open(preds_file, 'w', encoding='utf-8', errors='strict') as f:

            for doc in val_corpus:

                current_idx = 0

                for line in doc.raw_text:

                    # Indicates start / end of document or line break
                    if line.startswith('#begin') or line.startswith(
                            '#end') or line == '\n':
                        f.write(line)
                        continue
                    else:
                        # Replace the coref column entry with the predicted tag
                        tokens = line.split()
                        tokens[-1] = doc.tags[current_idx]

                        # Increment by 1 so tags are still aligned
                        current_idx += 1

                        # Rewrite it back out
                        f.write('\t'.join(tokens))
                    f.write('\n')

        return golds_file, preds_file
Example #44
 def predictAll(self, sess, save=False):
     if self.predictions is not None: return self.predictions
     predictions = None
     for i in range(self.num_test_batches):
         testbatch = self.loader.get_testbatch()
         if self.include_coverage and self.include_entropy:
             preds = self.predict(sess, testbatch[0], testbatch[1], testbatch[2])
         elif self.include_coverage:
             preds = self.predict(sess, testbatch[0], testbatch[1])
         elif self.include_entropy:
             preds = self.predict(sess, testbatch[0], E = testbatch[1])
         else:
             preds = self.predict(sess, testbatch[0])
         if not self.multiclass:
             preds = utils.flatten(preds)
         if predictions is None:
             predictions = preds
         else:
             predictions = np.concatenate((predictions, preds))
     if save: self.predictions = predictions
     return predictions
Example #45
    def predict_one_by_one_internal(combined,
                                    test_hour_ordinal,
                                    largest_lag=10):
        test = extract_test_func(combined)
        # here we have the rows for which we want to calculate aggregates and make predictions
        test_part = test[test['ordinal'] == test_hour_ordinal]

        # should works as long as largest_lag is really largest lag
        idxs_list = [
            list(range(idx - largest_lag, idx + 1)) for idx in test_part.index
        ]
        idxs = flatten(idxs_list)
        df = agg_function(combined.iloc[idxs], test_size=1)

        test_for_predictions = df.loc[test_part.index][feats]
        test_for_predictions = convert_to_float_or_factorize_objects(
            test_for_predictions, feats)
        pred = model.predict(test_for_predictions)
        test.loc[test_part.index, target] = pred

        return test
Example #46
def generate_visualizations(sample_outputs, output_path='visualization.html'):
    with open(sample_outputs) as json_file:
        data = json.load(json_file)

    with open(output_path, 'w+') as output_file:
        for doc_id in data.keys():
            doc = data[doc_id]
            doc_words = doc['words']
            clusters = doc['predicted_clusters']
            event_mentions = flatten(clusters)
            output_file.write('<b>Document {}</b><br>'.format(doc_id))
            output_file.write('{}<br><br><br>'.format(
                doc_to_html(doc, event_mentions)))
            for ix, cluster in enumerate(doc['predicted_clusters']):
                if len(cluster) == 1: continue
                output_file.write('<b>Cluster {}</b></br>'.format(ix + 1))
                for em in cluster:
                    output_file.write('{}<br>'.format(
                        event_mentions_to_html(doc_words, em)))
                output_file.write('<br><br>')
            output_file.write('<br><hr>')
Example #47
 def test(self, sess):
     if self.predictions is not None:
         # Evaluate the accuracy of the saved predictions, based on the true test labels
         if self.multiclass:
             return metrics.accuracy_score(
                 np.argmax(self.loader.test_data[-1], axis=-1),
                 np.argmax(self.predictions.round(), axis=-1))
         else:
             return metrics.accuracy_score(
                 utils.flatten(self.loader.test_data[-1]),
                 self.predictions.round())
     test_acc = 0
     num_test_ex = 0
     for i in range(self.num_test_batches):
         testbatch = self.loader.get_testbatch()
         cur_size = len(testbatch[1])
         batch_acc = cur_size * self.eval_accuracy_on_batch(
             testbatch)  # Accuracy on the batch
         test_acc += batch_acc
         num_test_ex += cur_size  # Final accuracy needs to be weighted average of batch accuracies, weighted by the size of each batch (since the last batch may be smaller)
     return test_acc / num_test_ex
Example #48
def make_heatmap(heatmap, graph_terms):
    try:
        set_status('getting document list', model=heatmap)
        with ManagedSession() as session:
            heatmap_query = create_query(session, author=heatmap.author, institution=heatmap.institution)
            filtered_query = filter_query(heatmap_query, dirty=False,
                                          starting_year=heatmap.starting_year,
                                          ending_year=heatmap.ending_year,
                                          sample_size=heatmap.sample_size,
                                        model=heatmap)
            extracted_terms = extract_terms(filtered_query, heatmap.term_type)
        heatmap_terms = flatten(extracted_terms)
        heatmap_vals = calculate_heatmap_values(heatmap_terms, graph_terms)
        heatmap.terms = json.dumps(jsonize_phrase_dict(heatmap_vals, 'intensity'))
        set_status('heatmap complete', model=heatmap)
        heatmap.finished = True
        heatmap.save()
        return heatmap_vals
    except Exception as e:
        set_status('Error: %s' % e, model=heatmap)
        raise e
Example #49
def write_matchup(t1, t2, num, week_sheet, region):
    roster1 = get_active_roster(t1)
    roster2 = get_active_roster(t2)
    num_cols = 6
    num_rows = 10
    t1_col = 1
    t2_col = 4
    data_view = [[""]*num_cols for i in range(num_rows)]
    data_view[0][0] = "Match #{}".format(num + 1)
    for t, r, col in [(t1, roster1, t1_col), (t2, roster2, t2_col)]:
        data_view[1][col] = t
        for i, p in enumerate(r):
            both = region == "both"
            in_region = p[1] == region
            if both or in_region:
                data_view[3+i][col] = p[0]
    top_row = num*num_rows + 1
    cells = week_sheet.range(top_row, 1, top_row + num_rows, num_cols)
    for cell, val in zip(cells, u.flatten(data_view)):
        cell.value = val
    week_sheet.update_cells(cells)
Example #50
def print_top_k_accuracies(train_labels: List[Dict[int, int]],
                           test_labels: List[Dict[int, int]],
                           top_k_accuracy: List[int], label: str) -> None:
    pred_freq_counter = Counter[int](utils.flatten(l.values()
                                                   for l in train_labels))
    pred_vec = [lab for (lab, count) in pred_freq_counter.most_common()]

    total_labs = sum(map(len, test_labels))
    unk_labs = sum(1 for labs in test_labels for lab in labs.values()
                   if lab == 0)

    for k in top_k_accuracy:
        k_preds = set(pred_vec[:k])
        corr = sum(1 for labs in test_labels for lab in labs.values()
                   if lab in k_preds)
        utils.log('{}: top {} accuracy: {:.2f} ({:.2f} w/out UNK)'.format(
            label,
            k,
            corr / total_labs,
            corr / (total_labs - unk_labs),
        ))
Example #51
def brute_force(wanted_parts, price_guide, k):
    """Enumerate all possible combinations of k stores"""
    by_store = utils.groupby(price_guide, lambda x: x['store_id'])

    results = []
    for selected_stores in itertools.combinations(by_store.keys(), k):
        # get items sold by these stores only
        inventory = utils.flatten(by_store[s] for s in selected_stores)
        if covers(wanted_parts, inventory):
            # calculate minimum cost to buy everything using these stores
            cost, allocation = min_cost(wanted_parts, inventory)
            results.append({
                'cost': cost,
                'allocation': allocation,
                'store_ids': selected_stores
            })
            #print 'Solution: k=%d, cost=%8.2f, store_ids=%40s' % (k, cost, selected_stores)
        else:
            #print 'Unable to fill quote using store_ids=%40s' % (selected_stores,)
            pass
    return results
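utils.groupby above groups an iterable into a dict of lists keyed by a key function (unlike itertools.groupby, no pre-sorting is required); a minimal sketch under that assumption:

def groupby(iterable, keyfunc):
    # Map each keyfunc(item) to the list of items that share that key.
    groups = {}
    for item in iterable:
        groups.setdefault(keyfunc(item), []).append(item)
    return groups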
Example #52
def handle_underscores(suffix, text_encoder, prefix=False):
    encoder = text_encoder.encoder
    if prefix:
        tok = "___"
    else:
        tok = find_underscore_length(suffix)

    suffix_parts = [i.strip() for i in suffix.split("{}".format(tok))]
    to_flatten = []
    for i, part in enumerate(suffix_parts):
        if part:
            to_flatten.append(text_encoder.encode([part], verbose=False)[0])

            if i != len(suffix_parts) - 1 and suffix_parts[i + 1]:
                to_flatten.append([encoder["<blank>"]])
        else:
            to_flatten.append([encoder["<blank>"]])

    final_suffix = utils.flatten(to_flatten)

    return final_suffix
Example #53
    def _get_cooccurrences_context(self, keywords, corpus, context_window):
        """ given some context window, get co-occurrences for keyword network """
        cv = CountVectorizer(vocabulary=np.sort(keywords), ngram_range=(1, 2))
        matrix = cv.fit_transform(
            flatten(self._shift_corpus(corpus, context_window)))
        vocab = cv.vocabulary

        occurrence_dict, values_dict = defaultdict(list), defaultdict(list)
        row_entry, keyword_id, value = find(matrix)
        occurrences = zip(keyword_id, row_entry,
                          value)  # keyword_id, row entry, value
        for entry in occurrences:
            occurrence_dict[vocab[entry[0]]].append(
                (entry[1])), values_dict[vocab[entry[0]]].append((entry[2]))

        combos = defaultdict()
        for idx, key1 in enumerate(vocab):
            for key2 in vocab[idx + 1:]:
                key1_matrix, key2_matrix = np.array(
                    occurrence_dict[key1]), np.array(
                        occurrence_dict[key2]
                    )  # get sentence ids where keys occur
                dist = np.abs(
                    key1_matrix[:, np.newaxis] - key2_matrix
                )  # compute exhaustive distances (in sentence count) between mentions
                args = np.argwhere(
                    dist < context_window
                )  # find where distance less than context_window size
                if len(args) > 0:
                    cooccur_vals = [
                        val1 * val2 for val1, val2 in
                        zip([values_dict[key1][x] for x in args[:, 0]],
                            [values_dict[key2][y] for y in args[:, 1]])
                    ]  # retrieve their co-occurrence values
                    combos['_'.join(
                        [key1,
                         key2])] = combos.get('_'.join([key1, key2]), 0) + sum(
                             cooccur_vals)  # update occurrences

        return combos
Example #54
def main():

    url = "data/data.csv"
    df = pd.read_csv(url, index_col='Date', parse_dates=True)
    df = df[['Close']]

    ps.test_stationary(df['Close'])

    train, test = train_test_split(df, test_size=0.1, shuffle=False)

    scaler = StandardScaler()
    scaler = scaler.fit(train[['Close']])
    train['NClose'] = scaler.transform(train[['Close']])
    test['NClose'] = scaler.transform(test[['Close']])

    sequence_length = 100
    X_train, y_train = temporalize(train[['NClose']], train.NClose, False,
                                   sequence_length)
    X_test, y_test = temporalize(test[['NClose']], test.NClose, False,
                                 sequence_length)

    input_shape = (
        X_train.shape[1],
        X_train.shape[2],
    )
    intermediate_cfg = [64, 'latent', 64]
    latent_dim = 10
    model = V_AE_LSTM(input_shape, intermediate_cfg, latent_dim, 'VAE-LSTM')
    model.fit(X_train,
              y_train,
              epochs=2,
              batch_size=124,
              validation_split=None,
              verbose=1)

    reconstruction, prediction = model.predict(X_test)

    reconstruction = flatten(reconstruction).reshape(-1)

    res = anomaly_detector(y_test.reshape(-1, 1), prediction.reshape(-1, 1))
Example #55
    def add_vert(self, to_simplexes=[0]):
        
        self.fix_points = [True] * self.n_vert + [False]
        new_model = self.architecture(self.n_output, 
                                      fix_points=self.fix_points,
                                      **self.architecture_kwargs)
        
        ## assign osld pars to new model ##
        for index in range(self.n_vert):
            old_parameters = list(self.net.parameters())[index::self.n_vert]
            new_parameters = list(new_model.parameters())[index::(self.n_vert+1)]
            for old_par, new_par in zip(old_parameters, new_parameters):
                new_par.data.copy_(old_par.data)
        
        new_parameters = list(new_model.parameters())
        new_parameters = new_parameters[(self.n_vert)::(self.n_vert+1)]
        n_par = sum([p.numel() for p in new_parameters])
        ## assign mean of old pars to new vertex ##
        par_vecs = torch.zeros(self.n_vert, n_par).to(new_parameters[0].device)
        for ii in range(self.n_vert):
            temp = [p for p in self.net.parameters()][ii::self.n_vert]
            par_vecs[ii, :] = utils.flatten(temp)

        center_pars = torch.mean(par_vecs,  0).unsqueeze(0)
        center_pars = utils.unflatten_like(center_pars, new_parameters)
        for cntr, par in zip(center_pars, new_parameters):
            par.data = cntr.to(par.device)
        
        ## update self values ##
        self.n_vert += 1
        self.net = new_model
        self.simplex_modules = []
        for module in self.net.modules():
            if issubclass(module.__class__, SimplexModule):
                self.simplex_modules.append(module)
        
        for cc in to_simplexes:
            self.simplicial_complex[cc].append(self.n_vert-1)
        
        return
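In the complex_volume and add_vert examples above, utils.flatten is a tensor variant that concatenates a list of parameter tensors into one flat vector, and utils.unflatten_like is its inverse; a sketch under that assumption:

import torch

def flatten(tensor_list):
    # Concatenate a list of tensors into a single 1-D vector.
    return torch.cat([t.reshape(-1) for t in tensor_list])

def unflatten_like(vector, like_tensors):
    # Split a flat (or single-row) vector back into tensors shaped like like_tensors.
    out, offset = [], 0
    flat = vector.reshape(-1)
    for t in like_tensors:
        n = t.numel()
        out.append(flat[offset:offset + n].view(t.shape))
        offset += n
    return out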
Example #56
    def handle(self, evt_type, act):
        if evt_type == 'action_before' and isinstance(act, SpellCardAction):
            if act.cancelled: return act  # something else has already done the job
            if isinstance(act, NonResponsiveInstantSpellCardAction):
                return act

            g = Game.getgame()

            has_reject = False
            while g.SERVER_SIDE:
                from ..characters.reimu import Reimu
                for p in g.players:
                    if isinstance(p, Reimu):
                        has_reject = True
                        break

                if has_reject: break

                from .definition import RejectCard
                for c in flatten([[p.cards, p.showncards] for p in g.players]):
                    if isinstance(c, RejectCard):
                        has_reject = True
                        break

                break

            has_reject = sync_primitive(has_reject, g.players)
            if not has_reject: return act

            self.target_act = act  # for ui

            pl = BatchList(p for p in g.players if not p.dead)

            p, rst = ask_for_action(self, pl, ['cards', 'showncards'], [])
            if not p: return act
            cards, _ = rst
            assert cards and self.cond(cards)
            g.process_action(LaunchReject(p, act, cards[0]))

        return act
Example #57
 def test(self, sess):
     if self.predictions is not None:
         if self.multiclass:
             return metrics.accuracy_score(np.argmax(self.loader.test_data[-1], axis=-1), np.argmax(self.predictions.round(), axis=-1))
         else:
             return metrics.accuracy_score(utils.flatten(self.loader.test_data[-1]), self.predictions.round())
     test_acc = 0
     num_test_ex = 0
     for i in range(self.num_test_batches):
         testbatch = self.loader.get_testbatch()
         cur_size = len(testbatch[1])
         if self.include_coverage and self.include_entropy:
             batch_acc = cur_size * self.eval_accuracy_on_batch(testbatch[0], testbatch[1], testbatch[2], testbatch[3])
         elif self.include_coverage:
             batch_acc = cur_size * self.eval_accuracy_on_batch(testbatch[0], testbatch[1], testbatch[2])
         elif self.include_entropy:
             batch_acc = cur_size * self.eval_accuracy_on_batch(testbatch[0], testbatch[1], testbatch[2])
         else:
             batch_acc = cur_size * self.eval_accuracy_on_batch(testbatch[0], testbatch[1])
         test_acc += batch_acc
         num_test_ex += cur_size
     return test_acc / num_test_ex
Example #58
    def apply(self, X):
        if not self.trained:
            raise ReadoutException('readout is not trained!')

        X = numpy.array(X)

        if self.usefull_dims is not None:
            X = X[:, self.usefull_dims]

        if self.addBias:
            X = numpy.concatenate((X, numpy.ones((X.shape[0], 1))), 1)
        if self.addNegBias:
            X = numpy.concatenate((X, -1 * numpy.ones((X.shape[0], 1))), 1)

        if self.nClasses == 2:
            if self.addNegBias:
                maxI = (numpy.dot(X, self.W) >= 0).astype(
                    numpy.int32)  # maxI \in {1,2}
            else:
                maxI = (numpy.dot(X, self.W) >= 0.5).astype(
                    numpy.int32)  # maxI \in {1,2}
        else:
            S = numpy.zeros((X.shape[0], self.nClasses))
            for i in range(self.nClasses):
                S[:, i] = numpy.dot(self.W[i, :], X.T)

            maxV = S.max(1)
            maxI = S.argmax(1)

        if maxI.ndim < 1:
            maxI = numpy.asarray([maxI])

        Y = numpy.array(
            utils.flatten(numpy.asarray(self.uniqueY).take(maxI).tolist()))

        if self.swapLabels:
            Y = (numpy.array(Y * 2 - 1) * (-1) + 1) / 2

        return Y.tolist()
Example #59
    def apply(self, X):
        if not self.trained:
            raise ReadoutException('readout is not trained!')

        X = numpy.asarray(X)
        if self.addBias:
            X = numpy.concatenate((X, numpy.ones((X.shape[0], 1))), 1)

        if self.nClasses == 2:
            maxI = (numpy.dot(X, self.W) >= 0).astype(
                numpy.int32)  # maxI \in {1,2}
        else:
            S = numpy.zeros((X.shape[0], self.nClasses))
            for i in range(self.nClasses):
                S[:, i] = numpy.dot(X, self.W[:, i])

            maxV = S.max(1)
            maxI = S.argmax(1)

        if maxI.ndim < 1: maxI = numpy.asarray([maxI])

        return utils.flatten(numpy.asarray(self.uniqueY).take(maxI).tolist())
Example #60
    def predict(self, debate_data, proba=False, y=None):
        test_x = self.preprocess(debate_data, is_training=False)
        test_y = np.array([LABELS_ORDER[label] for label in flatten(y)])

        history = self.model.fit(
            x=self.train_x,
            y=self.train_y,
            class_weight=self.class_weights,
            validation_data=(test_x, test_y),
            batch_size=len(self.train_y),
            epochs=self.params['nn_params']['epochs'],
            shuffle=True,
            callbacks=[
                MetricsCallback(num_inputs=len(
                    self.params['nn_params']['inputs']),
                                train_x=self.train_x,
                                train_y=self.train_y,
                                labels=LABELS),
                keras.callbacks.EarlyStopping(
                    monitor=self.params['nn_params']['early_stopping']
                    ['monitor'],
                    min_delta=self.params['nn_params']['early_stopping']
                    ['min_delta'],
                    patience=self.params['nn_params']['early_stopping']
                    ['patience'],
                    mode=self.params['nn_params']['early_stopping']['mode'])
            ])

        prediction_probs = list(self.model.predict(test_x))

        predictions_indices = [
            example_pred_probs.tolist().index(max(example_pred_probs))
            for example_pred_probs in prediction_probs
        ]
        predictions = [
            LABELS[prediction] for prediction in predictions_indices
        ]

        return prediction_probs if proba else predictions