def cleanup_report_table(self):
    """Converts table from mc_tabulation to a more readable form, intended to
    be opened in Excel, which leaves a cell visually empty for ="".
    """
    cleaned_table = []
    for line in self.pre_report:
        # No mutations
        if any('NO MUTATIONS' in i for i in line):
            cleaned_table.append(line)
        # Mutations
        else:
            # Sorting
            line[-1] = sorted(line[-1], key=iget(1, 0), reverse=True)
            # Converting from list to lines
            for mutation in line[-1]:
                # Sorting
                mutation[-1] = sorted(mutation[-1], key=iget(0, 2))
                # Converting from list to lines
                for s in mutation[-1]:
                    first_part = ['=""'] * len(line[:-1])
                    second_part = ['=""'] * len(mutation[:-1])
                    if s == mutation[-1][0]:
                        second_part = mutation[:-1]
                        if mutation == line[-1][0]:
                            first_part = line[:-1]
                    third_part = s
                    cleaned_table.append(first_part + second_part + third_part)
    return cleaned_table
def to_qcr_format(rec, job, campaign_thresh=0.7, debug=False):
    if debug:
        print "Start conv, doing location"
        print "rec['location'] = ", rec['location']
    loc = sorted(rec['location'], key=iget('weight'), reverse=True)
    o_loc = None
    if len(loc):
        o_loc = {
            "type": "Point",
            "coordinates": [loc[0]["coords"][0]["lng"],
                            loc[0]["coords"][0]["lat"]]
        }
    if debug:
        print "Splitting campaigns"
    l_rec = []
    camps = filter(lambda x: x is not None,
                   map(lambda x: [y for y in x.iteritems()][0]
                       if x.values()[0] > campaign_thresh else None,
                       rec['campaigns']))
    if debug:
        print "Max campaign association:", max(
            [x.values()[0] for x in rec['campaigns']])
        print "n recs to transform: ", len(camps)
    for camp in camps:
        keywords = map(iget(0), sorted(rec['keywords'], key=iget(1), reverse=True))
        hashtags = map(iget(0), sorted(rec['hashtags'], key=iget(1), reverse=True))
        # per QCR: send top kwd if no hashtags
        if not len(hashtags):
            hashtags = [keywords[0]]
        event = {
            'uid': rec['id'],
            'label': (rec['hashtags'][0][0] if len(rec['hashtags'])
                      else rec['keywords'][0][0] if len(rec['keywords']) else 'None'),
            'relevant': True,
            'startDate': datetime.fromtimestamp(rec['start_time_ms'] / 1000.0).isoformat(),
            'endDate': datetime.fromtimestamp(rec['end_time_ms'] / 1000.0).isoformat(),
            'hashtags': hashtags,
            'keywords': keywords,
            'urls': rec['urls'],
            'photos': rec['image_urls'],
            'importanceScore': camp[1],
            'topicMessageCount': rec['topic_message_count'],
            'campaignId': camp[0],
            'newsEventIds': [],
            'location': o_loc
        }
        l_rec.append(event)
    return l_rec
def resources_to_contacts():
    # Get Calendar Resources
    calendars = calendar_resource(options=options()).get_resource_feed(
        uri=options().calendar_resource_feed).entry

    # Select Calendars by options
    filtered_calendars = filter(
        lambda cal: fnmatch(cal.resource_email, options().select_pattern),
        calendars)

    # Fetch all domain users
    all_users = exhaust(admin(options=options()).users().list,
                        dict(domain=options().domain, maxResults=500), 'users')

    # Get opt-out lists
    optout_emails_set = set() if not options().undo else get_optout_set(
        options().optout_uri)

    # Select domain users by options
    filtered_users = filtermap(
        lambda user: fnmatch(user['primaryEmail'], options().user_pattern) and
        unicode(user['primaryEmail']).lower() not in optout_emails_set,
        iget('primaryEmail'), all_users)

    logging.info(
        'Starting Calendar Resource to Contacts Group copy operation. '
        'Selection is "%s" (%d calendar(s)) and target is "%s" (%d user(s))',
        options().select_pattern, len(filtered_calendars),
        options().user_pattern, len(filtered_users))

    process_users(filtered_users, filtered_calendars)
def g_apply(operations, source, groupi=None):
    """Applies the specified group operations to the source list of elements
    and then appends it to the group if it is unique.
    """
    result = list(iget(*operations)(source))
    if groupi is not None and result not in groupi:
        groupi.append(result)
    return result
def query_results_to_tsv(data):
    rows = ((key.split('.', 1)[1], stats['total'], stats['terms'])
            for key, stats in data['facets'].items())
    for key, total, values in sorted(rows, key=iget(1), reverse=True):
        tag, code = key.split('.')
        vals = u", ".join("%(term)s (%(count)s)" % v for v in values)
        print (u"%s\t%s\t%s\t%s" % (tag, code, total, vals)).encode('utf-8')
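# A hedged sketch of the facet payload shape query_results_to_tsv above appears
# to expect: a 'facets' dict keyed by dotted names with 'total' and 'terms'
# entries. The 'tag.650.a' key and the counts here are made up for illustration.
sample = {'facets': {
    'tag.650.a': {'total': 12,
                  'terms': [{'term': 'History', 'count': 7},
                            {'term': 'Maps', 'count': 5}]}}}
query_results_to_tsv(sample)
# prints (tab-separated): 650  a  12  History (7), Maps (5)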
def sorted_tags():
    p_tags = filter(pass_def_filter, tags.get(symbol, []))
    if not p_tags:
        status_message('Can\'t find "%s"' % symbol)
    p_tags = sorted(p_tags, key=iget('tag_path'))
    if setting('definition_current_first', False):
        p_tags = sorted(p_tags, cmp=definition_cmp)
    return p_tags
def query_results_to_tsv(data):
    rows = ((key.split('.', 1)[1], stats['total'], stats['terms'])
            for key, stats in data['facets'].items())
    for key, total, values in sorted(rows, key=iget(1), reverse=True):
        tag, code = key.split('.')
        vals = u", ".join("%(term)s (%(count)s)" % v for v in values)
        print(u"%s\t%s\t%s\t%s" % (tag, code, total, vals)).encode('utf-8')
def find_ellipses(centroids, clusters):
    """
    Returns: [(centroid_id, ((x_mean, x_std, x_density_normalized),
                             (y_mean, y_std, y_density_normalized)))]
    """
    c_density = dict()
    dmx, dmy = list(), list()
    for (c, members) in groupby(sorted(clusters, key=iget(2)), iget(2)):
        xs, ys, _ = zip(*members)
        # ignore outliers
        if len(xs) == 1:
            continue
        # fitting data
        ((xmean, xstd), (ymean, ystd)) = (norm.fit(xs), norm.fit(ys))
        # compute density value (y) in mean point
        probx = normpdf([xmean], xmean, xstd)
        proby = normpdf([ymean], ymean, ystd)
        dmx.append(probx)
        dmy.append(proby)
        # Save clusters mean and std
        c_density[c] = ((xmean, xstd, probx), (ymean, ystd, proby))
    # Compute dataset mean and std in mean points
    xm = (np.nanmean(dmx), np.nanstd(dmx))
    ym = (np.nanmean(dmy), np.nanstd(dmy))
    # Inject normalized density
    return list((c, ((xmean, xstd, fabs(probx - xm[0]) / xm[1]),
                     (ymean, ystd, fabs(proby - ym[0]) / ym[1])))
                for (c, ((xmean, xstd, probx), (ymean, ystd, proby)))
                in c_density.iteritems())
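# A small sketch of the grouping pattern used in find_ellipses above: groupby()
# only groups consecutive items, which is why the rows are first sorted on the
# same iget(2) key they are grouped by. The (x, y, cluster_id) rows here are
# hypothetical.
from itertools import groupby
from operator import itemgetter as iget

rows = [(1.0, 2.0, 'a'), (5.0, 6.0, 'b'), (1.5, 2.5, 'a')]
groups = {cid: list(members)
          for cid, members in groupby(sorted(rows, key=iget(2)), iget(2))}
# groups == {'a': [(1.0, 2.0, 'a'), (1.5, 2.5, 'a')], 'b': [(5.0, 6.0, 'b')]}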
def write_games_csv(data, fieldnames, filename='game_stats.csv'):
    """Write out the game overview dict to a CSV file."""
    gamelist = []
    for game, gamedata in data['games'].items():
        gamelist.append(gamedata)
    # print(gamelist)
    gamelist.sort(key=iget('datecreated'))
    with open(filename, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for game in gamelist:
            game['race'] = get_winner_race(data, game['id'], game['winner'])
            writer.writerow(game)
def g_apply(operations, source, groupi=None):
    """Applies the specified group operations to the source list of elements
    and then appends it to the group if it is unique.

    Args:
        operations (list): The symmetry group operation.
        source (list): Object for group to act on.
        groupi (list, optional): The growing symmetry group. Default is None.

    Returns:
        result (list): The result of the symmetry group on the object.
    """
    result = list(iget(*operations)(source))
    if groupi is not None and result not in groupi:
        groupi.append(result)
    return result
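# A minimal usage sketch for g_apply above, with a made-up operation and source:
# iget(1, 2, 0) reads elements 1, 2, 0 in that order, so applying it acts like a
# cyclic permutation of a three-element list.
group = []
g_apply([1, 2, 0], ['a', 'b', 'c'], group)  # returns ['b', 'c', 'a'] and appends it
g_apply([1, 2, 0], ['a', 'b', 'c'], group)  # same result, so group is unchanged
# group == [['b', 'c', 'a']]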
def resources_to_contacts():
    # Get Calendar Resources
    calendars = calendar_resource(options=options()).get_resource_feed(uri=options().calendar_resource_feed).entry

    # Select Calendars by options
    filtered_calendars = filter(lambda cal: fnmatch(cal.resource_email, options().select_pattern), calendars)

    # Fetch all domain users
    all_users = exhaust(admin(options=options()).users().list, dict(domain=options().domain, maxResults=500), 'users')

    # Get opt-out lists
    optout_emails_set = set() if not options().undo else get_optout_set(options().optout_uri)

    # Select domain users by options
    filtered_users = filtermap(
        lambda user: fnmatch(user['primaryEmail'], options().user_pattern) and
        unicode(user['primaryEmail']).lower() not in optout_emails_set,
        iget('primaryEmail'), all_users)

    logging.info('Starting Calendar Resource to Contacts Group copy operation. Selection is "%s" (%d calendar(s)) and target is "%s" (%d user(s))',
                 options().select_pattern, len(filtered_calendars),
                 options().user_pattern, len(filtered_users))

    process_users(filtered_users, filtered_calendars)
def sorted_tags():
    return sorted(chain(*(tags[k] for k in tags)), key=iget('tag_path'))
def to_qcr_format(rec, campaign_thresh=0.7, debug=False):
    if debug:
        print "Start conv, doing location"
        print "rec['location'] = ", rec['location']
    loc = sorted(rec['location'], key=iget('weight'), reverse=True)
    o_loc = None
    if len(loc) > 0:
        o_loc = {
            "type": "Point",
            "coordinates": [loc[0]["coords"][0]["lng"],
                            loc[0]["coords"][0]["lat"]]
        }
    if debug:
        print "Splitting campaigns"
    l_rec = []
    camps = filter(
        lambda x: x is not None,
        map(lambda x: [y for y in x.iteritems()][0]
            if x.values()[0] > campaign_thresh else None,
            rec['campaigns']))
    if debug:
        print "Max campaign association:", max(
            [x.values()[0] for x in rec['campaigns']])
        print "n recs to transform: ", len(camps)
    for camp in camps:
        keywords = map(iget(0), sorted(rec['keywords'], key=iget(1), reverse=True))
        hashtags = map(iget(0), sorted(rec['hashtags'], key=iget(1), reverse=True))
        l_rec.append({
            'uid': rec['id'],
            'label': rec['hashtags'][0] if len(rec['hashtags']) > 0 else 'None',
            'startDate': datetime.fromtimestamp(rec['start_time_ms'] / 1000.0).isoformat(),
            'endDate': datetime.fromtimestamp(rec['end_time_ms'] / 1000.0).isoformat(),
            'domains': rec['domains'],
            'hashtags': hashtags,
            'keywords': keywords,
            'urls': rec['urls'],
            'photos': rec['image_urls'],
            'importanceScore': camp[1],
            'topicMessageCount': rec['topic_message_count'],
            'campaignId': camp[0],
            'newsEventIds': [],
            'location': o_loc
        })
    return l_rec
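# Hypothetical illustration of the campaign-threshold filter used in
# to_qcr_format above, rewritten here as a plain list comprehension (the
# original relies on Python 2's iteritems()/values() list behavior). The
# campaign ids and scores are made up.
campaigns = [{'campA': 0.9}, {'campB': 0.4}, {'campC': 0.75}]
thresh = 0.7
camps = [list(c.items())[0] for c in campaigns if list(c.values())[0] > thresh]
# camps == [('campA', 0.9), ('campC', 0.75)]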
def sorted_tags():
    return sorted(chain(*(tags[k] for k in tags)), key=iget('tag_path'))
def sorted_tags():
    p_tags = list(filter(pass_def_filter, tags.get(symbol, [])))
    if not p_tags:
        status_message('Can\'t find "%s"' % symbol)
    p_tags = sorted(p_tags, key=iget('tag_path'))
    return p_tags
def sorted_tags():
    return sorted(chain.from_iterable(tags.itervalues()), key=iget('tag_path'))
def reload(self):
    flags = map(iget(0), filter(iget(1), self.formats.items()))
    self.set_text(' '.join(flags))
def reload(self):
    flags = map(iget(0), filter(iget(1), self.formats.items()))
    self.set_text(' '.join(flags))
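# A small sketch of the pattern used by the reload() variants above: iget(1)
# keeps the (name, enabled) pairs whose second element is truthy, and iget(0)
# projects out the names. The formats mapping here is hypothetical.
from operator import itemgetter as iget

formats = {'pdf': True, 'epub': False, 'mobi': True}
flags = map(iget(0), filter(iget(1), formats.items()))
text = ' '.join(flags)  # e.g. 'pdf mobi' (ordering follows the dict)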
def get_communities(self):
    partition = community.best_partition(self.graph)
    d1 = {}
    print "Communities found, getting event summary information"
    n_nodes = len(self.graph.nodes())
    checkpoints = [.1, .25, .5, .75, .9, .95, .99, 1.1]
    ind_checked = 0
    n_checked = 0
    for n in self.graph.nodes():
        n_checked += 1
        while n_checked > checkpoints[ind_checked] * n_nodes:
            ind_checked += 1
            print "Finished {}% of nodes".format(checkpoints[ind_checked - 1] * 100)
        images = set()
        com = str(partition[n])
        if n not in self.nodes_detailed:
            print "{} not found in detailed node list...why????".format(n)
            continue
        clust = self.nodes_detailed[n]
        if com in d1:
            d1[com]['cluster_ids'].append(n)
            d1[com]['topic_message_count'] += len(clust['similar_post_ids'])
        else:
            d1[com] = {
                'id': str(uuid.uuid4()),
                'name': 'default',
                'start_time_ms': clust['start_time_ms'],
                'end_time_ms': clust['end_time_ms'],
                'cluster_ids': [n],
                'hashtags': {},
                'keywords': {},
                'campaigns': {"total": 0, 'ids': {}},
                'urls': set([]),
                'image_urls': [],
                'location': {},
                'importance_score': 1.0,
                'topic_message_count': len(clust['similar_post_ids'])
            }
        # Expand Summary data (hashtags, keywords, images, urls, geo)
        if clust['data_type'] == 'hashtag':
            d1[com]['hashtags'][clust['term']] = len(clust['similar_post_ids'])
            # Add full text analysis, many communities have no image/text nodes
            self.get_text_sum(clust, d1[com])
        elif clust['data_type'] == 'image':
            pass
        elif clust['data_type'] == 'text':
            self.get_text_sum(clust, d1[com])
        images |= self.get_img_sum(clust)
        d1[com]['image_urls'] = list(set(d1[com]['image_urls']) | images)
        # Make Sure Time is Correct
        if clust['start_time_ms'] < d1[com]['start_time_ms']:
            d1[com]['start_time_ms'] = clust['start_time_ms']
        if clust['end_time_ms'] > d1[com]['end_time_ms']:
            d1[com]['end_time_ms'] = clust['end_time_ms']
    print "Information collected, formatting output"
    # Cleanup -> transform dicts to ordered lists, sets to lists for easy
    # javascript comprehension
    for com in d1.keys():
        l_camps = []
        if d1[com]['campaigns']['total'] != 0:
            l_camps = [{k: 1. * v / float(d1[com]['campaigns']['total'])}
                       for k, v in d1[com]['campaigns']['ids'].iteritems()]
        d1[com]['campaigns'] = l_camps
        # l_tags = map(lambda x: x[0], sorted([(k, v) for k, v in d1[com]['hashtags'].iteritems()], key=iget(1)))
        l_tags = sorted(list(d1[com]['hashtags'].iteritems()), key=iget(1), reverse=1)
        d1[com]['hashtags'] = l_tags[:100]  # slice
        # l_terms = map(lambda x: x[0], sorted([(k, v) for k, v in d1[com]['keywords'].iteritems()], key=lambda x: x[1]))
        l_terms = sorted(list(d1[com]['keywords'].iteritems()), key=iget(1), reverse=1)
        d1[com]['keywords'] = l_terms[:100]  # slice
        d1[com]['urls'] = list(d1[com]['urls'])
        temp = []
        for k, v in d1[com]['location'].iteritems():
            dt = v
            dt['label'] = k
            temp.append(dt)
        d1[com]['location'] = temp
    return d1
def CreateDemands(M):
    """
    Steps to create the demand distributions

    1. Use Demand keys to ensure that all demands in commodity_demand are used.
    2. Find any slices not set in DemandDefaultDistribution, and set them based
       on the associated SegFrac slice.
    3. Validate that the DemandDefaultDistribution sums to 1.
    4. Find any per-demand DemandSpecificDistribution values not set, and set
       them from DemandDefaultDistribution.  Note that this only sets a
       distribution for an end-use demand if the user has *not* specified
       anything for that end-use demand.  Thus, it is up to the user to fully
       specify the distribution, or not.  No in-between.
    5. Validate that the per-demand distributions sum to 1.
    """
    # Step 0: some setup for a couple of reusable items

    # iget(2): 2 = magic number to specify the third column.  Currently the
    # demand in the tuple (s, d, dem)
    DSD_dem_getter = iget(2)

    # Step 1
    used_dems = set(dem for p, dem in M.Demand.sparse_iterkeys())
    unused_dems = sorted(M.commodity_demand.difference(used_dems))
    if unused_dems:
        for dem in unused_dems:
            msg = "Warning: Demand '{}' is unused\n"
            SE.write(msg.format(dem))

    # Step 2
    DDD = M.DemandDefaultDistribution  # Shorter, for us lazy programmer types
    unset_defaults = set(M.SegFrac.sparse_iterkeys())
    unset_defaults.difference_update(DDD.sparse_iterkeys())
    if unset_defaults:
        # Some hackery because Pyomo thinks that this Param is constructed.
        # However, in our view, it is not yet, because we're specifically
        # targeting values that have not yet been constructed, that we know are
        # valid, and that we will need.
        # DDD._constructed = False
        for tslice in unset_defaults:
            DDD[tslice] = M.SegFrac[tslice]
        # DDD._constructed = True

    # Step 3
    total = sum(i for i in DDD.itervalues())
    if abs(value(total) - 1.0) > 0.001:
        # We can't explicitly test for "!= 1.0" because of incremental rounding
        # errors associated with the specification of demand shares by time
        # slice, but we check to make sure it is within the specified tolerance.
        key_padding = max(map(get_str_padding, DDD.sparse_iterkeys()))
        format = "%%-%ds = %%s" % key_padding
        # Works out to something like "%-25s = %s"
        items = sorted(DDD.items())
        items = '\n '.join(format % (str(k), v) for k, v in items)
        msg = ('The values of the DemandDefaultDistribution parameter do not '
               'sum to 1. The DemandDefaultDistribution specifies how end-use '
               'demands are distributed among the time slices (i.e., time_season, '
               'time_of_day), so together, the data must total to 1. Current '
               'values:\n {}\n\tsum = {}')
        raise Exception(msg.format(items, total))

    # Step 4
    DSD = M.DemandSpecificDistribution
    demands_specified = set(
        map(DSD_dem_getter, (i for i in DSD.sparse_iterkeys())))
    unset_demand_distributions = used_dems.difference(demands_specified)
    unset_distributions = set(
        cross_product(M.time_season, M.time_of_day, unset_demand_distributions))
    if unset_distributions:
        # Some hackery because Pyomo thinks that this Param is constructed.
        # However, in our view, it is not yet, because we're specifically
        # targeting values that have not yet been constructed, that we know are
        # valid, and that we will need.
        # DSD._constructed = False
        for s, d, dem in unset_distributions:
            DSD[s, d, dem] = DDD[s, d]
        # DSD._constructed = True

    # Step 5
    for dem in used_dems:
        keys = (k for k in DSD.sparse_iterkeys() if DSD_dem_getter(k) == dem)
        total = sum(DSD[i] for i in keys)
        if abs(value(total) - 1.0) > 0.001:
            # We can't explicitly test for "!= 1.0" because of incremental
            # rounding errors associated with the specification of demand shares
            # by time slice, but we check to make sure it is within the
            # specified tolerance.
            keys = [k for k in DSD.sparse_iterkeys() if DSD_dem_getter(k) == dem]
            key_padding = max(map(get_str_padding, keys))
            format = "%%-%ds = %%s" % key_padding
            # Works out to something like "%-25s = %s"
            items = sorted((k, DSD[k]) for k in keys)
            items = '\n '.join(format % (str(k), v) for k, v in items)
            msg = ('The values of the DemandSpecificDistribution parameter do not '
                   'sum to 1. The DemandSpecificDistribution specifies how end-use '
                   'demands are distributed per time-slice (i.e., time_season, '
                   'time_of_day). Within each end-use Demand, then, the distribution '
                   'must total to 1.\n\n Demand-specific distribution in error: '
                   ' {}\n\n {}\n\tsum = {}')
            raise Exception(msg.format(dem, items, total))
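# Tiny check of the "magic number" note in CreateDemands above: iget(2) just
# projects the demand out of an (s, d, dem) index tuple. The tuple below is a
# hypothetical (season, time_of_day, demand) key.
from operator import itemgetter as iget

DSD_dem_getter = iget(2)
DSD_dem_getter(('summer', 'day', 'res_heating'))  # -> 'res_heating'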
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Project Euler - Problem 14
Longest Collatz sequence
"""
from operator import itemgetter as iget


def collatz(nums):
    d = [1] * (nums + 1)
    for n in range(2, nums + 1):
        i, m = 0, n
        while m >= n:
            m = m // 2 if m % 2 == 0 else 3 * m + 1
            i += 1
        d[n] = i + d[m]
    return d


print(max(enumerate(collatz(1000000)), key=iget(1)))
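# A quick hand check of the recurrence used in collatz() above (requires the
# function from the script): d[n] counts the terms of the Collatz sequence
# starting at n, reusing the already-computed d[m] once the chain drops below n.
assert collatz(10)[6] == 9   # 6 -> 3 -> 10 -> 5 -> 16 -> 8 -> 4 -> 2 -> 1
assert collatz(10)[9] == 20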
def print_summary(data, known=None):
    for mtype, fields in packaged(data, known):
        print '#', mtype
        for field, subfields in sorted(fields, key=iget(0)):
            print field, ", ".join(subfields) if subfields else "(FIXED)"
        print
def sorted_tags():
    return sorted(tags.get(symbol, []), key=iget('tag_path'))
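# A minimal sketch of the sort key used by the sorted_tags() variants above:
# itemgetter('tag_path') orders tag records by that field. The records below
# are hypothetical.
from operator import itemgetter as iget

records = [{'tag_path': 'src/b.py'}, {'tag_path': 'src/a.py'}]
sorted(records, key=iget('tag_path'))
# -> [{'tag_path': 'src/a.py'}, {'tag_path': 'src/b.py'}]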